author    Linus Torvalds <torvalds@linux-foundation.org>    2008-01-29 06:54:01 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2008-01-29 06:54:01 -0500
commit    0ba6c33bcddc64a54b5f1c25a696c4767dc76292
tree      62e616f97a4762d8e75bf732e4827af2d15d52c5
parent    21af0297c7e56024a5ccc4d8ad2a590f9ec371ba
parent    85040bcb4643cba578839e953f25e2d1965d83d0
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.25
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.25: (1470 commits)
  [IPV6] ADDRLABEL: Fix double free on label deletion.
  [PPP]: Sparse warning fixes.
  [IPV4] fib_trie: remove unneeded NULL check
  [IPV4] fib_trie: More whitespace cleanup.
  [NET_SCHED]: Use nla_policy for attribute validation in ematches
  [NET_SCHED]: Use nla_policy for attribute validation in actions
  [NET_SCHED]: Use nla_policy for attribute validation in classifiers
  [NET_SCHED]: Use nla_policy for attribute validation in packet schedulers
  [NET_SCHED]: sch_api: introduce constant for rate table size
  [NET_SCHED]: Use typeful attribute parsing helpers
  [NET_SCHED]: Use typeful attribute construction helpers
  [NET_SCHED]: Use NLA_PUT_STRING for string dumping
  [NET_SCHED]: Use nla_nest_start/nla_nest_end
  [NET_SCHED]: Propagate nla_parse return value
  [NET_SCHED]: act_api: use PTR_ERR in tcf_action_init/tcf_action_get
  [NET_SCHED]: act_api: use nlmsg_parse
  [NET_SCHED]: act_api: fix netlink API conversion bug
  [NET_SCHED]: sch_netem: use nla_parse_nested_compat
  [NET_SCHED]: sch_atm: fix format string warning
  [NETNS]: Add namespace for ICMP replying code.
  ...
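A large share of the NET_SCHED commits in this merge replace open-coded netlink attribute checks with nla_policy tables. As a rough sketch of what that pattern looks like (the EXAMPLE_* attribute names below are hypothetical, for illustration only, not identifiers from this merge):

    #include <net/netlink.h>

    /* Hypothetical attribute set; real users define these in their headers. */
    enum {
            EXAMPLE_UNSPEC,
            EXAMPLE_LIMIT,          /* u32 */
            EXAMPLE_NAME,           /* string */
            __EXAMPLE_MAX
    };
    #define EXAMPLE_MAX (__EXAMPLE_MAX - 1)

    static const struct nla_policy example_policy[EXAMPLE_MAX + 1] = {
            [EXAMPLE_LIMIT] = { .type = NLA_U32 },
            [EXAMPLE_NAME]  = { .type = NLA_STRING },
    };

    static int example_parse(struct nlattr *opt)
    {
            struct nlattr *tb[EXAMPLE_MAX + 1];
            int err;

            /* Type and length validation happens here, against the policy,
             * instead of being open-coded at every nla_get_*() call site. */
            err = nla_parse_nested(tb, EXAMPLE_MAX, opt, example_policy);
            if (err < 0)
                    return err;

            if (tb[EXAMPLE_LIMIT])
                    pr_debug("limit: %u\n", nla_get_u32(tb[EXAMPLE_LIMIT]));
            return 0;
    }

With a policy table in place, nla_parse_nested() rejects malformed messages before the caller touches any attribute, which is what the "Use nla_policy for attribute validation" commits above are converting to.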
Diffstat (limited to 'net')
-rw-r--r-- net/802/Makefile | 3
-rw-r--r-- net/802/sysctl_net_802.c | 33
-rw-r--r-- net/802/tr.c | 29
-rw-r--r-- net/8021q/vlan.c | 437
-rw-r--r-- net/8021q/vlan.h | 42
-rw-r--r-- net/8021q/vlan_dev.c | 484
-rw-r--r-- net/8021q/vlan_netlink.c | 17
-rw-r--r-- net/8021q/vlanproc.c | 109
-rw-r--r-- net/8021q/vlanproc.h | 11
-rw-r--r-- net/Kconfig | 13
-rw-r--r-- net/Makefile | 1
-rw-r--r-- net/appletalk/aarp.c | 6
-rw-r--r-- net/appletalk/atalk_proc.c | 6
-rw-r--r-- net/appletalk/ddp.c | 5
-rw-r--r-- net/appletalk/sysctl_net_atalk.c | 24
-rw-r--r-- net/atm/Kconfig | 13
-rw-r--r-- net/atm/atm_sysfs.c | 66
-rw-r--r-- net/atm/br2684.c | 296
-rw-r--r-- net/atm/clip.c | 33
-rw-r--r-- net/atm/common.c | 2
-rw-r--r-- net/atm/lec.c | 7
-rw-r--r-- net/atm/proc.c | 6
-rw-r--r-- net/ax25/af_ax25.c | 9
-rw-r--r-- net/ax25/ax25_ds_timer.c | 2
-rw-r--r-- net/ax25/ax25_route.c | 2
-rw-r--r-- net/ax25/ax25_std_timer.c | 4
-rw-r--r-- net/ax25/ax25_uid.c | 6
-rw-r--r-- net/ax25/sysctl_net_ax25.c | 27
-rw-r--r-- net/bluetooth/bnep/sock.c | 4
-rw-r--r-- net/bluetooth/cmtp/sock.c | 4
-rw-r--r-- net/bluetooth/hci_conn.c | 9
-rw-r--r-- net/bluetooth/hidp/core.c | 5
-rw-r--r-- net/bluetooth/hidp/sock.c | 10
-rw-r--r-- net/bluetooth/l2cap.c | 13
-rw-r--r-- net/bluetooth/rfcomm/core.c | 4
-rw-r--r-- net/bluetooth/sco.c | 9
-rw-r--r-- net/bridge/br_input.c | 2
-rw-r--r-- net/bridge/br_netfilter.c | 40
-rw-r--r-- net/bridge/br_netlink.c | 13
-rw-r--r-- net/bridge/netfilter/Kconfig | 2
-rw-r--r-- net/bridge/netfilter/ebt_log.c | 3
-rw-r--r-- net/bridge/netfilter/ebt_ulog.c | 7
-rw-r--r-- net/bridge/netfilter/ebt_vlan.c | 2
-rw-r--r-- net/bridge/netfilter/ebtable_filter.c | 2
-rw-r--r-- net/bridge/netfilter/ebtable_nat.c | 2
-rw-r--r-- net/bridge/netfilter/ebtables.c | 2
-rw-r--r-- net/can/Kconfig | 44
-rw-r--r-- net/can/Makefile | 12
-rw-r--r-- net/can/af_can.c | 861
-rw-r--r-- net/can/af_can.h | 122
-rw-r--r-- net/can/bcm.c | 1561
-rw-r--r-- net/can/proc.c | 533
-rw-r--r-- net/can/raw.c | 763
-rw-r--r-- net/compat.c | 106
-rw-r--r-- net/core/datagram.c | 54
-rw-r--r-- net/core/dev.c | 61
-rw-r--r-- net/core/dev_mcast.c | 28
-rw-r--r-- net/core/dst.c | 11
-rw-r--r-- net/core/fib_rules.c | 104
-rw-r--r-- net/core/flow.c | 3
-rw-r--r-- net/core/gen_estimator.c | 23
-rw-r--r-- net/core/gen_stats.c | 10
-rw-r--r-- net/core/neighbour.c | 262
-rw-r--r-- net/core/net-sysfs.c | 20
-rw-r--r-- net/core/net_namespace.c | 9
-rw-r--r-- net/core/netpoll.c | 62
-rw-r--r-- net/core/pktgen.c | 104
-rw-r--r-- net/core/request_sock.c | 5
-rw-r--r-- net/core/rtnetlink.c | 56
-rw-r--r-- net/core/skbuff.c | 246
-rw-r--r-- net/core/sock.c | 200
-rw-r--r-- net/core/stream.c | 85
-rw-r--r-- net/core/sysctl_net_core.c | 70
-rw-r--r-- net/core/utils.c | 27
-rw-r--r-- net/dccp/Kconfig | 1
-rw-r--r-- net/dccp/ackvec.c | 163
-rw-r--r-- net/dccp/ackvec.h | 62
-rw-r--r-- net/dccp/ccid.c | 8
-rw-r--r-- net/dccp/ccid.h | 37
-rw-r--r-- net/dccp/ccids/Kconfig | 30
-rw-r--r-- net/dccp/ccids/ccid2.c | 228
-rw-r--r-- net/dccp/ccids/ccid2.h | 21
-rw-r--r-- net/dccp/ccids/ccid3.c | 710
-rw-r--r-- net/dccp/ccids/ccid3.h | 41
-rw-r--r-- net/dccp/ccids/lib/Makefile | 2
-rw-r--r-- net/dccp/ccids/lib/loss_interval.c | 352
-rw-r--r-- net/dccp/ccids/lib/loss_interval.h | 64
-rw-r--r-- net/dccp/ccids/lib/packet_history.c | 599
-rw-r--r-- net/dccp/ccids/lib/packet_history.h | 220
-rw-r--r-- net/dccp/ccids/lib/tfrc.c | 63
-rw-r--r-- net/dccp/ccids/lib/tfrc.h | 29
-rw-r--r-- net/dccp/dccp.h | 35
-rw-r--r-- net/dccp/feat.c | 29
-rw-r--r-- net/dccp/feat.h | 26
-rw-r--r-- net/dccp/input.c | 155
-rw-r--r-- net/dccp/ipv4.c | 12
-rw-r--r-- net/dccp/ipv6.c | 10
-rw-r--r-- net/dccp/minisocks.c | 33
-rw-r--r-- net/dccp/options.c | 139
-rw-r--r-- net/dccp/output.c | 55
-rw-r--r-- net/dccp/proto.c | 194
-rw-r--r-- net/dccp/sysctl.c | 36
-rw-r--r-- net/dccp/timer.c | 5
-rw-r--r-- net/decnet/af_decnet.c | 2
-rw-r--r-- net/decnet/dn_dev.c | 70
-rw-r--r-- net/decnet/dn_fib.c | 10
-rw-r--r-- net/decnet/dn_neigh.c | 6
-rw-r--r-- net/decnet/dn_nsp_out.c | 2
-rw-r--r-- net/decnet/dn_route.c | 37
-rw-r--r-- net/decnet/dn_rules.c | 3
-rw-r--r-- net/decnet/dn_table.c | 8
-rw-r--r-- net/decnet/netfilter/Kconfig | 1
-rw-r--r-- net/decnet/netfilter/dn_rtmsg.c | 6
-rw-r--r-- net/decnet/sysctl_net_decnet.c | 23
-rw-r--r-- net/econet/af_econet.c | 3
-rw-r--r-- net/ethernet/eth.c | 30
-rw-r--r-- net/ieee80211/Kconfig | 5
-rw-r--r-- net/ieee80211/ieee80211_crypt_tkip.c | 22
-rw-r--r-- net/ieee80211/ieee80211_module.c | 5
-rw-r--r-- net/ieee80211/ieee80211_rx.c | 51
-rw-r--r-- net/ieee80211/ieee80211_tx.c | 14
-rw-r--r-- net/ieee80211/ieee80211_wx.c | 2
-rw-r--r-- net/ieee80211/softmac/ieee80211softmac_auth.c | 6
-rw-r--r-- net/ieee80211/softmac/ieee80211softmac_io.c | 10
-rw-r--r-- net/ipv4/Kconfig | 7
-rw-r--r-- net/ipv4/Makefile | 3
-rw-r--r-- net/ipv4/af_inet.c | 46
-rw-r--r-- net/ipv4/ah4.c | 17
-rw-r--r-- net/ipv4/arp.c | 163
-rw-r--r-- net/ipv4/cipso_ipv4.c | 2
-rw-r--r-- net/ipv4/datagram.c | 2
-rw-r--r-- net/ipv4/devinet.c | 415
-rw-r--r-- net/ipv4/esp4.c | 33
-rw-r--r-- net/ipv4/fib_frontend.c | 271
-rw-r--r-- net/ipv4/fib_hash.c | 126
-rw-r--r-- net/ipv4/fib_lookup.h | 16
-rw-r--r-- net/ipv4/fib_rules.c | 81
-rw-r--r-- net/ipv4/fib_semantics.c | 64
-rw-r--r-- net/ipv4/fib_trie.c | 866
-rw-r--r-- net/ipv4/icmp.c | 130
-rw-r--r-- net/ipv4/igmp.c | 56
-rw-r--r-- net/ipv4/inet_connection_sock.c | 19
-rw-r--r-- net/ipv4/inet_diag.c | 2
-rw-r--r-- net/ipv4/inet_fragment.c | 85
-rw-r--r-- net/ipv4/inet_hashtables.c | 87
-rw-r--r-- net/ipv4/inet_timewait_sock.c | 23
-rw-r--r-- net/ipv4/ip_forward.c | 2
-rw-r--r-- net/ipv4/ip_fragment.c | 202
-rw-r--r-- net/ipv4/ip_gre.c | 136
-rw-r--r-- net/ipv4/ip_input.c | 22
-rw-r--r-- net/ipv4/ip_options.c | 4
-rw-r--r-- net/ipv4/ip_output.c | 55
-rw-r--r-- net/ipv4/ip_sockglue.c | 2
-rw-r--r-- net/ipv4/ipcomp.c | 20
-rw-r--r-- net/ipv4/ipconfig.c | 27
-rw-r--r-- net/ipv4/ipip.c | 72
-rw-r--r-- net/ipv4/ipmr.c | 35
-rw-r--r-- net/ipv4/ipvs/ip_vs_app.c | 9
-rw-r--r-- net/ipv4/ipvs/ip_vs_conn.c | 73
-rw-r--r-- net/ipv4/ipvs/ip_vs_core.c | 116
-rw-r--r-- net/ipv4/ipvs/ip_vs_ctl.c | 39
-rw-r--r-- net/ipv4/ipvs/ip_vs_est.c | 4
-rw-r--r-- net/ipv4/ipvs/ip_vs_lblc.c | 36
-rw-r--r-- net/ipv4/ipvs/ip_vs_lblcr.c | 36
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto.c | 2
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_esp.c | 16
-rw-r--r-- net/ipv4/ipvs/ip_vs_sched.c | 1
-rw-r--r-- net/ipv4/ipvs/ip_vs_sync.c | 34
-rw-r--r-- net/ipv4/ipvs/ip_vs_xmit.c | 16
-rw-r--r-- net/ipv4/netfilter.c | 43
-rw-r--r-- net/ipv4/netfilter/Kconfig | 85
-rw-r--r-- net/ipv4/netfilter/Makefile | 5
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 989
-rw-r--r-- net/ipv4/netfilter/arptable_filter.c | 2
-rw-r--r-- net/ipv4/netfilter/ip_queue.c | 210
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 493
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 55
-rw-r--r-- net/ipv4/netfilter/ipt_ECN.c | 43
-rw-r--r-- net/ipv4/netfilter/ipt_LOG.c | 45
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c | 43
-rw-r--r-- net/ipv4/netfilter/ipt_NETMAP.c | 52
-rw-r--r-- net/ipv4/netfilter/ipt_REDIRECT.c | 47
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c | 155
-rw-r--r-- net/ipv4/netfilter/ipt_SAME.c | 179
-rw-r--r-- net/ipv4/netfilter/ipt_TOS.c | 87
-rw-r--r-- net/ipv4/netfilter/ipt_TTL.c | 40
-rw-r--r-- net/ipv4/netfilter/ipt_ULOG.c | 52
-rw-r--r-- net/ipv4/netfilter/ipt_addrtype.c | 115
-rw-r--r-- net/ipv4/netfilter/ipt_ah.c | 39
-rw-r--r-- net/ipv4/netfilter/ipt_ecn.c | 35
-rw-r--r-- net/ipv4/netfilter/ipt_iprange.c | 79
-rw-r--r-- net/ipv4/netfilter/ipt_owner.c | 92
-rw-r--r-- net/ipv4/netfilter/ipt_recent.c | 41
-rw-r--r-- net/ipv4/netfilter/ipt_tos.c | 55
-rw-r--r-- net/ipv4/netfilter/ipt_ttl.c | 26
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c | 24
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c | 42
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c | 16
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 37
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 5
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 30
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c | 68
-rw-r--r-- net/ipv4/netfilter/nf_nat_h323.c | 26
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c | 29
-rw-r--r-- net/ipv4/netfilter/nf_nat_pptp.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_gre.c | 3
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_icmp.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_tcp.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_udp.c | 8
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_unknown.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c | 38
-rw-r--r-- net/ipv4/netfilter/nf_nat_sip.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_standalone.c | 22
-rw-r--r-- net/ipv4/proc.c | 15
-rw-r--r-- net/ipv4/raw.c | 216
-rw-r--r-- net/ipv4/route.c | 427
-rw-r--r-- net/ipv4/syncookies.c | 2
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 167
-rw-r--r-- net/ipv4/tcp.c | 223
-rw-r--r-- net/ipv4/tcp_bic.c | 3
-rw-r--r-- net/ipv4/tcp_cong.c | 23
-rw-r--r-- net/ipv4/tcp_cubic.c | 3
-rw-r--r-- net/ipv4/tcp_highspeed.c | 3
-rw-r--r-- net/ipv4/tcp_htcp.c | 3
-rw-r--r-- net/ipv4/tcp_hybla.c | 5
-rw-r--r-- net/ipv4/tcp_illinois.c | 3
-rw-r--r-- net/ipv4/tcp_input.c | 1281
-rw-r--r-- net/ipv4/tcp_ipv4.c | 8
-rw-r--r-- net/ipv4/tcp_lp.c | 4
-rw-r--r-- net/ipv4/tcp_output.c | 651
-rw-r--r-- net/ipv4/tcp_scalable.c | 3
-rw-r--r-- net/ipv4/tcp_timer.c | 43
-rw-r--r-- net/ipv4/tcp_vegas.c | 7
-rw-r--r-- net/ipv4/tcp_veno.c | 7
-rw-r--r-- net/ipv4/tcp_yeah.c | 3
-rw-r--r-- net/ipv4/udp.c | 106
-rw-r--r-- net/ipv4/udplite.c | 2
-rw-r--r-- net/ipv4/xfrm4_input.c | 134
-rw-r--r-- net/ipv4/xfrm4_mode_beet.c | 62
-rw-r--r-- net/ipv4/xfrm4_mode_tunnel.c | 93
-rw-r--r-- net/ipv4/xfrm4_output.c | 95
-rw-r--r-- net/ipv4/xfrm4_policy.c | 219
-rw-r--r-- net/ipv4/xfrm4_state.c | 20
-rw-r--r-- net/ipv6/Makefile | 5
-rw-r--r-- net/ipv6/addrconf.c | 574
-rw-r--r-- net/ipv6/addrlabel.c | 561
-rw-r--r-- net/ipv6/af_inet6.c | 184
-rw-r--r-- net/ipv6/ah6.c | 16
-rw-r--r-- net/ipv6/anycast.c | 4
-rw-r--r-- net/ipv6/datagram.c | 5
-rw-r--r-- net/ipv6/esp6.c | 40
-rw-r--r-- net/ipv6/exthdrs.c | 77
-rw-r--r-- net/ipv6/fib6_rules.c | 22
-rw-r--r-- net/ipv6/icmp.c | 98
-rw-r--r-- net/ipv6/inet6_hashtables.c | 4
-rw-r--r-- net/ipv6/ip6_fib.c | 43
-rw-r--r-- net/ipv6/ip6_flowlabel.c | 32
-rw-r--r-- net/ipv6/ip6_input.c | 15
-rw-r--r-- net/ipv6/ip6_output.c | 56
-rw-r--r-- net/ipv6/ip6_tunnel.c | 18
-rw-r--r-- net/ipv6/ipcomp6.c | 19
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 11
-rw-r--r-- net/ipv6/mcast.c | 24
-rw-r--r-- net/ipv6/mip6.c | 25
-rw-r--r-- net/ipv6/ndisc.c | 36
-rw-r--r-- net/ipv6/netfilter.c | 26
-rw-r--r-- net/ipv6/netfilter/Kconfig | 60
-rw-r--r-- net/ipv6/netfilter/Makefile | 1
-rw-r--r-- net/ipv6/netfilter/ip6_queue.c | 214
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 1210
-rw-r--r-- net/ipv6/netfilter/ip6t_HL.c | 39
-rw-r--r-- net/ipv6/netfilter/ip6t_LOG.c | 45
-rw-r--r-- net/ipv6/netfilter/ip6t_REJECT.c | 49
-rw-r--r-- net/ipv6/netfilter/ip6t_ah.c | 39
-rw-r--r-- net/ipv6/netfilter/ip6t_eui64.c | 34
-rw-r--r-- net/ipv6/netfilter/ip6t_frag.c | 40
-rw-r--r-- net/ipv6/netfilter/ip6t_hbh.c | 44
-rw-r--r-- net/ipv6/netfilter/ip6t_hl.c | 26
-rw-r--r-- net/ipv6/netfilter/ip6t_ipv6header.c | 40
-rw-r--r-- net/ipv6/netfilter/ip6t_mh.c | 39
-rw-r--r-- net/ipv6/netfilter/ip6t_owner.c | 92
-rw-r--r-- net/ipv6/netfilter/ip6t_rt.c | 39
-rw-r--r-- net/ipv6/netfilter/ip6table_filter.c | 24
-rw-r--r-- net/ipv6/netfilter/ip6table_mangle.c | 42
-rw-r--r-- net/ipv6/netfilter/ip6table_raw.c | 16
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 50
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 29
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c | 63
-rw-r--r-- net/ipv6/proc.c | 11
-rw-r--r-- net/ipv6/raw.c | 264
-rw-r--r-- net/ipv6/reassembly.c | 181
-rw-r--r-- net/ipv6/route.c | 246
-rw-r--r-- net/ipv6/sit.c | 149
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c | 163
-rw-r--r-- net/ipv6/tcp_ipv6.c | 46
-rw-r--r-- net/ipv6/udp.c | 92
-rw-r--r-- net/ipv6/udp_impl.h | 1
-rw-r--r-- net/ipv6/udplite.c | 27
-rw-r--r-- net/ipv6/xfrm6_input.c | 183
-rw-r--r-- net/ipv6/xfrm6_mode_beet.c | 48
-rw-r--r-- net/ipv6/xfrm6_mode_ro.c | 1
-rw-r--r-- net/ipv6/xfrm6_mode_tunnel.c | 73
-rw-r--r-- net/ipv6/xfrm6_output.c | 93
-rw-r--r-- net/ipv6/xfrm6_policy.c | 277
-rw-r--r-- net/ipv6/xfrm6_state.c | 25
-rw-r--r-- net/ipx/sysctl_net_ipx.c | 24
-rw-r--r-- net/irda/af_irda.c | 5
-rw-r--r-- net/irda/ircomm/ircomm_core.c | 12
-rw-r--r-- net/irda/irda_device.c | 169
-rw-r--r-- net/irda/iriap.c | 2
-rw-r--r-- net/irda/irlap_event.c | 25
-rw-r--r-- net/irda/irlmp.c | 7
-rw-r--r-- net/irda/irlmp_event.c | 4
-rw-r--r-- net/irda/irsysctl.c | 56
-rw-r--r-- net/iucv/af_iucv.c | 9
-rw-r--r-- net/iucv/iucv.c | 4
-rw-r--r-- net/key/af_key.c | 6
-rw-r--r-- net/lapb/lapb_iface.c | 2
-rw-r--r-- net/llc/llc_conn.c | 20
-rw-r--r-- net/llc/llc_station.c | 5
-rw-r--r-- net/llc/sysctl_net_llc.c | 24
-rw-r--r-- net/mac80211/Kconfig | 93
-rw-r--r-- net/mac80211/Makefile | 35
-rw-r--r-- net/mac80211/cfg.c | 552
-rw-r--r-- net/mac80211/debugfs_netdev.c | 60
-rw-r--r-- net/mac80211/ieee80211.c | 199
-rw-r--r-- net/mac80211/ieee80211_i.h | 99
-rw-r--r-- net/mac80211/ieee80211_iface.c | 17
-rw-r--r-- net/mac80211/ieee80211_ioctl.c | 112
-rw-r--r-- net/mac80211/ieee80211_led.c | 35
-rw-r--r-- net/mac80211/ieee80211_led.h | 6
-rw-r--r-- net/mac80211/ieee80211_rate.c | 59
-rw-r--r-- net/mac80211/ieee80211_rate.h | 108
-rw-r--r-- net/mac80211/ieee80211_sta.c | 731
-rw-r--r-- net/mac80211/key.c | 6
-rw-r--r-- net/mac80211/rc80211_pid.h | 285
-rw-r--r-- net/mac80211/rc80211_pid_algo.c | 549
-rw-r--r-- net/mac80211/rc80211_pid_debugfs.c | 223
-rw-r--r-- net/mac80211/rc80211_simple.c | 82
-rw-r--r-- net/mac80211/rx.c | 765
-rw-r--r-- net/mac80211/sta_info.c | 49
-rw-r--r-- net/mac80211/sta_info.h | 50
-rw-r--r-- net/mac80211/tx.c | 370
-rw-r--r-- net/mac80211/util.c | 132
-rw-r--r-- net/mac80211/wep.c | 10
-rw-r--r-- net/mac80211/wme.c | 27
-rw-r--r-- net/mac80211/wpa.c | 14
-rw-r--r-- net/netfilter/Kconfig | 180
-rw-r--r-- net/netfilter/Makefile | 6
-rw-r--r-- net/netfilter/core.c | 47
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 17
-rw-r--r-- net/netfilter/nf_conntrack_expect.c | 12
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_h323_asn1.c | 8
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c | 53
-rw-r--r-- net/netfilter/nf_conntrack_l3proto_generic.c | 7
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 264
-rw-r--r-- net/netfilter/nf_conntrack_proto.c | 7
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c | 8
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 328
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 96
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c | 14
-rw-r--r-- net/netfilter/nf_conntrack_proto_udplite.c | 13
-rw-r--r-- net/netfilter/nf_conntrack_sip.c | 8
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 20
-rw-r--r-- net/netfilter/nf_log.c | 12
-rw-r--r-- net/netfilter/nf_queue.c | 183
-rw-r--r-- net/netfilter/nf_sysctl.c | 134
-rw-r--r-- net/netfilter/nfnetlink.c | 2
-rw-r--r-- net/netfilter/nfnetlink_log.c | 203
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 595
-rw-r--r-- net/netfilter/x_tables.c | 65
-rw-r--r-- net/netfilter/xt_CLASSIFY.c | 44
-rw-r--r-- net/netfilter/xt_CONNMARK.c | 150
-rw-r--r-- net/netfilter/xt_CONNSECMARK.c | 56
-rw-r--r-- net/netfilter/xt_DSCP.c | 167
-rw-r--r-- net/netfilter/xt_MARK.c | 158
-rw-r--r-- net/netfilter/xt_NFLOG.c | 39
-rw-r--r-- net/netfilter/xt_NFQUEUE.c | 32
-rw-r--r-- net/netfilter/xt_NOTRACK.c | 29
-rw-r--r-- net/netfilter/xt_RATEEST.c | 205
-rw-r--r-- net/netfilter/xt_SECMARK.c | 40
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 94
-rw-r--r-- net/netfilter/xt_TCPOPTSTRIP.c | 147
-rw-r--r-- net/netfilter/xt_TRACE.c | 29
-rw-r--r-- net/netfilter/xt_comment.c | 33
-rw-r--r-- net/netfilter/xt_connbytes.c | 56
-rw-r--r-- net/netfilter/xt_connlimit.c | 75
-rw-r--r-- net/netfilter/xt_connmark.c | 129
-rw-r--r-- net/netfilter/xt_conntrack.c | 237
-rw-r--r-- net/netfilter/xt_dccp.c | 43
-rw-r--r-- net/netfilter/xt_dscp.c | 112
-rw-r--r-- net/netfilter/xt_esp.c | 43
-rw-r--r-- net/netfilter/xt_hashlimit.c | 104
-rw-r--r-- net/netfilter/xt_helper.c | 56
-rw-r--r-- net/netfilter/xt_iprange.c | 180
-rw-r--r-- net/netfilter/xt_length.c | 45
-rw-r--r-- net/netfilter/xt_limit.c | 57
-rw-r--r-- net/netfilter/xt_mac.c | 43
-rw-r--r-- net/netfilter/xt_mark.c | 100
-rw-r--r-- net/netfilter/xt_multiport.c | 100
-rw-r--r-- net/netfilter/xt_owner.c | 211
-rw-r--r-- net/netfilter/xt_physdev.c | 51
-rw-r--r-- net/netfilter/xt_pkttype.c | 51
-rw-r--r-- net/netfilter/xt_policy.c | 67
-rw-r--r-- net/netfilter/xt_quota.c | 37
-rw-r--r-- net/netfilter/xt_rateest.c | 178
-rw-r--r-- net/netfilter/xt_realm.c | 34
-rw-r--r-- net/netfilter/xt_sctp.c | 43
-rw-r--r-- net/netfilter/xt_state.c | 52
-rw-r--r-- net/netfilter/xt_statistic.c | 42
-rw-r--r-- net/netfilter/xt_string.c | 54
-rw-r--r-- net/netfilter/xt_tcpmss.c | 33
-rw-r--r-- net/netfilter/xt_tcpudp.c | 79
-rw-r--r-- net/netfilter/xt_time.c | 40
-rw-r--r-- net/netfilter/xt_u32.c | 29
-rw-r--r-- net/netlink/af_netlink.c | 167
-rw-r--r-- net/netlink/attr.c | 19
-rw-r--r-- net/netrom/nr_timer.c | 19
-rw-r--r-- net/netrom/sysctl_net_netrom.c | 24
-rw-r--r-- net/packet/af_packet.c | 115
-rw-r--r-- net/rose/af_rose.c | 13
-rw-r--r-- net/rose/rose_in.c | 2
-rw-r--r-- net/rose/rose_route.c | 10
-rw-r--r-- net/rose/sysctl_net_rose.c | 24
-rw-r--r-- net/rxrpc/af_rxrpc.c | 2
-rw-r--r-- net/rxrpc/ar-connection.c | 2
-rw-r--r-- net/rxrpc/ar-input.c | 8
-rw-r--r-- net/rxrpc/ar-peer.c | 2
-rw-r--r-- net/rxrpc/rxkad.c | 4
-rw-r--r-- net/sched/Kconfig | 11
-rw-r--r-- net/sched/act_api.c | 340
-rw-r--r-- net/sched/act_gact.c | 40
-rw-r--r-- net/sched/act_ipt.c | 55
-rw-r--r-- net/sched/act_mirred.c | 27
-rw-r--r-- net/sched/act_nat.c | 39
-rw-r--r-- net/sched/act_pedit.c | 29
-rw-r--r-- net/sched/act_police.c | 67
-rw-r--r-- net/sched/act_simple.c | 33
-rw-r--r-- net/sched/cls_api.c | 189
-rw-r--r-- net/sched/cls_basic.c | 57
-rw-r--r-- net/sched/cls_fw.c | 68
-rw-r--r-- net/sched/cls_route.c | 84
-rw-r--r-- net/sched/cls_rsvp.h | 69
-rw-r--r-- net/sched/cls_tcindex.c | 141
-rw-r--r-- net/sched/cls_u32.c | 103
-rw-r--r-- net/sched/em_meta.c | 64
-rw-r--r-- net/sched/em_text.c | 9
-rw-r--r-- net/sched/ematch.c | 98
-rw-r--r-- net/sched/sch_api.c | 134
-rw-r--r-- net/sched/sch_atm.c | 208
-rw-r--r-- net/sched/sch_blackhole.c | 2
-rw-r--r-- net/sched/sch_cbq.c | 183
-rw-r--r-- net/sched/sch_dsmark.c | 276
-rw-r--r-- net/sched/sch_fifo.c | 17
-rw-r--r-- net/sched/sch_generic.c | 46
-rw-r--r-- net/sched/sch_gred.c | 77
-rw-r--r-- net/sched/sch_hfsc.c | 92
-rw-r--r-- net/sched/sch_htb.c | 112
-rw-r--r-- net/sched/sch_ingress.c | 292
-rw-r--r-- net/sched/sch_netem.c | 143
-rw-r--r-- net/sched/sch_prio.c | 41
-rw-r--r-- net/sched/sch_red.c | 64
-rw-r--r-- net/sched/sch_sfq.c | 66
-rw-r--r-- net/sched/sch_tbf.c | 68
-rw-r--r-- net/sched/sch_teql.c | 2
-rw-r--r-- net/sctp/Kconfig | 1
-rw-r--r-- net/sctp/Makefile | 2
-rw-r--r-- net/sctp/associola.c | 83
-rw-r--r-- net/sctp/bind_addr.c | 35
-rw-r--r-- net/sctp/crc32c.c | 222
-rw-r--r-- net/sctp/input.c | 125
-rw-r--r-- net/sctp/ipv6.c | 7
-rw-r--r-- net/sctp/output.c | 1
-rw-r--r-- net/sctp/outqueue.c | 29
-rw-r--r-- net/sctp/protocol.c | 28
-rw-r--r-- net/sctp/sm_make_chunk.c | 132
-rw-r--r-- net/sctp/sm_statefuns.c | 148
-rw-r--r-- net/sctp/sm_statetable.c | 18
-rw-r--r-- net/sctp/socket.c | 22
-rw-r--r-- net/sctp/sysctl.c | 24
-rw-r--r-- net/sctp/transport.c | 13
-rw-r--r-- net/sctp/ulpevent.c | 2
-rw-r--r-- net/sctp/ulpqueue.c | 2
-rw-r--r-- net/socket.c | 55
-rw-r--r-- net/sunrpc/cache.c | 2
-rw-r--r-- net/sunrpc/sched.c | 5
-rw-r--r-- net/sunrpc/xprt.c | 5
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c | 6
-rw-r--r-- net/sunrpc/xprtsock.c | 6
-rw-r--r-- net/sysctl_net.c | 79
-rw-r--r-- net/tipc/core.h | 4
-rw-r--r-- net/tipc/port.c | 20
-rw-r--r-- net/unix/af_unix.c | 171
-rw-r--r-- net/unix/sysctl_net_unix.c | 52
-rw-r--r-- net/wireless/Kconfig | 10
-rw-r--r-- net/wireless/core.c | 3
-rw-r--r-- net/wireless/nl80211.c | 737
-rw-r--r-- net/wireless/wext.c | 43
-rw-r--r-- net/x25/af_x25.c | 4
-rw-r--r-- net/x25/sysctl_net_x25.c | 24
-rw-r--r-- net/x25/x25_facilities.c | 4
-rw-r--r-- net/x25/x25_forward.c | 2
-rw-r--r-- net/x25/x25_in.c | 2
-rw-r--r-- net/x25/x25_link.c | 7
-rw-r--r-- net/x25/x25_proc.c | 8
-rw-r--r-- net/x25/x25_route.c | 2
-rw-r--r-- net/x25/x25_subr.c | 2
-rw-r--r-- net/x25/x25_timer.c | 4
-rw-r--r-- net/xfrm/Kconfig | 11
-rw-r--r-- net/xfrm/Makefile | 1
-rw-r--r-- net/xfrm/xfrm_algo.c | 2
-rw-r--r-- net/xfrm/xfrm_hash.c | 9
-rw-r--r-- net/xfrm/xfrm_input.c | 176
-rw-r--r-- net/xfrm/xfrm_output.c | 154
-rw-r--r-- net/xfrm/xfrm_policy.c | 431
-rw-r--r-- net/xfrm/xfrm_proc.c | 96
-rw-r--r-- net/xfrm/xfrm_state.c | 214
-rw-r--r-- net/xfrm/xfrm_user.c | 8
520 files changed, 28870 insertions, 16810 deletions
diff --git a/net/802/Makefile b/net/802/Makefile
index 977704a54f68..68569ffddea1 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -3,9 +3,8 @@
 #
 
 # Check the p8022 selections against net/core/Makefile.
-obj-$(CONFIG_SYSCTL) += sysctl_net_802.o
 obj-$(CONFIG_LLC) += p8022.o psnap.o
-obj-$(CONFIG_TR) += p8022.o psnap.o tr.o sysctl_net_802.o
+obj-$(CONFIG_TR) += p8022.o psnap.o tr.o
 obj-$(CONFIG_NET_FC) += fc.o
 obj-$(CONFIG_FDDI) += fddi.o
 obj-$(CONFIG_HIPPI) += hippi.o
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c
deleted file mode 100644
index ead56037398b..000000000000
--- a/net/802/sysctl_net_802.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* -*- linux-c -*-
- * sysctl_net_802.c: sysctl interface to net 802 subsystem.
- *
- * Begun April 1, 1996, Mike Shaver.
- * Added /proc/sys/net/802 directory entry (empty =) ). [MS]
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/mm.h>
-#include <linux/if_tr.h>
-#include <linux/sysctl.h>
-
-#ifdef CONFIG_TR
-extern int sysctl_tr_rif_timeout;
-#endif
-
-struct ctl_table tr_table[] = {
-#ifdef CONFIG_TR
-	{
-		.ctl_name	= NET_TR_RIF_TIMEOUT,
-		.procname	= "rif_timeout",
-		.data		= &sysctl_tr_rif_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-#endif /* CONFIG_TR */
-	{ 0 },
-};
diff --git a/net/802/tr.c b/net/802/tr.c
index 1e115e5beab6..3f16b1720554 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -35,6 +35,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
+#include <linux/sysctl.h>
 #include <net/arp.h>
 #include <net/net_namespace.h>
 
@@ -634,6 +635,26 @@ struct net_device *alloc_trdev(int sizeof_priv)
 	return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
 }
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table tr_table[] = {
+	{
+		.ctl_name	= NET_TR_RIF_TIMEOUT,
+		.procname	= "rif_timeout",
+		.data		= &sysctl_tr_rif_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ 0 },
+};
+
+static __initdata struct ctl_path tr_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "token-ring", .ctl_name = NET_TR, },
+	{ }
+};
+#endif
+
 /*
  * Called during bootup. We don't actually have to initialise
  * too much for this.
@@ -641,12 +662,12 @@ struct net_device *alloc_trdev(int sizeof_priv)
 
 static int __init rif_init(void)
 {
-	init_timer(&rif_timer);
 	rif_timer.expires = jiffies + sysctl_tr_rif_timeout;
-	rif_timer.data = 0L;
-	rif_timer.function = rif_check_expire;
+	setup_timer(&rif_timer, rif_check_expire, 0);
 	add_timer(&rif_timer);
-
+#ifdef CONFIG_SYSCTL
+	register_sysctl_paths(tr_path, tr_table);
+#endif
 	proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
 	return 0;
 }
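The tr.c hunks above pair two mechanical conversions seen throughout this merge: init_timer() plus manual .function/.data assignment becomes a single setup_timer() call, and the token-ring sysctl table moves next to its code and is registered through a ctl_path with register_sysctl_paths(). A minimal sketch of the same shape for a hypothetical "foo" subsystem (sysctl_foo_timeout and foo_expire() are placeholders, not from this commit):

    #include <linux/timer.h>
    #include <linux/sysctl.h>
    #include <linux/jiffies.h>

    static int sysctl_foo_timeout = 600;    /* hypothetical tunable */
    static struct timer_list foo_timer;

    static void foo_expire(unsigned long data)
    {
            /* timer callback; 'data' is the last argument to setup_timer() */
    }

    static struct ctl_table foo_table[] = {
            {
                    .procname       = "timeout",
                    .data           = &sysctl_foo_timeout,
                    .maxlen         = sizeof(int),
                    .mode           = 0644,
                    .proc_handler   = &proc_dointvec
            },
            { }
    };

    static struct ctl_path foo_path[] = {
            { .procname = "net", .ctl_name = CTL_NET, },
            { .procname = "foo", },
            { }
    };

    static int __init foo_init(void)
    {
            /* one call instead of init_timer() + .function + .data */
            setup_timer(&foo_timer, foo_expire, 0);
            foo_timer.expires = jiffies + sysctl_foo_timeout;
            add_timer(&foo_timer);

            /* creates /proc/sys/net/foo/timeout; the returned header would
             * be kept for unregister_sysctl_table() on cleanup */
            register_sysctl_paths(foo_path, foo_table);
            return 0;
    }

register_sysctl_paths() took over from per-directory sysctl source files like the deleted net/802/sysctl_net_802.c, which is why tr.c can now own its table.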
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 032bf44eca5e..dbc81b965096 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -3,7 +3,7 @@
  * Ethernet-type device handling.
  *
  * Authors: Ben Greear <greearb@candelatech.com>
- *          Please send support related email to: vlan@scry.wanfear.com
+ *          Please send support related email to: netdev@vger.kernel.org
  * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
  *
  * Fixes:
@@ -43,23 +43,12 @@
 
 /* Our listing of VLAN group(s) */
 static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
-#define vlan_grp_hashfn(IDX) ((((IDX) >> VLAN_GRP_HASH_SHIFT) ^ (IDX)) & VLAN_GRP_HASH_MASK)
 
 static char vlan_fullname[] = "802.1Q VLAN Support";
 static char vlan_version[] = DRV_VERSION;
 static char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
 static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
 
-static int vlan_device_event(struct notifier_block *, unsigned long, void *);
-static int vlan_ioctl_handler(struct net *net, void __user *);
-static int unregister_vlan_dev(struct net_device *, unsigned short );
-
-static struct notifier_block vlan_notifier_block = {
-	.notifier_call = vlan_device_event,
-};
-
-/* These may be changed at run-time through IOCTLs */
-
 /* Determines interface naming scheme. */
 unsigned short vlan_name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD;
 
@@ -70,82 +59,11 @@ static struct packet_type vlan_packet_type = {
 
 /* End of global variables definitions. */
 
-/*
- * Function vlan_proto_init (pro)
- *
- * Initialize VLAN protocol layer,
- *
- */
-static int __init vlan_proto_init(void)
-{
-	int err;
-
-	printk(VLAN_INF "%s v%s %s\n",
-	       vlan_fullname, vlan_version, vlan_copyright);
-	printk(VLAN_INF "All bugs added by %s\n",
-	       vlan_buggyright);
-
-	/* proc file system initialization */
-	err = vlan_proc_init();
-	if (err < 0) {
-		printk(KERN_ERR
-		       "%s %s: can't create entry in proc filesystem!\n",
-		       __FUNCTION__, VLAN_NAME);
-		return err;
-	}
-
-	dev_add_pack(&vlan_packet_type);
-
-	/* Register us to receive netdevice events */
-	err = register_netdevice_notifier(&vlan_notifier_block);
-	if (err < 0)
-		goto err1;
-
-	err = vlan_netlink_init();
-	if (err < 0)
-		goto err2;
-
-	vlan_ioctl_set(vlan_ioctl_handler);
-	return 0;
-
-err2:
-	unregister_netdevice_notifier(&vlan_notifier_block);
-err1:
-	vlan_proc_cleanup();
-	dev_remove_pack(&vlan_packet_type);
-	return err;
-}
+static inline unsigned int vlan_grp_hashfn(unsigned int idx)
+{
+	return ((idx >> VLAN_GRP_HASH_SHIFT) ^ idx) & VLAN_GRP_HASH_MASK;
+}
 
-/*
- * Module 'remove' entry point.
- * o delete /proc/net/router directory and static entries.
- */
-static void __exit vlan_cleanup_module(void)
-{
-	int i;
-
-	vlan_ioctl_set(NULL);
-	vlan_netlink_fini();
-
-	/* Un-register us from receiving netdevice events */
-	unregister_netdevice_notifier(&vlan_notifier_block);
-
-	dev_remove_pack(&vlan_packet_type);
-
-	/* This table must be empty if there are no module
-	 * references left.
-	 */
-	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++) {
-		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
-	}
-	vlan_proc_cleanup();
-
-	synchronize_net();
-}
-
-module_init(vlan_proto_init);
-module_exit(vlan_cleanup_module);
-
 /* Must be invoked with RCU read lock (no preempt) */
 static struct vlan_group *__vlan_find_group(int real_dev_ifindex)
 {
@@ -180,7 +98,7 @@ static void vlan_group_free(struct vlan_group *grp)
 {
 	int i;
 
-	for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
+	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
 		kfree(grp->vlan_devices_arrays[i]);
 	kfree(grp);
 }
@@ -218,179 +136,50 @@ static void vlan_rcu_free(struct rcu_head *rcu)
 	vlan_group_free(container_of(rcu, struct vlan_group, rcu));
 }
 
-
-/* This returns 0 if everything went fine.
- * It will return 1 if the group was killed as a result.
- * A negative return indicates failure.
- *
- * The RTNL lock must be held.
- */
-static int unregister_vlan_dev(struct net_device *real_dev,
-			       unsigned short vlan_id)
+void unregister_vlan_dev(struct net_device *dev)
 {
-	struct net_device *dev = NULL;
-	int real_dev_ifindex = real_dev->ifindex;
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct net_device *real_dev = vlan->real_dev;
 	struct vlan_group *grp;
-	int i, ret;
-
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: VID: %i\n", __FUNCTION__, vlan_id);
-#endif
-
-	/* sanity check */
-	if (vlan_id >= VLAN_VID_MASK)
-		return -EINVAL;
+	unsigned short vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
-	grp = __vlan_find_group(real_dev_ifindex);
-
-	ret = 0;
-
-	if (grp) {
-		dev = vlan_group_get_device(grp, vlan_id);
-		if (dev) {
-			/* Remove proc entry */
-			vlan_proc_rem_dev(dev);
 
-			/* Take it out of our own structures, but be sure to
-			 * interlock with HW accelerating devices or SW vlan
-			 * input packet processing.
-			 */
-			if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
-				real_dev->vlan_rx_kill_vid(real_dev, vlan_id);
-
-			vlan_group_set_device(grp, vlan_id, NULL);
-			synchronize_net();
+	grp = __vlan_find_group(real_dev->ifindex);
+	BUG_ON(!grp);
 
+	vlan_proc_rem_dev(dev);
 
-			/* Caller unregisters (and if necessary, puts)
-			 * VLAN device, but we get rid of the reference to
-			 * real_dev here.
-			 */
-			dev_put(real_dev);
+	/* Take it out of our own structures, but be sure to interlock with
+	 * HW accelerating devices or SW vlan input packet processing.
+	 */
+	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+		real_dev->vlan_rx_kill_vid(real_dev, vlan_id);
 
-			/* If the group is now empty, kill off the
-			 * group.
-			 */
-			for (i = 0; i < VLAN_VID_MASK; i++)
-				if (vlan_group_get_device(grp, i))
-					break;
+	vlan_group_set_device(grp, vlan_id, NULL);
+	grp->nr_vlans--;
 
-			if (i == VLAN_VID_MASK) {
-				if (real_dev->features & NETIF_F_HW_VLAN_RX)
-					real_dev->vlan_rx_register(real_dev, NULL);
+	synchronize_net();
 
-				hlist_del_rcu(&grp->hlist);
+	/* If the group is now empty, kill off the group. */
+	if (grp->nr_vlans == 0) {
+		if (real_dev->features & NETIF_F_HW_VLAN_RX)
+			real_dev->vlan_rx_register(real_dev, NULL);
 
-				/* Free the group, after all cpu's are done. */
-				call_rcu(&grp->rcu, vlan_rcu_free);
+		hlist_del_rcu(&grp->hlist);
 
-				grp = NULL;
-				ret = 1;
-			}
-		}
+		/* Free the group, after all cpu's are done. */
+		call_rcu(&grp->rcu, vlan_rcu_free);
 	}
 
-	return ret;
-}
+	/* Get rid of the vlan's reference to real_dev */
+	dev_put(real_dev);
 
-int unregister_vlan_device(struct net_device *dev)
-{
-	int ret;
-
-	ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
-				  VLAN_DEV_INFO(dev)->vlan_id);
 	unregister_netdevice(dev);
-
-	if (ret == 1)
-		ret = 0;
-	return ret;
-}
-
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-
-static const struct header_ops vlan_header_ops = {
-	.create	 = vlan_dev_hard_header,
-	.rebuild = vlan_dev_rebuild_header,
-	.parse	 = eth_header_parse,
-};
-
-static int vlan_dev_init(struct net_device *dev)
-{
-	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
-	int subclass = 0;
-
-	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
-	dev->flags = real_dev->flags & ~IFF_UP;
-	dev->iflink = real_dev->ifindex;
-	dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
-					 (1<<__LINK_STATE_DORMANT))) |
-		     (1<<__LINK_STATE_PRESENT);
-
-	/* ipv6 shared card related stuff */
-	dev->dev_id = real_dev->dev_id;
-
-	if (is_zero_ether_addr(dev->dev_addr))
-		memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
-	if (is_zero_ether_addr(dev->broadcast))
-		memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
-
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
-		dev->header_ops = real_dev->header_ops;
-		dev->hard_header_len = real_dev->hard_header_len;
-		dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
-	} else {
-		dev->header_ops = &vlan_header_ops;
-		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
-		dev->hard_start_xmit = vlan_dev_hard_start_xmit;
-	}
-
-	if (real_dev->priv_flags & IFF_802_1Q_VLAN)
-		subclass = 1;
-
-	lockdep_set_class_and_subclass(&dev->_xmit_lock,
-				       &vlan_netdev_xmit_lock_key, subclass);
-	return 0;
-}
-
-void vlan_setup(struct net_device *new_dev)
-{
-	ether_setup(new_dev);
-
-	/* new_dev->ifindex = 0; it will be set when added to
-	 * the global list.
-	 * iflink is set as well.
-	 */
-	new_dev->get_stats = vlan_dev_get_stats;
-
-	/* Make this thing known as a VLAN device */
-	new_dev->priv_flags |= IFF_802_1Q_VLAN;
-
-	/* Set us up to have no queue, as the underlying Hardware device
-	 * can do all the queueing we could want.
-	 */
-	new_dev->tx_queue_len = 0;
-
-	/* set up method calls */
-	new_dev->change_mtu = vlan_dev_change_mtu;
-	new_dev->init = vlan_dev_init;
-	new_dev->open = vlan_dev_open;
-	new_dev->stop = vlan_dev_stop;
-	new_dev->set_mac_address = vlan_set_mac_address;
-	new_dev->set_multicast_list = vlan_dev_set_multicast_list;
-	new_dev->change_rx_flags = vlan_change_rx_flags;
-	new_dev->destructor = free_netdev;
-	new_dev->do_ioctl = vlan_dev_ioctl;
-
-	memset(new_dev->broadcast, 0, ETH_ALEN);
 }
 
-static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev)
+static void vlan_transfer_operstate(const struct net_device *dev,
+				    struct net_device *vlandev)
 {
 	/* Have to respect userspace enforced dormant state
 	 * of real device, also must allow supplicant running
@@ -412,23 +201,22 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
 
 int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
 {
+	char *name = real_dev->name;
+
 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
-		printk(VLAN_DBG "%s: VLANs not supported on %s.\n",
-		       __FUNCTION__, real_dev->name);
+		pr_info("8021q: VLANs not supported on %s\n", name);
 		return -EOPNOTSUPP;
 	}
 
 	if ((real_dev->features & NETIF_F_HW_VLAN_RX) &&
 	    !real_dev->vlan_rx_register) {
-		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
-		       __FUNCTION__, real_dev->name);
+		pr_info("8021q: device %s has buggy VLAN hw accel\n", name);
 		return -EOPNOTSUPP;
 	}
 
 	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	    (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) {
-		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
-		       __FUNCTION__, real_dev->name);
+		pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
 		return -EOPNOTSUPP;
 	}
 
@@ -438,18 +226,15 @@ int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
 	if (!(real_dev->flags & IFF_UP))
 		return -ENETDOWN;
 
-	if (__find_vlan_dev(real_dev, vlan_id) != NULL) {
-		/* was already registered. */
-		printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
+	if (__find_vlan_dev(real_dev, vlan_id) != NULL)
 		return -EEXIST;
-	}
 
 	return 0;
 }
 
 int register_vlan_dev(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	unsigned short vlan_id = vlan->vlan_id;
 	struct vlan_group *grp, *ngrp = NULL;
@@ -476,14 +261,16 @@ int register_vlan_dev(struct net_device *dev)
 	 * it into our local structure.
 	 */
 	vlan_group_set_device(grp, vlan_id, dev);
+	grp->nr_vlans++;
+
 	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
 		real_dev->vlan_rx_register(real_dev, ngrp);
 	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		real_dev->vlan_rx_add_vid(real_dev, vlan_id);
 
 	if (vlan_proc_add_dev(dev) < 0)
-		printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
-		       dev->name);
+		pr_warning("8021q: failed to add proc entry for %s\n",
+			   dev->name);
 	return 0;
 
 out_free_group:
@@ -502,11 +289,6 @@ static int register_vlan_device(struct net_device *real_dev,
 	char name[IFNAMSIZ];
 	int err;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n",
-	       __FUNCTION__, eth_IF_name, VLAN_ID);
-#endif
-
 	if (VLAN_ID >= VLAN_VID_MASK)
 		return -ERANGE;
 
@@ -515,10 +297,6 @@ static int register_vlan_device(struct net_device *real_dev,
 		return err;
 
 	/* Gotta set up the fields for the device. */
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n",
-	       vlan_name_type);
-#endif
 	switch (vlan_name_type) {
 	case VLAN_NAME_TYPE_RAW_PLUS_VID:
 		/* name will look like: eth1.0005 */
@@ -555,26 +333,16 @@ static int register_vlan_device(struct net_device *real_dev,
 	 */
 	new_dev->mtu = real_dev->mtu;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
-	VLAN_MEM_DBG("new_dev->priv malloc, addr: %p size: %i\n",
-		     new_dev->priv,
-		     sizeof(struct vlan_dev_info));
-#endif
-
-	VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
-	VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
-	VLAN_DEV_INFO(new_dev)->dent = NULL;
-	VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+	vlan_dev_info(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
+	vlan_dev_info(new_dev)->real_dev = real_dev;
+	vlan_dev_info(new_dev)->dent = NULL;
+	vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
 
 	new_dev->rtnl_link_ops = &vlan_link_ops;
 	err = register_vlan_dev(new_dev);
 	if (err < 0)
 		goto out_free_newdev;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "Allocated new device successfully, returning.\n");
-#endif
 	return 0;
 
 out_free_newdev:
@@ -585,7 +353,7 @@ out_free_newdev:
 static void vlan_sync_address(struct net_device *dev,
 			      struct net_device *vlandev)
 {
-	struct vlan_dev_info *vlan = VLAN_DEV_INFO(vlandev);
+	struct vlan_dev_info *vlan = vlan_dev_info(vlandev);
 
 	/* May be called without an actual change */
 	if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr))
@@ -606,7 +374,8 @@ static void vlan_sync_address(struct net_device *dev,
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
 
-static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+static int vlan_device_event(struct notifier_block *unused, unsigned long event,
+			     void *ptr)
 {
 	struct net_device *dev = ptr;
 	struct vlan_group *grp = __vlan_find_group(dev->ifindex);
@@ -683,20 +452,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_UNREGISTER:
 		/* Delete all VLANs for this dev. */
 		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
-			int ret;
-
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
 
-			ret = unregister_vlan_dev(dev,
-						  VLAN_DEV_INFO(vlandev)->vlan_id);
+			/* unregistration of last vlan destroys group, abort
+			 * afterwards */
+			if (grp->nr_vlans == 1)
+				i = VLAN_GROUP_ARRAY_LEN;
 
-			unregister_netdevice(vlandev);
-
-			/* Group was destroyed? */
-			if (ret == 1)
-				break;
+			unregister_vlan_dev(vlandev);
 		}
 		break;
 	}
@@ -705,6 +470,10 @@ out:
 	return NOTIFY_DONE;
 }
 
+static struct notifier_block vlan_notifier_block __read_mostly = {
+	.notifier_call = vlan_device_event,
+};
+
 /*
  * VLAN IOCTL handler.
  * o execute requested action or pass command to the device driver
@@ -724,10 +493,6 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 	args.device1[23] = 0;
 	args.u.device2[23] = 0;
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd);
-#endif
-
 	rtnl_lock();
 
 	switch (args.cmd) {
@@ -802,36 +567,16 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		err = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
-		err = unregister_vlan_device(dev);
+		unregister_vlan_dev(dev);
+		err = 0;
 		break;
 
-	case GET_VLAN_INGRESS_PRIORITY_CMD:
-		/* TODO:  Implement
-		   err = vlan_dev_get_ingress_priority(args);
-		   if (copy_to_user((void*)arg, &args,
-			sizeof(struct vlan_ioctl_args))) {
-			err = -EFAULT;
-		   }
-		*/
-		err = -EINVAL;
-		break;
-	case GET_VLAN_EGRESS_PRIORITY_CMD:
-		/* TODO:  Implement
-		   err = vlan_dev_get_egress_priority(args.device1, &(args.args);
-		   if (copy_to_user((void*)arg, &args,
-			sizeof(struct vlan_ioctl_args))) {
-			err = -EFAULT;
-		   }
-		*/
-		err = -EINVAL;
-		break;
 	case GET_VLAN_REALDEV_NAME_CMD:
 		err = 0;
 		vlan_dev_get_realdev_name(dev, args.u.device2);
 		if (copy_to_user(arg, &args,
-				 sizeof(struct vlan_ioctl_args))) {
+				 sizeof(struct vlan_ioctl_args)))
 			err = -EFAULT;
-		}
 		break;
 
 	case GET_VLAN_VID_CMD:
@@ -839,16 +584,12 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 		vlan_dev_get_vid(dev, &vid);
 		args.u.VID = vid;
 		if (copy_to_user(arg, &args,
-				 sizeof(struct vlan_ioctl_args))) {
+				 sizeof(struct vlan_ioctl_args)))
 			err = -EFAULT;
-		}
 		break;
 
 	default:
-		/* pass on to underlying device instead?? */
-		printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
-		       __FUNCTION__, args.cmd);
-		err = -EINVAL;
+		err = -EOPNOTSUPP;
 		break;
 	}
 out:
@@ -856,5 +597,59 @@ out:
 	return err;
 }
 
+static int __init vlan_proto_init(void)
+{
+	int err;
+
+	pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright);
+	pr_info("All bugs added by %s\n", vlan_buggyright);
+
+	err = vlan_proc_init();
+	if (err < 0)
+		goto err1;
+
+	err = register_netdevice_notifier(&vlan_notifier_block);
+	if (err < 0)
+		goto err2;
+
+	err = vlan_netlink_init();
+	if (err < 0)
+		goto err3;
+
+	dev_add_pack(&vlan_packet_type);
+	vlan_ioctl_set(vlan_ioctl_handler);
+	return 0;
+
+err3:
+	unregister_netdevice_notifier(&vlan_notifier_block);
+err2:
+	vlan_proc_cleanup();
+err1:
+	return err;
+}
+
+static void __exit vlan_cleanup_module(void)
+{
+	unsigned int i;
+
+	vlan_ioctl_set(NULL);
+	vlan_netlink_fini();
+
+	unregister_netdevice_notifier(&vlan_notifier_block);
+
+	dev_remove_pack(&vlan_packet_type);
+
+	/* This table must be empty if there are no module references left. */
+	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++)
+		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
+
+	vlan_proc_cleanup();
+
+	synchronize_net();
+}
+
+module_init(vlan_proto_init);
+module_exit(vlan_cleanup_module);
+
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
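The relocated vlan_proto_init() above is also a clean example of the kernel's goto-unwind error handling: each facility that initializes successfully is torn down, in reverse order, by the labels below the failure point. A compilable skeleton of the shape, with stub functions standing in for the real registration calls (foo_* names are placeholders, not real APIs):

    /* Stubs standing in for real subsystem init/cleanup calls. */
    static int foo_a_init(void) { return 0; }   /* e.g. vlan_proc_init() */
    static void foo_a_exit(void) { }
    static int foo_b_init(void) { return 0; }   /* e.g. register_netdevice_notifier() */
    static void foo_b_exit(void) { }
    static int foo_c_init(void) { return 0; }   /* e.g. vlan_netlink_init() */

    static int example_init(void)
    {
            int err;

            err = foo_a_init();
            if (err < 0)
                    goto err1;
            err = foo_b_init();
            if (err < 0)
                    goto err2;
            err = foo_c_init();
            if (err < 0)
                    goto err3;
            return 0;

    err3:
            foo_b_exit();   /* undo step b */
    err2:
            foo_a_exit();   /* undo step a */
    err1:
            return err;
    }

Note that the old version registered the packet handler before the fallible steps and had to remove it again on failure; the new one defers dev_add_pack() until nothing can fail, which simplifies the unwind.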
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 2cd1393073ec..73efcc715ccb 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,31 +3,6 @@
 
 #include <linux/if_vlan.h>
 
-/* Uncomment this if you want debug traces to be shown. */
-/* #define VLAN_DEBUG */
-
-#define VLAN_ERR KERN_ERR
-#define VLAN_INF KERN_INFO
-#define VLAN_DBG KERN_ALERT /* change these... to debug, having a hard time
-			     * changing the log level at run-time..for some reason.
-			     */
-
-/*
-
-These I use for memory debugging. I feared a leak at one time, but
-I never found it..and the problem seems to have dissappeared. Still,
-I'll bet they might prove useful again... --Ben
-
-
-#define VLAN_MEM_DBG(x, y, z) printk(VLAN_DBG "%s: " x, __FUNCTION__, y, z);
-#define VLAN_FMEM_DBG(x, y) printk(VLAN_DBG "%s: " x, __FUNCTION__, y);
-*/
-
-/* This way they don't do anything! */
-#define VLAN_MEM_DBG(x, y, z)
-#define VLAN_FMEM_DBG(x, y)
-
-
 extern unsigned short vlan_name_type;
 
 #define VLAN_GRP_HASH_SHIFT 5
@@ -45,23 +20,12 @@ extern unsigned short vlan_name_type;
  * Must be invoked with rcu_read_lock (ie preempt disabled)
  * or with RTNL.
  */
-struct net_device *__find_vlan_dev(struct net_device* real_dev,
-				   unsigned short VID); /* vlan.c */
+struct net_device *__find_vlan_dev(struct net_device *real_dev,
+				   unsigned short VID); /* vlan.c */
 
 /* found in vlan_dev.c */
-int vlan_dev_rebuild_header(struct sk_buff *skb);
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev);
-int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
-			 unsigned short type, const void *daddr,
-			 const void *saddr, unsigned len);
-int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
-int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev);
-int vlan_dev_change_mtu(struct net_device *dev, int new_mtu);
-int vlan_dev_open(struct net_device* dev);
-int vlan_dev_stop(struct net_device* dev);
-int vlan_set_mac_address(struct net_device *dev, void *p);
-int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd);
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, short vlan_prio);
 int vlan_dev_set_egress_priority(const struct net_device *dev,
@@ -70,13 +34,11 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
 			   u32 flag, short flag_val);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
-void vlan_change_rx_flags(struct net_device *dev, int change);
-void vlan_dev_set_multicast_list(struct net_device *vlan_dev);
 
 int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
-int unregister_vlan_device(struct net_device *dev);
+void unregister_vlan_dev(struct net_device *dev);
 
 int vlan_netlink_init(void);
 void vlan_netlink_fini(void);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4f99bb86af5c..8059fa42b085 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -3,7 +3,7 @@
  * Ethernet-type device handling.
  *
  * Authors: Ben Greear <greearb@candelatech.com>
- *          Please send support related email to: vlan@scry.wanfear.com
+ *          Please send support related email to: netdev@vger.kernel.org
  * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
  *
  * Fixes: Mar 22 2001: Martin Bokaemper <mbokaemper@unispherenetworks.com>
@@ -47,7 +47,7 @@
  *
  * TODO: This needs a checkup, I'm ignorant here. --BLG
  */
-int vlan_dev_rebuild_header(struct sk_buff *skb)
+static int vlan_dev_rebuild_header(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
@@ -60,9 +60,8 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
 		return arp_find(veth->h_dest, skb);
 #endif
 	default:
-		printk(VLAN_DBG
-		       "%s: unable to resolve type %X addresses.\n",
-		       dev->name, ntohs(veth->h_vlan_encapsulated_proto));
+		pr_debug("%s: unable to resolve type %X addresses.\n",
+			 dev->name, ntohs(veth->h_vlan_encapsulated_proto));
 
 		memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
 		break;
@@ -73,7 +72,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
 
 static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 {
-	if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
+	if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		if (skb_shared(skb) || skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			kfree_skb(skb);
@@ -90,6 +89,40 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 	return skb;
 }
 
+static inline void vlan_set_encap_proto(struct sk_buff *skb,
+					struct vlan_hdr *vhdr)
+{
+	__be16 proto;
+	unsigned char *rawp;
+
+	/*
+	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
+	 * three protocols care about.
+	 */
+
+	proto = vhdr->h_vlan_encapsulated_proto;
+	if (ntohs(proto) >= 1536) {
+		skb->protocol = proto;
+		return;
+	}
+
+	rawp = skb->data;
+	if (*(unsigned short *)rawp == 0xFFFF)
+		/*
+		 * This is a magic hack to spot IPX packets. Older Novell
+		 * breaks the protocol design and runs IPX over 802.3 without
+		 * an 802.2 LLC layer. We look for FFFF which isn't a used
+		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
+		 * but does for the rest.
+		 */
+		skb->protocol = htons(ETH_P_802_3);
+	else
+		/*
+		 * Real 802.2 LLC
+		 */
+		skb->protocol = htons(ETH_P_802_2);
+}
+
 /*
  * Determine the packet's protocol ID. The rule here is that we
  * assume 802.3 if the type field is short enough to be a length.
@@ -107,115 +140,58 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
  * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be
  *                stored UNALIGNED in the memory. RISC systems don't like
  *                such cases very much...
- * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be aligned,
- *                 so there doesn't need to be any of the unaligned stuff. It has
- *                 been commented out now... --Ben
+ * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
+ *                 aligned, so there doesn't need to be any of the unaligned
+ *                 stuff. It has been commented out now... --Ben
  *
  */
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-		  struct packet_type* ptype, struct net_device *orig_dev)
+		  struct packet_type *ptype, struct net_device *orig_dev)
 {
-	unsigned char *rawp = NULL;
 	struct vlan_hdr *vhdr;
 	unsigned short vid;
 	struct net_device_stats *stats;
 	unsigned short vlan_TCI;
-	__be16 proto;
 
-	if (dev->nd_net != &init_net) {
-		kfree_skb(skb);
-		return -1;
-	}
+	if (dev->nd_net != &init_net)
+		goto err_free;
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
-		return -1;
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		goto err_free;
 
-	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) {
-		kfree_skb(skb);
-		return -1;
-	}
-
-	vhdr = (struct vlan_hdr *)(skb->data);
+	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+		goto err_free;
 
-	/* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */
+	vhdr = (struct vlan_hdr *)skb->data;
 	vlan_TCI = ntohs(vhdr->h_vlan_TCI);
-
 	vid = (vlan_TCI & VLAN_VID_MASK);
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: skb: %p vlan_id: %hx\n",
-	       __FUNCTION__, skb, vid);
-#endif
-
-	/* Ok, we will find the correct VLAN device, strip the header,
-	 * and then go on as usual.
-	 */
-
-	/* We have 12 bits of vlan ID.
-	 *
-	 * We must not drop allow preempt until we hold a
-	 * reference to the device (netif_rx does that) or we
-	 * fail.
-	 */
-
 	rcu_read_lock();
 	skb->dev = __find_vlan_dev(dev, vid);
 	if (!skb->dev) {
-		rcu_read_unlock();
-
-#ifdef VLAN_DEBUG
-		printk(VLAN_DBG "%s: ERROR: No net_device for VID: %i on dev: %s [%i]\n",
-		       __FUNCTION__, (unsigned int)(vid), dev->name, dev->ifindex);
-#endif
-		kfree_skb(skb);
-		return -1;
+		pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
+			 __FUNCTION__, (unsigned int)vid, dev->name);
+		goto err_unlock;
 	}
 
 	skb->dev->last_rx = jiffies;
 
-	/* Bump the rx counters for the VLAN device. */
-	stats = vlan_dev_get_stats(skb->dev);
+	stats = &skb->dev->stats;
 	stats->rx_packets++;
 	stats->rx_bytes += skb->len;
 
-	/* Take off the VLAN header (4 bytes currently) */
 	skb_pull_rcsum(skb, VLAN_HLEN);
 
-	/* Ok, lets check to make sure the device (dev) we
-	 * came in on is what this VLAN is attached to.
-	 */
-
-	if (dev != VLAN_DEV_INFO(skb->dev)->real_dev) {
-		rcu_read_unlock();
-
-#ifdef VLAN_DEBUG
-		printk(VLAN_DBG "%s: dropping skb: %p because came in on wrong device, dev: %s real_dev: %s, skb_dev: %s\n",
-		       __FUNCTION__, skb, dev->name,
-		       VLAN_DEV_INFO(skb->dev)->real_dev->name,
-		       skb->dev->name);
-#endif
-		kfree_skb(skb);
-		stats->rx_errors++;
-		return -1;
-	}
-
-	/*
-	 * Deal with ingress priority mapping.
-	 */
-	skb->priority = vlan_get_ingress_priority(skb->dev, ntohs(vhdr->h_vlan_TCI));
+	skb->priority = vlan_get_ingress_priority(skb->dev,
+						  ntohs(vhdr->h_vlan_TCI));
 
-#ifdef VLAN_DEBUG
-	printk(VLAN_DBG "%s: priority: %lu for TCI: %hu (hbo)\n",
-	       __FUNCTION__, (unsigned long)(skb->priority),
-	       ntohs(vhdr->h_vlan_TCI));
-#endif
+	pr_debug("%s: priority: %u for TCI: %hu\n",
+		 __FUNCTION__, skb->priority, ntohs(vhdr->h_vlan_TCI));
 
-	/* The ethernet driver already did the pkt_type calculations
-	 * for us...
-	 */
 	switch (skb->pkt_type) {
 	case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
-		// stats->broadcast ++; // no such counter :-(
+		/* stats->broadcast ++; // no such counter :-( */
219 break; 195 break;
220 196
221 case PACKET_MULTICAST: 197 case PACKET_MULTICAST:
@@ -224,109 +200,47 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
224 200
225 case PACKET_OTHERHOST: 201 case PACKET_OTHERHOST:
226 /* Our lower layer thinks this is not local, let's make sure. 202 /* Our lower layer thinks this is not local, let's make sure.
227 * This allows the VLAN to have a different MAC than the underlying 203 * This allows the VLAN to have a different MAC than the
228 * device, and still route correctly. 204 * underlying device, and still route correctly.
229 */ 205 */
230 if (!compare_ether_addr(eth_hdr(skb)->h_dest, skb->dev->dev_addr)) { 206 if (!compare_ether_addr(eth_hdr(skb)->h_dest,
231 /* It is for our (changed) MAC-address! */ 207 skb->dev->dev_addr))
232 skb->pkt_type = PACKET_HOST; 208 skb->pkt_type = PACKET_HOST;
233 }
234 break; 209 break;
235 default: 210 default:
236 break; 211 break;
237 } 212 }
238 213
239 /* Was a VLAN packet, grab the encapsulated protocol, which the layer 214 vlan_set_encap_proto(skb, vhdr);
240 * three protocols care about.
241 */
242 /* proto = get_unaligned(&vhdr->h_vlan_encapsulated_proto); */
243 proto = vhdr->h_vlan_encapsulated_proto;
244
245 skb->protocol = proto;
246 if (ntohs(proto) >= 1536) {
247 /* place it back on the queue to be handled by
248 * true layer 3 protocols.
249 */
250
251 /* See if we are configured to re-write the VLAN header
252 * to make it look like ethernet...
253 */
254 skb = vlan_check_reorder_header(skb);
255
256 /* Can be null if skb-clone fails when re-ordering */
257 if (skb) {
258 netif_rx(skb);
259 } else {
260 /* TODO: Add a more specific counter here. */
261 stats->rx_errors++;
262 }
263 rcu_read_unlock();
264 return 0;
265 }
266
267 rawp = skb->data;
268
269 /*
270 * This is a magic hack to spot IPX packets. Older Novell breaks
271 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
272 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
273 * won't work for fault tolerant netware but does for the rest.
274 */
275 if (*(unsigned short *)rawp == 0xFFFF) {
276 skb->protocol = htons(ETH_P_802_3);
277 /* place it back on the queue to be handled by true layer 3 protocols.
278 */
279
280 /* See if we are configured to re-write the VLAN header
281 * to make it look like ethernet...
282 */
283 skb = vlan_check_reorder_header(skb);
284 215
285 /* Can be null if skb-clone fails when re-ordering */
286 if (skb) {
287 netif_rx(skb);
288 } else {
289 /* TODO: Add a more specific counter here. */
290 stats->rx_errors++;
291 }
292 rcu_read_unlock();
293 return 0;
294 }
295
296 /*
297 * Real 802.2 LLC
298 */
299 skb->protocol = htons(ETH_P_802_2);
300 /* place it back on the queue to be handled by upper layer protocols.
301 */
302
303 /* See if we are configured to re-write the VLAN header
304 * to make it look like ethernet...
305 */
306 skb = vlan_check_reorder_header(skb); 216 skb = vlan_check_reorder_header(skb);
307 217 if (!skb) {
308 /* Can be null if skb-clone fails when re-ordering */
309 if (skb) {
310 netif_rx(skb);
311 } else {
312 /* TODO: Add a more specific counter here. */
313 stats->rx_errors++; 218 stats->rx_errors++;
219 goto err_unlock;
314 } 220 }
221
222 netif_rx(skb);
315 rcu_read_unlock(); 223 rcu_read_unlock();
316 return 0; 224 return NET_RX_SUCCESS;
225
226err_unlock:
227 rcu_read_unlock();
228err_free:
229 kfree_skb(skb);
230 return NET_RX_DROP;
317} 231}
318 232
319static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev, 233static inline unsigned short
320 struct sk_buff* skb) 234vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
321{ 235{
322 struct vlan_priority_tci_mapping *mp = 236 struct vlan_priority_tci_mapping *mp;
323 VLAN_DEV_INFO(dev)->egress_priority_map[(skb->priority & 0xF)];
324 237
238 mp = vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
325 while (mp) { 239 while (mp) {
326 if (mp->priority == skb->priority) { 240 if (mp->priority == skb->priority) {
327 return mp->vlan_qos; /* This should already be shifted to mask 241 return mp->vlan_qos; /* This should already be shifted
328 * correctly with the VLAN's TCI 242 * to mask correctly with the
329 */ 243 * VLAN's TCI */
330 } 244 }
331 mp = mp->next; 245 mp = mp->next;
332 } 246 }
@@ -342,20 +256,20 @@ static inline unsigned short vlan_dev_get_egress_qos_mask(struct net_device* dev
342 * This is called when the SKB is moving down the stack towards the 256 * This is called when the SKB is moving down the stack towards the
343 * physical devices. 257 * physical devices.
344 */ 258 */
345int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, 259static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
346 unsigned short type, 260 unsigned short type,
347 const void *daddr, const void *saddr, unsigned len) 261 const void *daddr, const void *saddr,
262 unsigned int len)
348{ 263{
349 struct vlan_hdr *vhdr; 264 struct vlan_hdr *vhdr;
350 unsigned short veth_TCI = 0; 265 unsigned short veth_TCI = 0;
351 int rc = 0; 266 int rc = 0;
352 int build_vlan_header = 0; 267 int build_vlan_header = 0;
353 struct net_device *vdev = dev; /* save this for the bottom of the method */ 268 struct net_device *vdev = dev;
354 269
355#ifdef VLAN_DEBUG 270 pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n",
356 printk(VLAN_DBG "%s: skb: %p type: %hx len: %x vlan_id: %hx, daddr: %p\n", 271 __FUNCTION__, skb, type, len, vlan_dev_info(dev)->vlan_id,
357 __FUNCTION__, skb, type, len, VLAN_DEV_INFO(dev)->vlan_id, daddr); 272 daddr);
358#endif
359 273
360 /* build vlan header only if re_order_header flag is NOT set. This 274 /* build vlan header only if re_order_header flag is NOT set. This
361 * fixes some programs that get confused when they see a VLAN device 275 * fixes some programs that get confused when they see a VLAN device
@@ -365,7 +279,7 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
365 * header shuffling in the hard_start_xmit. Users can turn off this 279 * header shuffling in the hard_start_xmit. Users can turn off this
366 * REORDER behaviour with the vconfig tool. 280 * REORDER behaviour with the vconfig tool.
367 */ 281 */
368 if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR)) 282 if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR))
369 build_vlan_header = 1; 283 build_vlan_header = 1;
370 284
371 if (build_vlan_header) { 285 if (build_vlan_header) {
@@ -373,29 +287,28 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
373 287
374 /* build the four bytes that make this a VLAN header. */ 288 /* build the four bytes that make this a VLAN header. */
375 289
376 /* Now, construct the second two bytes. This field looks something 290 /* Now, construct the second two bytes. This field looks
377 * like: 291 * something like:
378 * usr_priority: 3 bits (high bits) 292 * usr_priority: 3 bits (high bits)
379 * CFI 1 bit 293 * CFI 1 bit
380 * VLAN ID 12 bits (low bits) 294 * VLAN ID 12 bits (low bits)
381 * 295 *
382 */ 296 */
383 veth_TCI = VLAN_DEV_INFO(dev)->vlan_id; 297 veth_TCI = vlan_dev_info(dev)->vlan_id;
384 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); 298 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
385 299
386 vhdr->h_vlan_TCI = htons(veth_TCI); 300 vhdr->h_vlan_TCI = htons(veth_TCI);
387 301
388 /* 302 /*
389 * Set the protocol type. 303 * Set the protocol type. For a packet of type ETH_P_802_3 we
390 * For a packet of type ETH_P_802_3 we put the length in here instead. 304 * put the length in here instead. It is up to the 802.2
391 * It is up to the 802.2 layer to carry protocol information. 305 * layer to carry protocol information.
392 */ 306 */
393 307
394 if (type != ETH_P_802_3) { 308 if (type != ETH_P_802_3)
395 vhdr->h_vlan_encapsulated_proto = htons(type); 309 vhdr->h_vlan_encapsulated_proto = htons(type);
396 } else { 310 else
397 vhdr->h_vlan_encapsulated_proto = htons(len); 311 vhdr->h_vlan_encapsulated_proto = htons(len);
398 }
399 312
400 skb->protocol = htons(ETH_P_8021Q); 313 skb->protocol = htons(ETH_P_8021Q);
401 skb_reset_network_header(skb); 314 skb_reset_network_header(skb);
@@ -405,16 +318,16 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
405 if (saddr == NULL) 318 if (saddr == NULL)
406 saddr = dev->dev_addr; 319 saddr = dev->dev_addr;
407 320
408 dev = VLAN_DEV_INFO(dev)->real_dev; 321 dev = vlan_dev_info(dev)->real_dev;
409 322
410 /* MPLS can send us skbuffs w/out enough space. This check will grow the 323 /* MPLS can send us skbuffs w/out enough space. This check will grow
411 * skb if it doesn't have enough headroom. Not a beautiful solution, so 324 * the skb if it doesn't have enough headroom. Not a beautiful solution,
412 * I'll tick a counter so that users can know it's happening... If they 325 * so I'll tick a counter so that users can know it's happening...
413 * care... 326 * If they care...
414 */ 327 */
415 328
416 /* NOTE: This may still break if the underlying device is not the final 329 /* NOTE: This may still break if the underlying device is not the final
417 * device (and thus there are more headers to add...) It should work for 330 * device (and thus there are more headers to add...) It should work for
418 * good-ole-ethernet though. 331 * good-ole-ethernet though.
419 */ 332 */
420 if (skb_headroom(skb) < dev->hard_header_len) { 333 if (skb_headroom(skb) < dev->hard_header_len) {
@@ -422,14 +335,12 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
422 skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len); 335 skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len);
423 kfree_skb(sk_tmp); 336 kfree_skb(sk_tmp);
424 if (skb == NULL) { 337 if (skb == NULL) {
425 struct net_device_stats *stats = vlan_dev_get_stats(vdev); 338 struct net_device_stats *stats = &vdev->stats;
426 stats->tx_dropped++; 339 stats->tx_dropped++;
427 return -ENOMEM; 340 return -ENOMEM;
428 } 341 }
429 VLAN_DEV_INFO(vdev)->cnt_inc_headroom_on_tx++; 342 vlan_dev_info(vdev)->cnt_inc_headroom_on_tx++;
430#ifdef VLAN_DEBUG 343 pr_debug("%s: %s: had to grow skb\n", __FUNCTION__, vdev->name);
431 printk(VLAN_DBG "%s: %s: had to grow skb.\n", __FUNCTION__, vdev->name);
432#endif
433 } 344 }
434 345
435 if (build_vlan_header) { 346 if (build_vlan_header) {
@@ -441,19 +352,19 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
441 else if (rc < 0) 352 else if (rc < 0)
442 rc -= VLAN_HLEN; 353 rc -= VLAN_HLEN;
443 } else 354 } else
444 /* If here, then we'll just make a normal looking ethernet frame, 355 /* If here, then we'll just make a normal looking ethernet
445 * but, the hard_start_xmit method will insert the tag (it has to 356 * frame, but, the hard_start_xmit method will insert the tag
446 * be able to do this for bridged and other skbs that don't come 357 * (it has to be able to do this for bridged and other skbs
447 * down the protocol stack in an orderly manner. 358 * that don't come down the protocol stack in an orderly manner.
448 */ 359 */
449 rc = dev_hard_header(skb, dev, type, daddr, saddr, len); 360 rc = dev_hard_header(skb, dev, type, daddr, saddr, len);
450 361
451 return rc; 362 return rc;
452} 363}
453 364
454int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) 365static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
455{ 366{
456 struct net_device_stats *stats = vlan_dev_get_stats(dev); 367 struct net_device_stats *stats = &dev->stats;
457 struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); 368 struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
458 369
459 /* Handle non-VLAN frames if they are sent to us, for example by DHCP. 370 /* Handle non-VLAN frames if they are sent to us, for example by DHCP.
@@ -463,24 +374,22 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
463 */ 374 */
464 375
465 if (veth->h_vlan_proto != htons(ETH_P_8021Q) || 376 if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
466 VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR) { 377 vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
467 int orig_headroom = skb_headroom(skb); 378 int orig_headroom = skb_headroom(skb);
468 unsigned short veth_TCI; 379 unsigned short veth_TCI;
469 380
470 /* This is not a VLAN frame...but we can fix that! */ 381 /* This is not a VLAN frame...but we can fix that! */
471 VLAN_DEV_INFO(dev)->cnt_encap_on_xmit++; 382 vlan_dev_info(dev)->cnt_encap_on_xmit++;
472 383
473#ifdef VLAN_DEBUG 384 pr_debug("%s: proto to encap: 0x%hx\n",
474 printk(VLAN_DBG "%s: proto to encap: 0x%hx (hbo)\n", 385 __FUNCTION__, htons(veth->h_vlan_proto));
475 __FUNCTION__, htons(veth->h_vlan_proto));
476#endif
477 /* Construct the second two bytes. This field looks something 386 /* Construct the second two bytes. This field looks something
478 * like: 387 * like:
479 * usr_priority: 3 bits (high bits) 388 * usr_priority: 3 bits (high bits)
480 * CFI 1 bit 389 * CFI 1 bit
481 * VLAN ID 12 bits (low bits) 390 * VLAN ID 12 bits (low bits)
482 */ 391 */
483 veth_TCI = VLAN_DEV_INFO(dev)->vlan_id; 392 veth_TCI = vlan_dev_info(dev)->vlan_id;
484 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); 393 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
485 394
486 skb = __vlan_put_tag(skb, veth_TCI); 395 skb = __vlan_put_tag(skb, veth_TCI);
@@ -489,32 +398,33 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
489 return 0; 398 return 0;
490 } 399 }
491 400
492 if (orig_headroom < VLAN_HLEN) { 401 if (orig_headroom < VLAN_HLEN)
493 VLAN_DEV_INFO(dev)->cnt_inc_headroom_on_tx++; 402 vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
494 }
495 } 403 }
496 404
497#ifdef VLAN_DEBUG 405 pr_debug("%s: about to send skb: %p to dev: %s\n",
498 printk(VLAN_DBG "%s: about to send skb: %p to dev: %s\n",
499 __FUNCTION__, skb, skb->dev->name); 406 __FUNCTION__, skb, skb->dev->name);
500 printk(VLAN_DBG " %2hx.%2hx.%2hx.%2xh.%2hx.%2hx %2hx.%2hx.%2hx.%2hx.%2hx.%2hx %4hx %4hx %4hx\n", 407 pr_debug(" " MAC_FMT " " MAC_FMT " %4hx %4hx %4hx\n",
501 veth->h_dest[0], veth->h_dest[1], veth->h_dest[2], veth->h_dest[3], veth->h_dest[4], veth->h_dest[5], 408 veth->h_dest[0], veth->h_dest[1], veth->h_dest[2],
502 veth->h_source[0], veth->h_source[1], veth->h_source[2], veth->h_source[3], veth->h_source[4], veth->h_source[5], 409 veth->h_dest[3], veth->h_dest[4], veth->h_dest[5],
503 veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto); 410 veth->h_source[0], veth->h_source[1], veth->h_source[2],
504#endif 411 veth->h_source[3], veth->h_source[4], veth->h_source[5],
412 veth->h_vlan_proto, veth->h_vlan_TCI,
413 veth->h_vlan_encapsulated_proto);
505 414
506 stats->tx_packets++; /* for statistics only */ 415 stats->tx_packets++; /* for statistics only */
507 stats->tx_bytes += skb->len; 416 stats->tx_bytes += skb->len;
508 417
509 skb->dev = VLAN_DEV_INFO(dev)->real_dev; 418 skb->dev = vlan_dev_info(dev)->real_dev;
510 dev_queue_xmit(skb); 419 dev_queue_xmit(skb);
511 420
512 return 0; 421 return 0;
513} 422}
514 423
515int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) 424static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
425 struct net_device *dev)
516{ 426{
517 struct net_device_stats *stats = vlan_dev_get_stats(dev); 427 struct net_device_stats *stats = &dev->stats;
518 unsigned short veth_TCI; 428 unsigned short veth_TCI;
519 429
520 /* Construct the second two bytes. This field looks something 430 /* Construct the second two bytes. This field looks something
@@ -523,25 +433,25 @@ int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev
523 * CFI 1 bit 433 * CFI 1 bit
524 * VLAN ID 12 bits (low bits) 434 * VLAN ID 12 bits (low bits)
525 */ 435 */
526 veth_TCI = VLAN_DEV_INFO(dev)->vlan_id; 436 veth_TCI = vlan_dev_info(dev)->vlan_id;
527 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb); 437 veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
528 skb = __vlan_hwaccel_put_tag(skb, veth_TCI); 438 skb = __vlan_hwaccel_put_tag(skb, veth_TCI);
529 439
530 stats->tx_packets++; 440 stats->tx_packets++;
531 stats->tx_bytes += skb->len; 441 stats->tx_bytes += skb->len;
532 442
533 skb->dev = VLAN_DEV_INFO(dev)->real_dev; 443 skb->dev = vlan_dev_info(dev)->real_dev;
534 dev_queue_xmit(skb); 444 dev_queue_xmit(skb);
535 445
536 return 0; 446 return 0;
537} 447}
538 448
539int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) 449static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
540{ 450{
541 /* TODO: gotta make sure the underlying layer can handle it, 451 /* TODO: gotta make sure the underlying layer can handle it,
542 * maybe an IFF_VLAN_CAPABLE flag for devices? 452 * maybe an IFF_VLAN_CAPABLE flag for devices?
543 */ 453 */
544 if (VLAN_DEV_INFO(dev)->real_dev->mtu < new_mtu) 454 if (vlan_dev_info(dev)->real_dev->mtu < new_mtu)
545 return -ERANGE; 455 return -ERANGE;
546 456
547 dev->mtu = new_mtu; 457 dev->mtu = new_mtu;
@@ -552,7 +462,7 @@ int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
552void vlan_dev_set_ingress_priority(const struct net_device *dev, 462void vlan_dev_set_ingress_priority(const struct net_device *dev,
553 u32 skb_prio, short vlan_prio) 463 u32 skb_prio, short vlan_prio)
554{ 464{
555 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 465 struct vlan_dev_info *vlan = vlan_dev_info(dev);
556 466
557 if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio) 467 if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
558 vlan->nr_ingress_mappings--; 468 vlan->nr_ingress_mappings--;
@@ -565,7 +475,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
565int vlan_dev_set_egress_priority(const struct net_device *dev, 475int vlan_dev_set_egress_priority(const struct net_device *dev,
566 u32 skb_prio, short vlan_prio) 476 u32 skb_prio, short vlan_prio)
567{ 477{
568 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 478 struct vlan_dev_info *vlan = vlan_dev_info(dev);
569 struct vlan_priority_tci_mapping *mp = NULL; 479 struct vlan_priority_tci_mapping *mp = NULL;
570 struct vlan_priority_tci_mapping *np; 480 struct vlan_priority_tci_mapping *np;
571 u32 vlan_qos = (vlan_prio << 13) & 0xE000; 481 u32 vlan_qos = (vlan_prio << 13) & 0xE000;
@@ -605,30 +515,28 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev,
605{ 515{
606 /* verify flag is supported */ 516 /* verify flag is supported */
607 if (flag == VLAN_FLAG_REORDER_HDR) { 517 if (flag == VLAN_FLAG_REORDER_HDR) {
608 if (flag_val) { 518 if (flag_val)
609 VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR; 519 vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR;
610 } else { 520 else
611 VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; 521 vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
612 }
613 return 0; 522 return 0;
614 } 523 }
615 printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag);
616 return -EINVAL; 524 return -EINVAL;
617} 525}
618 526
619void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) 527void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
620{ 528{
621 strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23); 529 strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
622} 530}
623 531
624void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) 532void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
625{ 533{
626 *result = VLAN_DEV_INFO(dev)->vlan_id; 534 *result = vlan_dev_info(dev)->vlan_id;
627} 535}
628 536
629int vlan_dev_open(struct net_device *dev) 537static int vlan_dev_open(struct net_device *dev)
630{ 538{
631 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 539 struct vlan_dev_info *vlan = vlan_dev_info(dev);
632 struct net_device *real_dev = vlan->real_dev; 540 struct net_device *real_dev = vlan->real_dev;
633 int err; 541 int err;
634 542
@@ -650,9 +558,9 @@ int vlan_dev_open(struct net_device *dev)
650 return 0; 558 return 0;
651} 559}
652 560
653int vlan_dev_stop(struct net_device *dev) 561static int vlan_dev_stop(struct net_device *dev)
654{ 562{
655 struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; 563 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
656 564
657 dev_mc_unsync(real_dev, dev); 565 dev_mc_unsync(real_dev, dev);
658 if (dev->flags & IFF_ALLMULTI) 566 if (dev->flags & IFF_ALLMULTI)
@@ -666,9 +574,9 @@ int vlan_dev_stop(struct net_device *dev)
666 return 0; 574 return 0;
667} 575}
668 576
669int vlan_set_mac_address(struct net_device *dev, void *p) 577static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
670{ 578{
671 struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; 579 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
672 struct sockaddr *addr = p; 580 struct sockaddr *addr = p;
673 int err; 581 int err;
674 582
@@ -692,16 +600,16 @@ out:
692 return 0; 600 return 0;
693} 601}
694 602
695int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 603static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
696{ 604{
697 struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; 605 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
698 struct ifreq ifrr; 606 struct ifreq ifrr;
699 int err = -EOPNOTSUPP; 607 int err = -EOPNOTSUPP;
700 608
701 strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ); 609 strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
702 ifrr.ifr_ifru = ifr->ifr_ifru; 610 ifrr.ifr_ifru = ifr->ifr_ifru;
703 611
704 switch(cmd) { 612 switch (cmd) {
705 case SIOCGMIIPHY: 613 case SIOCGMIIPHY:
706 case SIOCGMIIREG: 614 case SIOCGMIIREG:
707 case SIOCSMIIREG: 615 case SIOCSMIIREG:
@@ -716,9 +624,9 @@ int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
716 return err; 624 return err;
717} 625}
718 626
719void vlan_change_rx_flags(struct net_device *dev, int change) 627static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
720{ 628{
721 struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; 629 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
722 630
723 if (change & IFF_ALLMULTI) 631 if (change & IFF_ALLMULTI)
724 dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); 632 dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -726,8 +634,78 @@ void vlan_change_rx_flags(struct net_device *dev, int change)
726 dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); 634 dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
727} 635}
728 636
729/** Taken from Gleb + Lennert's VLAN code, and modified... */ 637static void vlan_dev_set_multicast_list(struct net_device *vlan_dev)
730void vlan_dev_set_multicast_list(struct net_device *vlan_dev) 638{
639 dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
640}
641
642/*
643 * vlan network devices have devices nesting below it, and are a special
644 * "super class" of normal network devices; split their locks off into a
645 * separate class since they always nest.
646 */
647static struct lock_class_key vlan_netdev_xmit_lock_key;
648
649static const struct header_ops vlan_header_ops = {
650 .create = vlan_dev_hard_header,
651 .rebuild = vlan_dev_rebuild_header,
652 .parse = eth_header_parse,
653};
654
655static int vlan_dev_init(struct net_device *dev)
656{
657 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
658 int subclass = 0;
659
660 /* IFF_BROADCAST|IFF_MULTICAST; ??? */
661 dev->flags = real_dev->flags & ~IFF_UP;
662 dev->iflink = real_dev->ifindex;
663 dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
664 (1<<__LINK_STATE_DORMANT))) |
665 (1<<__LINK_STATE_PRESENT);
666
667 /* ipv6 shared card related stuff */
668 dev->dev_id = real_dev->dev_id;
669
670 if (is_zero_ether_addr(dev->dev_addr))
671 memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
672 if (is_zero_ether_addr(dev->broadcast))
673 memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
674
675 if (real_dev->features & NETIF_F_HW_VLAN_TX) {
676 dev->header_ops = real_dev->header_ops;
677 dev->hard_header_len = real_dev->hard_header_len;
678 dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
679 } else {
680 dev->header_ops = &vlan_header_ops;
681 dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
682 dev->hard_start_xmit = vlan_dev_hard_start_xmit;
683 }
684
685 if (real_dev->priv_flags & IFF_802_1Q_VLAN)
686 subclass = 1;
687
688 lockdep_set_class_and_subclass(&dev->_xmit_lock,
689 &vlan_netdev_xmit_lock_key, subclass);
690 return 0;
691}
692
693void vlan_setup(struct net_device *dev)
731{ 694{
732 dev_mc_sync(VLAN_DEV_INFO(vlan_dev)->real_dev, vlan_dev); 695 ether_setup(dev);
696
697 dev->priv_flags |= IFF_802_1Q_VLAN;
698 dev->tx_queue_len = 0;
699
700 dev->change_mtu = vlan_dev_change_mtu;
701 dev->init = vlan_dev_init;
702 dev->open = vlan_dev_open;
703 dev->stop = vlan_dev_stop;
704 dev->set_mac_address = vlan_dev_set_mac_address;
705 dev->set_multicast_list = vlan_dev_set_multicast_list;
706 dev->change_rx_flags = vlan_dev_change_rx_flags;
707 dev->do_ioctl = vlan_dev_ioctl;
708 dev->destructor = free_netdev;
709
710 memset(dev->broadcast, 0, ETH_ALEN);
733} 711}
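The TCI packing repeated above in vlan_dev_hard_header(), vlan_dev_hard_start_xmit() and vlan_dev_hwaccel_hard_start_xmit() follows the bit layout the comments spell out: 3 user-priority bits (high), 1 CFI bit, 12 VLAN ID bits (low). A minimal standalone sketch of that packing — plain C rather than kernel code, names hypothetical:

	#include <stdint.h>

	/* Pack an 802.1Q TCI as described above: priority in bits 15-13,
	 * CFI in bit 12, VLAN ID in bits 11-0.  Convert with htons()
	 * before placing it on the wire, as the driver code does. */
	static uint16_t build_tci(uint16_t vid, uint8_t prio, int cfi)
	{
		uint16_t tci = vid & 0x0FFF;		/* VLAN ID, low 12 bits */

		if (cfi)
			tci |= 1 << 12;			/* CFI, 1 bit */
		tci |= (uint16_t)((prio & 0x7) << 13);	/* priority, high 3 bits */
		return tci;
	}

This mirrors the shift in vlan_dev_set_egress_priority() above, where vlan_qos = (vlan_prio << 13) & 0xE000.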
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 0996185e2ed5..e32eeb37987e 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -75,7 +75,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
75static int vlan_changelink(struct net_device *dev, 75static int vlan_changelink(struct net_device *dev,
76 struct nlattr *tb[], struct nlattr *data[]) 76 struct nlattr *tb[], struct nlattr *data[])
77{ 77{
78 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 78 struct vlan_dev_info *vlan = vlan_dev_info(dev);
79 struct ifla_vlan_flags *flags; 79 struct ifla_vlan_flags *flags;
80 struct ifla_vlan_qos_mapping *m; 80 struct ifla_vlan_qos_mapping *m;
81 struct nlattr *attr; 81 struct nlattr *attr;
@@ -104,7 +104,7 @@ static int vlan_changelink(struct net_device *dev,
104static int vlan_newlink(struct net_device *dev, 104static int vlan_newlink(struct net_device *dev,
105 struct nlattr *tb[], struct nlattr *data[]) 105 struct nlattr *tb[], struct nlattr *data[])
106{ 106{
107 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 107 struct vlan_dev_info *vlan = vlan_dev_info(dev);
108 struct net_device *real_dev; 108 struct net_device *real_dev;
109 int err; 109 int err;
110 110
@@ -137,11 +137,6 @@ static int vlan_newlink(struct net_device *dev,
137 return register_vlan_dev(dev); 137 return register_vlan_dev(dev);
138} 138}
139 139
140static void vlan_dellink(struct net_device *dev)
141{
142 unregister_vlan_device(dev);
143}
144
145static inline size_t vlan_qos_map_size(unsigned int n) 140static inline size_t vlan_qos_map_size(unsigned int n)
146{ 141{
147 if (n == 0) 142 if (n == 0)
@@ -153,7 +148,7 @@ static inline size_t vlan_qos_map_size(unsigned int n)
153 148
154static size_t vlan_get_size(const struct net_device *dev) 149static size_t vlan_get_size(const struct net_device *dev)
155{ 150{
156 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 151 struct vlan_dev_info *vlan = vlan_dev_info(dev);
157 152
158 return nla_total_size(2) + /* IFLA_VLAN_ID */ 153 return nla_total_size(2) + /* IFLA_VLAN_ID */
159 vlan_qos_map_size(vlan->nr_ingress_mappings) + 154 vlan_qos_map_size(vlan->nr_ingress_mappings) +
@@ -162,14 +157,14 @@ static size_t vlan_get_size(const struct net_device *dev)
162 157
163static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) 158static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
164{ 159{
165 struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); 160 struct vlan_dev_info *vlan = vlan_dev_info(dev);
166 struct vlan_priority_tci_mapping *pm; 161 struct vlan_priority_tci_mapping *pm;
167 struct ifla_vlan_flags f; 162 struct ifla_vlan_flags f;
168 struct ifla_vlan_qos_mapping m; 163 struct ifla_vlan_qos_mapping m;
169 struct nlattr *nest; 164 struct nlattr *nest;
170 unsigned int i; 165 unsigned int i;
171 166
172 NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id); 167 NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
173 if (vlan->flags) { 168 if (vlan->flags) {
174 f.flags = vlan->flags; 169 f.flags = vlan->flags;
175 f.mask = ~0; 170 f.mask = ~0;
@@ -226,7 +221,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
226 .validate = vlan_validate, 221 .validate = vlan_validate,
227 .newlink = vlan_newlink, 222 .newlink = vlan_newlink,
228 .changelink = vlan_changelink, 223 .changelink = vlan_changelink,
229 .dellink = vlan_dellink, 224 .dellink = unregister_vlan_dev,
230 .get_size = vlan_get_size, 225 .get_size = vlan_get_size,
231 .fill_info = vlan_fill_info, 226 .fill_info = vlan_fill_info,
232}; 227};
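Nearly every hunk in this patch swaps the VLAN_DEV_INFO() macro for vlan_dev_info(). The accessor itself is defined in vlan.h, outside this diff; a plausible sketch of its shape, assuming the net_device private area holds the vlan_dev_info as the old macro's cast implied:

	static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
	{
		return netdev_priv(dev);	/* typed, compiler-checked accessor */
	}

Unlike a cast-based macro, the static inline lets the compiler check the argument type at every call site, which is the usual motivation for this kind of conversion.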
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 6cefdf8e381a..a0ec47925597 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -125,10 +125,10 @@ static struct proc_dir_entry *proc_vlan_conf;
125 125
126/* Strings */ 126/* Strings */
127static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = { 127static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = {
128 [VLAN_NAME_TYPE_RAW_PLUS_VID] = "VLAN_NAME_TYPE_RAW_PLUS_VID", 128 [VLAN_NAME_TYPE_RAW_PLUS_VID] = "VLAN_NAME_TYPE_RAW_PLUS_VID",
129 [VLAN_NAME_TYPE_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_PLUS_VID_NO_PAD", 129 [VLAN_NAME_TYPE_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_PLUS_VID_NO_PAD",
130 [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD]= "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD", 130 [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD",
131 [VLAN_NAME_TYPE_PLUS_VID] = "VLAN_NAME_TYPE_PLUS_VID", 131 [VLAN_NAME_TYPE_PLUS_VID] = "VLAN_NAME_TYPE_PLUS_VID",
132}; 132};
133/* 133/*
134 * Interface functions 134 * Interface functions
@@ -158,15 +158,18 @@ void vlan_proc_cleanup(void)
158int __init vlan_proc_init(void) 158int __init vlan_proc_init(void)
159{ 159{
160 proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net); 160 proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net);
161 if (proc_vlan_dir) { 161 if (!proc_vlan_dir)
162 proc_vlan_conf = create_proc_entry(name_conf, 162 goto err;
163 S_IFREG|S_IRUSR|S_IWUSR, 163
164 proc_vlan_dir); 164 proc_vlan_conf = create_proc_entry(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
165 if (proc_vlan_conf) { 165 proc_vlan_dir);
166 proc_vlan_conf->proc_fops = &vlan_fops; 166 if (!proc_vlan_conf)
167 return 0; 167 goto err;
168 } 168 proc_vlan_conf->proc_fops = &vlan_fops;
169 } 169 return 0;
170
171err:
172 pr_err("%s: can't create entry in proc filesystem!\n", __FUNCTION__);
170 vlan_proc_cleanup(); 173 vlan_proc_cleanup();
171 return -ENOBUFS; 174 return -ENOBUFS;
172} 175}
@@ -175,16 +178,9 @@ int __init vlan_proc_init(void)
175 * Add directory entry for VLAN device. 178 * Add directory entry for VLAN device.
176 */ 179 */
177 180
178int vlan_proc_add_dev (struct net_device *vlandev) 181int vlan_proc_add_dev(struct net_device *vlandev)
179{ 182{
180 struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev); 183 struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
181
182 if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) {
183 printk(KERN_ERR
184 "ERROR: vlan_proc_add, device -:%s:- is NOT a VLAN\n",
185 vlandev->name);
186 return -EINVAL;
187 }
188 184
189 dev_info->dent = create_proc_entry(vlandev->name, 185 dev_info->dent = create_proc_entry(vlandev->name,
190 S_IFREG|S_IRUSR|S_IWUSR, 186 S_IFREG|S_IRUSR|S_IWUSR,
@@ -194,11 +190,6 @@ int vlan_proc_add_dev (struct net_device *vlandev)
194 190
195 dev_info->dent->proc_fops = &vlandev_fops; 191 dev_info->dent->proc_fops = &vlandev_fops;
196 dev_info->dent->data = vlandev; 192 dev_info->dent->data = vlandev;
197
198#ifdef VLAN_DEBUG
199 printk(KERN_ERR "vlan_proc_add, device -:%s:- being added.\n",
200 vlandev->name);
201#endif
202 return 0; 193 return 0;
203} 194}
204 195
@@ -207,28 +198,12 @@ int vlan_proc_add_dev (struct net_device *vlandev)
207 */ 198 */
208int vlan_proc_rem_dev(struct net_device *vlandev) 199int vlan_proc_rem_dev(struct net_device *vlandev)
209{ 200{
210 if (!vlandev) {
211 printk(VLAN_ERR "%s: invalid argument: %p\n",
212 __FUNCTION__, vlandev);
213 return -EINVAL;
214 }
215
216 if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) {
217 printk(VLAN_DBG "%s: invalid argument, device: %s is not a VLAN device, priv_flags: 0x%4hX.\n",
218 __FUNCTION__, vlandev->name, vlandev->priv_flags);
219 return -EINVAL;
220 }
221
222#ifdef VLAN_DEBUG
223 printk(VLAN_DBG "%s: dev: %p\n", __FUNCTION__, vlandev);
224#endif
225
226 /** NOTE: This will consume the memory pointed to by dent, it seems. */ 201 /** NOTE: This will consume the memory pointed to by dent, it seems. */
227 if (VLAN_DEV_INFO(vlandev)->dent) { 202 if (vlan_dev_info(vlandev)->dent) {
228 remove_proc_entry(VLAN_DEV_INFO(vlandev)->dent->name, proc_vlan_dir); 203 remove_proc_entry(vlan_dev_info(vlandev)->dent->name,
229 VLAN_DEV_INFO(vlandev)->dent = NULL; 204 proc_vlan_dir);
205 vlan_dev_info(vlandev)->dent = NULL;
230 } 206 }
231
232 return 0; 207 return 0;
233} 208}
234 209
@@ -245,6 +220,7 @@ static inline int is_vlan_dev(struct net_device *dev)
245 220
246/* start read of /proc/net/vlan/config */ 221/* start read of /proc/net/vlan/config */
247static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) 222static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
223 __acquires(dev_base_lock)
248{ 224{
249 struct net_device *dev; 225 struct net_device *dev;
250 loff_t i = 1; 226 loff_t i = 1;
@@ -286,6 +262,7 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
286} 262}
287 263
288static void vlan_seq_stop(struct seq_file *seq, void *v) 264static void vlan_seq_stop(struct seq_file *seq, void *v)
265 __releases(dev_base_lock)
289{ 266{
290 read_unlock(&dev_base_lock); 267 read_unlock(&dev_base_lock);
291} 268}
@@ -301,10 +278,10 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
301 nmtype = vlan_name_type_str[vlan_name_type]; 278 nmtype = vlan_name_type_str[vlan_name_type];
302 279
303 seq_printf(seq, "Name-Type: %s\n", 280 seq_printf(seq, "Name-Type: %s\n",
304 nmtype ? nmtype : "UNKNOWN" ); 281 nmtype ? nmtype : "UNKNOWN");
305 } else { 282 } else {
306 const struct net_device *vlandev = v; 283 const struct net_device *vlandev = v;
307 const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev); 284 const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
308 285
309 seq_printf(seq, "%-15s| %d | %s\n", vlandev->name, 286 seq_printf(seq, "%-15s| %d | %s\n", vlandev->name,
310 dev_info->vlan_id, dev_info->real_dev->name); 287 dev_info->vlan_id, dev_info->real_dev->name);
@@ -315,20 +292,18 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
315static int vlandev_seq_show(struct seq_file *seq, void *offset) 292static int vlandev_seq_show(struct seq_file *seq, void *offset)
316{ 293{
317 struct net_device *vlandev = (struct net_device *) seq->private; 294 struct net_device *vlandev = (struct net_device *) seq->private;
318 const struct vlan_dev_info *dev_info = VLAN_DEV_INFO(vlandev); 295 const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
319 struct net_device_stats *stats; 296 struct net_device_stats *stats = &vlandev->stats;
320 static const char fmt[] = "%30s %12lu\n"; 297 static const char fmt[] = "%30s %12lu\n";
321 int i; 298 int i;
322 299
323 if (!(vlandev->priv_flags & IFF_802_1Q_VLAN)) 300 if (!(vlandev->priv_flags & IFF_802_1Q_VLAN))
324 return 0; 301 return 0;
325 302
326 seq_printf(seq, "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n", 303 seq_printf(seq,
327 vlandev->name, dev_info->vlan_id, 304 "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n",
328 (int)(dev_info->flags & 1), vlandev->priv_flags); 305 vlandev->name, dev_info->vlan_id,
329 306 (int)(dev_info->flags & 1), vlandev->priv_flags);
330
331 stats = vlan_dev_get_stats(vlandev);
332 307
333 seq_printf(seq, fmt, "total frames received", stats->rx_packets); 308 seq_printf(seq, fmt, "total frames received", stats->rx_packets);
334 seq_printf(seq, fmt, "total bytes received", stats->rx_bytes); 309 seq_printf(seq, fmt, "total bytes received", stats->rx_bytes);
@@ -342,16 +317,16 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
342 dev_info->cnt_encap_on_xmit); 317 dev_info->cnt_encap_on_xmit);
343 seq_printf(seq, "Device: %s", dev_info->real_dev->name); 318 seq_printf(seq, "Device: %s", dev_info->real_dev->name);
344 /* now show all PRIORITY mappings relating to this VLAN */ 319 /* now show all PRIORITY mappings relating to this VLAN */
345 seq_printf(seq, 320 seq_printf(seq, "\nINGRESS priority mappings: "
346 "\nINGRESS priority mappings: 0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n", 321 "0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n",
347 dev_info->ingress_priority_map[0], 322 dev_info->ingress_priority_map[0],
348 dev_info->ingress_priority_map[1], 323 dev_info->ingress_priority_map[1],
349 dev_info->ingress_priority_map[2], 324 dev_info->ingress_priority_map[2],
350 dev_info->ingress_priority_map[3], 325 dev_info->ingress_priority_map[3],
351 dev_info->ingress_priority_map[4], 326 dev_info->ingress_priority_map[4],
352 dev_info->ingress_priority_map[5], 327 dev_info->ingress_priority_map[5],
353 dev_info->ingress_priority_map[6], 328 dev_info->ingress_priority_map[6],
354 dev_info->ingress_priority_map[7]); 329 dev_info->ingress_priority_map[7]);
355 330
356 seq_printf(seq, "EGRESS priority mappings: "); 331 seq_printf(seq, "EGRESS priority mappings: ");
357 for (i = 0; i < 16; i++) { 332 for (i = 0; i < 16; i++) {
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index f908ee332fd8..da542cacc5a5 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -4,16 +4,15 @@
4#ifdef CONFIG_PROC_FS 4#ifdef CONFIG_PROC_FS
5int vlan_proc_init(void); 5int vlan_proc_init(void);
6int vlan_proc_rem_dev(struct net_device *vlandev); 6int vlan_proc_rem_dev(struct net_device *vlandev);
7int vlan_proc_add_dev (struct net_device *vlandev); 7int vlan_proc_add_dev(struct net_device *vlandev);
8void vlan_proc_cleanup (void); 8void vlan_proc_cleanup(void);
9 9
10#else /* No CONFIG_PROC_FS */ 10#else /* No CONFIG_PROC_FS */
11 11
12#define vlan_proc_init() (0) 12#define vlan_proc_init() (0)
13#define vlan_proc_cleanup() do {} while(0) 13#define vlan_proc_cleanup() do {} while (0)
14#define vlan_proc_add_dev(dev) ({(void)(dev), 0;}) 14#define vlan_proc_add_dev(dev) ({(void)(dev), 0; })
15#define vlan_proc_rem_dev(dev) ({(void)(dev), 0;}) 15#define vlan_proc_rem_dev(dev) ({(void)(dev), 0; })
16
17#endif 16#endif
18 17
19#endif /* !(__BEN_VLAN_PROC_INC__) */ 18#endif /* !(__BEN_VLAN_PROC_INC__) */
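The !CONFIG_PROC_FS stubs above rely on GCC statement expressions: each macro still evaluates its argument (avoiding unused-variable warnings) and yields 0, so call sites compile identically whether procfs is configured in or not. The pattern in isolation, for a hypothetical helper:

	#ifdef CONFIG_PROC_FS
	int foo_proc_add_dev(struct net_device *dev);
	#else
	/* evaluate dev for warning purposes, then yield 0 */
	#define foo_proc_add_dev(dev)	({ (void)(dev), 0; })
	#endif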
diff --git a/net/Kconfig b/net/Kconfig
index ab4e6da5012f..b6a5d454f2ff 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -144,9 +144,21 @@ config NETFILTER_DEBUG
144 You can say Y here if you want to get additional messages useful in 144 You can say Y here if you want to get additional messages useful in
145 debugging the netfilter code. 145 debugging the netfilter code.
146 146
147config NETFILTER_ADVANCED
148 bool "Advanced netfilter configuration"
149 depends on NETFILTER
150 default y
151 help
152 If you say Y here you can select between all the netfilter modules.
153 If you say N the more unusual ones will not be shown and the
154 basic ones needed by most people will default to 'M'.
155
156 If unsure, say Y.
157
147config BRIDGE_NETFILTER 158config BRIDGE_NETFILTER
148 bool "Bridged IP/ARP packets filtering" 159 bool "Bridged IP/ARP packets filtering"
149 depends on BRIDGE && NETFILTER && INET 160 depends on BRIDGE && NETFILTER && INET
161 depends on NETFILTER_ADVANCED
150 default y 162 default y
151 ---help--- 163 ---help---
152 Enabling this option will let arptables resp. iptables see bridged 164 Enabling this option will let arptables resp. iptables see bridged
@@ -218,6 +230,7 @@ endmenu
218endmenu 230endmenu
219 231
220source "net/ax25/Kconfig" 232source "net/ax25/Kconfig"
233source "net/can/Kconfig"
221source "net/irda/Kconfig" 234source "net/irda/Kconfig"
222source "net/bluetooth/Kconfig" 235source "net/bluetooth/Kconfig"
223source "net/rxrpc/Kconfig" 236source "net/rxrpc/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index bbe7d2a41486..b7a13643b549 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_LAPB) += lapb/
34obj-$(CONFIG_NETROM) += netrom/ 34obj-$(CONFIG_NETROM) += netrom/
35obj-$(CONFIG_ROSE) += rose/ 35obj-$(CONFIG_ROSE) += rose/
36obj-$(CONFIG_AX25) += ax25/ 36obj-$(CONFIG_AX25) += ax25/
37obj-$(CONFIG_CAN) += can/
37obj-$(CONFIG_IRDA) += irda/ 38obj-$(CONFIG_IRDA) += irda/
38obj-$(CONFIG_BT) += bluetooth/ 39obj-$(CONFIG_BT) += bluetooth/
39obj-$(CONFIG_SUNRPC) += sunrpc/ 40obj-$(CONFIG_SUNRPC) += sunrpc/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 6c5c6dc098ec..18058bbc7962 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -874,9 +874,7 @@ void __init aarp_proto_init(void)
874 aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); 874 aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
875 if (!aarp_dl) 875 if (!aarp_dl)
876 printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); 876 printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
877 init_timer(&aarp_timer); 877 setup_timer(&aarp_timer, aarp_expire_timeout, 0);
878 aarp_timer.function = aarp_expire_timeout;
879 aarp_timer.data = 0;
880 aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; 878 aarp_timer.expires = jiffies + sysctl_aarp_expiry_time;
881 add_timer(&aarp_timer); 879 add_timer(&aarp_timer);
882 register_netdevice_notifier(&aarp_notifier); 880 register_netdevice_notifier(&aarp_notifier);
@@ -943,6 +941,7 @@ static struct aarp_entry *iter_next(struct aarp_iter_state *iter, loff_t *pos)
943} 941}
944 942
945static void *aarp_seq_start(struct seq_file *seq, loff_t *pos) 943static void *aarp_seq_start(struct seq_file *seq, loff_t *pos)
944 __acquires(aarp_lock)
946{ 945{
947 struct aarp_iter_state *iter = seq->private; 946 struct aarp_iter_state *iter = seq->private;
948 947
@@ -977,6 +976,7 @@ static void *aarp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
977} 976}
978 977
979static void aarp_seq_stop(struct seq_file *seq, void *v) 978static void aarp_seq_stop(struct seq_file *seq, void *v)
979 __releases(aarp_lock)
980{ 980{
981 read_unlock_bh(&aarp_lock); 981 read_unlock_bh(&aarp_lock);
982} 982}
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 05d9652afcb6..8e8dcfd532db 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -27,6 +27,7 @@ static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos)
27} 27}
28 28
29static void *atalk_seq_interface_start(struct seq_file *seq, loff_t *pos) 29static void *atalk_seq_interface_start(struct seq_file *seq, loff_t *pos)
30 __acquires(atalk_interfaces_lock)
30{ 31{
31 loff_t l = *pos; 32 loff_t l = *pos;
32 33
@@ -52,6 +53,7 @@ out:
52} 53}
53 54
54static void atalk_seq_interface_stop(struct seq_file *seq, void *v) 55static void atalk_seq_interface_stop(struct seq_file *seq, void *v)
56 __releases(atalk_interfaces_lock)
55{ 57{
56 read_unlock_bh(&atalk_interfaces_lock); 58 read_unlock_bh(&atalk_interfaces_lock);
57} 59}
@@ -86,6 +88,7 @@ static __inline__ struct atalk_route *atalk_get_route_idx(loff_t pos)
86} 88}
87 89
88static void *atalk_seq_route_start(struct seq_file *seq, loff_t *pos) 90static void *atalk_seq_route_start(struct seq_file *seq, loff_t *pos)
91 __acquires(atalk_routes_lock)
89{ 92{
90 loff_t l = *pos; 93 loff_t l = *pos;
91 94
@@ -111,6 +114,7 @@ out:
111} 114}
112 115
113static void atalk_seq_route_stop(struct seq_file *seq, void *v) 116static void atalk_seq_route_stop(struct seq_file *seq, void *v)
117 __releases(atalk_routes_lock)
114{ 118{
115 read_unlock_bh(&atalk_routes_lock); 119 read_unlock_bh(&atalk_routes_lock);
116} 120}
@@ -154,6 +158,7 @@ found:
154} 158}
155 159
156static void *atalk_seq_socket_start(struct seq_file *seq, loff_t *pos) 160static void *atalk_seq_socket_start(struct seq_file *seq, loff_t *pos)
161 __acquires(atalk_sockets_lock)
157{ 162{
158 loff_t l = *pos; 163 loff_t l = *pos;
159 164
@@ -176,6 +181,7 @@ out:
176} 181}
177 182
178static void atalk_seq_socket_stop(struct seq_file *seq, void *v) 183static void atalk_seq_socket_stop(struct seq_file *seq, void *v)
184 __releases(atalk_sockets_lock)
179{ 185{
180 read_unlock_bh(&atalk_sockets_lock); 186 read_unlock_bh(&atalk_sockets_lock);
181} 187}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index e0d37d6dc1f8..3be55c8ca4ef 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -177,10 +177,9 @@ static inline void atalk_destroy_socket(struct sock *sk)
177 177
178 if (atomic_read(&sk->sk_wmem_alloc) || 178 if (atomic_read(&sk->sk_wmem_alloc) ||
179 atomic_read(&sk->sk_rmem_alloc)) { 179 atomic_read(&sk->sk_rmem_alloc)) {
180 init_timer(&sk->sk_timer); 180 setup_timer(&sk->sk_timer, atalk_destroy_timer,
181 (unsigned long)sk);
181 sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; 182 sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME;
182 sk->sk_timer.function = atalk_destroy_timer;
183 sk->sk_timer.data = (unsigned long)sk;
184 add_timer(&sk->sk_timer); 183 add_timer(&sk->sk_timer);
185 } else 184 } else
186 sock_put(sk); 185 sock_put(sk);
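The two timer hunks above (aarp.c and ddp.c) apply the same mechanical conversion: the init_timer() call plus separate .function/.data assignments collapse into one setup_timer() call. A sketch of the pattern against the 2.6.25-era timer API visible in this diff, with hypothetical names:

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static void my_timeout(unsigned long data);	/* hypothetical callback */
	static struct timer_list my_timer;

	static void arm_old_style(unsigned long arg, unsigned long delay)
	{
		init_timer(&my_timer);
		my_timer.function = my_timeout;
		my_timer.data = arg;
		my_timer.expires = jiffies + delay;
		add_timer(&my_timer);
	}

	static void arm_new_style(unsigned long arg, unsigned long delay)
	{
		setup_timer(&my_timer, my_timeout, arg);
		my_timer.expires = jiffies + delay;
		add_timer(&my_timer);
	}

The expires assignment and add_timer() stay as they were; only the initialisation is condensed.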
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index 7df1778e221a..621805dfa2f4 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -49,31 +49,17 @@ static struct ctl_table atalk_table[] = {
49 { 0 }, 49 { 0 },
50}; 50};
51 51
52static struct ctl_table atalk_dir_table[] = { 52static struct ctl_path atalk_path[] = {
53 { 53 { .procname = "net", .ctl_name = CTL_NET, },
54 .ctl_name = NET_ATALK, 54 { .procname = "appletalk", .ctl_name = NET_ATALK, },
55 .procname = "appletalk", 55 { }
56 .mode = 0555,
57 .child = atalk_table,
58 },
59 { 0 },
60};
61
62static struct ctl_table atalk_root_table[] = {
63 {
64 .ctl_name = CTL_NET,
65 .procname = "net",
66 .mode = 0555,
67 .child = atalk_dir_table,
68 },
69 { 0 },
70}; 56};
71 57
72static struct ctl_table_header *atalk_table_header; 58static struct ctl_table_header *atalk_table_header;
73 59
74void atalk_register_sysctl(void) 60void atalk_register_sysctl(void)
75{ 61{
76 atalk_table_header = register_sysctl_table(atalk_root_table); 62 atalk_table_header = register_sysctl_paths(atalk_path, atalk_table);
77} 63}
78 64
79void atalk_unregister_sysctl(void) 65void atalk_unregister_sysctl(void)
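The sysctl conversion above drops the two hand-built parent tables ("net" and "appletalk") in favour of a flat ctl_path array handed to register_sysctl_paths(), which constructs the intermediate directories itself. The same shape for a hypothetical subsystem (NET_FOO and the leaf table are assumptions):

	static struct ctl_table foo_table[] = {
		/* leaf entries, unchanged from the old scheme */
		{ 0 },
	};

	static struct ctl_path foo_path[] = {
		{ .procname = "net", .ctl_name = CTL_NET, },
		{ .procname = "foo", .ctl_name = NET_FOO, },
		{ }
	};

	static struct ctl_table_header *foo_header;

	void foo_register_sysctl(void)
	{
		/* builds /proc/sys/net/foo/... from the path plus leaf table */
		foo_header = register_sysctl_paths(foo_path, foo_table);
	}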
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
index 21ff276b2d80..754ea103b378 100644
--- a/net/atm/Kconfig
+++ b/net/atm/Kconfig
@@ -1,10 +1,9 @@
1# 1#
2# Asynchronous Transfer Mode (ATM) (EXPERIMENTAL) 2# Asynchronous Transfer Mode (ATM)
3# 3#
4 4
5config ATM 5config ATM
6 tristate "Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)" 6 tristate "Asynchronous Transfer Mode (ATM)"
7 depends on EXPERIMENTAL
8 ---help--- 7 ---help---
9 ATM is a high-speed networking technology for Local Area Networks 8 ATM is a high-speed networking technology for Local Area Networks
10 and Wide Area Networks. It uses a fixed packet size and is 9 and Wide Area Networks. It uses a fixed packet size and is
@@ -20,7 +19,7 @@ config ATM
20 further details. 19 further details.
21 20
22config ATM_CLIP 21config ATM_CLIP
23 tristate "Classical IP over ATM (EXPERIMENTAL)" 22 tristate "Classical IP over ATM"
24 depends on ATM && INET 23 depends on ATM && INET
25 help 24 help
26 Classical IP over ATM for PVCs and SVCs, supporting InARP and 25 Classical IP over ATM for PVCs and SVCs, supporting InARP and
@@ -29,7 +28,7 @@ config ATM_CLIP
29 (LANE)" below. 28 (LANE)" below.
30 29
31config ATM_CLIP_NO_ICMP 30config ATM_CLIP_NO_ICMP
32 bool "Do NOT send ICMP if no neighbour (EXPERIMENTAL)" 31 bool "Do NOT send ICMP if no neighbour"
33 depends on ATM_CLIP 32 depends on ATM_CLIP
34 help 33 help
35 Normally, an "ICMP host unreachable" message is sent if a neighbour 34 Normally, an "ICMP host unreachable" message is sent if a neighbour
@@ -39,7 +38,7 @@ config ATM_CLIP_NO_ICMP
39 such neighbours are silently discarded instead. 38 such neighbours are silently discarded instead.
40 39
41config ATM_LANE 40config ATM_LANE
42 tristate "LAN Emulation (LANE) support (EXPERIMENTAL)" 41 tristate "LAN Emulation (LANE) support"
43 depends on ATM 42 depends on ATM
44 help 43 help
45 LAN Emulation emulates services of existing LANs across an ATM 44 LAN Emulation emulates services of existing LANs across an ATM
@@ -48,7 +47,7 @@ config ATM_LANE
48 ELAN and Ethernet segments. You need LANE if you want to try MPOA. 47 ELAN and Ethernet segments. You need LANE if you want to try MPOA.
49 48
50config ATM_MPOA 49config ATM_MPOA
51 tristate "Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)" 50 tristate "Multi-Protocol Over ATM (MPOA) support"
52 depends on ATM && INET && ATM_LANE!=n 51 depends on ATM && INET && ATM_LANE!=n
53 help 52 help
54 Multi-Protocol Over ATM allows ATM edge devices such as routers, 53 Multi-Protocol Over ATM allows ATM edge devices such as routers,
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 9ef07eda2c43..1b88311f2130 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -9,13 +9,15 @@
9 9
10#define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev) 10#define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev)
11 11
12static ssize_t show_type(struct class_device *cdev, char *buf) 12static ssize_t show_type(struct device *cdev,
13 struct device_attribute *attr, char *buf)
13{ 14{
14 struct atm_dev *adev = to_atm_dev(cdev); 15 struct atm_dev *adev = to_atm_dev(cdev);
15 return sprintf(buf, "%s\n", adev->type); 16 return sprintf(buf, "%s\n", adev->type);
16} 17}
17 18
18static ssize_t show_address(struct class_device *cdev, char *buf) 19static ssize_t show_address(struct device *cdev,
20 struct device_attribute *attr, char *buf)
19{ 21{
20 char *pos = buf; 22 char *pos = buf;
21 struct atm_dev *adev = to_atm_dev(cdev); 23 struct atm_dev *adev = to_atm_dev(cdev);
@@ -28,7 +30,8 @@ static ssize_t show_address(struct class_device *cdev, char *buf)
28 return pos - buf; 30 return pos - buf;
29} 31}
30 32
31static ssize_t show_atmaddress(struct class_device *cdev, char *buf) 33static ssize_t show_atmaddress(struct device *cdev,
34 struct device_attribute *attr, char *buf)
32{ 35{
33 unsigned long flags; 36 unsigned long flags;
34 char *pos = buf; 37 char *pos = buf;
@@ -54,7 +57,8 @@ static ssize_t show_atmaddress(struct class_device *cdev, char *buf)
54 return pos - buf; 57 return pos - buf;
55} 58}
56 59
57static ssize_t show_carrier(struct class_device *cdev, char *buf) 60static ssize_t show_carrier(struct device *cdev,
61 struct device_attribute *attr, char *buf)
58{ 62{
59 char *pos = buf; 63 char *pos = buf;
60 struct atm_dev *adev = to_atm_dev(cdev); 64 struct atm_dev *adev = to_atm_dev(cdev);
@@ -65,7 +69,8 @@ static ssize_t show_carrier(struct class_device *cdev, char *buf)
65 return pos - buf; 69 return pos - buf;
66} 70}
67 71
68static ssize_t show_link_rate(struct class_device *cdev, char *buf) 72static ssize_t show_link_rate(struct device *cdev,
73 struct device_attribute *attr, char *buf)
69{ 74{
70 char *pos = buf; 75 char *pos = buf;
71 struct atm_dev *adev = to_atm_dev(cdev); 76 struct atm_dev *adev = to_atm_dev(cdev);
@@ -90,22 +95,23 @@ static ssize_t show_link_rate(struct class_device *cdev, char *buf)
90 return pos - buf; 95 return pos - buf;
91} 96}
92 97
93static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); 98static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
94static CLASS_DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL); 99static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
95static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL); 100static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
96static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL); 101static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
97static CLASS_DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL); 102static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
98 103
99static struct class_device_attribute *atm_attrs[] = { 104static struct device_attribute *atm_attrs[] = {
100 &class_device_attr_atmaddress, 105 &dev_attr_atmaddress,
101 &class_device_attr_address, 106 &dev_attr_address,
102 &class_device_attr_carrier, 107 &dev_attr_carrier,
103 &class_device_attr_type, 108 &dev_attr_type,
104 &class_device_attr_link_rate, 109 &dev_attr_link_rate,
105 NULL 110 NULL
106}; 111};
107 112
108static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env) 113
114static int atm_uevent(struct device *cdev, struct kobj_uevent_env *env)
109{ 115{
110 struct atm_dev *adev; 116 struct atm_dev *adev;
111 117
@@ -122,7 +128,7 @@ static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env)
122 return 0; 128 return 0;
123} 129}
124 130
125static void atm_release(struct class_device *cdev) 131static void atm_release(struct device *cdev)
126{ 132{
127 struct atm_dev *adev = to_atm_dev(cdev); 133 struct atm_dev *adev = to_atm_dev(cdev);
128 134
@@ -131,25 +137,25 @@ static void atm_release(struct class_device *cdev)
131 137
132static struct class atm_class = { 138static struct class atm_class = {
133 .name = "atm", 139 .name = "atm",
134 .release = atm_release, 140 .dev_release = atm_release,
135 .uevent = atm_uevent, 141 .dev_uevent = atm_uevent,
136}; 142};
137 143
138int atm_register_sysfs(struct atm_dev *adev) 144int atm_register_sysfs(struct atm_dev *adev)
139{ 145{
140 struct class_device *cdev = &adev->class_dev; 146 struct device *cdev = &adev->class_dev;
141 int i, j, err; 147 int i, j, err;
142 148
143 cdev->class = &atm_class; 149 cdev->class = &atm_class;
144 class_set_devdata(cdev, adev); 150 dev_set_drvdata(cdev, adev);
145 151
146 snprintf(cdev->class_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number); 152 snprintf(cdev->bus_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number);
147 err = class_device_register(cdev); 153 err = device_register(cdev);
148 if (err < 0) 154 if (err < 0)
149 return err; 155 return err;
150 156
151 for (i = 0; atm_attrs[i]; i++) { 157 for (i = 0; atm_attrs[i]; i++) {
152 err = class_device_create_file(cdev, atm_attrs[i]); 158 err = device_create_file(cdev, atm_attrs[i]);
153 if (err) 159 if (err)
154 goto err_out; 160 goto err_out;
155 } 161 }
@@ -158,16 +164,16 @@ int atm_register_sysfs(struct atm_dev *adev)
158 164
159err_out: 165err_out:
160 for (j = 0; j < i; j++) 166 for (j = 0; j < i; j++)
161 class_device_remove_file(cdev, atm_attrs[j]); 167 device_remove_file(cdev, atm_attrs[j]);
162 class_device_del(cdev); 168 device_del(cdev);
163 return err; 169 return err;
164} 170}
165 171
166void atm_unregister_sysfs(struct atm_dev *adev) 172void atm_unregister_sysfs(struct atm_dev *adev)
167{ 173{
168 struct class_device *cdev = &adev->class_dev; 174 struct device *cdev = &adev->class_dev;
169 175
170 class_device_del(cdev); 176 device_del(cdev);
171} 177}
172 178
173int __init atm_sysfs_init(void) 179int __init atm_sysfs_init(void)
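
The atm_sysfs.c hunks above are part of the tree-wide class_device removal: the show() callbacks grow a struct device_attribute argument, CLASS_DEVICE_ATTR() becomes DEVICE_ATTR(), the release and uevent methods move to the class's dev_release/dev_uevent slots, and class_set_devdata()/class_id give way to dev_set_drvdata()/bus_id. A minimal sketch of the resulting registration pattern against the ~2.6.25 driver core; the demo_* names and the "label" attribute are illustrative, not part of the patch:

    /*
     * Sketch only: mirrors the struct device sysfs pattern the patch
     * converts to.  demo_* and "label" are hypothetical names.
     */
    #include <linux/device.h>
    #include <linux/kernel.h>
    #include <linux/stat.h>

    static ssize_t show_label(struct device *dev,
                              struct device_attribute *attr, char *buf)
    {
            /* dev_get_drvdata() replaces class_get_devdata() */
            const char *label = dev_get_drvdata(dev);

            return sprintf(buf, "%s\n", label);
    }

    /* expands to struct device_attribute dev_attr_label */
    static DEVICE_ATTR(label, S_IRUGO, show_label, NULL);

    static int demo_register(struct device *dev, struct class *cls,
                             const char *label)
    {
            int err;

            dev->class = cls;
            dev_set_drvdata(dev, (void *)label);
            snprintf(dev->bus_id, BUS_ID_SIZE, "demo%d", 0);

            err = device_register(dev);
            if (err < 0)
                    return err;

            err = device_create_file(dev, &dev_attr_label);
            if (err)
                    device_del(dev);        /* same error path as the patch */
            return err;
    }
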
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ba6428f204f9..574d9a964176 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -1,8 +1,10 @@
1/* 1/*
2Experimental ethernet netdevice using ATM AAL5 as underlying carrier 2 * Ethernet netdevice using ATM AAL5 as underlying carrier
3(RFC1483 obsoleted by RFC2684) for Linux 2.4 3 * (RFC1483 obsoleted by RFC2684) for Linux
4Author: Marcell GAL, 2000, XDSL Ltd, Hungary 4 *
5*/ 5 * Authors: Marcell GAL, 2000, XDSL Ltd, Hungary
6 * Eric Kinzie, 2006-2007, US Naval Research Laboratory
7 */
6 8
7#include <linux/module.h> 9#include <linux/module.h>
8#include <linux/init.h> 10#include <linux/init.h>
@@ -39,21 +41,35 @@ static void skb_debug(const struct sk_buff *skb)
39#define skb_debug(skb) do {} while (0) 41#define skb_debug(skb) do {} while (0)
40#endif 42#endif
41 43
44#define BR2684_ETHERTYPE_LEN 2
45#define BR2684_PAD_LEN 2
46
47#define LLC 0xaa, 0xaa, 0x03
48#define SNAP_BRIDGED 0x00, 0x80, 0xc2
49#define SNAP_ROUTED 0x00, 0x00, 0x00
50#define PID_ETHERNET 0x00, 0x07
51#define ETHERTYPE_IPV4 0x08, 0x00
52#define ETHERTYPE_IPV6 0x86, 0xdd
53#define PAD_BRIDGED 0x00, 0x00
54
55static unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
56static unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
42static unsigned char llc_oui_pid_pad[] = 57static unsigned char llc_oui_pid_pad[] =
43 { 0xAA, 0xAA, 0x03, 0x00, 0x80, 0xC2, 0x00, 0x07, 0x00, 0x00 }; 58 { LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
44#define PADLEN (2) 59static unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
60static unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
45 61
46enum br2684_encaps { 62enum br2684_encaps {
47 e_vc = BR2684_ENCAPS_VC, 63 e_vc = BR2684_ENCAPS_VC,
48 e_llc = BR2684_ENCAPS_LLC, 64 e_llc = BR2684_ENCAPS_LLC,
49}; 65};
50 66
51struct br2684_vcc { 67struct br2684_vcc {
52 struct atm_vcc *atmvcc; 68 struct atm_vcc *atmvcc;
53 struct net_device *device; 69 struct net_device *device;
54 /* keep old push,pop functions for chaining */ 70 /* keep old push, pop functions for chaining */
55 void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb); 71 void (*old_push) (struct atm_vcc * vcc, struct sk_buff * skb);
56 /* void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); */ 72 /* void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); */
57 enum br2684_encaps encaps; 73 enum br2684_encaps encaps;
58 struct list_head brvccs; 74 struct list_head brvccs;
59#ifdef CONFIG_ATM_BR2684_IPFILTER 75#ifdef CONFIG_ATM_BR2684_IPFILTER
@@ -66,9 +82,10 @@ struct br2684_dev {
66 struct net_device *net_dev; 82 struct net_device *net_dev;
67 struct list_head br2684_devs; 83 struct list_head br2684_devs;
68 int number; 84 int number;
69 struct list_head brvccs; /* one device <=> one vcc (before xmas) */ 85 struct list_head brvccs; /* one device <=> one vcc (before xmas) */
70 struct net_device_stats stats; 86 struct net_device_stats stats;
71 int mac_was_set; 87 int mac_was_set;
88 enum br2684_payload payload;
72}; 89};
73 90
74/* 91/*
@@ -84,7 +101,7 @@ static LIST_HEAD(br2684_devs);
84 101
85static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev) 102static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev)
86{ 103{
87 return (struct br2684_dev *) net_dev->priv; 104 return (struct br2684_dev *)net_dev->priv;
88} 105}
89 106
90static inline struct net_device *list_entry_brdev(const struct list_head *le) 107static inline struct net_device *list_entry_brdev(const struct list_head *le)
@@ -94,7 +111,7 @@ static inline struct net_device *list_entry_brdev(const struct list_head *le)
94 111
95static inline struct br2684_vcc *BR2684_VCC(const struct atm_vcc *atmvcc) 112static inline struct br2684_vcc *BR2684_VCC(const struct atm_vcc *atmvcc)
96{ 113{
97 return (struct br2684_vcc *) (atmvcc->user_back); 114 return (struct br2684_vcc *)(atmvcc->user_back);
98} 115}
99 116
100static inline struct br2684_vcc *list_entry_brvcc(const struct list_head *le) 117static inline struct br2684_vcc *list_entry_brvcc(const struct list_head *le)
@@ -132,10 +149,11 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
132 * otherwise false 149 * otherwise false
133 */ 150 */
134static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, 151static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
135 struct br2684_vcc *brvcc) 152 struct br2684_vcc *brvcc)
136{ 153{
137 struct atm_vcc *atmvcc; 154 struct atm_vcc *atmvcc;
138 int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2; 155 int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
156
139 if (skb_headroom(skb) < minheadroom) { 157 if (skb_headroom(skb) < minheadroom) {
140 struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom); 158 struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom);
141 brvcc->copies_needed++; 159 brvcc->copies_needed++;
@@ -146,23 +164,48 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
146 } 164 }
147 skb = skb2; 165 skb = skb2;
148 } 166 }
149 skb_push(skb, minheadroom); 167
150 if (brvcc->encaps == e_llc) 168 if (brvcc->encaps == e_llc) {
151 skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10); 169 if (brdev->payload == p_bridged) {
152 else 170 skb_push(skb, sizeof(llc_oui_pid_pad));
153 memset(skb->data, 0, 2); 171 skb_copy_to_linear_data(skb, llc_oui_pid_pad,
172 sizeof(llc_oui_pid_pad));
173 } else if (brdev->payload == p_routed) {
174 unsigned short prot = ntohs(skb->protocol);
175
176 skb_push(skb, sizeof(llc_oui_ipv4));
177 switch (prot) {
178 case ETH_P_IP:
179 skb_copy_to_linear_data(skb, llc_oui_ipv4,
180 sizeof(llc_oui_ipv4));
181 break;
182 case ETH_P_IPV6:
183 skb_copy_to_linear_data(skb, llc_oui_ipv6,
184 sizeof(llc_oui_ipv6));
185 break;
186 default:
187 dev_kfree_skb(skb);
188 return 0;
189 }
190 }
191 } else {
192 skb_push(skb, 2);
193 if (brdev->payload == p_bridged)
194 memset(skb->data, 0, 2);
195 }
154 skb_debug(skb); 196 skb_debug(skb);
155 197
156 ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; 198 ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
157 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); 199 pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
158 if (!atm_may_send(atmvcc, skb->truesize)) { 200 if (!atm_may_send(atmvcc, skb->truesize)) {
159 /* we free this here for now, because we cannot know in a higher 201 /*
160 layer whether the skb point it supplied wasn't freed yet. 202 * We free this here for now, because we cannot know in a higher
161 now, it always is. 203 * layer whether the skb pointer it supplied wasn't freed yet.
162 */ 204 * Now, it always is.
205 */
163 dev_kfree_skb(skb); 206 dev_kfree_skb(skb);
164 return 0; 207 return 0;
165 } 208 }
166 atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); 209 atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
167 ATM_SKB(skb)->atm_options = atmvcc->atm_options; 210 ATM_SKB(skb)->atm_options = atmvcc->atm_options;
168 brdev->stats.tx_packets++; 211 brdev->stats.tx_packets++;
@@ -172,10 +215,9 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
172} 215}
173 216
174static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb, 217static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb,
175 struct br2684_dev *brdev) 218 struct br2684_dev *brdev)
176{ 219{
177 return list_empty(&brdev->brvccs) ? NULL : 220 return list_empty(&brdev->brvccs) ? NULL : list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */
178 list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */
179} 221}
180 222
181static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) 223static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -199,11 +241,10 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
199 /* 241 /*
200 * We should probably use netif_*_queue() here, but that 242 * We should probably use netif_*_queue() here, but that
201 * involves added complication. We need to walk before 243 * involves added complication. We need to walk before
202 * we can run 244 * we can run.
245 *
246 * Don't free here! this pointer might be no longer valid!
203 */ 247 */
204 /* don't free here! this pointer might be no longer valid!
205 dev_kfree_skb(skb);
206 */
207 brdev->stats.tx_errors++; 248 brdev->stats.tx_errors++;
208 brdev->stats.tx_fifo_errors++; 249 brdev->stats.tx_fifo_errors++;
209 } 250 }
@@ -217,12 +258,11 @@ static struct net_device_stats *br2684_get_stats(struct net_device *dev)
217 return &BRPRIV(dev)->stats; 258 return &BRPRIV(dev)->stats;
218} 259}
219 260
220
221/* 261/*
222 * We remember when the MAC gets set, so we don't override it later with 262 * We remember when the MAC gets set, so we don't override it later with
223 * the ESI of the ATM card of the first VC 263 * the ESI of the ATM card of the first VC
224 */ 264 */
225static int (*my_eth_mac_addr)(struct net_device *, void *); 265static int (*my_eth_mac_addr) (struct net_device *, void *);
226static int br2684_mac_addr(struct net_device *dev, void *p) 266static int br2684_mac_addr(struct net_device *dev, void *p)
227{ 267{
228 int err = my_eth_mac_addr(dev, p); 268 int err = my_eth_mac_addr(dev, p);
@@ -233,7 +273,7 @@ static int br2684_mac_addr(struct net_device *dev, void *p)
233 273
234#ifdef CONFIG_ATM_BR2684_IPFILTER 274#ifdef CONFIG_ATM_BR2684_IPFILTER
235/* this IOCTL is experimental. */ 275/* this IOCTL is experimental. */
236static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg) 276static int br2684_setfilt(struct atm_vcc *atmvcc, void __user * arg)
237{ 277{
238 struct br2684_vcc *brvcc; 278 struct br2684_vcc *brvcc;
239 struct br2684_filter_set fs; 279 struct br2684_filter_set fs;
@@ -243,13 +283,12 @@ static int br2684_setfilt(struct atm_vcc *atmvcc, void __user *arg)
243 if (fs.ifspec.method != BR2684_FIND_BYNOTHING) { 283 if (fs.ifspec.method != BR2684_FIND_BYNOTHING) {
244 /* 284 /*
245 * This is really a per-vcc thing, but we can also search 285 * This is really a per-vcc thing, but we can also search
246 * by device 286 * by device.
247 */ 287 */
248 struct br2684_dev *brdev; 288 struct br2684_dev *brdev;
249 read_lock(&devs_lock); 289 read_lock(&devs_lock);
250 brdev = BRPRIV(br2684_find_dev(&fs.ifspec)); 290 brdev = BRPRIV(br2684_find_dev(&fs.ifspec));
251 if (brdev == NULL || list_empty(&brdev->brvccs) || 291 if (brdev == NULL || list_empty(&brdev->brvccs) || brdev->brvccs.next != brdev->brvccs.prev) /* >1 VCC */
252 brdev->brvccs.next != brdev->brvccs.prev) /* >1 VCC */
253 brvcc = NULL; 292 brvcc = NULL;
254 else 293 else
255 brvcc = list_entry_brvcc(brdev->brvccs.next); 294 brvcc = list_entry_brvcc(brdev->brvccs.next);
@@ -267,15 +306,16 @@ static inline int
267packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb) 306packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
268{ 307{
269 if (brvcc->filter.netmask == 0) 308 if (brvcc->filter.netmask == 0)
270 return 0; /* no filter in place */ 309 return 0; /* no filter in place */
271 if (type == htons(ETH_P_IP) && 310 if (type == htons(ETH_P_IP) &&
272 (((struct iphdr *) (skb->data))->daddr & brvcc->filter. 311 (((struct iphdr *)(skb->data))->daddr & brvcc->filter.
273 netmask) == brvcc->filter.prefix) 312 netmask) == brvcc->filter.prefix)
274 return 0; 313 return 0;
275 if (type == htons(ETH_P_ARP)) 314 if (type == htons(ETH_P_ARP))
276 return 0; 315 return 0;
277 /* TODO: we should probably filter ARPs too.. don't want to have 316 /*
278 * them returning values that don't make sense, or is that ok? 317 * TODO: we should probably filter ARPs too.. don't want to have
318 * them returning values that don't make sense, or is that ok?
279 */ 319 */
280 return 1; /* drop */ 320 return 1; /* drop */
281} 321}
@@ -299,7 +339,6 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
299 struct br2684_vcc *brvcc = BR2684_VCC(atmvcc); 339 struct br2684_vcc *brvcc = BR2684_VCC(atmvcc);
300 struct net_device *net_dev = brvcc->device; 340 struct net_device *net_dev = brvcc->device;
301 struct br2684_dev *brdev = BRPRIV(net_dev); 341 struct br2684_dev *brdev = BRPRIV(net_dev);
302 int plen = sizeof(llc_oui_pid_pad) + ETH_HLEN;
303 342
304 pr_debug("br2684_push\n"); 343 pr_debug("br2684_push\n");
305 344
@@ -320,35 +359,58 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
320 atm_return(atmvcc, skb->truesize); 359 atm_return(atmvcc, skb->truesize);
321 pr_debug("skb from brdev %p\n", brdev); 360 pr_debug("skb from brdev %p\n", brdev);
322 if (brvcc->encaps == e_llc) { 361 if (brvcc->encaps == e_llc) {
323 /* let us waste some time for checking the encapsulation. 362
324 Note, that only 7 char is checked so frames with a valid FCS 363 if (skb->len > 7 && skb->data[7] == 0x01)
325 are also accepted (but FCS is not checked of course) */ 364 __skb_trim(skb, skb->len - 4);
326 if (memcmp(skb->data, llc_oui_pid_pad, 7)) { 365
366 /* accept packets that have "ipv[46]" in the snap header */
367 if ((skb->len >= (sizeof(llc_oui_ipv4)))
368 &&
369 (memcmp
370 (skb->data, llc_oui_ipv4,
371 sizeof(llc_oui_ipv4) - BR2684_ETHERTYPE_LEN) == 0)) {
372 if (memcmp
373 (skb->data + 6, ethertype_ipv6,
374 sizeof(ethertype_ipv6)) == 0)
375 skb->protocol = __constant_htons(ETH_P_IPV6);
376 else if (memcmp
377 (skb->data + 6, ethertype_ipv4,
378 sizeof(ethertype_ipv4)) == 0)
379 skb->protocol = __constant_htons(ETH_P_IP);
380 else {
381 brdev->stats.rx_errors++;
382 dev_kfree_skb(skb);
383 return;
384 }
385 skb_pull(skb, sizeof(llc_oui_ipv4));
386 skb_reset_network_header(skb);
387 skb->pkt_type = PACKET_HOST;
388 /*
389 * Let us waste some time for checking the encapsulation.
 390 * Note that only 7 chars are checked, so frames with a valid FCS
391 * are also accepted (but FCS is not checked of course).
392 */
393 } else if ((skb->len >= sizeof(llc_oui_pid_pad)) &&
394 (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) {
395 skb_pull(skb, sizeof(llc_oui_pid_pad));
396 skb->protocol = eth_type_trans(skb, net_dev);
397 } else {
327 brdev->stats.rx_errors++; 398 brdev->stats.rx_errors++;
328 dev_kfree_skb(skb); 399 dev_kfree_skb(skb);
329 return; 400 return;
330 } 401 }
331 402
332 /* Strip FCS if present */
333 if (skb->len > 7 && skb->data[7] == 0x01)
334 __skb_trim(skb, skb->len - 4);
335 } else { 403 } else {
336 plen = PADLEN + ETH_HLEN; /* pad, dstmac,srcmac, ethtype */
337 /* first 2 chars should be 0 */ 404 /* first 2 chars should be 0 */
338 if (*((u16 *) (skb->data)) != 0) { 405 if (*((u16 *) (skb->data)) != 0) {
339 brdev->stats.rx_errors++; 406 brdev->stats.rx_errors++;
340 dev_kfree_skb(skb); 407 dev_kfree_skb(skb);
341 return; 408 return;
342 } 409 }
343 } 410 skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN); /* pad, dstmac, srcmac, ethtype */
344 if (skb->len < plen) { 411 skb->protocol = eth_type_trans(skb, net_dev);
345 brdev->stats.rx_errors++;
346 dev_kfree_skb(skb); /* dev_ not needed? */
347 return;
348 } 412 }
349 413
350 skb_pull(skb, plen - ETH_HLEN);
351 skb->protocol = eth_type_trans(skb, net_dev);
352#ifdef CONFIG_ATM_BR2684_IPFILTER 414#ifdef CONFIG_ATM_BR2684_IPFILTER
353 if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) { 415 if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) {
354 brdev->stats.rx_dropped++; 416 brdev->stats.rx_dropped++;
@@ -372,11 +434,12 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
372 netif_rx(skb); 434 netif_rx(skb);
373} 435}
374 436
375static int br2684_regvcc(struct atm_vcc *atmvcc, void __user *arg) 437/*
438 * Assign a vcc to a dev
439 * Note: we do not have explicit unassign, but look at _push()
440 */
441static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
376{ 442{
377/* assign a vcc to a dev
378Note: we do not have explicit unassign, but look at _push()
379*/
380 int err; 443 int err;
381 struct br2684_vcc *brvcc; 444 struct br2684_vcc *brvcc;
382 struct sk_buff *skb; 445 struct sk_buff *skb;
@@ -395,7 +458,7 @@ Note: we do not have explicit unassign, but look at _push()
395 net_dev = br2684_find_dev(&be.ifspec); 458 net_dev = br2684_find_dev(&be.ifspec);
396 if (net_dev == NULL) { 459 if (net_dev == NULL) {
397 printk(KERN_ERR 460 printk(KERN_ERR
 398 "br2684: tried to attach to non-existent device\n"); 461 "br2684: tried to attach to non-existent device\n");
399 err = -ENXIO; 462 err = -ENXIO;
400 goto error; 463 goto error;
401 } 464 }
@@ -411,13 +474,15 @@ Note: we do not have explicit unassign, but look at _push()
411 } 474 }
412 if (be.fcs_in != BR2684_FCSIN_NO || be.fcs_out != BR2684_FCSOUT_NO || 475 if (be.fcs_in != BR2684_FCSIN_NO || be.fcs_out != BR2684_FCSOUT_NO ||
413 be.fcs_auto || be.has_vpiid || be.send_padding || (be.encaps != 476 be.fcs_auto || be.has_vpiid || be.send_padding || (be.encaps !=
414 BR2684_ENCAPS_VC && be.encaps != BR2684_ENCAPS_LLC) || 477 BR2684_ENCAPS_VC
415 be.min_size != 0) { 478 && be.encaps !=
479 BR2684_ENCAPS_LLC)
480 || be.min_size != 0) {
416 err = -EINVAL; 481 err = -EINVAL;
417 goto error; 482 goto error;
418 } 483 }
419 pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, 484 pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc,
420 brvcc); 485 be.encaps, brvcc);
421 if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { 486 if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) {
422 unsigned char *esi = atmvcc->dev->esi; 487 unsigned char *esi = atmvcc->dev->esi;
423 if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5]) 488 if (esi[0] | esi[1] | esi[2] | esi[3] | esi[4] | esi[5])
@@ -430,7 +495,7 @@ Note: we do not have explicit unassign, but look at _push()
430 brvcc->device = net_dev; 495 brvcc->device = net_dev;
431 brvcc->atmvcc = atmvcc; 496 brvcc->atmvcc = atmvcc;
432 atmvcc->user_back = brvcc; 497 atmvcc->user_back = brvcc;
433 brvcc->encaps = (enum br2684_encaps) be.encaps; 498 brvcc->encaps = (enum br2684_encaps)be.encaps;
434 brvcc->old_push = atmvcc->push; 499 brvcc->old_push = atmvcc->push;
435 barrier(); 500 barrier();
436 atmvcc->push = br2684_push; 501 atmvcc->push = br2684_push;
@@ -461,7 +526,7 @@ Note: we do not have explicit unassign, but look at _push()
461 } 526 }
462 __module_get(THIS_MODULE); 527 __module_get(THIS_MODULE);
463 return 0; 528 return 0;
464 error: 529 error:
465 write_unlock_irq(&devs_lock); 530 write_unlock_irq(&devs_lock);
466 kfree(brvcc); 531 kfree(brvcc);
467 return err; 532 return err;
@@ -482,25 +547,52 @@ static void br2684_setup(struct net_device *netdev)
482 INIT_LIST_HEAD(&brdev->brvccs); 547 INIT_LIST_HEAD(&brdev->brvccs);
483} 548}
484 549
485static int br2684_create(void __user *arg) 550static void br2684_setup_routed(struct net_device *netdev)
551{
552 struct br2684_dev *brdev = BRPRIV(netdev);
553 brdev->net_dev = netdev;
554
555 netdev->hard_header_len = 0;
556 my_eth_mac_addr = netdev->set_mac_address;
557 netdev->set_mac_address = br2684_mac_addr;
558 netdev->hard_start_xmit = br2684_start_xmit;
559 netdev->get_stats = br2684_get_stats;
560 netdev->addr_len = 0;
561 netdev->mtu = 1500;
562 netdev->type = ARPHRD_PPP;
563 netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
564 netdev->tx_queue_len = 100;
565 INIT_LIST_HEAD(&brdev->brvccs);
566}
567
568static int br2684_create(void __user * arg)
486{ 569{
487 int err; 570 int err;
488 struct net_device *netdev; 571 struct net_device *netdev;
489 struct br2684_dev *brdev; 572 struct br2684_dev *brdev;
490 struct atm_newif_br2684 ni; 573 struct atm_newif_br2684 ni;
574 enum br2684_payload payload;
491 575
492 pr_debug("br2684_create\n"); 576 pr_debug("br2684_create\n");
493 577
494 if (copy_from_user(&ni, arg, sizeof ni)) { 578 if (copy_from_user(&ni, arg, sizeof ni)) {
495 return -EFAULT; 579 return -EFAULT;
496 } 580 }
581
582 if (ni.media & BR2684_FLAG_ROUTED)
583 payload = p_routed;
584 else
585 payload = p_bridged;
586 ni.media &= 0xffff; /* strip flags */
587
497 if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) { 588 if (ni.media != BR2684_MEDIA_ETHERNET || ni.mtu != 1500) {
498 return -EINVAL; 589 return -EINVAL;
499 } 590 }
500 591
501 netdev = alloc_netdev(sizeof(struct br2684_dev), 592 netdev = alloc_netdev(sizeof(struct br2684_dev),
502 ni.ifname[0] ? ni.ifname : "nas%d", 593 ni.ifname[0] ? ni.ifname : "nas%d",
503 br2684_setup); 594 (payload == p_routed) ?
595 br2684_setup_routed : br2684_setup);
504 if (!netdev) 596 if (!netdev)
505 return -ENOMEM; 597 return -ENOMEM;
506 598
@@ -516,6 +608,7 @@ static int br2684_create(void __user *arg)
516 } 608 }
517 609
518 write_lock_irq(&devs_lock); 610 write_lock_irq(&devs_lock);
611 brdev->payload = payload;
519 brdev->number = list_empty(&br2684_devs) ? 1 : 612 brdev->number = list_empty(&br2684_devs) ? 1 :
520 BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; 613 BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
521 list_add_tail(&brdev->br2684_devs, &br2684_devs); 614 list_add_tail(&brdev->br2684_devs, &br2684_devs);
@@ -528,16 +621,16 @@ static int br2684_create(void __user *arg)
528 * -ENOIOCTLCMD for any unrecognized ioctl 621 * -ENOIOCTLCMD for any unrecognized ioctl
529 */ 622 */
530static int br2684_ioctl(struct socket *sock, unsigned int cmd, 623static int br2684_ioctl(struct socket *sock, unsigned int cmd,
531 unsigned long arg) 624 unsigned long arg)
532{ 625{
533 struct atm_vcc *atmvcc = ATM_SD(sock); 626 struct atm_vcc *atmvcc = ATM_SD(sock);
534 void __user *argp = (void __user *)arg; 627 void __user *argp = (void __user *)arg;
628 atm_backend_t b;
535 629
536 int err; 630 int err;
537 switch(cmd) { 631 switch (cmd) {
538 case ATM_SETBACKEND: 632 case ATM_SETBACKEND:
539 case ATM_NEWBACKENDIF: { 633 case ATM_NEWBACKENDIF:
540 atm_backend_t b;
541 err = get_user(b, (atm_backend_t __user *) argp); 634 err = get_user(b, (atm_backend_t __user *) argp);
542 if (err) 635 if (err)
543 return -EFAULT; 636 return -EFAULT;
@@ -549,7 +642,6 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
549 return br2684_regvcc(atmvcc, argp); 642 return br2684_regvcc(atmvcc, argp);
550 else 643 else
551 return br2684_create(argp); 644 return br2684_create(argp);
552 }
553#ifdef CONFIG_ATM_BR2684_IPFILTER 645#ifdef CONFIG_ATM_BR2684_IPFILTER
554 case BR2684_SETFILT: 646 case BR2684_SETFILT:
555 if (atmvcc->push != br2684_push) 647 if (atmvcc->push != br2684_push)
@@ -557,6 +649,7 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
557 if (!capable(CAP_NET_ADMIN)) 649 if (!capable(CAP_NET_ADMIN))
558 return -EPERM; 650 return -EPERM;
559 err = br2684_setfilt(atmvcc, argp); 651 err = br2684_setfilt(atmvcc, argp);
652
560 return err; 653 return err;
561#endif /* CONFIG_ATM_BR2684_IPFILTER */ 654#endif /* CONFIG_ATM_BR2684_IPFILTER */
562 } 655 }
@@ -564,24 +657,25 @@ static int br2684_ioctl(struct socket *sock, unsigned int cmd,
564} 657}
565 658
566static struct atm_ioctl br2684_ioctl_ops = { 659static struct atm_ioctl br2684_ioctl_ops = {
567 .owner = THIS_MODULE, 660 .owner = THIS_MODULE,
568 .ioctl = br2684_ioctl, 661 .ioctl = br2684_ioctl,
569}; 662};
570 663
571
572#ifdef CONFIG_PROC_FS 664#ifdef CONFIG_PROC_FS
573static void *br2684_seq_start(struct seq_file *seq, loff_t *pos) 665static void *br2684_seq_start(struct seq_file *seq, loff_t * pos)
666 __acquires(devs_lock)
574{ 667{
575 read_lock(&devs_lock); 668 read_lock(&devs_lock);
576 return seq_list_start(&br2684_devs, *pos); 669 return seq_list_start(&br2684_devs, *pos);
577} 670}
578 671
579static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t *pos) 672static void *br2684_seq_next(struct seq_file *seq, void *v, loff_t * pos)
580{ 673{
581 return seq_list_next(v, &br2684_devs, pos); 674 return seq_list_next(v, &br2684_devs, pos);
582} 675}
583 676
584static void br2684_seq_stop(struct seq_file *seq, void *v) 677static void br2684_seq_stop(struct seq_file *seq, void *v)
678 __releases(devs_lock)
585{ 679{
586 read_unlock(&devs_lock); 680 read_unlock(&devs_lock);
587} 681}
@@ -589,7 +683,7 @@ static void br2684_seq_stop(struct seq_file *seq, void *v)
589static int br2684_seq_show(struct seq_file *seq, void *v) 683static int br2684_seq_show(struct seq_file *seq, void *v)
590{ 684{
591 const struct br2684_dev *brdev = list_entry(v, struct br2684_dev, 685 const struct br2684_dev *brdev = list_entry(v, struct br2684_dev,
592 br2684_devs); 686 br2684_devs);
593 const struct net_device *net_dev = brdev->net_dev; 687 const struct net_device *net_dev = brdev->net_dev;
594 const struct br2684_vcc *brvcc; 688 const struct br2684_vcc *brvcc;
595 DECLARE_MAC_BUF(mac); 689 DECLARE_MAC_BUF(mac);
@@ -601,21 +695,19 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
601 brdev->mac_was_set ? "set" : "auto"); 695 brdev->mac_was_set ? "set" : "auto");
602 696
603 list_for_each_entry(brvcc, &brdev->brvccs, brvccs) { 697 list_for_each_entry(brvcc, &brdev->brvccs, brvccs) {
604 seq_printf(seq, " vcc %d.%d.%d: encaps=%s" 698 seq_printf(seq, " vcc %d.%d.%d: encaps=%s payload=%s"
605 ", failed copies %u/%u" 699 ", failed copies %u/%u"
606 "\n", brvcc->atmvcc->dev->number, 700 "\n", brvcc->atmvcc->dev->number,
607 brvcc->atmvcc->vpi, brvcc->atmvcc->vci, 701 brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
608 (brvcc->encaps == e_llc) ? "LLC" : "VC" 702 (brvcc->encaps == e_llc) ? "LLC" : "VC",
609 , brvcc->copies_failed 703 (brdev->payload == p_bridged) ? "bridged" : "routed",
610 , brvcc->copies_needed 704 brvcc->copies_failed, brvcc->copies_needed);
611 );
612#ifdef CONFIG_ATM_BR2684_IPFILTER 705#ifdef CONFIG_ATM_BR2684_IPFILTER
613#define b1(var, byte) ((u8 *) &brvcc->filter.var)[byte] 706#define b1(var, byte) ((u8 *) &brvcc->filter.var)[byte]
614#define bs(var) b1(var, 0), b1(var, 1), b1(var, 2), b1(var, 3) 707#define bs(var) b1(var, 0), b1(var, 1), b1(var, 2), b1(var, 3)
615 if (brvcc->filter.netmask != 0) 708 if (brvcc->filter.netmask != 0)
616 seq_printf(seq, " filter=%d.%d.%d.%d/" 709 seq_printf(seq, " filter=%d.%d.%d.%d/"
617 "%d.%d.%d.%d\n", 710 "%d.%d.%d.%d\n", bs(prefix), bs(netmask));
618 bs(prefix), bs(netmask));
619#undef bs 711#undef bs
620#undef b1 712#undef b1
621#endif /* CONFIG_ATM_BR2684_IPFILTER */ 713#endif /* CONFIG_ATM_BR2684_IPFILTER */
@@ -625,9 +717,9 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
625 717
626static const struct seq_operations br2684_seq_ops = { 718static const struct seq_operations br2684_seq_ops = {
627 .start = br2684_seq_start, 719 .start = br2684_seq_start,
628 .next = br2684_seq_next, 720 .next = br2684_seq_next,
629 .stop = br2684_seq_stop, 721 .stop = br2684_seq_stop,
630 .show = br2684_seq_show, 722 .show = br2684_seq_show,
631}; 723};
632 724
633static int br2684_proc_open(struct inode *inode, struct file *file) 725static int br2684_proc_open(struct inode *inode, struct file *file)
@@ -636,15 +728,15 @@ static int br2684_proc_open(struct inode *inode, struct file *file)
636} 728}
637 729
638static const struct file_operations br2684_proc_ops = { 730static const struct file_operations br2684_proc_ops = {
639 .owner = THIS_MODULE, 731 .owner = THIS_MODULE,
640 .open = br2684_proc_open, 732 .open = br2684_proc_open,
641 .read = seq_read, 733 .read = seq_read,
642 .llseek = seq_lseek, 734 .llseek = seq_lseek,
643 .release = seq_release, 735 .release = seq_release,
644}; 736};
645 737
646extern struct proc_dir_entry *atm_proc_root; /* from proc.c */ 738extern struct proc_dir_entry *atm_proc_root; /* from proc.c */
647#endif 739#endif /* CONFIG_PROC_FS */
648 740
649static int __init br2684_init(void) 741static int __init br2684_init(void)
650{ 742{
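
The br2684 rework above adds RFC 2684 routed-PDU support alongside the existing bridged mode. Under LLC encapsulation, a bridged Ethernet PDU begins with LLC aa-aa-03, OUI 00-80-c2, PID 00-07 and two pad bytes, while a routed PDU uses OUI 00-00-00 followed directly by the EtherType; that is how the new br2684_push() tells IPv4 from IPv6 before setting skb->protocol. A freestanding sketch of the classification, reusing the byte values from the patch (the classify() helper itself is a made-up illustration):

    /* Sketch of the LLC/SNAP header check br2684_push() now performs.
     * Byte values are from the patch; classify() is a made-up helper. */
    #include <string.h>

    enum pdu { PDU_BAD, PDU_BRIDGED_ETH, PDU_ROUTED_IPV4, PDU_ROUTED_IPV6 };

    static const unsigned char llc_bridged[10] =
            { 0xaa, 0xaa, 0x03, 0x00, 0x80, 0xc2, 0x00, 0x07, 0x00, 0x00 };
    static const unsigned char llc_routed[6] =
            { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };

    static enum pdu classify(const unsigned char *p, int len)
    {
            if (len >= 8 && memcmp(p, llc_routed, 6) == 0) {
                    if (p[6] == 0x08 && p[7] == 0x00)
                            return PDU_ROUTED_IPV4;   /* EtherType 0x0800 */
                    if (p[6] == 0x86 && p[7] == 0xdd)
                            return PDU_ROUTED_IPV6;   /* EtherType 0x86dd */
                    return PDU_BAD;
            }
            /* bridged: only 7 bytes compared, so frames carrying an FCS
             * still match, exactly as the kernel comment notes */
            if (len >= 10 && memcmp(p, llc_bridged, 7) == 0)
                    return PDU_BRIDGED_ETH;
            return PDU_BAD;
    }
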
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 741742f00797..86b885ec1cbd 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -285,7 +285,7 @@ static int clip_constructor(struct neighbour *neigh)
285 struct neigh_parms *parms; 285 struct neigh_parms *parms;
286 286
287 pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry); 287 pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry);
288 neigh->type = inet_addr_type(entry->ip); 288 neigh->type = inet_addr_type(&init_net, entry->ip);
289 if (neigh->type != RTN_UNICAST) 289 if (neigh->type != RTN_UNICAST)
290 return -EINVAL; 290 return -EINVAL;
291 291
@@ -534,7 +534,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
534 unlink_clip_vcc(clip_vcc); 534 unlink_clip_vcc(clip_vcc);
535 return 0; 535 return 0;
536 } 536 }
537 error = ip_route_output_key(&rt, &fl); 537 error = ip_route_output_key(&init_net, &rt, &fl);
538 if (error) 538 if (error)
539 return error; 539 return error;
540 neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1); 540 neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1);
@@ -903,6 +903,8 @@ static void *clip_seq_sub_iter(struct neigh_seq_state *_state,
903 903
904static void *clip_seq_start(struct seq_file *seq, loff_t * pos) 904static void *clip_seq_start(struct seq_file *seq, loff_t * pos)
905{ 905{
906 struct clip_seq_state *state = seq->private;
907 state->ns.neigh_sub_iter = clip_seq_sub_iter;
906 return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY); 908 return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY);
907} 909}
908 910
@@ -932,36 +934,15 @@ static const struct seq_operations arp_seq_ops = {
932 934
933static int arp_seq_open(struct inode *inode, struct file *file) 935static int arp_seq_open(struct inode *inode, struct file *file)
934{ 936{
935 struct clip_seq_state *state; 937 return seq_open_net(inode, file, &arp_seq_ops,
936 struct seq_file *seq; 938 sizeof(struct clip_seq_state));
937 int rc = -EAGAIN;
938
939 state = kzalloc(sizeof(*state), GFP_KERNEL);
940 if (!state) {
941 rc = -ENOMEM;
942 goto out_kfree;
943 }
944 state->ns.neigh_sub_iter = clip_seq_sub_iter;
945
946 rc = seq_open(file, &arp_seq_ops);
947 if (rc)
948 goto out_kfree;
949
950 seq = file->private_data;
951 seq->private = state;
952out:
953 return rc;
954
955out_kfree:
956 kfree(state);
957 goto out;
958} 939}
959 940
960static const struct file_operations arp_seq_fops = { 941static const struct file_operations arp_seq_fops = {
961 .open = arp_seq_open, 942 .open = arp_seq_open,
962 .read = seq_read, 943 .read = seq_read,
963 .llseek = seq_lseek, 944 .llseek = seq_lseek,
964 .release = seq_release_private, 945 .release = seq_release_net,
965 .owner = THIS_MODULE 946 .owner = THIS_MODULE
966}; 947};
967#endif 948#endif
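
The clip.c hunk replaces a hand-rolled kzalloc()-plus-seq_open() open routine with seq_open_net(), which allocates a zeroed private area of the requested size (its first member must be a struct seq_net_private) and pairs with seq_release_net(); per-open setup such as installing neigh_sub_iter therefore moves into ->start(). A sketch of the reduced boilerplate under the ~2.6.25 API; every foo_* name is hypothetical:

    /* Sketch of the seq_open_net() pattern; foo_* names are made up. */
    #include <linux/fs.h>
    #include <linux/module.h>
    #include <linux/seq_file.h>

    struct foo_state {
            struct seq_net_private p;       /* must be the first member */
            int primed;
    };

    static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
    {
            struct foo_state *st = seq->private; /* zeroed by seq_open_net() */

            st->primed = 1;         /* per-open init now lives in ->start() */
            return NULL;            /* real iteration elided */
    }

    static void *foo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    {
            return NULL;
    }

    static void foo_seq_stop(struct seq_file *seq, void *v)
    {
    }

    static int foo_seq_show(struct seq_file *seq, void *v)
    {
            return 0;
    }

    static const struct seq_operations foo_seq_ops = {
            .start = foo_seq_start,
            .next  = foo_seq_next,
            .stop  = foo_seq_stop,
            .show  = foo_seq_show,
    };

    static int foo_seq_open(struct inode *inode, struct file *file)
    {
            return seq_open_net(inode, file, &foo_seq_ops,
                                sizeof(struct foo_state));
    }

    static const struct file_operations foo_seq_fops = {
            .owner   = THIS_MODULE,
            .open    = foo_seq_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = seq_release_net,     /* matches seq_open_net() */
    };
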
diff --git a/net/atm/common.c b/net/atm/common.c
index eba09a04f6bf..c865517ba449 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -113,7 +113,7 @@ static void vcc_write_space(struct sock *sk)
113 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 113 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
114 wake_up_interruptible(sk->sk_sleep); 114 wake_up_interruptible(sk->sk_sleep);
115 115
116 sk_wake_async(sk, 2, POLL_OUT); 116 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
117 } 117 }
118 118
119 read_unlock(&sk->sk_callback_lock); 119 read_unlock(&sk->sk_callback_lock);
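
The common.c change swaps the bare band number 2 for the named SOCK_WAKE_SPACE reason that sk_wake_async() now takes. A sketch of a write-space callback spelled with the named constant, mirroring vcc_write_space() above; demo_write_space is an illustrative name and the surrounding logic is simplified:

    /* Sketch: sk_wake_async() with a named wake reason (~2.6.25). */
    #include <net/sock.h>

    static void demo_write_space(struct sock *sk)
    {
            read_lock(&sk->sk_callback_lock);

            if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
                    wake_up_interruptible(sk->sk_sleep);

            /* previously sk_wake_async(sk, 2, POLL_OUT) */
            sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);

            read_unlock(&sk->sk_callback_lock);
    }
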
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 7eb1b21a0e94..1a8c4c6c0cd0 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -176,7 +176,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
176static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) 176static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
177{ 177{
178 struct trh_hdr *trh; 178 struct trh_hdr *trh;
179 int riflen, num_rdsc; 179 unsigned int riflen, num_rdsc;
180 180
181 trh = (struct trh_hdr *)packet; 181 trh = (struct trh_hdr *)packet;
182 if (trh->daddr[0] & (uint8_t) 0x80) 182 if (trh->daddr[0] & (uint8_t) 0x80)
@@ -1789,9 +1789,8 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
1789 } 1789 }
1790 memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); 1790 memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
1791 INIT_HLIST_NODE(&to_return->next); 1791 INIT_HLIST_NODE(&to_return->next);
1792 init_timer(&to_return->timer); 1792 setup_timer(&to_return->timer, lec_arp_expire_arp,
1793 to_return->timer.function = lec_arp_expire_arp; 1793 (unsigned long)to_return);
1794 to_return->timer.data = (unsigned long)to_return;
1795 to_return->last_used = jiffies; 1794 to_return->last_used = jiffies;
1796 to_return->priv = priv; 1795 to_return->priv = priv;
1797 skb_queue_head_init(&to_return->tx_wait); 1796 skb_queue_head_init(&to_return->tx_wait);
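
lec.c here, and the AX.25 and Bluetooth files later in this merge, collapse the three-statement init_timer()/.function/.data idiom into one setup_timer() call. A sketch of the equivalence against the pre-timer_setup() timer API of this era; my_obj and my_expire are illustrative names:

    /* Sketch: setup_timer() vs. open-coded init (~2.6.25 timer API). */
    #include <linux/jiffies.h>
    #include <linux/timer.h>

    struct my_obj {
            struct timer_list timer;
    };

    static void my_expire(unsigned long data)
    {
            struct my_obj *obj = (struct my_obj *)data;

            /* expiry work elided */
            (void)obj;
    }

    static void my_obj_arm(struct my_obj *obj)
    {
            /* one call replaces init_timer() + .function + .data */
            setup_timer(&obj->timer, my_expire, (unsigned long)obj);
            mod_timer(&obj->timer, jiffies + 2 * HZ);
    }
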
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 5d9d5ffba145..49125110bb8b 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -142,6 +142,7 @@ static int vcc_seq_release(struct inode *inode, struct file *file)
142} 142}
143 143
144static void *vcc_seq_start(struct seq_file *seq, loff_t *pos) 144static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
145 __acquires(vcc_sklist_lock)
145{ 146{
146 struct vcc_state *state = seq->private; 147 struct vcc_state *state = seq->private;
147 loff_t left = *pos; 148 loff_t left = *pos;
@@ -152,6 +153,7 @@ static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
152} 153}
153 154
154static void vcc_seq_stop(struct seq_file *seq, void *v) 155static void vcc_seq_stop(struct seq_file *seq, void *v)
156 __releases(vcc_sklist_lock)
155{ 157{
156 read_unlock(&vcc_sklist_lock); 158 read_unlock(&vcc_sklist_lock);
157} 159}
@@ -476,7 +478,7 @@ static void atm_proc_dirs_remove(void)
476 if (e->dirent) 478 if (e->dirent)
477 remove_proc_entry(e->name, atm_proc_root); 479 remove_proc_entry(e->name, atm_proc_root);
478 } 480 }
479 remove_proc_entry("atm", init_net.proc_net); 481 proc_net_remove(&init_net, "atm");
480} 482}
481 483
482int __init atm_proc_init(void) 484int __init atm_proc_init(void)
@@ -484,7 +486,7 @@ int __init atm_proc_init(void)
484 static struct atm_proc_entry *e; 486 static struct atm_proc_entry *e;
485 int ret; 487 int ret;
486 488
487 atm_proc_root = proc_mkdir("atm", init_net.proc_net); 489 atm_proc_root = proc_net_mkdir(&init_net, "atm", init_net.proc_net);
488 if (!atm_proc_root) 490 if (!atm_proc_root)
489 goto err_out; 491 goto err_out;
490 for (e = atm_proc_ents; e->name; e++) { 492 for (e = atm_proc_ents; e->name; e++) {
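
The __acquires()/__releases() markers added to the seq_file start/stop pairs in proc.c, and throughout the AX.25 files below, are sparse annotations: they expand to nothing in a normal build but let sparse check that a lock taken in ->start() is dropped in ->stop(). A sketch of one annotated pair; the bar_* names are illustrative:

    /* Sketch: sparse lock annotations on a seq_file start/stop pair. */
    #include <linux/seq_file.h>
    #include <linux/spinlock.h>

    static DEFINE_RWLOCK(bar_lock);

    static void *bar_seq_start(struct seq_file *seq, loff_t *pos)
            __acquires(bar_lock)
    {
            read_lock(&bar_lock);
            return NULL;            /* real iteration elided */
    }

    static void bar_seq_stop(struct seq_file *seq, void *v)
            __releases(bar_lock)
    {
            read_unlock(&bar_lock);
    }
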
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index b4725ff317c0..1bc0e85f04a5 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -330,10 +330,9 @@ void ax25_destroy_socket(ax25_cb *ax25)
330 if (atomic_read(&ax25->sk->sk_wmem_alloc) || 330 if (atomic_read(&ax25->sk->sk_wmem_alloc) ||
331 atomic_read(&ax25->sk->sk_rmem_alloc)) { 331 atomic_read(&ax25->sk->sk_rmem_alloc)) {
332 /* Defer: outstanding buffers */ 332 /* Defer: outstanding buffers */
333 init_timer(&ax25->dtimer); 333 setup_timer(&ax25->dtimer, ax25_destroy_timer,
334 (unsigned long)ax25);
334 ax25->dtimer.expires = jiffies + 2 * HZ; 335 ax25->dtimer.expires = jiffies + 2 * HZ;
335 ax25->dtimer.function = ax25_destroy_timer;
336 ax25->dtimer.data = (unsigned long)ax25;
337 add_timer(&ax25->dtimer); 336 add_timer(&ax25->dtimer);
338 } else { 337 } else {
339 struct sock *sk=ax25->sk; 338 struct sock *sk=ax25->sk;
@@ -571,7 +570,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
571 res = -EINVAL; 570 res = -EINVAL;
572 break; 571 break;
573 } 572 }
574 ax25->rtt = (opt * HZ) / 2; 573 ax25->rtt = (opt * HZ) >> 1;
575 ax25->t1 = opt * HZ; 574 ax25->t1 = opt * HZ;
576 break; 575 break;
577 576
@@ -1864,6 +1863,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1864#ifdef CONFIG_PROC_FS 1863#ifdef CONFIG_PROC_FS
1865 1864
1866static void *ax25_info_start(struct seq_file *seq, loff_t *pos) 1865static void *ax25_info_start(struct seq_file *seq, loff_t *pos)
1866 __acquires(ax25_list_lock)
1867{ 1867{
1868 struct ax25_cb *ax25; 1868 struct ax25_cb *ax25;
1869 struct hlist_node *node; 1869 struct hlist_node *node;
@@ -1887,6 +1887,7 @@ static void *ax25_info_next(struct seq_file *seq, void *v, loff_t *pos)
1887} 1887}
1888 1888
1889static void ax25_info_stop(struct seq_file *seq, void *v) 1889static void ax25_info_stop(struct seq_file *seq, void *v)
1890 __releases(ax25_list_lock)
1890{ 1891{
1891 spin_unlock_bh(&ax25_list_lock); 1892 spin_unlock_bh(&ax25_list_lock);
1892} 1893}
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 4f44185955c7..c4e3b025d21c 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -130,7 +130,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
130 */ 130 */
131 if (sk != NULL) { 131 if (sk != NULL) {
132 if (atomic_read(&sk->sk_rmem_alloc) < 132 if (atomic_read(&sk->sk_rmem_alloc) <
133 (sk->sk_rcvbuf / 2) && 133 (sk->sk_rcvbuf >> 1) &&
134 (ax25->condition & AX25_COND_OWN_RX_BUSY)) { 134 (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
135 ax25->condition &= ~AX25_COND_OWN_RX_BUSY; 135 ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
136 ax25->condition &= ~AX25_COND_ACK_PENDING; 136 ax25->condition &= ~AX25_COND_ACK_PENDING;
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 9ecf6f1df863..38c7f3087ec3 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -249,6 +249,7 @@ int ax25_rt_ioctl(unsigned int cmd, void __user *arg)
249#ifdef CONFIG_PROC_FS 249#ifdef CONFIG_PROC_FS
250 250
251static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos) 251static void *ax25_rt_seq_start(struct seq_file *seq, loff_t *pos)
252 __acquires(ax25_route_lock)
252{ 253{
253 struct ax25_route *ax25_rt; 254 struct ax25_route *ax25_rt;
254 int i = 1; 255 int i = 1;
@@ -274,6 +275,7 @@ static void *ax25_rt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
274} 275}
275 276
276static void ax25_rt_seq_stop(struct seq_file *seq, void *v) 277static void ax25_rt_seq_stop(struct seq_file *seq, void *v)
278 __releases(ax25_route_lock)
277{ 279{
278 read_unlock(&ax25_route_lock); 280 read_unlock(&ax25_route_lock);
279} 281}
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index f2f6918ac9bb..96e4b9273250 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -32,7 +32,7 @@
32 32
33void ax25_std_heartbeat_expiry(ax25_cb *ax25) 33void ax25_std_heartbeat_expiry(ax25_cb *ax25)
34{ 34{
35 struct sock *sk=ax25->sk; 35 struct sock *sk = ax25->sk;
36 36
37 if (sk) 37 if (sk)
38 bh_lock_sock(sk); 38 bh_lock_sock(sk);
@@ -62,7 +62,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
62 */ 62 */
63 if (sk != NULL) { 63 if (sk != NULL) {
64 if (atomic_read(&sk->sk_rmem_alloc) < 64 if (atomic_read(&sk->sk_rmem_alloc) <
65 (sk->sk_rcvbuf / 2) && 65 (sk->sk_rcvbuf >> 1) &&
66 (ax25->condition & AX25_COND_OWN_RX_BUSY)) { 66 (ax25->condition & AX25_COND_OWN_RX_BUSY)) {
67 ax25->condition &= ~AX25_COND_OWN_RX_BUSY; 67 ax25->condition &= ~AX25_COND_OWN_RX_BUSY;
68 ax25->condition &= ~AX25_COND_ACK_PENDING; 68 ax25->condition &= ~AX25_COND_ACK_PENDING;
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index ce0b13d44385..5f4eb73fb9d3 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -43,10 +43,10 @@
43 * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines. 43 * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines.
44 */ 44 */
45 45
46HLIST_HEAD(ax25_uid_list); 46static HLIST_HEAD(ax25_uid_list);
47static DEFINE_RWLOCK(ax25_uid_lock); 47static DEFINE_RWLOCK(ax25_uid_lock);
48 48
49int ax25_uid_policy = 0; 49int ax25_uid_policy;
50 50
51EXPORT_SYMBOL(ax25_uid_policy); 51EXPORT_SYMBOL(ax25_uid_policy);
52 52
@@ -144,6 +144,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
144#ifdef CONFIG_PROC_FS 144#ifdef CONFIG_PROC_FS
145 145
146static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) 146static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
147 __acquires(ax25_uid_lock)
147{ 148{
148 struct ax25_uid_assoc *pt; 149 struct ax25_uid_assoc *pt;
149 struct hlist_node *node; 150 struct hlist_node *node;
@@ -167,6 +168,7 @@ static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
167} 168}
168 169
169static void ax25_uid_seq_stop(struct seq_file *seq, void *v) 170static void ax25_uid_seq_stop(struct seq_file *seq, void *v)
171 __releases(ax25_uid_lock)
170{ 172{
171 read_unlock(&ax25_uid_lock); 173 read_unlock(&ax25_uid_lock);
172} 174}
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 443a83676638..f597987b2424 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -31,25 +31,11 @@ static struct ctl_table_header *ax25_table_header;
31static ctl_table *ax25_table; 31static ctl_table *ax25_table;
32static int ax25_table_size; 32static int ax25_table_size;
33 33
34static ctl_table ax25_dir_table[] = { 34static struct ctl_path ax25_path[] = {
35 { 35 { .procname = "net", .ctl_name = CTL_NET, },
36 .ctl_name = NET_AX25, 36 { .procname = "ax25", .ctl_name = NET_AX25, },
37 .procname = "ax25", 37 { }
38 .mode = 0555,
39 },
40 { .ctl_name = 0 }
41};
42
43static ctl_table ax25_root_table[] = {
44 {
45 .ctl_name = CTL_NET,
46 .procname = "net",
47 .mode = 0555,
48 .child = ax25_dir_table
49 },
50 { .ctl_name = 0 }
51}; 38};
52
53static const ctl_table ax25_param_table[] = { 39static const ctl_table ax25_param_table[] = {
54 { 40 {
55 .ctl_name = NET_AX25_IP_DEFAULT_MODE, 41 .ctl_name = NET_AX25_IP_DEFAULT_MODE,
@@ -243,9 +229,7 @@ void ax25_register_sysctl(void)
243 } 229 }
244 spin_unlock_bh(&ax25_dev_lock); 230 spin_unlock_bh(&ax25_dev_lock);
245 231
246 ax25_dir_table[0].child = ax25_table; 232 ax25_table_header = register_sysctl_paths(ax25_path, ax25_table);
247
248 ax25_table_header = register_sysctl_table(ax25_root_table);
249} 233}
250 234
251void ax25_unregister_sysctl(void) 235void ax25_unregister_sysctl(void)
@@ -253,7 +237,6 @@ void ax25_unregister_sysctl(void)
253 ctl_table *p; 237 ctl_table *p;
254 unregister_sysctl_table(ax25_table_header); 238 unregister_sysctl_table(ax25_table_header);
255 239
256 ax25_dir_table[0].child = NULL;
257 for (p = ax25_table; p->ctl_name; p++) 240 for (p = ax25_table; p->ctl_name; p++)
258 kfree(p->child); 241 kfree(p->child);
259 kfree(ax25_table); 242 kfree(ax25_table);
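
sysctl_net_ax25.c drops its two hand-built directory tables ("net" and "ax25") in favour of a struct ctl_path array handed to register_sysctl_paths(), which creates the intermediate /proc/sys directories itself; br_netfilter.c below gets the same treatment. A sketch of the pattern against the ~2.6.25 sysctl API; the demo table and its single integer knob are illustrative:

    /* Sketch: register_sysctl_paths() replacing nested dir tables. */
    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/sysctl.h>

    static int demo_value;

    static ctl_table demo_table[] = {
            {
                    .procname     = "value",
                    .data         = &demo_value,
                    .maxlen       = sizeof(int),
                    .mode         = 0644,
                    .proc_handler = proc_dointvec,
            },
            { }
    };

    /* replaces the old "net" and "demo" directory ctl_tables */
    static struct ctl_path demo_path[] = {
            { .procname = "net", .ctl_name = CTL_NET, },
            { .procname = "demo", },
            { }
    };

    static struct ctl_table_header *demo_header;

    static int __init demo_sysctl_init(void)
    {
            demo_header = register_sysctl_paths(demo_path, demo_table);
            return demo_header ? 0 : -ENOMEM;
    }
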
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 9ebd3c64474d..81065e548a1f 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -94,7 +94,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
94 return err; 94 return err;
95 95
96 if (nsock->sk->sk_state != BT_CONNECTED) { 96 if (nsock->sk->sk_state != BT_CONNECTED) {
97 fput(nsock->file); 97 sockfd_put(nsock);
98 return -EBADFD; 98 return -EBADFD;
99 } 99 }
100 100
@@ -103,7 +103,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
103 if (copy_to_user(argp, &ca, sizeof(ca))) 103 if (copy_to_user(argp, &ca, sizeof(ca)))
104 err = -EFAULT; 104 err = -EFAULT;
105 } else 105 } else
106 fput(nsock->file); 106 sockfd_put(nsock);
107 107
108 return err; 108 return err;
109 109
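
The Bluetooth BNEP, CMTP and HIDP ioctl paths stop open-coding fput(sock->file) and use sockfd_put(), the documented counterpart of sockfd_lookup(); it is a thin inline around the same fput(), so behaviour is unchanged but the pairing is explicit. A sketch of the lookup/put pairing; check_fd() and the TCP_ESTABLISHED test stand in for the BT_CONNECTED checks in the patch:

    /* Sketch: sockfd_lookup()/sockfd_put() pairing. check_fd() is made up. */
    #include <linux/errno.h>
    #include <linux/net.h>
    #include <net/sock.h>
    #include <net/tcp_states.h>

    static int check_fd(int fd)
    {
            int err;
            struct socket *sock = sockfd_lookup(fd, &err);

            if (!sock)
                    return err;

            err = (sock->sk->sk_state == TCP_ESTABLISHED) ? 0 : -EBADFD;

            sockfd_put(sock);       /* drops the ref sockfd_lookup() took */
            return err;
    }
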
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 783edab12ce8..8c7f7bc4e0ba 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -88,7 +88,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
88 return err; 88 return err;
89 89
90 if (nsock->sk->sk_state != BT_CONNECTED) { 90 if (nsock->sk->sk_state != BT_CONNECTED) {
91 fput(nsock->file); 91 sockfd_put(nsock);
92 return -EBADFD; 92 return -EBADFD;
93 } 93 }
94 94
@@ -97,7 +97,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
97 if (copy_to_user(argp, &ca, sizeof(ca))) 97 if (copy_to_user(argp, &ca, sizeof(ca)))
98 err = -EFAULT; 98 err = -EFAULT;
99 } else 99 } else
100 fput(nsock->file); 100 sockfd_put(nsock);
101 101
102 return err; 102 return err;
103 103
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 34d1a3c822bf..5fc7be206f62 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -208,13 +208,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
208 208
209 skb_queue_head_init(&conn->data_q); 209 skb_queue_head_init(&conn->data_q);
210 210
211 init_timer(&conn->disc_timer); 211 setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn);
212 conn->disc_timer.function = hci_conn_timeout; 212 setup_timer(&conn->idle_timer, hci_conn_idle, (unsigned long)conn);
213 conn->disc_timer.data = (unsigned long) conn;
214
215 init_timer(&conn->idle_timer);
216 conn->idle_timer.function = hci_conn_idle;
217 conn->idle_timer.data = (unsigned long) conn;
218 213
219 atomic_set(&conn->refcnt, 0); 214 atomic_set(&conn->refcnt, 0);
220 215
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 4bbacddeb49d..782a22602b86 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -811,10 +811,7 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
811 session->intr_sock = intr_sock; 811 session->intr_sock = intr_sock;
812 session->state = BT_CONNECTED; 812 session->state = BT_CONNECTED;
813 813
814 init_timer(&session->timer); 814 setup_timer(&session->timer, hidp_idle_timeout, (unsigned long)session);
815
816 session->timer.function = hidp_idle_timeout;
817 session->timer.data = (unsigned long) session;
818 815
819 skb_queue_head_init(&session->ctrl_transmit); 816 skb_queue_head_init(&session->ctrl_transmit);
820 skb_queue_head_init(&session->intr_transmit); 817 skb_queue_head_init(&session->intr_transmit);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 3292b956a7c4..f4dd02ca9a96 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -86,13 +86,13 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
86 86
87 isock = sockfd_lookup(ca.intr_sock, &err); 87 isock = sockfd_lookup(ca.intr_sock, &err);
88 if (!isock) { 88 if (!isock) {
89 fput(csock->file); 89 sockfd_put(csock);
90 return err; 90 return err;
91 } 91 }
92 92
93 if (csock->sk->sk_state != BT_CONNECTED || isock->sk->sk_state != BT_CONNECTED) { 93 if (csock->sk->sk_state != BT_CONNECTED || isock->sk->sk_state != BT_CONNECTED) {
94 fput(csock->file); 94 sockfd_put(csock);
95 fput(isock->file); 95 sockfd_put(isock);
96 return -EBADFD; 96 return -EBADFD;
97 } 97 }
98 98
@@ -101,8 +101,8 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
101 if (copy_to_user(argp, &ca, sizeof(ca))) 101 if (copy_to_user(argp, &ca, sizeof(ca)))
102 err = -EFAULT; 102 err = -EFAULT;
103 } else { 103 } else {
104 fput(csock->file); 104 sockfd_put(csock);
105 fput(isock->file); 105 sockfd_put(isock);
106 } 106 }
107 107
108 return err; 108 return err;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 477e052b17b5..a8811c0a0cea 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -99,13 +99,6 @@ static void l2cap_sock_clear_timer(struct sock *sk)
99 sk_stop_timer(sk, &sk->sk_timer); 99 sk_stop_timer(sk, &sk->sk_timer);
100} 100}
101 101
102static void l2cap_sock_init_timer(struct sock *sk)
103{
104 init_timer(&sk->sk_timer);
105 sk->sk_timer.function = l2cap_sock_timeout;
106 sk->sk_timer.data = (unsigned long)sk;
107}
108
109/* ---- L2CAP channels ---- */ 102/* ---- L2CAP channels ---- */
110static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) 103static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
111{ 104{
@@ -395,9 +388,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
395 388
396 conn->feat_mask = 0; 389 conn->feat_mask = 0;
397 390
398 init_timer(&conn->info_timer); 391 setup_timer(&conn->info_timer, l2cap_info_timeout, (unsigned long)conn);
399 conn->info_timer.function = l2cap_info_timeout;
400 conn->info_timer.data = (unsigned long) conn;
401 392
402 spin_lock_init(&conn->lock); 393 spin_lock_init(&conn->lock);
403 rwlock_init(&conn->chan_list.lock); 394 rwlock_init(&conn->chan_list.lock);
@@ -622,7 +613,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
622 sk->sk_protocol = proto; 613 sk->sk_protocol = proto;
623 sk->sk_state = BT_OPEN; 614 sk->sk_state = BT_OPEN;
624 615
625 l2cap_sock_init_timer(sk); 616 setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long)sk);
626 617
627 bt_sock_link(&l2cap_sk_list, sk); 618 bt_sock_link(&l2cap_sk_list, sk);
628 return sk; 619 return sk;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index e7ac6ba7ecab..d3e4e1877e6a 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -279,9 +279,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
279 if (!d) 279 if (!d)
280 return NULL; 280 return NULL;
281 281
282 init_timer(&d->timer); 282 setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d);
283 d->timer.function = rfcomm_dlc_timeout;
284 d->timer.data = (unsigned long) d;
285 283
286 skb_queue_head_init(&d->tx_queue); 284 skb_queue_head_init(&d->tx_queue);
287 spin_lock_init(&d->lock); 285 spin_lock_init(&d->lock);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 93ad1aae3f38..b91d3c81a73c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -97,13 +97,6 @@ static void sco_sock_clear_timer(struct sock *sk)
97 sk_stop_timer(sk, &sk->sk_timer); 97 sk_stop_timer(sk, &sk->sk_timer);
98} 98}
99 99
100static void sco_sock_init_timer(struct sock *sk)
101{
102 init_timer(&sk->sk_timer);
103 sk->sk_timer.function = sco_sock_timeout;
104 sk->sk_timer.data = (unsigned long)sk;
105}
106
107/* ---- SCO connections ---- */ 100/* ---- SCO connections ---- */
108static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status) 101static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status)
109{ 102{
@@ -436,7 +429,7 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro
436 sk->sk_protocol = proto; 429 sk->sk_protocol = proto;
437 sk->sk_state = BT_OPEN; 430 sk->sk_state = BT_OPEN;
438 431
439 sco_sock_init_timer(sk); 432 setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk);
440 433
441 bt_sock_link(&sco_sk_list, sk); 434 bt_sock_link(&sco_sk_list, sk);
442 return sk; 435 return sk;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 0ee79a726d91..255c00f60ce7 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -109,7 +109,7 @@ static inline int is_link_local(const unsigned char *dest)
109{ 109{
110 __be16 *a = (__be16 *)dest; 110 __be16 *a = (__be16 *)dest;
111 static const __be16 *b = (const __be16 *)br_group_address; 111 static const __be16 *b = (const __be16 *)br_group_address;
112 static const __be16 m = __constant_cpu_to_be16(0xfff0); 112 static const __be16 m = cpu_to_be16(0xfff0);
113 113
114 return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; 114 return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
115} 115}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 9f78a69d6b8b..80014bab81b0 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -353,7 +353,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
353 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 353 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
354 goto free_skb; 354 goto free_skb;
355 355
356 if (!ip_route_output_key(&rt, &fl)) { 356 if (!ip_route_output_key(&init_net, &rt, &fl)) {
357 /* - Bridged-and-DNAT'ed traffic doesn't 357 /* - Bridged-and-DNAT'ed traffic doesn't
358 * require ip_forwarding. */ 358 * require ip_forwarding. */
359 if (((struct dst_entry *)rt)->dev == dev) { 359 if (((struct dst_entry *)rt)->dev == dev) {
@@ -511,7 +511,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
511 if (!setup_pre_routing(skb)) 511 if (!setup_pre_routing(skb))
512 return NF_DROP; 512 return NF_DROP;
513 513
514 NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, 514 NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
515 br_nf_pre_routing_finish_ipv6); 515 br_nf_pre_routing_finish_ipv6);
516 516
517 return NF_STOLEN; 517 return NF_STOLEN;
@@ -584,7 +584,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
584 return NF_DROP; 584 return NF_DROP;
585 store_orig_dstaddr(skb); 585 store_orig_dstaddr(skb);
586 586
587 NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, 587 NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
588 br_nf_pre_routing_finish); 588 br_nf_pre_routing_finish);
589 589
590 return NF_STOLEN; 590 return NF_STOLEN;
@@ -681,7 +681,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
681 nf_bridge->mask |= BRNF_BRIDGED; 681 nf_bridge->mask |= BRNF_BRIDGED;
682 nf_bridge->physoutdev = skb->dev; 682 nf_bridge->physoutdev = skb->dev;
683 683
684 NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent, 684 NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
685 br_nf_forward_finish); 685 br_nf_forward_finish);
686 686
687 return NF_STOLEN; 687 return NF_STOLEN;
@@ -832,7 +832,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
832 if (nf_bridge->netoutdev) 832 if (nf_bridge->netoutdev)
833 realoutdev = nf_bridge->netoutdev; 833 realoutdev = nf_bridge->netoutdev;
834#endif 834#endif
835 NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev, 835 NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev,
836 br_nf_dev_queue_xmit); 836 br_nf_dev_queue_xmit);
837 837
838 return NF_STOLEN; 838 return NF_STOLEN;
@@ -871,7 +871,7 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
871 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. 871 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
872 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because 872 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
873 * ip_refrag() can return NF_STOLEN. */ 873 * ip_refrag() can return NF_STOLEN. */
874static struct nf_hook_ops br_nf_ops[] = { 874static struct nf_hook_ops br_nf_ops[] __read_mostly = {
875 { .hook = br_nf_pre_routing, 875 { .hook = br_nf_pre_routing,
876 .owner = THIS_MODULE, 876 .owner = THIS_MODULE,
877 .pf = PF_BRIDGE, 877 .pf = PF_BRIDGE,
@@ -905,12 +905,12 @@ static struct nf_hook_ops br_nf_ops[] = {
905 { .hook = ip_sabotage_in, 905 { .hook = ip_sabotage_in,
906 .owner = THIS_MODULE, 906 .owner = THIS_MODULE,
907 .pf = PF_INET, 907 .pf = PF_INET,
908 .hooknum = NF_IP_PRE_ROUTING, 908 .hooknum = NF_INET_PRE_ROUTING,
909 .priority = NF_IP_PRI_FIRST, }, 909 .priority = NF_IP_PRI_FIRST, },
910 { .hook = ip_sabotage_in, 910 { .hook = ip_sabotage_in,
911 .owner = THIS_MODULE, 911 .owner = THIS_MODULE,
912 .pf = PF_INET6, 912 .pf = PF_INET6,
913 .hooknum = NF_IP6_PRE_ROUTING, 913 .hooknum = NF_INET_PRE_ROUTING,
914 .priority = NF_IP6_PRI_FIRST, }, 914 .priority = NF_IP6_PRI_FIRST, },
915}; 915};
916 916
@@ -967,24 +967,10 @@ static ctl_table brnf_table[] = {
967 { .ctl_name = 0 } 967 { .ctl_name = 0 }
968}; 968};
969 969
970static ctl_table brnf_bridge_table[] = { 970static struct ctl_path brnf_path[] = {
971 { 971 { .procname = "net", .ctl_name = CTL_NET, },
972 .ctl_name = NET_BRIDGE, 972 { .procname = "bridge", .ctl_name = NET_BRIDGE, },
973 .procname = "bridge", 973 { }
974 .mode = 0555,
975 .child = brnf_table,
976 },
977 { .ctl_name = 0 }
978};
979
980static ctl_table brnf_net_table[] = {
981 {
982 .ctl_name = CTL_NET,
983 .procname = "net",
984 .mode = 0555,
985 .child = brnf_bridge_table,
986 },
987 { .ctl_name = 0 }
988}; 974};
989#endif 975#endif
990 976
@@ -996,7 +982,7 @@ int __init br_netfilter_init(void)
996 if (ret < 0) 982 if (ret < 0)
997 return ret; 983 return ret;
998#ifdef CONFIG_SYSCTL 984#ifdef CONFIG_SYSCTL
999 brnf_sysctl_header = register_sysctl_table(brnf_net_table); 985 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
1000 if (brnf_sysctl_header == NULL) { 986 if (brnf_sysctl_header == NULL) {
1001 printk(KERN_WARNING 987 printk(KERN_WARNING
1002 "br_netfilter: can't register to sysctl.\n"); 988 "br_netfilter: can't register to sysctl.\n");
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 53ab8e0cb518..f5d69336d97b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -13,6 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <net/rtnetlink.h> 14#include <net/rtnetlink.h>
15#include <net/net_namespace.h> 15#include <net/net_namespace.h>
16#include <net/sock.h>
16#include "br_private.h" 17#include "br_private.h"
17 18
18static inline size_t br_nlmsg_size(void) 19static inline size_t br_nlmsg_size(void)
@@ -96,10 +97,10 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
96 kfree_skb(skb); 97 kfree_skb(skb);
97 goto errout; 98 goto errout;
98 } 99 }
 99	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); 100	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
100errout: 101errout:
101 if (err < 0) 102 if (err < 0)
102 rtnl_set_sk_err(RTNLGRP_LINK, err); 103 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
103} 104}
104 105
105/* 106/*
@@ -107,9 +108,13 @@ errout:
107 */ 108 */
108static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 109static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
109{ 110{
111 struct net *net = skb->sk->sk_net;
110 struct net_device *dev; 112 struct net_device *dev;
111 int idx; 113 int idx;
112 114
115 if (net != &init_net)
116 return 0;
117
113 idx = 0; 118 idx = 0;
114 for_each_netdev(&init_net, dev) { 119 for_each_netdev(&init_net, dev) {
115 /* not a bridge port */ 120 /* not a bridge port */
@@ -135,12 +140,16 @@ skip:
135 */ 140 */
136static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 141static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
137{ 142{
143 struct net *net = skb->sk->sk_net;
138 struct ifinfomsg *ifm; 144 struct ifinfomsg *ifm;
139 struct nlattr *protinfo; 145 struct nlattr *protinfo;
140 struct net_device *dev; 146 struct net_device *dev;
141 struct net_bridge_port *p; 147 struct net_bridge_port *p;
142 u8 new_state; 148 u8 new_state;
143 149
150 if (net != &init_net)
151 return -EINVAL;
152
144 if (nlmsg_len(nlh) < sizeof(*ifm)) 153 if (nlmsg_len(nlh) < sizeof(*ifm))
145 return -EINVAL; 154 return -EINVAL;
146 155
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index b84fc6075fe1..4a3e2bf892c7 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4 4
5menu "Bridge: Netfilter Configuration" 5menu "Bridge: Netfilter Configuration"
6 depends on BRIDGE && NETFILTER 6 depends on BRIDGE && BRIDGE_NETFILTER
7 7
8config BRIDGE_NF_EBTABLES 8config BRIDGE_NF_EBTABLES
9 tristate "Ethernet Bridge tables (ebtables) support" 9 tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 457815fb5584..3be9e9898553 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -17,6 +17,7 @@
17#include <linux/in.h> 17#include <linux/in.h>
18#include <linux/if_arp.h> 18#include <linux/if_arp.h>
19#include <linux/spinlock.h> 19#include <linux/spinlock.h>
20#include <net/netfilter/nf_log.h>
20 21
21static DEFINE_SPINLOCK(ebt_log_lock); 22static DEFINE_SPINLOCK(ebt_log_lock);
22 23
@@ -182,7 +183,7 @@ static struct ebt_watcher log =
182 .me = THIS_MODULE, 183 .me = THIS_MODULE,
183}; 184};
184 185
185static struct nf_logger ebt_log_logger = { 186static const struct nf_logger ebt_log_logger = {
186 .name = "ebt_log", 187 .name = "ebt_log",
187 .logfn = &ebt_log_packet, 188 .logfn = &ebt_log_packet,
188 .me = THIS_MODULE, 189 .me = THIS_MODULE,
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index e7cfd30bac75..8e7b00b68d38 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -38,6 +38,7 @@
38#include <linux/netdevice.h> 38#include <linux/netdevice.h>
39#include <linux/netfilter_bridge/ebtables.h> 39#include <linux/netfilter_bridge/ebtables.h>
40#include <linux/netfilter_bridge/ebt_ulog.h> 40#include <linux/netfilter_bridge/ebt_ulog.h>
41#include <net/netfilter/nf_log.h>
41#include <net/sock.h> 42#include <net/sock.h>
42#include "../br_private.h" 43#include "../br_private.h"
43 44
@@ -278,7 +279,7 @@ static struct ebt_watcher ulog = {
278 .me = THIS_MODULE, 279 .me = THIS_MODULE,
279}; 280};
280 281
281static struct nf_logger ebt_ulog_logger = { 282static const struct nf_logger ebt_ulog_logger = {
282 .name = EBT_ULOG_WATCHER, 283 .name = EBT_ULOG_WATCHER,
283 .logfn = &ebt_log_packet, 284 .logfn = &ebt_log_packet,
284 .me = THIS_MODULE, 285 .me = THIS_MODULE,
@@ -306,7 +307,7 @@ static int __init ebt_ulog_init(void)
306 if (!ebtulognl) 307 if (!ebtulognl)
307 ret = -ENOMEM; 308 ret = -ENOMEM;
308 else if ((ret = ebt_register_watcher(&ulog))) 309 else if ((ret = ebt_register_watcher(&ulog)))
309 sock_release(ebtulognl->sk_socket); 310 netlink_kernel_release(ebtulognl);
310 311
311 if (ret == 0) 312 if (ret == 0)
312 nf_log_register(PF_BRIDGE, &ebt_ulog_logger); 313 nf_log_register(PF_BRIDGE, &ebt_ulog_logger);
@@ -332,7 +333,7 @@ static void __exit ebt_ulog_fini(void)
332 } 333 }
333 spin_unlock_bh(&ub->lock); 334 spin_unlock_bh(&ub->lock);
334 } 335 }
335 sock_release(ebtulognl->sk_socket); 336 netlink_kernel_release(ebtulognl);
336} 337}
337 338
338module_init(ebt_ulog_init); 339module_init(ebt_ulog_init);
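Both sock_release(ebtulognl->sk_socket) calls above become netlink_kernel_release(), which pairs with netlink_kernel_create() and also tolerates a NULL sock. A minimal sketch of the pairing under this kernel's API; the protocol number and handler names are hypothetical:

    #include <linux/module.h>
    #include <linux/skbuff.h>
    #include <linux/netlink.h>
    #include <net/net_namespace.h>

    #define NETLINK_MYPROTO 31              /* hypothetical unit number */

    static struct sock *my_nlsk;

    static void my_rcv(struct sk_buff *skb)
    {
            /* consume incoming netlink messages here */
    }

    static int __init my_init(void)
    {
            my_nlsk = netlink_kernel_create(&init_net, NETLINK_MYPROTO, 0,
                                            my_rcv, NULL, THIS_MODULE);
            return my_nlsk ? 0 : -ENOMEM;
    }

    static void __exit my_exit(void)
    {
            netlink_kernel_release(my_nlsk);        /* NULL-safe */
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");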
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index a43c697d3d73..0ddf7499d496 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -37,9 +37,7 @@ MODULE_LICENSE("GPL");
37 37
38 38
39#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args) 39#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args)
40#define INV_FLAG(_inv_flag_) (info->invflags & _inv_flag_) ? "!" : ""
41#define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ 40#define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_
42#define SET_BITMASK(_BIT_MASK_) info->bitmask |= _BIT_MASK_
43#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;} 41#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;}
44 42
45static int 43static int
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 210493f99bc4..fb810908732f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -67,7 +67,7 @@ ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
67 return ebt_do_table(hook, skb, in, out, &frame_filter); 67 return ebt_do_table(hook, skb, in, out, &frame_filter);
68} 68}
69 69
70static struct nf_hook_ops ebt_ops_filter[] = { 70static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
71 { 71 {
72 .hook = ebt_hook, 72 .hook = ebt_hook,
73 .owner = THIS_MODULE, 73 .owner = THIS_MODULE,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 3e58c2e5ee21..bc712730c54a 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -74,7 +74,7 @@ ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
74 return ebt_do_table(hook, skb, in, out, &frame_nat); 74 return ebt_do_table(hook, skb, in, out, &frame_nat);
75} 75}
76 76
77static struct nf_hook_ops ebt_ops_nat[] = { 77static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
78 { 78 {
79 .hook = ebt_nat_dst, 79 .hook = ebt_nat_dst,
80 .owner = THIS_MODULE, 80 .owner = THIS_MODULE,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 817169e718c1..32afff859e4a 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -15,8 +15,6 @@
15 * 2 of the License, or (at your option) any later version. 15 * 2 of the License, or (at your option) any later version.
16 */ 16 */
17 17
18/* used for print_string */
19#include <linux/tty.h>
20 18
21#include <linux/kmod.h> 19#include <linux/kmod.h>
22#include <linux/module.h> 20#include <linux/module.h>
diff --git a/net/can/Kconfig b/net/can/Kconfig
new file mode 100644
index 000000000000..89395b2c8bca
--- /dev/null
+++ b/net/can/Kconfig
@@ -0,0 +1,44 @@
1#
2# Controller Area Network (CAN) network layer core configuration
3#
4
5menuconfig CAN
6 depends on NET
7 tristate "CAN bus subsystem support"
8 ---help---
9 Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
10 communications protocol which was developed by Bosch in
11 1991, mainly for automotive, but now widely used in marine
12 (NMEA2000), industrial, and medical applications.
13 More information on the CAN network protocol family PF_CAN
14 is contained in <Documentation/networking/can.txt>.
15
16 If you want CAN support you should say Y here and also to the
17 specific driver for your controller(s) below.
18
19config CAN_RAW
20 tristate "Raw CAN Protocol (raw access with CAN-ID filtering)"
21 depends on CAN
22	default n
23 ---help---
24 The raw CAN protocol option offers access to the CAN bus via
25 the BSD socket API. You probably want to use the raw socket in
26	  most cases where no higher-level protocol is being used. The raw
27	  socket has several filter options, e.g. ID masking and error frames.
28 To receive/send raw CAN messages, use AF_CAN with protocol CAN_RAW.
29
30config CAN_BCM
31 tristate "Broadcast Manager CAN Protocol (with content filtering)"
32 depends on CAN
33	default n
34 ---help---
35 The Broadcast Manager offers content filtering, timeout monitoring,
36 sending of RTR frames, and cyclic CAN messages without permanent user
37 interaction. The BCM can be 'programmed' via the BSD socket API and
38	  informs you on demand, e.g. only on content updates or timeouts.
39 You probably want to use the bcm socket in most cases where cyclic
40 CAN messages are used on the bus (e.g. in automotive environments).
41 To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM.
42
43
44source "drivers/net/can/Kconfig"
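As the help texts above say, both protocols are reached through ordinary BSD sockets on the new PF_CAN family. A minimal user-space sketch for CAN_RAW, assuming an already configured interface named "can0" and omitting error handling:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/can.h>
    #include <linux/can/raw.h>

    int main(void)
    {
            struct sockaddr_can addr = { .can_family = AF_CAN };
            struct can_frame frame = { .can_id = 0x123, .can_dlc = 2,
                                       .data = { 0xde, 0xad } };
            struct ifreq ifr;
            int s = socket(PF_CAN, SOCK_RAW, CAN_RAW);

            /* bind to one interface; can_ifindex = 0 would mean "any" */
            strcpy(ifr.ifr_name, "can0");
            ioctl(s, SIOCGIFINDEX, &ifr);
            addr.can_ifindex = ifr.ifr_ifindex;
            bind(s, (struct sockaddr *)&addr, sizeof(addr));

            write(s, &frame, sizeof(frame));        /* send one frame */
            read(s, &frame, sizeof(frame));         /* blocking receive */
            printf("id %03X dlc %d\n", frame.can_id, frame.can_dlc);
            return 0;
    }

CAN_BCM sockets are set up the same way, except that they connect() to the address and exchange struct bcm_msg_head messages instead of bare frames.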
diff --git a/net/can/Makefile b/net/can/Makefile
new file mode 100644
index 000000000000..9cd3c4b3abda
--- /dev/null
+++ b/net/can/Makefile
@@ -0,0 +1,12 @@
1#
2# Makefile for the Linux Controller Area Network core.
3#
4
5obj-$(CONFIG_CAN) += can.o
6can-objs := af_can.o proc.o
7
8obj-$(CONFIG_CAN_RAW) += can-raw.o
9can-raw-objs := raw.o
10
11obj-$(CONFIG_CAN_BCM) += can-bcm.o
12can-bcm-objs := bcm.o
diff --git a/net/can/af_can.c b/net/can/af_can.c
new file mode 100644
index 000000000000..5158e886630f
--- /dev/null
+++ b/net/can/af_can.c
@@ -0,0 +1,861 @@
1/*
2 * af_can.c - Protocol family CAN core module
3 * (used by different CAN protocol modules)
4 *
5 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of Volkswagen nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * Alternatively, provided that this notice is retained in full, this
21 * software may be distributed under the terms of the GNU General
22 * Public License ("GPL") version 2, in which case the provisions of the
23 * GPL apply INSTEAD OF those given above.
24 *
25 * The provided data structures and external interfaces from this code
26 * are not restricted to be used by modules with a GPL compatible license.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
39 * DAMAGE.
40 *
41 * Send feedback to <socketcan-users@lists.berlios.de>
42 *
43 */
44
45#include <linux/module.h>
46#include <linux/init.h>
47#include <linux/kmod.h>
48#include <linux/slab.h>
49#include <linux/list.h>
50#include <linux/spinlock.h>
51#include <linux/rcupdate.h>
52#include <linux/uaccess.h>
53#include <linux/net.h>
54#include <linux/netdevice.h>
55#include <linux/socket.h>
56#include <linux/if_ether.h>
57#include <linux/if_arp.h>
58#include <linux/skbuff.h>
59#include <linux/can.h>
60#include <linux/can/core.h>
61#include <net/net_namespace.h>
62#include <net/sock.h>
63
64#include "af_can.h"
65
66static __initdata const char banner[] = KERN_INFO
67 "can: controller area network core (" CAN_VERSION_STRING ")\n";
68
69MODULE_DESCRIPTION("Controller Area Network PF_CAN core");
70MODULE_LICENSE("Dual BSD/GPL");
71MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>, "
72 "Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
73
74MODULE_ALIAS_NETPROTO(PF_CAN);
75
76static int stats_timer __read_mostly = 1;
77module_param(stats_timer, int, S_IRUGO);
78MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
79
80HLIST_HEAD(can_rx_dev_list);
81static struct dev_rcv_lists can_rx_alldev_list;
82static DEFINE_SPINLOCK(can_rcvlists_lock);
83
84static struct kmem_cache *rcv_cache __read_mostly;
85
86/* table of registered CAN protocols */
87static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
88static DEFINE_SPINLOCK(proto_tab_lock);
89
90struct timer_list can_stattimer; /* timer for statistics update */
91struct s_stats can_stats; /* packet statistics */
92struct s_pstats can_pstats; /* receive list statistics */
93
94/*
95 * af_can socket functions
96 */
97
98static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
99{
100 struct sock *sk = sock->sk;
101
102 switch (cmd) {
103
104 case SIOCGSTAMP:
105 return sock_get_timestamp(sk, (struct timeval __user *)arg);
106
107 default:
108 return -ENOIOCTLCMD;
109 }
110}
111
112static void can_sock_destruct(struct sock *sk)
113{
114 skb_queue_purge(&sk->sk_receive_queue);
115}
116
117static int can_create(struct net *net, struct socket *sock, int protocol)
118{
119 struct sock *sk;
120 struct can_proto *cp;
121 char module_name[sizeof("can-proto-000")];
122 int err = 0;
123
124 sock->state = SS_UNCONNECTED;
125
126 if (protocol < 0 || protocol >= CAN_NPROTO)
127 return -EINVAL;
128
129 if (net != &init_net)
130 return -EAFNOSUPPORT;
131
132 /* try to load protocol module, when CONFIG_KMOD is defined */
133 if (!proto_tab[protocol]) {
134 sprintf(module_name, "can-proto-%d", protocol);
135 err = request_module(module_name);
136
137 /*
138 * In case of error we only print a message but don't
139 * return the error code immediately. Below we will
140 * return -EPROTONOSUPPORT
141 */
142 if (err == -ENOSYS) {
143 if (printk_ratelimit())
144 printk(KERN_INFO "can: request_module(%s)"
145 " not implemented.\n", module_name);
146 } else if (err) {
147 if (printk_ratelimit())
148 printk(KERN_ERR "can: request_module(%s)"
149 " failed.\n", module_name);
150 }
151 }
152
153 spin_lock(&proto_tab_lock);
154 cp = proto_tab[protocol];
155 if (cp && !try_module_get(cp->prot->owner))
156 cp = NULL;
157 spin_unlock(&proto_tab_lock);
158
159 /* check for available protocol and correct usage */
160
161 if (!cp)
162 return -EPROTONOSUPPORT;
163
164 if (cp->type != sock->type) {
165 err = -EPROTONOSUPPORT;
166 goto errout;
167 }
168
169 if (cp->capability >= 0 && !capable(cp->capability)) {
170 err = -EPERM;
171 goto errout;
172 }
173
174 sock->ops = cp->ops;
175
176 sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot);
177 if (!sk) {
178 err = -ENOMEM;
179 goto errout;
180 }
181
182 sock_init_data(sock, sk);
183 sk->sk_destruct = can_sock_destruct;
184
185 if (sk->sk_prot->init)
186 err = sk->sk_prot->init(sk);
187
188 if (err) {
189 /* release sk on errors */
190 sock_orphan(sk);
191 sock_put(sk);
192 }
193
194 errout:
195 module_put(cp->prot->owner);
196 return err;
197}
198
199/*
200 * af_can tx path
201 */
202
203/**
204 * can_send - transmit a CAN frame (optionally with local loopback)
205 * @skb: pointer to socket buffer with CAN frame in data section
206 * @loop: loopback for listeners on local CAN sockets (recommended default!)
207 *
208 * Return:
209 * 0 on success
210 * -ENETDOWN when the selected interface is down
211 * -ENOBUFS on full driver queue (see net_xmit_errno())
212 * -ENOMEM when skb_clone() for the local loopback fails
213 * -EPERM when trying to send on a non-CAN interface
214 */
215int can_send(struct sk_buff *skb, int loop)
216{
217 int err;
218
219 if (skb->dev->type != ARPHRD_CAN) {
220 kfree_skb(skb);
221 return -EPERM;
222 }
223
224 if (!(skb->dev->flags & IFF_UP)) {
225 kfree_skb(skb);
226 return -ENETDOWN;
227 }
228
229 skb->protocol = htons(ETH_P_CAN);
230 skb_reset_network_header(skb);
231 skb_reset_transport_header(skb);
232
233 if (loop) {
234 /* local loopback of sent CAN frames */
235
236 /* indication for the CAN driver: do loopback */
237 skb->pkt_type = PACKET_LOOPBACK;
238
239 /*
240 * The reference to the originating sock may be required
241 * by the receiving socket to check whether the frame is
242 * its own. Example: can_raw sockopt CAN_RAW_RECV_OWN_MSGS
243 * Therefore we have to ensure that skb->sk remains the
244 * reference to the originating sock by restoring skb->sk
245 * after each skb_clone() or skb_orphan() usage.
246 */
247
248 if (!(skb->dev->flags & IFF_ECHO)) {
249 /*
250			 * If the interface is not capable of doing loopback
251			 * itself, we do it here.
252 */
253 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
254
255 if (!newskb) {
256 kfree_skb(skb);
257 return -ENOMEM;
258 }
259
260 newskb->sk = skb->sk;
261 newskb->ip_summed = CHECKSUM_UNNECESSARY;
262 newskb->pkt_type = PACKET_BROADCAST;
263 netif_rx(newskb);
264 }
265 } else {
266 /* indication for the CAN driver: no loopback required */
267 skb->pkt_type = PACKET_HOST;
268 }
269
270 /* send to netdevice */
271 err = dev_queue_xmit(skb);
272 if (err > 0)
273 err = net_xmit_errno(err);
274
275 /* update statistics */
276 can_stats.tx_frames++;
277 can_stats.tx_frames_delta++;
278
279 return err;
280}
281EXPORT_SYMBOL(can_send);
282
283/*
284 * af_can rx path
285 */
286
287static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev)
288{
289 struct dev_rcv_lists *d = NULL;
290 struct hlist_node *n;
291
292 /*
293 * find receive list for this device
294 *
295	 * The hlist_for_each_entry*() macros iterate through the list
296	 * using the cursor variable n and set d to the containing
297	 * struct in each list iteration. Therefore, after the list
298	 * iteration, d is unmodified when the list is empty, and it
299	 * points to the last list element when the list is non-empty
300	 * but no match is found in the loop body. I.e. d is *not*
301 * NULL when no match is found. We can, however, use the
302 * cursor variable n to decide if a match was found.
303 */
304
305 hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {
306 if (d->dev == dev)
307 break;
308 }
309
310 return n ? d : NULL;
311}
312
313static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
314 struct dev_rcv_lists *d)
315{
316 canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
317
318 /* filter error frames */
319 if (*mask & CAN_ERR_FLAG) {
320 /* clear CAN_ERR_FLAG in list entry */
321 *mask &= CAN_ERR_MASK;
322 return &d->rx[RX_ERR];
323 }
324
325 /* ensure valid values in can_mask */
326 if (*mask & CAN_EFF_FLAG)
327 *mask &= (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG);
328 else
329 *mask &= (CAN_SFF_MASK | CAN_RTR_FLAG);
330
331 /* reduce condition testing at receive time */
332 *can_id &= *mask;
333
334 /* inverse can_id/can_mask filter */
335 if (inv)
336 return &d->rx[RX_INV];
337
338 /* mask == 0 => no condition testing at receive time */
339 if (!(*mask))
340 return &d->rx[RX_ALL];
341
342 /* use extra filterset for the subscription of exactly *ONE* can_id */
343 if (*can_id & CAN_EFF_FLAG) {
344 if (*mask == (CAN_EFF_MASK | CAN_EFF_FLAG)) {
345 /* RFC: a use-case for hash-tables in the future? */
346 return &d->rx[RX_EFF];
347 }
348 } else {
349 if (*mask == CAN_SFF_MASK)
350 return &d->rx_sff[*can_id];
351 }
352
353 /* default: filter via can_id/can_mask */
354 return &d->rx[RX_FIL];
355}
356
357/**
358 * can_rx_register - subscribe CAN frames from a specific interface
359 * @dev: pointer to netdevice (NULL => subscribe to 'all' CAN devices list)
360 * @can_id: CAN identifier (see description)
361 * @mask: CAN mask (see description)
362 * @func: callback function on filter match
363 * @data: returned parameter for callback function
364 * @ident: string for calling module identification
365 *
366 * Description:
367 * Invokes the callback function with the received sk_buff and the given
368 * parameter 'data' on a matching receive filter. A filter matches when
369 *
370 * <received_can_id> & mask == can_id & mask
371 *
372 * The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can
373 * filter for error frames (CAN_ERR_FLAG bit set in mask).
374 *
375 * Return:
376 * 0 on success
377 * -ENOMEM when there is no memory to create a subscription entry
378 * -ENODEV unknown device
379 */
380int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
381 void (*func)(struct sk_buff *, void *), void *data,
382 char *ident)
383{
384 struct receiver *r;
385 struct hlist_head *rl;
386 struct dev_rcv_lists *d;
387 int err = 0;
388
389 /* insert new receiver (dev,canid,mask) -> (func,data) */
390
391 r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
392 if (!r)
393 return -ENOMEM;
394
395 spin_lock(&can_rcvlists_lock);
396
397 d = find_dev_rcv_lists(dev);
398 if (d) {
399 rl = find_rcv_list(&can_id, &mask, d);
400
401 r->can_id = can_id;
402 r->mask = mask;
403 r->matches = 0;
404 r->func = func;
405 r->data = data;
406 r->ident = ident;
407
408 hlist_add_head_rcu(&r->list, rl);
409 d->entries++;
410
411 can_pstats.rcv_entries++;
412 if (can_pstats.rcv_entries_max < can_pstats.rcv_entries)
413 can_pstats.rcv_entries_max = can_pstats.rcv_entries;
414 } else {
415 kmem_cache_free(rcv_cache, r);
416 err = -ENODEV;
417 }
418
419 spin_unlock(&can_rcvlists_lock);
420
421 return err;
422}
423EXPORT_SYMBOL(can_rx_register);
424
425/*
426 * can_rx_delete_device - rcu callback for dev_rcv_lists structure removal
427 */
428static void can_rx_delete_device(struct rcu_head *rp)
429{
430 struct dev_rcv_lists *d = container_of(rp, struct dev_rcv_lists, rcu);
431
432 kfree(d);
433}
434
435/*
436 * can_rx_delete_receiver - rcu callback for single receiver entry removal
437 */
438static void can_rx_delete_receiver(struct rcu_head *rp)
439{
440 struct receiver *r = container_of(rp, struct receiver, rcu);
441
442 kmem_cache_free(rcv_cache, r);
443}
444
445/**
446 * can_rx_unregister - unsubscribe CAN frames from a specific interface
447 * @dev: pointer to netdevice (NULL => unsubscribe from 'all' CAN devices list)
448 * @can_id: CAN identifier
449 * @mask: CAN mask
450 * @func: callback function on filter match
451 * @data: returned parameter for callback function
452 *
453 * Description:
454 * Removes the subscription entry matching the given values.
455 */
456void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
457 void (*func)(struct sk_buff *, void *), void *data)
458{
459 struct receiver *r = NULL;
460 struct hlist_head *rl;
461 struct hlist_node *next;
462 struct dev_rcv_lists *d;
463
464 spin_lock(&can_rcvlists_lock);
465
466 d = find_dev_rcv_lists(dev);
467 if (!d) {
468 printk(KERN_ERR "BUG: receive list not found for "
469 "dev %s, id %03X, mask %03X\n",
470 DNAME(dev), can_id, mask);
471 goto out;
472 }
473
474 rl = find_rcv_list(&can_id, &mask, d);
475
476 /*
477 * Search the receiver list for the item to delete. This should
478 * exist, since no receiver may be unregistered that hasn't
479 * been registered before.
480 */
481
482 hlist_for_each_entry_rcu(r, next, rl, list) {
483 if (r->can_id == can_id && r->mask == mask
484 && r->func == func && r->data == data)
485 break;
486 }
487
488 /*
489 * Check for bugs in CAN protocol implementations:
490 * If no matching list item was found, the list cursor variable next
491 * will be NULL, while r will point to the last item of the list.
492 */
493
494 if (!next) {
495 printk(KERN_ERR "BUG: receive list entry not found for "
496 "dev %s, id %03X, mask %03X\n",
497 DNAME(dev), can_id, mask);
498 r = NULL;
499 d = NULL;
500 goto out;
501 }
502
503 hlist_del_rcu(&r->list);
504 d->entries--;
505
506 if (can_pstats.rcv_entries > 0)
507 can_pstats.rcv_entries--;
508
509 /* remove device structure requested by NETDEV_UNREGISTER */
510 if (d->remove_on_zero_entries && !d->entries)
511 hlist_del_rcu(&d->list);
512 else
513 d = NULL;
514
515 out:
516 spin_unlock(&can_rcvlists_lock);
517
518 /* schedule the receiver item for deletion */
519 if (r)
520 call_rcu(&r->rcu, can_rx_delete_receiver);
521
522 /* schedule the device structure for deletion */
523 if (d)
524 call_rcu(&d->rcu, can_rx_delete_device);
525}
526EXPORT_SYMBOL(can_rx_unregister);
527
528static inline void deliver(struct sk_buff *skb, struct receiver *r)
529{
530 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
531
532 if (clone) {
533 clone->sk = skb->sk;
534 r->func(clone, r->data);
535 r->matches++;
536 }
537}
538
539static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
540{
541 struct receiver *r;
542 struct hlist_node *n;
543 int matches = 0;
544 struct can_frame *cf = (struct can_frame *)skb->data;
545 canid_t can_id = cf->can_id;
546
547 if (d->entries == 0)
548 return 0;
549
550 if (can_id & CAN_ERR_FLAG) {
551 /* check for error frame entries only */
552 hlist_for_each_entry_rcu(r, n, &d->rx[RX_ERR], list) {
553 if (can_id & r->mask) {
554 deliver(skb, r);
555 matches++;
556 }
557 }
558 return matches;
559 }
560
561 /* check for unfiltered entries */
562 hlist_for_each_entry_rcu(r, n, &d->rx[RX_ALL], list) {
563 deliver(skb, r);
564 matches++;
565 }
566
567 /* check for can_id/mask entries */
568 hlist_for_each_entry_rcu(r, n, &d->rx[RX_FIL], list) {
569 if ((can_id & r->mask) == r->can_id) {
570 deliver(skb, r);
571 matches++;
572 }
573 }
574
575 /* check for inverted can_id/mask entries */
576 hlist_for_each_entry_rcu(r, n, &d->rx[RX_INV], list) {
577 if ((can_id & r->mask) != r->can_id) {
578 deliver(skb, r);
579 matches++;
580 }
581 }
582
583 /* check CAN_ID specific entries */
584 if (can_id & CAN_EFF_FLAG) {
585 hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) {
586 if (r->can_id == can_id) {
587 deliver(skb, r);
588 matches++;
589 }
590 }
591 } else {
592 can_id &= CAN_SFF_MASK;
593 hlist_for_each_entry_rcu(r, n, &d->rx_sff[can_id], list) {
594 deliver(skb, r);
595 matches++;
596 }
597 }
598
599 return matches;
600}
601
602static int can_rcv(struct sk_buff *skb, struct net_device *dev,
603 struct packet_type *pt, struct net_device *orig_dev)
604{
605 struct dev_rcv_lists *d;
606 int matches;
607
608 if (dev->type != ARPHRD_CAN || dev->nd_net != &init_net) {
609 kfree_skb(skb);
610 return 0;
611 }
612
613 /* update statistics */
614 can_stats.rx_frames++;
615 can_stats.rx_frames_delta++;
616
617 rcu_read_lock();
618
619 /* deliver the packet to sockets listening on all devices */
620 matches = can_rcv_filter(&can_rx_alldev_list, skb);
621
622 /* find receive list for this device */
623 d = find_dev_rcv_lists(dev);
624 if (d)
625 matches += can_rcv_filter(d, skb);
626
627 rcu_read_unlock();
628
629 /* free the skbuff allocated by the netdevice driver */
630 kfree_skb(skb);
631
632 if (matches > 0) {
633 can_stats.matches++;
634 can_stats.matches_delta++;
635 }
636
637 return 0;
638}
639
640/*
641 * af_can protocol functions
642 */
643
644/**
645 * can_proto_register - register CAN transport protocol
646 * @cp: pointer to CAN protocol structure
647 *
648 * Return:
649 * 0 on success
650 * -EINVAL invalid (out of range) protocol number
651 * -EBUSY protocol already in use
652 * -ENOBUFS if proto_register() fails
653 */
654int can_proto_register(struct can_proto *cp)
655{
656 int proto = cp->protocol;
657 int err = 0;
658
659 if (proto < 0 || proto >= CAN_NPROTO) {
660 printk(KERN_ERR "can: protocol number %d out of range\n",
661 proto);
662 return -EINVAL;
663 }
664
665 spin_lock(&proto_tab_lock);
666 if (proto_tab[proto]) {
667 printk(KERN_ERR "can: protocol %d already registered\n",
668 proto);
669 err = -EBUSY;
670 goto errout;
671 }
672
673 err = proto_register(cp->prot, 0);
674 if (err < 0)
675 goto errout;
676
677 proto_tab[proto] = cp;
678
679 /* use generic ioctl function if the module doesn't bring its own */
680 if (!cp->ops->ioctl)
681 cp->ops->ioctl = can_ioctl;
682
683 errout:
684 spin_unlock(&proto_tab_lock);
685
686 return err;
687}
688EXPORT_SYMBOL(can_proto_register);
689
690/**
691 * can_proto_unregister - unregister CAN transport protocol
692 * @cp: pointer to CAN protocol structure
693 */
694void can_proto_unregister(struct can_proto *cp)
695{
696 int proto = cp->protocol;
697
698 spin_lock(&proto_tab_lock);
699 if (!proto_tab[proto]) {
700 printk(KERN_ERR "BUG: can: protocol %d is not registered\n",
701 proto);
702 }
703 proto_unregister(cp->prot);
704 proto_tab[proto] = NULL;
705 spin_unlock(&proto_tab_lock);
706}
707EXPORT_SYMBOL(can_proto_unregister);
708
709/*
710 * af_can notifier to create/remove CAN netdevice specific structs
711 */
712static int can_notifier(struct notifier_block *nb, unsigned long msg,
713 void *data)
714{
715 struct net_device *dev = (struct net_device *)data;
716 struct dev_rcv_lists *d;
717
718 if (dev->nd_net != &init_net)
719 return NOTIFY_DONE;
720
721 if (dev->type != ARPHRD_CAN)
722 return NOTIFY_DONE;
723
724 switch (msg) {
725
726 case NETDEV_REGISTER:
727
728 /*
729 * create new dev_rcv_lists for this device
730 *
731 * N.B. zeroing the struct is the correct initialization
732 * for the embedded hlist_head structs.
733 * Another list type, e.g. list_head, would require
734 * explicit initialization.
735 */
736
737 d = kzalloc(sizeof(*d), GFP_KERNEL);
738 if (!d) {
739 printk(KERN_ERR
740 "can: allocation of receive list failed\n");
741 return NOTIFY_DONE;
742 }
743 d->dev = dev;
744
745 spin_lock(&can_rcvlists_lock);
746 hlist_add_head_rcu(&d->list, &can_rx_dev_list);
747 spin_unlock(&can_rcvlists_lock);
748
749 break;
750
751 case NETDEV_UNREGISTER:
752 spin_lock(&can_rcvlists_lock);
753
754 d = find_dev_rcv_lists(dev);
755 if (d) {
756 if (d->entries) {
757 d->remove_on_zero_entries = 1;
758 d = NULL;
759 } else
760 hlist_del_rcu(&d->list);
761 } else
762 printk(KERN_ERR "can: notifier: receive list not "
763 "found for dev %s\n", dev->name);
764
765 spin_unlock(&can_rcvlists_lock);
766
767 if (d)
768 call_rcu(&d->rcu, can_rx_delete_device);
769
770 break;
771 }
772
773 return NOTIFY_DONE;
774}
775
776/*
777 * af_can module init/exit functions
778 */
779
780static struct packet_type can_packet __read_mostly = {
781 .type = __constant_htons(ETH_P_CAN),
782 .dev = NULL,
783 .func = can_rcv,
784};
785
786static struct net_proto_family can_family_ops __read_mostly = {
787 .family = PF_CAN,
788 .create = can_create,
789 .owner = THIS_MODULE,
790};
791
792/* notifier block for netdevice event */
793static struct notifier_block can_netdev_notifier __read_mostly = {
794 .notifier_call = can_notifier,
795};
796
797static __init int can_init(void)
798{
799 printk(banner);
800
801 rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver),
802 0, 0, NULL);
803 if (!rcv_cache)
804 return -ENOMEM;
805
806 /*
807 * Insert can_rx_alldev_list for reception on all devices.
808 * This struct is zero initialized which is correct for the
809 * embedded hlist heads, the dev pointer, and the entries counter.
810 */
811
812 spin_lock(&can_rcvlists_lock);
813 hlist_add_head_rcu(&can_rx_alldev_list.list, &can_rx_dev_list);
814 spin_unlock(&can_rcvlists_lock);
815
816 if (stats_timer) {
817 /* the statistics are updated every second (timer triggered) */
818 setup_timer(&can_stattimer, can_stat_update, 0);
819 mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
820 } else
821 can_stattimer.function = NULL;
822
823 can_init_proc();
824
825 /* protocol register */
826 sock_register(&can_family_ops);
827 register_netdevice_notifier(&can_netdev_notifier);
828 dev_add_pack(&can_packet);
829
830 return 0;
831}
832
833static __exit void can_exit(void)
834{
835 struct dev_rcv_lists *d;
836 struct hlist_node *n, *next;
837
838 if (stats_timer)
839 del_timer(&can_stattimer);
840
841 can_remove_proc();
842
843 /* protocol unregister */
844 dev_remove_pack(&can_packet);
845 unregister_netdevice_notifier(&can_netdev_notifier);
846 sock_unregister(PF_CAN);
847
848 /* remove can_rx_dev_list */
849 spin_lock(&can_rcvlists_lock);
850 hlist_del(&can_rx_alldev_list.list);
851 hlist_for_each_entry_safe(d, n, next, &can_rx_dev_list, list) {
852 hlist_del(&d->list);
853 kfree(d);
854 }
855 spin_unlock(&can_rcvlists_lock);
856
857 kmem_cache_destroy(rcv_cache);
858}
859
860module_init(can_init);
861module_exit(can_exit);
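can_rx_register()/can_rx_unregister() and can_send() documented above form the in-kernel interface that raw.c and bcm.c (below) are built on. A minimal sketch of a kernel-side subscriber against this API; all "my_*" identifiers are hypothetical:

    #include <linux/module.h>
    #include <linux/skbuff.h>
    #include <linux/can.h>
    #include <linux/can/core.h>

    /* receives a clone from af_can's deliver() and must consume it */
    static void my_rx_func(struct sk_buff *skb, void *data)
    {
            struct can_frame *cf = (struct can_frame *)skb->data;

            printk(KERN_DEBUG "my_mod: frame id %03X\n", cf->can_id);
            kfree_skb(skb);
    }

    static int __init my_init(void)
    {
            /* matches frames where rx_id & CAN_SFF_MASK == 0x123,
             * on all CAN devices (dev == NULL) */
            return can_rx_register(NULL, 0x123, CAN_SFF_MASK,
                                   my_rx_func, NULL, "my_mod");
    }

    static void __exit my_exit(void)
    {
            can_rx_unregister(NULL, 0x123, CAN_SFF_MASK,
                              my_rx_func, NULL);
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");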
diff --git a/net/can/af_can.h b/net/can/af_can.h
new file mode 100644
index 000000000000..18f91e37cc30
--- /dev/null
+++ b/net/can/af_can.h
@@ -0,0 +1,122 @@
1/*
2 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Volkswagen nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * Alternatively, provided that this notice is retained in full, this
18 * software may be distributed under the terms of the GNU General
19 * Public License ("GPL") version 2, in which case the provisions of the
20 * GPL apply INSTEAD OF those given above.
21 *
22 * The provided data structures and external interfaces from this code
23 * are not restricted to be used by modules with a GPL compatible license.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
36 * DAMAGE.
37 *
38 * Send feedback to <socketcan-users@lists.berlios.de>
39 *
40 */
41
42#ifndef AF_CAN_H
43#define AF_CAN_H
44
45#include <linux/skbuff.h>
46#include <linux/netdevice.h>
47#include <linux/list.h>
48#include <linux/rcupdate.h>
49#include <linux/can.h>
50
51/* af_can rx dispatcher structures */
52
53struct receiver {
54 struct hlist_node list;
55 struct rcu_head rcu;
56 canid_t can_id;
57 canid_t mask;
58 unsigned long matches;
59 void (*func)(struct sk_buff *, void *);
60 void *data;
61 char *ident;
62};
63
64enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_EFF, RX_MAX };
65
66struct dev_rcv_lists {
67 struct hlist_node list;
68 struct rcu_head rcu;
69 struct net_device *dev;
70 struct hlist_head rx[RX_MAX];
71 struct hlist_head rx_sff[0x800];
72 int remove_on_zero_entries;
73 int entries;
74};
75
76/* statistic structures */
77
78/* can be reset e.g. by can_init_stats() */
79struct s_stats {
80 unsigned long jiffies_init;
81
82 unsigned long rx_frames;
83 unsigned long tx_frames;
84 unsigned long matches;
85
86 unsigned long total_rx_rate;
87 unsigned long total_tx_rate;
88 unsigned long total_rx_match_ratio;
89
90 unsigned long current_rx_rate;
91 unsigned long current_tx_rate;
92 unsigned long current_rx_match_ratio;
93
94 unsigned long max_rx_rate;
95 unsigned long max_tx_rate;
96 unsigned long max_rx_match_ratio;
97
98 unsigned long rx_frames_delta;
99 unsigned long tx_frames_delta;
100 unsigned long matches_delta;
101};
102
103/* persistent statistics */
104struct s_pstats {
105 unsigned long stats_reset;
106 unsigned long user_reset;
107 unsigned long rcv_entries;
108 unsigned long rcv_entries_max;
109};
110
111/* function prototypes for the CAN networklayer procfs (proc.c) */
112extern void can_init_proc(void);
113extern void can_remove_proc(void);
114extern void can_stat_update(unsigned long data);
115
116/* structures and variables from af_can.c needed in proc.c for reading */
117extern struct timer_list can_stattimer; /* timer for statistics update */
118extern struct s_stats can_stats; /* packet statistics */
119extern struct s_pstats can_pstats; /* receive list statistics */
120extern struct hlist_head can_rx_dev_list; /* rx dispatcher structures */
121
122#endif /* AF_CAN_H */
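The receive-list buckets declared above (rx[RX_*] and the rx_sff table) exist only to speed up one predicate, the match rule given in the can_rx_register() documentation. Stripped of the bucket optimization, that rule is roughly the following sketch (not code from the patch):

    #include <linux/can.h>

    /*
     * can_id/mask are the values a subscriber passed to
     * can_rx_register(); CAN_INV_FILTER in can_id inverts the
     * result (the RX_INV bucket above).
     */
    static int can_filter_match(canid_t rx_id, canid_t can_id, canid_t mask)
    {
            if (can_id & CAN_INV_FILTER)
                    return (rx_id & mask) !=
                           ((can_id & ~CAN_INV_FILTER) & mask);

            return (rx_id & mask) == (can_id & mask);
    }

In the real code, find_rcv_list() pre-masks the id at registration time (*can_id &= *mask), which lets can_rcv_filter() compare (can_id & r->mask) == r->can_id without re-masking the stored id on every frame.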
diff --git a/net/can/bcm.c b/net/can/bcm.c
new file mode 100644
index 000000000000..bd4282dae754
--- /dev/null
+++ b/net/can/bcm.c
@@ -0,0 +1,1561 @@
1/*
2 * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
3 *
4 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of Volkswagen nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * Alternatively, provided that this notice is retained in full, this
20 * software may be distributed under the terms of the GNU General
21 * Public License ("GPL") version 2, in which case the provisions of the
22 * GPL apply INSTEAD OF those given above.
23 *
24 * The provided data structures and external interfaces from this code
25 * are not restricted to be used by modules with a GPL compatible license.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 *
40 * Send feedback to <socketcan-users@lists.berlios.de>
41 *
42 */
43
44#include <linux/module.h>
45#include <linux/init.h>
46#include <linux/list.h>
47#include <linux/proc_fs.h>
48#include <linux/uio.h>
49#include <linux/net.h>
50#include <linux/netdevice.h>
51#include <linux/socket.h>
52#include <linux/if_arp.h>
53#include <linux/skbuff.h>
54#include <linux/can.h>
55#include <linux/can/core.h>
56#include <linux/can/bcm.h>
57#include <net/sock.h>
58#include <net/net_namespace.h>
59
60/* use of last_frames[index].can_dlc */
61#define RX_RECV 0x40 /* received data for this element */
62#define RX_THR  0x80 /* element has not been sent due to throttling */
63#define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */
64
65/* get best masking value for can_rx_register() for a given single can_id */
66#define REGMASK(id) ((id & CAN_RTR_FLAG) | ((id & CAN_EFF_FLAG) ? \
67 (CAN_EFF_MASK | CAN_EFF_FLAG) : CAN_SFF_MASK))
68
69#define CAN_BCM_VERSION CAN_VERSION
70static __initdata const char banner[] = KERN_INFO
71 "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n";
72
73MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
74MODULE_LICENSE("Dual BSD/GPL");
75MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
76
77/* easy access to can_frame payload */
78static inline u64 GET_U64(const struct can_frame *cp)
79{
80 return *(u64 *)cp->data;
81}
82
83struct bcm_op {
84 struct list_head list;
85 int ifindex;
86 canid_t can_id;
87 int flags;
88 unsigned long j_ival1, j_ival2, j_lastmsg;
89 unsigned long frames_abs, frames_filtered;
90 struct timer_list timer, thrtimer;
91 struct timeval ival1, ival2;
92 ktime_t rx_stamp;
93 int rx_ifindex;
94 int count;
95 int nframes;
96 int currframe;
97 struct can_frame *frames;
98 struct can_frame *last_frames;
99 struct can_frame sframe;
100 struct can_frame last_sframe;
101 struct sock *sk;
102 struct net_device *rx_reg_dev;
103};
104
105static struct proc_dir_entry *proc_dir;
106
107struct bcm_sock {
108 struct sock sk;
109 int bound;
110 int ifindex;
111 struct notifier_block notifier;
112 struct list_head rx_ops;
113 struct list_head tx_ops;
114 unsigned long dropped_usr_msgs;
115 struct proc_dir_entry *bcm_proc_read;
116 char procname [9]; /* pointer printed in ASCII with \0 */
117};
118
119static inline struct bcm_sock *bcm_sk(const struct sock *sk)
120{
121 return (struct bcm_sock *)sk;
122}
123
124#define CFSIZ sizeof(struct can_frame)
125#define OPSIZ sizeof(struct bcm_op)
126#define MHSIZ sizeof(struct bcm_msg_head)
127
128/*
129 * rounded_tv2jif - calculate jiffies from timeval including optional round-up
130 * @tv: pointer to timeval
131 *
132 * Description:
133 * Unlike timeval_to_jiffies() provided in include/linux/jiffies.h, this
134 * function is intentionally more relaxed about precise timer ticks
135 * to get exactly one jiffy for a requested 1000us on a 1000 HZ machine.
136 * This code is to be removed when upgrading to kernel hrtimer.
137 *
138 * Return:
139 * calculated jiffies (max: ULONG_MAX)
140 */
141static unsigned long rounded_tv2jif(const struct timeval *tv)
142{
143 unsigned long sec = tv->tv_sec;
144 unsigned long usec = tv->tv_usec;
145 unsigned long jif;
146
147 if (sec > ULONG_MAX / HZ)
148 return ULONG_MAX;
149
150 /* round up to get at least the requested time */
151 usec += 1000000 / HZ - 1;
152
153 jif = usec / (1000000 / HZ);
154
155 if (sec * HZ > ULONG_MAX - jif)
156 return ULONG_MAX;
157
158 return jif + sec * HZ;
159}
160
161/*
162 * procfs functions
163 */
164static char *bcm_proc_getifname(int ifindex)
165{
166 struct net_device *dev;
167
168 if (!ifindex)
169 return "any";
170
171 /* no usage counting */
172 dev = __dev_get_by_index(&init_net, ifindex);
173 if (dev)
174 return dev->name;
175
176 return "???";
177}
178
179static int bcm_read_proc(char *page, char **start, off_t off,
180 int count, int *eof, void *data)
181{
182 int len = 0;
183 struct sock *sk = (struct sock *)data;
184 struct bcm_sock *bo = bcm_sk(sk);
185 struct bcm_op *op;
186
187 len += snprintf(page + len, PAGE_SIZE - len, ">>> socket %p",
188 sk->sk_socket);
189 len += snprintf(page + len, PAGE_SIZE - len, " / sk %p", sk);
190 len += snprintf(page + len, PAGE_SIZE - len, " / bo %p", bo);
191 len += snprintf(page + len, PAGE_SIZE - len, " / dropped %lu",
192 bo->dropped_usr_msgs);
193 len += snprintf(page + len, PAGE_SIZE - len, " / bound %s",
194 bcm_proc_getifname(bo->ifindex));
195 len += snprintf(page + len, PAGE_SIZE - len, " <<<\n");
196
197 list_for_each_entry(op, &bo->rx_ops, list) {
198
199 unsigned long reduction;
200
201 /* print only active entries & prevent division by zero */
202 if (!op->frames_abs)
203 continue;
204
205 len += snprintf(page + len, PAGE_SIZE - len,
206 "rx_op: %03X %-5s ",
207 op->can_id, bcm_proc_getifname(op->ifindex));
208 len += snprintf(page + len, PAGE_SIZE - len, "[%d]%c ",
209 op->nframes,
210 (op->flags & RX_CHECK_DLC)?'d':' ');
211 if (op->j_ival1)
212 len += snprintf(page + len, PAGE_SIZE - len,
213 "timeo=%ld ", op->j_ival1);
214
215 if (op->j_ival2)
216 len += snprintf(page + len, PAGE_SIZE - len,
217 "thr=%ld ", op->j_ival2);
218
219 len += snprintf(page + len, PAGE_SIZE - len,
220 "# recv %ld (%ld) => reduction: ",
221 op->frames_filtered, op->frames_abs);
222
223 reduction = 100 - (op->frames_filtered * 100) / op->frames_abs;
224
225 len += snprintf(page + len, PAGE_SIZE - len, "%s%ld%%\n",
226 (reduction == 100)?"near ":"", reduction);
227
228 if (len > PAGE_SIZE - 200) {
229 /* mark output cut off */
230 len += snprintf(page + len, PAGE_SIZE - len, "(..)\n");
231 break;
232 }
233 }
234
235 list_for_each_entry(op, &bo->tx_ops, list) {
236
237 len += snprintf(page + len, PAGE_SIZE - len,
238 "tx_op: %03X %s [%d] ",
239 op->can_id, bcm_proc_getifname(op->ifindex),
240 op->nframes);
241 if (op->j_ival1)
242 len += snprintf(page + len, PAGE_SIZE - len, "t1=%ld ",
243 op->j_ival1);
244
245 if (op->j_ival2)
246 len += snprintf(page + len, PAGE_SIZE - len, "t2=%ld ",
247 op->j_ival2);
248
249 len += snprintf(page + len, PAGE_SIZE - len, "# sent %ld\n",
250 op->frames_abs);
251
252 if (len > PAGE_SIZE - 100) {
253 /* mark output cut off */
254 len += snprintf(page + len, PAGE_SIZE - len, "(..)\n");
255 break;
256 }
257 }
258
259 len += snprintf(page + len, PAGE_SIZE - len, "\n");
260
261 *eof = 1;
262 return len;
263}
264
265/*
266 * bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface
267 * of the given bcm tx op
268 */
269static void bcm_can_tx(struct bcm_op *op)
270{
271 struct sk_buff *skb;
272 struct net_device *dev;
273 struct can_frame *cf = &op->frames[op->currframe];
274
275 /* no target device? => exit */
276 if (!op->ifindex)
277 return;
278
279 dev = dev_get_by_index(&init_net, op->ifindex);
280 if (!dev) {
281 /* RFC: should this bcm_op remove itself here? */
282 return;
283 }
284
285 skb = alloc_skb(CFSIZ, gfp_any());
286 if (!skb)
287 goto out;
288
289 memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
290
291 /* send with loopback */
292 skb->dev = dev;
293 skb->sk = op->sk;
294 can_send(skb, 1);
295
296 /* update statistics */
297 op->currframe++;
298 op->frames_abs++;
299
300 /* reached last frame? */
301 if (op->currframe >= op->nframes)
302 op->currframe = 0;
303 out:
304 dev_put(dev);
305}
306
307/*
308 * bcm_send_to_user - send a BCM message to userspace
309 * (consisting of bcm_msg_head + x CAN frames)
310 */
311static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
312 struct can_frame *frames, int has_timestamp)
313{
314 struct sk_buff *skb;
315 struct can_frame *firstframe;
316 struct sockaddr_can *addr;
317 struct sock *sk = op->sk;
318 int datalen = head->nframes * CFSIZ;
319 int err;
320
321 skb = alloc_skb(sizeof(*head) + datalen, gfp_any());
322 if (!skb)
323 return;
324
325 memcpy(skb_put(skb, sizeof(*head)), head, sizeof(*head));
326
327 if (head->nframes) {
328 /* can_frames starting here */
329 firstframe = (struct can_frame *) skb_tail_pointer(skb);
330
331 memcpy(skb_put(skb, datalen), frames, datalen);
332
333 /*
334 * the BCM uses the can_dlc-element of the can_frame
335 * structure for internal purposes. This is only
336 * relevant for updates that are generated by the
337 * BCM, where nframes is 1
338 */
339 if (head->nframes == 1)
340 firstframe->can_dlc &= BCM_CAN_DLC_MASK;
341 }
342
343 if (has_timestamp) {
344 /* restore rx timestamp */
345 skb->tstamp = op->rx_stamp;
346 }
347
348 /*
349	 * Put the datagram into the queue so that bcm_recvmsg() can
350 * get it from there. We need to pass the interface index to
351 * bcm_recvmsg(). We pass a whole struct sockaddr_can in skb->cb
352 * containing the interface index.
353 */
354
355 BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
356 addr = (struct sockaddr_can *)skb->cb;
357 memset(addr, 0, sizeof(*addr));
358 addr->can_family = AF_CAN;
359 addr->can_ifindex = op->rx_ifindex;
360
361 err = sock_queue_rcv_skb(sk, skb);
362 if (err < 0) {
363 struct bcm_sock *bo = bcm_sk(sk);
364
365 kfree_skb(skb);
366 /* don't care about overflows in this statistic */
367 bo->dropped_usr_msgs++;
368 }
369}
370
371/*
372 * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions
373 */
374static void bcm_tx_timeout_handler(unsigned long data)
375{
376 struct bcm_op *op = (struct bcm_op *)data;
377
378 if (op->j_ival1 && (op->count > 0)) {
379
380 op->count--;
381 if (!op->count && (op->flags & TX_COUNTEVT)) {
382 struct bcm_msg_head msg_head;
383
384 /* create notification to user */
385 msg_head.opcode = TX_EXPIRED;
386 msg_head.flags = op->flags;
387 msg_head.count = op->count;
388 msg_head.ival1 = op->ival1;
389 msg_head.ival2 = op->ival2;
390 msg_head.can_id = op->can_id;
391 msg_head.nframes = 0;
392
393 bcm_send_to_user(op, &msg_head, NULL, 0);
394 }
395 }
396
397 if (op->j_ival1 && (op->count > 0)) {
398
399 /* send (next) frame */
400 bcm_can_tx(op);
401 mod_timer(&op->timer, jiffies + op->j_ival1);
402
403 } else {
404 if (op->j_ival2) {
405
406 /* send (next) frame */
407 bcm_can_tx(op);
408 mod_timer(&op->timer, jiffies + op->j_ival2);
409 }
410 }
411
412 return;
413}
414
415/*
416 * bcm_rx_changed - create a RX_CHANGED notification due to changed content
417 */
418static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
419{
420 struct bcm_msg_head head;
421
422 op->j_lastmsg = jiffies;
423
424 /* update statistics */
425 op->frames_filtered++;
426
427 /* prevent statistics overflow */
428 if (op->frames_filtered > ULONG_MAX/100)
429 op->frames_filtered = op->frames_abs = 0;
430
431 head.opcode = RX_CHANGED;
432 head.flags = op->flags;
433 head.count = op->count;
434 head.ival1 = op->ival1;
435 head.ival2 = op->ival2;
436 head.can_id = op->can_id;
437 head.nframes = 1;
438
439 bcm_send_to_user(op, &head, data, 1);
440}
441
442/*
443 * bcm_rx_update_and_send - process a detected relevant receive content change
444 * 1. update the last received data
445 * 2. send a notification to the user (if possible)
446 */
447static void bcm_rx_update_and_send(struct bcm_op *op,
448 struct can_frame *lastdata,
449 struct can_frame *rxdata)
450{
451 unsigned long nexttx = op->j_lastmsg + op->j_ival2;
452
453 memcpy(lastdata, rxdata, CFSIZ);
454
455 /* mark as used */
456 lastdata->can_dlc |= RX_RECV;
457
458 /* throttle bcm_rx_changed ? */
459 if ((op->thrtimer.expires) ||
460 ((op->j_ival2) && (nexttx > jiffies))) {
461 /* we are already waiting OR we have to start waiting */
462
463 /* mark as 'throttled' */
464 lastdata->can_dlc |= RX_THR;
465
466 if (!(op->thrtimer.expires)) {
467 /* start the timer only the first time */
468 mod_timer(&op->thrtimer, nexttx);
469 }
470
471 } else {
472 /* send RX_CHANGED to the user immediately */
473 bcm_rx_changed(op, rxdata);
474 }
475}
476
477/*
478 * bcm_rx_cmp_to_index - (bit)compares the currently received data to formerly
479 * received data stored in op->last_frames[]
480 */
481static void bcm_rx_cmp_to_index(struct bcm_op *op, int index,
482 struct can_frame *rxdata)
483{
484 /*
485	 * no one uses the MSBs of can_dlc for comparison,
486	 * so we use them here to detect the first reception
487 */
488
489 if (!(op->last_frames[index].can_dlc & RX_RECV)) {
490 /* received data for the first time => send update to user */
491 bcm_rx_update_and_send(op, &op->last_frames[index], rxdata);
492 return;
493 }
494
495 /* do a real check in can_frame data section */
496
497 if ((GET_U64(&op->frames[index]) & GET_U64(rxdata)) !=
498 (GET_U64(&op->frames[index]) & GET_U64(&op->last_frames[index]))) {
499 bcm_rx_update_and_send(op, &op->last_frames[index], rxdata);
500 return;
501 }
502
503 if (op->flags & RX_CHECK_DLC) {
504 /* do a real check in can_frame dlc */
505 if (rxdata->can_dlc != (op->last_frames[index].can_dlc &
506 BCM_CAN_DLC_MASK)) {
507 bcm_rx_update_and_send(op, &op->last_frames[index],
508 rxdata);
509 return;
510 }
511 }
512}
513
514/*
515 * bcm_rx_starttimer - enable timeout monitoring for CAN frame reception
516 */
517static void bcm_rx_starttimer(struct bcm_op *op)
518{
519 if (op->flags & RX_NO_AUTOTIMER)
520 return;
521
522 if (op->j_ival1)
523 mod_timer(&op->timer, jiffies + op->j_ival1);
524}
525
526/*
527 * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out
528 */
529static void bcm_rx_timeout_handler(unsigned long data)
530{
531 struct bcm_op *op = (struct bcm_op *)data;
532 struct bcm_msg_head msg_head;
533
534 msg_head.opcode = RX_TIMEOUT;
535 msg_head.flags = op->flags;
536 msg_head.count = op->count;
537 msg_head.ival1 = op->ival1;
538 msg_head.ival2 = op->ival2;
539 msg_head.can_id = op->can_id;
540 msg_head.nframes = 0;
541
542 bcm_send_to_user(op, &msg_head, NULL, 0);
543
544 /* no restart of the timer is done here! */
545
546	/* if the user wants to be informed when cyclic CAN messages come back */
547 if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
548 /* clear received can_frames to indicate 'nothing received' */
549 memset(op->last_frames, 0, op->nframes * CFSIZ);
550 }
551}
552
553/*
554 * bcm_rx_thr_handler - the time for blocked content updates is over now:
555 * Check for throttled data and send it to userspace
556 */
557static void bcm_rx_thr_handler(unsigned long data)
558{
559 struct bcm_op *op = (struct bcm_op *)data;
560 int i = 0;
561
562 /* mark disabled / consumed timer */
563 op->thrtimer.expires = 0;
564
565 if (op->nframes > 1) {
566 /* for MUX filter we start at index 1 */
567 for (i = 1; i < op->nframes; i++) {
568 if ((op->last_frames) &&
569 (op->last_frames[i].can_dlc & RX_THR)) {
570 op->last_frames[i].can_dlc &= ~RX_THR;
571 bcm_rx_changed(op, &op->last_frames[i]);
572 }
573 }
574
575 } else {
576 /* for RX_FILTER_ID and simple filter */
577 if (op->last_frames && (op->last_frames[0].can_dlc & RX_THR)) {
578 op->last_frames[0].can_dlc &= ~RX_THR;
579 bcm_rx_changed(op, &op->last_frames[0]);
580 }
581 }
582}
583
584/*
585 * bcm_rx_handler - handle a CAN frame reception
586 */
587static void bcm_rx_handler(struct sk_buff *skb, void *data)
588{
589 struct bcm_op *op = (struct bcm_op *)data;
590 struct can_frame rxframe;
591 int i;
592
593 /* disable timeout */
594 del_timer(&op->timer);
595
596 if (skb->len == sizeof(rxframe)) {
597 memcpy(&rxframe, skb->data, sizeof(rxframe));
598 /* save rx timestamp */
599 op->rx_stamp = skb->tstamp;
600 /* save originator for recvfrom() */
601 op->rx_ifindex = skb->dev->ifindex;
602 /* update statistics */
603 op->frames_abs++;
604 kfree_skb(skb);
605
606 } else {
607 kfree_skb(skb);
608 return;
609 }
610
611 if (op->can_id != rxframe.can_id)
612 return;
613
614 if (op->flags & RX_RTR_FRAME) {
615 /* send reply for RTR-request (placed in op->frames[0]) */
616 bcm_can_tx(op);
617 return;
618 }
619
620 if (op->flags & RX_FILTER_ID) {
621 /* the easiest case */
622 bcm_rx_update_and_send(op, &op->last_frames[0], &rxframe);
623 bcm_rx_starttimer(op);
624 return;
625 }
626
627 if (op->nframes == 1) {
628 /* simple compare with index 0 */
629 bcm_rx_cmp_to_index(op, 0, &rxframe);
630 bcm_rx_starttimer(op);
631 return;
632 }
633
634 if (op->nframes > 1) {
635 /*
636 * multiplex compare
637 *
638 * find the first multiplex mask that fits.
639 * Remark: The MUX-mask is stored in index 0
640 */
641
642 for (i = 1; i < op->nframes; i++) {
643 if ((GET_U64(&op->frames[0]) & GET_U64(&rxframe)) ==
644 (GET_U64(&op->frames[0]) &
645 GET_U64(&op->frames[i]))) {
646 bcm_rx_cmp_to_index(op, i, &rxframe);
647 break;
648 }
649 }
650 bcm_rx_starttimer(op);
651 }
652}
653
654/*
655 * helpers for bcm_op handling: find & delete bcm [rx|tx] op elements
656 */
657static struct bcm_op *bcm_find_op(struct list_head *ops, canid_t can_id,
658 int ifindex)
659{
660 struct bcm_op *op;
661
662 list_for_each_entry(op, ops, list) {
663 if ((op->can_id == can_id) && (op->ifindex == ifindex))
664 return op;
665 }
666
667 return NULL;
668}
669
670static void bcm_remove_op(struct bcm_op *op)
671{
672 del_timer(&op->timer);
673 del_timer(&op->thrtimer);
674
675 if ((op->frames) && (op->frames != &op->sframe))
676 kfree(op->frames);
677
678 if ((op->last_frames) && (op->last_frames != &op->last_sframe))
679 kfree(op->last_frames);
680
681 kfree(op);
682
683 return;
684}
685
686static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
687{
688 if (op->rx_reg_dev == dev) {
689 can_rx_unregister(dev, op->can_id, REGMASK(op->can_id),
690 bcm_rx_handler, op);
691
692		/* mark subscription as removed */
693 op->rx_reg_dev = NULL;
694 } else
695 printk(KERN_ERR "can-bcm: bcm_rx_unreg: registered device "
696 "mismatch %p %p\n", op->rx_reg_dev, dev);
697}
698
699/*
700 * bcm_delete_rx_op - find and remove a rx op (returns number of removed ops)
701 */
702static int bcm_delete_rx_op(struct list_head *ops, canid_t can_id, int ifindex)
703{
704 struct bcm_op *op, *n;
705
706 list_for_each_entry_safe(op, n, ops, list) {
707 if ((op->can_id == can_id) && (op->ifindex == ifindex)) {
708
709 /*
710			 * Whether we're bound or not (due to netdev
711			 * problems), can_rx_unregister() is always a safe
712			 * thing to do here.
713 */
714 if (op->ifindex) {
715 /*
716 * Only remove subscriptions that had not
717 * been removed due to NETDEV_UNREGISTER
718 * in bcm_notifier()
719 */
720 if (op->rx_reg_dev) {
721 struct net_device *dev;
722
723 dev = dev_get_by_index(&init_net,
724 op->ifindex);
725 if (dev) {
726 bcm_rx_unreg(dev, op);
727 dev_put(dev);
728 }
729 }
730 } else
731 can_rx_unregister(NULL, op->can_id,
732 REGMASK(op->can_id),
733 bcm_rx_handler, op);
734
735 list_del(&op->list);
736 bcm_remove_op(op);
737 return 1; /* done */
738 }
739 }
740
741 return 0; /* not found */
742}
743
744/*
745 * bcm_delete_tx_op - find and remove a tx op (returns number of removed ops)
746 */
747static int bcm_delete_tx_op(struct list_head *ops, canid_t can_id, int ifindex)
748{
749 struct bcm_op *op, *n;
750
751 list_for_each_entry_safe(op, n, ops, list) {
752 if ((op->can_id == can_id) && (op->ifindex == ifindex)) {
753 list_del(&op->list);
754 bcm_remove_op(op);
755 return 1; /* done */
756 }
757 }
758
759 return 0; /* not found */
760}
761
762/*
763 * bcm_read_op - read out a bcm_op and send it to the user (for bcm_sendmsg)
764 */
765static int bcm_read_op(struct list_head *ops, struct bcm_msg_head *msg_head,
766 int ifindex)
767{
768 struct bcm_op *op = bcm_find_op(ops, msg_head->can_id, ifindex);
769
770 if (!op)
771 return -EINVAL;
772
773 /* put current values into msg_head */
774 msg_head->flags = op->flags;
775 msg_head->count = op->count;
776 msg_head->ival1 = op->ival1;
777 msg_head->ival2 = op->ival2;
778 msg_head->nframes = op->nframes;
779
780 bcm_send_to_user(op, msg_head, op->frames, 0);
781
782 return MHSIZ;
783}
784
785/*
786 * bcm_tx_setup - create or update a bcm tx op (for bcm_sendmsg)
787 */
788static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
789 int ifindex, struct sock *sk)
790{
791 struct bcm_sock *bo = bcm_sk(sk);
792 struct bcm_op *op;
793 int i, err;
794
795 /* we need a real device to send frames */
796 if (!ifindex)
797 return -ENODEV;
798
799 /* we need at least one can_frame */
800 if (msg_head->nframes < 1)
801 return -EINVAL;
802
803 /* check the given can_id */
804 op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex);
805
806 if (op) {
807 /* update existing BCM operation */
808
809 /*
810 * Do we need more space for the can_frames than currently
811 * allocated? -> This is a _really_ unusual use-case and
812 * therefore (complexity / locking) it is not supported.
813 */
814 if (msg_head->nframes > op->nframes)
815 return -E2BIG;
816
817 /* update can_frames content */
818 for (i = 0; i < msg_head->nframes; i++) {
819 err = memcpy_fromiovec((u8 *)&op->frames[i],
820 msg->msg_iov, CFSIZ);
821 if (err < 0)
822 return err;
823
824 if (msg_head->flags & TX_CP_CAN_ID) {
825 /* copy can_id into frame */
826 op->frames[i].can_id = msg_head->can_id;
827 }
828 }
829
830 } else {
831 /* insert new BCM operation for the given can_id */
832
833 op = kzalloc(OPSIZ, GFP_KERNEL);
834 if (!op)
835 return -ENOMEM;
836
837 op->can_id = msg_head->can_id;
838
839 /* create array for can_frames and copy the data */
840 if (msg_head->nframes > 1) {
841 op->frames = kmalloc(msg_head->nframes * CFSIZ,
842 GFP_KERNEL);
843 if (!op->frames) {
844 kfree(op);
845 return -ENOMEM;
846 }
847 } else
848 op->frames = &op->sframe;
849
850 for (i = 0; i < msg_head->nframes; i++) {
851 err = memcpy_fromiovec((u8 *)&op->frames[i],
852 msg->msg_iov, CFSIZ);
853 if (err < 0) {
854 if (op->frames != &op->sframe)
855 kfree(op->frames);
856 kfree(op);
857 return err;
858 }
859
860 if (msg_head->flags & TX_CP_CAN_ID) {
861 /* copy can_id into frame */
862 op->frames[i].can_id = msg_head->can_id;
863 }
864 }
865
866		/* tx_ops never compare with previously received messages */
867 op->last_frames = NULL;
868
869 /* bcm_can_tx / bcm_tx_timeout_handler needs this */
870 op->sk = sk;
871 op->ifindex = ifindex;
872
873 /* initialize uninitialized (kzalloc) structure */
874 setup_timer(&op->timer, bcm_tx_timeout_handler,
875 (unsigned long)op);
876
877 /* currently unused in tx_ops */
878 init_timer(&op->thrtimer);
879
880 /* add this bcm_op to the list of the tx_ops */
881 list_add(&op->list, &bo->tx_ops);
882
883 } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
884
885 if (op->nframes != msg_head->nframes) {
886 op->nframes = msg_head->nframes;
887 /* start multiple frame transmission with index 0 */
888 op->currframe = 0;
889 }
890
891 /* check flags */
892
893 op->flags = msg_head->flags;
894
895 if (op->flags & TX_RESET_MULTI_IDX) {
896 /* start multiple frame transmission with index 0 */
897 op->currframe = 0;
898 }
899
900 if (op->flags & SETTIMER) {
901 /* set timer values */
902 op->count = msg_head->count;
903 op->ival1 = msg_head->ival1;
904 op->ival2 = msg_head->ival2;
905 op->j_ival1 = rounded_tv2jif(&msg_head->ival1);
906 op->j_ival2 = rounded_tv2jif(&msg_head->ival2);
907
908 /* disable an active timer due to zero values? */
909 if (!op->j_ival1 && !op->j_ival2)
910 del_timer(&op->timer);
911 }
912
913 if ((op->flags & STARTTIMER) &&
914 ((op->j_ival1 && op->count) || op->j_ival2)) {
915
916 /* spec: send can_frame when starting timer */
917 op->flags |= TX_ANNOUNCE;
918
919 if (op->j_ival1 && (op->count > 0)) {
920 /* op->count-- is done in bcm_tx_timeout_handler */
921 mod_timer(&op->timer, jiffies + op->j_ival1);
922 } else
923 mod_timer(&op->timer, jiffies + op->j_ival2);
924 }
925
926 if (op->flags & TX_ANNOUNCE)
927 bcm_can_tx(op);
928
929 return msg_head->nframes * CFSIZ + MHSIZ;
930}
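
A minimal userspace sketch of the TX_SETUP path above (not part of the
kernel source; it assumes a CAN_BCM socket s that has already been
connected as in bcm_connect(), plus the bcm_msg_head/can_frame definitions
from linux/can.h and linux/can/bcm.h). It requests ten frames at 100 ms,
followed by a cyclic transmission every second:

	struct {
		struct bcm_msg_head head;
		struct can_frame frame;
	} msg = { { 0 } };

	msg.head.opcode  = TX_SETUP;
	msg.head.flags   = SETTIMER | STARTTIMER | TX_CP_CAN_ID;
	msg.head.count   = 10;			/* count frames with ival1 ... */
	msg.head.ival1.tv_usec = 100000;	/* ... every 100 ms ...        */
	msg.head.ival2.tv_sec  = 1;		/* ... then every second       */
	msg.head.can_id  = 0x123;		/* copied into the frame by    */
	msg.head.nframes = 1;			/* TX_CP_CAN_ID                */
	msg.frame.can_dlc = 2;
	msg.frame.data[0] = 0xDE;
	msg.frame.data[1] = 0xAD;

	write(s, &msg, sizeof(msg));		/* returns MHSIZ + CFSIZ */
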
931
932/*
933 * bcm_rx_setup - create or update a bcm rx op (for bcm_sendmsg)
934 */
935static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
936 int ifindex, struct sock *sk)
937{
938 struct bcm_sock *bo = bcm_sk(sk);
939 struct bcm_op *op;
940 int do_rx_register;
941 int err = 0;
942
943 if ((msg_head->flags & RX_FILTER_ID) || (!(msg_head->nframes))) {
944 /* be robust against wrong usage ... */
945 msg_head->flags |= RX_FILTER_ID;
946 /* ignore trailing garbage */
947 msg_head->nframes = 0;
948 }
949
950 if ((msg_head->flags & RX_RTR_FRAME) &&
951 ((msg_head->nframes != 1) ||
952 (!(msg_head->can_id & CAN_RTR_FLAG))))
953 return -EINVAL;
954
955 /* check the given can_id */
956 op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex);
957 if (op) {
958 /* update existing BCM operation */
959
960 /*
961 * Do we need more space for the can_frames than currently
962 * allocated? -> This is a _really_ unusual use-case and
963 * therefore (complexity / locking) it is not supported.
964 */
965 if (msg_head->nframes > op->nframes)
966 return -E2BIG;
967
968 if (msg_head->nframes) {
969 /* update can_frames content */
970 err = memcpy_fromiovec((u8 *)op->frames,
971 msg->msg_iov,
972 msg_head->nframes * CFSIZ);
973 if (err < 0)
974 return err;
975
976 /* clear last_frames to indicate 'nothing received' */
977 memset(op->last_frames, 0, msg_head->nframes * CFSIZ);
978 }
979
980 op->nframes = msg_head->nframes;
981
982 /* Only an update -> do not call can_rx_register() */
983 do_rx_register = 0;
984
985 } else {
986 /* insert new BCM operation for the given can_id */
987 op = kzalloc(OPSIZ, GFP_KERNEL);
988 if (!op)
989 return -ENOMEM;
990
991 op->can_id = msg_head->can_id;
992 op->nframes = msg_head->nframes;
993
994 if (msg_head->nframes > 1) {
995 /* create array for can_frames and copy the data */
996 op->frames = kmalloc(msg_head->nframes * CFSIZ,
997 GFP_KERNEL);
998 if (!op->frames) {
999 kfree(op);
1000 return -ENOMEM;
1001 }
1002
1003 /* create and init array for received can_frames */
1004 op->last_frames = kzalloc(msg_head->nframes * CFSIZ,
1005 GFP_KERNEL);
1006 if (!op->last_frames) {
1007 kfree(op->frames);
1008 kfree(op);
1009 return -ENOMEM;
1010 }
1011
1012 } else {
1013 op->frames = &op->sframe;
1014 op->last_frames = &op->last_sframe;
1015 }
1016
1017 if (msg_head->nframes) {
1018 err = memcpy_fromiovec((u8 *)op->frames, msg->msg_iov,
1019 msg_head->nframes * CFSIZ);
1020 if (err < 0) {
1021 if (op->frames != &op->sframe)
1022 kfree(op->frames);
1023 if (op->last_frames != &op->last_sframe)
1024 kfree(op->last_frames);
1025 kfree(op);
1026 return err;
1027 }
1028 }
1029
1030 /* bcm_can_tx / bcm_tx_timeout_handler needs this */
1031 op->sk = sk;
1032 op->ifindex = ifindex;
1033
1034 /* initialize uninitialized (kzalloc) structure */
1035 setup_timer(&op->timer, bcm_rx_timeout_handler,
1036 (unsigned long)op);
1037
1038 /* init throttle timer for RX_CHANGED */
1039 setup_timer(&op->thrtimer, bcm_rx_thr_handler,
1040 (unsigned long)op);
1041
1042 /* mark disabled timer */
1043 op->thrtimer.expires = 0;
1044
1045 /* add this bcm_op to the list of the rx_ops */
1046 list_add(&op->list, &bo->rx_ops);
1047
1048 /* call can_rx_register() */
1049 do_rx_register = 1;
1050
1051 } /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */
1052
1053 /* check flags */
1054 op->flags = msg_head->flags;
1055
1056 if (op->flags & RX_RTR_FRAME) {
1057
1058 /* no timers in RTR-mode */
1059 del_timer(&op->thrtimer);
1060 del_timer(&op->timer);
1061
1062 /*
1063 * funny feature in RX(!)_SETUP only for RTR-mode:
1064 * copy can_id into frame BUT without RTR-flag to
1065 * prevent a full-load-loopback-test ... ;-]
1066 */
1067 if ((op->flags & TX_CP_CAN_ID) ||
1068 (op->frames[0].can_id == op->can_id))
1069 op->frames[0].can_id = op->can_id & ~CAN_RTR_FLAG;
1070
1071 } else {
1072 if (op->flags & SETTIMER) {
1073
1074 /* set timer value */
1075 op->ival1 = msg_head->ival1;
1076 op->ival2 = msg_head->ival2;
1077 op->j_ival1 = rounded_tv2jif(&msg_head->ival1);
1078 op->j_ival2 = rounded_tv2jif(&msg_head->ival2);
1079
1080 /* disable an active timer due to zero value? */
1081 if (!op->j_ival1)
1082 del_timer(&op->timer);
1083
1084 /* free currently blocked msgs ? */
1085			/* free currently blocked msgs? */
1086 /* send blocked msgs hereafter */
1087 mod_timer(&op->thrtimer, jiffies + 2);
1088 }
1089
1090 /*
1091 * if (op->j_ival2) is zero, no (new) throttling
1092 * will happen. For details see functions
1093 * bcm_rx_update_and_send() and bcm_rx_thr_handler()
1094 */
1095 }
1096
1097 if ((op->flags & STARTTIMER) && op->j_ival1)
1098 mod_timer(&op->timer, jiffies + op->j_ival1);
1099 }
1100
1101 /* now we can register for can_ids, if we added a new bcm_op */
1102 if (do_rx_register) {
1103 if (ifindex) {
1104 struct net_device *dev;
1105
1106 dev = dev_get_by_index(&init_net, ifindex);
1107 if (dev) {
1108 err = can_rx_register(dev, op->can_id,
1109 REGMASK(op->can_id),
1110 bcm_rx_handler, op,
1111 "bcm");
1112
1113 op->rx_reg_dev = dev;
1114 dev_put(dev);
1115 }
1116
1117 } else
1118 err = can_rx_register(NULL, op->can_id,
1119 REGMASK(op->can_id),
1120 bcm_rx_handler, op, "bcm");
1121 if (err) {
1122 /* this bcm rx op is broken -> remove it */
1123 list_del(&op->list);
1124 bcm_remove_op(op);
1125 return err;
1126 }
1127 }
1128
1129 return msg_head->nframes * CFSIZ + MHSIZ;
1130}
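
The receive side can be exercised the same way. A sketch under the same
assumptions, subscribing to CAN ID 0x123 with RX_FILTER_ID (so no compare
frames follow the message head) and a one second timeout:

	struct bcm_msg_head rx = { 0 };

	rx.opcode  = RX_SETUP;
	rx.flags   = SETTIMER | STARTTIMER | RX_FILTER_ID;
	rx.ival1.tv_sec = 1;	/* RX_TIMEOUT after one second of silence */
	rx.can_id  = 0x123;
	rx.nframes = 0;		/* RX_FILTER_ID: no compare frames needed */

	write(s, &rx, sizeof(rx));
	/* subsequent read() calls deliver RX_CHANGED / RX_TIMEOUT messages */
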
1131
1132/*
1133 * bcm_tx_send - send a single CAN frame to the CAN interface (for bcm_sendmsg)
1134 */
1135static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
1136{
1137 struct sk_buff *skb;
1138 struct net_device *dev;
1139 int err;
1140
1141 /* we need a real device to send frames */
1142 if (!ifindex)
1143 return -ENODEV;
1144
1145 skb = alloc_skb(CFSIZ, GFP_KERNEL);
1146
1147 if (!skb)
1148 return -ENOMEM;
1149
1150 err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ);
1151 if (err < 0) {
1152 kfree_skb(skb);
1153 return err;
1154 }
1155
1156 dev = dev_get_by_index(&init_net, ifindex);
1157 if (!dev) {
1158 kfree_skb(skb);
1159 return -ENODEV;
1160 }
1161
1162 skb->dev = dev;
1163 skb->sk = sk;
1164 can_send(skb, 1); /* send with loopback */
1165 dev_put(dev);
1166
1167 return CFSIZ + MHSIZ;
1168}
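
For a one-shot transmission through the same socket, TX_SEND with exactly
one appended frame is enough (sketch, same assumptions as above):

	struct {
		struct bcm_msg_head head;
		struct can_frame frame;
	} one = { { 0 } };

	one.head.opcode   = TX_SEND;
	one.head.nframes  = 1;
	one.frame.can_id  = 0x123;
	one.frame.can_dlc = 1;
	one.frame.data[0] = 0x42;

	write(s, &one, sizeof(one));
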
1169
1170/*
1171 * bcm_sendmsg - process BCM commands (opcodes) from userspace
1172 */
1173static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
1174 struct msghdr *msg, size_t size)
1175{
1176 struct sock *sk = sock->sk;
1177 struct bcm_sock *bo = bcm_sk(sk);
1178 int ifindex = bo->ifindex; /* default ifindex for this bcm_op */
1179 struct bcm_msg_head msg_head;
1180 int ret; /* read bytes or error codes as return value */
1181
1182 if (!bo->bound)
1183 return -ENOTCONN;
1184
1185 /* check for alternative ifindex for this bcm_op */
1186
1187 if (!ifindex && msg->msg_name) {
1188 /* no bound device as default => check msg_name */
1189 struct sockaddr_can *addr =
1190 (struct sockaddr_can *)msg->msg_name;
1191
1192 if (addr->can_family != AF_CAN)
1193 return -EINVAL;
1194
1195 /* ifindex from sendto() */
1196 ifindex = addr->can_ifindex;
1197
1198 if (ifindex) {
1199 struct net_device *dev;
1200
1201 dev = dev_get_by_index(&init_net, ifindex);
1202 if (!dev)
1203 return -ENODEV;
1204
1205 if (dev->type != ARPHRD_CAN) {
1206 dev_put(dev);
1207 return -ENODEV;
1208 }
1209
1210 dev_put(dev);
1211 }
1212 }
1213
1214 /* read message head information */
1215
1216 ret = memcpy_fromiovec((u8 *)&msg_head, msg->msg_iov, MHSIZ);
1217 if (ret < 0)
1218 return ret;
1219
1220 lock_sock(sk);
1221
1222 switch (msg_head.opcode) {
1223
1224 case TX_SETUP:
1225 ret = bcm_tx_setup(&msg_head, msg, ifindex, sk);
1226 break;
1227
1228 case RX_SETUP:
1229 ret = bcm_rx_setup(&msg_head, msg, ifindex, sk);
1230 break;
1231
1232 case TX_DELETE:
1233 if (bcm_delete_tx_op(&bo->tx_ops, msg_head.can_id, ifindex))
1234 ret = MHSIZ;
1235 else
1236 ret = -EINVAL;
1237 break;
1238
1239 case RX_DELETE:
1240 if (bcm_delete_rx_op(&bo->rx_ops, msg_head.can_id, ifindex))
1241 ret = MHSIZ;
1242 else
1243 ret = -EINVAL;
1244 break;
1245
1246 case TX_READ:
1247 /* reuse msg_head for the reply to TX_READ */
1248 msg_head.opcode = TX_STATUS;
1249 ret = bcm_read_op(&bo->tx_ops, &msg_head, ifindex);
1250 break;
1251
1252 case RX_READ:
1253 /* reuse msg_head for the reply to RX_READ */
1254 msg_head.opcode = RX_STATUS;
1255 ret = bcm_read_op(&bo->rx_ops, &msg_head, ifindex);
1256 break;
1257
1258 case TX_SEND:
1259 /* we need at least one can_frame */
1260 if (msg_head.nframes < 1)
1261 ret = -EINVAL;
1262 else
1263 ret = bcm_tx_send(msg, ifindex, sk);
1264 break;
1265
1266 default:
1267 ret = -EINVAL;
1268 break;
1269 }
1270
1271 release_sock(sk);
1272
1273 return ret;
1274}
1275
1276/*
1277 * notification handler for netdevice status changes
1278 */
1279static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
1280 void *data)
1281{
1282 struct net_device *dev = (struct net_device *)data;
1283 struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
1284 struct sock *sk = &bo->sk;
1285 struct bcm_op *op;
1286 int notify_enodev = 0;
1287
1288 if (dev->nd_net != &init_net)
1289 return NOTIFY_DONE;
1290
1291 if (dev->type != ARPHRD_CAN)
1292 return NOTIFY_DONE;
1293
1294 switch (msg) {
1295
1296 case NETDEV_UNREGISTER:
1297 lock_sock(sk);
1298
1299 /* remove device specific receive entries */
1300 list_for_each_entry(op, &bo->rx_ops, list)
1301 if (op->rx_reg_dev == dev)
1302 bcm_rx_unreg(dev, op);
1303
1304 /* remove device reference, if this is our bound device */
1305 if (bo->bound && bo->ifindex == dev->ifindex) {
1306 bo->bound = 0;
1307 bo->ifindex = 0;
1308 notify_enodev = 1;
1309 }
1310
1311 release_sock(sk);
1312
1313 if (notify_enodev) {
1314 sk->sk_err = ENODEV;
1315 if (!sock_flag(sk, SOCK_DEAD))
1316 sk->sk_error_report(sk);
1317 }
1318 break;
1319
1320 case NETDEV_DOWN:
1321 if (bo->bound && bo->ifindex == dev->ifindex) {
1322 sk->sk_err = ENETDOWN;
1323 if (!sock_flag(sk, SOCK_DEAD))
1324 sk->sk_error_report(sk);
1325 }
1326 }
1327
1328 return NOTIFY_DONE;
1329}
1330
1331/*
1332 * initial settings for all BCM sockets to be set at socket creation time
1333 */
1334static int bcm_init(struct sock *sk)
1335{
1336 struct bcm_sock *bo = bcm_sk(sk);
1337
1338 bo->bound = 0;
1339 bo->ifindex = 0;
1340 bo->dropped_usr_msgs = 0;
1341 bo->bcm_proc_read = NULL;
1342
1343 INIT_LIST_HEAD(&bo->tx_ops);
1344 INIT_LIST_HEAD(&bo->rx_ops);
1345
1346 /* set notifier */
1347 bo->notifier.notifier_call = bcm_notifier;
1348
1349 register_netdevice_notifier(&bo->notifier);
1350
1351 return 0;
1352}
1353
1354/*
1355 * standard socket functions
1356 */
1357static int bcm_release(struct socket *sock)
1358{
1359 struct sock *sk = sock->sk;
1360 struct bcm_sock *bo = bcm_sk(sk);
1361 struct bcm_op *op, *next;
1362
1363 /* remove bcm_ops, timer, rx_unregister(), etc. */
1364
1365 unregister_netdevice_notifier(&bo->notifier);
1366
1367 lock_sock(sk);
1368
1369 list_for_each_entry_safe(op, next, &bo->tx_ops, list)
1370 bcm_remove_op(op);
1371
1372 list_for_each_entry_safe(op, next, &bo->rx_ops, list) {
1373 /*
1374	 * Whether we're bound or not (due to netdev problems),
1375	 * can_rx_unregister() is always a safe thing to do here.
1376 */
1377 if (op->ifindex) {
1378 /*
1379 * Only remove subscriptions that had not
1380 * been removed due to NETDEV_UNREGISTER
1381 * in bcm_notifier()
1382 */
1383 if (op->rx_reg_dev) {
1384 struct net_device *dev;
1385
1386 dev = dev_get_by_index(&init_net, op->ifindex);
1387 if (dev) {
1388 bcm_rx_unreg(dev, op);
1389 dev_put(dev);
1390 }
1391 }
1392 } else
1393 can_rx_unregister(NULL, op->can_id,
1394 REGMASK(op->can_id),
1395 bcm_rx_handler, op);
1396
1397 bcm_remove_op(op);
1398 }
1399
1400 /* remove procfs entry */
1401 if (proc_dir && bo->bcm_proc_read)
1402 remove_proc_entry(bo->procname, proc_dir);
1403
1404 /* remove device reference */
1405 if (bo->bound) {
1406 bo->bound = 0;
1407 bo->ifindex = 0;
1408 }
1409
1410 release_sock(sk);
1411 sock_put(sk);
1412
1413 return 0;
1414}
1415
1416static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
1417 int flags)
1418{
1419 struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
1420 struct sock *sk = sock->sk;
1421 struct bcm_sock *bo = bcm_sk(sk);
1422
1423 if (bo->bound)
1424 return -EISCONN;
1425
1426 /* bind a device to this socket */
1427 if (addr->can_ifindex) {
1428 struct net_device *dev;
1429
1430 dev = dev_get_by_index(&init_net, addr->can_ifindex);
1431 if (!dev)
1432 return -ENODEV;
1433
1434 if (dev->type != ARPHRD_CAN) {
1435 dev_put(dev);
1436 return -ENODEV;
1437 }
1438
1439 bo->ifindex = dev->ifindex;
1440 dev_put(dev);
1441
1442 } else {
1443 /* no interface reference for ifindex = 0 ('any' CAN device) */
1444 bo->ifindex = 0;
1445 }
1446
1447 bo->bound = 1;
1448
1449 if (proc_dir) {
1450 /* unique socket address as filename */
1451 sprintf(bo->procname, "%p", sock);
1452 bo->bcm_proc_read = create_proc_read_entry(bo->procname, 0644,
1453 proc_dir,
1454 bcm_read_proc, sk);
1455 }
1456
1457 return 0;
1458}
1459
1460static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
1461 struct msghdr *msg, size_t size, int flags)
1462{
1463 struct sock *sk = sock->sk;
1464 struct sk_buff *skb;
1465 int error = 0;
1466 int noblock;
1467 int err;
1468
1469 noblock = flags & MSG_DONTWAIT;
1470 flags &= ~MSG_DONTWAIT;
1471 skb = skb_recv_datagram(sk, flags, noblock, &error);
1472 if (!skb)
1473 return error;
1474
1475 if (skb->len < size)
1476 size = skb->len;
1477
1478 err = memcpy_toiovec(msg->msg_iov, skb->data, size);
1479 if (err < 0) {
1480 skb_free_datagram(sk, skb);
1481 return err;
1482 }
1483
1484 sock_recv_timestamp(msg, sk, skb);
1485
1486 if (msg->msg_name) {
1487 msg->msg_namelen = sizeof(struct sockaddr_can);
1488 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1489 }
1490
1491 skb_free_datagram(sk, skb);
1492
1493 return size;
1494}
1495
1496static struct proto_ops bcm_ops __read_mostly = {
1497 .family = PF_CAN,
1498 .release = bcm_release,
1499 .bind = sock_no_bind,
1500 .connect = bcm_connect,
1501 .socketpair = sock_no_socketpair,
1502 .accept = sock_no_accept,
1503 .getname = sock_no_getname,
1504 .poll = datagram_poll,
1505 .ioctl = NULL, /* use can_ioctl() from af_can.c */
1506 .listen = sock_no_listen,
1507 .shutdown = sock_no_shutdown,
1508 .setsockopt = sock_no_setsockopt,
1509 .getsockopt = sock_no_getsockopt,
1510 .sendmsg = bcm_sendmsg,
1511 .recvmsg = bcm_recvmsg,
1512 .mmap = sock_no_mmap,
1513 .sendpage = sock_no_sendpage,
1514};
1515
1516static struct proto bcm_proto __read_mostly = {
1517 .name = "CAN_BCM",
1518 .owner = THIS_MODULE,
1519 .obj_size = sizeof(struct bcm_sock),
1520 .init = bcm_init,
1521};
1522
1523static struct can_proto bcm_can_proto __read_mostly = {
1524 .type = SOCK_DGRAM,
1525 .protocol = CAN_BCM,
1526 .capability = -1,
1527 .ops = &bcm_ops,
1528 .prot = &bcm_proto,
1529};
1530
1531static int __init bcm_module_init(void)
1532{
1533 int err;
1534
1535 printk(banner);
1536
1537 err = can_proto_register(&bcm_can_proto);
1538 if (err < 0) {
1539 printk(KERN_ERR "can: registration of bcm protocol failed\n");
1540 return err;
1541 }
1542
1543 /* create /proc/net/can-bcm directory */
1544 proc_dir = proc_mkdir("can-bcm", init_net.proc_net);
1545
1546 if (proc_dir)
1547 proc_dir->owner = THIS_MODULE;
1548
1549 return 0;
1550}
1551
1552static void __exit bcm_module_exit(void)
1553{
1554 can_proto_unregister(&bcm_can_proto);
1555
1556 if (proc_dir)
1557 proc_net_remove(&init_net, "can-bcm");
1558}
1559
1560module_init(bcm_module_init);
1561module_exit(bcm_module_exit);
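
Putting the pieces together: a self-contained userspace sketch (again not
part of the kernel source) that opens a BCM socket, connects it to one
interface and waits for the broadcast manager's event messages. The
interface name "can0" is only an example; the RX_SETUP step is the one
sketched after bcm_rx_setup() above:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/can.h>
	#include <linux/can/bcm.h>

	int main(void)
	{
		struct sockaddr_can addr = { 0 };
		struct {
			struct bcm_msg_head head;
			struct can_frame frame;
		} msg;
		int s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);

		addr.can_family  = AF_CAN;
		addr.can_ifindex = if_nametoindex("can0");
		if (s < 0 || connect(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
			return 1;

		/* ... issue RX_SETUP here as sketched above ... */

		while (read(s, &msg, sizeof(msg)) > 0)
			printf("opcode %u for can_id 0x%X\n",
			       msg.head.opcode, msg.head.can_id);
		return 0;
	}
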
diff --git a/net/can/proc.c b/net/can/proc.c
new file mode 100644
index 000000000000..520fef5e5398
--- /dev/null
+++ b/net/can/proc.c
@@ -0,0 +1,533 @@
1/*
2 * proc.c - procfs support for Protocol family CAN core module
3 *
4 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of Volkswagen nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * Alternatively, provided that this notice is retained in full, this
20 * software may be distributed under the terms of the GNU General
21 * Public License ("GPL") version 2, in which case the provisions of the
22 * GPL apply INSTEAD OF those given above.
23 *
24 * The provided data structures and external interfaces from this code
25 * are not restricted to be used by modules with a GPL compatible license.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 *
40 * Send feedback to <socketcan-users@lists.berlios.de>
41 *
42 */
43
44#include <linux/module.h>
45#include <linux/proc_fs.h>
46#include <linux/list.h>
47#include <linux/rcupdate.h>
48#include <linux/can/core.h>
49
50#include "af_can.h"
51
52/*
53 * proc filenames for the PF_CAN core
54 */
55
56#define CAN_PROC_VERSION "version"
57#define CAN_PROC_STATS "stats"
58#define CAN_PROC_RESET_STATS "reset_stats"
59#define CAN_PROC_RCVLIST_ALL "rcvlist_all"
60#define CAN_PROC_RCVLIST_FIL "rcvlist_fil"
61#define CAN_PROC_RCVLIST_INV "rcvlist_inv"
62#define CAN_PROC_RCVLIST_SFF "rcvlist_sff"
63#define CAN_PROC_RCVLIST_EFF "rcvlist_eff"
64#define CAN_PROC_RCVLIST_ERR "rcvlist_err"
65
66static struct proc_dir_entry *can_dir;
67static struct proc_dir_entry *pde_version;
68static struct proc_dir_entry *pde_stats;
69static struct proc_dir_entry *pde_reset_stats;
70static struct proc_dir_entry *pde_rcvlist_all;
71static struct proc_dir_entry *pde_rcvlist_fil;
72static struct proc_dir_entry *pde_rcvlist_inv;
73static struct proc_dir_entry *pde_rcvlist_sff;
74static struct proc_dir_entry *pde_rcvlist_eff;
75static struct proc_dir_entry *pde_rcvlist_err;
76
77static int user_reset;
78
79static const char rx_list_name[][8] = {
80 [RX_ERR] = "rx_err",
81 [RX_ALL] = "rx_all",
82 [RX_FIL] = "rx_fil",
83 [RX_INV] = "rx_inv",
84 [RX_EFF] = "rx_eff",
85};
86
87/*
88 * af_can statistics stuff
89 */
90
91static void can_init_stats(void)
92{
93 /*
94 * This memset function is called from a timer context (when
95	 * can_stattimer is active, which is the default) OR in a process
96 * context (reading the proc_fs when can_stattimer is disabled).
97 */
98 memset(&can_stats, 0, sizeof(can_stats));
99 can_stats.jiffies_init = jiffies;
100
101 can_pstats.stats_reset++;
102
103 if (user_reset) {
104 user_reset = 0;
105 can_pstats.user_reset++;
106 }
107}
108
109static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
110 unsigned long count)
111{
112 unsigned long rate;
113
114 if (oldjif == newjif)
115 return 0;
116
117 /* see can_stat_update() - this should NEVER happen! */
118 if (count > (ULONG_MAX / HZ)) {
119 printk(KERN_ERR "can: calc_rate: count exceeded! %ld\n",
120 count);
121 return 99999999;
122 }
123
124 rate = (count * HZ) / (newjif - oldjif);
125
126 return rate;
127}
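
Worked example for calc_rate(): with HZ = 1000, a count of 500 frames over
newjif - oldjif = 2500 jiffies (2.5 seconds) yields
(500 * 1000) / 2500 = 200 frames/s, which is the unit used by the frames/s
fields printed in can_proc_read_stats() below.
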
128
129void can_stat_update(unsigned long data)
130{
131 unsigned long j = jiffies; /* snapshot */
132
133 /* restart counting in timer context on user request */
134 if (user_reset)
135 can_init_stats();
136
137 /* restart counting on jiffies overflow */
138 if (j < can_stats.jiffies_init)
139 can_init_stats();
140
141 /* prevent overflow in calc_rate() */
142 if (can_stats.rx_frames > (ULONG_MAX / HZ))
143 can_init_stats();
144
145 /* prevent overflow in calc_rate() */
146 if (can_stats.tx_frames > (ULONG_MAX / HZ))
147 can_init_stats();
148
149 /* matches overflow - very improbable */
150 if (can_stats.matches > (ULONG_MAX / 100))
151 can_init_stats();
152
153 /* calc total values */
154 if (can_stats.rx_frames)
155 can_stats.total_rx_match_ratio = (can_stats.matches * 100) /
156 can_stats.rx_frames;
157
158 can_stats.total_tx_rate = calc_rate(can_stats.jiffies_init, j,
159 can_stats.tx_frames);
160 can_stats.total_rx_rate = calc_rate(can_stats.jiffies_init, j,
161 can_stats.rx_frames);
162
163 /* calc current values */
164 if (can_stats.rx_frames_delta)
165 can_stats.current_rx_match_ratio =
166 (can_stats.matches_delta * 100) /
167 can_stats.rx_frames_delta;
168
169 can_stats.current_tx_rate = calc_rate(0, HZ, can_stats.tx_frames_delta);
170 can_stats.current_rx_rate = calc_rate(0, HZ, can_stats.rx_frames_delta);
171
172 /* check / update maximum values */
173 if (can_stats.max_tx_rate < can_stats.current_tx_rate)
174 can_stats.max_tx_rate = can_stats.current_tx_rate;
175
176 if (can_stats.max_rx_rate < can_stats.current_rx_rate)
177 can_stats.max_rx_rate = can_stats.current_rx_rate;
178
179 if (can_stats.max_rx_match_ratio < can_stats.current_rx_match_ratio)
180 can_stats.max_rx_match_ratio = can_stats.current_rx_match_ratio;
181
182 /* clear values for 'current rate' calculation */
183 can_stats.tx_frames_delta = 0;
184 can_stats.rx_frames_delta = 0;
185 can_stats.matches_delta = 0;
186
187 /* restart timer (one second) */
188 mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
189}
190
191/*
192 * proc read functions
193 *
194 * From known use-cases we expect about 10 entries in a receive list to be
195 * printed in the proc_fs. So PAGE_SIZE is definitely enough space here.
196 *
197 */
198
199static int can_print_rcvlist(char *page, int len, struct hlist_head *rx_list,
200 struct net_device *dev)
201{
202 struct receiver *r;
203 struct hlist_node *n;
204
205 rcu_read_lock();
206 hlist_for_each_entry_rcu(r, n, rx_list, list) {
207 char *fmt = (r->can_id & CAN_EFF_FLAG)?
208 " %-5s %08X %08x %08x %08x %8ld %s\n" :
209 " %-5s %03X %08x %08lx %08lx %8ld %s\n";
210
211 len += snprintf(page + len, PAGE_SIZE - len, fmt,
212 DNAME(dev), r->can_id, r->mask,
213 (unsigned long)r->func, (unsigned long)r->data,
214 r->matches, r->ident);
215
216 /* does a typical line fit into the current buffer? */
217
218		/* 100 bytes before end of buffer */
219 if (len > PAGE_SIZE - 100) {
220 /* mark output cut off */
221 len += snprintf(page + len, PAGE_SIZE - len,
222 " (..)\n");
223 break;
224 }
225 }
226 rcu_read_unlock();
227
228 return len;
229}
230
231static int can_print_recv_banner(char *page, int len)
232{
233 /*
234 * can1. 00000000 00000000 00000000
235 * ....... 0 tp20
236 */
237 len += snprintf(page + len, PAGE_SIZE - len,
238 " device can_id can_mask function"
239 " userdata matches ident\n");
240
241 return len;
242}
243
244static int can_proc_read_stats(char *page, char **start, off_t off,
245 int count, int *eof, void *data)
246{
247 int len = 0;
248
249 len += snprintf(page + len, PAGE_SIZE - len, "\n");
250 len += snprintf(page + len, PAGE_SIZE - len,
251 " %8ld transmitted frames (TXF)\n",
252 can_stats.tx_frames);
253 len += snprintf(page + len, PAGE_SIZE - len,
254 " %8ld received frames (RXF)\n", can_stats.rx_frames);
255 len += snprintf(page + len, PAGE_SIZE - len,
256 " %8ld matched frames (RXMF)\n", can_stats.matches);
257
258 len += snprintf(page + len, PAGE_SIZE - len, "\n");
259
260 if (can_stattimer.function == can_stat_update) {
261 len += snprintf(page + len, PAGE_SIZE - len,
262 " %8ld %% total match ratio (RXMR)\n",
263 can_stats.total_rx_match_ratio);
264
265 len += snprintf(page + len, PAGE_SIZE - len,
266 " %8ld frames/s total tx rate (TXR)\n",
267 can_stats.total_tx_rate);
268 len += snprintf(page + len, PAGE_SIZE - len,
269 " %8ld frames/s total rx rate (RXR)\n",
270 can_stats.total_rx_rate);
271
272 len += snprintf(page + len, PAGE_SIZE - len, "\n");
273
274 len += snprintf(page + len, PAGE_SIZE - len,
275 " %8ld %% current match ratio (CRXMR)\n",
276 can_stats.current_rx_match_ratio);
277
278 len += snprintf(page + len, PAGE_SIZE - len,
279 " %8ld frames/s current tx rate (CTXR)\n",
280 can_stats.current_tx_rate);
281 len += snprintf(page + len, PAGE_SIZE - len,
282 " %8ld frames/s current rx rate (CRXR)\n",
283 can_stats.current_rx_rate);
284
285 len += snprintf(page + len, PAGE_SIZE - len, "\n");
286
287 len += snprintf(page + len, PAGE_SIZE - len,
288 " %8ld %% max match ratio (MRXMR)\n",
289 can_stats.max_rx_match_ratio);
290
291 len += snprintf(page + len, PAGE_SIZE - len,
292 " %8ld frames/s max tx rate (MTXR)\n",
293 can_stats.max_tx_rate);
294 len += snprintf(page + len, PAGE_SIZE - len,
295 " %8ld frames/s max rx rate (MRXR)\n",
296 can_stats.max_rx_rate);
297
298 len += snprintf(page + len, PAGE_SIZE - len, "\n");
299 }
300
301 len += snprintf(page + len, PAGE_SIZE - len,
302 " %8ld current receive list entries (CRCV)\n",
303 can_pstats.rcv_entries);
304 len += snprintf(page + len, PAGE_SIZE - len,
305 " %8ld maximum receive list entries (MRCV)\n",
306 can_pstats.rcv_entries_max);
307
308 if (can_pstats.stats_reset)
309 len += snprintf(page + len, PAGE_SIZE - len,
310 "\n %8ld statistic resets (STR)\n",
311 can_pstats.stats_reset);
312
313 if (can_pstats.user_reset)
314 len += snprintf(page + len, PAGE_SIZE - len,
315 " %8ld user statistic resets (USTR)\n",
316 can_pstats.user_reset);
317
318 len += snprintf(page + len, PAGE_SIZE - len, "\n");
319
320 *eof = 1;
321 return len;
322}
323
324static int can_proc_read_reset_stats(char *page, char **start, off_t off,
325 int count, int *eof, void *data)
326{
327 int len = 0;
328
329 user_reset = 1;
330
331 if (can_stattimer.function == can_stat_update) {
332 len += snprintf(page + len, PAGE_SIZE - len,
333 "Scheduled statistic reset #%ld.\n",
334 can_pstats.stats_reset + 1);
335
336 } else {
337 if (can_stats.jiffies_init != jiffies)
338 can_init_stats();
339
340 len += snprintf(page + len, PAGE_SIZE - len,
341 "Performed statistic reset #%ld.\n",
342 can_pstats.stats_reset);
343 }
344
345 *eof = 1;
346 return len;
347}
348
349static int can_proc_read_version(char *page, char **start, off_t off,
350 int count, int *eof, void *data)
351{
352 int len = 0;
353
354 len += snprintf(page + len, PAGE_SIZE - len, "%s\n",
355 CAN_VERSION_STRING);
356 *eof = 1;
357 return len;
358}
359
360static int can_proc_read_rcvlist(char *page, char **start, off_t off,
361 int count, int *eof, void *data)
362{
363 /* double cast to prevent GCC warning */
364 int idx = (int)(long)data;
365 int len = 0;
366 struct dev_rcv_lists *d;
367 struct hlist_node *n;
368
369 len += snprintf(page + len, PAGE_SIZE - len,
370 "\nreceive list '%s':\n", rx_list_name[idx]);
371
372 rcu_read_lock();
373 hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {
374
375 if (!hlist_empty(&d->rx[idx])) {
376 len = can_print_recv_banner(page, len);
377 len = can_print_rcvlist(page, len, &d->rx[idx], d->dev);
378 } else
379 len += snprintf(page + len, PAGE_SIZE - len,
380 " (%s: no entry)\n", DNAME(d->dev));
381
382 /* exit on end of buffer? */
383 if (len > PAGE_SIZE - 100)
384 break;
385 }
386 rcu_read_unlock();
387
388 len += snprintf(page + len, PAGE_SIZE - len, "\n");
389
390 *eof = 1;
391 return len;
392}
393
394static int can_proc_read_rcvlist_sff(char *page, char **start, off_t off,
395 int count, int *eof, void *data)
396{
397 int len = 0;
398 struct dev_rcv_lists *d;
399 struct hlist_node *n;
400
401 /* RX_SFF */
402 len += snprintf(page + len, PAGE_SIZE - len,
403 "\nreceive list 'rx_sff':\n");
404
405 rcu_read_lock();
406 hlist_for_each_entry_rcu(d, n, &can_rx_dev_list, list) {
407 int i, all_empty = 1;
408		/* check whether at least one list is non-empty */
409 for (i = 0; i < 0x800; i++)
410 if (!hlist_empty(&d->rx_sff[i])) {
411 all_empty = 0;
412 break;
413 }
414
415 if (!all_empty) {
416 len = can_print_recv_banner(page, len);
417 for (i = 0; i < 0x800; i++) {
418 if (!hlist_empty(&d->rx_sff[i]) &&
419 len < PAGE_SIZE - 100)
420 len = can_print_rcvlist(page, len,
421 &d->rx_sff[i],
422 d->dev);
423 }
424 } else
425 len += snprintf(page + len, PAGE_SIZE - len,
426 " (%s: no entry)\n", DNAME(d->dev));
427
428 /* exit on end of buffer? */
429 if (len > PAGE_SIZE - 100)
430 break;
431 }
432 rcu_read_unlock();
433
434 len += snprintf(page + len, PAGE_SIZE - len, "\n");
435
436 *eof = 1;
437 return len;
438}
439
440/*
441 * proc utility functions
442 */
443
444static struct proc_dir_entry *can_create_proc_readentry(const char *name,
445 mode_t mode,
446 read_proc_t *read_proc,
447 void *data)
448{
449 if (can_dir)
450 return create_proc_read_entry(name, mode, can_dir, read_proc,
451 data);
452 else
453 return NULL;
454}
455
456static void can_remove_proc_readentry(const char *name)
457{
458 if (can_dir)
459 remove_proc_entry(name, can_dir);
460}
461
462/*
463 * can_init_proc - create main CAN proc directory and procfs entries
464 */
465void can_init_proc(void)
466{
467 /* create /proc/net/can directory */
468 can_dir = proc_mkdir("can", init_net.proc_net);
469
470 if (!can_dir) {
471		printk(KERN_INFO "can: failed to create /proc/net/can. "
472 "CONFIG_PROC_FS missing?\n");
473 return;
474 }
475
476 can_dir->owner = THIS_MODULE;
477
478 /* own procfs entries from the AF_CAN core */
479 pde_version = can_create_proc_readentry(CAN_PROC_VERSION, 0644,
480 can_proc_read_version, NULL);
481 pde_stats = can_create_proc_readentry(CAN_PROC_STATS, 0644,
482 can_proc_read_stats, NULL);
483 pde_reset_stats = can_create_proc_readentry(CAN_PROC_RESET_STATS, 0644,
484 can_proc_read_reset_stats, NULL);
485 pde_rcvlist_err = can_create_proc_readentry(CAN_PROC_RCVLIST_ERR, 0644,
486 can_proc_read_rcvlist, (void *)RX_ERR);
487 pde_rcvlist_all = can_create_proc_readentry(CAN_PROC_RCVLIST_ALL, 0644,
488 can_proc_read_rcvlist, (void *)RX_ALL);
489 pde_rcvlist_fil = can_create_proc_readentry(CAN_PROC_RCVLIST_FIL, 0644,
490 can_proc_read_rcvlist, (void *)RX_FIL);
491 pde_rcvlist_inv = can_create_proc_readentry(CAN_PROC_RCVLIST_INV, 0644,
492 can_proc_read_rcvlist, (void *)RX_INV);
493 pde_rcvlist_eff = can_create_proc_readentry(CAN_PROC_RCVLIST_EFF, 0644,
494 can_proc_read_rcvlist, (void *)RX_EFF);
495 pde_rcvlist_sff = can_create_proc_readentry(CAN_PROC_RCVLIST_SFF, 0644,
496 can_proc_read_rcvlist_sff, NULL);
497}
498
499/*
500 * can_remove_proc - remove procfs entries and main CAN proc directory
501 */
502void can_remove_proc(void)
503{
504 if (pde_version)
505 can_remove_proc_readentry(CAN_PROC_VERSION);
506
507 if (pde_stats)
508 can_remove_proc_readentry(CAN_PROC_STATS);
509
510 if (pde_reset_stats)
511 can_remove_proc_readentry(CAN_PROC_RESET_STATS);
512
513 if (pde_rcvlist_err)
514 can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR);
515
516 if (pde_rcvlist_all)
517 can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL);
518
519 if (pde_rcvlist_fil)
520 can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL);
521
522 if (pde_rcvlist_inv)
523 can_remove_proc_readentry(CAN_PROC_RCVLIST_INV);
524
525 if (pde_rcvlist_eff)
526 can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF);
527
528 if (pde_rcvlist_sff)
529 can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF);
530
531 if (can_dir)
532 proc_net_remove(&init_net, "can");
533}
diff --git a/net/can/raw.c b/net/can/raw.c
new file mode 100644
index 000000000000..aeefd1419d00
--- /dev/null
+++ b/net/can/raw.c
@@ -0,0 +1,763 @@
1/*
2 * raw.c - Raw sockets for protocol family CAN
3 *
4 * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of Volkswagen nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * Alternatively, provided that this notice is retained in full, this
20 * software may be distributed under the terms of the GNU General
21 * Public License ("GPL") version 2, in which case the provisions of the
22 * GPL apply INSTEAD OF those given above.
23 *
24 * The provided data structures and external interfaces from this code
25 * are not restricted to be used by modules with a GPL compatible license.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 *
40 * Send feedback to <socketcan-users@lists.berlios.de>
41 *
42 */
43
44#include <linux/module.h>
45#include <linux/init.h>
46#include <linux/uio.h>
47#include <linux/net.h>
48#include <linux/netdevice.h>
49#include <linux/socket.h>
50#include <linux/if_arp.h>
51#include <linux/skbuff.h>
52#include <linux/can.h>
53#include <linux/can/core.h>
54#include <linux/can/raw.h>
55#include <net/sock.h>
56#include <net/net_namespace.h>
57
58#define CAN_RAW_VERSION CAN_VERSION
59static __initdata const char banner[] =
60 KERN_INFO "can: raw protocol (rev " CAN_RAW_VERSION ")\n";
61
62MODULE_DESCRIPTION("PF_CAN raw protocol");
63MODULE_LICENSE("Dual BSD/GPL");
64MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>");
65
66#define MASK_ALL 0
67
68/*
69 * A raw socket has a list of can_filters attached to it, each receiving
70 * the CAN frames matching that filter. If the filter list is empty,
71 * no CAN frames will be received by the socket. The default after
72 * opening the socket is to have one filter which receives all frames.
73 * The filter list is allocated dynamically with the exception of the
74 * list containing only one item. This common case is optimized by
75 * storing the single filter in dfilter, to avoid using dynamic memory.
76 */
77
78struct raw_sock {
79 struct sock sk;
80 int bound;
81 int ifindex;
82 struct notifier_block notifier;
83 int loopback;
84 int recv_own_msgs;
85 int count; /* number of active filters */
86 struct can_filter dfilter; /* default/single filter */
87 struct can_filter *filter; /* pointer to filter(s) */
88 can_err_mask_t err_mask;
89};
90
91static inline struct raw_sock *raw_sk(const struct sock *sk)
92{
93 return (struct raw_sock *)sk;
94}
95
96static void raw_rcv(struct sk_buff *skb, void *data)
97{
98 struct sock *sk = (struct sock *)data;
99 struct raw_sock *ro = raw_sk(sk);
100 struct sockaddr_can *addr;
101 int error;
102
103 if (!ro->recv_own_msgs) {
104 /* check the received tx sock reference */
105 if (skb->sk == sk) {
106 kfree_skb(skb);
107 return;
108 }
109 }
110
111 /*
112 * Put the datagram to the queue so that raw_recvmsg() can
113 * get it from there. We need to pass the interface index to
114 * raw_recvmsg(). We pass a whole struct sockaddr_can in skb->cb
115 * containing the interface index.
116 */
117
118 BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
119 addr = (struct sockaddr_can *)skb->cb;
120 memset(addr, 0, sizeof(*addr));
121 addr->can_family = AF_CAN;
122 addr->can_ifindex = skb->dev->ifindex;
123
124 error = sock_queue_rcv_skb(sk, skb);
125 if (error < 0)
126 kfree_skb(skb);
127}
128
129static int raw_enable_filters(struct net_device *dev, struct sock *sk,
130 struct can_filter *filter,
131 int count)
132{
133 int err = 0;
134 int i;
135
136 for (i = 0; i < count; i++) {
137 err = can_rx_register(dev, filter[i].can_id,
138 filter[i].can_mask,
139 raw_rcv, sk, "raw");
140 if (err) {
141 /* clean up successfully registered filters */
142 while (--i >= 0)
143 can_rx_unregister(dev, filter[i].can_id,
144 filter[i].can_mask,
145 raw_rcv, sk);
146 break;
147 }
148 }
149
150 return err;
151}
152
153static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
154 can_err_mask_t err_mask)
155{
156 int err = 0;
157
158 if (err_mask)
159 err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
160 raw_rcv, sk, "raw");
161
162 return err;
163}
164
165static void raw_disable_filters(struct net_device *dev, struct sock *sk,
166 struct can_filter *filter,
167 int count)
168{
169 int i;
170
171 for (i = 0; i < count; i++)
172 can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask,
173 raw_rcv, sk);
174}
175
176static inline void raw_disable_errfilter(struct net_device *dev,
177 struct sock *sk,
178 can_err_mask_t err_mask)
179
180{
181 if (err_mask)
182 can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG,
183 raw_rcv, sk);
184}
185
186static inline void raw_disable_allfilters(struct net_device *dev,
187 struct sock *sk)
188{
189 struct raw_sock *ro = raw_sk(sk);
190
191 raw_disable_filters(dev, sk, ro->filter, ro->count);
192 raw_disable_errfilter(dev, sk, ro->err_mask);
193}
194
195static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
196{
197 struct raw_sock *ro = raw_sk(sk);
198 int err;
199
200 err = raw_enable_filters(dev, sk, ro->filter, ro->count);
201 if (!err) {
202 err = raw_enable_errfilter(dev, sk, ro->err_mask);
203 if (err)
204 raw_disable_filters(dev, sk, ro->filter, ro->count);
205 }
206
207 return err;
208}
209
210static int raw_notifier(struct notifier_block *nb,
211 unsigned long msg, void *data)
212{
213 struct net_device *dev = (struct net_device *)data;
214 struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
215 struct sock *sk = &ro->sk;
216
217 if (dev->nd_net != &init_net)
218 return NOTIFY_DONE;
219
220 if (dev->type != ARPHRD_CAN)
221 return NOTIFY_DONE;
222
223 if (ro->ifindex != dev->ifindex)
224 return NOTIFY_DONE;
225
226 switch (msg) {
227
228 case NETDEV_UNREGISTER:
229 lock_sock(sk);
230 /* remove current filters & unregister */
231 if (ro->bound)
232 raw_disable_allfilters(dev, sk);
233
234 if (ro->count > 1)
235 kfree(ro->filter);
236
237 ro->ifindex = 0;
238 ro->bound = 0;
239 ro->count = 0;
240 release_sock(sk);
241
242 sk->sk_err = ENODEV;
243 if (!sock_flag(sk, SOCK_DEAD))
244 sk->sk_error_report(sk);
245 break;
246
247 case NETDEV_DOWN:
248 sk->sk_err = ENETDOWN;
249 if (!sock_flag(sk, SOCK_DEAD))
250 sk->sk_error_report(sk);
251 break;
252 }
253
254 return NOTIFY_DONE;
255}
256
257static int raw_init(struct sock *sk)
258{
259 struct raw_sock *ro = raw_sk(sk);
260
261 ro->bound = 0;
262 ro->ifindex = 0;
263
264 /* set default filter to single entry dfilter */
265 ro->dfilter.can_id = 0;
266 ro->dfilter.can_mask = MASK_ALL;
267 ro->filter = &ro->dfilter;
268 ro->count = 1;
269
270 /* set default loopback behaviour */
271 ro->loopback = 1;
272 ro->recv_own_msgs = 0;
273
274 /* set notifier */
275 ro->notifier.notifier_call = raw_notifier;
276
277 register_netdevice_notifier(&ro->notifier);
278
279 return 0;
280}
281
282static int raw_release(struct socket *sock)
283{
284 struct sock *sk = sock->sk;
285 struct raw_sock *ro = raw_sk(sk);
286
287 unregister_netdevice_notifier(&ro->notifier);
288
289 lock_sock(sk);
290
291 /* remove current filters & unregister */
292 if (ro->bound) {
293 if (ro->ifindex) {
294 struct net_device *dev;
295
296 dev = dev_get_by_index(&init_net, ro->ifindex);
297 if (dev) {
298 raw_disable_allfilters(dev, sk);
299 dev_put(dev);
300 }
301 } else
302 raw_disable_allfilters(NULL, sk);
303 }
304
305 if (ro->count > 1)
306 kfree(ro->filter);
307
308 ro->ifindex = 0;
309 ro->bound = 0;
310 ro->count = 0;
311
312 release_sock(sk);
313 sock_put(sk);
314
315 return 0;
316}
317
318static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
319{
320 struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
321 struct sock *sk = sock->sk;
322 struct raw_sock *ro = raw_sk(sk);
323 int ifindex;
324 int err = 0;
325 int notify_enetdown = 0;
326
327 if (len < sizeof(*addr))
328 return -EINVAL;
329
330 lock_sock(sk);
331
332 if (ro->bound && addr->can_ifindex == ro->ifindex)
333 goto out;
334
335 if (addr->can_ifindex) {
336 struct net_device *dev;
337
338 dev = dev_get_by_index(&init_net, addr->can_ifindex);
339 if (!dev) {
340 err = -ENODEV;
341 goto out;
342 }
343 if (dev->type != ARPHRD_CAN) {
344 dev_put(dev);
345 err = -ENODEV;
346 goto out;
347 }
348 if (!(dev->flags & IFF_UP))
349 notify_enetdown = 1;
350
351 ifindex = dev->ifindex;
352
353 /* filters set by default/setsockopt */
354 err = raw_enable_allfilters(dev, sk);
355 dev_put(dev);
356
357 } else {
358 ifindex = 0;
359
360 /* filters set by default/setsockopt */
361 err = raw_enable_allfilters(NULL, sk);
362 }
363
364 if (!err) {
365 if (ro->bound) {
366 /* unregister old filters */
367 if (ro->ifindex) {
368 struct net_device *dev;
369
370 dev = dev_get_by_index(&init_net, ro->ifindex);
371 if (dev) {
372 raw_disable_allfilters(dev, sk);
373 dev_put(dev);
374 }
375 } else
376 raw_disable_allfilters(NULL, sk);
377 }
378 ro->ifindex = ifindex;
379 ro->bound = 1;
380 }
381
382 out:
383 release_sock(sk);
384
385 if (notify_enetdown) {
386 sk->sk_err = ENETDOWN;
387 if (!sock_flag(sk, SOCK_DEAD))
388 sk->sk_error_report(sk);
389 }
390
391 return err;
392}
393
394static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
395 int *len, int peer)
396{
397 struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
398 struct sock *sk = sock->sk;
399 struct raw_sock *ro = raw_sk(sk);
400
401 if (peer)
402 return -EOPNOTSUPP;
403
404 addr->can_family = AF_CAN;
405 addr->can_ifindex = ro->ifindex;
406
407 *len = sizeof(*addr);
408
409 return 0;
410}
411
412static int raw_setsockopt(struct socket *sock, int level, int optname,
413 char __user *optval, int optlen)
414{
415 struct sock *sk = sock->sk;
416 struct raw_sock *ro = raw_sk(sk);
417 struct can_filter *filter = NULL; /* dyn. alloc'ed filters */
418 struct can_filter sfilter; /* single filter */
419 struct net_device *dev = NULL;
420 can_err_mask_t err_mask = 0;
421 int count = 0;
422 int err = 0;
423
424 if (level != SOL_CAN_RAW)
425 return -EINVAL;
426 if (optlen < 0)
427 return -EINVAL;
428
429 switch (optname) {
430
431 case CAN_RAW_FILTER:
432 if (optlen % sizeof(struct can_filter) != 0)
433 return -EINVAL;
434
435 count = optlen / sizeof(struct can_filter);
436
437 if (count > 1) {
438 /* filter does not fit into dfilter => alloc space */
439 filter = kmalloc(optlen, GFP_KERNEL);
440 if (!filter)
441 return -ENOMEM;
442
443 err = copy_from_user(filter, optval, optlen);
444 if (err) {
445 kfree(filter);
446 return err;
447 }
448 } else if (count == 1) {
449 err = copy_from_user(&sfilter, optval, optlen);
450 if (err)
451 return err;
452 }
453
454 lock_sock(sk);
455
456 if (ro->bound && ro->ifindex)
457 dev = dev_get_by_index(&init_net, ro->ifindex);
458
459 if (ro->bound) {
460 /* (try to) register the new filters */
461 if (count == 1)
462 err = raw_enable_filters(dev, sk, &sfilter, 1);
463 else
464 err = raw_enable_filters(dev, sk, filter,
465 count);
466 if (err) {
467 if (count > 1)
468 kfree(filter);
469
470 goto out_fil;
471 }
472
473 /* remove old filter registrations */
474 raw_disable_filters(dev, sk, ro->filter, ro->count);
475 }
476
477 /* remove old filter space */
478 if (ro->count > 1)
479 kfree(ro->filter);
480
481 /* link new filters to the socket */
482 if (count == 1) {
483 /* copy filter data for single filter */
484 ro->dfilter = sfilter;
485 filter = &ro->dfilter;
486 }
487 ro->filter = filter;
488 ro->count = count;
489
490 out_fil:
491 if (dev)
492 dev_put(dev);
493
494 release_sock(sk);
495
496 break;
497
498 case CAN_RAW_ERR_FILTER:
499 if (optlen != sizeof(err_mask))
500 return -EINVAL;
501
502 err = copy_from_user(&err_mask, optval, optlen);
503 if (err)
504 return err;
505
506 err_mask &= CAN_ERR_MASK;
507
508 lock_sock(sk);
509
510 if (ro->bound && ro->ifindex)
511 dev = dev_get_by_index(&init_net, ro->ifindex);
512
513 /* remove current error mask */
514 if (ro->bound) {
515 /* (try to) register the new err_mask */
516 err = raw_enable_errfilter(dev, sk, err_mask);
517
518 if (err)
519 goto out_err;
520
521 /* remove old err_mask registration */
522 raw_disable_errfilter(dev, sk, ro->err_mask);
523 }
524
525 /* link new err_mask to the socket */
526 ro->err_mask = err_mask;
527
528 out_err:
529 if (dev)
530 dev_put(dev);
531
532 release_sock(sk);
533
534 break;
535
536 case CAN_RAW_LOOPBACK:
537 if (optlen != sizeof(ro->loopback))
538 return -EINVAL;
539
540 err = copy_from_user(&ro->loopback, optval, optlen);
541
542 break;
543
544 case CAN_RAW_RECV_OWN_MSGS:
545 if (optlen != sizeof(ro->recv_own_msgs))
546 return -EINVAL;
547
548 err = copy_from_user(&ro->recv_own_msgs, optval, optlen);
549
550 break;
551
552 default:
553 return -ENOPROTOOPT;
554 }
555 return err;
556}
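
A userspace sketch of the CAN_RAW_FILTER case above (assuming a bound
CAN_RAW socket s). Two filters are set, so the kernel takes the kmalloc()
path rather than the single dfilter:

	struct can_filter flt[2] = {
		{ .can_id = 0x123, .can_mask = CAN_SFF_MASK },
		{ .can_id = 0x456, .can_mask = CAN_SFF_MASK },
	};

	setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &flt, sizeof(flt));
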
557
558static int raw_getsockopt(struct socket *sock, int level, int optname,
559 char __user *optval, int __user *optlen)
560{
561 struct sock *sk = sock->sk;
562 struct raw_sock *ro = raw_sk(sk);
563 int len;
564 void *val;
565 int err = 0;
566
567 if (level != SOL_CAN_RAW)
568 return -EINVAL;
569 if (get_user(len, optlen))
570 return -EFAULT;
571 if (len < 0)
572 return -EINVAL;
573
574 switch (optname) {
575
576 case CAN_RAW_FILTER:
577 lock_sock(sk);
578 if (ro->count > 0) {
579 int fsize = ro->count * sizeof(struct can_filter);
580 if (len > fsize)
581 len = fsize;
582 err = copy_to_user(optval, ro->filter, len);
583 } else
584 len = 0;
585 release_sock(sk);
586
587 if (!err)
588 err = put_user(len, optlen);
589 return err;
590
591 case CAN_RAW_ERR_FILTER:
592 if (len > sizeof(can_err_mask_t))
593 len = sizeof(can_err_mask_t);
594 val = &ro->err_mask;
595 break;
596
597 case CAN_RAW_LOOPBACK:
598 if (len > sizeof(int))
599 len = sizeof(int);
600 val = &ro->loopback;
601 break;
602
603 case CAN_RAW_RECV_OWN_MSGS:
604 if (len > sizeof(int))
605 len = sizeof(int);
606 val = &ro->recv_own_msgs;
607 break;
608
609 default:
610 return -ENOPROTOOPT;
611 }
612
613 if (put_user(len, optlen))
614 return -EFAULT;
615 if (copy_to_user(optval, val, len))
616 return -EFAULT;
617 return 0;
618}
619
620static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
621 struct msghdr *msg, size_t size)
622{
623 struct sock *sk = sock->sk;
624 struct raw_sock *ro = raw_sk(sk);
625 struct sk_buff *skb;
626 struct net_device *dev;
627 int ifindex;
628 int err;
629
630 if (msg->msg_name) {
631 struct sockaddr_can *addr =
632 (struct sockaddr_can *)msg->msg_name;
633
634 if (addr->can_family != AF_CAN)
635 return -EINVAL;
636
637 ifindex = addr->can_ifindex;
638 } else
639 ifindex = ro->ifindex;
640
641 dev = dev_get_by_index(&init_net, ifindex);
642 if (!dev)
643 return -ENXIO;
644
645 skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT,
646 &err);
647 if (!skb) {
648 dev_put(dev);
649 return err;
650 }
651
652 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
653 if (err < 0) {
654 kfree_skb(skb);
655 dev_put(dev);
656 return err;
657 }
658 skb->dev = dev;
659 skb->sk = sk;
660
661 err = can_send(skb, ro->loopback);
662
663 dev_put(dev);
664
665 if (err)
666 return err;
667
668 return size;
669}
670
671static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
672 struct msghdr *msg, size_t size, int flags)
673{
674 struct sock *sk = sock->sk;
675 struct sk_buff *skb;
676 int error = 0;
677 int noblock;
678
679 noblock = flags & MSG_DONTWAIT;
680 flags &= ~MSG_DONTWAIT;
681
682 skb = skb_recv_datagram(sk, flags, noblock, &error);
683 if (!skb)
684 return error;
685
686 if (size < skb->len)
687 msg->msg_flags |= MSG_TRUNC;
688 else
689 size = skb->len;
690
691 error = memcpy_toiovec(msg->msg_iov, skb->data, size);
692 if (error < 0) {
693 skb_free_datagram(sk, skb);
694 return error;
695 }
696
697 sock_recv_timestamp(msg, sk, skb);
698
699 if (msg->msg_name) {
700 msg->msg_namelen = sizeof(struct sockaddr_can);
701 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
702 }
703
704 skb_free_datagram(sk, skb);
705
706 return size;
707}
708
709static struct proto_ops raw_ops __read_mostly = {
710 .family = PF_CAN,
711 .release = raw_release,
712 .bind = raw_bind,
713 .connect = sock_no_connect,
714 .socketpair = sock_no_socketpair,
715 .accept = sock_no_accept,
716 .getname = raw_getname,
717 .poll = datagram_poll,
718 .ioctl = NULL, /* use can_ioctl() from af_can.c */
719 .listen = sock_no_listen,
720 .shutdown = sock_no_shutdown,
721 .setsockopt = raw_setsockopt,
722 .getsockopt = raw_getsockopt,
723 .sendmsg = raw_sendmsg,
724 .recvmsg = raw_recvmsg,
725 .mmap = sock_no_mmap,
726 .sendpage = sock_no_sendpage,
727};
728
729static struct proto raw_proto __read_mostly = {
730 .name = "CAN_RAW",
731 .owner = THIS_MODULE,
732 .obj_size = sizeof(struct raw_sock),
733 .init = raw_init,
734};
735
736static struct can_proto raw_can_proto __read_mostly = {
737 .type = SOCK_RAW,
738 .protocol = CAN_RAW,
739 .capability = -1,
740 .ops = &raw_ops,
741 .prot = &raw_proto,
742};
743
744static __init int raw_module_init(void)
745{
746 int err;
747
748 printk(banner);
749
750 err = can_proto_register(&raw_can_proto);
751 if (err < 0)
752 printk(KERN_ERR "can: registration of raw protocol failed\n");
753
754 return err;
755}
756
757static __exit void raw_module_exit(void)
758{
759 can_proto_unregister(&raw_can_proto);
760}
761
762module_init(raw_module_init);
763module_exit(raw_module_exit);
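For reference, the socket options implemented above are driven from userspace with plain setsockopt() calls on a PF_CAN/SOCK_RAW socket. A minimal, self-contained sketch follows; it is not part of the patch, and the interface name "can0" and the filter ID 0x123 are placeholders.

/* Userspace sketch of the CAN_RAW options handled above. */
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/can.h>
#include <linux/can/raw.h>

int main(void)
{
	int s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
	struct sockaddr_can addr;
	struct ifreq ifr;
	struct can_filter rfilter = { .can_id = 0x123, .can_mask = CAN_SFF_MASK };
	can_err_mask_t err_mask = CAN_ERR_MASK;	/* receive all error frames */
	int loopback = 0;			/* disable local echo */

	if (s < 0)
		return 1;

	/* raw_setsockopt() insists that optlen matches the option exactly */
	setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
	setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER, &err_mask, sizeof(err_mask));
	setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback));

	strcpy(ifr.ifr_name, "can0");
	ioctl(s, SIOCGIFINDEX, &ifr);
	memset(&addr, 0, sizeof(addr));
	addr.can_family = AF_CAN;
	addr.can_ifindex = ifr.ifr_ifindex;
	return bind(s, (struct sockaddr *)&addr, sizeof(addr)) ? 1 : 0;
}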
diff --git a/net/compat.c b/net/compat.c
index 377e560ab5c9..80013fb69a61 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -20,7 +20,6 @@
 #include <linux/syscalls.h>
 #include <linux/filter.h>
 #include <linux/compat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/security.h>
 
 #include <net/scm.h>
@@ -317,107 +316,6 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
 }
 
 /*
- * For now, we assume that the compatibility and native version
- * of struct ipt_entry are the same - sfr. FIXME
- */
-struct compat_ipt_replace {
-	char			name[IPT_TABLE_MAXNAMELEN];
-	u32			valid_hooks;
-	u32			num_entries;
-	u32			size;
-	u32			hook_entry[NF_IP_NUMHOOKS];
-	u32			underflow[NF_IP_NUMHOOKS];
-	u32			num_counters;
-	compat_uptr_t		counters;	/* struct ipt_counters * */
-	struct ipt_entry	entries[0];
-};
-
-static int do_netfilter_replace(int fd, int level, int optname,
-				char __user *optval, int optlen)
-{
-	struct compat_ipt_replace __user *urepl;
-	struct ipt_replace __user *repl_nat;
-	char name[IPT_TABLE_MAXNAMELEN];
-	u32 origsize, tmp32, num_counters;
-	unsigned int repl_nat_size;
-	int ret;
-	int i;
-	compat_uptr_t ucntrs;
-
-	urepl = (struct compat_ipt_replace __user *)optval;
-	if (get_user(origsize, &urepl->size))
-		return -EFAULT;
-
-	/* Hack: Causes ipchains to give correct error msg --RR */
-	if (optlen != sizeof(*urepl) + origsize)
-		return -ENOPROTOOPT;
-
-	/* XXX Assumes that size of ipt_entry is the same both in
-	 * native and compat environments.
-	 */
-	repl_nat_size = sizeof(*repl_nat) + origsize;
-	repl_nat = compat_alloc_user_space(repl_nat_size);
-
-	ret = -EFAULT;
-	if (put_user(origsize, &repl_nat->size))
-		goto out;
-
-	if (!access_ok(VERIFY_READ, urepl, optlen) ||
-	    !access_ok(VERIFY_WRITE, repl_nat, optlen))
-		goto out;
-
-	if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
-	    __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->valid_hooks) ||
-	    __put_user(tmp32, &repl_nat->valid_hooks))
-		goto out;
-
-	if (__get_user(tmp32, &urepl->num_entries) ||
-	    __put_user(tmp32, &repl_nat->num_entries))
-		goto out;
-
-	if (__get_user(num_counters, &urepl->num_counters) ||
-	    __put_user(num_counters, &repl_nat->num_counters))
-		goto out;
-
-	if (__get_user(ucntrs, &urepl->counters) ||
-	    __put_user(compat_ptr(ucntrs), &repl_nat->counters))
-		goto out;
-
-	if (__copy_in_user(&repl_nat->entries[0],
-			   &urepl->entries[0],
-			   origsize))
-		goto out;
-
-	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-		if (__get_user(tmp32, &urepl->hook_entry[i]) ||
-		    __put_user(tmp32, &repl_nat->hook_entry[i]) ||
-		    __get_user(tmp32, &urepl->underflow[i]) ||
-		    __put_user(tmp32, &repl_nat->underflow[i]))
-			goto out;
-	}
-
-	/*
-	 * Since struct ipt_counters just contains two u_int64_t members
-	 * we can just do the access_ok check here and pass the (converted)
-	 * pointer into the standard syscall. We hope that the pointer is
-	 * not misaligned ...
-	 */
-	if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
-		       num_counters * sizeof(struct ipt_counters)))
-		goto out;
-
-
-	ret = sys_setsockopt(fd, level, optname,
-			     (char __user *)repl_nat, repl_nat_size);
-
-out:
-	return ret;
-}
-
-/*
  * A struct sock_filter is architecture independent.
  */
 struct compat_sock_fprog {
@@ -485,10 +383,6 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
 	int err;
 	struct socket *sock;
 
-	if (level == SOL_IPV6 && optname == IPT_SO_SET_REPLACE)
-		return do_netfilter_replace(fd, level, optname,
-					    optval, optlen);
-
 	if (optlen < 0)
 		return -EINVAL;
 
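The deleted helper above was one instance of the standard compat "thunk" pattern: widen a 32-bit user structure into a native copy placed on the compat user stack, then hand the converted pointer to the native handler. A generic sketch of that pattern follows; struct foo32 and struct foo are hypothetical, while compat_alloc_user_space(), compat_ptr(), get_user() and put_user() are the real helpers the deleted code used.

/* Generic compat thunk sketch (hypothetical structures). */
struct foo32 {
	u32		flags;
	compat_uptr_t	data;
};

struct foo {
	u32		flags;
	void __user	*data;
};

static int widen_foo(struct foo32 __user *ufoo32, struct foo __user **ufoo)
{
	compat_uptr_t data;
	u32 flags;

	/* native-layout copy lives on the compat user stack */
	*ufoo = compat_alloc_user_space(sizeof(**ufoo));
	if (get_user(flags, &ufoo32->flags) ||
	    get_user(data, &ufoo32->data) ||
	    put_user(flags, &(*ufoo)->flags) ||
	    put_user(compat_ptr(data), &(*ufoo)->data))
		return -EFAULT;
	return 0;
}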
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 029b93e246b4..8a28fc93b724 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -115,10 +115,10 @@ out_noerr:
 }
 
 /**
- *	skb_recv_datagram - Receive a datagram skbuff
+ *	__skb_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
  *	@flags: MSG_ flags
- *	@noblock: blocking operation?
+ *	@peeked: returns non-zero if this packet has been seen before
  *	@err: error code returned
  *
  *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -143,8 +143,8 @@ out_noerr:
 *	quite explicitly by POSIX 1003.1g, don't change them without having
 *	the standard around please.
 */
-struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-				  int noblock, int *err)
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    int *peeked, int *err)
 {
 	struct sk_buff *skb;
 	long timeo;
@@ -156,7 +156,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 	if (error)
 		goto no_packet;
 
-	timeo = sock_rcvtimeo(sk, noblock);
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	do {
 		/* Again only user level code calls this function, so nothing
@@ -165,18 +165,19 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 		 * Look at current nfs client by the way...
 		 * However, this function was corrent in any case. 8)
 		 */
-		if (flags & MSG_PEEK) {
-			unsigned long cpu_flags;
-
-			spin_lock_irqsave(&sk->sk_receive_queue.lock,
-					  cpu_flags);
-			skb = skb_peek(&sk->sk_receive_queue);
-			if (skb)
+		unsigned long cpu_flags;
+
+		spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb) {
+			*peeked = skb->peeked;
+			if (flags & MSG_PEEK) {
+				skb->peeked = 1;
 				atomic_inc(&skb->users);
-			spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
-					       cpu_flags);
-		} else
-			skb = skb_dequeue(&sk->sk_receive_queue);
+			} else
+				__skb_unlink(skb, &sk->sk_receive_queue);
+		}
+		spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
 
 		if (skb)
 			return skb;
@@ -194,10 +195,21 @@ no_packet:
 	*err = error;
 	return NULL;
 }
+EXPORT_SYMBOL(__skb_recv_datagram);
+
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+				  int noblock, int *err)
+{
+	int peeked;
+
+	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				   &peeked, err);
+}
 
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
 	kfree_skb(skb);
+	sk_mem_reclaim(sk);
 }
 
 /**
@@ -217,20 +229,28 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
+ *
+ *	It returns 0 if the packet was removed by us.
 */
 
-void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 {
+	int err = 0;
+
 	if (flags & MSG_PEEK) {
+		err = -ENOENT;
 		spin_lock_bh(&sk->sk_receive_queue.lock);
 		if (skb == skb_peek(&sk->sk_receive_queue)) {
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			atomic_dec(&skb->users);
+			err = 0;
 		}
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 	}
 
 	kfree_skb(skb);
+	sk_mem_reclaim(sk);
+	return err;
 }
 
 EXPORT_SYMBOL(skb_kill_datagram);
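To see how a caller consumes the new pair of interfaces: "peeked" reports whether the skb had already been seen by an earlier MSG_PEEK, and skb_kill_datagram() now tells the caller whether it was the one to unlink a bad packet (so errors are only accounted once). The following is a condensed sketch, not a kernel excerpt; checksum_is_bad() and account_rx_error() are hypothetical helpers standing in for protocol-specific logic.

static int example_recvmsg(struct sock *sk, struct msghdr *msg,
			   size_t len, int flags)
{
	struct sk_buff *skb;
	int peeked, err;

	skb = __skb_recv_datagram(sk, flags, &peeked, &err);
	if (!skb)
		return err;

	if (checksum_is_bad(skb)) {
		/* returns 0 only if this caller removed the packet */
		if (!skb_kill_datagram(sk, skb, flags))
			account_rx_error(sk);
		return -EAGAIN;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
	skb_free_datagram(sk, skb);
	return err ? err : len;
}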
diff --git a/net/core/dev.c b/net/core/dev.c
index 0879f52115eb..c9c593e1ba6f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -150,8 +150,11 @@
  * 86DD	IPv6
  */
 
+#define PTYPE_HASH_SIZE	(16)
+#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
+
 static DEFINE_SPINLOCK(ptype_lock);
-static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
+static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 static struct list_head ptype_all __read_mostly;	/* Taps */
 
 #ifdef CONFIG_NET_DMA
@@ -362,7 +365,7 @@ void dev_add_pack(struct packet_type *pt)
 	if (pt->type == htons(ETH_P_ALL))
 		list_add_rcu(&pt->list, &ptype_all);
 	else {
-		hash = ntohs(pt->type) & 15;
+		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 		list_add_rcu(&pt->list, &ptype_base[hash]);
 	}
 	spin_unlock_bh(&ptype_lock);
@@ -391,7 +394,7 @@ void __dev_remove_pack(struct packet_type *pt)
 	if (pt->type == htons(ETH_P_ALL))
 		head = &ptype_all;
 	else
-		head = &ptype_base[ntohs(pt->type) & 15];
+		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 
 	list_for_each_entry(pt1, head, list) {
 		if (pt == pt1) {
@@ -672,7 +675,7 @@ struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *h
 
 	ASSERT_RTNL();
 
-	for_each_netdev(&init_net, dev)
+	for_each_netdev(net, dev)
 		if (dev->type == type &&
 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 			return dev;
@@ -1420,7 +1423,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 				err = ptype->gso_send_check(skb);
@@ -2077,7 +2081,8 @@ ncls:
 		goto out;
 
 	type = skb->protocol;
-	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type &&
 		    (!ptype->dev || ptype->dev == skb->dev)) {
 			if (pt_prev)
@@ -2363,8 +2368,9 @@ static int dev_ifconf(struct net *net, char __user *arg)
 *	in detail.
 */
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	loff_t off;
 	struct net_device *dev;
 
@@ -2382,13 +2388,14 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	++*pos;
 	return v == SEQ_START_TOKEN ?
 		first_net_device(net) : next_net_device((struct net_device *)v);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	read_unlock(&dev_base_lock);
 }
@@ -2481,26 +2488,8 @@ static const struct seq_operations dev_seq_ops = {
 
 static int dev_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int res;
-	res = seq_open(file, &dev_seq_ops);
-	if (!res) {
-		seq = file->private_data;
-		seq->private = get_proc_net(inode);
-		if (!seq->private) {
-			seq_release(inode, file);
-			res = -ENXIO;
-		}
-	}
-	return res;
-}
-
-static int dev_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct net *net = seq->private;
-	put_net(net);
-	return seq_release(inode, file);
+	return seq_open_net(inode, file, &dev_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations dev_seq_fops = {
@@ -2508,7 +2497,7 @@ static const struct file_operations dev_seq_fops = {
 	.open    = dev_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = dev_seq_release,
+	.release = seq_release_net,
 };
 
 static const struct seq_operations softnet_seq_ops = {
@@ -2543,7 +2532,7 @@ static void *ptype_get_idx(loff_t pos)
 		++i;
 	}
 
-	for (t = 0; t < 16; t++) {
+	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
 			if (i == pos)
 				return pt;
@@ -2554,6 +2543,7 @@
 }
 
 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
 	rcu_read_lock();
 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
@@ -2577,10 +2567,10 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		hash = 0;
 		nxt = ptype_base[0].next;
 	} else
-		hash = ntohs(pt->type) & 15;
+		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
 
 	while (nxt == &ptype_base[hash]) {
-		if (++hash >= 16)
+		if (++hash >= PTYPE_HASH_SIZE)
 			return NULL;
 		nxt = ptype_base[hash].next;
 	}
@@ -2589,6 +2579,7 @@ found:
 }
 
 static void ptype_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
 	rcu_read_unlock();
 }
@@ -3505,7 +3496,7 @@ static int dev_new_index(struct net *net)
 
 /* Delayed registration/unregisteration */
 static DEFINE_SPINLOCK(net_todo_list_lock);
-static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
+static LIST_HEAD(net_todo_list);
 
 static void net_set_todo(struct net_device *dev)
 {
@@ -3984,6 +3975,8 @@ void synchronize_net(void)
 
 void unregister_netdevice(struct net_device *dev)
 {
+	ASSERT_RTNL();
+
 	rollback_registered(dev);
 	/* Finish processing unregister after unlock */
 	net_set_todo(dev);
@@ -4416,7 +4409,7 @@ static int __init net_dev_init(void)
 		goto out;
 
 	INIT_LIST_HEAD(&ptype_all);
-	for (i = 0; i < 16; i++)
+	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
 
 	if (register_pernet_subsys(&netdev_net_ops))
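The bucket chosen by these hash sites is simply the low four bits of the host-order protocol number (pt->type is stored big-endian, hence the ntohs() in the kernel code). A quick standalone illustration, using the standard ETH_P_* values in the comments:

#include <stdio.h>

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

int main(void)
{
	unsigned short types[] = { 0x0800, 0x0806, 0x86DD };
	/* ETH_P_IP -> bucket 0, ETH_P_ARP -> bucket 6, ETH_P_IPV6 -> bucket 13 */
	for (int i = 0; i < 3; i++)
		printf("type 0x%04x -> bucket %u\n",
		       types[i], types[i] & PTYPE_HASH_MASK);
	return 0;
}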
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 69fff16ece10..cadbfbf7e7f5 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -186,8 +186,9 @@ EXPORT_SYMBOL(dev_mc_unsync);
 
 #ifdef CONFIG_PROC_FS
 static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
-	struct net *net = seq->private;
+	struct net *net = seq_file_net(seq);
 	struct net_device *dev;
 	loff_t off = 0;
 
@@ -206,6 +207,7 @@ static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void dev_mc_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	read_unlock(&dev_base_lock);
 }
@@ -241,26 +243,8 @@ static const struct seq_operations dev_mc_seq_ops = {
 
 static int dev_mc_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int res;
-	res = seq_open(file, &dev_mc_seq_ops);
-	if (!res) {
-		seq = file->private_data;
-		seq->private = get_proc_net(inode);
-		if (!seq->private) {
-			seq_release(inode, file);
-			res = -ENXIO;
-		}
-	}
-	return res;
-}
-
-static int dev_mc_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct net *net = seq->private;
-	put_net(net);
-	return seq_release(inode, file);
+	return seq_open_net(inode, file, &dev_mc_seq_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations dev_mc_seq_fops = {
@@ -268,7 +252,7 @@ static const struct file_operations dev_mc_seq_fops = {
 	.open    = dev_mc_seq_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = dev_mc_seq_release,
+	.release = seq_release_net,
 };
 
 #endif
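Both of the converted openers (dev.c and dev_mcast.c above) now share the same boilerplate: seq_open_net() allocates a struct seq_net_private so the iterators can recover the namespace via seq_file_net(), and seq_release_net() drops it again. A sketch of the resulting pattern for any per-namespace proc file; the ops table example_seq_ops is hypothetical:

static int example_seq_open(struct inode *inode, struct file *file)
{
	/* stashes a seq_net_private holding the inode's netns */
	return seq_open_net(inode, file, &example_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations example_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = example_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};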
diff --git a/net/core/dst.c b/net/core/dst.c
index 03daead3592a..7deef483c79f 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -153,18 +153,19 @@ loop:
 #endif
 }
 
-static int dst_discard(struct sk_buff *skb)
+int dst_discard(struct sk_buff *skb)
 {
 	kfree_skb(skb);
 	return 0;
 }
+EXPORT_SYMBOL(dst_discard);
 
 void * dst_alloc(struct dst_ops * ops)
 {
 	struct dst_entry * dst;
 
 	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
-		if (ops->gc())
+		if (ops->gc(ops))
 			return NULL;
 	}
 	dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
@@ -278,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	if (!unregister) {
 		dst->input = dst->output = dst_discard;
 	} else {
-		dst->dev = init_net.loopback_dev;
+		dst->dev = dst->dev->nd_net->loopback_dev;
 		dev_hold(dst->dev);
 		dev_put(dev);
 		if (dst->neighbour && dst->neighbour->dev == dev) {
-			dst->neighbour->dev = init_net.loopback_dev;
+			dst->neighbour->dev = dst->dev;
+			dev_hold(dst->dev);
 			dev_put(dev);
-			dev_hold(dst->neighbour->dev);
 		}
 	}
 }
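The ->gc() hook now receives its own struct dst_ops, so a garbage collector no longer needs globals to find its tables. A hedged sketch of a client, with a trivial placeholder collector and an illustrative threshold (not a real in-tree implementation):

static int example_gc(struct dst_ops *ops)
{
	/* a real collector would reclaim entries here; returning
	 * non-zero tells dst_alloc() to fail the allocation */
	return atomic_read(&ops->entries) > ops->gc_thresh;
}

static struct dst_ops example_dst_ops = {
	.gc        = example_gc,
	.gc_thresh = 1024,	/* illustrative */
};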
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 848132b6cb73..42ccaf5b8509 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -15,9 +15,6 @@
 #include <net/sock.h>
 #include <net/fib_rules.h>
 
-static LIST_HEAD(rules_ops);
-static DEFINE_SPINLOCK(rules_mod_lock);
-
 int fib_default_rule_add(struct fib_rules_ops *ops,
 			 u32 pref, u32 table, u32 flags)
 {
@@ -32,6 +29,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 	r->pref = pref;
 	r->table = table;
 	r->flags = flags;
+	r->fr_net = ops->fro_net;
 
 	/* The lock is not required here, the list in unreacheable
 	 * at the moment this function is called */
@@ -44,12 +42,12 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid);
 
-static struct fib_rules_ops *lookup_rules_ops(int family)
+static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
 {
 	struct fib_rules_ops *ops;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ops, &rules_ops, list) {
+	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
 		if (ops->family == family) {
 			if (!try_module_get(ops->owner))
 				ops = NULL;
@@ -78,6 +76,9 @@ int fib_rules_register(struct fib_rules_ops *ops)
 {
 	int err = -EEXIST;
 	struct fib_rules_ops *o;
+	struct net *net;
+
+	net = ops->fro_net;
 
 	if (ops->rule_size < sizeof(struct fib_rule))
 		return -EINVAL;
@@ -87,22 +88,23 @@
 	    ops->action == NULL)
 		return -EINVAL;
 
-	spin_lock(&rules_mod_lock);
-	list_for_each_entry(o, &rules_ops, list)
+	spin_lock(&net->rules_mod_lock);
+	list_for_each_entry(o, &net->rules_ops, list)
 		if (ops->family == o->family)
 			goto errout;
 
-	list_add_tail_rcu(&ops->list, &rules_ops);
+	hold_net(net);
+	list_add_tail_rcu(&ops->list, &net->rules_ops);
 	err = 0;
 errout:
-	spin_unlock(&rules_mod_lock);
+	spin_unlock(&net->rules_mod_lock);
 
 	return err;
 }
 
 EXPORT_SYMBOL_GPL(fib_rules_register);
 
-static void cleanup_ops(struct fib_rules_ops *ops)
+void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 {
 	struct fib_rule *rule, *tmp;
 
@@ -111,28 +113,19 @@ static void cleanup_ops(struct fib_rules_ops *ops)
 		fib_rule_put(rule);
 	}
 }
+EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
 
-int fib_rules_unregister(struct fib_rules_ops *ops)
+void fib_rules_unregister(struct fib_rules_ops *ops)
 {
-	int err = 0;
-	struct fib_rules_ops *o;
-
-	spin_lock(&rules_mod_lock);
-	list_for_each_entry(o, &rules_ops, list) {
-		if (o == ops) {
-			list_del_rcu(&o->list);
-			cleanup_ops(ops);
-			goto out;
-		}
-	}
+	struct net *net = ops->fro_net;
 
-	err = -ENOENT;
-out:
-	spin_unlock(&rules_mod_lock);
+	spin_lock(&net->rules_mod_lock);
+	list_del_rcu(&ops->list);
+	fib_rules_cleanup_ops(ops);
+	spin_unlock(&net->rules_mod_lock);
 
 	synchronize_rcu();
-
-	return err;
+	release_net(net);
 }
 
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
@@ -231,7 +224,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
 		goto errout;
 
-	ops = lookup_rules_ops(frh->family);
+	ops = lookup_rules_ops(net, frh->family);
 	if (ops == NULL) {
 		err = EAFNOSUPPORT;
 		goto errout;
@@ -250,6 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		err = -ENOMEM;
 		goto errout;
 	}
+	rule->fr_net = net;
 
 	if (tb[FRA_PRIORITY])
 		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
@@ -281,7 +275,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		rule->table = frh_get_table(frh, tb);
 
 	if (!rule->pref && ops->default_pref)
-		rule->pref = ops->default_pref();
+		rule->pref = ops->default_pref(ops);
 
 	err = -EINVAL;
 	if (tb[FRA_GOTO]) {
@@ -358,6 +352,7 @@ errout:
 
 static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
 	struct fib_rule *rule, *tmp;
@@ -367,7 +362,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
 		goto errout;
 
-	ops = lookup_rules_ops(frh->family);
+	ops = lookup_rules_ops(net, frh->family);
 	if (ops == NULL) {
 		err = EAFNOSUPPORT;
 		goto errout;
@@ -539,13 +534,14 @@ skip:
 
 static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	struct fib_rules_ops *ops;
 	int idx = 0, family;
 
 	family = rtnl_msg_family(cb->nlh);
 	if (family != AF_UNSPEC) {
 		/* Protocol specific dump request */
-		ops = lookup_rules_ops(family);
+		ops = lookup_rules_ops(net, family);
 		if (ops == NULL)
 			return -EAFNOSUPPORT;
 
@@ -553,7 +549,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ops, &rules_ops, list) {
+	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
 		if (idx < cb->args[0] || !try_module_get(ops->owner))
 			goto skip;
 
@@ -574,9 +570,11 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid)
 {
+	struct net *net;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
+	net = ops->fro_net;
 	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
@@ -588,10 +586,11 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+
+	err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(ops->nlgroup, err);
+		rtnl_set_sk_err(net, ops->nlgroup, err);
 }
 
 static void attach_rules(struct list_head *rules, struct net_device *dev)
@@ -619,22 +618,20 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 			   void *ptr)
 {
 	struct net_device *dev = ptr;
+	struct net *net = dev->nd_net;
 	struct fib_rules_ops *ops;
 
-	if (dev->nd_net != &init_net)
-		return NOTIFY_DONE;
-
 	ASSERT_RTNL();
 	rcu_read_lock();
 
 	switch (event) {
 	case NETDEV_REGISTER:
-		list_for_each_entry(ops, &rules_ops, list)
+		list_for_each_entry(ops, &net->rules_ops, list)
 			attach_rules(&ops->rules_list, dev);
 		break;
 
 	case NETDEV_UNREGISTER:
-		list_for_each_entry(ops, &rules_ops, list)
+		list_for_each_entry(ops, &net->rules_ops, list)
 			detach_rules(&ops->rules_list, dev);
 		break;
 	}
@@ -648,13 +645,40 @@ static struct notifier_block fib_rules_notifier = {
 	.notifier_call = fib_rules_event,
 };
 
+static int fib_rules_net_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->rules_ops);
+	spin_lock_init(&net->rules_mod_lock);
+	return 0;
+}
+
+static struct pernet_operations fib_rules_net_ops = {
+	.init = fib_rules_net_init,
+};
+
 static int __init fib_rules_init(void)
 {
+	int err;
 	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
 	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
 	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
 
-	return register_netdevice_notifier(&fib_rules_notifier);
+	err = register_netdevice_notifier(&fib_rules_notifier);
+	if (err < 0)
+		goto fail;
+
+	err = register_pernet_subsys(&fib_rules_net_ops);
+	if (err < 0)
+		goto fail_unregister;
+	return 0;
+
+fail_unregister:
+	unregister_netdevice_notifier(&fib_rules_notifier);
+fail:
+	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
+	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
+	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
+	return err;
 }
 
 subsys_initcall(fib_rules_init);
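After this conversion a rules client ties each fib_rules_ops instance to one namespace through ->fro_net before registering it, typically by duplicating a template per namespace. A hedged sketch of that shape (example_rules_template and the init hook are hypothetical, not in-tree code):

static int __net_init example_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;

	ops = kmemdup(&example_rules_template, sizeof(*ops), GFP_KERNEL);
	if (!ops)
		return -ENOMEM;
	ops->fro_net = net;		/* bind this copy to the netns */
	INIT_LIST_HEAD(&ops->rules_list);
	return fib_rules_register(ops);
}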
diff --git a/net/core/flow.c b/net/core/flow.c
index 6489f4e24ecf..46b38e06e0d7 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -352,8 +352,7 @@ static int __init flow_cache_init(void)
 	flow_lwm = 2 * flow_hash_size;
 	flow_hwm = 4 * flow_hash_size;
 
-	init_timer(&flow_hash_rnd_timer);
-	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
+	setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
 	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
 	add_timer(&flow_hash_rnd_timer);
 
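This conversion (and the matching ones in neighbour.c below) is purely mechanical: setup_timer() is shorthand for the three-step open-coded initialization. Side by side, with a hypothetical callback of the old void (*)(unsigned long) timer signature:

static void my_timer_fn(unsigned long data);
static struct timer_list my_timer;

static void init_old_way(void)
{
	init_timer(&my_timer);
	my_timer.function = my_timer_fn;
	my_timer.data = 0UL;
}

static void init_new_way(void)
{
	/* identical effect in one call */
	setup_timer(&my_timer, my_timer_fn, 0UL);
}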
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index daadbcc4e8dd..57abe8266be1 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -135,7 +135,7 @@ skip:
 	}
 
 	if (!list_empty(&elist[idx].list))
-		mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
 	rcu_read_unlock();
 }
 
@@ -159,13 +159,13 @@ skip:
 int gen_new_estimator(struct gnet_stats_basic *bstats,
 		      struct gnet_stats_rate_est *rate_est,
 		      spinlock_t *stats_lock,
-		      struct rtattr *opt)
+		      struct nlattr *opt)
 {
 	struct gen_estimator *est;
-	struct gnet_estimator *parm = RTA_DATA(opt);
+	struct gnet_estimator *parm = nla_data(opt);
 	int idx;
 
-	if (RTA_PAYLOAD(opt) < sizeof(*parm))
+	if (nla_len(opt) < sizeof(*parm))
 		return -EINVAL;
 
 	if (parm->interval < -2 || parm->interval > 3)
@@ -191,7 +191,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 	}
 
 	if (list_empty(&elist[idx].list))
-		mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
 
 	list_add_rcu(&est->list, &elist[idx].list);
 	return 0;
@@ -241,7 +241,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
 }
 
 /**
- * gen_replace_estimator - replace rate estimator configruation
+ * gen_replace_estimator - replace rate estimator configuration
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 * @stats_lock: statistics lock
@@ -252,13 +252,12 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
 *
 * Returns 0 on success or a negative error code.
 */
-int
-gen_replace_estimator(struct gnet_stats_basic *bstats,
-	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
-	struct rtattr *opt)
+int gen_replace_estimator(struct gnet_stats_basic *bstats,
+			  struct gnet_stats_rate_est *rate_est,
+			  spinlock_t *stats_lock, struct nlattr *opt)
 {
 	gen_kill_estimator(bstats, rate_est);
 	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
 }
 
 
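With the rtattr-to-nlattr conversion, callers now hand the estimator configuration over as a struct nlattr, typically the TCA_RATE attribute a qdisc or action parsed out of its netlink message. A minimal caller sketch (the wrapper itself is illustrative, the called API is the one above):

static int example_attach_estimator(struct gnet_stats_basic *bstats,
				    struct gnet_stats_rate_est *rate_est,
				    spinlock_t *lock, struct nlattr *tb[])
{
	if (tb[TCA_RATE] == NULL)
		return 0;	/* no estimator requested */

	/* nla_len()/nla_data() are validated inside gen_new_estimator() */
	return gen_new_estimator(bstats, rate_est, lock, tb[TCA_RATE]);
}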
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index bcc25591d8ac..c3d0ffeac243 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -20,16 +20,17 @@
 #include <linux/socket.h>
 #include <linux/rtnetlink.h>
 #include <linux/gen_stats.h>
+#include <net/netlink.h>
 #include <net/gen_stats.h>
 
 
 static inline int
 gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
 {
-	RTA_PUT(d->skb, type, size, buf);
+	NLA_PUT(d->skb, type, size, buf);
 	return 0;
 
-rtattr_failure:
+nla_put_failure:
 	spin_unlock_bh(d->lock);
 	return -1;
 }
@@ -55,13 +56,14 @@ rtattr_failure:
 int
 gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 	int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+	__acquires(lock)
 {
 	memset(d, 0, sizeof(*d));
 
 	spin_lock_bh(lock);
 	d->lock = lock;
 	if (type)
-		d->tail = (struct rtattr *)skb_tail_pointer(skb);
+		d->tail = (struct nlattr *)skb_tail_pointer(skb);
 	d->skb = skb;
 	d->compat_tc_stats = tc_stats_type;
 	d->compat_xstats = xstats_type;
@@ -212,7 +214,7 @@ int
 gnet_stats_finish_copy(struct gnet_dump *d)
 {
 	if (d->tail)
-		d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
+		d->tail->nla_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
 
 	if (d->compat_tc_stats)
 		if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
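The NLA_PUT() macro jumps to a local nla_put_failure label when the skb runs out of tail room, which is why the label rename above is an inseparable part of the conversion. A minimal usage sketch; struct my_stats is hypothetical, TCA_STATS_BASIC is the real attribute type from linux/gen_stats.h:

static int example_fill(struct sk_buff *skb, struct my_stats *st)
{
	NLA_PUT(skb, TCA_STATS_BASIC, sizeof(st->basic), &st->basic);
	return 0;

nla_put_failure:
	/* NLA_PUT() lands here when the message buffer is full */
	return -EMSGSIZE;
}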
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 29b8ee4e35d6..a16cf1ec5e5e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -59,7 +59,6 @@ static void neigh_timer_handler(unsigned long arg);
59static void __neigh_notify(struct neighbour *n, int type, int flags); 59static void __neigh_notify(struct neighbour *n, int type, int flags);
60static void neigh_update_notify(struct neighbour *neigh); 60static void neigh_update_notify(struct neighbour *neigh);
61static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); 61static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
63 62
64static struct neigh_table *neigh_tables; 63static struct neigh_table *neigh_tables;
65#ifdef CONFIG_PROC_FS 64#ifdef CONFIG_PROC_FS
@@ -165,6 +164,16 @@ static int neigh_forced_gc(struct neigh_table *tbl)
165 return shrunk; 164 return shrunk;
166} 165}
167 166
167static void neigh_add_timer(struct neighbour *n, unsigned long when)
168{
169 neigh_hold(n);
170 if (unlikely(mod_timer(&n->timer, when))) {
171 printk("NEIGH: BUG, double timer add, state is %x\n",
172 n->nud_state);
173 dump_stack();
174 }
175}
176
168static int neigh_del_timer(struct neighbour *n) 177static int neigh_del_timer(struct neighbour *n)
169{ 178{
170 if ((n->nud_state & NUD_IN_TIMER) && 179 if ((n->nud_state & NUD_IN_TIMER) &&
@@ -270,9 +279,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
270 n->nud_state = NUD_NONE; 279 n->nud_state = NUD_NONE;
271 n->output = neigh_blackhole; 280 n->output = neigh_blackhole;
272 n->parms = neigh_parms_clone(&tbl->parms); 281 n->parms = neigh_parms_clone(&tbl->parms);
273 init_timer(&n->timer); 282 setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
274 n->timer.function = neigh_timer_handler;
275 n->timer.data = (unsigned long)n;
276 283
277 NEIGH_CACHE_STAT_INC(tbl, allocs); 284 NEIGH_CACHE_STAT_INC(tbl, allocs);
278 n->tbl = tbl; 285 n->tbl = tbl;
@@ -367,7 +374,8 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
367 return n; 374 return n;
368} 375}
369 376
370struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) 377struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
378 const void *pkey)
371{ 379{
372 struct neighbour *n; 380 struct neighbour *n;
373 int key_len = tbl->key_len; 381 int key_len = tbl->key_len;
@@ -377,7 +385,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
377 385
378 read_lock_bh(&tbl->lock); 386 read_lock_bh(&tbl->lock);
379 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { 387 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
380 if (!memcmp(n->primary_key, pkey, key_len)) { 388 if (!memcmp(n->primary_key, pkey, key_len) &&
389 (net == n->dev->nd_net)) {
381 neigh_hold(n); 390 neigh_hold(n);
382 NEIGH_CACHE_STAT_INC(tbl, hits); 391 NEIGH_CACHE_STAT_INC(tbl, hits);
383 break; 392 break;
@@ -455,7 +464,8 @@ out_neigh_release:
455 goto out; 464 goto out;
456} 465}
457 466
458struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, 467struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
468 struct net *net, const void *pkey,
459 struct net_device *dev, int creat) 469 struct net_device *dev, int creat)
460{ 470{
461 struct pneigh_entry *n; 471 struct pneigh_entry *n;
@@ -471,6 +481,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
471 481
472 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { 482 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
473 if (!memcmp(n->key, pkey, key_len) && 483 if (!memcmp(n->key, pkey, key_len) &&
484 (n->net == net) &&
474 (n->dev == dev || !n->dev)) { 485 (n->dev == dev || !n->dev)) {
475 read_unlock_bh(&tbl->lock); 486 read_unlock_bh(&tbl->lock);
476 goto out; 487 goto out;
@@ -487,6 +498,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
487 if (!n) 498 if (!n)
488 goto out; 499 goto out;
489 500
501 n->net = hold_net(net);
490 memcpy(n->key, pkey, key_len); 502 memcpy(n->key, pkey, key_len);
491 n->dev = dev; 503 n->dev = dev;
492 if (dev) 504 if (dev)
@@ -509,7 +521,7 @@ out:
509} 521}
510 522
511 523
512int pneigh_delete(struct neigh_table *tbl, const void *pkey, 524int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
513 struct net_device *dev) 525 struct net_device *dev)
514{ 526{
515 struct pneigh_entry *n, **np; 527 struct pneigh_entry *n, **np;
@@ -524,13 +536,15 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey,
524 write_lock_bh(&tbl->lock); 536 write_lock_bh(&tbl->lock);
525 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; 537 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
526 np = &n->next) { 538 np = &n->next) {
527 if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { 539 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
540 (n->net == net)) {
528 *np = n->next; 541 *np = n->next;
529 write_unlock_bh(&tbl->lock); 542 write_unlock_bh(&tbl->lock);
530 if (tbl->pdestructor) 543 if (tbl->pdestructor)
531 tbl->pdestructor(n); 544 tbl->pdestructor(n);
532 if (n->dev) 545 if (n->dev)
533 dev_put(n->dev); 546 dev_put(n->dev);
547 release_net(n->net);
534 kfree(n); 548 kfree(n);
535 return 0; 549 return 0;
536 } 550 }
@@ -553,6 +567,7 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
553 tbl->pdestructor(n); 567 tbl->pdestructor(n);
554 if (n->dev) 568 if (n->dev)
555 dev_put(n->dev); 569 dev_put(n->dev);
570 release_net(n->net);
556 kfree(n); 571 kfree(n);
557 continue; 572 continue;
558 } 573 }
@@ -562,6 +577,13 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
562 return -ENOENT; 577 return -ENOENT;
563} 578}
564 579
580static void neigh_parms_destroy(struct neigh_parms *parms);
581
582static inline void neigh_parms_put(struct neigh_parms *parms)
583{
584 if (atomic_dec_and_test(&parms->refcnt))
585 neigh_parms_destroy(parms);
586}
565 587
566/* 588/*
567 * neighbour must already be out of the table; 589 * neighbour must already be out of the table;
@@ -718,15 +740,6 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
718 p->ucast_probes + p->app_probes + p->mcast_probes); 740 p->ucast_probes + p->app_probes + p->mcast_probes);
719} 741}
720 742
721static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
722{
723 if (unlikely(mod_timer(&n->timer, when))) {
724 printk("NEIGH: BUG, double timer add, state is %x\n",
725 n->nud_state);
726 dump_stack();
727 }
728}
729
730/* Called when a timer expires for a neighbour entry. */ 743/* Called when a timer expires for a neighbour entry. */
731 744
732static void neigh_timer_handler(unsigned long arg) 745static void neigh_timer_handler(unsigned long arg)
@@ -858,7 +871,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
858 atomic_set(&neigh->probes, neigh->parms->ucast_probes); 871 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
859 neigh->nud_state = NUD_INCOMPLETE; 872 neigh->nud_state = NUD_INCOMPLETE;
860 neigh->updated = jiffies; 873 neigh->updated = jiffies;
861 neigh_hold(neigh);
862 neigh_add_timer(neigh, now + 1); 874 neigh_add_timer(neigh, now + 1);
863 } else { 875 } else {
864 neigh->nud_state = NUD_FAILED; 876 neigh->nud_state = NUD_FAILED;
@@ -871,7 +883,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
871 } 883 }
872 } else if (neigh->nud_state & NUD_STALE) { 884 } else if (neigh->nud_state & NUD_STALE) {
873 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); 885 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
874 neigh_hold(neigh);
875 neigh->nud_state = NUD_DELAY; 886 neigh->nud_state = NUD_DELAY;
876 neigh->updated = jiffies; 887 neigh->updated = jiffies;
877 neigh_add_timer(neigh, 888 neigh_add_timer(neigh,
@@ -1015,13 +1026,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1015 1026
1016 if (new != old) { 1027 if (new != old) {
1017 neigh_del_timer(neigh); 1028 neigh_del_timer(neigh);
1018 if (new & NUD_IN_TIMER) { 1029 if (new & NUD_IN_TIMER)
1019 neigh_hold(neigh);
1020 neigh_add_timer(neigh, (jiffies + 1030 neigh_add_timer(neigh, (jiffies +
1021 ((new & NUD_REACHABLE) ? 1031 ((new & NUD_REACHABLE) ?
1022 neigh->parms->reachable_time : 1032 neigh->parms->reachable_time :
1023 0))); 1033 0)));
1024 }
1025 neigh->nud_state = new; 1034 neigh->nud_state = new;
1026 } 1035 }
1027 1036
@@ -1266,27 +1275,49 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1266 spin_unlock(&tbl->proxy_queue.lock); 1275 spin_unlock(&tbl->proxy_queue.lock);
1267} 1276}
1268 1277
1278static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1279 struct net *net, int ifindex)
1280{
1281 struct neigh_parms *p;
1282
1283 for (p = &tbl->parms; p; p = p->next) {
1284 if (p->net != net)
1285 continue;
1286 if ((p->dev && p->dev->ifindex == ifindex) ||
1287 (!p->dev && !ifindex))
1288 return p;
1289 }
1290
1291 return NULL;
1292}
1269 1293
1270struct neigh_parms *neigh_parms_alloc(struct net_device *dev, 1294struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1271 struct neigh_table *tbl) 1295 struct neigh_table *tbl)
1272{ 1296{
1273 struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); 1297 struct neigh_parms *p, *ref;
1298 struct net *net;
1299
1300 net = dev->nd_net;
1301 ref = lookup_neigh_params(tbl, net, 0);
1302 if (!ref)
1303 return NULL;
1274 1304
1305 p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1275 if (p) { 1306 if (p) {
1276 p->tbl = tbl; 1307 p->tbl = tbl;
1277 atomic_set(&p->refcnt, 1); 1308 atomic_set(&p->refcnt, 1);
1278 INIT_RCU_HEAD(&p->rcu_head); 1309 INIT_RCU_HEAD(&p->rcu_head);
1279 p->reachable_time = 1310 p->reachable_time =
1280 neigh_rand_reach_time(p->base_reachable_time); 1311 neigh_rand_reach_time(p->base_reachable_time);
1281 if (dev) {
1282 if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
1283 kfree(p);
1284 return NULL;
1285 }
1286 1312
1287 dev_hold(dev); 1313 if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
1288 p->dev = dev; 1314 kfree(p);
1315 return NULL;
1289 } 1316 }
1317
1318 dev_hold(dev);
1319 p->dev = dev;
1320 p->net = hold_net(net);
1290 p->sysctl_table = NULL; 1321 p->sysctl_table = NULL;
1291 write_lock_bh(&tbl->lock); 1322 write_lock_bh(&tbl->lock);
1292 p->next = tbl->parms.next; 1323 p->next = tbl->parms.next;
@@ -1326,8 +1357,9 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1326 NEIGH_PRINTK1("neigh_parms_release: not found\n"); 1357 NEIGH_PRINTK1("neigh_parms_release: not found\n");
1327} 1358}
1328 1359
1329void neigh_parms_destroy(struct neigh_parms *parms) 1360static void neigh_parms_destroy(struct neigh_parms *parms)
1330{ 1361{
1362 release_net(parms->net);
1331 kfree(parms); 1363 kfree(parms);
1332} 1364}
1333 1365
@@ -1338,6 +1370,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1338 unsigned long now = jiffies; 1370 unsigned long now = jiffies;
1339 unsigned long phsize; 1371 unsigned long phsize;
1340 1372
1373 tbl->parms.net = &init_net;
1341 atomic_set(&tbl->parms.refcnt, 1); 1374 atomic_set(&tbl->parms.refcnt, 1);
1342 INIT_RCU_HEAD(&tbl->parms.rcu_head); 1375 INIT_RCU_HEAD(&tbl->parms.rcu_head);
1343 tbl->parms.reachable_time = 1376 tbl->parms.reachable_time =
@@ -1372,15 +1405,11 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1372 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); 1405 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1373 1406
1374 rwlock_init(&tbl->lock); 1407 rwlock_init(&tbl->lock);
1375 init_timer(&tbl->gc_timer); 1408 setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl);
1376 tbl->gc_timer.data = (unsigned long)tbl;
1377 tbl->gc_timer.function = neigh_periodic_timer;
1378 tbl->gc_timer.expires = now + 1; 1409 tbl->gc_timer.expires = now + 1;
1379 add_timer(&tbl->gc_timer); 1410 add_timer(&tbl->gc_timer);
1380 1411
1381 init_timer(&tbl->proxy_timer); 1412 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1382 tbl->proxy_timer.data = (unsigned long)tbl;
1383 tbl->proxy_timer.function = neigh_proxy_process;
1384 skb_queue_head_init_class(&tbl->proxy_queue, 1413 skb_queue_head_init_class(&tbl->proxy_queue,
1385 &neigh_table_proxy_queue_class); 1414 &neigh_table_proxy_queue_class);
1386 1415
@@ -1483,7 +1512,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1483 goto out_dev_put; 1512 goto out_dev_put;
1484 1513
1485 if (ndm->ndm_flags & NTF_PROXY) { 1514 if (ndm->ndm_flags & NTF_PROXY) {
1486 err = pneigh_delete(tbl, nla_data(dst_attr), dev); 1515 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1487 goto out_dev_put; 1516 goto out_dev_put;
1488 } 1517 }
1489 1518
@@ -1560,7 +1589,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1560 struct pneigh_entry *pn; 1589 struct pneigh_entry *pn;
1561 1590
1562 err = -ENOBUFS; 1591 err = -ENOBUFS;
1563 pn = pneigh_lookup(tbl, dst, dev, 1); 1592 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1564 if (pn) { 1593 if (pn) {
1565 pn->flags = ndm->ndm_flags; 1594 pn->flags = ndm->ndm_flags;
1566 err = 0; 1595 err = 0;
@@ -1755,19 +1784,6 @@ errout:
1755 return -EMSGSIZE; 1784 return -EMSGSIZE;
1756} 1785}
1757 1786
1758static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1759 int ifindex)
1760{
1761 struct neigh_parms *p;
1762
1763 for (p = &tbl->parms; p; p = p->next)
1764 if ((p->dev && p->dev->ifindex == ifindex) ||
1765 (!p->dev && !ifindex))
1766 return p;
1767
1768 return NULL;
1769}
1770
1771static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { 1787static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1772 [NDTA_NAME] = { .type = NLA_STRING }, 1788 [NDTA_NAME] = { .type = NLA_STRING },
1773 [NDTA_THRESH1] = { .type = NLA_U32 }, 1789 [NDTA_THRESH1] = { .type = NLA_U32 },
@@ -1795,6 +1811,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1795 1811
1796static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1812static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1797{ 1813{
1814 struct net *net = skb->sk->sk_net;
1798 struct neigh_table *tbl; 1815 struct neigh_table *tbl;
1799 struct ndtmsg *ndtmsg; 1816 struct ndtmsg *ndtmsg;
1800 struct nlattr *tb[NDTA_MAX+1]; 1817 struct nlattr *tb[NDTA_MAX+1];
@@ -1844,7 +1861,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1844 if (tbp[NDTPA_IFINDEX]) 1861 if (tbp[NDTPA_IFINDEX])
1845 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); 1862 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1846 1863
1847 p = lookup_neigh_params(tbl, ifindex); 1864 p = lookup_neigh_params(tbl, net, ifindex);
1848 if (p == NULL) { 1865 if (p == NULL) {
1849 err = -ENOENT; 1866 err = -ENOENT;
1850 goto errout_tbl_lock; 1867 goto errout_tbl_lock;
@@ -1919,6 +1936,7 @@ errout:
1919 1936
1920static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 1937static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1921{ 1938{
1939 struct net *net = skb->sk->sk_net;
1922 int family, tidx, nidx = 0; 1940 int family, tidx, nidx = 0;
1923 int tbl_skip = cb->args[0]; 1941 int tbl_skip = cb->args[0];
1924 int neigh_skip = cb->args[1]; 1942 int neigh_skip = cb->args[1];
@@ -1938,8 +1956,11 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1938 NLM_F_MULTI) <= 0) 1956 NLM_F_MULTI) <= 0)
1939 break; 1957 break;
1940 1958
1941 for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { 1959 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
1942 if (nidx < neigh_skip) 1960 if (net != p->net)
1961 continue;
1962
1963 if (nidx++ < neigh_skip)
1943 continue; 1964 continue;
1944 1965
1945 if (neightbl_fill_param_info(skb, tbl, p, 1966 if (neightbl_fill_param_info(skb, tbl, p,
@@ -2015,6 +2036,7 @@ static void neigh_update_notify(struct neighbour *neigh)
2015static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2036static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2016 struct netlink_callback *cb) 2037 struct netlink_callback *cb)
2017{ 2038{
2039 struct net *net = skb->sk->sk_net;
2018 struct neighbour *n; 2040 struct neighbour *n;
2019 int rc, h, s_h = cb->args[1]; 2041 int rc, h, s_h = cb->args[1];
2020 int idx, s_idx = idx = cb->args[2]; 2042 int idx, s_idx = idx = cb->args[2];
@@ -2025,8 +2047,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2025 continue; 2047 continue;
2026 if (h > s_h) 2048 if (h > s_h)
2027 s_idx = 0; 2049 s_idx = 0;
2028 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { 2050 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
2029 if (idx < s_idx) 2051 int lidx;
2052 if (n->dev->nd_net != net)
2053 continue;
2054 lidx = idx++;
2055 if (lidx < s_idx)
2030 continue; 2056 continue;
2031 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, 2057 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2032 cb->nlh->nlmsg_seq, 2058 cb->nlh->nlmsg_seq,
@@ -2118,6 +2144,7 @@ EXPORT_SYMBOL(__neigh_for_each_release);
2118static struct neighbour *neigh_get_first(struct seq_file *seq) 2144static struct neighbour *neigh_get_first(struct seq_file *seq)
2119{ 2145{
2120 struct neigh_seq_state *state = seq->private; 2146 struct neigh_seq_state *state = seq->private;
2147 struct net *net = state->p.net;
2121 struct neigh_table *tbl = state->tbl; 2148 struct neigh_table *tbl = state->tbl;
2122 struct neighbour *n = NULL; 2149 struct neighbour *n = NULL;
2123 int bucket = state->bucket; 2150 int bucket = state->bucket;
@@ -2127,6 +2154,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
2127 n = tbl->hash_buckets[bucket]; 2154 n = tbl->hash_buckets[bucket];
2128 2155
2129 while (n) { 2156 while (n) {
2157 if (n->dev->nd_net != net)
2158 goto next;
2130 if (state->neigh_sub_iter) { 2159 if (state->neigh_sub_iter) {
2131 loff_t fakep = 0; 2160 loff_t fakep = 0;
2132 void *v; 2161 void *v;
@@ -2156,6 +2185,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
2156 loff_t *pos) 2185 loff_t *pos)
2157{ 2186{
2158 struct neigh_seq_state *state = seq->private; 2187 struct neigh_seq_state *state = seq->private;
2188 struct net *net = state->p.net;
2159 struct neigh_table *tbl = state->tbl; 2189 struct neigh_table *tbl = state->tbl;
2160 2190
2161 if (state->neigh_sub_iter) { 2191 if (state->neigh_sub_iter) {
@@ -2167,6 +2197,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
2167 2197
2168 while (1) { 2198 while (1) {
2169 while (n) { 2199 while (n) {
2200 if (n->dev->nd_net != net)
2201 goto next;
2170 if (state->neigh_sub_iter) { 2202 if (state->neigh_sub_iter) {
2171 void *v = state->neigh_sub_iter(state, n, pos); 2203 void *v = state->neigh_sub_iter(state, n, pos);
2172 if (v) 2204 if (v)
@@ -2213,6 +2245,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2213static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) 2245static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2214{ 2246{
2215 struct neigh_seq_state *state = seq->private; 2247 struct neigh_seq_state *state = seq->private;
2248 struct net *net = state->p.net;
2216 struct neigh_table *tbl = state->tbl; 2249 struct neigh_table *tbl = state->tbl;
2217 struct pneigh_entry *pn = NULL; 2250 struct pneigh_entry *pn = NULL;
2218 int bucket = state->bucket; 2251 int bucket = state->bucket;
@@ -2220,6 +2253,8 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2220 state->flags |= NEIGH_SEQ_IS_PNEIGH; 2253 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2221 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { 2254 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2222 pn = tbl->phash_buckets[bucket]; 2255 pn = tbl->phash_buckets[bucket];
2256 while (pn && (pn->net != net))
2257 pn = pn->next;
2223 if (pn) 2258 if (pn)
2224 break; 2259 break;
2225 } 2260 }
@@ -2233,6 +2268,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2233 loff_t *pos) 2268 loff_t *pos)
2234{ 2269{
2235 struct neigh_seq_state *state = seq->private; 2270 struct neigh_seq_state *state = seq->private;
2271 struct net *net = state->p.net;
2236 struct neigh_table *tbl = state->tbl; 2272 struct neigh_table *tbl = state->tbl;
2237 2273
2238 pn = pn->next; 2274 pn = pn->next;
@@ -2240,6 +2276,8 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2240 if (++state->bucket > PNEIGH_HASHMASK) 2276 if (++state->bucket > PNEIGH_HASHMASK)
2241 break; 2277 break;
2242 pn = tbl->phash_buckets[state->bucket]; 2278 pn = tbl->phash_buckets[state->bucket];
2279 while (pn && (pn->net != net))
2280 pn = pn->next;
2243 if (pn) 2281 if (pn)
2244 break; 2282 break;
2245 } 2283 }
@@ -2277,6 +2315,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2277} 2315}
2278 2316
2279void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 2317void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2318 __acquires(tbl->lock)
2280{ 2319{
2281 struct neigh_seq_state *state = seq->private; 2320 struct neigh_seq_state *state = seq->private;
2282 loff_t pos_minus_one; 2321 loff_t pos_minus_one;
@@ -2320,6 +2359,7 @@ out:
2320EXPORT_SYMBOL(neigh_seq_next); 2359EXPORT_SYMBOL(neigh_seq_next);
2321 2360
2322void neigh_seq_stop(struct seq_file *seq, void *v) 2361void neigh_seq_stop(struct seq_file *seq, void *v)
2362 __releases(tbl->lock)
2323{ 2363{
2324 struct neigh_seq_state *state = seq->private; 2364 struct neigh_seq_state *state = seq->private;
2325 struct neigh_table *tbl = state->tbl; 2365 struct neigh_table *tbl = state->tbl;
@@ -2441,6 +2481,7 @@ static inline size_t neigh_nlmsg_size(void)
2441 2481
2442static void __neigh_notify(struct neighbour *n, int type, int flags) 2482static void __neigh_notify(struct neighbour *n, int type, int flags)
2443{ 2483{
2484 struct net *net = n->dev->nd_net;
2444 struct sk_buff *skb; 2485 struct sk_buff *skb;
2445 int err = -ENOBUFS; 2486 int err = -ENOBUFS;
2446 2487
@@ -2455,10 +2496,10 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
2455 kfree_skb(skb); 2496 kfree_skb(skb);
2456 goto errout; 2497 goto errout;
2457 } 2498 }
2458 err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 2499 err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2459errout: 2500errout:
2460 if (err < 0) 2501 if (err < 0)
2461 rtnl_set_sk_err(RTNLGRP_NEIGH, err); 2502 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2462} 2503}
2463 2504
2464#ifdef CONFIG_ARPD 2505#ifdef CONFIG_ARPD
@@ -2472,11 +2513,8 @@ void neigh_app_ns(struct neighbour *n)
2472 2513
2473static struct neigh_sysctl_table { 2514static struct neigh_sysctl_table {
2474 struct ctl_table_header *sysctl_header; 2515 struct ctl_table_header *sysctl_header;
2475 ctl_table neigh_vars[__NET_NEIGH_MAX]; 2516 struct ctl_table neigh_vars[__NET_NEIGH_MAX];
2476 ctl_table neigh_dev[2]; 2517 char *dev_name;
2477 ctl_table neigh_neigh_dir[2];
2478 ctl_table neigh_proto_dir[2];
2479 ctl_table neigh_root_dir[2];
2480} neigh_sysctl_template __read_mostly = { 2518} neigh_sysctl_template __read_mostly = {
2481 .neigh_vars = { 2519 .neigh_vars = {
2482 { 2520 {
@@ -2607,32 +2645,7 @@ static struct neigh_sysctl_table {
2607 .mode = 0644, 2645 .mode = 0644,
2608 .proc_handler = &proc_dointvec, 2646 .proc_handler = &proc_dointvec,
2609 }, 2647 },
2610 {} 2648 {},
2611 },
2612 .neigh_dev = {
2613 {
2614 .ctl_name = NET_PROTO_CONF_DEFAULT,
2615 .procname = "default",
2616 .mode = 0555,
2617 },
2618 },
2619 .neigh_neigh_dir = {
2620 {
2621 .procname = "neigh",
2622 .mode = 0555,
2623 },
2624 },
2625 .neigh_proto_dir = {
2626 {
2627 .mode = 0555,
2628 },
2629 },
2630 .neigh_root_dir = {
2631 {
2632 .ctl_name = CTL_NET,
2633 .procname = "net",
2634 .mode = 0555,
2635 },
2636 }, 2649 },
2637}; 2650};
2638 2651
@@ -2640,14 +2653,26 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2640 int p_id, int pdev_id, char *p_name, 2653 int p_id, int pdev_id, char *p_name,
2641 proc_handler *handler, ctl_handler *strategy) 2654 proc_handler *handler, ctl_handler *strategy)
2642{ 2655{
2643 struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template, 2656 struct neigh_sysctl_table *t;
2644 sizeof(*t), GFP_KERNEL);
2645 const char *dev_name_source = NULL; 2657 const char *dev_name_source = NULL;
2646 char *dev_name = NULL;
2647 int err = 0;
2648 2658
2659#define NEIGH_CTL_PATH_ROOT 0
2660#define NEIGH_CTL_PATH_PROTO 1
2661#define NEIGH_CTL_PATH_NEIGH 2
2662#define NEIGH_CTL_PATH_DEV 3
2663
2664 struct ctl_path neigh_path[] = {
2665 { .procname = "net", .ctl_name = CTL_NET, },
2666 { .procname = "proto", .ctl_name = 0, },
2667 { .procname = "neigh", .ctl_name = 0, },
2668 { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, },
2669 { },
2670 };
2671
2672 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2649 if (!t) 2673 if (!t)
2650 return -ENOBUFS; 2674 goto err;
2675
2651 t->neigh_vars[0].data = &p->mcast_probes; 2676 t->neigh_vars[0].data = &p->mcast_probes;
2652 t->neigh_vars[1].data = &p->ucast_probes; 2677 t->neigh_vars[1].data = &p->ucast_probes;
2653 t->neigh_vars[2].data = &p->app_probes; 2678 t->neigh_vars[2].data = &p->app_probes;
@@ -2665,11 +2690,11 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2665 2690
2666 if (dev) { 2691 if (dev) {
2667 dev_name_source = dev->name; 2692 dev_name_source = dev->name;
2668 t->neigh_dev[0].ctl_name = dev->ifindex; 2693 neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex;
2669 /* Terminate the table early */ 2694 /* Terminate the table early */
2670 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); 2695 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2671 } else { 2696 } else {
2672 dev_name_source = t->neigh_dev[0].procname; 2697 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2673 t->neigh_vars[14].data = (int *)(p + 1); 2698 t->neigh_vars[14].data = (int *)(p + 1);
2674 t->neigh_vars[15].data = (int *)(p + 1) + 1; 2699 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2675 t->neigh_vars[16].data = (int *)(p + 1) + 2; 2700 t->neigh_vars[16].data = (int *)(p + 1) + 2;
@@ -2704,39 +2729,28 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2704 t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; 2729 t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
2705 } 2730 }
2706 2731
2707 dev_name = kstrdup(dev_name_source, GFP_KERNEL); 2732 t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2708 if (!dev_name) { 2733 if (!t->dev_name)
2709 err = -ENOBUFS;
2710 goto free; 2734 goto free;
2711 }
2712
2713 t->neigh_dev[0].procname = dev_name;
2714
2715 t->neigh_neigh_dir[0].ctl_name = pdev_id;
2716 2735
2717 t->neigh_proto_dir[0].procname = p_name; 2736 neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2718 t->neigh_proto_dir[0].ctl_name = p_id; 2737 neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id;
2738 neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2739 neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
2719 2740
2720 t->neigh_dev[0].child = t->neigh_vars; 2741 t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars);
2721 t->neigh_neigh_dir[0].child = t->neigh_dev; 2742 if (!t->sysctl_header)
2722 t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
2723 t->neigh_root_dir[0].child = t->neigh_proto_dir;
2724
2725 t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
2726 if (!t->sysctl_header) {
2727 err = -ENOBUFS;
2728 goto free_procname; 2743 goto free_procname;
2729 } 2744
2730 p->sysctl_table = t; 2745 p->sysctl_table = t;
2731 return 0; 2746 return 0;
2732 2747
2733 /* error path */ 2748free_procname:
2734 free_procname: 2749 kfree(t->dev_name);
2735 kfree(dev_name); 2750free:
2736 free:
2737 kfree(t); 2751 kfree(t);
2738 2752err:
2739 return err; 2753 return -ENOBUFS;
2740} 2754}
2741 2755
2742void neigh_sysctl_unregister(struct neigh_parms *p) 2756void neigh_sysctl_unregister(struct neigh_parms *p)
@@ -2745,7 +2759,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
2745 struct neigh_sysctl_table *t = p->sysctl_table; 2759 struct neigh_sysctl_table *t = p->sysctl_table;
2746 p->sysctl_table = NULL; 2760 p->sysctl_table = NULL;
2747 unregister_sysctl_table(t->sysctl_header); 2761 unregister_sysctl_table(t->sysctl_header);
2748 kfree(t->neigh_dev[0].procname); 2762 kfree(t->dev_name);
2749 kfree(t); 2763 kfree(t);
2750 } 2764 }
2751} 2765}
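
[Editor's note] The neigh_sysctl_register() hunks above replace four nested ctl_table directory arrays (neigh_dev, neigh_neigh_dir, neigh_proto_dir, neigh_root_dir) with one ctl_path array handed to register_sysctl_paths(), so only the leaf variable table is duplicated per device. A minimal sketch of the pattern, assuming the 2.6.25-era sysctl API; the example_* names are illustrative, not from the patch:

    #include <linux/sysctl.h>

    static int example_value;

    static struct ctl_path example_path[] = {
            { .procname = "net",     .ctl_name = CTL_NET, },
            { .procname = "example", .ctl_name = 0, },
            { },    /* empty entry terminates the path */
    };

    static struct ctl_table example_table[] = {
            {
                    .procname       = "value",
                    .data           = &example_value,
                    .maxlen         = sizeof(int),
                    .mode           = 0644,
                    .proc_handler   = &proc_dointvec,
            },
            { },    /* empty entry terminates the table */
    };

    static struct ctl_table_header *example_header;

    static int __init example_sysctl_init(void)
    {
            /* creates /proc/sys/net/example/value; the directories come
             * from the path array, not from per-instance tables */
            example_header = register_sysctl_paths(example_path, example_table);
            return example_header ? 0 : -ENOBUFS;
    }
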
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 61ead1d11132..7635d3f72723 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -95,17 +95,6 @@ NETDEVICE_SHOW(type, fmt_dec);
95NETDEVICE_SHOW(link_mode, fmt_dec); 95NETDEVICE_SHOW(link_mode, fmt_dec);
96 96
97/* use same locking rules as GIFHWADDR ioctl's */ 97/* use same locking rules as GIFHWADDR ioctl's */
98static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
99{
100 int i;
101 char *cp = buf;
102
103 for (i = 0; i < len; i++)
104 cp += sprintf(cp, "%02x%c", addr[i],
105 i == (len - 1) ? '\n' : ':');
106 return cp - buf;
107}
108
109static ssize_t show_address(struct device *dev, struct device_attribute *attr, 98static ssize_t show_address(struct device *dev, struct device_attribute *attr,
110 char *buf) 99 char *buf)
111{ 100{
@@ -114,7 +103,7 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
114 103
115 read_lock(&dev_base_lock); 104 read_lock(&dev_base_lock);
116 if (dev_isalive(net)) 105 if (dev_isalive(net))
117 ret = format_addr(buf, net->dev_addr, net->addr_len); 106 ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len);
118 read_unlock(&dev_base_lock); 107 read_unlock(&dev_base_lock);
119 return ret; 108 return ret;
120} 109}
@@ -124,7 +113,7 @@ static ssize_t show_broadcast(struct device *dev,
124{ 113{
125 struct net_device *net = to_net_dev(dev); 114 struct net_device *net = to_net_dev(dev);
126 if (dev_isalive(net)) 115 if (dev_isalive(net))
127 return format_addr(buf, net->broadcast, net->addr_len); 116 return sysfs_format_mac(buf, net->broadcast, net->addr_len);
128 return -EINVAL; 117 return -EINVAL;
129} 118}
130 119
@@ -247,9 +236,8 @@ static ssize_t netstat_show(const struct device *d,
247 struct net_device_stats *stats; 236 struct net_device_stats *stats;
248 ssize_t ret = -EINVAL; 237 ssize_t ret = -EINVAL;
249 238
250 if (offset > sizeof(struct net_device_stats) || 239 WARN_ON(offset > sizeof(struct net_device_stats) ||
251 offset % sizeof(unsigned long) != 0) 240 offset % sizeof(unsigned long) != 0);
252 WARN_ON(1);
253 241
254 read_lock(&dev_base_lock); 242 read_lock(&dev_base_lock);
255 if (dev_isalive(dev) && dev->get_stats && 243 if (dev_isalive(dev) && dev->get_stats &&
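
[Editor's note] Both show_address() and show_broadcast() now call the shared sysfs_format_mac() helper instead of a private format_addr() copy. The output format is unchanged; a sketch of the equivalent formatting (format_mac_example() is illustrative, not the kernel helper):

    #include <stdio.h>

    /* renders "00:1a:2b:3c:4d:5e\n" and returns the byte count,
     * mirroring what the removed format_addr() did */
    static int format_mac_example(char *buf, const unsigned char *addr, int len)
    {
            int i, n = 0;

            for (i = 0; i < len; i++)
                    n += sprintf(buf + n, "%02x%c", addr[i],
                                 i == len - 1 ? '\n' : ':');
            return n;
    }
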
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ec936ae92458..26e941d912e8 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -58,6 +58,7 @@ out_undo:
58 58
59#ifdef CONFIG_NET_NS 59#ifdef CONFIG_NET_NS
60static struct kmem_cache *net_cachep; 60static struct kmem_cache *net_cachep;
61static struct workqueue_struct *netns_wq;
61 62
62static struct net *net_alloc(void) 63static struct net *net_alloc(void)
63{ 64{
@@ -149,7 +150,7 @@ void __put_net(struct net *net)
149{ 150{
150 /* Cleanup the network namespace in process context */ 151 /* Cleanup the network namespace in process context */
151 INIT_WORK(&net->work, cleanup_net); 152 INIT_WORK(&net->work, cleanup_net);
152 schedule_work(&net->work); 153 queue_work(netns_wq, &net->work);
153} 154}
154EXPORT_SYMBOL_GPL(__put_net); 155EXPORT_SYMBOL_GPL(__put_net);
155 156
@@ -171,7 +172,13 @@ static int __init net_ns_init(void)
171 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 172 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
172 SMP_CACHE_BYTES, 173 SMP_CACHE_BYTES,
173 SLAB_PANIC, NULL); 174 SLAB_PANIC, NULL);
175
176 /* Create workqueue for cleanup */
177 netns_wq = create_singlethread_workqueue("netns");
178 if (!netns_wq)
179 panic("Could not create netns workq");
174#endif 180#endif
181
175 mutex_lock(&net_mutex); 182 mutex_lock(&net_mutex);
176 err = setup_net(&init_net); 183 err = setup_net(&init_net);
177 184
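
[Editor's note] Namespace teardown now runs on a dedicated single-threaded workqueue ("netns") instead of the shared keventd queue, so a slow cleanup_net() can no longer stall unrelated schedule_work() users, and vice versa. The pattern, sketched with illustrative example_* names:

    #include <linux/workqueue.h>

    static struct workqueue_struct *example_wq;
    static struct work_struct example_work;

    static void example_fn(struct work_struct *work)
    {
            /* long-running teardown runs here, in process context */
    }

    static int __init example_init(void)
    {
            example_wq = create_singlethread_workqueue("example");
            if (!example_wq)
                    return -ENOMEM;

            INIT_WORK(&example_work, example_fn);
            /* dedicated queue instead of schedule_work(&example_work) */
            queue_work(example_wq, &example_work);
            return 0;
    }
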
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c499b5c69bed..6faa128a4c8e 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -39,8 +39,6 @@ static struct sk_buff_head skb_pool;
39static atomic_t trapped; 39static atomic_t trapped;
40 40
41#define USEC_PER_POLL 50 41#define USEC_PER_POLL 50
42#define NETPOLL_RX_ENABLED 1
43#define NETPOLL_RX_DROP 2
44 42
45#define MAX_SKB_SIZE \ 43#define MAX_SKB_SIZE \
46 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ 44 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
@@ -128,27 +126,24 @@ static int poll_one_napi(struct netpoll_info *npinfo,
128 if (!test_bit(NAPI_STATE_SCHED, &napi->state)) 126 if (!test_bit(NAPI_STATE_SCHED, &napi->state))
129 return budget; 127 return budget;
130 128
131 npinfo->rx_flags |= NETPOLL_RX_DROP;
132 atomic_inc(&trapped); 129 atomic_inc(&trapped);
133 130
134 work = napi->poll(napi, budget); 131 work = napi->poll(napi, budget);
135 132
136 atomic_dec(&trapped); 133 atomic_dec(&trapped);
137 npinfo->rx_flags &= ~NETPOLL_RX_DROP;
138 134
139 return budget - work; 135 return budget - work;
140} 136}
141 137
142static void poll_napi(struct netpoll *np) 138static void poll_napi(struct net_device *dev)
143{ 139{
144 struct netpoll_info *npinfo = np->dev->npinfo;
145 struct napi_struct *napi; 140 struct napi_struct *napi;
146 int budget = 16; 141 int budget = 16;
147 142
148 list_for_each_entry(napi, &np->dev->napi_list, dev_list) { 143 list_for_each_entry(napi, &dev->napi_list, dev_list) {
149 if (napi->poll_owner != smp_processor_id() && 144 if (napi->poll_owner != smp_processor_id() &&
150 spin_trylock(&napi->poll_lock)) { 145 spin_trylock(&napi->poll_lock)) {
151 budget = poll_one_napi(npinfo, napi, budget); 146 budget = poll_one_napi(dev->npinfo, napi, budget);
152 spin_unlock(&napi->poll_lock); 147 spin_unlock(&napi->poll_lock);
153 148
154 if (!budget) 149 if (!budget)
@@ -159,30 +154,27 @@ static void poll_napi(struct netpoll *np)
159 154
160static void service_arp_queue(struct netpoll_info *npi) 155static void service_arp_queue(struct netpoll_info *npi)
161{ 156{
162 struct sk_buff *skb; 157 if (npi) {
163 158 struct sk_buff *skb;
164 if (unlikely(!npi))
165 return;
166
167 skb = skb_dequeue(&npi->arp_tx);
168 159
169 while (skb != NULL) { 160 while ((skb = skb_dequeue(&npi->arp_tx)))
170 arp_reply(skb); 161 arp_reply(skb);
171 skb = skb_dequeue(&npi->arp_tx);
172 } 162 }
173} 163}
174 164
175void netpoll_poll(struct netpoll *np) 165void netpoll_poll(struct netpoll *np)
176{ 166{
177 if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) 167 struct net_device *dev = np->dev;
168
169 if (!dev || !netif_running(dev) || !dev->poll_controller)
178 return; 170 return;
179 171
180 /* Process pending work on NIC */ 172 /* Process pending work on NIC */
181 np->dev->poll_controller(np->dev); 173 dev->poll_controller(dev);
182 if (!list_empty(&np->dev->napi_list)) 174
183 poll_napi(np); 175 poll_napi(dev);
184 176
185 service_arp_queue(np->dev->npinfo); 177 service_arp_queue(dev->npinfo);
186 178
187 zap_completion_queue(); 179 zap_completion_queue();
188} 180}
@@ -364,8 +356,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
364 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); 356 eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
365 skb_reset_mac_header(skb); 357 skb_reset_mac_header(skb);
366 skb->protocol = eth->h_proto = htons(ETH_P_IP); 358 skb->protocol = eth->h_proto = htons(ETH_P_IP);
367 memcpy(eth->h_source, np->local_mac, 6); 359 memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
368 memcpy(eth->h_dest, np->remote_mac, 6); 360 memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
369 361
370 skb->dev = np->dev; 362 skb->dev = np->dev;
371 363
@@ -418,7 +410,8 @@ static void arp_reply(struct sk_buff *skb)
418 memcpy(&tip, arp_ptr, 4); 410 memcpy(&tip, arp_ptr, 4);
419 411
420 /* Should we ignore ARP? */ 412 /* Should we ignore ARP? */
421 if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip)) 413 if (tip != htonl(np->local_ip) ||
414 ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
422 return; 415 return;
423 416
424 size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4); 417 size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
@@ -435,7 +428,7 @@ static void arp_reply(struct sk_buff *skb)
435 428
436 /* Fill the device header for the ARP frame */ 429 /* Fill the device header for the ARP frame */
437 if (dev_hard_header(send_skb, skb->dev, ptype, 430 if (dev_hard_header(send_skb, skb->dev, ptype,
438 sha, np->local_mac, 431 sha, np->dev->dev_addr,
439 send_skb->len) < 0) { 432 send_skb->len) < 0) {
440 kfree_skb(send_skb); 433 kfree_skb(send_skb);
441 return; 434 return;
@@ -479,7 +472,7 @@ int __netpoll_rx(struct sk_buff *skb)
479 if (skb->dev->type != ARPHRD_ETHER) 472 if (skb->dev->type != ARPHRD_ETHER)
480 goto out; 473 goto out;
481 474
482 /* check if netpoll clients need ARP */ 475 /* if an ARP arrives in the middle of a NAPI poll, queue it */
483 if (skb->protocol == htons(ETH_P_ARP) && 476 if (skb->protocol == htons(ETH_P_ARP) &&
484 atomic_read(&trapped)) { 477 atomic_read(&trapped)) {
485 skb_queue_tail(&npi->arp_tx, skb); 478 skb_queue_tail(&npi->arp_tx, skb);
@@ -541,6 +534,9 @@ int __netpoll_rx(struct sk_buff *skb)
541 return 1; 534 return 1;
542 535
543out: 536out:
537 /* If a packet is received while we are already in poll, just
538 * silently drop it.
539 */
544 if (atomic_read(&trapped)) { 540 if (atomic_read(&trapped)) {
545 kfree_skb(skb); 541 kfree_skb(skb);
546 return 1; 542 return 1;
@@ -679,7 +675,6 @@ int netpoll_setup(struct netpoll *np)
679 goto release; 675 goto release;
680 } 676 }
681 677
682 npinfo->rx_flags = 0;
683 npinfo->rx_np = NULL; 678 npinfo->rx_np = NULL;
684 679
685 spin_lock_init(&npinfo->rx_lock); 680 spin_lock_init(&npinfo->rx_lock);
@@ -741,9 +736,6 @@ int netpoll_setup(struct netpoll *np)
741 } 736 }
742 } 737 }
743 738
744 if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
745 memcpy(np->local_mac, ndev->dev_addr, 6);
746
747 if (!np->local_ip) { 739 if (!np->local_ip) {
748 rcu_read_lock(); 740 rcu_read_lock();
749 in_dev = __in_dev_get_rcu(ndev); 741 in_dev = __in_dev_get_rcu(ndev);
@@ -764,7 +756,6 @@ int netpoll_setup(struct netpoll *np)
764 756
765 if (np->rx_hook) { 757 if (np->rx_hook) {
766 spin_lock_irqsave(&npinfo->rx_lock, flags); 758 spin_lock_irqsave(&npinfo->rx_lock, flags);
767 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
768 npinfo->rx_np = np; 759 npinfo->rx_np = np;
769 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 760 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
770 } 761 }
@@ -806,7 +797,6 @@ void netpoll_cleanup(struct netpoll *np)
806 if (npinfo->rx_np == np) { 797 if (npinfo->rx_np == np) {
807 spin_lock_irqsave(&npinfo->rx_lock, flags); 798 spin_lock_irqsave(&npinfo->rx_lock, flags);
808 npinfo->rx_np = NULL; 799 npinfo->rx_np = NULL;
809 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
810 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 800 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
811 } 801 }
812 802
@@ -816,11 +806,7 @@ void netpoll_cleanup(struct netpoll *np)
816 cancel_rearming_delayed_work(&npinfo->tx_work); 806 cancel_rearming_delayed_work(&npinfo->tx_work);
817 807
818 /* clean after last, unfinished work */ 808 /* clean after last, unfinished work */
819 if (!skb_queue_empty(&npinfo->txq)) { 809 __skb_queue_purge(&npinfo->txq);
820 struct sk_buff *skb;
821 skb = __skb_dequeue(&npinfo->txq);
822 kfree_skb(skb);
823 }
824 kfree(npinfo); 810 kfree(npinfo);
825 np->dev->npinfo = NULL; 811 np->dev->npinfo = NULL;
826 } 812 }
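
[Editor's note] The netpoll_cleanup() hunk fixes a leak: the old code freed at most one queued skb (a single __skb_dequeue() behind an if), while __skb_queue_purge() drains the whole txq. Its effect is roughly the following sketch; see include/linux/skbuff.h for the real inline:

    static inline void example_queue_purge(struct sk_buff_head *list)
    {
            struct sk_buff *skb;

            while ((skb = __skb_dequeue(list)) != NULL)
                    kfree_skb(skb);
    }
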
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 285ec3ed9b37..eebccdbdbaca 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -397,62 +397,6 @@ struct pktgen_thread {
397#define REMOVE 1 397#define REMOVE 1
398#define FIND 0 398#define FIND 0
399 399
400/* This code works around the fact that do_div cannot handle two 64-bit
401 numbers, and regular 64-bit division doesn't work on x86 kernels.
402 --Ben
403*/
404
405#define PG_DIV 0
406
407/* This was emailed to LKML by: Chris Caputo <ccaputo@alt.net>
408 * Function copied/adapted/optimized from:
409 *
410 * nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html
411 *
412 * Copyright 1994, University of Cambridge Computer Laboratory
413 * All Rights Reserved.
414 *
415 */
416static inline s64 divremdi3(s64 x, s64 y, int type)
417{
418 u64 a = (x < 0) ? -x : x;
419 u64 b = (y < 0) ? -y : y;
420 u64 res = 0, d = 1;
421
422 if (b > 0) {
423 while (b < a) {
424 b <<= 1;
425 d <<= 1;
426 }
427 }
428
429 do {
430 if (a >= b) {
431 a -= b;
432 res += d;
433 }
434 b >>= 1;
435 d >>= 1;
436 }
437 while (d);
438
439 if (PG_DIV == type) {
440 return (((x ^ y) & (1ll << 63)) == 0) ? res : -(s64) res;
441 } else {
442 return ((x & (1ll << 63)) == 0) ? a : -(s64) a;
443 }
444}
445
446/* End of hacks to deal with 64-bit math on x86 */
447
448/** Convert to milliseconds */
449static inline __u64 tv_to_ms(const struct timeval *tv)
450{
451 __u64 ms = tv->tv_usec / 1000;
452 ms += (__u64) tv->tv_sec * (__u64) 1000;
453 return ms;
454}
455
456/** Convert to micro-seconds */ 400/** Convert to micro-seconds */
457static inline __u64 tv_to_us(const struct timeval *tv) 401static inline __u64 tv_to_us(const struct timeval *tv)
458{ 402{
@@ -461,51 +405,13 @@ static inline __u64 tv_to_us(const struct timeval *tv)
461 return us; 405 return us;
462} 406}
463 407
464static inline __u64 pg_div(__u64 n, __u32 base) 408static __u64 getCurUs(void)
465{
466 __u64 tmp = n;
467 do_div(tmp, base);
468 /* printk("pktgen: pg_div, n: %llu base: %d rv: %llu\n",
469 n, base, tmp); */
470 return tmp;
471}
472
473static inline __u64 pg_div64(__u64 n, __u64 base)
474{
475 __u64 tmp = n;
476/*
477 * How do we know if the architecture we are running on
478 * supports division with 64 bit base?
479 *
480 */
481#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__)
482
483 do_div(tmp, base);
484#else
485 tmp = divremdi3(n, base, PG_DIV);
486#endif
487 return tmp;
488}
489
490static inline __u64 getCurMs(void)
491{
492 struct timeval tv;
493 do_gettimeofday(&tv);
494 return tv_to_ms(&tv);
495}
496
497static inline __u64 getCurUs(void)
498{ 409{
499 struct timeval tv; 410 struct timeval tv;
500 do_gettimeofday(&tv); 411 do_gettimeofday(&tv);
501 return tv_to_us(&tv); 412 return tv_to_us(&tv);
502} 413}
503 414
504static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b)
505{
506 return tv_to_us(a) - tv_to_us(b);
507}
508
509/* old include end */ 415/* old include end */
510 416
511static char version[] __initdata = VERSION; 417static char version[] __initdata = VERSION;
@@ -2358,9 +2264,11 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2358 t = random32() % (imx - imn) + imn; 2264 t = random32() % (imx - imn) + imn;
2359 s = htonl(t); 2265 s = htonl(t);
2360 2266
2361 while (LOOPBACK(s) || MULTICAST(s) 2267 while (ipv4_is_loopback(s) ||
2362 || BADCLASS(s) || ZERONET(s) 2268 ipv4_is_multicast(s) ||
2363 || LOCAL_MCAST(s)) { 2269 ipv4_is_lbcast(s) ||
2270 ipv4_is_zeronet(s) ||
2271 ipv4_is_local_multicast(s)) {
2364 t = random32() % (imx - imn) + imn; 2272 t = random32() % (imx - imn) + imn;
2365 s = htonl(t); 2273 s = htonl(t);
2366 } 2274 }
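
[Editor's note] The deleted divremdi3()/pg_div64() code worked around the lack of native 64-by-64-bit division on 32-bit kernels. The callers that remain divide by 32-bit bases, which do_div() handles on every architecture; a sketch of the idiom (essentially the removed pg_div()):

    #include <asm/div64.h>

    static inline __u64 example_div(__u64 n, __u32 base)
    {
            __u64 tmp = n;

            /* do_div() divides tmp in place (tmp = tmp / base) and
             * returns the 32-bit remainder, discarded here */
            do_div(tmp, base);
            return tmp;
    }
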
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 45aed75cb571..2d3035d3abd7 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -69,8 +69,6 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
69 return 0; 69 return 0;
70} 70}
71 71
72EXPORT_SYMBOL(reqsk_queue_alloc);
73
74void __reqsk_queue_destroy(struct request_sock_queue *queue) 72void __reqsk_queue_destroy(struct request_sock_queue *queue)
75{ 73{
76 struct listen_sock *lopt; 74 struct listen_sock *lopt;
@@ -91,8 +89,6 @@ void __reqsk_queue_destroy(struct request_sock_queue *queue)
91 kfree(lopt); 89 kfree(lopt);
92} 90}
93 91
94EXPORT_SYMBOL(__reqsk_queue_destroy);
95
96static inline struct listen_sock *reqsk_queue_yank_listen_sk( 92static inline struct listen_sock *reqsk_queue_yank_listen_sk(
97 struct request_sock_queue *queue) 93 struct request_sock_queue *queue)
98{ 94{
@@ -134,4 +130,3 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
134 kfree(lopt); 130 kfree(lopt);
135} 131}
136 132
137EXPORT_SYMBOL(reqsk_queue_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fed95a323b28..ddbdde82a700 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -60,7 +60,6 @@ struct rtnl_link
60}; 60};
61 61
62static DEFINE_MUTEX(rtnl_mutex); 62static DEFINE_MUTEX(rtnl_mutex);
63static struct sock *rtnl;
64 63
65void rtnl_lock(void) 64void rtnl_lock(void)
66{ 65{
@@ -458,8 +457,9 @@ size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
458 return ret; 457 return ret;
459} 458}
460 459
461int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) 460int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
462{ 461{
462 struct sock *rtnl = net->rtnl;
463 int err = 0; 463 int err = 0;
464 464
465 NETLINK_CB(skb).dst_group = group; 465 NETLINK_CB(skb).dst_group = group;
@@ -471,14 +471,17 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
471 return err; 471 return err;
472} 472}
473 473
474int rtnl_unicast(struct sk_buff *skb, u32 pid) 474int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
475{ 475{
476 struct sock *rtnl = net->rtnl;
477
476 return nlmsg_unicast(rtnl, skb, pid); 478 return nlmsg_unicast(rtnl, skb, pid);
477} 479}
478 480
479int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, 481int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
480 struct nlmsghdr *nlh, gfp_t flags) 482 struct nlmsghdr *nlh, gfp_t flags)
481{ 483{
484 struct sock *rtnl = net->rtnl;
482 int report = 0; 485 int report = 0;
483 486
484 if (nlh) 487 if (nlh)
@@ -487,8 +490,10 @@ int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
487 return nlmsg_notify(rtnl, skb, pid, group, report, flags); 490 return nlmsg_notify(rtnl, skb, pid, group, report, flags);
488} 491}
489 492
490void rtnl_set_sk_err(u32 group, int error) 493void rtnl_set_sk_err(struct net *net, u32 group, int error)
491{ 494{
495 struct sock *rtnl = net->rtnl;
496
492 netlink_set_err(rtnl, 0, group, error); 497 netlink_set_err(rtnl, 0, group, error);
493} 498}
494 499
@@ -1186,7 +1191,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1186 kfree_skb(nskb); 1191 kfree_skb(nskb);
1187 goto errout; 1192 goto errout;
1188 } 1193 }
1189 err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); 1194 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
1190errout: 1195errout:
1191 dev_put(dev); 1196 dev_put(dev);
1192 1197
@@ -1219,6 +1224,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
1219 1224
1220void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) 1225void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
1221{ 1226{
1227 struct net *net = dev->nd_net;
1222 struct sk_buff *skb; 1228 struct sk_buff *skb;
1223 int err = -ENOBUFS; 1229 int err = -ENOBUFS;
1224 1230
@@ -1233,10 +1239,10 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
1233 kfree_skb(skb); 1239 kfree_skb(skb);
1234 goto errout; 1240 goto errout;
1235 } 1241 }
1236 err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); 1242 err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
1237errout: 1243errout:
1238 if (err < 0) 1244 if (err < 0)
1239 rtnl_set_sk_err(RTNLGRP_LINK, err); 1245 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
1240} 1246}
1241 1247
1242/* Protected by RTNL semaphore. */ 1248
@@ -1247,6 +1253,7 @@ static int rtattr_max;
1247 1253
1248static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 1254static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1249{ 1255{
1256 struct net *net = skb->sk->sk_net;
1250 rtnl_doit_func doit; 1257 rtnl_doit_func doit;
1251 int sz_idx, kind; 1258 int sz_idx, kind;
1252 int min_len; 1259 int min_len;
@@ -1275,6 +1282,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1275 return -EPERM; 1282 return -EPERM;
1276 1283
1277 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { 1284 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
1285 struct sock *rtnl;
1278 rtnl_dumpit_func dumpit; 1286 rtnl_dumpit_func dumpit;
1279 1287
1280 dumpit = rtnl_get_dumpit(family, type); 1288 dumpit = rtnl_get_dumpit(family, type);
@@ -1282,6 +1290,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1282 return -EOPNOTSUPP; 1290 return -EOPNOTSUPP;
1283 1291
1284 __rtnl_unlock(); 1292 __rtnl_unlock();
1293 rtnl = net->rtnl;
1285 err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); 1294 err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
1286 rtnl_lock(); 1295 rtnl_lock();
1287 return err; 1296 return err;
@@ -1326,9 +1335,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
1326{ 1335{
1327 struct net_device *dev = ptr; 1336 struct net_device *dev = ptr;
1328 1337
1329 if (dev->nd_net != &init_net)
1330 return NOTIFY_DONE;
1331
1332 switch (event) { 1338 switch (event) {
1333 case NETDEV_UNREGISTER: 1339 case NETDEV_UNREGISTER:
1334 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 1340 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
@@ -1354,6 +1360,29 @@ static struct notifier_block rtnetlink_dev_notifier = {
1354 .notifier_call = rtnetlink_event, 1360 .notifier_call = rtnetlink_event,
1355}; 1361};
1356 1362
1363
1364static int rtnetlink_net_init(struct net *net)
1365{
1366 struct sock *sk;
1367 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
1368 rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
1369 if (!sk)
1370 return -ENOMEM;
1371 net->rtnl = sk;
1372 return 0;
1373}
1374
1375static void rtnetlink_net_exit(struct net *net)
1376{
1377 netlink_kernel_release(net->rtnl);
1378 net->rtnl = NULL;
1379}
1380
1381static struct pernet_operations rtnetlink_net_ops = {
1382 .init = rtnetlink_net_init,
1383 .exit = rtnetlink_net_exit,
1384};
1385
1357void __init rtnetlink_init(void) 1386void __init rtnetlink_init(void)
1358{ 1387{
1359 int i; 1388 int i;
@@ -1366,10 +1395,9 @@ void __init rtnetlink_init(void)
1366 if (!rta_buf) 1395 if (!rta_buf)
1367 panic("rtnetlink_init: cannot allocate rta_buf\n"); 1396 panic("rtnetlink_init: cannot allocate rta_buf\n");
1368 1397
1369 rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX, 1398 if (register_pernet_subsys(&rtnetlink_net_ops))
1370 rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
1371 if (rtnl == NULL)
1372 panic("rtnetlink_init: cannot initialize rtnetlink\n"); 1399 panic("rtnetlink_init: cannot initialize rtnetlink\n");
1400
1373 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); 1401 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
1374 register_netdevice_notifier(&rtnetlink_dev_notifier); 1402 register_netdevice_notifier(&rtnetlink_dev_notifier);
1375 1403
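
[Editor's note] The global rtnl socket becomes per-namespace state: register_pernet_subsys() runs .init for the initial namespace and for every namespace created later, and .exit when one is torn down. The registration pattern, sketched with illustrative example_* names (it mirrors rtnetlink_net_ops above):

    #include <net/net_namespace.h>

    static int example_net_init(struct net *net)
    {
            /* allocate per-namespace state and hang it off *net */
            return 0;
    }

    static void example_net_exit(struct net *net)
    {
            /* release the per-namespace state */
    }

    static struct pernet_operations example_net_ops = {
            .init = example_net_init,
            .exit = example_net_exit,
    };

    static int __init example_init(void)
    {
            return register_pernet_subsys(&example_net_ops);
    }
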
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b6283779e93d..98420f9c4b6d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -52,6 +52,7 @@
52#endif 52#endif
53#include <linux/string.h> 53#include <linux/string.h>
54#include <linux/skbuff.h> 54#include <linux/skbuff.h>
55#include <linux/splice.h>
55#include <linux/cache.h> 56#include <linux/cache.h>
56#include <linux/rtnetlink.h> 57#include <linux/rtnetlink.h>
57#include <linux/init.h> 58#include <linux/init.h>
@@ -71,6 +72,40 @@
71static struct kmem_cache *skbuff_head_cache __read_mostly; 72static struct kmem_cache *skbuff_head_cache __read_mostly;
72static struct kmem_cache *skbuff_fclone_cache __read_mostly; 73static struct kmem_cache *skbuff_fclone_cache __read_mostly;
73 74
75static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
76 struct pipe_buffer *buf)
77{
78 struct sk_buff *skb = (struct sk_buff *) buf->private;
79
80 kfree_skb(skb);
81}
82
83static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
84 struct pipe_buffer *buf)
85{
86 struct sk_buff *skb = (struct sk_buff *) buf->private;
87
88 skb_get(skb);
89}
90
91static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
92 struct pipe_buffer *buf)
93{
94 return 1;
95}
96
97
98/* Pipe buffer operations for a socket. */
99static struct pipe_buf_operations sock_pipe_buf_ops = {
100 .can_merge = 0,
101 .map = generic_pipe_buf_map,
102 .unmap = generic_pipe_buf_unmap,
103 .confirm = generic_pipe_buf_confirm,
104 .release = sock_pipe_buf_release,
105 .steal = sock_pipe_buf_steal,
106 .get = sock_pipe_buf_get,
107};
108
74/* 109/*
75 * Keep out-of-line to prevent kernel bloat. 110 * Keep out-of-line to prevent kernel bloat.
76 * __builtin_return_address is not used because it is not always 111 * __builtin_return_address is not used because it is not always
@@ -1122,6 +1157,217 @@ fault:
1122 return -EFAULT; 1157 return -EFAULT;
1123} 1158}
1124 1159
1160/*
1161 * Callback from splice_to_pipe(), if we need to release some pages
1162 * at the end of the spd in case we error'ed out in filling the pipe.
1163 */
1164static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1165{
1166 struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
1167
1168 kfree_skb(skb);
1169}
1170
1171/*
1172 * Fill page/offset/length into spd, if it can hold more pages.
1173 */
1174static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1175 unsigned int len, unsigned int offset,
1176 struct sk_buff *skb)
1177{
1178 if (unlikely(spd->nr_pages == PIPE_BUFFERS))
1179 return 1;
1180
1181 spd->pages[spd->nr_pages] = page;
1182 spd->partial[spd->nr_pages].len = len;
1183 spd->partial[spd->nr_pages].offset = offset;
1184 spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
1185 spd->nr_pages++;
1186 return 0;
1187}
1188
1189/*
1190 * Map linear and fragment data from the skb to spd. Returns number of
1191 * pages mapped.
1192 */
1193static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1194 unsigned int *total_len,
1195 struct splice_pipe_desc *spd)
1196{
1197 unsigned int nr_pages = spd->nr_pages;
1198 unsigned int poff, plen, len, toff, tlen;
1199 int headlen, seg;
1200
1201 toff = *offset;
1202 tlen = *total_len;
1203 if (!tlen)
1204 goto err;
1205
1206 /*
1207 * if the offset is greater than the linear part, go directly to
1208 * the fragments.
1209 */
1210 headlen = skb_headlen(skb);
1211 if (toff >= headlen) {
1212 toff -= headlen;
1213 goto map_frag;
1214 }
1215
1216 /*
1217 * first map the linear region into the pages/partial map, skipping
1218 * any potential initial offset.
1219 */
1220 len = 0;
1221 while (len < headlen) {
1222 void *p = skb->data + len;
1223
1224 poff = (unsigned long) p & (PAGE_SIZE - 1);
1225 plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
1226 len += plen;
1227
1228 if (toff) {
1229 if (plen <= toff) {
1230 toff -= plen;
1231 continue;
1232 }
1233 plen -= toff;
1234 poff += toff;
1235 toff = 0;
1236 }
1237
1238 plen = min(plen, tlen);
1239 if (!plen)
1240 break;
1241
1242 /*
1243 * just jump directly to update and return, no point
1244 * in going over fragments when the output is full.
1245 */
1246 if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
1247 goto done;
1248
1249 tlen -= plen;
1250 }
1251
1252 /*
1253 * then map the fragments
1254 */
1255map_frag:
1256 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1257 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1258
1259 plen = f->size;
1260 poff = f->page_offset;
1261
1262 if (toff) {
1263 if (plen <= toff) {
1264 toff -= plen;
1265 continue;
1266 }
1267 plen -= toff;
1268 poff += toff;
1269 toff = 0;
1270 }
1271
1272 plen = min(plen, tlen);
1273 if (!plen)
1274 break;
1275
1276 if (spd_fill_page(spd, f->page, plen, poff, skb))
1277 break;
1278
1279 tlen -= plen;
1280 }
1281
1282done:
1283 if (spd->nr_pages - nr_pages) {
1284 *offset = 0;
1285 *total_len = tlen;
1286 return 0;
1287 }
1288err:
1289 return 1;
1290}
1291
1292/*
1293 * Map data from the skb to a pipe. Should handle the linear part,
1294 * the fragments, and the frag list. It does NOT handle frag lists within
1295 * the frag list, if such a thing exists. We'd probably need to recurse to
1296 * handle that cleanly.
1297 */
1298int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
1299 struct pipe_inode_info *pipe, unsigned int tlen,
1300 unsigned int flags)
1301{
1302 struct partial_page partial[PIPE_BUFFERS];
1303 struct page *pages[PIPE_BUFFERS];
1304 struct splice_pipe_desc spd = {
1305 .pages = pages,
1306 .partial = partial,
1307 .flags = flags,
1308 .ops = &sock_pipe_buf_ops,
1309 .spd_release = sock_spd_release,
1310 };
1311 struct sk_buff *skb;
1312
1313 /*
1314 * I'd love to avoid the clone here, but tcp_read_sock()
1315 * ignores reference counts and unconditionally kills the sk_buff
1316 * on return from the actor.
1317 */
1318 skb = skb_clone(__skb, GFP_KERNEL);
1319 if (unlikely(!skb))
1320 return -ENOMEM;
1321
1322 /*
1323 * __skb_splice_bits() only fails if the output has no room left,
1324 * so no point in going over the frag_list for the error case.
1325 */
1326 if (__skb_splice_bits(skb, &offset, &tlen, &spd))
1327 goto done;
1328 else if (!tlen)
1329 goto done;
1330
1331 /*
1332 * now see if we have a frag_list to map
1333 */
1334 if (skb_shinfo(skb)->frag_list) {
1335 struct sk_buff *list = skb_shinfo(skb)->frag_list;
1336
1337 for (; list && tlen; list = list->next) {
1338 if (__skb_splice_bits(list, &offset, &tlen, &spd))
1339 break;
1340 }
1341 }
1342
1343done:
1344 /*
1345 * drop our reference to the clone, the pipe consumption will
1346 * drop the rest.
1347 */
1348 kfree_skb(skb);
1349
1350 if (spd.nr_pages) {
1351 int ret;
1352
1353 /*
1354 * Drop the socket lock, otherwise we have reverse
1355 * locking dependencies between sk_lock and i_mutex
1356 * here as compared to sendfile(). We enter here
1357 * with the socket lock held, and splice_to_pipe() will
1358 * grab the pipe inode lock. For sendfile() emulation,
1359 * we call into ->sendpage() with the i_mutex lock held
1360 * and networking will grab the socket lock.
1361 */
1362 release_sock(__skb->sk);
1363 ret = splice_to_pipe(pipe, &spd);
1364 lock_sock(__skb->sk);
1365 return ret;
1366 }
1367
1368 return 0;
1369}
1370
1125/** 1371/**
1126 * skb_store_bits - store bits from kernel buffer to skb 1372 * skb_store_bits - store bits from kernel buffer to skb
1127 * @skb: destination buffer 1373 * @skb: destination buffer
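
[Editor's note] skb_splice_bits() is the receive-side plumbing behind splice(2) on sockets, which this series introduces for TCP. A userspace sketch of how it gets exercised, assuming a kernel with socket splice support; error handling is trimmed and relay_example() is illustrative:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>

    /* move up to len bytes from a connected socket to an output fd
     * through a pipe, without copying through a userspace buffer */
    static int relay_example(int sockfd, int outfd, size_t len)
    {
            int p[2];
            ssize_t n;

            if (pipe(p) < 0)
                    return -1;

            while (len > 0 &&
                   (n = splice(sockfd, NULL, p[1], NULL, len,
                               SPLICE_F_MOVE)) > 0) {
                    splice(p[0], NULL, outfd, NULL, n, SPLICE_F_MOVE);
                    len -= n;
            }

            close(p[0]);
            close(p[1]);
            return 0;
    }
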
diff --git a/net/core/sock.c b/net/core/sock.c
index c519b439b8b1..1c4b1cd16d65 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
154 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , 154 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
155 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , 155 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
156 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , 156 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
157 "sk_lock-27" , "sk_lock-28" , "sk_lock-29" , 157 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
158 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 158 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
159 "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX" 159 "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
160}; 160};
@@ -168,7 +168,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
168 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , 168 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
169 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , 169 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
170 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , 170 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
171 "slock-27" , "slock-28" , "slock-29" , 171 "slock-27" , "slock-28" , "slock-AF_CAN" ,
172 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 172 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
173 "slock-AF_RXRPC" , "slock-AF_MAX" 173 "slock-AF_RXRPC" , "slock-AF_MAX"
174}; 174};
@@ -282,6 +282,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
282 if (err) 282 if (err)
283 goto out; 283 goto out;
284 284
285 if (!sk_rmem_schedule(sk, skb->truesize)) {
286 err = -ENOBUFS;
287 goto out;
288 }
289
285 skb->dev = NULL; 290 skb->dev = NULL;
286 skb_set_owner_r(skb, sk); 291 skb_set_owner_r(skb, sk);
287 292
@@ -419,6 +424,14 @@ out:
419 return ret; 424 return ret;
420} 425}
421 426
427static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
428{
429 if (valbool)
430 sock_set_flag(sk, bit);
431 else
432 sock_reset_flag(sk, bit);
433}
434
422/* 435/*
423 * This is meant for all protocols to use and covers goings on 436 * This is meant for all protocols to use and covers goings on
424 * at the socket level. Everything here is generic. 437 * at the socket level. Everything here is generic.
@@ -463,11 +476,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
463 case SO_DEBUG: 476 case SO_DEBUG:
464 if (val && !capable(CAP_NET_ADMIN)) { 477 if (val && !capable(CAP_NET_ADMIN)) {
465 ret = -EACCES; 478 ret = -EACCES;
466 } 479 } else
467 else if (valbool) 480 sock_valbool_flag(sk, SOCK_DBG, valbool);
468 sock_set_flag(sk, SOCK_DBG);
469 else
470 sock_reset_flag(sk, SOCK_DBG);
471 break; 481 break;
472 case SO_REUSEADDR: 482 case SO_REUSEADDR:
473 sk->sk_reuse = valbool; 483 sk->sk_reuse = valbool;
@@ -477,10 +487,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
477 ret = -ENOPROTOOPT; 487 ret = -ENOPROTOOPT;
478 break; 488 break;
479 case SO_DONTROUTE: 489 case SO_DONTROUTE:
480 if (valbool) 490 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
481 sock_set_flag(sk, SOCK_LOCALROUTE);
482 else
483 sock_reset_flag(sk, SOCK_LOCALROUTE);
484 break; 491 break;
485 case SO_BROADCAST: 492 case SO_BROADCAST:
486 sock_valbool_flag(sk, SOCK_BROADCAST, valbool); 493 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
@@ -1105,7 +1112,9 @@ void sock_rfree(struct sk_buff *skb)
1105{ 1112{
1106 struct sock *sk = skb->sk; 1113 struct sock *sk = skb->sk;
1107 1114
1115 skb_truesize_check(skb);
1108 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1116 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1117 sk_mem_uncharge(skb->sk, skb->truesize);
1109} 1118}
1110 1119
1111 1120
@@ -1382,6 +1391,103 @@ int sk_wait_data(struct sock *sk, long *timeo)
1382 1391
1383EXPORT_SYMBOL(sk_wait_data); 1392EXPORT_SYMBOL(sk_wait_data);
1384 1393
1394/**
1395 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
1396 * @sk: socket
1397 * @size: memory size to allocate
1398 * @kind: allocation type
1399 *
1400 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
1401 * rmem allocation. This function assumes that protocols which have
1402 * memory_pressure use sk_wmem_queued as write buffer accounting.
1403 */
1404int __sk_mem_schedule(struct sock *sk, int size, int kind)
1405{
1406 struct proto *prot = sk->sk_prot;
1407 int amt = sk_mem_pages(size);
1408 int allocated;
1409
1410 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1411 allocated = atomic_add_return(amt, prot->memory_allocated);
1412
1413 /* Under limit. */
1414 if (allocated <= prot->sysctl_mem[0]) {
1415 if (prot->memory_pressure && *prot->memory_pressure)
1416 *prot->memory_pressure = 0;
1417 return 1;
1418 }
1419
1420 /* Under pressure. */
1421 if (allocated > prot->sysctl_mem[1])
1422 if (prot->enter_memory_pressure)
1423 prot->enter_memory_pressure();
1424
1425 /* Over hard limit. */
1426 if (allocated > prot->sysctl_mem[2])
1427 goto suppress_allocation;
1428
1429 /* guarantee minimum buffer size under pressure */
1430 if (kind == SK_MEM_RECV) {
1431 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
1432 return 1;
1433 } else { /* SK_MEM_SEND */
1434 if (sk->sk_type == SOCK_STREAM) {
1435 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
1436 return 1;
1437 } else if (atomic_read(&sk->sk_wmem_alloc) <
1438 prot->sysctl_wmem[0])
1439 return 1;
1440 }
1441
1442 if (prot->memory_pressure) {
1443 if (!*prot->memory_pressure ||
1444 prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
1445 sk_mem_pages(sk->sk_wmem_queued +
1446 atomic_read(&sk->sk_rmem_alloc) +
1447 sk->sk_forward_alloc))
1448 return 1;
1449 }
1450
1451suppress_allocation:
1452
1453 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
1454 sk_stream_moderate_sndbuf(sk);
1455
1456 /* Fail only if socket is _under_ its sndbuf.
1457 * In this case we cannot block, so we have to fail.
1458 */
1459 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
1460 return 1;
1461 }
1462
1463 /* Alas. Undo changes. */
1464 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1465 atomic_sub(amt, prot->memory_allocated);
1466 return 0;
1467}
1468
1469EXPORT_SYMBOL(__sk_mem_schedule);
1470
1471/**
1472 * __sk_mem_reclaim - reclaim memory_allocated
1473 * @sk: socket
1474 */
1475void __sk_mem_reclaim(struct sock *sk)
1476{
1477 struct proto *prot = sk->sk_prot;
1478
1479 atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
1480 prot->memory_allocated);
1481 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
1482
1483 if (prot->memory_pressure && *prot->memory_pressure &&
1484 (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
1485 *prot->memory_pressure = 0;
1486}
1487
1488EXPORT_SYMBOL(__sk_mem_reclaim);
1489
1490
1385/* 1491/*
1386 * Set of default routines for initialising struct proto_ops when 1492 * Set of default routines for initialising struct proto_ops when
1387 * the protocol does not support a particular function. In certain 1493 * the protocol does not support a particular function. In certain
@@ -1496,7 +1602,7 @@ static void sock_def_error_report(struct sock *sk)
1496 read_lock(&sk->sk_callback_lock); 1602 read_lock(&sk->sk_callback_lock);
1497 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1603 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1498 wake_up_interruptible(sk->sk_sleep); 1604 wake_up_interruptible(sk->sk_sleep);
1499 sk_wake_async(sk,0,POLL_ERR); 1605 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1500 read_unlock(&sk->sk_callback_lock); 1606 read_unlock(&sk->sk_callback_lock);
1501} 1607}
1502 1608
@@ -1505,7 +1611,7 @@ static void sock_def_readable(struct sock *sk, int len)
1505 read_lock(&sk->sk_callback_lock); 1611 read_lock(&sk->sk_callback_lock);
1506 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1612 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1507 wake_up_interruptible(sk->sk_sleep); 1613 wake_up_interruptible(sk->sk_sleep);
1508 sk_wake_async(sk,1,POLL_IN); 1614 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1509 read_unlock(&sk->sk_callback_lock); 1615 read_unlock(&sk->sk_callback_lock);
1510} 1616}
1511 1617
@@ -1522,7 +1628,7 @@ static void sock_def_write_space(struct sock *sk)
1522 1628
1523 /* Should agree with poll, otherwise some programs break */ 1629 /* Should agree with poll, otherwise some programs break */
1524 if (sock_writeable(sk)) 1630 if (sock_writeable(sk))
1525 sk_wake_async(sk, 2, POLL_OUT); 1631 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1526 } 1632 }
1527 1633
1528 read_unlock(&sk->sk_callback_lock); 1634 read_unlock(&sk->sk_callback_lock);
@@ -1537,7 +1643,7 @@ void sk_send_sigurg(struct sock *sk)
1537{ 1643{
1538 if (sk->sk_socket && sk->sk_socket->file) 1644 if (sk->sk_socket && sk->sk_socket->file)
1539 if (send_sigurg(&sk->sk_socket->file->f_owner)) 1645 if (send_sigurg(&sk->sk_socket->file->f_owner))
1540 sk_wake_async(sk, 3, POLL_PRI); 1646 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
1541} 1647}
1542 1648
1543void sk_reset_timer(struct sock *sk, struct timer_list* timer, 1649void sk_reset_timer(struct sock *sk, struct timer_list* timer,
@@ -1611,6 +1717,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1611 sk->sk_stamp = ktime_set(-1L, -1L); 1717 sk->sk_stamp = ktime_set(-1L, -1L);
1612 1718
1613 atomic_set(&sk->sk_refcnt, 1); 1719 atomic_set(&sk->sk_refcnt, 1);
1720 atomic_set(&sk->sk_drops, 0);
1614} 1721}
1615 1722
1616void fastcall lock_sock_nested(struct sock *sk, int subclass) 1723void fastcall lock_sock_nested(struct sock *sk, int subclass)
@@ -1801,65 +1908,15 @@ EXPORT_SYMBOL(sk_common_release);
1801static DEFINE_RWLOCK(proto_list_lock); 1908static DEFINE_RWLOCK(proto_list_lock);
1802static LIST_HEAD(proto_list); 1909static LIST_HEAD(proto_list);
1803 1910
1804#ifdef CONFIG_SMP
1805/*
1806 * Define default functions to keep track of inuse sockets per protocol
1807 * Note that often used protocols use dedicated functions to get a speed increase.
1808 * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
1809 */
1810static void inuse_add(struct proto *prot, int inc)
1811{
1812 per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
1813}
1814
1815static int inuse_get(const struct proto *prot)
1816{
1817 int res = 0, cpu;
1818 for_each_possible_cpu(cpu)
1819 res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
1820 return res;
1821}
1822
1823static int inuse_init(struct proto *prot)
1824{
1825 if (!prot->inuse_getval || !prot->inuse_add) {
1826 prot->inuse_ptr = alloc_percpu(int);
1827 if (prot->inuse_ptr == NULL)
1828 return -ENOBUFS;
1829
1830 prot->inuse_getval = inuse_get;
1831 prot->inuse_add = inuse_add;
1832 }
1833 return 0;
1834}
1835
1836static void inuse_fini(struct proto *prot)
1837{
1838 if (prot->inuse_ptr != NULL) {
1839 free_percpu(prot->inuse_ptr);
1840 prot->inuse_ptr = NULL;
1841 prot->inuse_getval = NULL;
1842 prot->inuse_add = NULL;
1843 }
1844}
1845#else
1846static inline int inuse_init(struct proto *prot)
1847{
1848 return 0;
1849}
1850
1851static inline void inuse_fini(struct proto *prot)
1852{
1853}
1854#endif
1855
1856int proto_register(struct proto *prot, int alloc_slab) 1911int proto_register(struct proto *prot, int alloc_slab)
1857{ 1912{
1858 char *request_sock_slab_name = NULL; 1913 char *request_sock_slab_name = NULL;
1859 char *timewait_sock_slab_name; 1914 char *timewait_sock_slab_name;
1860 1915
1861 if (inuse_init(prot)) 1916 if (sock_prot_inuse_init(prot) != 0) {
1917 printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
1862 goto out; 1918 goto out;
1919 }
1863 1920
1864 if (alloc_slab) { 1921 if (alloc_slab) {
1865 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, 1922 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
@@ -1927,7 +1984,7 @@ out_free_sock_slab:
1927 kmem_cache_destroy(prot->slab); 1984 kmem_cache_destroy(prot->slab);
1928 prot->slab = NULL; 1985 prot->slab = NULL;
1929out_free_inuse: 1986out_free_inuse:
1930 inuse_fini(prot); 1987 sock_prot_inuse_free(prot);
1931out: 1988out:
1932 return -ENOBUFS; 1989 return -ENOBUFS;
1933} 1990}
@@ -1940,7 +1997,8 @@ void proto_unregister(struct proto *prot)
1940 list_del(&prot->node); 1997 list_del(&prot->node);
1941 write_unlock(&proto_list_lock); 1998 write_unlock(&proto_list_lock);
1942 1999
1943 inuse_fini(prot); 2000 sock_prot_inuse_free(prot);
2001
1944 if (prot->slab != NULL) { 2002 if (prot->slab != NULL) {
1945 kmem_cache_destroy(prot->slab); 2003 kmem_cache_destroy(prot->slab);
1946 prot->slab = NULL; 2004 prot->slab = NULL;
@@ -1967,6 +2025,7 @@ EXPORT_SYMBOL(proto_unregister);
1967 2025
1968#ifdef CONFIG_PROC_FS 2026#ifdef CONFIG_PROC_FS
1969static void *proto_seq_start(struct seq_file *seq, loff_t *pos) 2027static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2028 __acquires(proto_list_lock)
1970{ 2029{
1971 read_lock(&proto_list_lock); 2030 read_lock(&proto_list_lock);
1972 return seq_list_start_head(&proto_list, *pos); 2031 return seq_list_start_head(&proto_list, *pos);
@@ -1978,6 +2037,7 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1978} 2037}
1979 2038
1980static void proto_seq_stop(struct seq_file *seq, void *v) 2039static void proto_seq_stop(struct seq_file *seq, void *v)
2040 __releases(proto_list_lock)
1981{ 2041{
1982 read_unlock(&proto_list_lock); 2042 read_unlock(&proto_list_lock);
1983} 2043}
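
[Editor's note] __sk_mem_schedule() and __sk_mem_reclaim() account socket memory in SK_MEM_QUANTUM units (one page), generalizing the stream-only helpers removed from net/core/stream.c below. The rounding arithmetic, sketched assuming PAGE_SIZE == 4096 (so SK_MEM_QUANTUM_SHIFT == 12):

    #define EXAMPLE_QUANTUM         4096
    #define EXAMPLE_QUANTUM_SHIFT   12

    /* round a byte count up to whole quanta, as sk_mem_pages() does */
    static inline int example_mem_pages(int amt)
    {
            return (amt + EXAMPLE_QUANTUM - 1) >> EXAMPLE_QUANTUM_SHIFT;
    }

    /* e.g. charging a 1500-byte skb reserves one 4096-byte quantum;
     * the 2596-byte remainder stays in sk_forward_alloc for later
     * allocations, and __sk_mem_reclaim() returns whole quanta to
     * the protocol's memory_allocated pool */
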
diff --git a/net/core/stream.c b/net/core/stream.c
index 755bacbcb321..4a0ad152c9c4 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk)
35 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 35 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
36 wake_up_interruptible(sk->sk_sleep); 36 wake_up_interruptible(sk->sk_sleep);
37 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 37 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
38 sock_wake_async(sock, 2, POLL_OUT); 38 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
39 } 39 }
40} 40}
41 41
@@ -172,17 +172,6 @@ do_interrupted:
172 172
173EXPORT_SYMBOL(sk_stream_wait_memory); 173EXPORT_SYMBOL(sk_stream_wait_memory);
174 174
175void sk_stream_rfree(struct sk_buff *skb)
176{
177 struct sock *sk = skb->sk;
178
179 skb_truesize_check(skb);
180 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
181 sk->sk_forward_alloc += skb->truesize;
182}
183
184EXPORT_SYMBOL(sk_stream_rfree);
185
186int sk_stream_error(struct sock *sk, int flags, int err) 175int sk_stream_error(struct sock *sk, int flags, int err)
187{ 176{
188 if (err == -EPIPE) 177 if (err == -EPIPE)
@@ -194,76 +183,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
194 183
195EXPORT_SYMBOL(sk_stream_error); 184EXPORT_SYMBOL(sk_stream_error);
196 185
197void __sk_stream_mem_reclaim(struct sock *sk)
198{
199 atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
200 sk->sk_prot->memory_allocated);
201 sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
202 if (*sk->sk_prot->memory_pressure &&
203 (atomic_read(sk->sk_prot->memory_allocated) <
204 sk->sk_prot->sysctl_mem[0]))
205 *sk->sk_prot->memory_pressure = 0;
206}
207
208EXPORT_SYMBOL(__sk_stream_mem_reclaim);
209
210int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
211{
212 int amt = sk_stream_pages(size);
213
214 sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
215 atomic_add(amt, sk->sk_prot->memory_allocated);
216
217 /* Under limit. */
218 if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
219 if (*sk->sk_prot->memory_pressure)
220 *sk->sk_prot->memory_pressure = 0;
221 return 1;
222 }
223
224 /* Over hard limit. */
225 if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
226 sk->sk_prot->enter_memory_pressure();
227 goto suppress_allocation;
228 }
229
230 /* Under pressure. */
231 if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
232 sk->sk_prot->enter_memory_pressure();
233
234 if (kind) {
235 if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
236 return 1;
237 } else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
238 return 1;
239
240 if (!*sk->sk_prot->memory_pressure ||
241 sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
242 sk_stream_pages(sk->sk_wmem_queued +
243 atomic_read(&sk->sk_rmem_alloc) +
244 sk->sk_forward_alloc))
245 return 1;
246
247suppress_allocation:
248
249 if (!kind) {
250 sk_stream_moderate_sndbuf(sk);
251
252 /* Fail only if socket is _under_ its sndbuf.
253 * In this case we cannot block, so that we have to fail.
254 */
255 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
256 return 1;
257 }
258
259 /* Alas. Undo changes. */
260 sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
261 atomic_sub(amt, sk->sk_prot->memory_allocated);
262 return 0;
263}
264
265EXPORT_SYMBOL(sk_stream_mem_schedule);
266
267void sk_stream_kill_queues(struct sock *sk) 186void sk_stream_kill_queues(struct sock *sk)
268{ 187{
269 /* First the read buffer. */ 188 /* First the read buffer. */
@@ -276,7 +195,7 @@ void sk_stream_kill_queues(struct sock *sk)
276 BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); 195 BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
277 196
278 /* Account for returned memory. */ 197 /* Account for returned memory. */
279 sk_stream_mem_reclaim(sk); 198 sk_mem_reclaim(sk);
280 199
281 BUG_TRAP(!sk->sk_wmem_queued); 200 BUG_TRAP(!sk->sk_wmem_queued);
282 BUG_TRAP(!sk->sk_forward_alloc); 201 BUG_TRAP(!sk->sk_forward_alloc);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 113cc728dc31..130338f83ae5 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,12 +10,11 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/socket.h> 11#include <linux/socket.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/init.h>
13#include <net/sock.h> 14#include <net/sock.h>
14#include <net/xfrm.h> 15#include <net/xfrm.h>
15 16
16#ifdef CONFIG_SYSCTL 17static struct ctl_table net_core_table[] = {
17
18ctl_table core_table[] = {
19#ifdef CONFIG_NET 18#ifdef CONFIG_NET
20 { 19 {
21 .ctl_name = NET_CORE_WMEM_MAX, 20 .ctl_name = NET_CORE_WMEM_MAX,
@@ -128,7 +127,7 @@ ctl_table core_table[] = {
128 { 127 {
129 .ctl_name = NET_CORE_SOMAXCONN, 128 .ctl_name = NET_CORE_SOMAXCONN,
130 .procname = "somaxconn", 129 .procname = "somaxconn",
131 .data = &sysctl_somaxconn, 130 .data = &init_net.sysctl_somaxconn,
132 .maxlen = sizeof(int), 131 .maxlen = sizeof(int),
133 .mode = 0644, 132 .mode = 0644,
134 .proc_handler = &proc_dointvec 133 .proc_handler = &proc_dointvec
@@ -152,4 +151,65 @@ ctl_table core_table[] = {
152 { .ctl_name = 0 } 151 { .ctl_name = 0 }
153}; 152};
154 153
155#endif 154static __net_initdata struct ctl_path net_core_path[] = {
155 { .procname = "net", .ctl_name = CTL_NET, },
156 { .procname = "core", .ctl_name = NET_CORE, },
157 { },
158};
159
160static __net_init int sysctl_core_net_init(struct net *net)
161{
162 struct ctl_table *tbl, *tmp;
163
164 net->sysctl_somaxconn = SOMAXCONN;
165
166 tbl = net_core_table;
167 if (net != &init_net) {
168 tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL);
169 if (tbl == NULL)
170 goto err_dup;
171
172 for (tmp = tbl; tmp->procname; tmp++) {
173 if (tmp->data >= (void *)&init_net &&
174 tmp->data < (void *)(&init_net + 1))
175 tmp->data += (char *)net - (char *)&init_net;
176 else
177 tmp->mode &= ~0222;
178 }
179 }
180
181 net->sysctl_core_hdr = register_net_sysctl_table(net,
182 net_core_path, tbl);
183 if (net->sysctl_core_hdr == NULL)
184 goto err_reg;
185
186 return 0;
187
188err_reg:
189 if (tbl != net_core_table)
190 kfree(tbl);
191err_dup:
192 return -ENOMEM;
193}
194
195static __net_exit void sysctl_core_net_exit(struct net *net)
196{
197 struct ctl_table *tbl;
198
199 tbl = net->sysctl_core_hdr->ctl_table_arg;
200 unregister_net_sysctl_table(net->sysctl_core_hdr);
201 BUG_ON(tbl == net_core_table);
202 kfree(tbl);
203}
204
205static __net_initdata struct pernet_operations sysctl_core_ops = {
206 .init = sysctl_core_net_init,
207 .exit = sysctl_core_net_exit,
208};
209
210static __init int sysctl_core_init(void)
211{
212 return register_pernet_subsys(&sysctl_core_ops);
213}
214
215__initcall(sysctl_core_init);
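The notable step in sysctl_core_net_init() above is the relocation loop: after kmemdup()ing the template table, every ->data pointer that falls inside init_net is rebased by the byte offset between the new namespace and init_net, while entries that cannot be made per-namespace simply lose their write bits (mode &= ~0222). A standalone sketch of that pointer-rebasing idiom, using hypothetical types in place of struct net and struct ctl_table:

#include <stdio.h>
#include <string.h>

struct cfg { int somaxconn; };			/* stand-in for struct net */

static struct cfg init_cfg = { 128 };		/* stand-in for init_net */

int main(void)
{
	struct cfg per_ns;
	void *data = &init_cfg.somaxconn;	/* the table entry's ->data */

	memcpy(&per_ns, &init_cfg, sizeof(per_ns));

	/* same test as the kernel loop: does ->data point into init_cfg? */
	if (data >= (void *)&init_cfg && data < (void *)(&init_cfg + 1))
		data = (char *)data + ((char *)&per_ns - (char *)&init_cfg);

	*(int *)data = 1024;			/* now writes per_ns, not init_cfg */
	printf("init=%d per-ns=%d\n", init_cfg.somaxconn, per_ns.somaxconn);
	return 0;
}

The duplicated table is later freed in sysctl_core_net_exit() via ctl_table_arg, with the BUG_ON() guarding against accidentally freeing the shared template.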
diff --git a/net/core/utils.c b/net/core/utils.c
index 0bf17da40d52..8031eb59054e 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -91,17 +91,6 @@ EXPORT_SYMBOL(in_aton);
91#define IN6PTON_NULL 0x20000000 /* first/tail */ 91#define IN6PTON_NULL 0x20000000 /* first/tail */
92#define IN6PTON_UNKNOWN 0x40000000 92#define IN6PTON_UNKNOWN 0x40000000
93 93
94static inline int digit2bin(char c, int delim)
95{
96 if (c == delim || c == '\0')
97 return IN6PTON_DELIM;
98 if (c == '.')
99 return IN6PTON_DOT;
100 if (c >= '0' && c <= '9')
101 return (IN6PTON_DIGIT | (c - '0'));
102 return IN6PTON_UNKNOWN;
103}
104
105static inline int xdigit2bin(char c, int delim) 94static inline int xdigit2bin(char c, int delim)
106{ 95{
107 if (c == delim || c == '\0') 96 if (c == delim || c == '\0')
@@ -293,3 +282,19 @@ out:
293} 282}
294 283
295EXPORT_SYMBOL(in6_pton); 284EXPORT_SYMBOL(in6_pton);
285
286void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
287 __be32 from, __be32 to, int pseudohdr)
288{
289 __be32 diff[] = { ~from, to };
290 if (skb->ip_summed != CHECKSUM_PARTIAL) {
291 *sum = csum_fold(csum_partial(diff, sizeof(diff),
292 ~csum_unfold(*sum)));
293 if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
294 skb->csum = ~csum_partial(diff, sizeof(diff),
295 ~skb->csum);
296 } else if (pseudohdr)
297 *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
298 csum_unfold(*sum)));
299}
300EXPORT_SYMBOL(inet_proto_csum_replace4);
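inet_proto_csum_replace4() above is an incremental checksum update in the style of RFC 1624: instead of re-summing the whole packet after a field changes from m to m', it computes HC' = ~(~HC + ~m + m') by folding the pair {~from, to} into the existing sum. A user-space worked example of the same arithmetic on 16-bit words (plain stdint types; no skb, csum_fold() or csum_partial() involved):

#include <stdio.h>
#include <stdint.h>

/* fold a 32-bit one's-complement accumulator down to 16 bits */
static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint16_t data[2] = { 0x1234, 0x5678 };	/* toy "packet" */
	uint16_t csum = ~fold((uint32_t)data[0] + data[1]);
	uint16_t from = 0x5678, to = 0x9abc;
	uint16_t patched, full;
	uint32_t acc;

	acc  = (uint16_t)~csum;			/* ~HC */
	acc += (uint16_t)~from;			/* + ~m */
	acc += to;				/* + m' */
	patched = ~fold(acc);			/* HC'  */

	data[1] = to;				/* rewrite the field itself */
	full = ~fold((uint32_t)data[0] + data[1]);
	printf("patched=%04x recomputed=%04x\n", patched, full);
	return 0;
}

Both printed values agree (0x530f for these inputs), which is why NAT-style helpers can patch checksums in O(1) per rewritten field.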
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 0549e4719b13..7aa2a7acc7ec 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -1,6 +1,7 @@
1menuconfig IP_DCCP 1menuconfig IP_DCCP
2 tristate "The DCCP Protocol (EXPERIMENTAL)" 2 tristate "The DCCP Protocol (EXPERIMENTAL)"
3 depends on INET && EXPERIMENTAL 3 depends on INET && EXPERIMENTAL
4 select IP_DCCP_CCID2
4 ---help--- 5 ---help---
5 Datagram Congestion Control Protocol (RFC 4340) 6 Datagram Congestion Control Protocol (RFC 4340)
6 7
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 83378f379f72..6de4bd195d28 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -30,7 +30,7 @@ static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
30 kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); 30 kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
31 31
32 if (avr != NULL) 32 if (avr != NULL)
33 INIT_LIST_HEAD(&avr->dccpavr_node); 33 INIT_LIST_HEAD(&avr->avr_node);
34 34
35 return avr; 35 return avr;
36} 36}
@@ -40,7 +40,7 @@ static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
40 if (unlikely(avr == NULL)) 40 if (unlikely(avr == NULL))
41 return; 41 return;
42 /* Check if deleting a linked record */ 42 /* Check if deleting a linked record */
43 WARN_ON(!list_empty(&avr->dccpavr_node)); 43 WARN_ON(!list_empty(&avr->avr_node));
44 kmem_cache_free(dccp_ackvec_record_slab, avr); 44 kmem_cache_free(dccp_ackvec_record_slab, avr);
45} 45}
46 46
@@ -52,16 +52,15 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
52 * just add the AVR at the head of the list. 52 * just add the AVR at the head of the list.
53 * -sorbo. 53 * -sorbo.
54 */ 54 */
55 if (!list_empty(&av->dccpav_records)) { 55 if (!list_empty(&av->av_records)) {
56 const struct dccp_ackvec_record *head = 56 const struct dccp_ackvec_record *head =
57 list_entry(av->dccpav_records.next, 57 list_entry(av->av_records.next,
58 struct dccp_ackvec_record, 58 struct dccp_ackvec_record,
59 dccpavr_node); 59 avr_node);
60 BUG_ON(before48(avr->dccpavr_ack_seqno, 60 BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
61 head->dccpavr_ack_seqno));
62 } 61 }
63 62
64 list_add(&avr->dccpavr_node, &av->dccpav_records); 63 list_add(&avr->avr_node, &av->av_records);
65} 64}
66 65
67int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) 66int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
@@ -69,9 +68,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
69 struct dccp_sock *dp = dccp_sk(sk); 68 struct dccp_sock *dp = dccp_sk(sk);
70 struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; 69 struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
71 /* Figure out how many options we need to represent the ackvec */ 70 /* Figure out how many options we need to represent the ackvec */
72 const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len, 71 const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
73 DCCP_MAX_ACKVEC_OPT_LEN); 72 u16 len = av->av_vec_len + 2 * nr_opts, i;
74 u16 len = av->dccpav_vec_len + 2 * nr_opts, i;
75 u32 elapsed_time; 73 u32 elapsed_time;
76 const unsigned char *tail, *from; 74 const unsigned char *tail, *from;
77 unsigned char *to; 75 unsigned char *to;
@@ -81,7 +79,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
81 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) 79 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
82 return -1; 80 return -1;
83 81
84 delta = ktime_us_delta(ktime_get_real(), av->dccpav_time); 82 delta = ktime_us_delta(ktime_get_real(), av->av_time);
85 elapsed_time = delta / 10; 83 elapsed_time = delta / 10;
86 84
87 if (elapsed_time != 0 && 85 if (elapsed_time != 0 &&
@@ -95,9 +93,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
95 DCCP_SKB_CB(skb)->dccpd_opt_len += len; 93 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
96 94
97 to = skb_push(skb, len); 95 to = skb_push(skb, len);
98 len = av->dccpav_vec_len; 96 len = av->av_vec_len;
99 from = av->dccpav_buf + av->dccpav_buf_head; 97 from = av->av_buf + av->av_buf_head;
100 tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN; 98 tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
101 99
102 for (i = 0; i < nr_opts; ++i) { 100 for (i = 0; i < nr_opts; ++i) {
103 int copylen = len; 101 int copylen = len;
@@ -116,7 +114,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
116 to += tailsize; 114 to += tailsize;
117 len -= tailsize; 115 len -= tailsize;
118 copylen -= tailsize; 116 copylen -= tailsize;
119 from = av->dccpav_buf; 117 from = av->av_buf;
120 } 118 }
121 119
122 memcpy(to, from, copylen); 120 memcpy(to, from, copylen);
@@ -134,19 +132,19 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
134 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will 132 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
135 * equal buf_nonce. 133 * equal buf_nonce.
136 */ 134 */
137 avr->dccpavr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; 135 avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
138 avr->dccpavr_ack_ptr = av->dccpav_buf_head; 136 avr->avr_ack_ptr = av->av_buf_head;
139 avr->dccpavr_ack_ackno = av->dccpav_buf_ackno; 137 avr->avr_ack_ackno = av->av_buf_ackno;
140 avr->dccpavr_ack_nonce = av->dccpav_buf_nonce; 138 avr->avr_ack_nonce = av->av_buf_nonce;
141 avr->dccpavr_sent_len = av->dccpav_vec_len; 139 avr->avr_sent_len = av->av_vec_len;
142 140
143 dccp_ackvec_insert_avr(av, avr); 141 dccp_ackvec_insert_avr(av, avr);
144 142
145 dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " 143 dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
146 "ack_ackno=%llu\n", 144 "ack_ackno=%llu\n",
147 dccp_role(sk), avr->dccpavr_sent_len, 145 dccp_role(sk), avr->avr_sent_len,
148 (unsigned long long)avr->dccpavr_ack_seqno, 146 (unsigned long long)avr->avr_ack_seqno,
149 (unsigned long long)avr->dccpavr_ack_ackno); 147 (unsigned long long)avr->avr_ack_ackno);
150 return 0; 148 return 0;
151} 149}
152 150
@@ -155,12 +153,12 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
155 struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); 153 struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
156 154
157 if (av != NULL) { 155 if (av != NULL) {
158 av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; 156 av->av_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
159 av->dccpav_buf_ackno = UINT48_MAX + 1; 157 av->av_buf_ackno = UINT48_MAX + 1;
160 av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; 158 av->av_buf_nonce = 0;
161 av->dccpav_time = ktime_set(0, 0); 159 av->av_time = ktime_set(0, 0);
162 av->dccpav_vec_len = 0; 160 av->av_vec_len = 0;
163 INIT_LIST_HEAD(&av->dccpav_records); 161 INIT_LIST_HEAD(&av->av_records);
164 } 162 }
165 163
166 return av; 164 return av;
@@ -171,12 +169,11 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
171 if (unlikely(av == NULL)) 169 if (unlikely(av == NULL))
172 return; 170 return;
173 171
174 if (!list_empty(&av->dccpav_records)) { 172 if (!list_empty(&av->av_records)) {
175 struct dccp_ackvec_record *avr, *next; 173 struct dccp_ackvec_record *avr, *next;
176 174
177 list_for_each_entry_safe(avr, next, &av->dccpav_records, 175 list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
178 dccpavr_node) { 176 list_del_init(&avr->avr_node);
179 list_del_init(&avr->dccpavr_node);
180 dccp_ackvec_record_delete(avr); 177 dccp_ackvec_record_delete(avr);
181 } 178 }
182 } 179 }
@@ -187,13 +184,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
187static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, 184static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
188 const u32 index) 185 const u32 index)
189{ 186{
190 return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; 187 return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
191} 188}
192 189
193static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, 190static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
194 const u32 index) 191 const u32 index)
195{ 192{
196 return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; 193 return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
197} 194}
198 195
199/* 196/*
@@ -208,29 +205,29 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
208 unsigned int gap; 205 unsigned int gap;
209 long new_head; 206 long new_head;
210 207
211 if (av->dccpav_vec_len + packets > DCCP_MAX_ACKVEC_LEN) 208 if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
212 return -ENOBUFS; 209 return -ENOBUFS;
213 210
214 gap = packets - 1; 211 gap = packets - 1;
215 new_head = av->dccpav_buf_head - packets; 212 new_head = av->av_buf_head - packets;
216 213
217 if (new_head < 0) { 214 if (new_head < 0) {
218 if (gap > 0) { 215 if (gap > 0) {
219 memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, 216 memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
220 gap + new_head + 1); 217 gap + new_head + 1);
221 gap = -new_head; 218 gap = -new_head;
222 } 219 }
223 new_head += DCCP_MAX_ACKVEC_LEN; 220 new_head += DCCP_MAX_ACKVEC_LEN;
224 } 221 }
225 222
226 av->dccpav_buf_head = new_head; 223 av->av_buf_head = new_head;
227 224
228 if (gap > 0) 225 if (gap > 0)
229 memset(av->dccpav_buf + av->dccpav_buf_head + 1, 226 memset(av->av_buf + av->av_buf_head + 1,
230 DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); 227 DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
231 228
232 av->dccpav_buf[av->dccpav_buf_head] = state; 229 av->av_buf[av->av_buf_head] = state;
233 av->dccpav_vec_len += packets; 230 av->av_vec_len += packets;
234 return 0; 231 return 0;
235} 232}
236 233
@@ -243,7 +240,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
243 /* 240 /*
244 * Check at the right places if the buffer is full, if it is, tell the 241 * Check at the right places if the buffer is full, if it is, tell the
245 * caller to start dropping packets till the HC-Sender acks our ACK 242 * caller to start dropping packets till the HC-Sender acks our ACK
246 * vectors, when we will free up space in dccpav_buf. 243 * vectors, when we will free up space in av_buf.
247 * 244 *
248 * We may well decide to do buffer compression, etc, but for now lets 245 * We may well decide to do buffer compression, etc, but for now lets
249 * just drop. 246 * just drop.
@@ -263,22 +260,20 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
263 */ 260 */
264 261
265 /* See if this is the first ackno being inserted */ 262 /* See if this is the first ackno being inserted */
266 if (av->dccpav_vec_len == 0) { 263 if (av->av_vec_len == 0) {
267 av->dccpav_buf[av->dccpav_buf_head] = state; 264 av->av_buf[av->av_buf_head] = state;
268 av->dccpav_vec_len = 1; 265 av->av_vec_len = 1;
269 } else if (after48(ackno, av->dccpav_buf_ackno)) { 266 } else if (after48(ackno, av->av_buf_ackno)) {
270 const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, 267 const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
271 ackno);
272 268
273 /* 269 /*
274 * Look if the state of this packet is the same as the 270 * Look if the state of this packet is the same as the
275 * previous ackno and, if so, whether we can bump the head len. 271 * previous ackno and, if so, whether we can bump the head len.
276 */ 272 */
277 if (delta == 1 && 273 if (delta == 1 &&
278 dccp_ackvec_state(av, av->dccpav_buf_head) == state && 274 dccp_ackvec_state(av, av->av_buf_head) == state &&
279 (dccp_ackvec_len(av, av->dccpav_buf_head) < 275 dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
280 DCCP_ACKVEC_LEN_MASK)) 276 av->av_buf[av->av_buf_head]++;
281 av->dccpav_buf[av->dccpav_buf_head]++;
282 else if (dccp_ackvec_set_buf_head_state(av, delta, state)) 277 else if (dccp_ackvec_set_buf_head_state(av, delta, state))
283 return -ENOBUFS; 278 return -ENOBUFS;
284 } else { 279 } else {
@@ -290,14 +285,14 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
290 * the byte corresponding to S. (Indexing structures 285 * the byte corresponding to S. (Indexing structures
291 * could reduce the complexity of this scan.) 286 * could reduce the complexity of this scan.)
292 */ 287 */
293 u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); 288 u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
294 u32 index = av->dccpav_buf_head; 289 u32 index = av->av_buf_head;
295 290
296 while (1) { 291 while (1) {
297 const u8 len = dccp_ackvec_len(av, index); 292 const u8 len = dccp_ackvec_len(av, index);
298 const u8 state = dccp_ackvec_state(av, index); 293 const u8 state = dccp_ackvec_state(av, index);
299 /* 294 /*
300 * valid packets not yet in dccpav_buf have a reserved 295 * valid packets not yet in av_buf have a reserved
301 * entry, with a len equal to 0. 296 * entry, with a len equal to 0.
302 */ 297 */
303 if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && 298 if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
@@ -305,7 +300,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
305 reserved seat! */ 300 reserved seat! */
306 dccp_pr_debug("Found %llu reserved seat!\n", 301 dccp_pr_debug("Found %llu reserved seat!\n",
307 (unsigned long long)ackno); 302 (unsigned long long)ackno);
308 av->dccpav_buf[index] = state; 303 av->av_buf[index] = state;
309 goto out; 304 goto out;
310 } 305 }
311 /* len == 0 means one packet */ 306 /* len == 0 means one packet */
@@ -318,8 +313,8 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
318 } 313 }
319 } 314 }
320 315
321 av->dccpav_buf_ackno = ackno; 316 av->av_buf_ackno = ackno;
322 av->dccpav_time = ktime_get_real(); 317 av->av_time = ktime_get_real();
323out: 318out:
324 return 0; 319 return 0;
325 320
@@ -349,9 +344,9 @@ void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
349 344
350void dccp_ackvec_print(const struct dccp_ackvec *av) 345void dccp_ackvec_print(const struct dccp_ackvec *av)
351{ 346{
352 dccp_ackvector_print(av->dccpav_buf_ackno, 347 dccp_ackvector_print(av->av_buf_ackno,
353 av->dccpav_buf + av->dccpav_buf_head, 348 av->av_buf + av->av_buf_head,
354 av->dccpav_vec_len); 349 av->av_vec_len);
355} 350}
356#endif 351#endif
357 352
@@ -361,17 +356,15 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
361 struct dccp_ackvec_record *next; 356 struct dccp_ackvec_record *next;
362 357
363 /* sort out vector length */ 358 /* sort out vector length */
364 if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr) 359 if (av->av_buf_head <= avr->avr_ack_ptr)
365 av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head; 360 av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
366 else 361 else
367 av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1 362 av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
368 - av->dccpav_buf_head 363 av->av_buf_head + avr->avr_ack_ptr;
369 + avr->dccpavr_ack_ptr;
370 364
371 /* free records */ 365 /* free records */
372 list_for_each_entry_safe_from(avr, next, &av->dccpav_records, 366 list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
373 dccpavr_node) { 367 list_del_init(&avr->avr_node);
374 list_del_init(&avr->dccpavr_node);
375 dccp_ackvec_record_delete(avr); 368 dccp_ackvec_record_delete(avr);
376 } 369 }
377} 370}
@@ -386,16 +379,16 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
386 * windows. We will be receiving ACKs for stuff we sent a while back 379 * windows. We will be receiving ACKs for stuff we sent a while back
387 * -sorbo. 380 * -sorbo.
388 */ 381 */
389 list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) { 382 list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
390 if (ackno == avr->dccpavr_ack_seqno) { 383 if (ackno == avr->avr_ack_seqno) {
391 dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " 384 dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
392 "ack_ackno=%llu, ACKED!\n", 385 "ack_ackno=%llu, ACKED!\n",
393 dccp_role(sk), 1, 386 dccp_role(sk), 1,
394 (unsigned long long)avr->dccpavr_ack_seqno, 387 (unsigned long long)avr->avr_ack_seqno,
395 (unsigned long long)avr->dccpavr_ack_ackno); 388 (unsigned long long)avr->avr_ack_ackno);
396 dccp_ackvec_throw_record(av, avr); 389 dccp_ackvec_throw_record(av, avr);
397 break; 390 break;
398 } else if (avr->dccpavr_ack_seqno > ackno) 391 } else if (avr->avr_ack_seqno > ackno)
399 break; /* old news */ 392 break; /* old news */
400 } 393 }
401} 394}
@@ -409,7 +402,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
409 struct dccp_ackvec_record *avr; 402 struct dccp_ackvec_record *avr;
410 403
411 /* Check if we actually sent an ACK vector */ 404 /* Check if we actually sent an ACK vector */
412 if (list_empty(&av->dccpav_records)) 405 if (list_empty(&av->av_records))
413 return; 406 return;
414 407
415 i = len; 408 i = len;
@@ -418,8 +411,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
418 * I think it might be more efficient to work backwards. See comment on 411 * I think it might be more efficient to work backwards. See comment on
419 * rcv_ackno. -sorbo. 412 * rcv_ackno. -sorbo.
420 */ 413 */
421 avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record, 414 avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
422 dccpavr_node);
423 while (i--) { 415 while (i--) {
424 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; 416 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
425 u64 ackno_end_rl; 417 u64 ackno_end_rl;
@@ -430,15 +422,14 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
430 * If our AVR sequence number is greater than the ack, go 422 * If our AVR sequence number is greater than the ack, go
431 * forward in the AVR list until it is not so. 423 * forward in the AVR list until it is not so.
432 */ 424 */
433 list_for_each_entry_from(avr, &av->dccpav_records, 425 list_for_each_entry_from(avr, &av->av_records, avr_node) {
434 dccpavr_node) { 426 if (!after48(avr->avr_ack_seqno, *ackno))
435 if (!after48(avr->dccpavr_ack_seqno, *ackno))
436 goto found; 427 goto found;
437 } 428 }
438 /* End of the dccpav_records list, not found, exit */ 429 /* End of the av_records list, not found, exit */
439 break; 430 break;
440found: 431found:
441 if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) { 432 if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
442 const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; 433 const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
443 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { 434 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
444 dccp_pr_debug("%s ACK vector 0, len=%d, " 435 dccp_pr_debug("%s ACK vector 0, len=%d, "
@@ -446,9 +437,9 @@ found:
446 "ACKED!\n", 437 "ACKED!\n",
447 dccp_role(sk), len, 438 dccp_role(sk), len,
448 (unsigned long long) 439 (unsigned long long)
449 avr->dccpavr_ack_seqno, 440 avr->avr_ack_seqno,
450 (unsigned long long) 441 (unsigned long long)
451 avr->dccpavr_ack_ackno); 442 avr->avr_ack_ackno);
452 dccp_ackvec_throw_record(av, avr); 443 dccp_ackvec_throw_record(av, avr);
453 break; 444 break;
454 } 445 }
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 9671ecd17e00..bcb64fb4acef 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -32,54 +32,54 @@
32 * 32 *
33 * This data structure is the one defined in RFC 4340, Appendix A. 33 * This data structure is the one defined in RFC 4340, Appendix A.
34 * 34 *
35 * @dccpav_buf_head - circular buffer head 35 * @av_buf_head - circular buffer head
36 * @dccpav_buf_tail - circular buffer tail 36 * @av_buf_tail - circular buffer tail
37 * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the 37 * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
38 * buffer (i.e. %dccpav_buf_head) 38 * buffer (i.e. %av_buf_head)
39 * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked 39 * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
40 * by the buffer with State 0 40 * by the buffer with State 0
41 * 41 *
42 * Additionally, the HC-Receiver must keep some information about the 42 * Additionally, the HC-Receiver must keep some information about the
43 * Ack Vectors it has recently sent. For each packet sent carrying an 43 * Ack Vectors it has recently sent. For each packet sent carrying an
44 * Ack Vector, it remembers four variables: 44 * Ack Vector, it remembers four variables:
45 * 45 *
46 * @dccpav_records - list of dccp_ackvec_record 46 * @av_records - list of dccp_ackvec_record
47 * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 47 * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
48 * 48 *
49 * @dccpav_time - the time in usecs 49 * @av_time - the time in usecs
50 * @dccpav_buf - circular buffer of acknowledgeable packets 50 * @av_buf - circular buffer of acknowledgeable packets
51 */ 51 */
52struct dccp_ackvec { 52struct dccp_ackvec {
53 u64 dccpav_buf_ackno; 53 u64 av_buf_ackno;
54 struct list_head dccpav_records; 54 struct list_head av_records;
55 ktime_t dccpav_time; 55 ktime_t av_time;
56 u16 dccpav_buf_head; 56 u16 av_buf_head;
57 u16 dccpav_vec_len; 57 u16 av_vec_len;
58 u8 dccpav_buf_nonce; 58 u8 av_buf_nonce;
59 u8 dccpav_ack_nonce; 59 u8 av_ack_nonce;
60 u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN]; 60 u8 av_buf[DCCP_MAX_ACKVEC_LEN];
61}; 61};
62 62
63/** struct dccp_ackvec_record - ack vector record 63/** struct dccp_ackvec_record - ack vector record
64 * 64 *
65 * ACK vector record as defined in Appendix A of the spec. 65 * ACK vector record as defined in Appendix A of the spec.
66 * 66 *
67 * The list is sorted by dccpavr_ack_seqno 67 * The list is sorted by avr_ack_seqno
68 * 68 *
69 * @dccpavr_node - node in dccpav_records 69 * @avr_node - node in av_records
70 * @dccpavr_ack_seqno - sequence number of the packet this record was sent on 70 * @avr_ack_seqno - sequence number of the packet this record was sent on
71 * @dccpavr_ack_ackno - sequence number being acknowledged 71 * @avr_ack_ackno - sequence number being acknowledged
72 * @dccpavr_ack_ptr - pointer into dccpav_buf where this record starts 72 * @avr_ack_ptr - pointer into av_buf where this record starts
73 * @dccpavr_ack_nonce - dccpav_ack_nonce at the time this record was sent 73 * @avr_ack_nonce - av_ack_nonce at the time this record was sent
74 * @dccpavr_sent_len - length of the record in dccpav_buf 74 * @avr_sent_len - length of the record in av_buf
75 */ 75 */
76struct dccp_ackvec_record { 76struct dccp_ackvec_record {
77 struct list_head dccpavr_node; 77 struct list_head avr_node;
78 u64 dccpavr_ack_seqno; 78 u64 avr_ack_seqno;
79 u64 dccpavr_ack_ackno; 79 u64 avr_ack_ackno;
80 u16 dccpavr_ack_ptr; 80 u16 avr_ack_ptr;
81 u16 dccpavr_sent_len; 81 u16 avr_sent_len;
82 u8 dccpavr_ack_nonce; 82 u8 avr_ack_nonce;
83}; 83};
84 84
85struct sock; 85struct sock;
@@ -105,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
105 105
106static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) 106static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
107{ 107{
108 return av->dccpav_vec_len; 108 return av->av_vec_len;
109} 109}
110#else /* CONFIG_IP_DCCP_ACKVEC */ 110#else /* CONFIG_IP_DCCP_ACKVEC */
111static inline int dccp_ackvec_init(void) 111static inline int dccp_ackvec_init(void)
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index c45088b5e6fb..4809753d12ae 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -92,15 +92,15 @@ int ccid_register(struct ccid_operations *ccid_ops)
92 92
93 ccid_ops->ccid_hc_rx_slab = 93 ccid_ops->ccid_hc_rx_slab =
94 ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size, 94 ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size,
95 "%s_hc_rx_sock", 95 "ccid%u_hc_rx_sock",
96 ccid_ops->ccid_name); 96 ccid_ops->ccid_id);
97 if (ccid_ops->ccid_hc_rx_slab == NULL) 97 if (ccid_ops->ccid_hc_rx_slab == NULL)
98 goto out; 98 goto out;
99 99
100 ccid_ops->ccid_hc_tx_slab = 100 ccid_ops->ccid_hc_tx_slab =
101 ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size, 101 ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size,
102 "%s_hc_tx_sock", 102 "ccid%u_hc_tx_sock",
103 ccid_ops->ccid_name); 103 ccid_ops->ccid_id);
104 if (ccid_ops->ccid_hc_tx_slab == NULL) 104 if (ccid_ops->ccid_hc_tx_slab == NULL)
105 goto out_free_rx_slab; 105 goto out_free_rx_slab;
106 106
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index c65cb2453e43..fdeae7b57319 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -23,14 +23,37 @@
23 23
24struct tcp_info; 24struct tcp_info;
25 25
26/**
27 * struct ccid_operations - Interface to Congestion-Control Infrastructure
28 *
29 * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
30 * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled)
31 * @ccid_name: alphabetical identifier string for @ccid_id
32 * @ccid_owner: module which implements/owns this CCID
33 * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
34 * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket
35 *
36 * @ccid_hc_{r,t}x_init: CCID-specific initialisation routine (before startup)
37 * @ccid_hc_{r,t}x_exit: CCID-specific cleanup routine (before destruction)
38 * @ccid_hc_rx_packet_recv: implements the HC-receiver side
39 * @ccid_hc_{r,t}x_parse_options: parsing routine for CCID/HC-specific options
40 * @ccid_hc_{r,t}x_insert_options: insert routine for CCID/HC-specific options
41 * @ccid_hc_tx_packet_recv: implements feedback processing for the HC-sender
42 * @ccid_hc_tx_send_packet: implements the sending part of the HC-sender
43 * @ccid_hc_tx_packet_sent: does accounting for packets in flight by HC-sender
44 * @ccid_hc_{r,t}x_get_info: INET_DIAG information for HC-receiver/sender
45 * @ccid_hc_{r,t}x_getsockopt: socket options specific to HC-receiver/sender
46 */
26struct ccid_operations { 47struct ccid_operations {
27 unsigned char ccid_id; 48 unsigned char ccid_id;
28 const char *ccid_name; 49 __u32 ccid_ccmps;
29 struct module *ccid_owner; 50 const char *ccid_name;
30 struct kmem_cache *ccid_hc_rx_slab; 51 struct module *ccid_owner;
31 __u32 ccid_hc_rx_obj_size; 52 struct kmem_cache *ccid_hc_rx_slab,
32 struct kmem_cache *ccid_hc_tx_slab; 53 *ccid_hc_tx_slab;
33 __u32 ccid_hc_tx_obj_size; 54 __u32 ccid_hc_rx_obj_size,
55 ccid_hc_tx_obj_size;
56 /* Interface Routines */
34 int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); 57 int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
35 int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); 58 int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
36 void (*ccid_hc_rx_exit)(struct sock *sk); 59 void (*ccid_hc_rx_exit)(struct sock *sk);
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 80f469887691..12275943eab8 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,9 +1,8 @@
1menu "DCCP CCIDs Configuration (EXPERIMENTAL)" 1menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
2 depends on IP_DCCP && EXPERIMENTAL 2 depends on EXPERIMENTAL
3 3
4config IP_DCCP_CCID2 4config IP_DCCP_CCID2
5 tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" 5 tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
6 depends on IP_DCCP
7 def_tristate IP_DCCP 6 def_tristate IP_DCCP
8 select IP_DCCP_ACKVEC 7 select IP_DCCP_ACKVEC
9 ---help--- 8 ---help---
@@ -20,18 +19,9 @@ config IP_DCCP_CCID2
20 to the user. For example, a hypothetical application that 19 to the user. For example, a hypothetical application that
21 transferred files over DCCP, using application-level retransmissions 20 transferred files over DCCP, using application-level retransmissions
22 for lost packets, would prefer CCID 2 to CCID 3. On-line games may 21 for lost packets, would prefer CCID 2 to CCID 3. On-line games may
23 also prefer CCID 2. 22 also prefer CCID 2. See RFC 4341 for further details.
24 23
25 CCID 2 is further described in RFC 4341, 24 CCID2 is the default CCID used by DCCP.
26 http://www.ietf.org/rfc/rfc4341.txt
27
28 This text was extracted from RFC 4340 (sec. 10.1),
29 http://www.ietf.org/rfc/rfc4340.txt
30
31 To compile this CCID as a module, choose M here: the module will be
32 called dccp_ccid2.
33
34 If in doubt, say M.
35 25
36config IP_DCCP_CCID2_DEBUG 26config IP_DCCP_CCID2_DEBUG
37 bool "CCID2 debugging messages" 27 bool "CCID2 debugging messages"
@@ -47,8 +37,8 @@ config IP_DCCP_CCID2_DEBUG
47 37
48config IP_DCCP_CCID3 38config IP_DCCP_CCID3
49 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" 39 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
50 depends on IP_DCCP
51 def_tristate IP_DCCP 40 def_tristate IP_DCCP
41 select IP_DCCP_TFRC_LIB
52 ---help--- 42 ---help---
53 CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based 43 CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
54 rate-controlled congestion control mechanism. TFRC is designed to 44 rate-controlled congestion control mechanism. TFRC is designed to
@@ -74,10 +64,6 @@ config IP_DCCP_CCID3
74 64
75 If in doubt, say M. 65 If in doubt, say M.
76 66
77config IP_DCCP_TFRC_LIB
78 depends on IP_DCCP_CCID3
79 def_tristate IP_DCCP_CCID3
80
81config IP_DCCP_CCID3_DEBUG 67config IP_DCCP_CCID3_DEBUG
82 bool "CCID3 debugging messages" 68 bool "CCID3 debugging messages"
83 depends on IP_DCCP_CCID3 69 depends on IP_DCCP_CCID3
@@ -121,5 +107,13 @@ config IP_DCCP_CCID3_RTO
121 is serious network congestion: experimenting with larger values should 107 is serious network congestion: experimenting with larger values should
122 therefore not be performed on WANs. 108 therefore not be performed on WANs.
123 109
110config IP_DCCP_TFRC_LIB
111 tristate
112 default n
113
114config IP_DCCP_TFRC_DEBUG
115 bool
116 depends on IP_DCCP_TFRC_LIB
117 default y if IP_DCCP_CCID3_DEBUG
124 118
125endmenu 119endmenu
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d694656b8800..b5b52ebb2693 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -24,9 +24,6 @@
24 24
25/* 25/*
26 * This implementation should follow RFC 4341 26 * This implementation should follow RFC 4341
27 *
28 * BUGS:
29 * - sequence number wrapping
30 */ 27 */
31 28
32#include "../ccid.h" 29#include "../ccid.h"
@@ -129,50 +126,35 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
129{ 126{
130 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 127 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
131 128
132 ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe, 129 if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
133 hctx->ccid2hctx_cwnd); 130 return 0;
134
135 if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) {
136 /* OK we can send... make sure previous packet was sent off */
137 if (!hctx->ccid2hctx_sendwait) {
138 hctx->ccid2hctx_sendwait = 1;
139 return 0;
140 }
141 }
142 131
143 return 1; /* XXX CCID should dequeue when ready instead of polling */ 132 return 1; /* XXX CCID should dequeue when ready instead of polling */
144} 133}
145 134
146static void ccid2_change_l_ack_ratio(struct sock *sk, int val) 135static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
147{ 136{
148 struct dccp_sock *dp = dccp_sk(sk); 137 struct dccp_sock *dp = dccp_sk(sk);
138 u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
139
149 /* 140 /*
150 * XXX I don't really agree with val != 2. If cwnd is 1, ack ratio 141 * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
151 * should be 1... it shouldn't be allowed to become 2. 142 * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
152 * -sorbo. 143 * acceptable since this causes starvation/deadlock whenever cwnd < 2.
144 * The same problem arises when Ack Ratio is 0 (i.e. Ack Ratio disabled).
153 */ 145 */
154 if (val != 2) { 146 if (val == 0 || val > max_ratio) {
155 const struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 147 DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
156 int max = hctx->ccid2hctx_cwnd / 2; 148 val = max_ratio;
157
158 /* round up */
159 if (hctx->ccid2hctx_cwnd & 1)
160 max++;
161
162 if (val > max)
163 val = max;
164 } 149 }
150 if (val > 0xFFFF) /* RFC 4340, 11.3 */
151 val = 0xFFFF;
165 152
166 ccid2_pr_debug("changing local ack ratio to %d\n", val); 153 if (val == dp->dccps_l_ack_ratio)
167 WARN_ON(val <= 0); 154 return;
168 dp->dccps_l_ack_ratio = val;
169}
170 155
171static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, u32 val) 156 ccid2_pr_debug("changing local ack ratio to %u\n", val);
172{ 157 dp->dccps_l_ack_ratio = val;
173 /* XXX do we need to change ack ratio? */
174 hctx->ccid2hctx_cwnd = val? : 1;
175 ccid2_pr_debug("changed cwnd to %u\n", hctx->ccid2hctx_cwnd);
176} 158}
177 159
178static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) 160static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
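The rewritten ccid2_change_l_ack_ratio() is now a pure clamp: Ack Ratio must stay within [1, ceil(cwnd/2)] so that at least two Acks arrive per congestion window, and it may never exceed the 16-bit feature range of RFC 4340, 11.3. A standalone sketch of just that clamp (hypothetical function name):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned int clamp_ack_ratio(unsigned int val, unsigned int cwnd)
{
	unsigned int max_ratio = DIV_ROUND_UP(cwnd, 2);

	if (val == 0 || val > max_ratio)	/* 0 would disable Acks entirely */
		val = max_ratio;
	if (val > 0xFFFF)			/* feature value is 16 bits wide */
		val = 0xFFFF;
	return val;
}

int main(void)
{
	printf("%u\n", clamp_ack_ratio(0, 5));	/* -> 3 == ceil(5/2) */
	printf("%u\n", clamp_ack_ratio(2, 5));	/* -> 2, already legal */
	printf("%u\n", clamp_ack_ratio(9, 4));	/* -> 2 == ceil(4/2) */
	return 0;
}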
@@ -181,11 +163,6 @@ static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
181 hctx->ccid2hctx_srtt = val; 163 hctx->ccid2hctx_srtt = val;
182} 164}
183 165
184static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
185{
186 hctx->ccid2hctx_pipe = val;
187}
188
189static void ccid2_start_rto_timer(struct sock *sk); 166static void ccid2_start_rto_timer(struct sock *sk);
190 167
191static void ccid2_hc_tx_rto_expire(unsigned long data) 168static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -215,21 +192,17 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
215 ccid2_start_rto_timer(sk); 192 ccid2_start_rto_timer(sk);
216 193
217 /* adjust pipe, cwnd etc */ 194 /* adjust pipe, cwnd etc */
218 ccid2_change_pipe(hctx, 0); 195 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
219 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
220 if (hctx->ccid2hctx_ssthresh < 2) 196 if (hctx->ccid2hctx_ssthresh < 2)
221 hctx->ccid2hctx_ssthresh = 2; 197 hctx->ccid2hctx_ssthresh = 2;
222 ccid2_change_cwnd(hctx, 1); 198 hctx->ccid2hctx_cwnd = 1;
199 hctx->ccid2hctx_pipe = 0;
223 200
224 /* clear state about stuff we sent */ 201 /* clear state about stuff we sent */
225 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; 202 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
226 hctx->ccid2hctx_ssacks = 0; 203 hctx->ccid2hctx_packets_acked = 0;
227 hctx->ccid2hctx_acks = 0;
228 hctx->ccid2hctx_sent = 0;
229 204
230 /* clear ack ratio state. */ 205 /* clear ack ratio state. */
231 hctx->ccid2hctx_arsent = 0;
232 hctx->ccid2hctx_ackloss = 0;
233 hctx->ccid2hctx_rpseq = 0; 206 hctx->ccid2hctx_rpseq = 0;
234 hctx->ccid2hctx_rpdupack = -1; 207 hctx->ccid2hctx_rpdupack = -1;
235 ccid2_change_l_ack_ratio(sk, 1); 208 ccid2_change_l_ack_ratio(sk, 1);
@@ -255,23 +228,10 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
255 struct dccp_sock *dp = dccp_sk(sk); 228 struct dccp_sock *dp = dccp_sk(sk);
256 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 229 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
257 struct ccid2_seq *next; 230 struct ccid2_seq *next;
258 u64 seq;
259
260 ccid2_hc_tx_check_sanity(hctx);
261 231
262 BUG_ON(!hctx->ccid2hctx_sendwait); 232 hctx->ccid2hctx_pipe++;
263 hctx->ccid2hctx_sendwait = 0;
264 ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
265 BUG_ON(hctx->ccid2hctx_pipe < 0);
266 233
267 /* There is an issue. What if another packet is sent between 234 hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss;
268 * packet_send() and packet_sent(). Then the sequence number would be
269 * wrong.
270 * -sorbo.
271 */
272 seq = dp->dccps_gss;
273
274 hctx->ccid2hctx_seqh->ccid2s_seq = seq;
275 hctx->ccid2hctx_seqh->ccid2s_acked = 0; 235 hctx->ccid2hctx_seqh->ccid2s_acked = 0;
276 hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; 236 hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
277 237
@@ -291,8 +251,26 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
291 ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, 251 ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
292 hctx->ccid2hctx_pipe); 252 hctx->ccid2hctx_pipe);
293 253
294 hctx->ccid2hctx_sent++; 254 /*
295 255 * FIXME: The code below is broken and the variables have been removed
256 * from the socket struct. The `ackloss' variable was always set to 0,
257 * and with arsent there are several problems:
258 * (i) it doesn't just count the number of Acks, but all sent packets;
259 * (ii) it is expressed in # of packets, not # of windows, so the
260 * comparison below uses the wrong formula: Appendix A of RFC 4341
261 * comes up with the number K = cwnd / (R^2 - R) of consecutive windows
262 * of data with no lost or marked Ack packets. If arsent were the # of
263 * consecutive Acks received without loss, then Ack Ratio needs to be
264 * decreased by 1 when
265 * arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2)
266 * where cwnd / R is the number of Acks received per window of data
267 * (cf. RFC 4341, App. A). The problems are that
268 * - arsent counts other packets as well;
269 * - the comparison uses a formula different from RFC 4341;
270 * - computing a cubic/quadratic equation each time is too complicated.
271 * Hence a different algorithm is needed.
272 */
273#if 0
296 /* Ack Ratio. Need to maintain a concept of how many windows we sent */ 274 /* Ack Ratio. Need to maintain a concept of how many windows we sent */
297 hctx->ccid2hctx_arsent++; 275 hctx->ccid2hctx_arsent++;
298 /* We had an ack loss in this window... */ 276 /* We had an ack loss in this window... */
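For reference, the arithmetic in the FIXME above works out as follows: with Ack Ratio R, each window of cwnd packets elicits cwnd/R Acks, so the K = cwnd/(R^2 - R) loss-free windows of RFC 4341, Appendix A correspond to K * cwnd/R = cwnd^2/(R^3 - R^2) consecutive clean Acks. A small numeric check of that identity:

#include <stdio.h>

int main(void)
{
	unsigned int cwnd = 20, R = 2;
	unsigned int acks_per_win = cwnd / R;			/* 10 */
	unsigned int K = cwnd / (R * R - R);			/* 10 windows */
	unsigned int thresh = cwnd * cwnd / (R * R * R - R * R);

	/* both routes give the same Ack count: 10 * 10 == 400/4 == 100 */
	printf("K=%u windows, threshold=%u == %u Acks\n",
	       K, K * acks_per_win, thresh);
	return 0;
}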
@@ -320,14 +298,13 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
320 hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ 298 hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
321 } 299 }
322 } 300 }
301#endif
323 302
324 /* setup RTO timer */ 303 /* setup RTO timer */
325 if (!timer_pending(&hctx->ccid2hctx_rtotimer)) 304 if (!timer_pending(&hctx->ccid2hctx_rtotimer))
326 ccid2_start_rto_timer(sk); 305 ccid2_start_rto_timer(sk);
327 306
328#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 307#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
329 ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
330 ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq);
331 do { 308 do {
332 struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; 309 struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
333 310
@@ -419,31 +396,15 @@ static inline void ccid2_new_ack(struct sock *sk,
419{ 396{
420 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 397 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
421 398
422 /* slow start */
423 if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { 399 if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
424 hctx->ccid2hctx_acks = 0; 400 if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
425 401 hctx->ccid2hctx_cwnd += 1;
426 /* We can increase cwnd at most maxincr [ack_ratio/2] */ 402 *maxincr -= 1;
427 if (*maxincr) { 403 hctx->ccid2hctx_packets_acked = 0;
428 /* increase every 2 acks */
429 hctx->ccid2hctx_ssacks++;
430 if (hctx->ccid2hctx_ssacks == 2) {
431 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
432 hctx->ccid2hctx_ssacks = 0;
433 *maxincr = *maxincr - 1;
434 }
435 } else {
436 /* increased cwnd enough for this single ack */
437 hctx->ccid2hctx_ssacks = 0;
438 }
439 } else {
440 hctx->ccid2hctx_ssacks = 0;
441 hctx->ccid2hctx_acks++;
442
443 if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
444 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
445 hctx->ccid2hctx_acks = 0;
446 } 404 }
405 } else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
406 hctx->ccid2hctx_cwnd += 1;
407 hctx->ccid2hctx_packets_acked = 0;
447 } 408 }
448 409
449 /* update RTO */ 410 /* update RTO */
@@ -502,7 +463,6 @@ static inline void ccid2_new_ack(struct sock *sk,
502 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", 463 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
503 hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, 464 hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
504 hctx->ccid2hctx_rto, HZ, r); 465 hctx->ccid2hctx_rto, HZ, r);
505 hctx->ccid2hctx_sent = 0;
506 } 466 }
507 467
508 /* we got a new ack, so re-start RTO timer */ 468 /* we got a new ack, so re-start RTO timer */
@@ -514,16 +474,19 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
514{ 474{
515 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 475 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
516 476
517 ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1); 477 if (hctx->ccid2hctx_pipe == 0)
518 BUG_ON(hctx->ccid2hctx_pipe < 0); 478 DCCP_BUG("pipe == 0");
479 else
480 hctx->ccid2hctx_pipe--;
519 481
520 if (hctx->ccid2hctx_pipe == 0) 482 if (hctx->ccid2hctx_pipe == 0)
521 ccid2_hc_tx_kill_rto_timer(sk); 483 ccid2_hc_tx_kill_rto_timer(sk);
522} 484}
523 485
524static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx, 486static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
525 struct ccid2_seq *seqp)
526{ 487{
488 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
489
527 if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { 490 if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
528 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 491 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
529 return; 492 return;
@@ -531,10 +494,12 @@ static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
531 494
532 hctx->ccid2hctx_last_cong = jiffies; 495 hctx->ccid2hctx_last_cong = jiffies;
533 496
534 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1); 497 hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U;
535 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; 498 hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
536 if (hctx->ccid2hctx_ssthresh < 2) 499
537 hctx->ccid2hctx_ssthresh = 2; 500 /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
501 if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
502 ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
538} 503}
539 504
540static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 505static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -570,12 +535,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
570 hctx->ccid2hctx_rpdupack++; 535 hctx->ccid2hctx_rpdupack++;
571 536
572 /* check if we got enough dupacks */ 537 /* check if we got enough dupacks */
573 if (hctx->ccid2hctx_rpdupack >= 538 if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
574 hctx->ccid2hctx_numdupack) {
575 hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ 539 hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
576 hctx->ccid2hctx_rpseq = 0; 540 hctx->ccid2hctx_rpseq = 0;
577 541
578 ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1); 542 ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
579 } 543 }
580 } 544 }
581 } 545 }
@@ -606,12 +570,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
606 } 570 }
607 } 571 }
608 572
609 /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for 573 /*
610 * this single ack. I round up. 574 * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
611 * -sorbo. 575 * packets per acknowledgement. Rounding up ensures that cwnd still
576 * advances when Ack Ratio is 1, and gives a slight edge otherwise.
612 */ 577 */
613 maxincr = dp->dccps_l_ack_ratio >> 1; 578 if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
614 maxincr++; 579 maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
615 580
616 /* go through all ack vectors */ 581 /* go through all ack vectors */
617 while ((offset = ccid2_ackvector(sk, skb, offset, 582 while ((offset = ccid2_ackvector(sk, skb, offset,
@@ -619,9 +584,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
619 /* go through this ack vector */ 584 /* go through this ack vector */
620 while (veclen--) { 585 while (veclen--) {
621 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; 586 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
622 u64 ackno_end_rl; 587 u64 ackno_end_rl = SUB48(ackno, rl);
623 588
624 dccp_set_seqno(&ackno_end_rl, ackno - rl);
625 ccid2_pr_debug("ackvec start:%llu end:%llu\n", 589 ccid2_pr_debug("ackvec start:%llu end:%llu\n",
626 (unsigned long long)ackno, 590 (unsigned long long)ackno,
627 (unsigned long long)ackno_end_rl); 591 (unsigned long long)ackno_end_rl);
@@ -651,7 +615,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
651 !seqp->ccid2s_acked) { 615 !seqp->ccid2s_acked) {
652 if (state == 616 if (state ==
653 DCCP_ACKVEC_STATE_ECN_MARKED) { 617 DCCP_ACKVEC_STATE_ECN_MARKED) {
654 ccid2_congestion_event(hctx, 618 ccid2_congestion_event(sk,
655 seqp); 619 seqp);
656 } else 620 } else
657 ccid2_new_ack(sk, seqp, 621 ccid2_new_ack(sk, seqp,
@@ -666,13 +630,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
666 done = 1; 630 done = 1;
667 break; 631 break;
668 } 632 }
669 seqp = seqp->ccid2s_next; 633 seqp = seqp->ccid2s_prev;
670 } 634 }
671 if (done) 635 if (done)
672 break; 636 break;
673 637
674 638 ackno = SUB48(ackno_end_rl, 1);
675 dccp_set_seqno(&ackno, ackno_end_rl - 1);
676 vector++; 639 vector++;
677 } 640 }
678 if (done) 641 if (done)
@@ -694,7 +657,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
694 while (1) { 657 while (1) {
695 if (seqp->ccid2s_acked) { 658 if (seqp->ccid2s_acked) {
696 done++; 659 done++;
697 if (done == hctx->ccid2hctx_numdupack) 660 if (done == NUMDUPACK)
698 break; 661 break;
699 } 662 }
700 if (seqp == hctx->ccid2hctx_seqt) 663 if (seqp == hctx->ccid2hctx_seqt)
@@ -705,7 +668,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
705 /* If there are at least 3 acknowledgements, anything unacknowledged 668 /* If there are at least 3 acknowledgements, anything unacknowledged
706 * below the last sequence number is considered lost 669 * below the last sequence number is considered lost
707 */ 670 */
708 if (done == hctx->ccid2hctx_numdupack) { 671 if (done == NUMDUPACK) {
709 struct ccid2_seq *last_acked = seqp; 672 struct ccid2_seq *last_acked = seqp;
710 673
711 /* check for lost packets */ 674 /* check for lost packets */
@@ -717,7 +680,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
717 * order to detect multiple congestion events in 680 * order to detect multiple congestion events in
718 * one ack vector. 681 * one ack vector.
719 */ 682 */
720 ccid2_congestion_event(hctx, seqp); 683 ccid2_congestion_event(sk, seqp);
721 ccid2_hc_tx_dec_pipe(sk); 684 ccid2_hc_tx_dec_pipe(sk);
722 } 685 }
723 if (seqp == hctx->ccid2hctx_seqt) 686 if (seqp == hctx->ccid2hctx_seqt)
@@ -742,14 +705,23 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
742static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 705static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
743{ 706{
744 struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); 707 struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
708 struct dccp_sock *dp = dccp_sk(sk);
709 u32 max_ratio;
710
711 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
712 hctx->ccid2hctx_ssthresh = ~0U;
745 713
746 ccid2_change_cwnd(hctx, 1); 714 /*
747 /* Initialize ssthresh to infinity. This means that we will exit the 715 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
748 * initial slow-start after the first packet loss. This is what we 716 * packets for new connections, following the rules from [RFC3390]".
749 * want. 717 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
750 */ 718 */
751 hctx->ccid2hctx_ssthresh = ~0; 719 hctx->ccid2hctx_cwnd = min(4U, max(2U, 4380U / dp->dccps_mss_cache));
752 hctx->ccid2hctx_numdupack = 3; 720
721 /* Make sure that Ack Ratio is enabled and within bounds. */
722 max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
723 if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
724 dp->dccps_l_ack_ratio = max_ratio;
753 725
754 /* XXX init ~ to window size... */ 726 /* XXX init ~ to window size... */
755 if (ccid2_hc_tx_alloc_seq(hctx)) 727 if (ccid2_hc_tx_alloc_seq(hctx))
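The initialisation above is RFC 3390's byte-based rule, min(4*MSS, max(2*MSS, 4380 bytes)), divided through by the MSS to yield packets, as RFC 4341, 5 requires. A quick standalone check of the conversion (hypothetical helper name):

#include <stdio.h>

static unsigned int initial_cwnd(unsigned int mss)
{
	unsigned int pkts = 4380U / mss;	/* 4380 == 3 * 1460, RFC 3390 cap */

	if (pkts < 2)
		pkts = 2;
	if (pkts > 4)
		pkts = 4;
	return pkts;				/* == min(4, max(2, 4380/mss)) */
}

int main(void)
{
	printf("mss=1460 -> cwnd=%u\n", initial_cwnd(1460));	/* 3 */
	printf("mss=536  -> cwnd=%u\n", initial_cwnd(536));	/* 4 */
	printf("mss=4000 -> cwnd=%u\n", initial_cwnd(4000));	/* 2 */
	return 0;
}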
@@ -760,10 +732,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
760 hctx->ccid2hctx_rttvar = -1; 732 hctx->ccid2hctx_rttvar = -1;
761 hctx->ccid2hctx_rpdupack = -1; 733 hctx->ccid2hctx_rpdupack = -1;
762 hctx->ccid2hctx_last_cong = jiffies; 734 hctx->ccid2hctx_last_cong = jiffies;
763 735 setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
764 hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; 736 (unsigned long)sk);
765 hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
766 init_timer(&hctx->ccid2hctx_rtotimer);
767 737
768 ccid2_hc_tx_check_sanity(hctx); 738 ccid2_hc_tx_check_sanity(hctx);
769 return 0; 739 return 0;
@@ -800,7 +770,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
800 770
801static struct ccid_operations ccid2 = { 771static struct ccid_operations ccid2 = {
802 .ccid_id = DCCPC_CCID2, 772 .ccid_id = DCCPC_CCID2,
803 .ccid_name = "ccid2", 773 .ccid_name = "TCP-like",
804 .ccid_owner = THIS_MODULE, 774 .ccid_owner = THIS_MODULE,
805 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), 775 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
806 .ccid_hc_tx_init = ccid2_hc_tx_init, 776 .ccid_hc_tx_init = ccid2_hc_tx_init,
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index d9daa534c9be..2c94ca029010 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -24,6 +24,8 @@
24#include <linux/timer.h> 24#include <linux/timer.h>
25#include <linux/types.h> 25#include <linux/types.h>
26#include "../ccid.h" 26#include "../ccid.h"
27/* NUMDUPACK parameter from RFC 4341, p. 6 */
28#define NUMDUPACK 3
27 29
28struct sock; 30struct sock;
29 31
@@ -40,22 +42,17 @@ struct ccid2_seq {
40 42
41/** struct ccid2_hc_tx_sock - CCID2 TX half connection 43/** struct ccid2_hc_tx_sock - CCID2 TX half connection
42 * 44 *
43 * @ccid2hctx_ssacks - ACKs recv in slow start 45 * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @ccid2hctx_acks - ACKS recv in AI phase 46 * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465)
45 * @ccid2hctx_sent - packets sent in this window
46 * @ccid2hctx_lastrtt -time RTT was last measured 47 * @ccid2hctx_lastrtt -time RTT was last measured
47 * @ccid2hctx_arsent - packets sent [ack ratio]
48 * @ccid2hctx_ackloss - ack was lost in this win
49 * @ccid2hctx_rpseq - last consecutive seqno 48 * @ccid2hctx_rpseq - last consecutive seqno
50 * @ccid2hctx_rpdupack - dupacks since rpseq 49 * @ccid2hctx_rpdupack - dupacks since rpseq
51*/ 50*/
52struct ccid2_hc_tx_sock { 51struct ccid2_hc_tx_sock {
53 u32 ccid2hctx_cwnd; 52 u32 ccid2hctx_cwnd;
54 int ccid2hctx_ssacks; 53 u32 ccid2hctx_ssthresh;
55 int ccid2hctx_acks; 54 u32 ccid2hctx_pipe;
56 unsigned int ccid2hctx_ssthresh; 55 u32 ccid2hctx_packets_acked;
57 int ccid2hctx_pipe;
58 int ccid2hctx_numdupack;
59 struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; 56 struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
60 int ccid2hctx_seqbufc; 57 int ccid2hctx_seqbufc;
61 struct ccid2_seq *ccid2hctx_seqh; 58 struct ccid2_seq *ccid2hctx_seqh;
@@ -63,14 +60,10 @@ struct ccid2_hc_tx_sock {
63 long ccid2hctx_rto; 60 long ccid2hctx_rto;
64 long ccid2hctx_srtt; 61 long ccid2hctx_srtt;
65 long ccid2hctx_rttvar; 62 long ccid2hctx_rttvar;
66 int ccid2hctx_sent;
67 unsigned long ccid2hctx_lastrtt; 63 unsigned long ccid2hctx_lastrtt;
68 struct timer_list ccid2hctx_rtotimer; 64 struct timer_list ccid2hctx_rtotimer;
69 unsigned long ccid2hctx_arsent;
70 int ccid2hctx_ackloss;
71 u64 ccid2hctx_rpseq; 65 u64 ccid2hctx_rpseq;
72 int ccid2hctx_rpdupack; 66 int ccid2hctx_rpdupack;
73 int ccid2hctx_sendwait;
74 unsigned long ccid2hctx_last_cong; 67 unsigned long ccid2hctx_last_cong;
75 u64 ccid2hctx_high_ack; 68 u64 ccid2hctx_high_ack;
76}; 69};
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d133416d3970..e76f460af0ea 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * net/dccp/ccids/ccid3.c 2 * net/dccp/ccids/ccid3.c
3 * 3 *
4 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
4 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 5 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> 6 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
6 * 7 *
@@ -33,11 +34,7 @@
33 * along with this program; if not, write to the Free Software 34 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 35 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */ 36 */
36#include "../ccid.h"
37#include "../dccp.h" 37#include "../dccp.h"
38#include "lib/packet_history.h"
39#include "lib/loss_interval.h"
40#include "lib/tfrc.h"
41#include "ccid3.h" 38#include "ccid3.h"
42 39
43#include <asm/unaligned.h> 40#include <asm/unaligned.h>
@@ -49,9 +46,6 @@ static int ccid3_debug;
49#define ccid3_pr_debug(format, a...) 46#define ccid3_pr_debug(format, a...)
50#endif 47#endif
51 48
52static struct dccp_tx_hist *ccid3_tx_hist;
53static struct dccp_rx_hist *ccid3_rx_hist;
54
55/* 49/*
56 * Transmitter Half-Connection Routines 50 * Transmitter Half-Connection Routines
57 */ 51 */
@@ -83,24 +77,27 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
83} 77}
84 78
85/* 79/*
86 * Compute the initial sending rate X_init according to RFC 3390: 80 * Compute the initial sending rate X_init in the manner of RFC 3390:
87 * w_init = min(4 * MSS, max(2 * MSS, 4380 bytes)) 81 *
88 * X_init = w_init / RTT 82 * X_init = min(4 * s, max(2 * s, 4380 bytes)) / RTT
83 *
84 * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis
85 * (rev-02) clarifies the use of RFC 3390 with regard to the above formula.
89 * For consistency with other parts of the code, X_init is scaled by 2^6. 86 * For consistency with other parts of the code, X_init is scaled by 2^6.
90 */ 87 */
91static inline u64 rfc3390_initial_rate(struct sock *sk) 88static inline u64 rfc3390_initial_rate(struct sock *sk)
92{ 89{
93 const struct dccp_sock *dp = dccp_sk(sk); 90 const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
94 const __u32 w_init = min(4 * dp->dccps_mss_cache, 91 const __u32 w_init = min_t(__u32, 4 * hctx->ccid3hctx_s,
95 max(2 * dp->dccps_mss_cache, 4380U)); 92 max_t(__u32, 2 * hctx->ccid3hctx_s, 4380));
96 93
97 return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt); 94 return scaled_div(w_init << 6, hctx->ccid3hctx_rtt);
98} 95}
99 96
100/* 97/*
101 * Recalculate t_ipi and delta (should be called whenever X changes) 98 * Recalculate t_ipi and delta (should be called whenever X changes)
102 */ 99 */
103static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) 100static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
104{ 101{
105 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ 102 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
106 hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6, 103 hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
@@ -116,6 +113,13 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
116 113
117} 114}
118 115
116static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now)
117{
118 u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
119
120 return delta / hctx->ccid3hctx_rtt;
121}
122
119/** 123/**
120 * ccid3_hc_tx_update_x - Update allowed sending rate X 124 * ccid3_hc_tx_update_x - Update allowed sending rate X
121 * @stamp: most recent time if available - can be left NULL. 125 * @stamp: most recent time if available - can be left NULL.
@@ -127,19 +131,19 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
127 * 131 *
128 */ 132 */
129static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) 133static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
130
131{ 134{
132 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 135 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
133 __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; 136 __u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
134 const __u64 old_x = hctx->ccid3hctx_x; 137 const __u64 old_x = hctx->ccid3hctx_x;
135 ktime_t now = stamp? *stamp : ktime_get_real(); 138 ktime_t now = stamp ? *stamp : ktime_get_real();
136 139
137 /* 140 /*
138 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate 141 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
139 * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis. 142 * when idling [RFC 4342, 5.1]. Definition of idling is from rfc3448bis:
143 * a sender is idle if it has not sent anything over a 2-RTT-period.
140 * For consistency with X and X_recv, min_rate is also scaled by 2^6. 144 * For consistency with X and X_recv, min_rate is also scaled by 2^6.
141 */ 145 */
142 if (unlikely(hctx->ccid3hctx_idle)) { 146 if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) {
143 min_rate = rfc3390_initial_rate(sk); 147 min_rate = rfc3390_initial_rate(sk);
144 min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv); 148 min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
145 } 149 }
@@ -181,7 +185,7 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
181{ 185{
182 const u16 old_s = hctx->ccid3hctx_s; 186 const u16 old_s = hctx->ccid3hctx_s;
183 187
184 hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10; 188 hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9);
185 189
186 if (hctx->ccid3hctx_s != old_s) 190 if (hctx->ccid3hctx_s != old_s)
187 ccid3_update_send_interval(hctx); 191 ccid3_update_send_interval(hctx);
@@ -225,29 +229,27 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
225 ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, 229 ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
226 ccid3_tx_state_name(hctx->ccid3hctx_state)); 230 ccid3_tx_state_name(hctx->ccid3hctx_state));
227 231
228 hctx->ccid3hctx_idle = 1; 232 if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK)
233 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
234 else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
235 goto out;
229 236
230 switch (hctx->ccid3hctx_state) { 237 /*
231 case TFRC_SSTATE_NO_FBACK: 238 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
232 /* RFC 3448, 4.4: Halve send rate directly */ 239 */
240 if (hctx->ccid3hctx_t_rto == 0 || /* no feedback received yet */
241 hctx->ccid3hctx_p == 0) {
242
243 /* halve send rate directly */
233 hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, 244 hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2,
234 (((__u64)hctx->ccid3hctx_s) << 6) / 245 (((__u64)hctx->ccid3hctx_s) << 6) /
235 TFRC_T_MBI); 246 TFRC_T_MBI);
236
237 ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u "
238 "bytes/s\n", dccp_role(sk), sk,
239 ccid3_tx_state_name(hctx->ccid3hctx_state),
240 (unsigned)(hctx->ccid3hctx_x >> 6));
241 /* The value of R is still undefined and so we can not recompute
242 * the timeout value. Keep initial value as per [RFC 4342, 5]. */
243 t_nfb = TFRC_INITIAL_TIMEOUT;
244 ccid3_update_send_interval(hctx); 247 ccid3_update_send_interval(hctx);
245 break; 248 } else {
246 case TFRC_SSTATE_FBACK:
247 /* 249 /*
248 * Modify the cached value of X_recv [RFC 3448, 4.4] 250 * Modify the cached value of X_recv
249 * 251 *
250 * If (p == 0 || X_calc > 2 * X_recv) 252 * If (X_calc > 2 * X_recv)
251 * X_recv = max(X_recv / 2, s / (2 * t_mbi)); 253 * X_recv = max(X_recv / 2, s / (2 * t_mbi));
252 * Else 254 * Else
253 * X_recv = X_calc / 4; 255 * X_recv = X_calc / 4;
@@ -256,32 +258,28 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
256 */ 258 */
257 BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); 259 BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
258 260
259 if (hctx->ccid3hctx_p == 0 || 261 if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))
260 (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
261
262 hctx->ccid3hctx_x_recv = 262 hctx->ccid3hctx_x_recv =
263 max(hctx->ccid3hctx_x_recv / 2, 263 max(hctx->ccid3hctx_x_recv / 2,
264 (((__u64)hctx->ccid3hctx_s) << 6) / 264 (((__u64)hctx->ccid3hctx_s) << 6) /
265 (2 * TFRC_T_MBI)); 265 (2 * TFRC_T_MBI));
266 } else { 266 else {
267 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; 267 hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
268 hctx->ccid3hctx_x_recv <<= 4; 268 hctx->ccid3hctx_x_recv <<= 4;
269 } 269 }
270 /* Now recalculate X [RFC 3448, 4.3, step (4)] */
271 ccid3_hc_tx_update_x(sk, NULL); 270 ccid3_hc_tx_update_x(sk, NULL);
272 /*
273 * Schedule no feedback timer to expire in
274 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
275 * See comments in packet_recv() regarding the value of t_RTO.
276 */
277 t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
278 break;
279 case TFRC_SSTATE_NO_SENT:
280 DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk);
281 /* fall through */
282 case TFRC_SSTATE_TERM:
283 goto out;
284 } 271 }
272 ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n",
273 (unsigned long long)hctx->ccid3hctx_x);
274
275 /*
276 * Set new timeout for the nofeedback timer.
277 * See comments in packet_recv() regarding the value of t_RTO.
278 */
279 if (unlikely(hctx->ccid3hctx_t_rto == 0)) /* no feedback yet */
280 t_nfb = TFRC_INITIAL_TIMEOUT;
281 else
282 t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
285 283
286restart_timer: 284restart_timer:
287 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 285 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
@@ -336,8 +334,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
336 hctx->ccid3hctx_x = rfc3390_initial_rate(sk); 334 hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
337 hctx->ccid3hctx_t_ld = now; 335 hctx->ccid3hctx_t_ld = now;
338 } else { 336 } else {
339 /* Sender does not have RTT sample: X = MSS/second */ 337 /* Sender does not have RTT sample: X_pps = 1 pkt/sec */
340 hctx->ccid3hctx_x = dp->dccps_mss_cache; 338 hctx->ccid3hctx_x = hctx->ccid3hctx_s;
341 hctx->ccid3hctx_x <<= 6; 339 hctx->ccid3hctx_x <<= 6;
342 } 340 }
343 ccid3_update_send_interval(hctx); 341 ccid3_update_send_interval(hctx);
@@ -369,7 +367,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
369 /* prepare to send now (add options etc.) */ 367 /* prepare to send now (add options etc.) */
370 dp->dccps_hc_tx_insert_options = 1; 368 dp->dccps_hc_tx_insert_options = 1;
371 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; 369 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
372 hctx->ccid3hctx_idle = 0;
373 370
374 /* set the nominal send time for the next following packet */ 371 /* set the nominal send time for the next following packet */
375 hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, 372 hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom,
@@ -381,28 +378,17 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
381 unsigned int len) 378 unsigned int len)
382{ 379{
383 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 380 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
384 struct dccp_tx_hist_entry *packet;
385 381
386 ccid3_hc_tx_update_s(hctx, len); 382 ccid3_hc_tx_update_s(hctx, len);
387 383
388 packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); 384 if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss))
389 if (unlikely(packet == NULL)) {
390 DCCP_CRIT("packet history - out of memory!"); 385 DCCP_CRIT("packet history - out of memory!");
391 return;
392 }
393 dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet);
394
395 packet->dccphtx_tstamp = ktime_get_real();
396 packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss;
397 packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
398 packet->dccphtx_sent = 1;
399} 386}
400 387
401static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 388static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
402{ 389{
403 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); 390 struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
404 struct ccid3_options_received *opt_recv; 391 struct ccid3_options_received *opt_recv;
405 struct dccp_tx_hist_entry *packet;
406 ktime_t now; 392 ktime_t now;
407 unsigned long t_nfb; 393 unsigned long t_nfb;
408 u32 pinv, r_sample; 394 u32 pinv, r_sample;
@@ -411,131 +397,112 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
411 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || 397 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
412 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) 398 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
413 return; 399 return;
400 /* ... and only in the established state */
401 if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK &&
402 hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
403 return;
414 404
415 opt_recv = &hctx->ccid3hctx_options_received; 405 opt_recv = &hctx->ccid3hctx_options_received;
406 now = ktime_get_real();
416 407
417 switch (hctx->ccid3hctx_state) { 408 /* Estimate RTT from history if ACK number is valid */
418 case TFRC_SSTATE_NO_FBACK: 409 r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist,
419 case TFRC_SSTATE_FBACK: 410 DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
420 /* get packet from history to look up t_recvdata */ 411 if (r_sample == 0) {
421 packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, 412 DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
422 DCCP_SKB_CB(skb)->dccpd_ack_seq); 413 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
423 if (unlikely(packet == NULL)) { 414 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
424 DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist " 415 return;
425 "in history!\n", dccp_role(sk), sk, 416 }
426 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
427 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
428 return;
429 }
430
431 /* Update receive rate in units of 64 * bytes/second */
432 hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
433 hctx->ccid3hctx_x_recv <<= 6;
434 417
435 /* Update loss event rate */ 418 /* Update receive rate in units of 64 * bytes/second */
436 pinv = opt_recv->ccid3or_loss_event_rate; 419 hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
437 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ 420 hctx->ccid3hctx_x_recv <<= 6;
438 hctx->ccid3hctx_p = 0;
439 else /* can not exceed 100% */
440 hctx->ccid3hctx_p = 1000000 / pinv;
441 421
442 now = ktime_get_real(); 422 /* Update loss event rate (which is scaled by 1e6) */
443 /* 423 pinv = opt_recv->ccid3or_loss_event_rate;
444 * Calculate new round trip sample as per [RFC 3448, 4.3] by 424 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
445 * R_sample = (now - t_recvdata) - t_elapsed 425 hctx->ccid3hctx_p = 0;
446 */ 426 else /* can not exceed 100% */
447 r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, packet->dccphtx_tstamp)); 427 hctx->ccid3hctx_p = scaled_div(1, pinv);
428 /*
429 * Validate new RTT sample and update moving average
430 */
431 r_sample = dccp_sample_rtt(sk, r_sample);
432 hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9);
433 /*
434 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
435 */
436 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
437 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
448 438
449 /* 439 if (hctx->ccid3hctx_t_rto == 0) {
450 * Update RTT estimate by
451 * If (No feedback recv)
452 * R = R_sample;
453 * Else
454 * R = q * R + (1 - q) * R_sample;
455 *
456 * q is a constant, RFC 3448 recomments 0.9
457 */
458 if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
459 /* 440 /*
460 * Larger Initial Windows [RFC 4342, sec. 5] 441 * Initial feedback packet: Larger Initial Windows (4.2)
461 */ 442 */
462 hctx->ccid3hctx_rtt = r_sample;
463 hctx->ccid3hctx_x = rfc3390_initial_rate(sk); 443 hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
464 hctx->ccid3hctx_t_ld = now; 444 hctx->ccid3hctx_t_ld = now;
465 445
466 ccid3_update_send_interval(hctx); 446 ccid3_update_send_interval(hctx);
467 447
468 ccid3_pr_debug("%s(%p), s=%u, MSS=%u, " 448 goto done_computing_x;
469 "R_sample=%uus, X=%u\n", dccp_role(sk), 449 } else if (hctx->ccid3hctx_p == 0) {
470 sk, hctx->ccid3hctx_s, 450 /*
471 dccp_sk(sk)->dccps_mss_cache, r_sample, 451 * First feedback after nofeedback timer expiry (4.3)
472 (unsigned)(hctx->ccid3hctx_x >> 6)); 452 */
473 453 goto done_computing_x;
474 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
475 } else {
476 hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
477 r_sample) / 10;
478
479 /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
480 if (hctx->ccid3hctx_p > 0)
481 hctx->ccid3hctx_x_calc =
482 tfrc_calc_x(hctx->ccid3hctx_s,
483 hctx->ccid3hctx_rtt,
484 hctx->ccid3hctx_p);
485 ccid3_hc_tx_update_x(sk, &now);
486
487 ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
488 "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
489 dccp_role(sk),
490 sk, hctx->ccid3hctx_rtt, r_sample,
491 hctx->ccid3hctx_s, hctx->ccid3hctx_p,
492 hctx->ccid3hctx_x_calc,
493 (unsigned)(hctx->ccid3hctx_x_recv >> 6),
494 (unsigned)(hctx->ccid3hctx_x >> 6));
495 } 454 }
455 }
496 456
497 /* unschedule no feedback timer */ 457 /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
498 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); 458 if (hctx->ccid3hctx_p > 0)
459 hctx->ccid3hctx_x_calc =
460 tfrc_calc_x(hctx->ccid3hctx_s,
461 hctx->ccid3hctx_rtt,
462 hctx->ccid3hctx_p);
463 ccid3_hc_tx_update_x(sk, &now);
464
465done_computing_x:
466 ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
467 "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
468 dccp_role(sk),
469 sk, hctx->ccid3hctx_rtt, r_sample,
470 hctx->ccid3hctx_s, hctx->ccid3hctx_p,
471 hctx->ccid3hctx_x_calc,
472 (unsigned)(hctx->ccid3hctx_x_recv >> 6),
473 (unsigned)(hctx->ccid3hctx_x >> 6));
499 474
500 /* remove all packets older than the one acked from history */ 475 /* unschedule no feedback timer */
501 dccp_tx_hist_purge_older(ccid3_tx_hist, 476 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
502 &hctx->ccid3hctx_hist, packet);
503 /*
504 * As we have calculated new ipi, delta, t_nom it is possible
505 * that we now can send a packet, so wake up dccp_wait_for_ccid
506 */
507 sk->sk_write_space(sk);
508 477
509 /* 478 /*
510 * Update timeout interval for the nofeedback timer. 479 * As we have calculated new ipi, delta, t_nom it is possible
511 * We use a configuration option to increase the lower bound. 480 * that we now can send a packet, so wake up dccp_wait_for_ccid
512 * This can help avoid triggering the nofeedback timer too 481 */
513 * often ('spinning') on LANs with small RTTs. 482 sk->sk_write_space(sk);
514 */
515 hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
516 CONFIG_IP_DCCP_CCID3_RTO *
517 (USEC_PER_SEC/1000));
518 /*
519 * Schedule no feedback timer to expire in
520 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
521 */
522 t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
523 483
524 ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " 484 /*
525 "expire in %lu jiffies (%luus)\n", 485 * Update timeout interval for the nofeedback timer.
526 dccp_role(sk), 486 * We use a configuration option to increase the lower bound.
527 sk, usecs_to_jiffies(t_nfb), t_nfb); 487 * This can help avoid triggering the nofeedback timer too
488 * often ('spinning') on LANs with small RTTs.
489 */
490 hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
491 (CONFIG_IP_DCCP_CCID3_RTO *
492 (USEC_PER_SEC / 1000)));
493 /*
494 * Schedule no feedback timer to expire in
495 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
496 */
497 t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi);
528 498
529 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 499 ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
530 jiffies + usecs_to_jiffies(t_nfb)); 500 "expire in %lu jiffies (%luus)\n",
501 dccp_role(sk),
502 sk, usecs_to_jiffies(t_nfb), t_nfb);
531 503
532 /* set idle flag */ 504 sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
533 hctx->ccid3hctx_idle = 1; 505 jiffies + usecs_to_jiffies(t_nfb));
534 break;
535 case TFRC_SSTATE_NO_SENT: /* fall through */
536 case TFRC_SSTATE_TERM: /* ignore feedback when closing */
537 break;
538 }
539} 506}
540 507
541static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, 508static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
@@ -605,12 +572,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
605 struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); 572 struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
606 573
607 hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; 574 hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
608 INIT_LIST_HEAD(&hctx->ccid3hctx_hist); 575 hctx->ccid3hctx_hist = NULL;
609 576 setup_timer(&hctx->ccid3hctx_no_feedback_timer,
610 hctx->ccid3hctx_no_feedback_timer.function = 577 ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
611 ccid3_hc_tx_no_feedback_timer;
612 hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
613 init_timer(&hctx->ccid3hctx_no_feedback_timer);
614 578
615 return 0; 579 return 0;
616} 580}
@@ -622,8 +586,7 @@ static void ccid3_hc_tx_exit(struct sock *sk)
622 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); 586 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
623 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); 587 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
624 588
625 /* Empty packet history */ 589 tfrc_tx_hist_purge(&hctx->ccid3hctx_hist);
626 dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
627} 590}
628 591
629static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 592static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
@@ -670,6 +633,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
670/* 633/*
671 * Receiver Half-Connection Routines 634 * Receiver Half-Connection Routines
672 */ 635 */
636
637/* CCID3 feedback types */
638enum ccid3_fback_type {
639 CCID3_FBACK_NONE = 0,
640 CCID3_FBACK_INITIAL,
641 CCID3_FBACK_PERIODIC,
642 CCID3_FBACK_PARAM_CHANGE
643};
644
673#ifdef CONFIG_IP_DCCP_CCID3_DEBUG 645#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
674static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) 646static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
675{ 647{
@@ -696,67 +668,58 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
696 hcrx->ccid3hcrx_state = state; 668 hcrx->ccid3hcrx_state = state;
697} 669}
698 670
699static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len) 671static void ccid3_hc_rx_send_feedback(struct sock *sk,
700{ 672 const struct sk_buff *skb,
701 if (unlikely(len == 0)) /* don't update on empty packets (e.g. ACKs) */ 673 enum ccid3_fback_type fbtype)
702 ccid3_pr_debug("Packet payload length is 0 - not updating\n");
703 else
704 hcrx->ccid3hcrx_s = hcrx->ccid3hcrx_s == 0 ? len :
705 (9 * hcrx->ccid3hcrx_s + len) / 10;
706}
707
708static void ccid3_hc_rx_send_feedback(struct sock *sk)
709{ 674{
710 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 675 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
711 struct dccp_sock *dp = dccp_sk(sk); 676 struct dccp_sock *dp = dccp_sk(sk);
712 struct dccp_rx_hist_entry *packet;
713 ktime_t now; 677 ktime_t now;
714 suseconds_t delta; 678 s64 delta = 0;
715 679
716 ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); 680 if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
681 return;
717 682
718 now = ktime_get_real(); 683 now = ktime_get_real();
719 684
720 switch (hcrx->ccid3hcrx_state) { 685 switch (fbtype) {
721 case TFRC_RSTATE_NO_DATA: 686 case CCID3_FBACK_INITIAL:
722 hcrx->ccid3hcrx_x_recv = 0; 687 hcrx->ccid3hcrx_x_recv = 0;
688 hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
723 break; 689 break;
724 case TFRC_RSTATE_DATA: 690 case CCID3_FBACK_PARAM_CHANGE:
725 delta = ktime_us_delta(now, 691 /*
726 hcrx->ccid3hcrx_tstamp_last_feedback); 692 * When parameters change (new loss or p > p_prev), we do not
727 DCCP_BUG_ON(delta < 0); 693 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
728 hcrx->ccid3hcrx_x_recv = 694 * need to reuse the previous value of X_recv. However, when
729 scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); 695 * X_recv was 0 (due to early loss), this would kill X down to
696 * s/t_mbi (i.e. one packet in 64 seconds).
697 * To avoid such drastic reduction, we approximate X_recv as
698 * the number of bytes since last feedback.
699 * This is a safe fallback, since X is bounded above by X_calc.
700 */
701 if (hcrx->ccid3hcrx_x_recv > 0)
702 break;
703 /* fall through */
704 case CCID3_FBACK_PERIODIC:
705 delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
706 if (delta <= 0)
707 DCCP_BUG("delta (%ld) <= 0", (long)delta);
708 else
709 hcrx->ccid3hcrx_x_recv =
710 scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
730 break; 711 break;
731 case TFRC_RSTATE_TERM: 712 default:
732 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
733 return; 713 return;
734 } 714 }
735 715
736 packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); 716 ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
737 if (unlikely(packet == NULL)) { 717 hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
738 DCCP_WARN("%s(%p), no data packet in history!\n",
739 dccp_role(sk), sk);
740 return;
741 }
742 718
743 hcrx->ccid3hcrx_tstamp_last_feedback = now; 719 hcrx->ccid3hcrx_tstamp_last_feedback = now;
744 hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; 720 hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval;
745 hcrx->ccid3hcrx_bytes_recv = 0; 721 hcrx->ccid3hcrx_bytes_recv = 0;
746 722
747 /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */
748 delta = ktime_us_delta(now, packet->dccphrx_tstamp);
749 DCCP_BUG_ON(delta < 0);
750 hcrx->ccid3hcrx_elapsed_time = delta / 10;
751
752 if (hcrx->ccid3hcrx_p == 0)
753 hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
754 else if (hcrx->ccid3hcrx_p > 1000000) {
755 DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
756 hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */
757 } else
758 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
759
760 dp->dccps_hc_rx_insert_options = 1; 723 dp->dccps_hc_rx_insert_options = 1;
761 dccp_send_ack(sk); 724 dccp_send_ack(sk);
762} 725}
@@ -770,7 +733,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
770 return 0; 733 return 0;
771 734
772 hcrx = ccid3_hc_rx_sk(sk); 735 hcrx = ccid3_hc_rx_sk(sk);
773 DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
774 736
775 if (dccp_packet_without_ack(skb)) 737 if (dccp_packet_without_ack(skb))
776 return 0; 738 return 0;
@@ -778,11 +740,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
778 x_recv = htonl(hcrx->ccid3hcrx_x_recv); 740 x_recv = htonl(hcrx->ccid3hcrx_x_recv);
779 pinv = htonl(hcrx->ccid3hcrx_pinv); 741 pinv = htonl(hcrx->ccid3hcrx_pinv);
780 742
781 if ((hcrx->ccid3hcrx_elapsed_time != 0 && 743 if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
782 dccp_insert_option_elapsed_time(sk, skb,
783 hcrx->ccid3hcrx_elapsed_time)) ||
784 dccp_insert_option_timestamp(sk, skb) ||
785 dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
786 &pinv, sizeof(pinv)) || 744 &pinv, sizeof(pinv)) ||
787 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, 745 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
788 &x_recv, sizeof(x_recv))) 746 &x_recv, sizeof(x_recv)))
@@ -791,180 +749,139 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
791 return 0; 749 return 0;
792} 750}
793 751
794static int ccid3_hc_rx_detect_loss(struct sock *sk, 752/** ccid3_first_li - Implements [RFC 3448, 6.3.1]
795 struct dccp_rx_hist_entry *packet) 753 *
754 * Determine the length of the first loss interval via inverse lookup.
755 * Assume that X_recv can be computed by the throughput equation
756 * s
757 * X_recv = --------
758 * R * fval
759 * Find some p such that f(p) = fval; return 1/p (scaled).
760 */
761static u32 ccid3_first_li(struct sock *sk)
796{ 762{
797 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 763 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
798 struct dccp_rx_hist_entry *rx_hist = 764 u32 x_recv, p, delta;
799 dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); 765 u64 fval;
800 u64 seqno = packet->dccphrx_seqno;
801 u64 tmp_seqno;
802 int loss = 0;
803 u8 ccval;
804
805
806 tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
807 766
808 if (!rx_hist || 767 if (hcrx->ccid3hcrx_rtt == 0) {
809 follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { 768 DCCP_WARN("No RTT estimate available, using fallback RTT\n");
810 hcrx->ccid3hcrx_seqno_nonloss = seqno; 769 hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT;
811 hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
812 goto detect_out;
813 } 770 }
814 771
815 772 delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback));
816 while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) 773 x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
817 > TFRC_RECV_NUM_LATE_LOSS) { 774 if (x_recv == 0) { /* would also trigger divide-by-zero */
818 loss = 1; 775 DCCP_WARN("X_recv==0\n");
819 dccp_li_update_li(sk, 776 if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
820 &hcrx->ccid3hcrx_li_hist, 777 DCCP_BUG("stored value of X_recv is zero");
821 &hcrx->ccid3hcrx_hist, 778 return ~0U;
822 hcrx->ccid3hcrx_tstamp_last_feedback,
823 hcrx->ccid3hcrx_s,
824 hcrx->ccid3hcrx_bytes_recv,
825 hcrx->ccid3hcrx_x_recv,
826 hcrx->ccid3hcrx_seqno_nonloss,
827 hcrx->ccid3hcrx_ccval_nonloss);
828 tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
829 dccp_inc_seqno(&tmp_seqno);
830 hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
831 dccp_inc_seqno(&tmp_seqno);
832 while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
833 tmp_seqno, &ccval)) {
834 hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
835 hcrx->ccid3hcrx_ccval_nonloss = ccval;
836 dccp_inc_seqno(&tmp_seqno);
837 } 779 }
838 } 780 }
839 781
840 /* FIXME - this code could be simplified with above while */ 782 fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt);
841 /* but works at moment */ 783 fval = scaled_div32(fval, x_recv);
842 if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { 784 p = tfrc_calc_x_reverse_lookup(fval);
843 hcrx->ccid3hcrx_seqno_nonloss = seqno;
844 hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
845 }
846 785
847detect_out: 786 ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
848 dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, 787 "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
849 &hcrx->ccid3hcrx_li_hist, packet, 788
850 hcrx->ccid3hcrx_seqno_nonloss); 789 return p == 0 ? ~0U : scaled_div(1, p);
851 return loss;
852} 790}
853 791
854static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) 792static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
855{ 793{
856 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 794 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
857 const struct dccp_options_received *opt_recv; 795 enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
858 struct dccp_rx_hist_entry *packet; 796 const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
859 u32 p_prev, r_sample, rtt_prev; 797 const bool is_data_packet = dccp_data_packet(skb);
860 int loss, payload_size; 798
861 ktime_t now; 799 if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
862 800 if (is_data_packet) {
863 opt_recv = &dccp_sk(sk)->dccps_options_received; 801 const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
864 802 do_feedback = CCID3_FBACK_INITIAL;
865 switch (DCCP_SKB_CB(skb)->dccpd_type) { 803 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
866 case DCCP_PKT_ACK: 804 hcrx->ccid3hcrx_s = payload;
867 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) 805 /*
868 return; 806 * Not necessary to update ccid3hcrx_bytes_recv here,
869 case DCCP_PKT_DATAACK: 807 * since X_recv = 0 for the first feedback packet (cf.
870 if (opt_recv->dccpor_timestamp_echo == 0) 808 * RFC 3448, 6.3) -- gerrit
871 break; 809 */
872 r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo; 810 }
873 rtt_prev = hcrx->ccid3hcrx_rtt; 811 goto update_records;
874 r_sample = dccp_sample_rtt(sk, 10 * r_sample); 812 }
875 813
876 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) 814 if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
877 hcrx->ccid3hcrx_rtt = r_sample; 815 return; /* done receiving */
878 else
879 hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
880 r_sample / 10;
881 816
882 if (rtt_prev != hcrx->ccid3hcrx_rtt) 817 if (is_data_packet) {
883 ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n", 818 const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
884 dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, 819 /*
885 opt_recv->dccpor_elapsed_time); 820 * Update moving-average of s and the sum of received payload bytes
886 break; 821 */
887 case DCCP_PKT_DATA: 822 hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
888 break; 823 hcrx->ccid3hcrx_bytes_recv += payload;
889 default: /* We're not interested in other packet types, move along */
890 return;
891 } 824 }
892 825
893 packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, 826 /*
894 skb, GFP_ATOMIC); 827 * Handle pending losses and otherwise check for new loss
895 if (unlikely(packet == NULL)) { 828 */
896 DCCP_WARN("%s(%p), Not enough mem to add rx packet " 829 if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) &&
897 "to history, consider it lost!\n", dccp_role(sk), sk); 830 tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist,
898 return; 831 &hcrx->ccid3hcrx_li_hist,
832 skb, ndp, ccid3_first_li, sk) ) {
833 do_feedback = CCID3_FBACK_PARAM_CHANGE;
834 goto done_receiving;
899 } 835 }
900 836
901 loss = ccid3_hc_rx_detect_loss(sk, packet); 837 if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
838 goto update_records;
902 839
903 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) 840 /*
904 return; 841 * Handle data packets: RTT sampling and monitoring p
905 842 */
906 payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4; 843 if (unlikely(!is_data_packet))
907 ccid3_hc_rx_update_s(hcrx, payload_size); 844 goto update_records;
908 845
909 switch (hcrx->ccid3hcrx_state) { 846 if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) {
910 case TFRC_RSTATE_NO_DATA: 847 const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
911 ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " 848 /*
912 "feedback\n", dccp_role(sk), sk, 849 * Empty loss history: no loss so far, hence p stays 0.
913 dccp_state_name(sk->sk_state), skb); 850 * Sample RTT values, since an RTT estimate is required for the
914 ccid3_hc_rx_send_feedback(sk); 851 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
915 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); 852 */
916 return; 853 if (sample != 0)
917 case TFRC_RSTATE_DATA: 854 hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
918 hcrx->ccid3hcrx_bytes_recv += payload_size;
919 if (loss)
920 break;
921 855
922 now = ktime_get_real(); 856 } else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) {
923 if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - 857 /*
924 (s64)hcrx->ccid3hcrx_rtt) >= 0) { 858 * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean
925 hcrx->ccid3hcrx_tstamp_last_ack = now; 859 * has decreased (resp. p has increased), send feedback now.
926 ccid3_hc_rx_send_feedback(sk); 860 */
927 } 861 do_feedback = CCID3_FBACK_PARAM_CHANGE;
928 return;
929 case TFRC_RSTATE_TERM:
930 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
931 return;
932 } 862 }
933 863
934 /* Dealing with packet loss */ 864 /*
935 ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", 865 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
936 dccp_role(sk), sk, dccp_state_name(sk->sk_state)); 866 */
937 867 if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
938 p_prev = hcrx->ccid3hcrx_p; 868 do_feedback = CCID3_FBACK_PERIODIC;
939
940 /* Calculate loss event rate */
941 if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
942 u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
943 869
944 /* Scaling up by 1000000 as fixed decimal */ 870update_records:
945 if (i_mean != 0) 871 tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
946 hcrx->ccid3hcrx_p = 1000000 / i_mean;
947 } else
948 DCCP_BUG("empty loss history");
949 872
950 if (hcrx->ccid3hcrx_p > p_prev) { 873done_receiving:
951 ccid3_hc_rx_send_feedback(sk); 874 if (do_feedback)
952 return; 875 ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
953 }
954} 876}
955 877
956static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) 878static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
957{ 879{
958 struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); 880 struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
959 881
960 ccid3_pr_debug("entry\n");
961
962 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; 882 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
963 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); 883 tfrc_lh_init(&hcrx->ccid3hcrx_li_hist);
964 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); 884 return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
965 hcrx->ccid3hcrx_tstamp_last_feedback =
966 hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real();
967 return 0;
968} 885}
969 886
970static void ccid3_hc_rx_exit(struct sock *sk) 887static void ccid3_hc_rx_exit(struct sock *sk)
@@ -973,11 +890,8 @@ static void ccid3_hc_rx_exit(struct sock *sk)
973 890
974 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); 891 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
975 892
976 /* Empty packet history */ 893 tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist);
977 dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); 894 tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist);
978
979 /* Empty loss interval history */
980 dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
981} 895}
982 896
983static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 897static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
@@ -998,6 +912,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
998 u32 __user *optval, int __user *optlen) 912 u32 __user *optval, int __user *optlen)
999{ 913{
1000 const struct ccid3_hc_rx_sock *hcrx; 914 const struct ccid3_hc_rx_sock *hcrx;
915 struct tfrc_rx_info rx_info;
1001 const void *val; 916 const void *val;
1002 917
1003 /* Listen socks doesn't have a private CCID block */ 918 /* Listen socks doesn't have a private CCID block */
@@ -1007,10 +922,14 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
1007 hcrx = ccid3_hc_rx_sk(sk); 922 hcrx = ccid3_hc_rx_sk(sk);
1008 switch (optname) { 923 switch (optname) {
1009 case DCCP_SOCKOPT_CCID_RX_INFO: 924 case DCCP_SOCKOPT_CCID_RX_INFO:
1010 if (len < sizeof(hcrx->ccid3hcrx_tfrc)) 925 if (len < sizeof(rx_info))
1011 return -EINVAL; 926 return -EINVAL;
1012 len = sizeof(hcrx->ccid3hcrx_tfrc); 927 rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv;
1013 val = &hcrx->ccid3hcrx_tfrc; 928 rx_info.tfrcrx_rtt = hcrx->ccid3hcrx_rtt;
929 rx_info.tfrcrx_p = hcrx->ccid3hcrx_pinv == 0 ? ~0U :
930 scaled_div(1, hcrx->ccid3hcrx_pinv);
931 len = sizeof(rx_info);
932 val = &rx_info;
1014 break; 933 break;
1015 default: 934 default:
1016 return -ENOPROTOOPT; 935 return -ENOPROTOOPT;
@@ -1024,7 +943,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
1024 943
1025static struct ccid_operations ccid3 = { 944static struct ccid_operations ccid3 = {
1026 .ccid_id = DCCPC_CCID3, 945 .ccid_id = DCCPC_CCID3,
1027 .ccid_name = "ccid3", 946 .ccid_name = "TCP-Friendly Rate Control",
1028 .ccid_owner = THIS_MODULE, 947 .ccid_owner = THIS_MODULE,
1029 .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), 948 .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
1030 .ccid_hc_tx_init = ccid3_hc_tx_init, 949 .ccid_hc_tx_init = ccid3_hc_tx_init,
@@ -1051,44 +970,13 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
1051 970
1052static __init int ccid3_module_init(void) 971static __init int ccid3_module_init(void)
1053{ 972{
1054 int rc = -ENOBUFS; 973 return ccid_register(&ccid3);
1055
1056 ccid3_rx_hist = dccp_rx_hist_new("ccid3");
1057 if (ccid3_rx_hist == NULL)
1058 goto out;
1059
1060 ccid3_tx_hist = dccp_tx_hist_new("ccid3");
1061 if (ccid3_tx_hist == NULL)
1062 goto out_free_rx;
1063
1064 rc = ccid_register(&ccid3);
1065 if (rc != 0)
1066 goto out_free_tx;
1067out:
1068 return rc;
1069
1070out_free_tx:
1071 dccp_tx_hist_delete(ccid3_tx_hist);
1072 ccid3_tx_hist = NULL;
1073out_free_rx:
1074 dccp_rx_hist_delete(ccid3_rx_hist);
1075 ccid3_rx_hist = NULL;
1076 goto out;
1077} 974}
1078module_init(ccid3_module_init); 975module_init(ccid3_module_init);
1079 976
1080static __exit void ccid3_module_exit(void) 977static __exit void ccid3_module_exit(void)
1081{ 978{
1082 ccid_unregister(&ccid3); 979 ccid_unregister(&ccid3);
1083
1084 if (ccid3_tx_hist != NULL) {
1085 dccp_tx_hist_delete(ccid3_tx_hist);
1086 ccid3_tx_hist = NULL;
1087 }
1088 if (ccid3_rx_hist != NULL) {
1089 dccp_rx_hist_delete(ccid3_rx_hist);
1090 ccid3_rx_hist = NULL;
1091 }
1092} 980}
1093module_exit(ccid3_module_exit); 981module_exit(ccid3_module_exit);
1094 982
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0cdc982cfe47..49ca32bd7e79 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,7 +1,8 @@
1/* 1/*
2 * net/dccp/ccids/ccid3.h 2 * net/dccp/ccids/ccid3.h
3 * 3 *
4 * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. 4 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
5 * 6 *
6 * An implementation of the DCCP protocol 7 * An implementation of the DCCP protocol
7 * 8 *
@@ -40,6 +41,7 @@
40#include <linux/list.h> 41#include <linux/list.h>
41#include <linux/types.h> 42#include <linux/types.h>
42#include <linux/tfrc.h> 43#include <linux/tfrc.h>
44#include "lib/tfrc.h"
43#include "../ccid.h" 45#include "../ccid.h"
44 46
45/* Two seconds as per RFC 3448 4.2 */ 47/* Two seconds as per RFC 3448 4.2 */
@@ -88,7 +90,6 @@ enum ccid3_hc_tx_states {
88 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet 90 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
89 * with last_win_count value sent 91 * with last_win_count value sent
90 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer 92 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
91 * @ccid3hctx_idle - Flag indicating that sender is idling
92 * @ccid3hctx_t_ld - Time last doubled during slow start 93 * @ccid3hctx_t_ld - Time last doubled during slow start
93 * @ccid3hctx_t_nom - Nominal send time of next packet 94 * @ccid3hctx_t_nom - Nominal send time of next packet
94 * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs 95 * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
@@ -107,13 +108,12 @@ struct ccid3_hc_tx_sock {
107 u16 ccid3hctx_s; 108 u16 ccid3hctx_s;
108 enum ccid3_hc_tx_states ccid3hctx_state:8; 109 enum ccid3_hc_tx_states ccid3hctx_state:8;
109 u8 ccid3hctx_last_win_count; 110 u8 ccid3hctx_last_win_count;
110 u8 ccid3hctx_idle;
111 ktime_t ccid3hctx_t_last_win_count; 111 ktime_t ccid3hctx_t_last_win_count;
112 struct timer_list ccid3hctx_no_feedback_timer; 112 struct timer_list ccid3hctx_no_feedback_timer;
113 ktime_t ccid3hctx_t_ld; 113 ktime_t ccid3hctx_t_ld;
114 ktime_t ccid3hctx_t_nom; 114 ktime_t ccid3hctx_t_nom;
115 u32 ccid3hctx_delta; 115 u32 ccid3hctx_delta;
116 struct list_head ccid3hctx_hist; 116 struct tfrc_tx_hist_entry *ccid3hctx_hist;
117 struct ccid3_options_received ccid3hctx_options_received; 117 struct ccid3_options_received ccid3hctx_options_received;
118}; 118};
119 119
@@ -135,37 +135,30 @@ enum ccid3_hc_rx_states {
135 * 135 *
136 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) 136 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
137 * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) 137 * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
138 * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4) 138 * @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4)
139 * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number 139 * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1)
140 * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal 140 * @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states
141 * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1)
142 * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states
143 * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes 141 * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
142 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
143 * @ccid3hcrx_rtt - Receiver estimate of RTT
144 * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent 144 * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
145 * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent 145 * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent
146 * @ccid3hcrx_hist - Packet history 146 * @ccid3hcrx_hist - Packet history (loss detection + RTT sampling)
147 * @ccid3hcrx_li_hist - Loss Interval History 147 * @ccid3hcrx_li_hist - Loss Interval database
148 * @ccid3hcrx_s - Received packet size in bytes 148 * @ccid3hcrx_s - Received packet size in bytes
149 * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) 149 * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
150 * @ccid3hcrx_elapsed_time - Time since packet reception
151 */ 150 */
152struct ccid3_hc_rx_sock { 151struct ccid3_hc_rx_sock {
153 struct tfrc_rx_info ccid3hcrx_tfrc; 152 u8 ccid3hcrx_last_counter:4;
154#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
155#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
156#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
157 u64 ccid3hcrx_seqno_nonloss:48,
158 ccid3hcrx_ccval_nonloss:4,
159 ccid3hcrx_ccval_last_counter:4;
160 enum ccid3_hc_rx_states ccid3hcrx_state:8; 153 enum ccid3_hc_rx_states ccid3hcrx_state:8;
161 u32 ccid3hcrx_bytes_recv; 154 u32 ccid3hcrx_bytes_recv;
155 u32 ccid3hcrx_x_recv;
156 u32 ccid3hcrx_rtt;
162 ktime_t ccid3hcrx_tstamp_last_feedback; 157 ktime_t ccid3hcrx_tstamp_last_feedback;
163 ktime_t ccid3hcrx_tstamp_last_ack; 158 struct tfrc_rx_hist ccid3hcrx_hist;
164 struct list_head ccid3hcrx_hist; 159 struct tfrc_loss_hist ccid3hcrx_li_hist;
165 struct list_head ccid3hcrx_li_hist;
166 u16 ccid3hcrx_s; 160 u16 ccid3hcrx_s;
167 u32 ccid3hcrx_pinv; 161#define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean
168 u32 ccid3hcrx_elapsed_time;
169}; 162};
170 163
171static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) 164static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
index 5f940a6cbaca..68c93e3d89dc 100644
--- a/net/dccp/ccids/lib/Makefile
+++ b/net/dccp/ccids/lib/Makefile
@@ -1,3 +1,3 @@
1obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o 1obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
2 2
3dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o 3dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index d26b88dbbb45..849e181e698f 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * net/dccp/ccids/lib/loss_interval.c 2 * net/dccp/ccids/lib/loss_interval.c
3 * 3 *
4 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
4 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 5 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz> 6 * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> 7 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
@@ -10,285 +11,176 @@
10 * the Free Software Foundation; either version 2 of the License, or 11 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version. 12 * (at your option) any later version.
12 */ 13 */
13
14#include <linux/module.h>
15#include <net/sock.h> 14#include <net/sock.h>
16#include "../../dccp.h"
17#include "loss_interval.h"
18#include "packet_history.h"
19#include "tfrc.h" 15#include "tfrc.h"
20 16
21#define DCCP_LI_HIST_IVAL_F_LENGTH 8 17static struct kmem_cache *tfrc_lh_slab __read_mostly;
22 18/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */
23struct dccp_li_hist_entry { 19static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 };
24 struct list_head dccplih_node;
25 u64 dccplih_seqno:48,
26 dccplih_win_count:4;
27 u32 dccplih_interval;
28};
29 20
30static struct kmem_cache *dccp_li_cachep __read_mostly; 21/* implements LIFO semantics on the array */
31 22static inline u8 LIH_INDEX(const u8 ctr)
32static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
33{ 23{
-	return kmem_cache_alloc(dccp_li_cachep, prio);
-}
-
-static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry)
-{
-	if (entry != NULL)
-		kmem_cache_free(dccp_li_cachep, entry);
-}
-
-void dccp_li_hist_purge(struct list_head *list)
-{
-	struct dccp_li_hist_entry *entry, *next;
-
-	list_for_each_entry_safe(entry, next, list, dccplih_node) {
-		list_del_init(&entry->dccplih_node);
-		kmem_cache_free(dccp_li_cachep, entry);
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
-
-/* Weights used to calculate loss event rate */
-/*
- * These are integers as per section 8 of RFC3448. We can then divide by 4
- * when we use it.
- */
-static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
-	4, 4, 4, 4, 3, 2, 1, 1,
-};
-
-u32 dccp_li_hist_calc_i_mean(struct list_head *list)
-{
-	struct dccp_li_hist_entry *li_entry, *li_next;
-	int i = 0;
-	u32 i_tot;
-	u32 i_tot0 = 0;
-	u32 i_tot1 = 0;
-	u32 w_tot = 0;
-
-	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
-		if (li_entry->dccplih_interval != ~0U) {
-			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
-			w_tot += dccp_li_hist_w[i];
-			if (i != 0)
-				i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
-		}
-
-		if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
-			break;
-	}
-
-	if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
-		return 0;
-
-	i_tot = max(i_tot0, i_tot1);
-
-	if (!w_tot) {
-		DCCP_WARN("w_tot = 0\n");
-		return 1;
-	}
-
-	return i_tot / w_tot;
-}
-
-EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
-
-static int dccp_li_hist_interval_new(struct list_head *list,
-				     const u64 seq_loss, const u8 win_loss)
-{
-	struct dccp_li_hist_entry *entry;
-	int i;
-
-	for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
-		entry = dccp_li_hist_entry_new(GFP_ATOMIC);
-		if (entry == NULL) {
-			dccp_li_hist_purge(list);
-			DCCP_BUG("loss interval list entry is NULL");
-			return 0;
-		}
-		entry->dccplih_interval = ~0;
-		list_add(&entry->dccplih_node, list);
-	}
-
-	entry->dccplih_seqno = seq_loss;
-	entry->dccplih_win_count = win_loss;
-	return 1;
-}
-
-/* calculate first loss interval
- *
- * returns estimated loss interval in usecs */
-static u32 dccp_li_calc_first_li(struct sock *sk,
-				 struct list_head *hist_list,
-				 ktime_t last_feedback,
-				 u16 s, u32 bytes_recv,
-				 u32 previous_x_recv)
-{
-	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
-	u32 x_recv, p;
-	suseconds_t rtt, delta;
-	ktime_t tstamp = ktime_set(0, 0);
-	int interval = 0;
-	int win_count = 0;
-	int step = 0;
-	u64 fval;
-
-	list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) {
-		if (dccp_rx_hist_entry_data_packet(entry)) {
-			tail = entry;
-
-			switch (step) {
-			case 0:
-				tstamp = entry->dccphrx_tstamp;
-				win_count = entry->dccphrx_ccval;
-				step = 1;
-				break;
-			case 1:
-				interval = win_count - entry->dccphrx_ccval;
-				if (interval < 0)
-					interval += TFRC_WIN_COUNT_LIMIT;
-				if (interval > 4)
-					goto found;
-				break;
-			}
-		}
-	}
-
-	if (unlikely(step == 0)) {
-		DCCP_WARN("%s(%p), packet history has no data packets!\n",
-			  dccp_role(sk), sk);
-		return ~0;
-	}
-
-	if (unlikely(interval == 0)) {
-		DCCP_WARN("%s(%p), Could not find a win_count interval > 0. "
-			  "Defaulting to 1\n", dccp_role(sk), sk);
-		interval = 1;
-	}
-found:
-	if (!tail) {
-		DCCP_CRIT("tail is null\n");
-		return ~0;
-	}
-
-	delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp);
-	DCCP_BUG_ON(delta < 0);
-
-	rtt = delta * 4 / interval;
-	dccp_pr_debug("%s(%p), approximated RTT to %dus\n",
-		      dccp_role(sk), sk, (int)rtt);
-
-	/*
-	 * Determine the length of the first loss interval via inverse lookup.
-	 * Assume that X_recv can be computed by the throughput equation
-	 *		    s
-	 *	X_recv = --------
-	 *		 R * fval
-	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
-	 */
-	if (rtt == 0) {			/* would result in divide-by-zero */
-		DCCP_WARN("RTT==0\n");
-		return ~0;
-	}
-
-	delta = ktime_us_delta(ktime_get_real(), last_feedback);
-	DCCP_BUG_ON(delta <= 0);
-
-	x_recv = scaled_div32(bytes_recv, delta);
-	if (x_recv == 0) {		/* would also trigger divide-by-zero */
-		DCCP_WARN("X_recv==0\n");
-		if (previous_x_recv == 0) {
-			DCCP_BUG("stored value of X_recv is zero");
-			return ~0;
-		}
-		x_recv = previous_x_recv;
-	}
-
-	fval = scaled_div(s, rtt);
-	fval = scaled_div32(fval, x_recv);
-	p = tfrc_calc_x_reverse_lookup(fval);
-
-	dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
-		      "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
-
-	if (p == 0)
-		return ~0;
-	else
-		return 1000000 / p;
-}
-
-void dccp_li_update_li(struct sock *sk,
-		       struct list_head *li_hist_list,
-		       struct list_head *hist_list,
-		       ktime_t last_feedback, u16 s, u32 bytes_recv,
-		       u32 previous_x_recv, u64 seq_loss, u8 win_loss)
-{
-	struct dccp_li_hist_entry *head;
-	u64 seq_temp;
-
-	if (list_empty(li_hist_list)) {
-		if (!dccp_li_hist_interval_new(li_hist_list, seq_loss,
-					       win_loss))
-			return;
-
-		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
-				  dccplih_node);
-		head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list,
-							       last_feedback,
-							       s, bytes_recv,
-							       previous_x_recv);
-	} else {
-		struct dccp_li_hist_entry *entry;
-		struct list_head *tail;
-
-		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
-				  dccplih_node);
-		/* FIXME win count check removed as was wrong */
-		/* should make this check with receive history */
-		/* and compare there as per section 10.2 of RFC4342 */
-
-		/* new loss event detected */
-		/* calculate last interval length */
-		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
-		entry = dccp_li_hist_entry_new(GFP_ATOMIC);
-
-		if (entry == NULL) {
-			DCCP_BUG("out of memory - can not allocate entry");
-			return;
-		}
-
-		list_add(&entry->dccplih_node, li_hist_list);
-
-		tail = li_hist_list->prev;
-		list_del(tail);
-		kmem_cache_free(dccp_li_cachep, tail);
-
-		/* Create the newest interval */
-		entry->dccplih_seqno = seq_loss;
-		entry->dccplih_interval = seq_temp;
-		entry->dccplih_win_count = win_loss;
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_li_update_li);
-
-static __init int dccp_li_init(void)
-{
-	dccp_li_cachep = kmem_cache_create("dccp_li_hist",
-					   sizeof(struct dccp_li_hist_entry),
-					   0, SLAB_HWCACHE_ALIGN, NULL);
-	return dccp_li_cachep == NULL ? -ENOBUFS : 0;
-}
-
-static __exit void dccp_li_exit(void)
-{
-	kmem_cache_destroy(dccp_li_cachep);
-}
-
-module_init(dccp_li_init);
-module_exit(dccp_li_exit);
+	return (LIH_SIZE - 1 - (ctr % LIH_SIZE));
+}
+
+/* the `counter' index always points at the next entry to be populated */
+static inline struct tfrc_loss_interval *tfrc_lh_peek(struct tfrc_loss_hist *lh)
+{
+	return lh->counter ? lh->ring[LIH_INDEX(lh->counter - 1)] : NULL;
+}
+
+/* given i with 0 <= i <= k, return I_i as per the rfc3448bis notation */
+static inline u32 tfrc_lh_get_interval(struct tfrc_loss_hist *lh, const u8 i)
+{
+	BUG_ON(i >= lh->counter);
+	return lh->ring[LIH_INDEX(lh->counter - i - 1)]->li_length;
+}
+
+/*
+ * On-demand allocation and de-allocation of entries
+ */
+static struct tfrc_loss_interval *tfrc_lh_demand_next(struct tfrc_loss_hist *lh)
+{
+	if (lh->ring[LIH_INDEX(lh->counter)] == NULL)
+		lh->ring[LIH_INDEX(lh->counter)] = kmem_cache_alloc(tfrc_lh_slab,
+								    GFP_ATOMIC);
+	return lh->ring[LIH_INDEX(lh->counter)];
+}
+
+void tfrc_lh_cleanup(struct tfrc_loss_hist *lh)
+{
+	if (!tfrc_lh_is_initialised(lh))
+		return;
+
+	for (lh->counter = 0; lh->counter < LIH_SIZE; lh->counter++)
+		if (lh->ring[LIH_INDEX(lh->counter)] != NULL) {
+			kmem_cache_free(tfrc_lh_slab,
+					lh->ring[LIH_INDEX(lh->counter)]);
+			lh->ring[LIH_INDEX(lh->counter)] = NULL;
+		}
+}
+EXPORT_SYMBOL_GPL(tfrc_lh_cleanup);
+
+static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
+{
+	u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0;
+	int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */
+
+	for (i = 0; i <= k; i++) {
+		i_i = tfrc_lh_get_interval(lh, i);
+
+		if (i < k) {
+			i_tot0 += i_i * tfrc_lh_weights[i];
+			w_tot  += tfrc_lh_weights[i];
+		}
+		if (i > 0)
+			i_tot1 += i_i * tfrc_lh_weights[i-1];
+	}
+
+	BUG_ON(w_tot == 0);
+	lh->i_mean = max(i_tot0, i_tot1) / w_tot;
+}
+
+/**
+ * tfrc_lh_update_i_mean  -  Update the `open' loss interval I_0
+ * For recomputing p: returns `true' if p > p_prev  <=>  1/p < 1/p_prev
+ */
+u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
+{
+	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
+	u32 old_i_mean = lh->i_mean;
+	s64 length;
+
+	if (cur == NULL)			/* not initialised */
+		return 0;
+
+	length = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq);
+
+	if (length - cur->li_length <= 0)	/* duplicate or reordered */
+		return 0;
+
+	if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
+		/*
+		 * Implements RFC 4342, 10.2:
+		 * If a packet S (skb) exists whose seqno comes `after' the one
+		 * starting the current loss interval (cur) and if the modulo-16
+		 * distance from C(cur) to C(S) is greater than 4, consider all
+		 * subsequent packets as belonging to a new loss interval. This
+		 * test is necessary since CCVal may wrap between intervals.
+		 */
+		cur->li_is_closed = 1;
+
+	if (tfrc_lh_length(lh) == 1)		/* due to RFC 3448, 6.3.1 */
+		return 0;
+
+	cur->li_length = length;
+	tfrc_lh_calc_i_mean(lh);
+
+	return (lh->i_mean < old_i_mean);
+}
+EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
+
+/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
+static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
+				     struct tfrc_rx_hist_entry *new_loss)
+{
+	return	dccp_delta_seqno(cur->li_seqno, new_loss->tfrchrx_seqno) > 0 &&
+		(cur->li_is_closed || SUB16(new_loss->tfrchrx_ccval, cur->li_ccval) > 4);
+}
+
+/** tfrc_lh_interval_add  -  Insert new record into the Loss Interval database
+ * @lh:		   Loss Interval database
+ * @rh:		   Receive history containing a fresh loss event
+ * @calc_first_li: Caller-dependent routine to compute length of first interval
+ * @sk:		   Used by @calc_first_li in caller-specific way (subtyping)
+ * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
+ */
+int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
+			 u32 (*calc_first_li)(struct sock *), struct sock *sk)
+{
+	struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
+
+	if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
+		return 0;
+
+	new = tfrc_lh_demand_next(lh);
+	if (unlikely(new == NULL)) {
+		DCCP_CRIT("Cannot allocate/add loss record.");
+		return 0;
+	}
+
+	new->li_seqno	  = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
+	new->li_ccval	  = tfrc_rx_hist_loss_prev(rh)->tfrchrx_ccval;
+	new->li_is_closed = 0;
+
+	if (++lh->counter == 1)
+		lh->i_mean = new->li_length = (*calc_first_li)(sk);
+	else {
+		cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno);
+		new->li_length = dccp_delta_seqno(new->li_seqno,
+				  tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno);
+		if (lh->counter > (2*LIH_SIZE))
+			lh->counter -= LIH_SIZE;
+
+		tfrc_lh_calc_i_mean(lh);
+	}
+	return 1;
+}
+EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
+
+int __init tfrc_li_init(void)
+{
+	tfrc_lh_slab = kmem_cache_create("tfrc_li_hist",
+					 sizeof(struct tfrc_loss_interval), 0,
+					 SLAB_HWCACHE_ALIGN, NULL);
+	return tfrc_lh_slab == NULL ? -ENOBUFS : 0;
+}
+
+void tfrc_li_exit(void)
+{
+	if (tfrc_lh_slab != NULL) {
+		kmem_cache_destroy(tfrc_lh_slab);
+		tfrc_lh_slab = NULL;
+	}
+}
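The rewrite above replaces the list-based loss history with a fixed ring of LIH_SIZE = 9 slots, indexed LIFO via LIH_INDEX from a monotonically growing counter, and computes I_mean as the larger of two weighted sums (RFC 3448, 5.4) so that a still-short open interval I_0 cannot lower the average. Below is a standalone user-space sketch of these two calculations, not part of the patch; it uses the weights from the removed dccp_li_hist_w table (the new code's tfrc_lh_weights table is defined earlier in the file, outside this excerpt), and the data in main() are made up.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NINTERVAL 8
#define LIH_SIZE  (NINTERVAL + 1)
/* counter grows without bound; LIH_INDEX folds it onto the 9-slot ring */
#define LIH_INDEX(ctr) (LIH_SIZE - 1 - ((ctr) % LIH_SIZE))

static const int weights[NINTERVAL] = { 4, 4, 4, 4, 3, 2, 1, 1 };

/* lengths[0] is the open interval I_0, lengths[1] the next older, etc. */
static uint32_t calc_i_mean(const uint32_t *lengths, int n)
{
	uint32_t i_tot0 = 0, i_tot1 = 0, w_tot = 0;
	int i, k = n - 1;		/* k as in rfc3448bis, 5.4 */

	for (i = 0; i <= k; i++) {
		if (i < k) {		/* weighted sum over I_0 .. I_{k-1} */
			i_tot0 += lengths[i] * weights[i];
			w_tot  += weights[i];
		}
		if (i > 0)		/* weighted sum over I_1 .. I_k */
			i_tot1 += lengths[i] * weights[i - 1];
	}
	assert(w_tot > 0);
	/* taking the max ignores I_0 while it is still short */
	return (i_tot0 > i_tot1 ? i_tot0 : i_tot1) / w_tot;
}

int main(void)
{
	uint32_t lengths[LIH_SIZE] = { 3, 90, 110, 100, 95, 105, 100, 98, 102 };

	assert(LIH_INDEX(0) == 8 && LIH_INDEX(1) == 7 && LIH_INDEX(9) == 8);
	printf("I_mean = %u packets\n", calc_i_mean(lengths, LIH_SIZE));
	return 0;
}

With the sample data the second sum wins (the open interval of length 3 is ignored) and I_mean comes out at 120 packets.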
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 27bee92dae13..246018a3b269 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -3,6 +3,7 @@
 /*
  * net/dccp/ccids/lib/loss_interval.h
  *
+ * Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
  * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
@@ -12,18 +13,63 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  */
-
 #include <linux/ktime.h>
 #include <linux/list.h>
+#include <linux/slab.h>
+
+/*
+ * Number of loss intervals (RFC 4342, 8.6.1). The history size is one more than
+ * NINTERVAL, since the `open' interval I_0 is always stored as the first entry.
+ */
+#define NINTERVAL	8
+#define LIH_SIZE	(NINTERVAL + 1)
+
+/**
+ * tfrc_loss_interval  -  Loss history record for TFRC-based protocols
+ * @li_seqno:		Highest received seqno before the start of loss
+ * @li_ccval:		The CCVal belonging to @li_seqno
+ * @li_is_closed:	Whether @li_seqno is older than 1 RTT
+ * @li_length:		Loss interval sequence length
+ */
+struct tfrc_loss_interval {
+	u64		 li_seqno:48,
+			 li_ccval:4,
+			 li_is_closed:1;
+	u32		 li_length;
+};
+
+/**
+ * tfrc_loss_hist  -  Loss record database
+ * @ring:	Circular queue managed in LIFO manner
+ * @counter:	Current count of entries (can be more than %LIH_SIZE)
+ * @i_mean:	Current Average Loss Interval [RFC 3448, 5.4]
+ */
+struct tfrc_loss_hist {
+	struct tfrc_loss_interval	*ring[LIH_SIZE];
+	u8				counter;
+	u32				i_mean;
+};
+
+static inline void tfrc_lh_init(struct tfrc_loss_hist *lh)
+{
+	memset(lh, 0, sizeof(struct tfrc_loss_hist));
+}
+
+static inline u8 tfrc_lh_is_initialised(struct tfrc_loss_hist *lh)
+{
+	return lh->counter > 0;
+}
+
+static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
+{
+	return min(lh->counter, (u8)LIH_SIZE);
+}
 
-extern void dccp_li_hist_purge(struct list_head *list);
+struct tfrc_rx_hist;
 
-extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
+extern int  tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+				 u32 (*first_li)(struct sock *), struct sock *);
+extern u8   tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
 
-extern void dccp_li_update_li(struct sock *sk,
-			      struct list_head *li_hist_list,
-			      struct list_head *hist_list,
-			      ktime_t last_feedback, u16 s,
-			      u32 bytes_recv, u32 previous_x_recv,
-			      u64 seq_loss, u8 win_loss);
 #endif /* _DCCP_LI_HIST_ */
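The new tfrc_loss_interval record packs a 48-bit sequence number, the 4-bit CCVal and the closed flag into a single u64 bitfield, so arithmetic on li_seqno wraps modulo 2^48 for free. A small user-space sketch of the same packing (not part of the patch; bitfield layout is compiler-defined, which is fine here since only width semantics matter):

#include <stdint.h>
#include <stdio.h>

struct loss_interval {
	uint64_t seqno:48,	/* DCCP sequence numbers are 48 bits wide */
		 ccval:4,	/* window counter value, RFC 4342, 8.1 */
		 is_closed:1;
	uint32_t length;
};

int main(void)
{
	struct loss_interval li = { .seqno = (1ULL << 48) - 1, .ccval = 15 };

	li.seqno++;		/* wraps modulo 2^48 inside the bitfield */
	printf("sizeof = %zu bytes, seqno after wrap = %llu\n",
	       sizeof(li), (unsigned long long)li.seqno);
	return 0;
}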
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 34c4f6047724..20af1a693427 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,7 +1,8 @@
 /*
  * net/dccp/packet_history.c
  *
- * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2007   The University of Aberdeen, Scotland, UK
+ * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *
  * An implementation of the DCCP protocol
  *
@@ -34,267 +35,465 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/module.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 #include "packet_history.h"
+#include "../../dccp.h"
+
+/**
+ * tfrc_tx_hist_entry  -  Simple singly-linked TX history list
+ * @next:  next oldest entry (LIFO order)
+ * @seqno: sequence number of this entry
+ * @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+	struct tfrc_tx_hist_entry *next;
+	u64			  seqno;
+	ktime_t			  stamp;
+};
 
 /*
  * Transmitter History Routines
  */
-struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
-{
-	struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
-	static const char dccp_tx_hist_mask[] = "tx_hist_%s";
-	char *slab_name;
-
-	if (hist == NULL)
-		goto out;
-
-	slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
-			    GFP_ATOMIC);
-	if (slab_name == NULL)
-		goto out_free_hist;
-
-	sprintf(slab_name, dccp_tx_hist_mask, name);
-	hist->dccptxh_slab = kmem_cache_create(slab_name,
-					       sizeof(struct dccp_tx_hist_entry),
-					       0, SLAB_HWCACHE_ALIGN,
-					       NULL);
-	if (hist->dccptxh_slab == NULL)
-		goto out_free_slab_name;
-out:
-	return hist;
-out_free_slab_name:
-	kfree(slab_name);
-out_free_hist:
-	kfree(hist);
-	hist = NULL;
-	goto out;
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
-
-void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
-{
-	const char* name = kmem_cache_name(hist->dccptxh_slab);
-
-	kmem_cache_destroy(hist->dccptxh_slab);
-	kfree(name);
-	kfree(hist);
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
-
-struct dccp_tx_hist_entry *
-	dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
-{
-	struct dccp_tx_hist_entry *packet = NULL, *entry;
-
-	list_for_each_entry(entry, list, dccphtx_node)
-		if (entry->dccphtx_seqno == seq) {
-			packet = entry;
-			break;
-		}
-
-	return packet;
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
-
-void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
-{
-	struct dccp_tx_hist_entry *entry, *next;
-
-	list_for_each_entry_safe(entry, next, list, dccphtx_node) {
-		list_del_init(&entry->dccphtx_node);
-		dccp_tx_hist_entry_delete(hist, entry);
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
-
-void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
-			      struct list_head *list,
-			      struct dccp_tx_hist_entry *packet)
-{
-	struct dccp_tx_hist_entry *next;
-
-	list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
-		list_del_init(&packet->dccphtx_node);
-		dccp_tx_hist_entry_delete(hist, packet);
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
+static struct kmem_cache *tfrc_tx_hist_slab;
+
+int __init tfrc_tx_packet_history_init(void)
+{
+	tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
+					      sizeof(struct tfrc_tx_hist_entry),
+					      0, SLAB_HWCACHE_ALIGN, NULL);
+	return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
+}
+
+void tfrc_tx_packet_history_exit(void)
+{
+	if (tfrc_tx_hist_slab != NULL) {
+		kmem_cache_destroy(tfrc_tx_hist_slab);
+		tfrc_tx_hist_slab = NULL;
+	}
+}
+
+static struct tfrc_tx_hist_entry *
+	tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
+{
+	while (head != NULL && head->seqno != seqno)
+		head = head->next;
+
+	return head;
+}
+
+int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
+{
+	struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
+
+	if (entry == NULL)
+		return -ENOBUFS;
+	entry->seqno = seqno;
+	entry->stamp = ktime_get_real();
+	entry->next  = *headp;
+	*headp	     = entry;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_add);
+
+void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
+{
+	struct tfrc_tx_hist_entry *head = *headp;
+
+	while (head != NULL) {
+		struct tfrc_tx_hist_entry *next = head->next;
+
+		kmem_cache_free(tfrc_tx_hist_slab, head);
+		head = next;
+	}
+
+	*headp = NULL;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
+
+u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
+		     const ktime_t now)
+{
+	u32 rtt = 0;
+	struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
+
+	if (packet != NULL) {
+		rtt = ktime_us_delta(now, packet->stamp);
+		/*
+		 * Garbage-collect older (irrelevant) entries:
+		 */
+		tfrc_tx_hist_purge(&packet->next);
+	}
+
+	return rtt;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
+
 
 /*
  * Receiver History Routines
  */
-struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
-{
-	struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
-	static const char dccp_rx_hist_mask[] = "rx_hist_%s";
-	char *slab_name;
-
-	if (hist == NULL)
-		goto out;
-
-	slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
-			    GFP_ATOMIC);
-	if (slab_name == NULL)
-		goto out_free_hist;
-
-	sprintf(slab_name, dccp_rx_hist_mask, name);
-	hist->dccprxh_slab = kmem_cache_create(slab_name,
-					       sizeof(struct dccp_rx_hist_entry),
-					       0, SLAB_HWCACHE_ALIGN,
-					       NULL);
-	if (hist->dccprxh_slab == NULL)
-		goto out_free_slab_name;
-out:
-	return hist;
-out_free_slab_name:
-	kfree(slab_name);
-out_free_hist:
-	kfree(hist);
-	hist = NULL;
-	goto out;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
-
-void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
-{
-	const char* name = kmem_cache_name(hist->dccprxh_slab);
-
-	kmem_cache_destroy(hist->dccprxh_slab);
-	kfree(name);
-	kfree(hist);
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
-
-int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
-			    u8 *ccval)
-{
-	struct dccp_rx_hist_entry *packet = NULL, *entry;
-
-	list_for_each_entry(entry, list, dccphrx_node)
-		if (entry->dccphrx_seqno == seq) {
-			packet = entry;
-			break;
-		}
-
-	if (packet)
-		*ccval = packet->dccphrx_ccval;
-
-	return packet != NULL;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
-struct dccp_rx_hist_entry *
-	dccp_rx_hist_find_data_packet(const struct list_head *list)
-{
-	struct dccp_rx_hist_entry *entry, *packet = NULL;
-
-	list_for_each_entry(entry, list, dccphrx_node)
-		if (entry->dccphrx_type == DCCP_PKT_DATA ||
-		    entry->dccphrx_type == DCCP_PKT_DATAACK) {
-			packet = entry;
-			break;
-		}
-
-	return packet;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
+static struct kmem_cache *tfrc_rx_hist_slab;
+
+int __init tfrc_rx_packet_history_init(void)
+{
+	tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache",
+					      sizeof(struct tfrc_rx_hist_entry),
+					      0, SLAB_HWCACHE_ALIGN, NULL);
+	return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0;
+}
+
+void tfrc_rx_packet_history_exit(void)
+{
+	if (tfrc_rx_hist_slab != NULL) {
+		kmem_cache_destroy(tfrc_rx_hist_slab);
+		tfrc_rx_hist_slab = NULL;
+	}
+}
+
+static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
+					       const struct sk_buff *skb,
+					       const u32 ndp)
+{
+	const struct dccp_hdr *dh = dccp_hdr(skb);
+
+	entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	entry->tfrchrx_ccval = dh->dccph_ccval;
+	entry->tfrchrx_type  = dh->dccph_type;
+	entry->tfrchrx_ndp   = ndp;
+	entry->tfrchrx_tstamp = ktime_get_real();
+}
+
+void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+			     const struct sk_buff *skb,
+			     const u32 ndp)
+{
+	struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
+
+	tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
+
+/* has the packet contained in skb been seen before? */
+int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
+{
+	const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
+	int i;
+
+	if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
+		return 1;
+
+	for (i = 1; i <= h->loss_count; i++)
+		if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
+			return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
+
+static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
+{
+	const u8 idx_a = tfrc_rx_hist_index(h, a),
+		 idx_b = tfrc_rx_hist_index(h, b);
+	struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
+
+	h->ring[idx_a] = h->ring[idx_b];
+	h->ring[idx_b] = tmp;
+}
+
+/*
+ * Private helper functions for loss detection.
+ *
+ * In the descriptions, `Si' refers to the sequence number of entry number i,
+ * whose NDP count is `Ni' (lower case is used for variables).
+ * Note: All __after_loss functions expect that a test against duplicates has
+ *       been performed already: the seqno of the skb must not be less than the
+ *       seqno of loss_prev; and it must not equal that of any valid hist_entry.
+ */
+static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
+{
+	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
+	    s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
+	    s2 = DCCP_SKB_CB(skb)->dccpd_seq;
+	int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
+	    d12 = dccp_delta_seqno(s1, s2), d2;
+
+	if (d12 > 0) {			/* S1  <  S2 */
+		h->loss_count = 2;
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
+		return;
+	}
+
+	/* S0  <  S2  <  S1 */
+	d2 = dccp_delta_seqno(s0, s2);
+
+	if (d2 == 1 || n2 >= d2) {	/* S2 is direct successor of S0 */
+		int d21 = -d12;
+
+		if (d21 == 1 || n1 >= d21) {
+			/* hole is filled: S0, S2, and S1 are consecutive */
+			h->loss_count = 0;
+			h->loss_start = tfrc_rx_hist_index(h, 1);
+		} else
+			/* gap between S2 and S1: just update loss_prev */
+			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
+
+	} else {			/* hole between S0 and S2 */
+		/*
+		 * Reorder history to insert S2 between S0 and s1
+		 */
+		tfrc_rx_hist_swap(h, 0, 3);
+		h->loss_start = tfrc_rx_hist_index(h, 3);
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2);
+		h->loss_count = 2;
+	}
+}
+
+/* return 1 if a new loss event has been identified */
+static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
+{
+	u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
+	    s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
+	    s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
+	    s3 = DCCP_SKB_CB(skb)->dccpd_seq;
+	int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
+	    d23 = dccp_delta_seqno(s2, s3), d13, d3, d31;
+
+	if (d23 > 0) {			/* S2  <  S3 */
+		h->loss_count = 3;
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
+		return 1;
+	}
+
+	/* S3  <  S2 */
+	d13 = dccp_delta_seqno(s1, s3);
+
+	if (d13 > 0) {
+		/*
+		 * The sequence number order is S1, S3, S2
+		 * Reorder history to insert entry between S1 and S2
+		 */
+		tfrc_rx_hist_swap(h, 2, 3);
+		tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
+		h->loss_count = 3;
+		return 1;
+	}
+
+	/* S0  <  S3  <  S1 */
+	d31 = -d13;
+	d3  = dccp_delta_seqno(s0, s3);
+
+	if (d3 == 1 || n3 >= d3) {	/* S3 is a successor of S0 */
+
+		if (d31 == 1 || n1 >= d31) {
+			/* hole between S0 and S1 filled by S3 */
+			int d2 = dccp_delta_seqno(s1, s2),
+			    n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
+
+			if (d2 == 1 || n2 >= d2) {
+				/* entire hole filled by S0, S3, S1, S2 */
+				h->loss_start = tfrc_rx_hist_index(h, 2);
+				h->loss_count = 0;
+			} else {
+				/* gap remains between S1 and S2 */
+				h->loss_start = tfrc_rx_hist_index(h, 1);
+				h->loss_count = 1;
+			}
+
+		} else /* gap exists between S3 and S1, loss_count stays at 2 */
+			tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3);
+
+		return 0;
+	}
+
+	/*
+	 * The remaining case: S3 is not a successor of S0.
+	 * Sequence order is S0, S3, S1, S2; reorder to insert between S0 and S1
+	 */
+	tfrc_rx_hist_swap(h, 0, 3);
+	h->loss_start = tfrc_rx_hist_index(h, 3);
+	tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3);
+	h->loss_count = 3;
+
+	return 1;
+}
+
+/* return the signed modulo-2^48 sequence number distance from entry e1 to e2 */
+static s64 tfrc_rx_hist_delta_seqno(struct tfrc_rx_hist *h, u8 e1, u8 e2)
+{
+	DCCP_BUG_ON(e1 > h->loss_count || e2 > h->loss_count);
+
+	return dccp_delta_seqno(tfrc_rx_hist_entry(h, e1)->tfrchrx_seqno,
+				tfrc_rx_hist_entry(h, e2)->tfrchrx_seqno);
+}
+
+/* recycle RX history records to continue loss detection if necessary */
+static void __three_after_loss(struct tfrc_rx_hist *h)
+{
+	/*
+	 * The distance between S0 and S1 is always greater than 1 and the NDP
+	 * count of S1 is smaller than this distance. Otherwise there would
+	 * have been no loss. Hence it is only necessary to see whether there
+	 * are further missing data packets between S1/S2 and S2/S3.
+	 */
+	int d2 = tfrc_rx_hist_delta_seqno(h, 1, 2),
+	    d3 = tfrc_rx_hist_delta_seqno(h, 2, 3),
+	    n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
+	    n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;
+
+	if (d2 == 1 || n2 >= d2) {	/* S2 is successor to S1 */
+
+		if (d3 == 1 || n3 >= d3) {
+			/* S3 is successor of S2: entire hole is filled */
+			h->loss_start = tfrc_rx_hist_index(h, 3);
+			h->loss_count = 0;
+		} else {
+			/* gap between S2 and S3 */
+			h->loss_start = tfrc_rx_hist_index(h, 2);
+			h->loss_count = 1;
+		}
+
+	} else {			/* gap between S1 and S2 */
+		h->loss_start = tfrc_rx_hist_index(h, 1);
+		h->loss_count = 2;
+	}
+}
+
+/**
+ * tfrc_rx_handle_loss  -  Loss detection and further processing
+ * @h:		    The non-empty RX history object
+ * @lh:		    Loss Intervals database to update
+ * @skb:	    Currently received packet
+ * @ndp:	    The NDP count belonging to @skb
+ * @calc_first_li: Caller-dependent computation of first loss interval in @lh
+ * @sk:		    Used by @calc_first_li (see tfrc_lh_interval_add)
+ * Chooses action according to pending loss, updates LI database when a new
+ * loss was detected, and does required post-processing. Returns 1 when caller
+ * should send feedback, 0 otherwise.
+ */
+int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+			struct tfrc_loss_hist *lh,
+			struct sk_buff *skb, u32 ndp,
+			u32 (*calc_first_li)(struct sock *), struct sock *sk)
+{
+	int is_new_loss = 0;
+
+	if (h->loss_count == 1) {
+		__one_after_loss(h, skb, ndp);
+	} else if (h->loss_count != 2) {
+		DCCP_BUG("invalid loss_count %d", h->loss_count);
+	} else if (__two_after_loss(h, skb, ndp)) {
+		/*
+		 * Update Loss Interval database and recycle RX records
+		 */
+		is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
+		__three_after_loss(h);
+	}
+	return is_new_loss;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
 
-void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
-			     struct list_head *rx_list,
-			     struct list_head *li_list,
-			     struct dccp_rx_hist_entry *packet,
-			     u64 nonloss_seqno)
-{
-	struct dccp_rx_hist_entry *entry, *next;
-	u8 num_later = 0;
-
-	list_add(&packet->dccphrx_node, rx_list);
-
-	num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
-
-	if (!list_empty(li_list)) {
-		list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
-			if (num_later == 0) {
-				if (after48(nonloss_seqno,
-					    entry->dccphrx_seqno)) {
-					list_del_init(&entry->dccphrx_node);
-					dccp_rx_hist_entry_delete(hist, entry);
-				}
-			} else if (dccp_rx_hist_entry_data_packet(entry))
-				--num_later;
-		}
-	} else {
-		int step = 0;
-		u8 win_count = 0; /* Not needed, but lets shut up gcc */
-		int tmp;
-		/*
-		 * We have no loss interval history so we need at least one
-		 * rtt:s of data packets to approximate rtt.
-		 */
-		list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
-			if (num_later == 0) {
-				switch (step) {
-				case 0:
-					step = 1;
-					/* OK, find next data packet */
-					num_later = 1;
-					break;
-				case 1:
-					step = 2;
-					/* OK, find next data packet */
-					num_later = 1;
-					win_count = entry->dccphrx_ccval;
-					break;
-				case 2:
-					tmp = win_count - entry->dccphrx_ccval;
-					if (tmp < 0)
-						tmp += TFRC_WIN_COUNT_LIMIT;
-					if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
-						/*
-						 * We have found a packet older
-						 * than one rtt remove the rest
-						 */
-						step = 3;
-					} else /* OK, find next data packet */
-						num_later = 1;
-					break;
-				case 3:
-					list_del_init(&entry->dccphrx_node);
-					dccp_rx_hist_entry_delete(hist, entry);
-					break;
-				}
-			} else if (dccp_rx_hist_entry_data_packet(entry))
-				--num_later;
-		}
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
-
-void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
-{
-	struct dccp_rx_hist_entry *entry, *next;
-
-	list_for_each_entry_safe(entry, next, list, dccphrx_node) {
-		list_del_init(&entry->dccphrx_node);
-		kmem_cache_free(hist->dccprxh_slab, entry);
-	}
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
-
-MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
-	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
-MODULE_DESCRIPTION("DCCP TFRC library");
-MODULE_LICENSE("GPL");
+int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
+{
+	int i;
+
+	for (i = 0; i <= TFRC_NDUPACK; i++) {
+		h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
+		if (h->ring[i] == NULL)
+			goto out_free;
+	}
+
+	h->loss_count = h->loss_start = 0;
+	return 0;
+
+out_free:
+	while (i-- != 0) {
+		kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+		h->ring[i] = NULL;
+	}
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
+
+void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
+{
+	int i;
+
+	for (i = 0; i <= TFRC_NDUPACK; ++i)
+		if (h->ring[i] != NULL) {
+			kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
+			h->ring[i] = NULL;
+		}
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
+
+/**
+ * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
+{
+	return h->ring[0];
+}
+
+/**
+ * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
+{
+	return h->ring[h->rtt_sample_prev];
+}
+
+/**
+ * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
+ * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
+ * to compute a sample with given data - calling function should check this.
+ */
+u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
+{
+	u32 sample = 0,
+	    delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
+			    tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+
+	if (delta_v < 1 || delta_v > 4) {	/* unsuitable CCVal delta */
+		if (h->rtt_sample_prev == 2) {	/* previous candidate stored */
+			sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+				       tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+			if (sample)
+				sample = 4 / sample *
+					 ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
+							tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
+			else	/*
+				 * FIXME: This condition is in principle not
+				 * possible but occurs when CCID is used for
+				 * two-way data traffic. I have tried to trace
+				 * it, but the cause does not seem to be here.
+				 */
+				DCCP_BUG("please report to dccp@vger.kernel.org"
+					 " => prev = %u, last = %u",
+					 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
+					 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
+		} else if (delta_v < 1) {
+			h->rtt_sample_prev = 1;
+			goto keep_ref_for_next_time;
+		}
+
+	} else if (delta_v == 4)		/* optimal match */
+		sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
+	else {					/* suboptimal match */
+		h->rtt_sample_prev = 2;
+		goto keep_ref_for_next_time;
+	}
+
+	if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
+		DCCP_WARN("RTT sample %u too large, using max\n", sample);
+		sample = DCCP_SANE_RTT_MAX;
+	}
+
+	h->rtt_sample_prev = 0;	       /* use current entry as next reference */
+keep_ref_for_next_time:
+
+	return sample;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
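tfrc_tx_hist_rtt() above does two jobs in one pass: it computes the RTT sample for an acknowledged sequence number and, since TFRC only ever needs the most recent acknowledged packet, prunes every older entry behind it. A user-space sketch of the same list discipline, not part of the patch, with malloc/free standing in for the slab cache and plain integers standing in for ktime_t:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct tx_hist_entry {
	struct tx_hist_entry *next;	/* next oldest entry (LIFO order) */
	uint64_t seqno;
	uint64_t stamp_us;		/* fake send timestamp, microseconds */
};

static int tx_hist_add(struct tx_hist_entry **headp, uint64_t seqno, uint64_t now)
{
	struct tx_hist_entry *e = malloc(sizeof(*e));

	if (e == NULL)
		return -1;
	e->seqno    = seqno;
	e->stamp_us = now;
	e->next     = *headp;		/* newest entry becomes the head */
	*headp      = e;
	return 0;
}

static void tx_hist_purge(struct tx_hist_entry **headp)
{
	while (*headp != NULL) {
		struct tx_hist_entry *next = (*headp)->next;

		free(*headp);
		*headp = next;
	}
}

/* RTT sample for `seqno', garbage-collecting all older entries */
static uint64_t tx_hist_rtt(struct tx_hist_entry **headp, uint64_t seqno, uint64_t now)
{
	struct tx_hist_entry *e = *headp;

	while (e != NULL && e->seqno != seqno)
		e = e->next;
	if (e == NULL)
		return 0;
	tx_hist_purge(&e->next);	/* entries older than e are irrelevant */
	return now - e->stamp_us;
}

int main(void)
{
	struct tx_hist_entry *head = NULL;
	uint64_t t;

	for (t = 0; t < 5; t++)
		tx_hist_add(&head, 100 + t, t * 1000);
	printf("rtt for seqno 102 = %lluus\n",
	       (unsigned long long)tx_hist_rtt(&head, 102, 7000));
	tx_hist_purge(&head);
	return 0;
}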
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 032bb61c6e39..c7eeda49cb20 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -1,10 +1,9 @@
 /*
- * net/dccp/packet_history.h
+ * Packet RX/TX history data structures and routines for TFRC-based protocols.
  *
+ * Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
  *
- * An implementation of the DCCP protocol
- *
  * This code has been developed by the University of Waikato WAND
  * research group. For further information please see http://www.wand.net.nz/
  * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
@@ -37,165 +36,128 @@
 #ifndef _DCCP_PKT_HIST_
 #define _DCCP_PKT_HIST_
 
-#include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include "tfrc.h"
 
-#include "../../dccp.h"
-
-/* Number of later packets received before one is considered lost */
-#define TFRC_RECV_NUM_LATE_LOSS 3
-
-#define TFRC_WIN_COUNT_PER_RTT	 4
-#define TFRC_WIN_COUNT_LIMIT	16
-
-/*
- * Transmitter History data structures and declarations
- */
-struct dccp_tx_hist_entry {
-	struct list_head dccphtx_node;
-	u64		 dccphtx_seqno:48,
-			 dccphtx_sent:1;
-	u32		 dccphtx_rtt;
-	ktime_t		 dccphtx_tstamp;
-};
-
-struct dccp_tx_hist {
-	struct kmem_cache *dccptxh_slab;
-};
-
-extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
-extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
-
-static inline struct dccp_tx_hist_entry *
-	dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
-			       const gfp_t prio)
-{
-	struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab,
-							    prio);
-
-	if (entry != NULL)
-		entry->dccphtx_sent = 0;
-
-	return entry;
-}
-
-static inline struct dccp_tx_hist_entry *
-	dccp_tx_hist_head(struct list_head *list)
-{
-	struct dccp_tx_hist_entry *head = NULL;
-
-	if (!list_empty(list))
-		head = list_entry(list->next, struct dccp_tx_hist_entry,
-				  dccphtx_node);
-	return head;
-}
-
-extern struct dccp_tx_hist_entry *
-	dccp_tx_hist_find_entry(const struct list_head *list,
-				const u64 seq);
-
-static inline void dccp_tx_hist_add_entry(struct list_head *list,
-					  struct dccp_tx_hist_entry *entry)
-{
-	list_add(&entry->dccphtx_node, list);
-}
-
-static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
-					     struct dccp_tx_hist_entry *entry)
-{
-	if (entry != NULL)
-		kmem_cache_free(hist->dccptxh_slab, entry);
-}
-
-extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
-			       struct list_head *list);
-
-extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
-				     struct list_head *list,
-				     struct dccp_tx_hist_entry *next);
-
-/*
- * Receiver History data structures and declarations
- */
-struct dccp_rx_hist_entry {
-	struct list_head dccphrx_node;
-	u64		 dccphrx_seqno:48,
-			 dccphrx_ccval:4,
-			 dccphrx_type:4;
-	u32		 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
-	ktime_t		 dccphrx_tstamp;
-};
-
-struct dccp_rx_hist {
-	struct kmem_cache *dccprxh_slab;
-};
-
-extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
-extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
-
-static inline struct dccp_rx_hist_entry *
-	dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
-			       const u32 ndp,
-			       const struct sk_buff *skb,
-			       const gfp_t prio)
-{
-	struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab,
-							    prio);
-
-	if (entry != NULL) {
-		const struct dccp_hdr *dh = dccp_hdr(skb);
-
-		entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-		entry->dccphrx_ccval = dh->dccph_ccval;
-		entry->dccphrx_type = dh->dccph_type;
-		entry->dccphrx_ndp = ndp;
-		entry->dccphrx_tstamp = ktime_get_real();
-	}
-
-	return entry;
-}
-
-static inline struct dccp_rx_hist_entry *
-	dccp_rx_hist_head(struct list_head *list)
-{
-	struct dccp_rx_hist_entry *head = NULL;
-
-	if (!list_empty(list))
-		head = list_entry(list->next, struct dccp_rx_hist_entry,
-				  dccphrx_node);
-	return head;
-}
-
-extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
-				   u8 *ccval);
-extern struct dccp_rx_hist_entry *
-		dccp_rx_hist_find_data_packet(const struct list_head *list);
-
-extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
-				    struct list_head *rx_list,
-				    struct list_head *li_list,
-				    struct dccp_rx_hist_entry *packet,
-				    u64 nonloss_seqno);
-
-static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
-					     struct dccp_rx_hist_entry *entry)
-{
-	if (entry != NULL)
-		kmem_cache_free(hist->dccprxh_slab, entry);
-}
-
-extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
-			       struct list_head *list);
-
-static inline int
-	dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
-{
-	return entry->dccphrx_type == DCCP_PKT_DATA ||
-	       entry->dccphrx_type == DCCP_PKT_DATAACK;
-}
-
-extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
-				    struct list_head *li_list, u8 *win_loss);
+struct tfrc_tx_hist_entry;
+
+extern int  tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
+extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
+extern u32  tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
+			     const u64 seqno, const ktime_t now);
+
+/* Subtraction a-b modulo-16, respects circular wrap-around */
+#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
+
+/* Number of packets to wait after a missing packet (RFC 4342, 6.1) */
+#define TFRC_NDUPACK 3
+
+/**
+ * tfrc_rx_hist_entry - Store information about a single received packet
+ * @tfrchrx_seqno:	DCCP packet sequence number
+ * @tfrchrx_ccval:	window counter value of packet (RFC 4342, 8.1)
+ * @tfrchrx_ndp:	the NDP count (if any) of the packet
+ * @tfrchrx_tstamp:	actual receive time of packet
+ */
+struct tfrc_rx_hist_entry {
+	u64		 tfrchrx_seqno:48,
+			 tfrchrx_ccval:4,
+			 tfrchrx_type:4;
+	u32		 tfrchrx_ndp; /* In fact it is from 8 to 24 bits */
+	ktime_t		 tfrchrx_tstamp;
+};
+
+/**
+ * tfrc_rx_hist  -  RX history structure for TFRC-based protocols
+ *
+ * @ring:		Packet history for RTT sampling and loss detection
+ * @loss_count:		Number of entries in circular history
+ * @loss_start:		Movable index (for loss detection)
+ * @rtt_sample_prev:	Used during RTT sampling, points to candidate entry
+ */
+struct tfrc_rx_hist {
+	struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
+	u8			  loss_count:2,
+				  loss_start:2;
+#define rtt_sample_prev		  loss_start
+};
+
+/**
+ * tfrc_rx_hist_index - index to reach n-th entry after loss_start
+ */
+static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
+{
+	return (h->loss_start + n) & TFRC_NDUPACK;
+}
+
+/**
+ * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
+{
+	return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
+}
+
+/**
+ * tfrc_rx_hist_entry - return the n-th history entry after loss_start
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
+{
+	return h->ring[tfrc_rx_hist_index(h, n)];
+}
+
+/**
+ * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
+ */
+static inline struct tfrc_rx_hist_entry *
+			tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
+{
+	return h->ring[h->loss_start];
+}
+
+/* initialise loss detection and disable RTT sampling */
+static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
+{
+	h->loss_count = 1;
+}
+
+/* indicate whether previously a packet was detected missing */
+static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
+{
+	return h->loss_count;
+}
+
+/* any data packets missing between last reception and skb ? */
+static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+						  const struct sk_buff *skb,
+						  u32 ndp)
+{
+	int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
+				     DCCP_SKB_CB(skb)->dccpd_seq);
+
+	if (delta > 1 && ndp < delta)
+		tfrc_rx_hist_loss_indicated(h);
+
+	return tfrc_rx_hist_loss_pending(h);
+}
+
+extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
+				    const struct sk_buff *skb, const u32 ndp);
+
+extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
+
+struct tfrc_loss_hist;
+extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+				struct tfrc_loss_hist *lh,
+				struct sk_buff *skb, u32 ndp,
+				u32 (*first_li)(struct sock *sk),
+				struct sock *sk);
+extern u32  tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
+				    const struct sk_buff *skb);
+extern int  tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
 
 #endif /* _DCCP_PKT_HIST_ */
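Two helpers in this header do all the index arithmetic: SUB16 subtracts in the 4-bit CCVal space with wrap-around, and tfrc_rx_hist_index folds loss_start + n onto the four-slot RX ring (the mask works as a modulo because TFRC_NDUPACK + 1 is a power of two). A quick user-space check, not part of the patch:

#include <assert.h>
#include <stdint.h>

#define TFRC_NDUPACK 3
#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)

static uint8_t rx_hist_index(uint8_t loss_start, uint8_t n)
{
	return (loss_start + n) & TFRC_NDUPACK;
}

int main(void)
{
	assert(SUB16(2, 14) == 4);		/* CCVal wrapped from 14 to 2 */
	assert(SUB16(5, 5) == 0);
	assert(rx_hist_index(3, 2) == 1);	/* wraps past the ring end */
	return 0;
}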
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
new file mode 100644
index 000000000000..d1dfbb8de64c
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -0,0 +1,63 @@
+/*
+ * TFRC: main module holding the pieces of the TFRC library together
+ *
+ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
+ * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include "tfrc.h"
+
+#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
+int tfrc_debug;
+module_param(tfrc_debug, bool, 0444);
+MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
+#endif
+
+extern int  tfrc_tx_packet_history_init(void);
+extern void tfrc_tx_packet_history_exit(void);
+extern int  tfrc_rx_packet_history_init(void);
+extern void tfrc_rx_packet_history_exit(void);
+
+extern int  tfrc_li_init(void);
+extern void tfrc_li_exit(void);
+
+static int __init tfrc_module_init(void)
+{
+	int rc = tfrc_li_init();
+
+	if (rc)
+		goto out;
+
+	rc = tfrc_tx_packet_history_init();
+	if (rc)
+		goto out_free_loss_intervals;
+
+	rc = tfrc_rx_packet_history_init();
+	if (rc)
+		goto out_free_tx_history;
+	return 0;
+
+out_free_tx_history:
+	tfrc_tx_packet_history_exit();
+out_free_loss_intervals:
+	tfrc_li_exit();
+out:
+	return rc;
+}
+
+static void __exit tfrc_module_exit(void)
+{
+	tfrc_rx_packet_history_exit();
+	tfrc_tx_packet_history_exit();
+	tfrc_li_exit();
+}
+
+module_init(tfrc_module_init);
+module_exit(tfrc_module_exit);
+
+MODULE_AUTHOR("Gerrit Renker <gerrit@erg.abdn.ac.uk>, "
+	      "Ian McDonald <ian.mcdonald@jandi.co.nz>, "
+	      "Arnaldo Carvalho de Melo <acme@redhat.com>");
+MODULE_DESCRIPTION("DCCP TFRC library");
+MODULE_LICENSE("GPL");
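tfrc_module_init() above uses the standard goto-unwind idiom: each failing step tears down only what the earlier steps set up, in reverse order. A minimal user-space analogue of the same control flow, with stub init functions (names and the forced failure are illustrative only):

#include <stdio.h>

static int  li_init(void)  { return 0; }
static void li_exit(void)  { }
static int  tx_init(void)  { return 0; }
static void tx_exit(void)  { }
static int  rx_init(void)  { return -1; }	/* pretend this step fails */

static int module_init_all(void)
{
	int rc = li_init();

	if (rc)
		goto out;
	rc = tx_init();
	if (rc)
		goto out_free_li;
	rc = rx_init();
	if (rc)
		goto out_free_tx;
	return 0;

out_free_tx:
	tx_exit();	/* undo in reverse order of setup */
out_free_li:
	li_exit();
out:
	return rc;
}

int main(void)
{
	printf("init: %d\n", module_init_all());	/* prints -1 */
	return 0;
}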
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index faf5f7e219e3..1fb1187bbf1c 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -3,10 +3,11 @@
 /*
  * net/dccp/ccids/lib/tfrc.h
  *
- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ * Copyright (c) 2007   The University of Aberdeen, Scotland, UK
+ * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
+ * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
+ * Copyright (c) 2005   Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ * Copyright (c) 2003   Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -15,6 +16,17 @@
  */
 #include <linux/types.h>
 #include <asm/div64.h>
+#include "../../dccp.h"
+/* internal includes that this module exports: */
+#include "loss_interval.h"
+#include "packet_history.h"
+
+#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
+extern int tfrc_debug;
+#define tfrc_pr_debug(format, a...)	DCCP_PR_DEBUG(tfrc_debug, format, ##a)
+#else
+#define tfrc_pr_debug(format, a...)
+#endif
 
 /* integer-arithmetic divisions of type (a * 1000000)/b */
 static inline u64 scaled_div(u64 a, u32 b)
@@ -37,6 +49,15 @@ static inline u32 scaled_div32(u64 a, u32 b)
 	return result;
 }
 
+/**
+ * tfrc_ewma  -  Exponentially weighted moving average
+ * @weight: Weight to be used as damping factor, in units of 1/10
+ */
+static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
+{
+	return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
+}
+
 extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
 extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
 
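The new tfrc_ewma() helper keeps the average in integer arithmetic with the weight given in tenths, and seeds the average with the first sample rather than decaying from zero. A quick worked check in user space (not part of the patch):

#include <assert.h>
#include <stdint.h>

static uint32_t tfrc_ewma(uint32_t avg, uint32_t newval, uint8_t weight)
{
	return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
}

int main(void)
{
	uint32_t rtt = 0;

	rtt = tfrc_ewma(rtt, 4000, 9);	/* first sample is taken as-is */
	assert(rtt == 4000);
	rtt = tfrc_ewma(rtt, 5000, 9);	/* 0.9 * 4000 + 0.1 * 5000 */
	assert(rtt == 4100);
	return 0;
}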
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index ee97950d77d1..ebe59d98721a 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -72,11 +72,21 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 /* RFC 1122, 4.2.3.1 initial RTO value */
 #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
 
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+/*
+ * The maximum back-off value for retransmissions. This is needed for
+ *  - retransmitting client-Requests (sec. 8.1.1),
+ *  - retransmitting Close/CloseReq when closing (sec. 8.3),
+ *  - feature-negotiation retransmission (sec. 6.6.3),
+ *  - Acks in client-PARTOPEN state (sec. 8.1.5).
+ */
+#define DCCP_RTO_MAX ((unsigned)(64 * HZ))
 
-/* bounds for sampled RTT values from packet exchanges (in usec) */
+/*
+ * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
+ */
 #define DCCP_SANE_RTT_MIN	100
-#define DCCP_SANE_RTT_MAX	(4 * USEC_PER_SEC)
+#define DCCP_FALLBACK_RTT	(USEC_PER_SEC / 5)
+#define DCCP_SANE_RTT_MAX	(3 * USEC_PER_SEC)
 
 /* Maximal interval between probes for local resources. */
 #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
@@ -143,12 +153,6 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
 	return after48(seq1, seq2) ? seq1 : seq2;
 }
 
-/* is seq1 next seqno after seq2 */
-static inline int follows48(const u64 seq1, const u64 seq2)
-{
-	return dccp_delta_seqno(seq2, seq1) == 1;
-}
-
 enum {
 	DCCP_MIB_NUM = 0,
 	DCCP_MIB_ACTIVEOPENS,			/* ActiveOpens */
@@ -334,6 +338,7 @@ struct dccp_skb_cb {
 
 #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
 
+/* RFC 4340, sec. 7.7 */
 static inline int dccp_non_data_packet(const struct sk_buff *skb)
 {
 	const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
@@ -346,6 +351,17 @@ static inline int dccp_non_data_packet(const struct sk_buff *skb)
 	       type == DCCP_PKT_SYNCACK;
 }
 
+/* RFC 4340, sec. 7.7 */
+static inline int dccp_data_packet(const struct sk_buff *skb)
+{
+	const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
+
+	return type == DCCP_PKT_DATA	||
+	       type == DCCP_PKT_DATAACK	||
+	       type == DCCP_PKT_REQUEST	||
+	       type == DCCP_PKT_RESPONSE;
+}
+
 static inline int dccp_packet_without_ack(const struct sk_buff *skb)
 {
 	const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
@@ -406,6 +422,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
 }
 
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
 extern int dccp_insert_option_elapsed_time(struct sock *sk,
 					   struct sk_buff *skb,
 					   u32 elapsed_time);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 5ebdd86c1b99..4a4f6ce4498d 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -4,10 +4,16 @@
  * An implementation of the DCCP protocol
  * Andrea Bittau <a.bittau@cs.ucl.ac.uk>
  *
+ * ASSUMPTIONS
+ * -----------
+ * o All currently known SP features have 1-byte quantities. If in the future
+ *   extensions of RFCs 4340..42 define features with item lengths larger than
+ *   one byte, a feature-specific extension of the code will be required.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -24,11 +30,7 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 
 	dccp_feat_debug(type, feature, *val);
 
-	if (!dccp_feat_is_valid_type(type)) {
-		DCCP_WARN("option type %d invalid in negotiation\n", type);
-		return 1;
-	}
-	if (!dccp_feat_is_valid_length(type, feature, len)) {
+	if (len > 3) {
 		DCCP_WARN("invalid length %d\n", len);
 		return 1;
 	}
@@ -99,7 +101,6 @@ static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
 	return 0;
 }
 
-/* XXX taking only u8 vals */
 static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
 {
 	dccp_feat_debug(type, feat, val);
@@ -144,7 +145,6 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
 	/* FIXME sanity check vals */
 
 	/* Are values in any order? XXX Lame "algorithm" here */
-	/* XXX assume values are 1 byte */
 	for (i = 0; i < slen; i++) {
 		for (j = 0; j < rlen; j++) {
 			if (spref[i] == rpref[j]) {
@@ -179,7 +179,6 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
 	}
 
 	/* need to put result and our preference list */
-	/* XXX assume 1 byte vals */
 	rlen = 1 + opt->dccpop_len;
 	rpref = kmalloc(rlen, GFP_ATOMIC);
 	if (rpref == NULL)
@@ -637,12 +636,12 @@ const char *dccp_feat_name(const u8 feat)
 		[DCCPF_MIN_CSUM_COVER]	= "Min. Csum Coverage",
 		[DCCPF_DATA_CHECKSUM]	= "Send Data Checksum",
 	};
+	if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
+		return feature_names[DCCPF_RESERVED];
+
 	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
 		return "CCID-specific";
 
-	if (dccp_feat_is_reserved(feat))
-		return feature_names[DCCPF_RESERVED];
-
 	return feature_names[feat];
 }
 
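The dccp_feat_name() change above moves the reserved-range test in front of the table lookup, so an out-of-range feature number can never index past the name array. A user-space sketch of the same lookup order, not part of the patch, using the RFC 4340 numbering (9 < feat < 128 reserved, >= 128 CCID-specific); the table entries shown are abbreviated:

#include <assert.h>
#include <string.h>

#define F_DATA_CHECKSUM		9
#define F_MIN_CCID_SPECIFIC	128

static const char *feat_name(unsigned int feat)
{
	static const char *names[F_DATA_CHECKSUM + 1] = {
		[0] = "Reserved", [1] = "CCID", [9] = "Send Data Checksum",
	};

	if (feat > F_DATA_CHECKSUM && feat < F_MIN_CCID_SPECIFIC)
		return names[0];	/* reserved range, checked first */
	if (feat >= F_MIN_CCID_SPECIFIC)
		return "CCID-specific";
	return names[feat];		/* only reached for feat <= 9 */
}

int main(void)
{
	assert(strcmp(feat_name(42), "Reserved") == 0);
	assert(strcmp(feat_name(200), "CCID-specific") == 0);
	assert(strcmp(feat_name(1), "CCID") == 0);
	return 0;
}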
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 177f7dee4d10..e272222c7ace 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -14,32 +14,6 @@
 #include <linux/types.h>
 #include "dccp.h"
 
-static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len)
-{
-	/* sec. 6.1: Confirm has at least length 3,
-	 * sec. 6.2: Change  has at least length 4 */
-	if (len < 3)
-		return 1;
-	if (len < 4 && (type == DCCPO_CHANGE_L || type == DCCPO_CHANGE_R))
-		return 1;
-	/* XXX: add per-feature length validation (sec. 6.6.8) */
-	return 0;
-}
-
-static inline int dccp_feat_is_reserved(const u8 feat)
-{
-	return (feat > DCCPF_DATA_CHECKSUM &&
-		feat < DCCPF_MIN_CCID_SPECIFIC) ||
-		feat == DCCPF_RESERVED;
-}
-
-/* feature negotiation knows only these four option types (RFC 4340, sec. 6) */
-static inline int dccp_feat_is_valid_type(const u8 optnum)
-{
-	return optnum >= DCCPO_CHANGE_L && optnum <= DCCPO_CONFIRM_R;
-
-}
-
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern const char *dccp_feat_typename(const u8 type);
 extern const char *dccp_feat_name(const u8 feat);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 1ce101062824..08392ed86c25 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -22,26 +22,77 @@
22/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ 22/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
23int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; 23int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8;
24 24
25static void dccp_fin(struct sock *sk, struct sk_buff *skb) 25static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb)
26{ 26{
27 sk->sk_shutdown |= RCV_SHUTDOWN;
28 sock_set_flag(sk, SOCK_DONE);
29 __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); 27 __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
30 __skb_queue_tail(&sk->sk_receive_queue, skb); 28 __skb_queue_tail(&sk->sk_receive_queue, skb);
31 skb_set_owner_r(skb, sk); 29 skb_set_owner_r(skb, sk);
32 sk->sk_data_ready(sk, 0); 30 sk->sk_data_ready(sk, 0);
33} 31}
34 32
35static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) 33static void dccp_fin(struct sock *sk, struct sk_buff *skb)
36{ 34{
37 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); 35 /*
38 dccp_fin(sk, skb); 36 * On receiving Close/CloseReq, both RD/WR shutdown are performed.
39 dccp_set_state(sk, DCCP_CLOSED); 37 * RFC 4340, 8.3 says that we MAY send further Data/DataAcks after
40 sk_wake_async(sk, 1, POLL_HUP); 38 * receiving the closing segment, but there is no guarantee that such
39 * data will be processed at all.
40 */
41 sk->sk_shutdown = SHUTDOWN_MASK;
42 sock_set_flag(sk, SOCK_DONE);
43 dccp_enqueue_skb(sk, skb);
44}
45
46static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
47{
48 int queued = 0;
49
50 switch (sk->sk_state) {
51 /*
52 * We ignore Close when received in one of the following states:
53 * - CLOSED (may be a late or duplicate packet)
54 * - PASSIVE_CLOSEREQ (the peer has sent a CloseReq earlier)
55 * - RESPOND (already handled by dccp_check_req)
56 */
57 case DCCP_CLOSING:
58 /*
59 * Simultaneous-close: receiving a Close after sending one. This
60 * can happen if both client and server perform active-close and
61 * will result in an endless ping-pong of crossing and retrans-
62 * mitted Close packets, which only terminates when one of the
63 * nodes times out (min. 64 seconds). Quicker convergence can be
64 * achieved when one of the nodes acts as tie-breaker.
65 * This is ok as both ends are done with data transfer and each
66 * end is just waiting for the other to acknowledge termination.
67 */
68 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT)
69 break;
70 /* fall through */
71 case DCCP_REQUESTING:
72 case DCCP_ACTIVE_CLOSEREQ:
73 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
74 dccp_done(sk);
75 break;
76 case DCCP_OPEN:
77 case DCCP_PARTOPEN:
78 /* Give waiting application a chance to read pending data */
79 queued = 1;
80 dccp_fin(sk, skb);
81 dccp_set_state(sk, DCCP_PASSIVE_CLOSE);
82 /* fall through */
83 case DCCP_PASSIVE_CLOSE:
84 /*
85 * Retransmitted Close: we have already enqueued the first one.
86 */
87 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
88 }
89 return queued;
41} 90}
42 91
43static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) 92static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
44{ 93{
94 int queued = 0;
95
45 /* 96 /*
46 * Step 7: Check for unexpected packet types 97 * Step 7: Check for unexpected packet types
47 * If (S.is_server and P.type == CloseReq) 98 * If (S.is_server and P.type == CloseReq)
@@ -50,12 +101,26 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
50 */ 101 */
51 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { 102 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
52 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); 103 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
53 return; 104 return queued;
54 } 105 }
55 106
56 if (sk->sk_state != DCCP_CLOSING) 107 /* Step 13: process relevant Client states < CLOSEREQ */
108 switch (sk->sk_state) {
109 case DCCP_REQUESTING:
110 dccp_send_close(sk, 0);
57 dccp_set_state(sk, DCCP_CLOSING); 111 dccp_set_state(sk, DCCP_CLOSING);
58 dccp_send_close(sk, 0); 112 break;
113 case DCCP_OPEN:
114 case DCCP_PARTOPEN:
115 /* Give waiting application a chance to read pending data */
116 queued = 1;
117 dccp_fin(sk, skb);
118 dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ);
119 /* fall through */
120 case DCCP_PASSIVE_CLOSEREQ:
121 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
122 }
123 return queued;
59} 124}
60 125
61static u8 dccp_reset_code_convert(const u8 code) 126static u8 dccp_reset_code_convert(const u8 code)
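The simultaneous-close handling above deserves a note: when two Close packets cross in flight, only the client answers with a Reset, while the server ignores the crossing Close, which breaks the ping-pong without waiting for both ends to time out. A hypothetical userspace model of the state handling (not the kernel function; names are invented):

#include <stdbool.h>

enum state { S_CLOSED, S_REQUESTING, S_OPEN, S_PASSIVE_CLOSE, S_CLOSING };
enum role  { ROLE_CLIENT, ROLE_SERVER };

/* Returns true when the packet is queued (kept for the reader) rather
 * than discarded, mirroring the new int return of dccp_rcv_close(). */
static bool rcv_close(enum role role, enum state *st)
{
	bool queued = false;

	switch (*st) {
	case S_CLOSING:
		if (role != ROLE_CLIENT)	/* server ignores crossing Close */
			break;
		/* fall through: client acts as tie-breaker */
	case S_REQUESTING:
		*st = S_CLOSED;			/* send Reset(Closed), done */
		break;
	case S_OPEN:
		queued = true;			/* keep pending data readable */
		*st = S_PASSIVE_CLOSE;
		/* fall through */
	case S_PASSIVE_CLOSE:
		/* retransmitted Close: wake readers with POLL_HUP */
		break;
	default:
		break;				/* late or duplicate Close */
	}
	return queued;
}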
@@ -90,7 +155,7 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
90 dccp_fin(sk, skb); 155 dccp_fin(sk, skb);
91 156
92 if (err && !sock_flag(sk, SOCK_DEAD)) 157 if (err && !sock_flag(sk, SOCK_DEAD))
93 sk_wake_async(sk, 0, POLL_ERR); 158 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
94 dccp_time_wait(sk, DCCP_TIME_WAIT, 0); 159 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
95} 160}
96 161
@@ -103,6 +168,21 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
103 DCCP_SKB_CB(skb)->dccpd_ack_seq); 168 DCCP_SKB_CB(skb)->dccpd_ack_seq);
104} 169}
105 170
171static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
172{
173 const struct dccp_sock *dp = dccp_sk(sk);
174
175 /* Don't deliver to RX CCID when node has shut down read end. */
176 if (!(sk->sk_shutdown & RCV_SHUTDOWN))
177 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
178 /*
179 * Until the TX queue has been drained, we can not honour SHUT_WR, since
180 * we need received feedback as input to adjust congestion control.
181 */
182 if (sk->sk_write_queue.qlen > 0 || !(sk->sk_shutdown & SEND_SHUTDOWN))
183 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
184}
185
106static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) 186static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
107{ 187{
108 const struct dccp_hdr *dh = dccp_hdr(skb); 188 const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -209,13 +289,11 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
209 case DCCP_PKT_DATAACK: 289 case DCCP_PKT_DATAACK:
210 case DCCP_PKT_DATA: 290 case DCCP_PKT_DATA:
211 /* 291 /*
212 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED 292 * FIXME: schedule DATA_DROPPED (RFC 4340, 11.7.2) if and when
213 * option if it is. 293 * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening"
294 * - sk_receive_queue is full, use Code 2, "Receive Buffer"
214 */ 295 */
215 __skb_pull(skb, dh->dccph_doff * 4); 296 dccp_enqueue_skb(sk, skb);
216 __skb_queue_tail(&sk->sk_receive_queue, skb);
217 skb_set_owner_r(skb, sk);
218 sk->sk_data_ready(sk, 0);
219 return 0; 297 return 0;
220 case DCCP_PKT_ACK: 298 case DCCP_PKT_ACK:
221 goto discard; 299 goto discard;
@@ -231,11 +309,13 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
231 dccp_rcv_reset(sk, skb); 309 dccp_rcv_reset(sk, skb);
232 return 0; 310 return 0;
233 case DCCP_PKT_CLOSEREQ: 311 case DCCP_PKT_CLOSEREQ:
234 dccp_rcv_closereq(sk, skb); 312 if (dccp_rcv_closereq(sk, skb))
313 return 0;
235 goto discard; 314 goto discard;
236 case DCCP_PKT_CLOSE: 315 case DCCP_PKT_CLOSE:
237 dccp_rcv_close(sk, skb); 316 if (dccp_rcv_close(sk, skb))
238 return 0; 317 return 0;
318 goto discard;
239 case DCCP_PKT_REQUEST: 319 case DCCP_PKT_REQUEST:
240 /* Step 7 320 /* Step 7
241 * or (S.is_server and P.type == Response) 321 * or (S.is_server and P.type == Response)
@@ -289,7 +369,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
289 if (dccp_check_seqno(sk, skb)) 369 if (dccp_check_seqno(sk, skb))
290 goto discard; 370 goto discard;
291 371
292 if (dccp_parse_options(sk, skb)) 372 if (dccp_parse_options(sk, NULL, skb))
293 goto discard; 373 goto discard;
294 374
295 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 375 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
@@ -300,9 +380,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
300 DCCP_SKB_CB(skb)->dccpd_seq, 380 DCCP_SKB_CB(skb)->dccpd_seq,
301 DCCP_ACKVEC_STATE_RECEIVED)) 381 DCCP_ACKVEC_STATE_RECEIVED))
302 goto discard; 382 goto discard;
303 383 dccp_deliver_input_to_ccids(sk, skb);
304 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
305 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
306 384
307 return __dccp_rcv_established(sk, skb, dh, len); 385 return __dccp_rcv_established(sk, skb, dh, len);
308discard: 386discard:
@@ -349,7 +427,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
349 goto out_invalid_packet; 427 goto out_invalid_packet;
350 } 428 }
351 429
352 if (dccp_parse_options(sk, skb)) 430 if (dccp_parse_options(sk, NULL, skb))
353 goto out_invalid_packet; 431 goto out_invalid_packet;
354 432
355 /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ 433 /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
@@ -402,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
402 480
403 if (!sock_flag(sk, SOCK_DEAD)) { 481 if (!sock_flag(sk, SOCK_DEAD)) {
404 sk->sk_state_change(sk); 482 sk->sk_state_change(sk);
405 sk_wake_async(sk, 0, POLL_OUT); 483 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
406 } 484 }
407 485
408 if (sk->sk_write_pending || icsk->icsk_ack.pingpong || 486 if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
@@ -531,7 +609,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
531 /* 609 /*
532 * Step 8: Process options and mark acknowledgeable 610 * Step 8: Process options and mark acknowledgeable
533 */ 611 */
534 if (dccp_parse_options(sk, skb)) 612 if (dccp_parse_options(sk, NULL, skb))
535 goto discard; 613 goto discard;
536 614
537 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 615 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
@@ -543,8 +621,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
543 DCCP_ACKVEC_STATE_RECEIVED)) 621 DCCP_ACKVEC_STATE_RECEIVED))
544 goto discard; 622 goto discard;
545 623
546 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); 624 dccp_deliver_input_to_ccids(sk, skb);
547 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
548 } 625 }
549 626
550 /* 627 /*
@@ -560,16 +637,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
560 return 0; 637 return 0;
561 /* 638 /*
562 * Step 7: Check for unexpected packet types 639 * Step 7: Check for unexpected packet types
563 * If (S.is_server and P.type == CloseReq) 640 * If (S.is_server and P.type == Response)
564 * or (S.is_server and P.type == Response)
565 * or (S.is_client and P.type == Request) 641 * or (S.is_client and P.type == Request)
566 * or (S.state == RESPOND and P.type == Data), 642 * or (S.state == RESPOND and P.type == Data),
567 * Send Sync packet acknowledging P.seqno 643 * Send Sync packet acknowledging P.seqno
568 * Drop packet and return 644 * Drop packet and return
569 */ 645 */
570 } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && 646 } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
571 (dh->dccph_type == DCCP_PKT_RESPONSE || 647 dh->dccph_type == DCCP_PKT_RESPONSE) ||
572 dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
573 (dp->dccps_role == DCCP_ROLE_CLIENT && 648 (dp->dccps_role == DCCP_ROLE_CLIENT &&
574 dh->dccph_type == DCCP_PKT_REQUEST) || 649 dh->dccph_type == DCCP_PKT_REQUEST) ||
575 (sk->sk_state == DCCP_RESPOND && 650 (sk->sk_state == DCCP_RESPOND &&
@@ -577,11 +652,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
577 dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); 652 dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
578 goto discard; 653 goto discard;
579 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { 654 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
580 dccp_rcv_closereq(sk, skb); 655 if (dccp_rcv_closereq(sk, skb))
656 return 0;
581 goto discard; 657 goto discard;
582 } else if (dh->dccph_type == DCCP_PKT_CLOSE) { 658 } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
583 dccp_rcv_close(sk, skb); 659 if (dccp_rcv_close(sk, skb))
584 return 0; 660 return 0;
661 goto discard;
585 } 662 }
586 663
587 switch (sk->sk_state) { 664 switch (sk->sk_state) {
@@ -611,7 +688,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
611 switch (old_state) { 688 switch (old_state) {
612 case DCCP_PARTOPEN: 689 case DCCP_PARTOPEN:
613 sk->sk_state_change(sk); 690 sk->sk_state_change(sk);
614 sk_wake_async(sk, 0, POLL_OUT); 691 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
615 break; 692 break;
616 } 693 }
617 } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { 694 } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
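dccp_deliver_input_to_ccids() encodes an asymmetry worth spelling out: RX processing stops as soon as the read side is shut down, but TX feedback keeps flowing until the write queue drains, because congestion control still needs incoming Acks as input. A compact userspace model (struct fields and callback names are invented):

#define RCV_SHUT 0x1
#define SND_SHUT 0x2

struct conn {
	unsigned int shutdown;	/* RCV_SHUT / SND_SHUT bits */
	unsigned int tx_qlen;	/* packets still awaiting transmission */
};

static void rx_ccid_recv(struct conn *c, const void *pkt) { (void)c; (void)pkt; }
static void tx_ccid_recv(struct conn *c, const void *pkt) { (void)c; (void)pkt; }

static void deliver_to_ccids(struct conn *c, const void *pkt)
{
	/* No RX processing once the read side is shut down. */
	if (!(c->shutdown & RCV_SHUT))
		rx_ccid_recv(c, pkt);
	/* TX feedback must keep flowing until the queue drains. */
	if (c->tx_qlen > 0 || !(c->shutdown & SND_SHUT))
		tx_ccid_recv(c, pkt);
}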
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index db17b83e8d3e..9e38b0d6195c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -408,7 +408,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
408 408
409 dccp_sync_mss(newsk, dst_mtu(dst)); 409 dccp_sync_mss(newsk, dst_mtu(dst));
410 410
411 __inet_hash(&dccp_hashinfo, newsk, 0); 411 __inet_hash_nolisten(&dccp_hashinfo, newsk);
412 __inet_inherit_port(&dccp_hashinfo, sk, newsk); 412 __inet_inherit_port(&dccp_hashinfo, sk, newsk);
413 413
414 return newsk; 414 return newsk;
@@ -469,7 +469,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
469 }; 469 };
470 470
471 security_skb_classify_flow(skb, &fl); 471 security_skb_classify_flow(skb, &fl);
472 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 472 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) {
473 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 473 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
474 return NULL; 474 return NULL;
475 } 475 }
@@ -600,11 +600,12 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
600 if (req == NULL) 600 if (req == NULL)
601 goto drop; 601 goto drop;
602 602
603 if (dccp_parse_options(sk, skb))
604 goto drop_and_free;
605
606 dccp_reqsk_init(req, skb); 603 dccp_reqsk_init(req, skb);
607 604
605 dreq = dccp_rsk(req);
606 if (dccp_parse_options(sk, dreq, skb))
607 goto drop_and_free;
608
608 if (security_inet_conn_request(sk, skb, req)) 609 if (security_inet_conn_request(sk, skb, req))
609 goto drop_and_free; 610 goto drop_and_free;
610 611
@@ -621,7 +622,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
621 * In fact we defer setting S.GSR, S.SWL, S.SWH to 622 * In fact we defer setting S.GSR, S.SWL, S.SWH to
622 * dccp_create_openreq_child. 623 * dccp_create_openreq_child.
623 */ 624 */
624 dreq = dccp_rsk(req);
625 dreq->dreq_isr = dcb->dccpd_seq; 625 dreq->dreq_isr = dcb->dccpd_seq;
626 dreq->dreq_iss = dccp_v4_init_sequence(skb); 626 dreq->dreq_iss = dccp_v4_init_sequence(skb);
627 dreq->dreq_service = service; 627 dreq->dreq_service = service;
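The reordering in dccp_v4_conn_request() is not cosmetic: dccp_reqsk_init() now runs before dccp_parse_options(), so defaults written by the initialiser (such as dreq_timestamp_echo = 0) can no longer clobber values the parser has just stored into the request sock. A toy illustration of the ordering hazard (hypothetical names):

struct req { unsigned int ts_echo; /* 0 = no Timestamp seen */ };

static void req_init(struct req *r)
{
	r->ts_echo = 0;			/* must run first ... */
}

static void parse_options(struct req *r)
{
	r->ts_echo = 0xdeadbeef;	/* ... so the parsed value survives */
}

static void conn_request(struct req *r)
{
	req_init(r);		/* with the old order, init ran second and
				 * would silently discard the parsed option */
	parse_options(r);
}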
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 87c98fb86fa8..f42b75ce7f5c 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -415,11 +415,12 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
415 if (req == NULL) 415 if (req == NULL)
416 goto drop; 416 goto drop;
417 417
418 if (dccp_parse_options(sk, skb))
419 goto drop_and_free;
420
421 dccp_reqsk_init(req, skb); 418 dccp_reqsk_init(req, skb);
422 419
420 dreq = dccp_rsk(req);
421 if (dccp_parse_options(sk, dreq, skb))
422 goto drop_and_free;
423
423 if (security_inet_conn_request(sk, skb, req)) 424 if (security_inet_conn_request(sk, skb, req))
424 goto drop_and_free; 425 goto drop_and_free;
425 426
@@ -449,7 +450,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
449 * In fact we defer setting S.GSR, S.SWL, S.SWH to 450 * In fact we defer setting S.GSR, S.SWL, S.SWH to
450 * dccp_create_openreq_child. 451 * dccp_create_openreq_child.
451 */ 452 */
452 dreq = dccp_rsk(req);
453 dreq->dreq_isr = dcb->dccpd_seq; 453 dreq->dreq_isr = dcb->dccpd_seq;
454 dreq->dreq_iss = dccp_v6_init_sequence(skb); 454 dreq->dreq_iss = dccp_v6_init_sequence(skb);
455 dreq->dreq_service = service; 455 dreq->dreq_service = service;
@@ -994,7 +994,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
994 if (final_p) 994 if (final_p)
995 ipv6_addr_copy(&fl.fl6_dst, final_p); 995 ipv6_addr_copy(&fl.fl6_dst, final_p);
996 996
997 err = __xfrm_lookup(&dst, &fl, sk, 1); 997 err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT);
998 if (err < 0) { 998 if (err < 0) {
999 if (err == -EREMOTE) 999 if (err == -EREMOTE)
1000 err = ip6_dst_blackhole(sk, &dst, &fl); 1000 err = ip6_dst_blackhole(sk, &dst, &fl);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 831b76e08d02..027d1814e1ab 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -117,11 +117,13 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
117 struct dccp_sock *newdp = dccp_sk(newsk); 117 struct dccp_sock *newdp = dccp_sk(newsk);
118 struct dccp_minisock *newdmsk = dccp_msk(newsk); 118 struct dccp_minisock *newdmsk = dccp_msk(newsk);
119 119
120 newdp->dccps_role = DCCP_ROLE_SERVER; 120 newdp->dccps_role = DCCP_ROLE_SERVER;
121 newdp->dccps_hc_rx_ackvec = NULL; 121 newdp->dccps_hc_rx_ackvec = NULL;
122 newdp->dccps_service_list = NULL; 122 newdp->dccps_service_list = NULL;
123 newdp->dccps_service = dreq->dreq_service; 123 newdp->dccps_service = dreq->dreq_service;
124 newicsk->icsk_rto = DCCP_TIMEOUT_INIT; 124 newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
125 newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
126 newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
125 127
126 if (dccp_feat_clone(sk, newsk)) 128 if (dccp_feat_clone(sk, newsk))
127 goto out_free; 129 goto out_free;
@@ -200,10 +202,10 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
200 struct request_sock **prev) 202 struct request_sock **prev)
201{ 203{
202 struct sock *child = NULL; 204 struct sock *child = NULL;
205 struct dccp_request_sock *dreq = dccp_rsk(req);
203 206
204 /* Check for retransmitted REQUEST */ 207 /* Check for retransmitted REQUEST */
205 if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { 208 if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
206 struct dccp_request_sock *dreq = dccp_rsk(req);
207 209
208 if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) { 210 if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) {
209 dccp_pr_debug("Retransmitted REQUEST\n"); 211 dccp_pr_debug("Retransmitted REQUEST\n");
@@ -227,22 +229,22 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
227 goto drop; 229 goto drop;
228 230
229 /* Invalid ACK */ 231 /* Invalid ACK */
230 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { 232 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dreq->dreq_iss) {
231 dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " 233 dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
232 "dreq_iss=%llu\n", 234 "dreq_iss=%llu\n",
233 (unsigned long long) 235 (unsigned long long)
234 DCCP_SKB_CB(skb)->dccpd_ack_seq, 236 DCCP_SKB_CB(skb)->dccpd_ack_seq,
235 (unsigned long long) 237 (unsigned long long) dreq->dreq_iss);
236 dccp_rsk(req)->dreq_iss);
237 goto drop; 238 goto drop;
238 } 239 }
239 240
241 if (dccp_parse_options(sk, dreq, skb))
242 goto drop;
243
240 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 244 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
241 if (child == NULL) 245 if (child == NULL)
242 goto listen_overflow; 246 goto listen_overflow;
243 247
244 /* FIXME: deal with options */
245
246 inet_csk_reqsk_queue_unlink(sk, req, prev); 248 inet_csk_reqsk_queue_unlink(sk, req, prev);
247 inet_csk_reqsk_queue_removed(sk, req); 249 inet_csk_reqsk_queue_removed(sk, req);
248 inet_csk_reqsk_queue_add(sk, req, child); 250 inet_csk_reqsk_queue_add(sk, req, child);
@@ -303,9 +305,12 @@ EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
303 305
304void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) 306void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
305{ 307{
306 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; 308 struct dccp_request_sock *dreq = dccp_rsk(req);
307 inet_rsk(req)->acked = 0; 309
308 req->rcv_wnd = sysctl_dccp_feat_sequence_window; 310 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
311 inet_rsk(req)->acked = 0;
312 req->rcv_wnd = sysctl_dccp_feat_sequence_window;
313 dreq->dreq_timestamp_echo = 0;
309} 314}
310 315
311EXPORT_SYMBOL_GPL(dccp_reqsk_init); 316EXPORT_SYMBOL_GPL(dccp_reqsk_init);
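The minisock changes complete the picture: handshake state such as the timestamp echo is parked in the request sock and copied into the full socket when dccp_create_openreq_child() runs, and dccp_check_req() now parses options on the completing Ack as well, which lets the old "FIXME: deal with options" go away. A small sketch of the hand-over (hypothetical types):

struct dreq  { unsigned int ts_echo, ts_time; };
struct child { unsigned int ts_echo, ts_time; };

/* State collected while the connection is still embryonic lives in the
 * request sock; it is copied into the full socket on creation. */
static void openreq_child(struct child *c, const struct dreq *d)
{
	c->ts_echo = d->ts_echo;
	c->ts_time = d->ts_time;
}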
diff --git a/net/dccp/options.c b/net/dccp/options.c
index d286cffe2c49..d2a84a2fecee 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -46,7 +46,13 @@ static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
46 return value; 46 return value;
47} 47}
48 48
49int dccp_parse_options(struct sock *sk, struct sk_buff *skb) 49/**
50 * dccp_parse_options - Parse DCCP options present in @skb
51 * @sk: client|server|listening dccp socket (when @dreq != NULL)
52 * @dreq: request socket to use during connection setup, or NULL
53 */
54int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
55 struct sk_buff *skb)
50{ 56{
51 struct dccp_sock *dp = dccp_sk(sk); 57 struct dccp_sock *dp = dccp_sk(sk);
52 const struct dccp_hdr *dh = dccp_hdr(skb); 58 const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -92,6 +98,20 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
92 goto out_invalid_option; 98 goto out_invalid_option;
93 } 99 }
94 100
101 /*
102 * CCID-Specific Options (from RFC 4340, sec. 10.3):
103 *
104 * Option numbers 128 through 191 are for options sent from the
105 * HC-Sender to the HC-Receiver; option numbers 192 through 255
106 * are for options sent from the HC-Receiver to the HC-Sender.
107 *
108 * CCID-specific options are ignored during connection setup, as
109 * negotiation may still be in progress (see RFC 4340, 10.3).
110 *
111 */
112 if (dreq != NULL && opt >= 128)
113 goto ignore_option;
114
95 switch (opt) { 115 switch (opt) {
96 case DCCPO_PADDING: 116 case DCCPO_PADDING:
97 break; 117 break;
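The early bail-out above replaces the comment block that used to sit inside the option switch (removed further down in this file): during connection setup, anything in the CCID-specific range is skipped before the switch even runs. A parse-loop fragment modelling this (names invented):

#include <stdbool.h>

static bool parsing_handshake;	/* stands in for dreq != NULL */

static void handle_option(unsigned int opt)
{
	/* CCID-specific options occupy 128..255 (RFC 4340, 10.3); while
	 * feature negotiation may still be running they are skipped
	 * before the main option switch. */
	if (parsing_handshake && opt >= 128)
		return;			/* the ignore_option label */

	switch (opt) {
	/* ... regular option handling ... */
	default:
		break;
	}
}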
@@ -112,6 +132,8 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
112 case DCCPO_CHANGE_L: 132 case DCCPO_CHANGE_L:
113 /* fall through */ 133 /* fall through */
114 case DCCPO_CHANGE_R: 134 case DCCPO_CHANGE_R:
135 if (pkt_type == DCCP_PKT_DATA)
136 break;
115 if (len < 2) 137 if (len < 2)
116 goto out_invalid_option; 138 goto out_invalid_option;
117 rc = dccp_feat_change_recv(sk, opt, *value, value + 1, 139 rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
@@ -128,7 +150,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
128 case DCCPO_CONFIRM_L: 150 case DCCPO_CONFIRM_L:
129 /* fall through */ 151 /* fall through */
130 case DCCPO_CONFIRM_R: 152 case DCCPO_CONFIRM_R:
131 if (len < 2) 153 if (pkt_type == DCCP_PKT_DATA)
154 break;
155 if (len < 2) /* FIXME this disallows empty confirm */
132 goto out_invalid_option; 156 goto out_invalid_option;
133 if (dccp_feat_confirm_recv(sk, opt, *value, 157 if (dccp_feat_confirm_recv(sk, opt, *value,
134 value + 1, len - 1)) 158 value + 1, len - 1))
@@ -136,7 +160,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
136 break; 160 break;
137 case DCCPO_ACK_VECTOR_0: 161 case DCCPO_ACK_VECTOR_0:
138 case DCCPO_ACK_VECTOR_1: 162 case DCCPO_ACK_VECTOR_1:
139 if (pkt_type == DCCP_PKT_DATA) 163 if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
140 break; 164 break;
141 165
142 if (dccp_msk(sk)->dccpms_send_ack_vector && 166 if (dccp_msk(sk)->dccpms_send_ack_vector &&
@@ -146,15 +170,27 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
146 case DCCPO_TIMESTAMP: 170 case DCCPO_TIMESTAMP:
147 if (len != 4) 171 if (len != 4)
148 goto out_invalid_option; 172 goto out_invalid_option;
149 173 /*
174 * RFC 4340 13.1: "The precise time corresponding to
175 * Timestamp Value zero is not specified". We use
176 * zero to indicate absence of a meaningful timestamp.
177 */
150 opt_val = get_unaligned((__be32 *)value); 178 opt_val = get_unaligned((__be32 *)value);
151 opt_recv->dccpor_timestamp = ntohl(opt_val); 179 if (unlikely(opt_val == 0)) {
152 180 DCCP_WARN("Timestamp with zero value\n");
153 dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; 181 break;
154 dp->dccps_timestamp_time = ktime_get_real(); 182 }
155 183
184 if (dreq != NULL) {
185 dreq->dreq_timestamp_echo = ntohl(opt_val);
186 dreq->dreq_timestamp_time = dccp_timestamp();
187 } else {
188 opt_recv->dccpor_timestamp =
189 dp->dccps_timestamp_echo = ntohl(opt_val);
190 dp->dccps_timestamp_time = dccp_timestamp();
191 }
156 dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", 192 dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
157 dccp_role(sk), opt_recv->dccpor_timestamp, 193 dccp_role(sk), ntohl(opt_val),
158 (unsigned long long) 194 (unsigned long long)
159 DCCP_SKB_CB(skb)->dccpd_ack_seq); 195 DCCP_SKB_CB(skb)->dccpd_ack_seq);
160 break; 196 break;
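Two details in the Timestamp hunk: zero is reserved as the "no timestamp pending" sentinel, so a peer that sends value 0 is warned about and the option dropped, and during the handshake the sample lands in the request sock rather than the full socket. A userspace sketch of the receive side (names invented; now() stands in for dccp_timestamp()):

#include <stdio.h>
#include <arpa/inet.h>

struct endpoint { unsigned int ts_echo, ts_time; };

static unsigned int now(void) { return 42; /* dccp_timestamp() stand-in */ }

static void rcv_timestamp(struct endpoint *ep, unsigned int opt_val_net)
{
	unsigned int val = ntohl(opt_val_net);

	if (val == 0) {			/* zero means "nothing to echo" */
		fprintf(stderr, "Timestamp with zero value\n");
		return;
	}
	ep->ts_echo = val;		/* echoed back on the next packet */
	ep->ts_time = now();		/* basis for the Elapsed Time option */
}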
@@ -194,18 +230,17 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
194 opt_recv->dccpor_elapsed_time = elapsed_time; 230 opt_recv->dccpor_elapsed_time = elapsed_time;
195 break; 231 break;
196 case DCCPO_ELAPSED_TIME: 232 case DCCPO_ELAPSED_TIME:
197 if (len != 2 && len != 4) 233 if (dccp_packet_without_ack(skb)) /* RFC 4340, 13.2 */
198 goto out_invalid_option; 234 break;
199
200 if (pkt_type == DCCP_PKT_DATA)
201 continue;
202 235
203 if (len == 2) { 236 if (len == 2) {
204 __be16 opt_val2 = get_unaligned((__be16 *)value); 237 __be16 opt_val2 = get_unaligned((__be16 *)value);
205 elapsed_time = ntohs(opt_val2); 238 elapsed_time = ntohs(opt_val2);
206 } else { 239 } else if (len == 4) {
207 opt_val = get_unaligned((__be32 *)value); 240 opt_val = get_unaligned((__be32 *)value);
208 elapsed_time = ntohl(opt_val); 241 elapsed_time = ntohl(opt_val);
242 } else {
243 goto out_invalid_option;
209 } 244 }
210 245
211 if (elapsed_time > opt_recv->dccpor_elapsed_time) 246 if (elapsed_time > opt_recv->dccpor_elapsed_time)
@@ -214,15 +249,6 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
214 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", 249 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
215 dccp_role(sk), elapsed_time); 250 dccp_role(sk), elapsed_time);
216 break; 251 break;
217 /*
218 * From RFC 4340, sec. 10.3:
219 *
220 * Option numbers 128 through 191 are for
221 * options sent from the HC-Sender to the
222 * HC-Receiver; option numbers 192 through 255
223 * are for options sent from the HC-Receiver to
224 * the HC-Sender.
225 */
226 case 128 ... 191: { 252 case 128 ... 191: {
227 const u16 idx = value - options; 253 const u16 idx = value - options;
228 254
@@ -246,7 +272,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
246 "implemented, ignoring", sk, opt, len); 272 "implemented, ignoring", sk, opt, len);
247 break; 273 break;
248 } 274 }
249 275ignore_option:
250 if (opt != DCCPO_MANDATORY) 276 if (opt != DCCPO_MANDATORY)
251 mandatory = 0; 277 mandatory = 0;
252 } 278 }
@@ -382,16 +408,24 @@ int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
382 408
383EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); 409EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
384 410
385static int dccp_insert_option_timestamp_echo(struct sock *sk, 411static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
412 struct dccp_request_sock *dreq,
386 struct sk_buff *skb) 413 struct sk_buff *skb)
387{ 414{
388 struct dccp_sock *dp = dccp_sk(sk);
389 __be32 tstamp_echo; 415 __be32 tstamp_echo;
390 int len, elapsed_time_len;
391 unsigned char *to; 416 unsigned char *to;
392 const suseconds_t delta = ktime_us_delta(ktime_get_real(), 417 u32 elapsed_time, elapsed_time_len, len;
393 dp->dccps_timestamp_time); 418
394 u32 elapsed_time = delta / 10; 419 if (dreq != NULL) {
420 elapsed_time = dccp_timestamp() - dreq->dreq_timestamp_time;
421 tstamp_echo = htonl(dreq->dreq_timestamp_echo);
422 dreq->dreq_timestamp_echo = 0;
423 } else {
424 elapsed_time = dccp_timestamp() - dp->dccps_timestamp_time;
425 tstamp_echo = htonl(dp->dccps_timestamp_echo);
426 dp->dccps_timestamp_echo = 0;
427 }
428
395 elapsed_time_len = dccp_elapsed_time_len(elapsed_time); 429 elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
396 len = 6 + elapsed_time_len; 430 len = 6 + elapsed_time_len;
397 431
@@ -404,7 +438,6 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk,
404 *to++ = DCCPO_TIMESTAMP_ECHO; 438 *to++ = DCCPO_TIMESTAMP_ECHO;
405 *to++ = len; 439 *to++ = len;
406 440
407 tstamp_echo = htonl(dp->dccps_timestamp_echo);
408 memcpy(to, &tstamp_echo, 4); 441 memcpy(to, &tstamp_echo, 4);
409 to += 4; 442 to += 4;
410 443
@@ -416,8 +449,6 @@ static int dccp_insert_option_timestamp_echo(struct sock *sk,
416 memcpy(to, &var32, 4); 449 memcpy(to, &var32, 4);
417 } 450 }
418 451
419 dp->dccps_timestamp_echo = 0;
420 dp->dccps_timestamp_time = ktime_set(0, 0);
421 return 0; 452 return 0;
422} 453}
423 454
@@ -510,6 +541,18 @@ static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
510 return 0; 541 return 0;
511} 542}
512 543
544/* The length of all options needs to be a multiple of 4 (5.8) */
545static void dccp_insert_option_padding(struct sk_buff *skb)
546{
547 int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
548
549 if (padding != 0) {
550 padding = 4 - padding;
551 memset(skb_push(skb, padding), 0, padding);
552 DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
553 }
554}
555
513int dccp_insert_options(struct sock *sk, struct sk_buff *skb) 556int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
514{ 557{
515 struct dccp_sock *dp = dccp_sk(sk); 558 struct dccp_sock *dp = dccp_sk(sk);
@@ -526,10 +569,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
526 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && 569 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
527 dccp_insert_option_ackvec(sk, skb)) 570 dccp_insert_option_ackvec(sk, skb))
528 return -1; 571 return -1;
529
530 if (dp->dccps_timestamp_echo != 0 &&
531 dccp_insert_option_timestamp_echo(sk, skb))
532 return -1;
533 } 572 }
534 573
535 if (dp->dccps_hc_rx_insert_options) { 574 if (dp->dccps_hc_rx_insert_options) {
@@ -553,18 +592,22 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
553 dccp_insert_option_timestamp(sk, skb)) 592 dccp_insert_option_timestamp(sk, skb))
554 return -1; 593 return -1;
555 594
556 /* XXX: insert other options when appropriate */ 595 if (dp->dccps_timestamp_echo != 0 &&
596 dccp_insert_option_timestamp_echo(dp, NULL, skb))
597 return -1;
598
599 dccp_insert_option_padding(skb);
600 return 0;
601}
557 602
558 if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { 603int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
559 /* The length of all options has to be a multiple of 4 */ 604{
560 int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; 605 DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
561 606
562 if (padding != 0) { 607 if (dreq->dreq_timestamp_echo != 0 &&
563 padding = 4 - padding; 608 dccp_insert_option_timestamp_echo(NULL, dreq, skb))
564 memset(skb_push(skb, padding), 0, padding); 609 return -1;
565 DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
566 }
567 }
568 610
611 dccp_insert_option_padding(skb);
569 return 0; 612 return 0;
570} 613}
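The new dccp_insert_option_padding() helper factors out the alignment step that both option writers need. A userspace sketch, under the assumption that the caller guarantees headroom in front of buf, in the spirit of skb_push():

#include <string.h>

/* Total option length must be a multiple of 4 (RFC 4340, 5.8); padding
 * bytes are zero, which doubles as the Padding option. */
static unsigned int pad_options(unsigned char *buf, unsigned int opt_len)
{
	unsigned int padding = opt_len % 4;

	if (padding != 0) {
		padding = 4 - padding;
		memset(buf - padding, 0, padding);
		opt_len += padding;
	}
	return opt_len;
}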
diff --git a/net/dccp/output.c b/net/dccp/output.c
index f49544618f20..3b763db3d863 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -133,15 +133,31 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
133 return -ENOBUFS; 133 return -ENOBUFS;
134} 134}
135 135
136/**
137 * dccp_determine_ccmps - Find out about CCID-specific packet-size limits
138 * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
139 * since the RX CCID is restricted to feedback packets (Acks), which are small
140 * in comparison with the data traffic. A value of 0 means "no current CCMPS".
141 */
142static u32 dccp_determine_ccmps(const struct dccp_sock *dp)
143{
144 const struct ccid *tx_ccid = dp->dccps_hc_tx_ccid;
145
146 if (tx_ccid == NULL || tx_ccid->ccid_ops == NULL)
147 return 0;
148 return tx_ccid->ccid_ops->ccid_ccmps;
149}
150
136unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) 151unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
137{ 152{
138 struct inet_connection_sock *icsk = inet_csk(sk); 153 struct inet_connection_sock *icsk = inet_csk(sk);
139 struct dccp_sock *dp = dccp_sk(sk); 154 struct dccp_sock *dp = dccp_sk(sk);
140 int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - 155 u32 ccmps = dccp_determine_ccmps(dp);
141 sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext)); 156 int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
142 157
143 /* Now subtract optional transport overhead */ 158 /* Account for header lengths and IPv4/v6 option overhead */
144 mss_now -= icsk->icsk_ext_hdr_len; 159 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
160 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
145 161
146 /* 162 /*
147 * FIXME: this should come from the CCID infrastructure, where, say, 163 * FIXME: this should come from the CCID infrastructure, where, say,
@@ -151,13 +167,13 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
151 * make it a multiple of 4 167 * make it a multiple of 4
152 */ 168 */
153 169
154 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; 170 cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
155 171
156 /* And store cached results */ 172 /* And store cached results */
157 icsk->icsk_pmtu_cookie = pmtu; 173 icsk->icsk_pmtu_cookie = pmtu;
158 dp->dccps_mss_cache = mss_now; 174 dp->dccps_mss_cache = cur_mps;
159 175
160 return mss_now; 176 return cur_mps;
161} 177}
162 178
163EXPORT_SYMBOL_GPL(dccp_sync_mss); 179EXPORT_SYMBOL_GPL(dccp_sync_mss);
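The reworked dccp_sync_mss() takes the CCID ceiling into account: the effective maximum packet size is min(PMTU, CCMPS), where CCMPS == 0 means the CCID imposes no limit, minus network and DCCP header overhead. As a sketch (the 12- and 4-byte figures assume the sizes of struct dccp_hdr and struct dccp_hdr_ext):

#include <stdint.h>

static uint32_t current_mps(uint32_t pmtu, uint32_t ccmps,
			    uint32_t net_hdr_len, uint32_t ext_hdr_len)
{
	uint32_t mps = (ccmps != 0 && ccmps < pmtu) ? ccmps : pmtu;

	/* generic DCCP header (12) + long-seqno extension (4) */
	mps -= net_hdr_len + ext_hdr_len + 12 + 4;
	return mps;
}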
@@ -170,7 +186,7 @@ void dccp_write_space(struct sock *sk)
170 wake_up_interruptible(sk->sk_sleep); 186 wake_up_interruptible(sk->sk_sleep);
171 /* Should agree with poll, otherwise some programs break */ 187 /* Should agree with poll, otherwise some programs break */
172 if (sock_writeable(sk)) 188 if (sock_writeable(sk))
173 sk_wake_async(sk, 2, POLL_OUT); 189 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
174 190
175 read_unlock(&sk->sk_callback_lock); 191 read_unlock(&sk->sk_callback_lock);
176} 192}
@@ -303,7 +319,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
303 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; 319 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
304 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; 320 DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
305 321
306 if (dccp_insert_options(sk, skb)) { 322 if (dccp_insert_options_rsk(dreq, skb)) {
307 kfree_skb(skb); 323 kfree_skb(skb);
308 return NULL; 324 return NULL;
309 } 325 }
@@ -391,7 +407,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
391 * FIXME: what if rebuild_header fails? 407 * FIXME: what if rebuild_header fails?
392 * Should we be doing a rebuild_header here? 408 * Should we be doing a rebuild_header here?
393 */ 409 */
394 int err = inet_sk_rebuild_header(sk); 410 int err = inet_csk(sk)->icsk_af_ops->rebuild_header(sk);
395 411
396 if (err != 0) 412 if (err != 0)
397 return err; 413 return err;
@@ -567,14 +583,27 @@ void dccp_send_close(struct sock *sk, const int active)
567 583
568 /* Reserve space for headers and prepare control bits. */ 584 /* Reserve space for headers and prepare control bits. */
569 skb_reserve(skb, sk->sk_prot->max_header); 585 skb_reserve(skb, sk->sk_prot->max_header);
570 DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? 586 if (dp->dccps_role == DCCP_ROLE_SERVER && !dp->dccps_server_timewait)
571 DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; 587 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSEREQ;
588 else
589 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
572 590
573 if (active) { 591 if (active) {
574 dccp_write_xmit(sk, 1); 592 dccp_write_xmit(sk, 1);
575 dccp_skb_entail(sk, skb); 593 dccp_skb_entail(sk, skb);
576 dccp_transmit_skb(sk, skb_clone(skb, prio)); 594 dccp_transmit_skb(sk, skb_clone(skb, prio));
577 /* FIXME do we need a retransmit timer here? */ 595 /*
596 * Retransmission timer for active-close: RFC 4340, 8.3 requires
597 * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
598 * state can be left. The initial timeout is 2 RTTs.
599 * Since RTT measurement is done by the CCIDs, there is no easy
600 * way to get an RTT sample. The fallback RTT from RFC 4340, 3.4
601 * is too low (200ms); we use a high value to avoid unnecessary
602 * retransmissions when the link RTT is > 0.2 seconds.
603 * FIXME: Let main module sample RTTs and use that instead.
604 */
605 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
606 DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
578 } else 607 } else
579 dccp_transmit_skb(sk, skb); 608 dccp_transmit_skb(sk, skb);
580} 609}
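dccp_send_close() now picks the closing packet type from the new dccps_server_timewait flag and, for active close, arms a retransmission timer right away (with a deliberately high initial timeout, as the new comment explains). The type choice in isolation, as a hypothetical sketch:

#include <stdbool.h>

enum pkt { PKT_CLOSE, PKT_CLOSEREQ };

/* A server that has not opted into holding TIMEWAIT state itself sends
 * CloseReq, pushing TIMEWAIT to the client; otherwise (client, or a
 * server with SERVER_TIMEWAIT set) a plain Close goes out. */
static enum pkt closing_packet(bool is_server, bool server_timewait)
{
	return (is_server && !server_timewait) ? PKT_CLOSEREQ : PKT_CLOSE;
}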
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7a3bea9c28c1..0bed4a6095b7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -60,8 +60,7 @@ void dccp_set_state(struct sock *sk, const int state)
60{ 60{
61 const int oldstate = sk->sk_state; 61 const int oldstate = sk->sk_state;
62 62
63 dccp_pr_debug("%s(%p) %-10.10s -> %s\n", 63 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
64 dccp_role(sk), sk,
65 dccp_state_name(oldstate), dccp_state_name(state)); 64 dccp_state_name(oldstate), dccp_state_name(state));
66 WARN_ON(state == oldstate); 65 WARN_ON(state == oldstate);
67 66
@@ -72,7 +71,8 @@ void dccp_set_state(struct sock *sk, const int state)
72 break; 71 break;
73 72
74 case DCCP_CLOSED: 73 case DCCP_CLOSED:
75 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) 74 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75 oldstate == DCCP_CLOSING)
76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); 76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77 77
78 sk->sk_prot->unhash(sk); 78 sk->sk_prot->unhash(sk);
@@ -93,6 +93,24 @@ void dccp_set_state(struct sock *sk, const int state)
93 93
94EXPORT_SYMBOL_GPL(dccp_set_state); 94EXPORT_SYMBOL_GPL(dccp_set_state);
95 95
96static void dccp_finish_passive_close(struct sock *sk)
97{
98 switch (sk->sk_state) {
99 case DCCP_PASSIVE_CLOSE:
100 /* Node (client or server) has received Close packet. */
101 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102 dccp_set_state(sk, DCCP_CLOSED);
103 break;
104 case DCCP_PASSIVE_CLOSEREQ:
105 /*
106 * Client received CloseReq. We set the `active' flag so that
107 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108 */
109 dccp_send_close(sk, 1);
110 dccp_set_state(sk, DCCP_CLOSING);
111 }
112}
113
96void dccp_done(struct sock *sk) 114void dccp_done(struct sock *sk)
97{ 115{
98 dccp_set_state(sk, DCCP_CLOSED); 116 dccp_set_state(sk, DCCP_CLOSED);
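dccp_finish_passive_close() runs once the application has drained the receive queue: a received Close is answered with Reset(Closed), while a received CloseReq flips the client into the active-close role. A state-level sketch (enum names invented):

enum state { S_PASSIVE_CLOSE, S_PASSIVE_CLOSEREQ, S_CLOSING, S_CLOSED };

static enum state finish_passive_close(enum state st)
{
	switch (st) {
	case S_PASSIVE_CLOSE:
		/* send Reset(Closed) */
		return S_CLOSED;
	case S_PASSIVE_CLOSEREQ:
		/* send Close actively; it is retransmitted until the
		 * server's Reset arrives */
		return S_CLOSING;
	default:
		return st;
	}
}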
@@ -134,14 +152,17 @@ EXPORT_SYMBOL_GPL(dccp_packet_name);
134const char *dccp_state_name(const int state) 152const char *dccp_state_name(const int state)
135{ 153{
136 static char *dccp_state_names[] = { 154 static char *dccp_state_names[] = {
137 [DCCP_OPEN] = "OPEN", 155 [DCCP_OPEN] = "OPEN",
138 [DCCP_REQUESTING] = "REQUESTING", 156 [DCCP_REQUESTING] = "REQUESTING",
139 [DCCP_PARTOPEN] = "PARTOPEN", 157 [DCCP_PARTOPEN] = "PARTOPEN",
140 [DCCP_LISTEN] = "LISTEN", 158 [DCCP_LISTEN] = "LISTEN",
141 [DCCP_RESPOND] = "RESPOND", 159 [DCCP_RESPOND] = "RESPOND",
142 [DCCP_CLOSING] = "CLOSING", 160 [DCCP_CLOSING] = "CLOSING",
143 [DCCP_TIME_WAIT] = "TIME_WAIT", 161 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
144 [DCCP_CLOSED] = "CLOSED", 162 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
163 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164 [DCCP_TIME_WAIT] = "TIME_WAIT",
165 [DCCP_CLOSED] = "CLOSED",
145 }; 166 };
146 167
147 if (state >= DCCP_MAX_STATES) 168 if (state >= DCCP_MAX_STATES)
@@ -174,6 +195,19 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174 195
175 dccp_minisock_init(&dp->dccps_minisock); 196 dccp_minisock_init(&dp->dccps_minisock);
176 197
198 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
199 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
200 sk->sk_state = DCCP_CLOSED;
201 sk->sk_write_space = dccp_write_space;
202 icsk->icsk_sync_mss = dccp_sync_mss;
203 dp->dccps_mss_cache = 536;
204 dp->dccps_rate_last = jiffies;
205 dp->dccps_role = DCCP_ROLE_UNDEFINED;
206 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
207 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
208
209 dccp_init_xmit_timers(sk);
210
177 /* 211 /*
178 * FIXME: We're hardcoding the CCID, and doing this at this point makes 212 * FIXME: We're hardcoding the CCID, and doing this at this point makes
179 * the listening (master) sock get CCID control blocks, which is not 213 * the listening (master) sock get CCID control blocks, which is not
@@ -213,18 +247,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
213 INIT_LIST_HEAD(&dmsk->dccpms_conf); 247 INIT_LIST_HEAD(&dmsk->dccpms_conf);
214 } 248 }
215 249
216 dccp_init_xmit_timers(sk);
217 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
218 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
219 sk->sk_state = DCCP_CLOSED;
220 sk->sk_write_space = dccp_write_space;
221 icsk->icsk_sync_mss = dccp_sync_mss;
222 dp->dccps_mss_cache = 536;
223 dp->dccps_rate_last = jiffies;
224 dp->dccps_role = DCCP_ROLE_UNDEFINED;
225 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
226 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
227
228 return 0; 250 return 0;
229} 251}
230 252
@@ -275,6 +297,12 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
275 return inet_csk_listen_start(sk, backlog); 297 return inet_csk_listen_start(sk, backlog);
276} 298}
277 299
300static inline int dccp_need_reset(int state)
301{
302 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303 state != DCCP_REQUESTING;
304}
305
278int dccp_disconnect(struct sock *sk, int flags) 306int dccp_disconnect(struct sock *sk, int flags)
279{ 307{
280 struct inet_connection_sock *icsk = inet_csk(sk); 308 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -285,10 +313,15 @@ int dccp_disconnect(struct sock *sk, int flags)
285 if (old_state != DCCP_CLOSED) 313 if (old_state != DCCP_CLOSED)
286 dccp_set_state(sk, DCCP_CLOSED); 314 dccp_set_state(sk, DCCP_CLOSED);
287 315
288 /* ABORT function of RFC793 */ 316 /*
317 * This corresponds to the ABORT function of RFC793, sec. 3.8
318 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
319 */
289 if (old_state == DCCP_LISTEN) { 320 if (old_state == DCCP_LISTEN) {
290 inet_csk_listen_stop(sk); 321 inet_csk_listen_stop(sk);
291 /* FIXME: do the active reset thing */ 322 } else if (dccp_need_reset(old_state)) {
323 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
324 sk->sk_err = ECONNRESET;
292 } else if (old_state == DCCP_REQUESTING) 325 } else if (old_state == DCCP_REQUESTING)
293 sk->sk_err = ECONNRESET; 326 sk->sk_err = ECONNRESET;
294 327
@@ -518,6 +551,12 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
518 (struct dccp_so_feat __user *) 551 (struct dccp_so_feat __user *)
519 optval); 552 optval);
520 break; 553 break;
554 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
555 if (dp->dccps_role != DCCP_ROLE_SERVER)
556 err = -EOPNOTSUPP;
557 else
558 dp->dccps_server_timewait = (val != 0);
559 break;
521 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ 560 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
522 if (val < 0 || val > 15) 561 if (val < 0 || val > 15)
523 err = -EINVAL; 562 err = -EINVAL;
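From userspace the new knob would look roughly as follows; a hedged example, since the option value (6 here) should be checked against the linux/dccp.h shipped with the target kernel:

#include <stdio.h>
#include <sys/socket.h>

#ifndef SOL_DCCP
#define SOL_DCCP 269				/* linux/socket.h */
#endif
#ifndef DCCP_SOCKOPT_SERVER_TIMEWAIT
#define DCCP_SOCKOPT_SERVER_TIMEWAIT 6		/* verify against linux/dccp.h */
#endif

/* Server-side opt-in: hold TIMEWAIT state locally, i.e. send Close
 * instead of CloseReq. The kernel returns EOPNOTSUPP on non-server
 * sockets, per the hunk above. */
static int enable_server_timewait(int fd)
{
	int one = 1;

	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVER_TIMEWAIT,
		       &one, sizeof(one)) < 0) {
		perror("setsockopt(DCCP_SOCKOPT_SERVER_TIMEWAIT)");
		return -1;
	}
	return 0;
}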
@@ -618,15 +657,15 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
618 (__be32 __user *)optval, optlen); 657 (__be32 __user *)optval, optlen);
619 case DCCP_SOCKOPT_GET_CUR_MPS: 658 case DCCP_SOCKOPT_GET_CUR_MPS:
620 val = dp->dccps_mss_cache; 659 val = dp->dccps_mss_cache;
621 len = sizeof(val); 660 break;
661 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
662 val = dp->dccps_server_timewait;
622 break; 663 break;
623 case DCCP_SOCKOPT_SEND_CSCOV: 664 case DCCP_SOCKOPT_SEND_CSCOV:
624 val = dp->dccps_pcslen; 665 val = dp->dccps_pcslen;
625 len = sizeof(val);
626 break; 666 break;
627 case DCCP_SOCKOPT_RECV_CSCOV: 667 case DCCP_SOCKOPT_RECV_CSCOV:
628 val = dp->dccps_pcrlen; 668 val = dp->dccps_pcrlen;
629 len = sizeof(val);
630 break; 669 break;
631 case 128 ... 191: 670 case 128 ... 191:
632 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 671 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
@@ -638,6 +677,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
638 return -ENOPROTOOPT; 677 return -ENOPROTOOPT;
639 } 678 }
640 679
680 len = sizeof(val);
641 if (put_user(len, optlen) || copy_to_user(optval, &val, len)) 681 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
642 return -EFAULT; 682 return -EFAULT;
643 683
@@ -748,19 +788,26 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
748 788
749 dh = dccp_hdr(skb); 789 dh = dccp_hdr(skb);
750 790
751 if (dh->dccph_type == DCCP_PKT_DATA || 791 switch (dh->dccph_type) {
752 dh->dccph_type == DCCP_PKT_DATAACK) 792 case DCCP_PKT_DATA:
793 case DCCP_PKT_DATAACK:
753 goto found_ok_skb; 794 goto found_ok_skb;
754 795
755 if (dh->dccph_type == DCCP_PKT_RESET || 796 case DCCP_PKT_CLOSE:
756 dh->dccph_type == DCCP_PKT_CLOSE) { 797 case DCCP_PKT_CLOSEREQ:
757 dccp_pr_debug("found fin ok!\n"); 798 if (!(flags & MSG_PEEK))
799 dccp_finish_passive_close(sk);
800 /* fall through */
801 case DCCP_PKT_RESET:
802 dccp_pr_debug("found fin (%s) ok!\n",
803 dccp_packet_name(dh->dccph_type));
758 len = 0; 804 len = 0;
759 goto found_fin_ok; 805 goto found_fin_ok;
806 default:
807 dccp_pr_debug("packet_type=%s\n",
808 dccp_packet_name(dh->dccph_type));
809 sk_eat_skb(sk, skb, 0);
760 } 810 }
761 dccp_pr_debug("packet_type=%s\n",
762 dccp_packet_name(dh->dccph_type));
763 sk_eat_skb(sk, skb, 0);
764verify_sock_status: 811verify_sock_status:
765 if (sock_flag(sk, SOCK_DONE)) { 812 if (sock_flag(sk, SOCK_DONE)) {
766 len = 0; 813 len = 0;
@@ -862,34 +909,38 @@ out:
862 909
863EXPORT_SYMBOL_GPL(inet_dccp_listen); 910EXPORT_SYMBOL_GPL(inet_dccp_listen);
864 911
865static const unsigned char dccp_new_state[] = { 912static void dccp_terminate_connection(struct sock *sk)
866 /* current state: new state: action: */
867 [0] = DCCP_CLOSED,
868 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
869 [DCCP_REQUESTING] = DCCP_CLOSED,
870 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
871 [DCCP_LISTEN] = DCCP_CLOSED,
872 [DCCP_RESPOND] = DCCP_CLOSED,
873 [DCCP_CLOSING] = DCCP_CLOSED,
874 [DCCP_TIME_WAIT] = DCCP_CLOSED,
875 [DCCP_CLOSED] = DCCP_CLOSED,
876};
877
878static int dccp_close_state(struct sock *sk)
879{ 913{
880 const int next = dccp_new_state[sk->sk_state]; 914 u8 next_state = DCCP_CLOSED;
881 const int ns = next & DCCP_STATE_MASK;
882 915
883 if (ns != sk->sk_state) 916 switch (sk->sk_state) {
884 dccp_set_state(sk, ns); 917 case DCCP_PASSIVE_CLOSE:
918 case DCCP_PASSIVE_CLOSEREQ:
919 dccp_finish_passive_close(sk);
920 break;
921 case DCCP_PARTOPEN:
922 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
923 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
924 /* fall through */
925 case DCCP_OPEN:
926 dccp_send_close(sk, 1);
885 927
886 return next & DCCP_ACTION_FIN; 928 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
929 !dccp_sk(sk)->dccps_server_timewait)
930 next_state = DCCP_ACTIVE_CLOSEREQ;
931 else
932 next_state = DCCP_CLOSING;
933 /* fall through */
934 default:
935 dccp_set_state(sk, next_state);
936 }
887} 937}
888 938
889void dccp_close(struct sock *sk, long timeout) 939void dccp_close(struct sock *sk, long timeout)
890{ 940{
891 struct dccp_sock *dp = dccp_sk(sk); 941 struct dccp_sock *dp = dccp_sk(sk);
892 struct sk_buff *skb; 942 struct sk_buff *skb;
943 u32 data_was_unread = 0;
893 int state; 944 int state;
894 945
895 lock_sock(sk); 946 lock_sock(sk);
@@ -912,16 +963,21 @@ void dccp_close(struct sock *sk, long timeout)
912 * descriptor close, not protocol-sourced closes, because the 963 * descriptor close, not protocol-sourced closes, because the
913 *reader process may not have drained the data yet! 964 *reader process may not have drained the data yet!
914 */ 965 */
915 /* FIXME: check for unread data */
916 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 966 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
967 data_was_unread += skb->len;
917 __kfree_skb(skb); 968 __kfree_skb(skb);
918 } 969 }
919 970
920 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 971 if (data_was_unread) {
972 /* Unread data was tossed, send an appropriate Reset Code */
973 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
974 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
975 dccp_set_state(sk, DCCP_CLOSED);
976 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
921 /* Check zero linger _after_ checking for unread data. */ 977 /* Check zero linger _after_ checking for unread data. */
922 sk->sk_prot->disconnect(sk, 0); 978 sk->sk_prot->disconnect(sk, 0);
923 } else if (dccp_close_state(sk)) { 979 } else if (sk->sk_state != DCCP_CLOSED) {
924 dccp_send_close(sk, 1); 980 dccp_terminate_connection(sk);
925 } 981 }
926 982
927 sk_stream_wait_close(sk, timeout); 983 sk_stream_wait_close(sk, timeout);
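dccp_close() now mirrors TCP's behaviour on unread data: instead of the removed FIXME, tossed bytes are counted and the connection is aborted with a Reset rather than closed gracefully. The decision ladder in isolation, as a sketch:

enum close_action { CLOSE_ABORT, CLOSE_DISCONNECT, CLOSE_TERMINATE, CLOSE_NONE };

static enum close_action pick_close_action(unsigned int unread_bytes,
					   int linger_zero, int already_closed)
{
	if (unread_bytes)
		return CLOSE_ABORT;		/* Reset(Aborted), straight to CLOSED */
	if (linger_zero)
		return CLOSE_DISCONNECT;	/* SO_LINGER with zero timeout */
	if (!already_closed)
		return CLOSE_TERMINATE;		/* normal Close/CloseReq path */
	return CLOSE_NONE;
}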
@@ -948,24 +1004,6 @@ adjudge_to_death:
948 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) 1004 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
949 goto out; 1005 goto out;
950 1006
951 /*
952 * The last release_sock may have processed the CLOSE or RESET
953 * packet moving sock to CLOSED state, if not we have to fire
954 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
955 * in draft-ietf-dccp-spec-11. -acme
956 */
957 if (sk->sk_state == DCCP_CLOSING) {
958 /* FIXME: should start at 2 * RTT */
959 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
960 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
961 inet_csk(sk)->icsk_rto,
962 DCCP_RTO_MAX);
963#if 0
964 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
965 dccp_set_state(sk, DCCP_CLOSED);
966#endif
967 }
968
969 if (sk->sk_state == DCCP_CLOSED) 1007 if (sk->sk_state == DCCP_CLOSED)
970 inet_csk_destroy_sock(sk); 1008 inet_csk_destroy_sock(sk);
971 1009
@@ -981,7 +1019,7 @@ EXPORT_SYMBOL_GPL(dccp_close);
981 1019
982void dccp_shutdown(struct sock *sk, int how) 1020void dccp_shutdown(struct sock *sk, int how)
983{ 1021{
984 dccp_pr_debug("entry\n"); 1022 dccp_pr_debug("called shutdown(%x)\n", how);
985} 1023}
986 1024
987EXPORT_SYMBOL_GPL(dccp_shutdown); 1025EXPORT_SYMBOL_GPL(dccp_shutdown);
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index c62c05039f69..21295993fdb8 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -100,41 +100,19 @@ static struct ctl_table dccp_default_table[] = {
100 { .ctl_name = 0, } 100 { .ctl_name = 0, }
101}; 101};
102 102
103static struct ctl_table dccp_table[] = { 103static struct ctl_path dccp_path[] = {
104 { 104 { .procname = "net", .ctl_name = CTL_NET, },
105 .ctl_name = NET_DCCP_DEFAULT, 105 { .procname = "dccp", .ctl_name = NET_DCCP, },
106 .procname = "default", 106 { .procname = "default", .ctl_name = NET_DCCP_DEFAULT, },
107 .mode = 0555, 107 { }
108 .child = dccp_default_table,
109 },
110 { .ctl_name = 0, },
111};
112
113static struct ctl_table dccp_dir_table[] = {
114 {
115 .ctl_name = NET_DCCP,
116 .procname = "dccp",
117 .mode = 0555,
118 .child = dccp_table,
119 },
120 { .ctl_name = 0, },
121};
122
123static struct ctl_table dccp_root_table[] = {
124 {
125 .ctl_name = CTL_NET,
126 .procname = "net",
127 .mode = 0555,
128 .child = dccp_dir_table,
129 },
130 { .ctl_name = 0, },
131}; 108};
132 109
133static struct ctl_table_header *dccp_table_header; 110static struct ctl_table_header *dccp_table_header;
134 111
135int __init dccp_sysctl_init(void) 112int __init dccp_sysctl_init(void)
136{ 113{
137 dccp_table_header = register_sysctl_table(dccp_root_table); 114 dccp_table_header = register_sysctl_paths(dccp_path,
115 dccp_default_table);
138 116
139 return dccp_table_header != NULL ? 0 : -ENOMEM; 117 return dccp_table_header != NULL ? 0 : -ENOMEM;
140} 118}
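The shape of the register_sysctl_paths() conversion, as a kernel-style sketch (in-tree code of that era; table and path names are illustrative):

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/sysctl.h>

static struct ctl_table demo_table[] = {
	/* leaf entries, exactly as before */
	{ .ctl_name = 0, }
};

static struct ctl_path demo_path[] = {
	{ .procname = "net",  .ctl_name = CTL_NET, },
	{ .procname = "demo", .ctl_name = CTL_UNNUMBERED, },
	{ }
};

static struct ctl_table_header *demo_header;

static int __init demo_sysctl_init(void)
{
	/* One call replaces three nested directory tables. */
	demo_header = register_sysctl_paths(demo_path, demo_table);
	return demo_header != NULL ? 0 : -ENOMEM;
}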
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3af067354bd4..8703a792b560 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -280,9 +280,8 @@ static void dccp_init_write_xmit_timer(struct sock *sk)
280{ 280{
281 struct dccp_sock *dp = dccp_sk(sk); 281 struct dccp_sock *dp = dccp_sk(sk);
282 282
283 init_timer(&dp->dccps_xmit_timer); 283 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
284 dp->dccps_xmit_timer.data = (unsigned long)sk; 284 (unsigned long)sk);
285 dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
286} 285}
287 286
288void dccp_init_xmit_timers(struct sock *sk) 287void dccp_init_xmit_timers(struct sock *sk)
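The timer hunk is a straight conversion to setup_timer(), which exists precisely to collapse this three-step initialisation. Kernel-style sketch of the equivalence:

#include <linux/timer.h>

static void demo_fn(unsigned long data)
{
	/* timer callback; data carries the cookie passed below */
}

static void demo_timer_init(struct timer_list *t, unsigned long cookie)
{
	setup_timer(t, demo_fn, cookie);
	/* equivalent to the removed open-coded form:
	 *	init_timer(t);
	 *	t->data     = cookie;
	 *	t->function = demo_fn;
	 */
}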
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 57d574951838..acd48ee522d6 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1904,7 +1904,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
1904 struct sk_buff *skb = sock_alloc_send_skb(sk, datalen, 1904 struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
1905 noblock, errcode); 1905 noblock, errcode);
1906 if (skb) { 1906 if (skb) {
1907 skb->protocol = __constant_htons(ETH_P_DNA_RT); 1907 skb->protocol = htons(ETH_P_DNA_RT);
1908 skb->pkt_type = PACKET_OUTGOING; 1908 skb->pkt_type = PACKET_OUTGOING;
1909 } 1909 }
1910 return skb; 1910 return skb;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 3bc82dc83b38..1bbfce5f7a2d 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -173,10 +173,6 @@ static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
173static struct dn_dev_sysctl_table { 173static struct dn_dev_sysctl_table {
174 struct ctl_table_header *sysctl_header; 174 struct ctl_table_header *sysctl_header;
175 ctl_table dn_dev_vars[5]; 175 ctl_table dn_dev_vars[5];
176 ctl_table dn_dev_dev[2];
177 ctl_table dn_dev_conf_dir[2];
178 ctl_table dn_dev_proto_dir[2];
179 ctl_table dn_dev_root_dir[2];
180} dn_dev_sysctl = { 176} dn_dev_sysctl = {
181 NULL, 177 NULL,
182 { 178 {
@@ -224,30 +220,6 @@ static struct dn_dev_sysctl_table {
224 }, 220 },
225 {0} 221 {0}
226 }, 222 },
227 {{
228 .ctl_name = 0,
229 .procname = "",
230 .mode = 0555,
231 .child = dn_dev_sysctl.dn_dev_vars
232 }, {0}},
233 {{
234 .ctl_name = NET_DECNET_CONF,
235 .procname = "conf",
236 .mode = 0555,
237 .child = dn_dev_sysctl.dn_dev_dev
238 }, {0}},
239 {{
240 .ctl_name = NET_DECNET,
241 .procname = "decnet",
242 .mode = 0555,
243 .child = dn_dev_sysctl.dn_dev_conf_dir
244 }, {0}},
245 {{
246 .ctl_name = CTL_NET,
247 .procname = "net",
248 .mode = 0555,
249 .child = dn_dev_sysctl.dn_dev_proto_dir
250 }, {0}}
251}; 223};
252 224
253static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms) 225static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
@@ -255,6 +227,16 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *
255 struct dn_dev_sysctl_table *t; 227 struct dn_dev_sysctl_table *t;
256 int i; 228 int i;
257 229
230#define DN_CTL_PATH_DEV 3
231
232 struct ctl_path dn_ctl_path[] = {
233 { .procname = "net", .ctl_name = CTL_NET, },
234 { .procname = "decnet", .ctl_name = NET_DECNET, },
235 { .procname = "conf", .ctl_name = NET_DECNET_CONF, },
236 { /* to be set */ },
237 { },
238 };
239
258 t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL); 240 t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL);
259 if (t == NULL) 241 if (t == NULL)
260 return; 242 return;
@@ -265,20 +247,16 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *
265 } 247 }
266 248
267 if (dev) { 249 if (dev) {
268 t->dn_dev_dev[0].procname = dev->name; 250 dn_ctl_path[DN_CTL_PATH_DEV].procname = dev->name;
269 t->dn_dev_dev[0].ctl_name = dev->ifindex; 251 dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = dev->ifindex;
270 } else { 252 } else {
271 t->dn_dev_dev[0].procname = parms->name; 253 dn_ctl_path[DN_CTL_PATH_DEV].procname = parms->name;
272 t->dn_dev_dev[0].ctl_name = parms->ctl_name; 254 dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = parms->ctl_name;
273 } 255 }
274 256
275 t->dn_dev_dev[0].child = t->dn_dev_vars;
276 t->dn_dev_conf_dir[0].child = t->dn_dev_dev;
277 t->dn_dev_proto_dir[0].child = t->dn_dev_conf_dir;
278 t->dn_dev_root_dir[0].child = t->dn_dev_proto_dir;
279 t->dn_dev_vars[0].extra1 = (void *)dev; 257 t->dn_dev_vars[0].extra1 = (void *)dev;
280 258
281 t->sysctl_header = register_sysctl_table(t->dn_dev_root_dir); 259 t->sysctl_header = register_sysctl_paths(dn_ctl_path, t->dn_dev_vars);
282 if (t->sysctl_header == NULL) 260 if (t->sysctl_header == NULL)
283 kfree(t); 261 kfree(t);
284 else 262 else
@@ -647,11 +625,15 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
647 625
648static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 626static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
649{ 627{
628 struct net *net = skb->sk->sk_net;
650 struct nlattr *tb[IFA_MAX+1]; 629 struct nlattr *tb[IFA_MAX+1];
651 struct dn_dev *dn_db; 630 struct dn_dev *dn_db;
652 struct ifaddrmsg *ifm; 631 struct ifaddrmsg *ifm;
653 struct dn_ifaddr *ifa, **ifap; 632 struct dn_ifaddr *ifa, **ifap;
654 int err; 633 int err = -EINVAL;
634
635 if (net != &init_net)
636 goto errout;
655 637
656 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); 638 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
657 if (err < 0) 639 if (err < 0)
@@ -681,6 +663,7 @@ errout:
681 663
682static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 664static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
683{ 665{
666 struct net *net = skb->sk->sk_net;
684 struct nlattr *tb[IFA_MAX+1]; 667 struct nlattr *tb[IFA_MAX+1];
685 struct net_device *dev; 668 struct net_device *dev;
686 struct dn_dev *dn_db; 669 struct dn_dev *dn_db;
@@ -688,6 +671,9 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
688 struct dn_ifaddr *ifa; 671 struct dn_ifaddr *ifa;
689 int err; 672 int err;
690 673
674 if (net != &init_net)
675 return -EINVAL;
676
691 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); 677 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
692 if (err < 0) 678 if (err < 0)
693 return err; 679 return err;
@@ -785,19 +771,23 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
 }
 
 static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int idx, dn_idx = 0, skip_ndevs, skip_naddr;
 	struct net_device *dev;
 	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
 
+	if (net != &init_net)
+		return 0;
+
 	skip_ndevs = cb->args[0];
 	skip_naddr = cb->args[1];
 
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 3760a20d10d0..4aa9a423e606 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -203,8 +203,6 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct
 	struct flowi fl;
 	struct dn_fib_res res;
 
-	memset(&fl, 0, sizeof(fl));
-
 	if (nh->nh_flags&RTNH_F_ONLINK) {
 		struct net_device *dev;
 
@@ -506,10 +504,14 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
 
 static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
 	struct rtmsg *r = NLMSG_DATA(nlh);
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
@@ -522,10 +524,14 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
 
 static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
 	struct rtmsg *r = NLMSG_DATA(nlh);
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index e851b143cca3..1ca13b17974d 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -580,8 +580,8 @@ static const struct seq_operations dn_neigh_seq_ops = {
 
 static int dn_neigh_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &dn_neigh_seq_ops,
+	return seq_open_net(inode, file, &dn_neigh_seq_ops,
 			sizeof(struct neigh_seq_state));
 }
 
 static const struct file_operations dn_neigh_seq_fops = {
@@ -589,7 +589,7 @@ static const struct file_operations dn_neigh_seq_fops = {
 	.open = dn_neigh_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 #endif
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 7404653880b0..1964faf203e4 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -124,7 +124,7 @@ struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri)
 	if ((skb = alloc_skb(size + hdr, pri)) == NULL)
 		return NULL;
 
-	skb->protocol = __constant_htons(ETH_P_DNA_RT);
+	skb->protocol = htons(ETH_P_DNA_RT);
 	skb->pkt_type = PACKET_OUTGOING;
 
 	if (sk)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 0e10ff21e292..31be29b8b5a3 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -107,7 +107,7 @@ static const int dn_rt_mtu_expires = 10 * 60 * HZ;
 
 static unsigned long dn_rt_deadline;
 
-static int dn_dst_gc(void);
+static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
@@ -185,7 +185,7 @@ static void dn_dst_check_expire(unsigned long dummy)
 	mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
 }
 
-static int dn_dst_gc(void)
+static int dn_dst_gc(struct dst_ops *ops)
 {
 	struct dn_route *rt, **rtp;
 	int i;
@@ -765,17 +765,6 @@ drop:
 }
 
 /*
- * Drop packet. This is used for endnodes and for
- * when we should not be forwarding packets from
- * this dest.
- */
-static int dn_blackhole(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return NET_RX_DROP;
-}
-
-/*
  * Used to catch bugs. This should never normally get
  * called.
  */
@@ -995,7 +984,7 @@ source_ok:
 	 * here
 	 */
 	if (!try_hard) {
-		neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst);
+		neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst);
 		if (neigh) {
 			if ((oldflp->oif &&
 			    (neigh->dev->ifindex != oldflp->oif)) ||
@@ -1207,7 +1196,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock
 
 	err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
 	if (err == 0 && fl->proto) {
-		err = xfrm_lookup(pprt, fl, sk, !(flags & MSG_DONTWAIT));
+		err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ?
+				  0 : XFRM_LOOKUP_WAIT);
 	}
 	return err;
 }
@@ -1396,7 +1386,7 @@ make_route:
 	default:
 	case RTN_UNREACHABLE:
 	case RTN_BLACKHOLE:
-		rt->u.dst.input = dn_blackhole;
+		rt->u.dst.input = dst_discard;
 	}
 	rt->rt_flags = flags;
 	if (rt->u.dst.dev)
@@ -1522,6 +1512,7 @@ rtattr_failure:
  */
 static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = in_skb->sk->sk_net;
 	struct rtattr **rta = arg;
 	struct rtmsg *rtm = NLMSG_DATA(nlh);
 	struct dn_route *rt = NULL;
@@ -1530,6 +1521,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	struct sk_buff *skb;
 	struct flowi fl;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = DNPROTO_NSP;
 
@@ -1557,7 +1551,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		kfree_skb(skb);
 		return -ENODEV;
 	}
-	skb->protocol = __constant_htons(ETH_P_DNA_RT);
+	skb->protocol = htons(ETH_P_DNA_RT);
 	skb->dev = dev;
 	cb->src = fl.fld_src;
 	cb->dst = fl.fld_dst;
@@ -1594,7 +1588,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		goto out_free;
 	}
 
-	return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+	return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
 
 out_free:
 	kfree_skb(skb);
@@ -1607,10 +1601,14 @@ out_free:
  */
 int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	struct dn_route *rt;
 	int h, s_h;
 	int idx, s_idx;
 
+	if (net != &init_net)
+		return 0;
+
 	if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg))
 		return -EINVAL;
 	if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED))
@@ -1752,8 +1750,7 @@ void __init dn_route_init(void)
 	dn_dst_ops.kmem_cachep =
 		kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-	init_timer(&dn_route_timer);
-	dn_route_timer.function = dn_dst_check_expire;
+	setup_timer(&dn_route_timer, dn_dst_check_expire, 0);
 	dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
 	add_timer(&dn_route_timer);
 
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index ffebea04cc99..5b7539b7fe0c 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -212,7 +212,7 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static u32 dn_fib_rule_default_pref(void)
+static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
 {
 	struct list_head *pos;
 	struct fib_rule *rule;
@@ -249,6 +249,7 @@ static struct fib_rules_ops dn_fib_rules_ops = {
 	.policy = dn_fib_rule_policy,
 	.rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list),
 	.owner = THIS_MODULE,
+	.fro_net = &init_net,
 };
 
 void __init dn_fib_rules_init(void)
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index fda0772fa215..e09d915dbd77 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -375,10 +375,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
 }
 
 static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
@@ -463,12 +463,16 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
 
 int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
 	struct dn_fib_table *tb;
 	struct hlist_node *node;
 	int dumped = 0;
 
+	if (net != &init_net)
+		return 0;
+
 	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
 	    ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
 		return dn_cache_dump(skb, cb);
diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
index ecdb3f9f14ca..2f81de5e752f 100644
--- a/net/decnet/netfilter/Kconfig
+++ b/net/decnet/netfilter/Kconfig
@@ -4,6 +4,7 @@
 
 menu "DECnet: Netfilter Configuration"
 	depends on DECNET && NETFILTER && EXPERIMENTAL
+	depends on NETFILTER_ADVANCED
 
 config DECNET_NF_GRABULATOR
 	tristate "Routing message grabulator (for userland routing daemon)"
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 43fcd29046d1..6d2bd3202048 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 		RCV_SKB_FAIL(-EINVAL);
 }
 
-static struct nf_hook_ops dnrmg_ops = {
+static struct nf_hook_ops dnrmg_ops __read_mostly = {
 	.hook = dnrmg_hook,
 	.pf = PF_DECnet,
 	.hooknum = NF_DN_ROUTE,
@@ -137,7 +137,7 @@ static int __init dn_rtmsg_init(void)
 
 	rv = nf_register_hook(&dnrmg_ops);
 	if (rv) {
-		sock_release(dnrmg->sk_socket);
+		netlink_kernel_release(dnrmg);
 	}
 
 	return rv;
@@ -146,7 +146,7 @@ static int __init dn_rtmsg_init(void)
 static void __exit dn_rtmsg_fini(void)
 {
 	nf_unregister_hook(&dnrmg_ops);
-	sock_release(dnrmg->sk_socket);
+	netlink_kernel_release(dnrmg);
 }
 
 
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index ae354a43fb97..228067c571ba 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -470,28 +470,15 @@ static ctl_table dn_table[] = {
 	{0}
 };
 
-static ctl_table dn_dir_table[] = {
-	{
-		.ctl_name = NET_DECNET,
-		.procname = "decnet",
-		.mode = 0555,
-		.child = dn_table},
-	{0}
-};
-
-static ctl_table dn_root_table[] = {
-	{
-		.ctl_name = CTL_NET,
-		.procname = "net",
-		.mode = 0555,
-		.child = dn_dir_table
-	},
-	{0}
+static struct ctl_path dn_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "decnet", .ctl_name = NET_DECNET, },
+	{ }
 };
 
 void dn_register_sysctl(void)
 {
-	dn_table_header = register_sysctl_table(dn_root_table);
+	dn_table_header = register_sysctl_paths(dn_path, dn_table);
 }
 
 void dn_unregister_sysctl(void)
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f70df073c588..bc0f6252613f 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -1014,9 +1014,8 @@ static int __init aun_udp_initialise(void)
 
 	skb_queue_head_init(&aun_queue);
 	spin_lock_init(&aun_queue_lock);
-	init_timer(&ab_cleanup_timer);
+	setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
 	ab_cleanup_timer.expires = jiffies + (HZ*2);
-	ab_cleanup_timer.function = ab_cleanup;
 	add_timer(&ab_cleanup_timer);
 
 	memset(&sin, 0, sizeof(sin));
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 6b2e454ae313..a7b417523e9b 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -359,10 +359,34 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
 }
 EXPORT_SYMBOL(alloc_etherdev_mq);
 
-char *print_mac(char *buf, const u8 *addr)
+static size_t _format_mac_addr(char *buf, int buflen,
+			       const unsigned char *addr, int len)
 {
-	sprintf(buf, MAC_FMT,
-		addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+	int i;
+	char *cp = buf;
+
+	for (i = 0; i < len; i++) {
+		cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
+		if (i == len - 1)
+			break;
+		cp += strlcpy(cp, ":", buflen - (cp - buf));
+	}
+	return cp - buf;
+}
+
+ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
+{
+	size_t l;
+
+	l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
+	l += strlcpy(buf + l, "\n", PAGE_SIZE - l);
+	return ((ssize_t) l);
+}
+EXPORT_SYMBOL(sysfs_format_mac);
+
+char *print_mac(char *buf, const unsigned char *addr)
+{
+	_format_mac_addr(buf, MAC_BUF_SIZE, addr, ETH_ALEN);
 	return buf;
 }
 EXPORT_SYMBOL(print_mac);
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 1438adedbc83..bd501046c9c0 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -1,8 +1,9 @@
 config IEEE80211
-	tristate "Generic IEEE 802.11 Networking Stack"
+	tristate "Generic IEEE 802.11 Networking Stack (DEPRECATED)"
 	---help---
 	  This option enables the hardware independent IEEE 802.11
-	  networking stack.
+	  networking stack. This component is deprecated in favor of the
+	  mac80211 component.
 
 config IEEE80211_DEBUG
 	bool "Enable full debugging output"
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 8e146949fc6f..bba0152e2d71 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -189,7 +189,7 @@ static inline u16 Mk16(u8 hi, u8 lo)
 	return lo | (((u16) hi) << 8);
 }
 
-static inline u16 Mk16_le(u16 * v)
+static inline u16 Mk16_le(__le16 * v)
 {
 	return le16_to_cpu(*v);
 }
@@ -275,15 +275,15 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
 	PPK[5] = TTAK[4] + IV16;
 
 	/* Step 2 - 96-bit bijective mixing using S-box */
-	PPK[0] += _S_(PPK[5] ^ Mk16_le((u16 *) & TK[0]));
-	PPK[1] += _S_(PPK[0] ^ Mk16_le((u16 *) & TK[2]));
-	PPK[2] += _S_(PPK[1] ^ Mk16_le((u16 *) & TK[4]));
-	PPK[3] += _S_(PPK[2] ^ Mk16_le((u16 *) & TK[6]));
-	PPK[4] += _S_(PPK[3] ^ Mk16_le((u16 *) & TK[8]));
-	PPK[5] += _S_(PPK[4] ^ Mk16_le((u16 *) & TK[10]));
+	PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0]));
+	PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2]));
+	PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4]));
+	PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6]));
+	PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8]));
+	PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10]));
 
-	PPK[0] += RotR1(PPK[5] ^ Mk16_le((u16 *) & TK[12]));
-	PPK[1] += RotR1(PPK[0] ^ Mk16_le((u16 *) & TK[14]));
+	PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12]));
+	PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14]));
 	PPK[2] += RotR1(PPK[1]);
 	PPK[3] += RotR1(PPK[2]);
 	PPK[4] += RotR1(PPK[3]);
@@ -294,7 +294,7 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
 	WEPSeed[0] = Hi8(IV16);
 	WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F;
 	WEPSeed[2] = Lo8(IV16);
-	WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((u16 *) & TK[0])) >> 1);
+	WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1);
 
 #ifdef __BIG_ENDIAN
 	{
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index 69cb6aad25be..3bca97f55d47 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -181,9 +181,8 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
 	ieee->ieee802_1x = 1;	/* Default to supporting 802.1x */
 
 	INIT_LIST_HEAD(&ieee->crypt_deinit_list);
-	init_timer(&ieee->crypt_deinit_timer);
-	ieee->crypt_deinit_timer.data = (unsigned long)ieee;
-	ieee->crypt_deinit_timer.function = ieee80211_crypt_deinit_handler;
+	setup_timer(&ieee->crypt_deinit_timer, ieee80211_crypt_deinit_handler,
+		    (unsigned long)ieee);
 	ieee->crypt_quiesced = 0;
 
 	spin_lock_init(&ieee->lock);
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 21c0fadde03b..1e3f87c8c012 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -45,7 +45,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ieee80211_get_hdrlen(fc));
 	skb->pkt_type = PACKET_OTHERHOST;
-	skb->protocol = __constant_htons(ETH_P_80211_RAW);
+	skb->protocol = htons(ETH_P_80211_RAW);
 	memset(skb->cb, 0, sizeof(skb->cb));
 	netif_rx(skb);
 }
@@ -754,7 +754,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN);
 		memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN);
 	} else {
-		u16 len;
+		__be16 len;
 		/* Leave Ethernet header part of hdr and full payload */
 		skb_pull(skb, hdrlen);
 		len = htons(skb->len);
@@ -800,7 +800,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		if (skb2 != NULL) {
 			/* send to wireless media */
 			skb2->dev = dev;
-			skb2->protocol = __constant_htons(ETH_P_802_3);
+			skb2->protocol = htons(ETH_P_802_3);
 			skb_reset_mac_header(skb2);
 			skb_reset_network_header(skb2);
 			/* skb2->network_header += ETH_HLEN; */
@@ -1032,16 +1032,16 @@ static int ieee80211_qos_convert_ac_to_parameters(struct
 		qos_param->aifs[i] -= (qos_param->aifs[i] < 2) ? 0 : 2;
 
 		cw_min = ac_params->ecw_min_max & 0x0F;
-		qos_param->cw_min[i] = (u16) ((1 << cw_min) - 1);
+		qos_param->cw_min[i] = cpu_to_le16((1 << cw_min) - 1);
 
 		cw_max = (ac_params->ecw_min_max & 0xF0) >> 4;
-		qos_param->cw_max[i] = (u16) ((1 << cw_max) - 1);
+		qos_param->cw_max[i] = cpu_to_le16((1 << cw_max) - 1);
 
 		qos_param->flag[i] =
 		    (ac_params->aci_aifsn & 0x10) ? 0x01 : 0x00;
 
 		txop = le16_to_cpu(ac_params->tx_op_limit) * 32;
-		qos_param->tx_op_limit[i] = (u16) txop;
+		qos_param->tx_op_limit[i] = cpu_to_le16(txop);
 	}
 	return rc;
 }
@@ -1585,26 +1585,25 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 	DECLARE_MAC_BUF(mac);
 
 	IEEE80211_DEBUG_SCAN("'%s' (%s"
 			     "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n",
-			     escape_essid(info_element->data,
-					  info_element->len),
-			     print_mac(mac, beacon->header.addr3),
-			     (beacon->capability & (1 << 0xf)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xe)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xd)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xc)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xb)) ? '1' : '0',
-			     (beacon->capability & (1 << 0xa)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x9)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x8)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x7)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x6)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x5)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x4)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x3)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x2)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x1)) ? '1' : '0',
-			     (beacon->capability & (1 << 0x0)) ? '1' : '0');
+			     escape_essid(info_element->data, info_element->len),
+			     print_mac(mac, beacon->header.addr3),
+			     (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0xd)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0xc)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0xb)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0xa)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x9)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x8)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x7)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x6)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x5)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x4)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x3)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x2)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x1)) ? '1' : '0',
+			     (beacon->capability & cpu_to_le16(1 << 0x0)) ? '1' : '0');
 
 	if (ieee80211_network_init(ieee, beacon, &network, stats)) {
 		IEEE80211_DEBUG_SCAN("Dropped '%s' (%s) via %s.\n",
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 6d06f1385e28..d8b02603cbe5 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -126,7 +126,7 @@ payload of each frame is reduced to 492 bytes.
 static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 };
 static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 };
 
-static int ieee80211_copy_snap(u8 * data, u16 h_proto)
+static int ieee80211_copy_snap(u8 * data, __be16 h_proto)
 {
 	struct ieee80211_snap_hdr *snap;
 	u8 *oui;
@@ -136,7 +136,7 @@ static int ieee80211_copy_snap(u8 * data, u16 h_proto)
 	snap->ssap = 0xaa;
 	snap->ctrl = 0x03;
 
-	if (h_proto == 0x8137 || h_proto == 0x80f3)
+	if (h_proto == htons(ETH_P_AARP) || h_proto == htons(ETH_P_IPX))
 		oui = P802_1H_OUI;
 	else
 		oui = RFC1042_OUI;
@@ -144,7 +144,6 @@ static int ieee80211_copy_snap(u8 * data, u16 h_proto)
 	snap->oui[1] = oui[1];
 	snap->oui[2] = oui[2];
 
-	h_proto = htons(h_proto);
 	memcpy(data + SNAP_SIZE, &h_proto, sizeof(u16));
 
 	return SNAP_SIZE + sizeof(u16);
@@ -261,7 +260,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	    rts_required;
 	unsigned long flags;
 	struct net_device_stats *stats = &ieee->stats;
-	int ether_type, encrypt, host_encrypt, host_encrypt_msdu, host_build_iv;
+	int encrypt, host_encrypt, host_encrypt_msdu, host_build_iv;
+	__be16 ether_type;
 	int bytes, fc, hdr_len;
 	struct sk_buff *skb_frag;
 	struct ieee80211_hdr_3addrqos header = {/* Ensure zero initialized */
@@ -292,11 +292,11 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto success;
 	}
 
-	ether_type = ntohs(((struct ethhdr *)skb->data)->h_proto);
+	ether_type = ((struct ethhdr *)skb->data)->h_proto;
 
 	crypt = ieee->crypt[ieee->tx_keyidx];
 
-	encrypt = !(ether_type == ETH_P_PAE && ieee->ieee802_1x) &&
+	encrypt = !(ether_type == htons(ETH_P_PAE) && ieee->ieee802_1x) &&
 	    ieee->sec.encrypt;
 
 	host_encrypt = ieee->host_encrypt && encrypt && crypt;
@@ -304,7 +304,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	host_build_iv = ieee->host_build_iv && encrypt && crypt;
 
 	if (!encrypt && ieee->ieee802_1x &&
-	    ieee->drop_unencrypted && ether_type != ETH_P_PAE) {
+	    ieee->drop_unencrypted && ether_type != htons(ETH_P_PAE)) {
 		stats->tx_dropped++;
 		goto success;
 	}
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index d309e8f19992..623489afa62c 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -709,7 +709,7 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee,
 	} else
 		idx = ieee->tx_keyidx;
 
-	if (!ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY &&
+	if (!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) &&
 	    ext->alg != IW_ENCODE_ALG_WEP)
 		if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA)
 			return -EINVAL;
diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c
index a53a751d0702..1a96c2572578 100644
--- a/net/ieee80211/softmac/ieee80211softmac_auth.c
+++ b/net/ieee80211/softmac/ieee80211softmac_auth.c
@@ -178,11 +178,11 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
 	}
 
 	/* Parse the auth packet */
-	switch(auth->algorithm) {
+	switch(le16_to_cpu(auth->algorithm)) {
 	case WLAN_AUTH_OPEN:
 		/* Check the status code of the response */
 
-		switch(auth->status) {
+		switch(le16_to_cpu(auth->status)) {
 		case WLAN_STATUS_SUCCESS:
 			/* Update the status to Authenticated */
 			spin_lock_irqsave(&mac->lock, flags);
@@ -210,7 +210,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth)
 		break;
 	case WLAN_AUTH_SHARED_KEY:
 		/* Figure out where we are in the process */
-		switch(auth->transaction) {
+		switch(le16_to_cpu(auth->transaction)) {
 		case IEEE80211SOFTMAC_AUTH_SHARED_CHALLENGE:
 			/* Check to make sure we have a challenge IE */
 			data = (u8 *)auth->info_element;
diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c
index 26c35253be33..73b4b13fbd8f 100644
--- a/net/ieee80211/softmac/ieee80211softmac_io.c
+++ b/net/ieee80211/softmac/ieee80211softmac_io.c
@@ -148,11 +148,11 @@ ieee80211softmac_hdr_3addr(struct ieee80211softmac_device *mac,
 	 * shouldn't the sequence number be in ieee80211? */
 }
 
-static u16
+static __le16
 ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
 	struct ieee80211softmac_network *net)
 {
-	u16 capability = 0;
+	__le16 capability = 0;
 
 	/* ESS and IBSS bits are set according to the current mode */
 	switch (mac->ieee->iw_mode) {
@@ -163,8 +163,8 @@ ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
 		capability = cpu_to_le16(WLAN_CAPABILITY_IBSS);
 		break;
 	case IW_MODE_AUTO:
-		capability = net->capabilities &
-			(WLAN_CAPABILITY_ESS|WLAN_CAPABILITY_IBSS);
+		capability = cpu_to_le16(net->capabilities &
+			(WLAN_CAPABILITY_ESS|WLAN_CAPABILITY_IBSS));
 		break;
 	default:
 		/* bleh. we don't ever go to these modes */
@@ -182,7 +182,7 @@ ieee80211softmac_capabilities(struct ieee80211softmac_device *mac,
 	/* Short Preamble */
 	/* Always supported: we probably won't ever be powering devices which
 	 * dont support this... */
-	capability |= WLAN_CAPABILITY_SHORT_PREAMBLE;
+	capability |= cpu_to_le16(WLAN_CAPABILITY_SHORT_PREAMBLE);
 
 	/* PBCC */
 	/* Not widely used */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9f9fd2c6f6e2..24e2b7294bf8 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -85,6 +85,13 @@ endchoice
 config IP_FIB_HASH
 	def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
 
+config IP_FIB_TRIE_STATS
+	bool "FIB TRIE statistics"
+	depends on IP_FIB_TRIE
+	---help---
+	  Keep track of statistics on structure of FIB TRIE table.
+	  Useful for testing and measuring TRIE performance.
+
 config IP_MULTIPLE_TABLES
 	bool "IP: policy routing"
 	depends on IP_ADVANCED_ROUTER
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 93fe3966805d..ad40ef3f9ebc 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,9 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+	     fib_frontend.o fib_semantics.o \
 	     inet_fragment.o
 
+obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
 obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d2f22e74b267..09ca5293d08f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -126,6 +126,10 @@ extern void ip_mc_drop_socket(struct sock *sk);
 static struct list_head inetsw[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw_lock);
 
+struct ipv4_config ipv4_config;
+
+EXPORT_SYMBOL(ipv4_config);
+
 /* New destruction routine */
 
 void inet_sock_destruct(struct sock *sk)
@@ -135,6 +139,8 @@ void inet_sock_destruct(struct sock *sk)
 	__skb_queue_purge(&sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_error_queue);
 
+	sk_mem_reclaim(sk);
+
 	if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
 		printk("Attempt to release TCP socket in state %d %p\n",
 		       sk->sk_state, sk);
@@ -440,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (addr_len < sizeof(struct sockaddr_in))
 		goto out;
 
-	chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+	chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr);
 
 	/* Not specified by any standard per-se, however it breaks too
 	 * many applications when removed. It is unfortunate since
@@ -789,12 +795,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCADDRT:
 	case SIOCDELRT:
 	case SIOCRTMSG:
-		err = ip_rt_ioctl(cmd, (void __user *)arg);
+		err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg);
 		break;
 	case SIOCDARP:
 	case SIOCGARP:
 	case SIOCSARP:
-		err = arp_ioctl(cmd, (void __user *)arg);
+		err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg);
 		break;
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
@@ -838,6 +844,7 @@ const struct proto_ops inet_stream_ops = {
 	.recvmsg = sock_common_recvmsg,
 	.mmap = sock_no_mmap,
 	.sendpage = tcp_sendpage,
+	.splice_read = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
@@ -1106,7 +1113,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 	};
 
 	security_sk_classify_flow(sk, &fl);
-	err = ip_route_output_flow(&rt, &fl, sk, 0);
+	err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0);
 }
 	if (!err)
 		sk_setup_caps(sk, &rt->u.dst);
@@ -1237,7 +1244,7 @@ unsigned long snmp_fold_field(void *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+int snmp_mib_init(void *ptr[2], size_t mibsize)
 {
 	BUG_ON(ptr == NULL);
 	ptr[0] = __alloc_percpu(mibsize);
@@ -1286,37 +1293,31 @@ static struct net_protocol udp_protocol = {
 
 static struct net_protocol icmp_protocol = {
 	.handler = icmp_rcv,
+	.no_policy = 1,
 };
 
 static int __init init_ipv4_mibs(void)
 {
 	if (snmp_mib_init((void **)net_statistics,
-			  sizeof(struct linux_mib),
-			  __alignof__(struct linux_mib)) < 0)
+			  sizeof(struct linux_mib)) < 0)
 		goto err_net_mib;
 	if (snmp_mib_init((void **)ip_statistics,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void **)icmp_statistics,
-			  sizeof(struct icmp_mib),
-			  __alignof__(struct icmp_mib)) < 0)
+			  sizeof(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void **)icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib),
-			  __alignof__(struct icmpmsg_mib)) < 0)
+			  sizeof(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
 	if (snmp_mib_init((void **)tcp_statistics,
-			  sizeof(struct tcp_mib),
-			  __alignof__(struct tcp_mib)) < 0)
+			  sizeof(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
 	if (snmp_mib_init((void **)udp_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	if (snmp_mib_init((void **)udplite_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 
 	tcp_mib_init();
@@ -1418,6 +1419,9 @@ static int __init inet_init(void)
 	/* Setup TCP slab cache for open requests. */
 	tcp_init();
 
+	/* Setup UDP memory threshold */
+	udp_init();
+
 	/* Add UDP-Lite (RFC 3828) */
 	udplite4_register();
 
@@ -1471,15 +1475,11 @@ static int __init ipv4_proc_init(void)
 		goto out_tcp;
 	if (udp4_proc_init())
 		goto out_udp;
-	if (fib_proc_init())
-		goto out_fib;
 	if (ip_misc_proc_init())
 		goto out_misc;
 out:
 	return rc;
 out_misc:
-	fib_proc_exit();
-out_fib:
 	udp4_proc_exit();
 out_udp:
 	tcp4_proc_exit();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 5fc346d8b566..d76803a3dcae 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -169,6 +169,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		if (ip_clear_mutable_options(iph, &dummy))
 			goto out;
 	}
+
+	spin_lock(&x->lock);
 	{
 		u8 auth_data[MAX_AH_AUTH_LEN];
 
@@ -176,13 +178,16 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb_push(skb, ihl);
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
-			goto out;
-		err = -EINVAL;
-		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
-			x->stats.integrity_failed++;
-			goto out;
-		}
+			goto unlock;
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
+			err = -EBADMSG;
 	}
+unlock:
+	spin_unlock(&x->lock);
+
+	if (err)
+		goto out;
+
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_buf, ihl);
 	skb->transport_header = skb->network_header;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 54a76b8b803a..5976c598cc4b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -235,8 +235,6 @@ static int arp_constructor(struct neighbour *neigh)
 	struct in_device *in_dev;
 	struct neigh_parms *parms;
 
-	neigh->type = inet_addr_type(addr);
-
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
 	if (in_dev == NULL) {
@@ -244,6 +242,8 @@ static int arp_constructor(struct neighbour *neigh)
 		return -EINVAL;
 	}
 
+	neigh->type = inet_addr_type(&init_net, addr);
+
 	parms = in_dev->arp_parms;
 	__neigh_parms_put(neigh->parms);
 	neigh->parms = neigh_parms_clone(parms);
@@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
 	default:
 	case 0:	/* By default announce any local IP */
-		if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
+		if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL)
 			saddr = ip_hdr(skb)->saddr;
 		break;
 	case 1:	/* Restrict announcements of saddr in same subnet */
 		if (!skb)
 			break;
 		saddr = ip_hdr(skb)->saddr;
-		if (inet_addr_type(saddr) == RTN_LOCAL) {
+		if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) {
 			/* saddr should be known to target */
 			if (inet_addr_onlink(in_dev, target, saddr))
 				break;
@@ -382,8 +382,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	read_unlock_bh(&neigh->lock);
 }
 
-static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
-		      __be32 sip, __be32 tip)
+static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 {
 	int scope;
 
@@ -403,7 +402,6 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
 	case 3:	/* Do not reply for scope host addresses */
 		sip = 0;
 		scope = RT_SCOPE_LINK;
-		dev = NULL;
 		break;
 	case 4:	/* Reserved */
 	case 5:
@@ -415,7 +413,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
 	default:
 		return 0;
 	}
-	return !inet_confirm_addr(dev, sip, tip, scope);
+	return !inet_confirm_addr(in_dev, sip, tip, scope);
 }
 
 static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
@@ -426,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	int flag = 0;
 	/*unsigned long now; */
 
-	if (ip_route_output_key(&rt, &fl) < 0)
+	if (ip_route_output_key(&init_net, &rt, &fl) < 0)
 		return 1;
 	if (rt->u.dst.dev != dev) {
 		NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
@@ -479,7 +477,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 
 	paddr = ((struct rtable*)skb->dst)->rt_gateway;
 
-	if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev))
+	if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev))
 		return 0;
 
 	n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -777,7 +775,7 @@ static int arp_process(struct sk_buff *skb)
  *	Check for bad requests for 127.x.x.x and requests for multicast
  *	addresses. If this is one such, delete it.
  */
-	if (LOOPBACK(tip) || MULTICAST(tip))
+	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
 		goto out;
 
 /*
@@ -806,8 +804,8 @@ static int arp_process(struct sk_buff *skb)
 	/* Special case: IPv4 duplicate address detection packet (RFC2131) */
 	if (sip == 0) {
 		if (arp->ar_op == htons(ARPOP_REQUEST) &&
-		    inet_addr_type(tip) == RTN_LOCAL &&
-		    !arp_ignore(in_dev,dev,sip,tip))
+		    inet_addr_type(&init_net, tip) == RTN_LOCAL &&
+		    !arp_ignore(in_dev, sip, tip))
 			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
 				 dev->dev_addr, sha);
 		goto out;
@@ -825,7 +823,7 @@ static int arp_process(struct sk_buff *skb)
 			int dont_send = 0;
 
 			if (!dont_send)
-				dont_send |= arp_ignore(in_dev,dev,sip,tip);
+				dont_send |= arp_ignore(in_dev,sip,tip);
 			if (!dont_send && IN_DEV_ARPFILTER(in_dev))
 				dont_send |= arp_filter(sip,tip,dev);
 			if (!dont_send)
@@ -835,9 +833,8 @@ static int arp_process(struct sk_buff *skb)
 		}
 		goto out;
 	} else if (IN_DEV_FORWARD(in_dev)) {
-		if ((rt->rt_flags&RTCF_DNAT) ||
-		    (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
-		     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
+		if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
+		    (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) {
 			n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 			if (n)
 				neigh_release(n);
@@ -860,14 +857,14 @@ static int arp_process(struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
-	if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) {
+	if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) {
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
 		 */
 		if (n == NULL &&
 		    arp->ar_op == htons(ARPOP_REPLY) &&
-		    inet_addr_type(sip) == RTN_UNICAST)
+		    inet_addr_type(&init_net, sip) == RTN_UNICAST)
 			n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 	}
 
@@ -952,44 +949,60 @@ out_of_mem:
  *	Set (create) an ARP cache entry.
  */
 
-static int arp_req_set(struct arpreq *r, struct net_device * dev)
+static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
 {
-	__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+	if (dev == NULL) {
+		IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
+		return 0;
+	}
+	if (__in_dev_get_rtnl(dev)) {
+		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
+		return 0;
+	}
+	return -ENXIO;
+}
+
+static int arp_req_set_public(struct net *net, struct arpreq *r,
+		struct net_device *dev)
+{
+	__be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
+
+	if (mask && mask != htonl(0xFFFFFFFF))
+		return -EINVAL;
+	if (!dev && (r->arp_flags & ATF_COM)) {
+		dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
+				r->arp_ha.sa_data);
+		if (!dev)
+			return -ENODEV;
+	}
+	if (mask) {
+		if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL)
+			return -ENOBUFS;
+		return 0;
+	}
+
+	return arp_req_set_proxy(net, dev, 1);
+}
+
+static int arp_req_set(struct net *net, struct arpreq *r,
+		struct net_device * dev)
+{
+	__be32 ip;
 	struct neighbour *neigh;
 	int err;
 
-	if (r->arp_flags&ATF_PUBL) {
-		__be32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr;
-		if (mask && mask != htonl(0xFFFFFFFF))
-			return -EINVAL;
-		if (!dev && (r->arp_flags & ATF_COM)) {
-			dev = dev_getbyhwaddr(&init_net, r->arp_ha.sa_family, r->arp_ha.sa_data);
-			if (!dev)
-				return -ENODEV;
-		}
-		if (mask) {
-			if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
-				return -ENOBUFS;
-			return 0;
-		}
-		if (dev == NULL) {
-			IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
-			return 0;
-		}
-		if (__in_dev_get_rtnl(dev)) {
-			IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1);
-			return 0;
-		}
-		return -ENXIO;
-	}
+	if (r->arp_flags & ATF_PUBL)
+		return arp_req_set_public(net, r, dev);
 
+	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (r->arp_flags & ATF_PERM)
 		r->arp_flags |= ATF_COM;
 	if (dev == NULL) {
 		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
 							 .tos = RTO_ONLINK } } };
 		struct rtable * rt;
-		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
@@ -1066,37 +1079,37 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
 	return err;
 }
 
-static int arp_req_delete(struct arpreq *r, struct net_device * dev)
+static int arp_req_delete_public(struct net *net, struct arpreq *r,
+		struct net_device *dev)
+{
+	__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
+
+	if (mask == htonl(0xFFFFFFFF))
+		return pneigh_delete(&arp_tbl, net, &ip, dev);
+
+	if (mask)
+		return -EINVAL;
+
+	return arp_req_set_proxy(net, dev, 0);
+}
+
+static int arp_req_delete(struct net *net, struct arpreq *r,
+		struct net_device * dev)
 {
 	int err;
-	__be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+	__be32 ip;
 	struct neighbour *neigh;
 
-	if (r->arp_flags & ATF_PUBL) {
-		__be32 mask =
-		       ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
-		if (mask == htonl(0xFFFFFFFF))
-			return pneigh_delete(&arp_tbl, &ip, dev);
-		if (mask == 0) {
-			if (dev == NULL) {
-				IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
-				return 0;
-			}
-			if (__in_dev_get_rtnl(dev)) {
-				IN_DEV_CONF_SET(__in_dev_get_rtnl(dev),
-						PROXY_ARP, 0);
-				return 0;
-			}
-			return -ENXIO;
-		}
-		return -EINVAL;
-	}
+	if (r->arp_flags & ATF_PUBL)
+		return arp_req_delete_public(net, r, dev);
 
+	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (dev == NULL) {
 		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
 							 .tos = RTO_ONLINK } } };
 		struct rtable * rt;
-		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
@@ -1119,7 +1132,7 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev)
1119 * Handle an ARP layer I/O control request. 1132 * Handle an ARP layer I/O control request.
1120 */ 1133 */
1121 1134
1122int arp_ioctl(unsigned int cmd, void __user *arg) 1135int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1123{ 1136{
1124 int err; 1137 int err;
1125 struct arpreq r; 1138 struct arpreq r;
@@ -1151,7 +1164,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
1151 rtnl_lock(); 1164 rtnl_lock();
1152 if (r.arp_dev[0]) { 1165 if (r.arp_dev[0]) {
1153 err = -ENODEV; 1166 err = -ENODEV;
1154 if ((dev = __dev_get_by_name(&init_net, r.arp_dev)) == NULL) 1167 if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL)
1155 goto out; 1168 goto out;
1156 1169
1157 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ 1170 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
@@ -1167,10 +1180,10 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
1167 1180
1168 switch (cmd) { 1181 switch (cmd) {
1169 case SIOCDARP: 1182 case SIOCDARP:
1170 err = arp_req_delete(&r, dev); 1183 err = arp_req_delete(net, &r, dev);
1171 break; 1184 break;
1172 case SIOCSARP: 1185 case SIOCSARP:
1173 err = arp_req_set(&r, dev); 1186 err = arp_req_set(net, &r, dev);
1174 break; 1187 break;
1175 case SIOCGARP: 1188 case SIOCGARP:
1176 err = arp_req_get(&r, dev); 1189 err = arp_req_get(&r, dev);
@@ -1359,8 +1372,8 @@ static const struct seq_operations arp_seq_ops = {
1359 1372
1360static int arp_seq_open(struct inode *inode, struct file *file) 1373static int arp_seq_open(struct inode *inode, struct file *file)
1361{ 1374{
1362 return seq_open_private(file, &arp_seq_ops, 1375 return seq_open_net(inode, file, &arp_seq_ops,
1363 sizeof(struct neigh_seq_state)); 1376 sizeof(struct neigh_seq_state));
1364} 1377}
1365 1378
1366static const struct file_operations arp_seq_fops = { 1379static const struct file_operations arp_seq_fops = {
@@ -1368,7 +1381,7 @@ static const struct file_operations arp_seq_fops = {
1368 .open = arp_seq_open, 1381 .open = arp_seq_open,
1369 .read = seq_read, 1382 .read = seq_read,
1370 .llseek = seq_lseek, 1383 .llseek = seq_lseek,
1371 .release = seq_release_private, 1384 .release = seq_release_net,
1372}; 1385};
1373 1386
1374static int __init arp_proc_init(void) 1387static int __init arp_proc_init(void)
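The arp.c changes above all follow one pattern: every entry point that previously assumed init_net now receives the caller's namespace explicitly. A minimal sketch of how a socket-level caller might hand its namespace down to arp_ioctl() after this change — the surrounding dispatch function and its name are hypothetical, only arp_ioctl() and the sk_net field come from the patch:

	#include <linux/net.h>
	#include <linux/sockios.h>
	#include <net/sock.h>

	/* Hypothetical dispatch helper: derives "net" from the socket that
	 * issued the ioctl, which is what the reworked arp_ioctl() expects. */
	static int demo_dispatch_arp_ioctl(struct socket *sock, unsigned int cmd,
					   unsigned long arg)
	{
		struct sock *sk = sock->sk;
		struct net *net = sk->sk_net;	/* namespace the socket lives in */

		switch (cmd) {
		case SIOCDARP:
		case SIOCSARP:
		case SIOCGARP:
			return arp_ioctl(net, cmd, (void __user *)arg);
		default:
			return -ENOIOCTLCMD;
		}
	}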
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index f18e88bc86ec..d4dc4eb48d95 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -63,7 +63,7 @@ struct cipso_v4_domhsh_entry {
  * probably be turned into a hash table or something similar so we
  * can do quick lookups. */
 static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
-static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
+static LIST_HEAD(cipso_v4_doi_list);
 
 /* Label mapping cache */
 int cipso_v4_cache_enabled = 1;
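The cipso_ipv4.c hunk is purely cosmetic: LIST_HEAD() folds the declaration and the LIST_HEAD_INIT() initializer into one line. For reference, the two forms are equivalent (demo_list names are hypothetical):

	#include <linux/list.h>

	/* Both declarations yield the same statically initialized empty list,
	 * whose next and prev pointers refer back to the head itself. */
	static struct list_head demo_list_a = LIST_HEAD_INIT(demo_list_a);
	static LIST_HEAD(demo_list_b);	/* shorthand for the line above */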
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 0301dd468cf4..0c0c73f368ce 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -40,7 +40,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	oif = sk->sk_bound_dev_if;
 	saddr = inet->saddr;
-	if (MULTICAST(usin->sin_addr.s_addr)) {
+	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
 		if (!oif)
 			oif = inet->mc_index;
 		if (!saddr)
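MULTICAST() and the other uppercase address-class macros are replaced throughout this series by typed ipv4_is_*() helpers taking __be32. A sketch of the check the multicast helper performs — the demo_ name is hypothetical, the real helper lives in <linux/in.h>:

	#include <linux/types.h>
	#include <asm/byteorder.h>

	/* IPv4 multicast (class D) addresses are 224.0.0.0/4, i.e. the top
	 * four bits of the address are 1110. */
	static inline int demo_ipv4_is_multicast(__be32 addr)
	{
		return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
	}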
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b42f74617bac..21f71bf912d5 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -62,6 +62,7 @@
 #include <net/route.h>
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
+#include <net/net_namespace.h>
 
 struct ipv4_devconf ipv4_devconf = {
 	.data = {
@@ -82,7 +83,8 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
 	},
 };
 
-#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
+#define IPV4_DEVCONF_DFLT(net, attr) \
+	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
 
 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LOCAL] = { .type = NLA_U32 },
@@ -98,9 +100,15 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy);
 #ifdef CONFIG_SYSCTL
-static void devinet_sysctl_register(struct in_device *in_dev,
-				    struct ipv4_devconf *p);
-static void devinet_sysctl_unregister(struct ipv4_devconf *p);
+static void devinet_sysctl_register(struct in_device *idev);
+static void devinet_sysctl_unregister(struct in_device *idev);
+#else
+static inline void devinet_sysctl_register(struct in_device *idev)
+{
+}
+static inline void devinet_sysctl_unregister(struct in_device *idev)
+{
+}
 #endif
 
 /* Locks all the inet devices. */
@@ -157,24 +165,18 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	if (!in_dev)
 		goto out;
 	INIT_RCU_HEAD(&in_dev->rcu_head);
-	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
+	memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
+			sizeof(in_dev->cnf));
 	in_dev->cnf.sysctl = NULL;
 	in_dev->dev = dev;
 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
 		goto out_kfree;
 	/* Reference in_dev->dev */
 	dev_hold(dev);
-#ifdef CONFIG_SYSCTL
-	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
-			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
-#endif
-
 	/* Account for reference dev->ip_ptr (below) */
 	in_dev_hold(in_dev);
 
-#ifdef CONFIG_SYSCTL
-	devinet_sysctl_register(in_dev, &in_dev->cnf);
-#endif
+	devinet_sysctl_register(in_dev);
 	ip_mc_init_dev(in_dev);
 	if (dev->flags & IFF_UP)
 		ip_mc_up(in_dev);
@@ -213,15 +215,9 @@ static void inetdev_destroy(struct in_device *in_dev)
 		inet_free_ifa(ifa);
 	}
 
-#ifdef CONFIG_SYSCTL
-	devinet_sysctl_unregister(&in_dev->cnf);
-#endif
-
 	dev->ip_ptr = NULL;
 
-#ifdef CONFIG_SYSCTL
-	neigh_sysctl_unregister(in_dev->arp_parms);
-#endif
+	devinet_sysctl_unregister(in_dev);
 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 	arp_ifdown(dev);
 
@@ -408,17 +404,17 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 		in_dev_hold(in_dev);
 		ifa->ifa_dev = in_dev;
 	}
-	if (LOOPBACK(ifa->ifa_local))
+	if (ipv4_is_loopback(ifa->ifa_local))
 		ifa->ifa_scope = RT_SCOPE_HOST;
 	return inet_insert_ifa(ifa);
 }
 
-struct in_device *inetdev_by_index(int ifindex)
+struct in_device *inetdev_by_index(struct net *net, int ifindex)
 {
 	struct net_device *dev;
 	struct in_device *in_dev = NULL;
 	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(&init_net, ifindex);
+	dev = __dev_get_by_index(net, ifindex);
 	if (dev)
 		in_dev = in_dev_get(dev);
 	read_unlock(&dev_base_lock);
@@ -441,6 +437,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 
 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_device *in_dev;
 	struct ifaddrmsg *ifm;
@@ -449,12 +446,15 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 
 	ASSERT_RTNL();
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 	if (err < 0)
 		goto errout;
 
 	ifm = nlmsg_data(nlh);
-	in_dev = inetdev_by_index(ifm->ifa_index);
+	in_dev = inetdev_by_index(net, ifm->ifa_index);
 	if (in_dev == NULL) {
 		err = -ENODEV;
 		goto errout;
@@ -560,10 +560,14 @@ errout:
 
 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
+	struct net *net = skb->sk->sk_net;
 	struct in_ifaddr *ifa;
 
 	ASSERT_RTNL();
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	ifa = rtm_to_ifaddr(nlh);
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
@@ -579,7 +583,7 @@ static __inline__ int inet_abc_len(__be32 addr)
 {
 	int rc = -1;	/* Something else, probably a multicast. */
 
-	if (ZERONET(addr))
+	if (ipv4_is_zeronet(addr))
 		rc = 0;
 	else {
 		__u32 haddr = ntohl(addr);
@@ -964,28 +968,25 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
 
 /*
  * Confirm that local IP address exists using wildcards:
- * - dev: only on this interface, 0=any interface
+ * - in_dev: only on this interface, 0=any interface
  * - dst: only in the same subnet as dst, 0=any dst
  * - local: address, 0=autoselect the local address
  * - scope: maximum allowed scope value for the local address
  */
-__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
+__be32 inet_confirm_addr(struct in_device *in_dev,
+			 __be32 dst, __be32 local, int scope)
 {
 	__be32 addr = 0;
-	struct in_device *in_dev;
-
-	if (dev) {
-		rcu_read_lock();
-		if ((in_dev = __in_dev_get_rcu(dev)))
-			addr = confirm_addr_indev(in_dev, dst, local, scope);
-		rcu_read_unlock();
+	struct net_device *dev;
+	struct net *net;
 
-		return addr;
-	}
+	if (scope != RT_SCOPE_LINK)
+		return confirm_addr_indev(in_dev, dst, local, scope);
 
+	net = in_dev->dev->nd_net;
 	read_lock(&dev_base_lock);
 	rcu_read_lock();
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		if ((in_dev = __in_dev_get_rcu(dev))) {
 			addr = confirm_addr_indev(in_dev, dst, local, scope);
 			if (addr)
@@ -1106,13 +1107,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		 */
 		inetdev_changename(dev, in_dev);
 
-#ifdef CONFIG_SYSCTL
-		devinet_sysctl_unregister(&in_dev->cnf);
-		neigh_sysctl_unregister(in_dev->arp_parms);
-		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
-				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
-		devinet_sysctl_register(in_dev, &in_dev->cnf);
-#endif
+		devinet_sysctl_unregister(in_dev);
+		devinet_sysctl_register(in_dev);
 		break;
 	}
 out:
@@ -1174,12 +1170,16 @@ nla_put_failure:
 
 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	int idx, ip_idx;
 	struct net_device *dev;
 	struct in_device *in_dev;
 	struct in_ifaddr *ifa;
 	int s_ip_idx, s_idx = cb->args[0];
 
+	if (net != &init_net)
+		return 0;
+
 	s_ip_idx = ip_idx = cb->args[1];
 	idx = 0;
 	for_each_netdev(&init_net, dev) {
@@ -1228,28 +1228,50 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
+		rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
 }
 
 #ifdef CONFIG_SYSCTL
 
-static void devinet_copy_dflt_conf(int i)
+static void devinet_copy_dflt_conf(struct net *net, int i)
 {
 	struct net_device *dev;
 
 	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(dev);
 		if (in_dev && !test_bit(i, in_dev->cnf.state))
-			in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
+			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
+		rcu_read_unlock();
+	}
+	read_unlock(&dev_base_lock);
+}
+
+static void inet_forward_change(struct net *net)
+{
+	struct net_device *dev;
+	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
+
+	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
+	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
+
+	read_lock(&dev_base_lock);
+	for_each_netdev(net, dev) {
+		struct in_device *in_dev;
+		rcu_read_lock();
+		in_dev = __in_dev_get_rcu(dev);
+		if (in_dev)
+			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
 		rcu_read_unlock();
 	}
 	read_unlock(&dev_base_lock);
+
+	rt_cache_flush(0);
 }
 
 static int devinet_conf_proc(ctl_table *ctl, int write,
@@ -1260,12 +1282,13 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 
 	if (write) {
 		struct ipv4_devconf *cnf = ctl->extra1;
+		struct net *net = ctl->extra2;
 		int i = (int *)ctl->data - cnf->data;
 
 		set_bit(i, cnf->state);
 
-		if (cnf == &ipv4_devconf_dflt)
-			devinet_copy_dflt_conf(i);
+		if (cnf == net->ipv4.devconf_dflt)
+			devinet_copy_dflt_conf(net, i);
 	}
 
 	return ret;
@@ -1276,6 +1299,7 @@ static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
 				  void __user *newval, size_t newlen)
 {
 	struct ipv4_devconf *cnf;
+	struct net *net;
 	int *valp = table->data;
 	int new;
 	int i;
@@ -1311,38 +1335,17 @@ static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
 	*valp = new;
 
 	cnf = table->extra1;
+	net = table->extra2;
 	i = (int *)table->data - cnf->data;
 
 	set_bit(i, cnf->state);
 
-	if (cnf == &ipv4_devconf_dflt)
-		devinet_copy_dflt_conf(i);
+	if (cnf == net->ipv4.devconf_dflt)
+		devinet_copy_dflt_conf(net, i);
 
 	return 1;
 }
 
-void inet_forward_change(void)
-{
-	struct net_device *dev;
-	int on = IPV4_DEVCONF_ALL(FORWARDING);
-
-	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
-	IPV4_DEVCONF_DFLT(FORWARDING) = on;
-
-	read_lock(&dev_base_lock);
-	for_each_netdev(&init_net, dev) {
-		struct in_device *in_dev;
-		rcu_read_lock();
-		in_dev = __in_dev_get_rcu(dev);
-		if (in_dev)
-			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
-		rcu_read_unlock();
-	}
-	read_unlock(&dev_base_lock);
-
-	rt_cache_flush(0);
-}
-
 static int devinet_sysctl_forward(ctl_table *ctl, int write,
 				  struct file* filp, void __user *buffer,
 				  size_t *lenp, loff_t *ppos)
@@ -1352,9 +1355,11 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
-		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
-			inet_forward_change();
-		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
+		struct net *net = ctl->extra2;
+
+		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
+			inet_forward_change(net);
+		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
 			rt_cache_flush(0);
 	}
 
@@ -1419,11 +1424,8 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 
 static struct devinet_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
-	ctl_table devinet_dev[2];
-	ctl_table devinet_conf_dir[2];
-	ctl_table devinet_proto_dir[2];
-	ctl_table devinet_root_dir[2];
+	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
+	char *dev_name;
 } devinet_sysctl = {
 	.devinet_vars = {
 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
@@ -1455,62 +1457,32 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
 					      "promote_secondaries"),
 	},
-	.devinet_dev = {
-		{
-			.ctl_name = NET_PROTO_CONF_ALL,
-			.procname = "all",
-			.mode = 0555,
-			.child = devinet_sysctl.devinet_vars,
-		},
-	},
-	.devinet_conf_dir = {
-		{
-			.ctl_name = NET_IPV4_CONF,
-			.procname = "conf",
-			.mode = 0555,
-			.child = devinet_sysctl.devinet_dev,
-		},
-	},
-	.devinet_proto_dir = {
-		{
-			.ctl_name = NET_IPV4,
-			.procname = "ipv4",
-			.mode = 0555,
-			.child = devinet_sysctl.devinet_conf_dir,
-		},
-	},
-	.devinet_root_dir = {
-		{
-			.ctl_name = CTL_NET,
-			.procname = "net",
-			.mode = 0555,
-			.child = devinet_sysctl.devinet_proto_dir,
-		},
-	},
 };
 
-static void devinet_sysctl_register(struct in_device *in_dev,
-				    struct ipv4_devconf *p)
+static int __devinet_sysctl_register(struct net *net, char *dev_name,
+		int ctl_name, struct ipv4_devconf *p)
 {
 	int i;
-	struct net_device *dev = in_dev ? in_dev->dev : NULL;
-	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
-						 GFP_KERNEL);
-	char *dev_name = NULL;
+	struct devinet_sysctl_table *t;
 
+#define DEVINET_CTL_PATH_DEV	3
+
+	struct ctl_path devinet_ctl_path[] = {
+		{ .procname = "net", .ctl_name = CTL_NET, },
+		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
+		{ /* to be set */ },
+		{ },
+	};
+
+	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
 	if (!t)
-		return;
+		goto out;
+
 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
 		t->devinet_vars[i].extra1 = p;
+		t->devinet_vars[i].extra2 = net;
 	}
-
-	if (dev) {
-		dev_name = dev->name;
-		t->devinet_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-	}
 
 	/*
@@ -1518,56 +1490,183 @@ static void devinet_sysctl_register(struct in_device *in_dev,
 	 * by sysctl and we wouldn't want anyone to change it under our feet
 	 * (see SIOCSIFNAME).
 	 */
-	dev_name = kstrdup(dev_name, GFP_KERNEL);
-	if (!dev_name)
+	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
+	if (!t->dev_name)
 		goto free;
 
-	t->devinet_dev[0].procname = dev_name;
-	t->devinet_dev[0].child = t->devinet_vars;
-	t->devinet_conf_dir[0].child = t->devinet_dev;
-	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
-	t->devinet_root_dir[0].child = t->devinet_proto_dir;
+	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
+	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
 
-	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
+	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
+			t->devinet_vars);
 	if (!t->sysctl_header)
 		goto free_procname;
 
 	p->sysctl = t;
-	return;
+	return 0;
 
-	/* error path */
- free_procname:
-	kfree(dev_name);
- free:
+free_procname:
+	kfree(t->dev_name);
+free:
 	kfree(t);
-	return;
+out:
+	return -ENOBUFS;
 }
 
-static void devinet_sysctl_unregister(struct ipv4_devconf *p)
+static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
+{
+	struct devinet_sysctl_table *t = cnf->sysctl;
+
+	if (t == NULL)
+		return;
+
+	cnf->sysctl = NULL;
+	unregister_sysctl_table(t->sysctl_header);
+	kfree(t->dev_name);
+	kfree(t);
+}
+
+static void devinet_sysctl_register(struct in_device *idev)
+{
+	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
+			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+	__devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
+			idev->dev->ifindex, &idev->cnf);
+}
+
+static void devinet_sysctl_unregister(struct in_device *idev)
+{
+	__devinet_sysctl_unregister(&idev->cnf);
+	neigh_sysctl_unregister(idev->arp_parms);
+}
+
+static struct ctl_table ctl_forward_entry[] = {
+	{
+		.ctl_name	= NET_IPV4_FORWARD,
+		.procname	= "ip_forward",
+		.data		= &ipv4_devconf.data[
+					NET_IPV4_CONF_FORWARDING - 1],
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= devinet_sysctl_forward,
+		.strategy	= devinet_conf_sysctl,
+		.extra1		= &ipv4_devconf,
+		.extra2		= &init_net,
+	},
+	{ },
+};
+
+static __net_initdata struct ctl_path net_ipv4_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ },
+};
+#endif
+
+static __net_init int devinet_init_net(struct net *net)
 {
-	if (p->sysctl) {
-		struct devinet_sysctl_table *t = p->sysctl;
-		p->sysctl = NULL;
-		unregister_sysctl_table(t->sysctl_header);
-		kfree(t->devinet_dev[0].procname);
-		kfree(t);
+	int err;
+	struct ipv4_devconf *all, *dflt;
+#ifdef CONFIG_SYSCTL
+	struct ctl_table *tbl = ctl_forward_entry;
+	struct ctl_table_header *forw_hdr;
+#endif
+
+	err = -ENOMEM;
+	all = &ipv4_devconf;
+	dflt = &ipv4_devconf_dflt;
+
+	if (net != &init_net) {
+		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
+		if (all == NULL)
+			goto err_alloc_all;
+
+		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
+		if (dflt == NULL)
+			goto err_alloc_dflt;
+
+#ifdef CONFIG_SYSCTL
+		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_alloc_ctl;
+
+		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
+		tbl[0].extra1 = all;
+		tbl[0].extra2 = net;
+#endif
 	}
+
+#ifdef CONFIG_SYSCTL
+	err = __devinet_sysctl_register(net, "all",
+			NET_PROTO_CONF_ALL, all);
+	if (err < 0)
+		goto err_reg_all;
+
+	err = __devinet_sysctl_register(net, "default",
+			NET_PROTO_CONF_DEFAULT, dflt);
+	if (err < 0)
+		goto err_reg_dflt;
+
+	err = -ENOMEM;
+	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
+	if (forw_hdr == NULL)
+		goto err_reg_ctl;
+	net->ipv4.forw_hdr = forw_hdr;
+#endif
+
+	net->ipv4.devconf_all = all;
+	net->ipv4.devconf_dflt = dflt;
+	return 0;
+
+#ifdef CONFIG_SYSCTL
+err_reg_ctl:
+	__devinet_sysctl_unregister(dflt);
+err_reg_dflt:
+	__devinet_sysctl_unregister(all);
+err_reg_all:
+	if (tbl != ctl_forward_entry)
+		kfree(tbl);
+err_alloc_ctl:
+#endif
+	if (dflt != &ipv4_devconf_dflt)
+		kfree(dflt);
+err_alloc_dflt:
+	if (all != &ipv4_devconf)
+		kfree(all);
+err_alloc_all:
+	return err;
 }
+
+static __net_exit void devinet_exit_net(struct net *net)
+{
+#ifdef CONFIG_SYSCTL
+	struct ctl_table *tbl;
+
+	tbl = net->ipv4.forw_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv4.forw_hdr);
+	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
+	__devinet_sysctl_unregister(net->ipv4.devconf_all);
+	kfree(tbl);
 #endif
+	kfree(net->ipv4.devconf_dflt);
+	kfree(net->ipv4.devconf_all);
+}
+
+static __net_initdata struct pernet_operations devinet_ops = {
+	.init = devinet_init_net,
+	.exit = devinet_exit_net,
+};
 
 void __init devinet_init(void)
 {
+	register_pernet_subsys(&devinet_ops);
+
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
 
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
-#ifdef CONFIG_SYSCTL
-	devinet_sysctl.sysctl_header =
-		register_sysctl_table(devinet_sysctl.devinet_root_dir);
-	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
-#endif
 }
 
 EXPORT_SYMBOL(in_dev_finish_destroy);
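devinet_init_net()/devinet_exit_net() above follow the pernet_operations contract this merge introduces across net/: .init runs for every namespace as it is created (and for init_net at registration time), .exit runs at teardown, and an .init failure aborts namespace creation. A minimal hedged sketch of the same pattern — struct demo_state and the net->demo pointer are hypothetical stand-ins for real per-net fields such as net->ipv4.devconf_all:

	#include <linux/init.h>
	#include <linux/slab.h>
	#include <net/net_namespace.h>

	struct demo_state {			/* hypothetical per-net data */
		int counter;
	};

	static int __net_init demo_init_net(struct net *net)
	{
		struct demo_state *st = kzalloc(sizeof(*st), GFP_KERNEL);

		if (st == NULL)
			return -ENOMEM;		/* aborts namespace creation */
		net->demo = st;			/* assumes a "demo" field in struct net */
		return 0;
	}

	static void __net_exit demo_exit_net(struct net *net)
	{
		kfree(net->demo);		/* exact mirror of demo_init_net() */
	}

	static struct pernet_operations demo_net_ops = {
		.init = demo_init_net,
		.exit = demo_exit_net,
	};

	static int __init demo_init(void)
	{
		/* Runs .init for init_net now and for each namespace created later. */
		return register_pernet_subsys(&demo_net_ops);
	}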
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1738113268bc..28ea5c77ca23 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -163,7 +163,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	u8 nexthdr[2];
 	struct scatterlist *sg;
 	int padlen;
-	int err;
+	int err = -EINVAL;
 
 	if (!pskb_may_pull(skb, sizeof(*esph)))
 		goto out;
@@ -171,28 +171,31 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (elen <= 0 || (elen & (blksize-1)))
 		goto out;
 
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+	nfrags = err;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	spin_lock(&x->lock);
+
 	/* If integrity check is required, do this. */
 	if (esp->auth.icv_full_len) {
 		u8 sum[alen];
 
 		err = esp_mac_digest(esp, skb, 0, skb->len - alen);
 		if (err)
-			goto out;
+			goto unlock;
 
 		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
 			BUG();
 
 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
-			x->stats.integrity_failed++;
-			goto out;
+			err = -EBADMSG;
+			goto unlock;
 		}
 	}
 
-	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0)
-		goto out;
-
-	skb->ip_summed = CHECKSUM_NONE;
-
 	esph = (struct ip_esp_hdr *)skb->data;
 
 	/* Get ivec. This can be wrong, check against another impls. */
@@ -202,9 +205,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	sg = &esp->sgbuf[0];
 
 	if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
+		err = -ENOMEM;
 		sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
 		if (!sg)
-			goto out;
+			goto unlock;
 	}
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
@@ -213,12 +217,17 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
 	if (unlikely(sg != &esp->sgbuf[0]))
 		kfree(sg);
+
+unlock:
+	spin_unlock(&x->lock);
+
 	if (unlikely(err))
-		return err;
+		goto out;
 
 	if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 		BUG();
 
+	err = -EINVAL;
 	padlen = nexthdr[0];
 	if (padlen+2 >= elen)
 		goto out;
@@ -276,7 +285,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	return nexthdr[1];
 
 out:
-	return -EINVAL;
+	return err;
 }
 
 static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
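Besides taking x->lock around the ICV check and decryption, the esp_input() rework above switches from returning a fixed -EINVAL to propagating a specific error code through a single exit label. The shape of that idiom, reduced to a sketch — all demo_* helpers are hypothetical:

	static int demo_parse_header(void *pkt);	/* hypothetical helpers */
	static int demo_check_icv(void *pkt);
	static int demo_decrypt(void *pkt);

	/* err starts at the most generic failure and is refined before each
	 * fallible step; every bail-out funnels through one label, so the
	 * last assignment before the jump is the error the caller sees. */
	static int demo_input(void *pkt)
	{
		int err = -EINVAL;		/* default: malformed packet */

		if (!demo_parse_header(pkt))
			goto out;

		err = demo_check_icv(pkt);	/* may yield -EBADMSG */
		if (err)
			goto out;

		err = demo_decrypt(pkt);	/* may yield -ENOMEM, -EINVAL, ... */
		if (err)
			goto out;

		err = 0;
	out:
		return err;
	}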
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 97abf934d185..d28261826bc2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -47,59 +47,65 @@
47#include <net/ip_fib.h> 47#include <net/ip_fib.h>
48#include <net/rtnetlink.h> 48#include <net/rtnetlink.h>
49 49
50#define FFprint(a...) printk(KERN_DEBUG a) 50#ifndef CONFIG_IP_MULTIPLE_TABLES
51 51
52static struct sock *fibnl; 52static int __net_init fib4_rules_init(struct net *net)
53{
54 struct fib_table *local_table, *main_table;
53 55
54#ifndef CONFIG_IP_MULTIPLE_TABLES 56 local_table = fib_hash_table(RT_TABLE_LOCAL);
57 if (local_table == NULL)
58 return -ENOMEM;
55 59
56struct fib_table *ip_fib_local_table; 60 main_table = fib_hash_table(RT_TABLE_MAIN);
57struct fib_table *ip_fib_main_table; 61 if (main_table == NULL)
62 goto fail;
58 63
59#define FIB_TABLE_HASHSZ 1 64 hlist_add_head_rcu(&local_table->tb_hlist,
60static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; 65 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
66 hlist_add_head_rcu(&main_table->tb_hlist,
67 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
68 return 0;
61 69
62static void __init fib4_rules_init(void) 70fail:
63{ 71 kfree(local_table);
64 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); 72 return -ENOMEM;
65 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
66 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
67 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
68} 73}
69#else 74#else
70 75
71#define FIB_TABLE_HASHSZ 256 76struct fib_table *fib_new_table(struct net *net, u32 id)
72static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
73
74struct fib_table *fib_new_table(u32 id)
75{ 77{
76 struct fib_table *tb; 78 struct fib_table *tb;
77 unsigned int h; 79 unsigned int h;
78 80
79 if (id == 0) 81 if (id == 0)
80 id = RT_TABLE_MAIN; 82 id = RT_TABLE_MAIN;
81 tb = fib_get_table(id); 83 tb = fib_get_table(net, id);
82 if (tb) 84 if (tb)
83 return tb; 85 return tb;
84 tb = fib_hash_init(id); 86
87 tb = fib_hash_table(id);
85 if (!tb) 88 if (!tb)
86 return NULL; 89 return NULL;
87 h = id & (FIB_TABLE_HASHSZ - 1); 90 h = id & (FIB_TABLE_HASHSZ - 1);
88 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); 91 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
89 return tb; 92 return tb;
90} 93}
91 94
92struct fib_table *fib_get_table(u32 id) 95struct fib_table *fib_get_table(struct net *net, u32 id)
93{ 96{
94 struct fib_table *tb; 97 struct fib_table *tb;
95 struct hlist_node *node; 98 struct hlist_node *node;
99 struct hlist_head *head;
96 unsigned int h; 100 unsigned int h;
97 101
98 if (id == 0) 102 if (id == 0)
99 id = RT_TABLE_MAIN; 103 id = RT_TABLE_MAIN;
100 h = id & (FIB_TABLE_HASHSZ - 1); 104 h = id & (FIB_TABLE_HASHSZ - 1);
105
101 rcu_read_lock(); 106 rcu_read_lock();
102 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { 107 head = &net->ipv4.fib_table_hash[h];
108 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
103 if (tb->tb_id == id) { 109 if (tb->tb_id == id) {
104 rcu_read_unlock(); 110 rcu_read_unlock();
105 return tb; 111 return tb;
@@ -110,15 +116,32 @@ struct fib_table *fib_get_table(u32 id)
110} 116}
111#endif /* CONFIG_IP_MULTIPLE_TABLES */ 117#endif /* CONFIG_IP_MULTIPLE_TABLES */
112 118
113static void fib_flush(void) 119void fib_select_default(struct net *net,
120 const struct flowi *flp, struct fib_result *res)
121{
122 struct fib_table *tb;
123 int table = RT_TABLE_MAIN;
124#ifdef CONFIG_IP_MULTIPLE_TABLES
125 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
126 return;
127 table = res->r->table;
128#endif
129 tb = fib_get_table(net, table);
130 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
131 tb->tb_select_default(tb, flp, res);
132}
133
134static void fib_flush(struct net *net)
114{ 135{
115 int flushed = 0; 136 int flushed = 0;
116 struct fib_table *tb; 137 struct fib_table *tb;
117 struct hlist_node *node; 138 struct hlist_node *node;
139 struct hlist_head *head;
118 unsigned int h; 140 unsigned int h;
119 141
120 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 142 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
121 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) 143 head = &net->ipv4.fib_table_hash[h];
144 hlist_for_each_entry(tb, node, head, tb_hlist)
122 flushed += tb->tb_flush(tb); 145 flushed += tb->tb_flush(tb);
123 } 146 }
124 147
@@ -130,7 +153,7 @@ static void fib_flush(void)
130 * Find the first device with a given source address. 153 * Find the first device with a given source address.
131 */ 154 */
132 155
133struct net_device * ip_dev_find(__be32 addr) 156struct net_device * ip_dev_find(struct net *net, __be32 addr)
134{ 157{
135 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 158 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
136 struct fib_result res; 159 struct fib_result res;
@@ -141,7 +164,7 @@ struct net_device * ip_dev_find(__be32 addr)
141 res.r = NULL; 164 res.r = NULL;
142#endif 165#endif
143 166
144 local_table = fib_get_table(RT_TABLE_LOCAL); 167 local_table = fib_get_table(net, RT_TABLE_LOCAL);
145 if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) 168 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
146 return NULL; 169 return NULL;
147 if (res.type != RTN_LOCAL) 170 if (res.type != RTN_LOCAL)
@@ -155,33 +178,51 @@ out:
155 return dev; 178 return dev;
156} 179}
157 180
158unsigned inet_addr_type(__be32 addr) 181/*
182 * Find address type as if only "dev" was present in the system. If
183 * on_dev is NULL then all interfaces are taken into consideration.
184 */
185static inline unsigned __inet_dev_addr_type(struct net *net,
186 const struct net_device *dev,
187 __be32 addr)
159{ 188{
160 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 189 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
161 struct fib_result res; 190 struct fib_result res;
162 unsigned ret = RTN_BROADCAST; 191 unsigned ret = RTN_BROADCAST;
163 struct fib_table *local_table; 192 struct fib_table *local_table;
164 193
165 if (ZERONET(addr) || BADCLASS(addr)) 194 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
166 return RTN_BROADCAST; 195 return RTN_BROADCAST;
167 if (MULTICAST(addr)) 196 if (ipv4_is_multicast(addr))
168 return RTN_MULTICAST; 197 return RTN_MULTICAST;
169 198
170#ifdef CONFIG_IP_MULTIPLE_TABLES 199#ifdef CONFIG_IP_MULTIPLE_TABLES
171 res.r = NULL; 200 res.r = NULL;
172#endif 201#endif
173 202
174 local_table = fib_get_table(RT_TABLE_LOCAL); 203 local_table = fib_get_table(net, RT_TABLE_LOCAL);
175 if (local_table) { 204 if (local_table) {
176 ret = RTN_UNICAST; 205 ret = RTN_UNICAST;
177 if (!local_table->tb_lookup(local_table, &fl, &res)) { 206 if (!local_table->tb_lookup(local_table, &fl, &res)) {
178 ret = res.type; 207 if (!dev || dev == res.fi->fib_dev)
208 ret = res.type;
179 fib_res_put(&res); 209 fib_res_put(&res);
180 } 210 }
181 } 211 }
182 return ret; 212 return ret;
183} 213}
184 214
215unsigned int inet_addr_type(struct net *net, __be32 addr)
216{
217 return __inet_dev_addr_type(net, NULL, addr);
218}
219
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr)
222{
223 return __inet_dev_addr_type(net, dev, addr);
224}
225
185/* Given (packet source, input interface) and optional (dst, oif, tos): 226/* Given (packet source, input interface) and optional (dst, oif, tos):
186 - (main) check, that source is valid i.e. not broadcast or our local 227 - (main) check, that source is valid i.e. not broadcast or our local
187 address. 228 address.
@@ -202,6 +243,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
202 struct fib_result res; 243 struct fib_result res;
203 int no_addr, rpf; 244 int no_addr, rpf;
204 int ret; 245 int ret;
246 struct net *net;
205 247
206 no_addr = rpf = 0; 248 no_addr = rpf = 0;
207 rcu_read_lock(); 249 rcu_read_lock();
@@ -215,7 +257,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
215 if (in_dev == NULL) 257 if (in_dev == NULL)
216 goto e_inval; 258 goto e_inval;
217 259
218 if (fib_lookup(&fl, &res)) 260 net = dev->nd_net;
261 if (fib_lookup(net, &fl, &res))
219 goto last_resort; 262 goto last_resort;
220 if (res.type != RTN_UNICAST) 263 if (res.type != RTN_UNICAST)
221 goto e_inval_res; 264 goto e_inval_res;
@@ -239,7 +282,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
239 fl.oif = dev->ifindex; 282 fl.oif = dev->ifindex;
240 283
241 ret = 0; 284 ret = 0;
242 if (fib_lookup(&fl, &res) == 0) { 285 if (fib_lookup(net, &fl, &res) == 0) {
243 if (res.type == RTN_UNICAST) { 286 if (res.type == RTN_UNICAST) {
244 *spec_dst = FIB_RES_PREFSRC(res); 287 *spec_dst = FIB_RES_PREFSRC(res);
245 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 288 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
@@ -278,13 +321,14 @@ static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
278 return len + nla_total_size(4); 321 return len + nla_total_size(4);
279} 322}
280 323
281static int rtentry_to_fib_config(int cmd, struct rtentry *rt, 324static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
282 struct fib_config *cfg) 325 struct fib_config *cfg)
283{ 326{
284 __be32 addr; 327 __be32 addr;
285 int plen; 328 int plen;
286 329
287 memset(cfg, 0, sizeof(*cfg)); 330 memset(cfg, 0, sizeof(*cfg));
331 cfg->fc_nlinfo.nl_net = net;
288 332
289 if (rt->rt_dst.sa_family != AF_INET) 333 if (rt->rt_dst.sa_family != AF_INET)
290 return -EAFNOSUPPORT; 334 return -EAFNOSUPPORT;
@@ -345,7 +389,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
345 colon = strchr(devname, ':'); 389 colon = strchr(devname, ':');
346 if (colon) 390 if (colon)
347 *colon = 0; 391 *colon = 0;
348 dev = __dev_get_by_name(&init_net, devname); 392 dev = __dev_get_by_name(net, devname);
349 if (!dev) 393 if (!dev)
350 return -ENODEV; 394 return -ENODEV;
351 cfg->fc_oif = dev->ifindex; 395 cfg->fc_oif = dev->ifindex;
@@ -368,7 +412,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
368 if (rt->rt_gateway.sa_family == AF_INET && addr) { 412 if (rt->rt_gateway.sa_family == AF_INET && addr) {
369 cfg->fc_gw = addr; 413 cfg->fc_gw = addr;
370 if (rt->rt_flags & RTF_GATEWAY && 414 if (rt->rt_flags & RTF_GATEWAY &&
371 inet_addr_type(addr) == RTN_UNICAST) 415 inet_addr_type(net, addr) == RTN_UNICAST)
372 cfg->fc_scope = RT_SCOPE_UNIVERSE; 416 cfg->fc_scope = RT_SCOPE_UNIVERSE;
373 } 417 }
374 418
@@ -409,7 +453,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
409 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 453 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
410 */ 454 */
411 455
412int ip_rt_ioctl(unsigned int cmd, void __user *arg) 456int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
413{ 457{
414 struct fib_config cfg; 458 struct fib_config cfg;
415 struct rtentry rt; 459 struct rtentry rt;
@@ -425,18 +469,18 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
425 return -EFAULT; 469 return -EFAULT;
426 470
427 rtnl_lock(); 471 rtnl_lock();
428 err = rtentry_to_fib_config(cmd, &rt, &cfg); 472 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
429 if (err == 0) { 473 if (err == 0) {
430 struct fib_table *tb; 474 struct fib_table *tb;
431 475
432 if (cmd == SIOCDELRT) { 476 if (cmd == SIOCDELRT) {
433 tb = fib_get_table(cfg.fc_table); 477 tb = fib_get_table(net, cfg.fc_table);
434 if (tb) 478 if (tb)
435 err = tb->tb_delete(tb, &cfg); 479 err = tb->tb_delete(tb, &cfg);
436 else 480 else
437 err = -ESRCH; 481 err = -ESRCH;
438 } else { 482 } else {
439 tb = fib_new_table(cfg.fc_table); 483 tb = fib_new_table(net, cfg.fc_table);
440 if (tb) 484 if (tb)
441 err = tb->tb_insert(tb, &cfg); 485 err = tb->tb_insert(tb, &cfg);
442 else 486 else
@@ -466,8 +510,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
466 [RTA_FLOW] = { .type = NLA_U32 }, 510 [RTA_FLOW] = { .type = NLA_U32 },
467}; 511};
468 512
469static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, 513static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
470 struct fib_config *cfg) 514 struct nlmsghdr *nlh, struct fib_config *cfg)
471{ 515{
472 struct nlattr *attr; 516 struct nlattr *attr;
473 int err, remaining; 517 int err, remaining;
@@ -491,6 +535,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
491 535
492 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 536 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
493 cfg->fc_nlinfo.nlh = nlh; 537 cfg->fc_nlinfo.nlh = nlh;
538 cfg->fc_nlinfo.nl_net = net;
494 539
495 if (cfg->fc_type > RTN_MAX) { 540 if (cfg->fc_type > RTN_MAX) {
496 err = -EINVAL; 541 err = -EINVAL;
@@ -538,15 +583,16 @@ errout:
538 583
539static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 584static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
540{ 585{
586 struct net *net = skb->sk->sk_net;
541 struct fib_config cfg; 587 struct fib_config cfg;
542 struct fib_table *tb; 588 struct fib_table *tb;
543 int err; 589 int err;
544 590
545 err = rtm_to_fib_config(skb, nlh, &cfg); 591 err = rtm_to_fib_config(net, skb, nlh, &cfg);
546 if (err < 0) 592 if (err < 0)
547 goto errout; 593 goto errout;
548 594
549 tb = fib_get_table(cfg.fc_table); 595 tb = fib_get_table(net, cfg.fc_table);
550 if (tb == NULL) { 596 if (tb == NULL) {
551 err = -ESRCH; 597 err = -ESRCH;
552 goto errout; 598 goto errout;
@@ -559,15 +605,16 @@ errout:
559 605
560static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 606static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
561{ 607{
608 struct net *net = skb->sk->sk_net;
562 struct fib_config cfg; 609 struct fib_config cfg;
563 struct fib_table *tb; 610 struct fib_table *tb;
564 int err; 611 int err;
565 612
566 err = rtm_to_fib_config(skb, nlh, &cfg); 613 err = rtm_to_fib_config(net, skb, nlh, &cfg);
567 if (err < 0) 614 if (err < 0)
568 goto errout; 615 goto errout;
569 616
570 tb = fib_new_table(cfg.fc_table); 617 tb = fib_new_table(net, cfg.fc_table);
571 if (tb == NULL) { 618 if (tb == NULL) {
572 err = -ENOBUFS; 619 err = -ENOBUFS;
573 goto errout; 620 goto errout;
@@ -580,10 +627,12 @@ errout:
580 627
581static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 628static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
582{ 629{
630 struct net *net = skb->sk->sk_net;
583 unsigned int h, s_h; 631 unsigned int h, s_h;
584 unsigned int e = 0, s_e; 632 unsigned int e = 0, s_e;
585 struct fib_table *tb; 633 struct fib_table *tb;
586 struct hlist_node *node; 634 struct hlist_node *node;
635 struct hlist_head *head;
587 int dumped = 0; 636 int dumped = 0;
588 637
589 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 638 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
@@ -595,7 +644,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
595 644
596 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 645 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
597 e = 0; 646 e = 0;
598 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { 647 head = &net->ipv4.fib_table_hash[h];
648 hlist_for_each_entry(tb, node, head, tb_hlist) {
599 if (e < s_e) 649 if (e < s_e)
600 goto next; 650 goto next;
601 if (dumped) 651 if (dumped)
@@ -624,6 +674,7 @@ out:
624 674
625static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 675static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
626{ 676{
677 struct net *net = ifa->ifa_dev->dev->nd_net;
627 struct fib_table *tb; 678 struct fib_table *tb;
628 struct fib_config cfg = { 679 struct fib_config cfg = {
629 .fc_protocol = RTPROT_KERNEL, 680 .fc_protocol = RTPROT_KERNEL,
@@ -633,12 +684,15 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
633 .fc_prefsrc = ifa->ifa_local, 684 .fc_prefsrc = ifa->ifa_local,
634 .fc_oif = ifa->ifa_dev->dev->ifindex, 685 .fc_oif = ifa->ifa_dev->dev->ifindex,
635 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 686 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
687 .fc_nlinfo = {
688 .nl_net = net,
689 },
636 }; 690 };
637 691
638 if (type == RTN_UNICAST) 692 if (type == RTN_UNICAST)
639 tb = fib_new_table(RT_TABLE_MAIN); 693 tb = fib_new_table(net, RT_TABLE_MAIN);
640 else 694 else
641 tb = fib_new_table(RT_TABLE_LOCAL); 695 tb = fib_new_table(net, RT_TABLE_LOCAL);
642 696
643 if (tb == NULL) 697 if (tb == NULL)
644 return; 698 return;
@@ -668,7 +722,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
668 if (ifa->ifa_flags&IFA_F_SECONDARY) { 722 if (ifa->ifa_flags&IFA_F_SECONDARY) {
669 prim = inet_ifa_byprefix(in_dev, prefix, mask); 723 prim = inet_ifa_byprefix(in_dev, prefix, mask);
670 if (prim == NULL) { 724 if (prim == NULL) {
671 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n"); 725 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
672 return; 726 return;
673 } 727 }
674 } 728 }
@@ -682,7 +736,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
682 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 736 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
683 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 737 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
684 738
685 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 739 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
686 (prefix != addr || ifa->ifa_prefixlen < 32)) { 740 (prefix != addr || ifa->ifa_prefixlen < 32)) {
687 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 741 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
688 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 742 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
@@ -715,7 +769,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
715 else { 769 else {
716 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 770 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
717 if (prim == NULL) { 771 if (prim == NULL) {
718 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n"); 772 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
719 return; 773 return;
720 } 774 }
721 } 775 }
@@ -747,7 +801,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
747 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 801 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
748 802
749 /* Check, that this local address finally disappeared. */ 803 /* Check, that this local address finally disappeared. */
750 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { 804 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
751 /* And the last, but not the least thing. 805 /* And the last, but not the least thing.
752 We must flush stray FIB entries. 806 We must flush stray FIB entries.
753 807
@@ -755,7 +809,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
755 for stray nexthop entries, then ignite fib_flush. 809 for stray nexthop entries, then ignite fib_flush.
756 */ 810 */
757 if (fib_sync_down(ifa->ifa_local, NULL, 0)) 811 if (fib_sync_down(ifa->ifa_local, NULL, 0))
758 fib_flush(); 812 fib_flush(dev->nd_net);
759 } 813 }
760 } 814 }
761#undef LOCAL_OK 815#undef LOCAL_OK
@@ -797,11 +851,13 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
797 851
798static void nl_fib_input(struct sk_buff *skb) 852static void nl_fib_input(struct sk_buff *skb)
799{ 853{
854 struct net *net;
800 struct fib_result_nl *frn; 855 struct fib_result_nl *frn;
801 struct nlmsghdr *nlh; 856 struct nlmsghdr *nlh;
802 struct fib_table *tb; 857 struct fib_table *tb;
803 u32 pid; 858 u32 pid;
804 859
860 net = skb->sk->sk_net;
805 nlh = nlmsg_hdr(skb); 861 nlh = nlmsg_hdr(skb);
806 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 862 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
807 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 863 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
@@ -813,26 +869,37 @@ static void nl_fib_input(struct sk_buff *skb)
813 nlh = nlmsg_hdr(skb); 869 nlh = nlmsg_hdr(skb);
814 870
815 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 871 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
816 tb = fib_get_table(frn->tb_id_in); 872 tb = fib_get_table(net, frn->tb_id_in);
817 873
818 nl_fib_lookup(frn, tb); 874 nl_fib_lookup(frn, tb);
819 875
820 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 876 pid = NETLINK_CB(skb).pid; /* pid of sending process */
821 NETLINK_CB(skb).pid = 0; /* from kernel */ 877 NETLINK_CB(skb).pid = 0; /* from kernel */
822 NETLINK_CB(skb).dst_group = 0; /* unicast */ 878 NETLINK_CB(skb).dst_group = 0; /* unicast */
823 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT); 879 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
880}
881
882static int nl_fib_lookup_init(struct net *net)
883{
884 struct sock *sk;
885 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
886 nl_fib_input, NULL, THIS_MODULE);
887 if (sk == NULL)
888 return -EAFNOSUPPORT;
889 net->ipv4.fibnl = sk;
890 return 0;
824} 891}
825 892
826static void nl_fib_lookup_init(void) 893static void nl_fib_lookup_exit(struct net *net)
827{ 894{
828 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, 895 netlink_kernel_release(net->ipv4.fibnl);
829 nl_fib_input, NULL, THIS_MODULE); 896 net->ipv4.fibnl = NULL;
830} 897}
831 898
832static void fib_disable_ip(struct net_device *dev, int force) 899static void fib_disable_ip(struct net_device *dev, int force)
833{ 900{
834 if (fib_sync_down(0, dev, force)) 901 if (fib_sync_down(0, dev, force))
835 fib_flush(); 902 fib_flush(dev->nd_net);
836 rt_cache_flush(0); 903 rt_cache_flush(0);
837 arp_ifdown(dev); 904 arp_ifdown(dev);
838} 905}
@@ -869,9 +936,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
869 struct net_device *dev = ptr; 936 struct net_device *dev = ptr;
870 struct in_device *in_dev = __in_dev_get_rtnl(dev); 937 struct in_device *in_dev = __in_dev_get_rtnl(dev);
871 938
872 if (dev->nd_net != &init_net)
873 return NOTIFY_DONE;
874
875 if (event == NETDEV_UNREGISTER) { 939 if (event == NETDEV_UNREGISTER) {
876 fib_disable_ip(dev, 2); 940 fib_disable_ip(dev, 2);
877 return NOTIFY_DONE; 941 return NOTIFY_DONE;
@@ -909,23 +973,92 @@ static struct notifier_block fib_netdev_notifier = {
909 .notifier_call =fib_netdev_event, 973 .notifier_call =fib_netdev_event,
910}; 974};
911 975
912void __init ip_fib_init(void) 976static int __net_init ip_fib_net_init(struct net *net)
913{ 977{
914 unsigned int i; 978 unsigned int i;
915 979
980 net->ipv4.fib_table_hash = kzalloc(
981 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
982 if (net->ipv4.fib_table_hash == NULL)
983 return -ENOMEM;
984
916 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 985 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
917 INIT_HLIST_HEAD(&fib_table_hash[i]); 986 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
918 987
919 fib4_rules_init(); 988 return fib4_rules_init(net);
989}
920 990
921 register_netdevice_notifier(&fib_netdev_notifier); 991static void __net_exit ip_fib_net_exit(struct net *net)
922 register_inetaddr_notifier(&fib_inetaddr_notifier); 992{
923 nl_fib_lookup_init(); 993 unsigned int i;
994
995#ifdef CONFIG_IP_MULTIPLE_TABLES
996 fib4_rules_exit(net);
997#endif
924 998
999 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1000 struct fib_table *tb;
1001 struct hlist_head *head;
1002 struct hlist_node *node, *tmp;
1003
1004 head = &net->ipv4.fib_table_hash[i];
1005 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1006 hlist_del(node);
1007 tb->tb_flush(tb);
1008 kfree(tb);
1009 }
1010 }
1011 kfree(net->ipv4.fib_table_hash);
1012}
1013
1014static int __net_init fib_net_init(struct net *net)
1015{
1016 int error;
1017
1018 error = ip_fib_net_init(net);
1019 if (error < 0)
1020 goto out;
1021 error = nl_fib_lookup_init(net);
1022 if (error < 0)
1023 goto out_nlfl;
1024 error = fib_proc_init(net);
1025 if (error < 0)
1026 goto out_proc;
1027out:
1028 return error;
1029
1030out_proc:
1031 nl_fib_lookup_exit(net);
1032out_nlfl:
1033 ip_fib_net_exit(net);
1034 goto out;
1035}
1036
1037static void __net_exit fib_net_exit(struct net *net)
1038{
1039 fib_proc_exit(net);
1040 nl_fib_lookup_exit(net);
1041 ip_fib_net_exit(net);
1042}
1043
1044static struct pernet_operations fib_net_ops = {
1045 .init = fib_net_init,
1046 .exit = fib_net_exit,
1047};
1048
1049void __init ip_fib_init(void)
1050{
925 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1051 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
926 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1052 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
927 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1053 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1054
1055 register_pernet_subsys(&fib_net_ops);
1056 register_netdevice_notifier(&fib_netdev_notifier);
1057 register_inetaddr_notifier(&fib_inetaddr_notifier);
1058
1059 fib_hash_init();
928} 1060}
929 1061
930EXPORT_SYMBOL(inet_addr_type); 1062EXPORT_SYMBOL(inet_addr_type);
1063EXPORT_SYMBOL(inet_dev_addr_type);
931EXPORT_SYMBOL(ip_dev_find); 1064EXPORT_SYMBOL(ip_dev_find);
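
The fib_frontend.c portion above is the core of the netns conversion: the table hash, the NETLINK_FIB_LOOKUP socket and the proc entry all move behind a struct pernet_operations, and fib_net_init() unwinds in reverse order when a later step fails. A minimal userspace sketch of that init/unwind ladder, with hypothetical subsystem names standing in for ip_fib_net_init(), nl_fib_lookup_init() and fib_proc_init() (not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical per-"namespace" context; the three init/exit pairs
 * stand in for the FIB pieces named in the lead-in. */
struct net_ctx { void *tables, *nlsock, *procdir; };

static int tables_init(struct net_ctx *n) { n->tables = malloc(1); return n->tables ? 0 : -1; }
static void tables_exit(struct net_ctx *n) { free(n->tables); n->tables = NULL; }
static int nlsock_init(struct net_ctx *n) { n->nlsock = malloc(1); return n->nlsock ? 0 : -1; }
static void nlsock_exit(struct net_ctx *n) { free(n->nlsock); n->nlsock = NULL; }
static int proc_init(struct net_ctx *n)   { n->procdir = malloc(1); return n->procdir ? 0 : -1; }
static void proc_exit(struct net_ctx *n)  { free(n->procdir); n->procdir = NULL; }

/* Mirrors fib_net_init(): each failing step unwinds the earlier ones. */
static int ctx_init(struct net_ctx *n)
{
	int err;

	err = tables_init(n);
	if (err < 0)
		goto out;
	err = nlsock_init(n);
	if (err < 0)
		goto out_nlsock;
	err = proc_init(n);
	if (err < 0)
		goto out_proc;
out:
	return err;

out_proc:
	nlsock_exit(n);
out_nlsock:
	tables_exit(n);
	goto out;
}

static void ctx_exit(struct net_ctx *n)
{
	proc_exit(n);
	nlsock_exit(n);
	tables_exit(n);
}

int main(void)
{
	struct net_ctx n = { 0 };

	if (ctx_init(&n) == 0) {
		puts("per-net context up");
		ctx_exit(&n);
	}
	return 0;
}

The trailing goto out keeps a single exit point, the same shape fib_net_init() uses above.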
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 0dfee27cfbcd..a15b2f1b2721 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -52,6 +52,7 @@ struct fib_node {
52 struct hlist_node fn_hash; 52 struct hlist_node fn_hash;
53 struct list_head fn_alias; 53 struct list_head fn_alias;
54 __be32 fn_key; 54 __be32 fn_key;
55 struct fib_alias fn_embedded_alias;
55}; 56};
56 57
57struct fn_zone { 58struct fn_zone {
@@ -102,10 +103,10 @@ static struct hlist_head *fz_hash_alloc(int divisor)
102 unsigned long size = divisor * sizeof(struct hlist_head); 103 unsigned long size = divisor * sizeof(struct hlist_head);
103 104
104 if (size <= PAGE_SIZE) { 105 if (size <= PAGE_SIZE) {
105 return kmalloc(size, GFP_KERNEL); 106 return kzalloc(size, GFP_KERNEL);
106 } else { 107 } else {
107 return (struct hlist_head *) 108 return (struct hlist_head *)
108 __get_free_pages(GFP_KERNEL, get_order(size)); 109 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
109 } 110 }
110} 111}
111 112
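
fz_hash_alloc() now hands back zeroed memory on both branches (kzalloc() below a page, __GFP_ZERO above it), which is what lets the memset() calls later in this file go away. A standalone C analogue, with FAKE_PAGE_SIZE as an assumed stand-in for PAGE_SIZE:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define FAKE_PAGE_SIZE 4096 /* assumption: stands in for PAGE_SIZE */

/* Userspace analogue of the patched fz_hash_alloc(): whichever branch
 * is taken, the caller gets zeroed memory. */
static void *zeroed_hash_alloc(size_t size)
{
	if (size <= FAKE_PAGE_SIZE)
		return calloc(1, size);  /* kzalloc() analogue */

	/* __get_free_pages(GFP_KERNEL | __GFP_ZERO, ...) analogue */
	void *p = malloc(size);
	if (p)
		memset(p, 0, size);
	return p;
}

int main(void)
{
	unsigned char *h = zeroed_hash_alloc(2 * FAKE_PAGE_SIZE);
	if (h)
		printf("first byte after alloc: %u\n", h[0]); /* prints 0 */
	free(h);
	return 0;
}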
@@ -168,14 +169,13 @@ static void fn_rehash_zone(struct fn_zone *fz)
168 new_hashmask = (new_divisor - 1); 169 new_hashmask = (new_divisor - 1);
169 170
170#if RT_CACHE_DEBUG >= 2 171#if RT_CACHE_DEBUG >= 2
171 printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor); 172 printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
173 fz->fz_order, old_divisor);
172#endif 174#endif
173 175
174 ht = fz_hash_alloc(new_divisor); 176 ht = fz_hash_alloc(new_divisor);
175 177
176 if (ht) { 178 if (ht) {
177 memset(ht, 0, new_divisor * sizeof(struct hlist_head));
178
179 write_lock_bh(&fib_hash_lock); 179 write_lock_bh(&fib_hash_lock);
180 old_ht = fz->fz_hash; 180 old_ht = fz->fz_hash;
181 fz->fz_hash = ht; 181 fz->fz_hash = ht;
@@ -194,10 +194,13 @@ static inline void fn_free_node(struct fib_node * f)
194 kmem_cache_free(fn_hash_kmem, f); 194 kmem_cache_free(fn_hash_kmem, f);
195} 195}
196 196
197static inline void fn_free_alias(struct fib_alias *fa) 197static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
198{ 198{
199 fib_release_info(fa->fa_info); 199 fib_release_info(fa->fa_info);
200 kmem_cache_free(fn_alias_kmem, fa); 200 if (fa == &f->fn_embedded_alias)
201 fa->fa_info = NULL;
202 else
203 kmem_cache_free(fn_alias_kmem, fa);
201} 204}
202 205
203static struct fn_zone * 206static struct fn_zone *
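
fn_free_alias() grows a second argument because of the fn_embedded_alias field added above: the first alias of a node now lives inside the node itself, and only the second and later aliases cost an fn_alias_kmem allocation. A userspace sketch of that embed-first, allocate-the-rest pattern (names are illustrative, not the kernel's):

#include <stdlib.h>
#include <stdio.h>

struct alias { const void *info; };

struct node {
	struct alias embedded;   /* first alias, no extra allocation */
};

static struct alias *alias_get(struct node *n)
{
	if (n->embedded.info == NULL)
		return &n->embedded;             /* common case: reuse slot */
	return malloc(sizeof(struct alias)); /* overflow case */
}

static void alias_put(struct node *n, struct alias *a)
{
	if (a == &n->embedded)
		a->info = NULL;  /* just mark the embedded slot free */
	else
		free(a);         /* only spilled aliases hit the allocator */
}

int main(void)
{
	struct node n = { { NULL } };
	struct alias *first = alias_get(&n);
	first->info = "route A";
	struct alias *second = alias_get(&n); /* spills to the heap */
	second->info = "route B";
	printf("embedded? first=%d second=%d\n",
	       first == &n.embedded, second == &n.embedded);
	alias_put(&n, second);
	alias_put(&n, first);
	return 0;
}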
@@ -219,7 +222,6 @@ fn_new_zone(struct fn_hash *table, int z)
219 kfree(fz); 222 kfree(fz);
220 return NULL; 223 return NULL;
221 } 224 }
222 memset(fz->fz_hash, 0, fz->fz_divisor * sizeof(struct hlist_head *));
223 fz->fz_order = z; 225 fz->fz_order = z;
224 fz->fz_mask = inet_make_mask(z); 226 fz->fz_mask = inet_make_mask(z);
225 227
@@ -275,8 +277,6 @@ out:
275 return err; 277 return err;
276} 278}
277 279
278static int fn_hash_last_dflt=-1;
279
280static void 280static void
281fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 281fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
282{ 282{
@@ -317,12 +317,9 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
317 if (next_fi != res->fi) 317 if (next_fi != res->fi)
318 break; 318 break;
319 } else if (!fib_detect_death(fi, order, &last_resort, 319 } else if (!fib_detect_death(fi, order, &last_resort,
320 &last_idx, &fn_hash_last_dflt)) { 320 &last_idx, tb->tb_default)) {
321 if (res->fi) 321 fib_result_assign(res, fi);
322 fib_info_put(res->fi); 322 tb->tb_default = order;
323 res->fi = fi;
324 atomic_inc(&fi->fib_clntref);
325 fn_hash_last_dflt = order;
326 goto out; 323 goto out;
327 } 324 }
328 fi = next_fi; 325 fi = next_fi;
@@ -331,27 +328,20 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
331 } 328 }
332 329
333 if (order <= 0 || fi == NULL) { 330 if (order <= 0 || fi == NULL) {
334 fn_hash_last_dflt = -1; 331 tb->tb_default = -1;
335 goto out; 332 goto out;
336 } 333 }
337 334
338 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) { 335 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
339 if (res->fi) 336 tb->tb_default)) {
340 fib_info_put(res->fi); 337 fib_result_assign(res, fi);
341 res->fi = fi; 338 tb->tb_default = order;
342 atomic_inc(&fi->fib_clntref);
343 fn_hash_last_dflt = order;
344 goto out; 339 goto out;
345 } 340 }
346 341
347 if (last_idx >= 0) { 342 if (last_idx >= 0)
348 if (res->fi) 343 fib_result_assign(res, last_resort);
349 fib_info_put(res->fi); 344 tb->tb_default = last_idx;
350 res->fi = last_resort;
351 if (last_resort)
352 atomic_inc(&last_resort->fib_clntref);
353 }
354 fn_hash_last_dflt = last_idx;
355out: 345out:
356 read_unlock(&fib_hash_lock); 346 read_unlock(&fib_hash_lock);
357} 347}
@@ -490,15 +480,12 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
490 goto out; 480 goto out;
491 481
492 err = -ENOBUFS; 482 err = -ENOBUFS;
493 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
494 if (new_fa == NULL)
495 goto out;
496 483
497 new_f = NULL; 484 new_f = NULL;
498 if (!f) { 485 if (!f) {
499 new_f = kmem_cache_alloc(fn_hash_kmem, GFP_KERNEL); 486 new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
500 if (new_f == NULL) 487 if (new_f == NULL)
501 goto out_free_new_fa; 488 goto out;
502 489
503 INIT_HLIST_NODE(&new_f->fn_hash); 490 INIT_HLIST_NODE(&new_f->fn_hash);
504 INIT_LIST_HEAD(&new_f->fn_alias); 491 INIT_LIST_HEAD(&new_f->fn_alias);
@@ -506,6 +493,12 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
506 f = new_f; 493 f = new_f;
507 } 494 }
508 495
496 new_fa = &f->fn_embedded_alias;
497 if (new_fa->fa_info != NULL) {
498 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
499 if (new_fa == NULL)
500 goto out_free_new_f;
501 }
509 new_fa->fa_info = fi; 502 new_fa->fa_info = fi;
510 new_fa->fa_tos = tos; 503 new_fa->fa_tos = tos;
511 new_fa->fa_type = cfg->fc_type; 504 new_fa->fa_type = cfg->fc_type;
@@ -532,8 +525,8 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
532 &cfg->fc_nlinfo, 0); 525 &cfg->fc_nlinfo, 0);
533 return 0; 526 return 0;
534 527
535out_free_new_fa: 528out_free_new_f:
536 kmem_cache_free(fn_alias_kmem, new_fa); 529 kmem_cache_free(fn_hash_kmem, new_f);
537out: 530out:
538 fib_release_info(fi); 531 fib_release_info(fi);
539 return err; 532 return err;
@@ -609,7 +602,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
609 602
610 if (fa->fa_state & FA_S_ACCESSED) 603 if (fa->fa_state & FA_S_ACCESSED)
611 rt_cache_flush(-1); 604 rt_cache_flush(-1);
612 fn_free_alias(fa); 605 fn_free_alias(fa, f);
613 if (kill_fn) { 606 if (kill_fn) {
614 fn_free_node(f); 607 fn_free_node(f);
615 fz->fz_nent--; 608 fz->fz_nent--;
@@ -645,7 +638,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
645 fib_hash_genid++; 638 fib_hash_genid++;
646 write_unlock_bh(&fib_hash_lock); 639 write_unlock_bh(&fib_hash_lock);
647 640
648 fn_free_alias(fa); 641 fn_free_alias(fa, f);
649 found++; 642 found++;
650 } 643 }
651 } 644 }
@@ -761,25 +754,19 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
761 return skb->len; 754 return skb->len;
762} 755}
763 756
764#ifdef CONFIG_IP_MULTIPLE_TABLES 757void __init fib_hash_init(void)
765struct fib_table * fib_hash_init(u32 id)
766#else
767struct fib_table * __init fib_hash_init(u32 id)
768#endif
769{ 758{
770 struct fib_table *tb; 759 fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
760 0, SLAB_PANIC, NULL);
771 761
772 if (fn_hash_kmem == NULL) 762 fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
773 fn_hash_kmem = kmem_cache_create("ip_fib_hash", 763 0, SLAB_PANIC, NULL);
774 sizeof(struct fib_node),
775 0, SLAB_HWCACHE_ALIGN,
776 NULL);
777 764
778 if (fn_alias_kmem == NULL) 765}
779 fn_alias_kmem = kmem_cache_create("ip_fib_alias", 766
780 sizeof(struct fib_alias), 767struct fib_table *fib_hash_table(u32 id)
781 0, SLAB_HWCACHE_ALIGN, 768{
782 NULL); 769 struct fib_table *tb;
783 770
784 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), 771 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
785 GFP_KERNEL); 772 GFP_KERNEL);
@@ -787,6 +774,7 @@ struct fib_table * __init fib_hash_init(u32 id)
787 return NULL; 774 return NULL;
788 775
789 tb->tb_id = id; 776 tb->tb_id = id;
777 tb->tb_default = -1;
790 tb->tb_lookup = fn_hash_lookup; 778 tb->tb_lookup = fn_hash_lookup;
791 tb->tb_insert = fn_hash_insert; 779 tb->tb_insert = fn_hash_insert;
792 tb->tb_delete = fn_hash_delete; 780 tb->tb_delete = fn_hash_delete;
@@ -801,6 +789,7 @@ struct fib_table * __init fib_hash_init(u32 id)
801#ifdef CONFIG_PROC_FS 789#ifdef CONFIG_PROC_FS
802 790
803struct fib_iter_state { 791struct fib_iter_state {
792 struct seq_net_private p;
804 struct fn_zone *zone; 793 struct fn_zone *zone;
805 int bucket; 794 int bucket;
806 struct hlist_head *hash_head; 795 struct hlist_head *hash_head;
@@ -814,7 +803,11 @@ struct fib_iter_state {
814static struct fib_alias *fib_get_first(struct seq_file *seq) 803static struct fib_alias *fib_get_first(struct seq_file *seq)
815{ 804{
816 struct fib_iter_state *iter = seq->private; 805 struct fib_iter_state *iter = seq->private;
817 struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data; 806 struct fib_table *main_table;
807 struct fn_hash *table;
808
809 main_table = fib_get_table(iter->p.net, RT_TABLE_MAIN);
810 table = (struct fn_hash *)main_table->tb_data;
818 811
819 iter->bucket = 0; 812 iter->bucket = 0;
820 iter->hash_head = NULL; 813 iter->hash_head = NULL;
@@ -949,11 +942,13 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
949} 942}
950 943
951static void *fib_seq_start(struct seq_file *seq, loff_t *pos) 944static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
945 __acquires(fib_hash_lock)
952{ 946{
947 struct fib_iter_state *iter = seq->private;
953 void *v = NULL; 948 void *v = NULL;
954 949
955 read_lock(&fib_hash_lock); 950 read_lock(&fib_hash_lock);
956 if (ip_fib_main_table) 951 if (fib_get_table(iter->p.net, RT_TABLE_MAIN))
957 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 952 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
958 return v; 953 return v;
959} 954}
@@ -965,6 +960,7 @@ static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
965} 960}
966 961
967static void fib_seq_stop(struct seq_file *seq, void *v) 962static void fib_seq_stop(struct seq_file *seq, void *v)
963 __releases(fib_hash_lock)
968{ 964{
969 read_unlock(&fib_hash_lock); 965 read_unlock(&fib_hash_lock);
970} 966}
@@ -1040,8 +1036,8 @@ static const struct seq_operations fib_seq_ops = {
1040 1036
1041static int fib_seq_open(struct inode *inode, struct file *file) 1037static int fib_seq_open(struct inode *inode, struct file *file)
1042{ 1038{
1043 return seq_open_private(file, &fib_seq_ops, 1039 return seq_open_net(inode, file, &fib_seq_ops,
1044 sizeof(struct fib_iter_state)); 1040 sizeof(struct fib_iter_state));
1045} 1041}
1046 1042
1047static const struct file_operations fib_seq_fops = { 1043static const struct file_operations fib_seq_fops = {
@@ -1049,18 +1045,18 @@ static const struct file_operations fib_seq_fops = {
1049 .open = fib_seq_open, 1045 .open = fib_seq_open,
1050 .read = seq_read, 1046 .read = seq_read,
1051 .llseek = seq_lseek, 1047 .llseek = seq_lseek,
1052 .release = seq_release_private, 1048 .release = seq_release_net,
1053}; 1049};
1054 1050
1055int __init fib_proc_init(void) 1051int __net_init fib_proc_init(struct net *net)
1056{ 1052{
1057 if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_seq_fops)) 1053 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
1058 return -ENOMEM; 1054 return -ENOMEM;
1059 return 0; 1055 return 0;
1060} 1056}
1061 1057
1062void __init fib_proc_exit(void) 1058void __net_exit fib_proc_exit(struct net *net)
1063{ 1059{
1064 proc_net_remove(&init_net, "route"); 1060 proc_net_remove(net, "route");
1065} 1061}
1066#endif /* CONFIG_PROC_FS */ 1062#endif /* CONFIG_PROC_FS */
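
The proc conversion above works because struct fib_iter_state now starts with a seq_net_private, so seq_open_net()/seq_release_net() can stash and recover the namespace through the first member (iter->p.net). A userspace sketch of that first-member convention, with both structs heavily simplified:

#include <stdio.h>

struct seq_net_private { void *net; };  /* generic header */

struct fib_iter_state {
	struct seq_net_private p;       /* must stay first */
	int bucket;                     /* walker position */
};

/* Generic helper that sees only the header type... */
static void *seq_file_net(void *private)
{
	return ((struct seq_net_private *)private)->net;
}

int main(void)
{
	int netns;                      /* stand-in for struct net */
	struct fib_iter_state iter = { { &netns }, 0 };

	/* ...yet recovers the namespace stored by the subsystem;
	 * C guarantees a struct and its first member share an address. */
	printf("net recovered: %d\n", seq_file_net(&iter) == &netns);
	return 0;
}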
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index eef9eec17e0c..2c1623d2768b 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -7,12 +7,14 @@
7 7
8struct fib_alias { 8struct fib_alias {
9 struct list_head fa_list; 9 struct list_head fa_list;
10 struct rcu_head rcu;
11 struct fib_info *fa_info; 10 struct fib_info *fa_info;
12 u8 fa_tos; 11 u8 fa_tos;
13 u8 fa_type; 12 u8 fa_type;
14 u8 fa_scope; 13 u8 fa_scope;
15 u8 fa_state; 14 u8 fa_state;
15#ifdef CONFIG_IP_FIB_TRIE
16 struct rcu_head rcu;
17#endif
16}; 18};
17 19
18#define FA_S_ACCESSED 0x01 20#define FA_S_ACCESSED 0x01
@@ -36,6 +38,16 @@ extern struct fib_alias *fib_find_alias(struct list_head *fah,
36 u8 tos, u32 prio); 38 u8 tos, u32 prio);
37extern int fib_detect_death(struct fib_info *fi, int order, 39extern int fib_detect_death(struct fib_info *fi, int order,
38 struct fib_info **last_resort, 40 struct fib_info **last_resort,
39 int *last_idx, int *dflt); 41 int *last_idx, int dflt);
42
43static inline void fib_result_assign(struct fib_result *res,
44 struct fib_info *fi)
45{
46 if (res->fi != NULL)
47 fib_info_put(res->fi);
48 res->fi = fi;
49 if (fi != NULL)
50 atomic_inc(&fi->fib_clntref);
51}
40 52
41#endif /* _FIB_LOOKUP_H */ 53#endif /* _FIB_LOOKUP_H */
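
The new fib_result_assign() helper centralizes the drop-old/take-new reference dance that fn_hash_select_default() previously open-coded three times. A compilable miniature, with plain ints standing in for atomic_t and fib_clntref:

#include <stdio.h>

struct info { int refcnt; };
struct result { struct info *fi; };

static void info_put(struct info *fi) { fi->refcnt--; }
static void info_get(struct info *fi) { fi->refcnt++; }

/* Same order as fib_result_assign(): release the old reference,
 * install the new pointer, then take a reference on it. */
static void result_assign(struct result *res, struct info *fi)
{
	if (res->fi != NULL)
		info_put(res->fi);
	res->fi = fi;
	if (fi != NULL)
		info_get(fi);
}

int main(void)
{
	struct info a = { 1 }, b = { 1 };
	struct result res = { NULL };

	result_assign(&res, &a);   /* a.refcnt -> 2 */
	result_assign(&res, &b);   /* a.refcnt -> 1, b.refcnt -> 2 */
	result_assign(&res, NULL); /* b.refcnt -> 1 */
	printf("a=%d b=%d\n", a.refcnt, b.refcnt);
	return 0;
}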
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a0ada3a8d8dd..19274d01afa4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -32,8 +32,6 @@
32#include <net/ip_fib.h> 32#include <net/ip_fib.h>
33#include <net/fib_rules.h> 33#include <net/fib_rules.h>
34 34
35static struct fib_rules_ops fib4_rules_ops;
36
37struct fib4_rule 35struct fib4_rule
38{ 36{
39 struct fib_rule common; 37 struct fib_rule common;
@@ -56,14 +54,14 @@ u32 fib_rules_tclass(struct fib_result *res)
56} 54}
57#endif 55#endif
58 56
59int fib_lookup(struct flowi *flp, struct fib_result *res) 57int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
60{ 58{
61 struct fib_lookup_arg arg = { 59 struct fib_lookup_arg arg = {
62 .result = res, 60 .result = res,
63 }; 61 };
64 int err; 62 int err;
65 63
66 err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); 64 err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg);
67 res->r = arg.rule; 65 res->r = arg.rule;
68 66
69 return err; 67 return err;
@@ -93,7 +91,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
93 goto errout; 91 goto errout;
94 } 92 }
95 93
96 if ((tbl = fib_get_table(rule->table)) == NULL) 94 if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL)
97 goto errout; 95 goto errout;
98 96
99 err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); 97 err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
@@ -104,16 +102,6 @@ errout:
104} 102}
105 103
106 104
107void fib_select_default(const struct flowi *flp, struct fib_result *res)
108{
109 if (res->r && res->r->action == FR_ACT_TO_TBL &&
110 FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
111 struct fib_table *tb;
112 if ((tb = fib_get_table(res->r->table)) != NULL)
113 tb->tb_select_default(tb, flp, res);
114 }
115}
116
117static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) 105static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
118{ 106{
119 struct fib4_rule *r = (struct fib4_rule *) rule; 107 struct fib4_rule *r = (struct fib4_rule *) rule;
@@ -130,13 +118,13 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
130 return 1; 118 return 1;
131} 119}
132 120
133static struct fib_table *fib_empty_table(void) 121static struct fib_table *fib_empty_table(struct net *net)
134{ 122{
135 u32 id; 123 u32 id;
136 124
137 for (id = 1; id <= RT_TABLE_MAX; id++) 125 for (id = 1; id <= RT_TABLE_MAX; id++)
138 if (fib_get_table(id) == NULL) 126 if (fib_get_table(net, id) == NULL)
139 return fib_new_table(id); 127 return fib_new_table(net, id);
140 return NULL; 128 return NULL;
141} 129}
142 130
@@ -149,6 +137,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
149 struct nlmsghdr *nlh, struct fib_rule_hdr *frh, 137 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
150 struct nlattr **tb) 138 struct nlattr **tb)
151{ 139{
140 struct net *net = skb->sk->sk_net;
152 int err = -EINVAL; 141 int err = -EINVAL;
153 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 142 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
154 143
@@ -159,7 +148,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
159 if (rule->action == FR_ACT_TO_TBL) { 148 if (rule->action == FR_ACT_TO_TBL) {
160 struct fib_table *table; 149 struct fib_table *table;
161 150
162 table = fib_empty_table(); 151 table = fib_empty_table(net);
163 if (table == NULL) { 152 if (table == NULL) {
164 err = -ENOBUFS; 153 err = -ENOBUFS;
165 goto errout; 154 goto errout;
@@ -245,14 +234,14 @@ nla_put_failure:
245 return -ENOBUFS; 234 return -ENOBUFS;
246} 235}
247 236
248static u32 fib4_rule_default_pref(void) 237static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
249{ 238{
250 struct list_head *pos; 239 struct list_head *pos;
251 struct fib_rule *rule; 240 struct fib_rule *rule;
252 241
253 if (!list_empty(&fib4_rules_ops.rules_list)) { 242 if (!list_empty(&ops->rules_list)) {
254 pos = fib4_rules_ops.rules_list.next; 243 pos = ops->rules_list.next;
255 if (pos->next != &fib4_rules_ops.rules_list) { 244 if (pos->next != &ops->rules_list) {
256 rule = list_entry(pos->next, struct fib_rule, list); 245 rule = list_entry(pos->next, struct fib_rule, list);
257 if (rule->pref) 246 if (rule->pref)
258 return rule->pref - 1; 247 return rule->pref - 1;
@@ -274,7 +263,7 @@ static void fib4_rule_flush_cache(void)
274 rt_cache_flush(-1); 263 rt_cache_flush(-1);
275} 264}
276 265
277static struct fib_rules_ops fib4_rules_ops = { 266static struct fib_rules_ops fib4_rules_ops_template = {
278 .family = AF_INET, 267 .family = AF_INET,
279 .rule_size = sizeof(struct fib4_rule), 268 .rule_size = sizeof(struct fib4_rule),
280 .addr_size = sizeof(u32), 269 .addr_size = sizeof(u32),
@@ -288,31 +277,53 @@ static struct fib_rules_ops fib4_rules_ops = {
288 .flush_cache = fib4_rule_flush_cache, 277 .flush_cache = fib4_rule_flush_cache,
289 .nlgroup = RTNLGRP_IPV4_RULE, 278 .nlgroup = RTNLGRP_IPV4_RULE,
290 .policy = fib4_rule_policy, 279 .policy = fib4_rule_policy,
291 .rules_list = LIST_HEAD_INIT(fib4_rules_ops.rules_list),
292 .owner = THIS_MODULE, 280 .owner = THIS_MODULE,
293}; 281};
294 282
295static int __init fib_default_rules_init(void) 283static int fib_default_rules_init(struct fib_rules_ops *ops)
296{ 284{
297 int err; 285 int err;
298 286
299 err = fib_default_rule_add(&fib4_rules_ops, 0, 287 err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, FIB_RULE_PERMANENT);
300 RT_TABLE_LOCAL, FIB_RULE_PERMANENT);
301 if (err < 0) 288 if (err < 0)
302 return err; 289 return err;
303 err = fib_default_rule_add(&fib4_rules_ops, 0x7FFE, 290 err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0);
304 RT_TABLE_MAIN, 0);
305 if (err < 0) 291 if (err < 0)
306 return err; 292 return err;
307 err = fib_default_rule_add(&fib4_rules_ops, 0x7FFF, 293 err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0);
308 RT_TABLE_DEFAULT, 0);
309 if (err < 0) 294 if (err < 0)
310 return err; 295 return err;
311 return 0; 296 return 0;
312} 297}
313 298
314void __init fib4_rules_init(void) 299int __net_init fib4_rules_init(struct net *net)
300{
301 int err;
302 struct fib_rules_ops *ops;
303
304 ops = kmemdup(&fib4_rules_ops_template, sizeof(*ops), GFP_KERNEL);
305 if (ops == NULL)
306 return -ENOMEM;
307 INIT_LIST_HEAD(&ops->rules_list);
308 ops->fro_net = net;
309
310 fib_rules_register(ops);
311
312 err = fib_default_rules_init(ops);
313 if (err < 0)
314 goto fail;
315 net->ipv4.rules_ops = ops;
316 return 0;
317
318fail:
319 /* also cleans all rules already added */
320 fib_rules_unregister(ops);
321 kfree(ops);
322 return err;
323}
324
325void __net_exit fib4_rules_exit(struct net *net)
315{ 326{
316 BUG_ON(fib_default_rules_init()); 327 fib_rules_unregister(net->ipv4.rules_ops);
317 fib_rules_register(&fib4_rules_ops); 328 kfree(net->ipv4.rules_ops);
318} 329}
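
In fib_rules.c the static fib4_rules_ops becomes fib4_rules_ops_template: one global ops struct cannot serve several namespaces, so fib4_rules_init() kmemdup()s the template and fills in the per-net list head and back-pointer. A userspace sketch of that duplicate-a-template pattern, with invented field names:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct rules_ops {
	const char *family;
	void *owner_net;   /* per-copy: which namespace owns this */
};

static const struct rules_ops ops_template = {
	.family = "AF_INET",
	/* .owner_net deliberately unset: per-namespace state */
};

static struct rules_ops *rules_ops_new(void *net)
{
	struct rules_ops *ops = malloc(sizeof(*ops));
	if (ops == NULL)
		return NULL;
	memcpy(ops, &ops_template, sizeof(*ops)); /* kmemdup() analogue */
	ops->owner_net = net;
	return ops;
}

int main(void)
{
	int netns_a, netns_b; /* stand-ins for struct net */
	struct rules_ops *a = rules_ops_new(&netns_a);
	struct rules_ops *b = rules_ops_new(&netns_b);

	if (a && b)
		printf("independent copies: %d\n", a != b);
	free(a);
	free(b);
	return 0;
}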
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1351a2617dce..c7912866d987 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -47,8 +47,6 @@
47 47
48#include "fib_lookup.h" 48#include "fib_lookup.h"
49 49
50#define FSprintk(a...)
51
52static DEFINE_SPINLOCK(fib_info_lock); 50static DEFINE_SPINLOCK(fib_info_lock);
53static struct hlist_head *fib_info_hash; 51static struct hlist_head *fib_info_hash;
54static struct hlist_head *fib_info_laddrhash; 52static struct hlist_head *fib_info_laddrhash;
@@ -145,7 +143,7 @@ static const struct
145void free_fib_info(struct fib_info *fi) 143void free_fib_info(struct fib_info *fi)
146{ 144{
147 if (fi->fib_dead == 0) { 145 if (fi->fib_dead == 0) {
148 printk("Freeing alive fib_info %p\n", fi); 146 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
149 return; 147 return;
150 } 148 }
151 change_nexthops(fi) { 149 change_nexthops(fi) {
@@ -196,6 +194,15 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
196 return 0; 194 return 0;
197} 195}
198 196
197static inline unsigned int fib_devindex_hashfn(unsigned int val)
198{
199 unsigned int mask = DEVINDEX_HASHSIZE - 1;
200
201 return (val ^
202 (val >> DEVINDEX_HASHBITS) ^
203 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
204}
205
199static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 206static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200{ 207{
201 unsigned int mask = (fib_hash_size - 1); 208 unsigned int mask = (fib_hash_size - 1);
@@ -204,6 +211,9 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
204 val ^= fi->fib_protocol; 211 val ^= fi->fib_protocol;
205 val ^= (__force u32)fi->fib_prefsrc; 212 val ^= (__force u32)fi->fib_prefsrc;
206 val ^= fi->fib_priority; 213 val ^= fi->fib_priority;
214 for_nexthops(fi) {
215 val ^= fib_devindex_hashfn(nh->nh_oif);
216 } endfor_nexthops(fi)
207 217
208 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 218 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209} 219}
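
fib_devindex_hashfn() is moved above fib_info_hashfn() so the nexthop output interfaces can be folded into the fib_info hash: fib_infos that differ only in nh_oif no longer pile into one bucket. The helper itself, lifted into a standalone program:

#include <stdio.h>

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)

/* Fold a 32-bit ifindex down to DEVINDEX_HASHBITS by xoring three
 * shifted windows of the value, as in the hunk above. */
static unsigned int devindex_hashfn(unsigned int val)
{
	unsigned int mask = DEVINDEX_HASHSIZE - 1;

	return (val ^
		(val >> DEVINDEX_HASHBITS) ^
		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}

int main(void)
{
	/* Two routes that differ only in output interface now perturb
	 * the hash value differently. */
	unsigned int base = 0x12345600;
	printf("oif=2 -> %u, oif=3 -> %u\n",
	       devindex_hashfn(base | 2), devindex_hashfn(base | 3));
	return 0;
}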
@@ -234,15 +244,6 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
234 return NULL; 244 return NULL;
235} 245}
236 246
237static inline unsigned int fib_devindex_hashfn(unsigned int val)
238{
239 unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241 return (val ^
242 (val >> DEVINDEX_HASHBITS) ^
243 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244}
245
246/* Check, that the gateway is already configured. 247/* Check, that the gateway is already configured.
247 Used only by redirect accept routine. 248 Used only by redirect accept routine.
248 */ 249 */
@@ -320,11 +321,11 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
320 kfree_skb(skb); 321 kfree_skb(skb);
321 goto errout; 322 goto errout;
322 } 323 }
323 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, 324 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
324 info->nlh, GFP_KERNEL); 325 info->nlh, GFP_KERNEL);
325errout: 326errout:
326 if (err < 0) 327 if (err < 0)
327 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); 328 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
328} 329}
329 330
330/* Return the first fib alias matching TOS with 331/* Return the first fib alias matching TOS with
@@ -346,7 +347,7 @@ struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
346} 347}
347 348
348int fib_detect_death(struct fib_info *fi, int order, 349int fib_detect_death(struct fib_info *fi, int order,
349 struct fib_info **last_resort, int *last_idx, int *dflt) 350 struct fib_info **last_resort, int *last_idx, int dflt)
350{ 351{
351 struct neighbour *n; 352 struct neighbour *n;
352 int state = NUD_NONE; 353 int state = NUD_NONE;
@@ -358,10 +359,10 @@ int fib_detect_death(struct fib_info *fi, int order,
358 } 359 }
359 if (state==NUD_REACHABLE) 360 if (state==NUD_REACHABLE)
360 return 0; 361 return 0;
361 if ((state&NUD_VALID) && order != *dflt) 362 if ((state&NUD_VALID) && order != dflt)
362 return 0; 363 return 0;
363 if ((state&NUD_VALID) || 364 if ((state&NUD_VALID) ||
364 (*last_idx<0 && order > *dflt)) { 365 (*last_idx<0 && order > dflt)) {
365 *last_resort = fi; 366 *last_resort = fi;
366 *last_idx = order; 367 *last_idx = order;
367 } 368 }
@@ -518,7 +519,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
518 struct fib_nh *nh) 519 struct fib_nh *nh)
519{ 520{
520 int err; 521 int err;
522 struct net *net;
521 523
524 net = cfg->fc_nlinfo.nl_net;
522 if (nh->nh_gw) { 525 if (nh->nh_gw) {
523 struct fib_result res; 526 struct fib_result res;
524 527
@@ -531,9 +534,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
531 534
532 if (cfg->fc_scope >= RT_SCOPE_LINK) 535 if (cfg->fc_scope >= RT_SCOPE_LINK)
533 return -EINVAL; 536 return -EINVAL;
534 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) 537 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
535 return -EINVAL; 538 return -EINVAL;
536 if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) 539 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
537 return -ENODEV; 540 return -ENODEV;
538 if (!(dev->flags&IFF_UP)) 541 if (!(dev->flags&IFF_UP))
539 return -ENETDOWN; 542 return -ENETDOWN;
@@ -556,7 +559,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
556 /* It is not necessary, but requires a bit of thinking */ 559 /* It is not necessary, but requires a bit of thinking */
557 if (fl.fl4_scope < RT_SCOPE_LINK) 560 if (fl.fl4_scope < RT_SCOPE_LINK)
558 fl.fl4_scope = RT_SCOPE_LINK; 561 fl.fl4_scope = RT_SCOPE_LINK;
559 if ((err = fib_lookup(&fl, &res)) != 0) 562 if ((err = fib_lookup(net, &fl, &res)) != 0)
560 return err; 563 return err;
561 } 564 }
562 err = -EINVAL; 565 err = -EINVAL;
@@ -580,7 +583,7 @@ out:
580 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 583 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
581 return -EINVAL; 584 return -EINVAL;
582 585
583 in_dev = inetdev_by_index(nh->nh_oif); 586 in_dev = inetdev_by_index(net, nh->nh_oif);
584 if (in_dev == NULL) 587 if (in_dev == NULL)
585 return -ENODEV; 588 return -ENODEV;
586 if (!(in_dev->dev->flags&IFF_UP)) { 589 if (!(in_dev->dev->flags&IFF_UP)) {
@@ -605,10 +608,10 @@ static inline unsigned int fib_laddr_hashfn(__be32 val)
605static struct hlist_head *fib_hash_alloc(int bytes) 608static struct hlist_head *fib_hash_alloc(int bytes)
606{ 609{
607 if (bytes <= PAGE_SIZE) 610 if (bytes <= PAGE_SIZE)
608 return kmalloc(bytes, GFP_KERNEL); 611 return kzalloc(bytes, GFP_KERNEL);
609 else 612 else
610 return (struct hlist_head *) 613 return (struct hlist_head *)
611 __get_free_pages(GFP_KERNEL, get_order(bytes)); 614 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
612} 615}
613 616
614static void fib_hash_free(struct hlist_head *hash, int bytes) 617static void fib_hash_free(struct hlist_head *hash, int bytes)
@@ -712,12 +715,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
712 if (!new_info_hash || !new_laddrhash) { 715 if (!new_info_hash || !new_laddrhash) {
713 fib_hash_free(new_info_hash, bytes); 716 fib_hash_free(new_info_hash, bytes);
714 fib_hash_free(new_laddrhash, bytes); 717 fib_hash_free(new_laddrhash, bytes);
715 } else { 718 } else
716 memset(new_info_hash, 0, bytes);
717 memset(new_laddrhash, 0, bytes);
718
719 fib_hash_move(new_info_hash, new_laddrhash, new_size); 719 fib_hash_move(new_info_hash, new_laddrhash, new_size);
720 }
721 720
722 if (!fib_hash_size) 721 if (!fib_hash_size)
723 goto failure; 722 goto failure;
@@ -799,7 +798,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
799 if (nhs != 1 || nh->nh_gw) 798 if (nhs != 1 || nh->nh_gw)
800 goto err_inval; 799 goto err_inval;
801 nh->nh_scope = RT_SCOPE_NOWHERE; 800 nh->nh_scope = RT_SCOPE_NOWHERE;
802 nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); 801 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
802 fi->fib_nh->nh_oif);
803 err = -ENODEV; 803 err = -ENODEV;
804 if (nh->nh_dev == NULL) 804 if (nh->nh_dev == NULL)
805 goto failure; 805 goto failure;
@@ -813,7 +813,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
813 if (fi->fib_prefsrc) { 813 if (fi->fib_prefsrc) {
814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
815 fi->fib_prefsrc != cfg->fc_dst) 815 fi->fib_prefsrc != cfg->fc_dst)
816 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 816 if (inet_addr_type(cfg->fc_nlinfo.nl_net,
817 fi->fib_prefsrc) != RTN_LOCAL)
817 goto err_inval; 818 goto err_inval;
818 } 819 }
819 820
@@ -914,7 +915,8 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
914 continue; 915 continue;
915 916
916 default: 917 default:
917 printk(KERN_DEBUG "impossible 102\n"); 918 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
919 fa->fa_type);
918 return -EINVAL; 920 return -EINVAL;
919 } 921 }
920 } 922 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1010b469d7d3..f2f47033f31f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -82,7 +82,6 @@
82#include <net/ip_fib.h> 82#include <net/ip_fib.h>
83#include "fib_lookup.h" 83#include "fib_lookup.h"
84 84
85#undef CONFIG_IP_FIB_TRIE_STATS
86#define MAX_STAT_DEPTH 32 85#define MAX_STAT_DEPTH 32
87 86
88#define KEYLENGTH (8*sizeof(t_key)) 87#define KEYLENGTH (8*sizeof(t_key))
@@ -98,13 +97,13 @@ typedef unsigned int t_key;
98#define IS_LEAF(n) (n->parent & T_LEAF) 97#define IS_LEAF(n) (n->parent & T_LEAF)
99 98
100struct node { 99struct node {
101 t_key key;
102 unsigned long parent; 100 unsigned long parent;
101 t_key key;
103}; 102};
104 103
105struct leaf { 104struct leaf {
106 t_key key;
107 unsigned long parent; 105 unsigned long parent;
106 t_key key;
108 struct hlist_head list; 107 struct hlist_head list;
109 struct rcu_head rcu; 108 struct rcu_head rcu;
110}; 109};
@@ -117,12 +116,12 @@ struct leaf_info {
117}; 116};
118 117
119struct tnode { 118struct tnode {
120 t_key key;
121 unsigned long parent; 119 unsigned long parent;
122 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ 120 t_key key;
123 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ 121 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
124 unsigned short full_children; /* KEYLENGTH bits needed */ 122 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
125 unsigned short empty_children; /* KEYLENGTH bits needed */ 123 unsigned int full_children; /* KEYLENGTH bits needed */
124 unsigned int empty_children; /* KEYLENGTH bits needed */
126 struct rcu_head rcu; 125 struct rcu_head rcu;
127 struct node *child[0]; 126 struct node *child[0];
128}; 127};
@@ -144,6 +143,7 @@ struct trie_stat {
144 unsigned int tnodes; 143 unsigned int tnodes;
145 unsigned int leaves; 144 unsigned int leaves;
146 unsigned int nullpointers; 145 unsigned int nullpointers;
146 unsigned int prefixes;
147 unsigned int nodesizes[MAX_STAT_DEPTH]; 147 unsigned int nodesizes[MAX_STAT_DEPTH];
148}; 148};
149 149
@@ -152,25 +152,28 @@ struct trie {
152#ifdef CONFIG_IP_FIB_TRIE_STATS 152#ifdef CONFIG_IP_FIB_TRIE_STATS
153 struct trie_use_stats stats; 153 struct trie_use_stats stats;
154#endif 154#endif
155 int size;
156 unsigned int revision;
157}; 155};
158 156
159static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 157static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
160static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 158static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
159 int wasfull);
161static struct node *resize(struct trie *t, struct tnode *tn); 160static struct node *resize(struct trie *t, struct tnode *tn);
162static struct tnode *inflate(struct trie *t, struct tnode *tn); 161static struct tnode *inflate(struct trie *t, struct tnode *tn);
163static struct tnode *halve(struct trie *t, struct tnode *tn); 162static struct tnode *halve(struct trie *t, struct tnode *tn);
164static void tnode_free(struct tnode *tn); 163static void tnode_free(struct tnode *tn);
165 164
166static struct kmem_cache *fn_alias_kmem __read_mostly; 165static struct kmem_cache *fn_alias_kmem __read_mostly;
167static struct trie *trie_local = NULL, *trie_main = NULL; 166static struct kmem_cache *trie_leaf_kmem __read_mostly;
168 167
169static inline struct tnode *node_parent(struct node *node) 168static inline struct tnode *node_parent(struct node *node)
170{ 169{
171 struct tnode *ret; 170 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
171}
172
173static inline struct tnode *node_parent_rcu(struct node *node)
174{
175 struct tnode *ret = node_parent(node);
172 176
173 ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
174 return rcu_dereference(ret); 177 return rcu_dereference(ret);
175} 178}
176 179
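
node_parent() decodes a parent word that keeps the node type in the pointer's low bits (T_LEAF/T_TNODE, possible because parents are suitably aligned), and the split adds an explicit _rcu variant for read-side callers. A sketch of the tag-in-aligned-pointer trick; the aligned attribute is GCC/Clang syntax:

#include <stdio.h>

#define NODE_TYPE_MASK 0x1UL
#define T_TNODE        0x0UL
#define T_LEAF         0x1UL

struct tnode { int dummy; } __attribute__((aligned(4)));

/* Alignment guarantees the low bits of a tnode pointer are zero,
 * so they are free to carry the node type. */
static unsigned long encode(struct tnode *parent, unsigned long type)
{
	return (unsigned long)parent | type;
}

static struct tnode *node_parent(unsigned long word)
{
	return (struct tnode *)(word & ~NODE_TYPE_MASK);
}

int main(void)
{
	struct tnode p;
	unsigned long word = encode(&p, T_LEAF);

	printf("parent recovered: %d, type bit: %lu\n",
	       node_parent(word) == &p, word & NODE_TYPE_MASK);
	return 0;
}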
@@ -180,13 +183,18 @@ static inline void node_set_parent(struct node *node, struct tnode *ptr)
180 (unsigned long)ptr | NODE_TYPE(node)); 183 (unsigned long)ptr | NODE_TYPE(node));
181} 184}
182 185
183/* rcu_read_lock needs to be hold by caller from readside */ 186static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
187{
188 BUG_ON(i >= 1U << tn->bits);
189
190 return tn->child[i];
191}
184 192
185static inline struct node *tnode_get_child(struct tnode *tn, int i) 193static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
186{ 194{
187 BUG_ON(i >= 1 << tn->bits); 195 struct node *ret = tnode_get_child(tn, i);
188 196
189 return rcu_dereference(tn->child[i]); 197 return rcu_dereference(ret);
190} 198}
191 199
192static inline int tnode_child_length(const struct tnode *tn) 200static inline int tnode_child_length(const struct tnode *tn)
@@ -300,10 +308,10 @@ static inline void check_tnode(const struct tnode *tn)
300 WARN_ON(tn && tn->pos+tn->bits > 32); 308 WARN_ON(tn && tn->pos+tn->bits > 32);
301} 309}
302 310
303static int halve_threshold = 25; 311static const int halve_threshold = 25;
304static int inflate_threshold = 50; 312static const int inflate_threshold = 50;
305static int halve_threshold_root = 8; 313static const int halve_threshold_root = 8;
306static int inflate_threshold_root = 15; 314static const int inflate_threshold_root = 15;
307 315
308 316
309static void __alias_free_mem(struct rcu_head *head) 317static void __alias_free_mem(struct rcu_head *head)
@@ -319,7 +327,8 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa)
319 327
320static void __leaf_free_rcu(struct rcu_head *head) 328static void __leaf_free_rcu(struct rcu_head *head)
321{ 329{
322 kfree(container_of(head, struct leaf, rcu)); 330 struct leaf *l = container_of(head, struct leaf, rcu);
331 kmem_cache_free(trie_leaf_kmem, l);
323} 332}
324 333
325static void __leaf_info_free_rcu(struct rcu_head *head) 334static void __leaf_info_free_rcu(struct rcu_head *head)
@@ -332,12 +341,12 @@ static inline void free_leaf_info(struct leaf_info *leaf)
332 call_rcu(&leaf->rcu, __leaf_info_free_rcu); 341 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
333} 342}
334 343
335static struct tnode *tnode_alloc(unsigned int size) 344static struct tnode *tnode_alloc(size_t size)
336{ 345{
337 struct page *pages; 346 struct page *pages;
338 347
339 if (size <= PAGE_SIZE) 348 if (size <= PAGE_SIZE)
340 return kcalloc(size, 1, GFP_KERNEL); 349 return kzalloc(size, GFP_KERNEL);
341 350
342 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); 351 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
343 if (!pages) 352 if (!pages)
@@ -349,8 +358,8 @@ static struct tnode *tnode_alloc(unsigned int size)
349static void __tnode_free_rcu(struct rcu_head *head) 358static void __tnode_free_rcu(struct rcu_head *head)
350{ 359{
351 struct tnode *tn = container_of(head, struct tnode, rcu); 360 struct tnode *tn = container_of(head, struct tnode, rcu);
352 unsigned int size = sizeof(struct tnode) + 361 size_t size = sizeof(struct tnode) +
353 (1 << tn->bits) * sizeof(struct node *); 362 (sizeof(struct node *) << tn->bits);
354 363
355 if (size <= PAGE_SIZE) 364 if (size <= PAGE_SIZE)
356 kfree(tn); 365 kfree(tn);
@@ -369,7 +378,7 @@ static inline void tnode_free(struct tnode *tn)
369 378
370static struct leaf *leaf_new(void) 379static struct leaf *leaf_new(void)
371{ 380{
372 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); 381 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
373 if (l) { 382 if (l) {
374 l->parent = T_LEAF; 383 l->parent = T_LEAF;
375 INIT_HLIST_HEAD(&l->list); 384 INIT_HLIST_HEAD(&l->list);
@@ -387,14 +396,12 @@ static struct leaf_info *leaf_info_new(int plen)
387 return li; 396 return li;
388} 397}
389 398
390static struct tnode* tnode_new(t_key key, int pos, int bits) 399static struct tnode *tnode_new(t_key key, int pos, int bits)
391{ 400{
392 int nchildren = 1<<bits; 401 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
393 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
394 struct tnode *tn = tnode_alloc(sz); 402 struct tnode *tn = tnode_alloc(sz);
395 403
396 if (tn) { 404 if (tn) {
397 memset(tn, 0, sz);
398 tn->parent = T_TNODE; 405 tn->parent = T_TNODE;
399 tn->pos = pos; 406 tn->pos = pos;
400 tn->bits = bits; 407 tn->bits = bits;
@@ -403,8 +410,8 @@ static struct tnode* tnode_new(t_key key, int pos, int bits)
403 tn->empty_children = 1<<bits; 410 tn->empty_children = 1<<bits;
404 } 411 }
405 412
406 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), 413 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
407 (unsigned int) (sizeof(struct node) * 1<<bits)); 414 (unsigned long) (sizeof(struct node) << bits));
408 return tn; 415 return tn;
409} 416}
410 417
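
tnode_new() and __tnode_free_rcu() now compute the allocation as a size_t header plus a shifted child array, replacing the old int multiply. The same flexible-array sizing in standalone C (child[] is the C99 spelling of the kernel's child[0]):

#include <stdlib.h>
#include <stdio.h>

struct node;

struct tnode {
	unsigned char bits;        /* log2 of the child count */
	struct node *child[];      /* flexible child-pointer array */
};

static struct tnode *tnode_new(int bits)
{
	/* Header plus (1 << bits) child pointers, as in the hunk above;
	 * calloc stands in for the zeroing tnode_alloc(). */
	size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
	struct tnode *tn = calloc(1, sz);

	if (tn)
		tn->bits = bits;
	return tn;
}

int main(void)
{
	struct tnode *tn = tnode_new(5);    /* 32 children */
	if (tn)
		printf("allocated %zu-byte tnode\n",
		       sizeof(struct tnode) +
		       (sizeof(struct node *) << tn->bits));
	free(tn);
	return 0;
}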
@@ -421,7 +428,8 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n)
421 return ((struct tnode *) n)->pos == tn->pos + tn->bits; 428 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
422} 429}
423 430
424static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n) 431static inline void put_child(struct trie *t, struct tnode *tn, int i,
432 struct node *n)
425{ 433{
426 tnode_put_child_reorg(tn, i, n, -1); 434 tnode_put_child_reorg(tn, i, n, -1);
427} 435}
@@ -431,14 +439,14 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, struct nod
431 * Update the value of full_children and empty_children. 439 * Update the value of full_children and empty_children.
432 */ 440 */
433 441
434static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) 442static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
443 int wasfull)
435{ 444{
436 struct node *chi = tn->child[i]; 445 struct node *chi = tn->child[i];
437 int isfull; 446 int isfull;
438 447
439 BUG_ON(i >= 1<<tn->bits); 448 BUG_ON(i >= 1<<tn->bits);
440 449
441
442 /* update emptyChildren */ 450 /* update emptyChildren */
443 if (n == NULL && chi != NULL) 451 if (n == NULL && chi != NULL)
444 tn->empty_children++; 452 tn->empty_children++;
@@ -571,11 +579,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
571 err = 0; 579 err = 0;
572 max_resize = 10; 580 max_resize = 10;
573 while ((tn->full_children > 0 && max_resize-- && 581 while ((tn->full_children > 0 && max_resize-- &&
574 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 582 50 * (tn->full_children + tnode_child_length(tn)
575 inflate_threshold_use * tnode_child_length(tn))) { 583 - tn->empty_children)
584 >= inflate_threshold_use * tnode_child_length(tn))) {
576 585
577 old_tn = tn; 586 old_tn = tn;
578 tn = inflate(t, tn); 587 tn = inflate(t, tn);
588
579 if (IS_ERR(tn)) { 589 if (IS_ERR(tn)) {
580 tn = old_tn; 590 tn = old_tn;
581#ifdef CONFIG_IP_FIB_TRIE_STATS 591#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -587,11 +597,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
587 597
588 if (max_resize < 0) { 598 if (max_resize < 0) {
589 if (!tn->parent) 599 if (!tn->parent)
590 printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n", 600 pr_warning("Fix inflate_threshold_root."
591 inflate_threshold_root, tn->bits); 601 " Now=%d size=%d bits\n",
602 inflate_threshold_root, tn->bits);
592 else 603 else
593 printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n", 604 pr_warning("Fix inflate_threshold."
594 inflate_threshold, tn->bits); 605 " Now=%d size=%d bits\n",
606 inflate_threshold, tn->bits);
595 } 607 }
596 608
597 check_tnode(tn); 609 check_tnode(tn);
@@ -628,11 +640,13 @@ static struct node *resize(struct trie *t, struct tnode *tn)
628 640
629 if (max_resize < 0) { 641 if (max_resize < 0) {
630 if (!tn->parent) 642 if (!tn->parent)
631 printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n", 643 pr_warning("Fix halve_threshold_root."
632 halve_threshold_root, tn->bits); 644 " Now=%d size=%d bits\n",
645 halve_threshold_root, tn->bits);
633 else 646 else
634 printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n", 647 pr_warning("Fix halve_threshold."
635 halve_threshold, tn->bits); 648 " Now=%d size=%d bits\n",
649 halve_threshold, tn->bits);
636 } 650 }
637 651
638 /* Only one child remains */ 652 /* Only one child remains */
@@ -656,7 +670,6 @@ static struct node *resize(struct trie *t, struct tnode *tn)
656 670
657static struct tnode *inflate(struct trie *t, struct tnode *tn) 671static struct tnode *inflate(struct trie *t, struct tnode *tn)
658{ 672{
659 struct tnode *inode;
660 struct tnode *oldtnode = tn; 673 struct tnode *oldtnode = tn;
661 int olen = tnode_child_length(tn); 674 int olen = tnode_child_length(tn);
662 int i; 675 int i;
@@ -676,8 +689,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
676 */ 689 */
677 690
678 for (i = 0; i < olen; i++) { 691 for (i = 0; i < olen; i++) {
679 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); 692 struct tnode *inode;
680 693
694 inode = (struct tnode *) tnode_get_child(oldtnode, i);
681 if (inode && 695 if (inode &&
682 IS_TNODE(inode) && 696 IS_TNODE(inode) &&
683 inode->pos == oldtnode->pos + oldtnode->bits && 697 inode->pos == oldtnode->pos + oldtnode->bits &&
@@ -704,6 +718,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
704 } 718 }
705 719
706 for (i = 0; i < olen; i++) { 720 for (i = 0; i < olen; i++) {
721 struct tnode *inode;
707 struct node *node = tnode_get_child(oldtnode, i); 722 struct node *node = tnode_get_child(oldtnode, i);
708 struct tnode *left, *right; 723 struct tnode *left, *right;
709 int size, j; 724 int size, j;
@@ -716,8 +731,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
716 731
717 if (IS_LEAF(node) || ((struct tnode *) node)->pos > 732 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
718 tn->pos + tn->bits - 1) { 733 tn->pos + tn->bits - 1) {
719 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, 734 if (tkey_extract_bits(node->key,
720 1) == 0) 735 oldtnode->pos + oldtnode->bits,
736 1) == 0)
721 put_child(t, tn, 2*i, node); 737 put_child(t, tn, 2*i, node);
722 else 738 else
723 put_child(t, tn, 2*i+1, node); 739 put_child(t, tn, 2*i+1, node);
@@ -877,19 +893,6 @@ nomem:
877 } 893 }
878} 894}
879 895
880static void trie_init(struct trie *t)
881{
882 if (!t)
883 return;
884
885 t->size = 0;
886 rcu_assign_pointer(t->trie, NULL);
887 t->revision = 0;
888#ifdef CONFIG_IP_FIB_TRIE_STATS
889 memset(&t->stats, 0, sizeof(struct trie_use_stats));
890#endif
891}
892
893/* readside must use rcu_read_lock currently dump routines 896/* readside must use rcu_read_lock currently dump routines
894 via get_fa_head and dump */ 897 via get_fa_head and dump */
895 898
@@ -906,7 +909,7 @@ static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
906 return NULL; 909 return NULL;
907} 910}
908 911
909static inline struct list_head * get_fa_head(struct leaf *l, int plen) 912static inline struct list_head *get_fa_head(struct leaf *l, int plen)
910{ 913{
911 struct leaf_info *li = find_leaf_info(l, plen); 914 struct leaf_info *li = find_leaf_info(l, plen);
912 915
@@ -956,7 +959,10 @@ fib_find_node(struct trie *t, u32 key)
956 959
957 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 960 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
958 pos = tn->pos + tn->bits; 961 pos = tn->pos + tn->bits;
959 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 962 n = tnode_get_child_rcu(tn,
963 tkey_extract_bits(key,
964 tn->pos,
965 tn->bits));
960 } else 966 } else
961 break; 967 break;
962 } 968 }
@@ -977,8 +983,10 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
977 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 983 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
978 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 984 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
979 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 985 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
980 tn = (struct tnode *) resize (t, (struct tnode *)tn); 986 tn = (struct tnode *) resize(t, (struct tnode *)tn);
981 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); 987
988 tnode_put_child_reorg((struct tnode *)tp, cindex,
989 (struct node *)tn, wasfull);
982 990
983 tp = node_parent((struct node *) tn); 991 tp = node_parent((struct node *) tn);
984 if (!tp) 992 if (!tp)
@@ -988,15 +996,14 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
988 996
989 /* Handle last (top) tnode */ 997 /* Handle last (top) tnode */
990 if (IS_TNODE(tn)) 998 if (IS_TNODE(tn))
991 tn = (struct tnode*) resize(t, (struct tnode *)tn); 999 tn = (struct tnode *)resize(t, (struct tnode *)tn);
992 1000
993 return (struct node*) tn; 1001 return (struct node *)tn;
994} 1002}
995 1003
996/* only used from updater-side */ 1004/* only used from updater-side */
997 1005
998static struct list_head * 1006static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
999fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1000{ 1007{
1001 int pos, newpos; 1008 int pos, newpos;
1002 struct tnode *tp = NULL, *tn = NULL; 1009 struct tnode *tp = NULL, *tn = NULL;
@@ -1036,7 +1043,10 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1036 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 1043 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
1037 tp = tn; 1044 tp = tn;
1038 pos = tn->pos + tn->bits; 1045 pos = tn->pos + tn->bits;
1039 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 1046 n = tnode_get_child(tn,
1047 tkey_extract_bits(key,
1048 tn->pos,
1049 tn->bits));
1040 1050
1041 BUG_ON(n && node_parent(n) != tn); 1051 BUG_ON(n && node_parent(n) != tn);
1042 } else 1052 } else
@@ -1054,34 +1064,27 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1054 /* Case 1: n is a leaf. Compare prefixes */ 1064 /* Case 1: n is a leaf. Compare prefixes */
1055 1065
1056 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 1066 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1057 struct leaf *l = (struct leaf *) n; 1067 l = (struct leaf *) n;
1058
1059 li = leaf_info_new(plen); 1068 li = leaf_info_new(plen);
1060 1069
1061 if (!li) { 1070 if (!li)
1062 *err = -ENOMEM; 1071 return NULL;
1063 goto err;
1064 }
1065 1072
1066 fa_head = &li->falh; 1073 fa_head = &li->falh;
1067 insert_leaf_info(&l->list, li); 1074 insert_leaf_info(&l->list, li);
1068 goto done; 1075 goto done;
1069 } 1076 }
1070 t->size++;
1071 l = leaf_new(); 1077 l = leaf_new();
1072 1078
1073 if (!l) { 1079 if (!l)
1074 *err = -ENOMEM; 1080 return NULL;
1075 goto err;
1076 }
1077 1081
1078 l->key = key; 1082 l->key = key;
1079 li = leaf_info_new(plen); 1083 li = leaf_info_new(plen);
1080 1084
1081 if (!li) { 1085 if (!li) {
1082 tnode_free((struct tnode *) l); 1086 tnode_free((struct tnode *) l);
1083 *err = -ENOMEM; 1087 return NULL;
1084 goto err;
1085 } 1088 }
1086 1089
1087 fa_head = &li->falh; 1090 fa_head = &li->falh;
@@ -1117,8 +1120,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1117 if (!tn) { 1120 if (!tn) {
1118 free_leaf_info(li); 1121 free_leaf_info(li);
1119 tnode_free((struct tnode *) l); 1122 tnode_free((struct tnode *) l);
1120 *err = -ENOMEM; 1123 return NULL;
1121 goto err;
1122 } 1124 }
1123 1125
1124 node_set_parent((struct node *)tn, tp); 1126 node_set_parent((struct node *)tn, tp);
@@ -1129,23 +1131,23 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1129 1131
1130 if (tp) { 1132 if (tp) {
1131 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1133 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1132 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); 1134 put_child(t, (struct tnode *)tp, cindex,
1135 (struct node *)tn);
1133 } else { 1136 } else {
1134 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */ 1137 rcu_assign_pointer(t->trie, (struct node *)tn);
1135 tp = tn; 1138 tp = tn;
1136 } 1139 }
1137 } 1140 }
1138 1141
1139 if (tp && tp->pos + tp->bits > 32) 1142 if (tp && tp->pos + tp->bits > 32)
1140 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 1143 pr_warning("fib_trie"
1141 tp, tp->pos, tp->bits, key, plen); 1144 " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1145 tp, tp->pos, tp->bits, key, plen);
1142 1146
1143 /* Rebalance the trie */ 1147 /* Rebalance the trie */
1144 1148
1145 rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 1149 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
1146done: 1150done:
1147 t->revision++;
1148err:
1149 return fa_head; 1151 return fa_head;
1150} 1152}
1151 1153
@@ -1253,10 +1255,10 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1253 break; 1255 break;
1254 if (fa->fa_type == cfg->fc_type && 1256 if (fa->fa_type == cfg->fc_type &&
1255 fa->fa_scope == cfg->fc_scope && 1257 fa->fa_scope == cfg->fc_scope &&
1256 fa->fa_info == fi) { 1258 fa->fa_info == fi)
1257 goto out; 1259 goto out;
1258 }
1259 } 1260 }
1261
1260 if (!(cfg->fc_nlflags & NLM_F_APPEND)) 1262 if (!(cfg->fc_nlflags & NLM_F_APPEND))
1261 fa = fa_orig; 1263 fa = fa_orig;
1262 } 1264 }
@@ -1279,10 +1281,11 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1279 */ 1281 */
1280 1282
1281 if (!fa_head) { 1283 if (!fa_head) {
1282 err = 0; 1284 fa_head = fib_insert_node(t, key, plen);
1283 fa_head = fib_insert_node(t, &err, key, plen); 1285 if (unlikely(!fa_head)) {
1284 if (err) 1286 err = -ENOMEM;
1285 goto out_free_new_fa; 1287 goto out_free_new_fa;
1288 }
1286 } 1289 }
1287 1290
1288 list_add_tail_rcu(&new_fa->fa_list, 1291 list_add_tail_rcu(&new_fa->fa_list,
@@ -1302,40 +1305,41 @@ err:
1302 return err; 1305 return err;
1303} 1306}
1304 1307
1305
1306/* should be called with rcu_read_lock */ 1308/* should be called with rcu_read_lock */
1307static inline int check_leaf(struct trie *t, struct leaf *l, 1309static int check_leaf(struct trie *t, struct leaf *l,
1308 t_key key, int *plen, const struct flowi *flp, 1310 t_key key, const struct flowi *flp,
1309 struct fib_result *res) 1311 struct fib_result *res)
1310{ 1312{
1311 int err, i;
1312 __be32 mask;
1313 struct leaf_info *li; 1313 struct leaf_info *li;
1314 struct hlist_head *hhead = &l->list; 1314 struct hlist_head *hhead = &l->list;
1315 struct hlist_node *node; 1315 struct hlist_node *node;
1316 1316
1317 hlist_for_each_entry_rcu(li, node, hhead, hlist) { 1317 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1318 i = li->plen; 1318 int err;
1319 mask = inet_make_mask(i); 1319 int plen = li->plen;
1320 __be32 mask = inet_make_mask(plen);
1321
1320 if (l->key != (key & ntohl(mask))) 1322 if (l->key != (key & ntohl(mask)))
1321 continue; 1323 continue;
1322 1324
1323 if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) { 1325 err = fib_semantic_match(&li->falh, flp, res,
1324 *plen = i; 1326 htonl(l->key), mask, plen);
1327
1325#ifdef CONFIG_IP_FIB_TRIE_STATS 1328#ifdef CONFIG_IP_FIB_TRIE_STATS
1329 if (err <= 0)
1326 t->stats.semantic_match_passed++; 1330 t->stats.semantic_match_passed++;
1331 else
1332 t->stats.semantic_match_miss++;
1327#endif 1333#endif
1328 return err; 1334 if (err <= 0)
1329 } 1335 return plen;
1330#ifdef CONFIG_IP_FIB_TRIE_STATS
1331 t->stats.semantic_match_miss++;
1332#endif
1333 } 1336 }
1334 return 1; 1337
1338 return -1;
1335} 1339}
1336 1340
1337static int 1341static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1338fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 1342 struct fib_result *res)
1339{ 1343{
1340 struct trie *t = (struct trie *) tb->tb_data; 1344 struct trie *t = (struct trie *) tb->tb_data;
1341 int plen, ret = 0; 1345 int plen, ret = 0;
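
check_leaf() in the hunk above now reports the matched prefix length directly (or -1) instead of passing it back through a pointer argument. The underlying test is the classic masked compare; a host-order sketch, noting that the kernel's inet_make_mask() produces big-endian masks:

#include <stdio.h>
#include <stdint.h>

static uint32_t make_mask(int plen)
{
	return plen ? ~0U << (32 - plen) : 0;
}

/* A leaf matches when the search key, masked to the stored prefix
 * length, equals the leaf key; report that length on success. */
static int check_prefix(uint32_t leaf_key, uint32_t key, int plen)
{
	if (leaf_key != (key & make_mask(plen)))
		return -1;      /* no match */
	return plen;            /* match: report prefix length */
}

int main(void)
{
	uint32_t leaf = 0xC0A80100; /* 192.168.1.0 */
	uint32_t key  = 0xC0A80142; /* 192.168.1.66 */

	printf("/24 -> %d, /28 -> %d\n",
	       check_prefix(leaf, key, 24),   /* 24: matches */
	       check_prefix(leaf, key, 28));  /* -1: .64/28 != .0 */
	return 0;
}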
@@ -1362,10 +1366,13 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1362 1366
1363 /* Just a leaf? */ 1367 /* Just a leaf? */
1364 if (IS_LEAF(n)) { 1368 if (IS_LEAF(n)) {
1365 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) 1369 plen = check_leaf(t, (struct leaf *)n, key, flp, res);
1366 goto found; 1370 if (plen < 0)
1367 goto failed; 1371 goto failed;
1372 ret = 0;
1373 goto found;
1368 } 1374 }
1375
1369 pn = (struct tnode *) n; 1376 pn = (struct tnode *) n;
1370 chopped_off = 0; 1377 chopped_off = 0;
1371 1378
@@ -1387,14 +1394,14 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1387 } 1394 }
1388 1395
1389 if (IS_LEAF(n)) { 1396 if (IS_LEAF(n)) {
1390 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) 1397 plen = check_leaf(t, (struct leaf *)n, key, flp, res);
1391 goto found; 1398 if (plen < 0)
1392 else
1393 goto backtrace; 1399 goto backtrace;
1400
1401 ret = 0;
1402 goto found;
1394 } 1403 }
1395 1404
1396#define HL_OPTIMIZE
1397#ifdef HL_OPTIMIZE
1398 cn = (struct tnode *)n; 1405 cn = (struct tnode *)n;
1399 1406
1400 /* 1407 /*
@@ -1423,12 +1430,13 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1423 * *are* zero. 1430 * *are* zero.
1424 */ 1431 */
1425 1432
1426 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ 1433 /* NOTA BENE: Checking only skipped bits
1434 for the new node here */
1427 1435
1428 if (current_prefix_length < pos+bits) { 1436 if (current_prefix_length < pos+bits) {
1429 if (tkey_extract_bits(cn->key, current_prefix_length, 1437 if (tkey_extract_bits(cn->key, current_prefix_length,
1430 cn->pos - current_prefix_length) != 0 || 1438 cn->pos - current_prefix_length)
1431 !(cn->child[0])) 1439 || !(cn->child[0]))
1432 goto backtrace; 1440 goto backtrace;
1433 } 1441 }
1434 1442
@@ -1451,14 +1459,17 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1451 * new tnode's key. 1459 * new tnode's key.
1452 */ 1460 */
1453 1461
1454 /* Note: We aren't very concerned about the piece of the key 1462 /*
1455 * that precede pn->pos+pn->bits, since these have already been 1463 * Note: We aren't very concerned about the piece of
1456 * checked. The bits after cn->pos aren't checked since these are 1464 * the key that precede pn->pos+pn->bits, since these
1457 * by definition "unknown" at this point. Thus, what we want to 1465 * have already been checked. The bits after cn->pos
1458 * see is if we are about to enter the "prefix matching" state, 1466 * aren't checked since these are by definition
1459 * and in that case verify that the skipped bits that will prevail 1467 * "unknown" at this point. Thus, what we want to see
1460 * throughout this subtree are zero, as they have to be if we are 1468 * is if we are about to enter the "prefix matching"
1461 * to find a matching prefix. 1469 * state, and in that case verify that the skipped
1470 * bits that will prevail throughout this subtree are
1471 * zero, as they have to be if we are to find a
1472 * matching prefix.
1462 */ 1473 */
1463 1474
1464 node_prefix = mask_pfx(cn->key, cn->pos); 1475 node_prefix = mask_pfx(cn->key, cn->pos);
@@ -1466,13 +1477,15 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1466 pref_mismatch = key_prefix^node_prefix; 1477 pref_mismatch = key_prefix^node_prefix;
1467 mp = 0; 1478 mp = 0;
1468 1479
1469 /* In short: If skipped bits in this node do not match the search 1480 /*
1470 * key, enter the "prefix matching" state directly. 1481 * In short: If skipped bits in this node do not match
1482 * the search key, enter the "prefix matching"
1483 * state directly.
1471 */ 1484 */
1472 if (pref_mismatch) { 1485 if (pref_mismatch) {
1473 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 1486 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1474 mp++; 1487 mp++;
1475 pref_mismatch = pref_mismatch <<1; 1488 pref_mismatch = pref_mismatch << 1;
1476 } 1489 }
1477 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 1490 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1478 1491
@@ -1482,7 +1495,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1482 if (current_prefix_length >= cn->pos) 1495 if (current_prefix_length >= cn->pos)
1483 current_prefix_length = mp; 1496 current_prefix_length = mp;
1484 } 1497 }
1485#endif 1498
1486 pn = (struct tnode *)n; /* Descend */ 1499 pn = (struct tnode *)n; /* Descend */
1487 chopped_off = 0; 1500 chopped_off = 0;
1488 continue; 1501 continue;
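
Editor's note: the pref_mismatch loop above locates the most significant bit at which the search key and the node prefix disagree by shifting left until bit 31 is set — which is exactly a count-leading-zeros. A sketch, assuming (as the kernel code does, since the loop sits inside "if (pref_mismatch)") that the argument is nonzero, with GCC/Clang's __builtin_clz shown for comparison:

#include <stdint.h>
#include <stdio.h>

#define KEYLENGTH 32

/* Index (from the MSB) of the first set bit; pref_mismatch != 0. */
static int first_mismatch(uint32_t pref_mismatch)
{
    int mp = 0;

    while (!(pref_mismatch & (1u << (KEYLENGTH - 1)))) {
        mp++;
        pref_mismatch <<= 1;
    }
    return mp;
}

int main(void)
{
    uint32_t diff = 0x0a010203 ^ 0x0a010303;   /* keys differ at bit 23 */

    printf("%d\n", first_mismatch(diff));      /* 23 */
    printf("%d\n", __builtin_clz(diff));       /* same answer via clz */
    return 0;
}
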
@@ -1491,12 +1504,14 @@ backtrace:
1491 chopped_off++; 1504 chopped_off++;
1492 1505
1493 /* As zero don't change the child key (cindex) */ 1506 /* As zero don't change the child key (cindex) */
1494 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) 1507 while ((chopped_off <= pn->bits)
1508 && !(cindex & (1<<(chopped_off-1))))
1495 chopped_off++; 1509 chopped_off++;
1496 1510
1497 /* Decrease current_... with bits chopped off */ 1511 /* Decrease current_... with bits chopped off */
1498 if (current_prefix_length > pn->pos + pn->bits - chopped_off) 1512 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1499 current_prefix_length = pn->pos + pn->bits - chopped_off; 1513 current_prefix_length = pn->pos + pn->bits
1514 - chopped_off;
1500 1515
1501 /* 1516 /*
1502 * Either we do the actual chop off according or if we have 1517 * Either we do the actual chop off according or if we have
@@ -1528,52 +1543,23 @@ found:
1528 return ret; 1543 return ret;
1529} 1544}
1530 1545
1531/* only called from updater side */ 1546/*
1532static int trie_leaf_remove(struct trie *t, t_key key) 1547 * Remove the leaf and rebalance the trie.
1548 */
1549static void trie_leaf_remove(struct trie *t, struct leaf *l)
1533{ 1550{
1534 t_key cindex; 1551 struct tnode *tp = node_parent((struct node *) l);
1535 struct tnode *tp = NULL;
1536 struct node *n = t->trie;
1537 struct leaf *l;
1538
1539 pr_debug("entering trie_leaf_remove(%p)\n", n);
1540
1541 /* Note that in the case skipped bits, those bits are *not* checked!
1542 * When we finish this, we will have NULL or a T_LEAF, and the
1543 * T_LEAF may or may not match our key.
1544 */
1545
1546 while (n != NULL && IS_TNODE(n)) {
1547 struct tnode *tn = (struct tnode *) n;
1548 check_tnode(tn);
1549 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1550
1551 BUG_ON(n && node_parent(n) != tn);
1552 }
1553 l = (struct leaf *) n;
1554
1555 if (!n || !tkey_equals(l->key, key))
1556 return 0;
1557
1558 /*
1559 * Key found.
1560 * Remove the leaf and rebalance the tree
1561 */
1562
1563 t->revision++;
1564 t->size--;
1565 1552
1566 tp = node_parent(n); 1553 pr_debug("entering trie_leaf_remove(%p)\n", l);
1567 tnode_free((struct tnode *) n);
1568 1554
1569 if (tp) { 1555 if (tp) {
1570 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1556 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
1571 put_child(t, (struct tnode *)tp, cindex, NULL); 1557 put_child(t, (struct tnode *)tp, cindex, NULL);
1572 rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); 1558 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
1573 } else 1559 } else
1574 rcu_assign_pointer(t->trie, NULL); 1560 rcu_assign_pointer(t->trie, NULL);
1575 1561
1576 return 1; 1562 tnode_free((struct tnode *) l);
1577} 1563}
1578 1564
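
Editor's note: trie_leaf_remove() shrinks from a full root-to-leaf re-descent to a constant-time unlink because every node carries a parent back pointer. A user-space sketch of the same shape, with simplified two-way nodes standing in for tnodes and the RCU rebalance reduced to a comment:

#include <stdio.h>
#include <stdlib.h>

struct tnode {
    struct tnode *parent;
    struct tnode *child[2];
};

static void leaf_remove(struct tnode **root, struct tnode *leaf)
{
    struct tnode *tp = leaf->parent;

    if (tp) {
        int cindex = tp->child[1] == leaf;  /* which slot holds us */

        tp->child[cindex] = NULL;           /* unlink under the parent */
        /* the kernel rebalances tp here and republishes via RCU */
    } else {
        *root = NULL;                       /* trie was just this leaf */
    }
    free(leaf);
}

int main(void)
{
    struct tnode *root = calloc(1, sizeof(*root));
    struct tnode *leaf = calloc(1, sizeof(*leaf));

    leaf->parent = root;
    root->child[0] = leaf;
    leaf_remove(&root, leaf);
    printf("slot after removal: %p\n", (void *)root->child[0]);
    free(root);
    return 0;
}
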
1579/* 1565/*
@@ -1651,7 +1637,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
1651 } 1637 }
1652 1638
1653 if (hlist_empty(&l->list)) 1639 if (hlist_empty(&l->list))
1654 trie_leaf_remove(t, key); 1640 trie_leaf_remove(t, l);
1655 1641
1656 if (fa->fa_state & FA_S_ACCESSED) 1642 if (fa->fa_state & FA_S_ACCESSED)
1657 rt_cache_flush(-1); 1643 rt_cache_flush(-1);
@@ -1697,64 +1683,64 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l)
1697 return found; 1683 return found;
1698} 1684}
1699 1685
1700/* rcu_read_lock needs to be held by the caller on the read side */ 1686/*
1701 1687 * Scan for the next right leaf starting at node p->child[idx]
1702static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) 1688 * Since we have a back pointer, no recursion is necessary.
1689 */
1690static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1703{ 1691{
1704 struct node *c = (struct node *) thisleaf; 1692 do {
1705 struct tnode *p; 1693 t_key idx;
1706 int idx;
1707 struct node *trie = rcu_dereference(t->trie);
1708
1709 if (c == NULL) {
1710 if (trie == NULL)
1711 return NULL;
1712
1713 if (IS_LEAF(trie)) /* trie w. just a leaf */
1714 return (struct leaf *) trie;
1715
1716 p = (struct tnode*) trie; /* Start */
1717 } else
1718 p = node_parent(c);
1719
1720 while (p) {
1721 int pos, last;
1722 1694
1723 /* Find the next child of the parent */
1724 if (c) 1695 if (c)
1725 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); 1696 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
1726 else 1697 else
1727 pos = 0; 1698 idx = 0;
1728
1729 last = 1 << p->bits;
1730 for (idx = pos; idx < last ; idx++) {
1731 c = rcu_dereference(p->child[idx]);
1732 1699
1700 while (idx < 1u << p->bits) {
1701 c = tnode_get_child_rcu(p, idx++);
1733 if (!c) 1702 if (!c)
1734 continue; 1703 continue;
1735 1704
1736 /* Descend if tnode */ 1705 if (IS_LEAF(c)) {
1737 while (IS_TNODE(c)) { 1706 prefetch(p->child[idx]);
1738 p = (struct tnode *) c; 1707 return (struct leaf *) c;
1739 idx = 0;
1740
1741 /* Rightmost non-NULL branch */
1742 if (p && IS_TNODE(p))
1743 while (!(c = rcu_dereference(p->child[idx]))
1744 && idx < (1<<p->bits)) idx++;
1745
1746 /* Done with this tnode? */
1747 if (idx >= (1 << p->bits) || !c)
1748 goto up;
1749 } 1708 }
1750 return (struct leaf *) c; 1709
1710 /* Restart the scan inside the new node */
1711 p = (struct tnode *) c;
1712 idx = 0;
1751 } 1713 }
1752up: 1714
1753 /* No more children go up one step */ 1715 /* Node empty, walk back up to parent */
1754 c = (struct node *) p; 1716 c = (struct node *) p;
1755 p = node_parent(c); 1717 } while ( (p = node_parent_rcu(c)) != NULL);
1756 } 1718
1757 return NULL; /* Ready. Root of trie */ 1719 return NULL; /* Root of trie */
1720}
1721
1722static struct leaf *trie_firstleaf(struct trie *t)
1723{
1724 struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
1725
1726 if (!n)
1727 return NULL;
1728
1729 if (IS_LEAF(n)) /* trie is just a leaf */
1730 return (struct leaf *) n;
1731
1732 return leaf_walk_rcu(n, NULL);
1733}
1734
1735static struct leaf *trie_nextleaf(struct leaf *l)
1736{
1737 struct node *c = (struct node *) l;
1738 struct tnode *p = node_parent(c);
1739
1740 if (!p)
1741 return NULL; /* trie with just one leaf */
1742
1743 return leaf_walk_rcu(p, c);
1758} 1744}
1759 1745
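
Editor's note: leaf_walk_rcu() replaces the goto-laden nextleaf() with a plain loop — resume just past child c under parent p, descend into the first non-empty subtree, and climb via parent pointers when a node is exhausted; no recursion and no explicit stack. A compilable sketch over a plain binary tree (the kernel version handles 2^bits children per tnode and RCU dereferencing):

#include <stdio.h>

struct node {
    struct node *parent;
    struct node *child[2];
    int is_leaf;
    int key;
};

/* Next leaf to the right of child c under parent p (c NULL = first). */
static struct node *leaf_walk(struct node *p, struct node *c)
{
    do {
        int idx = c ? (p->child[1] == c) + 1 : 0;

        while (idx < 2) {
            c = p->child[idx++];
            if (!c)
                continue;
            if (c->is_leaf)
                return c;
            p = c;            /* descend: restart scan in the new node */
            idx = 0;
        }
        c = p;                /* subtree exhausted, climb */
    } while ((p = c->parent) != NULL);

    return NULL;              /* walked off the root: done */
}

int main(void)
{
    struct node root = {0}, l0 = {0}, l1 = {0};

    l0.is_leaf = 1; l0.key = 10; l0.parent = &root;
    l1.is_leaf = 1; l1.key = 20; l1.parent = &root;
    root.child[0] = &l0; root.child[1] = &l1;

    for (struct node *l = leaf_walk(&root, NULL); l;
         l = l->parent ? leaf_walk(l->parent, l) : NULL)
        printf("leaf %d\n", l->key);
    return 0;
}
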
1760/* 1746/*
@@ -1763,30 +1749,27 @@ up:
1763static int fn_trie_flush(struct fib_table *tb) 1749static int fn_trie_flush(struct fib_table *tb)
1764{ 1750{
1765 struct trie *t = (struct trie *) tb->tb_data; 1751 struct trie *t = (struct trie *) tb->tb_data;
1766 struct leaf *ll = NULL, *l = NULL; 1752 struct leaf *l, *ll = NULL;
1767 int found = 0, h; 1753 int found = 0;
1768
1769 t->revision++;
1770 1754
1771 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 1755 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
1772 found += trie_flush_leaf(t, l); 1756 found += trie_flush_leaf(t, l);
1773 1757
1774 if (ll && hlist_empty(&ll->list)) 1758 if (ll && hlist_empty(&ll->list))
1775 trie_leaf_remove(t, ll->key); 1759 trie_leaf_remove(t, ll);
1776 ll = l; 1760 ll = l;
1777 } 1761 }
1778 1762
1779 if (ll && hlist_empty(&ll->list)) 1763 if (ll && hlist_empty(&ll->list))
1780 trie_leaf_remove(t, ll->key); 1764 trie_leaf_remove(t, ll);
1781 1765
1782 pr_debug("trie_flush found=%d\n", found); 1766 pr_debug("trie_flush found=%d\n", found);
1783 return found; 1767 return found;
1784} 1768}
1785 1769
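
Editor's note: fn_trie_flush() walks with a trailing pointer — the leaf behind the iterator (ll) is removed only after the walk has stepped past it, so the iterator never stands on freed memory, and the final leaf is handled after the loop. The same delete-behind idiom on a singly linked list:

#include <stdlib.h>

struct leaf { struct leaf *next; int nroutes; };

/* Free a leaf only after the walk has stepped past it. */
static void flush(struct leaf **phead)
{
    struct leaf **pprev = phead, *ll = NULL, *l;

    for (l = *phead; l; l = l->next) {
        /* ...expire routes here, possibly leaving l->nroutes == 0... */
        if (ll) {
            if (ll->nroutes == 0) {
                *pprev = l;          /* unlink the leaf behind us */
                free(ll);
            } else {
                pprev = &ll->next;
            }
        }
        ll = l;
    }
    if (ll && ll->nroutes == 0) {    /* the last leaf, after the loop */
        *pprev = NULL;
        free(ll);
    }
}

int main(void)
{
    struct leaf *a = calloc(1, sizeof(*a));
    struct leaf *b = calloc(1, sizeof(*b));

    a->next = b;
    b->nroutes = 1;   /* a is empty and gets reclaimed mid-walk */
    flush(&a);
    free(a);          /* a now points at the surviving leaf b */
    return 0;
}
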
1786static int trie_last_dflt = -1; 1770static void fn_trie_select_default(struct fib_table *tb,
1787 1771 const struct flowi *flp,
1788static void 1772 struct fib_result *res)
1789fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1790{ 1773{
1791 struct trie *t = (struct trie *) tb->tb_data; 1774 struct trie *t = (struct trie *) tb->tb_data;
1792 int order, last_idx; 1775 int order, last_idx;
@@ -1831,48 +1814,38 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
1831 if (next_fi != res->fi) 1814 if (next_fi != res->fi)
1832 break; 1815 break;
1833 } else if (!fib_detect_death(fi, order, &last_resort, 1816 } else if (!fib_detect_death(fi, order, &last_resort,
1834 &last_idx, &trie_last_dflt)) { 1817 &last_idx, tb->tb_default)) {
1835 if (res->fi) 1818 fib_result_assign(res, fi);
1836 fib_info_put(res->fi); 1819 tb->tb_default = order;
1837 res->fi = fi;
1838 atomic_inc(&fi->fib_clntref);
1839 trie_last_dflt = order;
1840 goto out; 1820 goto out;
1841 } 1821 }
1842 fi = next_fi; 1822 fi = next_fi;
1843 order++; 1823 order++;
1844 } 1824 }
1845 if (order <= 0 || fi == NULL) { 1825 if (order <= 0 || fi == NULL) {
1846 trie_last_dflt = -1; 1826 tb->tb_default = -1;
1847 goto out; 1827 goto out;
1848 } 1828 }
1849 1829
1850 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { 1830 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1851 if (res->fi) 1831 tb->tb_default)) {
1852 fib_info_put(res->fi); 1832 fib_result_assign(res, fi);
1853 res->fi = fi; 1833 tb->tb_default = order;
1854 atomic_inc(&fi->fib_clntref);
1855 trie_last_dflt = order;
1856 goto out; 1834 goto out;
1857 } 1835 }
1858 if (last_idx >= 0) { 1836 if (last_idx >= 0)
1859 if (res->fi) 1837 fib_result_assign(res, last_resort);
1860 fib_info_put(res->fi); 1838 tb->tb_default = last_idx;
1861 res->fi = last_resort; 1839out:
1862 if (last_resort)
1863 atomic_inc(&last_resort->fib_clntref);
1864 }
1865 trie_last_dflt = last_idx;
1866 out:;
1867 rcu_read_unlock(); 1840 rcu_read_unlock();
1868} 1841}
1869 1842
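
Editor's note: the select-default rework replaces every open-coded fib_info_put()/atomic_inc() pair with fib_result_assign() and moves the last-default cursor from a file-scope static into the table itself (tb->tb_default), so tables no longer share state. A sketch of the reference-swap idiom with a plain counter; taking the new reference before dropping the old one keeps the old == new case safe:

#include <stdlib.h>

struct info { int refcnt; };

static void info_get(struct info *fi) { if (fi) fi->refcnt++; }
static void info_put(struct info *fi)
{
    if (fi && --fi->refcnt == 0)
        free(fi);
}

struct result { struct info *fi; };

/* Swap the result's reference: new ref first, so old == new is safe. */
static void result_assign(struct result *res, struct info *fi)
{
    info_get(fi);
    info_put(res->fi);
    res->fi = fi;
}

int main(void)
{
    struct info *fi = calloc(1, sizeof(*fi));
    struct result res = { NULL };

    fi->refcnt = 1;           /* creator's reference */
    result_assign(&res, fi);  /* res holds it too: refcnt == 2 */
    result_assign(&res, fi);  /* old == new: still refcnt == 2 */
    result_assign(&res, NULL);
    info_put(fi);             /* last put frees */
    return 0;
}
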
1870static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, 1843static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1844 struct fib_table *tb,
1871 struct sk_buff *skb, struct netlink_callback *cb) 1845 struct sk_buff *skb, struct netlink_callback *cb)
1872{ 1846{
1873 int i, s_i; 1847 int i, s_i;
1874 struct fib_alias *fa; 1848 struct fib_alias *fa;
1875
1876 __be32 xkey = htonl(key); 1849 __be32 xkey = htonl(key);
1877 1850
1878 s_i = cb->args[4]; 1851 s_i = cb->args[4];
@@ -1885,7 +1858,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1885 i++; 1858 i++;
1886 continue; 1859 continue;
1887 } 1860 }
1888 BUG_ON(!fa->fa_info);
1889 1861
1890 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 1862 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1891 cb->nlh->nlmsg_seq, 1863 cb->nlh->nlmsg_seq,
@@ -1896,7 +1868,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1896 xkey, 1868 xkey,
1897 plen, 1869 plen,
1898 fa->fa_tos, 1870 fa->fa_tos,
1899 fa->fa_info, 0) < 0) { 1871 fa->fa_info, NLM_F_MULTI) < 0) {
1900 cb->args[4] = i; 1872 cb->args[4] = i;
1901 return -1; 1873 return -1;
1902 } 1874 }
@@ -1906,109 +1878,118 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1906 return skb->len; 1878 return skb->len;
1907} 1879}
1908 1880
1909static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb, 1881static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1910 struct netlink_callback *cb) 1882 struct sk_buff *skb, struct netlink_callback *cb)
1911{ 1883{
1912 int h, s_h; 1884 struct leaf_info *li;
1913 struct list_head *fa_head; 1885 struct hlist_node *node;
1914 struct leaf *l = NULL; 1886 int i, s_i;
1915 1887
1916 s_h = cb->args[3]; 1888 s_i = cb->args[3];
1889 i = 0;
1917 1890
1918 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 1891 /* rcu_read_lock is held by the caller */
1919 if (h < s_h) 1892 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1893 if (i < s_i) {
1894 i++;
1920 continue; 1895 continue;
1921 if (h > s_h) 1896 }
1922 memset(&cb->args[4], 0,
1923 sizeof(cb->args) - 4*sizeof(cb->args[0]));
1924
1925 fa_head = get_fa_head(l, plen);
1926 1897
1927 if (!fa_head) 1898 if (i > s_i)
1928 continue; 1899 cb->args[4] = 0;
1929 1900
1930 if (list_empty(fa_head)) 1901 if (list_empty(&li->falh))
1931 continue; 1902 continue;
1932 1903
1933 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { 1904 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
1934 cb->args[3] = h; 1905 cb->args[3] = i;
1935 return -1; 1906 return -1;
1936 } 1907 }
1908 i++;
1937 } 1909 }
1938 cb->args[3] = h; 1910
1911 cb->args[3] = i;
1939 return skb->len; 1912 return skb->len;
1940} 1913}
1941 1914
1942static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) 1915static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
1916 struct netlink_callback *cb)
1943{ 1917{
1944 int m, s_m; 1918 struct leaf *l;
1945 struct trie *t = (struct trie *) tb->tb_data; 1919 struct trie *t = (struct trie *) tb->tb_data;
1946 1920 t_key key = cb->args[2];
1947 s_m = cb->args[2];
1948 1921
1949 rcu_read_lock(); 1922 rcu_read_lock();
1950 for (m = 0; m <= 32; m++) { 1923 /* Dump starting at last key.
1951 if (m < s_m) 1924 * Note: 0.0.0.0/0 (ie default) is first key.
1952 continue; 1925 */
1953 if (m > s_m) 1926 if (!key)
1954 memset(&cb->args[3], 0, 1927 l = trie_firstleaf(t);
1955 sizeof(cb->args) - 3*sizeof(cb->args[0])); 1928 else {
1929 l = fib_find_node(t, key);
1930 if (!l) {
1931 /* The table changed during the dump, rather than
1932 * giving partial data, just make application retry.
1933 */
1934 rcu_read_unlock();
1935 return -EBUSY;
1936 }
1937 }
1956 1938
1957 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { 1939 while (l) {
1958 cb->args[2] = m; 1940 cb->args[2] = l->key;
1959 goto out; 1941 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
1942 rcu_read_unlock();
1943 return -1;
1960 } 1944 }
1945
1946 l = trie_nextleaf(l);
1947 memset(&cb->args[3], 0,
1948 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1961 } 1949 }
1962 rcu_read_unlock(); 1950 rcu_read_unlock();
1963 cb->args[2] = m; 1951
1964 return skb->len; 1952 return skb->len;
1965out:
1966 rcu_read_unlock();
1967 return -1;
1968} 1953}
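
Editor's note: fn_trie_dump() stops iterating 33 prefix lengths per dump call and instead keys the restart on the last dumped leaf (cb->args[2]), with finer-grained positions in args[3]/args[4]; if that leaf vanished between calls the dump returns -EBUSY so userspace retries rather than receiving a silently inconsistent table. A stand-alone sketch of a keyed, resumable dump — an array stands in for the trie, and a single cursor for the three kernel ones (so here the cursor marks the last fully dumped entry):

#include <stdio.h>

#define DUMP_EBUSY (-1)

struct table { unsigned key[8]; int n; };

static int find(const struct table *t, unsigned key)
{
    for (int i = 0; i < t->n; i++)
        if (t->key[i] == key)
            return i;
    return -1;
}

/* Emit up to 'room' entries, starting after *cursor (0 = from the top). */
static int dump(const struct table *t, unsigned *cursor, int room)
{
    int i = 0, out = 0;

    if (*cursor) {
        i = find(t, *cursor);
        if (i < 0)
            return DUMP_EBUSY;   /* table changed underneath us */
        i++;                     /* last key was fully dumped */
    }
    for (; i < t->n && out < room; i++, out++) {
        printf("route %u\n", t->key[i]);
        *cursor = t->key[i];
    }
    return out;
}

int main(void)
{
    struct table t = { { 10, 20, 30, 40 }, 4 };
    unsigned cursor = 0;

    while (dump(&t, &cursor, 2) > 0)  /* two entries per "skb" */
        ;
    return 0;
}
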
1969 1954
1970/* Fix more generic FIB names for init later */ 1955void __init fib_hash_init(void)
1956{
1957 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1958 sizeof(struct fib_alias),
1959 0, SLAB_PANIC, NULL);
1971 1960
1972#ifdef CONFIG_IP_MULTIPLE_TABLES 1961 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
1973struct fib_table * fib_hash_init(u32 id) 1962 max(sizeof(struct leaf),
1974#else 1963 sizeof(struct leaf_info)),
1975struct fib_table * __init fib_hash_init(u32 id) 1964 0, SLAB_PANIC, NULL);
1976#endif 1965}
1966
1967
1968/* Fix more generic FIB names for init later */
1969struct fib_table *fib_hash_table(u32 id)
1977{ 1970{
1978 struct fib_table *tb; 1971 struct fib_table *tb;
1979 struct trie *t; 1972 struct trie *t;
1980 1973
1981 if (fn_alias_kmem == NULL)
1982 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1983 sizeof(struct fib_alias),
1984 0, SLAB_HWCACHE_ALIGN,
1985 NULL);
1986
1987 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 1974 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1988 GFP_KERNEL); 1975 GFP_KERNEL);
1989 if (tb == NULL) 1976 if (tb == NULL)
1990 return NULL; 1977 return NULL;
1991 1978
1992 tb->tb_id = id; 1979 tb->tb_id = id;
1980 tb->tb_default = -1;
1993 tb->tb_lookup = fn_trie_lookup; 1981 tb->tb_lookup = fn_trie_lookup;
1994 tb->tb_insert = fn_trie_insert; 1982 tb->tb_insert = fn_trie_insert;
1995 tb->tb_delete = fn_trie_delete; 1983 tb->tb_delete = fn_trie_delete;
1996 tb->tb_flush = fn_trie_flush; 1984 tb->tb_flush = fn_trie_flush;
1997 tb->tb_select_default = fn_trie_select_default; 1985 tb->tb_select_default = fn_trie_select_default;
1998 tb->tb_dump = fn_trie_dump; 1986 tb->tb_dump = fn_trie_dump;
1999 memset(tb->tb_data, 0, sizeof(struct trie));
2000 1987
2001 t = (struct trie *) tb->tb_data; 1988 t = (struct trie *) tb->tb_data;
2002 1989 memset(t, 0, sizeof(*t));
2003 trie_init(t);
2004
2005 if (id == RT_TABLE_LOCAL)
2006 trie_local = t;
2007 else if (id == RT_TABLE_MAIN)
2008 trie_main = t;
2009 1990
2010 if (id == RT_TABLE_LOCAL) 1991 if (id == RT_TABLE_LOCAL)
2011 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); 1992 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
2012 1993
2013 return tb; 1994 return tb;
2014} 1995}
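
Editor's note: cache creation moves out of the table constructor into an explicit boot-time fib_hash_init() that panics on failure (SLAB_PANIC), so fib_hash_table() loses its lazy NULL check; note also that one cache serves both leaf types by sizing objects to the larger of the two. A trivial demonstration of that sizing rule, with made-up struct layouts:

#include <stdio.h>

struct leaf { unsigned long key; void *list; };
struct leaf_info { void *hlist[2]; int plen; void *falh[2]; };

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
    /* One shared cache: object size is the max of both users. */
    printf("object size: %zu\n",
           MAX(sizeof(struct leaf), sizeof(struct leaf_info)));
    return 0;
}
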
@@ -2016,6 +1997,8 @@ struct fib_table * __init fib_hash_init(u32 id)
2016#ifdef CONFIG_PROC_FS 1997#ifdef CONFIG_PROC_FS
2017/* Depth first Trie walk iterator */ 1998/* Depth first Trie walk iterator */
2018struct fib_trie_iter { 1999struct fib_trie_iter {
2000 struct seq_net_private p;
2001 struct trie *trie_local, *trie_main;
2019 struct tnode *tnode; 2002 struct tnode *tnode;
2020 struct trie *trie; 2003 struct trie *trie;
2021 unsigned index; 2004 unsigned index;
@@ -2036,7 +2019,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2036 iter->tnode, iter->index, iter->depth); 2019 iter->tnode, iter->index, iter->depth);
2037rescan: 2020rescan:
2038 while (cindex < (1<<tn->bits)) { 2021 while (cindex < (1<<tn->bits)) {
2039 struct node *n = tnode_get_child(tn, cindex); 2022 struct node *n = tnode_get_child_rcu(tn, cindex);
2040 2023
2041 if (n) { 2024 if (n) {
2042 if (IS_LEAF(n)) { 2025 if (IS_LEAF(n)) {
@@ -2055,7 +2038,7 @@ rescan:
2055 } 2038 }
2056 2039
2057 /* Current node exhausted, pop back up */ 2040 /* Current node exhausted, pop back up */
2058 p = node_parent((struct node *)tn); 2041 p = node_parent_rcu((struct node *)tn);
2059 if (p) { 2042 if (p) {
2060 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2043 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2061 tn = p; 2044 tn = p;
@@ -2108,10 +2091,17 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2108 for (n = fib_trie_get_first(&iter, t); n; 2091 for (n = fib_trie_get_first(&iter, t); n;
2109 n = fib_trie_get_next(&iter)) { 2092 n = fib_trie_get_next(&iter)) {
2110 if (IS_LEAF(n)) { 2093 if (IS_LEAF(n)) {
2094 struct leaf *l = (struct leaf *)n;
2095 struct leaf_info *li;
2096 struct hlist_node *tmp;
2097
2111 s->leaves++; 2098 s->leaves++;
2112 s->totdepth += iter.depth; 2099 s->totdepth += iter.depth;
2113 if (iter.depth > s->maxdepth) 2100 if (iter.depth > s->maxdepth)
2114 s->maxdepth = iter.depth; 2101 s->maxdepth = iter.depth;
2102
2103 hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
2104 ++s->prefixes;
2115 } else { 2105 } else {
2116 const struct tnode *tn = (const struct tnode *) n; 2106 const struct tnode *tn = (const struct tnode *) n;
2117 int i; 2107 int i;
@@ -2140,13 +2130,17 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2140 else 2130 else
2141 avdepth = 0; 2131 avdepth = 0;
2142 2132
2143 seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); 2133 seq_printf(seq, "\tAver depth: %u.%02d\n",
2134 avdepth / 100, avdepth % 100);
2144 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); 2135 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2145 2136
2146 seq_printf(seq, "\tLeaves: %u\n", stat->leaves); 2137 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2147
2148 bytes = sizeof(struct leaf) * stat->leaves; 2138 bytes = sizeof(struct leaf) * stat->leaves;
2149 seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes); 2139
2140 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2141 bytes += sizeof(struct leaf_info) * stat->prefixes;
2142
2143 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2150 bytes += sizeof(struct tnode) * stat->tnodes; 2144 bytes += sizeof(struct tnode) * stat->tnodes;
2151 2145
2152 max = MAX_STAT_DEPTH; 2146 max = MAX_STAT_DEPTH;
@@ -2156,60 +2150,89 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2156 pointers = 0; 2150 pointers = 0;
2157 for (i = 1; i <= max; i++) 2151 for (i = 1; i <= max; i++)
2158 if (stat->nodesizes[i] != 0) { 2152 if (stat->nodesizes[i] != 0) {
2159 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); 2153 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2160 pointers += (1<<i) * stat->nodesizes[i]; 2154 pointers += (1<<i) * stat->nodesizes[i];
2161 } 2155 }
2162 seq_putc(seq, '\n'); 2156 seq_putc(seq, '\n');
2163 seq_printf(seq, "\tPointers: %d\n", pointers); 2157 seq_printf(seq, "\tPointers: %u\n", pointers);
2164 2158
2165 bytes += sizeof(struct node *) * pointers; 2159 bytes += sizeof(struct node *) * pointers;
2166 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); 2160 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2167 seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024); 2161 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2162}
2168 2163
2169#ifdef CONFIG_IP_FIB_TRIE_STATS 2164#ifdef CONFIG_IP_FIB_TRIE_STATS
2170 seq_printf(seq, "Counters:\n---------\n"); 2165static void trie_show_usage(struct seq_file *seq,
2171 seq_printf(seq,"gets = %d\n", t->stats.gets); 2166 const struct trie_use_stats *stats)
2172 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); 2167{
2173 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); 2168 seq_printf(seq, "\nCounters:\n---------\n");
2174 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); 2169 seq_printf(seq, "gets = %u\n", stats->gets);
2175 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); 2170 seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2176 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); 2171 seq_printf(seq, "semantic match passed = %u\n",
2177#ifdef CLEAR_STATS 2172 stats->semantic_match_passed);
2178 memset(&(t->stats), 0, sizeof(t->stats)); 2173 seq_printf(seq, "semantic match miss = %u\n",
2179#endif 2174 stats->semantic_match_miss);
2175 seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2176 seq_printf(seq, "skipped node resize = %u\n\n",
2177 stats->resize_node_skipped);
2178}
2180#endif /* CONFIG_IP_FIB_TRIE_STATS */ 2179#endif /* CONFIG_IP_FIB_TRIE_STATS */
2180
2181static void fib_trie_show(struct seq_file *seq, const char *name,
2182 struct trie *trie)
2183{
2184 struct trie_stat stat;
2185
2186 trie_collect_stats(trie, &stat);
2187 seq_printf(seq, "%s:\n", name);
2188 trie_show_stats(seq, &stat);
2189#ifdef CONFIG_IP_FIB_TRIE_STATS
2190 trie_show_usage(seq, &trie->stats);
2191#endif
2181} 2192}
2182 2193
2183static int fib_triestat_seq_show(struct seq_file *seq, void *v) 2194static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2184{ 2195{
2185 struct trie_stat *stat; 2196 struct net *net = (struct net *)seq->private;
2186 2197 struct fib_table *tb;
2187 stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2188 if (!stat)
2189 return -ENOMEM;
2190 2198
2191 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", 2199 seq_printf(seq,
2200 "Basic info: size of leaf:"
2201 " %Zd bytes, size of tnode: %Zd bytes.\n",
2192 sizeof(struct leaf), sizeof(struct tnode)); 2202 sizeof(struct leaf), sizeof(struct tnode));
2193 2203
2194 if (trie_local) { 2204 tb = fib_get_table(net, RT_TABLE_LOCAL);
2195 seq_printf(seq, "Local:\n"); 2205 if (tb)
2196 trie_collect_stats(trie_local, stat); 2206 fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
2197 trie_show_stats(seq, stat);
2198 }
2199 2207
2200 if (trie_main) { 2208 tb = fib_get_table(net, RT_TABLE_MAIN);
2201 seq_printf(seq, "Main:\n"); 2209 if (tb)
2202 trie_collect_stats(trie_main, stat); 2210 fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
2203 trie_show_stats(seq, stat);
2204 }
2205 kfree(stat);
2206 2211
2207 return 0; 2212 return 0;
2208} 2213}
2209 2214
2210static int fib_triestat_seq_open(struct inode *inode, struct file *file) 2215static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2211{ 2216{
2212 return single_open(file, fib_triestat_seq_show, NULL); 2217 int err;
2218 struct net *net;
2219
2220 net = get_proc_net(inode);
2221 if (net == NULL)
2222 return -ENXIO;
2223 err = single_open(file, fib_triestat_seq_show, net);
2224 if (err < 0) {
2225 put_net(net);
2226 return err;
2227 }
2228 return 0;
2229}
2230
2231static int fib_triestat_seq_release(struct inode *ino, struct file *f)
2232{
2233 struct seq_file *seq = f->private_data;
2234 put_net(seq->private);
2235 return single_release(ino, f);
2213} 2236}
2214 2237
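
Editor's note: fib_triestat_seq_open() now pins the network namespace it reports on, and the new fib_triestat_seq_release() drops that reference; the put_net() on the open error path means every successful get has exactly one matching put. The same acquire-on-open / release-on-close pairing in miniature:

#include <stdlib.h>

struct ns { int refcnt; };

static void ns_get(struct ns *n) { n->refcnt++; }
static void ns_put(struct ns *n) { if (--n->refcnt == 0) free(n); }

struct file { struct ns *priv; };

static int stat_open(struct file *f, struct ns *n, int fail)
{
    ns_get(n);
    if (fail) {          /* stand-in for single_open() failing */
        ns_put(n);       /* the error path must undo the get */
        return -1;
    }
    f->priv = n;         /* the reference travels with the open file */
    return 0;
}

static void stat_release(struct file *f)
{
    ns_put(f->priv);     /* pairs with the get taken in open */
}

int main(void)
{
    struct ns *n = calloc(1, sizeof(*n));
    struct file f;

    n->refcnt = 1;                 /* creator's reference */
    if (stat_open(&f, n, 0) == 0)
        stat_release(&f);
    ns_put(n);                     /* drop creator's reference */
    return 0;
}
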
2215static const struct file_operations fib_triestat_fops = { 2238static const struct file_operations fib_triestat_fops = {
@@ -2217,7 +2240,7 @@ static const struct file_operations fib_triestat_fops = {
2217 .open = fib_triestat_seq_open, 2240 .open = fib_triestat_seq_open,
2218 .read = seq_read, 2241 .read = seq_read,
2219 .llseek = seq_lseek, 2242 .llseek = seq_lseek,
2220 .release = single_release, 2243 .release = fib_triestat_seq_release,
2221}; 2244};
2222 2245
2223static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, 2246static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
@@ -2226,13 +2249,13 @@ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2226 loff_t idx = 0; 2249 loff_t idx = 0;
2227 struct node *n; 2250 struct node *n;
2228 2251
2229 for (n = fib_trie_get_first(iter, trie_local); 2252 for (n = fib_trie_get_first(iter, iter->trie_local);
2230 n; ++idx, n = fib_trie_get_next(iter)) { 2253 n; ++idx, n = fib_trie_get_next(iter)) {
2231 if (pos == idx) 2254 if (pos == idx)
2232 return n; 2255 return n;
2233 } 2256 }
2234 2257
2235 for (n = fib_trie_get_first(iter, trie_main); 2258 for (n = fib_trie_get_first(iter, iter->trie_main);
2236 n; ++idx, n = fib_trie_get_next(iter)) { 2259 n; ++idx, n = fib_trie_get_next(iter)) {
2237 if (pos == idx) 2260 if (pos == idx)
2238 return n; 2261 return n;
@@ -2241,11 +2264,25 @@ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2241} 2264}
2242 2265
2243static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2266static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2267 __acquires(RCU)
2244{ 2268{
2269 struct fib_trie_iter *iter = seq->private;
2270 struct fib_table *tb;
2271
2272 if (!iter->trie_local) {
2273 tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
2274 if (tb)
2275 iter->trie_local = (struct trie *) tb->tb_data;
2276 }
2277 if (!iter->trie_main) {
2278 tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
2279 if (tb)
2280 iter->trie_main = (struct trie *) tb->tb_data;
2281 }
2245 rcu_read_lock(); 2282 rcu_read_lock();
2246 if (*pos == 0) 2283 if (*pos == 0)
2247 return SEQ_START_TOKEN; 2284 return SEQ_START_TOKEN;
2248 return fib_trie_get_idx(seq->private, *pos - 1); 2285 return fib_trie_get_idx(iter, *pos - 1);
2249} 2286}
2250 2287
2251static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2288static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -2263,13 +2300,14 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2263 return v; 2300 return v;
2264 2301
2265 /* continue scan in next trie */ 2302 /* continue scan in next trie */
2266 if (iter->trie == trie_local) 2303 if (iter->trie == iter->trie_local)
2267 return fib_trie_get_first(iter, trie_main); 2304 return fib_trie_get_first(iter, iter->trie_main);
2268 2305
2269 return NULL; 2306 return NULL;
2270} 2307}
2271 2308
2272static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2309static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2310 __releases(RCU)
2273{ 2311{
2274 rcu_read_unlock(); 2312 rcu_read_unlock();
2275} 2313}
@@ -2279,10 +2317,8 @@ static void seq_indent(struct seq_file *seq, int n)
2279 while (n-- > 0) seq_puts(seq, " "); 2317 while (n-- > 0) seq_puts(seq, " ");
2280} 2318}
2281 2319
2282static inline const char *rtn_scope(enum rt_scope_t s) 2320static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2283{ 2321{
2284 static char buf[32];
2285
2286 switch (s) { 2322 switch (s) {
2287 case RT_SCOPE_UNIVERSE: return "universe"; 2323 case RT_SCOPE_UNIVERSE: return "universe";
2288 case RT_SCOPE_SITE: return "site"; 2324 case RT_SCOPE_SITE: return "site";
@@ -2290,7 +2326,7 @@ static inline const char *rtn_scope(enum rt_scope_t s)
2290 case RT_SCOPE_HOST: return "host"; 2326 case RT_SCOPE_HOST: return "host";
2291 case RT_SCOPE_NOWHERE: return "nowhere"; 2327 case RT_SCOPE_NOWHERE: return "nowhere";
2292 default: 2328 default:
2293 snprintf(buf, sizeof(buf), "scope=%d", s); 2329 snprintf(buf, len, "scope=%d", s);
2294 return buf; 2330 return buf;
2295 } 2331 }
2296} 2332}
@@ -2310,13 +2346,11 @@ static const char *rtn_type_names[__RTN_MAX] = {
2310 [RTN_XRESOLVE] = "XRESOLVE", 2346 [RTN_XRESOLVE] = "XRESOLVE",
2311}; 2347};
2312 2348
2313static inline const char *rtn_type(unsigned t) 2349static inline const char *rtn_type(char *buf, size_t len, unsigned t)
2314{ 2350{
2315 static char buf[32];
2316
2317 if (t < __RTN_MAX && rtn_type_names[t]) 2351 if (t < __RTN_MAX && rtn_type_names[t])
2318 return rtn_type_names[t]; 2352 return rtn_type_names[t];
2319 snprintf(buf, sizeof(buf), "type %d", t); 2353 snprintf(buf, len, "type %u", t);
2320 return buf; 2354 return buf;
2321} 2355}
2322 2356
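
Editor's note: rtn_scope() and rtn_type() previously formatted unknown values into function-local static buffers, which is unsafe the moment two readers of /proc/net/fib_trie format at once; both now take a caller-supplied buffer, and fib_trie_seq_show() passes two separate stack buffers so both strings can be alive in one seq_printf(). The reentrant shape:

#include <stdio.h>

/* Format an unknown scope into caller-owned storage. */
static const char *scope_name(char *buf, size_t len, int s)
{
    switch (s) {
    case 0:   return "universe";
    case 200: return "site";
    default:
        snprintf(buf, len, "scope=%d", s);
        return buf;
    }
}

int main(void)
{
    char b1[32], b2[32];

    /* Two live results at once: impossible with one static buffer. */
    printf("%s %s\n", scope_name(b1, sizeof(b1), 7),
           scope_name(b2, sizeof(b2), 9));
    return 0;
}
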
@@ -2329,8 +2363,8 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2329 if (v == SEQ_START_TOKEN) 2363 if (v == SEQ_START_TOKEN)
2330 return 0; 2364 return 0;
2331 2365
2332 if (!node_parent(n)) { 2366 if (!node_parent_rcu(n)) {
2333 if (iter->trie == trie_local) 2367 if (iter->trie == iter->trie_local)
2334 seq_puts(seq, "<local>:\n"); 2368 seq_puts(seq, "<local>:\n");
2335 else 2369 else
2336 seq_puts(seq, "<main>:\n"); 2370 seq_puts(seq, "<main>:\n");
@@ -2347,25 +2381,29 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2347 2381
2348 } else { 2382 } else {
2349 struct leaf *l = (struct leaf *) n; 2383 struct leaf *l = (struct leaf *) n;
2350 int i; 2384 struct leaf_info *li;
2385 struct hlist_node *node;
2351 __be32 val = htonl(l->key); 2386 __be32 val = htonl(l->key);
2352 2387
2353 seq_indent(seq, iter->depth); 2388 seq_indent(seq, iter->depth);
2354 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); 2389 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2355 for (i = 32; i >= 0; i--) { 2390
2356 struct leaf_info *li = find_leaf_info(l, i); 2391 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2357 if (li) { 2392 struct fib_alias *fa;
2358 struct fib_alias *fa; 2393
2359 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2394 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2360 seq_indent(seq, iter->depth+1); 2395 char buf1[32], buf2[32];
2361 seq_printf(seq, " /%d %s %s", i, 2396
2362 rtn_scope(fa->fa_scope), 2397 seq_indent(seq, iter->depth+1);
2363 rtn_type(fa->fa_type)); 2398 seq_printf(seq, " /%d %s %s", li->plen,
2364 if (fa->fa_tos) 2399 rtn_scope(buf1, sizeof(buf1),
2365 seq_printf(seq, "tos =%d\n", 2400 fa->fa_scope),
2366 fa->fa_tos); 2401 rtn_type(buf2, sizeof(buf2),
2367 seq_putc(seq, '\n'); 2402 fa->fa_type));
2368 } 2403 if (fa->fa_tos)
2404 seq_printf(seq, "tos =%d\n",
2405 fa->fa_tos);
2406 seq_putc(seq, '\n');
2369 } 2407 }
2370 } 2408 }
2371 } 2409 }
@@ -2382,8 +2420,8 @@ static const struct seq_operations fib_trie_seq_ops = {
2382 2420
2383static int fib_trie_seq_open(struct inode *inode, struct file *file) 2421static int fib_trie_seq_open(struct inode *inode, struct file *file)
2384{ 2422{
2385 return seq_open_private(file, &fib_trie_seq_ops, 2423 return seq_open_net(inode, file, &fib_trie_seq_ops,
2386 sizeof(struct fib_trie_iter)); 2424 sizeof(struct fib_trie_iter));
2387} 2425}
2388 2426
2389static const struct file_operations fib_trie_fops = { 2427static const struct file_operations fib_trie_fops = {
@@ -2391,7 +2429,7 @@ static const struct file_operations fib_trie_fops = {
2391 .open = fib_trie_seq_open, 2429 .open = fib_trie_seq_open,
2392 .read = seq_read, 2430 .read = seq_read,
2393 .llseek = seq_lseek, 2431 .llseek = seq_lseek,
2394 .release = seq_release_private, 2432 .release = seq_release_net,
2395}; 2433};
2396 2434
2397static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2435static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
@@ -2419,8 +2457,8 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2419{ 2457{
2420 const struct fib_trie_iter *iter = seq->private; 2458 const struct fib_trie_iter *iter = seq->private;
2421 struct leaf *l = v; 2459 struct leaf *l = v;
2422 int i; 2460 struct leaf_info *li;
2423 char bf[128]; 2461 struct hlist_node *node;
2424 2462
2425 if (v == SEQ_START_TOKEN) { 2463 if (v == SEQ_START_TOKEN) {
2426 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " 2464 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
@@ -2429,25 +2467,23 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2429 return 0; 2467 return 0;
2430 } 2468 }
2431 2469
2432 if (iter->trie == trie_local) 2470 if (iter->trie == iter->trie_local)
2433 return 0; 2471 return 0;
2472
2434 if (IS_TNODE(l)) 2473 if (IS_TNODE(l))
2435 return 0; 2474 return 0;
2436 2475
2437 for (i=32; i>=0; i--) { 2476 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2438 struct leaf_info *li = find_leaf_info(l, i);
2439 struct fib_alias *fa; 2477 struct fib_alias *fa;
2440 __be32 mask, prefix; 2478 __be32 mask, prefix;
2441 2479
2442 if (!li)
2443 continue;
2444
2445 mask = inet_make_mask(li->plen); 2480 mask = inet_make_mask(li->plen);
2446 prefix = htonl(l->key); 2481 prefix = htonl(l->key);
2447 2482
2448 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2483 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2449 const struct fib_info *fi = fa->fa_info; 2484 const struct fib_info *fi = fa->fa_info;
2450 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2485 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2486 char bf[128];
2451 2487
2452 if (fa->fa_type == RTN_BROADCAST 2488 if (fa->fa_type == RTN_BROADCAST
2453 || fa->fa_type == RTN_MULTICAST) 2489 || fa->fa_type == RTN_MULTICAST)
@@ -2461,7 +2497,8 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2461 fi->fib_nh->nh_gw, flags, 0, 0, 2497 fi->fib_nh->nh_gw, flags, 0, 0,
2462 fi->fib_priority, 2498 fi->fib_priority,
2463 mask, 2499 mask,
2464 (fi->fib_advmss ? fi->fib_advmss + 40 : 0), 2500 (fi->fib_advmss ?
2501 fi->fib_advmss + 40 : 0),
2465 fi->fib_window, 2502 fi->fib_window,
2466 fi->fib_rtt >> 3); 2503 fi->fib_rtt >> 3);
2467 else 2504 else
@@ -2486,8 +2523,8 @@ static const struct seq_operations fib_route_seq_ops = {
2486 2523
2487static int fib_route_seq_open(struct inode *inode, struct file *file) 2524static int fib_route_seq_open(struct inode *inode, struct file *file)
2488{ 2525{
2489 return seq_open_private(file, &fib_route_seq_ops, 2526 return seq_open_net(inode, file, &fib_route_seq_ops,
2490 sizeof(struct fib_trie_iter)); 2527 sizeof(struct fib_trie_iter));
2491} 2528}
2492 2529
2493static const struct file_operations fib_route_fops = { 2530static const struct file_operations fib_route_fops = {
@@ -2495,35 +2532,36 @@ static const struct file_operations fib_route_fops = {
2495 .open = fib_route_seq_open, 2532 .open = fib_route_seq_open,
2496 .read = seq_read, 2533 .read = seq_read,
2497 .llseek = seq_lseek, 2534 .llseek = seq_lseek,
2498 .release = seq_release_private, 2535 .release = seq_release_net,
2499}; 2536};
2500 2537
2501int __init fib_proc_init(void) 2538int __net_init fib_proc_init(struct net *net)
2502{ 2539{
2503 if (!proc_net_fops_create(&init_net, "fib_trie", S_IRUGO, &fib_trie_fops)) 2540 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
2504 goto out1; 2541 goto out1;
2505 2542
2506 if (!proc_net_fops_create(&init_net, "fib_triestat", S_IRUGO, &fib_triestat_fops)) 2543 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2544 &fib_triestat_fops))
2507 goto out2; 2545 goto out2;
2508 2546
2509 if (!proc_net_fops_create(&init_net, "route", S_IRUGO, &fib_route_fops)) 2547 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
2510 goto out3; 2548 goto out3;
2511 2549
2512 return 0; 2550 return 0;
2513 2551
2514out3: 2552out3:
2515 proc_net_remove(&init_net, "fib_triestat"); 2553 proc_net_remove(net, "fib_triestat");
2516out2: 2554out2:
2517 proc_net_remove(&init_net, "fib_trie"); 2555 proc_net_remove(net, "fib_trie");
2518out1: 2556out1:
2519 return -ENOMEM; 2557 return -ENOMEM;
2520} 2558}
2521 2559
2522void __init fib_proc_exit(void) 2560void __net_exit fib_proc_exit(struct net *net)
2523{ 2561{
2524 proc_net_remove(&init_net, "fib_trie"); 2562 proc_net_remove(net, "fib_trie");
2525 proc_net_remove(&init_net, "fib_triestat"); 2563 proc_net_remove(net, "fib_triestat");
2526 proc_net_remove(&init_net, "route"); 2564 proc_net_remove(net, "route");
2527} 2565}
2528 2566
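
Editor's note: fib_proc_init() keeps the kernel's standard goto-unwind error handling while becoming per-namespace — each out label undoes exactly the registrations that already succeeded, in reverse order. The pattern in stand-alone form, with malloc standing in for proc_net_fops_create():

#include <stdlib.h>

static int init_three(void **a, void **b, void **c)
{
    *a = malloc(16);
    if (!*a)
        goto out1;
    *b = malloc(16);
    if (!*b)
        goto out2;
    *c = malloc(16);
    if (!*c)
        goto out3;
    return 0;

out3:
    free(*b);
out2:
    free(*a);
out1:
    return -1;                /* -ENOMEM in the kernel version */
}

int main(void)
{
    void *a, *b, *c;

    if (init_three(&a, &b, &c) == 0) {
        free(c);
        free(b);
        free(a);
    }
    return 0;
}
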
2529#endif /* CONFIG_PROC_FS */ 2567#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 82baea026484..a7321a82df6d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -92,6 +92,7 @@
92#include <asm/system.h> 92#include <asm/system.h>
93#include <asm/uaccess.h> 93#include <asm/uaccess.h>
94#include <net/checksum.h> 94#include <net/checksum.h>
95#include <net/xfrm.h>
95 96
96/* 97/*
97 * Build xmit assembly blocks 98 * Build xmit assembly blocks
@@ -231,7 +232,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
231static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; 232static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
232#define icmp_socket __get_cpu_var(__icmp_socket) 233#define icmp_socket __get_cpu_var(__icmp_socket)
233 234
234static __inline__ int icmp_xmit_lock(void) 235static inline int icmp_xmit_lock(void)
235{ 236{
236 local_bh_disable(); 237 local_bh_disable();
237 238
@@ -245,7 +246,7 @@ static __inline__ int icmp_xmit_lock(void)
245 return 0; 246 return 0;
246} 247}
247 248
248static void icmp_xmit_unlock(void) 249static inline void icmp_xmit_unlock(void)
249{ 250{
250 spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); 251 spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
251} 252}
@@ -274,18 +275,19 @@ static void icmp_xmit_unlock(void)
274#define XRLIM_BURST_FACTOR 6 275#define XRLIM_BURST_FACTOR 6
275int xrlim_allow(struct dst_entry *dst, int timeout) 276int xrlim_allow(struct dst_entry *dst, int timeout)
276{ 277{
277 unsigned long now; 278 unsigned long now, token = dst->rate_tokens;
278 int rc = 0; 279 int rc = 0;
279 280
280 now = jiffies; 281 now = jiffies;
281 dst->rate_tokens += now - dst->rate_last; 282 token += now - dst->rate_last;
282 dst->rate_last = now; 283 dst->rate_last = now;
283 if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout) 284 if (token > XRLIM_BURST_FACTOR * timeout)
284 dst->rate_tokens = XRLIM_BURST_FACTOR * timeout; 285 token = XRLIM_BURST_FACTOR * timeout;
285 if (dst->rate_tokens >= timeout) { 286 if (token >= timeout) {
286 dst->rate_tokens -= timeout; 287 token -= timeout;
287 rc = 1; 288 rc = 1;
288 } 289 }
290 dst->rate_tokens = token;
289 return rc; 291 return rc;
290} 292}
291 293
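
Editor's note: xrlim_allow() above is a token bucket — tokens accrue with elapsed time, are capped at a burst, and each allowed reply costs 'timeout' tokens. The rework computes everything in a local and writes rate_tokens back once, so concurrent readers never observe a half-updated value. User-space sketch with an explicit "now":

#include <stdio.h>

#define XRLIM_BURST_FACTOR 6

struct dst { unsigned long rate_tokens, rate_last; };

static int xrlim_allow(struct dst *dst, unsigned long now, int timeout)
{
    unsigned long token = dst->rate_tokens;
    int rc = 0;

    token += now - dst->rate_last;
    dst->rate_last = now;
    if (token > XRLIM_BURST_FACTOR * (unsigned long)timeout)
        token = XRLIM_BURST_FACTOR * timeout;
    if (token >= (unsigned long)timeout) {
        token -= timeout;
        rc = 1;
    }
    dst->rate_tokens = token;   /* single write-back */
    return rc;
}

int main(void)
{
    struct dst d = { 0, 0 };

    printf("%d\n", xrlim_allow(&d, 100, 50));  /* 1: 100 tokens banked */
    printf("%d\n", xrlim_allow(&d, 100, 50));  /* 1: 50 tokens left */
    printf("%d\n", xrlim_allow(&d, 100, 50));  /* 0: bucket empty */
    return 0;
}
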
@@ -403,7 +405,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
403 .tos = RT_TOS(ip_hdr(skb)->tos) } }, 405 .tos = RT_TOS(ip_hdr(skb)->tos) } },
404 .proto = IPPROTO_ICMP }; 406 .proto = IPPROTO_ICMP };
405 security_skb_classify_flow(skb, &fl); 407 security_skb_classify_flow(skb, &fl);
406 if (ip_route_output_key(&rt, &fl)) 408 if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl))
407 goto out_unlock; 409 goto out_unlock;
408 } 410 }
409 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 411 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
@@ -435,9 +437,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
435 struct ipcm_cookie ipc; 437 struct ipcm_cookie ipc;
436 __be32 saddr; 438 __be32 saddr;
437 u8 tos; 439 u8 tos;
440 struct net *net;
438 441
439 if (!rt) 442 if (!rt)
440 goto out; 443 goto out;
444 net = rt->u.dst.dev->nd_net;
441 445
442 /* 446 /*
443 * Find the original header. It is expected to be valid, of course. 447 * Find the original header. It is expected to be valid, of course.
@@ -513,7 +517,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
513 struct net_device *dev = NULL; 517 struct net_device *dev = NULL;
514 518
515 if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) 519 if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
516 dev = dev_get_by_index(&init_net, rt->fl.iif); 520 dev = dev_get_by_index(net, rt->fl.iif);
517 521
518 if (dev) { 522 if (dev) {
519 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); 523 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
@@ -563,11 +567,71 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
563 } 567 }
564 } 568 }
565 }; 569 };
570 int err;
571 struct rtable *rt2;
572
566 security_skb_classify_flow(skb_in, &fl); 573 security_skb_classify_flow(skb_in, &fl);
567 if (ip_route_output_key(&rt, &fl)) 574 if (__ip_route_output_key(net, &rt, &fl))
575 goto out_unlock;
576
577 /* No need to clone since we're just using its address. */
578 rt2 = rt;
579
580 err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
581 switch (err) {
582 case 0:
583 if (rt != rt2)
584 goto route_done;
585 break;
586 case -EPERM:
587 rt = NULL;
588 break;
589 default:
590 goto out_unlock;
591 }
592
593 if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
594 goto out_unlock;
595
596 if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
597 err = __ip_route_output_key(net, &rt2, &fl);
598 else {
599 struct flowi fl2 = {};
600 struct dst_entry *odst;
601
602 fl2.fl4_dst = fl.fl4_src;
603 if (ip_route_output_key(net, &rt2, &fl2))
604 goto out_unlock;
605
606 /* Ugh! */
607 odst = skb_in->dst;
608 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
609 RT_TOS(tos), rt2->u.dst.dev);
610
611 dst_release(&rt2->u.dst);
612 rt2 = (struct rtable *)skb_in->dst;
613 skb_in->dst = odst;
614 }
615
616 if (err)
617 goto out_unlock;
618
619 err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL,
620 XFRM_LOOKUP_ICMP);
621 if (err == -ENOENT) {
622 if (!rt)
623 goto out_unlock;
624 goto route_done;
625 }
626
627 dst_release(&rt->u.dst);
628 rt = rt2;
629
630 if (err)
568 goto out_unlock; 631 goto out_unlock;
569 } 632 }
570 633
634route_done:
571 if (!icmpv4_xrlim_allow(rt, type, code)) 635 if (!icmpv4_xrlim_allow(rt, type, code))
572 goto ende; 636 goto ende;
573 637
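
Editor's note: the block above adds an IPsec-aware retry ladder to error routing — route the reply, consult policy, and if the forward lookup is denied (-EPERM) reroute via the reverse-decoded flow of the offending packet, falling back to the plain route when the ICMP-flagged lookup finds no policy (-ENOENT). A compilable control-flow sketch only: route_out(), ipsec_lookup() and decode_reverse() are toy stand-ins for __ip_route_output_key(), xfrm_lookup() and xfrm_decode_session_reverse(), and a "route" is just an int:

#include <stdio.h>

#define EPERM   1
#define ENOENT  2
#define ICMP    1   /* stand-in for XFRM_LOOKUP_ICMP */

static int route_out(int dst, int *rt) { *rt = dst; return 0; }
static int ipsec_lookup(int *rt, int flags)
{
    (void)rt;
    return flags ? 0 : -EPERM;  /* deny plain flow, allow ICMP retry */
}
static int decode_reverse(int *dst) { *dst = 99; return 0; }

static int route_icmp_error(int dst, int *rt)
{
    int rt2, err, rev = 0;

    if (route_out(dst, rt))
        return -1;

    rt2 = *rt;
    err = ipsec_lookup(rt, 0);
    switch (err) {
    case 0:
        if (*rt != rt2)
            return 0;          /* policy already rerouted us */
        break;
    case -EPERM:
        *rt = 0;               /* forward flow forbidden, keep trying */
        break;
    default:
        return -1;
    }

    /* Second try: route the reverse-decoded flow of the bad packet. */
    if (decode_reverse(&rev) || route_out(rev, &rt2))
        return -1;

    err = ipsec_lookup(&rt2, ICMP);
    if (err == -ENOENT)
        return *rt ? 0 : -1;   /* fall back to the plain route */
    if (err)
        return -1;

    *rt = rt2;                 /* the reverse-flow route wins */
    return 0;
}

int main(void)
{
    int rt;

    printf("%d (rt=%d)\n", route_icmp_error(7, &rt), rt);  /* 0 (rt=99) */
    return 0;
}
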
@@ -603,8 +667,10 @@ static void icmp_unreach(struct sk_buff *skb)
603 struct icmphdr *icmph; 667 struct icmphdr *icmph;
604 int hash, protocol; 668 int hash, protocol;
605 struct net_protocol *ipprot; 669 struct net_protocol *ipprot;
606 struct sock *raw_sk;
607 u32 info = 0; 670 u32 info = 0;
671 struct net *net;
672
673 net = skb->dst->dev->nd_net;
608 674
609 /* 675 /*
610 * Incomplete header ? 676 * Incomplete header ?
@@ -635,7 +701,7 @@ static void icmp_unreach(struct sk_buff *skb)
635 "and DF set.\n", 701 "and DF set.\n",
636 NIPQUAD(iph->daddr)); 702 NIPQUAD(iph->daddr));
637 } else { 703 } else {
638 info = ip_rt_frag_needed(iph, 704 info = ip_rt_frag_needed(net, iph,
639 ntohs(icmph->un.frag.mtu)); 705 ntohs(icmph->un.frag.mtu));
640 if (!info) 706 if (!info)
641 goto out; 707 goto out;
@@ -673,7 +739,7 @@ static void icmp_unreach(struct sk_buff *skb)
673 */ 739 */
674 740
675 if (!sysctl_icmp_ignore_bogus_error_responses && 741 if (!sysctl_icmp_ignore_bogus_error_responses &&
676 inet_addr_type(iph->daddr) == RTN_BROADCAST) { 742 inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
677 if (net_ratelimit()) 743 if (net_ratelimit())
678 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " 744 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
679 "type %u, code %u " 745 "type %u, code %u "
@@ -697,21 +763,9 @@ static void icmp_unreach(struct sk_buff *skb)
697 /* 763 /*
698 * Deliver ICMP message to raw sockets. Pretty useless feature? 764 * Deliver ICMP message to raw sockets. Pretty useless feature?
699 */ 765 */
766 raw_icmp_error(skb, protocol, info);
700 767
701 /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
702 hash = protocol & (MAX_INET_PROTOS - 1); 768 hash = protocol & (MAX_INET_PROTOS - 1);
703 read_lock(&raw_v4_lock);
704 if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
705 while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
706 iph->saddr,
707 skb->dev->ifindex)) != NULL) {
708 raw_err(raw_sk, skb, info);
709 raw_sk = sk_next(raw_sk);
710 iph = (struct iphdr *)skb->data;
711 }
712 }
713 read_unlock(&raw_v4_lock);
714
715 rcu_read_lock(); 769 rcu_read_lock();
716 ipprot = rcu_dereference(inet_protos[hash]); 770 ipprot = rcu_dereference(inet_protos[hash]);
717 if (ipprot && ipprot->err_handler) 771 if (ipprot && ipprot->err_handler)
@@ -929,6 +983,25 @@ int icmp_rcv(struct sk_buff *skb)
929 struct icmphdr *icmph; 983 struct icmphdr *icmph;
930 struct rtable *rt = (struct rtable *)skb->dst; 984 struct rtable *rt = (struct rtable *)skb->dst;
931 985
986 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
987 int nh;
988
989 if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
990 XFRM_STATE_ICMP))
991 goto drop;
992
993 if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
994 goto drop;
995
996 nh = skb_network_offset(skb);
997 skb_set_network_header(skb, sizeof(*icmph));
998
999 if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
1000 goto drop;
1001
1002 skb_set_network_header(skb, nh);
1003 }
1004
932 ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); 1005 ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
933 1006
934 switch (skb->ip_summed) { 1007 switch (skb->ip_summed) {
@@ -942,8 +1015,7 @@ int icmp_rcv(struct sk_buff *skb)
942 goto error; 1015 goto error;
943 } 1016 }
944 1017
945 if (!pskb_pull(skb, sizeof(struct icmphdr))) 1018 __skb_pull(skb, sizeof(*icmph));
946 goto error;
947 1019
948 icmph = icmp_hdr(skb); 1020 icmph = icmp_hdr(skb);
949 1021
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7dbc282d4f9f..994648be80ab 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -130,12 +130,12 @@
130 */ 130 */
131 131
132#define IGMP_V1_SEEN(in_dev) \ 132#define IGMP_V1_SEEN(in_dev) \
133 (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \ 133 (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \
134 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ 134 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
135 ((in_dev)->mr_v1_seen && \ 135 ((in_dev)->mr_v1_seen && \
136 time_before(jiffies, (in_dev)->mr_v1_seen))) 136 time_before(jiffies, (in_dev)->mr_v1_seen)))
137#define IGMP_V2_SEEN(in_dev) \ 137#define IGMP_V2_SEEN(in_dev) \
138 (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \ 138 (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \
139 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ 139 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
140 ((in_dev)->mr_v2_seen && \ 140 ((in_dev)->mr_v2_seen && \
141 time_before(jiffies, (in_dev)->mr_v2_seen))) 141 time_before(jiffies, (in_dev)->mr_v2_seen)))
@@ -301,7 +301,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
301 .nl_u = { .ip4_u = { 301 .nl_u = { .ip4_u = {
302 .daddr = IGMPV3_ALL_MCR } }, 302 .daddr = IGMPV3_ALL_MCR } },
303 .proto = IPPROTO_IGMP }; 303 .proto = IPPROTO_IGMP };
304 if (ip_route_output_key(&rt, &fl)) { 304 if (ip_route_output_key(&init_net, &rt, &fl)) {
305 kfree_skb(skb); 305 kfree_skb(skb);
306 return NULL; 306 return NULL;
307 } 307 }
@@ -349,17 +349,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
349 349
350static int igmpv3_sendpack(struct sk_buff *skb) 350static int igmpv3_sendpack(struct sk_buff *skb)
351{ 351{
352 struct iphdr *pip = ip_hdr(skb);
353 struct igmphdr *pig = igmp_hdr(skb); 352 struct igmphdr *pig = igmp_hdr(skb);
354 const int iplen = skb->tail - skb->network_header;
355 const int igmplen = skb->tail - skb->transport_header; 353 const int igmplen = skb->tail - skb->transport_header;
356 354
357 pip->tot_len = htons(iplen);
358 ip_send_check(pip);
359 pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); 355 pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
360 356
361 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev, 357 return ip_local_out(skb);
362 dst_output);
363} 358}
364 359
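
Editor's note: with ip_local_out() now filling in tot_len and the IP header checksum, igmpv3_sendpack() keeps only the IGMP checksum, an RFC 1071 Internet checksum over the IGMP payload. A portable sketch of that algorithm (the kernel's ip_compute_csum() is arch-optimized; this byte-wise version makes no alignment assumptions):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* 16-bit one's-complement sum, folded, then complemented. */
static uint16_t inet_csum(const void *data, size_t len)
{
    const uint8_t *p = data;
    uint32_t sum = 0;

    while (len > 1) {
        sum += (uint32_t)p[0] << 8 | p[1];   /* big-endian 16-bit words */
        p += 2;
        len -= 2;
    }
    if (len)                                  /* trailing odd byte */
        sum += (uint32_t)p[0] << 8;
    while (sum >> 16)                         /* fold the carries */
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;
}

int main(void)
{
    uint8_t pkt[] = { 0x11, 0x64, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x01 };

    printf("csum=0x%04x\n", inet_csum(pkt, sizeof(pkt)));
    return 0;
}
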
365static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) 360static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
@@ -650,7 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
650 struct flowi fl = { .oif = dev->ifindex, 645 struct flowi fl = { .oif = dev->ifindex,
651 .nl_u = { .ip4_u = { .daddr = dst } }, 646 .nl_u = { .ip4_u = { .daddr = dst } },
652 .proto = IPPROTO_IGMP }; 647 .proto = IPPROTO_IGMP };
653 if (ip_route_output_key(&rt, &fl)) 648 if (ip_route_output_key(&init_net, &rt, &fl))
654 return -1; 649 return -1;
655 } 650 }
656 if (rt->rt_src == 0) { 651 if (rt->rt_src == 0) {
@@ -680,13 +675,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
680 iph->daddr = dst; 675 iph->daddr = dst;
681 iph->saddr = rt->rt_src; 676 iph->saddr = rt->rt_src;
682 iph->protocol = IPPROTO_IGMP; 677 iph->protocol = IPPROTO_IGMP;
683 iph->tot_len = htons(IGMP_SIZE);
684 ip_select_ident(iph, &rt->u.dst, NULL); 678 ip_select_ident(iph, &rt->u.dst, NULL);
685 ((u8*)&iph[1])[0] = IPOPT_RA; 679 ((u8*)&iph[1])[0] = IPOPT_RA;
686 ((u8*)&iph[1])[1] = 4; 680 ((u8*)&iph[1])[1] = 4;
687 ((u8*)&iph[1])[2] = 0; 681 ((u8*)&iph[1])[2] = 0;
688 ((u8*)&iph[1])[3] = 0; 682 ((u8*)&iph[1])[3] = 0;
689 ip_send_check(iph);
690 683
691 ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); 684 ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
692 ih->type=type; 685 ih->type=type;
@@ -695,8 +688,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
695 ih->group=group; 688 ih->group=group;
696 ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr)); 689 ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));
697 690
698 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 691 return ip_local_out(skb);
699 dst_output);
700} 692}
701 693
702static void igmp_gq_timer_expire(unsigned long data) 694static void igmp_gq_timer_expire(unsigned long data)
@@ -1234,9 +1226,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1234 spin_lock_init(&im->lock); 1226 spin_lock_init(&im->lock);
1235#ifdef CONFIG_IP_MULTICAST 1227#ifdef CONFIG_IP_MULTICAST
1236 im->tm_running=0; 1228 im->tm_running=0;
1237 init_timer(&im->timer); 1229 setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
1238 im->timer.data=(unsigned long)im;
1239 im->timer.function=&igmp_timer_expire;
1240 im->unsolicit_count = IGMP_Unsolicited_Report_Count; 1230 im->unsolicit_count = IGMP_Unsolicited_Report_Count;
1241 im->reporter = 0; 1231 im->reporter = 0;
1242 im->gsquery = 0; 1232 im->gsquery = 0;
@@ -1338,13 +1328,11 @@ void ip_mc_init_dev(struct in_device *in_dev)
1338 in_dev->mc_tomb = NULL; 1328 in_dev->mc_tomb = NULL;
1339#ifdef CONFIG_IP_MULTICAST 1329#ifdef CONFIG_IP_MULTICAST
1340 in_dev->mr_gq_running = 0; 1330 in_dev->mr_gq_running = 0;
1341 init_timer(&in_dev->mr_gq_timer); 1331 setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire,
1342 in_dev->mr_gq_timer.data=(unsigned long) in_dev; 1332 (unsigned long)in_dev);
1343 in_dev->mr_gq_timer.function=&igmp_gq_timer_expire;
1344 in_dev->mr_ifc_count = 0; 1333 in_dev->mr_ifc_count = 0;
1345 init_timer(&in_dev->mr_ifc_timer); 1334 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
1346 in_dev->mr_ifc_timer.data=(unsigned long) in_dev; 1335 (unsigned long)in_dev);
1347 in_dev->mr_ifc_timer.function=&igmp_ifc_timer_expire;
1348 in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; 1336 in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
1349#endif 1337#endif
1350 1338
@@ -1401,19 +1389,19 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
1401 struct in_device *idev = NULL; 1389 struct in_device *idev = NULL;
1402 1390
1403 if (imr->imr_ifindex) { 1391 if (imr->imr_ifindex) {
1404 idev = inetdev_by_index(imr->imr_ifindex); 1392 idev = inetdev_by_index(&init_net, imr->imr_ifindex);
1405 if (idev) 1393 if (idev)
1406 __in_dev_put(idev); 1394 __in_dev_put(idev);
1407 return idev; 1395 return idev;
1408 } 1396 }
1409 if (imr->imr_address.s_addr) { 1397 if (imr->imr_address.s_addr) {
1410 dev = ip_dev_find(imr->imr_address.s_addr); 1398 dev = ip_dev_find(&init_net, imr->imr_address.s_addr);
1411 if (!dev) 1399 if (!dev)
1412 return NULL; 1400 return NULL;
1413 dev_put(dev); 1401 dev_put(dev);
1414 } 1402 }
1415 1403
1416 if (!dev && !ip_route_output_key(&rt, &fl)) { 1404 if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) {
1417 dev = rt->u.dst.dev; 1405 dev = rt->u.dst.dev;
1418 ip_rt_put(rt); 1406 ip_rt_put(rt);
1419 } 1407 }
@@ -1754,7 +1742,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1754 int ifindex; 1742 int ifindex;
1755 int count = 0; 1743 int count = 0;
1756 1744
1757 if (!MULTICAST(addr)) 1745 if (!ipv4_is_multicast(addr))
1758 return -EINVAL; 1746 return -EINVAL;
1759 1747
1760 rtnl_lock(); 1748 rtnl_lock();
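
Editor's note: the MULTICAST(addr) macro gives way to the typed helper ipv4_is_multicast() throughout the socket-facing IGMP entry points below. The test itself is just "is this a class D address", i.e. the top four bits are 1110; a sketch:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

static int ipv4_is_multicast(uint32_t addr_be)
{
    return (addr_be & htonl(0xf0000000)) == htonl(0xe0000000);
}

int main(void)
{
    printf("%d\n", ipv4_is_multicast(htonl(0xe0000001)));  /* 224.0.0.1 -> 1 */
    printf("%d\n", ipv4_is_multicast(htonl(0x0a000001)));  /* 10.0.0.1 -> 0 */
    return 0;
}
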
@@ -1867,7 +1855,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1867 int leavegroup = 0; 1855 int leavegroup = 0;
1868 int i, j, rv; 1856 int i, j, rv;
1869 1857
1870 if (!MULTICAST(addr)) 1858 if (!ipv4_is_multicast(addr))
1871 return -EINVAL; 1859 return -EINVAL;
1872 1860
1873 rtnl_lock(); 1861 rtnl_lock();
@@ -1997,7 +1985,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1997 struct ip_sf_socklist *newpsl, *psl; 1985 struct ip_sf_socklist *newpsl, *psl;
1998 int leavegroup = 0; 1986 int leavegroup = 0;
1999 1987
2000 if (!MULTICAST(addr)) 1988 if (!ipv4_is_multicast(addr))
2001 return -EINVAL; 1989 return -EINVAL;
2002 if (msf->imsf_fmode != MCAST_INCLUDE && 1990 if (msf->imsf_fmode != MCAST_INCLUDE &&
2003 msf->imsf_fmode != MCAST_EXCLUDE) 1991 msf->imsf_fmode != MCAST_EXCLUDE)
@@ -2080,7 +2068,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2080 struct inet_sock *inet = inet_sk(sk); 2068 struct inet_sock *inet = inet_sk(sk);
2081 struct ip_sf_socklist *psl; 2069 struct ip_sf_socklist *psl;
2082 2070
2083 if (!MULTICAST(addr)) 2071 if (!ipv4_is_multicast(addr))
2084 return -EINVAL; 2072 return -EINVAL;
2085 2073
2086 rtnl_lock(); 2074 rtnl_lock();
@@ -2142,7 +2130,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2142 if (psin->sin_family != AF_INET) 2130 if (psin->sin_family != AF_INET)
2143 return -EINVAL; 2131 return -EINVAL;
2144 addr = psin->sin_addr.s_addr; 2132 addr = psin->sin_addr.s_addr;
2145 if (!MULTICAST(addr)) 2133 if (!ipv4_is_multicast(addr))
2146 return -EINVAL; 2134 return -EINVAL;
2147 2135
2148 rtnl_lock(); 2136 rtnl_lock();
@@ -2192,7 +2180,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
2192 struct ip_sf_socklist *psl; 2180 struct ip_sf_socklist *psl;
2193 int i; 2181 int i;
2194 2182
2195 if (!MULTICAST(loc_addr)) 2183 if (!ipv4_is_multicast(loc_addr))
2196 return 1; 2184 return 1;
2197 2185
2198 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 2186 for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
@@ -2234,7 +2222,7 @@ void ip_mc_drop_socket(struct sock *sk)
2234 struct in_device *in_dev; 2222 struct in_device *in_dev;
2235 inet->mc_list = iml->next; 2223 inet->mc_list = iml->next;
2236 2224
2237 in_dev = inetdev_by_index(iml->multi.imr_ifindex); 2225 in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex);
2238 (void) ip_mc_leave_src(sk, iml, in_dev); 2226 (void) ip_mc_leave_src(sk, iml, in_dev);
2239 if (in_dev != NULL) { 2227 if (in_dev != NULL) {
2240 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2228 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
@@ -2341,6 +2329,7 @@ static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos)
2341} 2329}
2342 2330
2343static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) 2331static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos)
2332 __acquires(dev_base_lock)
2344{ 2333{
2345 read_lock(&dev_base_lock); 2334 read_lock(&dev_base_lock);
2346 return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2335 return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2358,6 +2347,7 @@ static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2358} 2347}
2359 2348
2360static void igmp_mc_seq_stop(struct seq_file *seq, void *v) 2349static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
2350 __releases(dev_base_lock)
2361{ 2351{
2362 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); 2352 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
2363 if (likely(state->in_dev != NULL)) { 2353 if (likely(state->in_dev != NULL)) {
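
The timer change in ip_mc_init_dev() above is a pattern that repeats across this merge: the three-step init_timer() initialization, with .function and .data assigned by hand, collapses into a single setup_timer() call. A minimal sketch of the equivalence, assuming the 2.6.25-era timer API in which the callback takes an unsigned long cookie:

	/* open-coded form being removed */
	init_timer(&in_dev->mr_gq_timer);
	in_dev->mr_gq_timer.function = &igmp_gq_timer_expire;
	in_dev->mr_gq_timer.data = (unsigned long)in_dev;

	/* equivalent single call being introduced */
	setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire,
			(unsigned long)in_dev);

The remaining igmp.c hunks are mechanical: inetdev_by_index(), ip_dev_find() and ip_route_output_key() gain an explicit &init_net argument, MULTICAST() becomes ipv4_is_multicast(), and the seq_file start/stop handlers pick up __acquires/__releases annotations for sparse lock checking.
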
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8fb6ca23700a..7801cceb2d1b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -277,18 +277,11 @@ void inet_csk_init_xmit_timers(struct sock *sk,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	init_timer(&icsk->icsk_retransmit_timer);
-	init_timer(&icsk->icsk_delack_timer);
-	init_timer(&sk->sk_timer);
-
-	icsk->icsk_retransmit_timer.function = retransmit_handler;
-	icsk->icsk_delack_timer.function = delack_handler;
-	sk->sk_timer.function = keepalive_handler;
-
-	icsk->icsk_retransmit_timer.data =
-		icsk->icsk_delack_timer.data =
-			sk->sk_timer.data = (unsigned long)sk;
-
+	setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
+			(unsigned long)sk);
+	setup_timer(&icsk->icsk_delack_timer, delack_handler,
+			(unsigned long)sk);
+	setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
 	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
 }
 
@@ -340,7 +333,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 					    .dport = ireq->rmt_port } } };
 
 	security_req_classify_flow(req, &fl);
-	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
+	if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
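
The same namespace plumbing as in igmp.c: ip_route_output_flow() now takes the struct net as its first parameter, and this caller stays pinned to &init_net until request routing becomes namespace-aware. Schematically, the call-shape change is just:

	/* before: the lookup implicitly used the initial namespace */
	err = ip_route_output_flow(&rt, &fl, sk, 0);

	/* after: the namespace is an explicit argument */
	err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0);
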
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e468e7a7aac4..605ed2cd7972 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -935,7 +935,7 @@ out_free_table:
 
 static void __exit inet_diag_exit(void)
 {
-	sock_release(idiagnl->sk_socket);
+	netlink_kernel_release(idiagnl);
 	kfree(inet_diag_table);
 }
 
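
netlink_kernel_release() is the proper teardown for a socket obtained from netlink_kernel_create(); it replaces reaching into idiagnl->sk_socket and calling sock_release() directly. A hedged sketch of the pairing -- the create call's parameters below are reconstructed from the API of this era and may not match inet_diag's actual init path:

	/* module init, roughly (parameter list is an assumption): */
	idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
					inet_diag_rcv, NULL, THIS_MODULE);

	/* module exit, as converted above: */
	netlink_kernel_release(idiagnl);
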
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e15e04fc6661..724d69aed031 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -47,7 +47,7 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 	}
 	write_unlock(&f->lock);
 
-	mod_timer(&f->secret_timer, now + f->ctl->secret_interval);
+	mod_timer(&f->secret_timer, now + f->secret_interval);
 }
 
 void inet_frags_init(struct inet_frags *f)
@@ -57,35 +57,45 @@ void inet_frags_init(struct inet_frags *f)
 	for (i = 0; i < INETFRAGS_HASHSZ; i++)
 		INIT_HLIST_HEAD(&f->hash[i]);
 
-	INIT_LIST_HEAD(&f->lru_list);
 	rwlock_init(&f->lock);
 
 	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
 			(jiffies ^ (jiffies >> 6)));
 
-	f->nqueues = 0;
-	atomic_set(&f->mem, 0);
-
-	init_timer(&f->secret_timer);
-	f->secret_timer.function = inet_frag_secret_rebuild;
-	f->secret_timer.data = (unsigned long)f;
-	f->secret_timer.expires = jiffies + f->ctl->secret_interval;
+	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
+			(unsigned long)f);
+	f->secret_timer.expires = jiffies + f->secret_interval;
 	add_timer(&f->secret_timer);
 }
 EXPORT_SYMBOL(inet_frags_init);
 
+void inet_frags_init_net(struct netns_frags *nf)
+{
+	nf->nqueues = 0;
+	atomic_set(&nf->mem, 0);
+	INIT_LIST_HEAD(&nf->lru_list);
+}
+EXPORT_SYMBOL(inet_frags_init_net);
+
 void inet_frags_fini(struct inet_frags *f)
 {
 	del_timer(&f->secret_timer);
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
+void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+{
+	nf->low_thresh = 0;
+	inet_frag_evictor(nf, f);
+}
+EXPORT_SYMBOL(inet_frags_exit_net);
+
 static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 {
 	write_lock(&f->lock);
 	hlist_del(&fq->list);
 	list_del(&fq->lru_list);
-	f->nqueues--;
+	fq->net->nqueues--;
 	write_unlock(&f->lock);
 }
 
@@ -103,13 +113,13 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 
 EXPORT_SYMBOL(inet_frag_kill);
 
-static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb,
-		int *work)
+static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
+		struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
 
-	atomic_sub(skb->truesize, &f->mem);
+	atomic_sub(skb->truesize, &nf->mem);
 	if (f->skb_free)
 		f->skb_free(skb);
 	kfree_skb(skb);
@@ -119,22 +129,24 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 				int *work)
 {
 	struct sk_buff *fp;
+	struct netns_frags *nf;
 
 	BUG_TRAP(q->last_in & COMPLETE);
 	BUG_TRAP(del_timer(&q->timer) == 0);
 
 	/* Release all fragment data. */
 	fp = q->fragments;
+	nf = q->net;
 	while (fp) {
 		struct sk_buff *xp = fp->next;
 
-		frag_kfree_skb(f, fp, work);
+		frag_kfree_skb(nf, f, fp, work);
 		fp = xp;
 	}
 
 	if (work)
 		*work -= f->qsize;
-	atomic_sub(f->qsize, &f->mem);
+	atomic_sub(f->qsize, &nf->mem);
 
 	if (f->destructor)
 		f->destructor(q);
@@ -143,20 +155,20 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-int inet_frag_evictor(struct inet_frags *f)
+int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
 {
 	struct inet_frag_queue *q;
 	int work, evicted = 0;
 
-	work = atomic_read(&f->mem) - f->ctl->low_thresh;
+	work = atomic_read(&nf->mem) - nf->low_thresh;
 	while (work > 0) {
 		read_lock(&f->lock);
-		if (list_empty(&f->lru_list)) {
+		if (list_empty(&nf->lru_list)) {
 			read_unlock(&f->lock);
 			break;
 		}
 
-		q = list_first_entry(&f->lru_list,
+		q = list_first_entry(&nf->lru_list,
 				struct inet_frag_queue, lru_list);
 		atomic_inc(&q->refcnt);
 		read_unlock(&f->lock);
@@ -175,8 +187,9 @@ int inet_frag_evictor(struct inet_frags *f)
 }
 EXPORT_SYMBOL(inet_frag_evictor);
 
-static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
-		struct inet_frags *f, unsigned int hash, void *arg)
+static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
+		struct inet_frag_queue *qp_in, struct inet_frags *f,
+		unsigned int hash, void *arg)
 {
 	struct inet_frag_queue *qp;
 #ifdef CONFIG_SMP
@@ -190,7 +203,7 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
 	 * promoted read lock to write lock.
 	 */
 	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
-		if (f->match(qp, arg)) {
+		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&f->lock);
 			qp_in->last_in |= COMPLETE;
@@ -200,18 +213,19 @@ static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in,
 	}
 #endif
 	qp = qp_in;
-	if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout))
+	if (!mod_timer(&qp->timer, jiffies + nf->timeout))
 		atomic_inc(&qp->refcnt);
 
 	atomic_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &f->hash[hash]);
-	list_add_tail(&qp->lru_list, &f->lru_list);
-	f->nqueues++;
+	list_add_tail(&qp->lru_list, &nf->lru_list);
+	nf->nqueues++;
 	write_unlock(&f->lock);
 	return qp;
 }
 
-static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
+static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+		struct inet_frags *f, void *arg)
 {
 	struct inet_frag_queue *q;
 
@@ -220,35 +234,36 @@ static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
 		return NULL;
 
 	f->constructor(q, arg);
-	atomic_add(f->qsize, &f->mem);
+	atomic_add(f->qsize, &nf->mem);
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
+	q->net = nf;
 
 	return q;
 }
 
-static struct inet_frag_queue *inet_frag_create(struct inet_frags *f,
-		void *arg, unsigned int hash)
+static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+		struct inet_frags *f, void *arg, unsigned int hash)
 {
 	struct inet_frag_queue *q;
 
-	q = inet_frag_alloc(f, arg);
+	q = inet_frag_alloc(nf, f, arg);
 	if (q == NULL)
 		return NULL;
 
-	return inet_frag_intern(q, f, hash, arg);
+	return inet_frag_intern(nf, q, f, hash, arg);
 }
 
-struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
-		unsigned int hash)
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
+		struct inet_frags *f, void *key, unsigned int hash)
 {
 	struct inet_frag_queue *q;
 	struct hlist_node *n;
 
 	read_lock(&f->lock);
 	hlist_for_each_entry(q, n, &f->hash[hash], list) {
-		if (f->match(q, key)) {
+		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
 			read_unlock(&f->lock);
 			return q;
@@ -256,6 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key,
 	}
 	read_unlock(&f->lock);
 
-	return inet_frag_create(f, key, hash);
+	return inet_frag_create(nf, f, key, hash);
}
 EXPORT_SYMBOL(inet_frag_find);
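
The shape of this refactor: struct inet_frags keeps what is shared by all namespaces for a protocol (the hash table, its lock and random seed, the callbacks, the secret-rebuild timer), while accounting and tunable limits move into a per-namespace struct netns_frags that every inet_frag_queue now points back to via q->net. Inferred from the accesses above, the per-namespace structure is roughly:

struct netns_frags {
	int			nqueues;	/* live queues in this netns */
	atomic_t		mem;		/* truesize charged to this netns */
	struct list_head	lru_list;	/* per-netns eviction order */

	/* sysctl-tunable limits, now per namespace */
	int			timeout;
	int			high_thresh;
	int			low_thresh;
};

The extra q->net == nf test in both lookup paths keeps queues from different namespaces apart even when they hash into the same bucket of the still-global table.
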
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 67704da04fc4..619c63c6948a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -96,6 +96,7 @@ EXPORT_SYMBOL(inet_put_port);
  * exclusive lock release). It should be ifdefed really.
  */
 void inet_listen_wlock(struct inet_hashinfo *hashinfo)
+	__acquires(hashinfo->lhash_lock)
 {
 	write_lock(&hashinfo->lhash_lock);
 
@@ -190,6 +191,44 @@ sherry_cache:
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
+struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo,
+				  const __be32 saddr, const __be16 sport,
+				  const __be32 daddr, const u16 hnum,
+				  const int dif)
+{
+	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+	struct sock *sk;
+	const struct hlist_node *node;
+	/* Optimize here for direct hit, only listening connections can
+	 * have wildcards anyways.
+	 */
+	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
+	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
+
+	prefetch(head->chain.first);
+	read_lock(lock);
+	sk_for_each(sk, node, &head->chain) {
+		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+			goto hit; /* You sunk my battleship! */
+	}
+
+	/* Must check for a TIME_WAIT'er before going to listener hash. */
+	sk_for_each(sk, node, &head->twchain) {
+		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
+			goto hit;
+	}
+	sk = NULL;
+out:
+	read_unlock(lock);
+	return sk;
+hit:
+	sock_hold(sk);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(__inet_lookup_established);
+
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
 				    struct sock *sk, __u16 lport,
@@ -239,7 +278,7 @@ unique:
 	sk->sk_hash = hash;
 	BUG_TRAP(sk_unhashed(sk));
 	__sk_add_node(sk, &head->chain);
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock(lock);
 
 	if (twp) {
@@ -267,6 +306,48 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
 					  inet->dport);
 }
 
+void __inet_hash_nolisten(struct inet_hashinfo *hashinfo, struct sock *sk)
+{
+	struct hlist_head *list;
+	rwlock_t *lock;
+	struct inet_ehash_bucket *head;
+
+	BUG_TRAP(sk_unhashed(sk));
+
+	sk->sk_hash = inet_sk_ehashfn(sk);
+	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
+	list = &head->chain;
+	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+	write_lock(lock);
+	__sk_add_node(sk, list);
+	sock_prot_inuse_add(sk->sk_prot, 1);
+	write_unlock(lock);
+}
+EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+
+void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
+{
+	struct hlist_head *list;
+	rwlock_t *lock;
+
+	if (sk->sk_state != TCP_LISTEN) {
+		__inet_hash_nolisten(hashinfo, sk);
+		return;
+	}
+
+	BUG_TRAP(sk_unhashed(sk));
+	list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+	lock = &hashinfo->lhash_lock;
+
+	inet_listen_wlock(hashinfo);
+	__sk_add_node(sk, list);
+	sock_prot_inuse_add(sk->sk_prot, 1);
+	write_unlock(lock);
+	wake_up(&hashinfo->lhash_wait);
+}
+EXPORT_SYMBOL_GPL(__inet_hash);
+
 /*
  * Bind a port for a connect operation and hash it.
  */
@@ -334,7 +415,7 @@ ok:
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->sport = htons(port);
-			__inet_hash(hinfo, sk, 0);
+			__inet_hash_nolisten(hinfo, sk);
 		}
 		spin_unlock(&head->lock);
 
@@ -351,7 +432,7 @@ ok:
 	tb = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash(hinfo, sk, 0);
+		__inet_hash_nolisten(hinfo, sk);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
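
The old entry point took a flag, __inet_hash(hinfo, sk, listen_possible); the split above replaces it with two functions, so connect-path callers that always passed 0 now call __inet_hash_nolisten() directly. Mapping the old calls onto the new pair, as this diff reads:

	/* was: __inet_hash(hinfo, sk, 0) -- never a listener */
	__inet_hash_nolisten(hinfo, sk);

	/* was: __inet_hash(hinfo, sk, 1) -- may be a listener */
	__inet_hash(hinfo, sk);	/* falls back to nolisten if !TCP_LISTEN */
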
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a60b99e0ebdc..876169f3a528 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -48,6 +48,21 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	inet_twsk_put(tw);
 }
 
+void inet_twsk_put(struct inet_timewait_sock *tw)
+{
+	if (atomic_dec_and_test(&tw->tw_refcnt)) {
+		struct module *owner = tw->tw_prot->owner;
+		twsk_destructor((struct sock *)tw);
+#ifdef SOCK_REFCNT_DEBUG
+		printk(KERN_DEBUG "%s timewait_sock %p released\n",
+		       tw->tw_prot->name, tw);
+#endif
+		kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
+		module_put(owner);
+	}
+}
+EXPORT_SYMBOL_GPL(inet_twsk_put);
+
 /*
  * Enter the time wait state. This is called with locally disabled BH.
  * Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -76,7 +91,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 
 	/* Step 2: Remove SK from established hash. */
 	if (__sk_del_node_init(sk))
-		sock_prot_dec_use(sk->sk_prot);
+		sock_prot_inuse_add(sk->sk_prot, -1);
 
 	/* Step 3: Hash TW into TIMEWAIT chain. */
 	inet_twsk_add_node(tw, &ehead->twchain);
@@ -194,16 +209,14 @@ out:
 
 EXPORT_SYMBOL_GPL(inet_twdr_hangman);
 
-extern void twkill_slots_invalid(void);
-
 void inet_twdr_twkill_work(struct work_struct *work)
 {
 	struct inet_timewait_death_row *twdr =
 		container_of(work, struct inet_timewait_death_row, twkill_work);
 	int i;
 
-	if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
-		twkill_slots_invalid();
+	BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >
+			(sizeof(twdr->thread_slots) * 8));
 
 	while (twdr->thread_slots) {
 		spin_lock_bh(&twdr->death_lock);
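
The deleted idiom asserted a constant condition through the linker: twkill_slots_invalid() was declared extern but never defined, so the kernel linked only if the compiler could prove the branch dead and eliminate the call. BUILD_BUG_ON() states the same compile-time assertion directly; its classic definition is the negative-size array trick, roughly:

/* compile error if cond is a non-zero compile-time constant */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >
		(sizeof(twdr->thread_slots) * 8));

A violated assumption now fails in the compiler, with a file and line number, rather than as an unresolved symbol at link time.
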
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 877da3ed52e2..0b3b328d82db 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -110,7 +110,7 @@ int ip_forward(struct sk_buff *skb)
 
 	skb->priority = rt_tos2priority(iph->tos);
 
-	return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev,
+	return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev,
 		       ip_forward_finish);
 
 sr_failed:
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2143bf30597a..a2e92f9709db 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -50,7 +50,7 @@
  * as well. Or notify me, at least. --ANK
  */
 
-int sysctl_ipfrag_max_dist __read_mostly = 64;
+static int sysctl_ipfrag_max_dist __read_mostly = 64;
 
 struct ipfrag_skb_cb
 {
@@ -74,35 +74,16 @@ struct ipq {
 	struct inet_peer *peer;
 };
 
-struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
-	/*
-	 * Fragment cache limits. We will commit 256K at one time. Should we
-	 * cross that limit we will prune down to 192K. This should cope with
-	 * even the most extreme cases without allowing an attacker to
-	 * measurably harm machine performance.
-	 */
-	.high_thresh = 256 * 1024,
-	.low_thresh = 192 * 1024,
-
-	/*
-	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
-	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
-	 * by TTL.
-	 */
-	.timeout = IP_FRAG_TIME,
-	.secret_interval = 10 * 60 * HZ,
-};
-
 static struct inet_frags ip4_frags;
 
-int ip_frag_nqueues(void)
+int ip_frag_nqueues(struct net *net)
 {
-	return ip4_frags.nqueues;
+	return net->ipv4.frags.nqueues;
 }
 
-int ip_frag_mem(void)
+int ip_frag_mem(struct net *net)
 {
-	return atomic_read(&ip4_frags.mem);
+	return atomic_read(&net->ipv4.frags.mem);
 }
 
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
@@ -142,11 +123,12 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
 }
 
 /* Memory Tracking Functions. */
-static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
+static __inline__ void frag_kfree_skb(struct netns_frags *nf,
+		struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip4_frags.mem);
+	atomic_sub(skb->truesize, &nf->mem);
 	kfree_skb(skb);
 }
 
@@ -192,11 +174,11 @@ static void ipq_kill(struct ipq *ipq)
 /* Memory limiting on fragments. Evictor trashes the oldest
  * fragment queue until we are back under the threshold.
  */
-static void ip_evictor(void)
+static void ip_evictor(struct net *net)
 {
 	int evicted;
 
-	evicted = inet_frag_evictor(&ip4_frags);
+	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
 	if (evicted)
 		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
 }
@@ -236,7 +218,7 @@ out:
 /* Find the correct entry in the "incomplete datagrams" queue for
  * this IP datagram, and create new one, if nothing is found.
  */
-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
+static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 {
 	struct inet_frag_queue *q;
 	struct ip4_create_arg arg;
@@ -246,7 +228,7 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	arg.user = user;
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
-	q = inet_frag_find(&ip4_frags, &arg, hash);
+	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
 	if (q == NULL)
 		goto out_nomem;
 
@@ -286,7 +268,7 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	struct sk_buff *fp;
 
-	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
+	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
 		atomic_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
@@ -294,7 +276,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	fp = qp->q.fragments;
 	do {
 		struct sk_buff *xp = fp->next;
-		frag_kfree_skb(fp, NULL);
+		frag_kfree_skb(qp->q.net, fp, NULL);
 		fp = xp;
 	} while (fp);
 
@@ -431,7 +413,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 				qp->q.fragments = next;
 
 			qp->q.meat -= free_it->len;
-			frag_kfree_skb(free_it, NULL);
+			frag_kfree_skb(qp->q.net, free_it, NULL);
 		}
 	}
 
@@ -451,7 +433,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	}
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
-	atomic_add(skb->truesize, &ip4_frags.mem);
+	atomic_add(skb->truesize, &qp->q.net->mem);
 	if (offset == 0)
 		qp->q.last_in |= FIRST_IN;
 
@@ -459,7 +441,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		return ip_frag_reasm(qp, prev, dev);
 
 	write_lock(&ip4_frags.lock);
-	list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
 	write_unlock(&ip4_frags.lock);
 	return -EINPROGRESS;
 
@@ -534,12 +516,12 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip4_frags.mem);
+		atomic_add(clone->truesize, &qp->q.net->mem);
 	}
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip4_frags.mem);
+	atomic_sub(head->truesize, &qp->q.net->mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -549,7 +531,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip4_frags.mem);
+		atomic_sub(fp->truesize, &qp->q.net->mem);
 	}
 
 	head->next = NULL;
@@ -582,15 +564,17 @@ out_fail:
 int ip_defrag(struct sk_buff *skb, u32 user)
 {
 	struct ipq *qp;
+	struct net *net;
 
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
 
+	net = skb->dev->nd_net;
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
-		ip_evictor();
+	if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
+		ip_evictor(net);
 
 	/* Lookup (or create) queue header */
-	if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
+	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
 		int ret;
 
 		spin_lock(&qp->q.lock);
@@ -607,9 +591,142 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	return -ENOMEM;
 }
 
+#ifdef CONFIG_SYSCTL
+static int zero;
+
+static struct ctl_table ip4_frags_ctl_table[] = {
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
+		.procname	= "ipfrag_high_thresh",
+		.data		= &init_net.ipv4.frags.high_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
+		.procname	= "ipfrag_low_thresh",
+		.data		= &init_net.ipv4.frags.low_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_TIME,
+		.procname	= "ipfrag_time",
+		.data		= &init_net.ipv4.frags.timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
+		.procname	= "ipfrag_secret_interval",
+		.data		= &ip4_frags.secret_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{
+		.procname	= "ipfrag_max_dist",
+		.data		= &sysctl_ipfrag_max_dist,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &zero
+	},
+	{ }
+};
+
+static int ip4_frags_ctl_register(struct net *net)
+{
+	struct ctl_table *table;
+	struct ctl_table_header *hdr;
+
+	table = ip4_frags_ctl_table;
+	if (net != &init_net) {
+		table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL);
+		if (table == NULL)
+			goto err_alloc;
+
+		table[0].data = &net->ipv4.frags.high_thresh;
+		table[1].data = &net->ipv4.frags.low_thresh;
+		table[2].data = &net->ipv4.frags.timeout;
+		table[3].mode &= ~0222;
+		table[4].mode &= ~0222;
+	}
+
+	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
+	if (hdr == NULL)
+		goto err_reg;
+
+	net->ipv4.frags_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (net != &init_net)
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void ip4_frags_ctl_unregister(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ipv4.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv4.frags_hdr);
+	kfree(table);
+}
+#else
+static inline int ip4_frags_ctl_register(struct net *net)
+{
+	return 0;
+}
+
+static inline void ip4_frags_ctl_unregister(struct net *net)
+{
+}
+#endif
+
+static int ipv4_frags_init_net(struct net *net)
+{
+	/*
+	 * Fragment cache limits. We will commit 256K at one time. Should we
+	 * cross that limit we will prune down to 192K. This should cope with
+	 * even the most extreme cases without allowing an attacker to
+	 * measurably harm machine performance.
+	 */
+	net->ipv4.frags.high_thresh = 256 * 1024;
+	net->ipv4.frags.low_thresh = 192 * 1024;
+	/*
+	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
+	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
	 * by TTL.
+	 */
+	net->ipv4.frags.timeout = IP_FRAG_TIME;
+
+	inet_frags_init_net(&net->ipv4.frags);
+
+	return ip4_frags_ctl_register(net);
+}
+
+static void ipv4_frags_exit_net(struct net *net)
+{
+	ip4_frags_ctl_unregister(net);
+	inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+}
+
+static struct pernet_operations ip4_frags_ops = {
+	.init = ipv4_frags_init_net,
+	.exit = ipv4_frags_exit_net,
+};
+
 void __init ipfrag_init(void)
 {
-	ip4_frags.ctl = &ip4_frags_ctl;
+	register_pernet_subsys(&ip4_frags_ops);
 	ip4_frags.hashfn = ip4_hashfn;
 	ip4_frags.constructor = ip4_frag_init;
 	ip4_frags.destructor = ip4_frag_free;
@@ -617,6 +734,7 @@ void __init ipfrag_init(void)
 	ip4_frags.qsize = sizeof(struct ipq);
 	ip4_frags.match = ip4_frag_match;
 	ip4_frags.frag_expire = ip_expire;
+	ip4_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip4_frags);
 }
 
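
The ipfrag_init() change at the bottom is the standard pernet pattern: a struct pernet_operations whose .init/.exit callbacks run for every network namespace as it comes and goes (including init_net at boot), registered once via register_pernet_subsys(). A minimal sketch of the shape, mirroring ip4_frags_ops above, with hypothetical example_* names that are not part of this patch:

static int example_init_net(struct net *net)
{
	/* set up this namespace's private state, e.g. defaults */
	return 0;
}

static void example_exit_net(struct net *net)
{
	/* tear down whatever example_init_net() set up */
}

static struct pernet_operations example_net_ops = {
	.init = example_init_net,
	.exit = example_exit_net,
};

	/* from subsystem init: */
	register_pernet_subsys(&example_net_ops);

Here ipv4_frags_init_net() seeds the per-namespace thresholds and timeout that used to live in the now-deleted global ip4_frags_ctl, and ip4_frags_ctl_register() duplicates the sysctl table for each non-init namespace, making the jiffies-based knobs read-only there.
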
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4b93f32de10d..63f691719353 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -176,7 +176,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
 	}
 	for (t = tunnels_l[h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr ||
-		    (local == t->parms.iph.daddr && MULTICAST(local))) {
+		    (local == t->parms.iph.daddr &&
+		     ipv4_is_multicast(local))) {
 			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 				return t;
 		}
@@ -201,7 +202,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
 
 	if (local)
 		prio |= 1;
-	if (remote && !MULTICAST(remote)) {
+	if (remote && !ipv4_is_multicast(remote)) {
 		prio |= 2;
 		h ^= HASH(remote);
 	}
@@ -367,7 +368,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 
 	read_lock(&ipgre_lock);
 	t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
-	if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
+	if (t == NULL || t->parms.iph.daddr == 0 ||
+	    ipv4_is_multicast(t->parms.iph.daddr))
 		goto out;
 
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
@@ -478,7 +480,7 @@ out:
 		fl.fl4_dst = eiph->saddr;
 		fl.fl4_tos = RT_TOS(eiph->tos);
 		fl.proto = IPPROTO_GRE;
-		if (ip_route_output_key(&rt, &fl)) {
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			kfree_skb(skb2);
 			return;
 		}
@@ -491,7 +493,7 @@ out:
 			fl.fl4_dst = eiph->daddr;
 			fl.fl4_src = eiph->saddr;
 			fl.fl4_tos = eiph->tos;
-			if (ip_route_output_key(&rt, &fl) ||
+			if (ip_route_output_key(&init_net, &rt, &fl) ||
 			    rt->u.dst.dev->type != ARPHRD_IPGRE) {
 				ip_rt_put(rt);
 				kfree_skb(skb2);
@@ -619,7 +621,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
-		if (MULTICAST(iph->daddr)) {
+		if (ipv4_is_multicast(iph->daddr)) {
 			/* Looped back packet, drop it! */
 			if (((struct rtable*)skb->dst)->fl.iif == 0)
 				goto drop;
@@ -746,7 +748,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 						.saddr = tiph->saddr,
 						.tos = RT_TOS(tos) } },
 				    .proto = IPPROTO_GRE };
-		if (ip_route_output_key(&rt, &fl)) {
+		if (ip_route_output_key(&init_net, &rt, &fl)) {
 			tunnel->stat.tx_carrier_errors++;
 			goto tx_error;
 		}
@@ -783,7 +785,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
 
 		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
-			if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
+			if ((tunnel->parms.iph.daddr &&
+			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 			    rt6->rt6i_dst.plen == 128) {
 				rt6->rt6i_flags |= RTF_MODIFIED;
 				skb->dst->metrics[RTAX_MTU-1] = mtu;
@@ -896,6 +899,59 @@ tx_error:
 	return 0;
 }
 
+static void ipgre_tunnel_bind_dev(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	struct iphdr *iph;
+	int hlen = LL_MAX_HEADER;
+	int mtu = ETH_DATA_LEN;
+	int addend = sizeof(struct iphdr) + 4;
+
+	tunnel = netdev_priv(dev);
+	iph = &tunnel->parms.iph;
+
+	/* Guess output device to choose reasonable mtu and hard_header_len */
+
+	if (iph->daddr) {
+		struct flowi fl = { .oif = tunnel->parms.link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = iph->daddr,
+						.saddr = iph->saddr,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_GRE };
+		struct rtable *rt;
+		if (!ip_route_output_key(&init_net, &rt, &fl)) {
+			tdev = rt->u.dst.dev;
+			ip_rt_put(rt);
+		}
+		dev->flags |= IFF_POINTOPOINT;
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+
+	if (tdev) {
+		hlen = tdev->hard_header_len;
+		mtu = tdev->mtu;
+	}
+	dev->iflink = tunnel->parms.link;
+
+	/* Precalculate GRE options length */
+	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+		if (tunnel->parms.o_flags&GRE_CSUM)
+			addend += 4;
+		if (tunnel->parms.o_flags&GRE_KEY)
+			addend += 4;
+		if (tunnel->parms.o_flags&GRE_SEQ)
+			addend += 4;
+	}
+	dev->hard_header_len = hlen + addend;
+	dev->mtu = mtu - addend;
+	tunnel->hlen = addend;
+
+}
+
 static int
 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -956,7 +1012,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 
 			t = netdev_priv(dev);
 
-			if (MULTICAST(p.iph.daddr))
+			if (ipv4_is_multicast(p.iph.daddr))
 				nflags = IFF_BROADCAST;
 			else if (p.iph.daddr)
 				nflags = IFF_POINTOPOINT;
@@ -983,6 +1039,11 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				t->parms.iph.ttl = p.iph.ttl;
 				t->parms.iph.tos = p.iph.tos;
 				t->parms.iph.frag_off = p.iph.frag_off;
+				if (t->parms.link != p.link) {
+					t->parms.link = p.link;
+					ipgre_tunnel_bind_dev(dev);
+					netdev_state_change(dev);
+				}
 			}
 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 				err = -EFAULT;
@@ -1085,7 +1146,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 		memcpy(&iph->daddr, daddr, 4);
 		return t->hlen;
 	}
-	if (iph->daddr && !MULTICAST(iph->daddr))
+	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
 		return t->hlen;
 
 	return -t->hlen;
@@ -1108,7 +1169,7 @@ static int ipgre_open(struct net_device *dev)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
 
-	if (MULTICAST(t->parms.iph.daddr)) {
+	if (ipv4_is_multicast(t->parms.iph.daddr)) {
 		struct flowi fl = { .oif = t->parms.link,
 				    .nl_u = { .ip4_u =
 					      { .daddr = t->parms.iph.daddr,
@@ -1116,7 +1177,7 @@ static int ipgre_open(struct net_device *dev)
 						.tos = RT_TOS(t->parms.iph.tos) } },
 				    .proto = IPPROTO_GRE };
 		struct rtable *rt;
-		if (ip_route_output_key(&rt, &fl))
+		if (ip_route_output_key(&init_net, &rt, &fl))
 			return -EADDRNOTAVAIL;
 		dev = rt->u.dst.dev;
 		ip_rt_put(rt);
@@ -1131,8 +1192,9 @@ static int ipgre_open(struct net_device *dev)
 static int ipgre_close(struct net_device *dev)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
-	if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
-		struct in_device *in_dev = inetdev_by_index(t->mlink);
+	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
+		struct in_device *in_dev;
+		in_dev = inetdev_by_index(dev->nd_net, t->mlink);
 		if (in_dev) {
 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 			in_dev_put(in_dev);
@@ -1162,12 +1224,8 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 
 static int ipgre_tunnel_init(struct net_device *dev)
 {
-	struct net_device *tdev = NULL;
 	struct ip_tunnel *tunnel;
 	struct iphdr *iph;
-	int hlen = LL_MAX_HEADER;
-	int mtu = ETH_DATA_LEN;
-	int addend = sizeof(struct iphdr) + 4;
 
 	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
@@ -1178,25 +1236,11 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
-	/* Guess output device to choose reasonable mtu and hard_header_len */
+	ipgre_tunnel_bind_dev(dev);
 
 	if (iph->daddr) {
-		struct flowi fl = { .oif = tunnel->parms.link,
-				    .nl_u = { .ip4_u =
-					      { .daddr = iph->daddr,
-						.saddr = iph->saddr,
-						.tos = RT_TOS(iph->tos) } },
-				    .proto = IPPROTO_GRE };
-		struct rtable *rt;
-		if (!ip_route_output_key(&rt, &fl)) {
-			tdev = rt->u.dst.dev;
-			ip_rt_put(rt);
-		}
-
-		dev->flags |= IFF_POINTOPOINT;
-
 #ifdef CONFIG_NET_IPGRE_BROADCAST
-		if (MULTICAST(iph->daddr)) {
+		if (ipv4_is_multicast(iph->daddr)) {
 			if (!iph->saddr)
 				return -EINVAL;
 			dev->flags = IFF_BROADCAST;
@@ -1205,31 +1249,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
 			dev->stop = ipgre_close;
 		}
 #endif
-	} else {
+	} else
 		dev->header_ops = &ipgre_header_ops;
-	}
-
-	if (!tdev && tunnel->parms.link)
-		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
-
-	if (tdev) {
-		hlen = tdev->hard_header_len;
-		mtu = tdev->mtu;
-	}
-	dev->iflink = tunnel->parms.link;
 
-	/* Precalculate GRE options length */
-	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
-		if (tunnel->parms.o_flags&GRE_CSUM)
-			addend += 4;
-		if (tunnel->parms.o_flags&GRE_KEY)
-			addend += 4;
-		if (tunnel->parms.o_flags&GRE_SEQ)
-			addend += 4;
-	}
-	dev->hard_header_len = hlen + addend;
-	dev->mtu = mtu - addend;
-	tunnel->hlen = addend;
 	return 0;
 }
 
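
All the MULTICAST() sites in this file become ipv4_is_multicast(), a typed helper taking a __be32 and testing for the 224.0.0.0/4 class D range. Its definition should be equivalent to the following sketch, reconstructed from the semantics of the old macro:

static inline bool ipv4_is_multicast(__be32 addr)
{
	/* top nibble 1110 -> 224.0.0.0/4, the IPv4 multicast block */
	return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
}
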
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 168c871fcd79..65631391d479 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -204,22 +204,14 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 
 	rcu_read_lock();
 	{
-		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
 		int protocol = ip_hdr(skb)->protocol;
-		int hash;
-		struct sock *raw_sk;
+		int hash, raw;
 		struct net_protocol *ipprot;
 
 	resubmit:
-		hash = protocol & (MAX_INET_PROTOS - 1);
-		raw_sk = sk_head(&raw_v4_htable[hash]);
-
-		/* If there maybe a raw socket we must check - if not we
-		 * don't care less
-		 */
-		if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
-			raw_sk = NULL;
+		raw = raw_local_deliver(skb, protocol);
 
+		hash = protocol & (MAX_INET_PROTOS - 1);
 		if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
 			int ret;
 
@@ -237,7 +229,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
 			}
 			IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
 		} else {
-			if (!raw_sk) {
+			if (!raw) {
 				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 					IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
 					icmp_send(skb, ICMP_DEST_UNREACH,
@@ -268,7 +260,7 @@ int ip_local_deliver(struct sk_buff *skb)
 		return 0;
 	}
 
-	return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL,
+	return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
 		       ip_local_deliver_finish);
 }
 
@@ -347,7 +339,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
 
 #ifdef CONFIG_NET_CLS_ROUTE
 	if (unlikely(skb->dst->tclassid)) {
-		struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id();
+		struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id());
 		u32 idx = skb->dst->tclassid;
 		st[idx&0xFF].o_packets++;
 		st[idx&0xFF].o_bytes+=skb->len;
@@ -442,7 +434,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	/* Remove any debris in the socket control block */
 	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 
-	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
+	return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
 		       ip_rcv_finish);
 
 inhdr_error:
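
ip_local_deliver_finish() no longer walks raw_v4_htable itself; the check moves behind raw_local_deliver(), whose body lives in raw.c and is not shown in this hunk. On my reading its contract matches the code it replaces: look up the raw bucket for the protocol, hand copies to any matching raw sockets via the old raw_v4_input() path, and return non-zero when a raw socket existed, so the caller suppresses the ICMP protocol-unreachable only in that case. A sketch of the caller-side logic, where ipprot_found is a hypothetical stand-in for the inet_protos[] hit above:

	raw = raw_local_deliver(skb, protocol);
	ipprot = rcu_dereference(inet_protos[protocol & (MAX_INET_PROTOS - 1)]);
	if (ipprot) {
		/* normal transport-protocol delivery */
	} else if (!raw) {
		/* nobody wanted it: ICMP_PROT_UNREACH, then drop */
	}
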
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2f14745a9e1f..4d315158fd3c 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -151,7 +151,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 				__be32 addr;
 
 				memcpy(&addr, sptr+soffset-1, 4);
-				if (inet_addr_type(addr) != RTN_LOCAL) {
+				if (inet_addr_type(&init_net, addr) != RTN_LOCAL) {
 					dopt->ts_needtime = 1;
 					soffset += 8;
 				}
@@ -400,7 +400,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 				{
 					__be32 addr;
 					memcpy(&addr, &optptr[optptr[2]-1], 4);
-					if (inet_addr_type(addr) == RTN_UNICAST)
+					if (inet_addr_type(&init_net, addr) == RTN_UNICAST)
 						break;
 					if (skb)
 						timeptr = (__be32*)&optptr[optptr[2]+3];
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index bc9e57550e86..18070ca65771 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -91,6 +91,28 @@ __inline__ void ip_send_check(struct iphdr *iph)
91 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 91 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
92} 92}
93 93
94int __ip_local_out(struct sk_buff *skb)
95{
96 struct iphdr *iph = ip_hdr(skb);
97
98 iph->tot_len = htons(skb->len);
99 ip_send_check(iph);
100 return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
101 dst_output);
102}
103
104int ip_local_out(struct sk_buff *skb)
105{
106 int err;
107
108 err = __ip_local_out(skb);
109 if (likely(err == 1))
110 err = dst_output(skb);
111
112 return err;
113}
114EXPORT_SYMBOL_GPL(ip_local_out);
115
94/* dev_loopback_xmit for use with netfilter. */ 116/* dev_loopback_xmit for use with netfilter. */
95static int ip_dev_loopback_xmit(struct sk_buff *newskb) 117static int ip_dev_loopback_xmit(struct sk_buff *newskb)
96{ 118{
@@ -138,20 +160,17 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
138 iph->daddr = rt->rt_dst; 160 iph->daddr = rt->rt_dst;
139 iph->saddr = rt->rt_src; 161 iph->saddr = rt->rt_src;
140 iph->protocol = sk->sk_protocol; 162 iph->protocol = sk->sk_protocol;
141 iph->tot_len = htons(skb->len);
142 ip_select_ident(iph, &rt->u.dst, sk); 163 ip_select_ident(iph, &rt->u.dst, sk);
143 164
144 if (opt && opt->optlen) { 165 if (opt && opt->optlen) {
145 iph->ihl += opt->optlen>>2; 166 iph->ihl += opt->optlen>>2;
146 ip_options_build(skb, opt, daddr, rt, 0); 167 ip_options_build(skb, opt, daddr, rt, 0);
147 } 168 }
148 ip_send_check(iph);
149 169
150 skb->priority = sk->sk_priority; 170 skb->priority = sk->sk_priority;
151 171
152 /* Send it out. */ 172 /* Send it out. */
153 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 173 return ip_local_out(skb);
154 dst_output);
155} 174}
156 175
157EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); 176EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
@@ -251,8 +270,8 @@ int ip_mc_output(struct sk_buff *skb)
251 ) { 270 ) {
252 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 271 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
253 if (newskb) 272 if (newskb)
254 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL, 273 NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb,
255 newskb->dev, 274 NULL, newskb->dev,
256 ip_dev_loopback_xmit); 275 ip_dev_loopback_xmit);
257 } 276 }
258 277
@@ -267,11 +286,11 @@ int ip_mc_output(struct sk_buff *skb)
267 if (rt->rt_flags&RTCF_BROADCAST) { 286 if (rt->rt_flags&RTCF_BROADCAST) {
268 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 287 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
269 if (newskb) 288 if (newskb)
270 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL, 289 NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL,
271 newskb->dev, ip_dev_loopback_xmit); 290 newskb->dev, ip_dev_loopback_xmit);
272 } 291 }
273 292
274 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, 293 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
275 ip_finish_output, 294 ip_finish_output,
276 !(IPCB(skb)->flags & IPSKB_REROUTED)); 295 !(IPCB(skb)->flags & IPSKB_REROUTED));
277} 296}
@@ -285,7 +304,7 @@ int ip_output(struct sk_buff *skb)
285 skb->dev = dev; 304 skb->dev = dev;
286 skb->protocol = htons(ETH_P_IP); 305 skb->protocol = htons(ETH_P_IP);
287 306
288 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, 307 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
289 ip_finish_output, 308 ip_finish_output,
290 !(IPCB(skb)->flags & IPSKB_REROUTED)); 309 !(IPCB(skb)->flags & IPSKB_REROUTED));
291} 310}
@@ -331,7 +350,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
331 * itself out. 350 * itself out.
332 */ 351 */
333 security_sk_classify_flow(sk, &fl); 352 security_sk_classify_flow(sk, &fl);
334 if (ip_route_output_flow(&rt, &fl, sk, 0)) 353 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0))
335 goto no_route; 354 goto no_route;
336 } 355 }
337 sk_setup_caps(sk, &rt->u.dst); 356 sk_setup_caps(sk, &rt->u.dst);
@@ -347,7 +366,6 @@ packet_routed:
347 skb_reset_network_header(skb); 366 skb_reset_network_header(skb);
348 iph = ip_hdr(skb); 367 iph = ip_hdr(skb);
349 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 368 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
350 iph->tot_len = htons(skb->len);
351 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) 369 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
352 iph->frag_off = htons(IP_DF); 370 iph->frag_off = htons(IP_DF);
353 else 371 else
@@ -366,13 +384,9 @@ packet_routed:
366 ip_select_ident_more(iph, &rt->u.dst, sk, 384 ip_select_ident_more(iph, &rt->u.dst, sk,
367 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 385 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
368 386
369 /* Add an IP checksum. */
370 ip_send_check(iph);
371
372 skb->priority = sk->sk_priority; 387 skb->priority = sk->sk_priority;
373 388
374 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 389 return ip_local_out(skb);
375 dst_output);
376 390
377no_route: 391no_route:
378 IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 392 IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
@@ -1262,14 +1276,12 @@ int ip_push_pending_frames(struct sock *sk)
1262 ip_options_build(skb, opt, inet->cork.addr, rt, 0); 1276 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1263 } 1277 }
1264 iph->tos = inet->tos; 1278 iph->tos = inet->tos;
1265 iph->tot_len = htons(skb->len);
1266 iph->frag_off = df; 1279 iph->frag_off = df;
1267 ip_select_ident(iph, &rt->u.dst, sk); 1280 ip_select_ident(iph, &rt->u.dst, sk);
1268 iph->ttl = ttl; 1281 iph->ttl = ttl;
1269 iph->protocol = sk->sk_protocol; 1282 iph->protocol = sk->sk_protocol;
1270 iph->saddr = rt->rt_src; 1283 iph->saddr = rt->rt_src;
1271 iph->daddr = rt->rt_dst; 1284 iph->daddr = rt->rt_dst;
1272 ip_send_check(iph);
1273 1285
1274 skb->priority = sk->sk_priority; 1286 skb->priority = sk->sk_priority;
1275 skb->dst = dst_clone(&rt->u.dst); 1287 skb->dst = dst_clone(&rt->u.dst);
@@ -1279,8 +1291,7 @@ int ip_push_pending_frames(struct sock *sk)
1279 skb_transport_header(skb))->type); 1291 skb_transport_header(skb))->type);
1280 1292
1281	/* Netfilter gets the whole, unfragmented skb. */	1293
1282 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 1294 err = ip_local_out(skb);
1283 skb->dst->dev, dst_output);
1284 if (err) { 1295 if (err) {
1285 if (err > 0) 1296 if (err > 0)
1286 err = inet->recverr ? net_xmit_errno(err) : 0; 1297 err = inet->recverr ? net_xmit_errno(err) : 0;
@@ -1330,8 +1341,6 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1330 * 1341 *
1331 * Should run single threaded per socket because it uses the sock 1342 * Should run single threaded per socket because it uses the sock
1332 * structure to pass arguments. 1343 * structure to pass arguments.
1333 *
1334 * LATER: switch from ip_build_xmit to ip_append_*
1335 */ 1344 */
1336void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, 1345void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1337 unsigned int len) 1346 unsigned int len)
@@ -1370,7 +1379,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1370 .dport = tcp_hdr(skb)->source } }, 1379 .dport = tcp_hdr(skb)->source } },
1371 .proto = sk->sk_protocol }; 1380 .proto = sk->sk_protocol };
1372 security_skb_classify_flow(skb, &fl); 1381 security_skb_classify_flow(skb, &fl);
1373 if (ip_route_output_key(&rt, &fl)) 1382 if (ip_route_output_key(sk->sk_net, &rt, &fl))
1374 return; 1383 return;
1375 } 1384 }
1376 1385
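
The deleted iph->tot_len, ip_send_check() and NF_HOOK(..., dst_output) lines in the hunks above are not lost; they move into the new ip_local_out() helper that both call sites now use. A minimal sketch of that helper, assuming the form introduced elsewhere in this series:

int __ip_local_out(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);

	iph->tot_len = htons(skb->len);	/* filled in for every local packet */
	ip_send_check(iph);		/* header checksum, also centralized */
	return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL,
		       skb->dst->dev, dst_output);
}

int ip_local_out(struct sk_buff *skb)
{
	int err = __ip_local_out(skb);

	if (likely(err == 1))		/* hook passed: keep the skb moving */
		err = dst_output(skb);
	return err;
}
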
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 82817e554363..754b0a5bbfe9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -594,7 +594,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
594 err = 0; 594 err = 0;
595 break; 595 break;
596 } 596 }
597 dev = ip_dev_find(mreq.imr_address.s_addr); 597 dev = ip_dev_find(&init_net, mreq.imr_address.s_addr);
598 if (dev) { 598 if (dev) {
599 mreq.imr_ifindex = dev->ifindex; 599 mreq.imr_ifindex = dev->ifindex;
600 dev_put(dev); 600 dev_put(dev);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 2c44a94c2135..f4af99ad8fdb 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -182,7 +182,6 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
182static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) 182static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
183{ 183{
184 struct xfrm_state *t; 184 struct xfrm_state *t;
185 u8 mode = XFRM_MODE_TUNNEL;
186 185
187 t = xfrm_state_alloc(); 186 t = xfrm_state_alloc();
188 if (t == NULL) 187 if (t == NULL)
@@ -193,9 +192,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
193 t->id.daddr.a4 = x->id.daddr.a4; 192 t->id.daddr.a4 = x->id.daddr.a4;
194 memcpy(&t->sel, &x->sel, sizeof(t->sel)); 193 memcpy(&t->sel, &x->sel, sizeof(t->sel));
195 t->props.family = AF_INET; 194 t->props.family = AF_INET;
196 if (x->props.mode == XFRM_MODE_BEET) 195 t->props.mode = x->props.mode;
197 mode = x->props.mode;
198 t->props.mode = mode;
199 t->props.saddr.a4 = x->props.saddr.a4; 196 t->props.saddr.a4 = x->props.saddr.a4;
200 t->props.flags = x->props.flags; 197 t->props.flags = x->props.flags;
201 198
@@ -389,15 +386,22 @@ static int ipcomp_init_state(struct xfrm_state *x)
389 if (x->encap) 386 if (x->encap)
390 goto out; 387 goto out;
391 388
389 x->props.header_len = 0;
390 switch (x->props.mode) {
391 case XFRM_MODE_TRANSPORT:
392 break;
393 case XFRM_MODE_TUNNEL:
394 x->props.header_len += sizeof(struct iphdr);
395 break;
396 default:
397 goto out;
398 }
399
392 err = -ENOMEM; 400 err = -ENOMEM;
393 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); 401 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
394 if (!ipcd) 402 if (!ipcd)
395 goto out; 403 goto out;
396 404
397 x->props.header_len = 0;
398 if (x->props.mode == XFRM_MODE_TUNNEL)
399 x->props.header_len += sizeof(struct iphdr);
400
401 mutex_lock(&ipcomp_resource_mutex); 405 mutex_lock(&ipcomp_resource_mutex);
402 if (!ipcomp_alloc_scratches()) 406 if (!ipcomp_alloc_scratches())
403 goto error; 407 goto error;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b8f7763b2261..a52b5853aaa8 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -140,6 +140,9 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */
140__be32 root_server_addr = NONE; /* Address of NFS server */ 140__be32 root_server_addr = NONE; /* Address of NFS server */
141u8 root_server_path[256] = { 0, }; /* Path to mount as root */ 141u8 root_server_path[256] = { 0, }; /* Path to mount as root */
142 142
143/* vendor class identifier */
144static char vendor_class_identifier[253] __initdata;
145
143/* Persistent data: */ 146/* Persistent data: */
144 147
145static int ic_proto_used; /* Protocol used, if any */ 148static int ic_proto_used; /* Protocol used, if any */
@@ -299,7 +302,7 @@ static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
299 302
300 mm_segment_t oldfs = get_fs(); 303 mm_segment_t oldfs = get_fs();
301 set_fs(get_ds()); 304 set_fs(get_ds());
302 res = ip_rt_ioctl(cmd, (void __user *) arg); 305 res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
303 set_fs(oldfs); 306 set_fs(oldfs);
304 return res; 307 return res;
305} 308}
@@ -588,6 +591,7 @@ ic_dhcp_init_options(u8 *options)
588 u8 mt = ((ic_servaddr == NONE) 591 u8 mt = ((ic_servaddr == NONE)
589 ? DHCPDISCOVER : DHCPREQUEST); 592 ? DHCPDISCOVER : DHCPREQUEST);
590 u8 *e = options; 593 u8 *e = options;
594 int len;
591 595
592#ifdef IPCONFIG_DEBUG 596#ifdef IPCONFIG_DEBUG
593 printk("DHCP: Sending message type %d\n", mt); 597 printk("DHCP: Sending message type %d\n", mt);
@@ -628,6 +632,16 @@ ic_dhcp_init_options(u8 *options)
628 *e++ = sizeof(ic_req_params); 632 *e++ = sizeof(ic_req_params);
629 memcpy(e, ic_req_params, sizeof(ic_req_params)); 633 memcpy(e, ic_req_params, sizeof(ic_req_params));
630 e += sizeof(ic_req_params); 634 e += sizeof(ic_req_params);
635
636 if (*vendor_class_identifier) {
637 printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n",
638 vendor_class_identifier);
639 *e++ = 60; /* Class-identifier */
640 len = strlen(vendor_class_identifier);
641 *e++ = len;
642 memcpy(e, vendor_class_identifier, len);
643 e += len;
644 }
631 } 645 }
632 646
633 *e++ = 255; /* End of the list */ 647 *e++ = 255; /* End of the list */
@@ -1513,5 +1527,16 @@ static int __init nfsaddrs_config_setup(char *addrs)
1513 return ip_auto_config_setup(addrs); 1527 return ip_auto_config_setup(addrs);
1514} 1528}
1515 1529
1530static int __init vendor_class_identifier_setup(char *addrs)
1531{
1532 if (strlcpy(vendor_class_identifier, addrs,
1533 sizeof(vendor_class_identifier))
1534 >= sizeof(vendor_class_identifier))
1535	printk(KERN_WARNING "DHCP: vendorclass too long, truncated to \"%s\"\n",
1536 vendor_class_identifier);
1537 return 1;
1538}
1539
1516__setup("ip=", ip_auto_config_setup); 1540__setup("ip=", ip_auto_config_setup);
1517__setup("nfsaddrs=", nfsaddrs_config_setup); 1541__setup("nfsaddrs=", nfsaddrs_config_setup);
1542__setup("dhcpclass=", vendor_class_identifier_setup);
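
With the new __setup() hook, the vendor class identifier (DHCP option 60, emitted by the ic_dhcp_init_options() hunk above) can be supplied on the kernel command line; the class string below is an invented example:

	ip=dhcp dhcpclass=MyPXEClient
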
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 8c2b2b0741da..da281581692c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -405,7 +405,7 @@ out:
405 fl.fl4_daddr = eiph->saddr; 405 fl.fl4_daddr = eiph->saddr;
406 fl.fl4_tos = RT_TOS(eiph->tos); 406 fl.fl4_tos = RT_TOS(eiph->tos);
407 fl.proto = IPPROTO_IPIP; 407 fl.proto = IPPROTO_IPIP;
408 if (ip_route_output_key(&rt, &key)) { 408 if (ip_route_output_key(&init_net, &rt, &key)) {
409 kfree_skb(skb2); 409 kfree_skb(skb2);
410 return 0; 410 return 0;
411 } 411 }
@@ -418,7 +418,7 @@ out:
418 fl.fl4_daddr = eiph->daddr; 418 fl.fl4_daddr = eiph->daddr;
419 fl.fl4_src = eiph->saddr; 419 fl.fl4_src = eiph->saddr;
420 fl.fl4_tos = eiph->tos; 420 fl.fl4_tos = eiph->tos;
421 if (ip_route_output_key(&rt, &fl) || 421 if (ip_route_output_key(&init_net, &rt, &fl) ||
422 rt->u.dst.dev->type != ARPHRD_TUNNEL) { 422 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
423 ip_rt_put(rt); 423 ip_rt_put(rt);
424 kfree_skb(skb2); 424 kfree_skb(skb2);
@@ -547,7 +547,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
547 .saddr = tiph->saddr, 547 .saddr = tiph->saddr,
548 .tos = RT_TOS(tos) } }, 548 .tos = RT_TOS(tos) } },
549 .proto = IPPROTO_IPIP }; 549 .proto = IPPROTO_IPIP };
550 if (ip_route_output_key(&rt, &fl)) { 550 if (ip_route_output_key(&init_net, &rt, &fl)) {
551 tunnel->stat.tx_carrier_errors++; 551 tunnel->stat.tx_carrier_errors++;
552 goto tx_error_icmp; 552 goto tx_error_icmp;
553 } 553 }
@@ -651,6 +651,40 @@ tx_error:
651 return 0; 651 return 0;
652} 652}
653 653
654static void ipip_tunnel_bind_dev(struct net_device *dev)
655{
656 struct net_device *tdev = NULL;
657 struct ip_tunnel *tunnel;
658 struct iphdr *iph;
659
660 tunnel = netdev_priv(dev);
661 iph = &tunnel->parms.iph;
662
663 if (iph->daddr) {
664 struct flowi fl = { .oif = tunnel->parms.link,
665 .nl_u = { .ip4_u =
666 { .daddr = iph->daddr,
667 .saddr = iph->saddr,
668 .tos = RT_TOS(iph->tos) } },
669 .proto = IPPROTO_IPIP };
670 struct rtable *rt;
671 if (!ip_route_output_key(&init_net, &rt, &fl)) {
672 tdev = rt->u.dst.dev;
673 ip_rt_put(rt);
674 }
675 dev->flags |= IFF_POINTOPOINT;
676 }
677
678 if (!tdev && tunnel->parms.link)
679 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
680
681 if (tdev) {
682 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
683 dev->mtu = tdev->mtu - sizeof(struct iphdr);
684 }
685 dev->iflink = tunnel->parms.link;
686}
687
654static int 688static int
655ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 689ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
656{ 690{
@@ -723,6 +757,11 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
723 t->parms.iph.ttl = p.iph.ttl; 757 t->parms.iph.ttl = p.iph.ttl;
724 t->parms.iph.tos = p.iph.tos; 758 t->parms.iph.tos = p.iph.tos;
725 t->parms.iph.frag_off = p.iph.frag_off; 759 t->parms.iph.frag_off = p.iph.frag_off;
760 if (t->parms.link != p.link) {
761 t->parms.link = p.link;
762 ipip_tunnel_bind_dev(dev);
763 netdev_state_change(dev);
764 }
726 } 765 }
727 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 766 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
728 err = -EFAULT; 767 err = -EFAULT;
@@ -791,12 +830,9 @@ static void ipip_tunnel_setup(struct net_device *dev)
791 830
792static int ipip_tunnel_init(struct net_device *dev) 831static int ipip_tunnel_init(struct net_device *dev)
793{ 832{
794 struct net_device *tdev = NULL;
795 struct ip_tunnel *tunnel; 833 struct ip_tunnel *tunnel;
796 struct iphdr *iph;
797 834
798 tunnel = netdev_priv(dev); 835 tunnel = netdev_priv(dev);
799 iph = &tunnel->parms.iph;
800 836
801 tunnel->dev = dev; 837 tunnel->dev = dev;
802 strcpy(tunnel->parms.name, dev->name); 838 strcpy(tunnel->parms.name, dev->name);
@@ -804,29 +840,7 @@ static int ipip_tunnel_init(struct net_device *dev)
804 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 840 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
805 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 841 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
806 842
807 if (iph->daddr) { 843 ipip_tunnel_bind_dev(dev);
808 struct flowi fl = { .oif = tunnel->parms.link,
809 .nl_u = { .ip4_u =
810 { .daddr = iph->daddr,
811 .saddr = iph->saddr,
812 .tos = RT_TOS(iph->tos) } },
813 .proto = IPPROTO_IPIP };
814 struct rtable *rt;
815 if (!ip_route_output_key(&rt, &fl)) {
816 tdev = rt->u.dst.dev;
817 ip_rt_put(rt);
818 }
819 dev->flags |= IFF_POINTOPOINT;
820 }
821
822 if (!tdev && tunnel->parms.link)
823 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
824
825 if (tdev) {
826 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
827 dev->mtu = tdev->mtu - sizeof(struct iphdr);
828 }
829 dev->iflink = tunnel->parms.link;
830 844
831 return 0; 845 return 0;
832} 846}
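
Factoring the binding logic into ipip_tunnel_bind_dev() is what lets the SIOCCHGTUNNEL path above react to a changed parms.link; previously the underlying device, MTU and header length were computed only once at init time. A hypothetical userspace sketch (device names invented) that now triggers a rebind and a netdev_state_change() notification:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>

/* Rebind an existing ipip tunnel to a different lower device. */
static int retarget_tunnel(int fd, struct ip_tunnel_parm *p)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "tunl1", IFNAMSIZ);
	p->link = if_nametoindex("eth1");	/* the new underlying link */
	ifr.ifr_ifru.ifru_data = (void *)p;
	return ioctl(fd, SIOCCHGTUNNEL, &ifr);	/* fd: AF_INET dgram socket */
}
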
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 37bb497d92af..a94f52c207a7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -141,7 +141,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
141 p.iph.ihl = 5; 141 p.iph.ihl = 5;
142 p.iph.protocol = IPPROTO_IPIP; 142 p.iph.protocol = IPPROTO_IPIP;
143 sprintf(p.name, "dvmrp%d", v->vifc_vifi); 143 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
144 ifr.ifr_ifru.ifru_data = (void*)&p; 144 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
145 145
146 oldfs = get_fs(); set_fs(KERNEL_DS); 146 oldfs = get_fs(); set_fs(KERNEL_DS);
147 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); 147 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
@@ -321,7 +321,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
321 e->error = -ETIMEDOUT; 321 e->error = -ETIMEDOUT;
322 memset(&e->msg, 0, sizeof(e->msg)); 322 memset(&e->msg, 0, sizeof(e->msg));
323 323
324 rtnl_unicast(skb, NETLINK_CB(skb).pid); 324 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
325 } else 325 } else
326 kfree_skb(skb); 326 kfree_skb(skb);
327 } 327 }
@@ -423,7 +423,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
423 return -ENOBUFS; 423 return -ENOBUFS;
424 break; 424 break;
425 case 0: 425 case 0:
426 dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr); 426 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
427 if (!dev) 427 if (!dev)
428 return -EADDRNOTAVAIL; 428 return -EADDRNOTAVAIL;
429 dev_put(dev); 429 dev_put(dev);
@@ -533,7 +533,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
533 memset(&e->msg, 0, sizeof(e->msg)); 533 memset(&e->msg, 0, sizeof(e->msg));
534 } 534 }
535 535
536 rtnl_unicast(skb, NETLINK_CB(skb).pid); 536 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
537 } else 537 } else
538 ip_mr_forward(skb, c, 0); 538 ip_mr_forward(skb, c, 0);
539 } 539 }
@@ -749,7 +749,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
749 return 0; 749 return 0;
750 } 750 }
751 751
752 if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr)) 752 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
753 return -EINVAL; 753 return -EINVAL;
754 754
755 c=ipmr_cache_alloc(); 755 c=ipmr_cache_alloc();
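
The MULTICAST() macro is replaced here (and in the pim_rcv hunks below) by ipv4_is_multicast(), the same class-D test but typed for __be32 addresses. Assuming the 2.6.25 helper in include/linux/in.h:

static inline bool ipv4_is_multicast(__be32 addr)
{
	return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
}
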
@@ -849,7 +849,7 @@ static void mrtsock_destruct(struct sock *sk)
849{ 849{
850 rtnl_lock(); 850 rtnl_lock();
851 if (sk == mroute_socket) { 851 if (sk == mroute_socket) {
852 IPV4_DEVCONF_ALL(MC_FORWARDING)--; 852 IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
853 853
854 write_lock_bh(&mrt_lock); 854 write_lock_bh(&mrt_lock);
855 mroute_socket=NULL; 855 mroute_socket=NULL;
@@ -898,7 +898,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
898 mroute_socket=sk; 898 mroute_socket=sk;
899 write_unlock_bh(&mrt_lock); 899 write_unlock_bh(&mrt_lock);
900 900
901 IPV4_DEVCONF_ALL(MC_FORWARDING)++; 901 IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
902 } 902 }
903 rtnl_unlock(); 903 rtnl_unlock();
904 return ret; 904 return ret;
@@ -954,10 +954,12 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
954#ifdef CONFIG_IP_PIMSM 954#ifdef CONFIG_IP_PIMSM
955 case MRT_PIM: 955 case MRT_PIM:
956 { 956 {
957 int v, ret; 957 int v;
958
958 if (get_user(v,(int __user *)optval)) 959 if (get_user(v,(int __user *)optval))
959 return -EFAULT; 960 return -EFAULT;
960 v = (v)?1:0; 961 v = (v) ? 1 : 0;
962
961 rtnl_lock(); 963 rtnl_lock();
962 ret = 0; 964 ret = 0;
963 if (v != mroute_do_pim) { 965 if (v != mroute_do_pim) {
@@ -1183,7 +1185,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1183 .saddr = vif->local, 1185 .saddr = vif->local,
1184 .tos = RT_TOS(iph->tos) } }, 1186 .tos = RT_TOS(iph->tos) } },
1185 .proto = IPPROTO_IPIP }; 1187 .proto = IPPROTO_IPIP };
1186 if (ip_route_output_key(&rt, &fl)) 1188 if (ip_route_output_key(&init_net, &rt, &fl))
1187 goto out_free; 1189 goto out_free;
1188 encap = sizeof(struct iphdr); 1190 encap = sizeof(struct iphdr);
1189 } else { 1191 } else {
@@ -1192,7 +1194,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1192 { .daddr = iph->daddr, 1194 { .daddr = iph->daddr,
1193 .tos = RT_TOS(iph->tos) } }, 1195 .tos = RT_TOS(iph->tos) } },
1194 .proto = IPPROTO_IPIP }; 1196 .proto = IPPROTO_IPIP };
1195 if (ip_route_output_key(&rt, &fl)) 1197 if (ip_route_output_key(&init_net, &rt, &fl))
1196 goto out_free; 1198 goto out_free;
1197 } 1199 }
1198 1200
@@ -1245,7 +1247,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1245 * not mrouter) cannot join to more than one interface - it will 1247 * not mrouter) cannot join to more than one interface - it will
1246 * result in receiving multiple packets. 1248 * result in receiving multiple packets.
1247 */ 1249 */
1248 NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev, 1250 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1249 ipmr_forward_finish); 1251 ipmr_forward_finish);
1250 return; 1252 return;
1251 1253
@@ -1461,7 +1463,7 @@ int pim_rcv_v1(struct sk_buff * skb)
1461 b. packet is not a NULL-REGISTER 1463 b. packet is not a NULL-REGISTER
1462 c. packet is not truncated 1464 c. packet is not truncated
1463 */ 1465 */
1464 if (!MULTICAST(encap->daddr) || 1466 if (!ipv4_is_multicast(encap->daddr) ||
1465 encap->tot_len == 0 || 1467 encap->tot_len == 0 ||
1466 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 1468 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1467 goto drop; 1469 goto drop;
@@ -1517,7 +1519,7 @@ static int pim_rcv(struct sk_buff * skb)
1517 /* check if the inner packet is destined to mcast group */ 1519 /* check if the inner packet is destined to mcast group */
1518 encap = (struct iphdr *)(skb_transport_header(skb) + 1520 encap = (struct iphdr *)(skb_transport_header(skb) +
1519 sizeof(struct pimreghdr)); 1521 sizeof(struct pimreghdr));
1520 if (!MULTICAST(encap->daddr) || 1522 if (!ipv4_is_multicast(encap->daddr) ||
1521 encap->tot_len == 0 || 1523 encap->tot_len == 0 ||
1522 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 1524 ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1523 goto drop; 1525 goto drop;
@@ -1659,6 +1661,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1659} 1661}
1660 1662
1661static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 1663static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1664 __acquires(mrt_lock)
1662{ 1665{
1663 read_lock(&mrt_lock); 1666 read_lock(&mrt_lock);
1664 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 1667 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
@@ -1682,6 +1685,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1682} 1685}
1683 1686
1684static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) 1687static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1688 __releases(mrt_lock)
1685{ 1689{
1686 read_unlock(&mrt_lock); 1690 read_unlock(&mrt_lock);
1687} 1691}
@@ -1889,8 +1893,7 @@ void __init ip_mr_init(void)
1889 sizeof(struct mfc_cache), 1893 sizeof(struct mfc_cache),
1890 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 1894 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1891 NULL); 1895 NULL);
1892 init_timer(&ipmr_expire_timer); 1896 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1893 ipmr_expire_timer.function=ipmr_expire_process;
1894 register_netdevice_notifier(&ip_mr_notifier); 1897 register_netdevice_notifier(&ip_mr_notifier);
1895#ifdef CONFIG_PROC_FS 1898#ifdef CONFIG_PROC_FS
1896 proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); 1899 proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
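
The init_timer() conversion above follows the same pattern as the ip_vs_conn.c, ip_vs_est.c, ip_vs_lblc.c and ip_vs_lblcr.c hunks below: setup_timer() folds the three-step open-coded initialization into one call. A sketch of the equivalence, with cb and arg as stand-ins:

static void cb(unsigned long arg);	/* the timer callback */

static void timer_init_example(struct timer_list *t, unsigned long arg)
{
	/* open-coded form removed throughout this merge: */
	init_timer(t);
	t->data = arg;
	t->function = cb;

	/* equivalent single call: */
	setup_timer(t, cb, arg);
}
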
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 664cb8e97c1c..535abe0c45e7 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -51,18 +51,13 @@ static DEFINE_MUTEX(__ip_vs_app_mutex);
51 */ 51 */
52static inline int ip_vs_app_get(struct ip_vs_app *app) 52static inline int ip_vs_app_get(struct ip_vs_app *app)
53{ 53{
54 /* test and get the module atomically */ 54 return try_module_get(app->module);
55 if (app->module)
56 return try_module_get(app->module);
57 else
58 return 1;
59} 55}
60 56
61 57
62static inline void ip_vs_app_put(struct ip_vs_app *app) 58static inline void ip_vs_app_put(struct ip_vs_app *app)
63{ 59{
64 if (app->module) 60 module_put(app->module);
65 module_put(app->module);
66} 61}
67 62
68 63
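
The NULL checks can go because the module refcount API already treats a NULL owner as a no-op: try_module_get(NULL) reports success and module_put(NULL) does nothing. A simplified sketch of that contract (the real helpers in include/linux/module.h additionally manage a per-CPU reference count):

static inline int try_module_get(struct module *module)
{
	if (!module)
		return 1;	/* built-in owner: always "loaded" */
	return module_is_live(module);	/* plus a refcount bump */
}

static inline void module_put(struct module *module)
{
	if (!module)
		return;		/* no-op, as relied upon above */
	/* drop the reference taken by try_module_get() */
}
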
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 0a9f3c37e18d..65f1ba112752 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -393,7 +393,15 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
393 atomic_inc(&dest->refcnt); 393 atomic_inc(&dest->refcnt);
394 394
395 /* Bind with the destination and its corresponding transmitter */ 395 /* Bind with the destination and its corresponding transmitter */
396 cp->flags |= atomic_read(&dest->conn_flags); 396 if ((cp->flags & IP_VS_CONN_F_SYNC) &&
397 (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
398 /* if the connection is not template and is created
399 * by sync, preserve the activity flag.
400 */
401 cp->flags |= atomic_read(&dest->conn_flags) &
402 (~IP_VS_CONN_F_INACTIVE);
403 else
404 cp->flags |= atomic_read(&dest->conn_flags);
397 cp->dest = dest; 405 cp->dest = dest;
398 406
399 IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " 407 IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
@@ -412,7 +420,11 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
412 /* It is a normal connection, so increase the inactive 420 /* It is a normal connection, so increase the inactive
413 connection counter because it is in TCP SYNRECV 421 connection counter because it is in TCP SYNRECV
 414	 	 state (inactive) or other protocol inactive state */	 422
415 atomic_inc(&dest->inactconns); 423 if ((cp->flags & IP_VS_CONN_F_SYNC) &&
424 (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
425 atomic_inc(&dest->activeconns);
426 else
427 atomic_inc(&dest->inactconns);
416 } else { 428 } else {
417 /* It is a persistent connection/template, so increase 429 /* It is a persistent connection/template, so increase
 418	 	 the persistent connection counter */	 430
@@ -629,9 +641,7 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport
629 } 641 }
630 642
631 INIT_LIST_HEAD(&cp->c_list); 643 INIT_LIST_HEAD(&cp->c_list);
632 init_timer(&cp->timer); 644 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
633 cp->timer.data = (unsigned long)cp;
634 cp->timer.function = ip_vs_conn_expire;
635 cp->protocol = proto; 645 cp->protocol = proto;
636 cp->caddr = caddr; 646 cp->caddr = caddr;
637 cp->cport = cport; 647 cp->cport = cport;
@@ -783,6 +793,57 @@ static const struct file_operations ip_vs_conn_fops = {
783 .llseek = seq_lseek, 793 .llseek = seq_lseek,
784 .release = seq_release, 794 .release = seq_release,
785}; 795};
796
797static const char *ip_vs_origin_name(unsigned flags)
798{
799 if (flags & IP_VS_CONN_F_SYNC)
800 return "SYNC";
801 else
802 return "LOCAL";
803}
804
805static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
806{
807
808 if (v == SEQ_START_TOKEN)
809 seq_puts(seq,
810 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
811 else {
812 const struct ip_vs_conn *cp = v;
813
814 seq_printf(seq,
815 "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
816 ip_vs_proto_name(cp->protocol),
817 ntohl(cp->caddr), ntohs(cp->cport),
818 ntohl(cp->vaddr), ntohs(cp->vport),
819 ntohl(cp->daddr), ntohs(cp->dport),
820 ip_vs_state_name(cp->protocol, cp->state),
821 ip_vs_origin_name(cp->flags),
822 (cp->timer.expires-jiffies)/HZ);
823 }
824 return 0;
825}
826
827static const struct seq_operations ip_vs_conn_sync_seq_ops = {
828 .start = ip_vs_conn_seq_start,
829 .next = ip_vs_conn_seq_next,
830 .stop = ip_vs_conn_seq_stop,
831 .show = ip_vs_conn_sync_seq_show,
832};
833
834static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
835{
836 return seq_open(file, &ip_vs_conn_sync_seq_ops);
837}
838
839static const struct file_operations ip_vs_conn_sync_fops = {
840 .owner = THIS_MODULE,
841 .open = ip_vs_conn_sync_open,
842 .read = seq_read,
843 .llseek = seq_lseek,
844 .release = seq_release,
845};
846
786#endif 847#endif
787 848
788 849
@@ -942,6 +1003,7 @@ int ip_vs_conn_init(void)
942 } 1003 }
943 1004
944 proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); 1005 proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
1006 proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
945 1007
946 /* calculate the random value for connection hash */ 1008 /* calculate the random value for connection hash */
947 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); 1009 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
@@ -958,5 +1020,6 @@ void ip_vs_conn_cleanup(void)
958 /* Release the empty cache */ 1020 /* Release the empty cache */
959 kmem_cache_destroy(ip_vs_conn_cachep); 1021 kmem_cache_destroy(ip_vs_conn_cachep);
960 proc_net_remove(&init_net, "ip_vs_conn"); 1022 proc_net_remove(&init_net, "ip_vs_conn");
1023 proc_net_remove(&init_net, "ip_vs_conn_sync");
961 vfree(ip_vs_conn_tab); 1024 vfree(ip_vs_conn_tab);
962} 1025}
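
The new /proc/net/ip_vs_conn_sync file reuses the ip_vs_conn iterators but reports each connection's origin (SYNC vs LOCAL). Illustrative output derived from the seq_printf() format above; the addresses, ports and expiry values are invented:

Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires
TCP C0A80001 D431 C0A8000A 0050 C0A80064 0050 ESTABLISHED SYNC       897
TCP C0A80002 E022 C0A8000A 0050 C0A80065 0050 FIN_WAIT    LOCAL      112
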
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 8fba20256f52..963981a9d501 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -423,7 +423,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
423 and the destination is RTN_UNICAST (and not local), then create 423 and the destination is RTN_UNICAST (and not local), then create
424 a cache_bypass connection entry */ 424 a cache_bypass connection entry */
425 if (sysctl_ip_vs_cache_bypass && svc->fwmark 425 if (sysctl_ip_vs_cache_bypass && svc->fwmark
426 && (inet_addr_type(iph->daddr) == RTN_UNICAST)) { 426 && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
427 int ret, cs; 427 int ret, cs;
428 struct ip_vs_conn *cp; 428 struct ip_vs_conn *cp;
429 429
@@ -481,7 +481,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
481 481
482 482
483/* 483/*
484 * It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING 484 * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
485 * chain, and is used for VS/NAT. 485 * chain, and is used for VS/NAT.
486 * It detects packets for VS/NAT connections and sends the packets 486 * It detects packets for VS/NAT connections and sends the packets
487 * immediately. This can avoid that iptable_nat mangles the packets 487 * immediately. This can avoid that iptable_nat mangles the packets
@@ -679,7 +679,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
679} 679}
680 680
681/* 681/*
682 * It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT. 682 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
683 * Check if outgoing packet belongs to the established ip_vs_conn, 683 * Check if outgoing packet belongs to the established ip_vs_conn,
684 * rewrite addresses of the packet and send it on its way... 684 * rewrite addresses of the packet and send it on its way...
685 */ 685 */
@@ -814,7 +814,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
814 814
815 /* reassemble IP fragments */ 815 /* reassemble IP fragments */
816 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 816 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
817 if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ? 817 if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
818 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) 818 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
819 return NF_STOLEN; 819 return NF_STOLEN;
820 } 820 }
@@ -1003,12 +1003,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1003 1003
1004 1004
1005/* 1005/*
1006 * It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP 1006 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
1007 * related packets destined for 0.0.0.0/0. 1007 * related packets destined for 0.0.0.0/0.
1008 * When fwmark-based virtual service is used, such as transparent 1008 * When fwmark-based virtual service is used, such as transparent
1009 * cache cluster, TCP packets can be marked and routed to ip_vs_in, 1009 * cache cluster, TCP packets can be marked and routed to ip_vs_in,
1010 * but ICMP destined for 0.0.0.0/0 cannot be easily marked and     1010
1011 * sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain 1011 * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
1012 * and send them to ip_vs_in_icmp. 1012 * and send them to ip_vs_in_icmp.
1013 */ 1013 */
1014static unsigned int 1014static unsigned int
@@ -1025,43 +1025,42 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
1025} 1025}
1026 1026
1027 1027
1028/* After packet filtering, forward packet through VS/DR, VS/TUN, 1028static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1029 or VS/NAT(change destination), so that filtering rules can be 1029 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1030 applied to IPVS. */ 1030 * or VS/NAT(change destination), so that filtering rules can be
1031static struct nf_hook_ops ip_vs_in_ops = { 1031 * applied to IPVS. */
1032 .hook = ip_vs_in, 1032 {
1033 .owner = THIS_MODULE, 1033 .hook = ip_vs_in,
1034 .pf = PF_INET, 1034 .owner = THIS_MODULE,
1035 .hooknum = NF_IP_LOCAL_IN, 1035 .pf = PF_INET,
1036 .priority = 100, 1036 .hooknum = NF_INET_LOCAL_IN,
1037}; 1037 .priority = 100,
1038 1038 },
1039/* After packet filtering, change source only for VS/NAT */ 1039 /* After packet filtering, change source only for VS/NAT */
1040static struct nf_hook_ops ip_vs_out_ops = { 1040 {
1041 .hook = ip_vs_out, 1041 .hook = ip_vs_out,
1042 .owner = THIS_MODULE, 1042 .owner = THIS_MODULE,
1043 .pf = PF_INET, 1043 .pf = PF_INET,
1044 .hooknum = NF_IP_FORWARD, 1044 .hooknum = NF_INET_FORWARD,
1045 .priority = 100, 1045 .priority = 100,
1046}; 1046 },
1047 1047 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1048/* After packet filtering (but before ip_vs_out_icmp), catch icmp 1048 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
1049 destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1049 {
1050static struct nf_hook_ops ip_vs_forward_icmp_ops = { 1050 .hook = ip_vs_forward_icmp,
1051 .hook = ip_vs_forward_icmp, 1051 .owner = THIS_MODULE,
1052 .owner = THIS_MODULE, 1052 .pf = PF_INET,
1053 .pf = PF_INET, 1053 .hooknum = NF_INET_FORWARD,
1054 .hooknum = NF_IP_FORWARD, 1054 .priority = 99,
1055 .priority = 99, 1055 },
1056}; 1056 /* Before the netfilter connection tracking, exit from POST_ROUTING */
1057 1057 {
1058/* Before the netfilter connection tracking, exit from POST_ROUTING */ 1058 .hook = ip_vs_post_routing,
1059static struct nf_hook_ops ip_vs_post_routing_ops = { 1059 .owner = THIS_MODULE,
1060 .hook = ip_vs_post_routing, 1060 .pf = PF_INET,
1061 .owner = THIS_MODULE, 1061 .hooknum = NF_INET_POST_ROUTING,
1062 .pf = PF_INET, 1062 .priority = NF_IP_PRI_NAT_SRC-1,
1063 .hooknum = NF_IP_POST_ROUTING, 1063 },
1064 .priority = NF_IP_PRI_NAT_SRC-1,
1065}; 1064};
1066 1065
1067 1066
@@ -1092,37 +1091,15 @@ static int __init ip_vs_init(void)
1092 goto cleanup_app; 1091 goto cleanup_app;
1093 } 1092 }
1094 1093
1095 ret = nf_register_hook(&ip_vs_in_ops); 1094 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1096 if (ret < 0) { 1095 if (ret < 0) {
1097 IP_VS_ERR("can't register in hook.\n"); 1096 IP_VS_ERR("can't register hooks.\n");
1098 goto cleanup_conn; 1097 goto cleanup_conn;
1099 } 1098 }
1100 1099
1101 ret = nf_register_hook(&ip_vs_out_ops);
1102 if (ret < 0) {
1103 IP_VS_ERR("can't register out hook.\n");
1104 goto cleanup_inops;
1105 }
1106 ret = nf_register_hook(&ip_vs_post_routing_ops);
1107 if (ret < 0) {
1108 IP_VS_ERR("can't register post_routing hook.\n");
1109 goto cleanup_outops;
1110 }
1111 ret = nf_register_hook(&ip_vs_forward_icmp_ops);
1112 if (ret < 0) {
1113 IP_VS_ERR("can't register forward_icmp hook.\n");
1114 goto cleanup_postroutingops;
1115 }
1116
1117 IP_VS_INFO("ipvs loaded.\n"); 1100 IP_VS_INFO("ipvs loaded.\n");
1118 return ret; 1101 return ret;
1119 1102
1120 cleanup_postroutingops:
1121 nf_unregister_hook(&ip_vs_post_routing_ops);
1122 cleanup_outops:
1123 nf_unregister_hook(&ip_vs_out_ops);
1124 cleanup_inops:
1125 nf_unregister_hook(&ip_vs_in_ops);
1126 cleanup_conn: 1103 cleanup_conn:
1127 ip_vs_conn_cleanup(); 1104 ip_vs_conn_cleanup();
1128 cleanup_app: 1105 cleanup_app:
@@ -1136,10 +1113,7 @@ static int __init ip_vs_init(void)
1136 1113
1137static void __exit ip_vs_cleanup(void) 1114static void __exit ip_vs_cleanup(void)
1138{ 1115{
1139 nf_unregister_hook(&ip_vs_forward_icmp_ops); 1116 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1140 nf_unregister_hook(&ip_vs_post_routing_ops);
1141 nf_unregister_hook(&ip_vs_out_ops);
1142 nf_unregister_hook(&ip_vs_in_ops);
1143 ip_vs_conn_cleanup(); 1117 ip_vs_conn_cleanup();
1144 ip_vs_app_cleanup(); 1118 ip_vs_app_cleanup();
1145 ip_vs_protocol_cleanup(); 1119 ip_vs_protocol_cleanup();
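
Collapsing the four separate nf_hook_ops into one ip_vs_ops[] array works because nf_register_hooks() registers the entries in order and unwinds the already-registered ones on failure, so the init path needs a single error label instead of the four-deep cleanup chain removed above. Roughly, per the net/netfilter/core.c of this era:

int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = nf_register_hook(&reg[i]);
		if (err)
			goto err;
	}
	return err;

err:
	if (i > 0)
		nf_unregister_hooks(reg, i);	/* roll back partial work */
	return err;
}
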
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 693d92490c11..94c5767c8e01 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -704,7 +704,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
704 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; 704 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
705 705
706 /* check if local node and update the flags */ 706 /* check if local node and update the flags */
707 if (inet_addr_type(udest->addr) == RTN_LOCAL) { 707 if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
708 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) 708 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
709 | IP_VS_CONN_F_LOCALNODE; 709 | IP_VS_CONN_F_LOCALNODE;
710 } 710 }
@@ -756,7 +756,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
756 756
757 EnterFunction(2); 757 EnterFunction(2);
758 758
759 atype = inet_addr_type(udest->addr); 759 atype = inet_addr_type(&init_net, udest->addr);
760 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 760 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
761 return -EINVAL; 761 return -EINVAL;
762 762
@@ -1591,34 +1591,13 @@ static struct ctl_table vs_vars[] = {
1591 { .ctl_name = 0 } 1591 { .ctl_name = 0 }
1592}; 1592};
1593 1593
1594static ctl_table vs_table[] = { 1594struct ctl_path net_vs_ctl_path[] = {
1595 { 1595 { .procname = "net", .ctl_name = CTL_NET, },
1596 .procname = "vs", 1596 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1597 .mode = 0555, 1597 { .procname = "vs", },
1598 .child = vs_vars 1598 { }
1599 },
1600 { .ctl_name = 0 }
1601};
1602
1603static ctl_table ipvs_ipv4_table[] = {
1604 {
1605 .ctl_name = NET_IPV4,
1606 .procname = "ipv4",
1607 .mode = 0555,
1608 .child = vs_table,
1609 },
1610 { .ctl_name = 0 }
1611};
1612
1613static ctl_table vs_root_table[] = {
1614 {
1615 .ctl_name = CTL_NET,
1616 .procname = "net",
1617 .mode = 0555,
1618 .child = ipvs_ipv4_table,
1619 },
1620 { .ctl_name = 0 }
1621}; 1599};
1600EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1622 1601
1623static struct ctl_table_header * sysctl_header; 1602static struct ctl_table_header * sysctl_header;
1624 1603
@@ -2345,7 +2324,7 @@ int ip_vs_control_init(void)
2345 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); 2324 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
2346 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); 2325 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
2347 2326
2348 sysctl_header = register_sysctl_table(vs_root_table); 2327 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
2349 2328
2350 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ 2329 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
2351 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2330 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
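
Exporting net_vs_ctl_path lets every IPVS module register its leaf table under /proc/sys/net/ipv4/vs/ without duplicating three levels of nested ctl_table boilerplate; the ip_vs_lblc.c and ip_vs_lblcr.c hunks below do exactly this. A minimal consumer-side sketch (function names invented):

static struct ctl_table_header *sysctl_header;

static int __init my_sched_init(void)
{
	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
	if (sysctl_header == NULL)
		return -ENOMEM;
	/* ... register the scheduler itself ... */
	return 0;
}

static void __exit my_sched_exit(void)
{
	unregister_sysctl_table(sysctl_header);
}
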
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 7d68b80c4c19..dfa0d713c801 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/types.h> 19#include <linux/types.h>
20#include <linux/interrupt.h> 20#include <linux/interrupt.h>
21#include <linux/sysctl.h>
21 22
22#include <net/ip_vs.h> 23#include <net/ip_vs.h>
23 24
@@ -146,9 +147,8 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats)
146 write_lock_bh(&est_lock); 147 write_lock_bh(&est_lock);
147 est->next = est_list; 148 est->next = est_list;
148 if (est->next == NULL) { 149 if (est->next == NULL) {
149 init_timer(&est_timer); 150 setup_timer(&est_timer, estimation_timer, 0);
150 est_timer.expires = jiffies + 2*HZ; 151 est_timer.expires = jiffies + 2*HZ;
151 est_timer.function = estimation_timer;
152 add_timer(&est_timer); 152 add_timer(&est_timer);
153 } 153 }
154 est_list = est; 154 est_list = est;
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index ad89644ef5d2..3888642706ad 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -123,35 +123,6 @@ static ctl_table vs_vars_table[] = {
123 { .ctl_name = 0 } 123 { .ctl_name = 0 }
124}; 124};
125 125
126static ctl_table vs_table[] = {
127 {
128 .procname = "vs",
129 .mode = 0555,
130 .child = vs_vars_table
131 },
132 { .ctl_name = 0 }
133};
134
135static ctl_table ipvs_ipv4_table[] = {
136 {
137 .ctl_name = NET_IPV4,
138 .procname = "ipv4",
139 .mode = 0555,
140 .child = vs_table
141 },
142 { .ctl_name = 0 }
143};
144
145static ctl_table lblc_root_table[] = {
146 {
147 .ctl_name = CTL_NET,
148 .procname = "net",
149 .mode = 0555,
150 .child = ipvs_ipv4_table
151 },
152 { .ctl_name = 0 }
153};
154
155static struct ctl_table_header * sysctl_header; 126static struct ctl_table_header * sysctl_header;
156 127
157/* 128/*
@@ -391,9 +362,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
391 /* 362 /*
392 * Hook periodic timer for garbage collection 363 * Hook periodic timer for garbage collection
393 */ 364 */
394 init_timer(&tbl->periodic_timer); 365 setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
395 tbl->periodic_timer.data = (unsigned long)tbl; 366 (unsigned long)tbl);
396 tbl->periodic_timer.function = ip_vs_lblc_check_expire;
397 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; 367 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
398 add_timer(&tbl->periodic_timer); 368 add_timer(&tbl->periodic_timer);
399 369
@@ -583,7 +553,7 @@ static int __init ip_vs_lblc_init(void)
583 int ret; 553 int ret;
584 554
585 INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list); 555 INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
586 sysctl_header = register_sysctl_table(lblc_root_table); 556 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
587 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); 557 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
588 if (ret) 558 if (ret)
589 unregister_sysctl_table(sysctl_header); 559 unregister_sysctl_table(sysctl_header);
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 2a5ed85a3352..daa260eb21cf 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -311,35 +311,6 @@ static ctl_table vs_vars_table[] = {
311 { .ctl_name = 0 } 311 { .ctl_name = 0 }
312}; 312};
313 313
314static ctl_table vs_table[] = {
315 {
316 .procname = "vs",
317 .mode = 0555,
318 .child = vs_vars_table
319 },
320 { .ctl_name = 0 }
321};
322
323static ctl_table ipvs_ipv4_table[] = {
324 {
325 .ctl_name = NET_IPV4,
326 .procname = "ipv4",
327 .mode = 0555,
328 .child = vs_table
329 },
330 { .ctl_name = 0 }
331};
332
333static ctl_table lblcr_root_table[] = {
334 {
335 .ctl_name = CTL_NET,
336 .procname = "net",
337 .mode = 0555,
338 .child = ipvs_ipv4_table
339 },
340 { .ctl_name = 0 }
341};
342
343static struct ctl_table_header * sysctl_header; 314static struct ctl_table_header * sysctl_header;
344 315
345/* 316/*
@@ -575,9 +546,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
575 /* 546 /*
576 * Hook periodic timer for garbage collection 547 * Hook periodic timer for garbage collection
577 */ 548 */
578 init_timer(&tbl->periodic_timer); 549 setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
579 tbl->periodic_timer.data = (unsigned long)tbl; 550 (unsigned long)tbl);
580 tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
581 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; 551 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
582 add_timer(&tbl->periodic_timer); 552 add_timer(&tbl->periodic_timer);
583 553
@@ -772,7 +742,7 @@ static int __init ip_vs_lblcr_init(void)
772 int ret; 742 int ret;
773 743
774 INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); 744 INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
775 sysctl_header = register_sysctl_table(lblcr_root_table); 745 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
776 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 746 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
777 if (ret) 747 if (ret)
778 unregister_sysctl_table(sysctl_header); 748 unregister_sysctl_table(sysctl_header);
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index c0e11ec8f0f9..dde28a250d92 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -165,7 +165,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp,
165 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 165 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
166 if (ih == NULL) 166 if (ih == NULL)
167 sprintf(buf, "%s TRUNCATED", pp->name); 167 sprintf(buf, "%s TRUNCATED", pp->name);
168 else if (ih->frag_off & __constant_htons(IP_OFFSET)) 168 else if (ih->frag_off & htons(IP_OFFSET))
169 sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", 169 sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
170 pp->name, NIPQUAD(ih->saddr), 170 pp->name, NIPQUAD(ih->saddr),
171 NIPQUAD(ih->daddr)); 171 NIPQUAD(ih->daddr));
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index c36ccf057a19..aef0d3ee8e44 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -52,15 +52,15 @@ esp_conn_in_get(const struct sk_buff *skb,
52 if (likely(!inverse)) { 52 if (likely(!inverse)) {
53 cp = ip_vs_conn_in_get(IPPROTO_UDP, 53 cp = ip_vs_conn_in_get(IPPROTO_UDP,
54 iph->saddr, 54 iph->saddr,
55 __constant_htons(PORT_ISAKMP), 55 htons(PORT_ISAKMP),
56 iph->daddr, 56 iph->daddr,
57 __constant_htons(PORT_ISAKMP)); 57 htons(PORT_ISAKMP));
58 } else { 58 } else {
59 cp = ip_vs_conn_in_get(IPPROTO_UDP, 59 cp = ip_vs_conn_in_get(IPPROTO_UDP,
60 iph->daddr, 60 iph->daddr,
61 __constant_htons(PORT_ISAKMP), 61 htons(PORT_ISAKMP),
62 iph->saddr, 62 iph->saddr,
63 __constant_htons(PORT_ISAKMP)); 63 htons(PORT_ISAKMP));
64 } 64 }
65 65
66 if (!cp) { 66 if (!cp) {
@@ -89,15 +89,15 @@ esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
89 if (likely(!inverse)) { 89 if (likely(!inverse)) {
90 cp = ip_vs_conn_out_get(IPPROTO_UDP, 90 cp = ip_vs_conn_out_get(IPPROTO_UDP,
91 iph->saddr, 91 iph->saddr,
92 __constant_htons(PORT_ISAKMP), 92 htons(PORT_ISAKMP),
93 iph->daddr, 93 iph->daddr,
94 __constant_htons(PORT_ISAKMP)); 94 htons(PORT_ISAKMP));
95 } else { 95 } else {
96 cp = ip_vs_conn_out_get(IPPROTO_UDP, 96 cp = ip_vs_conn_out_get(IPPROTO_UDP,
97 iph->daddr, 97 iph->daddr,
98 __constant_htons(PORT_ISAKMP), 98 htons(PORT_ISAKMP),
99 iph->saddr, 99 iph->saddr,
100 __constant_htons(PORT_ISAKMP)); 100 htons(PORT_ISAKMP));
101 } 101 }
102 102
103 if (!cp) { 103 if (!cp) {
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 432235861908..121a32b1b756 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -24,6 +24,7 @@
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <asm/string.h> 25#include <asm/string.h>
26#include <linux/kmod.h> 26#include <linux/kmod.h>
27#include <linux/sysctl.h>
27 28
28#include <net/ip_vs.h> 29#include <net/ip_vs.h>
29 30
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index bd930efc18da..948378d0a755 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -305,10 +305,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
305 305
306 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 306 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
307 for (i=0; i<m->nr_conns; i++) { 307 for (i=0; i<m->nr_conns; i++) {
308 unsigned flags; 308 unsigned flags, state;
309 309
310 s = (struct ip_vs_sync_conn *)p; 310 s = (struct ip_vs_sync_conn *)p;
311 flags = ntohs(s->flags); 311 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
312 state = ntohs(s->state);
312 if (!(flags & IP_VS_CONN_F_TEMPLATE)) 313 if (!(flags & IP_VS_CONN_F_TEMPLATE))
313 cp = ip_vs_conn_in_get(s->protocol, 314 cp = ip_vs_conn_in_get(s->protocol,
314 s->caddr, s->cport, 315 s->caddr, s->cport,
@@ -326,6 +327,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
326 dest = ip_vs_find_dest(s->daddr, s->dport, 327 dest = ip_vs_find_dest(s->daddr, s->dport,
327 s->vaddr, s->vport, 328 s->vaddr, s->vport,
328 s->protocol); 329 s->protocol);
 330	 		/* Set the appropriate activity flag */
331 if (s->protocol == IPPROTO_TCP) {
332 if (state != IP_VS_TCP_S_ESTABLISHED)
333 flags |= IP_VS_CONN_F_INACTIVE;
334 else
335 flags &= ~IP_VS_CONN_F_INACTIVE;
336 }
329 cp = ip_vs_conn_new(s->protocol, 337 cp = ip_vs_conn_new(s->protocol,
330 s->caddr, s->cport, 338 s->caddr, s->cport,
331 s->vaddr, s->vport, 339 s->vaddr, s->vport,
@@ -337,7 +345,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
337 IP_VS_ERR("ip_vs_conn_new failed\n"); 345 IP_VS_ERR("ip_vs_conn_new failed\n");
338 return; 346 return;
339 } 347 }
340 cp->state = ntohs(s->state); 348 cp->state = state;
341 } else if (!cp->dest) { 349 } else if (!cp->dest) {
342 dest = ip_vs_try_bind_dest(cp); 350 dest = ip_vs_try_bind_dest(cp);
343 if (!dest) { 351 if (!dest) {
@@ -346,8 +354,22 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
346 cp->flags = flags | IP_VS_CONN_F_HASHED; 354 cp->flags = flags | IP_VS_CONN_F_HASHED;
347 } else 355 } else
348 atomic_dec(&dest->refcnt); 356 atomic_dec(&dest->refcnt);
349 } /* Note that we don't touch its state and flags 357 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
350 if it is a normal entry. */ 358 (cp->state != state)) {
359 /* update active/inactive flag for the connection */
360 dest = cp->dest;
361 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
362 (state != IP_VS_TCP_S_ESTABLISHED)) {
363 atomic_dec(&dest->activeconns);
364 atomic_inc(&dest->inactconns);
365 cp->flags |= IP_VS_CONN_F_INACTIVE;
366 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
367 (state == IP_VS_TCP_S_ESTABLISHED)) {
368 atomic_inc(&dest->activeconns);
369 atomic_dec(&dest->inactconns);
370 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
371 }
372 }
351 373
352 if (flags & IP_VS_CONN_F_SEQ_MASK) { 374 if (flags & IP_VS_CONN_F_SEQ_MASK) {
353 opt = (struct ip_vs_sync_conn_options *)&s[1]; 375 opt = (struct ip_vs_sync_conn_options *)&s[1];
@@ -357,7 +379,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
357 p += SIMPLE_CONN_SIZE; 379 p += SIMPLE_CONN_SIZE;
358 380
359 atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); 381 atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
360 cp->state = ntohs(s->state); 382 cp->state = state;
361 pp = ip_vs_proto_get(s->protocol); 383 pp = ip_vs_proto_get(s->protocol);
362 cp->timeout = pp->timeout_table[cp->state]; 384 cp->timeout = pp->timeout_table[cp->state];
363 ip_vs_conn_put(cp); 385 ip_vs_conn_put(cp);
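
The two preceding hunks apply a single rule: a synced TCP connection counts as active only while ESTABLISHED, and a state change moves it between the destination's activeconns/inactconns counters. Restated in one place, as a sketch consolidating the diff's own logic (helper name invented):

static void ip_vs_sync_fix_counters(struct ip_vs_conn *cp, unsigned state)
{
	struct ip_vs_dest *dest = cp->dest;

	if (!dest || cp->protocol != IPPROTO_TCP || cp->state == state)
		return;

	if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
	    state != IP_VS_TCP_S_ESTABLISHED) {
		atomic_dec(&dest->activeconns);
		atomic_inc(&dest->inactconns);
		cp->flags |= IP_VS_CONN_F_INACTIVE;
	} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
		   state == IP_VS_TCP_S_ESTABLISHED) {
		atomic_inc(&dest->activeconns);
		atomic_dec(&dest->inactconns);
		cp->flags &= ~IP_VS_CONN_F_INACTIVE;
	}
}
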
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 7c074e386c17..f63006caea03 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -16,8 +16,8 @@
16 */ 16 */
17 17
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/ip.h>
20#include <linux/tcp.h> /* for tcphdr */ 19#include <linux/tcp.h> /* for tcphdr */
20#include <net/ip.h>
21#include <net/tcp.h> /* for csum_tcpudp_magic */ 21#include <net/tcp.h> /* for csum_tcpudp_magic */
22#include <net/udp.h> 22#include <net/udp.h>
23#include <net/icmp.h> /* for icmp_send */ 23#include <net/icmp.h> /* for icmp_send */
@@ -59,7 +59,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
59 return dst; 59 return dst;
60} 60}
61 61
62static inline struct rtable * 62static struct rtable *
63__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) 63__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
64{ 64{
65 struct rtable *rt; /* Route to the other host */ 65 struct rtable *rt; /* Route to the other host */
@@ -78,7 +78,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
78 .tos = rtos, } }, 78 .tos = rtos, } },
79 }; 79 };
80 80
81 if (ip_route_output_key(&rt, &fl)) { 81 if (ip_route_output_key(&init_net, &rt, &fl)) {
82 spin_unlock(&dest->dst_lock); 82 spin_unlock(&dest->dst_lock);
83 IP_VS_DBG_RL("ip_route_output error, " 83 IP_VS_DBG_RL("ip_route_output error, "
84 "dest: %u.%u.%u.%u\n", 84 "dest: %u.%u.%u.%u\n",
@@ -101,7 +101,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
101 .tos = rtos, } }, 101 .tos = rtos, } },
102 }; 102 };
103 103
104 if (ip_route_output_key(&rt, &fl)) { 104 if (ip_route_output_key(&init_net, &rt, &fl)) {
105 IP_VS_DBG_RL("ip_route_output error, dest: " 105 IP_VS_DBG_RL("ip_route_output error, dest: "
106 "%u.%u.%u.%u\n", NIPQUAD(cp->daddr)); 106 "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
107 return NULL; 107 return NULL;
@@ -129,7 +129,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
129do { \ 129do { \
130 (skb)->ipvs_property = 1; \ 130 (skb)->ipvs_property = 1; \
131 skb_forward_csum(skb); \ 131 skb_forward_csum(skb); \
132 NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ 132 NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \
133 (rt)->u.dst.dev, dst_output); \ 133 (rt)->u.dst.dev, dst_output); \
134} while (0) 134} while (0)
135 135
@@ -170,7 +170,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
170 170
171 EnterFunction(10); 171 EnterFunction(10);
172 172
173 if (ip_route_output_key(&rt, &fl)) { 173 if (ip_route_output_key(&init_net, &rt, &fl)) {
174 IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, " 174 IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
175 "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr)); 175 "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
176 goto tx_error_icmp; 176 goto tx_error_icmp;
@@ -406,14 +406,12 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
406 iph->daddr = rt->rt_dst; 406 iph->daddr = rt->rt_dst;
407 iph->saddr = rt->rt_src; 407 iph->saddr = rt->rt_src;
408 iph->ttl = old_iph->ttl; 408 iph->ttl = old_iph->ttl;
409 iph->tot_len = htons(skb->len);
410 ip_select_ident(iph, &rt->u.dst, NULL); 409 ip_select_ident(iph, &rt->u.dst, NULL);
411 ip_send_check(iph);
412 410
413 /* Another hack: avoid icmp_send in ip_fragment */ 411 /* Another hack: avoid icmp_send in ip_fragment */
414 skb->local_df = 1; 412 skb->local_df = 1;
415 413
416 IP_VS_XMIT(skb, rt); 414 ip_local_out(skb);
417 415
418 LeaveFunction(10); 416 LeaveFunction(10);
419 417
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 5539debf4973..9a904c6c0dc8 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -7,6 +7,7 @@
7#include <net/route.h> 7#include <net/route.h>
8#include <net/xfrm.h> 8#include <net/xfrm.h>
9#include <net/ip.h> 9#include <net/ip.h>
10#include <net/netfilter/nf_queue.h>
10 11
11/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ 12/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
12int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) 13int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
@@ -18,12 +19,12 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
18 unsigned int hh_len; 19 unsigned int hh_len;
19 unsigned int type; 20 unsigned int type;
20 21
21 type = inet_addr_type(iph->saddr); 22 type = inet_addr_type(&init_net, iph->saddr);
22 if (addr_type == RTN_UNSPEC) 23 if (addr_type == RTN_UNSPEC)
23 addr_type = type; 24 addr_type = type;
24 25
25 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause 26 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
26 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. 27 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
27 */ 28 */
28 if (addr_type == RTN_LOCAL) { 29 if (addr_type == RTN_LOCAL) {
29 fl.nl_u.ip4_u.daddr = iph->daddr; 30 fl.nl_u.ip4_u.daddr = iph->daddr;
@@ -32,7 +33,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
32 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); 33 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
33 fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; 34 fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
34 fl.mark = skb->mark; 35 fl.mark = skb->mark;
35 if (ip_route_output_key(&rt, &fl) != 0) 36 if (ip_route_output_key(&init_net, &rt, &fl) != 0)
36 return -1; 37 return -1;
37 38
38 /* Drop old route. */ 39 /* Drop old route. */
@@ -42,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
42 /* non-local src, find valid iif to satisfy 43 /* non-local src, find valid iif to satisfy
43 * rp-filter when calling ip_route_input. */ 44 * rp-filter when calling ip_route_input. */
44 fl.nl_u.ip4_u.daddr = iph->saddr; 45 fl.nl_u.ip4_u.daddr = iph->saddr;
45 if (ip_route_output_key(&rt, &fl) != 0) 46 if (ip_route_output_key(&init_net, &rt, &fl) != 0)
46 return -1; 47 return -1;
47 48
48 odst = skb->dst; 49 odst = skb->dst;
@@ -122,11 +123,12 @@ struct ip_rt_info {
122 u_int8_t tos; 123 u_int8_t tos;
123}; 124};
124 125
125static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info) 126static void nf_ip_saveroute(const struct sk_buff *skb,
127 struct nf_queue_entry *entry)
126{ 128{
127 struct ip_rt_info *rt_info = nf_info_reroute(info); 129 struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
128 130
129 if (info->hook == NF_IP_LOCAL_OUT) { 131 if (entry->hook == NF_INET_LOCAL_OUT) {
130 const struct iphdr *iph = ip_hdr(skb); 132 const struct iphdr *iph = ip_hdr(skb);
131 133
132 rt_info->tos = iph->tos; 134 rt_info->tos = iph->tos;
@@ -135,11 +137,12 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
135 } 137 }
136} 138}
137 139
138static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info) 140static int nf_ip_reroute(struct sk_buff *skb,
141 const struct nf_queue_entry *entry)
139{ 142{
140 const struct ip_rt_info *rt_info = nf_info_reroute(info); 143 const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
141 144
142 if (info->hook == NF_IP_LOCAL_OUT) { 145 if (entry->hook == NF_INET_LOCAL_OUT) {
143 const struct iphdr *iph = ip_hdr(skb); 146 const struct iphdr *iph = ip_hdr(skb);
144 147
145 if (!(iph->tos == rt_info->tos 148 if (!(iph->tos == rt_info->tos
@@ -158,7 +161,7 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
158 161
159 switch (skb->ip_summed) { 162 switch (skb->ip_summed) {
160 case CHECKSUM_COMPLETE: 163 case CHECKSUM_COMPLETE:
161 if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) 164 if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
162 break; 165 break;
163 if ((protocol == 0 && !csum_fold(skb->csum)) || 166 if ((protocol == 0 && !csum_fold(skb->csum)) ||
164 !csum_tcpudp_magic(iph->saddr, iph->daddr, 167 !csum_tcpudp_magic(iph->saddr, iph->daddr,
@@ -182,9 +185,15 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
182 185
183EXPORT_SYMBOL(nf_ip_checksum); 186EXPORT_SYMBOL(nf_ip_checksum);
184 187
185static struct nf_afinfo nf_ip_afinfo = { 188static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
189{
190 return ip_route_output_key(&init_net, (struct rtable **)dst, fl);
191}
192
193static const struct nf_afinfo nf_ip_afinfo = {
186 .family = AF_INET, 194 .family = AF_INET,
187 .checksum = nf_ip_checksum, 195 .checksum = nf_ip_checksum,
196 .route = nf_ip_route,
188 .saveroute = nf_ip_saveroute, 197 .saveroute = nf_ip_saveroute,
189 .reroute = nf_ip_reroute, 198 .reroute = nf_ip_reroute,
190 .route_key_size = sizeof(struct ip_rt_info), 199 .route_key_size = sizeof(struct ip_rt_info),
@@ -202,3 +211,13 @@ static void ipv4_netfilter_fini(void)
202 211
203module_init(ipv4_netfilter_init); 212module_init(ipv4_netfilter_init);
204module_exit(ipv4_netfilter_fini); 213module_exit(ipv4_netfilter_fini);
214
215#ifdef CONFIG_SYSCTL
216struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = {
217 { .procname = "net", .ctl_name = CTL_NET, },
218 { .procname = "ipv4", .ctl_name = NET_IPV4, },
219 { .procname = "netfilter", .ctl_name = NET_IPV4_NETFILTER, },
220 { }
221};
222EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
223#endif /* CONFIG_SYSCTL */
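
The exported array gives IPv4 netfilter modules one canonical spelling of the net.ipv4.netfilter sysctl directory, carrying both the procfs names and the binary-sysctl ctl_name values. A hedged sketch of a consumer, using the register_sysctl_paths() interface introduced in this release; the table contents (example_param and its variable) are purely illustrative:

	/* Sketch: hang a module's ctl_table under net.ipv4.netfilter. */
	static int example_param;

	static struct ctl_table example_table[] = {
		{
			.ctl_name	= CTL_UNNUMBERED,
			.procname	= "example_param",	/* hypothetical */
			.data		= &example_param,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{ }
	};

	static struct ctl_table_header *example_header;

	static int __init example_init(void)
	{
		example_header = register_sysctl_paths(nf_net_ipv4_netfilter_sysctl_path,
						       example_table);
		return example_header ? 0 : -ENOMEM;
	}

	static void __exit example_exit(void)
	{
		unregister_sysctl_table(example_header);
	}
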
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 9aca9c55687c..9a077cb24798 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration"
8config NF_CONNTRACK_IPV4 8config NF_CONNTRACK_IPV4
9 tristate "IPv4 connection tracking support (required for NAT)" 9 tristate "IPv4 connection tracking support (required for NAT)"
10 depends on NF_CONNTRACK 10 depends on NF_CONNTRACK
11 default m if NETFILTER_ADVANCED=n
11 ---help--- 12 ---help---
12 Connection tracking keeps a record of what packets have passed 13 Connection tracking keeps a record of what packets have passed
13 through your machine, in order to figure out how they are related 14 through your machine, in order to figure out how they are related
@@ -32,6 +33,7 @@ config NF_CONNTRACK_PROC_COMPAT
32 33
33config IP_NF_QUEUE 34config IP_NF_QUEUE
34 tristate "IP Userspace queueing via NETLINK (OBSOLETE)" 35 tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
36 depends on NETFILTER_ADVANCED
35 help 37 help
36 Netfilter has the ability to queue packets to user space: the 38 Netfilter has the ability to queue packets to user space: the
37 netlink device can be used to access them using this driver. 39 netlink device can be used to access them using this driver.
@@ -44,6 +46,7 @@ config IP_NF_QUEUE
44 46
45config IP_NF_IPTABLES 47config IP_NF_IPTABLES
46 tristate "IP tables support (required for filtering/masq/NAT)" 48 tristate "IP tables support (required for filtering/masq/NAT)"
49 default m if NETFILTER_ADVANCED=n
47 select NETFILTER_XTABLES 50 select NETFILTER_XTABLES
48 help 51 help
49 iptables is a general, extensible packet identification framework. 52 iptables is a general, extensible packet identification framework.
@@ -54,27 +57,10 @@ config IP_NF_IPTABLES
54 To compile it as a module, choose M here. If unsure, say N. 57 To compile it as a module, choose M here. If unsure, say N.
55 58
56# The matches. 59# The matches.
57config IP_NF_MATCH_IPRANGE
58 tristate "IP range match support"
59 depends on IP_NF_IPTABLES
60 help
61 This option makes it possible to match IP addresses against IP address
62 ranges.
63
64 To compile it as a module, choose M here. If unsure, say N.
65
66config IP_NF_MATCH_TOS
67 tristate "TOS match support"
68 depends on IP_NF_IPTABLES
69 help
70 TOS matching allows you to match packets based on the Type Of
71 Service fields of the IP packet.
72
73 To compile it as a module, choose M here. If unsure, say N.
74
75config IP_NF_MATCH_RECENT 60config IP_NF_MATCH_RECENT
76 tristate "recent match support" 61 tristate '"recent" match support'
77 depends on IP_NF_IPTABLES 62 depends on IP_NF_IPTABLES
63 depends on NETFILTER_ADVANCED
78 help 64 help
79 This match is used for creating one or many lists of recently 65 This match is used for creating one or many lists of recently
80 used addresses and then matching against that/those list(s). 66 used addresses and then matching against that/those list(s).
@@ -85,8 +71,9 @@ config IP_NF_MATCH_RECENT
85 To compile it as a module, choose M here. If unsure, say N. 71 To compile it as a module, choose M here. If unsure, say N.
86 72
87config IP_NF_MATCH_ECN 73config IP_NF_MATCH_ECN
88 tristate "ECN match support" 74 tristate '"ecn" match support'
89 depends on IP_NF_IPTABLES 75 depends on IP_NF_IPTABLES
76 depends on NETFILTER_ADVANCED
90 help 77 help
91 This option adds an `ECN' match, which allows you to match against 78 This option adds an `ECN' match, which allows you to match against
92 the IPv4 and TCP header ECN fields. 79 the IPv4 and TCP header ECN fields.
@@ -94,8 +81,9 @@ config IP_NF_MATCH_ECN
94 To compile it as a module, choose M here. If unsure, say N. 81 To compile it as a module, choose M here. If unsure, say N.
95 82
96config IP_NF_MATCH_AH 83config IP_NF_MATCH_AH
97 tristate "AH match support" 84 tristate '"ah" match support'
98 depends on IP_NF_IPTABLES 85 depends on IP_NF_IPTABLES
86 depends on NETFILTER_ADVANCED
99 help 87 help
100 This match extension allows you to match a range of SPIs 88 This match extension allows you to match a range of SPIs
101 inside AH header of IPSec packets. 89 inside AH header of IPSec packets.
@@ -103,30 +91,23 @@ config IP_NF_MATCH_AH
103 To compile it as a module, choose M here. If unsure, say N. 91 To compile it as a module, choose M here. If unsure, say N.
104 92
105config IP_NF_MATCH_TTL 93config IP_NF_MATCH_TTL
106 tristate "TTL match support" 94 tristate '"ttl" match support'
107 depends on IP_NF_IPTABLES 95 depends on IP_NF_IPTABLES
96 depends on NETFILTER_ADVANCED
108 help 97 help
109 This adds the CONFIG_IP_NF_MATCH_TTL option, which enables the user 98 This adds the CONFIG_IP_NF_MATCH_TTL option, which enables the user
110 to match packets by their TTL value. 99 to match packets by their TTL value.
111 100
112 To compile it as a module, choose M here. If unsure, say N. 101 To compile it as a module, choose M here. If unsure, say N.
113 102
114config IP_NF_MATCH_OWNER
115 tristate "Owner match support"
116 depends on IP_NF_IPTABLES
117 help
118 Packet owner matching allows you to match locally-generated packets
119 based on who created them: the user, group, process or session.
120
121 To compile it as a module, choose M here. If unsure, say N.
122
123config IP_NF_MATCH_ADDRTYPE 103config IP_NF_MATCH_ADDRTYPE
124 tristate 'address type match support' 104 tristate '"addrtype" address type match support'
125 depends on IP_NF_IPTABLES 105 depends on IP_NF_IPTABLES
106 depends on NETFILTER_ADVANCED
126 help 107 help
127 This option allows you to match what routing thinks of an address, 108 This option allows you to match what routing thinks of an address,
128 e.g. UNICAST, LOCAL, BROADCAST, ... 109 e.g. UNICAST, LOCAL, BROADCAST, ...
129 110
130 If you want to compile it as a module, say M here and read 111 If you want to compile it as a module, say M here and read
131 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 112 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
132 113
@@ -134,6 +115,7 @@ config IP_NF_MATCH_ADDRTYPE
134config IP_NF_FILTER 115config IP_NF_FILTER
135 tristate "Packet filtering" 116 tristate "Packet filtering"
136 depends on IP_NF_IPTABLES 117 depends on IP_NF_IPTABLES
118 default m if NETFILTER_ADVANCED=n
137 help 119 help
138 Packet filtering defines a table `filter', which has a series of 120 Packet filtering defines a table `filter', which has a series of
139 rules for simple packet filtering at local input, forwarding and 121 rules for simple packet filtering at local input, forwarding and
@@ -144,6 +126,7 @@ config IP_NF_FILTER
144config IP_NF_TARGET_REJECT 126config IP_NF_TARGET_REJECT
145 tristate "REJECT target support" 127 tristate "REJECT target support"
146 depends on IP_NF_FILTER 128 depends on IP_NF_FILTER
129 default m if NETFILTER_ADVANCED=n
147 help 130 help
148 The REJECT target allows a filtering rule to specify that an ICMP 131 The REJECT target allows a filtering rule to specify that an ICMP
149 error should be issued in response to an incoming packet, rather 132 error should be issued in response to an incoming packet, rather
@@ -154,6 +137,7 @@ config IP_NF_TARGET_REJECT
154config IP_NF_TARGET_LOG 137config IP_NF_TARGET_LOG
155 tristate "LOG target support" 138 tristate "LOG target support"
156 depends on IP_NF_IPTABLES 139 depends on IP_NF_IPTABLES
140 default m if NETFILTER_ADVANCED=n
157 help 141 help
158 This option adds a `LOG' target, which allows you to create rules in 142 This option adds a `LOG' target, which allows you to create rules in
159 any iptables table which records the packet header to the syslog. 143 any iptables table which records the packet header to the syslog.
@@ -163,6 +147,7 @@ config IP_NF_TARGET_LOG
163config IP_NF_TARGET_ULOG 147config IP_NF_TARGET_ULOG
164 tristate "ULOG target support" 148 tristate "ULOG target support"
165 depends on IP_NF_IPTABLES 149 depends on IP_NF_IPTABLES
150 default m if NETFILTER_ADVANCED=n
166 ---help--- 151 ---help---
167 152
168 This option enables the old IPv4-only "ipt_ULOG" implementation 153 This option enables the old IPv4-only "ipt_ULOG" implementation
@@ -183,6 +168,7 @@ config IP_NF_TARGET_ULOG
183config NF_NAT 168config NF_NAT
184 tristate "Full NAT" 169 tristate "Full NAT"
185 depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 170 depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4
171 default m if NETFILTER_ADVANCED=n
186 help 172 help
187 The Full NAT option allows masquerading, port forwarding and other 173 The Full NAT option allows masquerading, port forwarding and other
188 forms of full Network Address Port Translation. It is controlled by 174 forms of full Network Address Port Translation. It is controlled by
@@ -198,6 +184,7 @@ config NF_NAT_NEEDED
198config IP_NF_TARGET_MASQUERADE 184config IP_NF_TARGET_MASQUERADE
199 tristate "MASQUERADE target support" 185 tristate "MASQUERADE target support"
200 depends on NF_NAT 186 depends on NF_NAT
187 default m if NETFILTER_ADVANCED=n
201 help 188 help
202 Masquerading is a special case of NAT: all outgoing connections are 189 Masquerading is a special case of NAT: all outgoing connections are
203 changed to seem to come from a particular interface's address, and 190 changed to seem to come from a particular interface's address, and
@@ -210,6 +197,7 @@ config IP_NF_TARGET_MASQUERADE
210config IP_NF_TARGET_REDIRECT 197config IP_NF_TARGET_REDIRECT
211 tristate "REDIRECT target support" 198 tristate "REDIRECT target support"
212 depends on NF_NAT 199 depends on NF_NAT
200 depends on NETFILTER_ADVANCED
213 help 201 help
214 REDIRECT is a special case of NAT: all incoming connections are 202 REDIRECT is a special case of NAT: all incoming connections are
215 mapped onto the incoming interface's address, causing the packets to 203 mapped onto the incoming interface's address, causing the packets to
@@ -221,6 +209,7 @@ config IP_NF_TARGET_REDIRECT
221config IP_NF_TARGET_NETMAP 209config IP_NF_TARGET_NETMAP
222 tristate "NETMAP target support" 210 tristate "NETMAP target support"
223 depends on NF_NAT 211 depends on NF_NAT
212 depends on NETFILTER_ADVANCED
224 help 213 help
225 NETMAP is an implementation of static 1:1 NAT mapping of network 214 NETMAP is an implementation of static 1:1 NAT mapping of network
226 addresses. It maps the network address part, while keeping the host 215 addresses. It maps the network address part, while keeping the host
@@ -229,18 +218,10 @@ config IP_NF_TARGET_NETMAP
229 218
230 To compile it as a module, choose M here. If unsure, say N. 219 To compile it as a module, choose M here. If unsure, say N.
231 220
232config IP_NF_TARGET_SAME
233 tristate "SAME target support (OBSOLETE)"
234 depends on NF_NAT
235 help
236 This option adds a `SAME' target, which works like the standard SNAT
237 target, but attempts to give clients the same IP for all connections.
238
239 To compile it as a module, choose M here. If unsure, say N.
240
241config NF_NAT_SNMP_BASIC 221config NF_NAT_SNMP_BASIC
242 tristate "Basic SNMP-ALG support (EXPERIMENTAL)" 222 tristate "Basic SNMP-ALG support"
243 depends on EXPERIMENTAL && NF_NAT 223 depends on NF_NAT
224 depends on NETFILTER_ADVANCED
244 ---help--- 225 ---help---
245 226
246 This module implements an Application Layer Gateway (ALG) for 227 This module implements an Application Layer Gateway (ALG) for
@@ -304,6 +285,7 @@ config NF_NAT_SIP
304config IP_NF_MANGLE 285config IP_NF_MANGLE
305 tristate "Packet mangling" 286 tristate "Packet mangling"
306 depends on IP_NF_IPTABLES 287 depends on IP_NF_IPTABLES
288 default m if NETFILTER_ADVANCED=n
307 help 289 help
308 This option adds a `mangle' table to iptables: see the man page for 290 This option adds a `mangle' table to iptables: see the man page for
309 iptables(8). This table is used for various packet alterations 291 iptables(8). This table is used for various packet alterations
@@ -311,19 +293,10 @@ config IP_NF_MANGLE
311 293
312 To compile it as a module, choose M here. If unsure, say N. 294 To compile it as a module, choose M here. If unsure, say N.
313 295
314config IP_NF_TARGET_TOS
315 tristate "TOS target support"
316 depends on IP_NF_MANGLE
317 help
318 This option adds a `TOS' target, which allows you to create rules in
319 the `mangle' table which alter the Type Of Service field of an IP
320 packet prior to routing.
321
322 To compile it as a module, choose M here. If unsure, say N.
323
324config IP_NF_TARGET_ECN 296config IP_NF_TARGET_ECN
325 tristate "ECN target support" 297 tristate "ECN target support"
326 depends on IP_NF_MANGLE 298 depends on IP_NF_MANGLE
299 depends on NETFILTER_ADVANCED
327 ---help--- 300 ---help---
328 This option adds an `ECN' target, which can be used in the iptables mangle 301 This option adds an `ECN' target, which can be used in the iptables mangle
329 table. 302 table.
@@ -338,6 +311,7 @@ config IP_NF_TARGET_ECN
338config IP_NF_TARGET_TTL 311config IP_NF_TARGET_TTL
339 tristate 'TTL target support' 312 tristate 'TTL target support'
340 depends on IP_NF_MANGLE 313 depends on IP_NF_MANGLE
314 depends on NETFILTER_ADVANCED
341 help 315 help
342 This option adds a `TTL' target, which enables the user to modify 316 This option adds a `TTL' target, which enables the user to modify
343 the TTL value of the IP header. 317 the TTL value of the IP header.
@@ -353,6 +327,7 @@ config IP_NF_TARGET_CLUSTERIP
353 tristate "CLUSTERIP target support (EXPERIMENTAL)" 327 tristate "CLUSTERIP target support (EXPERIMENTAL)"
354 depends on IP_NF_MANGLE && EXPERIMENTAL 328 depends on IP_NF_MANGLE && EXPERIMENTAL
355 depends on NF_CONNTRACK_IPV4 329 depends on NF_CONNTRACK_IPV4
330 depends on NETFILTER_ADVANCED
356 select NF_CONNTRACK_MARK 331 select NF_CONNTRACK_MARK
357 help 332 help
358 The CLUSTERIP target allows you to build load-balancing clusters of 333 The CLUSTERIP target allows you to build load-balancing clusters of
@@ -365,6 +340,7 @@ config IP_NF_TARGET_CLUSTERIP
365config IP_NF_RAW 340config IP_NF_RAW
366 tristate 'raw table support (required for NOTRACK/TRACE)' 341 tristate 'raw table support (required for NOTRACK/TRACE)'
367 depends on IP_NF_IPTABLES 342 depends on IP_NF_IPTABLES
343 depends on NETFILTER_ADVANCED
368 help 344 help
369 This option adds a `raw' table to iptables. This table is the very 345 This option adds a `raw' table to iptables. This table is the very
370 first in the netfilter framework and hooks in at the PREROUTING 346 first in the netfilter framework and hooks in at the PREROUTING
@@ -377,6 +353,7 @@ config IP_NF_RAW
377config IP_NF_ARPTABLES 353config IP_NF_ARPTABLES
378 tristate "ARP tables support" 354 tristate "ARP tables support"
379 select NETFILTER_XTABLES 355 select NETFILTER_XTABLES
356 depends on NETFILTER_ADVANCED
380 help 357 help
381 arptables is a general, extensible packet identification framework. 358 arptables is a general, extensible packet identification framework.
382 The ARP packet filtering and mangling (manipulation) subsystems 359 The ARP packet filtering and mangling (manipulation) subsystems
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 7456833d6ade..0c7dc78a62e9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -44,10 +44,7 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
44obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o 44obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
45obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o 45obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
46obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o 46obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
47obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
48obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
49obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o 47obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
50obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
51obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o 48obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
52 49
53# targets 50# targets
@@ -58,8 +55,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
58obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o 55obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
59obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o 56obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
60obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o 57obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
61obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
62obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
63obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o 58obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
64obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o 59obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
65 60
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2909c92ecd99..b4a810c28ac8 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -19,9 +19,10 @@
19#include <linux/proc_fs.h> 19#include <linux/proc_fs.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/init.h> 21#include <linux/init.h>
22
23#include <asm/uaccess.h>
24#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/err.h>
24#include <net/compat.h>
25#include <asm/uaccess.h>
25 26
26#include <linux/netfilter/x_tables.h> 27#include <linux/netfilter/x_tables.h>
27#include <linux/netfilter_arp/arp_tables.h> 28#include <linux/netfilter_arp/arp_tables.h>
@@ -83,7 +84,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
83 __be32 src_ipaddr, tgt_ipaddr; 84 __be32 src_ipaddr, tgt_ipaddr;
84 int i, ret; 85 int i, ret;
85 86
86#define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg)) 87#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
87 88
88 if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, 89 if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
89 ARPT_INV_ARPOP)) { 90 ARPT_INV_ARPOP)) {
@@ -179,6 +180,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
179 } 180 }
180 181
181 return 1; 182 return 1;
183#undef FWINV
182} 184}
183 185
184static inline int arp_checkentry(const struct arpt_arp *arp) 186static inline int arp_checkentry(const struct arpt_arp *arp)
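
The extra parentheses around invflg are routine macro hygiene, and the added #undef keeps the helper from leaking past the one function that uses it. Why the parentheses matter, sketched with a compound argument (flag names from this file):

	/* Without parentheses, a compound mask argument is torn apart by
	 * operator precedence: & binds tighter than |. */
	#define FWINV_BAD(bool, invflg)  ((bool) ^ !!(arpinfo->invflags & invflg))
	#define FWINV_GOOD(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))

	/* FWINV_BAD(x, ARPT_INV_SRCIP | ARPT_INV_TGTIP) expands to
	 *	((x) ^ !!(arpinfo->invflags & ARPT_INV_SRCIP | ARPT_INV_TGTIP))
	 * which tests (invflags & ARPT_INV_SRCIP) | ARPT_INV_TGTIP, OR-ing the
	 * second flag in unconditionally; FWINV_GOOD tests the whole mask. */
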
@@ -435,29 +437,9 @@ static int mark_source_chains(struct xt_table_info *newinfo,
435 return 1; 437 return 1;
436} 438}
437 439
438static inline int standard_check(const struct arpt_entry_target *t, 440static inline int check_entry(struct arpt_entry *e, const char *name)
439 unsigned int max_offset)
440{
441 /* Check standard info. */
442 if (t->u.target_size
443 != ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
444 duprintf("arpt_standard_check: target size %u != %Zu\n",
445 t->u.target_size,
446 ARPT_ALIGN(sizeof(struct arpt_standard_target)));
447 return 0;
448 }
449
450 return 1;
451}
452
453static struct arpt_target arpt_standard_target;
454
455static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
456 unsigned int *i)
457{ 441{
458 struct arpt_entry_target *t; 442 struct arpt_entry_target *t;
459 struct arpt_target *target;
460 int ret;
461 443
462 if (!arp_checkentry(&e->arp)) { 444 if (!arp_checkentry(&e->arp)) {
463 duprintf("arp_tables: arp check failed %p %s.\n", e, name); 445 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
@@ -471,35 +453,57 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
471 if (e->target_offset + t->u.target_size > e->next_offset) 453 if (e->target_offset + t->u.target_size > e->next_offset)
472 return -EINVAL; 454 return -EINVAL;
473 455
456 return 0;
457}
458
459static inline int check_target(struct arpt_entry *e, const char *name)
460{
461 struct arpt_entry_target *t;
462 struct arpt_target *target;
463 int ret;
464
465 t = arpt_get_target(e);
466 target = t->u.kernel.target;
467
468 ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t),
469 name, e->comefrom, 0, 0);
470 if (!ret && t->u.kernel.target->checkentry
471 && !t->u.kernel.target->checkentry(name, e, target, t->data,
472 e->comefrom)) {
473 duprintf("arp_tables: check failed for `%s'.\n",
474 t->u.kernel.target->name);
475 ret = -EINVAL;
476 }
477 return ret;
478}
479
480static inline int
481find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
482 unsigned int *i)
483{
484 struct arpt_entry_target *t;
485 struct arpt_target *target;
486 int ret;
487
488 ret = check_entry(e, name);
489 if (ret)
490 return ret;
491
492 t = arpt_get_target(e);
474 target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, 493 target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
475 t->u.user.revision), 494 t->u.user.revision),
476 "arpt_%s", t->u.user.name); 495 "arpt_%s", t->u.user.name);
477 if (IS_ERR(target) || !target) { 496 if (IS_ERR(target) || !target) {
478 duprintf("check_entry: `%s' not found\n", t->u.user.name); 497 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
479 ret = target ? PTR_ERR(target) : -ENOENT; 498 ret = target ? PTR_ERR(target) : -ENOENT;
480 goto out; 499 goto out;
481 } 500 }
482 t->u.kernel.target = target; 501 t->u.kernel.target = target;
483 502
484 ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t), 503 ret = check_target(e, name);
485 name, e->comefrom, 0, 0);
486 if (ret) 504 if (ret)
487 goto err; 505 goto err;
488 506
489 if (t->u.kernel.target == &arpt_standard_target) {
490 if (!standard_check(t, size)) {
491 ret = -EINVAL;
492 goto err;
493 }
494 } else if (t->u.kernel.target->checkentry
495 && !t->u.kernel.target->checkentry(name, e, target, t->data,
496 e->comefrom)) {
497 duprintf("arp_tables: check failed for `%s'.\n",
498 t->u.kernel.target->name);
499 ret = -EINVAL;
500 goto err;
501 }
502
503 (*i)++; 507 (*i)++;
504 return 0; 508 return 0;
505err: 509err:
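
Splitting the old monolithic check into check_entry() (layout sanity) and check_target() (semantic validation) is what lets the CONFIG_COMPAT code added later in this patch reuse the first half before any translation has happened. The cast it relies on is safe for the reason the patch's own comment gives ("casting the compat entry is fine"); as a sketch:

	/* Sketch: arpt_arp plus the two u16 offsets sit at the same
	 * offsets in struct arpt_entry and struct compat_arpt_entry, so
	 * the native layout check can validate a compat entry too. */
	static int example_check_compat(struct compat_arpt_entry *ce,
					const char *name)
	{
		return check_entry((struct arpt_entry *)ce, name);
	}
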
@@ -633,7 +637,7 @@ static int translate_table(const char *name,
633 /* Finally, each sanity check must pass */ 637 /* Finally, each sanity check must pass */
634 i = 0; 638 i = 0;
635 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, 639 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
636 check_entry, name, size, &i); 640 find_check_entry, name, size, &i);
637 641
638 if (ret != 0) { 642 if (ret != 0) {
639 ARPT_ENTRY_ITERATE(entry0, newinfo->size, 643 ARPT_ENTRY_ITERATE(entry0, newinfo->size,
@@ -704,16 +708,11 @@ static void get_counters(const struct xt_table_info *t,
704 } 708 }
705} 709}
706 710
707static int copy_entries_to_user(unsigned int total_size, 711static inline struct xt_counters *alloc_counters(struct arpt_table *table)
708 struct arpt_table *table,
709 void __user *userptr)
710{ 712{
711 unsigned int off, num, countersize; 713 unsigned int countersize;
712 struct arpt_entry *e;
713 struct xt_counters *counters; 714 struct xt_counters *counters;
714 struct xt_table_info *private = table->private; 715 struct xt_table_info *private = table->private;
715 int ret = 0;
716 void *loc_cpu_entry;
717 716
718 /* We need atomic snapshot of counters: rest doesn't change 717 /* We need atomic snapshot of counters: rest doesn't change
719 * (other than comefrom, which userspace doesn't care 718 * (other than comefrom, which userspace doesn't care
@@ -723,13 +722,31 @@ static int copy_entries_to_user(unsigned int total_size,
723 counters = vmalloc_node(countersize, numa_node_id()); 722 counters = vmalloc_node(countersize, numa_node_id());
724 723
725 if (counters == NULL) 724 if (counters == NULL)
726 return -ENOMEM; 725 return ERR_PTR(-ENOMEM);
727 726
728 /* First, sum counters... */ 727 /* First, sum counters... */
729 write_lock_bh(&table->lock); 728 write_lock_bh(&table->lock);
730 get_counters(private, counters); 729 get_counters(private, counters);
731 write_unlock_bh(&table->lock); 730 write_unlock_bh(&table->lock);
732 731
732 return counters;
733}
734
735static int copy_entries_to_user(unsigned int total_size,
736 struct arpt_table *table,
737 void __user *userptr)
738{
739 unsigned int off, num;
740 struct arpt_entry *e;
741 struct xt_counters *counters;
742 struct xt_table_info *private = table->private;
743 int ret = 0;
744 void *loc_cpu_entry;
745
746 counters = alloc_counters(table);
747 if (IS_ERR(counters))
748 return PTR_ERR(counters);
749
733 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 750 loc_cpu_entry = private->entries[raw_smp_processor_id()];
734 /* ... then copy entire thing ... */ 751 /* ... then copy entire thing ... */
735 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { 752 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
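
Factoring the counter snapshot out as alloc_counters() also changes the error convention: instead of an int status plus an out-parameter, one pointer now carries either the result or an encoded errno. A short sketch of the idiom, assuming the usual <linux/err.h> helpers:

	/* Sketch: a single return value carries success and failure. */
	static int example_use_counters(struct arpt_table *table)
	{
		struct xt_counters *counters = alloc_counters(table);

		if (IS_ERR(counters))
			return PTR_ERR(counters);	/* e.g. -ENOMEM */

		/* ... copy the snapshot out to userspace here ... */
		vfree(counters);
		return 0;
	}
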
@@ -767,23 +784,159 @@ static int copy_entries_to_user(unsigned int total_size,
767 return ret; 784 return ret;
768} 785}
769 786
770static int get_entries(const struct arpt_get_entries *entries, 787#ifdef CONFIG_COMPAT
771 struct arpt_get_entries __user *uptr) 788static void compat_standard_from_user(void *dst, void *src)
789{
790 int v = *(compat_int_t *)src;
791
792 if (v > 0)
793 v += xt_compat_calc_jump(NF_ARP, v);
794 memcpy(dst, &v, sizeof(v));
795}
796
797static int compat_standard_to_user(void __user *dst, void *src)
772{ 798{
799 compat_int_t cv = *(int *)src;
800
801 if (cv > 0)
802 cv -= xt_compat_calc_jump(NF_ARP, cv);
803 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
804}
805
806static int compat_calc_entry(struct arpt_entry *e,
807 const struct xt_table_info *info,
808 void *base, struct xt_table_info *newinfo)
809{
810 struct arpt_entry_target *t;
811 unsigned int entry_offset;
812 int off, i, ret;
813
814 off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
815 entry_offset = (void *)e - base;
816
817 t = arpt_get_target(e);
818 off += xt_compat_target_offset(t->u.kernel.target);
819 newinfo->size -= off;
820 ret = xt_compat_add_offset(NF_ARP, entry_offset, off);
821 if (ret)
822 return ret;
823
824 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
825 if (info->hook_entry[i] &&
826 (e < (struct arpt_entry *)(base + info->hook_entry[i])))
827 newinfo->hook_entry[i] -= off;
828 if (info->underflow[i] &&
829 (e < (struct arpt_entry *)(base + info->underflow[i])))
830 newinfo->underflow[i] -= off;
831 }
832 return 0;
833}
834
835static int compat_table_info(const struct xt_table_info *info,
836 struct xt_table_info *newinfo)
837{
838 void *loc_cpu_entry;
839
840 if (!newinfo || !info)
841 return -EINVAL;
842
843	/* we don't care about newinfo->entries[] */
844 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
845 newinfo->initial_entries = 0;
846 loc_cpu_entry = info->entries[raw_smp_processor_id()];
847 return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
848 compat_calc_entry, info, loc_cpu_entry,
849 newinfo);
850}
851#endif
852
853static int get_info(void __user *user, int *len, int compat)
854{
855 char name[ARPT_TABLE_MAXNAMELEN];
856 struct arpt_table *t;
773 int ret; 857 int ret;
858
859 if (*len != sizeof(struct arpt_getinfo)) {
860 duprintf("length %u != %Zu\n", *len,
861 sizeof(struct arpt_getinfo));
862 return -EINVAL;
863 }
864
865 if (copy_from_user(name, user, sizeof(name)) != 0)
866 return -EFAULT;
867
868 name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
869#ifdef CONFIG_COMPAT
870 if (compat)
871 xt_compat_lock(NF_ARP);
872#endif
873 t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
874 "arptable_%s", name);
875 if (t && !IS_ERR(t)) {
876 struct arpt_getinfo info;
877 struct xt_table_info *private = t->private;
878
879#ifdef CONFIG_COMPAT
880 if (compat) {
881 struct xt_table_info tmp;
882 ret = compat_table_info(private, &tmp);
883 xt_compat_flush_offsets(NF_ARP);
884 private = &tmp;
885 }
886#endif
887 info.valid_hooks = t->valid_hooks;
888 memcpy(info.hook_entry, private->hook_entry,
889 sizeof(info.hook_entry));
890 memcpy(info.underflow, private->underflow,
891 sizeof(info.underflow));
892 info.num_entries = private->number;
893 info.size = private->size;
894 strcpy(info.name, name);
895
896 if (copy_to_user(user, &info, *len) != 0)
897 ret = -EFAULT;
898 else
899 ret = 0;
900 xt_table_unlock(t);
901 module_put(t->me);
902 } else
903 ret = t ? PTR_ERR(t) : -ENOENT;
904#ifdef CONFIG_COMPAT
905 if (compat)
906 xt_compat_unlock(NF_ARP);
907#endif
908 return ret;
909}
910
911static int get_entries(struct arpt_get_entries __user *uptr, int *len)
912{
913 int ret;
914 struct arpt_get_entries get;
774 struct arpt_table *t; 915 struct arpt_table *t;
775 916
776 t = xt_find_table_lock(NF_ARP, entries->name); 917 if (*len < sizeof(get)) {
918 duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
919 return -EINVAL;
920 }
921 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
922 return -EFAULT;
923 if (*len != sizeof(struct arpt_get_entries) + get.size) {
924 duprintf("get_entries: %u != %Zu\n", *len,
925 sizeof(struct arpt_get_entries) + get.size);
926 return -EINVAL;
927 }
928
929 t = xt_find_table_lock(NF_ARP, get.name);
777 if (t && !IS_ERR(t)) { 930 if (t && !IS_ERR(t)) {
778 struct xt_table_info *private = t->private; 931 struct xt_table_info *private = t->private;
779 duprintf("t->private->number = %u\n", 932 duprintf("t->private->number = %u\n",
780 private->number); 933 private->number);
781 if (entries->size == private->size) 934 if (get.size == private->size)
782 ret = copy_entries_to_user(private->size, 935 ret = copy_entries_to_user(private->size,
783 t, uptr->entrytable); 936 t, uptr->entrytable);
784 else { 937 else {
785 duprintf("get_entries: I've got %u not %u!\n", 938 duprintf("get_entries: I've got %u not %u!\n",
786 private->size, entries->size); 939 private->size, get.size);
787 ret = -EINVAL; 940 ret = -EINVAL;
788 } 941 }
789 module_put(t->me); 942 module_put(t->me);
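
The compat_standard_from_user()/compat_standard_to_user() pair exists because a standard target's positive "verdict" is not a code but a byte offset to jump to within the blob of entries. Every 32-bit entry grows by sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry) during translation, so a jump that crosses translated entries must be shifted by the per-offset deltas recorded with xt_compat_add_offset(); xt_compat_calc_jump() returns the accumulated shift. Sketched, with the sign convention spelled out:

	/* Sketch: adjust a standard-target verdict while translating a
	 * 32-bit ruleset to the native layout. */
	static void example_verdict_from_user(void *dst, void *src)
	{
		int v = *(compat_int_t *)src;

		if (v > 0)		/* positive: a jump offset in bytes */
			v += xt_compat_calc_jump(NF_ARP, v);
		/* zero/negative: a real verdict such as -NF_ACCEPT - 1, unchanged */
		memcpy(dst, &v, sizeof(v));
	}
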
@@ -794,71 +947,41 @@ static int get_entries(const struct arpt_get_entries *entries,
794 return ret; 947 return ret;
795} 948}
796 949
797static int do_replace(void __user *user, unsigned int len) 950static int __do_replace(const char *name, unsigned int valid_hooks,
951 struct xt_table_info *newinfo,
952 unsigned int num_counters,
953 void __user *counters_ptr)
798{ 954{
799 int ret; 955 int ret;
800 struct arpt_replace tmp;
801 struct arpt_table *t; 956 struct arpt_table *t;
802 struct xt_table_info *newinfo, *oldinfo; 957 struct xt_table_info *oldinfo;
803 struct xt_counters *counters; 958 struct xt_counters *counters;
804 void *loc_cpu_entry, *loc_cpu_old_entry; 959 void *loc_cpu_old_entry;
805
806 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
807 return -EFAULT;
808 960
809 /* Hack: Causes ipchains to give correct error msg --RR */ 961 ret = 0;
810 if (len != sizeof(tmp) + tmp.size) 962 counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
811 return -ENOPROTOOPT; 963 numa_node_id());
812
813 /* overflow check */
814 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
815 SMP_CACHE_BYTES)
816 return -ENOMEM;
817 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
818 return -ENOMEM;
819
820 newinfo = xt_alloc_table_info(tmp.size);
821 if (!newinfo)
822 return -ENOMEM;
823
824 /* choose the copy that is on our node/cpu */
825 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
826 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
827 tmp.size) != 0) {
828 ret = -EFAULT;
829 goto free_newinfo;
830 }
831
832 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
833 if (!counters) { 964 if (!counters) {
834 ret = -ENOMEM; 965 ret = -ENOMEM;
835 goto free_newinfo; 966 goto out;
836 } 967 }
837 968
838 ret = translate_table(tmp.name, tmp.valid_hooks, 969 t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
839 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, 970 "arptable_%s", name);
840 tmp.hook_entry, tmp.underflow);
841 if (ret != 0)
842 goto free_newinfo_counters;
843
844 duprintf("arp_tables: Translated table\n");
845
846 t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
847 "arptable_%s", tmp.name);
848 if (!t || IS_ERR(t)) { 971 if (!t || IS_ERR(t)) {
849 ret = t ? PTR_ERR(t) : -ENOENT; 972 ret = t ? PTR_ERR(t) : -ENOENT;
850 goto free_newinfo_counters_untrans; 973 goto free_newinfo_counters_untrans;
851 } 974 }
852 975
853 /* You lied! */ 976 /* You lied! */
854 if (tmp.valid_hooks != t->valid_hooks) { 977 if (valid_hooks != t->valid_hooks) {
855 duprintf("Valid hook crap: %08X vs %08X\n", 978 duprintf("Valid hook crap: %08X vs %08X\n",
856 tmp.valid_hooks, t->valid_hooks); 979 valid_hooks, t->valid_hooks);
857 ret = -EINVAL; 980 ret = -EINVAL;
858 goto put_module; 981 goto put_module;
859 } 982 }
860 983
861 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); 984 oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
862 if (!oldinfo) 985 if (!oldinfo)
863 goto put_module; 986 goto put_module;
864 987
@@ -876,11 +999,12 @@ static int do_replace(void __user *user, unsigned int len)
876 get_counters(oldinfo, counters); 999 get_counters(oldinfo, counters);
877 /* Decrease module usage counts and free resource */ 1000 /* Decrease module usage counts and free resource */
878 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1001 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
879 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); 1002 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1003 NULL);
880 1004
881 xt_free_table_info(oldinfo); 1005 xt_free_table_info(oldinfo);
882 if (copy_to_user(tmp.counters, counters, 1006 if (copy_to_user(counters_ptr, counters,
883 sizeof(struct xt_counters) * tmp.num_counters) != 0) 1007 sizeof(struct xt_counters) * num_counters) != 0)
884 ret = -EFAULT; 1008 ret = -EFAULT;
885 vfree(counters); 1009 vfree(counters);
886 xt_table_unlock(t); 1010 xt_table_unlock(t);
@@ -890,9 +1014,53 @@ static int do_replace(void __user *user, unsigned int len)
890 module_put(t->me); 1014 module_put(t->me);
891 xt_table_unlock(t); 1015 xt_table_unlock(t);
892 free_newinfo_counters_untrans: 1016 free_newinfo_counters_untrans:
893 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
894 free_newinfo_counters:
895 vfree(counters); 1017 vfree(counters);
1018 out:
1019 return ret;
1020}
1021
1022static int do_replace(void __user *user, unsigned int len)
1023{
1024 int ret;
1025 struct arpt_replace tmp;
1026 struct xt_table_info *newinfo;
1027 void *loc_cpu_entry;
1028
1029 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1030 return -EFAULT;
1031
1032 /* overflow check */
1033 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1034 return -ENOMEM;
1035
1036 newinfo = xt_alloc_table_info(tmp.size);
1037 if (!newinfo)
1038 return -ENOMEM;
1039
1040 /* choose the copy that is on our node/cpu */
1041 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1042 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1043 tmp.size) != 0) {
1044 ret = -EFAULT;
1045 goto free_newinfo;
1046 }
1047
1048 ret = translate_table(tmp.name, tmp.valid_hooks,
1049 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1050 tmp.hook_entry, tmp.underflow);
1051 if (ret != 0)
1052 goto free_newinfo;
1053
1054 duprintf("arp_tables: Translated table\n");
1055
1056 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
1057 tmp.num_counters, tmp.counters);
1058 if (ret)
1059 goto free_newinfo_untrans;
1060 return 0;
1061
1062 free_newinfo_untrans:
1063 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
896 free_newinfo: 1064 free_newinfo:
897 xt_free_table_info(newinfo); 1065 xt_free_table_info(newinfo);
898 return ret; 1066 return ret;
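
With the table-swap machinery hoisted into __do_replace(), the native and compat replace paths differ only in how they parse the user header and translate the entry blob; both then funnel into one swap/counter/cleanup sequence. The resulting control flow, sketched (names from this patch):

	/*
	 *   do_replace()                     compat_do_replace()
	 *     copy arpt_replace header         copy compat_arpt_replace header
	 *     translate_table()                translate_compat_table()
	 *              \                               /
	 *               +------- __do_replace() ------+
	 *                 find the table, swap via xt_replace_table(),
	 *                 hand back the old counters, free the old info
	 */
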
@@ -912,31 +1080,59 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
912 return 0; 1080 return 0;
913} 1081}
914 1082
915static int do_add_counters(void __user *user, unsigned int len) 1083static int do_add_counters(void __user *user, unsigned int len, int compat)
916{ 1084{
917 unsigned int i; 1085 unsigned int i;
918 struct xt_counters_info tmp, *paddc; 1086 struct xt_counters_info tmp;
1087 struct xt_counters *paddc;
1088 unsigned int num_counters;
1089 char *name;
1090 int size;
1091 void *ptmp;
919 struct arpt_table *t; 1092 struct arpt_table *t;
920 struct xt_table_info *private; 1093 struct xt_table_info *private;
921 int ret = 0; 1094 int ret = 0;
922 void *loc_cpu_entry; 1095 void *loc_cpu_entry;
1096#ifdef CONFIG_COMPAT
1097 struct compat_xt_counters_info compat_tmp;
923 1098
924 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1099 if (compat) {
1100 ptmp = &compat_tmp;
1101 size = sizeof(struct compat_xt_counters_info);
1102 } else
1103#endif
1104 {
1105 ptmp = &tmp;
1106 size = sizeof(struct xt_counters_info);
1107 }
1108
1109 if (copy_from_user(ptmp, user, size) != 0)
925 return -EFAULT; 1110 return -EFAULT;
926 1111
927 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) 1112#ifdef CONFIG_COMPAT
1113 if (compat) {
1114 num_counters = compat_tmp.num_counters;
1115 name = compat_tmp.name;
1116 } else
1117#endif
1118 {
1119 num_counters = tmp.num_counters;
1120 name = tmp.name;
1121 }
1122
1123 if (len != size + num_counters * sizeof(struct xt_counters))
928 return -EINVAL; 1124 return -EINVAL;
929 1125
930 paddc = vmalloc(len); 1126 paddc = vmalloc_node(len - size, numa_node_id());
931 if (!paddc) 1127 if (!paddc)
932 return -ENOMEM; 1128 return -ENOMEM;
933 1129
934 if (copy_from_user(paddc, user, len) != 0) { 1130 if (copy_from_user(paddc, user + size, len - size) != 0) {
935 ret = -EFAULT; 1131 ret = -EFAULT;
936 goto free; 1132 goto free;
937 } 1133 }
938 1134
939 t = xt_find_table_lock(NF_ARP, tmp.name); 1135 t = xt_find_table_lock(NF_ARP, name);
940 if (!t || IS_ERR(t)) { 1136 if (!t || IS_ERR(t)) {
941 ret = t ? PTR_ERR(t) : -ENOENT; 1137 ret = t ? PTR_ERR(t) : -ENOENT;
942 goto free; 1138 goto free;
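
do_add_counters() now reads the request as an ABI-specific header followed by an ABI-neutral payload, which is why paddc shrinks to just the counter array and the copy starts at user + size. The payload can be shared because struct xt_counters is two 64-bit fields with the same size on both ABIs (an assumption this code makes; only the header layout differs):

	/* Sketch: wire format of ARPT_SO_SET_ADD_COUNTERS.
	 *
	 *   native:  [ struct xt_counters_info        ][ N * struct xt_counters ]
	 *   compat:  [ struct compat_xt_counters_info ][ N * struct xt_counters ]
	 *
	 * so the length check becomes, for either ABI,
	 *
	 *   len == sizeof(header) + num_counters * sizeof(struct xt_counters)
	 *
	 * and paddc receives only the trailing array, copied from user + size. */
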
@@ -944,7 +1140,7 @@ static int do_add_counters(void __user *user, unsigned int len)
944 1140
945 write_lock_bh(&t->lock); 1141 write_lock_bh(&t->lock);
946 private = t->private; 1142 private = t->private;
947 if (private->number != tmp.num_counters) { 1143 if (private->number != num_counters) {
948 ret = -EINVAL; 1144 ret = -EINVAL;
949 goto unlock_up_free; 1145 goto unlock_up_free;
950 } 1146 }
@@ -955,7 +1151,7 @@ static int do_add_counters(void __user *user, unsigned int len)
955 ARPT_ENTRY_ITERATE(loc_cpu_entry, 1151 ARPT_ENTRY_ITERATE(loc_cpu_entry,
956 private->size, 1152 private->size,
957 add_counter_to_entry, 1153 add_counter_to_entry,
958 paddc->counters, 1154 paddc,
959 &i); 1155 &i);
960 unlock_up_free: 1156 unlock_up_free:
961 write_unlock_bh(&t->lock); 1157 write_unlock_bh(&t->lock);
@@ -967,7 +1163,329 @@ static int do_add_counters(void __user *user, unsigned int len)
967 return ret; 1163 return ret;
968} 1164}
969 1165
970static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 1166#ifdef CONFIG_COMPAT
1167static inline int
1168compat_release_entry(struct compat_arpt_entry *e, unsigned int *i)
1169{
1170 struct arpt_entry_target *t;
1171
1172 if (i && (*i)-- == 0)
1173 return 1;
1174
1175 t = compat_arpt_get_target(e);
1176 module_put(t->u.kernel.target->me);
1177 return 0;
1178}
1179
1180static inline int
1181check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1182 struct xt_table_info *newinfo,
1183 unsigned int *size,
1184 unsigned char *base,
1185 unsigned char *limit,
1186 unsigned int *hook_entries,
1187 unsigned int *underflows,
1188 unsigned int *i,
1189 const char *name)
1190{
1191 struct arpt_entry_target *t;
1192 struct xt_target *target;
1193 unsigned int entry_offset;
1194 int ret, off, h;
1195
1196 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1197 if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0
1198 || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
1199 duprintf("Bad offset %p, limit = %p\n", e, limit);
1200 return -EINVAL;
1201 }
1202
1203 if (e->next_offset < sizeof(struct compat_arpt_entry) +
1204 sizeof(struct compat_xt_entry_target)) {
1205 duprintf("checking: element %p size %u\n",
1206 e, e->next_offset);
1207 return -EINVAL;
1208 }
1209
1210 /* For purposes of check_entry casting the compat entry is fine */
1211 ret = check_entry((struct arpt_entry *)e, name);
1212 if (ret)
1213 return ret;
1214
1215 off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
1216 entry_offset = (void *)e - (void *)base;
1217
1218 t = compat_arpt_get_target(e);
1219 target = try_then_request_module(xt_find_target(NF_ARP,
1220 t->u.user.name,
1221 t->u.user.revision),
1222 "arpt_%s", t->u.user.name);
1223 if (IS_ERR(target) || !target) {
1224 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1225 t->u.user.name);
1226 ret = target ? PTR_ERR(target) : -ENOENT;
1227 goto out;
1228 }
1229 t->u.kernel.target = target;
1230
1231 off += xt_compat_target_offset(target);
1232 *size += off;
1233 ret = xt_compat_add_offset(NF_ARP, entry_offset, off);
1234 if (ret)
1235 goto release_target;
1236
1237 /* Check hooks & underflows */
1238 for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
1239 if ((unsigned char *)e - base == hook_entries[h])
1240 newinfo->hook_entry[h] = hook_entries[h];
1241 if ((unsigned char *)e - base == underflows[h])
1242 newinfo->underflow[h] = underflows[h];
1243 }
1244
1245 /* Clear counters and comefrom */
1246 memset(&e->counters, 0, sizeof(e->counters));
1247 e->comefrom = 0;
1248
1249 (*i)++;
1250 return 0;
1251
1252release_target:
1253 module_put(t->u.kernel.target->me);
1254out:
1255 return ret;
1256}
1257
1258static int
1259compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
1260 unsigned int *size, const char *name,
1261 struct xt_table_info *newinfo, unsigned char *base)
1262{
1263 struct arpt_entry_target *t;
1264 struct xt_target *target;
1265 struct arpt_entry *de;
1266 unsigned int origsize;
1267 int ret, h;
1268
1269 ret = 0;
1270 origsize = *size;
1271 de = (struct arpt_entry *)*dstptr;
1272 memcpy(de, e, sizeof(struct arpt_entry));
1273 memcpy(&de->counters, &e->counters, sizeof(e->counters));
1274
1275 *dstptr += sizeof(struct arpt_entry);
1276 *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
1277
1278 de->target_offset = e->target_offset - (origsize - *size);
1279 t = compat_arpt_get_target(e);
1280 target = t->u.kernel.target;
1281 xt_compat_target_from_user(t, dstptr, size);
1282
1283 de->next_offset = e->next_offset - (origsize - *size);
1284 for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
1285 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1286 newinfo->hook_entry[h] -= origsize - *size;
1287 if ((unsigned char *)de - base < newinfo->underflow[h])
1288 newinfo->underflow[h] -= origsize - *size;
1289 }
1290 return ret;
1291}
1292
1293static inline int compat_check_entry(struct arpt_entry *e, const char *name,
1294 unsigned int *i)
1295{
1296 int ret;
1297
1298 ret = check_target(e, name);
1299 if (ret)
1300 return ret;
1301
1302 (*i)++;
1303 return 0;
1304}
1305
1306static int translate_compat_table(const char *name,
1307 unsigned int valid_hooks,
1308 struct xt_table_info **pinfo,
1309 void **pentry0,
1310 unsigned int total_size,
1311 unsigned int number,
1312 unsigned int *hook_entries,
1313 unsigned int *underflows)
1314{
1315 unsigned int i, j;
1316 struct xt_table_info *newinfo, *info;
1317 void *pos, *entry0, *entry1;
1318 unsigned int size;
1319 int ret;
1320
1321 info = *pinfo;
1322 entry0 = *pentry0;
1323 size = total_size;
1324 info->number = number;
1325
1326 /* Init all hooks to impossible value. */
1327 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
1328 info->hook_entry[i] = 0xFFFFFFFF;
1329 info->underflow[i] = 0xFFFFFFFF;
1330 }
1331
1332 duprintf("translate_compat_table: size %u\n", info->size);
1333 j = 0;
1334 xt_compat_lock(NF_ARP);
1335 /* Walk through entries, checking offsets. */
1336 ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
1337 check_compat_entry_size_and_hooks,
1338 info, &size, entry0,
1339 entry0 + total_size,
1340 hook_entries, underflows, &j, name);
1341 if (ret != 0)
1342 goto out_unlock;
1343
1344 ret = -EINVAL;
1345 if (j != number) {
1346 duprintf("translate_compat_table: %u not %u entries\n",
1347 j, number);
1348 goto out_unlock;
1349 }
1350
1351 /* Check hooks all assigned */
1352 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
1353 /* Only hooks which are valid */
1354 if (!(valid_hooks & (1 << i)))
1355 continue;
1356 if (info->hook_entry[i] == 0xFFFFFFFF) {
1357 duprintf("Invalid hook entry %u %u\n",
1358 i, hook_entries[i]);
1359 goto out_unlock;
1360 }
1361 if (info->underflow[i] == 0xFFFFFFFF) {
1362 duprintf("Invalid underflow %u %u\n",
1363 i, underflows[i]);
1364 goto out_unlock;
1365 }
1366 }
1367
1368 ret = -ENOMEM;
1369 newinfo = xt_alloc_table_info(size);
1370 if (!newinfo)
1371 goto out_unlock;
1372
1373 newinfo->number = number;
1374 for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
1375 newinfo->hook_entry[i] = info->hook_entry[i];
1376 newinfo->underflow[i] = info->underflow[i];
1377 }
1378 entry1 = newinfo->entries[raw_smp_processor_id()];
1379 pos = entry1;
1380 size = total_size;
1381 ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
1382 compat_copy_entry_from_user,
1383 &pos, &size, name, newinfo, entry1);
1384 xt_compat_flush_offsets(NF_ARP);
1385 xt_compat_unlock(NF_ARP);
1386 if (ret)
1387 goto free_newinfo;
1388
1389 ret = -ELOOP;
1390 if (!mark_source_chains(newinfo, valid_hooks, entry1))
1391 goto free_newinfo;
1392
1393 i = 0;
1394 ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1395 name, &i);
1396 if (ret) {
1397 j -= i;
1398 COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1399 compat_release_entry, &j);
1400 ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1401 xt_free_table_info(newinfo);
1402 return ret;
1403 }
1404
1405 /* And one copy for every other CPU */
1406 for_each_possible_cpu(i)
1407 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1408 memcpy(newinfo->entries[i], entry1, newinfo->size);
1409
1410 *pinfo = newinfo;
1411 *pentry0 = entry1;
1412 xt_free_table_info(info);
1413 return 0;
1414
1415free_newinfo:
1416 xt_free_table_info(newinfo);
1417out:
1418 COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1419 return ret;
1420out_unlock:
1421 xt_compat_flush_offsets(NF_ARP);
1422 xt_compat_unlock(NF_ARP);
1423 goto out;
1424}
1425
1426struct compat_arpt_replace {
1427 char name[ARPT_TABLE_MAXNAMELEN];
1428 u32 valid_hooks;
1429 u32 num_entries;
1430 u32 size;
1431 u32 hook_entry[NF_ARP_NUMHOOKS];
1432 u32 underflow[NF_ARP_NUMHOOKS];
1433 u32 num_counters;
1434 compat_uptr_t counters;
1435 struct compat_arpt_entry entries[0];
1436};
1437
1438static int compat_do_replace(void __user *user, unsigned int len)
1439{
1440 int ret;
1441 struct compat_arpt_replace tmp;
1442 struct xt_table_info *newinfo;
1443 void *loc_cpu_entry;
1444
1445 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1446 return -EFAULT;
1447
1448 /* overflow check */
1449 if (tmp.size >= INT_MAX / num_possible_cpus())
1450 return -ENOMEM;
1451 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1452 return -ENOMEM;
1453
1454 newinfo = xt_alloc_table_info(tmp.size);
1455 if (!newinfo)
1456 return -ENOMEM;
1457
1458 /* choose the copy that is on our node/cpu */
1459 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1460 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
1461 ret = -EFAULT;
1462 goto free_newinfo;
1463 }
1464
1465 ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1466 &newinfo, &loc_cpu_entry, tmp.size,
1467 tmp.num_entries, tmp.hook_entry,
1468 tmp.underflow);
1469 if (ret != 0)
1470 goto free_newinfo;
1471
1472 duprintf("compat_do_replace: Translated table\n");
1473
1474 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
1475 tmp.num_counters, compat_ptr(tmp.counters));
1476 if (ret)
1477 goto free_newinfo_untrans;
1478 return 0;
1479
1480 free_newinfo_untrans:
1481 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1482 free_newinfo:
1483 xt_free_table_info(newinfo);
1484 return ret;
1485}
1486
1487static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1488 unsigned int len)
971{ 1489{
972 int ret; 1490 int ret;
973 1491
@@ -976,11 +1494,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
976 1494
977 switch (cmd) { 1495 switch (cmd) {
978 case ARPT_SO_SET_REPLACE: 1496 case ARPT_SO_SET_REPLACE:
979 ret = do_replace(user, len); 1497 ret = compat_do_replace(user, len);
980 break; 1498 break;
981 1499
982 case ARPT_SO_SET_ADD_COUNTERS: 1500 case ARPT_SO_SET_ADD_COUNTERS:
983 ret = do_add_counters(user, len); 1501 ret = do_add_counters(user, len, 1);
984 break; 1502 break;
985 1503
986 default: 1504 default:
@@ -991,74 +1509,190 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
991 return ret; 1509 return ret;
992} 1510}
993 1511
994static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 1512static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
1513 compat_uint_t *size,
1514 struct xt_counters *counters,
1515 unsigned int *i)
995{ 1516{
1517 struct arpt_entry_target *t;
1518 struct compat_arpt_entry __user *ce;
1519 u_int16_t target_offset, next_offset;
1520 compat_uint_t origsize;
996 int ret; 1521 int ret;
997 1522
998 if (!capable(CAP_NET_ADMIN)) 1523 ret = -EFAULT;
999 return -EPERM; 1524 origsize = *size;
1525 ce = (struct compat_arpt_entry __user *)*dstptr;
1526 if (copy_to_user(ce, e, sizeof(struct arpt_entry)))
1527 goto out;
1000 1528
1001 switch (cmd) { 1529 if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1002 case ARPT_SO_GET_INFO: { 1530 goto out;
1003 char name[ARPT_TABLE_MAXNAMELEN]; 1531
1004 struct arpt_table *t; 1532 *dstptr += sizeof(struct compat_arpt_entry);
1533 *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
1005 1534
1006 if (*len != sizeof(struct arpt_getinfo)) { 1535 target_offset = e->target_offset - (origsize - *size);
1007 duprintf("length %u != %Zu\n", *len, 1536
1008 sizeof(struct arpt_getinfo)); 1537 t = arpt_get_target(e);
1538 ret = xt_compat_target_to_user(t, dstptr, size);
1539 if (ret)
1540 goto out;
1541 ret = -EFAULT;
1542 next_offset = e->next_offset - (origsize - *size);
1543 if (put_user(target_offset, &ce->target_offset))
1544 goto out;
1545 if (put_user(next_offset, &ce->next_offset))
1546 goto out;
1547
1548 (*i)++;
1549 return 0;
1550out:
1551 return ret;
1552}
1553
1554static int compat_copy_entries_to_user(unsigned int total_size,
1555 struct arpt_table *table,
1556 void __user *userptr)
1557{
1558 struct xt_counters *counters;
1559 struct xt_table_info *private = table->private;
1560 void __user *pos;
1561 unsigned int size;
1562 int ret = 0;
1563 void *loc_cpu_entry;
1564 unsigned int i = 0;
1565
1566 counters = alloc_counters(table);
1567 if (IS_ERR(counters))
1568 return PTR_ERR(counters);
1569
1570 /* choose the copy on our node/cpu */
1571 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1572 pos = userptr;
1573 size = total_size;
1574 ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1575 compat_copy_entry_to_user,
1576 &pos, &size, counters, &i);
1577 vfree(counters);
1578 return ret;
1579}
1580
1581struct compat_arpt_get_entries {
1582 char name[ARPT_TABLE_MAXNAMELEN];
1583 compat_uint_t size;
1584 struct compat_arpt_entry entrytable[0];
1585};
1586
1587static int compat_get_entries(struct compat_arpt_get_entries __user *uptr,
1588 int *len)
1589{
1590 int ret;
1591 struct compat_arpt_get_entries get;
1592 struct arpt_table *t;
1593
1594 if (*len < sizeof(get)) {
1595 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1596 return -EINVAL;
1597 }
1598 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1599 return -EFAULT;
1600 if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
1601 duprintf("compat_get_entries: %u != %zu\n",
1602 *len, sizeof(get) + get.size);
1603 return -EINVAL;
1604 }
1605
1606 xt_compat_lock(NF_ARP);
1607 t = xt_find_table_lock(NF_ARP, get.name);
1608 if (t && !IS_ERR(t)) {
1609 struct xt_table_info *private = t->private;
1610 struct xt_table_info info;
1611
1612 duprintf("t->private->number = %u\n", private->number);
1613 ret = compat_table_info(private, &info);
1614 if (!ret && get.size == info.size) {
1615 ret = compat_copy_entries_to_user(private->size,
1616 t, uptr->entrytable);
1617 } else if (!ret) {
1618 duprintf("compat_get_entries: I've got %u not %u!\n",
1619 private->size, get.size);
1009 ret = -EINVAL; 1620 ret = -EINVAL;
1010 break;
1011 } 1621 }
1622 xt_compat_flush_offsets(NF_ARP);
1623 module_put(t->me);
1624 xt_table_unlock(t);
1625 } else
1626 ret = t ? PTR_ERR(t) : -ENOENT;
1012 1627
1013 if (copy_from_user(name, user, sizeof(name)) != 0) { 1628 xt_compat_unlock(NF_ARP);
1014 ret = -EFAULT; 1629 return ret;
1015 break; 1630}
1016 } 1631
1017 name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; 1632static int do_arpt_get_ctl(struct sock *, int, void __user *, int *);
1018 1633
1019 t = try_then_request_module(xt_find_table_lock(NF_ARP, name), 1634static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
1020 "arptable_%s", name); 1635 int *len)
1021 if (t && !IS_ERR(t)) { 1636{
1022 struct arpt_getinfo info; 1637 int ret;
1023 struct xt_table_info *private = t->private; 1638
1024 1639 if (!capable(CAP_NET_ADMIN))
1025 info.valid_hooks = t->valid_hooks; 1640 return -EPERM;
1026 memcpy(info.hook_entry, private->hook_entry, 1641
1027 sizeof(info.hook_entry)); 1642 switch (cmd) {
1028 memcpy(info.underflow, private->underflow, 1643 case ARPT_SO_GET_INFO:
1029 sizeof(info.underflow)); 1644 ret = get_info(user, len, 1);
1030 info.num_entries = private->number; 1645 break;
1031 info.size = private->size; 1646 case ARPT_SO_GET_ENTRIES:
1032 strcpy(info.name, name); 1647 ret = compat_get_entries(user, len);
1033 1648 break;
1034 if (copy_to_user(user, &info, *len) != 0) 1649 default:
1035 ret = -EFAULT; 1650 ret = do_arpt_get_ctl(sk, cmd, user, len);
1036 else
1037 ret = 0;
1038 xt_table_unlock(t);
1039 module_put(t->me);
1040 } else
1041 ret = t ? PTR_ERR(t) : -ENOENT;
1042 } 1651 }
1043 break; 1652 return ret;
1653}
1654#endif
1044 1655
1045 case ARPT_SO_GET_ENTRIES: { 1656static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1046 struct arpt_get_entries get; 1657{
1658 int ret;
1047 1659
1048 if (*len < sizeof(get)) { 1660 if (!capable(CAP_NET_ADMIN))
1049 duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); 1661 return -EPERM;
1050 ret = -EINVAL; 1662
1051 } else if (copy_from_user(&get, user, sizeof(get)) != 0) { 1663 switch (cmd) {
1052 ret = -EFAULT; 1664 case ARPT_SO_SET_REPLACE:
1053 } else if (*len != sizeof(struct arpt_get_entries) + get.size) { 1665 ret = do_replace(user, len);
1054 duprintf("get_entries: %u != %Zu\n", *len, 1666 break;
1055 sizeof(struct arpt_get_entries) + get.size); 1667
1056 ret = -EINVAL; 1668 case ARPT_SO_SET_ADD_COUNTERS:
1057 } else 1669 ret = do_add_counters(user, len, 0);
1058 ret = get_entries(&get, user);
1059 break; 1670 break;
1671
1672 default:
1673 duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
1674 ret = -EINVAL;
1060 } 1675 }
1061 1676
1677 return ret;
1678}
1679
1680static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1681{
1682 int ret;
1683
1684 if (!capable(CAP_NET_ADMIN))
1685 return -EPERM;
1686
1687 switch (cmd) {
1688 case ARPT_SO_GET_INFO:
1689 ret = get_info(user, len, 0);
1690 break;
1691
1692 case ARPT_SO_GET_ENTRIES:
1693 ret = get_entries(user, len);
1694 break;
1695
1062 case ARPT_SO_GET_REVISION_TARGET: { 1696 case ARPT_SO_GET_REVISION_TARGET: {
1063 struct xt_get_revision rev; 1697 struct xt_get_revision rev;
1064 1698
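
For both ABIs the get path is now a two-step contract: ARPT_SO_GET_INFO reports the table geometry, then ARPT_SO_GET_ENTRIES must be called with *len equal to sizeof(header) + size or the kernel returns EINVAL. From userspace the dance looks roughly like this (a hedged sketch, not from this patch; error handling trimmed, and a raw IPv4 socket is assumed for the arp_tables sockopts):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/netfilter_arp/arp_tables.h>

	int main(void)
	{
		struct arpt_getinfo info;
		struct arpt_get_entries *entries;
		socklen_t len;
		int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

		memset(&info, 0, sizeof(info));
		strcpy(info.name, "filter");
		len = sizeof(info);
		getsockopt(fd, IPPROTO_IP, ARPT_SO_GET_INFO, &info, &len);

		len = sizeof(*entries) + info.size;
		entries = calloc(1, len);
		strcpy(entries->name, "filter");
		entries->size = info.size;	/* must match, or EINVAL */
		getsockopt(fd, IPPROTO_IP, ARPT_SO_GET_ENTRIES, entries, &len);

		printf("fetched %u bytes of entries\n", entries->size);
		free(entries);
		return 0;
	}
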
@@ -1090,7 +1724,7 @@ int arpt_register_table(struct arpt_table *table,
1090{ 1724{
1091 int ret; 1725 int ret;
1092 struct xt_table_info *newinfo; 1726 struct xt_table_info *newinfo;
1093 static struct xt_table_info bootstrap 1727 struct xt_table_info bootstrap
1094 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1728 = { 0, 0, 0, { 0 }, { 0 }, { } };
1095 void *loc_cpu_entry; 1729 void *loc_cpu_entry;
1096 1730
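
Dropping static from bootstrap is a one-word correctness fix: the object is scribbled on during registration, so one shared writable static is a latent race if two tables ever register concurrently. A fresh zeroed stack copy per call suffices because the registration core is finished with it before arpt_register_table() returns (an inference consistent with this change, not spelled out in the patch):

	/* Before: one writable object shared by every caller. */
	static struct xt_table_info bootstrap_shared
		= { 0, 0, 0, { 0 }, { 0 }, { } };

	/* After: per-call scratch, consumed before the frame is popped. */
	struct xt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };
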
@@ -1144,6 +1778,11 @@ static struct arpt_target arpt_standard_target __read_mostly = {
1144 .name = ARPT_STANDARD_TARGET, 1778 .name = ARPT_STANDARD_TARGET,
1145 .targetsize = sizeof(int), 1779 .targetsize = sizeof(int),
1146 .family = NF_ARP, 1780 .family = NF_ARP,
1781#ifdef CONFIG_COMPAT
1782 .compatsize = sizeof(compat_int_t),
1783 .compat_from_user = compat_standard_from_user,
1784 .compat_to_user = compat_standard_to_user,
1785#endif
1147}; 1786};
1148 1787
1149static struct arpt_target arpt_error_target __read_mostly = { 1788static struct arpt_target arpt_error_target __read_mostly = {
@@ -1158,9 +1797,15 @@ static struct nf_sockopt_ops arpt_sockopts = {
1158 .set_optmin = ARPT_BASE_CTL, 1797 .set_optmin = ARPT_BASE_CTL,
1159 .set_optmax = ARPT_SO_SET_MAX+1, 1798 .set_optmax = ARPT_SO_SET_MAX+1,
1160 .set = do_arpt_set_ctl, 1799 .set = do_arpt_set_ctl,
1800#ifdef CONFIG_COMPAT
1801 .compat_set = compat_do_arpt_set_ctl,
1802#endif
1161 .get_optmin = ARPT_BASE_CTL, 1803 .get_optmin = ARPT_BASE_CTL,
1162 .get_optmax = ARPT_SO_GET_MAX+1, 1804 .get_optmax = ARPT_SO_GET_MAX+1,
1163 .get = do_arpt_get_ctl, 1805 .get = do_arpt_get_ctl,
1806#ifdef CONFIG_COMPAT
1807 .compat_get = compat_do_arpt_get_ctl,
1808#endif
1164 .owner = THIS_MODULE, 1809 .owner = THIS_MODULE,
1165}; 1810};
1166 1811
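The arp_tables hunks above add CONFIG_COMPAT entry points and hang them off the sockopt table as .compat_set/.compat_get, with the compat "get" falling through to the native handler for commands whose layout does not differ between 32-bit and native callers. A minimal standalone C sketch of that dispatch shape; the struct, handler names and is_compat flag below are illustrative stand-ins, not the kernel's nf_sockopt API:

/*
 * Sketch of the compat-dispatch pattern: one ops table carries both the
 * native and the compat handler, and the compat handler falls back to
 * the native one for commands that need no translation.
 */
#include <stdio.h>

struct sockopt_ops {
	int (*get)(int cmd);
	int (*compat_get)(int cmd);	/* used when a 32-bit task calls in */
};

static int native_get(int cmd)
{
	printf("native handler, cmd=%d\n", cmd);
	return 0;
}

static int compat_get(int cmd)
{
	if (cmd == 1) {			/* layout differs: translate here */
		printf("compat handler, cmd=%d\n", cmd);
		return 0;
	}
	return native_get(cmd);		/* layout identical: fall back */
}

static const struct sockopt_ops ops = {
	.get		= native_get,
	.compat_get	= compat_get,
};

int main(void)
{
	int is_compat = 1;		/* pretend the caller is 32-bit */

	return is_compat ? ops.compat_get(1) : ops.get(1);
}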
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 302d3da5f696..7201511d54d2 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -64,7 +64,7 @@ static unsigned int arpt_hook(unsigned int hook,
64 return arpt_do_table(skb, hook, in, out, &packet_filter); 64 return arpt_do_table(skb, hook, in, out, &packet_filter);
65} 65}
66 66
67static struct nf_hook_ops arpt_ops[] = { 67static struct nf_hook_ops arpt_ops[] __read_mostly = {
68 { 68 {
69 .hook = arpt_hook, 69 .hook = arpt_hook,
70 .owner = THIS_MODULE, 70 .owner = THIS_MODULE,
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 14d64a383db1..5109839da222 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -28,19 +28,15 @@
28#include <net/net_namespace.h> 28#include <net/net_namespace.h>
29#include <net/sock.h> 29#include <net/sock.h>
30#include <net/route.h> 30#include <net/route.h>
31#include <net/netfilter/nf_queue.h>
32#include <net/ip.h>
31 33
32#define IPQ_QMAX_DEFAULT 1024 34#define IPQ_QMAX_DEFAULT 1024
33#define IPQ_PROC_FS_NAME "ip_queue" 35#define IPQ_PROC_FS_NAME "ip_queue"
34#define NET_IPQ_QMAX 2088 36#define NET_IPQ_QMAX 2088
35#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" 37#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
36 38
37struct ipq_queue_entry { 39typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
38 struct list_head list;
39 struct nf_info *info;
40 struct sk_buff *skb;
41};
42
43typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
44 40
45static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 41static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
46static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 42static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
@@ -54,76 +50,13 @@ static struct sock *ipqnl __read_mostly;
54static LIST_HEAD(queue_list); 50static LIST_HEAD(queue_list);
55static DEFINE_MUTEX(ipqnl_mutex); 51static DEFINE_MUTEX(ipqnl_mutex);
56 52
57static void
58ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
59{
60 /* TCP input path (and probably other bits) assume to be called
61 * from softirq context, not from syscall, like ipq_issue_verdict is
62 * called. TCP input path deadlocks with locks taken from timer
63 * softirq, e.g. We therefore emulate this by local_bh_disable() */
64
65 local_bh_disable();
66 nf_reinject(entry->skb, entry->info, verdict);
67 local_bh_enable();
68
69 kfree(entry);
70}
71
72static inline void 53static inline void
73__ipq_enqueue_entry(struct ipq_queue_entry *entry) 54__ipq_enqueue_entry(struct nf_queue_entry *entry)
74{ 55{
75 list_add(&entry->list, &queue_list); 56 list_add_tail(&entry->list, &queue_list);
76 queue_total++; 57 queue_total++;
77} 58}
78 59
79/*
80 * Find and return a queued entry matched by cmpfn, or return the last
81 * entry if cmpfn is NULL.
82 */
83static inline struct ipq_queue_entry *
84__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
85{
86 struct list_head *p;
87
88 list_for_each_prev(p, &queue_list) {
89 struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
90
91 if (!cmpfn || cmpfn(entry, data))
92 return entry;
93 }
94 return NULL;
95}
96
97static inline void
98__ipq_dequeue_entry(struct ipq_queue_entry *entry)
99{
100 list_del(&entry->list);
101 queue_total--;
102}
103
104static inline struct ipq_queue_entry *
105__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
106{
107 struct ipq_queue_entry *entry;
108
109 entry = __ipq_find_entry(cmpfn, data);
110 if (entry == NULL)
111 return NULL;
112
113 __ipq_dequeue_entry(entry);
114 return entry;
115}
116
117
118static inline void
119__ipq_flush(int verdict)
120{
121 struct ipq_queue_entry *entry;
122
123 while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
124 ipq_issue_verdict(entry, verdict);
125}
126
127static inline int 60static inline int
128__ipq_set_mode(unsigned char mode, unsigned int range) 61__ipq_set_mode(unsigned char mode, unsigned int range)
129{ 62{
@@ -150,36 +83,64 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
150 return status; 83 return status;
151} 84}
152 85
86static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
87
153static inline void 88static inline void
154__ipq_reset(void) 89__ipq_reset(void)
155{ 90{
156 peer_pid = 0; 91 peer_pid = 0;
157 net_disable_timestamp(); 92 net_disable_timestamp();
158 __ipq_set_mode(IPQ_COPY_NONE, 0); 93 __ipq_set_mode(IPQ_COPY_NONE, 0);
159 __ipq_flush(NF_DROP); 94 __ipq_flush(NULL, 0);
160} 95}
161 96
162static struct ipq_queue_entry * 97static struct nf_queue_entry *
163ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) 98ipq_find_dequeue_entry(unsigned long id)
164{ 99{
165 struct ipq_queue_entry *entry; 100 struct nf_queue_entry *entry = NULL, *i;
166 101
167 write_lock_bh(&queue_lock); 102 write_lock_bh(&queue_lock);
168 entry = __ipq_find_dequeue_entry(cmpfn, data); 103
104 list_for_each_entry(i, &queue_list, list) {
105 if ((unsigned long)i == id) {
106 entry = i;
107 break;
108 }
109 }
110
111 if (entry) {
112 list_del(&entry->list);
113 queue_total--;
114 }
115
169 write_unlock_bh(&queue_lock); 116 write_unlock_bh(&queue_lock);
170 return entry; 117 return entry;
171} 118}
172 119
173static void 120static void
174ipq_flush(int verdict) 121__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
122{
123 struct nf_queue_entry *entry, *next;
124
125 list_for_each_entry_safe(entry, next, &queue_list, list) {
126 if (!cmpfn || cmpfn(entry, data)) {
127 list_del(&entry->list);
128 queue_total--;
129 nf_reinject(entry, NF_DROP);
130 }
131 }
132}
133
134static void
135ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
175{ 136{
176 write_lock_bh(&queue_lock); 137 write_lock_bh(&queue_lock);
177 __ipq_flush(verdict); 138 __ipq_flush(cmpfn, data);
178 write_unlock_bh(&queue_lock); 139 write_unlock_bh(&queue_lock);
179} 140}
180 141
181static struct sk_buff * 142static struct sk_buff *
182ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) 143ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
183{ 144{
184 sk_buff_data_t old_tail; 145 sk_buff_data_t old_tail;
185 size_t size = 0; 146 size_t size = 0;
@@ -236,20 +197,20 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
236 pmsg->timestamp_sec = tv.tv_sec; 197 pmsg->timestamp_sec = tv.tv_sec;
237 pmsg->timestamp_usec = tv.tv_usec; 198 pmsg->timestamp_usec = tv.tv_usec;
238 pmsg->mark = entry->skb->mark; 199 pmsg->mark = entry->skb->mark;
239 pmsg->hook = entry->info->hook; 200 pmsg->hook = entry->hook;
240 pmsg->hw_protocol = entry->skb->protocol; 201 pmsg->hw_protocol = entry->skb->protocol;
241 202
242 if (entry->info->indev) 203 if (entry->indev)
243 strcpy(pmsg->indev_name, entry->info->indev->name); 204 strcpy(pmsg->indev_name, entry->indev->name);
244 else 205 else
245 pmsg->indev_name[0] = '\0'; 206 pmsg->indev_name[0] = '\0';
246 207
247 if (entry->info->outdev) 208 if (entry->outdev)
248 strcpy(pmsg->outdev_name, entry->info->outdev->name); 209 strcpy(pmsg->outdev_name, entry->outdev->name);
249 else 210 else
250 pmsg->outdev_name[0] = '\0'; 211 pmsg->outdev_name[0] = '\0';
251 212
252 if (entry->info->indev && entry->skb->dev) { 213 if (entry->indev && entry->skb->dev) {
253 pmsg->hw_type = entry->skb->dev->type; 214 pmsg->hw_type = entry->skb->dev->type;
254 pmsg->hw_addrlen = dev_parse_header(entry->skb, 215 pmsg->hw_addrlen = dev_parse_header(entry->skb,
255 pmsg->hw_addr); 216 pmsg->hw_addr);
@@ -271,28 +232,17 @@ nlmsg_failure:
271} 232}
272 233
273static int 234static int
274ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, 235ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
275 unsigned int queuenum, void *data)
276{ 236{
277 int status = -EINVAL; 237 int status = -EINVAL;
278 struct sk_buff *nskb; 238 struct sk_buff *nskb;
279 struct ipq_queue_entry *entry;
280 239
281 if (copy_mode == IPQ_COPY_NONE) 240 if (copy_mode == IPQ_COPY_NONE)
282 return -EAGAIN; 241 return -EAGAIN;
283 242
284 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
285 if (entry == NULL) {
286 printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n");
287 return -ENOMEM;
288 }
289
290 entry->info = info;
291 entry->skb = skb;
292
293 nskb = ipq_build_packet_message(entry, &status); 243 nskb = ipq_build_packet_message(entry, &status);
294 if (nskb == NULL) 244 if (nskb == NULL)
295 goto err_out_free; 245 return status;
296 246
297 write_lock_bh(&queue_lock); 247 write_lock_bh(&queue_lock);
298 248
@@ -326,14 +276,11 @@ err_out_free_nskb:
326 276
327err_out_unlock: 277err_out_unlock:
328 write_unlock_bh(&queue_lock); 278 write_unlock_bh(&queue_lock);
329
330err_out_free:
331 kfree(entry);
332 return status; 279 return status;
333} 280}
334 281
335static int 282static int
336ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) 283ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
337{ 284{
338 int diff; 285 int diff;
339 int err; 286 int err;
@@ -368,21 +315,15 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
368 return 0; 315 return 0;
369} 316}
370 317
371static inline int
372id_cmp(struct ipq_queue_entry *e, unsigned long id)
373{
374 return (id == (unsigned long )e);
375}
376
377static int 318static int
378ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) 319ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
379{ 320{
380 struct ipq_queue_entry *entry; 321 struct nf_queue_entry *entry;
381 322
382 if (vmsg->value > NF_MAX_VERDICT) 323 if (vmsg->value > NF_MAX_VERDICT)
383 return -EINVAL; 324 return -EINVAL;
384 325
385 entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); 326 entry = ipq_find_dequeue_entry(vmsg->id);
386 if (entry == NULL) 327 if (entry == NULL)
387 return -ENOENT; 328 return -ENOENT;
388 else { 329 else {
@@ -392,7 +333,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
392 if (ipq_mangle_ipv4(vmsg, entry) < 0) 333 if (ipq_mangle_ipv4(vmsg, entry) < 0)
393 verdict = NF_DROP; 334 verdict = NF_DROP;
394 335
395 ipq_issue_verdict(entry, verdict); 336 nf_reinject(entry, verdict);
396 return 0; 337 return 0;
397 } 338 }
398} 339}
@@ -437,13 +378,13 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
437} 378}
438 379
439static int 380static int
440dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) 381dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
441{ 382{
442 if (entry->info->indev) 383 if (entry->indev)
443 if (entry->info->indev->ifindex == ifindex) 384 if (entry->indev->ifindex == ifindex)
444 return 1; 385 return 1;
445 if (entry->info->outdev) 386 if (entry->outdev)
446 if (entry->info->outdev->ifindex == ifindex) 387 if (entry->outdev->ifindex == ifindex)
447 return 1; 388 return 1;
448#ifdef CONFIG_BRIDGE_NETFILTER 389#ifdef CONFIG_BRIDGE_NETFILTER
449 if (entry->skb->nf_bridge) { 390 if (entry->skb->nf_bridge) {
@@ -461,10 +402,7 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
461static void 402static void
462ipq_dev_drop(int ifindex) 403ipq_dev_drop(int ifindex)
463{ 404{
464 struct ipq_queue_entry *entry; 405 ipq_flush(dev_cmp, ifindex);
465
466 while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
467 ipq_issue_verdict(entry, NF_DROP);
468} 406}
469 407
470#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 408#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
@@ -588,26 +526,6 @@ static ctl_table ipq_table[] = {
588 { .ctl_name = 0 } 526 { .ctl_name = 0 }
589}; 527};
590 528
591static ctl_table ipq_dir_table[] = {
592 {
593 .ctl_name = NET_IPV4,
594 .procname = "ipv4",
595 .mode = 0555,
596 .child = ipq_table
597 },
598 { .ctl_name = 0 }
599};
600
601static ctl_table ipq_root_table[] = {
602 {
603 .ctl_name = CTL_NET,
604 .procname = "net",
605 .mode = 0555,
606 .child = ipq_dir_table
607 },
608 { .ctl_name = 0 }
609};
610
611static int ip_queue_show(struct seq_file *m, void *v) 529static int ip_queue_show(struct seq_file *m, void *v)
612{ 530{
613 read_lock_bh(&queue_lock); 531 read_lock_bh(&queue_lock);
@@ -645,7 +563,7 @@ static const struct file_operations ip_queue_proc_fops = {
645 .owner = THIS_MODULE, 563 .owner = THIS_MODULE,
646}; 564};
647 565
648static struct nf_queue_handler nfqh = { 566static const struct nf_queue_handler nfqh = {
649 .name = "ip_queue", 567 .name = "ip_queue",
650 .outfn = &ipq_enqueue_packet, 568 .outfn = &ipq_enqueue_packet,
651}; 569};
@@ -673,7 +591,7 @@ static int __init ip_queue_init(void)
673 } 591 }
674 592
675 register_netdevice_notifier(&ipq_dev_notifier); 593 register_netdevice_notifier(&ipq_dev_notifier);
676 ipq_sysctl_header = register_sysctl_table(ipq_root_table); 594 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
677 595
678 status = nf_register_queue_handler(PF_INET, &nfqh); 596 status = nf_register_queue_handler(PF_INET, &nfqh);
679 if (status < 0) { 597 if (status < 0) {
@@ -687,7 +605,7 @@ cleanup_sysctl:
687 unregister_netdevice_notifier(&ipq_dev_notifier); 605 unregister_netdevice_notifier(&ipq_dev_notifier);
688 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 606 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
689cleanup_ipqnl: 607cleanup_ipqnl:
690 sock_release(ipqnl->sk_socket); 608 netlink_kernel_release(ipqnl);
691 mutex_lock(&ipqnl_mutex); 609 mutex_lock(&ipqnl_mutex);
692 mutex_unlock(&ipqnl_mutex); 610 mutex_unlock(&ipqnl_mutex);
693 611
@@ -700,13 +618,13 @@ static void __exit ip_queue_fini(void)
700{ 618{
701 nf_unregister_queue_handlers(&nfqh); 619 nf_unregister_queue_handlers(&nfqh);
702 synchronize_net(); 620 synchronize_net();
703 ipq_flush(NF_DROP); 621 ipq_flush(NULL, 0);
704 622
705 unregister_sysctl_table(ipq_sysctl_header); 623 unregister_sysctl_table(ipq_sysctl_header);
706 unregister_netdevice_notifier(&ipq_dev_notifier); 624 unregister_netdevice_notifier(&ipq_dev_notifier);
707 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 625 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
708 626
709 sock_release(ipqnl->sk_socket); 627 netlink_kernel_release(ipqnl);
710 mutex_lock(&ipqnl_mutex); 628 mutex_lock(&ipqnl_mutex);
711 mutex_unlock(&ipqnl_mutex); 629 mutex_unlock(&ipqnl_mutex);
712 630
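The ip_queue rework above replaces the find-one-entry-then-reinject loops with a single predicate-driven walk: __ipq_flush() takes a cmpfn and drops everything it selects, so device removal (dev_cmp) and full teardown (a NULL cmpfn) share one code path. A standalone userspace sketch of that pattern, assuming a hand-rolled singly linked list in place of the kernel's list_head:

/*
 * Predicate-driven flush: one walk either drops everything (cmpfn ==
 * NULL) or only the entries the predicate selects.  Types and the
 * reinject stand-in are illustrative, not the kernel nf_queue API.
 */
#include <stdio.h>
#include <stdlib.h>

struct entry {
	struct entry *next;
	int ifindex;			/* datum the predicate inspects */
};

typedef int (*cmpfn_t)(struct entry *, long);

static struct entry *queue;

static void reinject(struct entry *e, const char *verdict)
{
	printf("entry if=%d -> %s\n", e->ifindex, verdict);
	free(e);
}

/* Drop every queued entry matched by cmpfn, or all of them if it is NULL. */
static void flush(cmpfn_t cmpfn, long data)
{
	struct entry **pp = &queue, *e;

	while ((e = *pp) != NULL) {
		if (!cmpfn || cmpfn(e, data)) {
			*pp = e->next;	/* unlink safely mid-walk */
			reinject(e, "DROP");
		} else {
			pp = &e->next;
		}
	}
}

static int dev_cmp(struct entry *e, long ifindex)
{
	return e->ifindex == ifindex;
}

int main(void)
{
	for (int i = 1; i <= 3; i++) {
		struct entry *e = malloc(sizeof(*e));

		if (!e)
			return 1;
		e->ifindex = i;
		e->next = queue;
		queue = e;
	}
	flush(dev_cmp, 2);	/* a device went away: drop only its entries */
	flush(NULL, 0);		/* shutdown: drop whatever is left */
	return 0;
}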
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b9b189c26208..982b7f986291 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -26,6 +26,7 @@
26 26
27#include <linux/netfilter/x_tables.h> 27#include <linux/netfilter/x_tables.h>
28#include <linux/netfilter_ipv4/ip_tables.h> 28#include <linux/netfilter_ipv4/ip_tables.h>
29#include <net/netfilter/nf_log.h>
29 30
30MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 32MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -74,7 +75,8 @@ do { \
74 Hence the start of any table is given by get_table() below. */ 75 Hence the start of any table is given by get_table() below. */
75 76
76/* Returns whether matches rule or not. */ 77/* Returns whether matches rule or not. */
77static inline int 78/* Performance critical - called for every packet */
79static inline bool
78ip_packet_match(const struct iphdr *ip, 80ip_packet_match(const struct iphdr *ip,
79 const char *indev, 81 const char *indev,
80 const char *outdev, 82 const char *outdev,
@@ -84,7 +86,7 @@ ip_packet_match(const struct iphdr *ip,
84 size_t i; 86 size_t i;
85 unsigned long ret; 87 unsigned long ret;
86 88
87#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg)) 89#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
88 90
89 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, 91 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
90 IPT_INV_SRCIP) 92 IPT_INV_SRCIP)
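The FWINV macro touched above implements match inversion with a single XOR: the raw mismatch test is flipped when the rule carries the corresponding IPT_INV_* flag, so one expression covers both "must match" and "must not match" rules. A self-contained demo of the idiom; the flag value and addresses here are made up for illustration:

/*
 * mismatch ^ inverted: true exactly when the rule, as written,
 * rejects the packet.
 */
#include <stdio.h>

#define INV_SRCIP  0x01

#define FWINV(cond, flag) ((cond) ^ !!(invflags & (flag)))

int main(void)
{
	unsigned int rule_src = 0x0a000001;	/* 10.0.0.1 */
	unsigned int pkt_src  = 0x0a000002;	/* 10.0.0.2 */
	unsigned int invflags = INV_SRCIP;	/* rule says "! -s 10.0.0.1" */

	if (FWINV(pkt_src != rule_src, INV_SRCIP))
		printf("no match\n");
	else
		printf("match\n");	/* printed: mismatch + inversion */
	return 0;
}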
@@ -102,7 +104,7 @@ ip_packet_match(const struct iphdr *ip,
102 NIPQUAD(ipinfo->dmsk.s_addr), 104 NIPQUAD(ipinfo->dmsk.s_addr),
103 NIPQUAD(ipinfo->dst.s_addr), 105 NIPQUAD(ipinfo->dst.s_addr),
104 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); 106 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
105 return 0; 107 return false;
106 } 108 }
107 109
108 /* Look for ifname matches; this should unroll nicely. */ 110 /* Look for ifname matches; this should unroll nicely. */
@@ -116,7 +118,7 @@ ip_packet_match(const struct iphdr *ip,
116 dprintf("VIA in mismatch (%s vs %s).%s\n", 118 dprintf("VIA in mismatch (%s vs %s).%s\n",
117 indev, ipinfo->iniface, 119 indev, ipinfo->iniface,
118 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":""); 120 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
119 return 0; 121 return false;
120 } 122 }
121 123
122 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { 124 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
@@ -129,7 +131,7 @@ ip_packet_match(const struct iphdr *ip,
129 dprintf("VIA out mismatch (%s vs %s).%s\n", 131 dprintf("VIA out mismatch (%s vs %s).%s\n",
130 outdev, ipinfo->outiface, 132 outdev, ipinfo->outiface,
131 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":""); 133 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
132 return 0; 134 return false;
133 } 135 }
134 136
135 /* Check specific protocol */ 137 /* Check specific protocol */
@@ -138,7 +140,7 @@ ip_packet_match(const struct iphdr *ip,
138 dprintf("Packet protocol %hi does not match %hi.%s\n", 140 dprintf("Packet protocol %hi does not match %hi.%s\n",
139 ip->protocol, ipinfo->proto, 141 ip->protocol, ipinfo->proto,
140 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); 142 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
141 return 0; 143 return false;
142 } 144 }
143 145
144 /* If we have a fragment rule but the packet is not a fragment 146 /* If we have a fragment rule but the packet is not a fragment
@@ -146,13 +148,13 @@ ip_packet_match(const struct iphdr *ip,
146 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { 148 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
147 dprintf("Fragment rule but not fragment.%s\n", 149 dprintf("Fragment rule but not fragment.%s\n",
148 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : ""); 150 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
149 return 0; 151 return false;
150 } 152 }
151 153
152 return 1; 154 return true;
153} 155}
154 156
155static inline bool 157static bool
156ip_checkentry(const struct ipt_ip *ip) 158ip_checkentry(const struct ipt_ip *ip)
157{ 159{
158 if (ip->flags & ~IPT_F_MASK) { 160 if (ip->flags & ~IPT_F_MASK) {
@@ -182,8 +184,9 @@ ipt_error(struct sk_buff *skb,
182 return NF_DROP; 184 return NF_DROP;
183} 185}
184 186
185static inline 187/* Performance critical - called for every packet */
186bool do_match(struct ipt_entry_match *m, 188static inline bool
189do_match(struct ipt_entry_match *m,
187 const struct sk_buff *skb, 190 const struct sk_buff *skb,
188 const struct net_device *in, 191 const struct net_device *in,
189 const struct net_device *out, 192 const struct net_device *out,
@@ -198,6 +201,7 @@ bool do_match(struct ipt_entry_match *m,
198 return false; 201 return false;
199} 202}
200 203
204/* Performance critical */
201static inline struct ipt_entry * 205static inline struct ipt_entry *
202get_entry(void *base, unsigned int offset) 206get_entry(void *base, unsigned int offset)
203{ 207{
@@ -205,6 +209,7 @@ get_entry(void *base, unsigned int offset)
205} 209}
206 210
207/* All zeroes == unconditional rule. */ 211/* All zeroes == unconditional rule. */
212/* Mildly perf critical (only if packet tracing is on) */
208static inline int 213static inline int
209unconditional(const struct ipt_ip *ip) 214unconditional(const struct ipt_ip *ip)
210{ 215{
@@ -215,16 +220,17 @@ unconditional(const struct ipt_ip *ip)
215 return 0; 220 return 0;
216 221
217 return 1; 222 return 1;
223#undef FWINV
218} 224}
219 225
220#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 226#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
221 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 227 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
222static const char *hooknames[] = { 228static const char *const hooknames[] = {
223 [NF_IP_PRE_ROUTING] = "PREROUTING", 229 [NF_INET_PRE_ROUTING] = "PREROUTING",
224 [NF_IP_LOCAL_IN] = "INPUT", 230 [NF_INET_LOCAL_IN] = "INPUT",
225 [NF_IP_FORWARD] = "FORWARD", 231 [NF_INET_FORWARD] = "FORWARD",
226 [NF_IP_LOCAL_OUT] = "OUTPUT", 232 [NF_INET_LOCAL_OUT] = "OUTPUT",
227 [NF_IP_POST_ROUTING] = "POSTROUTING", 233 [NF_INET_POST_ROUTING] = "POSTROUTING",
228}; 234};
229 235
230enum nf_ip_trace_comments { 236enum nf_ip_trace_comments {
@@ -233,7 +239,7 @@ enum nf_ip_trace_comments {
233 NF_IP_TRACE_COMMENT_POLICY, 239 NF_IP_TRACE_COMMENT_POLICY,
234}; 240};
235 241
236static const char *comments[] = { 242static const char *const comments[] = {
237 [NF_IP_TRACE_COMMENT_RULE] = "rule", 243 [NF_IP_TRACE_COMMENT_RULE] = "rule",
238 [NF_IP_TRACE_COMMENT_RETURN] = "return", 244 [NF_IP_TRACE_COMMENT_RETURN] = "return",
239 [NF_IP_TRACE_COMMENT_POLICY] = "policy", 245 [NF_IP_TRACE_COMMENT_POLICY] = "policy",
@@ -249,6 +255,7 @@ static struct nf_loginfo trace_loginfo = {
249 }, 255 },
250}; 256};
251 257
258/* Mildly perf critical (only if packet tracing is on) */
252static inline int 259static inline int
253get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, 260get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
254 char *hookname, char **chainname, 261 char *hookname, char **chainname,
@@ -465,10 +472,9 @@ mark_source_chains(struct xt_table_info *newinfo,
465 472
466 /* No recursion; use packet counter to save back ptrs (reset 473 /* No recursion; use packet counter to save back ptrs (reset
467 to 0 as we leave), and comefrom to save source hook bitmask */ 474 to 0 as we leave), and comefrom to save source hook bitmask */
468 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { 475 for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
469 unsigned int pos = newinfo->hook_entry[hook]; 476 unsigned int pos = newinfo->hook_entry[hook];
470 struct ipt_entry *e 477 struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
471 = (struct ipt_entry *)(entry0 + pos);
472 478
473 if (!(valid_hooks & (1 << hook))) 479 if (!(valid_hooks & (1 << hook)))
474 continue; 480 continue;
@@ -481,13 +487,12 @@ mark_source_chains(struct xt_table_info *newinfo,
481 = (void *)ipt_get_target(e); 487 = (void *)ipt_get_target(e);
482 int visited = e->comefrom & (1 << hook); 488 int visited = e->comefrom & (1 << hook);
483 489
484 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { 490 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
485 printk("iptables: loop hook %u pos %u %08X.\n", 491 printk("iptables: loop hook %u pos %u %08X.\n",
486 hook, pos, e->comefrom); 492 hook, pos, e->comefrom);
487 return 0; 493 return 0;
488 } 494 }
489 e->comefrom 495 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
490 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
491 496
492 /* Unconditional return/END. */ 497 /* Unconditional return/END. */
493 if ((e->target_offset == sizeof(struct ipt_entry) 498 if ((e->target_offset == sizeof(struct ipt_entry)
@@ -507,10 +512,10 @@ mark_source_chains(struct xt_table_info *newinfo,
507 /* Return: backtrack through the last 512 /* Return: backtrack through the last
508 big jump. */ 513 big jump. */
509 do { 514 do {
510 e->comefrom ^= (1<<NF_IP_NUMHOOKS); 515 e->comefrom ^= (1<<NF_INET_NUMHOOKS);
511#ifdef DEBUG_IP_FIREWALL_USER 516#ifdef DEBUG_IP_FIREWALL_USER
512 if (e->comefrom 517 if (e->comefrom
513 & (1 << NF_IP_NUMHOOKS)) { 518 & (1 << NF_INET_NUMHOOKS)) {
514 duprintf("Back unset " 519 duprintf("Back unset "
515 "on hook %u " 520 "on hook %u "
516 "rule %u\n", 521 "rule %u\n",
@@ -567,7 +572,7 @@ mark_source_chains(struct xt_table_info *newinfo,
567 return 1; 572 return 1;
568} 573}
569 574
570static inline int 575static int
571cleanup_match(struct ipt_entry_match *m, unsigned int *i) 576cleanup_match(struct ipt_entry_match *m, unsigned int *i)
572{ 577{
573 if (i && (*i)-- == 0) 578 if (i && (*i)-- == 0)
@@ -579,7 +584,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
579 return 0; 584 return 0;
580} 585}
581 586
582static inline int 587static int
583check_entry(struct ipt_entry *e, const char *name) 588check_entry(struct ipt_entry *e, const char *name)
584{ 589{
585 struct ipt_entry_target *t; 590 struct ipt_entry_target *t;
@@ -589,7 +594,8 @@ check_entry(struct ipt_entry *e, const char *name)
589 return -EINVAL; 594 return -EINVAL;
590 } 595 }
591 596
592 if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset) 597 if (e->target_offset + sizeof(struct ipt_entry_target) >
598 e->next_offset)
593 return -EINVAL; 599 return -EINVAL;
594 600
595 t = ipt_get_target(e); 601 t = ipt_get_target(e);
@@ -599,9 +605,10 @@ check_entry(struct ipt_entry *e, const char *name)
599 return 0; 605 return 0;
600} 606}
601 607
602static inline int check_match(struct ipt_entry_match *m, const char *name, 608static int
603 const struct ipt_ip *ip, unsigned int hookmask, 609check_match(struct ipt_entry_match *m, const char *name,
604 unsigned int *i) 610 const struct ipt_ip *ip,
611 unsigned int hookmask, unsigned int *i)
605{ 612{
606 struct xt_match *match; 613 struct xt_match *match;
607 int ret; 614 int ret;
@@ -622,18 +629,18 @@ static inline int check_match(struct ipt_entry_match *m, const char *name,
622 return ret; 629 return ret;
623} 630}
624 631
625static inline int 632static int
626find_check_match(struct ipt_entry_match *m, 633find_check_match(struct ipt_entry_match *m,
627 const char *name, 634 const char *name,
628 const struct ipt_ip *ip, 635 const struct ipt_ip *ip,
629 unsigned int hookmask, 636 unsigned int hookmask,
630 unsigned int *i) 637 unsigned int *i)
631{ 638{
632 struct xt_match *match; 639 struct xt_match *match;
633 int ret; 640 int ret;
634 641
635 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, 642 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
636 m->u.user.revision), 643 m->u.user.revision),
637 "ipt_%s", m->u.user.name); 644 "ipt_%s", m->u.user.name);
638 if (IS_ERR(match) || !match) { 645 if (IS_ERR(match) || !match) {
639 duprintf("find_check_match: `%s' not found\n", m->u.user.name); 646 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
@@ -651,7 +658,7 @@ err:
651 return ret; 658 return ret;
652} 659}
653 660
654static inline int check_target(struct ipt_entry *e, const char *name) 661static int check_target(struct ipt_entry *e, const char *name)
655{ 662{
656 struct ipt_entry_target *t; 663 struct ipt_entry_target *t;
657 struct xt_target *target; 664 struct xt_target *target;
@@ -663,8 +670,8 @@ static inline int check_target(struct ipt_entry *e, const char *name)
663 name, e->comefrom, e->ip.proto, 670 name, e->comefrom, e->ip.proto,
664 e->ip.invflags & IPT_INV_PROTO); 671 e->ip.invflags & IPT_INV_PROTO);
665 if (!ret && t->u.kernel.target->checkentry 672 if (!ret && t->u.kernel.target->checkentry
666 && !t->u.kernel.target->checkentry(name, e, target, 673 && !t->u.kernel.target->checkentry(name, e, target, t->data,
667 t->data, e->comefrom)) { 674 e->comefrom)) {
668 duprintf("ip_tables: check failed for `%s'.\n", 675 duprintf("ip_tables: check failed for `%s'.\n",
669 t->u.kernel.target->name); 676 t->u.kernel.target->name);
670 ret = -EINVAL; 677 ret = -EINVAL;
@@ -672,9 +679,9 @@ static inline int check_target(struct ipt_entry *e, const char *name)
672 return ret; 679 return ret;
673} 680}
674 681
675static inline int 682static int
676find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, 683find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
677 unsigned int *i) 684 unsigned int *i)
678{ 685{
679 struct ipt_entry_target *t; 686 struct ipt_entry_target *t;
680 struct xt_target *target; 687 struct xt_target *target;
@@ -687,14 +694,14 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
687 694
688 j = 0; 695 j = 0;
689 ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, 696 ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip,
690 e->comefrom, &j); 697 e->comefrom, &j);
691 if (ret != 0) 698 if (ret != 0)
692 goto cleanup_matches; 699 goto cleanup_matches;
693 700
694 t = ipt_get_target(e); 701 t = ipt_get_target(e);
695 target = try_then_request_module(xt_find_target(AF_INET, 702 target = try_then_request_module(xt_find_target(AF_INET,
696 t->u.user.name, 703 t->u.user.name,
697 t->u.user.revision), 704 t->u.user.revision),
698 "ipt_%s", t->u.user.name); 705 "ipt_%s", t->u.user.name);
699 if (IS_ERR(target) || !target) { 706 if (IS_ERR(target) || !target) {
700 duprintf("find_check_entry: `%s' not found\n", t->u.user.name); 707 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
@@ -716,7 +723,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
716 return ret; 723 return ret;
717} 724}
718 725
719static inline int 726static int
720check_entry_size_and_hooks(struct ipt_entry *e, 727check_entry_size_and_hooks(struct ipt_entry *e,
721 struct xt_table_info *newinfo, 728 struct xt_table_info *newinfo,
722 unsigned char *base, 729 unsigned char *base,
@@ -741,7 +748,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
741 } 748 }
742 749
743 /* Check hooks & underflows */ 750 /* Check hooks & underflows */
744 for (h = 0; h < NF_IP_NUMHOOKS; h++) { 751 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
745 if ((unsigned char *)e - base == hook_entries[h]) 752 if ((unsigned char *)e - base == hook_entries[h])
746 newinfo->hook_entry[h] = hook_entries[h]; 753 newinfo->hook_entry[h] = hook_entries[h];
747 if ((unsigned char *)e - base == underflows[h]) 754 if ((unsigned char *)e - base == underflows[h])
@@ -759,7 +766,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
759 return 0; 766 return 0;
760} 767}
761 768
762static inline int 769static int
763cleanup_entry(struct ipt_entry *e, unsigned int *i) 770cleanup_entry(struct ipt_entry *e, unsigned int *i)
764{ 771{
765 struct ipt_entry_target *t; 772 struct ipt_entry_target *t;
@@ -795,7 +802,7 @@ translate_table(const char *name,
795 newinfo->number = number; 802 newinfo->number = number;
796 803
797 /* Init all hooks to impossible value. */ 804 /* Init all hooks to impossible value. */
798 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 805 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
799 newinfo->hook_entry[i] = 0xFFFFFFFF; 806 newinfo->hook_entry[i] = 0xFFFFFFFF;
800 newinfo->underflow[i] = 0xFFFFFFFF; 807 newinfo->underflow[i] = 0xFFFFFFFF;
801 } 808 }
@@ -819,7 +826,7 @@ translate_table(const char *name,
819 } 826 }
820 827
821 /* Check hooks all assigned */ 828 /* Check hooks all assigned */
822 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 829 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
823 /* Only hooks which are valid */ 830 /* Only hooks which are valid */
824 if (!(valid_hooks & (1 << i))) 831 if (!(valid_hooks & (1 << i)))
825 continue; 832 continue;
@@ -915,7 +922,7 @@ get_counters(const struct xt_table_info *t,
915 } 922 }
916} 923}
917 924
918static inline struct xt_counters * alloc_counters(struct xt_table *table) 925static struct xt_counters * alloc_counters(struct xt_table *table)
919{ 926{
920 unsigned int countersize; 927 unsigned int countersize;
921 struct xt_counters *counters; 928 struct xt_counters *counters;
@@ -959,7 +966,6 @@ copy_entries_to_user(unsigned int total_size,
959 * allowed to migrate to another cpu) 966 * allowed to migrate to another cpu)
960 */ 967 */
961 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 968 loc_cpu_entry = private->entries[raw_smp_processor_id()];
962 /* ... then copy entire thing ... */
963 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { 969 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
964 ret = -EFAULT; 970 ret = -EFAULT;
965 goto free_counters; 971 goto free_counters;
@@ -1014,63 +1020,12 @@ copy_entries_to_user(unsigned int total_size,
1014} 1020}
1015 1021
1016#ifdef CONFIG_COMPAT 1022#ifdef CONFIG_COMPAT
1017struct compat_delta {
1018 struct compat_delta *next;
1019 unsigned int offset;
1020 short delta;
1021};
1022
1023static struct compat_delta *compat_offsets = NULL;
1024
1025static int compat_add_offset(unsigned int offset, short delta)
1026{
1027 struct compat_delta *tmp;
1028
1029 tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
1030 if (!tmp)
1031 return -ENOMEM;
1032 tmp->offset = offset;
1033 tmp->delta = delta;
1034 if (compat_offsets) {
1035 tmp->next = compat_offsets->next;
1036 compat_offsets->next = tmp;
1037 } else {
1038 compat_offsets = tmp;
1039 tmp->next = NULL;
1040 }
1041 return 0;
1042}
1043
1044static void compat_flush_offsets(void)
1045{
1046 struct compat_delta *tmp, *next;
1047
1048 if (compat_offsets) {
1049 for(tmp = compat_offsets; tmp; tmp = next) {
1050 next = tmp->next;
1051 kfree(tmp);
1052 }
1053 compat_offsets = NULL;
1054 }
1055}
1056
1057static short compat_calc_jump(unsigned int offset)
1058{
1059 struct compat_delta *tmp;
1060 short delta;
1061
1062 for(tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next)
1063 if (tmp->offset < offset)
1064 delta += tmp->delta;
1065 return delta;
1066}
1067
1068static void compat_standard_from_user(void *dst, void *src) 1023static void compat_standard_from_user(void *dst, void *src)
1069{ 1024{
1070 int v = *(compat_int_t *)src; 1025 int v = *(compat_int_t *)src;
1071 1026
1072 if (v > 0) 1027 if (v > 0)
1073 v += compat_calc_jump(v); 1028 v += xt_compat_calc_jump(AF_INET, v);
1074 memcpy(dst, &v, sizeof(v)); 1029 memcpy(dst, &v, sizeof(v));
1075} 1030}
1076 1031
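The deleted compat_delta code above kept per-entry (offset, delta) records so jump targets could be translated between the 32-bit and native rule layouts; the replacement calls centralise the same bookkeeping in x_tables as xt_compat_add_offset()/xt_compat_calc_jump(). A standalone sketch of the underlying calculation, with invented offsets and deltas:

/*
 * Each entry whose compat layout is smaller records (offset, delta);
 * a forward jump is adjusted by the sum of all deltas accumulated at
 * offsets below the jump target.
 */
#include <stdio.h>

struct delta_rec {
	unsigned int offset;	/* where the size difference occurred */
	int delta;		/* native size minus compat size */
};

static const struct delta_rec table[] = {
	{ 100, 8 },
	{ 240, 8 },
	{ 400, 16 },
};

/* Sum every delta recorded below the jump target. */
static int calc_jump(unsigned int target)
{
	int adj = 0;

	for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].offset < target)
			adj += table[i].delta;
	return adj;
}

int main(void)
{
	unsigned int compat_jump = 300;	/* verdict from 32-bit userspace */

	/* converting compat -> native grows the jump by the deltas: 316 */
	printf("native jump = %u\n", compat_jump + calc_jump(compat_jump));
	return 0;
}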
@@ -1079,64 +1034,61 @@ static int compat_standard_to_user(void __user *dst, void *src)
1079 compat_int_t cv = *(int *)src; 1034 compat_int_t cv = *(int *)src;
1080 1035
1081 if (cv > 0) 1036 if (cv > 0)
1082 cv -= compat_calc_jump(cv); 1037 cv -= xt_compat_calc_jump(AF_INET, cv);
1083 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; 1038 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1084} 1039}
1085 1040
1086static inline int 1041static inline int
1087compat_calc_match(struct ipt_entry_match *m, int * size) 1042compat_calc_match(struct ipt_entry_match *m, int *size)
1088{ 1043{
1089 *size += xt_compat_match_offset(m->u.kernel.match); 1044 *size += xt_compat_match_offset(m->u.kernel.match);
1090 return 0; 1045 return 0;
1091} 1046}
1092 1047
1093static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, 1048static int compat_calc_entry(struct ipt_entry *e,
1094 void *base, struct xt_table_info *newinfo) 1049 const struct xt_table_info *info,
1050 void *base, struct xt_table_info *newinfo)
1095{ 1051{
1096 struct ipt_entry_target *t; 1052 struct ipt_entry_target *t;
1097 unsigned int entry_offset; 1053 unsigned int entry_offset;
1098 int off, i, ret; 1054 int off, i, ret;
1099 1055
1100 off = 0; 1056 off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1101 entry_offset = (void *)e - base; 1057 entry_offset = (void *)e - base;
1102 IPT_MATCH_ITERATE(e, compat_calc_match, &off); 1058 IPT_MATCH_ITERATE(e, compat_calc_match, &off);
1103 t = ipt_get_target(e); 1059 t = ipt_get_target(e);
1104 off += xt_compat_target_offset(t->u.kernel.target); 1060 off += xt_compat_target_offset(t->u.kernel.target);
1105 newinfo->size -= off; 1061 newinfo->size -= off;
1106 ret = compat_add_offset(entry_offset, off); 1062 ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1107 if (ret) 1063 if (ret)
1108 return ret; 1064 return ret;
1109 1065
1110 for (i = 0; i< NF_IP_NUMHOOKS; i++) { 1066 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1111 if (info->hook_entry[i] && (e < (struct ipt_entry *) 1067 if (info->hook_entry[i] &&
1112 (base + info->hook_entry[i]))) 1068 (e < (struct ipt_entry *)(base + info->hook_entry[i])))
1113 newinfo->hook_entry[i] -= off; 1069 newinfo->hook_entry[i] -= off;
1114 if (info->underflow[i] && (e < (struct ipt_entry *) 1070 if (info->underflow[i] &&
1115 (base + info->underflow[i]))) 1071 (e < (struct ipt_entry *)(base + info->underflow[i])))
1116 newinfo->underflow[i] -= off; 1072 newinfo->underflow[i] -= off;
1117 } 1073 }
1118 return 0; 1074 return 0;
1119} 1075}
1120 1076
1121static int compat_table_info(struct xt_table_info *info, 1077static int compat_table_info(const struct xt_table_info *info,
1122 struct xt_table_info *newinfo) 1078 struct xt_table_info *newinfo)
1123{ 1079{
1124 void *loc_cpu_entry; 1080 void *loc_cpu_entry;
1125 int i;
1126 1081
1127 if (!newinfo || !info) 1082 if (!newinfo || !info)
1128 return -EINVAL; 1083 return -EINVAL;
1129 1084
1130 memset(newinfo, 0, sizeof(struct xt_table_info)); 1085 /* we dont care about newinfo->entries[] */
1131 newinfo->size = info->size; 1086 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1132 newinfo->number = info->number; 1087 newinfo->initial_entries = 0;
1133 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
1134 newinfo->hook_entry[i] = info->hook_entry[i];
1135 newinfo->underflow[i] = info->underflow[i];
1136 }
1137 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1088 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1138 return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size, 1089 return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
1139 compat_calc_entry, info, loc_cpu_entry, newinfo); 1090 compat_calc_entry, info, loc_cpu_entry,
1091 newinfo);
1140} 1092}
1141#endif 1093#endif
1142 1094
@@ -1147,8 +1099,8 @@ static int get_info(void __user *user, int *len, int compat)
1147 int ret; 1099 int ret;
1148 1100
1149 if (*len != sizeof(struct ipt_getinfo)) { 1101 if (*len != sizeof(struct ipt_getinfo)) {
1150 duprintf("length %u != %u\n", *len, 1102 duprintf("length %u != %zu\n", *len,
1151 (unsigned int)sizeof(struct ipt_getinfo)); 1103 sizeof(struct ipt_getinfo));
1152 return -EINVAL; 1104 return -EINVAL;
1153 } 1105 }
1154 1106
@@ -1161,7 +1113,7 @@ static int get_info(void __user *user, int *len, int compat)
1161 xt_compat_lock(AF_INET); 1113 xt_compat_lock(AF_INET);
1162#endif 1114#endif
1163 t = try_then_request_module(xt_find_table_lock(AF_INET, name), 1115 t = try_then_request_module(xt_find_table_lock(AF_INET, name),
1164 "iptable_%s", name); 1116 "iptable_%s", name);
1165 if (t && !IS_ERR(t)) { 1117 if (t && !IS_ERR(t)) {
1166 struct ipt_getinfo info; 1118 struct ipt_getinfo info;
1167 struct xt_table_info *private = t->private; 1119 struct xt_table_info *private = t->private;
@@ -1170,15 +1122,15 @@ static int get_info(void __user *user, int *len, int compat)
1170 if (compat) { 1122 if (compat) {
1171 struct xt_table_info tmp; 1123 struct xt_table_info tmp;
1172 ret = compat_table_info(private, &tmp); 1124 ret = compat_table_info(private, &tmp);
1173 compat_flush_offsets(); 1125 xt_compat_flush_offsets(AF_INET);
1174 private = &tmp; 1126 private = &tmp;
1175 } 1127 }
1176#endif 1128#endif
1177 info.valid_hooks = t->valid_hooks; 1129 info.valid_hooks = t->valid_hooks;
1178 memcpy(info.hook_entry, private->hook_entry, 1130 memcpy(info.hook_entry, private->hook_entry,
1179 sizeof(info.hook_entry)); 1131 sizeof(info.hook_entry));
1180 memcpy(info.underflow, private->underflow, 1132 memcpy(info.underflow, private->underflow,
1181 sizeof(info.underflow)); 1133 sizeof(info.underflow));
1182 info.num_entries = private->number; 1134 info.num_entries = private->number;
1183 info.size = private->size; 1135 info.size = private->size;
1184 strcpy(info.name, name); 1136 strcpy(info.name, name);
@@ -1207,31 +1159,27 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
1207 struct xt_table *t; 1159 struct xt_table *t;
1208 1160
1209 if (*len < sizeof(get)) { 1161 if (*len < sizeof(get)) {
1210 duprintf("get_entries: %u < %d\n", *len, 1162 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1211 (unsigned int)sizeof(get));
1212 return -EINVAL; 1163 return -EINVAL;
1213 } 1164 }
1214 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1165 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1215 return -EFAULT; 1166 return -EFAULT;
1216 if (*len != sizeof(struct ipt_get_entries) + get.size) { 1167 if (*len != sizeof(struct ipt_get_entries) + get.size) {
1217 duprintf("get_entries: %u != %u\n", *len, 1168 duprintf("get_entries: %u != %zu\n",
1218 (unsigned int)(sizeof(struct ipt_get_entries) + 1169 *len, sizeof(get) + get.size);
1219 get.size));
1220 return -EINVAL; 1170 return -EINVAL;
1221 } 1171 }
1222 1172
1223 t = xt_find_table_lock(AF_INET, get.name); 1173 t = xt_find_table_lock(AF_INET, get.name);
1224 if (t && !IS_ERR(t)) { 1174 if (t && !IS_ERR(t)) {
1225 struct xt_table_info *private = t->private; 1175 struct xt_table_info *private = t->private;
1226 duprintf("t->private->number = %u\n", 1176 duprintf("t->private->number = %u\n", private->number);
1227 private->number);
1228 if (get.size == private->size) 1177 if (get.size == private->size)
1229 ret = copy_entries_to_user(private->size, 1178 ret = copy_entries_to_user(private->size,
1230 t, uptr->entrytable); 1179 t, uptr->entrytable);
1231 else { 1180 else {
1232 duprintf("get_entries: I've got %u not %u!\n", 1181 duprintf("get_entries: I've got %u not %u!\n",
1233 private->size, 1182 private->size, get.size);
1234 get.size);
1235 ret = -EINVAL; 1183 ret = -EINVAL;
1236 } 1184 }
1237 module_put(t->me); 1185 module_put(t->me);
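get_entries() above validates the sockopt buffer in two stages: the fixed header must fit before it is copied in, and the total length must then equal the header plus the size the header itself announces, so a caller-supplied *len cannot cause an over-read. A standalone model of that check, with a stand-in header struct in place of ipt_get_entries:

#include <stdio.h>
#include <string.h>

struct get_hdr {
	char name[16];
	unsigned int size;	/* payload bytes the caller announces */
};

static int validate(const void *user, size_t len)
{
	struct get_hdr get;

	if (len < sizeof(get))
		return -1;		/* fixed header does not even fit */
	memcpy(&get, user, sizeof(get));
	if (len != sizeof(get) + get.size)
		return -1;		/* total length disagrees with header */
	return 0;
}

int main(void)
{
	struct get_hdr g = { .name = "filter", .size = 32 };
	unsigned char buf[sizeof(struct get_hdr) + 32] = { 0 };

	memcpy(buf, &g, sizeof(g));
	printf("ok: %d\n", validate(buf, sizeof(buf)) == 0);	    /* ok: 1 */
	printf("short: %d\n", validate(buf, sizeof(buf) - 1) == 0); /* short: 0 */
	return 0;
}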
@@ -1244,8 +1192,8 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
1244 1192
1245static int 1193static int
1246__do_replace(const char *name, unsigned int valid_hooks, 1194__do_replace(const char *name, unsigned int valid_hooks,
1247 struct xt_table_info *newinfo, unsigned int num_counters, 1195 struct xt_table_info *newinfo, unsigned int num_counters,
1248 void __user *counters_ptr) 1196 void __user *counters_ptr)
1249{ 1197{
1250 int ret; 1198 int ret;
1251 struct xt_table *t; 1199 struct xt_table *t;
@@ -1293,7 +1241,8 @@ __do_replace(const char *name, unsigned int valid_hooks,
1293 get_counters(oldinfo, counters); 1241 get_counters(oldinfo, counters);
1294 /* Decrease module usage counts and free resource */ 1242 /* Decrease module usage counts and free resource */
1295 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; 1243 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1296 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); 1244 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1245 NULL);
1297 xt_free_table_info(oldinfo); 1246 xt_free_table_info(oldinfo);
1298 if (copy_to_user(counters_ptr, counters, 1247 if (copy_to_user(counters_ptr, counters,
1299 sizeof(struct xt_counters) * num_counters) != 0) 1248 sizeof(struct xt_counters) * num_counters) != 0)
@@ -1322,14 +1271,7 @@ do_replace(void __user *user, unsigned int len)
1322 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1271 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1323 return -EFAULT; 1272 return -EFAULT;
1324 1273
1325 /* Hack: Causes ipchains to give correct error msg --RR */
1326 if (len != sizeof(tmp) + tmp.size)
1327 return -ENOPROTOOPT;
1328
1329 /* overflow check */ 1274 /* overflow check */
1330 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
1331 SMP_CACHE_BYTES)
1332 return -ENOMEM;
1333 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1275 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1334 return -ENOMEM; 1276 return -ENOMEM;
1335 1277
@@ -1337,7 +1279,7 @@ do_replace(void __user *user, unsigned int len)
1337 if (!newinfo) 1279 if (!newinfo)
1338 return -ENOMEM; 1280 return -ENOMEM;
1339 1281
1340 /* choose the copy that is our node/cpu */ 1282 /* choose the copy that is on our node/cpu */
1341 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 1283 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1342 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1284 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1343 tmp.size) != 0) { 1285 tmp.size) != 0) {
@@ -1353,15 +1295,14 @@ do_replace(void __user *user, unsigned int len)
1353 1295
1354 duprintf("ip_tables: Translated table\n"); 1296 duprintf("ip_tables: Translated table\n");
1355 1297
1356 ret = __do_replace(tmp.name, tmp.valid_hooks, 1298 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
1357 newinfo, tmp.num_counters, 1299 tmp.num_counters, tmp.counters);
1358 tmp.counters);
1359 if (ret) 1300 if (ret)
1360 goto free_newinfo_untrans; 1301 goto free_newinfo_untrans;
1361 return 0; 1302 return 0;
1362 1303
1363 free_newinfo_untrans: 1304 free_newinfo_untrans:
1364 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); 1305 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1365 free_newinfo: 1306 free_newinfo:
1366 xt_free_table_info(newinfo); 1307 xt_free_table_info(newinfo);
1367 return ret; 1308 return ret;
@@ -1369,7 +1310,7 @@ do_replace(void __user *user, unsigned int len)
1369 1310
1370/* We're lazy, and add to the first CPU; overflow works its fey magic 1311/* We're lazy, and add to the first CPU; overflow works its fey magic
1371 * and everything is OK. */ 1312 * and everything is OK. */
1372static inline int 1313static int
1373add_counter_to_entry(struct ipt_entry *e, 1314add_counter_to_entry(struct ipt_entry *e,
1374 const struct xt_counters addme[], 1315 const struct xt_counters addme[],
1375 unsigned int *i) 1316 unsigned int *i)
@@ -1479,19 +1420,13 @@ struct compat_ipt_replace {
1479 u32 valid_hooks; 1420 u32 valid_hooks;
1480 u32 num_entries; 1421 u32 num_entries;
1481 u32 size; 1422 u32 size;
1482 u32 hook_entry[NF_IP_NUMHOOKS]; 1423 u32 hook_entry[NF_INET_NUMHOOKS];
1483 u32 underflow[NF_IP_NUMHOOKS]; 1424 u32 underflow[NF_INET_NUMHOOKS];
1484 u32 num_counters; 1425 u32 num_counters;
1485 compat_uptr_t counters; /* struct ipt_counters * */ 1426 compat_uptr_t counters; /* struct ipt_counters * */
1486 struct compat_ipt_entry entries[0]; 1427 struct compat_ipt_entry entries[0];
1487}; 1428};
1488 1429
1489static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
1490 void __user **dstptr, compat_uint_t *size)
1491{
1492 return xt_compat_match_to_user(m, dstptr, size);
1493}
1494
1495static int 1430static int
1496compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, 1431compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1497 compat_uint_t *size, struct xt_counters *counters, 1432 compat_uint_t *size, struct xt_counters *counters,
@@ -1513,7 +1448,9 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1513 goto out; 1448 goto out;
1514 1449
1515 *dstptr += sizeof(struct compat_ipt_entry); 1450 *dstptr += sizeof(struct compat_ipt_entry);
1516 ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size); 1451 *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1452
1453 ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1517 target_offset = e->target_offset - (origsize - *size); 1454 target_offset = e->target_offset - (origsize - *size);
1518 if (ret) 1455 if (ret)
1519 goto out; 1456 goto out;
@@ -1534,21 +1471,21 @@ out:
1534 return ret; 1471 return ret;
1535} 1472}
1536 1473
1537static inline int 1474static int
1538compat_find_calc_match(struct ipt_entry_match *m, 1475compat_find_calc_match(struct ipt_entry_match *m,
1539 const char *name, 1476 const char *name,
1540 const struct ipt_ip *ip, 1477 const struct ipt_ip *ip,
1541 unsigned int hookmask, 1478 unsigned int hookmask,
1542 int *size, int *i) 1479 int *size, int *i)
1543{ 1480{
1544 struct xt_match *match; 1481 struct xt_match *match;
1545 1482
1546 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, 1483 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1547 m->u.user.revision), 1484 m->u.user.revision),
1548 "ipt_%s", m->u.user.name); 1485 "ipt_%s", m->u.user.name);
1549 if (IS_ERR(match) || !match) { 1486 if (IS_ERR(match) || !match) {
1550 duprintf("compat_check_calc_match: `%s' not found\n", 1487 duprintf("compat_check_calc_match: `%s' not found\n",
1551 m->u.user.name); 1488 m->u.user.name);
1552 return match ? PTR_ERR(match) : -ENOENT; 1489 return match ? PTR_ERR(match) : -ENOENT;
1553 } 1490 }
1554 m->u.kernel.match = match; 1491 m->u.kernel.match = match;
@@ -1558,7 +1495,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
1558 return 0; 1495 return 0;
1559} 1496}
1560 1497
1561static inline int 1498static int
1562compat_release_match(struct ipt_entry_match *m, unsigned int *i) 1499compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1563{ 1500{
1564 if (i && (*i)-- == 0) 1501 if (i && (*i)-- == 0)
@@ -1568,8 +1505,8 @@ compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1568 return 0; 1505 return 0;
1569} 1506}
1570 1507
1571static inline int 1508static int
1572compat_release_entry(struct ipt_entry *e, unsigned int *i) 1509compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1573{ 1510{
1574 struct ipt_entry_target *t; 1511 struct ipt_entry_target *t;
1575 1512
@@ -1577,22 +1514,22 @@ compat_release_entry(struct ipt_entry *e, unsigned int *i)
1577 return 1; 1514 return 1;
1578 1515
1579 /* Cleanup all matches */ 1516 /* Cleanup all matches */
1580 IPT_MATCH_ITERATE(e, compat_release_match, NULL); 1517 COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
1581 t = ipt_get_target(e); 1518 t = compat_ipt_get_target(e);
1582 module_put(t->u.kernel.target->me); 1519 module_put(t->u.kernel.target->me);
1583 return 0; 1520 return 0;
1584} 1521}
1585 1522
1586static inline int 1523static int
1587check_compat_entry_size_and_hooks(struct ipt_entry *e, 1524check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1588 struct xt_table_info *newinfo, 1525 struct xt_table_info *newinfo,
1589 unsigned int *size, 1526 unsigned int *size,
1590 unsigned char *base, 1527 unsigned char *base,
1591 unsigned char *limit, 1528 unsigned char *limit,
1592 unsigned int *hook_entries, 1529 unsigned int *hook_entries,
1593 unsigned int *underflows, 1530 unsigned int *underflows,
1594 unsigned int *i, 1531 unsigned int *i,
1595 const char *name) 1532 const char *name)
1596{ 1533{
1597 struct ipt_entry_target *t; 1534 struct ipt_entry_target *t;
1598 struct xt_target *target; 1535 struct xt_target *target;
@@ -1607,32 +1544,33 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1607 } 1544 }
1608 1545
1609 if (e->next_offset < sizeof(struct compat_ipt_entry) + 1546 if (e->next_offset < sizeof(struct compat_ipt_entry) +
1610 sizeof(struct compat_xt_entry_target)) { 1547 sizeof(struct compat_xt_entry_target)) {
1611 duprintf("checking: element %p size %u\n", 1548 duprintf("checking: element %p size %u\n",
1612 e, e->next_offset); 1549 e, e->next_offset);
1613 return -EINVAL; 1550 return -EINVAL;
1614 } 1551 }
1615 1552
1616 ret = check_entry(e, name); 1553 /* For purposes of check_entry casting the compat entry is fine */
1554 ret = check_entry((struct ipt_entry *)e, name);
1617 if (ret) 1555 if (ret)
1618 return ret; 1556 return ret;
1619 1557
1620 off = 0; 1558 off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1621 entry_offset = (void *)e - (void *)base; 1559 entry_offset = (void *)e - (void *)base;
1622 j = 0; 1560 j = 0;
1623 ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip, 1561 ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1624 e->comefrom, &off, &j); 1562 &e->ip, e->comefrom, &off, &j);
1625 if (ret != 0) 1563 if (ret != 0)
1626 goto release_matches; 1564 goto release_matches;
1627 1565
1628 t = ipt_get_target(e); 1566 t = compat_ipt_get_target(e);
1629 target = try_then_request_module(xt_find_target(AF_INET, 1567 target = try_then_request_module(xt_find_target(AF_INET,
1630 t->u.user.name, 1568 t->u.user.name,
1631 t->u.user.revision), 1569 t->u.user.revision),
1632 "ipt_%s", t->u.user.name); 1570 "ipt_%s", t->u.user.name);
1633 if (IS_ERR(target) || !target) { 1571 if (IS_ERR(target) || !target) {
1634 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", 1572 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1635 t->u.user.name); 1573 t->u.user.name);
1636 ret = target ? PTR_ERR(target) : -ENOENT; 1574 ret = target ? PTR_ERR(target) : -ENOENT;
1637 goto release_matches; 1575 goto release_matches;
1638 } 1576 }
@@ -1640,12 +1578,12 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1640 1578
1641 off += xt_compat_target_offset(target); 1579 off += xt_compat_target_offset(target);
1642 *size += off; 1580 *size += off;
1643 ret = compat_add_offset(entry_offset, off); 1581 ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1644 if (ret) 1582 if (ret)
1645 goto out; 1583 goto out;
1646 1584
1647 /* Check hooks & underflows */ 1585 /* Check hooks & underflows */
1648 for (h = 0; h < NF_IP_NUMHOOKS; h++) { 1586 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1649 if ((unsigned char *)e - base == hook_entries[h]) 1587 if ((unsigned char *)e - base == hook_entries[h])
1650 newinfo->hook_entry[h] = hook_entries[h]; 1588 newinfo->hook_entry[h] = hook_entries[h];
1651 if ((unsigned char *)e - base == underflows[h]) 1589 if ((unsigned char *)e - base == underflows[h])
@@ -1653,7 +1591,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1653 } 1591 }
1654 1592
1655 /* Clear counters and comefrom */ 1593 /* Clear counters and comefrom */
1656 e->counters = ((struct ipt_counters) { 0, 0 }); 1594 memset(&e->counters, 0, sizeof(e->counters));
1657 e->comefrom = 0; 1595 e->comefrom = 0;
1658 1596
1659 (*i)++; 1597 (*i)++;
@@ -1666,17 +1604,10 @@ release_matches:
1666 return ret; 1604 return ret;
1667} 1605}
1668 1606
1669static inline int compat_copy_match_from_user(struct ipt_entry_match *m, 1607static int
1670 void **dstptr, compat_uint_t *size, const char *name, 1608compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1671 const struct ipt_ip *ip, unsigned int hookmask) 1609 unsigned int *size, const char *name,
1672{ 1610 struct xt_table_info *newinfo, unsigned char *base)
1673 xt_compat_match_from_user(m, dstptr, size);
1674 return 0;
1675}
1676
1677static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
1678 unsigned int *size, const char *name,
1679 struct xt_table_info *newinfo, unsigned char *base)
1680{ 1611{
1681 struct ipt_entry_target *t; 1612 struct ipt_entry_target *t;
1682 struct xt_target *target; 1613 struct xt_target *target;
@@ -1688,19 +1619,22 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
1688 origsize = *size; 1619 origsize = *size;
1689 de = (struct ipt_entry *)*dstptr; 1620 de = (struct ipt_entry *)*dstptr;
1690 memcpy(de, e, sizeof(struct ipt_entry)); 1621 memcpy(de, e, sizeof(struct ipt_entry));
1622 memcpy(&de->counters, &e->counters, sizeof(e->counters));
1691 1623
1692 *dstptr += sizeof(struct compat_ipt_entry); 1624 *dstptr += sizeof(struct ipt_entry);
1693 ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, 1625 *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1694 name, &de->ip, de->comefrom); 1626
1627 ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1628 dstptr, size);
1695 if (ret) 1629 if (ret)
1696 return ret; 1630 return ret;
1697 de->target_offset = e->target_offset - (origsize - *size); 1631 de->target_offset = e->target_offset - (origsize - *size);
1698 t = ipt_get_target(e); 1632 t = compat_ipt_get_target(e);
1699 target = t->u.kernel.target; 1633 target = t->u.kernel.target;
1700 xt_compat_target_from_user(t, dstptr, size); 1634 xt_compat_target_from_user(t, dstptr, size);
1701 1635
1702 de->next_offset = e->next_offset - (origsize - *size); 1636 de->next_offset = e->next_offset - (origsize - *size);
1703 for (h = 0; h < NF_IP_NUMHOOKS; h++) { 1637 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1704 if ((unsigned char *)de - base < newinfo->hook_entry[h]) 1638 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1705 newinfo->hook_entry[h] -= origsize - *size; 1639 newinfo->hook_entry[h] -= origsize - *size;
1706 if ((unsigned char *)de - base < newinfo->underflow[h]) 1640 if ((unsigned char *)de - base < newinfo->underflow[h])
@@ -1709,13 +1643,15 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
1709 return ret; 1643 return ret;
1710} 1644}
1711 1645
1712static inline int compat_check_entry(struct ipt_entry *e, const char *name, 1646static int
1713 unsigned int *i) 1647compat_check_entry(struct ipt_entry *e, const char *name,
1648 unsigned int *i)
1714{ 1649{
1715 int j, ret; 1650 int j, ret;
1716 1651
1717 j = 0; 1652 j = 0;
1718 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); 1653 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip,
1654 e->comefrom, &j);
1719 if (ret) 1655 if (ret)
1720 goto cleanup_matches; 1656 goto cleanup_matches;
1721 1657
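
The rebasing in compat_copy_entry_from_user() above relies on unsigned modular arithmetic: as each compat match expands to its native size, *size grows, origsize - *size goes negative, and subtracting it pushes every stored offset outward. A small worked example with made-up sizes:

	#include <stdio.h>

	int main(void)
	{
		unsigned int origsize = 1000;	/* *size before this entry */
		unsigned int size = 1008;	/* *size after conversion (+8) */
		unsigned int compat_target_offset = 96;

		/* origsize - size wraps to the unsigned equivalent of -8,
		 * so the subtraction shifts the offset outward by 8 */
		unsigned int native_target_offset =
			compat_target_offset - (origsize - size);

		printf("%u\n", native_target_offset);	/* prints 104 */
		return 0;
	}
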
@@ -1733,13 +1669,13 @@ static inline int compat_check_entry(struct ipt_entry *e, const char *name,
1733 1669
1734static int 1670static int
1735translate_compat_table(const char *name, 1671translate_compat_table(const char *name,
1736 unsigned int valid_hooks, 1672 unsigned int valid_hooks,
1737 struct xt_table_info **pinfo, 1673 struct xt_table_info **pinfo,
1738 void **pentry0, 1674 void **pentry0,
1739 unsigned int total_size, 1675 unsigned int total_size,
1740 unsigned int number, 1676 unsigned int number,
1741 unsigned int *hook_entries, 1677 unsigned int *hook_entries,
1742 unsigned int *underflows) 1678 unsigned int *underflows)
1743{ 1679{
1744 unsigned int i, j; 1680 unsigned int i, j;
1745 struct xt_table_info *newinfo, *info; 1681 struct xt_table_info *newinfo, *info;
@@ -1753,7 +1689,7 @@ translate_compat_table(const char *name,
1753 info->number = number; 1689 info->number = number;
1754 1690
1755 /* Init all hooks to impossible value. */ 1691 /* Init all hooks to impossible value. */
1756 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 1692 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1757 info->hook_entry[i] = 0xFFFFFFFF; 1693 info->hook_entry[i] = 0xFFFFFFFF;
1758 info->underflow[i] = 0xFFFFFFFF; 1694 info->underflow[i] = 0xFFFFFFFF;
1759 } 1695 }
@@ -1762,11 +1698,11 @@ translate_compat_table(const char *name,
1762 j = 0; 1698 j = 0;
1763 xt_compat_lock(AF_INET); 1699 xt_compat_lock(AF_INET);
1764 /* Walk through entries, checking offsets. */ 1700 /* Walk through entries, checking offsets. */
1765 ret = IPT_ENTRY_ITERATE(entry0, total_size, 1701 ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1766 check_compat_entry_size_and_hooks, 1702 check_compat_entry_size_and_hooks,
1767 info, &size, entry0, 1703 info, &size, entry0,
1768 entry0 + total_size, 1704 entry0 + total_size,
1769 hook_entries, underflows, &j, name); 1705 hook_entries, underflows, &j, name);
1770 if (ret != 0) 1706 if (ret != 0)
1771 goto out_unlock; 1707 goto out_unlock;
1772 1708
@@ -1778,7 +1714,7 @@ translate_compat_table(const char *name,
1778 } 1714 }
1779 1715
1780 /* Check hooks all assigned */ 1716 /* Check hooks all assigned */
1781 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 1717 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1782 /* Only hooks which are valid */ 1718 /* Only hooks which are valid */
1783 if (!(valid_hooks & (1 << i))) 1719 if (!(valid_hooks & (1 << i)))
1784 continue; 1720 continue;
@@ -1800,17 +1736,17 @@ translate_compat_table(const char *name,
1800 goto out_unlock; 1736 goto out_unlock;
1801 1737
1802 newinfo->number = number; 1738 newinfo->number = number;
1803 for (i = 0; i < NF_IP_NUMHOOKS; i++) { 1739 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1804 newinfo->hook_entry[i] = info->hook_entry[i]; 1740 newinfo->hook_entry[i] = info->hook_entry[i];
1805 newinfo->underflow[i] = info->underflow[i]; 1741 newinfo->underflow[i] = info->underflow[i];
1806 } 1742 }
1807 entry1 = newinfo->entries[raw_smp_processor_id()]; 1743 entry1 = newinfo->entries[raw_smp_processor_id()];
1808 pos = entry1; 1744 pos = entry1;
1809 size = total_size; 1745 size = total_size;
1810 ret = IPT_ENTRY_ITERATE(entry0, total_size, 1746 ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1811 compat_copy_entry_from_user, &pos, &size, 1747 compat_copy_entry_from_user,
1812 name, newinfo, entry1); 1748 &pos, &size, name, newinfo, entry1);
1813 compat_flush_offsets(); 1749 xt_compat_flush_offsets(AF_INET);
1814 xt_compat_unlock(AF_INET); 1750 xt_compat_unlock(AF_INET);
1815 if (ret) 1751 if (ret)
1816 goto free_newinfo; 1752 goto free_newinfo;
@@ -1821,11 +1757,11 @@ translate_compat_table(const char *name,
1821 1757
1822 i = 0; 1758 i = 0;
1823 ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, 1759 ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1824 name, &i); 1760 name, &i);
1825 if (ret) { 1761 if (ret) {
1826 j -= i; 1762 j -= i;
1827 IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, i, 1763 COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1828 compat_release_entry, &j); 1764 compat_release_entry, &j);
1829 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); 1765 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1830 xt_free_table_info(newinfo); 1766 xt_free_table_info(newinfo);
1831 return ret; 1767 return ret;
@@ -1844,10 +1780,10 @@ translate_compat_table(const char *name,
1844free_newinfo: 1780free_newinfo:
1845 xt_free_table_info(newinfo); 1781 xt_free_table_info(newinfo);
1846out: 1782out:
1847 IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); 1783 COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1848 return ret; 1784 return ret;
1849out_unlock: 1785out_unlock:
1850 compat_flush_offsets(); 1786 xt_compat_flush_offsets(AF_INET);
1851 xt_compat_unlock(AF_INET); 1787 xt_compat_unlock(AF_INET);
1852 goto out; 1788 goto out;
1853} 1789}
@@ -1863,13 +1799,8 @@ compat_do_replace(void __user *user, unsigned int len)
1863 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1799 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1864 return -EFAULT; 1800 return -EFAULT;
1865 1801
1866 /* Hack: Causes ipchains to give correct error msg --RR */
1867 if (len != sizeof(tmp) + tmp.size)
1868 return -ENOPROTOOPT;
1869
1870 /* overflow check */ 1802 /* overflow check */
1871 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - 1803 if (tmp.size >= INT_MAX / num_possible_cpus())
1872 SMP_CACHE_BYTES)
1873 return -ENOMEM; 1804 return -ENOMEM;
1874 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1805 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1875 return -ENOMEM; 1806 return -ENOMEM;
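
The tightened overflow check above drops the SMP_CACHE_BYTES and NR_CPUS slack in favour of a plain division. A quick demonstration, with an illustrative CPU count, that any size accepted by the new test cannot overflow int when multiplied back out by the per-CPU allocation:

	#include <limits.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int cpus = 8;
		/* largest value that still passes size < INT_MAX / cpus */
		unsigned int largest_ok = INT_MAX / cpus - 1;

		unsigned long long total = (unsigned long long)largest_ok * cpus;
		printf("%u * %u = %llu, INT_MAX = %d\n",
		       largest_ok, cpus, total, INT_MAX);	/* total <= INT_MAX */
		return 0;
	}
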
@@ -1878,7 +1809,7 @@ compat_do_replace(void __user *user, unsigned int len)
1878 if (!newinfo) 1809 if (!newinfo)
1879 return -ENOMEM; 1810 return -ENOMEM;
1880 1811
1881 /* choose the copy that is our node/cpu */ 1812 /* choose the copy that is on our node/cpu */
1882 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 1813 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1883 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), 1814 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1884 tmp.size) != 0) { 1815 tmp.size) != 0) {
@@ -1887,22 +1818,22 @@ compat_do_replace(void __user *user, unsigned int len)
1887 } 1818 }
1888 1819
1889 ret = translate_compat_table(tmp.name, tmp.valid_hooks, 1820 ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1890 &newinfo, &loc_cpu_entry, tmp.size, 1821 &newinfo, &loc_cpu_entry, tmp.size,
1891 tmp.num_entries, tmp.hook_entry, tmp.underflow); 1822 tmp.num_entries, tmp.hook_entry,
1823 tmp.underflow);
1892 if (ret != 0) 1824 if (ret != 0)
1893 goto free_newinfo; 1825 goto free_newinfo;
1894 1826
1895 duprintf("compat_do_replace: Translated table\n"); 1827 duprintf("compat_do_replace: Translated table\n");
1896 1828
1897 ret = __do_replace(tmp.name, tmp.valid_hooks, 1829 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
1898 newinfo, tmp.num_counters, 1830 tmp.num_counters, compat_ptr(tmp.counters));
1899 compat_ptr(tmp.counters));
1900 if (ret) 1831 if (ret)
1901 goto free_newinfo_untrans; 1832 goto free_newinfo_untrans;
1902 return 0; 1833 return 0;
1903 1834
1904 free_newinfo_untrans: 1835 free_newinfo_untrans:
1905 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); 1836 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1906 free_newinfo: 1837 free_newinfo:
1907 xt_free_table_info(newinfo); 1838 xt_free_table_info(newinfo);
1908 return ret; 1839 return ret;
@@ -1910,7 +1841,7 @@ compat_do_replace(void __user *user, unsigned int len)
1910 1841
1911static int 1842static int
1912compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, 1843compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1913 unsigned int len) 1844 unsigned int len)
1914{ 1845{
1915 int ret; 1846 int ret;
1916 1847
@@ -1934,15 +1865,15 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1934 return ret; 1865 return ret;
1935} 1866}
1936 1867
1937struct compat_ipt_get_entries 1868struct compat_ipt_get_entries {
1938{
1939 char name[IPT_TABLE_MAXNAMELEN]; 1869 char name[IPT_TABLE_MAXNAMELEN];
1940 compat_uint_t size; 1870 compat_uint_t size;
1941 struct compat_ipt_entry entrytable[0]; 1871 struct compat_ipt_entry entrytable[0];
1942}; 1872};
1943 1873
1944static int compat_copy_entries_to_user(unsigned int total_size, 1874static int
1945 struct xt_table *table, void __user *userptr) 1875compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1876 void __user *userptr)
1946{ 1877{
1947 struct xt_counters *counters; 1878 struct xt_counters *counters;
1948 struct xt_table_info *private = table->private; 1879 struct xt_table_info *private = table->private;
@@ -1978,10 +1909,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
1978 struct compat_ipt_get_entries get; 1909 struct compat_ipt_get_entries get;
1979 struct xt_table *t; 1910 struct xt_table *t;
1980 1911
1981
1982 if (*len < sizeof(get)) { 1912 if (*len < sizeof(get)) {
1983 duprintf("compat_get_entries: %u < %u\n", 1913 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1984 *len, (unsigned int)sizeof(get));
1985 return -EINVAL; 1914 return -EINVAL;
1986 } 1915 }
1987 1916
@@ -1989,9 +1918,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
1989 return -EFAULT; 1918 return -EFAULT;
1990 1919
1991 if (*len != sizeof(struct compat_ipt_get_entries) + get.size) { 1920 if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
1992 duprintf("compat_get_entries: %u != %u\n", *len, 1921 duprintf("compat_get_entries: %u != %zu\n",
1993 (unsigned int)(sizeof(struct compat_ipt_get_entries) + 1922 *len, sizeof(get) + get.size);
1994 get.size));
1995 return -EINVAL; 1923 return -EINVAL;
1996 } 1924 }
1997 1925
@@ -2000,19 +1928,17 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
2000 if (t && !IS_ERR(t)) { 1928 if (t && !IS_ERR(t)) {
2001 struct xt_table_info *private = t->private; 1929 struct xt_table_info *private = t->private;
2002 struct xt_table_info info; 1930 struct xt_table_info info;
2003 duprintf("t->private->number = %u\n", 1931 duprintf("t->private->number = %u\n", private->number);
2004 private->number);
2005 ret = compat_table_info(private, &info); 1932 ret = compat_table_info(private, &info);
2006 if (!ret && get.size == info.size) { 1933 if (!ret && get.size == info.size) {
2007 ret = compat_copy_entries_to_user(private->size, 1934 ret = compat_copy_entries_to_user(private->size,
2008 t, uptr->entrytable); 1935 t, uptr->entrytable);
2009 } else if (!ret) { 1936 } else if (!ret) {
2010 duprintf("compat_get_entries: I've got %u not %u!\n", 1937 duprintf("compat_get_entries: I've got %u not %u!\n",
2011 private->size, 1938 private->size, get.size);
2012 get.size);
2013 ret = -EINVAL; 1939 ret = -EINVAL;
2014 } 1940 }
2015 compat_flush_offsets(); 1941 xt_compat_flush_offsets(AF_INET);
2016 module_put(t->me); 1942 module_put(t->me);
2017 xt_table_unlock(t); 1943 xt_table_unlock(t);
2018 } else 1944 } else
@@ -2047,7 +1973,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2047#endif 1973#endif
2048 1974
2049static int 1975static int
2050do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 1976do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2051{ 1977{
2052 int ret; 1978 int ret;
2053 1979
@@ -2126,7 +2052,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
2126{ 2052{
2127 int ret; 2053 int ret;
2128 struct xt_table_info *newinfo; 2054 struct xt_table_info *newinfo;
2129 static struct xt_table_info bootstrap 2055 struct xt_table_info bootstrap
2130 = { 0, 0, 0, { 0 }, { 0 }, { } }; 2056 = { 0, 0, 0, { 0 }, { 0 }, { } };
2131 void *loc_cpu_entry; 2057 void *loc_cpu_entry;
2132 2058
@@ -2134,9 +2060,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
2134 if (!newinfo) 2060 if (!newinfo)
2135 return -ENOMEM; 2061 return -ENOMEM;
2136 2062
2137 /* choose the copy on our node/cpu 2063 /* choose the copy on our node/cpu, but dont care about preemption */
2138 * but dont care of preemption
2139 */
2140 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 2064 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2141 memcpy(loc_cpu_entry, repl->entries, repl->size); 2065 memcpy(loc_cpu_entry, repl->entries, repl->size);
2142 2066
@@ -2178,7 +2102,8 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
2178 u_int8_t type, u_int8_t code, 2102 u_int8_t type, u_int8_t code,
2179 bool invert) 2103 bool invert)
2180{ 2104{
2181 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code)) 2105 return ((test_type == 0xFF) ||
2106 (type == test_type && code >= min_code && code <= max_code))
2182 ^ invert; 2107 ^ invert;
2183} 2108}
2184 2109
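
The reflowed predicate above rewards a careful read: 0xFF acts as an "any type" wildcard, and the trailing XOR applies the rule's invert flag to the whole verdict. A standalone copy for experimentation; the wrapper and sample values are ours, the boolean logic is the hunk's:

	#include <stdbool.h>
	#include <stdio.h>

	static bool icmp_type_code_match(unsigned char test_type,
					 unsigned char min_code,
					 unsigned char max_code,
					 unsigned char type,
					 unsigned char code,
					 bool invert)
	{
		return ((test_type == 0xFF) ||
			(type == test_type && code >= min_code && code <= max_code))
		       ^ invert;
	}

	int main(void)
	{
		printf("%d\n", icmp_type_code_match(0xFF, 0, 0xFF, 8, 0, false)); /* 1: wildcard */
		printf("%d\n", icmp_type_code_match(3, 0, 3, 3, 4, false));       /* 0: code out of range */
		printf("%d\n", icmp_type_code_match(3, 0, 3, 3, 4, true));        /* 1: inverted */
		return 0;
	}
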
@@ -2219,7 +2144,7 @@ icmp_match(const struct sk_buff *skb,
2219/* Called when user tries to insert an entry of this type. */ 2144/* Called when user tries to insert an entry of this type. */
2220static bool 2145static bool
2221icmp_checkentry(const char *tablename, 2146icmp_checkentry(const char *tablename,
2222 const void *info, 2147 const void *entry,
2223 const struct xt_match *match, 2148 const struct xt_match *match,
2224 void *matchinfo, 2149 void *matchinfo,
2225 unsigned int hook_mask) 2150 unsigned int hook_mask)
@@ -2270,9 +2195,9 @@ static struct xt_match icmp_matchstruct __read_mostly = {
2270 .name = "icmp", 2195 .name = "icmp",
2271 .match = icmp_match, 2196 .match = icmp_match,
2272 .matchsize = sizeof(struct ipt_icmp), 2197 .matchsize = sizeof(struct ipt_icmp),
2198 .checkentry = icmp_checkentry,
2273 .proto = IPPROTO_ICMP, 2199 .proto = IPPROTO_ICMP,
2274 .family = AF_INET, 2200 .family = AF_INET,
2275 .checkentry = icmp_checkentry,
2276}; 2201};
2277 2202
2278static int __init ip_tables_init(void) 2203static int __init ip_tables_init(void)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2f544dac72df..1b31f7d14d46 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -32,7 +32,7 @@
32 32
33MODULE_LICENSE("GPL"); 33MODULE_LICENSE("GPL");
34MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 34MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
35MODULE_DESCRIPTION("iptables target for CLUSTERIP"); 35MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
36 36
37struct clusterip_config { 37struct clusterip_config {
38 struct list_head list; /* list of all configs */ 38 struct list_head list; /* list of all configs */
@@ -109,11 +109,9 @@ clusterip_config_entry_put(struct clusterip_config *c)
109static struct clusterip_config * 109static struct clusterip_config *
110__clusterip_config_find(__be32 clusterip) 110__clusterip_config_find(__be32 clusterip)
111{ 111{
112 struct list_head *pos; 112 struct clusterip_config *c;
113 113
114 list_for_each(pos, &clusterip_configs) { 114 list_for_each_entry(c, &clusterip_configs, list) {
115 struct clusterip_config *c = list_entry(pos,
116 struct clusterip_config, list);
117 if (c->clusterip == clusterip) 115 if (c->clusterip == clusterip)
118 return c; 116 return c;
119 } 117 }
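
The conversion above from list_for_each() plus list_entry() to list_for_each_entry() folds the container_of() step into the iterator itself. A self-contained userspace sketch of the open-coded equivalent, with minimal stand-ins for the kernel's list primitives:

	#include <stddef.h>
	#include <stdio.h>

	struct list_head { struct list_head *next, *prev; };
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))
	#define list_entry(ptr, type, member) container_of(ptr, type, member)

	struct clusterip_config {
		struct list_head list;
		unsigned int clusterip;
	};

	int main(void)
	{
		struct clusterip_config a = { .clusterip = 1 }, b = { .clusterip = 2 };
		struct list_head head = { &a.list, &b.list };

		a.list.next = &b.list; a.list.prev = &head;
		b.list.next = &head;   b.list.prev = &a.list;

		/* open-coded body of list_for_each_entry() */
		for (struct list_head *pos = head.next; pos != &head; pos = pos->next) {
			struct clusterip_config *c =
				list_entry(pos, struct clusterip_config, list);
			printf("clusterip=%u\n", c->clusterip);
		}
		return 0;
	}
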
@@ -275,7 +273,7 @@ clusterip_hashfn(const struct sk_buff *skb,
275 } 273 }
276 274
277 /* node numbers are 1..n, not 0..n */ 275 /* node numbers are 1..n, not 0..n */
278 return (hashval % config->num_total_nodes) + 1; 276 return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
279} 277}
280 278
281static inline int 279static inline int
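
The new hash-to-node mapping above replaces a modulo with a multiply-and-shift: treating the 32-bit hash as a fraction of 2^32, (hash * n) >> 32 scales it into 0..n-1 without a division. A worked comparison with sample values:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t hashval = 0xDEADBEEF;
		uint32_t num_total_nodes = 3;

		uint32_t old_node = (hashval % num_total_nodes) + 1;
		uint32_t new_node =
		    (uint32_t)(((uint64_t)hashval * num_total_nodes) >> 32) + 1;

		/* both results land in 1..num_total_nodes */
		printf("old=%u new=%u\n", old_node, new_node);
		return 0;
	}
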
@@ -289,12 +287,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
289 ***********************************************************************/ 287 ***********************************************************************/
290 288
291static unsigned int 289static unsigned int
292target(struct sk_buff *skb, 290clusterip_tg(struct sk_buff *skb, const struct net_device *in,
293 const struct net_device *in, 291 const struct net_device *out, unsigned int hooknum,
294 const struct net_device *out, 292 const struct xt_target *target, const void *targinfo)
295 unsigned int hooknum,
296 const struct xt_target *target,
297 const void *targinfo)
298{ 293{
299 const struct ipt_clusterip_tgt_info *cipinfo = targinfo; 294 const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
300 struct nf_conn *ct; 295 struct nf_conn *ct;
@@ -361,11 +356,9 @@ target(struct sk_buff *skb,
361} 356}
362 357
363static bool 358static bool
364checkentry(const char *tablename, 359clusterip_tg_check(const char *tablename, const void *e_void,
365 const void *e_void, 360 const struct xt_target *target, void *targinfo,
366 const struct xt_target *target, 361 unsigned int hook_mask)
367 void *targinfo,
368 unsigned int hook_mask)
369{ 362{
370 struct ipt_clusterip_tgt_info *cipinfo = targinfo; 363 struct ipt_clusterip_tgt_info *cipinfo = targinfo;
371 const struct ipt_entry *e = e_void; 364 const struct ipt_entry *e = e_void;
@@ -421,7 +414,7 @@ checkentry(const char *tablename,
421 414
422 if (nf_ct_l3proto_try_module_get(target->family) < 0) { 415 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
423 printk(KERN_WARNING "can't load conntrack support for " 416 printk(KERN_WARNING "can't load conntrack support for "
424 "proto=%d\n", target->family); 417 "proto=%u\n", target->family);
425 return false; 418 return false;
426 } 419 }
427 420
@@ -429,7 +422,7 @@ checkentry(const char *tablename,
429} 422}
430 423
431/* drop reference count of cluster config when rule is deleted */ 424/* drop reference count of cluster config when rule is deleted */
432static void destroy(const struct xt_target *target, void *targinfo) 425static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo)
433{ 426{
434 struct ipt_clusterip_tgt_info *cipinfo = targinfo; 427 struct ipt_clusterip_tgt_info *cipinfo = targinfo;
435 428
@@ -456,12 +449,12 @@ struct compat_ipt_clusterip_tgt_info
456}; 449};
457#endif /* CONFIG_COMPAT */ 450#endif /* CONFIG_COMPAT */
458 451
459static struct xt_target clusterip_tgt __read_mostly = { 452static struct xt_target clusterip_tg_reg __read_mostly = {
460 .name = "CLUSTERIP", 453 .name = "CLUSTERIP",
461 .family = AF_INET, 454 .family = AF_INET,
462 .target = target, 455 .target = clusterip_tg,
463 .checkentry = checkentry, 456 .checkentry = clusterip_tg_check,
464 .destroy = destroy, 457 .destroy = clusterip_tg_destroy,
465 .targetsize = sizeof(struct ipt_clusterip_tgt_info), 458 .targetsize = sizeof(struct ipt_clusterip_tgt_info),
466#ifdef CONFIG_COMPAT 459#ifdef CONFIG_COMPAT
467 .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), 460 .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
@@ -558,7 +551,7 @@ arp_mangle(unsigned int hook,
558 return NF_ACCEPT; 551 return NF_ACCEPT;
559} 552}
560 553
561static struct nf_hook_ops cip_arp_ops = { 554static struct nf_hook_ops cip_arp_ops __read_mostly = {
562 .hook = arp_mangle, 555 .hook = arp_mangle,
563 .pf = NF_ARP, 556 .pf = NF_ARP,
564 .hooknum = NF_ARP_OUT, 557 .hooknum = NF_ARP_OUT,
@@ -714,11 +707,11 @@ static const struct file_operations clusterip_proc_fops = {
714 707
715#endif /* CONFIG_PROC_FS */ 708#endif /* CONFIG_PROC_FS */
716 709
717static int __init ipt_clusterip_init(void) 710static int __init clusterip_tg_init(void)
718{ 711{
719 int ret; 712 int ret;
720 713
721 ret = xt_register_target(&clusterip_tgt); 714 ret = xt_register_target(&clusterip_tg_reg);
722 if (ret < 0) 715 if (ret < 0)
723 return ret; 716 return ret;
724 717
@@ -744,11 +737,11 @@ cleanup_hook:
744 nf_unregister_hook(&cip_arp_ops); 737 nf_unregister_hook(&cip_arp_ops);
745#endif /* CONFIG_PROC_FS */ 738#endif /* CONFIG_PROC_FS */
746cleanup_target: 739cleanup_target:
747 xt_unregister_target(&clusterip_tgt); 740 xt_unregister_target(&clusterip_tg_reg);
748 return ret; 741 return ret;
749} 742}
750 743
751static void __exit ipt_clusterip_fini(void) 744static void __exit clusterip_tg_exit(void)
752{ 745{
753 printk(KERN_NOTICE "ClusterIP Version %s unloading\n", 746 printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
754 CLUSTERIP_VERSION); 747 CLUSTERIP_VERSION);
@@ -756,8 +749,8 @@ static void __exit ipt_clusterip_fini(void)
756 remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); 749 remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
757#endif 750#endif
758 nf_unregister_hook(&cip_arp_ops); 751 nf_unregister_hook(&cip_arp_ops);
759 xt_unregister_target(&clusterip_tgt); 752 xt_unregister_target(&clusterip_tg_reg);
760} 753}
761 754
762module_init(ipt_clusterip_init); 755module_init(clusterip_tg_init);
763module_exit(ipt_clusterip_fini); 756module_exit(clusterip_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index add110060a22..21395bc2b27f 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -21,7 +21,7 @@
21 21
22MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
24MODULE_DESCRIPTION("iptables ECN modification module"); 24MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification");
25 25
26/* set ECT codepoint from IP header. 26/* set ECT codepoint from IP header.
27 * return false if there was an error. */ 27 * return false if there was an error. */
@@ -38,7 +38,7 @@ set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
38 oldtos = iph->tos; 38 oldtos = iph->tos;
39 iph->tos &= ~IPT_ECN_IP_MASK; 39 iph->tos &= ~IPT_ECN_IP_MASK;
40 iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); 40 iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
41 nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); 41 csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
42 } 42 }
43 return true; 43 return true;
44} 44}
@@ -71,18 +71,15 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
71 if (einfo->operation & IPT_ECN_OP_SET_CWR) 71 if (einfo->operation & IPT_ECN_OP_SET_CWR)
72 tcph->cwr = einfo->proto.tcp.cwr; 72 tcph->cwr = einfo->proto.tcp.cwr;
73 73
74 nf_proto_csum_replace2(&tcph->check, skb, 74 inet_proto_csum_replace2(&tcph->check, skb,
75 oldval, ((__be16 *)tcph)[6], 0); 75 oldval, ((__be16 *)tcph)[6], 0);
76 return true; 76 return true;
77} 77}
78 78
79static unsigned int 79static unsigned int
80target(struct sk_buff *skb, 80ecn_tg(struct sk_buff *skb, const struct net_device *in,
81 const struct net_device *in, 81 const struct net_device *out, unsigned int hooknum,
82 const struct net_device *out, 82 const struct xt_target *target, const void *targinfo)
83 unsigned int hooknum,
84 const struct xt_target *target,
85 const void *targinfo)
86{ 83{
87 const struct ipt_ECN_info *einfo = targinfo; 84 const struct ipt_ECN_info *einfo = targinfo;
88 85
@@ -99,11 +96,9 @@ target(struct sk_buff *skb,
99} 96}
100 97
101static bool 98static bool
102checkentry(const char *tablename, 99ecn_tg_check(const char *tablename, const void *e_void,
103 const void *e_void, 100 const struct xt_target *target, void *targinfo,
104 const struct xt_target *target, 101 unsigned int hook_mask)
105 void *targinfo,
106 unsigned int hook_mask)
107{ 102{
108 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; 103 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
109 const struct ipt_entry *e = e_void; 104 const struct ipt_entry *e = e_void;
@@ -127,25 +122,25 @@ checkentry(const char *tablename,
127 return true; 122 return true;
128} 123}
129 124
130static struct xt_target ipt_ecn_reg __read_mostly = { 125static struct xt_target ecn_tg_reg __read_mostly = {
131 .name = "ECN", 126 .name = "ECN",
132 .family = AF_INET, 127 .family = AF_INET,
133 .target = target, 128 .target = ecn_tg,
134 .targetsize = sizeof(struct ipt_ECN_info), 129 .targetsize = sizeof(struct ipt_ECN_info),
135 .table = "mangle", 130 .table = "mangle",
136 .checkentry = checkentry, 131 .checkentry = ecn_tg_check,
137 .me = THIS_MODULE, 132 .me = THIS_MODULE,
138}; 133};
139 134
140static int __init ipt_ecn_init(void) 135static int __init ecn_tg_init(void)
141{ 136{
142 return xt_register_target(&ipt_ecn_reg); 137 return xt_register_target(&ecn_tg_reg);
143} 138}
144 139
145static void __exit ipt_ecn_fini(void) 140static void __exit ecn_tg_exit(void)
146{ 141{
147 xt_unregister_target(&ipt_ecn_reg); 142 xt_unregister_target(&ecn_tg_reg);
148} 143}
149 144
150module_init(ipt_ecn_init); 145module_init(ecn_tg_init);
151module_exit(ipt_ecn_fini); 146module_exit(ecn_tg_exit);
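
The csum_replace2() and inet_proto_csum_replace2() calls adopted above perform the RFC 1624 incremental checksum update: fold the old 16-bit word out of the ones'-complement sum and fold the new word in, instead of recomputing the whole checksum. A hedged userspace model, not the kernel's implementation; the sample words stand in for the htons()-converted TOS values:

	#include <stdint.h>
	#include <stdio.h>

	/* RFC 1624 eqn. 3: HC' = ~(~HC + ~m + m') */
	static uint16_t csum_replace2(uint16_t check, uint16_t old_w, uint16_t new_w)
	{
		uint32_t sum = (uint16_t)~check;

		sum += (uint16_t)~old_w;
		sum += new_w;
		sum = (sum & 0xFFFF) + (sum >> 16);	/* fold carries */
		sum = (sum & 0xFFFF) + (sum >> 16);
		return (uint16_t)~sum;
	}

	int main(void)
	{
		/* sample values: a header checksum, then the 16-bit word
		 * holding the TOS byte before and after setting ECT(0) */
		uint16_t check = 0xB1E6, before = 0x4500, after = 0x4502;

		printf("old 0x%04X -> new 0x%04X\n", check,
		       csum_replace2(check, before, after));
		return 0;
	}
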
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 4b5e8216a4e7..b38d7850f506 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -22,10 +22,11 @@
22#include <linux/netfilter.h> 22#include <linux/netfilter.h>
23#include <linux/netfilter/x_tables.h> 23#include <linux/netfilter/x_tables.h>
24#include <linux/netfilter_ipv4/ipt_LOG.h> 24#include <linux/netfilter_ipv4/ipt_LOG.h>
25#include <net/netfilter/nf_log.h>
25 26
26MODULE_LICENSE("GPL"); 27MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 28MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
28MODULE_DESCRIPTION("iptables syslog logging module"); 29MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
29 30
30/* Use lock to serialize, so printks don't overlap */ 31/* Use lock to serialize, so printks don't overlap */
31static DEFINE_SPINLOCK(log_lock); 32static DEFINE_SPINLOCK(log_lock);
@@ -337,7 +338,9 @@ static void dump_packet(const struct nf_loginfo *info,
337 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { 338 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
338 read_lock_bh(&skb->sk->sk_callback_lock); 339 read_lock_bh(&skb->sk->sk_callback_lock);
339 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 340 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
340 printk("UID=%u ", skb->sk->sk_socket->file->f_uid); 341 printk("UID=%u GID=%u",
342 skb->sk->sk_socket->file->f_uid,
343 skb->sk->sk_socket->file->f_gid);
341 read_unlock_bh(&skb->sk->sk_callback_lock); 344 read_unlock_bh(&skb->sk->sk_callback_lock);
342 } 345 }
343 346
@@ -418,12 +421,9 @@ ipt_log_packet(unsigned int pf,
418} 421}
419 422
420static unsigned int 423static unsigned int
421ipt_log_target(struct sk_buff *skb, 424log_tg(struct sk_buff *skb, const struct net_device *in,
422 const struct net_device *in, 425 const struct net_device *out, unsigned int hooknum,
423 const struct net_device *out, 426 const struct xt_target *target, const void *targinfo)
424 unsigned int hooknum,
425 const struct xt_target *target,
426 const void *targinfo)
427{ 427{
428 const struct ipt_log_info *loginfo = targinfo; 428 const struct ipt_log_info *loginfo = targinfo;
429 struct nf_loginfo li; 429 struct nf_loginfo li;
@@ -437,11 +437,10 @@ ipt_log_target(struct sk_buff *skb,
437 return XT_CONTINUE; 437 return XT_CONTINUE;
438} 438}
439 439
440static bool ipt_log_checkentry(const char *tablename, 440static bool
441 const void *e, 441log_tg_check(const char *tablename, const void *e,
442 const struct xt_target *target, 442 const struct xt_target *target, void *targinfo,
443 void *targinfo, 443 unsigned int hook_mask)
444 unsigned int hook_mask)
445{ 444{
446 const struct ipt_log_info *loginfo = targinfo; 445 const struct ipt_log_info *loginfo = targinfo;
447 446
@@ -457,37 +456,37 @@ static bool ipt_log_checkentry(const char *tablename,
457 return true; 456 return true;
458} 457}
459 458
460static struct xt_target ipt_log_reg __read_mostly = { 459static struct xt_target log_tg_reg __read_mostly = {
461 .name = "LOG", 460 .name = "LOG",
462 .family = AF_INET, 461 .family = AF_INET,
463 .target = ipt_log_target, 462 .target = log_tg,
464 .targetsize = sizeof(struct ipt_log_info), 463 .targetsize = sizeof(struct ipt_log_info),
465 .checkentry = ipt_log_checkentry, 464 .checkentry = log_tg_check,
466 .me = THIS_MODULE, 465 .me = THIS_MODULE,
467}; 466};
468 467
469static struct nf_logger ipt_log_logger ={ 468static const struct nf_logger ipt_log_logger ={
470 .name = "ipt_LOG", 469 .name = "ipt_LOG",
471 .logfn = &ipt_log_packet, 470 .logfn = &ipt_log_packet,
472 .me = THIS_MODULE, 471 .me = THIS_MODULE,
473}; 472};
474 473
475static int __init ipt_log_init(void) 474static int __init log_tg_init(void)
476{ 475{
477 int ret; 476 int ret;
478 477
479 ret = xt_register_target(&ipt_log_reg); 478 ret = xt_register_target(&log_tg_reg);
480 if (ret < 0) 479 if (ret < 0)
481 return ret; 480 return ret;
482 nf_log_register(PF_INET, &ipt_log_logger); 481 nf_log_register(PF_INET, &ipt_log_logger);
483 return 0; 482 return 0;
484} 483}
485 484
486static void __exit ipt_log_fini(void) 485static void __exit log_tg_exit(void)
487{ 486{
488 nf_log_unregister(&ipt_log_logger); 487 nf_log_unregister(&ipt_log_logger);
489 xt_unregister_target(&ipt_log_reg); 488 xt_unregister_target(&log_tg_reg);
490} 489}
491 490
492module_init(ipt_log_init); 491module_init(log_tg_init);
493module_exit(ipt_log_fini); 492module_exit(log_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 44b516e7cb79..d80fee8327e4 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -25,18 +25,16 @@
25 25
26MODULE_LICENSE("GPL"); 26MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
28MODULE_DESCRIPTION("iptables MASQUERADE target module"); 28MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
29 29
30/* Lock protects masq region inside conntrack */ 30/* Lock protects masq region inside conntrack */
31static DEFINE_RWLOCK(masq_lock); 31static DEFINE_RWLOCK(masq_lock);
32 32
33/* FIXME: Multiple targets. --RR */ 33/* FIXME: Multiple targets. --RR */
34static bool 34static bool
35masquerade_check(const char *tablename, 35masquerade_tg_check(const char *tablename, const void *e,
36 const void *e, 36 const struct xt_target *target, void *targinfo,
37 const struct xt_target *target, 37 unsigned int hook_mask)
38 void *targinfo,
39 unsigned int hook_mask)
40{ 38{
41 const struct nf_nat_multi_range_compat *mr = targinfo; 39 const struct nf_nat_multi_range_compat *mr = targinfo;
42 40
@@ -52,12 +50,9 @@ masquerade_check(const char *tablename,
52} 50}
53 51
54static unsigned int 52static unsigned int
55masquerade_target(struct sk_buff *skb, 53masquerade_tg(struct sk_buff *skb, const struct net_device *in,
56 const struct net_device *in, 54 const struct net_device *out, unsigned int hooknum,
57 const struct net_device *out, 55 const struct xt_target *target, const void *targinfo)
58 unsigned int hooknum,
59 const struct xt_target *target,
60 const void *targinfo)
61{ 56{
62 struct nf_conn *ct; 57 struct nf_conn *ct;
63 struct nf_conn_nat *nat; 58 struct nf_conn_nat *nat;
@@ -67,7 +62,7 @@ masquerade_target(struct sk_buff *skb,
67 const struct rtable *rt; 62 const struct rtable *rt;
68 __be32 newsrc; 63 __be32 newsrc;
69 64
70 NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); 65 NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
71 66
72 ct = nf_ct_get(skb, &ctinfo); 67 ct = nf_ct_get(skb, &ctinfo);
73 nat = nfct_nat(ct); 68 nat = nfct_nat(ct);
@@ -100,7 +95,7 @@ masquerade_target(struct sk_buff *skb,
100 mr->range[0].min, mr->range[0].max }); 95 mr->range[0].min, mr->range[0].max });
101 96
102 /* Hand modified range to generic setup. */ 97 /* Hand modified range to generic setup. */
103 return nf_nat_setup_info(ct, &newrange, hooknum); 98 return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
104} 99}
105 100
106static int 101static int
@@ -166,22 +161,22 @@ static struct notifier_block masq_inet_notifier = {
166 .notifier_call = masq_inet_event, 161 .notifier_call = masq_inet_event,
167}; 162};
168 163
169static struct xt_target masquerade __read_mostly = { 164static struct xt_target masquerade_tg_reg __read_mostly = {
170 .name = "MASQUERADE", 165 .name = "MASQUERADE",
171 .family = AF_INET, 166 .family = AF_INET,
172 .target = masquerade_target, 167 .target = masquerade_tg,
173 .targetsize = sizeof(struct nf_nat_multi_range_compat), 168 .targetsize = sizeof(struct nf_nat_multi_range_compat),
174 .table = "nat", 169 .table = "nat",
175 .hooks = 1 << NF_IP_POST_ROUTING, 170 .hooks = 1 << NF_INET_POST_ROUTING,
176 .checkentry = masquerade_check, 171 .checkentry = masquerade_tg_check,
177 .me = THIS_MODULE, 172 .me = THIS_MODULE,
178}; 173};
179 174
180static int __init ipt_masquerade_init(void) 175static int __init masquerade_tg_init(void)
181{ 176{
182 int ret; 177 int ret;
183 178
184 ret = xt_register_target(&masquerade); 179 ret = xt_register_target(&masquerade_tg_reg);
185 180
186 if (ret == 0) { 181 if (ret == 0) {
187 /* Register for device down reports */ 182 /* Register for device down reports */
@@ -193,12 +188,12 @@ static int __init ipt_masquerade_init(void)
193 return ret; 188 return ret;
194} 189}
195 190
196static void __exit ipt_masquerade_fini(void) 191static void __exit masquerade_tg_exit(void)
197{ 192{
198 xt_unregister_target(&masquerade); 193 xt_unregister_target(&masquerade_tg_reg);
199 unregister_netdevice_notifier(&masq_dev_notifier); 194 unregister_netdevice_notifier(&masq_dev_notifier);
200 unregister_inetaddr_notifier(&masq_inet_notifier); 195 unregister_inetaddr_notifier(&masq_inet_notifier);
201} 196}
202 197
203module_init(ipt_masquerade_init); 198module_init(masquerade_tg_init);
204module_exit(ipt_masquerade_fini); 199module_exit(masquerade_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f8699291e33d..6739abfd1521 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -20,14 +20,12 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); 22MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
23MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target"); 23MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
24 24
25static bool 25static bool
26check(const char *tablename, 26netmap_tg_check(const char *tablename, const void *e,
27 const void *e, 27 const struct xt_target *target, void *targinfo,
28 const struct xt_target *target, 28 unsigned int hook_mask)
29 void *targinfo,
30 unsigned int hook_mask)
31{ 29{
32 const struct nf_nat_multi_range_compat *mr = targinfo; 30 const struct nf_nat_multi_range_compat *mr = targinfo;
33 31
@@ -43,12 +41,9 @@ check(const char *tablename,
43} 41}
44 42
45static unsigned int 43static unsigned int
46target(struct sk_buff *skb, 44netmap_tg(struct sk_buff *skb, const struct net_device *in,
47 const struct net_device *in, 45 const struct net_device *out, unsigned int hooknum,
48 const struct net_device *out, 46 const struct xt_target *target, const void *targinfo)
49 unsigned int hooknum,
50 const struct xt_target *target,
51 const void *targinfo)
52{ 47{
53 struct nf_conn *ct; 48 struct nf_conn *ct;
54 enum ip_conntrack_info ctinfo; 49 enum ip_conntrack_info ctinfo;
@@ -56,14 +51,14 @@ target(struct sk_buff *skb,
56 const struct nf_nat_multi_range_compat *mr = targinfo; 51 const struct nf_nat_multi_range_compat *mr = targinfo;
57 struct nf_nat_range newrange; 52 struct nf_nat_range newrange;
58 53
59 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING 54 NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING
60 || hooknum == NF_IP_POST_ROUTING 55 || hooknum == NF_INET_POST_ROUTING
61 || hooknum == NF_IP_LOCAL_OUT); 56 || hooknum == NF_INET_LOCAL_OUT);
62 ct = nf_ct_get(skb, &ctinfo); 57 ct = nf_ct_get(skb, &ctinfo);
63 58
64 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); 59 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
65 60
66 if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) 61 if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT)
67 new_ip = ip_hdr(skb)->daddr & ~netmask; 62 new_ip = ip_hdr(skb)->daddr & ~netmask;
68 else 63 else
69 new_ip = ip_hdr(skb)->saddr & ~netmask; 64 new_ip = ip_hdr(skb)->saddr & ~netmask;
@@ -75,30 +70,31 @@ target(struct sk_buff *skb,
75 mr->range[0].min, mr->range[0].max }); 70 mr->range[0].min, mr->range[0].max });
76 71
77 /* Hand modified range to generic setup. */ 72 /* Hand modified range to generic setup. */
78 return nf_nat_setup_info(ct, &newrange, hooknum); 73 return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum));
79} 74}
80 75
81static struct xt_target target_module __read_mostly = { 76static struct xt_target netmap_tg_reg __read_mostly = {
82 .name = "NETMAP", 77 .name = "NETMAP",
83 .family = AF_INET, 78 .family = AF_INET,
84 .target = target, 79 .target = netmap_tg,
85 .targetsize = sizeof(struct nf_nat_multi_range_compat), 80 .targetsize = sizeof(struct nf_nat_multi_range_compat),
86 .table = "nat", 81 .table = "nat",
87 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) | 82 .hooks = (1 << NF_INET_PRE_ROUTING) |
88 (1 << NF_IP_LOCAL_OUT), 83 (1 << NF_INET_POST_ROUTING) |
89 .checkentry = check, 84 (1 << NF_INET_LOCAL_OUT),
85 .checkentry = netmap_tg_check,
90 .me = THIS_MODULE 86 .me = THIS_MODULE
91}; 87};
92 88
93static int __init ipt_netmap_init(void) 89static int __init netmap_tg_init(void)
94{ 90{
95 return xt_register_target(&target_module); 91 return xt_register_target(&netmap_tg_reg);
96} 92}
97 93
98static void __exit ipt_netmap_fini(void) 94static void __exit netmap_tg_exit(void)
99{ 95{
100 xt_unregister_target(&target_module); 96 xt_unregister_target(&netmap_tg_reg);
101} 97}
102 98
103module_init(ipt_netmap_init); 99module_init(netmap_tg_init);
104module_exit(ipt_netmap_fini); 100module_exit(netmap_tg_exit);
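
NETMAP's arithmetic above derives the netmask from the configured range as ~(min_ip ^ max_ip), then grafts the packet's host bits onto the range's network bits. A worked example; the kernel operates on big-endian __be32 values directly, while this sketch converts to host order for clarity:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t min_ip = ntohl(inet_addr("10.0.0.0"));
		uint32_t max_ip = ntohl(inet_addr("10.0.0.255"));
		uint32_t daddr  = ntohl(inet_addr("192.168.7.42"));

		uint32_t netmask = ~(min_ip ^ max_ip);		/* 0xFFFFFF00, a /24 */
		uint32_t new_ip  = (daddr & ~netmask) | (min_ip & netmask);

		/* the host byte 42 survives the 1:1 remap */
		printf("192.168.7.42 -> 10.0.0.%u\n", new_ip & 0xFF);
		return 0;
	}
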
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f7cf7d61a2d4..5c6292449d13 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -23,15 +23,13 @@
23 23
24MODULE_LICENSE("GPL"); 24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 25MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
26MODULE_DESCRIPTION("iptables REDIRECT target module"); 26MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
27 27
28/* FIXME: Take multiple ranges --RR */ 28/* FIXME: Take multiple ranges --RR */
29static bool 29static bool
30redirect_check(const char *tablename, 30redirect_tg_check(const char *tablename, const void *e,
31 const void *e, 31 const struct xt_target *target, void *targinfo,
32 const struct xt_target *target, 32 unsigned int hook_mask)
33 void *targinfo,
34 unsigned int hook_mask)
35{ 33{
36 const struct nf_nat_multi_range_compat *mr = targinfo; 34 const struct nf_nat_multi_range_compat *mr = targinfo;
37 35
@@ -47,12 +45,9 @@ redirect_check(const char *tablename,
47} 45}
48 46
49static unsigned int 47static unsigned int
50redirect_target(struct sk_buff *skb, 48redirect_tg(struct sk_buff *skb, const struct net_device *in,
51 const struct net_device *in, 49 const struct net_device *out, unsigned int hooknum,
52 const struct net_device *out, 50 const struct xt_target *target, const void *targinfo)
53 unsigned int hooknum,
54 const struct xt_target *target,
55 const void *targinfo)
56{ 51{
57 struct nf_conn *ct; 52 struct nf_conn *ct;
58 enum ip_conntrack_info ctinfo; 53 enum ip_conntrack_info ctinfo;
@@ -60,14 +55,14 @@ redirect_target(struct sk_buff *skb,
60 const struct nf_nat_multi_range_compat *mr = targinfo; 55 const struct nf_nat_multi_range_compat *mr = targinfo;
61 struct nf_nat_range newrange; 56 struct nf_nat_range newrange;
62 57
63 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING 58 NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING
64 || hooknum == NF_IP_LOCAL_OUT); 59 || hooknum == NF_INET_LOCAL_OUT);
65 60
66 ct = nf_ct_get(skb, &ctinfo); 61 ct = nf_ct_get(skb, &ctinfo);
67 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); 62 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
68 63
69 /* Local packets: make them go to loopback */ 64 /* Local packets: make them go to loopback */
70 if (hooknum == NF_IP_LOCAL_OUT) 65 if (hooknum == NF_INET_LOCAL_OUT)
71 newdst = htonl(0x7F000001); 66 newdst = htonl(0x7F000001);
72 else { 67 else {
73 struct in_device *indev; 68 struct in_device *indev;
@@ -92,29 +87,29 @@ redirect_target(struct sk_buff *skb,
92 mr->range[0].min, mr->range[0].max }); 87 mr->range[0].min, mr->range[0].max });
93 88
94 /* Hand modified range to generic setup. */ 89 /* Hand modified range to generic setup. */
95 return nf_nat_setup_info(ct, &newrange, hooknum); 90 return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
96} 91}
97 92
98static struct xt_target redirect_reg __read_mostly = { 93static struct xt_target redirect_tg_reg __read_mostly = {
99 .name = "REDIRECT", 94 .name = "REDIRECT",
100 .family = AF_INET, 95 .family = AF_INET,
101 .target = redirect_target, 96 .target = redirect_tg,
102 .targetsize = sizeof(struct nf_nat_multi_range_compat), 97 .targetsize = sizeof(struct nf_nat_multi_range_compat),
103 .table = "nat", 98 .table = "nat",
104 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT), 99 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
105 .checkentry = redirect_check, 100 .checkentry = redirect_tg_check,
106 .me = THIS_MODULE, 101 .me = THIS_MODULE,
107}; 102};
108 103
109static int __init ipt_redirect_init(void) 104static int __init redirect_tg_init(void)
110{ 105{
111 return xt_register_target(&redirect_reg); 106 return xt_register_target(&redirect_tg_reg);
112} 107}
113 108
114static void __exit ipt_redirect_fini(void) 109static void __exit redirect_tg_exit(void)
115{ 110{
116 xt_unregister_target(&redirect_reg); 111 xt_unregister_target(&redirect_tg_reg);
117} 112}
118 113
119module_init(ipt_redirect_init); 114module_init(redirect_tg_init);
120module_exit(ipt_redirect_fini); 115module_exit(redirect_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index dcf4d21d5116..22606e2baa16 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -29,17 +29,14 @@
29 29
30MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 31MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
32MODULE_DESCRIPTION("iptables REJECT target module"); 32MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
33 33
34/* Send RST reply */ 34/* Send RST reply */
35static void send_reset(struct sk_buff *oldskb, int hook) 35static void send_reset(struct sk_buff *oldskb, int hook)
36{ 36{
37 struct sk_buff *nskb; 37 struct sk_buff *nskb;
38 struct iphdr *niph; 38 struct iphdr *oiph, *niph;
39 struct tcphdr _otcph, *oth, *tcph; 39 struct tcphdr _otcph, *oth, *tcph;
40 __be16 tmp_port;
41 __be32 tmp_addr;
42 int needs_ack;
43 unsigned int addr_type; 40 unsigned int addr_type;
44 41
45 /* IP header checks: fragment. */ 42 /* IP header checks: fragment. */
@@ -58,99 +55,73 @@ static void send_reset(struct sk_buff *oldskb, int hook)
58 /* Check checksum */ 55 /* Check checksum */
59 if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) 56 if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
60 return; 57 return;
58 oiph = ip_hdr(oldskb);
61 59
62 /* We need a linear, writeable skb. We also need to expand 60 nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
63 headroom in case hh_len of incoming interface < hh_len of 61 LL_MAX_HEADER, GFP_ATOMIC);
64 outgoing interface */
65 nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb),
66 GFP_ATOMIC);
67 if (!nskb) 62 if (!nskb)
68 return; 63 return;
69 64
70 /* This packet will not be the same as the other: clear nf fields */ 65 skb_reserve(nskb, LL_MAX_HEADER);
71 nf_reset(nskb); 66
72 nskb->mark = 0; 67 skb_reset_network_header(nskb);
73 skb_init_secmark(nskb); 68 niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
74 69 niph->version = 4;
75 skb_shinfo(nskb)->gso_size = 0; 70 niph->ihl = sizeof(struct iphdr) / 4;
76 skb_shinfo(nskb)->gso_segs = 0; 71 niph->tos = 0;
77 skb_shinfo(nskb)->gso_type = 0; 72 niph->id = 0;
78 73 niph->frag_off = htons(IP_DF);
79 tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb)); 74 niph->protocol = IPPROTO_TCP;
80 75 niph->check = 0;
81 /* Swap source and dest */ 76 niph->saddr = oiph->daddr;
82 niph = ip_hdr(nskb); 77 niph->daddr = oiph->saddr;
83 tmp_addr = niph->saddr; 78
84 niph->saddr = niph->daddr; 79 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
85 niph->daddr = tmp_addr; 80 memset(tcph, 0, sizeof(*tcph));
86 tmp_port = tcph->source; 81 tcph->source = oth->dest;
87 tcph->source = tcph->dest; 82 tcph->dest = oth->source;
88 tcph->dest = tmp_port; 83 tcph->doff = sizeof(struct tcphdr) / 4;
89 84
90 /* Truncate to length (no data) */ 85 if (oth->ack)
91 tcph->doff = sizeof(struct tcphdr)/4;
92 skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
93 niph->tot_len = htons(nskb->len);
94
95 if (tcph->ack) {
96 needs_ack = 0;
97 tcph->seq = oth->ack_seq; 86 tcph->seq = oth->ack_seq;
98 tcph->ack_seq = 0; 87 else {
99 } else {
100 needs_ack = 1;
101 tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + 88 tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
102 oldskb->len - ip_hdrlen(oldskb) - 89 oldskb->len - ip_hdrlen(oldskb) -
103 (oth->doff << 2)); 90 (oth->doff << 2));
104 tcph->seq = 0; 91 tcph->ack = 1;
105 } 92 }
106 93
107 /* Reset flags */ 94 tcph->rst = 1;
108 ((u_int8_t *)tcph)[13] = 0; 95 tcph->check = tcp_v4_check(sizeof(struct tcphdr),
109 tcph->rst = 1; 96 niph->saddr, niph->daddr,
110 tcph->ack = needs_ack; 97 csum_partial(tcph,
111 98 sizeof(struct tcphdr), 0));
112 tcph->window = 0;
113 tcph->urg_ptr = 0;
114
115 /* Adjust TCP checksum */
116 tcph->check = 0;
117 tcph->check = tcp_v4_check(sizeof(struct tcphdr),
118 niph->saddr, niph->daddr,
119 csum_partial(tcph,
120 sizeof(struct tcphdr), 0));
121
122 /* Set DF, id = 0 */
123 niph->frag_off = htons(IP_DF);
124 niph->id = 0;
125 99
126 addr_type = RTN_UNSPEC; 100 addr_type = RTN_UNSPEC;
127 if (hook != NF_IP_FORWARD 101 if (hook != NF_INET_FORWARD
128#ifdef CONFIG_BRIDGE_NETFILTER 102#ifdef CONFIG_BRIDGE_NETFILTER
129 || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) 103 || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
130#endif 104#endif
131 ) 105 )
132 addr_type = RTN_LOCAL; 106 addr_type = RTN_LOCAL;
133 107
108 /* ip_route_me_harder expects skb->dst to be set */
109 dst_hold(oldskb->dst);
110 nskb->dst = oldskb->dst;
111
134 if (ip_route_me_harder(nskb, addr_type)) 112 if (ip_route_me_harder(nskb, addr_type))
135 goto free_nskb; 113 goto free_nskb;
136 114
115 niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
137 nskb->ip_summed = CHECKSUM_NONE; 116 nskb->ip_summed = CHECKSUM_NONE;
138 117
139 /* Adjust IP TTL */
140 niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
141
142 /* Adjust IP checksum */
143 niph->check = 0;
144 niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl);
145
146 /* "Never happens" */ 118 /* "Never happens" */
147 if (nskb->len > dst_mtu(nskb->dst)) 119 if (nskb->len > dst_mtu(nskb->dst))
148 goto free_nskb; 120 goto free_nskb;
149 121
150 nf_ct_attach(nskb, oldskb); 122 nf_ct_attach(nskb, oldskb);
151 123
152 NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev, 124 ip_local_out(nskb);
153 dst_output);
154 return; 125 return;
155 126
156 free_nskb: 127 free_nskb:
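
In the rewritten send_reset() above, the RST's acknowledgment number must cover everything the offending segment consumed: SYN and FIN each take one sequence number, and the payload takes its length. A worked example with illustrative header sizes:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t seq = 1000;		/* sequence number of offending segment */
		uint32_t syn = 0, fin = 0;
		uint32_t skb_len = 52;		/* total IP datagram length */
		uint32_t ip_hdrlen = 20;
		uint32_t doff = 5;		/* TCP data offset, in 32-bit words */

		uint32_t payload = skb_len - ip_hdrlen - (doff << 2);	/* 12 bytes */
		uint32_t ack_seq = seq + syn + fin + payload;

		printf("ack_seq=%u\n", ack_seq);	/* 1012 */
		return 0;
	}
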
@@ -162,20 +133,13 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
162 icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); 133 icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
163} 134}
164 135
165static unsigned int reject(struct sk_buff *skb, 136static unsigned int
166 const struct net_device *in, 137reject_tg(struct sk_buff *skb, const struct net_device *in,
167 const struct net_device *out, 138 const struct net_device *out, unsigned int hooknum,
168 unsigned int hooknum, 139 const struct xt_target *target, const void *targinfo)
169 const struct xt_target *target,
170 const void *targinfo)
171{ 140{
172 const struct ipt_reject_info *reject = targinfo; 141 const struct ipt_reject_info *reject = targinfo;
173 142
174 /* Our naive response construction doesn't deal with IP
175 options, and probably shouldn't try. */
176 if (ip_hdrlen(skb) != sizeof(struct iphdr))
177 return NF_DROP;
178
179 /* WARNING: This code causes reentry within iptables. 143 /* WARNING: This code causes reentry within iptables.
180 This means that the iptables jump stack is now crap. We 144 This means that the iptables jump stack is now crap. We
181 must return an absolute verdict. --RR */ 145 must return an absolute verdict. --RR */
@@ -211,11 +175,10 @@ static unsigned int reject(struct sk_buff *skb,
211 return NF_DROP; 175 return NF_DROP;
212} 176}
213 177
214static bool check(const char *tablename, 178static bool
215 const void *e_void, 179reject_tg_check(const char *tablename, const void *e_void,
216 const struct xt_target *target, 180 const struct xt_target *target, void *targinfo,
217 void *targinfo, 181 unsigned int hook_mask)
218 unsigned int hook_mask)
219{ 182{
220 const struct ipt_reject_info *rejinfo = targinfo; 183 const struct ipt_reject_info *rejinfo = targinfo;
221 const struct ipt_entry *e = e_void; 184 const struct ipt_entry *e = e_void;
@@ -234,27 +197,27 @@ static bool check(const char *tablename,
234 return true; 197 return true;
235} 198}
236 199
237static struct xt_target ipt_reject_reg __read_mostly = { 200static struct xt_target reject_tg_reg __read_mostly = {
238 .name = "REJECT", 201 .name = "REJECT",
239 .family = AF_INET, 202 .family = AF_INET,
240 .target = reject, 203 .target = reject_tg,
241 .targetsize = sizeof(struct ipt_reject_info), 204 .targetsize = sizeof(struct ipt_reject_info),
242 .table = "filter", 205 .table = "filter",
243 .hooks = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | 206 .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
244 (1 << NF_IP_LOCAL_OUT), 207 (1 << NF_INET_LOCAL_OUT),
245 .checkentry = check, 208 .checkentry = reject_tg_check,
246 .me = THIS_MODULE, 209 .me = THIS_MODULE,
247}; 210};
248 211
249static int __init ipt_reject_init(void) 212static int __init reject_tg_init(void)
250{ 213{
251 return xt_register_target(&ipt_reject_reg); 214 return xt_register_target(&reject_tg_reg);
252} 215}
253 216
254static void __exit ipt_reject_fini(void) 217static void __exit reject_tg_exit(void)
255{ 218{
256 xt_unregister_target(&ipt_reject_reg); 219 xt_unregister_target(&reject_tg_reg);
257} 220}
258 221
259module_init(ipt_reject_init); 222module_init(reject_tg_init);
260module_exit(ipt_reject_fini); 223module_exit(reject_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
deleted file mode 100644
index 8988571436b8..000000000000
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ /dev/null
@@ -1,179 +0,0 @@
1/* Same. Just like SNAT, only try to make the connections
2 * between client A and server B always have the same source ip.
3 *
4 * (C) 2000 Paul `Rusty' Russell
5 * (C) 2001 Martin Josefsson
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/types.h>
12#include <linux/ip.h>
13#include <linux/timer.h>
14#include <linux/module.h>
15#include <linux/netfilter.h>
16#include <linux/netdevice.h>
17#include <linux/if.h>
18#include <linux/inetdevice.h>
19#include <net/protocol.h>
20#include <net/checksum.h>
21#include <linux/netfilter_ipv4.h>
22#include <linux/netfilter/x_tables.h>
23#include <net/netfilter/nf_nat_rule.h>
24#include <linux/netfilter_ipv4/ipt_SAME.h>
25
26MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>");
28MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
29
30static bool
31same_check(const char *tablename,
32 const void *e,
33 const struct xt_target *target,
34 void *targinfo,
35 unsigned int hook_mask)
36{
37 unsigned int count, countess, rangeip, index = 0;
38 struct ipt_same_info *mr = targinfo;
39
40 mr->ipnum = 0;
41
	if (mr->rangesize < 1) {
		pr_debug("same_check: need at least one dest range.\n");
		return false;
	}
	if (mr->rangesize > IPT_SAME_MAX_RANGE) {
		pr_debug("same_check: too many ranges specified, maximum "
			 "is %u ranges\n", IPT_SAME_MAX_RANGE);
		return false;
	}
	for (count = 0; count < mr->rangesize; count++) {
		if (ntohl(mr->range[count].min_ip) >
		    ntohl(mr->range[count].max_ip)) {
			pr_debug("same_check: min_ip is larger than max_ip in "
				 "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
				 NIPQUAD(mr->range[count].min_ip),
				 NIPQUAD(mr->range[count].max_ip));
			return false;
		}
		if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) {
			pr_debug("same_check: bad MAP_IPS.\n");
			return false;
		}
		rangeip = (ntohl(mr->range[count].max_ip) -
			   ntohl(mr->range[count].min_ip) + 1);
		mr->ipnum += rangeip;

		pr_debug("same_check: range %u, ipnum = %u\n", count, rangeip);
	}
	pr_debug("same_check: total ipaddresses = %u\n", mr->ipnum);

	mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL);
	if (!mr->iparray) {
		pr_debug("same_check: Couldn't allocate %Zu bytes "
			 "for %u ipaddresses!\n",
			 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
		return false;
	}
	pr_debug("same_check: Allocated %Zu bytes for %u ipaddresses.\n",
		 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);

	for (count = 0; count < mr->rangesize; count++) {
		for (countess = ntohl(mr->range[count].min_ip);
		     countess <= ntohl(mr->range[count].max_ip);
		     countess++) {
			mr->iparray[index] = countess;
			pr_debug("same_check: Added ipaddress `%u.%u.%u.%u' "
				 "in index %u.\n", HIPQUAD(countess), index);
			index++;
		}
	}
	return true;
}

static void
same_destroy(const struct xt_target *target, void *targinfo)
{
	struct ipt_same_info *mr = targinfo;

	kfree(mr->iparray);

	pr_debug("same_destroy: Deallocated %Zu bytes for %u ipaddresses.\n",
		 (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
}

static unsigned int
same_target(struct sk_buff *skb,
	    const struct net_device *in,
	    const struct net_device *out,
	    unsigned int hooknum,
	    const struct xt_target *target,
	    const void *targinfo)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	u_int32_t tmpip, aindex;
	__be32 new_ip;
	const struct ipt_same_info *same = targinfo;
	struct nf_nat_range newrange;
	const struct nf_conntrack_tuple *t;

	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
		     hooknum == NF_IP_POST_ROUTING);
	ct = nf_ct_get(skb, &ctinfo);

	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;

	/* Base new source on real src ip and optionally dst ip,
	   giving some hope for consistency across reboots.
	   Here we calculate the index in same->iparray which
	   holds the ipaddress we should use */

	tmpip = ntohl(t->src.u3.ip);

	if (!(same->info & IPT_SAME_NODST))
		tmpip += ntohl(t->dst.u3.ip);
	aindex = tmpip % same->ipnum;

	new_ip = htonl(same->iparray[aindex]);

	pr_debug("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, "
		 "new src=%u.%u.%u.%u\n",
		 NIPQUAD(t->src.u3.ip), NIPQUAD(t->dst.u3.ip), NIPQUAD(new_ip));

	/* Transfer from original range. */
	newrange = ((struct nf_nat_range)
		{ same->range[0].flags, new_ip, new_ip,
		  /* FIXME: Use ports from correct range! */
		  same->range[0].min, same->range[0].max });

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, hooknum);
}

static struct xt_target same_reg __read_mostly = {
	.name		= "SAME",
	.family		= AF_INET,
	.target		= same_target,
	.targetsize	= sizeof(struct ipt_same_info),
	.table		= "nat",
	.hooks		= (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_POST_ROUTING),
	.checkentry	= same_check,
	.destroy	= same_destroy,
	.me		= THIS_MODULE,
};

static int __init ipt_same_init(void)
{
	return xt_register_target(&same_reg);
}

static void __exit ipt_same_fini(void)
{
	xt_unregister_target(&same_reg);
}

module_init(ipt_same_init);
module_exit(ipt_same_fini);
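The removed SAME target above is deterministic by design: same_target() sums the host-order source address with (unless IPT_SAME_NODST is set) the destination address, and uses that sum modulo the pool size to index iparray, so a given flow always maps to the same pool address. A standalone sketch of just that selection step, with a hypothetical four-address pool (nothing below is kernel API):

/* Illustrative only: ipt_SAME's address selection, outside the kernel.
 * pool[] and ipnum stand in for same->iparray / same->ipnum. */
#include <stdio.h>
#include <stdint.h>

static uint32_t same_pick(uint32_t src, uint32_t dst, int use_dst,
			  const uint32_t *pool, uint32_t ipnum)
{
	uint32_t tmpip = src;		/* host byte order, as after ntohl() */

	if (use_dst)
		tmpip += dst;		/* may wrap; the modulo keeps it in range */
	return pool[tmpip % ipnum];
}

int main(void)
{
	/* hypothetical pool 10.0.0.1 - 10.0.0.4, host byte order */
	uint32_t pool[] = { 0x0a000001, 0x0a000002, 0x0a000003, 0x0a000004 };
	uint32_t ip = same_pick(0xc0a80101, 0x08080808, 1, pool, 4);

	printf("mapped to 0x%08x\n", ip);	/* same inputs, same output */
	return 0;
}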
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
deleted file mode 100644
index d4573baa7f27..000000000000
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ /dev/null
@@ -1,87 +0,0 @@
/* This is a module which is used for setting the TOS field of a packet. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <net/checksum.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_TOS.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("iptables TOS mangling module");

static unsigned int
target(struct sk_buff *skb,
       const struct net_device *in,
       const struct net_device *out,
       unsigned int hooknum,
       const struct xt_target *target,
       const void *targinfo)
{
	const struct ipt_tos_target_info *tosinfo = targinfo;
	struct iphdr *iph = ip_hdr(skb);

	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
		__u8 oldtos;
		if (!skb_make_writable(skb, sizeof(struct iphdr)))
			return NF_DROP;
		iph = ip_hdr(skb);
		oldtos = iph->tos;
		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
	}
	return XT_CONTINUE;
}

static bool
checkentry(const char *tablename,
	   const void *e_void,
	   const struct xt_target *target,
	   void *targinfo,
	   unsigned int hook_mask)
{
	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;

	if (tos != IPTOS_LOWDELAY
	    && tos != IPTOS_THROUGHPUT
	    && tos != IPTOS_RELIABILITY
	    && tos != IPTOS_MINCOST
	    && tos != IPTOS_NORMALSVC) {
		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
		return false;
	}
	return true;
}

static struct xt_target ipt_tos_reg __read_mostly = {
	.name		= "TOS",
	.family		= AF_INET,
	.target		= target,
	.targetsize	= sizeof(struct ipt_tos_target_info),
	.table		= "mangle",
	.checkentry	= checkentry,
	.me		= THIS_MODULE,
};

static int __init ipt_tos_init(void)
{
	return xt_register_target(&ipt_tos_reg);
}

static void __exit ipt_tos_fini(void)
{
	xt_unregister_target(&ipt_tos_reg);
}

module_init(ipt_tos_init);
module_exit(ipt_tos_fini);
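The core of this deleted target is the masked update iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos, which rewrites the four TOS bits while preserving the three precedence bits, then repairs the header checksum incrementally. A userspace sketch of just the bit manipulation, with the mask values copied from the uapi definitions:

/* Illustrative only: the masked TOS update the old target performed. */
#include <stdio.h>
#include <stdint.h>

#define IPTOS_TOS_MASK	0x1E	/* TOS bits, the ones being replaced */
#define IPTOS_PREC_MASK	0xE0	/* precedence bits, always preserved */

static uint8_t set_tos(uint8_t tos_byte, uint8_t new_tos)
{
	return (tos_byte & IPTOS_PREC_MASK) | new_tos;
}

int main(void)
{
	/* precedence bits 0xA0 kept, TOS bits replaced by IPTOS_LOWDELAY */
	printf("0x%02x\n", set_tos(0xA4, 0x10));	/* prints 0xb0 */
	return 0;
}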
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index c620a0527666..30eed65e7338 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -16,14 +16,13 @@
 #include <linux/netfilter_ipv4/ipt_TTL.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IP tables TTL modification module");
+MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target");
 MODULE_LICENSE("GPL");
 
 static unsigned int
-ipt_ttl_target(struct sk_buff *skb,
-	       const struct net_device *in, const struct net_device *out,
-	       unsigned int hooknum, const struct xt_target *target,
-	       const void *targinfo)
+ttl_tg(struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, unsigned int hooknum,
+       const struct xt_target *target, const void *targinfo)
 {
 	struct iphdr *iph;
 	const struct ipt_TTL_info *info = targinfo;
@@ -54,19 +53,18 @@ ipt_ttl_target(struct sk_buff *skb,
 	}
 
 	if (new_ttl != iph->ttl) {
-		nf_csum_replace2(&iph->check, htons(iph->ttl << 8),
-				 htons(new_ttl << 8));
+		csum_replace2(&iph->check, htons(iph->ttl << 8),
+			      htons(new_ttl << 8));
 		iph->ttl = new_ttl;
 	}
 
 	return XT_CONTINUE;
 }
 
-static bool ipt_ttl_checkentry(const char *tablename,
-		const void *e,
-		const struct xt_target *target,
-		void *targinfo,
-		unsigned int hook_mask)
+static bool
+ttl_tg_check(const char *tablename, const void *e,
+	     const struct xt_target *target, void *targinfo,
+	     unsigned int hook_mask)
 {
 	const struct ipt_TTL_info *info = targinfo;
 
@@ -80,25 +78,25 @@ static bool ipt_ttl_checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_target ipt_TTL __read_mostly = {
+static struct xt_target ttl_tg_reg __read_mostly = {
 	.name		= "TTL",
 	.family		= AF_INET,
-	.target		= ipt_ttl_target,
+	.target		= ttl_tg,
 	.targetsize	= sizeof(struct ipt_TTL_info),
 	.table		= "mangle",
-	.checkentry	= ipt_ttl_checkentry,
+	.checkentry	= ttl_tg_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ttl_init(void)
+static int __init ttl_tg_init(void)
 {
-	return xt_register_target(&ipt_TTL);
+	return xt_register_target(&ttl_tg_reg);
}
 
-static void __exit ipt_ttl_fini(void)
+static void __exit ttl_tg_exit(void)
 {
-	xt_unregister_target(&ipt_TTL);
+	xt_unregister_target(&ttl_tg_reg);
 }
 
-module_init(ipt_ttl_init);
-module_exit(ipt_ttl_fini);
+module_init(ttl_tg_init);
+module_exit(ttl_tg_exit);
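csum_replace2() above, like the nf_csum_replace2() it replaces, avoids recomputing the whole IP header checksum: it applies the RFC 1624 incremental update HC' = ~(~HC + ~m + m') to the one 16-bit word that changed (the TTL sits in the high byte of its word, hence the << 8). A self-contained sketch of that arithmetic, not the kernel's exact implementation:

/* RFC 1624-style incremental checksum update, illustrative only. */
#include <stdio.h>
#include <stdint.h>

static uint16_t csum_update16(uint16_t check, uint16_t from, uint16_t to)
{
	/* HC' = ~(~HC + ~m + m'), with end-around carry folding */
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~from;
	sum += to;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* e.g. the TTL/protocol word changing as TTL drops from 64 to 63 */
	uint16_t check = 0xb861;	/* sample old header checksum */

	printf("0x%04x\n", csum_update16(check, 0x4006, 0x3f06));
	return 0;
}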
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 212b830765a4..b192756c6d0d 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -43,13 +43,14 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_ULOG.h>
+#include <net/netfilter/nf_log.h>
 #include <net/sock.h>
 #include <linux/bitops.h>
 #include <asm/unaligned.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("iptables userspace logging module");
+MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG");
 MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
 
 #define ULOG_NL_EVENT		111		/* Harald's favorite number */
@@ -279,12 +280,10 @@ alloc_failure:
 	spin_unlock_bh(&ulog_lock);
 }
 
-static unsigned int ipt_ulog_target(struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    unsigned int hooknum,
-				    const struct xt_target *target,
-				    const void *targinfo)
+static unsigned int
+ulog_tg(struct sk_buff *skb, const struct net_device *in,
+	const struct net_device *out, unsigned int hooknum,
+	const struct xt_target *target, const void *targinfo)
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
 
@@ -318,11 +317,10 @@ static void ipt_logfn(unsigned int pf,
 	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }
 
-static bool ipt_ulog_checkentry(const char *tablename,
-				const void *e,
-				const struct xt_target *target,
-				void *targinfo,
-				unsigned int hookmask)
+static bool
+ulog_tg_check(const char *tablename, const void *e,
+	      const struct xt_target *target, void *targinfo,
+	      unsigned int hookmask)
 {
 	const struct ipt_ulog_info *loginfo = targinfo;
 
@@ -347,7 +345,7 @@ struct compat_ipt_ulog_info {
 	char		prefix[ULOG_PREFIX_LEN];
 };
 
-static void compat_from_user(void *dst, void *src)
+static void ulog_tg_compat_from_user(void *dst, void *src)
 {
 	const struct compat_ipt_ulog_info *cl = src;
 	struct ipt_ulog_info l = {
@@ -360,7 +358,7 @@ static void compat_from_user(void *dst, void *src)
 	memcpy(dst, &l, sizeof(l));
 }
 
-static int compat_to_user(void __user *dst, void *src)
+static int ulog_tg_compat_to_user(void __user *dst, void *src)
 {
 	const struct ipt_ulog_info *l = src;
 	struct compat_ipt_ulog_info cl = {
@@ -374,16 +372,16 @@ static int compat_to_user(void __user *dst, void *src)
 }
 #endif /* CONFIG_COMPAT */
 
-static struct xt_target ipt_ulog_reg __read_mostly = {
+static struct xt_target ulog_tg_reg __read_mostly = {
 	.name		= "ULOG",
 	.family		= AF_INET,
-	.target		= ipt_ulog_target,
+	.target		= ulog_tg,
 	.targetsize	= sizeof(struct ipt_ulog_info),
-	.checkentry	= ipt_ulog_checkentry,
+	.checkentry	= ulog_tg_check,
 #ifdef CONFIG_COMPAT
 	.compatsize	= sizeof(struct compat_ipt_ulog_info),
-	.compat_from_user = compat_from_user,
-	.compat_to_user	= compat_to_user,
+	.compat_from_user = ulog_tg_compat_from_user,
+	.compat_to_user	= ulog_tg_compat_to_user,
 #endif
 	.me		= THIS_MODULE,
 };
@@ -394,7 +392,7 @@ static struct nf_logger ipt_ulog_logger = {
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ulog_init(void)
+static int __init ulog_tg_init(void)
 {
 	int ret, i;
 
@@ -415,9 +413,9 @@ static int __init ipt_ulog_init(void)
 	if (!nflognl)
 		return -ENOMEM;
 
-	ret = xt_register_target(&ipt_ulog_reg);
+	ret = xt_register_target(&ulog_tg_reg);
 	if (ret < 0) {
-		sock_release(nflognl->sk_socket);
+		netlink_kernel_release(nflognl);
 		return ret;
 	}
 	if (nflog)
@@ -426,7 +424,7 @@ static int __init ipt_ulog_init(void)
 	return 0;
 }
 
-static void __exit ipt_ulog_fini(void)
+static void __exit ulog_tg_exit(void)
 {
 	ulog_buff_t *ub;
 	int i;
@@ -435,8 +433,8 @@ static void __exit ipt_ulog_fini(void)
 
 	if (nflog)
 		nf_log_unregister(&ipt_ulog_logger);
-	xt_unregister_target(&ipt_ulog_reg);
-	sock_release(nflognl->sk_socket);
+	xt_unregister_target(&ulog_tg_reg);
+	netlink_kernel_release(nflognl);
 
 	/* remove pending timers and free allocated skb's */
 	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
@@ -453,5 +451,5 @@ static void __exit ipt_ulog_fini(void)
 	}
 }
 
-module_init(ipt_ulog_init);
-module_exit(ipt_ulog_fini);
+module_init(ulog_tg_init);
+module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 59f01f7ba6b4..49587a497229 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -2,6 +2,7 @@
  * iptables module to match inet_addr_type() of an ip.
  *
  * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
+ * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -20,47 +21,119 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("iptables addrtype match");
+MODULE_DESCRIPTION("Xtables: address type match for IPv4");
 
-static inline bool match_type(__be32 addr, u_int16_t mask)
+static inline bool match_type(const struct net_device *dev, __be32 addr,
+			      u_int16_t mask)
 {
-	return !!(mask & (1 << inet_addr_type(addr)));
+	return !!(mask & (1 << inet_dev_addr_type(&init_net, dev, addr)));
 }
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in, const struct net_device *out,
-		  const struct xt_match *match, const void *matchinfo,
-		  int offset, unsigned int protoff, bool *hotdrop)
+static bool
+addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+	       const struct net_device *out, const struct xt_match *match,
+	       const void *matchinfo, int offset, unsigned int protoff,
+	       bool *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
 	bool ret = true;
 
 	if (info->source)
-		ret &= match_type(iph->saddr, info->source)^info->invert_source;
+		ret &= match_type(NULL, iph->saddr, info->source) ^
+		       info->invert_source;
 	if (info->dest)
-		ret &= match_type(iph->daddr, info->dest)^info->invert_dest;
+		ret &= match_type(NULL, iph->daddr, info->dest) ^
+		       info->invert_dest;
 
 	return ret;
 }
 
-static struct xt_match addrtype_match __read_mostly = {
-	.name		= "addrtype",
-	.family		= AF_INET,
-	.match		= match,
-	.matchsize	= sizeof(struct ipt_addrtype_info),
-	.me		= THIS_MODULE
+static bool
+addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in,
+	       const struct net_device *out, const struct xt_match *match,
+	       const void *matchinfo, int offset, unsigned int protoff,
+	       bool *hotdrop)
+{
+	const struct ipt_addrtype_info_v1 *info = matchinfo;
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct net_device *dev = NULL;
+	bool ret = true;
+
+	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
+		dev = in;
+	else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
+		dev = out;
+
+	if (info->source)
+		ret &= match_type(dev, iph->saddr, info->source) ^
+		       (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
+	if (ret && info->dest)
+		ret &= match_type(dev, iph->daddr, info->dest) ^
+		       (info->flags & IPT_ADDRTYPE_INVERT_DEST);
+	return ret;
+}
+
+static bool
+addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void,
+			  const struct xt_match *match, void *matchinfo,
+			  unsigned int hook_mask)
+{
+	struct ipt_addrtype_info_v1 *info = matchinfo;
+
+	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
+	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
+		printk(KERN_ERR "ipt_addrtype: both incoming and outgoing "
+				"interface limitation cannot be selected\n");
+		return false;
+	}
+
+	if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) &&
+	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
+		printk(KERN_ERR "ipt_addrtype: output interface limitation "
+				"not valid in PRE_ROUTING and INPUT\n");
+		return false;
+	}
+
+	if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) &&
+	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
+		printk(KERN_ERR "ipt_addrtype: input interface limitation "
+				"not valid in POST_ROUTING and OUTPUT\n");
+		return false;
+	}
+
+	return true;
+}
+
+static struct xt_match addrtype_mt_reg[] __read_mostly = {
+	{
+		.name		= "addrtype",
+		.family		= AF_INET,
+		.match		= addrtype_mt_v0,
+		.matchsize	= sizeof(struct ipt_addrtype_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "addrtype",
+		.family		= AF_INET,
+		.revision	= 1,
+		.match		= addrtype_mt_v1,
+		.checkentry	= addrtype_mt_checkentry_v1,
+		.matchsize	= sizeof(struct ipt_addrtype_info_v1),
+		.me		= THIS_MODULE
+	}
 };
 
-static int __init ipt_addrtype_init(void)
+static int __init addrtype_mt_init(void)
 {
-	return xt_register_match(&addrtype_match);
+	return xt_register_matches(addrtype_mt_reg,
+				   ARRAY_SIZE(addrtype_mt_reg));
 }
 
-static void __exit ipt_addrtype_fini(void)
+static void __exit addrtype_mt_exit(void)
 {
-	xt_unregister_match(&addrtype_match);
+	xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
 }
 
-module_init(ipt_addrtype_init);
-module_exit(ipt_addrtype_fini);
+module_init(addrtype_mt_init);
+module_exit(addrtype_mt_exit);
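Both addrtype revisions use the matched-XOR-invert idiom: each enabled sub-test contributes matched ^ invert, and the partial results are ANDed into ret, so any failed (or inverted-but-matched) test vetoes the match. A minimal illustration of the pattern with plain booleans (the names are made up for the example, not taken from the module):

/* The matched-XOR-invert idiom, illustrative only. */
#include <stdbool.h>
#include <stdio.h>

static bool apply(bool matched, bool invert)
{
	return matched ^ invert;	/* invert flips the sense of the test */
}

int main(void)
{
	bool ret = true;

	ret &= apply(true, false);	/* plain match: stays true */
	ret &= apply(true, true);	/* inverted match: forces false */
	printf("%d\n", ret);		/* prints 0 */
	return 0;
}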
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 61b017fd743c..e977989629c7 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -16,7 +16,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
-MODULE_DESCRIPTION("iptables AH SPI match module");
+MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");
 
 #ifdef DEBUG_CONNTRACK
 #define duprintf(format, args...) printk(format , ## args)
@@ -37,14 +37,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+ah_mt(const struct sk_buff *skb, const struct net_device *in,
+      const struct net_device *out, const struct xt_match *match,
+      const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct ip_auth_hdr _ahdr;
 	const struct ip_auth_hdr *ah;
@@ -72,11 +67,9 @@ match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-checkentry(const char *tablename,
-	   const void *ip_void,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+ah_mt_check(const char *tablename, const void *ip_void,
+	    const struct xt_match *match, void *matchinfo,
+	    unsigned int hook_mask)
 {
 	const struct ipt_ah *ahinfo = matchinfo;
 
@@ -88,25 +81,25 @@ checkentry(const char *tablename,
 	return true;
 }
 
-static struct xt_match ah_match __read_mostly = {
+static struct xt_match ah_mt_reg __read_mostly = {
 	.name		= "ah",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= ah_mt,
 	.matchsize	= sizeof(struct ipt_ah),
 	.proto		= IPPROTO_AH,
-	.checkentry	= checkentry,
+	.checkentry	= ah_mt_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ah_init(void)
+static int __init ah_mt_init(void)
 {
-	return xt_register_match(&ah_match);
+	return xt_register_match(&ah_mt_reg);
 }
 
-static void __exit ipt_ah_fini(void)
+static void __exit ah_mt_exit(void)
 {
-	xt_unregister_match(&ah_match);
+	xt_unregister_match(&ah_mt_reg);
 }
 
-module_init(ipt_ah_init);
-module_exit(ipt_ah_fini);
+module_init(ah_mt_init);
+module_exit(ah_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index d6925c674069..749de8284ce5 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -19,7 +19,7 @@
 #include <linux/netfilter_ipv4/ipt_ecn.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables ECN matching module");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
 MODULE_LICENSE("GPL");
 
 static inline bool match_ip(const struct sk_buff *skb,
@@ -67,10 +67,10 @@ static inline bool match_tcp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in, const struct net_device *out,
-		  const struct xt_match *match, const void *matchinfo,
-		  int offset, unsigned int protoff, bool *hotdrop)
+static bool
+ecn_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 
@@ -88,9 +88,10 @@ static bool match(const struct sk_buff *skb,
 	return true;
 }
 
-static bool checkentry(const char *tablename, const void *ip_void,
-		       const struct xt_match *match,
-		       void *matchinfo, unsigned int hook_mask)
+static bool
+ecn_mt_check(const char *tablename, const void *ip_void,
+	     const struct xt_match *match, void *matchinfo,
+	     unsigned int hook_mask)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 	const struct ipt_ip *ip = ip_void;
@@ -111,24 +112,24 @@ static bool checkentry(const char *tablename, const void *ip_void,
 	return true;
 }
 
-static struct xt_match ecn_match __read_mostly = {
+static struct xt_match ecn_mt_reg __read_mostly = {
 	.name		= "ecn",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= ecn_mt,
 	.matchsize	= sizeof(struct ipt_ecn_info),
-	.checkentry	= checkentry,
+	.checkentry	= ecn_mt_check,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ecn_init(void)
+static int __init ecn_mt_init(void)
 {
-	return xt_register_match(&ecn_match);
+	return xt_register_match(&ecn_mt_reg);
 }
 
-static void __exit ipt_ecn_fini(void)
+static void __exit ecn_mt_exit(void)
 {
-	xt_unregister_match(&ecn_match);
+	xt_unregister_match(&ecn_mt_reg);
 }
 
-module_init(ipt_ecn_init);
-module_exit(ipt_ecn_fini);
+module_init(ecn_mt_init);
+module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
deleted file mode 100644
index 0106dc955a69..000000000000
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ /dev/null
@@ -1,79 +0,0 @@
/*
 * iptables module to match IP address ranges
 *
 * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_iprange.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
MODULE_DESCRIPTION("iptables arbitrary IP range match module");

static bool
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const struct xt_match *match,
      const void *matchinfo,
      int offset, unsigned int protoff, bool *hotdrop)
{
	const struct ipt_iprange_info *info = matchinfo;
	const struct iphdr *iph = ip_hdr(skb);

	if (info->flags & IPRANGE_SRC) {
		if ((ntohl(iph->saddr) < ntohl(info->src.min_ip)
		     || ntohl(iph->saddr) > ntohl(info->src.max_ip))
		    ^ !!(info->flags & IPRANGE_SRC_INV)) {
			pr_debug("src IP %u.%u.%u.%u NOT in range %s"
				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
				 NIPQUAD(iph->saddr),
				 info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
				 NIPQUAD(info->src.min_ip),
				 NIPQUAD(info->src.max_ip));
			return false;
		}
	}
	if (info->flags & IPRANGE_DST) {
		if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip)
		     || ntohl(iph->daddr) > ntohl(info->dst.max_ip))
		    ^ !!(info->flags & IPRANGE_DST_INV)) {
			pr_debug("dst IP %u.%u.%u.%u NOT in range %s"
				 "%u.%u.%u.%u-%u.%u.%u.%u\n",
				 NIPQUAD(iph->daddr),
				 info->flags & IPRANGE_DST_INV ? "(INV) " : "",
				 NIPQUAD(info->dst.min_ip),
				 NIPQUAD(info->dst.max_ip));
			return false;
		}
	}
	return true;
}

static struct xt_match iprange_match __read_mostly = {
	.name		= "iprange",
	.family		= AF_INET,
	.match		= match,
	.matchsize	= sizeof(struct ipt_iprange_info),
	.me		= THIS_MODULE
};

static int __init ipt_iprange_init(void)
{
	return xt_register_match(&iprange_match);
}

static void __exit ipt_iprange_fini(void)
{
	xt_unregister_match(&iprange_match);
}

module_init(ipt_iprange_init);
module_exit(ipt_iprange_fini);
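The deleted match compares addresses only after ntohl() because big-endian wire values do not order numerically on little-endian hosts; min/max range checks have to be done in host byte order. A short demonstration of the pitfall:

/* Why iprange compares in host byte order, illustrative only. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t lo = inet_addr("10.0.0.2");	/* network byte order */
	uint32_t hi = inet_addr("10.0.1.1");

	/* On a little-endian host the raw comparison is meaningless... */
	printf("raw:  lo < hi ? %d\n", lo < hi);
	/* ...but after ntohl() the addresses order numerically. */
	printf("host: lo < hi ? %d\n", ntohl(lo) < ntohl(hi));
	return 0;
}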
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
deleted file mode 100644
index b14e77da7a33..000000000000
--- a/net/ipv4/netfilter/ipt_owner.c
+++ /dev/null
@@ -1,92 +0,0 @@
/* Kernel module to match various things tied to sockets associated with
   locally generated outgoing packets. */

/* (C) 2000 Marc Boucher <marc@mbsi.ca>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/file.h>
#include <linux/rcupdate.h>
#include <net/sock.h>

#include <linux/netfilter_ipv4/ipt_owner.h>
#include <linux/netfilter/x_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("iptables owner match");

static bool
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const struct xt_match *match,
      const void *matchinfo,
      int offset,
      unsigned int protoff,
      bool *hotdrop)
{
	const struct ipt_owner_info *info = matchinfo;

	if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
		return false;

	if (info->match & IPT_OWNER_UID) {
		if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
		    !!(info->invert & IPT_OWNER_UID))
			return false;
	}

	if (info->match & IPT_OWNER_GID) {
		if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
		    !!(info->invert & IPT_OWNER_GID))
			return false;
	}

	return true;
}

static bool
checkentry(const char *tablename,
	   const void *ip,
	   const struct xt_match *match,
	   void *matchinfo,
	   unsigned int hook_mask)
{
	const struct ipt_owner_info *info = matchinfo;

	if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) {
		printk("ipt_owner: pid, sid and command matching "
		       "not supported anymore\n");
		return false;
	}
	return true;
}

static struct xt_match owner_match __read_mostly = {
	.name		= "owner",
	.family		= AF_INET,
	.match		= match,
	.matchsize	= sizeof(struct ipt_owner_info),
	.hooks		= (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING),
	.checkentry	= checkentry,
	.me		= THIS_MODULE,
};

static int __init ipt_owner_init(void)
{
	return xt_register_match(&owner_match);
}

static void __exit ipt_owner_fini(void)
{
	xt_unregister_match(&owner_match);
}

module_init(ipt_owner_init);
module_exit(ipt_owner_fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 11d39fb5f38b..e3154a99c08a 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -30,7 +30,7 @@
 #include <linux/netfilter_ipv4/ipt_recent.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("IP tables recently seen matching module");
+MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
 MODULE_LICENSE("GPL");
 
 static unsigned int ip_list_tot = 100;
@@ -170,10 +170,10 @@ static void recent_table_flush(struct recent_table *t)
 }
 
 static bool
-ipt_recent_match(const struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, bool *hotdrop)
+recent_mt(const struct sk_buff *skb, const struct net_device *in,
+	  const struct net_device *out, const struct xt_match *match,
+	  const void *matchinfo, int offset, unsigned int protoff,
+	  bool *hotdrop)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -236,9 +236,9 @@ out:
 }
 
 static bool
-ipt_recent_checkentry(const char *tablename, const void *ip,
-		      const struct xt_match *match, void *matchinfo,
-		      unsigned int hook_mask)
+recent_mt_check(const char *tablename, const void *ip,
+		const struct xt_match *match, void *matchinfo,
+		unsigned int hook_mask)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -293,8 +293,7 @@ out:
 	return ret;
 }
 
-static void
-ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
+static void recent_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -455,17 +454,17 @@ static const struct file_operations recent_fops = {
 };
 #endif /* CONFIG_PROC_FS */
 
-static struct xt_match recent_match __read_mostly = {
+static struct xt_match recent_mt_reg __read_mostly = {
 	.name		= "recent",
 	.family		= AF_INET,
-	.match		= ipt_recent_match,
+	.match		= recent_mt,
 	.matchsize	= sizeof(struct ipt_recent_info),
-	.checkentry	= ipt_recent_checkentry,
-	.destroy	= ipt_recent_destroy,
+	.checkentry	= recent_mt_check,
+	.destroy	= recent_mt_destroy,
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_recent_init(void)
+static int __init recent_mt_init(void)
 {
 	int err;
 
@@ -473,27 +472,27 @@ static int __init ipt_recent_init(void)
 		return -EINVAL;
 	ip_list_hash_size = 1 << fls(ip_list_tot);
 
-	err = xt_register_match(&recent_match);
+	err = xt_register_match(&recent_mt_reg);
 #ifdef CONFIG_PROC_FS
 	if (err)
 		return err;
 	proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
 	if (proc_dir == NULL) {
-		xt_unregister_match(&recent_match);
+		xt_unregister_match(&recent_mt_reg);
 		err = -ENOMEM;
 	}
 #endif
 	return err;
 }
 
-static void __exit ipt_recent_exit(void)
+static void __exit recent_mt_exit(void)
 {
 	BUG_ON(!list_empty(&tables));
-	xt_unregister_match(&recent_match);
+	xt_unregister_match(&recent_mt_reg);
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry("ipt_recent", init_net.proc_net);
 #endif
 }
 
-module_init(ipt_recent_init);
-module_exit(ipt_recent_exit);
+module_init(recent_mt_init);
+module_exit(recent_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
deleted file mode 100644
index e740441c973d..000000000000
--- a/net/ipv4/netfilter/ipt_tos.c
+++ /dev/null
@@ -1,55 +0,0 @@
/* Kernel module to match TOS values. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/ip.h>
#include <linux/module.h>
#include <linux/skbuff.h>

#include <linux/netfilter_ipv4/ipt_tos.h>
#include <linux/netfilter/x_tables.h>

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("iptables TOS match module");

static bool
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const struct xt_match *match,
      const void *matchinfo,
      int offset,
      unsigned int protoff,
      bool *hotdrop)
{
	const struct ipt_tos_info *info = matchinfo;

	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
}

static struct xt_match tos_match __read_mostly = {
	.name		= "tos",
	.family		= AF_INET,
	.match		= match,
	.matchsize	= sizeof(struct ipt_tos_info),
	.me		= THIS_MODULE,
};

static int __init ipt_multiport_init(void)
{
	return xt_register_match(&tos_match);
}

static void __exit ipt_multiport_fini(void)
{
	xt_unregister_match(&tos_match);
}

module_init(ipt_multiport_init);
module_exit(ipt_multiport_fini);
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index a439900a4ba5..e0b8caeb710c 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -15,13 +15,13 @@
 #include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IP tables TTL matching module");
+MODULE_DESCRIPTION("Xtables: IPv4 TTL field match");
 MODULE_LICENSE("GPL");
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in, const struct net_device *out,
-		  const struct xt_match *match, const void *matchinfo,
-		  int offset, unsigned int protoff, bool *hotdrop)
+static bool
+ttl_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
@@ -44,23 +44,23 @@ static bool match(const struct sk_buff *skb,
 	return false;
 }
 
-static struct xt_match ttl_match __read_mostly = {
+static struct xt_match ttl_mt_reg __read_mostly = {
 	.name		= "ttl",
 	.family		= AF_INET,
-	.match		= match,
+	.match		= ttl_mt,
 	.matchsize	= sizeof(struct ipt_ttl_info),
 	.me		= THIS_MODULE,
 };
 
-static int __init ipt_ttl_init(void)
+static int __init ttl_mt_init(void)
 {
-	return xt_register_match(&ttl_match);
+	return xt_register_match(&ttl_mt_reg);
 }
 
-static void __exit ipt_ttl_fini(void)
+static void __exit ttl_mt_exit(void)
 {
-	xt_unregister_match(&ttl_match);
+	xt_unregister_match(&ttl_mt_reg);
 }
 
-module_init(ipt_ttl_init);
-module_exit(ipt_ttl_fini);
+module_init(ttl_mt_init);
+module_exit(ttl_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index ba3262c60437..29bb4f9fbda0 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -19,7 +19,9 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables filter table");
 
-#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
+#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -33,14 +35,14 @@ static struct
 	.num_entries = 4,
 	.size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
 	.hook_entry = {
-		[NF_IP_LOCAL_IN] = 0,
-		[NF_IP_FORWARD] = sizeof(struct ipt_standard),
-		[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+		[NF_INET_LOCAL_IN] = 0,
+		[NF_INET_FORWARD] = sizeof(struct ipt_standard),
+		[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
 	},
 	.underflow = {
-		[NF_IP_LOCAL_IN] = 0,
-		[NF_IP_FORWARD] = sizeof(struct ipt_standard),
-		[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+		[NF_INET_LOCAL_IN] = 0,
+		[NF_INET_FORWARD] = sizeof(struct ipt_standard),
+		[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
 	},
 	},
 	.entries = {
@@ -89,26 +91,26 @@ ipt_local_out_hook(unsigned int hook,
 	return ipt_do_table(skb, hook, in, out, &packet_filter);
 }
 
-static struct nf_hook_ops ipt_ops[] = {
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
 	{
 		.hook		= ipt_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 	{
 		.hook		= ipt_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_FORWARD,
+		.hooknum	= NF_INET_FORWARD,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 	{
 		.hook		= ipt_local_out_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_FILTER,
 	},
 };
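The NF_IP_* to NF_INET_* churn in this table and the ones that follow comes from 2.6.25 folding the separate IPv4 and IPv6 hook constants into a single enum shared by both families; the numeric values stay the same. Roughly, the unified enum has this shape (see include/linux/netfilter.h for the authoritative definition):

/* Approximate shape of the unified hook enum, for orientation only. */
enum nf_inet_hooks {
	NF_INET_PRE_ROUTING,	/* 0: just off the wire, before routing */
	NF_INET_LOCAL_IN,	/* 1: destined for this host */
	NF_INET_FORWARD,	/* 2: routed through */
	NF_INET_LOCAL_OUT,	/* 3: generated locally */
	NF_INET_POST_ROUTING,	/* 4: about to hit the wire */
	NF_INET_NUMHOOKS
};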
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index b4360a69d5ca..5c4be202430c 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -21,11 +21,11 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables mangle table");
 
-#define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | \
-			    (1 << NF_IP_LOCAL_IN) | \
-			    (1 << NF_IP_FORWARD) | \
-			    (1 << NF_IP_LOCAL_OUT) | \
-			    (1 << NF_IP_POST_ROUTING))
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+			    (1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT) | \
+			    (1 << NF_INET_POST_ROUTING))
 
 /* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
 static struct
@@ -40,18 +40,18 @@ static struct
 	.num_entries = 6,
 	.size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
 	.hook_entry = {
-		[NF_IP_PRE_ROUTING] = 0,
-		[NF_IP_LOCAL_IN] = sizeof(struct ipt_standard),
-		[NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2,
-		[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
-		[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
+		[NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
+		[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
+		[NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
 	},
 	.underflow = {
-		[NF_IP_PRE_ROUTING] = 0,
-		[NF_IP_LOCAL_IN] = sizeof(struct ipt_standard),
-		[NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2,
-		[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
-		[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
+		[NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
+		[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
+		[NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
 	},
 	},
 	.entries = {
@@ -128,40 +128,40 @@ ipt_local_hook(unsigned int hook,
 	return ret;
 }
 
-static struct nf_hook_ops ipt_ops[] = {
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
 	{
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_FORWARD,
+		.hooknum	= NF_INET_FORWARD,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
 		.hook		= ipt_local_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 	{
 		.hook		= ipt_route_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_MANGLE,
 	},
 };
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index f8678651250f..dc34aa274533 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -7,7 +7,7 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <net/ip.h>
 
-#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -21,12 +21,12 @@ static struct
 	.num_entries = 3,
 	.size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
 	.hook_entry = {
-		[NF_IP_PRE_ROUTING] = 0,
-		[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard)
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
 	},
 	.underflow = {
-		[NF_IP_PRE_ROUTING] = 0,
-		[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard)
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
 	},
 	},
 	.entries = {
@@ -74,18 +74,18 @@ ipt_local_hook(unsigned int hook,
 }
 
 /* 'raw' is the very first table. */
-static struct nf_hook_ops ipt_ops[] = {
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
 	{
 		.hook = ipt_hook,
 		.pf = PF_INET,
-		.hooknum = NF_IP_PRE_ROUTING,
+		.hooknum = NF_INET_PRE_ROUTING,
 		.priority = NF_IP_PRI_RAW,
 		.owner = THIS_MODULE,
 	},
 	{
 		.hook = ipt_local_hook,
 		.pf = PF_INET,
-		.hooknum = NF_IP_LOCAL_OUT,
+		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP_PRI_RAW,
 		.owner = THIS_MODULE,
 	},
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 910dae732a0f..ac3d61d8026e 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -56,12 +56,6 @@ static int ipv4_print_tuple(struct seq_file *s,
 		       NIPQUAD(tuple->dst.u3.ip));
 }
 
-static int ipv4_print_conntrack(struct seq_file *s,
-				const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns new sk_buff, or NULL */
 static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
@@ -150,7 +144,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 	/* Gather fragments. */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		if (nf_ct_ipv4_gather_frags(skb,
-					    hooknum == NF_IP_PRE_ROUTING ?
+					    hooknum == NF_INET_PRE_ROUTING ?
 					    IP_DEFRAG_CONNTRACK_IN :
 					    IP_DEFRAG_CONNTRACK_OUT))
 			return NF_STOLEN;
@@ -185,61 +179,61 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 
 /* Connection tracking may drop packets, but never alters them, so
    make it the first hook. */
-static struct nf_hook_ops ipv4_conntrack_ops[] = {
+static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
 	{
 		.hook		= ipv4_conntrack_defrag,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
 	},
 	{
 		.hook		= ipv4_conntrack_in,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK,
 	},
 	{
 		.hook		= ipv4_conntrack_defrag,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
 	},
 	{
 		.hook		= ipv4_conntrack_local,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_CONNTRACK,
 	},
 	{
 		.hook		= ipv4_conntrack_help,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
 	},
 	{
 		.hook		= ipv4_conntrack_help,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
 	},
 	{
 		.hook		= ipv4_confirm,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
 	},
 	{
 		.hook		= ipv4_confirm,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
 	},
 };
@@ -363,10 +357,8 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
 				const struct nf_conntrack_tuple *tuple)
 {
-	NLA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
-		&tuple->src.u3.ip);
-	NLA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
-		&tuple->dst.u3.ip);
+	NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
+	NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
 	return 0;
 
 nla_put_failure:
@@ -384,8 +376,8 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
 	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
 		return -EINVAL;
 
-	t->src.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_SRC]);
-	t->dst.u3.ip = *(__be32 *)nla_data(tb[CTA_IP_V4_DST]);
+	t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]);
+	t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]);
 
 	return 0;
 }
@@ -405,7 +397,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
 	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
 	.invert_tuple	 = ipv4_invert_tuple,
 	.print_tuple	 = ipv4_print_tuple,
-	.print_conntrack = ipv4_print_conntrack,
 	.get_l4proto	 = ipv4_get_l4proto,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 	.tuple_to_nlattr = ipv4_tuple_to_nlattr,
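The NLA_PUT_BE32()/nla_get_be32() conversion above swaps open-coded length-plus-pointer attribute handling for typed helpers; the wire payload is still the same four big-endian bytes. A userspace sketch of the accessor side, to show that only the call site changes, not the encoding (get_be32 here is a stand-in, not a libnl function):

/* What a typed be32 attribute accessor boils down to, illustrative only. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

typedef uint32_t be32;	/* stand-in for the kernel's __be32 */

static be32 get_be32(const void *attr_payload)
{
	be32 v;

	/* copy the fixed-size payload out; byte order is left untouched */
	memcpy(&v, attr_payload, sizeof(v));
	return v;
}

int main(void)
{
	unsigned char payload[4] = { 192, 168, 0, 1 };	/* 192.168.0.1 */

	printf("0x%08x\n", get_be32(payload));
	return 0;
}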
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 741f3dfaa5a1..543c02b74c96 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -121,10 +121,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		       ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
 		return -ENOSPC;
 
-	if (l3proto->print_conntrack(s, ct))
-		return -ENOSPC;
-
-	if (l4proto->print_conntrack(s, ct))
+	if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
 		return -ENOSPC;
 
 	if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index adcbaf6d4299..4004a04c5510 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -18,6 +18,7 @@
18#include <net/netfilter/nf_conntrack_tuple.h> 18#include <net/netfilter/nf_conntrack_tuple.h>
19#include <net/netfilter/nf_conntrack_l4proto.h> 19#include <net/netfilter/nf_conntrack_l4proto.h>
20#include <net/netfilter/nf_conntrack_core.h> 20#include <net/netfilter/nf_conntrack_core.h>
21#include <net/netfilter/nf_log.h>
21 22
22static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; 23static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
23 24
@@ -73,13 +74,6 @@ static int icmp_print_tuple(struct seq_file *s,
73 ntohs(tuple->src.u.icmp.id)); 74 ntohs(tuple->src.u.icmp.id));
74} 75}
75 76
76/* Print out the private part of the conntrack. */
77static int icmp_print_conntrack(struct seq_file *s,
78 const struct nf_conn *conntrack)
79{
80 return 0;
81}
82
83/* Returns verdict for packet, or -1 for invalid. */ 77/* Returns verdict for packet, or -1 for invalid. */
84static int icmp_packet(struct nf_conn *ct, 78static int icmp_packet(struct nf_conn *ct,
85 const struct sk_buff *skb, 79 const struct sk_buff *skb,
@@ -128,7 +122,6 @@ static int icmp_new(struct nf_conn *conntrack,
128 return 1; 122 return 1;
129} 123}
130 124
131extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
132/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ 125/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
133static int 126static int
134icmp_error_message(struct sk_buff *skb, 127icmp_error_message(struct sk_buff *skb,
@@ -195,7 +188,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
195 } 188 }
196 189
197 /* See ip_conntrack_proto_tcp.c */ 190 /* See ip_conntrack_proto_tcp.c */
198 if (nf_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && 191 if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
199 nf_ip_checksum(skb, hooknum, dataoff, 0)) { 192 nf_ip_checksum(skb, hooknum, dataoff, 0)) {
200 if (LOG_INVALID(IPPROTO_ICMP)) 193 if (LOG_INVALID(IPPROTO_ICMP))
201 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 194 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
@@ -235,12 +228,9 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
235static int icmp_tuple_to_nlattr(struct sk_buff *skb, 228static int icmp_tuple_to_nlattr(struct sk_buff *skb,
236 const struct nf_conntrack_tuple *t) 229 const struct nf_conntrack_tuple *t)
237{ 230{
238 NLA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), 231 NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id);
239 &t->src.u.icmp.id); 232 NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type);
240 NLA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), 233 NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code);
241 &t->dst.u.icmp.type);
242 NLA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
243 &t->dst.u.icmp.code);
244 234
245 return 0; 235 return 0;
246 236
@@ -262,12 +252,9 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
 	    || !tb[CTA_PROTO_ICMP_ID])
 		return -EINVAL;
 
-	tuple->dst.u.icmp.type =
-		*(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_TYPE]);
-	tuple->dst.u.icmp.code =
-		*(u_int8_t *)nla_data(tb[CTA_PROTO_ICMP_CODE]);
-	tuple->src.u.icmp.id =
-		*(__be16 *)nla_data(tb[CTA_PROTO_ICMP_ID]);
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
 
 	if (tuple->dst.u.icmp.type >= sizeof(invmap)
 	    || !invmap[tuple->dst.u.icmp.type])
@@ -315,7 +302,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
 	.pkt_to_tuple		= icmp_pkt_to_tuple,
 	.invert_tuple		= icmp_invert_tuple,
 	.print_tuple		= icmp_print_tuple,
-	.print_conntrack	= icmp_print_conntrack,
 	.packet			= icmp_packet,
 	.new			= icmp_new,
 	.error			= icmp_error,
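The conversions above replace open-coded NLA_PUT()/nla_data() pairs with the typed netlink attribute helpers, so the attribute size is implied by the accessor rather than spelled out at each call site. A minimal sketch of how the two sides pair up, assuming a hypothetical u8 attribute MYATTR_FOO (not from this patch):

/* Hedged sketch: NLA_PUT_U8() encodes a typed attribute and jumps to
 * the nla_put_failure label when the skb runs out of tailroom;
 * nla_get_u8() is its decoding counterpart.  MYATTR_FOO is invented
 * purely for illustration.
 */
static int my_dump(struct sk_buff *skb, u8 foo)
{
	NLA_PUT_U8(skb, MYATTR_FOO, foo);	/* was: NLA_PUT(skb, MYATTR_FOO, sizeof(u_int8_t), &foo) */
	return 0;

nla_put_failure:
	return -1;
}

static void my_parse(struct nlattr *tb[], u8 *foo)
{
	*foo = nla_get_u8(tb[MYATTR_FOO]);	/* was: *(u_int8_t *)nla_data(tb[MYATTR_FOO]) */
}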
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 86b465b176ba..e53ae1ef8f5e 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -33,27 +33,28 @@
 
 static DEFINE_RWLOCK(nf_nat_lock);
 
-static struct nf_conntrack_l3proto *l3proto = NULL;
+static struct nf_conntrack_l3proto *l3proto __read_mostly;
 
 /* Calculated at init based on memory size */
-static unsigned int nf_nat_htable_size;
+static unsigned int nf_nat_htable_size __read_mostly;
 static int nf_nat_vmalloced;
 
-static struct hlist_head *bysource;
+static struct hlist_head *bysource __read_mostly;
 
 #define MAX_IP_NAT_PROTO 256
-static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
+static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
+	__read_mostly;
 
-static inline struct nf_nat_protocol *
+static inline const struct nf_nat_protocol *
 __nf_nat_proto_find(u_int8_t protonum)
 {
 	return rcu_dereference(nf_nat_protos[protonum]);
 }
 
-struct nf_nat_protocol *
+const struct nf_nat_protocol *
 nf_nat_proto_find_get(u_int8_t protonum)
 {
-	struct nf_nat_protocol *p;
+	const struct nf_nat_protocol *p;
 
 	rcu_read_lock();
 	p = __nf_nat_proto_find(protonum);
@@ -66,7 +67,7 @@ nf_nat_proto_find_get(u_int8_t protonum)
 EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
 
 void
-nf_nat_proto_put(struct nf_nat_protocol *p)
+nf_nat_proto_put(const struct nf_nat_protocol *p)
 {
 	module_put(p->me);
 }
@@ -76,10 +77,13 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);
 static inline unsigned int
 hash_by_src(const struct nf_conntrack_tuple *tuple)
 {
+	unsigned int hash;
+
 	/* Original src, to ensure we map it consistently if poss. */
-	return jhash_3words((__force u32)tuple->src.u3.ip,
+	hash = jhash_3words((__force u32)tuple->src.u3.ip,
 			    (__force u32)tuple->src.u.all,
-			    tuple->dst.protonum, 0) % nf_nat_htable_size;
+			    tuple->dst.protonum, 0);
+	return ((u64)hash * nf_nat_htable_size) >> 32;
 }
 
 /* Is this tuple already taken? (not by us) */
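The hunk above trades a modulo for a multiply-and-shift when reducing a 32-bit hash to a bucket index, avoiding an integer division on the packet path. A minimal sketch of the idea in isolation:

/* Hedged sketch of the reduction used above: for a 32-bit hash h and a
 * table of n buckets, ((u64)h * n) >> 32 always lands in [0, n) and is
 * uniform as long as h itself is well distributed, while costing one
 * multiply instead of a division.
 */
static inline unsigned int bucket_of(u32 hash, unsigned int nbuckets)
{
	return ((u64)hash * nbuckets) >> 32;
}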
@@ -105,7 +109,7 @@ static int
 in_range(const struct nf_conntrack_tuple *tuple,
 	 const struct nf_nat_range *range)
 {
-	struct nf_nat_protocol *proto;
+	const struct nf_nat_protocol *proto;
 	int ret = 0;
 
 	/* If we are supposed to map IPs, then we must be in the
@@ -210,12 +214,13 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
 	maxip = ntohl(range->max_ip);
 	j = jhash_2words((__force u32)tuple->src.u3.ip,
 			 (__force u32)tuple->dst.u3.ip, 0);
-	*var_ipp = htonl(minip + j % (maxip - minip + 1));
+	j = ((u64)j * (maxip - minip + 1)) >> 32;
+	*var_ipp = htonl(minip + j);
 }
 
-/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
- * we change the source to map into the range. For NF_IP_PRE_ROUTING
- * and NF_IP_LOCAL_OUT, we change the destination to map into the
+/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
+ * we change the source to map into the range. For NF_INET_PRE_ROUTING
+ * and NF_INET_LOCAL_OUT, we change the destination to map into the
  * range. It might not be possible to get a unique tuple, but we try.
  * At worst (or if we race), we will end up with a final duplicate in
  * __ip_conntrack_confirm and drop the packet. */
@@ -226,7 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
-	struct nf_nat_protocol *proto;
+	const struct nf_nat_protocol *proto;
 
 	/* 1) If this srcip/proto/src-proto-part is currently mapped,
 	   and that same mapping gives a unique tuple within the given
@@ -276,12 +281,11 @@ out:
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
 		  const struct nf_nat_range *range,
-		  unsigned int hooknum)
+		  enum nf_nat_manip_type maniptype)
 {
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 	struct nf_conn_nat *nat;
 	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
-	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
 
 	/* nat helper or nfctnetlink also setup binding */
 	nat = nfct_nat(ct);
@@ -293,10 +297,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 	}
 	}
 
-	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
-		     hooknum == NF_IP_POST_ROUTING ||
-		     hooknum == NF_IP_LOCAL_IN ||
-		     hooknum == NF_IP_LOCAL_OUT);
+	NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
+		     maniptype == IP_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
 
 	/* What we've got will look like inverse of reply. Normally
@@ -355,7 +357,7 @@ manip_pkt(u_int16_t proto,
 	  enum nf_nat_manip_type maniptype)
 {
 	struct iphdr *iph;
-	struct nf_nat_protocol *p;
+	const struct nf_nat_protocol *p;
 
 	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
 		return 0;
@@ -372,10 +374,10 @@ manip_pkt(u_int16_t proto,
 	iph = (void *)skb->data + iphdroff;
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
-		nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
 		iph->saddr = target->src.u3.ip;
 	} else {
-		nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
 		iph->daddr = target->dst.u3.ip;
 	}
 	return 1;
@@ -515,7 +517,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
 
 /* Protocol registration. */
-int nf_nat_protocol_register(struct nf_nat_protocol *proto)
+int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
 {
 	int ret = 0;
 
@@ -532,7 +534,7 @@ int nf_nat_protocol_register(struct nf_nat_protocol *proto)
 EXPORT_SYMBOL(nf_nat_protocol_register);
 
 /* Noone stores the protocol anywhere; simply delete it. */
-void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
+void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
 {
 	write_lock_bh(&nf_nat_lock);
 	rcu_assign_pointer(nf_nat_protos[proto->protonum],
@@ -547,10 +549,8 @@ int
 nf_nat_port_range_to_nlattr(struct sk_buff *skb,
 			    const struct nf_nat_range *range)
 {
-	NLA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
-		&range->min.tcp.port);
-	NLA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
-		&range->max.tcp.port);
+	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.tcp.port);
+	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.tcp.port);
 
 	return 0;
 
@@ -568,8 +568,7 @@ nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
 
 	if (tb[CTA_PROTONAT_PORT_MIN]) {
 		ret = 1;
-		range->min.tcp.port =
-			*(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MIN]);
+		range->min.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
 	}
 
 	if (!tb[CTA_PROTONAT_PORT_MAX]) {
@@ -577,8 +576,7 @@ nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
 		range->max.tcp.port = range->min.tcp.port;
 	} else {
 		ret = 1;
-		range->max.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
+		range->max.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
 	}
 
 	return ret;
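Constifying the nf_nat_protocol table goes hand in hand with how it is published: writers install or clear entries with rcu_assign_pointer() under nf_nat_lock, while the packet path reads them locklessly. A hedged sketch of the reader-side pattern this relies on (a simplified reading of the code above, not a verbatim excerpt):

/* Sketch: dereference the RCU-published entry and pin its owning
 * module before leaving the read-side critical section, so the
 * protocol module cannot be unloaded while we hold the pointer.
 */
static const struct nf_nat_protocol *grab_proto(u_int8_t protonum)
{
	const struct nf_nat_protocol *p;

	rcu_read_lock();
	p = rcu_dereference(nf_nat_protos[protonum]);
	if (p && !try_module_get(p->me))
		p = NULL;
	rcu_read_unlock();
	return p;		/* release later with module_put(p->me) */
}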
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 93e18ef114f2..a121989fdad7 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -76,7 +76,7 @@ static int set_addr(struct sk_buff *skb,
 static int set_h225_addr(struct sk_buff *skb,
 			 unsigned char **data, int dataoff,
 			 TransportAddress *taddr,
-			 union nf_conntrack_address *addr, __be16 port)
+			 union nf_inet_addr *addr, __be16 port)
 {
 	return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
 			addr->ip, port);
@@ -86,7 +86,7 @@ static int set_h225_addr(struct sk_buff *skb,
 static int set_h245_addr(struct sk_buff *skb,
 			 unsigned char **data, int dataoff,
 			 H245_TransportAddress *taddr,
-			 union nf_conntrack_address *addr, __be16 port)
+			 union nf_inet_addr *addr, __be16 port)
 {
 	return set_addr(skb, data, dataoff,
 			taddr->unicastAddress.iPAddress.network,
@@ -103,7 +103,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int i;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 
 	for (i = 0; i < count; i++) {
 		if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) {
@@ -155,7 +155,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int i;
 	__be16 port;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 
 	for (i = 0; i < count; i++) {
 		if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
@@ -389,18 +389,14 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 	/* Change src to where master sends to */
 	range.flags = IP_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip =
 		new->master->tuplehash[!this->dir].tuple.src.u3.ip;
-
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
@@ -412,7 +408,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 	struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
 	int dir = CTINFO2DIR(ctinfo);
 	u_int16_t nated_port = ntohs(port);
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 
 	/* Set expectations for NAT */
 	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
@@ -479,17 +475,13 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
 	/* Change src to where master sends to */
 	range.flags = IP_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip = this->saved_ip;
-
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
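The change repeated across these helper callbacks is the nf_nat_setup_info() signature: callers used to pass a hook number whose only purpose was to be mapped to a manipulation type, as the deleted "hook doesn't matter" comments admit; now they name the manipulation directly. A minimal before/after sketch (the wrapper function is invented for illustration):

/* Sketch of the calling-convention change applied throughout:
 * the manip type is stated explicitly instead of being derived via
 * HOOK2MANIP() from an arbitrary hook number.
 */
static void bind_expected_src(struct nf_conn *ct, struct nf_nat_range *range)
{
	/* old: nf_nat_setup_info(ct, range, NF_IP_POST_ROUTING); */
	nf_nat_setup_info(ct, range, IP_NAT_MANIP_SRC);
}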
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 8718da00ef2a..4c0232842e75 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -20,6 +20,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -180,8 +181,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 						datalen, 0));
 		}
 	} else
-		nf_proto_csum_replace2(&tcph->check, skb,
-				       htons(oldlen), htons(datalen), 1);
+		inet_proto_csum_replace2(&tcph->check, skb,
+					 htons(oldlen), htons(datalen), 1);
 
 	if (rep_len != match_len) {
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -191,6 +192,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 		/* Tell TCP window tracking about seq change */
 		nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
 					ct, CTINFO2DIR(ctinfo));
+
+		nf_conntrack_event_cache(IPCT_NATSEQADJ, skb);
 	}
 	return 1;
 }
@@ -270,8 +273,8 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 			udph->check = CSUM_MANGLED_0;
 		}
 	} else
-		nf_proto_csum_replace2(&udph->check, skb,
-				       htons(oldlen), htons(datalen), 1);
+		inet_proto_csum_replace2(&udph->check, skb,
					 htons(oldlen), htons(datalen), 1);
 
 	return 1;
 }
@@ -310,10 +313,10 @@ sack_adjust(struct sk_buff *skb,
 			ntohl(sack->start_seq), new_start_seq,
 			ntohl(sack->end_seq), new_end_seq);
 
-		nf_proto_csum_replace4(&tcph->check, skb,
-				       sack->start_seq, new_start_seq, 0);
-		nf_proto_csum_replace4(&tcph->check, skb,
-				       sack->end_seq, new_end_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->start_seq, new_start_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->end_seq, new_end_seq, 0);
 		sack->start_seq = new_start_seq;
 		sack->end_seq = new_end_seq;
 		sackoff += sizeof(*sack);
@@ -397,8 +400,8 @@ nf_nat_seq_adjust(struct sk_buff *skb,
 	else
 		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
 
-	nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
-	nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
 
 	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
@@ -430,15 +433,13 @@ void nf_nat_follow_master(struct nf_conn *ct,
 	range.flags = IP_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
 }
 EXPORT_SYMBOL(nf_nat_follow_master);
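The nf_proto_csum_replace{2,4} helpers used throughout this file were renamed to the generic inet_proto_csum_replace{2,4}. Their semantics are unchanged: patch a transport checksum incrementally when a covered field changes, with the final flag saying whether the field is part of the pseudo-header (so CHECKSUM_PARTIAL skbs are handled correctly). A hedged sketch of typical usage, rewriting a TCP destination port (the function is illustrative, not from this patch):

/* Sketch: ports are covered by the TCP checksum but not by the
 * pseudo-header, hence the trailing 0; addresses and lengths, which
 * the pseudo-header does cover, pass 1 instead (as the hunks above do).
 */
static void rewrite_dport(struct sk_buff *skb, struct tcphdr *tcph,
			  __be16 newport)
{
	inet_proto_csum_replace2(&tcph->check, skb, tcph->dest, newport, 0);
	tcph->dest = newport;
}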
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 6817e7995f35..e63b944a2ebb 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -93,8 +93,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = IP_NAT_RANGE_MAP_IPS;
@@ -104,8 +103,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
 }
 
 /* outbound packets == from PNS to PAC */
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index b820f9960356..9fa272e73113 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -135,9 +135,10 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 	return 1;
 }
 
-static struct nf_nat_protocol gre __read_mostly = {
+static const struct nf_nat_protocol gre = {
 	.name			= "GRE",
 	.protonum		= IPPROTO_GRE,
+	.me			= THIS_MODULE,
 	.manip_pkt		= gre_manip_pkt,
 	.in_range		= gre_in_range,
 	.unique_tuple		= gre_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index b9fc724388fc..a0e44c953cb6 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -65,13 +65,13 @@ icmp_manip_pkt(struct sk_buff *skb,
 		return 0;
 
 	hdr = (struct icmphdr *)(skb->data + hdroff);
-	nf_proto_csum_replace2(&hdr->checksum, skb,
+	inet_proto_csum_replace2(&hdr->checksum, skb,
 				 hdr->un.echo.id, tuple->src.u.icmp.id, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
 }
 
-struct nf_nat_protocol nf_nat_protocol_icmp = {
+const struct nf_nat_protocol nf_nat_protocol_icmp = {
 	.name			= "ICMP",
 	.protonum		= IPPROTO_ICMP,
 	.me			= THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 6bab2e184455..da23e9fbe679 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -132,12 +132,12 @@ tcp_manip_pkt(struct sk_buff *skb,
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-	nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
-	nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
+	inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
 	return 1;
 }
 
-struct nf_nat_protocol nf_nat_protocol_tcp = {
+const struct nf_nat_protocol nf_nat_protocol_tcp = {
 	.name			= "TCP",
 	.protonum		= IPPROTO_TCP,
 	.me			= THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index cbf1a61e2908..10df4db078af 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -117,9 +117,9 @@ udp_manip_pkt(struct sk_buff *skb,
 		portptr = &hdr->dest;
 	}
 	if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-		nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
-		nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
+		inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
 					 0);
 		if (!hdr->check)
 			hdr->check = CSUM_MANGLED_0;
 	}
@@ -127,7 +127,7 @@ udp_manip_pkt(struct sk_buff *skb,
 	return 1;
 }
 
-struct nf_nat_protocol nf_nat_protocol_udp = {
+const struct nf_nat_protocol nf_nat_protocol_udp = {
 	.name			= "UDP",
 	.protonum		= IPPROTO_UDP,
 	.me			= THIS_MODULE,
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index cfd2742e9706..a26efeb073cb 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -45,7 +45,7 @@ unknown_manip_pkt(struct sk_buff *skb,
 	return 1;
 }
 
-struct nf_nat_protocol nf_nat_unknown_protocol = {
+const struct nf_nat_protocol nf_nat_unknown_protocol = {
 	.name			= "unknown",
 	/* .me isn't set: getting a ref to this cannot fail. */
 	.manip_pkt		= unknown_manip_pkt,
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 46b25ab5f78b..519182269e76 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -24,7 +24,9 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_rule.h>
 
-#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
+#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+			 (1 << NF_INET_POST_ROUTING) | \
+			 (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -38,14 +40,14 @@ static struct
 	.num_entries = 4,
 	.size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
 	.hook_entry = {
-		[NF_IP_PRE_ROUTING] = 0,
-		[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
-		[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
+		[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
 	},
 	.underflow = {
-		[NF_IP_PRE_ROUTING] = 0,
-		[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
-		[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
+		[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
 	},
 },
 .entries = {
@@ -76,7 +78,7 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
-	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
+	NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
@@ -85,7 +87,7 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
 		    ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
 	NF_CT_ASSERT(out);
 
-	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
+	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
 }
 
 /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
@@ -95,7 +97,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
 	struct rtable *rt;
 
-	if (ip_route_output_key(&rt, &fl) != 0)
+	if (ip_route_output_key(&init_net, &rt, &fl) != 0)
 		return;
 
 	if (rt->rt_src != srcip && !warned) {
@@ -118,20 +120,20 @@ static unsigned int ipt_dnat_target(struct sk_buff *skb,
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = targinfo;
 
-	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
-		     hooknum == NF_IP_LOCAL_OUT);
+	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
+		     hooknum == NF_INET_LOCAL_OUT);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
-	if (hooknum == NF_IP_LOCAL_OUT &&
+	if (hooknum == NF_INET_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
 		warn_if_extra_mangle(ip_hdr(skb)->daddr,
 				     mr->range[0].min_ip);
 
-	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
+	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
 }
 
 static bool ipt_snat_checkentry(const char *tablename,
@@ -182,7 +184,7 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 
 	pr_debug("Allocating NULL binding for %p (%u.%u.%u.%u)\n",
 		 ct, NIPQUAD(ip));
-	return nf_nat_setup_info(ct, &range, hooknum);
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
 }
 
 unsigned int
@@ -201,7 +203,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
 
 	pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
 		 ct, NIPQUAD(ip));
-	return nf_nat_setup_info(ct, &range, hooknum);
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
 }
 
 int nf_nat_rule_find(struct sk_buff *skb,
@@ -227,7 +229,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= 1 << NF_IP_POST_ROUTING,
+	.hooks		= 1 << NF_INET_POST_ROUTING,
 	.checkentry	= ipt_snat_checkentry,
 	.family		= AF_INET,
 };
@@ -237,7 +239,7 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
 	.target		= ipt_dnat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
+	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= ipt_dnat_checkentry,
 	.family		= AF_INET,
 };
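This file shows the bulk renaming of the IPv4-only NF_IP_* hook constants to the family-independent NF_INET_* names, used both as array indices and as bits in validity masks. A hedged sketch of the bitmask convention, with an invented MY_VALID_HOOKS for illustration:

/* Sketch: a target valid in PREROUTING and OUTPUT advertises exactly
 * those two hook bits; the framework can then test membership with a
 * single shift-and-mask, mirroring NAT_VALID_HOOKS above.
 */
#define MY_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
			(1 << NF_INET_LOCAL_OUT))

static bool hook_allowed(unsigned int hooknum)
{
	return (1 << hooknum) & MY_VALID_HOOKS;
}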
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 8996ccb757db..606a170bf4ca 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -228,15 +228,13 @@ static void ip_nat_sdp_expect(struct nf_conn *ct,
 	range.flags = IP_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-	/* hook doesn't matter, but it has to do source manip */
-	nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip = exp->saved_ip;
-	/* hook doesn't matter, but it has to do destination manip */
-	nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
 }
 
 /* So, this packet has hit the connection tracking matching code.
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 03709d6b4b06..07f2a49926d4 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -60,7 +60,7 @@ MODULE_ALIAS("ip_nat_snmp_basic");
 
 #define SNMP_PORT 161
 #define SNMP_TRAP_PORT 162
-#define NOCT1(n) (*(u8 *)n)
+#define NOCT1(n) (*(u8 *)(n))
 
 static int debug;
 static DEFINE_SPINLOCK(snmp_lock);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 7db76ea9af91..99b2c788d5a8 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum,
 		if (unlikely(nf_ct_is_confirmed(ct)))
 			/* NAT module was loaded late */
 			ret = alloc_null_binding_confirmed(ct, hooknum);
-		else if (hooknum == NF_IP_LOCAL_IN)
+		else if (hooknum == NF_INET_LOCAL_IN)
 			/* LOCAL_IN hook doesn't have a chain! */
 			ret = alloc_null_binding(ct, hooknum);
 		else
@@ -273,13 +273,13 @@ nf_nat_adjust(unsigned int hooknum,
 
 /* We must be after connection tracking and before packet filtering. */
 
-static struct nf_hook_ops nf_nat_ops[] = {
+static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
 	/* Before packet filtering, change destination */
 	{
 		.hook		= nf_nat_in,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
+		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_NAT_DST,
 	},
 	/* After packet filtering, change source */
@@ -287,7 +287,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 	{
 		.hook		= nf_nat_out,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_NAT_SRC,
 	},
 	/* After conntrack, adjust sequence number */
@@ -295,7 +295,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 	{
 		.hook		= nf_nat_adjust,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
+		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_NAT_SEQ_ADJUST,
 	},
 	/* Before packet filtering, change destination */
@@ -303,7 +303,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 	{
 		.hook		= nf_nat_local_fn,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
+		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_NAT_DST,
 	},
 	/* After packet filtering, change source */
@@ -311,7 +311,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 	{
 		.hook		= nf_nat_fn,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_NAT_SRC,
 	},
 	/* After conntrack, adjust sequence number */
@@ -319,7 +319,7 @@ static struct nf_hook_ops nf_nat_ops[] = {
 	{
 		.hook		= nf_nat_adjust,
 		.owner		= THIS_MODULE,
 		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
+		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_NAT_SEQ_ADJUST,
 	},
 };
@@ -332,7 +332,7 @@ static int __init nf_nat_standalone_init(void)
 
 #ifdef CONFIG_XFRM
 	BUG_ON(ip_nat_decode_session != NULL);
-	ip_nat_decode_session = nat_decode_session;
+	rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
 #endif
 	ret = nf_nat_rule_init();
 	if (ret < 0) {
@@ -350,7 +350,7 @@ static int __init nf_nat_standalone_init(void)
 	nf_nat_rule_cleanup();
  cleanup_decode_session:
 #ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
+	rcu_assign_pointer(ip_nat_decode_session, NULL);
 	synchronize_net();
 #endif
 	return ret;
@@ -361,7 +361,7 @@ static void __exit nf_nat_standalone_fini(void)
 	nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
 	nf_nat_rule_cleanup();
 #ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
+	rcu_assign_pointer(ip_nat_decode_session, NULL);
 	synchronize_net();
 #endif
 	/* Conntrack caches are unregistered in nf_conntrack_cleanup */
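The ip_nat_decode_session hunks switch a bare assignment to rcu_assign_pointer() because the pointer is read locklessly by the packet path. A hedged sketch of this publish/unpublish pattern with an invented hook pointer:

/* Sketch: rcu_assign_pointer() orders initialisation of the callee
 * before the pointer becomes visible to readers; on teardown,
 * synchronize_net() waits out in-flight readers of the old value
 * before the module text can go away.
 */
static int (*my_hook)(struct sk_buff *skb) __read_mostly;

static void publish_hook(int (*fn)(struct sk_buff *))
{
	rcu_assign_pointer(my_hook, fn);
}

static void unpublish_hook(void)
{
	rcu_assign_pointer(my_hook, NULL);
	synchronize_net();	/* no reader still sees the old fn */
}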
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ce34b281803f..d63474c6b400 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -53,14 +53,16 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 {
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
-		   sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
+		   sock_prot_inuse_get(&tcp_prot),
+		   atomic_read(&tcp_orphan_count),
 		   tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
 		   atomic_read(&tcp_memory_allocated));
-	seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot));
-	seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot));
-	seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot));
+	seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot),
+		   atomic_read(&udp_memory_allocated));
+	seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
+	seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
 	seq_printf(seq, "FRAG: inuse %d memory %d\n",
-		   ip_frag_nqueues(), ip_frag_mem());
+		   ip_frag_nqueues(&init_net), ip_frag_mem(&init_net));
 	return 0;
 }
 
@@ -309,7 +311,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-		   IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl);
+		   IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
+		   sysctl_ip_default_ttl);
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e7050f8eabeb..85c08696abbe 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -80,38 +80,51 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
-struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE];
-DEFINE_RWLOCK(raw_v4_lock);
+static struct raw_hashinfo raw_v4_hashinfo = {
+	.lock = __RW_LOCK_UNLOCKED(),
+};
 
-static void raw_v4_hash(struct sock *sk)
+void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h)
 {
-	struct hlist_head *head = &raw_v4_htable[inet_sk(sk)->num &
-						 (RAWV4_HTABLE_SIZE - 1)];
+	struct hlist_head *head;
+
+	head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
 
-	write_lock_bh(&raw_v4_lock);
+	write_lock_bh(&h->lock);
 	sk_add_node(sk, head);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock_bh(&raw_v4_lock);
+	sock_prot_inuse_add(sk->sk_prot, 1);
+	write_unlock_bh(&h->lock);
 }
+EXPORT_SYMBOL_GPL(raw_hash_sk);
 
-static void raw_v4_unhash(struct sock *sk)
+void raw_unhash_sk(struct sock *sk, struct raw_hashinfo *h)
 {
-	write_lock_bh(&raw_v4_lock);
+	write_lock_bh(&h->lock);
 	if (sk_del_node_init(sk))
-		sock_prot_dec_use(sk->sk_prot);
-	write_unlock_bh(&raw_v4_lock);
+		sock_prot_inuse_add(sk->sk_prot, -1);
+	write_unlock_bh(&h->lock);
+}
+EXPORT_SYMBOL_GPL(raw_unhash_sk);
+
+static void raw_v4_hash(struct sock *sk)
+{
+	raw_hash_sk(sk, &raw_v4_hashinfo);
 }
 
-struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
-			     __be32 raddr, __be32 laddr,
-			     int dif)
+static void raw_v4_unhash(struct sock *sk)
+{
+	raw_unhash_sk(sk, &raw_v4_hashinfo);
+}
+
+static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
+		unsigned short num, __be32 raddr, __be32 laddr, int dif)
 {
 	struct hlist_node *node;
 
 	sk_for_each_from(sk, node) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (inet->num == num &&
+		if (sk->sk_net == net && inet->num == num &&
 		    !(inet->daddr && inet->daddr != raddr) &&
 		    !(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
 		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
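The hunk above folds the IPv4 raw-socket table and its lock into a raw_hashinfo object and exports raw_hash_sk()/raw_unhash_sk(), so the IPv6 raw code can reuse the same hash management with its own instance. A hedged sketch of the shape of that structure, based on what the code above manipulates (the name my_raw_hashinfo is illustrative; the real definition lives in include/net/raw.h of this era):

/* Sketch: one rwlock guarding an array of hlist buckets keyed by the
 * protocol number masked with RAW_HTABLE_SIZE - 1, shared by both
 * address families via the h parameter of raw_hash_sk()/raw_unhash_sk().
 */
struct my_raw_hashinfo {
	rwlock_t lock;
	struct hlist_head ht[RAW_HTABLE_SIZE];
};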
@@ -150,17 +163,20 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
  *	RFC 1122: SHOULD pass TOS value up to the transport layer.
  *	-> It does. And not only TOS, but all IP header.
  */
-int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
 {
 	struct sock *sk;
 	struct hlist_head *head;
 	int delivered = 0;
+	struct net *net;
 
-	read_lock(&raw_v4_lock);
-	head = &raw_v4_htable[hash];
+	read_lock(&raw_v4_hashinfo.lock);
+	head = &raw_v4_hashinfo.ht[hash];
 	if (hlist_empty(head))
 		goto out;
-	sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
+
+	net = skb->dev->nd_net;
+	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
 			     iph->saddr, iph->daddr,
 			     skb->dev->ifindex);
 
@@ -173,16 +189,34 @@ int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
 			if (clone)
 				raw_rcv(sk, clone);
 		}
-		sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
+		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
 				     iph->saddr, iph->daddr,
 				     skb->dev->ifindex);
 	}
 out:
-	read_unlock(&raw_v4_lock);
+	read_unlock(&raw_v4_hashinfo.lock);
 	return delivered;
 }
 
-void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
+int raw_local_deliver(struct sk_buff *skb, int protocol)
+{
+	int hash;
+	struct sock *raw_sk;
+
+	hash = protocol & (RAW_HTABLE_SIZE - 1);
+	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
+
+	/* If there maybe a raw socket we must check - if not we
+	 * don't care less
+	 */
+	if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
+		raw_sk = NULL;
+
+	return raw_sk != NULL;
+
+}
+
+static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	const int type = icmp_hdr(skb)->type;
@@ -236,12 +270,38 @@ void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
 	}
 }
 
+void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
+{
+	int hash;
+	struct sock *raw_sk;
+	struct iphdr *iph;
+	struct net *net;
+
+	hash = protocol & (RAW_HTABLE_SIZE - 1);
+
+	read_lock(&raw_v4_hashinfo.lock);
+	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
+	if (raw_sk != NULL) {
+		iph = (struct iphdr *)skb->data;
+		net = skb->dev->nd_net;
+
+		while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
+						iph->daddr, iph->saddr,
+						skb->dev->ifindex)) != NULL) {
+			raw_err(raw_sk, skb, info);
+			raw_sk = sk_next(raw_sk);
+			iph = (struct iphdr *)skb->data;
+		}
+	}
+	read_unlock(&raw_v4_hashinfo.lock);
+}
+
 static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
 	/* Charge it to the socket. */
 
 	if (sock_queue_rcv_skb(sk, skb) < 0) {
-		/* FIXME: increment a raw drops counter here */
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -252,6 +312,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
 int raw_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -320,7 +381,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 		icmp_out_count(((struct icmphdr *)
 			skb_transport_header(skb))->type);
 
-	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+	err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
 		      dst_output);
 	if (err > 0)
 		err = inet->recverr ? net_xmit_errno(err) : 0;
@@ -474,7 +535,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		if (msg->msg_flags & MSG_DONTROUTE)
 			tos |= RTO_ONLINK;
 
-		if (MULTICAST(daddr)) {
+		if (ipv4_is_multicast(daddr)) {
 			if (!ipc.oif)
 				ipc.oif = inet->mc_index;
 			if (!saddr)
@@ -497,7 +558,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		}
 
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(&rt, &fl, sk, 1);
+		err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
 	}
 	if (err)
 		goto done;
@@ -564,7 +625,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
 		goto out;
-	chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+	chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr);
 	ret = -EADDRNOTAVAIL;
 	if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
 	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -789,22 +850,18 @@ struct proto raw_prot = {
 };
 
 #ifdef CONFIG_PROC_FS
-struct raw_iter_state {
-	int bucket;
-};
-
-#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private)
-
 static struct sock *raw_get_first(struct seq_file *seq)
 {
 	struct sock *sk;
 	struct raw_iter_state* state = raw_seq_private(seq);
 
-	for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) {
+	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
+			++state->bucket) {
 		struct hlist_node *node;
 
-		sk_for_each(sk, node, &raw_v4_htable[state->bucket])
-			if (sk->sk_family == PF_INET)
+		sk_for_each(sk, node, &state->h->ht[state->bucket])
+			if (sk->sk_net == state->p.net &&
+			    sk->sk_family == state->family)
 				goto found;
 	}
 	sk = NULL;
@@ -820,10 +877,11 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
 		sk = sk_next(sk);
 try_again:
 		;
-	} while (sk && sk->sk_family != PF_INET);
+	} while (sk && sk->sk_net != state->p.net &&
+		 sk->sk_family != state->family);
 
-	if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
-		sk = sk_head(&raw_v4_htable[state->bucket]);
+	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
+		sk = sk_head(&state->h->ht[state->bucket]);
 		goto try_again;
 	}
 	return sk;
@@ -839,13 +897,16 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
 	return pos ? NULL : sk;
 }
 
-static void *raw_seq_start(struct seq_file *seq, loff_t *pos)
+void *raw_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	read_lock(&raw_v4_lock);
+	struct raw_iter_state *state = raw_seq_private(seq);
+
+	read_lock(&state->h->lock);
 	return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
+EXPORT_SYMBOL_GPL(raw_seq_start);
 
-static void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct sock *sk;
 
@@ -856,11 +917,15 @@ static void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++*pos;
 	return sk;
 }
+EXPORT_SYMBOL_GPL(raw_seq_next);
 
-static void raw_seq_stop(struct seq_file *seq, void *v)
+void raw_seq_stop(struct seq_file *seq, void *v)
 {
-	read_unlock(&raw_v4_lock);
+	struct raw_iter_state *state = raw_seq_private(seq);
+
+	read_unlock(&state->h->lock);
 }
+EXPORT_SYMBOL_GPL(raw_seq_stop);
 
 static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
 {
@@ -871,28 +936,30 @@ static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
 	srcp  = inet->num;
 
 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d",
 		i, src, srcp, dest, destp, sp->sk_state,
 		atomic_read(&sp->sk_wmem_alloc),
 		atomic_read(&sp->sk_rmem_alloc),
 		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
-		atomic_read(&sp->sk_refcnt), sp);
+		atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
 	return tmpbuf;
 }
 
+#define TMPSZ 128
+
 static int raw_seq_show(struct seq_file *seq, void *v)
 {
-	char tmpbuf[129];
+	char tmpbuf[TMPSZ+1];
 
 	if (v == SEQ_START_TOKEN)
-		seq_printf(seq, "%-127s\n",
+		seq_printf(seq, "%-*s\n", TMPSZ-1,
 			   " sl local_address rem_address st tx_queue "
 			   "rx_queue tr tm->when retrnsmt uid timeout "
-			   "inode");
+			   "inode drops");
 	else {
 		struct raw_iter_state *state = raw_seq_private(seq);
 
-		seq_printf(seq, "%-127s\n",
+		seq_printf(seq, "%-*s\n", TMPSZ-1,
 			   get_raw_sock(v, tmpbuf, state->bucket));
 	}
 	return 0;
@@ -905,29 +972,62 @@ static const struct seq_operations raw_seq_ops = {
905 .show = raw_seq_show, 972 .show = raw_seq_show,
906}; 973};
907 974
908static int raw_seq_open(struct inode *inode, struct file *file) 975int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h,
976 unsigned short family)
909{ 977{
910 return seq_open_private(file, &raw_seq_ops, 978 int err;
979 struct raw_iter_state *i;
980
981 err = seq_open_net(ino, file, &raw_seq_ops,
911 sizeof(struct raw_iter_state)); 982 sizeof(struct raw_iter_state));
983 if (err < 0)
984 return err;
985
986 i = raw_seq_private((struct seq_file *)file->private_data);
987 i->h = h;
988 i->family = family;
989 return 0;
990}
991EXPORT_SYMBOL_GPL(raw_seq_open);
992
993static int raw_v4_seq_open(struct inode *inode, struct file *file)
994{
995 return raw_seq_open(inode, file, &raw_v4_hashinfo, PF_INET);
912} 996}
913 997
914static const struct file_operations raw_seq_fops = { 998static const struct file_operations raw_seq_fops = {
915 .owner = THIS_MODULE, 999 .owner = THIS_MODULE,
916 .open = raw_seq_open, 1000 .open = raw_v4_seq_open,
917 .read = seq_read, 1001 .read = seq_read,
918 .llseek = seq_lseek, 1002 .llseek = seq_lseek,
919 .release = seq_release_private, 1003 .release = seq_release_net,
920}; 1004};
921 1005
922int __init raw_proc_init(void) 1006static __net_init int raw_init_net(struct net *net)
923{ 1007{
924 if (!proc_net_fops_create(&init_net, "raw", S_IRUGO, &raw_seq_fops)) 1008 if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops))
925 return -ENOMEM; 1009 return -ENOMEM;
1010
926 return 0; 1011 return 0;
927} 1012}
928 1013
1014static __net_exit void raw_exit_net(struct net *net)
1015{
1016 proc_net_remove(net, "raw");
1017}
1018
1019static __net_initdata struct pernet_operations raw_net_ops = {
1020 .init = raw_init_net,
1021 .exit = raw_exit_net,
1022};
1023
1024int __init raw_proc_init(void)
1025{
1026 return register_pernet_subsys(&raw_net_ops);
1027}
1028
929void __init raw_proc_exit(void) 1029void __init raw_proc_exit(void)
930{ 1030{
931 proc_net_remove(&init_net, "raw"); 1031 unregister_pernet_subsys(&raw_net_ops);
932} 1032}
933#endif /* CONFIG_PROC_FS */ 1033#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 28484f396b04..896c768e41a2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -92,6 +92,7 @@
 #include <linux/jhash.h>
 #include <linux/rcupdate.h>
 #include <linux/times.h>
+#include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/ip.h>
@@ -132,13 +133,14 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ;
 static int ip_rt_min_pmtu = 512 + 20 + 20;
 static int ip_rt_min_advmss = 256;
 static int ip_rt_secret_interval = 10 * 60 * HZ;
+static int ip_rt_flush_expected;
 static unsigned long rt_deadline;
 
 #define RTprint(a...) printk(KERN_DEBUG a)
 
 static struct timer_list rt_flush_timer;
-static void rt_check_expire(struct work_struct *work);
-static DECLARE_DELAYED_WORK(expires_work, rt_check_expire);
+static void rt_worker_func(struct work_struct *work);
+static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
 static struct timer_list rt_secret_timer;
 
 /*
@@ -152,7 +154,7 @@ static void ipv4_dst_ifdown(struct dst_entry *dst,
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void ipv4_link_failure(struct sk_buff *skb);
 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
-static int rt_garbage_collect(void);
+static int rt_garbage_collect(struct dst_ops *ops);
 
 
 static struct dst_ops ipv4_dst_ops = {
@@ -165,6 +167,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.negative_advice = ipv4_negative_advice,
 	.link_failure = ipv4_link_failure,
 	.update_pmtu = ip_rt_update_pmtu,
+	.local_out = ip_local_out,
 	.entry_size = sizeof(struct rtable),
 };
 
@@ -232,16 +235,25 @@ struct rt_hash_bucket {
 
 static spinlock_t *rt_hash_locks;
 # define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
-# define rt_hash_lock_init() { \
-		int i; \
-		rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
-		if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
-		for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
-			spin_lock_init(&rt_hash_locks[i]); \
-	}
+
+static __init void rt_hash_lock_init(void)
+{
+	int i;
+
+	rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ,
+			GFP_KERNEL);
+	if (!rt_hash_locks)
+		panic("IP: failed to allocate rt_hash_locks\n");
+
+	for (i = 0; i < RT_HASH_LOCK_SZ; i++)
+		spin_lock_init(&rt_hash_locks[i]);
+}
 #else
 # define rt_hash_lock_addr(slot) NULL
-# define rt_hash_lock_init()
+
+static inline void rt_hash_lock_init(void)
+{
+}
 #endif
 
 static struct rt_hash_bucket *rt_hash_table;
@@ -478,6 +490,83 @@ static const struct file_operations rt_cpu_seq_fops = {
 	.release = seq_release,
 };
 
+#ifdef CONFIG_NET_CLS_ROUTE
+static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
+			   int length, int *eof, void *data)
+{
+	unsigned int i;
+
+	if ((offset & 3) || (length & 3))
+		return -EIO;
+
+	if (offset >= sizeof(struct ip_rt_acct) * 256) {
+		*eof = 1;
+		return 0;
+	}
+
+	if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
+		length = sizeof(struct ip_rt_acct) * 256 - offset;
+		*eof = 1;
+	}
+
+	offset /= sizeof(u32);
+
+	if (length > 0) {
+		u32 *dst = (u32 *) buffer;
+
+		*start = buffer;
+		memset(dst, 0, length);
+
+		for_each_possible_cpu(i) {
+			unsigned int j;
+			u32 *src;
+
+			src = ((u32 *) per_cpu_ptr(ip_rt_acct, i)) + offset;
+			for (j = 0; j < length/4; j++)
+				dst[j] += src[j];
+		}
+	}
+	return length;
+}
+#endif
+
+static __init int ip_rt_proc_init(struct net *net)
+{
+	struct proc_dir_entry *pde;
+
+	pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
+			&rt_cache_seq_fops);
+	if (!pde)
+		goto err1;
+
+	pde = create_proc_entry("rt_cache", S_IRUGO, net->proc_net_stat);
+	if (!pde)
+		goto err2;
+
+	pde->proc_fops = &rt_cpu_seq_fops;
+
+#ifdef CONFIG_NET_CLS_ROUTE
+	pde = create_proc_read_entry("rt_acct", 0, net->proc_net,
+			ip_rt_acct_read, NULL);
+	if (!pde)
+		goto err3;
+#endif
+	return 0;
+
+#ifdef CONFIG_NET_CLS_ROUTE
+err3:
+	remove_proc_entry("rt_cache", net->proc_net_stat);
+#endif
+err2:
+	remove_proc_entry("rt_cache", net->proc_net);
+err1:
+	return -ENOMEM;
+}
+#else
+static inline int ip_rt_proc_init(struct net *net)
+{
+	return 0;
+}
 #endif /* CONFIG_PROC_FS */
 
 static __inline__ void rt_free(struct rtable *rt)
@@ -559,7 +648,41 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 		(fl1->iif ^ fl2->iif)) == 0;
 }
 
-static void rt_check_expire(struct work_struct *work)
+static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
+{
+	return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net;
+}
+
+/*
+ * Perform a full scan of hash table and free all entries.
+ * Can be called by a softirq or a process.
+ * In the later case, we want to be reschedule if necessary
+ */
+static void rt_do_flush(int process_context)
+{
+	unsigned int i;
+	struct rtable *rth, *next;
+
+	for (i = 0; i <= rt_hash_mask; i++) {
+		if (process_context && need_resched())
+			cond_resched();
+		rth = rt_hash_table[i].chain;
+		if (!rth)
+			continue;
+
+		spin_lock_bh(rt_hash_lock_addr(i));
+		rth = rt_hash_table[i].chain;
+		rt_hash_table[i].chain = NULL;
+		spin_unlock_bh(rt_hash_lock_addr(i));
+
+		for (; rth; rth = next) {
+			next = rth->u.dst.rt_next;
+			rt_free(rth);
+		}
+	}
+}
+
+static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
@@ -605,33 +728,33 @@ static void rt_check_expire(struct work_struct *work)
 		spin_unlock_bh(rt_hash_lock_addr(i));
 	}
 	rover = i;
+}
+
+/*
+ * rt_worker_func() is run in process context.
+ * If a whole flush was scheduled, it is done.
+ * Else, we call rt_check_expire() to scan part of the hash table
+ */
+static void rt_worker_func(struct work_struct *work)
+{
+	if (ip_rt_flush_expected) {
+		ip_rt_flush_expected = 0;
+		rt_do_flush(1);
+	} else
+		rt_check_expire();
 	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
 }
 
 /* This can run from both BH and non-BH contexts, the latter
  * in the case of a forced flush event.
  */
-static void rt_run_flush(unsigned long dummy)
+static void rt_run_flush(unsigned long process_context)
 {
-	int i;
-	struct rtable *rth, *next;
-
 	rt_deadline = 0;
 
 	get_random_bytes(&rt_hash_rnd, 4);
 
-	for (i = rt_hash_mask; i >= 0; i--) {
-		spin_lock_bh(rt_hash_lock_addr(i));
-		rth = rt_hash_table[i].chain;
-		if (rth)
-			rt_hash_table[i].chain = NULL;
-		spin_unlock_bh(rt_hash_lock_addr(i));
-
-		for (; rth; rth = next) {
-			next = rth->u.dst.rt_next;
-			rt_free(rth);
-		}
-	}
+	rt_do_flush(process_context);
 }
 
 static DEFINE_SPINLOCK(rt_flush_lock);
@@ -665,7 +788,7 @@ void rt_cache_flush(int delay)
 
 	if (delay <= 0) {
 		spin_unlock_bh(&rt_flush_lock);
-		rt_run_flush(0);
+		rt_run_flush(user_mode);
 		return;
 	}
 
@@ -676,12 +799,17 @@ void rt_cache_flush(int delay)
 	spin_unlock_bh(&rt_flush_lock);
 }
 
+/*
+ * We change rt_hash_rnd and ask next rt_worker_func() invocation
+ * to perform a flush in process context
+ */
 static void rt_secret_rebuild(unsigned long dummy)
 {
-	unsigned long now = jiffies;
-
-	rt_cache_flush(0);
-	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
+	get_random_bytes(&rt_hash_rnd, 4);
+	ip_rt_flush_expected = 1;
+	cancel_delayed_work(&expires_work);
+	schedule_delayed_work(&expires_work, HZ/10);
+	mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
 /*
@@ -697,7 +825,7 @@ static void rt_secret_rebuild(unsigned long dummy)
    and when load increases it reduces to limit cache size.
  */
 
-static int rt_garbage_collect(void)
+static int rt_garbage_collect(struct dst_ops *ops)
 {
 	static unsigned long expire = RT_GC_TIMEOUT;
 	static unsigned long last_gc;
@@ -728,14 +856,14 @@ static int rt_garbage_collect(void)
 			equilibrium = ipv4_dst_ops.gc_thresh;
 		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
 		if (goal > 0) {
-			equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1);
+			equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
 			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
 		}
 	} else {
 		/* We are in dangerous area. Try to reduce cache really
 		 * aggressively.
 		 */
-		goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1);
+		goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
 		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
 	}
 
@@ -838,7 +966,7 @@ restart:
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
-		if (compare_keys(&rth->fl, &rt->fl)) {
+		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
 			/* Put it first */
 			*rthp = rth->u.dst.rt_next;
 			/*
@@ -912,7 +1040,7 @@ restart:
 			int saved_int = ip_rt_gc_min_interval;
 			ip_rt_gc_elasticity = 1;
 			ip_rt_gc_min_interval = 0;
-			rt_garbage_collect();
+			rt_garbage_collect(&ipv4_dst_ops);
 			ip_rt_gc_min_interval = saved_int;
 			ip_rt_gc_elasticity = saved_elasticity;
 			goto restart;
@@ -1031,7 +1159,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		return;
 
 	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
-	    || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
+	    || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
+	    || ipv4_is_zeronet(new_gw))
		goto reject_redirect;
 
 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
@@ -1040,7 +1169,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
 			goto reject_redirect;
 	} else {
-		if (inet_addr_type(new_gw) != RTN_UNICAST)
+		if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST)
 			goto reject_redirect;
 	}
 
@@ -1291,7 +1420,8 @@ static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
 	return 68;
 }
 
-unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
+unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
+				 unsigned short new_mtu)
 {
 	int i;
 	unsigned short old_mtu = ntohs(iph->tot_len);
@@ -1314,7 +1444,8 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
 			    rth->rt_dst == daddr &&
 			    rth->rt_src == iph->saddr &&
 			    rth->fl.iif == 0 &&
-			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
+			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
+			    rth->u.dst.dev->nd_net == net) {
 				unsigned short mtu = new_mtu;
 
 				if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1389,8 +1520,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 	struct rtable *rt = (struct rtable *) dst;
 	struct in_device *idev = rt->idev;
-	if (dev != init_net.loopback_dev && idev && idev->dev == dev) {
-		struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev);
+	if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) {
+		struct in_device *loopback_idev =
+			in_dev_get(dev->nd_net->loopback_dev);
 		if (loopback_idev) {
 			rt->idev = loopback_idev;
 			in_dev_put(idev);
@@ -1434,7 +1566,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
 	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(&rt->fl, &res) == 0) {
+	else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) {
 		src = FIB_RES_PREFSRC(res);
 		fib_res_put(&res);
 	} else
@@ -1509,12 +1641,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (in_dev == NULL)
 		return -EINVAL;
 
-	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
-	    skb->protocol != htons(ETH_P_IP))
+	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
+	    ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
 		goto e_inval;
 
-	if (ZERONET(saddr)) {
-		if (!LOCAL_MCAST(daddr))
+	if (ipv4_is_zeronet(saddr)) {
+		if (!ipv4_is_local_multicast(daddr))
 			goto e_inval;
 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
 	} else if (fib_validate_source(saddr, 0, tos, 0,
@@ -1556,7 +1688,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	}
 
 #ifdef CONFIG_IP_MROUTE
-	if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
 		rth->u.dst.input = ip_mr_input;
 #endif
 	RT_CACHE_STAT_INC(in_slow_mc);
@@ -1643,7 +1775,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 	if (err)
 		flags |= RTCF_DIRECTSRC;
 
-	if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
+	if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
 	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
 		flags |= RTCF_DOREDIRECT;
@@ -1652,7 +1784,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 		/* Not IP (i.e. ARP). Do not create route, if it is
 		 * invalid for proxy arp. DNAT routes are always valid.
 		 */
-		if (out_dev == in_dev && !(flags & RTCF_DNAT)) {
+		if (out_dev == in_dev) {
 			err = -EINVAL;
 			goto cleanup;
 		}
@@ -1756,6 +1888,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	__be32 spec_dst;
 	int err = -EINVAL;
 	int free_res = 0;
+	struct net * net = dev->nd_net;
 
 	/* IP on this device is disabled. */
 
@@ -1766,7 +1899,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	   by fib_lookup.
 	 */
 
-	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
+	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
+	    ipv4_is_loopback(saddr))
 		goto martian_source;
 
 	if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
@@ -1775,16 +1909,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	/* Accept zero addresses only to limited broadcast;
 	 * I even do not know to fix it or not. Waiting for complains :-)
 	 */
-	if (ZERONET(saddr))
+	if (ipv4_is_zeronet(saddr))
 		goto martian_source;
 
-	if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
+	if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
+	    ipv4_is_loopback(daddr))
 		goto martian_destination;
 
 	/*
 	 *	Now we are ready to route packet.
 	 */
-	if ((err = fib_lookup(&fl, &res)) != 0) {
+	if ((err = fib_lookup(net, &fl, &res)) != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
 			goto e_hostunreach;
 		goto no_route;
@@ -1799,7 +1934,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (res.type == RTN_LOCAL) {
 		int result;
 		result = fib_validate_source(saddr, daddr, tos,
-					     init_net.loopback_dev->ifindex,
+					     net->loopback_dev->ifindex,
 					     dev, &spec_dst, &itag);
 		if (result < 0)
 			goto martian_source;
@@ -1825,7 +1960,7 @@ brd_input:
 	if (skb->protocol != htons(ETH_P_IP))
 		goto e_inval;
 
-	if (ZERONET(saddr))
+	if (ipv4_is_zeronet(saddr))
 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
 	else {
 		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
@@ -1861,7 +1996,7 @@ local_input:
 #endif
 	rth->rt_iif =
 	rth->fl.iif = dev->ifindex;
-	rth->u.dst.dev = init_net.loopback_dev;
+	rth->u.dst.dev = net->loopback_dev;
 	dev_hold(rth->u.dst.dev);
 	rth->idev = in_dev_get(rth->u.dst.dev);
 	rth->rt_gateway = daddr;
@@ -1921,7 +2056,9 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	struct rtable * rth;
 	unsigned hash;
 	int iif = dev->ifindex;
+	struct net *net;
 
+	net = skb->dev->nd_net;
 	tos &= IPTOS_RT_MASK;
 	hash = rt_hash(daddr, saddr, iif);
 
@@ -1933,7 +2070,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		    rth->fl.iif == iif &&
 		    rth->fl.oif == 0 &&
 		    rth->fl.mark == skb->mark &&
-		    rth->fl.fl4_tos == tos) {
+		    rth->fl.fl4_tos == tos &&
+		    rth->u.dst.dev->nd_net == net) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -1955,7 +2093,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	   Note, that multicast routers are not affected, because
 	   route cache entry is created eventually.
 	 */
-	if (MULTICAST(daddr)) {
+	if (ipv4_is_multicast(daddr)) {
 		struct in_device *in_dev;
 
 		rcu_read_lock();
@@ -1964,7 +2102,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 						ip_hdr(skb)->protocol);
 			if (our
 #ifdef CONFIG_IP_MROUTE
-			    || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+			    || (!ipv4_is_local_multicast(daddr) &&
+				IN_DEV_MFORWARD(in_dev))
 #endif
 			    ) {
 				rcu_read_unlock();
@@ -1990,14 +2129,14 @@ static inline int __mkroute_output(struct rtable **result,
 	u32 tos = RT_FL_TOS(oldflp);
 	int err = 0;
 
-	if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
+	if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
 		return -EINVAL;
 
 	if (fl->fl4_dst == htonl(0xFFFFFFFF))
 		res->type = RTN_BROADCAST;
-	else if (MULTICAST(fl->fl4_dst))
+	else if (ipv4_is_multicast(fl->fl4_dst))
 		res->type = RTN_MULTICAST;
-	else if (BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst))
+	else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
 		return -EINVAL;
 
 	if (dev_out->flags & IFF_LOOPBACK)
@@ -2077,7 +2216,7 @@ static inline int __mkroute_output(struct rtable **result,
 #ifdef CONFIG_IP_MROUTE
 	if (res->type == RTN_MULTICAST) {
 		if (IN_DEV_MFORWARD(in_dev) &&
-		    !LOCAL_MCAST(oldflp->fl4_dst)) {
+		    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
 			rth->u.dst.input = ip_mr_input;
 			rth->u.dst.output = ip_mc_output;
 		}
@@ -2119,7 +2258,8 @@ static inline int ip_mkroute_output(struct rtable **rp,
 * Major route resolver routine.
 */
 
-static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
+static int ip_route_output_slow(struct net *net, struct rtable **rp,
+				const struct flowi *oldflp)
 {
 	u32 tos = RT_FL_TOS(oldflp);
 	struct flowi fl = { .nl_u = { .ip4_u =
@@ -2131,7 +2271,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 						  RT_SCOPE_UNIVERSE),
 				      } },
 			    .mark = oldflp->mark,
-			    .iif = init_net.loopback_dev->ifindex,
+			    .iif = net->loopback_dev->ifindex,
 			    .oif = oldflp->oif };
 	struct fib_result res;
 	unsigned flags = 0;
@@ -2147,26 +2287,27 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 
 	if (oldflp->fl4_src) {
 		err = -EINVAL;
-		if (MULTICAST(oldflp->fl4_src) ||
-		    BADCLASS(oldflp->fl4_src) ||
-		    ZERONET(oldflp->fl4_src))
+		if (ipv4_is_multicast(oldflp->fl4_src) ||
+		    ipv4_is_lbcast(oldflp->fl4_src) ||
+		    ipv4_is_zeronet(oldflp->fl4_src))
			goto out;
 
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-		dev_out = ip_dev_find(oldflp->fl4_src);
+		dev_out = ip_dev_find(net, oldflp->fl4_src);
 		if (dev_out == NULL)
 			goto out;
 
 		/* I removed check for oif == dev_out->oif here.
 		   It was wrong for two reasons:
-		   1. ip_dev_find(saddr) can return wrong iface, if saddr is
-		      assigned to multiple interfaces.
+		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
+		      is assigned to multiple interfaces.
 		   2. Moreover, we are allowed to send packets with saddr
 		      of another iface. --ANK
 		 */
 
 		if (oldflp->oif == 0
-		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
+		    && (ipv4_is_multicast(oldflp->fl4_dst) ||
+			oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
 			/* Special hack: user can direct multicasts
 			   and limited broadcast via necessary interface
 			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
@@ -2192,7 +2333,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 
 
 	if (oldflp->oif) {
-		dev_out = dev_get_by_index(&init_net, oldflp->oif);
+		dev_out = dev_get_by_index(net, oldflp->oif);
 		err = -ENODEV;
 		if (dev_out == NULL)
 			goto out;
@@ -2203,14 +2344,15 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 			goto out;	/* Wrong error code */
 		}
 
-		if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
+		if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
+		    oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
 			if (!fl.fl4_src)
 				fl.fl4_src = inet_select_addr(dev_out, 0,
 							      RT_SCOPE_LINK);
 			goto make_route;
 		}
 		if (!fl.fl4_src) {
-			if (MULTICAST(oldflp->fl4_dst))
+			if (ipv4_is_multicast(oldflp->fl4_dst))
 				fl.fl4_src = inet_select_addr(dev_out, 0,
 							      fl.fl4_scope);
 			else if (!oldflp->fl4_dst)
@@ -2225,15 +2367,15 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 		fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
 		if (dev_out)
 			dev_put(dev_out);
-		dev_out = init_net.loopback_dev;
+		dev_out = net->loopback_dev;
 		dev_hold(dev_out);
-		fl.oif = init_net.loopback_dev->ifindex;
+		fl.oif = net->loopback_dev->ifindex;
 		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
 	}
 
-	if (fib_lookup(&fl, &res)) {
+	if (fib_lookup(net, &fl, &res)) {
 		res.fi = NULL;
 		if (oldflp->oif) {
 			/* Apparently, routing tables are wrong. Assume,
@@ -2272,7 +2414,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 		fl.fl4_src = fl.fl4_dst;
 		if (dev_out)
 			dev_put(dev_out);
-		dev_out = init_net.loopback_dev;
+		dev_out = net->loopback_dev;
 		dev_hold(dev_out);
 		fl.oif = dev_out->ifindex;
 		if (res.fi)
@@ -2288,7 +2430,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 	else
 #endif
 	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
-		fib_select_default(&fl, &res);
+		fib_select_default(net, &fl, &res);
 
 	if (!fl.fl4_src)
 		fl.fl4_src = FIB_RES_PREFSRC(res);
@@ -2311,7 +2453,8 @@ make_route:
 out:	return err;
 }
 
-int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
+int __ip_route_output_key(struct net *net, struct rtable **rp,
+			  const struct flowi *flp)
 {
 	unsigned hash;
 	struct rtable *rth;
@@ -2327,7 +2470,8 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
 		    rth->fl.oif == flp->oif &&
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
-			    (IPTOS_RT_MASK | RTO_ONLINK))) {
+			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
+		    rth->u.dst.dev->nd_net == net) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
@@ -2338,7 +2482,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
 	}
 	rcu_read_unlock_bh();
 
-	return ip_route_output_slow(rp, flp);
+	return ip_route_output_slow(net, rp, flp);
 }
 
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
@@ -2357,12 +2501,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 };
 
 
-static int ipv4_blackhole_output(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return 0;
-}
-
 static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
 {
 	struct rtable *ort = *rp;
@@ -2374,8 +2512,8 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
-		new->input = ipv4_blackhole_output;
-		new->output = ipv4_blackhole_output;
+		new->input = dst_discard;
+		new->output = dst_discard;
 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
 
 		new->dev = ort->u.dst.dev;
@@ -2406,11 +2544,12 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
 	return (rt ? 0 : -ENOMEM);
 }
 
-int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
+int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
+			 struct sock *sk, int flags)
 {
 	int err;
 
-	if ((err = __ip_route_output_key(rp, flp)) != 0)
+	if ((err = __ip_route_output_key(net, rp, flp)) != 0)
 		return err;
 
 	if (flp->proto) {
@@ -2418,7 +2557,8 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
 			flp->fl4_src = (*rp)->rt_src;
 		if (!flp->fl4_dst)
 			flp->fl4_dst = (*rp)->rt_dst;
-		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
+		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
+				    flags ? XFRM_LOOKUP_WAIT : 0);
 		if (err == -EREMOTE)
 			err = ipv4_dst_blackhole(rp, flp, sk);
 
@@ -2430,9 +2570,9 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
 
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
-int ip_route_output_key(struct rtable **rp, struct flowi *flp)
+int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
 {
-	return ip_route_output_flow(rp, flp, NULL, 0);
+	return ip_route_output_flow(net, rp, flp, NULL, 0);
 }
 
 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
@@ -2499,8 +2639,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 #ifdef CONFIG_IP_MROUTE
 		__be32 dst = rt->rt_dst;
 
-		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-		    IPV4_DEVCONF_ALL(MC_FORWARDING)) {
+		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
+		    IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
 			int err = ipmr_get_route(skb, r, nowait);
 			if (err <= 0) {
 				if (!nowait) {
@@ -2531,6 +2671,7 @@ nla_put_failure:
 
 static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
+	struct net *net = in_skb->sk->sk_net;
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
 	struct rtable *rt = NULL;
@@ -2540,6 +2681,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	int err;
 	struct sk_buff *skb;
 
+	if (net != &init_net)
+		return -EINVAL;
+
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
 	if (err < 0)
 		goto errout;
@@ -2595,7 +2739,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 			},
 			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
 		};
-		err = ip_route_output_key(&rt, &fl);
+		err = ip_route_output_key(&init_net, &rt, &fl);
 	}
 
 	if (err)
@@ -2610,7 +2754,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (err <= 0)
 		goto errout_free;
 
-	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
 errout:
 	return err;
 
@@ -2862,51 +3006,7 @@ ctl_table ipv4_route_table[] = {
 #endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
-struct ip_rt_acct *ip_rt_acct;
-
-/* This code sucks. But you should have seen it before! --RR */
-
-/* IP route accounting ptr for this logical cpu number. */
-#define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256)
-
-#ifdef CONFIG_PROC_FS
-static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
-			   int length, int *eof, void *data)
-{
-	unsigned int i;
-
-	if ((offset & 3) || (length & 3))
-		return -EIO;
-
-	if (offset >= sizeof(struct ip_rt_acct) * 256) {
-		*eof = 1;
-		return 0;
-	}
-
-	if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
-		length = sizeof(struct ip_rt_acct) * 256 - offset;
-		*eof = 1;
-	}
-
-	offset /= sizeof(u32);
-
-	if (length > 0) {
-		u32 *dst = (u32 *) buffer;
-
-		*start = buffer;
-		memset(dst, 0, length);
-
-		for_each_possible_cpu(i) {
-			unsigned int j;
-			u32 *src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
-
-			for (j = 0; j < length/4; j++)
-				dst[j] += src[j];
-		}
-	}
-	return length;
-}
-#endif /* CONFIG_PROC_FS */
+struct ip_rt_acct *ip_rt_acct __read_mostly;
 #endif /* CONFIG_NET_CLS_ROUTE */
 
 static __initdata unsigned long rhash_entries;
@@ -2927,16 +3027,9 @@ int __init ip_rt_init(void)
 					     (jiffies ^ (jiffies >> 7)));
 
 #ifdef CONFIG_NET_CLS_ROUTE
-	{
-	int order;
-	for (order = 0;
-	     (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
-		/* NOTHING */;
-	ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
+	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
 	if (!ip_rt_acct)
 		panic("IP: failed to allocate ip_rt_acct\n");
-	memset(ip_rt_acct, 0, PAGE_SIZE << order);
-	}
 #endif
 
 	ipv4_dst_ops.kmem_cachep =
@@ -2964,10 +3057,8 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
-	init_timer(&rt_flush_timer);
-	rt_flush_timer.function = rt_run_flush;
-	init_timer(&rt_secret_timer);
-	rt_secret_timer.function = rt_secret_rebuild;
+	setup_timer(&rt_flush_timer, rt_run_flush, 0);
+	setup_timer(&rt_secret_timer, rt_secret_rebuild, 0);
 
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
@@ -2979,20 +3070,8 @@ int __init ip_rt_init(void)
 			ip_rt_secret_interval;
 	add_timer(&rt_secret_timer);
 
-#ifdef CONFIG_PROC_FS
-	{
-	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
-	if (!proc_net_fops_create(&init_net, "rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
-	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
-					     init_net.proc_net_stat))) {
-		return -ENOMEM;
-	}
-	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
-	}
-#ifdef CONFIG_NET_CLS_ROUTE
-	create_proc_read_entry("rt_acct", 0, init_net.proc_net, ip_rt_acct_read, NULL);
-#endif
-#endif
+	if (ip_rt_proc_init(&init_net))
+		printk(KERN_ERR "Unable to create route proc files\n");
 #ifdef CONFIG_XFRM
 	xfrm_init();
 	xfrm4_init();
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2da1be0589a9..f470fe4511db 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -264,7 +264,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 						  { .sport = th->dest,
 						    .dport = th->source } } };
 	security_req_classify_flow(req, &fl);
-	if (ip_route_output_key(&rt, &fl)) {
+	if (ip_route_output_key(&init_net, &rt, &fl)) {
 		reqsk_free(req);
 		goto out;
 	}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index bec6fe880657..82cdf23837e3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -13,83 +13,20 @@
 #include <linux/igmp.h>
 #include <linux/inetdevice.h>
 #include <linux/seqlock.h>
+#include <linux/init.h>
 #include <net/snmp.h>
 #include <net/icmp.h>
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp.h>
+#include <net/udp.h>
 #include <net/cipso_ipv4.h>
 #include <net/inet_frag.h>
 
-/* From af_inet.c */
-extern int sysctl_ip_nonlocal_bind;
-
-#ifdef CONFIG_SYSCTL
 static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
-#endif
-
-struct ipv4_config ipv4_config;
-
-#ifdef CONFIG_SYSCTL
-
-static
-int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int val = IPV4_DEVCONF_ALL(FORWARDING);
-	int ret;
-
-	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-
-	if (write && IPV4_DEVCONF_ALL(FORWARDING) != val)
-		inet_forward_change();
-
-	return ret;
-}
-
-static int ipv4_sysctl_forward_strategy(ctl_table *table,
-			int __user *name, int nlen,
-			void __user *oldval, size_t __user *oldlenp,
-			void __user *newval, size_t newlen)
-{
-	int *valp = table->data;
-	int new;
-
-	if (!newval || !newlen)
-		return 0;
-
-	if (newlen != sizeof(int))
-		return -EINVAL;
-
-	if (get_user(new, (int __user *)newval))
-		return -EFAULT;
-
-	if (new == *valp)
-		return 0;
-
-	if (oldval && oldlenp) {
-		size_t len;
-
-		if (get_user(len, oldlenp))
-			return -EFAULT;
-
-		if (len) {
-			if (len > table->maxlen)
-				len = table->maxlen;
-			if (copy_to_user(oldval, valp, len))
-				return -EFAULT;
-			if (put_user(len, oldlenp))
-				return -EFAULT;
-		}
-	}
-
-	*valp = new;
-	inet_forward_change();
-	return 1;
-}
 
 extern seqlock_t sysctl_port_range_lock;
 extern int sysctl_local_port_range[2];
@@ -256,7 +193,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
 
 }
 
-ctl_table ipv4_table[] = {
+static struct ctl_table ipv4_table[] = {
 	{
 		.ctl_name = NET_IPV4_TCP_TIMESTAMPS,
 		.procname = "tcp_timestamps",
@@ -290,15 +227,6 @@ ctl_table ipv4_table[] = {
 		.proc_handler = &proc_dointvec
 	},
 	{
-		.ctl_name = NET_IPV4_FORWARD,
-		.procname = "ip_forward",
-		.data = &IPV4_DEVCONF_ALL(FORWARDING),
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = &ipv4_sysctl_forward,
-		.strategy = &ipv4_sysctl_forward_strategy
-	},
-	{
 		.ctl_name = NET_IPV4_DEFAULT_TTL,
 		.procname = "ip_default_ttl",
 		.data = &sysctl_ip_default_ttl,
@@ -356,22 +284,6 @@ ctl_table ipv4_table[] = {
 		.proc_handler = &proc_dointvec
 	},
 	{
-		.ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
-		.procname = "ipfrag_high_thresh",
-		.data = &ip4_frags_ctl.high_thresh,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = &proc_dointvec
-	},
-	{
-		.ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
-		.procname = "ipfrag_low_thresh",
-		.data = &ip4_frags_ctl.low_thresh,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = &proc_dointvec
-	},
-	{
 		.ctl_name = NET_IPV4_DYNADDR,
 		.procname = "ip_dynaddr",
 		.data = &sysctl_ip_dynaddr,
@@ -380,15 +292,6 @@ ctl_table ipv4_table[] = {
 		.proc_handler = &proc_dointvec
 	},
 	{
-		.ctl_name = NET_IPV4_IPFRAG_TIME,
-		.procname = "ipfrag_time",
-		.data = &ip4_frags_ctl.timeout,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = &proc_dointvec_jiffies,
-		.strategy = &sysctl_jiffies
-	},
-	{
 		.ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME,
 		.procname = "tcp_keepalive_time",
 		.data = &sysctl_tcp_keepalive_time,
@@ -731,23 +634,6 @@ ctl_table ipv4_table[] = {
 		.proc_handler = &proc_dointvec
 	},
 	{
-		.ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
-		.procname = "ipfrag_secret_interval",
-		.data = &ip4_frags_ctl.secret_interval,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = &proc_dointvec_jiffies,
-		.strategy = &sysctl_jiffies
-	},
-	{
-		.procname = "ipfrag_max_dist",
-		.data = &sysctl_ipfrag_max_dist,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = &proc_dointvec_minmax,
-		.extra1 = &zero
-	},
-	{
 		.ctl_name = NET_TCP_NO_METRICS_SAVE,
 		.procname = "tcp_no_metrics_save",
 		.data = &sysctl_tcp_nometrics_save,
@@ -885,9 +771,52 @@ ctl_table ipv4_table[] = {
 		.mode = 0644,
 		.proc_handler = &proc_dointvec,
 	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "udp_mem",
+		.data = &sysctl_udp_mem,
+		.maxlen = sizeof(sysctl_udp_mem),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &zero
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "udp_rmem_min",
+		.data = &sysctl_udp_rmem_min,
+		.maxlen = sizeof(sysctl_udp_rmem_min),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &zero
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "udp_wmem_min",
+		.data = &sysctl_udp_wmem_min,
+		.maxlen = sizeof(sysctl_udp_wmem_min),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		.extra1 = &zero
+	},
 	{ .ctl_name = 0 }
 };
 
-#endif /* CONFIG_SYSCTL */
+struct ctl_path net_ipv4_ctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ },
+};
+EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
+
+static __init int sysctl_ipv4_init(void)
+{
+	struct ctl_table_header *hdr;
+
+	hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
+	return hdr == NULL ? -ENOMEM : 0;
+}
 
-EXPORT_SYMBOL(ipv4_config);
+__initcall(sysctl_ipv4_init);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e65182f7af1..a0d373bd9065 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -254,6 +254,10 @@
254#include <linux/poll.h> 254#include <linux/poll.h>
255#include <linux/init.h> 255#include <linux/init.h>
256#include <linux/fs.h> 256#include <linux/fs.h>
257#include <linux/skbuff.h>
258#include <linux/splice.h>
259#include <linux/net.h>
260#include <linux/socket.h>
257#include <linux/random.h> 261#include <linux/random.h>
258#include <linux/bootmem.h> 262#include <linux/bootmem.h>
259#include <linux/cache.h> 263#include <linux/cache.h>
@@ -265,6 +269,7 @@
265#include <net/xfrm.h> 269#include <net/xfrm.h>
266#include <net/ip.h> 270#include <net/ip.h>
267#include <net/netdma.h> 271#include <net/netdma.h>
272#include <net/sock.h>
268 273
269#include <asm/uaccess.h> 274#include <asm/uaccess.h>
270#include <asm/ioctls.h> 275#include <asm/ioctls.h>
@@ -292,9 +297,18 @@ EXPORT_SYMBOL(tcp_memory_allocated);
292EXPORT_SYMBOL(tcp_sockets_allocated); 297EXPORT_SYMBOL(tcp_sockets_allocated);
293 298
294/* 299/*
300 * TCP splice context
301 */
302struct tcp_splice_state {
303 struct pipe_inode_info *pipe;
304 size_t len;
305 unsigned int flags;
306};
307
308/*
295 * Pressure flag: try to collapse. 309 * Pressure flag: try to collapse.
296 * Technical note: it is used by multiple contexts non atomically. 310 * Technical note: it is used by multiple contexts non atomically.
297 * All the sk_stream_mem_schedule() is of this nature: accounting 311 * All the __sk_mem_schedule() is of this nature: accounting
298 * is strict, actions are advisory and have some latency. 312 * is strict, actions are advisory and have some latency.
299 */ 313 */
300int tcp_memory_pressure __read_mostly; 314int tcp_memory_pressure __read_mostly;
@@ -471,7 +485,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
471 tcb->sacked = 0; 485 tcb->sacked = 0;
472 skb_header_release(skb); 486 skb_header_release(skb);
473 tcp_add_write_queue_tail(sk, skb); 487 tcp_add_write_queue_tail(sk, skb);
474 sk_charge_skb(sk, skb); 488 sk->sk_wmem_queued += skb->truesize;
489 sk_mem_charge(sk, skb->truesize);
475 if (tp->nonagle & TCP_NAGLE_PUSH) 490 if (tp->nonagle & TCP_NAGLE_PUSH)
476 tp->nonagle &= ~TCP_NAGLE_PUSH; 491 tp->nonagle &= ~TCP_NAGLE_PUSH;
477} 492}
@@ -482,7 +497,6 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
482 if (flags & MSG_OOB) { 497 if (flags & MSG_OOB) {
483 tp->urg_mode = 1; 498 tp->urg_mode = 1;
484 tp->snd_up = tp->write_seq; 499 tp->snd_up = tp->write_seq;
485 TCP_SKB_CB(skb)->sacked |= TCPCB_URG;
486 } 500 }
487} 501}
488 502
@@ -501,6 +515,145 @@ static inline void tcp_push(struct sock *sk, int flags, int mss_now,
501 } 515 }
502} 516}
503 517
518static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
519 unsigned int offset, size_t len)
520{
521 struct tcp_splice_state *tss = rd_desc->arg.data;
522
523 return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags);
524}
525
526static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
527{
528 /* Store TCP splice context information in read_descriptor_t. */
529 read_descriptor_t rd_desc = {
530 .arg.data = tss,
531 };
532
533 return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
534}
535
536/**
537 * tcp_splice_read - splice data from TCP socket to a pipe
538 * @sock: socket to splice from
539 * @ppos: position (not valid)
540 * @pipe: pipe to splice to
541 * @len: number of bytes to splice
542 * @flags: splice modifier flags
543 *
544 * Description:
545 * Will read pages from given socket and fill them into a pipe.
546 *
547 **/
548ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
549 struct pipe_inode_info *pipe, size_t len,
550 unsigned int flags)
551{
552 struct sock *sk = sock->sk;
553 struct tcp_splice_state tss = {
554 .pipe = pipe,
555 .len = len,
556 .flags = flags,
557 };
558 long timeo;
559 ssize_t spliced;
560 int ret;
561
562 /*
563 * We can't seek on a socket input
564 */
565 if (unlikely(*ppos))
566 return -ESPIPE;
567
568 ret = spliced = 0;
569
570 lock_sock(sk);
571
572 timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
573 while (tss.len) {
574 ret = __tcp_splice_read(sk, &tss);
575 if (ret < 0)
576 break;
577 else if (!ret) {
578 if (spliced)
579 break;
580 if (flags & SPLICE_F_NONBLOCK) {
581 ret = -EAGAIN;
582 break;
583 }
584 if (sock_flag(sk, SOCK_DONE))
585 break;
586 if (sk->sk_err) {
587 ret = sock_error(sk);
588 break;
589 }
590 if (sk->sk_shutdown & RCV_SHUTDOWN)
591 break;
592 if (sk->sk_state == TCP_CLOSE) {
593 /*
594 * This occurs when user tries to read
595 * from never connected socket.
596 */
597 if (!sock_flag(sk, SOCK_DONE))
598 ret = -ENOTCONN;
599 break;
600 }
601 if (!timeo) {
602 ret = -EAGAIN;
603 break;
604 }
605 sk_wait_data(sk, &timeo);
606 if (signal_pending(current)) {
607 ret = sock_intr_errno(timeo);
608 break;
609 }
610 continue;
611 }
612 tss.len -= ret;
613 spliced += ret;
614
615 release_sock(sk);
616 lock_sock(sk);
617
618 if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
619 (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo ||
620 signal_pending(current))
621 break;
622 }
623
624 release_sock(sk);
625
626 if (spliced)
627 return spliced;
628
629 return ret;
630}
631
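
How the new entry point is exercised from userspace (a sketch with assumed
descriptors, not part of the patch): splice() on a connected TCP socket moves
pages into a pipe without copying through user memory, landing in
tcp_splice_read() via the socket's splice_read operation. SPLICE_F_NONBLOCK
maps onto the -EAGAIN paths in the loop above.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Move up to 64 KiB from a TCP socket into a pipe's write end. */
static ssize_t sock_to_pipe(int sock_fd, int pipe_wr_fd)
{
	return splice(sock_fd, NULL, pipe_wr_fd, NULL, 65536,
		      SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
}
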
632struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
633{
634 struct sk_buff *skb;
635
636 /* The TCP header must be at least 32-bit aligned. */
637 size = ALIGN(size, 4);
638
639 skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
640 if (skb) {
641 if (sk_wmem_schedule(sk, skb->truesize)) {
642 /*
643 * Make sure that we have exactly size bytes
644 * available to the caller, no more, no less.
645 */
646 skb_reserve(skb, skb_tailroom(skb) - size);
647 return skb;
648 }
649 __kfree_skb(skb);
650 } else {
651 sk->sk_prot->enter_memory_pressure();
652 sk_stream_moderate_sndbuf(sk);
653 }
654 return NULL;
655}
656
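
The reserve arithmetic above, with assumed numbers: a request for size = 5 is
rounded by ALIGN(5, 4) to 8, the fclone skb is allocated with
8 + sk->sk_prot->max_header bytes of tailroom, and
skb_reserve(skb, skb_tailroom(skb) - 8) then leaves exactly 8 bytes of
tailroom for payload, with everything else available as headroom for the
protocol headers pushed later.
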
504static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, 657static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
505 size_t psize, int flags) 658 size_t psize, int flags)
506{ 659{
@@ -537,8 +690,7 @@ new_segment:
537 if (!sk_stream_memory_free(sk)) 690 if (!sk_stream_memory_free(sk))
538 goto wait_for_sndbuf; 691 goto wait_for_sndbuf;
539 692
540 skb = sk_stream_alloc_pskb(sk, 0, 0, 693 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
541 sk->sk_allocation);
542 if (!skb) 694 if (!skb)
543 goto wait_for_memory; 695 goto wait_for_memory;
544 696
@@ -555,7 +707,7 @@ new_segment:
555 tcp_mark_push(tp, skb); 707 tcp_mark_push(tp, skb);
556 goto new_segment; 708 goto new_segment;
557 } 709 }
558 if (!sk_stream_wmem_schedule(sk, copy)) 710 if (!sk_wmem_schedule(sk, copy))
559 goto wait_for_memory; 711 goto wait_for_memory;
560 712
561 if (can_coalesce) { 713 if (can_coalesce) {
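
sk_stream_wmem_schedule() gives way to the protocol-neutral
sk_wmem_schedule(). Assumed shape of the new helper (from the same series'
sock.h changes, not this hunk):

static inline int sk_wmem_schedule(struct sock *sk, int size)
{
	if (!sk_has_account(sk))
		return 1;
	return size <= sk->sk_forward_alloc ||
	       __sk_mem_schedule(sk, size, SK_MEM_SEND);
}
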
@@ -569,7 +721,7 @@ new_segment:
569 skb->data_len += copy; 721 skb->data_len += copy;
570 skb->truesize += copy; 722 skb->truesize += copy;
571 sk->sk_wmem_queued += copy; 723 sk->sk_wmem_queued += copy;
572 sk->sk_forward_alloc -= copy; 724 sk_mem_charge(sk, copy);
573 skb->ip_summed = CHECKSUM_PARTIAL; 725 skb->ip_summed = CHECKSUM_PARTIAL;
574 tp->write_seq += copy; 726 tp->write_seq += copy;
575 TCP_SKB_CB(skb)->end_seq += copy; 727 TCP_SKB_CB(skb)->end_seq += copy;
@@ -718,8 +870,8 @@ new_segment:
718 if (!sk_stream_memory_free(sk)) 870 if (!sk_stream_memory_free(sk))
719 goto wait_for_sndbuf; 871 goto wait_for_sndbuf;
720 872
721 skb = sk_stream_alloc_pskb(sk, select_size(sk), 873 skb = sk_stream_alloc_skb(sk, select_size(sk),
722 0, sk->sk_allocation); 874 sk->sk_allocation);
723 if (!skb) 875 if (!skb)
724 goto wait_for_memory; 876 goto wait_for_memory;
725 877
@@ -776,7 +928,7 @@ new_segment:
776 if (copy > PAGE_SIZE - off) 928 if (copy > PAGE_SIZE - off)
777 copy = PAGE_SIZE - off; 929 copy = PAGE_SIZE - off;
778 930
779 if (!sk_stream_wmem_schedule(sk, copy)) 931 if (!sk_wmem_schedule(sk, copy))
780 goto wait_for_memory; 932 goto wait_for_memory;
781 933
782 if (!page) { 934 if (!page) {
@@ -867,7 +1019,7 @@ do_fault:
867 * reset, where we can be unlinking the send_head. 1019 * reset, where we can be unlinking the send_head.
868 */ 1020 */
869 tcp_check_send_head(sk, skb); 1021 tcp_check_send_head(sk, skb);
870 sk_stream_free_skb(sk, skb); 1022 sk_wmem_free_skb(sk, skb);
871 } 1023 }
872 1024
873do_error: 1025do_error:
@@ -1500,6 +1652,41 @@ recv_urg:
1500 goto out; 1652 goto out;
1501} 1653}
1502 1654
1655void tcp_set_state(struct sock *sk, int state)
1656{
1657 int oldstate = sk->sk_state;
1658
1659 switch (state) {
1660 case TCP_ESTABLISHED:
1661 if (oldstate != TCP_ESTABLISHED)
1662 TCP_INC_STATS(TCP_MIB_CURRESTAB);
1663 break;
1664
1665 case TCP_CLOSE:
1666 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
1667 TCP_INC_STATS(TCP_MIB_ESTABRESETS);
1668
1669 sk->sk_prot->unhash(sk);
1670 if (inet_csk(sk)->icsk_bind_hash &&
1671 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1672 inet_put_port(&tcp_hashinfo, sk);
1673 /* fall through */
1674 default:
1675 if (oldstate==TCP_ESTABLISHED)
1676 TCP_DEC_STATS(TCP_MIB_CURRESTAB);
1677 }
1678
 1679 /* Change state AFTER the socket is unhashed to avoid a closed
 1680 * socket sitting in the hash tables.
1681 */
1682 sk->sk_state = state;
1683
1684#ifdef STATE_TRACE
 1685 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
1686#endif
1687}
1688EXPORT_SYMBOL_GPL(tcp_set_state);
1689
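
The MIB bookkeeping in tcp_set_state(), summarized as a transition table
(derived from the code above):

/*   any state -> TCP_ESTABLISHED          : TCP_MIB_CURRESTAB++
 *   ESTABLISHED|CLOSE_WAIT -> TCP_CLOSE   : TCP_MIB_ESTABRESETS++,
 *                                           then unhash + release the port
 *   TCP_ESTABLISHED -> any other state    : TCP_MIB_CURRESTAB--
 *     (the TCP_CLOSE case reaches this via the fall-through)
 */
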
1503/* 1690/*
1504 * State processing on a close. This implements the state shift for 1691 * State processing on a close. This implements the state shift for
1505 * sending our FIN frame. Note that we only send a FIN for some 1692 * sending our FIN frame. Note that we only send a FIN for some
@@ -1586,7 +1773,7 @@ void tcp_close(struct sock *sk, long timeout)
1586 __kfree_skb(skb); 1773 __kfree_skb(skb);
1587 } 1774 }
1588 1775
1589 sk_stream_mem_reclaim(sk); 1776 sk_mem_reclaim(sk);
1590 1777
1591 /* As outlined in RFC 2525, section 2.17, we send a RST here because 1778 /* As outlined in RFC 2525, section 2.17, we send a RST here because
1592 * data was lost. To witness the awful effects of the old behavior of 1779 * data was lost. To witness the awful effects of the old behavior of
@@ -1689,7 +1876,7 @@ adjudge_to_death:
1689 } 1876 }
1690 } 1877 }
1691 if (sk->sk_state != TCP_CLOSE) { 1878 if (sk->sk_state != TCP_CLOSE) {
1692 sk_stream_mem_reclaim(sk); 1879 sk_mem_reclaim(sk);
1693 if (tcp_too_many_orphans(sk, 1880 if (tcp_too_many_orphans(sk,
1694 atomic_read(sk->sk_prot->orphan_count))) { 1881 atomic_read(sk->sk_prot->orphan_count))) {
1695 if (net_ratelimit()) 1882 if (net_ratelimit())
@@ -2411,7 +2598,6 @@ void tcp_done(struct sock *sk)
2411} 2598}
2412EXPORT_SYMBOL_GPL(tcp_done); 2599EXPORT_SYMBOL_GPL(tcp_done);
2413 2600
2414extern void __skb_cb_too_small_for_tcp(int, int);
2415extern struct tcp_congestion_ops tcp_reno; 2601extern struct tcp_congestion_ops tcp_reno;
2416 2602
2417static __initdata unsigned long thash_entries; 2603static __initdata unsigned long thash_entries;
@@ -2430,9 +2616,7 @@ void __init tcp_init(void)
2430 unsigned long limit; 2616 unsigned long limit;
2431 int order, i, max_share; 2617 int order, i, max_share;
2432 2618
2433 if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) 2619 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
2434 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
2435 sizeof(skb->cb));
2436 2620
2437 tcp_hashinfo.bind_bucket_cachep = 2621 tcp_hashinfo.bind_bucket_cachep =
2438 kmem_cache_create("tcp_bind_bucket", 2622 kmem_cache_create("tcp_bind_bucket",
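
The old guard called an intentionally undefined function, so a too-small
skb->cb failed only at link time; BUILD_BUG_ON() fails during compilation.
Its definition in this era's include/linux/kernel.h is roughly (quoted from
memory, treat as a sketch):

#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

A true condition yields a negative array size, which no compiler accepts.
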
@@ -2509,11 +2693,11 @@ void __init tcp_init(void)
2509 limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); 2693 limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
2510 max_share = min(4UL*1024*1024, limit); 2694 max_share = min(4UL*1024*1024, limit);
2511 2695
2512 sysctl_tcp_wmem[0] = SK_STREAM_MEM_QUANTUM; 2696 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
2513 sysctl_tcp_wmem[1] = 16*1024; 2697 sysctl_tcp_wmem[1] = 16*1024;
2514 sysctl_tcp_wmem[2] = max(64*1024, max_share); 2698 sysctl_tcp_wmem[2] = max(64*1024, max_share);
2515 2699
2516 sysctl_tcp_rmem[0] = SK_STREAM_MEM_QUANTUM; 2700 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
2517 sysctl_tcp_rmem[1] = 87380; 2701 sysctl_tcp_rmem[1] = 87380;
2518 sysctl_tcp_rmem[2] = max(87380, max_share); 2702 sysctl_tcp_rmem[2] = max(87380, max_share);
2519 2703
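
SK_MEM_QUANTUM replaces the stream-only SK_STREAM_MEM_QUANTUM as the unit of
the now-shared memory-accounting pool. Assumption, matching the old macro's
value: it stays one page,

#define SK_MEM_QUANTUM ((int)PAGE_SIZE)

so the floor of tcp_rmem/tcp_wmem remains a single page on every
architecture.
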
@@ -2532,6 +2716,7 @@ EXPORT_SYMBOL(tcp_poll);
2532EXPORT_SYMBOL(tcp_read_sock); 2716EXPORT_SYMBOL(tcp_read_sock);
2533EXPORT_SYMBOL(tcp_recvmsg); 2717EXPORT_SYMBOL(tcp_recvmsg);
2534EXPORT_SYMBOL(tcp_sendmsg); 2718EXPORT_SYMBOL(tcp_sendmsg);
2719EXPORT_SYMBOL(tcp_splice_read);
2535EXPORT_SYMBOL(tcp_sendpage); 2720EXPORT_SYMBOL(tcp_sendpage);
2536EXPORT_SYMBOL(tcp_setsockopt); 2721EXPORT_SYMBOL(tcp_setsockopt);
2537EXPORT_SYMBOL(tcp_shutdown); 2722EXPORT_SYMBOL(tcp_shutdown);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5dba0fc8f579..5212ed9b0c98 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -136,8 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
136 ca->cnt = 1; 136 ca->cnt = 1;
137} 137}
138 138
139static void bictcp_cong_avoid(struct sock *sk, u32 ack, 139static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
140 u32 in_flight, int data_acked)
141{ 140{
142 struct tcp_sock *tp = tcp_sk(sk); 141 struct tcp_sock *tp = tcp_sk(sk);
143 struct bictcp *ca = inet_csk_ca(sk); 142 struct bictcp *ca = inet_csk_ca(sk);
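
The same mechanical conversion repeats for every congestion-control module
below: the hook loses its trailing argument, which no algorithm consumed.
The resulting callback shape (sketch consistent with the conversions in this
diff):

struct tcp_congestion_ops {
	/* ... */
	void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
	/* ... */
};
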
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 55fca1820c34..3a6be23d222f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -274,6 +274,27 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
274 return err; 274 return err;
275} 275}
276 276
 277/* RFC2861: check whether we are limited by the application or by the congestion window.
 278 * This is the inverse of the cwnd check in tcp_tso_should_defer().
279 */
280int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
281{
282 const struct tcp_sock *tp = tcp_sk(sk);
283 u32 left;
284
285 if (in_flight >= tp->snd_cwnd)
286 return 1;
287
288 if (!sk_can_gso(sk))
289 return 0;
290
291 left = tp->snd_cwnd - in_flight;
292 if (sysctl_tcp_tso_win_divisor)
293 return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
294 else
295 return left <= tcp_max_burst(tp);
296}
297EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
277 298
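
A worked pass through tcp_is_cwnd_limited() with assumed numbers:
snd_cwnd = 100 and in_flight = 90 give left = 10; with
sysctl_tcp_tso_win_divisor = 3, 10 * 3 = 30 < 100, so the sender still counts
as cwnd-limited and may grow the window. With in_flight = 60, left = 40 and
40 * 3 = 120 >= 100: the flow is application-limited and window growth is
suppressed. With the divisor set to 0, the fallback compares left against
tcp_max_burst(tp) instead.
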
278/* 299/*
279 * Slow start is used when congestion window is less than slow start 300 * Slow start is used when congestion window is less than slow start
@@ -324,7 +345,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
324/* This is Jacobson's slow start and congestion avoidance. 345/* This is Jacobson's slow start and congestion avoidance.
325 * SIGCOMM '88, p. 328. 346 * SIGCOMM '88, p. 328.
326 */ 347 */
327void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) 348void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
328{ 349{
329 struct tcp_sock *tp = tcp_sk(sk); 350 struct tcp_sock *tp = tcp_sk(sk);
330 351
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 80bd084a9f91..3aa0b23c1ea0 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -246,8 +246,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
246 ca->cnt = 1; 246 ca->cnt = 1;
247} 247}
248 248
249static void bictcp_cong_avoid(struct sock *sk, u32 ack, 249static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
250 u32 in_flight, int data_acked)
251{ 250{
252 struct tcp_sock *tp = tcp_sk(sk); 251 struct tcp_sock *tp = tcp_sk(sk);
253 struct bictcp *ca = inet_csk_ca(sk); 252 struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 14a073d8b60f..8b6caaf75bb9 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,8 +109,7 @@ static void hstcp_init(struct sock *sk)
109 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); 109 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
110} 110}
111 111
112static void hstcp_cong_avoid(struct sock *sk, u32 adk, 112static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight)
113 u32 in_flight, int data_acked)
114{ 113{
115 struct tcp_sock *tp = tcp_sk(sk); 114 struct tcp_sock *tp = tcp_sk(sk);
116 struct hstcp *ca = inet_csk_ca(sk); 115 struct hstcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 5215691f2760..af99776146ff 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -225,8 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
225 return max((tp->snd_cwnd * ca->beta) >> 7, 2U); 225 return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
226} 226}
227 227
228static void htcp_cong_avoid(struct sock *sk, u32 ack, 228static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
229 u32 in_flight, int data_acked)
230{ 229{
231 struct tcp_sock *tp = tcp_sk(sk); 230 struct tcp_sock *tp = tcp_sk(sk);
232 struct htcp *ca = inet_csk_ca(sk); 231 struct htcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index b3e55cf56171..44618b675916 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -85,8 +85,7 @@ static inline u32 hybla_fraction(u32 odds)
85 * o Give cwnd a new value based on the model proposed 85 * o Give cwnd a new value based on the model proposed
86 * o remember increments <1 86 * o remember increments <1
87 */ 87 */
88static void hybla_cong_avoid(struct sock *sk, u32 ack, 88static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
89 u32 in_flight, int flag)
90{ 89{
91 struct tcp_sock *tp = tcp_sk(sk); 90 struct tcp_sock *tp = tcp_sk(sk);
92 struct hybla *ca = inet_csk_ca(sk); 91 struct hybla *ca = inet_csk_ca(sk);
@@ -103,7 +102,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack,
103 return; 102 return;
104 103
105 if (!ca->hybla_en) 104 if (!ca->hybla_en)
106 return tcp_reno_cong_avoid(sk, ack, in_flight, flag); 105 return tcp_reno_cong_avoid(sk, ack, in_flight);
107 106
108 if (ca->rho == 0) 107 if (ca->rho == 0)
109 hybla_recalc_param(sk); 108 hybla_recalc_param(sk);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 5aa5f5496d6d..1eba160b72dc 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -256,8 +256,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state)
256/* 256/*
257 * Increase window in response to successful acknowledgment. 257 * Increase window in response to successful acknowledgment.
258 */ 258 */
259static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, 259static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
260 u32 in_flight, int flag)
261{ 260{
262 struct tcp_sock *tp = tcp_sk(sk); 261 struct tcp_sock *tp = tcp_sk(sk);
263 struct illinois *ca = inet_csk_ca(sk); 262 struct illinois *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b39f0d86e44c..fa2c85ca5bc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,6 +105,7 @@ int sysctl_tcp_abc __read_mostly;
105#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 105#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
106#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ 106#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
107#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ 107#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */
108#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
108 109
109#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 110#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
110#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 111#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
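
FLAG_SACK_RENEGING feeds the rewritten tcp_check_sack_reneging() further
down. Assumed setter in tcp_clean_rtx_queue() (that hunk is not shown here):
the flag is raised when the skb at the head of the write queue, about to be
ACKed past, had been remembered as SACKed:

	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
		flag |= FLAG_SACK_RENEGING;
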
@@ -120,8 +121,7 @@ int sysctl_tcp_abc __read_mostly;
120/* Adapt the MSS value used to make delayed ack decision to the 121/* Adapt the MSS value used to make delayed ack decision to the
121 * real world. 122 * real world.
122 */ 123 */
123static void tcp_measure_rcv_mss(struct sock *sk, 124static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
124 const struct sk_buff *skb)
125{ 125{
126 struct inet_connection_sock *icsk = inet_csk(sk); 126 struct inet_connection_sock *icsk = inet_csk(sk);
127 const unsigned int lss = icsk->icsk_ack.last_seg_size; 127 const unsigned int lss = icsk->icsk_ack.last_seg_size;
@@ -132,7 +132,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
132 /* skb->len may jitter because of SACKs, even if peer 132 /* skb->len may jitter because of SACKs, even if peer
133 * sends good full-sized frames. 133 * sends good full-sized frames.
134 */ 134 */
135 len = skb_shinfo(skb)->gso_size ?: skb->len; 135 len = skb_shinfo(skb)->gso_size ? : skb->len;
136 if (len >= icsk->icsk_ack.rcv_mss) { 136 if (len >= icsk->icsk_ack.rcv_mss) {
137 icsk->icsk_ack.rcv_mss = len; 137 icsk->icsk_ack.rcv_mss = len;
138 } else { 138 } else {
@@ -172,8 +172,8 @@ static void tcp_incr_quickack(struct sock *sk)
172 struct inet_connection_sock *icsk = inet_csk(sk); 172 struct inet_connection_sock *icsk = inet_csk(sk);
173 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); 173 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
174 174
175 if (quickacks==0) 175 if (quickacks == 0)
176 quickacks=2; 176 quickacks = 2;
177 if (quickacks > icsk->icsk_ack.quick) 177 if (quickacks > icsk->icsk_ack.quick)
178 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); 178 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
179} 179}
@@ -198,7 +198,7 @@ static inline int tcp_in_quickack_mode(const struct sock *sk)
198 198
199static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) 199static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
200{ 200{
201 if (tp->ecn_flags&TCP_ECN_OK) 201 if (tp->ecn_flags & TCP_ECN_OK)
202 tp->ecn_flags |= TCP_ECN_QUEUE_CWR; 202 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
203} 203}
204 204
@@ -215,7 +215,7 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
215 215
216static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) 216static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
217{ 217{
218 if (tp->ecn_flags&TCP_ECN_OK) { 218 if (tp->ecn_flags & TCP_ECN_OK) {
219 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) 219 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
220 tp->ecn_flags |= TCP_ECN_DEMAND_CWR; 220 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
221 /* Funny extension: if ECT is not set on a segment, 221 /* Funny extension: if ECT is not set on a segment,
@@ -228,19 +228,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
228 228
229static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) 229static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
230{ 230{
231 if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr)) 231 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
232 tp->ecn_flags &= ~TCP_ECN_OK; 232 tp->ecn_flags &= ~TCP_ECN_OK;
233} 233}
234 234
235static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) 235static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
236{ 236{
237 if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr)) 237 if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
238 tp->ecn_flags &= ~TCP_ECN_OK; 238 tp->ecn_flags &= ~TCP_ECN_OK;
239} 239}
240 240
241static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) 241static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
242{ 242{
243 if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK)) 243 if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
244 return 1; 244 return 1;
245 return 0; 245 return 0;
246} 246}
@@ -289,8 +289,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
289{ 289{
290 struct tcp_sock *tp = tcp_sk(sk); 290 struct tcp_sock *tp = tcp_sk(sk);
291 /* Optimize this! */ 291 /* Optimize this! */
292 int truesize = tcp_win_from_space(skb->truesize)/2; 292 int truesize = tcp_win_from_space(skb->truesize) >> 1;
293 int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; 293 int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
294 294
295 while (tp->rcv_ssthresh <= window) { 295 while (tp->rcv_ssthresh <= window) {
296 if (truesize <= skb->len) 296 if (truesize <= skb->len)
@@ -302,8 +302,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
302 return 0; 302 return 0;
303} 303}
304 304
305static void tcp_grow_window(struct sock *sk, 305static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
306 struct sk_buff *skb)
307{ 306{
308 struct tcp_sock *tp = tcp_sk(sk); 307 struct tcp_sock *tp = tcp_sk(sk);
309 308
@@ -317,12 +316,13 @@ static void tcp_grow_window(struct sock *sk,
317 * will fit to rcvbuf in future. 316 * will fit to rcvbuf in future.
318 */ 317 */
319 if (tcp_win_from_space(skb->truesize) <= skb->len) 318 if (tcp_win_from_space(skb->truesize) <= skb->len)
320 incr = 2*tp->advmss; 319 incr = 2 * tp->advmss;
321 else 320 else
322 incr = __tcp_grow_window(sk, skb); 321 incr = __tcp_grow_window(sk, skb);
323 322
324 if (incr) { 323 if (incr) {
325 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); 324 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
325 tp->window_clamp);
326 inet_csk(sk)->icsk_ack.quick |= 1; 326 inet_csk(sk)->icsk_ack.quick |= 1;
327 } 327 }
328 } 328 }
@@ -397,10 +397,9 @@ static void tcp_clamp_window(struct sock *sk)
397 sysctl_tcp_rmem[2]); 397 sysctl_tcp_rmem[2]);
398 } 398 }
399 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) 399 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
400 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 400 tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
401} 401}
402 402
403
404/* Initialize RCV_MSS value. 403/* Initialize RCV_MSS value.
 405 * RCV_MSS is our guess about the MSS used by the peer. 404 * RCV_MSS is our guess about the MSS used by the peer.
406 * We haven't any direct information about the MSS. 405 * We haven't any direct information about the MSS.
@@ -413,7 +412,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
413 struct tcp_sock *tp = tcp_sk(sk); 412 struct tcp_sock *tp = tcp_sk(sk);
414 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); 413 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
415 414
416 hint = min(hint, tp->rcv_wnd/2); 415 hint = min(hint, tp->rcv_wnd / 2);
417 hint = min(hint, TCP_MIN_RCVMSS); 416 hint = min(hint, TCP_MIN_RCVMSS);
418 hint = max(hint, TCP_MIN_MSS); 417 hint = max(hint, TCP_MIN_MSS);
419 418
@@ -470,16 +469,15 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
470 goto new_measure; 469 goto new_measure;
471 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) 470 if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
472 return; 471 return;
473 tcp_rcv_rtt_update(tp, 472 tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
474 jiffies - tp->rcv_rtt_est.time,
475 1);
476 473
477new_measure: 474new_measure:
478 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; 475 tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
479 tp->rcv_rtt_est.time = tcp_time_stamp; 476 tp->rcv_rtt_est.time = tcp_time_stamp;
480} 477}
481 478
482static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) 479static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
480 const struct sk_buff *skb)
483{ 481{
484 struct tcp_sock *tp = tcp_sk(sk); 482 struct tcp_sock *tp = tcp_sk(sk);
485 if (tp->rx_opt.rcv_tsecr && 483 if (tp->rx_opt.rcv_tsecr &&
@@ -502,8 +500,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
502 goto new_measure; 500 goto new_measure;
503 501
504 time = tcp_time_stamp - tp->rcvq_space.time; 502 time = tcp_time_stamp - tp->rcvq_space.time;
505 if (time < (tp->rcv_rtt_est.rtt >> 3) || 503 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
506 tp->rcv_rtt_est.rtt == 0)
507 return; 504 return;
508 505
509 space = 2 * (tp->copied_seq - tp->rcvq_space.seq); 506 space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
@@ -579,7 +576,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
579 } else { 576 } else {
580 int m = now - icsk->icsk_ack.lrcvtime; 577 int m = now - icsk->icsk_ack.lrcvtime;
581 578
582 if (m <= TCP_ATO_MIN/2) { 579 if (m <= TCP_ATO_MIN / 2) {
583 /* The fastest case is the first. */ 580 /* The fastest case is the first. */
584 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2; 581 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
585 } else if (m < icsk->icsk_ack.ato) { 582 } else if (m < icsk->icsk_ack.ato) {
@@ -591,7 +588,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
591 * restart window, so that we send ACKs quickly. 588 * restart window, so that we send ACKs quickly.
592 */ 589 */
593 tcp_incr_quickack(sk); 590 tcp_incr_quickack(sk);
594 sk_stream_mem_reclaim(sk); 591 sk_mem_reclaim(sk);
595 } 592 }
596 } 593 }
597 icsk->icsk_ack.lrcvtime = now; 594 icsk->icsk_ack.lrcvtime = now;
@@ -608,7 +605,7 @@ static u32 tcp_rto_min(struct sock *sk)
608 u32 rto_min = TCP_RTO_MIN; 605 u32 rto_min = TCP_RTO_MIN;
609 606
610 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) 607 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
611 rto_min = dst->metrics[RTAX_RTO_MIN-1]; 608 rto_min = dst->metrics[RTAX_RTO_MIN - 1];
612 return rto_min; 609 return rto_min;
613} 610}
614 611
@@ -671,14 +668,14 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
671 } 668 }
672 if (after(tp->snd_una, tp->rtt_seq)) { 669 if (after(tp->snd_una, tp->rtt_seq)) {
673 if (tp->mdev_max < tp->rttvar) 670 if (tp->mdev_max < tp->rttvar)
674 tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; 671 tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
675 tp->rtt_seq = tp->snd_nxt; 672 tp->rtt_seq = tp->snd_nxt;
676 tp->mdev_max = tcp_rto_min(sk); 673 tp->mdev_max = tcp_rto_min(sk);
677 } 674 }
678 } else { 675 } else {
679 /* no previous measure. */ 676 /* no previous measure. */
680 tp->srtt = m<<3; /* take the measured time to be rtt */ 677 tp->srtt = m << 3; /* take the measured time to be rtt */
681 tp->mdev = m<<1; /* make sure rto = 3*rtt */ 678 tp->mdev = m << 1; /* make sure rto = 3*rtt */
682 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 679 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
683 tp->rtt_seq = tp->snd_nxt; 680 tp->rtt_seq = tp->snd_nxt;
684 } 681 }
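
The fixed-point arithmetic behind the two comments above: srtt is kept scaled
by 8 and mdev by 4, and the retransmit timer is assumed to be computed
elsewhere as rto = (srtt >> 3) + rttvar. For a first sample m,
srtt = m << 3 = 8m and mdev = m << 1 = 2m, and with
rttvar = max(mdev, rto_min) = 2m this gives rto = m + 2m = 3m, i.e. exactly
the promised 3*rtt.
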
@@ -732,7 +729,7 @@ void tcp_update_metrics(struct sock *sk)
732 729
733 dst_confirm(dst); 730 dst_confirm(dst);
734 731
735 if (dst && (dst->flags&DST_HOST)) { 732 if (dst && (dst->flags & DST_HOST)) {
736 const struct inet_connection_sock *icsk = inet_csk(sk); 733 const struct inet_connection_sock *icsk = inet_csk(sk);
737 int m; 734 int m;
738 735
@@ -742,7 +739,7 @@ void tcp_update_metrics(struct sock *sk)
742 * Reset our results. 739 * Reset our results.
743 */ 740 */
744 if (!(dst_metric_locked(dst, RTAX_RTT))) 741 if (!(dst_metric_locked(dst, RTAX_RTT)))
745 dst->metrics[RTAX_RTT-1] = 0; 742 dst->metrics[RTAX_RTT - 1] = 0;
746 return; 743 return;
747 } 744 }
748 745
@@ -754,9 +751,9 @@ void tcp_update_metrics(struct sock *sk)
754 */ 751 */
755 if (!(dst_metric_locked(dst, RTAX_RTT))) { 752 if (!(dst_metric_locked(dst, RTAX_RTT))) {
756 if (m <= 0) 753 if (m <= 0)
757 dst->metrics[RTAX_RTT-1] = tp->srtt; 754 dst->metrics[RTAX_RTT - 1] = tp->srtt;
758 else 755 else
759 dst->metrics[RTAX_RTT-1] -= (m>>3); 756 dst->metrics[RTAX_RTT - 1] -= (m >> 3);
760 } 757 }
761 758
762 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { 759 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
@@ -769,7 +766,7 @@ void tcp_update_metrics(struct sock *sk)
769 m = tp->mdev; 766 m = tp->mdev;
770 767
771 if (m >= dst_metric(dst, RTAX_RTTVAR)) 768 if (m >= dst_metric(dst, RTAX_RTTVAR))
772 dst->metrics[RTAX_RTTVAR-1] = m; 769 dst->metrics[RTAX_RTTVAR - 1] = m;
773 else 770 else
774 dst->metrics[RTAX_RTTVAR-1] -= 771 dst->metrics[RTAX_RTTVAR-1] -=
775 (dst->metrics[RTAX_RTTVAR-1] - m)>>2; 772 (dst->metrics[RTAX_RTTVAR-1] - m)>>2;
@@ -783,7 +780,7 @@ void tcp_update_metrics(struct sock *sk)
783 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1; 780 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
784 if (!dst_metric_locked(dst, RTAX_CWND) && 781 if (!dst_metric_locked(dst, RTAX_CWND) &&
785 tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) 782 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
786 dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; 783 dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
787 } else if (tp->snd_cwnd > tp->snd_ssthresh && 784 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
788 icsk->icsk_ca_state == TCP_CA_Open) { 785 icsk->icsk_ca_state == TCP_CA_Open) {
789 /* Cong. avoidance phase, cwnd is reliable. */ 786 /* Cong. avoidance phase, cwnd is reliable. */
@@ -863,6 +860,9 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
863 */ 860 */
864static void tcp_disable_fack(struct tcp_sock *tp) 861static void tcp_disable_fack(struct tcp_sock *tp)
865{ 862{
863 /* RFC3517 uses different metric in lost marker => reset on change */
864 if (tcp_is_fack(tp))
865 tp->lost_skb_hint = NULL;
866 tp->rx_opt.sack_ok &= ~2; 866 tp->rx_opt.sack_ok &= ~2;
867} 867}
868 868
@@ -1112,16 +1112,22 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1112 * 1112 *
1113 * Search retransmitted skbs from write_queue that were sent when snd_nxt was 1113 * Search retransmitted skbs from write_queue that were sent when snd_nxt was
1114 * less than what is now known to be received by the other end (derived from 1114 * less than what is now known to be received by the other end (derived from
1115 * SACK blocks by the caller). Also calculate the lowest snd_nxt among the 1115 * highest SACK block). Also calculate the lowest snd_nxt among the remaining
 1116 * remaining retransmitted skbs to avoid some costly processing per ACKs. 1116 * retransmitted skbs to avoid some costly processing per ACK.
1117 */ 1117 */
1118static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto) 1118static void tcp_mark_lost_retrans(struct sock *sk)
1119{ 1119{
1120 const struct inet_connection_sock *icsk = inet_csk(sk);
1120 struct tcp_sock *tp = tcp_sk(sk); 1121 struct tcp_sock *tp = tcp_sk(sk);
1121 struct sk_buff *skb; 1122 struct sk_buff *skb;
1122 int flag = 0;
1123 int cnt = 0; 1123 int cnt = 0;
1124 u32 new_low_seq = tp->snd_nxt; 1124 u32 new_low_seq = tp->snd_nxt;
1125 u32 received_upto = tcp_highest_sack_seq(tp);
1126
1127 if (!tcp_is_fack(tp) || !tp->retrans_out ||
1128 !after(received_upto, tp->lost_retrans_low) ||
1129 icsk->icsk_ca_state != TCP_CA_Recovery)
1130 return;
1125 1131
1126 tcp_for_write_queue(skb, sk) { 1132 tcp_for_write_queue(skb, sk) {
1127 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; 1133 u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
@@ -1149,9 +1155,8 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
1149 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { 1155 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1150 tp->lost_out += tcp_skb_pcount(skb); 1156 tp->lost_out += tcp_skb_pcount(skb);
1151 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1157 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1152 flag |= FLAG_DATA_SACKED;
1153 NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
1154 } 1158 }
1159 NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
1155 } else { 1160 } else {
1156 if (before(ack_seq, new_low_seq)) 1161 if (before(ack_seq, new_low_seq))
1157 new_low_seq = ack_seq; 1162 new_low_seq = ack_seq;
@@ -1161,8 +1166,6 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
1161 1166
1162 if (tp->retrans_out) 1167 if (tp->retrans_out)
1163 tp->lost_retrans_low = new_low_seq; 1168 tp->lost_retrans_low = new_low_seq;
1164
1165 return flag;
1166} 1169}
1167 1170
1168static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, 1171static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
@@ -1230,34 +1233,205 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1230 return in_sack; 1233 return in_sack;
1231} 1234}
1232 1235
1236static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
1237 int *reord, int dup_sack, int fack_count)
1238{
1239 struct tcp_sock *tp = tcp_sk(sk);
1240 u8 sacked = TCP_SKB_CB(skb)->sacked;
1241 int flag = 0;
1242
1243 /* Account D-SACK for retransmitted packet. */
1244 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1245 if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1246 tp->undo_retrans--;
1247 if (sacked & TCPCB_SACKED_ACKED)
1248 *reord = min(fack_count, *reord);
1249 }
1250
1251 /* Nothing to do; acked frame is about to be dropped (was ACKed). */
1252 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
1253 return flag;
1254
1255 if (!(sacked & TCPCB_SACKED_ACKED)) {
1256 if (sacked & TCPCB_SACKED_RETRANS) {
1257 /* If the segment is not tagged as lost,
1258 * we do not clear RETRANS, believing
1259 * that retransmission is still in flight.
1260 */
1261 if (sacked & TCPCB_LOST) {
1262 TCP_SKB_CB(skb)->sacked &=
1263 ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1264 tp->lost_out -= tcp_skb_pcount(skb);
1265 tp->retrans_out -= tcp_skb_pcount(skb);
1266
1267 /* clear lost hint */
1268 tp->retransmit_skb_hint = NULL;
1269 }
1270 } else {
1271 if (!(sacked & TCPCB_RETRANS)) {
1272 /* New sack for not retransmitted frame,
1273 * which was in hole. It is reordering.
1274 */
1275 if (before(TCP_SKB_CB(skb)->seq,
1276 tcp_highest_sack_seq(tp)))
1277 *reord = min(fack_count, *reord);
1278
1279 /* SACK enhanced F-RTO (RFC4138; Appendix B) */
1280 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1281 flag |= FLAG_ONLY_ORIG_SACKED;
1282 }
1283
1284 if (sacked & TCPCB_LOST) {
1285 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1286 tp->lost_out -= tcp_skb_pcount(skb);
1287
1288 /* clear lost hint */
1289 tp->retransmit_skb_hint = NULL;
1290 }
1291 }
1292
1293 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
1294 flag |= FLAG_DATA_SACKED;
1295 tp->sacked_out += tcp_skb_pcount(skb);
1296
1297 fack_count += tcp_skb_pcount(skb);
1298
1299 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
1300 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
1301 before(TCP_SKB_CB(skb)->seq,
1302 TCP_SKB_CB(tp->lost_skb_hint)->seq))
1303 tp->lost_cnt_hint += tcp_skb_pcount(skb);
1304
1305 if (fack_count > tp->fackets_out)
1306 tp->fackets_out = fack_count;
1307
1308 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
1309 tcp_advance_highest_sack(sk, skb);
1310 }
1311
1312 /* D-SACK. We can detect redundant retransmission in S|R and plain R
1313 * frames and clear it. undo_retrans is decreased above, L|R frames
1314 * are accounted above as well.
1315 */
1316 if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
1317 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1318 tp->retrans_out -= tcp_skb_pcount(skb);
1319 tp->retransmit_skb_hint = NULL;
1320 }
1321
1322 return flag;
1323}
1324
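
Tag transitions performed by tcp_sacktag_one() for an skb inside the SACKed
range, in short:

/*  L|R : clear both, shrink lost_out and retrans_out
 *        (the retransmission turned out to be unnecessary)
 *  R   : keep R; the retransmission may still be in flight
 *  L   : clear L, shrink lost_out
 *  none: if seq is below the highest SACK so far, count reordering
 * then TCPCB_SACKED_ACKED is set and sacked_out grows; a trailing
 * D-SACK check drops a redundant R from S|R and plain R frames.
 */
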
1325static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
1326 struct tcp_sack_block *next_dup,
1327 u32 start_seq, u32 end_seq,
1328 int dup_sack_in, int *fack_count,
1329 int *reord, int *flag)
1330{
1331 tcp_for_write_queue_from(skb, sk) {
1332 int in_sack = 0;
1333 int dup_sack = dup_sack_in;
1334
1335 if (skb == tcp_send_head(sk))
1336 break;
1337
1338 /* queue is in-order => we can short-circuit the walk early */
1339 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1340 break;
1341
1342 if ((next_dup != NULL) &&
1343 before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
1344 in_sack = tcp_match_skb_to_sack(sk, skb,
1345 next_dup->start_seq,
1346 next_dup->end_seq);
1347 if (in_sack > 0)
1348 dup_sack = 1;
1349 }
1350
1351 if (in_sack <= 0)
1352 in_sack = tcp_match_skb_to_sack(sk, skb, start_seq,
1353 end_seq);
1354 if (unlikely(in_sack < 0))
1355 break;
1356
1357 if (in_sack)
1358 *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
1359 *fack_count);
1360
1361 *fack_count += tcp_skb_pcount(skb);
1362 }
1363 return skb;
1364}
1365
 1366/* Avoid all the extra work that sacktag does when walking the queue in
 1367 * the normal way.
1368 */
1369static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
1370 u32 skip_to_seq)
1371{
1372 tcp_for_write_queue_from(skb, sk) {
1373 if (skb == tcp_send_head(sk))
1374 break;
1375
1376 if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
1377 break;
1378 }
1379 return skb;
1380}
1381
1382static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1383 struct sock *sk,
1384 struct tcp_sack_block *next_dup,
1385 u32 skip_to_seq,
1386 int *fack_count, int *reord,
1387 int *flag)
1388{
1389 if (next_dup == NULL)
1390 return skb;
1391
1392 if (before(next_dup->start_seq, skip_to_seq)) {
1393 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
1394 tcp_sacktag_walk(skb, sk, NULL,
1395 next_dup->start_seq, next_dup->end_seq,
1396 1, fack_count, reord, flag);
1397 }
1398
1399 return skb;
1400}
1401
1402static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
1403{
1404 return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1405}
1406
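
What the cache buys, with assumed sequence numbers: if the previous ACK
carried the SACK block [4000,6000) and this ACK enlarges it to [4000,7000),
the walk can skip straight to 6000 and tag only [6000,7000) instead of
re-walking the whole block. tcp_sack_cache_ok() merely bounds the cache
pointer to the four recv_sack_cache slots.
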
1233static int 1407static int
1234tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) 1408tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1409 u32 prior_snd_una)
1235{ 1410{
1236 const struct inet_connection_sock *icsk = inet_csk(sk); 1411 const struct inet_connection_sock *icsk = inet_csk(sk);
1237 struct tcp_sock *tp = tcp_sk(sk); 1412 struct tcp_sock *tp = tcp_sk(sk);
1238 unsigned char *ptr = (skb_transport_header(ack_skb) + 1413 unsigned char *ptr = (skb_transport_header(ack_skb) +
1239 TCP_SKB_CB(ack_skb)->sacked); 1414 TCP_SKB_CB(ack_skb)->sacked);
1240 struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); 1415 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1241 struct sk_buff *cached_skb; 1416 struct tcp_sack_block sp[4];
1242 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; 1417 struct tcp_sack_block *cache;
1418 struct sk_buff *skb;
1419 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3;
1420 int used_sacks;
1243 int reord = tp->packets_out; 1421 int reord = tp->packets_out;
1244 int prior_fackets;
1245 u32 highest_sack_end_seq = tp->lost_retrans_low;
1246 int flag = 0; 1422 int flag = 0;
1247 int found_dup_sack = 0; 1423 int found_dup_sack = 0;
1248 int cached_fack_count; 1424 int fack_count;
1249 int i; 1425 int i, j;
1250 int first_sack_index; 1426 int first_sack_index;
1251 int force_one_sack;
1252 1427
1253 if (!tp->sacked_out) { 1428 if (!tp->sacked_out) {
1254 if (WARN_ON(tp->fackets_out)) 1429 if (WARN_ON(tp->fackets_out))
1255 tp->fackets_out = 0; 1430 tp->fackets_out = 0;
1256 tp->highest_sack = tp->snd_una; 1431 tcp_highest_sack_reset(sk);
1257 } 1432 }
1258 prior_fackets = tp->fackets_out;
1259 1433
1260 found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, 1434 found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
1261 num_sacks, prior_snd_una); 1435 num_sacks, prior_snd_una);
1262 if (found_dup_sack) 1436 if (found_dup_sack)
1263 flag |= FLAG_DSACKING_ACK; 1437 flag |= FLAG_DSACKING_ACK;
@@ -1272,78 +1446,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1272 if (!tp->packets_out) 1446 if (!tp->packets_out)
1273 goto out; 1447 goto out;
1274 1448
1275 /* SACK fastpath: 1449 used_sacks = 0;
1276 * if the only SACK change is the increase of the end_seq of
1277 * the first block then only apply that SACK block
1278 * and use retrans queue hinting otherwise slowpath */
1279 force_one_sack = 1;
1280 for (i = 0; i < num_sacks; i++) {
1281 __be32 start_seq = sp[i].start_seq;
1282 __be32 end_seq = sp[i].end_seq;
1283
1284 if (i == 0) {
1285 if (tp->recv_sack_cache[i].start_seq != start_seq)
1286 force_one_sack = 0;
1287 } else {
1288 if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
1289 (tp->recv_sack_cache[i].end_seq != end_seq))
1290 force_one_sack = 0;
1291 }
1292 tp->recv_sack_cache[i].start_seq = start_seq;
1293 tp->recv_sack_cache[i].end_seq = end_seq;
1294 }
1295 /* Clear the rest of the cache sack blocks so they won't match mistakenly. */
1296 for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
1297 tp->recv_sack_cache[i].start_seq = 0;
1298 tp->recv_sack_cache[i].end_seq = 0;
1299 }
1300
1301 first_sack_index = 0; 1450 first_sack_index = 0;
1302 if (force_one_sack)
1303 num_sacks = 1;
1304 else {
1305 int j;
1306 tp->fastpath_skb_hint = NULL;
1307
1308 /* order SACK blocks to allow in order walk of the retrans queue */
1309 for (i = num_sacks-1; i > 0; i--) {
1310 for (j = 0; j < i; j++){
1311 if (after(ntohl(sp[j].start_seq),
1312 ntohl(sp[j+1].start_seq))){
1313 struct tcp_sack_block_wire tmp;
1314
1315 tmp = sp[j];
1316 sp[j] = sp[j+1];
1317 sp[j+1] = tmp;
1318
1319 /* Track where the first SACK block goes to */
1320 if (j == first_sack_index)
1321 first_sack_index = j+1;
1322 }
1323
1324 }
1325 }
1326 }
1327
1328 /* Use SACK fastpath hint if valid */
1329 cached_skb = tp->fastpath_skb_hint;
1330 cached_fack_count = tp->fastpath_cnt_hint;
1331 if (!cached_skb) {
1332 cached_skb = tcp_write_queue_head(sk);
1333 cached_fack_count = 0;
1334 }
1335
1336 for (i = 0; i < num_sacks; i++) { 1451 for (i = 0; i < num_sacks; i++) {
1337 struct sk_buff *skb; 1452 int dup_sack = !i && found_dup_sack;
1338 __u32 start_seq = ntohl(sp->start_seq);
1339 __u32 end_seq = ntohl(sp->end_seq);
1340 int fack_count;
1341 int dup_sack = (found_dup_sack && (i == first_sack_index));
1342 int next_dup = (found_dup_sack && (i+1 == first_sack_index));
1343 1453
1344 sp++; 1454 sp[used_sacks].start_seq = ntohl(get_unaligned(&sp_wire[i].start_seq));
1455 sp[used_sacks].end_seq = ntohl(get_unaligned(&sp_wire[i].end_seq));
1345 1456
1346 if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) { 1457 if (!tcp_is_sackblock_valid(tp, dup_sack,
1458 sp[used_sacks].start_seq,
1459 sp[used_sacks].end_seq)) {
1347 if (dup_sack) { 1460 if (dup_sack) {
1348 if (!tp->undo_marker) 1461 if (!tp->undo_marker)
1349 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); 1462 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
@@ -1352,169 +1465,148 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1352 } else { 1465 } else {
1353 /* Don't count olds caused by ACK reordering */ 1466 /* Don't count olds caused by ACK reordering */
1354 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && 1467 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1355 !after(end_seq, tp->snd_una)) 1468 !after(sp[used_sacks].end_seq, tp->snd_una))
1356 continue; 1469 continue;
1357 NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); 1470 NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
1358 } 1471 }
1472 if (i == 0)
1473 first_sack_index = -1;
1359 continue; 1474 continue;
1360 } 1475 }
1361 1476
1362 skb = cached_skb; 1477 /* Ignore very old stuff early */
1363 fack_count = cached_fack_count; 1478 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1364 1479 continue;
1365 /* Event "B" in the comment above. */
1366 if (after(end_seq, tp->high_seq))
1367 flag |= FLAG_DATA_LOST;
1368
1369 tcp_for_write_queue_from(skb, sk) {
1370 int in_sack = 0;
1371 u8 sacked;
1372
1373 if (skb == tcp_send_head(sk))
1374 break;
1375
1376 cached_skb = skb;
1377 cached_fack_count = fack_count;
1378 if (i == first_sack_index) {
1379 tp->fastpath_skb_hint = skb;
1380 tp->fastpath_cnt_hint = fack_count;
1381 }
1382 1480
1383 /* The retransmission queue is always in order, so 1481 used_sacks++;
1384 * we can short-circuit the walk early. 1482 }
1385 */
1386 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
1387 break;
1388 1483
1389 dup_sack = (found_dup_sack && (i == first_sack_index)); 1484 /* order SACK blocks to allow in order walk of the retrans queue */
1485 for (i = used_sacks - 1; i > 0; i--) {
1486 for (j = 0; j < i; j++) {
1487 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1488 struct tcp_sack_block tmp;
1390 1489
1391 /* Due to sorting DSACK may reside within this SACK block! */ 1490 tmp = sp[j];
1392 if (next_dup) { 1491 sp[j] = sp[j + 1];
1393 u32 dup_start = ntohl(sp->start_seq); 1492 sp[j + 1] = tmp;
1394 u32 dup_end = ntohl(sp->end_seq);
1395 1493
1396 if (before(TCP_SKB_CB(skb)->seq, dup_end)) { 1494 /* Track where the first SACK block goes to */
1397 in_sack = tcp_match_skb_to_sack(sk, skb, dup_start, dup_end); 1495 if (j == first_sack_index)
1398 if (in_sack > 0) 1496 first_sack_index = j + 1;
1399 dup_sack = 1;
1400 }
1401 } 1497 }
1498 }
1499 }
1402 1500
1403 /* DSACK info lost if out-of-mem, try SACK still */ 1501 skb = tcp_write_queue_head(sk);
1404 if (in_sack <= 0) 1502 fack_count = 0;
1405 in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); 1503 i = 0;
1406 if (unlikely(in_sack < 0))
1407 break;
1408 1504
1409 sacked = TCP_SKB_CB(skb)->sacked; 1505 if (!tp->sacked_out) {
1506 /* It's already past, so skip checking against it */
1507 cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
1508 } else {
1509 cache = tp->recv_sack_cache;
 1510 /* Skip empty blocks at the head of the cache */
1511 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1512 !cache->end_seq)
1513 cache++;
1514 }
1410 1515
1411 /* Account D-SACK for retransmitted packet. */ 1516 while (i < used_sacks) {
1412 if ((dup_sack && in_sack) && 1517 u32 start_seq = sp[i].start_seq;
1413 (sacked & TCPCB_RETRANS) && 1518 u32 end_seq = sp[i].end_seq;
1414 after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) 1519 int dup_sack = (found_dup_sack && (i == first_sack_index));
1415 tp->undo_retrans--; 1520 struct tcp_sack_block *next_dup = NULL;
1416 1521
1417 /* The frame is ACKed. */ 1522 if (found_dup_sack && ((i + 1) == first_sack_index))
1418 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { 1523 next_dup = &sp[i + 1];
1419 if (sacked&TCPCB_RETRANS) {
1420 if ((dup_sack && in_sack) &&
1421 (sacked&TCPCB_SACKED_ACKED))
1422 reord = min(fack_count, reord);
1423 }
1424 1524
1425 /* Nothing to do; acked frame is about to be dropped. */ 1525 /* Event "B" in the comment above. */
1426 fack_count += tcp_skb_pcount(skb); 1526 if (after(end_seq, tp->high_seq))
1427 continue; 1527 flag |= FLAG_DATA_LOST;
1428 }
1429 1528
1430 if (!in_sack) { 1529 /* Skip too early cached blocks */
1431 fack_count += tcp_skb_pcount(skb); 1530 while (tcp_sack_cache_ok(tp, cache) &&
1432 continue; 1531 !before(start_seq, cache->end_seq))
1532 cache++;
1533
 1534 /* Can skip some work by consulting recv_sack_cache? */
1535 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1536 after(end_seq, cache->start_seq)) {
1537
1538 /* Head todo? */
1539 if (before(start_seq, cache->start_seq)) {
1540 skb = tcp_sacktag_skip(skb, sk, start_seq);
1541 skb = tcp_sacktag_walk(skb, sk, next_dup,
1542 start_seq,
1543 cache->start_seq,
1544 dup_sack, &fack_count,
1545 &reord, &flag);
1433 } 1546 }
1434 1547
1435 if (!(sacked&TCPCB_SACKED_ACKED)) { 1548 /* Rest of the block already fully processed? */
1436 if (sacked & TCPCB_SACKED_RETRANS) { 1549 if (!after(end_seq, cache->end_seq))
1437 /* If the segment is not tagged as lost, 1550 goto advance_sp;
1438 * we do not clear RETRANS, believing
1439 * that retransmission is still in flight.
1440 */
1441 if (sacked & TCPCB_LOST) {
1442 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1443 tp->lost_out -= tcp_skb_pcount(skb);
1444 tp->retrans_out -= tcp_skb_pcount(skb);
1445
1446 /* clear lost hint */
1447 tp->retransmit_skb_hint = NULL;
1448 }
1449 } else {
1450 if (!(sacked & TCPCB_RETRANS)) {
1451 /* New sack for not retransmitted frame,
1452 * which was in hole. It is reordering.
1453 */
1454 if (fack_count < prior_fackets)
1455 reord = min(fack_count, reord);
1456
1457 /* SACK enhanced F-RTO (RFC4138; Appendix B) */
1458 if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
1459 flag |= FLAG_ONLY_ORIG_SACKED;
1460 }
1461
1462 if (sacked & TCPCB_LOST) {
1463 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1464 tp->lost_out -= tcp_skb_pcount(skb);
1465 1551
1466 /* clear lost hint */ 1552 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1467 tp->retransmit_skb_hint = NULL; 1553 cache->end_seq,
1468 } 1554 &fack_count, &reord,
1469 } 1555 &flag);
1470 1556
1471 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; 1557 /* ...tail remains todo... */
1472 flag |= FLAG_DATA_SACKED; 1558 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1473 tp->sacked_out += tcp_skb_pcount(skb); 1559 /* ...but better entrypoint exists! */
1474 1560 skb = tcp_highest_sack(sk);
1475 fack_count += tcp_skb_pcount(skb); 1561 if (skb == NULL)
1476 if (fack_count > tp->fackets_out) 1562 break;
1477 tp->fackets_out = fack_count; 1563 fack_count = tp->fackets_out;
1478 1564 cache++;
1479 if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) { 1565 goto walk;
1480 tp->highest_sack = TCP_SKB_CB(skb)->seq;
1481 highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
1482 }
1483 } else {
1484 if (dup_sack && (sacked&TCPCB_RETRANS))
1485 reord = min(fack_count, reord);
1486
1487 fack_count += tcp_skb_pcount(skb);
1488 } 1566 }
1489 1567
1490 /* D-SACK. We can detect redundant retransmission 1568 skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
1491 * in S|R and plain R frames and clear it. 1569 /* Check overlap against next cached too (past this one already) */
1492 * undo_retrans is decreased above, L|R frames 1570 cache++;
1493 * are accounted above as well. 1571 continue;
1494 */ 1572 }
1495 if (dup_sack && 1573
1496 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { 1574 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1497 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1575 skb = tcp_highest_sack(sk);
1498 tp->retrans_out -= tcp_skb_pcount(skb); 1576 if (skb == NULL)
1499 tp->retransmit_skb_hint = NULL; 1577 break;
1500 } 1578 fack_count = tp->fackets_out;
1501 } 1579 }
1580 skb = tcp_sacktag_skip(skb, sk, start_seq);
1581
1582walk:
1583 skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
1584 dup_sack, &fack_count, &reord, &flag);
1502 1585
1586advance_sp:
1503 /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct 1587 /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
1504 * due to in-order walk 1588 * due to in-order walk
1505 */ 1589 */
1506 if (after(end_seq, tp->frto_highmark)) 1590 if (after(end_seq, tp->frto_highmark))
1507 flag &= ~FLAG_ONLY_ORIG_SACKED; 1591 flag &= ~FLAG_ONLY_ORIG_SACKED;
1592
1593 i++;
1508 } 1594 }
1509 1595
1510 if (tp->retrans_out && 1596 /* Clear the head of the cache sack blocks so we can skip it next time */
1511 after(highest_sack_end_seq, tp->lost_retrans_low) && 1597 for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
1512 icsk->icsk_ca_state == TCP_CA_Recovery) 1598 tp->recv_sack_cache[i].start_seq = 0;
1513 flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq); 1599 tp->recv_sack_cache[i].end_seq = 0;
1600 }
1601 for (j = 0; j < used_sacks; j++)
1602 tp->recv_sack_cache[i++] = sp[j];
1603
1604 tcp_mark_lost_retrans(sk);
1514 1605
1515 tcp_verify_left_out(tp); 1606 tcp_verify_left_out(tp);
1516 1607
1517 if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && 1608 if ((reord < tp->fackets_out) &&
1609 ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
1518 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) 1610 (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
1519 tcp_update_reordering(sk, tp->fackets_out - reord, 0); 1611 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
1520 1612
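
The rewritten tcp_sacktag_write_queue() above now proceeds in four steps:
(1) copy the wire SACK blocks into the host-order sp[] array via
get_unaligned()/ntohl(), discarding invalid blocks and anything at or below
prior_snd_una; (2) bubble-sort sp[] by start_seq (at most four blocks, so a
quadratic sort is fine) while tracking where the D-SACK block lands; (3) walk
the write queue once in order, consulting recv_sack_cache to skip spans
already tagged by earlier ACKs and jumping to tcp_highest_sack() when a block
starts at or above the highest SACK seen so far; (4) save the used blocks
back into recv_sack_cache for the next ACK.
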
@@ -1565,10 +1657,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1565 1657
1566 if (acked > 0) { 1658 if (acked > 0) {
1567 /* One ACK acked hole. The rest eat duplicate ACKs. */ 1659 /* One ACK acked hole. The rest eat duplicate ACKs. */
1568 if (acked-1 >= tp->sacked_out) 1660 if (acked - 1 >= tp->sacked_out)
1569 tp->sacked_out = 0; 1661 tp->sacked_out = 0;
1570 else 1662 else
1571 tp->sacked_out -= acked-1; 1663 tp->sacked_out -= acked - 1;
1572 } 1664 }
1573 tcp_check_reno_reordering(sk, acked); 1665 tcp_check_reno_reordering(sk, acked);
1574 tcp_verify_left_out(tp); 1666 tcp_verify_left_out(tp);
@@ -1602,10 +1694,10 @@ int tcp_use_frto(struct sock *sk)
1602 tcp_for_write_queue_from(skb, sk) { 1694 tcp_for_write_queue_from(skb, sk) {
1603 if (skb == tcp_send_head(sk)) 1695 if (skb == tcp_send_head(sk))
1604 break; 1696 break;
1605 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) 1697 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1606 return 0; 1698 return 0;
1607 /* Short-circuit when first non-SACKed skb has been checked */ 1699 /* Short-circuit when first non-SACKed skb has been checked */
1608 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) 1700 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1609 break; 1701 break;
1610 } 1702 }
1611 return 1; 1703 return 1;
@@ -1715,7 +1807,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1715 * Count the retransmission made on RTO correctly (only when 1807 * Count the retransmission made on RTO correctly (only when
1716 * waiting for the first ACK and did not get it)... 1808 * waiting for the first ACK and did not get it)...
1717 */ 1809 */
1718 if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) { 1810 if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
1719 /* For some reason this R-bit might get cleared? */ 1811 /* For some reason this R-bit might get cleared? */
1720 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) 1812 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1721 tp->retrans_out += tcp_skb_pcount(skb); 1813 tp->retrans_out += tcp_skb_pcount(skb);
@@ -1728,7 +1820,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1728 } 1820 }
1729 1821
 1730 /* Don't mark as lost skbs that were fwd transmitted after RTO */ 1822 /* Don't mark as lost skbs that were fwd transmitted after RTO */
1731 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) && 1823 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
1732 !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { 1824 !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
1733 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1825 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1734 tp->lost_out += tcp_skb_pcount(skb); 1826 tp->lost_out += tcp_skb_pcount(skb);
@@ -1743,7 +1835,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1743 tp->bytes_acked = 0; 1835 tp->bytes_acked = 0;
1744 1836
1745 tp->reordering = min_t(unsigned int, tp->reordering, 1837 tp->reordering = min_t(unsigned int, tp->reordering,
1746 sysctl_tcp_reordering); 1838 sysctl_tcp_reordering);
1747 tcp_set_ca_state(sk, TCP_CA_Loss); 1839 tcp_set_ca_state(sk, TCP_CA_Loss);
1748 tp->high_seq = tp->frto_highmark; 1840 tp->high_seq = tp->frto_highmark;
1749 TCP_ECN_queue_cwr(tp); 1841 TCP_ECN_queue_cwr(tp);
@@ -1810,7 +1902,7 @@ void tcp_enter_loss(struct sock *sk, int how)
1810 if (skb == tcp_send_head(sk)) 1902 if (skb == tcp_send_head(sk))
1811 break; 1903 break;
1812 1904
1813 if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) 1905 if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
1814 tp->undo_marker = 0; 1906 tp->undo_marker = 0;
1815 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; 1907 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
1816 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { 1908 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
@@ -1822,7 +1914,7 @@ void tcp_enter_loss(struct sock *sk, int how)
1822 tcp_verify_left_out(tp); 1914 tcp_verify_left_out(tp);
1823 1915
1824 tp->reordering = min_t(unsigned int, tp->reordering, 1916 tp->reordering = min_t(unsigned int, tp->reordering,
1825 sysctl_tcp_reordering); 1917 sysctl_tcp_reordering);
1826 tcp_set_ca_state(sk, TCP_CA_Loss); 1918 tcp_set_ca_state(sk, TCP_CA_Loss);
1827 tp->high_seq = tp->snd_nxt; 1919 tp->high_seq = tp->snd_nxt;
1828 TCP_ECN_queue_cwr(tp); 1920 TCP_ECN_queue_cwr(tp);
@@ -1830,18 +1922,15 @@ void tcp_enter_loss(struct sock *sk, int how)
1830 tp->frto_counter = 0; 1922 tp->frto_counter = 0;
1831} 1923}
1832 1924
 1833static int tcp_check_sack_reneging(struct sock *sk) 1925/* If an ACK arrives pointing to a remembered SACK, it means that our
 1926 * remembered SACKs do not reflect the real state of the receiver, i.e.
 1927 * the receiver _host_ is heavily congested (or buggy).
1928 *
1929 * Do processing similar to RTO timeout.
1930 */
1931static int tcp_check_sack_reneging(struct sock *sk, int flag)
1834{ 1932{
1835 struct sk_buff *skb; 1933 if (flag & FLAG_SACK_RENEGING) {
1836
1837 /* If ACK arrived pointing to a remembered SACK,
1838 * it means that our remembered SACKs do not reflect
1839 * real state of receiver i.e.
1840 * receiver _host_ is heavily congested (or buggy).
1841 * Do processing similar to RTO timeout.
1842 */
1843 if ((skb = tcp_write_queue_head(sk)) != NULL &&
1844 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1845 struct inet_connection_sock *icsk = inet_csk(sk); 1934 struct inet_connection_sock *icsk = inet_csk(sk);
1846 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); 1935 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
1847 1936
@@ -1857,7 +1946,27 @@ static int tcp_check_sack_reneging(struct sock *sk)
1857 1946
1858static inline int tcp_fackets_out(struct tcp_sock *tp) 1947static inline int tcp_fackets_out(struct tcp_sock *tp)
1859{ 1948{
1860 return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out; 1949 return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
1950}
1951
 1952/* Heuristics to estimate the number of duplicate ACKs. There is no dupACK
1953 * counter when SACK is enabled (without SACK, sacked_out is used for
1954 * that purpose).
1955 *
1956 * Instead, with FACK TCP uses fackets_out that includes both SACKed
1957 * segments up to the highest received SACK block so far and holes in
1958 * between them.
1959 *
1960 * With reordering, holes may still be in flight, so RFC3517 recovery
1961 * uses pure sacked_out (total number of SACKed segments) even though
1962 * it violates the RFC that uses duplicate ACKs, often these are equal
1963 * but when e.g. out-of-window ACKs or packet duplication occurs,
1964 * they differ. Since neither occurs due to loss, TCP should really
1965 * ignore them.
1966 */
1967static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
1968{
1969 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
1861} 1970}
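The new helper makes the "how many dupACKs have we effectively seen" question explicit. A compilable model of the same choice (the struct and values are illustrative stand-ins, not kernel code):

#include <stdio.h>

struct flow {
        int is_fack;            /* FACK enabled? */
        int fackets_out;        /* forward-most SACK distance (FACK) */
        int sacked_out;         /* plain count of SACKed segments */
};

static int dupack_heuristics(const struct flow *f)
{
        /* FACK counts everything up to the highest SACK, holes included;
         * RFC3517 counts only SACKed segments, plus one for the dupACK
         * that carried the newest block. */
        return f->is_fack ? f->fackets_out : f->sacked_out + 1;
}

int main(void)
{
        struct flow fack = { 1, 7, 3 }, rfc3517 = { 0, 7, 3 };

        printf("fack: %d, rfc3517: %d\n",
               dupack_heuristics(&fack), dupack_heuristics(&rfc3517));
        return 0;
}
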
1862 1971
1863static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) 1972static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
@@ -1980,13 +2089,13 @@ static int tcp_time_to_recover(struct sock *sk)
1980 return 1; 2089 return 1;
1981 2090
1982 /* Not-A-Trick#2 : Classic rule... */ 2091 /* Not-A-Trick#2 : Classic rule... */
1983 if (tcp_fackets_out(tp) > tp->reordering) 2092 if (tcp_dupack_heurestics(tp) > tp->reordering)
1984 return 1; 2093 return 1;
1985 2094
1986 /* Trick#3 : when we use RFC2988 timer restart, fast 2095 /* Trick#3 : when we use RFC2988 timer restart, fast
1987 * retransmit can be triggered by timeout of queue head. 2096 * retransmit can be triggered by timeout of queue head.
1988 */ 2097 */
1989 if (tcp_head_timedout(sk)) 2098 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
1990 return 1; 2099 return 1;
1991 2100
1992 /* Trick#4: It is still not OK... But will it be useful to delay 2101 /* Trick#4: It is still not OK... But will it be useful to delay
@@ -2010,17 +2119,18 @@ static int tcp_time_to_recover(struct sock *sk)
2010 * retransmitted past LOST markings in the first place? I'm not fully sure 2119 * retransmitted past LOST markings in the first place? I'm not fully sure
2011 * about undo and end of connection cases, which can cause R without L? 2120 * about undo and end of connection cases, which can cause R without L?
2012 */ 2121 */
2013static void tcp_verify_retransmit_hint(struct tcp_sock *tp, 2122static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
2014 struct sk_buff *skb)
2015{ 2123{
2016 if ((tp->retransmit_skb_hint != NULL) && 2124 if ((tp->retransmit_skb_hint != NULL) &&
2017 before(TCP_SKB_CB(skb)->seq, 2125 before(TCP_SKB_CB(skb)->seq,
2018 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) 2126 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
2019 tp->retransmit_skb_hint = NULL; 2127 tp->retransmit_skb_hint = NULL;
2020} 2128}
2021 2129
2022/* Mark head of queue up as lost. */ 2130/* Mark head of queue up as lost. With RFC3517 SACK, "packets"
2023static void tcp_mark_head_lost(struct sock *sk, int packets) 2131 * is checked against the sacked "cnt", otherwise against the facked "cnt"
2132 */
2133static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
2024{ 2134{
2025 struct tcp_sock *tp = tcp_sk(sk); 2135 struct tcp_sock *tp = tcp_sk(sk);
2026 struct sk_buff *skb; 2136 struct sk_buff *skb;
@@ -2042,8 +2152,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
2042 /* this is not the most efficient way to do this... */ 2152 /* this is not the most efficient way to do this... */
2043 tp->lost_skb_hint = skb; 2153 tp->lost_skb_hint = skb;
2044 tp->lost_cnt_hint = cnt; 2154 tp->lost_cnt_hint = cnt;
2045 cnt += tcp_skb_pcount(skb); 2155
2046 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) 2156 if (tcp_is_fack(tp) ||
2157 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2158 cnt += tcp_skb_pcount(skb);
2159
2160 if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) ||
2161 after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
2047 break; 2162 break;
2048 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { 2163 if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
2049 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 2164 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
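The counting change above is the core of the RFC3517 conversion: under FACK every walked segment advances cnt, while under RFC3517 only already-SACKed segments do, so holes never consume the packets budget and the walk reaches deeper. A toy model of the loop (the fast_rexmit/lost_out guard and the high_seq bound are omitted for brevity):

#include <stdio.h>

static int mark_head_lost(int fack, const int *sacked, int *lost,
                          int n, int packets)
{
        int cnt = 0, marked = 0, i;

        for (i = 0; i < n; i++) {
                if (fack || sacked[i])
                        cnt++;
                if (cnt > packets)
                        break;
                if (!sacked[i] && !lost[i]) {
                        lost[i] = 1;
                        marked++;
                }
        }
        return marked;
}

int main(void)
{
        int sacked[] = { 0, 0, 1, 0, 1, 0 };
        int lost[6]  = { 0 };
        int i;

        printf("RFC3517 marks %d, ", mark_head_lost(0, sacked, lost, 6, 1));
        for (i = 0; i < 6; i++)
                lost[i] = 0;
        printf("FACK marks %d\n", mark_head_lost(1, sacked, lost, 6, 1));
        return 0;
}
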
@@ -2056,17 +2171,22 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
2056 2171
2057/* Account newly detected lost packet(s) */ 2172/* Account newly detected lost packet(s) */
2058 2173
2059static void tcp_update_scoreboard(struct sock *sk) 2174static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2060{ 2175{
2061 struct tcp_sock *tp = tcp_sk(sk); 2176 struct tcp_sock *tp = tcp_sk(sk);
2062 2177
2063 if (tcp_is_fack(tp)) { 2178 if (tcp_is_reno(tp)) {
2179 tcp_mark_head_lost(sk, 1, fast_rexmit);
2180 } else if (tcp_is_fack(tp)) {
2064 int lost = tp->fackets_out - tp->reordering; 2181 int lost = tp->fackets_out - tp->reordering;
2065 if (lost <= 0) 2182 if (lost <= 0)
2066 lost = 1; 2183 lost = 1;
2067 tcp_mark_head_lost(sk, lost); 2184 tcp_mark_head_lost(sk, lost, fast_rexmit);
2068 } else { 2185 } else {
2069 tcp_mark_head_lost(sk, 1); 2186 int sacked_upto = tp->sacked_out - tp->reordering;
2187 if (sacked_upto < 0)
2188 sacked_upto = 0;
2189 tcp_mark_head_lost(sk, sacked_upto, fast_rexmit);
2070 } 2190 }
2071 2191
2072 /* New heuristics: it is possible only after we switched 2192 /* New heuristics: it is possible only after we switched
@@ -2074,7 +2194,7 @@ static void tcp_update_scoreboard(struct sock *sk)
2074 * Hence, we can detect timed out packets during fast 2194 * Hence, we can detect timed out packets during fast
2075 * retransmit without falling to slow start. 2195 * retransmit without falling to slow start.
2076 */ 2196 */
2077 if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) { 2197 if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
2078 struct sk_buff *skb; 2198 struct sk_buff *skb;
2079 2199
2080 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint 2200 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
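Condensing the dispatch above: each variant hands tcp_mark_head_lost() a different loss budget. A small compilable sketch with illustrative numbers:

#include <stdio.h>

static int lost_budget(int reno, int fack, int fackets_out,
                       int sacked_out, int reordering)
{
        if (reno)                       /* no SACK info: probe one segment */
                return 1;
        if (fack) {                     /* FACK: everything beyond reordering */
                int lost = fackets_out - reordering;
                return lost > 0 ? lost : 1;
        }
        /* RFC3517: trust only the SACKed count, clamped at zero */
        return sacked_out - reordering > 0 ? sacked_out - reordering : 0;
}

int main(void)
{
        printf("reno=%d fack=%d rfc3517=%d\n",
               lost_budget(1, 0, 9, 5, 3),
               lost_budget(0, 1, 9, 5, 3),
               lost_budget(0, 0, 9, 5, 3));
        return 0;                       /* prints reno=1 fack=6 rfc3517=2 */
}
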
@@ -2105,7 +2225,7 @@ static void tcp_update_scoreboard(struct sock *sk)
2105static inline void tcp_moderate_cwnd(struct tcp_sock *tp) 2225static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2106{ 2226{
2107 tp->snd_cwnd = min(tp->snd_cwnd, 2227 tp->snd_cwnd = min(tp->snd_cwnd,
2108 tcp_packets_in_flight(tp)+tcp_max_burst(tp)); 2228 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2109 tp->snd_cwnd_stamp = tcp_time_stamp; 2229 tp->snd_cwnd_stamp = tcp_time_stamp;
2110} 2230}
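tcp_moderate_cwnd() itself only received spacing treatment, but the clamp deserves a worked example. With hypothetical numbers, a cwnd of 40 against 10 packets in flight and a burst allowance of 3:

#include <stdio.h>

static unsigned umin(unsigned a, unsigned b)
{
        return a < b ? a : b;
}

int main(void)
{
        unsigned snd_cwnd = 40, in_flight = 10, max_burst = 3;

        /* One incoming ACK may now release at most max_burst new
         * segments instead of a 30-packet line-rate burst. */
        printf("moderated cwnd = %u\n",
               umin(snd_cwnd, in_flight + max_burst));          /* 13 */
        return 0;
}
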
2111 2231
@@ -2125,15 +2245,15 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
2125 struct tcp_sock *tp = tcp_sk(sk); 2245 struct tcp_sock *tp = tcp_sk(sk);
2126 int decr = tp->snd_cwnd_cnt + 1; 2246 int decr = tp->snd_cwnd_cnt + 1;
2127 2247
2128 if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) || 2248 if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
2129 (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) { 2249 (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
2130 tp->snd_cwnd_cnt = decr&1; 2250 tp->snd_cwnd_cnt = decr & 1;
2131 decr >>= 1; 2251 decr >>= 1;
2132 2252
2133 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) 2253 if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
2134 tp->snd_cwnd -= decr; 2254 tp->snd_cwnd -= decr;
2135 2255
2136 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); 2256 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
2137 tp->snd_cwnd_stamp = tcp_time_stamp; 2257 tp->snd_cwnd_stamp = tcp_time_stamp;
2138 } 2258 }
2139} 2259}
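The operator-spacing cleanup above makes the rate-halving arithmetic easier to read: snd_cwnd_cnt carries the odd ACK over, so cwnd shrinks by one on every second qualifying ACK. A standalone trace of that arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned cwnd = 10, cnt = 0, ack;

        for (ack = 1; ack <= 6; ack++) {
                unsigned decr = cnt + 1;

                cnt = decr & 1;         /* remember the leftover ACK */
                decr >>= 1;
                if (decr && cwnd > 2)   /* 2 stands in for tcp_cwnd_min() */
                        cwnd -= decr;
                printf("after ack %u: cwnd=%u\n", ack, cwnd);
        }
        return 0;                       /* 10 10 9 9 8 8 7: halving pace */
}
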
@@ -2177,7 +2297,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
2177 if (icsk->icsk_ca_ops->undo_cwnd) 2297 if (icsk->icsk_ca_ops->undo_cwnd)
2178 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); 2298 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
2179 else 2299 else
2180 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); 2300 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
2181 2301
2182 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { 2302 if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
2183 tp->snd_ssthresh = tp->prior_ssthresh; 2303 tp->snd_ssthresh = tp->prior_ssthresh;
@@ -2196,8 +2316,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
2196 2316
2197static inline int tcp_may_undo(struct tcp_sock *tp) 2317static inline int tcp_may_undo(struct tcp_sock *tp)
2198{ 2318{
2199 return tp->undo_marker && 2319 return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
2200 (!tp->undo_retrans || tcp_packet_delayed(tp));
2201} 2320}
2202 2321
2203/* People celebrate: "We love our President!" */ 2322/* People celebrate: "We love our President!" */
@@ -2247,7 +2366,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2247{ 2366{
2248 struct tcp_sock *tp = tcp_sk(sk); 2367 struct tcp_sock *tp = tcp_sk(sk);
2249 /* Partial ACK arrived. Force Hoe's retransmit. */ 2368 /* Partial ACK arrived. Force Hoe's retransmit. */
2250 int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering; 2369 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2251 2370
2252 if (tcp_may_undo(tp)) { 2371 if (tcp_may_undo(tp)) {
2253 /* Plain luck! Hole is filled with delayed 2372 /* Plain luck! Hole is filled with delayed
@@ -2316,7 +2435,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
2316 if (tp->retrans_out == 0) 2435 if (tp->retrans_out == 0)
2317 tp->retrans_stamp = 0; 2436 tp->retrans_stamp = 0;
2318 2437
2319 if (flag&FLAG_ECE) 2438 if (flag & FLAG_ECE)
2320 tcp_enter_cwr(sk, 1); 2439 tcp_enter_cwr(sk, 1);
2321 2440
2322 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2441 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
@@ -2362,7 +2481,6 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
2362 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 2481 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
2363} 2482}
2364 2483
2365
2366/* Process an event, which can update packets-in-flight not trivially. 2484/* Process an event, which can update packets-in-flight not trivially.
2367 * Main goal of this function is to calculate new estimate for left_out, 2485 * Main goal of this function is to calculate new estimate for left_out,
2368 * taking into account both packets sitting in receiver's buffer and 2486 * taking into account both packets sitting in receiver's buffer and
@@ -2374,38 +2492,35 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
2374 * It does _not_ decide what to send, it is made in function 2492 * It does _not_ decide what to send, it is made in function
2375 * tcp_xmit_retransmit_queue(). 2493 * tcp_xmit_retransmit_queue().
2376 */ 2494 */
2377static void 2495static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2378tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2379{ 2496{
2380 struct inet_connection_sock *icsk = inet_csk(sk); 2497 struct inet_connection_sock *icsk = inet_csk(sk);
2381 struct tcp_sock *tp = tcp_sk(sk); 2498 struct tcp_sock *tp = tcp_sk(sk);
2382 int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP)); 2499 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
2383 int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) && 2500 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2384 (tp->fackets_out > tp->reordering)); 2501 (tcp_fackets_out(tp) > tp->reordering));
2502 int fast_rexmit = 0;
2385 2503
2386 /* Some technical things: 2504 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2387 * 1. Reno does not count dupacks (sacked_out) automatically. */
2388 if (!tp->packets_out)
2389 tp->sacked_out = 0; 2505 tp->sacked_out = 0;
2390
2391 if (WARN_ON(!tp->sacked_out && tp->fackets_out)) 2506 if (WARN_ON(!tp->sacked_out && tp->fackets_out))
2392 tp->fackets_out = 0; 2507 tp->fackets_out = 0;
2393 2508
2394 /* Now state machine starts. 2509 /* Now state machine starts.
2395 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ 2510 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
2396 if (flag&FLAG_ECE) 2511 if (flag & FLAG_ECE)
2397 tp->prior_ssthresh = 0; 2512 tp->prior_ssthresh = 0;
2398 2513
2399 /* B. In all the states check for reneging SACKs. */ 2514 /* B. In all the states check for reneging SACKs. */
2400 if (tp->sacked_out && tcp_check_sack_reneging(sk)) 2515 if (tcp_check_sack_reneging(sk, flag))
2401 return; 2516 return;
2402 2517
2403 /* C. Process data loss notification, provided it is valid. */ 2518 /* C. Process data loss notification, provided it is valid. */
2404 if ((flag&FLAG_DATA_LOST) && 2519 if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
2405 before(tp->snd_una, tp->high_seq) && 2520 before(tp->snd_una, tp->high_seq) &&
2406 icsk->icsk_ca_state != TCP_CA_Open && 2521 icsk->icsk_ca_state != TCP_CA_Open &&
2407 tp->fackets_out > tp->reordering) { 2522 tp->fackets_out > tp->reordering) {
2408 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); 2523 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
2409 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); 2524 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
2410 } 2525 }
2411 2526
@@ -2465,7 +2580,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2465 do_lost = tcp_try_undo_partial(sk, pkts_acked); 2580 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2466 break; 2581 break;
2467 case TCP_CA_Loss: 2582 case TCP_CA_Loss:
2468 if (flag&FLAG_DATA_ACKED) 2583 if (flag & FLAG_DATA_ACKED)
2469 icsk->icsk_retransmits = 0; 2584 icsk->icsk_retransmits = 0;
2470 if (!tcp_try_undo_loss(sk)) { 2585 if (!tcp_try_undo_loss(sk)) {
2471 tcp_moderate_cwnd(tp); 2586 tcp_moderate_cwnd(tp);
@@ -2515,7 +2630,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2515 tp->undo_retrans = tp->retrans_out; 2630 tp->undo_retrans = tp->retrans_out;
2516 2631
2517 if (icsk->icsk_ca_state < TCP_CA_CWR) { 2632 if (icsk->icsk_ca_state < TCP_CA_CWR) {
2518 if (!(flag&FLAG_ECE)) 2633 if (!(flag & FLAG_ECE))
2519 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2634 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2520 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); 2635 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
2521 TCP_ECN_queue_cwr(tp); 2636 TCP_ECN_queue_cwr(tp);
@@ -2524,10 +2639,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2524 tp->bytes_acked = 0; 2639 tp->bytes_acked = 0;
2525 tp->snd_cwnd_cnt = 0; 2640 tp->snd_cwnd_cnt = 0;
2526 tcp_set_ca_state(sk, TCP_CA_Recovery); 2641 tcp_set_ca_state(sk, TCP_CA_Recovery);
2642 fast_rexmit = 1;
2527 } 2643 }
2528 2644
2529 if (do_lost || tcp_head_timedout(sk)) 2645 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
2530 tcp_update_scoreboard(sk); 2646 tcp_update_scoreboard(sk, fast_rexmit);
2531 tcp_cwnd_down(sk, flag); 2647 tcp_cwnd_down(sk, flag);
2532 tcp_xmit_retransmit_queue(sk); 2648 tcp_xmit_retransmit_queue(sk);
2533} 2649}
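The entry conditions of tcp_fastretrans_alert() distill to two predicates. This sketch models them with assumed flag bits (the kernel's FLAG_* values differ) and with fackets_out standing in for the reno-adjusted tcp_fackets_out() value:

#include <stdio.h>

#define FLAG_SND_UNA_ADVANCED 0x1       /* assumed bits, not the kernel's */
#define FLAG_NOT_DUP          0x2
#define FLAG_DATA_SACKED      0x4

static int is_dupack(int flag)
{
        /* neither new data acked nor anything else useful: a dupACK */
        return !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
}

static int do_lost(int flag, int fackets_out, int reordering)
{
        return is_dupack(flag) ||
               ((flag & FLAG_DATA_SACKED) && fackets_out > reordering);
}

int main(void)
{
        printf("SACK past reordering threshold: do_lost=%d\n",
               do_lost(FLAG_DATA_SACKED, 5, 3));
        return 0;
}
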
@@ -2591,11 +2707,10 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
2591 tcp_ack_no_tstamp(sk, seq_rtt, flag); 2707 tcp_ack_no_tstamp(sk, seq_rtt, flag);
2592} 2708}
2593 2709
2594static void tcp_cong_avoid(struct sock *sk, u32 ack, 2710static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
2595 u32 in_flight, int good)
2596{ 2711{
2597 const struct inet_connection_sock *icsk = inet_csk(sk); 2712 const struct inet_connection_sock *icsk = inet_csk(sk);
2598 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good); 2713 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
2599 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; 2714 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2600} 2715}
2601 2716
@@ -2609,7 +2724,8 @@ static void tcp_rearm_rto(struct sock *sk)
2609 if (!tp->packets_out) { 2724 if (!tp->packets_out) {
2610 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 2725 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
2611 } else { 2726 } else {
2612 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 2727 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2728 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2613 } 2729 }
2614} 2730}
2615 2731
@@ -2638,8 +2754,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
2638 * is before the ack sequence we can discard it as it's confirmed to have 2754 * is before the ack sequence we can discard it as it's confirmed to have
2639 * arrived at the other end. 2755 * arrived at the other end.
2640 */ 2756 */
2641static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, 2757static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
2642 int prior_fackets)
2643{ 2758{
2644 struct tcp_sock *tp = tcp_sk(sk); 2759 struct tcp_sock *tp = tcp_sk(sk);
2645 const struct inet_connection_sock *icsk = inet_csk(sk); 2760 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2647,8 +2762,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
2647 u32 now = tcp_time_stamp; 2762 u32 now = tcp_time_stamp;
2648 int fully_acked = 1; 2763 int fully_acked = 1;
2649 int flag = 0; 2764 int flag = 0;
2650 int prior_packets = tp->packets_out; 2765 u32 pkts_acked = 0;
2651 u32 cnt = 0;
2652 u32 reord = tp->packets_out; 2766 u32 reord = tp->packets_out;
2653 s32 seq_rtt = -1; 2767 s32 seq_rtt = -1;
2654 s32 ca_seq_rtt = -1; 2768 s32 ca_seq_rtt = -1;
@@ -2657,7 +2771,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
2657 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { 2771 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
2658 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 2772 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
2659 u32 end_seq; 2773 u32 end_seq;
2660 u32 packets_acked; 2774 u32 acked_pcount;
2661 u8 sacked = scb->sacked; 2775 u8 sacked = scb->sacked;
2662 2776
2663 /* Determine how many packets and what bytes were acked, tso and else */ 2777 /* Determine how many packets and what bytes were acked, tso and else */
@@ -2666,14 +2780,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
2666 !after(tp->snd_una, scb->seq)) 2780 !after(tp->snd_una, scb->seq))
2667 break; 2781 break;
2668 2782
2669 packets_acked = tcp_tso_acked(sk, skb); 2783 acked_pcount = tcp_tso_acked(sk, skb);
2670 if (!packets_acked) 2784 if (!acked_pcount)
2671 break; 2785 break;
2672 2786
2673 fully_acked = 0; 2787 fully_acked = 0;
2674 end_seq = tp->snd_una; 2788 end_seq = tp->snd_una;
2675 } else { 2789 } else {
2676 packets_acked = tcp_skb_pcount(skb); 2790 acked_pcount = tcp_skb_pcount(skb);
2677 end_seq = scb->end_seq; 2791 end_seq = scb->end_seq;
2678 } 2792 }
2679 2793
@@ -2683,44 +2797,34 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
2683 tcp_mtup_probe_success(sk, skb); 2797 tcp_mtup_probe_success(sk, skb);
2684 } 2798 }
2685 2799
2686 if (sacked) { 2800 if (sacked & TCPCB_RETRANS) {
2687 if (sacked & TCPCB_RETRANS) { 2801 if (sacked & TCPCB_SACKED_RETRANS)
2688 if (sacked & TCPCB_SACKED_RETRANS) 2802 tp->retrans_out -= acked_pcount;
2689 tp->retrans_out -= packets_acked; 2803 flag |= FLAG_RETRANS_DATA_ACKED;
2690 flag |= FLAG_RETRANS_DATA_ACKED; 2804 ca_seq_rtt = -1;
2691 ca_seq_rtt = -1; 2805 seq_rtt = -1;
2692 seq_rtt = -1; 2806 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
2693 if ((flag & FLAG_DATA_ACKED) || 2807 flag |= FLAG_NONHEAD_RETRANS_ACKED;
2694 (packets_acked > 1))
2695 flag |= FLAG_NONHEAD_RETRANS_ACKED;
2696 } else {
2697 ca_seq_rtt = now - scb->when;
2698 last_ackt = skb->tstamp;
2699 if (seq_rtt < 0) {
2700 seq_rtt = ca_seq_rtt;
2701 }
2702 if (!(sacked & TCPCB_SACKED_ACKED))
2703 reord = min(cnt, reord);
2704 }
2705
2706 if (sacked & TCPCB_SACKED_ACKED)
2707 tp->sacked_out -= packets_acked;
2708 if (sacked & TCPCB_LOST)
2709 tp->lost_out -= packets_acked;
2710
2711 if ((sacked & TCPCB_URG) && tp->urg_mode &&
2712 !before(end_seq, tp->snd_up))
2713 tp->urg_mode = 0;
2714 } else { 2808 } else {
2715 ca_seq_rtt = now - scb->when; 2809 ca_seq_rtt = now - scb->when;
2716 last_ackt = skb->tstamp; 2810 last_ackt = skb->tstamp;
2717 if (seq_rtt < 0) { 2811 if (seq_rtt < 0) {
2718 seq_rtt = ca_seq_rtt; 2812 seq_rtt = ca_seq_rtt;
2719 } 2813 }
2720 reord = min(cnt, reord); 2814 if (!(sacked & TCPCB_SACKED_ACKED))
2815 reord = min(pkts_acked, reord);
2721 } 2816 }
2722 tp->packets_out -= packets_acked; 2817
2723 cnt += packets_acked; 2818 if (sacked & TCPCB_SACKED_ACKED)
2819 tp->sacked_out -= acked_pcount;
2820 if (sacked & TCPCB_LOST)
2821 tp->lost_out -= acked_pcount;
2822
2823 if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
2824 tp->urg_mode = 0;
2825
2826 tp->packets_out -= acked_pcount;
2827 pkts_acked += acked_pcount;
2724 2828
2725 /* Initial outgoing SYNs get put onto the write_queue 2829
2726 * just like anything else we transmit. It is not 2830 * just like anything else we transmit. It is not
@@ -2740,12 +2844,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
2740 break; 2844 break;
2741 2845
2742 tcp_unlink_write_queue(skb, sk); 2846 tcp_unlink_write_queue(skb, sk);
2743 sk_stream_free_skb(sk, skb); 2847 sk_wmem_free_skb(sk, skb);
2744 tcp_clear_all_retrans_hints(tp); 2848 tcp_clear_all_retrans_hints(tp);
2745 } 2849 }
2746 2850
2851 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2852 flag |= FLAG_SACK_RENEGING;
2853
2747 if (flag & FLAG_ACKED) { 2854 if (flag & FLAG_ACKED) {
2748 u32 pkts_acked = prior_packets - tp->packets_out;
2749 const struct tcp_congestion_ops *ca_ops 2855 const struct tcp_congestion_ops *ca_ops
2750 = inet_csk(sk)->icsk_ca_ops; 2856 = inet_csk(sk)->icsk_ca_ops;
2751 2857
@@ -2761,9 +2867,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
2761 } 2867 }
2762 2868
2763 tp->fackets_out -= min(pkts_acked, tp->fackets_out); 2869 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
2764 /* hint's skb might be NULL but we don't need to care */ 2870
2765 tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
2766 tp->fastpath_cnt_hint);
2767 if (ca_ops->pkts_acked) { 2871 if (ca_ops->pkts_acked) {
2768 s32 rtt_us = -1; 2872 s32 rtt_us = -1;
2769 2873
@@ -2806,7 +2910,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
2806 } 2910 }
2807 } 2911 }
2808#endif 2912#endif
2809 *seq_rtt_p = seq_rtt;
2810 return flag; 2913 return flag;
2811} 2914}
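The rewritten loop retires the prior_packets/cnt pair: pkts_acked now accumulates directly while fully ACKed segments are unlinked, and reord keeps the lowest queue position of a segment that was cumulatively ACKed without ever being SACKed, which is evidence of reordering. A toy model of that bookkeeping:

#include <stdio.h>

int main(void)
{
        /* pcount[i]: GSO segment count; was_sacked[i]: SACKed before this ACK */
        int pcount[]     = { 1, 2, 1 };
        int was_sacked[] = { 0, 1, 0 };
        unsigned pkts_acked = 0, reord = 4 /* packets_out */, i;

        for (i = 0; i < 3; i++) {
                if (!was_sacked[i] && pkts_acked < reord)
                        reord = pkts_acked;     /* reordering witness */
                pkts_acked += pcount[i];
        }
        printf("pkts_acked=%u reord=%u\n", pkts_acked, reord);
        return 0;                               /* pkts_acked=4 reord=0 */
}
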
2812 2915
@@ -2817,8 +2920,7 @@ static void tcp_ack_probe(struct sock *sk)
2817 2920
2818 /* Was it a usable window open? */ 2921 /* Was it a usable window open? */
2819 2922
2820 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, 2923 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
2821 tp->snd_una + tp->snd_wnd)) {
2822 icsk->icsk_backoff = 0; 2924 icsk->icsk_backoff = 0;
2823 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); 2925 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
2824 /* Socket must be waked up by subsequent tcp_data_snd_check(). 2926 /* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -2847,8 +2949,9 @@ static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
2847/* Check that window update is acceptable. 2949/* Check that window update is acceptable.
2848 * The function assumes that snd_una<=ack<=snd_next. 2950 * The function assumes that snd_una<=ack<=snd_next.
2849 */ 2951 */
2850static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, 2952static inline int tcp_may_update_window(const struct tcp_sock *tp,
2851 const u32 ack_seq, const u32 nwin) 2953 const u32 ack, const u32 ack_seq,
2954 const u32 nwin)
2852{ 2955{
2853 return (after(ack, tp->snd_una) || 2956 return (after(ack, tp->snd_una) ||
2854 after(ack_seq, tp->snd_wl1) || 2957 after(ack_seq, tp->snd_wl1) ||
@@ -2917,7 +3020,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
2917 3020
2918static void tcp_undo_spur_to_response(struct sock *sk, int flag) 3021static void tcp_undo_spur_to_response(struct sock *sk, int flag)
2919{ 3022{
2920 if (flag&FLAG_ECE) 3023 if (flag & FLAG_ECE)
2921 tcp_ratehalving_spur_to_response(sk); 3024 tcp_ratehalving_spur_to_response(sk);
2922 else 3025 else
2923 tcp_undo_cwr(sk, 1); 3026 tcp_undo_cwr(sk, 1);
@@ -2960,7 +3063,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
2960 tcp_verify_left_out(tp); 3063 tcp_verify_left_out(tp);
2961 3064
2962 /* Duplicate the behavior from Loss state (fastretrans_alert) */ 3065 /* Duplicate the behavior from Loss state (fastretrans_alert) */
2963 if (flag&FLAG_DATA_ACKED) 3066 if (flag & FLAG_DATA_ACKED)
2964 inet_csk(sk)->icsk_retransmits = 0; 3067 inet_csk(sk)->icsk_retransmits = 0;
2965 3068
2966 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || 3069 if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
@@ -2977,16 +3080,16 @@ static int tcp_process_frto(struct sock *sk, int flag)
2977 * ACK isn't duplicate nor advances window, e.g., opposite dir 3080 * ACK isn't duplicate nor advances window, e.g., opposite dir
2978 * data, winupdate 3081 * data, winupdate
2979 */ 3082 */
2980 if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP)) 3083 if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
2981 return 1; 3084 return 1;
2982 3085
2983 if (!(flag&FLAG_DATA_ACKED)) { 3086 if (!(flag & FLAG_DATA_ACKED)) {
2984 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), 3087 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
2985 flag); 3088 flag);
2986 return 1; 3089 return 1;
2987 } 3090 }
2988 } else { 3091 } else {
2989 if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { 3092 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
2990 /* Prevent sending of new data. */ 3093 /* Prevent sending of new data. */
2991 tp->snd_cwnd = min(tp->snd_cwnd, 3094 tp->snd_cwnd = min(tp->snd_cwnd,
2992 tcp_packets_in_flight(tp)); 3095 tcp_packets_in_flight(tp));
@@ -2994,10 +3097,12 @@ static int tcp_process_frto(struct sock *sk, int flag)
2994 } 3097 }
2995 3098
2996 if ((tp->frto_counter >= 2) && 3099 if ((tp->frto_counter >= 2) &&
2997 (!(flag&FLAG_FORWARD_PROGRESS) || 3100 (!(flag & FLAG_FORWARD_PROGRESS) ||
2998 ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) { 3101 ((flag & FLAG_DATA_SACKED) &&
3102 !(flag & FLAG_ONLY_ORIG_SACKED)))) {
2999 /* RFC4138 shortcoming (see comment above) */ 3103 /* RFC4138 shortcoming (see comment above) */
3000 if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP)) 3104 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3105 (flag & FLAG_NOT_DUP))
3001 return 1; 3106 return 1;
3002 3107
3003 tcp_enter_frto_loss(sk, 3, flag); 3108 tcp_enter_frto_loss(sk, 3, flag);
@@ -3043,7 +3148,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3043 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3148 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3044 u32 prior_in_flight; 3149 u32 prior_in_flight;
3045 u32 prior_fackets; 3150 u32 prior_fackets;
3046 s32 seq_rtt;
3047 int prior_packets; 3151 int prior_packets;
3048 int frto_cwnd = 0; 3152 int frto_cwnd = 0;
3049 3153
@@ -3064,13 +3168,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3064 tp->bytes_acked += ack - prior_snd_una; 3168 tp->bytes_acked += ack - prior_snd_una;
3065 else if (icsk->icsk_ca_state == TCP_CA_Loss) 3169 else if (icsk->icsk_ca_state == TCP_CA_Loss)
3066 /* we assume just one segment left network */ 3170 /* we assume just one segment left network */
3067 tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache); 3171 tp->bytes_acked += min(ack - prior_snd_una,
3172 tp->mss_cache);
3068 } 3173 }
3069 3174
3070 prior_fackets = tp->fackets_out; 3175 prior_fackets = tp->fackets_out;
3071 prior_in_flight = tcp_packets_in_flight(tp); 3176 prior_in_flight = tcp_packets_in_flight(tp);
3072 3177
3073 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { 3178 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3074 /* Window is constant, pure forward advance. 3179 /* Window is constant, pure forward advance.
3075 * No more checks are required. 3180 * No more checks are required.
3076 * Note, we use the fact that SND.UNA>=SND.WL2. 3181 * Note, we use the fact that SND.UNA>=SND.WL2.
@@ -3109,7 +3214,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3109 goto no_queue; 3214 goto no_queue;
3110 3215
3111 /* See if we can take anything off of the retransmit queue. */ 3216 /* See if we can take anything off of the retransmit queue. */
3112 flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets); 3217 flag |= tcp_clean_rtx_queue(sk, prior_fackets);
3113 3218
3114 if (tp->frto_counter) 3219 if (tp->frto_counter)
3115 frto_cwnd = tcp_process_frto(sk, flag); 3220 frto_cwnd = tcp_process_frto(sk, flag);
@@ -3121,14 +3226,15 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3121 /* Advance CWND, if state allows this. */ 3226 /* Advance CWND, if state allows this. */
3122 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && 3227 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3123 tcp_may_raise_cwnd(sk, flag)) 3228 tcp_may_raise_cwnd(sk, flag))
3124 tcp_cong_avoid(sk, ack, prior_in_flight, 0); 3229 tcp_cong_avoid(sk, ack, prior_in_flight);
3125 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag); 3230 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3231 flag);
3126 } else { 3232 } else {
3127 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3233 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3128 tcp_cong_avoid(sk, ack, prior_in_flight, 1); 3234 tcp_cong_avoid(sk, ack, prior_in_flight);
3129 } 3235 }
3130 3236
3131 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) 3237 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3132 dst_confirm(sk->sk_dst_cache); 3238 dst_confirm(sk->sk_dst_cache);
3133 3239
3134 return 1; 3240 return 1;
@@ -3153,100 +3259,99 @@ uninteresting_ack:
3153 return 0; 3259 return 0;
3154} 3260}
3155 3261
3156
3157/* Look for tcp options. Normally only called on SYN and SYNACK packets. 3262/* Look for tcp options. Normally only called on SYN and SYNACK packets.
3158 * But, this can also be called on packets in the established flow when 3263 * But, this can also be called on packets in the established flow when
3159 * the fast version below fails. 3264 * the fast version below fails.
3160 */ 3265 */
3161void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab) 3266void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3267 int estab)
3162{ 3268{
3163 unsigned char *ptr; 3269 unsigned char *ptr;
3164 struct tcphdr *th = tcp_hdr(skb); 3270 struct tcphdr *th = tcp_hdr(skb);
3165 int length=(th->doff*4)-sizeof(struct tcphdr); 3271 int length = (th->doff * 4) - sizeof(struct tcphdr);
3166 3272
3167 ptr = (unsigned char *)(th + 1); 3273 ptr = (unsigned char *)(th + 1);
3168 opt_rx->saw_tstamp = 0; 3274 opt_rx->saw_tstamp = 0;
3169 3275
3170 while (length > 0) { 3276 while (length > 0) {
3171 int opcode=*ptr++; 3277 int opcode = *ptr++;
3172 int opsize; 3278 int opsize;
3173 3279
3174 switch (opcode) { 3280 switch (opcode) {
3175 case TCPOPT_EOL: 3281 case TCPOPT_EOL:
3282 return;
3283 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
3284 length--;
3285 continue;
3286 default:
3287 opsize = *ptr++;
3288 if (opsize < 2) /* "silly options" */
3176 return; 3289 return;
3177 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 3290 if (opsize > length)
3178 length--; 3291 return; /* don't parse partial options */
3179 continue; 3292 switch (opcode) {
3180 default: 3293 case TCPOPT_MSS:
3181 opsize=*ptr++; 3294 if (opsize == TCPOLEN_MSS && th->syn && !estab) {
3182 if (opsize < 2) /* "silly options" */ 3295 u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
3183 return; 3296 if (in_mss) {
3184 if (opsize > length) 3297 if (opt_rx->user_mss &&
3185 return; /* don't parse partial options */ 3298 opt_rx->user_mss < in_mss)
3186 switch (opcode) { 3299 in_mss = opt_rx->user_mss;
3187 case TCPOPT_MSS: 3300 opt_rx->mss_clamp = in_mss;
3188 if (opsize==TCPOLEN_MSS && th->syn && !estab) {
3189 u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
3190 if (in_mss) {
3191 if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
3192 in_mss = opt_rx->user_mss;
3193 opt_rx->mss_clamp = in_mss;
3194 }
3195 }
3196 break;
3197 case TCPOPT_WINDOW:
3198 if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
3199 if (sysctl_tcp_window_scaling) {
3200 __u8 snd_wscale = *(__u8 *) ptr;
3201 opt_rx->wscale_ok = 1;
3202 if (snd_wscale > 14) {
3203 if (net_ratelimit())
3204 printk(KERN_INFO "tcp_parse_options: Illegal window "
3205 "scaling value %d >14 received.\n",
3206 snd_wscale);
3207 snd_wscale = 14;
3208 }
3209 opt_rx->snd_wscale = snd_wscale;
3210 }
3211 break;
3212 case TCPOPT_TIMESTAMP:
3213 if (opsize==TCPOLEN_TIMESTAMP) {
3214 if ((estab && opt_rx->tstamp_ok) ||
3215 (!estab && sysctl_tcp_timestamps)) {
3216 opt_rx->saw_tstamp = 1;
3217 opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
3218 opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
3219 }
3220 } 3301 }
3221 break; 3302 }
3222 case TCPOPT_SACK_PERM: 3303 break;
3223 if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { 3304 case TCPOPT_WINDOW:
3224 if (sysctl_tcp_sack) { 3305 if (opsize == TCPOLEN_WINDOW && th->syn &&
3225 opt_rx->sack_ok = 1; 3306 !estab && sysctl_tcp_window_scaling) {
3226 tcp_sack_reset(opt_rx); 3307 __u8 snd_wscale = *(__u8 *)ptr;
3227 } 3308 opt_rx->wscale_ok = 1;
3309 if (snd_wscale > 14) {
3310 if (net_ratelimit())
3311 printk(KERN_INFO "tcp_parse_options: Illegal window "
3312 "scaling value %d >14 received.\n",
3313 snd_wscale);
3314 snd_wscale = 14;
3228 } 3315 }
3229 break; 3316 opt_rx->snd_wscale = snd_wscale;
3317 }
3318 break;
3319 case TCPOPT_TIMESTAMP:
3320 if ((opsize == TCPOLEN_TIMESTAMP) &&
3321 ((estab && opt_rx->tstamp_ok) ||
3322 (!estab && sysctl_tcp_timestamps))) {
3323 opt_rx->saw_tstamp = 1;
3324 opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
3325 opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
3326 }
3327 break;
3328 case TCPOPT_SACK_PERM:
3329 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3330 !estab && sysctl_tcp_sack) {
3331 opt_rx->sack_ok = 1;
3332 tcp_sack_reset(opt_rx);
3333 }
3334 break;
3230 3335
3231 case TCPOPT_SACK: 3336 case TCPOPT_SACK:
3232 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && 3337 if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
3233 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && 3338 !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
3234 opt_rx->sack_ok) { 3339 opt_rx->sack_ok) {
3235 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; 3340 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3236 } 3341 }
3237 break; 3342 break;
3238#ifdef CONFIG_TCP_MD5SIG 3343#ifdef CONFIG_TCP_MD5SIG
3239 case TCPOPT_MD5SIG: 3344 case TCPOPT_MD5SIG:
3240 /* 3345 /*
3241 * The MD5 Hash has already been 3346 * The MD5 Hash has already been
3242 * checked (see tcp_v{4,6}_do_rcv()). 3347 * checked (see tcp_v{4,6}_do_rcv()).
3243 */ 3348 */
3244 break; 3349 break;
3245#endif 3350#endif
3246 } 3351 }
3247 3352
3248 ptr+=opsize-2; 3353 ptr += opsize-2;
3249 length-=opsize; 3354 length -= opsize;
3250 } 3355 }
3251 } 3356 }
3252} 3357}
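The reindented parser keeps the classic kind/length walk: read the option kind, then its total size (header bytes included), and bail out on truncated or "silly" options. The same walk in standalone form, fed one hypothetical options block:

#include <stdio.h>
#include <stdint.h>

static void parse_options(const uint8_t *ptr, int length)
{
        while (length > 0) {
                int opcode = *ptr++;
                int opsize;

                switch (opcode) {
                case 0:                 /* TCPOPT_EOL */
                        return;
                case 1:                 /* TCPOPT_NOP */
                        length--;
                        continue;
                default:
                        opsize = *ptr++;
                        if (opsize < 2 || opsize > length)
                                return; /* silly or partial option */
                        printf("option %d, %d byte(s)\n", opcode, opsize);
                        ptr += opsize - 2;
                        length -= opsize;
                }
        }
}

int main(void)
{
        /* NOP, NOP, MSS (kind 2, len 4, value 1460) */
        const uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4 };

        parse_options(opts, sizeof(opts));
        return 0;
}
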
@@ -3257,7 +3362,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3257static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, 3362static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3258 struct tcp_sock *tp) 3363 struct tcp_sock *tp)
3259{ 3364{
3260 if (th->doff == sizeof(struct tcphdr)>>2) { 3365 if (th->doff == sizeof(struct tcphdr) >> 2) {
3261 tp->rx_opt.saw_tstamp = 0; 3366 tp->rx_opt.saw_tstamp = 0;
3262 return 0; 3367 return 0;
3263 } else if (tp->rx_opt.tstamp_ok && 3368 } else if (tp->rx_opt.tstamp_ok &&
@@ -3342,7 +3447,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3342 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); 3447 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3343} 3448}
3344 3449
3345static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) 3450static inline int tcp_paws_discard(const struct sock *sk,
3451 const struct sk_buff *skb)
3346{ 3452{
3347 const struct tcp_sock *tp = tcp_sk(sk); 3453 const struct tcp_sock *tp = tcp_sk(sk);
3348 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && 3454 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
@@ -3374,16 +3480,16 @@ static void tcp_reset(struct sock *sk)
3374{ 3480{
3375 /* We want the right error as BSD sees it (and indeed as we do). */ 3481 /* We want the right error as BSD sees it (and indeed as we do). */
3376 switch (sk->sk_state) { 3482 switch (sk->sk_state) {
3377 case TCP_SYN_SENT: 3483 case TCP_SYN_SENT:
3378 sk->sk_err = ECONNREFUSED; 3484 sk->sk_err = ECONNREFUSED;
3379 break; 3485 break;
3380 case TCP_CLOSE_WAIT: 3486 case TCP_CLOSE_WAIT:
3381 sk->sk_err = EPIPE; 3487 sk->sk_err = EPIPE;
3382 break; 3488 break;
3383 case TCP_CLOSE: 3489 case TCP_CLOSE:
3384 return; 3490 return;
3385 default: 3491 default:
3386 sk->sk_err = ECONNRESET; 3492 sk->sk_err = ECONNRESET;
3387 } 3493 }
3388 3494
3389 if (!sock_flag(sk, SOCK_DEAD)) 3495 if (!sock_flag(sk, SOCK_DEAD))
@@ -3416,43 +3522,43 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3416 sock_set_flag(sk, SOCK_DONE); 3522 sock_set_flag(sk, SOCK_DONE);
3417 3523
3418 switch (sk->sk_state) { 3524 switch (sk->sk_state) {
3419 case TCP_SYN_RECV: 3525 case TCP_SYN_RECV:
3420 case TCP_ESTABLISHED: 3526 case TCP_ESTABLISHED:
3421 /* Move to CLOSE_WAIT */ 3527 /* Move to CLOSE_WAIT */
3422 tcp_set_state(sk, TCP_CLOSE_WAIT); 3528 tcp_set_state(sk, TCP_CLOSE_WAIT);
3423 inet_csk(sk)->icsk_ack.pingpong = 1; 3529 inet_csk(sk)->icsk_ack.pingpong = 1;
3424 break; 3530 break;
3425 3531
3426 case TCP_CLOSE_WAIT: 3532 case TCP_CLOSE_WAIT:
3427 case TCP_CLOSING: 3533 case TCP_CLOSING:
3428 /* Received a retransmission of the FIN, do 3534 /* Received a retransmission of the FIN, do
3429 * nothing. 3535 * nothing.
3430 */ 3536 */
3431 break; 3537 break;
3432 case TCP_LAST_ACK: 3538 case TCP_LAST_ACK:
3433 /* RFC793: Remain in the LAST-ACK state. */ 3539 /* RFC793: Remain in the LAST-ACK state. */
3434 break; 3540 break;
3435 3541
3436 case TCP_FIN_WAIT1: 3542 case TCP_FIN_WAIT1:
3437 /* This case occurs when a simultaneous close 3543 /* This case occurs when a simultaneous close
3438 * happens, we must ack the received FIN and 3544 * happens, we must ack the received FIN and
3439 * enter the CLOSING state. 3545 * enter the CLOSING state.
3440 */ 3546 */
3441 tcp_send_ack(sk); 3547 tcp_send_ack(sk);
3442 tcp_set_state(sk, TCP_CLOSING); 3548 tcp_set_state(sk, TCP_CLOSING);
3443 break; 3549 break;
3444 case TCP_FIN_WAIT2: 3550 case TCP_FIN_WAIT2:
3445 /* Received a FIN -- send ACK and enter TIME_WAIT. */ 3551 /* Received a FIN -- send ACK and enter TIME_WAIT. */
3446 tcp_send_ack(sk); 3552 tcp_send_ack(sk);
3447 tcp_time_wait(sk, TCP_TIME_WAIT, 0); 3553 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
3448 break; 3554 break;
3449 default: 3555 default:
3450 /* Only TCP_LISTEN and TCP_CLOSE are left, in these 3556 /* Only TCP_LISTEN and TCP_CLOSE are left, in these
3451 * cases we should never reach this piece of code. 3557 * cases we should never reach this piece of code.
3452 */ 3558 */
3453 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", 3559 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
3454 __FUNCTION__, sk->sk_state); 3560 __FUNCTION__, sk->sk_state);
3455 break; 3561 break;
3456 } 3562 }
3457 3563
3458 /* It _is_ possible that we have something out-of-order _after_ FIN. 3564
@@ -3461,7 +3567,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3461 __skb_queue_purge(&tp->out_of_order_queue); 3567 __skb_queue_purge(&tp->out_of_order_queue);
3462 if (tcp_is_sack(tp)) 3568 if (tcp_is_sack(tp))
3463 tcp_sack_reset(&tp->rx_opt); 3569 tcp_sack_reset(&tp->rx_opt);
3464 sk_stream_mem_reclaim(sk); 3570 sk_mem_reclaim(sk);
3465 3571
3466 if (!sock_flag(sk, SOCK_DEAD)) { 3572 if (!sock_flag(sk, SOCK_DEAD)) {
3467 sk->sk_state_change(sk); 3573 sk->sk_state_change(sk);
@@ -3469,13 +3575,14 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3469 /* Do not send POLL_HUP for half duplex close. */ 3575 /* Do not send POLL_HUP for half duplex close. */
3470 if (sk->sk_shutdown == SHUTDOWN_MASK || 3576 if (sk->sk_shutdown == SHUTDOWN_MASK ||
3471 sk->sk_state == TCP_CLOSE) 3577 sk->sk_state == TCP_CLOSE)
3472 sk_wake_async(sk, 1, POLL_HUP); 3578 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
3473 else 3579 else
3474 sk_wake_async(sk, 1, POLL_IN); 3580 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
3475 } 3581 }
3476} 3582}
3477 3583
3478static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) 3584static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
3585 u32 end_seq)
3479{ 3586{
3480 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { 3587 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
3481 if (before(seq, sp->start_seq)) 3588 if (before(seq, sp->start_seq))
@@ -3498,7 +3605,8 @@ static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
3498 tp->rx_opt.dsack = 1; 3605 tp->rx_opt.dsack = 1;
3499 tp->duplicate_sack[0].start_seq = seq; 3606 tp->duplicate_sack[0].start_seq = seq;
3500 tp->duplicate_sack[0].end_seq = end_seq; 3607 tp->duplicate_sack[0].end_seq = end_seq;
3501 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok); 3608 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1,
3609 4 - tp->rx_opt.tstamp_ok);
3502 } 3610 }
3503} 3611}
3504 3612
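The "4 - tp->rx_opt.tstamp_ok" bound reflects TCP option space: 40 bytes fit the 2-byte SACK header plus four 8-byte blocks, but once a 10-to-12-byte timestamp option is present only three blocks remain. A quick check of the clamp, with hypothetical inputs:

#include <stdio.h>

int main(void)
{
        int tstamp_ok, num_sacks = 4, dsack = 1;

        for (tstamp_ok = 0; tstamp_ok <= 1; tstamp_ok++) {
                int eff = num_sacks + dsack;
                int limit = 4 - tstamp_ok;

                if (eff > limit)
                        eff = limit;
                printf("tstamp=%d -> eff_sacks=%d\n", tstamp_ok, eff);
        }
        return 0;                       /* 4 without timestamps, 3 with */
}
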
@@ -3538,12 +3646,12 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3538{ 3646{
3539 int this_sack; 3647 int this_sack;
3540 struct tcp_sack_block *sp = &tp->selective_acks[0]; 3648 struct tcp_sack_block *sp = &tp->selective_acks[0];
3541 struct tcp_sack_block *swalk = sp+1; 3649 struct tcp_sack_block *swalk = sp + 1;
3542 3650
3543 /* See if the recent change to the first SACK eats into 3651 /* See if the recent change to the first SACK eats into
3544 * or hits the sequence space of other SACK blocks, if so coalesce. 3652 * or hits the sequence space of other SACK blocks, if so coalesce.
3545 */ 3653 */
3546 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) { 3654 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
3547 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) { 3655 if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
3548 int i; 3656 int i;
3549 3657
@@ -3551,16 +3659,19 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3551 * Decrease num_sacks. 3659 * Decrease num_sacks.
3552 */ 3660 */
3553 tp->rx_opt.num_sacks--; 3661 tp->rx_opt.num_sacks--;
3554 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); 3662 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
3555 for (i=this_sack; i < tp->rx_opt.num_sacks; i++) 3663 tp->rx_opt.dsack,
3556 sp[i] = sp[i+1]; 3664 4 - tp->rx_opt.tstamp_ok);
3665 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
3666 sp[i] = sp[i + 1];
3557 continue; 3667 continue;
3558 } 3668 }
3559 this_sack++, swalk++; 3669 this_sack++, swalk++;
3560 } 3670 }
3561} 3671}
3562 3672
3563static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) 3673static inline void tcp_sack_swap(struct tcp_sack_block *sack1,
3674 struct tcp_sack_block *sack2)
3564{ 3675{
3565 __u32 tmp; 3676 __u32 tmp;
3566 3677
@@ -3583,11 +3694,11 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3583 if (!cur_sacks) 3694 if (!cur_sacks)
3584 goto new_sack; 3695 goto new_sack;
3585 3696
3586 for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) { 3697 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
3587 if (tcp_sack_extend(sp, seq, end_seq)) { 3698 if (tcp_sack_extend(sp, seq, end_seq)) {
3588 /* Rotate this_sack to the first one. */ 3699 /* Rotate this_sack to the first one. */
3589 for (; this_sack>0; this_sack--, sp--) 3700 for (; this_sack > 0; this_sack--, sp--)
3590 tcp_sack_swap(sp, sp-1); 3701 tcp_sack_swap(sp, sp - 1);
3591 if (cur_sacks > 1) 3702 if (cur_sacks > 1)
3592 tcp_sack_maybe_coalesce(tp); 3703 tcp_sack_maybe_coalesce(tp);
3593 return; 3704 return;
@@ -3606,14 +3717,15 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3606 sp--; 3717 sp--;
3607 } 3718 }
3608 for (; this_sack > 0; this_sack--, sp--) 3719 for (; this_sack > 0; this_sack--, sp--)
3609 *sp = *(sp-1); 3720 *sp = *(sp - 1);
3610 3721
3611new_sack: 3722new_sack:
3612 /* Build the new head SACK, and we're done. */ 3723 /* Build the new head SACK, and we're done. */
3613 sp->start_seq = seq; 3724 sp->start_seq = seq;
3614 sp->end_seq = end_seq; 3725 sp->end_seq = end_seq;
3615 tp->rx_opt.num_sacks++; 3726 tp->rx_opt.num_sacks++;
3616 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); 3727 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack,
3728 4 - tp->rx_opt.tstamp_ok);
3617} 3729}
3618 3730
3619/* RCV.NXT advances, some SACKs should be eaten. */ 3731/* RCV.NXT advances, some SACKs should be eaten. */
@@ -3631,7 +3743,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
3631 return; 3743 return;
3632 } 3744 }
3633 3745
3634 for (this_sack = 0; this_sack < num_sacks; ) { 3746 for (this_sack = 0; this_sack < num_sacks;) {
3635 /* Check if the start of the sack is covered by RCV.NXT. */ 3747 /* Check if the start of the sack is covered by RCV.NXT. */
3636 if (!before(tp->rcv_nxt, sp->start_seq)) { 3748 if (!before(tp->rcv_nxt, sp->start_seq)) {
3637 int i; 3749 int i;
@@ -3650,7 +3762,9 @@ static void tcp_sack_remove(struct tcp_sock *tp)
3650 } 3762 }
3651 if (num_sacks != tp->rx_opt.num_sacks) { 3763 if (num_sacks != tp->rx_opt.num_sacks) {
3652 tp->rx_opt.num_sacks = num_sacks; 3764 tp->rx_opt.num_sacks = num_sacks;
3653 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); 3765 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
3766 tp->rx_opt.dsack,
3767 4 - tp->rx_opt.tstamp_ok);
3654 } 3768 }
3655} 3769}
3656 3770
@@ -3703,14 +3817,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3703 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) 3817 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
3704 goto drop; 3818 goto drop;
3705 3819
3706 __skb_pull(skb, th->doff*4); 3820 __skb_pull(skb, th->doff * 4);
3707 3821
3708 TCP_ECN_accept_cwr(tp, skb); 3822 TCP_ECN_accept_cwr(tp, skb);
3709 3823
3710 if (tp->rx_opt.dsack) { 3824 if (tp->rx_opt.dsack) {
3711 tp->rx_opt.dsack = 0; 3825 tp->rx_opt.dsack = 0;
3712 tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks, 3826 tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
3713 4 - tp->rx_opt.tstamp_ok); 3827 4 - tp->rx_opt.tstamp_ok);
3714 } 3828 }
3715 3829
3716 /* Queue data for delivery to the user. 3830 /* Queue data for delivery to the user.
@@ -3726,7 +3840,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3726 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && 3840 tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
3727 sock_owned_by_user(sk) && !tp->urg_data) { 3841 sock_owned_by_user(sk) && !tp->urg_data) {
3728 int chunk = min_t(unsigned int, skb->len, 3842 int chunk = min_t(unsigned int, skb->len,
3729 tp->ucopy.len); 3843 tp->ucopy.len);
3730 3844
3731 __set_current_state(TASK_RUNNING); 3845 __set_current_state(TASK_RUNNING);
3732 3846
@@ -3744,12 +3858,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3744queue_and_out: 3858queue_and_out:
3745 if (eaten < 0 && 3859 if (eaten < 0 &&
3746 (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 3860 (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
3747 !sk_stream_rmem_schedule(sk, skb))) { 3861 !sk_rmem_schedule(sk, skb->truesize))) {
3748 if (tcp_prune_queue(sk) < 0 || 3862 if (tcp_prune_queue(sk) < 0 ||
3749 !sk_stream_rmem_schedule(sk, skb)) 3863 !sk_rmem_schedule(sk, skb->truesize))
3750 goto drop; 3864 goto drop;
3751 } 3865 }
3752 sk_stream_set_owner_r(skb, sk); 3866 skb_set_owner_r(skb, sk);
3753 __skb_queue_tail(&sk->sk_receive_queue, skb); 3867 __skb_queue_tail(&sk->sk_receive_queue, skb);
3754 } 3868 }
3755 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 3869 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -3818,9 +3932,9 @@ drop:
3818 TCP_ECN_check_ce(tp, skb); 3932 TCP_ECN_check_ce(tp, skb);
3819 3933
3820 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 3934 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
3821 !sk_stream_rmem_schedule(sk, skb)) { 3935 !sk_rmem_schedule(sk, skb->truesize)) {
3822 if (tcp_prune_queue(sk) < 0 || 3936 if (tcp_prune_queue(sk) < 0 ||
3823 !sk_stream_rmem_schedule(sk, skb)) 3937 !sk_rmem_schedule(sk, skb->truesize))
3824 goto drop; 3938 goto drop;
3825 } 3939 }
3826 3940
@@ -3831,7 +3945,7 @@ drop:
3831 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", 3945 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
3832 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 3946 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
3833 3947
3834 sk_stream_set_owner_r(skb, sk); 3948 skb_set_owner_r(skb, sk);
3835 3949
3836 if (!skb_peek(&tp->out_of_order_queue)) { 3950 if (!skb_peek(&tp->out_of_order_queue)) {
3837 /* Initial out of order segment, build 1 SACK. */ 3951 /* Initial out of order segment, build 1 SACK. */
@@ -3843,7 +3957,7 @@ drop:
3843 tp->selective_acks[0].end_seq = 3957 tp->selective_acks[0].end_seq =
3844 TCP_SKB_CB(skb)->end_seq; 3958 TCP_SKB_CB(skb)->end_seq;
3845 } 3959 }
3846 __skb_queue_head(&tp->out_of_order_queue,skb); 3960 __skb_queue_head(&tp->out_of_order_queue, skb);
3847 } else { 3961 } else {
3848 struct sk_buff *skb1 = tp->out_of_order_queue.prev; 3962 struct sk_buff *skb1 = tp->out_of_order_queue.prev;
3849 u32 seq = TCP_SKB_CB(skb)->seq; 3963 u32 seq = TCP_SKB_CB(skb)->seq;
@@ -3866,10 +3980,10 @@ drop:
3866 if (!after(TCP_SKB_CB(skb1)->seq, seq)) 3980 if (!after(TCP_SKB_CB(skb1)->seq, seq))
3867 break; 3981 break;
3868 } while ((skb1 = skb1->prev) != 3982 } while ((skb1 = skb1->prev) !=
3869 (struct sk_buff*)&tp->out_of_order_queue); 3983 (struct sk_buff *)&tp->out_of_order_queue);
3870 3984
3871 /* Does skb overlap the previous one? */ 3985
3872 if (skb1 != (struct sk_buff*)&tp->out_of_order_queue && 3986 if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
3873 before(seq, TCP_SKB_CB(skb1)->end_seq)) { 3987 before(seq, TCP_SKB_CB(skb1)->end_seq)) {
3874 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 3988 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
3875 /* All the bits are present. Drop. */ 3989 /* All the bits are present. Drop. */
@@ -3879,7 +3993,8 @@ drop:
3879 } 3993 }
3880 if (after(seq, TCP_SKB_CB(skb1)->seq)) { 3994 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
3881 /* Partial overlap. */ 3995 /* Partial overlap. */
3882 tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq); 3996 tcp_dsack_set(tp, seq,
3997 TCP_SKB_CB(skb1)->end_seq);
3883 } else { 3998 } else {
3884 skb1 = skb1->prev; 3999 skb1 = skb1->prev;
3885 } 4000 }
@@ -3888,15 +4003,17 @@ drop:
3888 4003
3889 /* And clean segments covered by new one as whole. */ 4004 /* And clean segments covered by new one as whole. */
3890 while ((skb1 = skb->next) != 4005 while ((skb1 = skb->next) !=
3891 (struct sk_buff*)&tp->out_of_order_queue && 4006 (struct sk_buff *)&tp->out_of_order_queue &&
3892 after(end_seq, TCP_SKB_CB(skb1)->seq)) { 4007 after(end_seq, TCP_SKB_CB(skb1)->seq)) {
3893 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4008 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
3894 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); 4009 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
3895 break; 4010 end_seq);
3896 } 4011 break;
3897 __skb_unlink(skb1, &tp->out_of_order_queue); 4012 }
3898 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); 4013 __skb_unlink(skb1, &tp->out_of_order_queue);
3899 __kfree_skb(skb1); 4014 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
4015 TCP_SKB_CB(skb1)->end_seq);
4016 __kfree_skb(skb1);
3900 } 4017 }
3901 4018
3902add_sack: 4019add_sack:
@@ -3919,7 +4036,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3919 4036
3920 /* First, check that queue is collapsible and find 4037 /* First, check that queue is collapsible and find
3921 * the point where collapsing can be useful. */ 4038 * the point where collapsing can be useful. */
3922 for (skb = head; skb != tail; ) { 4039 for (skb = head; skb != tail;) {
3923 /* No new bits? It is possible on ofo queue. */ 4040 /* No new bits? It is possible on ofo queue. */
3924 if (!before(start, TCP_SKB_CB(skb)->end_seq)) { 4041 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
3925 struct sk_buff *next = skb->next; 4042 struct sk_buff *next = skb->next;
@@ -3957,9 +4074,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3957 /* Too big header? This can happen with IPv6. */ 4074 /* Too big header? This can happen with IPv6. */
3958 if (copy < 0) 4075 if (copy < 0)
3959 return; 4076 return;
3960 if (end-start < copy) 4077 if (end - start < copy)
3961 copy = end-start; 4078 copy = end - start;
3962 nskb = alloc_skb(copy+header, GFP_ATOMIC); 4079 nskb = alloc_skb(copy + header, GFP_ATOMIC);
3963 if (!nskb) 4080 if (!nskb)
3964 return; 4081 return;
3965 4082
@@ -3973,7 +4090,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3973 memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); 4090 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
3974 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; 4091 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
3975 __skb_insert(nskb, skb->prev, skb, list); 4092 __skb_insert(nskb, skb->prev, skb, list);
3976 sk_stream_set_owner_r(nskb, sk); 4093 skb_set_owner_r(nskb, sk);
3977 4094
3978 /* Copy data, releasing collapsed skbs. */ 4095 /* Copy data, releasing collapsed skbs. */
3979 while (copy > 0) { 4096 while (copy > 0) {
@@ -4069,9 +4186,9 @@ static int tcp_prune_queue(struct sock *sk)
4069 tcp_collapse_ofo_queue(sk); 4186 tcp_collapse_ofo_queue(sk);
4070 tcp_collapse(sk, &sk->sk_receive_queue, 4187 tcp_collapse(sk, &sk->sk_receive_queue,
4071 sk->sk_receive_queue.next, 4188 sk->sk_receive_queue.next,
4072 (struct sk_buff*)&sk->sk_receive_queue, 4189 (struct sk_buff *)&sk->sk_receive_queue,
4073 tp->copied_seq, tp->rcv_nxt); 4190 tp->copied_seq, tp->rcv_nxt);
4074 sk_stream_mem_reclaim(sk); 4191 sk_mem_reclaim(sk);
4075 4192
4076 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) 4193 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
4077 return 0; 4194 return 0;
@@ -4091,7 +4208,7 @@ static int tcp_prune_queue(struct sock *sk)
4091 */ 4208 */
4092 if (tcp_is_sack(tp)) 4209 if (tcp_is_sack(tp))
4093 tcp_sack_reset(&tp->rx_opt); 4210 tcp_sack_reset(&tp->rx_opt);
4094 sk_stream_mem_reclaim(sk); 4211 sk_mem_reclaim(sk);
4095 } 4212 }
4096 4213
4097 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) 4214 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
@@ -4108,7 +4225,6 @@ static int tcp_prune_queue(struct sock *sk)
4108 return -1; 4225 return -1;
4109} 4226}
4110 4227
4111
4112/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. 4228/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
4113 * As additional protections, we do not touch cwnd in retransmission phases, 4229 * As additional protections, we do not touch cwnd in retransmission phases,
4114 * and if application hit its sndbuf limit recently. 4230 * and if application hit its sndbuf limit recently.
@@ -4170,8 +4286,8 @@ static void tcp_new_space(struct sock *sk)
4170 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + 4286 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
4171 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), 4287 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
4172 demanded = max_t(unsigned int, tp->snd_cwnd, 4288 demanded = max_t(unsigned int, tp->snd_cwnd,
4173 tp->reordering + 1); 4289 tp->reordering + 1);
4174 sndmem *= 2*demanded; 4290 sndmem *= 2 * demanded;
4175 if (sndmem > sk->sk_sndbuf) 4291 if (sndmem > sk->sk_sndbuf)
4176 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); 4292 sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
4177 tp->snd_cwnd_stamp = tcp_time_stamp; 4293 tp->snd_cwnd_stamp = tcp_time_stamp;
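The sndmem computation above sizes the send buffer for two windows of worst-case segments. With hypothetical numbers (1460-byte MSS, 256 bytes standing in for the header and sk_buff overhead, cwnd 10, reordering 3):

#include <stdio.h>

static unsigned umax(unsigned a, unsigned b)
{
        return a > b ? a : b;
}

int main(void)
{
        unsigned mss = 1460, overhead = 256;    /* assumed, not kernel constants */
        unsigned snd_cwnd = 10, reordering = 3;
        unsigned sndmem = mss + overhead;
        unsigned demanded = umax(snd_cwnd, reordering + 1);

        sndmem *= 2 * demanded;                 /* two windows of headroom */
        printf("sndmem = %u bytes\n", sndmem);  /* 34320 */
        return 0;
}
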
@@ -4212,8 +4328,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4212 /* We ACK each frame or... */ 4328 /* We ACK each frame or... */
4213 tcp_in_quickack_mode(sk) || 4329 tcp_in_quickack_mode(sk) ||
4214 /* We have out of order data. */ 4330 /* We have out of order data. */
4215 (ofo_possible && 4331 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4216 skb_peek(&tp->out_of_order_queue))) {
4217 /* Then ack it now */ 4332 /* Then ack it now */
4218 tcp_send_ack(sk); 4333 tcp_send_ack(sk);
4219 } else { 4334 } else {
@@ -4241,7 +4356,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
4241 * either form (or just set the sysctl tcp_stdurg). 4356 * either form (or just set the sysctl tcp_stdurg).
4242 */ 4357 */
4243 4358
4244static void tcp_check_urg(struct sock * sk, struct tcphdr * th) 4359static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
4245{ 4360{
4246 struct tcp_sock *tp = tcp_sk(sk); 4361 struct tcp_sock *tp = tcp_sk(sk);
4247 u32 ptr = ntohs(th->urg_ptr); 4362 u32 ptr = ntohs(th->urg_ptr);
@@ -4290,8 +4405,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
4290 * buggy users. 4405 * buggy users.
4291 */ 4406 */
4292 if (tp->urg_seq == tp->copied_seq && tp->urg_data && 4407 if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
4293 !sock_flag(sk, SOCK_URGINLINE) && 4408 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
4294 tp->copied_seq != tp->rcv_nxt) {
4295 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 4409 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
4296 tp->copied_seq++; 4410 tp->copied_seq++;
4297 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { 4411 if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
@@ -4300,8 +4414,8 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
4300 } 4414 }
4301 } 4415 }
4302 4416
4303 tp->urg_data = TCP_URG_NOTYET; 4417 tp->urg_data = TCP_URG_NOTYET;
4304 tp->urg_seq = ptr; 4418 tp->urg_seq = ptr;
4305 4419
4306 /* Disable header prediction. */ 4420 /* Disable header prediction. */
4307 tp->pred_flags = 0; 4421 tp->pred_flags = 0;
@@ -4314,7 +4428,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4314 4428
4315 /* Check if we get a new urgent pointer - normally not. */ 4429 /* Check if we get a new urgent pointer - normally not. */
4316 if (th->urg) 4430 if (th->urg)
4317 tcp_check_urg(sk,th); 4431 tcp_check_urg(sk, th);
4318 4432
4319 /* Do we wait for any urgent data? - normally not... */ 4433 /* Do we wait for any urgent data? - normally not... */
4320 if (tp->urg_data == TCP_URG_NOTYET) { 4434 if (tp->urg_data == TCP_URG_NOTYET) {
@@ -4356,7 +4470,8 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4356 return err; 4470 return err;
4357} 4471}
4358 4472
4359static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) 4473static __sum16 __tcp_checksum_complete_user(struct sock *sk,
4474 struct sk_buff *skb)
4360{ 4475{
4361 __sum16 result; 4476 __sum16 result;
4362 4477
@@ -4370,14 +4485,16 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
4370 return result; 4485 return result;
4371} 4486}
4372 4487
4373static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) 4488static inline int tcp_checksum_complete_user(struct sock *sk,
4489 struct sk_buff *skb)
4374{ 4490{
4375 return !skb_csum_unnecessary(skb) && 4491 return !skb_csum_unnecessary(skb) &&
4376 __tcp_checksum_complete_user(sk, skb); 4492 __tcp_checksum_complete_user(sk, skb);
4377} 4493}
4378 4494
4379#ifdef CONFIG_NET_DMA 4495#ifdef CONFIG_NET_DMA
4380static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen) 4496static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
4497 int hlen)
4381{ 4498{
4382 struct tcp_sock *tp = tcp_sk(sk); 4499 struct tcp_sock *tp = tcp_sk(sk);
4383 int chunk = skb->len - hlen; 4500 int chunk = skb->len - hlen;
@@ -4393,7 +4510,9 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
4393 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { 4510 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
4394 4511
4395 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, 4512 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
4396 skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list); 4513 skb, hlen,
4514 tp->ucopy.iov, chunk,
4515 tp->ucopy.pinned_list);
4397 4516
4398 if (dma_cookie < 0) 4517 if (dma_cookie < 0)
4399 goto out; 4518 goto out;
@@ -4475,7 +4594,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4475 */ 4594 */
4476 4595
4477 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && 4596 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
4478 TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { 4597 TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4479 int tcp_header_len = tp->tcp_header_len; 4598 int tcp_header_len = tp->tcp_header_len;
4480 4599
4481 /* Timestamp header prediction: tcp_header_len 4600 /* Timestamp header prediction: tcp_header_len
@@ -4544,7 +4663,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4544 eaten = 1; 4663 eaten = 1;
4545 } 4664 }
4546#endif 4665#endif
4547 if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) { 4666 if (tp->ucopy.task == current &&
4667 sock_owned_by_user(sk) && !copied_early) {
4548 __set_current_state(TASK_RUNNING); 4668 __set_current_state(TASK_RUNNING);
4549 4669
4550 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) 4670 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
@@ -4591,9 +4711,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4591 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS); 4711 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
4592 4712
4593 /* Bulk data transfer: receiver */ 4713 /* Bulk data transfer: receiver */
4594 __skb_pull(skb,tcp_header_len); 4714 __skb_pull(skb, tcp_header_len);
4595 __skb_queue_tail(&sk->sk_receive_queue, skb); 4715 __skb_queue_tail(&sk->sk_receive_queue, skb);
4596 sk_stream_set_owner_r(skb, sk); 4716 skb_set_owner_r(skb, sk);
4597 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 4717 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4598 } 4718 }
4599 4719
@@ -4623,7 +4743,7 @@ no_ack:
4623 } 4743 }
4624 4744
4625slow_path: 4745slow_path:
4626 if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb)) 4746 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
4627 goto csum_error; 4747 goto csum_error;
4628 4748
4629 /* 4749 /*
@@ -4830,7 +4950,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4830 4950
4831 if (!sock_flag(sk, SOCK_DEAD)) { 4951 if (!sock_flag(sk, SOCK_DEAD)) {
4832 sk->sk_state_change(sk); 4952 sk->sk_state_change(sk);
4833 sk_wake_async(sk, 0, POLL_OUT); 4953 sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
4834 } 4954 }
4835 4955
4836 if (sk->sk_write_pending || 4956 if (sk->sk_write_pending ||
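
[Editor's note] The bare 0 formerly passed to sk_wake_async() becomes a named wakeup code; elsewhere in this merge the old magic numbers were all given names. Roughly, per include/linux/net.h of this series (sketch, not verbatim):

/* Named sk_wake_async() codes replacing the old 0..3 constants. */
enum {
	SOCK_WAKE_IO,		/* was 0: generic read/write readiness */
	SOCK_WAKE_WAITD,	/* was 1: wake tasks waiting for data */
	SOCK_WAKE_SPACE,	/* was 2: write space became available */
	SOCK_WAKE_URG,		/* was 3: urgent/out-of-band data */
};
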
@@ -4873,7 +4993,8 @@ discard:
4873 } 4993 }
4874 4994
4875 /* PAWS check. */ 4995 /* PAWS check. */
4876 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0)) 4996 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
4997 tcp_paws_check(&tp->rx_opt, 0))
4877 goto discard_and_undo; 4998 goto discard_and_undo;
4878 4999
4879 if (th->syn) { 5000 if (th->syn) {
@@ -4908,7 +5029,6 @@ discard:
4908 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 5029 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
4909 tcp_initialize_rcv_mss(sk); 5030 tcp_initialize_rcv_mss(sk);
4910 5031
4911
4912 tcp_send_synack(sk); 5032 tcp_send_synack(sk);
4913#if 0 5033#if 0
4914 /* Note, we could accept data and URG from this segment. 5034 /* Note, we could accept data and URG from this segment.
@@ -4940,7 +5060,6 @@ reset_and_undo:
4940 return 1; 5060 return 1;
4941} 5061}
4942 5062
4943
4944/* 5063/*
4945 * This function implements the receiving procedure of RFC 793 for 5064 * This function implements the receiving procedure of RFC 793 for
4946 * all states except ESTABLISHED and TIME_WAIT. 5065 * all states except ESTABLISHED and TIME_WAIT.
@@ -5060,9 +5179,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5060 * are not waked up, because sk->sk_sleep == 5179 * are not waked up, because sk->sk_sleep ==
5061 * NULL and sk->sk_socket == NULL. 5180 * NULL and sk->sk_socket == NULL.
5062 */ 5181 */
5063 if (sk->sk_socket) { 5182 if (sk->sk_socket)
5064 sk_wake_async(sk,0,POLL_OUT); 5183 sk_wake_async(sk,
5065 } 5184 SOCK_WAKE_IO, POLL_OUT);
5066 5185
5067 tp->snd_una = TCP_SKB_CB(skb)->ack_seq; 5186 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5068 tp->snd_wnd = ntohs(th->window) << 5187 tp->snd_wnd = ntohs(th->window) <<
@@ -5074,8 +5193,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5074 * and does not calculate rtt. 5193 * and does not calculate rtt.
5075 * Fix it at least with timestamps. 5194 * Fix it at least with timestamps.
5076 */ 5195 */
5077 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 5196 if (tp->rx_opt.saw_tstamp &&
5078 !tp->srtt) 5197 tp->rx_opt.rcv_tsecr && !tp->srtt)
5079 tcp_ack_saw_tstamp(sk, 0); 5198 tcp_ack_saw_tstamp(sk, 0);
5080 5199
5081 if (tp->rx_opt.tstamp_ok) 5200 if (tp->rx_opt.tstamp_ok)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 652c32368ccc..9aea88b8d4fc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -99,7 +99,7 @@ static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
99static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 99static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
100 __be32 saddr, __be32 daddr, 100 __be32 saddr, __be32 daddr,
101 struct tcphdr *th, int protocol, 101 struct tcphdr *th, int protocol,
102 int tcplen); 102 unsigned int tcplen);
103#endif 103#endif
104 104
105struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 105struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
@@ -1020,7 +1020,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1020static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 1020static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1021 __be32 saddr, __be32 daddr, 1021 __be32 saddr, __be32 daddr,
1022 struct tcphdr *th, int protocol, 1022 struct tcphdr *th, int protocol,
1023 int tcplen) 1023 unsigned int tcplen)
1024{ 1024{
1025 struct scatterlist sg[4]; 1025 struct scatterlist sg[4];
1026 __u16 data_len; 1026 __u16 data_len;
@@ -1113,7 +1113,7 @@ int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1113 struct dst_entry *dst, 1113 struct dst_entry *dst,
1114 struct request_sock *req, 1114 struct request_sock *req,
1115 struct tcphdr *th, int protocol, 1115 struct tcphdr *th, int protocol,
1116 int tcplen) 1116 unsigned int tcplen)
1117{ 1117{
1118 __be32 saddr, daddr; 1118 __be32 saddr, daddr;
1119 1119
@@ -1478,7 +1478,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1478 } 1478 }
1479#endif 1479#endif
1480 1480
1481 __inet_hash(&tcp_hashinfo, newsk, 0); 1481 __inet_hash_nolisten(&tcp_hashinfo, newsk);
1482 __inet_inherit_port(&tcp_hashinfo, sk, newsk); 1482 __inet_inherit_port(&tcp_hashinfo, sk, newsk);
1483 1483
1484 return newsk; 1484 return newsk;
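
[Editor's note] Widening tcplen from int to unsigned int in the MD5 hash paths is defensive: a length is never meaningfully negative, and a signed value mixed into comparisons with unsigned expressions gets silently converted rather than caught. A contrived, standalone illustration of that conversion trap (plain userspace C, not kernel code):

#include <stdio.h>

int main(void)
{
	int tcplen = -40;		/* an underflowed "length" */
	unsigned int hdrlen = 20;

	/* The usual arithmetic conversions turn -40 into a huge unsigned
	 * value, so this bogus "long enough?" check wrongly succeeds. */
	if (tcplen > hdrlen)
		printf("bogus length accepted: %u\n", (unsigned int)tcplen);
	return 0;
}
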
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index e7f5ef92cbd8..ce3c41ff50b2 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -115,12 +115,12 @@ static void tcp_lp_init(struct sock *sk)
115 * Will only call newReno CA when away from inference. 115 * Will only call newReno CA when away from inference.
116 * From TCP-LP's paper, this will be handled in additive increase. 116 * From TCP-LP's paper, this will be handled in additive increase.
117 */ 117 */
118static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) 118static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
119{ 119{
120 struct lp *lp = inet_csk_ca(sk); 120 struct lp *lp = inet_csk_ca(sk);
121 121
122 if (!(lp->flag & LP_WITHIN_INF)) 122 if (!(lp->flag & LP_WITHIN_INF))
123 tcp_reno_cong_avoid(sk, ack, in_flight, flag); 123 tcp_reno_cong_avoid(sk, ack, in_flight);
124} 124}
125 125
126/** 126/**
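
[Editor's note] This tcp_lp.c hunk tracks an API change made earlier in the merge: the unused flag argument was dropped from the congestion-avoidance hook of every algorithm. The ops table presumably now reads roughly (sketched from include/net/tcp.h, not verbatim):

struct tcp_congestion_ops {
	/* ... */
	/* do new cwnd calculation (required); the old 'int flag' is gone */
	void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
	/* ... */
};
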
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f4c1eef89af0..89f0188885c7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -61,27 +61,24 @@ int sysctl_tcp_base_mss __read_mostly = 512;
61/* By default, RFC2861 behavior. */ 61/* By default, RFC2861 behavior. */
62int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 62int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
63 63
64static inline void tcp_packets_out_inc(struct sock *sk, 64static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
65 const struct sk_buff *skb)
66{ 65{
67 struct tcp_sock *tp = tcp_sk(sk); 66 struct tcp_sock *tp = tcp_sk(sk);
68 int orig = tp->packets_out; 67 unsigned int prior_packets = tp->packets_out;
68
69 tcp_advance_send_head(sk, skb);
70 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
71
72 /* Don't override Nagle indefinitely with F-RTO */
73 if (tp->frto_counter == 2)
74 tp->frto_counter = 3;
69 75
70 tp->packets_out += tcp_skb_pcount(skb); 76 tp->packets_out += tcp_skb_pcount(skb);
71 if (!orig) 77 if (!prior_packets)
72 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 78 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
73 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 79 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
74} 80}
75 81
76static void update_send_head(struct sock *sk, struct sk_buff *skb)
77{
78 struct tcp_sock *tp = tcp_sk(sk);
79
80 tcp_advance_send_head(sk, skb);
81 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
82 tcp_packets_out_inc(sk, skb);
83}
84
85/* SND.NXT, if window was not shrunk. 82/* SND.NXT, if window was not shrunk.
86 * If window has been shrunk, what should we make? It is not clear at all. 83 * If window has been shrunk, what should we make? It is not clear at all.
87 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-( 84 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
@@ -92,10 +89,10 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk)
92{ 89{
93 struct tcp_sock *tp = tcp_sk(sk); 90 struct tcp_sock *tp = tcp_sk(sk);
94 91
95 if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) 92 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
96 return tp->snd_nxt; 93 return tp->snd_nxt;
97 else 94 else
98 return tp->snd_una+tp->snd_wnd; 95 return tcp_wnd_end(tp);
99} 96}
100 97
101/* Calculate mss to advertise in SYN segment. 98/* Calculate mss to advertise in SYN segment.
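
[Editor's note] tcp_wnd_end() makes its first appearance just above and replaces open-coded tp->snd_una + tp->snd_wnd throughout the rest of this file. A sketch of the helper, paraphrased from include/net/tcp.h in this series:

/* Right edge of the send window: the highest sequence the peer's
 * advertised window currently permits. */
static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
{
	return tp->snd_una + tp->snd_wnd;
}
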
@@ -224,14 +221,14 @@ void tcp_select_initial_window(int __space, __u32 mss,
224 * following RFC2414. Senders, not following this RFC, 221 * following RFC2414. Senders, not following this RFC,
225 * will be satisfied with 2. 222 * will be satisfied with 2.
226 */ 223 */
227 if (mss > (1<<*rcv_wscale)) { 224 if (mss > (1 << *rcv_wscale)) {
228 int init_cwnd = 4; 225 int init_cwnd = 4;
229 if (mss > 1460*3) 226 if (mss > 1460 * 3)
230 init_cwnd = 2; 227 init_cwnd = 2;
231 else if (mss > 1460) 228 else if (mss > 1460)
232 init_cwnd = 3; 229 init_cwnd = 3;
233 if (*rcv_wnd > init_cwnd*mss) 230 if (*rcv_wnd > init_cwnd * mss)
234 *rcv_wnd = init_cwnd*mss; 231 *rcv_wnd = init_cwnd * mss;
235 } 232 }
236 233
237 /* Set the clamp no higher than max representable value */ 234 /* Set the clamp no higher than max representable value */
@@ -281,11 +278,10 @@ static u16 tcp_select_window(struct sock *sk)
281 return new_win; 278 return new_win;
282} 279}
283 280
284static inline void TCP_ECN_send_synack(struct tcp_sock *tp, 281static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
285 struct sk_buff *skb)
286{ 282{
287 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; 283 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
288 if (!(tp->ecn_flags&TCP_ECN_OK)) 284 if (!(tp->ecn_flags & TCP_ECN_OK))
289 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; 285 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
290} 286}
291 287
@@ -295,7 +291,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
295 291
296 tp->ecn_flags = 0; 292 tp->ecn_flags = 0;
297 if (sysctl_tcp_ecn) { 293 if (sysctl_tcp_ecn) {
298 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; 294 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
299 tp->ecn_flags = TCP_ECN_OK; 295 tp->ecn_flags = TCP_ECN_OK;
300 } 296 }
301} 297}
@@ -317,7 +313,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
317 if (skb->len != tcp_header_len && 313 if (skb->len != tcp_header_len &&
318 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { 314 !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
319 INET_ECN_xmit(sk); 315 INET_ECN_xmit(sk);
320 if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { 316 if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
321 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; 317 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
322 tcp_hdr(skb)->cwr = 1; 318 tcp_hdr(skb)->cwr = 1;
323 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; 319 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
@@ -331,6 +327,26 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
331 } 327 }
332} 328}
333 329
330/* Constructs common control bits of non-data skb. If SYN/FIN is present,
331 * auto increment end seqno.
332 */
333static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
334{
335 skb->csum = 0;
336
337 TCP_SKB_CB(skb)->flags = flags;
338 TCP_SKB_CB(skb)->sacked = 0;
339
340 skb_shinfo(skb)->gso_segs = 1;
341 skb_shinfo(skb)->gso_size = 0;
342 skb_shinfo(skb)->gso_type = 0;
343
344 TCP_SKB_CB(skb)->seq = seq;
345 if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
346 seq++;
347 TCP_SKB_CB(skb)->end_seq = seq;
348}
349
334static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, 350static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
335 __u32 tstamp, __u8 **md5_hash) 351 __u32 tstamp, __u8 **md5_hash)
336{ 352{
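
[Editor's note] tcp_init_nondata_skb() centralizes the control-skb setup (csum, flags, gso fields, seq/end_seq) that later hunks strip out of tcp_retransmit_skb() and the ACK/RST/SYN senders. A sketch of a typical caller building a bare ACK; the argument choices here are illustrative:

	/* Hypothetical caller: construct a data-less ACK. */
	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (buff == NULL)
		return;			/* real callers schedule a retry */
	skb_reserve(buff, MAX_TCP_HEADER);

	/* No SYN/FIN set, so seq == end_seq: the ACK consumes no
	 * sequence space. */
	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
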
@@ -434,7 +450,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
434 (TCPOPT_NOP << 16) | 450 (TCPOPT_NOP << 16) |
435 (TCPOPT_MD5SIG << 8) | 451 (TCPOPT_MD5SIG << 8) |
436 TCPOLEN_MD5SIG); 452 TCPOLEN_MD5SIG);
437 *md5_hash = (__u8 *) ptr; 453 *md5_hash = (__u8 *)ptr;
438 } 454 }
439#endif 455#endif
440} 456}
@@ -450,7 +466,8 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
450 * We are working here with either a clone of the original 466 * We are working here with either a clone of the original
451 * SKB, or a fresh unique copy made by the retransmit engine. 467 * SKB, or a fresh unique copy made by the retransmit engine.
452 */ 468 */
453static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask) 469static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
470 gfp_t gfp_mask)
454{ 471{
455 const struct inet_connection_sock *icsk = inet_csk(sk); 472 const struct inet_connection_sock *icsk = inet_csk(sk);
456 struct inet_sock *inet; 473 struct inet_sock *inet;
@@ -554,8 +571,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
554 th->urg_ptr = 0; 571 th->urg_ptr = 0;
555 572
556 if (unlikely(tp->urg_mode && 573 if (unlikely(tp->urg_mode &&
557 between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) { 574 between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
558 th->urg_ptr = htons(tp->snd_up-tcb->seq); 575 th->urg_ptr = htons(tp->snd_up - tcb->seq);
559 th->urg = 1; 576 th->urg = 1;
560 } 577 }
561 578
@@ -619,7 +636,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
619#undef SYSCTL_FLAG_SACK 636#undef SYSCTL_FLAG_SACK
620} 637}
621 638
622
623/* This routine just queues the buffer 639
624 * 640 *
625 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, 641 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -633,10 +649,12 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
633 tp->write_seq = TCP_SKB_CB(skb)->end_seq; 649 tp->write_seq = TCP_SKB_CB(skb)->end_seq;
634 skb_header_release(skb); 650 skb_header_release(skb);
635 tcp_add_write_queue_tail(sk, skb); 651 tcp_add_write_queue_tail(sk, skb);
636 sk_charge_skb(sk, skb); 652 sk->sk_wmem_queued += skb->truesize;
653 sk_mem_charge(sk, skb->truesize);
637} 654}
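
[Editor's note] The two-line pattern above recurs throughout the rest of this patch: the old sk_charge_skb(sk, skb) helper is gone and each call site now spells out its effects. Roughly:

	/* What sk_charge_skb(sk, skb) used to do, now written out: */
	sk->sk_wmem_queued += skb->truesize;	/* bytes held in the write queue */
	sk_mem_charge(sk, skb->truesize);	/* consume forward-alloc quota;
						 * see the accounting sketch in
						 * the tcp_input.c notes above */
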
638 655
639static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 656static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
657 unsigned int mss_now)
640{ 658{
641 if (skb->len <= mss_now || !sk_can_gso(sk)) { 659 if (skb->len <= mss_now || !sk_can_gso(sk)) {
642 /* Avoid the costly divide in the normal 660 /* Avoid the costly divide in the normal
@@ -653,23 +671,18 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
653} 671}
654 672
655/* When a modification to fackets out becomes necessary, we need to check 673/* When a modification to fackets out becomes necessary, we need to check
656 * skb is counted to fackets_out or not. Another important thing is to 674 * skb is counted to fackets_out or not.
657 * tweak SACK fastpath hint too as it would overwrite all changes unless
658 * hint is also changed.
659 */ 675 */
660static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb, 676static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
661 int decr) 677 int decr)
662{ 678{
679 struct tcp_sock *tp = tcp_sk(sk);
680
663 if (!tp->sacked_out || tcp_is_reno(tp)) 681 if (!tp->sacked_out || tcp_is_reno(tp))
664 return; 682 return;
665 683
666 if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq)) 684 if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
667 tp->fackets_out -= decr; 685 tp->fackets_out -= decr;
668
669 /* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */
670 if (tp->fastpath_skb_hint != NULL &&
671 after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
672 tp->fastpath_cnt_hint -= decr;
673} 686}
674 687
675/* Function to create two new TCP segments. Shrinks the given segment 688/* Function to create two new TCP segments. Shrinks the given segment
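
[Editor's note] tcp_adjust_fackets_out() now takes the sock because tp->highest_sack became an skb pointer in this series and is read through an accessor. A sketch of tcp_highest_sack_seq(), paraphrased from include/net/tcp.h (not verbatim):

static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
{
	if (!tp->sacked_out)		/* nothing SACKed yet */
		return tp->snd_una;
	if (tp->highest_sack == NULL)	/* marker ran past the queue tail */
		return tp->snd_nxt;
	return TCP_SKB_CB(tp->highest_sack)->seq;
}
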
@@ -677,7 +690,8 @@ static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
677 * packet to the list. This won't be called frequently, I hope. 690 * packet to the list. This won't be called frequently, I hope.
678 * Remember, these are still headerless SKBs at this point. 691 * Remember, these are still headerless SKBs at this point.
679 */ 692 */
680int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now) 693int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
694 unsigned int mss_now)
681{ 695{
682 struct tcp_sock *tp = tcp_sk(sk); 696 struct tcp_sock *tp = tcp_sk(sk);
683 struct sk_buff *buff; 697 struct sk_buff *buff;
@@ -702,7 +716,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
702 if (buff == NULL) 716 if (buff == NULL)
703 return -ENOMEM; /* We'll just try again later. */ 717 return -ENOMEM; /* We'll just try again later. */
704 718
705 sk_charge_skb(sk, buff); 719 sk->sk_wmem_queued += buff->truesize;
720 sk_mem_charge(sk, buff->truesize);
706 nlen = skb->len - len - nsize; 721 nlen = skb->len - len - nsize;
707 buff->truesize += nlen; 722 buff->truesize += nlen;
708 skb->truesize -= nlen; 723 skb->truesize -= nlen;
@@ -712,20 +727,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
712 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; 727 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
713 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; 728 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
714 729
715 if (tcp_is_sack(tp) && tp->sacked_out &&
716 (TCP_SKB_CB(skb)->seq == tp->highest_sack))
717 tp->highest_sack = TCP_SKB_CB(buff)->seq;
718
719 /* PSH and FIN should only be set in the second packet. */ 730 /* PSH and FIN should only be set in the second packet. */
720 flags = TCP_SKB_CB(skb)->flags; 731 flags = TCP_SKB_CB(skb)->flags;
721 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 732 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
722 TCP_SKB_CB(buff)->flags = flags; 733 TCP_SKB_CB(buff)->flags = flags;
723 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 734 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
724 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
725 735
726 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { 736 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
727 /* Copy and checksum data tail into the new buffer. */ 737 /* Copy and checksum data tail into the new buffer. */
728 buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), 738 buff->csum = csum_partial_copy_nocheck(skb->data + len,
739 skb_put(buff, nsize),
729 nsize, 0); 740 nsize, 0);
730 741
731 skb_trim(skb, len); 742 skb_trim(skb, len);
@@ -772,7 +783,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
772 tcp_dec_pcount_approx_int(&tp->sacked_out, diff); 783 tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
773 tcp_verify_left_out(tp); 784 tcp_verify_left_out(tp);
774 } 785 }
775 tcp_adjust_fackets_out(tp, skb, diff); 786 tcp_adjust_fackets_out(sk, skb, diff);
776 } 787 }
777 788
778 /* Link BUFF into the send queue. */ 789 /* Link BUFF into the send queue. */
@@ -792,7 +803,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
792 803
793 eat = len; 804 eat = len;
794 k = 0; 805 k = 0;
795 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 806 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
796 if (skb_shinfo(skb)->frags[i].size <= eat) { 807 if (skb_shinfo(skb)->frags[i].size <= eat) {
797 put_page(skb_shinfo(skb)->frags[i].page); 808 put_page(skb_shinfo(skb)->frags[i].page);
798 eat -= skb_shinfo(skb)->frags[i].size; 809 eat -= skb_shinfo(skb)->frags[i].size;
@@ -815,8 +826,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
815 826
816int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) 827int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
817{ 828{
818 if (skb_cloned(skb) && 829 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
819 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
820 return -ENOMEM; 830 return -ENOMEM;
821 831
822 /* If len == headlen, we avoid __skb_pull to preserve alignment. */ 832 /* If len == headlen, we avoid __skb_pull to preserve alignment. */
@@ -830,7 +840,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
830 840
831 skb->truesize -= len; 841 skb->truesize -= len;
832 sk->sk_wmem_queued -= len; 842 sk->sk_wmem_queued -= len;
833 sk->sk_forward_alloc += len; 843 sk_mem_uncharge(sk, len);
834 sock_set_flag(sk, SOCK_QUEUE_SHRUNK); 844 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
835 845
836 /* Any change of skb->len requires recalculation of tso 846 /* Any change of skb->len requires recalculation of tso
@@ -898,6 +908,15 @@ void tcp_mtup_init(struct sock *sk)
898 icsk->icsk_mtup.probe_size = 0; 908 icsk->icsk_mtup.probe_size = 0;
899} 909}
900 910
911/* Bound MSS / TSO packet size with the half of the window */
912static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
913{
914 if (tp->max_window && pktsize > (tp->max_window >> 1))
915 return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
916 else
917 return pktsize;
918}
919
901/* This function synchronizes snd mss to current pmtu/exthdr set. 920
902 921
903 tp->rx_opt.user_mss is the mss set by the user via TCP_MAXSEG. It does NOT count 922
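
[Editor's note] tcp_bound_to_half_wnd() folds the duplicated clamp removed below from tcp_sync_mss() and tcp_current_mss() into one place. Worked numbers, illustrative only:

/* Illustrative only:
 *   max_window = 16384  ->  half window = 8192
 *   pktsize    = 9000   ->  9000 > 8192, so clamp:
 *                           max(8192, 68 - tcp_header_len) = 8192
 *
 * The 68U - tp->tcp_header_len floor keeps a pathologically small peer
 * window from shrinking segments below what the 68-byte minimum IPv4
 * MTU leaves room for after headers.
 */
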
@@ -920,7 +939,6 @@ void tcp_mtup_init(struct sock *sk)
920 NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache 939 NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
921 are READ ONLY outside this function. --ANK (980731) 940 are READ ONLY outside this function. --ANK (980731)
922 */ 941 */
923
924unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) 942unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
925{ 943{
926 struct tcp_sock *tp = tcp_sk(sk); 944 struct tcp_sock *tp = tcp_sk(sk);
@@ -931,10 +949,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
931 icsk->icsk_mtup.search_high = pmtu; 949 icsk->icsk_mtup.search_high = pmtu;
932 950
933 mss_now = tcp_mtu_to_mss(sk, pmtu); 951 mss_now = tcp_mtu_to_mss(sk, pmtu);
934 952 mss_now = tcp_bound_to_half_wnd(tp, mss_now);
935 /* Bound mss with half of window */
936 if (tp->max_window && mss_now > (tp->max_window>>1))
937 mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
938 953
939 /* And store cached results */ 954 /* And store cached results */
940 icsk->icsk_pmtu_cookie = pmtu; 955 icsk->icsk_pmtu_cookie = pmtu;
@@ -988,11 +1003,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
988 inet_csk(sk)->icsk_ext_hdr_len - 1003 inet_csk(sk)->icsk_ext_hdr_len -
989 tp->tcp_header_len); 1004 tp->tcp_header_len);
990 1005
991 if (tp->max_window && 1006 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
992 (xmit_size_goal > (tp->max_window >> 1)))
993 xmit_size_goal = max((tp->max_window >> 1),
994 68U - tp->tcp_header_len);
995
996 xmit_size_goal -= (xmit_size_goal % mss_now); 1007 xmit_size_goal -= (xmit_size_goal % mss_now);
997 } 1008 }
998 tp->xmit_size_goal = xmit_size_goal; 1009 tp->xmit_size_goal = xmit_size_goal;
@@ -1001,13 +1012,11 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
1001} 1012}
1002 1013
1003/* Congestion window validation. (RFC2861) */ 1014/* Congestion window validation. (RFC2861) */
1004
1005static void tcp_cwnd_validate(struct sock *sk) 1015static void tcp_cwnd_validate(struct sock *sk)
1006{ 1016{
1007 struct tcp_sock *tp = tcp_sk(sk); 1017 struct tcp_sock *tp = tcp_sk(sk);
1008 __u32 packets_out = tp->packets_out;
1009 1018
1010 if (packets_out >= tp->snd_cwnd) { 1019 if (tp->packets_out >= tp->snd_cwnd) {
1011 /* Network is fed fully. */ 1020
1012 tp->snd_cwnd_used = 0; 1021 tp->snd_cwnd_used = 0;
1013 tp->snd_cwnd_stamp = tcp_time_stamp; 1022 tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1022,19 +1031,35 @@ static void tcp_cwnd_validate(struct sock *sk)
1022 } 1031 }
1023} 1032}
1024 1033
1025static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd) 1034/* Returns the portion of skb which can be sent right away without
1035 * introducing MSS oddities to segment boundaries. In rare cases where
1036 * mss_now != mss_cache, we will request caller to create a small skb
1037 * per input skb which could be mostly avoided here (if desired).
1038 */
1039static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
1040 unsigned int mss_now, unsigned int cwnd)
1026{ 1041{
1027 u32 window, cwnd_len; 1042 struct tcp_sock *tp = tcp_sk(sk);
1043 u32 needed, window, cwnd_len;
1028 1044
1029 window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq); 1045 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1030 cwnd_len = mss_now * cwnd; 1046 cwnd_len = mss_now * cwnd;
1031 return min(window, cwnd_len); 1047
1048 if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
1049 return cwnd_len;
1050
1051 if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
1052 return cwnd_len;
1053
1054 needed = min(skb->len, window);
1055 return needed - needed % mss_now;
1032} 1056}
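
[Editor's note] Unlike the old tcp_window_allows() (removed above), the split point refuses to create a sub-MSS runt in the middle of the write queue; only the tail skb may legitimately end short. An illustrative walk-through:

/* Illustrative only, with mss_now = 1000 and a cwnd quota of 4
 * (cwnd_len = 4000):
 *
 *   window = 10000, skb is not the queue tail
 *       -> cwnd_len <= window, return 4000 (fast path)
 *
 *   window = 2500, skb->len = 3000, skb is the tail
 *       -> needed = min(3000, 2500) = 2500
 *       -> return 2500 - 2500 % 1000 = 2000; the 500-byte
 *          remainder waits rather than going out as a runt
 */
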
1033 1057
1034/* Can at least one segment of SKB be sent right now, according to the 1058/* Can at least one segment of SKB be sent right now, according to the
1035 * congestion window rules? If so, return how many segments are allowed. 1059 * congestion window rules? If so, return how many segments are allowed.
1036 */ 1060 */
1037static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb) 1061static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1062 struct sk_buff *skb)
1038{ 1063{
1039 u32 in_flight, cwnd; 1064 u32 in_flight, cwnd;
1040 1065
@@ -1054,13 +1079,12 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
1054/* This must be invoked the first time we consider transmitting 1079/* This must be invoked the first time we consider transmitting
1055 * SKB onto the wire. 1080 * SKB onto the wire.
1056 */ 1081 */
1057static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 1082static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
1083 unsigned int mss_now)
1058{ 1084{
1059 int tso_segs = tcp_skb_pcount(skb); 1085 int tso_segs = tcp_skb_pcount(skb);
1060 1086
1061 if (!tso_segs || 1087 if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
1062 (tso_segs > 1 &&
1063 tcp_skb_mss(skb) != mss_now)) {
1064 tcp_set_skb_tso_segs(sk, skb, mss_now); 1088 tcp_set_skb_tso_segs(sk, skb, mss_now);
1065 tso_segs = tcp_skb_pcount(skb); 1089 tso_segs = tcp_skb_pcount(skb);
1066 } 1090 }
@@ -1080,16 +1104,13 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
1080 * 4. Or TCP_CORK is not set, and all sent packets are ACKed. 1104 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1081 * With Minshall's modification: all sent small packets are ACKed. 1105 * With Minshall's modification: all sent small packets are ACKed.
1082 */ 1106 */
1083
1084static inline int tcp_nagle_check(const struct tcp_sock *tp, 1107static inline int tcp_nagle_check(const struct tcp_sock *tp,
1085 const struct sk_buff *skb, 1108 const struct sk_buff *skb,
1086 unsigned mss_now, int nonagle) 1109 unsigned mss_now, int nonagle)
1087{ 1110{
1088 return (skb->len < mss_now && 1111 return (skb->len < mss_now &&
1089 ((nonagle&TCP_NAGLE_CORK) || 1112 ((nonagle & TCP_NAGLE_CORK) ||
1090 (!nonagle && 1113 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
1091 tp->packets_out &&
1092 tcp_minshall_check(tp))));
1093} 1114}
1094 1115
1095/* Return non-zero if the Nagle test allows this packet to be 1116/* Return non-zero if the Nagle test allows this packet to be
@@ -1121,14 +1142,15 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
1121} 1142}
1122 1143
1123/* Does at least the first segment of SKB fit into the send window? */ 1144/* Does at least the first segment of SKB fit into the send window? */
1124static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss) 1145static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
1146 unsigned int cur_mss)
1125{ 1147{
1126 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 1148 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1127 1149
1128 if (skb->len > cur_mss) 1150 if (skb->len > cur_mss)
1129 end_seq = TCP_SKB_CB(skb)->seq + cur_mss; 1151 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1130 1152
1131 return !after(end_seq, tp->snd_una + tp->snd_wnd); 1153 return !after(end_seq, tcp_wnd_end(tp));
1132} 1154}
1133 1155
1134/* This checks if the data bearing packet SKB (usually tcp_send_head(sk)) 1156/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
@@ -1147,8 +1169,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
1147 return 0; 1169 return 0;
1148 1170
1149 cwnd_quota = tcp_cwnd_test(tp, skb); 1171 cwnd_quota = tcp_cwnd_test(tp, skb);
1150 if (cwnd_quota && 1172 if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
1151 !tcp_snd_wnd_test(tp, skb, cur_mss))
1152 cwnd_quota = 0; 1173 cwnd_quota = 0;
1153 1174
1154 return cwnd_quota; 1175 return cwnd_quota;
@@ -1172,7 +1193,8 @@ int tcp_may_send_now(struct sock *sk)
1172 * know that all the data is in scatter-gather pages, and that the 1193 * know that all the data is in scatter-gather pages, and that the
1173 * packet has never been sent out before (and thus is not cloned). 1194 * packet has never been sent out before (and thus is not cloned).
1174 */ 1195 */
1175static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now) 1196static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1197 unsigned int mss_now)
1176{ 1198{
1177 struct sk_buff *buff; 1199 struct sk_buff *buff;
1178 int nlen = skb->len - len; 1200 int nlen = skb->len - len;
@@ -1182,11 +1204,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1182 if (skb->len != skb->data_len) 1204 if (skb->len != skb->data_len)
1183 return tcp_fragment(sk, skb, len, mss_now); 1205 return tcp_fragment(sk, skb, len, mss_now);
1184 1206
1185 buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC); 1207 buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC);
1186 if (unlikely(buff == NULL)) 1208 if (unlikely(buff == NULL))
1187 return -ENOMEM; 1209 return -ENOMEM;
1188 1210
1189 sk_charge_skb(sk, buff); 1211 sk->sk_wmem_queued += buff->truesize;
1212 sk_mem_charge(sk, buff->truesize);
1190 buff->truesize += nlen; 1213 buff->truesize += nlen;
1191 skb->truesize -= nlen; 1214 skb->truesize -= nlen;
1192 1215
@@ -1197,7 +1220,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1197 1220
1198 /* PSH and FIN should only be set in the second packet. */ 1221 /* PSH and FIN should only be set in the second packet. */
1199 flags = TCP_SKB_CB(skb)->flags; 1222 flags = TCP_SKB_CB(skb)->flags;
1200 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 1223 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
1201 TCP_SKB_CB(buff)->flags = flags; 1224 TCP_SKB_CB(buff)->flags = flags;
1202 1225
1203 /* This packet was never sent out yet, so no SACK bits. */ 1226 /* This packet was never sent out yet, so no SACK bits. */
@@ -1235,15 +1258,15 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1235 goto send_now; 1258 goto send_now;
1236 1259
1237 /* Defer for less than two clock ticks. */ 1260 /* Defer for less than two clock ticks. */
1238 if (!tp->tso_deferred && ((jiffies<<1)>>1) - (tp->tso_deferred>>1) > 1) 1261 if (tp->tso_deferred &&
1262 ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
1239 goto send_now; 1263 goto send_now;
1240 1264
1241 in_flight = tcp_packets_in_flight(tp); 1265 in_flight = tcp_packets_in_flight(tp);
1242 1266
1243 BUG_ON(tcp_skb_pcount(skb) <= 1 || 1267 BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
1244 (tp->snd_cwnd <= in_flight));
1245 1268
1246 send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq; 1269 send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1247 1270
1248 /* From in_flight test above, we know that cwnd > in_flight. */ 1271 /* From in_flight test above, we know that cwnd > in_flight. */
1249 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; 1272 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
@@ -1274,7 +1297,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1274 } 1297 }
1275 1298
1276 /* Ok, it looks like it is advisable to defer. */ 1299 /* Ok, it looks like it is advisable to defer. */
1277 tp->tso_deferred = 1 | (jiffies<<1); 1300 tp->tso_deferred = 1 | (jiffies << 1);
1278 1301
1279 return 1; 1302 return 1;
1280 1303
@@ -1286,7 +1309,8 @@ send_now:
1286/* Create a new MTU probe if we are ready. 1309/* Create a new MTU probe if we are ready.
1287 * Returns 0 if we should wait to probe (no cwnd available), 1310 * Returns 0 if we should wait to probe (no cwnd available),
1288 * 1 if a probe was sent, 1311 * 1 if a probe was sent,
1289 * -1 otherwise */ 1312 * -1 otherwise
1313 */
1290static int tcp_mtu_probe(struct sock *sk) 1314static int tcp_mtu_probe(struct sock *sk)
1291{ 1315{
1292 struct tcp_sock *tp = tcp_sk(sk); 1316 struct tcp_sock *tp = tcp_sk(sk);
@@ -1295,7 +1319,6 @@ static int tcp_mtu_probe(struct sock *sk)
1295 int len; 1319 int len;
1296 int probe_size; 1320 int probe_size;
1297 int size_needed; 1321 int size_needed;
1298 unsigned int pif;
1299 int copy; 1322 int copy;
1300 int mss_now; 1323 int mss_now;
1301 1324
@@ -1312,7 +1335,7 @@ static int tcp_mtu_probe(struct sock *sk)
1312 1335
1313 /* Very simple search strategy: just double the MSS. */ 1336 /* Very simple search strategy: just double the MSS. */
1314 mss_now = tcp_current_mss(sk, 0); 1337 mss_now = tcp_current_mss(sk, 0);
1315 probe_size = 2*tp->mss_cache; 1338 probe_size = 2 * tp->mss_cache;
1316 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; 1339 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1317 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { 1340 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
1318 /* TODO: set timer for probe_converge_event */ 1341 /* TODO: set timer for probe_converge_event */
@@ -1325,14 +1348,12 @@ static int tcp_mtu_probe(struct sock *sk)
1325 1348
1326 if (tp->snd_wnd < size_needed) 1349 if (tp->snd_wnd < size_needed)
1327 return -1; 1350 return -1;
1328 if (after(tp->snd_nxt + size_needed, tp->snd_una + tp->snd_wnd)) 1351 if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
1329 return 0; 1352 return 0;
1330 1353
1331 /* Do we need to wait to drain cwnd? */ 1354 /* Do we need to wait to drain cwnd? With none in flight, don't stall */
1332 pif = tcp_packets_in_flight(tp); 1355 if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
1333 if (pif + 2 > tp->snd_cwnd) { 1356 if (!tcp_packets_in_flight(tp))
1334 /* With no packets in flight, don't stall. */
1335 if (pif == 0)
1336 return -1; 1357 return -1;
1337 else 1358 else
1338 return 0; 1359 return 0;
@@ -1341,10 +1362,10 @@ static int tcp_mtu_probe(struct sock *sk)
1341 /* We're allowed to probe. Build it now. */ 1362 /* We're allowed to probe. Build it now. */
1342 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL) 1363 if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
1343 return -1; 1364 return -1;
1344 sk_charge_skb(sk, nskb); 1365 sk->sk_wmem_queued += nskb->truesize;
1366 sk_mem_charge(sk, nskb->truesize);
1345 1367
1346 skb = tcp_send_head(sk); 1368 skb = tcp_send_head(sk);
1347 tcp_insert_write_queue_before(nskb, skb, sk);
1348 1369
1349 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1370 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1350 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1371 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1353,30 +1374,32 @@ static int tcp_mtu_probe(struct sock *sk)
1353 nskb->csum = 0; 1374 nskb->csum = 0;
1354 nskb->ip_summed = skb->ip_summed; 1375 nskb->ip_summed = skb->ip_summed;
1355 1376
1356 len = 0; 1377 tcp_insert_write_queue_before(nskb, skb, sk);
1357 while (len < probe_size) {
1358 next = tcp_write_queue_next(sk, skb);
1359 1378
1379 len = 0;
1380 tcp_for_write_queue_from_safe(skb, next, sk) {
1360 copy = min_t(int, skb->len, probe_size - len); 1381 copy = min_t(int, skb->len, probe_size - len);
1361 if (nskb->ip_summed) 1382 if (nskb->ip_summed)
1362 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); 1383 skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
1363 else 1384 else
1364 nskb->csum = skb_copy_and_csum_bits(skb, 0, 1385 nskb->csum = skb_copy_and_csum_bits(skb, 0,
1365 skb_put(nskb, copy), copy, nskb->csum); 1386 skb_put(nskb, copy),
1387 copy, nskb->csum);
1366 1388
1367 if (skb->len <= copy) { 1389 if (skb->len <= copy) {
1368 /* We've eaten all the data from this skb. 1390 /* We've eaten all the data from this skb.
1369 * Throw it away. */ 1391 * Throw it away. */
1370 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; 1392 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
1371 tcp_unlink_write_queue(skb, sk); 1393 tcp_unlink_write_queue(skb, sk);
1372 sk_stream_free_skb(sk, skb); 1394 sk_wmem_free_skb(sk, skb);
1373 } else { 1395 } else {
1374 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & 1396 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
1375 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 1397 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
1376 if (!skb_shinfo(skb)->nr_frags) { 1398 if (!skb_shinfo(skb)->nr_frags) {
1377 skb_pull(skb, copy); 1399 skb_pull(skb, copy);
1378 if (skb->ip_summed != CHECKSUM_PARTIAL) 1400 if (skb->ip_summed != CHECKSUM_PARTIAL)
1379 skb->csum = csum_partial(skb->data, skb->len, 0); 1401 skb->csum = csum_partial(skb->data,
1402 skb->len, 0);
1380 } else { 1403 } else {
1381 __pskb_trim_head(skb, copy); 1404 __pskb_trim_head(skb, copy);
1382 tcp_set_skb_tso_segs(sk, skb, mss_now); 1405 tcp_set_skb_tso_segs(sk, skb, mss_now);
@@ -1385,7 +1408,9 @@ static int tcp_mtu_probe(struct sock *sk)
1385 } 1408 }
1386 1409
1387 len += copy; 1410 len += copy;
1388 skb = next; 1411
1412 if (len >= probe_size)
1413 break;
1389 } 1414 }
1390 tcp_init_tso_segs(sk, nskb, nskb->len); 1415 tcp_init_tso_segs(sk, nskb, nskb->len);
1391 1416
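
[Editor's note] The open-coded walk via tcp_write_queue_next() becomes a list iterator, presumably defined with the other write-queue helpers as roughly (sketch, not verbatim):

/* Walk the write queue from 'skb' onward; 'tmp' caches the next entry so
 * the current skb may be unlinked and freed mid-loop, as done above. */
#define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
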
@@ -1394,9 +1419,9 @@ static int tcp_mtu_probe(struct sock *sk)
1394 TCP_SKB_CB(nskb)->when = tcp_time_stamp; 1419 TCP_SKB_CB(nskb)->when = tcp_time_stamp;
1395 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { 1420 if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
1396 /* Decrement cwnd here because we are sending 1421 /* Decrement cwnd here because we are sending
1397 * effectively two packets. */ 1422 * effectively two packets. */
1398 tp->snd_cwnd--; 1423 tp->snd_cwnd--;
1399 update_send_head(sk, nskb); 1424 tcp_event_new_data_sent(sk, nskb);
1400 1425
1401 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); 1426 icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
1402 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; 1427 tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1408,7 +1433,6 @@ static int tcp_mtu_probe(struct sock *sk)
1408 return -1; 1433 return -1;
1409} 1434}
1410 1435
1411
1412/* This routine writes packets to the network. It advances the 1436/* This routine writes packets to the network. It advances the
1413 * send_head. This happens as incoming acks open up the remote 1437 * send_head. This happens as incoming acks open up the remote
1414 * window for us. 1438 * window for us.
@@ -1464,17 +1488,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1464 } 1488 }
1465 1489
1466 limit = mss_now; 1490 limit = mss_now;
1467 if (tso_segs > 1) { 1491 if (tso_segs > 1)
1468 limit = tcp_window_allows(tp, skb, 1492 limit = tcp_mss_split_point(sk, skb, mss_now,
1469 mss_now, cwnd_quota); 1493 cwnd_quota);
1470
1471 if (skb->len < limit) {
1472 unsigned int trim = skb->len % mss_now;
1473
1474 if (trim)
1475 limit = skb->len - trim;
1476 }
1477 }
1478 1494
1479 if (skb->len > limit && 1495 if (skb->len > limit &&
1480 unlikely(tso_fragment(sk, skb, limit, mss_now))) 1496 unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1488,7 +1504,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1488 /* Advance the send_head. This one is sent out. 1504 /* Advance the send_head. This one is sent out.
1489 * This call will increment packets_out. 1505 * This call will increment packets_out.
1490 */ 1506 */
1491 update_send_head(sk, skb); 1507 tcp_event_new_data_sent(sk, skb);
1492 1508
1493 tcp_minshall_update(tp, mss_now, skb); 1509 tcp_minshall_update(tp, mss_now, skb);
1494 sent_pkts++; 1510 sent_pkts++;
@@ -1521,7 +1537,6 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
1521 */ 1537 */
1522void tcp_push_one(struct sock *sk, unsigned int mss_now) 1538void tcp_push_one(struct sock *sk, unsigned int mss_now)
1523{ 1539{
1524 struct tcp_sock *tp = tcp_sk(sk);
1525 struct sk_buff *skb = tcp_send_head(sk); 1540 struct sk_buff *skb = tcp_send_head(sk);
1526 unsigned int tso_segs, cwnd_quota; 1541 unsigned int tso_segs, cwnd_quota;
1527 1542
@@ -1536,17 +1551,9 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1536 BUG_ON(!tso_segs); 1551 BUG_ON(!tso_segs);
1537 1552
1538 limit = mss_now; 1553 limit = mss_now;
1539 if (tso_segs > 1) { 1554 if (tso_segs > 1)
1540 limit = tcp_window_allows(tp, skb, 1555 limit = tcp_mss_split_point(sk, skb, mss_now,
1541 mss_now, cwnd_quota); 1556 cwnd_quota);
1542
1543 if (skb->len < limit) {
1544 unsigned int trim = skb->len % mss_now;
1545
1546 if (trim)
1547 limit = skb->len - trim;
1548 }
1549 }
1550 1557
1551 if (skb->len > limit && 1558 if (skb->len > limit &&
1552 unlikely(tso_fragment(sk, skb, limit, mss_now))) 1559 unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1556,7 +1563,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1556 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1563 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1557 1564
1558 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { 1565 if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
1559 update_send_head(sk, skb); 1566 tcp_event_new_data_sent(sk, skb);
1560 tcp_cwnd_validate(sk); 1567 tcp_cwnd_validate(sk);
1561 return; 1568 return;
1562 } 1569 }
@@ -1633,11 +1640,12 @@ u32 __tcp_select_window(struct sock *sk)
1633 if (mss > full_space) 1640 if (mss > full_space)
1634 mss = full_space; 1641 mss = full_space;
1635 1642
1636 if (free_space < full_space/2) { 1643 if (free_space < (full_space >> 1)) {
1637 icsk->icsk_ack.quick = 0; 1644 icsk->icsk_ack.quick = 0;
1638 1645
1639 if (tcp_memory_pressure) 1646 if (tcp_memory_pressure)
1640 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); 1647 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
1648 4U * tp->advmss);
1641 1649
1642 if (free_space < mss) 1650 if (free_space < mss)
1643 return 0; 1651 return 0;
@@ -1670,9 +1678,9 @@ u32 __tcp_select_window(struct sock *sk)
1670 * is too small. 1678 * is too small.
1671 */ 1679 */
1672 if (window <= free_space - mss || window > free_space) 1680 if (window <= free_space - mss || window > free_space)
1673 window = (free_space/mss)*mss; 1681 window = (free_space / mss) * mss;
1674 else if (mss == full_space && 1682 else if (mss == full_space &&
1675 free_space > window + full_space/2) 1683 free_space > window + (full_space >> 1))
1676 window = free_space; 1684 window = free_space;
1677 } 1685 }
1678 1686
@@ -1680,86 +1688,82 @@ u32 __tcp_select_window(struct sock *sk)
1680} 1688}
1681 1689
1682/* Attempt to collapse two adjacent SKB's during retransmission. */ 1690/* Attempt to collapse two adjacent SKB's during retransmission. */
1683static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) 1691static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
1692 int mss_now)
1684{ 1693{
1685 struct tcp_sock *tp = tcp_sk(sk); 1694 struct tcp_sock *tp = tcp_sk(sk);
1686 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); 1695 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
1696 int skb_size, next_skb_size;
1697 u16 flags;
1687 1698
1688 /* The first test we must make is that neither of these two 1699 /* The first test we must make is that neither of these two
1689 * SKB's are still referenced by someone else. 1700 * SKB's are still referenced by someone else.
1690 */ 1701 */
1691 if (!skb_cloned(skb) && !skb_cloned(next_skb)) { 1702 if (skb_cloned(skb) || skb_cloned(next_skb))
1692 int skb_size = skb->len, next_skb_size = next_skb->len; 1703 return;
1693 u16 flags = TCP_SKB_CB(skb)->flags;
1694 1704
1695 /* Also punt if next skb has been SACK'd. */ 1705 skb_size = skb->len;
1696 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) 1706 next_skb_size = next_skb->len;
1697 return; 1707 flags = TCP_SKB_CB(skb)->flags;
1698 1708
1699 /* Next skb is out of window. */ 1709 /* Also punt if next skb has been SACK'd. */
1700 if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd)) 1710 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
1701 return; 1711 return;
1702 1712
1703 /* Punt if not enough space exists in the first SKB for 1713 /* Next skb is out of window. */
1704 * the data in the second, or the total combined payload 1714 if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
1705 * would exceed the MSS. 1715 return;
1706 */
1707 if ((next_skb_size > skb_tailroom(skb)) ||
1708 ((skb_size + next_skb_size) > mss_now))
1709 return;
1710 1716
1711 BUG_ON(tcp_skb_pcount(skb) != 1 || 1717 /* Punt if not enough space exists in the first SKB for
1712 tcp_skb_pcount(next_skb) != 1); 1718 * the data in the second, or the total combined payload
1719 * would exceed the MSS.
1720 */
1721 if ((next_skb_size > skb_tailroom(skb)) ||
1722 ((skb_size + next_skb_size) > mss_now))
1723 return;
1713 1724
1714 if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out && 1725 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
1715 (TCP_SKB_CB(next_skb)->seq == tp->highest_sack)))
1716 return;
1717 1726
1718 /* Ok. We will be able to collapse the packet. */ 1727 tcp_highest_sack_combine(sk, next_skb, skb);
1719 tcp_unlink_write_queue(next_skb, sk);
1720 1728
1721 skb_copy_from_linear_data(next_skb, 1729 /* Ok. We will be able to collapse the packet. */
1722 skb_put(skb, next_skb_size), 1730 tcp_unlink_write_queue(next_skb, sk);
1723 next_skb_size);
1724 1731
1725 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 1732 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
1726 skb->ip_summed = CHECKSUM_PARTIAL; 1733 next_skb_size);
1727 1734
1728 if (skb->ip_summed != CHECKSUM_PARTIAL) 1735 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
1729 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); 1736 skb->ip_summed = CHECKSUM_PARTIAL;
1730 1737
1731 /* Update sequence range on original skb. */ 1738 if (skb->ip_summed != CHECKSUM_PARTIAL)
1732 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; 1739 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
1733 1740
1734 /* Merge over control information. */ 1741 /* Update sequence range on original skb. */
1735 flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ 1742 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
1736 TCP_SKB_CB(skb)->flags = flags;
1737 1743
1738 /* All done, get rid of second SKB and account for it so 1744 /* Merge over control information. */
1739 * packet counting does not break. 1745 flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
1740 */ 1746 TCP_SKB_CB(skb)->flags = flags;
1741 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
1742 if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
1743 tp->retrans_out -= tcp_skb_pcount(next_skb);
1744 if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST)
1745 tp->lost_out -= tcp_skb_pcount(next_skb);
1746 /* Reno case is special. Sigh... */
1747 if (tcp_is_reno(tp) && tp->sacked_out)
1748 tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
1749
1750 tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb));
1751 tp->packets_out -= tcp_skb_pcount(next_skb);
1752
1753 /* changed transmit queue under us so clear hints */
1754 tcp_clear_retrans_hints_partial(tp);
1755 /* manually tune sacktag skb hint */
1756 if (tp->fastpath_skb_hint == next_skb) {
1757 tp->fastpath_skb_hint = skb;
1758 tp->fastpath_cnt_hint -= tcp_skb_pcount(skb);
1759 }
1760 1747
1761 sk_stream_free_skb(sk, next_skb); 1748 /* All done, get rid of second SKB and account for it so
1762 } 1749 * packet counting does not break.
1750 */
1751 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
1752 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
1753 tp->retrans_out -= tcp_skb_pcount(next_skb);
1754 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
1755 tp->lost_out -= tcp_skb_pcount(next_skb);
1756 /* Reno case is special. Sigh... */
1757 if (tcp_is_reno(tp) && tp->sacked_out)
1758 tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
1759
1760 tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
1761 tp->packets_out -= tcp_skb_pcount(next_skb);
1762
1763 /* changed transmit queue under us so clear hints */
1764 tcp_clear_retrans_hints_partial(tp);
1765
1766 sk_wmem_free_skb(sk, next_skb);
1763} 1767}
1764 1768
1765/* Do a simple retransmit without using the backoff mechanisms in 1769/* Do a simple retransmit without using the backoff mechanisms in
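
[Editor's note] The old body's WARN_ON and manual fastpath-hint surgery collapse into tcp_highest_sack_combine(): when the skb carrying the highest-SACK marker is merged away, the marker simply moves to the surviving skb. A sketch, paraphrased from include/net/tcp.h of this series:

static inline void tcp_highest_sack_combine(struct sock *sk,
					    struct sk_buff *old,
					    struct sk_buff *new)
{
	if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
		tcp_sk(sk)->highest_sack = new;
}
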
@@ -1778,12 +1782,12 @@ void tcp_simple_retransmit(struct sock *sk)
1778 if (skb == tcp_send_head(sk)) 1782 if (skb == tcp_send_head(sk))
1779 break; 1783 break;
1780 if (skb->len > mss && 1784 if (skb->len > mss &&
1781 !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { 1785 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1782 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { 1786 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1783 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1787 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1784 tp->retrans_out -= tcp_skb_pcount(skb); 1788 tp->retrans_out -= tcp_skb_pcount(skb);
1785 } 1789 }
1786 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { 1790 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) {
1787 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1791 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1788 tp->lost_out += tcp_skb_pcount(skb); 1792 tp->lost_out += tcp_skb_pcount(skb);
1789 lost = 1; 1793 lost = 1;
@@ -1848,7 +1852,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1848 * case, when window is shrunk to zero. In this case 1852 * case, when window is shrunk to zero. In this case
1849 * our retransmit serves as a zero window probe. 1853 * our retransmit serves as a zero window probe.
1850 */ 1854 */
1851 if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd) 1855 if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))
1852 && TCP_SKB_CB(skb)->seq != tp->snd_una) 1856 && TCP_SKB_CB(skb)->seq != tp->snd_una)
1853 return -EAGAIN; 1857 return -EAGAIN;
1854 1858
@@ -1862,8 +1866,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    (skb->len < (cur_mss >> 1)) &&
 	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
 	    (!tcp_skb_is_last(sk, skb)) &&
-	    (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
-	    (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+	    (skb_shinfo(skb)->nr_frags == 0 &&
+	     skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+	    (tcp_skb_pcount(skb) == 1 &&
+	     tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
 	    (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
1869 1875
@@ -1878,12 +1884,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1878 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 1884 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1879 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 1885 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
-			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
-			skb_shinfo(skb)->gso_segs = 1;
-			skb_shinfo(skb)->gso_size = 0;
-			skb_shinfo(skb)->gso_type = 0;
+			/* Reuse, even though it does some unnecessary work */
+			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
+					     TCP_SKB_CB(skb)->flags);
 			skb->ip_summed = CHECKSUM_NONE;
-			skb->csum = 0;
 		}
 	}
 
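Each of the hunks around here swaps the same open-coded block (csum, flags, sacked, the GSO fields, seq/end_seq) for one tcp_init_nondata_skb() call. The helper itself is added earlier in tcp_output.c, outside this excerpt; reconstructed from its call sites it plausibly reads as below — note how SYN and FIN consume one sequence number, which is why end_seq is conditionally bumped:

	/* Sketch inferred from the call sites in this diff, not copied verbatim. */
	static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
	{
		skb->csum = 0;

		TCP_SKB_CB(skb)->flags = flags;
		TCP_SKB_CB(skb)->sacked = 0;

		skb_shinfo(skb)->gso_segs = 1;
		skb_shinfo(skb)->gso_size = 0;
		skb_shinfo(skb)->gso_type = 0;

		TCP_SKB_CB(skb)->seq = seq;
		if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
			seq++;			/* SYN/FIN occupy a sequence number */
		TCP_SKB_CB(skb)->end_seq = seq;
	}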
@@ -1901,7 +1905,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1901 tp->total_retrans++; 1905 tp->total_retrans++;
1902 1906
1903#if FASTRETRANS_DEBUG > 0 1907#if FASTRETRANS_DEBUG > 0
1904 if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { 1908 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1905 if (net_ratelimit()) 1909 if (net_ratelimit())
1906 printk(KERN_DEBUG "retrans_out leaked.\n"); 1910 printk(KERN_DEBUG "retrans_out leaked.\n");
1907 } 1911 }
@@ -1943,7 +1947,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1943 if (tp->retransmit_skb_hint) { 1947 if (tp->retransmit_skb_hint) {
1944 skb = tp->retransmit_skb_hint; 1948 skb = tp->retransmit_skb_hint;
1945 packet_cnt = tp->retransmit_cnt_hint; 1949 packet_cnt = tp->retransmit_cnt_hint;
1946 }else{ 1950 } else {
1947 skb = tcp_write_queue_head(sk); 1951 skb = tcp_write_queue_head(sk);
1948 packet_cnt = 0; 1952 packet_cnt = 0;
1949 } 1953 }
@@ -1970,7 +1974,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1970 return; 1974 return;
1971 1975
1972 if (sacked & TCPCB_LOST) { 1976 if (sacked & TCPCB_LOST) {
1973 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { 1977 if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
1974 if (tcp_retransmit_skb(sk, skb)) { 1978 if (tcp_retransmit_skb(sk, skb)) {
1975 tp->retransmit_skb_hint = NULL; 1979 tp->retransmit_skb_hint = NULL;
1976 return; 1980 return;
@@ -2028,7 +2032,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2028 break; 2032 break;
2029 tp->forward_skb_hint = skb; 2033 tp->forward_skb_hint = skb;
2030 2034
2031 if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) 2035 if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
2032 break; 2036 break;
2033 2037
2034 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) 2038 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
@@ -2052,7 +2056,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2052 } 2056 }
2053} 2057}
2054 2058
2055
2056/* Send a fin. The caller locks the socket for us. This cannot be 2059/* Send a fin. The caller locks the socket for us. This cannot be
2057 * allowed to fail queueing a FIN frame under any circumstances. 2060 * allowed to fail queueing a FIN frame under any circumstances.
2058 */ 2061 */
@@ -2083,16 +2086,9 @@ void tcp_send_fin(struct sock *sk)
 
 		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
-		skb->csum = 0;
-		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
-		TCP_SKB_CB(skb)->sacked = 0;
-		skb_shinfo(skb)->gso_segs = 1;
-		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
-
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
-		TCP_SKB_CB(skb)->seq = tp->write_seq;
-		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+		tcp_init_nondata_skb(skb, tp->write_seq,
+				     TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
 		tcp_queue_skb(sk, skb);
 	}
 	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2116,16 +2112,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
-	skb->csum = 0;
-	TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
-
+	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
+			     TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
 	/* Send it off. */
-	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
@@ -2138,14 +2127,14 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2138 */ 2127 */
2139int tcp_send_synack(struct sock *sk) 2128int tcp_send_synack(struct sock *sk)
2140{ 2129{
2141 struct sk_buff* skb; 2130 struct sk_buff *skb;
2142 2131
2143 skb = tcp_write_queue_head(sk); 2132 skb = tcp_write_queue_head(sk);
2144 if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { 2133 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
2145 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2134 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2146 return -EFAULT; 2135 return -EFAULT;
2147 } 2136 }
2148 if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) { 2137 if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
2149 if (skb_cloned(skb)) { 2138 if (skb_cloned(skb)) {
2150 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2139 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2151 if (nskb == NULL) 2140 if (nskb == NULL)
@@ -2153,8 +2142,9 @@ int tcp_send_synack(struct sock *sk)
 			tcp_unlink_write_queue(skb, sk);
 			skb_header_release(nskb);
 			__tcp_add_write_queue_head(sk, nskb);
-			sk_stream_free_skb(sk, skb);
-			sk_charge_skb(sk, nskb);
+			sk_wmem_free_skb(sk, skb);
+			sk->sk_wmem_queued += nskb->truesize;
+			sk_mem_charge(sk, nskb->truesize);
 			skb = nskb;
 		}
 
@@ -2168,8 +2158,8 @@ int tcp_send_synack(struct sock *sk)
2168/* 2158/*
2169 * Prepare a SYN-ACK. 2159 * Prepare a SYN-ACK.
2170 */ 2160 */
2171struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2161struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2172 struct request_sock *req) 2162 struct request_sock *req)
2173{ 2163{
2174 struct inet_request_sock *ireq = inet_rsk(req); 2164 struct inet_request_sock *ireq = inet_rsk(req);
2175 struct tcp_sock *tp = tcp_sk(sk); 2165 struct tcp_sock *tp = tcp_sk(sk);
@@ -2212,12 +2202,11 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2212 TCP_ECN_make_synack(req, th); 2202 TCP_ECN_make_synack(req, th);
2213 th->source = inet_sk(sk)->sport; 2203 th->source = inet_sk(sk)->sport;
2214 th->dest = ireq->rmt_port; 2204 th->dest = ireq->rmt_port;
-	TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
+	/* Setting of flags are superfluous here for callers (and ECE is
+	 * not even correctly set)
+	 */
+	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
+			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
2221 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2210 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2222 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); 2211 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2223 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ 2212 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2249,7 +2238,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2249 NULL) 2238 NULL)
2250 ); 2239 );
2251 2240
2252 skb->csum = 0;
2253 th->doff = (tcp_header_size >> 2); 2241 th->doff = (tcp_header_size >> 2);
2254 TCP_INC_STATS(TCP_MIB_OUTSEGS); 2242 TCP_INC_STATS(TCP_MIB_OUTSEGS);
2255 2243
@@ -2341,23 +2329,17 @@ int tcp_connect(struct sock *sk)
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
 
-	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
-	TCP_ECN_send_syn(sk, buff);
-	TCP_SKB_CB(buff)->sacked = 0;
-	skb_shinfo(buff)->gso_segs = 1;
-	skb_shinfo(buff)->gso_size = 0;
-	skb_shinfo(buff)->gso_type = 0;
-	buff->csum = 0;
 	tp->snd_nxt = tp->write_seq;
-	TCP_SKB_CB(buff)->seq = tp->write_seq++;
-	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
+	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
+	TCP_ECN_send_syn(sk, buff);
 
 	/* Send it off. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
 	skb_header_release(buff);
 	__tcp_add_write_queue_tail(sk, buff);
-	sk_charge_skb(sk, buff);
+	sk->sk_wmem_queued += buff->truesize;
+	sk_mem_charge(sk, buff->truesize);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
 
@@ -2386,9 +2368,10 @@ void tcp_send_delayed_ack(struct sock *sk)
2386 2368
2387 if (ato > TCP_DELACK_MIN) { 2369 if (ato > TCP_DELACK_MIN) {
2388 const struct tcp_sock *tp = tcp_sk(sk); 2370 const struct tcp_sock *tp = tcp_sk(sk);
2389 int max_ato = HZ/2; 2371 int max_ato = HZ / 2;
2390 2372
2391 if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) 2373 if (icsk->icsk_ack.pingpong ||
2374 (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
2392 max_ato = TCP_DELACK_MAX; 2375 max_ato = TCP_DELACK_MAX;
2393 2376
2394 /* Slow path, intersegment interval is "high". */ 2377 /* Slow path, intersegment interval is "high". */
@@ -2398,7 +2381,7 @@ void tcp_send_delayed_ack(struct sock *sk)
2398 * directly. 2381 * directly.
2399 */ 2382 */
2400 if (tp->srtt) { 2383 if (tp->srtt) {
2401 int rtt = max(tp->srtt>>3, TCP_DELACK_MIN); 2384 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
2402 2385
2403 if (rtt < max_ato) 2386 if (rtt < max_ato)
2404 max_ato = rtt; 2387 max_ato = rtt;
@@ -2432,37 +2415,32 @@ void tcp_send_delayed_ack(struct sock *sk)
 /* This routine sends an ack and also updates the window. */
 void tcp_send_ack(struct sock *sk)
 {
-	/* If we have been reset, we may not send again. */
-	if (sk->sk_state != TCP_CLOSE) {
-		struct sk_buff *buff;
+	struct sk_buff *buff;
 
-		/* We are not putting this on the write queue, so
-		 * tcp_transmit_skb() will set the ownership to this
-		 * sock.
-		 */
-		buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
-		if (buff == NULL) {
-			inet_csk_schedule_ack(sk);
-			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  TCP_DELACK_MAX, TCP_RTO_MAX);
-			return;
-		}
+	/* If we have been reset, we may not send again. */
+	if (sk->sk_state == TCP_CLOSE)
+		return;
 
-		/* Reserve space for headers and prepare control bits. */
-		skb_reserve(buff, MAX_TCP_HEADER);
-		buff->csum = 0;
-		TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
-		TCP_SKB_CB(buff)->sacked = 0;
-		skb_shinfo(buff)->gso_segs = 1;
-		skb_shinfo(buff)->gso_size = 0;
-		skb_shinfo(buff)->gso_type = 0;
+	/* We are not putting this on the write queue, so
+	 * tcp_transmit_skb() will set the ownership to this
+	 * sock.
+	 */
+	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+	if (buff == NULL) {
+		inet_csk_schedule_ack(sk);
+		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+					  TCP_DELACK_MAX, TCP_RTO_MAX);
+		return;
+	}
 
-		/* Send it off, this clears delayed acks for us. */
-		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
-		TCP_SKB_CB(buff)->when = tcp_time_stamp;
-		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
-	}
+	/* Reserve space for headers and prepare control bits. */
+	skb_reserve(buff, MAX_TCP_HEADER);
+	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
+
+	/* Send it off, this clears delayed acks for us. */
+	TCP_SKB_CB(buff)->when = tcp_time_stamp;
+	tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 }
 
 /* This routine sends a packet with an out of date sequence
@@ -2488,66 +2466,57 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 
 	/* Reserve space for headers and set control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
-	skb->csum = 0;
-	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
-	TCP_SKB_CB(skb)->sacked = urgent;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
-
 	/* Use a previous sequence. This should cause the other
 	 * end to send an ack. Don't queue or clone SKB, just
 	 * send it.
 	 */
-	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
 int tcp_write_wakeup(struct sock *sk)
 {
-	if (sk->sk_state != TCP_CLOSE) {
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct sk_buff *skb;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
 
-		if ((skb = tcp_send_head(sk)) != NULL &&
-		    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
-			int err;
-			unsigned int mss = tcp_current_mss(sk, 0);
-			unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
+	if (sk->sk_state == TCP_CLOSE)
+		return -1;
 
-			if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
-				tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
+	if ((skb = tcp_send_head(sk)) != NULL &&
+	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
+		int err;
+		unsigned int mss = tcp_current_mss(sk, 0);
+		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
-			/* We are probing the opening of a window
-			 * but the window size is != 0
-			 * must have been a result SWS avoidance ( sender )
-			 */
-			if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
-			    skb->len > mss) {
-				seg_size = min(seg_size, mss);
-				TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-				if (tcp_fragment(sk, skb, seg_size, mss))
-					return -1;
-			} else if (!tcp_skb_pcount(skb))
-				tcp_set_skb_tso_segs(sk, skb, mss);
+		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
+			tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
 
-			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
-			if (!err) {
-				update_send_head(sk, skb);
-			}
-			return err;
-		} else {
-			if (tp->urg_mode &&
-			    between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
-				tcp_xmit_probe_skb(sk, TCPCB_URG);
-			return tcp_xmit_probe_skb(sk, 0);
-		}
+		/* We are probing the opening of a window
+		 * but the window size is != 0
+		 * must have been a result SWS avoidance ( sender )
+		 */
+		if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
+		    skb->len > mss) {
+			seg_size = min(seg_size, mss);
+			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+			if (tcp_fragment(sk, skb, seg_size, mss))
+				return -1;
+		} else if (!tcp_skb_pcount(skb))
+			tcp_set_skb_tso_segs(sk, skb, mss);
+
+		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+		if (!err)
+			tcp_event_new_data_sent(sk, skb);
+		return err;
+	} else {
+		if (tp->urg_mode &&
+		    between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
+			tcp_xmit_probe_skb(sk, 1);
+		return tcp_xmit_probe_skb(sk, 0);
 	}
-	return -1;
 }
 
 /* A window probe timeout has occurred. If window is not closed send
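In tcp_xmit_probe_skb() above, the old ternary urgent ? tp->snd_una : tp->snd_una - 1 becomes tp->snd_una - !urgent: !urgent is 0 for an urgent-mode probe (which carries snd_una) and 1 for a plain window probe (which carries snd_una - 1, an already-acknowledged sequence the peer must answer with an ACK). A standalone check of the identity, illustrative only:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t snd_una = 1000;
		for (int urgent = 0; urgent <= 1; urgent++) {
			uint32_t old_way = urgent ? snd_una : snd_una - 1;
			uint32_t new_way = snd_una - !urgent;	/* !urgent is 0 or 1 */
			assert(old_way == new_way);
		}
		return 0;
	}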
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index be27a33a1c68..2747ec7bfb63 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,8 +15,7 @@
15#define TCP_SCALABLE_AI_CNT 50U 15#define TCP_SCALABLE_AI_CNT 50U
16#define TCP_SCALABLE_MD_SCALE 3 16#define TCP_SCALABLE_MD_SCALE 3
17 17
18static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, 18static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
19 u32 in_flight, int flag)
20{ 19{
21 struct tcp_sock *tp = tcp_sk(sk); 20 struct tcp_sock *tp = tcp_sk(sk);
22 21
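This hunk, like the tcp_vegas, tcp_veno and tcp_yeah hunks below, drops the unused int flag argument from every cong_avoid implementation, so the hook now takes only (sk, ack, in_flight). A minimal Reno-style implementation matching the new signature — illustrative, not taken from the tree:

	static void demo_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
	{
		struct tcp_sock *tp = tcp_sk(sk);

		if (!tcp_is_cwnd_limited(sk, in_flight))
			return;
		if (tp->snd_cwnd <= tp->snd_ssthresh)
			tp->snd_cwnd++;				/* slow start, simplified */
		else if (++tp->snd_cwnd_cnt >= tp->snd_cwnd) {
			tp->snd_cwnd_cnt = 0;			/* congestion avoidance */
			tp->snd_cwnd++;
		}
	}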
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d8970ecfcfc8..803d758a2b12 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -114,13 +114,31 @@ static int tcp_orphan_retries(struct sock *sk, int alive)
114 return retries; 114 return retries;
115} 115}
116 116
117static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
118{
119 /* Black hole detection */
120 if (sysctl_tcp_mtu_probing) {
121 if (!icsk->icsk_mtup.enabled) {
122 icsk->icsk_mtup.enabled = 1;
123 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
124 } else {
125 struct tcp_sock *tp = tcp_sk(sk);
126 int mss;
127
128 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
129 mss = min(sysctl_tcp_base_mss, mss);
130 mss = max(mss, 68 - tp->tcp_header_len);
131 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
132 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
133 }
134 }
135}
136
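The factored-out tcp_mtu_probing() halves the probed MSS on each write timeout past tcp_retries1, clamped from above by sysctl_tcp_base_mss and from below by 68 - tcp_header_len (the classic minimum IP MTU minus headers). A worked run of the arithmetic with hypothetical values:

	#include <stdio.h>

	int main(void)
	{
		int sysctl_tcp_base_mss = 512;
		int tcp_header_len = 20;
		int search_low_mss = 1400;	/* stand-in for tcp_mtu_to_mss(sk, search_low) */

		int mss = search_low_mss >> 1;			/* halve: 700 */
		if (mss > sysctl_tcp_base_mss)
			mss = sysctl_tcp_base_mss;		/* min() clamp: 512 */
		if (mss < 68 - tcp_header_len)
			mss = 68 - tcp_header_len;		/* max() clamp: unchanged */
		printf("next probe mss = %d\n", mss);		/* prints 512 */
		return 0;
	}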
 /* A write timeout has occurred. Process the after effects. */
 static int tcp_write_timeout(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
 	int retry_until;
-	int mss;
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits)
@@ -129,18 +147,7 @@ static int tcp_write_timeout(struct sock *sk)
 	} else {
 		if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
 			/* Black hole detection */
-			if (sysctl_tcp_mtu_probing) {
-				if (!icsk->icsk_mtup.enabled) {
-					icsk->icsk_mtup.enabled = 1;
-					tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
-				} else {
-					mss = min(sysctl_tcp_base_mss,
-						  tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)/2);
-					mss = max(mss, 68 - tp->tcp_header_len);
-					icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
-					tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
-				}
-			}
+			tcp_mtu_probing(icsk, sk);
 
 			dst_negative_advice(&sk->sk_dst_cache);
 		}
@@ -179,7 +186,7 @@ static void tcp_delack_timer(unsigned long data)
179 goto out_unlock; 186 goto out_unlock;
180 } 187 }
181 188
182 sk_stream_mem_reclaim(sk); 189 sk_mem_reclaim_partial(sk);
183 190
184 if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) 191 if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
185 goto out; 192 goto out;
@@ -219,7 +226,7 @@ static void tcp_delack_timer(unsigned long data)
219 226
220out: 227out:
221 if (tcp_memory_pressure) 228 if (tcp_memory_pressure)
222 sk_stream_mem_reclaim(sk); 229 sk_mem_reclaim(sk);
223out_unlock: 230out_unlock:
224 bh_unlock_sock(sk); 231 bh_unlock_sock(sk);
225 sock_put(sk); 232 sock_put(sk);
@@ -413,7 +420,7 @@ static void tcp_write_timer(unsigned long data)
413 TCP_CHECK_TIMER(sk); 420 TCP_CHECK_TIMER(sk);
414 421
415out: 422out:
416 sk_stream_mem_reclaim(sk); 423 sk_mem_reclaim(sk);
417out_unlock: 424out_unlock:
418 bh_unlock_sock(sk); 425 bh_unlock_sock(sk);
419 sock_put(sk); 426 sock_put(sk);
@@ -507,7 +514,7 @@ static void tcp_keepalive_timer (unsigned long data)
507 } 514 }
508 515
509 TCP_CHECK_TIMER(sk); 516 TCP_CHECK_TIMER(sk);
510 sk_stream_mem_reclaim(sk); 517 sk_mem_reclaim(sk);
511 518
512resched: 519resched:
513 inet_csk_reset_keepalive_timer (sk, elapsed); 520 inet_csk_reset_keepalive_timer (sk, elapsed);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 007304e99842..be24d6ee34bd 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -162,14 +162,13 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
162} 162}
163EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); 163EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
164 164
165static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, 165static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
166 u32 in_flight, int flag)
167{ 166{
168 struct tcp_sock *tp = tcp_sk(sk); 167 struct tcp_sock *tp = tcp_sk(sk);
169 struct vegas *vegas = inet_csk_ca(sk); 168 struct vegas *vegas = inet_csk_ca(sk);
170 169
171 if (!vegas->doing_vegas_now) 170 if (!vegas->doing_vegas_now)
172 return tcp_reno_cong_avoid(sk, ack, in_flight, flag); 171 return tcp_reno_cong_avoid(sk, ack, in_flight);
173 172
174 /* The key players are v_beg_snd_una and v_beg_snd_nxt. 173 /* The key players are v_beg_snd_una and v_beg_snd_nxt.
175 * 174 *
@@ -228,7 +227,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
228 /* We don't have enough RTT samples to do the Vegas 227 /* We don't have enough RTT samples to do the Vegas
229 * calculation, so we'll behave like Reno. 228 * calculation, so we'll behave like Reno.
230 */ 229 */
231 tcp_reno_cong_avoid(sk, ack, in_flight, flag); 230 tcp_reno_cong_avoid(sk, ack, in_flight);
232 } else { 231 } else {
233 u32 rtt, target_cwnd, diff; 232 u32 rtt, target_cwnd, diff;
234 233
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 8fb2aee0b1a4..d16689e98516 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -114,14 +114,13 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
114 tcp_veno_init(sk); 114 tcp_veno_init(sk);
115} 115}
116 116
117static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, 117static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
118 u32 in_flight, int flag)
119{ 118{
120 struct tcp_sock *tp = tcp_sk(sk); 119 struct tcp_sock *tp = tcp_sk(sk);
121 struct veno *veno = inet_csk_ca(sk); 120 struct veno *veno = inet_csk_ca(sk);
122 121
123 if (!veno->doing_veno_now) 122 if (!veno->doing_veno_now)
124 return tcp_reno_cong_avoid(sk, ack, in_flight, flag); 123 return tcp_reno_cong_avoid(sk, ack, in_flight);
125 124
126 /* limited by applications */ 125 /* limited by applications */
127 if (!tcp_is_cwnd_limited(sk, in_flight)) 126 if (!tcp_is_cwnd_limited(sk, in_flight))
@@ -132,7 +131,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack,
132 /* We don't have enough rtt samples to do the Veno 131 /* We don't have enough rtt samples to do the Veno
133 * calculation, so we'll behave like Reno. 132 * calculation, so we'll behave like Reno.
134 */ 133 */
135 tcp_reno_cong_avoid(sk, ack, in_flight, flag); 134 tcp_reno_cong_avoid(sk, ack, in_flight);
136 } else { 135 } else {
137 u32 rtt, target_cwnd; 136 u32 rtt, target_cwnd;
138 137
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index c107fba7430e..e03b10183a8b 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -69,8 +69,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
69 tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); 69 tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
70} 70}
71 71
72static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, 72static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
73 u32 in_flight, int flag)
74{ 73{
75 struct tcp_sock *tp = tcp_sk(sk); 74 struct tcp_sock *tp = tcp_sk(sk);
76 struct yeah *yeah = inet_csk_ca(sk); 75 struct yeah *yeah = inet_csk_ca(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 03c400ca14c5..2fb8d731026b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -82,6 +82,7 @@
82#include <asm/system.h> 82#include <asm/system.h>
83#include <asm/uaccess.h> 83#include <asm/uaccess.h>
84#include <asm/ioctls.h> 84#include <asm/ioctls.h>
85#include <linux/bootmem.h>
85#include <linux/types.h> 86#include <linux/types.h>
86#include <linux/fcntl.h> 87#include <linux/fcntl.h>
87#include <linux/module.h> 88#include <linux/module.h>
@@ -110,10 +111,25 @@
110 */ 111 */
111 112
112DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; 113DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
114EXPORT_SYMBOL(udp_statistics);
115
116DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
117EXPORT_SYMBOL(udp_stats_in6);
113 118
114struct hlist_head udp_hash[UDP_HTABLE_SIZE]; 119struct hlist_head udp_hash[UDP_HTABLE_SIZE];
115DEFINE_RWLOCK(udp_hash_lock); 120DEFINE_RWLOCK(udp_hash_lock);
116 121
122int sysctl_udp_mem[3] __read_mostly;
123int sysctl_udp_rmem_min __read_mostly;
124int sysctl_udp_wmem_min __read_mostly;
125
126EXPORT_SYMBOL(sysctl_udp_mem);
127EXPORT_SYMBOL(sysctl_udp_rmem_min);
128EXPORT_SYMBOL(sysctl_udp_wmem_min);
129
130atomic_t udp_memory_allocated;
131EXPORT_SYMBOL(udp_memory_allocated);
132
117static inline int __udp_lib_lport_inuse(__u16 num, 133static inline int __udp_lib_lport_inuse(__u16 num,
118 const struct hlist_head udptable[]) 134 const struct hlist_head udptable[])
119{ 135{
@@ -214,7 +230,7 @@ gotit:
214 if (sk_unhashed(sk)) { 230 if (sk_unhashed(sk)) {
215 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; 231 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
216 sk_add_node(sk, head); 232 sk_add_node(sk, head);
217 sock_prot_inc_use(sk->sk_prot); 233 sock_prot_inuse_add(sk->sk_prot, 1);
218 } 234 }
219 error = 0; 235 error = 0;
220fail: 236fail:
@@ -402,7 +418,7 @@ out:
402 418
403void udp_err(struct sk_buff *skb, u32 info) 419void udp_err(struct sk_buff *skb, u32 info)
404{ 420{
405 return __udp4_lib_err(skb, info, udp_hash); 421 __udp4_lib_err(skb, info, udp_hash);
406} 422}
407 423
408/* 424/*
@@ -471,6 +487,7 @@ static int udp_push_pending_frames(struct sock *sk)
471 struct sk_buff *skb; 487 struct sk_buff *skb;
472 struct udphdr *uh; 488 struct udphdr *uh;
473 int err = 0; 489 int err = 0;
490 int is_udplite = IS_UDPLITE(sk);
474 __wsum csum = 0; 491 __wsum csum = 0;
475 492
476 /* Grab the skbuff where UDP header space exists. */ 493 /* Grab the skbuff where UDP header space exists. */
@@ -486,7 +503,7 @@ static int udp_push_pending_frames(struct sock *sk)
486 uh->len = htons(up->len); 503 uh->len = htons(up->len);
487 uh->check = 0; 504 uh->check = 0;
488 505
489 if (up->pcflag) /* UDP-Lite */ 506 if (is_udplite) /* UDP-Lite */
490 csum = udplite_csum_outgoing(sk, skb); 507 csum = udplite_csum_outgoing(sk, skb);
491 508
492 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ 509 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
@@ -514,7 +531,7 @@ out:
514 up->len = 0; 531 up->len = 0;
515 up->pending = 0; 532 up->pending = 0;
516 if (!err) 533 if (!err)
517 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); 534 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
518 return err; 535 return err;
519} 536}
520 537
@@ -531,7 +548,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
531 __be32 daddr, faddr, saddr; 548 __be32 daddr, faddr, saddr;
532 __be16 dport; 549 __be16 dport;
533 u8 tos; 550 u8 tos;
534 int err, is_udplite = up->pcflag; 551 int err, is_udplite = IS_UDPLITE(sk);
535 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 552 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
536 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 553 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
537 554
@@ -621,7 +638,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
621 connected = 0; 638 connected = 0;
622 } 639 }
623 640
624 if (MULTICAST(daddr)) { 641 if (ipv4_is_multicast(daddr)) {
625 if (!ipc.oif) 642 if (!ipc.oif)
626 ipc.oif = inet->mc_index; 643 ipc.oif = inet->mc_index;
627 if (!saddr) 644 if (!saddr)
@@ -643,7 +660,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
643 { .sport = inet->sport, 660 { .sport = inet->sport,
644 .dport = dport } } }; 661 .dport = dport } } };
645 security_sk_classify_flow(sk, &fl); 662 security_sk_classify_flow(sk, &fl);
646 err = ip_route_output_flow(&rt, &fl, sk, 1); 663 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
647 if (err) { 664 if (err) {
648 if (err == -ENETUNREACH) 665 if (err == -ENETUNREACH)
649 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 666 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -825,6 +842,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
825 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 842 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
826 struct sk_buff *skb; 843 struct sk_buff *skb;
827 unsigned int ulen, copied; 844 unsigned int ulen, copied;
845 int peeked;
828 int err; 846 int err;
829 int is_udplite = IS_UDPLITE(sk); 847 int is_udplite = IS_UDPLITE(sk);
830 848
@@ -838,7 +856,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
838 return ip_recv_error(sk, msg, len); 856 return ip_recv_error(sk, msg, len);
839 857
840try_again: 858try_again:
841 skb = skb_recv_datagram(sk, flags, noblock, &err); 859 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
860 &peeked, &err);
842 if (!skb) 861 if (!skb)
843 goto out; 862 goto out;
844 863
@@ -873,6 +892,9 @@ try_again:
873 if (err) 892 if (err)
874 goto out_free; 893 goto out_free;
875 894
895 if (!peeked)
896 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
897
876 sock_recv_timestamp(msg, sk, skb); 898 sock_recv_timestamp(msg, sk, skb);
877 899
878 /* Copy the address. */ 900 /* Copy the address. */
@@ -891,14 +913,17 @@ try_again:
891 err = ulen; 913 err = ulen;
892 914
893out_free: 915out_free:
916 lock_sock(sk);
894 skb_free_datagram(sk, skb); 917 skb_free_datagram(sk, skb);
918 release_sock(sk);
895out: 919out:
896 return err; 920 return err;
897 921
898csum_copy_err: 922csum_copy_err:
899 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); 923 lock_sock(sk);
900 924 if (!skb_kill_datagram(sk, skb, flags))
901 skb_kill_datagram(sk, skb, flags); 925 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
926 release_sock(sk);
902 927
903 if (noblock) 928 if (noblock)
904 return -EAGAIN; 929 return -EAGAIN;
@@ -940,6 +965,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
940{ 965{
941 struct udp_sock *up = udp_sk(sk); 966 struct udp_sock *up = udp_sk(sk);
942 int rc; 967 int rc;
968 int is_udplite = IS_UDPLITE(sk);
943 969
944 /* 970 /*
945 * Charge it to the socket, dropping if the queue is full. 971 * Charge it to the socket, dropping if the queue is full.
@@ -967,7 +993,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
967 993
968 ret = (*up->encap_rcv)(sk, skb); 994 ret = (*up->encap_rcv)(sk, skb);
969 if (ret <= 0) { 995 if (ret <= 0) {
970 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); 996 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
997 is_udplite);
971 return -ret; 998 return -ret;
972 } 999 }
973 } 1000 }
@@ -978,7 +1005,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
978 /* 1005 /*
979 * UDP-Lite specific tests, ignored on UDP sockets 1006 * UDP-Lite specific tests, ignored on UDP sockets
980 */ 1007 */
981 if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { 1008 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
982 1009
983 /* 1010 /*
984 * MIB statistics other than incrementing the error count are 1011 * MIB statistics other than incrementing the error count are
@@ -1019,15 +1046,14 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1019 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 1046 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1020 /* Note that an ENOMEM error is charged twice */ 1047 /* Note that an ENOMEM error is charged twice */
1021 if (rc == -ENOMEM) 1048 if (rc == -ENOMEM)
1022 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); 1049 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
1023 goto drop; 1050 goto drop;
1024 } 1051 }
1025 1052
1026 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
1027 return 0; 1053 return 0;
1028 1054
1029drop: 1055drop:
1030 UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); 1056 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
1031 kfree_skb(skb); 1057 kfree_skb(skb);
1032 return -1; 1058 return -1;
1033} 1059}
@@ -1062,7 +1088,15 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1062 skb1 = skb_clone(skb, GFP_ATOMIC); 1088 skb1 = skb_clone(skb, GFP_ATOMIC);
1063 1089
1064 if (skb1) { 1090 if (skb1) {
1065 int ret = udp_queue_rcv_skb(sk, skb1); 1091 int ret = 0;
1092
1093 bh_lock_sock_nested(sk);
1094 if (!sock_owned_by_user(sk))
1095 ret = udp_queue_rcv_skb(sk, skb1);
1096 else
1097 sk_add_backlog(sk, skb1);
1098 bh_unlock_sock(sk);
1099
1066 if (ret > 0) 1100 if (ret > 0)
1067 /* we should probably re-process instead 1101 /* we should probably re-process instead
1068 * of dropping packets here. */ 1102 * of dropping packets here. */
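Both this multicast hunk and the unicast one below wrap udp_queue_rcv_skb() in the same pattern: in softirq context the packet is delivered only if no process-context caller currently owns the socket; otherwise it is parked on the backlog, which release_sock() drains. Condensed for illustration (not a literal kernel function):

	static int deliver_or_backlog(struct sock *sk, struct sk_buff *skb)
	{
		int ret = 0;

		bh_lock_sock_nested(sk);
		if (!sock_owned_by_user(sk))
			ret = udp_queue_rcv_skb(sk, skb);	/* fast path */
		else
			sk_add_backlog(sk, skb);		/* drained at release_sock() */
		bh_unlock_sock(sk);
		return ret;
	}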
@@ -1155,7 +1189,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1155 inet_iif(skb), udptable); 1189 inet_iif(skb), udptable);
1156 1190
1157 if (sk != NULL) { 1191 if (sk != NULL) {
1158 int ret = udp_queue_rcv_skb(sk, skb); 1192 int ret = 0;
1193 bh_lock_sock_nested(sk);
1194 if (!sock_owned_by_user(sk))
1195 ret = udp_queue_rcv_skb(sk, skb);
1196 else
1197 sk_add_backlog(sk, skb);
1198 bh_unlock_sock(sk);
1159 sock_put(sk); 1199 sock_put(sk);
1160 1200
1161 /* a return value > 0 means to resubmit the input, but 1201 /* a return value > 0 means to resubmit the input, but
@@ -1236,6 +1276,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1236 struct udp_sock *up = udp_sk(sk); 1276 struct udp_sock *up = udp_sk(sk);
1237 int val; 1277 int val;
1238 int err = 0; 1278 int err = 0;
1279 int is_udplite = IS_UDPLITE(sk);
1239 1280
1240 if (optlen<sizeof(int)) 1281 if (optlen<sizeof(int))
1241 return -EINVAL; 1282 return -EINVAL;
@@ -1277,7 +1318,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1277 /* The sender sets actual checksum coverage length via this option. 1318 /* The sender sets actual checksum coverage length via this option.
1278 * The case coverage > packet length is handled by send module. */ 1319 * The case coverage > packet length is handled by send module. */
1279 case UDPLITE_SEND_CSCOV: 1320 case UDPLITE_SEND_CSCOV:
1280 if (!up->pcflag) /* Disable the option on UDP sockets */ 1321 if (!is_udplite) /* Disable the option on UDP sockets */
1281 return -ENOPROTOOPT; 1322 return -ENOPROTOOPT;
1282 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ 1323 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
1283 val = 8; 1324 val = 8;
@@ -1289,7 +1330,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1289 * sense, this should be set to at least 8 (as done below). If zero is 1330 * sense, this should be set to at least 8 (as done below). If zero is
1290 * used, this again means full checksum coverage. */ 1331 * used, this again means full checksum coverage. */
1291 case UDPLITE_RECV_CSCOV: 1332 case UDPLITE_RECV_CSCOV:
1292 if (!up->pcflag) /* Disable the option on UDP sockets */ 1333 if (!is_udplite) /* Disable the option on UDP sockets */
1293 return -ENOPROTOOPT; 1334 return -ENOPROTOOPT;
1294 if (val != 0 && val < 8) /* Avoid silly minimal values. */ 1335 if (val != 0 && val < 8) /* Avoid silly minimal values. */
1295 val = 8; 1336 val = 8;
@@ -1449,6 +1490,10 @@ struct proto udp_prot = {
1449 .hash = udp_lib_hash, 1490 .hash = udp_lib_hash,
1450 .unhash = udp_lib_unhash, 1491 .unhash = udp_lib_unhash,
1451 .get_port = udp_v4_get_port, 1492 .get_port = udp_v4_get_port,
1493 .memory_allocated = &udp_memory_allocated,
1494 .sysctl_mem = sysctl_udp_mem,
1495 .sysctl_wmem = &sysctl_udp_wmem_min,
1496 .sysctl_rmem = &sysctl_udp_rmem_min,
1452 .obj_size = sizeof(struct udp_sock), 1497 .obj_size = sizeof(struct udp_sock),
1453#ifdef CONFIG_COMPAT 1498#ifdef CONFIG_COMPAT
1454 .compat_setsockopt = compat_udp_setsockopt, 1499 .compat_setsockopt = compat_udp_setsockopt,
@@ -1505,6 +1550,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1505} 1550}
1506 1551
1507static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 1552static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1553 __acquires(udp_hash_lock)
1508{ 1554{
1509 read_lock(&udp_hash_lock); 1555 read_lock(&udp_hash_lock);
1510 return *pos ? udp_get_idx(seq, *pos-1) : (void *)1; 1556 return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
@@ -1524,6 +1570,7 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1524} 1570}
1525 1571
1526static void udp_seq_stop(struct seq_file *seq, void *v) 1572static void udp_seq_stop(struct seq_file *seq, void *v)
1573 __releases(udp_hash_lock)
1527{ 1574{
1528 read_unlock(&udp_hash_lock); 1575 read_unlock(&udp_hash_lock);
1529} 1576}
@@ -1644,6 +1691,25 @@ void udp4_proc_exit(void)
1644} 1691}
1645#endif /* CONFIG_PROC_FS */ 1692#endif /* CONFIG_PROC_FS */
1646 1693
1694void __init udp_init(void)
1695{
1696 unsigned long limit;
1697
1698 /* Set the pressure threshold up by the same strategy of TCP. It is a
1699 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
1700 * toward zero with the amount of memory, with a floor of 128 pages.
1701 */
1702 limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
1703 limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
1704 limit = max(limit, 128UL);
1705 sysctl_udp_mem[0] = limit / 4 * 3;
1706 sysctl_udp_mem[1] = limit;
1707 sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
1708
1709 sysctl_udp_rmem_min = SK_MEM_QUANTUM;
1710 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
1711}
1712
1647EXPORT_SYMBOL(udp_disconnect); 1713EXPORT_SYMBOL(udp_disconnect);
1648EXPORT_SYMBOL(udp_hash); 1714EXPORT_SYMBOL(udp_hash);
1649EXPORT_SYMBOL(udp_hash_lock); 1715EXPORT_SYMBOL(udp_hash_lock);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f5baeb3e8b85..001b881ca36f 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -35,7 +35,7 @@ static int udplite_rcv(struct sk_buff *skb)
35 35
36static void udplite_err(struct sk_buff *skb, u32 info) 36static void udplite_err(struct sk_buff *skb, u32 info)
37{ 37{
38 return __udp4_lib_err(skb, info, udplite_hash); 38 __udp4_lib_err(skb, info, udplite_hash);
39} 39}
40 40
41static struct net_protocol udplite_protocol = { 41static struct net_protocol udplite_protocol = {
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 5e95c8a07efb..390dcb1354a5 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -16,7 +16,11 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
-#ifdef CONFIG_NETFILTER
+int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	return xfrm4_extract_header(skb);
+}
+
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
 	if (skb->dst == NULL) {
@@ -31,129 +35,35 @@ drop:
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
-#endif
 
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type)
 {
-	int err;
-	__be32 seq;
-	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
-	struct xfrm_state *x;
-	int xfrm_nr = 0;
-	int decaps = 0;
-	unsigned int nhoff = offsetof(struct iphdr, protocol);
-
-	seq = 0;
-	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
-		goto drop;
-
-	do {
-		const struct iphdr *iph = ip_hdr(skb);
-
-		if (xfrm_nr == XFRM_MAX_DEPTH)
-			goto drop;
-
-		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
-				      nexthdr, AF_INET);
-		if (x == NULL)
-			goto drop;
-
-		spin_lock(&x->lock);
-		if (unlikely(x->km.state != XFRM_STATE_VALID))
-			goto drop_unlock;
-
-		if ((x->encap ? x->encap->encap_type : 0) != encap_type)
-			goto drop_unlock;
-
-		if (x->props.replay_window && xfrm_replay_check(x, seq))
-			goto drop_unlock;
-
-		if (xfrm_state_check_expire(x))
-			goto drop_unlock;
-
-		nexthdr = x->type->input(x, skb);
-		if (nexthdr <= 0)
-			goto drop_unlock;
-
-		skb_network_header(skb)[nhoff] = nexthdr;
-
-		/* only the first xfrm gets the encap type */
-		encap_type = 0;
-
-		if (x->props.replay_window)
-			xfrm_replay_advance(x, seq);
-
-		x->curlft.bytes += skb->len;
-		x->curlft.packets++;
-
-		spin_unlock(&x->lock);
-
-		xfrm_vec[xfrm_nr++] = x;
-
-		if (x->outer_mode->input(x, skb))
-			goto drop;
-
-		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
-			decaps = 1;
-			break;
-		}
-
-		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
-		if (err < 0)
-			goto drop;
-	} while (!err);
-
-	/* Allocate new secpath or COW existing one. */
-
-	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
-		struct sec_path *sp;
-		sp = secpath_dup(skb->sp);
-		if (!sp)
-			goto drop;
-		if (skb->sp)
-			secpath_put(skb->sp);
-		skb->sp = sp;
-	}
-	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
-		goto drop;
-
-	memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
-	       xfrm_nr * sizeof(xfrm_vec[0]));
-	skb->sp->len += xfrm_nr;
-
-	nf_reset(skb);
-
-	if (decaps) {
-		dst_release(skb->dst);
-		skb->dst = NULL;
-		netif_rx(skb);
-		return 0;
-	} else {
-#ifdef CONFIG_NETFILTER
-		__skb_push(skb, skb->data - skb_network_header(skb));
-		ip_hdr(skb)->tot_len = htons(skb->len);
-		ip_send_check(ip_hdr(skb));
-
-		NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
-			xfrm4_rcv_encap_finish);
-		return 0;
-#else
-		return -ip_hdr(skb)->protocol;
-#endif
-	}
-
-drop_unlock:
-	spin_unlock(&x->lock);
-	xfrm_state_put(x);
-drop:
-	while (--xfrm_nr >= 0)
-		xfrm_state_put(xfrm_vec[xfrm_nr]);
-
-	kfree_skb(skb);
+	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+	return xfrm_input(skb, nexthdr, spi, encap_type);
+}
+EXPORT_SYMBOL(xfrm4_rcv_encap);
+
+int xfrm4_transport_finish(struct sk_buff *skb, int async)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
+
+#ifndef CONFIG_NETFILTER
+	if (!async)
+		return -iph->protocol;
+#endif
+
+	__skb_push(skb, skb->data - skb_network_header(skb));
+	iph->tot_len = htons(skb->len);
+	ip_send_check(iph);
+
+	NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+		xfrm4_rcv_encap_finish);
 	return 0;
 }
-EXPORT_SYMBOL(xfrm4_rcv_encap);
 
 /* If it's a keepalive packet, then just eat it.
  * If it's an encapsulated packet, then pass it to the
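xfrm4_rcv_encap() now only stamps the address family and the daddr offset into the skb control block and defers everything else to the generic xfrm_input(). The control block consumed by that path plausibly has roughly the following shape — a reconstruction for illustration, not the definition from net/xfrm:

	struct xfrm_spi_skb_cb_sketch {
		/* a per-family parm area precedes these fields in the real struct */
		unsigned int daddroff;	/* offset of daddr in the outer header */
		unsigned int family;	/* AF_INET here; AF_INET6 on the v6 path */
	};

	#define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb_sketch *)&((__skb)->cb[0]))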
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index e42e122414be..e093a7b59e18 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -17,6 +17,21 @@
17#include <net/ip.h> 17#include <net/ip.h>
18#include <net/xfrm.h> 18#include <net/xfrm.h>
19 19
20static void xfrm4_beet_make_header(struct sk_buff *skb)
21{
22 struct iphdr *iph = ip_hdr(skb);
23
24 iph->ihl = 5;
25 iph->version = 4;
26
27 iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
28 iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
29
30 iph->id = XFRM_MODE_SKB_CB(skb)->id;
31 iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
32 iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
33}
34
20/* Add encapsulation header. 35/* Add encapsulation header.
21 * 36 *
22 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. 37 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
@@ -40,10 +55,12 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 					  offsetof(struct iphdr, protocol);
 	skb->transport_header = skb->network_header + sizeof(*iph);
 
+	xfrm4_beet_make_header(skb);
+
 	ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen);
 
 	top_iph = ip_hdr(skb);
-	memmove(top_iph, iph, sizeof(*iph));
+
 	if (unlikely(optlen)) {
 		BUG_ON(optlen < 0);
 
@@ -65,43 +82,46 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph = ip_hdr(skb);
-	int phlen = 0;
+	struct iphdr *iph;
 	int optlen = 0;
-	u8 ph_nexthdr = 0;
 	int err = -EINVAL;
 
-	if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
+	if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
 		struct ip_beet_phdr *ph;
+		int phlen;
 
 		if (!pskb_may_pull(skb, sizeof(*ph)))
 			goto out;
-		ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1);
+
+		ph = (struct ip_beet_phdr *)skb->data;
 
 		phlen = sizeof(*ph) + ph->padlen;
 		optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
 		if (optlen < 0 || optlen & 3 || optlen > 250)
 			goto out;
 
-		if (!pskb_may_pull(skb, phlen + optlen))
-			goto out;
-		skb->len -= phlen + optlen;
+		XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr;
 
-		ph_nexthdr = ph->nexthdr;
+		if (!pskb_may_pull(skb, phlen));
+			goto out;
+		__skb_pull(skb, phlen);
 	}
 
-	skb_set_network_header(skb, phlen - sizeof(*iph));
-	memmove(skb_network_header(skb), iph, sizeof(*iph));
-	skb_set_transport_header(skb, phlen + optlen);
-	skb->data = skb_transport_header(skb);
+	skb_push(skb, sizeof(*iph));
+	skb_reset_network_header(skb);
+
+	memmove(skb->data - skb->mac_len, skb_mac_header(skb),
+		skb->mac_len);
+	skb_set_mac_header(skb, -skb->mac_len);
+
+	xfrm4_beet_make_header(skb);
 
 	iph = ip_hdr(skb);
-	iph->ihl = (sizeof(*iph) + optlen) / 4;
-	iph->tot_len = htons(skb->len + iph->ihl * 4);
+
+	iph->ihl += optlen / 4;
+	iph->tot_len = htons(skb->len);
 	iph->daddr = x->sel.daddr.a4;
 	iph->saddr = x->sel.saddr.a4;
-	if (ph_nexthdr)
-		iph->protocol = ph_nexthdr;
 	iph->check = 0;
 	iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	err = 0;
@@ -110,8 +130,10 @@ out:
110} 130}
111 131
112static struct xfrm_mode xfrm4_beet_mode = { 132static struct xfrm_mode xfrm4_beet_mode = {
113 .input = xfrm4_beet_input, 133 .input2 = xfrm4_beet_input,
114 .output = xfrm4_beet_output, 134 .input = xfrm_prepare_input,
135 .output2 = xfrm4_beet_output,
136 .output = xfrm4_prepare_output,
115 .owner = THIS_MODULE, 137 .owner = THIS_MODULE,
116 .encap = XFRM_MODE_BEET, 138 .encap = XFRM_MODE_BEET,
117 .flags = XFRM_MODE_FLAG_TUNNEL, 139 .flags = XFRM_MODE_FLAG_TUNNEL,
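The mode now exposes two-stage hooks: the generic .input/.output entry points (xfrm_prepare_input() and xfrm4_prepare_output(), the latter visible in the xfrm4_output.c hunks below) do the shared header extraction and bookkeeping, then call the per-mode ->input2/->output2. A rough sketch of the input-side dispatch, assuming the afinfo->extract_input hook seen elsewhere in this series:

	int xfrm_prepare_input_sketch(struct xfrm_state *x, struct sk_buff *skb)
	{
		int err = x->outer_mode->afinfo->extract_input(x, skb);
		if (err)
			return err;

		return x->inner_mode->input2(x, skb);	/* e.g. xfrm4_beet_input */
	}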
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e4deecba6dd2..8dee617ee900 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,92 +16,60 @@
 
 static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
 {
-	struct iphdr *outer_iph = ip_hdr(skb);
 	struct iphdr *inner_iph = ipip_hdr(skb);
 
-	if (INET_ECN_is_ce(outer_iph->tos))
+	if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
 		IP_ECN_set_ce(inner_iph);
 }
 
-static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
-{
-	if (INET_ECN_is_ce(iph->tos))
-		IP6_ECN_set_ce(ipv6_hdr(skb));
-}
-
 /* Add encapsulation header.
  *
  * The top IP header will be constructed per RFC 2401.
  */
-static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
-	struct iphdr *iph, *top_iph;
+	struct iphdr *top_iph;
 	int flags;
 
-	iph = ip_hdr(skb);
-
 	skb_set_network_header(skb, -x->props.header_len);
 	skb->mac_header = skb->network_header +
 			  offsetof(struct iphdr, protocol);
-	skb->transport_header = skb->network_header + sizeof(*iph);
+	skb->transport_header = skb->network_header + sizeof(*top_iph);
 	top_iph = ip_hdr(skb);
 
 	top_iph->ihl = 5;
 	top_iph->version = 4;
 
-	flags = x->props.flags;
+	top_iph->protocol = x->inner_mode->afinfo->proto;
 
 	/* DS disclosed */
-	if (xdst->route->ops->family == AF_INET) {
-		top_iph->protocol = IPPROTO_IPIP;
-		top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
-		top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
-			0 : (iph->frag_off & htons(IP_DF));
-	}
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-	else {
-		struct ipv6hdr *ipv6h = (struct ipv6hdr*)iph;
-		top_iph->protocol = IPPROTO_IPV6;
-		top_iph->tos = INET_ECN_encapsulate(iph->tos, ipv6_get_dsfield(ipv6h));
-		top_iph->frag_off = 0;
-	}
-#endif
+	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
+					    XFRM_MODE_SKB_CB(skb)->tos);
 
+	flags = x->props.flags;
 	if (flags & XFRM_STATE_NOECN)
 		IP_ECN_clear(top_iph);
 
-	if (!top_iph->frag_off)
-		__ip_select_ident(top_iph, dst->child, 0);
+	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+		0 : XFRM_MODE_SKB_CB(skb)->frag_off;
+	ip_select_ident(top_iph, dst->child, NULL);
 
 	top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
 
-	skb->protocol = htons(ETH_P_IP);
-
-	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 	return 0;
 }
 
-static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph = ip_hdr(skb);
 	const unsigned char *old_mac;
 	int err = -EINVAL;
 
-	switch (iph->protocol){
-		case IPPROTO_IPIP:
-			break;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-		case IPPROTO_IPV6:
-			break;
-#endif
-		default:
-			goto out;
-	}
+	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
+		goto out;
 
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto out;
@@ -110,20 +78,11 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	iph = ip_hdr(skb);
-	if (iph->protocol == IPPROTO_IPIP) {
-		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv4_copy_dscp(iph, ipip_hdr(skb));
-		if (!(x->props.flags & XFRM_STATE_NOECN))
-			ipip_ecn_decapsulate(skb);
-	}
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-	else {
-		if (!(x->props.flags & XFRM_STATE_NOECN))
-			ipip6_ecn_decapsulate(iph, skb);
-		skb->protocol = htons(ETH_P_IPV6);
-	}
-#endif
+	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+		ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb));
+	if (!(x->props.flags & XFRM_STATE_NOECN))
+		ipip_ecn_decapsulate(skb);
+
 	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
 	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
@@ -135,19 +94,21 @@ out:
 }
 
 static struct xfrm_mode xfrm4_tunnel_mode = {
-	.input = xfrm4_tunnel_input,
-	.output = xfrm4_tunnel_output,
+	.input2 = xfrm4_mode_tunnel_input,
+	.input = xfrm_prepare_input,
+	.output2 = xfrm4_mode_tunnel_output,
+	.output = xfrm4_prepare_output,
 	.owner = THIS_MODULE,
 	.encap = XFRM_MODE_TUNNEL,
 	.flags = XFRM_MODE_FLAG_TUNNEL,
 };
 
-static int __init xfrm4_tunnel_init(void)
+static int __init xfrm4_mode_tunnel_init(void)
 {
 	return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET);
 }
 
-static void __exit xfrm4_tunnel_exit(void)
+static void __exit xfrm4_mode_tunnel_exit(void)
 {
 	int err;
 
@@ -155,7 +116,7 @@ static void __exit xfrm4_tunnel_exit(void)
155 BUG_ON(err); 116 BUG_ON(err);
156} 117}
157 118
158module_init(xfrm4_tunnel_init); 119module_init(xfrm4_mode_tunnel_init);
159module_exit(xfrm4_tunnel_exit); 120module_exit(xfrm4_mode_tunnel_exit);
160MODULE_LICENSE("GPL"); 121MODULE_LICENSE("GPL");
161MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL); 122MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL);
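[Editor's note] The hunk above splits each xfrm mode into a generic entry point (.input/.output, shared across modes) and a mode-specific hook (.input2/.output2). A minimal user-space sketch of that two-level dispatch; the types and names here are stand-ins, not the kernel's:

#include <stdio.h>

struct pkt { int protocol; };

struct mode {
    int (*output)(const struct mode *m, struct pkt *p);   /* generic prepare step */
    int (*output2)(const struct mode *m, struct pkt *p);  /* mode-specific step */
};

static int tunnel_output2(const struct mode *m, struct pkt *p)
{
    (void)m;
    p->protocol = 4;            /* IPPROTO_IPIP: build the outer IPv4 header */
    return 0;
}

static int prepare_output(const struct mode *m, struct pkt *p)
{
    /* family-independent bookkeeping would happen here ... */
    return m->output2(m, p);    /* ... then hand off to the mode proper */
}

int main(void)
{
    const struct mode tunnel = { .output = prepare_output, .output2 = tunnel_output2 };
    struct pkt p = { 0 };

    tunnel.output(&tunnel, &p);
    printf("outer protocol = %d\n", p.protocol);
    return 0;
}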
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index c4a7156962bd..d5a58a818021 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,11 +8,12 @@
8 * 2 of the License, or (at your option) any later version. 8 * 2 of the License, or (at your option) any later version.
9 */ 9 */
10 10
11#include <linux/compiler.h>
12#include <linux/if_ether.h> 11#include <linux/if_ether.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter_ipv4.h> 15#include <linux/netfilter_ipv4.h>
16#include <net/dst.h>
16#include <net/ip.h> 17#include <net/ip.h>
17#include <net/xfrm.h> 18#include <net/xfrm.h>
18#include <net/icmp.h> 19#include <net/icmp.h>
@@ -25,8 +26,6 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
25 if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) 26 if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
26 goto out; 27 goto out;
27 28
28 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
29
30 if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) 29 if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
31 goto out; 30 goto out;
32 31
@@ -40,106 +39,54 @@ out:
40 return ret; 39 return ret;
41} 40}
42 41
43static inline int xfrm4_output_one(struct sk_buff *skb) 42int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
44{ 43{
45 struct dst_entry *dst = skb->dst;
46 struct xfrm_state *x = dst->xfrm;
47 struct iphdr *iph;
48 int err; 44 int err;
49 45
50 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { 46 err = xfrm4_tunnel_check_size(skb);
51 err = xfrm4_tunnel_check_size(skb);
52 if (err)
53 goto error_nolock;
54 }
55
56 err = xfrm_output(skb);
57 if (err) 47 if (err)
58 goto error_nolock; 48 return err;
59 49
60 iph = ip_hdr(skb); 50 XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;
61 iph->tot_len = htons(skb->len);
62 ip_send_check(iph);
63 51
64 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; 52 return xfrm4_extract_header(skb);
65 err = 0;
66
67out_exit:
68 return err;
69error_nolock:
70 kfree_skb(skb);
71 goto out_exit;
72} 53}
73 54
74static int xfrm4_output_finish2(struct sk_buff *skb) 55int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
75{ 56{
76 int err; 57 int err;
77 58
78 while (likely((err = xfrm4_output_one(skb)) == 0)) { 59 err = x->inner_mode->afinfo->extract_output(x, skb);
79 nf_reset(skb); 60 if (err)
80 61 return err;
81 err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
82 skb->dst->dev, dst_output);
83 if (unlikely(err != 1))
84 break;
85 62
86 if (!skb->dst->xfrm) 63 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
87 return dst_output(skb); 64 IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED;
88 65
89 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL, 66 skb->protocol = htons(ETH_P_IP);
90 skb->dst->dev, xfrm4_output_finish2);
91 if (unlikely(err != 1))
92 break;
93 }
94 67
95 return err; 68 return x->outer_mode->output2(x, skb);
96} 69}
70EXPORT_SYMBOL(xfrm4_prepare_output);
97 71
98static int xfrm4_output_finish(struct sk_buff *skb) 72static int xfrm4_output_finish(struct sk_buff *skb)
99{ 73{
100 struct sk_buff *segs;
101
102#ifdef CONFIG_NETFILTER 74#ifdef CONFIG_NETFILTER
103 if (!skb->dst->xfrm) { 75 if (!skb->dst->xfrm) {
104 IPCB(skb)->flags |= IPSKB_REROUTED; 76 IPCB(skb)->flags |= IPSKB_REROUTED;
105 return dst_output(skb); 77 return dst_output(skb);
106 } 78 }
107#endif
108 79
109 if (!skb_is_gso(skb)) 80 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
110 return xfrm4_output_finish2(skb); 81#endif
111 82
112 skb->protocol = htons(ETH_P_IP); 83 skb->protocol = htons(ETH_P_IP);
113 segs = skb_gso_segment(skb, 0); 84 return xfrm_output(skb);
114 kfree_skb(skb);
115 if (unlikely(IS_ERR(segs)))
116 return PTR_ERR(segs);
117
118 do {
119 struct sk_buff *nskb = segs->next;
120 int err;
121
122 segs->next = NULL;
123 err = xfrm4_output_finish2(segs);
124
125 if (unlikely(err)) {
126 while ((segs = nskb)) {
127 nskb = segs->next;
128 segs->next = NULL;
129 kfree_skb(segs);
130 }
131 return err;
132 }
133
134 segs = nskb;
135 } while (segs);
136
137 return 0;
138} 85}
139 86
140int xfrm4_output(struct sk_buff *skb) 87int xfrm4_output(struct sk_buff *skb)
141{ 88{
142 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, 89 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb,
143 xfrm4_output_finish, 90 NULL, skb->dst->dev, xfrm4_output_finish,
144 !(IPCB(skb)->flags & IPSKB_REROUTED)); 91 !(IPCB(skb)->flags & IPSKB_REROUTED));
145} 92}
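[Editor's note] xfrm4_output() above ends in NF_HOOK_COND(), which traverses the netfilter chain only when its condition argument is true and otherwise jumps straight to the final handler. A toy model of that conditional-hook idiom (the names are illustrative, not the kernel API):

#include <stdio.h>

static int okfn(int pkt)
{
    printf("packet %d delivered\n", pkt);
    return 0;
}

/* Run the hook chain only when cond is true; otherwise call okfn directly. */
static int hook_cond(int pkt, int cond, int (*ok)(int))
{
    if (!cond)
        return ok(pkt);              /* e.g. IPSKB_REROUTED set: skip the hooks */
    printf("packet %d traverses POST_ROUTING hooks\n", pkt);
    return ok(pkt);                  /* hooks accepted: continue to okfn */
}

int main(void)
{
    hook_cond(1, 1, okfn);           /* normal path: hooks run */
    hook_cond(2, 0, okfn);           /* rerouted path: hooks skipped */
    return 0;
}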
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index cc86fb110dd8..3783e3ee56a4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -8,36 +8,54 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/compiler.h> 11#include <linux/err.h>
12#include <linux/kernel.h>
12#include <linux/inetdevice.h> 13#include <linux/inetdevice.h>
14#include <net/dst.h>
13#include <net/xfrm.h> 15#include <net/xfrm.h>
14#include <net/ip.h> 16#include <net/ip.h>
15 17
16static struct dst_ops xfrm4_dst_ops; 18static struct dst_ops xfrm4_dst_ops;
17static struct xfrm_policy_afinfo xfrm4_policy_afinfo; 19static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
18 20
19static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) 21static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr,
22 xfrm_address_t *daddr)
20{ 23{
21 return __ip_route_output_key((struct rtable**)dst, fl); 24 struct flowi fl = {
22}
23
24static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
25{
26 struct rtable *rt;
27 struct flowi fl_tunnel = {
28 .nl_u = { 25 .nl_u = {
29 .ip4_u = { 26 .ip4_u = {
27 .tos = tos,
30 .daddr = daddr->a4, 28 .daddr = daddr->a4,
31 }, 29 },
32 }, 30 },
33 }; 31 };
32 struct dst_entry *dst;
33 struct rtable *rt;
34 int err;
34 35
35 if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { 36 if (saddr)
36 saddr->a4 = rt->rt_src; 37 fl.fl4_src = saddr->a4;
37 dst_release(&rt->u.dst); 38
38 return 0; 39 err = __ip_route_output_key(&init_net, &rt, &fl);
39 } 40 dst = &rt->u.dst;
40 return -EHOSTUNREACH; 41 if (err)
42 dst = ERR_PTR(err);
43 return dst;
44}
45
46static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
47{
48 struct dst_entry *dst;
49 struct rtable *rt;
50
51 dst = xfrm4_dst_lookup(0, NULL, daddr);
52 if (IS_ERR(dst))
53 return -EHOSTUNREACH;
54
55 rt = (struct rtable *)dst;
56 saddr->a4 = rt->rt_src;
57 dst_release(dst);
58 return 0;
41} 59}
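[Editor's note] The reworked xfrm4_dst_lookup() above returns either a valid dst_entry pointer or an errno encoded with ERR_PTR(), which callers unpack via IS_ERR()/PTR_ERR(). A self-contained user-space re-implementation of the idiom, assuming the usual pointer-width conventions:

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
    /* errno values occupy the top page of the address space */
    return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *dst_lookup(int fail)
{
    static int route;                 /* stands in for a dst_entry */

    if (fail)
        return ERR_PTR(-113);         /* -EHOSTUNREACH on Linux */
    return &route;
}

int main(void)
{
    void *dst = dst_lookup(1);

    if (IS_ERR(dst))
        printf("lookup failed: errno %ld\n", -PTR_ERR(dst));
    else
        printf("got a route\n");
    return 0;
}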
42 60
43static struct dst_entry * 61static struct dst_entry *
@@ -61,142 +79,49 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
61 return dst; 79 return dst;
62} 80}
63 81
64/* Allocate chain of dst_entry's, attach known xfrm's, calculate 82static int xfrm4_get_tos(struct flowi *fl)
65 * all the metrics... Shortly, bundle a bundle. 83{
66 */ 84 return fl->fl4_tos;
85}
67 86
68static int 87static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
69__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, 88 int nfheader_len)
70 struct flowi *fl, struct dst_entry **dst_p)
71{ 89{
72 struct dst_entry *dst, *dst_prev; 90 return 0;
73 struct rtable *rt0 = (struct rtable*)(*dst_p); 91}
74 struct rtable *rt = rt0;
75 struct flowi fl_tunnel = {
76 .nl_u = {
77 .ip4_u = {
78 .saddr = fl->fl4_src,
79 .daddr = fl->fl4_dst,
80 .tos = fl->fl4_tos
81 }
82 }
83 };
84 int i;
85 int err;
86 int header_len = 0;
87 int trailer_len = 0;
88 92
89 dst = dst_prev = NULL; 93static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
90 dst_hold(&rt->u.dst); 94{
95 struct rtable *rt = (struct rtable *)xdst->route;
91 96
92 for (i = 0; i < nx; i++) { 97 xdst->u.rt.fl = rt->fl;
93 struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
94 struct xfrm_dst *xdst;
95 98
96 if (unlikely(dst1 == NULL)) { 99 xdst->u.dst.dev = dev;
97 err = -ENOBUFS; 100 dev_hold(dev);
98 dst_release(&rt->u.dst);
99 goto error;
100 }
101 101
102 if (!dst) 102 xdst->u.rt.idev = in_dev_get(dev);
103 dst = dst1; 103 if (!xdst->u.rt.idev)
104 else { 104 return -ENODEV;
105 dst_prev->child = dst1;
106 dst1->flags |= DST_NOHASH;
107 dst_clone(dst1);
108 }
109 105
110 xdst = (struct xfrm_dst *)dst1; 106 xdst->u.rt.peer = rt->peer;
111 xdst->route = &rt->u.dst; 107 if (rt->peer)
112 xdst->genid = xfrm[i]->genid; 108 atomic_inc(&rt->peer->refcnt);
113
114 dst1->next = dst_prev;
115 dst_prev = dst1;
116
117 header_len += xfrm[i]->props.header_len;
118 trailer_len += xfrm[i]->props.trailer_len;
119
120 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
121 unsigned short encap_family = xfrm[i]->props.family;
122 switch (encap_family) {
123 case AF_INET:
124 fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
125 fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
126 break;
127#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
128 case AF_INET6:
129 ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6);
130 ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6);
131 break;
132#endif
133 default:
134 BUG_ON(1);
135 }
136 err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
137 &fl_tunnel, encap_family);
138 if (err)
139 goto error;
140 } else
141 dst_hold(&rt->u.dst);
142 }
143 109
144 dst_prev->child = &rt->u.dst; 110 /* Sheit... I remember I did this right. Apparently,
145 dst->path = &rt->u.dst; 111 * it was magically lost, so this code needs audit */
146 112 xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
147 *dst_p = dst; 113 RTCF_LOCAL);
148 dst = dst_prev; 114 xdst->u.rt.rt_type = rt->rt_type;
149 115 xdst->u.rt.rt_src = rt->rt_src;
150 dst_prev = *dst_p; 116 xdst->u.rt.rt_dst = rt->rt_dst;
151 i = 0; 117 xdst->u.rt.rt_gateway = rt->rt_gateway;
152 for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { 118 xdst->u.rt.rt_spec_dst = rt->rt_spec_dst;
153 struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
154 x->u.rt.fl = *fl;
155
156 dst_prev->xfrm = xfrm[i++];
157 dst_prev->dev = rt->u.dst.dev;
158 if (rt->u.dst.dev)
159 dev_hold(rt->u.dst.dev);
160 dst_prev->obsolete = -1;
161 dst_prev->flags |= DST_HOST;
162 dst_prev->lastuse = jiffies;
163 dst_prev->header_len = header_len;
164 dst_prev->nfheader_len = 0;
165 dst_prev->trailer_len = trailer_len;
166 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
167
168 /* Copy neighbout for reachability confirmation */
169 dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour);
170 dst_prev->input = rt->u.dst.input;
171 dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
172 if (rt0->peer)
173 atomic_inc(&rt0->peer->refcnt);
174 x->u.rt.peer = rt0->peer;
175 /* Sheit... I remember I did this right. Apparently,
176 * it was magically lost, so this code needs audit */
177 x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
178 x->u.rt.rt_type = rt0->rt_type;
179 x->u.rt.rt_src = rt0->rt_src;
180 x->u.rt.rt_dst = rt0->rt_dst;
181 x->u.rt.rt_gateway = rt0->rt_gateway;
182 x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
183 x->u.rt.idev = rt0->idev;
184 in_dev_hold(rt0->idev);
185 header_len -= x->u.dst.xfrm->props.header_len;
186 trailer_len -= x->u.dst.xfrm->props.trailer_len;
187 }
188 119
189 xfrm_init_pmtu(dst);
190 return 0; 120 return 0;
191
192error:
193 if (dst)
194 dst_free(dst);
195 return err;
196} 121}
197 122
198static void 123static void
199_decode_session4(struct sk_buff *skb, struct flowi *fl) 124_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
200{ 125{
201 struct iphdr *iph = ip_hdr(skb); 126 struct iphdr *iph = ip_hdr(skb);
202 u8 *xprth = skb_network_header(skb) + iph->ihl * 4; 127 u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
@@ -212,8 +137,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
212 if (pskb_may_pull(skb, xprth + 4 - skb->data)) { 137 if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
213 __be16 *ports = (__be16 *)xprth; 138 __be16 *ports = (__be16 *)xprth;
214 139
215 fl->fl_ip_sport = ports[0]; 140 fl->fl_ip_sport = ports[!!reverse];
216 fl->fl_ip_dport = ports[1]; 141 fl->fl_ip_dport = ports[!reverse];
217 } 142 }
218 break; 143 break;
219 144
@@ -255,12 +180,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
255 } 180 }
256 } 181 }
257 fl->proto = iph->protocol; 182 fl->proto = iph->protocol;
258 fl->fl4_dst = iph->daddr; 183 fl->fl4_dst = reverse ? iph->saddr : iph->daddr;
259 fl->fl4_src = iph->saddr; 184 fl->fl4_src = reverse ? iph->daddr : iph->saddr;
260 fl->fl4_tos = iph->tos; 185 fl->fl4_tos = iph->tos;
261} 186}
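[Editor's note] _decode_session4() above gains a reverse flag so one parser can also build the return flow: addresses are swapped, and ports[!!reverse]/ports[!reverse] pick the opposite wire fields. A small demonstration of the indexing trick:

#include <stdio.h>

int main(void)
{
    /* On the wire: ports[0] is the source port, ports[1] the destination. */
    unsigned short ports[2] = { 1024, 80 };
    int reverse;

    for (reverse = 0; reverse <= 1; reverse++) {
        unsigned short sport = ports[!!reverse]; /* forward: wire sport; reverse: wire dport */
        unsigned short dport = ports[!reverse];  /* forward: wire dport; reverse: wire sport */
        printf("reverse=%d -> sport=%u dport=%u\n", reverse, sport, dport);
    }
    return 0;
}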
262 187
263static inline int xfrm4_garbage_collect(void) 188static inline int xfrm4_garbage_collect(struct dst_ops *ops)
264{ 189{
265 xfrm4_policy_afinfo.garbage_collect(); 190 xfrm4_policy_afinfo.garbage_collect();
266 return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); 191 return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
@@ -295,7 +220,8 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 220
296 xdst = (struct xfrm_dst *)dst; 221 xdst = (struct xfrm_dst *)dst;
297 if (xdst->u.rt.idev->dev == dev) { 222 if (xdst->u.rt.idev->dev == dev) {
298 struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev); 223 struct in_device *loopback_idev =
224 in_dev_get(dev->nd_net->loopback_dev);
299 BUG_ON(!loopback_idev); 225 BUG_ON(!loopback_idev);
300 226
301 do { 227 do {
@@ -318,6 +244,7 @@ static struct dst_ops xfrm4_dst_ops = {
318 .update_pmtu = xfrm4_update_pmtu, 244 .update_pmtu = xfrm4_update_pmtu,
319 .destroy = xfrm4_dst_destroy, 245 .destroy = xfrm4_dst_destroy,
320 .ifdown = xfrm4_dst_ifdown, 246 .ifdown = xfrm4_dst_ifdown,
247 .local_out = __ip_local_out,
321 .gc_thresh = 1024, 248 .gc_thresh = 1024,
322 .entry_size = sizeof(struct xfrm_dst), 249 .entry_size = sizeof(struct xfrm_dst),
323}; 250};
@@ -328,8 +255,10 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
328 .dst_lookup = xfrm4_dst_lookup, 255 .dst_lookup = xfrm4_dst_lookup,
329 .get_saddr = xfrm4_get_saddr, 256 .get_saddr = xfrm4_get_saddr,
330 .find_bundle = __xfrm4_find_bundle, 257 .find_bundle = __xfrm4_find_bundle,
331 .bundle_create = __xfrm4_bundle_create,
332 .decode_session = _decode_session4, 258 .decode_session = _decode_session4,
259 .get_tos = xfrm4_get_tos,
260 .init_path = xfrm4_init_path,
261 .fill_dst = xfrm4_fill_dst,
333}; 262};
334 263
335static void __init xfrm4_policy_init(void) 264static void __init xfrm4_policy_init(void)
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 13d54a1c3337..fdeebe68a379 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -11,6 +11,7 @@
11#include <net/xfrm.h> 11#include <net/xfrm.h>
12#include <linux/pfkeyv2.h> 12#include <linux/pfkeyv2.h>
13#include <linux/ipsec.h> 13#include <linux/ipsec.h>
14#include <linux/netfilter_ipv4.h>
14 15
15static struct xfrm_state_afinfo xfrm4_state_afinfo; 16static struct xfrm_state_afinfo xfrm4_state_afinfo;
16 17
@@ -47,12 +48,31 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
47 x->props.family = AF_INET; 48 x->props.family = AF_INET;
48} 49}
49 50
51int xfrm4_extract_header(struct sk_buff *skb)
52{
53 struct iphdr *iph = ip_hdr(skb);
54
55 XFRM_MODE_SKB_CB(skb)->id = iph->id;
56 XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
57 XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
58 XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
59 memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
60 sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
61
62 return 0;
63}
64
50static struct xfrm_state_afinfo xfrm4_state_afinfo = { 65static struct xfrm_state_afinfo xfrm4_state_afinfo = {
51 .family = AF_INET, 66 .family = AF_INET,
67 .proto = IPPROTO_IPIP,
68 .eth_proto = htons(ETH_P_IP),
52 .owner = THIS_MODULE, 69 .owner = THIS_MODULE,
53 .init_flags = xfrm4_init_flags, 70 .init_flags = xfrm4_init_flags,
54 .init_tempsel = __xfrm4_init_tempsel, 71 .init_tempsel = __xfrm4_init_tempsel,
55 .output = xfrm4_output, 72 .output = xfrm4_output,
73 .extract_input = xfrm4_extract_input,
74 .extract_output = xfrm4_extract_output,
75 .transport_finish = xfrm4_transport_finish,
56}; 76};
57 77
58void __init xfrm4_state_init(void) 78void __init xfrm4_state_init(void)
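[Editor's note] xfrm4_extract_header() above stashes the inner IPv4 header fields in the skb's control block (XFRM_MODE_SKB_CB) so later mode hooks can consume them after the header is rewritten. A user-space sketch of the stash, assuming the standard IPv4 header byte offsets; the structs are stand-ins:

#include <stdio.h>
#include <string.h>

struct mode_cb { unsigned char protocol, tos, ttl; };
struct skb { struct mode_cb cb; unsigned char hdr[20]; };

#define MODE_CB(s) (&(s)->cb)

/* Record the inner IPv4 header fields before transformation rewrites them. */
static int extract_header(struct skb *s)
{
    MODE_CB(s)->tos      = s->hdr[1];   /* iph->tos */
    MODE_CB(s)->ttl      = s->hdr[8];   /* iph->ttl */
    MODE_CB(s)->protocol = s->hdr[9];   /* iph->protocol */
    return 0;
}

int main(void)
{
    struct skb s;

    memset(&s, 0, sizeof(s));
    s.hdr[1] = 0x10;                    /* TOS */
    s.hdr[8] = 64;                      /* TTL */
    s.hdr[9] = 6;                       /* TCP */
    extract_header(&s);
    printf("stashed: tos=0x%02x ttl=%u proto=%u\n",
           MODE_CB(&s)->tos, MODE_CB(&s)->ttl, MODE_CB(&s)->protocol);
    return 0;
}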
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 87c23a73d284..24f3aa0f2a35 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -5,11 +5,12 @@
5obj-$(CONFIG_IPV6) += ipv6.o 5obj-$(CONFIG_IPV6) += ipv6.o
6 6
7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ 7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
8 addrlabel.o \
8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ 9 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
9 raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ 10 raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
10 exthdrs.o sysctl_net_ipv6.o datagram.o \ 11 exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
11 ip6_flowlabel.o inet6_connection_sock.o
12 12
13ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ 14ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
14 xfrm6_output.o 15 xfrm6_output.o
15ipv6-$(CONFIG_NETFILTER) += netfilter.o 16ipv6-$(CONFIG_NETFILTER) += netfilter.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e8c347579da9..e40213db9e4c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -101,8 +101,16 @@
101#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) 101#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
102 102
103#ifdef CONFIG_SYSCTL 103#ifdef CONFIG_SYSCTL
104static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); 104static void addrconf_sysctl_register(struct inet6_dev *idev);
105static void addrconf_sysctl_unregister(struct ipv6_devconf *p); 105static void addrconf_sysctl_unregister(struct inet6_dev *idev);
106#else
107static inline void addrconf_sysctl_register(struct inet6_dev *idev)
108{
109}
110
111static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
112{
113}
106#endif 114#endif
107 115
108#ifdef CONFIG_IPV6_PRIVACY 116#ifdef CONFIG_IPV6_PRIVACY
@@ -141,7 +149,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
141 149
142static void inet6_prefix_notify(int event, struct inet6_dev *idev, 150static void inet6_prefix_notify(int event, struct inet6_dev *idev,
143 struct prefix_info *pinfo); 151 struct prefix_info *pinfo);
144static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev); 152static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
153 struct net_device *dev);
145 154
146static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); 155static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
147 156
@@ -256,16 +265,13 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
256static int snmp6_alloc_dev(struct inet6_dev *idev) 265static int snmp6_alloc_dev(struct inet6_dev *idev)
257{ 266{
258 if (snmp_mib_init((void **)idev->stats.ipv6, 267 if (snmp_mib_init((void **)idev->stats.ipv6,
259 sizeof(struct ipstats_mib), 268 sizeof(struct ipstats_mib)) < 0)
260 __alignof__(struct ipstats_mib)) < 0)
261 goto err_ip; 269 goto err_ip;
262 if (snmp_mib_init((void **)idev->stats.icmpv6, 270 if (snmp_mib_init((void **)idev->stats.icmpv6,
263 sizeof(struct icmpv6_mib), 271 sizeof(struct icmpv6_mib)) < 0)
264 __alignof__(struct icmpv6_mib)) < 0)
265 goto err_icmp; 272 goto err_icmp;
266 if (snmp_mib_init((void **)idev->stats.icmpv6msg, 273 if (snmp_mib_init((void **)idev->stats.icmpv6msg,
267 sizeof(struct icmpv6msg_mib), 274 sizeof(struct icmpv6msg_mib)) < 0)
268 __alignof__(struct icmpv6msg_mib)) < 0)
269 goto err_icmpmsg; 275 goto err_icmpmsg;
270 276
271 return 0; 277 return 0;
@@ -329,7 +335,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
329 335
330 rwlock_init(&ndev->lock); 336 rwlock_init(&ndev->lock);
331 ndev->dev = dev; 337 ndev->dev = dev;
332 memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); 338 memcpy(&ndev->cnf, dev->nd_net->ipv6.devconf_dflt, sizeof(ndev->cnf));
333 ndev->cnf.mtu6 = dev->mtu; 339 ndev->cnf.mtu6 = dev->mtu;
334 ndev->cnf.sysctl = NULL; 340 ndev->cnf.sysctl = NULL;
335 ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); 341 ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -366,9 +372,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
366 in6_dev_hold(ndev); 372 in6_dev_hold(ndev);
367 373
368#ifdef CONFIG_IPV6_PRIVACY 374#ifdef CONFIG_IPV6_PRIVACY
369 init_timer(&ndev->regen_timer); 375 setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
370 ndev->regen_timer.function = ipv6_regen_rndid;
371 ndev->regen_timer.data = (unsigned long) ndev;
372 if ((dev->flags&IFF_LOOPBACK) || 376 if ((dev->flags&IFF_LOOPBACK) ||
373 dev->type == ARPHRD_TUNNEL || 377 dev->type == ARPHRD_TUNNEL ||
374#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 378#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
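[Editor's note] The setup_timer() conversion above folds the old three-line init_timer()/function/data sequence into one call. A user-space model of the two forms, with a stand-in timer_list:

#include <stdio.h>

struct timer_list {
    void (*function)(unsigned long);
    unsigned long data;
};

static void setup_timer(struct timer_list *t,
                        void (*fn)(unsigned long), unsigned long data)
{
    t->function = fn;   /* replaces: init_timer(&t); t->function = fn; */
    t->data = data;     /*           t->data = data;                   */
}

static void regen(unsigned long data)
{
    printf("regen rndid for dev %lu\n", data);
}

int main(void)
{
    struct timer_list t;

    setup_timer(&t, regen, 42UL);
    t.function(t.data);   /* simulate the timer firing */
    return 0;
}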
@@ -379,6 +383,13 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
379 "%s: Disabled Privacy Extensions\n", 383 "%s: Disabled Privacy Extensions\n",
380 dev->name); 384 dev->name);
381 ndev->cnf.use_tempaddr = -1; 385 ndev->cnf.use_tempaddr = -1;
386
387 if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
388 printk(KERN_INFO
389 "%s: Disabled Multicast RS\n",
390 dev->name);
391 ndev->cnf.rtr_solicits = 0;
392 }
382 } else { 393 } else {
383 in6_dev_hold(ndev); 394 in6_dev_hold(ndev);
384 ipv6_regen_rndid((unsigned long) ndev); 395 ipv6_regen_rndid((unsigned long) ndev);
@@ -390,13 +401,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
390 401
391 ipv6_mc_init_dev(ndev); 402 ipv6_mc_init_dev(ndev);
392 ndev->tstamp = jiffies; 403 ndev->tstamp = jiffies;
393#ifdef CONFIG_SYSCTL 404 addrconf_sysctl_register(ndev);
394 neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
395 NET_IPV6_NEIGH, "ipv6",
396 &ndisc_ifinfo_sysctl_change,
397 NULL);
398 addrconf_sysctl_register(ndev, &ndev->cnf);
399#endif
400 /* protected by rtnl_lock */ 405 /* protected by rtnl_lock */
401 rcu_assign_pointer(dev->ip6_ptr, ndev); 406 rcu_assign_pointer(dev->ip6_ptr, ndev);
402 407
@@ -452,18 +457,18 @@ static void dev_forward_change(struct inet6_dev *idev)
452} 457}
453 458
454 459
455static void addrconf_forward_change(void) 460static void addrconf_forward_change(struct net *net, __s32 newf)
456{ 461{
457 struct net_device *dev; 462 struct net_device *dev;
458 struct inet6_dev *idev; 463 struct inet6_dev *idev;
459 464
460 read_lock(&dev_base_lock); 465 read_lock(&dev_base_lock);
461 for_each_netdev(&init_net, dev) { 466 for_each_netdev(net, dev) {
462 rcu_read_lock(); 467 rcu_read_lock();
463 idev = __in6_dev_get(dev); 468 idev = __in6_dev_get(dev);
464 if (idev) { 469 if (idev) {
465 int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); 470 int changed = (!idev->cnf.forwarding) ^ (!newf);
466 idev->cnf.forwarding = ipv6_devconf.forwarding; 471 idev->cnf.forwarding = newf;
467 if (changed) 472 if (changed)
468 dev_forward_change(idev); 473 dev_forward_change(idev);
469 } 474 }
@@ -471,6 +476,25 @@ static void addrconf_forward_change(void)
471 } 476 }
472 read_unlock(&dev_base_lock); 477 read_unlock(&dev_base_lock);
473} 478}
479
480static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
481{
482 struct net *net;
483
484 net = (struct net *)table->extra2;
485 if (p == &net->ipv6.devconf_dflt->forwarding)
486 return;
487
488 if (p == &net->ipv6.devconf_all->forwarding) {
489 __s32 newf = net->ipv6.devconf_all->forwarding;
490 net->ipv6.devconf_dflt->forwarding = newf;
491 addrconf_forward_change(net, newf);
492 } else if ((!*p) ^ (!old))
493 dev_forward_change((struct inet6_dev *)table->extra1);
494
495 if (*p)
496 rt6_purge_dflt_routers();
497}
474#endif 498#endif
475 499
476/* Nobody refers to this ifaddr, destroy it */ 500/* Nobody refers to this ifaddr, destroy it */
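[Editor's note] addrconf_fixup_forwarding() above centralizes the three cases the two sysctl handlers used to open-code: a write to "default" has no side effects, a write to "all" fans out to "default" and to every device, and a per-device write toggles just that device. A toy with one pretend device:

#include <stdio.h>

static int devconf_all, devconf_dflt, dev_fwd;   /* one pretend device */

static void fixup_forwarding(int *p, int old)
{
    if (p == &devconf_dflt)
        return;                          /* writes to "default": no fan-out */

    if (p == &devconf_all) {
        devconf_dflt = devconf_all;      /* propagate to "default" ... */
        dev_fwd = devconf_all;           /* ... and to every device */
    } else if ((!*p) ^ (!old)) {
        printf("forwarding toggled on one device\n");
    }

    if (*p)
        printf("purging default routers\n");
}

int main(void)
{
    devconf_all = 1;
    fixup_forwarding(&devconf_all, 0);
    printf("all=%d default=%d dev=%d\n", devconf_all, devconf_dflt, dev_fwd);
    return 0;
}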
@@ -537,7 +561,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
537 write_lock(&addrconf_hash_lock); 561 write_lock(&addrconf_hash_lock);
538 562
539 /* Ignore adding duplicate addresses on an interface */ 563 /* Ignore adding duplicate addresses on an interface */
540 if (ipv6_chk_same_addr(addr, idev->dev)) { 564 if (ipv6_chk_same_addr(&init_net, addr, idev->dev)) {
541 ADBG(("ipv6_add_addr: already assigned\n")); 565 ADBG(("ipv6_add_addr: already assigned\n"));
542 err = -EEXIST; 566 err = -EEXIST;
543 goto out; 567 goto out;
@@ -876,35 +900,6 @@ static inline int ipv6_saddr_preferred(int type)
876 return 0; 900 return 0;
877} 901}
878 902
879/* static matching label */
880static inline int ipv6_saddr_label(const struct in6_addr *addr, int type)
881{
882 /*
883 * prefix (longest match) label
884 * -----------------------------
885 * ::1/128 0
886 * ::/0 1
887 * 2002::/16 2
888 * ::/96 3
889 * ::ffff:0:0/96 4
890 * fc00::/7 5
891 * 2001::/32 6
892 */
893 if (type & IPV6_ADDR_LOOPBACK)
894 return 0;
895 else if (type & IPV6_ADDR_COMPATv4)
896 return 3;
897 else if (type & IPV6_ADDR_MAPPED)
898 return 4;
899 else if (addr->s6_addr32[0] == htonl(0x20010000))
900 return 6;
901 else if (addr->s6_addr16[0] == htons(0x2002))
902 return 2;
903 else if ((addr->s6_addr[0] & 0xfe) == 0xfc)
904 return 5;
905 return 1;
906}
907
908int ipv6_dev_get_saddr(struct net_device *daddr_dev, 903int ipv6_dev_get_saddr(struct net_device *daddr_dev,
909 struct in6_addr *daddr, struct in6_addr *saddr) 904 struct in6_addr *daddr, struct in6_addr *saddr)
910{ 905{
@@ -912,7 +907,8 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
912 struct inet6_ifaddr *ifa_result = NULL; 907 struct inet6_ifaddr *ifa_result = NULL;
913 int daddr_type = __ipv6_addr_type(daddr); 908 int daddr_type = __ipv6_addr_type(daddr);
914 int daddr_scope = __ipv6_addr_src_scope(daddr_type); 909 int daddr_scope = __ipv6_addr_src_scope(daddr_type);
915 u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); 910 int daddr_ifindex = daddr_dev ? daddr_dev->ifindex : 0;
911 u32 daddr_label = ipv6_addr_label(daddr, daddr_type, daddr_ifindex);
916 struct net_device *dev; 912 struct net_device *dev;
917 913
918 memset(&hiscore, 0, sizeof(hiscore)); 914 memset(&hiscore, 0, sizeof(hiscore));
@@ -1085,11 +1081,15 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
1085 1081
1086 /* Rule 6: Prefer matching label */ 1082 /* Rule 6: Prefer matching label */
1087 if (hiscore.rule < 6) { 1083 if (hiscore.rule < 6) {
1088 if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) 1084 if (ipv6_addr_label(&ifa_result->addr,
1085 hiscore.addr_type,
1086 ifa_result->idev->dev->ifindex) == daddr_label)
1089 hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; 1087 hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
1090 hiscore.rule++; 1088 hiscore.rule++;
1091 } 1089 }
1092 if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { 1090 if (ipv6_addr_label(&ifa->addr,
1091 score.addr_type,
1092 ifa->idev->dev->ifindex) == daddr_label) {
1093 score.attrs |= IPV6_SADDR_SCORE_LABEL; 1093 score.attrs |= IPV6_SADDR_SCORE_LABEL;
1094 if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { 1094 if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
1095 score.rule = 6; 1095 score.rule = 6;
@@ -1207,13 +1207,16 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
1207 return cnt; 1207 return cnt;
1208} 1208}
1209 1209
1210int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) 1210int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
1211 struct net_device *dev, int strict)
1211{ 1212{
1212 struct inet6_ifaddr * ifp; 1213 struct inet6_ifaddr * ifp;
1213 u8 hash = ipv6_addr_hash(addr); 1214 u8 hash = ipv6_addr_hash(addr);
1214 1215
1215 read_lock_bh(&addrconf_hash_lock); 1216 read_lock_bh(&addrconf_hash_lock);
1216 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1217 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
1218 if (ifp->idev->dev->nd_net != net)
1219 continue;
1217 if (ipv6_addr_equal(&ifp->addr, addr) && 1220 if (ipv6_addr_equal(&ifp->addr, addr) &&
1218 !(ifp->flags&IFA_F_TENTATIVE)) { 1221 !(ifp->flags&IFA_F_TENTATIVE)) {
1219 if (dev == NULL || ifp->idev->dev == dev || 1222 if (dev == NULL || ifp->idev->dev == dev ||
@@ -1224,16 +1227,18 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
1224 read_unlock_bh(&addrconf_hash_lock); 1227 read_unlock_bh(&addrconf_hash_lock);
1225 return ifp != NULL; 1228 return ifp != NULL;
1226} 1229}
1227
1228EXPORT_SYMBOL(ipv6_chk_addr); 1230EXPORT_SYMBOL(ipv6_chk_addr);
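[Editor's note] Each hash-bucket walk in these hunks gains a namespace filter (ifp->idev->dev->nd_net != net) so lookups only see addresses belonging to the caller's netns. A toy list walk with the same skip-if-foreign filter; strcmp stands in for ipv6_addr_equal:

#include <stdio.h>
#include <string.h>

struct entry { int net_id; const char *addr; struct entry *next; };

static struct entry *find(struct entry *head, int net_id, const char *addr)
{
    for (; head; head = head->next) {
        if (head->net_id != net_id)
            continue;                    /* different namespace: skip */
        if (strcmp(head->addr, addr) == 0)
            return head;
    }
    return NULL;
}

int main(void)
{
    struct entry e2 = { 2, "fe80::1", NULL };    /* same address, other netns */
    struct entry e1 = { 1, "fe80::1", &e2 };

    printf("netns 1: %s\n", find(&e1, 1, "fe80::1") ? "hit" : "miss");
    printf("netns 3: %s\n", find(&e1, 3, "fe80::1") ? "hit" : "miss");
    return 0;
}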
1229 1231
1230static 1232static
1231int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) 1233int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1234 struct net_device *dev)
1232{ 1235{
1233 struct inet6_ifaddr * ifp; 1236 struct inet6_ifaddr * ifp;
1234 u8 hash = ipv6_addr_hash(addr); 1237 u8 hash = ipv6_addr_hash(addr);
1235 1238
1236 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1239 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
1240 if (ifp->idev->dev->nd_net != net)
1241 continue;
1237 if (ipv6_addr_equal(&ifp->addr, addr)) { 1242 if (ipv6_addr_equal(&ifp->addr, addr)) {
1238 if (dev == NULL || ifp->idev->dev == dev) 1243 if (dev == NULL || ifp->idev->dev == dev)
1239 break; 1244 break;
@@ -1242,13 +1247,16 @@ int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
1242 return ifp != NULL; 1247 return ifp != NULL;
1243} 1248}
1244 1249
1245struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict) 1250struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr,
1251 struct net_device *dev, int strict)
1246{ 1252{
1247 struct inet6_ifaddr * ifp; 1253 struct inet6_ifaddr * ifp;
1248 u8 hash = ipv6_addr_hash(addr); 1254 u8 hash = ipv6_addr_hash(addr);
1249 1255
1250 read_lock_bh(&addrconf_hash_lock); 1256 read_lock_bh(&addrconf_hash_lock);
1251 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1257 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
1258 if (ifp->idev->dev->nd_net != net)
1259 continue;
1252 if (ipv6_addr_equal(&ifp->addr, addr)) { 1260 if (ipv6_addr_equal(&ifp->addr, addr)) {
1253 if (dev == NULL || ifp->idev->dev == dev || 1261 if (dev == NULL || ifp->idev->dev == dev ||
1254 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { 1262 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
@@ -1435,6 +1443,9 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1435 return addrconf_ifid_arcnet(eui, dev); 1443 return addrconf_ifid_arcnet(eui, dev);
1436 case ARPHRD_INFINIBAND: 1444 case ARPHRD_INFINIBAND:
1437 return addrconf_ifid_infiniband(eui, dev); 1445 return addrconf_ifid_infiniband(eui, dev);
1446 case ARPHRD_SIT:
1447 if (dev->priv_flags & IFF_ISATAP)
1448 return ipv6_isatap_eui64(eui, *(__be32 *)dev->dev_addr);
1438 } 1449 }
1439 return -1; 1450 return -1;
1440} 1451}
@@ -1470,7 +1481,7 @@ regen:
1470 * 1481 *
1471 * - Reserved subnet anycast (RFC 2526) 1482 * - Reserved subnet anycast (RFC 2526)
1472 * 11111101 11....11 1xxxxxxx 1483 * 11111101 11....11 1xxxxxxx
1473 * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1 1484 * - ISATAP (RFC4214) 6.1
1474 * 00-00-5E-FE-xx-xx-xx-xx 1485 * 00-00-5E-FE-xx-xx-xx-xx
1475 * - value 0 1486 * - value 0
1476 * - XXX: already assigned to an address on the device 1487 * - XXX: already assigned to an address on the device
@@ -1731,7 +1742,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1731 1742
1732ok: 1743ok:
1733 1744
1734 ifp = ipv6_get_ifaddr(&addr, dev, 1); 1745 ifp = ipv6_get_ifaddr(&init_net, &addr, dev, 1);
1735 1746
1736 if (ifp == NULL && valid_lft) { 1747 if (ifp == NULL && valid_lft) {
1737 int max_addresses = in6_dev->cnf.max_addresses; 1748 int max_addresses = in6_dev->cnf.max_addresses;
@@ -1889,7 +1900,7 @@ int addrconf_set_dstaddr(void __user *arg)
1889 p.iph.ihl = 5; 1900 p.iph.ihl = 5;
1890 p.iph.protocol = IPPROTO_IPV6; 1901 p.iph.protocol = IPPROTO_IPV6;
1891 p.iph.ttl = 64; 1902 p.iph.ttl = 64;
1892 ifr.ifr_ifru.ifru_data = (void __user *)&p; 1903 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
1893 1904
1894 oldfs = get_fs(); set_fs(KERNEL_DS); 1905 oldfs = get_fs(); set_fs(KERNEL_DS);
1895 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); 1906 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
@@ -2201,6 +2212,16 @@ static void addrconf_sit_config(struct net_device *dev)
2201 return; 2212 return;
2202 } 2213 }
2203 2214
2215 if (dev->priv_flags & IFF_ISATAP) {
2216 struct in6_addr addr;
2217
2218 ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
2219 addrconf_prefix_route(&addr, 64, dev, 0, 0);
2220 if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
2221 addrconf_add_linklocal(idev, &addr);
2222 return;
2223 }
2224
2204 sit_add_v4_addrs(idev); 2225 sit_add_v4_addrs(idev);
2205 2226
2206 if (dev->flags&IFF_POINTOPOINT) { 2227 if (dev->flags&IFF_POINTOPOINT) {
@@ -2385,15 +2406,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2385 case NETDEV_CHANGENAME: 2406 case NETDEV_CHANGENAME:
2386 if (idev) { 2407 if (idev) {
2387 snmp6_unregister_dev(idev); 2408 snmp6_unregister_dev(idev);
2388#ifdef CONFIG_SYSCTL 2409 addrconf_sysctl_unregister(idev);
2389 addrconf_sysctl_unregister(&idev->cnf); 2410 addrconf_sysctl_register(idev);
2390 neigh_sysctl_unregister(idev->nd_parms);
2391 neigh_sysctl_register(dev, idev->nd_parms,
2392 NET_IPV6, NET_IPV6_NEIGH, "ipv6",
2393 &ndisc_ifinfo_sysctl_change,
2394 NULL);
2395 addrconf_sysctl_register(idev, &idev->cnf);
2396#endif
2397 err = snmp6_register_dev(idev); 2411 err = snmp6_register_dev(idev);
2398 if (err) 2412 if (err)
2399 return notifier_from_errno(err); 2413 return notifier_from_errno(err);
@@ -2517,10 +2531,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2517 /* Shot the device (if unregistered) */ 2531 /* Shot the device (if unregistered) */
2518 2532
2519 if (how == 1) { 2533 if (how == 1) {
2520#ifdef CONFIG_SYSCTL 2534 addrconf_sysctl_unregister(idev);
2521 addrconf_sysctl_unregister(&idev->cnf);
2522 neigh_sysctl_unregister(idev->nd_parms);
2523#endif
2524 neigh_parms_release(&nd_tbl, idev->nd_parms); 2535 neigh_parms_release(&nd_tbl, idev->nd_parms);
2525 neigh_ifdown(&nd_tbl, dev); 2536 neigh_ifdown(&nd_tbl, dev);
2526 in6_dev_put(idev); 2537 in6_dev_put(idev);
@@ -2734,6 +2745,7 @@ static void addrconf_dad_run(struct inet6_dev *idev) {
2734 2745
2735#ifdef CONFIG_PROC_FS 2746#ifdef CONFIG_PROC_FS
2736struct if6_iter_state { 2747struct if6_iter_state {
2748 struct seq_net_private p;
2737 int bucket; 2749 int bucket;
2738}; 2750};
2739 2751
@@ -2741,9 +2753,13 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
2741{ 2753{
2742 struct inet6_ifaddr *ifa = NULL; 2754 struct inet6_ifaddr *ifa = NULL;
2743 struct if6_iter_state *state = seq->private; 2755 struct if6_iter_state *state = seq->private;
2756 struct net *net = state->p.net;
2744 2757
2745 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { 2758 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
2746 ifa = inet6_addr_lst[state->bucket]; 2759 ifa = inet6_addr_lst[state->bucket];
2760
2761 while (ifa && ifa->idev->dev->nd_net != net)
2762 ifa = ifa->lst_next;
2747 if (ifa) 2763 if (ifa)
2748 break; 2764 break;
2749 } 2765 }
@@ -2753,13 +2769,22 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
2753static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) 2769static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
2754{ 2770{
2755 struct if6_iter_state *state = seq->private; 2771 struct if6_iter_state *state = seq->private;
2772 struct net *net = state->p.net;
2756 2773
2757 ifa = ifa->lst_next; 2774 ifa = ifa->lst_next;
2758try_again: 2775try_again:
2776 if (ifa) {
2777 if (ifa->idev->dev->nd_net != net) {
2778 ifa = ifa->lst_next;
2779 goto try_again;
2780 }
2781 }
2782
2759 if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { 2783 if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
2760 ifa = inet6_addr_lst[state->bucket]; 2784 ifa = inet6_addr_lst[state->bucket];
2761 goto try_again; 2785 goto try_again;
2762 } 2786 }
2787
2763 return ifa; 2788 return ifa;
2764} 2789}
2765 2790
@@ -2774,6 +2799,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
2774} 2799}
2775 2800
2776static void *if6_seq_start(struct seq_file *seq, loff_t *pos) 2801static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
2802 __acquires(addrconf_hash_lock)
2777{ 2803{
2778 read_lock_bh(&addrconf_hash_lock); 2804 read_lock_bh(&addrconf_hash_lock);
2779 return if6_get_idx(seq, *pos); 2805 return if6_get_idx(seq, *pos);
@@ -2789,6 +2815,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2789} 2815}
2790 2816
2791static void if6_seq_stop(struct seq_file *seq, void *v) 2817static void if6_seq_stop(struct seq_file *seq, void *v)
2818 __releases(addrconf_hash_lock)
2792{ 2819{
2793 read_unlock_bh(&addrconf_hash_lock); 2820 read_unlock_bh(&addrconf_hash_lock);
2794} 2821}
@@ -2816,8 +2843,8 @@ static const struct seq_operations if6_seq_ops = {
2816 2843
2817static int if6_seq_open(struct inode *inode, struct file *file) 2844static int if6_seq_open(struct inode *inode, struct file *file)
2818{ 2845{
2819 return seq_open_private(file, &if6_seq_ops, 2846 return seq_open_net(inode, file, &if6_seq_ops,
2820 sizeof(struct if6_iter_state)); 2847 sizeof(struct if6_iter_state));
2821} 2848}
2822 2849
2823static const struct file_operations if6_fops = { 2850static const struct file_operations if6_fops = {
@@ -2825,31 +2852,48 @@ static const struct file_operations if6_fops = {
2825 .open = if6_seq_open, 2852 .open = if6_seq_open,
2826 .read = seq_read, 2853 .read = seq_read,
2827 .llseek = seq_lseek, 2854 .llseek = seq_lseek,
2828 .release = seq_release_private, 2855 .release = seq_release_net,
2829}; 2856};
2830 2857
2831int __init if6_proc_init(void) 2858static int if6_proc_net_init(struct net *net)
2832{ 2859{
2833 if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) 2860 if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops))
2834 return -ENOMEM; 2861 return -ENOMEM;
2835 return 0; 2862 return 0;
2836} 2863}
2837 2864
2865static void if6_proc_net_exit(struct net *net)
2866{
2867 proc_net_remove(net, "if_inet6");
2868}
2869
2870static struct pernet_operations if6_proc_net_ops = {
2871 .init = if6_proc_net_init,
2872 .exit = if6_proc_net_exit,
2873};
2874
2875int __init if6_proc_init(void)
2876{
2877 return register_pernet_subsys(&if6_proc_net_ops);
2878}
2879
2838void if6_proc_exit(void) 2880void if6_proc_exit(void)
2839{ 2881{
2840 proc_net_remove(&init_net, "if_inet6"); 2882 unregister_pernet_subsys(&if6_proc_net_ops);
2841} 2883}
2842#endif /* CONFIG_PROC_FS */ 2884#endif /* CONFIG_PROC_FS */
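[Editor's note] The hunk above converts /proc/net/if_inet6 to per-namespace creation through pernet_operations. A user-space toy of the init/exit pattern; the registration machinery is simulated here, not the kernel's:

#include <stdio.h>

struct net { const char *name; };

struct pernet_operations {
    int  (*init)(struct net *net);
    void (*exit)(struct net *net);
};

static int if6_init(struct net *net)
{
    printf("create %s:/proc/net/if_inet6\n", net->name);
    return 0;
}

static void if6_exit(struct net *net)
{
    printf("remove %s:/proc/net/if_inet6\n", net->name);
}

static struct pernet_operations ops = { .init = if6_init, .exit = if6_exit };

int main(void)
{
    struct net init_net = { "init_net" }, other = { "netns1" };

    ops.init(&init_net);   /* registration walks the existing namespaces ... */
    ops.init(&other);      /* ... and init also runs for each new one */
    ops.exit(&other);
    ops.exit(&init_net);
    return 0;
}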
2843 2885
2844#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 2886#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
2845/* Check if address is a home address configured on any interface. */ 2887/* Check if address is a home address configured on any interface. */
2846int ipv6_chk_home_addr(struct in6_addr *addr) 2888int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
2847{ 2889{
2848 int ret = 0; 2890 int ret = 0;
2849 struct inet6_ifaddr * ifp; 2891 struct inet6_ifaddr * ifp;
2850 u8 hash = ipv6_addr_hash(addr); 2892 u8 hash = ipv6_addr_hash(addr);
2851 read_lock_bh(&addrconf_hash_lock); 2893 read_lock_bh(&addrconf_hash_lock);
2852 for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { 2894 for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
2895 if (ifp->idev->dev->nd_net != net)
2896 continue;
2853 if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && 2897 if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
2854 (ifp->flags & IFA_F_HOMEADDRESS)) { 2898 (ifp->flags & IFA_F_HOMEADDRESS)) {
2855 ret = 1; 2899 ret = 1;
@@ -2997,11 +3041,15 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
2997static int 3041static int
2998inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 3042inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2999{ 3043{
3044 struct net *net = skb->sk->sk_net;
3000 struct ifaddrmsg *ifm; 3045 struct ifaddrmsg *ifm;
3001 struct nlattr *tb[IFA_MAX+1]; 3046 struct nlattr *tb[IFA_MAX+1];
3002 struct in6_addr *pfx; 3047 struct in6_addr *pfx;
3003 int err; 3048 int err;
3004 3049
3050 if (net != &init_net)
3051 return -EINVAL;
3052
3005 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); 3053 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
3006 if (err < 0) 3054 if (err < 0)
3007 return err; 3055 return err;
@@ -3054,6 +3102,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
3054static int 3102static int
3055inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 3103inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
3056{ 3104{
3105 struct net *net = skb->sk->sk_net;
3057 struct ifaddrmsg *ifm; 3106 struct ifaddrmsg *ifm;
3058 struct nlattr *tb[IFA_MAX+1]; 3107 struct nlattr *tb[IFA_MAX+1];
3059 struct in6_addr *pfx; 3108 struct in6_addr *pfx;
@@ -3063,6 +3112,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
3063 u8 ifa_flags; 3112 u8 ifa_flags;
3064 int err; 3113 int err;
3065 3114
3115 if (net != &init_net)
3116 return -EINVAL;
3117
3066 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); 3118 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
3067 if (err < 0) 3119 if (err < 0)
3068 return err; 3120 return err;
@@ -3090,7 +3142,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
3090 /* We ignore other flags so far. */ 3142 /* We ignore other flags so far. */
3091 ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); 3143 ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
3092 3144
3093 ifa = ipv6_get_ifaddr(pfx, dev, 1); 3145 ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
3094 if (ifa == NULL) { 3146 if (ifa == NULL) {
3095 /* 3147 /*
3096 * It would be best to check for !NLM_F_CREATE here but 3148 * It would be best to check for !NLM_F_CREATE here but
@@ -3283,11 +3335,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3283 ifa = ifa->if_next, ip_idx++) { 3335 ifa = ifa->if_next, ip_idx++) {
3284 if (ip_idx < s_ip_idx) 3336 if (ip_idx < s_ip_idx)
3285 continue; 3337 continue;
3286 if ((err = inet6_fill_ifaddr(skb, ifa, 3338 err = inet6_fill_ifaddr(skb, ifa,
3287 NETLINK_CB(cb->skb).pid, 3339 NETLINK_CB(cb->skb).pid,
3288 cb->nlh->nlmsg_seq, RTM_NEWADDR, 3340 cb->nlh->nlmsg_seq,
3289 NLM_F_MULTI)) <= 0) 3341 RTM_NEWADDR,
3290 goto done; 3342 NLM_F_MULTI);
3291 } 3343 }
3292 break; 3344 break;
3293 case MULTICAST_ADDR: 3345 case MULTICAST_ADDR:
@@ -3296,11 +3348,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3296 ifmca = ifmca->next, ip_idx++) { 3348 ifmca = ifmca->next, ip_idx++) {
3297 if (ip_idx < s_ip_idx) 3349 if (ip_idx < s_ip_idx)
3298 continue; 3350 continue;
3299 if ((err = inet6_fill_ifmcaddr(skb, ifmca, 3351 err = inet6_fill_ifmcaddr(skb, ifmca,
3300 NETLINK_CB(cb->skb).pid, 3352 NETLINK_CB(cb->skb).pid,
3301 cb->nlh->nlmsg_seq, RTM_GETMULTICAST, 3353 cb->nlh->nlmsg_seq,
3302 NLM_F_MULTI)) <= 0) 3354 RTM_GETMULTICAST,
3303 goto done; 3355 NLM_F_MULTI);
3304 } 3356 }
3305 break; 3357 break;
3306 case ANYCAST_ADDR: 3358 case ANYCAST_ADDR:
@@ -3309,11 +3361,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3309 ifaca = ifaca->aca_next, ip_idx++) { 3361 ifaca = ifaca->aca_next, ip_idx++) {
3310 if (ip_idx < s_ip_idx) 3362 if (ip_idx < s_ip_idx)
3311 continue; 3363 continue;
3312 if ((err = inet6_fill_ifacaddr(skb, ifaca, 3364 err = inet6_fill_ifacaddr(skb, ifaca,
3313 NETLINK_CB(cb->skb).pid, 3365 NETLINK_CB(cb->skb).pid,
3314 cb->nlh->nlmsg_seq, RTM_GETANYCAST, 3366 cb->nlh->nlmsg_seq,
3315 NLM_F_MULTI)) <= 0) 3367 RTM_GETANYCAST,
3316 goto done; 3368 NLM_F_MULTI);
3317 } 3369 }
3318 break; 3370 break;
3319 default: 3371 default:
@@ -3321,14 +3373,12 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3321 } 3373 }
3322 read_unlock_bh(&idev->lock); 3374 read_unlock_bh(&idev->lock);
3323 in6_dev_put(idev); 3375 in6_dev_put(idev);
3376
3377 if (err <= 0)
3378 break;
3324cont: 3379cont:
3325 idx++; 3380 idx++;
3326 } 3381 }
3327done:
3328 if (err <= 0) {
3329 read_unlock_bh(&idev->lock);
3330 in6_dev_put(idev);
3331 }
3332 cb->args[0] = idx; 3382 cb->args[0] = idx;
3333 cb->args[1] = ip_idx; 3383 cb->args[1] = ip_idx;
3334 return skb->len; 3384 return skb->len;
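[Editor's note] The dump refactor above drops the done: label: a failed fill now simply breaks the loop, and the cursor saved in cb->args[] lets netlink re-invoke the callback to resume where the previous skb filled up. A toy cursor-based dump showing that resume convention:

#include <stdio.h>

struct cb { int args[2]; };

static int dump(struct cb *cb, int budget, int total)
{
    int idx = cb->args[0], done = 0;

    while (idx < total && done < budget) {   /* stop when the "skb" is full */
        printf("fill entry %d\n", idx);
        idx++;
        done++;
    }
    cb->args[0] = idx;                       /* resume point for the next call */
    return done;                             /* 0 means the dump finished */
}

int main(void)
{
    struct cb cb = { {0, 0} };

    while (dump(&cb, 2, 5) > 0)
        printf("-- skb flushed --\n");
    return 0;
}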
@@ -3336,26 +3386,42 @@ done:
3336 3386
3337static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 3387static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
3338{ 3388{
3389 struct net *net = skb->sk->sk_net;
3339 enum addr_type_t type = UNICAST_ADDR; 3390 enum addr_type_t type = UNICAST_ADDR;
3391
3392 if (net != &init_net)
3393 return 0;
3394
3340 return inet6_dump_addr(skb, cb, type); 3395 return inet6_dump_addr(skb, cb, type);
3341} 3396}
3342 3397
3343static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) 3398static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
3344{ 3399{
3400 struct net *net = skb->sk->sk_net;
3345 enum addr_type_t type = MULTICAST_ADDR; 3401 enum addr_type_t type = MULTICAST_ADDR;
3402
3403 if (net != &init_net)
3404 return 0;
3405
3346 return inet6_dump_addr(skb, cb, type); 3406 return inet6_dump_addr(skb, cb, type);
3347} 3407}
3348 3408
3349 3409
3350static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) 3410static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
3351{ 3411{
3412 struct net *net = skb->sk->sk_net;
3352 enum addr_type_t type = ANYCAST_ADDR; 3413 enum addr_type_t type = ANYCAST_ADDR;
3414
3415 if (net != &init_net)
3416 return 0;
3417
3353 return inet6_dump_addr(skb, cb, type); 3418 return inet6_dump_addr(skb, cb, type);
3354} 3419}
3355 3420
3356static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, 3421static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3357 void *arg) 3422 void *arg)
3358{ 3423{
3424 struct net *net = in_skb->sk->sk_net;
3359 struct ifaddrmsg *ifm; 3425 struct ifaddrmsg *ifm;
3360 struct nlattr *tb[IFA_MAX+1]; 3426 struct nlattr *tb[IFA_MAX+1];
3361 struct in6_addr *addr = NULL; 3427 struct in6_addr *addr = NULL;
@@ -3364,6 +3430,9 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3364 struct sk_buff *skb; 3430 struct sk_buff *skb;
3365 int err; 3431 int err;
3366 3432
3433 if (net != &init_net)
3434 return -EINVAL;
3435
3367 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); 3436 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
3368 if (err < 0) 3437 if (err < 0)
3369 goto errout; 3438 goto errout;
@@ -3378,7 +3447,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3378 if (ifm->ifa_index) 3447 if (ifm->ifa_index)
3379 dev = __dev_get_by_index(&init_net, ifm->ifa_index); 3448 dev = __dev_get_by_index(&init_net, ifm->ifa_index);
3380 3449
3381 if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { 3450 if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) {
3382 err = -EADDRNOTAVAIL; 3451 err = -EADDRNOTAVAIL;
3383 goto errout; 3452 goto errout;
3384 } 3453 }
@@ -3396,7 +3465,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3396 kfree_skb(skb); 3465 kfree_skb(skb);
3397 goto errout_ifa; 3466 goto errout_ifa;
3398 } 3467 }
3399 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 3468 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
3400errout_ifa: 3469errout_ifa:
3401 in6_ifa_put(ifa); 3470 in6_ifa_put(ifa);
3402errout: 3471errout:
@@ -3419,10 +3488,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
3419 kfree_skb(skb); 3488 kfree_skb(skb);
3420 goto errout; 3489 goto errout;
3421 } 3490 }
3422 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 3491 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3423errout: 3492errout:
3424 if (err < 0) 3493 if (err < 0)
3425 rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); 3494 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
3426} 3495}
3427 3496
3428static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, 3497static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -3581,11 +3650,15 @@ nla_put_failure:
3581 3650
3582static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 3651static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
3583{ 3652{
3653 struct net *net = skb->sk->sk_net;
3584 int idx, err; 3654 int idx, err;
3585 int s_idx = cb->args[0]; 3655 int s_idx = cb->args[0];
3586 struct net_device *dev; 3656 struct net_device *dev;
3587 struct inet6_dev *idev; 3657 struct inet6_dev *idev;
3588 3658
3659 if (net != &init_net)
3660 return 0;
3661
3589 read_lock(&dev_base_lock); 3662 read_lock(&dev_base_lock);
3590 idx = 0; 3663 idx = 0;
3591 for_each_netdev(&init_net, dev) { 3664 for_each_netdev(&init_net, dev) {
@@ -3623,10 +3696,10 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3623 kfree_skb(skb); 3696 kfree_skb(skb);
3624 goto errout; 3697 goto errout;
3625 } 3698 }
3626 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 3699 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3627errout: 3700errout:
3628 if (err < 0) 3701 if (err < 0)
3629 rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); 3702 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
3630} 3703}
3631 3704
3632static inline size_t inet6_prefix_nlmsg_size(void) 3705static inline size_t inet6_prefix_nlmsg_size(void)
@@ -3692,10 +3765,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3692 kfree_skb(skb); 3765 kfree_skb(skb);
3693 goto errout; 3766 goto errout;
3694 } 3767 }
3695 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); 3768 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
3696errout: 3769errout:
3697 if (err < 0) 3770 if (err < 0)
3698 rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); 3771 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
3699} 3772}
3700 3773
3701static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 3774static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -3746,22 +3819,8 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
3746 3819
3747 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 3820 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
3748 3821
3749 if (write && valp != &ipv6_devconf_dflt.forwarding) { 3822 if (write)
3750 if (valp != &ipv6_devconf.forwarding) { 3823 addrconf_fixup_forwarding(ctl, valp, val);
3751 if ((!*valp) ^ (!val)) {
3752 struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
3753 if (idev == NULL)
3754 return ret;
3755 dev_forward_change(idev);
3756 }
3757 } else {
3758 ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
3759 addrconf_forward_change();
3760 }
3761 if (*valp)
3762 rt6_purge_dflt_routers();
3763 }
3764
3765 return ret; 3824 return ret;
3766} 3825}
3767 3826
@@ -3772,6 +3831,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
3772 void __user *newval, size_t newlen) 3831 void __user *newval, size_t newlen)
3773{ 3832{
3774 int *valp = table->data; 3833 int *valp = table->data;
3834 int val = *valp;
3775 int new; 3835 int new;
3776 3836
3777 if (!newval || !newlen) 3837 if (!newval || !newlen)
@@ -3796,26 +3856,8 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
3796 } 3856 }
3797 } 3857 }
3798 3858
3799 if (valp != &ipv6_devconf_dflt.forwarding) { 3859 *valp = new;
3800 if (valp != &ipv6_devconf.forwarding) { 3860 addrconf_fixup_forwarding(table, valp, val);
3801 struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
3802 int changed;
3803 if (unlikely(idev == NULL))
3804 return -ENODEV;
3805 changed = (!*valp) ^ (!new);
3806 *valp = new;
3807 if (changed)
3808 dev_forward_change(idev);
3809 } else {
3810 *valp = new;
3811 addrconf_forward_change();
3812 }
3813
3814 if (*valp)
3815 rt6_purge_dflt_routers();
 	} else
 		*valp = new;

 	return 1;
 }

@@ -3823,10 +3865,7 @@ static struct addrconf_sysctl_table
 {
 	struct ctl_table_header *sysctl_header;
 	ctl_table addrconf_vars[__NET_IPV6_MAX];
-	ctl_table addrconf_dev[2];
-	ctl_table addrconf_conf_dir[2];
-	ctl_table addrconf_proto_dir[2];
-	ctl_table addrconf_root_dir[2];
+	char *dev_name;
 } addrconf_sysctl __read_mostly = {
 	.sysctl_header = NULL,
 	.addrconf_vars = {
@@ -4047,72 +4086,33 @@ static struct addrconf_sysctl_table
 			.ctl_name	=	0,	/* sentinel */
 		}
 	},
-	.addrconf_dev = {
-		{
-			.ctl_name	=	NET_PROTO_CONF_ALL,
-			.procname	=	"all",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_vars,
-		},
-		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_conf_dir = {
-		{
-			.ctl_name	=	NET_IPV6_CONF,
-			.procname	=	"conf",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_dev,
-		},
-		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_proto_dir = {
-		{
-			.ctl_name	=	NET_IPV6,
-			.procname	=	"ipv6",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_conf_dir,
-		},
-		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
-	.addrconf_root_dir = {
-		{
-			.ctl_name	=	CTL_NET,
-			.procname	=	"net",
-			.mode		=	0555,
-			.child		=	addrconf_sysctl.addrconf_proto_dir,
-		},
-		{
-			.ctl_name	=	0,	/* sentinel */
-		}
-	},
 };

-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+static int __addrconf_sysctl_register(struct net *net, char *dev_name,
+		int ctl_name, struct inet6_dev *idev, struct ipv6_devconf *p)
 {
 	int i;
-	struct net_device *dev = idev ? idev->dev : NULL;
 	struct addrconf_sysctl_table *t;
-	char *dev_name = NULL;
+
+#define ADDRCONF_CTL_PATH_DEV	3
+
+	struct ctl_path addrconf_ctl_path[] = {
+		{ .procname = "net", .ctl_name = CTL_NET, },
+		{ .procname = "ipv6", .ctl_name = NET_IPV6, },
+		{ .procname = "conf", .ctl_name = NET_IPV6_CONF, },
+		{ /* to be set */ },
+		{ },
+	};
+

 	t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
 	if (t == NULL)
-		return;
+		goto out;
+
 	for (i=0; t->addrconf_vars[i].data; i++) {
 		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
-	}
-	if (dev) {
-		dev_name = dev->name;
-		t->addrconf_dev[0].ctl_name = dev->ifindex;
-	} else {
-		dev_name = "default";
-		t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+		t->addrconf_vars[i].extra2 = net;
 	}

 	/*
@@ -4120,47 +4120,126 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf
 	 * by sysctl and we wouldn't want anyone to change it under our feet
 	 * (see SIOCSIFNAME).
 	 */
-	dev_name = kstrdup(dev_name, GFP_KERNEL);
-	if (!dev_name)
+	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
+	if (!t->dev_name)
 		goto free;

-	t->addrconf_dev[0].procname = dev_name;
-
-	t->addrconf_dev[0].child = t->addrconf_vars;
-	t->addrconf_conf_dir[0].child = t->addrconf_dev;
-	t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
-	t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
+	addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name;
+	addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name;

-	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir);
+	t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path,
+						     t->addrconf_vars);
 	if (t->sysctl_header == NULL)
 		goto free_procname;
-	else
-		p->sysctl = t;
-	return;

-	/* error path */
- free_procname:
-	kfree(dev_name);
- free:
+	p->sysctl = t;
+	return 0;
+
+free_procname:
+	kfree(t->dev_name);
+free:
 	kfree(t);
+out:
+	return -ENOBUFS;
+}

-	return;
+static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
+{
+	struct addrconf_sysctl_table *t;
+
+	if (p->sysctl == NULL)
+		return;
+
+	t = p->sysctl;
+	p->sysctl = NULL;
+	unregister_sysctl_table(t->sysctl_header);
+	kfree(t->dev_name);
+	kfree(t);
 }

-static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
+static void addrconf_sysctl_register(struct inet6_dev *idev)
 {
-	if (p->sysctl) {
-		struct addrconf_sysctl_table *t = p->sysctl;
-		p->sysctl = NULL;
-		unregister_sysctl_table(t->sysctl_header);
-		kfree(t->addrconf_dev[0].procname);
-		kfree(t);
-	}
+	neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
+			      NET_IPV6_NEIGH, "ipv6",
+			      &ndisc_ifinfo_sysctl_change,
+			      NULL);
+	__addrconf_sysctl_register(idev->dev->nd_net, idev->dev->name,
+				   idev->dev->ifindex, idev, &idev->cnf);
+}
+
+static void addrconf_sysctl_unregister(struct inet6_dev *idev)
+{
+	__addrconf_sysctl_unregister(&idev->cnf);
+	neigh_sysctl_unregister(idev->nd_parms);
 }


 #endif

+static int addrconf_init_net(struct net *net)
+{
+	int err;
+	struct ipv6_devconf *all, *dflt;
+
+	err = -ENOMEM;
+	all = &ipv6_devconf;
+	dflt = &ipv6_devconf_dflt;
+
+	if (net != &init_net) {
+		all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
+		if (all == NULL)
+			goto err_alloc_all;
+
+		dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
+		if (dflt == NULL)
+			goto err_alloc_dflt;
+	}
+
+	net->ipv6.devconf_all = all;
+	net->ipv6.devconf_dflt = dflt;
+
+#ifdef CONFIG_SYSCTL
+	err = __addrconf_sysctl_register(net, "all", NET_PROTO_CONF_ALL,
+			NULL, all);
+	if (err < 0)
+		goto err_reg_all;
+
+	err = __addrconf_sysctl_register(net, "default", NET_PROTO_CONF_DEFAULT,
+			NULL, dflt);
+	if (err < 0)
+		goto err_reg_dflt;
+#endif
+	return 0;
+
+#ifdef CONFIG_SYSCTL
+err_reg_dflt:
+	__addrconf_sysctl_unregister(all);
+err_reg_all:
+	kfree(dflt);
+#endif
+err_alloc_dflt:
+	kfree(all);
+err_alloc_all:
+	return err;
+}
+
+static void addrconf_exit_net(struct net *net)
+{
+#ifdef CONFIG_SYSCTL
+	__addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
+	__addrconf_sysctl_unregister(net->ipv6.devconf_all);
+#endif
+	if (net != &init_net) {
+		kfree(net->ipv6.devconf_dflt);
+		kfree(net->ipv6.devconf_all);
+	}
+}
+
+static struct pernet_operations addrconf_ops = {
+	.init = addrconf_init_net,
+	.exit = addrconf_exit_net,
+};
+
 /*
  *	Device notifier
  */
@@ -4185,7 +4264,15 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier);

 int __init addrconf_init(void)
 {
-	int err = 0;
+	int err;
+
+	if ((err = ipv6_addr_label_init()) < 0) {
+		printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n",
+			err);
+		return err;
+	}
+
+	register_pernet_subsys(&addrconf_ops);

 	/* The addrconf netdev notifier requires that loopback_dev
 	 * has it's ipv6 private information allocated and setup
@@ -4210,7 +4297,7 @@ int __init addrconf_init(void)
 		err = -ENOMEM;
 	rtnl_unlock();
 	if (err)
-		return err;
+		goto errlo;

 	ip6_null_entry.u.dst.dev = init_net.loopback_dev;
 	ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev);
@@ -4236,20 +4323,18 @@ int __init addrconf_init(void)
 	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
 	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);

-#ifdef CONFIG_SYSCTL
-	addrconf_sysctl.sysctl_header =
-		register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
-	addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
-#endif
+	ipv6_addr_label_rtnl_register();

 	return 0;
 errout:
 	unregister_netdevice_notifier(&ipv6_dev_notf);
+errlo:
+	unregister_pernet_subsys(&addrconf_ops);

 	return err;
 }

-void __exit addrconf_cleanup(void)
+void addrconf_cleanup(void)
 {
 	struct net_device *dev;
 	struct inet6_ifaddr *ifa;
@@ -4257,10 +4342,7 @@ void __exit addrconf_cleanup(void)

 	unregister_netdevice_notifier(&ipv6_dev_notf);

-#ifdef CONFIG_SYSCTL
-	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-	addrconf_sysctl_unregister(&ipv6_devconf);
-#endif
+	unregister_pernet_subsys(&addrconf_ops);

 	rtnl_lock();

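
The hunks above drop the four chained directory tables (net -> ipv6 -> conf -> device) in favour of a ctl_path walk over a single flat variable table, registered per network namespace. As a rough illustration of that pattern -- not code from this patch; the table name, variable and per-device ctl_name below are invented -- a per-device registration now has this shape, using the same register_net_sysctl_table() call the patch introduces:

	/* Illustrative sketch only; assumes the 2.6.25-era sysctl API. */
	static int demo_forwarding;

	static struct ctl_table demo_vars[] = {
		{
			.ctl_name	= NET_IPV6_FORWARDING,
			.procname	= "forwarding",
			.data		= &demo_forwarding,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{ .ctl_name = 0 },	/* sentinel */
	};

	static struct ctl_path demo_path[] = {
		{ .procname = "net",  .ctl_name = CTL_NET, },
		{ .procname = "ipv6", .ctl_name = NET_IPV6, },
		{ .procname = "conf", .ctl_name = NET_IPV6_CONF, },
		{ .procname = "eth0", .ctl_name = 2, },	/* leaf, filled at run time */
		{ },
	};

	/* One call walks the path and hangs the flat table off the leaf,
	 * replacing the old addrconf_dev/conf_dir/proto_dir/root_dir chain: */
	struct ctl_table_header *hdr =
		register_net_sysctl_table(net, demo_path, demo_vars);

With the path built on the stack, unregistration only needs the returned header plus the kstrdup()'d device name, which is exactly what __addrconf_sysctl_unregister() frees.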
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
new file mode 100644
index 000000000000..a3c5a72218fd
--- /dev/null
+++ b/net/ipv6/addrlabel.c
@@ -0,0 +1,561 @@
1/*
2 * IPv6 Address Label subsystem
3 * for the IPv6 "Default" Source Address Selection
4 *
5 * Copyright (C)2007 USAGI/WIDE Project
6 */
7/*
8 * Author:
9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
10 */
11
12#include <linux/kernel.h>
13#include <linux/list.h>
14#include <linux/rcupdate.h>
15#include <linux/in6.h>
16#include <net/addrconf.h>
17#include <linux/if_addrlabel.h>
18#include <linux/netlink.h>
19#include <linux/rtnetlink.h>
20
21#if 0
22#define ADDRLABEL(x...) printk(x)
23#else
24#define ADDRLABEL(x...) do { ; } while(0)
25#endif
26
27/*
28 * Policy Table
29 */
30struct ip6addrlbl_entry
31{
32 struct in6_addr prefix;
33 int prefixlen;
34 int ifindex;
35 int addrtype;
36 u32 label;
37 struct hlist_node list;
38 atomic_t refcnt;
39 struct rcu_head rcu;
40};
41
42static struct ip6addrlbl_table
43{
44 struct hlist_head head;
45 spinlock_t lock;
46 u32 seq;
47} ip6addrlbl_table;
48
49/*
50 * Default policy table (RFC3484 + extensions)
51 *
52 * prefix addr_type label
53 * -------------------------------------------------------------------------
54 * ::1/128 LOOPBACK 0
55 * ::/0 N/A 1
56 * 2002::/16 N/A 2
57 * ::/96 COMPATv4 3
58 * ::ffff:0:0/96 V4MAPPED 4
59 * fc00::/7 N/A 5 ULA (RFC 4193)
60 * 2001::/32 N/A 6 Teredo (RFC 4380)
61 *
62 * Note: 0xffffffff is used if we do not have any policies.
63 */
64
65#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
66
67static const __initdata struct ip6addrlbl_init_table
68{
69 const struct in6_addr *prefix;
70 int prefixlen;
71 u32 label;
72} ip6addrlbl_init_table[] = {
73 { /* ::/0 */
74 .prefix = &in6addr_any,
75 .label = 1,
76 },{ /* fc00::/7 */
77 .prefix = &(struct in6_addr){{{ 0xfc }}},
78 .prefixlen = 7,
79 .label = 5,
80 },{ /* 2002::/16 */
81 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
82 .prefixlen = 16,
83 .label = 2,
84 },{ /* 2001::/32 */
85 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
86 .prefixlen = 32,
87 .label = 6,
88 },{ /* ::ffff:0:0 */
89 .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
90 .prefixlen = 96,
91 .label = 4,
92 },{ /* ::/96 */
93 .prefix = &in6addr_any,
94 .prefixlen = 96,
95 .label = 3,
96 },{ /* ::1/128 */
97 .prefix = &in6addr_loopback,
98 .prefixlen = 128,
99 .label = 0,
100 }
101};
102
103/* Object management */
104static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
105{
106 kfree(p);
107}
108
109static void ip6addrlbl_free_rcu(struct rcu_head *h)
110{
111 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
112}
113
114static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
115{
116 return atomic_inc_not_zero(&p->refcnt);
117}
118
119static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
120{
121 if (atomic_dec_and_test(&p->refcnt))
122 call_rcu(&p->rcu, ip6addrlbl_free_rcu);
123}
124
125/* Find label */
126static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
127 const struct in6_addr *addr,
128 int addrtype, int ifindex)
129{
130 if (p->ifindex && p->ifindex != ifindex)
131 return 0;
132 if (p->addrtype && p->addrtype != addrtype)
133 return 0;
134 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
135 return 0;
136 return 1;
137}
138
139static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr,
140 int type, int ifindex)
141{
142 struct hlist_node *pos;
143 struct ip6addrlbl_entry *p;
144 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
145 if (__ip6addrlbl_match(p, addr, type, ifindex))
146 return p;
147 }
148 return NULL;
149}
150
151u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
152{
153 u32 label;
154 struct ip6addrlbl_entry *p;
155
156 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
157
158 rcu_read_lock();
159 p = __ipv6_addr_label(addr, type, ifindex);
160 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
161 rcu_read_unlock();
162
163 ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n",
164 __FUNCTION__,
165 NIP6(*addr), type, ifindex,
166 label);
167
168 return label;
169}
170
171/* allocate one entry */
172static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
173 int prefixlen, int ifindex,
174 u32 label)
175{
176 struct ip6addrlbl_entry *newp;
177 int addrtype;
178
179 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n",
180 __FUNCTION__,
181 NIP6(*prefix), prefixlen,
182 ifindex,
183 (unsigned int)label);
184
185 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
186
187 switch (addrtype) {
188 case IPV6_ADDR_MAPPED:
189 if (prefixlen > 96)
190 return ERR_PTR(-EINVAL);
191 if (prefixlen < 96)
192 addrtype = 0;
193 break;
194 case IPV6_ADDR_COMPATv4:
195 if (prefixlen != 96)
196 addrtype = 0;
197 break;
198 case IPV6_ADDR_LOOPBACK:
199 if (prefixlen != 128)
200 addrtype = 0;
201 break;
202 }
203
204 newp = kmalloc(sizeof(*newp), GFP_KERNEL);
205 if (!newp)
206 return ERR_PTR(-ENOMEM);
207
208 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
209 newp->prefixlen = prefixlen;
210 newp->ifindex = ifindex;
211 newp->addrtype = addrtype;
212 newp->label = label;
213 INIT_HLIST_NODE(&newp->list);
214 atomic_set(&newp->refcnt, 1);
215 return newp;
216}
217
218/* add a label */
219static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
220{
221 int ret = 0;
222
223 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
224 __FUNCTION__,
225 newp, replace);
226
227 if (hlist_empty(&ip6addrlbl_table.head)) {
228 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
229 } else {
230 struct hlist_node *pos, *n;
231 struct ip6addrlbl_entry *p = NULL;
232 hlist_for_each_entry_safe(p, pos, n,
233 &ip6addrlbl_table.head, list) {
234 if (p->prefixlen == newp->prefixlen &&
235 p->ifindex == newp->ifindex &&
236 ipv6_addr_equal(&p->prefix, &newp->prefix)) {
237 if (!replace) {
238 ret = -EEXIST;
239 goto out;
240 }
241 hlist_replace_rcu(&p->list, &newp->list);
242 ip6addrlbl_put(p);
243 goto out;
244 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
245 (p->prefixlen < newp->prefixlen)) {
246 hlist_add_before_rcu(&newp->list, &p->list);
247 goto out;
248 }
249 }
250 hlist_add_after_rcu(&p->list, &newp->list);
251 }
252out:
253 if (!ret)
254 ip6addrlbl_table.seq++;
255 return ret;
256}
257
258/* add a label */
259static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
260 int ifindex, u32 label, int replace)
261{
262 struct ip6addrlbl_entry *newp;
263 int ret = 0;
264
265 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
266 __FUNCTION__,
267 NIP6(*prefix), prefixlen,
268 ifindex,
269 (unsigned int)label,
270 replace);
271
272 newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
273 if (IS_ERR(newp))
274 return PTR_ERR(newp);
275 spin_lock(&ip6addrlbl_table.lock);
276 ret = __ip6addrlbl_add(newp, replace);
277 spin_unlock(&ip6addrlbl_table.lock);
278 if (ret)
279 ip6addrlbl_free(newp);
280 return ret;
281}
282
283/* remove a label */
284static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
285 int ifindex)
286{
287 struct ip6addrlbl_entry *p = NULL;
288 struct hlist_node *pos, *n;
289 int ret = -ESRCH;
290
291 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
292 __FUNCTION__,
293 NIP6(*prefix), prefixlen,
294 ifindex);
295
296 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
297 if (p->prefixlen == prefixlen &&
298 p->ifindex == ifindex &&
299 ipv6_addr_equal(&p->prefix, prefix)) {
300 hlist_del_rcu(&p->list);
301 ip6addrlbl_put(p);
302 ret = 0;
303 break;
304 }
305 }
306 return ret;
307}
308
309static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
310 int ifindex)
311{
312 struct in6_addr prefix_buf;
313 int ret;
314
315 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
316 __FUNCTION__,
317 NIP6(*prefix), prefixlen,
318 ifindex);
319
320 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
321 spin_lock(&ip6addrlbl_table.lock);
322 ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex);
323 spin_unlock(&ip6addrlbl_table.lock);
324 return ret;
325}
326
327/* add default label */
328static __init int ip6addrlbl_init(void)
329{
330 int err = 0;
331 int i;
332
333 ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__);
334
335 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
336 int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix,
337 ip6addrlbl_init_table[i].prefixlen,
338 0,
339 ip6addrlbl_init_table[i].label, 0);
340 /* XXX: should we free all rules when we catch an error? */
341 if (ret && (!err || err != -ENOMEM))
342 err = ret;
343 }
344 return err;
345}
346
347int __init ipv6_addr_label_init(void)
348{
349 spin_lock_init(&ip6addrlbl_table.lock);
350
351 return ip6addrlbl_init();
352}
353
354static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
355 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), },
356 [IFAL_LABEL] = { .len = sizeof(u32), },
357};
358
359static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
360 void *arg)
361{
362 struct net *net = skb->sk->sk_net;
363 struct ifaddrlblmsg *ifal;
364 struct nlattr *tb[IFAL_MAX+1];
365 struct in6_addr *pfx;
366 u32 label;
367 int err = 0;
368
369 if (net != &init_net)
370 return 0;
371
372 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
373 if (err < 0)
374 return err;
375
376 ifal = nlmsg_data(nlh);
377
378 if (ifal->ifal_family != AF_INET6 ||
379 ifal->ifal_prefixlen > 128)
380 return -EINVAL;
381
382 if (ifal->ifal_index &&
383 !__dev_get_by_index(&init_net, ifal->ifal_index))
384 return -EINVAL;
385
386 if (!tb[IFAL_ADDRESS])
387 return -EINVAL;
388
389 pfx = nla_data(tb[IFAL_ADDRESS]);
390 if (!pfx)
391 return -EINVAL;
392
393 if (!tb[IFAL_LABEL])
394 return -EINVAL;
395 label = nla_get_u32(tb[IFAL_LABEL]);
396 if (label == IPV6_ADDR_LABEL_DEFAULT)
397 return -EINVAL;
398
399 switch(nlh->nlmsg_type) {
400 case RTM_NEWADDRLABEL:
401 err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen,
402 ifal->ifal_index, label,
403 nlh->nlmsg_flags & NLM_F_REPLACE);
404 break;
405 case RTM_DELADDRLABEL:
406 err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen,
407 ifal->ifal_index);
408 break;
409 default:
410 err = -EOPNOTSUPP;
411 }
412 return err;
413}
414
415static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
416 int prefixlen, int ifindex, u32 lseq)
417{
418 struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
419 ifal->ifal_family = AF_INET6;
420 ifal->ifal_prefixlen = prefixlen;
421 ifal->ifal_flags = 0;
422 ifal->ifal_index = ifindex;
423 ifal->ifal_seq = lseq;
424};
425
426static int ip6addrlbl_fill(struct sk_buff *skb,
427 struct ip6addrlbl_entry *p,
428 u32 lseq,
429 u32 pid, u32 seq, int event,
430 unsigned int flags)
431{
432 struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
433 sizeof(struct ifaddrlblmsg), flags);
434 if (!nlh)
435 return -EMSGSIZE;
436
437 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
438
439 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
440 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
441 nlmsg_cancel(skb, nlh);
442 return -EMSGSIZE;
443 }
444
445 return nlmsg_end(skb, nlh);
446}
447
448static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
449{
450 struct net *net = skb->sk->sk_net;
451 struct ip6addrlbl_entry *p;
452 struct hlist_node *pos;
453 int idx = 0, s_idx = cb->args[0];
454 int err;
455
456 if (net != &init_net)
457 return 0;
458
459 rcu_read_lock();
460 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
461 if (idx >= s_idx) {
462 if ((err = ip6addrlbl_fill(skb, p,
463 ip6addrlbl_table.seq,
464 NETLINK_CB(cb->skb).pid,
465 cb->nlh->nlmsg_seq,
466 RTM_NEWADDRLABEL,
467 NLM_F_MULTI)) <= 0)
468 break;
469 }
470 idx++;
471 }
472 rcu_read_unlock();
473 cb->args[0] = idx;
474 return skb->len;
475}
476
477static inline int ip6addrlbl_msgsize(void)
478{
479 return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
480 + nla_total_size(16) /* IFAL_ADDRESS */
481 + nla_total_size(4) /* IFAL_LABEL */
482 );
483}
484
485static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
486 void *arg)
487{
488 struct net *net = in_skb->sk->sk_net;
489 struct ifaddrlblmsg *ifal;
490 struct nlattr *tb[IFAL_MAX+1];
491 struct in6_addr *addr;
492 u32 lseq;
493 int err = 0;
494 struct ip6addrlbl_entry *p;
495 struct sk_buff *skb;
496
497 if (net != &init_net)
498 return 0;
499
500 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
501 if (err < 0)
502 return err;
503
504 ifal = nlmsg_data(nlh);
505
506 if (ifal->ifal_family != AF_INET6 ||
507 ifal->ifal_prefixlen != 128)
508 return -EINVAL;
509
510 if (ifal->ifal_index &&
511 !__dev_get_by_index(&init_net, ifal->ifal_index))
512 return -EINVAL;
513
514 if (!tb[IFAL_ADDRESS])
515 return -EINVAL;
516
517 addr = nla_data(tb[IFAL_ADDRESS]);
518 if (!addr)
519 return -EINVAL;
520
521 rcu_read_lock();
522 p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index);
523 if (p && ip6addrlbl_hold(p))
524 p = NULL;
525 lseq = ip6addrlbl_table.seq;
526 rcu_read_unlock();
527
528 if (!p) {
529 err = -ESRCH;
530 goto out;
531 }
532
533 if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
534 ip6addrlbl_put(p);
535 return -ENOBUFS;
536 }
537
538 err = ip6addrlbl_fill(skb, p, lseq,
539 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
540 RTM_NEWADDRLABEL, 0);
541
542 ip6addrlbl_put(p);
543
544 if (err < 0) {
545 WARN_ON(err == -EMSGSIZE);
546 kfree_skb(skb);
547 goto out;
548 }
549
550 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
551out:
552 return err;
553}
554
555void __init ipv6_addr_label_rtnl_register(void)
556{
557 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL);
558 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL);
559 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump);
560}
561
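
The policy table above is kept ordered by prefix length, so a lookup amounts to a longest-prefix match over the default rules. The following self-contained userspace program (illustrative only, not kernel code; it ignores the addrtype refinement that ip6addrlbl_alloc() applies) reproduces the label selection for a few sample addresses:

	#include <stdio.h>
	#include <string.h>
	#include <arpa/inet.h>

	struct rule { const char *prefix; int plen; unsigned label; };

	/* Same entries as ip6addrlbl_init_table above, longest prefix first. */
	static const struct rule rules[] = {
		{ "::1",        128, 0 },
		{ "::",          96, 3 },
		{ "::ffff:0:0",  96, 4 },
		{ "2001::",      32, 6 },
		{ "2002::",      16, 2 },
		{ "fc00::",       7, 5 },
		{ "::",           0, 1 },
	};

	/* Does addr fall inside prefix/plen? */
	static int prefix_match(const struct in6_addr *a,
				const struct in6_addr *p, int plen)
	{
		int bytes = plen / 8, bits = plen % 8;

		if (memcmp(a->s6_addr, p->s6_addr, bytes))
			return 0;
		if (bits) {
			unsigned char mask = 0xff << (8 - bits);
			if ((a->s6_addr[bytes] ^ p->s6_addr[bytes]) & mask)
				return 0;
		}
		return 1;
	}

	/* Expected:  ::1 -> 0,  2002:c000:201::1 -> 2,
	 * ::ffff:192.0.2.1 -> 4,  ::192.0.2.1 -> 3,
	 * 2001:db8::1 -> 1 (2001::/32 covers Teredo only). */
	int main(void)
	{
		const char *samples[] = { "::1", "2002:c000:201::1",
					  "::ffff:192.0.2.1", "::192.0.2.1",
					  "2001:db8::1" };
		for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
			struct in6_addr addr, pfx;
			inet_pton(AF_INET6, samples[i], &addr);
			for (unsigned j = 0; j < sizeof(rules) / sizeof(rules[0]); j++) {
				inet_pton(AF_INET6, rules[j].prefix, &pfx);
				if (prefix_match(&addr, &pfx, rules[j].plen)) {
					printf("%-18s -> label %u\n",
					       samples[i], rules[j].label);
					break;
				}
			}
		}
		return 0;
	}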
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ecbd38894fdd..bddac0e8780f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -66,9 +66,7 @@ MODULE_AUTHOR("Cast of dozens");
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
 MODULE_LICENSE("GPL");

-int sysctl_ipv6_bindv6only __read_mostly;
-
-/* The inetsw table contains everything that inet_create needs to
+/* The inetsw6 table contains everything that inet6_create needs to
  * build a new socket.
  */
 static struct list_head inetsw6[SOCK_MAX];
@@ -193,7 +191,7 @@ lookup_protocol:
 	np->mcast_hops	= -1;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->ipv6only	= sysctl_ipv6_bindv6only;
+	np->ipv6only	= init_net.ipv6.sysctl.bindv6only;

 	/* Init the ipv4 part of the socket since we can have sockets
 	 * using v6 API for ipv4.
@@ -280,7 +278,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* Check if the address belongs to the host. */
 	if (addr_type == IPV6_ADDR_MAPPED) {
 		v4addr = addr->sin6_addr.s6_addr32[3];
-		if (inet_addr_type(v4addr) != RTN_LOCAL) {
+		if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) {
 			err = -EADDRNOTAVAIL;
 			goto out;
 		}
@@ -314,7 +312,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		 */
 		v4addr = LOOPBACK4_IPV6;
 		if (!(addr_type & IPV6_ADDR_MULTICAST)) {
-			if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
+			if (!ipv6_chk_addr(&init_net, &addr->sin6_addr,
+					   dev, 0)) {
 				if (dev)
 					dev_put(dev);
 				err = -EADDRNOTAVAIL;
@@ -491,6 +490,7 @@ const struct proto_ops inet6_stream_ops = {
 	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = tcp_sendpage,
+	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
@@ -528,57 +528,23 @@ static struct net_proto_family inet6_family_ops = {
 	.owner	= THIS_MODULE,
 };

-/* Same as inet6_dgram_ops, sans udp_poll.  */
-static const struct proto_ops inet6_sockraw_ops = {
-	.family		   = PF_INET6,
-	.owner		   = THIS_MODULE,
-	.release	   = inet6_release,
-	.bind		   = inet6_bind,
-	.connect	   = inet_dgram_connect,	/* ok		*/
-	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
-	.accept		   = sock_no_accept,		/* a do nothing	*/
-	.getname	   = inet6_getname,
-	.poll		   = datagram_poll,		/* ok		*/
-	.ioctl		   = inet6_ioctl,		/* must change  */
-	.listen		   = sock_no_listen,		/* ok		*/
-	.shutdown	   = inet_shutdown,		/* ok		*/
-	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
-	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
-	.sendmsg	   = inet_sendmsg,		/* ok		*/
-	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
-	.mmap		   = sock_no_mmap,
-	.sendpage	   = sock_no_sendpage,
-#ifdef CONFIG_COMPAT
-	.compat_setsockopt = compat_sock_common_setsockopt,
-	.compat_getsockopt = compat_sock_common_getsockopt,
-#endif
-};
-
-static struct inet_protosw rawv6_protosw = {
-	.type		= SOCK_RAW,
-	.protocol	= IPPROTO_IP,	/* wild card */
-	.prot		= &rawv6_prot,
-	.ops		= &inet6_sockraw_ops,
-	.capability	= CAP_NET_RAW,
-	.no_check	= UDP_CSUM_DEFAULT,
-	.flags		= INET_PROTOSW_REUSE,
-};
-
-void
-inet6_register_protosw(struct inet_protosw *p)
+int inet6_register_protosw(struct inet_protosw *p)
 {
 	struct list_head *lh;
 	struct inet_protosw *answer;
-	int protocol = p->protocol;
 	struct list_head *last_perm;
+	int protocol = p->protocol;
+	int ret;

 	spin_lock_bh(&inetsw6_lock);

+	ret = -EINVAL;
 	if (p->type >= SOCK_MAX)
 		goto out_illegal;

 	/* If we are trying to override a permanent protocol, bail. */
 	answer = NULL;
+	ret = -EPERM;
 	last_perm = &inetsw6[p->type];
 	list_for_each(lh, &inetsw6[p->type]) {
 		answer = list_entry(lh, struct inet_protosw, list);
@@ -602,9 +568,10 @@ inet6_register_protosw(struct inet_protosw *p)
 	 * system automatically returns to the old behavior.
 	 */
 	list_add_rcu(&p->list, last_perm);
+	ret = 0;
 out:
 	spin_unlock_bh(&inetsw6_lock);
-	return;
+	return ret;

 out_permanent:
 	printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
@@ -713,20 +680,19 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted);

 static int __init init_ipv6_mibs(void)
 {
-	if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	if (snmp_mib_init((void **)ipv6_statistics,
+			  sizeof(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
-			  __alignof__(struct icmpv6_mib)) < 0)
+	if (snmp_mib_init((void **)icmpv6_statistics,
+			  sizeof(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void **)icmpv6msg_statistics,
-		sizeof (struct icmpv6msg_mib), __alignof__(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg_mib;
-	if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0)
 		goto err_udp_mib;
-	if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	if (snmp_mib_init((void **)udplite_stats_in6,
+			  sizeof (struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	return 0;

@@ -752,6 +718,32 @@ static void cleanup_ipv6_mibs(void)
 	snmp_mib_free((void **)udplite_stats_in6);
 }

+static int inet6_net_init(struct net *net)
+{
+	net->ipv6.sysctl.bindv6only = 0;
+	net->ipv6.sysctl.flush_delay = 0;
+	net->ipv6.sysctl.ip6_rt_max_size = 4096;
+	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
+	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
+	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
+	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
+	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
+	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+	net->ipv6.sysctl.icmpv6_time = 1*HZ;
+
+	return 0;
+}
+
+static void inet6_net_exit(struct net *net)
+{
+	return;
+}
+
+static struct pernet_operations inet6_net_ops = {
+	.init = inet6_net_init,
+	.exit = inet6_net_exit,
+};
+
 static int __init inet6_init(void)
 {
 	struct sk_buff *dummy_skb;
@@ -768,7 +760,6 @@ static int __init inet6_init(void)
 	__this_module.can_unload = &ipv6_unload;
 #endif
 #endif
-
 	err = proto_register(&tcpv6_prot, 1);
 	if (err)
 		goto out;
@@ -793,14 +784,16 @@ static int __init inet6_init(void)
 	/* We MUST register RAW sockets before we create the ICMP6,
 	 * IGMP6, or NDISC control sockets.
 	 */
-	inet6_register_protosw(&rawv6_protosw);
+	err = rawv6_init();
+	if (err)
+		goto out_unregister_raw_proto;

 	/* Register the family here so that the init calls below will
 	 * be able to create sockets. (?? is this dangerous ??)
 	 */
 	err = sock_register(&inet6_family_ops);
 	if (err)
-		goto out_unregister_raw_proto;
+		goto out_sock_register_fail;

 	/* Initialise ipv6 mibs */
 	err = init_ipv6_mibs();
@@ -814,8 +807,14 @@ static int __init inet6_init(void)
 	 *	able to communicate via both network protocols.
 	 */

+	err = register_pernet_subsys(&inet6_net_ops);
+	if (err)
+		goto register_pernet_fail;
+
 #ifdef CONFIG_SYSCTL
-	ipv6_sysctl_register();
+	err = ipv6_sysctl_register();
+	if (err)
+		goto sysctl_fail;
 #endif
 	err = icmpv6_init(&inet6_family_ops);
 	if (err)
@@ -848,31 +847,61 @@ static int __init inet6_init(void)
 	if (if6_proc_init())
 		goto proc_if6_fail;
 #endif
-	ip6_route_init();
-	ip6_flowlabel_init();
+	err = ip6_route_init();
+	if (err)
+		goto ip6_route_fail;
+	err = ip6_flowlabel_init();
+	if (err)
+		goto ip6_flowlabel_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;

 	/* Init v6 extension headers. */
-	ipv6_rthdr_init();
-	ipv6_frag_init();
-	ipv6_nodata_init();
-	ipv6_destopt_init();
+	err = ipv6_exthdrs_init();
+	if (err)
+		goto ipv6_exthdrs_fail;
+
+	err = ipv6_frag_init();
+	if (err)
+		goto ipv6_frag_fail;

 	/* Init v6 transport protocols. */
-	udpv6_init();
-	udplitev6_init();
-	tcpv6_init();
-
-	ipv6_packet_init();
-	err = 0;
+	err = udpv6_init();
+	if (err)
+		goto udpv6_fail;
+
+	err = udplitev6_init();
+	if (err)
+		goto udplitev6_fail;
+
+	err = tcpv6_init();
+	if (err)
+		goto tcpv6_fail;
+
+	err = ipv6_packet_init();
+	if (err)
+		goto ipv6_packet_fail;
 out:
 	return err;

+ipv6_packet_fail:
+	tcpv6_exit();
+tcpv6_fail:
+	udplitev6_exit();
+udplitev6_fail:
+	udpv6_exit();
+udpv6_fail:
+	ipv6_frag_exit();
+ipv6_frag_fail:
+	ipv6_exthdrs_exit();
+ipv6_exthdrs_fail:
+	addrconf_cleanup();
 addrconf_fail:
 	ip6_flowlabel_cleanup();
+ip6_flowlabel_fail:
 	ip6_route_cleanup();
+ip6_route_fail:
 #ifdef CONFIG_PROC_FS
 	if6_proc_exit();
 proc_if6_fail:
@@ -899,10 +928,16 @@ ndisc_fail:
 icmp_fail:
 #ifdef CONFIG_SYSCTL
 	ipv6_sysctl_unregister();
+sysctl_fail:
 #endif
+	unregister_pernet_subsys(&inet6_net_ops);
+register_pernet_fail:
 	cleanup_ipv6_mibs();
 out_unregister_sock:
 	sock_unregister(PF_INET6);
+	rtnl_unregister_all(PF_INET6);
+out_sock_register_fail:
+	rawv6_exit();
 out_unregister_raw_proto:
 	proto_unregister(&rawv6_prot);
 out_unregister_udplite_proto:
@@ -922,9 +957,14 @@ static void __exit inet6_exit(void)
 	/* Disallow any further netlink messages */
 	rtnl_unregister_all(PF_INET6);

+	udpv6_exit();
+	udplitev6_exit();
+	tcpv6_exit();
+
 	/* Cleanup code parts. */
 	ipv6_packet_cleanup();
-
+	ipv6_frag_exit();
+	ipv6_exthdrs_exit();
 	addrconf_cleanup();
 	ip6_flowlabel_cleanup();
 	ip6_route_cleanup();
@@ -943,9 +983,11 @@ static void __exit inet6_exit(void)
 	igmp6_cleanup();
 	ndisc_cleanup();
 	icmpv6_cleanup();
+	rawv6_exit();
 #ifdef CONFIG_SYSCTL
 	ipv6_sysctl_unregister();
 #endif
+	unregister_pernet_subsys(&inet6_net_ops);
 	cleanup_ipv6_mibs();
 	proto_unregister(&rawv6_prot);
 	proto_unregister(&udplitev6_prot);
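
With every subsystem init now returning an error code, inet6_init() unwinds in strict reverse order when any step fails. The shape of that goto ladder, reduced to a runnable toy (the init_a/init_b/init_c names are invented for illustration):

	#include <stdio.h>

	static int  init_a(void) { puts("A up");   return 0; }
	static void exit_a(void) { puts("A down"); }
	static int  init_b(void) { puts("B up");   return 0; }
	static void exit_b(void) { puts("B down"); }
	static int  init_c(void) { puts("C refuses to start"); return -1; }

	int main(void)
	{
		int err;

		if ((err = init_a()))
			goto out;
		if ((err = init_b()))
			goto a_fail;
		if ((err = init_c()))
			goto b_fail;
		return 0;		/* everything up */

	b_fail:				/* tear down in reverse order */
		exit_b();
	a_fail:
		exit_a();
	out:
		fprintf(stderr, "init failed: %d\n", err);
		return 1;
	}

Each failure label undoes only what was already initialised and then falls through to the earlier labels, which is why the label order in inet6_init() mirrors the init order exactly.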
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 4eaf55072b1b..fb0d07a15e93 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -370,6 +370,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	ip6h->flow_lbl[2] = 0;
 	ip6h->hop_limit   = 0;

+	spin_lock(&x->lock);
 	{
 		u8 auth_data[MAX_AH_AUTH_LEN];

@@ -378,14 +379,15 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb_push(skb, hdr_len);
 		err = ah_mac_digest(ahp, skb, ah->auth_data);
 		if (err)
-			goto free_out;
-		err = -EINVAL;
-		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
-			LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
-			x->stats.integrity_failed++;
-			goto free_out;
-		}
+			goto unlock;
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
+			err = -EBADMSG;
 	}
+unlock:
+	spin_unlock(&x->lock);
+
+	if (err)
+		goto free_out;

 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
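
Both ah6_input() above and esp6_input() further below adopt the same discipline: hold x->lock only across the digest work, record the verdict in the error variable, drop the lock, and only then branch to the failure path. A minimal userspace sketch of that shape, assuming a pthread mutex in place of the xfrm state spinlock:

	#include <pthread.h>
	#include <stdio.h>
	#include <string.h>

	static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Compare a computed ICV against the received one under the lock,
	 * report the verdict after the lock is dropped -- mirroring the
	 * "goto unlock; ... if (err) goto free_out;" structure above. */
	static int verify_icv(const unsigned char *got,
			      const unsigned char *want, size_t len)
	{
		int err = 0;

		pthread_mutex_lock(&state_lock);
		if (memcmp(got, want, len))
			err = -74;	/* same spirit as -EBADMSG in the patch */
		pthread_mutex_unlock(&state_lock);

		return err;	/* caller frees the packet outside the lock */
	}

	int main(void)
	{
		unsigned char good[] = "abcd", bad[] = "abce";

		printf("match: %d, mismatch: %d\n",
		       verify_icv(good, good, 4), verify_icv(good, bad, 4));
		return 0;
	}

Keeping the free/drop work outside the critical section keeps the lock hold time bounded by the digest itself.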
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f915c4df9820..9c7f83fbc3a1 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -89,7 +89,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 		return -EPERM;
 	if (ipv6_addr_is_multicast(addr))
 		return -EINVAL;
-	if (ipv6_chk_addr(addr, NULL, 0))
+	if (ipv6_chk_addr(&init_net, addr, NULL, 0))
 		return -EINVAL;

 	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -504,6 +504,7 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
 }

 static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(dev_base_lock)
 {
 	read_lock(&dev_base_lock);
 	return ac6_get_idx(seq, *pos);
@@ -518,6 +519,7 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }

 static void ac6_seq_stop(struct seq_file *seq, void *v)
+	__releases(dev_base_lock)
 {
 	struct ac6_iter_state *state = ac6_seq_private(seq);
 	if (likely(state->idev != NULL)) {
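
For reference, the __acquires/__releases annotations added above are sparse-only context markers: they tell the static checker that the seq_file start/stop pair takes and drops dev_base_lock across function boundaries, and they expand to nothing in a normal build. Their conventional definitions, shown here as a reminder of how linux/compiler.h of this era spells them (not part of this patch):

	#ifdef __CHECKER__
	# define __acquires(x)	__attribute__((context(x, 0, 1)))
	# define __releases(x)	__attribute__((context(x, 1, 0)))
	#else
	# define __acquires(x)
	# define __releases(x)
	#endif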
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 5d4245ab4183..94fa6ae77cfe 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -177,7 +177,7 @@ ipv4_connected:
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);

-	if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
+	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
@@ -549,7 +549,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 					return -ENODEV;
 				}
 			}
-			if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) {
+			if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr,
+					   dev, 0)) {
 				if (dev)
 					dev_put(dev);
 				err = -EINVAL;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 444053254676..5bd5292ad9fa 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -165,31 +165,32 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 	}

+	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	spin_lock(&x->lock);
+
 	/* If integrity check is required, do this. */
 	if (esp->auth.icv_full_len) {
 		u8 sum[alen];

 		ret = esp_mac_digest(esp, skb, 0, skb->len - alen);
 		if (ret)
-			goto out;
+			goto unlock;

 		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
 			BUG();

 		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
-			x->stats.integrity_failed++;
-			ret = -EINVAL;
-			goto out;
+			ret = -EBADMSG;
+			goto unlock;
 		}
 	}

-	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	skb->ip_summed = CHECKSUM_NONE;
-
 	esph = (struct ip_esp_hdr *)skb->data;
 	iph = ipv6_hdr(skb);

@@ -198,15 +199,13 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);

 	{
-		u8 nexthdr[2];
 		struct scatterlist *sg = &esp->sgbuf[0];
-		u8 padlen;

 		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
 			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
 			if (!sg) {
 				ret = -ENOMEM;
-				goto out;
+				goto unlock;
 			}
 		}
 		sg_init_table(sg, nfrags);
@@ -216,8 +215,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
 		if (unlikely(sg != &esp->sgbuf[0]))
 			kfree(sg);
-		if (unlikely(ret))
-			goto out;
+	}
+
+unlock:
+	spin_unlock(&x->lock);
+
+	if (unlikely(ret))
+		goto out;
+
+	{
+		u8 nexthdr[2];
+		u8 padlen;

 		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 			BUG();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 1e89efd38a0c..3cd1c993d52b 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -32,6 +32,7 @@
 #include <linux/in6.h>
 #include <linux/icmpv6.h>

+#include <net/dst.h>
 #include <net/sock.h>
 #include <net/snmp.h>

@@ -307,38 +308,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 	return -1;
 }

-static struct inet6_protocol destopt_protocol = {
-	.handler	=	ipv6_destopt_rcv,
-	.flags		=	INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
-};
-
-void __init ipv6_destopt_init(void)
-{
-	if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
-		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
-}
-
-/********************************
-  NONE header. No data in packet.
- ********************************/
-
-static int ipv6_nodata_rcv(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return 0;
-}
-
-static struct inet6_protocol nodata_protocol = {
-	.handler	=	ipv6_nodata_rcv,
-	.flags		=	INET6_PROTO_NOPOLICY,
-};
-
-void __init ipv6_nodata_init(void)
-{
-	if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
-		printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
-}
-
 /********************************
   Routing header.
  ********************************/
@@ -476,7 +445,7 @@ looped_back:
 			kfree_skb(skb);
 			return -1;
 		}
-		if (!ipv6_chk_home_addr(addr)) {
+		if (!ipv6_chk_home_addr(&init_net, addr)) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
@@ -536,12 +505,48 @@ static struct inet6_protocol rthdr_protocol = {
 	.flags		=	INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
 };

-void __init ipv6_rthdr_init(void)
+static struct inet6_protocol destopt_protocol = {
+	.handler	=	ipv6_destopt_rcv,
+	.flags		=	INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
+};
+
+static struct inet6_protocol nodata_protocol = {
+	.handler	=	dst_discard,
+	.flags		=	INET6_PROTO_NOPOLICY,
+};
+
+int __init ipv6_exthdrs_init(void)
 {
-	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
-		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
+	int ret;
+
+	ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+	if (ret)
+		goto out;
+
+	ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+	if (ret)
+		goto out_rthdr;
+
+	ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE);
+	if (ret)
+		goto out_destopt;
+
+out:
+	return ret;
+out_rthdr:
+	inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+out_destopt:
+	inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+	goto out;
 };

+void ipv6_exthdrs_exit(void)
+{
+	inet6_del_protocol(&nodata_protocol, IPPROTO_NONE);
+	inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+	inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
+}
+
 /**********************************
   Hop-by-hop options.
  **********************************/
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 428c6b0e26d8..695c0ca8a417 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -223,7 +223,7 @@ nla_put_failure:
 	return -ENOBUFS;
 }

-static u32 fib6_rule_default_pref(void)
+static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
 {
 	return 0x3FFF;
 }
@@ -249,6 +249,7 @@ static struct fib_rules_ops fib6_rules_ops = {
 	.policy			= fib6_rule_policy,
 	.rules_list		= LIST_HEAD_INIT(fib6_rules_ops.rules_list),
 	.owner			= THIS_MODULE,
+	.fro_net		= &init_net,
 };

 static int __init fib6_default_rules_init(void)
@@ -265,10 +266,23 @@ static int __init fib6_default_rules_init(void)
 	return 0;
 }

-void __init fib6_rules_init(void)
+int __init fib6_rules_init(void)
 {
-	BUG_ON(fib6_default_rules_init());
-	fib_rules_register(&fib6_rules_ops);
+	int ret;
+
+	ret = fib6_default_rules_init();
+	if (ret)
+		goto out;
+
+	ret = fib_rules_register(&fib6_rules_ops);
+	if (ret)
+		goto out_default_rules_init;
+out:
+	return ret;
+
+out_default_rules_init:
+	fib_rules_cleanup_ops(&fib6_rules_ops);
+	goto out;
 }

 void fib6_rules_cleanup(void)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f1240688dc58..cbb5b9cf84ad 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -63,6 +63,7 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 #include <net/icmp.h>
+#include <net/xfrm.h>

 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -86,7 +87,7 @@ static int icmpv6_rcv(struct sk_buff *skb);

 static struct inet6_protocol icmpv6_protocol = {
 	.handler	=	icmpv6_rcv,
-	.flags		=	INET6_PROTO_FINAL,
+	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };

 static __inline__ int icmpv6_xmit_lock(void)
@@ -153,8 +154,6 @@ static int is_ineligible(struct sk_buff *skb)
 	return 0;
 }

-static int sysctl_icmpv6_time __read_mostly = 1*HZ;
-
 /*
  *	Check the ICMP output rate limit
  */
@@ -185,7 +184,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
 		res = 1;
 	} else {
 		struct rt6_info *rt = (struct rt6_info *)dst;
-		int tmo = sysctl_icmpv6_time;
+		int tmo = init_net.ipv6.sysctl.icmpv6_time;

 		/* Give more bandwidth to wider prefixes. */
 		if (rt->rt6i_dst.plen < 128)
@@ -310,8 +309,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
 	struct dst_entry *dst;
+	struct dst_entry *dst2;
 	struct icmp6hdr tmp_hdr;
 	struct flowi fl;
+	struct flowi fl2;
 	struct icmpv6_msg msg;
 	int iif = 0;
 	int addr_type = 0;
@@ -331,7 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	 */
 	addr_type = ipv6_addr_type(&hdr->daddr);

-	if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
+	if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0))
 		saddr = &hdr->daddr;

 	/*
@@ -418,9 +419,42 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		goto out_dst_release;
 	}

-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	/* No need to clone since we're just using its address. */
+	dst2 = dst;
+
+	err = xfrm_lookup(&dst, &fl, sk, 0);
+	switch (err) {
+	case 0:
+		if (dst != dst2)
+			goto route_done;
+		break;
+	case -EPERM:
+		dst = NULL;
+		break;
+	default:
+		goto out;
+	}
+
+	if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
+		goto out;
+
+	if (ip6_dst_lookup(sk, &dst2, &fl))
+		goto out;
+
+	err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP);
+	if (err == -ENOENT) {
+		if (!dst)
+			goto out;
+		goto route_done;
+	}
+
+	dst_release(dst);
+	dst = dst2;
+
+	if (err)
 		goto out;

+route_done:
 	if (ipv6_addr_is_multicast(&fl.fl6_dst))
 		hlimit = np->mcast_hops;
 	else
@@ -555,9 +589,7 @@ out:

 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 {
-	struct in6_addr *saddr, *daddr;
 	struct inet6_protocol *ipprot;
-	struct sock *sk;
 	int inner_offset;
 	int hash;
 	u8 nexthdr;
@@ -579,9 +611,6 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 	if (!pskb_may_pull(skb, inner_offset+8))
 		return;

-	saddr = &ipv6_hdr(skb)->saddr;
-	daddr = &ipv6_hdr(skb)->daddr;
-
 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 	   Without this we will not able f.e. to make source routed
 	   pmtu discovery.
@@ -597,15 +626,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 	rcu_read_unlock();

-	read_lock(&raw_v6_lock);
-	if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
-		while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr,
-					     IP6CB(skb)->iif))) {
-			rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
-			sk = sk_next(sk);
-		}
-	}
-	read_unlock(&raw_v6_lock);
+	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 }

 /*
@@ -621,6 +642,25 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	struct icmp6hdr *hdr;
 	int type;

+	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		int nh;
+
+		if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
+				 XFRM_STATE_ICMP))
+			goto drop_no_count;
+
+		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
+			goto drop_no_count;
+
+		nh = skb_network_offset(skb);
+		skb_set_network_header(skb, sizeof(*hdr));
+
+		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+			goto drop_no_count;
+
+		skb_set_network_header(skb, nh);
+	}
+
 	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);

 	saddr = &ipv6_hdr(skb)->saddr;
@@ -643,8 +683,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		}
 	}

-	if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
-		goto discard_it;
+	__skb_pull(skb, sizeof(*hdr));

 	hdr = icmp6_hdr(skb);

@@ -730,6 +769,7 @@ static int icmpv6_rcv(struct sk_buff *skb)

 discard_it:
 	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
+drop_no_count:
 	kfree_skb(skb);
 	return 0;
 }
@@ -865,16 +905,26 @@ int icmpv6_err_convert(int type, int code, int *err)
 EXPORT_SYMBOL(icmpv6_err_convert);

 #ifdef CONFIG_SYSCTL
-ctl_table ipv6_icmp_table[] = {
+ctl_table ipv6_icmp_table_template[] = {
 	{
 		.ctl_name	= NET_IPV6_ICMP_RATELIMIT,
 		.procname	= "ratelimit",
-		.data		= &sysctl_icmpv6_time,
+		.data		= &init_net.ipv6.sysctl.icmpv6_time,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
 	{ .ctl_name = 0 },
 };
+
+struct ctl_table *ipv6_icmp_sysctl_init(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(ipv6_icmp_table_template,
+			sizeof(ipv6_icmp_table_template),
+			GFP_KERNEL);
+	return table;
+}
 #endif

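
ipv6_icmp_sysctl_init() above introduces the template-and-kmemdup pattern for per-namespace sysctls: a single global ctl_table acts as the template, and each namespace gets its own duplicated copy. A hedged sketch of how a caller might then retarget the copy at its own namespace's storage before registering it -- the retargeting step is an assumption about the caller, not shown in this hunk:

	/* Sketch (assumption): a per-net consumer of the template above. */
	static struct ctl_table *demo_icmp_sysctl_init(struct net *net)
	{
		struct ctl_table *table;

		table = kmemdup(ipv6_icmp_table_template,
				sizeof(ipv6_icmp_table_template), GFP_KERNEL);
		if (table)
			/* point the copy at this namespace's counter */
			table[0].data = &net->ipv6.sysctl.icmpv6_time;
		return table;
	}

Because the template's .data still points at init_net, duplicating without retargeting would make every namespace share one knob; the per-copy .data rewrite is what makes the sysctl genuinely per-net.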
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 0765d8bd380f..a66a7d8e2811 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -43,7 +43,7 @@ void __inet6_hash(struct inet_hashinfo *hashinfo,
 	}

 	__sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock(lock);
 }
 EXPORT_SYMBOL(__inet6_hash);
@@ -216,7 +216,7 @@ unique:
 	BUG_TRAP(sk_unhashed(sk));
 	__sk_add_node(sk, &head->chain);
 	sk->sk_hash = hash;
-	sock_prot_inc_use(sk->sk_prot);
+	sock_prot_inuse_add(sk->sk_prot, 1);
 	write_unlock(lock);

 	if (twp != NULL) {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 946cf389ab95..f93407cf6515 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -361,6 +361,7 @@ end:

 static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = skb->sk->sk_net;
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
 	struct rt6_rtnl_dump_arg arg;
@@ -369,6 +370,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	struct hlist_node *node;
 	int res = 0;

+	if (net != &init_net)
+		return 0;
+
 	s_h = cb->args[0];
 	s_e = cb->args[1];

@@ -677,13 +681,15 @@ static __inline__ void fib6_start_gc(struct rt6_info *rt)
 {
 	if (ip6_fib_timer.expires == 0 &&
 	    (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 }

 void fib6_force_start_gc(void)
 {
 	if (ip6_fib_timer.expires == 0)
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 }

 /*
@@ -1122,9 +1128,6 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,

 	rt->u.dst.rt6_next = NULL;

-	if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
-		fn->leaf = &ip6_null_entry;
-
 	/* If it was last route, expunge its radix tree node */
 	if (fn->leaf == NULL) {
 		fn->fn_flags &= ~RTN_RTINFO;
@@ -1311,6 +1314,9 @@ static int fib6_walk(struct fib6_walker_t *w)

 static int fib6_clean_node(struct fib6_walker_t *w)
 {
+	struct nl_info info = {
+		.nl_net = &init_net,
+	};
 	int res;
 	struct rt6_info *rt;
 	struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w);
@@ -1319,7 +1325,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
-			res = fib6_del(rt, NULL);
+			res = fib6_del(rt, &info);
 			if (res) {
 #if RT6_DEBUG >= 2
 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
@@ -1445,7 +1451,8 @@ void fib6_run_gc(unsigned long dummy)
 {
 	if (dummy != ~0UL) {
 		spin_lock_bh(&fib6_gc_lock);
-		gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
+		gc_args.timeout = dummy ? (int)dummy :
+			init_net.ipv6.sysctl.ip6_rt_gc_interval;
 	} else {
 		local_bh_disable();
 		if (!spin_trylock(&fib6_gc_lock)) {
@@ -1453,7 +1460,7 @@ void fib6_run_gc(unsigned long dummy)
 			local_bh_enable();
 			return;
 		}
-		gc_args.timeout = ip6_rt_gc_interval;
+		gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval;
 	}
 	gc_args.more = 0;

@@ -1461,7 +1468,8 @@ void fib6_run_gc(unsigned long dummy)
 	fib6_clean_all(fib6_age, 0, NULL);

 	if (gc_args.more)
-		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+		mod_timer(&ip6_fib_timer, jiffies +
+			  init_net.ipv6.sysctl.ip6_rt_gc_interval);
 	else {
 		del_timer(&ip6_fib_timer);
 		ip6_fib_timer.expires = 0;
@@ -1469,16 +1477,27 @@ void fib6_run_gc(unsigned long dummy)
 	spin_unlock_bh(&fib6_gc_lock);
 }

-void __init fib6_init(void)
+int __init fib6_init(void)
 {
+	int ret;
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					   0, SLAB_HWCACHE_ALIGN,
 					   NULL);
+	if (!fib6_node_kmem)
+		return -ENOMEM;

 	fib6_tables_init();

-	__rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
+	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
1493 if (ret)
1494 goto out_kmem_cache_create;
1495out:
1496 return ret;
1497
1498out_kmem_cache_create:
1499 kmem_cache_destroy(fib6_node_kmem);
1500 goto out;
1482} 1501}
1483 1502
1484void fib6_gc_cleanup(void) 1503void fib6_gc_cleanup(void)
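fib6_init() goes from void to int: SLAB_PANIC is dropped from the cache flags and each step that can fail unwinds its predecessors through the out_kmem_cache_create label. The same goto-based unwind shape in a compilable miniature (cache_create(), rtnl_register_sketch() and the error values are placeholders):

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins so the unwind shape compiles on its own. */
static void *cache;

static void *cache_create(void)		{ return malloc(16); }
static void cache_destroy(void *c)	{ free(c); }
static int rtnl_register_sketch(void)	{ return 0; }

static int fib6_init_sketch(void)
{
	int ret;

	cache = cache_create();
	if (!cache)
		return -1;		/* -ENOMEM in the kernel */

	ret = rtnl_register_sketch();
	if (ret)
		goto out_cache_create;
out:
	return ret;

out_cache_create:
	cache_destroy(cache);
	goto out;
}

int main(void)
{
	printf("fib6_init_sketch() = %d\n", fib6_init_sketch());
	return 0;
}
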
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b12cc22e7745..2b7d9ee98832 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -629,6 +629,7 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
629} 629}
630 630
631static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) 631static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
632 __acquires(ip6_fl_lock)
632{ 633{
633 read_lock_bh(&ip6_fl_lock); 634 read_lock_bh(&ip6_fl_lock);
634 return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 635 return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -647,6 +648,7 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
647} 648}
648 649
649static void ip6fl_seq_stop(struct seq_file *seq, void *v) 650static void ip6fl_seq_stop(struct seq_file *seq, void *v)
651 __releases(ip6_fl_lock)
650{ 652{
651 read_unlock_bh(&ip6_fl_lock); 653 read_unlock_bh(&ip6_fl_lock);
652} 654}
@@ -692,20 +694,36 @@ static const struct file_operations ip6fl_seq_fops = {
692 .llseek = seq_lseek, 694 .llseek = seq_lseek,
693 .release = seq_release_private, 695 .release = seq_release_private,
694}; 696};
695#endif
696 697
698static int ip6_flowlabel_proc_init(struct net *net)
699{
700 if (!proc_net_fops_create(net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops))
701 return -ENOMEM;
702 return 0;
703}
697 704
698void ip6_flowlabel_init(void) 705static void ip6_flowlabel_proc_fini(struct net *net)
699{ 706{
700#ifdef CONFIG_PROC_FS 707 proc_net_remove(net, "ip6_flowlabel");
701 proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); 708}
709#else
710static inline int ip6_flowlabel_proc_init(struct net *net)
711{
712 return 0;
713}
714static inline void ip6_flowlabel_proc_fini(struct net *net)
715{
716	return;
717}
702#endif 718#endif
719
720int ip6_flowlabel_init(void)
721{
722 return ip6_flowlabel_proc_init(&init_net);
703} 723}
704 724
705void ip6_flowlabel_cleanup(void) 725void ip6_flowlabel_cleanup(void)
706{ 726{
707 del_timer(&ip6_fl_gc_timer); 727 del_timer(&ip6_fl_gc_timer);
708#ifdef CONFIG_PROC_FS 728 ip6_flowlabel_proc_fini(&init_net);
709 proc_net_remove(&init_net, "ip6_flowlabel");
710#endif
711} 729}
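This conversion moves the CONFIG_PROC_FS conditionals out of the callers: a real ip6_flowlabel_proc_init()/_fini() pair when procfs is built in, inline no-op stubs otherwise, so ip6_flowlabel_init() and _cleanup() stay free of #ifdef clutter. The stub idiom in standalone form (CONFIG_PROC_FS is just a local macro here):

#include <stdio.h>

#define CONFIG_PROC_FS		/* comment out to build the stub side */

#ifdef CONFIG_PROC_FS
static int flowlabel_proc_init(void)
{
	printf("registering proc entry\n");
	return 0;
}

static void flowlabel_proc_fini(void)
{
	printf("removing proc entry\n");
}
#else
/* Inline stubs keep every caller free of #ifdef clutter. */
static inline int flowlabel_proc_init(void) { return 0; }
static inline void flowlabel_proc_fini(void) { }
#endif

int main(void)
{
	if (flowlabel_proc_init())
		return 1;
	flowlabel_proc_fini();
	return 0;
}
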
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index fac6f7f9dd73..178aebc0427a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -134,7 +134,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
134 134
135 rcu_read_unlock(); 135 rcu_read_unlock();
136 136
137 return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); 137 return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL,
138 ip6_rcv_finish);
138err: 139err:
139 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); 140 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
140drop: 141drop:
@@ -152,9 +153,8 @@ out:
152static int ip6_input_finish(struct sk_buff *skb) 153static int ip6_input_finish(struct sk_buff *skb)
153{ 154{
154 struct inet6_protocol *ipprot; 155 struct inet6_protocol *ipprot;
155 struct sock *raw_sk;
156 unsigned int nhoff; 156 unsigned int nhoff;
157 int nexthdr; 157 int nexthdr, raw;
158 u8 hash; 158 u8 hash;
159 struct inet6_dev *idev; 159 struct inet6_dev *idev;
160 160
@@ -170,9 +170,7 @@ resubmit:
170 nhoff = IP6CB(skb)->nhoff; 170 nhoff = IP6CB(skb)->nhoff;
171 nexthdr = skb_network_header(skb)[nhoff]; 171 nexthdr = skb_network_header(skb)[nhoff];
172 172
173 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); 173 raw = raw6_local_deliver(skb, nexthdr);
174 if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
175 raw_sk = NULL;
176 174
177 hash = nexthdr & (MAX_INET_PROTOS - 1); 175 hash = nexthdr & (MAX_INET_PROTOS - 1);
178 if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { 176 if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
@@ -205,7 +203,7 @@ resubmit:
205 else if (ret == 0) 203 else if (ret == 0)
206 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); 204 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
207 } else { 205 } else {
208 if (!raw_sk) { 206 if (!raw) {
209 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 207 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
210 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); 208 IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS);
211 icmpv6_send(skb, ICMPV6_PARAMPROB, 209 icmpv6_send(skb, ICMPV6_PARAMPROB,
@@ -229,7 +227,8 @@ discard:
229 227
230int ip6_input(struct sk_buff *skb) 228int ip6_input(struct sk_buff *skb)
231{ 229{
232 return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish); 230 return NF_HOOK(PF_INET6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
231 ip6_input_finish);
233} 232}
234 233
235int ip6_mc_input(struct sk_buff *skb) 234int ip6_mc_input(struct sk_buff *skb)
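ip6_input_finish() no longer walks raw_v6_htable itself; raw6_local_deliver() reports with a plain int whether any raw socket took the packet, and the ICMPv6 unknown-protocol error is sent only when none did. A minimal mock of that control flow (raw_local_deliver_sketch() is hypothetical and simply pretends protocol 58 has a listener):

#include <stdio.h>

/* Hypothetical stand-in: pretend an ICMPv6 (58) raw socket is bound. */
static int raw_local_deliver_sketch(int nexthdr)
{
	return nexthdr == 58;
}

static void input_finish_sketch(int nexthdr)
{
	int raw = raw_local_deliver_sketch(nexthdr);

	/* No upper-layer handler: only object if no raw socket saw it. */
	if (!raw)
		printf("proto %d: would send ICMPV6_PARAMPROB\n", nexthdr);
	else
		printf("proto %d: consumed by a raw listener\n", nexthdr);
}

int main(void)
{
	input_finish_sketch(58);
	input_finish_sketch(250);
	return 0;
}
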
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3bef30e4a23d..15c4f6cee3e6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -29,7 +29,7 @@
29 */ 29 */
30 30
31#include <linux/errno.h> 31#include <linux/errno.h>
32#include <linux/types.h> 32#include <linux/kernel.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/socket.h> 34#include <linux/socket.h>
35#include <linux/net.h> 35#include <linux/net.h>
@@ -70,6 +70,31 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f
70 spin_unlock_bh(&ip6_id_lock); 70 spin_unlock_bh(&ip6_id_lock);
71} 71}
72 72
73int __ip6_local_out(struct sk_buff *skb)
74{
75 int len;
76
77 len = skb->len - sizeof(struct ipv6hdr);
78 if (len > IPV6_MAXPLEN)
79 len = 0;
80 ipv6_hdr(skb)->payload_len = htons(len);
81
82 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
83 dst_output);
84}
85
86int ip6_local_out(struct sk_buff *skb)
87{
88 int err;
89
90 err = __ip6_local_out(skb);
91 if (likely(err == 1))
92 err = dst_output(skb);
93
94 return err;
95}
96EXPORT_SYMBOL_GPL(ip6_local_out);
97
73static int ip6_output_finish(struct sk_buff *skb) 98static int ip6_output_finish(struct sk_buff *skb)
74{ 99{
75 struct dst_entry *dst = skb->dst; 100 struct dst_entry *dst = skb->dst;
@@ -120,8 +145,8 @@ static int ip6_output2(struct sk_buff *skb)
120 is not supported in any case. 145 is not supported in any case.
121 */ 146 */
122 if (newskb) 147 if (newskb)
123 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL, 148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
124 newskb->dev, 149 NULL, newskb->dev,
125 ip6_dev_loopback_xmit); 150 ip6_dev_loopback_xmit);
126 151
127 if (ipv6_hdr(skb)->hop_limit == 0) { 152 if (ipv6_hdr(skb)->hop_limit == 0) {
@@ -134,7 +159,8 @@ static int ip6_output2(struct sk_buff *skb)
134 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); 159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
135 } 160 }
136 161
137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); 162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163 ip6_output_finish);
138} 164}
139 165
140static inline int ip6_skb_dst_mtu(struct sk_buff *skb) 166static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
@@ -236,7 +262,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
236 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { 262 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
237 IP6_INC_STATS(ip6_dst_idev(skb->dst), 263 IP6_INC_STATS(ip6_dst_idev(skb->dst),
238 IPSTATS_MIB_OUTREQUESTS); 264 IPSTATS_MIB_OUTREQUESTS);
239 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, 265 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
240 dst_output); 266 dst_output);
241 } 267 }
242 268
@@ -423,7 +449,7 @@ int ip6_forward(struct sk_buff *skb)
423 449
424 /* XXX: idev->cnf.proxy_ndp? */ 450 /* XXX: idev->cnf.proxy_ndp? */
425 if (ipv6_devconf.proxy_ndp && 451 if (ipv6_devconf.proxy_ndp &&
426 pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { 452 pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
427 int proxied = ip6_forward_proxy_check(skb); 453 int proxied = ip6_forward_proxy_check(skb);
428 if (proxied > 0) 454 if (proxied > 0)
429 return ip6_input(skb); 455 return ip6_input(skb);
@@ -500,7 +526,8 @@ int ip6_forward(struct sk_buff *skb)
500 hdr->hop_limit--; 526 hdr->hop_limit--;
501 527
502 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 528 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
503 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); 529 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
530 ip6_forward_finish);
504 531
505error: 532error:
506 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 533 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
@@ -909,7 +936,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
909 struct flowi fl_gw; 936 struct flowi fl_gw;
910 int redirect; 937 int redirect;
911 938
912 ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1); 939 ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src,
940 (*dst)->dev, 1);
913 941
914 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 942 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
915 if (ifp) 943 if (ifp)
@@ -1098,7 +1126,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1098 inet->cork.length = 0; 1126 inet->cork.length = 0;
1099 sk->sk_sndmsg_page = NULL; 1127 sk->sk_sndmsg_page = NULL;
1100 sk->sk_sndmsg_off = 0; 1128 sk->sk_sndmsg_off = 0;
1101 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0); 1129 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1130 rt->rt6i_nfheader_len;
1102 length += exthdrlen; 1131 length += exthdrlen;
1103 transhdrlen += exthdrlen; 1132 transhdrlen += exthdrlen;
1104 } else { 1133 } else {
@@ -1113,7 +1142,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1113 1142
1114 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1143 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1115 1144
1116 fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); 1145 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1146 (opt ? opt->opt_nflen : 0);
1117 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1147 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1118 1148
1119 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1149 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
@@ -1401,10 +1431,6 @@ int ip6_push_pending_frames(struct sock *sk)
1401 *(__be32*)hdr = fl->fl6_flowlabel | 1431 *(__be32*)hdr = fl->fl6_flowlabel |
1402 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1432 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1403 1433
1404 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1405 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1406 else
1407 hdr->payload_len = 0;
1408 hdr->hop_limit = np->cork.hop_limit; 1434 hdr->hop_limit = np->cork.hop_limit;
1409 hdr->nexthdr = proto; 1435 hdr->nexthdr = proto;
1410 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1436 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
@@ -1421,7 +1447,7 @@ int ip6_push_pending_frames(struct sock *sk)
1421 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); 1447 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1422 } 1448 }
1423 1449
1424 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); 1450 err = ip6_local_out(skb);
1425 if (err) { 1451 if (err) {
1426 if (err > 0) 1452 if (err > 0)
1427 err = np->recverr ? net_xmit_errno(err) : 0; 1453 err = np->recverr ? net_xmit_errno(err) : 0;
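The new __ip6_local_out()/ip6_local_out() pair centralises what callers used to open-code: filling in payload_len, clamped to 0 for jumbograms larger than IPV6_MAXPLEN, and running the NF_INET_LOCAL_OUT hook; that is why the explicit payload_len stores vanish from ip6_push_pending_frames() here and from ip6_tunnel.c below. The length rule on its own as runnable C (IPV6_HDRLEN stands in for sizeof(struct ipv6hdr)):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htons()/ntohs() */

#define IPV6_HDRLEN	40	/* sizeof(struct ipv6hdr) */
#define IPV6_MAXPLEN	65535

/* Mirrors __ip6_local_out(): payloads beyond IPV6_MAXPLEN are
 * jumbograms and advertise payload_len = 0. */
static uint16_t payload_len_field(size_t pkt_len)
{
	size_t len = pkt_len - IPV6_HDRLEN;

	if (len > IPV6_MAXPLEN)
		len = 0;
	return htons((uint16_t)len);
}

int main(void)
{
	printf("1240-byte packet   -> %u\n", ntohs(payload_len_field(1240)));
	printf("100000-byte packet -> %u\n", ntohs(payload_len_field(100000)));
	return 0;
}
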
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5383b33db8ca..9031e521c1df 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -533,7 +533,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
533 fl.fl4_dst = eiph->saddr; 533 fl.fl4_dst = eiph->saddr;
534 fl.fl4_tos = RT_TOS(eiph->tos); 534 fl.fl4_tos = RT_TOS(eiph->tos);
535 fl.proto = IPPROTO_IPIP; 535 fl.proto = IPPROTO_IPIP;
536 if (ip_route_output_key(&rt, &fl)) 536 if (ip_route_output_key(&init_net, &rt, &fl))
537 goto out; 537 goto out;
538 538
539 skb2->dev = rt->u.dst.dev; 539 skb2->dev = rt->u.dst.dev;
@@ -545,7 +545,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
545 fl.fl4_dst = eiph->daddr; 545 fl.fl4_dst = eiph->daddr;
546 fl.fl4_src = eiph->saddr; 546 fl.fl4_src = eiph->saddr;
547 fl.fl4_tos = eiph->tos; 547 fl.fl4_tos = eiph->tos;
548 if (ip_route_output_key(&rt, &fl) || 548 if (ip_route_output_key(&init_net, &rt, &fl) ||
549 rt->u.dst.dev->type != ARPHRD_TUNNEL) { 549 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
550 ip_rt_put(rt); 550 ip_rt_put(rt);
551 goto out; 551 goto out;
@@ -635,7 +635,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
635 struct sk_buff *skb) 635 struct sk_buff *skb)
636{ 636{
637 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 637 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
638 ipv6_copy_dscp(ipv6h, ipv6_hdr(skb)); 638 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
639 639
640 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) 640 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
641 IP6_ECN_set_ce(ipv6_hdr(skb)); 641 IP6_ECN_set_ce(ipv6_hdr(skb));
@@ -653,8 +653,8 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
653 ldev = dev_get_by_index(&init_net, p->link); 653 ldev = dev_get_by_index(&init_net, p->link);
654 654
655 if ((ipv6_addr_is_multicast(&p->laddr) || 655 if ((ipv6_addr_is_multicast(&p->laddr) ||
656 likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && 656 likely(ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) &&
657 likely(!ipv6_chk_addr(&p->raddr, NULL, 0))) 657 likely(!ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
658 ret = 1; 658 ret = 1;
659 659
660 if (ldev) 660 if (ldev)
@@ -788,12 +788,12 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
788 if (p->link) 788 if (p->link)
789 ldev = dev_get_by_index(&init_net, p->link); 789 ldev = dev_get_by_index(&init_net, p->link);
790 790
791 if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) 791 if (unlikely(!ipv6_chk_addr(&init_net, &p->laddr, ldev, 0)))
792 printk(KERN_WARNING 792 printk(KERN_WARNING
793 "%s xmit: Local address not yet configured!\n", 793 "%s xmit: Local address not yet configured!\n",
794 p->name); 794 p->name);
795 else if (!ipv6_addr_is_multicast(&p->raddr) && 795 else if (!ipv6_addr_is_multicast(&p->raddr) &&
796 unlikely(ipv6_chk_addr(&p->raddr, NULL, 0))) 796 unlikely(ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
797 printk(KERN_WARNING 797 printk(KERN_WARNING
798 "%s xmit: Routing loop! " 798 "%s xmit: Routing loop! "
799 "Remote address found on this node!\n", 799 "Remote address found on this node!\n",
@@ -910,15 +910,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
910 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); 910 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
911 dsfield = INET_ECN_encapsulate(0, dsfield); 911 dsfield = INET_ECN_encapsulate(0, dsfield);
912 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); 912 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
913 ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
914 ipv6h->hop_limit = t->parms.hop_limit; 913 ipv6h->hop_limit = t->parms.hop_limit;
915 ipv6h->nexthdr = proto; 914 ipv6h->nexthdr = proto;
916 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); 915 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
917 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); 916 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
918 nf_reset(skb); 917 nf_reset(skb);
919 pkt_len = skb->len; 918 pkt_len = skb->len;
920 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, 919 err = ip6_local_out(skb);
921 skb->dst->dev, dst_output);
922 920
923 if (net_xmit_eval(err) == 0) { 921 if (net_xmit_eval(err) == 0) {
924 stats->tx_bytes += pkt_len; 922 stats->tx_bytes += pkt_len;
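The tunnel transmit path likewise swaps its open-coded NF_HOOK for ip6_local_out(). The wrapper's err == 1 test follows the netfilter convention of this era, where a hook chain returns 1 to mean the packet was accepted and processing should continue; sketched with trivial stand-ins:

#include <stdio.h>

/* Trivial stand-ins: 1 from the hook means "accepted, continue". */
static int hook_local_out_sketch(void)	{ return 1; }

static int dst_output_sketch(void)
{
	printf("handing packet to dst_output\n");
	return 0;
}

static int local_out_sketch(void)
{
	int err = hook_local_out_sketch();

	if (err == 1)		/* hook passed the packet through */
		err = dst_output_sketch();
	return err;
}

int main(void)
{
	return local_out_sketch();
}
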
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 0cd4056f9127..b276d04d6db5 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -190,7 +190,6 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
190static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) 190static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
191{ 191{
192 struct xfrm_state *t = NULL; 192 struct xfrm_state *t = NULL;
193 u8 mode = XFRM_MODE_TUNNEL;
194 193
195 t = xfrm_state_alloc(); 194 t = xfrm_state_alloc();
196 if (!t) 195 if (!t)
@@ -204,9 +203,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
204 memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); 203 memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
205 memcpy(&t->sel, &x->sel, sizeof(t->sel)); 204 memcpy(&t->sel, &x->sel, sizeof(t->sel));
206 t->props.family = AF_INET6; 205 t->props.family = AF_INET6;
207 if (x->props.mode == XFRM_MODE_BEET) 206 t->props.mode = x->props.mode;
208 mode = x->props.mode;
209 t->props.mode = mode;
210 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); 207 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
211 208
212 if (xfrm_init_state(t)) 209 if (xfrm_init_state(t))
@@ -405,22 +402,22 @@ static int ipcomp6_init_state(struct xfrm_state *x)
405 if (x->encap) 402 if (x->encap)
406 goto out; 403 goto out;
407 404
408 err = -ENOMEM;
409 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
410 if (!ipcd)
411 goto out;
412
413 x->props.header_len = 0; 405 x->props.header_len = 0;
414 switch (x->props.mode) { 406 switch (x->props.mode) {
415 case XFRM_MODE_BEET:
416 case XFRM_MODE_TRANSPORT: 407 case XFRM_MODE_TRANSPORT:
417 break; 408 break;
418 case XFRM_MODE_TUNNEL: 409 case XFRM_MODE_TUNNEL:
419 x->props.header_len += sizeof(struct ipv6hdr); 410 x->props.header_len += sizeof(struct ipv6hdr);
411 break;
420 default: 412 default:
421 goto error; 413 goto out;
422 } 414 }
423 415
416 err = -ENOMEM;
417 ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
418 if (!ipcd)
419 goto out;
420
424 mutex_lock(&ipcomp6_resource_mutex); 421 mutex_lock(&ipcomp6_resource_mutex);
425 if (!ipcomp6_alloc_scratches()) 422 if (!ipcomp6_alloc_scratches())
426 goto error; 423 goto error;
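In ipcomp6_init_state() the kzalloc() moves below the mode switch, so the unsupported-mode exit (now goto out rather than goto error) has nothing to unwind, and XFRM_MODE_BEET is no longer silently accepted. The validate-then-allocate ordering in miniature:

#include <stdio.h>
#include <stdlib.h>

enum mode { TRANSPORT, TUNNEL, BEET };

/* Validate first, allocate second: the unsupported-mode exit has
 * nothing to unwind. */
static int init_state_sketch(enum mode m)
{
	void *ipcd;

	switch (m) {
	case TRANSPORT:
	case TUNNEL:
		break;
	default:
		return -1;	/* nothing allocated yet */
	}

	ipcd = calloc(1, 64);
	if (!ipcd)
		return -1;	/* -ENOMEM */
	free(ipcd);
	return 0;
}

int main(void)
{
	printf("tunnel: %d\n", init_state_sketch(TUNNEL));
	printf("beet:   %d\n", init_state_sketch(BEET));
	return 0;
}
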
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 8c5f80fd03ad..bf2a686aa13d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -268,8 +268,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
268 struct inet_connection_sock *icsk = inet_csk(sk); 268 struct inet_connection_sock *icsk = inet_csk(sk);
269 269
270 local_bh_disable(); 270 local_bh_disable();
271 sock_prot_dec_use(sk->sk_prot); 271 sock_prot_inuse_add(sk->sk_prot, -1);
272 sock_prot_inc_use(&tcp_prot); 272 sock_prot_inuse_add(&tcp_prot, 1);
273 local_bh_enable(); 273 local_bh_enable();
274 sk->sk_prot = &tcp_prot; 274 sk->sk_prot = &tcp_prot;
275 icsk->icsk_af_ops = &ipv4_specific; 275 icsk->icsk_af_ops = &ipv4_specific;
@@ -282,8 +282,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
282 if (sk->sk_protocol == IPPROTO_UDPLITE) 282 if (sk->sk_protocol == IPPROTO_UDPLITE)
283 prot = &udplite_prot; 283 prot = &udplite_prot;
284 local_bh_disable(); 284 local_bh_disable();
285 sock_prot_dec_use(sk->sk_prot); 285 sock_prot_inuse_add(sk->sk_prot, -1);
286 sock_prot_inc_use(prot); 286 sock_prot_inuse_add(prot, 1);
287 local_bh_enable(); 287 local_bh_enable();
288 sk->sk_prot = prot; 288 sk->sk_prot = prot;
289 sk->sk_socket->ops = &inet_dgram_ops; 289 sk->sk_socket->ops = &inet_dgram_ops;
@@ -1128,9 +1128,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
1128EXPORT_SYMBOL(compat_ipv6_getsockopt); 1128EXPORT_SYMBOL(compat_ipv6_getsockopt);
1129#endif 1129#endif
1130 1130
1131void __init ipv6_packet_init(void) 1131int __init ipv6_packet_init(void)
1132{ 1132{
1133 dev_add_pack(&ipv6_packet_type); 1133 dev_add_pack(&ipv6_packet_type);
1134 return 0;
1134} 1135}
1135 1136
1136void ipv6_packet_cleanup(void) 1137void ipv6_packet_cleanup(void)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 331d728c2035..ab228d1ea114 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -903,9 +903,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
903 return -ENOMEM; 903 return -ENOMEM;
904 } 904 }
905 905
906 init_timer(&mc->mca_timer); 906 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
907 mc->mca_timer.function = igmp6_timer_handler;
908 mc->mca_timer.data = (unsigned long) mc;
909 907
910 ipv6_addr_copy(&mc->mca_addr, addr); 908 ipv6_addr_copy(&mc->mca_addr, addr);
911 mc->idev = idev; 909 mc->idev = idev;
@@ -1450,7 +1448,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
1450 1448
1451static inline int mld_dev_queue_xmit(struct sk_buff *skb) 1449static inline int mld_dev_queue_xmit(struct sk_buff *skb)
1452{ 1450{
1453 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev, 1451 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
1454 mld_dev_queue_xmit2); 1452 mld_dev_queue_xmit2);
1455} 1453}
1456 1454
@@ -1471,7 +1469,7 @@ static void mld_sendpack(struct sk_buff *skb)
1471 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, 1469 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
1472 IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), 1470 IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
1473 mldlen, 0)); 1471 mldlen, 0));
1474 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, 1472 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
1475 mld_dev_queue_xmit); 1473 mld_dev_queue_xmit);
1476 if (!err) { 1474 if (!err) {
1477 ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); 1475 ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT);
@@ -1815,7 +1813,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1815 1813
1816 idev = in6_dev_get(skb->dev); 1814 idev = in6_dev_get(skb->dev);
1817 1815
1818 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, 1816 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
1819 mld_dev_queue_xmit); 1817 mld_dev_queue_xmit);
1820 if (!err) { 1818 if (!err) {
1821 ICMP6MSGOUT_INC_STATS(idev, type); 1819 ICMP6MSGOUT_INC_STATS(idev, type);
@@ -2259,14 +2257,12 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
2259 write_lock_bh(&idev->lock); 2257 write_lock_bh(&idev->lock);
2260 rwlock_init(&idev->mc_lock); 2258 rwlock_init(&idev->mc_lock);
2261 idev->mc_gq_running = 0; 2259 idev->mc_gq_running = 0;
2262 init_timer(&idev->mc_gq_timer); 2260 setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire,
2263 idev->mc_gq_timer.data = (unsigned long) idev; 2261 (unsigned long)idev);
2264 idev->mc_gq_timer.function = &mld_gq_timer_expire;
2265 idev->mc_tomb = NULL; 2262 idev->mc_tomb = NULL;
2266 idev->mc_ifc_count = 0; 2263 idev->mc_ifc_count = 0;
2267 init_timer(&idev->mc_ifc_timer); 2264 setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire,
2268 idev->mc_ifc_timer.data = (unsigned long) idev; 2265 (unsigned long)idev);
2269 idev->mc_ifc_timer.function = &mld_ifc_timer_expire;
2270 idev->mc_qrv = MLD_QRV_DEFAULT; 2266 idev->mc_qrv = MLD_QRV_DEFAULT;
2271 idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; 2267 idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
2272 idev->mc_v1_seen = 0; 2268 idev->mc_v1_seen = 0;
@@ -2377,6 +2373,7 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
2377} 2373}
2378 2374
2379static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) 2375static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
2376 __acquires(dev_base_lock)
2380{ 2377{
2381 read_lock(&dev_base_lock); 2378 read_lock(&dev_base_lock);
2382 return igmp6_mc_get_idx(seq, *pos); 2379 return igmp6_mc_get_idx(seq, *pos);
@@ -2391,6 +2388,7 @@ static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2391} 2388}
2392 2389
2393static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) 2390static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
2391 __releases(dev_base_lock)
2394{ 2392{
2395 struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); 2393 struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
2396 if (likely(state->idev != NULL)) { 2394 if (likely(state->idev != NULL)) {
@@ -2520,6 +2518,7 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
2520} 2518}
2521 2519
2522static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) 2520static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
2521 __acquires(dev_base_lock)
2523{ 2522{
2524 read_lock(&dev_base_lock); 2523 read_lock(&dev_base_lock);
2525 return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2524 return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2537,6 +2536,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2537} 2536}
2538 2537
2539static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) 2538static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
2539 __releases(dev_base_lock)
2540{ 2540{
2541 struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); 2541 struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
2542 if (likely(state->im != NULL)) { 2542 if (likely(state->im != NULL)) {
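The mcast.c changes are mechanical: setup_timer() collapses each init_timer() call plus two assignments into one line, and the seq_file start/stop handlers gain __acquires/__releases annotations for sparse. The helper's shape, reimplemented in plain C (struct timer_sketch is a simplified stand-in for the kernel's timer_list):

#include <stdio.h>

/* Simplified stand-in for the kernel's timer_list. */
struct timer_sketch {
	void (*function)(unsigned long);
	unsigned long data;
};

/* setup_timer() folds the three-line init_timer() sequence into one. */
static void setup_timer_sketch(struct timer_sketch *t,
			       void (*fn)(unsigned long),
			       unsigned long data)
{
	t->function = fn;
	t->data = data;
}

static void expire(unsigned long data)
{
	printf("timer fired, data=%#lx\n", data);
}

int main(void)
{
	struct timer_sketch t;

	setup_timer_sketch(&t, expire, 0x1234);
	t.function(t.data);	/* simulate expiry */
	return 0;
}
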
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 7fd841d41019..49d396620eac 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -34,11 +34,6 @@
34#include <net/xfrm.h> 34#include <net/xfrm.h>
35#include <net/mip6.h> 35#include <net/mip6.h>
36 36
37static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr)
38{
39 return x->coaddr;
40}
41
42static inline unsigned int calc_padlen(unsigned int len, unsigned int n) 37static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
43{ 38{
44 return (n - len + 16) & 0x7; 39 return (n - len + 16) & 0x7;
@@ -133,12 +128,15 @@ static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
133{ 128{
134 struct ipv6hdr *iph = ipv6_hdr(skb); 129 struct ipv6hdr *iph = ipv6_hdr(skb);
135 struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; 130 struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
131 int err = destopt->nexthdr;
136 132
133 spin_lock(&x->lock);
137 if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && 134 if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
138 !ipv6_addr_any((struct in6_addr *)x->coaddr)) 135 !ipv6_addr_any((struct in6_addr *)x->coaddr))
139 return -ENOENT; 136 err = -ENOENT;
137 spin_unlock(&x->lock);
140 138
141 return destopt->nexthdr; 139 return err;
142} 140}
143 141
144/* Destination Option Header is inserted. 142/* Destination Option Header is inserted.
@@ -337,25 +335,27 @@ static struct xfrm_type mip6_destopt_type =
337 .description = "MIP6DESTOPT", 335 .description = "MIP6DESTOPT",
338 .owner = THIS_MODULE, 336 .owner = THIS_MODULE,
339 .proto = IPPROTO_DSTOPTS, 337 .proto = IPPROTO_DSTOPTS,
340 .flags = XFRM_TYPE_NON_FRAGMENT, 338 .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
341 .init_state = mip6_destopt_init_state, 339 .init_state = mip6_destopt_init_state,
342 .destructor = mip6_destopt_destroy, 340 .destructor = mip6_destopt_destroy,
343 .input = mip6_destopt_input, 341 .input = mip6_destopt_input,
344 .output = mip6_destopt_output, 342 .output = mip6_destopt_output,
345 .reject = mip6_destopt_reject, 343 .reject = mip6_destopt_reject,
346 .hdr_offset = mip6_destopt_offset, 344 .hdr_offset = mip6_destopt_offset,
347 .local_addr = mip6_xfrm_addr,
348}; 345};
349 346
350static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) 347static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
351{ 348{
352 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; 349 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
350 int err = rt2->rt_hdr.nexthdr;
353 351
352 spin_lock(&x->lock);
354 if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && 353 if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
355 !ipv6_addr_any((struct in6_addr *)x->coaddr)) 354 !ipv6_addr_any((struct in6_addr *)x->coaddr))
356 return -ENOENT; 355 err = -ENOENT;
356 spin_unlock(&x->lock);
357 357
358 return rt2->rt_hdr.nexthdr; 358 return err;
359} 359}
360 360
361/* Routing Header type 2 is inserted. 361/* Routing Header type 2 is inserted.
@@ -467,13 +467,12 @@ static struct xfrm_type mip6_rthdr_type =
467 .description = "MIP6RT", 467 .description = "MIP6RT",
468 .owner = THIS_MODULE, 468 .owner = THIS_MODULE,
469 .proto = IPPROTO_ROUTING, 469 .proto = IPPROTO_ROUTING,
470 .flags = XFRM_TYPE_NON_FRAGMENT, 470 .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
471 .init_state = mip6_rthdr_init_state, 471 .init_state = mip6_rthdr_init_state,
472 .destructor = mip6_rthdr_destroy, 472 .destructor = mip6_rthdr_destroy,
473 .input = mip6_rthdr_input, 473 .input = mip6_rthdr_input,
474 .output = mip6_rthdr_output, 474 .output = mip6_rthdr_output,
475 .hdr_offset = mip6_rthdr_offset, 475 .hdr_offset = mip6_rthdr_offset,
476 .remote_addr = mip6_xfrm_addr,
477}; 476};
478 477
479static int __init mip6_init(void) 478static int __init mip6_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 85947eae5bf7..0d33a7d32125 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -533,7 +533,8 @@ static void __ndisc_send(struct net_device *dev,
533 idev = in6_dev_get(dst->dev); 533 idev = in6_dev_get(dst->dev);
534 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); 534 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
535 535
536 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); 536 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
537 dst_output);
537 if (!err) { 538 if (!err) {
538 ICMP6MSGOUT_INC_STATS(idev, type); 539 ICMP6MSGOUT_INC_STATS(idev, type);
539 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); 540 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
@@ -555,7 +556,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
555 }; 556 };
556 557
557 /* for anycast or proxy, solicited_addr != src_addr */ 558 /* for anycast or proxy, solicited_addr != src_addr */
558 ifp = ipv6_get_ifaddr(solicited_addr, dev, 1); 559 ifp = ipv6_get_ifaddr(&init_net, solicited_addr, dev, 1);
559 if (ifp) { 560 if (ifp) {
560 src_addr = solicited_addr; 561 src_addr = solicited_addr;
561 if (ifp->flags & IFA_F_OPTIMISTIC) 562 if (ifp->flags & IFA_F_OPTIMISTIC)
@@ -615,7 +616,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
615 * suppress the inclusion of the sllao. 616 * suppress the inclusion of the sllao.
616 */ 617 */
617 if (send_sllao) { 618 if (send_sllao) {
618 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1); 619 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(&init_net, saddr,
620 dev, 1);
619 if (ifp) { 621 if (ifp) {
620 if (ifp->flags & IFA_F_OPTIMISTIC) { 622 if (ifp->flags & IFA_F_OPTIMISTIC) {
621 send_sllao = 0; 623 send_sllao = 0;
@@ -652,7 +654,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
652 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; 654 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
653 int probes = atomic_read(&neigh->probes); 655 int probes = atomic_read(&neigh->probes);
654 656
655 if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1)) 657 if (skb && ipv6_chk_addr(&init_net, &ipv6_hdr(skb)->saddr, dev, 1))
656 saddr = &ipv6_hdr(skb)->saddr; 658 saddr = &ipv6_hdr(skb)->saddr;
657 659
658 if ((probes -= neigh->parms->ucast_probes) < 0) { 660 if ((probes -= neigh->parms->ucast_probes) < 0) {
@@ -740,7 +742,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
740 742
741 inc = ipv6_addr_is_multicast(daddr); 743 inc = ipv6_addr_is_multicast(daddr);
742 744
743 if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) { 745 if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1)) != NULL) {
744 746
745 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { 747 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
746 if (dad) { 748 if (dad) {
@@ -788,7 +790,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
788 if (ipv6_chk_acast_addr(dev, &msg->target) || 790 if (ipv6_chk_acast_addr(dev, &msg->target) ||
789 (idev->cnf.forwarding && 791 (idev->cnf.forwarding &&
790 (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && 792 (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
791 (pneigh = pneigh_lookup(&nd_tbl, 793 (pneigh = pneigh_lookup(&nd_tbl, &init_net,
792 &msg->target, dev, 0)) != NULL)) { 794 &msg->target, dev, 0)) != NULL)) {
793 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && 795 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
794 skb->pkt_type != PACKET_HOST && 796 skb->pkt_type != PACKET_HOST &&
@@ -898,7 +900,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
898 return; 900 return;
899 } 901 }
900 } 902 }
901 if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) { 903 if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1))) {
902 if (ifp->flags & IFA_F_TENTATIVE) { 904 if (ifp->flags & IFA_F_TENTATIVE) {
903 addrconf_dad_failure(ifp); 905 addrconf_dad_failure(ifp);
904 return; 906 return;
@@ -929,7 +931,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
929 */ 931 */
930 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && 932 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
931 ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && 933 ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
932 pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { 934 pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) {
933 /* XXX: idev->cnf.proxy_ndp */ 935
934 goto out; 936 goto out;
935 } 937 }
@@ -1048,7 +1050,8 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1048 &ipv6_hdr(ra)->saddr); 1050 &ipv6_hdr(ra)->saddr);
1049 nlmsg_end(skb, nlh); 1051 nlmsg_end(skb, nlh);
1050 1052
1051 err = rtnl_notify(skb, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); 1053 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_ND_USEROPT, NULL,
1054 GFP_ATOMIC);
1052 if (err < 0) 1055 if (err < 0)
1053 goto errout; 1056 goto errout;
1054 1057
@@ -1058,7 +1061,7 @@ nla_put_failure:
1058 nlmsg_free(skb); 1061 nlmsg_free(skb);
1059 err = -EMSGSIZE; 1062 err = -EMSGSIZE;
1060errout: 1063errout:
1061 rtnl_set_sk_err(RTNLGRP_ND_USEROPT, err); 1064 rtnl_set_sk_err(&init_net, RTNLGRP_ND_USEROPT, err);
1062} 1065}
1063 1066
1064static void ndisc_router_discovery(struct sk_buff *skb) 1067static void ndisc_router_discovery(struct sk_buff *skb)
@@ -1294,11 +1297,11 @@ skip_defrtr:
1294 } 1297 }
1295 1298
1296 if (ndopts.nd_useropts) { 1299 if (ndopts.nd_useropts) {
1297 struct nd_opt_hdr *opt; 1300 struct nd_opt_hdr *p;
1298 for (opt = ndopts.nd_useropts; 1301 for (p = ndopts.nd_useropts;
1299 opt; 1302 p;
1300 opt = ndisc_next_useropt(opt, ndopts.nd_useropts_end)) { 1303 p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1301 ndisc_ra_useropt(skb, opt); 1304 ndisc_ra_useropt(skb, p);
1302 } 1305 }
1303 } 1306 }
1304 1307
@@ -1538,7 +1541,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1538 buff->dst = dst; 1541 buff->dst = dst;
1539 idev = in6_dev_get(dst->dev); 1542 idev = in6_dev_get(dst->dev);
1540 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); 1543 IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
1541 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); 1544 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1545 dst_output);
1542 if (!err) { 1546 if (!err) {
1543 ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); 1547 ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
1544 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); 1548 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
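The ndisc changes are mostly network-namespace plumbing: ipv6_get_ifaddr(), ipv6_chk_addr(), pneigh_lookup(), rtnl_notify() and rtnl_set_sk_err() all gain an explicit struct net argument, passed as &init_net for now. The pattern in miniature (both types below are toy stand-ins):

#include <stdio.h>

/* Toy stand-ins: lookups take an explicit context instead of
 * consulting a global table. */
struct net_sketch {
	const char *name;
};

static struct net_sketch init_net_sketch = { "init_net" };

static int chk_addr_sketch(struct net_sketch *net, int addr)
{
	printf("looking up %d in %s\n", addr, net->name);
	return addr == 1;	/* pretend ::1 is configured */
}

int main(void)
{
	return !chk_addr_sketch(&init_net_sketch, 1);
}
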
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index b1326c2bf8aa..2e06724dc348 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -8,6 +8,7 @@
8#include <net/ip6_route.h> 8#include <net/ip6_route.h>
9#include <net/xfrm.h> 9#include <net/xfrm.h>
10#include <net/ip6_checksum.h> 10#include <net/ip6_checksum.h>
11#include <net/netfilter/nf_queue.h>
11 12
12int ip6_route_me_harder(struct sk_buff *skb) 13int ip6_route_me_harder(struct sk_buff *skb)
13{ 14{
@@ -56,11 +57,12 @@ struct ip6_rt_info {
56 struct in6_addr saddr; 57 struct in6_addr saddr;
57}; 58};
58 59
59static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) 60static void nf_ip6_saveroute(const struct sk_buff *skb,
61 struct nf_queue_entry *entry)
60{ 62{
61 struct ip6_rt_info *rt_info = nf_info_reroute(info); 63 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
62 64
63 if (info->hook == NF_IP6_LOCAL_OUT) { 65 if (entry->hook == NF_INET_LOCAL_OUT) {
64 struct ipv6hdr *iph = ipv6_hdr(skb); 66 struct ipv6hdr *iph = ipv6_hdr(skb);
65 67
66 rt_info->daddr = iph->daddr; 68 rt_info->daddr = iph->daddr;
@@ -68,11 +70,12 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
68 } 70 }
69} 71}
70 72
71static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) 73static int nf_ip6_reroute(struct sk_buff *skb,
74 const struct nf_queue_entry *entry)
72{ 75{
73 struct ip6_rt_info *rt_info = nf_info_reroute(info); 76 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
74 77
75 if (info->hook == NF_IP6_LOCAL_OUT) { 78 if (entry->hook == NF_INET_LOCAL_OUT) {
76 struct ipv6hdr *iph = ipv6_hdr(skb); 79 struct ipv6hdr *iph = ipv6_hdr(skb);
77 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || 80 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
78 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) 81 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
@@ -81,6 +84,12 @@ static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
81 return 0; 84 return 0;
82} 85}
83 86
87static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl)
88{
89 *dst = ip6_route_output(NULL, fl);
90 return (*dst)->error;
91}
92
84__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 93__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
85 unsigned int dataoff, u_int8_t protocol) 94 unsigned int dataoff, u_int8_t protocol)
86{ 95{
@@ -89,7 +98,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
89 98
90 switch (skb->ip_summed) { 99 switch (skb->ip_summed) {
91 case CHECKSUM_COMPLETE: 100 case CHECKSUM_COMPLETE:
92 if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) 101 if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
93 break; 102 break;
94 if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 103 if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
95 skb->len - dataoff, protocol, 104 skb->len - dataoff, protocol,
@@ -115,9 +124,10 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
115 124
116EXPORT_SYMBOL(nf_ip6_checksum); 125EXPORT_SYMBOL(nf_ip6_checksum);
117 126
118static struct nf_afinfo nf_ip6_afinfo = { 127static const struct nf_afinfo nf_ip6_afinfo = {
119 .family = AF_INET6, 128 .family = AF_INET6,
120 .checksum = nf_ip6_checksum, 129 .checksum = nf_ip6_checksum,
130 .route = nf_ip6_route,
121 .saveroute = nf_ip6_saveroute, 131 .saveroute = nf_ip6_saveroute,
122 .reroute = nf_ip6_reroute, 132 .reroute = nf_ip6_reroute,
123 .route_key_size = sizeof(struct ip6_rt_info), 133 .route_key_size = sizeof(struct ip6_rt_info),
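netfilter.c converts from struct nf_info to the generic struct nf_queue_entry and adds a .route callback to nf_afinfo; the ip6_queue.c hunks that follow then drop the private ipq_queue_entry wrapper, since entries carry their own list linkage, and fold the lookup into ipq_find_dequeue_entry() as a search by id. A compact model of that find-and-dequeue-by-id logic (array-backed here rather than a kernel list):

#include <stdio.h>

/* Array-backed model of the queue; entries carry their own identity,
 * so no per-protocol wrapper struct is needed. */
struct entry_sketch {
	unsigned long id;
	int hook;
};

static struct entry_sketch queue[8];
static int queue_total;

static void enqueue_sketch(struct entry_sketch e)
{
	queue[queue_total++] = e;	/* list_add_tail() in the hunk */
}

/* Find-and-dequeue by id, like the rewritten ipq_find_dequeue_entry().
 * Removal here swaps with the last slot, so FIFO order is not kept,
 * unlike the kernel's list_del(). */
static int find_dequeue_sketch(unsigned long id, struct entry_sketch *out)
{
	int i;

	for (i = 0; i < queue_total; i++) {
		if (queue[i].id == id) {
			*out = queue[i];
			queue[i] = queue[--queue_total];
			return 1;
		}
	}
	return 0;
}

int main(void)
{
	struct entry_sketch e;

	enqueue_sketch((struct entry_sketch){ .id = 1, .hook = 0 });
	enqueue_sketch((struct entry_sketch){ .id = 2, .hook = 3 });
	if (find_dequeue_sketch(2, &e))
		printf("dequeued id=%lu hook=%d, %d left\n",
		       e.id, e.hook, queue_total);
	return 0;
}
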
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 838b8ddee8c0..4fc0b023cfd7 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -2,12 +2,13 @@
2# IP netfilter configuration 2# IP netfilter configuration
3# 3#
4 4
5menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" 5menu "IPv6: Netfilter Configuration"
6 depends on INET && IPV6 && NETFILTER && EXPERIMENTAL 6 depends on INET && IPV6 && NETFILTER
7 7
8config NF_CONNTRACK_IPV6 8config NF_CONNTRACK_IPV6
9 tristate "IPv6 connection tracking support (EXPERIMENTAL)" 9 tristate "IPv6 connection tracking support"
10 depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK 10 depends on INET && IPV6 && NF_CONNTRACK
11 default m if NETFILTER_ADVANCED=n
11 ---help--- 12 ---help---
12 Connection tracking keeps a record of what packets have passed 13 Connection tracking keeps a record of what packets have passed
13 through your machine, in order to figure out how they are related 14 through your machine, in order to figure out how they are related
@@ -21,7 +22,8 @@ config NF_CONNTRACK_IPV6
21 22
22config IP6_NF_QUEUE 23config IP6_NF_QUEUE
23 tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" 24 tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
24 depends on INET && IPV6 && NETFILTER && EXPERIMENTAL 25 depends on INET && IPV6 && NETFILTER
26 depends on NETFILTER_ADVANCED
25 ---help--- 27 ---help---
26 28
27 This option adds a queue handler to the kernel for IPv6 29 This option adds a queue handler to the kernel for IPv6
@@ -42,8 +44,9 @@ config IP6_NF_QUEUE
42 44
43config IP6_NF_IPTABLES 45config IP6_NF_IPTABLES
44 tristate "IP6 tables support (required for filtering)" 46 tristate "IP6 tables support (required for filtering)"
45 depends on INET && IPV6 && EXPERIMENTAL 47 depends on INET && IPV6
46 select NETFILTER_XTABLES 48 select NETFILTER_XTABLES
49 default m if NETFILTER_ADVANCED=n
47 help 50 help
48 ip6tables is a general, extensible packet identification framework. 51 ip6tables is a general, extensible packet identification framework.
49 Currently only the packet filtering and packet mangling subsystem 52 Currently only the packet filtering and packet mangling subsystem
@@ -54,8 +57,9 @@ config IP6_NF_IPTABLES
54 57
55# The simple matches. 58# The simple matches.
56config IP6_NF_MATCH_RT 59config IP6_NF_MATCH_RT
57 tristate "Routing header match support" 60 tristate '"rt" Routing header match support'
58 depends on IP6_NF_IPTABLES 61 depends on IP6_NF_IPTABLES
62 depends on NETFILTER_ADVANCED
59 help 63 help
60 rt matching allows you to match packets based on the routing 64 rt matching allows you to match packets based on the routing
61 header of the packet. 65 header of the packet.
@@ -63,8 +67,9 @@ config IP6_NF_MATCH_RT
63 To compile it as a module, choose M here. If unsure, say N. 67 To compile it as a module, choose M here. If unsure, say N.
64 68
65config IP6_NF_MATCH_OPTS 69config IP6_NF_MATCH_OPTS
66 tristate "Hop-by-hop and Dst opts header match support" 70 tristate '"hopbyhop" and "dst" opts header match support'
67 depends on IP6_NF_IPTABLES 71 depends on IP6_NF_IPTABLES
72 depends on NETFILTER_ADVANCED
68 help 73 help
69 This allows one to match packets based on the hop-by-hop 74 This allows one to match packets based on the hop-by-hop
70 and destination options headers of a packet. 75 and destination options headers of a packet.
@@ -72,8 +77,9 @@ config IP6_NF_MATCH_OPTS
72 To compile it as a module, choose M here. If unsure, say N. 77 To compile it as a module, choose M here. If unsure, say N.
73 78
74config IP6_NF_MATCH_FRAG 79config IP6_NF_MATCH_FRAG
75 tristate "Fragmentation header match support" 80 tristate '"frag" Fragmentation header match support'
76 depends on IP6_NF_IPTABLES 81 depends on IP6_NF_IPTABLES
82 depends on NETFILTER_ADVANCED
77 help 83 help
78 frag matching allows you to match packets based on the fragmentation 84 frag matching allows you to match packets based on the fragmentation
79 header of the packet. 85 header of the packet.
@@ -81,26 +87,19 @@ config IP6_NF_MATCH_FRAG
81 To compile it as a module, choose M here. If unsure, say N. 87 To compile it as a module, choose M here. If unsure, say N.
82 88
83config IP6_NF_MATCH_HL 89config IP6_NF_MATCH_HL
84 tristate "HL match support" 90 tristate '"hl" match support'
85 depends on IP6_NF_IPTABLES 91 depends on IP6_NF_IPTABLES
92 depends on NETFILTER_ADVANCED
86 help 93 help
87 HL matching allows you to match packets based on the hop 94 HL matching allows you to match packets based on the hop
88 limit of the packet. 95 limit of the packet.
89 96
90 To compile it as a module, choose M here. If unsure, say N. 97 To compile it as a module, choose M here. If unsure, say N.
91 98
92config IP6_NF_MATCH_OWNER
93 tristate "Owner match support"
94 depends on IP6_NF_IPTABLES
95 help
96 Packet owner matching allows you to match locally-generated packets
97 based on who created them: the user, group, process or session.
98
99 To compile it as a module, choose M here. If unsure, say N.
100
101config IP6_NF_MATCH_IPV6HEADER 99config IP6_NF_MATCH_IPV6HEADER
102 tristate "IPv6 Extension Headers Match" 100 tristate '"ipv6header" IPv6 Extension Headers Match'
103 depends on IP6_NF_IPTABLES 101 depends on IP6_NF_IPTABLES
102 depends on NETFILTER_ADVANCED
104 help 103 help
105 This module allows one to match packets based upon 104 This module allows one to match packets based upon
106 the ipv6 extension headers. 105 the ipv6 extension headers.
@@ -108,24 +107,27 @@ config IP6_NF_MATCH_IPV6HEADER
108 To compile it as a module, choose M here. If unsure, say N. 107 To compile it as a module, choose M here. If unsure, say N.
109 108
110config IP6_NF_MATCH_AH 109config IP6_NF_MATCH_AH
111 tristate "AH match support" 110 tristate '"ah" match support'
112 depends on IP6_NF_IPTABLES 111 depends on IP6_NF_IPTABLES
112 depends on NETFILTER_ADVANCED
113 help 113 help
114 This module allows one to match AH packets. 114 This module allows one to match AH packets.
115 115
116 To compile it as a module, choose M here. If unsure, say N. 116 To compile it as a module, choose M here. If unsure, say N.
117 117
118config IP6_NF_MATCH_MH 118config IP6_NF_MATCH_MH
119 tristate "MH match support" 119 tristate '"mh" match support'
120 depends on IP6_NF_IPTABLES 120 depends on IP6_NF_IPTABLES
121 depends on NETFILTER_ADVANCED
121 help 122 help
122 This module allows one to match MH packets. 123 This module allows one to match MH packets.
123 124
124 To compile it as a module, choose M here. If unsure, say N. 125 To compile it as a module, choose M here. If unsure, say N.
125 126
126config IP6_NF_MATCH_EUI64 127config IP6_NF_MATCH_EUI64
127 tristate "EUI64 address check" 128 tristate '"eui64" address check'
128 depends on IP6_NF_IPTABLES 129 depends on IP6_NF_IPTABLES
130 depends on NETFILTER_ADVANCED
129 help 131 help
130 This module performs checking on the IPv6 source address. 132
131 It compares the last 64 bits with the EUI64 (delivered 133
@@ -137,6 +139,7 @@ config IP6_NF_MATCH_EUI64
137config IP6_NF_FILTER 139config IP6_NF_FILTER
138 tristate "Packet filtering" 140 tristate "Packet filtering"
139 depends on IP6_NF_IPTABLES 141 depends on IP6_NF_IPTABLES
142 default m if NETFILTER_ADVANCED=n
140 help 143 help
141 Packet filtering defines a table `filter', which has a series of 144 Packet filtering defines a table `filter', which has a series of
142 rules for simple packet filtering at local input, forwarding and 145 rules for simple packet filtering at local input, forwarding and
@@ -147,6 +150,7 @@ config IP6_NF_FILTER
147config IP6_NF_TARGET_LOG 150config IP6_NF_TARGET_LOG
148 tristate "LOG target support" 151 tristate "LOG target support"
149 depends on IP6_NF_FILTER 152 depends on IP6_NF_FILTER
153 default m if NETFILTER_ADVANCED=n
150 help 154 help
151 This option adds a `LOG' target, which allows you to create rules in 155 This option adds a `LOG' target, which allows you to create rules in
152 any iptables table which records the packet header to the syslog. 156 any iptables table which records the packet header to the syslog.
@@ -156,6 +160,7 @@ config IP6_NF_TARGET_LOG
156config IP6_NF_TARGET_REJECT 160config IP6_NF_TARGET_REJECT
157 tristate "REJECT target support" 161 tristate "REJECT target support"
158 depends on IP6_NF_FILTER 162 depends on IP6_NF_FILTER
163 default m if NETFILTER_ADVANCED=n
159 help 164 help
160 The REJECT target allows a filtering rule to specify that an ICMPv6 165 The REJECT target allows a filtering rule to specify that an ICMPv6
161 error should be issued in response to an incoming packet, rather 166 error should be issued in response to an incoming packet, rather
@@ -166,6 +171,7 @@ config IP6_NF_TARGET_REJECT
166config IP6_NF_MANGLE 171config IP6_NF_MANGLE
167 tristate "Packet mangling" 172 tristate "Packet mangling"
168 depends on IP6_NF_IPTABLES 173 depends on IP6_NF_IPTABLES
174 default m if NETFILTER_ADVANCED=n
169 help 175 help
170 This option adds a `mangle' table to iptables: see the man page for 176 This option adds a `mangle' table to iptables: see the man page for
171 iptables(8). This table is used for various packet alterations 177 iptables(8). This table is used for various packet alterations
@@ -176,27 +182,29 @@ config IP6_NF_MANGLE
176config IP6_NF_TARGET_HL 182config IP6_NF_TARGET_HL
177 tristate 'HL (hoplimit) target support' 183 tristate 'HL (hoplimit) target support'
178 depends on IP6_NF_MANGLE 184 depends on IP6_NF_MANGLE
185 depends on NETFILTER_ADVANCED
179 help 186 help
180 This option adds a `HL' target, which enables the user to decrement 187 This option adds a `HL' target, which enables the user to decrement
181 the hoplimit value of the IPv6 header or set it to a given (lower) 188 the hoplimit value of the IPv6 header or set it to a given (lower)
182 value. 189 value.
183 190
184 While it is safe to decrement the hoplimit value, this option also 191 While it is safe to decrement the hoplimit value, this option also
185 enables functionality to increment and set the hoplimit value of the 192 enables functionality to increment and set the hoplimit value of the
186 IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since 193 IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since
187 you can easily create immortal packets that loop forever on the 194 you can easily create immortal packets that loop forever on the
188 network. 195 network.
189 196
190 To compile it as a module, choose M here. If unsure, say N. 197 To compile it as a module, choose M here. If unsure, say N.
191 198
192config IP6_NF_RAW 199config IP6_NF_RAW
193 tristate 'raw table support (required for TRACE)' 200 tristate 'raw table support (required for TRACE)'
194 depends on IP6_NF_IPTABLES 201 depends on IP6_NF_IPTABLES
202 depends on NETFILTER_ADVANCED
195 help 203 help
196 This option adds a `raw' table to ip6tables. This table is the very 204 This option adds a `raw' table to ip6tables. This table is the very
197 first in the netfilter framework and hooks in at the PREROUTING 205 first in the netfilter framework and hooks in at the PREROUTING
198 and OUTPUT chains. 206 and OUTPUT chains.
199 207
200 If you want to compile it as a module, say M here and read 208 If you want to compile it as a module, say M here and read
201 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 209 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
202 210
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index e789ec44d23b..fbf2c14ed887 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
23obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o 23obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
24obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o 24obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
25obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o 25obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
26obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
27obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o 26obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
28 27
29# targets 28# targets
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index e273605eef85..56b4ea6d29ed 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -29,6 +29,7 @@
29#include <net/sock.h> 29#include <net/sock.h>
30#include <net/ipv6.h> 30#include <net/ipv6.h>
31#include <net/ip6_route.h> 31#include <net/ip6_route.h>
32#include <net/netfilter/nf_queue.h>
32#include <linux/netfilter_ipv4/ip_queue.h> 33#include <linux/netfilter_ipv4/ip_queue.h>
33#include <linux/netfilter_ipv4/ip_tables.h> 34#include <linux/netfilter_ipv4/ip_tables.h>
34#include <linux/netfilter_ipv6/ip6_tables.h> 35#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -38,13 +39,7 @@
 #define NET_IPQ_QMAX 2088
 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
 
-struct ipq_queue_entry {
-	struct list_head list;
-	struct nf_info *info;
-	struct sk_buff *skb;
-};
-
-typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
+typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
 
 static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
@@ -58,70 +53,13 @@ static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
-static void
-ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
-{
-	local_bh_disable();
-	nf_reinject(entry->skb, entry->info, verdict);
-	local_bh_enable();
-	kfree(entry);
-}
-
 static inline void
-__ipq_enqueue_entry(struct ipq_queue_entry *entry)
+__ipq_enqueue_entry(struct nf_queue_entry *entry)
 {
-	list_add(&entry->list, &queue_list);
+	list_add_tail(&entry->list, &queue_list);
 	queue_total++;
 }
 
-/*
- * Find and return a queued entry matched by cmpfn, or return the last
- * entry if cmpfn is NULL.
- */
-static inline struct ipq_queue_entry *
-__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct list_head *p;
-
-	list_for_each_prev(p, &queue_list) {
-		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
-
-		if (!cmpfn || cmpfn(entry, data))
-			return entry;
-	}
-	return NULL;
-}
-
-static inline void
-__ipq_dequeue_entry(struct ipq_queue_entry *entry)
-{
-	list_del(&entry->list);
-	queue_total--;
-}
-
-static inline struct ipq_queue_entry *
-__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct ipq_queue_entry *entry;
-
-	entry = __ipq_find_entry(cmpfn, data);
-	if (entry == NULL)
-		return NULL;
-
-	__ipq_dequeue_entry(entry);
-	return entry;
-}
-
-
-static inline void
-__ipq_flush(int verdict)
-{
-	struct ipq_queue_entry *entry;
-
-	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
-		ipq_issue_verdict(entry, verdict);
-}
-
 static inline int
 __ipq_set_mode(unsigned char mode, unsigned int range)
 {
@@ -148,36 +86,64 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
 	return status;
 }
 
+static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
+
 static inline void
 __ipq_reset(void)
 {
 	peer_pid = 0;
 	net_disable_timestamp();
 	__ipq_set_mode(IPQ_COPY_NONE, 0);
-	__ipq_flush(NF_DROP);
+	__ipq_flush(NULL, 0);
 }
 
-static struct ipq_queue_entry *
-ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+static struct nf_queue_entry *
+ipq_find_dequeue_entry(unsigned long id)
 {
-	struct ipq_queue_entry *entry;
+	struct nf_queue_entry *entry = NULL, *i;
 
 	write_lock_bh(&queue_lock);
-	entry = __ipq_find_dequeue_entry(cmpfn, data);
+
+	list_for_each_entry(i, &queue_list, list) {
+		if ((unsigned long)i == id) {
+			entry = i;
+			break;
+		}
+	}
+
+	if (entry) {
+		list_del(&entry->list);
+		queue_total--;
+	}
+
 	write_unlock_bh(&queue_lock);
 	return entry;
 }
 
 static void
-ipq_flush(int verdict)
+__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct nf_queue_entry *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &queue_list, list) {
+		if (!cmpfn || cmpfn(entry, data)) {
+			list_del(&entry->list);
+			queue_total--;
+			nf_reinject(entry, NF_DROP);
+		}
+	}
+}
+
+static void
+ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
 	write_lock_bh(&queue_lock);
-	__ipq_flush(verdict);
+	__ipq_flush(cmpfn, data);
 	write_unlock_bh(&queue_lock);
 }
 
 static struct sk_buff *
-ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
+ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 {
 	sk_buff_data_t old_tail;
 	size_t size = 0;
@@ -234,20 +200,20 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	pmsg->timestamp_sec = tv.tv_sec;
 	pmsg->timestamp_usec = tv.tv_usec;
 	pmsg->mark = entry->skb->mark;
-	pmsg->hook = entry->info->hook;
+	pmsg->hook = entry->hook;
 	pmsg->hw_protocol = entry->skb->protocol;
 
-	if (entry->info->indev)
-		strcpy(pmsg->indev_name, entry->info->indev->name);
+	if (entry->indev)
+		strcpy(pmsg->indev_name, entry->indev->name);
 	else
 		pmsg->indev_name[0] = '\0';
 
-	if (entry->info->outdev)
-		strcpy(pmsg->outdev_name, entry->info->outdev->name);
+	if (entry->outdev)
+		strcpy(pmsg->outdev_name, entry->outdev->name);
 	else
 		pmsg->outdev_name[0] = '\0';
 
-	if (entry->info->indev && entry->skb->dev) {
+	if (entry->indev && entry->skb->dev) {
 		pmsg->hw_type = entry->skb->dev->type;
 		pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
 	}
@@ -268,28 +234,17 @@ nlmsg_failure:
 }
 
 static int
-ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
-		   unsigned int queuenum, void *data)
+ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
 	int status = -EINVAL;
 	struct sk_buff *nskb;
-	struct ipq_queue_entry *entry;
 
 	if (copy_mode == IPQ_COPY_NONE)
 		return -EAGAIN;
 
-	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
-	if (entry == NULL) {
-		printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
-		return -ENOMEM;
-	}
-
-	entry->info = info;
-	entry->skb = skb;
-
 	nskb = ipq_build_packet_message(entry, &status);
 	if (nskb == NULL)
-		goto err_out_free;
+		return status;
 
 	write_lock_bh(&queue_lock);
 
@@ -323,14 +278,11 @@ err_out_free_nskb:
 
 err_out_unlock:
 	write_unlock_bh(&queue_lock);
-
-err_out_free:
-	kfree(entry);
 	return status;
 }
 
 static int
-ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
+ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
 {
 	int diff;
 	int err;
@@ -365,21 +317,15 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 	return 0;
 }
 
-static inline int
-id_cmp(struct ipq_queue_entry *e, unsigned long id)
-{
-	return (id == (unsigned long )e);
-}
-
 static int
 ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 {
-	struct ipq_queue_entry *entry;
+	struct nf_queue_entry *entry;
 
 	if (vmsg->value > NF_MAX_VERDICT)
 		return -EINVAL;
 
-	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
+	entry = ipq_find_dequeue_entry(vmsg->id);
 	if (entry == NULL)
 		return -ENOENT;
 	else {
@@ -389,7 +335,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 		if (ipq_mangle_ipv6(vmsg, entry) < 0)
 			verdict = NF_DROP;
 
-		ipq_issue_verdict(entry, verdict);
+		nf_reinject(entry, verdict);
 		return 0;
 	}
 }
@@ -434,26 +380,32 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
 }
 
 static int
-dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
+dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 {
-	if (entry->info->indev)
-		if (entry->info->indev->ifindex == ifindex)
+	if (entry->indev)
+		if (entry->indev->ifindex == ifindex)
 			return 1;
 
-	if (entry->info->outdev)
-		if (entry->info->outdev->ifindex == ifindex)
+	if (entry->outdev)
+		if (entry->outdev->ifindex == ifindex)
 			return 1;
-
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (entry->skb->nf_bridge) {
+		if (entry->skb->nf_bridge->physindev &&
+		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
+			return 1;
+		if (entry->skb->nf_bridge->physoutdev &&
+		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+			return 1;
+	}
+#endif
 	return 0;
 }
 
 static void
 ipq_dev_drop(int ifindex)
 {
-	struct ipq_queue_entry *entry;
-
-	while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
-		ipq_issue_verdict(entry, NF_DROP);
+	ipq_flush(dev_cmp, ifindex);
 }
 
 #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
@@ -577,26 +529,6 @@ static ctl_table ipq_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table ipq_dir_table[] = {
-	{
-		.ctl_name = NET_IPV6,
-		.procname = "ipv6",
-		.mode = 0555,
-		.child = ipq_table
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ipq_root_table[] = {
-	{
-		.ctl_name = CTL_NET,
-		.procname = "net",
-		.mode = 0555,
-		.child = ipq_dir_table
-	},
-	{ .ctl_name = 0 }
-};
-
 static int ip6_queue_show(struct seq_file *m, void *v)
 {
 	read_lock_bh(&queue_lock);
@@ -634,7 +566,7 @@ static const struct file_operations ip6_queue_proc_fops = {
 	.owner = THIS_MODULE,
 };
 
-static struct nf_queue_handler nfqh = {
+static const struct nf_queue_handler nfqh = {
 	.name	= "ip6_queue",
 	.outfn	= &ipq_enqueue_packet,
 };
@@ -662,7 +594,7 @@ static int __init ip6_queue_init(void)
 	}
 
 	register_netdevice_notifier(&ipq_dev_notifier);
-	ipq_sysctl_header = register_sysctl_table(ipq_root_table);
+	ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
 
 	status = nf_register_queue_handler(PF_INET6, &nfqh);
 	if (status < 0) {
@@ -677,7 +609,7 @@ cleanup_sysctl:
 	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 
 cleanup_ipqnl:
-	sock_release(ipqnl->sk_socket);
+	netlink_kernel_release(ipqnl);
 	mutex_lock(&ipqnl_mutex);
 	mutex_unlock(&ipqnl_mutex);
 
@@ -690,13 +622,13 @@ static void __exit ip6_queue_fini(void)
 {
 	nf_unregister_queue_handlers(&nfqh);
 	synchronize_net();
-	ipq_flush(NF_DROP);
+	ipq_flush(NULL, 0);
 
 	unregister_sysctl_table(ipq_sysctl_header);
 	unregister_netdevice_notifier(&ipq_dev_notifier);
 	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
 
-	sock_release(ipqnl->sk_socket);
+	netlink_kernel_release(ipqnl);
 	mutex_lock(&ipqnl_mutex);
 	mutex_unlock(&ipqnl_mutex);
 
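The ip6_queue conversion above replaces the driver-private struct ipq_queue_entry (and its kmalloc()/kfree() lifecycle plus the ipq_issue_verdict() wrapper) with the generic struct nf_queue_entry owned by the netfilter queueing core, which now handles allocation and takes the entry directly in nf_reinject(). Note how ipq_find_dequeue_entry() matches (unsigned long)i == id: the entry's kernel address doubles as the packet ID handed to userspace. A rough sketch of the shared structure follows, restricted to the fields this file actually touches; the field set is inferred from the code above, not copied from the real definition in the new <net/netfilter/nf_queue.h>, which carries additional state such as the hook list element and the reinjection callback.

/* Sketch only -- fields inferred from their uses in ip6_queue.c. */
struct nf_queue_entry {
	struct list_head	list;	/* links the entry into queue_list */
	struct sk_buff		*skb;	/* the queued packet itself */
	unsigned int		hook;	/* hook number, was entry->info->hook */
	struct net_device	*indev;	/* was entry->info->indev */
	struct net_device	*outdev; /* was entry->info->outdev */
};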
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index acaba1537931..dd7860fea61f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -19,21 +19,21 @@
 #include <linux/poison.h>
 #include <linux/icmpv6.h>
 #include <net/ipv6.h>
+#include <net/compat.h>
 #include <asm/uaccess.h>
 #include <linux/mutex.h>
 #include <linux/proc_fs.h>
+#include <linux/err.h>
 #include <linux/cpumask.h>
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv6 packet filter");
 
-#define IPV6_HDR_LEN (sizeof(struct ipv6hdr))
-#define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr))
-
 /*#define DEBUG_IP_FIREWALL*/
 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
 /*#define DEBUG_IP_FIREWALL_USER*/
@@ -76,12 +76,6 @@ do { \
 
    Hence the start of any table is given by get_table() below.  */
 
-#if 0
-#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
-#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
-#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
-#endif
-
 /* Check for an extension */
 int
 ip6t_ext_hdr(u8 nexthdr)
@@ -96,6 +90,7 @@ ip6t_ext_hdr(u8 nexthdr)
 }
 
 /* Returns whether matches rule or not. */
+/* Performance critical - called for every packet */
 static inline bool
 ip6_packet_match(const struct sk_buff *skb,
 		 const char *indev,
@@ -108,7 +103,7 @@ ip6_packet_match(const struct sk_buff *skb,
 	unsigned long ret;
 	const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
 
-#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
 
 	if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
 				       &ip6info->src), IP6T_INV_SRCIP)
@@ -188,7 +183,7 @@ ip6_packet_match(const struct sk_buff *skb,
 }
 
 /* should be ip6 safe */
-static inline bool
+static bool
 ip6_checkentry(const struct ip6t_ip6 *ipv6)
 {
 	if (ipv6->flags & ~IP6T_F_MASK) {
@@ -218,8 +213,9 @@ ip6t_error(struct sk_buff *skb,
 	return NF_DROP;
 }
 
-static inline
-bool do_match(struct ip6t_entry_match *m,
+/* Performance critical - called for every packet */
+static inline bool
+do_match(struct ip6t_entry_match *m,
 	      const struct sk_buff *skb,
 	      const struct net_device *in,
 	      const struct net_device *out,
@@ -242,6 +238,7 @@ get_entry(void *base, unsigned int offset)
242} 238}
243 239
244/* All zeroes == unconditional rule. */ 240/* All zeroes == unconditional rule. */
241/* Mildly perf critical (only if packet tracing is on) */
245static inline int 242static inline int
246unconditional(const struct ip6t_ip6 *ipv6) 243unconditional(const struct ip6t_ip6 *ipv6)
247{ 244{
@@ -257,12 +254,12 @@ unconditional(const struct ip6t_ip6 *ipv6)
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 /* This cries for unification! */
-static const char *hooknames[] = {
-	[NF_IP6_PRE_ROUTING] = "PREROUTING",
-	[NF_IP6_LOCAL_IN] = "INPUT",
-	[NF_IP6_FORWARD] = "FORWARD",
-	[NF_IP6_LOCAL_OUT] = "OUTPUT",
-	[NF_IP6_POST_ROUTING] = "POSTROUTING",
+static const char *const hooknames[] = {
+	[NF_INET_PRE_ROUTING] = "PREROUTING",
+	[NF_INET_LOCAL_IN] = "INPUT",
+	[NF_INET_FORWARD] = "FORWARD",
+	[NF_INET_LOCAL_OUT] = "OUTPUT",
+	[NF_INET_POST_ROUTING] = "POSTROUTING",
 };
 
 enum nf_ip_trace_comments {
@@ -271,7 +268,7 @@ enum nf_ip_trace_comments {
 	NF_IP6_TRACE_COMMENT_POLICY,
 };
 
-static const char *comments[] = {
+static const char *const comments[] = {
 	[NF_IP6_TRACE_COMMENT_RULE] = "rule",
 	[NF_IP6_TRACE_COMMENT_RETURN] = "return",
 	[NF_IP6_TRACE_COMMENT_POLICY] = "policy",
@@ -287,6 +284,7 @@ static struct nf_loginfo trace_loginfo = {
 	},
 };
 
+/* Mildly perf critical (only if packet tracing is on) */
 static inline int
 get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
 		      char *hookname, char **chainname,
@@ -378,8 +376,8 @@ ip6t_do_table(struct sk_buff *skb,
 	 * match it. */
 
 	read_lock_bh(&table->lock);
-	private = table->private;
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	private = table->private;
 	table_base = (void *)private->entries[smp_processor_id()];
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -399,9 +397,8 @@ ip6t_do_table(struct sk_buff *skb,
 			goto no_match;
 
 		ADD_COUNTER(e->counters,
-			    ntohs(ipv6_hdr(skb)->payload_len)
-			    + IPV6_HDR_LEN,
-			    1);
+			    ntohs(ipv6_hdr(skb)->payload_len) +
+			    sizeof(struct ipv6hdr), 1);
 
 		t = ip6t_get_target(e);
 		IP_NF_ASSERT(t->u.kernel.target);
@@ -502,11 +499,9 @@ mark_source_chains(struct xt_table_info *newinfo,
 
 	/* No recursion; use packet counter to save back ptrs (reset
 	   to 0 as we leave), and comefrom to save source hook bitmask */
-	for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
+	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
-		struct ip6t_entry *e
-			= (struct ip6t_entry *)(entry0 + pos);
-		int visited = e->comefrom & (1 << hook);
+		struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos);
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
@@ -517,14 +512,14 @@ mark_source_chains(struct xt_table_info *newinfo,
 		for (;;) {
 			struct ip6t_standard_target *t
 				= (void *)ip6t_get_target(e);
+			int visited = e->comefrom & (1 << hook);
 
-			if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
+			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
 				printk("iptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
-			e->comefrom
-				|= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
+			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
 			if ((e->target_offset == sizeof(struct ip6t_entry)
@@ -544,10 +539,10 @@ mark_source_chains(struct xt_table_info *newinfo,
 			/* Return: backtrack through the last
 			   big jump. */
 			do {
-				e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
+				e->comefrom ^= (1<<NF_INET_NUMHOOKS);
 #ifdef DEBUG_IP_FIREWALL_USER
 				if (e->comefrom
-				    & (1 << NF_IP6_NUMHOOKS)) {
+				    & (1 << NF_INET_NUMHOOKS)) {
 					duprintf("Back unset "
 						 "on hook %u "
 						 "rule %u\n",
@@ -604,7 +599,7 @@ mark_source_chains(struct xt_table_info *newinfo,
 	return 1;
 }
 
-static inline int
+static int
 cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
 {
 	if (i && (*i)-- == 0)
@@ -616,102 +611,135 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
 	return 0;
 }
 
-static inline int
-check_match(struct ip6t_entry_match *m,
-	    const char *name,
-	    const struct ip6t_ip6 *ipv6,
-	    unsigned int hookmask,
-	    unsigned int *i)
+static int
+check_entry(struct ip6t_entry *e, const char *name)
+{
+	struct ip6t_entry_target *t;
+
+	if (!ip6_checkentry(&e->ipv6)) {
+		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		return -EINVAL;
+	}
+
+	if (e->target_offset + sizeof(struct ip6t_entry_target) >
+	    e->next_offset)
+		return -EINVAL;
+
+	t = ip6t_get_target(e);
+	if (e->target_offset + t->u.target_size > e->next_offset)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int check_match(struct ip6t_entry_match *m, const char *name,
+		       const struct ip6t_ip6 *ipv6,
+		       unsigned int hookmask, unsigned int *i)
+{
+	struct xt_match *match;
+	int ret;
+
+	match = m->u.kernel.match;
+	ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m),
+			     name, hookmask, ipv6->proto,
+			     ipv6->invflags & IP6T_INV_PROTO);
+	if (!ret && m->u.kernel.match->checkentry
+	    && !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
+					      hookmask)) {
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 m->u.kernel.match->name);
+		ret = -EINVAL;
+	}
+	if (!ret)
+		(*i)++;
+	return ret;
+}
+
+static int
+find_check_match(struct ip6t_entry_match *m,
+		 const char *name,
+		 const struct ip6t_ip6 *ipv6,
+		 unsigned int hookmask,
+		 unsigned int *i)
 {
 	struct xt_match *match;
 	int ret;
 
 	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
 						      m->u.user.revision),
 					"ip6t_%s", m->u.user.name);
 	if (IS_ERR(match) || !match) {
-		duprintf("check_match: `%s' not found\n", m->u.user.name);
+		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
 
-	ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m),
-			     name, hookmask, ipv6->proto,
-			     ipv6->invflags & IP6T_INV_PROTO);
+	ret = check_match(m, name, ipv6, hookmask, i);
 	if (ret)
 		goto err;
 
-	if (m->u.kernel.match->checkentry
-	    && !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
-					      hookmask)) {
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 m->u.kernel.match->name);
-		ret = -EINVAL;
-		goto err;
-	}
-
-	(*i)++;
 	return 0;
 err:
 	module_put(m->u.kernel.match->me);
 	return ret;
 }
 
-static struct xt_target ip6t_standard_target;
-
-static inline int
-check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
-	    unsigned int *i)
+static int check_target(struct ip6t_entry *e, const char *name)
 {
 	struct ip6t_entry_target *t;
 	struct xt_target *target;
 	int ret;
-	unsigned int j;
 
-	if (!ip6_checkentry(&e->ipv6)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
-		return -EINVAL;
+	t = ip6t_get_target(e);
+	target = t->u.kernel.target;
+	ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t),
+			      name, e->comefrom, e->ipv6.proto,
+			      e->ipv6.invflags & IP6T_INV_PROTO);
+	if (!ret && t->u.kernel.target->checkentry
+	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
+					       e->comefrom)) {
+		duprintf("ip_tables: check failed for `%s'.\n",
+			 t->u.kernel.target->name);
+		ret = -EINVAL;
 	}
+	return ret;
+}
 
-	if (e->target_offset + sizeof(struct ip6t_entry_target) >
-	    e->next_offset)
-		return -EINVAL;
+static int
+find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
+		 unsigned int *i)
+{
+	struct ip6t_entry_target *t;
+	struct xt_target *target;
+	int ret;
+	unsigned int j;
+
+	ret = check_entry(e, name);
+	if (ret)
+		return ret;
 
 	j = 0;
-	ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
+	ret = IP6T_MATCH_ITERATE(e, find_check_match, name, &e->ipv6,
+				 e->comefrom, &j);
 	if (ret != 0)
 		goto cleanup_matches;
 
 	t = ip6t_get_target(e);
-	ret = -EINVAL;
-	if (e->target_offset + t->u.target_size > e->next_offset)
-		goto cleanup_matches;
 	target = try_then_request_module(xt_find_target(AF_INET6,
 							t->u.user.name,
 							t->u.user.revision),
 					 "ip6t_%s", t->u.user.name);
 	if (IS_ERR(target) || !target) {
-		duprintf("check_entry: `%s' not found\n", t->u.user.name);
+		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
 		goto cleanup_matches;
 	}
 	t->u.kernel.target = target;
 
-	ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t),
-			      name, e->comefrom, e->ipv6.proto,
-			      e->ipv6.invflags & IP6T_INV_PROTO);
+	ret = check_target(e, name);
 	if (ret)
 		goto err;
 
-	if (t->u.kernel.target->checkentry
-	    && !t->u.kernel.target->checkentry(name, e, target, t->data,
-					       e->comefrom)) {
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		ret = -EINVAL;
-		goto err;
-	}
-
 	(*i)++;
 	return 0;
  err:
@@ -721,7 +749,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 	return ret;
 }
 
-static inline int
+static int
 check_entry_size_and_hooks(struct ip6t_entry *e,
 			   struct xt_table_info *newinfo,
 			   unsigned char *base,
@@ -746,7 +774,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
 	}
 
 	/* Check hooks & underflows */
-	for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
+	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
 		if ((unsigned char *)e - base == hook_entries[h])
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h])
@@ -764,7 +792,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
 	return 0;
 }
 
-static inline int
+static int
 cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 {
 	struct ip6t_entry_target *t;
@@ -800,7 +828,7 @@ translate_table(const char *name,
 	newinfo->number = number;
 
 	/* Init all hooks to impossible value. */
-	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		newinfo->hook_entry[i] = 0xFFFFFFFF;
 		newinfo->underflow[i] = 0xFFFFFFFF;
 	}
@@ -824,7 +852,7 @@ translate_table(const char *name,
 	}
 
 	/* Check hooks all assigned */
-	for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		/* Only hooks which are valid */
 		if (!(valid_hooks & (1 << i)))
 			continue;
@@ -846,7 +874,7 @@ translate_table(const char *name,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-				 check_entry, name, size, &i);
+				 find_check_entry, name, size, &i);
 
 	if (ret != 0) {
 		IP6T_ENTRY_ITERATE(entry0, newinfo->size,
@@ -860,7 +888,7 @@ translate_table(const char *name,
 		memcpy(newinfo->entries[i], entry0, newinfo->size);
 	}
 
-	return 0;
+	return ret;
 }
 
 /* Gets counters. */
@@ -920,33 +948,49 @@ get_counters(const struct xt_table_info *t,
 	}
 }
 
-static int
-copy_entries_to_user(unsigned int total_size,
-		     struct xt_table *table,
-		     void __user *userptr)
+static struct xt_counters *alloc_counters(struct xt_table *table)
 {
-	unsigned int off, num, countersize;
-	struct ip6t_entry *e;
+	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	int ret = 0;
-	void *loc_cpu_entry;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* First, sum counters... */
 	write_lock_bh(&table->lock);
 	get_counters(private, counters);
 	write_unlock_bh(&table->lock);
 
-	/* choose the copy that is on ourc node/cpu */
+	return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+		     struct xt_table *table,
+		     void __user *userptr)
+{
+	unsigned int off, num;
+	struct ip6t_entry *e;
+	struct xt_counters *counters;
+	struct xt_table_info *private = table->private;
+	int ret = 0;
+	void *loc_cpu_entry;
+
+	counters = alloc_counters(table);
+	if (IS_ERR(counters))
+		return PTR_ERR(counters);
+
+	/* choose the copy that is on our node/cpu, ...
+	 * This choice is lazy (because current thread is
+	 * allowed to migrate to another cpu)
+	 */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
 		ret = -EFAULT;
@@ -1001,23 +1045,167 @@ copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, void *src)
+{
+	int v = *(compat_int_t *)src;
+
+	if (v > 0)
+		v += xt_compat_calc_jump(AF_INET6, v);
+	memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, void *src)
+{
+	compat_int_t cv = *(int *)src;
+
+	if (cv > 0)
+		cv -= xt_compat_calc_jump(AF_INET6, cv);
+	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static inline int
+compat_calc_match(struct ip6t_entry_match *m, int *size)
+{
+	*size += xt_compat_match_offset(m->u.kernel.match);
+	return 0;
+}
+
+static int compat_calc_entry(struct ip6t_entry *e,
+			     const struct xt_table_info *info,
+			     void *base, struct xt_table_info *newinfo)
+{
+	struct ip6t_entry_target *t;
+	unsigned int entry_offset;
+	int off, i, ret;
+
+	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
+	entry_offset = (void *)e - base;
+	IP6T_MATCH_ITERATE(e, compat_calc_match, &off);
+	t = ip6t_get_target(e);
+	off += xt_compat_target_offset(t->u.kernel.target);
+	newinfo->size -= off;
+	ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+		if (info->hook_entry[i] &&
+		    (e < (struct ip6t_entry *)(base + info->hook_entry[i])))
+			newinfo->hook_entry[i] -= off;
+		if (info->underflow[i] &&
+		    (e < (struct ip6t_entry *)(base + info->underflow[i])))
+			newinfo->underflow[i] -= off;
+	}
+	return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+			     struct xt_table_info *newinfo)
+{
+	void *loc_cpu_entry;
+
+	if (!newinfo || !info)
+		return -EINVAL;
+
+	/* we dont care about newinfo->entries[] */
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	newinfo->initial_entries = 0;
+	loc_cpu_entry = info->entries[raw_smp_processor_id()];
+	return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size,
+				  compat_calc_entry, info, loc_cpu_entry,
+				  newinfo);
+}
+#endif
+
+static int get_info(void __user *user, int *len, int compat)
+{
+	char name[IP6T_TABLE_MAXNAMELEN];
+	struct xt_table *t;
+	int ret;
+
+	if (*len != sizeof(struct ip6t_getinfo)) {
+		duprintf("length %u != %zu\n", *len,
+			 sizeof(struct ip6t_getinfo));
+		return -EINVAL;
+	}
+
+	if (copy_from_user(name, user, sizeof(name)) != 0)
+		return -EFAULT;
+
+	name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_lock(AF_INET6);
+#endif
+	t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
+				    "ip6table_%s", name);
+	if (t && !IS_ERR(t)) {
+		struct ip6t_getinfo info;
+		struct xt_table_info *private = t->private;
+
+#ifdef CONFIG_COMPAT
+		if (compat) {
+			struct xt_table_info tmp;
+			ret = compat_table_info(private, &tmp);
+			xt_compat_flush_offsets(AF_INET6);
+			private = &tmp;
+		}
+#endif
+		info.valid_hooks = t->valid_hooks;
+		memcpy(info.hook_entry, private->hook_entry,
+		       sizeof(info.hook_entry));
+		memcpy(info.underflow, private->underflow,
+		       sizeof(info.underflow));
+		info.num_entries = private->number;
+		info.size = private->size;
+		strcpy(info.name, name);
+
+		if (copy_to_user(user, &info, *len) != 0)
+			ret = -EFAULT;
+		else
+			ret = 0;
+
+		xt_table_unlock(t);
+		module_put(t->me);
+	} else
+		ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+	if (compat)
+		xt_compat_unlock(AF_INET6);
+#endif
+	return ret;
+}
+
 static int
-get_entries(const struct ip6t_get_entries *entries,
-	    struct ip6t_get_entries __user *uptr)
+get_entries(struct ip6t_get_entries __user *uptr, int *len)
 {
 	int ret;
+	struct ip6t_get_entries get;
 	struct xt_table *t;
 
-	t = xt_find_table_lock(AF_INET6, entries->name);
+	if (*len < sizeof(get)) {
+		duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+		return -EINVAL;
+	}
+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+		return -EFAULT;
+	if (*len != sizeof(struct ip6t_get_entries) + get.size) {
+		duprintf("get_entries: %u != %zu\n",
+			 *len, sizeof(get) + get.size);
+		return -EINVAL;
+	}
+
+	t = xt_find_table_lock(AF_INET6, get.name);
 	if (t && !IS_ERR(t)) {
 		struct xt_table_info *private = t->private;
 		duprintf("t->private->number = %u\n", private->number);
-		if (entries->size == private->size)
+		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
 		else {
 			duprintf("get_entries: I've got %u not %u!\n",
-				 private->size, entries->size);
+				 private->size, get.size);
 			ret = -EINVAL;
 		}
 		module_put(t->me);
@@ -1029,67 +1217,40 @@ get_entries(const struct ip6t_get_entries *entries,
 }
 
 static int
-do_replace(void __user *user, unsigned int len)
+__do_replace(const char *name, unsigned int valid_hooks,
+	     struct xt_table_info *newinfo, unsigned int num_counters,
+	     void __user *counters_ptr)
 {
 	int ret;
-	struct ip6t_replace tmp;
 	struct xt_table *t;
-	struct xt_table_info *newinfo, *oldinfo;
+	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
-	void *loc_cpu_entry, *loc_cpu_old_entry;
+	void *loc_cpu_old_entry;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-		return -EFAULT;
-
-	/* overflow check */
-	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-			SMP_CACHE_BYTES)
-		return -ENOMEM;
-	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-		return -ENOMEM;
-
-	newinfo = xt_alloc_table_info(tmp.size);
-	if (!newinfo)
-		return -ENOMEM;
-
-	/* choose the copy that is on our node/cpu */
-	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-			   tmp.size) != 0) {
-		ret = -EFAULT;
-		goto free_newinfo;
-	}
-
-	counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
+	ret = 0;
+	counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
+				numa_node_id());
 	if (!counters) {
 		ret = -ENOMEM;
-		goto free_newinfo;
+		goto out;
 	}
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
-			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
-			      tmp.hook_entry, tmp.underflow);
-	if (ret != 0)
-		goto free_newinfo_counters;
-
-	duprintf("ip_tables: Translated table\n");
-
-	t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name),
-				    "ip6table_%s", tmp.name);
+	t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
+				    "ip6table_%s", name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free_newinfo_counters_untrans;
 	}
 
 	/* You lied! */
-	if (tmp.valid_hooks != t->valid_hooks) {
+	if (valid_hooks != t->valid_hooks) {
 		duprintf("Valid hook crap: %08X vs %08X\n",
-			 tmp.valid_hooks, t->valid_hooks);
+			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
 
-	oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
+	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
 	if (!oldinfo)
 		goto put_module;
 
@@ -1107,10 +1268,11 @@ do_replace(void __user *user, unsigned int len)
 	get_counters(oldinfo, counters);
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
+			   NULL);
 	xt_free_table_info(oldinfo);
-	if (copy_to_user(tmp.counters, counters,
-			 sizeof(struct xt_counters) * tmp.num_counters) != 0)
+	if (copy_to_user(counters_ptr, counters,
+			 sizeof(struct xt_counters) * num_counters) != 0)
 		ret = -EFAULT;
 	vfree(counters);
 	xt_table_unlock(t);
@@ -1120,9 +1282,54 @@ do_replace(void __user *user, unsigned int len)
 	module_put(t->me);
 	xt_table_unlock(t);
  free_newinfo_counters_untrans:
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
- free_newinfo_counters:
 	vfree(counters);
+ out:
+	return ret;
+}
+
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ip6t_replace tmp;
+	struct xt_table_info *newinfo;
+	void *loc_cpu_entry;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* overflow check */
+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+		return -ENOMEM;
+
+	newinfo = xt_alloc_table_info(tmp.size);
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* choose the copy that is on our node/cpu */
+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo;
+
+	duprintf("ip_tables: Translated table\n");
+
+	ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
+			   tmp.num_counters, tmp.counters);
+	if (ret)
+		goto free_newinfo_untrans;
+	return 0;
+
+ free_newinfo_untrans:
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
 free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1151,31 +1358,59 @@ add_counter_to_entry(struct ip6t_entry *e,
 }
 
 static int
-do_add_counters(void __user *user, unsigned int len)
+do_add_counters(void __user *user, unsigned int len, int compat)
 {
 	unsigned int i;
-	struct xt_counters_info tmp, *paddc;
-	struct xt_table_info *private;
+	struct xt_counters_info tmp;
+	struct xt_counters *paddc;
+	unsigned int num_counters;
+	char *name;
+	int size;
+	void *ptmp;
 	struct xt_table *t;
+	struct xt_table_info *private;
 	int ret = 0;
 	void *loc_cpu_entry;
+#ifdef CONFIG_COMPAT
+	struct compat_xt_counters_info compat_tmp;
 
-	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+	if (compat) {
+		ptmp = &compat_tmp;
+		size = sizeof(struct compat_xt_counters_info);
+	} else
+#endif
+	{
+		ptmp = &tmp;
+		size = sizeof(struct xt_counters_info);
+	}
+
+	if (copy_from_user(ptmp, user, size) != 0)
 		return -EFAULT;
 
-	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
+#ifdef CONFIG_COMPAT
+	if (compat) {
+		num_counters = compat_tmp.num_counters;
+		name = compat_tmp.name;
+	} else
+#endif
+	{
+		num_counters = tmp.num_counters;
+		name = tmp.name;
+	}
+
+	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc(len);
+	paddc = vmalloc_node(len - size, numa_node_id());
 	if (!paddc)
 		return -ENOMEM;
 
-	if (copy_from_user(paddc, user, len) != 0) {
+	if (copy_from_user(paddc, user + size, len - size) != 0) {
 		ret = -EFAULT;
 		goto free;
 	}
 
-	t = xt_find_table_lock(AF_INET6, tmp.name);
+	t = xt_find_table_lock(AF_INET6, name);
 	if (!t || IS_ERR(t)) {
 		ret = t ? PTR_ERR(t) : -ENOENT;
 		goto free;
@@ -1183,18 +1418,18 @@ do_add_counters(void __user *user, unsigned int len)
 
 	write_lock_bh(&t->lock);
 	private = t->private;
-	if (private->number != tmp.num_counters) {
+	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[smp_processor_id()];
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_entry,
 			   private->size,
 			   add_counter_to_entry,
-			   paddc->counters,
+			   paddc,
 			   &i);
  unlock_up_free:
 	write_unlock_bh(&t->lock);
@@ -1206,8 +1441,433 @@ do_add_counters(void __user *user, unsigned int len)
1206 return ret; 1441 return ret;
1207} 1442}
1208 1443
1444#ifdef CONFIG_COMPAT
1445struct compat_ip6t_replace {
1446 char name[IP6T_TABLE_MAXNAMELEN];
1447 u32 valid_hooks;
1448 u32 num_entries;
1449 u32 size;
1450 u32 hook_entry[NF_INET_NUMHOOKS];
1451 u32 underflow[NF_INET_NUMHOOKS];
1452 u32 num_counters;
1453 compat_uptr_t counters; /* struct ip6t_counters * */
1454 struct compat_ip6t_entry entries[0];
1455};
1456
1209static int 1457static int
1210do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 1458compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
1459 compat_uint_t *size, struct xt_counters *counters,
1460 unsigned int *i)
1461{
1462 struct ip6t_entry_target *t;
1463 struct compat_ip6t_entry __user *ce;
1464 u_int16_t target_offset, next_offset;
1465 compat_uint_t origsize;
1466 int ret;
1467
1468 ret = -EFAULT;
1469 origsize = *size;
1470 ce = (struct compat_ip6t_entry __user *)*dstptr;
1471 if (copy_to_user(ce, e, sizeof(struct ip6t_entry)))
1472 goto out;
1473
1474 if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1475 goto out;
1476
1477 *dstptr += sizeof(struct compat_ip6t_entry);
1478 *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
1479
1480 ret = IP6T_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1481 target_offset = e->target_offset - (origsize - *size);
1482 if (ret)
1483 goto out;
1484 t = ip6t_get_target(e);
1485 ret = xt_compat_target_to_user(t, dstptr, size);
1486 if (ret)
1487 goto out;
1488 ret = -EFAULT;
1489 next_offset = e->next_offset - (origsize - *size);
1490 if (put_user(target_offset, &ce->target_offset))
1491 goto out;
1492 if (put_user(next_offset, &ce->next_offset))
1493 goto out;
1494
1495 (*i)++;
1496 return 0;
1497out:
1498 return ret;
1499}
1500
1501static int
1502compat_find_calc_match(struct ip6t_entry_match *m,
1503 const char *name,
1504 const struct ip6t_ip6 *ipv6,
1505 unsigned int hookmask,
1506 int *size, int *i)
1507{
1508 struct xt_match *match;
1509
1510 match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
1511 m->u.user.revision),
1512 "ip6t_%s", m->u.user.name);
1513 if (IS_ERR(match) || !match) {
1514 duprintf("compat_check_calc_match: `%s' not found\n",
1515 m->u.user.name);
1516 return match ? PTR_ERR(match) : -ENOENT;
1517 }
1518 m->u.kernel.match = match;
1519 *size += xt_compat_match_offset(match);
1520
1521 (*i)++;
1522 return 0;
1523}
1524
1525static int
1526compat_release_match(struct ip6t_entry_match *m, unsigned int *i)
1527{
1528 if (i && (*i)-- == 0)
1529 return 1;
1530
1531 module_put(m->u.kernel.match->me);
1532 return 0;
1533}
1534
1535static int
1536compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i)
1537{
1538 struct ip6t_entry_target *t;
1539
1540 if (i && (*i)-- == 0)
1541 return 1;
1542
1543 /* Cleanup all matches */
1544 COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL);
1545 t = compat_ip6t_get_target(e);
1546 module_put(t->u.kernel.target->me);
1547 return 0;
1548}
1549
1550static int
1551check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1552 struct xt_table_info *newinfo,
1553 unsigned int *size,
1554 unsigned char *base,
1555 unsigned char *limit,
1556 unsigned int *hook_entries,
1557 unsigned int *underflows,
1558 unsigned int *i,
1559 const char *name)
1560{
1561 struct ip6t_entry_target *t;
1562 struct xt_target *target;
1563 unsigned int entry_offset;
1564 int ret, off, h, j;
1565
1566 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1567 if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0
1568 || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
1569 duprintf("Bad offset %p, limit = %p\n", e, limit);
1570 return -EINVAL;
1571 }
1572
1573 if (e->next_offset < sizeof(struct compat_ip6t_entry) +
1574 sizeof(struct compat_xt_entry_target)) {
1575 duprintf("checking: element %p size %u\n",
1576 e, e->next_offset);
1577 return -EINVAL;
1578 }
1579
1580 /* For purposes of check_entry casting the compat entry is fine */
1581 ret = check_entry((struct ip6t_entry *)e, name);
1582 if (ret)
1583 return ret;
1584
1585 off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
1586 entry_offset = (void *)e - (void *)base;
1587 j = 0;
1588 ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name,
1589 &e->ipv6, e->comefrom, &off, &j);
1590 if (ret != 0)
1591 goto release_matches;
1592
1593 t = compat_ip6t_get_target(e);
1594 target = try_then_request_module(xt_find_target(AF_INET6,
1595 t->u.user.name,
1596 t->u.user.revision),
1597 "ip6t_%s", t->u.user.name);
1598 if (IS_ERR(target) || !target) {
1599 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1600 t->u.user.name);
1601 ret = target ? PTR_ERR(target) : -ENOENT;
1602 goto release_matches;
1603 }
1604 t->u.kernel.target = target;
1605
1606 off += xt_compat_target_offset(target);
1607 *size += off;
1608 ret = xt_compat_add_offset(AF_INET6, entry_offset, off);
1609 if (ret)
1610 goto out;
1611
1612 /* Check hooks & underflows */
1613 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1614 if ((unsigned char *)e - base == hook_entries[h])
1615 newinfo->hook_entry[h] = hook_entries[h];
1616 if ((unsigned char *)e - base == underflows[h])
1617 newinfo->underflow[h] = underflows[h];
1618 }
1619
1620 /* Clear counters and comefrom */
1621 memset(&e->counters, 0, sizeof(e->counters));
1622 e->comefrom = 0;
1623
1624 (*i)++;
1625 return 0;
1626
1627out:
1628 module_put(t->u.kernel.target->me);
1629release_matches:
1630 IP6T_MATCH_ITERATE(e, compat_release_match, &j);
1631 return ret;
1632}
1633
1634static int
1635compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
1636 unsigned int *size, const char *name,
1637 struct xt_table_info *newinfo, unsigned char *base)
1638{
1639 struct ip6t_entry_target *t;
1640 struct xt_target *target;
1641 struct ip6t_entry *de;
1642 unsigned int origsize;
1643 int ret, h;
1644
1645 ret = 0;
1646 origsize = *size;
1647 de = (struct ip6t_entry *)*dstptr;
1648 memcpy(de, e, sizeof(struct ip6t_entry));
1649 memcpy(&de->counters, &e->counters, sizeof(e->counters));
1650
1651 *dstptr += sizeof(struct ip6t_entry);
1652 *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
1653
1654 ret = COMPAT_IP6T_MATCH_ITERATE(e, xt_compat_match_from_user,
1655 dstptr, size);
1656 if (ret)
1657 return ret;
1658 de->target_offset = e->target_offset - (origsize - *size);
1659 t = compat_ip6t_get_target(e);
1660 target = t->u.kernel.target;
1661 xt_compat_target_from_user(t, dstptr, size);
1662
1663 de->next_offset = e->next_offset - (origsize - *size);
1664 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1665 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1666 newinfo->hook_entry[h] -= origsize - *size;
1667 if ((unsigned char *)de - base < newinfo->underflow[h])
1668 newinfo->underflow[h] -= origsize - *size;
1669 }
1670 return ret;
1671}
1672
1673static int compat_check_entry(struct ip6t_entry *e, const char *name,
1674 unsigned int *i)
1675{
1676 int j, ret;
1677
1678 j = 0;
1679 ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6,
1680 e->comefrom, &j);
1681 if (ret)
1682 goto cleanup_matches;
1683
1684 ret = check_target(e, name);
1685 if (ret)
1686 goto cleanup_matches;
1687
1688 (*i)++;
1689 return 0;
1690
1691 cleanup_matches:
1692 IP6T_MATCH_ITERATE(e, cleanup_match, &j);
1693 return ret;
1694}
1695
1696static int
1697translate_compat_table(const char *name,
1698 unsigned int valid_hooks,
1699 struct xt_table_info **pinfo,
1700 void **pentry0,
1701 unsigned int total_size,
1702 unsigned int number,
1703 unsigned int *hook_entries,
1704 unsigned int *underflows)
1705{
1706 unsigned int i, j;
1707 struct xt_table_info *newinfo, *info;
1708 void *pos, *entry0, *entry1;
1709 unsigned int size;
1710 int ret;
1711
1712 info = *pinfo;
1713 entry0 = *pentry0;
1714 size = total_size;
1715 info->number = number;
1716
1717 /* Init all hooks to impossible value. */
1718 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1719 info->hook_entry[i] = 0xFFFFFFFF;
1720 info->underflow[i] = 0xFFFFFFFF;
1721 }
1722
1723 duprintf("translate_compat_table: size %u\n", info->size);
1724 j = 0;
1725 xt_compat_lock(AF_INET6);
1726 /* Walk through entries, checking offsets. */
1727 ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
1728 check_compat_entry_size_and_hooks,
1729 info, &size, entry0,
1730 entry0 + total_size,
1731 hook_entries, underflows, &j, name);
1732 if (ret != 0)
1733 goto out_unlock;
1734
1735 ret = -EINVAL;
1736 if (j != number) {
1737 duprintf("translate_compat_table: %u not %u entries\n",
1738 j, number);
1739 goto out_unlock;
1740 }
1741
1742 /* Check hooks all assigned */
1743 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1744 /* Only hooks which are valid */
1745 if (!(valid_hooks & (1 << i)))
1746 continue;
1747 if (info->hook_entry[i] == 0xFFFFFFFF) {
1748 duprintf("Invalid hook entry %u %u\n",
1749 i, hook_entries[i]);
1750 goto out_unlock;
1751 }
1752 if (info->underflow[i] == 0xFFFFFFFF) {
1753 duprintf("Invalid underflow %u %u\n",
1754 i, underflows[i]);
1755 goto out_unlock;
1756 }
1757 }
1758
1759 ret = -ENOMEM;
1760 newinfo = xt_alloc_table_info(size);
1761 if (!newinfo)
1762 goto out_unlock;
1763
1764 newinfo->number = number;
1765 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1766 newinfo->hook_entry[i] = info->hook_entry[i];
1767 newinfo->underflow[i] = info->underflow[i];
1768 }
1769 entry1 = newinfo->entries[raw_smp_processor_id()];
1770 pos = entry1;
1771 size = total_size;
1772 ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
1773 compat_copy_entry_from_user,
1774 &pos, &size, name, newinfo, entry1);
1775 xt_compat_flush_offsets(AF_INET6);
1776 xt_compat_unlock(AF_INET6);
1777 if (ret)
1778 goto free_newinfo;
1779
1780 ret = -ELOOP;
1781 if (!mark_source_chains(newinfo, valid_hooks, entry1))
1782 goto free_newinfo;
1783
1784 i = 0;
1785 ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1786 name, &i);
1787 if (ret) {
1788 j -= i;
1789 COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1790 compat_release_entry, &j);
1791 IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1792 xt_free_table_info(newinfo);
1793 return ret;
1794 }
1795
1796 /* And one copy for every other CPU */
1797 for_each_possible_cpu(i)
1798 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1799 memcpy(newinfo->entries[i], entry1, newinfo->size);
1800
1801 *pinfo = newinfo;
1802 *pentry0 = entry1;
1803 xt_free_table_info(info);
1804 return 0;
1805
1806free_newinfo:
1807 xt_free_table_info(newinfo);
1808out:
1809 COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1810 return ret;
1811out_unlock:
1812 xt_compat_flush_offsets(AF_INET6);
1813 xt_compat_unlock(AF_INET6);
1814 goto out;
1815}
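
translate_compat_table runs two passes under xt_compat_lock: pass one only validates entry sizes and hook offsets against the user blob; pass two allocates the native table, copies entry by entry, and finally replicates the result into every other CPU's slot. The same shape in miniature, with plain ints standing in for entries and no real locking:

/* Validate-then-translate: reject bad input before allocating,
 * copy once, then replicate per CPU. Purely illustrative. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NCPUS 4			/* hypothetical CPU count */

static int validate(const int *src, size_t n)
{
	for (size_t i = 0; i < n; i++)
		if (src[i] < 0)
			return -1;	/* like the offset/hook checks in pass one */
	return 0;
}

int main(void)
{
	int src[] = { 1, 2, 3, 4 };
	size_t n = sizeof(src) / sizeof(src[0]);
	int *percpu[NCPUS];

	if (validate(src, n))
		return EXIT_FAILURE;

	for (int c = 0; c < NCPUS; c++) {	/* analogous to xt_alloc_table_info() */
		percpu[c] = malloc(n * sizeof(int));
		if (!percpu[c])
			return EXIT_FAILURE;
	}

	memcpy(percpu[0], src, n * sizeof(int));	/* pass two: translate into cpu 0 */
	for (int c = 1; c < NCPUS; c++)			/* "one copy for every other CPU" */
		memcpy(percpu[c], percpu[0], n * sizeof(int));

	printf("replicated %zu entries to %d cpus\n", n, NCPUS);
	for (int c = 0; c < NCPUS; c++)
		free(percpu[c]);
	return EXIT_SUCCESS;
}
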
1816
1817static int
1818compat_do_replace(void __user *user, unsigned int len)
1819{
1820 int ret;
1821 struct compat_ip6t_replace tmp;
1822 struct xt_table_info *newinfo;
1823 void *loc_cpu_entry;
1824
1825 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1826 return -EFAULT;
1827
1828 /* overflow check */
1829 if (tmp.size >= INT_MAX / num_possible_cpus())
1830 return -ENOMEM;
1831 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1832 return -ENOMEM;
1833
1834 newinfo = xt_alloc_table_info(tmp.size);
1835 if (!newinfo)
1836 return -ENOMEM;
1837
1838 /* choose the copy that is on our node/cpu */
1839 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1840 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1841 tmp.size) != 0) {
1842 ret = -EFAULT;
1843 goto free_newinfo;
1844 }
1845
1846 ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1847 &newinfo, &loc_cpu_entry, tmp.size,
1848 tmp.num_entries, tmp.hook_entry,
1849 tmp.underflow);
1850 if (ret != 0)
1851 goto free_newinfo;
1852
1853 duprintf("compat_do_replace: Translated table\n");
1854
1855 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo,
1856 tmp.num_counters, compat_ptr(tmp.counters));
1857 if (ret)
1858 goto free_newinfo_untrans;
1859 return 0;
1860
1861 free_newinfo_untrans:
1862 IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1863 free_newinfo:
1864 xt_free_table_info(newinfo);
1865 return ret;
1866}
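
The two overflow checks at the top of compat_do_replace divide instead of multiplying, so the comparison itself cannot overflow before the allocation size is computed. The same guard in isolation, with arbitrary numbers:

/* Standalone version of the divide-before-multiply overflow guard. */
#include <limits.h>
#include <stdio.h>

static int would_overflow(unsigned int size, unsigned int ncpus)
{
	/* Same shape as "tmp.size >= INT_MAX / num_possible_cpus()". */
	return size >= (unsigned int)INT_MAX / ncpus;
}

int main(void)
{
	printf("%d\n", would_overflow(1u << 30, 8));	/* 1: reject, 2^30 * 8 overflows */
	printf("%d\n", would_overflow(4096, 8));	/* 0: fine */
	return 0;
}
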
1867
1868static int
1869compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
1870 unsigned int len)
1211{ 1871{
1212 int ret; 1872 int ret;
1213 1873
@@ -1216,11 +1876,11 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1216 1876
1217 switch (cmd) { 1877 switch (cmd) {
1218 case IP6T_SO_SET_REPLACE: 1878 case IP6T_SO_SET_REPLACE:
1219 ret = do_replace(user, len); 1879 ret = compat_do_replace(user, len);
1220 break; 1880 break;
1221 1881
1222 case IP6T_SO_SET_ADD_COUNTERS: 1882 case IP6T_SO_SET_ADD_COUNTERS:
1223 ret = do_add_counters(user, len); 1883 ret = do_add_counters(user, len, 1);
1224 break; 1884 break;
1225 1885
1226 default: 1886 default:
@@ -1231,75 +1891,155 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1231 return ret; 1891 return ret;
1232} 1892}
1233 1893
1894struct compat_ip6t_get_entries {
1895 char name[IP6T_TABLE_MAXNAMELEN];
1896 compat_uint_t size;
1897 struct compat_ip6t_entry entrytable[0];
1898};
1899
1234static int 1900static int
1235do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 1901compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1902 void __user *userptr)
1903{
1904 struct xt_counters *counters;
1905 struct xt_table_info *private = table->private;
1906 void __user *pos;
1907 unsigned int size;
1908 int ret = 0;
1909 void *loc_cpu_entry;
1910 unsigned int i = 0;
1911
1912 counters = alloc_counters(table);
1913 if (IS_ERR(counters))
1914 return PTR_ERR(counters);
1915
1916 /* choose the copy that is on our node/cpu, ...
1917 * This choice is lazy (because current thread is
1918 * allowed to migrate to another cpu)
1919 */
1920 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1921 pos = userptr;
1922 size = total_size;
1923 ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size,
1924 compat_copy_entry_to_user,
1925 &pos, &size, counters, &i);
1926
1927 vfree(counters);
1928 return ret;
1929}
1930
1931static int
1932compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len)
1236{ 1933{
1237 int ret; 1934 int ret;
1935 struct compat_ip6t_get_entries get;
1936 struct xt_table *t;
1238 1937
1239 if (!capable(CAP_NET_ADMIN)) 1938 if (*len < sizeof(get)) {
1240 return -EPERM; 1939 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1940 return -EINVAL;
1941 }
1241 1942
1242 switch (cmd) { 1943 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1243 case IP6T_SO_GET_INFO: { 1944 return -EFAULT;
1244 char name[IP6T_TABLE_MAXNAMELEN];
1245 struct xt_table *t;
1246 1945
1247 if (*len != sizeof(struct ip6t_getinfo)) { 1946 if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
1248 duprintf("length %u != %u\n", *len, 1947 duprintf("compat_get_entries: %u != %zu\n",
1249 sizeof(struct ip6t_getinfo)); 1948 *len, sizeof(get) + get.size);
1949 return -EINVAL;
1950 }
1951
1952 xt_compat_lock(AF_INET6);
1953 t = xt_find_table_lock(AF_INET6, get.name);
1954 if (t && !IS_ERR(t)) {
1955 struct xt_table_info *private = t->private;
1956 struct xt_table_info info;
1957 duprintf("t->private->number = %u\n", private->number);
1958 ret = compat_table_info(private, &info);
1959 if (!ret && get.size == info.size) {
1960 ret = compat_copy_entries_to_user(private->size,
1961 t, uptr->entrytable);
1962 } else if (!ret) {
1963 duprintf("compat_get_entries: I've got %u not %u!\n",
1964 private->size, get.size);
1250 ret = -EINVAL; 1965 ret = -EINVAL;
1251 break;
1252 } 1966 }
1967 xt_compat_flush_offsets(AF_INET6);
1968 module_put(t->me);
1969 xt_table_unlock(t);
1970 } else
1971 ret = t ? PTR_ERR(t) : -ENOENT;
1253 1972
1254 if (copy_from_user(name, user, sizeof(name)) != 0) { 1973 xt_compat_unlock(AF_INET6);
1255 ret = -EFAULT; 1974 return ret;
1256 break; 1975}
1257 } 1976
1258 name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; 1977static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *);
1259 1978
1260 t = try_then_request_module(xt_find_table_lock(AF_INET6, name), 1979static int
1261 "ip6table_%s", name); 1980compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1262 if (t && !IS_ERR(t)) { 1981{
1263 struct ip6t_getinfo info; 1982 int ret;
1264 struct xt_table_info *private = t->private; 1983
1265 1984 if (!capable(CAP_NET_ADMIN))
1266 info.valid_hooks = t->valid_hooks; 1985 return -EPERM;
1267 memcpy(info.hook_entry, private->hook_entry, 1986
1268 sizeof(info.hook_entry)); 1987 switch (cmd) {
1269 memcpy(info.underflow, private->underflow, 1988 case IP6T_SO_GET_INFO:
1270 sizeof(info.underflow)); 1989 ret = get_info(user, len, 1);
1271 info.num_entries = private->number; 1990 break;
1272 info.size = private->size; 1991 case IP6T_SO_GET_ENTRIES:
1273 memcpy(info.name, name, sizeof(info.name)); 1992 ret = compat_get_entries(user, len);
1274 1993 break;
1275 if (copy_to_user(user, &info, *len) != 0) 1994 default:
1276 ret = -EFAULT; 1995 ret = do_ip6t_get_ctl(sk, cmd, user, len);
1277 else
1278 ret = 0;
1279 xt_table_unlock(t);
1280 module_put(t->me);
1281 } else
1282 ret = t ? PTR_ERR(t) : -ENOENT;
1283 } 1996 }
1284 break; 1997 return ret;
1998}
1999#endif
1285 2000
1286 case IP6T_SO_GET_ENTRIES: { 2001static int
1287 struct ip6t_get_entries get; 2002do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2003{
2004 int ret;
1288 2005
1289 if (*len < sizeof(get)) { 2006 if (!capable(CAP_NET_ADMIN))
1290 duprintf("get_entries: %u < %u\n", *len, sizeof(get)); 2007 return -EPERM;
1291 ret = -EINVAL; 2008
1292 } else if (copy_from_user(&get, user, sizeof(get)) != 0) { 2009 switch (cmd) {
1293 ret = -EFAULT; 2010 case IP6T_SO_SET_REPLACE:
1294 } else if (*len != sizeof(struct ip6t_get_entries) + get.size) { 2011 ret = do_replace(user, len);
1295 duprintf("get_entries: %u != %u\n", *len,
1296 sizeof(struct ip6t_get_entries) + get.size);
1297 ret = -EINVAL;
1298 } else
1299 ret = get_entries(&get, user);
1300 break; 2012 break;
2013
2014 case IP6T_SO_SET_ADD_COUNTERS:
2015 ret = do_add_counters(user, len, 0);
2016 break;
2017
2018 default:
2019 duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
2020 ret = -EINVAL;
1301 } 2021 }
1302 2022
2023 return ret;
2024}
2025
2026static int
2027do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2028{
2029 int ret;
2030
2031 if (!capable(CAP_NET_ADMIN))
2032 return -EPERM;
2033
2034 switch (cmd) {
2035 case IP6T_SO_GET_INFO:
2036 ret = get_info(user, len, 0);
2037 break;
2038
2039 case IP6T_SO_GET_ENTRIES:
2040 ret = get_entries(user, len);
2041 break;
2042
1303 case IP6T_SO_GET_REVISION_MATCH: 2043 case IP6T_SO_GET_REVISION_MATCH:
1304 case IP6T_SO_GET_REVISION_TARGET: { 2044 case IP6T_SO_GET_REVISION_TARGET: {
1305 struct ip6t_get_revision rev; 2045 struct ip6t_get_revision rev;
@@ -1334,12 +2074,11 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1334 return ret; 2074 return ret;
1335} 2075}
1336 2076
1337int ip6t_register_table(struct xt_table *table, 2077int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl)
1338 const struct ip6t_replace *repl)
1339{ 2078{
1340 int ret; 2079 int ret;
1341 struct xt_table_info *newinfo; 2080 struct xt_table_info *newinfo;
1342 static struct xt_table_info bootstrap 2081 struct xt_table_info bootstrap
1343 = { 0, 0, 0, { 0 }, { 0 }, { } }; 2082 = { 0, 0, 0, { 0 }, { 0 }, { } };
1344 void *loc_cpu_entry; 2083 void *loc_cpu_entry;
1345 2084
@@ -1347,7 +2086,7 @@ int ip6t_register_table(struct xt_table *table,
1347 if (!newinfo) 2086 if (!newinfo)
1348 return -ENOMEM; 2087 return -ENOMEM;
1349 2088
 1350 /* choose the copy on our node/cpu */ 2089 /* choose the copy on our node/cpu, but don't care about preemption */
1351 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 2090 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1352 memcpy(loc_cpu_entry, repl->entries, repl->size); 2091 memcpy(loc_cpu_entry, repl->entries, repl->size);
1353 2092
@@ -1403,17 +2142,18 @@ icmp6_match(const struct sk_buff *skb,
1403 unsigned int protoff, 2142 unsigned int protoff,
1404 bool *hotdrop) 2143 bool *hotdrop)
1405{ 2144{
1406 struct icmp6hdr _icmp, *ic; 2145 struct icmp6hdr _icmph, *ic;
1407 const struct ip6t_icmp *icmpinfo = matchinfo; 2146 const struct ip6t_icmp *icmpinfo = matchinfo;
1408 2147
1409 /* Must not be a fragment. */ 2148 /* Must not be a fragment. */
1410 if (offset) 2149 if (offset)
1411 return false; 2150 return false;
1412 2151
1413 ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp); 2152 ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1414 if (ic == NULL) { 2153 if (ic == NULL) {
1415 /* We've been asked to examine this packet, and we 2154 /* We've been asked to examine this packet, and we
1416 can't. Hence, no choice but to drop. */ 2155 * can't. Hence, no choice but to drop.
2156 */
1417 duprintf("Dropping evil ICMP tinygram.\n"); 2157 duprintf("Dropping evil ICMP tinygram.\n");
1418 *hotdrop = true; 2158 *hotdrop = true;
1419 return false; 2159 return false;
@@ -1445,6 +2185,11 @@ static struct xt_target ip6t_standard_target __read_mostly = {
1445 .name = IP6T_STANDARD_TARGET, 2185 .name = IP6T_STANDARD_TARGET,
1446 .targetsize = sizeof(int), 2186 .targetsize = sizeof(int),
1447 .family = AF_INET6, 2187 .family = AF_INET6,
2188#ifdef CONFIG_COMPAT
2189 .compatsize = sizeof(compat_int_t),
2190 .compat_from_user = compat_standard_from_user,
2191 .compat_to_user = compat_standard_to_user,
2192#endif
1448}; 2193};
1449 2194
1450static struct xt_target ip6t_error_target __read_mostly = { 2195static struct xt_target ip6t_error_target __read_mostly = {
@@ -1459,15 +2204,21 @@ static struct nf_sockopt_ops ip6t_sockopts = {
1459 .set_optmin = IP6T_BASE_CTL, 2204 .set_optmin = IP6T_BASE_CTL,
1460 .set_optmax = IP6T_SO_SET_MAX+1, 2205 .set_optmax = IP6T_SO_SET_MAX+1,
1461 .set = do_ip6t_set_ctl, 2206 .set = do_ip6t_set_ctl,
2207#ifdef CONFIG_COMPAT
2208 .compat_set = compat_do_ip6t_set_ctl,
2209#endif
1462 .get_optmin = IP6T_BASE_CTL, 2210 .get_optmin = IP6T_BASE_CTL,
1463 .get_optmax = IP6T_SO_GET_MAX+1, 2211 .get_optmax = IP6T_SO_GET_MAX+1,
1464 .get = do_ip6t_get_ctl, 2212 .get = do_ip6t_get_ctl,
2213#ifdef CONFIG_COMPAT
2214 .compat_get = compat_do_ip6t_get_ctl,
2215#endif
1465 .owner = THIS_MODULE, 2216 .owner = THIS_MODULE,
1466}; 2217};
1467 2218
1468static struct xt_match icmp6_matchstruct __read_mostly = { 2219static struct xt_match icmp6_matchstruct __read_mostly = {
1469 .name = "icmp6", 2220 .name = "icmp6",
1470 .match = &icmp6_match, 2221 .match = icmp6_match,
1471 .matchsize = sizeof(struct ip6t_icmp), 2222 .matchsize = sizeof(struct ip6t_icmp),
1472 .checkentry = icmp6_checkentry, 2223 .checkentry = icmp6_checkentry,
1473 .proto = IPPROTO_ICMPV6, 2224 .proto = IPPROTO_ICMPV6,
@@ -1516,6 +2267,7 @@ err1:
1516static void __exit ip6_tables_fini(void) 2267static void __exit ip6_tables_fini(void)
1517{ 2268{
1518 nf_unregister_sockopt(&ip6t_sockopts); 2269 nf_unregister_sockopt(&ip6t_sockopts);
2270
1519 xt_unregister_match(&icmp6_matchstruct); 2271 xt_unregister_match(&icmp6_matchstruct);
1520 xt_unregister_target(&ip6t_error_target); 2272 xt_unregister_target(&ip6t_error_target);
1521 xt_unregister_target(&ip6t_standard_target); 2273 xt_unregister_target(&ip6t_standard_target);
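
The nf_sockopt_ops hunk above wires the new handlers in only under CONFIG_COMPAT, leaving the native paths untouched. A reduced model of that registration pattern; the ops struct and handlers here are invented for illustration, not the kernel's nf_sockopt_ops:

/* An ops table with optional compat entry points, compiled in only
 * when COMPAT support is configured. */
#include <stdio.h>

#define CONFIG_COMPAT		/* pretend the kernel config enabled it */

struct sockopt_ops {
	int (*set)(int cmd);
#ifdef CONFIG_COMPAT
	int (*compat_set)(int cmd);
#endif
};

static int native_set(int cmd)
{
	printf("native set, cmd %d\n", cmd);
	return 0;
}

#ifdef CONFIG_COMPAT
static int compat32_set(int cmd)
{
	printf("compat set, cmd %d\n", cmd);
	return 0;
}
#endif

static const struct sockopt_ops ops = {
	.set		= native_set,
#ifdef CONFIG_COMPAT
	.compat_set	= compat32_set,
#endif
};

int main(void)
{
	ops.set(0);		/* 64-bit caller path */
#ifdef CONFIG_COMPAT
	ops.compat_set(0);	/* 32-bit caller path */
#endif
	return 0;
}
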
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index 9afc836fd454..d5f8fd5f29d3 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -15,15 +15,13 @@
15#include <linux/netfilter_ipv6/ip6t_HL.h> 15#include <linux/netfilter_ipv6/ip6t_HL.h>
16 16
17MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); 17MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
18MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); 18MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target");
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20 20
21static unsigned int ip6t_hl_target(struct sk_buff *skb, 21static unsigned int
22 const struct net_device *in, 22hl_tg6(struct sk_buff *skb, const struct net_device *in,
23 const struct net_device *out, 23 const struct net_device *out, unsigned int hooknum,
24 unsigned int hooknum, 24 const struct xt_target *target, const void *targinfo)
25 const struct xt_target *target,
26 const void *targinfo)
27{ 25{
28 struct ipv6hdr *ip6h; 26 struct ipv6hdr *ip6h;
29 const struct ip6t_HL_info *info = targinfo; 27 const struct ip6t_HL_info *info = targinfo;
@@ -58,11 +56,10 @@ static unsigned int ip6t_hl_target(struct sk_buff *skb,
58 return XT_CONTINUE; 56 return XT_CONTINUE;
59} 57}
60 58
61static bool ip6t_hl_checkentry(const char *tablename, 59static bool
62 const void *entry, 60hl_tg6_check(const char *tablename, const void *entry,
63 const struct xt_target *target, 61 const struct xt_target *target, void *targinfo,
64 void *targinfo, 62 unsigned int hook_mask)
65 unsigned int hook_mask)
66{ 63{
67 const struct ip6t_HL_info *info = targinfo; 64 const struct ip6t_HL_info *info = targinfo;
68 65
@@ -79,25 +76,25 @@ static bool ip6t_hl_checkentry(const char *tablename,
79 return true; 76 return true;
80} 77}
81 78
82static struct xt_target ip6t_HL __read_mostly = { 79static struct xt_target hl_tg6_reg __read_mostly = {
83 .name = "HL", 80 .name = "HL",
84 .family = AF_INET6, 81 .family = AF_INET6,
85 .target = ip6t_hl_target, 82 .target = hl_tg6,
86 .targetsize = sizeof(struct ip6t_HL_info), 83 .targetsize = sizeof(struct ip6t_HL_info),
87 .table = "mangle", 84 .table = "mangle",
88 .checkentry = ip6t_hl_checkentry, 85 .checkentry = hl_tg6_check,
89 .me = THIS_MODULE 86 .me = THIS_MODULE
90}; 87};
91 88
92static int __init ip6t_hl_init(void) 89static int __init hl_tg6_init(void)
93{ 90{
94 return xt_register_target(&ip6t_HL); 91 return xt_register_target(&hl_tg6_reg);
95} 92}
96 93
97static void __exit ip6t_hl_fini(void) 94static void __exit hl_tg6_exit(void)
98{ 95{
99 xt_unregister_target(&ip6t_HL); 96 xt_unregister_target(&hl_tg6_reg);
100} 97}
101 98
102module_init(ip6t_hl_init); 99module_init(hl_tg6_init);
103module_exit(ip6t_hl_fini); 100module_exit(hl_tg6_exit);
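
The HL target renamed above supports set, increment, and decrement modes on the IPv6 hop limit, clamped to the 0..255 range of the field. A self-contained sketch of that adjustment; the mode constants are invented stand-ins for the IP6T_HL_* values in ip6t_HL.h:

/* Hop-limit adjustment with saturation. */
#include <stdio.h>

enum { HL_SET, HL_INC, HL_DEC };

static unsigned char adjust_hl(unsigned char hl, int mode, unsigned char value)
{
	int new_hl = hl;

	switch (mode) {
	case HL_SET:
		new_hl = value;
		break;
	case HL_INC:
		new_hl = hl + value;
		if (new_hl > 255)
			new_hl = 255;	/* hop limit is a single octet */
		break;
	case HL_DEC:
		new_hl = hl - value;
		if (new_hl < 0)
			new_hl = 0;
		break;
	}
	return (unsigned char)new_hl;
}

int main(void)
{
	printf("%u\n", adjust_hl(64, HL_DEC, 1));	/* 63 */
	printf("%u\n", adjust_hl(250, HL_INC, 10));	/* 255, clamped */
	return 0;
}
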
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 7a48c342df46..86a613810b69 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -23,9 +23,10 @@
23#include <linux/netfilter.h> 23#include <linux/netfilter.h>
24#include <linux/netfilter/x_tables.h> 24#include <linux/netfilter/x_tables.h>
25#include <linux/netfilter_ipv6/ip6_tables.h> 25#include <linux/netfilter_ipv6/ip6_tables.h>
26#include <net/netfilter/nf_log.h>
26 27
27MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); 28MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
28MODULE_DESCRIPTION("IP6 tables LOG target module"); 29MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
29MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
30 31
31struct in_device; 32struct in_device;
@@ -362,7 +363,9 @@ static void dump_packet(const struct nf_loginfo *info,
362 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { 363 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
363 read_lock_bh(&skb->sk->sk_callback_lock); 364 read_lock_bh(&skb->sk->sk_callback_lock);
364 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 365 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
365 printk("UID=%u ", skb->sk->sk_socket->file->f_uid); 366 printk("UID=%u GID=%u",
367 skb->sk->sk_socket->file->f_uid,
368 skb->sk->sk_socket->file->f_gid);
366 read_unlock_bh(&skb->sk->sk_callback_lock); 369 read_unlock_bh(&skb->sk->sk_callback_lock);
367 } 370 }
368} 371}
@@ -431,12 +434,9 @@ ip6t_log_packet(unsigned int pf,
431} 434}
432 435
433static unsigned int 436static unsigned int
434ip6t_log_target(struct sk_buff *skb, 437log_tg6(struct sk_buff *skb, const struct net_device *in,
435 const struct net_device *in, 438 const struct net_device *out, unsigned int hooknum,
436 const struct net_device *out, 439 const struct xt_target *target, const void *targinfo)
437 unsigned int hooknum,
438 const struct xt_target *target,
439 const void *targinfo)
440{ 440{
441 const struct ip6t_log_info *loginfo = targinfo; 441 const struct ip6t_log_info *loginfo = targinfo;
442 struct nf_loginfo li; 442 struct nf_loginfo li;
@@ -450,11 +450,10 @@ ip6t_log_target(struct sk_buff *skb,
450} 450}
451 451
452 452
453static bool ip6t_log_checkentry(const char *tablename, 453static bool
454 const void *entry, 454log_tg6_check(const char *tablename, const void *entry,
455 const struct xt_target *target, 455 const struct xt_target *target, void *targinfo,
456 void *targinfo, 456 unsigned int hook_mask)
457 unsigned int hook_mask)
458{ 457{
459 const struct ip6t_log_info *loginfo = targinfo; 458 const struct ip6t_log_info *loginfo = targinfo;
460 459
@@ -470,37 +469,37 @@ static bool ip6t_log_checkentry(const char *tablename,
470 return true; 469 return true;
471} 470}
472 471
473static struct xt_target ip6t_log_reg __read_mostly = { 472static struct xt_target log_tg6_reg __read_mostly = {
474 .name = "LOG", 473 .name = "LOG",
475 .family = AF_INET6, 474 .family = AF_INET6,
476 .target = ip6t_log_target, 475 .target = log_tg6,
477 .targetsize = sizeof(struct ip6t_log_info), 476 .targetsize = sizeof(struct ip6t_log_info),
478 .checkentry = ip6t_log_checkentry, 477 .checkentry = log_tg6_check,
479 .me = THIS_MODULE, 478 .me = THIS_MODULE,
480}; 479};
481 480
482static struct nf_logger ip6t_logger = { 481static const struct nf_logger ip6t_logger = {
483 .name = "ip6t_LOG", 482 .name = "ip6t_LOG",
484 .logfn = &ip6t_log_packet, 483 .logfn = &ip6t_log_packet,
485 .me = THIS_MODULE, 484 .me = THIS_MODULE,
486}; 485};
487 486
488static int __init ip6t_log_init(void) 487static int __init log_tg6_init(void)
489{ 488{
490 int ret; 489 int ret;
491 490
492 ret = xt_register_target(&ip6t_log_reg); 491 ret = xt_register_target(&log_tg6_reg);
493 if (ret < 0) 492 if (ret < 0)
494 return ret; 493 return ret;
495 nf_log_register(PF_INET6, &ip6t_logger); 494 nf_log_register(PF_INET6, &ip6t_logger);
496 return 0; 495 return 0;
497} 496}
498 497
499static void __exit ip6t_log_fini(void) 498static void __exit log_tg6_exit(void)
500{ 499{
501 nf_log_unregister(&ip6t_logger); 500 nf_log_unregister(&ip6t_logger);
502 xt_unregister_target(&ip6t_log_reg); 501 xt_unregister_target(&log_tg6_reg);
503} 502}
504 503
505module_init(ip6t_log_init); 504module_init(log_tg6_init);
506module_exit(ip6t_log_fini); 505module_exit(log_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 1a7d2917545d..b23baa635fe0 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -31,7 +31,7 @@
31#include <linux/netfilter_ipv6/ip6t_REJECT.h> 31#include <linux/netfilter_ipv6/ip6t_REJECT.h>
32 32
33MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); 33MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
34MODULE_DESCRIPTION("IP6 tables REJECT target module"); 34MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
35MODULE_LICENSE("GPL"); 35MODULE_LICENSE("GPL");
36 36
37/* Send RST reply */ 37/* Send RST reply */
@@ -121,7 +121,6 @@ static void send_reset(struct sk_buff *oldskb)
121 ip6h->version = 6; 121 ip6h->version = 6;
122 ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); 122 ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
123 ip6h->nexthdr = IPPROTO_TCP; 123 ip6h->nexthdr = IPPROTO_TCP;
124 ip6h->payload_len = htons(sizeof(struct tcphdr));
125 ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); 124 ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
126 ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); 125 ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
127 126
@@ -159,25 +158,22 @@ static void send_reset(struct sk_buff *oldskb)
159 158
160 nf_ct_attach(nskb, oldskb); 159 nf_ct_attach(nskb, oldskb);
161 160
162 NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, 161 ip6_local_out(nskb);
163 dst_output);
164} 162}
165 163
166static inline void 164static inline void
167send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) 165send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
168{ 166{
169 if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) 167 if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
170 skb_in->dev = init_net.loopback_dev; 168 skb_in->dev = init_net.loopback_dev;
171 169
172 icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); 170 icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
173} 171}
174 172
175static unsigned int reject6_target(struct sk_buff *skb, 173static unsigned int
176 const struct net_device *in, 174reject_tg6(struct sk_buff *skb, const struct net_device *in,
177 const struct net_device *out, 175 const struct net_device *out, unsigned int hooknum,
178 unsigned int hooknum, 176 const struct xt_target *target, const void *targinfo)
179 const struct xt_target *target,
180 const void *targinfo)
181{ 177{
182 const struct ip6t_reject_info *reject = targinfo; 178 const struct ip6t_reject_info *reject = targinfo;
183 179
@@ -216,11 +212,10 @@ static unsigned int reject6_target(struct sk_buff *skb,
216 return NF_DROP; 212 return NF_DROP;
217} 213}
218 214
219static bool check(const char *tablename, 215static bool
220 const void *entry, 216reject_tg6_check(const char *tablename, const void *entry,
221 const struct xt_target *target, 217 const struct xt_target *target, void *targinfo,
222 void *targinfo, 218 unsigned int hook_mask)
223 unsigned int hook_mask)
224{ 219{
225 const struct ip6t_reject_info *rejinfo = targinfo; 220 const struct ip6t_reject_info *rejinfo = targinfo;
226 const struct ip6t_entry *e = entry; 221 const struct ip6t_entry *e = entry;
@@ -239,27 +234,27 @@ static bool check(const char *tablename,
239 return true; 234 return true;
240} 235}
241 236
242static struct xt_target ip6t_reject_reg __read_mostly = { 237static struct xt_target reject_tg6_reg __read_mostly = {
243 .name = "REJECT", 238 .name = "REJECT",
244 .family = AF_INET6, 239 .family = AF_INET6,
245 .target = reject6_target, 240 .target = reject_tg6,
246 .targetsize = sizeof(struct ip6t_reject_info), 241 .targetsize = sizeof(struct ip6t_reject_info),
247 .table = "filter", 242 .table = "filter",
248 .hooks = (1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | 243 .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
249 (1 << NF_IP6_LOCAL_OUT), 244 (1 << NF_INET_LOCAL_OUT),
250 .checkentry = check, 245 .checkentry = reject_tg6_check,
251 .me = THIS_MODULE 246 .me = THIS_MODULE
252}; 247};
253 248
254static int __init ip6t_reject_init(void) 249static int __init reject_tg6_init(void)
255{ 250{
256 return xt_register_target(&ip6t_reject_reg); 251 return xt_register_target(&reject_tg6_reg);
257} 252}
258 253
259static void __exit ip6t_reject_fini(void) 254static void __exit reject_tg6_exit(void)
260{ 255{
261 xt_unregister_target(&ip6t_reject_reg); 256 xt_unregister_target(&reject_tg6_reg);
262} 257}
263 258
264module_init(ip6t_reject_init); 259module_init(reject_tg6_init);
265module_exit(ip6t_reject_fini); 260module_exit(reject_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 2a25fe25e0e0..429629fd63b6 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -20,7 +20,7 @@
20#include <linux/netfilter_ipv6/ip6t_ah.h> 20#include <linux/netfilter_ipv6/ip6t_ah.h>
21 21
22MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
23MODULE_DESCRIPTION("IPv6 AH match"); 23MODULE_DESCRIPTION("Xtables: IPv6 IPsec-AH match");
24MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 24MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
25 25
26/* Returns 1 if the spi is matched by the range, 0 otherwise */ 26/* Returns 1 if the spi is matched by the range, 0 otherwise */
@@ -37,14 +37,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
37} 37}
38 38
39static bool 39static bool
40match(const struct sk_buff *skb, 40ah_mt6(const struct sk_buff *skb, const struct net_device *in,
41 const struct net_device *in, 41 const struct net_device *out, const struct xt_match *match,
42 const struct net_device *out, 42 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
43 const struct xt_match *match,
44 const void *matchinfo,
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 43{
49 struct ip_auth_hdr _ah; 44 struct ip_auth_hdr _ah;
50 const struct ip_auth_hdr *ah; 45 const struct ip_auth_hdr *ah;
@@ -100,11 +95,9 @@ match(const struct sk_buff *skb,
100 95
101/* Called when user tries to insert an entry of this type. */ 96/* Called when user tries to insert an entry of this type. */
102static bool 97static bool
103checkentry(const char *tablename, 98ah_mt6_check(const char *tablename, const void *entry,
104 const void *entry, 99 const struct xt_match *match, void *matchinfo,
105 const struct xt_match *match, 100 unsigned int hook_mask)
106 void *matchinfo,
107 unsigned int hook_mask)
108{ 101{
109 const struct ip6t_ah *ahinfo = matchinfo; 102 const struct ip6t_ah *ahinfo = matchinfo;
110 103
@@ -115,24 +108,24 @@ checkentry(const char *tablename,
115 return true; 108 return true;
116} 109}
117 110
118static struct xt_match ah_match __read_mostly = { 111static struct xt_match ah_mt6_reg __read_mostly = {
119 .name = "ah", 112 .name = "ah",
120 .family = AF_INET6, 113 .family = AF_INET6,
121 .match = match, 114 .match = ah_mt6,
122 .matchsize = sizeof(struct ip6t_ah), 115 .matchsize = sizeof(struct ip6t_ah),
123 .checkentry = checkentry, 116 .checkentry = ah_mt6_check,
124 .me = THIS_MODULE, 117 .me = THIS_MODULE,
125}; 118};
126 119
127static int __init ip6t_ah_init(void) 120static int __init ah_mt6_init(void)
128{ 121{
129 return xt_register_match(&ah_match); 122 return xt_register_match(&ah_mt6_reg);
130} 123}
131 124
132static void __exit ip6t_ah_fini(void) 125static void __exit ah_mt6_exit(void)
133{ 126{
134 xt_unregister_match(&ah_match); 127 xt_unregister_match(&ah_mt6_reg);
135} 128}
136 129
137module_init(ip6t_ah_init); 130module_init(ah_mt6_init);
138module_exit(ip6t_ah_fini); 131module_exit(ah_mt6_exit);
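
spi_match in the AH diff above is the usual xtables range test: true when the value lies inside [min,max], XORed with an invert flag (the frag and mh matches further down use the same predicate). In isolation:

/* Range-with-invert test as used by spi_match. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool spi_match(uint32_t min, uint32_t max, uint32_t spi, bool invert)
{
	return (spi >= min && spi <= max) ^ invert;
}

int main(void)
{
	printf("%d\n", spi_match(100, 200, 150, false));	/* 1: in range */
	printf("%d\n", spi_match(100, 200, 150, true));		/* 0: negated rule */
	printf("%d\n", spi_match(100, 200, 50, true));		/* 1: out of range, negated */
	return 0;
}
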
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 41df9a578c7a..8f331f12b2ec 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -15,19 +15,15 @@
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv6/ip6_tables.h> 16#include <linux/netfilter_ipv6/ip6_tables.h>
17 17
18MODULE_DESCRIPTION("IPv6 EUI64 address checking match"); 18MODULE_DESCRIPTION("Xtables: IPv6 EUI64 address match");
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 20MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
21 21
22static bool 22static bool
23match(const struct sk_buff *skb, 23eui64_mt6(const struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, 24 const struct net_device *out, const struct xt_match *match,
25 const struct net_device *out, 25 const void *matchinfo, int offset, unsigned int protoff,
26 const struct xt_match *match, 26 bool *hotdrop)
27 const void *matchinfo,
28 int offset,
29 unsigned int protoff,
30 bool *hotdrop)
31{ 27{
32 unsigned char eui64[8]; 28 unsigned char eui64[8];
33 int i = 0; 29 int i = 0;
@@ -62,25 +58,25 @@ match(const struct sk_buff *skb,
62 return false; 58 return false;
63} 59}
64 60
65static struct xt_match eui64_match __read_mostly = { 61static struct xt_match eui64_mt6_reg __read_mostly = {
66 .name = "eui64", 62 .name = "eui64",
67 .family = AF_INET6, 63 .family = AF_INET6,
68 .match = match, 64 .match = eui64_mt6,
69 .matchsize = sizeof(int), 65 .matchsize = sizeof(int),
70 .hooks = (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) | 66 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) |
71 (1 << NF_IP6_FORWARD), 67 (1 << NF_INET_FORWARD),
72 .me = THIS_MODULE, 68 .me = THIS_MODULE,
73}; 69};
74 70
75static int __init ip6t_eui64_init(void) 71static int __init eui64_mt6_init(void)
76{ 72{
77 return xt_register_match(&eui64_match); 73 return xt_register_match(&eui64_mt6_reg);
78} 74}
79 75
80static void __exit ip6t_eui64_fini(void) 76static void __exit eui64_mt6_exit(void)
81{ 77{
82 xt_unregister_match(&eui64_match); 78 xt_unregister_match(&eui64_mt6_reg);
83} 79}
84 80
85module_init(ip6t_eui64_init); 81module_init(eui64_mt6_init);
86module_exit(ip6t_eui64_fini); 82module_exit(eui64_mt6_exit);
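
The eui64 match renamed above checks that the interface identifier in the address is the modified EUI-64 form of the Ethernet source MAC: split the 48-bit MAC, insert FF:FE in the middle, and flip the universal/local bit. A standalone derivation:

/* Derive a modified EUI-64 interface identifier from a 48-bit MAC. */
#include <stdio.h>

static void mac_to_eui64(const unsigned char mac[6], unsigned char eui[8])
{
	eui[0] = mac[0] ^ 0x02;	/* flip the universal/local bit */
	eui[1] = mac[1];
	eui[2] = mac[2];
	eui[3] = 0xFF;		/* fixed filler octets */
	eui[4] = 0xFE;
	eui[5] = mac[3];
	eui[6] = mac[4];
	eui[7] = mac[5];
}

int main(void)
{
	const unsigned char mac[6] = { 0x00, 0x0c, 0x29, 0xab, 0xcd, 0xef };
	unsigned char eui[8];

	mac_to_eui64(mac, eui);
	for (int i = 0; i < 8; i++)
		printf("%02x%s", eui[i], i == 7 ? "\n" : ":");
	return 0;
}
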
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 968aeba02073..e2bbc63dba5b 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -19,7 +19,7 @@
19#include <linux/netfilter_ipv6/ip6t_frag.h> 19#include <linux/netfilter_ipv6/ip6t_frag.h>
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_DESCRIPTION("IPv6 FRAG match"); 22MODULE_DESCRIPTION("Xtables: IPv6 fragment match");
23MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 23MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
24 24
25/* Returns 1 if the id is matched by the range, 0 otherwise */ 25/* Returns 1 if the id is matched by the range, 0 otherwise */
@@ -35,14 +35,10 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
35} 35}
36 36
37static bool 37static bool
38match(const struct sk_buff *skb, 38frag_mt6(const struct sk_buff *skb, const struct net_device *in,
39 const struct net_device *in, 39 const struct net_device *out, const struct xt_match *match,
40 const struct net_device *out, 40 const void *matchinfo, int offset, unsigned int protoff,
41 const struct xt_match *match, 41 bool *hotdrop)
42 const void *matchinfo,
43 int offset,
44 unsigned int protoff,
45 bool *hotdrop)
46{ 42{
47 struct frag_hdr _frag; 43 struct frag_hdr _frag;
48 const struct frag_hdr *fh; 44 const struct frag_hdr *fh;
@@ -116,11 +112,9 @@ match(const struct sk_buff *skb,
116 112
117/* Called when user tries to insert an entry of this type. */ 113/* Called when user tries to insert an entry of this type. */
118static bool 114static bool
119checkentry(const char *tablename, 115frag_mt6_check(const char *tablename, const void *ip,
120 const void *ip, 116 const struct xt_match *match, void *matchinfo,
121 const struct xt_match *match, 117 unsigned int hook_mask)
122 void *matchinfo,
123 unsigned int hook_mask)
124{ 118{
125 const struct ip6t_frag *fraginfo = matchinfo; 119 const struct ip6t_frag *fraginfo = matchinfo;
126 120
@@ -131,24 +125,24 @@ checkentry(const char *tablename,
131 return true; 125 return true;
132} 126}
133 127
134static struct xt_match frag_match __read_mostly = { 128static struct xt_match frag_mt6_reg __read_mostly = {
135 .name = "frag", 129 .name = "frag",
136 .family = AF_INET6, 130 .family = AF_INET6,
137 .match = match, 131 .match = frag_mt6,
138 .matchsize = sizeof(struct ip6t_frag), 132 .matchsize = sizeof(struct ip6t_frag),
139 .checkentry = checkentry, 133 .checkentry = frag_mt6_check,
140 .me = THIS_MODULE, 134 .me = THIS_MODULE,
141}; 135};
142 136
143static int __init ip6t_frag_init(void) 137static int __init frag_mt6_init(void)
144{ 138{
145 return xt_register_match(&frag_match); 139 return xt_register_match(&frag_mt6_reg);
146} 140}
147 141
148static void __exit ip6t_frag_fini(void) 142static void __exit frag_mt6_exit(void)
149{ 143{
150 xt_unregister_match(&frag_match); 144 xt_unregister_match(&frag_mt6_reg);
151} 145}
152 146
153module_init(ip6t_frag_init); 147module_init(frag_mt6_init);
154module_exit(ip6t_frag_fini); 148module_exit(frag_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index e6ca6018b1ea..62e39ace0588 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -21,7 +21,7 @@
21#include <linux/netfilter_ipv6/ip6t_opts.h> 21#include <linux/netfilter_ipv6/ip6t_opts.h>
22 22
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24MODULE_DESCRIPTION("IPv6 opts match"); 24MODULE_DESCRIPTION("Xtables: IPv6 Hop-By-Hop and Destination Header match");
25MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 25MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
26MODULE_ALIAS("ip6t_dst"); 26MODULE_ALIAS("ip6t_dst");
27 27
@@ -42,14 +42,10 @@ MODULE_ALIAS("ip6t_dst");
42 */ 42 */
43 43
44static bool 44static bool
45match(const struct sk_buff *skb, 45hbh_mt6(const struct sk_buff *skb, const struct net_device *in,
46 const struct net_device *in, 46 const struct net_device *out, const struct xt_match *match,
47 const struct net_device *out, 47 const void *matchinfo, int offset, unsigned int protoff,
48 const struct xt_match *match, 48 bool *hotdrop)
49 const void *matchinfo,
50 int offset,
51 unsigned int protoff,
52 bool *hotdrop)
53{ 49{
54 struct ipv6_opt_hdr _optsh; 50 struct ipv6_opt_hdr _optsh;
55 const struct ipv6_opt_hdr *oh; 51 const struct ipv6_opt_hdr *oh;
@@ -171,11 +167,9 @@ match(const struct sk_buff *skb,
171 167
172/* Called when user tries to insert an entry of this type. */ 168/* Called when user tries to insert an entry of this type. */
173static bool 169static bool
174checkentry(const char *tablename, 170hbh_mt6_check(const char *tablename, const void *entry,
175 const void *entry, 171 const struct xt_match *match, void *matchinfo,
176 const struct xt_match *match, 172 unsigned int hook_mask)
177 void *matchinfo,
178 unsigned int hook_mask)
179{ 173{
180 const struct ip6t_opts *optsinfo = matchinfo; 174 const struct ip6t_opts *optsinfo = matchinfo;
181 175
@@ -186,36 +180,36 @@ checkentry(const char *tablename,
186 return true; 180 return true;
187} 181}
188 182
189static struct xt_match opts_match[] __read_mostly = { 183static struct xt_match hbh_mt6_reg[] __read_mostly = {
190 { 184 {
191 .name = "hbh", 185 .name = "hbh",
192 .family = AF_INET6, 186 .family = AF_INET6,
193 .match = match, 187 .match = hbh_mt6,
194 .matchsize = sizeof(struct ip6t_opts), 188 .matchsize = sizeof(struct ip6t_opts),
195 .checkentry = checkentry, 189 .checkentry = hbh_mt6_check,
196 .me = THIS_MODULE, 190 .me = THIS_MODULE,
197 .data = NEXTHDR_HOP, 191 .data = NEXTHDR_HOP,
198 }, 192 },
199 { 193 {
200 .name = "dst", 194 .name = "dst",
201 .family = AF_INET6, 195 .family = AF_INET6,
202 .match = match, 196 .match = hbh_mt6,
203 .matchsize = sizeof(struct ip6t_opts), 197 .matchsize = sizeof(struct ip6t_opts),
204 .checkentry = checkentry, 198 .checkentry = hbh_mt6_check,
205 .me = THIS_MODULE, 199 .me = THIS_MODULE,
206 .data = NEXTHDR_DEST, 200 .data = NEXTHDR_DEST,
207 }, 201 },
208}; 202};
209 203
210static int __init ip6t_hbh_init(void) 204static int __init hbh_mt6_init(void)
211{ 205{
212 return xt_register_matches(opts_match, ARRAY_SIZE(opts_match)); 206 return xt_register_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
213} 207}
214 208
215static void __exit ip6t_hbh_fini(void) 209static void __exit hbh_mt6_exit(void)
216{ 210{
217 xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match)); 211 xt_unregister_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg));
218} 212}
219 213
220module_init(ip6t_hbh_init); 214module_init(hbh_mt6_init);
221module_exit(ip6t_hbh_fini); 215module_exit(hbh_mt6_exit);
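
The hbh diff registers one match function under two names, "hbh" and "dst", distinguished only by the .data field (NEXTHDR_HOP versus NEXTHDR_DEST). The shared-handler-with-per-registration-data pattern, reduced to a toy; the struct is a stand-in for xt_match:

/* One handler serving two registrations, told apart by per-entry
 * data. NEXTHDR_HOP/NEXTHDR_DEST carry their real protocol values. */
#include <stdio.h>

#define NEXTHDR_HOP	0
#define NEXTHDR_DEST	60

struct toy_match {
	const char *name;
	int data;		/* which extension header to parse */
};

static void handle(const struct toy_match *m)
{
	printf("match \"%s\" looks for next-header %d\n", m->name, m->data);
}

static const struct toy_match matches[] = {
	{ .name = "hbh", .data = NEXTHDR_HOP },
	{ .name = "dst", .data = NEXTHDR_DEST },
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(matches) / sizeof(matches[0]); i++)
		handle(&matches[i]);
	return 0;
}
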
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index ca29ec00dc18..345671673845 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -16,13 +16,13 @@
16#include <linux/netfilter/x_tables.h> 16#include <linux/netfilter/x_tables.h>
17 17
18MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); 18MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
19MODULE_DESCRIPTION("IP tables Hop Limit matching module"); 19MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match");
20MODULE_LICENSE("GPL"); 20MODULE_LICENSE("GPL");
21 21
22static bool match(const struct sk_buff *skb, 22static bool
23 const struct net_device *in, const struct net_device *out, 23hl_mt6(const struct sk_buff *skb, const struct net_device *in,
24 const struct xt_match *match, const void *matchinfo, 24 const struct net_device *out, const struct xt_match *match,
25 int offset, unsigned int protoff, bool *hotdrop) 25 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
26{ 26{
27 const struct ip6t_hl_info *info = matchinfo; 27 const struct ip6t_hl_info *info = matchinfo;
28 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 28 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -49,23 +49,23 @@ static bool match(const struct sk_buff *skb,
49 return false; 49 return false;
50} 50}
51 51
52static struct xt_match hl_match __read_mostly = { 52static struct xt_match hl_mt6_reg __read_mostly = {
53 .name = "hl", 53 .name = "hl",
54 .family = AF_INET6, 54 .family = AF_INET6,
55 .match = match, 55 .match = hl_mt6,
56 .matchsize = sizeof(struct ip6t_hl_info), 56 .matchsize = sizeof(struct ip6t_hl_info),
57 .me = THIS_MODULE, 57 .me = THIS_MODULE,
58}; 58};
59 59
60static int __init ip6t_hl_init(void) 60static int __init hl_mt6_init(void)
61{ 61{
62 return xt_register_match(&hl_match); 62 return xt_register_match(&hl_mt6_reg);
63} 63}
64 64
65static void __exit ip6t_hl_fini(void) 65static void __exit hl_mt6_exit(void)
66{ 66{
67 xt_unregister_match(&hl_match); 67 xt_unregister_match(&hl_mt6_reg);
68} 68}
69 69
70module_init(ip6t_hl_init); 70module_init(hl_mt6_init);
71module_exit(ip6t_hl_fini); 71module_exit(hl_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 2c65c2f9a4ab..3a940171f829 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -23,18 +23,14 @@
23#include <linux/netfilter_ipv6/ip6t_ipv6header.h> 23#include <linux/netfilter_ipv6/ip6t_ipv6header.h>
24 24
25MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
26MODULE_DESCRIPTION("IPv6 headers match"); 26MODULE_DESCRIPTION("Xtables: IPv6 header types match");
27MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 27MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
28 28
29static bool 29static bool
30ipv6header_match(const struct sk_buff *skb, 30ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in,
31 const struct net_device *in, 31 const struct net_device *out, const struct xt_match *match,
32 const struct net_device *out, 32 const void *matchinfo, int offset, unsigned int protoff,
33 const struct xt_match *match, 33 bool *hotdrop)
34 const void *matchinfo,
35 int offset,
36 unsigned int protoff,
37 bool *hotdrop)
38{ 34{
39 const struct ip6t_ipv6header_info *info = matchinfo; 35 const struct ip6t_ipv6header_info *info = matchinfo;
40 unsigned int temp; 36 unsigned int temp;
@@ -125,11 +121,9 @@ ipv6header_match(const struct sk_buff *skb,
125} 121}
126 122
127static bool 123static bool
128ipv6header_checkentry(const char *tablename, 124ipv6header_mt6_check(const char *tablename, const void *ip,
129 const void *ip, 125 const struct xt_match *match, void *matchinfo,
130 const struct xt_match *match, 126 unsigned int hook_mask)
131 void *matchinfo,
132 unsigned int hook_mask)
133{ 127{
134 const struct ip6t_ipv6header_info *info = matchinfo; 128 const struct ip6t_ipv6header_info *info = matchinfo;
135 129
@@ -141,25 +135,25 @@ ipv6header_checkentry(const char *tablename,
141 return true; 135 return true;
142} 136}
143 137
144static struct xt_match ip6t_ipv6header_match __read_mostly = { 138static struct xt_match ipv6header_mt6_reg __read_mostly = {
145 .name = "ipv6header", 139 .name = "ipv6header",
146 .family = AF_INET6, 140 .family = AF_INET6,
147 .match = &ipv6header_match, 141 .match = ipv6header_mt6,
148 .matchsize = sizeof(struct ip6t_ipv6header_info), 142 .matchsize = sizeof(struct ip6t_ipv6header_info),
149 .checkentry = &ipv6header_checkentry, 143 .checkentry = ipv6header_mt6_check,
150 .destroy = NULL, 144 .destroy = NULL,
151 .me = THIS_MODULE, 145 .me = THIS_MODULE,
152}; 146};
153 147
154static int __init ipv6header_init(void) 148static int __init ipv6header_mt6_init(void)
155{ 149{
156 return xt_register_match(&ip6t_ipv6header_match); 150 return xt_register_match(&ipv6header_mt6_reg);
157} 151}
158 152
159static void __exit ipv6header_exit(void) 153static void __exit ipv6header_mt6_exit(void)
160{ 154{
161 xt_unregister_match(&ip6t_ipv6header_match); 155 xt_unregister_match(&ipv6header_mt6_reg);
162} 156}
163 157
164module_init(ipv6header_init); 158module_init(ipv6header_mt6_init);
165module_exit(ipv6header_exit); 159module_exit(ipv6header_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 0fa714092dc9..e06678d07ec8 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -21,7 +21,7 @@
21#include <linux/netfilter/x_tables.h> 21#include <linux/netfilter/x_tables.h>
22#include <linux/netfilter_ipv6/ip6t_mh.h> 22#include <linux/netfilter_ipv6/ip6t_mh.h>
23 23
24MODULE_DESCRIPTION("ip6t_tables match for MH"); 24MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match");
25MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
26 26
27#ifdef DEBUG_IP_FIREWALL_USER 27#ifdef DEBUG_IP_FIREWALL_USER
@@ -38,14 +38,9 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
38} 38}
39 39
40static bool 40static bool
41match(const struct sk_buff *skb, 41mh_mt6(const struct sk_buff *skb, const struct net_device *in,
42 const struct net_device *in, 42 const struct net_device *out, const struct xt_match *match,
43 const struct net_device *out, 43 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
44 const struct xt_match *match,
45 const void *matchinfo,
46 int offset,
47 unsigned int protoff,
48 bool *hotdrop)
49{ 44{
50 struct ip6_mh _mh; 45 struct ip6_mh _mh;
51 const struct ip6_mh *mh; 46 const struct ip6_mh *mh;
@@ -77,11 +72,9 @@ match(const struct sk_buff *skb,
77 72
78/* Called when user tries to insert an entry of this type. */ 73/* Called when user tries to insert an entry of this type. */
79static bool 74static bool
80mh_checkentry(const char *tablename, 75mh_mt6_check(const char *tablename, const void *entry,
81 const void *entry, 76 const struct xt_match *match, void *matchinfo,
82 const struct xt_match *match, 77 unsigned int hook_mask)
83 void *matchinfo,
84 unsigned int hook_mask)
85{ 78{
86 const struct ip6t_mh *mhinfo = matchinfo; 79 const struct ip6t_mh *mhinfo = matchinfo;
87 80
@@ -89,25 +82,25 @@ mh_checkentry(const char *tablename,
89 return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); 82 return !(mhinfo->invflags & ~IP6T_MH_INV_MASK);
90} 83}
91 84
92static struct xt_match mh_match __read_mostly = { 85static struct xt_match mh_mt6_reg __read_mostly = {
93 .name = "mh", 86 .name = "mh",
94 .family = AF_INET6, 87 .family = AF_INET6,
95 .checkentry = mh_checkentry, 88 .checkentry = mh_mt6_check,
96 .match = match, 89 .match = mh_mt6,
97 .matchsize = sizeof(struct ip6t_mh), 90 .matchsize = sizeof(struct ip6t_mh),
98 .proto = IPPROTO_MH, 91 .proto = IPPROTO_MH,
99 .me = THIS_MODULE, 92 .me = THIS_MODULE,
100}; 93};
101 94
102static int __init ip6t_mh_init(void) 95static int __init mh_mt6_init(void)
103{ 96{
104 return xt_register_match(&mh_match); 97 return xt_register_match(&mh_mt6_reg);
105} 98}
106 99
107static void __exit ip6t_mh_fini(void) 100static void __exit mh_mt6_exit(void)
108{ 101{
109 xt_unregister_match(&mh_match); 102 xt_unregister_match(&mh_mt6_reg);
110} 103}
111 104
112module_init(ip6t_mh_init); 105module_init(mh_mt6_init);
113module_exit(ip6t_mh_fini); 106module_exit(mh_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
deleted file mode 100644
index 6036613aef36..000000000000
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ /dev/null
@@ -1,92 +0,0 @@
1/* Kernel module to match various things tied to sockets associated with
2 locally generated outgoing packets. */
3
4/* (C) 2000-2001 Marc Boucher <marc@mbsi.ca>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/file.h>
14#include <linux/rcupdate.h>
15#include <net/sock.h>
16
17#include <linux/netfilter_ipv6/ip6t_owner.h>
18#include <linux/netfilter_ipv6/ip6_tables.h>
19#include <linux/netfilter/x_tables.h>
20
21MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
22MODULE_DESCRIPTION("IP6 tables owner matching module");
23MODULE_LICENSE("GPL");
24
25
26static bool
27match(const struct sk_buff *skb,
28 const struct net_device *in,
29 const struct net_device *out,
30 const struct xt_match *match,
31 const void *matchinfo,
32 int offset,
33 unsigned int protoff,
34 bool *hotdrop)
35{
36 const struct ip6t_owner_info *info = matchinfo;
37
38 if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
39 return false;
40
41 if (info->match & IP6T_OWNER_UID)
42 if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
43 !!(info->invert & IP6T_OWNER_UID))
44 return false;
45
46 if (info->match & IP6T_OWNER_GID)
47 if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
48 !!(info->invert & IP6T_OWNER_GID))
49 return false;
50
51 return true;
52}
53
54static bool
55checkentry(const char *tablename,
56 const void *ip,
57 const struct xt_match *match,
58 void *matchinfo,
59 unsigned int hook_mask)
60{
61 const struct ip6t_owner_info *info = matchinfo;
62
63 if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
64 printk("ipt_owner: pid and sid matching "
65 "not supported anymore\n");
66 return false;
67 }
68 return true;
69}
70
71static struct xt_match owner_match __read_mostly = {
72 .name = "owner",
73 .family = AF_INET6,
74 .match = match,
75 .matchsize = sizeof(struct ip6t_owner_info),
76 .hooks = (1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING),
77 .checkentry = checkentry,
78 .me = THIS_MODULE,
79};
80
81static int __init ip6t_owner_init(void)
82{
83 return xt_register_match(&owner_match);
84}
85
86static void __exit ip6t_owner_fini(void)
87{
88 xt_unregister_match(&owner_match);
89}
90
91module_init(ip6t_owner_init);
92module_exit(ip6t_owner_fini);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 357cea703bd9..12a9efe9886e 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -21,7 +21,7 @@
21#include <linux/netfilter_ipv6/ip6t_rt.h> 21#include <linux/netfilter_ipv6/ip6t_rt.h>
22 22
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24MODULE_DESCRIPTION("IPv6 RT match"); 24MODULE_DESCRIPTION("Xtables: IPv6 Routing Header match");
25MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 25MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
26 26
27/* Returns 1 if the id is matched by the range, 0 otherwise */ 27/* Returns 1 if the id is matched by the range, 0 otherwise */
@@ -37,14 +37,9 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
37} 37}
38 38
39static bool 39static bool
40match(const struct sk_buff *skb, 40rt_mt6(const struct sk_buff *skb, const struct net_device *in,
41 const struct net_device *in, 41 const struct net_device *out, const struct xt_match *match,
42 const struct net_device *out, 42 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
43 const struct xt_match *match,
44 const void *matchinfo,
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 43{
49 struct ipv6_rt_hdr _route; 44 struct ipv6_rt_hdr _route;
50 const struct ipv6_rt_hdr *rh; 45 const struct ipv6_rt_hdr *rh;
@@ -195,11 +190,9 @@ match(const struct sk_buff *skb,
195 190
196/* Called when user tries to insert an entry of this type. */ 191/* Called when user tries to insert an entry of this type. */
197static bool 192static bool
198checkentry(const char *tablename, 193rt_mt6_check(const char *tablename, const void *entry,
199 const void *entry, 194 const struct xt_match *match, void *matchinfo,
200 const struct xt_match *match, 195 unsigned int hook_mask)
201 void *matchinfo,
202 unsigned int hook_mask)
203{ 196{
204 const struct ip6t_rt *rtinfo = matchinfo; 197 const struct ip6t_rt *rtinfo = matchinfo;
205 198
@@ -218,24 +211,24 @@ checkentry(const char *tablename,
218 return true; 211 return true;
219} 212}
220 213
221static struct xt_match rt_match __read_mostly = { 214static struct xt_match rt_mt6_reg __read_mostly = {
222 .name = "rt", 215 .name = "rt",
223 .family = AF_INET6, 216 .family = AF_INET6,
224 .match = match, 217 .match = rt_mt6,
225 .matchsize = sizeof(struct ip6t_rt), 218 .matchsize = sizeof(struct ip6t_rt),
226 .checkentry = checkentry, 219 .checkentry = rt_mt6_check,
227 .me = THIS_MODULE, 220 .me = THIS_MODULE,
228}; 221};
229 222
230static int __init ip6t_rt_init(void) 223static int __init rt_mt6_init(void)
231{ 224{
232 return xt_register_match(&rt_match); 225 return xt_register_match(&rt_mt6_reg);
233} 226}
234 227
235static void __exit ip6t_rt_fini(void) 228static void __exit rt_mt6_exit(void)
236{ 229{
237 xt_unregister_match(&rt_match); 230 xt_unregister_match(&rt_mt6_reg);
238} 231}
239 232
240module_init(ip6t_rt_init); 233module_init(rt_mt6_init);
241module_exit(ip6t_rt_fini); 234module_exit(rt_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 1d26b202bf30..87d38d08aad0 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -17,7 +17,9 @@ MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 17MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
18MODULE_DESCRIPTION("ip6tables filter table"); 18MODULE_DESCRIPTION("ip6tables filter table");
19 19
20#define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT)) 20#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
21 (1 << NF_INET_FORWARD) | \
22 (1 << NF_INET_LOCAL_OUT))
21 23
22static struct 24static struct
23{ 25{
@@ -31,14 +33,14 @@ static struct
31 .num_entries = 4, 33 .num_entries = 4,
32 .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), 34 .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
33 .hook_entry = { 35 .hook_entry = {
34 [NF_IP6_LOCAL_IN] = 0, 36 [NF_INET_LOCAL_IN] = 0,
35 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), 37 [NF_INET_FORWARD] = sizeof(struct ip6t_standard),
36 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 38 [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
37 }, 39 },
38 .underflow = { 40 .underflow = {
39 [NF_IP6_LOCAL_IN] = 0, 41 [NF_INET_LOCAL_IN] = 0,
40 [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), 42 [NF_INET_FORWARD] = sizeof(struct ip6t_standard),
41 [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 43 [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
42 }, 44 },
43 }, 45 },
44 .entries = { 46 .entries = {
@@ -88,26 +90,26 @@ ip6t_local_out_hook(unsigned int hook,
88 return ip6t_do_table(skb, hook, in, out, &packet_filter); 90 return ip6t_do_table(skb, hook, in, out, &packet_filter);
89} 91}
90 92
91static struct nf_hook_ops ip6t_ops[] = { 93static struct nf_hook_ops ip6t_ops[] __read_mostly = {
92 { 94 {
93 .hook = ip6t_hook, 95 .hook = ip6t_hook,
94 .owner = THIS_MODULE, 96 .owner = THIS_MODULE,
95 .pf = PF_INET6, 97 .pf = PF_INET6,
96 .hooknum = NF_IP6_LOCAL_IN, 98 .hooknum = NF_INET_LOCAL_IN,
97 .priority = NF_IP6_PRI_FILTER, 99 .priority = NF_IP6_PRI_FILTER,
98 }, 100 },
99 { 101 {
100 .hook = ip6t_hook, 102 .hook = ip6t_hook,
101 .owner = THIS_MODULE, 103 .owner = THIS_MODULE,
102 .pf = PF_INET6, 104 .pf = PF_INET6,
103 .hooknum = NF_IP6_FORWARD, 105 .hooknum = NF_INET_FORWARD,
104 .priority = NF_IP6_PRI_FILTER, 106 .priority = NF_IP6_PRI_FILTER,
105 }, 107 },
106 { 108 {
107 .hook = ip6t_local_out_hook, 109 .hook = ip6t_local_out_hook,
108 .owner = THIS_MODULE, 110 .owner = THIS_MODULE,
109 .pf = PF_INET6, 111 .pf = PF_INET6,
110 .hooknum = NF_IP6_LOCAL_OUT, 112 .hooknum = NF_INET_LOCAL_OUT,
111 .priority = NF_IP6_PRI_FILTER, 113 .priority = NF_IP6_PRI_FILTER,
112 }, 114 },
113}; 115};
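
FILTER_VALID_HOOKS above is a bitmask over the renamed NF_INET_* hook numbers; table registration rejects rules aimed at hooks outside the mask. The bit arithmetic by itself, with the hook numbering following the kernel's NF_INET_* enum order:

/* Valid-hooks bitmask: one bit per allowed hook, membership tested
 * with a shift. */
#include <stdio.h>

enum {
	NF_INET_PRE_ROUTING,
	NF_INET_LOCAL_IN,
	NF_INET_FORWARD,
	NF_INET_LOCAL_OUT,
	NF_INET_POST_ROUTING,
};

#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
			    (1 << NF_INET_FORWARD) | \
			    (1 << NF_INET_LOCAL_OUT))

int main(void)
{
	for (int h = NF_INET_PRE_ROUTING; h <= NF_INET_POST_ROUTING; h++)
		printf("hook %d %s\n", h,
		       (FILTER_VALID_HOOKS & (1 << h)) ? "valid" : "invalid");
	return 0;
}
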
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index a0b6381f1e8c..d6082600bc5d 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -15,11 +15,11 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("ip6tables mangle table");
 
-#define MANGLE_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | \
-			    (1 << NF_IP6_LOCAL_IN) | \
-			    (1 << NF_IP6_FORWARD) | \
-			    (1 << NF_IP6_LOCAL_OUT) | \
-			    (1 << NF_IP6_POST_ROUTING))
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+			    (1 << NF_INET_LOCAL_IN) | \
+			    (1 << NF_INET_FORWARD) | \
+			    (1 << NF_INET_LOCAL_OUT) | \
+			    (1 << NF_INET_POST_ROUTING))
 
 static struct
 {
@@ -33,18 +33,18 @@ static struct
 	.num_entries = 6,
 	.size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
 	.hook_entry = {
-		[NF_IP6_PRE_ROUTING] = 0,
-		[NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard),
-		[NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2,
-		[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3,
-		[NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4,
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard),
+		[NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2,
+		[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3,
+		[NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4,
 	},
 	.underflow = {
-		[NF_IP6_PRE_ROUTING] = 0,
-		[NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard),
-		[NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2,
-		[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3,
-		[NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4,
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard),
+		[NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2,
+		[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3,
+		[NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4,
 	},
 },
 .entries = {
@@ -120,40 +120,40 @@ ip6t_local_hook(unsigned int hook,
 	return ret;
 }
 
-static struct nf_hook_ops ip6t_ops[] = {
+static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 	{
 		.hook = ip6t_route_hook,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_PRE_ROUTING,
+		.hooknum = NF_INET_PRE_ROUTING,
 		.priority = NF_IP6_PRI_MANGLE,
 	},
 	{
 		.hook = ip6t_local_hook,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_LOCAL_IN,
+		.hooknum = NF_INET_LOCAL_IN,
 		.priority = NF_IP6_PRI_MANGLE,
 	},
 	{
 		.hook = ip6t_route_hook,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_FORWARD,
+		.hooknum = NF_INET_FORWARD,
 		.priority = NF_IP6_PRI_MANGLE,
 	},
 	{
 		.hook = ip6t_local_hook,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_LOCAL_OUT,
+		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP6_PRI_MANGLE,
 	},
 	{
 		.hook = ip6t_route_hook,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_POST_ROUTING,
+		.hooknum = NF_INET_POST_ROUTING,
 		.priority = NF_IP6_PRI_MANGLE,
 	},
 };
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 8f7109f991e6..eccbaaa104af 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -6,7 +6,7 @@
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
-#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT))
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
 static struct
 {
@@ -20,12 +20,12 @@ static struct
 	.num_entries = 3,
 	.size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error),
 	.hook_entry = {
-		[NF_IP6_PRE_ROUTING] = 0,
-		[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard)
 	},
 	.underflow = {
-		[NF_IP6_PRE_ROUTING] = 0,
-		[NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard)
+		[NF_INET_PRE_ROUTING] = 0,
+		[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard)
 	},
 },
 .entries = {
@@ -54,18 +54,18 @@ ip6t_hook(unsigned int hook,
 	return ip6t_do_table(skb, hook, in, out, &packet_raw);
 }
 
-static struct nf_hook_ops ip6t_ops[] = {
+static struct nf_hook_ops ip6t_ops[] __read_mostly = {
 	{
 		.hook = ip6t_hook,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_PRE_ROUTING,
+		.hooknum = NF_INET_PRE_ROUTING,
 		.priority = NF_IP6_PRI_FIRST,
 		.owner = THIS_MODULE,
 	},
 	{
 		.hook = ip6t_hook,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_LOCAL_OUT,
+		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP6_PRI_FIRST,
 		.owner = THIS_MODULE,
 	},
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ad74bab05047..2d7b0246475d 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -60,12 +60,6 @@ static int ipv6_print_tuple(struct seq_file *s,
 		       NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
 }
 
-static int ipv6_print_conntrack(struct seq_file *s,
-				const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /*
  * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
  *
@@ -258,80 +252,51 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
 	return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
 }
 
-static struct nf_hook_ops ipv6_conntrack_ops[] = {
+static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
 	{
 		.hook = ipv6_defrag,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_PRE_ROUTING,
+		.hooknum = NF_INET_PRE_ROUTING,
 		.priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
 	},
 	{
 		.hook = ipv6_conntrack_in,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_PRE_ROUTING,
+		.hooknum = NF_INET_PRE_ROUTING,
 		.priority = NF_IP6_PRI_CONNTRACK,
 	},
 	{
 		.hook = ipv6_conntrack_local,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_LOCAL_OUT,
+		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP6_PRI_CONNTRACK,
 	},
 	{
 		.hook = ipv6_defrag,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_LOCAL_OUT,
+		.hooknum = NF_INET_LOCAL_OUT,
 		.priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
 	},
 	{
 		.hook = ipv6_confirm,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_POST_ROUTING,
+		.hooknum = NF_INET_POST_ROUTING,
 		.priority = NF_IP6_PRI_LAST,
 	},
 	{
 		.hook = ipv6_confirm,
 		.owner = THIS_MODULE,
 		.pf = PF_INET6,
-		.hooknum = NF_IP6_LOCAL_IN,
+		.hooknum = NF_INET_LOCAL_IN,
 		.priority = NF_IP6_PRI_LAST-1,
 	},
 };
 
-#ifdef CONFIG_SYSCTL
-static ctl_table nf_ct_ipv6_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &nf_frags_ctl.timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
-		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &nf_frags_ctl.low_thresh,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
-		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &nf_frags_ctl.high_thresh,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{ .ctl_name = 0 }
-};
-#endif
-
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 
 #include <linux/netfilter/nfnetlink.h>
@@ -376,7 +341,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
 	.pkt_to_tuple = ipv6_pkt_to_tuple,
 	.invert_tuple = ipv6_invert_tuple,
 	.print_tuple = ipv6_print_tuple,
-	.print_conntrack = ipv6_print_conntrack,
 	.get_l4proto = ipv6_get_l4proto,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 	.tuple_to_nlattr = ipv6_tuple_to_nlattr,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index fd9123f3dc04..da924c6b5f06 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -24,6 +24,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+#include <net/netfilter/nf_log.h>
 
 static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
 
@@ -74,13 +75,6 @@ static int icmpv6_print_tuple(struct seq_file *s,
 			  ntohs(tuple->src.u.icmp.id));
 }
 
-/* Print out the private part of the conntrack. */
-static int icmpv6_print_conntrack(struct seq_file *s,
-				  const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int icmpv6_packet(struct nf_conn *ct,
 			 const struct sk_buff *skb,
@@ -192,7 +186,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
 		return -NF_ACCEPT;
 	}
 
-	if (nf_conntrack_checksum && hooknum == NF_IP6_PRE_ROUTING &&
+	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
 		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
 			      "nf_ct_icmpv6: ICMPv6 checksum failed\n");
@@ -213,12 +207,9 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
 static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
 				  const struct nf_conntrack_tuple *t)
 {
-	NLA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t),
-		&t->src.u.icmp.id);
-	NLA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t),
-		&t->dst.u.icmp.type);
-	NLA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t),
-		&t->dst.u.icmp.code);
+	NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id);
+	NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type);
+	NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code);
 
 	return 0;
 
@@ -240,12 +231,9 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
 	    || !tb[CTA_PROTO_ICMPV6_ID])
 		return -EINVAL;
 
-	tuple->dst.u.icmp.type =
-			*(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_TYPE]);
-	tuple->dst.u.icmp.code =
-			*(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_CODE]);
-	tuple->src.u.icmp.id =
-			*(__be16 *)nla_data(tb[CTA_PROTO_ICMPV6_ID]);
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
 
 	if (tuple->dst.u.icmp.type < 128
 	    || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
@@ -280,7 +268,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
 	.pkt_to_tuple = icmpv6_pkt_to_tuple,
 	.invert_tuple = icmpv6_invert_tuple,
 	.print_tuple = icmpv6_print_tuple,
-	.print_conntrack = icmpv6_print_conntrack,
 	.packet = icmpv6_packet,
 	.new = icmpv6_new,
 	.error = icmpv6_error,
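
The conversion from open-coded NLA_PUT()/nla_data() pairs to NLA_PUT_U8(), NLA_PUT_BE16(), nla_get_u8() and nla_get_be16() trades pointer-and-length bookkeeping for typed helpers. Roughly what the helpers do, assuming the <net/netlink.h> of this period (a sketch, not the verbatim definitions):

/* sketch: the typed put macro stores the value in a temporary and jumps
 * to the function's nla_put_failure label on error, which is why callers
 * such as icmpv6_tuple_to_nlattr() carry that label */
#define NLA_PUT_U8(skb, attrtype, value)			\
	do {							\
		u8 __tmp = value;				\
		if (nla_put(skb, attrtype, sizeof(u8), &__tmp))	\
			goto nla_put_failure;			\
	} while (0)

/* sketch: the typed getter hides the cast-and-dereference of nla_data() */
static inline u8 nla_get_u8(const struct nlattr *nla)
{
	return *(u8 *)nla_data(nla);
}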
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index e170c67c47a5..022da6ce4c0f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -70,14 +70,37 @@ struct nf_ct_frag6_queue
 	__u16			nhoffset;
 };
 
-struct inet_frags_ctl nf_frags_ctl __read_mostly = {
-	.high_thresh	 = 256 * 1024,
-	.low_thresh	 = 192 * 1024,
-	.timeout	 = IPV6_FRAG_TIMEOUT,
-	.secret_interval = 10 * 60 * HZ,
-};
-
 static struct inet_frags nf_frags;
+static struct netns_frags nf_init_frags;
+
+#ifdef CONFIG_SYSCTL
+struct ctl_table nf_ct_ipv6_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_frag6_timeout",
+		.data		= &nf_init_frags.timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
+	{
+		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
+		.procname	= "nf_conntrack_frag6_low_thresh",
+		.data		= &nf_init_frags.low_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
+		.procname	= "nf_conntrack_frag6_high_thresh",
+		.data		= &nf_init_frags.high_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{ .ctl_name = 0 }
+};
+#endif
 
 static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 			       struct in6_addr *daddr)
@@ -125,7 +148,7 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &nf_frags.mem);
+	atomic_sub(skb->truesize, &nf_init_frags.mem);
 	nf_skb_free(skb);
 	kfree_skb(skb);
 }
@@ -147,7 +170,7 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
 
 static void nf_ct_frag6_evictor(void)
 {
-	inet_frag_evictor(&nf_frags);
+	inet_frag_evictor(&nf_init_frags, &nf_frags);
 }
 
 static void nf_ct_frag6_expire(unsigned long data)
@@ -183,7 +206,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 	arg.dst = dst;
 	hash = ip6qhashfn(id, src, dst);
 
-	q = inet_frag_find(&nf_frags, &arg, hash);
+	q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
 	if (q == NULL)
 		goto oom;
 
@@ -352,7 +375,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	skb->dev = NULL;
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
-	atomic_add(skb->truesize, &nf_frags.mem);
+	atomic_add(skb->truesize, &nf_init_frags.mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -362,7 +385,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		fq->q.last_in |= FIRST_IN;
 	}
 	write_lock(&nf_frags.lock);
-	list_move_tail(&fq->q.lru_list, &nf_frags.lru_list);
+	list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
 	write_unlock(&nf_frags.lock);
 	return 0;
 
@@ -429,7 +452,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		clone->ip_summed = head->ip_summed;
 
 		NFCT_FRAG6_CB(clone)->orig = NULL;
-		atomic_add(clone->truesize, &nf_frags.mem);
+		atomic_add(clone->truesize, &nf_init_frags.mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -443,7 +466,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &nf_frags.mem);
+	atomic_sub(head->truesize, &nf_init_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -453,7 +476,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &nf_frags.mem);
+		atomic_sub(fp->truesize, &nf_init_frags.mem);
 	}
 
 	head->next = NULL;
@@ -603,7 +626,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh)
+	if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
 		nf_ct_frag6_evictor();
 
 	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
@@ -674,7 +697,6 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
 
 int nf_ct_frag6_init(void)
 {
-	nf_frags.ctl = &nf_frags_ctl;
 	nf_frags.hashfn = nf_hashfn;
 	nf_frags.constructor = ip6_frag_init;
 	nf_frags.destructor = NULL;
@@ -682,6 +704,11 @@ int nf_ct_frag6_init(void)
 	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
 	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
+	nf_frags.secret_interval = 10 * 60 * HZ;
+	nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
+	nf_init_frags.high_thresh = 256 * 1024;
+	nf_init_frags.low_thresh = 192 * 1024;
+	inet_frags_init_net(&nf_init_frags);
 	inet_frags_init(&nf_frags);
 
 	return 0;
@@ -691,6 +718,6 @@ void nf_ct_frag6_cleanup(void)
 {
 	inet_frags_fini(&nf_frags);
 
-	nf_frags_ctl.low_thresh = 0;
+	nf_init_frags.low_thresh = 0;
 	nf_ct_frag6_evictor();
 }
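
The change to nf_conntrack_reasm.c splits what used to live in one global inet_frags_ctl between two structures: inet_frags keeps the protocol-wide machinery (hash function, constructor, expiry callback, secret rehash interval), while netns_frags carries the counters and tunables that must become per-namespace. Roughly the shape of the split, reconstructed from the fields this diff touches (ordering and omissions are mine, not the verbatim include/net/inet_frag.h):

/* per-namespace state: one instance per struct net (or, as here,
 * a single nf_init_frags until the code is fully namespace-aware) */
struct netns_frags {
	int			nqueues;
	atomic_t		mem;		/* memory accounted per netns */
	struct list_head	lru_list;
	int			timeout;	/* per-namespace tunables     */
	int			high_thresh;
	int			low_thresh;
};

/* protocol-wide behaviour: one instance per reassembly protocol */
struct inet_frags {
	rwlock_t		lock;
	int			secret_interval;
	unsigned int		(*hashfn)(struct inet_frag_queue *);
	void			(*constructor)(struct inet_frag_queue *q, void *arg);
	void			(*destructor)(struct inet_frag_queue *);
	int			(*match)(struct inet_frag_queue *q, void *arg);
	void			(*frag_expire)(unsigned long data);
	int			qsize;
};

This is why inet_frag_evictor() and inet_frag_find() now take both pointers: the state to account against and the behaviour to apply.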
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 44937616057e..35e502a72495 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -27,6 +27,7 @@
 #include <net/ip.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/udp.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
@@ -35,15 +36,15 @@ static struct proc_dir_entry *proc_net_devsnmp6;
 static int sockstat6_seq_show(struct seq_file *seq, void *v)
 {
 	seq_printf(seq, "TCP6: inuse %d\n",
-		       sock_prot_inuse(&tcpv6_prot));
+		       sock_prot_inuse_get(&tcpv6_prot));
 	seq_printf(seq, "UDP6: inuse %d\n",
-		       sock_prot_inuse(&udpv6_prot));
+		       sock_prot_inuse_get(&udpv6_prot));
 	seq_printf(seq, "UDPLITE6: inuse %d\n",
-			sock_prot_inuse(&udplitev6_prot));
+			sock_prot_inuse_get(&udplitev6_prot));
 	seq_printf(seq, "RAW6: inuse %d\n",
-		       sock_prot_inuse(&rawv6_prot));
+		       sock_prot_inuse_get(&rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
-		       ip6_frag_nqueues(), ip6_frag_mem());
+		       ip6_frag_nqueues(&init_net), ip6_frag_mem(&init_net));
 	return 0;
 }
 
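
sockstat6 switches from reading a bare counter to the sock_prot_inuse_get() accessor. The point of the accessor form is that writers can stay per-CPU and only readers pay for a summation; a sketch of the idea, with the per-CPU variable name and PROTO_INUSE_NR slot count assumed rather than quoted from this tree:

struct prot_inuse {
	int val[PROTO_INUSE_NR];
};
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

/* writers touch only their own CPU's slot, no shared cache line */
static inline void example_inuse_add(struct proto *prot, int val)
{
	__get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
}

/* readers sum all slots; a slightly stale total is fine for /proc */
static int example_inuse_get(struct proto *prot)
{
	int cpu, res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[prot->inuse_idx];
	return res;
}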
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 807260d03586..4d880551fe6a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -54,39 +54,31 @@
 #include <net/mip6.h>
 #endif
 
+#include <net/raw.h>
 #include <net/rawv6.h>
 #include <net/xfrm.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
-DEFINE_RWLOCK(raw_v6_lock);
+static struct raw_hashinfo raw_v6_hashinfo = {
+	.lock = __RW_LOCK_UNLOCKED(),
+};
 
 static void raw_v6_hash(struct sock *sk)
 {
-	struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num &
-						 (RAWV6_HTABLE_SIZE - 1)];
-
-	write_lock_bh(&raw_v6_lock);
-	sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock_bh(&raw_v6_lock);
+	raw_hash_sk(sk, &raw_v6_hashinfo);
 }
 
 static void raw_v6_unhash(struct sock *sk)
 {
-	write_lock_bh(&raw_v6_lock);
-	if (sk_del_node_init(sk))
-		sock_prot_dec_use(sk->sk_prot);
-	write_unlock_bh(&raw_v6_lock);
+	raw_unhash_sk(sk, &raw_v6_hashinfo);
 }
 
 
-/* Grumble... icmp and ip_input want to get at this... */
-struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
-			     struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
-			     int dif)
+static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+		unsigned short num, struct in6_addr *loc_addr,
+		struct in6_addr *rmt_addr, int dif)
 {
 	struct hlist_node *node;
 	int is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -95,6 +87,9 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
 		if (inet_sk(sk)->num == num) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 
+			if (sk->sk_net != net)
+				continue;
+
 			if (!ipv6_addr_any(&np->daddr) &&
 			    !ipv6_addr_equal(&np->daddr, rmt_addr))
 				continue;
@@ -167,21 +162,22 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
  *
  *	Caller owns SKB so we must make clones.
  */
-int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 {
 	struct in6_addr *saddr;
 	struct in6_addr *daddr;
 	struct sock *sk;
 	int delivered = 0;
 	__u8 hash;
+	struct net *net;
 
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = saddr + 1;
 
 	hash = nexthdr & (MAX_INET_PROTOS - 1);
 
-	read_lock(&raw_v6_lock);
-	sk = sk_head(&raw_v6_htable[hash]);
+	read_lock(&raw_v6_hashinfo.lock);
+	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
 
 	/*
 	 *	The first socket found will be delivered after
@@ -191,7 +187,8 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	if (sk == NULL)
 		goto out;
 
-	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
+	net = skb->dev->nd_net;
+	sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
 
 	while (sk) {
 		int filtered;
@@ -234,14 +231,25 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 				rawv6_rcv(sk, clone);
 			}
 		}
-		sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
+		sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
 				     IP6CB(skb)->iif);
 	}
 out:
-	read_unlock(&raw_v6_lock);
+	read_unlock(&raw_v6_hashinfo.lock);
 	return delivered;
 }
 
+int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
+{
+	struct sock *raw_sk;
+
+	raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
+	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
+		raw_sk = NULL;
+
+	return raw_sk != NULL;
+}
+
 /* This cleans up af_inet6 a bit. -DaveM */
 static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -283,7 +291,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (!sk->sk_bound_dev_if)
 		goto out;
 
-	dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+	dev = dev_get_by_index(sk->sk_net, sk->sk_bound_dev_if);
 	if (!dev) {
 		err = -ENODEV;
 		goto out;
@@ -296,7 +304,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			v4addr = LOOPBACK4_IPV6;
 		if (!(addr_type & IPV6_ADDR_MULTICAST)) {
 			err = -EADDRNOTAVAIL;
-			if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
+			if (!ipv6_chk_addr(sk->sk_net, &addr->sin6_addr,
+					   dev, 0)) {
 				if (dev)
 					dev_put(dev);
 				goto out;
@@ -316,7 +325,7 @@ out:
 	return err;
 }
 
-void rawv6_err(struct sock *sk, struct sk_buff *skb,
+static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	       struct inet6_skb_parm *opt,
 	       int type, int code, int offset, __be32 info)
 {
@@ -350,18 +359,45 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	}
 }
 
+void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
+		int type, int code, int inner_offset, __be32 info)
+{
+	struct sock *sk;
+	int hash;
+	struct in6_addr *saddr, *daddr;
+	struct net *net;
+
+	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+
+	read_lock(&raw_v6_hashinfo.lock);
+	sk = sk_head(&raw_v6_hashinfo.ht[hash]);
+	if (sk != NULL) {
+		saddr = &ipv6_hdr(skb)->saddr;
+		daddr = &ipv6_hdr(skb)->daddr;
+		net = skb->dev->nd_net;
+
+		while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
+						IP6CB(skb)->iif))) {
+			rawv6_err(sk, skb, NULL, type, code,
+					inner_offset, info);
+			sk = sk_next(sk);
+		}
+	}
+	read_unlock(&raw_v6_hashinfo.lock);
+}
+
 static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
 	if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
 	    skb_checksum_complete(skb)) {
-		/* FIXME: increment a raw6 drops counter here */
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return 0;
 	}
 
 	/* Charge it to the socket. */
 	if (sock_queue_rcv_skb(sk,skb)<0) {
-		/* FIXME: increment a raw6 drops counter here */
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return 0;
 	}
@@ -382,6 +418,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 	struct raw6_sock *rp = raw6_sk(sk);
 
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+		atomic_inc(&sk->sk_drops);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -405,7 +442,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (inet->hdrincl) {
 		if (skb_checksum_complete(skb)) {
-			/* FIXME: increment a raw6 drops counter here */
+			atomic_inc(&sk->sk_drops);
 			kfree_skb(skb);
 			return 0;
 		}
@@ -496,7 +533,7 @@ csum_copy_err:
 	   as some normal condition.
 	 */
 	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
-	/* FIXME: increment a raw6 drops counter here */
+	atomic_inc(&sk->sk_drops);
 	goto out;
 }
 
@@ -618,7 +655,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 		goto error_fault;
 
 	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
 		      dst_output);
 	if (err > 0)
 		err = np->recverr ? net_xmit_errno(err) : 0;
@@ -843,7 +880,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) {
+	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
@@ -1172,76 +1209,6 @@ struct proto rawv6_prot = {
 };
 
 #ifdef CONFIG_PROC_FS
-struct raw6_iter_state {
-	int bucket;
-};
-
-#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private)
-
-static struct sock *raw6_get_first(struct seq_file *seq)
-{
-	struct sock *sk;
-	struct hlist_node *node;
-	struct raw6_iter_state* state = raw6_seq_private(seq);
-
-	for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket)
-		sk_for_each(sk, node, &raw_v6_htable[state->bucket])
-			if (sk->sk_family == PF_INET6)
-				goto out;
-	sk = NULL;
-out:
-	return sk;
-}
-
-static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk)
-{
-	struct raw6_iter_state* state = raw6_seq_private(seq);
-
-	do {
-		sk = sk_next(sk);
-try_again:
-		;
-	} while (sk && sk->sk_family != PF_INET6);
-
-	if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) {
-		sk = sk_head(&raw_v6_htable[state->bucket]);
-		goto try_again;
-	}
-	return sk;
-}
-
-static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct sock *sk = raw6_get_first(seq);
-	if (sk)
-		while (pos && (sk = raw6_get_next(seq, sk)) != NULL)
-			--pos;
-	return pos ? NULL : sk;
-}
-
-static void *raw6_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	read_lock(&raw_v6_lock);
-	return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct sock *sk;
-
-	if (v == SEQ_START_TOKEN)
-		sk = raw6_get_first(seq);
-	else
-		sk = raw6_get_next(seq, v);
-	++*pos;
-	return sk;
-}
-
-static void raw6_seq_stop(struct seq_file *seq, void *v)
-{
-	read_unlock(&raw_v6_lock);
-}
-
 static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 {
 	struct ipv6_pinfo *np = inet6_sk(sp);
@@ -1254,7 +1221,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 	srcp  = inet_sk(sp)->num;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1266,7 +1233,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 		   0, 0L, 0,
 		   sock_i_uid(sp), 0,
 		   sock_i_ino(sp),
-		   atomic_read(&sp->sk_refcnt), sp);
+		   atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
 }
 
 static int raw6_seq_show(struct seq_file *seq, void *v)
@@ -1277,23 +1244,22 @@ static int raw6_seq_show(struct seq_file *seq, void *v)
 			   "local_address "
 			   "remote_address "
 			   "st tx_queue rx_queue tr tm->when retrnsmt"
-			   "   uid  timeout inode\n");
+			   "   uid  timeout inode  drops\n");
 	else
-		raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket);
+		raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
 	return 0;
 }
 
 static const struct seq_operations raw6_seq_ops = {
-	.start =	raw6_seq_start,
-	.next =		raw6_seq_next,
-	.stop =		raw6_seq_stop,
+	.start =	raw_seq_start,
+	.next =		raw_seq_next,
+	.stop =		raw_seq_stop,
 	.show =		raw6_seq_show,
 };
 
 static int raw6_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &raw6_seq_ops,
-			sizeof(struct raw6_iter_state));
+	return raw_seq_open(inode, file, &raw_v6_hashinfo, PF_INET6);
 }
 
 static const struct file_operations raw6_seq_fops = {
@@ -1301,18 +1267,86 @@ static const struct file_operations raw6_seq_fops = {
 	.open =		raw6_seq_open,
 	.read =		seq_read,
 	.llseek =	seq_lseek,
-	.release =	seq_release_private,
+	.release =	seq_release_net,
 };
 
-int __init raw6_proc_init(void)
+static int raw6_init_net(struct net *net)
 {
-	if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops))
+	if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops))
 		return -ENOMEM;
+
 	return 0;
 }
 
+static void raw6_exit_net(struct net *net)
+{
+	proc_net_remove(net, "raw6");
+}
+
+static struct pernet_operations raw6_net_ops = {
+	.init = raw6_init_net,
+	.exit = raw6_exit_net,
+};
+
+int __init raw6_proc_init(void)
+{
+	return register_pernet_subsys(&raw6_net_ops);
+}
+
 void raw6_proc_exit(void)
 {
-	proc_net_remove(&init_net, "raw6");
+	unregister_pernet_subsys(&raw6_net_ops);
 }
 #endif	/* CONFIG_PROC_FS */
+
+/* Same as inet6_dgram_ops, sans udp_poll.  */
+static const struct proto_ops inet6_sockraw_ops = {
+	.family		   = PF_INET6,
+	.owner		   = THIS_MODULE,
+	.release	   = inet6_release,
+	.bind		   = inet6_bind,
+	.connect	   = inet_dgram_connect,	/* ok		*/
+	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
+	.accept		   = sock_no_accept,		/* a do nothing	*/
+	.getname	   = inet6_getname,
+	.poll		   = datagram_poll,		/* ok		*/
+	.ioctl		   = inet6_ioctl,		/* must change  */
+	.listen		   = sock_no_listen,		/* ok		*/
+	.shutdown	   = inet_shutdown,		/* ok		*/
+	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
+	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
+	.sendmsg	   = inet_sendmsg,		/* ok		*/
+	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static struct inet_protosw rawv6_protosw = {
+	.type		= SOCK_RAW,
+	.protocol	= IPPROTO_IP,	/* wild card */
+	.prot		= &rawv6_prot,
+	.ops		= &inet6_sockraw_ops,
+	.capability	= CAP_NET_RAW,
+	.no_check	= UDP_CSUM_DEFAULT,
+	.flags		= INET_PROTOSW_REUSE,
+};
+
+int __init rawv6_init(void)
+{
+	int ret;
+
+	ret = inet6_register_protosw(&rawv6_protosw);
+	if (ret)
+		goto out;
+out:
+	return ret;
+}
+
+void rawv6_exit(void)
+{
+	inet6_unregister_protosw(&rawv6_protosw);
+}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 76c88a93b9b5..f936d045a39d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -82,23 +82,16 @@ struct frag_queue
 	__u16			nhoffset;
 };
 
-struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
-	.high_thresh	 = 256 * 1024,
-	.low_thresh	 = 192 * 1024,
-	.timeout	 = IPV6_FRAG_TIMEOUT,
-	.secret_interval = 10 * 60 * HZ,
-};
-
 static struct inet_frags ip6_frags;
 
-int ip6_frag_nqueues(void)
+int ip6_frag_nqueues(struct net *net)
 {
-	return ip6_frags.nqueues;
+	return net->ipv6.frags.nqueues;
 }
 
-int ip6_frag_mem(void)
+int ip6_frag_mem(struct net *net)
 {
-	return atomic_read(&ip6_frags.mem);
+	return atomic_read(&net->ipv6.frags.mem);
 }
 
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
@@ -156,11 +149,12 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a)
 EXPORT_SYMBOL(ip6_frag_match);
 
 /* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
+static inline void frag_kfree_skb(struct netns_frags *nf,
+		struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip6_frags.mem);
+	atomic_sub(skb->truesize, &nf->mem);
 	kfree_skb(skb);
 }
 
@@ -190,11 +184,11 @@ static __inline__ void fq_kill(struct frag_queue *fq)
 	inet_frag_kill(&fq->q, &ip6_frags);
 }
 
-static void ip6_evictor(struct inet6_dev *idev)
+static void ip6_evictor(struct net *net, struct inet6_dev *idev)
 {
 	int evicted;
 
-	evicted = inet_frag_evictor(&ip6_frags);
+	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
 	if (evicted)
 		IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
 }
@@ -241,7 +235,7 @@ out:
 }
 
 static __inline__ struct frag_queue *
-fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
+fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst,
 	struct inet6_dev *idev)
 {
 	struct inet_frag_queue *q;
@@ -253,7 +247,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
 	arg.dst = dst;
 	hash = ip6qhashfn(id, src, dst);
 
-	q = inet_frag_find(&ip6_frags, &arg, hash);
+	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
 	if (q == NULL)
 		goto oom;
 
@@ -396,7 +390,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 				fq->q.fragments = next;
 
 			fq->q.meat -= free_it->len;
-			frag_kfree_skb(free_it, NULL);
+			frag_kfree_skb(fq->q.net, free_it, NULL);
 		}
 	}
 
@@ -416,7 +410,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
-	atomic_add(skb->truesize, &ip6_frags.mem);
+	atomic_add(skb->truesize, &fq->q.net->mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -430,7 +424,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		return ip6_frag_reasm(fq, prev, dev);
 
 	write_lock(&ip6_frags.lock);
-	list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+	list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
 	write_unlock(&ip6_frags.lock);
 	return -1;
 
@@ -510,7 +504,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip6_frags.mem);
+		atomic_add(clone->truesize, &fq->q.net->mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -525,7 +519,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip6_frags.mem);
+	atomic_sub(head->truesize, &fq->q.net->mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -535,7 +529,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip6_frags.mem);
+		atomic_sub(fp->truesize, &fq->q.net->mem);
 	}
 
 	head->next = NULL;
@@ -575,6 +569,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
+	struct net *net;
 
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
 
@@ -605,10 +600,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh)
-		ip6_evictor(ip6_dst_idev(skb->dst));
+	net = skb->dev->nd_net;
+	if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
+		ip6_evictor(net, ip6_dst_idev(skb->dst));
 
-	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
+	if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
 			  ip6_dst_idev(skb->dst))) != NULL) {
 		int ret;
 
@@ -632,12 +628,127 @@ static struct inet6_protocol frag_protocol =
 	.flags		=	INET6_PROTO_NOPOLICY,
 };
 
-void __init ipv6_frag_init(void)
+#ifdef CONFIG_SYSCTL
+static struct ctl_table ip6_frags_ctl_table[] = {
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
+		.procname	= "ip6frag_high_thresh",
+		.data		= &init_net.ipv6.frags.high_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
+		.procname	= "ip6frag_low_thresh",
+		.data		= &init_net.ipv6.frags.low_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
+		.procname	= "ip6frag_time",
+		.data		= &init_net.ipv6.frags.timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
+		.procname	= "ip6frag_secret_interval",
+		.data		= &ip6_frags.secret_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies
+	},
+	{ }
+};
+
+static int ip6_frags_sysctl_register(struct net *net)
 {
-	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
-		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
+	struct ctl_table *table;
+	struct ctl_table_header *hdr;
+
+	table = ip6_frags_ctl_table;
+	if (net != &init_net) {
+		table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL);
+		if (table == NULL)
+			goto err_alloc;
+
+		table[0].data = &net->ipv6.frags.high_thresh;
+		table[1].data = &net->ipv6.frags.low_thresh;
+		table[2].data = &net->ipv6.frags.timeout;
+		table[3].mode &= ~0222;
+	}
+
+	hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
+	if (hdr == NULL)
+		goto err_reg;
+
+	net->ipv6.sysctl.frags_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (net != &init_net)
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void ip6_frags_sysctl_unregister(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
+	kfree(table);
+}
+#else
+static inline int ip6_frags_sysctl_register(struct net *net)
+{
+	return 0;
+}
+
+static inline void ip6_frags_sysctl_unregister(struct net *net)
+{
+}
+#endif
+
+static int ipv6_frags_init_net(struct net *net)
+{
+	net->ipv6.frags.high_thresh = 256 * 1024;
+	net->ipv6.frags.low_thresh = 192 * 1024;
+	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+
+	inet_frags_init_net(&net->ipv6.frags);
+
+	return ip6_frags_sysctl_register(net);
+}
+
+static void ipv6_frags_exit_net(struct net *net)
+{
+	ip6_frags_sysctl_unregister(net);
+	inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+}
+
+static struct pernet_operations ip6_frags_ops = {
+	.init = ipv6_frags_init_net,
+	.exit = ipv6_frags_exit_net,
+};
+
+int __init ipv6_frag_init(void)
+{
+	int ret;
+
+	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+	if (ret)
+		goto out;
+
+	register_pernet_subsys(&ip6_frags_ops);
 
-	ip6_frags.ctl = &ip6_frags_ctl;
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
 	ip6_frags.destructor = NULL;
@@ -645,5 +756,15 @@ void __init ipv6_frag_init(void)
 	ip6_frags.qsize = sizeof(struct frag_queue);
 	ip6_frags.match = ip6_frag_match;
 	ip6_frags.frag_expire = ip6_frag_expire;
+	ip6_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip6_frags);
+out:
+	return ret;
+}
+
+void ipv6_frag_exit(void)
+{
+	inet_frags_fini(&ip6_frags);
+	unregister_pernet_subsys(&ip6_frags_ops);
+	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 }
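
The sysctl half of the reassembly conversion uses an idiom worth noting: one static template table serves init_net directly, every other namespace gets a kmemdup() copy whose .data pointers are re-aimed at that namespace's netns_frags fields, and the entry backed by genuinely shared state (ip6frag_secret_interval) has its write bits masked off (0644 & ~0222 == 0444). A condensed sketch of the same idiom — the helper name is illustrative, and only one field is retargeted here where the diff retargets three:

/* sketch: per-namespace duplication of a sysctl template table */
static struct ctl_table_header *example_sysctl_register(struct net *net,
							struct ctl_table *tmpl,
							size_t tmpl_size)
{
	struct ctl_table *table = tmpl;

	if (net != &init_net) {
		table = kmemdup(tmpl, tmpl_size, GFP_KERNEL);
		if (table == NULL)
			return NULL;
		/* point the copy at this namespace's state
		 * (entries 1 and 2 would be retargeted the same way) */
		table[0].data = &net->ipv6.frags.high_thresh;
		/* shared state stays read-only outside init_net */
		table[3].mode &= ~0222;
	}
	return register_net_sysctl_table(net, net_ipv6_ctl_path, table);
}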
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 20083e0d3995..4004c5f0b8d7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -73,21 +73,13 @@
 
 #define CLONE_OFFLINK_ROUTE 0
 
-static int ip6_rt_max_size = 4096;
-static int ip6_rt_gc_min_interval = HZ / 2;
-static int ip6_rt_gc_timeout = 60*HZ;
-int ip6_rt_gc_interval = 30*HZ;
-static int ip6_rt_gc_elasticity = 9;
-static int ip6_rt_mtu_expires = 10*60*HZ;
-static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
-
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
 				       struct net_device *dev, int how);
-static int		 ip6_dst_gc(void);
+static int		 ip6_dst_gc(struct dst_ops *ops);
 
 static int		ip6_pkt_discard(struct sk_buff *skb);
 static int		ip6_pkt_discard_out(struct sk_buff *skb);
@@ -113,6 +105,7 @@ static struct dst_ops ip6_dst_ops = {
 	.negative_advice	=	ip6_negative_advice,
 	.link_failure		=	ip6_link_failure,
 	.update_pmtu		=	ip6_rt_update_pmtu,
+	.local_out		=	ip6_local_out,
 	.entry_size		=	sizeof(struct rt6_info),
 };
 
@@ -152,7 +145,6 @@ struct rt6_info ip6_null_entry = {
 
 static int ip6_pkt_prohibit(struct sk_buff *skb);
 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
-static int ip6_pkt_blk_hole(struct sk_buff *skb);
 
 struct rt6_info ip6_prohibit_entry = {
 	.u = {
@@ -181,8 +173,8 @@ struct rt6_info ip6_blk_hole_entry = {
 		.obsolete	= -1,
 		.error		= -EINVAL,
 		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-		.input		= ip6_pkt_blk_hole,
-		.output		= ip6_pkt_blk_hole,
+		.input		= dst_discard,
+		.output		= dst_discard,
 		.ops		= &ip6_dst_ops,
 		.path		= (struct dst_entry*)&ip6_blk_hole_entry,
 	}
@@ -216,9 +208,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
+	struct net_device *loopback_dev =
+		dev->nd_net->loopback_dev;
 
-	if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
-		struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
+	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
+		struct inet6_dev *loopback_idev =
+			in6_dev_get(loopback_dev);
 		if (loopback_idev != NULL) {
 			rt->rt6i_idev = loopback_idev;
 			in6_dev_put(idev);
@@ -606,7 +601,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
606 601
607int ip6_ins_rt(struct rt6_info *rt) 602int ip6_ins_rt(struct rt6_info *rt)
608{ 603{
609 return __ip6_ins_rt(rt, NULL); 604 struct nl_info info = {
605 .nl_net = &init_net,
606 };
607 return __ip6_ins_rt(rt, &info);
610} 608}
611 609
612static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 610static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
@@ -782,12 +780,6 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
782 780
783EXPORT_SYMBOL(ip6_route_output); 781EXPORT_SYMBOL(ip6_route_output);
784 782
785static int ip6_blackhole_output(struct sk_buff *skb)
786{
787 kfree_skb(skb);
788 return 0;
789}
790
791int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 783int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
792{ 784{
793 struct rt6_info *ort = (struct rt6_info *) *dstp; 785 struct rt6_info *ort = (struct rt6_info *) *dstp;
@@ -800,8 +792,8 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
800 792
801 atomic_set(&new->__refcnt, 1); 793 atomic_set(&new->__refcnt, 1);
802 new->__use = 1; 794 new->__use = 1;
803 new->input = ip6_blackhole_output; 795 new->input = dst_discard;
804 new->output = ip6_blackhole_output; 796 new->output = dst_discard;
805 797
806 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 798 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
807 new->dev = ort->u.dst.dev; 799 new->dev = ort->u.dst.dev;
@@ -896,8 +888,8 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
896{ 888{
897 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 889 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
898 890
899 if (mtu < ip6_rt_min_advmss) 891 if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
900 mtu = ip6_rt_min_advmss; 892 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
901 893
902 /* 894 /*
903 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 895 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
@@ -991,25 +983,25 @@ int ndisc_dst_gc(int *more)
991 return freed; 983 return freed;
992} 984}
993 985
994static int ip6_dst_gc(void) 986static int ip6_dst_gc(struct dst_ops *ops)
995{ 987{
996 static unsigned expire = 30*HZ; 988 static unsigned expire = 30*HZ;
997 static unsigned long last_gc; 989 static unsigned long last_gc;
998 unsigned long now = jiffies; 990 unsigned long now = jiffies;
999 991
1000 if (time_after(last_gc + ip6_rt_gc_min_interval, now) && 992 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1001 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) 993 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1002 goto out; 994 goto out;
1003 995
1004 expire++; 996 expire++;
1005 fib6_run_gc(expire); 997 fib6_run_gc(expire);
1006 last_gc = now; 998 last_gc = now;
1007 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) 999 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1008 expire = ip6_rt_gc_timeout>>1; 1000 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1009 1001
1010out: 1002out:
1011 expire -= expire>>ip6_rt_gc_elasticity; 1003 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1012 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); 1004 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1013} 1005}
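ip6_dst_gc() now pulls its tunables from init_net, but the adaptive backoff is unchanged: the effective expiry grows while the table stays full, resets to half of gc_timeout once the entry count drops below gc_thresh, and decays by the elasticity shift on every call. A standalone sketch of that arithmetic (illustrative names, not kernel code):

/* Illustrative model of the GC expiry adaptation above. */
static unsigned int adapt_expire(unsigned int expire, int entries,
				 int gc_thresh, int gc_timeout,
				 int elasticity)
{
	expire++;				/* keep growing under pressure */
	if (entries < gc_thresh)
		expire = gc_timeout >> 1;	/* pressure relieved: reset */
	expire -= expire >> elasticity;		/* geometric decay each pass */
	return expire;
}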
1014 1006
1015/* Clean host part of a prefix. Not necessary in radix tree, 1007/* Clean host part of a prefix. Not necessary in radix tree,
@@ -1269,7 +1261,10 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1269 1261
1270int ip6_del_rt(struct rt6_info *rt) 1262int ip6_del_rt(struct rt6_info *rt)
1271{ 1263{
1272 return __ip6_del_rt(rt, NULL); 1264 struct nl_info info = {
1265 .nl_net = &init_net,
1266 };
1267 return __ip6_del_rt(rt, &info);
1273} 1268}
1274 1269
1275static int ip6_route_del(struct fib6_config *cfg) 1270static int ip6_route_del(struct fib6_config *cfg)
@@ -1514,7 +1509,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1514 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1509 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1515 if (allfrag) 1510 if (allfrag)
1516 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1511 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1517 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); 1512 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1518 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1513 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1519 goto out; 1514 goto out;
1520 } 1515 }
@@ -1540,7 +1535,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1540 * which is 10 mins. After 10 mins the decreased pmtu expires 1535 * which is 10 mins. After 10 mins the decreased pmtu expires
1541 * and detection of a PMTU increase happens automatically. 1536 * and detection of a PMTU increase happens automatically.
1542 */ 1537 */
1543 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); 1538 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1544 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1539 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1545 1540
1546 ip6_ins_rt(nrt); 1541 ip6_ins_rt(nrt);
@@ -1665,6 +1660,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
1665 return rt; 1660 return rt;
1666} 1661}
1667 1662
1663EXPORT_SYMBOL(rt6_get_dflt_router);
1664
1668struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1665struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1669 struct net_device *dev, 1666 struct net_device *dev,
1670 unsigned int pref) 1667 unsigned int pref)
@@ -1766,8 +1763,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1766 * Drop the packet on the floor 1763 * Drop the packet on the floor
1767 */ 1764 */
1768 1765
1769static inline int ip6_pkt_drop(struct sk_buff *skb, int code, 1766static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1770 int ipstats_mib_noroutes)
1771{ 1767{
1772 int type; 1768 int type;
1773 switch (ipstats_mib_noroutes) { 1769 switch (ipstats_mib_noroutes) {
@@ -1811,12 +1807,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1811 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 1807 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1812} 1808}
1813 1809
1814static int ip6_pkt_blk_hole(struct sk_buff *skb)
1815{
1816 kfree_skb(skb);
1817 return 0;
1818}
1819
1820#endif 1810#endif
1821 1811
1822/* 1812/*
@@ -2015,9 +2005,13 @@ errout:
2015 2005
2016static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2006static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2017{ 2007{
2008 struct net *net = skb->sk->sk_net;
2018 struct fib6_config cfg; 2009 struct fib6_config cfg;
2019 int err; 2010 int err;
2020 2011
2012 if (net != &init_net)
2013 return -EINVAL;
2014
2021 err = rtm_to_fib6_config(skb, nlh, &cfg); 2015 err = rtm_to_fib6_config(skb, nlh, &cfg);
2022 if (err < 0) 2016 if (err < 0)
2023 return err; 2017 return err;
@@ -2027,9 +2021,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
2027 2021
2028static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2022static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2029{ 2023{
2024 struct net *net = skb->sk->sk_net;
2030 struct fib6_config cfg; 2025 struct fib6_config cfg;
2031 int err; 2026 int err;
2032 2027
2028 if (net != &init_net)
2029 return -EINVAL;
2030
2033 err = rtm_to_fib6_config(skb, nlh, &cfg); 2031 err = rtm_to_fib6_config(skb, nlh, &cfg);
2034 if (err < 0) 2032 if (err < 0)
2035 return err; 2033 return err;
@@ -2164,6 +2162,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2164 2162
2165static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2163static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2166{ 2164{
2165 struct net *net = in_skb->sk->sk_net;
2167 struct nlattr *tb[RTA_MAX+1]; 2166 struct nlattr *tb[RTA_MAX+1];
2168 struct rt6_info *rt; 2167 struct rt6_info *rt;
2169 struct sk_buff *skb; 2168 struct sk_buff *skb;
@@ -2171,6 +2170,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2171 struct flowi fl; 2170 struct flowi fl;
2172 int err, iif = 0; 2171 int err, iif = 0;
2173 2172
2173 if (net != &init_net)
2174 return -EINVAL;
2175
2174 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2176 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2175 if (err < 0) 2177 if (err < 0)
2176 goto errout; 2178 goto errout;
@@ -2230,7 +2232,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2230 goto errout; 2232 goto errout;
2231 } 2233 }
2232 2234
2233 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 2235 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
2234errout: 2236errout:
2235 return err; 2237 return err;
2236} 2238}
@@ -2238,32 +2240,29 @@ errout:
2238void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2240void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2239{ 2241{
2240 struct sk_buff *skb; 2242 struct sk_buff *skb;
2241 u32 pid = 0, seq = 0; 2243 u32 seq;
2242 struct nlmsghdr *nlh = NULL; 2244 int err;
2243 int err = -ENOBUFS; 2245
2244 2246 err = -ENOBUFS;
2245 if (info) { 2247 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2246 pid = info->pid;
2247 nlh = info->nlh;
2248 if (nlh)
2249 seq = nlh->nlmsg_seq;
2250 }
2251 2248
2252 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2249 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2253 if (skb == NULL) 2250 if (skb == NULL)
2254 goto errout; 2251 goto errout;
2255 2252
2256 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); 2253 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2254 event, info->pid, seq, 0, 0);
2257 if (err < 0) { 2255 if (err < 0) {
2258 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2256 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2259 WARN_ON(err == -EMSGSIZE); 2257 WARN_ON(err == -EMSGSIZE);
2260 kfree_skb(skb); 2258 kfree_skb(skb);
2261 goto errout; 2259 goto errout;
2262 } 2260 }
2263 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); 2261 err = rtnl_notify(skb, &init_net, info->pid,
2262 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
2264errout: 2263errout:
2265 if (err < 0) 2264 if (err < 0)
2266 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); 2265 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
2267} 2266}
2268 2267
2269/* 2268/*
@@ -2353,28 +2352,61 @@ static const struct file_operations rt6_stats_seq_fops = {
2353 .llseek = seq_lseek, 2352 .llseek = seq_lseek,
2354 .release = single_release, 2353 .release = single_release,
2355}; 2354};
2355
2356static int ipv6_route_proc_init(struct net *net)
2357{
2358 int ret = -ENOMEM;
2359 if (!proc_net_fops_create(net, "ipv6_route",
2360 0, &ipv6_route_proc_fops))
2361 goto out;
2362
2363 if (!proc_net_fops_create(net, "rt6_stats",
2364 S_IRUGO, &rt6_stats_seq_fops))
2365 goto out_ipv6_route;
2366
2367 ret = 0;
2368out:
2369 return ret;
2370out_ipv6_route:
2371 proc_net_remove(net, "ipv6_route");
2372 goto out;
2373}
2374
2375static void ipv6_route_proc_fini(struct net *net)
2376{
2377 proc_net_remove(net, "ipv6_route");
2378 proc_net_remove(net, "rt6_stats");
2379}
2380#else
2381static inline int ipv6_route_proc_init(struct net *net)
2382{
2383 return 0;
2384}
2385static inline void ipv6_route_proc_fini(struct net *net)
2386{
2387 return;
2388}
2356#endif /* CONFIG_PROC_FS */ 2389#endif /* CONFIG_PROC_FS */
2357 2390
2358#ifdef CONFIG_SYSCTL 2391#ifdef CONFIG_SYSCTL
2359 2392
2360static int flush_delay;
2361
2362static 2393static
2363int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, 2394int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2364 void __user *buffer, size_t *lenp, loff_t *ppos) 2395 void __user *buffer, size_t *lenp, loff_t *ppos)
2365{ 2396{
2397 int delay;
2366 if (write) { 2398 if (write) {
2367 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2399 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2368 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); 2400 delay = init_net.ipv6.sysctl.flush_delay; fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay);
2369 return 0; 2401 return 0;
2370 } else 2402 } else
2371 return -EINVAL; 2403 return -EINVAL;
2372} 2404}
2373 2405
2374ctl_table ipv6_route_table[] = { 2406ctl_table ipv6_route_table_template[] = {
2375 { 2407 {
2376 .procname = "flush", 2408 .procname = "flush",
2377 .data = &flush_delay, 2409 .data = &init_net.ipv6.sysctl.flush_delay,
2378 .maxlen = sizeof(int), 2410 .maxlen = sizeof(int),
2379 .mode = 0200, 2411 .mode = 0200,
2380 .proc_handler = &ipv6_sysctl_rtcache_flush 2412 .proc_handler = &ipv6_sysctl_rtcache_flush
@@ -2390,7 +2422,7 @@ ctl_table ipv6_route_table[] = {
2390 { 2422 {
2391 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, 2423 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2392 .procname = "max_size", 2424 .procname = "max_size",
2393 .data = &ip6_rt_max_size, 2425 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2394 .maxlen = sizeof(int), 2426 .maxlen = sizeof(int),
2395 .mode = 0644, 2427 .mode = 0644,
2396 .proc_handler = &proc_dointvec, 2428 .proc_handler = &proc_dointvec,
@@ -2398,7 +2430,7 @@ ctl_table ipv6_route_table[] = {
2398 { 2430 {
2399 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, 2431 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2400 .procname = "gc_min_interval", 2432 .procname = "gc_min_interval",
2401 .data = &ip6_rt_gc_min_interval, 2433 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2402 .maxlen = sizeof(int), 2434 .maxlen = sizeof(int),
2403 .mode = 0644, 2435 .mode = 0644,
2404 .proc_handler = &proc_dointvec_jiffies, 2436 .proc_handler = &proc_dointvec_jiffies,
@@ -2407,7 +2439,7 @@ ctl_table ipv6_route_table[] = {
2407 { 2439 {
2408 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, 2440 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2409 .procname = "gc_timeout", 2441 .procname = "gc_timeout",
2410 .data = &ip6_rt_gc_timeout, 2442 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2411 .maxlen = sizeof(int), 2443 .maxlen = sizeof(int),
2412 .mode = 0644, 2444 .mode = 0644,
2413 .proc_handler = &proc_dointvec_jiffies, 2445 .proc_handler = &proc_dointvec_jiffies,
@@ -2416,7 +2448,7 @@ ctl_table ipv6_route_table[] = {
2416 { 2448 {
2417 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, 2449 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2418 .procname = "gc_interval", 2450 .procname = "gc_interval",
2419 .data = &ip6_rt_gc_interval, 2451 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2420 .maxlen = sizeof(int), 2452 .maxlen = sizeof(int),
2421 .mode = 0644, 2453 .mode = 0644,
2422 .proc_handler = &proc_dointvec_jiffies, 2454 .proc_handler = &proc_dointvec_jiffies,
@@ -2425,7 +2457,7 @@ ctl_table ipv6_route_table[] = {
2425 { 2457 {
2426 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, 2458 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2427 .procname = "gc_elasticity", 2459 .procname = "gc_elasticity",
2428 .data = &ip6_rt_gc_elasticity, 2460 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2429 .maxlen = sizeof(int), 2461 .maxlen = sizeof(int),
2430 .mode = 0644, 2462 .mode = 0644,
2431 .proc_handler = &proc_dointvec_jiffies, 2463 .proc_handler = &proc_dointvec_jiffies,
@@ -2434,7 +2466,7 @@ ctl_table ipv6_route_table[] = {
2434 { 2466 {
2435 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, 2467 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2436 .procname = "mtu_expires", 2468 .procname = "mtu_expires",
2437 .data = &ip6_rt_mtu_expires, 2469 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2438 .maxlen = sizeof(int), 2470 .maxlen = sizeof(int),
2439 .mode = 0644, 2471 .mode = 0644,
2440 .proc_handler = &proc_dointvec_jiffies, 2472 .proc_handler = &proc_dointvec_jiffies,
@@ -2443,7 +2475,7 @@ ctl_table ipv6_route_table[] = {
2443 { 2475 {
2444 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, 2476 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2445 .procname = "min_adv_mss", 2477 .procname = "min_adv_mss",
2446 .data = &ip6_rt_min_advmss, 2478 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2447 .maxlen = sizeof(int), 2479 .maxlen = sizeof(int),
2448 .mode = 0644, 2480 .mode = 0644,
2449 .proc_handler = &proc_dointvec_jiffies, 2481 .proc_handler = &proc_dointvec_jiffies,
@@ -2452,7 +2484,7 @@ ctl_table ipv6_route_table[] = {
2452 { 2484 {
2453 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, 2485 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2454 .procname = "gc_min_interval_ms", 2486 .procname = "gc_min_interval_ms",
2455 .data = &ip6_rt_gc_min_interval, 2487 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2456 .maxlen = sizeof(int), 2488 .maxlen = sizeof(int),
2457 .mode = 0644, 2489 .mode = 0644,
2458 .proc_handler = &proc_dointvec_ms_jiffies, 2490 .proc_handler = &proc_dointvec_ms_jiffies,
@@ -2461,42 +2493,74 @@ ctl_table ipv6_route_table[] = {
2461 { .ctl_name = 0 } 2493 { .ctl_name = 0 }
2462}; 2494};
2463 2495
2496struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2497{
2498 struct ctl_table *table;
2499
2500 table = kmemdup(ipv6_route_table_template,
2501 sizeof(ipv6_route_table_template),
2502 GFP_KERNEL);
2503 return table;
2504}
2464#endif 2505#endif
2465 2506
2466void __init ip6_route_init(void) 2507int __init ip6_route_init(void)
2467{ 2508{
2509 int ret;
2510
2468 ip6_dst_ops.kmem_cachep = 2511 ip6_dst_ops.kmem_cachep =
2469 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2512 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2470 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 2513 SLAB_HWCACHE_ALIGN, NULL);
2514 if (!ip6_dst_ops.kmem_cachep)
2515 return -ENOMEM;
2516
2471 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; 2517 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2472 2518
2473 fib6_init(); 2519 ret = fib6_init();
2474 proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops); 2520 if (ret)
2475 proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2521 goto out_kmem_cache;
2476#ifdef CONFIG_XFRM
2477 xfrm6_init();
2478#endif
2479#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2480 fib6_rules_init();
2481#endif
2482 2522
2483 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL); 2523 ret = ipv6_route_proc_init(&init_net);
2484 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL); 2524 if (ret)
2485 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL); 2525 goto out_fib6_init;
2526
2527 ret = xfrm6_init();
2528 if (ret)
2529 goto out_proc_init;
2530
2531 ret = fib6_rules_init();
2532 if (ret)
2533 goto xfrm6_init;
2534
2535 ret = -ENOBUFS;
2536 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2537 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2538 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2539 goto fib6_rules_init;
2540
2541 ret = 0;
2542out:
2543 return ret;
2544
2545fib6_rules_init:
2546 fib6_rules_cleanup();
2547xfrm6_init:
2548 xfrm6_fini();
2549out_proc_init:
2550 ipv6_route_proc_fini(&init_net);
2551out_fib6_init:
2552 rt6_ifdown(NULL);
2553 fib6_gc_cleanup();
2554out_kmem_cache:
2555 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2556 goto out;
2486} 2557}
2487 2558
2488void ip6_route_cleanup(void) 2559void ip6_route_cleanup(void)
2489{ 2560{
2490#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2491 fib6_rules_cleanup(); 2561 fib6_rules_cleanup();
2492#endif 2562 ipv6_route_proc_fini(&init_net);
2493#ifdef CONFIG_PROC_FS
2494 proc_net_remove(&init_net, "ipv6_route");
2495 proc_net_remove(&init_net, "rt6_stats");
2496#endif
2497#ifdef CONFIG_XFRM
2498 xfrm6_fini(); 2563 xfrm6_fini();
2499#endif
2500 rt6_ifdown(NULL); 2564 rt6_ifdown(NULL);
2501 fib6_gc_cleanup(); 2565 fib6_gc_cleanup();
2502 kmem_cache_destroy(ip6_dst_ops.kmem_cachep); 2566 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
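The reworked ip6_route_init() is a textbook instance of the kernel's goto-unwind idiom: every failure jumps to a label that undoes only what has already succeeded, and the labels fall through in reverse registration order so later labels reuse earlier cleanups. A minimal sketch of the shape (step_a/step_b and their undo functions are placeholders, not kernel symbols):

int init_example(void)
{
	int ret;

	ret = step_a();
	if (ret)
		goto out;

	ret = step_b();
	if (ret)
		goto undo_a;		/* only step_a succeeded so far */

	return 0;

undo_a:
	undo_step_a();
out:
	return ret;
}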
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 71433d29d884..e77239d02bf5 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -16,6 +16,7 @@
16 * Changes: 16 * Changes:
17 * Roger Venning <r.venning@telstra.com>: 6to4 support 17 * Roger Venning <r.venning@telstra.com>: 6to4 support
18 * Nate Thompson <nate@thebog.net>: 6to4 support 18 * Nate Thompson <nate@thebog.net>: 6to4 support
19 * Fred L. Templin <fltemplin@acm.org>: isatap support
19 */ 20 */
20 21
21#include <linux/module.h> 22#include <linux/module.h>
@@ -182,6 +183,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
182 dev->init = ipip6_tunnel_init; 183 dev->init = ipip6_tunnel_init;
183 nt->parms = *parms; 184 nt->parms = *parms;
184 185
186 if (parms->i_flags & SIT_ISATAP)
187 dev->priv_flags |= IFF_ISATAP;
188
185 if (register_netdevice(dev) < 0) { 189 if (register_netdevice(dev) < 0) {
186 free_netdev(dev); 190 free_netdev(dev);
187 goto failed; 191 goto failed;
@@ -364,6 +368,48 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
364 IP6_ECN_set_ce(ipv6_hdr(skb)); 368 IP6_ECN_set_ce(ipv6_hdr(skb));
365} 369}
366 370
371/* ISATAP (RFC4214) - check source address */
372static int
373isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev)
374{
375 struct neighbour *neigh;
376 struct dst_entry *dst;
377 struct rt6_info *rt;
378 struct flowi fl;
379 struct in6_addr *addr6;
380 struct in6_addr rtr;
381 struct ipv6hdr *iph6;
382 int ok = 0;
383
384 /* from an on-link default router */
385 ipv6_addr_set(&rtr, htonl(0xFE800000), 0, 0, 0);
386 ipv6_isatap_eui64(rtr.s6_addr + 8, iph->saddr);
387 if ((rt = rt6_get_dflt_router(&rtr, dev))) {
388 dst_release(&rt->u.dst);
389 return 1;
390 }
391
392 iph6 = ipv6_hdr(skb);
393 memset(&fl, 0, sizeof(fl));
394 fl.proto = iph6->nexthdr;
395 ipv6_addr_copy(&fl.fl6_dst, &iph6->saddr);
396 fl.oif = dev->ifindex;
397 security_skb_classify_flow(skb, &fl);
398
399 dst = ip6_route_output(NULL, &fl);
400 if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) {
401
402 addr6 = (struct in6_addr*)&neigh->primary_key;
403
404 /* from correct previous hop */
405 if (ipv6_addr_is_isatap(addr6) &&
406 (addr6->s6_addr32[3] == iph->saddr))
407 ok = 1;
408 }
409 dst_release(dst);
410 return ok;
411}
412
367static int ipip6_rcv(struct sk_buff *skb) 413static int ipip6_rcv(struct sk_buff *skb)
368{ 414{
369 struct iphdr *iph; 415 struct iphdr *iph;
@@ -382,6 +428,14 @@ static int ipip6_rcv(struct sk_buff *skb)
382 IPCB(skb)->flags = 0; 428 IPCB(skb)->flags = 0;
383 skb->protocol = htons(ETH_P_IPV6); 429 skb->protocol = htons(ETH_P_IPV6);
384 skb->pkt_type = PACKET_HOST; 430 skb->pkt_type = PACKET_HOST;
431
432 if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
433 !isatap_srcok(skb, iph, tunnel->dev)) {
434 tunnel->stat.rx_errors++;
435 read_unlock(&ipip6_lock);
436 kfree_skb(skb);
437 return 0;
438 }
385 tunnel->stat.rx_packets++; 439 tunnel->stat.rx_packets++;
386 tunnel->stat.rx_bytes += skb->len; 440 tunnel->stat.rx_bytes += skb->len;
387 skb->dev = tunnel->dev; 441 skb->dev = tunnel->dev;
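isatap_srcok() accepts a packet when the outer IPv4 source maps either to a known on-link ISATAP router (reconstructed as fe80:: plus the router's ISATAP interface identifier) or to the correct previous-hop neighbour. The identifier layout from RFC 4214 that ipv6_isatap_eui64() produces looks roughly like this (an illustrative stand-in, not the kernel helper):

/* ISATAP interface identifier: 0000:5efe:a.b.c.d, with the u bit set
 * (0200:5efe) when the embedded IPv4 address is globally unique. */
static void isatap_ifid(unsigned char ifid[8], __be32 v4addr, int is_global)
{
	ifid[0] = is_global ? 0x02 : 0x00;
	ifid[1] = 0x00;
	ifid[2] = 0x5e;			/* IANA OUI 00-00-5e */
	ifid[3] = 0xfe;			/* ISATAP type octet */
	memcpy(ifid + 4, &v4addr, 4);	/* embedded IPv4 address */
}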
@@ -444,6 +498,29 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
444 if (skb->protocol != htons(ETH_P_IPV6)) 498 if (skb->protocol != htons(ETH_P_IPV6))
445 goto tx_error; 499 goto tx_error;
446 500
501 /* ISATAP (RFC4214) - must come before 6to4 */
502 if (dev->priv_flags & IFF_ISATAP) {
503 struct neighbour *neigh = NULL;
504
505 if (skb->dst)
506 neigh = skb->dst->neighbour;
507
508 if (neigh == NULL) {
509 if (net_ratelimit())
510 printk(KERN_DEBUG "sit: nexthop == NULL\n");
511 goto tx_error;
512 }
513
514 addr6 = (struct in6_addr*)&neigh->primary_key;
515 addr_type = ipv6_addr_type(addr6);
516
517 if ((addr_type & IPV6_ADDR_UNICAST) &&
518 ipv6_addr_is_isatap(addr6))
519 dst = addr6->s6_addr32[3];
520 else
521 goto tx_error;
522 }
523
447 if (!dst) 524 if (!dst)
448 dst = try_6to4(&iph6->daddr); 525 dst = try_6to4(&iph6->daddr);
449 526
@@ -480,7 +557,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
480 .tos = RT_TOS(tos) } }, 557 .tos = RT_TOS(tos) } },
481 .oif = tunnel->parms.link, 558 .oif = tunnel->parms.link,
482 .proto = IPPROTO_IPV6 }; 559 .proto = IPPROTO_IPV6 };
483 if (ip_route_output_key(&rt, &fl)) { 560 if (ip_route_output_key(&init_net, &rt, &fl)) {
484 tunnel->stat.tx_carrier_errors++; 561 tunnel->stat.tx_carrier_errors++;
485 goto tx_error_icmp; 562 goto tx_error_icmp;
486 } 563 }
@@ -592,6 +669,42 @@ tx_error:
592 return 0; 669 return 0;
593} 670}
594 671
672static void ipip6_tunnel_bind_dev(struct net_device *dev)
673{
674 struct net_device *tdev = NULL;
675 struct ip_tunnel *tunnel;
676 struct iphdr *iph;
677
678 tunnel = netdev_priv(dev);
679 iph = &tunnel->parms.iph;
680
681 if (iph->daddr) {
682 struct flowi fl = { .nl_u = { .ip4_u =
683 { .daddr = iph->daddr,
684 .saddr = iph->saddr,
685 .tos = RT_TOS(iph->tos) } },
686 .oif = tunnel->parms.link,
687 .proto = IPPROTO_IPV6 };
688 struct rtable *rt;
689 if (!ip_route_output_key(&init_net, &rt, &fl)) {
690 tdev = rt->u.dst.dev;
691 ip_rt_put(rt);
692 }
693 dev->flags |= IFF_POINTOPOINT;
694 }
695
696 if (!tdev && tunnel->parms.link)
697 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
698
699 if (tdev) {
700 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
701 dev->mtu = tdev->mtu - sizeof(struct iphdr);
702 if (dev->mtu < IPV6_MIN_MTU)
703 dev->mtu = IPV6_MIN_MTU;
704 }
705 dev->iflink = tunnel->parms.link;
706}
707
595static int 708static int
596ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 709ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
597{ 710{
@@ -663,6 +776,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
663 if (cmd == SIOCCHGTUNNEL) { 776 if (cmd == SIOCCHGTUNNEL) {
664 t->parms.iph.ttl = p.iph.ttl; 777 t->parms.iph.ttl = p.iph.ttl;
665 t->parms.iph.tos = p.iph.tos; 778 t->parms.iph.tos = p.iph.tos;
779 if (t->parms.link != p.link) {
780 t->parms.link = p.link;
781 ipip6_tunnel_bind_dev(dev);
782 netdev_state_change(dev);
783 }
666 } 784 }
667 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 785 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
668 err = -EFAULT; 786 err = -EFAULT;
@@ -731,12 +849,9 @@ static void ipip6_tunnel_setup(struct net_device *dev)
731 849
732static int ipip6_tunnel_init(struct net_device *dev) 850static int ipip6_tunnel_init(struct net_device *dev)
733{ 851{
734 struct net_device *tdev = NULL;
735 struct ip_tunnel *tunnel; 852 struct ip_tunnel *tunnel;
736 struct iphdr *iph;
737 853
738 tunnel = netdev_priv(dev); 854 tunnel = netdev_priv(dev);
739 iph = &tunnel->parms.iph;
740 855
741 tunnel->dev = dev; 856 tunnel->dev = dev;
742 strcpy(tunnel->parms.name, dev->name); 857 strcpy(tunnel->parms.name, dev->name);
@@ -744,31 +859,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
744 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 859 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
745 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 860 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
746 861
747 if (iph->daddr) { 862 ipip6_tunnel_bind_dev(dev);
748 struct flowi fl = { .nl_u = { .ip4_u =
749 { .daddr = iph->daddr,
750 .saddr = iph->saddr,
751 .tos = RT_TOS(iph->tos) } },
752 .oif = tunnel->parms.link,
753 .proto = IPPROTO_IPV6 };
754 struct rtable *rt;
755 if (!ip_route_output_key(&rt, &fl)) {
756 tdev = rt->u.dst.dev;
757 ip_rt_put(rt);
758 }
759 dev->flags |= IFF_POINTOPOINT;
760 }
761
762 if (!tdev && tunnel->parms.link)
763 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
764
765 if (tdev) {
766 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
767 dev->mtu = tdev->mtu - sizeof(struct iphdr);
768 if (dev->mtu < IPV6_MIN_MTU)
769 dev->mtu = IPV6_MIN_MTU;
770 }
771 dev->iflink = tunnel->parms.link;
772 863
773 return 0; 864 return 0;
774} 865}
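Factoring ipip6_tunnel_bind_dev() out of ipip6_tunnel_init() is what lets the SIOCCHGTUNNEL branch above rebind a live tunnel to a new underlying link. The MTU rule it applies is small enough to state on its own (a sketch of the logic, not a kernel function):

/* Tunnel MTU: leave room for the 20-byte outer IPv4 header, but never
 * fall below the IPv6 minimum link MTU of 1280 (IPV6_MIN_MTU). */
static int sit_tunnel_mtu(int lower_mtu)
{
	int mtu = lower_mtu - 20;	/* sizeof(struct iphdr) */

	return mtu < 1280 ? 1280 : mtu;
}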
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 68bb2548e469..408691b777c2 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -14,66 +14,30 @@
14#include <net/addrconf.h> 14#include <net/addrconf.h>
15#include <net/inet_frag.h> 15#include <net/inet_frag.h>
16 16
17#ifdef CONFIG_SYSCTL 17static ctl_table ipv6_table_template[] = {
18
19static ctl_table ipv6_table[] = {
20 { 18 {
21 .ctl_name = NET_IPV6_ROUTE, 19 .ctl_name = NET_IPV6_ROUTE,
22 .procname = "route", 20 .procname = "route",
23 .maxlen = 0, 21 .maxlen = 0,
24 .mode = 0555, 22 .mode = 0555,
25 .child = ipv6_route_table 23 .child = ipv6_route_table_template
26 }, 24 },
27 { 25 {
28 .ctl_name = NET_IPV6_ICMP, 26 .ctl_name = NET_IPV6_ICMP,
29 .procname = "icmp", 27 .procname = "icmp",
30 .maxlen = 0, 28 .maxlen = 0,
31 .mode = 0555, 29 .mode = 0555,
32 .child = ipv6_icmp_table 30 .child = ipv6_icmp_table_template
33 }, 31 },
34 { 32 {
35 .ctl_name = NET_IPV6_BINDV6ONLY, 33 .ctl_name = NET_IPV6_BINDV6ONLY,
36 .procname = "bindv6only", 34 .procname = "bindv6only",
37 .data = &sysctl_ipv6_bindv6only, 35 .data = &init_net.ipv6.sysctl.bindv6only,
38 .maxlen = sizeof(int), 36 .maxlen = sizeof(int),
39 .mode = 0644, 37 .mode = 0644,
40 .proc_handler = &proc_dointvec 38 .proc_handler = &proc_dointvec
41 }, 39 },
42 { 40 {
43 .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
44 .procname = "ip6frag_high_thresh",
45 .data = &ip6_frags_ctl.high_thresh,
46 .maxlen = sizeof(int),
47 .mode = 0644,
48 .proc_handler = &proc_dointvec
49 },
50 {
51 .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
52 .procname = "ip6frag_low_thresh",
53 .data = &ip6_frags_ctl.low_thresh,
54 .maxlen = sizeof(int),
55 .mode = 0644,
56 .proc_handler = &proc_dointvec
57 },
58 {
59 .ctl_name = NET_IPV6_IP6FRAG_TIME,
60 .procname = "ip6frag_time",
61 .data = &ip6_frags_ctl.timeout,
62 .maxlen = sizeof(int),
63 .mode = 0644,
64 .proc_handler = &proc_dointvec_jiffies,
65 .strategy = &sysctl_jiffies,
66 },
67 {
68 .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
69 .procname = "ip6frag_secret_interval",
70 .data = &ip6_frags_ctl.secret_interval,
71 .maxlen = sizeof(int),
72 .mode = 0644,
73 .proc_handler = &proc_dointvec_jiffies,
74 .strategy = &sysctl_jiffies
75 },
76 {
77 .ctl_name = NET_IPV6_MLD_MAX_MSF, 41 .ctl_name = NET_IPV6_MLD_MAX_MSF,
78 .procname = "mld_max_msf", 42 .procname = "mld_max_msf",
79 .data = &sysctl_mld_max_msf, 43 .data = &sysctl_mld_max_msf,
@@ -84,39 +48,106 @@ static ctl_table ipv6_table[] = {
84 { .ctl_name = 0 } 48 { .ctl_name = 0 }
85}; 49};
86 50
87static struct ctl_table_header *ipv6_sysctl_header; 51struct ctl_path net_ipv6_ctl_path[] = {
88 52 { .procname = "net", .ctl_name = CTL_NET, },
89static ctl_table ipv6_net_table[] = { 53 { .procname = "ipv6", .ctl_name = NET_IPV6, },
90 { 54 { },
91 .ctl_name = NET_IPV6,
92 .procname = "ipv6",
93 .mode = 0555,
94 .child = ipv6_table
95 },
96 { .ctl_name = 0 }
97};
98
99static ctl_table ipv6_root_table[] = {
100 {
101 .ctl_name = CTL_NET,
102 .procname = "net",
103 .mode = 0555,
104 .child = ipv6_net_table
105 },
106 { .ctl_name = 0 }
107}; 55};
56EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
108 57
109void ipv6_sysctl_register(void) 58static int ipv6_sysctl_net_init(struct net *net)
110{ 59{
111 ipv6_sysctl_header = register_sysctl_table(ipv6_root_table); 60 struct ctl_table *ipv6_table;
61 struct ctl_table *ipv6_route_table;
62 struct ctl_table *ipv6_icmp_table;
63 int err;
64
65 err = -ENOMEM;
66 ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
67 GFP_KERNEL);
68 if (!ipv6_table)
69 goto out;
70
71 ipv6_route_table = ipv6_route_sysctl_init(net);
72 if (!ipv6_route_table)
73 goto out_ipv6_table;
74
75 ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
76 if (!ipv6_icmp_table)
77 goto out_ipv6_route_table;
78
79 ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay;
80 /* ipv6_route_table[1].data will be handled when we have
81 routes per namespace */
82 ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
83 ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
84 ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
85 ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
86 ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
87 ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
88 ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
89 ipv6_table[0].child = ipv6_route_table;
90
91 ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time;
92 ipv6_table[1].child = ipv6_icmp_table;
93
94 ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
95
96 /* We don't want this value to be per-namespace; it should be global
97 to all namespaces, so make it read-only when we are not in the
98 init network namespace */
99 if (net != &init_net)
100 ipv6_table[3].mode = 0444;
101
102 net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
103 ipv6_table);
104 if (!net->ipv6.sysctl.table)
105 goto out_ipv6_icmp_table;
109
110 err = 0;
111out:
112 return err;
113
114out_ipv6_icmp_table:
115 kfree(ipv6_icmp_table);
116out_ipv6_route_table:
117 kfree(ipv6_route_table);
118out_ipv6_table:
119 kfree(ipv6_table);
120 goto out;
112} 121}
113 122
114void ipv6_sysctl_unregister(void) 123static void ipv6_sysctl_net_exit(struct net *net)
115{ 124{
116 unregister_sysctl_table(ipv6_sysctl_header); 125 struct ctl_table *ipv6_table;
117} 126 struct ctl_table *ipv6_route_table;
127 struct ctl_table *ipv6_icmp_table;
118 128
119#endif /* CONFIG_SYSCTL */ 129 ipv6_table = net->ipv6.sysctl.table->ctl_table_arg;
130 ipv6_route_table = ipv6_table[0].child;
131 ipv6_icmp_table = ipv6_table[1].child;
120 132
133 unregister_net_sysctl_table(net->ipv6.sysctl.table);
121 134
135 kfree(ipv6_table);
136 kfree(ipv6_route_table);
137 kfree(ipv6_icmp_table);
138}
139
140static struct pernet_operations ipv6_sysctl_net_ops = {
141 .init = ipv6_sysctl_net_init,
142 .exit = ipv6_sysctl_net_exit,
143};
122 144
145int ipv6_sysctl_register(void)
146{
147 return register_pernet_subsys(&ipv6_sysctl_net_ops);
148}
149
150void ipv6_sysctl_unregister(void)
151{
152 unregister_pernet_subsys(&ipv6_sysctl_net_ops);
153}
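The init/exit pair above is the template-duplication pattern this series uses for per-namespace sysctls: kmemdup() a static template, repoint each entry's .data at the namespace's own storage, then register the copy and free it on namespace teardown. Condensed into one hypothetical helper (the name and the data argument are illustrative):

static struct ctl_table *dup_sysctl_template(const struct ctl_table *tmpl,
					     size_t size, void *net_data)
{
	struct ctl_table *t = kmemdup(tmpl, size, GFP_KERNEL);

	if (t)
		t[0].data = net_data;	/* rebind to this namespace's field */
	return t;
}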
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 93980c3b83e6..00c08399837d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -265,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
265 if (final_p) 265 if (final_p)
266 ipv6_addr_copy(&fl.fl6_dst, final_p); 266 ipv6_addr_copy(&fl.fl6_dst, final_p);
267 267
268 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { 268 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
269 if (err == -EREMOTE) 269 if (err == -EREMOTE)
270 err = ip6_dst_blackhole(sk, &dst, &fl); 270 err = ip6_dst_blackhole(sk, &dst, &fl);
271 if (err < 0) 271 if (err < 0)
@@ -733,7 +733,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
733 struct in6_addr *saddr, 733 struct in6_addr *saddr,
734 struct in6_addr *daddr, 734 struct in6_addr *daddr,
735 struct tcphdr *th, int protocol, 735 struct tcphdr *th, int protocol,
736 int tcplen) 736 unsigned int tcplen)
737{ 737{
738 struct scatterlist sg[4]; 738 struct scatterlist sg[4];
739 __u16 data_len; 739 __u16 data_len;
@@ -818,7 +818,7 @@ static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
818 struct dst_entry *dst, 818 struct dst_entry *dst,
819 struct request_sock *req, 819 struct request_sock *req,
820 struct tcphdr *th, int protocol, 820 struct tcphdr *th, int protocol,
821 int tcplen) 821 unsigned int tcplen)
822{ 822{
823 struct in6_addr *saddr, *daddr; 823 struct in6_addr *saddr, *daddr;
824 824
@@ -985,7 +985,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
985 struct tcphdr *th = tcp_hdr(skb), *t1; 985 struct tcphdr *th = tcp_hdr(skb), *t1;
986 struct sk_buff *buff; 986 struct sk_buff *buff;
987 struct flowi fl; 987 struct flowi fl;
988 int tot_len = sizeof(*th); 988 unsigned int tot_len = sizeof(*th);
989#ifdef CONFIG_TCP_MD5SIG 989#ifdef CONFIG_TCP_MD5SIG
990 struct tcp_md5sig_key *key; 990 struct tcp_md5sig_key *key;
991#endif 991#endif
@@ -1085,7 +1085,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
1085 struct tcphdr *th = tcp_hdr(skb), *t1; 1085 struct tcphdr *th = tcp_hdr(skb), *t1;
1086 struct sk_buff *buff; 1086 struct sk_buff *buff;
1087 struct flowi fl; 1087 struct flowi fl;
1088 int tot_len = sizeof(struct tcphdr); 1088 unsigned int tot_len = sizeof(struct tcphdr);
1089 __be32 *topt; 1089 __be32 *topt;
1090#ifdef CONFIG_TCP_MD5SIG 1090#ifdef CONFIG_TCP_MD5SIG
1091 struct tcp_md5sig_key *key; 1091 struct tcp_md5sig_key *key;
@@ -2166,14 +2166,36 @@ static struct inet_protosw tcpv6_protosw = {
2166 INET_PROTOSW_ICSK, 2166 INET_PROTOSW_ICSK,
2167}; 2167};
2168 2168
2169void __init tcpv6_init(void) 2169int __init tcpv6_init(void)
2170{ 2170{
2171 int ret;
2172
2173 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2174 if (ret)
2175 goto out;
2176
2171 /* register inet6 protocol */ 2177 /* register inet6 protocol */
2172 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0) 2178 ret = inet6_register_protosw(&tcpv6_protosw);
2173 printk(KERN_ERR "tcpv6_init: Could not register protocol\n"); 2179 if (ret)
2174 inet6_register_protosw(&tcpv6_protosw); 2180 goto out_tcpv6_protocol;
2181
2182 ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6,
2183 SOCK_RAW, IPPROTO_TCP);
2184 if (ret)
2185 goto out_tcpv6_protosw;
2186out:
2187 return ret;
2175 2188
2176 if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW, 2189out_tcpv6_protosw:
2177 IPPROTO_TCP) < 0) 2190 inet6_unregister_protosw(&tcpv6_protosw);
2178 panic("Failed to create the TCPv6 control socket.\n"); 2191out_tcpv6_protocol:
2192 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2193 goto out;
2194}
2195
2196void tcpv6_exit(void)
2197{
2198 sock_release(tcp6_socket);
2199 inet6_unregister_protosw(&tcpv6_protosw);
2200 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2179} 2201}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ee1cc3f8599f..bd4b9df8f614 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -34,6 +34,7 @@
34#include <linux/ipv6.h> 34#include <linux/ipv6.h>
35#include <linux/icmpv6.h> 35#include <linux/icmpv6.h>
36#include <linux/init.h> 36#include <linux/init.h>
37#include <linux/module.h>
37#include <linux/skbuff.h> 38#include <linux/skbuff.h>
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39 40
@@ -50,8 +51,6 @@
50#include <linux/seq_file.h> 51#include <linux/seq_file.h>
51#include "udp_impl.h" 52#include "udp_impl.h"
52 53
53DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
54
55static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) 54static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
56{ 55{
57 return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); 56 return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
@@ -121,6 +120,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
121 struct inet_sock *inet = inet_sk(sk); 120 struct inet_sock *inet = inet_sk(sk);
122 struct sk_buff *skb; 121 struct sk_buff *skb;
123 unsigned int ulen, copied; 122 unsigned int ulen, copied;
123 int peeked;
124 int err; 124 int err;
125 int is_udplite = IS_UDPLITE(sk); 125 int is_udplite = IS_UDPLITE(sk);
126 126
@@ -131,7 +131,8 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
131 return ipv6_recv_error(sk, msg, len); 131 return ipv6_recv_error(sk, msg, len);
132 132
133try_again: 133try_again:
134 skb = skb_recv_datagram(sk, flags, noblock, &err); 134 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
135 &peeked, &err);
135 if (!skb) 136 if (!skb)
136 goto out; 137 goto out;
137 138
@@ -164,6 +165,9 @@ try_again:
164 if (err) 165 if (err)
165 goto out_free; 166 goto out_free;
166 167
168 if (!peeked)
169 UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
170
167 sock_recv_timestamp(msg, sk, skb); 171 sock_recv_timestamp(msg, sk, skb);
168 172
169 /* Copy the address. */ 173 /* Copy the address. */
@@ -200,13 +204,17 @@ try_again:
200 err = ulen; 204 err = ulen;
201 205
202out_free: 206out_free:
207 lock_sock(sk);
203 skb_free_datagram(sk, skb); 208 skb_free_datagram(sk, skb);
209 release_sock(sk);
204out: 210out:
205 return err; 211 return err;
206 212
207csum_copy_err: 213csum_copy_err:
208 UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); 214 lock_sock(sk);
209 skb_kill_datagram(sk, skb, flags); 215 if (!skb_kill_datagram(sk, skb, flags))
216 UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
217 release_sock(sk);
210 218
211 if (flags & MSG_DONTWAIT) 219 if (flags & MSG_DONTWAIT)
212 return -EAGAIN; 220 return -EAGAIN;
@@ -251,13 +259,14 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
251 struct inet6_skb_parm *opt, int type, 259 struct inet6_skb_parm *opt, int type,
252 int code, int offset, __be32 info ) 260 int code, int offset, __be32 info )
253{ 261{
254 return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); 262 __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash);
255} 263}
256 264
257int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 265int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
258{ 266{
259 struct udp_sock *up = udp_sk(sk); 267 struct udp_sock *up = udp_sk(sk);
260 int rc; 268 int rc;
269 int is_udplite = IS_UDPLITE(sk);
261 270
262 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 271 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
263 goto drop; 272 goto drop;
@@ -265,7 +274,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
265 /* 274 /*
266 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). 275 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
267 */ 276 */
268 if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { 277 if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
269 278
270 if (up->pcrlen == 0) { /* full coverage was set */ 279 if (up->pcrlen == 0) { /* full coverage was set */
271 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" 280 LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
@@ -289,13 +298,13 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
289 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 298 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
290 /* Note that an ENOMEM error is charged twice */ 299 /* Note that an ENOMEM error is charged twice */
291 if (rc == -ENOMEM) 300 if (rc == -ENOMEM)
292 UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); 301 UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
293 goto drop; 302 goto drop;
294 } 303 }
295 UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); 304
296 return 0; 305 return 0;
297drop: 306drop:
298 UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); 307 UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
299 kfree_skb(skb); 308 kfree_skb(skb);
300 return -1; 309 return -1;
301} 310}
@@ -361,10 +370,21 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
361 while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, 370 while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr,
362 uh->source, saddr, dif))) { 371 uh->source, saddr, dif))) {
363 struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); 372 struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
364 if (buff) 373 if (buff) {
365 udpv6_queue_rcv_skb(sk2, buff); 374 bh_lock_sock_nested(sk2);
375 if (!sock_owned_by_user(sk2))
376 udpv6_queue_rcv_skb(sk2, buff);
377 else
378 sk_add_backlog(sk2, buff);
379 bh_unlock_sock(sk2);
380 }
366 } 381 }
367 udpv6_queue_rcv_skb(sk, skb); 382 bh_lock_sock_nested(sk);
383 if (!sock_owned_by_user(sk))
384 udpv6_queue_rcv_skb(sk, skb);
385 else
386 sk_add_backlog(sk, skb);
387 bh_unlock_sock(sk);
368out: 388out:
369 read_unlock(&udp_hash_lock); 389 read_unlock(&udp_hash_lock);
370 return 0; 390 return 0;
@@ -477,7 +497,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
477 497
478 /* deliver */ 498 /* deliver */
479 499
480 udpv6_queue_rcv_skb(sk, skb); 500 bh_lock_sock_nested(sk);
501 if (!sock_owned_by_user(sk))
502 udpv6_queue_rcv_skb(sk, skb);
503 else
504 sk_add_backlog(sk, skb);
505 bh_unlock_sock(sk);
481 sock_put(sk); 506 sock_put(sk);
482 return 0; 507 return 0;
483 508
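All three UDPv6 delivery sites now take the socket lock in softirq context and divert to the backlog whenever a process currently owns the socket; release_sock() later replays backlogged skbs through sk_backlog_rcv. The recurring shape, with deliver() standing in for udpv6_queue_rcv_skb():

bh_lock_sock_nested(sk);
if (!sock_owned_by_user(sk))
	deliver(sk, skb);		/* fast path: handle in softirq */
else
	sk_add_backlog(sk, skb);	/* owner active: replay at release_sock() */
bh_unlock_sock(sk);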
@@ -523,6 +548,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
523 struct inet_sock *inet = inet_sk(sk); 548 struct inet_sock *inet = inet_sk(sk);
524 struct flowi *fl = &inet->cork.fl; 549 struct flowi *fl = &inet->cork.fl;
525 int err = 0; 550 int err = 0;
551 int is_udplite = IS_UDPLITE(sk);
526 __wsum csum = 0; 552 __wsum csum = 0;
527 553
528 /* Grab the skbuff where UDP header space exists. */ 554 /* Grab the skbuff where UDP header space exists. */
@@ -538,7 +564,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
538 uh->len = htons(up->len); 564 uh->len = htons(up->len);
539 uh->check = 0; 565 uh->check = 0;
540 566
541 if (up->pcflag) 567 if (is_udplite)
542 csum = udplite_csum_outgoing(sk, skb); 568 csum = udplite_csum_outgoing(sk, skb);
543 else 569 else
544 csum = udp_csum_outgoing(sk, skb); 570 csum = udp_csum_outgoing(sk, skb);
@@ -554,7 +580,7 @@ out:
554 up->len = 0; 580 up->len = 0;
555 up->pending = 0; 581 up->pending = 0;
556 if (!err) 582 if (!err)
557 UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); 583 UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
558 return err; 584 return err;
559} 585}
560 586
@@ -578,7 +604,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
578 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 604 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
579 int err; 605 int err;
580 int connected = 0; 606 int connected = 0;
581 int is_udplite = up->pcflag; 607 int is_udplite = IS_UDPLITE(sk);
582 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 608 int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
583 609
584 /* destination address check */ 610 /* destination address check */
@@ -748,7 +774,7 @@ do_udp_sendmsg:
748 if (final_p) 774 if (final_p)
749 ipv6_addr_copy(&fl.fl6_dst, final_p); 775 ipv6_addr_copy(&fl.fl6_dst, final_p);
750 776
751 if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { 777 if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
752 if (err == -EREMOTE) 778 if (err == -EREMOTE)
753 err = ip6_dst_blackhole(sk, &dst, &fl); 779 err = ip6_dst_blackhole(sk, &dst, &fl);
754 if (err < 0) 780 if (err < 0)
@@ -988,6 +1014,10 @@ struct proto udpv6_prot = {
988 .hash = udp_lib_hash, 1014 .hash = udp_lib_hash,
989 .unhash = udp_lib_unhash, 1015 .unhash = udp_lib_unhash,
990 .get_port = udp_v6_get_port, 1016 .get_port = udp_v6_get_port,
1017 .memory_allocated = &udp_memory_allocated,
1018 .sysctl_mem = sysctl_udp_mem,
1019 .sysctl_wmem = &sysctl_udp_wmem_min,
1020 .sysctl_rmem = &sysctl_udp_rmem_min,
991 .obj_size = sizeof(struct udp6_sock), 1021 .obj_size = sizeof(struct udp6_sock),
992#ifdef CONFIG_COMPAT 1022#ifdef CONFIG_COMPAT
993 .compat_setsockopt = compat_udpv6_setsockopt, 1023 .compat_setsockopt = compat_udpv6_setsockopt,
@@ -1007,9 +1037,27 @@ static struct inet_protosw udpv6_protosw = {
1007}; 1037};
1008 1038
1009 1039
1010void __init udpv6_init(void) 1040int __init udpv6_init(void)
1041{
1042 int ret;
1043
1044 ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
1045 if (ret)
1046 goto out;
1047
1048 ret = inet6_register_protosw(&udpv6_protosw);
1049 if (ret)
1050 goto out_udpv6_protocol;
1051out:
1052 return ret;
1053
1054out_udpv6_protocol:
1055 inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
1056 goto out;
1057}
1058
1059void udpv6_exit(void)
1011{ 1060{
1012 if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0) 1061 inet6_unregister_protosw(&udpv6_protosw);
1013 printk(KERN_ERR "udpv6_init: Could not register protocol\n"); 1062 inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
1014 inet6_register_protosw(&udpv6_protosw);
1015} 1063}
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 2d3fda601232..21be3a83e7bc 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -5,6 +5,7 @@
5#include <net/protocol.h> 5#include <net/protocol.h>
6#include <net/addrconf.h> 6#include <net/addrconf.h>
7#include <net/inet_common.h> 7#include <net/inet_common.h>
8#include <net/transp_v6.h>
8 9
9extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); 10extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
10extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, 11extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5a0379f71415..87d4202522ee 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -26,7 +26,7 @@ static void udplitev6_err(struct sk_buff *skb,
26 struct inet6_skb_parm *opt, 26 struct inet6_skb_parm *opt,
27 int type, int code, int offset, __be32 info) 27 int type, int code, int offset, __be32 info)
28{ 28{
29 return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); 29 __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash);
30} 30}
31 31
32static struct inet6_protocol udplitev6_protocol = { 32static struct inet6_protocol udplitev6_protocol = {
@@ -77,12 +77,29 @@ static struct inet_protosw udplite6_protosw = {
77 .flags = INET_PROTOSW_PERMANENT, 77 .flags = INET_PROTOSW_PERMANENT,
78}; 78};
79 79
80void __init udplitev6_init(void) 80int __init udplitev6_init(void)
81{ 81{
82 if (inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE) < 0) 82 int ret;
83 printk(KERN_ERR "%s: Could not register.\n", __FUNCTION__);
84 83
85 inet6_register_protosw(&udplite6_protosw); 84 ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
85 if (ret)
86 goto out;
87
88 ret = inet6_register_protosw(&udplite6_protosw);
89 if (ret)
90 goto out_udplitev6_protocol;
91out:
92 return ret;
93
94out_udplitev6_protocol:
95 inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
96 goto out;
97}
98
99void udplitev6_exit(void)
100{
101 inet6_unregister_protosw(&udplite6_protosw);
102 inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE);
86} 103}
87 104
88#ifdef CONFIG_PROC_FS 105#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 515783707e86..a4714d76ae6b 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,120 +16,37 @@
16#include <net/ipv6.h> 16#include <net/ipv6.h>
17#include <net/xfrm.h> 17#include <net/xfrm.h>
18 18
19int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) 19int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
20{ 20{
21 int err; 21 return xfrm6_extract_header(skb);
22 __be32 seq; 22}
23 struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
24 struct xfrm_state *x;
25 int xfrm_nr = 0;
26 int decaps = 0;
27 unsigned int nhoff;
28
29 nhoff = IP6CB(skb)->nhoff;
30
31 seq = 0;
32 if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
33 goto drop;
34
35 do {
36 struct ipv6hdr *iph = ipv6_hdr(skb);
37
38 if (xfrm_nr == XFRM_MAX_DEPTH)
39 goto drop;
40
41 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
42 nexthdr, AF_INET6);
43 if (x == NULL)
44 goto drop;
45 spin_lock(&x->lock);
46 if (unlikely(x->km.state != XFRM_STATE_VALID))
47 goto drop_unlock;
48
49 if (x->props.replay_window && xfrm_replay_check(x, seq))
50 goto drop_unlock;
51
52 if (xfrm_state_check_expire(x))
53 goto drop_unlock;
54
55 nexthdr = x->type->input(x, skb);
56 if (nexthdr <= 0)
57 goto drop_unlock;
58
59 skb_network_header(skb)[nhoff] = nexthdr;
60
61 if (x->props.replay_window)
62 xfrm_replay_advance(x, seq);
63
64 x->curlft.bytes += skb->len;
65 x->curlft.packets++;
66
67 spin_unlock(&x->lock);
68
69 xfrm_vec[xfrm_nr++] = x;
70
71 if (x->outer_mode->input(x, skb))
72 goto drop;
73
74 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
75 decaps = 1;
76 break;
77 }
78
79 if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
80 goto drop;
81 } while (!err);
82 23
83 /* Allocate new secpath or COW existing one. */ 24int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
84 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { 25{
85 struct sec_path *sp; 26 XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
86 sp = secpath_dup(skb->sp); 27 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
87 if (!sp) 28 return xfrm_input(skb, nexthdr, spi, 0);
88 goto drop; 29}
89 if (skb->sp) 30EXPORT_SYMBOL(xfrm6_rcv_spi);
90 secpath_put(skb->sp);
91 skb->sp = sp;
92 }
93 31
94 if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH) 32int xfrm6_transport_finish(struct sk_buff *skb, int async)
95 goto drop; 33{
34 skb_network_header(skb)[IP6CB(skb)->nhoff] =
35 XFRM_MODE_SKB_CB(skb)->protocol;
96 36
97 memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec, 37#ifndef CONFIG_NETFILTER
98 xfrm_nr * sizeof(xfrm_vec[0])); 38 if (!async)
99 skb->sp->len += xfrm_nr;
100
101 nf_reset(skb);
102
103 if (decaps) {
104 dst_release(skb->dst);
105 skb->dst = NULL;
106 netif_rx(skb);
107 return -1;
108 } else {
109#ifdef CONFIG_NETFILTER
110 ipv6_hdr(skb)->payload_len = htons(skb->len);
111 __skb_push(skb, skb->data - skb_network_header(skb));
112
113 NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
114 ip6_rcv_finish);
115 return -1;
116#else
117 return 1; 39 return 1;
118#endif 40#endif
119 }
120 41
121drop_unlock: 42 ipv6_hdr(skb)->payload_len = htons(skb->len);
122 spin_unlock(&x->lock); 43 __skb_push(skb, skb->data - skb_network_header(skb));
123 xfrm_state_put(x); 44
124drop: 45 NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
125 while (--xfrm_nr >= 0) 46 ip6_rcv_finish);
126 xfrm_state_put(xfrm_vec[xfrm_nr]);
127 kfree_skb(skb);
128 return -1; 47 return -1;
129} 48}
130 49
131EXPORT_SYMBOL(xfrm6_rcv_spi);
132
133int xfrm6_rcv(struct sk_buff *skb) 50int xfrm6_rcv(struct sk_buff *skb)
134{ 51{
135 return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], 52 return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
@@ -144,10 +61,28 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
144 struct xfrm_state *x = NULL; 61 struct xfrm_state *x = NULL;
145 int wildcard = 0; 62 int wildcard = 0;
146 xfrm_address_t *xany; 63 xfrm_address_t *xany;
147 struct xfrm_state *xfrm_vec_one = NULL;
148 int nh = 0; 64 int nh = 0;
149 int i = 0; 65 int i = 0;
150 66
67 /* Allocate new secpath or COW existing one. */
68 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
69 struct sec_path *sp;
70
71 sp = secpath_dup(skb->sp);
72 if (!sp) {
73 XFRM_INC_STATS(LINUX_MIB_XFRMINERROR);
74 goto drop;
75 }
76 if (skb->sp)
77 secpath_put(skb->sp);
78 skb->sp = sp;
79 }
80
81 if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
82 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
83 goto drop;
84 }
85
151 xany = (xfrm_address_t *)&in6addr_any; 86 xany = (xfrm_address_t *)&in6addr_any;
152 87
153 for (i = 0; i < 3; i++) { 88 for (i = 0; i < 3; i++) {
@@ -200,47 +135,37 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
200 continue; 135 continue;
201 } 136 }
202 137
138 spin_unlock(&x->lock);
139
203 nh = x->type->input(x, skb); 140 nh = x->type->input(x, skb);
204 if (nh <= 0) { 141 if (nh <= 0) {
205 spin_unlock(&x->lock);
206 xfrm_state_put(x); 142 xfrm_state_put(x);
207 x = NULL; 143 x = NULL;
208 continue; 144 continue;
209 } 145 }
210 146
211 x->curlft.bytes += skb->len; 147 /* Found a state */
212 x->curlft.packets++;
213
214 spin_unlock(&x->lock);
215
216 xfrm_vec_one = x;
217 break; 148 break;
218 } 149 }
219 150
220 if (!xfrm_vec_one) 151 if (!x) {
152 XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
153 xfrm_audit_state_notfound_simple(skb, AF_INET6);
221 goto drop; 154 goto drop;
222
223 /* Allocate new secpath or COW existing one. */
224 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
225 struct sec_path *sp;
226 sp = secpath_dup(skb->sp);
227 if (!sp)
228 goto drop;
229 if (skb->sp)
230 secpath_put(skb->sp);
231 skb->sp = sp;
232 } 155 }
233 156
234 if (1 + skb->sp->len > XFRM_MAX_DEPTH) 157 skb->sp->xvec[skb->sp->len++] = x;
235 goto drop; 158
159 spin_lock(&x->lock);
236 160
237 skb->sp->xvec[skb->sp->len] = xfrm_vec_one; 161 x->curlft.bytes += skb->len;
238 skb->sp->len ++; 162 x->curlft.packets++;
163
164 spin_unlock(&x->lock);
239 165
240 return 1; 166 return 1;
167
241drop: 168drop:
242 if (xfrm_vec_one)
243 xfrm_state_put(xfrm_vec_one);
244 return -1; 169 return -1;
245} 170}
246 171
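After this rewrite, xfrm6_rcv_spi() merely records the address family and the offset of the destination address before delegating to the generic xfrm_input() state machine. Judging from the deleted loop above, the generic code uses those two hints roughly like this (a reconstruction, not a quote of net/xfrm/xfrm_input.c):

xfrm_address_t *daddr = (xfrm_address_t *)(skb_network_header(skb) +
					   XFRM_SPI_SKB_CB(skb)->daddroff);
unsigned short family = XFRM_SPI_SKB_CB(skb)->family;
struct xfrm_state *x = xfrm_state_lookup(daddr, spi, nexthdr, family);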
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 2bfb4f05c14c..0527d11c1ae3 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -19,31 +19,39 @@
19#include <net/ipv6.h> 19#include <net/ipv6.h>
20#include <net/xfrm.h> 20#include <net/xfrm.h>
21 21
22static void xfrm6_beet_make_header(struct sk_buff *skb)
23{
24 struct ipv6hdr *iph = ipv6_hdr(skb);
25
26 iph->version = 6;
27
28 memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
29 sizeof(iph->flow_lbl));
30 iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
31
32 ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
33 iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
34}
35
22/* Add encapsulation header. 36/* Add encapsulation header.
23 * 37 *
24 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. 38 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
25 */ 39 */
26static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) 40static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
27{ 41{
28 struct ipv6hdr *iph, *top_iph; 42 struct ipv6hdr *top_iph;
29 u8 *prevhdr;
30 int hdr_len;
31 43
32 iph = ipv6_hdr(skb);
33
34 hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
35
36 skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
37 skb_set_network_header(skb, -x->props.header_len); 44 skb_set_network_header(skb, -x->props.header_len);
38 skb->transport_header = skb->network_header + hdr_len; 45 skb->mac_header = skb->network_header +
39 __skb_pull(skb, hdr_len); 46 offsetof(struct ipv6hdr, nexthdr);
47 skb->transport_header = skb->network_header + sizeof(*top_iph);
48
49 xfrm6_beet_make_header(skb);
40 50
41 top_iph = ipv6_hdr(skb); 51 top_iph = ipv6_hdr(skb);
42 memmove(top_iph, iph, hdr_len);
43 52
44 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); 53 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
45 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); 54 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
46
47 return 0; 55 return 0;
48} 56}
49 57
@@ -52,19 +60,21 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
52 struct ipv6hdr *ip6h; 60 struct ipv6hdr *ip6h;
53 const unsigned char *old_mac; 61 const unsigned char *old_mac;
54 int size = sizeof(struct ipv6hdr); 62 int size = sizeof(struct ipv6hdr);
55 int err = -EINVAL; 63 int err;
56 64
57 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 65 err = skb_cow_head(skb, size + skb->mac_len);
66 if (err)
58 goto out; 67 goto out;
59 68
60 skb_push(skb, size); 69 __skb_push(skb, size);
61 memmove(skb->data, skb_network_header(skb), size);
62 skb_reset_network_header(skb); 70 skb_reset_network_header(skb);
63 71
64 old_mac = skb_mac_header(skb); 72 old_mac = skb_mac_header(skb);
65 skb_set_mac_header(skb, -skb->mac_len); 73 skb_set_mac_header(skb, -skb->mac_len);
66 memmove(skb_mac_header(skb), old_mac, skb->mac_len); 74 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
67 75
76 xfrm6_beet_make_header(skb);
77
68 ip6h = ipv6_hdr(skb); 78 ip6h = ipv6_hdr(skb);
69 ip6h->payload_len = htons(skb->len - size); 79 ip6h->payload_len = htons(skb->len - size);
70 ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6); 80 ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
@@ -75,8 +85,10 @@ out:
75} 85}
76 86
77static struct xfrm_mode xfrm6_beet_mode = { 87static struct xfrm_mode xfrm6_beet_mode = {
78 .input = xfrm6_beet_input, 88 .input2 = xfrm6_beet_input,
79 .output = xfrm6_beet_output, 89 .input = xfrm_prepare_input,
90 .output2 = xfrm6_beet_output,
91 .output = xfrm6_prepare_output,
80 .owner = THIS_MODULE, 92 .owner = THIS_MODULE,
81 .encap = XFRM_MODE_BEET, 93 .encap = XFRM_MODE_BEET,
82 .flags = XFRM_MODE_FLAG_TUNNEL, 94 .flags = XFRM_MODE_FLAG_TUNNEL,
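
The BEET conversion above follows a pattern repeated for the other IPv6
modes in this merge: each mode keeps only its header-specific work in
.input2/.output2 and points .input/.output at the shared preparation
helpers. A schematic of the registration shape; the example_* callbacks
stand in for any converted mode:

	/* Sketch: the split-callback layout shared by the converted modes. */
	static struct xfrm_mode xfrm6_example_mode = {
		.input2	 = xfrm6_example_input,	 /* mode-specific input half */
		.input	 = xfrm_prepare_input,	 /* shared wrapper */
		.output2 = xfrm6_example_output, /* mode-specific output half */
		.output	 = xfrm6_prepare_output, /* shared wrapper */
		.owner	 = THIS_MODULE,
		.encap	 = XFRM_MODE_BEET,
		.flags	 = XFRM_MODE_FLAG_TUNNEL,
	};
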
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index a7bc8c62317a..63d5d493098a 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -28,6 +28,7 @@
28#include <linux/kernel.h> 28#include <linux/kernel.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/skbuff.h> 30#include <linux/skbuff.h>
31#include <linux/spinlock.h>
31#include <linux/stringify.h> 32#include <linux/stringify.h>
32#include <linux/time.h> 33#include <linux/time.h>
33#include <net/ipv6.h> 34#include <net/ipv6.h>
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index fd84e2217274..0c742faaa30b 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -25,46 +25,29 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
25 IP6_ECN_set_ce(inner_iph); 25 IP6_ECN_set_ce(inner_iph);
26} 26}
27 27
28static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
29{
30 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
31 IP_ECN_set_ce(ipip_hdr(skb));
32}
33
34/* Add encapsulation header. 28/* Add encapsulation header.
35 * 29 *
36 * The top IP header will be constructed per RFC 2401. 30 * The top IP header will be constructed per RFC 2401.
37 */ 31 */
38static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) 32static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
39{ 33{
40 struct dst_entry *dst = skb->dst; 34 struct dst_entry *dst = skb->dst;
41 struct xfrm_dst *xdst = (struct xfrm_dst*)dst; 35 struct ipv6hdr *top_iph;
42 struct ipv6hdr *iph, *top_iph;
43 int dsfield; 36 int dsfield;
44 37
45 iph = ipv6_hdr(skb);
46
47 skb_set_network_header(skb, -x->props.header_len); 38 skb_set_network_header(skb, -x->props.header_len);
48 skb->mac_header = skb->network_header + 39 skb->mac_header = skb->network_header +
49 offsetof(struct ipv6hdr, nexthdr); 40 offsetof(struct ipv6hdr, nexthdr);
50 skb->transport_header = skb->network_header + sizeof(*iph); 41 skb->transport_header = skb->network_header + sizeof(*top_iph);
51 top_iph = ipv6_hdr(skb); 42 top_iph = ipv6_hdr(skb);
52 43
53 top_iph->version = 6; 44 top_iph->version = 6;
54 if (xdst->route->ops->family == AF_INET6) { 45
55 top_iph->priority = iph->priority; 46 memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
56 top_iph->flow_lbl[0] = iph->flow_lbl[0]; 47 sizeof(top_iph->flow_lbl));
57 top_iph->flow_lbl[1] = iph->flow_lbl[1]; 48 top_iph->nexthdr = x->inner_mode->afinfo->proto;
58 top_iph->flow_lbl[2] = iph->flow_lbl[2]; 49
59 top_iph->nexthdr = IPPROTO_IPV6; 50 dsfield = XFRM_MODE_SKB_CB(skb)->tos;
60 } else {
61 top_iph->priority = 0;
62 top_iph->flow_lbl[0] = 0;
63 top_iph->flow_lbl[1] = 0;
64 top_iph->flow_lbl[2] = 0;
65 top_iph->nexthdr = IPPROTO_IPIP;
66 }
67 dsfield = ipv6_get_dsfield(top_iph);
68 dsfield = INET_ECN_encapsulate(dsfield, dsfield); 51 dsfield = INET_ECN_encapsulate(dsfield, dsfield);
69 if (x->props.flags & XFRM_STATE_NOECN) 52 if (x->props.flags & XFRM_STATE_NOECN)
70 dsfield &= ~INET_ECN_MASK; 53 dsfield &= ~INET_ECN_MASK;
@@ -72,18 +55,15 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
72 top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); 55 top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
73 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); 56 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
74 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); 57 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
75 skb->protocol = htons(ETH_P_IPV6);
76 return 0; 58 return 0;
77} 59}
78 60
79static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) 61static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
80{ 62{
81 int err = -EINVAL; 63 int err = -EINVAL;
82 const unsigned char *old_mac; 64 const unsigned char *old_mac;
83 const unsigned char *nh = skb_network_header(skb);
84 65
85 if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 && 66 if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
86 nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
87 goto out; 67 goto out;
88 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 68 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
89 goto out; 69 goto out;
@@ -92,17 +72,12 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
92 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 72 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
93 goto out; 73 goto out;
94 74
95 nh = skb_network_header(skb); 75 if (x->props.flags & XFRM_STATE_DECAP_DSCP)
96 if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { 76 ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
97 if (x->props.flags & XFRM_STATE_DECAP_DSCP) 77 ipipv6_hdr(skb));
98 ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb)); 78 if (!(x->props.flags & XFRM_STATE_NOECN))
99 if (!(x->props.flags & XFRM_STATE_NOECN)) 79 ipip6_ecn_decapsulate(skb);
100 ipip6_ecn_decapsulate(skb); 80
101 } else {
102 if (!(x->props.flags & XFRM_STATE_NOECN))
103 ip6ip_ecn_decapsulate(skb);
104 skb->protocol = htons(ETH_P_IP);
105 }
106 old_mac = skb_mac_header(skb); 81 old_mac = skb_mac_header(skb);
107 skb_set_mac_header(skb, -skb->mac_len); 82 skb_set_mac_header(skb, -skb->mac_len);
108 memmove(skb_mac_header(skb), old_mac, skb->mac_len); 83 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
@@ -114,19 +89,21 @@ out:
114} 89}
115 90
116static struct xfrm_mode xfrm6_tunnel_mode = { 91static struct xfrm_mode xfrm6_tunnel_mode = {
117 .input = xfrm6_tunnel_input, 92 .input2 = xfrm6_mode_tunnel_input,
118 .output = xfrm6_tunnel_output, 93 .input = xfrm_prepare_input,
94 .output2 = xfrm6_mode_tunnel_output,
95 .output = xfrm6_prepare_output,
119 .owner = THIS_MODULE, 96 .owner = THIS_MODULE,
120 .encap = XFRM_MODE_TUNNEL, 97 .encap = XFRM_MODE_TUNNEL,
121 .flags = XFRM_MODE_FLAG_TUNNEL, 98 .flags = XFRM_MODE_FLAG_TUNNEL,
122}; 99};
123 100
124static int __init xfrm6_tunnel_init(void) 101static int __init xfrm6_mode_tunnel_init(void)
125{ 102{
126 return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6); 103 return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
127} 104}
128 105
129static void __exit xfrm6_tunnel_exit(void) 106static void __exit xfrm6_mode_tunnel_exit(void)
130{ 107{
131 int err; 108 int err;
132 109
@@ -134,7 +111,7 @@ static void __exit xfrm6_tunnel_exit(void)
134 BUG_ON(err); 111 BUG_ON(err);
135} 112}
136 113
137module_init(xfrm6_tunnel_init); 114module_init(xfrm6_mode_tunnel_init);
138module_exit(xfrm6_tunnel_exit); 115module_exit(xfrm6_mode_tunnel_exit);
139MODULE_LICENSE("GPL"); 116MODULE_LICENSE("GPL");
140MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL); 117MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
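
In the tunnel output hunk above, the outer traffic class is now derived from
the DS field stashed in XFRM_MODE_SKB_CB rather than re-read from the packet
headers. A small sketch of that derivation with a placeholder helper name;
INET_ECN_encapsulate() keeps the DSCP bits and propagates the inner ECN
marking, mapping CE to ECT(0):

	#include <net/inet_ecn.h>
	#include <net/xfrm.h>

	/* Sketch: outer IPv6 traffic class from the stashed inner TOS,
	 * with ECN cleared when the state sets XFRM_STATE_NOECN. */
	static u8 example_outer_tos(struct xfrm_state *x, u8 inner_tos)
	{
		u8 dsfield = INET_ECN_encapsulate(inner_tos, inner_tos);

		if (x->props.flags & XFRM_STATE_NOECN)
			dsfield &= ~INET_ECN_MASK;
		return dsfield;
	}
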
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 656976760ad4..b34c58c65656 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -10,10 +10,12 @@
10 */ 10 */
11 11
12#include <linux/if_ether.h> 12#include <linux/if_ether.h>
13#include <linux/compiler.h> 13#include <linux/kernel.h>
14#include <linux/module.h>
14#include <linux/skbuff.h> 15#include <linux/skbuff.h>
15#include <linux/icmpv6.h> 16#include <linux/icmpv6.h>
16#include <linux/netfilter_ipv6.h> 17#include <linux/netfilter_ipv6.h>
18#include <net/dst.h>
17#include <net/ipv6.h> 19#include <net/ipv6.h>
18#include <net/xfrm.h> 20#include <net/xfrm.h>
19 21
@@ -43,97 +45,50 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
43 return ret; 45 return ret;
44} 46}
45 47
46static inline int xfrm6_output_one(struct sk_buff *skb) 48int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
47{ 49{
48 struct dst_entry *dst = skb->dst;
49 struct xfrm_state *x = dst->xfrm;
50 struct ipv6hdr *iph;
51 int err; 50 int err;
52 51
53 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { 52 err = xfrm6_tunnel_check_size(skb);
54 err = xfrm6_tunnel_check_size(skb);
55 if (err)
56 goto error_nolock;
57 }
58
59 err = xfrm_output(skb);
60 if (err) 53 if (err)
61 goto error_nolock; 54 return err;
62 55
63 iph = ipv6_hdr(skb); 56 XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
64 iph->payload_len = htons(skb->len - sizeof(*iph));
65 57
66 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; 58 return xfrm6_extract_header(skb);
67 err = 0;
68
69out_exit:
70 return err;
71error_nolock:
72 kfree_skb(skb);
73 goto out_exit;
74} 59}
75 60
76static int xfrm6_output_finish2(struct sk_buff *skb) 61int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
77{ 62{
78 int err; 63 int err;
79 64
80 while (likely((err = xfrm6_output_one(skb)) == 0)) { 65 err = x->inner_mode->afinfo->extract_output(x, skb);
81 nf_reset(skb); 66 if (err)
82 67 return err;
83 err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
84 skb->dst->dev, dst_output);
85 if (unlikely(err != 1))
86 break;
87 68
88 if (!skb->dst->xfrm) 69 memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
89 return dst_output(skb); 70#ifdef CONFIG_NETFILTER
71 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
72#endif
90 73
91 err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, 74 skb->protocol = htons(ETH_P_IPV6);
92 skb->dst->dev, xfrm6_output_finish2);
93 if (unlikely(err != 1))
94 break;
95 }
96 75
97 return err; 76 return x->outer_mode->output2(x, skb);
98} 77}
78EXPORT_SYMBOL(xfrm6_prepare_output);
99 79
100static int xfrm6_output_finish(struct sk_buff *skb) 80static int xfrm6_output_finish(struct sk_buff *skb)
101{ 81{
102 struct sk_buff *segs; 82#ifdef CONFIG_NETFILTER
103 83 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
104 if (!skb_is_gso(skb)) 84#endif
105 return xfrm6_output_finish2(skb);
106 85
107 skb->protocol = htons(ETH_P_IPV6); 86 skb->protocol = htons(ETH_P_IPV6);
108 segs = skb_gso_segment(skb, 0); 87 return xfrm_output(skb);
109 kfree_skb(skb);
110 if (unlikely(IS_ERR(segs)))
111 return PTR_ERR(segs);
112
113 do {
114 struct sk_buff *nskb = segs->next;
115 int err;
116
117 segs->next = NULL;
118 err = xfrm6_output_finish2(segs);
119
120 if (unlikely(err)) {
121 while ((segs = nskb)) {
122 nskb = segs->next;
123 segs->next = NULL;
124 kfree_skb(segs);
125 }
126 return err;
127 }
128
129 segs = nskb;
130 } while (segs);
131
132 return 0;
133} 88}
134 89
135int xfrm6_output(struct sk_buff *skb) 90int xfrm6_output(struct sk_buff *skb)
136{ 91{
137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, 92 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev,
138 xfrm6_output_finish); 93 xfrm6_output_finish);
139} 94}
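
The xfrm6_output.c rewrite removes the open-coded transform loop and the GSO
segmentation walk; both now live behind the generic xfrm_output(), and the
hook constant moves to the unified NF_INET_POST_ROUTING name. What remains
is the usual NF_HOOK/okfn pair, sketched here with placeholder names:

	#include <linux/if_ether.h>
	#include <linux/netfilter.h>
	#include <linux/netfilter_ipv6.h>
	#include <net/xfrm.h>

	/* Sketch: okfn invoked once the POST_ROUTING chain accepts the skb. */
	static int example_output_finish(struct sk_buff *skb)
	{
		skb->protocol = htons(ETH_P_IPV6);
		return xfrm_output(skb);	/* generic loop + GSO handling */
	}

	int example_output(struct sk_buff *skb)
	{
		return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL,
			       skb->dst->dev, example_output_finish);
	}
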
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index b8e9eb445d74..c25a6b527fc4 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -11,9 +11,11 @@
11 * 11 *
12 */ 12 */
13 13
14#include <linux/compiler.h> 14#include <linux/err.h>
15#include <linux/kernel.h>
15#include <linux/netdevice.h> 16#include <linux/netdevice.h>
16#include <net/addrconf.h> 17#include <net/addrconf.h>
18#include <net/dst.h>
17#include <net/xfrm.h> 19#include <net/xfrm.h>
18#include <net/ip.h> 20#include <net/ip.h>
19#include <net/ipv6.h> 21#include <net/ipv6.h>
@@ -25,35 +27,40 @@
25static struct dst_ops xfrm6_dst_ops; 27static struct dst_ops xfrm6_dst_ops;
26static struct xfrm_policy_afinfo xfrm6_policy_afinfo; 28static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
27 29
28static int xfrm6_dst_lookup(struct xfrm_dst **xdst, struct flowi *fl) 30static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr,
31 xfrm_address_t *daddr)
29{ 32{
30 struct dst_entry *dst = ip6_route_output(NULL, fl); 33 struct flowi fl = {};
31 int err = dst->error; 34 struct dst_entry *dst;
32 if (!err) 35 int err;
33 *xdst = (struct xfrm_dst *) dst; 36
34 else 37 memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst));
38 if (saddr)
39 memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src));
40
41 dst = ip6_route_output(NULL, &fl);
42
43 err = dst->error;
44 if (dst->error) {
35 dst_release(dst); 45 dst_release(dst);
36 return err; 46 dst = ERR_PTR(err);
47 }
48
49 return dst;
37} 50}
38 51
39static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) 52static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
40{ 53{
41 struct rt6_info *rt; 54 struct dst_entry *dst;
42 struct flowi fl_tunnel = { 55
43 .nl_u = { 56 dst = xfrm6_dst_lookup(0, NULL, daddr);
44 .ip6_u = { 57 if (IS_ERR(dst))
45 .daddr = *(struct in6_addr *)&daddr->a6, 58 return -EHOSTUNREACH;
46 }, 59
47 }, 60 ipv6_get_saddr(dst, (struct in6_addr *)&daddr->a6,
48 }; 61 (struct in6_addr *)&saddr->a6);
49 62 dst_release(dst);
50 if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { 63 return 0;
51 ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6,
52 (struct in6_addr *)&saddr->a6);
53 dst_release(&rt->u.dst);
54 return 0;
55 }
56 return -EHOSTUNREACH;
57} 64}
58 65
59static struct dst_entry * 66static struct dst_entry *
@@ -86,177 +93,53 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
86 return dst; 93 return dst;
87} 94}
88 95
89static inline struct in6_addr* 96static int xfrm6_get_tos(struct flowi *fl)
90__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr)
91{ 97{
92 return (x->type->remote_addr) ? 98 return 0;
93 (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) :
94 (struct in6_addr*)&x->id.daddr;
95} 99}
96 100
97static inline struct in6_addr* 101static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
98__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) 102 int nfheader_len)
99{ 103{
100 return (x->type->local_addr) ? 104 if (dst->ops->family == AF_INET6) {
101 (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : 105 struct rt6_info *rt = (struct rt6_info*)dst;
102 (struct in6_addr*)&x->props.saddr; 106 if (rt->rt6i_node)
103} 107 path->path_cookie = rt->rt6i_node->fn_sernum;
108 }
104 109
105static inline void 110 path->u.rt6.rt6i_nfheader_len = nfheader_len;
106__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x)
107{
108 if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
109 *nflen += x->props.header_len;
110 else
111 *len += x->props.header_len;
112}
113 111
114static inline void 112 return 0;
115__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x)
116{
117 if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
118 *nflen -= x->props.header_len;
119 else
120 *len -= x->props.header_len;
121} 113}
122 114
123/* Allocate chain of dst_entry's, attach known xfrm's, calculate 115static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
124 * all the metrics... Shortly, bundle a bundle.
125 */
126
127static int
128__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
129 struct flowi *fl, struct dst_entry **dst_p)
130{ 116{
131 struct dst_entry *dst, *dst_prev; 117 struct rt6_info *rt = (struct rt6_info*)xdst->route;
132 struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
133 struct rt6_info *rt = rt0;
134 struct flowi fl_tunnel = {
135 .nl_u = {
136 .ip6_u = {
137 .saddr = fl->fl6_src,
138 .daddr = fl->fl6_dst,
139 }
140 }
141 };
142 int i;
143 int err = 0;
144 int header_len = 0;
145 int nfheader_len = 0;
146 int trailer_len = 0;
147
148 dst = dst_prev = NULL;
149 dst_hold(&rt->u.dst);
150
151 for (i = 0; i < nx; i++) {
152 struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops);
153 struct xfrm_dst *xdst;
154
155 if (unlikely(dst1 == NULL)) {
156 err = -ENOBUFS;
157 dst_release(&rt->u.dst);
158 goto error;
159 }
160 118
161 if (!dst) 119 xdst->u.dst.dev = dev;
162 dst = dst1; 120 dev_hold(dev);
163 else {
164 dst_prev->child = dst1;
165 dst1->flags |= DST_NOHASH;
166 dst_clone(dst1);
167 }
168
169 xdst = (struct xfrm_dst *)dst1;
170 xdst->route = &rt->u.dst;
171 xdst->genid = xfrm[i]->genid;
172 if (rt->rt6i_node)
173 xdst->route_cookie = rt->rt6i_node->fn_sernum;
174
175 dst1->next = dst_prev;
176 dst_prev = dst1;
177
178 __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
179 trailer_len += xfrm[i]->props.trailer_len;
180
181 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
182 unsigned short encap_family = xfrm[i]->props.family;
183 switch(encap_family) {
184 case AF_INET:
185 fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
186 fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
187 break;
188 case AF_INET6:
189 ipv6_addr_copy(&fl_tunnel.fl6_dst, __xfrm6_bundle_addr_remote(xfrm[i], &fl->fl6_dst));
190
191 ipv6_addr_copy(&fl_tunnel.fl6_src, __xfrm6_bundle_addr_local(xfrm[i], &fl->fl6_src));
192 break;
193 default:
194 BUG_ON(1);
195 }
196 121
197 err = xfrm_dst_lookup((struct xfrm_dst **) &rt, 122 xdst->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev);
198 &fl_tunnel, encap_family); 123 if (!xdst->u.rt6.rt6i_idev)
199 if (err) 124 return -ENODEV;
200 goto error;
201 } else
202 dst_hold(&rt->u.dst);
203 }
204 125
205 dst_prev->child = &rt->u.dst; 126 /* Sheit... I remember I did this right. Apparently,
206 dst->path = &rt->u.dst; 127 * it was magically lost, so this code needs audit */
128 xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
129 RTF_LOCAL);
130 xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
131 xdst->u.rt6.rt6i_node = rt->rt6i_node;
207 if (rt->rt6i_node) 132 if (rt->rt6i_node)
208 ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum; 133 xdst->route_cookie = rt->rt6i_node->fn_sernum;
209 134 xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
210 *dst_p = dst; 135 xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
211 dst = dst_prev; 136 xdst->u.rt6.rt6i_src = rt->rt6i_src;
212
213 dst_prev = *dst_p;
214 i = 0;
215 for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
216 struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
217
218 dst_prev->xfrm = xfrm[i++];
219 dst_prev->dev = rt->u.dst.dev;
220 if (rt->u.dst.dev)
221 dev_hold(rt->u.dst.dev);
222 dst_prev->obsolete = -1;
223 dst_prev->flags |= DST_HOST;
224 dst_prev->lastuse = jiffies;
225 dst_prev->header_len = header_len;
226 dst_prev->nfheader_len = nfheader_len;
227 dst_prev->trailer_len = trailer_len;
228 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
229
230 /* Copy neighbour for reachability confirmation */
231 dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour);
232 dst_prev->input = rt->u.dst.input;
233 dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output;
234 /* Sheit... I remember I did this right. Apparently,
235 * it was magically lost, so this code needs audit */
236 x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTF_ANYCAST|RTF_LOCAL);
237 x->u.rt6.rt6i_metric = rt0->rt6i_metric;
238 x->u.rt6.rt6i_node = rt0->rt6i_node;
239 x->u.rt6.rt6i_gateway = rt0->rt6i_gateway;
240 memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway));
241 x->u.rt6.rt6i_dst = rt0->rt6i_dst;
242 x->u.rt6.rt6i_src = rt0->rt6i_src;
243 x->u.rt6.rt6i_idev = rt0->rt6i_idev;
244 in6_dev_hold(rt0->rt6i_idev);
245 __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm);
246 trailer_len -= x->u.dst.xfrm->props.trailer_len;
247 }
248 137
249 xfrm_init_pmtu(dst);
250 return 0; 138 return 0;
251
252error:
253 if (dst)
254 dst_free(dst);
255 return err;
256} 139}
257 140
258static inline void 141static inline void
259_decode_session6(struct sk_buff *skb, struct flowi *fl) 142_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
260{ 143{
261 u16 offset = skb_network_header_len(skb); 144 u16 offset = skb_network_header_len(skb);
262 struct ipv6hdr *hdr = ipv6_hdr(skb); 145 struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -265,8 +148,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
265 u8 nexthdr = nh[IP6CB(skb)->nhoff]; 148 u8 nexthdr = nh[IP6CB(skb)->nhoff];
266 149
267 memset(fl, 0, sizeof(struct flowi)); 150 memset(fl, 0, sizeof(struct flowi));
268 ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); 151 ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr);
269 ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); 152 ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr);
270 153
271 while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) { 154 while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
272 nh = skb_network_header(skb); 155 nh = skb_network_header(skb);
@@ -289,8 +172,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
289 if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) { 172 if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
290 __be16 *ports = (__be16 *)exthdr; 173 __be16 *ports = (__be16 *)exthdr;
291 174
292 fl->fl_ip_sport = ports[0]; 175 fl->fl_ip_sport = ports[!!reverse];
293 fl->fl_ip_dport = ports[1]; 176 fl->fl_ip_dport = ports[!reverse];
294 } 177 }
295 fl->proto = nexthdr; 178 fl->proto = nexthdr;
296 return; 179 return;
@@ -329,7 +212,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
329 } 212 }
330} 213}
331 214
332static inline int xfrm6_garbage_collect(void) 215static inline int xfrm6_garbage_collect(struct dst_ops *ops)
333{ 216{
334 xfrm6_policy_afinfo.garbage_collect(); 217 xfrm6_policy_afinfo.garbage_collect();
335 return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); 218 return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
@@ -362,7 +245,8 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
362 245
363 xdst = (struct xfrm_dst *)dst; 246 xdst = (struct xfrm_dst *)dst;
364 if (xdst->u.rt6.rt6i_idev->dev == dev) { 247 if (xdst->u.rt6.rt6i_idev->dev == dev) {
365 struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); 248 struct inet6_dev *loopback_idev =
249 in6_dev_get(dev->nd_net->loopback_dev);
366 BUG_ON(!loopback_idev); 250 BUG_ON(!loopback_idev);
367 251
368 do { 252 do {
@@ -385,6 +269,7 @@ static struct dst_ops xfrm6_dst_ops = {
385 .update_pmtu = xfrm6_update_pmtu, 269 .update_pmtu = xfrm6_update_pmtu,
386 .destroy = xfrm6_dst_destroy, 270 .destroy = xfrm6_dst_destroy,
387 .ifdown = xfrm6_dst_ifdown, 271 .ifdown = xfrm6_dst_ifdown,
272 .local_out = __ip6_local_out,
388 .gc_thresh = 1024, 273 .gc_thresh = 1024,
389 .entry_size = sizeof(struct xfrm_dst), 274 .entry_size = sizeof(struct xfrm_dst),
390}; 275};
@@ -395,13 +280,15 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
395 .dst_lookup = xfrm6_dst_lookup, 280 .dst_lookup = xfrm6_dst_lookup,
396 .get_saddr = xfrm6_get_saddr, 281 .get_saddr = xfrm6_get_saddr,
397 .find_bundle = __xfrm6_find_bundle, 282 .find_bundle = __xfrm6_find_bundle,
398 .bundle_create = __xfrm6_bundle_create,
399 .decode_session = _decode_session6, 283 .decode_session = _decode_session6,
284 .get_tos = xfrm6_get_tos,
285 .init_path = xfrm6_init_path,
286 .fill_dst = xfrm6_fill_dst,
400}; 287};
401 288
402static void __init xfrm6_policy_init(void) 289static int __init xfrm6_policy_init(void)
403{ 290{
404 xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); 291 return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
405} 292}
406 293
407static void xfrm6_policy_fini(void) 294static void xfrm6_policy_fini(void)
@@ -409,10 +296,22 @@ static void xfrm6_policy_fini(void)
409 xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); 296 xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
410} 297}
411 298
412void __init xfrm6_init(void) 299int __init xfrm6_init(void)
413{ 300{
414 xfrm6_policy_init(); 301 int ret;
415 xfrm6_state_init(); 302
303 ret = xfrm6_policy_init();
304 if (ret)
305 goto out;
306
307 ret = xfrm6_state_init();
308 if (ret)
309 goto out_policy;
310out:
311 return ret;
312out_policy:
313 xfrm6_policy_fini();
314 goto out;
416} 315}
417 316
418void xfrm6_fini(void) 317void xfrm6_fini(void)
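
Besides dropping __xfrm6_bundle_create() in favour of generic bundle code
driven by the new get_tos/init_path/fill_dst callbacks, the diff converts
xfrm6_dst_lookup() from an int-plus-out-parameter API to returning either
the dst or an ERR_PTR(). A sketch of the resulting caller idiom; the
wrapping function is hypothetical and ignores that the lookup is file-local:

	#include <linux/err.h>
	#include <net/dst.h>
	#include <net/xfrm.h>

	/* Sketch: test the returned pointer itself for failure. */
	static int example_use_route(xfrm_address_t *daddr)
	{
		struct dst_entry *dst = xfrm6_dst_lookup(0, NULL, daddr);

		if (IS_ERR(dst))
			return PTR_ERR(dst); /* get_saddr maps this to -EHOSTUNREACH */
		/* ... use dst ... */
		dst_release(dst);
		return 0;
	}
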
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b392bee396f1..dc817e035e23 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -14,6 +14,8 @@
14#include <net/xfrm.h> 14#include <net/xfrm.h>
15#include <linux/pfkeyv2.h> 15#include <linux/pfkeyv2.h>
16#include <linux/ipsec.h> 16#include <linux/ipsec.h>
17#include <linux/netfilter_ipv6.h>
18#include <net/dsfield.h>
17#include <net/ipv6.h> 19#include <net/ipv6.h>
18#include <net/addrconf.h> 20#include <net/addrconf.h>
19 21
@@ -168,18 +170,37 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
168 return 0; 170 return 0;
169} 171}
170 172
173int xfrm6_extract_header(struct sk_buff *skb)
174{
175 struct ipv6hdr *iph = ipv6_hdr(skb);
176
177 XFRM_MODE_SKB_CB(skb)->id = 0;
178 XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
179 XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
180 XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
181 memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
182 sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
183
184 return 0;
185}
186
171static struct xfrm_state_afinfo xfrm6_state_afinfo = { 187static struct xfrm_state_afinfo xfrm6_state_afinfo = {
172 .family = AF_INET6, 188 .family = AF_INET6,
189 .proto = IPPROTO_IPV6,
190 .eth_proto = htons(ETH_P_IPV6),
173 .owner = THIS_MODULE, 191 .owner = THIS_MODULE,
174 .init_tempsel = __xfrm6_init_tempsel, 192 .init_tempsel = __xfrm6_init_tempsel,
175 .tmpl_sort = __xfrm6_tmpl_sort, 193 .tmpl_sort = __xfrm6_tmpl_sort,
176 .state_sort = __xfrm6_state_sort, 194 .state_sort = __xfrm6_state_sort,
177 .output = xfrm6_output, 195 .output = xfrm6_output,
196 .extract_input = xfrm6_extract_input,
197 .extract_output = xfrm6_extract_output,
198 .transport_finish = xfrm6_transport_finish,
178}; 199};
179 200
180void __init xfrm6_state_init(void) 201int __init xfrm6_state_init(void)
181{ 202{
182 xfrm_state_register_afinfo(&xfrm6_state_afinfo); 203 return xfrm_state_register_afinfo(&xfrm6_state_afinfo);
183} 204}
184 205
185void xfrm6_state_fini(void) 206void xfrm6_state_fini(void)
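
One line in xfrm6_extract_header() deserves a note: IPv6 has no DF bit, so
pre-loading frag_off with htons(IP_DF) reads (this is an interpretation of
the code, not stated in the diff) as a way to make the family-independent
output path treat IPv6 packets as don't-fragment:

	/* Pseudo-fields stashed for the generic, family-independent code: */
	XFRM_MODE_SKB_CB(skb)->id = 0;			/* no IPv4-style ID */
	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);	/* IPv6: never fragment */
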
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index 0cf526450536..92fef864e852 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -28,31 +28,17 @@ static struct ctl_table ipx_table[] = {
28 { 0 }, 28 { 0 },
29}; 29};
30 30
31static struct ctl_table ipx_dir_table[] = { 31static struct ctl_path ipx_path[] = {
32 { 32 { .procname = "net", .ctl_name = CTL_NET, },
33 .ctl_name = NET_IPX, 33 { .procname = "ipx", .ctl_name = NET_IPX, },
34 .procname = "ipx", 34 { }
35 .mode = 0555,
36 .child = ipx_table,
37 },
38 { 0 },
39};
40
41static struct ctl_table ipx_root_table[] = {
42 {
43 .ctl_name = CTL_NET,
44 .procname = "net",
45 .mode = 0555,
46 .child = ipx_dir_table,
47 },
48 { 0 },
49}; 35};
50 36
51static struct ctl_table_header *ipx_table_header; 37static struct ctl_table_header *ipx_table_header;
52 38
53void ipx_register_sysctl(void) 39void ipx_register_sysctl(void)
54{ 40{
55 ipx_table_header = register_sysctl_table(ipx_root_table); 41 ipx_table_header = register_sysctl_paths(ipx_path, ipx_table);
56} 42}
57 43
58void ipx_unregister_sysctl(void) 44void ipx_unregister_sysctl(void)
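
This is the first of several identical sysctl conversions in this merge
(irda and llc below follow suit): two nested single-entry directory tables
collapse into one ctl_path array consumed by register_sysctl_paths(). A
sketch against a hypothetical "example" subsystem; NET_EXAMPLE and
example_table are placeholders:

	#include <linux/sysctl.h>

	/* Placeholders: NET_EXAMPLE and example_table are assumed to exist. */
	static struct ctl_path example_path[] = {
		{ .procname = "net",     .ctl_name = CTL_NET, },
		{ .procname = "example", .ctl_name = NET_EXAMPLE, },
		{ }
	};

	static struct ctl_table_header *example_header;

	void example_register_sysctl(void)
	{
		example_header = register_sysctl_paths(example_path,
						       example_table);
	}
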
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 07dfa7fdd2a0..240b0cbfb532 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2410,9 +2410,8 @@ bed:
2410 2410
2411 /* Set watchdog timer to expire in <val> ms. */ 2411 /* Set watchdog timer to expire in <val> ms. */
2412 self->errno = 0; 2412 self->errno = 0;
2413 init_timer(&self->watchdog); 2413 setup_timer(&self->watchdog, irda_discovery_timeout,
2414 self->watchdog.function = irda_discovery_timeout; 2414 (unsigned long)self);
2415 self->watchdog.data = (unsigned long) self;
2416 self->watchdog.expires = jiffies + (val * HZ/1000); 2415 self->watchdog.expires = jiffies + (val * HZ/1000);
2417 add_timer(&(self->watchdog)); 2416 add_timer(&(self->watchdog));
2418 2417
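
setup_timer() conversions like this one recur throughout the merge (af_iucv,
llc_conn and llc_station below). The helper is pure shorthand for
init_timer() plus the two field assignments; expires stays the caller's job:

	#include <linux/timer.h>

	/* Equivalent to init_timer() + .function/.data assignments. */
	setup_timer(&self->watchdog, irda_discovery_timeout,
		    (unsigned long)self);
	self->watchdog.expires = jiffies + (val * HZ / 1000);
	add_timer(&self->watchdog);
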
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 2d63fa8e1556..b825399fc160 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -363,6 +363,18 @@ void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb)
363 clen = skb->data[0]; 363 clen = skb->data[0];
364 364
365 /* 365 /*
 366 * Input validation check: a stir4200/mcp2150 combination sometimes
 367 * results in frames with clen > remaining packet size. These are
 368 * illegal; if we throw away just this frame then it seems to carry on
 369 * fine.
370 */
371 if (unlikely(skb->len < (clen + 1))) {
372 IRDA_DEBUG(2, "%s() throwing away illegal frame\n",
373 __FUNCTION__ );
374 return;
375 }
376
377 /*
366 * If there are any data hiding in the control channel, we must 378 * If there are any data hiding in the control channel, we must
367 * deliver it first. The side effect is that the control channel 379 * deliver it first. The side effect is that the control channel
368 * will be removed from the skb 380 * will be removed from the skb
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 435b563d29a6..87185910d0ee 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -57,20 +57,6 @@ static void __irda_task_delete(struct irda_task *task);
57static hashbin_t *dongles = NULL; 57static hashbin_t *dongles = NULL;
58static hashbin_t *tasks = NULL; 58static hashbin_t *tasks = NULL;
59 59
60#ifdef CONFIG_IRDA_DEBUG
61static const char *task_state[] = {
62 "IRDA_TASK_INIT",
63 "IRDA_TASK_DONE",
64 "IRDA_TASK_WAIT",
65 "IRDA_TASK_WAIT1",
66 "IRDA_TASK_WAIT2",
67 "IRDA_TASK_WAIT3",
68 "IRDA_TASK_CHILD_INIT",
69 "IRDA_TASK_CHILD_WAIT",
70 "IRDA_TASK_CHILD_DONE",
71};
72#endif /* CONFIG_IRDA_DEBUG */
73
74static void irda_task_timer_expired(void *data); 60static void irda_task_timer_expired(void *data);
75 61
76int __init irda_device_init( void) 62int __init irda_device_init( void)
@@ -176,14 +162,6 @@ int irda_device_is_receiving(struct net_device *dev)
176 return req.ifr_receiving; 162 return req.ifr_receiving;
177} 163}
178 164
179void irda_task_next_state(struct irda_task *task, IRDA_TASK_STATE state)
180{
181 IRDA_DEBUG(2, "%s(), state = %s\n", __FUNCTION__, task_state[state]);
182
183 task->state = state;
184}
185EXPORT_SYMBOL(irda_task_next_state);
186
187static void __irda_task_delete(struct irda_task *task) 165static void __irda_task_delete(struct irda_task *task)
188{ 166{
189 del_timer(&task->timer); 167 del_timer(&task->timer);
@@ -191,14 +169,13 @@ static void __irda_task_delete(struct irda_task *task)
191 kfree(task); 169 kfree(task);
192} 170}
193 171
194void irda_task_delete(struct irda_task *task) 172static void irda_task_delete(struct irda_task *task)
195{ 173{
196 /* Unregister task */ 174 /* Unregister task */
197 hashbin_remove(tasks, (long) task, NULL); 175 hashbin_remove(tasks, (long) task, NULL);
198 176
199 __irda_task_delete(task); 177 __irda_task_delete(task);
200} 178}
201EXPORT_SYMBOL(irda_task_delete);
202 179
203/* 180/*
204 * Function irda_task_kick (task) 181 * Function irda_task_kick (task)
@@ -272,51 +249,6 @@ static int irda_task_kick(struct irda_task *task)
272} 249}
273 250
274/* 251/*
275 * Function irda_task_execute (instance, function, finished)
276 *
277 * This function registers and tries to execute tasks that may take some
278 * time to complete. We do it this hairy way since we may have been
279 * called from interrupt context, so it's not possible to use
280 * schedule_timeout()
281 * Two important notes :
282 * o Make sure you irda_task_delete(task); in case you delete the
283 * calling instance.
284 * o No real need to lock when calling this function, but you may
285 * want to lock within the task handler.
286 * Jean II
287 */
288struct irda_task *irda_task_execute(void *instance,
289 IRDA_TASK_CALLBACK function,
290 IRDA_TASK_CALLBACK finished,
291 struct irda_task *parent, void *param)
292{
293 struct irda_task *task;
294
295 IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
296
297 task = kmalloc(sizeof(struct irda_task), GFP_ATOMIC);
298 if (!task)
299 return NULL;
300
301 task->state = IRDA_TASK_INIT;
302 task->instance = instance;
303 task->function = function;
304 task->finished = finished;
305 task->parent = parent;
306 task->param = param;
307 task->magic = IRDA_TASK_MAGIC;
308
309 init_timer(&task->timer);
310
311 /* Register task */
312 hashbin_insert(tasks, (irda_queue_t *) task, (long) task, NULL);
313
314 /* No time to waste, so lets get going! */
315 return irda_task_kick(task) ? NULL : task;
316}
317EXPORT_SYMBOL(irda_task_execute);
318
319/*
320 * Function irda_task_timer_expired (data) 252 * Function irda_task_timer_expired (data)
321 * 253 *
322 * Task time has expired. We now try to execute task (again), and restart 254 * Task time has expired. We now try to execute task (again), and restart
@@ -364,105 +296,6 @@ struct net_device *alloc_irdadev(int sizeof_priv)
364} 296}
365EXPORT_SYMBOL(alloc_irdadev); 297EXPORT_SYMBOL(alloc_irdadev);
366 298
367/*
368 * Function irda_device_init_dongle (self, type, qos)
369 *
370 * Initialize attached dongle.
371 *
372 * Important : request_module require us to call this function with
373 * a process context and irq enabled. - Jean II
374 */
375dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
376{
377 struct dongle_reg *reg;
378 dongle_t *dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
379
380 might_sleep();
381
382 spin_lock(&dongles->hb_spinlock);
383 reg = hashbin_find(dongles, type, NULL);
384
385#ifdef CONFIG_KMOD
386 /* Try to load the module needed */
387 if (!reg && capable(CAP_SYS_MODULE)) {
388 spin_unlock(&dongles->hb_spinlock);
389
390 request_module("irda-dongle-%d", type);
391
392 spin_lock(&dongles->hb_spinlock);
393 reg = hashbin_find(dongles, type, NULL);
394 }
395#endif
396
397 if (!reg || !try_module_get(reg->owner) ) {
398 IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
399 type);
400 kfree(dongle);
401 dongle = NULL;
402 }
403 if (dongle) {
404 /* Bind the registration info to this particular instance */
405 dongle->issue = reg;
406 dongle->dev = dev;
407 }
408 spin_unlock(&dongles->hb_spinlock);
409 return dongle;
410}
411EXPORT_SYMBOL(irda_device_dongle_init);
412
413/*
414 * Function irda_device_dongle_cleanup (dongle)
415 */
416int irda_device_dongle_cleanup(dongle_t *dongle)
417{
418 IRDA_ASSERT(dongle != NULL, return -1;);
419
420 dongle->issue->close(dongle);
421 module_put(dongle->issue->owner);
422 kfree(dongle);
423
424 return 0;
425}
426EXPORT_SYMBOL(irda_device_dongle_cleanup);
427
428/*
429 * Function irda_device_register_dongle (dongle)
430 */
431int irda_device_register_dongle(struct dongle_reg *new)
432{
433 spin_lock(&dongles->hb_spinlock);
434 /* Check if this dongle has been registered before */
435 if (hashbin_find(dongles, new->type, NULL)) {
436 IRDA_MESSAGE("%s: Dongle type %x already registered\n",
437 __FUNCTION__, new->type);
438 } else {
439 /* Insert IrDA dongle into hashbin */
440 hashbin_insert(dongles, (irda_queue_t *) new, new->type, NULL);
441 }
442 spin_unlock(&dongles->hb_spinlock);
443
444 return 0;
445}
446EXPORT_SYMBOL(irda_device_register_dongle);
447
448/*
449 * Function irda_device_unregister_dongle (dongle)
450 *
451 * Unregister dongle, and remove dongle from list of registered dongles
452 *
453 */
454void irda_device_unregister_dongle(struct dongle_reg *dongle)
455{
456 struct dongle *node;
457
458 spin_lock(&dongles->hb_spinlock);
459 node = hashbin_remove(dongles, dongle->type, NULL);
460 if (!node)
461 IRDA_ERROR("%s: dongle not found!\n", __FUNCTION__);
462 spin_unlock(&dongles->hb_spinlock);
463}
464EXPORT_SYMBOL(irda_device_unregister_dongle);
465
466#ifdef CONFIG_ISA_DMA_API 299#ifdef CONFIG_ISA_DMA_API
467/* 300/*
468 * Function setup_dma (idev, buffer, count, mode) 301 * Function setup_dma (idev, buffer, count, mode)
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index a86a5d83786b..390a790886eb 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -579,7 +579,7 @@ static void iriap_getvaluebyclass_response(struct iriap_cb *self,
579 fp[n++] = ret_code; 579 fp[n++] = ret_code;
580 580
581 /* Insert list length (MSB first) */ 581 /* Insert list length (MSB first) */
582 tmp_be16 = __constant_htons(0x0001); 582 tmp_be16 = htons(0x0001);
583 memcpy(fp+n, &tmp_be16, 2); n += 2; 583 memcpy(fp+n, &tmp_be16, 2); n += 2;
584 584
585 /* Insert object identifier ( MSB first) */ 585 /* Insert object identifier ( MSB first) */
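
The __constant_htons() removal is safe because htons() already folds to a
compile-time constant when given a constant argument, so both spellings
generate identical code here:

	__be16 tmp_be16 = htons(0x0001);	/* folded at compile time */
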
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 4c33bf5c8354..6af86eba7463 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -1199,6 +1199,19 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
1199 1199
1200 switch (event) { 1200 switch (event) {
1201 case RECV_I_RSP: /* Optimize for the common case */ 1201 case RECV_I_RSP: /* Optimize for the common case */
1202 if (unlikely(skb->len <= LAP_ADDR_HEADER + LAP_CTRL_HEADER)) {
1203 /*
1204 * Input validation check: a stir4200/mcp2150
1205 * combination sometimes results in an empty i:rsp.
1206 * This makes no sense; we can just ignore the frame
1207 * and send an rr:cmd immediately. This happens before
1208 * changing nr or ns so triggers a retransmit
1209 */
1210 irlap_wait_min_turn_around(self, &self->qos_tx);
1211 irlap_send_rr_frame(self, CMD_FRAME);
1212 /* Keep state */
1213 break;
1214 }
1202 /* FIXME: must check for remote_busy below */ 1215 /* FIXME: must check for remote_busy below */
1203#ifdef CONFIG_IRDA_FAST_RR 1216#ifdef CONFIG_IRDA_FAST_RR
1204 /* 1217 /*
@@ -1514,9 +1527,15 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
1514 1527
1515 /* N2 is the disconnect timer. Until we reach it, we retry */ 1528 /* N2 is the disconnect timer. Until we reach it, we retry */
1516 if (self->retry_count < self->N2) { 1529 if (self->retry_count < self->N2) {
1517 /* Retry sending the pf bit to the secondary */ 1530 if (skb_peek(&self->wx_list) == NULL) {
1518 irlap_wait_min_turn_around(self, &self->qos_tx); 1531 /* Retry sending the pf bit to the secondary */
1519 irlap_send_rr_frame(self, CMD_FRAME); 1532 IRDA_DEBUG(4, "nrm_p: resending rr");
1533 irlap_wait_min_turn_around(self, &self->qos_tx);
1534 irlap_send_rr_frame(self, CMD_FRAME);
1535 } else {
1536 IRDA_DEBUG(4, "nrm_p: resend frames");
1537 irlap_resend_rejected_frames(self, CMD_FRAME);
1538 }
1520 1539
1521 irlap_start_final_timer(self, self->final_timeout); 1540 irlap_start_final_timer(self, self->final_timeout);
1522 self->retry_count++; 1541 self->retry_count++;
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index f24cb755908e..135ac6907bbf 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -103,9 +103,12 @@ int __init irlmp_init(void)
103 irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */ 103 irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */
104 strcpy(sysctl_devname, "Linux"); 104 strcpy(sysctl_devname, "Linux");
105 105
106 /* Do discovery every 3 seconds */
107 init_timer(&irlmp->discovery_timer); 106 init_timer(&irlmp->discovery_timer);
108 irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ); 107
 108 /* Do discovery every 3 seconds, conditionally */
109 if (sysctl_discovery)
110 irlmp_start_discovery_timer(irlmp,
111 sysctl_discovery_timeout*HZ);
109 112
110 return 0; 113 return 0;
111} 114}
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
index 1bba87e78609..150cd3f1129a 100644
--- a/net/irda/irlmp_event.c
+++ b/net/irda/irlmp_event.c
@@ -174,9 +174,7 @@ void irlmp_discovery_timer_expired(void *data)
174 /* We always cleanup the log (active & passive discovery) */ 174 /* We always cleanup the log (active & passive discovery) */
175 irlmp_do_expiry(); 175 irlmp_do_expiry();
176 176
177 /* Active discovery is conditional */ 177 irlmp_do_discovery(sysctl_discovery_slots);
178 if (sysctl_discovery)
179 irlmp_do_discovery(sysctl_discovery_slots);
180 178
181 /* Restart timer */ 179 /* Restart timer */
182 irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ); 180 irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ);
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index 565cbf0421cd..9ab3df15425d 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -29,6 +29,8 @@
29#include <linux/init.h> 29#include <linux/init.h>
30 30
31#include <net/irda/irda.h> /* irda_debug */ 31#include <net/irda/irda.h> /* irda_debug */
32#include <net/irda/irlmp.h>
33#include <net/irda/timer.h>
32#include <net/irda/irias_object.h> 34#include <net/irda/irias_object.h>
33 35
34extern int sysctl_discovery; 36extern int sysctl_discovery;
@@ -45,6 +47,8 @@ extern int sysctl_max_noreply_time;
45extern int sysctl_warn_noreply_time; 47extern int sysctl_warn_noreply_time;
46extern int sysctl_lap_keepalive_time; 48extern int sysctl_lap_keepalive_time;
47 49
50extern struct irlmp_cb *irlmp;
51
48/* this is needed for the proc_dointvec_minmax - Jean II */ 52/* this is needed for the proc_dointvec_minmax - Jean II */
49static int max_discovery_slots = 16; /* ??? */ 53static int max_discovery_slots = 16; /* ??? */
50static int min_discovery_slots = 1; 54static int min_discovery_slots = 1;
@@ -85,6 +89,27 @@ static int do_devname(ctl_table *table, int write, struct file *filp,
85 return ret; 89 return ret;
86} 90}
87 91
92
93static int do_discovery(ctl_table *table, int write, struct file *filp,
94 void __user *buffer, size_t *lenp, loff_t *ppos)
95{
96 int ret;
97
98 ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
99 if (ret)
100 return ret;
101
102 if (irlmp == NULL)
103 return -ENODEV;
104
105 if (sysctl_discovery)
106 irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ);
107 else
108 del_timer_sync(&irlmp->discovery_timer);
109
110 return ret;
111}
112
88/* One file */ 113/* One file */
89static ctl_table irda_table[] = { 114static ctl_table irda_table[] = {
90 { 115 {
@@ -93,7 +118,8 @@ static ctl_table irda_table[] = {
93 .data = &sysctl_discovery, 118 .data = &sysctl_discovery,
94 .maxlen = sizeof(int), 119 .maxlen = sizeof(int),
95 .mode = 0644, 120 .mode = 0644,
96 .proc_handler = &proc_dointvec 121 .proc_handler = &do_discovery,
122 .strategy = &sysctl_intvec
97 }, 123 },
98 { 124 {
99 .ctl_name = NET_IRDA_DEVNAME, 125 .ctl_name = NET_IRDA_DEVNAME,
@@ -234,28 +260,10 @@ static ctl_table irda_table[] = {
234 { .ctl_name = 0 } 260 { .ctl_name = 0 }
235}; 261};
236 262
237/* One directory */ 263static struct ctl_path irda_path[] = {
238static ctl_table irda_net_table[] = { 264 { .procname = "net", .ctl_name = CTL_NET, },
239 { 265 { .procname = "irda", .ctl_name = NET_IRDA, },
240 .ctl_name = NET_IRDA, 266 { }
241 .procname = "irda",
242 .maxlen = 0,
243 .mode = 0555,
244 .child = irda_table
245 },
246 { .ctl_name = 0 }
247};
248
249/* The parent directory */
250static ctl_table irda_root_table[] = {
251 {
252 .ctl_name = CTL_NET,
253 .procname = "net",
254 .maxlen = 0,
255 .mode = 0555,
256 .child = irda_net_table
257 },
258 { .ctl_name = 0 }
259}; 267};
260 268
261static struct ctl_table_header *irda_table_header; 269static struct ctl_table_header *irda_table_header;
@@ -268,7 +276,7 @@ static struct ctl_table_header *irda_table_header;
268 */ 276 */
269int __init irda_sysctl_register(void) 277int __init irda_sysctl_register(void)
270{ 278{
271 irda_table_header = register_sysctl_table(irda_root_table); 279 irda_table_header = register_sysctl_paths(irda_path, irda_table);
272 if (!irda_table_header) 280 if (!irda_table_header)
273 return -ENOMEM; 281 return -ENOMEM;
274 282
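
The new do_discovery() handler makes writes to the discovery sysctl start or
stop the discovery timer instead of merely storing the flag. As written it
applies the side effect after reads too, since it only checks the
proc_dointvec() return value; the usual wrapper pattern also gates on the
write flag, as in this sketch with a placeholder name:

	/* Sketch: run the stock handler, then act only on successful writes. */
	static int example_handler(ctl_table *table, int write, struct file *filp,
				   void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		int ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);

		if (ret || !write)
			return ret;
		/* ...apply the side effect of the new value here... */
		return 0;
	}
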
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index aef664580355..2255e3c082ed 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -94,13 +94,6 @@ static void iucv_sock_clear_timer(struct sock *sk)
94 sk_stop_timer(sk, &sk->sk_timer); 94 sk_stop_timer(sk, &sk->sk_timer);
95} 95}
96 96
97static void iucv_sock_init_timer(struct sock *sk)
98{
99 init_timer(&sk->sk_timer);
100 sk->sk_timer.function = iucv_sock_timeout;
101 sk->sk_timer.data = (unsigned long)sk;
102}
103
104static struct sock *__iucv_get_sock_by_name(char *nm) 97static struct sock *__iucv_get_sock_by_name(char *nm)
105{ 98{
106 struct sock *sk; 99 struct sock *sk;
@@ -238,7 +231,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
238 sk->sk_protocol = proto; 231 sk->sk_protocol = proto;
239 sk->sk_state = IUCV_OPEN; 232 sk->sk_state = IUCV_OPEN;
240 233
241 iucv_sock_init_timer(sk); 234 setup_timer(&sk->sk_timer, iucv_sock_timeout, (unsigned long)sk);
242 235
243 iucv_sock_link(&iucv_sk_list, sk); 236 iucv_sock_link(&iucv_sk_list, sk);
244 return sk; 237 return sk;
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 7698f6c459d6..f13fe8821cbd 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1492,7 +1492,7 @@ static void iucv_tasklet_fn(unsigned long ignored)
1492 [0x08] = iucv_message_pending, 1492 [0x08] = iucv_message_pending,
1493 [0x09] = iucv_message_pending, 1493 [0x09] = iucv_message_pending,
1494 }; 1494 };
1495 struct list_head task_queue = LIST_HEAD_INIT(task_queue); 1495 LIST_HEAD(task_queue);
1496 struct iucv_irq_list *p, *n; 1496 struct iucv_irq_list *p, *n;
1497 1497
1498 /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ 1498 /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
@@ -1526,7 +1526,7 @@ static void iucv_tasklet_fn(unsigned long ignored)
1526static void iucv_work_fn(struct work_struct *work) 1526static void iucv_work_fn(struct work_struct *work)
1527{ 1527{
1528 typedef void iucv_irq_fn(struct iucv_irq_data *); 1528 typedef void iucv_irq_fn(struct iucv_irq_data *);
1529 struct list_head work_queue = LIST_HEAD_INIT(work_queue); 1529 LIST_HEAD(work_queue);
1530 struct iucv_irq_list *p, *n; 1530 struct iucv_irq_list *p, *n;
1531 1531
1532 /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ 1532 /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
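
The LIST_HEAD() conversions here (and in lapb below) are one-for-one: the
macro declares and initializes the list head in a single step.

	#include <linux/list.h>

	LIST_HEAD(task_queue);
	/* ...expands to the open-coded form it replaces:
	 * struct list_head task_queue = LIST_HEAD_INIT(task_queue); */
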
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 76dcd882f87b..16b72b5570c3 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2291,8 +2291,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2291 return 0; 2291 return 0;
2292 2292
2293out: 2293out:
2294 security_xfrm_policy_free(xp); 2294 xfrm_policy_destroy(xp);
2295 kfree(xp);
2296 return err; 2295 return err;
2297} 2296}
2298 2297
@@ -3236,8 +3235,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
3236 return xp; 3235 return xp;
3237 3236
3238out: 3237out:
3239 security_xfrm_policy_free(xp); 3238 xfrm_policy_destroy(xp);
3240 kfree(xp);
3241 return NULL; 3239 return NULL;
3242} 3240}
3243 3241
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index a2e7aa63fd8a..2ba1bc4f3c3a 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -39,7 +39,7 @@
39#include <linux/init.h> 39#include <linux/init.h>
40#include <net/lapb.h> 40#include <net/lapb.h>
41 41
42static struct list_head lapb_list = LIST_HEAD_INIT(lapb_list); 42static LIST_HEAD(lapb_list);
43static DEFINE_RWLOCK(lapb_list_lock); 43static DEFINE_RWLOCK(lapb_list_lock);
44 44
45/* 45/*
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5c0b484237c8..441bc18f996d 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -831,25 +831,21 @@ static void llc_sk_init(struct sock* sk)
831 llc->inc_cntr = llc->dec_cntr = 2; 831 llc->inc_cntr = llc->dec_cntr = 2;
832 llc->dec_step = llc->connect_step = 1; 832 llc->dec_step = llc->connect_step = 1;
833 833
834 init_timer(&llc->ack_timer.timer); 834 setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb,
835 (unsigned long)sk);
835 llc->ack_timer.expire = sysctl_llc2_ack_timeout; 836 llc->ack_timer.expire = sysctl_llc2_ack_timeout;
836 llc->ack_timer.timer.data = (unsigned long)sk;
837 llc->ack_timer.timer.function = llc_conn_ack_tmr_cb;
838 837
839 init_timer(&llc->pf_cycle_timer.timer); 838 setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb,
839 (unsigned long)sk);
840 llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; 840 llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout;
841 llc->pf_cycle_timer.timer.data = (unsigned long)sk;
842 llc->pf_cycle_timer.timer.function = llc_conn_pf_cycle_tmr_cb;
843 841
844 init_timer(&llc->rej_sent_timer.timer); 842 setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb,
843 (unsigned long)sk);
845 llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; 844 llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout;
846 llc->rej_sent_timer.timer.data = (unsigned long)sk;
847 llc->rej_sent_timer.timer.function = llc_conn_rej_tmr_cb;
848 845
849 init_timer(&llc->busy_state_timer.timer); 846 setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb,
847 (unsigned long)sk);
850 llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; 848 llc->busy_state_timer.expire = sysctl_llc2_busy_timeout;
851 llc->busy_state_timer.timer.data = (unsigned long)sk;
852 llc->busy_state_timer.timer.function = llc_conn_busy_tmr_cb;
853 849
854 llc->n2 = 2; /* max retransmit */ 850 llc->n2 = 2; /* max retransmit */
855 llc->k = 2; /* tx win size, will adjust dynam */ 851 llc->k = 2; /* tx win size, will adjust dynam */
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 576355a192ab..6f2ea2090322 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -688,9 +688,8 @@ int __init llc_station_init(void)
688 skb_queue_head_init(&llc_main_station.mac_pdu_q); 688 skb_queue_head_init(&llc_main_station.mac_pdu_q);
689 skb_queue_head_init(&llc_main_station.ev_q.list); 689 skb_queue_head_init(&llc_main_station.ev_q.list);
690 spin_lock_init(&llc_main_station.ev_q.lock); 690 spin_lock_init(&llc_main_station.ev_q.lock);
691 init_timer(&llc_main_station.ack_timer); 691 setup_timer(&llc_main_station.ack_timer, llc_station_ack_tmr_cb,
692 llc_main_station.ack_timer.data = (unsigned long)&llc_main_station; 692 (unsigned long)&llc_main_station);
693 llc_main_station.ack_timer.function = llc_station_ack_tmr_cb;
694 llc_main_station.ack_timer.expires = jiffies + 693 llc_main_station.ack_timer.expires = jiffies +
695 sysctl_llc_station_ack_timeout; 694 sysctl_llc_station_ack_timeout;
696 skb = alloc_skb(0, GFP_ATOMIC); 695 skb = alloc_skb(0, GFP_ATOMIC);
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 46992d036017..5bef1dcf18e3 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -92,31 +92,17 @@ static struct ctl_table llc_table[] = {
92 { 0 }, 92 { 0 },
93}; 93};
94 94
95static struct ctl_table llc_dir_table[] = { 95static struct ctl_path llc_path[] = {
96 { 96 { .procname = "net", .ctl_name = CTL_NET, },
97 .ctl_name = NET_LLC, 97 { .procname = "llc", .ctl_name = NET_LLC, },
98 .procname = "llc", 98 { }
99 .mode = 0555,
100 .child = llc_table,
101 },
102 { 0 },
103};
104
105static struct ctl_table llc_root_table[] = {
106 {
107 .ctl_name = CTL_NET,
108 .procname = "net",
109 .mode = 0555,
110 .child = llc_dir_table,
111 },
112 { 0 },
113}; 99};
114 100
115static struct ctl_table_header *llc_table_header; 101static struct ctl_table_header *llc_table_header;
116 102
117int __init llc_sysctl_init(void) 103int __init llc_sysctl_init(void)
118{ 104{
119 llc_table_header = register_sysctl_table(llc_root_table); 105 llc_table_header = register_sysctl_paths(llc_path, llc_table);
120 106
121 return llc_table_header ? 0 : -ENOMEM; 107 return llc_table_header ? 0 : -ENOMEM;
122} 108}
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index ce176e691afe..09c255002e56 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -10,27 +10,84 @@ config MAC80211
10 select CFG80211 10 select CFG80211
11 select NET_SCH_FIFO 11 select NET_SCH_FIFO
12 ---help--- 12 ---help---
13 This option enables the hardware independent IEEE 802.11 13 This option enables the hardware independent IEEE 802.11
14 networking stack. 14 networking stack.
15 15
16config MAC80211_RCSIMPLE 16menu "Rate control algorithm selection"
17 bool "'simple' rate control algorithm" if EMBEDDED 17 depends on MAC80211 != n
18 default y 18
19 depends on MAC80211 19choice
20 prompt "Default rate control algorithm"
21 default MAC80211_RC_DEFAULT_PID
22 ---help---
23 This option selects the default rate control algorithm
24 mac80211 will use. Note that this default can still be
 25 overridden through the ieee80211_default_rc_algo module
26 parameter if different algorithms are available.
27
28config MAC80211_RC_DEFAULT_PID
29 bool "PID controller based rate control algorithm"
30 select MAC80211_RC_PID
31 ---help---
32 Select the PID controller based rate control as the
33 default rate control algorithm. You should choose
34 this unless you know what you are doing.
35
36config MAC80211_RC_DEFAULT_SIMPLE
37 bool "Simple rate control algorithm"
38 select MAC80211_RC_SIMPLE
39 ---help---
40 Select the simple rate control as the default rate
41 control algorithm. Note that this is a non-responsive,
42 dumb algorithm. You should choose the PID rate control
43 instead.
44
45config MAC80211_RC_DEFAULT_NONE
46 bool "No default algorithm"
47 depends on EMBEDDED
20 help 48 help
21 This option allows you to turn off the 'simple' rate 49 Selecting this option will select no default algorithm
22 control algorithm in mac80211. If you do turn it off, 50 and allow you to not build any. Do not choose this
23 you absolutely need another rate control algorithm. 51 option unless you know your driver comes with another
52 suitable algorithm.
53endchoice
54
55comment "Selecting 'y' for an algorithm will"
56comment "build the algorithm into mac80211."
57
58config MAC80211_RC_DEFAULT
59 string
60 default "pid" if MAC80211_RC_DEFAULT_PID
61 default "simple" if MAC80211_RC_DEFAULT_SIMPLE
62 default ""
24 63
25 Say Y unless you know you will have another algorithm 64config MAC80211_RC_PID
26 available. 65 tristate "PID controller based rate control algorithm"
66 ---help---
67 This option enables a TX rate control algorithm for
68 mac80211 that uses a PID controller to select the TX
69 rate.
70
71 Say Y or M unless you're sure you want to use a
72 different rate control algorithm.
73
74config MAC80211_RC_SIMPLE
75 tristate "Simple rate control algorithm (DEPRECATED)"
76 ---help---
77 This option enables a very simple, non-responsive TX
78 rate control algorithm. This algorithm is deprecated
79 and will be removed from the kernel in the near future.
80 It has been replaced by the PID algorithm.
81
82 Say N unless you know what you are doing.
83endmenu
27 84
28config MAC80211_LEDS 85config MAC80211_LEDS
29 bool "Enable LED triggers" 86 bool "Enable LED triggers"
30 depends on MAC80211 && LEDS_TRIGGERS 87 depends on MAC80211 && LEDS_TRIGGERS
31 ---help--- 88 ---help---
32 This option enables a few LED triggers for different 89 This option enables a few LED triggers for different
33 packet receive/transmit events. 90 packet receive/transmit events.
34 91
35config MAC80211_DEBUGFS 92config MAC80211_DEBUGFS
36 bool "Export mac80211 internals in DebugFS" 93 bool "Export mac80211 internals in DebugFS"
@@ -51,6 +108,16 @@ config MAC80211_DEBUG
51 If you are not trying to debug or develop the ieee80211 108 If you are not trying to debug or develop the ieee80211
52 subsystem, you most likely want to say N here. 109 subsystem, you most likely want to say N here.
53 110
111config MAC80211_HT_DEBUG
112 bool "Enable HT debugging output"
113 depends on MAC80211_DEBUG
114 ---help---
 115 This option enables debug tracing output for the
 116 802.11n High Throughput features.
117
 118 If you are not trying to debug or develop the ieee80211
119 subsystem, you most likely want to say N here.
120
54config MAC80211_VERBOSE_DEBUG 121config MAC80211_VERBOSE_DEBUG
55 bool "Verbose debugging output" 122 bool "Verbose debugging output"
56 depends on MAC80211_DEBUG 123 depends on MAC80211_DEBUG
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 1e6237b34846..54f46bc80cfe 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,11 +1,15 @@
1obj-$(CONFIG_MAC80211) += mac80211.o 1obj-$(CONFIG_MAC80211) += mac80211.o
2 2
3mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o 3# objects for PID algorithm
4mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o 4rc80211_pid-y := rc80211_pid_algo.o
5mac80211-objs-$(CONFIG_NET_SCHED) += wme.o 5rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
6mac80211-objs-$(CONFIG_MAC80211_RCSIMPLE) += rc80211_simple.o
7 6
8mac80211-objs := \ 7# build helper for PID algorithm
8rc-pid-y := $(rc80211_pid-y)
9rc-pid-m := rc80211_pid.o
10
11# mac80211 objects
12mac80211-y := \
9 ieee80211.o \ 13 ieee80211.o \
10 ieee80211_ioctl.o \ 14 ieee80211_ioctl.o \
11 sta_info.o \ 15 sta_info.o \
@@ -23,5 +27,22 @@ mac80211-objs := \
23 tx.o \ 27 tx.o \
24 key.o \ 28 key.o \
25 util.o \ 29 util.o \
26 event.o \ 30 event.o
27 $(mac80211-objs-y) 31
32mac80211-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o
33mac80211-$(CONFIG_NET_SCHED) += wme.o
34mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
35 debugfs.o \
36 debugfs_sta.o \
37 debugfs_netdev.o \
38 debugfs_key.o
39
40
41# Build rate control algorithm(s)
42CFLAGS_rc80211_simple.o += -DRC80211_SIMPLE_COMPILE
43CFLAGS_rc80211_pid_algo.o += -DRC80211_PID_COMPILE
44mac80211-$(CONFIG_MAC80211_RC_SIMPLE) += rc80211_simple.o
45mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc-pid-$(CONFIG_MAC80211_RC_PID))
46
47# Modular rate algorithms are assigned to mac80211-m - make separate modules
48obj-m += $(mac80211-m)
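
The rc-pid-y/rc-pid-m indirection makes the same PID sources either link into mac80211 (CONFIG_MAC80211_RC_PID=y) or build as a standalone rc80211_pid.o module (=m), which the final obj-m line then picks up from mac80211-m. The -DRC80211_PID_COMPILE define presumably switches the init glue in the sources between the two modes; a hedged sketch of that pattern (the #ifdef structure and the example_pid_ops name are assumptions, only rc80211_pid_init()/rc80211_pid_exit() appear in this diff):

static int __init rc80211_pid_register(void)
{
	/* example_pid_ops stands in for the real ops structure */
	return ieee80211_rate_control_register(&example_pid_ops);
}

#ifdef RC80211_PID_COMPILE
/* linked into mac80211: the core calls this directly from its init */
int __init rc80211_pid_init(void)
{
	return rc80211_pid_register();
}
#else
/* standalone rc80211_pid.ko: ordinary module entry point */
module_init(rc80211_pid_register);
#endif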
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9e2bc1fd0237..22c9619ba776 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1,17 +1,20 @@
1/* 1/*
2 * mac80211 configuration hooks for cfg80211 2 * mac80211 configuration hooks for cfg80211
3 * 3 *
4 * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> 4 * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
5 * 5 *
6 * This file is GPLv2 as found in COPYING. 6 * This file is GPLv2 as found in COPYING.
7 */ 7 */
8 8
9#include <linux/ieee80211.h>
9#include <linux/nl80211.h> 10#include <linux/nl80211.h>
10#include <linux/rtnetlink.h> 11#include <linux/rtnetlink.h>
11#include <net/net_namespace.h> 12#include <net/net_namespace.h>
13#include <linux/rcupdate.h>
12#include <net/cfg80211.h> 14#include <net/cfg80211.h>
13#include "ieee80211_i.h" 15#include "ieee80211_i.h"
14#include "cfg.h" 16#include "cfg.h"
17#include "ieee80211_rate.h"
15 18
16static enum ieee80211_if_types 19static enum ieee80211_if_types
17nl80211_type_to_mac80211_type(enum nl80211_iftype type) 20nl80211_type_to_mac80211_type(enum nl80211_iftype type)
@@ -90,7 +93,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
90 93
91 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 94 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
92 95
93 if (sdata->type == IEEE80211_IF_TYPE_VLAN) 96 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
94 return -EOPNOTSUPP; 97 return -EOPNOTSUPP;
95 98
96 ieee80211_if_reinit(dev); 99 ieee80211_if_reinit(dev);
@@ -99,8 +102,553 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
99 return 0; 102 return 0;
100} 103}
101 104
105static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
106 u8 key_idx, u8 *mac_addr,
107 struct key_params *params)
108{
109 struct ieee80211_sub_if_data *sdata;
110 struct sta_info *sta = NULL;
111 enum ieee80211_key_alg alg;
112 int ret;
113
114 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
115
116 switch (params->cipher) {
117 case WLAN_CIPHER_SUITE_WEP40:
118 case WLAN_CIPHER_SUITE_WEP104:
119 alg = ALG_WEP;
120 break;
121 case WLAN_CIPHER_SUITE_TKIP:
122 alg = ALG_TKIP;
123 break;
124 case WLAN_CIPHER_SUITE_CCMP:
125 alg = ALG_CCMP;
126 break;
127 default:
128 return -EINVAL;
129 }
130
131 if (mac_addr) {
132 sta = sta_info_get(sdata->local, mac_addr);
133 if (!sta)
134 return -ENOENT;
135 }
136
137 ret = 0;
138 if (!ieee80211_key_alloc(sdata, sta, alg, key_idx,
139 params->key_len, params->key))
140 ret = -ENOMEM;
141
142 if (sta)
143 sta_info_put(sta);
144
145 return ret;
146}
147
148static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
149 u8 key_idx, u8 *mac_addr)
150{
151 struct ieee80211_sub_if_data *sdata;
152 struct sta_info *sta;
153 int ret;
154
155 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
156
157 if (mac_addr) {
158 sta = sta_info_get(sdata->local, mac_addr);
159 if (!sta)
160 return -ENOENT;
161
162 ret = 0;
163 if (sta->key)
164 ieee80211_key_free(sta->key);
165 else
166 ret = -ENOENT;
167
168 sta_info_put(sta);
169 return ret;
170 }
171
172 if (!sdata->keys[key_idx])
173 return -ENOENT;
174
175 ieee80211_key_free(sdata->keys[key_idx]);
176
177 return 0;
178}
179
180static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
181 u8 key_idx, u8 *mac_addr, void *cookie,
182 void (*callback)(void *cookie,
183 struct key_params *params))
184{
185 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
186 struct sta_info *sta = NULL;
187 u8 seq[6] = {0};
188 struct key_params params;
189 struct ieee80211_key *key;
190 u32 iv32;
191 u16 iv16;
192 int err = -ENOENT;
193
194 if (mac_addr) {
195 sta = sta_info_get(sdata->local, mac_addr);
196 if (!sta)
197 goto out;
198
199 key = sta->key;
200 } else
201 key = sdata->keys[key_idx];
202
203 if (!key)
204 goto out;
205
206 memset(&params, 0, sizeof(params));
207
208 switch (key->conf.alg) {
209 case ALG_TKIP:
210 params.cipher = WLAN_CIPHER_SUITE_TKIP;
211
212 iv32 = key->u.tkip.iv32;
213 iv16 = key->u.tkip.iv16;
214
215 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
216 sdata->local->ops->get_tkip_seq)
217 sdata->local->ops->get_tkip_seq(
218 local_to_hw(sdata->local),
219 key->conf.hw_key_idx,
220 &iv32, &iv16);
221
222 seq[0] = iv16 & 0xff;
223 seq[1] = (iv16 >> 8) & 0xff;
224 seq[2] = iv32 & 0xff;
225 seq[3] = (iv32 >> 8) & 0xff;
226 seq[4] = (iv32 >> 16) & 0xff;
227 seq[5] = (iv32 >> 24) & 0xff;
228 params.seq = seq;
229 params.seq_len = 6;
230 break;
231 case ALG_CCMP:
232 params.cipher = WLAN_CIPHER_SUITE_CCMP;
233 seq[0] = key->u.ccmp.tx_pn[5];
234 seq[1] = key->u.ccmp.tx_pn[4];
235 seq[2] = key->u.ccmp.tx_pn[3];
236 seq[3] = key->u.ccmp.tx_pn[2];
237 seq[4] = key->u.ccmp.tx_pn[1];
238 seq[5] = key->u.ccmp.tx_pn[0];
239 params.seq = seq;
240 params.seq_len = 6;
241 break;
242 case ALG_WEP:
243 if (key->conf.keylen == 5)
244 params.cipher = WLAN_CIPHER_SUITE_WEP40;
245 else
246 params.cipher = WLAN_CIPHER_SUITE_WEP104;
247 break;
248 }
249
250 params.key = key->conf.key;
251 params.key_len = key->conf.keylen;
252
253 callback(cookie, &params);
254 err = 0;
255
256 out:
257 if (sta)
258 sta_info_put(sta);
259 return err;
260}
261
262static int ieee80211_config_default_key(struct wiphy *wiphy,
263 struct net_device *dev,
264 u8 key_idx)
265{
266 struct ieee80211_sub_if_data *sdata;
267
268 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
269 ieee80211_set_default_key(sdata, key_idx);
270
271 return 0;
272}
273
274static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
275 u8 *mac, struct station_stats *stats)
276{
277 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
278 struct sta_info *sta;
279
280 sta = sta_info_get(local, mac);
281 if (!sta)
282 return -ENOENT;
283
284 /* XXX: verify sta->dev == dev */
285
286 stats->filled = STATION_STAT_INACTIVE_TIME |
287 STATION_STAT_RX_BYTES |
288 STATION_STAT_TX_BYTES;
289
290 stats->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
291 stats->rx_bytes = sta->rx_bytes;
292 stats->tx_bytes = sta->tx_bytes;
293
294 sta_info_put(sta);
295
296 return 0;
297}
298
299/*
300 * This handles both adding a beacon and setting new beacon info
301 */
302static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
303 struct beacon_parameters *params)
304{
305 struct beacon_data *new, *old;
306 int new_head_len, new_tail_len;
307 int size;
308 int err = -EINVAL;
309
310 old = sdata->u.ap.beacon;
311
312 /* head must not be zero-length */
313 if (params->head && !params->head_len)
314 return -EINVAL;
315
316 /*
317	 * This is a kludge. The beacon interval should really
318	 * be part of the beacon information.
319 */
320 if (params->interval) {
321 sdata->local->hw.conf.beacon_int = params->interval;
322 if (ieee80211_hw_config(sdata->local))
323 return -EINVAL;
324 /*
325	 * We updated some parameter, so if the code below
326	 * bails out it's not an error.
327 */
328 err = 0;
329 }
330
331 /* Need to have a beacon head if we don't have one yet */
332 if (!params->head && !old)
333 return err;
334
335 /* sorry, no way to start beaconing without dtim period */
336 if (!params->dtim_period && !old)
337 return err;
338
339 /* new or old head? */
340 if (params->head)
341 new_head_len = params->head_len;
342 else
343 new_head_len = old->head_len;
344
345 /* new or old tail? */
346 if (params->tail || !old)
347 /* params->tail_len will be zero for !params->tail */
348 new_tail_len = params->tail_len;
349 else
350 new_tail_len = old->tail_len;
351
352 size = sizeof(*new) + new_head_len + new_tail_len;
353
354 new = kzalloc(size, GFP_KERNEL);
355 if (!new)
356 return -ENOMEM;
357
358 /* start filling the new info now */
359
360 /* new or old dtim period? */
361 if (params->dtim_period)
362 new->dtim_period = params->dtim_period;
363 else
364 new->dtim_period = old->dtim_period;
365
366 /*
367 * pointers go into the block we allocated,
368 * memory is | beacon_data | head | tail |
369 */
370 new->head = ((u8 *) new) + sizeof(*new);
371 new->tail = new->head + new_head_len;
372 new->head_len = new_head_len;
373 new->tail_len = new_tail_len;
374
375 /* copy in head */
376 if (params->head)
377 memcpy(new->head, params->head, new_head_len);
378 else
379 memcpy(new->head, old->head, new_head_len);
380
381 /* copy in optional tail */
382 if (params->tail)
383 memcpy(new->tail, params->tail, new_tail_len);
384 else
385 if (old)
386 memcpy(new->tail, old->tail, new_tail_len);
387
388 rcu_assign_pointer(sdata->u.ap.beacon, new);
389
390 synchronize_rcu();
391
392 kfree(old);
393
394 return ieee80211_if_config_beacon(sdata->dev);
395}
396
397static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
398 struct beacon_parameters *params)
399{
400 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
401 struct beacon_data *old;
402
403 if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
404 return -EINVAL;
405
406 old = sdata->u.ap.beacon;
407
408 if (old)
409 return -EALREADY;
410
411 return ieee80211_config_beacon(sdata, params);
412}
413
414static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
415 struct beacon_parameters *params)
416{
417 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
418 struct beacon_data *old;
419
420 if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
421 return -EINVAL;
422
423 old = sdata->u.ap.beacon;
424
425 if (!old)
426 return -ENOENT;
427
428 return ieee80211_config_beacon(sdata, params);
429}
430
431static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
432{
433 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
434 struct beacon_data *old;
435
436 if (sdata->vif.type != IEEE80211_IF_TYPE_AP)
437 return -EINVAL;
438
439 old = sdata->u.ap.beacon;
440
441 if (!old)
442 return -ENOENT;
443
444 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
445 synchronize_rcu();
446 kfree(old);
447
448 return ieee80211_if_config_beacon(dev);
449}
450
451/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
452struct iapp_layer2_update {
453 u8 da[ETH_ALEN]; /* broadcast */
454 u8 sa[ETH_ALEN]; /* STA addr */
455 __be16 len; /* 6 */
456 u8 dsap; /* 0 */
457 u8 ssap; /* 0 */
458 u8 control;
459 u8 xid_info[3];
460} __attribute__ ((packed));
461
462static void ieee80211_send_layer2_update(struct sta_info *sta)
463{
464 struct iapp_layer2_update *msg;
465 struct sk_buff *skb;
466
467	/* Send a Layer 2 Update frame to update forwarding tables
468	 * in layer-2 bridge devices */
469
470 skb = dev_alloc_skb(sizeof(*msg));
471 if (!skb)
472 return;
473 msg = (struct iapp_layer2_update *)skb_put(skb, sizeof(*msg));
474
475 /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
476 * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
477
478 memset(msg->da, 0xff, ETH_ALEN);
479 memcpy(msg->sa, sta->addr, ETH_ALEN);
480 msg->len = htons(6);
481 msg->dsap = 0;
482 msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */
483 msg->control = 0xaf; /* XID response lsb.1111F101.
484 * F=0 (no poll command; unsolicited frame) */
485 msg->xid_info[0] = 0x81; /* XID format identifier */
486 msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */
487 msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */
488
489 skb->dev = sta->dev;
490 skb->protocol = eth_type_trans(skb, sta->dev);
491 memset(skb->cb, 0, sizeof(skb->cb));
492 netif_rx(skb);
493}
494
495static void sta_apply_parameters(struct ieee80211_local *local,
496 struct sta_info *sta,
497 struct station_parameters *params)
498{
499 u32 rates;
500 int i, j;
501 struct ieee80211_hw_mode *mode;
502
503 if (params->station_flags & STATION_FLAG_CHANGED) {
504 sta->flags &= ~WLAN_STA_AUTHORIZED;
505 if (params->station_flags & STATION_FLAG_AUTHORIZED)
506 sta->flags |= WLAN_STA_AUTHORIZED;
507
508 sta->flags &= ~WLAN_STA_SHORT_PREAMBLE;
509 if (params->station_flags & STATION_FLAG_SHORT_PREAMBLE)
510 sta->flags |= WLAN_STA_SHORT_PREAMBLE;
511
512 sta->flags &= ~WLAN_STA_WME;
513 if (params->station_flags & STATION_FLAG_WME)
514 sta->flags |= WLAN_STA_WME;
515 }
516
517 if (params->aid) {
518 sta->aid = params->aid;
519 if (sta->aid > IEEE80211_MAX_AID)
520 sta->aid = 0; /* XXX: should this be an error? */
521 }
522
523 if (params->listen_interval >= 0)
524 sta->listen_interval = params->listen_interval;
525
526 if (params->supported_rates) {
527 rates = 0;
528 mode = local->oper_hw_mode;
529 for (i = 0; i < params->supported_rates_len; i++) {
530 int rate = (params->supported_rates[i] & 0x7f) * 5;
531 for (j = 0; j < mode->num_rates; j++) {
532 if (mode->rates[j].rate == rate)
533 rates |= BIT(j);
534 }
535 }
536 sta->supp_rates = rates;
537 }
538}
539
540static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
541 u8 *mac, struct station_parameters *params)
542{
543 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
544 struct sta_info *sta;
545 struct ieee80211_sub_if_data *sdata;
546
547 /* Prevent a race with changing the rate control algorithm */
548 if (!netif_running(dev))
549 return -ENETDOWN;
550
551 /* XXX: get sta belonging to dev */
552 sta = sta_info_get(local, mac);
553 if (sta) {
554 sta_info_put(sta);
555 return -EEXIST;
556 }
557
558 if (params->vlan) {
559 sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
560
561		if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN &&
562 sdata->vif.type != IEEE80211_IF_TYPE_AP)
563 return -EINVAL;
564 } else
565 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
566
567 sta = sta_info_add(local, dev, mac, GFP_KERNEL);
568 if (!sta)
569 return -ENOMEM;
570
571 sta->dev = sdata->dev;
572 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN ||
573 sdata->vif.type == IEEE80211_IF_TYPE_AP)
574 ieee80211_send_layer2_update(sta);
575
576 sta->flags = WLAN_STA_AUTH | WLAN_STA_ASSOC;
577
578 sta_apply_parameters(local, sta, params);
579
580 rate_control_rate_init(sta, local);
581
582 sta_info_put(sta);
583
584 return 0;
585}
586
587static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
588 u8 *mac)
589{
590 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
591 struct sta_info *sta;
592
593 if (mac) {
594 /* XXX: get sta belonging to dev */
595 sta = sta_info_get(local, mac);
596 if (!sta)
597 return -ENOENT;
598
599 sta_info_free(sta);
600 sta_info_put(sta);
601 } else
602 sta_info_flush(local, dev);
603
604 return 0;
605}
606
607static int ieee80211_change_station(struct wiphy *wiphy,
608 struct net_device *dev,
609 u8 *mac,
610 struct station_parameters *params)
611{
612 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
613 struct sta_info *sta;
614 struct ieee80211_sub_if_data *vlansdata;
615
616 /* XXX: get sta belonging to dev */
617 sta = sta_info_get(local, mac);
618 if (!sta)
619 return -ENOENT;
620
621 if (params->vlan && params->vlan != sta->dev) {
622 vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
623
624		if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN &&
625 vlansdata->vif.type != IEEE80211_IF_TYPE_AP)
626 return -EINVAL;
627
628 sta->dev = params->vlan;
629 ieee80211_send_layer2_update(sta);
630 }
631
632 sta_apply_parameters(local, sta, params);
633
634 sta_info_put(sta);
635
636 return 0;
637}
638
102struct cfg80211_ops mac80211_config_ops = { 639struct cfg80211_ops mac80211_config_ops = {
103 .add_virtual_intf = ieee80211_add_iface, 640 .add_virtual_intf = ieee80211_add_iface,
104 .del_virtual_intf = ieee80211_del_iface, 641 .del_virtual_intf = ieee80211_del_iface,
105 .change_virtual_intf = ieee80211_change_iface, 642 .change_virtual_intf = ieee80211_change_iface,
643 .add_key = ieee80211_add_key,
644 .del_key = ieee80211_del_key,
645 .get_key = ieee80211_get_key,
646 .set_default_key = ieee80211_config_default_key,
647 .add_beacon = ieee80211_add_beacon,
648 .set_beacon = ieee80211_set_beacon,
649 .del_beacon = ieee80211_del_beacon,
650 .add_station = ieee80211_add_station,
651 .del_station = ieee80211_del_station,
652 .change_station = ieee80211_change_station,
653 .get_station = ieee80211_get_station,
106}; 654};
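
The beacon code above is a textbook RCU publish sequence: build a fully initialized beacon_data, publish it with rcu_assign_pointer(), wait out existing readers with synchronize_rcu(), then kfree() the old copy. The kfree() is safe because readers look the pointer up inside an RCU read-side section, along the lines of this hedged sketch (the actual beacon TX path is not part of this hunk; the function below is illustrative only):

static int example_read_beacon(struct ieee80211_sub_if_data *sdata,
			       u8 *buf, int buflen)
{
	struct beacon_data *beacon;
	int len = -ENOENT;

	rcu_read_lock();
	beacon = rcu_dereference(sdata->u.ap.beacon);
	if (beacon && beacon->head_len + beacon->tail_len <= buflen) {
		/* head and tail point into the same allocation as
		 * *beacon, so both stay valid until rcu_read_unlock() */
		memcpy(buf, beacon->head, beacon->head_len);
		memcpy(buf + beacon->head_len, beacon->tail,
		       beacon->tail_len);
		len = beacon->head_len + beacon->tail_len;
	}
	rcu_read_unlock();

	return len;
}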
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index f0e6ab7eb624..829872a3ae81 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -91,8 +91,7 @@ static const struct file_operations name##_ops = { \
91/* common attributes */ 91/* common attributes */
92IEEE80211_IF_FILE(channel_use, channel_use, DEC); 92IEEE80211_IF_FILE(channel_use, channel_use, DEC);
93IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); 93IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
94IEEE80211_IF_FILE(eapol, eapol, DEC); 94IEEE80211_IF_FILE(ieee802_1x_pac, ieee802_1x_pac, DEC);
95IEEE80211_IF_FILE(ieee8021_x, ieee802_1x, DEC);
96 95
97/* STA/IBSS attributes */ 96/* STA/IBSS attributes */
98IEEE80211_IF_FILE(state, u.sta.state, DEC); 97IEEE80211_IF_FILE(state, u.sta.state, DEC);
@@ -119,13 +118,12 @@ static ssize_t ieee80211_if_fmt_flags(
119 sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", 118 sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "",
120 sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", 119 sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "",
121 sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", 120 sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "",
122 sdata->flags & IEEE80211_SDATA_USE_PROTECTION ? "CTS prot\n" : ""); 121 sdata->bss_conf.use_cts_prot ? "CTS prot\n" : "");
123} 122}
124__IEEE80211_IF_FILE(flags); 123__IEEE80211_IF_FILE(flags);
125 124
126/* AP attributes */ 125/* AP attributes */
127IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); 126IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
128IEEE80211_IF_FILE(dtim_period, u.ap.dtim_period, DEC);
129IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); 127IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
130IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC); 128IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC);
131IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC); 129IEEE80211_IF_FILE(force_unicast_rateidx, u.ap.force_unicast_rateidx, DEC);
@@ -139,26 +137,6 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast(
139} 137}
140__IEEE80211_IF_FILE(num_buffered_multicast); 138__IEEE80211_IF_FILE(num_buffered_multicast);
141 139
142static ssize_t ieee80211_if_fmt_beacon_head_len(
143 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
144{
145 if (sdata->u.ap.beacon_head)
146 return scnprintf(buf, buflen, "%d\n",
147 sdata->u.ap.beacon_head_len);
148 return scnprintf(buf, buflen, "\n");
149}
150__IEEE80211_IF_FILE(beacon_head_len);
151
152static ssize_t ieee80211_if_fmt_beacon_tail_len(
153 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
154{
155 if (sdata->u.ap.beacon_tail)
156 return scnprintf(buf, buflen, "%d\n",
157 sdata->u.ap.beacon_tail_len);
158 return scnprintf(buf, buflen, "\n");
159}
160__IEEE80211_IF_FILE(beacon_tail_len);
161
162/* WDS attributes */ 140/* WDS attributes */
163IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); 141IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC);
164 142
@@ -170,8 +148,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
170{ 148{
171 DEBUGFS_ADD(channel_use, sta); 149 DEBUGFS_ADD(channel_use, sta);
172 DEBUGFS_ADD(drop_unencrypted, sta); 150 DEBUGFS_ADD(drop_unencrypted, sta);
173 DEBUGFS_ADD(eapol, sta); 151 DEBUGFS_ADD(ieee802_1x_pac, sta);
174 DEBUGFS_ADD(ieee8021_x, sta);
175 DEBUGFS_ADD(state, sta); 152 DEBUGFS_ADD(state, sta);
176 DEBUGFS_ADD(bssid, sta); 153 DEBUGFS_ADD(bssid, sta);
177 DEBUGFS_ADD(prev_bssid, sta); 154 DEBUGFS_ADD(prev_bssid, sta);
@@ -192,25 +169,20 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
192{ 169{
193 DEBUGFS_ADD(channel_use, ap); 170 DEBUGFS_ADD(channel_use, ap);
194 DEBUGFS_ADD(drop_unencrypted, ap); 171 DEBUGFS_ADD(drop_unencrypted, ap);
195 DEBUGFS_ADD(eapol, ap); 172 DEBUGFS_ADD(ieee802_1x_pac, ap);
196 DEBUGFS_ADD(ieee8021_x, ap);
197 DEBUGFS_ADD(num_sta_ps, ap); 173 DEBUGFS_ADD(num_sta_ps, ap);
198 DEBUGFS_ADD(dtim_period, ap);
199 DEBUGFS_ADD(dtim_count, ap); 174 DEBUGFS_ADD(dtim_count, ap);
200 DEBUGFS_ADD(num_beacons, ap); 175 DEBUGFS_ADD(num_beacons, ap);
201 DEBUGFS_ADD(force_unicast_rateidx, ap); 176 DEBUGFS_ADD(force_unicast_rateidx, ap);
202 DEBUGFS_ADD(max_ratectrl_rateidx, ap); 177 DEBUGFS_ADD(max_ratectrl_rateidx, ap);
203 DEBUGFS_ADD(num_buffered_multicast, ap); 178 DEBUGFS_ADD(num_buffered_multicast, ap);
204 DEBUGFS_ADD(beacon_head_len, ap);
205 DEBUGFS_ADD(beacon_tail_len, ap);
206} 179}
207 180
208static void add_wds_files(struct ieee80211_sub_if_data *sdata) 181static void add_wds_files(struct ieee80211_sub_if_data *sdata)
209{ 182{
210 DEBUGFS_ADD(channel_use, wds); 183 DEBUGFS_ADD(channel_use, wds);
211 DEBUGFS_ADD(drop_unencrypted, wds); 184 DEBUGFS_ADD(drop_unencrypted, wds);
212 DEBUGFS_ADD(eapol, wds); 185 DEBUGFS_ADD(ieee802_1x_pac, wds);
213 DEBUGFS_ADD(ieee8021_x, wds);
214 DEBUGFS_ADD(peer, wds); 186 DEBUGFS_ADD(peer, wds);
215} 187}
216 188
@@ -218,8 +190,7 @@ static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
218{ 190{
219 DEBUGFS_ADD(channel_use, vlan); 191 DEBUGFS_ADD(channel_use, vlan);
220 DEBUGFS_ADD(drop_unencrypted, vlan); 192 DEBUGFS_ADD(drop_unencrypted, vlan);
221 DEBUGFS_ADD(eapol, vlan); 193 DEBUGFS_ADD(ieee802_1x_pac, vlan);
222 DEBUGFS_ADD(ieee8021_x, vlan);
223} 194}
224 195
225static void add_monitor_files(struct ieee80211_sub_if_data *sdata) 196static void add_monitor_files(struct ieee80211_sub_if_data *sdata)
@@ -231,7 +202,7 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
231 if (!sdata->debugfsdir) 202 if (!sdata->debugfsdir)
232 return; 203 return;
233 204
234 switch (sdata->type) { 205 switch (sdata->vif.type) {
235 case IEEE80211_IF_TYPE_STA: 206 case IEEE80211_IF_TYPE_STA:
236 case IEEE80211_IF_TYPE_IBSS: 207 case IEEE80211_IF_TYPE_IBSS:
237 add_sta_files(sdata); 208 add_sta_files(sdata);
@@ -263,8 +234,7 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata)
263{ 234{
264 DEBUGFS_DEL(channel_use, sta); 235 DEBUGFS_DEL(channel_use, sta);
265 DEBUGFS_DEL(drop_unencrypted, sta); 236 DEBUGFS_DEL(drop_unencrypted, sta);
266 DEBUGFS_DEL(eapol, sta); 237 DEBUGFS_DEL(ieee802_1x_pac, sta);
267 DEBUGFS_DEL(ieee8021_x, sta);
268 DEBUGFS_DEL(state, sta); 238 DEBUGFS_DEL(state, sta);
269 DEBUGFS_DEL(bssid, sta); 239 DEBUGFS_DEL(bssid, sta);
270 DEBUGFS_DEL(prev_bssid, sta); 240 DEBUGFS_DEL(prev_bssid, sta);
@@ -285,25 +255,20 @@ static void del_ap_files(struct ieee80211_sub_if_data *sdata)
285{ 255{
286 DEBUGFS_DEL(channel_use, ap); 256 DEBUGFS_DEL(channel_use, ap);
287 DEBUGFS_DEL(drop_unencrypted, ap); 257 DEBUGFS_DEL(drop_unencrypted, ap);
288 DEBUGFS_DEL(eapol, ap); 258 DEBUGFS_DEL(ieee802_1x_pac, ap);
289 DEBUGFS_DEL(ieee8021_x, ap);
290 DEBUGFS_DEL(num_sta_ps, ap); 259 DEBUGFS_DEL(num_sta_ps, ap);
291 DEBUGFS_DEL(dtim_period, ap);
292 DEBUGFS_DEL(dtim_count, ap); 260 DEBUGFS_DEL(dtim_count, ap);
293 DEBUGFS_DEL(num_beacons, ap); 261 DEBUGFS_DEL(num_beacons, ap);
294 DEBUGFS_DEL(force_unicast_rateidx, ap); 262 DEBUGFS_DEL(force_unicast_rateidx, ap);
295 DEBUGFS_DEL(max_ratectrl_rateidx, ap); 263 DEBUGFS_DEL(max_ratectrl_rateidx, ap);
296 DEBUGFS_DEL(num_buffered_multicast, ap); 264 DEBUGFS_DEL(num_buffered_multicast, ap);
297 DEBUGFS_DEL(beacon_head_len, ap);
298 DEBUGFS_DEL(beacon_tail_len, ap);
299} 265}
300 266
301static void del_wds_files(struct ieee80211_sub_if_data *sdata) 267static void del_wds_files(struct ieee80211_sub_if_data *sdata)
302{ 268{
303 DEBUGFS_DEL(channel_use, wds); 269 DEBUGFS_DEL(channel_use, wds);
304 DEBUGFS_DEL(drop_unencrypted, wds); 270 DEBUGFS_DEL(drop_unencrypted, wds);
305 DEBUGFS_DEL(eapol, wds); 271 DEBUGFS_DEL(ieee802_1x_pac, wds);
306 DEBUGFS_DEL(ieee8021_x, wds);
307 DEBUGFS_DEL(peer, wds); 272 DEBUGFS_DEL(peer, wds);
308} 273}
309 274
@@ -311,8 +276,7 @@ static void del_vlan_files(struct ieee80211_sub_if_data *sdata)
311{ 276{
312 DEBUGFS_DEL(channel_use, vlan); 277 DEBUGFS_DEL(channel_use, vlan);
313 DEBUGFS_DEL(drop_unencrypted, vlan); 278 DEBUGFS_DEL(drop_unencrypted, vlan);
314 DEBUGFS_DEL(eapol, vlan); 279 DEBUGFS_DEL(ieee802_1x_pac, vlan);
315 DEBUGFS_DEL(ieee8021_x, vlan);
316} 280}
317 281
318static void del_monitor_files(struct ieee80211_sub_if_data *sdata) 282static void del_monitor_files(struct ieee80211_sub_if_data *sdata)
@@ -362,7 +326,7 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
362 326
363void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) 327void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
364{ 328{
365 del_files(sdata, sdata->type); 329 del_files(sdata, sdata->vif.type);
366 debugfs_remove(sdata->debugfsdir); 330 debugfs_remove(sdata->debugfsdir);
367 sdata->debugfsdir = NULL; 331 sdata->debugfsdir = NULL;
368} 332}
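
For reference, the renamed debugfs entry boils down to a generated formatter like the following; this paraphrases what IEEE80211_IF_FILE(ieee802_1x_pac, ieee802_1x_pac, DEC) expands to, based on the open-coded ieee80211_if_fmt_* helpers visible in this file, not on the literal macro body:

static ssize_t ieee80211_if_fmt_ieee802_1x_pac(
	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
{
	/* DEC variant: print the named sdata field in decimal */
	return scnprintf(buf, buflen, "%d\n", sdata->ieee802_1x_pac);
}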
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index 6378850d8580..5dcc2d61551f 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -34,6 +34,8 @@
34#include "debugfs.h" 34#include "debugfs.h"
35#include "debugfs_netdev.h" 35#include "debugfs_netdev.h"
36 36
37#define SUPP_MCS_SET_LEN 16
38
37/* 39/*
38 * For seeing transmitted packets on monitor interfaces 40 * For seeing transmitted packets on monitor interfaces
39 * we have a radiotap header too. 41 * we have a radiotap header too.
@@ -175,21 +177,21 @@ static int ieee80211_open(struct net_device *dev)
175 /* 177 /*
176 * check whether it may have the same address 178 * check whether it may have the same address
177 */ 179 */
178 if (!identical_mac_addr_allowed(sdata->type, 180 if (!identical_mac_addr_allowed(sdata->vif.type,
179 nsdata->type)) 181 nsdata->vif.type))
180 return -ENOTUNIQ; 182 return -ENOTUNIQ;
181 183
182 /* 184 /*
183 * can only add VLANs to enabled APs 185 * can only add VLANs to enabled APs
184 */ 186 */
185 if (sdata->type == IEEE80211_IF_TYPE_VLAN && 187 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN &&
186 nsdata->type == IEEE80211_IF_TYPE_AP && 188 nsdata->vif.type == IEEE80211_IF_TYPE_AP &&
187 netif_running(nsdata->dev)) 189 netif_running(nsdata->dev))
188 sdata->u.vlan.ap = nsdata; 190 sdata->u.vlan.ap = nsdata;
189 } 191 }
190 } 192 }
191 193
192 switch (sdata->type) { 194 switch (sdata->vif.type) {
193 case IEEE80211_IF_TYPE_WDS: 195 case IEEE80211_IF_TYPE_WDS:
194 if (is_zero_ether_addr(sdata->u.wds.remote_addr)) 196 if (is_zero_ether_addr(sdata->u.wds.remote_addr))
195 return -ENOLINK; 197 return -ENOLINK;
@@ -217,9 +219,10 @@ static int ieee80211_open(struct net_device *dev)
217 if (res) 219 if (res)
218 return res; 220 return res;
219 ieee80211_hw_config(local); 221 ieee80211_hw_config(local);
222 ieee80211_led_radio(local, local->hw.conf.radio_enabled);
220 } 223 }
221 224
222 switch (sdata->type) { 225 switch (sdata->vif.type) {
223 case IEEE80211_IF_TYPE_VLAN: 226 case IEEE80211_IF_TYPE_VLAN:
224 list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans); 227 list_add(&sdata->u.vlan.list, &sdata->u.vlan.ap->u.ap.vlans);
225 /* no need to tell driver */ 228 /* no need to tell driver */
@@ -240,8 +243,8 @@ static int ieee80211_open(struct net_device *dev)
240 sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; 243 sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET;
241 /* fall through */ 244 /* fall through */
242 default: 245 default:
243 conf.if_id = dev->ifindex; 246 conf.vif = &sdata->vif;
244 conf.type = sdata->type; 247 conf.type = sdata->vif.type;
245 conf.mac_addr = dev->dev_addr; 248 conf.mac_addr = dev->dev_addr;
246 res = local->ops->add_interface(local_to_hw(local), &conf); 249 res = local->ops->add_interface(local_to_hw(local), &conf);
247 if (res && !local->open_count && local->ops->stop) 250 if (res && !local->open_count && local->ops->stop)
@@ -253,7 +256,7 @@ static int ieee80211_open(struct net_device *dev)
253 ieee80211_reset_erp_info(dev); 256 ieee80211_reset_erp_info(dev);
254 ieee80211_enable_keys(sdata); 257 ieee80211_enable_keys(sdata);
255 258
256 if (sdata->type == IEEE80211_IF_TYPE_STA && 259 if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
257 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) 260 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
258 netif_carrier_off(dev); 261 netif_carrier_off(dev);
259 else 262 else
@@ -290,9 +293,20 @@ static int ieee80211_stop(struct net_device *dev)
290 struct ieee80211_sub_if_data *sdata; 293 struct ieee80211_sub_if_data *sdata;
291 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 294 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
292 struct ieee80211_if_init_conf conf; 295 struct ieee80211_if_init_conf conf;
296 struct sta_info *sta;
297 int i;
293 298
294 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 299 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
295 300
301 list_for_each_entry(sta, &local->sta_list, list) {
302 if (sta->dev == dev)
303 for (i = 0; i < STA_TID_NUM; i++)
304 ieee80211_sta_stop_rx_ba_session(sta->dev,
305 sta->addr, i,
306 WLAN_BACK_RECIPIENT,
307 WLAN_REASON_QSTA_LEAVE_QBSS);
308 }
309
296 netif_stop_queue(dev); 310 netif_stop_queue(dev);
297 311
298 /* 312 /*
@@ -309,10 +323,17 @@ static int ieee80211_stop(struct net_device *dev)
309 323
310 dev_mc_unsync(local->mdev, dev); 324 dev_mc_unsync(local->mdev, dev);
311 325
312 /* down all dependent devices, that is VLANs */ 326 /* APs need special treatment */
313 if (sdata->type == IEEE80211_IF_TYPE_AP) { 327 if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
314 struct ieee80211_sub_if_data *vlan, *tmp; 328 struct ieee80211_sub_if_data *vlan, *tmp;
329 struct beacon_data *old_beacon = sdata->u.ap.beacon;
315 330
331 /* remove beacon */
332 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
333 synchronize_rcu();
334 kfree(old_beacon);
335
336 /* down all dependent devices, that is VLANs */
316 list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans, 337 list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans,
317 u.vlan.list) 338 u.vlan.list)
318 dev_close(vlan->dev); 339 dev_close(vlan->dev);
@@ -321,7 +342,7 @@ static int ieee80211_stop(struct net_device *dev)
321 342
322 local->open_count--; 343 local->open_count--;
323 344
324 switch (sdata->type) { 345 switch (sdata->vif.type) {
325 case IEEE80211_IF_TYPE_VLAN: 346 case IEEE80211_IF_TYPE_VLAN:
326 list_del(&sdata->u.vlan.list); 347 list_del(&sdata->u.vlan.list);
327 sdata->u.vlan.ap = NULL; 348 sdata->u.vlan.ap = NULL;
@@ -350,11 +371,14 @@ static int ieee80211_stop(struct net_device *dev)
350 synchronize_rcu(); 371 synchronize_rcu();
351 skb_queue_purge(&sdata->u.sta.skb_queue); 372 skb_queue_purge(&sdata->u.sta.skb_queue);
352 373
353 if (!local->ops->hw_scan && 374 if (local->scan_dev == sdata->dev) {
354 local->scan_dev == sdata->dev) { 375 if (!local->ops->hw_scan) {
355 local->sta_scanning = 0; 376 local->sta_sw_scanning = 0;
356 cancel_delayed_work(&local->scan_work); 377 cancel_delayed_work(&local->scan_work);
378 } else
379 local->sta_hw_scanning = 0;
357 } 380 }
381
358 flush_workqueue(local->hw.workqueue); 382 flush_workqueue(local->hw.workqueue);
359 383
360 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; 384 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
@@ -363,8 +387,8 @@ static int ieee80211_stop(struct net_device *dev)
363 sdata->u.sta.extra_ie_len = 0; 387 sdata->u.sta.extra_ie_len = 0;
364 /* fall through */ 388 /* fall through */
365 default: 389 default:
366 conf.if_id = dev->ifindex; 390 conf.vif = &sdata->vif;
367 conf.type = sdata->type; 391 conf.type = sdata->vif.type;
368 conf.mac_addr = dev->dev_addr; 392 conf.mac_addr = dev->dev_addr;
369 /* disable all keys for as long as this netdev is down */ 393 /* disable all keys for as long as this netdev is down */
370 ieee80211_disable_keys(sdata); 394 ieee80211_disable_keys(sdata);
@@ -378,6 +402,8 @@ static int ieee80211_stop(struct net_device *dev)
378 if (local->ops->stop) 402 if (local->ops->stop)
379 local->ops->stop(local_to_hw(local)); 403 local->ops->stop(local_to_hw(local));
380 404
405 ieee80211_led_radio(local, 0);
406
381 tasklet_disable(&local->tx_pending_tasklet); 407 tasklet_disable(&local->tx_pending_tasklet);
382 tasklet_disable(&local->tasklet); 408 tasklet_disable(&local->tasklet);
383 } 409 }
@@ -485,20 +511,20 @@ static int __ieee80211_if_config(struct net_device *dev,
485 return 0; 511 return 0;
486 512
487 memset(&conf, 0, sizeof(conf)); 513 memset(&conf, 0, sizeof(conf));
488 conf.type = sdata->type; 514 conf.type = sdata->vif.type;
489 if (sdata->type == IEEE80211_IF_TYPE_STA || 515 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
490 sdata->type == IEEE80211_IF_TYPE_IBSS) { 516 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
491 conf.bssid = sdata->u.sta.bssid; 517 conf.bssid = sdata->u.sta.bssid;
492 conf.ssid = sdata->u.sta.ssid; 518 conf.ssid = sdata->u.sta.ssid;
493 conf.ssid_len = sdata->u.sta.ssid_len; 519 conf.ssid_len = sdata->u.sta.ssid_len;
494 } else if (sdata->type == IEEE80211_IF_TYPE_AP) { 520 } else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
495 conf.ssid = sdata->u.ap.ssid; 521 conf.ssid = sdata->u.ap.ssid;
496 conf.ssid_len = sdata->u.ap.ssid_len; 522 conf.ssid_len = sdata->u.ap.ssid_len;
497 conf.beacon = beacon; 523 conf.beacon = beacon;
498 conf.beacon_control = control; 524 conf.beacon_control = control;
499 } 525 }
500 return local->ops->config_interface(local_to_hw(local), 526 return local->ops->config_interface(local_to_hw(local),
501 dev->ifindex, &conf); 527 &sdata->vif, &conf);
502} 528}
503 529
504int ieee80211_if_config(struct net_device *dev) 530int ieee80211_if_config(struct net_device *dev)
@@ -510,11 +536,13 @@ int ieee80211_if_config_beacon(struct net_device *dev)
510{ 536{
511 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 537 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
512 struct ieee80211_tx_control control; 538 struct ieee80211_tx_control control;
539 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
513 struct sk_buff *skb; 540 struct sk_buff *skb;
514 541
515 if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE)) 542 if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE))
516 return 0; 543 return 0;
517 skb = ieee80211_beacon_get(local_to_hw(local), dev->ifindex, &control); 544 skb = ieee80211_beacon_get(local_to_hw(local), &sdata->vif,
545 &control);
518 if (!skb) 546 if (!skb)
519 return -ENOMEM; 547 return -ENOMEM;
520 return __ieee80211_if_config(dev, skb, &control); 548 return __ieee80211_if_config(dev, skb, &control);
@@ -526,7 +554,7 @@ int ieee80211_hw_config(struct ieee80211_local *local)
526 struct ieee80211_channel *chan; 554 struct ieee80211_channel *chan;
527 int ret = 0; 555 int ret = 0;
528 556
529 if (local->sta_scanning) { 557 if (local->sta_sw_scanning) {
530 chan = local->scan_channel; 558 chan = local->scan_channel;
531 mode = local->scan_hw_mode; 559 mode = local->scan_hw_mode;
532 } else { 560 } else {
@@ -560,25 +588,79 @@ int ieee80211_hw_config(struct ieee80211_local *local)
560 return ret; 588 return ret;
561} 589}
562 590
563void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes) 591/**
592 * ieee80211_hw_config_ht should be used only after legacy configuration
593 * has been determined, as ht configuration depends upon the hardware's
594 * HT abilities for a _specific_ band.
595 */
596int ieee80211_hw_config_ht(struct ieee80211_local *local, int enable_ht,
597 struct ieee80211_ht_info *req_ht_cap,
598 struct ieee80211_ht_bss_info *req_bss_cap)
564{ 599{
565 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 600 struct ieee80211_conf *conf = &local->hw.conf;
566 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 601 struct ieee80211_hw_mode *mode = conf->mode;
567 if (local->ops->erp_ie_changed) 602 int i;
568 local->ops->erp_ie_changed(local_to_hw(local), changes, 603
569 !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION), 604 /* HT is not supported */
570 !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)); 605 if (!mode->ht_info.ht_supported) {
606 conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
607 return -EOPNOTSUPP;
608 }
609
610 /* disable HT */
611 if (!enable_ht) {
612 conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
613 } else {
614 conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE;
615 conf->ht_conf.cap = req_ht_cap->cap & mode->ht_info.cap;
616 conf->ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS);
617 conf->ht_conf.cap |=
618 mode->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS;
619 conf->ht_bss_conf.primary_channel =
620 req_bss_cap->primary_channel;
621 conf->ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
622 conf->ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
623 for (i = 0; i < SUPP_MCS_SET_LEN; i++)
624 conf->ht_conf.supp_mcs_set[i] =
625 mode->ht_info.supp_mcs_set[i] &
626 req_ht_cap->supp_mcs_set[i];
627
628		/* In STA mode, this gives us an indication
629		 * of the AP's mode of operation */
630 conf->ht_conf.ht_supported = 1;
631 conf->ht_conf.ampdu_factor = req_ht_cap->ampdu_factor;
632 conf->ht_conf.ampdu_density = req_ht_cap->ampdu_density;
633 }
634
635 local->ops->conf_ht(local_to_hw(local), &local->hw.conf);
636
637 return 0;
638}
639
640void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
641 u32 changed)
642{
643 struct ieee80211_local *local = sdata->local;
644
645 if (!changed)
646 return;
647
648 if (local->ops->bss_info_changed)
649 local->ops->bss_info_changed(local_to_hw(local),
650 &sdata->vif,
651 &sdata->bss_conf,
652 changed);
571} 653}
572 654
573void ieee80211_reset_erp_info(struct net_device *dev) 655void ieee80211_reset_erp_info(struct net_device *dev)
574{ 656{
575 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 657 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
576 658
577 sdata->flags &= ~(IEEE80211_SDATA_USE_PROTECTION | 659 sdata->bss_conf.use_cts_prot = 0;
578 IEEE80211_SDATA_SHORT_PREAMBLE); 660 sdata->bss_conf.use_short_preamble = 0;
579 ieee80211_erp_info_change_notify(dev, 661 ieee80211_bss_info_change_notify(sdata,
580 IEEE80211_ERP_CHANGE_PROTECTION | 662 BSS_CHANGED_ERP_CTS_PROT |
581 IEEE80211_ERP_CHANGE_PREAMBLE); 663 BSS_CHANGED_ERP_PREAMBLE);
582} 664}
583 665
584void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, 666void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
@@ -635,7 +717,7 @@ static void ieee80211_tasklet_handler(unsigned long data)
635 case IEEE80211_RX_MSG: 717 case IEEE80211_RX_MSG:
636 /* status is in skb->cb */ 718 /* status is in skb->cb */
637 memcpy(&rx_status, skb->cb, sizeof(rx_status)); 719 memcpy(&rx_status, skb->cb, sizeof(rx_status));
638 /* Clear skb->type in order to not confuse kernel 720 /* Clear skb->pkt_type in order to not confuse kernel
639 * netstack. */ 721 * netstack. */
640 skb->pkt_type = 0; 722 skb->pkt_type = 0;
641 __ieee80211_rx(local_to_hw(local), skb, &rx_status); 723 __ieee80211_rx(local_to_hw(local), skb, &rx_status);
@@ -670,7 +752,7 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
670 struct ieee80211_tx_packet_data *pkt_data; 752 struct ieee80211_tx_packet_data *pkt_data;
671 753
672 pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; 754 pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
673 pkt_data->ifindex = control->ifindex; 755 pkt_data->ifindex = vif_to_sdata(control->vif)->dev->ifindex;
674 pkt_data->flags = 0; 756 pkt_data->flags = 0;
675 if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS) 757 if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS)
676 pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; 758 pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
@@ -678,6 +760,8 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
678 pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; 760 pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
679 if (control->flags & IEEE80211_TXCTL_REQUEUE) 761 if (control->flags & IEEE80211_TXCTL_REQUEUE)
680 pkt_data->flags |= IEEE80211_TXPD_REQUEUE; 762 pkt_data->flags |= IEEE80211_TXPD_REQUEUE;
763 if (control->flags & IEEE80211_TXCTL_EAPOL_FRAME)
764 pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
681 pkt_data->queue = control->queue; 765 pkt_data->queue = control->queue;
682 766
683 hdrlen = ieee80211_get_hdrlen_from_skb(skb); 767 hdrlen = ieee80211_get_hdrlen_from_skb(skb);
@@ -805,10 +889,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
805 sta_info_put(sta); 889 sta_info_put(sta);
806 return; 890 return;
807 } 891 }
808 } else { 892 } else
809 /* FIXME: STUPID to call this with both local and local->mdev */ 893 rate_control_tx_status(local->mdev, skb, status);
810 rate_control_tx_status(local, local->mdev, skb, status);
811 }
812 894
813 ieee80211_led_tx(local, 0); 895 ieee80211_led_tx(local, 0);
814 896
@@ -894,7 +976,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
894 if (!monitors || !skb) 976 if (!monitors || !skb)
895 goto out; 977 goto out;
896 978
897 if (sdata->type == IEEE80211_IF_TYPE_MNTR) { 979 if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) {
898 if (!netif_running(sdata->dev)) 980 if (!netif_running(sdata->dev))
899 continue; 981 continue;
900 monitors--; 982 monitors--;
@@ -1016,7 +1098,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
1016 mdev->header_ops = &ieee80211_header_ops; 1098 mdev->header_ops = &ieee80211_header_ops;
1017 mdev->set_multicast_list = ieee80211_master_set_multicast_list; 1099 mdev->set_multicast_list = ieee80211_master_set_multicast_list;
1018 1100
1019 sdata->type = IEEE80211_IF_TYPE_AP; 1101 sdata->vif.type = IEEE80211_IF_TYPE_AP;
1020 sdata->dev = mdev; 1102 sdata->dev = mdev;
1021 sdata->local = local; 1103 sdata->local = local;
1022 sdata->u.ap.force_unicast_rateidx = -1; 1104 sdata->u.ap.force_unicast_rateidx = -1;
@@ -1260,33 +1342,38 @@ static int __init ieee80211_init(void)
1260 1342
1261 BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb)); 1343 BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb));
1262 1344
1263#ifdef CONFIG_MAC80211_RCSIMPLE 1345 ret = rc80211_simple_init();
1264 ret = ieee80211_rate_control_register(&mac80211_rcsimple);
1265 if (ret) 1346 if (ret)
1266 return ret; 1347 goto fail;
1267#endif 1348
1349 ret = rc80211_pid_init();
1350 if (ret)
1351 goto fail_simple;
1268 1352
1269 ret = ieee80211_wme_register(); 1353 ret = ieee80211_wme_register();
1270 if (ret) { 1354 if (ret) {
1271#ifdef CONFIG_MAC80211_RCSIMPLE
1272 ieee80211_rate_control_unregister(&mac80211_rcsimple);
1273#endif
1274 printk(KERN_DEBUG "ieee80211_init: failed to " 1355 printk(KERN_DEBUG "ieee80211_init: failed to "
1275 "initialize WME (err=%d)\n", ret); 1356 "initialize WME (err=%d)\n", ret);
1276 return ret; 1357 goto fail_pid;
1277 } 1358 }
1278 1359
1279 ieee80211_debugfs_netdev_init(); 1360 ieee80211_debugfs_netdev_init();
1280 ieee80211_regdomain_init(); 1361 ieee80211_regdomain_init();
1281 1362
1282 return 0; 1363 return 0;
1364
1365 fail_pid:
1366	rc80211_pid_exit();
1367 fail_simple:
1368	rc80211_simple_exit();
1369 fail:
1370 return ret;
1283} 1371}
1284 1372
1285static void __exit ieee80211_exit(void) 1373static void __exit ieee80211_exit(void)
1286{ 1374{
1287#ifdef CONFIG_MAC80211_RCSIMPLE 1375 rc80211_simple_exit();
1288 ieee80211_rate_control_unregister(&mac80211_rcsimple); 1376 rc80211_pid_exit();
1289#endif
1290 1377
1291 ieee80211_wme_unregister(); 1378 ieee80211_wme_unregister();
1292 ieee80211_debugfs_netdev_exit(); 1379 ieee80211_debugfs_netdev_exit();
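
The new bss_conf plus change-notify pair replaces the old per-flag ERP callback: a caller first updates the relevant sdata->bss_conf members, then reports all of them in one call with the matching BSS_CHANGED_* bits, exactly as ieee80211_reset_erp_info() does above. A short usage sketch (the helper name is hypothetical; the field and flag are taken from this diff):

static void example_set_cts_prot(struct ieee80211_sub_if_data *sdata,
				 int use_cts_prot)
{
	if (sdata->bss_conf.use_cts_prot == use_cts_prot)
		return;

	sdata->bss_conf.use_cts_prot = use_cts_prot;
	/* drivers that implement bss_info_changed() get one batched call */
	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ERP_CTS_PROT);
}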
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 72e1c93dd87e..72ecbf7bf962 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -89,6 +89,8 @@ struct ieee80211_sta_bss {
89 size_t rsn_ie_len; 89 size_t rsn_ie_len;
90 u8 *wmm_ie; 90 u8 *wmm_ie;
91 size_t wmm_ie_len; 91 size_t wmm_ie_len;
92 u8 *ht_ie;
93 size_t ht_ie_len;
92#define IEEE80211_MAX_SUPP_RATES 32 94#define IEEE80211_MAX_SUPP_RATES 32
93 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; 95 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
94 size_t supp_rates_len; 96 size_t supp_rates_len;
@@ -121,6 +123,7 @@ typedef enum {
121/* frame is destined to interface currently processed (incl. multicast frames) */ 123/* frame is destined to interface currently processed (incl. multicast frames) */
122#define IEEE80211_TXRXD_RXRA_MATCH BIT(5) 124#define IEEE80211_TXRXD_RXRA_MATCH BIT(5)
123#define IEEE80211_TXRXD_TX_INJECTED BIT(6) 125#define IEEE80211_TXRXD_TX_INJECTED BIT(6)
126#define IEEE80211_TXRXD_RX_AMSDU BIT(7)
124struct ieee80211_txrx_data { 127struct ieee80211_txrx_data {
125 struct sk_buff *skb; 128 struct sk_buff *skb;
126 struct net_device *dev; 129 struct net_device *dev;
@@ -161,6 +164,7 @@ struct ieee80211_txrx_data {
161#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0) 164#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0)
162#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1) 165#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1)
163#define IEEE80211_TXPD_REQUEUE BIT(2) 166#define IEEE80211_TXPD_REQUEUE BIT(2)
167#define IEEE80211_TXPD_EAPOL_FRAME BIT(3)
164/* Stored in sk_buff->cb */ 168/* Stored in sk_buff->cb */
165struct ieee80211_tx_packet_data { 169struct ieee80211_tx_packet_data {
166 int ifindex; 170 int ifindex;
@@ -186,9 +190,14 @@ typedef ieee80211_txrx_result (*ieee80211_tx_handler)
186typedef ieee80211_txrx_result (*ieee80211_rx_handler) 190typedef ieee80211_txrx_result (*ieee80211_rx_handler)
187(struct ieee80211_txrx_data *rx); 191(struct ieee80211_txrx_data *rx);
188 192
193struct beacon_data {
194 u8 *head, *tail;
195 int head_len, tail_len;
196 int dtim_period;
197};
198
189struct ieee80211_if_ap { 199struct ieee80211_if_ap {
190 u8 *beacon_head, *beacon_tail; 200 struct beacon_data *beacon;
191 int beacon_head_len, beacon_tail_len;
192 201
193 struct list_head vlans; 202 struct list_head vlans;
194 203
@@ -201,7 +210,7 @@ struct ieee80211_if_ap {
201 u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)]; 210 u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)];
202 atomic_t num_sta_ps; /* number of stations in PS mode */ 211 atomic_t num_sta_ps; /* number of stations in PS mode */
203 struct sk_buff_head ps_bc_buf; 212 struct sk_buff_head ps_bc_buf;
204 int dtim_period, dtim_count; 213 int dtim_count;
205 int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ 214 int force_unicast_rateidx; /* forced TX rateidx for unicast frames */
206 int max_ratectrl_rateidx; /* max TX rateidx for rate control */ 215 int max_ratectrl_rateidx; /* max TX rateidx for rate control */
207 int num_beacons; /* number of TXed beacon frames for this BSS */ 216 int num_beacons; /* number of TXed beacon frames for this BSS */
@@ -282,15 +291,9 @@ struct ieee80211_if_sta {
282/* flags used in struct ieee80211_sub_if_data.flags */ 291/* flags used in struct ieee80211_sub_if_data.flags */
283#define IEEE80211_SDATA_ALLMULTI BIT(0) 292#define IEEE80211_SDATA_ALLMULTI BIT(0)
284#define IEEE80211_SDATA_PROMISC BIT(1) 293#define IEEE80211_SDATA_PROMISC BIT(1)
285#define IEEE80211_SDATA_USE_PROTECTION BIT(2) /* CTS protect ERP frames */ 294#define IEEE80211_SDATA_USERSPACE_MLME BIT(2)
286/* use short preamble with IEEE 802.11b: this flag is set when the AP or beacon
287 * generator reports that there are no present stations that cannot support short
288 * preambles */
289#define IEEE80211_SDATA_SHORT_PREAMBLE BIT(3)
290#define IEEE80211_SDATA_USERSPACE_MLME BIT(4)
291struct ieee80211_sub_if_data { 295struct ieee80211_sub_if_data {
292 struct list_head list; 296 struct list_head list;
293 enum ieee80211_if_types type;
294 297
295 struct wireless_dev wdev; 298 struct wireless_dev wdev;
296 299
@@ -303,11 +306,11 @@ struct ieee80211_sub_if_data {
303 unsigned int flags; 306 unsigned int flags;
304 307
305 int drop_unencrypted; 308 int drop_unencrypted;
306 int eapol; /* 0 = process EAPOL frames as normal data frames, 309 /*
307 * 1 = send EAPOL frames through wlan#ap to hostapd 310 * IEEE 802.1X Port access control in effect,
308 * (default) */ 311 * drop packets to/from unauthorized port
309 int ieee802_1x; /* IEEE 802.1X PAE - drop packet to/from unauthorized 312 */
310 * port */ 313 int ieee802_1x_pac;
311 314
312 u16 sequence; 315 u16 sequence;
313 316
@@ -319,6 +322,15 @@ struct ieee80211_sub_if_data {
319 struct ieee80211_key *keys[NUM_DEFAULT_KEYS]; 322 struct ieee80211_key *keys[NUM_DEFAULT_KEYS];
320 struct ieee80211_key *default_key; 323 struct ieee80211_key *default_key;
321 324
325 /*
326 * BSS configuration for this interface.
327 *
328 * FIXME: I feel bad putting this here when we already have a
329 * bss pointer, but the bss pointer is just wrong when
330 * you have multiple virtual STA mode interfaces...
331 * This needs to be fixed.
332 */
333 struct ieee80211_bss_conf bss_conf;
322 struct ieee80211_if_ap *bss; /* BSS that this device belongs to */ 334 struct ieee80211_if_ap *bss; /* BSS that this device belongs to */
323 335
324 union { 336 union {
@@ -336,8 +348,7 @@ struct ieee80211_sub_if_data {
336 struct { 348 struct {
337 struct dentry *channel_use; 349 struct dentry *channel_use;
338 struct dentry *drop_unencrypted; 350 struct dentry *drop_unencrypted;
339 struct dentry *eapol; 351 struct dentry *ieee802_1x_pac;
340 struct dentry *ieee8021_x;
341 struct dentry *state; 352 struct dentry *state;
342 struct dentry *bssid; 353 struct dentry *bssid;
343 struct dentry *prev_bssid; 354 struct dentry *prev_bssid;
@@ -356,30 +367,24 @@ struct ieee80211_sub_if_data {
356 struct { 367 struct {
357 struct dentry *channel_use; 368 struct dentry *channel_use;
358 struct dentry *drop_unencrypted; 369 struct dentry *drop_unencrypted;
359 struct dentry *eapol; 370 struct dentry *ieee802_1x_pac;
360 struct dentry *ieee8021_x;
361 struct dentry *num_sta_ps; 371 struct dentry *num_sta_ps;
362 struct dentry *dtim_period;
363 struct dentry *dtim_count; 372 struct dentry *dtim_count;
364 struct dentry *num_beacons; 373 struct dentry *num_beacons;
365 struct dentry *force_unicast_rateidx; 374 struct dentry *force_unicast_rateidx;
366 struct dentry *max_ratectrl_rateidx; 375 struct dentry *max_ratectrl_rateidx;
367 struct dentry *num_buffered_multicast; 376 struct dentry *num_buffered_multicast;
368 struct dentry *beacon_head_len;
369 struct dentry *beacon_tail_len;
370 } ap; 377 } ap;
371 struct { 378 struct {
372 struct dentry *channel_use; 379 struct dentry *channel_use;
373 struct dentry *drop_unencrypted; 380 struct dentry *drop_unencrypted;
374 struct dentry *eapol; 381 struct dentry *ieee802_1x_pac;
375 struct dentry *ieee8021_x;
376 struct dentry *peer; 382 struct dentry *peer;
377 } wds; 383 } wds;
378 struct { 384 struct {
379 struct dentry *channel_use; 385 struct dentry *channel_use;
380 struct dentry *drop_unencrypted; 386 struct dentry *drop_unencrypted;
381 struct dentry *eapol; 387 struct dentry *ieee802_1x_pac;
382 struct dentry *ieee8021_x;
383 } vlan; 388 } vlan;
384 struct { 389 struct {
385 struct dentry *mode; 390 struct dentry *mode;
@@ -387,8 +392,16 @@ struct ieee80211_sub_if_data {
387 struct dentry *default_key; 392 struct dentry *default_key;
388 } debugfs; 393 } debugfs;
389#endif 394#endif
395 /* must be last, dynamically sized area in this! */
396 struct ieee80211_vif vif;
390}; 397};
391 398
399static inline
400struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
401{
402 return container_of(p, struct ieee80211_sub_if_data, vif);
403}
404
392#define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev) 405#define IEEE80211_DEV_TO_SUB_IF(dev) netdev_priv(dev)
393 406
394enum { 407enum {
@@ -470,7 +483,8 @@ struct ieee80211_local {
470 483
471 struct list_head interfaces; 484 struct list_head interfaces;
472 485
473 int sta_scanning; 486 bool sta_sw_scanning;
487 bool sta_hw_scanning;
474 int scan_channel_idx; 488 int scan_channel_idx;
475 enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; 489 enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
476 unsigned long last_scan_completed; 490 unsigned long last_scan_completed;
@@ -483,10 +497,6 @@ struct ieee80211_local {
483 struct list_head sta_bss_list; 497 struct list_head sta_bss_list;
484 struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE]; 498 struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE];
485 spinlock_t sta_bss_lock; 499 spinlock_t sta_bss_lock;
486#define IEEE80211_SCAN_MATCH_SSID BIT(0)
487#define IEEE80211_SCAN_WPA_ONLY BIT(1)
488#define IEEE80211_SCAN_EXTRA_INFO BIT(2)
489 int scan_flags;
490 500
491 /* SNMP counters */ 501 /* SNMP counters */
492 /* dot11CountersTable */ 502 /* dot11CountersTable */
@@ -503,8 +513,9 @@ struct ieee80211_local {
503 513
504#ifdef CONFIG_MAC80211_LEDS 514#ifdef CONFIG_MAC80211_LEDS
505 int tx_led_counter, rx_led_counter; 515 int tx_led_counter, rx_led_counter;
506 struct led_trigger *tx_led, *rx_led, *assoc_led; 516 struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
507 char tx_led_name[32], rx_led_name[32], assoc_led_name[32]; 517 char tx_led_name[32], rx_led_name[32],
518 assoc_led_name[32], radio_led_name[32];
508#endif 519#endif
509 520
510 u32 channel_use; 521 u32 channel_use;
@@ -708,6 +719,9 @@ int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr);
708void ieee80211_if_setup(struct net_device *dev); 719void ieee80211_if_setup(struct net_device *dev);
709struct ieee80211_rate *ieee80211_get_rate(struct ieee80211_local *local, 720struct ieee80211_rate *ieee80211_get_rate(struct ieee80211_local *local,
710 int phymode, int hwrate); 721 int phymode, int hwrate);
722int ieee80211_hw_config_ht(struct ieee80211_local *local, int enable_ht,
723 struct ieee80211_ht_info *req_ht_cap,
724 struct ieee80211_ht_bss_info *req_bss_cap);
711 725
712/* ieee80211_ioctl.c */ 726/* ieee80211_ioctl.c */
713extern const struct iw_handler_def ieee80211_iw_handler_def; 727extern const struct iw_handler_def ieee80211_iw_handler_def;
@@ -749,7 +763,8 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len);
749void ieee80211_sta_req_auth(struct net_device *dev, 763void ieee80211_sta_req_auth(struct net_device *dev,
750 struct ieee80211_if_sta *ifsta); 764 struct ieee80211_if_sta *ifsta);
751int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len); 765int ieee80211_sta_scan_results(struct net_device *dev, char *buf, size_t len);
752void ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb, 766ieee80211_txrx_result ieee80211_sta_rx_scan(struct net_device *dev,
767 struct sk_buff *skb,
753 struct ieee80211_rx_status *rx_status); 768 struct ieee80211_rx_status *rx_status);
754void ieee80211_rx_bss_list_init(struct net_device *dev); 769void ieee80211_rx_bss_list_init(struct net_device *dev);
755void ieee80211_rx_bss_list_deinit(struct net_device *dev); 770void ieee80211_rx_bss_list_deinit(struct net_device *dev);
@@ -759,9 +774,17 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev,
759 u8 *addr); 774 u8 *addr);
760int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason); 775int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason);
761int ieee80211_sta_disassociate(struct net_device *dev, u16 reason); 776int ieee80211_sta_disassociate(struct net_device *dev, u16 reason);
762void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes); 777void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
778 u32 changed);
763void ieee80211_reset_erp_info(struct net_device *dev); 779void ieee80211_reset_erp_info(struct net_device *dev);
764 780int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
781 struct ieee80211_ht_info *ht_info);
782int ieee80211_ht_addt_info_ie_to_ht_bss_info(
783 struct ieee80211_ht_addt_info *ht_add_info_ie,
784 struct ieee80211_ht_bss_info *bss_info);
785void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da,
786 u16 tid, u16 initiator, u16 reason);
787void sta_rx_agg_session_timer_expired(unsigned long data);
765/* ieee80211_iface.c */ 788/* ieee80211_iface.c */
766int ieee80211_if_add(struct net_device *dev, const char *name, 789int ieee80211_if_add(struct net_device *dev, const char *name,
767 struct net_device **new_dev, int type); 790 struct net_device **new_dev, int type);
@@ -793,8 +816,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev);
793extern void *mac80211_wiphy_privid; /* for wiphy privid */ 816extern void *mac80211_wiphy_privid; /* for wiphy privid */
794extern const unsigned char rfc1042_header[6]; 817extern const unsigned char rfc1042_header[6];
795extern const unsigned char bridge_tunnel_header[6]; 818extern const unsigned char bridge_tunnel_header[6];
796u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len); 819u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
797int ieee80211_is_eapol(const struct sk_buff *skb); 820 enum ieee80211_if_types type);
798int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, 821int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
799 int rate, int erp, int short_preamble); 822 int rate, int erp, int short_preamble);
800void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, 823void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx,
diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c
index 43e505d29452..92f1eb2da311 100644
--- a/net/mac80211/ieee80211_iface.c
+++ b/net/mac80211/ieee80211_iface.c
@@ -22,7 +22,6 @@ void ieee80211_if_sdata_init(struct ieee80211_sub_if_data *sdata)
22 22
23 /* Default values for sub-interface parameters */ 23 /* Default values for sub-interface parameters */
24 sdata->drop_unencrypted = 0; 24 sdata->drop_unencrypted = 0;
25 sdata->eapol = 1;
26 for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) 25 for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
27 skb_queue_head_init(&sdata->fragments[i].skb_list); 26 skb_queue_head_init(&sdata->fragments[i].skb_list);
28 27
@@ -48,7 +47,7 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
48 int ret; 47 int ret;
49 48
50 ASSERT_RTNL(); 49 ASSERT_RTNL();
51 ndev = alloc_netdev(sizeof(struct ieee80211_sub_if_data), 50 ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size,
52 name, ieee80211_if_setup); 51 name, ieee80211_if_setup);
53 if (!ndev) 52 if (!ndev)
54 return -ENOMEM; 53 return -ENOMEM;
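The allocation above now reserves local->hw.vif_data_size extra bytes behind the sub-interface data, so a driver can keep per-interface state inside the same netdev allocation. A minimal sketch of the driver side, assuming the extra bytes are exposed to the driver as vif->drv_priv and with all driver names hypothetical:

	struct myw_vif_priv {				/* hypothetical driver struct */
		int beacon_int;
		u8 bssid[ETH_ALEN];
	};

	static void myw_setup_hw(struct ieee80211_hw *hw)
	{
		/* ask mac80211 to append this many bytes to every sub-interface */
		hw->vif_data_size = sizeof(struct myw_vif_priv);
	}

	static void myw_vif_example(struct ieee80211_vif *vif)
	{
		struct myw_vif_priv *vp = (void *)vif->drv_priv;	/* assumed field */

		vp->beacon_int = 100;		/* driver-private per-vif state */
	}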
@@ -67,7 +66,7 @@ int ieee80211_if_add(struct net_device *dev, const char *name,
67 sdata = IEEE80211_DEV_TO_SUB_IF(ndev); 66 sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
68 ndev->ieee80211_ptr = &sdata->wdev; 67 ndev->ieee80211_ptr = &sdata->wdev;
69 sdata->wdev.wiphy = local->hw.wiphy; 68 sdata->wdev.wiphy = local->hw.wiphy;
70 sdata->type = IEEE80211_IF_TYPE_AP; 69 sdata->vif.type = IEEE80211_IF_TYPE_AP;
71 sdata->dev = ndev; 70 sdata->dev = ndev;
72 sdata->local = local; 71 sdata->local = local;
73 ieee80211_if_sdata_init(sdata); 72 ieee80211_if_sdata_init(sdata);
@@ -99,7 +98,7 @@ fail:
99void ieee80211_if_set_type(struct net_device *dev, int type) 98void ieee80211_if_set_type(struct net_device *dev, int type)
100{ 99{
101 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 100 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
102 int oldtype = sdata->type; 101 int oldtype = sdata->vif.type;
103 102
104 /* 103 /*
105 * We need to call this function on the master interface 104 * We need to call this function on the master interface
@@ -117,7 +116,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
117 116
118 /* most have no BSS pointer */ 117 /* most have no BSS pointer */
119 sdata->bss = NULL; 118 sdata->bss = NULL;
120 sdata->type = type; 119 sdata->vif.type = type;
121 120
122 switch (type) { 121 switch (type) {
123 case IEEE80211_IF_TYPE_WDS: 122 case IEEE80211_IF_TYPE_WDS:
@@ -127,7 +126,6 @@ void ieee80211_if_set_type(struct net_device *dev, int type)
127 sdata->u.vlan.ap = NULL; 126 sdata->u.vlan.ap = NULL;
128 break; 127 break;
129 case IEEE80211_IF_TYPE_AP: 128 case IEEE80211_IF_TYPE_AP:
130 sdata->u.ap.dtim_period = 2;
131 sdata->u.ap.force_unicast_rateidx = -1; 129 sdata->u.ap.force_unicast_rateidx = -1;
132 sdata->u.ap.max_ratectrl_rateidx = -1; 130 sdata->u.ap.max_ratectrl_rateidx = -1;
133 skb_queue_head_init(&sdata->u.ap.ps_bc_buf); 131 skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
@@ -182,7 +180,7 @@ void ieee80211_if_reinit(struct net_device *dev)
182 180
183 ieee80211_if_sdata_deinit(sdata); 181 ieee80211_if_sdata_deinit(sdata);
184 182
185 switch (sdata->type) { 183 switch (sdata->vif.type) {
186 case IEEE80211_IF_TYPE_INVALID: 184 case IEEE80211_IF_TYPE_INVALID:
187 /* cannot happen */ 185 /* cannot happen */
188 WARN_ON(1); 186 WARN_ON(1);
@@ -208,8 +206,7 @@ void ieee80211_if_reinit(struct net_device *dev)
208 } 206 }
209 } 207 }
210 208
211 kfree(sdata->u.ap.beacon_head); 209 kfree(sdata->u.ap.beacon);
212 kfree(sdata->u.ap.beacon_tail);
213 210
214 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { 211 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
215 local->total_ps_buffered--; 212 local->total_ps_buffered--;
@@ -280,7 +277,7 @@ int ieee80211_if_remove(struct net_device *dev, const char *name, int id)
280 ASSERT_RTNL(); 277 ASSERT_RTNL();
281 278
282 list_for_each_entry_safe(sdata, n, &local->interfaces, list) { 279 list_for_each_entry_safe(sdata, n, &local->interfaces, list) {
283 if ((sdata->type == id || id == -1) && 280 if ((sdata->vif.type == id || id == -1) &&
284 strcmp(name, sdata->dev->name) == 0 && 281 strcmp(name, sdata->dev->name) == 0 &&
285 sdata->dev != local->mdev) { 282 sdata->dev != local->mdev) {
286 list_del_rcu(&sdata->list); 283 list_del_rcu(&sdata->list);
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index 308bbe4a1333..5024d3733834 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -21,6 +21,7 @@
21 21
22#include <net/mac80211.h> 22#include <net/mac80211.h>
23#include "ieee80211_i.h" 23#include "ieee80211_i.h"
24#include "ieee80211_led.h"
24#include "ieee80211_rate.h" 25#include "ieee80211_rate.h"
25#include "wpa.h" 26#include "wpa.h"
26#include "aes_ccm.h" 27#include "aes_ccm.h"
@@ -111,8 +112,8 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
111 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) 112 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
112 return -EOPNOTSUPP; 113 return -EOPNOTSUPP;
113 114
114 if (sdata->type == IEEE80211_IF_TYPE_STA || 115 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
115 sdata->type == IEEE80211_IF_TYPE_IBSS) { 116 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
116 int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length); 117 int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length);
117 if (ret) 118 if (ret)
118 return ret; 119 return ret;
@@ -218,6 +219,8 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev,
218 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); 219 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
219 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); 220 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
220 221
222 range->scan_capa |= IW_SCAN_CAPA_ESSID;
223
221 return 0; 224 return 0;
222} 225}
223 226
@@ -229,7 +232,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
229 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 232 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
230 int type; 233 int type;
231 234
232 if (sdata->type == IEEE80211_IF_TYPE_VLAN) 235 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
233 return -EOPNOTSUPP; 236 return -EOPNOTSUPP;
234 237
235 switch (*mode) { 238 switch (*mode) {
@@ -246,7 +249,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev,
246 return -EINVAL; 249 return -EINVAL;
247 } 250 }
248 251
249 if (type == sdata->type) 252 if (type == sdata->vif.type)
250 return 0; 253 return 0;
251 if (netif_running(dev)) 254 if (netif_running(dev))
252 return -EBUSY; 255 return -EBUSY;
@@ -265,7 +268,7 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev,
265 struct ieee80211_sub_if_data *sdata; 268 struct ieee80211_sub_if_data *sdata;
266 269
267 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 270 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
268 switch (sdata->type) { 271 switch (sdata->vif.type) {
269 case IEEE80211_IF_TYPE_AP: 272 case IEEE80211_IF_TYPE_AP:
270 *mode = IW_MODE_MASTER; 273 *mode = IW_MODE_MASTER;
271 break; 274 break;
@@ -315,7 +318,7 @@ int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq)
315 } 318 }
316 319
317 if (set) { 320 if (set) {
318 if (local->sta_scanning) 321 if (local->sta_sw_scanning)
319 ret = 0; 322 ret = 0;
320 else 323 else
321 ret = ieee80211_hw_config(local); 324 ret = ieee80211_hw_config(local);
@@ -333,13 +336,13 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
333 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 336 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
334 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 337 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
335 338
336 if (sdata->type == IEEE80211_IF_TYPE_STA) 339 if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
337 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; 340 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL;
338 341
339 /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ 342 /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */
340 if (freq->e == 0) { 343 if (freq->e == 0) {
341 if (freq->m < 0) { 344 if (freq->m < 0) {
342 if (sdata->type == IEEE80211_IF_TYPE_STA) 345 if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
343 sdata->u.sta.flags |= 346 sdata->u.sta.flags |=
344 IEEE80211_STA_AUTO_CHANNEL_SEL; 347 IEEE80211_STA_AUTO_CHANNEL_SEL;
345 return 0; 348 return 0;
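The freq comment above captures the wext encoding: with freq->e == 0, freq->m carries a channel number; otherwise the pair encodes m * 10^e Hz. A small standalone sketch of that decoding, with the helper name ours:

	static int iw_freq_to_mhz(const struct iw_freq *freq)
	{
		int i, div = 1000000;			/* Hz per MHz */

		if (freq->e == 0)
			return -1;			/* freq->m is a channel index */
		for (i = 0; i < freq->e; i++)
			div /= 10;
		/* e.g. e = 1, m = 241200000 encodes 2412 MHz (channel 1) */
		return div > 0 ? freq->m / div : -1;
	}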
@@ -385,8 +388,8 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
385 len--; 388 len--;
386 389
387 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 390 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
388 if (sdata->type == IEEE80211_IF_TYPE_STA || 391 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
389 sdata->type == IEEE80211_IF_TYPE_IBSS) { 392 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
390 int ret; 393 int ret;
391 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { 394 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
392 if (len > IEEE80211_MAX_SSID_LEN) 395 if (len > IEEE80211_MAX_SSID_LEN)
@@ -406,7 +409,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
406 return 0; 409 return 0;
407 } 410 }
408 411
409 if (sdata->type == IEEE80211_IF_TYPE_AP) { 412 if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
410 memcpy(sdata->u.ap.ssid, ssid, len); 413 memcpy(sdata->u.ap.ssid, ssid, len);
411 memset(sdata->u.ap.ssid + len, 0, 414 memset(sdata->u.ap.ssid + len, 0,
412 IEEE80211_MAX_SSID_LEN - len); 415 IEEE80211_MAX_SSID_LEN - len);
@@ -425,8 +428,8 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
425 428
426 struct ieee80211_sub_if_data *sdata; 429 struct ieee80211_sub_if_data *sdata;
427 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 430 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
428 if (sdata->type == IEEE80211_IF_TYPE_STA || 431 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
429 sdata->type == IEEE80211_IF_TYPE_IBSS) { 432 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
430 int res = ieee80211_sta_get_ssid(dev, ssid, &len); 433 int res = ieee80211_sta_get_ssid(dev, ssid, &len);
431 if (res == 0) { 434 if (res == 0) {
432 data->length = len; 435 data->length = len;
@@ -436,7 +439,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
436 return res; 439 return res;
437 } 440 }
438 441
439 if (sdata->type == IEEE80211_IF_TYPE_AP) { 442 if (sdata->vif.type == IEEE80211_IF_TYPE_AP) {
440 len = sdata->u.ap.ssid_len; 443 len = sdata->u.ap.ssid_len;
441 if (len > IW_ESSID_MAX_SIZE) 444 if (len > IW_ESSID_MAX_SIZE)
442 len = IW_ESSID_MAX_SIZE; 445 len = IW_ESSID_MAX_SIZE;
@@ -456,8 +459,8 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
456 struct ieee80211_sub_if_data *sdata; 459 struct ieee80211_sub_if_data *sdata;
457 460
458 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 461 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
459 if (sdata->type == IEEE80211_IF_TYPE_STA || 462 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
460 sdata->type == IEEE80211_IF_TYPE_IBSS) { 463 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
461 int ret; 464 int ret;
462 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { 465 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
463 memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, 466 memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data,
@@ -476,7 +479,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
476 return ret; 479 return ret;
477 ieee80211_sta_req_auth(dev, &sdata->u.sta); 480 ieee80211_sta_req_auth(dev, &sdata->u.sta);
478 return 0; 481 return 0;
479 } else if (sdata->type == IEEE80211_IF_TYPE_WDS) { 482 } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
480 if (memcmp(sdata->u.wds.remote_addr, (u8 *) &ap_addr->sa_data, 483 if (memcmp(sdata->u.wds.remote_addr, (u8 *) &ap_addr->sa_data,
481 ETH_ALEN) == 0) 484 ETH_ALEN) == 0)
482 return 0; 485 return 0;
@@ -494,12 +497,12 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
494 struct ieee80211_sub_if_data *sdata; 497 struct ieee80211_sub_if_data *sdata;
495 498
496 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 499 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
497 if (sdata->type == IEEE80211_IF_TYPE_STA || 500 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
498 sdata->type == IEEE80211_IF_TYPE_IBSS) { 501 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
499 ap_addr->sa_family = ARPHRD_ETHER; 502 ap_addr->sa_family = ARPHRD_ETHER;
500 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); 503 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN);
501 return 0; 504 return 0;
502 } else if (sdata->type == IEEE80211_IF_TYPE_WDS) { 505 } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
503 ap_addr->sa_family = ARPHRD_ETHER; 506 ap_addr->sa_family = ARPHRD_ETHER;
504 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); 507 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN);
505 return 0; 508 return 0;
@@ -513,7 +516,6 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
513 struct iw_request_info *info, 516 struct iw_request_info *info,
514 union iwreq_data *wrqu, char *extra) 517 union iwreq_data *wrqu, char *extra)
515{ 518{
516 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
517 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 519 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
518 struct iw_scan_req *req = NULL; 520 struct iw_scan_req *req = NULL;
519 u8 *ssid = NULL; 521 u8 *ssid = NULL;
@@ -522,23 +524,10 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
522 if (!netif_running(dev)) 524 if (!netif_running(dev))
523 return -ENETDOWN; 525 return -ENETDOWN;
524 526
525 switch (sdata->type) { 527 if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
526 case IEEE80211_IF_TYPE_STA: 528 sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
527 case IEEE80211_IF_TYPE_IBSS: 529 sdata->vif.type != IEEE80211_IF_TYPE_AP)
528 if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
529 ssid = sdata->u.sta.ssid;
530 ssid_len = sdata->u.sta.ssid_len;
531 }
532 break;
533 case IEEE80211_IF_TYPE_AP:
534 if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
535 ssid = sdata->u.ap.ssid;
536 ssid_len = sdata->u.ap.ssid_len;
537 }
538 break;
539 default:
540 return -EOPNOTSUPP; 530 return -EOPNOTSUPP;
541 }
542 531
543 /* if SSID was specified explicitly then use that */ 532 /* if SSID was specified explicitly then use that */
544 if (wrqu->data.length == sizeof(struct iw_scan_req) && 533 if (wrqu->data.length == sizeof(struct iw_scan_req) &&
@@ -558,8 +547,10 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev,
558{ 547{
559 int res; 548 int res;
560 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 549 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
561 if (local->sta_scanning) 550
551 if (local->sta_sw_scanning || local->sta_hw_scanning)
562 return -EAGAIN; 552 return -EAGAIN;
553
563 res = ieee80211_sta_scan_results(dev, extra, data->length); 554 res = ieee80211_sta_scan_results(dev, extra, data->length);
564 if (res >= 0) { 555 if (res >= 0) {
565 data->length = res; 556 data->length = res;
@@ -614,7 +605,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
614 struct ieee80211_sub_if_data *sdata; 605 struct ieee80211_sub_if_data *sdata;
615 606
616 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 607 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
617 if (sdata->type == IEEE80211_IF_TYPE_STA) 608 if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
618 sta = sta_info_get(local, sdata->u.sta.bssid); 609 sta = sta_info_get(local, sdata->u.sta.bssid);
619 else 610 else
620 return -EOPNOTSUPP; 611 return -EOPNOTSUPP;
@@ -634,22 +625,36 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
634{ 625{
635 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 626 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
636 bool need_reconfig = 0; 627 bool need_reconfig = 0;
628 u8 new_power_level;
637 629
638 if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) 630 if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
639 return -EINVAL; 631 return -EINVAL;
640 if (data->txpower.flags & IW_TXPOW_RANGE) 632 if (data->txpower.flags & IW_TXPOW_RANGE)
641 return -EINVAL; 633 return -EINVAL;
642 if (!data->txpower.fixed)
643 return -EINVAL;
644 634
645 if (local->hw.conf.power_level != data->txpower.value) { 635 if (data->txpower.fixed) {
646 local->hw.conf.power_level = data->txpower.value; 636 new_power_level = data->txpower.value;
637 } else {
 638 /* Automatic power level. Get the tx power from the current
 639 * channel. */
 640 struct ieee80211_channel *chan = local->oper_channel;
641 if (!chan)
642 return -EINVAL;
643
644 new_power_level = chan->power_level;
645 }
646
647 if (local->hw.conf.power_level != new_power_level) {
648 local->hw.conf.power_level = new_power_level;
647 need_reconfig = 1; 649 need_reconfig = 1;
648 } 650 }
651
649 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { 652 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
650 local->hw.conf.radio_enabled = !(data->txpower.disabled); 653 local->hw.conf.radio_enabled = !(data->txpower.disabled);
651 need_reconfig = 1; 654 need_reconfig = 1;
655 ieee80211_led_radio(local, local->hw.conf.radio_enabled);
652 } 656 }
657
653 if (need_reconfig) { 658 if (need_reconfig) {
654 ieee80211_hw_config(local); 659 ieee80211_hw_config(local);
655 /* The return value of hw_config is not of big interest here, 660 /* The return value of hw_config is not of big interest here,
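The txpower path above now accepts the automatic (non-fixed) case by falling back to the channel's power_level instead of rejecting it. A condensed sketch of the selection rule, assuming the same fields as in the hunk:

	static u8 choose_power_level(const struct iw_param *txpower,
				     const struct ieee80211_channel *chan)
	{
		if (txpower->fixed)
			return txpower->value;	/* explicit dBm from userspace */
		return chan->power_level;	/* "auto": the channel's limit */
	}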
@@ -814,8 +819,8 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev,
814 struct iw_mlme *mlme = (struct iw_mlme *) extra; 819 struct iw_mlme *mlme = (struct iw_mlme *) extra;
815 820
816 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 821 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
817 if (sdata->type != IEEE80211_IF_TYPE_STA && 822 if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
818 sdata->type != IEEE80211_IF_TYPE_IBSS) 823 sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
819 return -EINVAL; 824 return -EINVAL;
820 825
821 switch (mlme->cmd) { 826 switch (mlme->cmd) {
@@ -928,8 +933,11 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
928 case IW_AUTH_RX_UNENCRYPTED_EAPOL: 933 case IW_AUTH_RX_UNENCRYPTED_EAPOL:
929 case IW_AUTH_KEY_MGMT: 934 case IW_AUTH_KEY_MGMT:
930 break; 935 break;
936 case IW_AUTH_DROP_UNENCRYPTED:
937 sdata->drop_unencrypted = !!data->value;
938 break;
931 case IW_AUTH_PRIVACY_INVOKED: 939 case IW_AUTH_PRIVACY_INVOKED:
932 if (sdata->type != IEEE80211_IF_TYPE_STA) 940 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
933 ret = -EINVAL; 941 ret = -EINVAL;
934 else { 942 else {
935 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; 943 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
@@ -944,8 +952,8 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
944 } 952 }
945 break; 953 break;
946 case IW_AUTH_80211_AUTH_ALG: 954 case IW_AUTH_80211_AUTH_ALG:
947 if (sdata->type == IEEE80211_IF_TYPE_STA || 955 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
948 sdata->type == IEEE80211_IF_TYPE_IBSS) 956 sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
949 sdata->u.sta.auth_algs = data->value; 957 sdata->u.sta.auth_algs = data->value;
950 else 958 else
951 ret = -EOPNOTSUPP; 959 ret = -EOPNOTSUPP;
@@ -965,8 +973,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
965 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 973 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
966 struct sta_info *sta = NULL; 974 struct sta_info *sta = NULL;
967 975
968 if (sdata->type == IEEE80211_IF_TYPE_STA || 976 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
969 sdata->type == IEEE80211_IF_TYPE_IBSS) 977 sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
970 sta = sta_info_get(local, sdata->u.sta.bssid); 978 sta = sta_info_get(local, sdata->u.sta.bssid);
971 if (!sta) { 979 if (!sta) {
972 wstats->discard.fragment = 0; 980 wstats->discard.fragment = 0;
@@ -994,8 +1002,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev,
994 1002
995 switch (data->flags & IW_AUTH_INDEX) { 1003 switch (data->flags & IW_AUTH_INDEX) {
996 case IW_AUTH_80211_AUTH_ALG: 1004 case IW_AUTH_80211_AUTH_ALG:
997 if (sdata->type == IEEE80211_IF_TYPE_STA || 1005 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
998 sdata->type == IEEE80211_IF_TYPE_IBSS) 1006 sdata->vif.type == IEEE80211_IF_TYPE_IBSS)
999 data->value = sdata->u.sta.auth_algs; 1007 data->value = sdata->u.sta.auth_algs;
1000 else 1008 else
1001 ret = -EOPNOTSUPP; 1009 ret = -EOPNOTSUPP;
diff --git a/net/mac80211/ieee80211_led.c b/net/mac80211/ieee80211_led.c
index 4cf89af9d100..f401484ab6d7 100644
--- a/net/mac80211/ieee80211_led.c
+++ b/net/mac80211/ieee80211_led.c
@@ -43,6 +43,16 @@ void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
43 led_trigger_event(local->assoc_led, LED_OFF); 43 led_trigger_event(local->assoc_led, LED_OFF);
44} 44}
45 45
46void ieee80211_led_radio(struct ieee80211_local *local, bool enabled)
47{
48 if (unlikely(!local->radio_led))
49 return;
50 if (enabled)
51 led_trigger_event(local->radio_led, LED_FULL);
52 else
53 led_trigger_event(local->radio_led, LED_OFF);
54}
55
46void ieee80211_led_init(struct ieee80211_local *local) 56void ieee80211_led_init(struct ieee80211_local *local)
47{ 57{
48 local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); 58 local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
@@ -77,10 +87,25 @@ void ieee80211_led_init(struct ieee80211_local *local)
77 local->assoc_led = NULL; 87 local->assoc_led = NULL;
78 } 88 }
79 } 89 }
90
91 local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
92 if (local->radio_led) {
93 snprintf(local->radio_led_name, sizeof(local->radio_led_name),
94 "%sradio", wiphy_name(local->hw.wiphy));
95 local->radio_led->name = local->radio_led_name;
96 if (led_trigger_register(local->radio_led)) {
97 kfree(local->radio_led);
98 local->radio_led = NULL;
99 }
100 }
80} 101}
81 102
82void ieee80211_led_exit(struct ieee80211_local *local) 103void ieee80211_led_exit(struct ieee80211_local *local)
83{ 104{
105 if (local->radio_led) {
106 led_trigger_unregister(local->radio_led);
107 kfree(local->radio_led);
108 }
84 if (local->assoc_led) { 109 if (local->assoc_led) {
85 led_trigger_unregister(local->assoc_led); 110 led_trigger_unregister(local->assoc_led);
86 kfree(local->assoc_led); 111 kfree(local->assoc_led);
@@ -95,6 +120,16 @@ void ieee80211_led_exit(struct ieee80211_local *local)
95 } 120 }
96} 121}
97 122
123char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
124{
125 struct ieee80211_local *local = hw_to_local(hw);
126
127 if (local->radio_led)
128 return local->radio_led_name;
129 return NULL;
130}
131EXPORT_SYMBOL(__ieee80211_get_radio_led_name);
132
98char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) 133char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
99{ 134{
100 struct ieee80211_local *local = hw_to_local(hw); 135 struct ieee80211_local *local = hw_to_local(hw);
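The radio trigger mirrors the existing tx/rx/assoc triggers: allocate, name, register, and tear down in reverse order. On the driver side the trigger name would typically become an LED's default trigger; a sketch assuming an ieee80211_get_radio_led_name() inline wrapper exists in mac80211.h alongside its tx/rx/assoc siblings, with the driver names hypothetical:

	static int myw_register_radio_led(struct ieee80211_hw *hw,
					  struct device *parent,
					  struct led_classdev *led)
	{
		led->name = "myw::radio";			/* hypothetical */
		/* assumed wrapper around __ieee80211_get_radio_led_name() */
		led->default_trigger = ieee80211_get_radio_led_name(hw);
		return led_classdev_register(parent, led);
	}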
diff --git a/net/mac80211/ieee80211_led.h b/net/mac80211/ieee80211_led.h
index 0feb22619835..77b1e1ba6039 100644
--- a/net/mac80211/ieee80211_led.h
+++ b/net/mac80211/ieee80211_led.h
@@ -16,6 +16,8 @@ extern void ieee80211_led_rx(struct ieee80211_local *local);
16extern void ieee80211_led_tx(struct ieee80211_local *local, int q); 16extern void ieee80211_led_tx(struct ieee80211_local *local, int q);
17extern void ieee80211_led_assoc(struct ieee80211_local *local, 17extern void ieee80211_led_assoc(struct ieee80211_local *local,
18 bool associated); 18 bool associated);
19extern void ieee80211_led_radio(struct ieee80211_local *local,
20 bool enabled);
19extern void ieee80211_led_init(struct ieee80211_local *local); 21extern void ieee80211_led_init(struct ieee80211_local *local);
20extern void ieee80211_led_exit(struct ieee80211_local *local); 22extern void ieee80211_led_exit(struct ieee80211_local *local);
21#else 23#else
@@ -29,6 +31,10 @@ static inline void ieee80211_led_assoc(struct ieee80211_local *local,
29 bool associated) 31 bool associated)
30{ 32{
31} 33}
34static inline void ieee80211_led_radio(struct ieee80211_local *local,
35 bool enabled)
36{
37}
32static inline void ieee80211_led_init(struct ieee80211_local *local) 38static inline void ieee80211_led_init(struct ieee80211_local *local)
33{ 39{
34} 40}
diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c
index c3f278393741..b957e67c5fba 100644
--- a/net/mac80211/ieee80211_rate.c
+++ b/net/mac80211/ieee80211_rate.c
@@ -21,6 +21,11 @@ struct rate_control_alg {
21static LIST_HEAD(rate_ctrl_algs); 21static LIST_HEAD(rate_ctrl_algs);
22static DEFINE_MUTEX(rate_ctrl_mutex); 22static DEFINE_MUTEX(rate_ctrl_mutex);
23 23
24static char *ieee80211_default_rc_algo = CONFIG_MAC80211_RC_DEFAULT;
25module_param(ieee80211_default_rc_algo, charp, 0644);
26MODULE_PARM_DESC(ieee80211_default_rc_algo,
27 "Default rate control algorithm for mac80211 to use");
28
24int ieee80211_rate_control_register(struct rate_control_ops *ops) 29int ieee80211_rate_control_register(struct rate_control_ops *ops)
25{ 30{
26 struct rate_control_alg *alg; 31 struct rate_control_alg *alg;
@@ -89,21 +94,31 @@ ieee80211_try_rate_control_ops_get(const char *name)
89 return ops; 94 return ops;
90} 95}
91 96
92/* Get the rate control algorithm. If `name' is NULL, get the first 97/* Get the rate control algorithm. */
93 * available algorithm. */
94static struct rate_control_ops * 98static struct rate_control_ops *
95ieee80211_rate_control_ops_get(const char *name) 99ieee80211_rate_control_ops_get(const char *name)
96{ 100{
97 struct rate_control_ops *ops; 101 struct rate_control_ops *ops;
102 const char *alg_name;
98 103
99 if (!name) 104 if (!name)
100 name = "simple"; 105 alg_name = ieee80211_default_rc_algo;
106 else
107 alg_name = name;
101 108
102 ops = ieee80211_try_rate_control_ops_get(name); 109 ops = ieee80211_try_rate_control_ops_get(alg_name);
103 if (!ops) { 110 if (!ops) {
104 request_module("rc80211_%s", name); 111 request_module("rc80211_%s", alg_name);
105 ops = ieee80211_try_rate_control_ops_get(name); 112 ops = ieee80211_try_rate_control_ops_get(alg_name);
106 } 113 }
114 if (!ops && name)
115 /* try default if specific alg requested but not found */
116 ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo);
117
 118 /* as a last resort, try the compiled-in default algorithm */
119 if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT))
120 ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT);
121
107 return ops; 122 return ops;
108} 123}
109 124
@@ -147,6 +162,37 @@ static void rate_control_release(struct kref *kref)
147 kfree(ctrl_ref); 162 kfree(ctrl_ref);
148} 163}
149 164
165void rate_control_get_rate(struct net_device *dev,
166 struct ieee80211_hw_mode *mode, struct sk_buff *skb,
167 struct rate_selection *sel)
168{
169 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
170 struct rate_control_ref *ref = local->rate_ctrl;
171 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
172 struct sta_info *sta = sta_info_get(local, hdr->addr1);
173 int i;
174
175 memset(sel, 0, sizeof(struct rate_selection));
176
177 ref->ops->get_rate(ref->priv, dev, mode, skb, sel);
178
179 /* Select a non-ERP backup rate. */
180 if (!sel->nonerp) {
181 for (i = 0; i < mode->num_rates - 1; i++) {
182 struct ieee80211_rate *rate = &mode->rates[i];
183 if (sel->rate->rate < rate->rate)
184 break;
185
186 if (rate_supported(sta, mode, i) &&
187 !(rate->flags & IEEE80211_RATE_ERP))
188 sel->nonerp = rate;
189 }
190 }
191
192 if (sta)
193 sta_info_put(sta);
194}
195
150struct rate_control_ref *rate_control_get(struct rate_control_ref *ref) 196struct rate_control_ref *rate_control_get(struct rate_control_ref *ref)
151{ 197{
152 kref_get(&ref->kref); 198 kref_get(&ref->kref);
@@ -197,3 +243,4 @@ void rate_control_deinitialize(struct ieee80211_local *local)
197 local->rate_ctrl = NULL; 243 local->rate_ctrl = NULL;
198 rate_control_put(ref); 244 rate_control_put(ref);
199} 245}
246
diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h
index 23688139ffb3..73f19e8aa51c 100644
--- a/net/mac80211/ieee80211_rate.h
+++ b/net/mac80211/ieee80211_rate.h
@@ -18,31 +18,24 @@
18#include "ieee80211_i.h" 18#include "ieee80211_i.h"
19#include "sta_info.h" 19#include "sta_info.h"
20 20
21#define RATE_CONTROL_NUM_DOWN 20 21struct rate_selection {
22#define RATE_CONTROL_NUM_UP 15 22 /* Selected transmission rate */
23 23 struct ieee80211_rate *rate;
24 24 /* Non-ERP rate to use if mac80211 decides it cannot use an ERP rate */
25struct rate_control_extra {
26 /* values from rate_control_get_rate() to the caller: */
27 struct ieee80211_rate *probe; /* probe with this rate, or NULL for no
28 * probing */
29 struct ieee80211_rate *nonerp; 25 struct ieee80211_rate *nonerp;
30 26 /* probe with this rate, or NULL for no probing */
31 /* parameters from the caller to rate_control_get_rate(): */ 27 struct ieee80211_rate *probe;
32 struct ieee80211_hw_mode *mode;
33 u16 ethertype;
34}; 28};
35 29
36
37struct rate_control_ops { 30struct rate_control_ops {
38 struct module *module; 31 struct module *module;
39 const char *name; 32 const char *name;
40 void (*tx_status)(void *priv, struct net_device *dev, 33 void (*tx_status)(void *priv, struct net_device *dev,
41 struct sk_buff *skb, 34 struct sk_buff *skb,
42 struct ieee80211_tx_status *status); 35 struct ieee80211_tx_status *status);
43 struct ieee80211_rate *(*get_rate)(void *priv, struct net_device *dev, 36 void (*get_rate)(void *priv, struct net_device *dev,
44 struct sk_buff *skb, 37 struct ieee80211_hw_mode *mode, struct sk_buff *skb,
45 struct rate_control_extra *extra); 38 struct rate_selection *sel);
46 void (*rate_init)(void *priv, void *priv_sta, 39 void (*rate_init)(void *priv, void *priv_sta,
47 struct ieee80211_local *local, struct sta_info *sta); 40 struct ieee80211_local *local, struct sta_info *sta);
48 void (*clear)(void *priv); 41 void (*clear)(void *priv);
@@ -65,9 +58,6 @@ struct rate_control_ref {
65 struct kref kref; 58 struct kref kref;
66}; 59};
67 60
68/* default 'simple' algorithm */
69extern struct rate_control_ops mac80211_rcsimple;
70
71int ieee80211_rate_control_register(struct rate_control_ops *ops); 61int ieee80211_rate_control_register(struct rate_control_ops *ops);
72void ieee80211_rate_control_unregister(struct rate_control_ops *ops); 62void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
73 63
@@ -75,25 +65,20 @@ void ieee80211_rate_control_unregister(struct rate_control_ops *ops);
75 * first available algorithm. */ 65 * first available algorithm. */
76struct rate_control_ref *rate_control_alloc(const char *name, 66struct rate_control_ref *rate_control_alloc(const char *name,
77 struct ieee80211_local *local); 67 struct ieee80211_local *local);
68void rate_control_get_rate(struct net_device *dev,
69 struct ieee80211_hw_mode *mode, struct sk_buff *skb,
70 struct rate_selection *sel);
78struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); 71struct rate_control_ref *rate_control_get(struct rate_control_ref *ref);
79void rate_control_put(struct rate_control_ref *ref); 72void rate_control_put(struct rate_control_ref *ref);
80 73
81static inline void rate_control_tx_status(struct ieee80211_local *local, 74static inline void rate_control_tx_status(struct net_device *dev,
82 struct net_device *dev,
83 struct sk_buff *skb, 75 struct sk_buff *skb,
84 struct ieee80211_tx_status *status) 76 struct ieee80211_tx_status *status)
85{ 77{
78 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
86 struct rate_control_ref *ref = local->rate_ctrl; 79 struct rate_control_ref *ref = local->rate_ctrl;
87 ref->ops->tx_status(ref->priv, dev, skb, status);
88}
89
90 80
91static inline struct ieee80211_rate * 81 ref->ops->tx_status(ref->priv, dev, skb, status);
92rate_control_get_rate(struct ieee80211_local *local, struct net_device *dev,
93 struct sk_buff *skb, struct rate_control_extra *extra)
94{
95 struct rate_control_ref *ref = local->rate_ctrl;
96 return ref->ops->get_rate(ref->priv, dev, skb, extra);
97} 82}
98 83
99 84
@@ -142,10 +127,73 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
142#endif 127#endif
143} 128}
144 129
130static inline int
131rate_supported(struct sta_info *sta, struct ieee80211_hw_mode *mode, int index)
132{
133 return (sta == NULL || sta->supp_rates & BIT(index)) &&
134 (mode->rates[index].flags & IEEE80211_RATE_SUPPORTED);
135}
136
137static inline int
138rate_lowest_index(struct ieee80211_local *local, struct ieee80211_hw_mode *mode,
139 struct sta_info *sta)
140{
141 int i;
142
143 for (i = 0; i < mode->num_rates; i++) {
144 if (rate_supported(sta, mode, i))
145 return i;
146 }
147
148 /* warn when we cannot find a rate. */
149 WARN_ON(1);
150
151 return 0;
152}
153
154static inline struct ieee80211_rate *
155rate_lowest(struct ieee80211_local *local, struct ieee80211_hw_mode *mode,
156 struct sta_info *sta)
157{
158 return &mode->rates[rate_lowest_index(local, mode, sta)];
159}
160
145 161
146/* functions for rate control related to a device */ 162/* functions for rate control related to a device */
147int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, 163int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
148 const char *name); 164 const char *name);
149void rate_control_deinitialize(struct ieee80211_local *local); 165void rate_control_deinitialize(struct ieee80211_local *local);
150 166
167
168/* Rate control algorithms */
169#if defined(RC80211_SIMPLE_COMPILE) || \
170 (defined(CONFIG_MAC80211_RC_SIMPLE) && \
171 !defined(CONFIG_MAC80211_RC_SIMPLE_MODULE))
172extern int rc80211_simple_init(void);
173extern void rc80211_simple_exit(void);
174#else
175static inline int rc80211_simple_init(void)
176{
177 return 0;
178}
179static inline void rc80211_simple_exit(void)
180{
181}
182#endif
183
184#if defined(RC80211_PID_COMPILE) || \
185 (defined(CONFIG_MAC80211_RC_PID) && \
186 !defined(CONFIG_MAC80211_RC_PID_MODULE))
187extern int rc80211_pid_init(void);
188extern void rc80211_pid_exit(void);
189#else
190static inline int rc80211_pid_init(void)
191{
192 return 0;
193}
194static inline void rc80211_pid_exit(void)
195{
196}
197#endif
198
151#endif /* IEEE80211_RATE_H */ 199#endif /* IEEE80211_RATE_H */
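A rate control algorithm written against this revised interface fills a struct rate_selection rather than returning a single rate. A minimal sketch, with registration boilerplate omitted and the algorithm name hypothetical:

	static void rc_lowest_get_rate(void *priv, struct net_device *dev,
				       struct ieee80211_hw_mode *mode,
				       struct sk_buff *skb,
				       struct rate_selection *sel)
	{
		struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);

		/* always transmit at the lowest supported rate; the caller,
		 * rate_control_get_rate(), derives a non-ERP fallback if needed */
		sel->rate = rate_lowest(local, mode, NULL);
	}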
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index bee8080f2249..2019b4f0528d 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -57,6 +57,20 @@
57 57
58#define ERP_INFO_USE_PROTECTION BIT(1) 58#define ERP_INFO_USE_PROTECTION BIT(1)
59 59
60/* mgmt header + 1 byte action code */
61#define IEEE80211_MIN_ACTION_SIZE (24 + 1)
62
63#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
64#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C
 65#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0
66#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000
67#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
68
 69/* The following values bound the A-MPDU reorder buffer size in MPDUs.
 70 * Per the IEEE 802.11n spec it varies from 8 to 64 (in powers of 2) */
71#define IEEE80211_MIN_AMPDU_BUF 0x8
72#define IEEE80211_MAX_AMPDU_BUF 0x40
73
60static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, 74static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst,
61 u8 *ssid, size_t ssid_len); 75 u8 *ssid, size_t ssid_len);
62static struct ieee80211_sta_bss * 76static struct ieee80211_sta_bss *
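The ADDBA parameter-set masks above slice one little-endian u16: bit 1 is the BA policy, bits 5:2 the TID, bits 15:6 the buffer size. A sketch of the decode with a worked value, helper name ours:

	static void decode_addba_capab(u16 capab)
	{
		u16 policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
		u16 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
		u16 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;

		/* capab 0x1016 decodes to policy 1 (immediate BA), tid 5, buf 64 */
		printk(KERN_DEBUG "policy %u tid %u buf_size %u\n",
		       policy, tid, buf_size);
	}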
@@ -90,7 +104,8 @@ struct ieee802_11_elems {
90 u8 *ext_supp_rates; 104 u8 *ext_supp_rates;
91 u8 *wmm_info; 105 u8 *wmm_info;
92 u8 *wmm_param; 106 u8 *wmm_param;
93 107 u8 *ht_cap_elem;
108 u8 *ht_info_elem;
94 /* length of them, respectively */ 109 /* length of them, respectively */
95 u8 ssid_len; 110 u8 ssid_len;
96 u8 supp_rates_len; 111 u8 supp_rates_len;
@@ -106,6 +121,8 @@ struct ieee802_11_elems {
106 u8 ext_supp_rates_len; 121 u8 ext_supp_rates_len;
107 u8 wmm_info_len; 122 u8 wmm_info_len;
108 u8 wmm_param_len; 123 u8 wmm_param_len;
124 u8 ht_cap_elem_len;
125 u8 ht_info_elem_len;
109}; 126};
110 127
111static void ieee802_11_parse_elems(u8 *start, size_t len, 128static void ieee802_11_parse_elems(u8 *start, size_t len,
@@ -190,6 +207,14 @@ static void ieee802_11_parse_elems(u8 *start, size_t len,
190 elems->ext_supp_rates = pos; 207 elems->ext_supp_rates = pos;
191 elems->ext_supp_rates_len = elen; 208 elems->ext_supp_rates_len = elen;
192 break; 209 break;
210 case WLAN_EID_HT_CAPABILITY:
211 elems->ht_cap_elem = pos;
212 elems->ht_cap_elem_len = elen;
213 break;
214 case WLAN_EID_HT_EXTRA_INFO:
215 elems->ht_info_elem = pos;
216 elems->ht_info_elem_len = elen;
217 break;
193 default: 218 default:
194 break; 219 break;
195 } 220 }
@@ -288,50 +313,89 @@ static void ieee80211_sta_wmm_params(struct net_device *dev,
288} 313}
289 314
290 315
291static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) 316static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata,
317 u8 erp_value)
292{ 318{
293 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 319 struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
294 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 320 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
295 int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; 321 bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0;
296 int preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0; 322 bool preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0;
297 u8 changes = 0;
298 DECLARE_MAC_BUF(mac); 323 DECLARE_MAC_BUF(mac);
324 u32 changed = 0;
299 325
300 if (use_protection != !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION)) { 326 if (use_protection != bss_conf->use_cts_prot) {
301 if (net_ratelimit()) { 327 if (net_ratelimit()) {
302 printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" 328 printk(KERN_DEBUG "%s: CTS protection %s (BSSID="
303 "%s)\n", 329 "%s)\n",
304 dev->name, 330 sdata->dev->name,
305 use_protection ? "enabled" : "disabled", 331 use_protection ? "enabled" : "disabled",
306 print_mac(mac, ifsta->bssid)); 332 print_mac(mac, ifsta->bssid));
307 } 333 }
308 if (use_protection) 334 bss_conf->use_cts_prot = use_protection;
309 sdata->flags |= IEEE80211_SDATA_USE_PROTECTION; 335 changed |= BSS_CHANGED_ERP_CTS_PROT;
310 else
311 sdata->flags &= ~IEEE80211_SDATA_USE_PROTECTION;
312 changes |= IEEE80211_ERP_CHANGE_PROTECTION;
313 } 336 }
314 337
315 if (preamble_mode != !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)) { 338 if (preamble_mode != bss_conf->use_short_preamble) {
316 if (net_ratelimit()) { 339 if (net_ratelimit()) {
317 printk(KERN_DEBUG "%s: switched to %s barker preamble" 340 printk(KERN_DEBUG "%s: switched to %s barker preamble"
318 " (BSSID=%s)\n", 341 " (BSSID=%s)\n",
319 dev->name, 342 sdata->dev->name,
320 (preamble_mode == WLAN_ERP_PREAMBLE_SHORT) ? 343 (preamble_mode == WLAN_ERP_PREAMBLE_SHORT) ?
321 "short" : "long", 344 "short" : "long",
322 print_mac(mac, ifsta->bssid)); 345 print_mac(mac, ifsta->bssid));
323 } 346 }
324 if (preamble_mode) 347 bss_conf->use_short_preamble = preamble_mode;
325 sdata->flags &= ~IEEE80211_SDATA_SHORT_PREAMBLE; 348 changed |= BSS_CHANGED_ERP_PREAMBLE;
326 else
327 sdata->flags |= IEEE80211_SDATA_SHORT_PREAMBLE;
328 changes |= IEEE80211_ERP_CHANGE_PREAMBLE;
329 } 349 }
330 350
331 if (changes) 351 return changed;
332 ieee80211_erp_info_change_notify(dev, changes);
333} 352}
334 353
354int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
355 struct ieee80211_ht_info *ht_info)
356{
357
358 if (ht_info == NULL)
359 return -EINVAL;
360
361 memset(ht_info, 0, sizeof(*ht_info));
362
363 if (ht_cap_ie) {
364 u8 ampdu_info = ht_cap_ie->ampdu_params_info;
365
366 ht_info->ht_supported = 1;
367 ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info);
368 ht_info->ampdu_factor =
369 ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR;
370 ht_info->ampdu_density =
371 (ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2;
372 memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16);
373 } else
374 ht_info->ht_supported = 0;
375
376 return 0;
377}
378
379int ieee80211_ht_addt_info_ie_to_ht_bss_info(
380 struct ieee80211_ht_addt_info *ht_add_info_ie,
381 struct ieee80211_ht_bss_info *bss_info)
382{
383 if (bss_info == NULL)
384 return -EINVAL;
385
386 memset(bss_info, 0, sizeof(*bss_info));
387
388 if (ht_add_info_ie) {
389 u16 op_mode;
390 op_mode = le16_to_cpu(ht_add_info_ie->operation_mode);
391
392 bss_info->primary_channel = ht_add_info_ie->control_chan;
393 bss_info->bss_cap = ht_add_info_ie->ht_param;
394 bss_info->bss_op_mode = (u8)(op_mode & 0xff);
395 }
396
397 return 0;
398}
335 399
336static void ieee80211_sta_send_associnfo(struct net_device *dev, 400static void ieee80211_sta_send_associnfo(struct net_device *dev,
337 struct ieee80211_if_sta *ifsta) 401 struct ieee80211_if_sta *ifsta)
@@ -388,20 +452,17 @@ static void ieee80211_set_associated(struct net_device *dev,
388 struct ieee80211_if_sta *ifsta, 452 struct ieee80211_if_sta *ifsta,
389 bool assoc) 453 bool assoc)
390{ 454{
391 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 455 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
456 struct ieee80211_local *local = sdata->local;
392 union iwreq_data wrqu; 457 union iwreq_data wrqu;
393 458 u32 changed = BSS_CHANGED_ASSOC;
394 if (!!(ifsta->flags & IEEE80211_STA_ASSOCIATED) == assoc)
395 return;
396 459
397 if (assoc) { 460 if (assoc) {
398 struct ieee80211_sub_if_data *sdata;
399 struct ieee80211_sta_bss *bss; 461 struct ieee80211_sta_bss *bss;
400 462
401 ifsta->flags |= IEEE80211_STA_ASSOCIATED; 463 ifsta->flags |= IEEE80211_STA_ASSOCIATED;
402 464
403 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 465 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
404 if (sdata->type != IEEE80211_IF_TYPE_STA)
405 return; 466 return;
406 467
407 bss = ieee80211_rx_bss_get(dev, ifsta->bssid, 468 bss = ieee80211_rx_bss_get(dev, ifsta->bssid,
@@ -409,7 +470,8 @@ static void ieee80211_set_associated(struct net_device *dev,
409 ifsta->ssid, ifsta->ssid_len); 470 ifsta->ssid, ifsta->ssid_len);
410 if (bss) { 471 if (bss) {
411 if (bss->has_erp_value) 472 if (bss->has_erp_value)
412 ieee80211_handle_erp_ie(dev, bss->erp_value); 473 changed |= ieee80211_handle_erp_ie(
474 sdata, bss->erp_value);
413 ieee80211_rx_bss_put(dev, bss); 475 ieee80211_rx_bss_put(dev, bss);
414 } 476 }
415 477
@@ -429,6 +491,8 @@ static void ieee80211_set_associated(struct net_device *dev,
429 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); 491 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
430 ifsta->last_probe = jiffies; 492 ifsta->last_probe = jiffies;
431 ieee80211_led_assoc(local, assoc); 493 ieee80211_led_assoc(local, assoc);
494
495 ieee80211_bss_info_change_notify(sdata, changed);
432} 496}
433 497
434static void ieee80211_set_disassoc(struct net_device *dev, 498static void ieee80211_set_disassoc(struct net_device *dev,
@@ -630,6 +694,19 @@ static void ieee80211_send_assoc(struct net_device *dev,
630 *pos++ = 1; /* WME ver */ 694 *pos++ = 1; /* WME ver */
631 *pos++ = 0; 695 *pos++ = 0;
632 } 696 }
 697 /* WMM support is mandatory for HT */
698 if (wmm && mode->ht_info.ht_supported) {
699 __le16 tmp = cpu_to_le16(mode->ht_info.cap);
700 pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2);
701 *pos++ = WLAN_EID_HT_CAPABILITY;
702 *pos++ = sizeof(struct ieee80211_ht_cap);
703 memset(pos, 0, sizeof(struct ieee80211_ht_cap));
704 memcpy(pos, &tmp, sizeof(u16));
705 pos += sizeof(u16);
706 *pos++ = (mode->ht_info.ampdu_factor |
707 (mode->ht_info.ampdu_density << 2));
708 memcpy(pos, mode->ht_info.supp_mcs_set, 16);
709 }
633 710
634 kfree(ifsta->assocreq_ies); 711 kfree(ifsta->assocreq_ies);
635 ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; 712 ifsta->assocreq_ies_len = (skb->data + skb->len) - ies;
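The HT capability element built above lays out as EID, length, a little-endian cap_info u16, one A-MPDU parameters byte, the 16-byte MCS set, and zeroed remaining fields. The same construction as a self-contained sketch, helper name ours:

	static u8 *build_ht_cap_ie(u8 *pos, u16 cap, u8 ampdu_factor,
				   u8 ampdu_density, const u8 *mcs_set)
	{
		u8 *start = pos;

		*pos++ = WLAN_EID_HT_CAPABILITY;
		*pos++ = sizeof(struct ieee80211_ht_cap);	/* 26 bytes */
		memset(pos, 0, sizeof(struct ieee80211_ht_cap));
		*pos++ = cap & 0xff;				/* cap_info, LE */
		*pos++ = cap >> 8;
		*pos++ = ampdu_factor | (ampdu_density << 2);	/* A-MPDU params */
		memcpy(pos, mcs_set, 16);			/* supported MCS set */
		return start + 2 + sizeof(struct ieee80211_ht_cap);
	}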
@@ -918,6 +995,320 @@ static void ieee80211_auth_challenge(struct net_device *dev,
918 elems.challenge_len + 2, 1); 995 elems.challenge_len + 2, 1);
919} 996}
920 997
998static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid,
999 u8 dialog_token, u16 status, u16 policy,
1000 u16 buf_size, u16 timeout)
1001{
1002 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1003 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
1004 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1005 struct sk_buff *skb;
1006 struct ieee80211_mgmt *mgmt;
1007 u16 capab;
1008
1009 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
1010 sizeof(mgmt->u.action.u.addba_resp));
1011 if (!skb) {
1012 printk(KERN_DEBUG "%s: failed to allocate buffer "
1013 "for addba resp frame\n", dev->name);
1014 return;
1015 }
1016
1017 skb_reserve(skb, local->hw.extra_tx_headroom);
1018 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
1019 memset(mgmt, 0, 24);
1020 memcpy(mgmt->da, da, ETH_ALEN);
1021 memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
1022 if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
1023 memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
1024 else
1025 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
1026 mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
1027 IEEE80211_STYPE_ACTION);
1028
1029 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
1030 mgmt->u.action.category = WLAN_CATEGORY_BACK;
1031 mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
1032 mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
1033
1034 capab = (u16)(policy << 1); /* bit 1 aggregation policy */
1035 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
1036 capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */
1037
1038 mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
1039 mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
1040 mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
1041
1042 ieee80211_sta_tx(dev, skb, 0);
1043
1044 return;
1045}
1046
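The capab packing in ieee80211_send_addba_resp() is the inverse of the parameter-set masks defined earlier. As a worked value: policy 1, TID 5, buffer size 64 gives (1 << 1) | (5 << 2) | (64 << 6) = 0x0002 | 0x0014 | 0x1000 = 0x1016. A sketch, helper name ours:

	static u16 encode_addba_capab(u16 policy, u16 tid, u16 buf_size)
	{
		return (policy << 1) | (tid << 2) | (buf_size << 6);
	}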
1047static void ieee80211_sta_process_addba_request(struct net_device *dev,
1048 struct ieee80211_mgmt *mgmt,
1049 size_t len)
1050{
1051 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1052 struct ieee80211_hw *hw = &local->hw;
1053 struct ieee80211_conf *conf = &hw->conf;
1054 struct sta_info *sta;
1055 struct tid_ampdu_rx *tid_agg_rx;
1056 u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
1057 u8 dialog_token;
1058 int ret = -EOPNOTSUPP;
1059 DECLARE_MAC_BUF(mac);
1060
1061 sta = sta_info_get(local, mgmt->sa);
1062 if (!sta)
1063 return;
1064
1065 /* extract session parameters from addba request frame */
1066 dialog_token = mgmt->u.action.u.addba_req.dialog_token;
1067 timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
1068 start_seq_num =
1069 le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
1070
1071 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
1072 ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
1073 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
1074 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
1075
1076 status = WLAN_STATUS_REQUEST_DECLINED;
1077
1078 /* sanity check for incoming parameters:
1079 * check if configuration can support the BA policy
 1080 * and that the buffer size does not exceed the max value */
1081 if (((ba_policy != 1)
1082 && (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA)))
1083 || (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
1084 status = WLAN_STATUS_INVALID_QOS_PARAM;
1085#ifdef CONFIG_MAC80211_HT_DEBUG
1086 if (net_ratelimit())
1087 printk(KERN_DEBUG "Block Ack Req with bad params from "
1088 "%s on tid %u. policy %d, buffer size %d\n",
1089 print_mac(mac, mgmt->sa), tid, ba_policy,
1090 buf_size);
1091#endif /* CONFIG_MAC80211_HT_DEBUG */
1092 goto end_no_lock;
1093 }
1094 /* determine default buffer size */
1095 if (buf_size == 0) {
1096 struct ieee80211_hw_mode *mode = conf->mode;
1097 buf_size = IEEE80211_MIN_AMPDU_BUF;
1098 buf_size = buf_size << mode->ht_info.ampdu_factor;
1099 }
1100
1101 tid_agg_rx = &sta->ampdu_mlme.tid_rx[tid];
1102
1103 /* examine state machine */
1104 spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
1105
1106 if (tid_agg_rx->state != HT_AGG_STATE_IDLE) {
1107#ifdef CONFIG_MAC80211_HT_DEBUG
1108 if (net_ratelimit())
1109 printk(KERN_DEBUG "unexpected Block Ack Req from "
1110 "%s on tid %u\n",
1111 print_mac(mac, mgmt->sa), tid);
1112#endif /* CONFIG_MAC80211_HT_DEBUG */
1113 goto end;
1114 }
1115
1116 /* prepare reordering buffer */
1117 tid_agg_rx->reorder_buf =
 1118 kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC);
 1119 if (!tid_agg_rx->reorder_buf) {
 1120 if (net_ratelimit())
 1121 printk(KERN_ERR "cannot allocate reordering buffer for tid %d\n", tid);
 1122 goto end;
 1123 }
 1124 memset(tid_agg_rx->reorder_buf, 0,
 1125 buf_size * sizeof(struct sk_buff *));
1126
1127 if (local->ops->ampdu_action)
1128 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
1129 sta->addr, tid, start_seq_num);
1130#ifdef CONFIG_MAC80211_HT_DEBUG
1131 printk(KERN_DEBUG "Rx A-MPDU on tid %d result %d", tid, ret);
1132#endif /* CONFIG_MAC80211_HT_DEBUG */
1133
1134 if (ret) {
1135 kfree(tid_agg_rx->reorder_buf);
1136 goto end;
1137 }
1138
1139 /* change state and send addba resp */
1140 tid_agg_rx->state = HT_AGG_STATE_OPERATIONAL;
1141 tid_agg_rx->dialog_token = dialog_token;
1142 tid_agg_rx->ssn = start_seq_num;
1143 tid_agg_rx->head_seq_num = start_seq_num;
1144 tid_agg_rx->buf_size = buf_size;
1145 tid_agg_rx->timeout = timeout;
1146 tid_agg_rx->stored_mpdu_num = 0;
1147 status = WLAN_STATUS_SUCCESS;
1148end:
1149 spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
1150
1151end_no_lock:
1152 ieee80211_send_addba_resp(sta->dev, sta->addr, tid, dialog_token,
1153 status, 1, buf_size, timeout);
1154 sta_info_put(sta);
1155}
1156
1157static void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid,
1158 u16 initiator, u16 reason_code)
1159{
1160 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1161 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1162 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
1163 struct sk_buff *skb;
1164 struct ieee80211_mgmt *mgmt;
1165 u16 params;
1166
1167 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom + 1 +
1168 sizeof(mgmt->u.action.u.delba));
1169
1170 if (!skb) {
1171 printk(KERN_ERR "%s: failed to allocate buffer "
1172 "for delba frame\n", dev->name);
1173 return;
1174 }
1175
1176 skb_reserve(skb, local->hw.extra_tx_headroom);
1177 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
1178 memset(mgmt, 0, 24);
1179 memcpy(mgmt->da, da, ETH_ALEN);
1180 memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN);
1181 if (sdata->vif.type == IEEE80211_IF_TYPE_AP)
1182 memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN);
1183 else
1184 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
1185 mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT,
1186 IEEE80211_STYPE_ACTION);
1187
1188 skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
1189
1190 mgmt->u.action.category = WLAN_CATEGORY_BACK;
1191 mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA;
1192 params = (u16)(initiator << 11); /* bit 11 initiator */
1193 params |= (u16)(tid << 12); /* bit 15:12 TID number */
1194
1195 mgmt->u.action.u.delba.params = cpu_to_le16(params);
1196 mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
1197
1198 ieee80211_sta_tx(dev, skb, 0);
1199}
1200
1201void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid,
1202 u16 initiator, u16 reason)
1203{
1204 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1205 struct ieee80211_hw *hw = &local->hw;
1206 struct sta_info *sta;
1207 int ret, i;
1208
1209 sta = sta_info_get(local, ra);
1210 if (!sta)
1211 return;
1212
1213 /* check if TID is in operational state */
1214 spin_lock_bh(&sta->ampdu_mlme.ampdu_rx);
1215 if (sta->ampdu_mlme.tid_rx[tid].state
1216 != HT_AGG_STATE_OPERATIONAL) {
1217 spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
1218 sta_info_put(sta);
1219 return;
1220 }
1221 sta->ampdu_mlme.tid_rx[tid].state =
1222 HT_AGG_STATE_REQ_STOP_BA_MSK |
1223 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
1224 spin_unlock_bh(&sta->ampdu_mlme.ampdu_rx);
1225
1226 /* stop HW Rx aggregation. ampdu_action existence
1227 * already verified in session init so we add the BUG_ON */
1228 BUG_ON(!local->ops->ampdu_action);
1229
1230 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
1231 ra, tid, EINVAL);
1232 if (ret)
 1233 printk(KERN_DEBUG "HW problem - cannot stop rx "
 1234 "aggregation for tid %d\n", tid);
1235
 1236 /* the shutdown timer itself has not fired, so cancel it */
1237 if (initiator != WLAN_BACK_TIMER)
1238 del_timer_sync(&sta->ampdu_mlme.tid_rx[tid].
1239 session_timer);
1240
1241 /* check if this is a self generated aggregation halt */
1242 if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
1243 ieee80211_send_delba(dev, ra, tid, 0, reason);
1244
1245 /* free the reordering buffer */
1246 for (i = 0; i < sta->ampdu_mlme.tid_rx[tid].buf_size; i++) {
1247 if (sta->ampdu_mlme.tid_rx[tid].reorder_buf[i]) {
1248 /* release the reordered frames */
1249 dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid].reorder_buf[i]);
1250 sta->ampdu_mlme.tid_rx[tid].stored_mpdu_num--;
1251 sta->ampdu_mlme.tid_rx[tid].reorder_buf[i] = NULL;
1252 }
1253 }
1254 kfree(sta->ampdu_mlme.tid_rx[tid].reorder_buf);
1255
1256 sta->ampdu_mlme.tid_rx[tid].state = HT_AGG_STATE_IDLE;
1257 sta_info_put(sta);
1258}
1259
1260static void ieee80211_sta_process_delba(struct net_device *dev,
1261 struct ieee80211_mgmt *mgmt, size_t len)
1262{
1263 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1264 struct sta_info *sta;
1265 u16 tid, params;
1266 u16 initiator;
1267 DECLARE_MAC_BUF(mac);
1268
1269 sta = sta_info_get(local, mgmt->sa);
1270 if (!sta)
1271 return;
1272
1273 params = le16_to_cpu(mgmt->u.action.u.delba.params);
1274 tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12;
1275 initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11;
1276
1277#ifdef CONFIG_MAC80211_HT_DEBUG
1278 if (net_ratelimit())
1279 printk(KERN_DEBUG "delba from %s on tid %d reason code %d\n",
1280 print_mac(mac, mgmt->sa), tid,
1281 mgmt->u.action.u.delba.reason_code);
1282#endif /* CONFIG_MAC80211_HT_DEBUG */
1283
1284 if (initiator == WLAN_BACK_INITIATOR)
1285 ieee80211_sta_stop_rx_ba_session(dev, sta->addr, tid,
1286 WLAN_BACK_INITIATOR, 0);
1287 sta_info_put(sta);
1288}
1289
1290/*
 1291 * After receiving a Block Ack Request (BAR) we re-arm a timer each
 1292 * time a frame arrives from the originator.
 1293 * If this timer expires, ieee80211_sta_stop_rx_ba_session will be executed.
1294 */
1295void sta_rx_agg_session_timer_expired(unsigned long data)
1296{
 1297 /* Not an elegant detour, but there is no choice: the timer passes
 1298 * only one argument, and the sta_info is needed here too. The init
 1299 * flow in sta_info_add gives the TID as the timer data, while the
 1300 * timer_to_tid array gives the sta through container_of */
1301 u8 *ptid = (u8 *)data;
1302 u8 *timer_to_id = ptid - *ptid;
1303 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
1304 timer_to_tid[0]);
1305
1306 printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
1307 ieee80211_sta_stop_rx_ba_session(sta->dev, sta->addr, (u16)*ptid,
1308 WLAN_BACK_TIMER,
1309 WLAN_REASON_QSTA_TIMEOUT);
1310}
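
The detour described in the comment above is worth seeing outside the kernel: each per-TID timer is handed the address of its slot in a byte array whose entry i stores the value i, so the callback subtracts the stored TID to reach the array base and lets container_of() recover the owning structure. A self-contained sketch with a simplified container_of and a hypothetical struct:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct peer {
	const char *name;
	unsigned char timer_to_tid[16]; /* entry i holds the value i */
};

static void timer_cb(unsigned long data)
{
	unsigned char *ptid = (unsigned char *)data;
	unsigned char *base = ptid - *ptid; /* back to timer_to_tid[0] */
	struct peer *p = container_of(base, struct peer, timer_to_tid[0]);

	printf("%s: tid %u expired\n", p->name, *ptid);
}

int main(void)
{
	struct peer p = { .name = "sta0" };
	int i;

	for (i = 0; i < 16; i++)
		p.timer_to_tid[i] = i;
	timer_cb((unsigned long)&p.timer_to_tid[7]); /* sta0: tid 7 expired */
	return 0;
}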
1311
921 1312
922static void ieee80211_rx_mgmt_auth(struct net_device *dev, 1313static void ieee80211_rx_mgmt_auth(struct net_device *dev,
923 struct ieee80211_if_sta *ifsta, 1314 struct ieee80211_if_sta *ifsta,
@@ -929,7 +1320,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
929 DECLARE_MAC_BUF(mac); 1320 DECLARE_MAC_BUF(mac);
930 1321
931 if (ifsta->state != IEEE80211_AUTHENTICATE && 1322 if (ifsta->state != IEEE80211_AUTHENTICATE &&
932 sdata->type != IEEE80211_IF_TYPE_IBSS) { 1323 sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
933 printk(KERN_DEBUG "%s: authentication frame received from " 1324 printk(KERN_DEBUG "%s: authentication frame received from "
934 "%s, but not in authenticate state - ignored\n", 1325 "%s, but not in authenticate state - ignored\n",
935 dev->name, print_mac(mac, mgmt->sa)); 1326 dev->name, print_mac(mac, mgmt->sa));
@@ -943,7 +1334,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
943 return; 1334 return;
944 } 1335 }
945 1336
946 if (sdata->type != IEEE80211_IF_TYPE_IBSS && 1337 if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
947 memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) { 1338 memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) {
948 printk(KERN_DEBUG "%s: authentication frame received from " 1339 printk(KERN_DEBUG "%s: authentication frame received from "
949 "unknown AP (SA=%s BSSID=%s) - " 1340 "unknown AP (SA=%s BSSID=%s) - "
@@ -952,7 +1343,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
952 return; 1343 return;
953 } 1344 }
954 1345
955 if (sdata->type != IEEE80211_IF_TYPE_IBSS && 1346 if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
956 memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) { 1347 memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) {
957 printk(KERN_DEBUG "%s: authentication frame received from " 1348 printk(KERN_DEBUG "%s: authentication frame received from "
958 "unknown BSSID (SA=%s BSSID=%s) - " 1349 "unknown BSSID (SA=%s BSSID=%s) - "
@@ -970,7 +1361,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev,
970 dev->name, print_mac(mac, mgmt->sa), auth_alg, 1361 dev->name, print_mac(mac, mgmt->sa), auth_alg,
971 auth_transaction, status_code); 1362 auth_transaction, status_code);
972 1363
973 if (sdata->type == IEEE80211_IF_TYPE_IBSS) { 1364 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
974 /* IEEE 802.11 standard does not require authentication in IBSS 1365 /* IEEE 802.11 standard does not require authentication in IBSS
975 * networks and most implementations do not seem to use it. 1366 * networks and most implementations do not seem to use it.
976 * However, try to reply to authentication attempts if someone 1367 * However, try to reply to authentication attempts if someone
@@ -1136,18 +1527,20 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev,
1136} 1527}
1137 1528
1138 1529
1139static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, 1530static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1140 struct ieee80211_if_sta *ifsta, 1531 struct ieee80211_if_sta *ifsta,
1141 struct ieee80211_mgmt *mgmt, 1532 struct ieee80211_mgmt *mgmt,
1142 size_t len, 1533 size_t len,
1143 int reassoc) 1534 int reassoc)
1144{ 1535{
1145 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 1536 struct ieee80211_local *local = sdata->local;
1537 struct net_device *dev = sdata->dev;
1146 struct ieee80211_hw_mode *mode; 1538 struct ieee80211_hw_mode *mode;
1147 struct sta_info *sta; 1539 struct sta_info *sta;
1148 u32 rates; 1540 u32 rates;
1149 u16 capab_info, status_code, aid; 1541 u16 capab_info, status_code, aid;
1150 struct ieee802_11_elems elems; 1542 struct ieee802_11_elems elems;
1543 struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
1151 u8 *pos; 1544 u8 *pos;
1152 int i, j; 1545 int i, j;
1153 DECLARE_MAC_BUF(mac); 1546 DECLARE_MAC_BUF(mac);
@@ -1210,20 +1603,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
1210 return; 1603 return;
1211 } 1604 }
1212 1605
1213 /* it probably doesn't, but if the frame includes an ERP value then
1214 * update our stored copy */
1215 if (elems.erp_info && elems.erp_info_len >= 1) {
1216 struct ieee80211_sta_bss *bss
1217 = ieee80211_rx_bss_get(dev, ifsta->bssid,
1218 local->hw.conf.channel,
1219 ifsta->ssid, ifsta->ssid_len);
1220 if (bss) {
1221 bss->erp_value = elems.erp_info[0];
1222 bss->has_erp_value = 1;
1223 ieee80211_rx_bss_put(dev, bss);
1224 }
1225 }
1226
1227 printk(KERN_DEBUG "%s: associated\n", dev->name); 1606 printk(KERN_DEBUG "%s: associated\n", dev->name);
1228 ifsta->aid = aid; 1607 ifsta->aid = aid;
1229 ifsta->ap_capab = capab_info; 1608 ifsta->ap_capab = capab_info;
@@ -1234,6 +1613,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
1234 if (ifsta->assocresp_ies) 1613 if (ifsta->assocresp_ies)
1235 memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); 1614 memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len);
1236 1615
1616 /* set AID, ieee80211_set_associated() will tell the driver */
1617 bss_conf->aid = aid;
1237 ieee80211_set_associated(dev, ifsta, 1); 1618 ieee80211_set_associated(dev, ifsta, 1);
1238 1619
1239 /* Add STA entry for the AP */ 1620 /* Add STA entry for the AP */
@@ -1276,6 +1657,19 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
1276 } 1657 }
1277 sta->supp_rates = rates; 1658 sta->supp_rates = rates;
1278 1659
1660 if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
1661 local->ops->conf_ht) {
1662 struct ieee80211_ht_bss_info bss_info;
1663
1664 ieee80211_ht_cap_ie_to_ht_info(
1665 (struct ieee80211_ht_cap *)
1666 elems.ht_cap_elem, &sta->ht_info);
1667 ieee80211_ht_addt_info_ie_to_ht_bss_info(
1668 (struct ieee80211_ht_addt_info *)
1669 elems.ht_info_elem, &bss_info);
1670 ieee80211_hw_config_ht(local, 1, &sta->ht_info, &bss_info);
1671 }
1672
1279 rate_control_rate_init(sta, local); 1673 rate_control_rate_init(sta, local);
1280 1674
1281 if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { 1675 if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
@@ -1380,6 +1774,7 @@ static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss)
1380 kfree(bss->wpa_ie); 1774 kfree(bss->wpa_ie);
1381 kfree(bss->rsn_ie); 1775 kfree(bss->rsn_ie);
1382 kfree(bss->wmm_ie); 1776 kfree(bss->wmm_ie);
1777 kfree(bss->ht_ie);
1383 kfree(bss); 1778 kfree(bss);
1384} 1779}
1385 1780
@@ -1449,7 +1844,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1449 1844
1450 timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); 1845 timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
1451 1846
1452 if (sdata->type == IEEE80211_IF_TYPE_IBSS && beacon && 1847 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon &&
1453 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) { 1848 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
1454#ifdef CONFIG_MAC80211_IBSS_DEBUG 1849#ifdef CONFIG_MAC80211_IBSS_DEBUG
1455 static unsigned long last_tsf_debug = 0; 1850 static unsigned long last_tsf_debug = 0;
@@ -1474,7 +1869,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1474 1869
1475 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); 1870 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
1476 1871
1477 if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates && 1872 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
1478 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 && 1873 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
1479 (sta = sta_info_get(local, mgmt->sa))) { 1874 (sta = sta_info_get(local, mgmt->sa))) {
1480 struct ieee80211_hw_mode *mode; 1875 struct ieee80211_hw_mode *mode;
@@ -1483,8 +1878,18 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1483 u32 supp_rates, prev_rates; 1878 u32 supp_rates, prev_rates;
1484 int i, j; 1879 int i, j;
1485 1880
1486 mode = local->sta_scanning ? 1881 mode = local->sta_sw_scanning ?
1487 local->scan_hw_mode : local->oper_hw_mode; 1882 local->scan_hw_mode : local->oper_hw_mode;
1883
1884 if (local->sta_hw_scanning) {
1885	/* search for the mode that matches the beacon */
1886 list_for_each_entry(mode, &local->modes_list, list)
1887 if (mode->mode == rx_status->phymode)
1888 break;
1889
1890 if (mode == NULL)
1891 mode = local->oper_hw_mode;
1892 }
1488 rates = mode->rates; 1893 rates = mode->rates;
1489 num_rates = mode->num_rates; 1894 num_rates = mode->num_rates;
1490 1895
@@ -1627,7 +2032,22 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
1627 bss->wmm_ie = NULL; 2032 bss->wmm_ie = NULL;
1628 bss->wmm_ie_len = 0; 2033 bss->wmm_ie_len = 0;
1629 } 2034 }
1630 2035 if (elems.ht_cap_elem &&
2036 (!bss->ht_ie || bss->ht_ie_len != elems.ht_cap_elem_len ||
2037 memcmp(bss->ht_ie, elems.ht_cap_elem, elems.ht_cap_elem_len))) {
2038 kfree(bss->ht_ie);
2039 bss->ht_ie = kmalloc(elems.ht_cap_elem_len + 2, GFP_ATOMIC);
2040 if (bss->ht_ie) {
2041 memcpy(bss->ht_ie, elems.ht_cap_elem - 2,
2042 elems.ht_cap_elem_len + 2);
2043 bss->ht_ie_len = elems.ht_cap_elem_len + 2;
2044 } else
2045 bss->ht_ie_len = 0;
2046 } else if (!elems.ht_cap_elem && bss->ht_ie) {
2047 kfree(bss->ht_ie);
2048 bss->ht_ie = NULL;
2049 bss->ht_ie_len = 0;
2050 }
1631 2051
1632 bss->hw_mode = rx_status->phymode; 2052 bss->hw_mode = rx_status->phymode;
1633 bss->freq = rx_status->freq; 2053 bss->freq = rx_status->freq;
@@ -1672,11 +2092,14 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
1672 struct ieee80211_if_sta *ifsta; 2092 struct ieee80211_if_sta *ifsta;
1673 size_t baselen; 2093 size_t baselen;
1674 struct ieee802_11_elems elems; 2094 struct ieee802_11_elems elems;
2095 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
2096 struct ieee80211_conf *conf = &local->hw.conf;
2097 u32 changed = 0;
1675 2098
1676 ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1); 2099 ieee80211_rx_bss_info(dev, mgmt, len, rx_status, 1);
1677 2100
1678 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 2101 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1679 if (sdata->type != IEEE80211_IF_TYPE_STA) 2102 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
1680 return; 2103 return;
1681 ifsta = &sdata->u.sta; 2104 ifsta = &sdata->u.sta;
1682 2105
@@ -1692,12 +2115,31 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
1692 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); 2115 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
1693 2116
1694 if (elems.erp_info && elems.erp_info_len >= 1) 2117 if (elems.erp_info && elems.erp_info_len >= 1)
1695 ieee80211_handle_erp_ie(dev, elems.erp_info[0]); 2118 changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]);
2119
2120 if (elems.ht_cap_elem && elems.ht_info_elem &&
2121 elems.wmm_param && local->ops->conf_ht &&
2122 conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
2123 struct ieee80211_ht_bss_info bss_info;
2124
2125 ieee80211_ht_addt_info_ie_to_ht_bss_info(
2126 (struct ieee80211_ht_addt_info *)
2127 elems.ht_info_elem, &bss_info);
2128	/* check if the AP changed its BSS information */
2129 if ((conf->ht_bss_conf.primary_channel !=
2130 bss_info.primary_channel) ||
2131 (conf->ht_bss_conf.bss_cap != bss_info.bss_cap) ||
2132 (conf->ht_bss_conf.bss_op_mode != bss_info.bss_op_mode))
2133 ieee80211_hw_config_ht(local, 1, &conf->ht_conf,
2134 &bss_info);
2135 }
1696 2136
1697 if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { 2137 if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
1698 ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, 2138 ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param,
1699 elems.wmm_param_len); 2139 elems.wmm_param_len);
1700 } 2140 }
2141
2142 ieee80211_bss_info_change_notify(sdata, changed);
1701} 2143}
1702 2144
1703 2145
@@ -1719,7 +2161,7 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
1719 DECLARE_MAC_BUF(mac3); 2161 DECLARE_MAC_BUF(mac3);
1720#endif 2162#endif
1721 2163
1722 if (sdata->type != IEEE80211_IF_TYPE_IBSS || 2164 if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS ||
1723 ifsta->state != IEEE80211_IBSS_JOINED || 2165 ifsta->state != IEEE80211_IBSS_JOINED ||
1724 len < 24 + 2 || !ifsta->probe_resp) 2166 len < 24 + 2 || !ifsta->probe_resp)
1725 return; 2167 return;
@@ -1775,6 +2217,40 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev,
1775 ieee80211_sta_tx(dev, skb, 0); 2217 ieee80211_sta_tx(dev, skb, 0);
1776} 2218}
1777 2219
2220static void ieee80211_rx_mgmt_action(struct net_device *dev,
2221 struct ieee80211_if_sta *ifsta,
2222 struct ieee80211_mgmt *mgmt,
2223 size_t len)
2224{
2225 if (len < IEEE80211_MIN_ACTION_SIZE)
2226 return;
2227
2228 switch (mgmt->u.action.category) {
2229 case WLAN_CATEGORY_BACK:
2230 switch (mgmt->u.action.u.addba_req.action_code) {
2231 case WLAN_ACTION_ADDBA_REQ:
2232 if (len < (IEEE80211_MIN_ACTION_SIZE +
2233 sizeof(mgmt->u.action.u.addba_req)))
2234 break;
2235 ieee80211_sta_process_addba_request(dev, mgmt, len);
2236 break;
2237 case WLAN_ACTION_DELBA:
2238 if (len < (IEEE80211_MIN_ACTION_SIZE +
2239 sizeof(mgmt->u.action.u.delba)))
2240 break;
2241 ieee80211_sta_process_delba(dev, mgmt, len);
2242 break;
2243 default:
2244 if (net_ratelimit())
2245 printk(KERN_DEBUG "%s: Rx unknown A-MPDU action\n",
2246 dev->name);
2247 break;
2248 }
2249 break;
2250 default:
2251 break;
2252 }
2253}
1778 2254
1779void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, 2255void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
1780 struct ieee80211_rx_status *rx_status) 2256 struct ieee80211_rx_status *rx_status)
@@ -1804,6 +2280,7 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb,
1804 case IEEE80211_STYPE_REASSOC_RESP: 2280 case IEEE80211_STYPE_REASSOC_RESP:
1805 case IEEE80211_STYPE_DEAUTH: 2281 case IEEE80211_STYPE_DEAUTH:
1806 case IEEE80211_STYPE_DISASSOC: 2282 case IEEE80211_STYPE_DISASSOC:
2283 case IEEE80211_STYPE_ACTION:
1807 skb_queue_tail(&ifsta->skb_queue, skb); 2284 skb_queue_tail(&ifsta->skb_queue, skb);
1808 queue_work(local->hw.workqueue, &ifsta->work); 2285 queue_work(local->hw.workqueue, &ifsta->work);
1809 return; 2286 return;
@@ -1850,10 +2327,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
1850 ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len); 2327 ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len);
1851 break; 2328 break;
1852 case IEEE80211_STYPE_ASSOC_RESP: 2329 case IEEE80211_STYPE_ASSOC_RESP:
1853 ieee80211_rx_mgmt_assoc_resp(dev, ifsta, mgmt, skb->len, 0); 2330 ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0);
1854 break; 2331 break;
1855 case IEEE80211_STYPE_REASSOC_RESP: 2332 case IEEE80211_STYPE_REASSOC_RESP:
1856 ieee80211_rx_mgmt_assoc_resp(dev, ifsta, mgmt, skb->len, 1); 2333 ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1);
1857 break; 2334 break;
1858 case IEEE80211_STYPE_DEAUTH: 2335 case IEEE80211_STYPE_DEAUTH:
1859 ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len); 2336 ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len);
@@ -1861,37 +2338,48 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev,
1861 case IEEE80211_STYPE_DISASSOC: 2338 case IEEE80211_STYPE_DISASSOC:
1862 ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len); 2339 ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len);
1863 break; 2340 break;
2341 case IEEE80211_STYPE_ACTION:
2342 ieee80211_rx_mgmt_action(dev, ifsta, mgmt, skb->len);
2343 break;
1864 } 2344 }
1865 2345
1866 kfree_skb(skb); 2346 kfree_skb(skb);
1867} 2347}
1868 2348
1869 2349
1870void ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb, 2350ieee80211_txrx_result
1871 struct ieee80211_rx_status *rx_status) 2351ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb,
2352 struct ieee80211_rx_status *rx_status)
1872{ 2353{
1873 struct ieee80211_mgmt *mgmt; 2354 struct ieee80211_mgmt *mgmt;
1874 u16 fc; 2355 u16 fc;
1875 2356
1876 if (skb->len < 24) { 2357 if (skb->len < 2)
1877 dev_kfree_skb(skb); 2358 return TXRX_DROP;
1878 return;
1879 }
1880 2359
1881 mgmt = (struct ieee80211_mgmt *) skb->data; 2360 mgmt = (struct ieee80211_mgmt *) skb->data;
1882 fc = le16_to_cpu(mgmt->frame_control); 2361 fc = le16_to_cpu(mgmt->frame_control);
1883 2362
2363 if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL)
2364 return TXRX_CONTINUE;
2365
2366 if (skb->len < 24)
2367 return TXRX_DROP;
2368
1884 if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) { 2369 if ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) {
1885 if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP) { 2370 if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP) {
1886 ieee80211_rx_mgmt_probe_resp(dev, mgmt, 2371 ieee80211_rx_mgmt_probe_resp(dev, mgmt,
1887 skb->len, rx_status); 2372 skb->len, rx_status);
2373 dev_kfree_skb(skb);
2374 return TXRX_QUEUED;
1888 } else if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) { 2375 } else if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON) {
1889 ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, 2376 ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len,
1890 rx_status); 2377 rx_status);
2378 dev_kfree_skb(skb);
2379 return TXRX_QUEUED;
1891 } 2380 }
1892 } 2381 }
1893 2382 return TXRX_CONTINUE;
1894 dev_kfree_skb(skb);
1895} 2383}
1896 2384
1897 2385
@@ -1981,13 +2469,13 @@ void ieee80211_sta_work(struct work_struct *work)
1981 if (!netif_running(dev)) 2469 if (!netif_running(dev))
1982 return; 2470 return;
1983 2471
1984 if (local->sta_scanning) 2472 if (local->sta_sw_scanning || local->sta_hw_scanning)
1985 return; 2473 return;
1986 2474
1987 if (sdata->type != IEEE80211_IF_TYPE_STA && 2475 if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
1988 sdata->type != IEEE80211_IF_TYPE_IBSS) { 2476 sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
1989 printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface " 2477 printk(KERN_DEBUG "%s: ieee80211_sta_work: non-STA interface "
1990 "(type=%d)\n", dev->name, sdata->type); 2478 "(type=%d)\n", dev->name, sdata->vif.type);
1991 return; 2479 return;
1992 } 2480 }
1993 ifsta = &sdata->u.sta; 2481 ifsta = &sdata->u.sta;
@@ -2082,7 +2570,7 @@ void ieee80211_sta_req_auth(struct net_device *dev,
2082 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 2570 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
2083 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 2571 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
2084 2572
2085 if (sdata->type != IEEE80211_IF_TYPE_STA) 2573 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
2086 return; 2574 return;
2087 2575
2088 if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | 2576 if ((ifsta->flags & (IEEE80211_STA_BSSID_SET |
@@ -2204,9 +2692,8 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
2204 struct sk_buff *skb; 2692 struct sk_buff *skb;
2205 struct ieee80211_mgmt *mgmt; 2693 struct ieee80211_mgmt *mgmt;
2206 struct ieee80211_tx_control control; 2694 struct ieee80211_tx_control control;
2207 struct ieee80211_rate *rate;
2208 struct ieee80211_hw_mode *mode; 2695 struct ieee80211_hw_mode *mode;
2209 struct rate_control_extra extra; 2696 struct rate_selection ratesel;
2210 u8 *pos; 2697 u8 *pos;
2211 struct ieee80211_sub_if_data *sdata; 2698 struct ieee80211_sub_if_data *sdata;
2212 2699
@@ -2291,18 +2778,17 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
2291 } 2778 }
2292 2779
2293 memset(&control, 0, sizeof(control)); 2780 memset(&control, 0, sizeof(control));
2294 memset(&extra, 0, sizeof(extra)); 2781 rate_control_get_rate(dev, local->oper_hw_mode, skb, &ratesel);
2295 extra.mode = local->oper_hw_mode; 2782 if (!ratesel.rate) {
2296 rate = rate_control_get_rate(local, dev, skb, &extra);
2297 if (!rate) {
2298 printk(KERN_DEBUG "%s: Failed to determine TX rate " 2783 printk(KERN_DEBUG "%s: Failed to determine TX rate "
2299 "for IBSS beacon\n", dev->name); 2784 "for IBSS beacon\n", dev->name);
2300 break; 2785 break;
2301 } 2786 }
2787 control.vif = &sdata->vif;
2302 control.tx_rate = 2788 control.tx_rate =
2303 ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && 2789 (sdata->bss_conf.use_short_preamble &&
2304 (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? 2790 (ratesel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
2305 rate->val2 : rate->val; 2791 ratesel.rate->val2 : ratesel.rate->val;
2306 control.antenna_sel_tx = local->hw.conf.antenna_sel_tx; 2792 control.antenna_sel_tx = local->hw.conf.antenna_sel_tx;
2307 control.power_level = local->hw.conf.power_level; 2793 control.power_level = local->hw.conf.power_level;
2308 control.flags |= IEEE80211_TXCTL_NO_ACK; 2794 control.flags |= IEEE80211_TXCTL_NO_ACK;
@@ -2552,7 +3038,7 @@ int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len)
2552 ifsta->flags |= IEEE80211_STA_SSID_SET; 3038 ifsta->flags |= IEEE80211_STA_SSID_SET;
2553 else 3039 else
2554 ifsta->flags &= ~IEEE80211_STA_SSID_SET; 3040 ifsta->flags &= ~IEEE80211_STA_SSID_SET;
2555 if (sdata->type == IEEE80211_IF_TYPE_IBSS && 3041 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
2556 !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { 3042 !(ifsta->flags & IEEE80211_STA_BSSID_SET)) {
2557 ifsta->ibss_join_req = jiffies; 3043 ifsta->ibss_join_req = jiffies;
2558 ifsta->state = IEEE80211_IBSS_SEARCH; 3044 ifsta->state = IEEE80211_IBSS_SEARCH;
@@ -2639,9 +3125,15 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
2639 union iwreq_data wrqu; 3125 union iwreq_data wrqu;
2640 3126
2641 local->last_scan_completed = jiffies; 3127 local->last_scan_completed = jiffies;
2642 wmb(); 3128 memset(&wrqu, 0, sizeof(wrqu));
2643 local->sta_scanning = 0; 3129 wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
3130
3131 if (local->sta_hw_scanning) {
3132 local->sta_hw_scanning = 0;
3133 goto done;
3134 }
2644 3135
3136 local->sta_sw_scanning = 0;
2645 if (ieee80211_hw_config(local)) 3137 if (ieee80211_hw_config(local))
2646 printk(KERN_DEBUG "%s: failed to restore operational " 3138 printk(KERN_DEBUG "%s: failed to restore operational "
2647 "channel after scan\n", dev->name); 3139 "channel after scan\n", dev->name);
@@ -2657,9 +3149,6 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
2657 3149
2658 netif_tx_unlock_bh(local->mdev); 3150 netif_tx_unlock_bh(local->mdev);
2659 3151
2660 memset(&wrqu, 0, sizeof(wrqu));
2661 wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
2662
2663 rcu_read_lock(); 3152 rcu_read_lock();
2664 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 3153 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
2665 3154
@@ -2667,7 +3156,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
2667 if (sdata->dev == local->mdev) 3156 if (sdata->dev == local->mdev)
2668 continue; 3157 continue;
2669 3158
2670 if (sdata->type == IEEE80211_IF_TYPE_STA) { 3159 if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
2671 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) 3160 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)
2672 ieee80211_send_nullfunc(local, sdata, 0); 3161 ieee80211_send_nullfunc(local, sdata, 0);
2673 ieee80211_sta_timer((unsigned long)sdata); 3162 ieee80211_sta_timer((unsigned long)sdata);
@@ -2677,8 +3166,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
2677 } 3166 }
2678 rcu_read_unlock(); 3167 rcu_read_unlock();
2679 3168
3169done:
2680 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 3170 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
2681 if (sdata->type == IEEE80211_IF_TYPE_IBSS) { 3171 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
2682 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 3172 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
2683 if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || 3173 if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
2684 (!ifsta->state == IEEE80211_IBSS_JOINED && 3174 (!ifsta->state == IEEE80211_IBSS_JOINED &&
@@ -2699,7 +3189,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
2699 int skip; 3189 int skip;
2700 unsigned long next_delay = 0; 3190 unsigned long next_delay = 0;
2701 3191
2702 if (!local->sta_scanning) 3192 if (!local->sta_sw_scanning)
2703 return; 3193 return;
2704 3194
2705 switch (local->scan_state) { 3195 switch (local->scan_state) {
@@ -2713,7 +3203,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
2713 skip = !(local->enabled_modes & (1 << mode->mode)); 3203 skip = !(local->enabled_modes & (1 << mode->mode));
2714 chan = &mode->channels[local->scan_channel_idx]; 3204 chan = &mode->channels[local->scan_channel_idx];
2715 if (!(chan->flag & IEEE80211_CHAN_W_SCAN) || 3205 if (!(chan->flag & IEEE80211_CHAN_W_SCAN) ||
2716 (sdata->type == IEEE80211_IF_TYPE_IBSS && 3206 (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
2717 !(chan->flag & IEEE80211_CHAN_W_IBSS)) || 3207 !(chan->flag & IEEE80211_CHAN_W_IBSS)) ||
2718 (local->hw_modes & local->enabled_modes & 3208 (local->hw_modes & local->enabled_modes &
2719 (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B)) 3209 (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B))
@@ -2762,7 +3252,7 @@ void ieee80211_sta_scan_work(struct work_struct *work)
2762 break; 3252 break;
2763 } 3253 }
2764 3254
2765 if (local->sta_scanning) 3255 if (local->sta_sw_scanning)
2766 queue_delayed_work(local->hw.workqueue, &local->scan_work, 3256 queue_delayed_work(local->hw.workqueue, &local->scan_work,
2767 next_delay); 3257 next_delay);
2768} 3258}
@@ -2794,7 +3284,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
2794 * ResultCode: SUCCESS, INVALID_PARAMETERS 3284 * ResultCode: SUCCESS, INVALID_PARAMETERS
2795 */ 3285 */
2796 3286
2797 if (local->sta_scanning) { 3287 if (local->sta_sw_scanning || local->sta_hw_scanning) {
2798 if (local->scan_dev == dev) 3288 if (local->scan_dev == dev)
2799 return 0; 3289 return 0;
2800 return -EBUSY; 3290 return -EBUSY;
@@ -2802,15 +3292,15 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
2802 3292
2803 if (local->ops->hw_scan) { 3293 if (local->ops->hw_scan) {
2804 int rc = local->ops->hw_scan(local_to_hw(local), 3294 int rc = local->ops->hw_scan(local_to_hw(local),
2805 ssid, ssid_len); 3295 ssid, ssid_len);
2806 if (!rc) { 3296 if (!rc) {
2807 local->sta_scanning = 1; 3297 local->sta_hw_scanning = 1;
2808 local->scan_dev = dev; 3298 local->scan_dev = dev;
2809 } 3299 }
2810 return rc; 3300 return rc;
2811 } 3301 }
2812 3302
2813 local->sta_scanning = 1; 3303 local->sta_sw_scanning = 1;
2814 3304
2815 rcu_read_lock(); 3305 rcu_read_lock();
2816 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 3306 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -2821,7 +3311,7 @@ static int ieee80211_sta_start_scan(struct net_device *dev,
2821 continue; 3311 continue;
2822 3312
2823 netif_stop_queue(sdata->dev); 3313 netif_stop_queue(sdata->dev);
2824 if (sdata->type == IEEE80211_IF_TYPE_STA && 3314 if (sdata->vif.type == IEEE80211_IF_TYPE_STA &&
2825 (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) 3315 (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED))
2826 ieee80211_send_nullfunc(local, sdata, 1); 3316 ieee80211_send_nullfunc(local, sdata, 1);
2827 } 3317 }
@@ -2862,10 +3352,10 @@ int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len)
2862 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 3352 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
2863 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 3353 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
2864 3354
2865 if (sdata->type != IEEE80211_IF_TYPE_STA) 3355 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
2866 return ieee80211_sta_start_scan(dev, ssid, ssid_len); 3356 return ieee80211_sta_start_scan(dev, ssid, ssid_len);
2867 3357
2868 if (local->sta_scanning) { 3358 if (local->sta_sw_scanning || local->sta_hw_scanning) {
2869 if (local->scan_dev == dev) 3359 if (local->scan_dev == dev)
2870 return 0; 3360 return 0;
2871 return -EBUSY; 3361 return -EBUSY;
@@ -2894,15 +3384,6 @@ ieee80211_sta_scan_result(struct net_device *dev,
2894 if (!(local->enabled_modes & (1 << bss->hw_mode))) 3384 if (!(local->enabled_modes & (1 << bss->hw_mode)))
2895 return current_ev; 3385 return current_ev;
2896 3386
2897 if (local->scan_flags & IEEE80211_SCAN_WPA_ONLY &&
2898 !bss->wpa_ie && !bss->rsn_ie)
2899 return current_ev;
2900
2901 if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID &&
2902 (local->scan_ssid_len != bss->ssid_len ||
2903 memcmp(local->scan_ssid, bss->ssid, bss->ssid_len) != 0))
2904 return current_ev;
2905
2906 memset(&iwe, 0, sizeof(iwe)); 3387 memset(&iwe, 0, sizeof(iwe));
2907 iwe.cmd = SIOCGIWAP; 3388 iwe.cmd = SIOCGIWAP;
2908 iwe.u.ap_addr.sa_family = ARPHRD_ETHER; 3389 iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
@@ -3006,34 +3487,6 @@ ieee80211_sta_scan_result(struct net_device *dev,
3006 } 3487 }
3007 } 3488 }
3008 3489
3009 do {
3010 char *buf;
3011
3012 if (!(local->scan_flags & IEEE80211_SCAN_EXTRA_INFO))
3013 break;
3014
3015 buf = kmalloc(100, GFP_ATOMIC);
3016 if (!buf)
3017 break;
3018
3019 memset(&iwe, 0, sizeof(iwe));
3020 iwe.cmd = IWEVCUSTOM;
3021 sprintf(buf, "bcn_int=%d", bss->beacon_int);
3022 iwe.u.data.length = strlen(buf);
3023 current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
3024 buf);
3025
3026 memset(&iwe, 0, sizeof(iwe));
3027 iwe.cmd = IWEVCUSTOM;
3028 sprintf(buf, "capab=0x%04x", bss->capability);
3029 iwe.u.data.length = strlen(buf);
3030 current_ev = iwe_stream_add_point(current_ev, end_buf, &iwe,
3031 buf);
3032
3033 kfree(buf);
3034 break;
3035 } while (0);
3036
3037 return current_ev; 3490 return current_ev;
3038} 3491}
3039 3492
@@ -3122,8 +3575,8 @@ int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason)
3122 printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n", 3575 printk(KERN_DEBUG "%s: deauthenticate(reason=%d)\n",
3123 dev->name, reason); 3576 dev->name, reason);
3124 3577
3125 if (sdata->type != IEEE80211_IF_TYPE_STA && 3578 if (sdata->vif.type != IEEE80211_IF_TYPE_STA &&
3126 sdata->type != IEEE80211_IF_TYPE_IBSS) 3579 sdata->vif.type != IEEE80211_IF_TYPE_IBSS)
3127 return -EINVAL; 3580 return -EINVAL;
3128 3581
3129 ieee80211_send_deauth(dev, ifsta, reason); 3582 ieee80211_send_deauth(dev, ifsta, reason);
@@ -3140,7 +3593,7 @@ int ieee80211_sta_disassociate(struct net_device *dev, u16 reason)
3140 printk(KERN_DEBUG "%s: disassociate(reason=%d)\n", 3593 printk(KERN_DEBUG "%s: disassociate(reason=%d)\n",
3141 dev->name, reason); 3594 dev->name, reason);
3142 3595
3143 if (sdata->type != IEEE80211_IF_TYPE_STA) 3596 if (sdata->vif.type != IEEE80211_IF_TYPE_STA)
3144 return -EINVAL; 3597 return -EINVAL;
3145 3598
3146 if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) 3599 if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED))
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 0b2328f7d67c..ed57fb8e82fc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -49,8 +49,8 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key)
49 * address to indicate a transmit-only key. 49 * address to indicate a transmit-only key.
50 */ 50 */
51 if (key->conf.alg != ALG_WEP && 51 if (key->conf.alg != ALG_WEP &&
52 (key->sdata->type == IEEE80211_IF_TYPE_AP || 52 (key->sdata->vif.type == IEEE80211_IF_TYPE_AP ||
53 key->sdata->type == IEEE80211_IF_TYPE_VLAN)) 53 key->sdata->vif.type == IEEE80211_IF_TYPE_VLAN))
54 addr = zero_addr; 54 addr = zero_addr;
55 55
56 if (key->sta) 56 if (key->sta)
@@ -172,7 +172,7 @@ struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata,
172 if (sta->flags & WLAN_STA_WME) 172 if (sta->flags & WLAN_STA_WME)
173 key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; 173 key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
174 } else { 174 } else {
175 if (sdata->type == IEEE80211_IF_TYPE_STA) { 175 if (sdata->vif.type == IEEE80211_IF_TYPE_STA) {
176 struct sta_info *ap; 176 struct sta_info *ap;
177 177
178 /* same here, the AP could be using QoS */ 178 /* same here, the AP could be using QoS */
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
new file mode 100644
index 000000000000..04afc13ed825
--- /dev/null
+++ b/net/mac80211/rc80211_pid.h
@@ -0,0 +1,285 @@
1/*
2 * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
3 * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#ifndef RC80211_PID_H
11#define RC80211_PID_H
12
13/* Sampling period for measuring percentage of failed frames in ms. */
14#define RC_PID_INTERVAL 125
15
16/* Exponential averaging smoothness (used for I part of PID controller) */
17#define RC_PID_SMOOTHING_SHIFT 3
18#define RC_PID_SMOOTHING (1 << RC_PID_SMOOTHING_SHIFT)
19
20/* Sharpening factor (used for D part of PID controller) */
21#define RC_PID_SHARPENING_FACTOR 0
22#define RC_PID_SHARPENING_DURATION 0
23
24/* Fixed point arithmetic shifting amount. */
25#define RC_PID_ARITH_SHIFT 8
26
27/* Fixed point arithmetic factor. */
28#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT)
29
30/* Proportional PID component coefficient. */
31#define RC_PID_COEFF_P 15
32/* Integral PID component coefficient. */
33#define RC_PID_COEFF_I 9
34/* Derivative PID component coefficient. */
35#define RC_PID_COEFF_D 15
36
37/* Target failed frames rate for the PID controller. NB: This effectively gives
38 * the maximum failed frames percentage we're willing to accept. If the wireless
39 * link quality is good, the controller will fail to adjust failed frames
40 * percentage to the target. This is intentional.
41 */
42#define RC_PID_TARGET_PF 14
43
44/* Rate behaviour normalization quantity over time. */
45#define RC_PID_NORM_OFFSET 3
46
47/* Push high rates right after loading. */
48#define RC_PID_FAST_START 0
49
50/* Arithmetic right shift for positive and negative values for ISO C. */
51#define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \
52 (x) < 0 ? -((-(x)) >> (y)) : (x) >> (y)
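
This macro exists because right-shifting a negative signed integer is implementation-defined in ISO C, and even on two's-complement machines an arithmetic shift rounds toward negative infinity rather than toward zero. A quick standalone demonstration (outer parentheses added around the macro body so it embeds safely in expressions):

#include <stdio.h>

#define RC_PID_DO_ARITH_RIGHT_SHIFT(x, y) \
	((x) < 0 ? -((-(x)) >> (y)) : (x) >> (y))

int main(void)
{
	/* -5 >> 1 is -3 on two's-complement arithmetic-shift machines
	 * (rounds toward negative infinity); the macro yields -2
	 * (rounds toward zero), and does so portably. */
	printf("%d %d\n", -5 >> 1, RC_PID_DO_ARITH_RIGHT_SHIFT(-5, 1));
	return 0;
}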
53
54enum rc_pid_event_type {
55 RC_PID_EVENT_TYPE_TX_STATUS,
56 RC_PID_EVENT_TYPE_RATE_CHANGE,
57 RC_PID_EVENT_TYPE_TX_RATE,
58 RC_PID_EVENT_TYPE_PF_SAMPLE,
59};
60
61union rc_pid_event_data {
62 /* RC_PID_EVENT_TX_STATUS */
63 struct {
64 struct ieee80211_tx_status tx_status;
65 };
66 /* RC_PID_EVENT_TYPE_RATE_CHANGE */
67 /* RC_PID_EVENT_TYPE_TX_RATE */
68 struct {
69 int index;
70 int rate;
71 };
72 /* RC_PID_EVENT_TYPE_PF_SAMPLE */
73 struct {
74 s32 pf_sample;
75 s32 prop_err;
76 s32 int_err;
77 s32 der_err;
78 };
79};
80
81struct rc_pid_event {
82	/* The time when the event occurred */
83 unsigned long timestamp;
84
85 /* Event ID number */
86 unsigned int id;
87
88 /* Type of event */
89 enum rc_pid_event_type type;
90
91 /* type specific data */
92 union rc_pid_event_data data;
93};
94
95/* Size of the event ring buffer. */
96#define RC_PID_EVENT_RING_SIZE 32
97
98struct rc_pid_event_buffer {
99 /* Counter that generates event IDs */
100 unsigned int ev_count;
101
102 /* Ring buffer of events */
103 struct rc_pid_event ring[RC_PID_EVENT_RING_SIZE];
104
105	/* Index of the next entry in the ring to be (re)used */
106 unsigned int next_entry;
107
108 /* Lock that guards against concurrent access to this buffer struct */
109 spinlock_t lock;
110
111 /* Wait queue for poll/select and blocking I/O */
112 wait_queue_head_t waitqueue;
113};
114
115struct rc_pid_events_file_info {
116 /* The event buffer we read */
117 struct rc_pid_event_buffer *events;
118
119	/* The entry we should read next */
120 unsigned int next_entry;
121};
122
123/**
124 * struct rc_pid_debugfs_entries - tunable parameters
125 *
126 * Algorithm parameters, tunable via debugfs.
127 * @dir: the debugfs directory for a specific phy
128 * @target: target percentage for failed frames
129 * @sampling_period: error sampling interval in milliseconds
130 * @coeff_p: absolute value of the proportional coefficient
131 * @coeff_i: absolute value of the integral coefficient
132 * @coeff_d: absolute value of the derivative coefficient
133 * @smoothing_shift: absolute value of the integral smoothing factor (i.e.
134 * amount of smoothing introduced by the exponential moving average)
135 * @sharpen_factor: absolute value of the derivative sharpening factor (i.e.
136 * amount of emphasis given to the derivative term after low activity
137 * events)
138 * @sharpen_duration: duration of the sharpening effect after the detected low
139 * activity event, relative to sampling_period
140 * @norm_offset: amount of normalization periodically performed on the learnt
141 * rate behaviour values (lower means we should trust more what we learnt
142 * about behaviour of rates, higher means we should trust more the natural
143 * ordering of rates)
144 * @fast_start: if Y, push high rates right after initialization
145 */
146struct rc_pid_debugfs_entries {
147 struct dentry *dir;
148 struct dentry *target;
149 struct dentry *sampling_period;
150 struct dentry *coeff_p;
151 struct dentry *coeff_i;
152 struct dentry *coeff_d;
153 struct dentry *smoothing_shift;
154 struct dentry *sharpen_factor;
155 struct dentry *sharpen_duration;
156 struct dentry *norm_offset;
157 struct dentry *fast_start;
158};
159
160void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
161 struct ieee80211_tx_status *stat);
162
163void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
164 int index, int rate);
165
166void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf,
167 int index, int rate);
168
169void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf,
170 s32 pf_sample, s32 prop_err,
171 s32 int_err, s32 der_err);
172
173void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
174 struct dentry *dir);
175
176void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta);
177
178struct rc_pid_sta_info {
179 unsigned long last_change;
180 unsigned long last_sample;
181
182 u32 tx_num_failed;
183 u32 tx_num_xmit;
184
185 /* Average failed frames percentage error (i.e. actual vs. target
186 * percentage), scaled by RC_PID_SMOOTHING. This value is computed
187	 * using an exponentially weighted average technique:
188 *
189 * (RC_PID_SMOOTHING - 1) * err_avg_old + err
190 * err_avg = ------------------------------------------
191 * RC_PID_SMOOTHING
192 *
193 * where err_avg is the new approximation, err_avg_old the previous one
194	 * and err is the error w.r.t. the current failed frames percentage
195 * sample. Note that the bigger RC_PID_SMOOTHING the more weight is
196 * given to the previous estimate, resulting in smoother behavior (i.e.
197 * corresponding to a longer integration window).
198 *
199 * For computation, we actually don't use the above formula, but this
200 * one:
201 *
202 * err_avg_scaled = err_avg_old_scaled - err_avg_old + err
203 *
204 * where:
205 * err_avg_scaled = err * RC_PID_SMOOTHING
206 * err_avg_old_scaled = err_avg_old * RC_PID_SMOOTHING
207 *
208	 * This avoids floating point numbers and err_avg_old can easily be
209	 * obtained by shifting err_avg_old_scaled right by
210 * RC_PID_SMOOTHING_SHIFT.
211 */
212 s32 err_avg_sc;
213
214	/* Last failed frames percentage sample. */
215 u32 last_pf;
216
217 /* Sharpening needed. */
218 u8 sharp_cnt;
219
220#ifdef CONFIG_MAC80211_DEBUGFS
221 /* Event buffer */
222 struct rc_pid_event_buffer events;
223
224 /* Events debugfs file entry */
225 struct dentry *events_entry;
226#endif
227};
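
The scaled update documented for err_avg_sc above is easy to verify in isolation. A sketch with made-up error samples and the RC_PID_SMOOTHING_SHIFT value defined earlier in this header:

#include <stdio.h>

#define SMOOTHING_SHIFT 3 /* RC_PID_SMOOTHING_SHIFT */

int main(void)
{
	int err_avg_sc = 0; /* err_avg scaled by 2^SMOOTHING_SHIFT */
	int samples[] = { 80, 80, 80, 0, 0 };
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		int err_avg = err_avg_sc >> SMOOTHING_SHIFT;
		/* scaled form of: err_avg = (7 * err_avg_old + err) / 8 */
		err_avg_sc = err_avg_sc - err_avg + samples[i];
		printf("sample=%d err_avg=%d\n", samples[i],
		       err_avg_sc >> SMOOTHING_SHIFT);
	}
	return 0;
}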
228
229/* Algorithm parameters. We keep them per algorithm instance, so they can
230 * be tuned individually for each interface.
231 */
232struct rc_pid_rateinfo {
233
234 /* Map sorted rates to rates in ieee80211_hw_mode. */
235 int index;
236
237 /* Map rates in ieee80211_hw_mode to sorted rates. */
238 int rev_index;
239
240 /* Did we do any measurement on this rate? */
241 bool valid;
242
243 /* Comparison with the lowest rate. */
244 int diff;
245};
246
247struct rc_pid_info {
248
249 /* The failed frames percentage target. */
250 unsigned int target;
251
252	/* Failed frames percentage sampling period, in units of 0.001 s. */
253 unsigned int sampling_period;
254
255 /* P, I and D coefficients. */
256 int coeff_p;
257 int coeff_i;
258 int coeff_d;
259
260 /* Exponential averaging shift. */
261 unsigned int smoothing_shift;
262
263 /* Sharpening factor and duration. */
264 unsigned int sharpen_factor;
265 unsigned int sharpen_duration;
266
267 /* Normalization offset. */
268 unsigned int norm_offset;
269
270	/* Fast start parameter. */
271 unsigned int fast_start;
272
273 /* Rates information. */
274 struct rc_pid_rateinfo *rinfo;
275
276 /* Index of the last used rate. */
277 int oldrate;
278
279#ifdef CONFIG_MAC80211_DEBUGFS
280 /* Debugfs entries created for the parameters above. */
281 struct rc_pid_debugfs_entries dentries;
282#endif
283};
284
285#endif /* RC80211_PID_H */
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
new file mode 100644
index 000000000000..554c4baed6fb
--- /dev/null
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -0,0 +1,549 @@
1/*
2 * Copyright 2002-2005, Instant802 Networks, Inc.
3 * Copyright 2005, Devicescape Software, Inc.
4 * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
5 * Copyright 2007, Stefano Brivio <stefano.brivio@polimi.it>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/netdevice.h>
13#include <linux/types.h>
14#include <linux/skbuff.h>
15#include <linux/debugfs.h>
16#include <net/mac80211.h>
17#include "ieee80211_rate.h"
18
19#include "rc80211_pid.h"
20
21
22/* This is an implementation of a TX rate control algorithm that uses a PID
23 * controller. Given a target failed frames rate, the controller decides on
24 * TX rate changes to meet the target failed frames rate.
25 *
26 * The controller basically computes the following:
27 *
28 * adj = CP * err + CI * err_avg + CD * (err - last_err) * (1 + sharpening)
29 *
30 * where
31 * adj adjustment value that is used to switch TX rate (see below)
32 * err current error: target vs. current failed frames percentage
33 * last_err last error
34 * err_avg average (i.e. poor man's integral) of recent errors
35 * sharpening non-zero when fast response is needed (i.e. right after
36 * association or no frames sent for a long time), heading
37 * to zero over time
38 * CP Proportional coefficient
39 * CI Integral coefficient
40 * CD Derivative coefficient
41 *
42 * CP, CI, CD are subject to careful tuning.
43 *
44 * The integral component uses an exponential moving average approach instead of
45 * an actual sliding window. The advantage is that we don't need to keep an
46 * array of the last N error values and computation is easier.
47 *
48 * Once we have the adj value, we map it to a rate by means of a learning
49 * algorithm. This algorithm keeps track of the failed frames percentage
50 * difference between rates. The behaviour of the lowest available rate is kept
51 * as a reference value, and every time we switch between two rates, we compute
52 * the difference between the failed frames each rate exhibited. By doing so,
53 * we compare behaviours which different rates exhibited in adjacent timeslices,
54 * thus the comparison is minimally affected by external conditions. This
55 * difference gets propagated to the whole set of measurements, so that the
56 * reference is always the same. Periodically, we normalize this set so that
57 * recent events weigh the most. By comparing the adj value with this set, we
58 * avoid pejorative switches to lower rates and allow for switches to higher
59 * rates if they behaved well.
60 *
61 * Note that for the computations we use a fixed-point representation to avoid
62 * floating point arithmetic. Hence, all values are shifted left by
63 * RC_PID_ARITH_SHIFT.
64 */
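
A rough standalone illustration of the controller equation above, using the coefficients and fixed-point shift defined in rc80211_pid.h and made-up error values (errors enter pre-scaled by 2^RC_PID_ARITH_SHIFT, as in the driver):

#include <stdio.h>

#define ARITH_SHIFT 8	/* RC_PID_ARITH_SHIFT */
#define COEFF_P 15	/* RC_PID_COEFF_P */
#define COEFF_I 9	/* RC_PID_COEFF_I */
#define COEFF_D 15	/* RC_PID_COEFF_D */

/* arithmetic right shift that is well defined for negative values */
static int arith_rshift(int x, int y)
{
	return x < 0 ? -((-x) >> y) : x >> y;
}

int main(void)
{
	int err = -(20 << ARITH_SHIFT);      /* 20% above the target pf */
	int err_avg = -(10 << ARITH_SHIFT);  /* smoothed recent error */
	int last_err = -(10 << ARITH_SHIFT);
	int sharpening = 0;
	int adj;

	adj = COEFF_P * err + COEFF_I * err_avg +
	      COEFF_D * (err - last_err) * (1 + sharpening);
	adj = arith_rshift(adj, 2 * ARITH_SHIFT);

	printf("adj=%d (negative => try a lower rate)\n", adj); /* adj=-2 */
	return 0;
}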
65
66
67/* Shift the adjustment so that we won't switch to a lower rate if it exhibited
68 * a worse failed frames behaviour and we'll choose the highest rate whose
69 * failed frames behaviour is not worse than the one of the original rate
70 * target. While at it, check that the adjustment is within the ranges. Then,
71 * provide the new rate index. */
72static int rate_control_pid_shift_adjust(struct rc_pid_rateinfo *r,
73 int adj, int cur, int l)
74{
75 int i, j, k, tmp;
76
77 j = r[cur].rev_index;
78 i = j + adj;
79
80 if (i < 0)
81 return r[0].index;
82 if (i >= l - 1)
83 return r[l - 1].index;
84
85 tmp = i;
86
87 if (adj < 0) {
88 for (k = j; k >= i; k--)
89 if (r[k].diff <= r[j].diff)
90 tmp = k;
91 } else {
92 for (k = i + 1; k + i < l; k++)
93 if (r[k].diff <= r[i].diff)
94 tmp = k;
95 }
96
97 return r[tmp].index;
98}
99
100static void rate_control_pid_adjust_rate(struct ieee80211_local *local,
101 struct sta_info *sta, int adj,
102 struct rc_pid_rateinfo *rinfo)
103{
104 struct ieee80211_sub_if_data *sdata;
105 struct ieee80211_hw_mode *mode;
106 int newidx;
107 int maxrate;
108 int back = (adj > 0) ? 1 : -1;
109
110 sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
111
112 mode = local->oper_hw_mode;
113 maxrate = sdata->bss ? sdata->bss->max_ratectrl_rateidx : -1;
114
115 newidx = rate_control_pid_shift_adjust(rinfo, adj, sta->txrate,
116 mode->num_rates);
117
118 while (newidx != sta->txrate) {
119 if (rate_supported(sta, mode, newidx) &&
120 (maxrate < 0 || newidx <= maxrate)) {
121 sta->txrate = newidx;
122 break;
123 }
124
125 newidx += back;
126 }
127
128#ifdef CONFIG_MAC80211_DEBUGFS
129 rate_control_pid_event_rate_change(
130 &((struct rc_pid_sta_info *)sta->rate_ctrl_priv)->events,
131 newidx, mode->rates[newidx].rate);
132#endif
133}
134
135/* Normalize the failed frames per-rate differences. */
136static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l)
137{
138 int i, norm_offset = pinfo->norm_offset;
139 struct rc_pid_rateinfo *r = pinfo->rinfo;
140
141 if (r[0].diff > norm_offset)
142 r[0].diff -= norm_offset;
143 else if (r[0].diff < -norm_offset)
144 r[0].diff += norm_offset;
145 for (i = 0; i < l - 1; i++)
146 if (r[i + 1].diff > r[i].diff + norm_offset)
147 r[i + 1].diff -= norm_offset;
148 else if (r[i + 1].diff <= r[i].diff)
149 r[i + 1].diff += norm_offset;
150}
151
152static void rate_control_pid_sample(struct rc_pid_info *pinfo,
153 struct ieee80211_local *local,
154 struct sta_info *sta)
155{
156 struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv;
157 struct rc_pid_rateinfo *rinfo = pinfo->rinfo;
158 struct ieee80211_hw_mode *mode;
159 u32 pf;
160 s32 err_avg;
161 u32 err_prop;
162 u32 err_int;
163 u32 err_der;
164 int adj, i, j, tmp;
165 unsigned long period;
166
167 mode = local->oper_hw_mode;
168 spinfo = sta->rate_ctrl_priv;
169
170 /* In case nothing happened during the previous control interval, turn
171 * the sharpening factor on. */
172 period = (HZ * pinfo->sampling_period + 500) / 1000;
173 if (!period)
174 period = 1;
175 if (jiffies - spinfo->last_sample > 2 * period)
176 spinfo->sharp_cnt = pinfo->sharpen_duration;
177
178 spinfo->last_sample = jiffies;
179
180	/* This should never happen, but in case it does, we assume the old
181	 * sample is still a good measurement and copy it. */
182 if (unlikely(spinfo->tx_num_xmit == 0))
183 pf = spinfo->last_pf;
184 else {
185 pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit;
186 pf <<= RC_PID_ARITH_SHIFT;
187 }
188
189 spinfo->tx_num_xmit = 0;
190 spinfo->tx_num_failed = 0;
191
192 /* If we just switched rate, update the rate behaviour info. */
193 if (pinfo->oldrate != sta->txrate) {
194
195 i = rinfo[pinfo->oldrate].rev_index;
196 j = rinfo[sta->txrate].rev_index;
197
198 tmp = (pf - spinfo->last_pf);
199 tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT);
200
201 rinfo[j].diff = rinfo[i].diff + tmp;
202 pinfo->oldrate = sta->txrate;
203 }
204 rate_control_pid_normalize(pinfo, mode->num_rates);
205
206 /* Compute the proportional, integral and derivative errors. */
207 err_prop = (pinfo->target << RC_PID_ARITH_SHIFT) - pf;
208
209 err_avg = spinfo->err_avg_sc >> pinfo->smoothing_shift;
210 spinfo->err_avg_sc = spinfo->err_avg_sc - err_avg + err_prop;
211 err_int = spinfo->err_avg_sc >> pinfo->smoothing_shift;
212
213 err_der = (pf - spinfo->last_pf) *
214 (1 + pinfo->sharpen_factor * spinfo->sharp_cnt);
215 spinfo->last_pf = pf;
216 if (spinfo->sharp_cnt)
217 spinfo->sharp_cnt--;
218
219#ifdef CONFIG_MAC80211_DEBUGFS
220 rate_control_pid_event_pf_sample(&spinfo->events, pf, err_prop, err_int,
221 err_der);
222#endif
223
224 /* Compute the controller output. */
225 adj = (err_prop * pinfo->coeff_p + err_int * pinfo->coeff_i
226 + err_der * pinfo->coeff_d);
227 adj = RC_PID_DO_ARITH_RIGHT_SHIFT(adj, 2 * RC_PID_ARITH_SHIFT);
228
229 /* Change rate. */
230 if (adj)
231 rate_control_pid_adjust_rate(local, sta, adj, rinfo);
232}
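
The failed-frames percentage sampled above is just a scaled ratio of the two per-interval counters; for example (counter values invented):

#include <stdio.h>

#define ARITH_SHIFT 8 /* RC_PID_ARITH_SHIFT */

int main(void)
{
	unsigned int tx_num_xmit = 40, tx_num_failed = 6;
	unsigned int pf = tx_num_failed * 100 / tx_num_xmit; /* 15% */

	pf <<= ARITH_SHIFT; /* keep fractional precision downstream */
	printf("pf=%u (%u%%)\n", pf, pf >> ARITH_SHIFT); /* pf=3840 (15%) */
	return 0;
}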
233
234static void rate_control_pid_tx_status(void *priv, struct net_device *dev,
235 struct sk_buff *skb,
236 struct ieee80211_tx_status *status)
237{
238 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
239 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
240 struct ieee80211_sub_if_data *sdata;
241 struct rc_pid_info *pinfo = priv;
242 struct sta_info *sta;
243 struct rc_pid_sta_info *spinfo;
244 unsigned long period;
245
246 sta = sta_info_get(local, hdr->addr1);
247
248 if (!sta)
249 return;
250
251 /* Don't update the state if we're not controlling the rate. */
252 sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
253 if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) {
254 sta->txrate = sdata->bss->max_ratectrl_rateidx;
255			goto ignore;	/* must still drop the sta reference */
256 }
257
258 /* Ignore all frames that were sent with a different rate than the rate
259 * we currently advise mac80211 to use. */
260 if (status->control.rate != &local->oper_hw_mode->rates[sta->txrate])
261 goto ignore;
262
263 spinfo = sta->rate_ctrl_priv;
264 spinfo->tx_num_xmit++;
265
266#ifdef CONFIG_MAC80211_DEBUGFS
267 rate_control_pid_event_tx_status(&spinfo->events, status);
268#endif
269
270 /* We count frames that totally failed to be transmitted as two bad
271 * frames, those that made it out but had some retries as one good and
272 * one bad frame. */
273 if (status->excessive_retries) {
274 spinfo->tx_num_failed += 2;
275 spinfo->tx_num_xmit++;
276 } else if (status->retry_count) {
277 spinfo->tx_num_failed++;
278 spinfo->tx_num_xmit++;
279 }
280
281 if (status->excessive_retries) {
282 sta->tx_retry_failed++;
283 sta->tx_num_consecutive_failures++;
284 sta->tx_num_mpdu_fail++;
285 } else {
286 sta->last_ack_rssi[0] = sta->last_ack_rssi[1];
287 sta->last_ack_rssi[1] = sta->last_ack_rssi[2];
288 sta->last_ack_rssi[2] = status->ack_signal;
289 sta->tx_num_consecutive_failures = 0;
290 sta->tx_num_mpdu_ok++;
291 }
292 sta->tx_retry_count += status->retry_count;
293 sta->tx_num_mpdu_fail += status->retry_count;
294
295 /* Update PID controller state. */
296 period = (HZ * pinfo->sampling_period + 500) / 1000;
297 if (!period)
298 period = 1;
299 if (time_after(jiffies, spinfo->last_sample + period))
300 rate_control_pid_sample(pinfo, local, sta);
301
302ignore:
303 sta_info_put(sta);
304}
305
306static void rate_control_pid_get_rate(void *priv, struct net_device *dev,
307 struct ieee80211_hw_mode *mode,
308 struct sk_buff *skb,
309 struct rate_selection *sel)
310{
311 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
312 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
313 struct ieee80211_sub_if_data *sdata;
314 struct sta_info *sta;
315 int rateidx;
316 u16 fc;
317
318 sta = sta_info_get(local, hdr->addr1);
319
320 /* Send management frames and broadcast/multicast data using lowest
321 * rate. */
322 fc = le16_to_cpu(hdr->frame_control);
323 if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
324 is_multicast_ether_addr(hdr->addr1) || !sta) {
325 sel->rate = rate_lowest(local, mode, sta);
326 if (sta)
327 sta_info_put(sta);
328 return;
329 }
330
331 /* If a forced rate is in effect, select it. */
332 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
333 if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
334 sta->txrate = sdata->bss->force_unicast_rateidx;
335
336 rateidx = sta->txrate;
337
338 if (rateidx >= mode->num_rates)
339 rateidx = mode->num_rates - 1;
340
341 sta->last_txrate = rateidx;
342
343	sel->rate = &mode->rates[rateidx];
344
345#ifdef CONFIG_MAC80211_DEBUGFS
346	rate_control_pid_event_tx_rate(
347		&((struct rc_pid_sta_info *) sta->rate_ctrl_priv)->events,
348		rateidx, mode->rates[rateidx].rate);
349#endif
350	/* drop the reference only after rate_ctrl_priv was last used */
351	sta_info_put(sta);
352}
353
354static void rate_control_pid_rate_init(void *priv, void *priv_sta,
355 struct ieee80211_local *local,
356 struct sta_info *sta)
357{
358 /* TODO: This routine should consider using RSSI from previous packets
359 * as we need to have IEEE 802.1X auth succeed immediately after assoc..
360 * Until that method is implemented, we will use the lowest supported
361 * rate as a workaround. */
362 sta->txrate = rate_lowest_index(local, local->oper_hw_mode, sta);
363}
364
365static void *rate_control_pid_alloc(struct ieee80211_local *local)
366{
367 struct rc_pid_info *pinfo;
368 struct rc_pid_rateinfo *rinfo;
369 struct ieee80211_hw_mode *mode;
370 int i, j, tmp;
371 bool s;
372#ifdef CONFIG_MAC80211_DEBUGFS
373 struct rc_pid_debugfs_entries *de;
374#endif
375
376 pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC);
377 if (!pinfo)
378 return NULL;
379
380 /* We can safely assume that oper_hw_mode won't change unless we get
381 * reinitialized. */
382 mode = local->oper_hw_mode;
383 rinfo = kmalloc(sizeof(*rinfo) * mode->num_rates, GFP_ATOMIC);
384 if (!rinfo) {
385 kfree(pinfo);
386 return NULL;
387 }
388
389 /* Sort the rates. This is optimized for the most common case (i.e.
390 * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed
391 * mapping too. */
392 for (i = 0; i < mode->num_rates; i++) {
393 rinfo[i].index = i;
394 rinfo[i].rev_index = i;
395		if (RC_PID_FAST_START)	/* pinfo->fast_start not yet set here */
396			rinfo[i].diff = 0;
397		else
398			rinfo[i].diff = i * RC_PID_NORM_OFFSET;
399 }
400 for (i = 1; i < mode->num_rates; i++) {
401 s = 0;
402 for (j = 0; j < mode->num_rates - i; j++)
403 if (unlikely(mode->rates[rinfo[j].index].rate >
404 mode->rates[rinfo[j + 1].index].rate)) {
405 tmp = rinfo[j].index;
406 rinfo[j].index = rinfo[j + 1].index;
407 rinfo[j + 1].index = tmp;
408 rinfo[rinfo[j].index].rev_index = j;
409 rinfo[rinfo[j + 1].index].rev_index = j + 1;
410 s = 1;
411 }
412 if (!s)
413 break;
414 }
415
416 pinfo->target = RC_PID_TARGET_PF;
417 pinfo->sampling_period = RC_PID_INTERVAL;
418 pinfo->coeff_p = RC_PID_COEFF_P;
419 pinfo->coeff_i = RC_PID_COEFF_I;
420 pinfo->coeff_d = RC_PID_COEFF_D;
421 pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT;
422 pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR;
423 pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION;
424 pinfo->norm_offset = RC_PID_NORM_OFFSET;
425 pinfo->fast_start = RC_PID_FAST_START;
426 pinfo->rinfo = rinfo;
427 pinfo->oldrate = 0;
428
429#ifdef CONFIG_MAC80211_DEBUGFS
430 de = &pinfo->dentries;
431 de->dir = debugfs_create_dir("rc80211_pid",
432 local->hw.wiphy->debugfsdir);
433 de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR,
434 de->dir, &pinfo->target);
435 de->sampling_period = debugfs_create_u32("sampling_period",
436 S_IRUSR | S_IWUSR, de->dir,
437 &pinfo->sampling_period);
438 de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR,
439 de->dir, &pinfo->coeff_p);
440 de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR,
441 de->dir, &pinfo->coeff_i);
442 de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR,
443 de->dir, &pinfo->coeff_d);
444 de->smoothing_shift = debugfs_create_u32("smoothing_shift",
445 S_IRUSR | S_IWUSR, de->dir,
446 &pinfo->smoothing_shift);
447 de->sharpen_factor = debugfs_create_u32("sharpen_factor",
448 S_IRUSR | S_IWUSR, de->dir,
449 &pinfo->sharpen_factor);
450 de->sharpen_duration = debugfs_create_u32("sharpen_duration",
451 S_IRUSR | S_IWUSR, de->dir,
452 &pinfo->sharpen_duration);
453 de->norm_offset = debugfs_create_u32("norm_offset",
454 S_IRUSR | S_IWUSR, de->dir,
455 &pinfo->norm_offset);
456 de->fast_start = debugfs_create_bool("fast_start",
457 S_IRUSR | S_IWUSR, de->dir,
458 &pinfo->fast_start);
459#endif
460
461 return pinfo;
462}
463
464static void rate_control_pid_free(void *priv)
465{
466 struct rc_pid_info *pinfo = priv;
467#ifdef CONFIG_MAC80211_DEBUGFS
468 struct rc_pid_debugfs_entries *de = &pinfo->dentries;
469
470 debugfs_remove(de->fast_start);
471 debugfs_remove(de->norm_offset);
472 debugfs_remove(de->sharpen_duration);
473 debugfs_remove(de->sharpen_factor);
474 debugfs_remove(de->smoothing_shift);
475 debugfs_remove(de->coeff_d);
476 debugfs_remove(de->coeff_i);
477 debugfs_remove(de->coeff_p);
478 debugfs_remove(de->sampling_period);
479 debugfs_remove(de->target);
480 debugfs_remove(de->dir);
481#endif
482
483 kfree(pinfo->rinfo);
484 kfree(pinfo);
485}
486
487static void rate_control_pid_clear(void *priv)
488{
489}
490
491static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp)
492{
493 struct rc_pid_sta_info *spinfo;
494
495 spinfo = kzalloc(sizeof(*spinfo), gfp);
496 if (spinfo == NULL)
497 return NULL;
498
499 spinfo->last_sample = jiffies;
500
501#ifdef CONFIG_MAC80211_DEBUGFS
502 spin_lock_init(&spinfo->events.lock);
503 init_waitqueue_head(&spinfo->events.waitqueue);
504#endif
505
506 return spinfo;
507}
508
509static void rate_control_pid_free_sta(void *priv, void *priv_sta)
510{
511 struct rc_pid_sta_info *spinfo = priv_sta;
512 kfree(spinfo);
513}
514
515static struct rate_control_ops mac80211_rcpid = {
516 .name = "pid",
517 .tx_status = rate_control_pid_tx_status,
518 .get_rate = rate_control_pid_get_rate,
519 .rate_init = rate_control_pid_rate_init,
520 .clear = rate_control_pid_clear,
521 .alloc = rate_control_pid_alloc,
522 .free = rate_control_pid_free,
523 .alloc_sta = rate_control_pid_alloc_sta,
524 .free_sta = rate_control_pid_free_sta,
525#ifdef CONFIG_MAC80211_DEBUGFS
526 .add_sta_debugfs = rate_control_pid_add_sta_debugfs,
527 .remove_sta_debugfs = rate_control_pid_remove_sta_debugfs,
528#endif
529};
530
531MODULE_DESCRIPTION("PID controller based rate control algorithm");
532MODULE_AUTHOR("Stefano Brivio");
533MODULE_AUTHOR("Mattias Nissler");
534MODULE_LICENSE("GPL");
535
536int __init rc80211_pid_init(void)
537{
538 return ieee80211_rate_control_register(&mac80211_rcpid);
539}
540
541void __exit rc80211_pid_exit(void)
542{
543 ieee80211_rate_control_unregister(&mac80211_rcpid);
544}
545
546#ifdef CONFIG_MAC80211_RC_PID_MODULE
547module_init(rc80211_pid_init);
548module_exit(rc80211_pid_exit);
549#endif
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
new file mode 100644
index 000000000000..88b8dc9999bb
--- /dev/null
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -0,0 +1,223 @@
1/*
2 * Copyright 2007, Mattias Nissler <mattias.nissler@gmx.de>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/spinlock.h>
10#include <linux/poll.h>
11#include <linux/netdevice.h>
12#include <linux/types.h>
13#include <linux/skbuff.h>
14
15#include <net/mac80211.h>
16#include "ieee80211_rate.h"
17
18#include "rc80211_pid.h"
19
20static void rate_control_pid_event(struct rc_pid_event_buffer *buf,
21 enum rc_pid_event_type type,
22 union rc_pid_event_data *data)
23{
24 struct rc_pid_event *ev;
25 unsigned long status;
26
27 spin_lock_irqsave(&buf->lock, status);
28 ev = &(buf->ring[buf->next_entry]);
29 buf->next_entry = (buf->next_entry + 1) % RC_PID_EVENT_RING_SIZE;
30
31 ev->timestamp = jiffies;
32 ev->id = buf->ev_count++;
33 ev->type = type;
34 ev->data = *data;
35
36 spin_unlock_irqrestore(&buf->lock, status);
37
38 wake_up_all(&buf->waitqueue);
39}
40
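The producer above never blocks: once RC_PID_EVENT_RING_SIZE entries are outstanding, the oldest events are silently overwritten and a slow reader simply sees gaps in the id sequence. A minimal sketch of that wraparound (ring size shrunk for illustration):

#include <stdio.h>

#define RING_SIZE 4 /* stand-in for RC_PID_EVENT_RING_SIZE */

int main(void)
{
	unsigned int ring[RING_SIZE];
	unsigned int next = 0, id;

	/* write six events into a four-slot ring */
	for (id = 0; id < 6; id++) {
		ring[next] = id;
		next = (next + 1) % RING_SIZE;
	}

	/* slots now hold 4, 5, 2, 3: ids 0 and 1 were overwritten */
	for (next = 0; next < RING_SIZE; next++)
		printf("slot %u: event id %u\n", next, ring[next]);
	return 0;
}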
41void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
42 struct ieee80211_tx_status *stat)
43{
44 union rc_pid_event_data evd;
45
46 memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_status));
47 rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd);
48}
49
50void rate_control_pid_event_rate_change(struct rc_pid_event_buffer *buf,
51 int index, int rate)
52{
53 union rc_pid_event_data evd;
54
55 evd.index = index;
56 evd.rate = rate;
57 rate_control_pid_event(buf, RC_PID_EVENT_TYPE_RATE_CHANGE, &evd);
58}
59
60void rate_control_pid_event_tx_rate(struct rc_pid_event_buffer *buf,
61 int index, int rate)
62{
63 union rc_pid_event_data evd;
64
65 evd.index = index;
66 evd.rate = rate;
67 rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_RATE, &evd);
68}
69
70void rate_control_pid_event_pf_sample(struct rc_pid_event_buffer *buf,
71 s32 pf_sample, s32 prop_err,
72 s32 int_err, s32 der_err)
73{
74 union rc_pid_event_data evd;
75
76 evd.pf_sample = pf_sample;
77 evd.prop_err = prop_err;
78 evd.int_err = int_err;
79 evd.der_err = der_err;
80 rate_control_pid_event(buf, RC_PID_EVENT_TYPE_PF_SAMPLE, &evd);
81}
82
83static int rate_control_pid_events_open(struct inode *inode, struct file *file)
84{
85 struct rc_pid_sta_info *sinfo = inode->i_private;
86 struct rc_pid_event_buffer *events = &sinfo->events;
87 struct rc_pid_events_file_info *file_info;
88 unsigned int status;
89
90 /* Allocate a state struct */
91 file_info = kmalloc(sizeof(*file_info), GFP_KERNEL);
92 if (file_info == NULL)
93 return -ENOMEM;
94
95 spin_lock_irqsave(&events->lock, status);
96
97 file_info->next_entry = events->next_entry;
98 file_info->events = events;
99
100 spin_unlock_irqrestore(&events->lock, status);
101
102 file->private_data = file_info;
103
104 return 0;
105}
106
107static int rate_control_pid_events_release(struct inode *inode,
108 struct file *file)
109{
110 struct rc_pid_events_file_info *file_info = file->private_data;
111
112 kfree(file_info);
113
114 return 0;
115}
116
117static unsigned int rate_control_pid_events_poll(struct file *file,
118 poll_table *wait)
119{
120 struct rc_pid_events_file_info *file_info = file->private_data;
121
122 poll_wait(file, &file_info->events->waitqueue, wait);
123
124 return POLLIN | POLLRDNORM;
125}
126
127#define RC_PID_PRINT_BUF_SIZE 64
128
129static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
130 size_t length, loff_t *offset)
131{
132 struct rc_pid_events_file_info *file_info = file->private_data;
133 struct rc_pid_event_buffer *events = file_info->events;
134 struct rc_pid_event *ev;
135 char pb[RC_PID_PRINT_BUF_SIZE];
136 int ret;
137 int p;
138 unsigned int status;
139
140 /* Check if there is something to read. */
141 if (events->next_entry == file_info->next_entry) {
142 if (file->f_flags & O_NONBLOCK)
143 return -EAGAIN;
144
145 /* Wait */
146 ret = wait_event_interruptible(events->waitqueue,
147 events->next_entry != file_info->next_entry);
148
149 if (ret)
150 return ret;
151 }
152
153 /* Write out one event per call. A little inefficient, but this is
154 * debugging code anyway. */
155 spin_lock_irqsave(&events->lock, status);
156
157 /* Get an event */
158 ev = &(events->ring[file_info->next_entry]);
159 file_info->next_entry = (file_info->next_entry + 1) %
160 RC_PID_EVENT_RING_SIZE;
161
162 /* Print information about the event. Note that userspace needs to
163 * provide large enough buffers. */
164 length = length < RC_PID_PRINT_BUF_SIZE ?
165 length : RC_PID_PRINT_BUF_SIZE;
166 p = snprintf(pb, length, "%u %lu ", ev->id, ev->timestamp);
167 switch (ev->type) {
168 case RC_PID_EVENT_TYPE_TX_STATUS:
169 p += snprintf(pb + p, length - p, "tx_status %u %u",
170 ev->data.tx_status.excessive_retries,
171 ev->data.tx_status.retry_count);
172 break;
173 case RC_PID_EVENT_TYPE_RATE_CHANGE:
174 p += snprintf(pb + p, length - p, "rate_change %d %d",
175 ev->data.index, ev->data.rate);
176 break;
177 case RC_PID_EVENT_TYPE_TX_RATE:
178 p += snprintf(pb + p, length - p, "tx_rate %d %d",
179 ev->data.index, ev->data.rate);
180 break;
181 case RC_PID_EVENT_TYPE_PF_SAMPLE:
182 p += snprintf(pb + p, length - p,
183 "pf_sample %d %d %d %d",
184 ev->data.pf_sample, ev->data.prop_err,
185 ev->data.int_err, ev->data.der_err);
186 break;
187 }
188 p += snprintf(pb + p, length - p, "\n");
189
190 spin_unlock_irqrestore(&events->lock, status);
191
192 if (copy_to_user(buf, pb, p))
193 return -EFAULT;
194
195 return p;
196}
197
198#undef RC_PID_PRINT_BUF_SIZE
199
200static struct file_operations rc_pid_fop_events = {
201 .owner = THIS_MODULE,
202 .read = rate_control_pid_events_read,
203 .poll = rate_control_pid_events_poll,
204 .open = rate_control_pid_events_open,
205 .release = rate_control_pid_events_release,
206};
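A hypothetical user-space consumer of the file these operations implement. The debugfs path is an assumption (the actual location depends on the per-station directory the entry is created in, see rate_control_pid_add_sta_debugfs() below), and the buffer size mirrors RC_PID_PRINT_BUF_SIZE:

#include <stdio.h>
#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

int main(void)
{
	char buf[64]; /* RC_PID_PRINT_BUF_SIZE */
	struct pollfd pfd;
	ssize_t n;

	/* path is illustrative only */
	pfd.fd = open("/sys/kernel/debug/.../rc_pid_events", O_RDONLY);
	if (pfd.fd < 0)
		return 1;
	pfd.events = POLLIN;

	/* note: the poll op above always reports readable; it is
	 * read() that blocks until a new event is logged */
	while (poll(&pfd, 1, -1) > 0) {
		n = read(pfd.fd, buf, sizeof(buf) - 1); /* one event */
		if (n <= 0)
			break;
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(pfd.fd);
	return 0;
}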
207
208void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
209 struct dentry *dir)
210{
211 struct rc_pid_sta_info *spinfo = priv_sta;
212
213 spinfo->events_entry = debugfs_create_file("rc_pid_events", S_IRUGO,
214 dir, spinfo,
215 &rc_pid_fop_events);
216}
217
218void rate_control_pid_remove_sta_debugfs(void *priv, void *priv_sta)
219{
220 struct rc_pid_sta_info *spinfo = priv_sta;
221
222 debugfs_remove(spinfo->events_entry);
223}
diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c
index da72737364e4..934676d687d6 100644
--- a/net/mac80211/rc80211_simple.c
+++ b/net/mac80211/rc80211_simple.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/module.h>
16 17
17#include <net/mac80211.h> 18#include <net/mac80211.h>
18#include "ieee80211_i.h" 19#include "ieee80211_i.h"
@@ -23,6 +24,8 @@
23/* This is a minimal implementation of TX rate controlling that can be used 24/* This is a minimal implementation of TX rate controlling that can be used
24 * as the default when no improved mechanisms are available. */ 25 * as the default when no improved mechanisms are available. */
25 26
27#define RATE_CONTROL_NUM_DOWN 20
28#define RATE_CONTROL_NUM_UP 15
26 29
27#define RATE_CONTROL_EMERG_DEC 2 30#define RATE_CONTROL_EMERG_DEC 2
28#define RATE_CONTROL_INTERVAL (HZ / 20) 31#define RATE_CONTROL_INTERVAL (HZ / 20)
@@ -87,26 +90,6 @@ static void rate_control_rate_dec(struct ieee80211_local *local,
87 } 90 }
88} 91}
89 92
90
91static struct ieee80211_rate *
92rate_control_lowest_rate(struct ieee80211_local *local,
93 struct ieee80211_hw_mode *mode)
94{
95 int i;
96
97 for (i = 0; i < mode->num_rates; i++) {
98 struct ieee80211_rate *rate = &mode->rates[i];
99
100 if (rate->flags & IEEE80211_RATE_SUPPORTED)
101 return rate;
102 }
103
104 printk(KERN_DEBUG "rate_control_lowest_rate - no supported rates "
105 "found\n");
106 return &mode->rates[0];
107}
108
109
110struct global_rate_control { 93struct global_rate_control {
111 int dummy; 94 int dummy;
112}; 95};
@@ -216,35 +199,33 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev,
216} 199}
217 200
218 201
219static struct ieee80211_rate * 202static void
220rate_control_simple_get_rate(void *priv, struct net_device *dev, 203rate_control_simple_get_rate(void *priv, struct net_device *dev,
204 struct ieee80211_hw_mode *mode,
221 struct sk_buff *skb, 205 struct sk_buff *skb,
222 struct rate_control_extra *extra) 206 struct rate_selection *sel)
223{ 207{
224 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 208 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
225 struct ieee80211_sub_if_data *sdata;
226 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 209 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
227 struct ieee80211_hw_mode *mode = extra->mode; 210 struct ieee80211_sub_if_data *sdata;
228 struct sta_info *sta; 211 struct sta_info *sta;
229 int rateidx, nonerp_idx; 212 int rateidx;
230 u16 fc; 213 u16 fc;
231 214
232 memset(extra, 0, sizeof(*extra)); 215 sta = sta_info_get(local, hdr->addr1);
233 216
217 /* Send management frames and broadcast/multicast data using lowest
218 * rate. */
234 fc = le16_to_cpu(hdr->frame_control); 219 fc = le16_to_cpu(hdr->frame_control);
235 if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || 220 if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
236 (hdr->addr1[0] & 0x01)) { 221 is_multicast_ether_addr(hdr->addr1) || !sta) {
237 /* Send management frames and broadcast/multicast data using 222 sel->rate = rate_lowest(local, mode, sta);
238 * lowest rate. */ 223 if (sta)
239 /* TODO: this could probably be improved.. */ 224 sta_info_put(sta);
240 return rate_control_lowest_rate(local, mode); 225 return;
241 } 226 }
242 227
243 sta = sta_info_get(local, hdr->addr1); 228 /* If a forced rate is in effect, select it. */
244
245 if (!sta)
246 return rate_control_lowest_rate(local, mode);
247
248 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 229 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
249 if (sdata->bss && sdata->bss->force_unicast_rateidx > -1) 230 if (sdata->bss && sdata->bss->force_unicast_rateidx > -1)
250 sta->txrate = sdata->bss->force_unicast_rateidx; 231 sta->txrate = sdata->bss->force_unicast_rateidx;
@@ -255,17 +236,10 @@ rate_control_simple_get_rate(void *priv, struct net_device *dev,
255 rateidx = mode->num_rates - 1; 236 rateidx = mode->num_rates - 1;
256 237
257 sta->last_txrate = rateidx; 238 sta->last_txrate = rateidx;
258 nonerp_idx = rateidx;
259 while (nonerp_idx > 0 &&
260 ((mode->rates[nonerp_idx].flags & IEEE80211_RATE_ERP) ||
261 !(mode->rates[nonerp_idx].flags & IEEE80211_RATE_SUPPORTED) ||
262 !(sta->supp_rates & BIT(nonerp_idx))))
263 nonerp_idx--;
264 extra->nonerp = &mode->rates[nonerp_idx];
265 239
266 sta_info_put(sta); 240 sta_info_put(sta);
267 241
268 return &mode->rates[rateidx]; 242 sel->rate = &mode->rates[rateidx];
269} 243}
270 244
271 245
@@ -391,7 +365,7 @@ static void rate_control_simple_remove_sta_debugfs(void *priv, void *priv_sta)
391} 365}
392#endif 366#endif
393 367
394struct rate_control_ops mac80211_rcsimple = { 368static struct rate_control_ops mac80211_rcsimple = {
395 .name = "simple", 369 .name = "simple",
396 .tx_status = rate_control_simple_tx_status, 370 .tx_status = rate_control_simple_tx_status,
397 .get_rate = rate_control_simple_get_rate, 371 .get_rate = rate_control_simple_get_rate,
@@ -406,3 +380,21 @@ struct rate_control_ops mac80211_rcsimple = {
406 .remove_sta_debugfs = rate_control_simple_remove_sta_debugfs, 380 .remove_sta_debugfs = rate_control_simple_remove_sta_debugfs,
407#endif 381#endif
408}; 382};
383
384MODULE_LICENSE("GPL");
385MODULE_DESCRIPTION("Simple rate control algorithm");
386
387int __init rc80211_simple_init(void)
388{
389 return ieee80211_rate_control_register(&mac80211_rcsimple);
390}
391
392void __exit rc80211_simple_exit(void)
393{
394 ieee80211_rate_control_unregister(&mac80211_rcsimple);
395}
396
397#ifdef CONFIG_MAC80211_RC_SIMPLE_MODULE
398module_init(rc80211_simple_init);
399module_exit(rc80211_simple_exit);
400#endif
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 00f908d9275e..89e1e3070ec1 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -24,6 +24,10 @@
24#include "tkip.h" 24#include "tkip.h"
25#include "wme.h" 25#include "wme.h"
26 26
27u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
28 struct tid_ampdu_rx *tid_agg_rx,
29 struct sk_buff *skb, u16 mpdu_seq_num,
30 int bar_req);
27/* 31/*
28 * monitor mode reception 32 * monitor mode reception
29 * 33 *
@@ -61,8 +65,12 @@ static inline int should_drop_frame(struct ieee80211_rx_status *status,
61 return 1; 65 return 1;
62 if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len)) 66 if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len))
63 return 1; 67 return 1;
64 if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == 68 if (((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FTYPE)) ==
65 cpu_to_le16(IEEE80211_FTYPE_CTL)) 69 cpu_to_le16(IEEE80211_FTYPE_CTL)) &&
70 ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
71 cpu_to_le16(IEEE80211_STYPE_PSPOLL)) &&
72 ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE)) !=
73 cpu_to_le16(IEEE80211_STYPE_BACK_REQ)))
66 return 1; 74 return 1;
67 return 0; 75 return 0;
68} 76}
@@ -79,8 +87,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
79 struct ieee80211_sub_if_data *sdata; 87 struct ieee80211_sub_if_data *sdata;
80 struct ieee80211_rate *rate; 88 struct ieee80211_rate *rate;
81 int needed_headroom = 0; 89 int needed_headroom = 0;
82 struct ieee80211_rtap_hdr { 90 struct ieee80211_radiotap_header *rthdr;
83 struct ieee80211_radiotap_header hdr; 91 __le64 *rttsft = NULL;
92 struct ieee80211_rtap_fixed_data {
84 u8 flags; 93 u8 flags;
85 u8 rate; 94 u8 rate;
86 __le16 chan_freq; 95 __le16 chan_freq;
@@ -88,7 +97,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
88 u8 antsignal; 97 u8 antsignal;
89 u8 padding_for_rxflags; 98 u8 padding_for_rxflags;
90 __le16 rx_flags; 99 __le16 rx_flags;
91 } __attribute__ ((packed)) *rthdr; 100 } __attribute__ ((packed)) *rtfixed;
92 struct sk_buff *skb, *skb2; 101 struct sk_buff *skb, *skb2;
93 struct net_device *prev_dev = NULL; 102 struct net_device *prev_dev = NULL;
94 int present_fcs_len = 0; 103 int present_fcs_len = 0;
@@ -105,7 +114,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
105 if (status->flag & RX_FLAG_RADIOTAP) 114 if (status->flag & RX_FLAG_RADIOTAP)
106 rtap_len = ieee80211_get_radiotap_len(origskb->data); 115 rtap_len = ieee80211_get_radiotap_len(origskb->data);
107 else 116 else
108 needed_headroom = sizeof(*rthdr); 117 /* room for radiotap header, always present fields and TSFT */
118 needed_headroom = sizeof(*rthdr) + sizeof(*rtfixed) + 8;
109 119
110 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) 120 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
111 present_fcs_len = FCS_LEN; 121 present_fcs_len = FCS_LEN;
@@ -133,7 +143,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
133 * them allocate enough headroom to start with. 143 * them allocate enough headroom to start with.
134 */ 144 */
135 if (skb_headroom(skb) < needed_headroom && 145 if (skb_headroom(skb) < needed_headroom &&
136 pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) { 146 pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) {
137 dev_kfree_skb(skb); 147 dev_kfree_skb(skb);
138 return NULL; 148 return NULL;
139 } 149 }
@@ -152,45 +162,59 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
152 162
153 /* if necessary, prepend radiotap information */ 163 /* if necessary, prepend radiotap information */
154 if (!(status->flag & RX_FLAG_RADIOTAP)) { 164 if (!(status->flag & RX_FLAG_RADIOTAP)) {
165 rtfixed = (void *) skb_push(skb, sizeof(*rtfixed));
166 rtap_len = sizeof(*rthdr) + sizeof(*rtfixed);
167 if (status->flag & RX_FLAG_TSFT) {
168 rttsft = (void *) skb_push(skb, sizeof(*rttsft));
169 rtap_len += 8;
170 }
155 rthdr = (void *) skb_push(skb, sizeof(*rthdr)); 171 rthdr = (void *) skb_push(skb, sizeof(*rthdr));
156 memset(rthdr, 0, sizeof(*rthdr)); 172 memset(rthdr, 0, sizeof(*rthdr));
157 rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); 173 memset(rtfixed, 0, sizeof(*rtfixed));
158 rthdr->hdr.it_present = 174 rthdr->it_present =
159 cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | 175 cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
160 (1 << IEEE80211_RADIOTAP_RATE) | 176 (1 << IEEE80211_RADIOTAP_RATE) |
161 (1 << IEEE80211_RADIOTAP_CHANNEL) | 177 (1 << IEEE80211_RADIOTAP_CHANNEL) |
162 (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) | 178 (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL) |
163 (1 << IEEE80211_RADIOTAP_RX_FLAGS)); 179 (1 << IEEE80211_RADIOTAP_RX_FLAGS));
164 rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ? 180 rtfixed->flags = 0;
165 IEEE80211_RADIOTAP_F_FCS : 0; 181 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
182 rtfixed->flags |= IEEE80211_RADIOTAP_F_FCS;
183
184 if (rttsft) {
185 *rttsft = cpu_to_le64(status->mactime);
186 rthdr->it_present |=
187 cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
188 }
166 189
167 /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ 190 /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */
168 rthdr->rx_flags = 0; 191 rtfixed->rx_flags = 0;
169 if (status->flag & 192 if (status->flag &
170 (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) 193 (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
171 rthdr->rx_flags |= 194 rtfixed->rx_flags |=
172 cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); 195 cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS);
173 196
174 rate = ieee80211_get_rate(local, status->phymode, 197 rate = ieee80211_get_rate(local, status->phymode,
175 status->rate); 198 status->rate);
176 if (rate) 199 if (rate)
177 rthdr->rate = rate->rate / 5; 200 rtfixed->rate = rate->rate / 5;
178 201
179 rthdr->chan_freq = cpu_to_le16(status->freq); 202 rtfixed->chan_freq = cpu_to_le16(status->freq);
180 203
181 if (status->phymode == MODE_IEEE80211A) 204 if (status->phymode == MODE_IEEE80211A)
182 rthdr->chan_flags = 205 rtfixed->chan_flags =
183 cpu_to_le16(IEEE80211_CHAN_OFDM | 206 cpu_to_le16(IEEE80211_CHAN_OFDM |
184 IEEE80211_CHAN_5GHZ); 207 IEEE80211_CHAN_5GHZ);
185 else 208 else
186 rthdr->chan_flags = 209 rtfixed->chan_flags =
187 cpu_to_le16(IEEE80211_CHAN_DYN | 210 cpu_to_le16(IEEE80211_CHAN_DYN |
188 IEEE80211_CHAN_2GHZ); 211 IEEE80211_CHAN_2GHZ);
189 212
190 rthdr->antsignal = status->ssi; 213 rtfixed->antsignal = status->ssi;
214 rthdr->it_len = cpu_to_le16(rtap_len);
191 } 215 }
192 216
193 skb_set_mac_header(skb, 0); 217 skb_reset_mac_header(skb);
194 skb->ip_summed = CHECKSUM_UNNECESSARY; 218 skb->ip_summed = CHECKSUM_UNNECESSARY;
195 skb->pkt_type = PACKET_OTHERHOST; 219 skb->pkt_type = PACKET_OTHERHOST;
196 skb->protocol = htons(ETH_P_802_2); 220 skb->protocol = htons(ETH_P_802_2);
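For reference, the headroom arithmetic in the hunk above: the fixed fields always follow the 8-byte radiotap header, and the 8-byte TSFT field is inserted between the two only when the driver supplied a mactime, hence sizeof(*rthdr) + sizeof(*rtfixed) + 8 as the worst case. A rough size check (struct layout copied from the patch; the 8-byte header size is an assumption here):

#include <stdio.h>

struct rtap_fixed {
	unsigned char flags;
	unsigned char rate;
	unsigned short chan_freq;
	unsigned short chan_flags;
	unsigned char antsignal;
	unsigned char padding_for_rxflags;
	unsigned short rx_flags;
} __attribute__ ((packed));

int main(void)
{
	unsigned int rthdr_len = 8; /* struct ieee80211_radiotap_header */
	unsigned int len = rthdr_len + sizeof(struct rtap_fixed);

	printf("without TSFT: %u bytes\n", len);     /* 8 + 10 = 18 */
	printf("with TSFT:    %u bytes\n", len + 8); /* worst case 26 */
	return 0;
}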
@@ -199,7 +223,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
199 if (!netif_running(sdata->dev)) 223 if (!netif_running(sdata->dev))
200 continue; 224 continue;
201 225
202 if (sdata->type != IEEE80211_IF_TYPE_MNTR) 226 if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR)
203 continue; 227 continue;
204 228
205 if (prev_dev) { 229 if (prev_dev) {
@@ -243,6 +267,10 @@ ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
243 u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; 267 u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN;
244 /* frame has qos control */ 268 /* frame has qos control */
245 tid = qc[0] & QOS_CONTROL_TID_MASK; 269 tid = qc[0] & QOS_CONTROL_TID_MASK;
270 if (qc[0] & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
271 rx->flags |= IEEE80211_TXRXD_RX_AMSDU;
272 else
273 rx->flags &= ~IEEE80211_TXRXD_RX_AMSDU;
246 } else { 274 } else {
247 if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { 275 if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) {
248 /* Separate TID for management frames */ 276 /* Separate TID for management frames */
@@ -266,11 +294,11 @@ ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx)
266 return TXRX_CONTINUE; 294 return TXRX_CONTINUE;
267} 295}
268 296
269static ieee80211_txrx_result 297
270ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx) 298static u32 ieee80211_rx_load_stats(struct ieee80211_local *local,
299 struct sk_buff *skb,
300 struct ieee80211_rx_status *status)
271{ 301{
272 struct ieee80211_local *local = rx->local;
273 struct sk_buff *skb = rx->skb;
274 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 302 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
275 u32 load = 0, hdrtime; 303 u32 load = 0, hdrtime;
276 struct ieee80211_rate *rate; 304 struct ieee80211_rate *rate;
@@ -284,7 +312,7 @@ ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
284 312
285 rate = &mode->rates[0]; 313 rate = &mode->rates[0];
286 for (i = 0; i < mode->num_rates; i++) { 314 for (i = 0; i < mode->num_rates; i++) {
287 if (mode->rates[i].val == rx->u.rx.status->rate) { 315 if (mode->rates[i].val == status->rate) {
288 rate = &mode->rates[i]; 316 rate = &mode->rates[i];
289 break; 317 break;
290 } 318 }
@@ -308,16 +336,13 @@ ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx)
308 336
309 /* Divide channel_use by 8 to avoid wrapping around the counter */ 337 /* Divide channel_use by 8 to avoid wrapping around the counter */
310 load >>= CHAN_UTIL_SHIFT; 338 load >>= CHAN_UTIL_SHIFT;
311 local->channel_use_raw += load;
312 rx->u.rx.load = load;
313 339
314 return TXRX_CONTINUE; 340 return load;
315} 341}
316 342
317ieee80211_rx_handler ieee80211_rx_pre_handlers[] = 343ieee80211_rx_handler ieee80211_rx_pre_handlers[] =
318{ 344{
319 ieee80211_rx_h_parse_qos, 345 ieee80211_rx_h_parse_qos,
320 ieee80211_rx_h_load_stats,
321 NULL 346 NULL
322}; 347};
323 348
@@ -338,8 +363,14 @@ ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx)
338 struct ieee80211_local *local = rx->local; 363 struct ieee80211_local *local = rx->local;
339 struct sk_buff *skb = rx->skb; 364 struct sk_buff *skb = rx->skb;
340 365
341 if (unlikely(local->sta_scanning != 0)) { 366 if (unlikely(local->sta_hw_scanning))
342 ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status); 367 return ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status);
368
369 if (unlikely(local->sta_sw_scanning)) {
370 /* drop all the other packets during a software scan anyway */
371 if (ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status)
372 != TXRX_QUEUED)
373 dev_kfree_skb(skb);
343 return TXRX_QUEUED; 374 return TXRX_QUEUED;
344 } 375 }
345 376
@@ -377,18 +408,6 @@ ieee80211_rx_h_check(struct ieee80211_txrx_data *rx)
377 return TXRX_DROP; 408 return TXRX_DROP;
378 } 409 }
379 410
380 if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH))
381 rx->skb->pkt_type = PACKET_OTHERHOST;
382 else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0)
383 rx->skb->pkt_type = PACKET_HOST;
384 else if (is_multicast_ether_addr(hdr->addr1)) {
385 if (is_broadcast_ether_addr(hdr->addr1))
386 rx->skb->pkt_type = PACKET_BROADCAST;
387 else
388 rx->skb->pkt_type = PACKET_MULTICAST;
389 } else
390 rx->skb->pkt_type = PACKET_OTHERHOST;
391
392 /* Drop disallowed frame classes based on STA auth/assoc state; 411 /* Drop disallowed frame classes based on STA auth/assoc state;
393 * IEEE 802.11, Chap 5.5. 412 * IEEE 802.11, Chap 5.5.
394 * 413 *
@@ -400,7 +419,7 @@ ieee80211_rx_h_check(struct ieee80211_txrx_data *rx)
400 if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || 419 if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA ||
401 ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && 420 ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL &&
402 (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && 421 (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) &&
403 rx->sdata->type != IEEE80211_IF_TYPE_IBSS && 422 rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
404 (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { 423 (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) {
405 if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && 424 if ((!(rx->fc & IEEE80211_FCTL_FROMDS) &&
406 !(rx->fc & IEEE80211_FCTL_TODS) && 425 !(rx->fc & IEEE80211_FCTL_TODS) &&
@@ -620,13 +639,14 @@ ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx)
620 /* Update last_rx only for IBSS packets which are for the current 639 /* Update last_rx only for IBSS packets which are for the current
621 * BSSID to avoid keeping the current IBSS network alive in cases where 640 * BSSID to avoid keeping the current IBSS network alive in cases where
622 * other STAs are using different BSSID. */ 641 * other STAs are using different BSSID. */
623 if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) { 642 if (rx->sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
624 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len); 643 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
644 IEEE80211_IF_TYPE_IBSS);
625 if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) 645 if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0)
626 sta->last_rx = jiffies; 646 sta->last_rx = jiffies;
627 } else 647 } else
628 if (!is_multicast_ether_addr(hdr->addr1) || 648 if (!is_multicast_ether_addr(hdr->addr1) ||
629 rx->sdata->type == IEEE80211_IF_TYPE_STA) { 649 rx->sdata->vif.type == IEEE80211_IF_TYPE_STA) {
630 /* Update last_rx only for unicast frames in order to prevent 650 /* Update last_rx only for unicast frames in order to prevent
631 * the Probe Request frames (the only broadcast frames from a 651 * the Probe Request frames (the only broadcast frames from a
632 * STA in infrastructure mode) from keeping a connection alive. 652 * STA in infrastructure mode) from keeping a connection alive.
@@ -870,6 +890,7 @@ ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx)
870static ieee80211_txrx_result 890static ieee80211_txrx_result
871ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx) 891ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx)
872{ 892{
893 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
873 struct sk_buff *skb; 894 struct sk_buff *skb;
874 int no_pending_pkts; 895 int no_pending_pkts;
875 DECLARE_MAC_BUF(mac); 896 DECLARE_MAC_BUF(mac);
@@ -880,6 +901,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx)
880 !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH))) 901 !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)))
881 return TXRX_CONTINUE; 902 return TXRX_CONTINUE;
882 903
904 if ((sdata->vif.type != IEEE80211_IF_TYPE_AP) &&
905 (sdata->vif.type != IEEE80211_IF_TYPE_VLAN))
906 return TXRX_DROP;
907
883 skb = skb_dequeue(&rx->sta->tx_filtered); 908 skb = skb_dequeue(&rx->sta->tx_filtered);
884 if (!skb) { 909 if (!skb) {
885 skb = skb_dequeue(&rx->sta->ps_tx_buf); 910 skb = skb_dequeue(&rx->sta->ps_tx_buf);
@@ -956,68 +981,54 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx)
956 return TXRX_CONTINUE; 981 return TXRX_CONTINUE;
957} 982}
958 983
959static ieee80211_txrx_result 984static int
960ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx) 985ieee80211_802_1x_port_control(struct ieee80211_txrx_data *rx)
961{ 986{
962 if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) && 987 if (unlikely(rx->sdata->ieee802_1x_pac &&
963 rx->sdata->type != IEEE80211_IF_TYPE_STA && 988 (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)))) {
964 (rx->flags & IEEE80211_TXRXD_RXRA_MATCH))
965 return TXRX_CONTINUE;
966
967 if (unlikely(rx->sdata->ieee802_1x &&
968 (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
969 (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
970 (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) &&
971 !ieee80211_is_eapol(rx->skb))) {
972#ifdef CONFIG_MAC80211_DEBUG 989#ifdef CONFIG_MAC80211_DEBUG
973 struct ieee80211_hdr *hdr = 990 printk(KERN_DEBUG "%s: dropped frame "
974 (struct ieee80211_hdr *) rx->skb->data; 991 "(unauthorized port)\n", rx->dev->name);
975 DECLARE_MAC_BUF(mac);
976 printk(KERN_DEBUG "%s: dropped frame from %s"
977 " (unauthorized port)\n", rx->dev->name,
978 print_mac(mac, hdr->addr2));
979#endif /* CONFIG_MAC80211_DEBUG */ 992#endif /* CONFIG_MAC80211_DEBUG */
980 return TXRX_DROP; 993 return -EACCES;
981 } 994 }
982 995
983 return TXRX_CONTINUE; 996 return 0;
984} 997}
985 998
986static ieee80211_txrx_result 999static int
987ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) 1000ieee80211_drop_unencrypted(struct ieee80211_txrx_data *rx)
988{ 1001{
989 /* 1002 /*
990 * Pass through unencrypted frames if the hardware has 1003 * Pass through unencrypted frames if the hardware has
991 * decrypted them already. 1004 * decrypted them already.
992 */ 1005 */
993 if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED) 1006 if (rx->u.rx.status->flag & RX_FLAG_DECRYPTED)
994 return TXRX_CONTINUE; 1007 return 0;
995 1008
996 /* Drop unencrypted frames if key is set. */ 1009 /* Drop unencrypted frames if key is set. */
997 if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && 1010 if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) &&
998 (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && 1011 (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
999 (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && 1012 (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC &&
1000 (rx->key || rx->sdata->drop_unencrypted) && 1013 (rx->key || rx->sdata->drop_unencrypted))) {
1001 (rx->sdata->eapol == 0 || !ieee80211_is_eapol(rx->skb)))) {
1002 if (net_ratelimit()) 1014 if (net_ratelimit())
1003 printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " 1015 printk(KERN_DEBUG "%s: RX non-WEP frame, but expected "
1004 "encryption\n", rx->dev->name); 1016 "encryption\n", rx->dev->name);
1005 return TXRX_DROP; 1017 return -EACCES;
1006 } 1018 }
1007 return TXRX_CONTINUE; 1019 return 0;
1008} 1020}
1009 1021
1010static ieee80211_txrx_result 1022static int
1011ieee80211_rx_h_data(struct ieee80211_txrx_data *rx) 1023ieee80211_data_to_8023(struct ieee80211_txrx_data *rx)
1012{ 1024{
1013 struct net_device *dev = rx->dev; 1025 struct net_device *dev = rx->dev;
1014 struct ieee80211_local *local = rx->local;
1015 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; 1026 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
1016 u16 fc, hdrlen, ethertype; 1027 u16 fc, hdrlen, ethertype;
1017 u8 *payload; 1028 u8 *payload;
1018 u8 dst[ETH_ALEN]; 1029 u8 dst[ETH_ALEN];
1019 u8 src[ETH_ALEN]; 1030 u8 src[ETH_ALEN];
1020 struct sk_buff *skb = rx->skb, *skb2; 1031 struct sk_buff *skb = rx->skb;
1021 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1032 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1022 DECLARE_MAC_BUF(mac); 1033 DECLARE_MAC_BUF(mac);
1023 DECLARE_MAC_BUF(mac2); 1034 DECLARE_MAC_BUF(mac2);
@@ -1025,11 +1036,9 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1025 DECLARE_MAC_BUF(mac4); 1036 DECLARE_MAC_BUF(mac4);
1026 1037
1027 fc = rx->fc; 1038 fc = rx->fc;
1028 if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
1029 return TXRX_CONTINUE;
1030 1039
1031 if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) 1040 if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
1032 return TXRX_DROP; 1041 return -1;
1033 1042
1034 hdrlen = ieee80211_get_hdrlen(fc); 1043 hdrlen = ieee80211_get_hdrlen(fc);
1035 1044
@@ -1049,8 +1058,8 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1049 memcpy(dst, hdr->addr3, ETH_ALEN); 1058 memcpy(dst, hdr->addr3, ETH_ALEN);
1050 memcpy(src, hdr->addr2, ETH_ALEN); 1059 memcpy(src, hdr->addr2, ETH_ALEN);
1051 1060
1052 if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP && 1061 if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP &&
1053 sdata->type != IEEE80211_IF_TYPE_VLAN)) { 1062 sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) {
1054 if (net_ratelimit()) 1063 if (net_ratelimit())
1055 printk(KERN_DEBUG "%s: dropped ToDS frame " 1064 printk(KERN_DEBUG "%s: dropped ToDS frame "
1056 "(BSSID=%s SA=%s DA=%s)\n", 1065 "(BSSID=%s SA=%s DA=%s)\n",
@@ -1058,7 +1067,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1058 print_mac(mac, hdr->addr1), 1067 print_mac(mac, hdr->addr1),
1059 print_mac(mac2, hdr->addr2), 1068 print_mac(mac2, hdr->addr2),
1060 print_mac(mac3, hdr->addr3)); 1069 print_mac(mac3, hdr->addr3));
1061 return TXRX_DROP; 1070 return -1;
1062 } 1071 }
1063 break; 1072 break;
1064 case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): 1073 case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
@@ -1066,7 +1075,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1066 memcpy(dst, hdr->addr3, ETH_ALEN); 1075 memcpy(dst, hdr->addr3, ETH_ALEN);
1067 memcpy(src, hdr->addr4, ETH_ALEN); 1076 memcpy(src, hdr->addr4, ETH_ALEN);
1068 1077
1069 if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) { 1078 if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS)) {
1070 if (net_ratelimit()) 1079 if (net_ratelimit())
1071 printk(KERN_DEBUG "%s: dropped FromDS&ToDS " 1080 printk(KERN_DEBUG "%s: dropped FromDS&ToDS "
1072 "frame (RA=%s TA=%s DA=%s SA=%s)\n", 1081 "frame (RA=%s TA=%s DA=%s SA=%s)\n",
@@ -1075,7 +1084,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1075 print_mac(mac2, hdr->addr2), 1084 print_mac(mac2, hdr->addr2),
1076 print_mac(mac3, hdr->addr3), 1085 print_mac(mac3, hdr->addr3),
1077 print_mac(mac4, hdr->addr4)); 1086 print_mac(mac4, hdr->addr4));
1078 return TXRX_DROP; 1087 return -1;
1079 } 1088 }
1080 break; 1089 break;
1081 case IEEE80211_FCTL_FROMDS: 1090 case IEEE80211_FCTL_FROMDS:
@@ -1083,17 +1092,17 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1083 memcpy(dst, hdr->addr1, ETH_ALEN); 1092 memcpy(dst, hdr->addr1, ETH_ALEN);
1084 memcpy(src, hdr->addr3, ETH_ALEN); 1093 memcpy(src, hdr->addr3, ETH_ALEN);
1085 1094
1086 if (sdata->type != IEEE80211_IF_TYPE_STA || 1095 if (sdata->vif.type != IEEE80211_IF_TYPE_STA ||
1087 (is_multicast_ether_addr(dst) && 1096 (is_multicast_ether_addr(dst) &&
1088 !compare_ether_addr(src, dev->dev_addr))) 1097 !compare_ether_addr(src, dev->dev_addr)))
1089 return TXRX_DROP; 1098 return -1;
1090 break; 1099 break;
1091 case 0: 1100 case 0:
1092 /* DA SA BSSID */ 1101 /* DA SA BSSID */
1093 memcpy(dst, hdr->addr1, ETH_ALEN); 1102 memcpy(dst, hdr->addr1, ETH_ALEN);
1094 memcpy(src, hdr->addr2, ETH_ALEN); 1103 memcpy(src, hdr->addr2, ETH_ALEN);
1095 1104
1096 if (sdata->type != IEEE80211_IF_TYPE_IBSS) { 1105 if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) {
1097 if (net_ratelimit()) { 1106 if (net_ratelimit()) {
1098 printk(KERN_DEBUG "%s: dropped IBSS frame " 1107 printk(KERN_DEBUG "%s: dropped IBSS frame "
1099 "(DA=%s SA=%s BSSID=%s)\n", 1108 "(DA=%s SA=%s BSSID=%s)\n",
@@ -1102,21 +1111,20 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1102 print_mac(mac2, hdr->addr2), 1111 print_mac(mac2, hdr->addr2),
1103 print_mac(mac3, hdr->addr3)); 1112 print_mac(mac3, hdr->addr3));
1104 } 1113 }
1105 return TXRX_DROP; 1114 return -1;
1106 } 1115 }
1107 break; 1116 break;
1108 } 1117 }
1109 1118
1110 payload = skb->data + hdrlen;
1111
1112 if (unlikely(skb->len - hdrlen < 8)) { 1119 if (unlikely(skb->len - hdrlen < 8)) {
1113 if (net_ratelimit()) { 1120 if (net_ratelimit()) {
1114 printk(KERN_DEBUG "%s: RX too short data frame " 1121 printk(KERN_DEBUG "%s: RX too short data frame "
1115 "payload\n", dev->name); 1122 "payload\n", dev->name);
1116 } 1123 }
1117 return TXRX_DROP; 1124 return -1;
1118 } 1125 }
1119 1126
1127 payload = skb->data + hdrlen;
1120 ethertype = (payload[6] << 8) | payload[7]; 1128 ethertype = (payload[6] << 8) | payload[7];
1121 1129
1122 if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && 1130 if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
@@ -1130,6 +1138,7 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1130 } else { 1138 } else {
1131 struct ethhdr *ehdr; 1139 struct ethhdr *ehdr;
1132 __be16 len; 1140 __be16 len;
1141
1133 skb_pull(skb, hdrlen); 1142 skb_pull(skb, hdrlen);
1134 len = htons(skb->len); 1143 len = htons(skb->len);
1135 ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); 1144 ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
@@ -1137,36 +1146,72 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1137 memcpy(ehdr->h_source, src, ETH_ALEN); 1146 memcpy(ehdr->h_source, src, ETH_ALEN);
1138 ehdr->h_proto = len; 1147 ehdr->h_proto = len;
1139 } 1148 }
1140 skb->dev = dev; 1149 return 0;
1150}
1141 1151
1142 skb2 = NULL; 1152/*
1153 * requires that rx->skb is a frame with ethernet header
1154 */
1155static bool ieee80211_frame_allowed(struct ieee80211_txrx_data *rx)
1156{
1157 static const u8 pae_group_addr[ETH_ALEN]
1158 = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 };
1159 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1143 1160
1144 dev->stats.rx_packets++; 1161 /*
1145 dev->stats.rx_bytes += skb->len; 1162 * Allow EAPOL frames to us/the PAE group address regardless
1163 * of whether the frame was encrypted or not.
1164 */
1165 if (ehdr->h_proto == htons(ETH_P_PAE) &&
1166 (compare_ether_addr(ehdr->h_dest, rx->dev->dev_addr) == 0 ||
1167 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0))
1168 return true;
1169
1170 if (ieee80211_802_1x_port_control(rx) ||
1171 ieee80211_drop_unencrypted(rx))
1172 return false;
1146 1173
1147 if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP 1174 return true;
1148 || sdata->type == IEEE80211_IF_TYPE_VLAN) && 1175}
1176
1177/*
1178 * requires that rx->skb is a frame with ethernet header
1179 */
1180static void
1181ieee80211_deliver_skb(struct ieee80211_txrx_data *rx)
1182{
1183 struct net_device *dev = rx->dev;
1184 struct ieee80211_local *local = rx->local;
1185 struct sk_buff *skb, *xmit_skb;
1186 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1187 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1188 struct sta_info *dsta;
1189
1190 skb = rx->skb;
1191 xmit_skb = NULL;
1192
1193 if (local->bridge_packets && (sdata->vif.type == IEEE80211_IF_TYPE_AP ||
1194 sdata->vif.type == IEEE80211_IF_TYPE_VLAN) &&
1149 (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { 1195 (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) {
1150 if (is_multicast_ether_addr(skb->data)) { 1196 if (is_multicast_ether_addr(ehdr->h_dest)) {
1151 /* send multicast frames both to higher layers in 1197 /*
1152 * local net stack and back to the wireless media */ 1198 * send multicast frames both to higher layers in
1153 skb2 = skb_copy(skb, GFP_ATOMIC); 1199 * local net stack and back to the wireless medium
1154 if (!skb2 && net_ratelimit()) 1200 */
1201 xmit_skb = skb_copy(skb, GFP_ATOMIC);
1202 if (!xmit_skb && net_ratelimit())
1155 printk(KERN_DEBUG "%s: failed to clone " 1203 printk(KERN_DEBUG "%s: failed to clone "
1156 "multicast frame\n", dev->name); 1204 "multicast frame\n", dev->name);
1157 } else { 1205 } else {
1158 struct sta_info *dsta;
1159 dsta = sta_info_get(local, skb->data); 1206 dsta = sta_info_get(local, skb->data);
1160 if (dsta && !dsta->dev) { 1207 if (dsta && dsta->dev == dev) {
1161 if (net_ratelimit()) 1208 /*
1162 printk(KERN_DEBUG "Station with null " 1209 * The destination station is associated to
1163 "dev structure!\n"); 1210 * this AP (in this VLAN), so send the frame
1164 } else if (dsta && dsta->dev == dev) { 1211 * directly to it and do not pass it to local
1165 /* Destination station is associated to this 1212 * net stack.
1166 * AP, so send the frame directly to it and
1167 * do not pass the frame to local net stack.
1168 */ 1213 */
1169 skb2 = skb; 1214 xmit_skb = skb;
1170 skb = NULL; 1215 skb = NULL;
1171 } 1216 }
1172 if (dsta) 1217 if (dsta)
@@ -1181,18 +1226,207 @@ ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1181 netif_rx(skb); 1226 netif_rx(skb);
1182 } 1227 }
1183 1228
1184 if (skb2) { 1229 if (xmit_skb) {
1185 /* send to wireless media */ 1230 /* send to wireless media */
1186 skb2->protocol = __constant_htons(ETH_P_802_3); 1231 xmit_skb->protocol = htons(ETH_P_802_3);
1187 skb_set_network_header(skb2, 0); 1232 skb_reset_network_header(xmit_skb);
1188 skb_set_mac_header(skb2, 0); 1233 skb_reset_mac_header(xmit_skb);
1189 dev_queue_xmit(skb2); 1234 dev_queue_xmit(xmit_skb);
1190 } 1235 }
1236}
1237
1238static ieee80211_txrx_result
1239ieee80211_rx_h_amsdu(struct ieee80211_txrx_data *rx)
1240{
1241 struct net_device *dev = rx->dev;
1242 struct ieee80211_local *local = rx->local;
1243 u16 fc, ethertype;
1244 u8 *payload;
1245 struct sk_buff *skb = rx->skb, *frame = NULL;
1246 const struct ethhdr *eth;
1247 int remaining, err;
1248 u8 dst[ETH_ALEN];
1249 u8 src[ETH_ALEN];
1250 DECLARE_MAC_BUF(mac);
1251
1252 fc = rx->fc;
1253 if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
1254 return TXRX_CONTINUE;
1255
1256 if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
1257 return TXRX_DROP;
1258
1259 if (!(rx->flags & IEEE80211_TXRXD_RX_AMSDU))
1260 return TXRX_CONTINUE;
1261
1262 err = ieee80211_data_to_8023(rx);
1263 if (unlikely(err))
1264 return TXRX_DROP;
1265
1266 skb->dev = dev;
1267
1268 dev->stats.rx_packets++;
1269 dev->stats.rx_bytes += skb->len;
1270
1271 /* skip the wrapping header */
1272 eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr));
1273 if (!eth)
1274 return TXRX_DROP;
1275
1276 while (skb != frame) {
1277 u8 padding;
1278 __be16 len = eth->h_proto;
1279 unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len);
1280
1281 remaining = skb->len;
1282 memcpy(dst, eth->h_dest, ETH_ALEN);
1283 memcpy(src, eth->h_source, ETH_ALEN);
1284
1285 padding = ((4 - subframe_len) & 0x3);
1286 /* the last MSDU has no padding */
1287 if (subframe_len > remaining) {
1288 printk(KERN_DEBUG "%s: wrong buffer size\n", dev->name);
1289 return TXRX_DROP;
1290 }
1291
1292 skb_pull(skb, sizeof(struct ethhdr));
1293 /* if last subframe reuse skb */
1294 if (remaining <= subframe_len + padding)
1295 frame = skb;
1296 else {
1297 frame = dev_alloc_skb(local->hw.extra_tx_headroom +
1298 subframe_len);
1299
1300 if (frame == NULL)
1301 return TXRX_DROP;
1302
1303 skb_reserve(frame, local->hw.extra_tx_headroom +
1304 sizeof(struct ethhdr));
1305 memcpy(skb_put(frame, ntohs(len)), skb->data,
1306 ntohs(len));
1307
1308 eth = (struct ethhdr *) skb_pull(skb, ntohs(len) +
1309 padding);
1310 if (!eth) {
1311 printk(KERN_DEBUG "%s: wrong buffer size\n",
1312 dev->name);
1313 dev_kfree_skb(frame);
1314 return TXRX_DROP;
1315 }
1316 }
1317
1318 skb_reset_network_header(frame);
1319 frame->dev = dev;
1320 frame->priority = skb->priority;
1321 rx->skb = frame;
1322
1323 payload = frame->data;
1324 ethertype = (payload[6] << 8) | payload[7];
1325
1326 if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
1327 ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
1328 compare_ether_addr(payload,
1329 bridge_tunnel_header) == 0)) {
1330 /* remove RFC1042 or Bridge-Tunnel
1331 * encapsulation and replace EtherType */
1332 skb_pull(frame, 6);
1333 memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
1334 memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
1335 } else {
1336 memcpy(skb_push(frame, sizeof(__be16)),
1337 &len, sizeof(__be16));
1338 memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
1339 memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
1340 }
1341
1342 if (!ieee80211_frame_allowed(rx)) {
1343 if (skb == frame) /* last frame */
1344 return TXRX_DROP;
1345 dev_kfree_skb(frame);
1346 continue;
1347 }
1348
1349 ieee80211_deliver_skb(rx);
1350 }
1351
1352 return TXRX_QUEUED;
1353}
1354
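Each A-MSDU subframe (a 14-byte Ethernet-style header plus payload) is padded to a multiple of four bytes, except for the final subframe; a quick check of the (4 - subframe_len) & 0x3 rule used above, with illustrative payload lengths:

#include <stdio.h>

int main(void)
{
	unsigned int payloads[] = { 0, 1, 46, 1500 };
	unsigned int i;

	for (i = 0; i < 4; i++) {
		unsigned int subframe_len = 14 + payloads[i];
		unsigned char padding = (4 - subframe_len) & 0x3;

		printf("%4u-byte subframe -> %u padding byte(s)\n",
		       subframe_len, padding);
	}
	return 0;
}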
1355static ieee80211_txrx_result
1356ieee80211_rx_h_data(struct ieee80211_txrx_data *rx)
1357{
1358 struct net_device *dev = rx->dev;
1359 u16 fc;
1360 int err;
1361
1362 fc = rx->fc;
1363 if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA))
1364 return TXRX_CONTINUE;
1365
1366 if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
1367 return TXRX_DROP;
1368
1369 err = ieee80211_data_to_8023(rx);
1370 if (unlikely(err))
1371 return TXRX_DROP;
1372
1373 if (!ieee80211_frame_allowed(rx))
1374 return TXRX_DROP;
1375
1376 rx->skb->dev = dev;
1377
1378 dev->stats.rx_packets++;
1379 dev->stats.rx_bytes += rx->skb->len;
1380
1381 ieee80211_deliver_skb(rx);
1191 1382
1192 return TXRX_QUEUED; 1383 return TXRX_QUEUED;
1193} 1384}
1194 1385
1195static ieee80211_txrx_result 1386static ieee80211_txrx_result
1387ieee80211_rx_h_ctrl(struct ieee80211_txrx_data *rx)
1388{
1389 struct ieee80211_local *local = rx->local;
1390 struct ieee80211_hw *hw = &local->hw;
1391 struct sk_buff *skb = rx->skb;
1392 struct ieee80211_bar *bar = (struct ieee80211_bar *) skb->data;
1393 struct tid_ampdu_rx *tid_agg_rx;
1394 u16 start_seq_num;
1395 u16 tid;
1396
1397 if (likely((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL))
1398 return TXRX_CONTINUE;
1399
1400 if ((rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BACK_REQ) {
1401 if (!rx->sta)
1402 return TXRX_CONTINUE;
1403 tid = le16_to_cpu(bar->control) >> 12;
1404 tid_agg_rx = &(rx->sta->ampdu_mlme.tid_rx[tid]);
1405 if (tid_agg_rx->state != HT_AGG_STATE_OPERATIONAL)
1406 return TXRX_CONTINUE;
1407
1408 start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4;
1409
1410 /* reset session timer */
1411 if (tid_agg_rx->timeout) {
1412 unsigned long expires =
1413 jiffies + (tid_agg_rx->timeout / 1000) * HZ;
1414 mod_timer(&tid_agg_rx->session_timer, expires);
1415 }
1416
1417 /* manage the reordering buffer according to the requested
1418 * sequence number */
1419 rcu_read_lock();
1420 ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL,
1421 start_seq_num, 1);
1422 rcu_read_unlock();
1423 return TXRX_DROP;
1424 }
1425
1426 return TXRX_CONTINUE;
1427}
1428
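The shifts above reflect the BAR frame layout: the TID occupies the top four bits of the BAR control field, and the 12-bit starting sequence number sits above a four-bit fragment number in the sequence control field. A sketch with made-up field values:

#include <assert.h>

int main(void)
{
	unsigned short control = 0x5004; /* hypothetical BAR control */
	unsigned short ssc = 0x0fa0;     /* hypothetical seq control */

	assert((control >> 12) == 5);    /* TID 5 */
	assert((ssc >> 4) == 250);       /* start_seq_num 250 */
	return 0;
}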
1429static ieee80211_txrx_result
1196ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx) 1430ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx)
1197{ 1431{
1198 struct ieee80211_sub_if_data *sdata; 1432 struct ieee80211_sub_if_data *sdata;
@@ -1201,8 +1435,8 @@ ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx)
1201 return TXRX_DROP; 1435 return TXRX_DROP;
1202 1436
1203 sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); 1437 sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
1204 if ((sdata->type == IEEE80211_IF_TYPE_STA || 1438 if ((sdata->vif.type == IEEE80211_IF_TYPE_STA ||
1205 sdata->type == IEEE80211_IF_TYPE_IBSS) && 1439 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) &&
1206 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) 1440 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
1207 ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status); 1441 ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status);
1208 else 1442 else
@@ -1294,7 +1528,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
1294 goto ignore; 1528 goto ignore;
1295 } 1529 }
1296 1530
1297 if (rx->sdata->type == IEEE80211_IF_TYPE_AP && keyidx) { 1531 if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) {
1298 /* 1532 /*
1299 * APs with pairwise keys should never receive Michael MIC 1533 * APs with pairwise keys should never receive Michael MIC
1300 * errors for non-zero keyidx because these are reserved for 1534 * errors for non-zero keyidx because these are reserved for
@@ -1341,9 +1575,9 @@ ieee80211_rx_handler ieee80211_rx_handlers[] =
1341 * are not passed to user space by these functions 1575 * are not passed to user space by these functions
1342 */ 1576 */
1343 ieee80211_rx_h_remove_qos_control, 1577 ieee80211_rx_h_remove_qos_control,
1344 ieee80211_rx_h_802_1x_pae, 1578 ieee80211_rx_h_amsdu,
1345 ieee80211_rx_h_drop_unencrypted,
1346 ieee80211_rx_h_data, 1579 ieee80211_rx_h_data,
1580 ieee80211_rx_h_ctrl,
1347 ieee80211_rx_h_mgmt, 1581 ieee80211_rx_h_mgmt,
1348 NULL 1582 NULL
1349}; 1583};
@@ -1356,7 +1590,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
1356{ 1590{
1357 int multicast = is_multicast_ether_addr(hdr->addr1); 1591 int multicast = is_multicast_ether_addr(hdr->addr1);
1358 1592
1359 switch (sdata->type) { 1593 switch (sdata->vif.type) {
1360 case IEEE80211_IF_TYPE_STA: 1594 case IEEE80211_IF_TYPE_STA:
1361 if (!bssid) 1595 if (!bssid)
1362 return 0; 1596 return 0;
@@ -1427,11 +1661,13 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
1427} 1661}
1428 1662
1429/* 1663/*
1430 * This is the receive path handler. It is called by a low level driver when an 1664 * This is the actual Rx frames handler. As it belongs to the Rx path it must
1431 * 802.11 MPDU is received from the hardware. 1665 * be called with rcu_read_lock protection.
1432 */ 1666 */
1433void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, 1667static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
1434 struct ieee80211_rx_status *status) 1668 struct sk_buff *skb,
1669 struct ieee80211_rx_status *status,
1670 u32 load)
1435{ 1671{
1436 struct ieee80211_local *local = hw_to_local(hw); 1672 struct ieee80211_local *local = hw_to_local(hw);
1437 struct ieee80211_sub_if_data *sdata; 1673 struct ieee80211_sub_if_data *sdata;
@@ -1439,29 +1675,11 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1439 struct ieee80211_hdr *hdr; 1675 struct ieee80211_hdr *hdr;
1440 struct ieee80211_txrx_data rx; 1676 struct ieee80211_txrx_data rx;
1441 u16 type; 1677 u16 type;
1442 int prepres; 1678 int prepares;
1443 struct ieee80211_sub_if_data *prev = NULL; 1679 struct ieee80211_sub_if_data *prev = NULL;
1444 struct sk_buff *skb_new; 1680 struct sk_buff *skb_new;
1445 u8 *bssid; 1681 u8 *bssid;
1446 1682 int hdrlen;
1447 /*
1448 * key references and virtual interfaces are protected using RCU
1449 * and this requires that we are in a read-side RCU section during
1450 * receive processing
1451 */
1452 rcu_read_lock();
1453
1454 /*
1455 * Frames with failed FCS/PLCP checksum are not returned,
1456 * all other frames are returned without radiotap header
1457 * if it was previously present.
1458 * Also, frames with less than 16 bytes are dropped.
1459 */
1460 skb = ieee80211_rx_monitor(local, skb, status);
1461 if (!skb) {
1462 rcu_read_unlock();
1463 return;
1464 }
1465 1683
1466 hdr = (struct ieee80211_hdr *) skb->data; 1684 hdr = (struct ieee80211_hdr *) skb->data;
1467 memset(&rx, 0, sizeof(rx)); 1685 memset(&rx, 0, sizeof(rx));
@@ -1469,9 +1687,22 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1469 rx.local = local; 1687 rx.local = local;
1470 1688
1471 rx.u.rx.status = status; 1689 rx.u.rx.status = status;
1690 rx.u.rx.load = load;
1472 rx.fc = le16_to_cpu(hdr->frame_control); 1691 rx.fc = le16_to_cpu(hdr->frame_control);
1473 type = rx.fc & IEEE80211_FCTL_FTYPE; 1692 type = rx.fc & IEEE80211_FCTL_FTYPE;
1474 1693
1694 /*
1695 * Drivers are required to align the payload data to a four-byte
1696 * boundary, so the last two bits of the address where it starts
1697 * must not be set. The header is required to be directly before
1698 * the payload data. Padding such as Atheros hardware adds, which sits
1699 * in between the 802.11 header and the payload, is not supported;
1700 * the driver is required to move the 802.11 header further back
1701 * in that case.
1702 */
1703 hdrlen = ieee80211_get_hdrlen(rx.fc);
1704 WARN_ON_ONCE(((unsigned long)(skb->data + hdrlen)) & 3);
1705
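In other words, the address at which the payload begins must have its two least significant bits clear. A small sketch of the invariant (addresses and header lengths are illustrative):

#include <assert.h>
#include <stdint.h>

static int payload_aligned(uintptr_t data, int hdrlen)
{
	return ((data + hdrlen) & 3) == 0;
}

int main(void)
{
	assert(payload_aligned(0x1000, 24));     /* plain data header */
	assert(!payload_aligned(0x1000, 26));    /* QoS header misaligns */
	assert(payload_aligned(0x1000 - 2, 26)); /* header moved back 2 */
	return 0;
}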
1475 if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) 1706 if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT)
1476 local->dot11ReceivedFragmentCount++; 1707 local->dot11ReceivedFragmentCount++;
1477 1708
@@ -1486,7 +1717,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1486 goto end; 1717 goto end;
1487 } 1718 }
1488 1719
1489 if (unlikely(local->sta_scanning)) 1720 if (unlikely(local->sta_sw_scanning || local->sta_hw_scanning))
1490 rx.flags |= IEEE80211_TXRXD_RXIN_SCAN; 1721 rx.flags |= IEEE80211_TXRXD_RXIN_SCAN;
1491 1722
1492 if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx, 1723 if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx,
@@ -1501,25 +1732,23 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1501 ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx, 1732 ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx,
1502 rx.sta); 1733 rx.sta);
1503 sta_info_put(sta); 1734 sta_info_put(sta);
1504 rcu_read_unlock();
1505 return; 1735 return;
1506 } 1736 }
1507 1737
1508 bssid = ieee80211_get_bssid(hdr, skb->len);
1509
1510 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 1738 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
1511 if (!netif_running(sdata->dev)) 1739 if (!netif_running(sdata->dev))
1512 continue; 1740 continue;
1513 1741
1514 if (sdata->type == IEEE80211_IF_TYPE_MNTR) 1742 if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR)
1515 continue; 1743 continue;
1516 1744
1745 bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
1517 rx.flags |= IEEE80211_TXRXD_RXRA_MATCH; 1746 rx.flags |= IEEE80211_TXRXD_RXRA_MATCH;
1518 prepres = prepare_for_handlers(sdata, bssid, &rx, hdr); 1747 prepares = prepare_for_handlers(sdata, bssid, &rx, hdr);
1519 /* prepare_for_handlers can change sta */ 1748 /* prepare_for_handlers can change sta */
1520 sta = rx.sta; 1749 sta = rx.sta;
1521 1750
1522 if (!prepres) 1751 if (!prepares)
1523 continue; 1752 continue;
1524 1753
1525 /* 1754 /*
@@ -1547,6 +1776,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1547 prev->dev->name); 1776 prev->dev->name);
1548 continue; 1777 continue;
1549 } 1778 }
1779 rx.fc = le16_to_cpu(hdr->frame_control);
1550 rx.skb = skb_new; 1780 rx.skb = skb_new;
1551 rx.dev = prev->dev; 1781 rx.dev = prev->dev;
1552 rx.sdata = prev; 1782 rx.sdata = prev;
@@ -1555,6 +1785,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1555 prev = sdata; 1785 prev = sdata;
1556 } 1786 }
1557 if (prev) { 1787 if (prev) {
1788 rx.fc = le16_to_cpu(hdr->frame_control);
1558 rx.skb = skb; 1789 rx.skb = skb;
1559 rx.dev = prev->dev; 1790 rx.dev = prev->dev;
1560 rx.sdata = prev; 1791 rx.sdata = prev;
@@ -1564,10 +1795,230 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1564 dev_kfree_skb(skb); 1795 dev_kfree_skb(skb);
1565 1796
1566 end: 1797 end:
1567 rcu_read_unlock(); 1798 if (sta)
1799 sta_info_put(sta);
1800}
1801
1802#define SEQ_MODULO 0x1000
1803#define SEQ_MASK 0xfff
1804
1805static inline int seq_less(u16 sq1, u16 sq2)
1806{
1807 return (((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1));
1808}
1809
1810static inline u16 seq_inc(u16 sq)
1811{
1812 return ((sq + 1) & SEQ_MASK);
1813}
1814
1815static inline u16 seq_sub(u16 sq1, u16 sq2)
1816{
1817 return ((sq1 - sq2) & SEQ_MASK);
1818}
1819
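These helpers implement circular comparison over the 12-bit 802.11 sequence space: a value is considered less than another when it trails it by under half the modulus. A few spot checks of the wraparound behavior:

#include <assert.h>
#include <stdint.h>

#define SEQ_MODULO 0x1000
#define SEQ_MASK   0xfff

static int seq_less(uint16_t sq1, uint16_t sq2)
{
	return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1);
}

int main(void)
{
	assert(seq_less(5, 10));              /* plain ordering */
	assert(seq_less(4090, 3));            /* 4090 trails 3 across 0 */
	assert(!seq_less(3, 4090));
	assert(((4095 + 1) & SEQ_MASK) == 0); /* seq_inc wraps to 0 */
	assert(((3 - 4090) & SEQ_MASK) == 9); /* seq_sub spans the wrap */
	return 0;
}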
1820
1821/*
1822 * As this function belongs to the Rx path it must be called with
1823 * rcu_read_lock protection held.
1824 */
1825u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
1826 struct tid_ampdu_rx *tid_agg_rx,
1827 struct sk_buff *skb, u16 mpdu_seq_num,
1828 int bar_req)
1829{
1830 struct ieee80211_local *local = hw_to_local(hw);
1831 struct ieee80211_rx_status status;
1832 u16 head_seq_num, buf_size;
1833 int index;
1834 u32 pkt_load;
1835
1836 buf_size = tid_agg_rx->buf_size;
1837 head_seq_num = tid_agg_rx->head_seq_num;
1838
 1839	/* frame with out-of-date sequence number */
1840 if (seq_less(mpdu_seq_num, head_seq_num)) {
1841 dev_kfree_skb(skb);
1842 return 1;
1843 }
1844
 1845	/* if the frame sequence number exceeds our buffering window size,
 1846	 * or a Block Ack Request arrived - release stored frames */
1847 if ((!seq_less(mpdu_seq_num, head_seq_num + buf_size)) || (bar_req)) {
 1848		/* new head for the reordering buffer */
1849 if (bar_req)
1850 head_seq_num = mpdu_seq_num;
1851 else
1852 head_seq_num =
1853 seq_inc(seq_sub(mpdu_seq_num, buf_size));
1854 /* release stored frames up to new head to stack */
1855 while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) {
1856 index = seq_sub(tid_agg_rx->head_seq_num,
1857 tid_agg_rx->ssn)
1858 % tid_agg_rx->buf_size;
1859
1860 if (tid_agg_rx->reorder_buf[index]) {
1861 /* release the reordered frames to stack */
1862 memcpy(&status,
1863 tid_agg_rx->reorder_buf[index]->cb,
1864 sizeof(status));
1865 pkt_load = ieee80211_rx_load_stats(local,
1866 tid_agg_rx->reorder_buf[index],
1867 &status);
1868 __ieee80211_rx_handle_packet(hw,
1869 tid_agg_rx->reorder_buf[index],
1870 &status, pkt_load);
1871 tid_agg_rx->stored_mpdu_num--;
1872 tid_agg_rx->reorder_buf[index] = NULL;
1873 }
1874 tid_agg_rx->head_seq_num =
1875 seq_inc(tid_agg_rx->head_seq_num);
1876 }
1877 if (bar_req)
1878 return 1;
1879 }
1880
 1881	/* now the new frame is always in the range of the
 1882	 * reordering buffer window */
1883 index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn)
1884 % tid_agg_rx->buf_size;
1885 /* check if we already stored this frame */
1886 if (tid_agg_rx->reorder_buf[index]) {
1887 dev_kfree_skb(skb);
1888 return 1;
1889 }
1568 1890
 1891	/* if the arrived mpdu is in the right order and nothing
 1892	 * else is stored, release it immediately */
1893 if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
1894 tid_agg_rx->stored_mpdu_num == 0) {
1895 tid_agg_rx->head_seq_num =
1896 seq_inc(tid_agg_rx->head_seq_num);
1897 return 0;
1898 }
1899
1900 /* put the frame in the reordering buffer */
1901 tid_agg_rx->reorder_buf[index] = skb;
1902 tid_agg_rx->stored_mpdu_num++;
 1903	/* release buffered frames up to the next missing frame */
1904 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn)
1905 % tid_agg_rx->buf_size;
1906 while (tid_agg_rx->reorder_buf[index]) {
1907 /* release the reordered frame back to stack */
1908 memcpy(&status, tid_agg_rx->reorder_buf[index]->cb,
1909 sizeof(status));
1910 pkt_load = ieee80211_rx_load_stats(local,
1911 tid_agg_rx->reorder_buf[index],
1912 &status);
1913 __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index],
1914 &status, pkt_load);
1915 tid_agg_rx->stored_mpdu_num--;
1916 tid_agg_rx->reorder_buf[index] = NULL;
1917 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
1918 index = seq_sub(tid_agg_rx->head_seq_num,
1919 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
1920 }
1921 return 1;
1922}
1923
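
Both release loops in ieee80211_sta_manage_reorder_buf follow one pattern: map a sequence number to a window slot with seq_sub(seq, ssn) % buf_size, deliver the slot if occupied, and advance the head. A condensed standalone sketch of the in-order flush, with struct sk_buff reduced to a bare pointer:

#define SEQ_MASK 0xfff
#define WIN_MAX  64

struct reorder_win {
	void *buf[WIN_MAX];        /* one slot per outstanding frame         */
	unsigned int ssn;          /* starting sequence number of the session */
	unsigned int head_seq_num; /* next sequence number owed to the stack */
	unsigned int buf_size;     /* negotiated window size (<= WIN_MAX)    */
};

/* deliver consecutive stored frames starting at the window head */
static void release_in_order(struct reorder_win *w, void (*deliver)(void *))
{
	unsigned int index;

	index = ((w->head_seq_num - w->ssn) & SEQ_MASK) % w->buf_size;
	while (w->buf[index]) {
		deliver(w->buf[index]);
		w->buf[index] = NULL;
		w->head_seq_num = (w->head_seq_num + 1) & SEQ_MASK;
		index = ((w->head_seq_num - w->ssn) & SEQ_MASK) % w->buf_size;
	}
}
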
1924static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
1925 struct sk_buff *skb)
1926{
1927 struct ieee80211_hw *hw = &local->hw;
1928 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
1929 struct sta_info *sta;
1930 struct tid_ampdu_rx *tid_agg_rx;
1931 u16 fc, sc;
1932 u16 mpdu_seq_num;
1933 u8 ret = 0, *qc;
1934 int tid;
1935
1936 sta = sta_info_get(local, hdr->addr2);
1937 if (!sta)
1938 return ret;
1939
1940 fc = le16_to_cpu(hdr->frame_control);
1941
 1942	/* filter the QoS data rx stream according to
 1943	 * STA/TID and check if this STA/TID has an aggregation session */
1944 if (!WLAN_FC_IS_QOS_DATA(fc))
1945 goto end_reorder;
1946
1947 qc = skb->data + ieee80211_get_hdrlen(fc) - QOS_CONTROL_LEN;
1948 tid = qc[0] & QOS_CONTROL_TID_MASK;
1949 tid_agg_rx = &(sta->ampdu_mlme.tid_rx[tid]);
1950
1951 if (tid_agg_rx->state != HT_AGG_STATE_OPERATIONAL)
1952 goto end_reorder;
1953
1954 /* null data frames are excluded */
1955 if (unlikely(fc & IEEE80211_STYPE_QOS_NULLFUNC))
1956 goto end_reorder;
1957
 1958	/* new unordered ampdu frame - process it */
1959
1960 /* reset session timer */
1961 if (tid_agg_rx->timeout) {
1962 unsigned long expires =
1963 jiffies + (tid_agg_rx->timeout / 1000) * HZ;
1964 mod_timer(&tid_agg_rx->session_timer, expires);
1965 }
1966
1967 /* if this mpdu is fragmented - terminate rx aggregation session */
1968 sc = le16_to_cpu(hdr->seq_ctrl);
1969 if (sc & IEEE80211_SCTL_FRAG) {
1970 ieee80211_sta_stop_rx_ba_session(sta->dev, sta->addr,
1971 tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP);
1972 ret = 1;
1973 goto end_reorder;
1974 }
1975
 1976	/* deal with the reordering buffer according to the mpdu seq number */
1977 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
1978 ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb,
1979 mpdu_seq_num, 0);
1980end_reorder:
1569 if (sta) 1981 if (sta)
1570 sta_info_put(sta); 1982 sta_info_put(sta);
1983 return ret;
1984}
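
The TID lookup above steps back QOS_CONTROL_LEN (2) bytes from the end of the 802.11 header and masks the low four bits of the QoS Control field. A standalone sketch with those two mac80211 constants inlined:

/* extract the TID of a QoS data frame, given its total header length */
static int qos_tid(const unsigned char *frame, int hdrlen)
{
	const unsigned char *qc = frame + hdrlen - 2; /* QOS_CONTROL_LEN */

	return qc[0] & 0x0f;                    /* QOS_CONTROL_TID_MASK */
}
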
1985
1986/*
1987 * This is the receive path handler. It is called by a low level driver when an
1988 * 802.11 MPDU is received from the hardware.
1989 */
1990void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
1991 struct ieee80211_rx_status *status)
1992{
1993 struct ieee80211_local *local = hw_to_local(hw);
1994 u32 pkt_load;
1995
1996 /*
1997 * key references and virtual interfaces are protected using RCU
1998 * and this requires that we are in a read-side RCU section during
1999 * receive processing
2000 */
2001 rcu_read_lock();
2002
2003 /*
2004 * Frames with failed FCS/PLCP checksum are not returned,
2005 * all other frames are returned without radiotap header
2006 * if it was previously present.
2007 * Also, frames with less than 16 bytes are dropped.
2008 */
2009 skb = ieee80211_rx_monitor(local, skb, status);
2010 if (!skb) {
2011 rcu_read_unlock();
2012 return;
2013 }
2014
2015 pkt_load = ieee80211_rx_load_stats(local, skb, status);
2016 local->channel_use_raw += pkt_load;
2017
2018 if (!ieee80211_rx_reorder_ampdu(local, skb))
2019 __ieee80211_rx_handle_packet(hw, skb, status, pkt_load);
2020
2021 rcu_read_unlock();
1571} 2022}
1572EXPORT_SYMBOL(__ieee80211_rx); 2023EXPORT_SYMBOL(__ieee80211_rx);
1573 2024
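The rewritten entry point takes rcu_read_lock() once and holds it across monitor handling, load statistics and A-MPDU reordering, since key and interface lookups on the Rx path are RCU-protected. A kernel-context sketch of the reader pattern this depends on; the payload type and accessor are hypothetical:

#include <linux/rcupdate.h>

struct rx_ctx { int users; };                /* hypothetical payload */

static void consume(struct rx_ctx *ctx) { ctx->users++; }

static void rx_side_reader(struct rx_ctx **slot)
{
	struct rx_ctx *ctx;

	rcu_read_lock();
	ctx = rcu_dereference(*slot); /* valid only inside this section */
	if (ctx)
		consume(ctx);
	rcu_read_unlock();            /* ctx must not be used past here */
}
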
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index cfd8ee9adad0..1f74bd296357 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -104,6 +104,7 @@ static void sta_info_release(struct kref *kref)
104 struct sta_info *sta = container_of(kref, struct sta_info, kref); 104 struct sta_info *sta = container_of(kref, struct sta_info, kref);
105 struct ieee80211_local *local = sta->local; 105 struct ieee80211_local *local = sta->local;
106 struct sk_buff *skb; 106 struct sk_buff *skb;
107 int i;
107 108
108 /* free sta structure; it has already been removed from 109 /* free sta structure; it has already been removed from
109 * hash table etc. external structures. Make sure that all 110 * hash table etc. external structures. Make sure that all
@@ -116,6 +117,8 @@ static void sta_info_release(struct kref *kref)
116 while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { 117 while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) {
117 dev_kfree_skb_any(skb); 118 dev_kfree_skb_any(skb);
118 } 119 }
120 for (i = 0; i < STA_TID_NUM; i++)
121 del_timer_sync(&sta->ampdu_mlme.tid_rx[i].session_timer);
119 rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); 122 rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv);
120 rate_control_put(sta->rate_ctrl); 123 rate_control_put(sta->rate_ctrl);
121 kfree(sta); 124 kfree(sta);
@@ -133,6 +136,7 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
133 struct net_device *dev, u8 *addr, gfp_t gfp) 136 struct net_device *dev, u8 *addr, gfp_t gfp)
134{ 137{
135 struct sta_info *sta; 138 struct sta_info *sta;
139 int i;
136 DECLARE_MAC_BUF(mac); 140 DECLARE_MAC_BUF(mac);
137 141
138 sta = kzalloc(sizeof(*sta), gfp); 142 sta = kzalloc(sizeof(*sta), gfp);
@@ -152,6 +156,19 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
152 memcpy(sta->addr, addr, ETH_ALEN); 156 memcpy(sta->addr, addr, ETH_ALEN);
153 sta->local = local; 157 sta->local = local;
154 sta->dev = dev; 158 sta->dev = dev;
159 spin_lock_init(&sta->ampdu_mlme.ampdu_rx);
160 for (i = 0; i < STA_TID_NUM; i++) {
161 /* timer_to_tid must be initialized with identity mapping to
 162		 * enable session_timer's data differentiation. Refer to
 163		 * sta_rx_agg_session_timer_expired for usage */
164 sta->timer_to_tid[i] = i;
165 /* rx timers */
166 sta->ampdu_mlme.tid_rx[i].session_timer.function =
167 sta_rx_agg_session_timer_expired;
168 sta->ampdu_mlme.tid_rx[i].session_timer.data =
169 (unsigned long)&sta->timer_to_tid[i];
170 init_timer(&sta->ampdu_mlme.tid_rx[i].session_timer);
171 }
155 skb_queue_head_init(&sta->ps_tx_buf); 172 skb_queue_head_init(&sta->ps_tx_buf);
156 skb_queue_head_init(&sta->tx_filtered); 173 skb_queue_head_init(&sta->tx_filtered);
157 __sta_info_get(sta); /* sta used by caller, decremented by 174 __sta_info_get(sta); /* sta used by caller, decremented by
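
The identity mapping matters because a timer callback receives a single unsigned long of context: it is handed &sta->timer_to_tid[tid], so *ptid yields the TID and ptid - *ptid yields the array base, from which the owning structure can be recovered. A standalone sketch of that recovery, with a printout standing in for the real session teardown:

#include <stdio.h>
#include <stddef.h>

#define STA_TID_NUM 16

struct sta {
	unsigned char timer_to_tid[STA_TID_NUM];
};

static void session_timer_expired(unsigned long data)
{
	unsigned char *ptid = (unsigned char *)data;
	unsigned char tid = *ptid;        /* identity map: slot i holds i */
	struct sta *sta = (struct sta *)
		((char *)(ptid - tid) - offsetof(struct sta, timer_to_tid));

	printf("rx session timed out: tid %u, sta %p\n", tid, (void *)sta);
}

int main(void)
{
	struct sta s;
	unsigned int i;

	for (i = 0; i < STA_TID_NUM; i++)
		s.timer_to_tid[i] = i;
	session_timer_expired((unsigned long)&s.timer_to_tid[5]); /* tid 5 */
	return 0;
}
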
@@ -160,9 +177,16 @@ struct sta_info * sta_info_add(struct ieee80211_local *local,
160 list_add(&sta->list, &local->sta_list); 177 list_add(&sta->list, &local->sta_list);
161 local->num_sta++; 178 local->num_sta++;
162 sta_info_hash_add(local, sta); 179 sta_info_hash_add(local, sta);
163 if (local->ops->sta_notify) 180 if (local->ops->sta_notify) {
164 local->ops->sta_notify(local_to_hw(local), dev->ifindex, 181 struct ieee80211_sub_if_data *sdata;
165 STA_NOTIFY_ADD, addr); 182
183 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
184 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
185 sdata = sdata->u.vlan.ap;
186
187 local->ops->sta_notify(local_to_hw(local), &sdata->vif,
188 STA_NOTIFY_ADD, addr);
189 }
166 write_unlock_bh(&local->sta_lock); 190 write_unlock_bh(&local->sta_lock);
167 191
168#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 192#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -230,9 +254,17 @@ void sta_info_free(struct sta_info *sta)
230 ieee80211_key_free(sta->key); 254 ieee80211_key_free(sta->key);
231 sta->key = NULL; 255 sta->key = NULL;
232 256
233 if (local->ops->sta_notify) 257 if (local->ops->sta_notify) {
234 local->ops->sta_notify(local_to_hw(local), sta->dev->ifindex, 258 struct ieee80211_sub_if_data *sdata;
235 STA_NOTIFY_REMOVE, sta->addr); 259
260 sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev);
261
262 if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN)
263 sdata = sdata->u.vlan.ap;
264
265 local->ops->sta_notify(local_to_hw(local), &sdata->vif,
266 STA_NOTIFY_REMOVE, sta->addr);
267 }
236 268
237 rate_control_remove_sta_debugfs(sta); 269 rate_control_remove_sta_debugfs(sta);
238 ieee80211_sta_debugfs_remove(sta); 270 ieee80211_sta_debugfs_remove(sta);
@@ -346,11 +378,10 @@ void sta_info_init(struct ieee80211_local *local)
346 rwlock_init(&local->sta_lock); 378 rwlock_init(&local->sta_lock);
347 INIT_LIST_HEAD(&local->sta_list); 379 INIT_LIST_HEAD(&local->sta_list);
348 380
349 init_timer(&local->sta_cleanup); 381 setup_timer(&local->sta_cleanup, sta_info_cleanup,
382 (unsigned long)local);
350 local->sta_cleanup.expires = 383 local->sta_cleanup.expires =
351 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); 384 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL);
352 local->sta_cleanup.data = (unsigned long) local;
353 local->sta_cleanup.function = sta_info_cleanup;
354 385
355#ifdef CONFIG_MAC80211_DEBUGFS 386#ifdef CONFIG_MAC80211_DEBUGFS
356 INIT_WORK(&local->sta_debugfs_add, sta_info_debugfs_add_task); 387 INIT_WORK(&local->sta_debugfs_add, sta_info_debugfs_add_task);
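
The sta_cleanup hunk is a pure refactor: setup_timer() folds the separate function and data assignments into the initialisation call. The two equivalent forms, as a kernel-context sketch:

/* before: three statements */
init_timer(&local->sta_cleanup);
local->sta_cleanup.function = sta_info_cleanup;
local->sta_cleanup.data = (unsigned long)local;

/* after: one statement, same effect */
setup_timer(&local->sta_cleanup, sta_info_cleanup, (unsigned long)local);
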
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 8f7ebe41c024..96fe3ed95038 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -31,6 +31,51 @@
31#define WLAN_STA_WME BIT(9) 31#define WLAN_STA_WME BIT(9)
32#define WLAN_STA_WDS BIT(27) 32#define WLAN_STA_WDS BIT(27)
33 33
34#define STA_TID_NUM 16
35#define ADDBA_RESP_INTERVAL HZ
36
37#define HT_AGG_STATE_INITIATOR_SHIFT (4)
38
39#define HT_AGG_STATE_REQ_STOP_BA_MSK BIT(3)
40
41#define HT_AGG_STATE_IDLE (0x0)
42#define HT_AGG_STATE_OPERATIONAL (0x7)
43
44/**
45 * struct tid_ampdu_rx - TID aggregation information (Rx).
46 *
47 * @state: TID's state in session state machine.
48 * @dialog_token: dialog token for aggregation session
49 * @ssn: Starting Sequence Number expected to be aggregated.
50 * @buf_size: buffer size for incoming A-MPDUs
51 * @timeout: reset timer value.
52 * @head_seq_num: head sequence number in reordering buffer.
53 * @stored_mpdu_num: number of MPDUs in reordering buffer
54 * @reorder_buf: buffer to reorder incoming aggregated MPDUs
55 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
56 */
57struct tid_ampdu_rx {
58 u8 state;
59 u8 dialog_token;
60 u16 ssn;
61 u16 buf_size;
62 u16 timeout;
63 u16 head_seq_num;
64 u16 stored_mpdu_num;
65 struct sk_buff **reorder_buf;
66 struct timer_list session_timer;
67};
68
69/**
70 * struct sta_ampdu_mlme - STA aggregation information.
71 *
 72 * @tid_rx: aggregation info for Rx per TID
73 * @ampdu_rx: for locking sections in aggregation Rx flow
74 */
75struct sta_ampdu_mlme {
76 struct tid_ampdu_rx tid_rx[STA_TID_NUM];
77 spinlock_t ampdu_rx;
78};
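
Note that reorder_buf is only a pointer here: each Rx session still has to allocate buf_size slots once the block-ack agreement is accepted. A kernel-context sketch of such a setup helper, hypothetical rather than part of this patch:

static int tid_rx_start(struct tid_ampdu_rx *tid_rx, u16 ssn, u16 buf_size)
{
	tid_rx->reorder_buf = kcalloc(buf_size, sizeof(struct sk_buff *),
				      GFP_ATOMIC);
	if (!tid_rx->reorder_buf)
		return -ENOMEM;

	tid_rx->ssn = ssn;
	tid_rx->head_seq_num = ssn;   /* window opens at the agreed SSN */
	tid_rx->buf_size = buf_size;
	tid_rx->stored_mpdu_num = 0;
	tid_rx->state = HT_AGG_STATE_OPERATIONAL;
	return 0;
}
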
34 79
35struct sta_info { 80struct sta_info {
36 struct kref kref; 81 struct kref kref;
@@ -99,6 +144,11 @@ struct sta_info {
99 144
100 u16 listen_interval; 145 u16 listen_interval;
101 146
147 struct ieee80211_ht_info ht_info; /* 802.11n HT capabilities
148 of this STA */
149 struct sta_ampdu_mlme ampdu_mlme;
150 u8 timer_to_tid[STA_TID_NUM]; /* convert timer id to tid */
151
102#ifdef CONFIG_MAC80211_DEBUGFS 152#ifdef CONFIG_MAC80211_DEBUGFS
103 struct sta_info_debugfsdentries { 153 struct sta_info_debugfsdentries {
104 struct dentry *dir; 154 struct dentry *dir;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1a531543bccb..67b509edd431 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -176,7 +176,7 @@ static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr,
176 * to closest integer */ 176 * to closest integer */
177 177
178 dur = ieee80211_frame_duration(local, 10, rate, erp, 178 dur = ieee80211_frame_duration(local, 10, rate, erp,
179 tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); 179 tx->sdata->bss_conf.use_short_preamble);
180 180
181 if (next_frag_len) { 181 if (next_frag_len) {
182 /* Frame is fragmented: duration increases with time needed to 182 /* Frame is fragmented: duration increases with time needed to
@@ -185,8 +185,7 @@ static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr,
185 /* next fragment */ 185 /* next fragment */
186 dur += ieee80211_frame_duration(local, next_frag_len, 186 dur += ieee80211_frame_duration(local, next_frag_len,
187 txrate->rate, erp, 187 txrate->rate, erp,
188 tx->sdata->flags & 188 tx->sdata->bss_conf.use_short_preamble);
189 IEEE80211_SDATA_SHORT_PREAMBLE);
190 } 189 }
191 190
192 return dur; 191 return dur;
@@ -225,7 +224,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
225 if (unlikely(tx->flags & IEEE80211_TXRXD_TX_INJECTED)) 224 if (unlikely(tx->flags & IEEE80211_TXRXD_TX_INJECTED))
226 return TXRX_CONTINUE; 225 return TXRX_CONTINUE;
227 226
228 if (unlikely(tx->local->sta_scanning != 0) && 227 if (unlikely(tx->local->sta_sw_scanning) &&
229 ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || 228 ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
230 (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) 229 (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ))
231 return TXRX_DROP; 230 return TXRX_DROP;
@@ -237,7 +236,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
237 236
238 if (likely(tx->flags & IEEE80211_TXRXD_TXUNICAST)) { 237 if (likely(tx->flags & IEEE80211_TXRXD_TXUNICAST)) {
239 if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && 238 if (unlikely(!(sta_flags & WLAN_STA_ASSOC) &&
240 tx->sdata->type != IEEE80211_IF_TYPE_IBSS && 239 tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS &&
241 (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { 240 (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) {
242#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 241#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
243 DECLARE_MAC_BUF(mac); 242 DECLARE_MAC_BUF(mac);
@@ -251,7 +250,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
251 } else { 250 } else {
252 if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && 251 if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA &&
253 tx->local->num_sta == 0 && 252 tx->local->num_sta == 0 &&
254 tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) { 253 tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) {
255 /* 254 /*
256 * No associated STAs - no need to send multicast 255 * No associated STAs - no need to send multicast
257 * frames. 256 * frames.
@@ -261,18 +260,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx)
261 return TXRX_CONTINUE; 260 return TXRX_CONTINUE;
262 } 261 }
263 262
264 if (unlikely(/* !injected && */ tx->sdata->ieee802_1x &&
265 !(sta_flags & WLAN_STA_AUTHORIZED))) {
266#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
267 DECLARE_MAC_BUF(mac);
268 printk(KERN_DEBUG "%s: dropped frame to %s"
269 " (unauthorized port)\n", tx->dev->name,
270 print_mac(mac, hdr->addr1));
271#endif
272 I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port);
273 return TXRX_DROP;
274 }
275
276 return TXRX_CONTINUE; 263 return TXRX_CONTINUE;
277} 264}
278 265
@@ -306,7 +293,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
306 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 293 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
307 struct ieee80211_if_ap *ap; 294 struct ieee80211_if_ap *ap;
308 if (sdata->dev == local->mdev || 295 if (sdata->dev == local->mdev ||
309 sdata->type != IEEE80211_IF_TYPE_AP) 296 sdata->vif.type != IEEE80211_IF_TYPE_AP)
310 continue; 297 continue;
311 ap = &sdata->u.ap; 298 ap = &sdata->u.ap;
312 skb = skb_dequeue(&ap->ps_bc_buf); 299 skb = skb_dequeue(&ap->ps_bc_buf);
@@ -334,16 +321,27 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
334 wiphy_name(local->hw.wiphy), purged); 321 wiphy_name(local->hw.wiphy), purged);
335} 322}
336 323
337static inline ieee80211_txrx_result 324static ieee80211_txrx_result
338ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) 325ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx)
339{ 326{
340 /* broadcast/multicast frame */ 327 /*
341 /* If any of the associated stations is in power save mode, 328 * broadcast/multicast frame
342 * the frame is buffered to be sent after DTIM beacon frame */ 329 *
343 if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) && 330 * If any of the associated stations is in power save mode,
344 tx->sdata->type != IEEE80211_IF_TYPE_WDS && 331 * the frame is buffered to be sent after DTIM beacon frame.
345 tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) && 332 * This is done either by the hardware or us.
346 !(tx->fc & IEEE80211_FCTL_ORDER)) { 333 */
334
335 /* not AP/IBSS or ordered frame */
336 if (!tx->sdata->bss || (tx->fc & IEEE80211_FCTL_ORDER))
337 return TXRX_CONTINUE;
338
339 /* no stations in PS mode */
340 if (!atomic_read(&tx->sdata->bss->num_sta_ps))
341 return TXRX_CONTINUE;
342
343 /* buffered in mac80211 */
344 if (tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) {
347 if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) 345 if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
348 purge_old_ps_buffers(tx->local); 346 purge_old_ps_buffers(tx->local);
349 if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= 347 if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >=
@@ -360,10 +358,13 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx)
360 return TXRX_QUEUED; 358 return TXRX_QUEUED;
361 } 359 }
362 360
361 /* buffered in hardware */
362 tx->u.tx.control->flags |= IEEE80211_TXCTL_SEND_AFTER_DTIM;
363
363 return TXRX_CONTINUE; 364 return TXRX_CONTINUE;
364} 365}
365 366
366static inline ieee80211_txrx_result 367static ieee80211_txrx_result
367ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx) 368ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx)
368{ 369{
369 struct sta_info *sta = tx->sta; 370 struct sta_info *sta = tx->sta;
@@ -420,7 +421,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx)
420 return TXRX_CONTINUE; 421 return TXRX_CONTINUE;
421} 422}
422 423
423
424static ieee80211_txrx_result 424static ieee80211_txrx_result
425ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) 425ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx)
426{ 426{
@@ -433,13 +433,11 @@ ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx)
433 return ieee80211_tx_h_multicast_ps_buf(tx); 433 return ieee80211_tx_h_multicast_ps_buf(tx);
434} 434}
435 435
436
437
438
439static ieee80211_txrx_result 436static ieee80211_txrx_result
440ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx) 437ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
441{ 438{
442 struct ieee80211_key *key; 439 struct ieee80211_key *key;
440 u16 fc = tx->fc;
443 441
444 if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) 442 if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT))
445 tx->key = NULL; 443 tx->key = NULL;
@@ -448,19 +446,38 @@ ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx)
448 else if ((key = rcu_dereference(tx->sdata->default_key))) 446 else if ((key = rcu_dereference(tx->sdata->default_key)))
449 tx->key = key; 447 tx->key = key;
450 else if (tx->sdata->drop_unencrypted && 448 else if (tx->sdata->drop_unencrypted &&
451 !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) { 449 !(tx->u.tx.control->flags & IEEE80211_TXCTL_EAPOL_FRAME) &&
450 !(tx->flags & IEEE80211_TXRXD_TX_INJECTED)) {
452 I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); 451 I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
453 return TXRX_DROP; 452 return TXRX_DROP;
454 } else { 453 } else
455 tx->key = NULL; 454 tx->key = NULL;
456 tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
457 }
458 455
459 if (tx->key) { 456 if (tx->key) {
457 u16 ftype, stype;
458
460 tx->key->tx_rx_count++; 459 tx->key->tx_rx_count++;
461 /* TODO: add threshold stuff again */ 460 /* TODO: add threshold stuff again */
461
462 switch (tx->key->conf.alg) {
463 case ALG_WEP:
464 ftype = fc & IEEE80211_FCTL_FTYPE;
465 stype = fc & IEEE80211_FCTL_STYPE;
466
467 if (ftype == IEEE80211_FTYPE_MGMT &&
468 stype == IEEE80211_STYPE_AUTH)
469 break;
470 case ALG_TKIP:
471 case ALG_CCMP:
472 if (!WLAN_FC_DATA_PRESENT(fc))
473 tx->key = NULL;
474 break;
475 }
462 } 476 }
463 477
478 if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
479 tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
480
464 return TXRX_CONTINUE; 481 return TXRX_CONTINUE;
465} 482}
466 483
@@ -567,21 +584,17 @@ ieee80211_tx_h_encrypt(struct ieee80211_txrx_data *tx)
567static ieee80211_txrx_result 584static ieee80211_txrx_result
568ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) 585ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx)
569{ 586{
570 struct rate_control_extra extra; 587 struct rate_selection rsel;
571 588
572 if (likely(!tx->u.tx.rate)) { 589 if (likely(!tx->u.tx.rate)) {
573 memset(&extra, 0, sizeof(extra)); 590 rate_control_get_rate(tx->dev, tx->u.tx.mode, tx->skb, &rsel);
574 extra.mode = tx->u.tx.mode; 591 tx->u.tx.rate = rsel.rate;
575 extra.ethertype = tx->ethertype; 592 if (unlikely(rsel.probe != NULL)) {
576
577 tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev,
578 tx->skb, &extra);
579 if (unlikely(extra.probe != NULL)) {
580 tx->u.tx.control->flags |= 593 tx->u.tx.control->flags |=
581 IEEE80211_TXCTL_RATE_CTRL_PROBE; 594 IEEE80211_TXCTL_RATE_CTRL_PROBE;
582 tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; 595 tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
583 tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val; 596 tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val;
584 tx->u.tx.rate = extra.probe; 597 tx->u.tx.rate = rsel.probe;
585 } else 598 } else
586 tx->u.tx.control->alt_retry_rate = -1; 599 tx->u.tx.control->alt_retry_rate = -1;
587 600
@@ -591,15 +604,15 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx)
591 tx->u.tx.control->alt_retry_rate = -1; 604 tx->u.tx.control->alt_retry_rate = -1;
592 605
593 if (tx->u.tx.mode->mode == MODE_IEEE80211G && 606 if (tx->u.tx.mode->mode == MODE_IEEE80211G &&
594 (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && 607 tx->sdata->bss_conf.use_cts_prot &&
595 (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && extra.nonerp) { 608 (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && rsel.nonerp) {
596 tx->u.tx.last_frag_rate = tx->u.tx.rate; 609 tx->u.tx.last_frag_rate = tx->u.tx.rate;
597 if (extra.probe) 610 if (rsel.probe)
598 tx->flags &= ~IEEE80211_TXRXD_TXPROBE_LAST_FRAG; 611 tx->flags &= ~IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
599 else 612 else
600 tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; 613 tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG;
601 tx->u.tx.rate = extra.nonerp; 614 tx->u.tx.rate = rsel.nonerp;
602 tx->u.tx.control->rate = extra.nonerp; 615 tx->u.tx.control->rate = rsel.nonerp;
603 tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; 616 tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE;
604 } else { 617 } else {
605 tx->u.tx.last_frag_rate = tx->u.tx.rate; 618 tx->u.tx.last_frag_rate = tx->u.tx.rate;
@@ -653,7 +666,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
653 if (mode->mode == MODE_IEEE80211G && 666 if (mode->mode == MODE_IEEE80211G &&
654 (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && 667 (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) &&
655 (tx->flags & IEEE80211_TXRXD_TXUNICAST) && 668 (tx->flags & IEEE80211_TXRXD_TXUNICAST) &&
656 (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && 669 tx->sdata->bss_conf.use_cts_prot &&
657 !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) 670 !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS))
658 control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; 671 control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT;
659 672
@@ -662,7 +675,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
662 * available on the network at the current point in time. */ 675 * available on the network at the current point in time. */
663 if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && 676 if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) &&
664 (tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) && 677 (tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) &&
665 (tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && 678 tx->sdata->bss_conf.use_short_preamble &&
666 (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { 679 (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) {
667 tx->u.tx.control->tx_rate = tx->u.tx.rate->val2; 680 tx->u.tx.control->tx_rate = tx->u.tx.rate->val2;
668 } 681 }
@@ -706,15 +719,6 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx)
706 } 719 }
707 } 720 }
708 721
709 /*
710 * Tell hardware to not encrypt when we had sw crypto.
711 * Because we use the same flag to internally indicate that
712 * no (software) encryption should be done, we have to set it
713 * after all crypto handlers.
714 */
715 if (tx->key && !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
716 tx->u.tx.control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
717
718 return TXRX_CONTINUE; 722 return TXRX_CONTINUE;
719} 723}
720 724
@@ -927,7 +931,6 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
927 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 931 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
928 struct ieee80211_hdr *hdr; 932 struct ieee80211_hdr *hdr;
929 struct ieee80211_sub_if_data *sdata; 933 struct ieee80211_sub_if_data *sdata;
930 ieee80211_txrx_result res = TXRX_CONTINUE;
931 934
932 int hdrlen; 935 int hdrlen;
933 936
@@ -945,7 +948,7 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
945 948
946 /* process and remove the injection radiotap header */ 949 /* process and remove the injection radiotap header */
947 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 950 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
948 if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { 951 if (unlikely(sdata->vif.type == IEEE80211_IF_TYPE_MNTR)) {
949 if (__ieee80211_parse_tx_radiotap(tx, skb) == TXRX_DROP) 952 if (__ieee80211_parse_tx_radiotap(tx, skb) == TXRX_DROP)
950 return TXRX_DROP; 953 return TXRX_DROP;
951 954
@@ -992,12 +995,10 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
992 } 995 }
993 control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; 996 control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT;
994 997
995 return res; 998 return TXRX_CONTINUE;
996} 999}
997 1000
998/* Device in tx->dev has a reference added; use dev_put(tx->dev) when 1001/*
999 * finished with it.
1000 *
1001 * NB: @tx is uninitialised when passed in here 1002 * NB: @tx is uninitialised when passed in here
1002 */ 1003 */
1003static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, 1004static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
@@ -1018,6 +1019,7 @@ static int ieee80211_tx_prepare(struct ieee80211_txrx_data *tx,
1018 return -ENODEV; 1019 return -ENODEV;
1019 /* initialises tx with control */ 1020 /* initialises tx with control */
1020 __ieee80211_tx_prepare(tx, skb, dev, control); 1021 __ieee80211_tx_prepare(tx, skb, dev, control);
1022 dev_put(dev);
1021 return 0; 1023 return 0;
1022} 1024}
1023 1025
@@ -1248,14 +1250,16 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
1248 } 1250 }
1249 } 1251 }
1250 1252
1251 control.ifindex = odev->ifindex; 1253 control.vif = &osdata->vif;
1252 control.type = osdata->type; 1254 control.type = osdata->vif.type;
1253 if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS) 1255 if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS)
1254 control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; 1256 control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS;
1255 if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT) 1257 if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT)
1256 control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; 1258 control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT;
1257 if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) 1259 if (pkt_data->flags & IEEE80211_TXPD_REQUEUE)
1258 control.flags |= IEEE80211_TXCTL_REQUEUE; 1260 control.flags |= IEEE80211_TXCTL_REQUEUE;
1261 if (pkt_data->flags & IEEE80211_TXPD_EAPOL_FRAME)
1262 control.flags |= IEEE80211_TXCTL_EAPOL_FRAME;
1259 control.queue = pkt_data->queue; 1263 control.queue = pkt_data->queue;
1260 1264
1261 ret = ieee80211_tx(odev, skb, &control); 1265 ret = ieee80211_tx(odev, skb, &control);
@@ -1348,6 +1352,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1348 int encaps_len, skip_header_bytes; 1352 int encaps_len, skip_header_bytes;
1349 int nh_pos, h_pos; 1353 int nh_pos, h_pos;
1350 struct sta_info *sta; 1354 struct sta_info *sta;
1355 u32 sta_flags = 0;
1351 1356
1352 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1357 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1353 if (unlikely(skb->len < ETH_HLEN)) { 1358 if (unlikely(skb->len < ETH_HLEN)) {
@@ -1363,10 +1368,9 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1363 /* convert Ethernet header to proper 802.11 header (based on 1368 /* convert Ethernet header to proper 802.11 header (based on
1364 * operation mode) */ 1369 * operation mode) */
1365 ethertype = (skb->data[12] << 8) | skb->data[13]; 1370 ethertype = (skb->data[12] << 8) | skb->data[13];
1366 /* TODO: handling for 802.1x authorized/unauthorized port */
1367 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; 1371 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA;
1368 1372
1369 switch (sdata->type) { 1373 switch (sdata->vif.type) {
1370 case IEEE80211_IF_TYPE_AP: 1374 case IEEE80211_IF_TYPE_AP:
1371 case IEEE80211_IF_TYPE_VLAN: 1375 case IEEE80211_IF_TYPE_VLAN:
1372 fc |= IEEE80211_FCTL_FROMDS; 1376 fc |= IEEE80211_FCTL_FROMDS;
@@ -1405,16 +1409,42 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1405 goto fail; 1409 goto fail;
1406 } 1410 }
1407 1411
1408 /* receiver is QoS enabled, use a QoS type frame */
1409 sta = sta_info_get(local, hdr.addr1); 1412 sta = sta_info_get(local, hdr.addr1);
1410 if (sta) { 1413 if (sta) {
1411 if (sta->flags & WLAN_STA_WME) { 1414 sta_flags = sta->flags;
1412 fc |= IEEE80211_STYPE_QOS_DATA;
1413 hdrlen += 2;
1414 }
1415 sta_info_put(sta); 1415 sta_info_put(sta);
1416 } 1416 }
1417 1417
1418 /* receiver is QoS enabled, use a QoS type frame */
1419 if (sta_flags & WLAN_STA_WME) {
1420 fc |= IEEE80211_STYPE_QOS_DATA;
1421 hdrlen += 2;
1422 }
1423
1424 /*
1425 * If port access control is enabled, drop frames to unauthorised
1426 * stations unless they are EAPOL frames from the local station.
1427 */
1428 if (unlikely(sdata->ieee802_1x_pac &&
1429 !(sta_flags & WLAN_STA_AUTHORIZED) &&
1430 !(ethertype == ETH_P_PAE &&
1431 compare_ether_addr(dev->dev_addr,
1432 skb->data + ETH_ALEN) == 0))) {
1433#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1434 DECLARE_MAC_BUF(mac);
1435
1436 if (net_ratelimit())
1437 printk(KERN_DEBUG "%s: dropped frame to %s"
1438 " (unauthorized port)\n", dev->name,
1439 print_mac(mac, hdr.addr1));
1440#endif
1441
1442 I802_DEBUG_INC(local->tx_handlers_drop_unauth_port);
1443
1444 ret = 0;
1445 goto fail;
1446 }
1447
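
In prose: when port access control is enabled and the peer has not passed 802.1X authorisation, only EAPOL frames that we ourselves originate may go out, so the handshake can still complete. A standalone sketch of the same predicate:

#include <string.h>

#define ETH_ALEN  6
#define ETH_P_PAE 0x888E

/* returns nonzero if the frame may be transmitted */
static int port_allows_tx(int pac_enabled, int sta_authorized,
			  unsigned int ethertype,
			  const unsigned char *src,
			  const unsigned char *own_addr)
{
	if (!pac_enabled || sta_authorized)
		return 1;
	/* unauthorised port: only our own EAPOL frames get through */
	return ethertype == ETH_P_PAE &&
	       memcmp(src, own_addr, ETH_ALEN) == 0;
}
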
1418 hdr.frame_control = cpu_to_le16(fc); 1448 hdr.frame_control = cpu_to_le16(fc);
1419 hdr.duration_id = 0; 1449 hdr.duration_id = 0;
1420 hdr.seq_ctrl = 0; 1450 hdr.seq_ctrl = 0;
@@ -1503,6 +1533,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1503 pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; 1533 pkt_data = (struct ieee80211_tx_packet_data *)skb->cb;
1504 memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); 1534 memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
1505 pkt_data->ifindex = dev->ifindex; 1535 pkt_data->ifindex = dev->ifindex;
1536 if (ethertype == ETH_P_PAE)
1537 pkt_data->flags |= IEEE80211_TXPD_EAPOL_FRAME;
1506 1538
1507 skb->dev = local->mdev; 1539 skb->dev = local->mdev;
1508 dev->stats.tx_packets++; 1540 dev->stats.tx_packets++;
@@ -1527,64 +1559,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1527 return ret; 1559 return ret;
1528} 1560}
1529 1561
1530/*
1531 * This is the transmit routine for the 802.11 type interfaces
1532 * called by upper layers of the linux networking
1533 * stack when it has a frame to transmit
1534 */
1535int ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev)
1536{
1537 struct ieee80211_sub_if_data *sdata;
1538 struct ieee80211_tx_packet_data *pkt_data;
1539 struct ieee80211_hdr *hdr;
1540 u16 fc;
1541
1542 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1543
1544 if (skb->len < 10) {
1545 dev_kfree_skb(skb);
1546 return 0;
1547 }
1548
1549 if (skb_headroom(skb) < sdata->local->tx_headroom) {
1550 if (pskb_expand_head(skb, sdata->local->tx_headroom,
1551 0, GFP_ATOMIC)) {
1552 dev_kfree_skb(skb);
1553 return 0;
1554 }
1555 }
1556
1557 hdr = (struct ieee80211_hdr *) skb->data;
1558 fc = le16_to_cpu(hdr->frame_control);
1559
1560 pkt_data = (struct ieee80211_tx_packet_data *) skb->cb;
1561 memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
1562 pkt_data->ifindex = sdata->dev->ifindex;
1563
1564 skb->priority = 20; /* use hardcoded priority for mgmt TX queue */
1565 skb->dev = sdata->local->mdev;
1566
1567 /*
 1568	 * We're using the protocol field of the frame control header
1569 * to request TX callback for hostapd. BIT(1) is checked.
1570 */
1571 if ((fc & BIT(1)) == BIT(1)) {
1572 pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS;
1573 fc &= ~BIT(1);
1574 hdr->frame_control = cpu_to_le16(fc);
1575 }
1576
1577 if (!(fc & IEEE80211_FCTL_PROTECTED))
1578 pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT;
1579
1580 dev->stats.tx_packets++;
1581 dev->stats.tx_bytes += skb->len;
1582
1583 dev_queue_xmit(skb);
1584
1585 return 0;
1586}
1587
1588/* helper functions for pending packets for when queues are stopped */ 1562/* helper functions for pending packets for when queues are stopped */
1589 1563
1590void ieee80211_clear_tx_pending(struct ieee80211_local *local) 1564void ieee80211_clear_tx_pending(struct ieee80211_local *local)
@@ -1653,7 +1627,8 @@ void ieee80211_tx_pending(unsigned long data)
1653 1627
1654static void ieee80211_beacon_add_tim(struct ieee80211_local *local, 1628static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
1655 struct ieee80211_if_ap *bss, 1629 struct ieee80211_if_ap *bss,
1656 struct sk_buff *skb) 1630 struct sk_buff *skb,
1631 struct beacon_data *beacon)
1657{ 1632{
1658 u8 *pos, *tim; 1633 u8 *pos, *tim;
1659 int aid0 = 0; 1634 int aid0 = 0;
@@ -1669,7 +1644,7 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
1669 IEEE80211_MAX_AID+1); 1644 IEEE80211_MAX_AID+1);
1670 1645
1671 if (bss->dtim_count == 0) 1646 if (bss->dtim_count == 0)
1672 bss->dtim_count = bss->dtim_period - 1; 1647 bss->dtim_count = beacon->dtim_period - 1;
1673 else 1648 else
1674 bss->dtim_count--; 1649 bss->dtim_count--;
1675 1650
@@ -1677,7 +1652,7 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
1677 *pos++ = WLAN_EID_TIM; 1652 *pos++ = WLAN_EID_TIM;
1678 *pos++ = 4; 1653 *pos++ = 4;
1679 *pos++ = bss->dtim_count; 1654 *pos++ = bss->dtim_count;
1680 *pos++ = bss->dtim_period; 1655 *pos++ = beacon->dtim_period;
1681 1656
1682 if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) 1657 if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf))
1683 aid0 = 1; 1658 aid0 = 1;
@@ -1715,7 +1690,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_local *local,
1715 read_unlock_bh(&local->sta_lock); 1690 read_unlock_bh(&local->sta_lock);
1716} 1691}
1717 1692
1718struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, 1693struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
1694 struct ieee80211_vif *vif,
1719 struct ieee80211_tx_control *control) 1695 struct ieee80211_tx_control *control)
1720{ 1696{
1721 struct ieee80211_local *local = hw_to_local(hw); 1697 struct ieee80211_local *local = hw_to_local(hw);
@@ -1723,68 +1699,64 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
1723 struct net_device *bdev; 1699 struct net_device *bdev;
1724 struct ieee80211_sub_if_data *sdata = NULL; 1700 struct ieee80211_sub_if_data *sdata = NULL;
1725 struct ieee80211_if_ap *ap = NULL; 1701 struct ieee80211_if_ap *ap = NULL;
1726 struct ieee80211_rate *rate; 1702 struct rate_selection rsel;
1727 struct rate_control_extra extra; 1703 struct beacon_data *beacon;
1728 u8 *b_head, *b_tail; 1704
1729 int bh_len, bt_len; 1705 rcu_read_lock();
1730
1731 bdev = dev_get_by_index(&init_net, if_id);
1732 if (bdev) {
1733 sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
1734 ap = &sdata->u.ap;
1735 dev_put(bdev);
1736 }
1737 1706
1738 if (!ap || sdata->type != IEEE80211_IF_TYPE_AP || 1707 sdata = vif_to_sdata(vif);
1739 !ap->beacon_head) { 1708 bdev = sdata->dev;
1709 ap = &sdata->u.ap;
1710
1711 beacon = rcu_dereference(ap->beacon);
1712
1713 if (!ap || sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon) {
1740#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1714#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1741 if (net_ratelimit()) 1715 if (net_ratelimit())
1742 printk(KERN_DEBUG "no beacon data avail for idx=%d " 1716 printk(KERN_DEBUG "no beacon data avail for %s\n",
1743 "(%s)\n", if_id, bdev ? bdev->name : "N/A"); 1717 bdev->name);
1744#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 1718#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
1745 return NULL; 1719 skb = NULL;
1720 goto out;
1746 } 1721 }
1747 1722
1748 /* Assume we are generating the normal beacon locally */ 1723 /* headroom, head length, tail length and maximum TIM length */
1749 b_head = ap->beacon_head; 1724 skb = dev_alloc_skb(local->tx_headroom + beacon->head_len +
1750 b_tail = ap->beacon_tail; 1725 beacon->tail_len + 256);
1751 bh_len = ap->beacon_head_len;
1752 bt_len = ap->beacon_tail_len;
1753
1754 skb = dev_alloc_skb(local->tx_headroom +
1755 bh_len + bt_len + 256 /* maximum TIM len */);
1756 if (!skb) 1726 if (!skb)
1757 return NULL; 1727 goto out;
1758 1728
1759 skb_reserve(skb, local->tx_headroom); 1729 skb_reserve(skb, local->tx_headroom);
1760 memcpy(skb_put(skb, bh_len), b_head, bh_len); 1730 memcpy(skb_put(skb, beacon->head_len), beacon->head,
1731 beacon->head_len);
1761 1732
1762 ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); 1733 ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data);
1763 1734
1764 ieee80211_beacon_add_tim(local, ap, skb); 1735 ieee80211_beacon_add_tim(local, ap, skb, beacon);
1765 1736
1766 if (b_tail) { 1737 if (beacon->tail)
1767 memcpy(skb_put(skb, bt_len), b_tail, bt_len); 1738 memcpy(skb_put(skb, beacon->tail_len), beacon->tail,
1768 } 1739 beacon->tail_len);
1769 1740
1770 if (control) { 1741 if (control) {
1771 memset(&extra, 0, sizeof(extra)); 1742 rate_control_get_rate(local->mdev, local->oper_hw_mode, skb,
1772 extra.mode = local->oper_hw_mode; 1743 &rsel);
1773 1744 if (!rsel.rate) {
1774 rate = rate_control_get_rate(local, local->mdev, skb, &extra);
1775 if (!rate) {
1776 if (net_ratelimit()) { 1745 if (net_ratelimit()) {
1777 printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate " 1746 printk(KERN_DEBUG "%s: ieee80211_beacon_get: "
1778 "found\n", wiphy_name(local->hw.wiphy)); 1747 "no rate found\n",
1748 wiphy_name(local->hw.wiphy));
1779 } 1749 }
1780 dev_kfree_skb(skb); 1750 dev_kfree_skb(skb);
1781 return NULL; 1751 skb = NULL;
1752 goto out;
1782 } 1753 }
1783 1754
1755 control->vif = vif;
1784 control->tx_rate = 1756 control->tx_rate =
1785 ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && 1757 (sdata->bss_conf.use_short_preamble &&
1786 (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? 1758 (rsel.rate->flags & IEEE80211_RATE_PREAMBLE2)) ?
1787 rate->val2 : rate->val; 1759 rsel.rate->val2 : rsel.rate->val;
1788 control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; 1760 control->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
1789 control->power_level = local->hw.conf.power_level; 1761 control->power_level = local->hw.conf.power_level;
1790 control->flags |= IEEE80211_TXCTL_NO_ACK; 1762 control->flags |= IEEE80211_TXCTL_NO_ACK;
@@ -1793,11 +1765,14 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id,
1793 } 1765 }
1794 1766
1795 ap->num_beacons++; 1767 ap->num_beacons++;
1768
1769 out:
1770 rcu_read_unlock();
1796 return skb; 1771 return skb;
1797} 1772}
1798EXPORT_SYMBOL(ieee80211_beacon_get); 1773EXPORT_SYMBOL(ieee80211_beacon_get);
1799 1774
1800void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id, 1775void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
1801 const void *frame, size_t frame_len, 1776 const void *frame, size_t frame_len,
1802 const struct ieee80211_tx_control *frame_txctl, 1777 const struct ieee80211_tx_control *frame_txctl,
1803 struct ieee80211_rts *rts) 1778 struct ieee80211_rts *rts)
@@ -1807,13 +1782,14 @@ void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id,
1807 1782
1808 fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; 1783 fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS;
1809 rts->frame_control = cpu_to_le16(fctl); 1784 rts->frame_control = cpu_to_le16(fctl);
1810 rts->duration = ieee80211_rts_duration(hw, if_id, frame_len, frame_txctl); 1785 rts->duration = ieee80211_rts_duration(hw, vif, frame_len,
1786 frame_txctl);
1811 memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); 1787 memcpy(rts->ra, hdr->addr1, sizeof(rts->ra));
1812 memcpy(rts->ta, hdr->addr2, sizeof(rts->ta)); 1788 memcpy(rts->ta, hdr->addr2, sizeof(rts->ta));
1813} 1789}
1814EXPORT_SYMBOL(ieee80211_rts_get); 1790EXPORT_SYMBOL(ieee80211_rts_get);
1815 1791
1816void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id, 1792void ieee80211_ctstoself_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
1817 const void *frame, size_t frame_len, 1793 const void *frame, size_t frame_len,
1818 const struct ieee80211_tx_control *frame_txctl, 1794 const struct ieee80211_tx_control *frame_txctl,
1819 struct ieee80211_cts *cts) 1795 struct ieee80211_cts *cts)
@@ -1823,13 +1799,15 @@ void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id,
1823 1799
1824 fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; 1800 fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS;
1825 cts->frame_control = cpu_to_le16(fctl); 1801 cts->frame_control = cpu_to_le16(fctl);
1826 cts->duration = ieee80211_ctstoself_duration(hw, if_id, frame_len, frame_txctl); 1802 cts->duration = ieee80211_ctstoself_duration(hw, vif,
1803 frame_len, frame_txctl);
1827 memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); 1804 memcpy(cts->ra, hdr->addr1, sizeof(cts->ra));
1828} 1805}
1829EXPORT_SYMBOL(ieee80211_ctstoself_get); 1806EXPORT_SYMBOL(ieee80211_ctstoself_get);
1830 1807
1831struct sk_buff * 1808struct sk_buff *
1832ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, 1809ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
1810 struct ieee80211_vif *vif,
1833 struct ieee80211_tx_control *control) 1811 struct ieee80211_tx_control *control)
1834{ 1812{
1835 struct ieee80211_local *local = hw_to_local(hw); 1813 struct ieee80211_local *local = hw_to_local(hw);
@@ -1841,16 +1819,25 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id,
1841 struct net_device *bdev; 1819 struct net_device *bdev;
1842 struct ieee80211_sub_if_data *sdata; 1820 struct ieee80211_sub_if_data *sdata;
1843 struct ieee80211_if_ap *bss = NULL; 1821 struct ieee80211_if_ap *bss = NULL;
1822 struct beacon_data *beacon;
1844 1823
1845 bdev = dev_get_by_index(&init_net, if_id); 1824 sdata = vif_to_sdata(vif);
1846 if (bdev) { 1825 bdev = sdata->dev;
1847 sdata = IEEE80211_DEV_TO_SUB_IF(bdev); 1826
1848 bss = &sdata->u.ap; 1827
1849 dev_put(bdev); 1828 if (!bss)
1850 }
1851 if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head)
1852 return NULL; 1829 return NULL;
1853 1830
1831 rcu_read_lock();
1832 beacon = rcu_dereference(bss->beacon);
1833
1834 if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon ||
1835 !beacon->head) {
1836 rcu_read_unlock();
1837 return NULL;
1838 }
1839 rcu_read_unlock();
1840
1854 if (bss->dtim_count != 0) 1841 if (bss->dtim_count != 0)
1855 return NULL; /* send buffered bc/mc only after DTIM beacon */ 1842 return NULL; /* send buffered bc/mc only after DTIM beacon */
1856 memset(control, 0, sizeof(*control)); 1843 memset(control, 0, sizeof(*control));
@@ -1883,7 +1870,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id,
1883 if (res == TXRX_DROP || res == TXRX_QUEUED) 1870 if (res == TXRX_DROP || res == TXRX_QUEUED)
1884 break; 1871 break;
1885 } 1872 }
1886 dev_put(tx.dev);
1887 skb = tx.skb; /* handlers are allowed to change skb */ 1873 skb = tx.skb; /* handlers are allowed to change skb */
1888 1874
1889 if (res == TXRX_DROP) { 1875 if (res == TXRX_DROP) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5a0564e1dbd6..5e631ce98d7e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -22,6 +22,7 @@
22#include <linux/bitmap.h> 22#include <linux/bitmap.h>
23#include <net/net_namespace.h> 23#include <net/net_namespace.h>
24#include <net/cfg80211.h> 24#include <net/cfg80211.h>
25#include <net/rtnetlink.h>
25 26
26#include "ieee80211_i.h" 27#include "ieee80211_i.h"
27#include "ieee80211_rate.h" 28#include "ieee80211_rate.h"
@@ -39,10 +40,6 @@ const unsigned char rfc1042_header[] =
39const unsigned char bridge_tunnel_header[] = 40const unsigned char bridge_tunnel_header[] =
40 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 41 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
41 42
42/* No encapsulation header if EtherType < 0x600 (=length) */
43static const unsigned char eapol_header[] =
44 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e };
45
46 43
47static int rate_list_match(const int *rate_list, int rate) 44static int rate_list_match(const int *rate_list, int rate)
48{ 45{
@@ -130,17 +127,21 @@ void ieee80211_prepare_rates(struct ieee80211_local *local,
130 } 127 }
131} 128}
132 129
133u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len) 130u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
131 enum ieee80211_if_types type)
134{ 132{
135 u16 fc; 133 u16 fc;
136 134
137 if (len < 24) 135 /* drop ACK/CTS frames and incorrect hdr len (ctrl) */
136 if (len < 16)
138 return NULL; 137 return NULL;
139 138
140 fc = le16_to_cpu(hdr->frame_control); 139 fc = le16_to_cpu(hdr->frame_control);
141 140
142 switch (fc & IEEE80211_FCTL_FTYPE) { 141 switch (fc & IEEE80211_FCTL_FTYPE) {
143 case IEEE80211_FTYPE_DATA: 142 case IEEE80211_FTYPE_DATA:
143 if (len < 24) /* drop incorrect hdr len (data) */
144 return NULL;
144 switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 145 switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
145 case IEEE80211_FCTL_TODS: 146 case IEEE80211_FCTL_TODS:
146 return hdr->addr1; 147 return hdr->addr1;
@@ -153,10 +154,24 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len)
153 } 154 }
154 break; 155 break;
155 case IEEE80211_FTYPE_MGMT: 156 case IEEE80211_FTYPE_MGMT:
157 if (len < 24) /* drop incorrect hdr len (mgmt) */
158 return NULL;
156 return hdr->addr3; 159 return hdr->addr3;
157 case IEEE80211_FTYPE_CTL: 160 case IEEE80211_FTYPE_CTL:
158 if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) 161 if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)
159 return hdr->addr1; 162 return hdr->addr1;
163 else if ((fc & IEEE80211_FCTL_STYPE) ==
164 IEEE80211_STYPE_BACK_REQ) {
165 switch (type) {
166 case IEEE80211_IF_TYPE_STA:
167 return hdr->addr2;
168 case IEEE80211_IF_TYPE_AP:
169 case IEEE80211_IF_TYPE_VLAN:
170 return hdr->addr1;
171 default:
172 return NULL;
173 }
174 }
160 else 175 else
161 return NULL; 176 return NULL;
162 } 177 }
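
For data frames, which address field holds the BSSID follows from the ToDS/FromDS bits. A standalone sketch of the mapping, consistent with the cases above (FCTL values as in the 802.11 headers):

#define FCTL_TODS   0x0100 /* IEEE80211_FCTL_TODS   */
#define FCTL_FROMDS 0x0200 /* IEEE80211_FCTL_FROMDS */

/* returns which address field (1..3) holds the BSSID, 0 if none */
static int bssid_addr_slot(unsigned int fc)
{
	switch (fc & (FCTL_TODS | FCTL_FROMDS)) {
	case FCTL_TODS:
		return 1; /* to the AP: BSSID is the receiver address  */
	case FCTL_FROMDS:
		return 2; /* from the AP: BSSID is the transmitter     */
	case 0:
		return 3; /* neither: IBSS/direct, BSSID sits in addr3 */
	default:
		return 0; /* ToDS|FromDS (WDS): no single BSSID field  */
	}
}
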
@@ -217,31 +232,6 @@ int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
217} 232}
218EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); 233EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
219 234
220int ieee80211_is_eapol(const struct sk_buff *skb)
221{
222 const struct ieee80211_hdr *hdr;
223 u16 fc;
224 int hdrlen;
225
226 if (unlikely(skb->len < 10))
227 return 0;
228
229 hdr = (const struct ieee80211_hdr *) skb->data;
230 fc = le16_to_cpu(hdr->frame_control);
231
232 if (unlikely(!WLAN_FC_DATA_PRESENT(fc)))
233 return 0;
234
235 hdrlen = ieee80211_get_hdrlen(fc);
236
237 if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) &&
238 memcmp(skb->data + hdrlen, eapol_header,
239 sizeof(eapol_header)) == 0))
240 return 1;
241
242 return 0;
243}
244
245void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx) 235void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx)
246{ 236{
247 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; 237 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
@@ -312,45 +302,35 @@ int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
312} 302}
313 303
314/* Exported duration function for driver use */ 304/* Exported duration function for driver use */
315__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, int if_id, 305__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
306 struct ieee80211_vif *vif,
316 size_t frame_len, int rate) 307 size_t frame_len, int rate)
317{ 308{
318 struct ieee80211_local *local = hw_to_local(hw); 309 struct ieee80211_local *local = hw_to_local(hw);
319 struct net_device *bdev = dev_get_by_index(&init_net, if_id); 310 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
320 struct ieee80211_sub_if_data *sdata;
321 u16 dur; 311 u16 dur;
322 int erp; 312 int erp;
323 313
324 if (unlikely(!bdev))
325 return 0;
326
327 sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
328 erp = ieee80211_is_erp_rate(hw->conf.phymode, rate); 314 erp = ieee80211_is_erp_rate(hw->conf.phymode, rate);
329 dur = ieee80211_frame_duration(local, frame_len, rate, 315 dur = ieee80211_frame_duration(local, frame_len, rate, erp,
330 erp, sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); 316 sdata->bss_conf.use_short_preamble);
331 317
332 dev_put(bdev);
333 return cpu_to_le16(dur); 318 return cpu_to_le16(dur);
334} 319}
335EXPORT_SYMBOL(ieee80211_generic_frame_duration); 320EXPORT_SYMBOL(ieee80211_generic_frame_duration);
336 321
337__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id, 322__le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
338 size_t frame_len, 323 struct ieee80211_vif *vif, size_t frame_len,
339 const struct ieee80211_tx_control *frame_txctl) 324 const struct ieee80211_tx_control *frame_txctl)
340{ 325{
341 struct ieee80211_local *local = hw_to_local(hw); 326 struct ieee80211_local *local = hw_to_local(hw);
342 struct ieee80211_rate *rate; 327 struct ieee80211_rate *rate;
-	struct net_device *bdev = dev_get_by_index(&init_net, if_id);
-	struct ieee80211_sub_if_data *sdata;
-	int short_preamble;
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	bool short_preamble;
 	int erp;
 	u16 dur;
 
-	if (unlikely(!bdev))
-		return 0;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
-	short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
+	short_preamble = sdata->bss_conf.use_short_preamble;
 
 	rate = frame_txctl->rts_rate;
 	erp = !!(rate->flags & IEEE80211_RATE_ERP);
@@ -365,28 +345,23 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id,
 	dur += ieee80211_frame_duration(local, 10, rate->rate,
 					erp, short_preamble);
 
-	dev_put(bdev);
 	return cpu_to_le16(dur);
 }
 EXPORT_SYMBOL(ieee80211_rts_duration);
 
-__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id,
+__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
+				    struct ieee80211_vif *vif,
 				    size_t frame_len,
 				    const struct ieee80211_tx_control *frame_txctl)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_rate *rate;
-	struct net_device *bdev = dev_get_by_index(&init_net, if_id);
-	struct ieee80211_sub_if_data *sdata;
-	int short_preamble;
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	bool short_preamble;
 	int erp;
 	u16 dur;
 
-	if (unlikely(!bdev))
-		return 0;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
-	short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
+	short_preamble = sdata->bss_conf.use_short_preamble;
 
 	rate = frame_txctl->rts_rate;
 	erp = !!(rate->flags & IEEE80211_RATE_ERP);
@@ -400,7 +375,6 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id,
 					erp, short_preamble);
 	}
 
-	dev_put(bdev);
 	return cpu_to_le16(dur);
 }
 EXPORT_SYMBOL(ieee80211_ctstoself_duration);
@@ -484,3 +458,37 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw)
 		ieee80211_wake_queue(hw, i);
 }
 EXPORT_SYMBOL(ieee80211_wake_queues);
+
+void ieee80211_iterate_active_interfaces(
+	struct ieee80211_hw *hw,
+	void (*iterator)(void *data, u8 *mac,
+			 struct ieee80211_vif *vif),
+	void *data)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_sub_if_data *sdata;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		switch (sdata->vif.type) {
+		case IEEE80211_IF_TYPE_INVALID:
+		case IEEE80211_IF_TYPE_MNTR:
+		case IEEE80211_IF_TYPE_VLAN:
+			continue;
+		case IEEE80211_IF_TYPE_AP:
+		case IEEE80211_IF_TYPE_STA:
+		case IEEE80211_IF_TYPE_IBSS:
+		case IEEE80211_IF_TYPE_WDS:
+			break;
+		}
+		if (sdata->dev == local->mdev)
+			continue;
+		if (netif_running(sdata->dev))
+			iterator(data, sdata->dev->dev_addr,
+				 &sdata->vif);
+	}
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
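The helper added above centralizes the RCU-protected walk over local->interfaces that drivers previously had to open-code. A minimal, hedged usage sketch follows; my_priv, my_iface_iter and my_sync_filter are hypothetical driver-side names, not part of this patch:

/* Hypothetical driver callback: invoked once per running interface,
 * under rcu_read_lock(), so it must not sleep. */
static void my_iface_iter(void *data, u8 *mac, struct ieee80211_vif *vif)
{
	struct my_priv *priv = data;

	/* "mac" is sdata->dev->dev_addr for the interface behind "vif" */
	my_sync_filter(priv, mac, vif);
}

static void my_resync(struct ieee80211_hw *hw, struct my_priv *priv)
{
	ieee80211_iterate_active_interfaces(hw, my_iface_iter, priv);
}

Monitor and VLAN interfaces are deliberately skipped by the switch statement above, so the callback only ever sees AP, STA, IBSS and WDS interfaces.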
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index b5f3413403bd..a0cff72a580b 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -349,16 +349,6 @@ static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb)
 ieee80211_txrx_result
 ieee80211_crypto_wep_encrypt(struct ieee80211_txrx_data *tx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-	u16 fc;
-
-	fc = le16_to_cpu(hdr->frame_control);
-
-	if (((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA &&
-	    ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
-	     (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)))
-		return TXRX_CONTINUE;
-
 	tx->u.tx.control->iv_len = WEP_IV_LEN;
 	tx->u.tx.control->icv_len = WEP_ICV_LEN;
 	ieee80211_tx_set_iswep(tx);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 5b8a157975a3..4e236599dd31 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -28,6 +28,7 @@ struct ieee80211_sched_data
 	struct sk_buff_head requeued[TC_80211_MAX_QUEUES];
 };
 
+static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0};
 
 /* given a data frame determine the 802.1p/1d tag to use */
 static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
@@ -54,12 +55,12 @@ static inline unsigned classify_1d(struct sk_buff *skb, struct Qdisc *qd)
 		return skb->priority - 256;
 
 	/* check there is a valid IP header present */
-	offset = ieee80211_get_hdrlen_from_skb(skb) + 8 /* LLC + proto */;
-	if (skb->protocol != __constant_htons(ETH_P_IP) ||
-	    skb->len < offset + sizeof(*ip))
+	offset = ieee80211_get_hdrlen_from_skb(skb);
+	if (skb->len < offset + sizeof(llc_ip_hdr) + sizeof(*ip) ||
+	    memcmp(skb->data + offset, llc_ip_hdr, sizeof(llc_ip_hdr)))
 		return 0;
 
-	ip = (struct iphdr *) (skb->data + offset);
+	ip = (struct iphdr *) (skb->data + offset + sizeof(llc_ip_hdr));
 
 	dscp = ip->tos & 0xfc;
 	if (dscp & 0x1c)
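For orientation, llc_ip_hdr is the 802.2 LLC/SNAP encapsulation of IPv4 as carried in 802.11 data frames; a comment-only sketch of the layout the new check assumes (informational, not taken from the patch):

/* Bytes following the 802.11 header on a SNAP-encapsulated IPv4 frame:
 *   DSAP=0xAA  SSAP=0xAA  CTRL=0x03  OUI=00:00:00  EtherType=0x0800
 * llc_ip_hdr[] is exactly this 8-byte prefix, so the single memcmp()
 * both validates the encapsulation and selects IPv4, and the iphdr
 * begins sizeof(llc_ip_hdr) bytes past the 802.11 header. */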
@@ -296,16 +297,16 @@ static void wme_qdiscop_destroy(struct Qdisc* qd)
 
 
 /* called whenever parameters are updated on existing qdisc */
-static int wme_qdiscop_tune(struct Qdisc *qd, struct rtattr *opt)
+static int wme_qdiscop_tune(struct Qdisc *qd, struct nlattr *opt)
 {
 /*	struct ieee80211_sched_data *q = qdisc_priv(qd);
 */
 	/* check our options block is the right size */
 	/* copy any options to our local structure */
 /* Ignore options block for now - always use static mapping
-	struct tc_ieee80211_qopt *qopt = RTA_DATA(opt);
+	struct tc_ieee80211_qopt *qopt = nla_data(opt);
 
-	if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
+	if (opt->nla_len < nla_attr_size(sizeof(*qopt)))
 		return -EINVAL;
 	memcpy(q->tag2queue, qopt->tag2queue, sizeof(qopt->tag2queue));
 */
@@ -314,7 +315,7 @@ static int wme_qdiscop_tune(struct Qdisc *qd, struct rtattr *opt)
 
 
 /* called during initial creation of qdisc on device */
-static int wme_qdiscop_init(struct Qdisc *qd, struct rtattr *opt)
+static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt)
 {
 	struct ieee80211_sched_data *q = qdisc_priv(qd);
 	struct net_device *dev = qd->dev;
@@ -369,10 +370,10 @@ static int wme_qdiscop_dump(struct Qdisc *qd, struct sk_buff *skb)
 	struct tc_ieee80211_qopt opt;
 
 	memcpy(&opt.tag2queue, q->tag2queue, TC_80211_MAX_TAG + 1);
-	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 */	return skb->len;
 /*
-rtattr_failure:
+nla_put_failure:
 	skb_trim(skb, p - skb->data);*/
 	return -1;
 }
@@ -443,7 +444,7 @@ static void wme_classop_put(struct Qdisc *q, unsigned long cl)
 
 
 static int wme_classop_change(struct Qdisc *qd, u32 handle, u32 parent,
-			      struct rtattr **tca, unsigned long *arg)
+			      struct nlattr **tca, unsigned long *arg)
 {
 	unsigned long cl = *arg;
 	struct ieee80211_local *local = wdev_priv(qd->dev->ieee80211_ptr);
@@ -527,7 +528,7 @@ static struct tcf_proto ** wme_classop_find_tcf(struct Qdisc *qd,
 
 /* this qdisc is classful (i.e. has classes, some of which may have leaf qdiscs attached)
  * - these are the operations on the classes */
-static struct Qdisc_class_ops class_ops =
+static const struct Qdisc_class_ops class_ops =
 {
 	.graft = wme_classop_graft,
 	.leaf = wme_classop_leaf,
@@ -547,7 +548,7 @@ static struct Qdisc_class_ops class_ops =
 
 
 /* queueing discipline operations */
-static struct Qdisc_ops wme_qdisc_ops =
+static struct Qdisc_ops wme_qdisc_ops __read_mostly =
 {
 	.next = NULL,
 	.cl_ops = &class_ops,
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 20cec1cb956f..6f04311cf0a0 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -245,16 +245,9 @@ static int tkip_encrypt_skb(struct ieee80211_txrx_data *tx,
 ieee80211_txrx_result
 ieee80211_crypto_tkip_encrypt(struct ieee80211_txrx_data *tx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-	u16 fc;
 	struct sk_buff *skb = tx->skb;
 	int wpa_test = 0, test = 0;
 
-	fc = le16_to_cpu(hdr->frame_control);
-
-	if (!WLAN_FC_DATA_PRESENT(fc))
-		return TXRX_CONTINUE;
-
 	tx->u.tx.control->icv_len = TKIP_ICV_LEN;
 	tx->u.tx.control->iv_len = TKIP_IV_LEN;
 	ieee80211_tx_set_iswep(tx);
@@ -501,16 +494,9 @@ static int ccmp_encrypt_skb(struct ieee80211_txrx_data *tx,
 ieee80211_txrx_result
 ieee80211_crypto_ccmp_encrypt(struct ieee80211_txrx_data *tx)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
-	u16 fc;
 	struct sk_buff *skb = tx->skb;
 	int test = 0;
 
-	fc = le16_to_cpu(hdr->frame_control);
-
-	if (!WLAN_FC_DATA_PRESENT(fc))
-		return TXRX_CONTINUE;
-
 	tx->u.tx.control->icv_len = CCMP_MIC_LEN;
 	tx->u.tx.control->iv_len = CCMP_HDR_LEN;
 	ieee80211_tx_set_iswep(tx);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 21a9fcc03796..daf5b881064d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -2,21 +2,20 @@ menu "Core Netfilter Configuration"
 	depends on NET && INET && NETFILTER
 
 config NETFILTER_NETLINK
-	tristate "Netfilter netlink interface"
-	help
-	  If this option is enabled, the kernel will include support
-	  for the new netfilter netlink interface.
+	tristate
 
 config NETFILTER_NETLINK_QUEUE
 	tristate "Netfilter NFQUEUE over NFNETLINK interface"
-	depends on NETFILTER_NETLINK
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_NETLINK
 	help
 	  If this option is enabled, the kernel will include support
 	  for queueing packets via NFNETLINK.
 
 config NETFILTER_NETLINK_LOG
 	tristate "Netfilter LOG over NFNETLINK interface"
-	depends on NETFILTER_NETLINK
+	default m if NETFILTER_ADVANCED=n
+	select NETFILTER_NETLINK
 	help
 	  If this option is enabled, the kernel will include support
 	  for logging packets via NFNETLINK.
@@ -25,9 +24,9 @@ config NETFILTER_NETLINK_LOG
 	  and is also scheduled to replace the old syslog-based ipt_LOG
 	  and ip6t_LOG modules.
 
-# Rename this to NF_CONNTRACK in a 2.6.25
-config NF_CONNTRACK_ENABLED
+config NF_CONNTRACK
 	tristate "Netfilter connection tracking support"
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Connection tracking keeps a record of what packets have passed
 	  through your machine, in order to figure out how they are related
@@ -40,12 +39,9 @@ config NF_CONNTRACK_ENABLED
 
 	  To compile it as a module, choose M here. If unsure, say N.
 
-config NF_CONNTRACK
-	tristate
-	default NF_CONNTRACK_ENABLED
-
 config NF_CT_ACCT
 	bool "Connection tracking flow accounting"
+	depends on NETFILTER_ADVANCED
 	depends on NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
@@ -58,6 +54,7 @@ config NF_CT_ACCT
 
 config NF_CONNTRACK_MARK
 	bool 'Connection mark tracking support'
+	depends on NETFILTER_ADVANCED
 	depends on NF_CONNTRACK
 	help
 	  This option enables support for connection marks, used by the
@@ -68,6 +65,7 @@ config NF_CONNTRACK_MARK
 config NF_CONNTRACK_SECMARK
 	bool 'Connection tracking security mark support'
 	depends on NF_CONNTRACK && NETWORK_SECMARK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables security markings to be applied to
 	  connections. Typically they are copied to connections from
@@ -78,8 +76,9 @@ config NF_CONNTRACK_SECMARK
 	  If unsure, say 'N'.
 
 config NF_CONNTRACK_EVENTS
-	bool "Connection tracking events (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	bool "Connection tracking events"
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  If this option is enabled, the connection tracking code will
 	  provide a notifier chain that can be used by other kernel code
@@ -94,7 +93,7 @@ config NF_CT_PROTO_GRE
 config NF_CT_PROTO_SCTP
 	tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL && NF_CONNTRACK
-	default n
+	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, the layer 3 independent connection
 	  tracking code will be able to do state tracking on SCTP connections.
@@ -103,8 +102,9 @@ config NF_CT_PROTO_SCTP
 	  <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
 
 config NF_CT_PROTO_UDPLITE
-	tristate 'UDP-Lite protocol connection tracking support (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	tristate 'UDP-Lite protocol connection tracking support'
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, the layer 3 independent connection
 	  tracking code will be able to do state tracking on UDP-Lite
@@ -115,6 +115,7 @@ config NF_CT_PROTO_UDPLITE
 config NF_CONNTRACK_AMANDA
 	tristate "Amanda backup protocol support"
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select TEXTSEARCH
 	select TEXTSEARCH_KMP
 	help
@@ -130,6 +131,7 @@ config NF_CONNTRACK_AMANDA
 config NF_CONNTRACK_FTP
 	tristate "FTP protocol support"
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Tracking FTP connections is problematic: special helpers are
 	  required for tracking them, and doing masquerading and other forms
@@ -142,8 +144,9 @@ config NF_CONNTRACK_FTP
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config NF_CONNTRACK_H323
-	tristate "H.323 protocol support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK && (IPV6 || IPV6=n)
+	tristate "H.323 protocol support"
+	depends on NF_CONNTRACK && (IPV6 || IPV6=n)
+	depends on NETFILTER_ADVANCED
 	help
 	  H.323 is a VoIP signalling protocol from ITU-T. As one of the most
 	  important VoIP protocols, it is widely used by voice hardware and
@@ -163,6 +166,7 @@ config NF_CONNTRACK_H323
 config NF_CONNTRACK_IRC
 	tristate "IRC protocol support"
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  There is a commonly-used extension to IRC called
 	  Direct Client-to-Client Protocol (DCC). This enables users to send
@@ -176,8 +180,9 @@ config NF_CONNTRACK_IRC
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config NF_CONNTRACK_NETBIOS_NS
-	tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	tristate "NetBIOS name service protocol support"
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  NetBIOS name service requests are sent as broadcast messages from an
 	  unprivileged port and responded to with unicast messages to the
@@ -197,6 +202,7 @@ config NF_CONNTRACK_NETBIOS_NS
 config NF_CONNTRACK_PPTP
 	tristate "PPtP protocol support"
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select NF_CT_PROTO_GRE
 	help
 	  This module adds support for PPTP (Point to Point Tunnelling
@@ -216,6 +222,7 @@ config NF_CONNTRACK_PPTP
 config NF_CONNTRACK_SANE
 	tristate "SANE protocol support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  SANE is a protocol for remote access to scanners as implemented
 	  by the 'saned' daemon. Like FTP, it uses separate control and
@@ -227,8 +234,9 @@ config NF_CONNTRACK_SANE
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config NF_CONNTRACK_SIP
-	tristate "SIP protocol support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && NF_CONNTRACK
+	tristate "SIP protocol support"
+	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  SIP is an application-layer control protocol that can establish,
 	  modify, and terminate multimedia sessions (conferences) such as
@@ -241,6 +249,7 @@ config NF_CONNTRACK_SIP
 config NF_CONNTRACK_TFTP
 	tristate "TFTP protocol support"
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  TFTP connection tracking helper, this is required depending
 	  on how restrictive your ruleset is.
@@ -250,15 +259,17 @@ config NF_CONNTRACK_TFTP
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config NF_CT_NETLINK
-	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK
-	depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
+	tristate 'Connection tracking netlink interface'
+	depends on NF_CONNTRACK
+	select NETFILTER_NETLINK
 	depends on NF_NAT=n || NF_NAT
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables support for a netlink-based userspace interface
 
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This is required if you intend to use any of ip_tables,
 	  ip6_tables or arp_tables.
@@ -268,6 +279,7 @@ config NETFILTER_XTABLES
 config NETFILTER_XT_TARGET_CLASSIFY
 	tristate '"CLASSIFY" target support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `CLASSIFY' target, which enables the user to set
 	  the priority of a packet. Some qdiscs can use this value for
@@ -282,31 +294,38 @@ config NETFILTER_XT_TARGET_CONNMARK
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
 	help
 	  This option adds a `CONNMARK' target, which allows one to manipulate
 	  the connection mark value. Similar to the MARK target, but
 	  affects the connection mark value rather than the packet mark value.
 
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>. The module will be called
 	  ipt_CONNMARK.ko. If unsure, say `N'.
 
 config NETFILTER_XT_TARGET_DSCP
-	tristate '"DSCP" target support'
+	tristate '"DSCP" and "TOS" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `DSCP' target, which allows you to manipulate
 	  the IPv4/IPv6 header DSCP field (differentiated services codepoint).
 
 	  The DSCP field can have any value between 0x0 and 0x3f inclusive.
 
+	  It also adds the "TOS" target, which allows you to create rules in
+	  the "mangle" table which alter the Type Of Service field of an IPv4
+	  or the Priority field of an IPv6 packet, prior to routing.
+
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
 	depends on NETFILTER_XTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option adds a `MARK' target, which allows you to create rules
 	  in the `mangle' table which alter the netfilter mark (nfmark) field
@@ -320,6 +339,7 @@ config NETFILTER_XT_TARGET_MARK
 config NETFILTER_XT_TARGET_NFQUEUE
 	tristate '"NFQUEUE" target Support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This target replaced the old obsolete QUEUE target.
 
@@ -331,6 +351,7 @@ config NETFILTER_XT_TARGET_NFQUEUE
 config NETFILTER_XT_TARGET_NFLOG
 	tristate '"NFLOG" target support'
 	depends on NETFILTER_XTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This option enables the NFLOG target, which allows to LOG
 	  messages through the netfilter logging API, which can use
@@ -344,19 +365,32 @@ config NETFILTER_XT_TARGET_NOTRACK
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  The NOTRACK target allows a select rule to specify
 	  which packets *not* to enter the conntrack/NAT
 	  subsystem with all the consequences (no ICMP error tracking,
 	  no protocol helpers for the selected packets).
 
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
 
+config NETFILTER_XT_TARGET_RATEEST
+	tristate '"RATEEST" target support'
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds a `RATEEST' target, which allows to measure
+	  rates similar to TC estimators. The `rateest' match can be
+	  used to match on the measured rates.
+
+	  To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_TARGET_TRACE
 	tristate '"TRACE" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
+	depends on NETFILTER_ADVANCED
 	help
 	  The TRACE target allows you to mark packets so that the kernel
 	  will log every rule which match the packets as those traverse
@@ -368,6 +402,7 @@ config NETFILTER_XT_TARGET_TRACE
 config NETFILTER_XT_TARGET_SECMARK
 	tristate '"SECMARK" target support'
 	depends on NETFILTER_XTABLES && NETWORK_SECMARK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  The SECMARK target allows security marking of network
 	  packets, for use with security subsystems.
@@ -377,6 +412,7 @@ config NETFILTER_XT_TARGET_SECMARK
 config NETFILTER_XT_TARGET_CONNSECMARK
 	tristate '"CONNSECMARK" target support'
 	depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  The CONNSECMARK target copies security markings from packets
 	  to connections, and restores security markings from connections
@@ -388,6 +424,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK
 config NETFILTER_XT_TARGET_TCPMSS
 	tristate '"TCPMSS" target support'
 	depends on NETFILTER_XTABLES && (IPV6 || IPV6=n)
+	default m if NETFILTER_ADVANCED=n
 	---help---
 	  This option adds a `TCPMSS' target, which allows you to alter the
 	  MSS value of TCP SYN packets, to control the maximum size for that
@@ -411,9 +448,19 @@ config NETFILTER_XT_TARGET_TCPMSS
 
 	  To compile it as a module, choose M here. If unsure, say N.
 
+config NETFILTER_XT_TARGET_TCPOPTSTRIP
+	tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)'
+	depends on EXPERIMENTAL && NETFILTER_XTABLES
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds a "TCPOPTSTRIP" target, which allows you to strip
+	  TCP options from TCP packets.
+
 config NETFILTER_XT_MATCH_COMMENT
 	tristate '"comment" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `comment' dummy-match, which allows you to put
 	  comments in your iptables ruleset.
@@ -425,6 +472,7 @@ config NETFILTER_XT_MATCH_CONNBYTES
 	tristate '"connbytes" per-connection counter match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select NF_CT_ACCT
 	help
 	  This option adds a `connbytes' match, which allows you to match the
@@ -437,6 +485,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT
 	tristate '"connlimit" match support"'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	---help---
 	  This match allows you to match against the number of parallel
 	  connections to a server per client IP address (or address block).
@@ -445,11 +494,12 @@ config NETFILTER_XT_MATCH_CONNMARK
 	tristate '"connmark" connection mark match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
 	help
 	  This option adds a `connmark' match, which allows you to match the
 	  connection mark value previously set for the session by `CONNMARK'.
 
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>. The module will be called
 	  ipt_connmark.ko. If unsure, say `N'.
@@ -458,6 +508,7 @@ config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  This is a general conntrack match module, a superset of the state match.
 
@@ -468,8 +519,9 @@ config NETFILTER_XT_MATCH_CONNTRACK
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config NETFILTER_XT_MATCH_DCCP
-	tristate '"DCCP" protocol match support'
+	tristate '"dccp" protocol match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, you will be able to use the iptables
 	  `dccp' match in order to match on DCCP source/destination ports
@@ -479,19 +531,25 @@ config NETFILTER_XT_MATCH_DCCP
 	  <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_DSCP
-	tristate '"DSCP" match support'
+	tristate '"dscp" and "tos" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `DSCP' match, which allows you to match against
 	  the IPv4/IPv6 header DSCP field (differentiated services codepoint).
 
 	  The DSCP field can have any value between 0x0 and 0x3f inclusive.
 
+	  It will also add a "tos" match, which allows you to match packets
+	  based on the Type Of Service fields of the IPv4 packet (which share
+	  the same bits as DSCP).
+
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config NETFILTER_XT_MATCH_ESP
-	tristate '"ESP" match support'
+	tristate '"esp" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This match extension allows you to match a range of SPIs
 	  inside ESP header of IPSec packets.
@@ -502,15 +560,28 @@ config NETFILTER_XT_MATCH_HELPER
 	tristate '"helper" match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
 	help
 	  Helper matching allows you to match packets in dynamic connections
 	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
 
 	  To compile it as a module, choose M here. If unsure, say Y.
 
+config NETFILTER_XT_MATCH_IPRANGE
+	tristate '"iprange" address range match support'
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option adds a "iprange" match, which allows you to match based on
+	  an IP address range. (Normal iptables only matches on single addresses
+	  with an optional mask.)
+
+	  If unsure, say M.
+
 config NETFILTER_XT_MATCH_LENGTH
 	tristate '"length" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option allows you to match the length of a packet against a
 	  specific value or range of values.
@@ -520,6 +591,7 @@ config NETFILTER_XT_MATCH_LENGTH
 config NETFILTER_XT_MATCH_LIMIT
 	tristate '"limit" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  limit matching allows you to control the rate at which a rule can be
 	  matched: mainly useful in combination with the LOG target ("LOG
@@ -530,6 +602,7 @@ config NETFILTER_XT_MATCH_LIMIT
 config NETFILTER_XT_MATCH_MAC
 	tristate '"mac" address match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  MAC matching allows you to match packets based on the source
 	  Ethernet address of the packet.
@@ -539,6 +612,7 @@ config NETFILTER_XT_MATCH_MAC
 config NETFILTER_XT_MATCH_MARK
 	tristate '"mark" match support'
 	depends on NETFILTER_XTABLES
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Netfilter mark matching allows you to match packets based on the
 	  `nfmark' value in the packet. This can be set by the MARK target
@@ -546,9 +620,19 @@ config NETFILTER_XT_MATCH_MARK
 
 	  To compile it as a module, choose M here. If unsure, say N.
 
+config NETFILTER_XT_MATCH_OWNER
+	tristate '"owner" match support'
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	---help---
+	  Socket owner matching allows you to match locally-generated packets
+	  based on who created the socket: the user or group. It is also
+	  possible to check whether a socket actually exists.
+
 config NETFILTER_XT_MATCH_POLICY
 	tristate 'IPsec "policy" match support'
 	depends on NETFILTER_XTABLES && XFRM
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Policy matching allows you to match packets based on the
 	  IPsec policy that was used during decapsulation/will
@@ -557,8 +641,9 @@ config NETFILTER_XT_MATCH_POLICY
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config NETFILTER_XT_MATCH_MULTIPORT
-	tristate "Multiple port match support"
+	tristate '"multiport" Multiple port match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  Multiport matching allows you to match TCP or UDP packets based on
 	  a series of source or destination ports: normally a rule can only
@@ -569,6 +654,7 @@ config NETFILTER_XT_MATCH_MULTIPORT
 config NETFILTER_XT_MATCH_PHYSDEV
 	tristate '"physdev" match support'
 	depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
+	depends on NETFILTER_ADVANCED
 	help
 	  Physdev packet matching matches against the physical bridge ports
 	  the IP packet arrived on or will leave by.
@@ -578,6 +664,7 @@ config NETFILTER_XT_MATCH_PHYSDEV
 config NETFILTER_XT_MATCH_PKTTYPE
 	tristate '"pkttype" packet type match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  Packet type matching allows you to match a packet by
 	  its "class", eg. BROADCAST, MULTICAST, ...
@@ -590,6 +677,7 @@ config NETFILTER_XT_MATCH_PKTTYPE
 config NETFILTER_XT_MATCH_QUOTA
 	tristate '"quota" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `quota' match, which allows to match on a
 	  byte counter.
@@ -597,23 +685,36 @@ config NETFILTER_XT_MATCH_QUOTA
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_RATEEST
+	tristate '"rateest" match support'
+	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_XT_TARGET_RATEEST
+	help
+	  This option adds a `rateest' match, which allows to match on the
+	  rate estimated by the RATEEST target.
+
+	  To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_MATCH_REALM
 	tristate '"realm" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	select NET_CLS_ROUTE
 	help
 	  This option adds a `realm' match, which allows you to use the realm
 	  key from the routing subsystem inside iptables.
 
 	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
 	  in tc world.
 
	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
 
 config NETFILTER_XT_MATCH_SCTP
 	tristate '"sctp" protocol match support (EXPERIMENTAL)'
 	depends on NETFILTER_XTABLES && EXPERIMENTAL
+	depends on NETFILTER_ADVANCED
 	help
 	  With this option enabled, you will be able to use the
 	  `sctp' match in order to match on SCTP source/destination ports
@@ -626,6 +727,7 @@ config NETFILTER_XT_MATCH_STATE
 	tristate '"state" match support'
 	depends on NETFILTER_XTABLES
 	depends on NF_CONNTRACK
+	default m if NETFILTER_ADVANCED=n
 	help
 	  Connection state matching allows you to match packets based on their
 	  relationship to a tracked connection (ie. previous packets). This
@@ -636,6 +738,7 @@ config NETFILTER_XT_MATCH_STATE
 config NETFILTER_XT_MATCH_STATISTIC
 	tristate '"statistic" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `statistic' match, which allows you to match
 	  on packets periodically or randomly with a given percentage.
@@ -645,6 +748,7 @@ config NETFILTER_XT_MATCH_STATISTIC
 config NETFILTER_XT_MATCH_STRING
 	tristate '"string" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	select TEXTSEARCH
 	select TEXTSEARCH_KMP
 	select TEXTSEARCH_BM
@@ -658,6 +762,7 @@ config NETFILTER_XT_MATCH_STRING
 config NETFILTER_XT_MATCH_TCPMSS
 	tristate '"tcpmss" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `tcpmss' match, which allows you to examine the
 	  MSS value of TCP SYN packets, which control the maximum packet size
@@ -668,6 +773,7 @@ config NETFILTER_XT_MATCH_TCPMSS
 config NETFILTER_XT_MATCH_TIME
 	tristate '"time" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	---help---
 	  This option adds a "time" match, which allows you to match based on
 	  the packet arrival time (at the machine which netfilter is running)
@@ -682,6 +788,7 @@ config NETFILTER_XT_MATCH_TIME
 config NETFILTER_XT_MATCH_U32
 	tristate '"u32" match support'
 	depends on NETFILTER_XTABLES
+	depends on NETFILTER_ADVANCED
 	---help---
 	  u32 allows you to extract quantities of up to 4 bytes from a packet,
 	  AND them with specified masks, shift them by specified amounts and
@@ -695,6 +802,7 @@ config NETFILTER_XT_MATCH_U32
 config NETFILTER_XT_MATCH_HASHLIMIT
 	tristate '"hashlimit" match support'
 	depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+	depends on NETFILTER_ADVANCED
 	help
 	  This option adds a `hashlimit' match.
 
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index ad0e36ebea3d..ea7508387f95 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -4,7 +4,6 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
-obj-$(CONFIG_SYSCTL) += nf_sysctl.o
 
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
 obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
@@ -46,8 +45,10 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 
 # matches
@@ -61,15 +62,18 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index bed9ba01e8ec..c4065b8f9a95 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -26,10 +26,10 @@
 
 static DEFINE_MUTEX(afinfo_mutex);
 
-struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
+const struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly;
 EXPORT_SYMBOL(nf_afinfo);
 
-int nf_register_afinfo(struct nf_afinfo *afinfo)
+int nf_register_afinfo(const struct nf_afinfo *afinfo)
 {
 	int err;
 
@@ -42,7 +42,7 @@ int nf_register_afinfo(struct nf_afinfo *afinfo)
 }
 EXPORT_SYMBOL_GPL(nf_register_afinfo);
 
-void nf_unregister_afinfo(struct nf_afinfo *afinfo)
+void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
 {
 	mutex_lock(&afinfo_mutex);
 	rcu_assign_pointer(nf_afinfo[afinfo->family], NULL);
@@ -51,28 +51,23 @@ void nf_unregister_afinfo(struct nf_afinfo *afinfo)
 }
 EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
 
-/* In this code, we can be waiting indefinitely for userspace to
- * service a packet if a hook returns NF_QUEUE. We could keep a count
- * of skbuffs queued for userspace, and not deregister a hook unless
- * this is zero, but that sucks. Now, we simply check when the
- * packets come back: if the hook is gone, the packet is discarded. */
 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
 static DEFINE_MUTEX(nf_hook_mutex);
 
 int nf_register_hook(struct nf_hook_ops *reg)
 {
-	struct list_head *i;
+	struct nf_hook_ops *elem;
 	int err;
 
 	err = mutex_lock_interruptible(&nf_hook_mutex);
 	if (err < 0)
 		return err;
-	list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
-		if (reg->priority < ((struct nf_hook_ops *)i)->priority)
+	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
+		if (reg->priority < elem->priority)
 			break;
 	}
-	list_add_rcu(&reg->list, i->prev);
+	list_add_rcu(&reg->list, elem->list.prev);
 	mutex_unlock(&nf_hook_mutex);
 	return 0;
 }
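With list_for_each_entry() the priority comparison reads directly from the typed element instead of casting the list head. A hedged registration sketch under the assumption of the long-standing nf_hook_ops fields; my_hook_fn is a hypothetical hook function, not part of this patch:

/* "my_hook_fn" is made up; the fields shown are the nf_hook_ops
 * members that nf_register_hook() sorts on. */
static struct nf_hook_ops my_ops = {
	.hook     = my_hook_fn,
	.pf       = PF_INET,
	.hooknum  = NF_INET_PRE_ROUTING,
	.priority = NF_IP_PRI_FILTER - 1,	/* runs just before filter */
};

err = nf_register_hook(&my_ops);

Because the walk stops at the first element with a higher priority value, hooks on the same (pf, hooknum) chain stay sorted in ascending priority order.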
@@ -183,8 +178,7 @@ next_hook:
 	} else if (verdict == NF_DROP) {
 		kfree_skb(skb);
 		ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
 		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
 			goto next_hook;
@@ -217,22 +211,6 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
 }
 EXPORT_SYMBOL(skb_make_writable);
 
-void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
-			    __be32 from, __be32 to, int pseudohdr)
-{
-	__be32 diff[] = { ~from, to };
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		*sum = csum_fold(csum_partial(diff, sizeof(diff),
-				~csum_unfold(*sum)));
-		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
-			skb->csum = ~csum_partial(diff, sizeof(diff),
-						~skb->csum);
-	} else if (pseudohdr)
-		*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
-				csum_unfold(*sum)));
-}
-EXPORT_SYMBOL(nf_proto_csum_replace4);
-
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
@@ -294,3 +272,12 @@ void __init netfilter_init(void)
 	if (netfilter_log_init() < 0)
 		panic("cannot initialize nf_log");
 }
+
+#ifdef CONFIG_SYSCTL
+struct ctl_path nf_net_netfilter_sysctl_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "netfilter", .ctl_name = NET_NETFILTER, },
+	{ }
+};
+EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
+#endif /* CONFIG_SYSCTL */
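The exported array is presumably meant to be handed to the sysctl-path registration interface introduced in the same development cycle, so individual netfilter modules no longer spell out the net.netfilter prefix themselves. A hedged sketch under that assumption; the "example" entry, my_value and hdr are hypothetical:

/* Hypothetical table; only nf_net_netfilter_sysctl_path comes from
 * this patch. */
static int my_value;
static struct ctl_table my_table[] = {
	{
		.procname	= "example",	/* made-up sysctl name */
		.data		= &my_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ }
};

hdr = register_sysctl_paths(nf_net_netfilter_sysctl_path, my_table);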
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a4d5cdeb0110..078fff0335ad 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -81,7 +81,7 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
81 ((__force __u16)tuple->src.u.all << 16) | 81 ((__force __u16)tuple->src.u.all << 16) |
82 (__force __u16)tuple->dst.u.all); 82 (__force __u16)tuple->dst.u.all);
83 83
84 return jhash_2words(a, b, rnd) % size; 84 return ((u64)jhash_2words(a, b, rnd) * size) >> 32;
85} 85}
86 86
87static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) 87static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
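
This hunk — and the matching nf_ct_expect_dst_hash() change below — replaces 'jhash % size' with '((u64)jhash * size) >> 32': the 32-bit hash is read as a fraction of 2^32 and scaled to the table size, so bucket selection costs a multiply instead of a division while staying within one entry of a perfectly even spread. A quick userspace check of that property:

#include <stdint.h>
#include <stdio.h>

/* Map a 32-bit hash onto [0, size) without a modulo. */
static uint32_t scale32(uint32_t hash, uint32_t size)
{
        return (uint32_t)(((uint64_t)hash * size) >> 32);
}

int main(void)
{
        enum { SIZE = 1000 };
        static uint32_t bucket[SIZE];
        uint32_t min = UINT32_MAX, max = 0;

        for (uint32_t h = 0; h < (1u << 20); h++)
                bucket[scale32(h, SIZE)]++;
        for (int i = 0; i < SIZE; i++) {
                if (bucket[i] < min) min = bucket[i];
                if (bucket[i] > max) max = bucket[i];
        }
        printf("min %u max %u per bucket\n", min, max); /* differ by at most 1 */
        return 0;
}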
@@ -831,10 +831,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
831int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, 831int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
832 const struct nf_conntrack_tuple *tuple) 832 const struct nf_conntrack_tuple *tuple)
833{ 833{
834 NLA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), 834 NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port);
835 &tuple->src.u.tcp.port); 835 NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port);
836 NLA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
837 &tuple->dst.u.tcp.port);
838 return 0; 836 return 0;
839 837
840nla_put_failure: 838nla_put_failure:
@@ -854,8 +852,8 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
854 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) 852 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
855 return -EINVAL; 853 return -EINVAL;
856 854
857 t->src.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_SRC_PORT]); 855 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
858 t->dst.u.tcp.port = *(__be16 *)nla_data(tb[CTA_PROTO_DST_PORT]); 856 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
859 857
860 return 0; 858 return 0;
861} 859}
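
The NLA_PUT_BE16()/nla_get_be16() pair replaces the open-coded NLA_PUT(..., sizeof(u_int16_t), &...) calls above and the raw casts of nla_data() here. From memory, the typed accessor in include/net/netlink.h of this era is nothing more than the cast it replaces, wrapped once — treat this as a sketch, not a verbatim quote:

static inline __be16 nla_get_be16(struct nlattr *nla)
{
        return *(__be16 *) nla_data(nla);
}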
@@ -863,7 +861,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
863#endif 861#endif
864 862
865/* Used by ipt_REJECT and ip6t_REJECT. */ 863/* Used by ipt_REJECT and ip6t_REJECT. */
866void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) 864static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
867{ 865{
868 struct nf_conn *ct; 866 struct nf_conn *ct;
869 enum ip_conntrack_info ctinfo; 867 enum ip_conntrack_info ctinfo;
@@ -880,7 +878,6 @@ void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
880 nskb->nfctinfo = ctinfo; 878 nskb->nfctinfo = ctinfo;
881 nf_conntrack_get(nskb->nfct); 879 nf_conntrack_get(nskb->nfct);
882} 880}
883EXPORT_SYMBOL_GPL(__nf_conntrack_attach);
884 881
885static inline int 882static inline int
886do_iter(const struct nf_conntrack_tuple_hash *i, 883do_iter(const struct nf_conntrack_tuple_hash *i,
@@ -1124,7 +1121,7 @@ int __init nf_conntrack_init(void)
1124 goto out_fini_expect; 1121 goto out_fini_expect;
1125 1122
1126 /* For use by REJECT target */ 1123 /* For use by REJECT target */
1127 rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach); 1124 rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
1128 rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); 1125 rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
1129 1126
1130 /* Set up fake conntrack: 1127 /* Set up fake conntrack:
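
These hunks stop exporting the attach helper: __nf_conntrack_attach() becomes static nf_conntrack_attach() and is reachable only through the ip_ct_attach function pointer, published with rcu_assign_pointer() in the nf_conntrack_init() hunk just above so readers never observe a half-initialized callee. A paraphrased sketch of the consumer side — the shape mirrors nf_ct_attach() in net/netfilter/core.c as I recall it, so verify against the tree:

/* Publisher (conntrack init):
 *      rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
 */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __read_mostly;

void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
{
        void (*attach)(struct sk_buff *, struct sk_buff *);

        if (skb->nfct) {
                /* Sample under RCU: unload NULLs the pointer, then waits
                 * a grace period before the module text goes away. */
                rcu_read_lock();
                attach = rcu_dereference(ip_ct_attach);
                if (attach)
                        attach(new, skb);
                rcu_read_unlock();
        }
}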
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 175c8d1a1992..e0cd9d00aa61 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -73,15 +73,17 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
73 73
74static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple) 74static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
75{ 75{
76 unsigned int hash;
77
76 if (unlikely(!nf_ct_expect_hash_rnd_initted)) { 78 if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
77 get_random_bytes(&nf_ct_expect_hash_rnd, 4); 79 get_random_bytes(&nf_ct_expect_hash_rnd, 4);
78 nf_ct_expect_hash_rnd_initted = 1; 80 nf_ct_expect_hash_rnd_initted = 1;
79 } 81 }
80 82
81 return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), 83 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
82 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | 84 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
83 (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) % 85 (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
84 nf_ct_expect_hsize; 86 return ((u64)hash * nf_ct_expect_hsize) >> 32;
85} 87}
86 88
87struct nf_conntrack_expect * 89struct nf_conntrack_expect *
@@ -226,8 +228,8 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
226EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); 228EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
227 229
228void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, 230void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
229 union nf_conntrack_address *saddr, 231 union nf_inet_addr *saddr,
230 union nf_conntrack_address *daddr, 232 union nf_inet_addr *daddr,
231 u_int8_t proto, __be16 *src, __be16 *dst) 233 u_int8_t proto, __be16 *src, __be16 *dst)
232{ 234{
233 int len; 235 int len;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 6df259067f7e..6770baf2e845 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -358,7 +358,7 @@ static int help(struct sk_buff *skb,
358 unsigned int matchlen, matchoff; 358 unsigned int matchlen, matchoff;
359 struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; 359 struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info;
360 struct nf_conntrack_expect *exp; 360 struct nf_conntrack_expect *exp;
361 union nf_conntrack_address *daddr; 361 union nf_inet_addr *daddr;
362 struct nf_conntrack_man cmd = {}; 362 struct nf_conntrack_man cmd = {};
363 unsigned int i; 363 unsigned int i;
364 int found = 0, ends_in_nl; 364 int found = 0, ends_in_nl;
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index a869403b2294..ff66fba514fd 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -100,10 +100,10 @@ typedef struct {
100} bitstr_t; 100} bitstr_t;
101 101
102/* Tool Functions */ 102/* Tool Functions */
103#define INC_BIT(bs) if((++bs->bit)>7){bs->cur++;bs->bit=0;} 103#define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
104#define INC_BITS(bs,b) if((bs->bit+=b)>7){bs->cur+=bs->bit>>3;bs->bit&=7;} 104#define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
105#define BYTE_ALIGN(bs) if(bs->bit){bs->cur++;bs->bit=0;} 105#define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
106#define CHECK_BOUND(bs,n) if(bs->cur+(n)>bs->end)return(H323_ERROR_BOUND) 106#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
107static unsigned get_len(bitstr_t * bs); 107static unsigned get_len(bitstr_t * bs);
108static unsigned get_bit(bitstr_t * bs); 108static unsigned get_bit(bitstr_t * bs);
109static unsigned get_bits(bitstr_t * bs, unsigned b); 109static unsigned get_bits(bitstr_t * bs, unsigned b);
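
The ASN.1 bitstream macros now parenthesize every use of their arguments. Without that, any argument that is itself an expression misbinds, because '->' binds tighter than arithmetic. A tiny demonstration (struct name hypothetical):

#include <stdio.h>

struct bs { int cur, bit; };

/* Unhygienic: the argument is pasted bare. */
#define BYTE_ALIGN_BAD(bs)      if (bs->bit) { bs->cur++; bs->bit = 0; }
/* Fixed form, as in the patch. */
#define BYTE_ALIGN(bs)          if ((bs)->bit) { (bs)->cur++; (bs)->bit = 0; }

int main(void)
{
        struct bs a[2] = { { 0, 1 }, { 0, 1 } };
        struct bs *p = a;

        /* BYTE_ALIGN_BAD(p + 1) expands to 'p + 1->bit' and does not
         * even compile; the parenthesized version works as intended. */
        BYTE_ALIGN(p + 1);
        printf("%d %d\n", a[1].cur, a[1].bit);  /* 1 0 */
        return 0;
}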
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f23fd9598e19..872c1aa3124c 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -50,12 +50,12 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
50int (*set_h245_addr_hook) (struct sk_buff *skb, 50int (*set_h245_addr_hook) (struct sk_buff *skb,
51 unsigned char **data, int dataoff, 51 unsigned char **data, int dataoff,
52 H245_TransportAddress *taddr, 52 H245_TransportAddress *taddr,
53 union nf_conntrack_address *addr, __be16 port) 53 union nf_inet_addr *addr, __be16 port)
54 __read_mostly; 54 __read_mostly;
55int (*set_h225_addr_hook) (struct sk_buff *skb, 55int (*set_h225_addr_hook) (struct sk_buff *skb,
56 unsigned char **data, int dataoff, 56 unsigned char **data, int dataoff,
57 TransportAddress *taddr, 57 TransportAddress *taddr,
58 union nf_conntrack_address *addr, __be16 port) 58 union nf_inet_addr *addr, __be16 port)
59 __read_mostly; 59 __read_mostly;
60int (*set_sig_addr_hook) (struct sk_buff *skb, 60int (*set_sig_addr_hook) (struct sk_buff *skb,
61 struct nf_conn *ct, 61 struct nf_conn *ct,
@@ -214,7 +214,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
214/****************************************************************************/ 214/****************************************************************************/
215static int get_h245_addr(struct nf_conn *ct, unsigned char *data, 215static int get_h245_addr(struct nf_conn *ct, unsigned char *data,
216 H245_TransportAddress *taddr, 216 H245_TransportAddress *taddr,
217 union nf_conntrack_address *addr, __be16 *port) 217 union nf_inet_addr *addr, __be16 *port)
218{ 218{
219 unsigned char *p; 219 unsigned char *p;
220 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; 220 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
@@ -257,7 +257,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
257 int ret = 0; 257 int ret = 0;
258 __be16 port; 258 __be16 port;
259 __be16 rtp_port, rtcp_port; 259 __be16 rtp_port, rtcp_port;
260 union nf_conntrack_address addr; 260 union nf_inet_addr addr;
261 struct nf_conntrack_expect *rtp_exp; 261 struct nf_conntrack_expect *rtp_exp;
262 struct nf_conntrack_expect *rtcp_exp; 262 struct nf_conntrack_expect *rtcp_exp;
263 typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp; 263 typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
@@ -330,7 +330,7 @@ static int expect_t120(struct sk_buff *skb,
330 int dir = CTINFO2DIR(ctinfo); 330 int dir = CTINFO2DIR(ctinfo);
331 int ret = 0; 331 int ret = 0;
332 __be16 port; 332 __be16 port;
333 union nf_conntrack_address addr; 333 union nf_inet_addr addr;
334 struct nf_conntrack_expect *exp; 334 struct nf_conntrack_expect *exp;
335 typeof(nat_t120_hook) nat_t120; 335 typeof(nat_t120_hook) nat_t120;
336 336
@@ -623,7 +623,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
623/****************************************************************************/ 623/****************************************************************************/
624int get_h225_addr(struct nf_conn *ct, unsigned char *data, 624int get_h225_addr(struct nf_conn *ct, unsigned char *data,
625 TransportAddress *taddr, 625 TransportAddress *taddr,
626 union nf_conntrack_address *addr, __be16 *port) 626 union nf_inet_addr *addr, __be16 *port)
627{ 627{
628 unsigned char *p; 628 unsigned char *p;
629 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; 629 int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
@@ -662,7 +662,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
662 int dir = CTINFO2DIR(ctinfo); 662 int dir = CTINFO2DIR(ctinfo);
663 int ret = 0; 663 int ret = 0;
664 __be16 port; 664 __be16 port;
665 union nf_conntrack_address addr; 665 union nf_inet_addr addr;
666 struct nf_conntrack_expect *exp; 666 struct nf_conntrack_expect *exp;
667 typeof(nat_h245_hook) nat_h245; 667 typeof(nat_h245_hook) nat_h245;
668 668
@@ -704,13 +704,19 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
704 704
705/* If the calling party is on the same side of the forward-to party, 705/* If the calling party is on the same side of the forward-to party,
706 * we don't need to track the second call */ 706 * we don't need to track the second call */
707static int callforward_do_filter(union nf_conntrack_address *src, 707static int callforward_do_filter(union nf_inet_addr *src,
708 union nf_conntrack_address *dst, 708 union nf_inet_addr *dst,
709 int family) 709 int family)
710{ 710{
711 const struct nf_afinfo *afinfo;
711 struct flowi fl1, fl2; 712 struct flowi fl1, fl2;
712 int ret = 0; 713 int ret = 0;
713 714
715 /* rcu_read_lock()ed by nf_hook_slow() */
716 afinfo = nf_get_afinfo(family);
717 if (!afinfo)
718 return 0;
719
714 memset(&fl1, 0, sizeof(fl1)); 720 memset(&fl1, 0, sizeof(fl1));
715 memset(&fl2, 0, sizeof(fl2)); 721 memset(&fl2, 0, sizeof(fl2));
716 722
@@ -720,8 +726,8 @@ static int callforward_do_filter(union nf_conntrack_address *src,
720 726
721 fl1.fl4_dst = src->ip; 727 fl1.fl4_dst = src->ip;
722 fl2.fl4_dst = dst->ip; 728 fl2.fl4_dst = dst->ip;
723 if (ip_route_output_key(&rt1, &fl1) == 0) { 729 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
724 if (ip_route_output_key(&rt2, &fl2) == 0) { 730 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
725 if (rt1->rt_gateway == rt2->rt_gateway && 731 if (rt1->rt_gateway == rt2->rt_gateway &&
726 rt1->u.dst.dev == rt2->u.dst.dev) 732 rt1->u.dst.dev == rt2->u.dst.dev)
727 ret = 1; 733 ret = 1;
@@ -731,16 +737,15 @@ static int callforward_do_filter(union nf_conntrack_address *src,
731 } 737 }
732 break; 738 break;
733 } 739 }
734#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 740#if defined(CONFIG_NF_CONNTRACK_IPV6) || \
741 defined(CONFIG_NF_CONNTRACK_IPV6_MODULE)
735 case AF_INET6: { 742 case AF_INET6: {
736 struct rt6_info *rt1, *rt2; 743 struct rt6_info *rt1, *rt2;
737 744
738 memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst)); 745 memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst));
739 memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst)); 746 memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst));
740 rt1 = (struct rt6_info *)ip6_route_output(NULL, &fl1); 747 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
741 if (rt1) { 748 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
742 rt2 = (struct rt6_info *)ip6_route_output(NULL, &fl2);
743 if (rt2) {
744 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, 749 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
745 sizeof(rt1->rt6i_gateway)) && 750 sizeof(rt1->rt6i_gateway)) &&
746 rt1->u.dst.dev == rt2->u.dst.dev) 751 rt1->u.dst.dev == rt2->u.dst.dev)
@@ -767,7 +772,7 @@ static int expect_callforwarding(struct sk_buff *skb,
767 int dir = CTINFO2DIR(ctinfo); 772 int dir = CTINFO2DIR(ctinfo);
768 int ret = 0; 773 int ret = 0;
769 __be16 port; 774 __be16 port;
770 union nf_conntrack_address addr; 775 union nf_inet_addr addr;
771 struct nf_conntrack_expect *exp; 776 struct nf_conntrack_expect *exp;
772 typeof(nat_callforwarding_hook) nat_callforwarding; 777 typeof(nat_callforwarding_hook) nat_callforwarding;
773 778
@@ -823,7 +828,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
823 int ret; 828 int ret;
824 int i; 829 int i;
825 __be16 port; 830 __be16 port;
826 union nf_conntrack_address addr; 831 union nf_inet_addr addr;
827 typeof(set_h225_addr_hook) set_h225_addr; 832 typeof(set_h225_addr_hook) set_h225_addr;
828 833
829 pr_debug("nf_ct_q931: Setup\n"); 834 pr_debug("nf_ct_q931: Setup\n");
@@ -1195,7 +1200,7 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
1195 1200
1196/****************************************************************************/ 1201/****************************************************************************/
1197static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, 1202static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
1198 union nf_conntrack_address *addr, 1203 union nf_inet_addr *addr,
1199 __be16 port) 1204 __be16 port)
1200{ 1205{
1201 struct nf_conntrack_expect *exp; 1206 struct nf_conntrack_expect *exp;
@@ -1237,7 +1242,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
1237 int ret = 0; 1242 int ret = 0;
1238 int i; 1243 int i;
1239 __be16 port; 1244 __be16 port;
1240 union nf_conntrack_address addr; 1245 union nf_inet_addr addr;
1241 struct nf_conntrack_expect *exp; 1246 struct nf_conntrack_expect *exp;
1242 typeof(nat_q931_hook) nat_q931; 1247 typeof(nat_q931_hook) nat_q931;
1243 1248
@@ -1306,7 +1311,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
1306 int dir = CTINFO2DIR(ctinfo); 1311 int dir = CTINFO2DIR(ctinfo);
1307 int ret = 0; 1312 int ret = 0;
1308 __be16 port; 1313 __be16 port;
1309 union nf_conntrack_address addr; 1314 union nf_inet_addr addr;
1310 struct nf_conntrack_expect *exp; 1315 struct nf_conntrack_expect *exp;
1311 1316
1312 pr_debug("nf_ct_ras: GCF\n"); 1317 pr_debug("nf_ct_ras: GCF\n");
@@ -1466,7 +1471,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
1466 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 1471 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
1467 int dir = CTINFO2DIR(ctinfo); 1472 int dir = CTINFO2DIR(ctinfo);
1468 __be16 port; 1473 __be16 port;
1469 union nf_conntrack_address addr; 1474 union nf_inet_addr addr;
1470 typeof(set_h225_addr_hook) set_h225_addr; 1475 typeof(set_h225_addr_hook) set_h225_addr;
1471 1476
1472 pr_debug("nf_ct_ras: ARQ\n"); 1477 pr_debug("nf_ct_ras: ARQ\n");
@@ -1508,7 +1513,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
1508 int dir = CTINFO2DIR(ctinfo); 1513 int dir = CTINFO2DIR(ctinfo);
1509 int ret = 0; 1514 int ret = 0;
1510 __be16 port; 1515 __be16 port;
1511 union nf_conntrack_address addr; 1516 union nf_inet_addr addr;
1512 struct nf_conntrack_expect *exp; 1517 struct nf_conntrack_expect *exp;
1513 typeof(set_sig_addr_hook) set_sig_addr; 1518 typeof(set_sig_addr_hook) set_sig_addr;
1514 1519
@@ -1571,7 +1576,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
1571 int dir = CTINFO2DIR(ctinfo); 1576 int dir = CTINFO2DIR(ctinfo);
1572 int ret = 0; 1577 int ret = 0;
1573 __be16 port; 1578 __be16 port;
1574 union nf_conntrack_address addr; 1579 union nf_inet_addr addr;
1575 struct nf_conntrack_expect *exp; 1580 struct nf_conntrack_expect *exp;
1576 1581
1577 pr_debug("nf_ct_ras: LCF\n"); 1582 pr_debug("nf_ct_ras: LCF\n");
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 991c52c9a28b..8e914e5ffea8 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -55,12 +55,6 @@ static int generic_print_tuple(struct seq_file *s,
55 return 0; 55 return 0;
56} 56}
57 57
58static int generic_print_conntrack(struct seq_file *s,
59 const struct nf_conn *conntrack)
60{
61 return 0;
62}
63
64static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 58static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
65 unsigned int *dataoff, u_int8_t *protonum) 59 unsigned int *dataoff, u_int8_t *protonum)
66{ 60{
@@ -75,7 +69,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
75 .pkt_to_tuple = generic_pkt_to_tuple, 69 .pkt_to_tuple = generic_pkt_to_tuple,
76 .invert_tuple = generic_invert_tuple, 70 .invert_tuple = generic_invert_tuple,
77 .print_tuple = generic_print_tuple, 71 .print_tuple = generic_print_tuple,
78 .print_conntrack = generic_print_conntrack,
79 .get_l4proto = generic_get_l4proto, 72 .get_l4proto = generic_get_l4proto,
80}; 73};
81EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); 74EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7d231243754a..38141f104db7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -59,7 +59,7 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb,
59 nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED); 59 nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED);
60 if (!nest_parms) 60 if (!nest_parms)
61 goto nla_put_failure; 61 goto nla_put_failure;
62 NLA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); 62 NLA_PUT_U8(skb, CTA_PROTO_NUM, tuple->dst.protonum);
63 63
64 if (likely(l4proto->tuple_to_nlattr)) 64 if (likely(l4proto->tuple_to_nlattr))
65 ret = l4proto->tuple_to_nlattr(skb, tuple); 65 ret = l4proto->tuple_to_nlattr(skb, tuple);
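
NLA_PUT_U8() and the other typed putters used throughout this file replace open-coded NLA_PUT(skb, type, sizeof(...), &var) sequences: each takes the value directly, materializes a correctly sized temporary, and jumps to nla_put_failure on error. Roughly how the 2.6.25-era helpers in include/net/netlink.h expand — paraphrased from memory, so treat as a sketch:

#define NLA_PUT(skb, attrtype, attrlen, data) \
        do { \
                if (nla_put(skb, attrtype, attrlen, data) < 0) \
                        goto nla_put_failure; \
        } while (0)

#define NLA_PUT_TYPE(skb, type, attrtype, value) \
        do { \
                type __tmp = value; \
                NLA_PUT(skb, attrtype, sizeof(type), &__tmp); \
        } while (0)

#define NLA_PUT_U8(skb, attrtype, value) \
        NLA_PUT_TYPE(skb, u8, attrtype, value)

#define NLA_PUT_BE32(skb, attrtype, value) \
        NLA_PUT_TYPE(skb, __be32, attrtype, value)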
@@ -95,7 +95,7 @@ nla_put_failure:
95 return -1; 95 return -1;
96} 96}
97 97
98static inline int 98static int
99ctnetlink_dump_tuples(struct sk_buff *skb, 99ctnetlink_dump_tuples(struct sk_buff *skb,
100 const struct nf_conntrack_tuple *tuple) 100 const struct nf_conntrack_tuple *tuple)
101{ 101{
@@ -120,8 +120,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
120static inline int 120static inline int
121ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) 121ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
122{ 122{
123 __be32 status = htonl((u_int32_t) ct->status); 123 NLA_PUT_BE32(skb, CTA_STATUS, htonl(ct->status));
124 NLA_PUT(skb, CTA_STATUS, sizeof(status), &status);
125 return 0; 124 return 0;
126 125
127nla_put_failure: 126nla_put_failure:
@@ -131,15 +130,12 @@ nla_put_failure:
131static inline int 130static inline int
132ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) 131ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
133{ 132{
134 long timeout_l = ct->timeout.expires - jiffies; 133 long timeout = (ct->timeout.expires - jiffies) / HZ;
135 __be32 timeout;
136 134
137 if (timeout_l < 0) 135 if (timeout < 0)
138 timeout = 0; 136 timeout = 0;
139 else
140 timeout = htonl(timeout_l / HZ);
141 137
142 NLA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); 138 NLA_PUT_BE32(skb, CTA_TIMEOUT, htonl(timeout));
143 return 0; 139 return 0;
144 140
145nla_put_failure: 141nla_put_failure:
@@ -193,7 +189,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
193 nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED); 189 nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED);
194 if (!nest_helper) 190 if (!nest_helper)
195 goto nla_put_failure; 191 goto nla_put_failure;
196 NLA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name); 192 NLA_PUT_STRING(skb, CTA_HELP_NAME, helper->name);
197 193
198 if (helper->to_nlattr) 194 if (helper->to_nlattr)
199 helper->to_nlattr(skb, ct); 195 helper->to_nlattr(skb, ct);
@@ -209,23 +205,21 @@ nla_put_failure:
209} 205}
210 206
211#ifdef CONFIG_NF_CT_ACCT 207#ifdef CONFIG_NF_CT_ACCT
212static inline int 208static int
213ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, 209ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
214 enum ip_conntrack_dir dir) 210 enum ip_conntrack_dir dir)
215{ 211{
216 enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; 212 enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
217 struct nlattr *nest_count; 213 struct nlattr *nest_count;
218 __be32 tmp;
219 214
220 nest_count = nla_nest_start(skb, type | NLA_F_NESTED); 215 nest_count = nla_nest_start(skb, type | NLA_F_NESTED);
221 if (!nest_count) 216 if (!nest_count)
222 goto nla_put_failure; 217 goto nla_put_failure;
223 218
224 tmp = htonl(ct->counters[dir].packets); 219 NLA_PUT_BE32(skb, CTA_COUNTERS32_PACKETS,
225 NLA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); 220 htonl(ct->counters[dir].packets));
226 221 NLA_PUT_BE32(skb, CTA_COUNTERS32_BYTES,
227 tmp = htonl(ct->counters[dir].bytes); 222 htonl(ct->counters[dir].bytes));
228 NLA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
229 223
230 nla_nest_end(skb, nest_count); 224 nla_nest_end(skb, nest_count);
231 225
@@ -242,9 +236,7 @@ nla_put_failure:
242static inline int 236static inline int
243ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) 237ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
244{ 238{
245 __be32 mark = htonl(ct->mark); 239 NLA_PUT_BE32(skb, CTA_MARK, htonl(ct->mark));
246
247 NLA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
248 return 0; 240 return 0;
249 241
250nla_put_failure: 242nla_put_failure:
@@ -254,11 +246,95 @@ nla_put_failure:
254#define ctnetlink_dump_mark(a, b) (0) 246#define ctnetlink_dump_mark(a, b) (0)
255#endif 247#endif
256 248
249#ifdef CONFIG_NF_CONNTRACK_SECMARK
250static inline int
251ctnetlink_dump_secmark(struct sk_buff *skb, const struct nf_conn *ct)
252{
253 NLA_PUT_BE32(skb, CTA_SECMARK, htonl(ct->secmark));
254 return 0;
255
256nla_put_failure:
257 return -1;
258}
259#else
260#define ctnetlink_dump_secmark(a, b) (0)
261#endif
262
263#define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
264
265static inline int
266ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct)
267{
268 struct nlattr *nest_parms;
269
270 if (!(ct->status & IPS_EXPECTED))
271 return 0;
272
273 nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER | NLA_F_NESTED);
274 if (!nest_parms)
275 goto nla_put_failure;
276 if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0)
277 goto nla_put_failure;
278 nla_nest_end(skb, nest_parms);
279
280 return 0;
281
282nla_put_failure:
283 return -1;
284}
285
286#ifdef CONFIG_NF_NAT_NEEDED
287static int
288dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
289{
290 struct nlattr *nest_parms;
291
292 nest_parms = nla_nest_start(skb, type | NLA_F_NESTED);
293 if (!nest_parms)
294 goto nla_put_failure;
295
296 NLA_PUT_BE32(skb, CTA_NAT_SEQ_CORRECTION_POS,
297 htonl(natseq->correction_pos));
298 NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_BEFORE,
299 htonl(natseq->offset_before));
300 NLA_PUT_BE32(skb, CTA_NAT_SEQ_OFFSET_AFTER,
301 htonl(natseq->offset_after));
302
303 nla_nest_end(skb, nest_parms);
304
305 return 0;
306
307nla_put_failure:
308 return -1;
309}
310
311static inline int
312ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
313{
314 struct nf_nat_seq *natseq;
315 struct nf_conn_nat *nat = nfct_nat(ct);
316
317 if (!(ct->status & IPS_SEQ_ADJUST) || !nat)
318 return 0;
319
320 natseq = &nat->seq[IP_CT_DIR_ORIGINAL];
321 if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1)
322 return -1;
323
324 natseq = &nat->seq[IP_CT_DIR_REPLY];
325 if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1)
326 return -1;
327
328 return 0;
329}
330#else
331#define ctnetlink_dump_nat_seq_adj(a, b) (0)
332#endif
333
257static inline int 334static inline int
258ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) 335ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
259{ 336{
260 __be32 id = htonl((unsigned long)ct); 337 NLA_PUT_BE32(skb, CTA_ID, htonl((unsigned long)ct));
261 NLA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
262 return 0; 338 return 0;
263 339
264nla_put_failure: 340nla_put_failure:
@@ -268,9 +344,7 @@ nla_put_failure:
268static inline int 344static inline int
269ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) 345ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
270{ 346{
271 __be32 use = htonl(atomic_read(&ct->ct_general.use)); 347 NLA_PUT_BE32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)));
272
273 NLA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
274 return 0; 348 return 0;
275 349
276nla_put_failure: 350nla_put_failure:
@@ -320,8 +394,11 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
320 ctnetlink_dump_protoinfo(skb, ct) < 0 || 394 ctnetlink_dump_protoinfo(skb, ct) < 0 ||
321 ctnetlink_dump_helpinfo(skb, ct) < 0 || 395 ctnetlink_dump_helpinfo(skb, ct) < 0 ||
322 ctnetlink_dump_mark(skb, ct) < 0 || 396 ctnetlink_dump_mark(skb, ct) < 0 ||
397 ctnetlink_dump_secmark(skb, ct) < 0 ||
323 ctnetlink_dump_id(skb, ct) < 0 || 398 ctnetlink_dump_id(skb, ct) < 0 ||
324 ctnetlink_dump_use(skb, ct) < 0) 399 ctnetlink_dump_use(skb, ct) < 0 ||
400 ctnetlink_dump_master(skb, ct) < 0 ||
401 ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
325 goto nla_put_failure; 402 goto nla_put_failure;
326 403
327 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 404 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -419,11 +496,24 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
419 && ctnetlink_dump_mark(skb, ct) < 0) 496 && ctnetlink_dump_mark(skb, ct) < 0)
420 goto nla_put_failure; 497 goto nla_put_failure;
421#endif 498#endif
499#ifdef CONFIG_NF_CONNTRACK_SECMARK
500 if ((events & IPCT_SECMARK || ct->secmark)
501 && ctnetlink_dump_secmark(skb, ct) < 0)
502 goto nla_put_failure;
503#endif
422 504
423 if (events & IPCT_COUNTER_FILLING && 505 if (events & IPCT_COUNTER_FILLING &&
424 (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 506 (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
425 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)) 507 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
426 goto nla_put_failure; 508 goto nla_put_failure;
509
510 if (events & IPCT_RELATED &&
511 ctnetlink_dump_master(skb, ct) < 0)
512 goto nla_put_failure;
513
514 if (events & IPCT_NATSEQADJ &&
515 ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
516 goto nla_put_failure;
427 } 517 }
428 518
429 nlh->nlmsg_len = skb->tail - b; 519 nlh->nlmsg_len = skb->tail - b;
@@ -444,7 +534,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
444 return 0; 534 return 0;
445} 535}
446 536
447#define L3PROTO(ct) ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num 537#define L3PROTO(ct) (ct)->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num
448 538
449static int 539static int
450ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) 540ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
@@ -542,7 +632,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
542 632
543 if (!tb[CTA_PROTO_NUM]) 633 if (!tb[CTA_PROTO_NUM])
544 return -EINVAL; 634 return -EINVAL;
545 tuple->dst.protonum = *(u_int8_t *)nla_data(tb[CTA_PROTO_NUM]); 635 tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
546 636
547 l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); 637 l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
548 638
@@ -558,7 +648,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
558 return ret; 648 return ret;
559} 649}
560 650
561static inline int 651static int
562ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, 652ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple,
563 enum ctattr_tuple type, u_int8_t l3num) 653 enum ctattr_tuple type, u_int8_t l3num)
564{ 654{
@@ -605,7 +695,7 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
605 struct nf_nat_range *range) 695 struct nf_nat_range *range)
606{ 696{
607 struct nlattr *tb[CTA_PROTONAT_MAX+1]; 697 struct nlattr *tb[CTA_PROTONAT_MAX+1];
608 struct nf_nat_protocol *npt; 698 const struct nf_nat_protocol *npt;
609 int err; 699 int err;
610 700
611 err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); 701 err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
@@ -647,12 +737,12 @@ nfnetlink_parse_nat(struct nlattr *nat,
647 return err; 737 return err;
648 738
649 if (tb[CTA_NAT_MINIP]) 739 if (tb[CTA_NAT_MINIP])
650 range->min_ip = *(__be32 *)nla_data(tb[CTA_NAT_MINIP]); 740 range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
651 741
652 if (!tb[CTA_NAT_MAXIP]) 742 if (!tb[CTA_NAT_MAXIP])
653 range->max_ip = range->min_ip; 743 range->max_ip = range->min_ip;
654 else 744 else
655 range->max_ip = *(__be32 *)nla_data(tb[CTA_NAT_MAXIP]); 745 range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
656 746
657 if (range->min_ip) 747 if (range->min_ip)
658 range->flags |= IP_NAT_RANGE_MAP_IPS; 748 range->flags |= IP_NAT_RANGE_MAP_IPS;
@@ -722,7 +812,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
722 ct = nf_ct_tuplehash_to_ctrack(h); 812 ct = nf_ct_tuplehash_to_ctrack(h);
723 813
724 if (cda[CTA_ID]) { 814 if (cda[CTA_ID]) {
725 u_int32_t id = ntohl(*(__be32 *)nla_data(cda[CTA_ID])); 815 u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
726 if (id != (u32)(unsigned long)ct) { 816 if (id != (u32)(unsigned long)ct) {
727 nf_ct_put(ct); 817 nf_ct_put(ct);
728 return -ENOENT; 818 return -ENOENT;
@@ -798,11 +888,11 @@ out:
798 return err; 888 return err;
799} 889}
800 890
801static inline int 891static int
802ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) 892ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
803{ 893{
804 unsigned long d; 894 unsigned long d;
805 unsigned int status = ntohl(*(__be32 *)nla_data(cda[CTA_STATUS])); 895 unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS]));
806 d = ct->status ^ status; 896 d = ct->status ^ status;
807 897
808 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) 898 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -828,19 +918,17 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
828 if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct, 918 if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct,
829 &range) < 0) 919 &range) < 0)
830 return -EINVAL; 920 return -EINVAL;
831 if (nf_nat_initialized(ct, 921 if (nf_nat_initialized(ct, IP_NAT_MANIP_DST))
832 HOOK2MANIP(NF_IP_PRE_ROUTING)))
833 return -EEXIST; 922 return -EEXIST;
834 nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); 923 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
835 } 924 }
836 if (cda[CTA_NAT_SRC]) { 925 if (cda[CTA_NAT_SRC]) {
837 if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct, 926 if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct,
838 &range) < 0) 927 &range) < 0)
839 return -EINVAL; 928 return -EINVAL;
840 if (nf_nat_initialized(ct, 929 if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC))
841 HOOK2MANIP(NF_IP_POST_ROUTING)))
842 return -EEXIST; 930 return -EEXIST;
843 nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); 931 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
844 } 932 }
845#endif 933#endif
846 } 934 }
@@ -904,7 +992,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
904static inline int 992static inline int
905ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) 993ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[])
906{ 994{
907 u_int32_t timeout = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT])); 995 u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
908 996
909 if (!del_timer(&ct->timeout)) 997 if (!del_timer(&ct->timeout))
910 return -ETIME; 998 return -ETIME;
@@ -935,6 +1023,66 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[])
935 return err; 1023 return err;
936} 1024}
937 1025
1026#ifdef CONFIG_NF_NAT_NEEDED
1027static inline int
1028change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr)
1029{
1030 struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
1031
1032 nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, NULL);
1033
1034 if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
1035 return -EINVAL;
1036
1037 natseq->correction_pos =
1038 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS]));
1039
1040 if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE])
1041 return -EINVAL;
1042
1043 natseq->offset_before =
1044 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
1045
1046 if (!cda[CTA_NAT_SEQ_OFFSET_AFTER])
1047 return -EINVAL;
1048
1049 natseq->offset_after =
1050 ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
1051
1052 return 0;
1053}
1054
1055static int
1056ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[])
1057{
1058 int ret = 0;
1059 struct nf_conn_nat *nat = nfct_nat(ct);
1060
1061 if (!nat)
1062 return 0;
1063
1064 if (cda[CTA_NAT_SEQ_ADJ_ORIG]) {
1065 ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL],
1066 cda[CTA_NAT_SEQ_ADJ_ORIG]);
1067 if (ret < 0)
1068 return ret;
1069
1070 ct->status |= IPS_SEQ_ADJUST;
1071 }
1072
1073 if (cda[CTA_NAT_SEQ_ADJ_REPLY]) {
1074 ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY],
1075 cda[CTA_NAT_SEQ_ADJ_REPLY]);
1076 if (ret < 0)
1077 return ret;
1078
1079 ct->status |= IPS_SEQ_ADJUST;
1080 }
1081
1082 return 0;
1083}
1084#endif
1085
938static int 1086static int
939ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) 1087ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
940{ 1088{
@@ -966,7 +1114,15 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
966 1114
967#if defined(CONFIG_NF_CONNTRACK_MARK) 1115#if defined(CONFIG_NF_CONNTRACK_MARK)
968 if (cda[CTA_MARK]) 1116 if (cda[CTA_MARK])
969 ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK])); 1117 ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
1118#endif
1119
1120#ifdef CONFIG_NF_NAT_NEEDED
1121 if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
1122 err = ctnetlink_change_nat_seq_adj(ct, cda);
1123 if (err < 0)
1124 return err;
1125 }
970#endif 1126#endif
971 1127
972 return 0; 1128 return 0;
@@ -989,7 +1145,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
989 1145
990 if (!cda[CTA_TIMEOUT]) 1146 if (!cda[CTA_TIMEOUT])
991 goto err; 1147 goto err;
992 ct->timeout.expires = ntohl(*(__be32 *)nla_data(cda[CTA_TIMEOUT])); 1148 ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
993 1149
994 ct->timeout.expires = jiffies + ct->timeout.expires * HZ; 1150 ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
995 ct->status |= IPS_CONFIRMED; 1151 ct->status |= IPS_CONFIRMED;
@@ -1008,7 +1164,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1008 1164
1009#if defined(CONFIG_NF_CONNTRACK_MARK) 1165#if defined(CONFIG_NF_CONNTRACK_MARK)
1010 if (cda[CTA_MARK]) 1166 if (cda[CTA_MARK])
1011 ct->mark = ntohl(*(__be32 *)nla_data(cda[CTA_MARK])); 1167 ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
1012#endif 1168#endif
1013 1169
1014 helper = nf_ct_helper_find_get(rtuple); 1170 helper = nf_ct_helper_find_get(rtuple);
@@ -1193,13 +1349,15 @@ nla_put_failure:
1193 return -1; 1349 return -1;
1194} 1350}
1195 1351
1196static inline int 1352static int
1197ctnetlink_exp_dump_expect(struct sk_buff *skb, 1353ctnetlink_exp_dump_expect(struct sk_buff *skb,
1198 const struct nf_conntrack_expect *exp) 1354 const struct nf_conntrack_expect *exp)
1199{ 1355{
1200 struct nf_conn *master = exp->master; 1356 struct nf_conn *master = exp->master;
1201 __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ); 1357 long timeout = (exp->timeout.expires - jiffies) / HZ;
1202 __be32 id = htonl((unsigned long)exp); 1358
1359 if (timeout < 0)
1360 timeout = 0;
1203 1361
1204 if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) 1362 if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
1205 goto nla_put_failure; 1363 goto nla_put_failure;
@@ -1210,8 +1368,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
1210 CTA_EXPECT_MASTER) < 0) 1368 CTA_EXPECT_MASTER) < 0)
1211 goto nla_put_failure; 1369 goto nla_put_failure;
1212 1370
1213 NLA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); 1371 NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
1214 NLA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); 1372 NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
1215 1373
1216 return 0; 1374 return 0;
1217 1375
@@ -1384,7 +1542,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1384 return -ENOENT; 1542 return -ENOENT;
1385 1543
1386 if (cda[CTA_EXPECT_ID]) { 1544 if (cda[CTA_EXPECT_ID]) {
1387 __be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]); 1545 __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
1388 if (ntohl(id) != (u32)(unsigned long)exp) { 1546 if (ntohl(id) != (u32)(unsigned long)exp) {
1389 nf_ct_expect_put(exp); 1547 nf_ct_expect_put(exp);
1390 return -ENOENT; 1548 return -ENOENT;
@@ -1438,7 +1596,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1438 return -ENOENT; 1596 return -ENOENT;
1439 1597
1440 if (cda[CTA_EXPECT_ID]) { 1598 if (cda[CTA_EXPECT_ID]) {
1441 __be32 id = *(__be32 *)nla_data(cda[CTA_EXPECT_ID]); 1599 __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
1442 if (ntohl(id) != (u32)(unsigned long)exp) { 1600 if (ntohl(id) != (u32)(unsigned long)exp) {
1443 nf_ct_expect_put(exp); 1601 nf_ct_expect_put(exp);
1444 return -ENOENT; 1602 return -ENOENT;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 6d947068c58f..8595b5946acf 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -36,11 +36,11 @@ static DEFINE_MUTEX(nf_ct_proto_mutex);
36 36
37#ifdef CONFIG_SYSCTL 37#ifdef CONFIG_SYSCTL
38static int 38static int
39nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path, 39nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_path *path,
40 struct ctl_table *table, unsigned int *users) 40 struct ctl_table *table, unsigned int *users)
41{ 41{
42 if (*header == NULL) { 42 if (*header == NULL) {
43 *header = nf_register_sysctl_table(path, table); 43 *header = register_sysctl_paths(path, table);
44 if (*header == NULL) 44 if (*header == NULL)
45 return -ENOMEM; 45 return -ENOMEM;
46 } 46 }
@@ -55,7 +55,8 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
55{ 55{
56 if (users != NULL && --*users > 0) 56 if (users != NULL && --*users > 0)
57 return; 57 return;
58 nf_unregister_sysctl_table(*header, table); 58
59 unregister_sysctl_table(*header);
59 *header = NULL; 60 *header = NULL;
60} 61}
61#endif 62#endif
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 13f819179642..22c5dcb6306a 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -40,13 +40,6 @@ static int generic_print_tuple(struct seq_file *s,
40 return 0; 40 return 0;
41} 41}
42 42
43/* Print out the private part of the conntrack. */
44static int generic_print_conntrack(struct seq_file *s,
45 const struct nf_conn *state)
46{
47 return 0;
48}
49
50/* Returns verdict for packet, or -1 for invalid. */ 43/* Returns verdict for packet, or -1 for invalid. */
51static int packet(struct nf_conn *conntrack, 44static int packet(struct nf_conn *conntrack,
52 const struct sk_buff *skb, 45 const struct sk_buff *skb,
@@ -104,7 +97,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
104 .pkt_to_tuple = generic_pkt_to_tuple, 97 .pkt_to_tuple = generic_pkt_to_tuple,
105 .invert_tuple = generic_invert_tuple, 98 .invert_tuple = generic_invert_tuple,
106 .print_tuple = generic_print_tuple, 99 .print_tuple = generic_print_tuple,
107 .print_conntrack = generic_print_conntrack,
108 .packet = packet, 100 .packet = packet,
109 .new = new, 101 .new = new,
110#ifdef CONFIG_SYSCTL 102#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index cb0467510592..21d29e782baf 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -49,24 +49,15 @@ static const char *sctp_conntrack_names[] = {
49#define HOURS * 60 MINS 49#define HOURS * 60 MINS
50#define DAYS * 24 HOURS 50#define DAYS * 24 HOURS
51 51
52static unsigned int nf_ct_sctp_timeout_closed __read_mostly = 10 SECS; 52static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
53static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; 53 [SCTP_CONNTRACK_CLOSED] = 10 SECS,
54static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; 54 [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
55static unsigned int nf_ct_sctp_timeout_established __read_mostly = 5 DAYS; 55 [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
56static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; 56 [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
57static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; 57 [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
58static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; 58 [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
59 59 [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
60static unsigned int * sctp_timeouts[] 60};
61= { NULL, /* SCTP_CONNTRACK_NONE */
62 &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
63 &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
64 &nf_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
65 &nf_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
66 &nf_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
67 &nf_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
68 &nf_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
69 };
70 61
71#define sNO SCTP_CONNTRACK_NONE 62#define sNO SCTP_CONNTRACK_NONE
72#define sCL SCTP_CONNTRACK_CLOSED 63#define sCL SCTP_CONNTRACK_CLOSED
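
The seven named timeout variables plus NULL-padded pointer array collapse into a single array indexed directly by the sctp_conntrack enum, with designated initializers filling the named slots and the unnamed SCTP_CONNTRACK_NONE slot defaulting to zero. The idiom in miniature:

#include <stdio.h>

enum state { ST_NONE, ST_CLOSED, ST_OPEN, ST_MAX };

/* Designated initializers index by enum value; gaps default to 0. */
static const unsigned int timeout[ST_MAX] = {
        [ST_CLOSED]     = 10,
        [ST_OPEN]       = 300,
};

int main(void)
{
        printf("%u %u %u\n", timeout[ST_NONE], timeout[ST_CLOSED],
               timeout[ST_OPEN]);       /* 0 10 300 */
        return 0;
}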
@@ -110,7 +101,7 @@ cookie echoed to closed.
110*/ 101*/
111 102
112/* SCTP conntrack state transitions */ 103/* SCTP conntrack state transitions */
113static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { 104static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
114 { 105 {
115/* ORIGINAL */ 106/* ORIGINAL */
116/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ 107/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
@@ -173,29 +164,28 @@ static int sctp_print_tuple(struct seq_file *s,
173} 164}
174 165
175/* Print out the private part of the conntrack. */ 166/* Print out the private part of the conntrack. */
176static int sctp_print_conntrack(struct seq_file *s, 167static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
177 const struct nf_conn *conntrack)
178{ 168{
179 enum sctp_conntrack state; 169 enum sctp_conntrack state;
180 170
181 read_lock_bh(&sctp_lock); 171 read_lock_bh(&sctp_lock);
182 state = conntrack->proto.sctp.state; 172 state = ct->proto.sctp.state;
183 read_unlock_bh(&sctp_lock); 173 read_unlock_bh(&sctp_lock);
184 174
185 return seq_printf(s, "%s ", sctp_conntrack_names[state]); 175 return seq_printf(s, "%s ", sctp_conntrack_names[state]);
186} 176}
187 177
188#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ 178#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
189for (offset = dataoff + sizeof(sctp_sctphdr_t), count = 0; \ 179for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0; \
190 offset < skb->len && \ 180 (offset) < (skb)->len && \
191 (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ 181 ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
192 offset += (ntohs(sch->length) + 3) & ~3, count++) 182 (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
193 183
194/* Some validity checks to make sure the chunks are fine */ 184/* Some validity checks to make sure the chunks are fine */
195static int do_basic_checks(struct nf_conn *conntrack, 185static int do_basic_checks(struct nf_conn *ct,
196 const struct sk_buff *skb, 186 const struct sk_buff *skb,
197 unsigned int dataoff, 187 unsigned int dataoff,
198 char *map) 188 unsigned long *map)
199{ 189{
200 u_int32_t offset, count; 190 u_int32_t offset, count;
201 sctp_chunkhdr_t _sch, *sch; 191 sctp_chunkhdr_t _sch, *sch;
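
do_basic_checks() now receives the chunk-type map as unsigned long * instead of casting a char array through void *: the kernel's set_bit()/test_bit() address memory in unsigned long words, so a char[] backing store is under-aligned for them and its bit layout differs on big-endian machines. A userspace analogue of the word-based bitmap (helper names hypothetical):

#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG   (CHAR_BIT * sizeof(unsigned long))
#define BITMAP_WORDS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

static void set_bit_(unsigned int nr, unsigned long *map)
{
        map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static int test_bit_(unsigned int nr, const unsigned long *map)
{
        return (map[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

int main(void)
{
        unsigned long map[BITMAP_WORDS(256)] = { 0 };   /* one bit per chunk type */

        set_bit_(1, map);
        printf("%d %d\n", test_bit_(1, map), test_bit_(2, map));        /* 1 0 */
        return 0;
}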
@@ -206,76 +196,83 @@ static int do_basic_checks(struct nf_conn *conntrack,
206 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 196 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
207 pr_debug("Chunk Num: %d Type: %d\n", count, sch->type); 197 pr_debug("Chunk Num: %d Type: %d\n", count, sch->type);
208 198
209 if (sch->type == SCTP_CID_INIT 199 if (sch->type == SCTP_CID_INIT ||
210 || sch->type == SCTP_CID_INIT_ACK 200 sch->type == SCTP_CID_INIT_ACK ||
211 || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { 201 sch->type == SCTP_CID_SHUTDOWN_COMPLETE)
212 flag = 1; 202 flag = 1;
213 }
214 203
215 /* 204 /*
216 * Cookie Ack/Echo chunks not the first OR 205 * Cookie Ack/Echo chunks not the first OR
217 * Init / Init Ack / Shutdown compl chunks not the only chunks 206 * Init / Init Ack / Shutdown compl chunks not the only chunks
218 * OR zero-length. 207 * OR zero-length.
219 */ 208 */
220 if (((sch->type == SCTP_CID_COOKIE_ACK 209 if (((sch->type == SCTP_CID_COOKIE_ACK ||
221 || sch->type == SCTP_CID_COOKIE_ECHO 210 sch->type == SCTP_CID_COOKIE_ECHO ||
222 || flag) 211 flag) &&
223 && count !=0) || !sch->length) { 212 count != 0) || !sch->length) {
224 pr_debug("Basic checks failed\n"); 213 pr_debug("Basic checks failed\n");
225 return 1; 214 return 1;
226 } 215 }
227 216
228 if (map) { 217 if (map)
229 set_bit(sch->type, (void *)map); 218 set_bit(sch->type, map);
230 }
231 } 219 }
232 220
233 pr_debug("Basic checks passed\n"); 221 pr_debug("Basic checks passed\n");
234 return count == 0; 222 return count == 0;
235} 223}
236 224
237static int new_state(enum ip_conntrack_dir dir, 225static int sctp_new_state(enum ip_conntrack_dir dir,
238 enum sctp_conntrack cur_state, 226 enum sctp_conntrack cur_state,
239 int chunk_type) 227 int chunk_type)
240{ 228{
241 int i; 229 int i;
242 230
243 pr_debug("Chunk type: %d\n", chunk_type); 231 pr_debug("Chunk type: %d\n", chunk_type);
244 232
245 switch (chunk_type) { 233 switch (chunk_type) {
246 case SCTP_CID_INIT: 234 case SCTP_CID_INIT:
247 pr_debug("SCTP_CID_INIT\n"); 235 pr_debug("SCTP_CID_INIT\n");
248 i = 0; break; 236 i = 0;
249 case SCTP_CID_INIT_ACK: 237 break;
250 pr_debug("SCTP_CID_INIT_ACK\n"); 238 case SCTP_CID_INIT_ACK:
251 i = 1; break; 239 pr_debug("SCTP_CID_INIT_ACK\n");
252 case SCTP_CID_ABORT: 240 i = 1;
253 pr_debug("SCTP_CID_ABORT\n"); 241 break;
254 i = 2; break; 242 case SCTP_CID_ABORT:
255 case SCTP_CID_SHUTDOWN: 243 pr_debug("SCTP_CID_ABORT\n");
256 pr_debug("SCTP_CID_SHUTDOWN\n"); 244 i = 2;
257 i = 3; break; 245 break;
258 case SCTP_CID_SHUTDOWN_ACK: 246 case SCTP_CID_SHUTDOWN:
259 pr_debug("SCTP_CID_SHUTDOWN_ACK\n"); 247 pr_debug("SCTP_CID_SHUTDOWN\n");
260 i = 4; break; 248 i = 3;
261 case SCTP_CID_ERROR: 249 break;
262 pr_debug("SCTP_CID_ERROR\n"); 250 case SCTP_CID_SHUTDOWN_ACK:
263 i = 5; break; 251 pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
264 case SCTP_CID_COOKIE_ECHO: 252 i = 4;
265 pr_debug("SCTP_CID_COOKIE_ECHO\n"); 253 break;
266 i = 6; break; 254 case SCTP_CID_ERROR:
267 case SCTP_CID_COOKIE_ACK: 255 pr_debug("SCTP_CID_ERROR\n");
268 pr_debug("SCTP_CID_COOKIE_ACK\n"); 256 i = 5;
269 i = 7; break; 257 break;
270 case SCTP_CID_SHUTDOWN_COMPLETE: 258 case SCTP_CID_COOKIE_ECHO:
271 pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); 259 pr_debug("SCTP_CID_COOKIE_ECHO\n");
272 i = 8; break; 260 i = 6;
273 default: 261 break;
274 /* Other chunks like DATA, SACK, HEARTBEAT and 262 case SCTP_CID_COOKIE_ACK:
275 its ACK do not cause a change in state */ 263 pr_debug("SCTP_CID_COOKIE_ACK\n");
276 pr_debug("Unknown chunk type, Will stay in %s\n", 264 i = 7;
277 sctp_conntrack_names[cur_state]); 265 break;
278 return cur_state; 266 case SCTP_CID_SHUTDOWN_COMPLETE:
267 pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
268 i = 8;
269 break;
270 default:
271 /* Other chunks like DATA, SACK, HEARTBEAT and
272 its ACK do not cause a change in state */
273 pr_debug("Unknown chunk type, Will stay in %s\n",
274 sctp_conntrack_names[cur_state]);
275 return cur_state;
279 } 276 }
280 277
281 pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", 278 pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
@@ -285,154 +282,145 @@ static int new_state(enum ip_conntrack_dir dir,
285 return sctp_conntracks[dir][i][cur_state]; 282 return sctp_conntracks[dir][i][cur_state];
286} 283}
287 284
288/* Returns verdict for packet, or -1 for invalid. */ 285/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
289static int sctp_packet(struct nf_conn *conntrack, 286static int sctp_packet(struct nf_conn *ct,
290 const struct sk_buff *skb, 287 const struct sk_buff *skb,
291 unsigned int dataoff, 288 unsigned int dataoff,
292 enum ip_conntrack_info ctinfo, 289 enum ip_conntrack_info ctinfo,
293 int pf, 290 int pf,
294 unsigned int hooknum) 291 unsigned int hooknum)
295{ 292{
296 enum sctp_conntrack newconntrack, oldsctpstate; 293 enum sctp_conntrack new_state, old_state;
294 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
297 sctp_sctphdr_t _sctph, *sh; 295 sctp_sctphdr_t _sctph, *sh;
298 sctp_chunkhdr_t _sch, *sch; 296 sctp_chunkhdr_t _sch, *sch;
299 u_int32_t offset, count; 297 u_int32_t offset, count;
300 char map[256 / sizeof (char)] = {0}; 298 unsigned long map[256 / sizeof(unsigned long)] = { 0 };
301 299
302 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); 300 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
303 if (sh == NULL) 301 if (sh == NULL)
304 return -1; 302 goto out;
305 303
306 if (do_basic_checks(conntrack, skb, dataoff, map) != 0) 304 if (do_basic_checks(ct, skb, dataoff, map) != 0)
307 return -1; 305 goto out;
308 306
309 /* Check the verification tag (Sec 8.5) */ 307 /* Check the verification tag (Sec 8.5) */
310 if (!test_bit(SCTP_CID_INIT, (void *)map) 308 if (!test_bit(SCTP_CID_INIT, map) &&
311 && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) 309 !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
312 && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) 310 !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
313 && !test_bit(SCTP_CID_ABORT, (void *)map) 311 !test_bit(SCTP_CID_ABORT, map) &&
314 && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) 312 !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
315 && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { 313 sh->vtag != ct->proto.sctp.vtag[dir]) {
316 pr_debug("Verification tag check failed\n"); 314 pr_debug("Verification tag check failed\n");
317 return -1; 315 goto out;
318 } 316 }
319 317
320 oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; 318 old_state = new_state = SCTP_CONNTRACK_MAX;
319 write_lock_bh(&sctp_lock);
321 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 320 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
-		write_lock_bh(&sctp_lock);
-
 		/* Special cases of Verification tag check (Sec 8.5.1) */
 		if (sch->type == SCTP_CID_INIT) {
 			/* Sec 8.5.1 (A) */
-			if (sh->vtag != 0) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			if (sh->vtag != 0)
+				goto out_unlock;
 		} else if (sch->type == SCTP_CID_ABORT) {
 			/* Sec 8.5.1 (B) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
-			    && !(sh->vtag == conntrack->proto.sctp.vtag
-					[1 - CTINFO2DIR(ctinfo)])) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
+			    sh->vtag != ct->proto.sctp.vtag[!dir])
+				goto out_unlock;
 		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
 			/* Sec 8.5.1 (C) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
-			    && !(sh->vtag == conntrack->proto.sctp.vtag
-					[1 - CTINFO2DIR(ctinfo)]
-			    && (sch->flags & 1))) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
+			    sh->vtag != ct->proto.sctp.vtag[!dir] &&
+			    sch->flags & SCTP_CHUNK_FLAG_T)
+				goto out_unlock;
 		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
 			/* Sec 8.5.1 (D) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			if (sh->vtag != ct->proto.sctp.vtag[dir])
+				goto out_unlock;
 		}
 
-		oldsctpstate = conntrack->proto.sctp.state;
-		newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
+		old_state = ct->proto.sctp.state;
+		new_state = sctp_new_state(dir, old_state, sch->type);
 
 		/* Invalid */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
+		if (new_state == SCTP_CONNTRACK_MAX) {
 			pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
 				 "conntrack=%u\n",
-				 CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
-			write_unlock_bh(&sctp_lock);
-			return -1;
+				 dir, sch->type, old_state);
+			goto out_unlock;
 		}
 
 		/* If it is an INIT or an INIT ACK note down the vtag */
-		if (sch->type == SCTP_CID_INIT
-		    || sch->type == SCTP_CID_INIT_ACK) {
+		if (sch->type == SCTP_CID_INIT ||
+		    sch->type == SCTP_CID_INIT_ACK) {
 			sctp_inithdr_t _inithdr, *ih;
 
 			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
 						sizeof(_inithdr), &_inithdr);
-			if (ih == NULL) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
+			if (ih == NULL)
+				goto out_unlock;
 			pr_debug("Setting vtag %x for dir %d\n",
-				 ih->init_tag, !CTINFO2DIR(ctinfo));
-			conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
+				 ih->init_tag, !dir);
+			ct->proto.sctp.vtag[!dir] = ih->init_tag;
 		}
 
-		conntrack->proto.sctp.state = newconntrack;
-		if (oldsctpstate != newconntrack)
+		ct->proto.sctp.state = new_state;
+		if (old_state != new_state)
 			nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
-		write_unlock_bh(&sctp_lock);
 	}
+	write_unlock_bh(&sctp_lock);
 
-	nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
+	nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
 
-	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
-	    && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
-	    && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
+	if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
+	    dir == IP_CT_DIR_REPLY &&
+	    new_state == SCTP_CONNTRACK_ESTABLISHED) {
 		pr_debug("Setting assured bit\n");
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
+		set_bit(IPS_ASSURED_BIT, &ct->status);
 		nf_conntrack_event_cache(IPCT_STATUS, skb);
 	}
 
 	return NF_ACCEPT;
+
+out_unlock:
+	write_unlock_bh(&sctp_lock);
+out:
+	return -NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
-static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
+static int sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		    unsigned int dataoff)
 {
-	enum sctp_conntrack newconntrack;
+	enum sctp_conntrack new_state;
 	sctp_sctphdr_t _sctph, *sh;
 	sctp_chunkhdr_t _sch, *sch;
 	u_int32_t offset, count;
-	char map[256 / sizeof (char)] = {0};
+	unsigned long map[256 / sizeof(unsigned long)] = { 0 };
 
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
 	if (sh == NULL)
 		return 0;
 
-	if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
+	if (do_basic_checks(ct, skb, dataoff, map) != 0)
 		return 0;
 
 	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
-	if ((test_bit (SCTP_CID_ABORT, (void *)map))
-	    || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
-	    || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
+	if (test_bit(SCTP_CID_ABORT, map) ||
+	    test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
+	    test_bit(SCTP_CID_COOKIE_ACK, map))
 		return 0;
-	}
 
-	newconntrack = SCTP_CONNTRACK_MAX;
+	new_state = SCTP_CONNTRACK_MAX;
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
 		/* Don't need lock here: this conntrack not in circulation yet */
-		newconntrack = new_state(IP_CT_DIR_ORIGINAL,
-					 SCTP_CONNTRACK_NONE, sch->type);
+		new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
+					   SCTP_CONNTRACK_NONE, sch->type);
 
 		/* Invalid: delete conntrack */
-		if (newconntrack == SCTP_CONNTRACK_NONE ||
-		    newconntrack == SCTP_CONNTRACK_MAX) {
+		if (new_state == SCTP_CONNTRACK_NONE ||
+		    new_state == SCTP_CONNTRACK_MAX) {
 			pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
 			return 0;
 		}
@@ -450,7 +438,7 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 			pr_debug("Setting vtag %x for new conn\n",
 				 ih->init_tag);
 
-			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
+			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
 				ih->init_tag;
 		} else {
 			/* Sec 8.5.1 (A) */
@@ -462,10 +450,10 @@ static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
 		else {
 			pr_debug("Setting vtag %x for new conn OOTB\n",
 				 sh->vtag);
-			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
 		}
 
-		conntrack->proto.sctp.state = newconntrack;
+		ct->proto.sctp.state = new_state;
 	}
 
 	return 1;
@@ -477,49 +465,49 @@ static struct ctl_table_header *sctp_sysctl_header;
 static struct ctl_table sctp_sysctl_table[] = {
 	{
 		.procname = "nf_conntrack_sctp_timeout_closed",
-		.data = &nf_ct_sctp_timeout_closed,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_sctp_timeout_cookie_wait",
-		.data = &nf_ct_sctp_timeout_cookie_wait,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_sctp_timeout_cookie_echoed",
-		.data = &nf_ct_sctp_timeout_cookie_echoed,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_sctp_timeout_established",
-		.data = &nf_ct_sctp_timeout_established,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_sctp_timeout_shutdown_sent",
-		.data = &nf_ct_sctp_timeout_shutdown_sent,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_sctp_timeout_shutdown_recd",
-		.data = &nf_ct_sctp_timeout_shutdown_recd,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
-		.data = &nf_ct_sctp_timeout_shutdown_ack_sent,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
@@ -533,49 +521,49 @@ static struct ctl_table sctp_sysctl_table[] = {
 static struct ctl_table sctp_compat_sysctl_table[] = {
 	{
 		.procname = "ip_conntrack_sctp_timeout_closed",
-		.data = &nf_ct_sctp_timeout_closed,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_CLOSED],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_sctp_timeout_cookie_wait",
-		.data = &nf_ct_sctp_timeout_cookie_wait,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_WAIT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_sctp_timeout_cookie_echoed",
-		.data = &nf_ct_sctp_timeout_cookie_echoed,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_COOKIE_ECHOED],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_sctp_timeout_established",
-		.data = &nf_ct_sctp_timeout_established,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_ESTABLISHED],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_sctp_timeout_shutdown_sent",
-		.data = &nf_ct_sctp_timeout_shutdown_sent,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_sctp_timeout_shutdown_recd",
-		.data = &nf_ct_sctp_timeout_shutdown_recd,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
-		.data = &nf_ct_sctp_timeout_shutdown_ack_sent,
+		.data = &sctp_timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
@@ -598,6 +586,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.packet = sctp_packet,
 	.new = sctp_new,
 	.me = THIS_MODULE,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+	.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+	.nla_policy = nf_ct_port_nla_policy,
+#endif
 #ifdef CONFIG_SYSCTL
 	.ctl_table_users = &sctp_sysctl_table_users,
 	.ctl_table_header = &sctp_sysctl_header,
@@ -619,6 +612,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.packet = sctp_packet,
 	.new = sctp_new,
 	.me = THIS_MODULE,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
+	.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+	.nla_policy = nf_ct_port_nla_policy,
+#endif
 #ifdef CONFIG_SYSCTL
 	.ctl_table_users = &sctp_sysctl_table_users,
 	.ctl_table_header = &sctp_sysctl_header,
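
The sysctl conversion above follows one pattern used throughout this series: the per-state timeout scalars (and, for TCP below, a NULL-padded array of pointers to them) collapse into a single array indexed by the state enum, and each sysctl entry's .data points straight into that array. A minimal standalone sketch of the idea — demo_state and demo_timeouts are invented names for illustration, not the kernel's declarations:

	#include <stdio.h>

	enum demo_state { DEMO_CLOSED, DEMO_OPEN, DEMO_MAX };

	/* One enum-indexed array replaces DEMO_MAX separate globals; a
	 * sysctl-style table entry can then use &demo_timeouts[DEMO_OPEN]
	 * as its .data pointer, exactly as the patch does. */
	static unsigned int demo_timeouts[DEMO_MAX] = {
		[DEMO_CLOSED]	= 10,
		[DEMO_OPEN]	= 300,
	};

	int main(void)
	{
		enum demo_state s = DEMO_OPEN;
		/* Direct indexing; no *table[s] double indirection needed. */
		printf("timeout for state %d: %u\n", s, demo_timeouts[s]);
		return 0;
	}
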
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 7a3f64c1aca6..64c9b910419c 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -24,6 +24,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
 
 /* Protects conntrack->proto.tcp */
 static DEFINE_RWLOCK(tcp_lock);
@@ -63,32 +64,21 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
-static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
-static unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS;
-static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
-static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
-static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
-static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
-static unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS;
-
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds
    to ~13-30min depending on RTO. */
 static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
 
-static unsigned int * tcp_timeouts[] = {
-	NULL,				/* TCP_CONNTRACK_NONE */
-	&nf_ct_tcp_timeout_syn_sent,	/* TCP_CONNTRACK_SYN_SENT, */
-	&nf_ct_tcp_timeout_syn_recv,	/* TCP_CONNTRACK_SYN_RECV, */
-	&nf_ct_tcp_timeout_established,	/* TCP_CONNTRACK_ESTABLISHED, */
-	&nf_ct_tcp_timeout_fin_wait,	/* TCP_CONNTRACK_FIN_WAIT, */
-	&nf_ct_tcp_timeout_close_wait,	/* TCP_CONNTRACK_CLOSE_WAIT, */
-	&nf_ct_tcp_timeout_last_ack,	/* TCP_CONNTRACK_LAST_ACK, */
-	&nf_ct_tcp_timeout_time_wait,	/* TCP_CONNTRACK_TIME_WAIT, */
-	&nf_ct_tcp_timeout_close,	/* TCP_CONNTRACK_CLOSE, */
-	NULL,				/* TCP_CONNTRACK_LISTEN */
-};
+static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
+	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
+	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
+	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
+	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
+	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
+	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
+	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
+	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
+};
 
 #define sNO TCP_CONNTRACK_NONE
 #define sSS TCP_CONNTRACK_SYN_SENT
@@ -148,7 +138,7 @@ enum tcp_bit_set {
  * if they are invalid
  * or we do not support the request (simultaneous open)
  */
-static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 	{
 /* ORIGINAL */
 /*	sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
@@ -783,9 +773,7 @@ static int tcp_error(struct sk_buff *skb,
 	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
-	if (nf_conntrack_checksum &&
-	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
-	     (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
+	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
 		if (LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
@@ -942,8 +930,8 @@ static int tcp_packet(struct nf_conn *conntrack,
 	     || new_state == TCP_CONNTRACK_CLOSE))
 		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
 	timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
-		  && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
-		  ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+		  && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
+		  ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
 	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
@@ -1074,14 +1062,13 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
 	if (!nest_parms)
 		goto nla_put_failure;
 
-	NLA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
-		&ct->proto.tcp.state);
+	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
 
-	NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
-		&ct->proto.tcp.seen[0].td_scale);
+	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+		   ct->proto.tcp.seen[0].td_scale);
 
-	NLA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
-		&ct->proto.tcp.seen[1].td_scale);
+	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
+		   ct->proto.tcp.seen[1].td_scale);
 
 	tmp.flags = ct->proto.tcp.seen[0].flags;
 	NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
@@ -1128,8 +1115,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 		return -EINVAL;
 
 	write_lock_bh(&tcp_lock);
-	ct->proto.tcp.state =
-		*(u_int8_t *)nla_data(tb[CTA_PROTOINFO_TCP_STATE]);
+	ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
 
 	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
 		struct nf_ct_tcp_flags *attr =
@@ -1149,10 +1135,10 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
 	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
 	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
-		ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
-			nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
-		ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
-			nla_data(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
+		ct->proto.tcp.seen[0].td_scale =
+			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
+		ct->proto.tcp.seen[1].td_scale =
+			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
 	}
 	write_unlock_bh(&tcp_lock);
 
@@ -1166,56 +1152,56 @@ static struct ctl_table_header *tcp_sysctl_header;
 static struct ctl_table tcp_sysctl_table[] = {
 	{
 		.procname = "nf_conntrack_tcp_timeout_syn_sent",
-		.data = &nf_ct_tcp_timeout_syn_sent,
+		.data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_tcp_timeout_syn_recv",
-		.data = &nf_ct_tcp_timeout_syn_recv,
+		.data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_tcp_timeout_established",
-		.data = &nf_ct_tcp_timeout_established,
+		.data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_tcp_timeout_fin_wait",
-		.data = &nf_ct_tcp_timeout_fin_wait,
+		.data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_tcp_timeout_close_wait",
-		.data = &nf_ct_tcp_timeout_close_wait,
+		.data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_tcp_timeout_last_ack",
-		.data = &nf_ct_tcp_timeout_last_ack,
+		.data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_tcp_timeout_time_wait",
-		.data = &nf_ct_tcp_timeout_time_wait,
+		.data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "nf_conntrack_tcp_timeout_close",
-		.data = &nf_ct_tcp_timeout_close,
+		.data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
@@ -1260,56 +1246,56 @@ static struct ctl_table tcp_sysctl_table[] = {
 static struct ctl_table tcp_compat_sysctl_table[] = {
 	{
 		.procname = "ip_conntrack_tcp_timeout_syn_sent",
-		.data = &nf_ct_tcp_timeout_syn_sent,
+		.data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_tcp_timeout_syn_recv",
-		.data = &nf_ct_tcp_timeout_syn_recv,
+		.data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_tcp_timeout_established",
-		.data = &nf_ct_tcp_timeout_established,
+		.data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_tcp_timeout_fin_wait",
-		.data = &nf_ct_tcp_timeout_fin_wait,
+		.data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_tcp_timeout_close_wait",
-		.data = &nf_ct_tcp_timeout_close_wait,
+		.data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_tcp_timeout_last_ack",
-		.data = &nf_ct_tcp_timeout_last_ack,
+		.data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_tcp_timeout_time_wait",
-		.data = &nf_ct_tcp_timeout_time_wait,
+		.data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
 	},
 	{
 		.procname = "ip_conntrack_tcp_timeout_close",
-		.data = &nf_ct_tcp_timeout_close,
+		.data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
 		.maxlen = sizeof(unsigned int),
 		.mode = 0644,
 		.proc_handler = &proc_dointvec_jiffies,
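
The NLA_PUT_U8()/nla_get_u8() conversions in tcp_to_nlattr()/nlattr_to_tcp() above trade an explicit size-plus-pointer attribute write for a helper that derives the length from the type. A hedged userspace analogy of the two calling styles — the toy attr_put/attr_put_u8 below are invented for illustration, not the kernel's netlink API:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static unsigned char buf[64];	/* toy attribute: [len][payload] */

	static void attr_put(const void *data, size_t len)	/* old style */
	{
		buf[0] = (unsigned char)len;
		memcpy(buf + 1, data, len);
	}

	static void attr_put_u8(uint8_t value)			/* new style */
	{
		attr_put(&value, sizeof(value));	/* size implied by type */
	}

	static uint8_t attr_get_u8(void)
	{
		return buf[1];
	}

	int main(void)
	{
		uint8_t state = 3;
		attr_put(&state, sizeof(uint8_t));	/* caller spells out size */
		attr_put_u8(state);			/* type carries the size */
		printf("state round-trips as %u\n", attr_get_u8());
		return 0;
	}
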
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index b3e7ecb080e6..384875411082 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -21,6 +21,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
 
 static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
 static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
@@ -59,13 +60,6 @@ static int udp_print_tuple(struct seq_file *s,
 			  ntohs(tuple->dst.u.udp.port));
 }
 
-/* Print out the private part of the conntrack. */
-static int udp_print_conntrack(struct seq_file *s,
-			       const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns verdict for packet, and may modify conntracktype */
 static int udp_packet(struct nf_conn *conntrack,
 		      const struct sk_buff *skb,
@@ -128,9 +122,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
 	 * We skip checking packets on the outgoing path
 	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
-	if (nf_conntrack_checksum &&
-	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
-	     (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
+	if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
 		if (LOG_INVALID(IPPROTO_UDP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
@@ -194,7 +186,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
 	.pkt_to_tuple = udp_pkt_to_tuple,
 	.invert_tuple = udp_invert_tuple,
 	.print_tuple = udp_print_tuple,
-	.print_conntrack = udp_print_conntrack,
 	.packet = udp_packet,
 	.new = udp_new,
 	.error = udp_error,
@@ -222,7 +213,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
 	.pkt_to_tuple = udp_pkt_to_tuple,
 	.invert_tuple = udp_invert_tuple,
 	.print_tuple = udp_print_tuple,
-	.print_conntrack = udp_print_conntrack,
 	.packet = udp_packet,
 	.new = udp_new,
 	.error = udp_error,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index b8981dd922be..070056d9bcd6 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -22,6 +22,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_log.h>
 
 static unsigned int nf_ct_udplite_timeout __read_mostly = 30*HZ;
 static unsigned int nf_ct_udplite_timeout_stream __read_mostly = 180*HZ;
@@ -58,13 +59,6 @@ static int udplite_print_tuple(struct seq_file *s,
 			  ntohs(tuple->dst.u.udp.port));
 }
 
-/* Print out the private part of the conntrack. */
-static int udplite_print_conntrack(struct seq_file *s,
-				   const struct nf_conn *conntrack)
-{
-	return 0;
-}
-
 /* Returns verdict for packet, and may modify conntracktype */
 static int udplite_packet(struct nf_conn *conntrack,
 			  const struct sk_buff *skb,
@@ -133,8 +127,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff,
 
 	/* Checksum invalid? Ignore. */
 	if (nf_conntrack_checksum && !skb_csum_unnecessary(skb) &&
-	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
-	     (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING))) {
+	    hooknum == NF_INET_PRE_ROUTING) {
 		if (pf == PF_INET) {
 			struct iphdr *iph = ip_hdr(skb);
 
@@ -198,7 +191,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
 	.pkt_to_tuple = udplite_pkt_to_tuple,
 	.invert_tuple = udplite_invert_tuple,
 	.print_tuple = udplite_print_tuple,
-	.print_conntrack = udplite_print_conntrack,
 	.packet = udplite_packet,
 	.new = udplite_new,
 	.error = udplite_error,
@@ -222,7 +214,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 	.pkt_to_tuple = udplite_pkt_to_tuple,
 	.invert_tuple = udplite_invert_tuple,
 	.print_tuple = udplite_print_tuple,
-	.print_conntrack = udplite_print_conntrack,
 	.packet = udplite_packet,
 	.new = udplite_new,
 	.error = udplite_error,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 515abffc4a09..47d8947cf263 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -247,7 +247,7 @@ static int skp_digits_len(struct nf_conn *ct, const char *dptr,
 }
 
 static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp,
-		      union nf_conntrack_address *addr, const char *limit)
+		      union nf_inet_addr *addr, const char *limit)
 {
 	const char *end;
 	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
@@ -275,7 +275,7 @@ static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp,
 static int epaddr_len(struct nf_conn *ct, const char *dptr,
 		      const char *limit, int *shift)
 {
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	const char *aux = dptr;
 
 	if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
@@ -366,7 +366,7 @@ EXPORT_SYMBOL_GPL(ct_sip_get_info);
 static int set_expected_rtp(struct sk_buff *skb,
 			    struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
-			    union nf_conntrack_address *addr,
+			    union nf_inet_addr *addr,
 			    __be16 port,
 			    const char *dptr)
 {
@@ -403,7 +403,7 @@ static int sip_help(struct sk_buff *skb,
 		    enum ip_conntrack_info ctinfo)
 {
 	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
-	union nf_conntrack_address addr;
+	union nf_inet_addr addr;
 	unsigned int dataoff, datalen;
 	const char *dptr;
 	int ret = NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9efdd37fc195..696074a037c1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -142,10 +142,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		    ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
 		return -ENOSPC;
 
-	if (l3proto->print_conntrack(s, conntrack))
-		return -ENOSPC;
-
-	if (l4proto->print_conntrack(s, conntrack))
+	if (l4proto->print_conntrack && l4proto->print_conntrack(s, conntrack))
 		return -ENOSPC;
 
 	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
@@ -383,15 +380,11 @@ static ctl_table nf_ct_netfilter_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static ctl_table nf_ct_net_table[] = {
-	{
-		.ctl_name = CTL_NET,
-		.procname = "net",
-		.mode = 0555,
-		.child = nf_ct_netfilter_table,
-	},
-	{ .ctl_name = 0 }
+struct ctl_path nf_ct_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ }
 };
+
 EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
 #endif /* CONFIG_SYSCTL */
 
@@ -418,7 +411,8 @@ static int __init nf_conntrack_standalone_init(void)
 	proc_stat->owner = THIS_MODULE;
 #endif
 #ifdef CONFIG_SYSCTL
-	nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table);
+	nf_ct_sysctl_header = register_sysctl_paths(nf_ct_path,
+						    nf_ct_netfilter_table);
 	if (nf_ct_sysctl_header == NULL) {
 		printk("nf_conntrack: can't register to sysctl.\n");
 		ret = -ENOMEM;
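
register_sysctl_paths() takes the directory part of the hierarchy as a flat, empty-terminated array of struct ctl_path instead of one nested ctl_table with a .child pointer per level, which is what lets the hand-rolled path_dup()/path_free() helpers in nf_sysctl.c (deleted below) go away. The shape of that change, as a toy path walker with invented names (demo_path, register_under):

	#include <stdio.h>

	struct demo_path { const char *procname; };

	/* Flat list of directory components, empty-terminated, standing in
	 * for one nested "child" table per level. */
	static const struct demo_path nf_path[] = {
		{ .procname = "net" },
		{ .procname = "netfilter" },
		{ }
	};

	static void register_under(const struct demo_path *path, const char *leaf)
	{
		printf("/proc/sys");
		for (; path->procname != NULL; path++)
			printf("/%s", path->procname);
		printf("/%s\n", leaf);
	}

	int main(void)
	{
		/* Prints /proc/sys/net/netfilter/nf_conntrack_max */
		register_under(nf_path, "nf_conntrack_max");
		return 0;
	}
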
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index d67c4fbf6031..4f5f2885fcac 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -6,6 +6,7 @@
 #include <linux/netfilter.h>
 #include <linux/seq_file.h>
 #include <net/protocol.h>
+#include <net/netfilter/nf_log.h>
 
 #include "nf_internals.h"
 
@@ -14,12 +15,12 @@
 
 #define NF_LOG_PREFIXLEN 128
 
-static struct nf_logger *nf_loggers[NPROTO];
+static const struct nf_logger *nf_loggers[NPROTO] __read_mostly;
 static DEFINE_MUTEX(nf_log_mutex);
 
 /* return EBUSY if somebody else is registered, EEXIST if the same logger
  * is registred, 0 on success. */
-int nf_log_register(int pf, struct nf_logger *logger)
+int nf_log_register(int pf, const struct nf_logger *logger)
 {
 	int ret;
 
@@ -57,7 +58,7 @@ void nf_log_unregister_pf(int pf)
 }
 EXPORT_SYMBOL(nf_log_unregister_pf);
 
-void nf_log_unregister(struct nf_logger *logger)
+void nf_log_unregister(const struct nf_logger *logger)
 {
 	int i;
 
@@ -77,12 +78,12 @@ void nf_log_packet(int pf,
 		   const struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
-		   struct nf_loginfo *loginfo,
+		   const struct nf_loginfo *loginfo,
 		   const char *fmt, ...)
 {
 	va_list args;
 	char prefix[NF_LOG_PREFIXLEN];
-	struct nf_logger *logger;
+	const struct nf_logger *logger;
 
 	rcu_read_lock();
 	logger = rcu_dereference(nf_loggers[pf]);
@@ -90,7 +91,6 @@ void nf_log_packet(int pf,
 		va_start(args, fmt);
 		vsnprintf(prefix, sizeof(prefix), fmt, args);
 		va_end(args);
-		/* We must read logging before nf_logfn[pf] */
 		logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
 	} else if (net_ratelimit()) {
 		printk(KERN_WARNING "nf_log_packet: can\'t log since "
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 0cef1433d660..bfc2928c1912 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -7,6 +7,7 @@
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
 #include <net/protocol.h>
+#include <net/netfilter/nf_queue.h>
 
 #include "nf_internals.h"
 
@@ -15,13 +16,13 @@
  * long term mutex. The handler must provide an an outfn() to accept packets
  * for queueing and must reinject all packets it receives, no matter what.
  */
-static struct nf_queue_handler *queue_handler[NPROTO];
+static const struct nf_queue_handler *queue_handler[NPROTO];
 
 static DEFINE_MUTEX(queue_handler_mutex);
 
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
-int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
+int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
 {
 	int ret;
 
@@ -44,7 +45,7 @@ int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
 EXPORT_SYMBOL(nf_register_queue_handler);
 
 /* The caller must flush their queue before this */
-int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
+int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh)
 {
 	if (pf >= NPROTO)
 		return -EINVAL;
@@ -64,7 +65,7 @@ int nf_unregister_queue_handler(int pf, struct nf_queue_handler *qh)
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 
-void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
+void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 {
 	int pf;
 
@@ -79,6 +80,27 @@ void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
 }
 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
 
+static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+{
+	/* Release those devices we held, or Alexey will kill me. */
+	if (entry->indev)
+		dev_put(entry->indev);
+	if (entry->outdev)
+		dev_put(entry->outdev);
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (entry->skb->nf_bridge) {
+		struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+
+		if (nf_bridge->physindev)
+			dev_put(nf_bridge->physindev);
+		if (nf_bridge->physoutdev)
+			dev_put(nf_bridge->physoutdev);
+	}
+#endif
+	/* Drop reference to owner of hook which queued us. */
+	module_put(entry->elem->owner);
+}
+
 /*
  * Any packet that leaves via this function must come back
  * through nf_reinject().
@@ -92,84 +114,79 @@ static int __nf_queue(struct sk_buff *skb,
 		      unsigned int queuenum)
 {
 	int status;
-	struct nf_info *info;
+	struct nf_queue_entry *entry = NULL;
 #ifdef CONFIG_BRIDGE_NETFILTER
-	struct net_device *physindev = NULL;
-	struct net_device *physoutdev = NULL;
+	struct net_device *physindev;
+	struct net_device *physoutdev;
 #endif
-	struct nf_afinfo *afinfo;
-	struct nf_queue_handler *qh;
+	const struct nf_afinfo *afinfo;
+	const struct nf_queue_handler *qh;
 
 	/* QUEUE == DROP if noone is waiting, to be safe. */
 	rcu_read_lock();
 
 	qh = rcu_dereference(queue_handler[pf]);
-	if (!qh) {
-		rcu_read_unlock();
-		kfree_skb(skb);
-		return 1;
-	}
+	if (!qh)
+		goto err_unlock;
 
 	afinfo = nf_get_afinfo(pf);
-	if (!afinfo) {
-		rcu_read_unlock();
-		kfree_skb(skb);
-		return 1;
-	}
-
-	info = kmalloc(sizeof(*info) + afinfo->route_key_size, GFP_ATOMIC);
-	if (!info) {
-		if (net_ratelimit())
-			printk(KERN_ERR "OOM queueing packet %p\n",
-			       skb);
-		rcu_read_unlock();
-		kfree_skb(skb);
-		return 1;
-	}
-
-	*info = (struct nf_info) {
-		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
+	if (!afinfo)
+		goto err_unlock;
+
+	entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
+	if (!entry)
+		goto err_unlock;
+
+	*entry = (struct nf_queue_entry) {
+		.skb = skb,
+		.elem = list_entry(elem, struct nf_hook_ops, list),
+		.pf = pf,
+		.hook = hook,
+		.indev = indev,
+		.outdev = outdev,
+		.okfn = okfn,
+	};
 
 	/* If it's going away, ignore hook. */
-	if (!try_module_get(info->elem->owner)) {
+	if (!try_module_get(entry->elem->owner)) {
 		rcu_read_unlock();
-		kfree(info);
+		kfree(entry);
 		return 0;
 	}
 
 	/* Bump dev refs so they don't vanish while packet is out */
-	if (indev) dev_hold(indev);
-	if (outdev) dev_hold(outdev);
-
+	if (indev)
+		dev_hold(indev);
+	if (outdev)
+		dev_hold(outdev);
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge) {
 		physindev = skb->nf_bridge->physindev;
-		if (physindev) dev_hold(physindev);
+		if (physindev)
+			dev_hold(physindev);
 		physoutdev = skb->nf_bridge->physoutdev;
-		if (physoutdev) dev_hold(physoutdev);
+		if (physoutdev)
+			dev_hold(physoutdev);
 	}
 #endif
-	afinfo->saveroute(skb, info);
-	status = qh->outfn(skb, info, queuenum, qh->data);
+	afinfo->saveroute(skb, entry);
+	status = qh->outfn(entry, queuenum);
 
 	rcu_read_unlock();
 
 	if (status < 0) {
-		/* James M doesn't say fuck enough. */
-		if (indev) dev_put(indev);
-		if (outdev) dev_put(outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-		if (physindev) dev_put(physindev);
-		if (physoutdev) dev_put(physoutdev);
-#endif
-		module_put(info->elem->owner);
-		kfree(info);
-		kfree_skb(skb);
-
-		return 1;
+		nf_queue_entry_release_refs(entry);
+		goto err;
 	}
 
 	return 1;
+
+err_unlock:
+	rcu_read_unlock();
+err:
+	kfree_skb(skb);
+	kfree(entry);
+	return 1;
 }
 
 int nf_queue(struct sk_buff *skb,
@@ -212,41 +229,15 @@ int nf_queue(struct sk_buff *skb,
 	return 1;
 }
 
-void nf_reinject(struct sk_buff *skb, struct nf_info *info,
-		 unsigned int verdict)
+void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
-	struct list_head *elem = &info->elem->list;
-	struct list_head *i;
-	struct nf_afinfo *afinfo;
+	struct sk_buff *skb = entry->skb;
+	struct list_head *elem = &entry->elem->list;
+	const struct nf_afinfo *afinfo;
 
 	rcu_read_lock();
 
-	/* Release those devices we held, or Alexey will kill me. */
-	if (info->indev) dev_put(info->indev);
-	if (info->outdev) dev_put(info->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
-	if (skb->nf_bridge) {
-		if (skb->nf_bridge->physindev)
-			dev_put(skb->nf_bridge->physindev);
-		if (skb->nf_bridge->physoutdev)
-			dev_put(skb->nf_bridge->physoutdev);
-	}
-#endif
-
-	/* Drop reference to owner of hook which queued us. */
-	module_put(info->elem->owner);
-
-	list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
-		if (i == elem)
-			break;
-	}
-
-	if (i == &nf_hooks[info->pf][info->hook]) {
-		/* The module which sent it to userspace is gone. */
-		NFDEBUG("%s: module disappeared, dropping packet.\n",
-			__FUNCTION__);
-		verdict = NF_DROP;
-	}
+	nf_queue_entry_release_refs(entry);
 
 	/* Continue traversal iff userspace said ok... */
 	if (verdict == NF_REPEAT) {
@@ -255,28 +246,30 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 	}
 
 	if (verdict == NF_ACCEPT) {
-		afinfo = nf_get_afinfo(info->pf);
-		if (!afinfo || afinfo->reroute(skb, info) < 0)
+		afinfo = nf_get_afinfo(entry->pf);
+		if (!afinfo || afinfo->reroute(skb, entry) < 0)
 			verdict = NF_DROP;
 	}
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
-				     skb, info->hook,
-				     info->indev, info->outdev, &elem,
-				     info->okfn, INT_MIN);
+		verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
+				     skb, entry->hook,
+				     entry->indev, entry->outdev, &elem,
+				     entry->okfn, INT_MIN);
 	}
 
 	switch (verdict & NF_VERDICT_MASK) {
 	case NF_ACCEPT:
 	case NF_STOP:
-		info->okfn(skb);
+		local_bh_disable();
+		entry->okfn(skb);
+		local_bh_enable();
 	case NF_STOLEN:
 		break;
 	case NF_QUEUE:
-		if (!__nf_queue(skb, elem, info->pf, info->hook,
-				info->indev, info->outdev, info->okfn,
+		if (!__nf_queue(skb, elem, entry->pf, entry->hook,
+				entry->indev, entry->outdev, entry->okfn,
 				verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 		break;
@@ -284,7 +277,7 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 		kfree_skb(skb);
 	}
 	rcu_read_unlock();
-	kfree(info);
+	kfree(entry);
 	return;
 }
 EXPORT_SYMBOL(nf_reinject);
@@ -317,7 +310,7 @@ static int seq_show(struct seq_file *s, void *v)
 {
 	int ret;
 	loff_t *pos = v;
-	struct nf_queue_handler *qh;
+	const struct nf_queue_handler *qh;
 
 	rcu_read_lock();
 	qh = rcu_dereference(queue_handler[*pos]);
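
The nf_queue rework above replaces the loose pairing of an skb and a separately allocated struct nf_info with a single heap-allocated struct nf_queue_entry that owns both, and funnels every unwind path (queueing failure in __nf_queue(), reinjection in nf_reinject()) through one nf_queue_entry_release_refs() helper instead of two hand-copied dev_put()/module_put() sequences. A reduced sketch of that ownership pattern — resource, entry and hold() are stand-ins, not the kernel types:

	#include <stdio.h>
	#include <stdlib.h>

	struct resource { int refs; };

	struct entry {			/* parallels nf_queue_entry */
		struct resource *in;
		struct resource *out;
	};

	static void hold(struct resource *r) { if (r) r->refs++; }

	/* Single helper shared by every unwind path, mirroring
	 * nf_queue_entry_release_refs(). */
	static void entry_release_refs(struct entry *e)
	{
		if (e->in)
			e->in->refs--;
		if (e->out)
			e->out->refs--;
	}

	int main(void)
	{
		struct resource dev_in = { 0 }, dev_out = { 0 };
		struct entry *e = malloc(sizeof(*e));

		if (!e)
			return 1;
		*e = (struct entry) { .in = &dev_in, .out = &dev_out };
		hold(e->in);
		hold(e->out);
		/* ...queueing fails, or the packet is reinjected... */
		entry_release_refs(e);
		free(e);
		printf("refs: in=%d out=%d\n", dev_in.refs, dev_out.refs);
		return 0;
	}
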
diff --git a/net/netfilter/nf_sysctl.c b/net/netfilter/nf_sysctl.c
deleted file mode 100644
index ee34589e48a4..000000000000
--- a/net/netfilter/nf_sysctl.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/* nf_sysctl.c	netfilter sysctl registration/unregistation
- *
- * Copyright (c) 2006 Patrick McHardy <kaber@trash.net>
- */
-#include <linux/module.h>
-#include <linux/sysctl.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-
-static void
-path_free(struct ctl_table *path, struct ctl_table *table)
-{
-	struct ctl_table *t, *next;
-
-	for (t = path; t != NULL && t != table; t = next) {
-		next = t->child;
-		kfree(t);
-	}
-}
-
-static struct ctl_table *
-path_dup(struct ctl_table *path, struct ctl_table *table)
-{
-	struct ctl_table *t, *last = NULL, *tmp;
-
-	for (t = path; t != NULL; t = t->child) {
-		/* twice the size since path elements are terminated by an
-		 * empty element */
-		tmp = kmemdup(t, 2 * sizeof(*t), GFP_KERNEL);
-		if (tmp == NULL) {
-			if (last != NULL)
-				path_free(path, table);
-			return NULL;
-		}
-
-		if (last != NULL)
-			last->child = tmp;
-		else
-			path = tmp;
-		last = tmp;
-	}
-
-	if (last != NULL)
-		last->child = table;
-	else
-		path = table;
-
-	return path;
-}
-
-struct ctl_table_header *
-nf_register_sysctl_table(struct ctl_table *path, struct ctl_table *table)
-{
-	struct ctl_table_header *header;
-
-	path = path_dup(path, table);
-	if (path == NULL)
-		return NULL;
-	header = register_sysctl_table(path);
-	if (header == NULL)
-		path_free(path, table);
-	return header;
-}
-EXPORT_SYMBOL_GPL(nf_register_sysctl_table);
-
-void
-nf_unregister_sysctl_table(struct ctl_table_header *header,
-			   struct ctl_table *table)
-{
-	struct ctl_table *path = header->ctl_table;
-
-	unregister_sysctl_table(header);
-	path_free(path, table);
-}
-EXPORT_SYMBOL_GPL(nf_unregister_sysctl_table);
-
-/* net/netfilter */
-static struct ctl_table nf_net_netfilter_table[] = {
-	{
-		.ctl_name = NET_NETFILTER,
-		.procname = "netfilter",
-		.mode = 0555,
-	},
-	{
-		.ctl_name = 0
-	}
-};
-struct ctl_table nf_net_netfilter_sysctl_path[] = {
-	{
-		.ctl_name = CTL_NET,
-		.procname = "net",
-		.mode = 0555,
-		.child = nf_net_netfilter_table,
-	},
-	{
-		.ctl_name = 0
-	}
-};
-EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path);
-
-/* net/ipv4/netfilter */
-static struct ctl_table nf_net_ipv4_netfilter_table[] = {
-	{
-		.ctl_name = NET_IPV4_NETFILTER,
-		.procname = "netfilter",
-		.mode = 0555,
-	},
-	{
-		.ctl_name = 0
-	}
-};
-static struct ctl_table nf_net_ipv4_table[] = {
-	{
-		.ctl_name = NET_IPV4,
-		.procname = "ipv4",
-		.mode = 0555,
-		.child = nf_net_ipv4_netfilter_table,
-	},
-	{
-		.ctl_name = 0
-	}
-};
-struct ctl_table nf_net_ipv4_netfilter_sysctl_path[] = {
-	{
-		.ctl_name = CTL_NET,
-		.procname = "net",
-		.mode = 0555,
-		.child = nf_net_ipv4_table,
-	},
-	{
-		.ctl_name = 0
-	}
-};
-EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 2128542995f7..b75c9c4a995d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -179,7 +179,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 static void __exit nfnetlink_exit(void)
 {
 	printk("Removing netfilter NETLINK layer.\n");
-	sock_release(nfnl->sk_socket);
+	netlink_kernel_release(nfnl);
 	return;
 }
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 2c7bd2eb0294..5013cb97ce2b 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -29,6 +29,7 @@
 #include <linux/jhash.h>
 #include <linux/random.h>
 #include <net/sock.h>
+#include <net/netfilter/nf_log.h>
 
 #include <asm/atomic.h>
 
@@ -44,14 +45,6 @@
 #define PRINTR(x, args...)	do { if (net_ratelimit()) \
 				     printk(x, ## args); } while (0);
 
-#if 0
-#define UDEBUG(x, args ...)	printk(KERN_DEBUG "%s(%d):%s(): " x, \
-					__FILE__, __LINE__, __FUNCTION__, \
-					## args)
-#else
-#define UDEBUG(x, ...)
-#endif
-
 struct nfulnl_instance {
 	struct hlist_node hlist;	/* global list of instances */
 	spinlock_t lock;
@@ -92,8 +85,6 @@ __instance_lookup(u_int16_t group_num)
 	struct hlist_node *pos;
 	struct nfulnl_instance *inst;
 
-	UDEBUG("entering (group_num=%u)\n", group_num);
-
 	head = &instance_table[instance_hashfn(group_num)];
 	hlist_for_each_entry(inst, pos, head, hlist) {
 		if (inst->group_num == group_num)
@@ -126,7 +117,6 @@ static void
126instance_put(struct nfulnl_instance *inst) 117instance_put(struct nfulnl_instance *inst)
127{ 118{
128 if (inst && atomic_dec_and_test(&inst->use)) { 119 if (inst && atomic_dec_and_test(&inst->use)) {
129 UDEBUG("kfree(inst=%p)\n", inst);
130 kfree(inst); 120 kfree(inst);
131 module_put(THIS_MODULE); 121 module_put(THIS_MODULE);
132 } 122 }
@@ -138,23 +128,23 @@ static struct nfulnl_instance *
138instance_create(u_int16_t group_num, int pid) 128instance_create(u_int16_t group_num, int pid)
139{ 129{
140 struct nfulnl_instance *inst; 130 struct nfulnl_instance *inst;
141 131 int err;
142 UDEBUG("entering (group_num=%u, pid=%d)\n", group_num,
143 pid);
144 132
145 write_lock_bh(&instances_lock); 133 write_lock_bh(&instances_lock);
146 if (__instance_lookup(group_num)) { 134 if (__instance_lookup(group_num)) {
147 inst = NULL; 135 err = -EEXIST;
148 UDEBUG("aborting, instance already exists\n");
149 goto out_unlock; 136 goto out_unlock;
150 } 137 }
151 138
152 inst = kzalloc(sizeof(*inst), GFP_ATOMIC); 139 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
153 if (!inst) 140 if (!inst) {
141 err = -ENOMEM;
154 goto out_unlock; 142 goto out_unlock;
143 }
155 144
156 if (!try_module_get(THIS_MODULE)) { 145 if (!try_module_get(THIS_MODULE)) {
157 kfree(inst); 146 kfree(inst);
147 err = -EAGAIN;
158 goto out_unlock; 148 goto out_unlock;
159 } 149 }
160 150
@@ -177,16 +167,13 @@ instance_create(u_int16_t group_num, int pid)
177 hlist_add_head(&inst->hlist, 167 hlist_add_head(&inst->hlist,
178 &instance_table[instance_hashfn(group_num)]); 168 &instance_table[instance_hashfn(group_num)]);
179 169
180 UDEBUG("newly added node: %p, next=%p\n", &inst->hlist,
181 inst->hlist.next);
182
183 write_unlock_bh(&instances_lock); 170 write_unlock_bh(&instances_lock);
184 171
185 return inst; 172 return inst;
186 173
187out_unlock: 174out_unlock:
188 write_unlock_bh(&instances_lock); 175 write_unlock_bh(&instances_lock);
189 return NULL; 176 return ERR_PTR(err);
190} 177}
191 178
192static void __nfulnl_flush(struct nfulnl_instance *inst); 179static void __nfulnl_flush(struct nfulnl_instance *inst);
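
instance_create() now propagates the precise failure (-EEXIST, -ENOMEM, -EAGAIN) by encoding it in the returned pointer instead of collapsing every error to NULL. A minimal sketch of the <linux/err.h> convention the callers rely on, using hypothetical names:

    #include <linux/err.h>
    #include <linux/slab.h>

    struct widget { int id; };

    /* hypothetical allocator following the ERR_PTR() convention */
    static struct widget *widget_create(int id)
    {
            struct widget *w;

            if (id < 0)
                    return ERR_PTR(-EINVAL);        /* errno encoded in the pointer */
            w = kzalloc(sizeof(*w), GFP_KERNEL);
            if (!w)
                    return ERR_PTR(-ENOMEM);
            w->id = id;
            return w;
    }

    static int widget_caller(int id)
    {
            struct widget *w = widget_create(id);

            if (IS_ERR(w))                  /* true for any ERR_PTR() value */
                    return PTR_ERR(w);      /* recover the negative errno */
            kfree(w);
            return 0;
    }
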
@@ -195,9 +182,6 @@ static void
195__instance_destroy(struct nfulnl_instance *inst) 182__instance_destroy(struct nfulnl_instance *inst)
196{ 183{
197 /* first pull it out of the global list */ 184 /* first pull it out of the global list */
198 UDEBUG("removing instance %p (queuenum=%u) from hash\n",
199 inst, inst->group_num);
200
201 hlist_del(&inst->hlist); 185 hlist_del(&inst->hlist);
202 186
203 /* then flush all pending packets from skb */ 187 /* then flush all pending packets from skb */
@@ -305,8 +289,6 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
305 struct sk_buff *skb; 289 struct sk_buff *skb;
306 unsigned int n; 290 unsigned int n;
307 291
308 UDEBUG("entered (%u, %u)\n", inst_size, pkt_size);
309
310 /* alloc skb which should be big enough for a whole multipart 292 /* alloc skb which should be big enough for a whole multipart
311 * message. WARNING: has to be <= 128k due to slab restrictions */ 293 * message. WARNING: has to be <= 128k due to slab restrictions */
312 294
@@ -341,10 +323,6 @@ __nfulnl_send(struct nfulnl_instance *inst)
341 sizeof(struct nfgenmsg)); 323 sizeof(struct nfgenmsg));
342 324
343 status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); 325 status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
344 if (status < 0) {
345 UDEBUG("netlink_unicast() failed\n");
346 /* FIXME: statistics */
347 }
348 326
349 inst->qlen = 0; 327 inst->qlen = 0;
350 inst->skb = NULL; 328 inst->skb = NULL;
@@ -368,8 +346,6 @@ nfulnl_timer(unsigned long data)
368{ 346{
369 struct nfulnl_instance *inst = (struct nfulnl_instance *)data; 347 struct nfulnl_instance *inst = (struct nfulnl_instance *)data;
370 348
371 UDEBUG("timer function called, flushing buffer\n");
372
373 spin_lock_bh(&inst->lock); 349 spin_lock_bh(&inst->lock);
374 if (inst->skb) 350 if (inst->skb)
375 __nfulnl_send(inst); 351 __nfulnl_send(inst);
@@ -396,8 +372,6 @@ __build_packet_message(struct nfulnl_instance *inst,
396 __be32 tmp_uint; 372 __be32 tmp_uint;
397 sk_buff_data_t old_tail = inst->skb->tail; 373 sk_buff_data_t old_tail = inst->skb->tail;
398 374
399 UDEBUG("entered\n");
400
401 nlh = NLMSG_PUT(inst->skb, 0, 0, 375 nlh = NLMSG_PUT(inst->skb, 0, 0,
402 NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, 376 NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
403 sizeof(struct nfgenmsg)); 377 sizeof(struct nfgenmsg));
@@ -415,32 +389,27 @@ __build_packet_message(struct nfulnl_instance *inst,
415 NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix); 389 NLA_PUT(inst->skb, NFULA_PREFIX, plen, prefix);
416 390
417 if (indev) { 391 if (indev) {
418 tmp_uint = htonl(indev->ifindex);
419#ifndef CONFIG_BRIDGE_NETFILTER 392#ifndef CONFIG_BRIDGE_NETFILTER
420 NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), 393 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
421 &tmp_uint); 394 htonl(indev->ifindex));
422#else 395#else
423 if (pf == PF_BRIDGE) { 396 if (pf == PF_BRIDGE) {
424 /* Case 1: outdev is physical input device, we need to 397 /* Case 1: outdev is physical input device, we need to
425 * look for bridge group (when called from 398 * look for bridge group (when called from
426 * netfilter_bridge) */ 399 * netfilter_bridge) */
427 NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, 400 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
428 sizeof(tmp_uint), &tmp_uint); 401 htonl(indev->ifindex));
429 /* this is the bridge group "brX" */ 402 /* this is the bridge group "brX" */
430 tmp_uint = htonl(indev->br_port->br->dev->ifindex); 403 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
431 NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, 404 htonl(indev->br_port->br->dev->ifindex));
432 sizeof(tmp_uint), &tmp_uint);
433 } else { 405 } else {
434 /* Case 2: indev is bridge group, we need to look for 406 /* Case 2: indev is bridge group, we need to look for
435 * physical device (when called from ipv4) */ 407 * physical device (when called from ipv4) */
436 NLA_PUT(inst->skb, NFULA_IFINDEX_INDEV, 408 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
437 sizeof(tmp_uint), &tmp_uint); 409 htonl(indev->ifindex));
438 if (skb->nf_bridge && skb->nf_bridge->physindev) { 410 if (skb->nf_bridge && skb->nf_bridge->physindev)
439 tmp_uint = 411 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
440 htonl(skb->nf_bridge->physindev->ifindex); 412 htonl(skb->nf_bridge->physindev->ifindex));
441 NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
442 sizeof(tmp_uint), &tmp_uint);
443 }
444 } 413 }
445#endif 414#endif
446 } 415 }
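
Each conversion above drops the tmp_uint staging variable: NLA_PUT_BE32() takes the big-endian value directly and, like all NLA_PUT_* macros of this era, jumps to the function-local nla_put_failure label when the skb lacks tailroom. A sketch of that control flow, with a hypothetical attribute type:

    #include <net/netlink.h>

    #define EXA_IFINDEX 1   /* hypothetical attribute type */

    static int put_ifindex(struct sk_buff *skb, int ifindex)
    {
            /* expands to nla_put(); jumps below if the skb is full */
            NLA_PUT_BE32(skb, EXA_IFINDEX, htonl(ifindex));
            return 0;

    nla_put_failure:
            return -EMSGSIZE;
    }
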
@@ -448,38 +417,32 @@ __build_packet_message(struct nfulnl_instance *inst,
448 if (outdev) { 417 if (outdev) {
449 tmp_uint = htonl(outdev->ifindex); 418 tmp_uint = htonl(outdev->ifindex);
450#ifndef CONFIG_BRIDGE_NETFILTER 419#ifndef CONFIG_BRIDGE_NETFILTER
451 NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), 420 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
452 &tmp_uint); 421 htonl(outdev->ifindex));
453#else 422#else
454 if (pf == PF_BRIDGE) { 423 if (pf == PF_BRIDGE) {
455 /* Case 1: outdev is physical output device, we need to 424 /* Case 1: outdev is physical output device, we need to
456 * look for bridge group (when called from 425 * look for bridge group (when called from
457 * netfilter_bridge) */ 426 * netfilter_bridge) */
458 NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, 427 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
459 sizeof(tmp_uint), &tmp_uint); 428 htonl(outdev->ifindex));
460 /* this is the bridge group "brX" */ 429 /* this is the bridge group "brX" */
461 tmp_uint = htonl(outdev->br_port->br->dev->ifindex); 430 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
462 NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, 431 htonl(outdev->br_port->br->dev->ifindex));
463 sizeof(tmp_uint), &tmp_uint);
464 } else { 432 } else {
465 /* Case 2: indev is a bridge group, we need to look 433 /* Case 2: indev is a bridge group, we need to look
466 * for physical device (when called from ipv4) */ 434 * for physical device (when called from ipv4) */
467 NLA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, 435 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
468 sizeof(tmp_uint), &tmp_uint); 436 htonl(outdev->ifindex));
469 if (skb->nf_bridge && skb->nf_bridge->physoutdev) { 437 if (skb->nf_bridge && skb->nf_bridge->physoutdev)
470 tmp_uint = 438 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
471 htonl(skb->nf_bridge->physoutdev->ifindex); 439 htonl(skb->nf_bridge->physoutdev->ifindex));
472 NLA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
473 sizeof(tmp_uint), &tmp_uint);
474 }
475 } 440 }
476#endif 441#endif
477 } 442 }
478 443
479 if (skb->mark) { 444 if (skb->mark)
480 tmp_uint = htonl(skb->mark); 445 NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark));
481 NLA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint);
482 }
483 446
484 if (indev && skb->dev) { 447 if (indev && skb->dev) {
485 struct nfulnl_msg_packet_hw phw; 448 struct nfulnl_msg_packet_hw phw;
@@ -504,23 +467,23 @@ __build_packet_message(struct nfulnl_instance *inst,
504 read_lock_bh(&skb->sk->sk_callback_lock); 467 read_lock_bh(&skb->sk->sk_callback_lock);
505 if (skb->sk->sk_socket && skb->sk->sk_socket->file) { 468 if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
506 __be32 uid = htonl(skb->sk->sk_socket->file->f_uid); 469 __be32 uid = htonl(skb->sk->sk_socket->file->f_uid);
 470 __be32 gid = htonl(skb->sk->sk_socket->file->f_gid);
507 /* need to unlock here since NLA_PUT may goto */ 471 /* need to unlock here since NLA_PUT may goto */
508 read_unlock_bh(&skb->sk->sk_callback_lock); 472 read_unlock_bh(&skb->sk->sk_callback_lock);
509 NLA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); 473 NLA_PUT_BE32(inst->skb, NFULA_UID, uid);
474 NLA_PUT_BE32(inst->skb, NFULA_GID, gid);
510 } else 475 } else
511 read_unlock_bh(&skb->sk->sk_callback_lock); 476 read_unlock_bh(&skb->sk->sk_callback_lock);
512 } 477 }
513 478
514 /* local sequence number */ 479 /* local sequence number */
515 if (inst->flags & NFULNL_CFG_F_SEQ) { 480 if (inst->flags & NFULNL_CFG_F_SEQ)
516 tmp_uint = htonl(inst->seq++); 481 NLA_PUT_BE32(inst->skb, NFULA_SEQ, htonl(inst->seq++));
517 NLA_PUT(inst->skb, NFULA_SEQ, sizeof(tmp_uint), &tmp_uint); 482
518 }
519 /* global sequence number */ 483 /* global sequence number */
520 if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) { 484 if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
521 tmp_uint = htonl(atomic_inc_return(&global_seq)); 485 NLA_PUT_BE32(inst->skb, NFULA_SEQ_GLOBAL,
522 NLA_PUT(inst->skb, NFULA_SEQ_GLOBAL, sizeof(tmp_uint), &tmp_uint); 486 htonl(atomic_inc_return(&global_seq)));
523 }
524 487
525 if (data_len) { 488 if (data_len) {
526 struct nlattr *nla; 489 struct nlattr *nla;
@@ -543,7 +506,6 @@ __build_packet_message(struct nfulnl_instance *inst,
543 return 0; 506 return 0;
544 507
545nlmsg_failure: 508nlmsg_failure:
546 UDEBUG("nlmsg_failure\n");
547nla_put_failure: 509nla_put_failure:
548 PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n"); 510 PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
549 return -1; 511 return -1;
@@ -604,12 +566,11 @@ nfulnl_log_packet(unsigned int pf,
604#endif 566#endif
605 + nla_total_size(sizeof(u_int32_t)) /* mark */ 567 + nla_total_size(sizeof(u_int32_t)) /* mark */
606 + nla_total_size(sizeof(u_int32_t)) /* uid */ 568 + nla_total_size(sizeof(u_int32_t)) /* uid */
569 + nla_total_size(sizeof(u_int32_t)) /* gid */
607 + nla_total_size(plen) /* prefix */ 570 + nla_total_size(plen) /* prefix */
608 + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) 571 + nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
609 + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); 572 + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
610 573
611 UDEBUG("initial size=%u\n", size);
612
613 spin_lock_bh(&inst->lock); 574 spin_lock_bh(&inst->lock);
614 575
615 if (inst->flags & NFULNL_CFG_F_SEQ) 576 if (inst->flags & NFULNL_CFG_F_SEQ)
@@ -636,7 +597,6 @@ nfulnl_log_packet(unsigned int pf,
636 data_len = inst->copy_range; 597 data_len = inst->copy_range;
637 598
638 size += nla_total_size(data_len); 599 size += nla_total_size(data_len);
639 UDEBUG("copy_packet, therefore size now %u\n", size);
640 break; 600 break;
641 601
642 default: 602 default:
@@ -647,8 +607,6 @@ nfulnl_log_packet(unsigned int pf,
647 size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { 607 size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) {
648 /* either the queue len is too high or we don't have 608 /* either the queue len is too high or we don't have
649 * enough room in the skb left. flush to userspace. */ 609 * enough room in the skb left. flush to userspace. */
650 UDEBUG("flushing old skb\n");
651
652 __nfulnl_flush(inst); 610 __nfulnl_flush(inst);
653 } 611 }
654 612
@@ -658,7 +616,6 @@ nfulnl_log_packet(unsigned int pf,
658 goto alloc_failure; 616 goto alloc_failure;
659 } 617 }
660 618
661 UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
662 inst->qlen++; 619 inst->qlen++;
663 620
664 __build_packet_message(inst, skb, data_len, pf, 621 __build_packet_message(inst, skb, data_len, pf,
@@ -680,7 +637,6 @@ unlock_and_release:
680 return; 637 return;
681 638
682alloc_failure: 639alloc_failure:
683 UDEBUG("error allocating skb\n");
684 /* FIXME: statistics */ 640 /* FIXME: statistics */
685 goto unlock_and_release; 641 goto unlock_and_release;
686} 642}
@@ -703,7 +659,6 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
703 struct hlist_head *head = &instance_table[i]; 659 struct hlist_head *head = &instance_table[i];
704 660
705 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { 661 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
706 UDEBUG("node = %p\n", inst);
707 if ((n->net == &init_net) && 662 if ((n->net == &init_net) &&
708 (n->pid == inst->peer_pid)) 663 (n->pid == inst->peer_pid))
709 __instance_destroy(inst); 664 __instance_destroy(inst);
@@ -725,7 +680,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
725 return -ENOTSUPP; 680 return -ENOTSUPP;
726} 681}
727 682
728static struct nf_logger nfulnl_logger = { 683static const struct nf_logger nfulnl_logger = {
729 .name = "nfnetlink_log", 684 .name = "nfnetlink_log",
730 .logfn = &nfulnl_log_packet, 685 .logfn = &nfulnl_log_packet,
731 .me = THIS_MODULE, 686 .me = THIS_MODULE,
@@ -749,14 +704,17 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
749 struct nfulnl_instance *inst; 704 struct nfulnl_instance *inst;
750 int ret = 0; 705 int ret = 0;
751 706
752 UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
753
754 inst = instance_lookup_get(group_num); 707 inst = instance_lookup_get(group_num);
708 if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
709 ret = -EPERM;
710 goto out_put;
711 }
712
755 if (nfula[NFULA_CFG_CMD]) { 713 if (nfula[NFULA_CFG_CMD]) {
756 u_int8_t pf = nfmsg->nfgen_family; 714 u_int8_t pf = nfmsg->nfgen_family;
757 struct nfulnl_msg_config_cmd *cmd; 715 struct nfulnl_msg_config_cmd *cmd;
716
758 cmd = nla_data(nfula[NFULA_CFG_CMD]); 717 cmd = nla_data(nfula[NFULA_CFG_CMD]);
759 UDEBUG("found CFG_CMD for\n");
760 718
761 switch (cmd->command) { 719 switch (cmd->command) {
762 case NFULNL_CFG_CMD_BIND: 720 case NFULNL_CFG_CMD_BIND:
@@ -767,8 +725,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
767 725
768 inst = instance_create(group_num, 726 inst = instance_create(group_num,
769 NETLINK_CB(skb).pid); 727 NETLINK_CB(skb).pid);
770 if (!inst) { 728 if (IS_ERR(inst)) {
771 ret = -EINVAL; 729 ret = PTR_ERR(inst);
772 goto out; 730 goto out;
773 } 731 }
774 break; 732 break;
@@ -778,78 +736,71 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
778 goto out; 736 goto out;
779 } 737 }
780 738
781 if (inst->peer_pid != NETLINK_CB(skb).pid) {
782 ret = -EPERM;
783 goto out_put;
784 }
785
786 instance_destroy(inst); 739 instance_destroy(inst);
787 goto out; 740 goto out;
788 case NFULNL_CFG_CMD_PF_BIND: 741 case NFULNL_CFG_CMD_PF_BIND:
789 UDEBUG("registering log handler for pf=%u\n", pf);
790 ret = nf_log_register(pf, &nfulnl_logger); 742 ret = nf_log_register(pf, &nfulnl_logger);
791 break; 743 break;
792 case NFULNL_CFG_CMD_PF_UNBIND: 744 case NFULNL_CFG_CMD_PF_UNBIND:
793 UDEBUG("unregistering log handler for pf=%u\n", pf);
794 /* This is a bug and a feature. We cannot unregister 745 /* This is a bug and a feature. We cannot unregister
795 * other handlers, like nfnetlink_inst can */ 746 * other handlers, like nfnetlink_inst can */
796 nf_log_unregister_pf(pf); 747 nf_log_unregister_pf(pf);
797 break; 748 break;
798 default: 749 default:
799 ret = -EINVAL; 750 ret = -ENOTSUPP;
800 break; 751 break;
801 } 752 }
802
803 if (!inst)
804 goto out;
805 } else {
806 if (!inst) {
807 UDEBUG("no config command, and no instance for "
808 "group=%u pid=%u =>ENOENT\n",
809 group_num, NETLINK_CB(skb).pid);
810 ret = -ENOENT;
811 goto out;
812 }
813
814 if (inst->peer_pid != NETLINK_CB(skb).pid) {
815 UDEBUG("no config command, and wrong pid\n");
816 ret = -EPERM;
817 goto out_put;
818 }
819 } 753 }
820 754
821 if (nfula[NFULA_CFG_MODE]) { 755 if (nfula[NFULA_CFG_MODE]) {
822 struct nfulnl_msg_config_mode *params; 756 struct nfulnl_msg_config_mode *params;
823 params = nla_data(nfula[NFULA_CFG_MODE]); 757 params = nla_data(nfula[NFULA_CFG_MODE]);
824 758
759 if (!inst) {
760 ret = -ENODEV;
761 goto out;
762 }
825 nfulnl_set_mode(inst, params->copy_mode, 763 nfulnl_set_mode(inst, params->copy_mode,
826 ntohl(params->copy_range)); 764 ntohl(params->copy_range));
827 } 765 }
828 766
829 if (nfula[NFULA_CFG_TIMEOUT]) { 767 if (nfula[NFULA_CFG_TIMEOUT]) {
830 __be32 timeout = 768 __be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
831 *(__be32 *)nla_data(nfula[NFULA_CFG_TIMEOUT]);
832 769
770 if (!inst) {
771 ret = -ENODEV;
772 goto out;
773 }
833 nfulnl_set_timeout(inst, ntohl(timeout)); 774 nfulnl_set_timeout(inst, ntohl(timeout));
834 } 775 }
835 776
836 if (nfula[NFULA_CFG_NLBUFSIZ]) { 777 if (nfula[NFULA_CFG_NLBUFSIZ]) {
837 __be32 nlbufsiz = 778 __be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
838 *(__be32 *)nla_data(nfula[NFULA_CFG_NLBUFSIZ]);
839 779
780 if (!inst) {
781 ret = -ENODEV;
782 goto out;
783 }
840 nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); 784 nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
841 } 785 }
842 786
843 if (nfula[NFULA_CFG_QTHRESH]) { 787 if (nfula[NFULA_CFG_QTHRESH]) {
844 __be32 qthresh = 788 __be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
845 *(__be32 *)nla_data(nfula[NFULA_CFG_QTHRESH]);
846 789
790 if (!inst) {
791 ret = -ENODEV;
792 goto out;
793 }
847 nfulnl_set_qthresh(inst, ntohl(qthresh)); 794 nfulnl_set_qthresh(inst, ntohl(qthresh));
848 } 795 }
849 796
850 if (nfula[NFULA_CFG_FLAGS]) { 797 if (nfula[NFULA_CFG_FLAGS]) {
851 __be16 flags = 798 __be16 flags = nla_get_be16(nfula[NFULA_CFG_FLAGS]);
852 *(__be16 *)nla_data(nfula[NFULA_CFG_FLAGS]); 799
800 if (!inst) {
801 ret = -ENODEV;
802 goto out;
803 }
853 nfulnl_set_flags(inst, ntohs(flags)); 804 nfulnl_set_flags(inst, ntohs(flags));
854 } 805 }
855 806
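
The rewritten config handler also swaps every "*(__be32 *)nla_data(...)" cast for the typeful nla_get_be32()/nla_get_be16() accessors, which copy the payload out and so sidestep alignment traps. A short sketch with hypothetical attribute indices:

    #include <net/netlink.h>

    enum { EXA_UNSPEC, EXA_TIMEOUT, EXA_FLAGS, __EXA_MAX };

    static void apply_config(struct nlattr *tb[])
    {
            if (tb[EXA_TIMEOUT]) {
                    u32 timeout = ntohl(nla_get_be32(tb[EXA_TIMEOUT]));
                    /* ... store timeout ... */
            }
            if (tb[EXA_FLAGS]) {
                    u16 flags = ntohs(nla_get_be16(tb[EXA_FLAGS]));
                    /* ... store flags ... */
            }
    }
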
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3ceeffcf6f9d..51476f82bb54 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -3,6 +3,7 @@
3 * userspace via nfnetlink. 3 * userspace via nfnetlink.
4 * 4 *
5 * (C) 2005 by Harald Welte <laforge@netfilter.org> 5 * (C) 2005 by Harald Welte <laforge@netfilter.org>
6 * (C) 2007 by Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * Based on the old ipv4-only ip_queue.c: 8 * Based on the old ipv4-only ip_queue.c:
8 * (C) 2000-2002 James Morris <jmorris@intercode.com.au> 9 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
@@ -27,6 +28,7 @@
27#include <linux/netfilter/nfnetlink_queue.h> 28#include <linux/netfilter/nfnetlink_queue.h>
28#include <linux/list.h> 29#include <linux/list.h>
29#include <net/sock.h> 30#include <net/sock.h>
31#include <net/netfilter/nf_queue.h>
30 32
31#include <asm/atomic.h> 33#include <asm/atomic.h>
32 34
@@ -36,24 +38,9 @@
36 38
37#define NFQNL_QMAX_DEFAULT 1024 39#define NFQNL_QMAX_DEFAULT 1024
38 40
39#if 0
40#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \
41 __FILE__, __LINE__, __FUNCTION__, \
42 ## args)
43#else
44#define QDEBUG(x, ...)
45#endif
46
47struct nfqnl_queue_entry {
48 struct list_head list;
49 struct nf_info *info;
50 struct sk_buff *skb;
51 unsigned int id;
52};
53
54struct nfqnl_instance { 41struct nfqnl_instance {
55 struct hlist_node hlist; /* global list of queues */ 42 struct hlist_node hlist; /* global list of queues */
56 atomic_t use; 43 struct rcu_head rcu;
57 44
58 int peer_pid; 45 int peer_pid;
59 unsigned int queue_maxlen; 46 unsigned int queue_maxlen;
@@ -62,7 +49,7 @@ struct nfqnl_instance {
62 unsigned int queue_dropped; 49 unsigned int queue_dropped;
63 unsigned int queue_user_dropped; 50 unsigned int queue_user_dropped;
64 51
65 atomic_t id_sequence; /* 'sequence' of pkt ids */ 52 unsigned int id_sequence; /* 'sequence' of pkt ids */
66 53
67 u_int16_t queue_num; /* number of this queue */ 54 u_int16_t queue_num; /* number of this queue */
68 u_int8_t copy_mode; 55 u_int8_t copy_mode;
@@ -72,12 +59,12 @@ struct nfqnl_instance {
72 struct list_head queue_list; /* packets in queue */ 59 struct list_head queue_list; /* packets in queue */
73}; 60};
74 61
75typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); 62typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
76 63
77static DEFINE_RWLOCK(instances_lock); 64static DEFINE_SPINLOCK(instances_lock);
78 65
79#define INSTANCE_BUCKETS 16 66#define INSTANCE_BUCKETS 16
80static struct hlist_head instance_table[INSTANCE_BUCKETS]; 67static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
81 68
82static inline u_int8_t instance_hashfn(u_int16_t queue_num) 69static inline u_int8_t instance_hashfn(u_int16_t queue_num)
83{ 70{
@@ -85,14 +72,14 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
85} 72}
86 73
87static struct nfqnl_instance * 74static struct nfqnl_instance *
88__instance_lookup(u_int16_t queue_num) 75instance_lookup(u_int16_t queue_num)
89{ 76{
90 struct hlist_head *head; 77 struct hlist_head *head;
91 struct hlist_node *pos; 78 struct hlist_node *pos;
92 struct nfqnl_instance *inst; 79 struct nfqnl_instance *inst;
93 80
94 head = &instance_table[instance_hashfn(queue_num)]; 81 head = &instance_table[instance_hashfn(queue_num)];
95 hlist_for_each_entry(inst, pos, head, hlist) { 82 hlist_for_each_entry_rcu(inst, pos, head, hlist) {
96 if (inst->queue_num == queue_num) 83 if (inst->queue_num == queue_num)
97 return inst; 84 return inst;
98 } 85 }
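
instance_lookup() loses both the reader lock and the per-instance refcount: lookups now run entirely under rcu_read_lock() (already held by nf_hook_slow() on the packet path) and traverse the chain with hlist_for_each_entry_rcu(). A sketch of the reader side, assuming the 2.6.24-era macro that still takes a struct hlist_node cursor:

    #include <linux/list.h>
    #include <linux/rcupdate.h>
    #include <linux/types.h>

    struct inst {
            struct hlist_node hlist;
            u16 num;
    };

    static struct hlist_head table[16];

    /* caller must hold rcu_read_lock(); the chain may mutate underneath us */
    static struct inst *inst_lookup(u16 num)
    {
            struct inst *i;
            struct hlist_node *pos;

            hlist_for_each_entry_rcu(i, pos, &table[num & 15], hlist) {
                    if (i->num == num)
                            return i;
            }
            return NULL;
    }
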
@@ -100,243 +87,131 @@ __instance_lookup(u_int16_t queue_num)
100} 87}
101 88
102static struct nfqnl_instance * 89static struct nfqnl_instance *
103instance_lookup_get(u_int16_t queue_num)
104{
105 struct nfqnl_instance *inst;
106
107 read_lock_bh(&instances_lock);
108 inst = __instance_lookup(queue_num);
109 if (inst)
110 atomic_inc(&inst->use);
111 read_unlock_bh(&instances_lock);
112
113 return inst;
114}
115
116static void
117instance_put(struct nfqnl_instance *inst)
118{
119 if (inst && atomic_dec_and_test(&inst->use)) {
120 QDEBUG("kfree(inst=%p)\n", inst);
121 kfree(inst);
122 }
123}
124
125static struct nfqnl_instance *
126instance_create(u_int16_t queue_num, int pid) 90instance_create(u_int16_t queue_num, int pid)
127{ 91{
128 struct nfqnl_instance *inst; 92 struct nfqnl_instance *inst;
93 unsigned int h;
94 int err;
129 95
130 QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); 96 spin_lock(&instances_lock);
131 97 if (instance_lookup(queue_num)) {
132 write_lock_bh(&instances_lock); 98 err = -EEXIST;
133 if (__instance_lookup(queue_num)) {
134 inst = NULL;
135 QDEBUG("aborting, instance already exists\n");
136 goto out_unlock; 99 goto out_unlock;
137 } 100 }
138 101
139 inst = kzalloc(sizeof(*inst), GFP_ATOMIC); 102 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
140 if (!inst) 103 if (!inst) {
104 err = -ENOMEM;
141 goto out_unlock; 105 goto out_unlock;
106 }
142 107
143 inst->queue_num = queue_num; 108 inst->queue_num = queue_num;
144 inst->peer_pid = pid; 109 inst->peer_pid = pid;
145 inst->queue_maxlen = NFQNL_QMAX_DEFAULT; 110 inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
146 inst->copy_range = 0xfffff; 111 inst->copy_range = 0xfffff;
147 inst->copy_mode = NFQNL_COPY_NONE; 112 inst->copy_mode = NFQNL_COPY_NONE;
148 atomic_set(&inst->id_sequence, 0);
149 /* needs to be two, since we _put() after creation */
150 atomic_set(&inst->use, 2);
151 spin_lock_init(&inst->lock); 113 spin_lock_init(&inst->lock);
152 INIT_LIST_HEAD(&inst->queue_list); 114 INIT_LIST_HEAD(&inst->queue_list);
115 INIT_RCU_HEAD(&inst->rcu);
153 116
154 if (!try_module_get(THIS_MODULE)) 117 if (!try_module_get(THIS_MODULE)) {
118 err = -EAGAIN;
155 goto out_free; 119 goto out_free;
120 }
156 121
157 hlist_add_head(&inst->hlist, 122 h = instance_hashfn(queue_num);
158 &instance_table[instance_hashfn(queue_num)]); 123 hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
159
160 write_unlock_bh(&instances_lock);
161 124
162 QDEBUG("successfully created new instance\n"); 125 spin_unlock(&instances_lock);
163 126
164 return inst; 127 return inst;
165 128
166out_free: 129out_free:
167 kfree(inst); 130 kfree(inst);
168out_unlock: 131out_unlock:
169 write_unlock_bh(&instances_lock); 132 spin_unlock(&instances_lock);
170 return NULL; 133 return ERR_PTR(err);
171} 134}
172 135
173static void nfqnl_flush(struct nfqnl_instance *queue, int verdict); 136static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
137 unsigned long data);
174 138
175static void 139static void
176_instance_destroy2(struct nfqnl_instance *inst, int lock) 140instance_destroy_rcu(struct rcu_head *head)
177{ 141{
178 /* first pull it out of the global list */ 142 struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
179 if (lock) 143 rcu);
180 write_lock_bh(&instances_lock);
181
182 QDEBUG("removing instance %p (queuenum=%u) from hash\n",
183 inst, inst->queue_num);
184 hlist_del(&inst->hlist);
185
186 if (lock)
187 write_unlock_bh(&instances_lock);
188
189 /* then flush all pending skbs from the queue */
190 nfqnl_flush(inst, NF_DROP);
191
192 /* and finally put the refcount */
193 instance_put(inst);
194 144
145 nfqnl_flush(inst, NULL, 0);
146 kfree(inst);
195 module_put(THIS_MODULE); 147 module_put(THIS_MODULE);
196} 148}
197 149
198static inline void 150static void
199__instance_destroy(struct nfqnl_instance *inst) 151__instance_destroy(struct nfqnl_instance *inst)
200{ 152{
201 _instance_destroy2(inst, 0); 153 hlist_del_rcu(&inst->hlist);
154 call_rcu(&inst->rcu, instance_destroy_rcu);
202} 155}
203 156
204static inline void
205instance_destroy(struct nfqnl_instance *inst)
206{
207 _instance_destroy2(inst, 1);
208}
209
210
211
212static void 157static void
213issue_verdict(struct nfqnl_queue_entry *entry, int verdict) 158instance_destroy(struct nfqnl_instance *inst)
214{ 159{
215 QDEBUG("entering for entry %p, verdict %u\n", entry, verdict); 160 spin_lock(&instances_lock);
216 161 __instance_destroy(inst);
217 /* TCP input path (and probably other bits) assume to be called 162 spin_unlock(&instances_lock);
218 * from softirq context, not from syscall, like issue_verdict is
219 * called. TCP input path deadlocks with locks taken from timer
220 * softirq, e.g. We therefore emulate this by local_bh_disable() */
221
222 local_bh_disable();
223 nf_reinject(entry->skb, entry->info, verdict);
224 local_bh_enable();
225
226 kfree(entry);
227} 163}
228 164
229static inline void 165static inline void
230__enqueue_entry(struct nfqnl_instance *queue, 166__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
231 struct nfqnl_queue_entry *entry)
232{ 167{
233 list_add(&entry->list, &queue->queue_list); 168 list_add_tail(&entry->list, &queue->queue_list);
234 queue->queue_total++; 169 queue->queue_total++;
235} 170}
236 171
237/* 172static struct nf_queue_entry *
238 * Find and return a queued entry matched by cmpfn, or return the last 173find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
239 * entry if cmpfn is NULL.
240 */
241static inline struct nfqnl_queue_entry *
242__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
243 unsigned long data)
244{ 174{
245 struct list_head *p; 175 struct nf_queue_entry *entry = NULL, *i;
246 176
247 list_for_each_prev(p, &queue->queue_list) { 177 spin_lock_bh(&queue->lock);
248 struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p;
249 178
250 if (!cmpfn || cmpfn(entry, data)) 179 list_for_each_entry(i, &queue->queue_list, list) {
251 return entry; 180 if (i->id == id) {
181 entry = i;
182 break;
183 }
252 } 184 }
253 return NULL;
254}
255
256static inline void
257__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry)
258{
259 list_del(&entry->list);
260 q->queue_total--;
261}
262
263static inline struct nfqnl_queue_entry *
264__find_dequeue_entry(struct nfqnl_instance *queue,
265 nfqnl_cmpfn cmpfn, unsigned long data)
266{
267 struct nfqnl_queue_entry *entry;
268
269 entry = __find_entry(queue, cmpfn, data);
270 if (entry == NULL)
271 return NULL;
272
273 __dequeue_entry(queue, entry);
274 return entry;
275}
276
277
278static inline void
279__nfqnl_flush(struct nfqnl_instance *queue, int verdict)
280{
281 struct nfqnl_queue_entry *entry;
282
283 while ((entry = __find_dequeue_entry(queue, NULL, 0)))
284 issue_verdict(entry, verdict);
285}
286
287static inline int
288__nfqnl_set_mode(struct nfqnl_instance *queue,
289 unsigned char mode, unsigned int range)
290{
291 int status = 0;
292
293 switch (mode) {
294 case NFQNL_COPY_NONE:
295 case NFQNL_COPY_META:
296 queue->copy_mode = mode;
297 queue->copy_range = 0;
298 break;
299
300 case NFQNL_COPY_PACKET:
301 queue->copy_mode = mode;
302 /* we're using struct nlattr which has 16bit nla_len */
303 if (range > 0xffff)
304 queue->copy_range = 0xffff;
305 else
306 queue->copy_range = range;
307 break;
308
309 default:
310 status = -EINVAL;
311 185
186 if (entry) {
187 list_del(&entry->list);
188 queue->queue_total--;
312 } 189 }
313 return status;
314}
315 190
316static struct nfqnl_queue_entry *
317find_dequeue_entry(struct nfqnl_instance *queue,
318 nfqnl_cmpfn cmpfn, unsigned long data)
319{
320 struct nfqnl_queue_entry *entry;
321
322 spin_lock_bh(&queue->lock);
323 entry = __find_dequeue_entry(queue, cmpfn, data);
324 spin_unlock_bh(&queue->lock); 191 spin_unlock_bh(&queue->lock);
325 192
326 return entry; 193 return entry;
327} 194}
328 195
329static void 196static void
330nfqnl_flush(struct nfqnl_instance *queue, int verdict) 197nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
331{ 198{
199 struct nf_queue_entry *entry, *next;
200
332 spin_lock_bh(&queue->lock); 201 spin_lock_bh(&queue->lock);
333 __nfqnl_flush(queue, verdict); 202 list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
203 if (!cmpfn || cmpfn(entry, data)) {
204 list_del(&entry->list);
205 queue->queue_total--;
206 nf_reinject(entry, NF_DROP);
207 }
208 }
334 spin_unlock_bh(&queue->lock); 209 spin_unlock_bh(&queue->lock);
335} 210}
336 211
337static struct sk_buff * 212static struct sk_buff *
338nfqnl_build_packet_message(struct nfqnl_instance *queue, 213nfqnl_build_packet_message(struct nfqnl_instance *queue,
339 struct nfqnl_queue_entry *entry, int *errp) 214 struct nf_queue_entry *entry)
340{ 215{
341 sk_buff_data_t old_tail; 216 sk_buff_data_t old_tail;
342 size_t size; 217 size_t size;
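
The destroy path in the same hunk is the canonical RCU teardown: hlist_del_rcu() under the writers' spinlock unlinks the instance while readers may still be walking it, and call_rcu() defers the kfree() (plus the flush and module_put()) until every such reader has left its read-side section. A self-contained sketch of the pattern:

    #include <linux/list.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct item {
            struct hlist_node hlist;
            struct rcu_head rcu;
    };

    static DEFINE_SPINLOCK(table_lock);

    static void item_free_rcu(struct rcu_head *head)
    {
            /* runs only after all pre-existing RCU readers have finished */
            kfree(container_of(head, struct item, rcu));
    }

    static void item_destroy(struct item *it)
    {
            spin_lock(&table_lock);
            hlist_del_rcu(&it->hlist);      /* readers may still see 'it' */
            spin_unlock(&table_lock);
            call_rcu(&it->rcu, item_free_rcu);
    }
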
@@ -345,13 +220,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
345 struct nfqnl_msg_packet_hdr pmsg; 220 struct nfqnl_msg_packet_hdr pmsg;
346 struct nlmsghdr *nlh; 221 struct nlmsghdr *nlh;
347 struct nfgenmsg *nfmsg; 222 struct nfgenmsg *nfmsg;
348 struct nf_info *entinf = entry->info;
349 struct sk_buff *entskb = entry->skb; 223 struct sk_buff *entskb = entry->skb;
350 struct net_device *indev; 224 struct net_device *indev;
351 struct net_device *outdev; 225 struct net_device *outdev;
352 __be32 tmp_uint;
353
354 QDEBUG("entered\n");
355 226
356 size = NLMSG_ALIGN(sizeof(struct nfgenmsg)) 227 size = NLMSG_ALIGN(sizeof(struct nfgenmsg))
357 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) 228 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -365,11 +236,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
365 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) 236 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
366 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); 237 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
367 238
368 outdev = entinf->outdev; 239 outdev = entry->outdev;
369 240
370 spin_lock_bh(&queue->lock); 241 spin_lock_bh(&queue->lock);
371 242
372 switch (queue->copy_mode) { 243 switch ((enum nfqnl_config_mode)queue->copy_mode) {
373 case NFQNL_COPY_META: 244 case NFQNL_COPY_META:
374 case NFQNL_COPY_NONE: 245 case NFQNL_COPY_NONE:
375 data_len = 0; 246 data_len = 0;
@@ -378,7 +249,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
378 case NFQNL_COPY_PACKET: 249 case NFQNL_COPY_PACKET:
379 if ((entskb->ip_summed == CHECKSUM_PARTIAL || 250 if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
380 entskb->ip_summed == CHECKSUM_COMPLETE) && 251 entskb->ip_summed == CHECKSUM_COMPLETE) &&
381 (*errp = skb_checksum_help(entskb))) { 252 skb_checksum_help(entskb)) {
382 spin_unlock_bh(&queue->lock); 253 spin_unlock_bh(&queue->lock);
383 return NULL; 254 return NULL;
384 } 255 }
@@ -390,13 +261,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
390 261
391 size += nla_total_size(data_len); 262 size += nla_total_size(data_len);
392 break; 263 break;
393
394 default:
395 *errp = -EINVAL;
396 spin_unlock_bh(&queue->lock);
397 return NULL;
398 } 264 }
399 265
266 entry->id = queue->id_sequence++;
267
400 spin_unlock_bh(&queue->lock); 268 spin_unlock_bh(&queue->lock);
401 269
402 skb = alloc_skb(size, GFP_ATOMIC); 270 skb = alloc_skb(size, GFP_ATOMIC);
@@ -408,81 +276,69 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
408 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, 276 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
409 sizeof(struct nfgenmsg)); 277 sizeof(struct nfgenmsg));
410 nfmsg = NLMSG_DATA(nlh); 278 nfmsg = NLMSG_DATA(nlh);
411 nfmsg->nfgen_family = entinf->pf; 279 nfmsg->nfgen_family = entry->pf;
412 nfmsg->version = NFNETLINK_V0; 280 nfmsg->version = NFNETLINK_V0;
413 nfmsg->res_id = htons(queue->queue_num); 281 nfmsg->res_id = htons(queue->queue_num);
414 282
415 pmsg.packet_id = htonl(entry->id); 283 pmsg.packet_id = htonl(entry->id);
416 pmsg.hw_protocol = entskb->protocol; 284 pmsg.hw_protocol = entskb->protocol;
417 pmsg.hook = entinf->hook; 285 pmsg.hook = entry->hook;
418 286
419 NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); 287 NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
420 288
421 indev = entinf->indev; 289 indev = entry->indev;
422 if (indev) { 290 if (indev) {
423 tmp_uint = htonl(indev->ifindex);
424#ifndef CONFIG_BRIDGE_NETFILTER 291#ifndef CONFIG_BRIDGE_NETFILTER
425 NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); 292 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex));
426#else 293#else
427 if (entinf->pf == PF_BRIDGE) { 294 if (entry->pf == PF_BRIDGE) {
428 /* Case 1: indev is physical input device, we need to 295 /* Case 1: indev is physical input device, we need to
429 * look for bridge group (when called from 296 * look for bridge group (when called from
430 * netfilter_bridge) */ 297 * netfilter_bridge) */
431 NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), 298 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
432 &tmp_uint); 299 htonl(indev->ifindex));
433 /* this is the bridge group "brX" */ 300 /* this is the bridge group "brX" */
434 tmp_uint = htonl(indev->br_port->br->dev->ifindex); 301 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
435 NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), 302 htonl(indev->br_port->br->dev->ifindex));
436 &tmp_uint);
437 } else { 303 } else {
438 /* Case 2: indev is bridge group, we need to look for 304 /* Case 2: indev is bridge group, we need to look for
439 * physical device (when called from ipv4) */ 305 * physical device (when called from ipv4) */
440 NLA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), 306 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
441 &tmp_uint); 307 htonl(indev->ifindex));
442 if (entskb->nf_bridge 308 if (entskb->nf_bridge && entskb->nf_bridge->physindev)
443 && entskb->nf_bridge->physindev) { 309 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
444 tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex); 310 htonl(entskb->nf_bridge->physindev->ifindex));
445 NLA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
446 sizeof(tmp_uint), &tmp_uint);
447 }
448 } 311 }
449#endif 312#endif
450 } 313 }
451 314
452 if (outdev) { 315 if (outdev) {
453 tmp_uint = htonl(outdev->ifindex);
454#ifndef CONFIG_BRIDGE_NETFILTER 316#ifndef CONFIG_BRIDGE_NETFILTER
455 NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); 317 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex));
456#else 318#else
457 if (entinf->pf == PF_BRIDGE) { 319 if (entry->pf == PF_BRIDGE) {
458 /* Case 1: outdev is physical output device, we need to 320 /* Case 1: outdev is physical output device, we need to
459 * look for bridge group (when called from 321 * look for bridge group (when called from
460 * netfilter_bridge) */ 322 * netfilter_bridge) */
461 NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), 323 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
462 &tmp_uint); 324 htonl(outdev->ifindex));
463 /* this is the bridge group "brX" */ 325 /* this is the bridge group "brX" */
464 tmp_uint = htonl(outdev->br_port->br->dev->ifindex); 326 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
465 NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), 327 htonl(outdev->br_port->br->dev->ifindex));
466 &tmp_uint);
467 } else { 328 } else {
468 /* Case 2: outdev is bridge group, we need to look for 329 /* Case 2: outdev is bridge group, we need to look for
469 * physical output device (when called from ipv4) */ 330 * physical output device (when called from ipv4) */
470 NLA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), 331 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
471 &tmp_uint); 332 htonl(outdev->ifindex));
472 if (entskb->nf_bridge 333 if (entskb->nf_bridge && entskb->nf_bridge->physoutdev)
473 && entskb->nf_bridge->physoutdev) { 334 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
474 tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex); 335 htonl(entskb->nf_bridge->physoutdev->ifindex));
475 NLA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
476 sizeof(tmp_uint), &tmp_uint);
477 }
478 } 336 }
479#endif 337#endif
480 } 338 }
481 339
482 if (entskb->mark) { 340 if (entskb->mark)
483 tmp_uint = htonl(entskb->mark); 341 NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
484 NLA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
485 }
486 342
487 if (indev && entskb->dev) { 343 if (indev && entskb->dev) {
488 struct nfqnl_msg_packet_hw phw; 344 struct nfqnl_msg_packet_hw phw;
@@ -526,51 +382,29 @@ nlmsg_failure:
526nla_put_failure: 382nla_put_failure:
527 if (skb) 383 if (skb)
528 kfree_skb(skb); 384 kfree_skb(skb);
529 *errp = -EINVAL;
530 if (net_ratelimit()) 385 if (net_ratelimit())
531 printk(KERN_ERR "nf_queue: error creating packet message\n"); 386 printk(KERN_ERR "nf_queue: error creating packet message\n");
532 return NULL; 387 return NULL;
533} 388}
534 389
535static int 390static int
536nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, 391nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
537 unsigned int queuenum, void *data)
538{ 392{
539 int status = -EINVAL;
540 struct sk_buff *nskb; 393 struct sk_buff *nskb;
541 struct nfqnl_instance *queue; 394 struct nfqnl_instance *queue;
542 struct nfqnl_queue_entry *entry; 395 int err;
543
544 QDEBUG("entered\n");
545
546 queue = instance_lookup_get(queuenum);
547 if (!queue) {
548 QDEBUG("no queue instance matching\n");
549 return -EINVAL;
550 }
551
552 if (queue->copy_mode == NFQNL_COPY_NONE) {
553 QDEBUG("mode COPY_NONE, aborting\n");
554 status = -EAGAIN;
555 goto err_out_put;
556 }
557 396
558 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 397 /* rcu_read_lock()ed by nf_hook_slow() */
559 if (entry == NULL) { 398 queue = instance_lookup(queuenum);
560 if (net_ratelimit()) 399 if (!queue)
561 printk(KERN_ERR 400 goto err_out;
562 "nf_queue: OOM in nfqnl_enqueue_packet()\n");
563 status = -ENOMEM;
564 goto err_out_put;
565 }
566 401
567 entry->info = info; 402 if (queue->copy_mode == NFQNL_COPY_NONE)
568 entry->skb = skb; 403 goto err_out;
569 entry->id = atomic_inc_return(&queue->id_sequence);
570 404
571 nskb = nfqnl_build_packet_message(queue, entry, &status); 405 nskb = nfqnl_build_packet_message(queue, entry);
572 if (nskb == NULL) 406 if (nskb == NULL)
573 goto err_out_free; 407 goto err_out;
574 408
575 spin_lock_bh(&queue->lock); 409 spin_lock_bh(&queue->lock);
576 410
@@ -579,7 +413,6 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
579 413
580 if (queue->queue_total >= queue->queue_maxlen) { 414 if (queue->queue_total >= queue->queue_maxlen) {
581 queue->queue_dropped++; 415 queue->queue_dropped++;
582 status = -ENOSPC;
583 if (net_ratelimit()) 416 if (net_ratelimit())
584 printk(KERN_WARNING "nf_queue: full at %d entries, " 417 printk(KERN_WARNING "nf_queue: full at %d entries, "
585 "dropping packets(s). Dropped: %d\n", 418 "dropping packets(s). Dropped: %d\n",
@@ -588,8 +421,8 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
588 } 421 }
589 422
590 /* nfnetlink_unicast will either free the nskb or add it to a socket */ 423 /* nfnetlink_unicast will either free the nskb or add it to a socket */
591 status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); 424 err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
592 if (status < 0) { 425 if (err < 0) {
593 queue->queue_user_dropped++; 426 queue->queue_user_dropped++;
594 goto err_out_unlock; 427 goto err_out_unlock;
595 } 428 }
@@ -597,24 +430,18 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
597 __enqueue_entry(queue, entry); 430 __enqueue_entry(queue, entry);
598 431
599 spin_unlock_bh(&queue->lock); 432 spin_unlock_bh(&queue->lock);
600 instance_put(queue); 433 return 0;
601 return status;
602 434
603err_out_free_nskb: 435err_out_free_nskb:
604 kfree_skb(nskb); 436 kfree_skb(nskb);
605
606err_out_unlock: 437err_out_unlock:
607 spin_unlock_bh(&queue->lock); 438 spin_unlock_bh(&queue->lock);
608 439err_out:
609err_out_free: 440 return -1;
610 kfree(entry);
611err_out_put:
612 instance_put(queue);
613 return status;
614} 441}
615 442
616static int 443static int
617nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) 444nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
618{ 445{
619 int diff; 446 int diff;
620 int err; 447 int err;
@@ -645,35 +472,46 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
645 return 0; 472 return 0;
646} 473}
647 474
648static inline int
649id_cmp(struct nfqnl_queue_entry *e, unsigned long id)
650{
651 return (id == e->id);
652}
653
654static int 475static int
655nfqnl_set_mode(struct nfqnl_instance *queue, 476nfqnl_set_mode(struct nfqnl_instance *queue,
656 unsigned char mode, unsigned int range) 477 unsigned char mode, unsigned int range)
657{ 478{
658 int status; 479 int status = 0;
659 480
660 spin_lock_bh(&queue->lock); 481 spin_lock_bh(&queue->lock);
661 status = __nfqnl_set_mode(queue, mode, range); 482 switch (mode) {
483 case NFQNL_COPY_NONE:
484 case NFQNL_COPY_META:
485 queue->copy_mode = mode;
486 queue->copy_range = 0;
487 break;
488
489 case NFQNL_COPY_PACKET:
490 queue->copy_mode = mode;
491 /* we're using struct nlattr which has 16bit nla_len */
492 if (range > 0xffff)
493 queue->copy_range = 0xffff;
494 else
495 queue->copy_range = range;
496 break;
497
498 default:
499 status = -EINVAL;
500
501 }
662 spin_unlock_bh(&queue->lock); 502 spin_unlock_bh(&queue->lock);
663 503
664 return status; 504 return status;
665} 505}
666 506
667static int 507static int
668dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) 508dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
669{ 509{
670 struct nf_info *entinf = entry->info; 510 if (entry->indev)
671 511 if (entry->indev->ifindex == ifindex)
672 if (entinf->indev)
673 if (entinf->indev->ifindex == ifindex)
674 return 1; 512 return 1;
675 if (entinf->outdev) 513 if (entry->outdev)
676 if (entinf->outdev->ifindex == ifindex) 514 if (entry->outdev->ifindex == ifindex)
677 return 1; 515 return 1;
678#ifdef CONFIG_BRIDGE_NETFILTER 516#ifdef CONFIG_BRIDGE_NETFILTER
679 if (entry->skb->nf_bridge) { 517 if (entry->skb->nf_bridge) {
@@ -695,27 +533,18 @@ nfqnl_dev_drop(int ifindex)
695{ 533{
696 int i; 534 int i;
697 535
698 QDEBUG("entering for ifindex %u\n", ifindex); 536 rcu_read_lock();
699
700 /* this only looks like we have to hold the readlock for a way too long
701 * time, issue_verdict(), nf_reinject(), ... - but we always only
702 * issue NF_DROP, which is processed directly in nf_reinject() */
703 read_lock_bh(&instances_lock);
704 537
705 for (i = 0; i < INSTANCE_BUCKETS; i++) { 538 for (i = 0; i < INSTANCE_BUCKETS; i++) {
706 struct hlist_node *tmp; 539 struct hlist_node *tmp;
707 struct nfqnl_instance *inst; 540 struct nfqnl_instance *inst;
708 struct hlist_head *head = &instance_table[i]; 541 struct hlist_head *head = &instance_table[i];
709 542
710 hlist_for_each_entry(inst, tmp, head, hlist) { 543 hlist_for_each_entry_rcu(inst, tmp, head, hlist)
711 struct nfqnl_queue_entry *entry; 544 nfqnl_flush(inst, dev_cmp, ifindex);
712 while ((entry = find_dequeue_entry(inst, dev_cmp,
713 ifindex)) != NULL)
714 issue_verdict(entry, NF_DROP);
715 }
716 } 545 }
717 546
718 read_unlock_bh(&instances_lock); 547 rcu_read_unlock();
719} 548}
720 549
721#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 550#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
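
nfqnl_dev_drop() now simply calls nfqnl_flush() with dev_cmp as the predicate, replacing the old find/dequeue/issue_verdict loop. The shape of such a filtered flush, sketched outside the nfqnl types:

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct qentry {
            struct list_head list;
            int ifindex;
    };

    typedef int (*qcmpfn)(struct qentry *, unsigned long);

    static LIST_HEAD(queue_list);
    static DEFINE_SPINLOCK(queue_lock);

    /* drop every entry matching cmp; cmp == NULL drops everything */
    static void queue_flush(qcmpfn cmp, unsigned long data)
    {
            struct qentry *e, *next;

            spin_lock_bh(&queue_lock);
            list_for_each_entry_safe(e, next, &queue_list, list) {
                    if (!cmp || cmp(e, data)) {
                            list_del(&e->list);
                            kfree(e);       /* the real code reinjects with NF_DROP */
                    }
            }
            spin_unlock_bh(&queue_lock);
    }
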
@@ -750,8 +579,8 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
750 int i; 579 int i;
751 580
752 /* destroy all instances for this pid */ 581 /* destroy all instances for this pid */
753 write_lock_bh(&instances_lock); 582 spin_lock(&instances_lock);
754 for (i = 0; i < INSTANCE_BUCKETS; i++) { 583 for (i = 0; i < INSTANCE_BUCKETS; i++) {
755 struct hlist_node *tmp, *t2; 584 struct hlist_node *tmp, *t2;
756 struct nfqnl_instance *inst; 585 struct nfqnl_instance *inst;
757 struct hlist_head *head = &instance_table[i]; 586 struct hlist_head *head = &instance_table[i];
@@ -762,7 +591,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
762 __instance_destroy(inst); 591 __instance_destroy(inst);
763 } 592 }
764 } 593 }
765 write_unlock_bh(&instances_lock); 594 spin_unlock(&instances_lock);
766 } 595 }
767 return NOTIFY_DONE; 596 return NOTIFY_DONE;
768} 597}
@@ -787,21 +616,24 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
787 struct nfqnl_msg_verdict_hdr *vhdr; 616 struct nfqnl_msg_verdict_hdr *vhdr;
788 struct nfqnl_instance *queue; 617 struct nfqnl_instance *queue;
789 unsigned int verdict; 618 unsigned int verdict;
790 struct nfqnl_queue_entry *entry; 619 struct nf_queue_entry *entry;
791 int err; 620 int err;
792 621
793 queue = instance_lookup_get(queue_num); 622 rcu_read_lock();
794 if (!queue) 623 queue = instance_lookup(queue_num);
795 return -ENODEV; 624 if (!queue) {
625 err = -ENODEV;
626 goto err_out_unlock;
627 }
796 628
797 if (queue->peer_pid != NETLINK_CB(skb).pid) { 629 if (queue->peer_pid != NETLINK_CB(skb).pid) {
798 err = -EPERM; 630 err = -EPERM;
799 goto err_out_put; 631 goto err_out_unlock;
800 } 632 }
801 633
802 if (!nfqa[NFQA_VERDICT_HDR]) { 634 if (!nfqa[NFQA_VERDICT_HDR]) {
803 err = -EINVAL; 635 err = -EINVAL;
804 goto err_out_put; 636 goto err_out_unlock;
805 } 637 }
806 638
807 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); 639 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
@@ -809,14 +641,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
809 641
810 if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { 642 if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
811 err = -EINVAL; 643 err = -EINVAL;
812 goto err_out_put; 644 goto err_out_unlock;
813 } 645 }
814 646
815 entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); 647 entry = find_dequeue_entry(queue, ntohl(vhdr->id));
816 if (entry == NULL) { 648 if (entry == NULL) {
817 err = -ENOENT; 649 err = -ENOENT;
818 goto err_out_put; 650 goto err_out_unlock;
819 } 651 }
652 rcu_read_unlock();
820 653
821 if (nfqa[NFQA_PAYLOAD]) { 654 if (nfqa[NFQA_PAYLOAD]) {
822 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), 655 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
@@ -825,15 +658,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
825 } 658 }
826 659
827 if (nfqa[NFQA_MARK]) 660 if (nfqa[NFQA_MARK])
828 entry->skb->mark = ntohl(*(__be32 *) 661 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
829 nla_data(nfqa[NFQA_MARK]));
830 662
831 issue_verdict(entry, verdict); 663 nf_reinject(entry, verdict);
832 instance_put(queue);
833 return 0; 664 return 0;
834 665
835err_out_put: 666err_out_unlock:
836 instance_put(queue); 667 rcu_read_unlock();
837 return err; 668 return err;
838} 669}
839 670
@@ -849,7 +680,7 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
849 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, 680 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
850}; 681};
851 682
852static struct nf_queue_handler nfqh = { 683static const struct nf_queue_handler nfqh = {
853 .name = "nf_queue", 684 .name = "nf_queue",
854 .outfn = &nfqnl_enqueue_packet, 685 .outfn = &nfqnl_enqueue_packet,
855}; 686};
@@ -861,70 +692,72 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
861 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 692 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
862 u_int16_t queue_num = ntohs(nfmsg->res_id); 693 u_int16_t queue_num = ntohs(nfmsg->res_id);
863 struct nfqnl_instance *queue; 694 struct nfqnl_instance *queue;
695 struct nfqnl_msg_config_cmd *cmd = NULL;
864 int ret = 0; 696 int ret = 0;
865 697
866 QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
867
868 queue = instance_lookup_get(queue_num);
869 if (nfqa[NFQA_CFG_CMD]) { 698 if (nfqa[NFQA_CFG_CMD]) {
870 struct nfqnl_msg_config_cmd *cmd;
871 cmd = nla_data(nfqa[NFQA_CFG_CMD]); 699 cmd = nla_data(nfqa[NFQA_CFG_CMD]);
872 QDEBUG("found CFG_CMD\n");
873 700
701 /* Commands without queue context - might sleep */
874 switch (cmd->command) { 702 switch (cmd->command) {
875 case NFQNL_CFG_CMD_BIND: 703 case NFQNL_CFG_CMD_PF_BIND:
876 if (queue) 704 ret = nf_register_queue_handler(ntohs(cmd->pf),
877 return -EBUSY; 705 &nfqh);
706 break;
707 case NFQNL_CFG_CMD_PF_UNBIND:
708 ret = nf_unregister_queue_handler(ntohs(cmd->pf),
709 &nfqh);
710 break;
711 default:
712 break;
713 }
714
715 if (ret < 0)
716 return ret;
717 }
718
719 rcu_read_lock();
720 queue = instance_lookup(queue_num);
721 if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
722 ret = -EPERM;
723 goto err_out_unlock;
724 }
878 725
726 if (cmd != NULL) {
727 switch (cmd->command) {
728 case NFQNL_CFG_CMD_BIND:
729 if (queue) {
730 ret = -EBUSY;
731 goto err_out_unlock;
732 }
879 queue = instance_create(queue_num, NETLINK_CB(skb).pid); 733 queue = instance_create(queue_num, NETLINK_CB(skb).pid);
880 if (!queue) 734 if (IS_ERR(queue)) {
881 return -EINVAL; 735 ret = PTR_ERR(queue);
736 goto err_out_unlock;
737 }
882 break; 738 break;
883 case NFQNL_CFG_CMD_UNBIND: 739 case NFQNL_CFG_CMD_UNBIND:
884 if (!queue) 740 if (!queue) {
885 return -ENODEV; 741 ret = -ENODEV;
886 742 goto err_out_unlock;
887 if (queue->peer_pid != NETLINK_CB(skb).pid) {
888 ret = -EPERM;
889 goto out_put;
890 } 743 }
891
892 instance_destroy(queue); 744 instance_destroy(queue);
893 break; 745 break;
894 case NFQNL_CFG_CMD_PF_BIND: 746 case NFQNL_CFG_CMD_PF_BIND:
895 QDEBUG("registering queue handler for pf=%u\n",
896 ntohs(cmd->pf));
897 ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
898 break;
899 case NFQNL_CFG_CMD_PF_UNBIND: 747 case NFQNL_CFG_CMD_PF_UNBIND:
900 QDEBUG("unregistering queue handler for pf=%u\n",
901 ntohs(cmd->pf));
902 ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh);
903 break; 748 break;
904 default: 749 default:
905 ret = -EINVAL; 750 ret = -ENOTSUPP;
906 break; 751 break;
907 } 752 }
908 } else {
909 if (!queue) {
910 QDEBUG("no config command, and no instance ENOENT\n");
911 ret = -ENOENT;
912 goto out_put;
913 }
914
915 if (queue->peer_pid != NETLINK_CB(skb).pid) {
916 QDEBUG("no config command, and wrong pid\n");
917 ret = -EPERM;
918 goto out_put;
919 }
920 } 753 }
921 754
922 if (nfqa[NFQA_CFG_PARAMS]) { 755 if (nfqa[NFQA_CFG_PARAMS]) {
923 struct nfqnl_msg_config_params *params; 756 struct nfqnl_msg_config_params *params;
924 757
925 if (!queue) { 758 if (!queue) {
926 ret = -ENOENT; 759 ret = -ENODEV;
927 goto out_put; 760 goto err_out_unlock;
928 } 761 }
929 params = nla_data(nfqa[NFQA_CFG_PARAMS]); 762 params = nla_data(nfqa[NFQA_CFG_PARAMS]);
930 nfqnl_set_mode(queue, params->copy_mode, 763 nfqnl_set_mode(queue, params->copy_mode,
@@ -933,14 +766,19 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
933 766
934 if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { 767 if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
935 __be32 *queue_maxlen; 768 __be32 *queue_maxlen;
769
770 if (!queue) {
771 ret = -ENODEV;
772 goto err_out_unlock;
773 }
936 queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); 774 queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
937 spin_lock_bh(&queue->lock); 775 spin_lock_bh(&queue->lock);
938 queue->queue_maxlen = ntohl(*queue_maxlen); 776 queue->queue_maxlen = ntohl(*queue_maxlen);
939 spin_unlock_bh(&queue->lock); 777 spin_unlock_bh(&queue->lock);
940 } 778 }
941 779
942out_put: 780err_out_unlock:
943 instance_put(queue); 781 rcu_read_unlock();
944 return ret; 782 return ret;
945} 783}
946 784
@@ -1008,7 +846,7 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
1008 846
1009static void *seq_start(struct seq_file *seq, loff_t *pos) 847static void *seq_start(struct seq_file *seq, loff_t *pos)
1010{ 848{
1011 read_lock_bh(&instances_lock); 849 spin_lock(&instances_lock);
1012 return get_idx(seq, *pos); 850 return get_idx(seq, *pos);
1013} 851}
1014 852
@@ -1020,7 +858,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
1020 858
1021static void seq_stop(struct seq_file *s, void *v) 859static void seq_stop(struct seq_file *s, void *v)
1022{ 860{
1023 read_unlock_bh(&instances_lock); 861 spin_unlock(&instances_lock);
1024} 862}
1025 863
1026static int seq_show(struct seq_file *s, void *v) 864static int seq_show(struct seq_file *s, void *v)
@@ -1032,8 +870,7 @@ static int seq_show(struct seq_file *s, void *v)
1032 inst->peer_pid, inst->queue_total, 870 inst->peer_pid, inst->queue_total,
1033 inst->copy_mode, inst->copy_range, 871 inst->copy_mode, inst->copy_range,
1034 inst->queue_dropped, inst->queue_user_dropped, 872 inst->queue_dropped, inst->queue_user_dropped,
1035 atomic_read(&inst->id_sequence), 873 inst->id_sequence, 1);
1036 atomic_read(&inst->use));
1037} 874}
1038 875
1039static const struct seq_operations nfqnl_seq_ops = { 876static const struct seq_operations nfqnl_seq_ops = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index b6160e41eb1c..8d4fca96a4a7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -34,12 +34,21 @@ MODULE_DESCRIPTION("[ip,ip6,arp]_tables backend module");
34 34
35#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 35#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
36 36
37struct compat_delta {
38 struct compat_delta *next;
39 unsigned int offset;
40 short delta;
41};
42
37struct xt_af { 43struct xt_af {
38 struct mutex mutex; 44 struct mutex mutex;
39 struct list_head match; 45 struct list_head match;
40 struct list_head target; 46 struct list_head target;
41 struct list_head tables; 47 struct list_head tables;
48#ifdef CONFIG_COMPAT
42 struct mutex compat_mutex; 49 struct mutex compat_mutex;
50 struct compat_delta *compat_offsets;
51#endif
43}; 52};
44 53
45static struct xt_af *xt; 54static struct xt_af *xt;
@@ -335,6 +344,54 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
335EXPORT_SYMBOL_GPL(xt_check_match); 344EXPORT_SYMBOL_GPL(xt_check_match);
336 345
337#ifdef CONFIG_COMPAT 346#ifdef CONFIG_COMPAT
347int xt_compat_add_offset(int af, unsigned int offset, short delta)
348{
349 struct compat_delta *tmp;
350
351 tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
352 if (!tmp)
353 return -ENOMEM;
354
355 tmp->offset = offset;
356 tmp->delta = delta;
357
358 if (xt[af].compat_offsets) {
359 tmp->next = xt[af].compat_offsets->next;
360 xt[af].compat_offsets->next = tmp;
361 } else {
362 xt[af].compat_offsets = tmp;
363 tmp->next = NULL;
364 }
365 return 0;
366}
367EXPORT_SYMBOL_GPL(xt_compat_add_offset);
368
369void xt_compat_flush_offsets(int af)
370{
371 struct compat_delta *tmp, *next;
372
373 if (xt[af].compat_offsets) {
374 for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
375 next = tmp->next;
376 kfree(tmp);
377 }
378 xt[af].compat_offsets = NULL;
379 }
380}
381EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
382
383short xt_compat_calc_jump(int af, unsigned int offset)
384{
385 struct compat_delta *tmp;
386 short delta;
387
388 for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
389 if (tmp->offset < offset)
390 delta += tmp->delta;
391 return delta;
392}
393EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
394
338int xt_compat_match_offset(struct xt_match *match) 395int xt_compat_match_offset(struct xt_match *match)
339{ 396{
340 u_int16_t csize = match->compatsize ? : match->matchsize; 397 u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -342,8 +399,8 @@ int xt_compat_match_offset(struct xt_match *match)
342} 399}
343EXPORT_SYMBOL_GPL(xt_compat_match_offset); 400EXPORT_SYMBOL_GPL(xt_compat_match_offset);
344 401
345void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, 402int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
346 int *size) 403 int *size)
347{ 404{
348 struct xt_match *match = m->u.kernel.match; 405 struct xt_match *match = m->u.kernel.match;
349 struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; 406 struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
@@ -365,6 +422,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
365 422
366 *size += off; 423 *size += off;
367 *dstptr += msize; 424 *dstptr += msize;
425 return 0;
368} 426}
369EXPORT_SYMBOL_GPL(xt_compat_match_from_user); 427EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
370 428
@@ -499,7 +557,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
499 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages) 557 if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
500 return NULL; 558 return NULL;
501 559
502 newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL); 560 newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
503 if (!newinfo) 561 if (!newinfo)
504 return NULL; 562 return NULL;
505 563
@@ -872,6 +930,7 @@ static int __init xt_init(void)
872 mutex_init(&xt[i].mutex); 930 mutex_init(&xt[i].mutex);
873#ifdef CONFIG_COMPAT 931#ifdef CONFIG_COMPAT
874 mutex_init(&xt[i].compat_mutex); 932 mutex_init(&xt[i].compat_mutex);
933 xt[i].compat_offsets = NULL;
875#endif 934#endif
876 INIT_LIST_HEAD(&xt[i].target); 935 INIT_LIST_HEAD(&xt[i].target);
877 INIT_LIST_HEAD(&xt[i].match); 936 INIT_LIST_HEAD(&xt[i].match);
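
xt_compat_calc_jump() works by summing the size deltas of all compat entries that precede a given offset. A standalone sketch of that bookkeeping, assuming a single global list in place of the per-family xt[af].compat_offsets (and a plain prepend instead of the kernel's insert-after-head, which is equivalent since order does not matter for the sum):

#include <stdio.h>
#include <stdlib.h>

struct compat_delta {
	struct compat_delta *next;
	unsigned int offset;
	short delta;
};

static struct compat_delta *offsets;	/* xt[af].compat_offsets in-kernel */

static int add_offset(unsigned int offset, short delta)
{
	struct compat_delta *tmp = malloc(sizeof(*tmp));

	if (!tmp)
		return -1;
	tmp->offset = offset;
	tmp->delta = delta;
	tmp->next = offsets;
	offsets = tmp;
	return 0;
}

/* Sum the deltas of every entry that starts before @offset: the
 * amount a jump target moves once earlier entries grow to 64-bit. */
static short calc_jump(unsigned int offset)
{
	const struct compat_delta *tmp;
	short delta = 0;

	for (tmp = offsets; tmp; tmp = tmp->next)
		if (tmp->offset < offset)
			delta += tmp->delta;
	return delta;
}

int main(void)
{
	add_offset(100, 8);
	add_offset(200, 8);
	printf("%d %d %d\n", calc_jump(50), calc_jump(150), calc_jump(300));
	return 0;	/* prints: 0 8 16 */
}
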
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 77eeae658d42..77a52bf83225 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -22,17 +22,14 @@
22 22
23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
24MODULE_LICENSE("GPL"); 24MODULE_LICENSE("GPL");
25MODULE_DESCRIPTION("iptables qdisc classification target module"); 25MODULE_DESCRIPTION("Xtables: Qdisc classification");
26MODULE_ALIAS("ipt_CLASSIFY"); 26MODULE_ALIAS("ipt_CLASSIFY");
27MODULE_ALIAS("ip6t_CLASSIFY"); 27MODULE_ALIAS("ip6t_CLASSIFY");
28 28
29static unsigned int 29static unsigned int
30target(struct sk_buff *skb, 30classify_tg(struct sk_buff *skb, const struct net_device *in,
31 const struct net_device *in, 31 const struct net_device *out, unsigned int hooknum,
32 const struct net_device *out, 32 const struct xt_target *target, const void *targinfo)
33 unsigned int hooknum,
34 const struct xt_target *target,
35 const void *targinfo)
36{ 33{
37 const struct xt_classify_target_info *clinfo = targinfo; 34 const struct xt_classify_target_info *clinfo = targinfo;
38 35
@@ -40,42 +37,41 @@ target(struct sk_buff *skb,
40 return XT_CONTINUE; 37 return XT_CONTINUE;
41} 38}
42 39
43static struct xt_target xt_classify_target[] __read_mostly = { 40static struct xt_target classify_tg_reg[] __read_mostly = {
44 { 41 {
45 .family = AF_INET, 42 .family = AF_INET,
46 .name = "CLASSIFY", 43 .name = "CLASSIFY",
47 .target = target, 44 .target = classify_tg,
48 .targetsize = sizeof(struct xt_classify_target_info), 45 .targetsize = sizeof(struct xt_classify_target_info),
49 .table = "mangle", 46 .table = "mangle",
50 .hooks = (1 << NF_IP_LOCAL_OUT) | 47 .hooks = (1 << NF_INET_LOCAL_OUT) |
51 (1 << NF_IP_FORWARD) | 48 (1 << NF_INET_FORWARD) |
52 (1 << NF_IP_POST_ROUTING), 49 (1 << NF_INET_POST_ROUTING),
53 .me = THIS_MODULE, 50 .me = THIS_MODULE,
54 }, 51 },
55 { 52 {
56 .name = "CLASSIFY", 53 .name = "CLASSIFY",
57 .family = AF_INET6, 54 .family = AF_INET6,
58 .target = target, 55 .target = classify_tg,
59 .targetsize = sizeof(struct xt_classify_target_info), 56 .targetsize = sizeof(struct xt_classify_target_info),
60 .table = "mangle", 57 .table = "mangle",
61 .hooks = (1 << NF_IP6_LOCAL_OUT) | 58 .hooks = (1 << NF_INET_LOCAL_OUT) |
62 (1 << NF_IP6_FORWARD) | 59 (1 << NF_INET_FORWARD) |
63 (1 << NF_IP6_POST_ROUTING), 60 (1 << NF_INET_POST_ROUTING),
64 .me = THIS_MODULE, 61 .me = THIS_MODULE,
65 }, 62 },
66}; 63};
67 64
68static int __init xt_classify_init(void) 65static int __init classify_tg_init(void)
69{ 66{
70 return xt_register_targets(xt_classify_target, 67 return xt_register_targets(classify_tg_reg,
71 ARRAY_SIZE(xt_classify_target)); 68 ARRAY_SIZE(classify_tg_reg));
72} 69}
73 70
74static void __exit xt_classify_fini(void) 71static void __exit classify_tg_exit(void)
75{ 72{
76 xt_unregister_targets(xt_classify_target, 73 xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
77 ARRAY_SIZE(xt_classify_target));
78} 74}
79 75
80module_init(xt_classify_init); 76module_init(classify_tg_init);
81module_exit(xt_classify_fini); 77module_exit(classify_tg_exit);
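
The .hooks fields above are bitmasks over hook numbers, now spelled with the unified NF_INET_* names. A small sketch of how such a mask is built and tested, assuming the NF_INET_* values of this era (FORWARD=2, LOCAL_OUT=3, POST_ROUTING=4):

#include <stdio.h>

enum {	/* values as in 2.6.25's linux/netfilter.h (assumed here) */
	NF_INET_FORWARD      = 2,
	NF_INET_LOCAL_OUT    = 3,
	NF_INET_POST_ROUTING = 4,
};

int main(void)
{
	unsigned int hooks = (1 << NF_INET_LOCAL_OUT) |
			     (1 << NF_INET_FORWARD) |
			     (1 << NF_INET_POST_ROUTING);
	int h;

	for (h = 0; h < 8; h++)
		if (hooks & (1 << h))
			printf("hook %d allowed\n", h);
	return 0;
}
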
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 0621ca7de3b0..5fecfb4794b1 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -1,8 +1,10 @@
1/* This kernel module is used to modify the connection mark values, or 1/*
2 * to optionally restore the skb nfmark from the connection mark 2 * xt_CONNMARK - Netfilter module to modify the connection mark values
3 * 3 *
4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> 4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
5 * by Henrik Nordstrom <hno@marasystems.com> 5 * by Henrik Nordstrom <hno@marasystems.com>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * Jan Engelhardt <jengelh@computergmbh.de>
6 * 8 *
7 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -24,7 +26,7 @@
24#include <net/checksum.h> 26#include <net/checksum.h>
25 27
26MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); 28MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
27MODULE_DESCRIPTION("IP tables CONNMARK matching module"); 29MODULE_DESCRIPTION("Xtables: connection mark modification");
28MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
29MODULE_ALIAS("ipt_CONNMARK"); 31MODULE_ALIAS("ipt_CONNMARK");
30MODULE_ALIAS("ip6t_CONNMARK"); 32MODULE_ALIAS("ip6t_CONNMARK");
@@ -34,12 +36,9 @@ MODULE_ALIAS("ip6t_CONNMARK");
34#include <net/netfilter/nf_conntrack_ecache.h> 36#include <net/netfilter/nf_conntrack_ecache.h>
35 37
36static unsigned int 38static unsigned int
37target(struct sk_buff *skb, 39connmark_tg_v0(struct sk_buff *skb, const struct net_device *in,
38 const struct net_device *in, 40 const struct net_device *out, unsigned int hooknum,
39 const struct net_device *out, 41 const struct xt_target *target, const void *targinfo)
40 unsigned int hooknum,
41 const struct xt_target *target,
42 const void *targinfo)
43{ 42{
44 const struct xt_connmark_target_info *markinfo = targinfo; 43 const struct xt_connmark_target_info *markinfo = targinfo;
45 struct nf_conn *ct; 44 struct nf_conn *ct;
@@ -77,12 +76,50 @@ target(struct sk_buff *skb,
77 return XT_CONTINUE; 76 return XT_CONTINUE;
78} 77}
79 78
79static unsigned int
80connmark_tg(struct sk_buff *skb, const struct net_device *in,
81 const struct net_device *out, unsigned int hooknum,
82 const struct xt_target *target, const void *targinfo)
83{
84 const struct xt_connmark_tginfo1 *info = targinfo;
85 enum ip_conntrack_info ctinfo;
86 struct nf_conn *ct;
87 u_int32_t newmark;
88
89 ct = nf_ct_get(skb, &ctinfo);
90 if (ct == NULL)
91 return XT_CONTINUE;
92
93 switch (info->mode) {
94 case XT_CONNMARK_SET:
95 newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
96 if (ct->mark != newmark) {
97 ct->mark = newmark;
98 nf_conntrack_event_cache(IPCT_MARK, skb);
99 }
100 break;
101 case XT_CONNMARK_SAVE:
102 newmark = (ct->mark & ~info->ctmask) ^
103 (skb->mark & info->nfmask);
104 if (ct->mark != newmark) {
105 ct->mark = newmark;
106 nf_conntrack_event_cache(IPCT_MARK, skb);
107 }
108 break;
109 case XT_CONNMARK_RESTORE:
110 newmark = (skb->mark & ~info->nfmask) ^
111 (ct->mark & info->ctmask);
112 skb->mark = newmark;
113 break;
114 }
115
116 return XT_CONTINUE;
117}
118
80static bool 119static bool
81checkentry(const char *tablename, 120connmark_tg_check_v0(const char *tablename, const void *entry,
82 const void *entry, 121 const struct xt_target *target, void *targinfo,
83 const struct xt_target *target, 122 unsigned int hook_mask)
84 void *targinfo,
85 unsigned int hook_mask)
86{ 123{
87 const struct xt_connmark_target_info *matchinfo = targinfo; 124 const struct xt_connmark_target_info *matchinfo = targinfo;
88 125
@@ -100,14 +137,27 @@ checkentry(const char *tablename,
100 } 137 }
101 if (nf_ct_l3proto_try_module_get(target->family) < 0) { 138 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
102 printk(KERN_WARNING "can't load conntrack support for " 139 printk(KERN_WARNING "can't load conntrack support for "
103 "proto=%d\n", target->family); 140 "proto=%u\n", target->family);
141 return false;
142 }
143 return true;
144}
145
146static bool
147connmark_tg_check(const char *tablename, const void *entry,
148 const struct xt_target *target, void *targinfo,
149 unsigned int hook_mask)
150{
151 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
152 printk(KERN_WARNING "cannot load conntrack support for "
153 "proto=%u\n", target->family);
104 return false; 154 return false;
105 } 155 }
106 return true; 156 return true;
107} 157}
108 158
109static void 159static void
110destroy(const struct xt_target *target, void *targinfo) 160connmark_tg_destroy(const struct xt_target *target, void *targinfo)
111{ 161{
112 nf_ct_l3proto_module_put(target->family); 162 nf_ct_l3proto_module_put(target->family);
113} 163}
@@ -120,7 +170,7 @@ struct compat_xt_connmark_target_info {
120 u_int16_t __pad2; 170 u_int16_t __pad2;
121}; 171};
122 172
123static void compat_from_user(void *dst, void *src) 173static void connmark_tg_compat_from_user_v0(void *dst, void *src)
124{ 174{
125 const struct compat_xt_connmark_target_info *cm = src; 175 const struct compat_xt_connmark_target_info *cm = src;
126 struct xt_connmark_target_info m = { 176 struct xt_connmark_target_info m = {
@@ -131,7 +181,7 @@ static void compat_from_user(void *dst, void *src)
131 memcpy(dst, &m, sizeof(m)); 181 memcpy(dst, &m, sizeof(m));
132} 182}
133 183
134static int compat_to_user(void __user *dst, void *src) 184static int connmark_tg_compat_to_user_v0(void __user *dst, void *src)
135{ 185{
136 const struct xt_connmark_target_info *m = src; 186 const struct xt_connmark_target_info *m = src;
137 struct compat_xt_connmark_target_info cm = { 187 struct compat_xt_connmark_target_info cm = {
@@ -143,43 +193,69 @@ static int compat_to_user(void __user *dst, void *src)
143} 193}
144#endif /* CONFIG_COMPAT */ 194#endif /* CONFIG_COMPAT */
145 195
146static struct xt_target xt_connmark_target[] __read_mostly = { 196static struct xt_target connmark_tg_reg[] __read_mostly = {
147 { 197 {
148 .name = "CONNMARK", 198 .name = "CONNMARK",
199 .revision = 0,
149 .family = AF_INET, 200 .family = AF_INET,
150 .checkentry = checkentry, 201 .checkentry = connmark_tg_check_v0,
151 .destroy = destroy, 202 .destroy = connmark_tg_destroy,
152 .target = target, 203 .target = connmark_tg_v0,
153 .targetsize = sizeof(struct xt_connmark_target_info), 204 .targetsize = sizeof(struct xt_connmark_target_info),
154#ifdef CONFIG_COMPAT 205#ifdef CONFIG_COMPAT
155 .compatsize = sizeof(struct compat_xt_connmark_target_info), 206 .compatsize = sizeof(struct compat_xt_connmark_target_info),
156 .compat_from_user = compat_from_user, 207 .compat_from_user = connmark_tg_compat_from_user_v0,
157 .compat_to_user = compat_to_user, 208 .compat_to_user = connmark_tg_compat_to_user_v0,
158#endif 209#endif
159 .me = THIS_MODULE 210 .me = THIS_MODULE
160 }, 211 },
161 { 212 {
162 .name = "CONNMARK", 213 .name = "CONNMARK",
214 .revision = 0,
163 .family = AF_INET6, 215 .family = AF_INET6,
164 .checkentry = checkentry, 216 .checkentry = connmark_tg_check_v0,
165 .destroy = destroy, 217 .destroy = connmark_tg_destroy,
166 .target = target, 218 .target = connmark_tg_v0,
167 .targetsize = sizeof(struct xt_connmark_target_info), 219 .targetsize = sizeof(struct xt_connmark_target_info),
220#ifdef CONFIG_COMPAT
221 .compatsize = sizeof(struct compat_xt_connmark_target_info),
222 .compat_from_user = connmark_tg_compat_from_user_v0,
223 .compat_to_user = connmark_tg_compat_to_user_v0,
224#endif
168 .me = THIS_MODULE 225 .me = THIS_MODULE
169 }, 226 },
227 {
228 .name = "CONNMARK",
229 .revision = 1,
230 .family = AF_INET,
231 .checkentry = connmark_tg_check,
232 .target = connmark_tg,
233 .targetsize = sizeof(struct xt_connmark_tginfo1),
234 .destroy = connmark_tg_destroy,
235 .me = THIS_MODULE,
236 },
237 {
238 .name = "CONNMARK",
239 .revision = 1,
240 .family = AF_INET6,
241 .checkentry = connmark_tg_check,
242 .target = connmark_tg,
243 .targetsize = sizeof(struct xt_connmark_tginfo1),
244 .destroy = connmark_tg_destroy,
245 .me = THIS_MODULE,
246 },
170}; 247};
171 248
172static int __init xt_connmark_init(void) 249static int __init connmark_tg_init(void)
173{ 250{
174 return xt_register_targets(xt_connmark_target, 251 return xt_register_targets(connmark_tg_reg,
175 ARRAY_SIZE(xt_connmark_target)); 252 ARRAY_SIZE(connmark_tg_reg));
176} 253}
177 254
178static void __exit xt_connmark_fini(void) 255static void __exit connmark_tg_exit(void)
179{ 256{
180 xt_unregister_targets(xt_connmark_target, 257 xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg));
181 ARRAY_SIZE(xt_connmark_target));
182} 258}
183 259
184module_init(xt_connmark_init); 260module_init(connmark_tg_init);
185module_exit(xt_connmark_fini); 261module_exit(connmark_tg_exit);
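
The revision-1 CONNMARK target reduces SET/SAVE/RESTORE to masked-xor arithmetic on the two marks. A standalone sketch of the three expressions, with stand-in values in place of struct xt_connmark_tginfo1 and the conntrack state:

#include <stdio.h>

int main(void)
{
	/* stand-ins for struct xt_connmark_tginfo1 and conntrack state */
	unsigned int ctmark = 0x5, ctmask = 0xf, nfmask = 0xff;
	unsigned int ct_mark = 0xabc0, skb_mark = 0x0012;

	/* XT_CONNMARK_SET: clear the masked bits, xor in the new value */
	unsigned int set = (ct_mark & ~ctmask) ^ ctmark;
	/* XT_CONNMARK_SAVE: copy packet-mark bits into the ct mark */
	unsigned int save = (ct_mark & ~ctmask) ^ (skb_mark & nfmask);
	/* XT_CONNMARK_RESTORE: copy ct-mark bits back onto the packet */
	unsigned int restore = (skb_mark & ~nfmask) ^ (ct_mark & ctmask);

	printf("set=%#x save=%#x restore=%#x\n", set, save, restore);
	return 0;
}
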
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index d8feba9bdb48..1faa9136195d 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -20,12 +20,13 @@
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_CONNSECMARK.h> 21#include <linux/netfilter/xt_CONNSECMARK.h>
22#include <net/netfilter/nf_conntrack.h> 22#include <net/netfilter/nf_conntrack.h>
23#include <net/netfilter/nf_conntrack_ecache.h>
23 24
24#define PFX "CONNSECMARK: " 25#define PFX "CONNSECMARK: "
25 26
26MODULE_LICENSE("GPL"); 27MODULE_LICENSE("GPL");
27MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); 28MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
28MODULE_DESCRIPTION("ip[6]tables CONNSECMARK module"); 29MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark");
29MODULE_ALIAS("ipt_CONNSECMARK"); 30MODULE_ALIAS("ipt_CONNSECMARK");
30MODULE_ALIAS("ip6t_CONNSECMARK"); 31MODULE_ALIAS("ip6t_CONNSECMARK");
31 32
@@ -40,8 +41,10 @@ static void secmark_save(const struct sk_buff *skb)
40 enum ip_conntrack_info ctinfo; 41 enum ip_conntrack_info ctinfo;
41 42
42 ct = nf_ct_get(skb, &ctinfo); 43 ct = nf_ct_get(skb, &ctinfo);
43 if (ct && !ct->secmark) 44 if (ct && !ct->secmark) {
44 ct->secmark = skb->secmark; 45 ct->secmark = skb->secmark;
46 nf_conntrack_event_cache(IPCT_SECMARK, skb);
47 }
45 } 48 }
46} 49}
47 50
@@ -61,10 +64,10 @@ static void secmark_restore(struct sk_buff *skb)
61 } 64 }
62} 65}
63 66
64static unsigned int target(struct sk_buff *skb, const struct net_device *in, 67static unsigned int
65 const struct net_device *out, unsigned int hooknum, 68connsecmark_tg(struct sk_buff *skb, const struct net_device *in,
66 const struct xt_target *target, 69 const struct net_device *out, unsigned int hooknum,
67 const void *targinfo) 70 const struct xt_target *target, const void *targinfo)
68{ 71{
69 const struct xt_connsecmark_target_info *info = targinfo; 72 const struct xt_connsecmark_target_info *info = targinfo;
70 73
@@ -84,9 +87,10 @@ static unsigned int target(struct sk_buff *skb, const struct net_device *in,
84 return XT_CONTINUE; 87 return XT_CONTINUE;
85} 88}
86 89
87static bool checkentry(const char *tablename, const void *entry, 90static bool
88 const struct xt_target *target, void *targinfo, 91connsecmark_tg_check(const char *tablename, const void *entry,
89 unsigned int hook_mask) 92 const struct xt_target *target, void *targinfo,
93 unsigned int hook_mask)
90{ 94{
91 const struct xt_connsecmark_target_info *info = targinfo; 95 const struct xt_connsecmark_target_info *info = targinfo;
92 96
@@ -102,25 +106,25 @@ static bool checkentry(const char *tablename, const void *entry,
102 106
103 if (nf_ct_l3proto_try_module_get(target->family) < 0) { 107 if (nf_ct_l3proto_try_module_get(target->family) < 0) {
104 printk(KERN_WARNING "can't load conntrack support for " 108 printk(KERN_WARNING "can't load conntrack support for "
105 "proto=%d\n", target->family); 109 "proto=%u\n", target->family);
106 return false; 110 return false;
107 } 111 }
108 return true; 112 return true;
109} 113}
110 114
111static void 115static void
112destroy(const struct xt_target *target, void *targinfo) 116connsecmark_tg_destroy(const struct xt_target *target, void *targinfo)
113{ 117{
114 nf_ct_l3proto_module_put(target->family); 118 nf_ct_l3proto_module_put(target->family);
115} 119}
116 120
117static struct xt_target xt_connsecmark_target[] __read_mostly = { 121static struct xt_target connsecmark_tg_reg[] __read_mostly = {
118 { 122 {
119 .name = "CONNSECMARK", 123 .name = "CONNSECMARK",
120 .family = AF_INET, 124 .family = AF_INET,
121 .checkentry = checkentry, 125 .checkentry = connsecmark_tg_check,
122 .destroy = destroy, 126 .destroy = connsecmark_tg_destroy,
123 .target = target, 127 .target = connsecmark_tg,
124 .targetsize = sizeof(struct xt_connsecmark_target_info), 128 .targetsize = sizeof(struct xt_connsecmark_target_info),
125 .table = "mangle", 129 .table = "mangle",
126 .me = THIS_MODULE, 130 .me = THIS_MODULE,
@@ -128,26 +132,26 @@ static struct xt_target xt_connsecmark_target[] __read_mostly = {
128 { 132 {
129 .name = "CONNSECMARK", 133 .name = "CONNSECMARK",
130 .family = AF_INET6, 134 .family = AF_INET6,
131 .checkentry = checkentry, 135 .checkentry = connsecmark_tg_check,
132 .destroy = destroy, 136 .destroy = connsecmark_tg_destroy,
133 .target = target, 137 .target = connsecmark_tg,
134 .targetsize = sizeof(struct xt_connsecmark_target_info), 138 .targetsize = sizeof(struct xt_connsecmark_target_info),
135 .table = "mangle", 139 .table = "mangle",
136 .me = THIS_MODULE, 140 .me = THIS_MODULE,
137 }, 141 },
138}; 142};
139 143
140static int __init xt_connsecmark_init(void) 144static int __init connsecmark_tg_init(void)
141{ 145{
142 return xt_register_targets(xt_connsecmark_target, 146 return xt_register_targets(connsecmark_tg_reg,
143 ARRAY_SIZE(xt_connsecmark_target)); 147 ARRAY_SIZE(connsecmark_tg_reg));
144} 148}
145 149
146static void __exit xt_connsecmark_fini(void) 150static void __exit connsecmark_tg_exit(void)
147{ 151{
148 xt_unregister_targets(xt_connsecmark_target, 152 xt_unregister_targets(connsecmark_tg_reg,
149 ARRAY_SIZE(xt_connsecmark_target)); 153 ARRAY_SIZE(connsecmark_tg_reg));
150} 154}
151 155
152module_init(xt_connsecmark_init); 156module_init(connsecmark_tg_init);
153module_exit(xt_connsecmark_fini); 157module_exit(connsecmark_tg_exit);
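
secmark_save() above copies the packet's security mark into the connection only while the connection's secmark is still zero, so the first labelled packet wins. A minimal sketch of that rule:

#include <stdio.h>

/* The connection keeps the first nonzero secmark it sees; later
 * packets never overwrite it (the kernel also caches an event). */
static void secmark_save(unsigned int *ct_secmark, unsigned int skb_secmark)
{
	if (*ct_secmark == 0)
		*ct_secmark = skb_secmark;
}

int main(void)
{
	unsigned int ct = 0;

	secmark_save(&ct, 42);
	secmark_save(&ct, 99);	/* ignored: already set */
	printf("ct secmark = %u\n", ct);	/* 42 */
	return 0;
}
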
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 6322a933ab71..97efd74c04fe 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -18,19 +18,20 @@
18 18
19#include <linux/netfilter/x_tables.h> 19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter/xt_DSCP.h> 20#include <linux/netfilter/xt_DSCP.h>
21#include <linux/netfilter_ipv4/ipt_TOS.h>
21 22
22MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
23MODULE_DESCRIPTION("x_tables DSCP modification module"); 24MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification");
24MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
25MODULE_ALIAS("ipt_DSCP"); 26MODULE_ALIAS("ipt_DSCP");
26MODULE_ALIAS("ip6t_DSCP"); 27MODULE_ALIAS("ip6t_DSCP");
28MODULE_ALIAS("ipt_TOS");
29MODULE_ALIAS("ip6t_TOS");
27 30
28static unsigned int target(struct sk_buff *skb, 31static unsigned int
29 const struct net_device *in, 32dscp_tg(struct sk_buff *skb, const struct net_device *in,
30 const struct net_device *out, 33 const struct net_device *out, unsigned int hooknum,
31 unsigned int hooknum, 34 const struct xt_target *target, const void *targinfo)
32 const struct xt_target *target,
33 const void *targinfo)
34{ 35{
35 const struct xt_DSCP_info *dinfo = targinfo; 36 const struct xt_DSCP_info *dinfo = targinfo;
36 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; 37 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -46,12 +47,10 @@ static unsigned int target(struct sk_buff *skb,
46 return XT_CONTINUE; 47 return XT_CONTINUE;
47} 48}
48 49
49static unsigned int target6(struct sk_buff *skb, 50static unsigned int
50 const struct net_device *in, 51dscp_tg6(struct sk_buff *skb, const struct net_device *in,
51 const struct net_device *out, 52 const struct net_device *out, unsigned int hooknum,
52 unsigned int hooknum, 53 const struct xt_target *target, const void *targinfo)
53 const struct xt_target *target,
54 const void *targinfo)
55{ 54{
56 const struct xt_DSCP_info *dinfo = targinfo; 55 const struct xt_DSCP_info *dinfo = targinfo;
57 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; 56 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -66,11 +65,10 @@ static unsigned int target6(struct sk_buff *skb,
66 return XT_CONTINUE; 65 return XT_CONTINUE;
67} 66}
68 67
69static bool checkentry(const char *tablename, 68static bool
70 const void *e_void, 69dscp_tg_check(const char *tablename, const void *e_void,
71 const struct xt_target *target, 70 const struct xt_target *target, void *targinfo,
72 void *targinfo, 71 unsigned int hook_mask)
73 unsigned int hook_mask)
74{ 72{
75 const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; 73 const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
76 74
@@ -81,12 +79,95 @@ static bool checkentry(const char *tablename,
81 return true; 79 return true;
82} 80}
83 81
84static struct xt_target xt_dscp_target[] __read_mostly = { 82static unsigned int
83tos_tg_v0(struct sk_buff *skb, const struct net_device *in,
84 const struct net_device *out, unsigned int hooknum,
85 const struct xt_target *target, const void *targinfo)
86{
87 const struct ipt_tos_target_info *info = targinfo;
88 struct iphdr *iph = ip_hdr(skb);
89 u_int8_t oldtos;
90
91 if ((iph->tos & IPTOS_TOS_MASK) != info->tos) {
92 if (!skb_make_writable(skb, sizeof(struct iphdr)))
93 return NF_DROP;
94
95 iph = ip_hdr(skb);
96 oldtos = iph->tos;
97 iph->tos = (iph->tos & IPTOS_PREC_MASK) | info->tos;
98 csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
99 }
100
101 return XT_CONTINUE;
102}
103
104static bool
105tos_tg_check_v0(const char *tablename, const void *e_void,
106 const struct xt_target *target, void *targinfo,
107 unsigned int hook_mask)
108{
109 const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
110
111 if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT &&
112 tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST &&
113 tos != IPTOS_NORMALSVC) {
114 printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
115 return false;
116 }
117
118 return true;
119}
120
121static unsigned int
122tos_tg(struct sk_buff *skb, const struct net_device *in,
123 const struct net_device *out, unsigned int hooknum,
124 const struct xt_target *target, const void *targinfo)
125{
126 const struct xt_tos_target_info *info = targinfo;
127 struct iphdr *iph = ip_hdr(skb);
128 u_int8_t orig, nv;
129
130 orig = ipv4_get_dsfield(iph);
131 nv = (orig & ~info->tos_mask) ^ info->tos_value;
132
133 if (orig != nv) {
134 if (!skb_make_writable(skb, sizeof(struct iphdr)))
135 return NF_DROP;
136 iph = ip_hdr(skb);
137 ipv4_change_dsfield(iph, 0, nv);
138 }
139
140 return XT_CONTINUE;
141}
142
143static unsigned int
144tos_tg6(struct sk_buff *skb, const struct net_device *in,
145 const struct net_device *out, unsigned int hooknum,
146 const struct xt_target *target, const void *targinfo)
147{
148 const struct xt_tos_target_info *info = targinfo;
149 struct ipv6hdr *iph = ipv6_hdr(skb);
150 u_int8_t orig, nv;
151
152 orig = ipv6_get_dsfield(iph);
153 nv = (orig & ~info->tos_mask) ^ info->tos_value;
154
155 if (orig != nv) {
156 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
157 return NF_DROP;
158 iph = ipv6_hdr(skb);
159 ipv6_change_dsfield(iph, 0, nv);
160 }
161
162 return XT_CONTINUE;
163}
164
165static struct xt_target dscp_tg_reg[] __read_mostly = {
85 { 166 {
86 .name = "DSCP", 167 .name = "DSCP",
87 .family = AF_INET, 168 .family = AF_INET,
88 .checkentry = checkentry, 169 .checkentry = dscp_tg_check,
89 .target = target, 170 .target = dscp_tg,
90 .targetsize = sizeof(struct xt_DSCP_info), 171 .targetsize = sizeof(struct xt_DSCP_info),
91 .table = "mangle", 172 .table = "mangle",
92 .me = THIS_MODULE, 173 .me = THIS_MODULE,
@@ -94,23 +175,51 @@ static struct xt_target xt_dscp_target[] __read_mostly = {
94 { 175 {
95 .name = "DSCP", 176 .name = "DSCP",
96 .family = AF_INET6, 177 .family = AF_INET6,
97 .checkentry = checkentry, 178 .checkentry = dscp_tg_check,
98 .target = target6, 179 .target = dscp_tg6,
99 .targetsize = sizeof(struct xt_DSCP_info), 180 .targetsize = sizeof(struct xt_DSCP_info),
100 .table = "mangle", 181 .table = "mangle",
101 .me = THIS_MODULE, 182 .me = THIS_MODULE,
102 }, 183 },
184 {
185 .name = "TOS",
186 .revision = 0,
187 .family = AF_INET,
188 .table = "mangle",
189 .target = tos_tg_v0,
190 .targetsize = sizeof(struct ipt_tos_target_info),
191 .checkentry = tos_tg_check_v0,
192 .me = THIS_MODULE,
193 },
194 {
195 .name = "TOS",
196 .revision = 1,
197 .family = AF_INET,
198 .table = "mangle",
199 .target = tos_tg,
200 .targetsize = sizeof(struct xt_tos_target_info),
201 .me = THIS_MODULE,
202 },
203 {
204 .name = "TOS",
205 .revision = 1,
206 .family = AF_INET6,
207 .table = "mangle",
208 .target = tos_tg6,
209 .targetsize = sizeof(struct xt_tos_target_info),
210 .me = THIS_MODULE,
211 },
103}; 212};
104 213
105static int __init xt_dscp_target_init(void) 214static int __init dscp_tg_init(void)
106{ 215{
107 return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); 216 return xt_register_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
108} 217}
109 218
110static void __exit xt_dscp_target_fini(void) 219static void __exit dscp_tg_exit(void)
111{ 220{
112 xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); 221 xt_unregister_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
113} 222}
114 223
115module_init(xt_dscp_target_init); 224module_init(dscp_tg_init);
116module_exit(xt_dscp_target_fini); 225module_exit(dscp_tg_exit);
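
The revision-1 TOS target replaces the fixed v0 value check with a single (orig & ~mask) ^ value update; choosing mask/value pairs makes one expression behave as set, or, and, or xor. A standalone sketch:

#include <stdio.h>

static unsigned int tos_apply(unsigned int orig, unsigned int mask,
			      unsigned int value)
{
	return (orig & ~mask) ^ value;	/* the tos_tg/tos_tg6 update */
}

int main(void)
{
	unsigned int tos = 0x10;

	printf("set: %#x\n", tos_apply(tos, 0xff, 0x2e));	/* -> 0x2e */
	printf("or:  %#x\n", tos_apply(tos, 0x02, 0x02));	/* -> 0x12 */
	printf("and: %#x\n", tos_apply(tos, 0x0f, 0x00));	/* -> 0x10 */
	printf("xor: %#x\n", tos_apply(tos, 0x00, 0x10));	/* -> 0 */
	return 0;
}
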
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index bc6503d77d75..f9ce20b58981 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -1,10 +1,13 @@
1/* This is a module which is used for setting the NFMARK field of an skb. */ 1/*
2 2 * xt_MARK - Netfilter module to modify the NFMARK field of an skb
3/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca> 3 *
4 * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
5 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
6 * Jan Engelhardt <jengelh@computergmbh.de>
4 * 7 *
5 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
8 */ 11 */
9 12
10#include <linux/module.h> 13#include <linux/module.h>
@@ -17,17 +20,14 @@
17 20
18MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
19MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 22MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
20MODULE_DESCRIPTION("ip[6]tables MARK modification module"); 23MODULE_DESCRIPTION("Xtables: packet mark modification");
21MODULE_ALIAS("ipt_MARK"); 24MODULE_ALIAS("ipt_MARK");
22MODULE_ALIAS("ip6t_MARK"); 25MODULE_ALIAS("ip6t_MARK");
23 26
24static unsigned int 27static unsigned int
25target_v0(struct sk_buff *skb, 28mark_tg_v0(struct sk_buff *skb, const struct net_device *in,
26 const struct net_device *in, 29 const struct net_device *out, unsigned int hooknum,
27 const struct net_device *out, 30 const struct xt_target *target, const void *targinfo)
28 unsigned int hooknum,
29 const struct xt_target *target,
30 const void *targinfo)
31{ 31{
32 const struct xt_mark_target_info *markinfo = targinfo; 32 const struct xt_mark_target_info *markinfo = targinfo;
33 33
@@ -36,12 +36,9 @@ target_v0(struct sk_buff *skb,
36} 36}
37 37
38static unsigned int 38static unsigned int
39target_v1(struct sk_buff *skb, 39mark_tg_v1(struct sk_buff *skb, const struct net_device *in,
40 const struct net_device *in, 40 const struct net_device *out, unsigned int hooknum,
41 const struct net_device *out, 41 const struct xt_target *target, const void *targinfo)
42 unsigned int hooknum,
43 const struct xt_target *target,
44 const void *targinfo)
45{ 42{
46 const struct xt_mark_target_info_v1 *markinfo = targinfo; 43 const struct xt_mark_target_info_v1 *markinfo = targinfo;
47 int mark = 0; 44 int mark = 0;
@@ -64,13 +61,21 @@ target_v1(struct sk_buff *skb,
64 return XT_CONTINUE; 61 return XT_CONTINUE;
65} 62}
66 63
64static unsigned int
65mark_tg(struct sk_buff *skb, const struct net_device *in,
66 const struct net_device *out, unsigned int hooknum,
67 const struct xt_target *target, const void *targinfo)
68{
69 const struct xt_mark_tginfo2 *info = targinfo;
70
71 skb->mark = (skb->mark & ~info->mask) ^ info->mark;
72 return XT_CONTINUE;
73}
67 74
68static bool 75static bool
69checkentry_v0(const char *tablename, 76mark_tg_check_v0(const char *tablename, const void *entry,
70 const void *entry, 77 const struct xt_target *target, void *targinfo,
71 const struct xt_target *target, 78 unsigned int hook_mask)
72 void *targinfo,
73 unsigned int hook_mask)
74{ 79{
75 const struct xt_mark_target_info *markinfo = targinfo; 80 const struct xt_mark_target_info *markinfo = targinfo;
76 81
@@ -82,11 +87,9 @@ checkentry_v0(const char *tablename,
82} 87}
83 88
84static bool 89static bool
85checkentry_v1(const char *tablename, 90mark_tg_check_v1(const char *tablename, const void *entry,
86 const void *entry, 91 const struct xt_target *target, void *targinfo,
87 const struct xt_target *target, 92 unsigned int hook_mask)
88 void *targinfo,
89 unsigned int hook_mask)
90{ 93{
91 const struct xt_mark_target_info_v1 *markinfo = targinfo; 94 const struct xt_mark_target_info_v1 *markinfo = targinfo;
92 95
@@ -105,6 +108,28 @@ checkentry_v1(const char *tablename,
105} 108}
106 109
107#ifdef CONFIG_COMPAT 110#ifdef CONFIG_COMPAT
111struct compat_xt_mark_target_info {
112 compat_ulong_t mark;
113};
114
115static void mark_tg_compat_from_user_v0(void *dst, void *src)
116{
117 const struct compat_xt_mark_target_info *cm = src;
118 struct xt_mark_target_info m = {
119 .mark = cm->mark,
120 };
121 memcpy(dst, &m, sizeof(m));
122}
123
124static int mark_tg_compat_to_user_v0(void __user *dst, void *src)
125{
126 const struct xt_mark_target_info *m = src;
127 struct compat_xt_mark_target_info cm = {
128 .mark = m->mark,
129 };
130 return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
131}
132
108struct compat_xt_mark_target_info_v1 { 133struct compat_xt_mark_target_info_v1 {
109 compat_ulong_t mark; 134 compat_ulong_t mark;
110 u_int8_t mode; 135 u_int8_t mode;
@@ -112,7 +137,7 @@ struct compat_xt_mark_target_info_v1 {
112 u_int16_t __pad2; 137 u_int16_t __pad2;
113}; 138};
114 139
115static void compat_from_user_v1(void *dst, void *src) 140static void mark_tg_compat_from_user_v1(void *dst, void *src)
116{ 141{
117 const struct compat_xt_mark_target_info_v1 *cm = src; 142 const struct compat_xt_mark_target_info_v1 *cm = src;
118 struct xt_mark_target_info_v1 m = { 143 struct xt_mark_target_info_v1 m = {
@@ -122,7 +147,7 @@ static void compat_from_user_v1(void *dst, void *src)
122 memcpy(dst, &m, sizeof(m)); 147 memcpy(dst, &m, sizeof(m));
123} 148}
124 149
125static int compat_to_user_v1(void __user *dst, void *src) 150static int mark_tg_compat_to_user_v1(void __user *dst, void *src)
126{ 151{
127 const struct xt_mark_target_info_v1 *m = src; 152 const struct xt_mark_target_info_v1 *m = src;
128 struct compat_xt_mark_target_info_v1 cm = { 153 struct compat_xt_mark_target_info_v1 cm = {
@@ -133,14 +158,19 @@ static int compat_to_user_v1(void __user *dst, void *src)
133} 158}
134#endif /* CONFIG_COMPAT */ 159#endif /* CONFIG_COMPAT */
135 160
136static struct xt_target xt_mark_target[] __read_mostly = { 161static struct xt_target mark_tg_reg[] __read_mostly = {
137 { 162 {
138 .name = "MARK", 163 .name = "MARK",
139 .family = AF_INET, 164 .family = AF_INET,
140 .revision = 0, 165 .revision = 0,
141 .checkentry = checkentry_v0, 166 .checkentry = mark_tg_check_v0,
142 .target = target_v0, 167 .target = mark_tg_v0,
143 .targetsize = sizeof(struct xt_mark_target_info), 168 .targetsize = sizeof(struct xt_mark_target_info),
169#ifdef CONFIG_COMPAT
170 .compatsize = sizeof(struct compat_xt_mark_target_info),
171 .compat_from_user = mark_tg_compat_from_user_v0,
172 .compat_to_user = mark_tg_compat_to_user_v0,
173#endif
144 .table = "mangle", 174 .table = "mangle",
145 .me = THIS_MODULE, 175 .me = THIS_MODULE,
146 }, 176 },
@@ -148,13 +178,13 @@ static struct xt_target xt_mark_target[] __read_mostly = {
148 .name = "MARK", 178 .name = "MARK",
149 .family = AF_INET, 179 .family = AF_INET,
150 .revision = 1, 180 .revision = 1,
151 .checkentry = checkentry_v1, 181 .checkentry = mark_tg_check_v1,
152 .target = target_v1, 182 .target = mark_tg_v1,
153 .targetsize = sizeof(struct xt_mark_target_info_v1), 183 .targetsize = sizeof(struct xt_mark_target_info_v1),
154#ifdef CONFIG_COMPAT 184#ifdef CONFIG_COMPAT
155 .compatsize = sizeof(struct compat_xt_mark_target_info_v1), 185 .compatsize = sizeof(struct compat_xt_mark_target_info_v1),
156 .compat_from_user = compat_from_user_v1, 186 .compat_from_user = mark_tg_compat_from_user_v1,
157 .compat_to_user = compat_to_user_v1, 187 .compat_to_user = mark_tg_compat_to_user_v1,
158#endif 188#endif
159 .table = "mangle", 189 .table = "mangle",
160 .me = THIS_MODULE, 190 .me = THIS_MODULE,
@@ -163,23 +193,59 @@ static struct xt_target xt_mark_target[] __read_mostly = {
163 .name = "MARK", 193 .name = "MARK",
164 .family = AF_INET6, 194 .family = AF_INET6,
165 .revision = 0, 195 .revision = 0,
166 .checkentry = checkentry_v0, 196 .checkentry = mark_tg_check_v0,
167 .target = target_v0, 197 .target = mark_tg_v0,
168 .targetsize = sizeof(struct xt_mark_target_info), 198 .targetsize = sizeof(struct xt_mark_target_info),
199#ifdef CONFIG_COMPAT
200 .compatsize = sizeof(struct compat_xt_mark_target_info),
201 .compat_from_user = mark_tg_compat_from_user_v0,
202 .compat_to_user = mark_tg_compat_to_user_v0,
203#endif
204 .table = "mangle",
205 .me = THIS_MODULE,
206 },
207 {
208 .name = "MARK",
209 .family = AF_INET6,
210 .revision = 1,
211 .checkentry = mark_tg_check_v1,
212 .target = mark_tg_v1,
213 .targetsize = sizeof(struct xt_mark_target_info_v1),
214#ifdef CONFIG_COMPAT
215 .compatsize = sizeof(struct compat_xt_mark_target_info_v1),
216 .compat_from_user = mark_tg_compat_from_user_v1,
217 .compat_to_user = mark_tg_compat_to_user_v1,
218#endif
169 .table = "mangle", 219 .table = "mangle",
170 .me = THIS_MODULE, 220 .me = THIS_MODULE,
171 }, 221 },
222 {
223 .name = "MARK",
224 .revision = 2,
225 .family = AF_INET,
226 .target = mark_tg,
227 .targetsize = sizeof(struct xt_mark_tginfo2),
228 .me = THIS_MODULE,
229 },
230 {
231 .name = "MARK",
232 .revision = 2,
233 .family = AF_INET6,
234 .target = mark_tg,
235 .targetsize = sizeof(struct xt_mark_tginfo2),
236 .me = THIS_MODULE,
237 },
172}; 238};
173 239
174static int __init xt_mark_init(void) 240static int __init mark_tg_init(void)
175{ 241{
176 return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); 242 return xt_register_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
177} 243}
178 244
179static void __exit xt_mark_fini(void) 245static void __exit mark_tg_exit(void)
180{ 246{
181 xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); 247 xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg));
182} 248}
183 249
184module_init(xt_mark_init); 250module_init(mark_tg_init);
185module_exit(xt_mark_fini); 251module_exit(mark_tg_exit);
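
The revision-2 MARK target likewise folds the v1 SET/AND/OR modes into one mask-and-xor step on skb->mark. A sketch showing how mark/mask pairs reproduce the old modes:

#include <stdio.h>

static unsigned int mark_apply(unsigned int old, unsigned int mark,
			       unsigned int mask)
{
	return (old & ~mask) ^ mark;	/* the revision-2 update */
}

int main(void)
{
	unsigned int m = 0xdead0000;

	printf("SET 0x1:    %#x\n", mark_apply(m, 0x1, ~0u));
	printf("AND 0xff:   %#x\n", mark_apply(m, 0x0, ~0xffu));
	printf("OR  0x2:    %#x\n", mark_apply(m, 0x2, 0x2));
	printf("XOR 0xffff: %#x\n", mark_apply(m, 0xffff, 0x0));
	return 0;
}
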
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 9fb449ffbf8b..19ae8efae655 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -12,18 +12,18 @@
12 12
13#include <linux/netfilter/x_tables.h> 13#include <linux/netfilter/x_tables.h>
14#include <linux/netfilter/xt_NFLOG.h> 14#include <linux/netfilter/xt_NFLOG.h>
15#include <net/netfilter/nf_log.h>
15 16
16MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 17MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
17MODULE_DESCRIPTION("x_tables NFLOG target"); 18MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
18MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
19MODULE_ALIAS("ipt_NFLOG"); 20MODULE_ALIAS("ipt_NFLOG");
20MODULE_ALIAS("ip6t_NFLOG"); 21MODULE_ALIAS("ip6t_NFLOG");
21 22
22static unsigned int 23static unsigned int
23nflog_target(struct sk_buff *skb, 24nflog_tg(struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, const struct net_device *out, 25 const struct net_device *out, unsigned int hooknum,
25 unsigned int hooknum, const struct xt_target *target, 26 const struct xt_target *target, const void *targinfo)
26 const void *targinfo)
27{ 27{
28 const struct xt_nflog_info *info = targinfo; 28 const struct xt_nflog_info *info = targinfo;
29 struct nf_loginfo li; 29 struct nf_loginfo li;
@@ -39,9 +39,9 @@ nflog_target(struct sk_buff *skb,
39} 39}
40 40
41static bool 41static bool
42nflog_checkentry(const char *tablename, const void *entry, 42nflog_tg_check(const char *tablename, const void *entry,
43 const struct xt_target *target, void *targetinfo, 43 const struct xt_target *target, void *targetinfo,
44 unsigned int hookmask) 44 unsigned int hookmask)
45{ 45{
46 const struct xt_nflog_info *info = targetinfo; 46 const struct xt_nflog_info *info = targetinfo;
47 47
@@ -52,35 +52,34 @@ nflog_checkentry(const char *tablename, const void *entry,
52 return true; 52 return true;
53} 53}
54 54
55static struct xt_target xt_nflog_target[] __read_mostly = { 55static struct xt_target nflog_tg_reg[] __read_mostly = {
56 { 56 {
57 .name = "NFLOG", 57 .name = "NFLOG",
58 .family = AF_INET, 58 .family = AF_INET,
59 .checkentry = nflog_checkentry, 59 .checkentry = nflog_tg_check,
60 .target = nflog_target, 60 .target = nflog_tg,
61 .targetsize = sizeof(struct xt_nflog_info), 61 .targetsize = sizeof(struct xt_nflog_info),
62 .me = THIS_MODULE, 62 .me = THIS_MODULE,
63 }, 63 },
64 { 64 {
65 .name = "NFLOG", 65 .name = "NFLOG",
66 .family = AF_INET6, 66 .family = AF_INET6,
67 .checkentry = nflog_checkentry, 67 .checkentry = nflog_tg_check,
68 .target = nflog_target, 68 .target = nflog_tg,
69 .targetsize = sizeof(struct xt_nflog_info), 69 .targetsize = sizeof(struct xt_nflog_info),
70 .me = THIS_MODULE, 70 .me = THIS_MODULE,
71 }, 71 },
72}; 72};
73 73
74static int __init xt_nflog_init(void) 74static int __init nflog_tg_init(void)
75{ 75{
76 return xt_register_targets(xt_nflog_target, 76 return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
77 ARRAY_SIZE(xt_nflog_target));
78} 77}
79 78
80static void __exit xt_nflog_fini(void) 79static void __exit nflog_tg_exit(void)
81{ 80{
82 xt_unregister_targets(xt_nflog_target, ARRAY_SIZE(xt_nflog_target)); 81 xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg));
83} 82}
84 83
85module_init(xt_nflog_init); 84module_init(nflog_tg_init);
86module_exit(xt_nflog_fini); 85module_exit(nflog_tg_exit);
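
The body of nflog_tg_check is elided from the hunk above; upstream it mainly validates the fixed-size info block before trusting it. A hypothetical sketch of one such check, a NUL-terminated prefix buffer, with the struct and field names assumed rather than taken from xt_NFLOG.h:

#include <stdio.h>

struct nflog_info { char prefix[64]; };	/* assumed layout */

static int nflog_check(const struct nflog_info *info)
{
	/* reject an un-terminated prefix before ever printing it */
	return info->prefix[sizeof(info->prefix) - 1] == '\0';
}

int main(void)
{
	struct nflog_info ok = { .prefix = "fw: " };
	struct nflog_info bad;
	unsigned int i;

	for (i = 0; i < sizeof(bad.prefix); i++)
		bad.prefix[i] = 'A';	/* no terminator anywhere */
	printf("ok=%d bad=%d\n", nflog_check(&ok), nflog_check(&bad));
	return 0;	/* prints: ok=1 bad=0 */
}
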
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index c3984e9f766a..beb24d19a56f 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -17,59 +17,55 @@
17#include <linux/netfilter/xt_NFQUEUE.h> 17#include <linux/netfilter/xt_NFQUEUE.h>
18 18
19MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 19MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
20MODULE_DESCRIPTION("[ip,ip6,arp]_tables NFQUEUE target"); 20MODULE_DESCRIPTION("Xtables: packet forwarding to netlink");
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_ALIAS("ipt_NFQUEUE"); 22MODULE_ALIAS("ipt_NFQUEUE");
23MODULE_ALIAS("ip6t_NFQUEUE"); 23MODULE_ALIAS("ip6t_NFQUEUE");
24MODULE_ALIAS("arpt_NFQUEUE"); 24MODULE_ALIAS("arpt_NFQUEUE");
25 25
26static unsigned int 26static unsigned int
27target(struct sk_buff *skb, 27nfqueue_tg(struct sk_buff *skb, const struct net_device *in,
28 const struct net_device *in, 28 const struct net_device *out, unsigned int hooknum,
29 const struct net_device *out, 29 const struct xt_target *target, const void *targinfo)
30 unsigned int hooknum,
31 const struct xt_target *target,
32 const void *targinfo)
33{ 30{
34 const struct xt_NFQ_info *tinfo = targinfo; 31 const struct xt_NFQ_info *tinfo = targinfo;
35 32
36 return NF_QUEUE_NR(tinfo->queuenum); 33 return NF_QUEUE_NR(tinfo->queuenum);
37} 34}
38 35
39static struct xt_target xt_nfqueue_target[] __read_mostly = { 36static struct xt_target nfqueue_tg_reg[] __read_mostly = {
40 { 37 {
41 .name = "NFQUEUE", 38 .name = "NFQUEUE",
42 .family = AF_INET, 39 .family = AF_INET,
43 .target = target, 40 .target = nfqueue_tg,
44 .targetsize = sizeof(struct xt_NFQ_info), 41 .targetsize = sizeof(struct xt_NFQ_info),
45 .me = THIS_MODULE, 42 .me = THIS_MODULE,
46 }, 43 },
47 { 44 {
48 .name = "NFQUEUE", 45 .name = "NFQUEUE",
49 .family = AF_INET6, 46 .family = AF_INET6,
50 .target = target, 47 .target = nfqueue_tg,
51 .targetsize = sizeof(struct xt_NFQ_info), 48 .targetsize = sizeof(struct xt_NFQ_info),
52 .me = THIS_MODULE, 49 .me = THIS_MODULE,
53 }, 50 },
54 { 51 {
55 .name = "NFQUEUE", 52 .name = "NFQUEUE",
56 .family = NF_ARP, 53 .family = NF_ARP,
57 .target = target, 54 .target = nfqueue_tg,
58 .targetsize = sizeof(struct xt_NFQ_info), 55 .targetsize = sizeof(struct xt_NFQ_info),
59 .me = THIS_MODULE, 56 .me = THIS_MODULE,
60 }, 57 },
61}; 58};
62 59
63static int __init xt_nfqueue_init(void) 60static int __init nfqueue_tg_init(void)
64{ 61{
65 return xt_register_targets(xt_nfqueue_target, 62 return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
66 ARRAY_SIZE(xt_nfqueue_target));
67} 63}
68 64
69static void __exit xt_nfqueue_fini(void) 65static void __exit nfqueue_tg_exit(void)
70{ 66{
71 xt_unregister_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target)); 67 xt_unregister_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
72} 68}
73 69
74module_init(xt_nfqueue_init); 70module_init(nfqueue_tg_init);
75module_exit(xt_nfqueue_fini); 71module_exit(nfqueue_tg_exit);
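
nfqueue_tg returns NF_QUEUE_NR(tinfo->queuenum), a verdict with the queue number packed into its upper 16 bits. A sketch of that packing, assuming NF_QUEUE == 3 and the 16-bit queue field of this period's linux/netfilter.h:

#include <stdio.h>

#define NF_QUEUE	3	/* assumed, as in linux/netfilter.h */
#define NF_QUEUE_NR(x)	((((x) << 16) & 0xffff0000) | NF_QUEUE)

int main(void)
{
	unsigned int verdict = NF_QUEUE_NR(7);

	printf("verdict=%#x queue=%u base-verdict=%u\n",
	       verdict, verdict >> 16, verdict & 0xffff);
	return 0;	/* verdict=0x70003 queue=7 base-verdict=3 */
}
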
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 4976ce186615..6c9de611eb8d 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -7,17 +7,15 @@
7#include <linux/netfilter/x_tables.h> 7#include <linux/netfilter/x_tables.h>
8#include <net/netfilter/nf_conntrack.h> 8#include <net/netfilter/nf_conntrack.h>
9 9
10MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets");
10MODULE_LICENSE("GPL"); 11MODULE_LICENSE("GPL");
11MODULE_ALIAS("ipt_NOTRACK"); 12MODULE_ALIAS("ipt_NOTRACK");
12MODULE_ALIAS("ip6t_NOTRACK"); 13MODULE_ALIAS("ip6t_NOTRACK");
13 14
14static unsigned int 15static unsigned int
15target(struct sk_buff *skb, 16notrack_tg(struct sk_buff *skb, const struct net_device *in,
16 const struct net_device *in, 17 const struct net_device *out, unsigned int hooknum,
17 const struct net_device *out, 18 const struct xt_target *target, const void *targinfo)
18 unsigned int hooknum,
19 const struct xt_target *target,
20 const void *targinfo)
21{ 19{
22 /* Previously seen (loopback)? Ignore. */ 20 /* Previously seen (loopback)? Ignore. */
23 if (skb->nfct != NULL) 21 if (skb->nfct != NULL)
@@ -34,33 +32,32 @@ target(struct sk_buff *skb,
34 return XT_CONTINUE; 32 return XT_CONTINUE;
35} 33}
36 34
37static struct xt_target xt_notrack_target[] __read_mostly = { 35static struct xt_target notrack_tg_reg[] __read_mostly = {
38 { 36 {
39 .name = "NOTRACK", 37 .name = "NOTRACK",
40 .family = AF_INET, 38 .family = AF_INET,
41 .target = target, 39 .target = notrack_tg,
42 .table = "raw", 40 .table = "raw",
43 .me = THIS_MODULE, 41 .me = THIS_MODULE,
44 }, 42 },
45 { 43 {
46 .name = "NOTRACK", 44 .name = "NOTRACK",
47 .family = AF_INET6, 45 .family = AF_INET6,
48 .target = target, 46 .target = notrack_tg,
49 .table = "raw", 47 .table = "raw",
50 .me = THIS_MODULE, 48 .me = THIS_MODULE,
51 }, 49 },
52}; 50};
53 51
54static int __init xt_notrack_init(void) 52static int __init notrack_tg_init(void)
55{ 53{
56 return xt_register_targets(xt_notrack_target, 54 return xt_register_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
57 ARRAY_SIZE(xt_notrack_target));
58} 55}
59 56
60static void __exit xt_notrack_fini(void) 57static void __exit notrack_tg_exit(void)
61{ 58{
62 xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target)); 59 xt_unregister_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg));
63} 60}
64 61
65module_init(xt_notrack_init); 62module_init(notrack_tg_init);
66module_exit(xt_notrack_fini); 63module_exit(notrack_tg_exit);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
new file mode 100644
index 000000000000..24c73ba31eaa
--- /dev/null
+++ b/net/netfilter/xt_RATEEST.c
@@ -0,0 +1,205 @@
1/*
2 * (C) 2007 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/module.h>
9#include <linux/skbuff.h>
10#include <linux/gen_stats.h>
11#include <linux/jhash.h>
12#include <linux/rtnetlink.h>
13#include <linux/random.h>
14#include <net/gen_stats.h>
15#include <net/netlink.h>
16
17#include <linux/netfilter/x_tables.h>
18#include <linux/netfilter/xt_RATEEST.h>
19#include <net/netfilter/xt_rateest.h>
20
21static DEFINE_MUTEX(xt_rateest_mutex);
22
23#define RATEEST_HSIZE 16
24static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
25static unsigned int jhash_rnd __read_mostly;
26
27static unsigned int xt_rateest_hash(const char *name)
28{
29 return jhash(name, FIELD_SIZEOF(struct xt_rateest, name), jhash_rnd) &
30 (RATEEST_HSIZE - 1);
31}
32
33static void xt_rateest_hash_insert(struct xt_rateest *est)
34{
35 unsigned int h;
36
37 h = xt_rateest_hash(est->name);
38 hlist_add_head(&est->list, &rateest_hash[h]);
39}
40
41struct xt_rateest *xt_rateest_lookup(const char *name)
42{
43 struct xt_rateest *est;
44 struct hlist_node *n;
45 unsigned int h;
46
47 h = xt_rateest_hash(name);
48 mutex_lock(&xt_rateest_mutex);
49 hlist_for_each_entry(est, n, &rateest_hash[h], list) {
50 if (strcmp(est->name, name) == 0) {
51 est->refcnt++;
52 mutex_unlock(&xt_rateest_mutex);
53 return est;
54 }
55 }
56 mutex_unlock(&xt_rateest_mutex);
57 return NULL;
58}
59EXPORT_SYMBOL_GPL(xt_rateest_lookup);
60
61void xt_rateest_put(struct xt_rateest *est)
62{
63 mutex_lock(&xt_rateest_mutex);
64 if (--est->refcnt == 0) {
65 hlist_del(&est->list);
66 gen_kill_estimator(&est->bstats, &est->rstats);
67 kfree(est);
68 }
69 mutex_unlock(&xt_rateest_mutex);
70}
71EXPORT_SYMBOL_GPL(xt_rateest_put);
72
73static unsigned int
74xt_rateest_tg(struct sk_buff *skb,
75 const struct net_device *in,
76 const struct net_device *out,
77 unsigned int hooknum,
78 const struct xt_target *target,
79 const void *targinfo)
80{
81 const struct xt_rateest_target_info *info = targinfo;
82 struct gnet_stats_basic *stats = &info->est->bstats;
83
84 spin_lock_bh(&info->est->lock);
85 stats->bytes += skb->len;
86 stats->packets++;
87 spin_unlock_bh(&info->est->lock);
88
89 return XT_CONTINUE;
90}
91
92static bool
93xt_rateest_tg_checkentry(const char *tablename,
94 const void *entry,
95 const struct xt_target *target,
96 void *targinfo,
97 unsigned int hook_mask)
98{
99 struct xt_rateest_target_info *info = (void *)targinfo;
100 struct xt_rateest *est;
101 struct {
102 struct nlattr opt;
103 struct gnet_estimator est;
104 } cfg;
105
106 est = xt_rateest_lookup(info->name);
107 if (est) {
108 /*
109 * If estimator parameters are specified, they must match the
110 * existing estimator.
111 */
112 if ((!info->interval && !info->ewma_log) ||
113 (info->interval != est->params.interval ||
114 info->ewma_log != est->params.ewma_log)) {
115 xt_rateest_put(est);
116 return false;
117 }
118 info->est = est;
119 return true;
120 }
121
122 est = kzalloc(sizeof(*est), GFP_KERNEL);
123 if (!est)
124 goto err1;
125
126 strlcpy(est->name, info->name, sizeof(est->name));
127 spin_lock_init(&est->lock);
128 est->refcnt = 1;
129 est->params.interval = info->interval;
130 est->params.ewma_log = info->ewma_log;
131
132 cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est));
133 cfg.opt.nla_type = TCA_STATS_RATE_EST;
134 cfg.est.interval = info->interval;
135 cfg.est.ewma_log = info->ewma_log;
136
137 if (gen_new_estimator(&est->bstats, &est->rstats, &est->lock,
138 &cfg.opt) < 0)
139 goto err2;
140
141 info->est = est;
142 xt_rateest_hash_insert(est);
143
144 return true;
145
146err2:
147 kfree(est);
148err1:
149 return false;
150}
151
152static void xt_rateest_tg_destroy(const struct xt_target *target,
153 void *targinfo)
154{
155 struct xt_rateest_target_info *info = targinfo;
156
157 xt_rateest_put(info->est);
158}
159
160static struct xt_target xt_rateest_target[] __read_mostly = {
161 {
162 .family = AF_INET,
163 .name = "RATEEST",
164 .target = xt_rateest_tg,
165 .checkentry = xt_rateest_tg_checkentry,
166 .destroy = xt_rateest_tg_destroy,
167 .targetsize = sizeof(struct xt_rateest_target_info),
168 .me = THIS_MODULE,
169 },
170 {
171 .family = AF_INET6,
172 .name = "RATEEST",
173 .target = xt_rateest_tg,
174 .checkentry = xt_rateest_tg_checkentry,
175 .destroy = xt_rateest_tg_destroy,
176 .targetsize = sizeof(struct xt_rateest_target_info),
177 .me = THIS_MODULE,
178 },
179};
180
181static int __init xt_rateest_tg_init(void)
182{
183 unsigned int i;
184
185 for (i = 0; i < ARRAY_SIZE(rateest_hash); i++)
186 INIT_HLIST_HEAD(&rateest_hash[i]);
187
188 get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
189 return xt_register_targets(xt_rateest_target,
190 ARRAY_SIZE(xt_rateest_target));
191}
192
193static void __exit xt_rateest_tg_fini(void)
194{
195 xt_unregister_targets(xt_rateest_target, ARRAY_SIZE(xt_rateest_target));
196}
197
198
199MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
200MODULE_LICENSE("GPL");
201MODULE_DESCRIPTION("Xtables: packet rate estimator");
202MODULE_ALIAS("ipt_RATEEST");
203MODULE_ALIAS("ip6t_RATEEST");
204module_init(xt_rateest_tg_init);
205module_exit(xt_rateest_tg_fini);
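
xt_rateest_lookup()/xt_rateest_put() above implement a name-keyed, refcounted registry so several rules can share one estimator. A userspace sketch of that pattern, collapsing the hash table to a single list and dropping the mutex and statistics:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct rateest {
	struct rateest *next;
	char name[16];
	unsigned int refcnt;
};

static struct rateest *head;

static struct rateest *rateest_lookup(const char *name)
{
	struct rateest *e;

	for (e = head; e; e = e->next)
		if (strcmp(e->name, name) == 0) {
			e->refcnt++;	/* share the existing object */
			return e;
		}
	return NULL;
}

static struct rateest *rateest_get(const char *name)
{
	struct rateest *e = rateest_lookup(name);

	if (e)
		return e;
	e = calloc(1, sizeof(*e));	/* first user: create it */
	if (!e)
		return NULL;
	snprintf(e->name, sizeof(e->name), "%s", name);
	e->refcnt = 1;
	e->next = head;
	head = e;
	return e;
}

static void rateest_put(struct rateest *e)
{
	struct rateest **p;

	if (--e->refcnt)
		return;
	for (p = &head; *p; p = &(*p)->next)	/* last user: unlink, free */
		if (*p == e) {
			*p = e->next;
			break;
		}
	free(e);
}

int main(void)
{
	struct rateest *a = rateest_get("uplink");
	struct rateest *b = rateest_get("uplink");	/* same object */

	printf("shared=%d refcnt=%u\n", a == b, a->refcnt);
	rateest_put(b);
	rateest_put(a);
	return 0;	/* prints: shared=1 refcnt=2 */
}
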
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 235806eb6ecd..b11b3ecbb39d 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -20,7 +20,7 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); 22MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
23MODULE_DESCRIPTION("ip[6]tables SECMARK modification module"); 23MODULE_DESCRIPTION("Xtables: packet security mark modification");
24MODULE_ALIAS("ipt_SECMARK"); 24MODULE_ALIAS("ipt_SECMARK");
25MODULE_ALIAS("ip6t_SECMARK"); 25MODULE_ALIAS("ip6t_SECMARK");
26 26
@@ -28,10 +28,10 @@ MODULE_ALIAS("ip6t_SECMARK");
28 28
29static u8 mode; 29static u8 mode;
30 30
31static unsigned int target(struct sk_buff *skb, const struct net_device *in, 31static unsigned int
32 const struct net_device *out, unsigned int hooknum, 32secmark_tg(struct sk_buff *skb, const struct net_device *in,
33 const struct xt_target *target, 33 const struct net_device *out, unsigned int hooknum,
34 const void *targinfo) 34 const struct xt_target *target, const void *targinfo)
35{ 35{
36 u32 secmark = 0; 36 u32 secmark = 0;
37 const struct xt_secmark_target_info *info = targinfo; 37 const struct xt_secmark_target_info *info = targinfo;
@@ -81,9 +81,10 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
81 return true; 81 return true;
82} 82}
83 83
84static bool checkentry(const char *tablename, const void *entry, 84static bool
85 const struct xt_target *target, void *targinfo, 85secmark_tg_check(const char *tablename, const void *entry,
86 unsigned int hook_mask) 86 const struct xt_target *target, void *targinfo,
87 unsigned int hook_mask)
87{ 88{
88 struct xt_secmark_target_info *info = targinfo; 89 struct xt_secmark_target_info *info = targinfo;
89 90
@@ -109,12 +110,12 @@ static bool checkentry(const char *tablename, const void *entry,
109 return true; 110 return true;
110} 111}
111 112
112static struct xt_target xt_secmark_target[] __read_mostly = { 113static struct xt_target secmark_tg_reg[] __read_mostly = {
113 { 114 {
114 .name = "SECMARK", 115 .name = "SECMARK",
115 .family = AF_INET, 116 .family = AF_INET,
116 .checkentry = checkentry, 117 .checkentry = secmark_tg_check,
117 .target = target, 118 .target = secmark_tg,
118 .targetsize = sizeof(struct xt_secmark_target_info), 119 .targetsize = sizeof(struct xt_secmark_target_info),
119 .table = "mangle", 120 .table = "mangle",
120 .me = THIS_MODULE, 121 .me = THIS_MODULE,
@@ -122,24 +123,23 @@ static struct xt_target xt_secmark_target[] __read_mostly = {
122 { 123 {
123 .name = "SECMARK", 124 .name = "SECMARK",
124 .family = AF_INET6, 125 .family = AF_INET6,
125 .checkentry = checkentry, 126 .checkentry = secmark_tg_check,
126 .target = target, 127 .target = secmark_tg,
127 .targetsize = sizeof(struct xt_secmark_target_info), 128 .targetsize = sizeof(struct xt_secmark_target_info),
128 .table = "mangle", 129 .table = "mangle",
129 .me = THIS_MODULE, 130 .me = THIS_MODULE,
130 }, 131 },
131}; 132};
132 133
133static int __init xt_secmark_init(void) 134static int __init secmark_tg_init(void)
134{ 135{
135 return xt_register_targets(xt_secmark_target, 136 return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
136 ARRAY_SIZE(xt_secmark_target));
137} 137}
138 138
139static void __exit xt_secmark_fini(void) 139static void __exit secmark_tg_exit(void)
140{ 140{
141 xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target)); 141 xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
142} 142}
143 143
144module_init(xt_secmark_init); 144module_init(secmark_tg_init);
145module_exit(xt_secmark_fini); 145module_exit(secmark_tg_exit);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 8e76d1f52fbe..60e3767cc71d 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -24,7 +24,7 @@
24 24
25MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
26MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 26MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
27MODULE_DESCRIPTION("x_tables TCP MSS modification module"); 27MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
28MODULE_ALIAS("ipt_TCPMSS"); 28MODULE_ALIAS("ipt_TCPMSS");
29MODULE_ALIAS("ip6t_TCPMSS"); 29MODULE_ALIAS("ip6t_TCPMSS");
30 30
@@ -88,15 +88,19 @@ tcpmss_mangle_packet(struct sk_buff *skb,
88 88
89 oldmss = (opt[i+2] << 8) | opt[i+3]; 89 oldmss = (opt[i+2] << 8) | opt[i+3];
90 90
91 if (info->mss == XT_TCPMSS_CLAMP_PMTU && 91 /* Never increase MSS, even when setting it, as
92 oldmss <= newmss) 92 * doing so results in problems for hosts that rely
93 * on MSS being set correctly.
94 */
95 if (oldmss <= newmss)
93 return 0; 96 return 0;
94 97
95 opt[i+2] = (newmss & 0xff00) >> 8; 98 opt[i+2] = (newmss & 0xff00) >> 8;
96 opt[i+3] = newmss & 0x00ff; 99 opt[i+3] = newmss & 0x00ff;
97 100
98 nf_proto_csum_replace2(&tcph->check, skb, 101 inet_proto_csum_replace2(&tcph->check, skb,
99 htons(oldmss), htons(newmss), 0); 102 htons(oldmss), htons(newmss),
103 0);
100 return 0; 104 return 0;
101 } 105 }
102 } 106 }
@@ -117,29 +121,26 @@ tcpmss_mangle_packet(struct sk_buff *skb,
117 opt = (u_int8_t *)tcph + sizeof(struct tcphdr); 121 opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
118 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); 122 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
119 123
120 nf_proto_csum_replace2(&tcph->check, skb, 124 inet_proto_csum_replace2(&tcph->check, skb,
121 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); 125 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
122 opt[0] = TCPOPT_MSS; 126 opt[0] = TCPOPT_MSS;
123 opt[1] = TCPOLEN_MSS; 127 opt[1] = TCPOLEN_MSS;
124 opt[2] = (newmss & 0xff00) >> 8; 128 opt[2] = (newmss & 0xff00) >> 8;
125 opt[3] = newmss & 0x00ff; 129 opt[3] = newmss & 0x00ff;
126 130
127 nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); 131 inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
128 132
129 oldval = ((__be16 *)tcph)[6]; 133 oldval = ((__be16 *)tcph)[6];
130 tcph->doff += TCPOLEN_MSS/4; 134 tcph->doff += TCPOLEN_MSS/4;
131 nf_proto_csum_replace2(&tcph->check, skb, 135 inet_proto_csum_replace2(&tcph->check, skb,
132 oldval, ((__be16 *)tcph)[6], 0); 136 oldval, ((__be16 *)tcph)[6], 0);
133 return TCPOLEN_MSS; 137 return TCPOLEN_MSS;
134} 138}
135 139
136static unsigned int 140static unsigned int
137xt_tcpmss_target4(struct sk_buff *skb, 141tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
138 const struct net_device *in, 142 const struct net_device *out, unsigned int hooknum,
139 const struct net_device *out, 143 const struct xt_target *target, const void *targinfo)
140 unsigned int hooknum,
141 const struct xt_target *target,
142 const void *targinfo)
143{ 144{
144 struct iphdr *iph = ip_hdr(skb); 145 struct iphdr *iph = ip_hdr(skb);
145 __be16 newlen; 146 __be16 newlen;
@@ -152,7 +153,7 @@ xt_tcpmss_target4(struct sk_buff *skb,
152 if (ret > 0) { 153 if (ret > 0) {
153 iph = ip_hdr(skb); 154 iph = ip_hdr(skb);
154 newlen = htons(ntohs(iph->tot_len) + ret); 155 newlen = htons(ntohs(iph->tot_len) + ret);
155 nf_csum_replace2(&iph->check, iph->tot_len, newlen); 156 csum_replace2(&iph->check, iph->tot_len, newlen);
156 iph->tot_len = newlen; 157 iph->tot_len = newlen;
157 } 158 }
158 return XT_CONTINUE; 159 return XT_CONTINUE;
@@ -160,12 +161,9 @@ xt_tcpmss_target4(struct sk_buff *skb,
160 161
161#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 162#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
162static unsigned int 163static unsigned int
163xt_tcpmss_target6(struct sk_buff *skb, 164tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
164 const struct net_device *in, 165 const struct net_device *out, unsigned int hooknum,
165 const struct net_device *out, 166 const struct xt_target *target, const void *targinfo)
166 unsigned int hooknum,
167 const struct xt_target *target,
168 const void *targinfo)
169{ 167{
170 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 168 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
171 u8 nexthdr; 169 u8 nexthdr;
@@ -204,19 +202,17 @@ static inline bool find_syn_match(const struct xt_entry_match *m)
204} 202}
205 203
206static bool 204static bool
207xt_tcpmss_checkentry4(const char *tablename, 205tcpmss_tg4_check(const char *tablename, const void *entry,
208 const void *entry, 206 const struct xt_target *target, void *targinfo,
209 const struct xt_target *target, 207 unsigned int hook_mask)
210 void *targinfo,
211 unsigned int hook_mask)
212{ 208{
213 const struct xt_tcpmss_info *info = targinfo; 209 const struct xt_tcpmss_info *info = targinfo;
214 const struct ipt_entry *e = entry; 210 const struct ipt_entry *e = entry;
215 211
216 if (info->mss == XT_TCPMSS_CLAMP_PMTU && 212 if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
217 (hook_mask & ~((1 << NF_IP_FORWARD) | 213 (hook_mask & ~((1 << NF_INET_FORWARD) |
218 (1 << NF_IP_LOCAL_OUT) | 214 (1 << NF_INET_LOCAL_OUT) |
219 (1 << NF_IP_POST_ROUTING))) != 0) { 215 (1 << NF_INET_POST_ROUTING))) != 0) {
220 printk("xt_TCPMSS: path-MTU clamping only supported in " 216 printk("xt_TCPMSS: path-MTU clamping only supported in "
221 "FORWARD, OUTPUT and POSTROUTING hooks\n"); 217 "FORWARD, OUTPUT and POSTROUTING hooks\n");
222 return false; 218 return false;
@@ -229,19 +225,17 @@ xt_tcpmss_checkentry4(const char *tablename,
229 225
230#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 226#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
231static bool 227static bool
232xt_tcpmss_checkentry6(const char *tablename, 228tcpmss_tg6_check(const char *tablename, const void *entry,
233 const void *entry, 229 const struct xt_target *target, void *targinfo,
234 const struct xt_target *target, 230 unsigned int hook_mask)
235 void *targinfo,
236 unsigned int hook_mask)
237{ 231{
238 const struct xt_tcpmss_info *info = targinfo; 232 const struct xt_tcpmss_info *info = targinfo;
239 const struct ip6t_entry *e = entry; 233 const struct ip6t_entry *e = entry;
240 234
241 if (info->mss == XT_TCPMSS_CLAMP_PMTU && 235 if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
242 (hook_mask & ~((1 << NF_IP6_FORWARD) | 236 (hook_mask & ~((1 << NF_INET_FORWARD) |
243 (1 << NF_IP6_LOCAL_OUT) | 237 (1 << NF_INET_LOCAL_OUT) |
244 (1 << NF_IP6_POST_ROUTING))) != 0) { 238 (1 << NF_INET_POST_ROUTING))) != 0) {
245 printk("xt_TCPMSS: path-MTU clamping only supported in " 239 printk("xt_TCPMSS: path-MTU clamping only supported in "
246 "FORWARD, OUTPUT and POSTROUTING hooks\n"); 240 "FORWARD, OUTPUT and POSTROUTING hooks\n");
247 return false; 241 return false;
@@ -253,12 +247,12 @@ xt_tcpmss_checkentry6(const char *tablename,
253} 247}
254#endif 248#endif
255 249
256static struct xt_target xt_tcpmss_reg[] __read_mostly = { 250static struct xt_target tcpmss_tg_reg[] __read_mostly = {
257 { 251 {
258 .family = AF_INET, 252 .family = AF_INET,
259 .name = "TCPMSS", 253 .name = "TCPMSS",
260 .checkentry = xt_tcpmss_checkentry4, 254 .checkentry = tcpmss_tg4_check,
261 .target = xt_tcpmss_target4, 255 .target = tcpmss_tg4,
262 .targetsize = sizeof(struct xt_tcpmss_info), 256 .targetsize = sizeof(struct xt_tcpmss_info),
263 .proto = IPPROTO_TCP, 257 .proto = IPPROTO_TCP,
264 .me = THIS_MODULE, 258 .me = THIS_MODULE,
@@ -267,8 +261,8 @@ static struct xt_target xt_tcpmss_reg[] __read_mostly = {
267 { 261 {
268 .family = AF_INET6, 262 .family = AF_INET6,
269 .name = "TCPMSS", 263 .name = "TCPMSS",
270 .checkentry = xt_tcpmss_checkentry6, 264 .checkentry = tcpmss_tg6_check,
271 .target = xt_tcpmss_target6, 265 .target = tcpmss_tg6,
272 .targetsize = sizeof(struct xt_tcpmss_info), 266 .targetsize = sizeof(struct xt_tcpmss_info),
273 .proto = IPPROTO_TCP, 267 .proto = IPPROTO_TCP,
274 .me = THIS_MODULE, 268 .me = THIS_MODULE,
@@ -276,15 +270,15 @@ static struct xt_target xt_tcpmss_reg[] __read_mostly = {
276#endif 270#endif
277}; 271};
278 272
279static int __init xt_tcpmss_init(void) 273static int __init tcpmss_tg_init(void)
280{ 274{
281 return xt_register_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg)); 275 return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
282} 276}
283 277
284static void __exit xt_tcpmss_fini(void) 278static void __exit tcpmss_tg_exit(void)
285{ 279{
286 xt_unregister_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg)); 280 xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
287} 281}
288 282
289module_init(xt_tcpmss_init); 283module_init(tcpmss_tg_init);
290module_exit(xt_tcpmss_fini); 284module_exit(tcpmss_tg_exit);
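Besides the renames, the TCPMSS hunks switch from the nf_*csum_replace* helpers to inet_proto_csum_replace2/4 and csum_replace2. All of these implement the RFC 1624 incremental checksum update, HC' = ~(~HC + ~m + m'), so a 16-bit field such as the MSS can be rewritten without re-summing the whole segment. A minimal userspace version of the 16-bit case (illustrative only; the kernel helpers additionally handle CHECKSUM_PARTIAL skbs and pseudo-header length changes):

    #include <stdint.h>
    #include <stdio.h>

    /* RFC 1624 eq. 3: HC' = ~(~HC + ~m + m'), carries folded back in */
    static uint16_t csum_replace16(uint16_t check, uint16_t from, uint16_t to)
    {
        uint32_t sum = (uint16_t)~check;

        sum += (uint16_t)~from;
        sum += to;
        while (sum >> 16)                       /* fold the carry bits */
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    int main(void)
    {
        uint16_t check = 0xb1e6;                /* arbitrary example value */
        uint16_t oldmss = 1460, newmss = 1400;  /* the rewrite done above */

        printf("0x%04x -> 0x%04x\n", check,
               csum_replace16(check, oldmss, newmss));
        return 0;
    }

The one behavioural change visible in these hunks: the oldmss <= newmss early return now applies to fixed --set-mss values as well, so the target can only ever lower an existing MSS, never raise it.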
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
new file mode 100644
index 000000000000..3b2aa56833b9
--- /dev/null
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -0,0 +1,147 @@
1/*
2 * A module for stripping a specific TCP option from TCP packets.
3 *
4 * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org>
5 * Copyright © CC Computer Consultants GmbH, 2007
6 * Contact: Jan Engelhardt <jengelh@computergmbh.de>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <linux/ipv6.h>
17#include <linux/tcp.h>
18#include <net/ipv6.h>
19#include <net/tcp.h>
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_TCPOPTSTRIP.h>
22
23static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
24{
25 /* Beware zero-length options: make finite progress */
26 if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
27 return 1;
28 else
29 return opt[offset+1];
30}
31
32static unsigned int
33tcpoptstrip_mangle_packet(struct sk_buff *skb,
34 const struct xt_tcpoptstrip_target_info *info,
35 unsigned int tcphoff, unsigned int minlen)
36{
37 unsigned int optl, i, j;
38 struct tcphdr *tcph;
39 u_int16_t n, o;
40 u_int8_t *opt;
41
42 if (!skb_make_writable(skb, skb->len))
43 return NF_DROP;
44
45 tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
46 opt = (u_int8_t *)tcph;
47
48 /*
49 * Walk through all TCP options - if we find some option to remove,
50 * set all octets to %TCPOPT_NOP and adjust checksum.
51 */
52 for (i = sizeof(struct tcphdr); i < tcp_hdrlen(skb); i += optl) {
53 optl = optlen(opt, i);
54
55 if (i + optl > tcp_hdrlen(skb))
56 break;
57
58 if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i]))
59 continue;
60
61 for (j = 0; j < optl; ++j) {
62 o = opt[i+j];
63 n = TCPOPT_NOP;
64 if ((i + j) % 2 == 0) {
65 o <<= 8;
66 n <<= 8;
67 }
68 inet_proto_csum_replace2(&tcph->check, skb, htons(o),
69 htons(n), 0);
70 }
71 memset(opt + i, TCPOPT_NOP, optl);
72 }
73
74 return XT_CONTINUE;
75}
76
77static unsigned int
78tcpoptstrip_tg4(struct sk_buff *skb, const struct net_device *in,
79 const struct net_device *out, unsigned int hooknum,
80 const struct xt_target *target, const void *targinfo)
81{
82 return tcpoptstrip_mangle_packet(skb, targinfo, ip_hdrlen(skb),
83 sizeof(struct iphdr) + sizeof(struct tcphdr));
84}
85
86#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
87static unsigned int
88tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in,
89 const struct net_device *out, unsigned int hooknum,
90 const struct xt_target *target, const void *targinfo)
91{
92 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
93 unsigned int tcphoff;
94 u_int8_t nexthdr;
95
96 nexthdr = ipv6h->nexthdr;
97 tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
98	if (tcphoff < 0)
99 return NF_DROP;
100
101 return tcpoptstrip_mangle_packet(skb, targinfo, tcphoff,
102 sizeof(*ipv6h) + sizeof(struct tcphdr));
103}
104#endif
105
106static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
107 {
108 .name = "TCPOPTSTRIP",
109 .family = AF_INET,
110 .table = "mangle",
111 .proto = IPPROTO_TCP,
112 .target = tcpoptstrip_tg4,
113 .targetsize = sizeof(struct xt_tcpoptstrip_target_info),
114 .me = THIS_MODULE,
115 },
116#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
117 {
118 .name = "TCPOPTSTRIP",
119 .family = AF_INET6,
120 .table = "mangle",
121 .proto = IPPROTO_TCP,
122 .target = tcpoptstrip_tg6,
123 .targetsize = sizeof(struct xt_tcpoptstrip_target_info),
124 .me = THIS_MODULE,
125 },
126#endif
127};
128
129static int __init tcpoptstrip_tg_init(void)
130{
131 return xt_register_targets(tcpoptstrip_tg_reg,
132 ARRAY_SIZE(tcpoptstrip_tg_reg));
133}
134
135static void __exit tcpoptstrip_tg_exit(void)
136{
137 xt_unregister_targets(tcpoptstrip_tg_reg,
138 ARRAY_SIZE(tcpoptstrip_tg_reg));
139}
140
141module_init(tcpoptstrip_tg_init);
142module_exit(tcpoptstrip_tg_exit);
143MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@computergmbh.de>");
144MODULE_DESCRIPTION("Xtables: TCP option stripping");
145MODULE_LICENSE("GPL");
146MODULE_ALIAS("ipt_TCPOPTSTRIP");
147MODULE_ALIAS("ip6t_TCPOPTSTRIP");
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 26c5d08ab2c2..30dab79a3438 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -5,49 +5,46 @@
5 5
6#include <linux/netfilter/x_tables.h> 6#include <linux/netfilter/x_tables.h>
7 7
8MODULE_DESCRIPTION("Xtables: packet flow tracing");
8MODULE_LICENSE("GPL"); 9MODULE_LICENSE("GPL");
9MODULE_ALIAS("ipt_TRACE"); 10MODULE_ALIAS("ipt_TRACE");
10MODULE_ALIAS("ip6t_TRACE"); 11MODULE_ALIAS("ip6t_TRACE");
11 12
12static unsigned int 13static unsigned int
13target(struct sk_buff *skb, 14trace_tg(struct sk_buff *skb, const struct net_device *in,
14 const struct net_device *in, 15 const struct net_device *out, unsigned int hooknum,
15 const struct net_device *out, 16 const struct xt_target *target, const void *targinfo)
16 unsigned int hooknum,
17 const struct xt_target *target,
18 const void *targinfo)
19{ 17{
20 skb->nf_trace = 1; 18 skb->nf_trace = 1;
21 return XT_CONTINUE; 19 return XT_CONTINUE;
22} 20}
23 21
24static struct xt_target xt_trace_target[] __read_mostly = { 22static struct xt_target trace_tg_reg[] __read_mostly = {
25 { 23 {
26 .name = "TRACE", 24 .name = "TRACE",
27 .family = AF_INET, 25 .family = AF_INET,
28 .target = target, 26 .target = trace_tg,
29 .table = "raw", 27 .table = "raw",
30 .me = THIS_MODULE, 28 .me = THIS_MODULE,
31 }, 29 },
32 { 30 {
33 .name = "TRACE", 31 .name = "TRACE",
34 .family = AF_INET6, 32 .family = AF_INET6,
35 .target = target, 33 .target = trace_tg,
36 .table = "raw", 34 .table = "raw",
37 .me = THIS_MODULE, 35 .me = THIS_MODULE,
38 }, 36 },
39}; 37};
40 38
41static int __init xt_trace_init(void) 39static int __init trace_tg_init(void)
42{ 40{
43 return xt_register_targets(xt_trace_target, 41 return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
44 ARRAY_SIZE(xt_trace_target));
45} 42}
46 43
47static void __exit xt_trace_fini(void) 44static void __exit trace_tg_exit(void)
48{ 45{
49 xt_unregister_targets(xt_trace_target, ARRAY_SIZE(xt_trace_target)); 46 xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg));
50} 47}
51 48
52module_init(xt_trace_init); 49module_init(trace_tg_init);
53module_exit(xt_trace_fini); 50module_exit(trace_tg_exit);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 64bcdb0fe1e6..89f47364e848 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -10,52 +10,47 @@
10#include <linux/netfilter/xt_comment.h> 10#include <linux/netfilter/xt_comment.h>
11 11
12MODULE_AUTHOR("Brad Fisher <brad@info-link.net>"); 12MODULE_AUTHOR("Brad Fisher <brad@info-link.net>");
13MODULE_DESCRIPTION("iptables comment match module"); 13MODULE_DESCRIPTION("Xtables: No-op match which can be tagged with a comment");
14MODULE_LICENSE("GPL"); 14MODULE_LICENSE("GPL");
15MODULE_ALIAS("ipt_comment"); 15MODULE_ALIAS("ipt_comment");
16MODULE_ALIAS("ip6t_comment"); 16MODULE_ALIAS("ip6t_comment");
17 17
18static bool 18static bool
19match(const struct sk_buff *skb, 19comment_mt(const struct sk_buff *skb, const struct net_device *in,
20 const struct net_device *in, 20 const struct net_device *out, const struct xt_match *match,
21 const struct net_device *out, 21 const void *matchinfo, int offset, unsigned int protooff,
22 const struct xt_match *match, 22 bool *hotdrop)
23 const void *matchinfo,
24 int offset,
25 unsigned int protooff,
26 bool *hotdrop)
27{ 23{
28 /* We always match */ 24 /* We always match */
29 return true; 25 return true;
30} 26}
31 27
32static struct xt_match xt_comment_match[] __read_mostly = { 28static struct xt_match comment_mt_reg[] __read_mostly = {
33 { 29 {
34 .name = "comment", 30 .name = "comment",
35 .family = AF_INET, 31 .family = AF_INET,
36 .match = match, 32 .match = comment_mt,
37 .matchsize = sizeof(struct xt_comment_info), 33 .matchsize = sizeof(struct xt_comment_info),
38 .me = THIS_MODULE 34 .me = THIS_MODULE
39 }, 35 },
40 { 36 {
41 .name = "comment", 37 .name = "comment",
42 .family = AF_INET6, 38 .family = AF_INET6,
43 .match = match, 39 .match = comment_mt,
44 .matchsize = sizeof(struct xt_comment_info), 40 .matchsize = sizeof(struct xt_comment_info),
45 .me = THIS_MODULE 41 .me = THIS_MODULE
46 }, 42 },
47}; 43};
48 44
49static int __init xt_comment_init(void) 45static int __init comment_mt_init(void)
50{ 46{
51 return xt_register_matches(xt_comment_match, 47 return xt_register_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
52 ARRAY_SIZE(xt_comment_match));
53} 48}
54 49
55static void __exit xt_comment_fini(void) 50static void __exit comment_mt_exit(void)
56{ 51{
57 xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match)); 52 xt_unregister_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg));
58} 53}
59 54
60module_init(xt_comment_init); 55module_init(comment_mt_init);
61module_exit(xt_comment_fini); 56module_exit(comment_mt_exit);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 9ec50139b9a1..b15e7e2fa143 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -12,19 +12,15 @@
12 12
13MODULE_LICENSE("GPL"); 13MODULE_LICENSE("GPL");
14MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 14MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
15MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); 15MODULE_DESCRIPTION("Xtables: Number of packets/bytes per connection matching");
16MODULE_ALIAS("ipt_connbytes"); 16MODULE_ALIAS("ipt_connbytes");
17MODULE_ALIAS("ip6t_connbytes"); 17MODULE_ALIAS("ip6t_connbytes");
18 18
19static bool 19static bool
20match(const struct sk_buff *skb, 20connbytes_mt(const struct sk_buff *skb, const struct net_device *in,
21 const struct net_device *in, 21 const struct net_device *out, const struct xt_match *match,
22 const struct net_device *out, 22 const void *matchinfo, int offset, unsigned int protoff,
23 const struct xt_match *match, 23 bool *hotdrop)
24 const void *matchinfo,
25 int offset,
26 unsigned int protoff,
27 bool *hotdrop)
28{ 24{
29 const struct xt_connbytes_info *sinfo = matchinfo; 25 const struct xt_connbytes_info *sinfo = matchinfo;
30 const struct nf_conn *ct; 26 const struct nf_conn *ct;
@@ -96,11 +92,10 @@ match(const struct sk_buff *skb,
96 return what >= sinfo->count.from; 92 return what >= sinfo->count.from;
97} 93}
98 94
99static bool check(const char *tablename, 95static bool
100 const void *ip, 96connbytes_mt_check(const char *tablename, const void *ip,
101 const struct xt_match *match, 97 const struct xt_match *match, void *matchinfo,
102 void *matchinfo, 98 unsigned int hook_mask)
103 unsigned int hook_mask)
104{ 99{
105 const struct xt_connbytes_info *sinfo = matchinfo; 100 const struct xt_connbytes_info *sinfo = matchinfo;
106 101
@@ -116,7 +111,7 @@ static bool check(const char *tablename,
116 111
117 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 112 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
118 printk(KERN_WARNING "can't load conntrack support for " 113 printk(KERN_WARNING "can't load conntrack support for "
119 "proto=%d\n", match->family); 114 "proto=%u\n", match->family);
120 return false; 115 return false;
121 } 116 }
122 117
@@ -124,43 +119,42 @@ static bool check(const char *tablename,
124} 119}
125 120
126static void 121static void
127destroy(const struct xt_match *match, void *matchinfo) 122connbytes_mt_destroy(const struct xt_match *match, void *matchinfo)
128{ 123{
129 nf_ct_l3proto_module_put(match->family); 124 nf_ct_l3proto_module_put(match->family);
130} 125}
131 126
132static struct xt_match xt_connbytes_match[] __read_mostly = { 127static struct xt_match connbytes_mt_reg[] __read_mostly = {
133 { 128 {
134 .name = "connbytes", 129 .name = "connbytes",
135 .family = AF_INET, 130 .family = AF_INET,
136 .checkentry = check, 131 .checkentry = connbytes_mt_check,
137 .match = match, 132 .match = connbytes_mt,
138 .destroy = destroy, 133 .destroy = connbytes_mt_destroy,
139 .matchsize = sizeof(struct xt_connbytes_info), 134 .matchsize = sizeof(struct xt_connbytes_info),
140 .me = THIS_MODULE 135 .me = THIS_MODULE
141 }, 136 },
142 { 137 {
143 .name = "connbytes", 138 .name = "connbytes",
144 .family = AF_INET6, 139 .family = AF_INET6,
145 .checkentry = check, 140 .checkentry = connbytes_mt_check,
146 .match = match, 141 .match = connbytes_mt,
147 .destroy = destroy, 142 .destroy = connbytes_mt_destroy,
148 .matchsize = sizeof(struct xt_connbytes_info), 143 .matchsize = sizeof(struct xt_connbytes_info),
149 .me = THIS_MODULE 144 .me = THIS_MODULE
150 }, 145 },
151}; 146};
152 147
153static int __init xt_connbytes_init(void) 148static int __init connbytes_mt_init(void)
154{ 149{
155 return xt_register_matches(xt_connbytes_match, 150 return xt_register_matches(connbytes_mt_reg,
156 ARRAY_SIZE(xt_connbytes_match)); 151 ARRAY_SIZE(connbytes_mt_reg));
157} 152}
158 153
159static void __exit xt_connbytes_fini(void) 154static void __exit connbytes_mt_exit(void)
160{ 155{
161 xt_unregister_matches(xt_connbytes_match, 156 xt_unregister_matches(connbytes_mt_reg, ARRAY_SIZE(connbytes_mt_reg));
162 ARRAY_SIZE(xt_connbytes_match));
163} 157}
164 158
165module_init(xt_connbytes_init); 159module_init(connbytes_mt_init);
166module_exit(xt_connbytes_fini); 160module_exit(connbytes_mt_exit);
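connbytes, like connmark and conntrack below, pins the per-family conntrack code in checkentry via nf_ct_l3proto_try_module_get() and releases it in destroy via nf_ct_l3proto_module_put(), so conntrack cannot be unloaded while a rule still depends on it. The acquire-in-check, release-in-destroy shape, reduced to a refcounted userspace mock (hypothetical names, not the kernel API):

    #include <stdio.h>

    static int l3proto_refs;    /* mock pin count on the conntrack module */

    static int l3proto_try_get(int family)
    {
        ++l3proto_refs;         /* the real call can fail if no l3proto */
        printf("pinned family %d (refs=%d)\n", family, l3proto_refs);
        return 0;
    }

    static void l3proto_put(int family)
    {
        --l3proto_refs;
        printf("released family %d (refs=%d)\n", family, l3proto_refs);
    }

    /* checkentry: acquire the dependency before the rule goes live */
    static int rule_check(int family)
    {
        return l3proto_try_get(family) < 0 ? -1 : 0;
    }

    /* destroy: drop it when the rule is deleted */
    static void rule_destroy(int family)
    {
        l3proto_put(family);
    }

    int main(void)
    {
        if (rule_check(2) == 0)
            rule_destroy(2);
        return 0;
    }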
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index d7becf08a93a..e00ecd974fa3 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -53,10 +53,10 @@ static inline unsigned int connlimit_iphash(__be32 addr)
53} 53}
54 54
55static inline unsigned int 55static inline unsigned int
56connlimit_iphash6(const union nf_conntrack_address *addr, 56connlimit_iphash6(const union nf_inet_addr *addr,
57 const union nf_conntrack_address *mask) 57 const union nf_inet_addr *mask)
58{ 58{
59 union nf_conntrack_address res; 59 union nf_inet_addr res;
60 unsigned int i; 60 unsigned int i;
61 61
62 if (unlikely(!connlimit_rnd_inited)) { 62 if (unlikely(!connlimit_rnd_inited)) {
@@ -81,14 +81,14 @@ static inline bool already_closed(const struct nf_conn *conn)
81} 81}
82 82
83static inline unsigned int 83static inline unsigned int
84same_source_net(const union nf_conntrack_address *addr, 84same_source_net(const union nf_inet_addr *addr,
85 const union nf_conntrack_address *mask, 85 const union nf_inet_addr *mask,
86 const union nf_conntrack_address *u3, unsigned int family) 86 const union nf_inet_addr *u3, unsigned int family)
87{ 87{
88 if (family == AF_INET) { 88 if (family == AF_INET) {
89 return (addr->ip & mask->ip) == (u3->ip & mask->ip); 89 return (addr->ip & mask->ip) == (u3->ip & mask->ip);
90 } else { 90 } else {
91 union nf_conntrack_address lh, rh; 91 union nf_inet_addr lh, rh;
92 unsigned int i; 92 unsigned int i;
93 93
94 for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) { 94 for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
@@ -102,8 +102,8 @@ same_source_net(const union nf_conntrack_address *addr,
102 102
103static int count_them(struct xt_connlimit_data *data, 103static int count_them(struct xt_connlimit_data *data,
104 const struct nf_conntrack_tuple *tuple, 104 const struct nf_conntrack_tuple *tuple,
105 const union nf_conntrack_address *addr, 105 const union nf_inet_addr *addr,
106 const union nf_conntrack_address *mask, 106 const union nf_inet_addr *mask,
107 const struct xt_match *match) 107 const struct xt_match *match)
108{ 108{
109 struct nf_conntrack_tuple_hash *found; 109 struct nf_conntrack_tuple_hash *found;
@@ -178,15 +178,14 @@ static int count_them(struct xt_connlimit_data *data,
178 return matches; 178 return matches;
179} 179}
180 180
181static bool connlimit_match(const struct sk_buff *skb, 181static bool
182 const struct net_device *in, 182connlimit_mt(const struct sk_buff *skb, const struct net_device *in,
183 const struct net_device *out, 183 const struct net_device *out, const struct xt_match *match,
184 const struct xt_match *match, 184 const void *matchinfo, int offset, unsigned int protoff,
185 const void *matchinfo, int offset, 185 bool *hotdrop)
186 unsigned int protoff, bool *hotdrop)
187{ 186{
188 const struct xt_connlimit_info *info = matchinfo; 187 const struct xt_connlimit_info *info = matchinfo;
189 union nf_conntrack_address addr, mask; 188 union nf_inet_addr addr;
190 struct nf_conntrack_tuple tuple; 189 struct nf_conntrack_tuple tuple;
191 const struct nf_conntrack_tuple *tuple_ptr = &tuple; 190 const struct nf_conntrack_tuple *tuple_ptr = &tuple;
192 enum ip_conntrack_info ctinfo; 191 enum ip_conntrack_info ctinfo;
@@ -203,15 +202,14 @@ static bool connlimit_match(const struct sk_buff *skb,
203 if (match->family == AF_INET6) { 202 if (match->family == AF_INET6) {
204 const struct ipv6hdr *iph = ipv6_hdr(skb); 203 const struct ipv6hdr *iph = ipv6_hdr(skb);
205 memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); 204 memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
206 memcpy(&mask.ip6, info->v6_mask, sizeof(info->v6_mask));
207 } else { 205 } else {
208 const struct iphdr *iph = ip_hdr(skb); 206 const struct iphdr *iph = ip_hdr(skb);
209 addr.ip = iph->saddr; 207 addr.ip = iph->saddr;
210 mask.ip = info->v4_mask;
211 } 208 }
212 209
213 spin_lock_bh(&info->data->lock); 210 spin_lock_bh(&info->data->lock);
214 connections = count_them(info->data, tuple_ptr, &addr, &mask, match); 211 connections = count_them(info->data, tuple_ptr, &addr,
212 &info->mask, match);
215 spin_unlock_bh(&info->data->lock); 213 spin_unlock_bh(&info->data->lock);
216 214
217 if (connections < 0) { 215 if (connections < 0) {
@@ -227,9 +225,10 @@ static bool connlimit_match(const struct sk_buff *skb,
227 return false; 225 return false;
228} 226}
229 227
230static bool connlimit_check(const char *tablename, const void *ip, 228static bool
231 const struct xt_match *match, void *matchinfo, 229connlimit_mt_check(const char *tablename, const void *ip,
232 unsigned int hook_mask) 230 const struct xt_match *match, void *matchinfo,
231 unsigned int hook_mask)
233{ 232{
234 struct xt_connlimit_info *info = matchinfo; 233 struct xt_connlimit_info *info = matchinfo;
235 unsigned int i; 234 unsigned int i;
@@ -254,7 +253,8 @@ static bool connlimit_check(const char *tablename, const void *ip,
254 return true; 253 return true;
255} 254}
256 255
257static void connlimit_destroy(const struct xt_match *match, void *matchinfo) 256static void
257connlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
258{ 258{
259 struct xt_connlimit_info *info = matchinfo; 259 struct xt_connlimit_info *info = matchinfo;
260 struct xt_connlimit_conn *conn; 260 struct xt_connlimit_conn *conn;
@@ -274,41 +274,42 @@ static void connlimit_destroy(const struct xt_match *match, void *matchinfo)
274 kfree(info->data); 274 kfree(info->data);
275} 275}
276 276
277static struct xt_match connlimit_reg[] __read_mostly = { 277static struct xt_match connlimit_mt_reg[] __read_mostly = {
278 { 278 {
279 .name = "connlimit", 279 .name = "connlimit",
280 .family = AF_INET, 280 .family = AF_INET,
281 .checkentry = connlimit_check, 281 .checkentry = connlimit_mt_check,
282 .match = connlimit_match, 282 .match = connlimit_mt,
283 .matchsize = sizeof(struct xt_connlimit_info), 283 .matchsize = sizeof(struct xt_connlimit_info),
284 .destroy = connlimit_destroy, 284 .destroy = connlimit_mt_destroy,
285 .me = THIS_MODULE, 285 .me = THIS_MODULE,
286 }, 286 },
287 { 287 {
288 .name = "connlimit", 288 .name = "connlimit",
289 .family = AF_INET6, 289 .family = AF_INET6,
290 .checkentry = connlimit_check, 290 .checkentry = connlimit_mt_check,
291 .match = connlimit_match, 291 .match = connlimit_mt,
292 .matchsize = sizeof(struct xt_connlimit_info), 292 .matchsize = sizeof(struct xt_connlimit_info),
293 .destroy = connlimit_destroy, 293 .destroy = connlimit_mt_destroy,
294 .me = THIS_MODULE, 294 .me = THIS_MODULE,
295 }, 295 },
296}; 296};
297 297
298static int __init xt_connlimit_init(void) 298static int __init connlimit_mt_init(void)
299{ 299{
300 return xt_register_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg)); 300 return xt_register_matches(connlimit_mt_reg,
301 ARRAY_SIZE(connlimit_mt_reg));
301} 302}
302 303
303static void __exit xt_connlimit_exit(void) 304static void __exit connlimit_mt_exit(void)
304{ 305{
305 xt_unregister_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg)); 306 xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
306} 307}
307 308
308module_init(xt_connlimit_init); 309module_init(connlimit_mt_init);
309module_exit(xt_connlimit_exit); 310module_exit(connlimit_mt_exit);
310MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); 311MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
311MODULE_DESCRIPTION("netfilter xt_connlimit match module"); 312MODULE_DESCRIPTION("Xtables: Number of connections matching");
312MODULE_LICENSE("GPL"); 313MODULE_LICENSE("GPL");
313MODULE_ALIAS("ipt_connlimit"); 314MODULE_ALIAS("ipt_connlimit");
314MODULE_ALIAS("ip6t_connlimit"); 315MODULE_ALIAS("ip6t_connlimit");
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 9f67920af41f..aaa1b96691f9 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -1,8 +1,10 @@
1/* This kernel module matches connection mark values set by the 1/*
2 * CONNMARK target 2 * xt_connmark - Netfilter module to match connection mark values
3 * 3 *
4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> 4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
5 * by Henrik Nordstrom <hno@marasystems.com> 5 * by Henrik Nordstrom <hno@marasystems.com>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * Jan Engelhardt <jengelh@computergmbh.de>
6 * 8 *
7 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -26,20 +28,33 @@
26#include <linux/netfilter/xt_connmark.h> 28#include <linux/netfilter/xt_connmark.h>
27 29
28MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); 30MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
29MODULE_DESCRIPTION("IP tables connmark match module"); 31MODULE_DESCRIPTION("Xtables: connection mark match");
30MODULE_LICENSE("GPL"); 32MODULE_LICENSE("GPL");
31MODULE_ALIAS("ipt_connmark"); 33MODULE_ALIAS("ipt_connmark");
32MODULE_ALIAS("ip6t_connmark"); 34MODULE_ALIAS("ip6t_connmark");
33 35
34static bool 36static bool
35match(const struct sk_buff *skb, 37connmark_mt(const struct sk_buff *skb, const struct net_device *in,
36 const struct net_device *in, 38 const struct net_device *out, const struct xt_match *match,
37 const struct net_device *out, 39 const void *matchinfo, int offset, unsigned int protoff,
38 const struct xt_match *match, 40 bool *hotdrop)
39 const void *matchinfo, 41{
40 int offset, 42 const struct xt_connmark_mtinfo1 *info = matchinfo;
41 unsigned int protoff, 43 enum ip_conntrack_info ctinfo;
42 bool *hotdrop) 44 const struct nf_conn *ct;
45
46 ct = nf_ct_get(skb, &ctinfo);
47 if (ct == NULL)
48 return false;
49
50 return ((ct->mark & info->mask) == info->mark) ^ info->invert;
51}
52
53static bool
54connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
55 const struct net_device *out, const struct xt_match *match,
56 const void *matchinfo, int offset, unsigned int protoff,
57 bool *hotdrop)
43{ 58{
44 const struct xt_connmark_info *info = matchinfo; 59 const struct xt_connmark_info *info = matchinfo;
45 const struct nf_conn *ct; 60 const struct nf_conn *ct;
@@ -53,11 +68,9 @@ match(const struct sk_buff *skb,
53} 68}
54 69
55static bool 70static bool
56checkentry(const char *tablename, 71connmark_mt_check_v0(const char *tablename, const void *ip,
57 const void *ip, 72 const struct xt_match *match, void *matchinfo,
58 const struct xt_match *match, 73 unsigned int hook_mask)
59 void *matchinfo,
60 unsigned int hook_mask)
61{ 74{
62 const struct xt_connmark_info *cm = matchinfo; 75 const struct xt_connmark_info *cm = matchinfo;
63 76
@@ -67,14 +80,27 @@ checkentry(const char *tablename,
67 } 80 }
68 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 81 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
69 printk(KERN_WARNING "can't load conntrack support for " 82 printk(KERN_WARNING "can't load conntrack support for "
70 "proto=%d\n", match->family); 83 "proto=%u\n", match->family);
84 return false;
85 }
86 return true;
87}
88
89static bool
90connmark_mt_check(const char *tablename, const void *ip,
91 const struct xt_match *match, void *matchinfo,
92 unsigned int hook_mask)
93{
94 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
95 printk(KERN_WARNING "cannot load conntrack support for "
96 "proto=%u\n", match->family);
71 return false; 97 return false;
72 } 98 }
73 return true; 99 return true;
74} 100}
75 101
76static void 102static void
77destroy(const struct xt_match *match, void *matchinfo) 103connmark_mt_destroy(const struct xt_match *match, void *matchinfo)
78{ 104{
79 nf_ct_l3proto_module_put(match->family); 105 nf_ct_l3proto_module_put(match->family);
80} 106}
@@ -87,7 +113,7 @@ struct compat_xt_connmark_info {
87 u_int16_t __pad2; 113 u_int16_t __pad2;
88}; 114};
89 115
90static void compat_from_user(void *dst, void *src) 116static void connmark_mt_compat_from_user_v0(void *dst, void *src)
91{ 117{
92 const struct compat_xt_connmark_info *cm = src; 118 const struct compat_xt_connmark_info *cm = src;
93 struct xt_connmark_info m = { 119 struct xt_connmark_info m = {
@@ -98,7 +124,7 @@ static void compat_from_user(void *dst, void *src)
98 memcpy(dst, &m, sizeof(m)); 124 memcpy(dst, &m, sizeof(m));
99} 125}
100 126
101static int compat_to_user(void __user *dst, void *src) 127static int connmark_mt_compat_to_user_v0(void __user *dst, void *src)
102{ 128{
103 const struct xt_connmark_info *m = src; 129 const struct xt_connmark_info *m = src;
104 struct compat_xt_connmark_info cm = { 130 struct compat_xt_connmark_info cm = {
@@ -110,42 +136,69 @@ static int compat_to_user(void __user *dst, void *src)
110} 136}
111#endif /* CONFIG_COMPAT */ 137#endif /* CONFIG_COMPAT */
112 138
113static struct xt_match xt_connmark_match[] __read_mostly = { 139static struct xt_match connmark_mt_reg[] __read_mostly = {
114 { 140 {
115 .name = "connmark", 141 .name = "connmark",
142 .revision = 0,
116 .family = AF_INET, 143 .family = AF_INET,
117 .checkentry = checkentry, 144 .checkentry = connmark_mt_check_v0,
118 .match = match, 145 .match = connmark_mt_v0,
119 .destroy = destroy, 146 .destroy = connmark_mt_destroy,
120 .matchsize = sizeof(struct xt_connmark_info), 147 .matchsize = sizeof(struct xt_connmark_info),
121#ifdef CONFIG_COMPAT 148#ifdef CONFIG_COMPAT
122 .compatsize = sizeof(struct compat_xt_connmark_info), 149 .compatsize = sizeof(struct compat_xt_connmark_info),
123 .compat_from_user = compat_from_user, 150 .compat_from_user = connmark_mt_compat_from_user_v0,
124 .compat_to_user = compat_to_user, 151 .compat_to_user = connmark_mt_compat_to_user_v0,
125#endif 152#endif
126 .me = THIS_MODULE 153 .me = THIS_MODULE
127 }, 154 },
128 { 155 {
129 .name = "connmark", 156 .name = "connmark",
157 .revision = 0,
130 .family = AF_INET6, 158 .family = AF_INET6,
131 .checkentry = checkentry, 159 .checkentry = connmark_mt_check_v0,
132 .match = match, 160 .match = connmark_mt_v0,
133 .destroy = destroy, 161 .destroy = connmark_mt_destroy,
134 .matchsize = sizeof(struct xt_connmark_info), 162 .matchsize = sizeof(struct xt_connmark_info),
163#ifdef CONFIG_COMPAT
164 .compatsize = sizeof(struct compat_xt_connmark_info),
165 .compat_from_user = connmark_mt_compat_from_user_v0,
166 .compat_to_user = connmark_mt_compat_to_user_v0,
167#endif
135 .me = THIS_MODULE 168 .me = THIS_MODULE
136 }, 169 },
170 {
171 .name = "connmark",
172 .revision = 1,
173 .family = AF_INET,
174 .checkentry = connmark_mt_check,
175 .match = connmark_mt,
176 .matchsize = sizeof(struct xt_connmark_mtinfo1),
177 .destroy = connmark_mt_destroy,
178 .me = THIS_MODULE,
179 },
180 {
181 .name = "connmark",
182 .revision = 1,
183 .family = AF_INET6,
184 .checkentry = connmark_mt_check,
185 .match = connmark_mt,
186 .matchsize = sizeof(struct xt_connmark_mtinfo1),
187 .destroy = connmark_mt_destroy,
188 .me = THIS_MODULE,
189 },
137}; 190};
138 191
139static int __init xt_connmark_init(void) 192static int __init connmark_mt_init(void)
140{ 193{
141 return xt_register_matches(xt_connmark_match, 194 return xt_register_matches(connmark_mt_reg,
142 ARRAY_SIZE(xt_connmark_match)); 195 ARRAY_SIZE(connmark_mt_reg));
143} 196}
144 197
145static void __exit xt_connmark_fini(void) 198static void __exit connmark_mt_exit(void)
146{ 199{
147 xt_unregister_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); 200 xt_unregister_matches(connmark_mt_reg, ARRAY_SIZE(connmark_mt_reg));
148} 201}
149 202
150module_init(xt_connmark_init); 203module_init(connmark_mt_init);
151module_exit(xt_connmark_fini); 204module_exit(connmark_mt_exit);
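The connmark conversion introduces the revision mechanism: the old struct xt_connmark_info entries stay registered as .revision = 0 (now with compat handlers on the IPv6 side as well), while new userspace binds to .revision = 1 and struct xt_connmark_mtinfo1. The revision 1 predicate visible above is a single masked compare with optional inversion; stated as runnable C:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* revision 1 semantics from connmark_mt() above */
    static bool connmark_match(uint32_t ctmark, uint32_t mark,
                               uint32_t mask, bool invert)
    {
        return ((ctmark & mask) == mark) ^ invert;
    }

    int main(void)
    {
        /* match connections whose low mark byte is 0x05 */
        printf("%d\n", connmark_match(0x1205, 0x05, 0xff, false));  /* 1 */
        printf("%d\n", connmark_match(0x1206, 0x05, 0xff, false));  /* 0 */
        printf("%d\n", connmark_match(0x1206, 0x05, 0xff, true));   /* 1 */
        return 0;
    }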
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index ca4b69f020a8..e92190eafcc5 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -1,33 +1,34 @@
1/* Kernel module to match connection tracking information. 1/*
2 * Superset of Rusty's minimalistic state match. 2 * xt_conntrack - Netfilter module to match connection tracking
3 * information. (Superset of Rusty's minimalistic state match.)
3 * 4 *
4 * (C) 2001 Marc Boucher (marc@mbsi.ca). 5 * (C) 2001 Marc Boucher (marc@mbsi.ca).
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * Jan Engelhardt <jengelh@computergmbh.de>
5 * 8 *
6 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
9 */ 12 */
10 13
11#include <linux/module.h> 14#include <linux/module.h>
12#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <net/ipv6.h>
13#include <linux/netfilter/x_tables.h> 17#include <linux/netfilter/x_tables.h>
14#include <linux/netfilter/xt_conntrack.h> 18#include <linux/netfilter/xt_conntrack.h>
15#include <net/netfilter/nf_conntrack.h> 19#include <net/netfilter/nf_conntrack.h>
16 20
17MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 22MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
19MODULE_DESCRIPTION("iptables connection tracking match module"); 23MODULE_DESCRIPTION("Xtables: connection tracking state match");
20MODULE_ALIAS("ipt_conntrack"); 24MODULE_ALIAS("ipt_conntrack");
25MODULE_ALIAS("ip6t_conntrack");
21 26
22static bool 27static bool
23match(const struct sk_buff *skb, 28conntrack_mt_v0(const struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, 29 const struct net_device *out, const struct xt_match *match,
25 const struct net_device *out, 30 const void *matchinfo, int offset, unsigned int protoff,
26 const struct xt_match *match, 31 bool *hotdrop)
27 const void *matchinfo,
28 int offset,
29 unsigned int protoff,
30 bool *hotdrop)
31{ 32{
32 const struct xt_conntrack_info *sinfo = matchinfo; 33 const struct xt_conntrack_info *sinfo = matchinfo;
33 const struct nf_conn *ct; 34 const struct nf_conn *ct;
@@ -36,7 +37,7 @@ match(const struct sk_buff *skb,
36 37
37 ct = nf_ct_get(skb, &ctinfo); 38 ct = nf_ct_get(skb, &ctinfo);
38 39
39#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) 40#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & (invflg)))
40 41
41 if (ct == &nf_conntrack_untracked) 42 if (ct == &nf_conntrack_untracked)
42 statebit = XT_CONNTRACK_STATE_UNTRACKED; 43 statebit = XT_CONNTRACK_STATE_UNTRACKED;
@@ -112,24 +113,152 @@ match(const struct sk_buff *skb,
112 return false; 113 return false;
113 } 114 }
114 return true; 115 return true;
116#undef FWINV
115} 117}
116 118
117static bool 119static bool
118checkentry(const char *tablename, 120conntrack_addrcmp(const union nf_inet_addr *kaddr,
119 const void *ip, 121 const union nf_inet_addr *uaddr,
120 const struct xt_match *match, 122 const union nf_inet_addr *umask, unsigned int l3proto)
121 void *matchinfo, 123{
122 unsigned int hook_mask) 124 if (l3proto == AF_INET)
125 return (kaddr->ip & umask->ip) == uaddr->ip;
126 else if (l3proto == AF_INET6)
127 return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
128 &uaddr->in6) == 0;
129 else
130 return false;
131}
132
133static inline bool
134conntrack_mt_origsrc(const struct nf_conn *ct,
135 const struct xt_conntrack_mtinfo1 *info,
136 unsigned int family)
137{
138 return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
139 &info->origsrc_addr, &info->origsrc_mask, family);
140}
141
142static inline bool
143conntrack_mt_origdst(const struct nf_conn *ct,
144 const struct xt_conntrack_mtinfo1 *info,
145 unsigned int family)
146{
147 return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3,
148 &info->origdst_addr, &info->origdst_mask, family);
149}
150
151static inline bool
152conntrack_mt_replsrc(const struct nf_conn *ct,
153 const struct xt_conntrack_mtinfo1 *info,
154 unsigned int family)
155{
156 return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
157 &info->replsrc_addr, &info->replsrc_mask, family);
158}
159
160static inline bool
161conntrack_mt_repldst(const struct nf_conn *ct,
162 const struct xt_conntrack_mtinfo1 *info,
163 unsigned int family)
164{
165 return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
166 &info->repldst_addr, &info->repldst_mask, family);
167}
168
169static bool
170conntrack_mt(const struct sk_buff *skb, const struct net_device *in,
171 const struct net_device *out, const struct xt_match *match,
172 const void *matchinfo, int offset, unsigned int protoff,
173 bool *hotdrop)
174{
175 const struct xt_conntrack_mtinfo1 *info = matchinfo;
176 enum ip_conntrack_info ctinfo;
177 const struct nf_conn *ct;
178 unsigned int statebit;
179
180 ct = nf_ct_get(skb, &ctinfo);
181
182 if (ct == &nf_conntrack_untracked)
183 statebit = XT_CONNTRACK_STATE_UNTRACKED;
184 else if (ct != NULL)
185 statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
186 else
187 statebit = XT_CONNTRACK_STATE_INVALID;
188
189 if (info->match_flags & XT_CONNTRACK_STATE) {
190 if (ct != NULL) {
191 if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
192 statebit |= XT_CONNTRACK_STATE_SNAT;
193 if (test_bit(IPS_DST_NAT_BIT, &ct->status))
194 statebit |= XT_CONNTRACK_STATE_DNAT;
195 }
196 if ((info->state_mask & statebit) ^
197 !(info->invert_flags & XT_CONNTRACK_STATE))
198 return false;
199 }
200
201 if (ct == NULL)
202 return info->match_flags & XT_CONNTRACK_STATE;
203
204 if ((info->match_flags & XT_CONNTRACK_PROTO) &&
205 ((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
206 info->l4proto) ^ !(info->invert_flags & XT_CONNTRACK_PROTO)))
207 return false;
208
209 if (info->match_flags & XT_CONNTRACK_ORIGSRC)
210 if (conntrack_mt_origsrc(ct, info, match->family) ^
211 !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
212 return false;
213
214 if (info->match_flags & XT_CONNTRACK_ORIGDST)
215 if (conntrack_mt_origdst(ct, info, match->family) ^
216 !(info->invert_flags & XT_CONNTRACK_ORIGDST))
217 return false;
218
219 if (info->match_flags & XT_CONNTRACK_REPLSRC)
220 if (conntrack_mt_replsrc(ct, info, match->family) ^
221 !(info->invert_flags & XT_CONNTRACK_REPLSRC))
222 return false;
223
224 if (info->match_flags & XT_CONNTRACK_REPLDST)
225 if (conntrack_mt_repldst(ct, info, match->family) ^
226 !(info->invert_flags & XT_CONNTRACK_REPLDST))
227 return false;
228
229 if ((info->match_flags & XT_CONNTRACK_STATUS) &&
230 (!!(info->status_mask & ct->status) ^
231 !(info->invert_flags & XT_CONNTRACK_STATUS)))
232 return false;
233
234 if (info->match_flags & XT_CONNTRACK_EXPIRES) {
235 unsigned long expires = 0;
236
237 if (timer_pending(&ct->timeout))
238 expires = (ct->timeout.expires - jiffies) / HZ;
239 if ((expires >= info->expires_min &&
240 expires <= info->expires_max) ^
241 !(info->invert_flags & XT_CONNTRACK_EXPIRES))
242 return false;
243 }
244 return true;
245}
246
247static bool
248conntrack_mt_check(const char *tablename, const void *ip,
249 const struct xt_match *match, void *matchinfo,
250 unsigned int hook_mask)
123{ 251{
124 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 252 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
125 printk(KERN_WARNING "can't load conntrack support for " 253 printk(KERN_WARNING "can't load conntrack support for "
126 "proto=%d\n", match->family); 254 "proto=%u\n", match->family);
127 return false; 255 return false;
128 } 256 }
129 return true; 257 return true;
130} 258}
131 259
132static void destroy(const struct xt_match *match, void *matchinfo) 260static void
261conntrack_mt_destroy(const struct xt_match *match, void *matchinfo)
133{ 262{
134 nf_ct_l3proto_module_put(match->family); 263 nf_ct_l3proto_module_put(match->family);
135} 264}
@@ -148,7 +277,7 @@ struct compat_xt_conntrack_info
148 u_int8_t invflags; 277 u_int8_t invflags;
149}; 278};
150 279
151static void compat_from_user(void *dst, void *src) 280static void conntrack_mt_compat_from_user_v0(void *dst, void *src)
152{ 281{
153 const struct compat_xt_conntrack_info *cm = src; 282 const struct compat_xt_conntrack_info *cm = src;
154 struct xt_conntrack_info m = { 283 struct xt_conntrack_info m = {
@@ -165,7 +294,7 @@ static void compat_from_user(void *dst, void *src)
165 memcpy(dst, &m, sizeof(m)); 294 memcpy(dst, &m, sizeof(m));
166} 295}
167 296
168static int compat_to_user(void __user *dst, void *src) 297static int conntrack_mt_compat_to_user_v0(void __user *dst, void *src)
169{ 298{
170 const struct xt_conntrack_info *m = src; 299 const struct xt_conntrack_info *m = src;
171 struct compat_xt_conntrack_info cm = { 300 struct compat_xt_conntrack_info cm = {
@@ -183,30 +312,54 @@ static int compat_to_user(void __user *dst, void *src)
183} 312}
184#endif 313#endif
185 314
186static struct xt_match conntrack_match __read_mostly = { 315static struct xt_match conntrack_mt_reg[] __read_mostly = {
187 .name = "conntrack", 316 {
188 .match = match, 317 .name = "conntrack",
189 .checkentry = checkentry, 318 .revision = 0,
190 .destroy = destroy, 319 .family = AF_INET,
191 .matchsize = sizeof(struct xt_conntrack_info), 320 .match = conntrack_mt_v0,
321 .checkentry = conntrack_mt_check,
322 .destroy = conntrack_mt_destroy,
323 .matchsize = sizeof(struct xt_conntrack_info),
324 .me = THIS_MODULE,
192#ifdef CONFIG_COMPAT 325#ifdef CONFIG_COMPAT
193 .compatsize = sizeof(struct compat_xt_conntrack_info), 326 .compatsize = sizeof(struct compat_xt_conntrack_info),
194 .compat_from_user = compat_from_user, 327 .compat_from_user = conntrack_mt_compat_from_user_v0,
195 .compat_to_user = compat_to_user, 328 .compat_to_user = conntrack_mt_compat_to_user_v0,
196#endif 329#endif
197 .family = AF_INET, 330 },
198 .me = THIS_MODULE, 331 {
332 .name = "conntrack",
333 .revision = 1,
334 .family = AF_INET,
335 .matchsize = sizeof(struct xt_conntrack_mtinfo1),
336 .match = conntrack_mt,
337 .checkentry = conntrack_mt_check,
338 .destroy = conntrack_mt_destroy,
339 .me = THIS_MODULE,
340 },
341 {
342 .name = "conntrack",
343 .revision = 1,
344 .family = AF_INET6,
345 .matchsize = sizeof(struct xt_conntrack_mtinfo1),
346 .match = conntrack_mt,
347 .checkentry = conntrack_mt_check,
348 .destroy = conntrack_mt_destroy,
349 .me = THIS_MODULE,
350 },
199}; 351};
200 352
201static int __init xt_conntrack_init(void) 353static int __init conntrack_mt_init(void)
202{ 354{
203 return xt_register_match(&conntrack_match); 355 return xt_register_matches(conntrack_mt_reg,
356 ARRAY_SIZE(conntrack_mt_reg));
204} 357}
205 358
206static void __exit xt_conntrack_fini(void) 359static void __exit conntrack_mt_exit(void)
207{ 360{
208 xt_unregister_match(&conntrack_match); 361 xt_unregister_matches(conntrack_mt_reg, ARRAY_SIZE(conntrack_mt_reg));
209} 362}
210 363
211module_init(xt_conntrack_init); 364module_init(conntrack_mt_init);
212module_exit(xt_conntrack_fini); 365module_exit(conntrack_mt_exit);
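The new revision 1 conntrack_mt() applies the same test shape to every field: a sub-test XORed against the negated invert flag, where a true result rejects the packet, so the rule keeps matching only while result == !inverted. (Also worth noting from the hunks above: conntrack_addrcmp() masks only the conntrack-side address for IPv4, relying on userspace to supply a pre-masked comparison address.) A small truth-table demo of the invert idiom:

    #include <stdbool.h>
    #include <stdio.h>

    #define F_PROTO 0x01

    /* one sub-test of conntrack_mt(): pass iff result == !inverted */
    static bool subtest_passes(bool result, unsigned int invert_flags,
                               unsigned int flag)
    {
        return !(result ^ !(invert_flags & flag));
    }

    int main(void)
    {
        printf("%d\n", subtest_passes(true,  0,       F_PROTO)); /* 1 */
        printf("%d\n", subtest_passes(false, 0,       F_PROTO)); /* 0 */
        printf("%d\n", subtest_passes(true,  F_PROTO, F_PROTO)); /* 0 */
        printf("%d\n", subtest_passes(false, F_PROTO, F_PROTO)); /* 1 */
        return 0;
    }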
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index c2b1b24ee335..667f45e72cd9 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -22,7 +22,7 @@
22 22
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 24MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
25MODULE_DESCRIPTION("Match for DCCP protocol packets"); 25MODULE_DESCRIPTION("Xtables: DCCP protocol packet match");
26MODULE_ALIAS("ipt_dccp"); 26MODULE_ALIAS("ipt_dccp");
27MODULE_ALIAS("ip6t_dccp"); 27MODULE_ALIAS("ip6t_dccp");
28 28
@@ -93,14 +93,9 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
93} 93}
94 94
95static bool 95static bool
96match(const struct sk_buff *skb, 96dccp_mt(const struct sk_buff *skb, const struct net_device *in,
97 const struct net_device *in, 97 const struct net_device *out, const struct xt_match *match,
98 const struct net_device *out, 98 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
99 const struct xt_match *match,
100 const void *matchinfo,
101 int offset,
102 unsigned int protoff,
103 bool *hotdrop)
104{ 99{
105 const struct xt_dccp_info *info = matchinfo; 100 const struct xt_dccp_info *info = matchinfo;
106 struct dccp_hdr _dh, *dh; 101 struct dccp_hdr _dh, *dh;
@@ -128,11 +123,9 @@ match(const struct sk_buff *skb,
128} 123}
129 124
130static bool 125static bool
131checkentry(const char *tablename, 126dccp_mt_check(const char *tablename, const void *inf,
132 const void *inf, 127 const struct xt_match *match, void *matchinfo,
133 const struct xt_match *match, 128 unsigned int hook_mask)
134 void *matchinfo,
135 unsigned int hook_mask)
136{ 129{
137 const struct xt_dccp_info *info = matchinfo; 130 const struct xt_dccp_info *info = matchinfo;
138 131
@@ -141,12 +134,12 @@ checkentry(const char *tablename,
141 && !(info->invflags & ~info->flags); 134 && !(info->invflags & ~info->flags);
142} 135}
143 136
144static struct xt_match xt_dccp_match[] __read_mostly = { 137static struct xt_match dccp_mt_reg[] __read_mostly = {
145 { 138 {
146 .name = "dccp", 139 .name = "dccp",
147 .family = AF_INET, 140 .family = AF_INET,
148 .checkentry = checkentry, 141 .checkentry = dccp_mt_check,
149 .match = match, 142 .match = dccp_mt,
150 .matchsize = sizeof(struct xt_dccp_info), 143 .matchsize = sizeof(struct xt_dccp_info),
151 .proto = IPPROTO_DCCP, 144 .proto = IPPROTO_DCCP,
152 .me = THIS_MODULE, 145 .me = THIS_MODULE,
@@ -154,15 +147,15 @@ static struct xt_match xt_dccp_match[] __read_mostly = {
154 { 147 {
155 .name = "dccp", 148 .name = "dccp",
156 .family = AF_INET6, 149 .family = AF_INET6,
157 .checkentry = checkentry, 150 .checkentry = dccp_mt_check,
158 .match = match, 151 .match = dccp_mt,
159 .matchsize = sizeof(struct xt_dccp_info), 152 .matchsize = sizeof(struct xt_dccp_info),
160 .proto = IPPROTO_DCCP, 153 .proto = IPPROTO_DCCP,
161 .me = THIS_MODULE, 154 .me = THIS_MODULE,
162 }, 155 },
163}; 156};
164 157
165static int __init xt_dccp_init(void) 158static int __init dccp_mt_init(void)
166{ 159{
167 int ret; 160 int ret;
168 161
@@ -172,7 +165,7 @@ static int __init xt_dccp_init(void)
172 dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); 165 dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
173 if (!dccp_optbuf) 166 if (!dccp_optbuf)
174 return -ENOMEM; 167 return -ENOMEM;
175 ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); 168 ret = xt_register_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
176 if (ret) 169 if (ret)
177 goto out_kfree; 170 goto out_kfree;
178 return ret; 171 return ret;
@@ -182,11 +175,11 @@ out_kfree:
182 return ret; 175 return ret;
183} 176}
184 177
185static void __exit xt_dccp_fini(void) 178static void __exit dccp_mt_exit(void)
186{ 179{
187 xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); 180 xt_unregister_matches(dccp_mt_reg, ARRAY_SIZE(dccp_mt_reg));
188 kfree(dccp_optbuf); 181 kfree(dccp_optbuf);
189} 182}
190 183
191module_init(xt_dccp_init); 184module_init(dccp_mt_init);
192module_exit(xt_dccp_fini); 185module_exit(dccp_mt_exit);
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index dde6d66e0d33..26f4aab9c429 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -13,23 +13,22 @@
13#include <linux/ipv6.h> 13#include <linux/ipv6.h>
14#include <net/dsfield.h> 14#include <net/dsfield.h>
15 15
16#include <linux/netfilter/xt_dscp.h>
17#include <linux/netfilter/x_tables.h> 16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_dscp.h>
18#include <linux/netfilter_ipv4/ipt_tos.h>
18 19
19MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 20MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
20MODULE_DESCRIPTION("x_tables DSCP matching module"); 21MODULE_DESCRIPTION("Xtables: DSCP/TOS field match");
21MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
22MODULE_ALIAS("ipt_dscp"); 23MODULE_ALIAS("ipt_dscp");
23MODULE_ALIAS("ip6t_dscp"); 24MODULE_ALIAS("ip6t_dscp");
25MODULE_ALIAS("ipt_tos");
26MODULE_ALIAS("ip6t_tos");
24 27
25static bool match(const struct sk_buff *skb, 28static bool
26 const struct net_device *in, 29dscp_mt(const struct sk_buff *skb, const struct net_device *in,
27 const struct net_device *out, 30 const struct net_device *out, const struct xt_match *match,
28 const struct xt_match *match, 31 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
29 const void *matchinfo,
30 int offset,
31 unsigned int protoff,
32 bool *hotdrop)
33{ 32{
34 const struct xt_dscp_info *info = matchinfo; 33 const struct xt_dscp_info *info = matchinfo;
35 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; 34 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -37,14 +36,11 @@ static bool match(const struct sk_buff *skb,
37 return (dscp == info->dscp) ^ !!info->invert; 36 return (dscp == info->dscp) ^ !!info->invert;
38} 37}
39 38
40static bool match6(const struct sk_buff *skb, 39static bool
41 const struct net_device *in, 40dscp_mt6(const struct sk_buff *skb, const struct net_device *in,
42 const struct net_device *out, 41 const struct net_device *out, const struct xt_match *match,
43 const struct xt_match *match, 42 const void *matchinfo, int offset, unsigned int protoff,
44 const void *matchinfo, 43 bool *hotdrop)
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 44{
49 const struct xt_dscp_info *info = matchinfo; 45 const struct xt_dscp_info *info = matchinfo;
50 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; 46 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -52,11 +48,10 @@ static bool match6(const struct sk_buff *skb,
52 return (dscp == info->dscp) ^ !!info->invert; 48 return (dscp == info->dscp) ^ !!info->invert;
53} 49}
54 50
55static bool checkentry(const char *tablename, 51static bool
56 const void *info, 52dscp_mt_check(const char *tablename, const void *info,
57 const struct xt_match *match, 53 const struct xt_match *match, void *matchinfo,
58 void *matchinfo, 54 unsigned int hook_mask)
59 unsigned int hook_mask)
60{ 55{
61 const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; 56 const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
62 57
@@ -68,34 +63,83 @@ static bool checkentry(const char *tablename,
68 return true; 63 return true;
69} 64}
70 65
71static struct xt_match xt_dscp_match[] __read_mostly = { 66static bool tos_mt_v0(const struct sk_buff *skb, const struct net_device *in,
67 const struct net_device *out,
68 const struct xt_match *match, const void *matchinfo,
69 int offset, unsigned int protoff, bool *hotdrop)
70{
71 const struct ipt_tos_info *info = matchinfo;
72
73 return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
74}
75
76static bool tos_mt(const struct sk_buff *skb, const struct net_device *in,
77 const struct net_device *out, const struct xt_match *match,
78 const void *matchinfo, int offset, unsigned int protoff,
79 bool *hotdrop)
80{
81 const struct xt_tos_match_info *info = matchinfo;
82
83 if (match->family == AF_INET)
84 return ((ip_hdr(skb)->tos & info->tos_mask) ==
85 info->tos_value) ^ !!info->invert;
86 else
87 return ((ipv6_get_dsfield(ipv6_hdr(skb)) & info->tos_mask) ==
88 info->tos_value) ^ !!info->invert;
89}
90
91static struct xt_match dscp_mt_reg[] __read_mostly = {
72 { 92 {
73 .name = "dscp", 93 .name = "dscp",
74 .family = AF_INET, 94 .family = AF_INET,
75 .checkentry = checkentry, 95 .checkentry = dscp_mt_check,
76 .match = match, 96 .match = dscp_mt,
77 .matchsize = sizeof(struct xt_dscp_info), 97 .matchsize = sizeof(struct xt_dscp_info),
78 .me = THIS_MODULE, 98 .me = THIS_MODULE,
79 }, 99 },
80 { 100 {
81 .name = "dscp", 101 .name = "dscp",
82 .family = AF_INET6, 102 .family = AF_INET6,
83 .checkentry = checkentry, 103 .checkentry = dscp_mt_check,
84 .match = match6, 104 .match = dscp_mt6,
85 .matchsize = sizeof(struct xt_dscp_info), 105 .matchsize = sizeof(struct xt_dscp_info),
86 .me = THIS_MODULE, 106 .me = THIS_MODULE,
87 }, 107 },
108 {
109 .name = "tos",
110 .revision = 0,
111 .family = AF_INET,
112 .match = tos_mt_v0,
113 .matchsize = sizeof(struct ipt_tos_info),
114 .me = THIS_MODULE,
115 },
116 {
117 .name = "tos",
118 .revision = 1,
119 .family = AF_INET,
120 .match = tos_mt,
121 .matchsize = sizeof(struct xt_tos_match_info),
122 .me = THIS_MODULE,
123 },
124 {
125 .name = "tos",
126 .revision = 1,
127 .family = AF_INET6,
128 .match = tos_mt,
129 .matchsize = sizeof(struct xt_tos_match_info),
130 .me = THIS_MODULE,
131 },
88}; 132};
89 133
90static int __init xt_dscp_match_init(void) 134static int __init dscp_mt_init(void)
91{ 135{
92 return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); 136 return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
93} 137}
94 138
95static void __exit xt_dscp_match_fini(void) 139static void __exit dscp_mt_exit(void)
96{ 140{
97 xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); 141 xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
98} 142}
99 143
100module_init(xt_dscp_match_init); 144module_init(dscp_mt_init);
101module_exit(xt_dscp_match_fini); 145module_exit(dscp_mt_exit);
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index b11378e001b6..71c7c3785266 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -20,7 +20,7 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); 22MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
23MODULE_DESCRIPTION("x_tables ESP SPI match module"); 23MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match");
24MODULE_ALIAS("ipt_esp"); 24MODULE_ALIAS("ipt_esp");
25MODULE_ALIAS("ip6t_esp"); 25MODULE_ALIAS("ip6t_esp");
26 26
@@ -43,14 +43,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
43} 43}
44 44
45static bool 45static bool
46match(const struct sk_buff *skb, 46esp_mt(const struct sk_buff *skb, const struct net_device *in,
47 const struct net_device *in, 47 const struct net_device *out, const struct xt_match *match,
48 const struct net_device *out, 48 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
49 const struct xt_match *match,
50 const void *matchinfo,
51 int offset,
52 unsigned int protoff,
53 bool *hotdrop)
54{ 49{
55 struct ip_esp_hdr _esp, *eh; 50 struct ip_esp_hdr _esp, *eh;
56 const struct xt_esp *espinfo = matchinfo; 51 const struct xt_esp *espinfo = matchinfo;
@@ -75,11 +70,9 @@ match(const struct sk_buff *skb,
75 70
76/* Called when user tries to insert an entry of this type. */ 71/* Called when user tries to insert an entry of this type. */
77static bool 72static bool
78checkentry(const char *tablename, 73esp_mt_check(const char *tablename, const void *ip_void,
79 const void *ip_void, 74 const struct xt_match *match, void *matchinfo,
80 const struct xt_match *match, 75 unsigned int hook_mask)
81 void *matchinfo,
82 unsigned int hook_mask)
83{ 76{
84 const struct xt_esp *espinfo = matchinfo; 77 const struct xt_esp *espinfo = matchinfo;
85 78
@@ -91,12 +84,12 @@ checkentry(const char *tablename,
91 return true; 84 return true;
92} 85}
93 86
94static struct xt_match xt_esp_match[] __read_mostly = { 87static struct xt_match esp_mt_reg[] __read_mostly = {
95 { 88 {
96 .name = "esp", 89 .name = "esp",
97 .family = AF_INET, 90 .family = AF_INET,
98 .checkentry = checkentry, 91 .checkentry = esp_mt_check,
99 .match = match, 92 .match = esp_mt,
100 .matchsize = sizeof(struct xt_esp), 93 .matchsize = sizeof(struct xt_esp),
101 .proto = IPPROTO_ESP, 94 .proto = IPPROTO_ESP,
102 .me = THIS_MODULE, 95 .me = THIS_MODULE,
@@ -104,23 +97,23 @@ static struct xt_match xt_esp_match[] __read_mostly = {
104 { 97 {
105 .name = "esp", 98 .name = "esp",
106 .family = AF_INET6, 99 .family = AF_INET6,
107 .checkentry = checkentry, 100 .checkentry = esp_mt_check,
108 .match = match, 101 .match = esp_mt,
109 .matchsize = sizeof(struct xt_esp), 102 .matchsize = sizeof(struct xt_esp),
110 .proto = IPPROTO_ESP, 103 .proto = IPPROTO_ESP,
111 .me = THIS_MODULE, 104 .me = THIS_MODULE,
112 }, 105 },
113}; 106};
114 107
115static int __init xt_esp_init(void) 108static int __init esp_mt_init(void)
116{ 109{
117 return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); 110 return xt_register_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
118} 111}
119 112
120static void __exit xt_esp_cleanup(void) 113static void __exit esp_mt_exit(void)
121{ 114{
122 xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); 115 xt_unregister_matches(esp_mt_reg, ARRAY_SIZE(esp_mt_reg));
123} 116}
124 117
125module_init(xt_esp_init); 118module_init(esp_mt_init);
126module_exit(xt_esp_cleanup); 119module_exit(esp_mt_exit);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 2ef44d8560c1..d479ca980115 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -20,7 +20,11 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/in.h> 21#include <linux/in.h>
22#include <linux/ip.h> 22#include <linux/ip.h>
23#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
23#include <linux/ipv6.h> 24#include <linux/ipv6.h>
25#include <net/ipv6.h>
26#endif
27
24#include <net/net_namespace.h> 28#include <net/net_namespace.h>
25 29
26#include <linux/netfilter/x_tables.h> 30#include <linux/netfilter/x_tables.h>
@@ -31,7 +35,7 @@
31 35
32MODULE_LICENSE("GPL"); 36MODULE_LICENSE("GPL");
33MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 37MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
34MODULE_DESCRIPTION("iptables match for limiting per hash-bucket"); 38MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
35MODULE_ALIAS("ipt_hashlimit"); 39MODULE_ALIAS("ipt_hashlimit");
36MODULE_ALIAS("ip6t_hashlimit"); 40MODULE_ALIAS("ip6t_hashlimit");
37 41
@@ -47,10 +51,12 @@ struct dsthash_dst {
47 __be32 src; 51 __be32 src;
48 __be32 dst; 52 __be32 dst;
49 } ip; 53 } ip;
54#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
50 struct { 55 struct {
51 __be32 src[4]; 56 __be32 src[4];
52 __be32 dst[4]; 57 __be32 dst[4];
53 } ip6; 58 } ip6;
59#endif
54 } addr; 60 } addr;
55 __be16 src_port; 61 __be16 src_port;
56 __be16 dst_port; 62 __be16 dst_port;
@@ -104,7 +110,16 @@ static inline bool dst_cmp(const struct dsthash_ent *ent,
104static u_int32_t 110static u_int32_t
105hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) 111hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
106{ 112{
107 return jhash(dst, sizeof(*dst), ht->rnd) % ht->cfg.size; 113 u_int32_t hash = jhash2((const u32 *)dst,
114 sizeof(*dst)/sizeof(u32),
115 ht->rnd);
116 /*
117 * Instead of returning hash % ht->cfg.size (implying a divide)
118 * we return the high 32 bits of (hash * ht->cfg.size), which
119 * gives results in [0, cfg.size-1] with the same distribution,
120 * using a multiply, which is cheaper than a divide
121 */
122 return ((u64)hash * ht->cfg.size) >> 32;
108} 123}
109 124
110static struct dsthash_ent * 125static struct dsthash_ent *
@@ -379,7 +394,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
379 const struct sk_buff *skb, unsigned int protoff) 394 const struct sk_buff *skb, unsigned int protoff)
380{ 395{
381 __be16 _ports[2], *ports; 396 __be16 _ports[2], *ports;
382 int nexthdr; 397 u8 nexthdr;
383 398
384 memset(dst, 0, sizeof(*dst)); 399 memset(dst, 0, sizeof(*dst));
385 400
@@ -407,8 +422,9 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
407 if (!(hinfo->cfg.mode & 422 if (!(hinfo->cfg.mode &
408 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) 423 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
409 return 0; 424 return 0;
410 nexthdr = ipv6_find_hdr(skb, &protoff, -1, NULL); 425 nexthdr = ipv6_hdr(skb)->nexthdr;
411 if (nexthdr < 0) 426 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
427 if ((int)protoff < 0)
412 return -1; 428 return -1;
413 break; 429 break;
414#endif 430#endif
@@ -441,14 +457,10 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
441} 457}
442 458
443static bool 459static bool
444hashlimit_match(const struct sk_buff *skb, 460hashlimit_mt(const struct sk_buff *skb, const struct net_device *in,
445 const struct net_device *in, 461 const struct net_device *out, const struct xt_match *match,
446 const struct net_device *out, 462 const void *matchinfo, int offset, unsigned int protoff,
447 const struct xt_match *match, 463 bool *hotdrop)
448 const void *matchinfo,
449 int offset,
450 unsigned int protoff,
451 bool *hotdrop)
452{ 464{
453 const struct xt_hashlimit_info *r = 465 const struct xt_hashlimit_info *r =
454 ((const struct xt_hashlimit_info *)matchinfo)->u.master; 466 ((const struct xt_hashlimit_info *)matchinfo)->u.master;
@@ -500,11 +512,9 @@ hotdrop:
500} 512}
501 513
502static bool 514static bool
503hashlimit_checkentry(const char *tablename, 515hashlimit_mt_check(const char *tablename, const void *inf,
504 const void *inf, 516 const struct xt_match *match, void *matchinfo,
505 const struct xt_match *match, 517 unsigned int hook_mask)
506 void *matchinfo,
507 unsigned int hook_mask)
508{ 518{
509 struct xt_hashlimit_info *r = matchinfo; 519 struct xt_hashlimit_info *r = matchinfo;
510 520
@@ -548,7 +558,7 @@ hashlimit_checkentry(const char *tablename,
548} 558}
549 559
550static void 560static void
551hashlimit_destroy(const struct xt_match *match, void *matchinfo) 561hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
552{ 562{
553 const struct xt_hashlimit_info *r = matchinfo; 563 const struct xt_hashlimit_info *r = matchinfo;
554 564
@@ -563,7 +573,7 @@ struct compat_xt_hashlimit_info {
563 compat_uptr_t master; 573 compat_uptr_t master;
564}; 574};
565 575
566static void compat_from_user(void *dst, void *src) 576static void hashlimit_mt_compat_from_user(void *dst, void *src)
567{ 577{
568 int off = offsetof(struct compat_xt_hashlimit_info, hinfo); 578 int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
569 579
@@ -571,7 +581,7 @@ static void compat_from_user(void *dst, void *src)
571 memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off); 581 memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
572} 582}
573 583
574static int compat_to_user(void __user *dst, void *src) 584static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
575{ 585{
576 int off = offsetof(struct compat_xt_hashlimit_info, hinfo); 586 int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
577 587
@@ -579,35 +589,37 @@ static int compat_to_user(void __user *dst, void *src)
579} 589}
580#endif 590#endif
581 591
582static struct xt_match xt_hashlimit[] __read_mostly = { 592static struct xt_match hashlimit_mt_reg[] __read_mostly = {
583 { 593 {
584 .name = "hashlimit", 594 .name = "hashlimit",
585 .family = AF_INET, 595 .family = AF_INET,
586 .match = hashlimit_match, 596 .match = hashlimit_mt,
587 .matchsize = sizeof(struct xt_hashlimit_info), 597 .matchsize = sizeof(struct xt_hashlimit_info),
588#ifdef CONFIG_COMPAT 598#ifdef CONFIG_COMPAT
589 .compatsize = sizeof(struct compat_xt_hashlimit_info), 599 .compatsize = sizeof(struct compat_xt_hashlimit_info),
590 .compat_from_user = compat_from_user, 600 .compat_from_user = hashlimit_mt_compat_from_user,
591 .compat_to_user = compat_to_user, 601 .compat_to_user = hashlimit_mt_compat_to_user,
592#endif 602#endif
593 .checkentry = hashlimit_checkentry, 603 .checkentry = hashlimit_mt_check,
594 .destroy = hashlimit_destroy, 604 .destroy = hashlimit_mt_destroy,
595 .me = THIS_MODULE 605 .me = THIS_MODULE
596 }, 606 },
607#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
597 { 608 {
598 .name = "hashlimit", 609 .name = "hashlimit",
599 .family = AF_INET6, 610 .family = AF_INET6,
600 .match = hashlimit_match, 611 .match = hashlimit_mt,
601 .matchsize = sizeof(struct xt_hashlimit_info), 612 .matchsize = sizeof(struct xt_hashlimit_info),
602#ifdef CONFIG_COMPAT 613#ifdef CONFIG_COMPAT
603 .compatsize = sizeof(struct compat_xt_hashlimit_info), 614 .compatsize = sizeof(struct compat_xt_hashlimit_info),
604 .compat_from_user = compat_from_user, 615 .compat_from_user = hashlimit_mt_compat_from_user,
605 .compat_to_user = compat_to_user, 616 .compat_to_user = hashlimit_mt_compat_to_user,
606#endif 617#endif
607 .checkentry = hashlimit_checkentry, 618 .checkentry = hashlimit_mt_check,
608 .destroy = hashlimit_destroy, 619 .destroy = hashlimit_mt_destroy,
609 .me = THIS_MODULE 620 .me = THIS_MODULE
610 }, 621 },
622#endif
611}; 623};
612 624
613/* PROC stuff */ 625/* PROC stuff */
@@ -670,6 +682,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family,
670 ntohs(ent->dst.dst_port), 682 ntohs(ent->dst.dst_port),
671 ent->rateinfo.credit, ent->rateinfo.credit_cap, 683 ent->rateinfo.credit, ent->rateinfo.credit_cap,
672 ent->rateinfo.cost); 684 ent->rateinfo.cost);
685#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
673 case AF_INET6: 686 case AF_INET6:
674 return seq_printf(s, "%ld " NIP6_FMT ":%u->" 687 return seq_printf(s, "%ld " NIP6_FMT ":%u->"
675 NIP6_FMT ":%u %u %u %u\n", 688 NIP6_FMT ":%u %u %u %u\n",
@@ -680,6 +693,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family,
680 ntohs(ent->dst.dst_port), 693 ntohs(ent->dst.dst_port),
681 ent->rateinfo.credit, ent->rateinfo.credit_cap, 694 ent->rateinfo.credit, ent->rateinfo.credit_cap,
682 ent->rateinfo.cost); 695 ent->rateinfo.cost);
696#endif
683 default: 697 default:
684 BUG(); 698 BUG();
685 return 0; 699 return 0;
@@ -728,11 +742,12 @@ static const struct file_operations dl_file_ops = {
728 .release = seq_release 742 .release = seq_release
729}; 743};
730 744
731static int __init xt_hashlimit_init(void) 745static int __init hashlimit_mt_init(void)
732{ 746{
733 int err; 747 int err;
734 748
735 err = xt_register_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); 749 err = xt_register_matches(hashlimit_mt_reg,
750 ARRAY_SIZE(hashlimit_mt_reg));
736 if (err < 0) 751 if (err < 0)
737 goto err1; 752 goto err1;
738 753
@@ -750,31 +765,36 @@ static int __init xt_hashlimit_init(void)
750 "entry\n"); 765 "entry\n");
751 goto err3; 766 goto err3;
752 } 767 }
768 err = 0;
769#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
753 hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); 770 hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
754 if (!hashlimit_procdir6) { 771 if (!hashlimit_procdir6) {
755 printk(KERN_ERR "xt_hashlimit: unable to create proc dir " 772 printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
756 "entry\n"); 773 "entry\n");
757 goto err4; 774 err = -ENOMEM;
758 } 775 }
759 return 0; 776#endif
760err4: 777 if (!err)
778 return 0;
761 remove_proc_entry("ipt_hashlimit", init_net.proc_net); 779 remove_proc_entry("ipt_hashlimit", init_net.proc_net);
762err3: 780err3:
763 kmem_cache_destroy(hashlimit_cachep); 781 kmem_cache_destroy(hashlimit_cachep);
764err2: 782err2:
765 xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); 783 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
766err1: 784err1:
767 return err; 785 return err;
768 786
769} 787}
770 788
771static void __exit xt_hashlimit_fini(void) 789static void __exit hashlimit_mt_exit(void)
772{ 790{
773 remove_proc_entry("ipt_hashlimit", init_net.proc_net); 791 remove_proc_entry("ipt_hashlimit", init_net.proc_net);
792#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
774 remove_proc_entry("ip6t_hashlimit", init_net.proc_net); 793 remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
794#endif
775 kmem_cache_destroy(hashlimit_cachep); 795 kmem_cache_destroy(hashlimit_cachep);
776 xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); 796 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
777} 797}
778 798
779module_init(xt_hashlimit_init); 799module_init(hashlimit_mt_init);
780module_exit(xt_hashlimit_fini); 800module_exit(hashlimit_mt_exit);
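The replacement hash_dst() above trades the modulo for a multiply: for a uniform 32-bit hash, the high 32 bits of the 64-bit product hash * size equal floor(hash / 2^32 * size), which lands in [0, size-1] with the same distribution as hash % size but without a divide. A standalone demonstration (userspace C, illustrative table size):

#include <stdint.h>
#include <stdio.h>

/* Map a uniform 32-bit hash onto [0, size-1] with one multiply:
 * ((u64)hash * size) >> 32 scales the hash down proportionally. */
static uint32_t bucket_of(uint32_t hash, uint32_t size)
{
        return ((uint64_t)hash * size) >> 32;
}

int main(void)
{
        const uint32_t size = 1024;

        printf("%u %u %u\n",
               bucket_of(0x00000000u, size),  /* 0        */
               bucket_of(0x80000000u, size),  /* size / 2 */
               bucket_of(0xffffffffu, size)); /* size - 1 */
        return 0;
}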
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index d842c4a6d63f..dada2905d66e 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -18,20 +18,16 @@
18 18
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); 20MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
21MODULE_DESCRIPTION("iptables helper match module"); 21MODULE_DESCRIPTION("Xtables: Related connection matching");
22MODULE_ALIAS("ipt_helper"); 22MODULE_ALIAS("ipt_helper");
23MODULE_ALIAS("ip6t_helper"); 23MODULE_ALIAS("ip6t_helper");
24 24
25 25
26static bool 26static bool
27match(const struct sk_buff *skb, 27helper_mt(const struct sk_buff *skb, const struct net_device *in,
28 const struct net_device *in, 28 const struct net_device *out, const struct xt_match *match,
29 const struct net_device *out, 29 const void *matchinfo, int offset, unsigned int protoff,
30 const struct xt_match *match, 30 bool *hotdrop)
31 const void *matchinfo,
32 int offset,
33 unsigned int protoff,
34 bool *hotdrop)
35{ 31{
36 const struct xt_helper_info *info = matchinfo; 32 const struct xt_helper_info *info = matchinfo;
37 const struct nf_conn *ct; 33 const struct nf_conn *ct;
@@ -61,61 +57,57 @@ match(const struct sk_buff *skb,
61 return ret; 57 return ret;
62} 58}
63 59
64static bool check(const char *tablename, 60static bool
65 const void *inf, 61helper_mt_check(const char *tablename, const void *inf,
66 const struct xt_match *match, 62 const struct xt_match *match, void *matchinfo,
67 void *matchinfo, 63 unsigned int hook_mask)
68 unsigned int hook_mask)
69{ 64{
70 struct xt_helper_info *info = matchinfo; 65 struct xt_helper_info *info = matchinfo;
71 66
72 if (nf_ct_l3proto_try_module_get(match->family) < 0) { 67 if (nf_ct_l3proto_try_module_get(match->family) < 0) {
73 printk(KERN_WARNING "can't load conntrack support for " 68 printk(KERN_WARNING "can't load conntrack support for "
74 "proto=%d\n", match->family); 69 "proto=%u\n", match->family);
75 return false; 70 return false;
76 } 71 }
77 info->name[29] = '\0'; 72 info->name[29] = '\0';
78 return true; 73 return true;
79} 74}
80 75
81static void 76static void helper_mt_destroy(const struct xt_match *match, void *matchinfo)
82destroy(const struct xt_match *match, void *matchinfo)
83{ 77{
84 nf_ct_l3proto_module_put(match->family); 78 nf_ct_l3proto_module_put(match->family);
85} 79}
86 80
87static struct xt_match xt_helper_match[] __read_mostly = { 81static struct xt_match helper_mt_reg[] __read_mostly = {
88 { 82 {
89 .name = "helper", 83 .name = "helper",
90 .family = AF_INET, 84 .family = AF_INET,
91 .checkentry = check, 85 .checkentry = helper_mt_check,
92 .match = match, 86 .match = helper_mt,
93 .destroy = destroy, 87 .destroy = helper_mt_destroy,
94 .matchsize = sizeof(struct xt_helper_info), 88 .matchsize = sizeof(struct xt_helper_info),
95 .me = THIS_MODULE, 89 .me = THIS_MODULE,
96 }, 90 },
97 { 91 {
98 .name = "helper", 92 .name = "helper",
99 .family = AF_INET6, 93 .family = AF_INET6,
100 .checkentry = check, 94 .checkentry = helper_mt_check,
101 .match = match, 95 .match = helper_mt,
102 .destroy = destroy, 96 .destroy = helper_mt_destroy,
103 .matchsize = sizeof(struct xt_helper_info), 97 .matchsize = sizeof(struct xt_helper_info),
104 .me = THIS_MODULE, 98 .me = THIS_MODULE,
105 }, 99 },
106}; 100};
107 101
108static int __init xt_helper_init(void) 102static int __init helper_mt_init(void)
109{ 103{
110 return xt_register_matches(xt_helper_match, 104 return xt_register_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
111 ARRAY_SIZE(xt_helper_match));
112} 105}
113 106
114static void __exit xt_helper_fini(void) 107static void __exit helper_mt_exit(void)
115{ 108{
116 xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); 109 xt_unregister_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg));
117} 110}
118 111
119module_init(xt_helper_init); 112module_init(helper_mt_init);
120module_exit(xt_helper_fini); 113module_exit(helper_mt_exit);
121
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
new file mode 100644
index 000000000000..dbea0e0893f3
--- /dev/null
+++ b/net/netfilter/xt_iprange.c
@@ -0,0 +1,180 @@
1/*
2 * xt_iprange - Netfilter module to match IP address ranges
3 *
4 * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 * (C) CC Computer Consultants GmbH, 2008
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/ip.h>
14#include <linux/ipv6.h>
15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv4/ipt_iprange.h>
17
18static bool
19iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in,
20 const struct net_device *out, const struct xt_match *match,
21 const void *matchinfo, int offset, unsigned int protoff,
22 bool *hotdrop)
23{
24 const struct ipt_iprange_info *info = matchinfo;
25 const struct iphdr *iph = ip_hdr(skb);
26
27 if (info->flags & IPRANGE_SRC) {
28 if ((ntohl(iph->saddr) < ntohl(info->src.min_ip)
29 || ntohl(iph->saddr) > ntohl(info->src.max_ip))
30 ^ !!(info->flags & IPRANGE_SRC_INV)) {
31 pr_debug("src IP %u.%u.%u.%u NOT in range %s"
32 "%u.%u.%u.%u-%u.%u.%u.%u\n",
33 NIPQUAD(iph->saddr),
34 info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
35 NIPQUAD(info->src.min_ip),
36 NIPQUAD(info->src.max_ip));
37 return false;
38 }
39 }
40 if (info->flags & IPRANGE_DST) {
41 if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip)
42 || ntohl(iph->daddr) > ntohl(info->dst.max_ip))
43 ^ !!(info->flags & IPRANGE_DST_INV)) {
44 pr_debug("dst IP %u.%u.%u.%u NOT in range %s"
45 "%u.%u.%u.%u-%u.%u.%u.%u\n",
46 NIPQUAD(iph->daddr),
47 info->flags & IPRANGE_DST_INV ? "(INV) " : "",
48 NIPQUAD(info->dst.min_ip),
49 NIPQUAD(info->dst.max_ip));
50 return false;
51 }
52 }
53 return true;
54}
55
56static bool
57iprange_mt4(const struct sk_buff *skb, const struct net_device *in,
58 const struct net_device *out, const struct xt_match *match,
59 const void *matchinfo, int offset, unsigned int protoff,
60 bool *hotdrop)
61{
62 const struct xt_iprange_mtinfo *info = matchinfo;
63 const struct iphdr *iph = ip_hdr(skb);
64 bool m;
65
66 if (info->flags & IPRANGE_SRC) {
67 m = ntohl(iph->saddr) < ntohl(info->src_min.ip);
68 m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
69 m ^= !!(info->flags & IPRANGE_SRC_INV);
70 if (m) {
71 pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
72 NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
73 NIPQUAD(iph->saddr),
74 (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "",
75 NIPQUAD(info->src_min.ip),
76 NIPQUAD(info->src_max.ip));
77 return false;
78 }
79 }
80 if (info->flags & IPRANGE_DST) {
81 m = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
82 m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
83 m ^= !!(info->flags & IPRANGE_DST_INV);
84 if (m) {
85 pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
86 NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
87 NIPQUAD(iph->daddr),
88 (info->flags & IPRANGE_DST_INV) ? "(INV) " : "",
89 NIPQUAD(info->dst_min.ip),
90 NIPQUAD(info->dst_max.ip));
91 return false;
92 }
93 }
94 return true;
95}
96
97static inline int
98iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
99{
100 unsigned int i;
101 u32 x, y;
102
103 for (i = 0; i < 4; ++i) {
104 x = ntohl(a->s6_addr32[i]);
105 y = ntohl(b->s6_addr32[i]);
106 if (x != y)
107 return x < y ? -1 : 1;
108 }
109 return 0;
110}
111
112static bool
113iprange_mt6(const struct sk_buff *skb, const struct net_device *in,
114 const struct net_device *out, const struct xt_match *match,
115 const void *matchinfo, int offset, unsigned int protoff,
116 bool *hotdrop)
117{
118 const struct xt_iprange_mtinfo *info = matchinfo;
119 const struct ipv6hdr *iph = ipv6_hdr(skb);
120 bool m;
121
122 if (info->flags & IPRANGE_SRC) {
123 m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
124 m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
125 m ^= !!(info->flags & IPRANGE_SRC_INV);
126 if (m)
127 return false;
128 }
129 if (info->flags & IPRANGE_DST) {
130 m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
131 m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
132 m ^= !!(info->flags & IPRANGE_DST_INV);
133 if (m)
134 return false;
135 }
136 return true;
137}
138
139static struct xt_match iprange_mt_reg[] __read_mostly = {
140 {
141 .name = "iprange",
142 .revision = 0,
143 .family = AF_INET,
144 .match = iprange_mt_v0,
145 .matchsize = sizeof(struct ipt_iprange_info),
146 .me = THIS_MODULE,
147 },
148 {
149 .name = "iprange",
150 .revision = 1,
151 .family = AF_INET,
152 .match = iprange_mt4,
153 .matchsize = sizeof(struct xt_iprange_mtinfo),
154 .me = THIS_MODULE,
155 },
156 {
157 .name = "iprange",
158 .revision = 1,
159 .family = AF_INET6,
160 .match = iprange_mt6,
161 .matchsize = sizeof(struct xt_iprange_mtinfo),
162 .me = THIS_MODULE,
163 },
164};
165
166static int __init iprange_mt_init(void)
167{
168 return xt_register_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
169}
170
171static void __exit iprange_mt_exit(void)
172{
173 xt_unregister_matches(iprange_mt_reg, ARRAY_SIZE(iprange_mt_reg));
174}
175
176module_init(iprange_mt_init);
177module_exit(iprange_mt_exit);
178MODULE_LICENSE("GPL");
179MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>, Jan Engelhardt <jengelh@computergmbh.de>");
180MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching");
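Since IPv6 addresses are stored as big-endian byte strings, a plain lexicographic comparison over the 16 bytes orders them as 128-bit integers, and the range test reduces to min <= addr <= max per direction. A standalone illustration using memcmp (a simplification; the kernel code above compares 32-bit words instead):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Network byte order makes memcmp() a numeric comparison. */
static bool in6_in_range(const struct in6_addr *a,
                         const struct in6_addr *min,
                         const struct in6_addr *max)
{
        return memcmp(a, min, sizeof(*a)) >= 0 &&
               memcmp(a, max, sizeof(*a)) <= 0;
}

int main(void)
{
        struct in6_addr a, lo, hi;

        inet_pton(AF_INET6, "2001:db8::5", &a);
        inet_pton(AF_INET6, "2001:db8::1", &lo);
        inet_pton(AF_INET6, "2001:db8::ff", &hi);
        printf("in range: %d\n", in6_in_range(&a, &lo, &hi));
        return 0;
}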
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 3dad173d9735..b8640f972950 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -15,20 +15,16 @@
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16 16
17MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); 17MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
18MODULE_DESCRIPTION("IP tables packet length matching module"); 18MODULE_DESCRIPTION("Xtables: Packet length (Layer3,4,5) match");
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20MODULE_ALIAS("ipt_length"); 20MODULE_ALIAS("ipt_length");
21MODULE_ALIAS("ip6t_length"); 21MODULE_ALIAS("ip6t_length");
22 22
23static bool 23static bool
24match(const struct sk_buff *skb, 24length_mt(const struct sk_buff *skb, const struct net_device *in,
25 const struct net_device *in, 25 const struct net_device *out, const struct xt_match *match,
26 const struct net_device *out, 26 const void *matchinfo, int offset, unsigned int protoff,
27 const struct xt_match *match, 27 bool *hotdrop)
28 const void *matchinfo,
29 int offset,
30 unsigned int protoff,
31 bool *hotdrop)
32{ 28{
33 const struct xt_length_info *info = matchinfo; 29 const struct xt_length_info *info = matchinfo;
34 u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); 30 u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -37,14 +33,10 @@ match(const struct sk_buff *skb,
37} 33}
38 34
39static bool 35static bool
40match6(const struct sk_buff *skb, 36length_mt6(const struct sk_buff *skb, const struct net_device *in,
41 const struct net_device *in, 37 const struct net_device *out, const struct xt_match *match,
42 const struct net_device *out, 38 const void *matchinfo, int offset, unsigned int protoff,
43 const struct xt_match *match, 39 bool *hotdrop)
44 const void *matchinfo,
45 int offset,
46 unsigned int protoff,
47 bool *hotdrop)
48{ 40{
49 const struct xt_length_info *info = matchinfo; 41 const struct xt_length_info *info = matchinfo;
50 const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + 42 const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
@@ -53,33 +45,32 @@ match6(const struct sk_buff *skb,
53 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; 45 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
54} 46}
55 47
56static struct xt_match xt_length_match[] __read_mostly = { 48static struct xt_match length_mt_reg[] __read_mostly = {
57 { 49 {
58 .name = "length", 50 .name = "length",
59 .family = AF_INET, 51 .family = AF_INET,
60 .match = match, 52 .match = length_mt,
61 .matchsize = sizeof(struct xt_length_info), 53 .matchsize = sizeof(struct xt_length_info),
62 .me = THIS_MODULE, 54 .me = THIS_MODULE,
63 }, 55 },
64 { 56 {
65 .name = "length", 57 .name = "length",
66 .family = AF_INET6, 58 .family = AF_INET6,
67 .match = match6, 59 .match = length_mt6,
68 .matchsize = sizeof(struct xt_length_info), 60 .matchsize = sizeof(struct xt_length_info),
69 .me = THIS_MODULE, 61 .me = THIS_MODULE,
70 }, 62 },
71}; 63};
72 64
73static int __init xt_length_init(void) 65static int __init length_mt_init(void)
74{ 66{
75 return xt_register_matches(xt_length_match, 67 return xt_register_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
76 ARRAY_SIZE(xt_length_match));
77} 68}
78 69
79static void __exit xt_length_fini(void) 70static void __exit length_mt_exit(void)
80{ 71{
81 xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match)); 72 xt_unregister_matches(length_mt_reg, ARRAY_SIZE(length_mt_reg));
82} 73}
83 74
84module_init(xt_length_init); 75module_init(length_mt_init);
85module_exit(xt_length_fini); 76module_exit(length_mt_exit);
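The two match routines differ only in how the layer-3 length is obtained: IPv4's tot_len already covers header plus payload, while IPv6's payload_len excludes the fixed 40-byte header, so length_mt6() adds sizeof(struct ipv6hdr) back. In sketch form (standalone C, illustrative values):

#include <stdint.h>
#include <stdio.h>

#define IPV6_HDR_LEN 40 /* fixed IPv6 header size */

/* IPv4: tot_len covers the whole datagram.
 * IPv6: payload_len excludes the fixed header, so add it back. */
static uint16_t l3_total_len(int is_ipv6, uint16_t len_field)
{
        return is_ipv6 ? len_field + IPV6_HDR_LEN : len_field;
}

static int length_matches(uint16_t pktlen, uint16_t min, uint16_t max,
                          int invert)
{
        return (pktlen >= min && pktlen <= max) ^ invert;
}

int main(void)
{
        /* a 20-byte IPv6 payload is 60 bytes on the wire */
        printf("%d\n", length_matches(l3_total_len(1, 20), 40, 100, 0));
        return 0;
}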
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index f263a77e57b7..aad9ab8d2046 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -16,7 +16,7 @@
16 16
17MODULE_LICENSE("GPL"); 17MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>"); 18MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
19MODULE_DESCRIPTION("iptables rate limit match"); 19MODULE_DESCRIPTION("Xtables: rate-limit match");
20MODULE_ALIAS("ipt_limit"); 20MODULE_ALIAS("ipt_limit");
21MODULE_ALIAS("ip6t_limit"); 21MODULE_ALIAS("ip6t_limit");
22 22
@@ -58,14 +58,10 @@ static DEFINE_SPINLOCK(limit_lock);
58#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) 58#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
59 59
60static bool 60static bool
61ipt_limit_match(const struct sk_buff *skb, 61limit_mt(const struct sk_buff *skb, const struct net_device *in,
62 const struct net_device *in, 62 const struct net_device *out, const struct xt_match *match,
63 const struct net_device *out, 63 const void *matchinfo, int offset, unsigned int protoff,
64 const struct xt_match *match, 64 bool *hotdrop)
65 const void *matchinfo,
66 int offset,
67 unsigned int protoff,
68 bool *hotdrop)
69{ 65{
70 struct xt_rateinfo *r = 66 struct xt_rateinfo *r =
71 ((const struct xt_rateinfo *)matchinfo)->master; 67 ((const struct xt_rateinfo *)matchinfo)->master;
@@ -100,11 +96,9 @@ user2credits(u_int32_t user)
100} 96}
101 97
102static bool 98static bool
103ipt_limit_checkentry(const char *tablename, 99limit_mt_check(const char *tablename, const void *inf,
104 const void *inf, 100 const struct xt_match *match, void *matchinfo,
105 const struct xt_match *match, 101 unsigned int hook_mask)
106 void *matchinfo,
107 unsigned int hook_mask)
108{ 102{
109 struct xt_rateinfo *r = matchinfo; 103 struct xt_rateinfo *r = matchinfo;
110 104
@@ -143,7 +137,7 @@ struct compat_xt_rateinfo {
143 137
144/* To keep the full "prev" timestamp, the upper 32 bits are stored in the 138/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
145 * master pointer, which does not need to be preserved. */ 139 * master pointer, which does not need to be preserved. */
146static void compat_from_user(void *dst, void *src) 140static void limit_mt_compat_from_user(void *dst, void *src)
147{ 141{
148 const struct compat_xt_rateinfo *cm = src; 142 const struct compat_xt_rateinfo *cm = src;
149 struct xt_rateinfo m = { 143 struct xt_rateinfo m = {
@@ -157,7 +151,7 @@ static void compat_from_user(void *dst, void *src)
157 memcpy(dst, &m, sizeof(m)); 151 memcpy(dst, &m, sizeof(m));
158} 152}
159 153
160static int compat_to_user(void __user *dst, void *src) 154static int limit_mt_compat_to_user(void __user *dst, void *src)
161{ 155{
162 const struct xt_rateinfo *m = src; 156 const struct xt_rateinfo *m = src;
163 struct compat_xt_rateinfo cm = { 157 struct compat_xt_rateinfo cm = {
@@ -173,39 +167,44 @@ static int compat_to_user(void __user *dst, void *src)
173} 167}
174#endif /* CONFIG_COMPAT */ 168#endif /* CONFIG_COMPAT */
175 169
176static struct xt_match xt_limit_match[] __read_mostly = { 170static struct xt_match limit_mt_reg[] __read_mostly = {
177 { 171 {
178 .name = "limit", 172 .name = "limit",
179 .family = AF_INET, 173 .family = AF_INET,
180 .checkentry = ipt_limit_checkentry, 174 .checkentry = limit_mt_check,
181 .match = ipt_limit_match, 175 .match = limit_mt,
182 .matchsize = sizeof(struct xt_rateinfo), 176 .matchsize = sizeof(struct xt_rateinfo),
183#ifdef CONFIG_COMPAT 177#ifdef CONFIG_COMPAT
184 .compatsize = sizeof(struct compat_xt_rateinfo), 178 .compatsize = sizeof(struct compat_xt_rateinfo),
185 .compat_from_user = compat_from_user, 179 .compat_from_user = limit_mt_compat_from_user,
186 .compat_to_user = compat_to_user, 180 .compat_to_user = limit_mt_compat_to_user,
187#endif 181#endif
188 .me = THIS_MODULE, 182 .me = THIS_MODULE,
189 }, 183 },
190 { 184 {
191 .name = "limit", 185 .name = "limit",
192 .family = AF_INET6, 186 .family = AF_INET6,
193 .checkentry = ipt_limit_checkentry, 187 .checkentry = limit_mt_check,
194 .match = ipt_limit_match, 188 .match = limit_mt,
195 .matchsize = sizeof(struct xt_rateinfo), 189 .matchsize = sizeof(struct xt_rateinfo),
190#ifdef CONFIG_COMPAT
191 .compatsize = sizeof(struct compat_xt_rateinfo),
192 .compat_from_user = limit_mt_compat_from_user,
193 .compat_to_user = limit_mt_compat_to_user,
194#endif
196 .me = THIS_MODULE, 195 .me = THIS_MODULE,
197 }, 196 },
198}; 197};
199 198
200static int __init xt_limit_init(void) 199static int __init limit_mt_init(void)
201{ 200{
202 return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); 201 return xt_register_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
203} 202}
204 203
205static void __exit xt_limit_fini(void) 204static void __exit limit_mt_exit(void)
206{ 205{
207 xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); 206 xt_unregister_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg));
208} 207}
209 208
210module_init(xt_limit_init); 209module_init(limit_mt_init);
211module_exit(xt_limit_fini); 210module_exit(limit_mt_exit);
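limit_mt() works as a token bucket: credits refill with elapsed time up to credit_cap, and each matched packet spends cost credits, permitting bursts up to the cap while bounding the long-run rate. A self-contained sketch with milliseconds standing in for jiffies (constants are illustrative, not the kernel's CREDITS_PER_JIFFY scaling):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Token bucket in the style of xt_limit, with milliseconds standing
 * in for jiffies. Rates and costs here are illustrative only. */
struct bucket {
        uint64_t prev_ms;    /* last refill time */
        uint64_t credit;     /* current tokens */
        uint64_t credit_cap; /* burst ceiling */
        uint64_t cost;       /* tokens per packet */
        uint64_t per_ms;     /* tokens gained per millisecond */
};

static bool bucket_allow(struct bucket *b, uint64_t now_ms)
{
        b->credit += (now_ms - b->prev_ms) * b->per_ms; /* refill */
        b->prev_ms = now_ms;
        if (b->credit > b->credit_cap)
                b->credit = b->credit_cap;
        if (b->credit >= b->cost) { /* enough tokens: spend and match */
                b->credit -= b->cost;
                return true;
        }
        return false;
}

int main(void)
{
        struct bucket b = { 0, 2000, 2000, 1000, 1 }; /* ~1 pkt/s, burst 2 */

        for (uint64_t t = 0; t <= 3000; t += 500)
                printf("t=%4llu ms: %s\n", (unsigned long long)t,
                       bucket_allow(&b, t) ? "match" : "drop");
        return 0;
}

With these numbers the initial burst drains over the first few probes, after which matches settle to roughly one per second.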
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 00490d777a0f..b3e96a0ec176 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -20,19 +20,14 @@
20 20
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 22MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
23MODULE_DESCRIPTION("iptables mac matching module"); 23MODULE_DESCRIPTION("Xtables: MAC address match");
24MODULE_ALIAS("ipt_mac"); 24MODULE_ALIAS("ipt_mac");
25MODULE_ALIAS("ip6t_mac"); 25MODULE_ALIAS("ip6t_mac");
26 26
27static bool 27static bool
28match(const struct sk_buff *skb, 28mac_mt(const struct sk_buff *skb, const struct net_device *in,
29 const struct net_device *in, 29 const struct net_device *out, const struct xt_match *match,
30 const struct net_device *out, 30 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
31 const struct xt_match *match,
32 const void *matchinfo,
33 int offset,
34 unsigned int protoff,
35 bool *hotdrop)
36{ 31{
37 const struct xt_mac_info *info = matchinfo; 32 const struct xt_mac_info *info = matchinfo;
38 33
@@ -44,38 +39,38 @@ match(const struct sk_buff *skb,
44 ^ info->invert); 39 ^ info->invert);
45} 40}
46 41
47static struct xt_match xt_mac_match[] __read_mostly = { 42static struct xt_match mac_mt_reg[] __read_mostly = {
48 { 43 {
49 .name = "mac", 44 .name = "mac",
50 .family = AF_INET, 45 .family = AF_INET,
51 .match = match, 46 .match = mac_mt,
52 .matchsize = sizeof(struct xt_mac_info), 47 .matchsize = sizeof(struct xt_mac_info),
53 .hooks = (1 << NF_IP_PRE_ROUTING) | 48 .hooks = (1 << NF_INET_PRE_ROUTING) |
54 (1 << NF_IP_LOCAL_IN) | 49 (1 << NF_INET_LOCAL_IN) |
55 (1 << NF_IP_FORWARD), 50 (1 << NF_INET_FORWARD),
56 .me = THIS_MODULE, 51 .me = THIS_MODULE,
57 }, 52 },
58 { 53 {
59 .name = "mac", 54 .name = "mac",
60 .family = AF_INET6, 55 .family = AF_INET6,
61 .match = match, 56 .match = mac_mt,
62 .matchsize = sizeof(struct xt_mac_info), 57 .matchsize = sizeof(struct xt_mac_info),
63 .hooks = (1 << NF_IP6_PRE_ROUTING) | 58 .hooks = (1 << NF_INET_PRE_ROUTING) |
64 (1 << NF_IP6_LOCAL_IN) | 59 (1 << NF_INET_LOCAL_IN) |
65 (1 << NF_IP6_FORWARD), 60 (1 << NF_INET_FORWARD),
66 .me = THIS_MODULE, 61 .me = THIS_MODULE,
67 }, 62 },
68}; 63};
69 64
70static int __init xt_mac_init(void) 65static int __init mac_mt_init(void)
71{ 66{
72 return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); 67 return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
73} 68}
74 69
75static void __exit xt_mac_fini(void) 70static void __exit mac_mt_exit(void)
76{ 71{
77 xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); 72 xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg));
78} 73}
79 74
80module_init(xt_mac_init); 75module_init(mac_mt_init);
81module_exit(xt_mac_fini); 76module_exit(mac_mt_exit);
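The mac match examines the source address of the link-layer header, which is why registration is limited to the PRE_ROUTING, LOCAL_IN and FORWARD hooks, the only points where a received Ethernet header is still reachable. The test itself is a six-byte comparison with optional inversion, roughly (standalone sketch):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

static bool mac_matches(const unsigned char *hw,
                        const unsigned char *want, bool invert)
{
        return (memcmp(hw, want, ETH_ALEN) == 0) ^ invert;
}

int main(void)
{
        const unsigned char src[ETH_ALEN]  = { 0x00, 0x16, 0x3e, 0, 0, 1 };
        const unsigned char want[ETH_ALEN] = { 0x00, 0x16, 0x3e, 0, 0, 1 };

        printf("%d\n", mac_matches(src, want, false));
        return 0;
}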
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index c02a7f8f3925..9f78f6120fbd 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -1,10 +1,13 @@
1/* Kernel module to match NFMARK values. */ 1/*
2 2 * xt_mark - Netfilter module to match NFMARK value
3/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca> 3 *
4 * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
5 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
6 * Jan Engelhardt <jengelh@computergmbh.de>
4 * 7 *
5 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
8 */ 11 */
9 12
10#include <linux/module.h> 13#include <linux/module.h>
@@ -15,19 +18,15 @@
15 18
16MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
17MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 20MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
18MODULE_DESCRIPTION("iptables mark matching module"); 21MODULE_DESCRIPTION("Xtables: packet mark match");
19MODULE_ALIAS("ipt_mark"); 22MODULE_ALIAS("ipt_mark");
20MODULE_ALIAS("ip6t_mark"); 23MODULE_ALIAS("ip6t_mark");
21 24
22static bool 25static bool
23match(const struct sk_buff *skb, 26mark_mt_v0(const struct sk_buff *skb, const struct net_device *in,
24 const struct net_device *in, 27 const struct net_device *out, const struct xt_match *match,
25 const struct net_device *out, 28 const void *matchinfo, int offset, unsigned int protoff,
26 const struct xt_match *match, 29 bool *hotdrop)
27 const void *matchinfo,
28 int offset,
29 unsigned int protoff,
30 bool *hotdrop)
31{ 30{
32 const struct xt_mark_info *info = matchinfo; 31 const struct xt_mark_info *info = matchinfo;
33 32
@@ -35,11 +34,19 @@ match(const struct sk_buff *skb,
35} 34}
36 35
37static bool 36static bool
38checkentry(const char *tablename, 37mark_mt(const struct sk_buff *skb, const struct net_device *in,
39 const void *entry, 38 const struct net_device *out, const struct xt_match *match,
40 const struct xt_match *match, 39 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
41 void *matchinfo, 40{
42 unsigned int hook_mask) 41 const struct xt_mark_mtinfo1 *info = matchinfo;
42
43 return ((skb->mark & info->mask) == info->mark) ^ info->invert;
44}
45
46static bool
47mark_mt_check_v0(const char *tablename, const void *entry,
48 const struct xt_match *match, void *matchinfo,
49 unsigned int hook_mask)
43{ 50{
44 const struct xt_mark_info *minfo = matchinfo; 51 const struct xt_mark_info *minfo = matchinfo;
45 52
@@ -58,7 +65,7 @@ struct compat_xt_mark_info {
58 u_int16_t __pad2; 65 u_int16_t __pad2;
59}; 66};
60 67
61static void compat_from_user(void *dst, void *src) 68static void mark_mt_compat_from_user_v0(void *dst, void *src)
62{ 69{
63 const struct compat_xt_mark_info *cm = src; 70 const struct compat_xt_mark_info *cm = src;
64 struct xt_mark_info m = { 71 struct xt_mark_info m = {
@@ -69,7 +76,7 @@ static void compat_from_user(void *dst, void *src)
69 memcpy(dst, &m, sizeof(m)); 76 memcpy(dst, &m, sizeof(m));
70} 77}
71 78
72static int compat_to_user(void __user *dst, void *src) 79static int mark_mt_compat_to_user_v0(void __user *dst, void *src)
73{ 80{
74 const struct xt_mark_info *m = src; 81 const struct xt_mark_info *m = src;
75 struct compat_xt_mark_info cm = { 82 struct compat_xt_mark_info cm = {
@@ -81,39 +88,62 @@ static int compat_to_user(void __user *dst, void *src)
81} 88}
82#endif /* CONFIG_COMPAT */ 89#endif /* CONFIG_COMPAT */
83 90
84static struct xt_match xt_mark_match[] __read_mostly = { 91static struct xt_match mark_mt_reg[] __read_mostly = {
85 { 92 {
86 .name = "mark", 93 .name = "mark",
94 .revision = 0,
87 .family = AF_INET, 95 .family = AF_INET,
88 .checkentry = checkentry, 96 .checkentry = mark_mt_check_v0,
89 .match = match, 97 .match = mark_mt_v0,
90 .matchsize = sizeof(struct xt_mark_info), 98 .matchsize = sizeof(struct xt_mark_info),
91#ifdef CONFIG_COMPAT 99#ifdef CONFIG_COMPAT
92 .compatsize = sizeof(struct compat_xt_mark_info), 100 .compatsize = sizeof(struct compat_xt_mark_info),
93 .compat_from_user = compat_from_user, 101 .compat_from_user = mark_mt_compat_from_user_v0,
94 .compat_to_user = compat_to_user, 102 .compat_to_user = mark_mt_compat_to_user_v0,
95#endif 103#endif
96 .me = THIS_MODULE, 104 .me = THIS_MODULE,
97 }, 105 },
98 { 106 {
99 .name = "mark", 107 .name = "mark",
108 .revision = 0,
100 .family = AF_INET6, 109 .family = AF_INET6,
101 .checkentry = checkentry, 110 .checkentry = mark_mt_check_v0,
102 .match = match, 111 .match = mark_mt_v0,
103 .matchsize = sizeof(struct xt_mark_info), 112 .matchsize = sizeof(struct xt_mark_info),
113#ifdef CONFIG_COMPAT
114 .compatsize = sizeof(struct compat_xt_mark_info),
115 .compat_from_user = mark_mt_compat_from_user_v0,
116 .compat_to_user = mark_mt_compat_to_user_v0,
117#endif
104 .me = THIS_MODULE, 118 .me = THIS_MODULE,
105 }, 119 },
120 {
121 .name = "mark",
122 .revision = 1,
123 .family = AF_INET,
124 .match = mark_mt,
125 .matchsize = sizeof(struct xt_mark_mtinfo1),
126 .me = THIS_MODULE,
127 },
128 {
129 .name = "mark",
130 .revision = 1,
131 .family = AF_INET6,
132 .match = mark_mt,
133 .matchsize = sizeof(struct xt_mark_mtinfo1),
134 .me = THIS_MODULE,
135 },
106}; 136};
107 137
108static int __init xt_mark_init(void) 138static int __init mark_mt_init(void)
109{ 139{
110 return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); 140 return xt_register_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg));
111} 141}
112 142
113static void __exit xt_mark_fini(void) 143static void __exit mark_mt_exit(void)
114{ 144{
115 xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); 145 xt_unregister_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg));
116} 146}
117 147
118module_init(xt_mark_init); 148module_init(mark_mt_init);
119module_exit(xt_mark_fini); 149module_exit(mark_mt_exit);
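The revision-0 userspace structure (xt_mark_info) carries unsigned long fields, whose size differs between 32-bit and 64-bit userland; that is what the compat_from_user/compat_to_user glue above translates. Revision 1's xt_mark_mtinfo1 uses fixed-width fields, so the new entries need no compat handlers at all. A sketch of the difference (hypothetical stand-in structs, not the kernel headers):

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the two ABIs: long-based fields change size across
 * 32-/64-bit userland and need translation; fixed-width ones do not. */
struct mark_info_v0 {
        unsigned long mark, mask; /* 4 or 8 bytes each */
        uint8_t invert;
};

struct mark_info_v1 {
        uint32_t mark, mask;      /* same size on every ABI */
        uint8_t invert;
};

int main(void)
{
        printf("v0: %zu bytes, v1: %zu bytes\n",
               sizeof(struct mark_info_v0), sizeof(struct mark_info_v1));
        return 0;
}

The revision-1 test itself also gains a mask, matching when (skb->mark & mask) == mark, as mark_mt() above shows.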
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index e8ae10284acd..31daa8192422 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -22,7 +22,7 @@
22 22
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 24MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
25MODULE_DESCRIPTION("x_tables multiple port match module"); 25MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP and DCCP");
26MODULE_ALIAS("ipt_multiport"); 26MODULE_ALIAS("ipt_multiport");
27MODULE_ALIAS("ip6t_multiport"); 27MODULE_ALIAS("ip6t_multiport");
28 28
@@ -34,8 +34,8 @@ MODULE_ALIAS("ip6t_multiport");
34 34
35/* Returns 1 if the port is matched by the test, 0 otherwise. */ 35/* Returns 1 if the port is matched by the test, 0 otherwise. */
36static inline bool 36static inline bool
37ports_match(const u_int16_t *portlist, enum xt_multiport_flags flags, 37ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags,
38 u_int8_t count, u_int16_t src, u_int16_t dst) 38 u_int8_t count, u_int16_t src, u_int16_t dst)
39{ 39{
40 unsigned int i; 40 unsigned int i;
41 for (i = 0; i < count; i++) { 41 for (i = 0; i < count; i++) {
@@ -95,14 +95,10 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
95} 95}
96 96
97static bool 97static bool
98match(const struct sk_buff *skb, 98multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in,
99 const struct net_device *in, 99 const struct net_device *out, const struct xt_match *match,
100 const struct net_device *out, 100 const void *matchinfo, int offset, unsigned int protoff,
101 const struct xt_match *match, 101 bool *hotdrop)
102 const void *matchinfo,
103 int offset,
104 unsigned int protoff,
105 bool *hotdrop)
106{ 102{
107 __be16 _ports[2], *pptr; 103 __be16 _ports[2], *pptr;
108 const struct xt_multiport *multiinfo = matchinfo; 104 const struct xt_multiport *multiinfo = matchinfo;
@@ -120,20 +116,15 @@ match(const struct sk_buff *skb,
120 return false; 116 return false;
121 } 117 }
122 118
123 return ports_match(multiinfo->ports, 119 return ports_match_v0(multiinfo->ports, multiinfo->flags,
124 multiinfo->flags, multiinfo->count, 120 multiinfo->count, ntohs(pptr[0]), ntohs(pptr[1]));
125 ntohs(pptr[0]), ntohs(pptr[1]));
126} 121}
127 122
128static bool 123static bool
129match_v1(const struct sk_buff *skb, 124multiport_mt(const struct sk_buff *skb, const struct net_device *in,
130 const struct net_device *in, 125 const struct net_device *out, const struct xt_match *match,
131 const struct net_device *out, 126 const void *matchinfo, int offset, unsigned int protoff,
132 const struct xt_match *match, 127 bool *hotdrop)
133 const void *matchinfo,
134 int offset,
135 unsigned int protoff,
136 bool *hotdrop)
137{ 128{
138 __be16 _ports[2], *pptr; 129 __be16 _ports[2], *pptr;
139 const struct xt_multiport_v1 *multiinfo = matchinfo; 130 const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -173,11 +164,9 @@ check(u_int16_t proto,
173 164
174/* Called when user tries to insert an entry of this type. */ 165/* Called when user tries to insert an entry of this type. */
175static bool 166static bool
176checkentry(const char *tablename, 167multiport_mt_check_v0(const char *tablename, const void *info,
177 const void *info, 168 const struct xt_match *match, void *matchinfo,
178 const struct xt_match *match, 169 unsigned int hook_mask)
179 void *matchinfo,
180 unsigned int hook_mask)
181{ 170{
182 const struct ipt_ip *ip = info; 171 const struct ipt_ip *ip = info;
183 const struct xt_multiport *multiinfo = matchinfo; 172 const struct xt_multiport *multiinfo = matchinfo;
@@ -187,11 +176,9 @@ checkentry(const char *tablename,
187} 176}
188 177
189static bool 178static bool
190checkentry_v1(const char *tablename, 179multiport_mt_check(const char *tablename, const void *info,
191 const void *info, 180 const struct xt_match *match, void *matchinfo,
192 const struct xt_match *match, 181 unsigned int hook_mask)
193 void *matchinfo,
194 unsigned int hook_mask)
195{ 182{
196 const struct ipt_ip *ip = info; 183 const struct ipt_ip *ip = info;
197 const struct xt_multiport_v1 *multiinfo = matchinfo; 184 const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -201,11 +188,9 @@ checkentry_v1(const char *tablename,
201} 188}
202 189
203static bool 190static bool
204checkentry6(const char *tablename, 191multiport_mt6_check_v0(const char *tablename, const void *info,
205 const void *info, 192 const struct xt_match *match, void *matchinfo,
206 const struct xt_match *match, 193 unsigned int hook_mask)
207 void *matchinfo,
208 unsigned int hook_mask)
209{ 194{
210 const struct ip6t_ip6 *ip = info; 195 const struct ip6t_ip6 *ip = info;
211 const struct xt_multiport *multiinfo = matchinfo; 196 const struct xt_multiport *multiinfo = matchinfo;
@@ -215,11 +200,9 @@ checkentry6(const char *tablename,
215} 200}
216 201
217static bool 202static bool
218checkentry6_v1(const char *tablename, 203multiport_mt6_check(const char *tablename, const void *info,
219 const void *info, 204 const struct xt_match *match, void *matchinfo,
220 const struct xt_match *match, 205 unsigned int hook_mask)
221 void *matchinfo,
222 unsigned int hook_mask)
223{ 206{
224 const struct ip6t_ip6 *ip = info; 207 const struct ip6t_ip6 *ip = info;
225 const struct xt_multiport_v1 *multiinfo = matchinfo; 208 const struct xt_multiport_v1 *multiinfo = matchinfo;
@@ -228,13 +211,13 @@ checkentry6_v1(const char *tablename,
228 multiinfo->count); 211 multiinfo->count);
229} 212}
230 213
231static struct xt_match xt_multiport_match[] __read_mostly = { 214static struct xt_match multiport_mt_reg[] __read_mostly = {
232 { 215 {
233 .name = "multiport", 216 .name = "multiport",
234 .family = AF_INET, 217 .family = AF_INET,
235 .revision = 0, 218 .revision = 0,
236 .checkentry = checkentry, 219 .checkentry = multiport_mt_check_v0,
237 .match = match, 220 .match = multiport_mt_v0,
238 .matchsize = sizeof(struct xt_multiport), 221 .matchsize = sizeof(struct xt_multiport),
239 .me = THIS_MODULE, 222 .me = THIS_MODULE,
240 }, 223 },
@@ -242,8 +225,8 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
242 .name = "multiport", 225 .name = "multiport",
243 .family = AF_INET, 226 .family = AF_INET,
244 .revision = 1, 227 .revision = 1,
245 .checkentry = checkentry_v1, 228 .checkentry = multiport_mt_check,
246 .match = match_v1, 229 .match = multiport_mt,
247 .matchsize = sizeof(struct xt_multiport_v1), 230 .matchsize = sizeof(struct xt_multiport_v1),
248 .me = THIS_MODULE, 231 .me = THIS_MODULE,
249 }, 232 },
@@ -251,8 +234,8 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
251 .name = "multiport", 234 .name = "multiport",
252 .family = AF_INET6, 235 .family = AF_INET6,
253 .revision = 0, 236 .revision = 0,
254 .checkentry = checkentry6, 237 .checkentry = multiport_mt6_check_v0,
255 .match = match, 238 .match = multiport_mt_v0,
256 .matchsize = sizeof(struct xt_multiport), 239 .matchsize = sizeof(struct xt_multiport),
257 .me = THIS_MODULE, 240 .me = THIS_MODULE,
258 }, 241 },
@@ -260,24 +243,23 @@ static struct xt_match xt_multiport_match[] __read_mostly = {
260 .name = "multiport", 243 .name = "multiport",
261 .family = AF_INET6, 244 .family = AF_INET6,
262 .revision = 1, 245 .revision = 1,
263 .checkentry = checkentry6_v1, 246 .checkentry = multiport_mt6_check,
264 .match = match_v1, 247 .match = multiport_mt,
265 .matchsize = sizeof(struct xt_multiport_v1), 248 .matchsize = sizeof(struct xt_multiport_v1),
266 .me = THIS_MODULE, 249 .me = THIS_MODULE,
267 }, 250 },
268}; 251};
269 252
270static int __init xt_multiport_init(void) 253static int __init multiport_mt_init(void)
271{ 254{
272 return xt_register_matches(xt_multiport_match, 255 return xt_register_matches(multiport_mt_reg,
273 ARRAY_SIZE(xt_multiport_match)); 256 ARRAY_SIZE(multiport_mt_reg));
274} 257}
275 258
276static void __exit xt_multiport_fini(void) 259static void __exit multiport_mt_exit(void)
277{ 260{
278 xt_unregister_matches(xt_multiport_match, 261 xt_unregister_matches(multiport_mt_reg, ARRAY_SIZE(multiport_mt_reg));
279 ARRAY_SIZE(xt_multiport_match));
280} 262}
281 263
282module_init(xt_multiport_init); 264module_init(multiport_mt_init);
283module_exit(xt_multiport_fini); 265module_exit(multiport_mt_exit);
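multiport revision 1 (whose ports_match_v1() is referenced above) also supports port ranges in its list. A simplified standalone walk over such a list, assuming a layout where a per-entry flag marks ports[i] as the low bound of a range ending at ports[i+1] (an assumption for illustration, not necessarily the exact kernel layout):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* pflags[i] set means ports[i]..ports[i+1] is an inclusive range. */
static bool port_in_list(const uint16_t *ports, const uint8_t *pflags,
                         unsigned int count, uint16_t port)
{
        for (unsigned int i = 0; i < count; ++i) {
                if (pflags[i] && i + 1 < count) {
                        if (port >= ports[i] && port <= ports[i + 1])
                                return true;
                        ++i; /* skip the range's upper bound */
                } else if (port == ports[i]) {
                        return true;
                }
        }
        return false;
}

int main(void)
{
        uint16_t ports[] = { 22, 80, 443, 8000, 8100 };
        uint8_t pflags[] = {  0,  0,   0,    1,    0 }; /* 8000-8100 */

        printf("%d %d %d\n",
               port_in_list(ports, pflags, 5, 80),
               port_in_list(ports, pflags, 5, 8050),
               port_in_list(ports, pflags, 5, 8101));
        return 0;
}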
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
new file mode 100644
index 000000000000..d382f9cc38b0
--- /dev/null
+++ b/net/netfilter/xt_owner.c
@@ -0,0 +1,211 @@
1/*
2 * Kernel module to match various things tied to sockets associated with
3 * locally generated outgoing packets.
4 *
5 * (C) 2000 Marc Boucher <marc@mbsi.ca>
6 *
7 * Copyright © CC Computer Consultants GmbH, 2007
8 * Contact: <jengelh@computergmbh.de>
9 *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <net/sock.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_owner.h>
+#include <linux/netfilter_ipv4/ipt_owner.h>
+#include <linux/netfilter_ipv6/ip6t_owner.h>
+
+static bool
+owner_mt_v0(const struct sk_buff *skb, const struct net_device *in,
+            const struct net_device *out, const struct xt_match *match,
+            const void *matchinfo, int offset, unsigned int protoff,
+            bool *hotdrop)
+{
+	const struct ipt_owner_info *info = matchinfo;
+	const struct file *filp;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return false;
+
+	if (info->match & IPT_OWNER_UID)
+		if ((filp->f_uid != info->uid) ^
+		    !!(info->invert & IPT_OWNER_UID))
+			return false;
+
+	if (info->match & IPT_OWNER_GID)
+		if ((filp->f_gid != info->gid) ^
+		    !!(info->invert & IPT_OWNER_GID))
+			return false;
+
+	return true;
+}
+
+static bool
+owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
+{
+	const struct ip6t_owner_info *info = matchinfo;
+	const struct file *filp;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return false;
+
+	if (info->match & IP6T_OWNER_UID)
+		if ((filp->f_uid != info->uid) ^
+		    !!(info->invert & IP6T_OWNER_UID))
+			return false;
+
+	if (info->match & IP6T_OWNER_GID)
+		if ((filp->f_gid != info->gid) ^
+		    !!(info->invert & IP6T_OWNER_GID))
+			return false;
+
+	return true;
+}
+
+static bool
+owner_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
+{
+	const struct xt_owner_match_info *info = matchinfo;
+	const struct file *filp;
+
+	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+		return (info->match ^ info->invert) == 0;
+	else if (info->match & info->invert & XT_OWNER_SOCKET)
+		/*
+		 * Socket exists but user wanted ! --socket-exists.
+		 * (Single ampersands intended.)
+		 */
+		return false;
+
+	filp = skb->sk->sk_socket->file;
+	if (filp == NULL)
+		return ((info->match ^ info->invert) &
+		       (XT_OWNER_UID | XT_OWNER_GID)) == 0;
+
+	if (info->match & XT_OWNER_UID)
+		if ((filp->f_uid != info->uid) ^
+		    !!(info->invert & XT_OWNER_UID))
+			return false;
+
+	if (info->match & XT_OWNER_GID)
+		if ((filp->f_gid != info->gid) ^
+		    !!(info->invert & XT_OWNER_GID))
+			return false;
+
+	return true;
+}
+
+static bool
+owner_mt_check_v0(const char *tablename, const void *ip,
+                  const struct xt_match *match, void *matchinfo,
+                  unsigned int hook_mask)
+{
+	const struct ipt_owner_info *info = matchinfo;
+
+	if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) {
+		printk(KERN_WARNING KBUILD_MODNAME
+		       ": PID, SID and command matching is not "
+		       "supported anymore\n");
+		return false;
+	}
+
+	return true;
+}
+
+static bool
+owner_mt6_check_v0(const char *tablename, const void *ip,
+                   const struct xt_match *match, void *matchinfo,
+                   unsigned int hook_mask)
+{
+	const struct ip6t_owner_info *info = matchinfo;
+
+	if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) {
+		printk(KERN_WARNING KBUILD_MODNAME
+		       ": PID and SID matching is not supported anymore\n");
+		return false;
+	}
+
+	return true;
+}
+
+static struct xt_match owner_mt_reg[] __read_mostly = {
+	{
+		.name = "owner",
+		.revision = 0,
+		.family = AF_INET,
+		.match = owner_mt_v0,
+		.matchsize = sizeof(struct ipt_owner_info),
+		.checkentry = owner_mt_check_v0,
+		.hooks = (1 << NF_INET_LOCAL_OUT) |
+		         (1 << NF_INET_POST_ROUTING),
+		.me = THIS_MODULE,
+	},
+	{
+		.name = "owner",
+		.revision = 0,
+		.family = AF_INET6,
+		.match = owner_mt6_v0,
+		.matchsize = sizeof(struct ip6t_owner_info),
+		.checkentry = owner_mt6_check_v0,
+		.hooks = (1 << NF_INET_LOCAL_OUT) |
+		         (1 << NF_INET_POST_ROUTING),
+		.me = THIS_MODULE,
+	},
+	{
+		.name = "owner",
+		.revision = 1,
+		.family = AF_INET,
+		.match = owner_mt,
+		.matchsize = sizeof(struct xt_owner_match_info),
+		.hooks = (1 << NF_INET_LOCAL_OUT) |
+		         (1 << NF_INET_POST_ROUTING),
+		.me = THIS_MODULE,
+	},
+	{
+		.name = "owner",
+		.revision = 1,
+		.family = AF_INET6,
+		.match = owner_mt,
+		.matchsize = sizeof(struct xt_owner_match_info),
+		.hooks = (1 << NF_INET_LOCAL_OUT) |
+		         (1 << NF_INET_POST_ROUTING),
+		.me = THIS_MODULE,
+	},
+};
+
+static int __init owner_mt_init(void)
+{
+	return xt_register_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg));
+}
+
+static void __exit owner_mt_exit(void)
+{
+	xt_unregister_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg));
+}
+
+module_init(owner_mt_init);
+module_exit(owner_mt_exit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_DESCRIPTION("Xtables: socket owner matching");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_owner");
+MODULE_ALIAS("ip6t_owner");
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index a4bab043a6d1..4ec1094bda92 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -16,19 +16,15 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-MODULE_DESCRIPTION("iptables bridge physical device match module");
+MODULE_DESCRIPTION("Xtables: Bridge physical device match");
 MODULE_ALIAS("ipt_physdev");
 MODULE_ALIAS("ip6t_physdev");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+physdev_mt(const struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, const struct xt_match *match,
+           const void *matchinfo, int offset, unsigned int protoff,
+           bool *hotdrop)
 {
 	int i;
 	static const char nulldevname[IFNAMSIZ];
@@ -99,11 +95,9 @@ match_outdev:
 }
 
 static bool
-checkentry(const char *tablename,
-           const void *ip,
-           const struct xt_match *match,
-           void *matchinfo,
-           unsigned int hook_mask)
+physdev_mt_check(const char *tablename, const void *ip,
+                 const struct xt_match *match, void *matchinfo,
+                 unsigned int hook_mask)
 {
 	const struct xt_physdev_info *info = matchinfo;
 
@@ -113,46 +107,45 @@ checkentry(const char *tablename,
 	if (info->bitmask & XT_PHYSDEV_OP_OUT &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
-	    hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-			 (1 << NF_IP_POST_ROUTING))) {
+	    hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+			 (1 << NF_INET_POST_ROUTING))) {
 		printk(KERN_WARNING "physdev match: using --physdev-out in the "
 		       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
 		       "traffic is not supported anymore.\n");
-		if (hook_mask & (1 << NF_IP_LOCAL_OUT))
+		if (hook_mask & (1 << NF_INET_LOCAL_OUT))
 			return false;
 	}
 	return true;
 }
 
-static struct xt_match xt_physdev_match[] __read_mostly = {
+static struct xt_match physdev_mt_reg[] __read_mostly = {
 	{
 		.name = "physdev",
 		.family = AF_INET,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = physdev_mt_check,
+		.match = physdev_mt,
 		.matchsize = sizeof(struct xt_physdev_info),
 		.me = THIS_MODULE,
 	},
 	{
 		.name = "physdev",
 		.family = AF_INET6,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = physdev_mt_check,
+		.match = physdev_mt,
 		.matchsize = sizeof(struct xt_physdev_info),
 		.me = THIS_MODULE,
 	},
 };
 
-static int __init xt_physdev_init(void)
+static int __init physdev_mt_init(void)
 {
-	return xt_register_matches(xt_physdev_match,
-				   ARRAY_SIZE(xt_physdev_match));
+	return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
 }
 
-static void __exit xt_physdev_fini(void)
+static void __exit physdev_mt_exit(void)
 {
-	xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match));
+	xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg));
 }
 
-module_init(xt_physdev_init);
-module_exit(xt_physdev_fini);
+module_init(physdev_mt_init);
+module_exit(physdev_mt_exit);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index a52925f12f35..7936f7e23254 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -11,65 +11,66 @@
 #include <linux/if_packet.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 
 #include <linux/netfilter/xt_pkttype.h>
 #include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michal Ludvig <michal@logix.cz>");
-MODULE_DESCRIPTION("IP tables match to match on linklayer packet type");
+MODULE_DESCRIPTION("Xtables: link layer packet type match");
 MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  const struct xt_match *match,
-		  const void *matchinfo,
-		  int offset,
-		  unsigned int protoff,
-		  bool *hotdrop)
+static bool
+pkttype_mt(const struct sk_buff *skb, const struct net_device *in,
+           const struct net_device *out, const struct xt_match *match,
+           const void *matchinfo, int offset, unsigned int protoff,
+           bool *hotdrop)
 {
-	u_int8_t type;
 	const struct xt_pkttype_info *info = matchinfo;
+	u_int8_t type;
 
-	if (skb->pkt_type == PACKET_LOOPBACK)
-		type = MULTICAST(ip_hdr(skb)->daddr)
-			? PACKET_MULTICAST
-			: PACKET_BROADCAST;
-	else
+	if (skb->pkt_type != PACKET_LOOPBACK)
 		type = skb->pkt_type;
+	else if (match->family == AF_INET &&
+	    ipv4_is_multicast(ip_hdr(skb)->daddr))
+		type = PACKET_MULTICAST;
+	else if (match->family == AF_INET6 &&
+	    ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
+		type = PACKET_MULTICAST;
+	else
+		type = PACKET_BROADCAST;
 
 	return (type == info->pkttype) ^ info->invert;
 }
 
-static struct xt_match xt_pkttype_match[] __read_mostly = {
+static struct xt_match pkttype_mt_reg[] __read_mostly = {
 	{
 		.name = "pkttype",
 		.family = AF_INET,
-		.match = match,
+		.match = pkttype_mt,
 		.matchsize = sizeof(struct xt_pkttype_info),
 		.me = THIS_MODULE,
 	},
 	{
 		.name = "pkttype",
 		.family = AF_INET6,
-		.match = match,
+		.match = pkttype_mt,
 		.matchsize = sizeof(struct xt_pkttype_info),
 		.me = THIS_MODULE,
 	},
 };
 
-static int __init xt_pkttype_init(void)
+static int __init pkttype_mt_init(void)
 {
-	return xt_register_matches(xt_pkttype_match,
-				   ARRAY_SIZE(xt_pkttype_match));
+	return xt_register_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
 }
 
-static void __exit xt_pkttype_fini(void)
+static void __exit pkttype_mt_exit(void)
 {
-	xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match));
+	xt_unregister_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg));
 }
 
-module_init(xt_pkttype_init);
-module_exit(xt_pkttype_fini);
+module_init(pkttype_mt_init);
+module_exit(pkttype_mt_exit);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 6d6d3b7fcbb5..9e918add2282 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -13,37 +13,38 @@
 #include <linux/init.h>
 #include <net/xfrm.h>
 
+#include <linux/netfilter.h>
 #include <linux/netfilter/xt_policy.h>
 #include <linux/netfilter/x_tables.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables IPsec policy matching module");
+MODULE_DESCRIPTION("Xtables: IPsec policy match");
 MODULE_LICENSE("GPL");
 
 static inline bool
-xt_addr_cmp(const union xt_policy_addr *a1, const union xt_policy_addr *m,
-	    const union xt_policy_addr *a2, unsigned short family)
+xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m,
+	    const union nf_inet_addr *a2, unsigned short family)
 {
 	switch (family) {
 	case AF_INET:
-		return !((a1->a4.s_addr ^ a2->a4.s_addr) & m->a4.s_addr);
+		return ((a1->ip ^ a2->ip) & m->ip) == 0;
 	case AF_INET6:
-		return !ipv6_masked_addr_cmp(&a1->a6, &m->a6, &a2->a6);
+		return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0;
 	}
 	return false;
 }
 
-static inline bool
+static bool
 match_xfrm_state(const struct xfrm_state *x, const struct xt_policy_elem *e,
 		 unsigned short family)
 {
 #define MATCH_ADDR(x,y,z)	(!e->match.x || \
-				 (xt_addr_cmp(&e->x, &e->y, z, family) \
+				 (xt_addr_cmp(&e->x, &e->y, (const union nf_inet_addr *)(z), family) \
 				  ^ e->invert.x))
 #define MATCH(x,y)	(!e->match.x || ((e->x == (y)) ^ e->invert.x))
 
-	return MATCH_ADDR(saddr, smask, (union xt_policy_addr *)&x->props.saddr) &&
-	       MATCH_ADDR(daddr, dmask, (union xt_policy_addr *)&x->id.daddr) &&
+	return MATCH_ADDR(saddr, smask, &x->props.saddr) &&
+	       MATCH_ADDR(daddr, dmask, &x->id.daddr) &&
 	       MATCH(proto, x->id.proto) &&
 	       MATCH(mode, x->props.mode) &&
 	       MATCH(spi, x->id.spi) &&
@@ -108,14 +109,11 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 	return strict ? i == info->len : 0;
 }
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  const struct xt_match *match,
-		  const void *matchinfo,
-		  int offset,
-		  unsigned int protoff,
-		  bool *hotdrop)
+static bool
+policy_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct xt_policy_info *info = matchinfo;
 	int ret;
@@ -133,9 +131,10 @@ static bool match(const struct sk_buff *skb,
 	return ret;
 }
 
-static bool checkentry(const char *tablename, const void *ip_void,
-		       const struct xt_match *match,
-		       void *matchinfo, unsigned int hook_mask)
+static bool
+policy_mt_check(const char *tablename, const void *ip_void,
+                const struct xt_match *match, void *matchinfo,
+                unsigned int hook_mask)
 {
 	struct xt_policy_info *info = matchinfo;
 
@@ -144,14 +143,13 @@ static bool checkentry(const char *tablename, const void *ip_void,
 		       "outgoing policy selected\n");
 		return false;
 	}
-	/* hook values are equal for IPv4 and IPv6 */
-	if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
+	if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN)
 	    && info->flags & XT_POLICY_MATCH_OUT) {
 		printk(KERN_ERR "xt_policy: output policy not valid in "
 		       "PRE_ROUTING and INPUT\n");
 		return false;
 	}
-	if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
+	if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT)
 	    && info->flags & XT_POLICY_MATCH_IN) {
 		printk(KERN_ERR "xt_policy: input policy not valid in "
 		       "POST_ROUTING and OUTPUT\n");
@@ -164,37 +162,36 @@ static bool checkentry(const char *tablename, const void *ip_void,
 	return true;
 }
 
-static struct xt_match xt_policy_match[] __read_mostly = {
+static struct xt_match policy_mt_reg[] __read_mostly = {
 	{
 		.name = "policy",
 		.family = AF_INET,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = policy_mt_check,
+		.match = policy_mt,
 		.matchsize = sizeof(struct xt_policy_info),
 		.me = THIS_MODULE,
 	},
 	{
 		.name = "policy",
 		.family = AF_INET6,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = policy_mt_check,
+		.match = policy_mt,
 		.matchsize = sizeof(struct xt_policy_info),
 		.me = THIS_MODULE,
 	},
 };
 
-static int __init init(void)
+static int __init policy_mt_init(void)
 {
-	return xt_register_matches(xt_policy_match,
-				   ARRAY_SIZE(xt_policy_match));
+	return xt_register_matches(policy_mt_reg, ARRAY_SIZE(policy_mt_reg));
 }
 
-static void __exit fini(void)
+static void __exit policy_mt_exit(void)
 {
-	xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match));
+	xt_unregister_matches(policy_mt_reg, ARRAY_SIZE(policy_mt_reg));
 }
 
-module_init(init);
-module_exit(fini);
+module_init(policy_mt_init);
+module_exit(policy_mt_exit);
 MODULE_ALIAS("ipt_policy");
 MODULE_ALIAS("ip6t_policy");
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index dae97445b87b..3b021d0c522a 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,16 +11,17 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_DESCRIPTION("Xtables: countdown quota match");
 MODULE_ALIAS("ipt_quota");
 MODULE_ALIAS("ip6t_quota");
 
 static DEFINE_SPINLOCK(quota_lock);
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in, const struct net_device *out,
-      const struct xt_match *match, const void *matchinfo,
-      int offset, unsigned int protoff, bool *hotdrop)
+quota_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	struct xt_quota_info *q =
 		((const struct xt_quota_info *)matchinfo)->master;
@@ -40,9 +41,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename, const void *entry,
-	   const struct xt_match *match, void *matchinfo,
-	   unsigned int hook_mask)
+quota_mt_check(const char *tablename, const void *entry,
+               const struct xt_match *match, void *matchinfo,
+               unsigned int hook_mask)
 {
 	struct xt_quota_info *q = matchinfo;
 
@@ -53,34 +54,34 @@ checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match xt_quota_match[] __read_mostly = {
+static struct xt_match quota_mt_reg[] __read_mostly = {
 	{
 		.name = "quota",
 		.family = AF_INET,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = quota_mt_check,
+		.match = quota_mt,
 		.matchsize = sizeof(struct xt_quota_info),
 		.me = THIS_MODULE
 	},
 	{
 		.name = "quota",
 		.family = AF_INET6,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = quota_mt_check,
+		.match = quota_mt,
 		.matchsize = sizeof(struct xt_quota_info),
 		.me = THIS_MODULE
 	},
 };
 
-static int __init xt_quota_init(void)
+static int __init quota_mt_init(void)
 {
-	return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
+	return xt_register_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
 }
 
-static void __exit xt_quota_fini(void)
+static void __exit quota_mt_exit(void)
 {
-	xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
+	xt_unregister_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg));
 }
 
-module_init(xt_quota_init);
-module_exit(xt_quota_fini);
+module_init(quota_mt_init);
+module_exit(quota_mt_exit);
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
new file mode 100644
index 000000000000..fdb86a515146
--- /dev/null
+++ b/net/netfilter/xt_rateest.c
@@ -0,0 +1,178 @@
+/*
+ * (C) 2007 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/gen_stats.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_rateest.h>
+#include <net/netfilter/xt_rateest.h>
+
+
+static bool xt_rateest_mt(const struct sk_buff *skb,
+			  const struct net_device *in,
+			  const struct net_device *out,
+			  const struct xt_match *match,
+			  const void *matchinfo,
+			  int offset,
+			  unsigned int protoff,
+			  bool *hotdrop)
+{
+	const struct xt_rateest_match_info *info = matchinfo;
+	struct gnet_stats_rate_est *r;
+	u_int32_t bps1, bps2, pps1, pps2;
+	bool ret = true;
+
+	spin_lock_bh(&info->est1->lock);
+	r = &info->est1->rstats;
+	if (info->flags & XT_RATEEST_MATCH_DELTA) {
+		bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0;
+		pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0;
+	} else {
+		bps1 = r->bps;
+		pps1 = r->pps;
+	}
+	spin_unlock_bh(&info->est1->lock);
+
+	if (info->flags & XT_RATEEST_MATCH_ABS) {
+		bps2 = info->bps2;
+		pps2 = info->pps2;
+	} else {
+		spin_lock_bh(&info->est2->lock);
+		r = &info->est2->rstats;
+		if (info->flags & XT_RATEEST_MATCH_DELTA) {
+			bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0;
+			pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0;
+		} else {
+			bps2 = r->bps;
+			pps2 = r->pps;
+		}
+		spin_unlock_bh(&info->est2->lock);
+	}
+
+	switch (info->mode) {
+	case XT_RATEEST_MATCH_LT:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 < bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			ret &= pps1 < pps2;
+		break;
+	case XT_RATEEST_MATCH_GT:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 > bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			ret &= pps1 > pps2;
+		break;
+	case XT_RATEEST_MATCH_EQ:
+		if (info->flags & XT_RATEEST_MATCH_BPS)
+			ret &= bps1 == bps2;
+		if (info->flags & XT_RATEEST_MATCH_PPS)
+			ret &= pps1 == pps2;
+		break;
+	}
+
+	ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? true : false;
+	return ret;
+}
+
+static bool xt_rateest_mt_checkentry(const char *tablename,
+				     const void *ip,
+				     const struct xt_match *match,
+				     void *matchinfo,
+				     unsigned int hook_mask)
+{
+	struct xt_rateest_match_info *info = (void *)matchinfo;
+	struct xt_rateest *est1, *est2;
+
+	if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
+	    XT_RATEEST_MATCH_REL)) != 1)
+		goto err1;
+
+	if (!(info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS)))
+		goto err1;
+
+	switch (info->mode) {
+	case XT_RATEEST_MATCH_EQ:
+	case XT_RATEEST_MATCH_LT:
+	case XT_RATEEST_MATCH_GT:
+		break;
+	default:
+		goto err1;
+	}
+
+	est1 = xt_rateest_lookup(info->name1);
+	if (!est1)
+		goto err1;
+
+	if (info->flags & XT_RATEEST_MATCH_REL) {
+		est2 = xt_rateest_lookup(info->name2);
+		if (!est2)
+			goto err2;
+	} else
+		est2 = NULL;
+
+
+	info->est1 = est1;
+	info->est2 = est2;
+	return true;
+
+err2:
+	xt_rateest_put(est1);
+err1:
+	return false;
+}
+
+static void xt_rateest_mt_destroy(const struct xt_match *match,
+				  void *matchinfo)
+{
+	struct xt_rateest_match_info *info = (void *)matchinfo;
+
+	xt_rateest_put(info->est1);
+	if (info->est2)
+		xt_rateest_put(info->est2);
+}
+
+static struct xt_match xt_rateest_match[] __read_mostly = {
+	{
+		.family = AF_INET,
+		.name = "rateest",
+		.match = xt_rateest_mt,
+		.checkentry = xt_rateest_mt_checkentry,
+		.destroy = xt_rateest_mt_destroy,
+		.matchsize = sizeof(struct xt_rateest_match_info),
+		.me = THIS_MODULE,
+	},
+	{
+		.family = AF_INET6,
+		.name = "rateest",
+		.match = xt_rateest_mt,
+		.checkentry = xt_rateest_mt_checkentry,
+		.destroy = xt_rateest_mt_destroy,
+		.matchsize = sizeof(struct xt_rateest_match_info),
+		.me = THIS_MODULE,
+	},
+};
+
+static int __init xt_rateest_mt_init(void)
+{
+	return xt_register_matches(xt_rateest_match,
+				   ARRAY_SIZE(xt_rateest_match));
+}
+
+static void __exit xt_rateest_mt_fini(void)
+{
+	xt_unregister_matches(xt_rateest_match, ARRAY_SIZE(xt_rateest_match));
+}
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xtables rate estimator match");
+MODULE_ALIAS("ipt_rateest");
+MODULE_ALIAS("ip6t_rateest");
+module_init(xt_rateest_mt_init);
+module_exit(xt_rateest_mt_fini);
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index cc3e76d77a99..7df1627c536f 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -18,18 +18,14 @@
 
 MODULE_AUTHOR("Sampsa Ranta <sampsa@netsonic.fi>");
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("X_tables realm match");
+MODULE_DESCRIPTION("Xtables: Routing realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+realm_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	const struct xt_realm_info *info = matchinfo;
 	const struct dst_entry *dst = skb->dst;
@@ -37,25 +33,25 @@ match(const struct sk_buff *skb,
 	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
 }
 
-static struct xt_match realm_match __read_mostly = {
+static struct xt_match realm_mt_reg __read_mostly = {
 	.name = "realm",
-	.match = match,
+	.match = realm_mt,
 	.matchsize = sizeof(struct xt_realm_info),
-	.hooks = (1 << NF_IP_POST_ROUTING) | (1 << NF_IP_FORWARD) |
-		 (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_LOCAL_IN),
+	.hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) |
+		 (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN),
 	.family = AF_INET,
 	.me = THIS_MODULE
 };
 
-static int __init xt_realm_init(void)
+static int __init realm_mt_init(void)
 {
-	return xt_register_match(&realm_match);
+	return xt_register_match(&realm_mt_reg);
 }
 
-static void __exit xt_realm_fini(void)
+static void __exit realm_mt_exit(void)
 {
-	xt_unregister_match(&realm_match);
+	xt_unregister_match(&realm_mt_reg);
 }
 
-module_init(xt_realm_init);
-module_exit(xt_realm_fini);
+module_init(realm_mt_init);
+module_exit(realm_mt_exit);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 3358273a47b7..b718ec64333d 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -11,7 +11,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Match for SCTP protocol packets");
+MODULE_DESCRIPTION("Xtables: SCTP protocol packet match");
 MODULE_ALIAS("ipt_sctp");
 MODULE_ALIAS("ip6t_sctp");
 
@@ -116,14 +116,9 @@ match_packet(const struct sk_buff *skb,
 }
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+sctp_mt(const struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, const struct xt_match *match,
+        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct xt_sctp_info *info = matchinfo;
 	sctp_sctphdr_t _sh, *sh;
@@ -153,11 +148,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename,
-	   const void *inf,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int hook_mask)
+sctp_mt_check(const char *tablename, const void *inf,
              const struct xt_match *match, void *matchinfo,
              unsigned int hook_mask)
 {
 	const struct xt_sctp_info *info = matchinfo;
 
@@ -171,12 +164,12 @@ checkentry(const char *tablename,
 		     | SCTP_CHUNK_MATCH_ONLY)));
 }
 
-static struct xt_match xt_sctp_match[] __read_mostly = {
+static struct xt_match sctp_mt_reg[] __read_mostly = {
 	{
 		.name = "sctp",
 		.family = AF_INET,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = sctp_mt_check,
+		.match = sctp_mt,
 		.matchsize = sizeof(struct xt_sctp_info),
 		.proto = IPPROTO_SCTP,
 		.me = THIS_MODULE
@@ -184,23 +177,23 @@ static struct xt_match xt_sctp_match[] __read_mostly = {
 	{
 		.name = "sctp",
 		.family = AF_INET6,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = sctp_mt_check,
+		.match = sctp_mt,
 		.matchsize = sizeof(struct xt_sctp_info),
 		.proto = IPPROTO_SCTP,
 		.me = THIS_MODULE
 	},
 };
 
-static int __init xt_sctp_init(void)
+static int __init sctp_mt_init(void)
 {
-	return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
+	return xt_register_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
 }
 
-static void __exit xt_sctp_fini(void)
+static void __exit sctp_mt_exit(void)
 {
-	xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
+	xt_unregister_matches(sctp_mt_reg, ARRAY_SIZE(sctp_mt_reg));
 }
 
-module_init(xt_sctp_init);
-module_exit(xt_sctp_fini);
+module_init(sctp_mt_init);
+module_exit(sctp_mt_exit);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index e0a528df19a7..a776dc36a193 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,14 +21,10 @@ MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+state_mt(const struct sk_buff *skb, const struct net_device *in,
+         const struct net_device *out, const struct xt_match *match,
+         const void *matchinfo, int offset, unsigned int protoff,
+         bool *hotdrop)
 {
 	const struct xt_state_info *sinfo = matchinfo;
 	enum ip_conntrack_info ctinfo;
@@ -44,56 +40,54 @@ match(const struct sk_buff *skb,
 	return (sinfo->statemask & statebit);
 }
 
-static bool check(const char *tablename,
-		  const void *inf,
-		  const struct xt_match *match,
-		  void *matchinfo,
-		  unsigned int hook_mask)
+static bool
+state_mt_check(const char *tablename, const void *inf,
               const struct xt_match *match, void *matchinfo,
               unsigned int hook_mask)
 {
 	if (nf_ct_l3proto_try_module_get(match->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-		       "proto=%d\n", match->family);
+		       "proto=%u\n", match->family);
 		return false;
 	}
 	return true;
 }
 
-static void
-destroy(const struct xt_match *match, void *matchinfo)
+static void state_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	nf_ct_l3proto_module_put(match->family);
 }
 
-static struct xt_match xt_state_match[] __read_mostly = {
+static struct xt_match state_mt_reg[] __read_mostly = {
 	{
 		.name = "state",
 		.family = AF_INET,
-		.checkentry = check,
-		.match = match,
-		.destroy = destroy,
+		.checkentry = state_mt_check,
+		.match = state_mt,
+		.destroy = state_mt_destroy,
 		.matchsize = sizeof(struct xt_state_info),
 		.me = THIS_MODULE,
 	},
 	{
 		.name = "state",
 		.family = AF_INET6,
-		.checkentry = check,
-		.match = match,
-		.destroy = destroy,
+		.checkentry = state_mt_check,
+		.match = state_mt,
+		.destroy = state_mt_destroy,
 		.matchsize = sizeof(struct xt_state_info),
 		.me = THIS_MODULE,
 	},
 };
 
-static int __init xt_state_init(void)
+static int __init state_mt_init(void)
 {
-	return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
+	return xt_register_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
 }
 
-static void __exit xt_state_fini(void)
+static void __exit state_mt_exit(void)
 {
-	xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
+	xt_unregister_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
 }
 
-module_init(xt_state_init);
-module_exit(xt_state_fini);
+module_init(state_mt_init);
+module_exit(state_mt_exit);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 4089dae4e286..43133080da7d 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -18,17 +18,17 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("xtables statistical match module");
+MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
 MODULE_ALIAS("ipt_statistic");
 MODULE_ALIAS("ip6t_statistic");
 
 static DEFINE_SPINLOCK(nth_lock);
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in, const struct net_device *out,
-      const struct xt_match *match, const void *matchinfo,
-      int offset, unsigned int protoff, bool *hotdrop)
+statistic_mt(const struct sk_buff *skb, const struct net_device *in,
+             const struct net_device *out, const struct xt_match *match,
+             const void *matchinfo, int offset, unsigned int protoff,
+             bool *hotdrop)
 {
 	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
@@ -53,9 +53,9 @@ match(const struct sk_buff *skb,
 }
 
 static bool
-checkentry(const char *tablename, const void *entry,
-	   const struct xt_match *match, void *matchinfo,
-	   unsigned int hook_mask)
+statistic_mt_check(const char *tablename, const void *entry,
                   const struct xt_match *match, void *matchinfo,
                   unsigned int hook_mask)
 {
 	struct xt_statistic_info *info = matchinfo;
 
@@ -66,36 +66,36 @@ checkentry(const char *tablename, const void *entry,
 	return true;
 }
 
-static struct xt_match xt_statistic_match[] __read_mostly = {
+static struct xt_match statistic_mt_reg[] __read_mostly = {
 	{
 		.name = "statistic",
 		.family = AF_INET,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = statistic_mt_check,
+		.match = statistic_mt,
 		.matchsize = sizeof(struct xt_statistic_info),
 		.me = THIS_MODULE,
 	},
 	{
 		.name = "statistic",
 		.family = AF_INET6,
-		.checkentry = checkentry,
-		.match = match,
+		.checkentry = statistic_mt_check,
+		.match = statistic_mt,
 		.matchsize = sizeof(struct xt_statistic_info),
 		.me = THIS_MODULE,
 	},
 };
 
-static int __init xt_statistic_init(void)
+static int __init statistic_mt_init(void)
 {
-	return xt_register_matches(xt_statistic_match,
-				   ARRAY_SIZE(xt_statistic_match));
+	return xt_register_matches(statistic_mt_reg,
+				   ARRAY_SIZE(statistic_mt_reg));
 }
 
-static void __exit xt_statistic_fini(void)
+static void __exit statistic_mt_exit(void)
 {
-	xt_unregister_matches(xt_statistic_match,
-			      ARRAY_SIZE(xt_statistic_match));
+	xt_unregister_matches(statistic_mt_reg,
			      ARRAY_SIZE(statistic_mt_reg));
 }
 
-module_init(xt_statistic_init);
-module_exit(xt_statistic_fini);
+module_init(statistic_mt_init);
+module_exit(statistic_mt_exit);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 864133442cda..72f694d947f4 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -16,19 +16,16 @@
 #include <linux/textsearch.h>
 
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
-MODULE_DESCRIPTION("IP tables string match module");
+MODULE_DESCRIPTION("Xtables: string-based matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
-static bool match(const struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  const struct xt_match *match,
-		  const void *matchinfo,
-		  int offset,
-		  unsigned int protoff,
-		  bool *hotdrop)
+static bool
+string_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct xt_string_info *conf = matchinfo;
 	struct ts_state state;
@@ -40,13 +37,12 @@ static bool match(const struct sk_buff *skb,
 		     != UINT_MAX) ^ conf->invert;
 }
 
-#define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m)
+#define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
 
-static bool checkentry(const char *tablename,
-		       const void *ip,
-		       const struct xt_match *match,
-		       void *matchinfo,
-		       unsigned int hook_mask)
+static bool
+string_mt_check(const char *tablename, const void *ip,
                const struct xt_match *match, void *matchinfo,
                unsigned int hook_mask)
 {
 	struct xt_string_info *conf = matchinfo;
 	struct ts_config *ts_conf;
@@ -68,41 +64,41 @@ static bool checkentry(const char *tablename,
 	return true;
 }
 
-static void destroy(const struct xt_match *match, void *matchinfo)
+static void string_mt_destroy(const struct xt_match *match, void *matchinfo)
 {
 	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
 }
 
-static struct xt_match xt_string_match[] __read_mostly = {
+static struct xt_match string_mt_reg[] __read_mostly = {
 	{
 		.name = "string",
 		.family = AF_INET,
-		.checkentry = checkentry,
-		.match = match,
-		.destroy = destroy,
+		.checkentry = string_mt_check,
+		.match = string_mt,
+		.destroy = string_mt_destroy,
 		.matchsize = sizeof(struct xt_string_info),
 		.me = THIS_MODULE
 	},
 	{
 		.name = "string",
 		.family = AF_INET6,
-		.checkentry = checkentry,
-		.match = match,
-		.destroy = destroy,
+		.checkentry = string_mt_check,
+		.match = string_mt,
+		.destroy = string_mt_destroy,
 		.matchsize = sizeof(struct xt_string_info),
 		.me = THIS_MODULE
 	},
 };
 
-static int __init xt_string_init(void)
+static int __init string_mt_init(void)
 {
-	return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
+	return xt_register_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
 }
 
-static void __exit xt_string_fini(void)
+static void __exit string_mt_exit(void)
 {
-	xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
+	xt_unregister_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg));
 }
 
-module_init(xt_string_init);
-module_exit(xt_string_fini);
+module_init(string_mt_init);
+module_exit(string_mt_exit);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 84d401bfafad..d7a5b27fe81e 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -20,19 +20,15 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables TCP MSS match module");
+MODULE_DESCRIPTION("Xtables: TCP MSS match");
 MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
 static bool
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      bool *hotdrop)
+tcpmss_mt(const struct sk_buff *skb, const struct net_device *in,
+          const struct net_device *out, const struct xt_match *match,
+          const void *matchinfo, int offset, unsigned int protoff,
+          bool *hotdrop)
 {
 	const struct xt_tcpmss_match_info *info = matchinfo;
 	struct tcphdr _tcph, *th;
@@ -82,11 +78,11 @@ dropit:
 	return false;
 }
 
-static struct xt_match xt_tcpmss_match[] __read_mostly = {
+static struct xt_match tcpmss_mt_reg[] __read_mostly = {
 	{
 		.name = "tcpmss",
 		.family = AF_INET,
-		.match = match,
+		.match = tcpmss_mt,
 		.matchsize = sizeof(struct xt_tcpmss_match_info),
 		.proto = IPPROTO_TCP,
 		.me = THIS_MODULE,
@@ -94,23 +90,22 @@ static struct xt_match xt_tcpmss_match[] __read_mostly = {
 	{
 		.name = "tcpmss",
 		.family = AF_INET6,
-		.match = match,
+		.match = tcpmss_mt,
 		.matchsize = sizeof(struct xt_tcpmss_match_info),
 		.proto = IPPROTO_TCP,
 		.me = THIS_MODULE,
 	},
 };
 
-static int __init xt_tcpmss_init(void)
+static int __init tcpmss_mt_init(void)
 {
-	return xt_register_matches(xt_tcpmss_match,
-				   ARRAY_SIZE(xt_tcpmss_match));
+	return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
 }
 
-static void __exit xt_tcpmss_fini(void)
+static void __exit tcpmss_mt_exit(void)
 {
-	xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match));
+	xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
 }
 
-module_init(xt_tcpmss_init);
-module_exit(xt_tcpmss_fini);
+module_init(tcpmss_mt_init);
+module_exit(tcpmss_mt_exit);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 223f9bded672..4fa3b669f691 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -10,7 +10,7 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
-MODULE_DESCRIPTION("x_tables match for TCP and UDP(-Lite), supports IPv4 and IPv6");
+MODULE_DESCRIPTION("Xtables: TCP, UDP and UDP-Lite match");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("xt_tcp");
 MODULE_ALIAS("xt_udp");
@@ -68,14 +68,9 @@ tcp_find_option(u_int8_t option,
 }
 
 static bool
-tcp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const struct xt_match *match,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  bool *hotdrop)
+tcp_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct tcphdr _tcph, *th;
 	const struct xt_tcp *tcpinfo = matchinfo;
@@ -134,11 +129,9 @@ tcp_match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-tcp_checkentry(const char *tablename,
-	       const void *info,
-	       const struct xt_match *match,
-	       void *matchinfo,
-	       unsigned int hook_mask)
+tcp_mt_check(const char *tablename, const void *info,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct xt_tcp *tcpinfo = matchinfo;
 
@@ -147,14 +140,9 @@ tcp_checkentry(const char *tablename,
 }
 
 static bool
-udp_match(const struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  const struct xt_match *match,
-	  const void *matchinfo,
-	  int offset,
-	  unsigned int protoff,
-	  bool *hotdrop)
+udp_mt(const struct sk_buff *skb, const struct net_device *in,
+       const struct net_device *out, const struct xt_match *match,
+       const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	struct udphdr _udph, *uh;
 	const struct xt_udp *udpinfo = matchinfo;
@@ -182,11 +170,9 @@ udp_match(const struct sk_buff *skb,
 
 /* Called when user tries to insert an entry of this type. */
 static bool
-udp_checkentry(const char *tablename,
-	       const void *info,
-	       const struct xt_match *match,
-	       void *matchinfo,
-	       unsigned int hook_mask)
+udp_mt_check(const char *tablename, const void *info,
+             const struct xt_match *match, void *matchinfo,
+             unsigned int hook_mask)
 {
 	const struct xt_udp *udpinfo = matchinfo;
 
@@ -194,12 +180,12 @@ udp_checkentry(const char *tablename,
 	return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
 }
 
-static struct xt_match xt_tcpudp_match[] __read_mostly = {
+static struct xt_match tcpudp_mt_reg[] __read_mostly = {
 	{
 		.name = "tcp",
 		.family = AF_INET,
-		.checkentry = tcp_checkentry,
-		.match = tcp_match,
+		.checkentry = tcp_mt_check,
+		.match = tcp_mt,
 		.matchsize = sizeof(struct xt_tcp),
 		.proto = IPPROTO_TCP,
 		.me = THIS_MODULE,
@@ -207,8 +193,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name = "tcp",
 		.family = AF_INET6,
-		.checkentry = tcp_checkentry,
-		.match = tcp_match,
+		.checkentry = tcp_mt_check,
+		.match = tcp_mt,
 		.matchsize = sizeof(struct xt_tcp),
 		.proto = IPPROTO_TCP,
 		.me = THIS_MODULE,
@@ -216,8 +202,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name = "udp",
 		.family = AF_INET,
-		.checkentry = udp_checkentry,
-		.match = udp_match,
+		.checkentry = udp_mt_check,
+		.match = udp_mt,
 		.matchsize = sizeof(struct xt_udp),
 		.proto = IPPROTO_UDP,
 		.me = THIS_MODULE,
@@ -225,8 +211,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name = "udp",
 		.family = AF_INET6,
-		.checkentry = udp_checkentry,
-		.match = udp_match,
+		.checkentry = udp_mt_check,
+		.match = udp_mt,
 		.matchsize = sizeof(struct xt_udp),
 		.proto = IPPROTO_UDP,
 		.me = THIS_MODULE,
@@ -234,8 +220,8 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name = "udplite",
 		.family = AF_INET,
-		.checkentry = udp_checkentry,
-		.match = udp_match,
+		.checkentry = udp_mt_check,
+		.match = udp_mt,
 		.matchsize = sizeof(struct xt_udp),
 		.proto = IPPROTO_UDPLITE,
 		.me = THIS_MODULE,
@@ -243,24 +229,23 @@ static struct xt_match xt_tcpudp_match[] __read_mostly = {
 	{
 		.name = "udplite",
 		.family = AF_INET6,
-		.checkentry = udp_checkentry,
-		.match = udp_match,
+		.checkentry = udp_mt_check,
+		.match = udp_mt,
 		.matchsize = sizeof(struct xt_udp),
 		.proto = IPPROTO_UDPLITE,
 		.me = THIS_MODULE,
 	},
 };
 
-static int __init xt_tcpudp_init(void)
+static int __init tcpudp_mt_init(void)
 {
-	return xt_register_matches(xt_tcpudp_match,
-				   ARRAY_SIZE(xt_tcpudp_match));
+	return xt_register_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
 }
 
-static void __exit xt_tcpudp_fini(void)
+static void __exit tcpudp_mt_exit(void)
 {
-	xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match));
+	xt_unregister_matches(tcpudp_mt_reg, ARRAY_SIZE(tcpudp_mt_reg));
 }
 
-module_init(xt_tcpudp_init);
-module_exit(xt_tcpudp_fini);
+module_init(tcpudp_mt_init);
+module_exit(tcpudp_mt_exit);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index f9c55dcd894b..e9a8794bc3ab 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -147,11 +147,10 @@ static void localtime_3(struct xtm *r, time_t time)
 	return;
 }
 
-static bool xt_time_match(const struct sk_buff *skb,
-			  const struct net_device *in,
-			  const struct net_device *out,
-			  const struct xt_match *match, const void *matchinfo,
-			  int offset, unsigned int protoff, bool *hotdrop)
+static bool
+time_mt(const struct sk_buff *skb, const struct net_device *in,
+        const struct net_device *out, const struct xt_match *match,
+        const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
 {
 	const struct xt_time_info *info = matchinfo;
 	unsigned int packet_time;
@@ -216,9 +215,10 @@ static bool xt_time_match(const struct sk_buff *skb,
 	return true;
 }
 
-static bool xt_time_check(const char *tablename, const void *ip,
-			  const struct xt_match *match, void *matchinfo,
-			  unsigned int hook_mask)
+static bool
+time_mt_check(const char *tablename, const void *ip,
+              const struct xt_match *match, void *matchinfo,
+              unsigned int hook_mask)
 {
 	struct xt_time_info *info = matchinfo;
 
@@ -232,39 +232,39 @@ static bool xt_time_check(const char *tablename, const void *ip,
 	return true;
 }
 
-static struct xt_match xt_time_reg[] __read_mostly = {
+static struct xt_match time_mt_reg[] __read_mostly = {
 	{
 		.name = "time",
 		.family = AF_INET,
-		.match = xt_time_match,
+		.match = time_mt,
 		.matchsize = sizeof(struct xt_time_info),
-		.checkentry = xt_time_check,
+		.checkentry = time_mt_check,
 		.me = THIS_MODULE,
 	},
 	{
 		.name = "time",
 		.family = AF_INET6,
-		.match = xt_time_match,
+		.match = time_mt,
 		.matchsize = sizeof(struct xt_time_info),
-		.checkentry = xt_time_check,
+		.checkentry = time_mt_check,
 		.me = THIS_MODULE,
 	},
 };
 
-static int __init xt_time_init(void)
+static int __init time_mt_init(void)
 {
-	return xt_register_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg));
+	return xt_register_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
 }
 
-static void __exit xt_time_exit(void)
+static void __exit time_mt_exit(void)
 {
-	xt_unregister_matches(xt_time_reg, ARRAY_SIZE(xt_time_reg));
+	xt_unregister_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg));
 }
 
-module_init(xt_time_init);
-module_exit(xt_time_exit);
+module_init(time_mt_init);
+module_exit(time_mt_exit);
 MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
-MODULE_DESCRIPTION("netfilter time match");
+MODULE_DESCRIPTION("Xtables: time-based matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_time");
 MODULE_ALIAS("ip6t_time");
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index af75b8c3f20b..9b8ed390a8e0 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -88,11 +88,10 @@ static bool u32_match_it(const struct xt_u32 *data,
88 return true; 88 return true;
89} 89}
90 90
91static bool u32_match(const struct sk_buff *skb, 91static bool
92 const struct net_device *in, 92u32_mt(const struct sk_buff *skb, const struct net_device *in,
93 const struct net_device *out, 93 const struct net_device *out, const struct xt_match *match,
94 const struct xt_match *match, const void *matchinfo, 94 const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop)
95 int offset, unsigned int protoff, bool *hotdrop)
96{ 95{
97 const struct xt_u32 *data = matchinfo; 96 const struct xt_u32 *data = matchinfo;
98 bool ret; 97 bool ret;
@@ -101,37 +100,37 @@ static bool u32_match(const struct sk_buff *skb,
101 return ret ^ data->invert; 100 return ret ^ data->invert;
102} 101}
103 102
104static struct xt_match u32_reg[] __read_mostly = { 103static struct xt_match u32_mt_reg[] __read_mostly = {
105 { 104 {
106 .name = "u32", 105 .name = "u32",
107 .family = AF_INET, 106 .family = AF_INET,
108 .match = u32_match, 107 .match = u32_mt,
109 .matchsize = sizeof(struct xt_u32), 108 .matchsize = sizeof(struct xt_u32),
110 .me = THIS_MODULE, 109 .me = THIS_MODULE,
111 }, 110 },
112 { 111 {
113 .name = "u32", 112 .name = "u32",
114 .family = AF_INET6, 113 .family = AF_INET6,
115 .match = u32_match, 114 .match = u32_mt,
116 .matchsize = sizeof(struct xt_u32), 115 .matchsize = sizeof(struct xt_u32),
117 .me = THIS_MODULE, 116 .me = THIS_MODULE,
118 }, 117 },
119}; 118};
120 119
121static int __init xt_u32_init(void) 120static int __init u32_mt_init(void)
122{ 121{
123 return xt_register_matches(u32_reg, ARRAY_SIZE(u32_reg)); 122 return xt_register_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
124} 123}
125 124
126static void __exit xt_u32_exit(void) 125static void __exit u32_mt_exit(void)
127{ 126{
128 xt_unregister_matches(u32_reg, ARRAY_SIZE(u32_reg)); 127 xt_unregister_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg));
129} 128}
130 129
131module_init(xt_u32_init); 130module_init(u32_mt_init);
132module_exit(xt_u32_exit); 131module_exit(u32_mt_exit);
133MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); 132MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
134MODULE_DESCRIPTION("netfilter u32 match module"); 133MODULE_DESCRIPTION("Xtables: arbitrary byte matching");
135MODULE_LICENSE("GPL"); 134MODULE_LICENSE("GPL");
136MODULE_ALIAS("ipt_u32"); 135MODULE_ALIAS("ipt_u32");
137MODULE_ALIAS("ip6t_u32"); 136MODULE_ALIAS("ip6t_u32");
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index de3988ba1f46..6b178e1247b5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -156,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk)
156 skb_queue_purge(&sk->sk_receive_queue); 156 skb_queue_purge(&sk->sk_receive_queue);
157 157
158 if (!sock_flag(sk, SOCK_DEAD)) { 158 if (!sock_flag(sk, SOCK_DEAD)) {
159 printk("Freeing alive netlink socket %p\n", sk); 159 printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
160 return; 160 return;
161 } 161 }
162 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); 162 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
@@ -164,13 +164,14 @@ static void netlink_sock_destruct(struct sock *sk)
164 BUG_TRAP(!nlk_sk(sk)->groups); 164 BUG_TRAP(!nlk_sk(sk)->groups);
165} 165}
166 166
167/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP. 167/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
168 * Look, when several writers sleep and reader wakes them up, all but one 168 * SMP. Look, when several writers sleep and reader wakes them up, all but one
169 * immediately hit write lock and grab all the cpus. Exclusive sleep solves 169 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
170 * this, _but_ remember, it adds useless work on UP machines. 170 * this, _but_ remember, it adds useless work on UP machines.
171 */ 171 */
172 172
173static void netlink_table_grab(void) 173static void netlink_table_grab(void)
174 __acquires(nl_table_lock)
174{ 175{
175 write_lock_irq(&nl_table_lock); 176 write_lock_irq(&nl_table_lock);
176 177
@@ -178,7 +179,7 @@ static void netlink_table_grab(void)
178 DECLARE_WAITQUEUE(wait, current); 179 DECLARE_WAITQUEUE(wait, current);
179 180
180 add_wait_queue_exclusive(&nl_table_wait, &wait); 181 add_wait_queue_exclusive(&nl_table_wait, &wait);
181 for(;;) { 182 for (;;) {
182 set_current_state(TASK_UNINTERRUPTIBLE); 183 set_current_state(TASK_UNINTERRUPTIBLE);
183 if (atomic_read(&nl_table_users) == 0) 184 if (atomic_read(&nl_table_users) == 0)
184 break; 185 break;
@@ -192,13 +193,14 @@ static void netlink_table_grab(void)
192 } 193 }
193} 194}
194 195
195static __inline__ void netlink_table_ungrab(void) 196static void netlink_table_ungrab(void)
197 __releases(nl_table_lock)
196{ 198{
197 write_unlock_irq(&nl_table_lock); 199 write_unlock_irq(&nl_table_lock);
198 wake_up(&nl_table_wait); 200 wake_up(&nl_table_wait);
199} 201}
200 202
201static __inline__ void 203static inline void
202netlink_lock_table(void) 204netlink_lock_table(void)
203{ 205{
204 /* read_lock() synchronizes us to netlink_table_grab */ 206 /* read_lock() synchronizes us to netlink_table_grab */
@@ -208,14 +210,15 @@ netlink_lock_table(void)
208 read_unlock(&nl_table_lock); 210 read_unlock(&nl_table_lock);
209} 211}
210 212
211static __inline__ void 213static inline void
212netlink_unlock_table(void) 214netlink_unlock_table(void)
213{ 215{
214 if (atomic_dec_and_test(&nl_table_users)) 216 if (atomic_dec_and_test(&nl_table_users))
215 wake_up(&nl_table_wait); 217 wake_up(&nl_table_wait);
216} 218}
217 219
218static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) 220static inline struct sock *netlink_lookup(struct net *net, int protocol,
221 u32 pid)
219{ 222{
220 struct nl_pid_hash *hash = &nl_table[protocol].hash; 223 struct nl_pid_hash *hash = &nl_table[protocol].hash;
221 struct hlist_head *head; 224 struct hlist_head *head;
@@ -236,13 +239,14 @@ found:
236 return sk; 239 return sk;
237} 240}
238 241
239static inline struct hlist_head *nl_pid_hash_alloc(size_t size) 242static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
240{ 243{
241 if (size <= PAGE_SIZE) 244 if (size <= PAGE_SIZE)
242 return kmalloc(size, GFP_ATOMIC); 245 return kzalloc(size, GFP_ATOMIC);
243 else 246 else
244 return (struct hlist_head *) 247 return (struct hlist_head *)
245 __get_free_pages(GFP_ATOMIC, get_order(size)); 248 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
249 get_order(size));
246} 250}
247 251
248static inline void nl_pid_hash_free(struct hlist_head *table, size_t size) 252static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
@@ -271,11 +275,10 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
271 size *= 2; 275 size *= 2;
272 } 276 }
273 277
274 table = nl_pid_hash_alloc(size); 278 table = nl_pid_hash_zalloc(size);
275 if (!table) 279 if (!table)
276 return 0; 280 return 0;
277 281
278 memset(table, 0, size);
279 otable = hash->table; 282 otable = hash->table;
280 hash->table = table; 283 hash->table = table;
281 hash->mask = mask; 284 hash->mask = mask;
@@ -428,7 +431,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
428 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) 431 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
429 return -ESOCKTNOSUPPORT; 432 return -ESOCKTNOSUPPORT;
430 433
431 if (protocol<0 || protocol >= MAX_LINKS) 434 if (protocol < 0 || protocol >= MAX_LINKS)
432 return -EPROTONOSUPPORT; 435 return -EPROTONOSUPPORT;
433 436
434 netlink_lock_table(); 437 netlink_lock_table();
@@ -445,7 +448,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol)
445 cb_mutex = nl_table[protocol].cb_mutex; 448 cb_mutex = nl_table[protocol].cb_mutex;
446 netlink_unlock_table(); 449 netlink_unlock_table();
447 450
448 if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0) 451 err = __netlink_create(net, sock, cb_mutex, protocol);
452 if (err < 0)
449 goto out_module; 453 goto out_module;
450 454
451 nlk = nlk_sk(sock->sk); 455 nlk = nlk_sk(sock->sk);
@@ -494,9 +498,12 @@ static int netlink_release(struct socket *sock)
494 498
495 netlink_table_grab(); 499 netlink_table_grab();
496 if (netlink_is_kernel(sk)) { 500 if (netlink_is_kernel(sk)) {
497 kfree(nl_table[sk->sk_protocol].listeners); 501 BUG_ON(nl_table[sk->sk_protocol].registered == 0);
498 nl_table[sk->sk_protocol].module = NULL; 502 if (--nl_table[sk->sk_protocol].registered == 0) {
499 nl_table[sk->sk_protocol].registered = 0; 503 kfree(nl_table[sk->sk_protocol].listeners);
504 nl_table[sk->sk_protocol].module = NULL;
505 nl_table[sk->sk_protocol].registered = 0;
506 }
500 } else if (nlk->subscriptions) 507 } else if (nlk->subscriptions)
501 netlink_update_listeners(sk); 508 netlink_update_listeners(sk);
502 netlink_table_ungrab(); 509 netlink_table_ungrab();
@@ -590,7 +597,7 @@ static int netlink_realloc_groups(struct sock *sk)
590 err = -ENOMEM; 597 err = -ENOMEM;
591 goto out_unlock; 598 goto out_unlock;
592 } 599 }
593 memset((char*)new_groups + NLGRPSZ(nlk->ngroups), 0, 600 memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
594 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups)); 601 NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
595 602
596 nlk->groups = new_groups; 603 nlk->groups = new_groups;
@@ -600,7 +607,8 @@ static int netlink_realloc_groups(struct sock *sk)
600 return err; 607 return err;
601} 608}
602 609
603static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) 610static int netlink_bind(struct socket *sock, struct sockaddr *addr,
611 int addr_len)
604{ 612{
605 struct sock *sk = sock->sk; 613 struct sock *sk = sock->sk;
606 struct net *net = sk->sk_net; 614 struct net *net = sk->sk_net;
@@ -651,7 +659,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
651 int err = 0; 659 int err = 0;
652 struct sock *sk = sock->sk; 660 struct sock *sk = sock->sk;
653 struct netlink_sock *nlk = nlk_sk(sk); 661 struct netlink_sock *nlk = nlk_sk(sk);
654 struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr; 662 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
655 663
656 if (addr->sa_family == AF_UNSPEC) { 664 if (addr->sa_family == AF_UNSPEC) {
657 sk->sk_state = NETLINK_UNCONNECTED; 665 sk->sk_state = NETLINK_UNCONNECTED;
@@ -678,11 +686,12 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
678 return err; 686 return err;
679} 687}
680 688
681static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer) 689static int netlink_getname(struct socket *sock, struct sockaddr *addr,
690 int *addr_len, int peer)
682{ 691{
683 struct sock *sk = sock->sk; 692 struct sock *sk = sock->sk;
684 struct netlink_sock *nlk = nlk_sk(sk); 693 struct netlink_sock *nlk = nlk_sk(sk);
685 struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr; 694 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
686 695
687 nladdr->nl_family = AF_NETLINK; 696 nladdr->nl_family = AF_NETLINK;
688 nladdr->nl_pad = 0; 697 nladdr->nl_pad = 0;
@@ -885,6 +894,7 @@ retry:
885 894
886 return netlink_sendskb(sk, skb); 895 return netlink_sendskb(sk, skb);
887} 896}
897EXPORT_SYMBOL(netlink_unicast);
888 898
889int netlink_has_listeners(struct sock *sk, unsigned int group) 899int netlink_has_listeners(struct sock *sk, unsigned int group)
890{ 900{
@@ -905,7 +915,8 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
905} 915}
906EXPORT_SYMBOL_GPL(netlink_has_listeners); 916EXPORT_SYMBOL_GPL(netlink_has_listeners);
907 917
908static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 918static inline int netlink_broadcast_deliver(struct sock *sk,
919 struct sk_buff *skb)
909{ 920{
910 struct netlink_sock *nlk = nlk_sk(sk); 921 struct netlink_sock *nlk = nlk_sk(sk);
911 922
@@ -1026,6 +1037,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1026 return -ENOBUFS; 1037 return -ENOBUFS;
1027 return -ESRCH; 1038 return -ESRCH;
1028} 1039}
1040EXPORT_SYMBOL(netlink_broadcast);
1029 1041
1030struct netlink_set_err_data { 1042struct netlink_set_err_data {
1031 struct sock *exclude_sk; 1043 struct sock *exclude_sk;
@@ -1182,7 +1194,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1182 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1194 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1183 struct sock *sk = sock->sk; 1195 struct sock *sk = sock->sk;
1184 struct netlink_sock *nlk = nlk_sk(sk); 1196 struct netlink_sock *nlk = nlk_sk(sk);
1185 struct sockaddr_nl *addr=msg->msg_name; 1197 struct sockaddr_nl *addr = msg->msg_name;
1186 u32 dst_pid; 1198 u32 dst_pid;
1187 u32 dst_group; 1199 u32 dst_group;
1188 struct sk_buff *skb; 1200 struct sk_buff *skb;
@@ -1221,7 +1233,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1221 goto out; 1233 goto out;
1222 err = -ENOBUFS; 1234 err = -ENOBUFS;
1223 skb = alloc_skb(len, GFP_KERNEL); 1235 skb = alloc_skb(len, GFP_KERNEL);
1224 if (skb==NULL) 1236 if (skb == NULL)
1225 goto out; 1237 goto out;
1226 1238
1227 NETLINK_CB(skb).pid = nlk->pid; 1239 NETLINK_CB(skb).pid = nlk->pid;
@@ -1237,7 +1249,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1237 */ 1249 */
1238 1250
1239 err = -EFAULT; 1251 err = -EFAULT;
1240 if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) { 1252 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1241 kfree_skb(skb); 1253 kfree_skb(skb);
1242 goto out; 1254 goto out;
1243 } 1255 }
@@ -1276,8 +1288,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1276 1288
1277 copied = 0; 1289 copied = 0;
1278 1290
1279 skb = skb_recv_datagram(sk,flags,noblock,&err); 1291 skb = skb_recv_datagram(sk, flags, noblock, &err);
1280 if (skb==NULL) 1292 if (skb == NULL)
1281 goto out; 1293 goto out;
1282 1294
1283 msg->msg_namelen = 0; 1295 msg->msg_namelen = 0;
@@ -1292,7 +1304,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1292 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 1304 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1293 1305
1294 if (msg->msg_name) { 1306 if (msg->msg_name) {
1295 struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name; 1307 struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
1296 addr->nl_family = AF_NETLINK; 1308 addr->nl_family = AF_NETLINK;
1297 addr->nl_pad = 0; 1309 addr->nl_pad = 0;
1298 addr->nl_pid = NETLINK_CB(skb).pid; 1310 addr->nl_pid = NETLINK_CB(skb).pid;
@@ -1344,7 +1356,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1344 1356
1345 BUG_ON(!nl_table); 1357 BUG_ON(!nl_table);
1346 1358
1347 if (unit<0 || unit>=MAX_LINKS) 1359 if (unit < 0 || unit >= MAX_LINKS)
1348 return NULL; 1360 return NULL;
1349 1361
1350 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1362 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
@@ -1380,9 +1392,13 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1380 nl_table[unit].registered = 1; 1392 nl_table[unit].registered = 1;
1381 } else { 1393 } else {
1382 kfree(listeners); 1394 kfree(listeners);
1395 nl_table[unit].registered++;
1383 } 1396 }
1384 netlink_table_ungrab(); 1397 netlink_table_ungrab();
1385 1398
1399 /* Do not hold an extra reference to a namespace as this socket is
1400 * internal to a namespace and does not prevent it from stopping. */
1401 put_net(net);
1386 return sk; 1402 return sk;
1387 1403
1388out_sock_release: 1404out_sock_release:
@@ -1390,6 +1406,30 @@ out_sock_release:
1390 sock_release(sock); 1406 sock_release(sock);
1391 return NULL; 1407 return NULL;
1392} 1408}
1409EXPORT_SYMBOL(netlink_kernel_create);
1410
1411
1412void
1413netlink_kernel_release(struct sock *sk)
1414{
1415 if (sk == NULL || sk->sk_socket == NULL)
1416 return;
1417
1418 /*
1419 * The last sock_put should drop the reference to sk->sk_net. It has
1420 * already been dropped in netlink_kernel_create. Taking a reference to a
1421 * stopping namespace is not an option.
1422 * Take a reference to the socket to remove it from the netlink lookup
1423 * table _alive_ and after that destroy it in the context of init_net.
1424 */
1425 sock_hold(sk);
1426 sock_release(sk->sk_socket);
1427
1428 sk->sk_net = get_net(&init_net);
1429 sock_put(sk);
1430}
1431EXPORT_SYMBOL(netlink_kernel_release);
1432
1393 1433
1394/** 1434/**
1395 * netlink_change_ngroups - change number of multicast groups 1435 * netlink_change_ngroups - change number of multicast groups
@@ -1461,6 +1501,7 @@ void netlink_set_nonroot(int protocol, unsigned int flags)
1461 if ((unsigned int)protocol < MAX_LINKS) 1501 if ((unsigned int)protocol < MAX_LINKS)
1462 nl_table[protocol].nl_nonroot = flags; 1502 nl_table[protocol].nl_nonroot = flags;
1463} 1503}
1504EXPORT_SYMBOL(netlink_set_nonroot);
1464 1505
1465static void netlink_destroy_callback(struct netlink_callback *cb) 1506static void netlink_destroy_callback(struct netlink_callback *cb)
1466{ 1507{
@@ -1529,8 +1570,9 @@ errout:
1529 1570
1530int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1571int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1531 struct nlmsghdr *nlh, 1572 struct nlmsghdr *nlh,
1532 int (*dump)(struct sk_buff *skb, struct netlink_callback*), 1573 int (*dump)(struct sk_buff *skb,
1533 int (*done)(struct netlink_callback*)) 1574 struct netlink_callback *),
1575 int (*done)(struct netlink_callback *))
1534{ 1576{
1535 struct netlink_callback *cb; 1577 struct netlink_callback *cb;
1536 struct sock *sk; 1578 struct sock *sk;
@@ -1571,6 +1613,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1571 */ 1613 */
1572 return -EINTR; 1614 return -EINTR;
1573} 1615}
1616EXPORT_SYMBOL(netlink_dump_start);
1574 1617
1575void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) 1618void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1576{ 1619{
@@ -1605,6 +1648,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1605 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); 1648 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1606 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1649 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1607} 1650}
1651EXPORT_SYMBOL(netlink_ack);
1608 1652
1609int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1653int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1610 struct nlmsghdr *)) 1654 struct nlmsghdr *))
@@ -1638,7 +1682,7 @@ ack:
1638 netlink_ack(skb, nlh, err); 1682 netlink_ack(skb, nlh, err);
1639 1683
1640skip: 1684skip:
1641 msglen = NLMSG_ALIGN(nlh->nlmsg_len); 1685 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1642 if (msglen > skb->len) 1686 if (msglen > skb->len)
1643 msglen = skb->len; 1687 msglen = skb->len;
1644 skb_pull(skb, msglen); 1688 skb_pull(skb, msglen);
@@ -1646,6 +1690,7 @@ skip:
1646 1690
1647 return 0; 1691 return 0;
1648} 1692}
1693EXPORT_SYMBOL(netlink_rcv_skb);
1649 1694
1650/** 1695/**
1651 * nlmsg_notify - send a notification netlink message 1696 * nlmsg_notify - send a notification netlink message
@@ -1678,10 +1723,11 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1678 1723
1679 return err; 1724 return err;
1680} 1725}
1726EXPORT_SYMBOL(nlmsg_notify);
1681 1727
1682#ifdef CONFIG_PROC_FS 1728#ifdef CONFIG_PROC_FS
1683struct nl_seq_iter { 1729struct nl_seq_iter {
1684 struct net *net; 1730 struct seq_net_private p;
1685 int link; 1731 int link;
1686 int hash_idx; 1732 int hash_idx;
1687}; 1733};
@@ -1694,12 +1740,12 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1694 struct hlist_node *node; 1740 struct hlist_node *node;
1695 loff_t off = 0; 1741 loff_t off = 0;
1696 1742
1697 for (i=0; i<MAX_LINKS; i++) { 1743 for (i = 0; i < MAX_LINKS; i++) {
1698 struct nl_pid_hash *hash = &nl_table[i].hash; 1744 struct nl_pid_hash *hash = &nl_table[i].hash;
1699 1745
1700 for (j = 0; j <= hash->mask; j++) { 1746 for (j = 0; j <= hash->mask; j++) {
1701 sk_for_each(s, node, &hash->table[j]) { 1747 sk_for_each(s, node, &hash->table[j]) {
1702 if (iter->net != s->sk_net) 1748 if (iter->p.net != s->sk_net)
1703 continue; 1749 continue;
1704 if (off == pos) { 1750 if (off == pos) {
1705 iter->link = i; 1751 iter->link = i;
@@ -1714,6 +1760,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1714} 1760}
1715 1761
1716static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) 1762static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1763 __acquires(nl_table_lock)
1717{ 1764{
1718 read_lock(&nl_table_lock); 1765 read_lock(&nl_table_lock);
1719 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; 1766 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -1734,7 +1781,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1734 s = v; 1781 s = v;
1735 do { 1782 do {
1736 s = sk_next(s); 1783 s = sk_next(s);
1737 } while (s && (iter->net != s->sk_net)); 1784 } while (s && (iter->p.net != s->sk_net));
1738 if (s) 1785 if (s)
1739 return s; 1786 return s;
1740 1787
@@ -1746,7 +1793,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1746 1793
1747 for (; j <= hash->mask; j++) { 1794 for (; j <= hash->mask; j++) {
1748 s = sk_head(&hash->table[j]); 1795 s = sk_head(&hash->table[j]);
1749 while (s && (iter->net != s->sk_net)) 1796 while (s && (iter->p.net != s->sk_net))
1750 s = sk_next(s); 1797 s = sk_next(s);
1751 if (s) { 1798 if (s) {
1752 iter->link = i; 1799 iter->link = i;
@@ -1762,6 +1809,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1762} 1809}
1763 1810
1764static void netlink_seq_stop(struct seq_file *seq, void *v) 1811static void netlink_seq_stop(struct seq_file *seq, void *v)
1812 __releases(nl_table_lock)
1765{ 1813{
1766 read_unlock(&nl_table_lock); 1814 read_unlock(&nl_table_lock);
1767} 1815}
@@ -1802,27 +1850,8 @@ static const struct seq_operations netlink_seq_ops = {
1802 1850
1803static int netlink_seq_open(struct inode *inode, struct file *file) 1851static int netlink_seq_open(struct inode *inode, struct file *file)
1804{ 1852{
1805 struct nl_seq_iter *iter; 1853 return seq_open_net(inode, file, &netlink_seq_ops,
1806 1854 sizeof(struct nl_seq_iter));
1807 iter = __seq_open_private(file, &netlink_seq_ops, sizeof(*iter));
1808 if (!iter)
1809 return -ENOMEM;
1810
1811 iter->net = get_proc_net(inode);
1812 if (!iter->net) {
1813 seq_release_private(inode, file);
1814 return -ENXIO;
1815 }
1816
1817 return 0;
1818}
1819
1820static int netlink_seq_release(struct inode *inode, struct file *file)
1821{
1822 struct seq_file *seq = file->private_data;
1823 struct nl_seq_iter *iter = seq->private;
1824 put_net(iter->net);
1825 return seq_release_private(inode, file);
1826} 1855}
1827 1856
1828static const struct file_operations netlink_seq_fops = { 1857static const struct file_operations netlink_seq_fops = {
@@ -1830,7 +1859,7 @@ static const struct file_operations netlink_seq_fops = {
1830 .open = netlink_seq_open, 1859 .open = netlink_seq_open,
1831 .read = seq_read, 1860 .read = seq_read,
1832 .llseek = seq_lseek, 1861 .llseek = seq_lseek,
1833 .release = netlink_seq_release, 1862 .release = seq_release_net,
1834}; 1863};
1835 1864
1836#endif 1865#endif
@@ -1839,11 +1868,13 @@ int netlink_register_notifier(struct notifier_block *nb)
1839{ 1868{
1840 return atomic_notifier_chain_register(&netlink_chain, nb); 1869 return atomic_notifier_chain_register(&netlink_chain, nb);
1841} 1870}
1871EXPORT_SYMBOL(netlink_register_notifier);
1842 1872
1843int netlink_unregister_notifier(struct notifier_block *nb) 1873int netlink_unregister_notifier(struct notifier_block *nb)
1844{ 1874{
1845 return atomic_notifier_chain_unregister(&netlink_chain, nb); 1875 return atomic_notifier_chain_unregister(&netlink_chain, nb);
1846} 1876}
1877EXPORT_SYMBOL(netlink_unregister_notifier);
1847 1878
1848static const struct proto_ops netlink_ops = { 1879static const struct proto_ops netlink_ops = {
1849 .family = PF_NETLINK, 1880 .family = PF_NETLINK,
@@ -1922,7 +1953,7 @@ static int __init netlink_proto_init(void)
1922 for (i = 0; i < MAX_LINKS; i++) { 1953 for (i = 0; i < MAX_LINKS; i++) {
1923 struct nl_pid_hash *hash = &nl_table[i].hash; 1954 struct nl_pid_hash *hash = &nl_table[i].hash;
1924 1955
1925 hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table)); 1956 hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
1926 if (!hash->table) { 1957 if (!hash->table) {
1927 while (i-- > 0) 1958 while (i-- > 0)
1928 nl_pid_hash_free(nl_table[i].hash.table, 1959 nl_pid_hash_free(nl_table[i].hash.table,
@@ -1930,7 +1961,6 @@ static int __init netlink_proto_init(void)
1930 kfree(nl_table); 1961 kfree(nl_table);
1931 goto panic; 1962 goto panic;
1932 } 1963 }
1933 memset(hash->table, 0, 1 * sizeof(*hash->table));
1934 hash->max_shift = order; 1964 hash->max_shift = order;
1935 hash->shift = 0; 1965 hash->shift = 0;
1936 hash->mask = 0; 1966 hash->mask = 0;
@@ -1948,14 +1978,3 @@ panic:
1948} 1978}
1949 1979
1950core_initcall(netlink_proto_init); 1980core_initcall(netlink_proto_init);
1951
1952EXPORT_SYMBOL(netlink_ack);
1953EXPORT_SYMBOL(netlink_rcv_skb);
1954EXPORT_SYMBOL(netlink_broadcast);
1955EXPORT_SYMBOL(netlink_dump_start);
1956EXPORT_SYMBOL(netlink_kernel_create);
1957EXPORT_SYMBOL(netlink_register_notifier);
1958EXPORT_SYMBOL(netlink_set_nonroot);
1959EXPORT_SYMBOL(netlink_unicast);
1960EXPORT_SYMBOL(netlink_unregister_notifier);
1961EXPORT_SYMBOL(nlmsg_notify);
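
Review note: taken together, the af_netlink changes (a) move each EXPORT_SYMBOL next to its definition, (b) refcount kernel-side sockets through nl_table[].registered so several kernel users can share one protocol, and (c) pair netlink_kernel_create(), which now drops its namespace reference itself, with the new netlink_kernel_release(). A lifecycle sketch for a hypothetical NETLINK_FOO protocol (foo_rcv and the unit are placeholders):

    static struct sock *foo_nl_sock;

    static void foo_rcv(struct sk_buff *skb)
    {
            /* process requests, typically via netlink_rcv_skb() */
    }

    static int __init foo_init(void)
    {
            foo_nl_sock = netlink_kernel_create(&init_net, NETLINK_FOO, 0,
                                                foo_rcv, NULL, THIS_MODULE);
            return foo_nl_sock ? 0 : -ENOMEM;
    }

    static void __exit foo_exit(void)
    {
            /* re-homes the socket to init_net before the final sock_put(),
             * as the hunk above implements; safe on a NULL socket too */
            netlink_kernel_release(foo_nl_sock);
    }
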
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index ec39d12c2423..feb326f4a752 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -430,6 +430,24 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
430 return 0; 430 return 0;
431} 431}
432 432
433/**
434 * nla_append - Add a netlink attribute without header or padding
435 * @skb: socket buffer to add attribute to
436 * @attrlen: length of attribute payload
437 * @data: head of attribute payload
438 *
439 * Returns -1 if the tailroom of the skb is insufficient to store
440 * the attribute payload.
441 */
442int nla_append(struct sk_buff *skb, int attrlen, const void *data)
443{
444 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
445 return -1;
446
447 memcpy(skb_put(skb, attrlen), data, attrlen);
448 return 0;
449}
450
433EXPORT_SYMBOL(nla_validate); 451EXPORT_SYMBOL(nla_validate);
434EXPORT_SYMBOL(nla_parse); 452EXPORT_SYMBOL(nla_parse);
435EXPORT_SYMBOL(nla_find); 453EXPORT_SYMBOL(nla_find);
@@ -445,3 +463,4 @@ EXPORT_SYMBOL(nla_put_nohdr);
445EXPORT_SYMBOL(nla_memcpy); 463EXPORT_SYMBOL(nla_memcpy);
446EXPORT_SYMBOL(nla_memcmp); 464EXPORT_SYMBOL(nla_memcmp);
447EXPORT_SYMBOL(nla_strcmp); 465EXPORT_SYMBOL(nla_strcmp);
466EXPORT_SYMBOL(nla_append);
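
Review note: nla_append() is the raw counterpart of nla_put() — it checks tailroom against the aligned length but copies only the payload, writing neither header nor padding, so it suits messages that extend an attribute stream framed elsewhere. Usage sketch (ext_data/ext_len are hypothetical):

    if (nla_append(skb, ext_len, ext_data) < 0)
            goto nla_put_failure;   /* tailroom exhausted */
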
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 6cfaad952c6c..1cb98e88f5e1 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -40,21 +40,10 @@ void nr_init_timers(struct sock *sk)
40{ 40{
41 struct nr_sock *nr = nr_sk(sk); 41 struct nr_sock *nr = nr_sk(sk);
42 42
43 init_timer(&nr->t1timer); 43 setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk);
44 nr->t1timer.data = (unsigned long)sk; 44 setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk);
45 nr->t1timer.function = &nr_t1timer_expiry; 45 setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk);
46 46 setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk);
47 init_timer(&nr->t2timer);
48 nr->t2timer.data = (unsigned long)sk;
49 nr->t2timer.function = &nr_t2timer_expiry;
50
51 init_timer(&nr->t4timer);
52 nr->t4timer.data = (unsigned long)sk;
53 nr->t4timer.function = &nr_t4timer_expiry;
54
55 init_timer(&nr->idletimer);
56 nr->idletimer.data = (unsigned long)sk;
57 nr->idletimer.function = &nr_idletimer_expiry;
58 47
59 /* initialized by sock_init_data */ 48 /* initialized by sock_init_data */
60 sk->sk_timer.data = (unsigned long)sk; 49 sk->sk_timer.data = (unsigned long)sk;
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index 2ea68da01fb8..34c96c9674df 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -170,29 +170,15 @@ static ctl_table nr_table[] = {
170 { .ctl_name = 0 } 170 { .ctl_name = 0 }
171}; 171};
172 172
173static ctl_table nr_dir_table[] = { 173static struct ctl_path nr_path[] = {
174 { 174 { .procname = "net", .ctl_name = CTL_NET, },
175 .ctl_name = NET_NETROM, 175 { .procname = "netrom", .ctl_name = NET_NETROM, },
176 .procname = "netrom", 176 { }
177 .mode = 0555,
178 .child = nr_table
179 },
180 { .ctl_name = 0 }
181};
182
183static ctl_table nr_root_table[] = {
184 {
185 .ctl_name = CTL_NET,
186 .procname = "net",
187 .mode = 0555,
188 .child = nr_dir_table
189 },
190 { .ctl_name = 0 }
191}; 177};
192 178
193void __init nr_register_sysctl(void) 179void __init nr_register_sysctl(void)
194{ 180{
195 nr_table_header = register_sysctl_table(nr_root_table); 181 nr_table_header = register_sysctl_paths(nr_path, nr_table);
196} 182}
197 183
198void nr_unregister_sysctl(void) 184void nr_unregister_sysctl(void)
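
Review note: register_sysctl_paths() takes the directory chain as a flat ctl_path array, so the netrom conversion (and the matching rose one below) deletes two hand-built nested ctl_table trees each. The general pattern, with a hypothetical "foo" subtree (foo_table is the leaf ctl_table array, as nr_table above):

    static struct ctl_path foo_path[] = {
            { .procname = "net", .ctl_name = CTL_NET, },
            { .procname = "foo", .ctl_name = NET_FOO, },  /* hypothetical */
            { }                                           /* terminator */
    };

    static struct ctl_table_header *foo_header;

    void __init foo_register_sysctl(void)
    {
            foo_header = register_sysctl_paths(foo_path, foo_table);
    }

    void foo_unregister_sysctl(void)
    {
            unregister_sysctl_table(foo_header);
    }
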
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8a7807dbba01..b8b827c7062d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -135,10 +135,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it)
135 packet classifier depends on it. 135 packet classifier depends on it.
136 */ 136 */
137 137
138/* List of all packet sockets. */
139static HLIST_HEAD(packet_sklist);
140static DEFINE_RWLOCK(packet_sklist_lock);
141
142/* Private packet socket structures. */ 138/* Private packet socket structures. */
143 139
144struct packet_mclist 140struct packet_mclist
@@ -246,9 +242,6 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
246 struct sock *sk; 242 struct sock *sk;
247 struct sockaddr_pkt *spkt; 243 struct sockaddr_pkt *spkt;
248 244
249 if (dev->nd_net != &init_net)
250 goto out;
251
252 /* 245 /*
253 * When we registered the protocol we saved the socket in the data 246 * When we registered the protocol we saved the socket in the data
254 * field for just this event. 247 * field for just this event.
@@ -270,6 +263,9 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
270 if (skb->pkt_type == PACKET_LOOPBACK) 263 if (skb->pkt_type == PACKET_LOOPBACK)
271 goto out; 264 goto out;
272 265
266 if (dev->nd_net != sk->sk_net)
267 goto out;
268
273 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 269 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
274 goto oom; 270 goto oom;
275 271
@@ -341,7 +337,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
341 */ 337 */
342 338
343 saddr->spkt_device[13] = 0; 339 saddr->spkt_device[13] = 0;
344 dev = dev_get_by_name(&init_net, saddr->spkt_device); 340 dev = dev_get_by_name(sk->sk_net, saddr->spkt_device);
345 err = -ENODEV; 341 err = -ENODEV;
346 if (dev == NULL) 342 if (dev == NULL)
347 goto out_unlock; 343 goto out_unlock;
@@ -449,15 +445,15 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
449 int skb_len = skb->len; 445 int skb_len = skb->len;
450 unsigned int snaplen, res; 446 unsigned int snaplen, res;
451 447
452 if (dev->nd_net != &init_net)
453 goto drop;
454
455 if (skb->pkt_type == PACKET_LOOPBACK) 448 if (skb->pkt_type == PACKET_LOOPBACK)
456 goto drop; 449 goto drop;
457 450
458 sk = pt->af_packet_priv; 451 sk = pt->af_packet_priv;
459 po = pkt_sk(sk); 452 po = pkt_sk(sk);
460 453
454 if (dev->nd_net != sk->sk_net)
455 goto drop;
456
461 skb->dev = dev; 457 skb->dev = dev;
462 458
463 if (dev->header_ops) { 459 if (dev->header_ops) {
@@ -566,15 +562,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
566 struct sk_buff *copy_skb = NULL; 562 struct sk_buff *copy_skb = NULL;
567 struct timeval tv; 563 struct timeval tv;
568 564
569 if (dev->nd_net != &init_net)
570 goto drop;
571
572 if (skb->pkt_type == PACKET_LOOPBACK) 565 if (skb->pkt_type == PACKET_LOOPBACK)
573 goto drop; 566 goto drop;
574 567
575 sk = pt->af_packet_priv; 568 sk = pt->af_packet_priv;
576 po = pkt_sk(sk); 569 po = pkt_sk(sk);
577 570
571 if (dev->nd_net != sk->sk_net)
572 goto drop;
573
578 if (dev->header_ops) { 574 if (dev->header_ops) {
579 if (sk->sk_type != SOCK_DGRAM) 575 if (sk->sk_type != SOCK_DGRAM)
580 skb_push(skb, skb->data - skb_mac_header(skb)); 576 skb_push(skb, skb->data - skb_mac_header(skb));
@@ -732,7 +728,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
732 } 728 }
733 729
734 730
735 dev = dev_get_by_index(&init_net, ifindex); 731 dev = dev_get_by_index(sk->sk_net, ifindex);
736 err = -ENXIO; 732 err = -ENXIO;
737 if (dev == NULL) 733 if (dev == NULL)
738 goto out_unlock; 734 goto out_unlock;
@@ -799,15 +795,17 @@ static int packet_release(struct socket *sock)
799{ 795{
800 struct sock *sk = sock->sk; 796 struct sock *sk = sock->sk;
801 struct packet_sock *po; 797 struct packet_sock *po;
798 struct net *net;
802 799
803 if (!sk) 800 if (!sk)
804 return 0; 801 return 0;
805 802
803 net = sk->sk_net;
806 po = pkt_sk(sk); 804 po = pkt_sk(sk);
807 805
808 write_lock_bh(&packet_sklist_lock); 806 write_lock_bh(&net->packet.sklist_lock);
809 sk_del_node_init(sk); 807 sk_del_node_init(sk);
810 write_unlock_bh(&packet_sklist_lock); 808 write_unlock_bh(&net->packet.sklist_lock);
811 809
812 /* 810 /*
813 * Unhook packet receive handler. 811 * Unhook packet receive handler.
@@ -916,7 +914,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add
916 return -EINVAL; 914 return -EINVAL;
917 strlcpy(name,uaddr->sa_data,sizeof(name)); 915 strlcpy(name,uaddr->sa_data,sizeof(name));
918 916
919 dev = dev_get_by_name(&init_net, name); 917 dev = dev_get_by_name(sk->sk_net, name);
920 if (dev) { 918 if (dev) {
921 err = packet_do_bind(sk, dev, pkt_sk(sk)->num); 919 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
922 dev_put(dev); 920 dev_put(dev);
@@ -943,7 +941,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
943 941
944 if (sll->sll_ifindex) { 942 if (sll->sll_ifindex) {
945 err = -ENODEV; 943 err = -ENODEV;
946 dev = dev_get_by_index(&init_net, sll->sll_ifindex); 944 dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex);
947 if (dev == NULL) 945 if (dev == NULL)
948 goto out; 946 goto out;
949 } 947 }
@@ -972,9 +970,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
972 __be16 proto = (__force __be16)protocol; /* weird, but documented */ 970 __be16 proto = (__force __be16)protocol; /* weird, but documented */
973 int err; 971 int err;
974 972
975 if (net != &init_net)
976 return -EAFNOSUPPORT;
977
978 if (!capable(CAP_NET_RAW)) 973 if (!capable(CAP_NET_RAW))
979 return -EPERM; 974 return -EPERM;
980 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && 975 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
@@ -1020,9 +1015,9 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
1020 po->running = 1; 1015 po->running = 1;
1021 } 1016 }
1022 1017
1023 write_lock_bh(&packet_sklist_lock); 1018 write_lock_bh(&net->packet.sklist_lock);
1024 sk_add_node(sk, &packet_sklist); 1019 sk_add_node(sk, &net->packet.sklist);
1025 write_unlock_bh(&packet_sklist_lock); 1020 write_unlock_bh(&net->packet.sklist_lock);
1026 return(0); 1021 return(0);
1027out: 1022out:
1028 return err; 1023 return err;
@@ -1140,7 +1135,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1140 return -EOPNOTSUPP; 1135 return -EOPNOTSUPP;
1141 1136
1142 uaddr->sa_family = AF_PACKET; 1137 uaddr->sa_family = AF_PACKET;
1143 dev = dev_get_by_index(&init_net, pkt_sk(sk)->ifindex); 1138 dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex);
1144 if (dev) { 1139 if (dev) {
1145 strlcpy(uaddr->sa_data, dev->name, 15); 1140 strlcpy(uaddr->sa_data, dev->name, 15);
1146 dev_put(dev); 1141 dev_put(dev);
@@ -1165,7 +1160,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1165 sll->sll_family = AF_PACKET; 1160 sll->sll_family = AF_PACKET;
1166 sll->sll_ifindex = po->ifindex; 1161 sll->sll_ifindex = po->ifindex;
1167 sll->sll_protocol = po->num; 1162 sll->sll_protocol = po->num;
1168 dev = dev_get_by_index(&init_net, po->ifindex); 1163 dev = dev_get_by_index(sk->sk_net, po->ifindex);
1169 if (dev) { 1164 if (dev) {
1170 sll->sll_hatype = dev->type; 1165 sll->sll_hatype = dev->type;
1171 sll->sll_halen = dev->addr_len; 1166 sll->sll_halen = dev->addr_len;
@@ -1217,7 +1212,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1217 rtnl_lock(); 1212 rtnl_lock();
1218 1213
1219 err = -ENODEV; 1214 err = -ENODEV;
1220 dev = __dev_get_by_index(&init_net, mreq->mr_ifindex); 1215 dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex);
1221 if (!dev) 1216 if (!dev)
1222 goto done; 1217 goto done;
1223 1218
@@ -1271,7 +1266,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1271 if (--ml->count == 0) { 1266 if (--ml->count == 0) {
1272 struct net_device *dev; 1267 struct net_device *dev;
1273 *mlp = ml->next; 1268 *mlp = ml->next;
1274 dev = dev_get_by_index(&init_net, ml->ifindex); 1269 dev = dev_get_by_index(sk->sk_net, ml->ifindex);
1275 if (dev) { 1270 if (dev) {
1276 packet_dev_mc(dev, ml, -1); 1271 packet_dev_mc(dev, ml, -1);
1277 dev_put(dev); 1272 dev_put(dev);
@@ -1299,7 +1294,7 @@ static void packet_flush_mclist(struct sock *sk)
1299 struct net_device *dev; 1294 struct net_device *dev;
1300 1295
1301 po->mclist = ml->next; 1296 po->mclist = ml->next;
1302 if ((dev = dev_get_by_index(&init_net, ml->ifindex)) != NULL) { 1297 if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) {
1303 packet_dev_mc(dev, ml, -1); 1298 packet_dev_mc(dev, ml, -1);
1304 dev_put(dev); 1299 dev_put(dev);
1305 } 1300 }
@@ -1455,12 +1450,10 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
1455 struct sock *sk; 1450 struct sock *sk;
1456 struct hlist_node *node; 1451 struct hlist_node *node;
1457 struct net_device *dev = data; 1452 struct net_device *dev = data;
1453 struct net *net = dev->nd_net;
1458 1454
1459 if (dev->nd_net != &init_net) 1455 read_lock(&net->packet.sklist_lock);
1460 return NOTIFY_DONE; 1456 sk_for_each(sk, node, &net->packet.sklist) {
1461
1462 read_lock(&packet_sklist_lock);
1463 sk_for_each(sk, node, &packet_sklist) {
1464 struct packet_sock *po = pkt_sk(sk); 1457 struct packet_sock *po = pkt_sk(sk);
1465 1458
1466 switch (msg) { 1459 switch (msg) {
@@ -1499,7 +1492,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
1499 break; 1492 break;
1500 } 1493 }
1501 } 1494 }
1502 read_unlock(&packet_sklist_lock); 1495 read_unlock(&net->packet.sklist_lock);
1503 return NOTIFY_DONE; 1496 return NOTIFY_DONE;
1504} 1497}
1505 1498
@@ -1547,6 +1540,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
1547 case SIOCGIFDSTADDR: 1540 case SIOCGIFDSTADDR:
1548 case SIOCSIFDSTADDR: 1541 case SIOCSIFDSTADDR:
1549 case SIOCSIFFLAGS: 1542 case SIOCSIFFLAGS:
1543 if (sk->sk_net != &init_net)
1544 return -ENOIOCTLCMD;
1550 return inet_dgram_ops.ioctl(sock, cmd, arg); 1545 return inet_dgram_ops.ioctl(sock, cmd, arg);
1551#endif 1546#endif
1552 1547
@@ -1862,12 +1857,12 @@ static struct notifier_block packet_netdev_notifier = {
1862}; 1857};
1863 1858
1864#ifdef CONFIG_PROC_FS 1859#ifdef CONFIG_PROC_FS
1865static inline struct sock *packet_seq_idx(loff_t off) 1860static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
1866{ 1861{
1867 struct sock *s; 1862 struct sock *s;
1868 struct hlist_node *node; 1863 struct hlist_node *node;
1869 1864
1870 sk_for_each(s, node, &packet_sklist) { 1865 sk_for_each(s, node, &net->packet.sklist) {
1871 if (!off--) 1866 if (!off--)
1872 return s; 1867 return s;
1873 } 1868 }
@@ -1875,22 +1870,27 @@ static inline struct sock *packet_seq_idx(loff_t off)
1875} 1870}
1876 1871
1877static void *packet_seq_start(struct seq_file *seq, loff_t *pos) 1872static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1873 __acquires(seq_file_net(seq)->packet.sklist_lock)
1878{ 1874{
1879 read_lock(&packet_sklist_lock); 1875 struct net *net = seq_file_net(seq);
1880 return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; 1876 read_lock(&net->packet.sklist_lock);
1877 return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
1881} 1878}
1882 1879
1883static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1880static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1884{ 1881{
1882 struct net *net = seq_file_net(seq);
1885 ++*pos; 1883 ++*pos;
1886 return (v == SEQ_START_TOKEN) 1884 return (v == SEQ_START_TOKEN)
1887 ? sk_head(&packet_sklist) 1885 ? sk_head(&net->packet.sklist)
1888 : sk_next((struct sock*)v) ; 1886 : sk_next((struct sock*)v) ;
1889} 1887}
1890 1888
1891static void packet_seq_stop(struct seq_file *seq, void *v) 1889static void packet_seq_stop(struct seq_file *seq, void *v)
1890 __releases(seq_file_net(seq)->packet.sklist_lock)
1892{ 1891{
1893 read_unlock(&packet_sklist_lock); 1892 struct net *net = seq_file_net(seq);
1893 read_unlock(&net->packet.sklist_lock);
1894} 1894}
1895 1895
1896static int packet_seq_show(struct seq_file *seq, void *v) 1896static int packet_seq_show(struct seq_file *seq, void *v)
@@ -1926,7 +1926,8 @@ static const struct seq_operations packet_seq_ops = {
1926 1926
1927static int packet_seq_open(struct inode *inode, struct file *file) 1927static int packet_seq_open(struct inode *inode, struct file *file)
1928{ 1928{
1929 return seq_open(file, &packet_seq_ops); 1929 return seq_open_net(inode, file, &packet_seq_ops,
1930 sizeof(struct seq_net_private));
1930} 1931}
1931 1932
1932static const struct file_operations packet_seq_fops = { 1933static const struct file_operations packet_seq_fops = {
@@ -1934,15 +1935,37 @@ static const struct file_operations packet_seq_fops = {
1934 .open = packet_seq_open, 1935 .open = packet_seq_open,
1935 .read = seq_read, 1936 .read = seq_read,
1936 .llseek = seq_lseek, 1937 .llseek = seq_lseek,
1937 .release = seq_release, 1938 .release = seq_release_net,
1938}; 1939};
1939 1940
1940#endif 1941#endif
1941 1942
1943static int packet_net_init(struct net *net)
1944{
1945 rwlock_init(&net->packet.sklist_lock);
1946 INIT_HLIST_HEAD(&net->packet.sklist);
1947
1948 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
1949 return -ENOMEM;
1950
1951 return 0;
1952}
1953
1954static void packet_net_exit(struct net *net)
1955{
1956 proc_net_remove(net, "packet");
1957}
1958
1959static struct pernet_operations packet_net_ops = {
1960 .init = packet_net_init,
1961 .exit = packet_net_exit,
1962};
1963
1964
1942static void __exit packet_exit(void) 1965static void __exit packet_exit(void)
1943{ 1966{
1944 proc_net_remove(&init_net, "packet");
1945 unregister_netdevice_notifier(&packet_netdev_notifier); 1967 unregister_netdevice_notifier(&packet_netdev_notifier);
1968 unregister_pernet_subsys(&packet_net_ops);
1946 sock_unregister(PF_PACKET); 1969 sock_unregister(PF_PACKET);
1947 proto_unregister(&packet_proto); 1970 proto_unregister(&packet_proto);
1948} 1971}
@@ -1955,8 +1978,8 @@ static int __init packet_init(void)
1955 goto out; 1978 goto out;
1956 1979
1957 sock_register(&packet_family_ops); 1980 sock_register(&packet_family_ops);
1981 register_pernet_subsys(&packet_net_ops);
1958 register_netdevice_notifier(&packet_netdev_notifier); 1982 register_netdevice_notifier(&packet_netdev_notifier);
1959 proc_net_fops_create(&init_net, "packet", 0, &packet_seq_fops);
1960out: 1983out:
1961 return rc; 1984 return rc;
1962} 1985}
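
Review note: the af_packet conversion is the standard recipe for making a protocol namespace-aware — global state (the socket list, its lock, the proc entry) moves into struct net, each &init_net lookup becomes sk->sk_net or dev->nd_net, and a pernet_operations pair creates and tears the state down as namespaces come and go. The skeleton, for a hypothetical protocol whose state lives in net->foo:

    static int foo_net_init(struct net *net)
    {
            rwlock_init(&net->foo.sklist_lock);
            INIT_HLIST_HEAD(&net->foo.sklist);
            if (!proc_net_fops_create(net, "foo", 0, &foo_seq_fops))
                    return -ENOMEM;
            return 0;
    }

    static void foo_net_exit(struct net *net)
    {
            proc_net_remove(net, "foo");
    }

    static struct pernet_operations foo_net_ops = {
            .init = foo_net_init,
            .exit = foo_net_exit,
    };

    /* in module init: */  register_pernet_subsys(&foo_net_ops);
    /* in module exit: */  unregister_pernet_subsys(&foo_net_ops);
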
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ed2d65cd8010..4a31a81059ab 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -116,7 +116,7 @@ int rosecmp(rose_address *addr1, rose_address *addr2)
116 */ 116 */
117int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask) 117int rosecmpm(rose_address *addr1, rose_address *addr2, unsigned short mask)
118{ 118{
119 int i, j; 119 unsigned int i, j;
120 120
121 if (mask > 10) 121 if (mask > 10)
122 return 1; 122 return 1;
@@ -345,10 +345,9 @@ void rose_destroy_socket(struct sock *sk)
345 if (atomic_read(&sk->sk_wmem_alloc) || 345 if (atomic_read(&sk->sk_wmem_alloc) ||
346 atomic_read(&sk->sk_rmem_alloc)) { 346 atomic_read(&sk->sk_rmem_alloc)) {
347 /* Defer: outstanding buffers */ 347 /* Defer: outstanding buffers */
348 init_timer(&sk->sk_timer); 348 setup_timer(&sk->sk_timer, rose_destroy_timer,
349 (unsigned long)sk);
349 sk->sk_timer.expires = jiffies + 10 * HZ; 350 sk->sk_timer.expires = jiffies + 10 * HZ;
350 sk->sk_timer.function = rose_destroy_timer;
351 sk->sk_timer.data = (unsigned long)sk;
352 add_timer(&sk->sk_timer); 351 add_timer(&sk->sk_timer);
353 } else 352 } else
354 sock_put(sk); 353 sock_put(sk);
@@ -974,8 +973,8 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
974 */ 973 */
975 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); 974 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
976 975
977 len = (((skb->data[3] >> 4) & 0x0F) + 1) / 2; 976 len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
978 len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2; 977 len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
979 if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { 978 if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
980 rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); 979 rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
981 return 0; 980 return 0;
@@ -1378,6 +1377,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1378 1377
1379#ifdef CONFIG_PROC_FS 1378#ifdef CONFIG_PROC_FS
1380static void *rose_info_start(struct seq_file *seq, loff_t *pos) 1379static void *rose_info_start(struct seq_file *seq, loff_t *pos)
1380 __acquires(rose_list_lock)
1381{ 1381{
1382 int i; 1382 int i;
1383 struct sock *s; 1383 struct sock *s;
@@ -1405,6 +1405,7 @@ static void *rose_info_next(struct seq_file *seq, void *v, loff_t *pos)
1405} 1405}
1406 1406
1407static void rose_info_stop(struct seq_file *seq, void *v) 1407static void rose_info_stop(struct seq_file *seq, void *v)
1408 __releases(rose_list_lock)
1408{ 1409{
1409 spin_unlock_bh(&rose_list_lock); 1410 spin_unlock_bh(&rose_list_lock);
1410} 1411}
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 4ee0879d3540..7f7fcb46b4fa 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -182,7 +182,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety
182 break; 182 break;
183 } 183 }
184 if (atomic_read(&sk->sk_rmem_alloc) > 184 if (atomic_read(&sk->sk_rmem_alloc) >
185 (sk->sk_rcvbuf / 2)) 185 (sk->sk_rcvbuf >> 1))
186 rose->condition |= ROSE_COND_OWN_RX_BUSY; 186 rose->condition |= ROSE_COND_OWN_RX_BUSY;
187 } 187 }
188 /* 188 /*
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 540c0f26ffee..fb9359fb2358 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -994,8 +994,8 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
994 goto out; 994 goto out;
995 } 995 }
996 996
997 len = (((skb->data[3] >> 4) & 0x0F) + 1) / 2; 997 len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
998 len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2; 998 len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
999 999
1000 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); 1000 memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
1001 1001
@@ -1068,6 +1068,7 @@ out:
1068#ifdef CONFIG_PROC_FS 1068#ifdef CONFIG_PROC_FS
1069 1069
1070static void *rose_node_start(struct seq_file *seq, loff_t *pos) 1070static void *rose_node_start(struct seq_file *seq, loff_t *pos)
1071 __acquires(rose_neigh_list_lock)
1071{ 1072{
1072 struct rose_node *rose_node; 1073 struct rose_node *rose_node;
1073 int i = 1; 1074 int i = 1;
@@ -1091,6 +1092,7 @@ static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos)
1091} 1092}
1092 1093
1093static void rose_node_stop(struct seq_file *seq, void *v) 1094static void rose_node_stop(struct seq_file *seq, void *v)
1095 __releases(rose_neigh_list_lock)
1094{ 1096{
1095 spin_unlock_bh(&rose_neigh_list_lock); 1097 spin_unlock_bh(&rose_neigh_list_lock);
1096} 1098}
@@ -1144,6 +1146,7 @@ const struct file_operations rose_nodes_fops = {
1144}; 1146};
1145 1147
1146static void *rose_neigh_start(struct seq_file *seq, loff_t *pos) 1148static void *rose_neigh_start(struct seq_file *seq, loff_t *pos)
1149 __acquires(rose_neigh_list_lock)
1147{ 1150{
1148 struct rose_neigh *rose_neigh; 1151 struct rose_neigh *rose_neigh;
1149 int i = 1; 1152 int i = 1;
@@ -1167,6 +1170,7 @@ static void *rose_neigh_next(struct seq_file *seq, void *v, loff_t *pos)
1167} 1170}
1168 1171
1169static void rose_neigh_stop(struct seq_file *seq, void *v) 1172static void rose_neigh_stop(struct seq_file *seq, void *v)
1173 __releases(rose_neigh_list_lock)
1170{ 1174{
1171 spin_unlock_bh(&rose_neigh_list_lock); 1175 spin_unlock_bh(&rose_neigh_list_lock);
1172} 1176}
@@ -1227,6 +1231,7 @@ const struct file_operations rose_neigh_fops = {
1227 1231
1228 1232
1229static void *rose_route_start(struct seq_file *seq, loff_t *pos) 1233static void *rose_route_start(struct seq_file *seq, loff_t *pos)
1234 __acquires(rose_route_list_lock)
1230{ 1235{
1231 struct rose_route *rose_route; 1236 struct rose_route *rose_route;
1232 int i = 1; 1237 int i = 1;
@@ -1250,6 +1255,7 @@ static void *rose_route_next(struct seq_file *seq, void *v, loff_t *pos)
1250} 1255}
1251 1256
1252static void rose_route_stop(struct seq_file *seq, void *v) 1257static void rose_route_stop(struct seq_file *seq, void *v)
1258 __releases(rose_route_list_lock)
1253{ 1259{
1254 spin_unlock_bh(&rose_route_list_lock); 1260 spin_unlock_bh(&rose_route_list_lock);
1255} 1261}
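
Review note: the __acquires()/__releases() markers added throughout the rose (and netlink/packet) seq_file code are sparse annotations — they declare that a function returns holding, or having dropped, a lock it does not balance internally, silencing sparse's context-imbalance warnings on start/stop pairs. A sketch for a generic iterator guarded by foo_lock (all names hypothetical):

    static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
            __acquires(foo_lock)
    {
            spin_lock_bh(&foo_lock);
            return foo_get_idx(seq, *pos);  /* hypothetical */
    }

    static void foo_seq_stop(struct seq_file *seq, void *v)
            __releases(foo_lock)
    {
            spin_unlock_bh(&foo_lock);
    }
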
diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c
index 455b0555a669..20be3485a97f 100644
--- a/net/rose/sysctl_net_rose.c
+++ b/net/rose/sysctl_net_rose.c
@@ -138,29 +138,15 @@ static ctl_table rose_table[] = {
138 { .ctl_name = 0 } 138 { .ctl_name = 0 }
139}; 139};
140 140
141static ctl_table rose_dir_table[] = { 141static struct ctl_path rose_path[] = {
142 { 142 { .procname = "net", .ctl_name = CTL_NET, },
143 .ctl_name = NET_ROSE, 143 { .procname = "rose", .ctl_name = NET_ROSE, },
144 .procname = "rose", 144 { }
145 .mode = 0555,
146 .child = rose_table
147 },
148 { .ctl_name = 0 }
149};
150
151static ctl_table rose_root_table[] = {
152 {
153 .ctl_name = CTL_NET,
154 .procname = "net",
155 .mode = 0555,
156 .child = rose_dir_table
157 },
158 { .ctl_name = 0 }
159}; 145};
160 146
161void __init rose_register_sysctl(void) 147void __init rose_register_sysctl(void)
162{ 148{
163 rose_table_header = register_sysctl_table(rose_root_table); 149 rose_table_header = register_sysctl_paths(rose_path, rose_table);
164} 150}
165 151
166void rose_unregister_sysctl(void) 152void rose_unregister_sysctl(void)
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index d6389450c4bf..5e82f1c0afbb 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk)
65 if (rxrpc_writable(sk)) { 65 if (rxrpc_writable(sk)) {
66 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 66 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
67 wake_up_interruptible(sk->sk_sleep); 67 wake_up_interruptible(sk->sk_sleep);
68 sk_wake_async(sk, 2, POLL_OUT); 68 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
69 } 69 }
70 read_unlock(&sk->sk_callback_lock); 70 read_unlock(&sk->sk_callback_lock);
71} 71}
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index d6667f7bc85e..3869a5866752 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -651,7 +651,7 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
651 651
652 candidate->trans = trans; 652 candidate->trans = trans;
653 candidate->epoch = hdr->epoch; 653 candidate->epoch = hdr->epoch;
654 candidate->cid = hdr->cid & __constant_cpu_to_be32(RXRPC_CIDMASK); 654 candidate->cid = hdr->cid & cpu_to_be32(RXRPC_CIDMASK);
655 candidate->service_id = hdr->serviceId; 655 candidate->service_id = hdr->serviceId;
656 candidate->security_ix = hdr->securityIndex; 656 candidate->security_ix = hdr->securityIndex;
657 candidate->in_clientflag = RXRPC_CLIENT_INITIATED; 657 candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 91b5bbb003e2..f8a699e92962 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -20,6 +20,7 @@
20#include <net/sock.h> 20#include <net/sock.h>
21#include <net/af_rxrpc.h> 21#include <net/af_rxrpc.h>
22#include <net/ip.h> 22#include <net/ip.h>
23#include <net/udp.h>
23#include "ar-internal.h" 24#include "ar-internal.h"
24 25
25unsigned long rxrpc_ack_timeout = 1; 26unsigned long rxrpc_ack_timeout = 1;
@@ -594,7 +595,7 @@ dead_call:
594 read_unlock_bh(&conn->lock); 595 read_unlock_bh(&conn->lock);
595 596
596 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && 597 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
597 sp->hdr.seq == __constant_cpu_to_be32(1)) { 598 sp->hdr.seq == cpu_to_be32(1)) {
598 _debug("incoming call"); 599 _debug("incoming call");
599 skb_queue_tail(&conn->trans->local->accept_queue, skb); 600 skb_queue_tail(&conn->trans->local->accept_queue, skb);
600 rxrpc_queue_work(&conn->trans->local->acceptor); 601 rxrpc_queue_work(&conn->trans->local->acceptor);
@@ -707,10 +708,13 @@ void rxrpc_data_ready(struct sock *sk, int count)
707 if (skb_checksum_complete(skb)) { 708 if (skb_checksum_complete(skb)) {
708 rxrpc_free_skb(skb); 709 rxrpc_free_skb(skb);
709 rxrpc_put_local(local); 710 rxrpc_put_local(local);
711 UDP_INC_STATS_BH(UDP_MIB_INERRORS, 0);
710 _leave(" [CSUM failed]"); 712 _leave(" [CSUM failed]");
711 return; 713 return;
712 } 714 }
713 715
716 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, 0);
717
714 /* the socket buffer we have is owned by UDP, with UDP's data all over 718 /* the socket buffer we have is owned by UDP, with UDP's data all over
715 * it, but we really want our own */ 719 * it, but we really want our own */
716 skb_orphan(skb); 720 skb_orphan(skb);
@@ -770,7 +774,7 @@ cant_route_call:
770 _debug("can't route call"); 774 _debug("can't route call");
771 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && 775 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
772 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { 776 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
773 if (sp->hdr.seq == __constant_cpu_to_be32(1)) { 777 if (sp->hdr.seq == cpu_to_be32(1)) {
774 _debug("first packet"); 778 _debug("first packet");
775 skb_queue_tail(&local->accept_queue, skb); 779 skb_queue_tail(&local->accept_queue, skb);
776 rxrpc_queue_work(&local->acceptor); 780 rxrpc_queue_work(&local->acceptor);
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index 90fa107a8af9..2abe2081a5e8 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -57,7 +57,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
57 BUG(); 57 BUG();
58 } 58 }
59 59
60 ret = ip_route_output_key(&rt, &fl); 60 ret = ip_route_output_key(&init_net, &rt, &fl);
61 if (ret < 0) { 61 if (ret < 0) {
62 _leave(" [route err %d]", ret); 62 _leave(" [route err %d]", ret);
63 return; 63 return;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 8e69d6993833..f48434adb7c2 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -284,7 +284,7 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
284 284
285 /* calculate the security checksum */ 285 /* calculate the security checksum */
286 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT)); 286 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
287 x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff); 287 x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
288 tmpbuf.x[0] = sp->hdr.callNumber; 288 tmpbuf.x[0] = sp->hdr.callNumber;
289 tmpbuf.x[1] = x; 289 tmpbuf.x[1] = x;
290 290
@@ -518,7 +518,7 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
518 518
519 /* validate the security checksum */ 519 /* validate the security checksum */
520 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT)); 520 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
521 x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff); 521 x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);
522 tmpbuf.x[0] = call->call_id; 522 tmpbuf.x[0] = call->call_id;
523 tmpbuf.x[1] = x; 523 tmpbuf.x[1] = x;
524 524
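
Review note: the rxrpc hunks drop __constant_cpu_to_be32() for plain cpu_to_be32(). The generic byteorder helpers already pick a constant-folding path via __builtin_constant_p, so for constant arguments both spellings compile identically; the __constant_ macros remain needed only where a strict integer constant expression is required, such as case labels (this is the rationale assumed here, not stated in the patch). For example:

    x |= sp->hdr.seq & cpu_to_be32(0x3fffffff);             /* new */
    x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);  /* old, same code */
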
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9c15c4888d12..87af7c913d81 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -198,6 +198,7 @@ config NET_SCH_NETEM
198 198
199config NET_SCH_INGRESS 199config NET_SCH_INGRESS
200 tristate "Ingress Qdisc" 200 tristate "Ingress Qdisc"
201 depends on NET_CLS_ACT || NETFILTER
201 ---help--- 202 ---help---
202 Say Y here if you want to use classifiers for incoming packets. 203 Say Y here if you want to use classifiers for incoming packets.
203 If unsure, say Y. 204 If unsure, say Y.
@@ -445,7 +446,6 @@ config NET_ACT_IPT
445config NET_ACT_NAT 446config NET_ACT_NAT
446 tristate "Stateless NAT" 447 tristate "Stateless NAT"
447 depends on NET_CLS_ACT 448 depends on NET_CLS_ACT
448 select NETFILTER
449 ---help--- 449 ---help---
450 Say Y here to do stateless NAT on IPv4 packets. You should use 450 Say Y here to do stateless NAT on IPv4 packets. You should use
451 netfilter for NAT unless you know what you are doing. 451 netfilter for NAT unless you know what you are doing.
@@ -476,15 +476,6 @@ config NET_ACT_SIMP
476 To compile this code as a module, choose M here: the 476 To compile this code as a module, choose M here: the
477 module will be called simple. 477 module will be called simple.
478 478
479config NET_CLS_POLICE
480 bool "Traffic Policing (obsolete)"
481 select NET_CLS_ACT
482 select NET_ACT_POLICE
483 ---help---
484 Say Y here if you want to do traffic policing, i.e. strict
485 bandwidth limiting. This option is obsolete and just selects
486 the option replacing it. It will be removed in the future.
487
488config NET_CLS_IND 479config NET_CLS_IND
489 bool "Incoming device classification" 480 bool "Incoming device classification"
490 depends on NET_CLS_U32 || NET_CLS_FW 481 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 72cdb0fade20..0b8eb235bc13 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -18,6 +18,9 @@
18#include <linux/skbuff.h> 18#include <linux/skbuff.h>
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/kmod.h> 20#include <linux/kmod.h>
21#include <linux/err.h>
22#include <net/net_namespace.h>
23#include <net/sock.h>
21#include <net/sch_generic.h> 24#include <net/sch_generic.h>
22#include <net/act_api.h> 25#include <net/act_api.h>
23#include <net/netlink.h> 26#include <net/netlink.h>
@@ -66,7 +69,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
66{ 69{
67 struct tcf_common *p; 70 struct tcf_common *p;
68 int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; 71 int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
69 struct rtattr *r ; 72 struct nlattr *nest;
70 73
71 read_lock_bh(hinfo->lock); 74 read_lock_bh(hinfo->lock);
72 75
@@ -81,15 +84,17 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
81 continue; 84 continue;
82 a->priv = p; 85 a->priv = p;
83 a->order = n_i; 86 a->order = n_i;
84 r = (struct rtattr *)skb_tail_pointer(skb); 87
85 RTA_PUT(skb, a->order, 0, NULL); 88 nest = nla_nest_start(skb, a->order);
89 if (nest == NULL)
90 goto nla_put_failure;
86 err = tcf_action_dump_1(skb, a, 0, 0); 91 err = tcf_action_dump_1(skb, a, 0, 0);
87 if (err < 0) { 92 if (err < 0) {
88 index--; 93 index--;
89 nlmsg_trim(skb, r); 94 nlmsg_trim(skb, nest);
90 goto done; 95 goto done;
91 } 96 }
92 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 97 nla_nest_end(skb, nest);
93 n_i++; 98 n_i++;
94 if (n_i >= TCA_ACT_MAX_PRIO) 99 if (n_i >= TCA_ACT_MAX_PRIO)
95 goto done; 100 goto done;
@@ -101,8 +106,8 @@ done:
101 cb->args[0] += n_i; 106 cb->args[0] += n_i;
102 return n_i; 107 return n_i;
103 108
104rtattr_failure: 109nla_put_failure:
105 nlmsg_trim(skb, r); 110 nla_nest_cancel(skb, nest);
106 goto done; 111 goto done;
107} 112}
108 113
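
This and several later hunks in act_api.c replace the open-coded RTA_PUT(skb, type, 0, NULL) plus manual rta_len fix-up with the nlattr nesting helpers. A minimal sketch of the idiom, assuming the net/netlink.h helpers of this era (SOME_ATTR is a hypothetical attribute type):

        static int dump_one(struct sk_buff *skb, int attrtype, u32 val)
        {
                struct nlattr *nest = nla_nest_start(skb, attrtype);

                if (nest == NULL)
                        return -EMSGSIZE;
                if (nla_put_u32(skb, SOME_ATTR, val) < 0)
                        goto cancel;
                nla_nest_end(skb, nest);        /* writes the final nested length */
                return 0;
        cancel:
                nla_nest_cancel(skb, nest);     /* trims the partial attribute */
                return -EMSGSIZE;
        }
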
@@ -110,12 +115,13 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
110 struct tcf_hashinfo *hinfo) 115 struct tcf_hashinfo *hinfo)
111{ 116{
112 struct tcf_common *p, *s_p; 117 struct tcf_common *p, *s_p;
113 struct rtattr *r ; 118 struct nlattr *nest;
114 int i= 0, n_i = 0; 119 int i= 0, n_i = 0;
115 120
116 r = (struct rtattr *)skb_tail_pointer(skb); 121 nest = nla_nest_start(skb, a->order);
117 RTA_PUT(skb, a->order, 0, NULL); 122 if (nest == NULL)
118 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); 123 goto nla_put_failure;
124 NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
119 for (i = 0; i < (hinfo->hmask + 1); i++) { 125 for (i = 0; i < (hinfo->hmask + 1); i++) {
120 p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; 126 p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
121 127
@@ -127,12 +133,12 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
127 p = s_p; 133 p = s_p;
128 } 134 }
129 } 135 }
130 RTA_PUT(skb, TCA_FCNT, 4, &n_i); 136 NLA_PUT_U32(skb, TCA_FCNT, n_i);
131 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 137 nla_nest_end(skb, nest);
132 138
133 return n_i; 139 return n_i;
134rtattr_failure: 140nla_put_failure:
135 nlmsg_trim(skb, r); 141 nla_nest_cancel(skb, nest);
136 return -EINVAL; 142 return -EINVAL;
137} 143}
138 144
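
tcf_del_walker() also switches to the typed put macros, which compute the payload length themselves instead of trusting hand-written sizes like the old RTA_PUT(skb, TCA_FCNT, 4, &n_i). A two-line sketch (both macros jump to an nla_put_failure label, assumed to exist in the caller):

        NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);    /* strlen() + 1 bytes */
        NLA_PUT_U32(skb, TCA_FCNT, n_i);                /* fixed 4-byte payload */
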
@@ -209,7 +215,7 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
209} 215}
210EXPORT_SYMBOL(tcf_hash_check); 216EXPORT_SYMBOL(tcf_hash_check);
211 217
212struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) 218struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
213{ 219{
214 struct tcf_common *p = kzalloc(size, GFP_KERNEL); 220 struct tcf_common *p = kzalloc(size, GFP_KERNEL);
215 221
@@ -261,6 +267,7 @@ int tcf_register_action(struct tc_action_ops *act)
261 write_unlock(&act_mod_lock); 267 write_unlock(&act_mod_lock);
262 return 0; 268 return 0;
263} 269}
270EXPORT_SYMBOL(tcf_register_action);
264 271
265int tcf_unregister_action(struct tc_action_ops *act) 272int tcf_unregister_action(struct tc_action_ops *act)
266{ 273{
@@ -279,6 +286,7 @@ int tcf_unregister_action(struct tc_action_ops *act)
279 write_unlock(&act_mod_lock); 286 write_unlock(&act_mod_lock);
280 return err; 287 return err;
281} 288}
289EXPORT_SYMBOL(tcf_unregister_action);
282 290
283/* lookup by name */ 291/* lookup by name */
284static struct tc_action_ops *tc_lookup_action_n(char *kind) 292static struct tc_action_ops *tc_lookup_action_n(char *kind)
@@ -301,15 +309,15 @@ static struct tc_action_ops *tc_lookup_action_n(char *kind)
301 return a; 309 return a;
302} 310}
303 311
304/* lookup by rtattr */ 312/* lookup by nlattr */
305static struct tc_action_ops *tc_lookup_action(struct rtattr *kind) 313static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
306{ 314{
307 struct tc_action_ops *a = NULL; 315 struct tc_action_ops *a = NULL;
308 316
309 if (kind) { 317 if (kind) {
310 read_lock(&act_mod_lock); 318 read_lock(&act_mod_lock);
311 for (a = act_base; a; a = a->next) { 319 for (a = act_base; a; a = a->next) {
312 if (rtattr_strcmp(kind, a->kind) == 0) { 320 if (nla_strcmp(kind, a->kind) == 0) {
313 if (!try_module_get(a->owner)) { 321 if (!try_module_get(a->owner)) {
314 read_unlock(&act_mod_lock); 322 read_unlock(&act_mod_lock);
315 return NULL; 323 return NULL;
@@ -375,6 +383,7 @@ repeat:
375exec_done: 383exec_done:
376 return ret; 384 return ret;
377} 385}
386EXPORT_SYMBOL(tcf_action_exec);
378 387
379void tcf_action_destroy(struct tc_action *act, int bind) 388void tcf_action_destroy(struct tc_action *act, int bind)
380{ 389{
@@ -409,73 +418,77 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
409{ 418{
410 int err = -EINVAL; 419 int err = -EINVAL;
411 unsigned char *b = skb_tail_pointer(skb); 420 unsigned char *b = skb_tail_pointer(skb);
412 struct rtattr *r; 421 struct nlattr *nest;
413 422
414 if (a->ops == NULL || a->ops->dump == NULL) 423 if (a->ops == NULL || a->ops->dump == NULL)
415 return err; 424 return err;
416 425
417 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); 426 NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
418 if (tcf_action_copy_stats(skb, a, 0)) 427 if (tcf_action_copy_stats(skb, a, 0))
419 goto rtattr_failure; 428 goto nla_put_failure;
420 r = (struct rtattr *)skb_tail_pointer(skb); 429 nest = nla_nest_start(skb, TCA_OPTIONS);
421 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 430 if (nest == NULL)
431 goto nla_put_failure;
422 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { 432 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
423 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 433 nla_nest_end(skb, nest);
424 return err; 434 return err;
425 } 435 }
426 436
427rtattr_failure: 437nla_put_failure:
428 nlmsg_trim(skb, b); 438 nlmsg_trim(skb, b);
429 return -1; 439 return -1;
430} 440}
441EXPORT_SYMBOL(tcf_action_dump_1);
431 442
432int 443int
433tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) 444tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
434{ 445{
435 struct tc_action *a; 446 struct tc_action *a;
436 int err = -EINVAL; 447 int err = -EINVAL;
437 unsigned char *b = skb_tail_pointer(skb); 448 struct nlattr *nest;
438 struct rtattr *r ;
439 449
440 while ((a = act) != NULL) { 450 while ((a = act) != NULL) {
441 r = (struct rtattr *)skb_tail_pointer(skb);
442 act = a->next; 451 act = a->next;
443 RTA_PUT(skb, a->order, 0, NULL); 452 nest = nla_nest_start(skb, a->order);
453 if (nest == NULL)
454 goto nla_put_failure;
444 err = tcf_action_dump_1(skb, a, bind, ref); 455 err = tcf_action_dump_1(skb, a, bind, ref);
445 if (err < 0) 456 if (err < 0)
446 goto errout; 457 goto errout;
447 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 458 nla_nest_end(skb, nest);
448 } 459 }
449 460
450 return 0; 461 return 0;
451 462
452rtattr_failure: 463nla_put_failure:
453 err = -EINVAL; 464 err = -EINVAL;
454errout: 465errout:
455 nlmsg_trim(skb, b); 466 nla_nest_cancel(skb, nest);
456 return err; 467 return err;
457} 468}
458 469
459struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, 470struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
460 char *name, int ovr, int bind, int *err) 471 char *name, int ovr, int bind)
461{ 472{
462 struct tc_action *a; 473 struct tc_action *a;
463 struct tc_action_ops *a_o; 474 struct tc_action_ops *a_o;
464 char act_name[IFNAMSIZ]; 475 char act_name[IFNAMSIZ];
465 struct rtattr *tb[TCA_ACT_MAX+1]; 476 struct nlattr *tb[TCA_ACT_MAX+1];
466 struct rtattr *kind; 477 struct nlattr *kind;
467 478 int err;
468 *err = -EINVAL;
469 479
470 if (name == NULL) { 480 if (name == NULL) {
471 if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) 481 err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
482 if (err < 0)
472 goto err_out; 483 goto err_out;
473 kind = tb[TCA_ACT_KIND-1]; 484 err = -EINVAL;
485 kind = tb[TCA_ACT_KIND];
474 if (kind == NULL) 486 if (kind == NULL)
475 goto err_out; 487 goto err_out;
476 if (rtattr_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) 488 if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
477 goto err_out; 489 goto err_out;
478 } else { 490 } else {
491 err = -EINVAL;
479 if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) 492 if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
480 goto err_out; 493 goto err_out;
481 } 494 }
@@ -496,36 +509,35 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
496 * indicate this using -EAGAIN. 509 * indicate this using -EAGAIN.
497 */ 510 */
498 if (a_o != NULL) { 511 if (a_o != NULL) {
499 *err = -EAGAIN; 512 err = -EAGAIN;
500 goto err_mod; 513 goto err_mod;
501 } 514 }
502#endif 515#endif
503 *err = -ENOENT; 516 err = -ENOENT;
504 goto err_out; 517 goto err_out;
505 } 518 }
506 519
507 *err = -ENOMEM; 520 err = -ENOMEM;
508 a = kzalloc(sizeof(*a), GFP_KERNEL); 521 a = kzalloc(sizeof(*a), GFP_KERNEL);
509 if (a == NULL) 522 if (a == NULL)
510 goto err_mod; 523 goto err_mod;
511 524
512 /* backward compatibility for policer */ 525 /* backward compatibility for policer */
513 if (name == NULL) 526 if (name == NULL)
514 *err = a_o->init(tb[TCA_ACT_OPTIONS-1], est, a, ovr, bind); 527 err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
515 else 528 else
516 *err = a_o->init(rta, est, a, ovr, bind); 529 err = a_o->init(nla, est, a, ovr, bind);
517 if (*err < 0) 530 if (err < 0)
518 goto err_free; 531 goto err_free;
519 532
520 /* module count goes up only when brand new policy is created 533 /* module count goes up only when brand new policy is created
521 if it exists and is only bound to in a_o->init() then 534 if it exists and is only bound to in a_o->init() then
522 ACT_P_CREATED is not returned (a zero is). 535 ACT_P_CREATED is not returned (a zero is).
523 */ 536 */
524 if (*err != ACT_P_CREATED) 537 if (err != ACT_P_CREATED)
525 module_put(a_o->owner); 538 module_put(a_o->owner);
526 a->ops = a_o; 539 a->ops = a_o;
527 540
528 *err = 0;
529 return a; 541 return a;
530 542
531err_free: 543err_free:
@@ -533,26 +545,26 @@ err_free:
533err_mod: 545err_mod:
534 module_put(a_o->owner); 546 module_put(a_o->owner);
535err_out: 547err_out:
536 return NULL; 548 return ERR_PTR(err);
537} 549}
538 550
539struct tc_action *tcf_action_init(struct rtattr *rta, struct rtattr *est, 551struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
540 char *name, int ovr, int bind, int *err) 552 char *name, int ovr, int bind)
541{ 553{
542 struct rtattr *tb[TCA_ACT_MAX_PRIO+1]; 554 struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
543 struct tc_action *head = NULL, *act, *act_prev = NULL; 555 struct tc_action *head = NULL, *act, *act_prev = NULL;
556 int err;
544 int i; 557 int i;
545 558
546 if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0) { 559 err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
547 *err = -EINVAL; 560 if (err < 0)
548 return head; 561 return ERR_PTR(err);
549 }
550 562
551 for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) { 563 for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
552 act = tcf_action_init_1(tb[i], est, name, ovr, bind, err); 564 act = tcf_action_init_1(tb[i], est, name, ovr, bind);
553 if (act == NULL) 565 if (IS_ERR(act))
554 goto err; 566 goto err;
555 act->order = i+1; 567 act->order = i;
556 568
557 if (head == NULL) 569 if (head == NULL)
558 head = act; 570 head = act;
@@ -565,7 +577,7 @@ struct tc_action *tcf_action_init(struct rtattr *rta, struct rtattr *est,
565err: 577err:
566 if (head != NULL) 578 if (head != NULL)
567 tcf_action_destroy(head, bind); 579 tcf_action_destroy(head, bind);
568 return NULL; 580 return act;
569} 581}
570 582
571int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, 583int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
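
Two related conversions land in tcf_action_init_1() and tcf_action_init() above. First, nlattr arrays are 1-based: nla_parse_nested() leaves tb[0] unused and stores attribute type i at tb[i], which is why every "- 1" index disappears and the priority loop now runs from 1 to TCA_ACT_MAX_PRIO inclusive. Second, errors travel inside the returned pointer instead of through an int *err out-parameter. A minimal sketch of the new calling convention, assuming the linux/err.h helpers included at the top of the file (fragment of an int-returning caller):

        struct tc_action *act = tcf_action_init_1(tb[i], est, name, ovr, bind);

        if (IS_ERR(act))
                return PTR_ERR(act);    /* recover the negative errno */
        act->order = i;                 /* 1-based order matches the tb[] index */
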
@@ -619,7 +631,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
619 struct tcamsg *t; 631 struct tcamsg *t;
620 struct nlmsghdr *nlh; 632 struct nlmsghdr *nlh;
621 unsigned char *b = skb_tail_pointer(skb); 633 unsigned char *b = skb_tail_pointer(skb);
622 struct rtattr *x; 634 struct nlattr *nest;
623 635
624 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); 636 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
625 637
@@ -628,18 +640,19 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
628 t->tca__pad1 = 0; 640 t->tca__pad1 = 0;
629 t->tca__pad2 = 0; 641 t->tca__pad2 = 0;
630 642
631 x = (struct rtattr *)skb_tail_pointer(skb); 643 nest = nla_nest_start(skb, TCA_ACT_TAB);
632 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 644 if (nest == NULL)
645 goto nla_put_failure;
633 646
634 if (tcf_action_dump(skb, a, bind, ref) < 0) 647 if (tcf_action_dump(skb, a, bind, ref) < 0)
635 goto rtattr_failure; 648 goto nla_put_failure;
636 649
637 x->rta_len = skb_tail_pointer(skb) - (u8 *)x; 650 nla_nest_end(skb, nest);
638 651
639 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 652 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
640 return skb->len; 653 return skb->len;
641 654
642rtattr_failure: 655nla_put_failure:
643nlmsg_failure: 656nlmsg_failure:
644 nlmsg_trim(skb, b); 657 nlmsg_trim(skb, b);
645 return -1; 658 return -1;
@@ -658,48 +671,51 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
658 return -EINVAL; 671 return -EINVAL;
659 } 672 }
660 673
661 return rtnl_unicast(skb, pid); 674 return rtnl_unicast(skb, &init_net, pid);
662} 675}
663 676
664static struct tc_action * 677static struct tc_action *
665tcf_action_get_1(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int *err) 678tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
666{ 679{
667 struct rtattr *tb[TCA_ACT_MAX+1]; 680 struct nlattr *tb[TCA_ACT_MAX+1];
668 struct tc_action *a; 681 struct tc_action *a;
669 int index; 682 int index;
683 int err;
670 684
671 *err = -EINVAL; 685 err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
672 if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) 686 if (err < 0)
673 return NULL; 687 goto err_out;
674 688
675 if (tb[TCA_ACT_INDEX - 1] == NULL || 689 err = -EINVAL;
676 RTA_PAYLOAD(tb[TCA_ACT_INDEX - 1]) < sizeof(index)) 690 if (tb[TCA_ACT_INDEX] == NULL ||
677 return NULL; 691 nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
678 index = *(int *)RTA_DATA(tb[TCA_ACT_INDEX - 1]); 692 goto err_out;
693 index = nla_get_u32(tb[TCA_ACT_INDEX]);
679 694
680 *err = -ENOMEM; 695 err = -ENOMEM;
681 a = kzalloc(sizeof(struct tc_action), GFP_KERNEL); 696 a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
682 if (a == NULL) 697 if (a == NULL)
683 return NULL; 698 goto err_out;
684 699
685 *err = -EINVAL; 700 err = -EINVAL;
686 a->ops = tc_lookup_action(tb[TCA_ACT_KIND - 1]); 701 a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
687 if (a->ops == NULL) 702 if (a->ops == NULL)
688 goto err_free; 703 goto err_free;
689 if (a->ops->lookup == NULL) 704 if (a->ops->lookup == NULL)
690 goto err_mod; 705 goto err_mod;
691 *err = -ENOENT; 706 err = -ENOENT;
692 if (a->ops->lookup(a, index) == 0) 707 if (a->ops->lookup(a, index) == 0)
693 goto err_mod; 708 goto err_mod;
694 709
695 module_put(a->ops->owner); 710 module_put(a->ops->owner);
696 *err = 0;
697 return a; 711 return a;
712
698err_mod: 713err_mod:
699 module_put(a->ops->owner); 714 module_put(a->ops->owner);
700err_free: 715err_free:
701 kfree(a); 716 kfree(a);
702 return NULL; 717err_out:
718 return ERR_PTR(err);
703} 719}
704 720
705static void cleanup_a(struct tc_action *act) 721static void cleanup_a(struct tc_action *act)
@@ -725,16 +741,16 @@ static struct tc_action *create_a(int i)
725 return act; 741 return act;
726} 742}
727 743
728static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) 744static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
729{ 745{
730 struct sk_buff *skb; 746 struct sk_buff *skb;
731 unsigned char *b; 747 unsigned char *b;
732 struct nlmsghdr *nlh; 748 struct nlmsghdr *nlh;
733 struct tcamsg *t; 749 struct tcamsg *t;
734 struct netlink_callback dcb; 750 struct netlink_callback dcb;
735 struct rtattr *x; 751 struct nlattr *nest;
736 struct rtattr *tb[TCA_ACT_MAX+1]; 752 struct nlattr *tb[TCA_ACT_MAX+1];
737 struct rtattr *kind; 753 struct nlattr *kind;
738 struct tc_action *a = create_a(0); 754 struct tc_action *a = create_a(0);
739 int err = -EINVAL; 755 int err = -EINVAL;
740 756
@@ -752,10 +768,12 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
752 768
753 b = skb_tail_pointer(skb); 769 b = skb_tail_pointer(skb);
754 770
755 if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) 771 err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
772 if (err < 0)
756 goto err_out; 773 goto err_out;
757 774
758 kind = tb[TCA_ACT_KIND-1]; 775 err = -EINVAL;
776 kind = tb[TCA_ACT_KIND];
759 a->ops = tc_lookup_action(kind); 777 a->ops = tc_lookup_action(kind);
760 if (a->ops == NULL) 778 if (a->ops == NULL)
761 goto err_out; 779 goto err_out;
@@ -766,26 +784,27 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
766 t->tca__pad1 = 0; 784 t->tca__pad1 = 0;
767 t->tca__pad2 = 0; 785 t->tca__pad2 = 0;
768 786
769 x = (struct rtattr *)skb_tail_pointer(skb); 787 nest = nla_nest_start(skb, TCA_ACT_TAB);
770 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 788 if (nest == NULL)
789 goto nla_put_failure;
771 790
772 err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); 791 err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
773 if (err < 0) 792 if (err < 0)
774 goto rtattr_failure; 793 goto nla_put_failure;
775 794
776 x->rta_len = skb_tail_pointer(skb) - (u8 *)x; 795 nla_nest_end(skb, nest);
777 796
778 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 797 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
779 nlh->nlmsg_flags |= NLM_F_ROOT; 798 nlh->nlmsg_flags |= NLM_F_ROOT;
780 module_put(a->ops->owner); 799 module_put(a->ops->owner);
781 kfree(a); 800 kfree(a);
782 err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 801 err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
783 if (err > 0) 802 if (err > 0)
784 return 0; 803 return 0;
785 804
786 return err; 805 return err;
787 806
788rtattr_failure: 807nla_put_failure:
789nlmsg_failure: 808nlmsg_failure:
790 module_put(a->ops->owner); 809 module_put(a->ops->owner);
791err_out: 810err_out:
@@ -795,25 +814,28 @@ err_out:
795} 814}
796 815
797static int 816static int
798tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event) 817tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
799{ 818{
800 int i, ret = 0; 819 int i, ret;
801 struct rtattr *tb[TCA_ACT_MAX_PRIO+1]; 820 struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
802 struct tc_action *head = NULL, *act, *act_prev = NULL; 821 struct tc_action *head = NULL, *act, *act_prev = NULL;
803 822
804 if (rtattr_parse_nested(tb, TCA_ACT_MAX_PRIO, rta) < 0) 823 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
805 return -EINVAL; 824 if (ret < 0)
825 return ret;
806 826
807 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { 827 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
808 if (tb[0] != NULL && tb[1] == NULL) 828 if (tb[0] != NULL && tb[1] == NULL)
809 return tca_action_flush(tb[0], n, pid); 829 return tca_action_flush(tb[0], n, pid);
810 } 830 }
811 831
812 for (i=0; i < TCA_ACT_MAX_PRIO && tb[i]; i++) { 832 for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
813 act = tcf_action_get_1(tb[i], n, pid, &ret); 833 act = tcf_action_get_1(tb[i], n, pid);
814 if (act == NULL) 834 if (IS_ERR(act)) {
835 ret = PTR_ERR(act);
815 goto err; 836 goto err;
816 act->order = i+1; 837 }
838 act->order = i;
817 839
818 if (head == NULL) 840 if (head == NULL)
819 head = act; 841 head = act;
@@ -842,7 +864,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
842 864
843 /* now do the delete */ 865 /* now do the delete */
844 tcf_action_destroy(head, 0); 866 tcf_action_destroy(head, 0);
845 ret = rtnetlink_send(skb, pid, RTNLGRP_TC, 867 ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
846 n->nlmsg_flags&NLM_F_ECHO); 868 n->nlmsg_flags&NLM_F_ECHO);
847 if (ret > 0) 869 if (ret > 0)
848 return 0; 870 return 0;
@@ -859,7 +881,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
859 struct tcamsg *t; 881 struct tcamsg *t;
860 struct nlmsghdr *nlh; 882 struct nlmsghdr *nlh;
861 struct sk_buff *skb; 883 struct sk_buff *skb;
862 struct rtattr *x; 884 struct nlattr *nest;
863 unsigned char *b; 885 unsigned char *b;
864 int err = 0; 886 int err = 0;
865 887
@@ -875,23 +897,24 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
875 t->tca__pad1 = 0; 897 t->tca__pad1 = 0;
876 t->tca__pad2 = 0; 898 t->tca__pad2 = 0;
877 899
878 x = (struct rtattr *)skb_tail_pointer(skb); 900 nest = nla_nest_start(skb, TCA_ACT_TAB);
879 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 901 if (nest == NULL)
902 goto nla_put_failure;
880 903
881 if (tcf_action_dump(skb, a, 0, 0) < 0) 904 if (tcf_action_dump(skb, a, 0, 0) < 0)
882 goto rtattr_failure; 905 goto nla_put_failure;
883 906
884 x->rta_len = skb_tail_pointer(skb) - (u8 *)x; 907 nla_nest_end(skb, nest);
885 908
886 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 909 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
887 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 910 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
888 911
889 err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 912 err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
890 if (err > 0) 913 if (err > 0)
891 err = 0; 914 err = 0;
892 return err; 915 return err;
893 916
894rtattr_failure: 917nla_put_failure:
895nlmsg_failure: 918nlmsg_failure:
896 kfree_skb(skb); 919 kfree_skb(skb);
897 return -1; 920 return -1;
@@ -899,16 +922,20 @@ nlmsg_failure:
899 922
900 923
901static int 924static int
902tcf_action_add(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int ovr) 925tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
903{ 926{
904 int ret = 0; 927 int ret = 0;
905 struct tc_action *act; 928 struct tc_action *act;
906 struct tc_action *a; 929 struct tc_action *a;
907 u32 seq = n->nlmsg_seq; 930 u32 seq = n->nlmsg_seq;
908 931
909 act = tcf_action_init(rta, NULL, NULL, ovr, 0, &ret); 932 act = tcf_action_init(nla, NULL, NULL, ovr, 0);
910 if (act == NULL) 933 if (act == NULL)
911 goto done; 934 goto done;
935 if (IS_ERR(act)) {
936 ret = PTR_ERR(act);
937 goto done;
938 }
912 939
913 /* dump then free all the actions after update; inserted policy 940 /* dump then free all the actions after update; inserted policy
914 * stays intact 941 * stays intact
@@ -924,11 +951,19 @@ done:
924 951
925static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 952static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
926{ 953{
927 struct rtattr **tca = arg; 954 struct net *net = skb->sk->sk_net;
955 struct nlattr *tca[TCA_ACT_MAX + 1];
928 u32 pid = skb ? NETLINK_CB(skb).pid : 0; 956 u32 pid = skb ? NETLINK_CB(skb).pid : 0;
929 int ret = 0, ovr = 0; 957 int ret = 0, ovr = 0;
930 958
931 if (tca[TCA_ACT_TAB-1] == NULL) { 959 if (net != &init_net)
960 return -EINVAL;
961
962 ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
963 if (ret < 0)
964 return ret;
965
966 if (tca[TCA_ACT_TAB] == NULL) {
932 printk("tc_ctl_action: received NO action attribs\n"); 967 printk("tc_ctl_action: received NO action attribs\n");
933 return -EINVAL; 968 return -EINVAL;
934 } 969 }
@@ -946,15 +981,15 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
946 if (n->nlmsg_flags&NLM_F_REPLACE) 981 if (n->nlmsg_flags&NLM_F_REPLACE)
947 ovr = 1; 982 ovr = 1;
948replay: 983replay:
949 ret = tcf_action_add(tca[TCA_ACT_TAB-1], n, pid, ovr); 984 ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr);
950 if (ret == -EAGAIN) 985 if (ret == -EAGAIN)
951 goto replay; 986 goto replay;
952 break; 987 break;
953 case RTM_DELACTION: 988 case RTM_DELACTION:
954 ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_DELACTION); 989 ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION);
955 break; 990 break;
956 case RTM_GETACTION: 991 case RTM_GETACTION:
957 ret = tca_action_gd(tca[TCA_ACT_TAB-1], n, pid, RTM_GETACTION); 992 ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION);
958 break; 993 break;
959 default: 994 default:
960 BUG(); 995 BUG();
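
The tc_ctl_action() hunk adds the guard used across these files for code that is not yet namespace-aware: look up the namespace owning the requesting socket and refuse anything but the initial one. Sketch, using the 2.6.25-era field shown above (later kernels wrap this access as sock_net()):

        struct net *net = skb->sk->sk_net;

        if (net != &init_net)
                return -EINVAL;         /* only the initial namespace for now */
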
@@ -963,33 +998,30 @@ replay:
963 return ret; 998 return ret;
964} 999}
965 1000
966static struct rtattr * 1001static struct nlattr *
967find_dump_kind(struct nlmsghdr *n) 1002find_dump_kind(struct nlmsghdr *n)
968{ 1003{
969 struct rtattr *tb1, *tb2[TCA_ACT_MAX+1]; 1004 struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
970 struct rtattr *tb[TCA_ACT_MAX_PRIO + 1]; 1005 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
971 struct rtattr *rta[TCAA_MAX + 1]; 1006 struct nlattr *nla[TCAA_MAX + 1];
972 struct rtattr *kind; 1007 struct nlattr *kind;
973 int min_len = NLMSG_LENGTH(sizeof(struct tcamsg)); 1008
974 int attrlen = n->nlmsg_len - NLMSG_ALIGN(min_len); 1009 if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
975 struct rtattr *attr = (void *) n + NLMSG_ALIGN(min_len);
976
977 if (rtattr_parse(rta, TCAA_MAX, attr, attrlen) < 0)
978 return NULL; 1010 return NULL;
979 tb1 = rta[TCA_ACT_TAB - 1]; 1011 tb1 = nla[TCA_ACT_TAB];
980 if (tb1 == NULL) 1012 if (tb1 == NULL)
981 return NULL; 1013 return NULL;
982 1014
983 if (rtattr_parse(tb, TCA_ACT_MAX_PRIO, RTA_DATA(tb1), 1015 if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
984 NLMSG_ALIGN(RTA_PAYLOAD(tb1))) < 0) 1016 NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
985 return NULL;
986 if (tb[0] == NULL)
987 return NULL; 1017 return NULL;
988 1018
989 if (rtattr_parse(tb2, TCA_ACT_MAX, RTA_DATA(tb[0]), 1019 if (tb[1] == NULL)
990 RTA_PAYLOAD(tb[0])) < 0)
991 return NULL; 1020 return NULL;
992 kind = tb2[TCA_ACT_KIND-1]; 1021 if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]),
1022 nla_len(tb[1]), NULL) < 0)
1023 return NULL;
1024 kind = tb2[TCA_ACT_KIND];
993 1025
994 return kind; 1026 return kind;
995} 1027}
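
find_dump_kind() now leans on nlmsg_parse(), which validates nlmsg_len against the family header size and splits the tail into attributes, replacing the manual NLMSG_LENGTH()/NLMSG_ALIGN() pointer arithmetic on the left-hand side. Sketch of the call as used above:

        struct nlattr *nla[TCAA_MAX + 1];

        /* hdrlen = sizeof(struct tcamsg); NULL policy = no type checks */
        if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
                return NULL;
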
@@ -997,14 +1029,18 @@ find_dump_kind(struct nlmsghdr *n)
997static int 1029static int
998tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) 1030tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
999{ 1031{
1032 struct net *net = skb->sk->sk_net;
1000 struct nlmsghdr *nlh; 1033 struct nlmsghdr *nlh;
1001 unsigned char *b = skb_tail_pointer(skb); 1034 unsigned char *b = skb_tail_pointer(skb);
1002 struct rtattr *x; 1035 struct nlattr *nest;
1003 struct tc_action_ops *a_o; 1036 struct tc_action_ops *a_o;
1004 struct tc_action a; 1037 struct tc_action a;
1005 int ret = 0; 1038 int ret = 0;
1006 struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); 1039 struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
1007 struct rtattr *kind = find_dump_kind(cb->nlh); 1040 struct nlattr *kind = find_dump_kind(cb->nlh);
1041
1042 if (net != &init_net)
1043 return 0;
1008 1044
1009 if (kind == NULL) { 1045 if (kind == NULL) {
1010 printk("tc_dump_action: action bad kind\n"); 1046 printk("tc_dump_action: action bad kind\n");
@@ -1021,7 +1057,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1021 1057
1022 if (a_o->walk == NULL) { 1058 if (a_o->walk == NULL) {
1023 printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind); 1059 printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind);
1024 goto rtattr_failure; 1060 goto nla_put_failure;
1025 } 1061 }
1026 1062
1027 nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 1063 nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
@@ -1031,18 +1067,19 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1031 t->tca__pad1 = 0; 1067 t->tca__pad1 = 0;
1032 t->tca__pad2 = 0; 1068 t->tca__pad2 = 0;
1033 1069
1034 x = (struct rtattr *)skb_tail_pointer(skb); 1070 nest = nla_nest_start(skb, TCA_ACT_TAB);
1035 RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); 1071 if (nest == NULL)
1072 goto nla_put_failure;
1036 1073
1037 ret = a_o->walk(skb, cb, RTM_GETACTION, &a); 1074 ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
1038 if (ret < 0) 1075 if (ret < 0)
1039 goto rtattr_failure; 1076 goto nla_put_failure;
1040 1077
1041 if (ret > 0) { 1078 if (ret > 0) {
1042 x->rta_len = skb_tail_pointer(skb) - (u8 *)x; 1079 nla_nest_end(skb, nest);
1043 ret = skb->len; 1080 ret = skb->len;
1044 } else 1081 } else
1045 nlmsg_trim(skb, x); 1082 nla_nest_cancel(skb, nest);
1046 1083
1047 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1084 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1048 if (NETLINK_CB(cb->skb).pid && ret) 1085 if (NETLINK_CB(cb->skb).pid && ret)
@@ -1050,7 +1087,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1050 module_put(a_o->owner); 1087 module_put(a_o->owner);
1051 return skb->len; 1088 return skb->len;
1052 1089
1053rtattr_failure: 1090nla_put_failure:
1054nlmsg_failure: 1091nlmsg_failure:
1055 module_put(a_o->owner); 1092 module_put(a_o->owner);
1056 nlmsg_trim(skb, b); 1093 nlmsg_trim(skb, b);
@@ -1067,8 +1104,3 @@ static int __init tc_action_init(void)
1067} 1104}
1068 1105
1069subsys_initcall(tc_action_init); 1106subsys_initcall(tc_action_init);
1070
1071EXPORT_SYMBOL(tcf_register_action);
1072EXPORT_SYMBOL(tcf_unregister_action);
1073EXPORT_SYMBOL(tcf_action_exec);
1074EXPORT_SYMBOL(tcf_action_dump_1);
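
The EXPORT_SYMBOL() block removed at the tail of act_api.c is not lost: the earlier hunks re-add each export directly under its definition, the kernel's preferred placement, e.g.:

        int tcf_unregister_action(struct tc_action_ops *act)
        {
                /* ... body unchanged ... */
        }
        EXPORT_SYMBOL(tcf_unregister_action);
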
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index a9631e426d91..422872c4f14b 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -53,28 +53,34 @@ typedef int (*g_rand)(struct tcf_gact *gact);
53static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; 53static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
54#endif /* CONFIG_GACT_PROB */ 54#endif /* CONFIG_GACT_PROB */
55 55
56static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, 56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
57 [TCA_GACT_PARMS] = { .len = sizeof(struct tc_gact) },
58 [TCA_GACT_PROB] = { .len = sizeof(struct tc_gact_p) },
59};
60
61static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
57 struct tc_action *a, int ovr, int bind) 62 struct tc_action *a, int ovr, int bind)
58{ 63{
59 struct rtattr *tb[TCA_GACT_MAX]; 64 struct nlattr *tb[TCA_GACT_MAX + 1];
60 struct tc_gact *parm; 65 struct tc_gact *parm;
61 struct tcf_gact *gact; 66 struct tcf_gact *gact;
62 struct tcf_common *pc; 67 struct tcf_common *pc;
63 int ret = 0; 68 int ret = 0;
69 int err;
64 70
65 if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) 71 if (nla == NULL)
66 return -EINVAL; 72 return -EINVAL;
67 73
68 if (tb[TCA_GACT_PARMS - 1] == NULL || 74 err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy);
69 RTA_PAYLOAD(tb[TCA_GACT_PARMS - 1]) < sizeof(*parm)) 75 if (err < 0)
76 return err;
77
78 if (tb[TCA_GACT_PARMS] == NULL)
70 return -EINVAL; 79 return -EINVAL;
71 parm = RTA_DATA(tb[TCA_GACT_PARMS - 1]); 80 parm = nla_data(tb[TCA_GACT_PARMS]);
72 81
73 if (tb[TCA_GACT_PROB-1] != NULL) 82#ifndef CONFIG_GACT_PROB
74#ifdef CONFIG_GACT_PROB 83 if (tb[TCA_GACT_PROB] != NULL)
75 if (RTA_PAYLOAD(tb[TCA_GACT_PROB-1]) < sizeof(struct tc_gact_p))
76 return -EINVAL;
77#else
78 return -EOPNOTSUPP; 84 return -EOPNOTSUPP;
79#endif 85#endif
80 86
@@ -97,8 +103,8 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
97 spin_lock_bh(&gact->tcf_lock); 103 spin_lock_bh(&gact->tcf_lock);
98 gact->tcf_action = parm->action; 104 gact->tcf_action = parm->action;
99#ifdef CONFIG_GACT_PROB 105#ifdef CONFIG_GACT_PROB
100 if (tb[TCA_GACT_PROB-1] != NULL) { 106 if (tb[TCA_GACT_PROB] != NULL) {
101 struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); 107 struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
102 gact->tcfg_paction = p_parm->paction; 108 gact->tcfg_paction = p_parm->paction;
103 gact->tcfg_pval = p_parm->pval; 109 gact->tcfg_pval = p_parm->pval;
104 gact->tcfg_ptype = p_parm->ptype; 110 gact->tcfg_ptype = p_parm->ptype;
@@ -154,23 +160,23 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
154 opt.refcnt = gact->tcf_refcnt - ref; 160 opt.refcnt = gact->tcf_refcnt - ref;
155 opt.bindcnt = gact->tcf_bindcnt - bind; 161 opt.bindcnt = gact->tcf_bindcnt - bind;
156 opt.action = gact->tcf_action; 162 opt.action = gact->tcf_action;
157 RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); 163 NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
158#ifdef CONFIG_GACT_PROB 164#ifdef CONFIG_GACT_PROB
159 if (gact->tcfg_ptype) { 165 if (gact->tcfg_ptype) {
160 struct tc_gact_p p_opt; 166 struct tc_gact_p p_opt;
161 p_opt.paction = gact->tcfg_paction; 167 p_opt.paction = gact->tcfg_paction;
162 p_opt.pval = gact->tcfg_pval; 168 p_opt.pval = gact->tcfg_pval;
163 p_opt.ptype = gact->tcfg_ptype; 169 p_opt.ptype = gact->tcfg_ptype;
164 RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); 170 NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
165 } 171 }
166#endif 172#endif
167 t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); 173 t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
168 t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); 174 t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
169 t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); 175 t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
170 RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); 176 NLA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
171 return skb->len; 177 return skb->len;
172 178
173rtattr_failure: 179nla_put_failure:
174 nlmsg_trim(skb, b); 180 nlmsg_trim(skb, b);
175 return -1; 181 return -1;
176} 182}
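
act_gact.c shows the pattern repeated in each action module below: a const struct nla_policy table declares per-attribute expectations once, and nla_parse_nested() enforces them, so the hand-rolled RTA_PAYLOAD() size checks vanish. Sketch of the validation flow (for untyped entries, .len is treated as the minimum payload size):

        static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
                [TCA_GACT_PARMS] = { .len = sizeof(struct tc_gact) },
                [TCA_GACT_PROB]  = { .len = sizeof(struct tc_gact_p) },
        };

        err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy);
        if (err < 0)
                return err;     /* short or malformed attribute rejected here */
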
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index fa006e06ce33..da696fd3e341 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -92,10 +92,17 @@ static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
92 return ret; 92 return ret;
93} 93}
94 94
95static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, 95static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
96 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
97 [TCA_IPT_HOOK] = { .type = NLA_U32 },
98 [TCA_IPT_INDEX] = { .type = NLA_U32 },
99 [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) },
100};
101
102static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
96 struct tc_action *a, int ovr, int bind) 103 struct tc_action *a, int ovr, int bind)
97{ 104{
98 struct rtattr *tb[TCA_IPT_MAX]; 105 struct nlattr *tb[TCA_IPT_MAX + 1];
99 struct tcf_ipt *ipt; 106 struct tcf_ipt *ipt;
100 struct tcf_common *pc; 107 struct tcf_common *pc;
101 struct ipt_entry_target *td, *t; 108 struct ipt_entry_target *td, *t;
@@ -104,22 +111,24 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
104 u32 hook = 0; 111 u32 hook = 0;
105 u32 index = 0; 112 u32 index = 0;
106 113
107 if (rta == NULL || rtattr_parse_nested(tb, TCA_IPT_MAX, rta) < 0) 114 if (nla == NULL)
108 return -EINVAL; 115 return -EINVAL;
109 116
110 if (tb[TCA_IPT_HOOK-1] == NULL || 117 err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy);
111 RTA_PAYLOAD(tb[TCA_IPT_HOOK-1]) < sizeof(u32)) 118 if (err < 0)
119 return err;
120
121 if (tb[TCA_IPT_HOOK] == NULL)
112 return -EINVAL; 122 return -EINVAL;
113 if (tb[TCA_IPT_TARG-1] == NULL || 123 if (tb[TCA_IPT_TARG] == NULL)
114 RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < sizeof(*t))
115 return -EINVAL; 124 return -EINVAL;
116 td = (struct ipt_entry_target *)RTA_DATA(tb[TCA_IPT_TARG-1]); 125
117 if (RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < td->u.target_size) 126 td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
127 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
118 return -EINVAL; 128 return -EINVAL;
119 129
120 if (tb[TCA_IPT_INDEX-1] != NULL && 130 if (tb[TCA_IPT_INDEX] != NULL)
121 RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) 131 index = nla_get_u32(tb[TCA_IPT_INDEX]);
122 index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
123 132
124 pc = tcf_hash_check(index, a, bind, &ipt_hash_info); 133 pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
125 if (!pc) { 134 if (!pc) {
@@ -136,14 +145,14 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
136 } 145 }
137 ipt = to_ipt(pc); 146 ipt = to_ipt(pc);
138 147
139 hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); 148 hook = nla_get_u32(tb[TCA_IPT_HOOK]);
140 149
141 err = -ENOMEM; 150 err = -ENOMEM;
142 tname = kmalloc(IFNAMSIZ, GFP_KERNEL); 151 tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
143 if (unlikely(!tname)) 152 if (unlikely(!tname))
144 goto err1; 153 goto err1;
145 if (tb[TCA_IPT_TABLE - 1] == NULL || 154 if (tb[TCA_IPT_TABLE] == NULL ||
146 rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) 155 nla_strlcpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ)
147 strcpy(tname, "mangle"); 156 strcpy(tname, "mangle");
148 157
149 t = kmemdup(td, td->u.target_size, GFP_KERNEL); 158 t = kmemdup(td, td->u.target_size, GFP_KERNEL);
@@ -243,25 +252,25 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
243 252
244 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); 253 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
245 if (unlikely(!t)) 254 if (unlikely(!t))
246 goto rtattr_failure; 255 goto nla_put_failure;
247 256
248 c.bindcnt = ipt->tcf_bindcnt - bind; 257 c.bindcnt = ipt->tcf_bindcnt - bind;
249 c.refcnt = ipt->tcf_refcnt - ref; 258 c.refcnt = ipt->tcf_refcnt - ref;
250 strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); 259 strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
251 260
252 RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); 261 NLA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
253 RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index); 262 NLA_PUT_U32(skb, TCA_IPT_INDEX, ipt->tcf_index);
254 RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook); 263 NLA_PUT_U32(skb, TCA_IPT_HOOK, ipt->tcfi_hook);
255 RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); 264 NLA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
256 RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname); 265 NLA_PUT_STRING(skb, TCA_IPT_TABLE, ipt->tcfi_tname);
257 tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); 266 tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
258 tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); 267 tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
259 tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); 268 tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
260 RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); 269 NLA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
261 kfree(t); 270 kfree(t);
262 return skb->len; 271 return skb->len;
263 272
264rtattr_failure: 273nla_put_failure:
265 nlmsg_trim(skb, b); 274 nlmsg_trim(skb, b);
266 kfree(t); 275 kfree(t);
267 return -1; 276 return -1;
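
act_ipt.c pairs the policy with the typed getters: nla_get_u32() replaces the *(u32 *)RTA_DATA() casts, and nla_strlcpy() bounds the copy so truncation is detectable. Sketch matching the hunk above:

        u32 hook = nla_get_u32(tb[TCA_IPT_HOOK]);

        if (tb[TCA_IPT_TABLE] == NULL ||
            nla_strlcpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ)
                strcpy(tname, "mangle");        /* default table, as above */
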
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c3fde9180f9d..1aff005d95cd 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -54,24 +54,31 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
54 return 0; 54 return 0;
55} 55}
56 56
57static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, 57static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
58 [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
59};
60
61static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
58 struct tc_action *a, int ovr, int bind) 62 struct tc_action *a, int ovr, int bind)
59{ 63{
60 struct rtattr *tb[TCA_MIRRED_MAX]; 64 struct nlattr *tb[TCA_MIRRED_MAX + 1];
61 struct tc_mirred *parm; 65 struct tc_mirred *parm;
62 struct tcf_mirred *m; 66 struct tcf_mirred *m;
63 struct tcf_common *pc; 67 struct tcf_common *pc;
64 struct net_device *dev = NULL; 68 struct net_device *dev = NULL;
65 int ret = 0; 69 int ret = 0, err;
66 int ok_push = 0; 70 int ok_push = 0;
67 71
68 if (rta == NULL || rtattr_parse_nested(tb, TCA_MIRRED_MAX, rta) < 0) 72 if (nla == NULL)
69 return -EINVAL; 73 return -EINVAL;
70 74
71 if (tb[TCA_MIRRED_PARMS-1] == NULL || 75 err = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy);
72 RTA_PAYLOAD(tb[TCA_MIRRED_PARMS-1]) < sizeof(*parm)) 76 if (err < 0)
77 return err;
78
79 if (tb[TCA_MIRRED_PARMS] == NULL)
73 return -EINVAL; 80 return -EINVAL;
74 parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); 81 parm = nla_data(tb[TCA_MIRRED_PARMS]);
75 82
76 if (parm->ifindex) { 83 if (parm->ifindex) {
77 dev = __dev_get_by_index(&init_net, parm->ifindex); 84 dev = __dev_get_by_index(&init_net, parm->ifindex);
@@ -207,14 +214,14 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
207 opt.bindcnt = m->tcf_bindcnt - bind; 214 opt.bindcnt = m->tcf_bindcnt - bind;
208 opt.eaction = m->tcfm_eaction; 215 opt.eaction = m->tcfm_eaction;
209 opt.ifindex = m->tcfm_ifindex; 216 opt.ifindex = m->tcfm_ifindex;
210 RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); 217 NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
211 t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); 218 t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
212 t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); 219 t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
213 t.expires = jiffies_to_clock_t(m->tcf_tm.expires); 220 t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
214 RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); 221 NLA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
215 return skb->len; 222 return skb->len;
216 223
217rtattr_failure: 224nla_put_failure:
218 nlmsg_trim(skb, b); 225 nlmsg_trim(skb, b);
219 return -1; 226 return -1;
220} 227}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c96273bcaf9c..0a3c8339767a 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -40,22 +40,29 @@ static struct tcf_hashinfo nat_hash_info = {
40 .lock = &nat_lock, 40 .lock = &nat_lock,
41}; 41};
42 42
43static int tcf_nat_init(struct rtattr *rta, struct rtattr *est, 43static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
44 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
45};
46
47static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
44 struct tc_action *a, int ovr, int bind) 48 struct tc_action *a, int ovr, int bind)
45{ 49{
46 struct rtattr *tb[TCA_NAT_MAX]; 50 struct nlattr *tb[TCA_NAT_MAX + 1];
47 struct tc_nat *parm; 51 struct tc_nat *parm;
48 int ret = 0; 52 int ret = 0, err;
49 struct tcf_nat *p; 53 struct tcf_nat *p;
50 struct tcf_common *pc; 54 struct tcf_common *pc;
51 55
52 if (rta == NULL || rtattr_parse_nested(tb, TCA_NAT_MAX, rta) < 0) 56 if (nla == NULL)
53 return -EINVAL; 57 return -EINVAL;
54 58
55 if (tb[TCA_NAT_PARMS - 1] == NULL || 59 err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy);
56 RTA_PAYLOAD(tb[TCA_NAT_PARMS - 1]) < sizeof(*parm)) 60 if (err < 0)
61 return err;
62
63 if (tb[TCA_NAT_PARMS] == NULL)
57 return -EINVAL; 64 return -EINVAL;
58 parm = RTA_DATA(tb[TCA_NAT_PARMS - 1]); 65 parm = nla_data(tb[TCA_NAT_PARMS]);
59 66
60 pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); 67 pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info);
61 if (!pc) { 68 if (!pc) {
@@ -151,7 +158,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
151 else 158 else
152 iph->daddr = new_addr; 159 iph->daddr = new_addr;
153 160
154 nf_csum_replace4(&iph->check, addr, new_addr); 161 csum_replace4(&iph->check, addr, new_addr);
155 } 162 }
156 163
157 ihl = iph->ihl * 4; 164 ihl = iph->ihl * 4;
@@ -169,7 +176,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
169 goto drop; 176 goto drop;
170 177
171 tcph = (void *)(skb_network_header(skb) + ihl); 178 tcph = (void *)(skb_network_header(skb) + ihl);
172 nf_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); 179 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
173 break; 180 break;
174 } 181 }
175 case IPPROTO_UDP: 182 case IPPROTO_UDP:
@@ -184,8 +191,8 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
184 191
185 udph = (void *)(skb_network_header(skb) + ihl); 192 udph = (void *)(skb_network_header(skb) + ihl);
186 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 193 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
187 nf_proto_csum_replace4(&udph->check, skb, addr, 194 inet_proto_csum_replace4(&udph->check, skb, addr,
188 new_addr, 1); 195 new_addr, 1);
189 if (!udph->check) 196 if (!udph->check)
190 udph->check = CSUM_MANGLED_0; 197 udph->check = CSUM_MANGLED_0;
191 } 198 }
@@ -232,8 +239,8 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
232 else 239 else
233 iph->saddr = new_addr; 240 iph->saddr = new_addr;
234 241
235 nf_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, 242 inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
236 1); 243 1);
237 break; 244 break;
238 } 245 }
239 default: 246 default:
@@ -275,17 +282,17 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
275 opt->refcnt = p->tcf_refcnt - ref; 282 opt->refcnt = p->tcf_refcnt - ref;
276 opt->bindcnt = p->tcf_bindcnt - bind; 283 opt->bindcnt = p->tcf_bindcnt - bind;
277 284
278 RTA_PUT(skb, TCA_NAT_PARMS, s, opt); 285 NLA_PUT(skb, TCA_NAT_PARMS, s, opt);
279 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 286 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
280 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 287 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
281 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 288 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
282 RTA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); 289 NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
283 290
284 kfree(opt); 291 kfree(opt);
285 292
286 return skb->len; 293 return skb->len;
287 294
288rtattr_failure: 295nla_put_failure:
289 nlmsg_trim(skb, b); 296 nlmsg_trim(skb, b);
290 kfree(opt); 297 kfree(opt);
291 return -1; 298 return -1;
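
The act_nat.c hunks swap the netfilter-private checksum fix-ups for their generic counterparts, which is what lets the Kconfig hunk earlier drop "select NETFILTER" from NET_ACT_NAT. The helpers are drop-in renames:

        /* IP header checksum: a plain 16-bit fold, no pseudo-header. */
        csum_replace4(&iph->check, addr, new_addr);

        /* L4 checksum: the final argument flags that the address is part
         * of the pseudo-header, which the helper needs in order to treat
         * checksum-offloaded skbs correctly. */
        inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
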
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b46fab5fb323..3cc4cb9e500e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -33,26 +33,33 @@ static struct tcf_hashinfo pedit_hash_info = {
33 .lock = &pedit_lock, 33 .lock = &pedit_lock,
34}; 34};
35 35
36static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est, 36static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
37 [TCA_PEDIT_PARMS] = { .len = sizeof(struct tcf_pedit) },
38};
39
40static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
37 struct tc_action *a, int ovr, int bind) 41 struct tc_action *a, int ovr, int bind)
38{ 42{
39 struct rtattr *tb[TCA_PEDIT_MAX]; 43 struct nlattr *tb[TCA_PEDIT_MAX + 1];
40 struct tc_pedit *parm; 44 struct tc_pedit *parm;
41 int ret = 0; 45 int ret = 0, err;
42 struct tcf_pedit *p; 46 struct tcf_pedit *p;
43 struct tcf_common *pc; 47 struct tcf_common *pc;
44 struct tc_pedit_key *keys = NULL; 48 struct tc_pedit_key *keys = NULL;
45 int ksize; 49 int ksize;
46 50
47 if (rta == NULL || rtattr_parse_nested(tb, TCA_PEDIT_MAX, rta) < 0) 51 if (nla == NULL)
48 return -EINVAL; 52 return -EINVAL;
49 53
50 if (tb[TCA_PEDIT_PARMS - 1] == NULL || 54 err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy);
51 RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm)) 55 if (err < 0)
56 return err;
57
58 if (tb[TCA_PEDIT_PARMS] == NULL)
52 return -EINVAL; 59 return -EINVAL;
53 parm = RTA_DATA(tb[TCA_PEDIT_PARMS-1]); 60 parm = nla_data(tb[TCA_PEDIT_PARMS]);
54 ksize = parm->nkeys * sizeof(struct tc_pedit_key); 61 ksize = parm->nkeys * sizeof(struct tc_pedit_key);
55 if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) 62 if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
56 return -EINVAL; 63 return -EINVAL;
57 64
58 pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info); 65 pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
@@ -206,15 +213,15 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
206 opt->refcnt = p->tcf_refcnt - ref; 213 opt->refcnt = p->tcf_refcnt - ref;
207 opt->bindcnt = p->tcf_bindcnt - bind; 214 opt->bindcnt = p->tcf_bindcnt - bind;
208 215
209 RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); 216 NLA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
210 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 217 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
211 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 218 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
212 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 219 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
213 RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); 220 NLA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
214 kfree(opt); 221 kfree(opt);
215 return skb->len; 222 return skb->len;
216 223
217rtattr_failure: 224nla_put_failure:
218 nlmsg_trim(skb, b); 225 nlmsg_trim(skb, b);
219 kfree(opt); 226 kfree(opt);
220 return -1; 227 return -1;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index a73e3e6d87ea..0898120bbcc0 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -54,7 +54,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
54{ 54{
55 struct tcf_common *p; 55 struct tcf_common *p;
56 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 56 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
57 struct rtattr *r; 57 struct nlattr *nest;
58 58
59 read_lock_bh(&police_lock); 59 read_lock_bh(&police_lock);
60 60
@@ -69,18 +69,19 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
69 continue; 69 continue;
70 a->priv = p; 70 a->priv = p;
71 a->order = index; 71 a->order = index;
72 r = (struct rtattr *)skb_tail_pointer(skb); 72 nest = nla_nest_start(skb, a->order);
73 RTA_PUT(skb, a->order, 0, NULL); 73 if (nest == NULL)
74 goto nla_put_failure;
74 if (type == RTM_DELACTION) 75 if (type == RTM_DELACTION)
75 err = tcf_action_dump_1(skb, a, 0, 1); 76 err = tcf_action_dump_1(skb, a, 0, 1);
76 else 77 else
77 err = tcf_action_dump_1(skb, a, 0, 0); 78 err = tcf_action_dump_1(skb, a, 0, 0);
78 if (err < 0) { 79 if (err < 0) {
79 index--; 80 index--;
80 nlmsg_trim(skb, r); 81 nla_nest_cancel(skb, nest);
81 goto done; 82 goto done;
82 } 83 }
83 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 84 nla_nest_end(skb, nest);
84 n_i++; 85 n_i++;
85 } 86 }
86 } 87 }
@@ -90,8 +91,8 @@ done:
90 cb->args[0] += n_i; 91 cb->args[0] += n_i;
91 return n_i; 92 return n_i;
92 93
93rtattr_failure: 94nla_put_failure:
94 nlmsg_trim(skb, r); 95 nla_nest_cancel(skb, nest);
95 goto done; 96 goto done;
96} 97}
97 98
@@ -118,33 +119,37 @@ static void tcf_police_destroy(struct tcf_police *p)
118 BUG_TRAP(0); 119 BUG_TRAP(0);
119} 120}
120 121
121static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, 122static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
123 [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE },
124 [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE },
125 [TCA_POLICE_AVRATE] = { .type = NLA_U32 },
126 [TCA_POLICE_RESULT] = { .type = NLA_U32 },
127};
128
129static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
122 struct tc_action *a, int ovr, int bind) 130 struct tc_action *a, int ovr, int bind)
123{ 131{
124 unsigned h; 132 unsigned h;
125 int ret = 0, err; 133 int ret = 0, err;
126 struct rtattr *tb[TCA_POLICE_MAX]; 134 struct nlattr *tb[TCA_POLICE_MAX + 1];
127 struct tc_police *parm; 135 struct tc_police *parm;
128 struct tcf_police *police; 136 struct tcf_police *police;
129 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 137 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
130 int size; 138 int size;
131 139
132 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 140 if (nla == NULL)
133 return -EINVAL; 141 return -EINVAL;
134 142
135 if (tb[TCA_POLICE_TBF-1] == NULL) 143 err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy);
136 return -EINVAL; 144 if (err < 0)
137 size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); 145 return err;
138 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
139 return -EINVAL;
140 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
141 146
142 if (tb[TCA_POLICE_RESULT-1] != NULL && 147 if (tb[TCA_POLICE_TBF] == NULL)
143 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
144 return -EINVAL; 148 return -EINVAL;
145 if (tb[TCA_POLICE_RESULT-1] != NULL && 149 size = nla_len(tb[TCA_POLICE_TBF]);
146 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 150 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
147 return -EINVAL; 151 return -EINVAL;
152 parm = nla_data(tb[TCA_POLICE_TBF]);
148 153
149 if (parm->index) { 154 if (parm->index) {
150 struct tcf_common *pc; 155 struct tcf_common *pc;
@@ -174,12 +179,12 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
174override: 179override:
175 if (parm->rate.rate) { 180 if (parm->rate.rate) {
176 err = -ENOMEM; 181 err = -ENOMEM;
177 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 182 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
178 if (R_tab == NULL) 183 if (R_tab == NULL)
179 goto failure; 184 goto failure;
180 if (parm->peakrate.rate) { 185 if (parm->peakrate.rate) {
181 P_tab = qdisc_get_rtab(&parm->peakrate, 186 P_tab = qdisc_get_rtab(&parm->peakrate,
182 tb[TCA_POLICE_PEAKRATE-1]); 187 tb[TCA_POLICE_PEAKRATE]);
183 if (P_tab == NULL) { 188 if (P_tab == NULL) {
184 qdisc_put_rtab(R_tab); 189 qdisc_put_rtab(R_tab);
185 goto failure; 190 goto failure;
@@ -197,8 +202,8 @@ override:
197 police->tcfp_P_tab = P_tab; 202 police->tcfp_P_tab = P_tab;
198 } 203 }
199 204
200 if (tb[TCA_POLICE_RESULT-1]) 205 if (tb[TCA_POLICE_RESULT])
201 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 206 police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
202 police->tcfp_toks = police->tcfp_burst = parm->burst; 207 police->tcfp_toks = police->tcfp_burst = parm->burst;
203 police->tcfp_mtu = parm->mtu; 208 police->tcfp_mtu = parm->mtu;
204 if (police->tcfp_mtu == 0) { 209 if (police->tcfp_mtu == 0) {
@@ -210,9 +215,8 @@ override:
210 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); 215 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
211 police->tcf_action = parm->action; 216 police->tcf_action = parm->action;
212 217
213 if (tb[TCA_POLICE_AVRATE-1]) 218 if (tb[TCA_POLICE_AVRATE])
214 police->tcfp_ewma_rate = 219 police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
215 *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
216 if (est) 220 if (est)
217 gen_replace_estimator(&police->tcf_bstats, 221 gen_replace_estimator(&police->tcf_bstats,
218 &police->tcf_rate_est, 222 &police->tcf_rate_est,
@@ -332,15 +336,14 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
332 opt.peakrate = police->tcfp_P_tab->rate; 336 opt.peakrate = police->tcfp_P_tab->rate;
333 else 337 else
334 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 338 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
335 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 339 NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
336 if (police->tcfp_result) 340 if (police->tcfp_result)
337 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), 341 NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result);
338 &police->tcfp_result);
339 if (police->tcfp_ewma_rate) 342 if (police->tcfp_ewma_rate)
340 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); 343 NLA_PUT_U32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate);
341 return skb->len; 344 return skb->len;
342 345
343rtattr_failure: 346nla_put_failure:
344 nlmsg_trim(skb, b); 347 nlmsg_trim(skb, b);
345 return -1; 348 return -1;
346} 349}
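
Beyond the policy conversion, two details stand out in act_police.c: the old code checked TCA_POLICE_RESULT's payload size twice back to back (visible in the removed lines), and both checks collapse into a single policy entry; and the rate-table attributes now carry a named length, TC_RTAB_SIZE, instead of an open-coded one. Sketch of the relevant entries:

        [TCA_POLICE_RATE]     = { .len  = TC_RTAB_SIZE },  /* fixed-size rate table */
        [TCA_POLICE_PEAKRATE] = { .len  = TC_RTAB_SIZE },
        [TCA_POLICE_RESULT]   = { .type = NLA_U32 },       /* exactly 4 bytes */
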
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index fb84ef33d14f..fbde461b716c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -84,30 +84,37 @@ static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
84 return alloc_defdata(d, datalen, defdata); 84 return alloc_defdata(d, datalen, defdata);
85} 85}
86 86
87static int tcf_simp_init(struct rtattr *rta, struct rtattr *est, 87static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
88 [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) },
89};
90
91static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
88 struct tc_action *a, int ovr, int bind) 92 struct tc_action *a, int ovr, int bind)
89{ 93{
90 struct rtattr *tb[TCA_DEF_MAX]; 94 struct nlattr *tb[TCA_DEF_MAX + 1];
91 struct tc_defact *parm; 95 struct tc_defact *parm;
92 struct tcf_defact *d; 96 struct tcf_defact *d;
93 struct tcf_common *pc; 97 struct tcf_common *pc;
94 void *defdata; 98 void *defdata;
95 u32 datalen = 0; 99 u32 datalen = 0;
96 int ret = 0; 100 int ret = 0, err;
97 101
98 if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) 102 if (nla == NULL)
99 return -EINVAL; 103 return -EINVAL;
100 104
101 if (tb[TCA_DEF_PARMS - 1] == NULL || 105 err = nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL);
102 RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) 106 if (err < 0)
107 return err;
108
109 if (tb[TCA_DEF_PARMS] == NULL)
103 return -EINVAL; 110 return -EINVAL;
104 111
105 parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); 112 parm = nla_data(tb[TCA_DEF_PARMS]);
106 defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); 113 defdata = nla_data(tb[TCA_DEF_DATA]);
107 if (defdata == NULL) 114 if (defdata == NULL)
108 return -EINVAL; 115 return -EINVAL;
109 116
110 datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); 117 datalen = nla_len(tb[TCA_DEF_DATA]);
111 if (datalen <= 0) 118 if (datalen <= 0)
112 return -EINVAL; 119 return -EINVAL;
113 120
@@ -164,15 +171,15 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
164 opt.refcnt = d->tcf_refcnt - ref; 171 opt.refcnt = d->tcf_refcnt - ref;
165 opt.bindcnt = d->tcf_bindcnt - bind; 172 opt.bindcnt = d->tcf_bindcnt - bind;
166 opt.action = d->tcf_action; 173 opt.action = d->tcf_action;
167 RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); 174 NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
168 RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); 175 NLA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
169 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); 176 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
170 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); 177 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
171 t.expires = jiffies_to_clock_t(d->tcf_tm.expires); 178 t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
172 RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); 179 NLA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
173 return skb->len; 180 return skb->len;
174 181
175rtattr_failure: 182nla_put_failure:
176 nlmsg_trim(skb, b); 183 nlmsg_trim(skb, b);
177 return -1; 184 return -1;
178} 185}
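
In act_simple the parse step itself changes shape: rtattr_parse_nested() could only report failure as -EINVAL, while nla_parse_nested() validates each attribute against an nla_policy and returns an error code that the caller propagates. One detail worth noting in the hunk above: simple_policy is declared, but the nla_parse_nested() call passes NULL, so the TCA_DEF_PARMS length constraint is not actually enforced at this point in the series. A sketch of the intended shape, with hypothetical TCA_BAR_* names and struct tc_bar:

    static const struct nla_policy bar_policy[TCA_BAR_MAX + 1] = {
            [TCA_BAR_PARMS] = { .len = sizeof(struct tc_bar) },
    };

    static int tcf_bar_init(struct nlattr *nla)
    {
            struct nlattr *tb[TCA_BAR_MAX + 1];
            int err;

            if (nla == NULL)
                    return -EINVAL;

            /* Rejects any TCA_BAR_PARMS shorter than struct tc_bar,
             * so nla_data() on it is safe to dereference afterwards. */
            err = nla_parse_nested(tb, TCA_BAR_MAX, nla, bar_policy);
            if (err < 0)
                    return err;     /* propagated, not flattened to -EINVAL */

            if (tb[TCA_BAR_PARMS] == NULL)
                    return -EINVAL;

            /* Real code would now read nla_data(tb[TCA_BAR_PARMS]). */
            return 0;
    }
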
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 03657976fd50..3377ca0d0a0c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -23,33 +23,30 @@
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/kmod.h> 24#include <linux/kmod.h>
25#include <linux/netlink.h> 25#include <linux/netlink.h>
26#include <linux/err.h>
27#include <net/net_namespace.h>
28#include <net/sock.h>
26#include <net/netlink.h> 29#include <net/netlink.h>
27#include <net/pkt_sched.h> 30#include <net/pkt_sched.h>
28#include <net/pkt_cls.h> 31#include <net/pkt_cls.h>
29 32
30#if 0 /* control */
31#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
32#else
33#define DPRINTK(format,args...)
34#endif
35
36/* The list of all installed classifier types */ 33/* The list of all installed classifier types */
37 34
38static struct tcf_proto_ops *tcf_proto_base; 35static struct tcf_proto_ops *tcf_proto_base __read_mostly;
39 36
40/* Protects list of registered TC modules. It is pure SMP lock. */ 37/* Protects list of registered TC modules. It is pure SMP lock. */
41static DEFINE_RWLOCK(cls_mod_lock); 38static DEFINE_RWLOCK(cls_mod_lock);
42 39
43/* Find classifier type by string name */ 40/* Find classifier type by string name */
44 41
45static struct tcf_proto_ops * tcf_proto_lookup_ops(struct rtattr *kind) 42static struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
46{ 43{
47 struct tcf_proto_ops *t = NULL; 44 struct tcf_proto_ops *t = NULL;
48 45
49 if (kind) { 46 if (kind) {
50 read_lock(&cls_mod_lock); 47 read_lock(&cls_mod_lock);
51 for (t = tcf_proto_base; t; t = t->next) { 48 for (t = tcf_proto_base; t; t = t->next) {
52 if (rtattr_strcmp(kind, t->kind) == 0) { 49 if (nla_strcmp(kind, t->kind) == 0) {
53 if (!try_module_get(t->owner)) 50 if (!try_module_get(t->owner))
54 t = NULL; 51 t = NULL;
55 break; 52 break;
@@ -79,6 +76,7 @@ out:
79 write_unlock(&cls_mod_lock); 76 write_unlock(&cls_mod_lock);
80 return rc; 77 return rc;
81} 78}
79EXPORT_SYMBOL(register_tcf_proto_ops);
82 80
83int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) 81int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
84{ 82{
@@ -98,6 +96,7 @@ out:
98 write_unlock(&cls_mod_lock); 96 write_unlock(&cls_mod_lock);
99 return rc; 97 return rc;
100} 98}
99EXPORT_SYMBOL(unregister_tcf_proto_ops);
101 100
102static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, 101static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
103 struct tcf_proto *tp, unsigned long fh, int event); 102 struct tcf_proto *tp, unsigned long fh, int event);
@@ -105,9 +104,9 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
105 104
106/* Select new prio value from the range, managed by kernel. */ 105/* Select new prio value from the range, managed by kernel. */
107 106
108static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp) 107static inline u32 tcf_auto_prio(struct tcf_proto *tp)
109{ 108{
110 u32 first = TC_H_MAKE(0xC0000000U,0U); 109 u32 first = TC_H_MAKE(0xC0000000U, 0U);
111 110
112 if (tp) 111 if (tp)
113 first = tp->prio-1; 112 first = tp->prio-1;
@@ -119,7 +118,8 @@ static __inline__ u32 tcf_auto_prio(struct tcf_proto *tp)
119 118
120static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 119static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
121{ 120{
122 struct rtattr **tca; 121 struct net *net = skb->sk->sk_net;
122 struct nlattr *tca[TCA_MAX + 1];
123 struct tcmsg *t; 123 struct tcmsg *t;
124 u32 protocol; 124 u32 protocol;
125 u32 prio; 125 u32 prio;
@@ -130,13 +130,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
130 struct tcf_proto **back, **chain; 130 struct tcf_proto **back, **chain;
131 struct tcf_proto *tp; 131 struct tcf_proto *tp;
132 struct tcf_proto_ops *tp_ops; 132 struct tcf_proto_ops *tp_ops;
133 struct Qdisc_class_ops *cops; 133 const struct Qdisc_class_ops *cops;
134 unsigned long cl; 134 unsigned long cl;
135 unsigned long fh; 135 unsigned long fh;
136 int err; 136 int err;
137 137
138 if (net != &init_net)
139 return -EINVAL;
140
138replay: 141replay:
139 tca = arg;
140 t = NLMSG_DATA(n); 142 t = NLMSG_DATA(n);
141 protocol = TC_H_MIN(t->tcm_info); 143 protocol = TC_H_MIN(t->tcm_info);
142 prio = TC_H_MAJ(t->tcm_info); 144 prio = TC_H_MAJ(t->tcm_info);
@@ -148,21 +150,29 @@ replay:
148 /* If no priority is given, user wants we allocated it. */ 150 /* If no priority is given, user wants we allocated it. */
149 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 151 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
150 return -ENOENT; 152 return -ENOENT;
151 prio = TC_H_MAKE(0x80000000U,0U); 153 prio = TC_H_MAKE(0x80000000U, 0U);
152 } 154 }
153 155
154 /* Find head of filter chain. */ 156 /* Find head of filter chain. */
155 157
156 /* Find link */ 158 /* Find link */
157 if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL) 159 dev = __dev_get_by_index(&init_net, t->tcm_ifindex);
160 if (dev == NULL)
158 return -ENODEV; 161 return -ENODEV;
159 162
163 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
164 if (err < 0)
165 return err;
166
160 /* Find qdisc */ 167 /* Find qdisc */
161 if (!parent) { 168 if (!parent) {
162 q = dev->qdisc_sleeping; 169 q = dev->qdisc_sleeping;
163 parent = q->handle; 170 parent = q->handle;
164 } else if ((q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent))) == NULL) 171 } else {
165 return -EINVAL; 172 q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
173 if (q == NULL)
174 return -EINVAL;
175 }
166 176
167 /* Is it classful? */ 177 /* Is it classful? */
168 if ((cops = q->ops->cl_ops) == NULL) 178 if ((cops = q->ops->cl_ops) == NULL)
@@ -196,7 +206,7 @@ replay:
196 if (tp == NULL) { 206 if (tp == NULL) {
197 /* Proto-tcf does not exist, create new one */ 207 /* Proto-tcf does not exist, create new one */
198 208
199 if (tca[TCA_KIND-1] == NULL || !protocol) 209 if (tca[TCA_KIND] == NULL || !protocol)
200 goto errout; 210 goto errout;
201 211
202 err = -ENOENT; 212 err = -ENOENT;
@@ -207,17 +217,18 @@ replay:
207 /* Create new proto tcf */ 217 /* Create new proto tcf */
208 218
209 err = -ENOBUFS; 219 err = -ENOBUFS;
210 if ((tp = kzalloc(sizeof(*tp), GFP_KERNEL)) == NULL) 220 tp = kzalloc(sizeof(*tp), GFP_KERNEL);
221 if (tp == NULL)
211 goto errout; 222 goto errout;
212 err = -EINVAL; 223 err = -EINVAL;
213 tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND-1]); 224 tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
214 if (tp_ops == NULL) { 225 if (tp_ops == NULL) {
215#ifdef CONFIG_KMOD 226#ifdef CONFIG_KMOD
216 struct rtattr *kind = tca[TCA_KIND-1]; 227 struct nlattr *kind = tca[TCA_KIND];
217 char name[IFNAMSIZ]; 228 char name[IFNAMSIZ];
218 229
219 if (kind != NULL && 230 if (kind != NULL &&
220 rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { 231 nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
221 rtnl_unlock(); 232 rtnl_unlock();
222 request_module("cls_%s", name); 233 request_module("cls_%s", name);
223 rtnl_lock(); 234 rtnl_lock();
@@ -243,7 +254,9 @@ replay:
243 tp->q = q; 254 tp->q = q;
244 tp->classify = tp_ops->classify; 255 tp->classify = tp_ops->classify;
245 tp->classid = parent; 256 tp->classid = parent;
246 if ((err = tp_ops->init(tp)) != 0) { 257
258 err = tp_ops->init(tp);
259 if (err != 0) {
247 module_put(tp_ops->owner); 260 module_put(tp_ops->owner);
248 kfree(tp); 261 kfree(tp);
249 goto errout; 262 goto errout;
@@ -254,7 +267,7 @@ replay:
254 *back = tp; 267 *back = tp;
255 qdisc_unlock_tree(dev); 268 qdisc_unlock_tree(dev);
256 269
257 } else if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], tp->ops->kind)) 270 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
258 goto errout; 271 goto errout;
259 272
260 fh = tp->ops->get(tp, t->tcm_handle); 273 fh = tp->ops->get(tp, t->tcm_handle);
@@ -272,13 +285,14 @@ replay:
272 } 285 }
273 286
274 err = -ENOENT; 287 err = -ENOENT;
275 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 288 if (n->nlmsg_type != RTM_NEWTFILTER ||
289 !(n->nlmsg_flags & NLM_F_CREATE))
276 goto errout; 290 goto errout;
277 } else { 291 } else {
278 switch (n->nlmsg_type) { 292 switch (n->nlmsg_type) {
279 case RTM_NEWTFILTER: 293 case RTM_NEWTFILTER:
280 err = -EEXIST; 294 err = -EEXIST;
281 if (n->nlmsg_flags&NLM_F_EXCL) 295 if (n->nlmsg_flags & NLM_F_EXCL)
282 goto errout; 296 goto errout;
283 break; 297 break;
284 case RTM_DELTFILTER: 298 case RTM_DELTFILTER:
@@ -308,9 +322,8 @@ errout:
308 return err; 322 return err;
309} 323}
310 324
311static int 325static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
312tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, 326 unsigned long fh, u32 pid, u32 seq, u16 flags, int event)
313 u32 pid, u32 seq, u16 flags, int event)
314{ 327{
315 struct tcmsg *tcm; 328 struct tcmsg *tcm;
316 struct nlmsghdr *nlh; 329 struct nlmsghdr *nlh;
@@ -324,18 +337,18 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
324 tcm->tcm_ifindex = tp->q->dev->ifindex; 337 tcm->tcm_ifindex = tp->q->dev->ifindex;
325 tcm->tcm_parent = tp->classid; 338 tcm->tcm_parent = tp->classid;
326 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); 339 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
327 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, tp->ops->kind); 340 NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind);
328 tcm->tcm_handle = fh; 341 tcm->tcm_handle = fh;
329 if (RTM_DELTFILTER != event) { 342 if (RTM_DELTFILTER != event) {
330 tcm->tcm_handle = 0; 343 tcm->tcm_handle = 0;
331 if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) 344 if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
332 goto rtattr_failure; 345 goto nla_put_failure;
333 } 346 }
334 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 347 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
335 return skb->len; 348 return skb->len;
336 349
337nlmsg_failure: 350nlmsg_failure:
338rtattr_failure: 351nla_put_failure:
339 nlmsg_trim(skb, b); 352 nlmsg_trim(skb, b);
340 return -1; 353 return -1;
341} 354}
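
The dump side has a matching convention. The RTA_PUT() family becomes NLA_PUT(), NLA_PUT_U32() and NLA_PUT_STRING(), and the shared error label is renamed from rtattr_failure to nla_put_failure because that is the label the NLA_PUT macros jump to when the skb runs out of tail room. A minimal sketch with a hypothetical TCA_BAZ_RESULT attribute:

    static int baz_dump(struct sk_buff *skb, const char *kind, u32 result)
    {
            unsigned char *b = skb_tail_pointer(skb);

            /* Each macro expands to a put plus "goto nla_put_failure"
             * on failure, so the label name is fixed by the macros. */
            NLA_PUT_STRING(skb, TCA_KIND, kind);
            NLA_PUT_U32(skb, TCA_BAZ_RESULT, result);
            return skb->len;

    nla_put_failure:
            nlmsg_trim(skb, b);     /* drop the partial attributes */
            return -1;
    }
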
@@ -355,19 +368,20 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
355 return -EINVAL; 368 return -EINVAL;
356 } 369 }
357 370
358 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 371 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
372 n->nlmsg_flags & NLM_F_ECHO);
359} 373}
360 374
361struct tcf_dump_args 375struct tcf_dump_args {
362{
363 struct tcf_walker w; 376 struct tcf_walker w;
364 struct sk_buff *skb; 377 struct sk_buff *skb;
365 struct netlink_callback *cb; 378 struct netlink_callback *cb;
366}; 379};
367 380
368static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walker *arg) 381static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
382 struct tcf_walker *arg)
369{ 383{
370 struct tcf_dump_args *a = (void*)arg; 384 struct tcf_dump_args *a = (void *)arg;
371 385
372 return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid, 386 return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
373 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); 387 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
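
Two infrastructural changes ride along in the tc_ctl_tfilter() hunks further up: the handler now builds its own attribute table with nlmsg_parse() instead of receiving one pre-parsed through the arg pointer, and it gains a transitional network-namespace guard that refuses anything but init_net while tc is not yet namespace-aware. The same guard appears in tc_dump_tfilter() below, and tfilter_notify() passes &init_net to rtnetlink_send() for the same reason. A sketch of that preamble, with names mirroring the real code but the body abridged:

    static int some_ctl(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
    {
            struct net *net = skb->sk->sk_net; /* later kernels: sock_net() */
            struct nlattr *tca[TCA_MAX + 1];
            int err;

            if (net != &init_net)
                    return -EINVAL;   /* netns support not wired up yet */

            /* Parse everything after the tcmsg header into tca[]. */
            err = nlmsg_parse(n, sizeof(struct tcmsg), tca, TCA_MAX, NULL);
            if (err < 0)
                    return err;

            return 0;
    }
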
@@ -375,16 +389,20 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct tcf_walke
375 389
376static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) 390static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
377{ 391{
392 struct net *net = skb->sk->sk_net;
378 int t; 393 int t;
379 int s_t; 394 int s_t;
380 struct net_device *dev; 395 struct net_device *dev;
381 struct Qdisc *q; 396 struct Qdisc *q;
382 struct tcf_proto *tp, **chain; 397 struct tcf_proto *tp, **chain;
383 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); 398 struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
384 unsigned long cl = 0; 399 unsigned long cl = 0;
385 struct Qdisc_class_ops *cops; 400 const struct Qdisc_class_ops *cops;
386 struct tcf_dump_args arg; 401 struct tcf_dump_args arg;
387 402
403 if (net != &init_net)
404 return 0;
405
388 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 406 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
389 return skb->len; 407 return skb->len;
390 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 408 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
@@ -421,9 +439,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
421 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 439 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
422 if (cb->args[1] == 0) { 440 if (cb->args[1] == 0) {
423 if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, 441 if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
424 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) { 442 cb->nlh->nlmsg_seq, NLM_F_MULTI,
443 RTM_NEWTFILTER) <= 0)
425 break; 444 break;
426 } 445
427 cb->args[1] = 1; 446 cb->args[1] = 1;
428 } 447 }
429 if (tp->ops->walk == NULL) 448 if (tp->ops->walk == NULL)
@@ -450,8 +469,7 @@ out:
450 return skb->len; 469 return skb->len;
451} 470}
452 471
453void 472void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
454tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
455{ 473{
456#ifdef CONFIG_NET_CLS_ACT 474#ifdef CONFIG_NET_CLS_ACT
457 if (exts->action) { 475 if (exts->action) {
@@ -460,49 +478,48 @@ tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
460 } 478 }
461#endif 479#endif
462} 480}
481EXPORT_SYMBOL(tcf_exts_destroy);
463 482
464 483int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
465int 484 struct nlattr *rate_tlv, struct tcf_exts *exts,
466tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
467 struct rtattr *rate_tlv, struct tcf_exts *exts,
468 struct tcf_ext_map *map) 485 struct tcf_ext_map *map)
469{ 486{
470 memset(exts, 0, sizeof(*exts)); 487 memset(exts, 0, sizeof(*exts));
471 488
472#ifdef CONFIG_NET_CLS_ACT 489#ifdef CONFIG_NET_CLS_ACT
473 { 490 {
474 int err;
475 struct tc_action *act; 491 struct tc_action *act;
476 492
477 if (map->police && tb[map->police-1]) { 493 if (map->police && tb[map->police]) {
478 act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police", 494 act = tcf_action_init_1(tb[map->police], rate_tlv,
479 TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err); 495 "police", TCA_ACT_NOREPLACE,
480 if (act == NULL) 496 TCA_ACT_BIND);
481 return err; 497 if (IS_ERR(act))
498 return PTR_ERR(act);
482 499
483 act->type = TCA_OLD_COMPAT; 500 act->type = TCA_OLD_COMPAT;
484 exts->action = act; 501 exts->action = act;
485 } else if (map->action && tb[map->action-1]) { 502 } else if (map->action && tb[map->action]) {
486 act = tcf_action_init(tb[map->action-1], rate_tlv, NULL, 503 act = tcf_action_init(tb[map->action], rate_tlv, NULL,
487 TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err); 504 TCA_ACT_NOREPLACE, TCA_ACT_BIND);
488 if (act == NULL) 505 if (IS_ERR(act))
489 return err; 506 return PTR_ERR(act);
490 507
491 exts->action = act; 508 exts->action = act;
492 } 509 }
493 } 510 }
494#else 511#else
495 if ((map->action && tb[map->action-1]) || 512 if ((map->action && tb[map->action]) ||
496 (map->police && tb[map->police-1])) 513 (map->police && tb[map->police]))
497 return -EOPNOTSUPP; 514 return -EOPNOTSUPP;
498#endif 515#endif
499 516
500 return 0; 517 return 0;
501} 518}
519EXPORT_SYMBOL(tcf_exts_validate);
502 520
503void 521void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
504tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, 522 struct tcf_exts *src)
505 struct tcf_exts *src)
506{ 523{
507#ifdef CONFIG_NET_CLS_ACT 524#ifdef CONFIG_NET_CLS_ACT
508 if (src->action) { 525 if (src->action) {
@@ -515,9 +532,9 @@ tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
515 } 532 }
516#endif 533#endif
517} 534}
535EXPORT_SYMBOL(tcf_exts_change);
518 536
519int 537int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
520tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
521 struct tcf_ext_map *map) 538 struct tcf_ext_map *map)
522{ 539{
523#ifdef CONFIG_NET_CLS_ACT 540#ifdef CONFIG_NET_CLS_ACT
@@ -527,39 +544,45 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
527 * to work with both old and new modes of entering 544 * to work with both old and new modes of entering
528 * tc data even if iproute2 was newer - jhs 545 * tc data even if iproute2 was newer - jhs
529 */ 546 */
530 struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); 547 struct nlattr *nest;
531 548
532 if (exts->action->type != TCA_OLD_COMPAT) { 549 if (exts->action->type != TCA_OLD_COMPAT) {
533 RTA_PUT(skb, map->action, 0, NULL); 550 nest = nla_nest_start(skb, map->action);
551 if (nest == NULL)
552 goto nla_put_failure;
534 if (tcf_action_dump(skb, exts->action, 0, 0) < 0) 553 if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
535 goto rtattr_failure; 554 goto nla_put_failure;
536 p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; 555 nla_nest_end(skb, nest);
537 } else if (map->police) { 556 } else if (map->police) {
538 RTA_PUT(skb, map->police, 0, NULL); 557 nest = nla_nest_start(skb, map->police);
558 if (nest == NULL)
559 goto nla_put_failure;
539 if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0) 560 if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
540 goto rtattr_failure; 561 goto nla_put_failure;
541 p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; 562 nla_nest_end(skb, nest);
542 } 563 }
543 } 564 }
544#endif 565#endif
545 return 0; 566 return 0;
546rtattr_failure: __attribute__ ((unused)) 567nla_put_failure: __attribute__ ((unused))
547 return -1; 568 return -1;
548} 569}
570EXPORT_SYMBOL(tcf_exts_dump);
549 571
550int 572
551tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, 573int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
552 struct tcf_ext_map *map) 574 struct tcf_ext_map *map)
553{ 575{
554#ifdef CONFIG_NET_CLS_ACT 576#ifdef CONFIG_NET_CLS_ACT
555 if (exts->action) 577 if (exts->action)
556 if (tcf_action_copy_stats(skb, exts->action, 1) < 0) 578 if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
557 goto rtattr_failure; 579 goto nla_put_failure;
558#endif 580#endif
559 return 0; 581 return 0;
560rtattr_failure: __attribute__ ((unused)) 582nla_put_failure: __attribute__ ((unused))
561 return -1; 583 return -1;
562} 584}
585EXPORT_SYMBOL(tcf_exts_dump_stats);
563 586
564static int __init tc_filter_init(void) 587static int __init tc_filter_init(void)
565{ 588{
@@ -572,11 +595,3 @@ static int __init tc_filter_init(void)
572} 595}
573 596
574subsys_initcall(tc_filter_init); 597subsys_initcall(tc_filter_init);
575
576EXPORT_SYMBOL(register_tcf_proto_ops);
577EXPORT_SYMBOL(unregister_tcf_proto_ops);
578EXPORT_SYMBOL(tcf_exts_validate);
579EXPORT_SYMBOL(tcf_exts_destroy);
580EXPORT_SYMBOL(tcf_exts_change);
581EXPORT_SYMBOL(tcf_exts_dump);
582EXPORT_SYMBOL(tcf_exts_dump_stats);
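
cls_api.c also switches the action hooks from the NULL-return-plus-int-out-parameter convention to ERR_PTR(): tcf_action_init_1() and tcf_action_init() now encode the errno in the returned pointer, which tcf_exts_validate() recovers with IS_ERR() and PTR_ERR(), dropping the local err variable. As a side cleanup, the EXPORT_SYMBOL() lines move from a block at the end of the file to sit directly after each definition, the preferred kernel style. A minimal sketch of the pointer-encoded error convention; make_thing() and put_thing() are hypothetical:

    #include <linux/err.h>

    struct thing;
    struct thing *make_thing(void);  /* valid pointer or ERR_PTR(-errno) */
    void put_thing(struct thing *t);

    static int use_thing(void)
    {
            struct thing *t = make_thing();

            /* ERR_PTR values are non-NULL pointers in a reserved
             * range; IS_ERR() detects them, PTR_ERR() recovers the
             * negative errno for returning to the caller. */
            if (IS_ERR(t))
                    return PTR_ERR(t);

            put_thing(t);
            return 0;
    }
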
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 8dbcf2771a46..bfb4342ea88c 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -129,28 +129,29 @@ static int basic_delete(struct tcf_proto *tp, unsigned long arg)
129 return -ENOENT; 129 return -ENOENT;
130} 130}
131 131
132static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
133 [TCA_BASIC_CLASSID] = { .type = NLA_U32 },
134 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED },
135};
136
132static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, 137static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
133 unsigned long base, struct rtattr **tb, 138 unsigned long base, struct nlattr **tb,
134 struct rtattr *est) 139 struct nlattr *est)
135{ 140{
136 int err = -EINVAL; 141 int err = -EINVAL;
137 struct tcf_exts e; 142 struct tcf_exts e;
138 struct tcf_ematch_tree t; 143 struct tcf_ematch_tree t;
139 144
140 if (tb[TCA_BASIC_CLASSID-1])
141 if (RTA_PAYLOAD(tb[TCA_BASIC_CLASSID-1]) < sizeof(u32))
142 return err;
143
144 err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map); 145 err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
145 if (err < 0) 146 if (err < 0)
146 return err; 147 return err;
147 148
148 err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES-1], &t); 149 err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
149 if (err < 0) 150 if (err < 0)
150 goto errout; 151 goto errout;
151 152
152 if (tb[TCA_BASIC_CLASSID-1]) { 153 if (tb[TCA_BASIC_CLASSID]) {
153 f->res.classid = *(u32*)RTA_DATA(tb[TCA_BASIC_CLASSID-1]); 154 f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]);
154 tcf_bind_filter(tp, &f->res, base); 155 tcf_bind_filter(tp, &f->res, base);
155 } 156 }
156 157
@@ -164,23 +165,25 @@ errout:
164} 165}
165 166
166static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, 167static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
167 struct rtattr **tca, unsigned long *arg) 168 struct nlattr **tca, unsigned long *arg)
168{ 169{
169 int err = -EINVAL; 170 int err;
170 struct basic_head *head = (struct basic_head *) tp->root; 171 struct basic_head *head = (struct basic_head *) tp->root;
171 struct rtattr *tb[TCA_BASIC_MAX]; 172 struct nlattr *tb[TCA_BASIC_MAX + 1];
172 struct basic_filter *f = (struct basic_filter *) *arg; 173 struct basic_filter *f = (struct basic_filter *) *arg;
173 174
174 if (tca[TCA_OPTIONS-1] == NULL) 175 if (tca[TCA_OPTIONS] == NULL)
175 return -EINVAL; 176 return -EINVAL;
176 177
177 if (rtattr_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS-1]) < 0) 178 err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS],
178 return -EINVAL; 179 basic_policy);
180 if (err < 0)
181 return err;
179 182
180 if (f != NULL) { 183 if (f != NULL) {
181 if (handle && f->handle != handle) 184 if (handle && f->handle != handle)
182 return -EINVAL; 185 return -EINVAL;
183 return basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]); 186 return basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
184 } 187 }
185 188
186 err = -ENOBUFS; 189 err = -ENOBUFS;
@@ -206,7 +209,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
206 f->handle = head->hgenerator; 209 f->handle = head->hgenerator;
207 } 210 }
208 211
209 err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]); 212 err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
210 if (err < 0) 213 if (err < 0)
211 goto errout; 214 goto errout;
212 215
@@ -245,33 +248,33 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
245 struct sk_buff *skb, struct tcmsg *t) 248 struct sk_buff *skb, struct tcmsg *t)
246{ 249{
247 struct basic_filter *f = (struct basic_filter *) fh; 250 struct basic_filter *f = (struct basic_filter *) fh;
248 unsigned char *b = skb_tail_pointer(skb); 251 struct nlattr *nest;
249 struct rtattr *rta;
250 252
251 if (f == NULL) 253 if (f == NULL)
252 return skb->len; 254 return skb->len;
253 255
254 t->tcm_handle = f->handle; 256 t->tcm_handle = f->handle;
255 257
256 rta = (struct rtattr *) b; 258 nest = nla_nest_start(skb, TCA_OPTIONS);
257 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 259 if (nest == NULL)
260 goto nla_put_failure;
258 261
259 if (f->res.classid) 262 if (f->res.classid)
260 RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid); 263 NLA_PUT_U32(skb, TCA_BASIC_CLASSID, f->res.classid);
261 264
262 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || 265 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
263 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) 266 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
264 goto rtattr_failure; 267 goto nla_put_failure;
265 268
266 rta->rta_len = skb_tail_pointer(skb) - b; 269 nla_nest_end(skb, nest);
267 return skb->len; 270 return skb->len;
268 271
269rtattr_failure: 272nla_put_failure:
270 nlmsg_trim(skb, b); 273 nla_nest_cancel(skb, nest);
271 return -1; 274 return -1;
272} 275}
273 276
274static struct tcf_proto_ops cls_basic_ops = { 277static struct tcf_proto_ops cls_basic_ops __read_mostly = {
275 .kind = "basic", 278 .kind = "basic",
276 .classify = basic_classify, 279 .classify = basic_classify,
277 .init = basic_init, 280 .init = basic_init,
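
basic_dump() shows the nesting helpers that replace the manual rtattr bookkeeping: instead of remembering the tail pointer and patching rta->rta_len by hand, nla_nest_start() emits a placeholder attribute header, nla_nest_end() fixes up its length once the nested attributes are in place, and nla_nest_cancel() trims the message back on error. A sketch with a hypothetical TCA_QUX_VAL attribute:

    static int qux_dump(struct sk_buff *skb, u32 val)
    {
            struct nlattr *nest;

            nest = nla_nest_start(skb, TCA_OPTIONS);
            if (nest == NULL)
                    goto nla_put_failure;

            NLA_PUT_U32(skb, TCA_QUX_VAL, val);

            nla_nest_end(skb, nest);   /* patches the nest's length field */
            return skb->len;

    nla_put_failure:
            /* Safe even when nest is NULL; nlmsg_trim() checks for it. */
            nla_nest_cancel(skb, nest);
            return -1;
    }
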
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 8adbd6a37d14..436a6e7c438e 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -186,39 +186,41 @@ out:
186 return -EINVAL; 186 return -EINVAL;
187} 187}
188 188
189static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
190 [TCA_FW_CLASSID] = { .type = NLA_U32 },
191 [TCA_FW_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
192 [TCA_FW_MASK] = { .type = NLA_U32 },
193};
194
189static int 195static int
190fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, 196fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
191 struct rtattr **tb, struct rtattr **tca, unsigned long base) 197 struct nlattr **tb, struct nlattr **tca, unsigned long base)
192{ 198{
193 struct fw_head *head = (struct fw_head *)tp->root; 199 struct fw_head *head = (struct fw_head *)tp->root;
194 struct tcf_exts e; 200 struct tcf_exts e;
195 u32 mask; 201 u32 mask;
196 int err; 202 int err;
197 203
198 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map); 204 err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
199 if (err < 0) 205 if (err < 0)
200 return err; 206 return err;
201 207
202 err = -EINVAL; 208 err = -EINVAL;
203 if (tb[TCA_FW_CLASSID-1]) { 209 if (tb[TCA_FW_CLASSID]) {
204 if (RTA_PAYLOAD(tb[TCA_FW_CLASSID-1]) != sizeof(u32)) 210 f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
205 goto errout;
206 f->res.classid = *(u32*)RTA_DATA(tb[TCA_FW_CLASSID-1]);
207 tcf_bind_filter(tp, &f->res, base); 211 tcf_bind_filter(tp, &f->res, base);
208 } 212 }
209 213
210#ifdef CONFIG_NET_CLS_IND 214#ifdef CONFIG_NET_CLS_IND
211 if (tb[TCA_FW_INDEV-1]) { 215 if (tb[TCA_FW_INDEV]) {
212 err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV-1]); 216 err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV]);
213 if (err < 0) 217 if (err < 0)
214 goto errout; 218 goto errout;
215 } 219 }
216#endif /* CONFIG_NET_CLS_IND */ 220#endif /* CONFIG_NET_CLS_IND */
217 221
218 if (tb[TCA_FW_MASK-1]) { 222 if (tb[TCA_FW_MASK]) {
219 if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) 223 mask = nla_get_u32(tb[TCA_FW_MASK]);
220 goto errout;
221 mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
222 if (mask != head->mask) 224 if (mask != head->mask)
223 goto errout; 225 goto errout;
224 } else if (head->mask != 0xFFFFFFFF) 226 } else if (head->mask != 0xFFFFFFFF)
@@ -234,20 +236,21 @@ errout:
234 236
235static int fw_change(struct tcf_proto *tp, unsigned long base, 237static int fw_change(struct tcf_proto *tp, unsigned long base,
236 u32 handle, 238 u32 handle,
237 struct rtattr **tca, 239 struct nlattr **tca,
238 unsigned long *arg) 240 unsigned long *arg)
239{ 241{
240 struct fw_head *head = (struct fw_head*)tp->root; 242 struct fw_head *head = (struct fw_head*)tp->root;
241 struct fw_filter *f = (struct fw_filter *) *arg; 243 struct fw_filter *f = (struct fw_filter *) *arg;
242 struct rtattr *opt = tca[TCA_OPTIONS-1]; 244 struct nlattr *opt = tca[TCA_OPTIONS];
243 struct rtattr *tb[TCA_FW_MAX]; 245 struct nlattr *tb[TCA_FW_MAX + 1];
244 int err; 246 int err;
245 247
246 if (!opt) 248 if (!opt)
247 return handle ? -EINVAL : 0; 249 return handle ? -EINVAL : 0;
248 250
249 if (rtattr_parse_nested(tb, TCA_FW_MAX, opt) < 0) 251 err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
250 return -EINVAL; 252 if (err < 0)
253 return err;
251 254
252 if (f != NULL) { 255 if (f != NULL) {
253 if (f->id != handle && handle) 256 if (f->id != handle && handle)
@@ -260,11 +263,8 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
260 263
261 if (head == NULL) { 264 if (head == NULL) {
262 u32 mask = 0xFFFFFFFF; 265 u32 mask = 0xFFFFFFFF;
263 if (tb[TCA_FW_MASK-1]) { 266 if (tb[TCA_FW_MASK])
264 if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) 267 mask = nla_get_u32(tb[TCA_FW_MASK]);
265 return -EINVAL;
266 mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
267 }
268 268
269 head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); 269 head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
270 if (head == NULL) 270 if (head == NULL)
@@ -333,7 +333,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
333 struct fw_head *head = (struct fw_head *)tp->root; 333 struct fw_head *head = (struct fw_head *)tp->root;
334 struct fw_filter *f = (struct fw_filter*)fh; 334 struct fw_filter *f = (struct fw_filter*)fh;
335 unsigned char *b = skb_tail_pointer(skb); 335 unsigned char *b = skb_tail_pointer(skb);
336 struct rtattr *rta; 336 struct nlattr *nest;
337 337
338 if (f == NULL) 338 if (f == NULL)
339 return skb->len; 339 return skb->len;
@@ -343,35 +343,35 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
343 if (!f->res.classid && !tcf_exts_is_available(&f->exts)) 343 if (!f->res.classid && !tcf_exts_is_available(&f->exts))
344 return skb->len; 344 return skb->len;
345 345
346 rta = (struct rtattr*)b; 346 nest = nla_nest_start(skb, TCA_OPTIONS);
347 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 347 if (nest == NULL)
348 goto nla_put_failure;
348 349
349 if (f->res.classid) 350 if (f->res.classid)
350 RTA_PUT(skb, TCA_FW_CLASSID, 4, &f->res.classid); 351 NLA_PUT_U32(skb, TCA_FW_CLASSID, f->res.classid);
351#ifdef CONFIG_NET_CLS_IND 352#ifdef CONFIG_NET_CLS_IND
352 if (strlen(f->indev)) 353 if (strlen(f->indev))
353 RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); 354 NLA_PUT_STRING(skb, TCA_FW_INDEV, f->indev);
354#endif /* CONFIG_NET_CLS_IND */ 355#endif /* CONFIG_NET_CLS_IND */
355 if (head->mask != 0xFFFFFFFF) 356 if (head->mask != 0xFFFFFFFF)
356 RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask); 357 NLA_PUT_U32(skb, TCA_FW_MASK, head->mask);
357 358
358 if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) 359 if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
359 goto rtattr_failure; 360 goto nla_put_failure;
360 361
361 rta->rta_len = skb_tail_pointer(skb) - b; 362 nla_nest_end(skb, nest);
362 363
363 if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0) 364 if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
364 goto rtattr_failure; 365 goto nla_put_failure;
365 366
366 return skb->len; 367 return skb->len;
367 368
368rtattr_failure: 369nla_put_failure:
369 nlmsg_trim(skb, b); 370 nlmsg_trim(skb, b);
370 return -1; 371 return -1;
371} 372}
372 373
373static struct tcf_proto_ops cls_fw_ops = { 374static struct tcf_proto_ops cls_fw_ops __read_mostly = {
374 .next = NULL,
375 .kind = "fw", 375 .kind = "fw",
376 .classify = fw_classify, 376 .classify = fw_classify,
377 .init = fw_init, 377 .init = fw_init,
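
The fw_policy table is what lets fw_change_attrs() drop its RTA_PAYLOAD() checks: nla_parse_nested() validates every attribute against the policy before the handler sees it. One nuance: for fixed-width types such as NLA_U32 the policy enforces a minimum payload (enough to read the value), a .len on an untyped entry is likewise a minimum, and for NLA_STRING .len acts as a maximum. A sketch of a comparable table, with hypothetical TCA_QUUX_* names and struct tc_quux:

    struct tc_quux { u32 action; };    /* hypothetical */

    static const struct nla_policy quux_policy[TCA_QUUX_MAX + 1] = {
            /* At least 4 bytes of payload; nla_get_u32() is safe. */
            [TCA_QUUX_CLASSID] = { .type = NLA_U32 },
            /* String payload no longer than IFNAMSIZ. */
            [TCA_QUUX_INDEV]   = { .type = NLA_STRING, .len = IFNAMSIZ },
            /* Untyped entry with .len: payload at least this size. */
            [TCA_QUUX_PARMS]   = { .len = sizeof(struct tc_quux) },
    };
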
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0a8409c1d28a..f7e7d3955d28 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -323,9 +323,16 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
323 return 0; 323 return 0;
324} 324}
325 325
326static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
327 [TCA_ROUTE4_CLASSID] = { .type = NLA_U32 },
328 [TCA_ROUTE4_TO] = { .type = NLA_U32 },
329 [TCA_ROUTE4_FROM] = { .type = NLA_U32 },
330 [TCA_ROUTE4_IIF] = { .type = NLA_U32 },
331};
332
326static int route4_set_parms(struct tcf_proto *tp, unsigned long base, 333static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
327 struct route4_filter *f, u32 handle, struct route4_head *head, 334 struct route4_filter *f, u32 handle, struct route4_head *head,
328 struct rtattr **tb, struct rtattr *est, int new) 335 struct nlattr **tb, struct nlattr *est, int new)
329{ 336{
330 int err; 337 int err;
331 u32 id = 0, to = 0, nhandle = 0x8000; 338 u32 id = 0, to = 0, nhandle = 0x8000;
@@ -339,34 +346,24 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
339 return err; 346 return err;
340 347
341 err = -EINVAL; 348 err = -EINVAL;
342 if (tb[TCA_ROUTE4_CLASSID-1]) 349 if (tb[TCA_ROUTE4_TO]) {
343 if (RTA_PAYLOAD(tb[TCA_ROUTE4_CLASSID-1]) < sizeof(u32))
344 goto errout;
345
346 if (tb[TCA_ROUTE4_TO-1]) {
347 if (new && handle & 0x8000) 350 if (new && handle & 0x8000)
348 goto errout; 351 goto errout;
349 if (RTA_PAYLOAD(tb[TCA_ROUTE4_TO-1]) < sizeof(u32)) 352 to = nla_get_u32(tb[TCA_ROUTE4_TO]);
350 goto errout;
351 to = *(u32*)RTA_DATA(tb[TCA_ROUTE4_TO-1]);
352 if (to > 0xFF) 353 if (to > 0xFF)
353 goto errout; 354 goto errout;
354 nhandle = to; 355 nhandle = to;
355 } 356 }
356 357
357 if (tb[TCA_ROUTE4_FROM-1]) { 358 if (tb[TCA_ROUTE4_FROM]) {
358 if (tb[TCA_ROUTE4_IIF-1]) 359 if (tb[TCA_ROUTE4_IIF])
359 goto errout; 360 goto errout;
360 if (RTA_PAYLOAD(tb[TCA_ROUTE4_FROM-1]) < sizeof(u32)) 361 id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
361 goto errout;
362 id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_FROM-1]);
363 if (id > 0xFF) 362 if (id > 0xFF)
364 goto errout; 363 goto errout;
365 nhandle |= id << 16; 364 nhandle |= id << 16;
366 } else if (tb[TCA_ROUTE4_IIF-1]) { 365 } else if (tb[TCA_ROUTE4_IIF]) {
367 if (RTA_PAYLOAD(tb[TCA_ROUTE4_IIF-1]) < sizeof(u32)) 366 id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
368 goto errout;
369 id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]);
370 if (id > 0x7FFF) 367 if (id > 0x7FFF)
371 goto errout; 368 goto errout;
372 nhandle |= (id | 0x8000) << 16; 369 nhandle |= (id | 0x8000) << 16;
@@ -398,20 +395,20 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
398 } 395 }
399 396
400 tcf_tree_lock(tp); 397 tcf_tree_lock(tp);
401 if (tb[TCA_ROUTE4_TO-1]) 398 if (tb[TCA_ROUTE4_TO])
402 f->id = to; 399 f->id = to;
403 400
404 if (tb[TCA_ROUTE4_FROM-1]) 401 if (tb[TCA_ROUTE4_FROM])
405 f->id = to | id<<16; 402 f->id = to | id<<16;
406 else if (tb[TCA_ROUTE4_IIF-1]) 403 else if (tb[TCA_ROUTE4_IIF])
407 f->iif = id; 404 f->iif = id;
408 405
409 f->handle = nhandle; 406 f->handle = nhandle;
410 f->bkt = b; 407 f->bkt = b;
411 tcf_tree_unlock(tp); 408 tcf_tree_unlock(tp);
412 409
413 if (tb[TCA_ROUTE4_CLASSID-1]) { 410 if (tb[TCA_ROUTE4_CLASSID]) {
414 f->res.classid = *(u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID-1]); 411 f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]);
415 tcf_bind_filter(tp, &f->res, base); 412 tcf_bind_filter(tp, &f->res, base);
416 } 413 }
417 414
@@ -425,14 +422,14 @@ errout:
425 422
426static int route4_change(struct tcf_proto *tp, unsigned long base, 423static int route4_change(struct tcf_proto *tp, unsigned long base,
427 u32 handle, 424 u32 handle,
428 struct rtattr **tca, 425 struct nlattr **tca,
429 unsigned long *arg) 426 unsigned long *arg)
430{ 427{
431 struct route4_head *head = tp->root; 428 struct route4_head *head = tp->root;
432 struct route4_filter *f, *f1, **fp; 429 struct route4_filter *f, *f1, **fp;
433 struct route4_bucket *b; 430 struct route4_bucket *b;
434 struct rtattr *opt = tca[TCA_OPTIONS-1]; 431 struct nlattr *opt = tca[TCA_OPTIONS];
435 struct rtattr *tb[TCA_ROUTE4_MAX]; 432 struct nlattr *tb[TCA_ROUTE4_MAX + 1];
436 unsigned int h, th; 433 unsigned int h, th;
437 u32 old_handle = 0; 434 u32 old_handle = 0;
438 int err; 435 int err;
@@ -440,8 +437,9 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
440 if (opt == NULL) 437 if (opt == NULL)
441 return handle ? -EINVAL : 0; 438 return handle ? -EINVAL : 0;
442 439
443 if (rtattr_parse_nested(tb, TCA_ROUTE4_MAX, opt) < 0) 440 err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy);
444 return -EINVAL; 441 if (err < 0)
442 return err;
445 443
446 if ((f = (struct route4_filter*)*arg) != NULL) { 444 if ((f = (struct route4_filter*)*arg) != NULL) {
447 if (f->handle != handle && handle) 445 if (f->handle != handle && handle)
@@ -451,7 +449,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
451 old_handle = f->handle; 449 old_handle = f->handle;
452 450
453 err = route4_set_parms(tp, base, f, handle, head, tb, 451 err = route4_set_parms(tp, base, f, handle, head, tb,
454 tca[TCA_RATE-1], 0); 452 tca[TCA_RATE], 0);
455 if (err < 0) 453 if (err < 0)
456 return err; 454 return err;
457 455
@@ -474,7 +472,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
474 goto errout; 472 goto errout;
475 473
476 err = route4_set_parms(tp, base, f, handle, head, tb, 474 err = route4_set_parms(tp, base, f, handle, head, tb,
477 tca[TCA_RATE-1], 1); 475 tca[TCA_RATE], 1);
478 if (err < 0) 476 if (err < 0)
479 goto errout; 477 goto errout;
480 478
@@ -550,7 +548,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
550{ 548{
551 struct route4_filter *f = (struct route4_filter*)fh; 549 struct route4_filter *f = (struct route4_filter*)fh;
552 unsigned char *b = skb_tail_pointer(skb); 550 unsigned char *b = skb_tail_pointer(skb);
553 struct rtattr *rta; 551 struct nlattr *nest;
554 u32 id; 552 u32 id;
555 553
556 if (f == NULL) 554 if (f == NULL)
@@ -558,40 +556,40 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
558 556
559 t->tcm_handle = f->handle; 557 t->tcm_handle = f->handle;
560 558
561 rta = (struct rtattr*)b; 559 nest = nla_nest_start(skb, TCA_OPTIONS);
562 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 560 if (nest == NULL)
561 goto nla_put_failure;
563 562
564 if (!(f->handle&0x8000)) { 563 if (!(f->handle&0x8000)) {
565 id = f->id&0xFF; 564 id = f->id&0xFF;
566 RTA_PUT(skb, TCA_ROUTE4_TO, sizeof(id), &id); 565 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
567 } 566 }
568 if (f->handle&0x80000000) { 567 if (f->handle&0x80000000) {
569 if ((f->handle>>16) != 0xFFFF) 568 if ((f->handle>>16) != 0xFFFF)
570 RTA_PUT(skb, TCA_ROUTE4_IIF, sizeof(f->iif), &f->iif); 569 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
571 } else { 570 } else {
572 id = f->id>>16; 571 id = f->id>>16;
573 RTA_PUT(skb, TCA_ROUTE4_FROM, sizeof(id), &id); 572 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
574 } 573 }
575 if (f->res.classid) 574 if (f->res.classid)
576 RTA_PUT(skb, TCA_ROUTE4_CLASSID, 4, &f->res.classid); 575 NLA_PUT_U32(skb, TCA_ROUTE4_CLASSID, f->res.classid);
577 576
578 if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0) 577 if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
579 goto rtattr_failure; 578 goto nla_put_failure;
580 579
581 rta->rta_len = skb_tail_pointer(skb) - b; 580 nla_nest_end(skb, nest);
582 581
583 if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0) 582 if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
584 goto rtattr_failure; 583 goto nla_put_failure;
585 584
586 return skb->len; 585 return skb->len;
587 586
588rtattr_failure: 587nla_put_failure:
589 nlmsg_trim(skb, b); 588 nlmsg_trim(skb, b);
590 return -1; 589 return -1;
591} 590}
592 591
593static struct tcf_proto_ops cls_route4_ops = { 592static struct tcf_proto_ops cls_route4_ops __read_mostly = {
594 .next = NULL,
595 .kind = "route", 593 .kind = "route",
596 .classify = route4_classify, 594 .classify = route4_classify,
597 .init = route4_init, 595 .init = route4_init,
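
A recurring mechanical change across these files is the table-indexing convention: rtattr parse tables were sized TCA_*_MAX and indexed tb[type - 1], while nlattr tables are sized TCA_*_MAX + 1 and indexed directly by attribute type, leaving slot 0 unused. Every deleted "- 1" in the hunks above is this conversion, which removes a standing off-by-one hazard. In sketch form, for a hypothetical attribute enum:

    enum {
            TCA_CORGE_UNSPEC,                  /* slot 0 stays empty */
            TCA_CORGE_CLASSID,
            __TCA_CORGE_MAX
    };
    #define TCA_CORGE_MAX (__TCA_CORGE_MAX - 1)

    struct rtattr *old_tb[TCA_CORGE_MAX];      /* old: tb[TCA_CORGE_CLASSID - 1] */
    struct nlattr *new_tb[TCA_CORGE_MAX + 1];  /* new: tb[TCA_CORGE_CLASSID]     */
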
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 22f9ede70e8f..7034ea4530e5 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -397,17 +397,26 @@ static u32 gen_tunnel(struct rsvp_head *data)
397 return 0; 397 return 0;
398} 398}
399 399
400static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
401 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
402 [TCA_RSVP_DST] = { .type = NLA_BINARY,
403 .len = RSVP_DST_LEN * sizeof(u32) },
404 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
405 .len = RSVP_DST_LEN * sizeof(u32) },
406 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
407};
408
400static int rsvp_change(struct tcf_proto *tp, unsigned long base, 409static int rsvp_change(struct tcf_proto *tp, unsigned long base,
401 u32 handle, 410 u32 handle,
402 struct rtattr **tca, 411 struct nlattr **tca,
403 unsigned long *arg) 412 unsigned long *arg)
404{ 413{
405 struct rsvp_head *data = tp->root; 414 struct rsvp_head *data = tp->root;
406 struct rsvp_filter *f, **fp; 415 struct rsvp_filter *f, **fp;
407 struct rsvp_session *s, **sp; 416 struct rsvp_session *s, **sp;
408 struct tc_rsvp_pinfo *pinfo = NULL; 417 struct tc_rsvp_pinfo *pinfo = NULL;
409 struct rtattr *opt = tca[TCA_OPTIONS-1]; 418 struct nlattr *opt = tca[TCA_OPTIONS-1];
410 struct rtattr *tb[TCA_RSVP_MAX]; 419 struct nlattr *tb[TCA_RSVP_MAX + 1];
411 struct tcf_exts e; 420 struct tcf_exts e;
412 unsigned h1, h2; 421 unsigned h1, h2;
413 __be32 *dst; 422 __be32 *dst;
@@ -416,8 +425,9 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
416 if (opt == NULL) 425 if (opt == NULL)
417 return handle ? -EINVAL : 0; 426 return handle ? -EINVAL : 0;
418 427
419 if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0) 428 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
420 return -EINVAL; 429 if (err < 0)
430 return err;
421 431
422 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map); 432 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
423 if (err < 0) 433 if (err < 0)
@@ -429,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
429 if (f->handle != handle && handle) 439 if (f->handle != handle && handle)
430 goto errout2; 440 goto errout2;
431 if (tb[TCA_RSVP_CLASSID-1]) { 441 if (tb[TCA_RSVP_CLASSID-1]) {
432 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]); 442 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
433 tcf_bind_filter(tp, &f->res, base); 443 tcf_bind_filter(tp, &f->res, base);
434 } 444 }
435 445
@@ -451,31 +461,18 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
451 461
452 h2 = 16; 462 h2 = 16;
453 if (tb[TCA_RSVP_SRC-1]) { 463 if (tb[TCA_RSVP_SRC-1]) {
454 err = -EINVAL; 464 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
455 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
456 goto errout;
457 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
458 h2 = hash_src(f->src); 465 h2 = hash_src(f->src);
459 } 466 }
460 if (tb[TCA_RSVP_PINFO-1]) { 467 if (tb[TCA_RSVP_PINFO-1]) {
461 err = -EINVAL; 468 pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
462 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
463 goto errout;
464 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
465 f->spi = pinfo->spi; 469 f->spi = pinfo->spi;
466 f->tunnelhdr = pinfo->tunnelhdr; 470 f->tunnelhdr = pinfo->tunnelhdr;
467 } 471 }
468 if (tb[TCA_RSVP_CLASSID-1]) { 472 if (tb[TCA_RSVP_CLASSID-1])
469 err = -EINVAL; 473 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
470 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
471 goto errout;
472 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
473 }
474 474
475 err = -EINVAL; 475 dst = nla_data(tb[TCA_RSVP_DST-1]);
476 if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
477 goto errout;
478 dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
479 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); 476 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
480 477
481 err = -ENOMEM; 478 err = -ENOMEM;
@@ -594,7 +591,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
594 struct rsvp_filter *f = (struct rsvp_filter*)fh; 591 struct rsvp_filter *f = (struct rsvp_filter*)fh;
595 struct rsvp_session *s; 592 struct rsvp_session *s;
596 unsigned char *b = skb_tail_pointer(skb); 593 unsigned char *b = skb_tail_pointer(skb);
597 struct rtattr *rta; 594 struct nlattr *nest;
598 struct tc_rsvp_pinfo pinfo; 595 struct tc_rsvp_pinfo pinfo;
599 596
600 if (f == NULL) 597 if (f == NULL)
@@ -603,33 +600,33 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
603 600
604 t->tcm_handle = f->handle; 601 t->tcm_handle = f->handle;
605 602
603 nest = nla_nest_start(skb, TCA_OPTIONS);
604 if (nest == NULL)
605 goto nla_put_failure;
606 606
607 rta = (struct rtattr*)b; 607 NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
608 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
609
610 RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
611 pinfo.dpi = s->dpi; 608 pinfo.dpi = s->dpi;
612 pinfo.spi = f->spi; 609 pinfo.spi = f->spi;
613 pinfo.protocol = s->protocol; 610 pinfo.protocol = s->protocol;
614 pinfo.tunnelid = s->tunnelid; 611 pinfo.tunnelid = s->tunnelid;
615 pinfo.tunnelhdr = f->tunnelhdr; 612 pinfo.tunnelhdr = f->tunnelhdr;
616 pinfo.pad = 0; 613 pinfo.pad = 0;
617 RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); 614 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
618 if (f->res.classid) 615 if (f->res.classid)
619 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid); 616 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
620 if (((f->handle>>8)&0xFF) != 16) 617 if (((f->handle>>8)&0xFF) != 16)
621 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); 618 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
622 619
623 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) 620 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
624 goto rtattr_failure; 621 goto nla_put_failure;
625 622
626 rta->rta_len = skb_tail_pointer(skb) - b; 623 nla_nest_end(skb, nest);
627 624
628 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0) 625 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
629 goto rtattr_failure; 626 goto nla_put_failure;
630 return skb->len; 627 return skb->len;
631 628
632rtattr_failure: 629nla_put_failure:
633 nlmsg_trim(skb, b); 630 nlmsg_trim(skb, b);
634 return -1; 631 return -1;
635} 632}
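
Two things stand out in the cls_rsvp.h conversion. First, unlike the other classifiers in this merge, the new code keeps the old "- 1" indexing (tca[TCA_OPTIONS-1], tb[TCA_RSVP_SRC-1] and so on) even though the table is now declared with TCA_RSVP_MAX + 1 slots; since nla_parse_nested() fills attributes by type, those lookups land one slot off, which reads like leftover rtattr indexing awaiting a follow-up in the series. Second, the exact-size RTA_PAYLOAD() checks on TCA_RSVP_DST and TCA_RSVP_SRC are replaced by NLA_BINARY policy entries, which bound the payload only from above, so a shorter attribute still parses. A sketch of that asymmetry, with a hypothetical fixed-size key:

    #define KEYLEN 16    /* hypothetical fixed key size */

    static const struct nla_policy grault_policy[TCA_GRAULT_MAX + 1] = {
            [TCA_GRAULT_KEY] = { .type = NLA_BINARY, .len = KEYLEN },
    };

    /* NLA_BINARY only rejects payloads longer than .len; a shorter
     * attribute passes, so copying a fixed-size buffer still needs
     * an explicit length check against nla_len(). */
    if (tb[TCA_GRAULT_KEY] && nla_len(tb[TCA_GRAULT_KEY]) == KEYLEN)
            memcpy(key, nla_data(tb[TCA_GRAULT_KEY]), KEYLEN);
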
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 2314820a080a..ee60b2d1705d 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -29,19 +29,6 @@
29#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */ 29#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
30 30
31 31
32#if 1 /* control */
33#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
34#else
35#define DPRINTK(format,args...)
36#endif
37
38#if 0 /* data */
39#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
40#else
41#define D2PRINTK(format,args...)
42#endif
43
44
45#define PRIV(tp) ((struct tcindex_data *) (tp)->root) 32#define PRIV(tp) ((struct tcindex_data *) (tp)->root)
46 33
47 34
@@ -104,7 +91,8 @@ static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
104 struct tcindex_filter_result *f; 91 struct tcindex_filter_result *f;
105 int key = (skb->tc_index & p->mask) >> p->shift; 92 int key = (skb->tc_index & p->mask) >> p->shift;
106 93
107 D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p); 94 pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
95 skb, tp, res, p);
108 96
109 f = tcindex_lookup(p, key); 97 f = tcindex_lookup(p, key);
110 if (!f) { 98 if (!f) {
@@ -112,11 +100,11 @@ static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
112 return -1; 100 return -1;
113 res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key); 101 res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
114 res->class = 0; 102 res->class = 0;
115 D2PRINTK("alg 0x%x\n",res->classid); 103 pr_debug("alg 0x%x\n", res->classid);
116 return 0; 104 return 0;
117 } 105 }
118 *res = f->res; 106 *res = f->res;
119 D2PRINTK("map 0x%x\n",res->classid); 107 pr_debug("map 0x%x\n", res->classid);
120 108
121 return tcf_exts_exec(skb, &f->exts, res); 109 return tcf_exts_exec(skb, &f->exts, res);
122} 110}
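
cls_tcindex also retires its private DPRINTK/D2PRINTK macros in favor of pr_debug(). In this kernel generation pr_debug() compiles to nothing unless DEBUG is defined for the translation unit, so the hand-edited "#if 1 /* control */" toggles become unnecessary. A sketch of the two styles:

    /* Old: per-file toggle, flipped by hand between 0 and 1. */
    #if 0 /* control */
    #define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
    #else
    #define DPRINTK(format, args...)
    #endif

    /* New: standard helper; expands to nothing unless DEBUG is
     * defined for this file, e.g. "#define DEBUG" above the includes. */
    static void example(struct tcf_proto *tp, u32 handle)
    {
            pr_debug("tcindex_get(tp %p, handle 0x%08x)\n", tp, handle);
    }
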
@@ -127,7 +115,7 @@ static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
127 struct tcindex_data *p = PRIV(tp); 115 struct tcindex_data *p = PRIV(tp);
128 struct tcindex_filter_result *r; 116 struct tcindex_filter_result *r;
129 117
130 DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle); 118 pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
131 if (p->perfect && handle >= p->alloc_hash) 119 if (p->perfect && handle >= p->alloc_hash)
132 return 0; 120 return 0;
133 r = tcindex_lookup(p, handle); 121 r = tcindex_lookup(p, handle);
@@ -137,7 +125,7 @@ static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
137 125
138static void tcindex_put(struct tcf_proto *tp, unsigned long f) 126static void tcindex_put(struct tcf_proto *tp, unsigned long f)
139{ 127{
140 DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f); 128 pr_debug("tcindex_put(tp %p,f 0x%lx)\n", tp, f);
141} 129}
142 130
143 131
@@ -145,8 +133,8 @@ static int tcindex_init(struct tcf_proto *tp)
145{ 133{
146 struct tcindex_data *p; 134 struct tcindex_data *p;
147 135
148 DPRINTK("tcindex_init(tp %p)\n",tp); 136 pr_debug("tcindex_init(tp %p)\n", tp);
149 p = kzalloc(sizeof(struct tcindex_data),GFP_KERNEL); 137 p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
150 if (!p) 138 if (!p)
151 return -ENOMEM; 139 return -ENOMEM;
152 140
@@ -166,7 +154,7 @@ __tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
166 struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; 154 struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
167 struct tcindex_filter *f = NULL; 155 struct tcindex_filter *f = NULL;
168 156
169 DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f); 157 pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f);
170 if (p->perfect) { 158 if (p->perfect) {
171 if (!r->res.class) 159 if (!r->res.class)
172 return -ENOENT; 160 return -ENOENT;
@@ -205,10 +193,18 @@ valid_perfect_hash(struct tcindex_data *p)
205 return p->hash > (p->mask >> p->shift); 193 return p->hash > (p->mask >> p->shift);
206} 194}
207 195
196static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
197 [TCA_TCINDEX_HASH] = { .type = NLA_U32 },
198 [TCA_TCINDEX_MASK] = { .type = NLA_U16 },
199 [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
200 [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
201 [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
202};
203
208static int 204static int
209tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, 205tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
210 struct tcindex_data *p, struct tcindex_filter_result *r, 206 struct tcindex_data *p, struct tcindex_filter_result *r,
211 struct rtattr **tb, struct rtattr *est) 207 struct nlattr **tb, struct nlattr *est)
212{ 208{
213 int err, balloc = 0; 209 int err, balloc = 0;
214 struct tcindex_filter_result new_filter_result, *old_r = r; 210 struct tcindex_filter_result new_filter_result, *old_r = r;
@@ -229,24 +225,14 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
229 else 225 else
230 memset(&cr, 0, sizeof(cr)); 226 memset(&cr, 0, sizeof(cr));
231 227
232 err = -EINVAL; 228 if (tb[TCA_TCINDEX_HASH])
233 if (tb[TCA_TCINDEX_HASH-1]) { 229 cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
234 if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(u32))
235 goto errout;
236 cp.hash = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]);
237 }
238 230
239 if (tb[TCA_TCINDEX_MASK-1]) { 231 if (tb[TCA_TCINDEX_MASK])
240 if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(u16)) 232 cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
241 goto errout;
242 cp.mask = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
243 }
244 233
245 if (tb[TCA_TCINDEX_SHIFT-1]) { 234 if (tb[TCA_TCINDEX_SHIFT])
246 if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(int)) 235 cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
247 goto errout;
248 cp.shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
249 }
250 236
251 err = -EBUSY; 237 err = -EBUSY;
252 /* Hash already allocated, make sure that we still meet the 238 /* Hash already allocated, make sure that we still meet the
@@ -260,12 +246,8 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
260 goto errout; 246 goto errout;
261 247
262 err = -EINVAL; 248 err = -EINVAL;
263 if (tb[TCA_TCINDEX_FALL_THROUGH-1]) { 249 if (tb[TCA_TCINDEX_FALL_THROUGH])
264 if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(u32)) 250 cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
265 goto errout;
266 cp.fall_through =
267 *(u32 *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]);
268 }
269 251
270 if (!cp.hash) { 252 if (!cp.hash) {
271 /* Hash not specified, use perfect hash if the upper limit 253 /* Hash not specified, use perfect hash if the upper limit
@@ -316,8 +298,8 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
316 goto errout_alloc; 298 goto errout_alloc;
317 } 299 }
318 300
319 if (tb[TCA_TCINDEX_CLASSID-1]) { 301 if (tb[TCA_TCINDEX_CLASSID]) {
320 cr.res.classid = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]); 302 cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
321 tcf_bind_filter(tp, &cr.res, base); 303 tcf_bind_filter(tp, &cr.res, base);
322 } 304 }
323 305
@@ -356,34 +338,36 @@ errout:
356 338
357static int 339static int
358tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle, 340tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
359 struct rtattr **tca, unsigned long *arg) 341 struct nlattr **tca, unsigned long *arg)
360{ 342{
361 struct rtattr *opt = tca[TCA_OPTIONS-1]; 343 struct nlattr *opt = tca[TCA_OPTIONS];
362 struct rtattr *tb[TCA_TCINDEX_MAX]; 344 struct nlattr *tb[TCA_TCINDEX_MAX + 1];
363 struct tcindex_data *p = PRIV(tp); 345 struct tcindex_data *p = PRIV(tp);
364 struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; 346 struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
347 int err;
365 348
366 DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," 349 pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
367 "p %p,r %p,*arg 0x%lx\n", 350 "p %p,r %p,*arg 0x%lx\n",
368 tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L); 351 tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
369 352
370 if (!opt) 353 if (!opt)
371 return 0; 354 return 0;
372 355
373 if (rtattr_parse_nested(tb, TCA_TCINDEX_MAX, opt) < 0) 356 err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy);
374 return -EINVAL; 357 if (err < 0)
358 return err;
375 359
376 return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE-1]); 360 return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]);
377} 361}
378 362
379 363
380static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) 364static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
381{ 365{
382 struct tcindex_data *p = PRIV(tp); 366 struct tcindex_data *p = PRIV(tp);
383 struct tcindex_filter *f,*next; 367 struct tcindex_filter *f, *next;
384 int i; 368 int i;
385 369
386 DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p); 370 pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
387 if (p->perfect) { 371 if (p->perfect) {
388 for (i = 0; i < p->hash; i++) { 372 for (i = 0; i < p->hash; i++) {
389 if (!p->perfect[i].res.class) 373 if (!p->perfect[i].res.class)
@@ -405,7 +389,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
405 for (f = p->h[i]; f; f = next) { 389 for (f = p->h[i]; f; f = next) {
406 next = f->next; 390 next = f->next;
407 if (walker->count >= walker->skip) { 391 if (walker->count >= walker->skip) {
408 if (walker->fn(tp,(unsigned long) &f->result, 392 if (walker->fn(tp, (unsigned long) &f->result,
409 walker) < 0) { 393 walker) < 0) {
410 walker->stop = 1; 394 walker->stop = 1;
411 return; 395 return;
@@ -429,11 +413,11 @@ static void tcindex_destroy(struct tcf_proto *tp)
429 struct tcindex_data *p = PRIV(tp); 413 struct tcindex_data *p = PRIV(tp);
430 struct tcf_walker walker; 414 struct tcf_walker walker;
431 415
432 DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p); 416 pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
433 walker.count = 0; 417 walker.count = 0;
434 walker.skip = 0; 418 walker.skip = 0;
435 walker.fn = &tcindex_destroy_element; 419 walker.fn = &tcindex_destroy_element;
436 tcindex_walk(tp,&walker); 420 tcindex_walk(tp, &walker);
437 kfree(p->perfect); 421 kfree(p->perfect);
438 kfree(p->h); 422 kfree(p->h);
439 kfree(p); 423 kfree(p);
@@ -447,21 +431,23 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
447 struct tcindex_data *p = PRIV(tp); 431 struct tcindex_data *p = PRIV(tp);
448 struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; 432 struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
449 unsigned char *b = skb_tail_pointer(skb); 433 unsigned char *b = skb_tail_pointer(skb);
450 struct rtattr *rta; 434 struct nlattr *nest;
435
436 pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
437 tp, fh, skb, t, p, r, b);
438 pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
439
440 nest = nla_nest_start(skb, TCA_OPTIONS);
441 if (nest == NULL)
442 goto nla_put_failure;
451 443
452 DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
453 tp,fh,skb,t,p,r,b);
454 DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h);
455 rta = (struct rtattr *) b;
456 RTA_PUT(skb,TCA_OPTIONS,0,NULL);
457 if (!fh) { 444 if (!fh) {
458 t->tcm_handle = ~0; /* whatever ... */ 445 t->tcm_handle = ~0; /* whatever ... */
459 RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash); 446 NLA_PUT_U32(skb, TCA_TCINDEX_HASH, p->hash);
460 RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask); 447 NLA_PUT_U16(skb, TCA_TCINDEX_MASK, p->mask);
461 RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift); 448 NLA_PUT_U32(skb, TCA_TCINDEX_SHIFT, p->shift);
462 RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through), 449 NLA_PUT_U32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through);
463 &p->fall_through); 450 nla_nest_end(skb, nest);
464 rta->rta_len = skb_tail_pointer(skb) - b;
465 } else { 451 } else {
466 if (p->perfect) { 452 if (p->perfect) {
467 t->tcm_handle = r-p->perfect; 453 t->tcm_handle = r-p->perfect;
@@ -478,27 +464,26 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
478 } 464 }
479 } 465 }
480 } 466 }
481 DPRINTK("handle = %d\n",t->tcm_handle); 467 pr_debug("handle = %d\n", t->tcm_handle);
482 if (r->res.class) 468 if (r->res.class)
483 RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid); 469 NLA_PUT_U32(skb, TCA_TCINDEX_CLASSID, r->res.classid);
484 470
485 if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0) 471 if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
486 goto rtattr_failure; 472 goto nla_put_failure;
487 rta->rta_len = skb_tail_pointer(skb) - b; 473 nla_nest_end(skb, nest);
488 474
489 if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0) 475 if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
490 goto rtattr_failure; 476 goto nla_put_failure;
491 } 477 }
492 478
493 return skb->len; 479 return skb->len;
494 480
495rtattr_failure: 481nla_put_failure:
496 nlmsg_trim(skb, b); 482 nlmsg_trim(skb, b);
497 return -1; 483 return -1;
498} 484}
499 485
500static struct tcf_proto_ops cls_tcindex_ops = { 486static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
501 .next = NULL,
502 .kind = "tcindex", 487 .kind = "tcindex",
503 .classify = tcindex_classify, 488 .classify = tcindex_classify,
504 .init = tcindex_init, 489 .init = tcindex_init,
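
The tcindex hunks above replace each open-coded RTA_PAYLOAD() length check with a typed nla_get_*() read; the length validation moves into the tcindex_policy table handed to nla_parse_nested() in tcindex_change(). The table itself falls outside this hunk, but given the u16/u32 widths read above it presumably looks something like this minimal sketch:

	static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
		[TCA_TCINDEX_HASH]		= { .type = NLA_U32 },
		[TCA_TCINDEX_MASK]		= { .type = NLA_U16 },	/* read with nla_get_u16() above */
		[TCA_TCINDEX_SHIFT]		= { .type = NLA_U32 },
		[TCA_TCINDEX_FALL_THROUGH]	= { .type = NLA_U32 },
		[TCA_TCINDEX_CLASSID]		= { .type = NLA_U32 },
	};

With such a table in place, nla_parse_nested() rejects any attribute whose payload is shorter than its declared type, which is why the per-attribute "goto errout" size checks could be deleted wholesale.
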
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c39008209164..e8a775689123 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -460,10 +460,20 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
460 return handle|(i>0xFFF ? 0xFFF : i); 460 return handle|(i>0xFFF ? 0xFFF : i);
461} 461}
462 462
463static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
464 [TCA_U32_CLASSID] = { .type = NLA_U32 },
465 [TCA_U32_HASH] = { .type = NLA_U32 },
466 [TCA_U32_LINK] = { .type = NLA_U32 },
467 [TCA_U32_DIVISOR] = { .type = NLA_U32 },
468 [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) },
469 [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
470 [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) },
471};
472
463static int u32_set_parms(struct tcf_proto *tp, unsigned long base, 473static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
464 struct tc_u_hnode *ht, 474 struct tc_u_hnode *ht,
465 struct tc_u_knode *n, struct rtattr **tb, 475 struct tc_u_knode *n, struct nlattr **tb,
466 struct rtattr *est) 476 struct nlattr *est)
467{ 477{
468 int err; 478 int err;
469 struct tcf_exts e; 479 struct tcf_exts e;
@@ -473,8 +483,8 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
473 return err; 483 return err;
474 484
475 err = -EINVAL; 485 err = -EINVAL;
476 if (tb[TCA_U32_LINK-1]) { 486 if (tb[TCA_U32_LINK]) {
477 u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]); 487 u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
478 struct tc_u_hnode *ht_down = NULL; 488 struct tc_u_hnode *ht_down = NULL;
479 489
480 if (TC_U32_KEY(handle)) 490 if (TC_U32_KEY(handle))
@@ -495,14 +505,14 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
495 if (ht_down) 505 if (ht_down)
496 ht_down->refcnt--; 506 ht_down->refcnt--;
497 } 507 }
498 if (tb[TCA_U32_CLASSID-1]) { 508 if (tb[TCA_U32_CLASSID]) {
499 n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]); 509 n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
500 tcf_bind_filter(tp, &n->res, base); 510 tcf_bind_filter(tp, &n->res, base);
501 } 511 }
502 512
503#ifdef CONFIG_NET_CLS_IND 513#ifdef CONFIG_NET_CLS_IND
504 if (tb[TCA_U32_INDEV-1]) { 514 if (tb[TCA_U32_INDEV]) {
505 err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]); 515 err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV]);
506 if (err < 0) 516 if (err < 0)
507 goto errout; 517 goto errout;
508 } 518 }
@@ -516,33 +526,34 @@ errout:
516} 526}
517 527
518static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, 528static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
519 struct rtattr **tca, 529 struct nlattr **tca,
520 unsigned long *arg) 530 unsigned long *arg)
521{ 531{
522 struct tc_u_common *tp_c = tp->data; 532 struct tc_u_common *tp_c = tp->data;
523 struct tc_u_hnode *ht; 533 struct tc_u_hnode *ht;
524 struct tc_u_knode *n; 534 struct tc_u_knode *n;
525 struct tc_u32_sel *s; 535 struct tc_u32_sel *s;
526 struct rtattr *opt = tca[TCA_OPTIONS-1]; 536 struct nlattr *opt = tca[TCA_OPTIONS];
527 struct rtattr *tb[TCA_U32_MAX]; 537 struct nlattr *tb[TCA_U32_MAX + 1];
528 u32 htid; 538 u32 htid;
529 int err; 539 int err;
530 540
531 if (opt == NULL) 541 if (opt == NULL)
532 return handle ? -EINVAL : 0; 542 return handle ? -EINVAL : 0;
533 543
534 if (rtattr_parse_nested(tb, TCA_U32_MAX, opt) < 0) 544 err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
535 return -EINVAL; 545 if (err < 0)
546 return err;
536 547
537 if ((n = (struct tc_u_knode*)*arg) != NULL) { 548 if ((n = (struct tc_u_knode*)*arg) != NULL) {
538 if (TC_U32_KEY(n->handle) == 0) 549 if (TC_U32_KEY(n->handle) == 0)
539 return -EINVAL; 550 return -EINVAL;
540 551
541 return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE-1]); 552 return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]);
542 } 553 }
543 554
544 if (tb[TCA_U32_DIVISOR-1]) { 555 if (tb[TCA_U32_DIVISOR]) {
545 unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]); 556 unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
546 557
547 if (--divisor > 0x100) 558 if (--divisor > 0x100)
548 return -EINVAL; 559 return -EINVAL;
@@ -567,8 +578,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
567 return 0; 578 return 0;
568 } 579 }
569 580
570 if (tb[TCA_U32_HASH-1]) { 581 if (tb[TCA_U32_HASH]) {
571 htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]); 582 htid = nla_get_u32(tb[TCA_U32_HASH]);
572 if (TC_U32_HTID(htid) == TC_U32_ROOT) { 583 if (TC_U32_HTID(htid) == TC_U32_ROOT) {
573 ht = tp->root; 584 ht = tp->root;
574 htid = ht->handle; 585 htid = ht->handle;
@@ -592,11 +603,10 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
592 } else 603 } else
593 handle = gen_new_kid(ht, htid); 604 handle = gen_new_kid(ht, htid);
594 605
595 if (tb[TCA_U32_SEL-1] == NULL || 606 if (tb[TCA_U32_SEL] == NULL)
596 RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
597 return -EINVAL; 607 return -EINVAL;
598 608
599 s = RTA_DATA(tb[TCA_U32_SEL-1]); 609 s = nla_data(tb[TCA_U32_SEL]);
600 610
601 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); 611 n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
602 if (n == NULL) 612 if (n == NULL)
@@ -616,23 +626,16 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
616 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; 626 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
617 627
618#ifdef CONFIG_CLS_U32_MARK 628#ifdef CONFIG_CLS_U32_MARK
619 if (tb[TCA_U32_MARK-1]) { 629 if (tb[TCA_U32_MARK]) {
620 struct tc_u32_mark *mark; 630 struct tc_u32_mark *mark;
621 631
622 if (RTA_PAYLOAD(tb[TCA_U32_MARK-1]) < sizeof(struct tc_u32_mark)) { 632 mark = nla_data(tb[TCA_U32_MARK]);
623#ifdef CONFIG_CLS_U32_PERF
624 kfree(n->pf);
625#endif
626 kfree(n);
627 return -EINVAL;
628 }
629 mark = RTA_DATA(tb[TCA_U32_MARK-1]);
630 memcpy(&n->mark, mark, sizeof(struct tc_u32_mark)); 633 memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
631 n->mark.success = 0; 634 n->mark.success = 0;
632 } 635 }
633#endif 636#endif
634 637
635 err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE-1]); 638 err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]);
636 if (err == 0) { 639 if (err == 0) {
637 struct tc_u_knode **ins; 640 struct tc_u_knode **ins;
638 for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) 641 for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
@@ -693,66 +696,66 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
693 struct sk_buff *skb, struct tcmsg *t) 696 struct sk_buff *skb, struct tcmsg *t)
694{ 697{
695 struct tc_u_knode *n = (struct tc_u_knode*)fh; 698 struct tc_u_knode *n = (struct tc_u_knode*)fh;
696 unsigned char *b = skb_tail_pointer(skb); 699 struct nlattr *nest;
697 struct rtattr *rta;
698 700
699 if (n == NULL) 701 if (n == NULL)
700 return skb->len; 702 return skb->len;
701 703
702 t->tcm_handle = n->handle; 704 t->tcm_handle = n->handle;
703 705
704 rta = (struct rtattr*)b; 706 nest = nla_nest_start(skb, TCA_OPTIONS);
705 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 707 if (nest == NULL)
708 goto nla_put_failure;
706 709
707 if (TC_U32_KEY(n->handle) == 0) { 710 if (TC_U32_KEY(n->handle) == 0) {
708 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; 711 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
709 u32 divisor = ht->divisor+1; 712 u32 divisor = ht->divisor+1;
710 RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor); 713 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
711 } else { 714 } else {
712 RTA_PUT(skb, TCA_U32_SEL, 715 NLA_PUT(skb, TCA_U32_SEL,
713 sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key), 716 sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
714 &n->sel); 717 &n->sel);
715 if (n->ht_up) { 718 if (n->ht_up) {
716 u32 htid = n->handle & 0xFFFFF000; 719 u32 htid = n->handle & 0xFFFFF000;
717 RTA_PUT(skb, TCA_U32_HASH, 4, &htid); 720 NLA_PUT_U32(skb, TCA_U32_HASH, htid);
718 } 721 }
719 if (n->res.classid) 722 if (n->res.classid)
720 RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid); 723 NLA_PUT_U32(skb, TCA_U32_CLASSID, n->res.classid);
721 if (n->ht_down) 724 if (n->ht_down)
722 RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle); 725 NLA_PUT_U32(skb, TCA_U32_LINK, n->ht_down->handle);
723 726
724#ifdef CONFIG_CLS_U32_MARK 727#ifdef CONFIG_CLS_U32_MARK
725 if (n->mark.val || n->mark.mask) 728 if (n->mark.val || n->mark.mask)
726 RTA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark); 729 NLA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
727#endif 730#endif
728 731
729 if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0) 732 if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
730 goto rtattr_failure; 733 goto nla_put_failure;
731 734
732#ifdef CONFIG_NET_CLS_IND 735#ifdef CONFIG_NET_CLS_IND
733 if(strlen(n->indev)) 736 if(strlen(n->indev))
734 RTA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev); 737 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
735#endif 738#endif
736#ifdef CONFIG_CLS_U32_PERF 739#ifdef CONFIG_CLS_U32_PERF
737 RTA_PUT(skb, TCA_U32_PCNT, 740 NLA_PUT(skb, TCA_U32_PCNT,
738 sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64), 741 sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
739 n->pf); 742 n->pf);
740#endif 743#endif
741 } 744 }
742 745
743 rta->rta_len = skb_tail_pointer(skb) - b; 746 nla_nest_end(skb, nest);
747
744 if (TC_U32_KEY(n->handle)) 748 if (TC_U32_KEY(n->handle))
745 if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0) 749 if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
746 goto rtattr_failure; 750 goto nla_put_failure;
747 return skb->len; 751 return skb->len;
748 752
749rtattr_failure: 753nla_put_failure:
750 nlmsg_trim(skb, b); 754 nla_nest_cancel(skb, nest);
751 return -1; 755 return -1;
752} 756}
753 757
754static struct tcf_proto_ops cls_u32_ops = { 758static struct tcf_proto_ops cls_u32_ops __read_mostly = {
755 .next = NULL,
756 .kind = "u32", 759 .kind = "u32",
757 .classify = u32_classify, 760 .classify = u32_classify,
758 .init = u32_init, 761 .init = u32_init,
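
The dump side of the cls_u32 conversion shows the matching pattern for building nested attributes: the old code remembered the start of the TLV in a raw pointer and patched rta_len by hand, while the new code brackets the payload with nla_nest_start()/nla_nest_end() and unwinds with nla_nest_cancel() on failure. An illustrative sketch of that shape (example_dump and its classid argument are stand-ins, not code from the patch):

	static int example_dump(struct sk_buff *skb, u32 classid)
	{
		struct nlattr *nest;

		nest = nla_nest_start(skb, TCA_OPTIONS);	/* opens an empty TLV */
		if (nest == NULL)
			return -1;

		NLA_PUT_U32(skb, TCA_U32_CLASSID, classid);	/* macro jumps to the label on overflow */

		nla_nest_end(skb, nest);	/* patches nla_len to cover everything added */
		return skb->len;

	nla_put_failure:
		nla_nest_cancel(skb, nest);	/* trims the partial nest back off the skb */
		return -1;
	}
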
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index ceda8890ab0e..a1e5619b1876 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -542,11 +542,11 @@ static int meta_var_compare(struct meta_obj *a, struct meta_obj *b)
542 return r; 542 return r;
543} 543}
544 544
545static int meta_var_change(struct meta_value *dst, struct rtattr *rta) 545static int meta_var_change(struct meta_value *dst, struct nlattr *nla)
546{ 546{
547 int len = RTA_PAYLOAD(rta); 547 int len = nla_len(nla);
548 548
549 dst->val = (unsigned long)kmemdup(RTA_DATA(rta), len, GFP_KERNEL); 549 dst->val = (unsigned long)kmemdup(nla_data(nla), len, GFP_KERNEL);
550 if (dst->val == 0UL) 550 if (dst->val == 0UL)
551 return -ENOMEM; 551 return -ENOMEM;
552 dst->len = len; 552 dst->len = len;
@@ -570,10 +570,10 @@ static void meta_var_apply_extras(struct meta_value *v,
570static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv) 570static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
571{ 571{
572 if (v->val && v->len) 572 if (v->val && v->len)
573 RTA_PUT(skb, tlv, v->len, (void *) v->val); 573 NLA_PUT(skb, tlv, v->len, (void *) v->val);
574 return 0; 574 return 0;
575 575
576rtattr_failure: 576nla_put_failure:
577 return -1; 577 return -1;
578} 578}
579 579
@@ -594,13 +594,13 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
594 return 1; 594 return 1;
595} 595}
596 596
597static int meta_int_change(struct meta_value *dst, struct rtattr *rta) 597static int meta_int_change(struct meta_value *dst, struct nlattr *nla)
598{ 598{
599 if (RTA_PAYLOAD(rta) >= sizeof(unsigned long)) { 599 if (nla_len(nla) >= sizeof(unsigned long)) {
600 dst->val = *(unsigned long *) RTA_DATA(rta); 600 dst->val = *(unsigned long *) nla_data(nla);
601 dst->len = sizeof(unsigned long); 601 dst->len = sizeof(unsigned long);
602 } else if (RTA_PAYLOAD(rta) == sizeof(u32)) { 602 } else if (nla_len(nla) == sizeof(u32)) {
603 dst->val = *(u32 *) RTA_DATA(rta); 603 dst->val = nla_get_u32(nla);
604 dst->len = sizeof(u32); 604 dst->len = sizeof(u32);
605 } else 605 } else
606 return -EINVAL; 606 return -EINVAL;
@@ -621,15 +621,14 @@ static void meta_int_apply_extras(struct meta_value *v,
621static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv) 621static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
622{ 622{
623 if (v->len == sizeof(unsigned long)) 623 if (v->len == sizeof(unsigned long))
624 RTA_PUT(skb, tlv, sizeof(unsigned long), &v->val); 624 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
625 else if (v->len == sizeof(u32)) { 625 else if (v->len == sizeof(u32)) {
626 u32 d = v->val; 626 NLA_PUT_U32(skb, tlv, v->val);
627 RTA_PUT(skb, tlv, sizeof(d), &d);
628 } 627 }
629 628
630 return 0; 629 return 0;
631 630
632rtattr_failure: 631nla_put_failure:
633 return -1; 632 return -1;
634} 633}
635 634
@@ -641,7 +640,7 @@ struct meta_type_ops
641{ 640{
642 void (*destroy)(struct meta_value *); 641 void (*destroy)(struct meta_value *);
643 int (*compare)(struct meta_obj *, struct meta_obj *); 642 int (*compare)(struct meta_obj *, struct meta_obj *);
644 int (*change)(struct meta_value *, struct rtattr *); 643 int (*change)(struct meta_value *, struct nlattr *);
645 void (*apply_extras)(struct meta_value *, struct meta_obj *); 644 void (*apply_extras)(struct meta_value *, struct meta_obj *);
646 int (*dump)(struct sk_buff *, struct meta_value *, int); 645 int (*dump)(struct sk_buff *, struct meta_value *, int);
647}; 646};
@@ -729,13 +728,13 @@ static inline void meta_delete(struct meta_match *meta)
729 kfree(meta); 728 kfree(meta);
730} 729}
731 730
732static inline int meta_change_data(struct meta_value *dst, struct rtattr *rta) 731static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
733{ 732{
734 if (rta) { 733 if (nla) {
735 if (RTA_PAYLOAD(rta) == 0) 734 if (nla_len(nla) == 0)
736 return -EINVAL; 735 return -EINVAL;
737 736
738 return meta_type_ops(dst)->change(dst, rta); 737 return meta_type_ops(dst)->change(dst, nla);
739 } 738 }
740 739
741 return 0; 740 return 0;
@@ -746,21 +745,26 @@ static inline int meta_is_supported(struct meta_value *val)
746 return (!meta_id(val) || meta_ops(val)->get); 745 return (!meta_id(val) || meta_ops(val)->get);
747} 746}
748 747
748static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
749 [TCA_EM_META_HDR] = { .len = sizeof(struct tcf_meta_hdr) },
750};
751
749static int em_meta_change(struct tcf_proto *tp, void *data, int len, 752static int em_meta_change(struct tcf_proto *tp, void *data, int len,
750 struct tcf_ematch *m) 753 struct tcf_ematch *m)
751{ 754{
752 int err = -EINVAL; 755 int err;
753 struct rtattr *tb[TCA_EM_META_MAX]; 756 struct nlattr *tb[TCA_EM_META_MAX + 1];
754 struct tcf_meta_hdr *hdr; 757 struct tcf_meta_hdr *hdr;
755 struct meta_match *meta = NULL; 758 struct meta_match *meta = NULL;
756 759
757 if (rtattr_parse(tb, TCA_EM_META_MAX, data, len) < 0) 760 err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy);
761 if (err < 0)
758 goto errout; 762 goto errout;
759 763
760 if (tb[TCA_EM_META_HDR-1] == NULL || 764 err = -EINVAL;
761 RTA_PAYLOAD(tb[TCA_EM_META_HDR-1]) < sizeof(*hdr)) 765 if (tb[TCA_EM_META_HDR] == NULL)
762 goto errout; 766 goto errout;
763 hdr = RTA_DATA(tb[TCA_EM_META_HDR-1]); 767 hdr = nla_data(tb[TCA_EM_META_HDR]);
764 768
765 if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) || 769 if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) ||
766 TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX || 770 TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX ||
@@ -781,8 +785,8 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
781 goto errout; 785 goto errout;
782 } 786 }
783 787
784 if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE-1]) < 0 || 788 if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE]) < 0 ||
785 meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE-1]) < 0) 789 meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE]) < 0)
786 goto errout; 790 goto errout;
787 791
788 m->datalen = sizeof(*meta); 792 m->datalen = sizeof(*meta);
@@ -811,16 +815,16 @@ static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em)
811 memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left)); 815 memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left));
812 memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right)); 816 memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right));
813 817
814 RTA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr); 818 NLA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
815 819
816 ops = meta_type_ops(&meta->lvalue); 820 ops = meta_type_ops(&meta->lvalue);
817 if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 || 821 if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 ||
818 ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0) 822 ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0)
819 goto rtattr_failure; 823 goto nla_put_failure;
820 824
821 return 0; 825 return 0;
822 826
823rtattr_failure: 827nla_put_failure:
824 return -1; 828 return -1;
825} 829}
826 830
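
meta_int_change() above is one of the few places where a raw nla_data() cast survives the conversion: there is no nla_get_ulong() helper, and the attribute may legitimately carry either a full unsigned long or a bare u32. Condensed into a standalone sketch (read_int_value is a hypothetical name; the logic mirrors the hunk):

	static int read_int_value(struct nlattr *nla, unsigned long *val)
	{
		if (nla_len(nla) >= sizeof(unsigned long))
			*val = *(unsigned long *) nla_data(nla);	/* native width, no typed helper */
		else if (nla_len(nla) == sizeof(u32))
			*val = nla_get_u32(nla);			/* typed helper for the 32-bit case */
		else
			return -EINVAL;
		return 0;
	}
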
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index d5cd86efb7d0..853c5ead87fd 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -118,11 +118,14 @@ static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
118 conf.pattern_len = textsearch_get_pattern_len(tm->config); 118 conf.pattern_len = textsearch_get_pattern_len(tm->config);
119 conf.pad = 0; 119 conf.pad = 0;
120 120
121 RTA_PUT_NOHDR(skb, sizeof(conf), &conf); 121 if (nla_put_nohdr(skb, sizeof(conf), &conf) < 0)
122 RTA_APPEND(skb, conf.pattern_len, textsearch_get_pattern(tm->config)); 122 goto nla_put_failure;
123 if (nla_append(skb, conf.pattern_len,
124 textsearch_get_pattern(tm->config)) < 0)
125 goto nla_put_failure;
123 return 0; 126 return 0;
124 127
125rtattr_failure: 128nla_put_failure:
126 return -1; 129 return -1;
127} 130}
128 131
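
The em_text hunk shows the conversion for headerless payloads: RTA_PUT_NOHDR and RTA_APPEND were macros that jumped to the failure label themselves, whereas nla_put_nohdr() and nla_append() signal overflow through a negative return value, hence the new explicit checks. Sketched in isolation (conf, pattern_len and pattern stand in for the textsearch state used in the hunk):

	/* emit a fixed header with no TLV header of its own ... */
	if (nla_put_nohdr(skb, sizeof(conf), &conf) < 0)
		goto nla_put_failure;
	/* ... then append the variable-length pattern directly behind it,
	 * growing the message without starting a new attribute */
	if (nla_append(skb, pattern_len, pattern) < 0)
		goto nla_put_failure;
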
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index f3a104e323bd..74ff918455a2 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -141,6 +141,7 @@ errout:
141 write_unlock(&ematch_mod_lock); 141 write_unlock(&ematch_mod_lock);
142 return err; 142 return err;
143} 143}
144EXPORT_SYMBOL(tcf_em_register);
144 145
145/** 146/**
 146 * tcf_em_unregister - unregister an extended match 147 * tcf_em_unregister - unregister an extended match
@@ -171,6 +172,7 @@ out:
171 write_unlock(&ematch_mod_lock); 172 write_unlock(&ematch_mod_lock);
172 return err; 173 return err;
173} 174}
175EXPORT_SYMBOL(tcf_em_unregister);
174 176
175static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, 177static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
176 int index) 178 int index)
@@ -181,11 +183,11 @@ static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
181 183
182static int tcf_em_validate(struct tcf_proto *tp, 184static int tcf_em_validate(struct tcf_proto *tp,
183 struct tcf_ematch_tree_hdr *tree_hdr, 185 struct tcf_ematch_tree_hdr *tree_hdr,
184 struct tcf_ematch *em, struct rtattr *rta, int idx) 186 struct tcf_ematch *em, struct nlattr *nla, int idx)
185{ 187{
186 int err = -EINVAL; 188 int err = -EINVAL;
187 struct tcf_ematch_hdr *em_hdr = RTA_DATA(rta); 189 struct tcf_ematch_hdr *em_hdr = nla_data(nla);
188 int data_len = RTA_PAYLOAD(rta) - sizeof(*em_hdr); 190 int data_len = nla_len(nla) - sizeof(*em_hdr);
189 void *data = (void *) em_hdr + sizeof(*em_hdr); 191 void *data = (void *) em_hdr + sizeof(*em_hdr);
190 192
191 if (!TCF_EM_REL_VALID(em_hdr->flags)) 193 if (!TCF_EM_REL_VALID(em_hdr->flags))
@@ -280,15 +282,20 @@ errout:
280 return err; 282 return err;
281} 283}
282 284
285static const struct nla_policy em_policy[TCA_EMATCH_TREE_MAX + 1] = {
286 [TCA_EMATCH_TREE_HDR] = { .len = sizeof(struct tcf_ematch_tree_hdr) },
287 [TCA_EMATCH_TREE_LIST] = { .type = NLA_NESTED },
288};
289
283/** 290/**
284 * tcf_em_tree_validate - validate ematch config TLV and build ematch tree 291 * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
285 * 292 *
286 * @tp: classifier kind handle 293 * @tp: classifier kind handle
287 * @rta: ematch tree configuration TLV 294 * @nla: ematch tree configuration TLV
288 * @tree: destination ematch tree variable to store the resulting 295 * @tree: destination ematch tree variable to store the resulting
289 * ematch tree. 296 * ematch tree.
290 * 297 *
291 * This function validates the given configuration TLV @rta and builds an 298 * This function validates the given configuration TLV @nla and builds an
292 * ematch tree in @tree. The resulting tree must later be copied into 299 * ematch tree in @tree. The resulting tree must later be copied into
293 * the private classifier data using tcf_em_tree_change(). You MUST NOT 300 * the private classifier data using tcf_em_tree_change(). You MUST NOT
294 * provide the ematch tree variable of the private classifier data directly, 301 * provide the ematch tree variable of the private classifier data directly,
@@ -296,45 +303,43 @@ errout:
296 * 303 *
297 * Returns a negative error code if the configuration TLV contains errors. 304 * Returns a negative error code if the configuration TLV contains errors.
298 */ 305 */
299int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta, 306int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
300 struct tcf_ematch_tree *tree) 307 struct tcf_ematch_tree *tree)
301{ 308{
302 int idx, list_len, matches_len, err = -EINVAL; 309 int idx, list_len, matches_len, err;
303 struct rtattr *tb[TCA_EMATCH_TREE_MAX]; 310 struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1];
304 struct rtattr *rt_match, *rt_hdr, *rt_list; 311 struct nlattr *rt_match, *rt_hdr, *rt_list;
305 struct tcf_ematch_tree_hdr *tree_hdr; 312 struct tcf_ematch_tree_hdr *tree_hdr;
306 struct tcf_ematch *em; 313 struct tcf_ematch *em;
307 314
308 if (!rta) { 315 if (!nla) {
309 memset(tree, 0, sizeof(*tree)); 316 memset(tree, 0, sizeof(*tree));
310 return 0; 317 return 0;
311 } 318 }
312 319
313 if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0) 320 err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy);
321 if (err < 0)
314 goto errout; 322 goto errout;
315 323
316 rt_hdr = tb[TCA_EMATCH_TREE_HDR-1]; 324 err = -EINVAL;
317 rt_list = tb[TCA_EMATCH_TREE_LIST-1]; 325 rt_hdr = tb[TCA_EMATCH_TREE_HDR];
326 rt_list = tb[TCA_EMATCH_TREE_LIST];
318 327
319 if (rt_hdr == NULL || rt_list == NULL) 328 if (rt_hdr == NULL || rt_list == NULL)
320 goto errout; 329 goto errout;
321 330
322 if (RTA_PAYLOAD(rt_hdr) < sizeof(*tree_hdr) || 331 tree_hdr = nla_data(rt_hdr);
323 RTA_PAYLOAD(rt_list) < sizeof(*rt_match))
324 goto errout;
325
326 tree_hdr = RTA_DATA(rt_hdr);
327 memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr)); 332 memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
328 333
329 rt_match = RTA_DATA(rt_list); 334 rt_match = nla_data(rt_list);
330 list_len = RTA_PAYLOAD(rt_list); 335 list_len = nla_len(rt_list);
331 matches_len = tree_hdr->nmatches * sizeof(*em); 336 matches_len = tree_hdr->nmatches * sizeof(*em);
332 337
333 tree->matches = kzalloc(matches_len, GFP_KERNEL); 338 tree->matches = kzalloc(matches_len, GFP_KERNEL);
334 if (tree->matches == NULL) 339 if (tree->matches == NULL)
335 goto errout; 340 goto errout;
336 341
337 /* We do not use rtattr_parse_nested here because the maximum 342 /* We do not use nla_parse_nested here because the maximum
338 * number of attributes is unknown. This saves us the allocation 343 * number of attributes is unknown. This saves us the allocation
339 * for a tb buffer which would serve no purpose at all. 344 * for a tb buffer which would serve no purpose at all.
340 * 345 *
@@ -342,16 +347,16 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
342 * provided, their type must be incremental from 1 to n. Even 347 * provided, their type must be incremental from 1 to n. Even
343 * if it does not serve any real purpose, a failure of sticking 348 * if it does not serve any real purpose, a failure of sticking
344 * to this policy will result in parsing failure. */ 349 * to this policy will result in parsing failure. */
345 for (idx = 0; RTA_OK(rt_match, list_len); idx++) { 350 for (idx = 0; nla_ok(rt_match, list_len); idx++) {
346 err = -EINVAL; 351 err = -EINVAL;
347 352
348 if (rt_match->rta_type != (idx + 1)) 353 if (rt_match->nla_type != (idx + 1))
349 goto errout_abort; 354 goto errout_abort;
350 355
351 if (idx >= tree_hdr->nmatches) 356 if (idx >= tree_hdr->nmatches)
352 goto errout_abort; 357 goto errout_abort;
353 358
354 if (RTA_PAYLOAD(rt_match) < sizeof(struct tcf_ematch_hdr)) 359 if (nla_len(rt_match) < sizeof(struct tcf_ematch_hdr))
355 goto errout_abort; 360 goto errout_abort;
356 361
357 em = tcf_em_get_match(tree, idx); 362 em = tcf_em_get_match(tree, idx);
@@ -360,7 +365,7 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
360 if (err < 0) 365 if (err < 0)
361 goto errout_abort; 366 goto errout_abort;
362 367
363 rt_match = RTA_NEXT(rt_match, list_len); 368 rt_match = nla_next(rt_match, &list_len);
364 } 369 }
365 370
366 /* Check if the number of matches provided by userspace actually 371 /* Check if the number of matches provided by userspace actually
@@ -380,6 +385,7 @@ errout_abort:
380 tcf_em_tree_destroy(tp, tree); 385 tcf_em_tree_destroy(tp, tree);
381 return err; 386 return err;
382} 387}
388EXPORT_SYMBOL(tcf_em_tree_validate);
383 389
384/** 390/**
385 * tcf_em_tree_destroy - destroy an ematch tree 391 * tcf_em_tree_destroy - destroy an ematch tree
@@ -413,6 +419,7 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
413 tree->hdr.nmatches = 0; 419 tree->hdr.nmatches = 0;
414 kfree(tree->matches); 420 kfree(tree->matches);
415} 421}
422EXPORT_SYMBOL(tcf_em_tree_destroy);
416 423
417/** 424/**
418 * tcf_em_tree_dump - dump ematch tree into a rtnl message 425 * tcf_em_tree_dump - dump ematch tree into a rtnl message
@@ -430,18 +437,22 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
430{ 437{
431 int i; 438 int i;
432 u8 *tail; 439 u8 *tail;
433 struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb); 440 struct nlattr *top_start;
434 struct rtattr *list_start; 441 struct nlattr *list_start;
442
443 top_start = nla_nest_start(skb, tlv);
444 if (top_start == NULL)
445 goto nla_put_failure;
435 446
436 RTA_PUT(skb, tlv, 0, NULL); 447 NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
437 RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
438 448
439 list_start = (struct rtattr *)skb_tail_pointer(skb); 449 list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST);
440 RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL); 450 if (list_start == NULL)
451 goto nla_put_failure;
441 452
442 tail = skb_tail_pointer(skb); 453 tail = skb_tail_pointer(skb);
443 for (i = 0; i < tree->hdr.nmatches; i++) { 454 for (i = 0; i < tree->hdr.nmatches; i++) {
444 struct rtattr *match_start = (struct rtattr *)tail; 455 struct nlattr *match_start = (struct nlattr *)tail;
445 struct tcf_ematch *em = tcf_em_get_match(tree, i); 456 struct tcf_ematch *em = tcf_em_get_match(tree, i);
446 struct tcf_ematch_hdr em_hdr = { 457 struct tcf_ematch_hdr em_hdr = {
447 .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER, 458 .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
@@ -449,29 +460,30 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
449 .flags = em->flags 460 .flags = em->flags
450 }; 461 };
451 462
452 RTA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); 463 NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
453 464
454 if (em->ops && em->ops->dump) { 465 if (em->ops && em->ops->dump) {
455 if (em->ops->dump(skb, em) < 0) 466 if (em->ops->dump(skb, em) < 0)
456 goto rtattr_failure; 467 goto nla_put_failure;
457 } else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) { 468 } else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
458 u32 u = em->data; 469 u32 u = em->data;
459 RTA_PUT_NOHDR(skb, sizeof(u), &u); 470 nla_put_nohdr(skb, sizeof(u), &u);
460 } else if (em->datalen > 0) 471 } else if (em->datalen > 0)
461 RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data); 472 nla_put_nohdr(skb, em->datalen, (void *) em->data);
462 473
463 tail = skb_tail_pointer(skb); 474 tail = skb_tail_pointer(skb);
464 match_start->rta_len = tail - (u8 *)match_start; 475 match_start->nla_len = tail - (u8 *)match_start;
465 } 476 }
466 477
467 list_start->rta_len = tail - (u8 *)list_start; 478 nla_nest_end(skb, list_start);
468 top_start->rta_len = tail - (u8 *)top_start; 479 nla_nest_end(skb, top_start);
469 480
470 return 0; 481 return 0;
471 482
472rtattr_failure: 483nla_put_failure:
473 return -1; 484 return -1;
474} 485}
486EXPORT_SYMBOL(tcf_em_tree_dump);
475 487
476static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em, 488static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
477 struct tcf_pkt_info *info) 489 struct tcf_pkt_info *info)
@@ -529,10 +541,4 @@ stack_overflow:
529 printk("Local stack overflow, increase NET_EMATCH_STACK\n"); 541 printk("Local stack overflow, increase NET_EMATCH_STACK\n");
530 return -1; 542 return -1;
531} 543}
532
533EXPORT_SYMBOL(tcf_em_register);
534EXPORT_SYMBOL(tcf_em_unregister);
535EXPORT_SYMBOL(tcf_em_tree_validate);
536EXPORT_SYMBOL(tcf_em_tree_destroy);
537EXPORT_SYMBOL(tcf_em_tree_dump);
538EXPORT_SYMBOL(__tcf_em_tree_match); 544EXPORT_SYMBOL(__tcf_em_tree_match);
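
As the comment in tcf_em_tree_validate() notes, the ematch list cannot go through nla_parse_nested() because the number of members is only known from tree_hdr->nmatches. The code therefore walks the raw attribute stream by hand; reduced to its essentials, the iteration idiom looks like this sketch:

	struct nlattr *pos = nla_data(rt_list);	/* first attribute inside the list */
	int rem = nla_len(rt_list);		/* payload bytes remaining */
	int idx;

	for (idx = 0; nla_ok(pos, rem); idx++) {
		/* the list is required to be numbered 1..n in order, so
		 * pos->nla_type is checked against idx + 1 before use */
		pos = nla_next(pos, &rem);	/* advance and shrink rem accordingly */
	}
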
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 8ae137e3522b..7e3c048ba9b1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -29,6 +29,7 @@
29#include <linux/hrtimer.h> 29#include <linux/hrtimer.h>
30 30
31#include <net/net_namespace.h> 31#include <net/net_namespace.h>
32#include <net/sock.h>
32#include <net/netlink.h> 33#include <net/netlink.h>
33#include <net/pkt_sched.h> 34#include <net/pkt_sched.h>
34 35
@@ -157,6 +158,7 @@ out:
157 write_unlock(&qdisc_mod_lock); 158 write_unlock(&qdisc_mod_lock);
158 return rc; 159 return rc;
159} 160}
161EXPORT_SYMBOL(register_qdisc);
160 162
161int unregister_qdisc(struct Qdisc_ops *qops) 163int unregister_qdisc(struct Qdisc_ops *qops)
162{ 164{
@@ -175,6 +177,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
175 write_unlock(&qdisc_mod_lock); 177 write_unlock(&qdisc_mod_lock);
176 return err; 178 return err;
177} 179}
180EXPORT_SYMBOL(unregister_qdisc);
178 181
179/* We know handle. Find qdisc among all qdisc's attached to device 182/* We know handle. Find qdisc among all qdisc's attached to device
180 (root qdisc, all its children, children of children etc.) 183 (root qdisc, all its children, children of children etc.)
@@ -195,7 +198,7 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
195{ 198{
196 unsigned long cl; 199 unsigned long cl;
197 struct Qdisc *leaf; 200 struct Qdisc *leaf;
198 struct Qdisc_class_ops *cops = p->ops->cl_ops; 201 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
199 202
200 if (cops == NULL) 203 if (cops == NULL)
201 return NULL; 204 return NULL;
@@ -210,14 +213,14 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
210 213
211/* Find queueing discipline by name */ 214/* Find queueing discipline by name */
212 215
213static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind) 216static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
214{ 217{
215 struct Qdisc_ops *q = NULL; 218 struct Qdisc_ops *q = NULL;
216 219
217 if (kind) { 220 if (kind) {
218 read_lock(&qdisc_mod_lock); 221 read_lock(&qdisc_mod_lock);
219 for (q = qdisc_base; q; q = q->next) { 222 for (q = qdisc_base; q; q = q->next) {
220 if (rtattr_strcmp(kind, q->id) == 0) { 223 if (nla_strcmp(kind, q->id) == 0) {
221 if (!try_module_get(q->owner)) 224 if (!try_module_get(q->owner))
222 q = NULL; 225 q = NULL;
223 break; 226 break;
@@ -230,7 +233,7 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
230 233
231static struct qdisc_rate_table *qdisc_rtab_list; 234static struct qdisc_rate_table *qdisc_rtab_list;
232 235
233struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab) 236struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
234{ 237{
235 struct qdisc_rate_table *rtab; 238 struct qdisc_rate_table *rtab;
236 239
@@ -241,19 +244,21 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *ta
241 } 244 }
242 } 245 }
243 246
244 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024) 247 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
248 nla_len(tab) != TC_RTAB_SIZE)
245 return NULL; 249 return NULL;
246 250
247 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); 251 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
248 if (rtab) { 252 if (rtab) {
249 rtab->rate = *r; 253 rtab->rate = *r;
250 rtab->refcnt = 1; 254 rtab->refcnt = 1;
251 memcpy(rtab->data, RTA_DATA(tab), 1024); 255 memcpy(rtab->data, nla_data(tab), 1024);
252 rtab->next = qdisc_rtab_list; 256 rtab->next = qdisc_rtab_list;
253 qdisc_rtab_list = rtab; 257 qdisc_rtab_list = rtab;
254 } 258 }
255 return rtab; 259 return rtab;
256} 260}
261EXPORT_SYMBOL(qdisc_get_rtab);
257 262
258void qdisc_put_rtab(struct qdisc_rate_table *tab) 263void qdisc_put_rtab(struct qdisc_rate_table *tab)
259{ 264{
@@ -270,6 +275,7 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
270 } 275 }
271 } 276 }
272} 277}
278EXPORT_SYMBOL(qdisc_put_rtab);
273 279
274static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) 280static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
275{ 281{
@@ -373,7 +379,7 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
373 379
374void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) 380void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
375{ 381{
376 struct Qdisc_class_ops *cops; 382 const struct Qdisc_class_ops *cops;
377 unsigned long cl; 383 unsigned long cl;
378 u32 parentid; 384 u32 parentid;
379 385
@@ -417,7 +423,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
417 *old = dev_graft_qdisc(dev, new); 423 *old = dev_graft_qdisc(dev, new);
418 } 424 }
419 } else { 425 } else {
420 struct Qdisc_class_ops *cops = parent->ops->cl_ops; 426 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
421 427
422 err = -EINVAL; 428 err = -EINVAL;
423 429
@@ -440,10 +446,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
440 446
441static struct Qdisc * 447static struct Qdisc *
442qdisc_create(struct net_device *dev, u32 parent, u32 handle, 448qdisc_create(struct net_device *dev, u32 parent, u32 handle,
443 struct rtattr **tca, int *errp) 449 struct nlattr **tca, int *errp)
444{ 450{
445 int err; 451 int err;
446 struct rtattr *kind = tca[TCA_KIND-1]; 452 struct nlattr *kind = tca[TCA_KIND];
447 struct Qdisc *sch; 453 struct Qdisc *sch;
448 struct Qdisc_ops *ops; 454 struct Qdisc_ops *ops;
449 455
@@ -451,7 +457,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
451#ifdef CONFIG_KMOD 457#ifdef CONFIG_KMOD
452 if (ops == NULL && kind != NULL) { 458 if (ops == NULL && kind != NULL) {
453 char name[IFNAMSIZ]; 459 char name[IFNAMSIZ];
454 if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { 460 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
455 /* We dropped the RTNL semaphore in order to 461 /* We dropped the RTNL semaphore in order to
456 * perform the module load. So, even if we 462 * perform the module load. So, even if we
457 * succeeded in loading the module we have to 463 * succeeded in loading the module we have to
@@ -504,11 +510,11 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
504 510
505 sch->handle = handle; 511 sch->handle = handle;
506 512
507 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { 513 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
508 if (tca[TCA_RATE-1]) { 514 if (tca[TCA_RATE]) {
509 err = gen_new_estimator(&sch->bstats, &sch->rate_est, 515 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
510 sch->stats_lock, 516 sch->stats_lock,
511 tca[TCA_RATE-1]); 517 tca[TCA_RATE]);
512 if (err) { 518 if (err) {
513 /* 519 /*
514 * Any broken qdiscs that would require 520 * Any broken qdiscs that would require
@@ -536,20 +542,20 @@ err_out:
536 return NULL; 542 return NULL;
537} 543}
538 544
539static int qdisc_change(struct Qdisc *sch, struct rtattr **tca) 545static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
540{ 546{
541 if (tca[TCA_OPTIONS-1]) { 547 if (tca[TCA_OPTIONS]) {
542 int err; 548 int err;
543 549
544 if (sch->ops->change == NULL) 550 if (sch->ops->change == NULL)
545 return -EINVAL; 551 return -EINVAL;
546 err = sch->ops->change(sch, tca[TCA_OPTIONS-1]); 552 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
547 if (err) 553 if (err)
548 return err; 554 return err;
549 } 555 }
550 if (tca[TCA_RATE-1]) 556 if (tca[TCA_RATE])
551 gen_replace_estimator(&sch->bstats, &sch->rate_est, 557 gen_replace_estimator(&sch->bstats, &sch->rate_est,
552 sch->stats_lock, tca[TCA_RATE-1]); 558 sch->stats_lock, tca[TCA_RATE]);
553 return 0; 559 return 0;
554} 560}
555 561
@@ -581,7 +587,7 @@ static int
581check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) 587check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
582{ 588{
583 struct Qdisc *leaf; 589 struct Qdisc *leaf;
584 struct Qdisc_class_ops *cops = q->ops->cl_ops; 590 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
585 struct check_loop_arg *arg = (struct check_loop_arg *)w; 591 struct check_loop_arg *arg = (struct check_loop_arg *)w;
586 592
587 leaf = cops->leaf(q, cl); 593 leaf = cops->leaf(q, cl);
@@ -599,17 +605,25 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
599 605
600static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 606static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
601{ 607{
608 struct net *net = skb->sk->sk_net;
602 struct tcmsg *tcm = NLMSG_DATA(n); 609 struct tcmsg *tcm = NLMSG_DATA(n);
603 struct rtattr **tca = arg; 610 struct nlattr *tca[TCA_MAX + 1];
604 struct net_device *dev; 611 struct net_device *dev;
605 u32 clid = tcm->tcm_parent; 612 u32 clid = tcm->tcm_parent;
606 struct Qdisc *q = NULL; 613 struct Qdisc *q = NULL;
607 struct Qdisc *p = NULL; 614 struct Qdisc *p = NULL;
608 int err; 615 int err;
609 616
617 if (net != &init_net)
618 return -EINVAL;
619
610 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 620 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
611 return -ENODEV; 621 return -ENODEV;
612 622
623 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
624 if (err < 0)
625 return err;
626
613 if (clid) { 627 if (clid) {
614 if (clid != TC_H_ROOT) { 628 if (clid != TC_H_ROOT) {
615 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 629 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
@@ -632,7 +646,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
632 return -ENOENT; 646 return -ENOENT;
633 } 647 }
634 648
635 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) 649 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
636 return -EINVAL; 650 return -EINVAL;
637 651
638 if (n->nlmsg_type == RTM_DELQDISC) { 652 if (n->nlmsg_type == RTM_DELQDISC) {
@@ -660,23 +674,30 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
660 674
661static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 675static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
662{ 676{
677 struct net *net = skb->sk->sk_net;
663 struct tcmsg *tcm; 678 struct tcmsg *tcm;
664 struct rtattr **tca; 679 struct nlattr *tca[TCA_MAX + 1];
665 struct net_device *dev; 680 struct net_device *dev;
666 u32 clid; 681 u32 clid;
667 struct Qdisc *q, *p; 682 struct Qdisc *q, *p;
668 int err; 683 int err;
669 684
685 if (net != &init_net)
686 return -EINVAL;
687
670replay: 688replay:
671 /* Reinit, just in case something touches this. */ 689 /* Reinit, just in case something touches this. */
672 tcm = NLMSG_DATA(n); 690 tcm = NLMSG_DATA(n);
673 tca = arg;
674 clid = tcm->tcm_parent; 691 clid = tcm->tcm_parent;
675 q = p = NULL; 692 q = p = NULL;
676 693
677 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 694 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
678 return -ENODEV; 695 return -ENODEV;
679 696
697 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
698 if (err < 0)
699 return err;
700
680 if (clid) { 701 if (clid) {
681 if (clid != TC_H_ROOT) { 702 if (clid != TC_H_ROOT) {
682 if (clid != TC_H_INGRESS) { 703 if (clid != TC_H_INGRESS) {
@@ -704,7 +725,7 @@ replay:
704 goto create_n_graft; 725 goto create_n_graft;
705 if (n->nlmsg_flags&NLM_F_EXCL) 726 if (n->nlmsg_flags&NLM_F_EXCL)
706 return -EEXIST; 727 return -EEXIST;
707 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) 728 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
708 return -EINVAL; 729 return -EINVAL;
709 if (q == p || 730 if (q == p ||
710 (p && check_loop(q, p, 0))) 731 (p && check_loop(q, p, 0)))
@@ -737,8 +758,8 @@ replay:
737 if ((n->nlmsg_flags&NLM_F_CREATE) && 758 if ((n->nlmsg_flags&NLM_F_CREATE) &&
738 (n->nlmsg_flags&NLM_F_REPLACE) && 759 (n->nlmsg_flags&NLM_F_REPLACE) &&
739 ((n->nlmsg_flags&NLM_F_EXCL) || 760 ((n->nlmsg_flags&NLM_F_EXCL) ||
740 (tca[TCA_KIND-1] && 761 (tca[TCA_KIND] &&
741 rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)))) 762 nla_strcmp(tca[TCA_KIND], q->ops->id))))
742 goto create_n_graft; 763 goto create_n_graft;
743 } 764 }
744 } 765 }
@@ -753,7 +774,7 @@ replay:
753 return -ENOENT; 774 return -ENOENT;
754 if (n->nlmsg_flags&NLM_F_EXCL) 775 if (n->nlmsg_flags&NLM_F_EXCL)
755 return -EEXIST; 776 return -EEXIST;
756 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id)) 777 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
757 return -EINVAL; 778 return -EINVAL;
758 err = qdisc_change(q, tca); 779 err = qdisc_change(q, tca);
759 if (err == 0) 780 if (err == 0)
@@ -814,31 +835,31 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
814 tcm->tcm_parent = clid; 835 tcm->tcm_parent = clid;
815 tcm->tcm_handle = q->handle; 836 tcm->tcm_handle = q->handle;
816 tcm->tcm_info = atomic_read(&q->refcnt); 837 tcm->tcm_info = atomic_read(&q->refcnt);
817 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); 838 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
818 if (q->ops->dump && q->ops->dump(q, skb) < 0) 839 if (q->ops->dump && q->ops->dump(q, skb) < 0)
819 goto rtattr_failure; 840 goto nla_put_failure;
820 q->qstats.qlen = q->q.qlen; 841 q->qstats.qlen = q->q.qlen;
821 842
822 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 843 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
823 TCA_XSTATS, q->stats_lock, &d) < 0) 844 TCA_XSTATS, q->stats_lock, &d) < 0)
824 goto rtattr_failure; 845 goto nla_put_failure;
825 846
826 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) 847 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
827 goto rtattr_failure; 848 goto nla_put_failure;
828 849
829 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || 850 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
830 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || 851 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
831 gnet_stats_copy_queue(&d, &q->qstats) < 0) 852 gnet_stats_copy_queue(&d, &q->qstats) < 0)
832 goto rtattr_failure; 853 goto nla_put_failure;
833 854
834 if (gnet_stats_finish_copy(&d) < 0) 855 if (gnet_stats_finish_copy(&d) < 0)
835 goto rtattr_failure; 856 goto nla_put_failure;
836 857
837 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 858 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
838 return skb->len; 859 return skb->len;
839 860
840nlmsg_failure: 861nlmsg_failure:
841rtattr_failure: 862nla_put_failure:
842 nlmsg_trim(skb, b); 863 nlmsg_trim(skb, b);
843 return -1; 864 return -1;
844} 865}
@@ -863,7 +884,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
863 } 884 }
864 885
865 if (skb->len) 886 if (skb->len)
866 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 887 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
867 888
868err_out: 889err_out:
869 kfree_skb(skb); 890 kfree_skb(skb);
@@ -872,11 +893,15 @@ err_out:
872 893
873static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) 894static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
874{ 895{
896 struct net *net = skb->sk->sk_net;
875 int idx, q_idx; 897 int idx, q_idx;
876 int s_idx, s_q_idx; 898 int s_idx, s_q_idx;
877 struct net_device *dev; 899 struct net_device *dev;
878 struct Qdisc *q; 900 struct Qdisc *q;
879 901
902 if (net != &init_net)
903 return 0;
904
880 s_idx = cb->args[0]; 905 s_idx = cb->args[0];
881 s_q_idx = q_idx = cb->args[1]; 906 s_q_idx = q_idx = cb->args[1];
882 read_lock(&dev_base_lock); 907 read_lock(&dev_base_lock);
@@ -920,11 +945,12 @@ done:
920 945
921static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 946static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
922{ 947{
948 struct net *net = skb->sk->sk_net;
923 struct tcmsg *tcm = NLMSG_DATA(n); 949 struct tcmsg *tcm = NLMSG_DATA(n);
924 struct rtattr **tca = arg; 950 struct nlattr *tca[TCA_MAX + 1];
925 struct net_device *dev; 951 struct net_device *dev;
926 struct Qdisc *q = NULL; 952 struct Qdisc *q = NULL;
927 struct Qdisc_class_ops *cops; 953 const struct Qdisc_class_ops *cops;
928 unsigned long cl = 0; 954 unsigned long cl = 0;
929 unsigned long new_cl; 955 unsigned long new_cl;
930 u32 pid = tcm->tcm_parent; 956 u32 pid = tcm->tcm_parent;
@@ -932,9 +958,16 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
932 u32 qid = TC_H_MAJ(clid); 958 u32 qid = TC_H_MAJ(clid);
933 int err; 959 int err;
934 960
961 if (net != &init_net)
962 return -EINVAL;
963
935 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 964 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
936 return -ENODEV; 965 return -ENODEV;
937 966
967 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
968 if (err < 0)
969 return err;
970
938 /* 971 /*
939 parent == TC_H_UNSPEC - unspecified parent. 972 parent == TC_H_UNSPEC - unspecified parent.
940 parent == TC_H_ROOT - class is root, which has no parent. 973 parent == TC_H_ROOT - class is root, which has no parent.
@@ -1039,7 +1072,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1039 struct nlmsghdr *nlh; 1072 struct nlmsghdr *nlh;
1040 unsigned char *b = skb_tail_pointer(skb); 1073 unsigned char *b = skb_tail_pointer(skb);
1041 struct gnet_dump d; 1074 struct gnet_dump d;
1042 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1075 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1043 1076
1044 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); 1077 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1045 tcm = NLMSG_DATA(nlh); 1078 tcm = NLMSG_DATA(nlh);
@@ -1048,25 +1081,25 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1048 tcm->tcm_parent = q->handle; 1081 tcm->tcm_parent = q->handle;
1049 tcm->tcm_handle = q->handle; 1082 tcm->tcm_handle = q->handle;
1050 tcm->tcm_info = 0; 1083 tcm->tcm_info = 0;
1051 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id); 1084 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1052 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) 1085 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1053 goto rtattr_failure; 1086 goto nla_put_failure;
1054 1087
1055 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 1088 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
1056 TCA_XSTATS, q->stats_lock, &d) < 0) 1089 TCA_XSTATS, q->stats_lock, &d) < 0)
1057 goto rtattr_failure; 1090 goto nla_put_failure;
1058 1091
1059 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) 1092 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1060 goto rtattr_failure; 1093 goto nla_put_failure;
1061 1094
1062 if (gnet_stats_finish_copy(&d) < 0) 1095 if (gnet_stats_finish_copy(&d) < 0)
1063 goto rtattr_failure; 1096 goto nla_put_failure;
1064 1097
1065 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1098 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1066 return skb->len; 1099 return skb->len;
1067 1100
1068nlmsg_failure: 1101nlmsg_failure:
1069rtattr_failure: 1102nla_put_failure:
1070 nlmsg_trim(skb, b); 1103 nlmsg_trim(skb, b);
1071 return -1; 1104 return -1;
1072} 1105}
@@ -1086,7 +1119,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1086 return -EINVAL; 1119 return -EINVAL;
1087 } 1120 }
1088 1121
1089 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1122 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1090} 1123}
1091 1124
1092struct qdisc_dump_args 1125struct qdisc_dump_args
@@ -1106,6 +1139,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
1106 1139
1107static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) 1140static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1108{ 1141{
1142 struct net *net = skb->sk->sk_net;
1109 int t; 1143 int t;
1110 int s_t; 1144 int s_t;
1111 struct net_device *dev; 1145 struct net_device *dev;
@@ -1113,6 +1147,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1113 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); 1147 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1114 struct qdisc_dump_args arg; 1148 struct qdisc_dump_args arg;
1115 1149
1150 if (net != &init_net)
1151 return 0;
1152
1116 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 1153 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1117 return 0; 1154 return 0;
1118 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 1155 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
@@ -1268,8 +1305,3 @@ static int __init pktsched_init(void)
1268} 1305}
1269 1306
1270subsys_initcall(pktsched_init); 1307subsys_initcall(pktsched_init);
1271
1272EXPORT_SYMBOL(qdisc_get_rtab);
1273EXPORT_SYMBOL(qdisc_put_rtab);
1274EXPORT_SYMBOL(register_qdisc);
1275EXPORT_SYMBOL(unregister_qdisc);
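
Two changes recur throughout the sch_api hunks: each message handler now builds its own attribute table with nlmsg_parse() instead of receiving a pre-parsed rtattr array through the opaque arg pointer, and every handler bails out early for any namespace other than init_net, since tc is not namespace-aware at this point. A sketch of the shared preamble (example_doit is a hypothetical name):

	static int example_doit(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
	{
		struct net *net = skb->sk->sk_net;	/* namespace the request arrived in */
		struct nlattr *tca[TCA_MAX + 1];
		int err;

		if (net != &init_net)			/* qdiscs are not per-namespace yet */
			return -EINVAL;

		/* validate and index all top-level attributes behind the tcmsg header */
		err = nlmsg_parse(n, sizeof(struct tcmsg), tca, TCA_MAX, NULL);
		if (err < 0)
			return err;

		/* tca[TCA_KIND], tca[TCA_OPTIONS], tca[TCA_RATE] are used from here on */
		return 0;
	}
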
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ddc4f2c54379..335273416384 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -16,18 +16,6 @@
16 16
17extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ 17extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */
18 18
19#if 0 /* control */
20#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
21#else
22#define DPRINTK(format,args...)
23#endif
24
25#if 0 /* data */
26#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
27#else
28#define D2PRINTK(format,args...)
29#endif
30
31/* 19/*
32 * The ATM queuing discipline provides a framework for invoking classifiers 20 * The ATM queuing discipline provides a framework for invoking classifiers
33 * (aka "filters"), which in turn select classes of this queuing discipline. 21 * (aka "filters"), which in turn select classes of this queuing discipline.
@@ -49,7 +37,6 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */
49 * - should lock the flow while there is data in the queue (?) 37 * - should lock the flow while there is data in the queue (?)
50 */ 38 */
51 39
52#define PRIV(sch) qdisc_priv(sch)
53#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) 40#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
54 41
55struct atm_flow_data { 42struct atm_flow_data {
@@ -57,7 +44,7 @@ struct atm_flow_data {
57 struct tcf_proto *filter_list; 44 struct tcf_proto *filter_list;
58 struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ 45 struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
59 void (*old_pop)(struct atm_vcc *vcc, 46 void (*old_pop)(struct atm_vcc *vcc,
60 struct sk_buff * skb); /* chaining */ 47 struct sk_buff *skb); /* chaining */
61 struct atm_qdisc_data *parent; /* parent qdisc */ 48 struct atm_qdisc_data *parent; /* parent qdisc */
62 struct socket *sock; /* for closing */ 49 struct socket *sock; /* for closing */
63 u32 classid; /* x:y type ID */ 50 u32 classid; /* x:y type ID */
@@ -84,17 +71,17 @@ static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow)
84{ 71{
85 struct atm_flow_data *walk; 72 struct atm_flow_data *walk;
86 73
87 DPRINTK("find_flow(qdisc %p,flow %p)\n", qdisc, flow); 74 pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
88 for (walk = qdisc->flows; walk; walk = walk->next) 75 for (walk = qdisc->flows; walk; walk = walk->next)
89 if (walk == flow) 76 if (walk == flow)
90 return 1; 77 return 1;
91 DPRINTK("find_flow: not found\n"); 78 pr_debug("find_flow: not found\n");
92 return 0; 79 return 0;
93} 80}
94 81
95static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) 82static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
96{ 83{
97 struct atm_qdisc_data *p = PRIV(sch); 84 struct atm_qdisc_data *p = qdisc_priv(sch);
98 struct atm_flow_data *flow; 85 struct atm_flow_data *flow;
99 86
100 for (flow = p->flows; flow; flow = flow->next) 87 for (flow = p->flows; flow; flow = flow->next)
@@ -106,10 +93,10 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
106static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, 93static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
107 struct Qdisc *new, struct Qdisc **old) 94 struct Qdisc *new, struct Qdisc **old)
108{ 95{
109 struct atm_qdisc_data *p = PRIV(sch); 96 struct atm_qdisc_data *p = qdisc_priv(sch);
110 struct atm_flow_data *flow = (struct atm_flow_data *)arg; 97 struct atm_flow_data *flow = (struct atm_flow_data *)arg;
111 98
112 DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", 99 pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
113 sch, p, flow, new, old); 100 sch, p, flow, new, old);
114 if (!find_flow(p, flow)) 101 if (!find_flow(p, flow))
115 return -EINVAL; 102 return -EINVAL;
@@ -125,20 +112,20 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
125{ 112{
126 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 113 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
127 114
128 DPRINTK("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); 115 pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
129 return flow ? flow->q : NULL; 116 return flow ? flow->q : NULL;
130} 117}
131 118
132static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid) 119static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid)
133{ 120{
134 struct atm_qdisc_data *p __maybe_unused = PRIV(sch); 121 struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
135 struct atm_flow_data *flow; 122 struct atm_flow_data *flow;
136 123
137 DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid); 124 pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
138 flow = lookup_flow(sch, classid); 125 flow = lookup_flow(sch, classid);
139 if (flow) 126 if (flow)
140 flow->ref++; 127 flow->ref++;
141 DPRINTK("atm_tc_get: flow %p\n", flow); 128 pr_debug("atm_tc_get: flow %p\n", flow);
142 return (unsigned long)flow; 129 return (unsigned long)flow;
143} 130}
144 131
@@ -155,14 +142,14 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
155 */ 142 */
156static void atm_tc_put(struct Qdisc *sch, unsigned long cl) 143static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
157{ 144{
158 struct atm_qdisc_data *p = PRIV(sch); 145 struct atm_qdisc_data *p = qdisc_priv(sch);
159 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 146 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
160 struct atm_flow_data **prev; 147 struct atm_flow_data **prev;
161 148
162 DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 149 pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
163 if (--flow->ref) 150 if (--flow->ref)
164 return; 151 return;
165 DPRINTK("atm_tc_put: destroying\n"); 152 pr_debug("atm_tc_put: destroying\n");
166 for (prev = &p->flows; *prev; prev = &(*prev)->next) 153 for (prev = &p->flows; *prev; prev = &(*prev)->next)
167 if (*prev == flow) 154 if (*prev == flow)
168 break; 155 break;
@@ -171,11 +158,11 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
171 return; 158 return;
172 } 159 }
173 *prev = flow->next; 160 *prev = flow->next;
174 DPRINTK("atm_tc_put: qdisc %p\n", flow->q); 161 pr_debug("atm_tc_put: qdisc %p\n", flow->q);
175 qdisc_destroy(flow->q); 162 qdisc_destroy(flow->q);
176 tcf_destroy_chain(flow->filter_list); 163 tcf_destroy_chain(flow->filter_list);
177 if (flow->sock) { 164 if (flow->sock) {
178 DPRINTK("atm_tc_put: f_count %d\n", 165 pr_debug("atm_tc_put: f_count %d\n",
179 file_count(flow->sock->file)); 166 file_count(flow->sock->file));
180 flow->vcc->pop = flow->old_pop; 167 flow->vcc->pop = flow->old_pop;
181 sockfd_put(flow->sock); 168 sockfd_put(flow->sock);
@@ -194,7 +181,7 @@ static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
194{ 181{
195 struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; 182 struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
196 183
197 D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); 184 pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
198 VCC2FLOW(vcc)->old_pop(vcc, skb); 185 VCC2FLOW(vcc)->old_pop(vcc, skb);
199 tasklet_schedule(&p->task); 186 tasklet_schedule(&p->task);
200} 187}
@@ -208,19 +195,24 @@ static const u8 llc_oui_ip[] = {
208 0x08, 0x00 195 0x08, 0x00
209}; /* Ethertype IP (0800) */ 196}; /* Ethertype IP (0800) */
210 197
198static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
199 [TCA_ATM_FD] = { .type = NLA_U32 },
200 [TCA_ATM_EXCESS] = { .type = NLA_U32 },
201};
202
211static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, 203static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
212 struct rtattr **tca, unsigned long *arg) 204 struct nlattr **tca, unsigned long *arg)
213{ 205{
214 struct atm_qdisc_data *p = PRIV(sch); 206 struct atm_qdisc_data *p = qdisc_priv(sch);
215 struct atm_flow_data *flow = (struct atm_flow_data *)*arg; 207 struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
216 struct atm_flow_data *excess = NULL; 208 struct atm_flow_data *excess = NULL;
217 struct rtattr *opt = tca[TCA_OPTIONS - 1]; 209 struct nlattr *opt = tca[TCA_OPTIONS];
218 struct rtattr *tb[TCA_ATM_MAX]; 210 struct nlattr *tb[TCA_ATM_MAX + 1];
219 struct socket *sock; 211 struct socket *sock;
220 int fd, error, hdr_len; 212 int fd, error, hdr_len;
221 void *hdr; 213 void *hdr;
222 214
223 DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," 215 pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
224 "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); 216 "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
225 /* 217 /*
226 * The concept of parents doesn't apply for this qdisc. 218 * The concept of parents doesn't apply for this qdisc.
@@ -236,34 +228,38 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
236 */ 228 */
237 if (flow) 229 if (flow)
238 return -EBUSY; 230 return -EBUSY;
239 if (opt == NULL || rtattr_parse_nested(tb, TCA_ATM_MAX, opt)) 231 if (opt == NULL)
240 return -EINVAL; 232 return -EINVAL;
241 if (!tb[TCA_ATM_FD - 1] || RTA_PAYLOAD(tb[TCA_ATM_FD - 1]) < sizeof(fd)) 233
234 error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy);
235 if (error < 0)
236 return error;
237
238 if (!tb[TCA_ATM_FD])
242 return -EINVAL; 239 return -EINVAL;
243 fd = *(int *)RTA_DATA(tb[TCA_ATM_FD - 1]); 240 fd = nla_get_u32(tb[TCA_ATM_FD]);
244 DPRINTK("atm_tc_change: fd %d\n", fd); 241 pr_debug("atm_tc_change: fd %d\n", fd);
245 if (tb[TCA_ATM_HDR - 1]) { 242 if (tb[TCA_ATM_HDR]) {
246 hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR - 1]); 243 hdr_len = nla_len(tb[TCA_ATM_HDR]);
247 hdr = RTA_DATA(tb[TCA_ATM_HDR - 1]); 244 hdr = nla_data(tb[TCA_ATM_HDR]);
248 } else { 245 } else {
249 hdr_len = RFC1483LLC_LEN; 246 hdr_len = RFC1483LLC_LEN;
250 hdr = NULL; /* default LLC/SNAP for IP */ 247 hdr = NULL; /* default LLC/SNAP for IP */
251 } 248 }
252 if (!tb[TCA_ATM_EXCESS - 1]) 249 if (!tb[TCA_ATM_EXCESS])
253 excess = NULL; 250 excess = NULL;
254 else { 251 else {
255 if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS - 1]) != sizeof(u32))
256 return -EINVAL;
257 excess = (struct atm_flow_data *) 252 excess = (struct atm_flow_data *)
258 atm_tc_get(sch, *(u32 *)RTA_DATA(tb[TCA_ATM_EXCESS - 1])); 253 atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
259 if (!excess) 254 if (!excess)
260 return -ENOENT; 255 return -ENOENT;
261 } 256 }
262 DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n", 257 pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
263 opt->rta_type, RTA_PAYLOAD(opt), hdr_len); 258 opt->nla_type, nla_len(opt), hdr_len);
264 if (!(sock = sockfd_lookup(fd, &error))) 259 sock = sockfd_lookup(fd, &error);
260 if (!sock)
265 return error; /* f_count++ */ 261 return error; /* f_count++ */
266 DPRINTK("atm_tc_change: f_count %d\n", file_count(sock->file)); 262 pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file));
267 if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { 263 if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
268 error = -EPROTOTYPE; 264 error = -EPROTOTYPE;
269 goto err_out; 265 goto err_out;
@@ -272,7 +268,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
272 on vcc->send */ 268 on vcc->send */
273 if (classid) { 269 if (classid) {
274 if (TC_H_MAJ(classid ^ sch->handle)) { 270 if (TC_H_MAJ(classid ^ sch->handle)) {
275 DPRINTK("atm_tc_change: classid mismatch\n"); 271 pr_debug("atm_tc_change: classid mismatch\n");
276 error = -EINVAL; 272 error = -EINVAL;
277 goto err_out; 273 goto err_out;
278 } 274 }
@@ -286,26 +282,28 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
286 282
287 for (i = 1; i < 0x8000; i++) { 283 for (i = 1; i < 0x8000; i++) {
288 classid = TC_H_MAKE(sch->handle, 0x8000 | i); 284 classid = TC_H_MAKE(sch->handle, 0x8000 | i);
289 if (!(cl = atm_tc_get(sch, classid))) 285 cl = atm_tc_get(sch, classid);
286 if (!cl)
290 break; 287 break;
291 atm_tc_put(sch, cl); 288 atm_tc_put(sch, cl);
292 } 289 }
293 } 290 }
294 DPRINTK("atm_tc_change: new id %x\n", classid); 291 pr_debug("atm_tc_change: new id %x\n", classid);
295 flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); 292 flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
296 DPRINTK("atm_tc_change: flow %p\n", flow); 293 pr_debug("atm_tc_change: flow %p\n", flow);
297 if (!flow) { 294 if (!flow) {
298 error = -ENOBUFS; 295 error = -ENOBUFS;
299 goto err_out; 296 goto err_out;
300 } 297 }
301 flow->filter_list = NULL; 298 flow->filter_list = NULL;
302 if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) 299 flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
300 if (!flow->q)
303 flow->q = &noop_qdisc; 301 flow->q = &noop_qdisc;
304 DPRINTK("atm_tc_change: qdisc %p\n", flow->q); 302 pr_debug("atm_tc_change: qdisc %p\n", flow->q);
305 flow->sock = sock; 303 flow->sock = sock;
306 flow->vcc = ATM_SD(sock); /* speedup */ 304 flow->vcc = ATM_SD(sock); /* speedup */
307 flow->vcc->user_back = flow; 305 flow->vcc->user_back = flow;
308 DPRINTK("atm_tc_change: vcc %p\n", flow->vcc); 306 pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
309 flow->old_pop = flow->vcc->pop; 307 flow->old_pop = flow->vcc->pop;
310 flow->parent = p; 308 flow->parent = p;
311 flow->vcc->pop = sch_atm_pop; 309 flow->vcc->pop = sch_atm_pop;
@@ -330,11 +328,11 @@ err_out:
330 328
331static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) 329static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
332{ 330{
333 struct atm_qdisc_data *p = PRIV(sch); 331 struct atm_qdisc_data *p = qdisc_priv(sch);
334 struct atm_flow_data *flow = (struct atm_flow_data *)arg; 332 struct atm_flow_data *flow = (struct atm_flow_data *)arg;
335 333
336 DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 334 pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
337 if (!find_flow(PRIV(sch), flow)) 335 if (!find_flow(qdisc_priv(sch), flow))
338 return -EINVAL; 336 return -EINVAL;
339 if (flow->filter_list || flow == &p->link) 337 if (flow->filter_list || flow == &p->link)
340 return -EBUSY; 338 return -EBUSY;
@@ -354,10 +352,10 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
354 352
355static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) 353static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
356{ 354{
357 struct atm_qdisc_data *p = PRIV(sch); 355 struct atm_qdisc_data *p = qdisc_priv(sch);
358 struct atm_flow_data *flow; 356 struct atm_flow_data *flow;
359 357
360 DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); 358 pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
361 if (walker->stop) 359 if (walker->stop)
362 return; 360 return;
363 for (flow = p->flows; flow; flow = flow->next) { 361 for (flow = p->flows; flow; flow = flow->next) {
@@ -372,10 +370,10 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
372 370
373static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl) 371static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
374{ 372{
375 struct atm_qdisc_data *p = PRIV(sch); 373 struct atm_qdisc_data *p = qdisc_priv(sch);
376 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 374 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
377 375
378 DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 376 pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
379 return flow ? &flow->filter_list : &p->link.filter_list; 377 return flow ? &flow->filter_list : &p->link.filter_list;
380} 378}
381 379
@@ -383,13 +381,13 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
383 381
384static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) 382static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
385{ 383{
386 struct atm_qdisc_data *p = PRIV(sch); 384 struct atm_qdisc_data *p = qdisc_priv(sch);
387 struct atm_flow_data *flow = NULL; /* @@@ */ 385 struct atm_flow_data *flow = NULL; /* @@@ */
388 struct tcf_result res; 386 struct tcf_result res;
389 int result; 387 int result;
390 int ret = NET_XMIT_POLICED; 388 int ret = NET_XMIT_POLICED;
391 389
392 D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 390 pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
393 result = TC_POLICE_OK; /* be nice to gcc */ 391 result = TC_POLICE_OK; /* be nice to gcc */
394 if (TC_H_MAJ(skb->priority) != sch->handle || 392 if (TC_H_MAJ(skb->priority) != sch->handle ||
395 !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) 393 !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority)))
@@ -430,7 +428,8 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
430#endif 428#endif
431 } 429 }
432 430
433 if ((ret = flow->q->enqueue(skb, flow->q)) != 0) { 431 ret = flow->q->enqueue(skb, flow->q);
432 if (ret != 0) {
434drop: __maybe_unused 433drop: __maybe_unused
435 sch->qstats.drops++; 434 sch->qstats.drops++;
436 if (flow) 435 if (flow)
@@ -468,11 +467,11 @@ drop: __maybe_unused
468static void sch_atm_dequeue(unsigned long data) 467static void sch_atm_dequeue(unsigned long data)
469{ 468{
470 struct Qdisc *sch = (struct Qdisc *)data; 469 struct Qdisc *sch = (struct Qdisc *)data;
471 struct atm_qdisc_data *p = PRIV(sch); 470 struct atm_qdisc_data *p = qdisc_priv(sch);
472 struct atm_flow_data *flow; 471 struct atm_flow_data *flow;
473 struct sk_buff *skb; 472 struct sk_buff *skb;
474 473
475 D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); 474 pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
476 for (flow = p->link.next; flow; flow = flow->next) 475 for (flow = p->link.next; flow; flow = flow->next)
477 /* 476 /*
478 * If traffic is properly shaped, this won't generate nasty 477 * If traffic is properly shaped, this won't generate nasty
@@ -483,7 +482,7 @@ static void sch_atm_dequeue(unsigned long data)
483 (void)flow->q->ops->requeue(skb, flow->q); 482 (void)flow->q->ops->requeue(skb, flow->q);
484 break; 483 break;
485 } 484 }
486 D2PRINTK("atm_tc_dequeue: sending on class %p\n", flow); 485 pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
487 /* remove any LL header somebody else has attached */ 486 /* remove any LL header somebody else has attached */
488 skb_pull(skb, skb_network_offset(skb)); 487 skb_pull(skb, skb_network_offset(skb));
489 if (skb_headroom(skb) < flow->hdr_len) { 488 if (skb_headroom(skb) < flow->hdr_len) {
@@ -495,7 +494,7 @@ static void sch_atm_dequeue(unsigned long data)
495 continue; 494 continue;
496 skb = new; 495 skb = new;
497 } 496 }
498 D2PRINTK("sch_atm_dequeue: ip %p, data %p\n", 497 pr_debug("sch_atm_dequeue: ip %p, data %p\n",
499 skb_network_header(skb), skb->data); 498 skb_network_header(skb), skb->data);
500 ATM_SKB(skb)->vcc = flow->vcc; 499 ATM_SKB(skb)->vcc = flow->vcc;
501 memcpy(skb_push(skb, flow->hdr_len), flow->hdr, 500 memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
@@ -509,10 +508,10 @@ static void sch_atm_dequeue(unsigned long data)
509 508
510static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) 509static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
511{ 510{
512 struct atm_qdisc_data *p = PRIV(sch); 511 struct atm_qdisc_data *p = qdisc_priv(sch);
513 struct sk_buff *skb; 512 struct sk_buff *skb;
514 513
515 D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); 514 pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
516 tasklet_schedule(&p->task); 515 tasklet_schedule(&p->task);
517 skb = p->link.q->dequeue(p->link.q); 516 skb = p->link.q->dequeue(p->link.q);
518 if (skb) 517 if (skb)
@@ -522,10 +521,10 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
522 521
523static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) 522static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
524{ 523{
525 struct atm_qdisc_data *p = PRIV(sch); 524 struct atm_qdisc_data *p = qdisc_priv(sch);
526 int ret; 525 int ret;
527 526
528 D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 527 pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
529 ret = p->link.q->ops->requeue(skb, p->link.q); 528 ret = p->link.q->ops->requeue(skb, p->link.q);
530 if (!ret) { 529 if (!ret) {
531 sch->q.qlen++; 530 sch->q.qlen++;
@@ -539,27 +538,27 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
539 538
540static unsigned int atm_tc_drop(struct Qdisc *sch) 539static unsigned int atm_tc_drop(struct Qdisc *sch)
541{ 540{
542 struct atm_qdisc_data *p = PRIV(sch); 541 struct atm_qdisc_data *p = qdisc_priv(sch);
543 struct atm_flow_data *flow; 542 struct atm_flow_data *flow;
544 unsigned int len; 543 unsigned int len;
545 544
546 DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); 545 pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
547 for (flow = p->flows; flow; flow = flow->next) 546 for (flow = p->flows; flow; flow = flow->next)
548 if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) 547 if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
549 return len; 548 return len;
550 return 0; 549 return 0;
551} 550}
552 551
553static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt) 552static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
554{ 553{
555 struct atm_qdisc_data *p = PRIV(sch); 554 struct atm_qdisc_data *p = qdisc_priv(sch);
556 555
557 DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); 556 pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
558 p->flows = &p->link; 557 p->flows = &p->link;
559 if (!(p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, 558 p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
560 sch->handle))) 559 if (!p->link.q)
561 p->link.q = &noop_qdisc; 560 p->link.q = &noop_qdisc;
562 DPRINTK("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); 561 pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
563 p->link.filter_list = NULL; 562 p->link.filter_list = NULL;
564 p->link.vcc = NULL; 563 p->link.vcc = NULL;
565 p->link.sock = NULL; 564 p->link.sock = NULL;
@@ -572,10 +571,10 @@ static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt)
572 571
573static void atm_tc_reset(struct Qdisc *sch) 572static void atm_tc_reset(struct Qdisc *sch)
574{ 573{
575 struct atm_qdisc_data *p = PRIV(sch); 574 struct atm_qdisc_data *p = qdisc_priv(sch);
576 struct atm_flow_data *flow; 575 struct atm_flow_data *flow;
577 576
578 DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); 577 pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
579 for (flow = p->flows; flow; flow = flow->next) 578 for (flow = p->flows; flow; flow = flow->next)
580 qdisc_reset(flow->q); 579 qdisc_reset(flow->q);
581 sch->q.qlen = 0; 580 sch->q.qlen = 0;
@@ -583,10 +582,10 @@ static void atm_tc_reset(struct Qdisc *sch)
583 582
584static void atm_tc_destroy(struct Qdisc *sch) 583static void atm_tc_destroy(struct Qdisc *sch)
585{ 584{
586 struct atm_qdisc_data *p = PRIV(sch); 585 struct atm_qdisc_data *p = qdisc_priv(sch);
587 struct atm_flow_data *flow; 586 struct atm_flow_data *flow;
588 587
589 DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); 588 pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
590 /* races ? */ 589 /* races ? */
591 while ((flow = p->flows)) { 590 while ((flow = p->flows)) {
592 tcf_destroy_chain(flow->filter_list); 591 tcf_destroy_chain(flow->filter_list);
@@ -608,20 +607,22 @@ static void atm_tc_destroy(struct Qdisc *sch)
608static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, 607static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
609 struct sk_buff *skb, struct tcmsg *tcm) 608 struct sk_buff *skb, struct tcmsg *tcm)
610{ 609{
611 struct atm_qdisc_data *p = PRIV(sch); 610 struct atm_qdisc_data *p = qdisc_priv(sch);
612 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 611 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
613 unsigned char *b = skb_tail_pointer(skb); 612 struct nlattr *nest;
614 struct rtattr *rta;
615 613
616 DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", 614 pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
617 sch, p, flow, skb, tcm); 615 sch, p, flow, skb, tcm);
618 if (!find_flow(p, flow)) 616 if (!find_flow(p, flow))
619 return -EINVAL; 617 return -EINVAL;
620 tcm->tcm_handle = flow->classid; 618 tcm->tcm_handle = flow->classid;
621 tcm->tcm_info = flow->q->handle; 619 tcm->tcm_info = flow->q->handle;
622 rta = (struct rtattr *)b; 620
623 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 621 nest = nla_nest_start(skb, TCA_OPTIONS);
624 RTA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr); 622 if (nest == NULL)
623 goto nla_put_failure;
624
625 NLA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr);
625 if (flow->vcc) { 626 if (flow->vcc) {
626 struct sockaddr_atmpvc pvc; 627 struct sockaddr_atmpvc pvc;
627 int state; 628 int state;
@@ -630,22 +631,21 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
630 pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; 631 pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
631 pvc.sap_addr.vpi = flow->vcc->vpi; 632 pvc.sap_addr.vpi = flow->vcc->vpi;
632 pvc.sap_addr.vci = flow->vcc->vci; 633 pvc.sap_addr.vci = flow->vcc->vci;
633 RTA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc); 634 NLA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc);
634 state = ATM_VF2VS(flow->vcc->flags); 635 state = ATM_VF2VS(flow->vcc->flags);
635 RTA_PUT(skb, TCA_ATM_STATE, sizeof(state), &state); 636 NLA_PUT_U32(skb, TCA_ATM_STATE, state);
636 } 637 }
637 if (flow->excess) 638 if (flow->excess)
638 RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(u32), &flow->classid); 639 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
639 else { 640 else {
640 static u32 zero; 641 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
641
642 RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero);
643 } 642 }
644 rta->rta_len = skb_tail_pointer(skb) - b; 643
644 nla_nest_end(skb, nest);
645 return skb->len; 645 return skb->len;
646 646
647rtattr_failure: 647nla_put_failure:
648 nlmsg_trim(skb, b); 648 nla_nest_cancel(skb, nest);
649 return -1; 649 return -1;
650} 650}
651static int 651static int
@@ -668,7 +668,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
668 return 0; 668 return 0;
669} 669}
670 670
671static struct Qdisc_class_ops atm_class_ops = { 671static const struct Qdisc_class_ops atm_class_ops = {
672 .graft = atm_tc_graft, 672 .graft = atm_tc_graft,
673 .leaf = atm_tc_leaf, 673 .leaf = atm_tc_leaf,
674 .get = atm_tc_get, 674 .get = atm_tc_get,
@@ -683,7 +683,7 @@ static struct Qdisc_class_ops atm_class_ops = {
683 .dump_stats = atm_tc_dump_class_stats, 683 .dump_stats = atm_tc_dump_class_stats,
684}; 684};
685 685
686static struct Qdisc_ops atm_qdisc_ops = { 686static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
687 .cl_ops = &atm_class_ops, 687 .cl_ops = &atm_class_ops,
688 .id = "atm", 688 .id = "atm",
689 .priv_size = sizeof(struct atm_qdisc_data), 689 .priv_size = sizeof(struct atm_qdisc_data),
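
sch_atm.c condenses the two recurring conversions of this series. Validation moves into a static nla_policy table: nla_parse_nested() checks each attribute's type and length against the table, so the hand-rolled RTA_PAYLOAD() size checks in atm_tc_change() simply disappear, and typed accessors (nla_get_u32(), nla_len(), nla_data()) replace the RTA_DATA() casts — including NLA_PUT_U32() on the dump side, which removes the static zero temporary. A hedged sketch of the parse half, with hypothetical TCA_EXAMPLE_* attributes standing in for TCA_ATM_*:

        enum {
                TCA_EXAMPLE_UNSPEC,
                TCA_EXAMPLE_FD,
                TCA_EXAMPLE_EXCESS,
                __TCA_EXAMPLE_MAX
        };
        #define TCA_EXAMPLE_MAX (__TCA_EXAMPLE_MAX - 1)

        static const struct nla_policy example_policy[TCA_EXAMPLE_MAX + 1] = {
                [TCA_EXAMPLE_FD]     = { .type = NLA_U32 },
                [TCA_EXAMPLE_EXCESS] = { .type = NLA_U32 },
        };

        static int example_parse(struct nlattr *opt, u32 *fd)
        {
                struct nlattr *tb[TCA_EXAMPLE_MAX + 1];
                int err;

                if (opt == NULL)
                        return -EINVAL;
                err = nla_parse_nested(tb, TCA_EXAMPLE_MAX, opt, example_policy);
                if (err < 0)                    /* wrong type or short payload */
                        return err;
                if (!tb[TCA_EXAMPLE_FD])        /* policy can't enforce presence */
                        return -EINVAL;
                *fd = nla_get_u32(tb[TCA_EXAMPLE_FD]);
                return 0;
        }

Note that the presence check stays with the caller: a policy rejects malformed attributes, but only the qdisc knows which ones are mandatory.
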
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index f914fc43a124..507fb488bc98 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -28,7 +28,7 @@ static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
28 return NULL; 28 return NULL;
29} 29}
30 30
31static struct Qdisc_ops blackhole_qdisc_ops = { 31static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
32 .id = "blackhole", 32 .id = "blackhole",
33 .priv_size = 0, 33 .priv_size = 0,
34 .enqueue = blackhole_enqueue, 34 .enqueue = blackhole_enqueue,
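
The one-word change to blackhole_qdisc_ops recurs across every scheduler in this series: a Qdisc_ops table is written once at registration (register_qdisc() still links tables through their ->next pointer, which is why they stay non-const) and afterwards only read on the fast path, so __read_mostly groups it with other rarely-written data and keeps it off hot, frequently-written cache lines. The class-ops tables, which are never written at all, additionally become const, as atm_class_ops above and cbq_class_ops below.
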
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4de3744e65c3..09969c1fbc08 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1377,24 +1377,33 @@ static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1377 return 0; 1377 return 0;
1378} 1378}
1379 1379
1380static int cbq_init(struct Qdisc *sch, struct rtattr *opt) 1380static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1381 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1382 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1383 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1384 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1385 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1386 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1387 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1388};
1389
1390static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1381{ 1391{
1382 struct cbq_sched_data *q = qdisc_priv(sch); 1392 struct cbq_sched_data *q = qdisc_priv(sch);
1383 struct rtattr *tb[TCA_CBQ_MAX]; 1393 struct nlattr *tb[TCA_CBQ_MAX + 1];
1384 struct tc_ratespec *r; 1394 struct tc_ratespec *r;
1395 int err;
1385 1396
1386 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 || 1397 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1387 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL || 1398 if (err < 0)
1388 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec)) 1399 return err;
1389 return -EINVAL;
1390 1400
1391 if (tb[TCA_CBQ_LSSOPT-1] && 1401 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1392 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1393 return -EINVAL; 1402 return -EINVAL;
1394 1403
1395 r = RTA_DATA(tb[TCA_CBQ_RATE-1]); 1404 r = nla_data(tb[TCA_CBQ_RATE]);
1396 1405
1397 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) 1406 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1398 return -EINVAL; 1407 return -EINVAL;
1399 1408
1400 q->link.refcnt = 1; 1409 q->link.refcnt = 1;
@@ -1427,8 +1436,8 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1427 1436
1428 cbq_link_class(&q->link); 1437 cbq_link_class(&q->link);
1429 1438
1430 if (tb[TCA_CBQ_LSSOPT-1]) 1439 if (tb[TCA_CBQ_LSSOPT])
1431 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); 1440 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1432 1441
1433 cbq_addprio(q, &q->link); 1442 cbq_addprio(q, &q->link);
1434 return 0; 1443 return 0;
@@ -1438,10 +1447,10 @@ static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1438{ 1447{
1439 unsigned char *b = skb_tail_pointer(skb); 1448 unsigned char *b = skb_tail_pointer(skb);
1440 1449
1441 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate); 1450 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1442 return skb->len; 1451 return skb->len;
1443 1452
1444rtattr_failure: 1453nla_put_failure:
1445 nlmsg_trim(skb, b); 1454 nlmsg_trim(skb, b);
1446 return -1; 1455 return -1;
1447} 1456}
@@ -1463,10 +1472,10 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1463 opt.minidle = (u32)(-cl->minidle); 1472 opt.minidle = (u32)(-cl->minidle);
1464 opt.offtime = cl->offtime; 1473 opt.offtime = cl->offtime;
1465 opt.change = ~0; 1474 opt.change = ~0;
1466 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt); 1475 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1467 return skb->len; 1476 return skb->len;
1468 1477
1469rtattr_failure: 1478nla_put_failure:
1470 nlmsg_trim(skb, b); 1479 nlmsg_trim(skb, b);
1471 return -1; 1480 return -1;
1472} 1481}
@@ -1481,10 +1490,10 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1481 opt.priority = cl->priority+1; 1490 opt.priority = cl->priority+1;
1482 opt.cpriority = cl->cpriority+1; 1491 opt.cpriority = cl->cpriority+1;
1483 opt.weight = cl->weight; 1492 opt.weight = cl->weight;
1484 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); 1493 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1485 return skb->len; 1494 return skb->len;
1486 1495
1487rtattr_failure: 1496nla_put_failure:
1488 nlmsg_trim(skb, b); 1497 nlmsg_trim(skb, b);
1489 return -1; 1498 return -1;
1490} 1499}
@@ -1498,10 +1507,10 @@ static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1498 opt.priority2 = cl->priority2+1; 1507 opt.priority2 = cl->priority2+1;
1499 opt.pad = 0; 1508 opt.pad = 0;
1500 opt.penalty = cl->penalty; 1509 opt.penalty = cl->penalty;
1501 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); 1510 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1502 return skb->len; 1511 return skb->len;
1503 1512
1504rtattr_failure: 1513nla_put_failure:
1505 nlmsg_trim(skb, b); 1514 nlmsg_trim(skb, b);
1506 return -1; 1515 return -1;
1507} 1516}
@@ -1515,11 +1524,11 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1515 opt.split = cl->split ? cl->split->classid : 0; 1524 opt.split = cl->split ? cl->split->classid : 0;
1516 opt.defmap = cl->defmap; 1525 opt.defmap = cl->defmap;
1517 opt.defchange = ~0; 1526 opt.defchange = ~0;
1518 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt); 1527 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1519 } 1528 }
1520 return skb->len; 1529 return skb->len;
1521 1530
1522rtattr_failure: 1531nla_put_failure:
1523 nlmsg_trim(skb, b); 1532 nlmsg_trim(skb, b);
1524 return -1; 1533 return -1;
1525} 1534}
@@ -1534,11 +1543,11 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1534 opt.police = cl->police; 1543 opt.police = cl->police;
1535 opt.__res1 = 0; 1544 opt.__res1 = 0;
1536 opt.__res2 = 0; 1545 opt.__res2 = 0;
1537 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt); 1546 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1538 } 1547 }
1539 return skb->len; 1548 return skb->len;
1540 1549
1541rtattr_failure: 1550nla_put_failure:
1542 nlmsg_trim(skb, b); 1551 nlmsg_trim(skb, b);
1543 return -1; 1552 return -1;
1544} 1553}
@@ -1561,18 +1570,18 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1561static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) 1570static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1562{ 1571{
1563 struct cbq_sched_data *q = qdisc_priv(sch); 1572 struct cbq_sched_data *q = qdisc_priv(sch);
1564 unsigned char *b = skb_tail_pointer(skb); 1573 struct nlattr *nest;
1565 struct rtattr *rta;
1566 1574
1567 rta = (struct rtattr*)b; 1575 nest = nla_nest_start(skb, TCA_OPTIONS);
1568 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1576 if (nest == NULL)
1577 goto nla_put_failure;
1569 if (cbq_dump_attr(skb, &q->link) < 0) 1578 if (cbq_dump_attr(skb, &q->link) < 0)
1570 goto rtattr_failure; 1579 goto nla_put_failure;
1571 rta->rta_len = skb_tail_pointer(skb) - b; 1580 nla_nest_end(skb, nest);
1572 return skb->len; 1581 return skb->len;
1573 1582
1574rtattr_failure: 1583nla_put_failure:
1575 nlmsg_trim(skb, b); 1584 nla_nest_cancel(skb, nest);
1576 return -1; 1585 return -1;
1577} 1586}
1578 1587
@@ -1590,8 +1599,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1590 struct sk_buff *skb, struct tcmsg *tcm) 1599 struct sk_buff *skb, struct tcmsg *tcm)
1591{ 1600{
1592 struct cbq_class *cl = (struct cbq_class*)arg; 1601 struct cbq_class *cl = (struct cbq_class*)arg;
1593 unsigned char *b = skb_tail_pointer(skb); 1602 struct nlattr *nest;
1594 struct rtattr *rta;
1595 1603
1596 if (cl->tparent) 1604 if (cl->tparent)
1597 tcm->tcm_parent = cl->tparent->classid; 1605 tcm->tcm_parent = cl->tparent->classid;
@@ -1600,15 +1608,16 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1600 tcm->tcm_handle = cl->classid; 1608 tcm->tcm_handle = cl->classid;
1601 tcm->tcm_info = cl->q->handle; 1609 tcm->tcm_info = cl->q->handle;
1602 1610
1603 rta = (struct rtattr*)b; 1611 nest = nla_nest_start(skb, TCA_OPTIONS);
1604 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1612 if (nest == NULL)
1613 goto nla_put_failure;
1605 if (cbq_dump_attr(skb, cl) < 0) 1614 if (cbq_dump_attr(skb, cl) < 0)
1606 goto rtattr_failure; 1615 goto nla_put_failure;
1607 rta->rta_len = skb_tail_pointer(skb) - b; 1616 nla_nest_end(skb, nest);
1608 return skb->len; 1617 return skb->len;
1609 1618
1610rtattr_failure: 1619nla_put_failure:
1611 nlmsg_trim(skb, b); 1620 nla_nest_cancel(skb, nest);
1612 return -1; 1621 return -1;
1613} 1622}
1614 1623
@@ -1753,45 +1762,23 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
1753} 1762}
1754 1763
1755static int 1764static int
1756cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca, 1765cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1757 unsigned long *arg) 1766 unsigned long *arg)
1758{ 1767{
1759 int err; 1768 int err;
1760 struct cbq_sched_data *q = qdisc_priv(sch); 1769 struct cbq_sched_data *q = qdisc_priv(sch);
1761 struct cbq_class *cl = (struct cbq_class*)*arg; 1770 struct cbq_class *cl = (struct cbq_class*)*arg;
1762 struct rtattr *opt = tca[TCA_OPTIONS-1]; 1771 struct nlattr *opt = tca[TCA_OPTIONS];
1763 struct rtattr *tb[TCA_CBQ_MAX]; 1772 struct nlattr *tb[TCA_CBQ_MAX + 1];
1764 struct cbq_class *parent; 1773 struct cbq_class *parent;
1765 struct qdisc_rate_table *rtab = NULL; 1774 struct qdisc_rate_table *rtab = NULL;
1766 1775
1767 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt)) 1776 if (opt == NULL)
1768 return -EINVAL; 1777 return -EINVAL;
1769 1778
1770 if (tb[TCA_CBQ_OVL_STRATEGY-1] && 1779 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1771 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl)) 1780 if (err < 0)
1772 return -EINVAL; 1781 return err;
1773
1774 if (tb[TCA_CBQ_FOPT-1] &&
1775 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1776 return -EINVAL;
1777
1778 if (tb[TCA_CBQ_RATE-1] &&
1779 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1780 return -EINVAL;
1781
1782 if (tb[TCA_CBQ_LSSOPT-1] &&
1783 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1784 return -EINVAL;
1785
1786 if (tb[TCA_CBQ_WRROPT-1] &&
1787 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1788 return -EINVAL;
1789
1790#ifdef CONFIG_NET_CLS_ACT
1791 if (tb[TCA_CBQ_POLICE-1] &&
1792 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1793 return -EINVAL;
1794#endif
1795 1782
1796 if (cl) { 1783 if (cl) {
1797 /* Check parent */ 1784 /* Check parent */
@@ -1802,8 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
1802 return -EINVAL; 1789 return -EINVAL;
1803 } 1790 }
1804 1791
1805 if (tb[TCA_CBQ_RATE-1]) { 1792 if (tb[TCA_CBQ_RATE]) {
1806 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]); 1793 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1807 if (rtab == NULL) 1794 if (rtab == NULL)
1808 return -EINVAL; 1795 return -EINVAL;
1809 } 1796 }
@@ -1819,45 +1806,45 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
1819 qdisc_put_rtab(rtab); 1806 qdisc_put_rtab(rtab);
1820 } 1807 }
1821 1808
1822 if (tb[TCA_CBQ_LSSOPT-1]) 1809 if (tb[TCA_CBQ_LSSOPT])
1823 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); 1810 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1824 1811
1825 if (tb[TCA_CBQ_WRROPT-1]) { 1812 if (tb[TCA_CBQ_WRROPT]) {
1826 cbq_rmprio(q, cl); 1813 cbq_rmprio(q, cl);
1827 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1])); 1814 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1828 } 1815 }
1829 1816
1830 if (tb[TCA_CBQ_OVL_STRATEGY-1]) 1817 if (tb[TCA_CBQ_OVL_STRATEGY])
1831 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); 1818 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1832 1819
1833#ifdef CONFIG_NET_CLS_ACT 1820#ifdef CONFIG_NET_CLS_ACT
1834 if (tb[TCA_CBQ_POLICE-1]) 1821 if (tb[TCA_CBQ_POLICE])
1835 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); 1822 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1836#endif 1823#endif
1837 1824
1838 if (tb[TCA_CBQ_FOPT-1]) 1825 if (tb[TCA_CBQ_FOPT])
1839 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); 1826 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1840 1827
1841 if (cl->q->q.qlen) 1828 if (cl->q->q.qlen)
1842 cbq_activate_class(cl); 1829 cbq_activate_class(cl);
1843 1830
1844 sch_tree_unlock(sch); 1831 sch_tree_unlock(sch);
1845 1832
1846 if (tca[TCA_RATE-1]) 1833 if (tca[TCA_RATE])
1847 gen_replace_estimator(&cl->bstats, &cl->rate_est, 1834 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1848 &sch->dev->queue_lock, 1835 &sch->dev->queue_lock,
1849 tca[TCA_RATE-1]); 1836 tca[TCA_RATE]);
1850 return 0; 1837 return 0;
1851 } 1838 }
1852 1839
1853 if (parentid == TC_H_ROOT) 1840 if (parentid == TC_H_ROOT)
1854 return -EINVAL; 1841 return -EINVAL;
1855 1842
1856 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL || 1843 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1857 tb[TCA_CBQ_LSSOPT-1] == NULL) 1844 tb[TCA_CBQ_LSSOPT] == NULL)
1858 return -EINVAL; 1845 return -EINVAL;
1859 1846
1860 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]); 1847 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1861 if (rtab == NULL) 1848 if (rtab == NULL)
1862 return -EINVAL; 1849 return -EINVAL;
1863 1850
@@ -1912,8 +1899,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
1912 cl->share = cl->tparent; 1899 cl->share = cl->tparent;
1913 cbq_adjust_levels(parent); 1900 cbq_adjust_levels(parent);
1914 cl->minidle = -0x7FFFFFFF; 1901 cl->minidle = -0x7FFFFFFF;
1915 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1])); 1902 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1916 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1])); 1903 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1917 if (cl->ewma_log==0) 1904 if (cl->ewma_log==0)
1918 cl->ewma_log = q->link.ewma_log; 1905 cl->ewma_log = q->link.ewma_log;
1919 if (cl->maxidle==0) 1906 if (cl->maxidle==0)
@@ -1921,19 +1908,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
1921 if (cl->avpkt==0) 1908 if (cl->avpkt==0)
1922 cl->avpkt = q->link.avpkt; 1909 cl->avpkt = q->link.avpkt;
1923 cl->overlimit = cbq_ovl_classic; 1910 cl->overlimit = cbq_ovl_classic;
1924 if (tb[TCA_CBQ_OVL_STRATEGY-1]) 1911 if (tb[TCA_CBQ_OVL_STRATEGY])
1925 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); 1912 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1926#ifdef CONFIG_NET_CLS_ACT 1913#ifdef CONFIG_NET_CLS_ACT
1927 if (tb[TCA_CBQ_POLICE-1]) 1914 if (tb[TCA_CBQ_POLICE])
1928 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); 1915 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1929#endif 1916#endif
1930 if (tb[TCA_CBQ_FOPT-1]) 1917 if (tb[TCA_CBQ_FOPT])
1931 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); 1918 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1932 sch_tree_unlock(sch); 1919 sch_tree_unlock(sch);
1933 1920
1934 if (tca[TCA_RATE-1]) 1921 if (tca[TCA_RATE])
1935 gen_new_estimator(&cl->bstats, &cl->rate_est, 1922 gen_new_estimator(&cl->bstats, &cl->rate_est,
1936 &sch->dev->queue_lock, tca[TCA_RATE-1]); 1923 &sch->dev->queue_lock, tca[TCA_RATE]);
1937 1924
1938 *arg = (unsigned long)cl; 1925 *arg = (unsigned long)cl;
1939 return 0; 1926 return 0;
@@ -2045,7 +2032,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2045 } 2032 }
2046} 2033}
2047 2034
2048static struct Qdisc_class_ops cbq_class_ops = { 2035static const struct Qdisc_class_ops cbq_class_ops = {
2049 .graft = cbq_graft, 2036 .graft = cbq_graft,
2050 .leaf = cbq_leaf, 2037 .leaf = cbq_leaf,
2051 .qlen_notify = cbq_qlen_notify, 2038 .qlen_notify = cbq_qlen_notify,
@@ -2061,7 +2048,7 @@ static struct Qdisc_class_ops cbq_class_ops = {
2061 .dump_stats = cbq_dump_class_stats, 2048 .dump_stats = cbq_dump_class_stats,
2062}; 2049};
2063 2050
2064static struct Qdisc_ops cbq_qdisc_ops = { 2051static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2065 .next = NULL, 2052 .next = NULL,
2066 .cl_ops = &cbq_class_ops, 2053 .cl_ops = &cbq_class_ops,
2067 .id = "cbq", 2054 .id = "cbq",
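
On the dump side, cbq shows the companion conversion: nla_nest_start()/nla_nest_end() replace the manual "remember the tail pointer, fix up rta->rta_len afterwards" dance, and on error nla_nest_cancel() trims the partially built nest away. The policy table also explains the large deletion in cbq_change_class(): a .len entry acts as a minimum length for untyped struct attributes (e.g. sizeof(struct tc_cbq_lssopt)) and as a maximum for NLA_BINARY ones such as TCA_CBQ_RTAB, so the per-attribute RTA_PAYLOAD() checks became redundant. A minimal sketch of the nested dump (the rate value is a placeholder):

        static int example_dump_opts(struct Qdisc *sch, struct sk_buff *skb)
        {
                struct tc_ratespec rate = { .rate = 125000 };  /* placeholder */
                struct nlattr *nest;

                nest = nla_nest_start(skb, TCA_OPTIONS);  /* open the container */
                if (nest == NULL)
                        goto nla_put_failure;
                NLA_PUT(skb, TCA_CBQ_RATE, sizeof(rate), &rate);
                nla_nest_end(skb, nest);   /* write the container's final length */
                return skb->len;

        nla_put_failure:
                nla_nest_cancel(skb, nest);  /* trims back; a NULL nest is a no-op */
                return -1;
        }
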
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 60f89199e3da..0df911fd67b1 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -10,28 +10,12 @@
10#include <linux/errno.h> 10#include <linux/errno.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/rtnetlink.h> 12#include <linux/rtnetlink.h>
13#include <linux/bitops.h>
13#include <net/pkt_sched.h> 14#include <net/pkt_sched.h>
14#include <net/dsfield.h> 15#include <net/dsfield.h>
15#include <net/inet_ecn.h> 16#include <net/inet_ecn.h>
16#include <asm/byteorder.h> 17#include <asm/byteorder.h>
17 18
18
19#if 0 /* control */
20#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
21#else
22#define DPRINTK(format,args...)
23#endif
24
25#if 0 /* data */
26#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
27#else
28#define D2PRINTK(format,args...)
29#endif
30
31
32#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch))
33
34
35/* 19/*
36 * classid class marking 20 * classid class marking
37 * ------- ----- ------- 21 * ------- ----- -------
@@ -60,17 +44,6 @@ struct dsmark_qdisc_data {
60 int set_tc_index; 44 int set_tc_index;
61}; 45};
62 46
63static inline int dsmark_valid_indices(u16 indices)
64{
65 while (indices != 1) {
66 if (indices & 1)
67 return 0;
68 indices >>= 1;
69 }
70
71 return 1;
72}
73
74static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) 47static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
75{ 48{
76 return (index <= p->indices && index > 0); 49 return (index <= p->indices && index > 0);
@@ -81,9 +54,9 @@ static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
81static int dsmark_graft(struct Qdisc *sch, unsigned long arg, 54static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
82 struct Qdisc *new, struct Qdisc **old) 55 struct Qdisc *new, struct Qdisc **old)
83{ 56{
84 struct dsmark_qdisc_data *p = PRIV(sch); 57 struct dsmark_qdisc_data *p = qdisc_priv(sch);
85 58
86 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n", 59 pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
87 sch, p, new, old); 60 sch, p, new, old);
88 61
89 if (new == NULL) { 62 if (new == NULL) {
@@ -104,13 +77,14 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
104 77
105static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) 78static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
106{ 79{
107 return PRIV(sch)->q; 80 struct dsmark_qdisc_data *p = qdisc_priv(sch);
81 return p->q;
108} 82}
109 83
110static unsigned long dsmark_get(struct Qdisc *sch, u32 classid) 84static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
111{ 85{
112 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n", 86 pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
113 sch, PRIV(sch), classid); 87 sch, qdisc_priv(sch), classid);
114 88
115 return TC_H_MIN(classid) + 1; 89 return TC_H_MIN(classid) + 1;
116} 90}
@@ -125,44 +99,56 @@ static void dsmark_put(struct Qdisc *sch, unsigned long cl)
125{ 99{
126} 100}
127 101
102static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
103 [TCA_DSMARK_INDICES] = { .type = NLA_U16 },
104 [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 },
105 [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG },
106 [TCA_DSMARK_MASK] = { .type = NLA_U8 },
107 [TCA_DSMARK_VALUE] = { .type = NLA_U8 },
108};
109
128static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, 110static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
129 struct rtattr **tca, unsigned long *arg) 111 struct nlattr **tca, unsigned long *arg)
130{ 112{
131 struct dsmark_qdisc_data *p = PRIV(sch); 113 struct dsmark_qdisc_data *p = qdisc_priv(sch);
132 struct rtattr *opt = tca[TCA_OPTIONS-1]; 114 struct nlattr *opt = tca[TCA_OPTIONS];
133 struct rtattr *tb[TCA_DSMARK_MAX]; 115 struct nlattr *tb[TCA_DSMARK_MAX + 1];
134 int err = -EINVAL; 116 int err = -EINVAL;
135 u8 mask = 0; 117 u8 mask = 0;
136 118
137 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," 119 pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
138 "arg 0x%lx\n", sch, p, classid, parent, *arg); 120 "arg 0x%lx\n", sch, p, classid, parent, *arg);
139 121
140 if (!dsmark_valid_index(p, *arg)) { 122 if (!dsmark_valid_index(p, *arg)) {
141 err = -ENOENT; 123 err = -ENOENT;
142 goto rtattr_failure; 124 goto errout;
143 } 125 }
144 126
145 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt)) 127 if (!opt)
146 goto rtattr_failure; 128 goto errout;
147 129
148 if (tb[TCA_DSMARK_MASK-1]) 130 err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
149 mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]); 131 if (err < 0)
132 goto errout;
133
134 if (tb[TCA_DSMARK_MASK])
135 mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
150 136
151 if (tb[TCA_DSMARK_VALUE-1]) 137 if (tb[TCA_DSMARK_VALUE])
152 p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]); 138 p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
153 139
154 if (tb[TCA_DSMARK_MASK-1]) 140 if (tb[TCA_DSMARK_MASK])
155 p->mask[*arg-1] = mask; 141 p->mask[*arg-1] = mask;
156 142
157 err = 0; 143 err = 0;
158 144
159rtattr_failure: 145errout:
160 return err; 146 return err;
161} 147}
162 148
163static int dsmark_delete(struct Qdisc *sch, unsigned long arg) 149static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
164{ 150{
165 struct dsmark_qdisc_data *p = PRIV(sch); 151 struct dsmark_qdisc_data *p = qdisc_priv(sch);
166 152
167 if (!dsmark_valid_index(p, arg)) 153 if (!dsmark_valid_index(p, arg))
168 return -EINVAL; 154 return -EINVAL;
@@ -173,12 +159,12 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
173 return 0; 159 return 0;
174} 160}
175 161
176static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) 162static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
177{ 163{
178 struct dsmark_qdisc_data *p = PRIV(sch); 164 struct dsmark_qdisc_data *p = qdisc_priv(sch);
179 int i; 165 int i;
180 166
181 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); 167 pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
182 168
183 if (walker->stop) 169 if (walker->stop)
184 return; 170 return;
@@ -197,34 +183,42 @@ ignore:
197 } 183 }
198} 184}
199 185
200static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl) 186static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,
187 unsigned long cl)
201{ 188{
202 return &PRIV(sch)->filter_list; 189 struct dsmark_qdisc_data *p = qdisc_priv(sch);
190 return &p->filter_list;
203} 191}
204 192
205/* --------------------------- Qdisc operations ---------------------------- */ 193/* --------------------------- Qdisc operations ---------------------------- */
206 194
207static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) 195static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
208{ 196{
209 struct dsmark_qdisc_data *p = PRIV(sch); 197 struct dsmark_qdisc_data *p = qdisc_priv(sch);
210 int err; 198 int err;
211 199
212 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 200 pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
213 201
214 if (p->set_tc_index) { 202 if (p->set_tc_index) {
215 /* FIXME: Safe with non-linear skbs? --RR */
216 switch (skb->protocol) { 203 switch (skb->protocol) {
217 case __constant_htons(ETH_P_IP): 204 case __constant_htons(ETH_P_IP):
218 skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) 205 if (skb_cow_head(skb, sizeof(struct iphdr)))
219 & ~INET_ECN_MASK; 206 goto drop;
220 break; 207
221 case __constant_htons(ETH_P_IPV6): 208 skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
222 skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) 209 & ~INET_ECN_MASK;
223 & ~INET_ECN_MASK; 210 break;
224 break; 211
225 default: 212 case __constant_htons(ETH_P_IPV6):
226 skb->tc_index = 0; 213 if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
227 break; 214 goto drop;
215
216 skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
217 & ~INET_ECN_MASK;
218 break;
219 default:
220 skb->tc_index = 0;
221 break;
228 } 222 }
229 } 223 }
230 224
@@ -234,7 +228,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
234 struct tcf_result res; 228 struct tcf_result res;
235 int result = tc_classify(skb, p->filter_list, &res); 229 int result = tc_classify(skb, p->filter_list, &res);
236 230
237 D2PRINTK("result %d class 0x%04x\n", result, res.classid); 231 pr_debug("result %d class 0x%04x\n", result, res.classid);
238 232
239 switch (result) { 233 switch (result) {
240#ifdef CONFIG_NET_CLS_ACT 234#ifdef CONFIG_NET_CLS_ACT
@@ -242,14 +236,14 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
242 case TC_ACT_STOLEN: 236 case TC_ACT_STOLEN:
243 kfree_skb(skb); 237 kfree_skb(skb);
244 return NET_XMIT_SUCCESS; 238 return NET_XMIT_SUCCESS;
239
245 case TC_ACT_SHOT: 240 case TC_ACT_SHOT:
246 kfree_skb(skb); 241 goto drop;
247 sch->qstats.drops++;
248 return NET_XMIT_BYPASS;
249#endif 242#endif
250 case TC_ACT_OK: 243 case TC_ACT_OK:
251 skb->tc_index = TC_H_MIN(res.classid); 244 skb->tc_index = TC_H_MIN(res.classid);
252 break; 245 break;
246
253 default: 247 default:
254 if (p->default_index != NO_DEFAULT_INDEX) 248 if (p->default_index != NO_DEFAULT_INDEX)
255 skb->tc_index = p->default_index; 249 skb->tc_index = p->default_index;
@@ -257,7 +251,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
257 } 251 }
258 } 252 }
259 253
260 err = p->q->enqueue(skb,p->q); 254 err = p->q->enqueue(skb, p->q);
261 if (err != NET_XMIT_SUCCESS) { 255 if (err != NET_XMIT_SUCCESS) {
262 sch->qstats.drops++; 256 sch->qstats.drops++;
263 return err; 257 return err;
@@ -268,15 +262,20 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
268 sch->q.qlen++; 262 sch->q.qlen++;
269 263
270 return NET_XMIT_SUCCESS; 264 return NET_XMIT_SUCCESS;
265
266drop:
267 kfree_skb(skb);
268 sch->qstats.drops++;
269 return NET_XMIT_BYPASS;
271} 270}
272 271
273static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) 272static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
274{ 273{
275 struct dsmark_qdisc_data *p = PRIV(sch); 274 struct dsmark_qdisc_data *p = qdisc_priv(sch);
276 struct sk_buff *skb; 275 struct sk_buff *skb;
277 u32 index; 276 u32 index;
278 277
279 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p); 278 pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
280 279
281 skb = p->q->ops->dequeue(p->q); 280 skb = p->q->ops->dequeue(p->q);
282 if (skb == NULL) 281 if (skb == NULL)
@@ -285,39 +284,39 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
285 sch->q.qlen--; 284 sch->q.qlen--;
286 285
287 index = skb->tc_index & (p->indices - 1); 286 index = skb->tc_index & (p->indices - 1);
288 D2PRINTK("index %d->%d\n", skb->tc_index, index); 287 pr_debug("index %d->%d\n", skb->tc_index, index);
289 288
290 switch (skb->protocol) { 289 switch (skb->protocol) {
291 case __constant_htons(ETH_P_IP): 290 case __constant_htons(ETH_P_IP):
292 ipv4_change_dsfield(ip_hdr(skb), p->mask[index], 291 ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
293 p->value[index]); 292 p->value[index]);
294 break;
295 case __constant_htons(ETH_P_IPV6):
296 ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
297 p->value[index]);
298 break; 293 break;
299 default: 294 case __constant_htons(ETH_P_IPV6):
300 /* 295 ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
301 * Only complain if a change was actually attempted. 296 p->value[index]);
302 * This way, we can send non-IP traffic through dsmark
303 * and don't need yet another qdisc as a bypass.
304 */
305 if (p->mask[index] != 0xff || p->value[index])
306 printk(KERN_WARNING "dsmark_dequeue: "
307 "unsupported protocol %d\n",
308 ntohs(skb->protocol));
309 break; 297 break;
298 default:
299 /*
300 * Only complain if a change was actually attempted.
301 * This way, we can send non-IP traffic through dsmark
302 * and don't need yet another qdisc as a bypass.
303 */
304 if (p->mask[index] != 0xff || p->value[index])
305 printk(KERN_WARNING
306 "dsmark_dequeue: unsupported protocol %d\n",
307 ntohs(skb->protocol));
308 break;
310 } 309 }
311 310
312 return skb; 311 return skb;
313} 312}
314 313
315static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch) 314static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
316{ 315{
317 struct dsmark_qdisc_data *p = PRIV(sch); 316 struct dsmark_qdisc_data *p = qdisc_priv(sch);
318 int err; 317 int err;
319 318
320 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 319 pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
321 320
322 err = p->q->ops->requeue(skb, p->q); 321 err = p->q->ops->requeue(skb, p->q);
323 if (err != NET_XMIT_SUCCESS) { 322 if (err != NET_XMIT_SUCCESS) {
@@ -333,10 +332,10 @@ static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
333 332
334static unsigned int dsmark_drop(struct Qdisc *sch) 333static unsigned int dsmark_drop(struct Qdisc *sch)
335{ 334{
336 struct dsmark_qdisc_data *p = PRIV(sch); 335 struct dsmark_qdisc_data *p = qdisc_priv(sch);
337 unsigned int len; 336 unsigned int len;
338 337
339 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); 338 pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
340 339
341 if (p->q->ops->drop == NULL) 340 if (p->q->ops->drop == NULL)
342 return 0; 341 return 0;
@@ -348,26 +347,32 @@ static unsigned int dsmark_drop(struct Qdisc *sch)
348 return len; 347 return len;
349} 348}
350 349
351static int dsmark_init(struct Qdisc *sch, struct rtattr *opt) 350static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
352{ 351{
353 struct dsmark_qdisc_data *p = PRIV(sch); 352 struct dsmark_qdisc_data *p = qdisc_priv(sch);
354 struct rtattr *tb[TCA_DSMARK_MAX]; 353 struct nlattr *tb[TCA_DSMARK_MAX + 1];
355 int err = -EINVAL; 354 int err = -EINVAL;
356 u32 default_index = NO_DEFAULT_INDEX; 355 u32 default_index = NO_DEFAULT_INDEX;
357 u16 indices; 356 u16 indices;
358 u8 *mask; 357 u8 *mask;
359 358
360 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); 359 pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
361 360
362 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0) 361 if (!opt)
363 goto errout; 362 goto errout;
364 363
365 indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]); 364 err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
366 if (!indices || !dsmark_valid_indices(indices)) 365 if (err < 0)
366 goto errout;
367
368 err = -EINVAL;
369 indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
370
371 if (hweight32(indices) != 1)
367 goto errout; 372 goto errout;
368 373
369 if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) 374 if (tb[TCA_DSMARK_DEFAULT_INDEX])
370 default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]); 375 default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
371 376
372 mask = kmalloc(indices * 2, GFP_KERNEL); 377 mask = kmalloc(indices * 2, GFP_KERNEL);
373 if (mask == NULL) { 378 if (mask == NULL) {
@@ -383,34 +388,33 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
383 388
384 p->indices = indices; 389 p->indices = indices;
385 p->default_index = default_index; 390 p->default_index = default_index;
386 p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]); 391 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
387 392
388 p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); 393 p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
389 if (p->q == NULL) 394 if (p->q == NULL)
390 p->q = &noop_qdisc; 395 p->q = &noop_qdisc;
391 396
392 DPRINTK("dsmark_init: qdisc %p\n", p->q); 397 pr_debug("dsmark_init: qdisc %p\n", p->q);
393 398
394 err = 0; 399 err = 0;
395errout: 400errout:
396rtattr_failure:
397 return err; 401 return err;
398} 402}
399 403
400static void dsmark_reset(struct Qdisc *sch) 404static void dsmark_reset(struct Qdisc *sch)
401{ 405{
402 struct dsmark_qdisc_data *p = PRIV(sch); 406 struct dsmark_qdisc_data *p = qdisc_priv(sch);
403 407
404 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p); 408 pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
405 qdisc_reset(p->q); 409 qdisc_reset(p->q);
406 sch->q.qlen = 0; 410 sch->q.qlen = 0;
407} 411}
408 412
409static void dsmark_destroy(struct Qdisc *sch) 413static void dsmark_destroy(struct Qdisc *sch)
410{ 414{
411 struct dsmark_qdisc_data *p = PRIV(sch); 415 struct dsmark_qdisc_data *p = qdisc_priv(sch);
412 416
413 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); 417 pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
414 418
415 tcf_destroy_chain(p->filter_list); 419 tcf_destroy_chain(p->filter_list);
416 qdisc_destroy(p->q); 420 qdisc_destroy(p->q);
@@ -420,10 +424,10 @@ static void dsmark_destroy(struct Qdisc *sch)
420static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, 424static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
421 struct sk_buff *skb, struct tcmsg *tcm) 425 struct sk_buff *skb, struct tcmsg *tcm)
422{ 426{
423 struct dsmark_qdisc_data *p = PRIV(sch); 427 struct dsmark_qdisc_data *p = qdisc_priv(sch);
424 struct rtattr *opts = NULL; 428 struct nlattr *opts = NULL;
425 429
426 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl); 430 pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
427 431
428 if (!dsmark_valid_index(p, cl)) 432 if (!dsmark_valid_index(p, cl))
429 return -EINVAL; 433 return -EINVAL;
@@ -431,37 +435,41 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
431 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); 435 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
432 tcm->tcm_info = p->q->handle; 436 tcm->tcm_info = p->q->handle;
433 437
434 opts = RTA_NEST(skb, TCA_OPTIONS); 438 opts = nla_nest_start(skb, TCA_OPTIONS);
435 RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]); 439 if (opts == NULL)
436 RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]); 440 goto nla_put_failure;
441 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
442 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
437 443
438 return RTA_NEST_END(skb, opts); 444 return nla_nest_end(skb, opts);
439 445
440rtattr_failure: 446nla_put_failure:
441 return RTA_NEST_CANCEL(skb, opts); 447 return nla_nest_cancel(skb, opts);
442} 448}
443 449
444static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) 450static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
445{ 451{
446 struct dsmark_qdisc_data *p = PRIV(sch); 452 struct dsmark_qdisc_data *p = qdisc_priv(sch);
447 struct rtattr *opts = NULL; 453 struct nlattr *opts = NULL;
448 454
449 opts = RTA_NEST(skb, TCA_OPTIONS); 455 opts = nla_nest_start(skb, TCA_OPTIONS);
450 RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices); 456 if (opts == NULL)
457 goto nla_put_failure;
458 NLA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
451 459
452 if (p->default_index != NO_DEFAULT_INDEX) 460 if (p->default_index != NO_DEFAULT_INDEX)
453 RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index); 461 NLA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
454 462
455 if (p->set_tc_index) 463 if (p->set_tc_index)
456 RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX); 464 NLA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
457 465
458 return RTA_NEST_END(skb, opts); 466 return nla_nest_end(skb, opts);
459 467
460rtattr_failure: 468nla_put_failure:
461 return RTA_NEST_CANCEL(skb, opts); 469 return nla_nest_cancel(skb, opts);
462} 470}
463 471
464static struct Qdisc_class_ops dsmark_class_ops = { 472static const struct Qdisc_class_ops dsmark_class_ops = {
465 .graft = dsmark_graft, 473 .graft = dsmark_graft,
466 .leaf = dsmark_leaf, 474 .leaf = dsmark_leaf,
467 .get = dsmark_get, 475 .get = dsmark_get,
@@ -475,7 +483,7 @@ static struct Qdisc_class_ops dsmark_class_ops = {
475 .dump = dsmark_dump_class, 483 .dump = dsmark_dump_class,
476}; 484};
477 485
478static struct Qdisc_ops dsmark_qdisc_ops = { 486static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
479 .next = NULL, 487 .next = NULL,
480 .cl_ops = &dsmark_class_ops, 488 .cl_ops = &dsmark_class_ops,
481 .id = "dsmark", 489 .id = "dsmark",
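Two recurring moves in this sch_dsmark.c conversion are worth spelling out. First, hand-rolled rtattr_parse_nested()/RTA_GET_*() handling becomes nla_parse_nested() driven by a dsmark_policy table, so type and length checking happen centrally. Second, the old dsmark_valid_indices() helper is replaced by hweight32(indices) != 1: a population count of exactly one set bit accepts precisely the powers of two (and rejects zero), which is what the mask arithmetic skb->tc_index & (p->indices - 1) requires. A small user-space sketch of that check (popcount32 stands in for the kernel's hweight32; the program itself is illustrative, not from the patch):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's hweight32(): count of set bits in a word. */
static unsigned int popcount32(uint32_t w)
{
	unsigned int n = 0;

	while (w) {
		w &= w - 1;	/* clears the lowest set bit */
		n++;
	}
	return n;
}

int main(void)
{
	const uint16_t samples[] = { 0, 1, 3, 16, 64, 100, 32768 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		/* popcount == 1  <=>  power of two, so (indices - 1) is a
		 * valid all-ones mask for skb->tc_index & (indices - 1). */
		printf("indices=%5u -> %s\n", samples[i],
		       popcount32(samples[i]) == 1 ? "valid" : "rejected");
	}
	return 0;
}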
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index c264308f17c1..95ed48221652 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -43,7 +43,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
43 return qdisc_reshape_fail(skb, sch); 43 return qdisc_reshape_fail(skb, sch);
44} 44}
45 45
46static int fifo_init(struct Qdisc *sch, struct rtattr *opt) 46static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
47{ 47{
48 struct fifo_sched_data *q = qdisc_priv(sch); 48 struct fifo_sched_data *q = qdisc_priv(sch);
49 49
@@ -55,9 +55,9 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
55 55
56 q->limit = limit; 56 q->limit = limit;
57 } else { 57 } else {
58 struct tc_fifo_qopt *ctl = RTA_DATA(opt); 58 struct tc_fifo_qopt *ctl = nla_data(opt);
59 59
60 if (RTA_PAYLOAD(opt) < sizeof(*ctl)) 60 if (nla_len(opt) < sizeof(*ctl))
61 return -EINVAL; 61 return -EINVAL;
62 62
63 q->limit = ctl->limit; 63 q->limit = ctl->limit;
@@ -71,14 +71,14 @@ static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
71 struct fifo_sched_data *q = qdisc_priv(sch); 71 struct fifo_sched_data *q = qdisc_priv(sch);
72 struct tc_fifo_qopt opt = { .limit = q->limit }; 72 struct tc_fifo_qopt opt = { .limit = q->limit };
73 73
74 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 74 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
75 return skb->len; 75 return skb->len;
76 76
77rtattr_failure: 77nla_put_failure:
78 return -1; 78 return -1;
79} 79}
80 80
81struct Qdisc_ops pfifo_qdisc_ops = { 81struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
82 .id = "pfifo", 82 .id = "pfifo",
83 .priv_size = sizeof(struct fifo_sched_data), 83 .priv_size = sizeof(struct fifo_sched_data),
84 .enqueue = pfifo_enqueue, 84 .enqueue = pfifo_enqueue,
@@ -91,8 +91,9 @@ struct Qdisc_ops pfifo_qdisc_ops = {
91 .dump = fifo_dump, 91 .dump = fifo_dump,
92 .owner = THIS_MODULE, 92 .owner = THIS_MODULE,
93}; 93};
94EXPORT_SYMBOL(pfifo_qdisc_ops);
94 95
95struct Qdisc_ops bfifo_qdisc_ops = { 96struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
96 .id = "bfifo", 97 .id = "bfifo",
97 .priv_size = sizeof(struct fifo_sched_data), 98 .priv_size = sizeof(struct fifo_sched_data),
98 .enqueue = bfifo_enqueue, 99 .enqueue = bfifo_enqueue,
@@ -105,6 +106,4 @@ struct Qdisc_ops bfifo_qdisc_ops = {
105 .dump = fifo_dump, 106 .dump = fifo_dump,
106 .owner = THIS_MODULE, 107 .owner = THIS_MODULE,
107}; 108};
108
109EXPORT_SYMBOL(bfifo_qdisc_ops); 109EXPORT_SYMBOL(bfifo_qdisc_ops);
110EXPORT_SYMBOL(pfifo_qdisc_ops);
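Besides the attribute rename, sch_fifo.c shows the error-label convention changing in step: RTA_PUT() jumped to rtattr_failure, while NLA_PUT() jumps to nla_put_failure when the skb's tail room runs out, so the label must be renamed wherever the macro is adopted. The file also moves each EXPORT_SYMBOL() next to the definition it exports, the preferred placement. A sketch of the converted dump idiom (fifo_sched_data is redeclared here because the real one is private to sch_fifo.c; this assumes the 2.6.25-era <net/netlink.h> and is not a standalone module):

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <linux/pkt_sched.h>

struct fifo_sched_data {
	u32 limit;		/* mirrors the private struct in sch_fifo.c */
};

static int fifo_dump_sketch(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	/* NLA_PUT() is roughly:
	 *	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt) < 0)
	 *		goto nla_put_failure;
	 */
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;	/* caller treats this as "message buffer full" */
}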
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e595e6570ce0..10b5c0887fff 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -40,16 +40,22 @@
40 */ 40 */
41 41
42void qdisc_lock_tree(struct net_device *dev) 42void qdisc_lock_tree(struct net_device *dev)
43 __acquires(dev->queue_lock)
44 __acquires(dev->ingress_lock)
43{ 45{
44 spin_lock_bh(&dev->queue_lock); 46 spin_lock_bh(&dev->queue_lock);
45 spin_lock(&dev->ingress_lock); 47 spin_lock(&dev->ingress_lock);
46} 48}
49EXPORT_SYMBOL(qdisc_lock_tree);
47 50
48void qdisc_unlock_tree(struct net_device *dev) 51void qdisc_unlock_tree(struct net_device *dev)
52 __releases(dev->ingress_lock)
53 __releases(dev->queue_lock)
49{ 54{
50 spin_unlock(&dev->ingress_lock); 55 spin_unlock(&dev->ingress_lock);
51 spin_unlock_bh(&dev->queue_lock); 56 spin_unlock_bh(&dev->queue_lock);
52} 57}
58EXPORT_SYMBOL(qdisc_unlock_tree);
53 59
54static inline int qdisc_qlen(struct Qdisc *q) 60static inline int qdisc_qlen(struct Qdisc *q)
55{ 61{
@@ -211,13 +217,6 @@ static void dev_watchdog(unsigned long arg)
211 dev_put(dev); 217 dev_put(dev);
212} 218}
213 219
214static void dev_watchdog_init(struct net_device *dev)
215{
216 init_timer(&dev->watchdog_timer);
217 dev->watchdog_timer.data = (unsigned long)dev;
218 dev->watchdog_timer.function = dev_watchdog;
219}
220
221void __netdev_watchdog_up(struct net_device *dev) 220void __netdev_watchdog_up(struct net_device *dev)
222{ 221{
223 if (dev->tx_timeout) { 222 if (dev->tx_timeout) {
@@ -256,6 +255,7 @@ void netif_carrier_on(struct net_device *dev)
256 __netdev_watchdog_up(dev); 255 __netdev_watchdog_up(dev);
257 } 256 }
258} 257}
258EXPORT_SYMBOL(netif_carrier_on);
259 259
260/** 260/**
261 * netif_carrier_off - clear carrier 261 * netif_carrier_off - clear carrier
@@ -268,6 +268,7 @@ void netif_carrier_off(struct net_device *dev)
268 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) 268 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
269 linkwatch_fire_event(dev); 269 linkwatch_fire_event(dev);
270} 270}
271EXPORT_SYMBOL(netif_carrier_off);
271 272
272/* "NOOP" scheduler: the best scheduler, recommended for all interfaces 273/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
273 under all circumstances. It is difficult to invent anything faster or 274 under all circumstances. It is difficult to invent anything faster or
@@ -294,7 +295,7 @@ static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
294 return NET_XMIT_CN; 295 return NET_XMIT_CN;
295} 296}
296 297
297struct Qdisc_ops noop_qdisc_ops = { 298struct Qdisc_ops noop_qdisc_ops __read_mostly = {
298 .id = "noop", 299 .id = "noop",
299 .priv_size = 0, 300 .priv_size = 0,
300 .enqueue = noop_enqueue, 301 .enqueue = noop_enqueue,
@@ -310,8 +311,9 @@ struct Qdisc noop_qdisc = {
310 .ops = &noop_qdisc_ops, 311 .ops = &noop_qdisc_ops,
311 .list = LIST_HEAD_INIT(noop_qdisc.list), 312 .list = LIST_HEAD_INIT(noop_qdisc.list),
312}; 313};
314EXPORT_SYMBOL(noop_qdisc);
313 315
314static struct Qdisc_ops noqueue_qdisc_ops = { 316static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
315 .id = "noqueue", 317 .id = "noqueue",
316 .priv_size = 0, 318 .priv_size = 0,
317 .enqueue = noop_enqueue, 319 .enqueue = noop_enqueue,
@@ -395,14 +397,14 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
395 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; 397 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
396 398
397 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 399 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
398 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 400 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
399 return skb->len; 401 return skb->len;
400 402
401rtattr_failure: 403nla_put_failure:
402 return -1; 404 return -1;
403} 405}
404 406
405static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) 407static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
406{ 408{
407 int prio; 409 int prio;
408 struct sk_buff_head *list = qdisc_priv(qdisc); 410 struct sk_buff_head *list = qdisc_priv(qdisc);
@@ -413,7 +415,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
413 return 0; 415 return 0;
414} 416}
415 417
416static struct Qdisc_ops pfifo_fast_ops = { 418static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
417 .id = "pfifo_fast", 419 .id = "pfifo_fast",
418 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), 420 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
419 .enqueue = pfifo_fast_enqueue, 421 .enqueue = pfifo_fast_enqueue,
@@ -474,16 +476,18 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
474errout: 476errout:
475 return NULL; 477 return NULL;
476} 478}
479EXPORT_SYMBOL(qdisc_create_dflt);
477 480
478/* Under dev->queue_lock and BH! */ 481/* Under dev->queue_lock and BH! */
479 482
480void qdisc_reset(struct Qdisc *qdisc) 483void qdisc_reset(struct Qdisc *qdisc)
481{ 484{
482 struct Qdisc_ops *ops = qdisc->ops; 485 const struct Qdisc_ops *ops = qdisc->ops;
483 486
484 if (ops->reset) 487 if (ops->reset)
485 ops->reset(qdisc); 488 ops->reset(qdisc);
486} 489}
490EXPORT_SYMBOL(qdisc_reset);
487 491
488/* this is the rcu callback function to clean up a qdisc when there 492/* this is the rcu callback function to clean up a qdisc when there
489 * are no further references to it */ 493 * are no further references to it */
@@ -498,7 +502,7 @@ static void __qdisc_destroy(struct rcu_head *head)
498 502
499void qdisc_destroy(struct Qdisc *qdisc) 503void qdisc_destroy(struct Qdisc *qdisc)
500{ 504{
501 struct Qdisc_ops *ops = qdisc->ops; 505 const struct Qdisc_ops *ops = qdisc->ops;
502 506
503 if (qdisc->flags & TCQ_F_BUILTIN || 507 if (qdisc->flags & TCQ_F_BUILTIN ||
504 !atomic_dec_and_test(&qdisc->refcnt)) 508 !atomic_dec_and_test(&qdisc->refcnt))
@@ -515,6 +519,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
515 dev_put(qdisc->dev); 519 dev_put(qdisc->dev);
516 call_rcu(&qdisc->q_rcu, __qdisc_destroy); 520 call_rcu(&qdisc->q_rcu, __qdisc_destroy);
517} 521}
522EXPORT_SYMBOL(qdisc_destroy);
518 523
519void dev_activate(struct net_device *dev) 524void dev_activate(struct net_device *dev)
520{ 525{
@@ -608,7 +613,7 @@ void dev_init_scheduler(struct net_device *dev)
608 INIT_LIST_HEAD(&dev->qdisc_list); 613 INIT_LIST_HEAD(&dev->qdisc_list);
609 qdisc_unlock_tree(dev); 614 qdisc_unlock_tree(dev);
610 615
611 dev_watchdog_init(dev); 616 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
612} 617}
613 618
614void dev_shutdown(struct net_device *dev) 619void dev_shutdown(struct net_device *dev)
@@ -629,12 +634,3 @@ void dev_shutdown(struct net_device *dev)
629 BUG_TRAP(!timer_pending(&dev->watchdog_timer)); 634 BUG_TRAP(!timer_pending(&dev->watchdog_timer));
630 qdisc_unlock_tree(dev); 635 qdisc_unlock_tree(dev);
631} 636}
632
633EXPORT_SYMBOL(netif_carrier_on);
634EXPORT_SYMBOL(netif_carrier_off);
635EXPORT_SYMBOL(noop_qdisc);
636EXPORT_SYMBOL(qdisc_create_dflt);
637EXPORT_SYMBOL(qdisc_destroy);
638EXPORT_SYMBOL(qdisc_reset);
639EXPORT_SYMBOL(qdisc_lock_tree);
640EXPORT_SYMBOL(qdisc_unlock_tree);
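Three independent cleanups meet in sch_generic.c: open-coded timer initialisation is folded into setup_timer(); qdisc_lock_tree()/qdisc_unlock_tree() gain sparse __acquires/__releases annotations so static analysis can track the two-lock ownership; and the EXPORT_SYMBOL() trailer block is dissolved, each export moving beside its definition. The timer half is purely mechanical; a self-contained sketch of the 2.6-era API it adopts (struct and function names here are hypothetical):

#include <linux/timer.h>
#include <linux/jiffies.h>

struct watched {
	struct timer_list timer;
};

static void watched_fire(unsigned long data)
{
	struct watched *w = (struct watched *)data;

	/* Runs in timer (softirq) context; 'data' carries our cookie,
	 * exactly as dev_watchdog() receives its net_device. */
	(void)w;
}

static void watched_start(struct watched *w)
{
	/* One call replaces init_timer() plus the .function and .data
	 * assignments that the removed dev_watchdog_init() did by hand. */
	setup_timer(&w->timer, watched_fire, (unsigned long)w);
	mod_timer(&w->timer, jiffies + HZ);	/* fire in about a second */
}

static void watched_stop(struct watched *w)
{
	del_timer_sync(&w->timer);	/* also waits out a running handler */
}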
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 3cc6dda02e2e..3a9d226ff1e4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -350,16 +350,16 @@ static inline void gred_destroy_vq(struct gred_sched_data *q)
350 kfree(q); 350 kfree(q);
351} 351}
352 352
353static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps) 353static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
354{ 354{
355 struct gred_sched *table = qdisc_priv(sch); 355 struct gred_sched *table = qdisc_priv(sch);
356 struct tc_gred_sopt *sopt; 356 struct tc_gred_sopt *sopt;
357 int i; 357 int i;
358 358
359 if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt)) 359 if (dps == NULL)
360 return -EINVAL; 360 return -EINVAL;
361 361
362 sopt = RTA_DATA(dps); 362 sopt = nla_data(dps);
363 363
364 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) 364 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
365 return -EINVAL; 365 return -EINVAL;
@@ -425,28 +425,37 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
425 return 0; 425 return 0;
426} 426}
427 427
428static int gred_change(struct Qdisc *sch, struct rtattr *opt) 428static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
429 [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) },
430 [TCA_GRED_STAB] = { .len = 256 },
431 [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
432};
433
434static int gred_change(struct Qdisc *sch, struct nlattr *opt)
429{ 435{
430 struct gred_sched *table = qdisc_priv(sch); 436 struct gred_sched *table = qdisc_priv(sch);
431 struct tc_gred_qopt *ctl; 437 struct tc_gred_qopt *ctl;
432 struct rtattr *tb[TCA_GRED_MAX]; 438 struct nlattr *tb[TCA_GRED_MAX + 1];
433 int err = -EINVAL, prio = GRED_DEF_PRIO; 439 int err, prio = GRED_DEF_PRIO;
434 u8 *stab; 440 u8 *stab;
435 441
436 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) 442 if (opt == NULL)
437 return -EINVAL; 443 return -EINVAL;
438 444
439 if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL) 445 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
446 if (err < 0)
447 return err;
448
449 if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
440 return gred_change_table_def(sch, opt); 450 return gred_change_table_def(sch, opt);
441 451
442 if (tb[TCA_GRED_PARMS-1] == NULL || 452 if (tb[TCA_GRED_PARMS] == NULL ||
443 RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || 453 tb[TCA_GRED_STAB] == NULL)
444 tb[TCA_GRED_STAB-1] == NULL ||
445 RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
446 return -EINVAL; 454 return -EINVAL;
447 455
448 ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); 456 err = -EINVAL;
449 stab = RTA_DATA(tb[TCA_GRED_STAB-1]); 457 ctl = nla_data(tb[TCA_GRED_PARMS]);
458 stab = nla_data(tb[TCA_GRED_STAB]);
450 459
451 if (ctl->DP >= table->DPs) 460 if (ctl->DP >= table->DPs)
452 goto errout; 461 goto errout;
@@ -486,23 +495,28 @@ errout:
486 return err; 495 return err;
487} 496}
488 497
489static int gred_init(struct Qdisc *sch, struct rtattr *opt) 498static int gred_init(struct Qdisc *sch, struct nlattr *opt)
490{ 499{
491 struct rtattr *tb[TCA_GRED_MAX]; 500 struct nlattr *tb[TCA_GRED_MAX + 1];
501 int err;
492 502
493 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) 503 if (opt == NULL)
494 return -EINVAL; 504 return -EINVAL;
495 505
496 if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1]) 506 err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
507 if (err < 0)
508 return err;
509
510 if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
497 return -EINVAL; 511 return -EINVAL;
498 512
499 return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]); 513 return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
500} 514}
501 515
502static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) 516static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
503{ 517{
504 struct gred_sched *table = qdisc_priv(sch); 518 struct gred_sched *table = qdisc_priv(sch);
505 struct rtattr *parms, *opts = NULL; 519 struct nlattr *parms, *opts = NULL;
506 int i; 520 int i;
507 struct tc_gred_sopt sopt = { 521 struct tc_gred_sopt sopt = {
508 .DPs = table->DPs, 522 .DPs = table->DPs,
@@ -511,9 +525,13 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
511 .flags = table->red_flags, 525 .flags = table->red_flags,
512 }; 526 };
513 527
514 opts = RTA_NEST(skb, TCA_OPTIONS); 528 opts = nla_nest_start(skb, TCA_OPTIONS);
515 RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt); 529 if (opts == NULL)
516 parms = RTA_NEST(skb, TCA_GRED_PARMS); 530 goto nla_put_failure;
531 NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
532 parms = nla_nest_start(skb, TCA_GRED_PARMS);
533 if (parms == NULL)
534 goto nla_put_failure;
517 535
518 for (i = 0; i < MAX_DPs; i++) { 536 for (i = 0; i < MAX_DPs; i++) {
519 struct gred_sched_data *q = table->tab[i]; 537 struct gred_sched_data *q = table->tab[i];
@@ -555,15 +573,16 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
555 opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); 573 opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);
556 574
557append_opt: 575append_opt:
558 RTA_APPEND(skb, sizeof(opt), &opt); 576 if (nla_append(skb, sizeof(opt), &opt) < 0)
577 goto nla_put_failure;
559 } 578 }
560 579
561 RTA_NEST_END(skb, parms); 580 nla_nest_end(skb, parms);
562 581
563 return RTA_NEST_END(skb, opts); 582 return nla_nest_end(skb, opts);
564 583
565rtattr_failure: 584nla_put_failure:
566 return RTA_NEST_CANCEL(skb, opts); 585 return nla_nest_cancel(skb, opts);
567} 586}
568 587
569static void gred_destroy(struct Qdisc *sch) 588static void gred_destroy(struct Qdisc *sch)
@@ -577,7 +596,7 @@ static void gred_destroy(struct Qdisc *sch)
577 } 596 }
578} 597}
579 598
580static struct Qdisc_ops gred_qdisc_ops = { 599static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
581 .id = "gred", 600 .id = "gred",
582 .priv_size = sizeof(struct gred_sched), 601 .priv_size = sizeof(struct gred_sched),
583 .enqueue = gred_enqueue, 602 .enqueue = gred_enqueue,
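The gred conversion shows where the old RTA_PAYLOAD() length checks went: into the nla_policy table. For an entry without an explicit .type, a nonzero .len is treated by nla_parse_nested() as the minimum payload the attribute must carry, so undersized TCA_GRED_PARMS, TCA_GRED_STAB or TCA_GRED_DPS attributes are rejected before any nla_data() pointer is dereferenced. A sketch of the pattern (mirrors the patch; parse_gred_sketch is a hypothetical wrapper, not a function from the tree):

#include <net/netlink.h>
#include <linux/pkt_sched.h>

static const struct nla_policy gred_policy_sketch[TCA_GRED_MAX + 1] = {
	/* .len here means "payload must be at least this long" */
	[TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) },
	[TCA_GRED_STAB]  = { .len = 256 },
	[TCA_GRED_DPS]   = { .len = sizeof(struct tc_gred_sopt) },
};

static int parse_gred_sketch(struct nlattr *opt)
{
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err;

	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy_sketch);
	if (err < 0)
		return err;	/* covers malformed and short attributes */

	if (tb[TCA_GRED_PARMS]) {
		struct tc_gred_qopt *ctl = nla_data(tb[TCA_GRED_PARMS]);

		/* Safe without a manual size check: the policy already
		 * guaranteed the payload is >= sizeof(*ctl). */
		(void)ctl;
	}
	return 0;
}

Note the table is sized TCA_GRED_MAX + 1 and indexed without the historical "-1" offset, which is the other systematic change across these files.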
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index a6ad491e434b..87293d0db1d7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -986,41 +986,46 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
986 cl->cl_flags |= HFSC_USC; 986 cl->cl_flags |= HFSC_USC;
987} 987}
988 988
989static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
990 [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
991 [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
992 [TCA_HFSC_USC] = { .len = sizeof(struct tc_service_curve) },
993};
994
989static int 995static int
990hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 996hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
991 struct rtattr **tca, unsigned long *arg) 997 struct nlattr **tca, unsigned long *arg)
992{ 998{
993 struct hfsc_sched *q = qdisc_priv(sch); 999 struct hfsc_sched *q = qdisc_priv(sch);
994 struct hfsc_class *cl = (struct hfsc_class *)*arg; 1000 struct hfsc_class *cl = (struct hfsc_class *)*arg;
995 struct hfsc_class *parent = NULL; 1001 struct hfsc_class *parent = NULL;
996 struct rtattr *opt = tca[TCA_OPTIONS-1]; 1002 struct nlattr *opt = tca[TCA_OPTIONS];
997 struct rtattr *tb[TCA_HFSC_MAX]; 1003 struct nlattr *tb[TCA_HFSC_MAX + 1];
998 struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL; 1004 struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
999 u64 cur_time; 1005 u64 cur_time;
1006 int err;
1000 1007
1001 if (opt == NULL || rtattr_parse_nested(tb, TCA_HFSC_MAX, opt)) 1008 if (opt == NULL)
1002 return -EINVAL; 1009 return -EINVAL;
1003 1010
1004 if (tb[TCA_HFSC_RSC-1]) { 1011 err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy);
1005 if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc)) 1012 if (err < 0)
1006 return -EINVAL; 1013 return err;
1007 rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]); 1014
1015 if (tb[TCA_HFSC_RSC]) {
1016 rsc = nla_data(tb[TCA_HFSC_RSC]);
1008 if (rsc->m1 == 0 && rsc->m2 == 0) 1017 if (rsc->m1 == 0 && rsc->m2 == 0)
1009 rsc = NULL; 1018 rsc = NULL;
1010 } 1019 }
1011 1020
1012 if (tb[TCA_HFSC_FSC-1]) { 1021 if (tb[TCA_HFSC_FSC]) {
1013 if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc)) 1022 fsc = nla_data(tb[TCA_HFSC_FSC]);
1014 return -EINVAL;
1015 fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]);
1016 if (fsc->m1 == 0 && fsc->m2 == 0) 1023 if (fsc->m1 == 0 && fsc->m2 == 0)
1017 fsc = NULL; 1024 fsc = NULL;
1018 } 1025 }
1019 1026
1020 if (tb[TCA_HFSC_USC-1]) { 1027 if (tb[TCA_HFSC_USC]) {
1021 if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc)) 1028 usc = nla_data(tb[TCA_HFSC_USC]);
1022 return -EINVAL;
1023 usc = RTA_DATA(tb[TCA_HFSC_USC-1]);
1024 if (usc->m1 == 0 && usc->m2 == 0) 1029 if (usc->m1 == 0 && usc->m2 == 0)
1025 usc = NULL; 1030 usc = NULL;
1026 } 1031 }
@@ -1050,10 +1055,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1050 } 1055 }
1051 sch_tree_unlock(sch); 1056 sch_tree_unlock(sch);
1052 1057
1053 if (tca[TCA_RATE-1]) 1058 if (tca[TCA_RATE])
1054 gen_replace_estimator(&cl->bstats, &cl->rate_est, 1059 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1055 &sch->dev->queue_lock, 1060 &sch->dev->queue_lock,
1056 tca[TCA_RATE-1]); 1061 tca[TCA_RATE]);
1057 return 0; 1062 return 0;
1058 } 1063 }
1059 1064
@@ -1106,9 +1111,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1106 cl->cl_pcvtoff = parent->cl_cvtoff; 1111 cl->cl_pcvtoff = parent->cl_cvtoff;
1107 sch_tree_unlock(sch); 1112 sch_tree_unlock(sch);
1108 1113
1109 if (tca[TCA_RATE-1]) 1114 if (tca[TCA_RATE])
1110 gen_new_estimator(&cl->bstats, &cl->rate_est, 1115 gen_new_estimator(&cl->bstats, &cl->rate_est,
1111 &sch->dev->queue_lock, tca[TCA_RATE-1]); 1116 &sch->dev->queue_lock, tca[TCA_RATE]);
1112 *arg = (unsigned long)cl; 1117 *arg = (unsigned long)cl;
1113 return 0; 1118 return 0;
1114} 1119}
@@ -1304,11 +1309,11 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
1304 tsc.m1 = sm2m(sc->sm1); 1309 tsc.m1 = sm2m(sc->sm1);
1305 tsc.d = dx2d(sc->dx); 1310 tsc.d = dx2d(sc->dx);
1306 tsc.m2 = sm2m(sc->sm2); 1311 tsc.m2 = sm2m(sc->sm2);
1307 RTA_PUT(skb, attr, sizeof(tsc), &tsc); 1312 NLA_PUT(skb, attr, sizeof(tsc), &tsc);
1308 1313
1309 return skb->len; 1314 return skb->len;
1310 1315
1311 rtattr_failure: 1316 nla_put_failure:
1312 return -1; 1317 return -1;
1313} 1318}
1314 1319
@@ -1317,19 +1322,19 @@ hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
1317{ 1322{
1318 if ((cl->cl_flags & HFSC_RSC) && 1323 if ((cl->cl_flags & HFSC_RSC) &&
1319 (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0)) 1324 (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0))
1320 goto rtattr_failure; 1325 goto nla_put_failure;
1321 1326
1322 if ((cl->cl_flags & HFSC_FSC) && 1327 if ((cl->cl_flags & HFSC_FSC) &&
1323 (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0)) 1328 (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0))
1324 goto rtattr_failure; 1329 goto nla_put_failure;
1325 1330
1326 if ((cl->cl_flags & HFSC_USC) && 1331 if ((cl->cl_flags & HFSC_USC) &&
1327 (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0)) 1332 (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0))
1328 goto rtattr_failure; 1333 goto nla_put_failure;
1329 1334
1330 return skb->len; 1335 return skb->len;
1331 1336
1332 rtattr_failure: 1337 nla_put_failure:
1333 return -1; 1338 return -1;
1334} 1339}
1335 1340
@@ -1338,22 +1343,23 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
1338 struct tcmsg *tcm) 1343 struct tcmsg *tcm)
1339{ 1344{
1340 struct hfsc_class *cl = (struct hfsc_class *)arg; 1345 struct hfsc_class *cl = (struct hfsc_class *)arg;
1341 unsigned char *b = skb_tail_pointer(skb); 1346 struct nlattr *nest;
1342 struct rtattr *rta = (struct rtattr *)b;
1343 1347
1344 tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; 1348 tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
1345 tcm->tcm_handle = cl->classid; 1349 tcm->tcm_handle = cl->classid;
1346 if (cl->level == 0) 1350 if (cl->level == 0)
1347 tcm->tcm_info = cl->qdisc->handle; 1351 tcm->tcm_info = cl->qdisc->handle;
1348 1352
1349 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1353 nest = nla_nest_start(skb, TCA_OPTIONS);
1354 if (nest == NULL)
1355 goto nla_put_failure;
1350 if (hfsc_dump_curves(skb, cl) < 0) 1356 if (hfsc_dump_curves(skb, cl) < 0)
1351 goto rtattr_failure; 1357 goto nla_put_failure;
1352 rta->rta_len = skb_tail_pointer(skb) - b; 1358 nla_nest_end(skb, nest);
1353 return skb->len; 1359 return skb->len;
1354 1360
1355 rtattr_failure: 1361 nla_put_failure:
1356 nlmsg_trim(skb, b); 1362 nla_nest_cancel(skb, nest);
1357 return -1; 1363 return -1;
1358} 1364}
1359 1365
@@ -1423,15 +1429,15 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
1423} 1429}
1424 1430
1425static int 1431static int
1426hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) 1432hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1427{ 1433{
1428 struct hfsc_sched *q = qdisc_priv(sch); 1434 struct hfsc_sched *q = qdisc_priv(sch);
1429 struct tc_hfsc_qopt *qopt; 1435 struct tc_hfsc_qopt *qopt;
1430 unsigned int i; 1436 unsigned int i;
1431 1437
1432 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 1438 if (opt == NULL || nla_len(opt) < sizeof(*qopt))
1433 return -EINVAL; 1439 return -EINVAL;
1434 qopt = RTA_DATA(opt); 1440 qopt = nla_data(opt);
1435 1441
1436 q->defcls = qopt->defcls; 1442 q->defcls = qopt->defcls;
1437 for (i = 0; i < HFSC_HSIZE; i++) 1443 for (i = 0; i < HFSC_HSIZE; i++)
@@ -1459,14 +1465,14 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
1459} 1465}
1460 1466
1461static int 1467static int
1462hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt) 1468hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt)
1463{ 1469{
1464 struct hfsc_sched *q = qdisc_priv(sch); 1470 struct hfsc_sched *q = qdisc_priv(sch);
1465 struct tc_hfsc_qopt *qopt; 1471 struct tc_hfsc_qopt *qopt;
1466 1472
1467 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 1473 if (opt == NULL || nla_len(opt) < sizeof(*qopt))
1468 return -EINVAL; 1474 return -EINVAL;
1469 qopt = RTA_DATA(opt); 1475 qopt = nla_data(opt);
1470 1476
1471 sch_tree_lock(sch); 1477 sch_tree_lock(sch);
1472 q->defcls = qopt->defcls; 1478 q->defcls = qopt->defcls;
@@ -1550,10 +1556,10 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
1550 struct tc_hfsc_qopt qopt; 1556 struct tc_hfsc_qopt qopt;
1551 1557
1552 qopt.defcls = q->defcls; 1558 qopt.defcls = q->defcls;
1553 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 1559 NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
1554 return skb->len; 1560 return skb->len;
1555 1561
1556 rtattr_failure: 1562 nla_put_failure:
1557 nlmsg_trim(skb, b); 1563 nlmsg_trim(skb, b);
1558 return -1; 1564 return -1;
1559} 1565}
@@ -1698,7 +1704,7 @@ hfsc_drop(struct Qdisc *sch)
1698 return 0; 1704 return 0;
1699} 1705}
1700 1706
1701static struct Qdisc_class_ops hfsc_class_ops = { 1707static const struct Qdisc_class_ops hfsc_class_ops = {
1702 .change = hfsc_change_class, 1708 .change = hfsc_change_class,
1703 .delete = hfsc_delete_class, 1709 .delete = hfsc_delete_class,
1704 .graft = hfsc_graft_class, 1710 .graft = hfsc_graft_class,
@@ -1714,7 +1720,7 @@ static struct Qdisc_class_ops hfsc_class_ops = {
1714 .walk = hfsc_walk 1720 .walk = hfsc_walk
1715}; 1721};
1716 1722
1717static struct Qdisc_ops hfsc_qdisc_ops = { 1723static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
1718 .id = "hfsc", 1724 .id = "hfsc",
1719 .init = hfsc_init_qdisc, 1725 .init = hfsc_init_qdisc,
1720 .change = hfsc_change_qdisc, 1726 .change = hfsc_change_qdisc,
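hfsc_dump_class() demonstrates the nesting half of the API swap. The old code captured skb_tail_pointer(), emitted a zero-length TCA_OPTIONS, and back-patched rta_len once the children were written; nla_nest_start()/nla_nest_end() implement exactly that protocol, and nla_nest_cancel() rewinds the message to just before the container on failure (it tolerates a NULL start, which is why the goto works even when nla_nest_start() itself failed). A condensed sketch (the subtype numbers are made up for illustration):

#include <net/netlink.h>
#include <linux/rtnetlink.h>

static int dump_nested_sketch(struct sk_buff *skb, u8 mask, u8 value)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);  /* opens the container */
	if (nest == NULL)
		goto nla_put_failure;

	NLA_PUT_U8(skb, 1, mask);	/* 1, 2: hypothetical subtypes */
	NLA_PUT_U8(skb, 2, value);

	return nla_nest_end(skb, nest);	/* back-fills the container length */

nla_put_failure:
	nla_nest_cancel(skb, nest);	/* trims everything since the nest */
	return -1;
}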
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5e608a64935a..e1a579efc215 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -214,10 +214,6 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
214 * then finish and return direct queue. 214 * then finish and return direct queue.
215 */ 215 */
216#define HTB_DIRECT (struct htb_class*)-1 216#define HTB_DIRECT (struct htb_class*)-1
217static inline u32 htb_classid(struct htb_class *cl)
218{
219 return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
220}
221 217
222static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, 218static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
223 int *qerr) 219 int *qerr)
@@ -996,19 +992,33 @@ static void htb_reset(struct Qdisc *sch)
996 INIT_LIST_HEAD(q->drops + i); 992 INIT_LIST_HEAD(q->drops + i);
997} 993}
998 994
999static int htb_init(struct Qdisc *sch, struct rtattr *opt) 995static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
996 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
997 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
998 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
999 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1000};
1001
1002static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1000{ 1003{
1001 struct htb_sched *q = qdisc_priv(sch); 1004 struct htb_sched *q = qdisc_priv(sch);
1002 struct rtattr *tb[TCA_HTB_INIT]; 1005 struct nlattr *tb[TCA_HTB_INIT + 1];
1003 struct tc_htb_glob *gopt; 1006 struct tc_htb_glob *gopt;
1007 int err;
1004 int i; 1008 int i;
1005 if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || 1009
1006 tb[TCA_HTB_INIT - 1] == NULL || 1010 if (!opt)
1007 RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) { 1011 return -EINVAL;
1012
1013 err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
1014 if (err < 0)
1015 return err;
1016
1017 if (tb[TCA_HTB_INIT] == NULL) {
1008 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); 1018 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
1009 return -EINVAL; 1019 return -EINVAL;
1010 } 1020 }
1011 gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]); 1021 gopt = nla_data(tb[TCA_HTB_INIT]);
1012 if (gopt->version != HTB_VER >> 16) { 1022 if (gopt->version != HTB_VER >> 16) {
1013 printk(KERN_ERR 1023 printk(KERN_ERR
1014 "HTB: need tc/htb version %d (minor is %d), you have %d\n", 1024 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
@@ -1039,25 +1049,29 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1039static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) 1049static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1040{ 1050{
1041 struct htb_sched *q = qdisc_priv(sch); 1051 struct htb_sched *q = qdisc_priv(sch);
1042 unsigned char *b = skb_tail_pointer(skb); 1052 struct nlattr *nest;
1043 struct rtattr *rta;
1044 struct tc_htb_glob gopt; 1053 struct tc_htb_glob gopt;
1054
1045 spin_lock_bh(&sch->dev->queue_lock); 1055 spin_lock_bh(&sch->dev->queue_lock);
1046 gopt.direct_pkts = q->direct_pkts;
1047 1056
1057 gopt.direct_pkts = q->direct_pkts;
1048 gopt.version = HTB_VER; 1058 gopt.version = HTB_VER;
1049 gopt.rate2quantum = q->rate2quantum; 1059 gopt.rate2quantum = q->rate2quantum;
1050 gopt.defcls = q->defcls; 1060 gopt.defcls = q->defcls;
1051 gopt.debug = 0; 1061 gopt.debug = 0;
1052 rta = (struct rtattr *)b; 1062
1053 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1063 nest = nla_nest_start(skb, TCA_OPTIONS);
1054 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); 1064 if (nest == NULL)
1055 rta->rta_len = skb_tail_pointer(skb) - b; 1065 goto nla_put_failure;
1066 NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1067 nla_nest_end(skb, nest);
1068
1056 spin_unlock_bh(&sch->dev->queue_lock); 1069 spin_unlock_bh(&sch->dev->queue_lock);
1057 return skb->len; 1070 return skb->len;
1058rtattr_failure: 1071
1072nla_put_failure:
1059 spin_unlock_bh(&sch->dev->queue_lock); 1073 spin_unlock_bh(&sch->dev->queue_lock);
1060 nlmsg_trim(skb, skb_tail_pointer(skb)); 1074 nla_nest_cancel(skb, nest);
1061 return -1; 1075 return -1;
1062} 1076}
1063 1077
@@ -1065,8 +1079,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1065 struct sk_buff *skb, struct tcmsg *tcm) 1079 struct sk_buff *skb, struct tcmsg *tcm)
1066{ 1080{
1067 struct htb_class *cl = (struct htb_class *)arg; 1081 struct htb_class *cl = (struct htb_class *)arg;
1068 unsigned char *b = skb_tail_pointer(skb); 1082 struct nlattr *nest;
1069 struct rtattr *rta;
1070 struct tc_htb_opt opt; 1083 struct tc_htb_opt opt;
1071 1084
1072 spin_lock_bh(&sch->dev->queue_lock); 1085 spin_lock_bh(&sch->dev->queue_lock);
@@ -1075,8 +1088,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1075 if (!cl->level && cl->un.leaf.q) 1088 if (!cl->level && cl->un.leaf.q)
1076 tcm->tcm_info = cl->un.leaf.q->handle; 1089 tcm->tcm_info = cl->un.leaf.q->handle;
1077 1090
1078 rta = (struct rtattr *)b; 1091 nest = nla_nest_start(skb, TCA_OPTIONS);
1079 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1092 if (nest == NULL)
1093 goto nla_put_failure;
1080 1094
1081 memset(&opt, 0, sizeof(opt)); 1095 memset(&opt, 0, sizeof(opt));
1082 1096
@@ -1087,13 +1101,15 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1087 opt.quantum = cl->un.leaf.quantum; 1101 opt.quantum = cl->un.leaf.quantum;
1088 opt.prio = cl->un.leaf.prio; 1102 opt.prio = cl->un.leaf.prio;
1089 opt.level = cl->level; 1103 opt.level = cl->level;
1090 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); 1104 NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1091 rta->rta_len = skb_tail_pointer(skb) - b; 1105
1106 nla_nest_end(skb, nest);
1092 spin_unlock_bh(&sch->dev->queue_lock); 1107 spin_unlock_bh(&sch->dev->queue_lock);
1093 return skb->len; 1108 return skb->len;
1094rtattr_failure: 1109
1110nla_put_failure:
1095 spin_unlock_bh(&sch->dev->queue_lock); 1111 spin_unlock_bh(&sch->dev->queue_lock);
1096 nlmsg_trim(skb, b); 1112 nla_nest_cancel(skb, nest);
1097 return -1; 1113 return -1;
1098} 1114}
1099 1115
@@ -1294,29 +1310,35 @@ static void htb_put(struct Qdisc *sch, unsigned long arg)
1294} 1310}
1295 1311
1296static int htb_change_class(struct Qdisc *sch, u32 classid, 1312static int htb_change_class(struct Qdisc *sch, u32 classid,
1297 u32 parentid, struct rtattr **tca, 1313 u32 parentid, struct nlattr **tca,
1298 unsigned long *arg) 1314 unsigned long *arg)
1299{ 1315{
1300 int err = -EINVAL; 1316 int err = -EINVAL;
1301 struct htb_sched *q = qdisc_priv(sch); 1317 struct htb_sched *q = qdisc_priv(sch);
1302 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1318 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303 struct rtattr *opt = tca[TCA_OPTIONS - 1]; 1319 struct nlattr *opt = tca[TCA_OPTIONS];
1304 struct qdisc_rate_table *rtab = NULL, *ctab = NULL; 1320 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1305 struct rtattr *tb[TCA_HTB_RTAB]; 1321 struct nlattr *tb[TCA_HTB_RTAB + 1];
1306 struct tc_htb_opt *hopt; 1322 struct tc_htb_opt *hopt;
1307 1323
1308 /* extract all subattrs from opt attr */ 1324 /* extract all subattrs from opt attr */
1309 if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) || 1325 if (!opt)
1310 tb[TCA_HTB_PARMS - 1] == NULL || 1326 goto failure;
1311 RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt)) 1327
1328 err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
1329 if (err < 0)
1330 goto failure;
1331
1332 err = -EINVAL;
1333 if (tb[TCA_HTB_PARMS] == NULL)
1312 goto failure; 1334 goto failure;
1313 1335
1314 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); 1336 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1315 1337
1316 hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]); 1338 hopt = nla_data(tb[TCA_HTB_PARMS]);
1317 1339
1318 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]); 1340 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1319 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]); 1341 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1320 if (!rtab || !ctab) 1342 if (!rtab || !ctab)
1321 goto failure; 1343 goto failure;
1322 1344
@@ -1324,12 +1346,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1324 struct Qdisc *new_q; 1346 struct Qdisc *new_q;
1325 int prio; 1347 int prio;
1326 struct { 1348 struct {
1327 struct rtattr rta; 1349 struct nlattr nla;
1328 struct gnet_estimator opt; 1350 struct gnet_estimator opt;
1329 } est = { 1351 } est = {
1330 .rta = { 1352 .nla = {
1331 .rta_len = RTA_LENGTH(sizeof(est.opt)), 1353 .nla_len = nla_attr_size(sizeof(est.opt)),
1332 .rta_type = TCA_RATE, 1354 .nla_type = TCA_RATE,
1333 }, 1355 },
1334 .opt = { 1356 .opt = {
1335 /* 4s interval, 16s averaging constant */ 1357 /* 4s interval, 16s averaging constant */
@@ -1354,7 +1376,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1354 1376
1355 gen_new_estimator(&cl->bstats, &cl->rate_est, 1377 gen_new_estimator(&cl->bstats, &cl->rate_est,
1356 &sch->dev->queue_lock, 1378 &sch->dev->queue_lock,
1357 tca[TCA_RATE-1] ? : &est.rta); 1379 tca[TCA_RATE] ? : &est.nla);
1358 cl->refcnt = 1; 1380 cl->refcnt = 1;
1359 INIT_LIST_HEAD(&cl->sibling); 1381 INIT_LIST_HEAD(&cl->sibling);
1360 INIT_HLIST_NODE(&cl->hlist); 1382 INIT_HLIST_NODE(&cl->hlist);
@@ -1407,10 +1429,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1407 list_add_tail(&cl->sibling, 1429 list_add_tail(&cl->sibling,
1408 parent ? &parent->children : &q->root); 1430 parent ? &parent->children : &q->root);
1409 } else { 1431 } else {
1410 if (tca[TCA_RATE-1]) 1432 if (tca[TCA_RATE])
1411 gen_replace_estimator(&cl->bstats, &cl->rate_est, 1433 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1412 &sch->dev->queue_lock, 1434 &sch->dev->queue_lock,
1413 tca[TCA_RATE-1]); 1435 tca[TCA_RATE]);
1414 sch_tree_lock(sch); 1436 sch_tree_lock(sch);
1415 } 1437 }
1416 1438
@@ -1529,7 +1551,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1529 } 1551 }
1530} 1552}
1531 1553
1532static struct Qdisc_class_ops htb_class_ops = { 1554static const struct Qdisc_class_ops htb_class_ops = {
1533 .graft = htb_graft, 1555 .graft = htb_graft,
1534 .leaf = htb_leaf, 1556 .leaf = htb_leaf,
1535 .qlen_notify = htb_qlen_notify, 1557 .qlen_notify = htb_qlen_notify,
@@ -1545,7 +1567,7 @@ static struct Qdisc_class_ops htb_class_ops = {
1545 .dump_stats = htb_dump_class_stats, 1567 .dump_stats = htb_dump_class_stats,
1546}; 1568};
1547 1569
1548static struct Qdisc_ops htb_qdisc_ops = { 1570static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1549 .next = NULL, 1571 .next = NULL,
1550 .cl_ops = &htb_class_ops, 1572 .cl_ops = &htb_class_ops,
1551 .id = "htb", 1573 .id = "htb",
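htb_change_class() keeps its long-standing trick of synthesizing a default rate-estimator option on the stack when userspace sends no TCA_RATE; only the framing changes, with nla_attr_size() supplying the header-plus-payload length where RTA_LENGTH() did before. A sketch of the shape (the wrapper name is hypothetical; the interval/ewma_log values are those htb uses for its "4s interval, 16s averaging constant" default):

#include <net/netlink.h>
#include <net/gen_stats.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>

static void attach_estimator_sketch(struct gnet_stats_basic *bstats,
				    struct gnet_stats_rate_est *rate_est,
				    spinlock_t *stats_lock,
				    struct nlattr *user_rate)
{
	struct {
		struct nlattr		nla;
		struct gnet_estimator	opt;
	} est = {
		.nla = {
			/* NLA_HDRLEN + payload, computed for us */
			.nla_len	= nla_attr_size(sizeof(est.opt)),
			.nla_type	= TCA_RATE,
		},
		.opt = {
			.interval	= 2,	/* ~4s sampling interval */
			.ewma_log	= 2,	/* ~16s averaging constant */
		},
	};

	/* Use the caller-supplied TCA_RATE if present, else the default. */
	gen_new_estimator(bstats, rate_est, stats_lock,
			  user_rate ? : &est.nla);
}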
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 3f8335e6ea2e..3f72d528273c 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -19,127 +19,71 @@
19#include <net/pkt_sched.h> 19#include <net/pkt_sched.h>
20 20
21 21
22#undef DEBUG_INGRESS 22/* Thanks to Doron Oz for this hack */
23 23#if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
24#ifdef DEBUG_INGRESS /* control */
25#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
26#else
27#define DPRINTK(format,args...)
28#endif
29
30#if 0 /* data */
31#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
32#else
33#define D2PRINTK(format,args...)
34#endif
35
36
37#define PRIV(sch) qdisc_priv(sch)
38
39
40/* Thanks to Doron Oz for this hack
41*/
42#ifndef CONFIG_NET_CLS_ACT
43#ifdef CONFIG_NETFILTER
44static int nf_registered; 24static int nf_registered;
45#endif 25#endif
46#endif
47 26
48struct ingress_qdisc_data { 27struct ingress_qdisc_data {
49 struct Qdisc *q;
50 struct tcf_proto *filter_list; 28 struct tcf_proto *filter_list;
51}; 29};
52 30
53
54/* ------------------------- Class/flow operations ------------------------- */ 31/* ------------------------- Class/flow operations ------------------------- */
55 32
56 33static int ingress_graft(struct Qdisc *sch, unsigned long arg,
57static int ingress_graft(struct Qdisc *sch,unsigned long arg, 34 struct Qdisc *new, struct Qdisc **old)
58 struct Qdisc *new,struct Qdisc **old)
59{ 35{
60#ifdef DEBUG_INGRESS 36 return -EOPNOTSUPP;
61 struct ingress_qdisc_data *p = PRIV(sch);
62#endif
63
64 DPRINTK("ingress_graft(sch %p,[qdisc %p],new %p,old %p)\n",
65 sch, p, new, old);
66 DPRINTK("\n ingress_graft: You cannot add qdiscs to classes");
67 return 1;
68} 37}
69 38
70
71static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) 39static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
72{ 40{
73 return NULL; 41 return NULL;
74} 42}
75 43
76 44static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
77static unsigned long ingress_get(struct Qdisc *sch,u32 classid)
78{ 45{
79#ifdef DEBUG_INGRESS
80 struct ingress_qdisc_data *p = PRIV(sch);
81#endif
82 DPRINTK("ingress_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
83 return TC_H_MIN(classid) + 1; 46 return TC_H_MIN(classid) + 1;
84} 47}
85 48
86
87static unsigned long ingress_bind_filter(struct Qdisc *sch, 49static unsigned long ingress_bind_filter(struct Qdisc *sch,
88 unsigned long parent, u32 classid) 50 unsigned long parent, u32 classid)
89{ 51{
90 return ingress_get(sch, classid); 52 return ingress_get(sch, classid);
91} 53}
92 54
93
94static void ingress_put(struct Qdisc *sch, unsigned long cl) 55static void ingress_put(struct Qdisc *sch, unsigned long cl)
95{ 56{
96} 57}
97 58
98
99static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent, 59static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
100 struct rtattr **tca, unsigned long *arg) 60 struct nlattr **tca, unsigned long *arg)
101{ 61{
102#ifdef DEBUG_INGRESS
103 struct ingress_qdisc_data *p = PRIV(sch);
104#endif
105 DPRINTK("ingress_change(sch %p,[qdisc %p],classid %x,parent %x),"
106 "arg 0x%lx\n", sch, p, classid, parent, *arg);
107 DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
108 return 0; 62 return 0;
109} 63}
110 64
111 65static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
112
113static void ingress_walk(struct Qdisc *sch,struct qdisc_walker *walker)
114{ 66{
115#ifdef DEBUG_INGRESS 67 return;
116 struct ingress_qdisc_data *p = PRIV(sch);
117#endif
118 DPRINTK("ingress_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
119 DPRINTK("No effect. sch_ingress doesn't maintain classes at the moment");
120} 68}
121 69
122 70static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
123static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch,unsigned long cl)
124{ 71{
125 struct ingress_qdisc_data *p = PRIV(sch); 72 struct ingress_qdisc_data *p = qdisc_priv(sch);
126 73
127 return &p->filter_list; 74 return &p->filter_list;
128} 75}
129 76
130
131/* --------------------------- Qdisc operations ---------------------------- */ 77/* --------------------------- Qdisc operations ---------------------------- */
132 78
133 79static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
134static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
135{ 80{
136 struct ingress_qdisc_data *p = PRIV(sch); 81 struct ingress_qdisc_data *p = qdisc_priv(sch);
137 struct tcf_result res; 82 struct tcf_result res;
138 int result; 83 int result;
139 84
140 D2PRINTK("ingress_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
141 result = tc_classify(skb, p->filter_list, &res); 85 result = tc_classify(skb, p->filter_list, &res);
142 D2PRINTK("result %d class 0x%04x\n", result, res.classid); 86
143 /* 87 /*
144 * Unlike normal "enqueue" functions, ingress_enqueue returns a 88 * Unlike normal "enqueue" functions, ingress_enqueue returns a
145 * firewall FW_* code. 89 * firewall FW_* code.
@@ -148,23 +92,22 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
148 sch->bstats.packets++; 92 sch->bstats.packets++;
149 sch->bstats.bytes += skb->len; 93 sch->bstats.bytes += skb->len;
150 switch (result) { 94 switch (result) {
151 case TC_ACT_SHOT: 95 case TC_ACT_SHOT:
152 result = TC_ACT_SHOT; 96 result = TC_ACT_SHOT;
153 sch->qstats.drops++; 97 sch->qstats.drops++;
154 break; 98 break;
155 case TC_ACT_STOLEN: 99 case TC_ACT_STOLEN:
156 case TC_ACT_QUEUED: 100 case TC_ACT_QUEUED:
157 result = TC_ACT_STOLEN; 101 result = TC_ACT_STOLEN;
158 break; 102 break;
159 case TC_ACT_RECLASSIFY: 103 case TC_ACT_RECLASSIFY:
160 case TC_ACT_OK: 104 case TC_ACT_OK:
161 skb->tc_index = TC_H_MIN(res.classid); 105 skb->tc_index = TC_H_MIN(res.classid);
162 default: 106 default:
163 result = TC_ACT_OK; 107 result = TC_ACT_OK;
164 break; 108 break;
165 } 109 }
166#else 110#else
167 D2PRINTK("Overriding result to ACCEPT\n");
168 result = NF_ACCEPT; 111 result = NF_ACCEPT;
169 sch->bstats.packets++; 112 sch->bstats.packets++;
170 sch->bstats.bytes += skb->len; 113 sch->bstats.bytes += skb->len;
@@ -173,39 +116,8 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
173 return result; 116 return result;
174} 117}
175 118
176 119#if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
177static struct sk_buff *ingress_dequeue(struct Qdisc *sch) 120static unsigned int ing_hook(unsigned int hook, struct sk_buff *skb,
178{
179/*
180 struct ingress_qdisc_data *p = PRIV(sch);
181 D2PRINTK("ingress_dequeue(sch %p,[qdisc %p])\n",sch,PRIV(p));
182*/
183 return NULL;
184}
185
186
187static int ingress_requeue(struct sk_buff *skb,struct Qdisc *sch)
188{
189/*
190 struct ingress_qdisc_data *p = PRIV(sch);
191 D2PRINTK("ingress_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,PRIV(p));
192*/
193 return 0;
194}
195
196static unsigned int ingress_drop(struct Qdisc *sch)
197{
198#ifdef DEBUG_INGRESS
199 struct ingress_qdisc_data *p = PRIV(sch);
200#endif
201 DPRINTK("ingress_drop(sch %p,[qdisc %p])\n", sch, p);
202 return 0;
203}
204
205#ifndef CONFIG_NET_CLS_ACT
206#ifdef CONFIG_NETFILTER
207static unsigned int
208ing_hook(unsigned int hook, struct sk_buff *skb,
209 const struct net_device *indev, 121 const struct net_device *indev,
210 const struct net_device *outdev, 122 const struct net_device *outdev,
211 int (*okfn)(struct sk_buff *)) 123 int (*okfn)(struct sk_buff *))
@@ -213,12 +125,7 @@ ing_hook(unsigned int hook, struct sk_buff *skb,
213 125
214 struct Qdisc *q; 126 struct Qdisc *q;
215 struct net_device *dev = skb->dev; 127 struct net_device *dev = skb->dev;
216 int fwres=NF_ACCEPT; 128 int fwres = NF_ACCEPT;
217
218 DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
219 skb->sk ? "(owned)" : "(unowned)",
220 skb->dev ? skb->dev->name : "(no dev)",
221 skb->len);
222 129
223 if (dev->qdisc_ingress) { 130 if (dev->qdisc_ingress) {
224 spin_lock(&dev->ingress_lock); 131 spin_lock(&dev->ingress_lock);
@@ -231,168 +138,101 @@ ing_hook(unsigned int hook, struct sk_buff *skb,
231} 138}
232 139
233/* after ipt_filter */ 140/* after ipt_filter */
234static struct nf_hook_ops ing_ops = { 141static struct nf_hook_ops ing_ops[] __read_mostly = {
235 .hook = ing_hook, 142 {
236 .owner = THIS_MODULE, 143 .hook = ing_hook,
237 .pf = PF_INET, 144 .owner = THIS_MODULE,
238 .hooknum = NF_IP_PRE_ROUTING, 145 .pf = PF_INET,
239 .priority = NF_IP_PRI_FILTER + 1, 146 .hooknum = NF_INET_PRE_ROUTING,
240}; 147 .priority = NF_IP_PRI_FILTER + 1,
241 148 },
242static struct nf_hook_ops ing6_ops = { 149 {
243 .hook = ing_hook, 150 .hook = ing_hook,
244 .owner = THIS_MODULE, 151 .owner = THIS_MODULE,
245 .pf = PF_INET6, 152 .pf = PF_INET6,
246 .hooknum = NF_IP6_PRE_ROUTING, 153 .hooknum = NF_INET_PRE_ROUTING,
247 .priority = NF_IP6_PRI_FILTER + 1, 154 .priority = NF_IP6_PRI_FILTER + 1,
155 },
248}; 156};
249
250#endif
251#endif 157#endif
252 158
253static int ingress_init(struct Qdisc *sch,struct rtattr *opt) 159static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
254{ 160{
255 struct ingress_qdisc_data *p = PRIV(sch); 161#if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
256
257/* Make sure either netfilter or preferably CLS_ACT is
258* compiled in */
259#ifndef CONFIG_NET_CLS_ACT
260#ifndef CONFIG_NETFILTER
261 printk("You MUST compile classifier actions into the kernel\n");
262 return -EINVAL;
263#else
264 printk("Ingress scheduler: Classifier actions prefered over netfilter\n"); 162 printk("Ingress scheduler: Classifier actions prefered over netfilter\n");
265#endif
266#endif
267 163
268#ifndef CONFIG_NET_CLS_ACT
269#ifdef CONFIG_NETFILTER
270 if (!nf_registered) { 164 if (!nf_registered) {
271 if (nf_register_hook(&ing_ops) < 0) { 165 if (nf_register_hooks(ing_ops, ARRAY_SIZE(ing_ops)) < 0) {
272 printk("ingress qdisc registration error \n"); 166 printk("ingress qdisc registration error \n");
273 return -EINVAL; 167 return -EINVAL;
274 } 168 }
275 nf_registered++; 169 nf_registered++;
276
277 if (nf_register_hook(&ing6_ops) < 0) {
278 printk("IPv6 ingress qdisc registration error, " \
279 "disabling IPv6 support.\n");
280 } else
281 nf_registered++;
282 } 170 }
283#endif 171#endif
284#endif
285
286 DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
287 p->q = &noop_qdisc;
288 return 0; 172 return 0;
289} 173}
290 174
291
292static void ingress_reset(struct Qdisc *sch)
293{
294 struct ingress_qdisc_data *p = PRIV(sch);
295
296 DPRINTK("ingress_reset(sch %p,[qdisc %p])\n", sch, p);
297
298/*
299#if 0
300*/
301/* for future use */
302 qdisc_reset(p->q);
303/*
304#endif
305*/
306}
307
308/* ------------------------------------------------------------- */
309
310
311/* ------------------------------------------------------------- */ 175/* ------------------------------------------------------------- */
312 176
313static void ingress_destroy(struct Qdisc *sch) 177static void ingress_destroy(struct Qdisc *sch)
314{ 178{
315 struct ingress_qdisc_data *p = PRIV(sch); 179 struct ingress_qdisc_data *p = qdisc_priv(sch);
316 180
317 DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
318 tcf_destroy_chain(p->filter_list); 181 tcf_destroy_chain(p->filter_list);
319#if 0
320/* for future use */
321 qdisc_destroy(p->q);
322#endif
323} 182}
324 183
325
326static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) 184static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
327{ 185{
328 unsigned char *b = skb_tail_pointer(skb); 186 struct nlattr *nest;
329 struct rtattr *rta;
330 187
331 rta = (struct rtattr *) b; 188 nest = nla_nest_start(skb, TCA_OPTIONS);
332 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 189 if (nest == NULL)
333 rta->rta_len = skb_tail_pointer(skb) - b; 190 goto nla_put_failure;
191 nla_nest_end(skb, nest);
334 return skb->len; 192 return skb->len;
335 193
336rtattr_failure: 194nla_put_failure:
337 nlmsg_trim(skb, b); 195 nla_nest_cancel(skb, nest);
338 return -1; 196 return -1;
339} 197}
340 198
341static struct Qdisc_class_ops ingress_class_ops = { 199static const struct Qdisc_class_ops ingress_class_ops = {
342 .graft = ingress_graft, 200 .graft = ingress_graft,
343 .leaf = ingress_leaf, 201 .leaf = ingress_leaf,
344 .get = ingress_get, 202 .get = ingress_get,
345 .put = ingress_put, 203 .put = ingress_put,
346 .change = ingress_change, 204 .change = ingress_change,
347 .delete = NULL,
348 .walk = ingress_walk, 205 .walk = ingress_walk,
349 .tcf_chain = ingress_find_tcf, 206 .tcf_chain = ingress_find_tcf,
350 .bind_tcf = ingress_bind_filter, 207 .bind_tcf = ingress_bind_filter,
351 .unbind_tcf = ingress_put, 208 .unbind_tcf = ingress_put,
352 .dump = NULL,
353}; 209};
354 210
355static struct Qdisc_ops ingress_qdisc_ops = { 211static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
356 .next = NULL,
357 .cl_ops = &ingress_class_ops, 212 .cl_ops = &ingress_class_ops,
358 .id = "ingress", 213 .id = "ingress",
359 .priv_size = sizeof(struct ingress_qdisc_data), 214 .priv_size = sizeof(struct ingress_qdisc_data),
360 .enqueue = ingress_enqueue, 215 .enqueue = ingress_enqueue,
361 .dequeue = ingress_dequeue,
362 .requeue = ingress_requeue,
363 .drop = ingress_drop,
364 .init = ingress_init, 216 .init = ingress_init,
365 .reset = ingress_reset,
366 .destroy = ingress_destroy, 217 .destroy = ingress_destroy,
367 .change = NULL,
368 .dump = ingress_dump, 218 .dump = ingress_dump,
369 .owner = THIS_MODULE, 219 .owner = THIS_MODULE,
370}; 220};
371 221
372static int __init ingress_module_init(void) 222static int __init ingress_module_init(void)
373{ 223{
374 int ret = 0; 224 return register_qdisc(&ingress_qdisc_ops);
375
376 if ((ret = register_qdisc(&ingress_qdisc_ops)) < 0) {
377 printk("Unable to register Ingress qdisc\n");
378 return ret;
379 }
380
381 return ret;
382} 225}
226
383static void __exit ingress_module_exit(void) 227static void __exit ingress_module_exit(void)
384{ 228{
385 unregister_qdisc(&ingress_qdisc_ops); 229 unregister_qdisc(&ingress_qdisc_ops);
386#ifndef CONFIG_NET_CLS_ACT 230#if !defined(CONFIG_NET_CLS_ACT) && defined(CONFIG_NETFILTER)
387#ifdef CONFIG_NETFILTER 231 if (nf_registered)
388 if (nf_registered) { 232 nf_unregister_hooks(ing_ops, ARRAY_SIZE(ing_ops));
389 nf_unregister_hook(&ing_ops);
390 if (nf_registered > 1)
391 nf_unregister_hook(&ing6_ops);
392 }
393#endif
394#endif 233#endif
395} 234}
235
396module_init(ingress_module_init) 236module_init(ingress_module_init)
397module_exit(ingress_module_exit) 237module_exit(ingress_module_exit)
398MODULE_LICENSE("GPL"); 238MODULE_LICENSE("GPL");
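sch_ingress.c gets the deepest cut: the do-nothing dequeue/requeue/drop stubs, the embedded noop child qdisc, and the private debug macros all go away, and the separate IPv4/IPv6 netfilter hooks become a single ing_ops[] array handled by nf_register_hooks(). That API registers every entry and, on failure partway through, unregisters the ones that succeeded before returning the error, which is what lets the old "count how many hooks registered" bookkeeping disappear. A sketch of the array form (hook function and module names are hypothetical):

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

static unsigned int example_hook(unsigned int hooknum, struct sk_buff *skb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	return NF_ACCEPT;	/* pass everything through */
}

static struct nf_hook_ops example_ops[] __read_mostly = {
	{
		.hook		= example_hook,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_FILTER + 1,
	},
	{
		.hook		= example_hook,
		.owner		= THIS_MODULE,
		.pf		= PF_INET6,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP6_PRI_FILTER + 1,
	},
};

static int __init example_init(void)
{
	/* All-or-nothing: partial registration is rolled back for us. */
	return nf_register_hooks(example_ops, ARRAY_SIZE(example_ops));
}

static void __exit example_exit(void)
{
	nf_unregister_hooks(example_ops, ARRAY_SIZE(example_ops));
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");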
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 9e5e87e81f00..c9c649b26eaa 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -313,21 +313,21 @@ static void netem_reset(struct Qdisc *sch)
313/* Pass size change message down to embedded FIFO */ 313/* Pass size change message down to embedded FIFO */
314static int set_fifo_limit(struct Qdisc *q, int limit) 314static int set_fifo_limit(struct Qdisc *q, int limit)
315{ 315{
316 struct rtattr *rta; 316 struct nlattr *nla;
317 int ret = -ENOMEM; 317 int ret = -ENOMEM;
318 318
319 /* Hack to avoid sending change message to non-FIFO */ 319 /* Hack to avoid sending change message to non-FIFO */
320 if (strncmp(q->ops->id + 1, "fifo", 4) != 0) 320 if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
321 return 0; 321 return 0;
322 322
323 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); 323 nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
324 if (rta) { 324 if (nla) {
325 rta->rta_type = RTM_NEWQDISC; 325 nla->nla_type = RTM_NEWQDISC;
326 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 326 nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
327 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit; 327 ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
328 328
329 ret = q->ops->change(q, rta); 329 ret = q->ops->change(q, nla);
330 kfree(rta); 330 kfree(nla);
331 } 331 }
332 return ret; 332 return ret;
333} 333}
@@ -336,11 +336,11 @@ static int set_fifo_limit(struct Qdisc *q, int limit)
336 * Distribution data is a variable size payload containing 336 * Distribution data is a variable size payload containing
337 * signed 16 bit values. 337 * signed 16 bit values.
338 */ 338 */
339static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr) 339static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
340{ 340{
341 struct netem_sched_data *q = qdisc_priv(sch); 341 struct netem_sched_data *q = qdisc_priv(sch);
342 unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16); 342 unsigned long n = nla_len(attr)/sizeof(__s16);
343 const __s16 *data = RTA_DATA(attr); 343 const __s16 *data = nla_data(attr);
344 struct disttable *d; 344 struct disttable *d;
345 int i; 345 int i;
346 346
@@ -363,13 +363,10 @@ static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
363 return 0; 363 return 0;
364} 364}
365 365
366static int get_correlation(struct Qdisc *sch, const struct rtattr *attr) 366static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
367{ 367{
368 struct netem_sched_data *q = qdisc_priv(sch); 368 struct netem_sched_data *q = qdisc_priv(sch);
369 const struct tc_netem_corr *c = RTA_DATA(attr); 369 const struct tc_netem_corr *c = nla_data(attr);
370
371 if (RTA_PAYLOAD(attr) != sizeof(*c))
372 return -EINVAL;
373 370
374 init_crandom(&q->delay_cor, c->delay_corr); 371 init_crandom(&q->delay_cor, c->delay_corr);
375 init_crandom(&q->loss_cor, c->loss_corr); 372 init_crandom(&q->loss_cor, c->loss_corr);
@@ -377,43 +374,48 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
377 return 0; 374 return 0;
378} 375}
379 376
380static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) 377static int get_reorder(struct Qdisc *sch, const struct nlattr *attr)
381{ 378{
382 struct netem_sched_data *q = qdisc_priv(sch); 379 struct netem_sched_data *q = qdisc_priv(sch);
383 const struct tc_netem_reorder *r = RTA_DATA(attr); 380 const struct tc_netem_reorder *r = nla_data(attr);
384
385 if (RTA_PAYLOAD(attr) != sizeof(*r))
386 return -EINVAL;
387 381
388 q->reorder = r->probability; 382 q->reorder = r->probability;
389 init_crandom(&q->reorder_cor, r->correlation); 383 init_crandom(&q->reorder_cor, r->correlation);
390 return 0; 384 return 0;
391} 385}
392 386
393static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr) 387static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
394{ 388{
395 struct netem_sched_data *q = qdisc_priv(sch); 389 struct netem_sched_data *q = qdisc_priv(sch);
396 const struct tc_netem_corrupt *r = RTA_DATA(attr); 390 const struct tc_netem_corrupt *r = nla_data(attr);
397
398 if (RTA_PAYLOAD(attr) != sizeof(*r))
399 return -EINVAL;
400 391
401 q->corrupt = r->probability; 392 q->corrupt = r->probability;
402 init_crandom(&q->corrupt_cor, r->correlation); 393 init_crandom(&q->corrupt_cor, r->correlation);
403 return 0; 394 return 0;
404} 395}
405 396
397static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
398 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
399 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
400 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
401};
402
406/* Parse netlink message to set options */ 403/* Parse netlink message to set options */
407static int netem_change(struct Qdisc *sch, struct rtattr *opt) 404static int netem_change(struct Qdisc *sch, struct nlattr *opt)
408{ 405{
409 struct netem_sched_data *q = qdisc_priv(sch); 406 struct netem_sched_data *q = qdisc_priv(sch);
407 struct nlattr *tb[TCA_NETEM_MAX + 1];
410 struct tc_netem_qopt *qopt; 408 struct tc_netem_qopt *qopt;
411 int ret; 409 int ret;
412 410
413 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 411 if (opt == NULL)
414 return -EINVAL; 412 return -EINVAL;
415 413
416 qopt = RTA_DATA(opt); 414 ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy,
415 qopt, sizeof(*qopt));
416 if (ret < 0)
417 return ret;
418
417 ret = set_fifo_limit(q->qdisc, qopt->limit); 419 ret = set_fifo_limit(q->qdisc, qopt->limit);
418 if (ret) { 420 if (ret) {
419 pr_debug("netem: can't set fifo limit\n"); 421 pr_debug("netem: can't set fifo limit\n");
@@ -434,39 +436,28 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
434 if (q->gap) 436 if (q->gap)
435 q->reorder = ~0; 437 q->reorder = ~0;
436 438
437 /* Handle nested options after initial queue options. 439 if (tb[TCA_NETEM_CORR]) {
438 * Should have put all options in nested format but too late now. 440 ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
439 */ 441 if (ret)
440 if (RTA_PAYLOAD(opt) > sizeof(*qopt)) { 442 return ret;
441 struct rtattr *tb[TCA_NETEM_MAX]; 443 }
442 if (rtattr_parse(tb, TCA_NETEM_MAX,
443 RTA_DATA(opt) + sizeof(*qopt),
444 RTA_PAYLOAD(opt) - sizeof(*qopt)))
445 return -EINVAL;
446
447 if (tb[TCA_NETEM_CORR-1]) {
448 ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
449 if (ret)
450 return ret;
451 }
452 444
453 if (tb[TCA_NETEM_DELAY_DIST-1]) { 445 if (tb[TCA_NETEM_DELAY_DIST]) {
454 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]); 446 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
455 if (ret) 447 if (ret)
456 return ret; 448 return ret;
457 } 449 }
458 450
459 if (tb[TCA_NETEM_REORDER-1]) { 451 if (tb[TCA_NETEM_REORDER]) {
460 ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); 452 ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
461 if (ret) 453 if (ret)
462 return ret; 454 return ret;
463 } 455 }
464 456
465 if (tb[TCA_NETEM_CORRUPT-1]) { 457 if (tb[TCA_NETEM_CORRUPT]) {
466 ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]); 458 ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
467 if (ret) 459 if (ret)
468 return ret; 460 return ret;
469 }
470 } 461 }
471 462
472 return 0; 463 return 0;
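
netem keeps its legacy option format, a fixed struct tc_netem_qopt immediately followed by ordinary attributes, which is exactly the case nla_parse_nested_compat() exists for: it length-checks the leading struct and runs the trailing attributes through netem_policy in one call, so the hand-rolled RTA_PAYLOAD() checks and the old zero-based tb[TCA_NETEM_*-1] indexing both disappear. A standalone userspace sketch of that compat layout (simplified: the struct and attribute here are stand-ins, not the real tc_netem_qopt or TCA_NETEM_CORR encoding):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct nlattr {			/* mirrors <linux/netlink.h> */
	uint16_t nla_len;	/* header plus payload, unaligned */
	uint16_t nla_type;
};

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	((int) NLA_ALIGN(sizeof(struct nlattr)))

int main(void)
{
	unsigned char buf[64];
	struct { uint32_t limit; } qopt = { 1000 };	/* stand-in fixed struct */
	uint32_t corr = 42;				/* stand-in nested payload */
	struct nlattr *nla;
	size_t off;

	/* the fixed struct comes first ... */
	memcpy(buf, &qopt, sizeof(qopt));
	off = NLA_ALIGN(sizeof(qopt));

	/* ... then normal TLV attributes, which the kernel side walks
	 * into tb[] via nla_parse_nested_compat() */
	nla = (struct nlattr *)(buf + off);
	nla->nla_type = 1;
	nla->nla_len = NLA_HDRLEN + sizeof(corr);
	memcpy((unsigned char *)nla + NLA_HDRLEN, &corr, sizeof(corr));
	off += NLA_ALIGN(nla->nla_len);

	printf("total option payload: %zu bytes\n", off);
	return 0;
}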
@@ -515,13 +506,13 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
515 return qdisc_reshape_fail(nskb, sch); 506 return qdisc_reshape_fail(nskb, sch);
516} 507}
517 508
518static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) 509static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
519{ 510{
520 struct fifo_sched_data *q = qdisc_priv(sch); 511 struct fifo_sched_data *q = qdisc_priv(sch);
521 512
522 if (opt) { 513 if (opt) {
523 struct tc_fifo_qopt *ctl = RTA_DATA(opt); 514 struct tc_fifo_qopt *ctl = nla_data(opt);
524 if (RTA_PAYLOAD(opt) < sizeof(*ctl)) 515 if (nla_len(opt) < sizeof(*ctl))
525 return -EINVAL; 516 return -EINVAL;
526 517
527 q->limit = ctl->limit; 518 q->limit = ctl->limit;
@@ -537,14 +528,14 @@ static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
537 struct fifo_sched_data *q = qdisc_priv(sch); 528 struct fifo_sched_data *q = qdisc_priv(sch);
538 struct tc_fifo_qopt opt = { .limit = q->limit }; 529 struct tc_fifo_qopt opt = { .limit = q->limit };
539 530
540 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 531 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
541 return skb->len; 532 return skb->len;
542 533
543rtattr_failure: 534nla_put_failure:
544 return -1; 535 return -1;
545} 536}
546 537
547static struct Qdisc_ops tfifo_qdisc_ops = { 538static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
548 .id = "tfifo", 539 .id = "tfifo",
549 .priv_size = sizeof(struct fifo_sched_data), 540 .priv_size = sizeof(struct fifo_sched_data),
550 .enqueue = tfifo_enqueue, 541 .enqueue = tfifo_enqueue,
@@ -557,7 +548,7 @@ static struct Qdisc_ops tfifo_qdisc_ops = {
557 .dump = tfifo_dump, 548 .dump = tfifo_dump,
558}; 549};
559 550
560static int netem_init(struct Qdisc *sch, struct rtattr *opt) 551static int netem_init(struct Qdisc *sch, struct nlattr *opt)
561{ 552{
562 struct netem_sched_data *q = qdisc_priv(sch); 553 struct netem_sched_data *q = qdisc_priv(sch);
563 int ret; 554 int ret;
@@ -595,7 +586,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
595{ 586{
596 const struct netem_sched_data *q = qdisc_priv(sch); 587 const struct netem_sched_data *q = qdisc_priv(sch);
597 unsigned char *b = skb_tail_pointer(skb); 588 unsigned char *b = skb_tail_pointer(skb);
598 struct rtattr *rta = (struct rtattr *) b; 589 struct nlattr *nla = (struct nlattr *) b;
599 struct tc_netem_qopt qopt; 590 struct tc_netem_qopt qopt;
600 struct tc_netem_corr cor; 591 struct tc_netem_corr cor;
601 struct tc_netem_reorder reorder; 592 struct tc_netem_reorder reorder;
@@ -607,26 +598,26 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
607 qopt.loss = q->loss; 598 qopt.loss = q->loss;
608 qopt.gap = q->gap; 599 qopt.gap = q->gap;
609 qopt.duplicate = q->duplicate; 600 qopt.duplicate = q->duplicate;
610 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 601 NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
611 602
612 cor.delay_corr = q->delay_cor.rho; 603 cor.delay_corr = q->delay_cor.rho;
613 cor.loss_corr = q->loss_cor.rho; 604 cor.loss_corr = q->loss_cor.rho;
614 cor.dup_corr = q->dup_cor.rho; 605 cor.dup_corr = q->dup_cor.rho;
615 RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); 606 NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
616 607
617 reorder.probability = q->reorder; 608 reorder.probability = q->reorder;
618 reorder.correlation = q->reorder_cor.rho; 609 reorder.correlation = q->reorder_cor.rho;
619 RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); 610 NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
620 611
621 corrupt.probability = q->corrupt; 612 corrupt.probability = q->corrupt;
622 corrupt.correlation = q->corrupt_cor.rho; 613 corrupt.correlation = q->corrupt_cor.rho;
623 RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 614 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
624 615
625 rta->rta_len = skb_tail_pointer(skb) - b; 616 nla->nla_len = skb_tail_pointer(skb) - b;
626 617
627 return skb->len; 618 return skb->len;
628 619
629rtattr_failure: 620nla_put_failure:
630 nlmsg_trim(skb, b); 621 nlmsg_trim(skb, b);
631 return -1; 622 return -1;
632} 623}
@@ -678,7 +669,7 @@ static void netem_put(struct Qdisc *sch, unsigned long arg)
678} 669}
679 670
680static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 671static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
681 struct rtattr **tca, unsigned long *arg) 672 struct nlattr **tca, unsigned long *arg)
682{ 673{
683 return -ENOSYS; 674 return -ENOSYS;
684} 675}
@@ -705,7 +696,7 @@ static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
705 return NULL; 696 return NULL;
706} 697}
707 698
708static struct Qdisc_class_ops netem_class_ops = { 699static const struct Qdisc_class_ops netem_class_ops = {
709 .graft = netem_graft, 700 .graft = netem_graft,
710 .leaf = netem_leaf, 701 .leaf = netem_leaf,
711 .get = netem_get, 702 .get = netem_get,
@@ -717,7 +708,7 @@ static struct Qdisc_class_ops netem_class_ops = {
717 .dump = netem_dump_class, 708 .dump = netem_dump_class,
718}; 709};
719 710
720static struct Qdisc_ops netem_qdisc_ops = { 711static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
721 .id = "netem", 712 .id = "netem",
722 .cl_ops = &netem_class_ops, 713 .cl_ops = &netem_class_ops,
723 .priv_size = sizeof(struct netem_sched_data), 714 .priv_size = sizeof(struct netem_sched_data),
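
Throughout netem_dump() the RTA_PUT()/rtattr_failure pair becomes NLA_PUT()/nla_put_failure, which is why every converted dump routine must carry an nla_put_failure label. The macro is a thin goto wrapper; approximately (a paraphrase of the era's include/net/netlink.h, not a verbatim quote):

/* Emit one attribute or jump to the enclosing function's
 * nla_put_failure label when skb tailroom runs out. */
#define NLA_PUT(skb, attrtype, attrlen, data)			\
	do {							\
		if (nla_put(skb, attrtype, attrlen, data) < 0)	\
			goto nla_put_failure;			\
	} while (0)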
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index de894096e442..4aa2b45dad0a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -224,16 +224,19 @@ prio_destroy(struct Qdisc* sch)
224 qdisc_destroy(q->queues[prio]); 224 qdisc_destroy(q->queues[prio]);
225} 225}
226 226
227static int prio_tune(struct Qdisc *sch, struct rtattr *opt) 227static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
228{ 228{
229 struct prio_sched_data *q = qdisc_priv(sch); 229 struct prio_sched_data *q = qdisc_priv(sch);
230 struct tc_prio_qopt *qopt; 230 struct tc_prio_qopt *qopt;
231 struct rtattr *tb[TCA_PRIO_MAX]; 231 struct nlattr *tb[TCA_PRIO_MAX + 1];
232 int err;
232 int i; 233 int i;
233 234
234 if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt, 235 err = nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt,
235 sizeof(*qopt))) 236 sizeof(*qopt));
236 return -EINVAL; 237 if (err < 0)
238 return err;
239
237 q->bands = qopt->bands; 240 q->bands = qopt->bands;
238 /* If we're multiqueue, make sure the number of incoming bands 241 /* If we're multiqueue, make sure the number of incoming bands
239 * matches the number of queues on the device we're associating with. 242 * matches the number of queues on the device we're associating with.
@@ -242,7 +245,7 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
242 * only one that is enabled for multiqueue, since it's the only one 245 * only one that is enabled for multiqueue, since it's the only one
243 * that interacts with the underlying device. 246 * that interacts with the underlying device.
244 */ 247 */
245 q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]); 248 q->mq = nla_get_flag(tb[TCA_PRIO_MQ]);
246 if (q->mq) { 249 if (q->mq) {
247 if (sch->parent != TC_H_ROOT) 250 if (sch->parent != TC_H_ROOT)
248 return -EINVAL; 251 return -EINVAL;
@@ -296,7 +299,7 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
296 return 0; 299 return 0;
297} 300}
298 301
299static int prio_init(struct Qdisc *sch, struct rtattr *opt) 302static int prio_init(struct Qdisc *sch, struct nlattr *opt)
300{ 303{
301 struct prio_sched_data *q = qdisc_priv(sch); 304 struct prio_sched_data *q = qdisc_priv(sch);
302 int i; 305 int i;
@@ -319,20 +322,24 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
319{ 322{
320 struct prio_sched_data *q = qdisc_priv(sch); 323 struct prio_sched_data *q = qdisc_priv(sch);
321 unsigned char *b = skb_tail_pointer(skb); 324 unsigned char *b = skb_tail_pointer(skb);
322 struct rtattr *nest; 325 struct nlattr *nest;
323 struct tc_prio_qopt opt; 326 struct tc_prio_qopt opt;
324 327
325 opt.bands = q->bands; 328 opt.bands = q->bands;
326 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); 329 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
327 330
328 nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt); 331 nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
329 if (q->mq) 332 if (nest == NULL)
330 RTA_PUT_FLAG(skb, TCA_PRIO_MQ); 333 goto nla_put_failure;
331 RTA_NEST_COMPAT_END(skb, nest); 334 if (q->mq) {
335 if (nla_put_flag(skb, TCA_PRIO_MQ) < 0)
336 goto nla_put_failure;
337 }
338 nla_nest_compat_end(skb, nest);
332 339
333 return skb->len; 340 return skb->len;
334 341
335rtattr_failure: 342nla_put_failure:
336 nlmsg_trim(skb, b); 343 nlmsg_trim(skb, b);
337 return -1; 344 return -1;
338} 345}
@@ -392,7 +399,7 @@ static void prio_put(struct Qdisc *q, unsigned long cl)
392 return; 399 return;
393} 400}
394 401
395static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg) 402static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct nlattr **tca, unsigned long *arg)
396{ 403{
397 unsigned long cl = *arg; 404 unsigned long cl = *arg;
398 struct prio_sched_data *q = qdisc_priv(sch); 405 struct prio_sched_data *q = qdisc_priv(sch);
@@ -468,7 +475,7 @@ static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
468 return &q->filter_list; 475 return &q->filter_list;
469} 476}
470 477
471static struct Qdisc_class_ops prio_class_ops = { 478static const struct Qdisc_class_ops prio_class_ops = {
472 .graft = prio_graft, 479 .graft = prio_graft,
473 .leaf = prio_leaf, 480 .leaf = prio_leaf,
474 .get = prio_get, 481 .get = prio_get,
@@ -483,7 +490,7 @@ static struct Qdisc_class_ops prio_class_ops = {
483 .dump_stats = prio_dump_class_stats, 490 .dump_stats = prio_dump_class_stats,
484}; 491};
485 492
486static struct Qdisc_ops prio_qdisc_ops = { 493static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
487 .next = NULL, 494 .next = NULL,
488 .cl_ops = &prio_class_ops, 495 .cl_ops = &prio_class_ops,
489 .id = "prio", 496 .id = "prio",
@@ -500,7 +507,7 @@ static struct Qdisc_ops prio_qdisc_ops = {
500 .owner = THIS_MODULE, 507 .owner = THIS_MODULE,
501}; 508};
502 509
503static struct Qdisc_ops rr_qdisc_ops = { 510static struct Qdisc_ops rr_qdisc_ops __read_mostly = {
504 .next = NULL, 511 .next = NULL,
505 .cl_ops = &prio_class_ops, 512 .cl_ops = &prio_class_ops,
506 .id = "rr", 513 .id = "rr",
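
prio's TCA_PRIO_MQ handling shows the flag-attribute idiom picked up by the conversion: a flag is a zero-length attribute whose presence alone means true, so reading it reduces to a NULL test on the parsed table slot. A sketch of the two helpers involved (my_put_flag and my_get_flag are illustrative names; the real helpers are nla_put_flag() and nla_get_flag()):

/* Presence of the attribute encodes true, absence encodes false. */
static inline int my_put_flag(struct sk_buff *skb, int attrtype)
{
	return nla_put(skb, attrtype, 0, NULL);	/* header only, no payload */
}

static inline int my_get_flag(struct nlattr *nla)
{
	return !!nla;	/* tb[] slot is NULL when the flag was absent */
}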
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9b95fefb70f4..3dcd493f4f4a 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -177,21 +177,21 @@ static void red_destroy(struct Qdisc *sch)
177static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit) 177static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
178{ 178{
179 struct Qdisc *q; 179 struct Qdisc *q;
180 struct rtattr *rta; 180 struct nlattr *nla;
181 int ret; 181 int ret;
182 182
183 q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, 183 q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
184 TC_H_MAKE(sch->handle, 1)); 184 TC_H_MAKE(sch->handle, 1));
185 if (q) { 185 if (q) {
186 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), 186 nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
187 GFP_KERNEL); 187 GFP_KERNEL);
188 if (rta) { 188 if (nla) {
189 rta->rta_type = RTM_NEWQDISC; 189 nla->nla_type = RTM_NEWQDISC;
190 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 190 nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
191 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit; 191 ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
192 192
193 ret = q->ops->change(q, rta); 193 ret = q->ops->change(q, nla);
194 kfree(rta); 194 kfree(nla);
195 195
196 if (ret == 0) 196 if (ret == 0)
197 return q; 197 return q;
@@ -201,23 +201,31 @@ static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
201 return NULL; 201 return NULL;
202} 202}
203 203
204static int red_change(struct Qdisc *sch, struct rtattr *opt) 204static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
205 [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
206 [TCA_RED_STAB] = { .len = RED_STAB_SIZE },
207};
208
209static int red_change(struct Qdisc *sch, struct nlattr *opt)
205{ 210{
206 struct red_sched_data *q = qdisc_priv(sch); 211 struct red_sched_data *q = qdisc_priv(sch);
207 struct rtattr *tb[TCA_RED_MAX]; 212 struct nlattr *tb[TCA_RED_MAX + 1];
208 struct tc_red_qopt *ctl; 213 struct tc_red_qopt *ctl;
209 struct Qdisc *child = NULL; 214 struct Qdisc *child = NULL;
215 int err;
210 216
211 if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt)) 217 if (opt == NULL)
212 return -EINVAL; 218 return -EINVAL;
213 219
214 if (tb[TCA_RED_PARMS-1] == NULL || 220 err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
215 RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || 221 if (err < 0)
216 tb[TCA_RED_STAB-1] == NULL || 222 return err;
217 RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE) 223
224 if (tb[TCA_RED_PARMS] == NULL ||
225 tb[TCA_RED_STAB] == NULL)
218 return -EINVAL; 226 return -EINVAL;
219 227
220 ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); 228 ctl = nla_data(tb[TCA_RED_PARMS]);
221 229
222 if (ctl->limit > 0) { 230 if (ctl->limit > 0) {
223 child = red_create_dflt(sch, ctl->limit); 231 child = red_create_dflt(sch, ctl->limit);
@@ -235,7 +243,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
235 243
236 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, 244 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
237 ctl->Plog, ctl->Scell_log, 245 ctl->Plog, ctl->Scell_log,
238 RTA_DATA(tb[TCA_RED_STAB-1])); 246 nla_data(tb[TCA_RED_STAB]));
239 247
240 if (skb_queue_empty(&sch->q)) 248 if (skb_queue_empty(&sch->q))
241 red_end_of_idle_period(&q->parms); 249 red_end_of_idle_period(&q->parms);
@@ -244,7 +252,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
244 return 0; 252 return 0;
245} 253}
246 254
247static int red_init(struct Qdisc* sch, struct rtattr *opt) 255static int red_init(struct Qdisc* sch, struct nlattr *opt)
248{ 256{
249 struct red_sched_data *q = qdisc_priv(sch); 257 struct red_sched_data *q = qdisc_priv(sch);
250 258
@@ -255,7 +263,7 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt)
255static int red_dump(struct Qdisc *sch, struct sk_buff *skb) 263static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
256{ 264{
257 struct red_sched_data *q = qdisc_priv(sch); 265 struct red_sched_data *q = qdisc_priv(sch);
258 struct rtattr *opts = NULL; 266 struct nlattr *opts = NULL;
259 struct tc_red_qopt opt = { 267 struct tc_red_qopt opt = {
260 .limit = q->limit, 268 .limit = q->limit,
261 .flags = q->flags, 269 .flags = q->flags,
@@ -266,12 +274,14 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
266 .Scell_log = q->parms.Scell_log, 274 .Scell_log = q->parms.Scell_log,
267 }; 275 };
268 276
269 opts = RTA_NEST(skb, TCA_OPTIONS); 277 opts = nla_nest_start(skb, TCA_OPTIONS);
270 RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); 278 if (opts == NULL)
271 return RTA_NEST_END(skb, opts); 279 goto nla_put_failure;
280 NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
281 return nla_nest_end(skb, opts);
272 282
273rtattr_failure: 283nla_put_failure:
274 return RTA_NEST_CANCEL(skb, opts); 284 return nla_nest_cancel(skb, opts);
275} 285}
276 286
277static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 287static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
@@ -332,7 +342,7 @@ static void red_put(struct Qdisc *sch, unsigned long arg)
332} 342}
333 343
334static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 344static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
335 struct rtattr **tca, unsigned long *arg) 345 struct nlattr **tca, unsigned long *arg)
336{ 346{
337 return -ENOSYS; 347 return -ENOSYS;
338} 348}
@@ -359,7 +369,7 @@ static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
359 return NULL; 369 return NULL;
360} 370}
361 371
362static struct Qdisc_class_ops red_class_ops = { 372static const struct Qdisc_class_ops red_class_ops = {
363 .graft = red_graft, 373 .graft = red_graft,
364 .leaf = red_leaf, 374 .leaf = red_leaf,
365 .get = red_get, 375 .get = red_get,
@@ -371,7 +381,7 @@ static struct Qdisc_class_ops red_class_ops = {
371 .dump = red_dump_class, 381 .dump = red_dump_class,
372}; 382};
373 383
374static struct Qdisc_ops red_qdisc_ops = { 384static struct Qdisc_ops red_qdisc_ops __read_mostly = {
375 .id = "red", 385 .id = "red",
376 .priv_size = sizeof(struct red_sched_data), 386 .priv_size = sizeof(struct red_sched_data),
377 .cl_ops = &red_class_ops, 387 .cl_ops = &red_class_ops,
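
red_change() is the clearest before/after for policy-based validation: three open-coded RTA_PAYLOAD() length checks collapse into red_policy, leaving the handler to test only for presence. For a policy entry with no .type, .len acts as a minimum payload size, which is what makes the nla_data() cast to struct tc_red_qopt safe once nla_parse_nested() has returned 0. The core rule, paraphrased (this is a sketch of the era's validate_nla() behaviour, not the verbatim source):

static int my_validate(const struct nlattr *nla, const struct nla_policy *pt)
{
	/* untyped entry: a fixed .len is a floor, not a ceiling */
	if (pt->len && nla_len(nla) < pt->len)
		return -ERANGE;	/* payload shorter than the declared struct */
	return 0;
}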
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b542c875e154..91af539ab6e6 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -122,7 +122,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
122 { 122 {
123 const struct iphdr *iph = ip_hdr(skb); 123 const struct iphdr *iph = ip_hdr(skb);
124 h = iph->daddr; 124 h = iph->daddr;
125 h2 = iph->saddr^iph->protocol; 125 h2 = iph->saddr ^ iph->protocol;
126 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 126 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
127 (iph->protocol == IPPROTO_TCP || 127 (iph->protocol == IPPROTO_TCP ||
128 iph->protocol == IPPROTO_UDP || 128 iph->protocol == IPPROTO_UDP ||
@@ -137,7 +137,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
137 { 137 {
138 struct ipv6hdr *iph = ipv6_hdr(skb); 138 struct ipv6hdr *iph = ipv6_hdr(skb);
139 h = iph->daddr.s6_addr32[3]; 139 h = iph->daddr.s6_addr32[3];
140 h2 = iph->saddr.s6_addr32[3]^iph->nexthdr; 140 h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr;
141 if (iph->nexthdr == IPPROTO_TCP || 141 if (iph->nexthdr == IPPROTO_TCP ||
142 iph->nexthdr == IPPROTO_UDP || 142 iph->nexthdr == IPPROTO_UDP ||
143 iph->nexthdr == IPPROTO_UDPLITE || 143 iph->nexthdr == IPPROTO_UDPLITE ||
@@ -148,9 +148,10 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
148 break; 148 break;
149 } 149 }
150 default: 150 default:
151 h = (u32)(unsigned long)skb->dst^skb->protocol; 151 h = (unsigned long)skb->dst ^ skb->protocol;
152 h2 = (u32)(unsigned long)skb->sk; 152 h2 = (unsigned long)skb->sk;
153 } 153 }
154
154 return sfq_fold_hash(q, h, h2); 155 return sfq_fold_hash(q, h, h2);
155} 156}
156 157
@@ -208,7 +209,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
208 drop a packet from it */ 209 drop a packet from it */
209 210
210 if (d > 1) { 211 if (d > 1) {
211 sfq_index x = q->dep[d+SFQ_DEPTH].next; 212 sfq_index x = q->dep[d + SFQ_DEPTH].next;
212 skb = q->qs[x].prev; 213 skb = q->qs[x].prev;
213 len = skb->len; 214 len = skb->len;
214 __skb_unlink(skb, &q->qs[x]); 215 __skb_unlink(skb, &q->qs[x]);
@@ -241,7 +242,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
241} 242}
242 243
243static int 244static int
244sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) 245sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
245{ 246{
246 struct sfq_sched_data *q = qdisc_priv(sch); 247 struct sfq_sched_data *q = qdisc_priv(sch);
247 unsigned hash = sfq_hash(q, skb); 248 unsigned hash = sfq_hash(q, skb);
@@ -252,6 +253,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
252 q->ht[hash] = x = q->dep[SFQ_DEPTH].next; 253 q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
253 q->hash[x] = hash; 254 q->hash[x] = hash;
254 } 255 }
256
255 /* If selected queue has length q->limit, this means that 257 /* If selected queue has length q->limit, this means that
256 * all other queues are empty and that we do simple tail drop, 258
257 * i.e. drop _this_ packet. 259 * i.e. drop _this_ packet.
@@ -284,7 +286,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
284} 286}
285 287
286static int 288static int
287sfq_requeue(struct sk_buff *skb, struct Qdisc* sch) 289sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
288{ 290{
289 struct sfq_sched_data *q = qdisc_priv(sch); 291 struct sfq_sched_data *q = qdisc_priv(sch);
290 unsigned hash = sfq_hash(q, skb); 292 unsigned hash = sfq_hash(q, skb);
@@ -295,6 +297,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
295 q->ht[hash] = x = q->dep[SFQ_DEPTH].next; 297 q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
296 q->hash[x] = hash; 298 q->hash[x] = hash;
297 } 299 }
300
298 sch->qstats.backlog += skb->len; 301 sch->qstats.backlog += skb->len;
299 __skb_queue_head(&q->qs[x], skb); 302 __skb_queue_head(&q->qs[x], skb);
300 /* If selected queue has length q->limit+1, this means that 303 /* If selected queue has length q->limit+1, this means that
@@ -310,6 +313,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
310 kfree_skb(skb); 313 kfree_skb(skb);
311 return NET_XMIT_CN; 314 return NET_XMIT_CN;
312 } 315 }
316
313 sfq_inc(q, x); 317 sfq_inc(q, x);
314 if (q->qs[x].qlen == 1) { /* The flow is new */ 318 if (q->qs[x].qlen == 1) { /* The flow is new */
315 if (q->tail == SFQ_DEPTH) { /* It is the first flow */ 319 if (q->tail == SFQ_DEPTH) { /* It is the first flow */
@@ -322,6 +326,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
322 q->tail = x; 326 q->tail = x;
323 } 327 }
324 } 328 }
329
325 if (++sch->q.qlen <= q->limit) { 330 if (++sch->q.qlen <= q->limit) {
326 sch->qstats.requeues++; 331 sch->qstats.requeues++;
327 return 0; 332 return 0;
@@ -336,7 +341,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
336 341
337 342
338static struct sk_buff * 343static struct sk_buff *
339sfq_dequeue(struct Qdisc* sch) 344sfq_dequeue(struct Qdisc *sch)
340{ 345{
341 struct sfq_sched_data *q = qdisc_priv(sch); 346 struct sfq_sched_data *q = qdisc_priv(sch);
342 struct sk_buff *skb; 347 struct sk_buff *skb;
@@ -373,7 +378,7 @@ sfq_dequeue(struct Qdisc* sch)
373} 378}
374 379
375static void 380static void
376sfq_reset(struct Qdisc* sch) 381sfq_reset(struct Qdisc *sch)
377{ 382{
378 struct sk_buff *skb; 383 struct sk_buff *skb;
379 384
@@ -383,27 +388,27 @@ sfq_reset(struct Qdisc* sch)
383 388
384static void sfq_perturbation(unsigned long arg) 389static void sfq_perturbation(unsigned long arg)
385{ 390{
386 struct Qdisc *sch = (struct Qdisc*)arg; 391 struct Qdisc *sch = (struct Qdisc *)arg;
387 struct sfq_sched_data *q = qdisc_priv(sch); 392 struct sfq_sched_data *q = qdisc_priv(sch);
388 393
389 get_random_bytes(&q->perturbation, 4); 394 q->perturbation = net_random();
390 395
391 if (q->perturb_period) 396 if (q->perturb_period)
392 mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 397 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
393} 398}
394 399
395static int sfq_change(struct Qdisc *sch, struct rtattr *opt) 400static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
396{ 401{
397 struct sfq_sched_data *q = qdisc_priv(sch); 402 struct sfq_sched_data *q = qdisc_priv(sch);
398 struct tc_sfq_qopt *ctl = RTA_DATA(opt); 403 struct tc_sfq_qopt *ctl = nla_data(opt);
399 unsigned int qlen; 404 unsigned int qlen;
400 405
401 if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) 406 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
402 return -EINVAL; 407 return -EINVAL;
403 408
404 sch_tree_lock(sch); 409 sch_tree_lock(sch);
405 q->quantum = ctl->quantum ? : psched_mtu(sch->dev); 410 q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
406 q->perturb_period = ctl->perturb_period*HZ; 411 q->perturb_period = ctl->perturb_period * HZ;
407 if (ctl->limit) 412 if (ctl->limit)
408 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); 413 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
409 414
@@ -415,41 +420,44 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
415 del_timer(&q->perturb_timer); 420 del_timer(&q->perturb_timer);
416 if (q->perturb_period) { 421 if (q->perturb_period) {
417 mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 422 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
418 get_random_bytes(&q->perturbation, 4); 423 q->perturbation = net_random();
419 } 424 }
420 sch_tree_unlock(sch); 425 sch_tree_unlock(sch);
421 return 0; 426 return 0;
422} 427}
423 428
424static int sfq_init(struct Qdisc *sch, struct rtattr *opt) 429static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
425{ 430{
426 struct sfq_sched_data *q = qdisc_priv(sch); 431 struct sfq_sched_data *q = qdisc_priv(sch);
427 int i; 432 int i;
428 433
429 init_timer(&q->perturb_timer);
430 q->perturb_timer.data = (unsigned long)sch;
431 q->perturb_timer.function = sfq_perturbation; 434 q->perturb_timer.function = sfq_perturbation;
435 q->perturb_timer.data = (unsigned long)sch;
436 init_timer_deferrable(&q->perturb_timer);
432 437
433 for (i=0; i<SFQ_HASH_DIVISOR; i++) 438 for (i = 0; i < SFQ_HASH_DIVISOR; i++)
434 q->ht[i] = SFQ_DEPTH; 439 q->ht[i] = SFQ_DEPTH;
435 for (i=0; i<SFQ_DEPTH; i++) { 440
441 for (i = 0; i < SFQ_DEPTH; i++) {
436 skb_queue_head_init(&q->qs[i]); 442 skb_queue_head_init(&q->qs[i]);
437 q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH; 443 q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
438 q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH; 444 q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
439 } 445 }
446
440 q->limit = SFQ_DEPTH - 1; 447 q->limit = SFQ_DEPTH - 1;
441 q->max_depth = 0; 448 q->max_depth = 0;
442 q->tail = SFQ_DEPTH; 449 q->tail = SFQ_DEPTH;
443 if (opt == NULL) { 450 if (opt == NULL) {
444 q->quantum = psched_mtu(sch->dev); 451 q->quantum = psched_mtu(sch->dev);
445 q->perturb_period = 0; 452 q->perturb_period = 0;
446 get_random_bytes(&q->perturbation, 4); 453 q->perturbation = net_random();
447 } else { 454 } else {
448 int err = sfq_change(sch, opt); 455 int err = sfq_change(sch, opt);
449 if (err) 456 if (err)
450 return err; 457 return err;
451 } 458 }
452 for (i=0; i<SFQ_DEPTH; i++) 459
460 for (i = 0; i < SFQ_DEPTH; i++)
453 sfq_link(q, i); 461 sfq_link(q, i);
454 return 0; 462 return 0;
455} 463}
@@ -467,22 +475,22 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
467 struct tc_sfq_qopt opt; 475 struct tc_sfq_qopt opt;
468 476
469 opt.quantum = q->quantum; 477 opt.quantum = q->quantum;
470 opt.perturb_period = q->perturb_period/HZ; 478 opt.perturb_period = q->perturb_period / HZ;
471 479
472 opt.limit = q->limit; 480 opt.limit = q->limit;
473 opt.divisor = SFQ_HASH_DIVISOR; 481 opt.divisor = SFQ_HASH_DIVISOR;
474 opt.flows = q->limit; 482 opt.flows = q->limit;
475 483
476 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 484 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
477 485
478 return skb->len; 486 return skb->len;
479 487
480rtattr_failure: 488nla_put_failure:
481 nlmsg_trim(skb, b); 489 nlmsg_trim(skb, b);
482 return -1; 490 return -1;
483} 491}
484 492
485static struct Qdisc_ops sfq_qdisc_ops = { 493static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
486 .next = NULL, 494 .next = NULL,
487 .cl_ops = NULL, 495 .cl_ops = NULL,
488 .id = "sfq", 496 .id = "sfq",
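
Two behavioural tweaks ride along with sfq's API conversion: the hash seed now comes from net_random(), the cheap non-cryptographic PRNG, which is adequate for flow-hash salting, and the perturbation timer becomes deferrable so an idle CPU is not woken merely to reseed a hash. A kernel-style sketch of that arrangement (struct my_state, my_reseed and my_setup are hypothetical names; this will not build outside a kernel tree):

struct my_state {
	struct timer_list timer;
	unsigned long period;		/* reseed interval, in jiffies */
	u32 perturbation;		/* current hash salt */
};

static void my_reseed(unsigned long arg)
{
	struct my_state *s = (struct my_state *)arg;

	s->perturbation = net_random();	/* pick a fresh salt */
	if (s->period)
		mod_timer(&s->timer, jiffies + s->period);
}

static void my_setup(struct my_state *s)
{
	s->timer.function = my_reseed;
	s->timer.data = (unsigned long)s;
	init_timer_deferrable(&s->timer);	/* may fire late, on next wakeup */
	if (s->period)
		mod_timer(&s->timer, jiffies + s->period);
}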
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b0d81098b0ee..0b7d78f59d8c 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -245,20 +245,21 @@ static void tbf_reset(struct Qdisc* sch)
245static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) 245static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
246{ 246{
247 struct Qdisc *q; 247 struct Qdisc *q;
248 struct rtattr *rta; 248 struct nlattr *nla;
249 int ret; 249 int ret;
250 250
251 q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, 251 q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
252 TC_H_MAKE(sch->handle, 1)); 252 TC_H_MAKE(sch->handle, 1));
253 if (q) { 253 if (q) {
254 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); 254 nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
255 if (rta) { 255 GFP_KERNEL);
256 rta->rta_type = RTM_NEWQDISC; 256 if (nla) {
257 rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt)); 257 nla->nla_type = RTM_NEWQDISC;
258 ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit; 258 nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
259 ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
259 260
260 ret = q->ops->change(q, rta); 261 ret = q->ops->change(q, nla);
261 kfree(rta); 262 kfree(nla);
262 263
263 if (ret == 0) 264 if (ret == 0)
264 return q; 265 return q;
@@ -269,30 +270,39 @@ static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
269 return NULL; 270 return NULL;
270} 271}
271 272
272static int tbf_change(struct Qdisc* sch, struct rtattr *opt) 273static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
274 [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
275 [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
276 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
277};
278
279static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
273{ 280{
274 int err = -EINVAL; 281 int err;
275 struct tbf_sched_data *q = qdisc_priv(sch); 282 struct tbf_sched_data *q = qdisc_priv(sch);
276 struct rtattr *tb[TCA_TBF_PTAB]; 283 struct nlattr *tb[TCA_TBF_PTAB + 1];
277 struct tc_tbf_qopt *qopt; 284 struct tc_tbf_qopt *qopt;
278 struct qdisc_rate_table *rtab = NULL; 285 struct qdisc_rate_table *rtab = NULL;
279 struct qdisc_rate_table *ptab = NULL; 286 struct qdisc_rate_table *ptab = NULL;
280 struct Qdisc *child = NULL; 287 struct Qdisc *child = NULL;
281 int max_size,n; 288 int max_size,n;
282 289
283 if (rtattr_parse_nested(tb, TCA_TBF_PTAB, opt) || 290 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
284 tb[TCA_TBF_PARMS-1] == NULL || 291 if (err < 0)
285 RTA_PAYLOAD(tb[TCA_TBF_PARMS-1]) < sizeof(*qopt)) 292 return err;
293
294 err = -EINVAL;
295 if (tb[TCA_TBF_PARMS] == NULL)
286 goto done; 296 goto done;
287 297
288 qopt = RTA_DATA(tb[TCA_TBF_PARMS-1]); 298 qopt = nla_data(tb[TCA_TBF_PARMS]);
289 rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB-1]); 299 rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
290 if (rtab == NULL) 300 if (rtab == NULL)
291 goto done; 301 goto done;
292 302
293 if (qopt->peakrate.rate) { 303 if (qopt->peakrate.rate) {
294 if (qopt->peakrate.rate > qopt->rate.rate) 304 if (qopt->peakrate.rate > qopt->rate.rate)
295 ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB-1]); 305 ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
296 if (ptab == NULL) 306 if (ptab == NULL)
297 goto done; 307 goto done;
298 } 308 }
@@ -339,7 +349,7 @@ done:
339 return err; 349 return err;
340} 350}
341 351
342static int tbf_init(struct Qdisc* sch, struct rtattr *opt) 352static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
343{ 353{
344 struct tbf_sched_data *q = qdisc_priv(sch); 354 struct tbf_sched_data *q = qdisc_priv(sch);
345 355
@@ -370,12 +380,12 @@ static void tbf_destroy(struct Qdisc *sch)
370static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) 380static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
371{ 381{
372 struct tbf_sched_data *q = qdisc_priv(sch); 382 struct tbf_sched_data *q = qdisc_priv(sch);
373 unsigned char *b = skb_tail_pointer(skb); 383 struct nlattr *nest;
374 struct rtattr *rta;
375 struct tc_tbf_qopt opt; 384 struct tc_tbf_qopt opt;
376 385
377 rta = (struct rtattr*)b; 386 nest = nla_nest_start(skb, TCA_OPTIONS);
378 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 387 if (nest == NULL)
388 goto nla_put_failure;
379 389
380 opt.limit = q->limit; 390 opt.limit = q->limit;
381 opt.rate = q->R_tab->rate; 391 opt.rate = q->R_tab->rate;
@@ -385,13 +395,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
385 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 395 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
386 opt.mtu = q->mtu; 396 opt.mtu = q->mtu;
387 opt.buffer = q->buffer; 397 opt.buffer = q->buffer;
388 RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt); 398 NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
389 rta->rta_len = skb_tail_pointer(skb) - b;
390 399
400 nla_nest_end(skb, nest);
391 return skb->len; 401 return skb->len;
392 402
393rtattr_failure: 403nla_put_failure:
394 nlmsg_trim(skb, b); 404 nla_nest_cancel(skb, nest);
395 return -1; 405 return -1;
396} 406}
397 407
@@ -442,7 +452,7 @@ static void tbf_put(struct Qdisc *sch, unsigned long arg)
442} 452}
443 453
444static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 454static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
445 struct rtattr **tca, unsigned long *arg) 455 struct nlattr **tca, unsigned long *arg)
446{ 456{
447 return -ENOSYS; 457 return -ENOSYS;
448} 458}
@@ -469,7 +479,7 @@ static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
469 return NULL; 479 return NULL;
470} 480}
471 481
472static struct Qdisc_class_ops tbf_class_ops = 482static const struct Qdisc_class_ops tbf_class_ops =
473{ 483{
474 .graft = tbf_graft, 484 .graft = tbf_graft,
475 .leaf = tbf_leaf, 485 .leaf = tbf_leaf,
@@ -482,7 +492,7 @@ static struct Qdisc_class_ops tbf_class_ops =
482 .dump = tbf_dump_class, 492 .dump = tbf_dump_class,
483}; 493};
484 494
485static struct Qdisc_ops tbf_qdisc_ops = { 495static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
486 .next = NULL, 496 .next = NULL,
487 .cl_ops = &tbf_class_ops, 497 .cl_ops = &tbf_class_ops,
488 .id = "tbf", 498 .id = "tbf",
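
tbf_policy is the one table in this series that uses NLA_BINARY, where .len bounds the payload from above rather than below: TC_RTAB_SIZE is the fixed 1024-byte rate table, 256 u32 slots mapping packet size to transmission time, precomputed by userspace tc. A simplified standalone illustration of that lookup idea (this is not the kernel's actual rate-table code; the microsecond units and the conservative round-up per slot are assumptions made for readability):

#include <stdio.h>
#include <stdint.h>

#define RTAB_SLOTS 256		/* TC_RTAB_SIZE / sizeof(uint32_t) */

static uint32_t rtab[RTAB_SLOTS];

static void build_rtab(uint32_t bytes_per_sec, int cell_log)
{
	/* slot i covers sizes up to (i + 1) << cell_log; store the time
	 * needed to transmit that many bytes */
	for (int i = 0; i < RTAB_SLOTS; i++) {
		uint64_t sz = (uint64_t)(i + 1) << cell_log;
		rtab[i] = (uint32_t)(sz * 1000000ULL / bytes_per_sec);
	}
}

static uint32_t len_to_time(unsigned int len, int cell_log)
{
	unsigned int slot = len >> cell_log;

	if (slot >= RTAB_SLOTS)
		slot = RTAB_SLOTS - 1;
	return rtab[slot];	/* O(1), no division on the fast path */
}

int main(void)
{
	build_rtab(125000, 3);	/* 1 Mbit/s, 8-byte cells */
	printf("1500-byte frame: ~%u us\n",
	       (unsigned)len_to_time(1500, 3));
	return 0;
}

The point of the table is visible in len_to_time(): the enqueue path pays one shift and one array read instead of a 64-bit division per packet.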
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index c0ed06d4a504..1411c7b1fbdc 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -168,7 +168,7 @@ teql_destroy(struct Qdisc* sch)
168 } 168 }
169} 169}
170 170
171static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt) 171static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
172{ 172{
173 struct net_device *dev = sch->dev; 173 struct net_device *dev = sch->dev;
174 struct teql_master *m = (struct teql_master*)sch->ops; 174 struct teql_master *m = (struct teql_master*)sch->ops;
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 5390bc792159..0b79f869c4ea 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -10,6 +10,7 @@ menuconfig IP_SCTP
10 select CRYPTO_HMAC 10 select CRYPTO_HMAC
11 select CRYPTO_SHA1 11 select CRYPTO_SHA1
12 select CRYPTO_MD5 if SCTP_HMAC_MD5 12 select CRYPTO_MD5 if SCTP_HMAC_MD5
13 select LIBCRC32C
13 ---help--- 14 ---help---
14 Stream Control Transmission Protocol 15 Stream Control Transmission Protocol
15 16
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 1da7204d9b42..f5356b9d5ee3 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -9,7 +9,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
9 transport.o chunk.o sm_make_chunk.o ulpevent.o \ 9 transport.o chunk.o sm_make_chunk.o ulpevent.o \
10 inqueue.o outqueue.o ulpqueue.o command.o \ 10 inqueue.o outqueue.o ulpqueue.o command.o \
11 tsnmap.o bind_addr.o socket.o primitive.o \ 11 tsnmap.o bind_addr.o socket.o primitive.o \
12 output.o input.o debug.o ssnmap.o proc.o crc32c.o \ 12 output.o input.o debug.o ssnmap.o proc.o \
13 auth.o 13 auth.o
14 14
15sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o 15sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
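
This Kconfig/Makefile pair is one half of a single change, the other half being the net/sctp/crc32c.c deletion below: SCTP now selects LIBCRC32C and funnels its checksum through the shared crc32c() helper from <linux/crc32c.h>, which can also pick up accelerated implementations via the crypto layer. Roughly what the replacement wrappers in <net/sctp/checksum.h> reduce to (a sketch from memory, not a verbatim quote; it assumes struct sctphdr ends with the 32-bit checksum field):

#include <linux/crc32c.h>

static inline __u32 my_start_cksum(__u8 *buffer, __u16 length)
{
	__u32 crc = ~(__u32)0;	/* same all-ones seed the local code used */
	__u32 zero = 0;

	/* CRC the header up to the checksum field ... */
	crc = crc32c(crc, buffer, sizeof(struct sctphdr) - sizeof(__u32));
	/* ... treat the checksum field itself as zero ... */
	crc = crc32c(crc, (__u8 *)&zero, sizeof(zero));
	/* ... then the rest of the packet */
	return crc32c(crc, &buffer[sizeof(struct sctphdr)],
		      length - sizeof(struct sctphdr));
}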
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 013e3d3ab0f1..a016e78061f4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -61,6 +61,7 @@
61 61
62/* Forward declarations for internal functions. */ 62/* Forward declarations for internal functions. */
63static void sctp_assoc_bh_rcv(struct work_struct *work); 63static void sctp_assoc_bh_rcv(struct work_struct *work);
64static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc);
64 65
65 66
66/* 1st Level Abstractions. */ 67/* 1st Level Abstractions. */
@@ -167,11 +168,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
167 sp->autoclose * HZ; 168 sp->autoclose * HZ;
168 169
169 /* Initializes the timers */ 170
170 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) { 171 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
171 init_timer(&asoc->timers[i]); 172 setup_timer(&asoc->timers[i], sctp_timer_events[i],
172 asoc->timers[i].function = sctp_timer_events[i]; 173 (unsigned long)asoc);
173 asoc->timers[i].data = (unsigned long) asoc;
174 }
175 174
176 /* Pull default initialization values from the sock options. 175 /* Pull default initialization values from the sock options.
177 * Note: This assumes that the values have already been 176 * Note: This assumes that the values have already been
@@ -244,6 +243,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
244 asoc->addip_serial = asoc->c.initial_tsn; 243 asoc->addip_serial = asoc->c.initial_tsn;
245 244
246 INIT_LIST_HEAD(&asoc->addip_chunk_list); 245 INIT_LIST_HEAD(&asoc->addip_chunk_list);
246 INIT_LIST_HEAD(&asoc->asconf_ack_list);
247 247
248 /* Make an empty list of remote transport addresses. */ 248 /* Make an empty list of remote transport addresses. */
249 INIT_LIST_HEAD(&asoc->peer.transport_addr_list); 249 INIT_LIST_HEAD(&asoc->peer.transport_addr_list);
@@ -433,8 +433,7 @@ void sctp_association_free(struct sctp_association *asoc)
433 asoc->peer.transport_count = 0; 433 asoc->peer.transport_count = 0;
434 434
435 /* Free any cached ASCONF_ACK chunk. */ 435 /* Free any cached ASCONF_ACK chunk. */
436 if (asoc->addip_last_asconf_ack) 436 sctp_assoc_free_asconf_acks(asoc);
437 sctp_chunk_free(asoc->addip_last_asconf_ack);
438 437
439 /* Free any cached ASCONF chunk. */ 438 /* Free any cached ASCONF chunk. */
440 if (asoc->addip_last_asconf) 439 if (asoc->addip_last_asconf)
@@ -732,6 +731,23 @@ struct sctp_transport *sctp_assoc_lookup_paddr(
732 return NULL; 731 return NULL;
733} 732}
734 733
734/* Remove all transports except a given one */
735void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
736 struct sctp_transport *primary)
737{
738 struct sctp_transport *temp;
739 struct sctp_transport *t;
740
741 list_for_each_entry_safe(t, temp, &asoc->peer.transport_addr_list,
742 transports) {
743 /* if the current transport is not the primary one, delete it */
744 if (t != primary)
745 sctp_assoc_rm_peer(asoc, t);
746 }
747
748 return;
749}
750
735/* Engage in transport control operations. 751/* Engage in transport control operations.
736 * Mark the transport up or down and send a notification to the user. 752 * Mark the transport up or down and send a notification to the user.
737 * Select and update the new active and retran paths. 753 * Select and update the new active and retran paths.
@@ -1470,3 +1486,56 @@ retry:
1470 asoc->assoc_id = (sctp_assoc_t) assoc_id; 1486 asoc->assoc_id = (sctp_assoc_t) assoc_id;
1471 return error; 1487 return error;
1472} 1488}
1489
1490/* Free asconf_ack cache */
1491static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc)
1492{
1493 struct sctp_chunk *ack;
1494 struct sctp_chunk *tmp;
1495
1496 list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list,
1497 transmitted_list) {
1498 list_del_init(&ack->transmitted_list);
1499 sctp_chunk_free(ack);
1500 }
1501}
1502
1503/* Clean up the ASCONF_ACK queue */
1504void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc)
1505{
1506 struct sctp_chunk *ack;
1507 struct sctp_chunk *tmp;
1508
1509 /* We can remove all the entries from the queue up to
1510 * the "Peer-Sequence-Number".
1511 */
1512 list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list,
1513 transmitted_list) {
1514 if (ack->subh.addip_hdr->serial ==
1515 htonl(asoc->peer.addip_serial))
1516 break;
1517
1518 list_del_init(&ack->transmitted_list);
1519 sctp_chunk_free(ack);
1520 }
1521}
1522
1523/* Find the ASCONF_ACK whose serial number matches ASCONF */
1524struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
1525 const struct sctp_association *asoc,
1526 __be32 serial)
1527{
1528 struct sctp_chunk *ack = NULL;
1529
1530 /* Walk through the list of cached ASCONF-ACKs and find the
1531 * ack chunk whose serial number matches that of the request.
1532 */
1533 list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) {
1534 if (ack->subh.addip_hdr->serial == serial) {
1535 sctp_chunk_hold(ack);
1536 return ack;
1537 }
1538 }
1539
1540 return NULL;
1541}
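
The cache added above replaces the single addip_last_asconf_ack slot with a serial-indexed list, and sctp_assoc_lookup_asconf_ack() hands back its result with an extra reference taken via sctp_chunk_hold(), returning NULL on a miss. A hypothetical caller, just to make the ownership rule explicit (maybe_retransmit_cached_ack is an invented name, not a function in this patch):

static void maybe_retransmit_cached_ack(struct sctp_association *asoc,
					__be32 serial)
{
	struct sctp_chunk *ack;

	ack = sctp_assoc_lookup_asconf_ack(asoc, serial);
	if (!ack)
		return;		/* nothing cached for this serial */

	/* ... hand ack to the output path for retransmission ... */

	sctp_chunk_free(ack);	/* drop the reference the lookup took */
}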
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 6a7d01091f0c..13fbfb449a55 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -171,7 +171,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
171 171
172/* Add an address to the bind address list in the SCTP_bind_addr structure. */ 172/* Add an address to the bind address list in the SCTP_bind_addr structure. */
173int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, 173int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
174 __u8 use_as_src, gfp_t gfp) 174 __u8 addr_state, gfp_t gfp)
175{ 175{
176 struct sctp_sockaddr_entry *addr; 176 struct sctp_sockaddr_entry *addr;
177 177
@@ -188,7 +188,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
188 if (!addr->a.v4.sin_port) 188 if (!addr->a.v4.sin_port)
189 addr->a.v4.sin_port = htons(bp->port); 189 addr->a.v4.sin_port = htons(bp->port);
190 190
191 addr->use_as_src = use_as_src; 191 addr->state = addr_state;
192 addr->valid = 1; 192 addr->valid = 1;
193 193
194 INIT_LIST_HEAD(&addr->list); 194 INIT_LIST_HEAD(&addr->list);
@@ -312,7 +312,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
312 } 312 }
313 313
314 af->from_addr_param(&addr, rawaddr, htons(port), 0); 314 af->from_addr_param(&addr, rawaddr, htons(port), 0);
315 retval = sctp_add_bind_addr(bp, &addr, 1, gfp); 315 retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp);
316 if (retval) { 316 if (retval) {
317 /* Can't finish building the list, clean up. */ 317 /* Can't finish building the list, clean up. */
318 sctp_bind_addr_clean(bp); 318 sctp_bind_addr_clean(bp);
@@ -353,6 +353,32 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
353 return match; 353 return match;
354} 354}
355 355
356/* Get the state of the entry in the bind_addr_list */
357int sctp_bind_addr_state(const struct sctp_bind_addr *bp,
358 const union sctp_addr *addr)
359{
360 struct sctp_sockaddr_entry *laddr;
361 struct sctp_af *af;
362 int state = -1;
363
364 af = sctp_get_af_specific(addr->sa.sa_family);
365 if (unlikely(!af))
366 return state;
367
368 rcu_read_lock();
369 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
370 if (!laddr->valid)
371 continue;
372 if (af->cmp_addr(&laddr->a, addr)) {
373 state = laddr->state;
374 break;
375 }
376 }
377 rcu_read_unlock();
378
379 return state;
380}
381
356/* Find the first address in the bind address list that is not present in 382/* Find the first address in the bind address list that is not present in
357 * the addrs packed array. 383 * the addrs packed array.
358 */ 384 */
@@ -411,7 +437,8 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
411 (((AF_INET6 == addr->sa.sa_family) && 437 (((AF_INET6 == addr->sa.sa_family) &&
412 (flags & SCTP_ADDR6_ALLOWED) && 438 (flags & SCTP_ADDR6_ALLOWED) &&
413 (flags & SCTP_ADDR6_PEERSUPP)))) 439 (flags & SCTP_ADDR6_PEERSUPP))))
414 error = sctp_add_bind_addr(dest, addr, 1, gfp); 440 error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC,
441 gfp);
415 } 442 }
416 443
417 return error; 444 return error;
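
sctp_bind_addr_state() above is a pure RCU reader: no lock, just rcu_read_lock() around a list_for_each_entry_rcu() walk that skips entries with valid == 0. That only works if writers follow the matching discipline, which roughly looks like this (my_del_bind_addr is a hypothetical sketch; the SCTP code proper defers the free, for instance via call_rcu(), rather than blocking in synchronize_rcu()):

static void my_del_bind_addr(struct sctp_bind_addr *bp,
			     struct sctp_sockaddr_entry *entry)
{
	entry->valid = 0;		/* readers start ignoring it */
	list_del_rcu(&entry->list);	/* unlink without waiting */
	synchronize_rcu();		/* wait out current readers */
	kfree(entry);			/* no reader can still see it */
}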
diff --git a/net/sctp/crc32c.c b/net/sctp/crc32c.c
deleted file mode 100644
index 181edabdb8ca..000000000000
--- a/net/sctp/crc32c.c
+++ /dev/null
@@ -1,222 +0,0 @@
1/* SCTP kernel reference Implementation
2 * Copyright (c) 1999-2001 Motorola, Inc.
3 * Copyright (c) 2001-2003 International Business Machines, Corp.
4 *
5 * This file is part of the SCTP kernel reference Implementation
6 *
7 * SCTP Checksum functions
8 *
9 * The SCTP reference implementation is free software;
10 * you can redistribute it and/or modify it under the terms of
11 * the GNU General Public License as published by
12 * the Free Software Foundation; either version 2, or (at your option)
13 * any later version.
14 *
15 * The SCTP reference implementation is distributed in the hope that it
16 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
17 * ************************
18 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
19 * See the GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with GNU CC; see the file COPYING. If not, write to
23 * the Free Software Foundation, 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
25 *
26 * Please send any bug reports or fixes you make to the
27 * email address(es):
28 * lksctp developers <lksctp-developers@lists.sourceforge.net>
29 *
30 * Or submit a bug report through the following website:
31 * http://www.sf.net/projects/lksctp
32 *
33 * Written or modified by:
34 * Dinakaran Joseph
35 * Jon Grimm <jgrimm@us.ibm.com>
36 * Sridhar Samudrala <sri@us.ibm.com>
37 *
38 * Any bugs reported given to us we will try to fix... any fixes shared will
39 * be incorporated into the next SCTP release.
40 */
41
42/* The following code has been taken directly from
43 * draft-ietf-tsvwg-sctpcsum-03.txt
44 *
45 * The code has now been modified specifically for SCTP knowledge.
46 */
47
48#include <linux/types.h>
49#include <net/sctp/sctp.h>
50
51#define CRC32C_POLY 0x1EDC6F41
52#define CRC32C(c,d) (c=(c>>8)^crc_c[(c^(d))&0xFF])
53/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
54/* Copyright 2001, D. Otis. Use this program, code or tables */
55/* extracted from it, as desired without restriction. */
56/* */
57/* 32 Bit Reflected CRC table generation for SCTP. */
58/* To accommodate serial byte data being shifted out least */
59/* significant bit first, the table's 32 bit words are reflected */
60/* which flips both byte and bit MS and LS positions. The CRC */
61/* is calculated MS bits first from the perspective of the serial*/
62/* stream. The x^32 term is implied and the x^0 term may also */
63/* be shown as +1. The polynomial code used is 0x1EDC6F41. */
64/* Castagnoli93 */
65/* x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+ */
66/* x^11+x^10+x^9+x^8+x^6+x^0 */
67/* Guy Castagnoli Stefan Braeuer and Martin Herrman */
68/* "Optimization of Cyclic Redundancy-Check Codes */
69/* with 24 and 32 Parity Bits", */
70/* IEEE Transactions on Communications, Vol.41, No.6, June 1993 */
71/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
72static const __u32 crc_c[256] = {
73 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
74 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
75 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
76 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
77 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
78 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
79 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
80 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
81 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
82 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
83 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
84 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
85 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
86 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
87 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
88 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
89 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
90 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
91 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
92 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
93 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
94 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
95 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
96 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
97 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
98 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
99 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
100 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
101 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
102 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
103 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
104 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
105 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
106 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
107 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
108 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
109 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
110 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
111 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
112 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
113 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
114 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
115 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
116 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
117 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
118 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
119 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
120 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
121 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
122 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
123 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
124 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
125 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
126 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
127 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
128 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
129 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
130 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
131 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
132 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
133 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
134 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
135 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
136 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351,
137};
138
139__u32 sctp_start_cksum(__u8 *buffer, __u16 length)
140{
141 __u32 crc32 = ~(__u32) 0;
142 __u32 i;
143
144 /* This routine is optimized to be SCTP-specific, knowing how
145 * to skip the checksum field of the SCTP header.
146 */
147
148 /* Calculate CRC up to the checksum. */
149 for (i = 0; i < (sizeof(struct sctphdr) - sizeof(__u32)); i++)
150 CRC32C(crc32, buffer[i]);
151
152 /* Skip checksum field of the header. */
153 for (i = 0; i < sizeof(__u32); i++)
154 CRC32C(crc32, 0);
155
156 /* Calculate the rest of the CRC. */
157 for (i = sizeof(struct sctphdr); i < length ; i++)
158 CRC32C(crc32, buffer[i]);
159
160 return crc32;
161}
162
163__u32 sctp_update_cksum(__u8 *buffer, __u16 length, __u32 crc32)
164{
165 __u32 i;
166
167 for (i = 0; i < length ; i++)
168 CRC32C(crc32, buffer[i]);
169
170 return crc32;
171}
172
173#if 0
174__u32 sctp_update_copy_cksum(__u8 *to, __u8 *from, __u16 length, __u32 crc32)
175{
176 __u32 i;
177 __u32 *_to = (__u32 *)to;
178 __u32 *_from = (__u32 *)from;
179
180 for (i = 0; i < (length/4); i++) {
181 _to[i] = _from[i];
182 CRC32C(crc32, from[i*4]);
183 CRC32C(crc32, from[i*4+1]);
184 CRC32C(crc32, from[i*4+2]);
185 CRC32C(crc32, from[i*4+3]);
186 }
187
188 return crc32;
189}
190#endif /* 0 */
191
192__u32 sctp_end_cksum(__u32 crc32)
193{
194 __u32 result;
195 __u8 byte0, byte1, byte2, byte3;
196
197 result = ~crc32;
198
199 /* result now holds the negated polynomial remainder,
200 * since the table and algorithm are "reflected" [williams95].
201 * That is, result has the same value as if we mapped the message
202 * to a polynomial, computed the host-bit-order polynomial
203 * remainder, performed final negation, then did an end-for-end
204 * bit-reversal.
205 * Note that a 32-bit bit-reversal is identical to four in-place
206 * 8-bit reversals followed by an end-for-end byteswap.
207 * In other words, the bits of each byte are in the right order,
208 * but the bytes have been byteswapped. So we now do an explicit
209 * byteswap. On a little-endian machine, this byteswap and
210 * the final ntohl cancel out and could be elided.
211 */
212 byte0 = result & 0xff;
213 byte1 = (result>>8) & 0xff;
214 byte2 = (result>>16) & 0xff;
215 byte3 = (result>>24) & 0xff;
216
217 crc32 = ((byte0 << 24) |
218 (byte1 << 16) |
219 (byte2 << 8) |
220 byte3);
221 return crc32;
222}
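For orientation, a minimal sketch of how the three helpers above compose when
checksumming a flat SCTP packet. The wrapper name is illustrative, and the
final htonl() store mentioned below is an assumption based on how the callers
in this series use the result:

	/* Sketch: compute the CRC32c of a complete SCTP packet.
	 * sctp_start_cksum() already feeds zeros for the checksum field
	 * of the header, so the field need not be cleared beforehand.
	 */
	__u32 compute_packet_cksum(__u8 *packet, __u16 len)
	{
		__u32 crc;

		crc = sctp_start_cksum(packet, len);	/* header + payload */
		return sctp_end_cksum(crc);		/* negate + byteswap */
	}

sctp_update_cksum() extends a running CRC over further buffers when a packet
is assembled from more than one fragment; the caller then finishes with
sctp_end_cksum() as above and stores the result in the header with htonl().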
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 91ae463b079b..d695f710fc77 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -60,6 +60,7 @@
60#include <net/xfrm.h> 60#include <net/xfrm.h>
61#include <net/sctp/sctp.h> 61#include <net/sctp/sctp.h>
62#include <net/sctp/sm.h> 62#include <net/sctp/sm.h>
63#include <net/sctp/checksum.h>
63 64
64/* Forward declarations for internal helpers. */ 65/* Forward declarations for internal helpers. */
65static int sctp_rcv_ootb(struct sk_buff *); 66static int sctp_rcv_ootb(struct sk_buff *);
@@ -890,14 +891,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
890 891
891 ch = (sctp_chunkhdr_t *) skb->data; 892 ch = (sctp_chunkhdr_t *) skb->data;
892 893
893 /* The code below will attempt to walk the chunk and extract
894 * parameter information. Before we do that, we need to verify
895 * that the chunk length doesn't cause overflow. Otherwise, we'll
896 * walk off the end.
897 */
898 if (WORD_ROUND(ntohs(ch->length)) > skb->len)
899 return NULL;
900
901 /* 894 /*
902 * This code will NOT touch anything inside the chunk--it is 895 * This code will NOT touch anything inside the chunk--it is
903 * strictly READ-ONLY. 896 * strictly READ-ONLY.
@@ -934,6 +927,44 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
934 return NULL; 927 return NULL;
935} 928}
936 929
930/* ADD-IP, Section 5.2
931 * When an endpoint receives an ASCONF Chunk from the remote peer
932 * special procedures may be needed to identify the association the
933 * ASCONF Chunk is associated with. To properly find the association
934 * the following procedures SHOULD be followed:
935 *
936 * D2) If the association is not found, use the address found in the
937 * Address Parameter TLV combined with the port number found in the
938 * SCTP common header. If found proceed to rule D4.
939 *
940 * D2-ext) If more than one ASCONF Chunks are packed together, use the
941 * address found in the ASCONF Address Parameter TLV of each of the
942 * subsequent ASCONF Chunks. If found, proceed to rule D4.
943 */
944static struct sctp_association *__sctp_rcv_asconf_lookup(
945 sctp_chunkhdr_t *ch,
946 const union sctp_addr *laddr,
947 __be32 peer_port,
948 struct sctp_transport **transportp)
949{
950 sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch;
951 struct sctp_af *af;
952 union sctp_addr_param *param;
953 union sctp_addr paddr;
954
955 /* Skip over the ADDIP header and find the Address parameter */
956 param = (union sctp_addr_param *)(asconf + 1);
957
958 af = sctp_get_af_specific(param_type2af(param->v4.param_hdr.type));
959 if (unlikely(!af))
960 return NULL;
961
962 af->from_addr_param(&paddr, param, peer_port, 0);
963
964 return __sctp_lookup_association(laddr, &paddr, transportp);
965}
966
967
937/* SCTP-AUTH, Section 6.3: 968/* SCTP-AUTH, Section 6.3:
938* If the receiver does not find a STCB for a packet containing an AUTH 969* If the receiver does not find a STCB for a packet containing an AUTH
939* chunk as the first chunk and not a COOKIE-ECHO chunk as the second 970* chunk as the first chunk and not a COOKIE-ECHO chunk as the second
@@ -942,20 +973,64 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
942* 973*
943* This means that any chunks that can help us identify the association need 974* This means that any chunks that can help us identify the association need
944* to be looked at to find this association. 975* to be looked at to find this association.
945*
946* TODO: The only chunk currently defined that can do that is ASCONF, but we
947* don't support that functionality yet.
948*/ 976*/
949static struct sctp_association *__sctp_rcv_auth_lookup(struct sk_buff *skb, 977static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
950 const union sctp_addr *paddr,
951 const union sctp_addr *laddr, 978 const union sctp_addr *laddr,
952 struct sctp_transport **transportp) 979 struct sctp_transport **transportp)
953{ 980{
954 /* XXX - walk through the chunks looking for something that can 981 struct sctp_association *asoc = NULL;
955 * help us find the association. INIT, and INIT-ACK are not permitted. 982 sctp_chunkhdr_t *ch;
956 * That leaves ASCONF, but we don't support that yet. 983 int have_auth = 0;
984 unsigned int chunk_num = 1;
985 __u8 *ch_end;
986
987 /* Walk through the chunks looking for AUTH or ASCONF chunks
988 * to help us find the association.
957 */ 989 */
958 return NULL; 990 ch = (sctp_chunkhdr_t *) skb->data;
991 do {
992 /* Break out if chunk length is less than minimal. */
993 if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
994 break;
995
996 ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
997 if (ch_end > skb_tail_pointer(skb))
998 break;
999
1000 switch(ch->type) {
1001 case SCTP_CID_AUTH:
1002 have_auth = chunk_num;
1003 break;
1004
1005 case SCTP_CID_COOKIE_ECHO:
1006 /* If a packet arrives containing an AUTH chunk as
1007 * a first chunk, a COOKIE-ECHO chunk as the second
1008 * chunk, and possibly more chunks after them, and
1009 * the receiver does not have an STCB for that
1010 * packet, then authentication is based on
1011 * the contents of the COOKIE-ECHO chunk.
1012 */
1013 if (have_auth == 1 && chunk_num == 2)
1014 return NULL;
1015 break;
1016
1017 case SCTP_CID_ASCONF:
1018 if (have_auth || sctp_addip_noauth)
1019 asoc = __sctp_rcv_asconf_lookup(ch, laddr,
1020 sctp_hdr(skb)->source,
1021 transportp);
1022 default:
1023 break;
1024 }
1025
1026 if (asoc)
1027 break;
1028
1029 ch = (sctp_chunkhdr_t *) ch_end;
1030 chunk_num++;
1031 } while (ch_end < skb_tail_pointer(skb));
1032
1033 return asoc;
959} 1034}
960 1035
961/* 1036/*
@@ -965,7 +1040,6 @@ static struct sctp_association *__sctp_rcv_auth_lookup(struct sk_buff *skb,
965 * chunks. 1040 * chunks.
966 */ 1041 */
967static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, 1042static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
968 const union sctp_addr *paddr,
969 const union sctp_addr *laddr, 1043 const union sctp_addr *laddr,
970 struct sctp_transport **transportp) 1044 struct sctp_transport **transportp)
971{ 1045{
@@ -973,6 +1047,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
973 1047
974 ch = (sctp_chunkhdr_t *) skb->data; 1048 ch = (sctp_chunkhdr_t *) skb->data;
975 1049
1050 /* The code below will attempt to walk the chunk and extract
1051 * parameter information. Before we do that, we need to verify
1052 * that the chunk length doesn't cause overflow. Otherwise, we'll
1053 * walk off the end.
1054 */
1055 if (WORD_ROUND(ntohs(ch->length)) > skb->len)
1056 return NULL;
1057
976 /* If this is INIT/INIT-ACK look inside the chunk too. */ 1058 /* If this is INIT/INIT-ACK look inside the chunk too. */
977 switch (ch->type) { 1059 switch (ch->type) {
978 case SCTP_CID_INIT: 1060 case SCTP_CID_INIT:
@@ -980,11 +1062,12 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
980 return __sctp_rcv_init_lookup(skb, laddr, transportp); 1062 return __sctp_rcv_init_lookup(skb, laddr, transportp);
981 break; 1063 break;
982 1064
983 case SCTP_CID_AUTH: 1065 default:
984 return __sctp_rcv_auth_lookup(skb, paddr, laddr, transportp); 1066 return __sctp_rcv_walk_lookup(skb, laddr, transportp);
985 break; 1067 break;
986 } 1068 }
987 1069
1070
988 return NULL; 1071 return NULL;
989} 1072}
990 1073
@@ -1003,7 +1086,7 @@ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
1003 * parameters within the INIT or INIT-ACK. 1086 * parameters within the INIT or INIT-ACK.
1004 */ 1087 */
1005 if (!asoc) 1088 if (!asoc)
1006 asoc = __sctp_rcv_lookup_harder(skb, paddr, laddr, transportp); 1089 asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp);
1007 1090
1008 return asoc; 1091 return asoc;
1009} 1092}
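The bounds checks in the walk loop above rely on WORD_ROUND(); for reference,
its definition in the SCTP headers of this era is (as best I can tell) the
usual round-up to the 4-byte chunk alignment the wire format requires:

	/* Round a chunk length up to the next multiple of 4, since every
	 * SCTP chunk is padded to a 4-byte boundary on the wire. */
	#define WORD_ROUND(s) (((s) + 3) & ~3)

Together with the skb_tail_pointer() comparison, this guarantees the walk
never steps past the end of the skb even for a malformed final chunk.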
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 7f31ff638bc6..74f106a7a7e9 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -330,7 +330,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
330 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 330 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
331 if (!laddr->valid) 331 if (!laddr->valid)
332 continue; 332 continue;
333 if ((laddr->use_as_src) && 333 if ((laddr->state == SCTP_ADDR_SRC) &&
334 (laddr->a.sa.sa_family == AF_INET6) && 334 (laddr->a.sa.sa_family == AF_INET6) &&
335 (scope <= sctp_scope(&laddr->a))) { 335 (scope <= sctp_scope(&laddr->a))) {
336 bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); 336 bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
@@ -556,7 +556,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
556 if (!(type & IPV6_ADDR_UNICAST)) 556 if (!(type & IPV6_ADDR_UNICAST))
557 return 0; 557 return 0;
558 558
559 return ipv6_chk_addr(in6, NULL, 0); 559 return ipv6_chk_addr(&init_net, in6, NULL, 0);
560} 560}
561 561
562/* This function checks if the address is a valid address to be used for 562/* This function checks if the address is a valid address to be used for
@@ -858,7 +858,8 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
858 dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); 858 dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id);
859 if (!dev) 859 if (!dev)
860 return 0; 860 return 0;
861 if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { 861 if (!ipv6_chk_addr(&init_net, &addr->v6.sin6_addr,
862 dev, 0)) {
862 dev_put(dev); 863 dev_put(dev);
863 return 0; 864 return 0;
864 } 865 }
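The ipv6_chk_addr() changes are part of the network-namespace conversion that
runs through this merge: the function now takes the namespace to search as an
explicit first argument. Assumed prototype for this kernel generation (worth
checking against include/net/addrconf.h):

	int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
			  struct net_device *dev, int strict);

Passing &init_net, the initial namespace, keeps the converted call sites
behaviourally identical for now.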
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 847639d542c0..5e811b91f21c 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -60,6 +60,7 @@
60 60
61#include <net/sctp/sctp.h> 61#include <net/sctp/sctp.h>
62#include <net/sctp/sm.h> 62#include <net/sctp/sm.h>
63#include <net/sctp/checksum.h>
63 64
64/* Forward declarations for private helpers. */ 65/* Forward declarations for private helpers. */
65static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, 66static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index fa76f235169b..a42af865c2ef 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -716,7 +716,29 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
716 new_transport = chunk->transport; 716 new_transport = chunk->transport;
717 717
718 if (!new_transport) { 718 if (!new_transport) {
719 new_transport = asoc->peer.active_path; 719 /*
720 * If we have a prior transport pointer, see if
721 * the destination address of the chunk
722 * matches the destination address of the
723 * current transport. If not a match, then
724 * try to look up the transport with a given
725 * destination address. We do this because
726 * after processing ASCONFs, we may have new
727 * transports created.
728 */
729 if (transport &&
730 sctp_cmp_addr_exact(&chunk->dest,
731 &transport->ipaddr))
732 new_transport = transport;
733 else
734 new_transport = sctp_assoc_lookup_paddr(asoc,
735 &chunk->dest);
736
737 /* if we still don't have a new transport, then
738 * use the current active path.
739 */
740 if (!new_transport)
741 new_transport = asoc->peer.active_path;
720 } else if ((new_transport->state == SCTP_INACTIVE) || 742 } else if ((new_transport->state == SCTP_INACTIVE) ||
721 (new_transport->state == SCTP_UNCONFIRMED)) { 743 (new_transport->state == SCTP_UNCONFIRMED)) {
722 /* If the chunk is Heartbeat or Heartbeat Ack, 744 /* If the chunk is Heartbeat or Heartbeat Ack,
@@ -729,9 +751,12 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
729 * address of the IP datagram containing the 751 * address of the IP datagram containing the
730 * HEARTBEAT chunk to which this ack is responding. 752 * HEARTBEAT chunk to which this ack is responding.
731 * ... 753 * ...
754 *
755 * ASCONF_ACKs also must be sent to the source.
732 */ 756 */
733 if (chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT && 757 if (chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT &&
734 chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK) 758 chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK &&
759 chunk->chunk_hdr->type != SCTP_CID_ASCONF_ACK)
735 new_transport = asoc->peer.active_path; 760 new_transport = asoc->peer.active_path;
736 } 761 }
737 762
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d50f610d1b02..1339742e49f1 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -229,8 +229,8 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
229 (((AF_INET6 == addr->a.sa.sa_family) && 229 (((AF_INET6 == addr->a.sa.sa_family) &&
230 (copy_flags & SCTP_ADDR6_ALLOWED) && 230 (copy_flags & SCTP_ADDR6_ALLOWED) &&
231 (copy_flags & SCTP_ADDR6_PEERSUPP)))) { 231 (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
232 error = sctp_add_bind_addr(bp, &addr->a, 1, 232 error = sctp_add_bind_addr(bp, &addr->a,
233 GFP_ATOMIC); 233 SCTP_ADDR_SRC, GFP_ATOMIC);
234 if (error) 234 if (error)
235 goto end_copy; 235 goto end_copy;
236 } 236 }
@@ -359,7 +359,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
359 const struct sk_buff *skb) 359 const struct sk_buff *skb)
360{ 360{
361 /* Is this a non-unicast address or a unusable SCTP address? */ 361 /* Is this a non-unicast address or a unusable SCTP address? */
362 if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr)) 362 if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr))
363 return 0; 363 return 0;
364 364
365 /* Is this a broadcast address? */ 365 /* Is this a broadcast address? */
@@ -372,7 +372,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
372/* Should this be available for binding? */ 372/* Should this be available for binding? */
373static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) 373static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
374{ 374{
375 int ret = inet_addr_type(addr->v4.sin_addr.s_addr); 375 int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr);
376 376
377 377
378 if (addr->v4.sin_addr.s_addr != INADDR_ANY && 378 if (addr->v4.sin_addr.s_addr != INADDR_ANY &&
@@ -408,13 +408,15 @@ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
408 */ 408 */
409 409
410 /* Check for unusable SCTP addresses. */ 410 /* Check for unusable SCTP addresses. */
411 if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr)) { 411 if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) {
412 retval = SCTP_SCOPE_UNUSABLE; 412 retval = SCTP_SCOPE_UNUSABLE;
413 } else if (LOOPBACK(addr->v4.sin_addr.s_addr)) { 413 } else if (ipv4_is_loopback(addr->v4.sin_addr.s_addr)) {
414 retval = SCTP_SCOPE_LOOPBACK; 414 retval = SCTP_SCOPE_LOOPBACK;
415 } else if (IS_IPV4_LINK_ADDRESS(&addr->v4.sin_addr.s_addr)) { 415 } else if (ipv4_is_linklocal_169(addr->v4.sin_addr.s_addr)) {
416 retval = SCTP_SCOPE_LINK; 416 retval = SCTP_SCOPE_LINK;
417 } else if (IS_IPV4_PRIVATE_ADDRESS(&addr->v4.sin_addr.s_addr)) { 417 } else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) ||
418 ipv4_is_private_172(addr->v4.sin_addr.s_addr) ||
419 ipv4_is_private_192(addr->v4.sin_addr.s_addr)) {
418 retval = SCTP_SCOPE_PRIVATE; 420 retval = SCTP_SCOPE_PRIVATE;
419 } else { 421 } else {
420 retval = SCTP_SCOPE_GLOBAL; 422 retval = SCTP_SCOPE_GLOBAL;
@@ -452,7 +454,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
452 __FUNCTION__, NIPQUAD(fl.fl4_dst), 454 __FUNCTION__, NIPQUAD(fl.fl4_dst),
453 NIPQUAD(fl.fl4_src)); 455 NIPQUAD(fl.fl4_src));
454 456
455 if (!ip_route_output_key(&rt, &fl)) { 457 if (!ip_route_output_key(&init_net, &rt, &fl)) {
456 dst = &rt->u.dst; 458 dst = &rt->u.dst;
457 } 459 }
458 460
@@ -470,7 +472,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
470 */ 472 */
471 rcu_read_lock(); 473 rcu_read_lock();
472 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 474 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
473 if (!laddr->valid || !laddr->use_as_src) 475 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
474 continue; 476 continue;
475 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); 477 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
476 if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) 478 if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
@@ -492,10 +494,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
492 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 494 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
493 if (!laddr->valid) 495 if (!laddr->valid)
494 continue; 496 continue;
495 if ((laddr->use_as_src) && 497 if ((laddr->state == SCTP_ADDR_SRC) &&
496 (AF_INET == laddr->a.sa.sa_family)) { 498 (AF_INET == laddr->a.sa.sa_family)) {
497 fl.fl4_src = laddr->a.v4.sin_addr.s_addr; 499 fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
498 if (!ip_route_output_key(&rt, &fl)) { 500 if (!ip_route_output_key(&init_net, &rt, &fl)) {
499 dst = &rt->u.dst; 501 dst = &rt->u.dst;
500 goto out_unlock; 502 goto out_unlock;
501 } 503 }
@@ -1107,7 +1109,7 @@ SCTP_STATIC __init int sctp_init(void)
1107 sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); 1109 sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
1108 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); 1110 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
1109 1111
1110 sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM; 1112 sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
1111 sysctl_sctp_wmem[1] = 16*1024; 1113 sysctl_sctp_wmem[1] = 16*1024;
1112 sysctl_sctp_wmem[2] = max(64*1024, max_share); 1114 sysctl_sctp_wmem[2] = max(64*1024, max_share);
1113 1115
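The private IS_IPV4_* macros give way to the generic ipv4_is_*() helpers from
linux/in.h: ipv4_is_loopback() matches 127.0.0.0/8, ipv4_is_linklocal_169()
matches 169.254.0.0/16, and the three ipv4_is_private_*() helpers match the
RFC 1918 blocks 10.0.0.0/8, 172.16.0.0/12 and 192.168.0.0/16. Each is a
masked compare in network byte order; a representative implementation
(assumed to match linux/in.h of this era):

	static inline bool ipv4_is_private_10(__be32 addr)
	{
		return (addr & htonl(0xff000000)) == htonl(0x0a000000);
	}

Note that the helpers take the __be32 address by value, which is why the old
pass-by-pointer call sites above lose their '&'.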
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 3cc629d3c9ff..dd98763c8b00 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1275,6 +1275,9 @@ nodata:
1275/* Release the memory occupied by a chunk. */ 1275/* Release the memory occupied by a chunk. */
1276static void sctp_chunk_destroy(struct sctp_chunk *chunk) 1276static void sctp_chunk_destroy(struct sctp_chunk *chunk)
1277{ 1277{
1278 BUG_ON(!list_empty(&chunk->list));
1279 list_del_init(&chunk->transmitted_list);
1280
1278 /* Free the chunk skb data and the SCTP_chunk stub itself. */ 1281 /* Free the chunk skb data and the SCTP_chunk stub itself. */
1279 dev_kfree_skb(chunk->skb); 1282 dev_kfree_skb(chunk->skb);
1280 1283
@@ -1285,9 +1288,6 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk)
1285/* Possibly, free the chunk. */ 1288/* Possibly, free the chunk. */
1286void sctp_chunk_free(struct sctp_chunk *chunk) 1289void sctp_chunk_free(struct sctp_chunk *chunk)
1287{ 1290{
1288 BUG_ON(!list_empty(&chunk->list));
1289 list_del_init(&chunk->transmitted_list);
1290
1291 /* Release our reference on the message tracker. */ 1291 /* Release our reference on the message tracker. */
1292 if (chunk->msg) 1292 if (chunk->msg)
1293 sctp_datamsg_put(chunk->msg); 1293 sctp_datamsg_put(chunk->msg);
@@ -1692,8 +1692,8 @@ no_hmac:
1692 1692
1693 /* Also, add the destination address. */ 1693 /* Also, add the destination address. */
1694 if (list_empty(&retval->base.bind_addr.address_list)) { 1694 if (list_empty(&retval->base.bind_addr.address_list)) {
1695 sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1, 1695 sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
1696 GFP_ATOMIC); 1696 SCTP_ADDR_SRC, GFP_ATOMIC);
1697 } 1697 }
1698 1698
1699 retval->next_tsn = retval->c.initial_tsn; 1699 retval->next_tsn = retval->c.initial_tsn;
@@ -1836,6 +1836,39 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
1836 return 0; 1836 return 0;
1837} 1837}
1838 1838
1839static int sctp_verify_ext_param(union sctp_params param)
1840{
1841 __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
1842 int have_auth = 0;
1843 int have_asconf = 0;
1844 int i;
1845
1846 for (i = 0; i < num_ext; i++) {
1847 switch (param.ext->chunks[i]) {
1848 case SCTP_CID_AUTH:
1849 have_auth = 1;
1850 break;
1851 case SCTP_CID_ASCONF:
1852 case SCTP_CID_ASCONF_ACK:
1853 have_asconf = 1;
1854 break;
1855 }
1856 }
1857
1858 /* ADD-IP Security: The draft requires us to ABORT or ignore the
1859 * INIT/INIT-ACK if ADD-IP is listed, but AUTH is not. Do this
1860 * only if ADD-IP is turned on and we are not in
1861 * backward-compatible mode.
1862 */
1863 if (sctp_addip_noauth)
1864 return 1;
1865
1866 if (sctp_addip_enable && !have_auth && have_asconf)
1867 return 0;
1868
1869 return 1;
1870}
1871
1839static void sctp_process_ext_param(struct sctp_association *asoc, 1872static void sctp_process_ext_param(struct sctp_association *asoc,
1840 union sctp_params param) 1873 union sctp_params param)
1841{ 1874{
@@ -1966,9 +1999,18 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
1966 case SCTP_PARAM_UNRECOGNIZED_PARAMETERS: 1999 case SCTP_PARAM_UNRECOGNIZED_PARAMETERS:
1967 case SCTP_PARAM_ECN_CAPABLE: 2000 case SCTP_PARAM_ECN_CAPABLE:
1968 case SCTP_PARAM_ADAPTATION_LAYER_IND: 2001 case SCTP_PARAM_ADAPTATION_LAYER_IND:
2002 break;
2003
1969 case SCTP_PARAM_SUPPORTED_EXT: 2004 case SCTP_PARAM_SUPPORTED_EXT:
2005 if (!sctp_verify_ext_param(param))
2006 return SCTP_IERROR_ABORT;
1970 break; 2007 break;
1971 2008
2009 case SCTP_PARAM_SET_PRIMARY:
2010 if (sctp_addip_enable)
2011 break;
2012 goto fallthrough;
2013
1972 case SCTP_PARAM_HOST_NAME_ADDRESS: 2014 case SCTP_PARAM_HOST_NAME_ADDRESS:
1973 /* Tell the peer, we won't support this param. */ 2015 /* Tell the peer, we won't support this param. */
1974 sctp_process_hn_param(asoc, param, chunk, err_chunk); 2016 sctp_process_hn_param(asoc, param, chunk, err_chunk);
@@ -2134,10 +2176,11 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
2134 !asoc->peer.peer_hmacs)) 2176 !asoc->peer.peer_hmacs))
2135 asoc->peer.auth_capable = 0; 2177 asoc->peer.auth_capable = 0;
2136 2178
2137 2179 /* When not in backward-compatible mode, if the peer claims
2138 /* If the peer claims support for ADD-IP without support 2180 * support for ADD-IP but not AUTH, the ADD-IP spec states
2139 * for AUTH, disable support for ADD-IP. 2181 * that we MUST ABORT the association (Section 6). The section
2140 * Do this only if backward compatible mode is turned off. 2182 * also gives us an option to silently ignore the packet, which
2183 * is what we'll do here.
2141 */ 2184 */
2142 if (!sctp_addip_noauth && 2185 if (!sctp_addip_noauth &&
2143 (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { 2186 (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) {
@@ -2145,6 +2188,7 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
2145 SCTP_PARAM_DEL_IP | 2188 SCTP_PARAM_DEL_IP |
2146 SCTP_PARAM_SET_PRIMARY); 2189 SCTP_PARAM_SET_PRIMARY);
2147 asoc->peer.asconf_capable = 0; 2190 asoc->peer.asconf_capable = 0;
2191 goto clean_up;
2148 } 2192 }
2149 2193
2150 /* Walk list of transports, removing transports in the UNKNOWN state. */ 2194 /* Walk list of transports, removing transports in the UNKNOWN state. */
@@ -2286,6 +2330,8 @@ static int sctp_process_param(struct sctp_association *asoc,
2286 sctp_scope_t scope; 2330 sctp_scope_t scope;
2287 time_t stale; 2331 time_t stale;
2288 struct sctp_af *af; 2332 struct sctp_af *af;
2333 union sctp_addr_param *addr_param;
2334 struct sctp_transport *t;
2289 2335
2290 /* We maintain all INIT parameters in network byte order all the 2336 /* We maintain all INIT parameters in network byte order all the
2291 * time. This allows us to not worry about whether the parameters 2337 * time. This allows us to not worry about whether the parameters
@@ -2376,6 +2422,26 @@ static int sctp_process_param(struct sctp_association *asoc,
2376 asoc->peer.adaptation_ind = param.aind->adaptation_ind; 2422 asoc->peer.adaptation_ind = param.aind->adaptation_ind;
2377 break; 2423 break;
2378 2424
2425 case SCTP_PARAM_SET_PRIMARY:
2426 addr_param = param.v + sizeof(sctp_addip_param_t);
2427
2428 af = sctp_get_af_specific(param_type2af(param.p->type));
2429 af->from_addr_param(&addr, addr_param,
2430 htons(asoc->peer.port), 0);
2431
2432 /* if the address is invalid, we can't process it.
2433 * XXX: see spec for what to do.
2434 */
2435 if (!af->addr_valid(&addr, NULL, NULL))
2436 break;
2437
2438 t = sctp_assoc_lookup_paddr(asoc, &addr);
2439 if (!t)
2440 break;
2441
2442 sctp_assoc_set_primary(asoc, t);
2443 break;
2444
2379 case SCTP_PARAM_SUPPORTED_EXT: 2445 case SCTP_PARAM_SUPPORTED_EXT:
2380 sctp_process_ext_param(asoc, param); 2446 sctp_process_ext_param(asoc, param);
2381 break; 2447 break;
@@ -2727,7 +2793,6 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2727 struct sctp_transport *peer; 2793 struct sctp_transport *peer;
2728 struct sctp_af *af; 2794 struct sctp_af *af;
2729 union sctp_addr addr; 2795 union sctp_addr addr;
2730 struct list_head *pos;
2731 union sctp_addr_param *addr_param; 2796 union sctp_addr_param *addr_param;
2732 2797
2733 addr_param = (union sctp_addr_param *) 2798 addr_param = (union sctp_addr_param *)
@@ -2738,8 +2803,24 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2738 return SCTP_ERROR_INV_PARAM; 2803 return SCTP_ERROR_INV_PARAM;
2739 2804
2740 af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); 2805 af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0);
2806
2807 /* ADDIP 4.2.1 This parameter MUST NOT contain a broadcast
2808 * or multicast address.
2809 * (note: wildcard is permitted and requires special handling so
2810 * make sure we check for that)
2811 */
2812 if (!af->is_any(&addr) && !af->addr_valid(&addr, NULL, asconf->skb))
2813 return SCTP_ERROR_INV_PARAM;
2814
2741 switch (asconf_param->param_hdr.type) { 2815 switch (asconf_param->param_hdr.type) {
2742 case SCTP_PARAM_ADD_IP: 2816 case SCTP_PARAM_ADD_IP:
2817 /* Section 4.2.1:
2818 * If the address 0.0.0.0 or ::0 is provided, the source
2819 * address of the packet MUST be added.
2820 */
2821 if (af->is_any(&addr))
2822 memcpy(&addr, &asconf->source, sizeof(addr));
2823
2743 /* ADDIP 4.3 D9) If an endpoint receives an ADD IP address 2824 /* ADDIP 4.3 D9) If an endpoint receives an ADD IP address
2744 * request and does not have the local resources to add this 2825 * request and does not have the local resources to add this
2745 * new address to the association, it MUST return an Error 2826 * new address to the association, it MUST return an Error
@@ -2761,8 +2842,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2761 * MUST send an Error Cause TLV with the error cause set to the 2842 * MUST send an Error Cause TLV with the error cause set to the
2762 * new error code 'Request to Delete Last Remaining IP Address'. 2843 * new error code 'Request to Delete Last Remaining IP Address'.
2763 */ 2844 */
2764 pos = asoc->peer.transport_addr_list.next; 2845 if (asoc->peer.transport_count == 1)
2765 if (pos->next == &asoc->peer.transport_addr_list)
2766 return SCTP_ERROR_DEL_LAST_IP; 2846 return SCTP_ERROR_DEL_LAST_IP;
2767 2847
2768 /* ADDIP 4.3 D8) If a request is received to delete an IP 2848 /* ADDIP 4.3 D8) If a request is received to delete an IP
@@ -2775,9 +2855,27 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
2775 if (sctp_cmp_addr_exact(sctp_source(asconf), &addr)) 2855 if (sctp_cmp_addr_exact(sctp_source(asconf), &addr))
2776 return SCTP_ERROR_DEL_SRC_IP; 2856 return SCTP_ERROR_DEL_SRC_IP;
2777 2857
2778 sctp_assoc_del_peer(asoc, &addr); 2858 /* Section 4.2.2
2859 * If the address 0.0.0.0 or ::0 is provided, all
2860 * addresses of the peer except the source address of the
2861 * packet MUST be deleted.
2862 */
2863 if (af->is_any(&addr)) {
2864 sctp_assoc_set_primary(asoc, asconf->transport);
2865 sctp_assoc_del_nonprimary_peers(asoc,
2866 asconf->transport);
2867 } else
2868 sctp_assoc_del_peer(asoc, &addr);
2779 break; 2869 break;
2780 case SCTP_PARAM_SET_PRIMARY: 2870 case SCTP_PARAM_SET_PRIMARY:
2871 /* ADDIP Section 4.2.4
2872 * If the address 0.0.0.0 or ::0 is provided, the receiver
2873 * MAY mark the source address of the packet as its
2874 * primary.
2875 */
2876 if (af->is_any(&addr))
2877 memcpy(&addr.v4, sctp_source(asconf), sizeof(addr));
2878
2781 peer = sctp_assoc_lookup_paddr(asoc, &addr); 2879 peer = sctp_assoc_lookup_paddr(asoc, &addr);
2782 if (!peer) 2880 if (!peer)
2783 return SCTP_ERROR_INV_PARAM; 2881 return SCTP_ERROR_INV_PARAM;
@@ -2921,11 +3019,9 @@ done:
2921 * after freeing the reference to old asconf ack if any. 3019 * after freeing the reference to old asconf ack if any.
2922 */ 3020 */
2923 if (asconf_ack) { 3021 if (asconf_ack) {
2924 if (asoc->addip_last_asconf_ack)
2925 sctp_chunk_free(asoc->addip_last_asconf_ack);
2926
2927 sctp_chunk_hold(asconf_ack); 3022 sctp_chunk_hold(asconf_ack);
2928 asoc->addip_last_asconf_ack = asconf_ack; 3023 list_add_tail(&asconf_ack->transmitted_list,
3024 &asoc->asconf_ack_list);
2929 } 3025 }
2930 3026
2931 return asconf_ack; 3027 return asconf_ack;
@@ -2959,7 +3055,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
2959 local_bh_disable(); 3055 local_bh_disable();
2960 list_for_each_entry(saddr, &bp->address_list, list) { 3056 list_for_each_entry(saddr, &bp->address_list, list) {
2961 if (sctp_cmp_addr_exact(&saddr->a, &addr)) 3057 if (sctp_cmp_addr_exact(&saddr->a, &addr))
2962 saddr->use_as_src = 1; 3058 saddr->state = SCTP_ADDR_SRC;
2963 } 3059 }
2964 local_bh_enable(); 3060 local_bh_enable();
2965 break; 3061 break;
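Pulling the split columns together, the acceptance policy that
sctp_verify_ext_param() implements condenses to the following restated
sketch (not the literal kernel code):

	/* Accept an INIT/INIT-ACK advertising extensions unless it lists
	 * ADD-IP without AUTH while ADD-IP is enabled and strict. */
	static int ext_param_acceptable(int have_auth, int have_asconf)
	{
		if (sctp_addip_noauth)		/* backward-compatible mode */
			return 1;
		if (sctp_addip_enable && have_asconf && !have_auth)
			return 0;		/* caller sends ABORT */
		return 1;
	}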
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d247ed4ee423..61cbd5a8dd0c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -143,6 +143,12 @@ static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
143 const sctp_subtype_t type, 143 const sctp_subtype_t type,
144 struct sctp_chunk *chunk); 144 struct sctp_chunk *chunk);
145 145
146static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
147 const struct sctp_association *asoc,
148 const sctp_subtype_t type,
149 void *arg,
150 sctp_cmd_seq_t *commands);
151
146/* Small helper function that checks if the chunk length 152/* Small helper function that checks if the chunk length
147 * is of the appropriate length. The 'required_length' argument 153 * is of the appropriate length. The 'required_length' argument
148 * is set to be the size of a specific chunk we are testing. 154 * is set to be the size of a specific chunk we are testing.
@@ -475,7 +481,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
475 sctp_init_chunk_t *initchunk; 481 sctp_init_chunk_t *initchunk;
476 struct sctp_chunk *err_chunk; 482 struct sctp_chunk *err_chunk;
477 struct sctp_packet *packet; 483 struct sctp_packet *packet;
478 sctp_error_t error;
479 484
480 if (!sctp_vtag_verify(chunk, asoc)) 485 if (!sctp_vtag_verify(chunk, asoc))
481 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 486 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -500,8 +505,12 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
500 (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, 505 (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
501 &err_chunk)) { 506 &err_chunk)) {
502 507
508 sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
509
503 /* This chunk contains a fatal error. It is to be discarded. 510 /* This chunk contains a fatal error. It is to be discarded.
504 * Send an ABORT, with causes if there is any. 511 * Send an ABORT, with causes. If there are no causes,
512 * then there wasn't enough memory. Just terminate
513 * the association.
505 */ 514 */
506 if (err_chunk) { 515 if (err_chunk) {
507 packet = sctp_abort_pkt_new(ep, asoc, arg, 516 packet = sctp_abort_pkt_new(ep, asoc, arg,
@@ -517,12 +526,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
517 SCTP_PACKET(packet)); 526 SCTP_PACKET(packet));
518 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); 527 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
519 error = SCTP_ERROR_INV_PARAM; 528 error = SCTP_ERROR_INV_PARAM;
520 } else {
521 error = SCTP_ERROR_NO_RESOURCE;
522 } 529 }
523 } else {
524 sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
525 error = SCTP_ERROR_INV_PARAM;
526 } 530 }
527 531
528 /* SCTP-AUTH, Section 6.3: 532 /* SCTP-AUTH, Section 6.3:
@@ -2073,11 +2077,20 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
2073 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) 2077 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
2074 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 2078 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
2075 2079
2080 /* ADD-IP: Special case for ABORT chunks
2081 * F4) One special consideration is that ABORT Chunks arriving
2082 * destined to the IP address being deleted MUST be
2083 * ignored (see Section 5.3.1 for further details).
2084 */
2085 if (SCTP_ADDR_DEL ==
2086 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
2087 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
2088
2076 /* Stop the T5-shutdown guard timer. */ 2089 /* Stop the T5-shutdown guard timer. */
2077 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 2090 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
2078 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); 2091 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
2079 2092
2080 return sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); 2093 return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
2081} 2094}
2082 2095
2083/* 2096/*
@@ -2109,6 +2122,15 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
2109 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) 2122 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
2110 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 2123 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
2111 2124
2125 /* ADD-IP: Special case for ABORT chunks
2126 * F4) One special consideration is that ABORT Chunks arriving
2127 * destined to the IP address being deleted MUST be
2128 * ignored (see Section 5.3.1 for further details).
2129 */
2130 if (SCTP_ADDR_DEL ==
2131 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
2132 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
2133
2112 /* Stop the T2-shutdown timer. */ 2134 /* Stop the T2-shutdown timer. */
2113 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 2135 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
2114 SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); 2136 SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN));
@@ -2117,7 +2139,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
2117 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 2139 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
2118 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); 2140 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
2119 2141
2120 return sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); 2142 return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
2121} 2143}
2122 2144
2123/* 2145/*
@@ -2344,8 +2366,6 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
2344 sctp_cmd_seq_t *commands) 2366 sctp_cmd_seq_t *commands)
2345{ 2367{
2346 struct sctp_chunk *chunk = arg; 2368 struct sctp_chunk *chunk = arg;
2347 unsigned len;
2348 __be16 error = SCTP_ERROR_NO_ERROR;
2349 2369
2350 if (!sctp_vtag_verify_either(chunk, asoc)) 2370 if (!sctp_vtag_verify_either(chunk, asoc))
2351 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 2371 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -2363,6 +2383,28 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
2363 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) 2383 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
2364 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 2384 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
2365 2385
2386 /* ADD-IP: Special case for ABORT chunks
2387 * F4) One special consideration is that ABORT Chunks arriving
2388 * destined to the IP address being deleted MUST be
2389 * ignored (see Section 5.3.1 for further details).
2390 */
2391 if (SCTP_ADDR_DEL ==
2392 sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
2393 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
2394
2395 return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
2396}
2397
2398static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
2399 const struct sctp_association *asoc,
2400 const sctp_subtype_t type,
2401 void *arg,
2402 sctp_cmd_seq_t *commands)
2403{
2404 struct sctp_chunk *chunk = arg;
2405 unsigned len;
2406 __be16 error = SCTP_ERROR_NO_ERROR;
2407
2366 /* See if we have an error cause code in the chunk. */ 2408 /* See if we have an error cause code in the chunk. */
2367 len = ntohs(chunk->chunk_hdr->length); 2409 len = ntohs(chunk->chunk_hdr->length);
2368 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) 2410 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
@@ -3377,6 +3419,15 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
3377 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 3419 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
3378 } 3420 }
3379 3421
3422 /* ADD-IP: Section 4.1.1
3423 * This chunk MUST be sent in an authenticated way by using
3424 * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
3425 * is received unauthenticated it MUST be silently discarded as
3426 * described in [I-D.ietf-tsvwg-sctp-auth].
3427 */
3428 if (!sctp_addip_noauth && !chunk->auth)
3429 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
3430
3380 /* Make sure that the ASCONF ADDIP chunk has a valid length. */ 3431 /* Make sure that the ASCONF ADDIP chunk has a valid length. */
3381 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t))) 3432 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
3382 return sctp_sf_violation_chunklen(ep, asoc, type, arg, 3433 return sctp_sf_violation_chunklen(ep, asoc, type, arg,
@@ -3393,48 +3444,68 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
3393 3444
3394 /* Verify the ASCONF chunk before processing it. */ 3445 /* Verify the ASCONF chunk before processing it. */
3395 if (!sctp_verify_asconf(asoc, 3446 if (!sctp_verify_asconf(asoc,
3396 (sctp_paramhdr_t *)((void *)addr_param + length), 3447 (sctp_paramhdr_t *)((void *)addr_param + length),
3397 (void *)chunk->chunk_end, 3448 (void *)chunk->chunk_end,
3398 &err_param)) 3449 &err_param))
3399 return sctp_sf_violation_paramlen(ep, asoc, type, 3450 return sctp_sf_violation_paramlen(ep, asoc, type,
3400 (void *)&err_param, commands); 3451 (void *)&err_param, commands);
3401 3452
3402 /* ADDIP 4.2 C1) Compare the value of the serial number to the value 3453 /* ADDIP 5.2 E1) Compare the value of the serial number to the value
3403 * the endpoint stored in a new association variable 3454 * the endpoint stored in a new association variable
3404 * 'Peer-Serial-Number'. 3455 * 'Peer-Serial-Number'.
3405 */ 3456 */
3406 if (serial == asoc->peer.addip_serial + 1) { 3457 if (serial == asoc->peer.addip_serial + 1) {
3407 /* ADDIP 4.2 C2) If the value found in the serial number is 3458 /* If this is the first instance of ASCONF in the packet,
3408 * equal to the ('Peer-Serial-Number' + 1), the endpoint MUST 3459 * we can clean our old ASCONF-ACKs.
3409 * do V1-V5. 3460 */
3461 if (!chunk->has_asconf)
3462 sctp_assoc_clean_asconf_ack_cache(asoc);
3463
3464 /* ADDIP 5.2 E4) When the Sequence Number matches the next one
3465 * expected, process the ASCONF as described below and after
3466 * processing the ASCONF Chunk, append an ASCONF-ACK Chunk to
3467 * the response packet and cache a copy of it (in the event it
3468 * later needs to be retransmitted).
3469 *
3470 * Essentially, do V1-V5.
3410 */ 3471 */
3411 asconf_ack = sctp_process_asconf((struct sctp_association *) 3472 asconf_ack = sctp_process_asconf((struct sctp_association *)
3412 asoc, chunk); 3473 asoc, chunk);
3413 if (!asconf_ack) 3474 if (!asconf_ack)
3414 return SCTP_DISPOSITION_NOMEM; 3475 return SCTP_DISPOSITION_NOMEM;
3415 } else if (serial == asoc->peer.addip_serial) { 3476 } else if (serial < asoc->peer.addip_serial + 1) {
3416 /* ADDIP 4.2 C3) If the value found in the serial number is 3477 /* ADDIP 5.2 E2)
3417 * equal to the value stored in the 'Peer-Serial-Number' 3478 * If the value found in the Sequence Number is less than the
3418 * IMPLEMENTATION NOTE: As an optimization a receiver may wish 3479 * ('Peer- Sequence-Number' + 1), simply skip to the next
3419 * to save the last ASCONF-ACK for some predetermined period of 3480 * ASCONF, and include in the outbound response packet
3420 * time and instead of re-processing the ASCONF (with the same 3481 * any previously cached ASCONF-ACK response that was
3421 * serial number) it may just re-transmit the ASCONF-ACK. 3482 * sent and saved that matches the Sequence Number of the
3483 * ASCONF. Note: It is possible that no cached ASCONF-ACK
3484 * Chunk exists. This will occur when an older ASCONF
3485 * arrives out of order. In such a case, the receiver
3486 * should skip the ASCONF Chunk and not include ASCONF-ACK
3487 * Chunk for that chunk.
3422 */ 3488 */
3423 if (asoc->addip_last_asconf_ack) 3489 asconf_ack = sctp_assoc_lookup_asconf_ack(asoc, hdr->serial);
3424 asconf_ack = asoc->addip_last_asconf_ack; 3490 if (!asconf_ack)
3425 else
3426 return SCTP_DISPOSITION_DISCARD; 3491 return SCTP_DISPOSITION_DISCARD;
3427 } else { 3492 } else {
3428 /* ADDIP 4.2 C4) Otherwise, the ASCONF Chunk is discarded since 3493 /* ADDIP 5.2 E5) Otherwise, the ASCONF Chunk is discarded since
3429 * it must be either a stale packet or from an attacker. 3494 * it must be either a stale packet or from an attacker.
3430 */ 3495 */
3431 return SCTP_DISPOSITION_DISCARD; 3496 return SCTP_DISPOSITION_DISCARD;
3432 } 3497 }
3433 3498
3434 /* ADDIP 4.2 C5) In both cases C2 and C3 the ASCONF-ACK MUST be sent 3499 /* ADDIP 5.2 E6) The destination address of the SCTP packet
3435 * back to the source address contained in the IP header of the ASCONF 3500 * containing the ASCONF-ACK Chunks MUST be the source address of
3436 * being responded to. 3501 * the SCTP packet that held the ASCONF Chunks.
3502 *
3503 * To do this properly, we'll set the destination address of the chunk
3504 * and at transmit time, will try to look up the transport to use.
3505 * Since ASCONFs may be bundled, the correct transport may not be
3506 * created until we process the entire packet, hence this workaround.
3437 */ 3507 */
3508 asconf_ack->dest = chunk->source;
3438 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); 3509 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack));
3439 3510
3440 return SCTP_DISPOSITION_CONSUME; 3511 return SCTP_DISPOSITION_CONSUME;
@@ -3463,6 +3534,15 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
3463 return sctp_sf_pdiscard(ep, asoc, type, arg, commands); 3534 return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
3464 } 3535 }
3465 3536
3537 /* ADD-IP, Section 4.1.2:
3538 * This chunk MUST be sent in an authenticated way by using
3539 * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
3540 * is received unauthenticated it MUST be silently discarded as
3541 * described in [I-D.ietf-tsvwg-sctp-auth].
3542 */
3543 if (!sctp_addip_noauth && !asconf_ack->auth)
3544 return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
3545
3466 /* Make sure that the ADDIP chunk has a valid length. */ 3546 /* Make sure that the ADDIP chunk has a valid length. */
3467 if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t))) 3547 if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
3468 return sctp_sf_violation_chunklen(ep, asoc, type, arg, 3548 return sctp_sf_violation_chunklen(ep, asoc, type, arg,
@@ -5763,7 +5843,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
5763 /* 5843 /*
5764 * Also try to renege to limit our memory usage in the event that 5844 * Also try to renege to limit our memory usage in the event that
5765 * we are under memory pressure 5845 * we are under memory pressure
5766 * If we can't renege, don't worry about it, the sk_stream_rmem_schedule 5846 * If we can't renege, don't worry about it, the sk_rmem_schedule
5767 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our 5847 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
5768 * memory usage too much 5848 * memory usage too much
5769 */ 5849 */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index a93a4bc8f68f..e6016e41ffa0 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -457,11 +457,11 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
457 /* SCTP_STATE_ESTABLISHED */ \ 457 /* SCTP_STATE_ESTABLISHED */ \
458 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \ 458 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
459 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 459 /* SCTP_STATE_SHUTDOWN_PENDING */ \
460 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 460 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
461 /* SCTP_STATE_SHUTDOWN_SENT */ \ 461 /* SCTP_STATE_SHUTDOWN_SENT */ \
462 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 462 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
463 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ 463 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
464 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 464 TYPE_SCTP_FUNC(sctp_sf_do_asconf), \
465 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ 465 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
466 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 466 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
467} /* TYPE_SCTP_ASCONF */ 467} /* TYPE_SCTP_ASCONF */
@@ -478,11 +478,11 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
478 /* SCTP_STATE_ESTABLISHED */ \ 478 /* SCTP_STATE_ESTABLISHED */ \
479 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \ 479 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
480 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 480 /* SCTP_STATE_SHUTDOWN_PENDING */ \
481 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 481 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
482 /* SCTP_STATE_SHUTDOWN_SENT */ \ 482 /* SCTP_STATE_SHUTDOWN_SENT */ \
483 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 483 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
484 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ 484 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
485 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 485 TYPE_SCTP_FUNC(sctp_sf_do_asconf_ack), \
486 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ 486 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
487 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ 487 TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
488} /* TYPE_SCTP_ASCONF_ACK */ 488} /* TYPE_SCTP_ASCONF_ACK */
@@ -691,11 +691,11 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
691 /* SCTP_STATE_ESTABLISHED */ \ 691 /* SCTP_STATE_ESTABLISHED */ \
692 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \ 692 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
693 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 693 /* SCTP_STATE_SHUTDOWN_PENDING */ \
694 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ 694 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
695 /* SCTP_STATE_SHUTDOWN_SENT */ \ 695 /* SCTP_STATE_SHUTDOWN_SENT */ \
696 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ 696 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
697 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ 697 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
698 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ 698 TYPE_SCTP_FUNC(sctp_sf_do_prm_asconf), \
699 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ 699 /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
700 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \ 700 TYPE_SCTP_FUNC(sctp_sf_error_shutdown), \
701} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */ 701} /* TYPE_SCTP_PRIMITIVE_REQUESTHEARTBEAT */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ea9649ca0b2a..710df67a6785 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -174,7 +174,8 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
174 sizeof(struct sctp_chunk); 174 sizeof(struct sctp_chunk);
175 175
176 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 176 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
177 sk_charge_skb(sk, chunk->skb); 177 sk->sk_wmem_queued += chunk->skb->truesize;
178 sk_mem_charge(sk, chunk->skb->truesize);
178} 179}
179 180
180/* Verify that this is a valid address. */ 181/* Verify that this is a valid address. */
@@ -390,7 +391,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
390 /* Add the address to the bind address list. 391 /* Add the address to the bind address list.
391 * Use GFP_ATOMIC since BHs will be disabled. 392 * Use GFP_ATOMIC since BHs will be disabled.
392 */ 393 */
393 ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); 394 ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC);
394 395
395 /* Copy back into socket for getsockname() use. */ 396 /* Copy back into socket for getsockname() use. */
396 if (!ret) { 397 if (!ret) {
@@ -585,8 +586,8 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
585 addr = (union sctp_addr *)addr_buf; 586 addr = (union sctp_addr *)addr_buf;
586 af = sctp_get_af_specific(addr->v4.sin_family); 587 af = sctp_get_af_specific(addr->v4.sin_family);
587 memcpy(&saveaddr, addr, af->sockaddr_len); 588 memcpy(&saveaddr, addr, af->sockaddr_len);
588 retval = sctp_add_bind_addr(bp, &saveaddr, 0, 589 retval = sctp_add_bind_addr(bp, &saveaddr,
589 GFP_ATOMIC); 590 SCTP_ADDR_NEW, GFP_ATOMIC);
590 addr_buf += af->sockaddr_len; 591 addr_buf += af->sockaddr_len;
591 } 592 }
592 } 593 }
@@ -777,7 +778,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
777 af = sctp_get_af_specific(laddr->v4.sin_family); 778 af = sctp_get_af_specific(laddr->v4.sin_family);
778 list_for_each_entry(saddr, &bp->address_list, list) { 779 list_for_each_entry(saddr, &bp->address_list, list) {
779 if (sctp_cmp_addr_exact(&saddr->a, laddr)) 780 if (sctp_cmp_addr_exact(&saddr->a, laddr))
780 saddr->use_as_src = 0; 781 saddr->state = SCTP_ADDR_DEL;
781 } 782 }
782 addr_buf += af->sockaddr_len; 783 addr_buf += af->sockaddr_len;
783 } 784 }
@@ -6008,7 +6009,8 @@ static void __sctp_write_space(struct sctp_association *asoc)
6008 */ 6009 */
6009 if (sock->fasync_list && 6010 if (sock->fasync_list &&
6010 !(sk->sk_shutdown & SEND_SHUTDOWN)) 6011 !(sk->sk_shutdown & SEND_SHUTDOWN))
6011 sock_wake_async(sock, 2, POLL_OUT); 6012 sock_wake_async(sock,
6013 SOCK_WAKE_SPACE, POLL_OUT);
6012 } 6014 }
6013 } 6015 }
6014} 6016}
@@ -6034,10 +6036,10 @@ static void sctp_wfree(struct sk_buff *skb)
6034 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 6036 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
6035 6037
6036 /* 6038 /*
6037 * This undoes what is done via sk_charge_skb 6039 * This undoes what is done via sctp_set_owner_w and sk_mem_charge
6038 */ 6040 */
6039 sk->sk_wmem_queued -= skb->truesize; 6041 sk->sk_wmem_queued -= skb->truesize;
6040 sk->sk_forward_alloc += skb->truesize; 6042 sk_mem_uncharge(sk, skb->truesize);
6041 6043
6042 sock_wfree(skb); 6044 sock_wfree(skb);
6043 __sctp_write_space(asoc); 6045 __sctp_write_space(asoc);
@@ -6058,9 +6060,9 @@ void sctp_sock_rfree(struct sk_buff *skb)
6058 atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); 6060 atomic_sub(event->rmem_len, &sk->sk_rmem_alloc);
6059 6061
6060 /* 6062 /*
6061 * Mimic the behavior of sk_stream_rfree 6063 * Mimic the behavior of sock_rfree
6062 */ 6064 */
6063 sk->sk_forward_alloc += event->rmem_len; 6065 sk_mem_uncharge(sk, event->rmem_len);
6064} 6066}
6065 6067
6066 6068
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index da4f15734fb1..5eb6ea829b54 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -275,24 +275,10 @@ static ctl_table sctp_table[] = {
275 { .ctl_name = 0 } 275 { .ctl_name = 0 }
276}; 276};
277 277
278static ctl_table sctp_net_table[] = { 278static struct ctl_path sctp_path[] = {
279 { 279 { .procname = "net", .ctl_name = CTL_NET, },
280 .ctl_name = NET_SCTP, 280 { .procname = "sctp", .ctl_name = NET_SCTP, },
281 .procname = "sctp", 281 { }
282 .mode = 0555,
283 .child = sctp_table
284 },
285 { .ctl_name = 0 }
286};
287
288static ctl_table sctp_root_table[] = {
289 {
290 .ctl_name = CTL_NET,
291 .procname = "net",
292 .mode = 0555,
293 .child = sctp_net_table
294 },
295 { .ctl_name = 0 }
296}; 282};
297 283
298static struct ctl_table_header * sctp_sysctl_header; 284static struct ctl_table_header * sctp_sysctl_header;
@@ -300,7 +286,7 @@ static struct ctl_table_header * sctp_sysctl_header;
300/* Sysctl registration. */ 286/* Sysctl registration. */
301void sctp_sysctl_register(void) 287void sctp_sysctl_register(void)
302{ 288{
303 sctp_sysctl_header = register_sysctl_table(sctp_root_table); 289 sctp_sysctl_header = register_sysctl_paths(sctp_path, sctp_table);
304} 290}
305 291
306/* Sysctl deregistration. */ 292/* Sysctl deregistration. */
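The same conversion applies to any subsystem that previously nested
single-entry ctl_table directories; a minimal sketch with a hypothetical
"foo" subsystem (foo_table standing in for the real leaf entries):

	static struct ctl_path foo_path[] = {
		{ .procname = "net", .ctl_name = CTL_NET, },
		{ .procname = "foo", .ctl_name = CTL_UNNUMBERED, },
		{ }
	};
	static struct ctl_table_header *foo_header;

	static void foo_sysctl_register(void)
	{
		/* Builds /proc/sys/net/foo from the path array, replacing
		 * the two nested one-entry ctl_table wrappers. */
		foo_header = register_sysctl_paths(foo_path, foo_table);
	}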
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d55ce83a020b..dfa109341aeb 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -99,15 +99,10 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
99 INIT_LIST_HEAD(&peer->send_ready); 99 INIT_LIST_HEAD(&peer->send_ready);
100 INIT_LIST_HEAD(&peer->transports); 100 INIT_LIST_HEAD(&peer->transports);
101 101
102 /* Set up the retransmission timer. */ 102 setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
103 init_timer(&peer->T3_rtx_timer); 103 (unsigned long)peer);
104 peer->T3_rtx_timer.function = sctp_generate_t3_rtx_event; 104 setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
105 peer->T3_rtx_timer.data = (unsigned long)peer; 105 (unsigned long)peer);
106
107 /* Set up the heartbeat timer. */
108 init_timer(&peer->hb_timer);
109 peer->hb_timer.function = sctp_generate_heartbeat_event;
110 peer->hb_timer.data = (unsigned long)peer;
111 106
112 /* Initialize the 64-bit random nonce sent with heartbeat. */ 107 /* Initialize the 64-bit random nonce sent with heartbeat. */
113 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); 108 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
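setup_timer() is purely a consolidation helper; to the best of my
recollection, its definition in this era's include/linux/timer.h is exactly
the three statements it replaces here:

	static inline void setup_timer(struct timer_list *timer,
				       void (*function)(unsigned long),
				       unsigned long data)
	{
		timer->function = function;
		timer->data = data;
		init_timer(timer);
	}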
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 307314356e16..047c27df98f4 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -700,7 +700,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
700 if (rx_count >= asoc->base.sk->sk_rcvbuf) { 700 if (rx_count >= asoc->base.sk->sk_rcvbuf) {
701 701
702 if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || 702 if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
703 (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb))) 703 (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize)))
704 goto fail; 704 goto fail;
705 } 705 }
706 706
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 1733fa29a501..c25caefa3bcb 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1046,7 +1046,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1046 sctp_ulpq_partial_delivery(ulpq, chunk, gfp); 1046 sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
1047 } 1047 }
1048 1048
1049 sk_stream_mem_reclaim(asoc->base.sk); 1049 sk_mem_reclaim(asoc->base.sk);
1050 return; 1050 return;
1051} 1051}
1052 1052
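These two hunks, together with the sctp_sock_rfree change earlier, track the replacement of the sk_stream_* accounting helpers by the protocol-independent sk_mem_* family. Roughly, the helpers pair up as sketched below (semantics as in include/net/sock.h of this merge; the fragment is illustrative, not an excerpt):

/* receive path: ask the accounting layer before charging */
if (!sk_rmem_schedule(sk, skb->truesize))
	goto drop;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);	/* sk_forward_alloc -= size */

/* on free: undo the charge, then return surplus pages to the pool */
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_uncharge(sk, skb->truesize);	/* sk_forward_alloc += size */
sk_mem_reclaim(sk);			/* was sk_stream_mem_reclaim() */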
diff --git a/net/socket.c b/net/socket.c
index 74784dfe8e5b..7651de008502 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -112,6 +112,9 @@ static long compat_sock_ioctl(struct file *file,
112static int sock_fasync(int fd, struct file *filp, int on); 112static int sock_fasync(int fd, struct file *filp, int on);
113static ssize_t sock_sendpage(struct file *file, struct page *page, 113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more); 114 int offset, size_t size, loff_t *ppos, int more);
115static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
116 struct pipe_inode_info *pipe, size_t len,
117 unsigned int flags);
115 118
116/* 119/*
117 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 120 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
@@ -134,6 +137,7 @@ static const struct file_operations socket_file_ops = {
134 .fasync = sock_fasync, 137 .fasync = sock_fasync,
135 .sendpage = sock_sendpage, 138 .sendpage = sock_sendpage,
136 .splice_write = generic_splice_sendpage, 139 .splice_write = generic_splice_sendpage,
140 .splice_read = sock_splice_read,
137}; 141};
138 142
139/* 143/*
@@ -691,6 +695,15 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
691 return sock->ops->sendpage(sock, page, offset, size, flags); 695 return sock->ops->sendpage(sock, page, offset, size, flags);
692} 696}
693 697
698static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
699 struct pipe_inode_info *pipe, size_t len,
700 unsigned int flags)
701{
702 struct socket *sock = file->private_data;
703
704 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
705}
706
694static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 707static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
695 struct sock_iocb *siocb) 708 struct sock_iocb *siocb)
696{ 709{
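With .splice_read in socket_file_ops, splice(2) accepts a socket as its input fd, provided the protocol supplies a ->splice_read method (TCP gained one in this same series). A minimal userspace sketch, error handling elided and sockfd assumed connected:

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Move up to len bytes from the socket into a pipe's write end
 * without copying through userspace buffers. */
static ssize_t sock_to_pipe(int sockfd, int pipe_wr, size_t len)
{
	return splice(sockfd, NULL, pipe_wr, NULL, len, SPLICE_F_MOVE);
}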
@@ -1057,20 +1070,19 @@ int sock_wake_async(struct socket *sock, int how, int band)
1057 if (!sock || !sock->fasync_list) 1070 if (!sock || !sock->fasync_list)
1058 return -1; 1071 return -1;
1059 switch (how) { 1072 switch (how) {
1060 case 1: 1073 case SOCK_WAKE_WAITD:
1061
1062 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1074 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1063 break; 1075 break;
1064 goto call_kill; 1076 goto call_kill;
1065 case 2: 1077 case SOCK_WAKE_SPACE:
1066 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 1078 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1067 break; 1079 break;
1068 /* fall through */ 1080 /* fall through */
1069 case 0: 1081 case SOCK_WAKE_IO:
1070call_kill: 1082call_kill:
1071 __kill_fasync(sock->fasync_list, SIGIO, band); 1083 __kill_fasync(sock->fasync_list, SIGIO, band);
1072 break; 1084 break;
1073 case 3: 1085 case SOCK_WAKE_URG:
1074 __kill_fasync(sock->fasync_list, SIGURG, band); 1086 __kill_fasync(sock->fasync_list, SIGURG, band);
1075 } 1087 }
1076 return 0; 1088 return 0;
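The bare selector values 0..3 become the named SOCK_WAKE_* constants used here and at the af_unix call sites later in this diff; the mapping is one-to-one:

sk_wake_async(sk, SOCK_WAKE_IO,    POLL_OUT);	/* was how = 0 */
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);	/* was how = 1 */
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);	/* was how = 2 */
sk_wake_async(sk, SOCK_WAKE_URG,   POLL_PRI);	/* was how = 3 */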
@@ -1353,17 +1365,17 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1353 * ready for listening. 1365 * ready for listening.
1354 */ 1366 */
1355 1367
1356int sysctl_somaxconn __read_mostly = SOMAXCONN;
1357
1358asmlinkage long sys_listen(int fd, int backlog) 1368asmlinkage long sys_listen(int fd, int backlog)
1359{ 1369{
1360 struct socket *sock; 1370 struct socket *sock;
1361 int err, fput_needed; 1371 int err, fput_needed;
1372 int somaxconn;
1362 1373
1363 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1374 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1364 if (sock) { 1375 if (sock) {
1365 if ((unsigned)backlog > sysctl_somaxconn) 1376 somaxconn = sock->sk->sk_net->sysctl_somaxconn;
1366 backlog = sysctl_somaxconn; 1377 if ((unsigned)backlog > somaxconn)
1378 backlog = somaxconn;
1367 1379
1368 err = security_socket_listen(sock, backlog); 1380 err = security_socket_listen(sock, backlog);
1369 if (!err) 1381 if (!err)
@@ -1581,16 +1593,11 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1581 struct msghdr msg; 1593 struct msghdr msg;
1582 struct iovec iov; 1594 struct iovec iov;
1583 int fput_needed; 1595 int fput_needed;
1584 struct file *sock_file;
1585 1596
1586 sock_file = fget_light(fd, &fput_needed); 1597 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1587 err = -EBADF; 1598 if (!sock)
1588 if (!sock_file)
1589 goto out; 1599 goto out;
1590 1600
1591 sock = sock_from_file(sock_file, &err);
1592 if (!sock)
1593 goto out_put;
1594 iov.iov_base = buff; 1601 iov.iov_base = buff;
1595 iov.iov_len = len; 1602 iov.iov_len = len;
1596 msg.msg_name = NULL; 1603 msg.msg_name = NULL;
@@ -1612,7 +1619,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1612 err = sock_sendmsg(sock, &msg, len); 1619 err = sock_sendmsg(sock, &msg, len);
1613 1620
1614out_put: 1621out_put:
1615 fput_light(sock_file, fput_needed); 1622 fput_light(sock->file, fput_needed);
1616out: 1623out:
1617 return err; 1624 return err;
1618} 1625}
@@ -1641,17 +1648,11 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1641 struct msghdr msg; 1648 struct msghdr msg;
1642 char address[MAX_SOCK_ADDR]; 1649 char address[MAX_SOCK_ADDR];
1643 int err, err2; 1650 int err, err2;
1644 struct file *sock_file;
1645 int fput_needed; 1651 int fput_needed;
1646 1652
1647 sock_file = fget_light(fd, &fput_needed); 1653 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1648 err = -EBADF;
1649 if (!sock_file)
1650 goto out;
1651
1652 sock = sock_from_file(sock_file, &err);
1653 if (!sock) 1654 if (!sock)
1654 goto out_put; 1655 goto out;
1655 1656
1656 msg.msg_control = NULL; 1657 msg.msg_control = NULL;
1657 msg.msg_controllen = 0; 1658 msg.msg_controllen = 0;
@@ -1670,8 +1671,8 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1670 if (err2 < 0) 1671 if (err2 < 0)
1671 err = err2; 1672 err = err2;
1672 } 1673 }
1673out_put: 1674
1674 fput_light(sock_file, fput_needed); 1675 fput_light(sock->file, fput_needed);
1675out: 1676out:
1676 return err; 1677 return err;
1677} 1678}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8e05557414ce..73f053d0cc7a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1127,6 +1127,7 @@ struct handle {
1127}; 1127};
1128 1128
1129static void *c_start(struct seq_file *m, loff_t *pos) 1129static void *c_start(struct seq_file *m, loff_t *pos)
1130 __acquires(cd->hash_lock)
1130{ 1131{
1131 loff_t n = *pos; 1132 loff_t n = *pos;
1132 unsigned hash, entry; 1133 unsigned hash, entry;
@@ -1183,6 +1184,7 @@ static void *c_next(struct seq_file *m, void *p, loff_t *pos)
1183} 1184}
1184 1185
1185static void c_stop(struct seq_file *m, void *p) 1186static void c_stop(struct seq_file *m, void *p)
1187 __releases(cd->hash_lock)
1186{ 1188{
1187 struct cache_detail *cd = ((struct handle*)m->private)->cd; 1189 struct cache_detail *cd = ((struct handle*)m->private)->cd;
1188 read_unlock(&cd->hash_lock); 1190 read_unlock(&cd->hash_lock);
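__acquires()/__releases() are sparse annotations only: they expand to nothing in a normal build and exist so "make C=1" can check that a seq_file ->start/->stop pair hands a held lock across the callback boundary. A hypothetical minimal pair (foo_lock and foo_lookup are invented):

static void *foo_seq_start(struct seq_file *m, loff_t *pos)
	__acquires(foo_lock)
{
	read_lock(&foo_lock);
	return foo_lookup(*pos);
}

static void foo_seq_stop(struct seq_file *m, void *v)
	__releases(foo_lock)
{
	read_unlock(&foo_lock);
}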
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c98873f39aec..eed5dd9819cd 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -811,9 +811,8 @@ EXPORT_SYMBOL_GPL(rpc_free);
811void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) 811void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
812{ 812{
813 memset(task, 0, sizeof(*task)); 813 memset(task, 0, sizeof(*task));
814 init_timer(&task->tk_timer); 814 setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
815 task->tk_timer.data = (unsigned long) task; 815 (unsigned long)task);
816 task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
817 atomic_set(&task->tk_count, 1); 816 atomic_set(&task->tk_count, 1);
818 task->tk_client = clnt; 817 task->tk_client = clnt;
819 task->tk_flags = flags; 818 task->tk_flags = flags;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index cd641c8634f0..fb92f51405c5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1011,9 +1011,8 @@ found:
1011 INIT_LIST_HEAD(&xprt->free); 1011 INIT_LIST_HEAD(&xprt->free);
1012 INIT_LIST_HEAD(&xprt->recv); 1012 INIT_LIST_HEAD(&xprt->recv);
1013 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1013 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1014 init_timer(&xprt->timer); 1014 setup_timer(&xprt->timer, xprt_init_autodisconnect,
1015 xprt->timer.function = xprt_init_autodisconnect; 1015 (unsigned long)xprt);
1016 xprt->timer.data = (unsigned long) xprt;
1017 xprt->last_used = jiffies; 1016 xprt->last_used = jiffies;
1018 xprt->cwnd = RPC_INITCWND; 1017 xprt->cwnd = RPC_INITCWND;
1019 xprt->bind_index = 0; 1018 xprt->bind_index = 0;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ee8de7af2a5b..1aa1580cda6d 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -380,7 +380,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
380 headerp->rm_xid = rqst->rq_xid; 380 headerp->rm_xid = rqst->rq_xid;
381 headerp->rm_vers = xdr_one; 381 headerp->rm_vers = xdr_one;
382 headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); 382 headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
383 headerp->rm_type = __constant_htonl(RDMA_MSG); 383 headerp->rm_type = htonl(RDMA_MSG);
384 384
385 /* 385 /*
386 * Chunks needed for results? 386 * Chunks needed for results?
@@ -458,11 +458,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
458 RPCRDMA_INLINE_PAD_VALUE(rqst)); 458 RPCRDMA_INLINE_PAD_VALUE(rqst));
459 459
460 if (padlen) { 460 if (padlen) {
461 headerp->rm_type = __constant_htonl(RDMA_MSGP); 461 headerp->rm_type = htonl(RDMA_MSGP);
462 headerp->rm_body.rm_padded.rm_align = 462 headerp->rm_body.rm_padded.rm_align =
463 htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); 463 htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
464 headerp->rm_body.rm_padded.rm_thresh = 464 headerp->rm_body.rm_padded.rm_thresh =
465 __constant_htonl(RPCRDMA_INLINE_PAD_THRESH); 465 htonl(RPCRDMA_INLINE_PAD_THRESH);
466 headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; 466 headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
467 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; 467 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
468 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; 468 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
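The __constant_htonl() removals rely on htonl() itself using __builtin_constant_p, so a compile-time-constant argument is folded at build time either way; the two spellings should produce identical object code, and the plain form reads better:

headerp->rm_type = __constant_htonl(RDMA_MSG);	/* old spelling */
headerp->rm_type = htonl(RDMA_MSG);		/* same generated code */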
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2f630a512ab7..6fa52f44de0f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -838,8 +838,12 @@ static void xs_udp_data_ready(struct sock *sk, int len)
838 copied = repsize; 838 copied = repsize;
839 839
840 /* Suck it into the iovec, verify checksum if not done by hw. */ 840 /* Suck it into the iovec, verify checksum if not done by hw. */
841 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) 841 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
842 UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS);
842 goto out_unlock; 843 goto out_unlock;
844 }
845
846 UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
843 847
844 /* Something worked... */ 848 /* Something worked... */
845 dst_confirm(skb->dst); 849 dst_confirm(skb->dst);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index cd4eafbab1b8..665e856675a4 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/sysctl.h> 16#include <linux/sysctl.h>
17#include <linux/nsproxy.h>
17 18
18#include <net/sock.h> 19#include <net/sock.h>
19 20
@@ -29,28 +30,58 @@
29#include <linux/if_tr.h> 30#include <linux/if_tr.h>
30#endif 31#endif
31 32
32struct ctl_table net_table[] = { 33static struct list_head *
33 { 34net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
34 .ctl_name = NET_CORE, 35{
35 .procname = "core", 36 return &namespaces->net_ns->sysctl_table_headers;
36 .mode = 0555, 37}
37 .child = core_table, 38
38 }, 39static struct ctl_table_root net_sysctl_root = {
39#ifdef CONFIG_INET 40 .lookup = net_ctl_header_lookup,
40 { 41};
41 .ctl_name = NET_IPV4, 42
42 .procname = "ipv4", 43static int sysctl_net_init(struct net *net)
43 .mode = 0555, 44{
44 .child = ipv4_table 45 INIT_LIST_HEAD(&net->sysctl_table_headers);
45 }, 46 return 0;
46#endif 47}
47#ifdef CONFIG_TR 48
48 { 49static void sysctl_net_exit(struct net *net)
49 .ctl_name = NET_TR, 50{
50 .procname = "token-ring", 51 WARN_ON(!list_empty(&net->sysctl_table_headers));
51 .mode = 0555, 52 return;
52 .child = tr_table, 53}
53 }, 54
54#endif 55static struct pernet_operations sysctl_pernet_ops = {
55 { 0 }, 56 .init = sysctl_net_init,
57 .exit = sysctl_net_exit,
56}; 58};
59
60static __init int sysctl_init(void)
61{
62 int ret;
63 ret = register_pernet_subsys(&sysctl_pernet_ops);
64 if (ret)
65 goto out;
66 register_sysctl_root(&net_sysctl_root);
67out:
68 return ret;
69}
70subsys_initcall(sysctl_init);
71
72struct ctl_table_header *register_net_sysctl_table(struct net *net,
73 const struct ctl_path *path, struct ctl_table *table)
74{
75 struct nsproxy namespaces;
76 namespaces = *current->nsproxy;
77 namespaces.net_ns = net;
78 return __register_sysctl_paths(&net_sysctl_root,
79 &namespaces, path, table);
80}
81EXPORT_SYMBOL_GPL(register_net_sysctl_table);
82
83void unregister_net_sysctl_table(struct ctl_table_header *header)
84{
85 return unregister_sysctl_table(header);
86}
87EXPORT_SYMBOL_GPL(unregister_net_sysctl_table);
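register_net_sysctl_table() works by pointing a stack copy of the caller's nsproxy at the target net, so __register_sysctl_paths() files the header on that namespace's sysctl_table_headers list via net_ctl_header_lookup(). A hypothetical per-net caller (bar_path and bar_table are invented; a real user would stash the returned header in per-net state for a later unregister_net_sysctl_table()):

static int bar_net_init(struct net *net)
{
	static struct ctl_path bar_path[] = {
		{ .procname = "net", .ctl_name = CTL_NET, },
		{ .procname = "bar", },
		{ }
	};
	struct ctl_table_header *hdr;

	hdr = register_net_sysctl_table(net, bar_path, bar_table);
	return hdr ? 0 : -ENOMEM;
}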
diff --git a/net/tipc/core.h b/net/tipc/core.h
index e40ada964d6e..feabca580820 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -212,9 +212,7 @@ static inline void k_init_timer(struct timer_list *timer, Handler routine,
212 unsigned long argument) 212 unsigned long argument)
213{ 213{
214 dbg("initializing timer %p\n", timer); 214 dbg("initializing timer %p\n", timer);
215 init_timer(timer); 215 setup_timer(timer, routine, argument);
216 timer->function = routine;
217 timer->data = argument;
218} 216}
219 217
220/** 218/**
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 76088153524c..f508614ca59b 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -340,7 +340,7 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable)
340 if (!p_ptr) 340 if (!p_ptr)
341 return -EINVAL; 341 return -EINVAL;
342 *isunreliable = port_unreliable(p_ptr); 342 *isunreliable = port_unreliable(p_ptr);
343 spin_unlock_bh(p_ptr->publ.lock); 343 tipc_port_unlock(p_ptr);
344 return TIPC_OK; 344 return TIPC_OK;
345} 345}
346 346
@@ -369,7 +369,7 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable)
369 if (!p_ptr) 369 if (!p_ptr)
370 return -EINVAL; 370 return -EINVAL;
371 *isunrejectable = port_unreturnable(p_ptr); 371 *isunrejectable = port_unreturnable(p_ptr);
372 spin_unlock_bh(p_ptr->publ.lock); 372 tipc_port_unlock(p_ptr);
373 return TIPC_OK; 373 return TIPC_OK;
374} 374}
375 375
@@ -843,7 +843,7 @@ static void port_dispatcher_sigh(void *dummy)
843 u32 peer_port = port_peerport(p_ptr); 843 u32 peer_port = port_peerport(p_ptr);
844 u32 peer_node = port_peernode(p_ptr); 844 u32 peer_node = port_peernode(p_ptr);
845 845
846 spin_unlock_bh(p_ptr->publ.lock); 846 tipc_port_unlock(p_ptr);
847 if (unlikely(!connected)) { 847 if (unlikely(!connected)) {
848 if (unlikely(published)) 848 if (unlikely(published))
849 goto reject; 849 goto reject;
@@ -867,7 +867,7 @@ static void port_dispatcher_sigh(void *dummy)
867 case TIPC_DIRECT_MSG:{ 867 case TIPC_DIRECT_MSG:{
868 tipc_msg_event cb = up_ptr->msg_cb; 868 tipc_msg_event cb = up_ptr->msg_cb;
869 869
870 spin_unlock_bh(p_ptr->publ.lock); 870 tipc_port_unlock(p_ptr);
871 if (unlikely(connected)) 871 if (unlikely(connected))
872 goto reject; 872 goto reject;
873 if (unlikely(!cb)) 873 if (unlikely(!cb))
@@ -882,7 +882,7 @@ static void port_dispatcher_sigh(void *dummy)
882 case TIPC_NAMED_MSG:{ 882 case TIPC_NAMED_MSG:{
883 tipc_named_msg_event cb = up_ptr->named_msg_cb; 883 tipc_named_msg_event cb = up_ptr->named_msg_cb;
884 884
885 spin_unlock_bh(p_ptr->publ.lock); 885 tipc_port_unlock(p_ptr);
886 if (unlikely(connected)) 886 if (unlikely(connected))
887 goto reject; 887 goto reject;
888 if (unlikely(!cb)) 888 if (unlikely(!cb))
@@ -913,7 +913,7 @@ err:
913 u32 peer_port = port_peerport(p_ptr); 913 u32 peer_port = port_peerport(p_ptr);
914 u32 peer_node = port_peernode(p_ptr); 914 u32 peer_node = port_peernode(p_ptr);
915 915
916 spin_unlock_bh(p_ptr->publ.lock); 916 tipc_port_unlock(p_ptr);
917 if (!connected || !cb) 917 if (!connected || !cb)
918 break; 918 break;
919 if (msg_origport(msg) != peer_port) 919 if (msg_origport(msg) != peer_port)
@@ -929,7 +929,7 @@ err:
929 case TIPC_DIRECT_MSG:{ 929 case TIPC_DIRECT_MSG:{
930 tipc_msg_err_event cb = up_ptr->err_cb; 930 tipc_msg_err_event cb = up_ptr->err_cb;
931 931
932 spin_unlock_bh(p_ptr->publ.lock); 932 tipc_port_unlock(p_ptr);
933 if (connected || !cb) 933 if (connected || !cb)
934 break; 934 break;
935 skb_pull(buf, msg_hdr_sz(msg)); 935 skb_pull(buf, msg_hdr_sz(msg));
@@ -942,7 +942,7 @@ err:
942 tipc_named_msg_err_event cb = 942 tipc_named_msg_err_event cb =
943 up_ptr->named_err_cb; 943 up_ptr->named_err_cb;
944 944
945 spin_unlock_bh(p_ptr->publ.lock); 945 tipc_port_unlock(p_ptr);
946 if (connected || !cb) 946 if (connected || !cb)
947 break; 947 break;
948 dseq.type = msg_nametype(msg); 948 dseq.type = msg_nametype(msg);
@@ -1107,7 +1107,7 @@ int tipc_portimportance(u32 ref, unsigned int *importance)
1107 if (!p_ptr) 1107 if (!p_ptr)
1108 return -EINVAL; 1108 return -EINVAL;
1109 *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr); 1109 *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr);
1110 spin_unlock_bh(p_ptr->publ.lock); 1110 tipc_port_unlock(p_ptr);
1111 return TIPC_OK; 1111 return TIPC_OK;
1112} 1112}
1113 1113
@@ -1122,7 +1122,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp)
1122 if (!p_ptr) 1122 if (!p_ptr)
1123 return -EINVAL; 1123 return -EINVAL;
1124 msg_set_importance(&p_ptr->publ.phdr, (u32)imp); 1124 msg_set_importance(&p_ptr->publ.phdr, (u32)imp);
1125 spin_unlock_bh(p_ptr->publ.lock); 1125 tipc_port_unlock(p_ptr);
1126 return TIPC_OK; 1126 return TIPC_OK;
1127} 1127}
1128 1128
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 060bba4567d2..eea75888805e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,8 +117,6 @@
117#include <net/checksum.h> 117#include <net/checksum.h>
118#include <linux/security.h> 118#include <linux/security.h>
119 119
120int sysctl_unix_max_dgram_qlen __read_mostly = 10;
121
122static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; 120static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
123static DEFINE_SPINLOCK(unix_table_lock); 121static DEFINE_SPINLOCK(unix_table_lock);
124static atomic_t unix_nr_socks = ATOMIC_INIT(0); 122static atomic_t unix_nr_socks = ATOMIC_INIT(0);
@@ -127,32 +125,6 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0);
127 125
128#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) 126#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
129 127
130static struct sock *first_unix_socket(int *i)
131{
132 for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
133 if (!hlist_empty(&unix_socket_table[*i]))
134 return __sk_head(&unix_socket_table[*i]);
135 }
136 return NULL;
137}
138
139static struct sock *next_unix_socket(int *i, struct sock *s)
140{
141 struct sock *next = sk_next(s);
142 /* More in this chain? */
143 if (next)
144 return next;
145 /* Look for next non-empty chain. */
146 for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
147 if (!hlist_empty(&unix_socket_table[*i]))
148 return __sk_head(&unix_socket_table[*i]);
149 }
150 return NULL;
151}
152
153#define forall_unix_sockets(i, s) \
154 for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i),(s)))
155
156#ifdef CONFIG_SECURITY_NETWORK 128#ifdef CONFIG_SECURITY_NETWORK
157static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 129static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158{ 130{
@@ -270,7 +242,8 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
270 spin_unlock(&unix_table_lock); 242 spin_unlock(&unix_table_lock);
271} 243}
272 244
273static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, 245static struct sock *__unix_find_socket_byname(struct net *net,
246 struct sockaddr_un *sunname,
274 int len, int type, unsigned hash) 247 int len, int type, unsigned hash)
275{ 248{
276 struct sock *s; 249 struct sock *s;
@@ -279,6 +252,9 @@ static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
279 sk_for_each(s, node, &unix_socket_table[hash ^ type]) { 252 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
280 struct unix_sock *u = unix_sk(s); 253 struct unix_sock *u = unix_sk(s);
281 254
255 if (s->sk_net != net)
256 continue;
257
282 if (u->addr->len == len && 258 if (u->addr->len == len &&
283 !memcmp(u->addr->name, sunname, len)) 259 !memcmp(u->addr->name, sunname, len))
284 goto found; 260 goto found;
@@ -288,21 +264,22 @@ found:
288 return s; 264 return s;
289} 265}
290 266
291static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, 267static inline struct sock *unix_find_socket_byname(struct net *net,
268 struct sockaddr_un *sunname,
292 int len, int type, 269 int len, int type,
293 unsigned hash) 270 unsigned hash)
294{ 271{
295 struct sock *s; 272 struct sock *s;
296 273
297 spin_lock(&unix_table_lock); 274 spin_lock(&unix_table_lock);
298 s = __unix_find_socket_byname(sunname, len, type, hash); 275 s = __unix_find_socket_byname(net, sunname, len, type, hash);
299 if (s) 276 if (s)
300 sock_hold(s); 277 sock_hold(s);
301 spin_unlock(&unix_table_lock); 278 spin_unlock(&unix_table_lock);
302 return s; 279 return s;
303} 280}
304 281
305static struct sock *unix_find_socket_byinode(struct inode *i) 282static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
306{ 283{
307 struct sock *s; 284 struct sock *s;
308 struct hlist_node *node; 285 struct hlist_node *node;
@@ -312,6 +289,9 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
312 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 289 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
313 struct dentry *dentry = unix_sk(s)->dentry; 290 struct dentry *dentry = unix_sk(s)->dentry;
314 291
292 if (s->sk_net != net)
293 continue;
294
315 if(dentry && dentry->d_inode == i) 295 if(dentry && dentry->d_inode == i)
316 { 296 {
317 sock_hold(s); 297 sock_hold(s);
@@ -335,7 +315,7 @@ static void unix_write_space(struct sock *sk)
335 if (unix_writable(sk)) { 315 if (unix_writable(sk)) {
336 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 316 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
337 wake_up_interruptible_sync(sk->sk_sleep); 317 wake_up_interruptible_sync(sk->sk_sleep);
338 sk_wake_async(sk, 2, POLL_OUT); 318 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
339 } 319 }
340 read_unlock(&sk->sk_callback_lock); 320 read_unlock(&sk->sk_callback_lock);
341} 321}
@@ -421,7 +401,7 @@ static int unix_release_sock (struct sock *sk, int embrion)
421 unix_state_unlock(skpair); 401 unix_state_unlock(skpair);
422 skpair->sk_state_change(skpair); 402 skpair->sk_state_change(skpair);
423 read_lock(&skpair->sk_callback_lock); 403 read_lock(&skpair->sk_callback_lock);
424 sk_wake_async(skpair,1,POLL_HUP); 404 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
425 read_unlock(&skpair->sk_callback_lock); 405 read_unlock(&skpair->sk_callback_lock);
426 } 406 }
427 sock_put(skpair); /* It may now die */ 407 sock_put(skpair); /* It may now die */
@@ -612,7 +592,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock)
612 &af_unix_sk_receive_queue_lock_key); 592 &af_unix_sk_receive_queue_lock_key);
613 593
614 sk->sk_write_space = unix_write_space; 594 sk->sk_write_space = unix_write_space;
615 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; 595 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
616 sk->sk_destruct = unix_sock_destructor; 596 sk->sk_destruct = unix_sock_destructor;
617 u = unix_sk(sk); 597 u = unix_sk(sk);
618 u->dentry = NULL; 598 u->dentry = NULL;
@@ -631,9 +611,6 @@ out:
631 611
632static int unix_create(struct net *net, struct socket *sock, int protocol) 612static int unix_create(struct net *net, struct socket *sock, int protocol)
633{ 613{
634 if (net != &init_net)
635 return -EAFNOSUPPORT;
636
637 if (protocol && protocol != PF_UNIX) 614 if (protocol && protocol != PF_UNIX)
638 return -EPROTONOSUPPORT; 615 return -EPROTONOSUPPORT;
639 616
@@ -677,6 +654,7 @@ static int unix_release(struct socket *sock)
677static int unix_autobind(struct socket *sock) 654static int unix_autobind(struct socket *sock)
678{ 655{
679 struct sock *sk = sock->sk; 656 struct sock *sk = sock->sk;
657 struct net *net = sk->sk_net;
680 struct unix_sock *u = unix_sk(sk); 658 struct unix_sock *u = unix_sk(sk);
681 static u32 ordernum = 1; 659 static u32 ordernum = 1;
682 struct unix_address * addr; 660 struct unix_address * addr;
@@ -703,7 +681,7 @@ retry:
703 spin_lock(&unix_table_lock); 681 spin_lock(&unix_table_lock);
704 ordernum = (ordernum+1)&0xFFFFF; 682 ordernum = (ordernum+1)&0xFFFFF;
705 683
706 if (__unix_find_socket_byname(addr->name, addr->len, sock->type, 684 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
707 addr->hash)) { 685 addr->hash)) {
708 spin_unlock(&unix_table_lock); 686 spin_unlock(&unix_table_lock);
709 /* Sanity yield. It is unusual case, but yet... */ 687 /* Sanity yield. It is unusual case, but yet... */
@@ -723,7 +701,8 @@ out: mutex_unlock(&u->readlock);
723 return err; 701 return err;
724} 702}
725 703
726static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, 704static struct sock *unix_find_other(struct net *net,
705 struct sockaddr_un *sunname, int len,
727 int type, unsigned hash, int *error) 706 int type, unsigned hash, int *error)
728{ 707{
729 struct sock *u; 708 struct sock *u;
@@ -741,7 +720,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
741 err = -ECONNREFUSED; 720 err = -ECONNREFUSED;
742 if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) 721 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
743 goto put_fail; 722 goto put_fail;
744 u=unix_find_socket_byinode(nd.dentry->d_inode); 723 u=unix_find_socket_byinode(net, nd.dentry->d_inode);
745 if (!u) 724 if (!u)
746 goto put_fail; 725 goto put_fail;
747 726
@@ -757,7 +736,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
757 } 736 }
758 } else { 737 } else {
759 err = -ECONNREFUSED; 738 err = -ECONNREFUSED;
760 u=unix_find_socket_byname(sunname, len, type, hash); 739 u=unix_find_socket_byname(net, sunname, len, type, hash);
761 if (u) { 740 if (u) {
762 struct dentry *dentry; 741 struct dentry *dentry;
763 dentry = unix_sk(u)->dentry; 742 dentry = unix_sk(u)->dentry;
@@ -779,6 +758,7 @@ fail:
779static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 758static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
780{ 759{
781 struct sock *sk = sock->sk; 760 struct sock *sk = sock->sk;
761 struct net *net = sk->sk_net;
782 struct unix_sock *u = unix_sk(sk); 762 struct unix_sock *u = unix_sk(sk);
783 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 763 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
784 struct dentry * dentry = NULL; 764 struct dentry * dentry = NULL;
@@ -853,7 +833,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
853 833
854 if (!sunaddr->sun_path[0]) { 834 if (!sunaddr->sun_path[0]) {
855 err = -EADDRINUSE; 835 err = -EADDRINUSE;
856 if (__unix_find_socket_byname(sunaddr, addr_len, 836 if (__unix_find_socket_byname(net, sunaddr, addr_len,
857 sk->sk_type, hash)) { 837 sk->sk_type, hash)) {
858 unix_release_addr(addr); 838 unix_release_addr(addr);
859 goto out_unlock; 839 goto out_unlock;
@@ -919,6 +899,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
919 int alen, int flags) 899 int alen, int flags)
920{ 900{
921 struct sock *sk = sock->sk; 901 struct sock *sk = sock->sk;
902 struct net *net = sk->sk_net;
922 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; 903 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
923 struct sock *other; 904 struct sock *other;
924 unsigned hash; 905 unsigned hash;
@@ -935,7 +916,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
935 goto out; 916 goto out;
936 917
937restart: 918restart:
938 other=unix_find_other(sunaddr, alen, sock->type, hash, &err); 919 other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
939 if (!other) 920 if (!other)
940 goto out; 921 goto out;
941 922
@@ -1015,6 +996,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1015{ 996{
1016 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; 997 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1017 struct sock *sk = sock->sk; 998 struct sock *sk = sock->sk;
999 struct net *net = sk->sk_net;
1018 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1000 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1019 struct sock *newsk = NULL; 1001 struct sock *newsk = NULL;
1020 struct sock *other = NULL; 1002 struct sock *other = NULL;
@@ -1054,7 +1036,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1054 1036
1055restart: 1037restart:
1056 /* Find listening sock. */ 1038 /* Find listening sock. */
1057 other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err); 1039 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1058 if (!other) 1040 if (!other)
1059 goto out; 1041 goto out;
1060 1042
@@ -1330,6 +1312,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1330{ 1312{
1331 struct sock_iocb *siocb = kiocb_to_siocb(kiocb); 1313 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1332 struct sock *sk = sock->sk; 1314 struct sock *sk = sock->sk;
1315 struct net *net = sk->sk_net;
1333 struct unix_sock *u = unix_sk(sk); 1316 struct unix_sock *u = unix_sk(sk);
1334 struct sockaddr_un *sunaddr=msg->msg_name; 1317 struct sockaddr_un *sunaddr=msg->msg_name;
1335 struct sock *other = NULL; 1318 struct sock *other = NULL;
@@ -1393,7 +1376,7 @@ restart:
1393 if (sunaddr == NULL) 1376 if (sunaddr == NULL)
1394 goto out_free; 1377 goto out_free;
1395 1378
1396 other = unix_find_other(sunaddr, namelen, sk->sk_type, 1379 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1397 hash, &err); 1380 hash, &err);
1398 if (other==NULL) 1381 if (other==NULL)
1399 goto out_free; 1382 goto out_free;
@@ -1915,9 +1898,9 @@ static int unix_shutdown(struct socket *sock, int mode)
1915 other->sk_state_change(other); 1898 other->sk_state_change(other);
1916 read_lock(&other->sk_callback_lock); 1899 read_lock(&other->sk_callback_lock);
1917 if (peer_mode == SHUTDOWN_MASK) 1900 if (peer_mode == SHUTDOWN_MASK)
1918 sk_wake_async(other,1,POLL_HUP); 1901 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1919 else if (peer_mode & RCV_SHUTDOWN) 1902 else if (peer_mode & RCV_SHUTDOWN)
1920 sk_wake_async(other,1,POLL_IN); 1903 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1921 read_unlock(&other->sk_callback_lock); 1904 read_unlock(&other->sk_callback_lock);
1922 } 1905 }
1923 if (other) 1906 if (other)
@@ -2006,12 +1989,41 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
2006 1989
2007 1990
2008#ifdef CONFIG_PROC_FS 1991#ifdef CONFIG_PROC_FS
2009static struct sock *unix_seq_idx(int *iter, loff_t pos) 1992static struct sock *first_unix_socket(int *i)
1993{
1994 for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
1995 if (!hlist_empty(&unix_socket_table[*i]))
1996 return __sk_head(&unix_socket_table[*i]);
1997 }
1998 return NULL;
1999}
2000
2001static struct sock *next_unix_socket(int *i, struct sock *s)
2002{
2003 struct sock *next = sk_next(s);
2004 /* More in this chain? */
2005 if (next)
2006 return next;
2007 /* Look for next non-empty chain. */
2008 for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2009 if (!hlist_empty(&unix_socket_table[*i]))
2010 return __sk_head(&unix_socket_table[*i]);
2011 }
2012 return NULL;
2013}
2014
2015struct unix_iter_state {
2016 struct seq_net_private p;
2017 int i;
2018};
2019static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
2010{ 2020{
2011 loff_t off = 0; 2021 loff_t off = 0;
2012 struct sock *s; 2022 struct sock *s;
2013 2023
2014 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { 2024 for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2025 if (s->sk_net != iter->p.net)
2026 continue;
2015 if (off == pos) 2027 if (off == pos)
2016 return s; 2028 return s;
2017 ++off; 2029 ++off;
@@ -2021,21 +2033,30 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos)
2021 2033
2022 2034
2023static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 2035static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2036 __acquires(unix_table_lock)
2024{ 2037{
2038 struct unix_iter_state *iter = seq->private;
2025 spin_lock(&unix_table_lock); 2039 spin_lock(&unix_table_lock);
2026 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); 2040 return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1);
2027} 2041}
2028 2042
2029static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2043static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2030{ 2044{
2045 struct unix_iter_state *iter = seq->private;
2046 struct sock *sk = v;
2031 ++*pos; 2047 ++*pos;
2032 2048
2033 if (v == (void *)1) 2049 if (v == (void *)1)
2034 return first_unix_socket(seq->private); 2050 sk = first_unix_socket(&iter->i);
2035 return next_unix_socket(seq->private, v); 2051 else
2052 sk = next_unix_socket(&iter->i, sk);
2053 while (sk && (sk->sk_net != iter->p.net))
2054 sk = next_unix_socket(&iter->i, sk);
2055 return sk;
2036} 2056}
2037 2057
2038static void unix_seq_stop(struct seq_file *seq, void *v) 2058static void unix_seq_stop(struct seq_file *seq, void *v)
2059 __releases(unix_table_lock)
2039{ 2060{
2040 spin_unlock(&unix_table_lock); 2061 spin_unlock(&unix_table_lock);
2041} 2062}
@@ -2094,7 +2115,8 @@ static const struct seq_operations unix_seq_ops = {
2094 2115
2095static int unix_seq_open(struct inode *inode, struct file *file) 2116static int unix_seq_open(struct inode *inode, struct file *file)
2096{ 2117{
2097 return seq_open_private(file, &unix_seq_ops, sizeof(int)); 2118 return seq_open_net(inode, file, &unix_seq_ops,
2119 sizeof(struct unix_iter_state));
2098} 2120}
2099 2121
2100static const struct file_operations unix_seq_fops = { 2122static const struct file_operations unix_seq_fops = {
@@ -2102,7 +2124,7 @@ static const struct file_operations unix_seq_fops = {
2102 .open = unix_seq_open, 2124 .open = unix_seq_open,
2103 .read = seq_read, 2125 .read = seq_read,
2104 .llseek = seq_lseek, 2126 .llseek = seq_lseek,
2105 .release = seq_release_private, 2127 .release = seq_release_net,
2106}; 2128};
2107 2129
2108#endif 2130#endif
@@ -2113,6 +2135,37 @@ static struct net_proto_family unix_family_ops = {
2113 .owner = THIS_MODULE, 2135 .owner = THIS_MODULE,
2114}; 2136};
2115 2137
2138
2139static int unix_net_init(struct net *net)
2140{
2141 int error = -ENOMEM;
2142
2143 net->unx.sysctl_max_dgram_qlen = 10;
2144 if (unix_sysctl_register(net))
2145 goto out;
2146
2147#ifdef CONFIG_PROC_FS
2148 if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2149 unix_sysctl_unregister(net);
2150 goto out;
2151 }
2152#endif
2153 error = 0;
2154out:
 2155 return error;

2156}
2157
2158static void unix_net_exit(struct net *net)
2159{
2160 unix_sysctl_unregister(net);
2161 proc_net_remove(net, "unix");
2162}
2163
2164static struct pernet_operations unix_net_ops = {
2165 .init = unix_net_init,
2166 .exit = unix_net_exit,
2167};
2168
2116static int __init af_unix_init(void) 2169static int __init af_unix_init(void)
2117{ 2170{
2118 int rc = -1; 2171 int rc = -1;
@@ -2128,10 +2181,7 @@ static int __init af_unix_init(void)
2128 } 2181 }
2129 2182
2130 sock_register(&unix_family_ops); 2183 sock_register(&unix_family_ops);
2131#ifdef CONFIG_PROC_FS 2184 register_pernet_subsys(&unix_net_ops);
2132 proc_net_fops_create(&init_net, "unix", 0, &unix_seq_fops);
2133#endif
2134 unix_sysctl_register();
2135out: 2185out:
2136 return rc; 2186 return rc;
2137} 2187}
@@ -2139,9 +2189,8 @@ out:
2139static void __exit af_unix_exit(void) 2189static void __exit af_unix_exit(void)
2140{ 2190{
2141 sock_unregister(PF_UNIX); 2191 sock_unregister(PF_UNIX);
2142 unix_sysctl_unregister();
2143 proc_net_remove(&init_net, "unix");
2144 proto_unregister(&unix_proto); 2192 proto_unregister(&unix_proto);
2193 unregister_pernet_subsys(&unix_net_ops);
2145} 2194}
2146 2195
2147module_init(af_unix_init); 2196module_init(af_unix_init);
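The af_unix conversion shows the general per-namespace recipe: setup moves from module init into pernet_operations, and register_pernet_subsys() runs .init for init_net immediately and for every namespace created afterwards, with .exit mirroring on teardown. A skeleton of the pattern (baz_* names invented):

static int baz_net_init(struct net *net)
{
	/* per-namespace defaults, /proc entries, sysctls */
	return 0;
}

static void baz_net_exit(struct net *net)
{
	/* exact mirror of baz_net_init() */
}

static struct pernet_operations baz_net_ops = {
	.init = baz_net_init,
	.exit = baz_net_exit,
};

static int __init baz_init(void)
{
	return register_pernet_subsys(&baz_net_ops);
}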
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index eb0bd57ebada..77513d7e35f2 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -18,7 +18,7 @@ static ctl_table unix_table[] = {
18 { 18 {
19 .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, 19 .ctl_name = NET_UNIX_MAX_DGRAM_QLEN,
20 .procname = "max_dgram_qlen", 20 .procname = "max_dgram_qlen",
21 .data = &sysctl_unix_max_dgram_qlen, 21 .data = &init_net.unx.sysctl_max_dgram_qlen,
22 .maxlen = sizeof(int), 22 .maxlen = sizeof(int),
23 .mode = 0644, 23 .mode = 0644,
24 .proc_handler = &proc_dointvec 24 .proc_handler = &proc_dointvec
@@ -26,35 +26,39 @@ static ctl_table unix_table[] = {
26 { .ctl_name = 0 } 26 { .ctl_name = 0 }
27}; 27};
28 28
29static ctl_table unix_net_table[] = { 29static struct ctl_path unix_path[] = {
30 { 30 { .procname = "net", .ctl_name = CTL_NET, },
31 .ctl_name = NET_UNIX, 31 { .procname = "unix", .ctl_name = NET_UNIX, },
32 .procname = "unix", 32 { },
33 .mode = 0555,
34 .child = unix_table
35 },
36 { .ctl_name = 0 }
37}; 33};
38 34
39static ctl_table unix_root_table[] = { 35int unix_sysctl_register(struct net *net)
40 { 36{
41 .ctl_name = CTL_NET, 37 struct ctl_table *table;
42 .procname = "net",
43 .mode = 0555,
44 .child = unix_net_table
45 },
46 { .ctl_name = 0 }
47};
48 38
49static struct ctl_table_header * unix_sysctl_header; 39 table = kmemdup(unix_table, sizeof(unix_table), GFP_KERNEL);
40 if (table == NULL)
41 goto err_alloc;
50 42
51void unix_sysctl_register(void) 43 table[0].data = &net->unx.sysctl_max_dgram_qlen;
52{ 44 net->unx.ctl = register_net_sysctl_table(net, unix_path, table);
53 unix_sysctl_header = register_sysctl_table(unix_root_table); 45 if (net->unx.ctl == NULL)
46 goto err_reg;
47
48 return 0;
49
50err_reg:
51 kfree(table);
52err_alloc:
53 return -ENOMEM;
54} 54}
55 55
56void unix_sysctl_unregister(void) 56void unix_sysctl_unregister(struct net *net)
57{ 57{
58 unregister_sysctl_table(unix_sysctl_header); 58 struct ctl_table *table;
59
60 table = net->unx.ctl->ctl_table_arg;
61 unregister_sysctl_table(net->unx.ctl);
62 kfree(table);
59} 63}
60 64
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6426055a8be0..79270903bda6 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -6,13 +6,13 @@ config NL80211
6 depends on CFG80211 6 depends on CFG80211
7 default y 7 default y
8 ---help--- 8 ---help---
9 This option turns on the new netlink interface 9 This option turns on the new netlink interface
10 (nl80211) support in cfg80211. 10 (nl80211) support in cfg80211.
11 11
12 If =n, drivers using mac80211 will be configured via 12 If =n, drivers using mac80211 will be configured via
13 wireless extension support provided by that subsystem. 13 wireless extension support provided by that subsystem.
14 14
15 If unsure, say Y. 15 If unsure, say Y.
16 16
17config WIRELESS_EXT 17config WIRELESS_EXT
18 bool "Wireless extensions" 18 bool "Wireless extensions"
diff --git a/net/wireless/core.c b/net/wireless/core.c
index febc33bc9c09..cfc5fc5f9e75 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -184,6 +184,9 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
184 struct cfg80211_registered_device *drv; 184 struct cfg80211_registered_device *drv;
185 int alloc_size; 185 int alloc_size;
186 186
187 WARN_ON(!ops->add_key && ops->del_key);
188 WARN_ON(ops->add_key && !ops->del_key);
189
187 alloc_size = sizeof(*drv) + sizeof_priv; 190 alloc_size = sizeof(*drv) + sizeof_priv;
188 191
189 drv = kzalloc(alloc_size, GFP_KERNEL); 192 drv = kzalloc(alloc_size, GFP_KERNEL);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 48b0d453e4e1..e3a214f63f91 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -61,6 +61,27 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
61 [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, 61 [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
62 [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, 62 [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
63 [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, 63 [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 },
64
65 [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN },
66
67 [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY,
68 .len = WLAN_MAX_KEY_LEN },
69 [NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 },
70 [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 },
71 [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG },
72
73 [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 },
74 [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 },
75 [NL80211_ATTR_BEACON_HEAD] = { .type = NLA_BINARY,
76 .len = IEEE80211_MAX_DATA_LEN },
77 [NL80211_ATTR_BEACON_TAIL] = { .type = NLA_BINARY,
78 .len = IEEE80211_MAX_DATA_LEN },
79 [NL80211_ATTR_STA_AID] = { .type = NLA_U16 },
80 [NL80211_ATTR_STA_FLAGS] = { .type = NLA_NESTED },
81 [NL80211_ATTR_STA_LISTEN_INTERVAL] = { .type = NLA_U16 },
82 [NL80211_ATTR_STA_SUPPORTED_RATES] = { .type = NLA_BINARY,
83 .len = NL80211_MAX_SUPP_RATES },
84 [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 },
64}; 85};
65 86
66/* message building helper */ 87/* message building helper */
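Each nla_policy entry lets genetlink reject malformed attributes before any handler runs: NLA_U8/U16/U32 guarantee at least the integer's width, NLA_BINARY's .len is an upper bound on the payload, and NLA_NUL_STRING requires a terminator within .len. Handlers can then use the typed accessors directly, as the station code below does; sketched:

u8 *mac = NULL;
u16 interval = 0;

if (info->attrs[NL80211_ATTR_MAC])	/* validated: at most ETH_ALEN bytes */
	mac = nla_data(info->attrs[NL80211_ATTR_MAC]);
if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
	interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);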
@@ -335,6 +356,655 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
335 return err; 356 return err;
336} 357}
337 358
359struct get_key_cookie {
360 struct sk_buff *msg;
361 int error;
362};
363
364static void get_key_callback(void *c, struct key_params *params)
365{
366 struct get_key_cookie *cookie = c;
367
368 if (params->key)
369 NLA_PUT(cookie->msg, NL80211_ATTR_KEY_DATA,
370 params->key_len, params->key);
371
372 if (params->seq)
373 NLA_PUT(cookie->msg, NL80211_ATTR_KEY_SEQ,
374 params->seq_len, params->seq);
375
376 if (params->cipher)
377 NLA_PUT_U32(cookie->msg, NL80211_ATTR_KEY_CIPHER,
378 params->cipher);
379
380 return;
381 nla_put_failure:
382 cookie->error = 1;
383}
384
385static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
386{
387 struct cfg80211_registered_device *drv;
388 int err;
389 struct net_device *dev;
390 u8 key_idx = 0;
391 u8 *mac_addr = NULL;
392 struct get_key_cookie cookie = {
393 .error = 0,
394 };
395 void *hdr;
396 struct sk_buff *msg;
397
398 if (info->attrs[NL80211_ATTR_KEY_IDX])
399 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
400
401 if (key_idx > 3)
402 return -EINVAL;
403
404 if (info->attrs[NL80211_ATTR_MAC])
405 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
406
407 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
408 if (err)
409 return err;
410
411 if (!drv->ops->get_key) {
412 err = -EOPNOTSUPP;
413 goto out;
414 }
415
416 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
417 if (!msg) {
418 err = -ENOMEM;
419 goto out;
420 }
421
422 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
423 NL80211_CMD_NEW_KEY);
424
425 if (IS_ERR(hdr)) {
426 err = PTR_ERR(hdr);
427 goto out;
428 }
429
430 cookie.msg = msg;
431
432 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
433 NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_idx);
434 if (mac_addr)
435 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
436
437 rtnl_lock();
438 err = drv->ops->get_key(&drv->wiphy, dev, key_idx, mac_addr,
439 &cookie, get_key_callback);
440 rtnl_unlock();
441
442 if (err)
443 goto out;
444
445 if (cookie.error)
446 goto nla_put_failure;
447
448 genlmsg_end(msg, hdr);
449 err = genlmsg_unicast(msg, info->snd_pid);
450 goto out;
451
452 nla_put_failure:
453 err = -ENOBUFS;
454 nlmsg_free(msg);
455 out:
456 cfg80211_put_dev(drv);
457 dev_put(dev);
458 return err;
459}
460
461static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
462{
463 struct cfg80211_registered_device *drv;
464 int err;
465 struct net_device *dev;
466 u8 key_idx;
467
468 if (!info->attrs[NL80211_ATTR_KEY_IDX])
469 return -EINVAL;
470
471 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
472
473 if (key_idx > 3)
474 return -EINVAL;
475
476 /* currently only support setting default key */
477 if (!info->attrs[NL80211_ATTR_KEY_DEFAULT])
478 return -EINVAL;
479
480 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
481 if (err)
482 return err;
483
484 if (!drv->ops->set_default_key) {
485 err = -EOPNOTSUPP;
486 goto out;
487 }
488
489 rtnl_lock();
490 err = drv->ops->set_default_key(&drv->wiphy, dev, key_idx);
491 rtnl_unlock();
492
493 out:
494 cfg80211_put_dev(drv);
495 dev_put(dev);
496 return err;
497}
498
499static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
500{
501 struct cfg80211_registered_device *drv;
502 int err;
503 struct net_device *dev;
504 struct key_params params;
505 u8 key_idx = 0;
506 u8 *mac_addr = NULL;
507
508 memset(&params, 0, sizeof(params));
509
510 if (!info->attrs[NL80211_ATTR_KEY_CIPHER])
511 return -EINVAL;
512
513 if (info->attrs[NL80211_ATTR_KEY_DATA]) {
514 params.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]);
515 params.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]);
516 }
517
518 if (info->attrs[NL80211_ATTR_KEY_IDX])
519 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
520
521 params.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]);
522
523 if (info->attrs[NL80211_ATTR_MAC])
524 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
525
526 if (key_idx > 3)
527 return -EINVAL;
528
529 /*
530 * Disallow pairwise keys with non-zero index unless it's WEP
531 * (because current deployments use pairwise WEP keys with
 532 * non-zero indices but 802.11i clearly specifies to use zero)
533 */
534 if (mac_addr && key_idx &&
535 params.cipher != WLAN_CIPHER_SUITE_WEP40 &&
536 params.cipher != WLAN_CIPHER_SUITE_WEP104)
537 return -EINVAL;
538
539 /* TODO: add definitions for the lengths to linux/ieee80211.h */
540 switch (params.cipher) {
541 case WLAN_CIPHER_SUITE_WEP40:
542 if (params.key_len != 5)
543 return -EINVAL;
544 break;
545 case WLAN_CIPHER_SUITE_TKIP:
546 if (params.key_len != 32)
547 return -EINVAL;
548 break;
549 case WLAN_CIPHER_SUITE_CCMP:
550 if (params.key_len != 16)
551 return -EINVAL;
552 break;
553 case WLAN_CIPHER_SUITE_WEP104:
554 if (params.key_len != 13)
555 return -EINVAL;
556 break;
557 default:
558 return -EINVAL;
559 }
560
561 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
562 if (err)
563 return err;
564
565 if (!drv->ops->add_key) {
566 err = -EOPNOTSUPP;
567 goto out;
568 }
569
570 rtnl_lock();
571 err = drv->ops->add_key(&drv->wiphy, dev, key_idx, mac_addr, &params);
572 rtnl_unlock();
573
574 out:
575 cfg80211_put_dev(drv);
576 dev_put(dev);
577 return err;
578}
579
580static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
581{
582 struct cfg80211_registered_device *drv;
583 int err;
584 struct net_device *dev;
585 u8 key_idx = 0;
586 u8 *mac_addr = NULL;
587
588 if (info->attrs[NL80211_ATTR_KEY_IDX])
589 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
590
591 if (key_idx > 3)
592 return -EINVAL;
593
594 if (info->attrs[NL80211_ATTR_MAC])
595 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
596
597 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
598 if (err)
599 return err;
600
601 if (!drv->ops->del_key) {
602 err = -EOPNOTSUPP;
603 goto out;
604 }
605
606 rtnl_lock();
607 err = drv->ops->del_key(&drv->wiphy, dev, key_idx, mac_addr);
608 rtnl_unlock();
609
610 out:
611 cfg80211_put_dev(drv);
612 dev_put(dev);
613 return err;
614}
615
616static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
617{
618 int (*call)(struct wiphy *wiphy, struct net_device *dev,
619 struct beacon_parameters *info);
620 struct cfg80211_registered_device *drv;
621 int err;
622 struct net_device *dev;
623 struct beacon_parameters params;
624 int haveinfo = 0;
625
626 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
627 if (err)
628 return err;
629
630 switch (info->genlhdr->cmd) {
631 case NL80211_CMD_NEW_BEACON:
632 /* these are required for NEW_BEACON */
633 if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] ||
634 !info->attrs[NL80211_ATTR_DTIM_PERIOD] ||
635 !info->attrs[NL80211_ATTR_BEACON_HEAD]) {
636 err = -EINVAL;
637 goto out;
638 }
639
640 call = drv->ops->add_beacon;
641 break;
642 case NL80211_CMD_SET_BEACON:
643 call = drv->ops->set_beacon;
644 break;
645 default:
646 WARN_ON(1);
647 err = -EOPNOTSUPP;
648 goto out;
649 }
650
651 if (!call) {
652 err = -EOPNOTSUPP;
653 goto out;
654 }
655
656 memset(&params, 0, sizeof(params));
657
658 if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
659 params.interval =
660 nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
661 haveinfo = 1;
662 }
663
664 if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
665 params.dtim_period =
666 nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
667 haveinfo = 1;
668 }
669
670 if (info->attrs[NL80211_ATTR_BEACON_HEAD]) {
671 params.head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]);
672 params.head_len =
673 nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]);
674 haveinfo = 1;
675 }
676
677 if (info->attrs[NL80211_ATTR_BEACON_TAIL]) {
678 params.tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]);
679 params.tail_len =
680 nla_len(info->attrs[NL80211_ATTR_BEACON_TAIL]);
681 haveinfo = 1;
682 }
683
684 if (!haveinfo) {
685 err = -EINVAL;
686 goto out;
687 }
688
689 rtnl_lock();
690 err = call(&drv->wiphy, dev, &params);
691 rtnl_unlock();
692
693 out:
694 cfg80211_put_dev(drv);
695 dev_put(dev);
696 return err;
697}
698
699static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
700{
701 struct cfg80211_registered_device *drv;
702 int err;
703 struct net_device *dev;
704
705 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
706 if (err)
707 return err;
708
709 if (!drv->ops->del_beacon) {
710 err = -EOPNOTSUPP;
711 goto out;
712 }
713
714 rtnl_lock();
715 err = drv->ops->del_beacon(&drv->wiphy, dev);
716 rtnl_unlock();
717
718 out:
719 cfg80211_put_dev(drv);
720 dev_put(dev);
721 return err;
722}
723
724static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
725 [NL80211_STA_FLAG_AUTHORIZED] = { .type = NLA_FLAG },
726 [NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG },
727 [NL80211_STA_FLAG_WME] = { .type = NLA_FLAG },
728};
729
730static int parse_station_flags(struct nlattr *nla, u32 *staflags)
731{
732 struct nlattr *flags[NL80211_STA_FLAG_MAX + 1];
733 int flag;
734
735 *staflags = 0;
736
737 if (!nla)
738 return 0;
739
740 if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX,
741 nla, sta_flags_policy))
742 return -EINVAL;
743
744 *staflags = STATION_FLAG_CHANGED;
745
746 for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++)
747 if (flags[flag])
748 *staflags |= (1<<flag);
749
750 return 0;
751}
752
753static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
754 int flags, struct net_device *dev,
755 u8 *mac_addr, struct station_stats *stats)
756{
757 void *hdr;
758 struct nlattr *statsattr;
759
760 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
761 if (!hdr)
762 return -1;
763
764 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
765 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
766
767 statsattr = nla_nest_start(msg, NL80211_ATTR_STA_STATS);
768 if (!statsattr)
769 goto nla_put_failure;
770 if (stats->filled & STATION_STAT_INACTIVE_TIME)
771 NLA_PUT_U32(msg, NL80211_STA_STAT_INACTIVE_TIME,
772 stats->inactive_time);
773 if (stats->filled & STATION_STAT_RX_BYTES)
774 NLA_PUT_U32(msg, NL80211_STA_STAT_RX_BYTES,
775 stats->rx_bytes);
776 if (stats->filled & STATION_STAT_TX_BYTES)
777 NLA_PUT_U32(msg, NL80211_STA_STAT_TX_BYTES,
778 stats->tx_bytes);
779
780 nla_nest_end(msg, statsattr);
781
782 return genlmsg_end(msg, hdr);
783
784 nla_put_failure:
785 return genlmsg_cancel(msg, hdr);
786}
787
788
789static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
790{
791 struct cfg80211_registered_device *drv;
792 int err;
793 struct net_device *dev;
794 struct station_stats stats;
795 struct sk_buff *msg;
796 u8 *mac_addr = NULL;
797
798 memset(&stats, 0, sizeof(stats));
799
800 if (!info->attrs[NL80211_ATTR_MAC])
801 return -EINVAL;
802
803 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
804
805 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
806 if (err)
807 return err;
808
809 if (!drv->ops->get_station) {
810 err = -EOPNOTSUPP;
811 goto out;
812 }
813
814 rtnl_lock();
815 err = drv->ops->get_station(&drv->wiphy, dev, mac_addr, &stats);
816 rtnl_unlock();
817
818 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
819 if (!msg)
820 goto out;
821
822 if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0,
823 dev, mac_addr, &stats) < 0)
824 goto out_free;
825
826 err = genlmsg_unicast(msg, info->snd_pid);
827 goto out;
828
829 out_free:
830 nlmsg_free(msg);
831
832 out:
833 cfg80211_put_dev(drv);
834 dev_put(dev);
835 return err;
836}
837
838/*
839 * Get vlan interface making sure it is on the right wiphy.
840 */
841static int get_vlan(struct nlattr *vlanattr,
842 struct cfg80211_registered_device *rdev,
843 struct net_device **vlan)
844{
845 *vlan = NULL;
846
847 if (vlanattr) {
848 *vlan = dev_get_by_index(&init_net, nla_get_u32(vlanattr));
849 if (!*vlan)
850 return -ENODEV;
851 if (!(*vlan)->ieee80211_ptr)
852 return -EINVAL;
853 if ((*vlan)->ieee80211_ptr->wiphy != &rdev->wiphy)
854 return -EINVAL;
855 }
856 return 0;
857}
858
859static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
860{
861 struct cfg80211_registered_device *drv;
862 int err;
863 struct net_device *dev;
864 struct station_parameters params;
865 u8 *mac_addr = NULL;
866
867 memset(&params, 0, sizeof(params));
868
869 params.listen_interval = -1;
870
871 if (info->attrs[NL80211_ATTR_STA_AID])
872 return -EINVAL;
873
874 if (!info->attrs[NL80211_ATTR_MAC])
875 return -EINVAL;
876
877 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
878
879 if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) {
880 params.supported_rates =
881 nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
882 params.supported_rates_len =
883 nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
884 }
885
886 if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
887 params.listen_interval =
888 nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
889
890 if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
891 &params.station_flags))
892 return -EINVAL;
893
894 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
895 if (err)
896 return err;
897
898 err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan);
899 if (err)
900 goto out;
901
902 if (!drv->ops->change_station) {
903 err = -EOPNOTSUPP;
904 goto out;
905 }
906
907 rtnl_lock();
908 err = drv->ops->change_station(&drv->wiphy, dev, mac_addr, &params);
909 rtnl_unlock();
910
911 out:
912 if (params.vlan)
913 dev_put(params.vlan);
914 cfg80211_put_dev(drv);
915 dev_put(dev);
916 return err;
917}
918
919static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
920{
921 struct cfg80211_registered_device *drv;
922 int err;
923 struct net_device *dev;
924 struct station_parameters params;
925 u8 *mac_addr = NULL;
926
927 memset(&params, 0, sizeof(params));
928
929 if (!info->attrs[NL80211_ATTR_MAC])
930 return -EINVAL;
931
932 if (!info->attrs[NL80211_ATTR_STA_AID])
933 return -EINVAL;
934
935 if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
936 return -EINVAL;
937
938 if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES])
939 return -EINVAL;
940
941 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
942 params.supported_rates =
943 nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
944 params.supported_rates_len =
945 nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
946 params.listen_interval =
947 nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
 948 params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
949
950 if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
951 &params.station_flags))
952 return -EINVAL;
953
954 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
955 if (err)
956 return err;
957
958 err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan);
959 if (err)
960 goto out;
961
962 if (!drv->ops->add_station) {
963 err = -EOPNOTSUPP;
964 goto out;
965 }
966
967 rtnl_lock();
968 err = drv->ops->add_station(&drv->wiphy, dev, mac_addr, &params);
969 rtnl_unlock();
970
971 out:
972 if (params.vlan)
973 dev_put(params.vlan);
974 cfg80211_put_dev(drv);
975 dev_put(dev);
976 return err;
977}
978
979static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
980{
981 struct cfg80211_registered_device *drv;
982 int err;
983 struct net_device *dev;
984 u8 *mac_addr = NULL;
985
986 if (info->attrs[NL80211_ATTR_MAC])
987 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
988
989 err = get_drv_dev_by_info_ifindex(info, &drv, &dev);
990 if (err)
991 return err;
992
993 if (!drv->ops->del_station) {
994 err = -EOPNOTSUPP;
995 goto out;
996 }
997
998 rtnl_lock();
999 err = drv->ops->del_station(&drv->wiphy, dev, mac_addr);
1000 rtnl_unlock();
1001
1002 out:
1003 cfg80211_put_dev(drv);
1004 dev_put(dev);
1005 return err;
1006}
1007
338static struct genl_ops nl80211_ops[] = { 1008static struct genl_ops nl80211_ops[] = {
339 { 1009 {
340 .cmd = NL80211_CMD_GET_WIPHY, 1010 .cmd = NL80211_CMD_GET_WIPHY,
@@ -374,6 +1044,73 @@ static struct genl_ops nl80211_ops[] = {
374 .policy = nl80211_policy, 1044 .policy = nl80211_policy,
375 .flags = GENL_ADMIN_PERM, 1045 .flags = GENL_ADMIN_PERM,
376 }, 1046 },
1047 {
1048 .cmd = NL80211_CMD_GET_KEY,
1049 .doit = nl80211_get_key,
1050 .policy = nl80211_policy,
1051 .flags = GENL_ADMIN_PERM,
1052 },
1053 {
1054 .cmd = NL80211_CMD_SET_KEY,
1055 .doit = nl80211_set_key,
1056 .policy = nl80211_policy,
1057 .flags = GENL_ADMIN_PERM,
1058 },
1059 {
1060 .cmd = NL80211_CMD_NEW_KEY,
1061 .doit = nl80211_new_key,
1062 .policy = nl80211_policy,
1063 .flags = GENL_ADMIN_PERM,
1064 },
1065 {
1066 .cmd = NL80211_CMD_DEL_KEY,
1067 .doit = nl80211_del_key,
1068 .policy = nl80211_policy,
1069 .flags = GENL_ADMIN_PERM,
1070 },
1071 {
1072 .cmd = NL80211_CMD_SET_BEACON,
1073 .policy = nl80211_policy,
1074 .flags = GENL_ADMIN_PERM,
1075 .doit = nl80211_addset_beacon,
1076 },
1077 {
1078 .cmd = NL80211_CMD_NEW_BEACON,
1079 .policy = nl80211_policy,
1080 .flags = GENL_ADMIN_PERM,
1081 .doit = nl80211_addset_beacon,
1082 },
1083 {
1084 .cmd = NL80211_CMD_DEL_BEACON,
1085 .policy = nl80211_policy,
1086 .flags = GENL_ADMIN_PERM,
1087 .doit = nl80211_del_beacon,
1088 },
1089 {
1090 .cmd = NL80211_CMD_GET_STATION,
1091 .doit = nl80211_get_station,
1092 /* TODO: implement dumpit */
1093 .policy = nl80211_policy,
1094 .flags = GENL_ADMIN_PERM,
1095 },
1096 {
1097 .cmd = NL80211_CMD_SET_STATION,
1098 .doit = nl80211_set_station,
1099 .policy = nl80211_policy,
1100 .flags = GENL_ADMIN_PERM,
1101 },
1102 {
1103 .cmd = NL80211_CMD_NEW_STATION,
1104 .doit = nl80211_new_station,
1105 .policy = nl80211_policy,
1106 .flags = GENL_ADMIN_PERM,
1107 },
1108 {
1109 .cmd = NL80211_CMD_DEL_STATION,
1110 .doit = nl80211_del_station,
1111 .policy = nl80211_policy,
1112 .flags = GENL_ADMIN_PERM,
1113 },
377}; 1114};
378 1115
379/* multicast groups */ 1116/* multicast groups */
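[Editorial sketch] Each entry in nl80211_ops above binds one generic-netlink command to a doit() handler, the shared nl80211_policy for attribute validation, and GENL_ADMIN_PERM. The same registration pattern in isolation, for a hypothetical family (all example_* names are illustrative, not part of nl80211):

	#include <net/genetlink.h>

	/* illustrative family; not part of nl80211 */
	static struct genl_family example_family = {
		.id = GENL_ID_GENERATE,
		.name = "example",
		.version = 1,
		.maxattr = 1,
	};

	static struct nla_policy example_policy[1 + 1] = {
		[1] = { .type = NLA_U32 },
	};

	/* doit() runs once per request, in process context */
	static int example_doit(struct sk_buff *skb, struct genl_info *info)
	{
		if (!info->attrs[1])
			return -EINVAL;
		return 0;
	}

	static struct genl_ops example_op = {
		.cmd = 1,
		.doit = example_doit,
		.policy = example_policy,
		.flags = GENL_ADMIN_PERM,
	};

	static int __init example_init(void)
	{
		int err = genl_register_family(&example_family);
		if (err)
			return err;
		err = genl_register_ops(&example_family, &example_op);
		if (err)
			genl_unregister_family(&example_family);
		return err;
	}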
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 47e80cc2077c..2c569b63e7d8 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -417,20 +417,6 @@ static const int event_type_size[] = {
417 IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ 417 IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */
418}; 418};
419 419
420/* Size (in bytes) of various events, as packed */
421static const int event_type_pk_size[] = {
422 IW_EV_LCP_PK_LEN, /* IW_HEADER_TYPE_NULL */
423 0,
424 IW_EV_CHAR_PK_LEN, /* IW_HEADER_TYPE_CHAR */
425 0,
426 IW_EV_UINT_PK_LEN, /* IW_HEADER_TYPE_UINT */
427 IW_EV_FREQ_PK_LEN, /* IW_HEADER_TYPE_FREQ */
428 IW_EV_ADDR_PK_LEN, /* IW_HEADER_TYPE_ADDR */
429 0,
430 IW_EV_POINT_PK_LEN, /* Without variable payload */
431 IW_EV_PARAM_PK_LEN, /* IW_HEADER_TYPE_PARAM */
432 IW_EV_QUAL_PK_LEN, /* IW_HEADER_TYPE_QUAL */
433};
434 420
435/************************ COMMON SUBROUTINES ************************/ 421/************************ COMMON SUBROUTINES ************************/
436/* 422/*
@@ -673,26 +659,8 @@ static const struct seq_operations wireless_seq_ops = {
673 659
674static int wireless_seq_open(struct inode *inode, struct file *file) 660static int wireless_seq_open(struct inode *inode, struct file *file)
675{ 661{
676 struct seq_file *seq; 662 return seq_open_net(inode, file, &wireless_seq_ops,
677 int res; 663 sizeof(struct seq_net_private));
678 res = seq_open(file, &wireless_seq_ops);
679 if (!res) {
680 seq = file->private_data;
681 seq->private = get_proc_net(inode);
682 if (!seq->private) {
683 seq_release(inode, file);
684 res = -ENXIO;
685 }
686 }
687 return res;
688}
689
690static int wireless_seq_release(struct inode *inode, struct file *file)
691{
692 struct seq_file *seq = file->private_data;
693 struct net *net = seq->private;
694 put_net(net);
695 return seq_release(inode, file);
696} 664}
697 665
698static const struct file_operations wireless_seq_fops = { 666static const struct file_operations wireless_seq_fops = {
@@ -700,7 +668,7 @@ static const struct file_operations wireless_seq_fops = {
700 .open = wireless_seq_open, 668 .open = wireless_seq_open,
701 .read = seq_read, 669 .read = seq_read,
702 .llseek = seq_lseek, 670 .llseek = seq_lseek,
703 .release = wireless_seq_release, 671 .release = seq_release_net,
704}; 672};
705 673
706int wext_proc_init(struct net *net) 674int wext_proc_init(struct net *net)
@@ -1137,7 +1105,7 @@ static void wireless_nlevent_process(unsigned long data)
1137 struct sk_buff *skb; 1105 struct sk_buff *skb;
1138 1106
1139 while ((skb = skb_dequeue(&wireless_nlevent_queue))) 1107 while ((skb = skb_dequeue(&wireless_nlevent_queue)))
1140 rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); 1108 rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
1141} 1109}
1142 1110
1143static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); 1111static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
@@ -1189,6 +1157,9 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
1189 struct sk_buff *skb; 1157 struct sk_buff *skb;
1190 int err; 1158 int err;
1191 1159
1160 if (dev->nd_net != &init_net)
1161 return;
1162
1192 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1163 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1193 if (!skb) 1164 if (!skb)
1194 return; 1165 return;
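[Editorial sketch] The wext changes above are the stock net-namespace conversion: the /proc handlers switch to the seq_open_net()/seq_release_net() helpers, and netlink events are suppressed for devices outside init_net. Code that also needs per-namespace state would typically pair this with a pernet_operations registration (a sketch under that assumption; names are illustrative):

	#include <net/net_namespace.h>

	/* illustrative only: per-namespace init/exit hooks */
	static int __net_init example_net_init(struct net *net)
	{
		/* set up per-namespace state (proc entries, tables, ...) */
		return 0;
	}

	static void __net_exit example_net_exit(struct net *net)
	{
		/* tear the per-namespace state down again */
	}

	static struct pernet_operations example_net_ops = {
		.init = example_net_init,
		.exit = example_net_exit,
	};

	static int __init example_module_init(void)
	{
		return register_pernet_subsys(&example_net_ops);
	}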
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 92cfe8e3e0b8..07fad7ccf832 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -83,9 +83,9 @@ struct compat_x25_subscrip_struct {
83int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr, 83int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr,
84 struct x25_address *calling_addr) 84 struct x25_address *calling_addr)
85{ 85{
86 int called_len, calling_len; 86 unsigned int called_len, calling_len;
87 char *called, *calling; 87 char *called, *calling;
88 int i; 88 unsigned int i;
89 89
90 called_len = (*p >> 0) & 0x0F; 90 called_len = (*p >> 0) & 0x0F;
91 calling_len = (*p >> 4) & 0x0F; 91 calling_len = (*p >> 4) & 0x0F;
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index a59b77f18234..6ebda25c24e9 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -84,29 +84,15 @@ static struct ctl_table x25_table[] = {
84 { 0, }, 84 { 0, },
85}; 85};
86 86
87static struct ctl_table x25_dir_table[] = { 87static struct ctl_path x25_path[] = {
88 { 88 { .procname = "net", .ctl_name = CTL_NET, },
89 .ctl_name = NET_X25, 89 { .procname = "x25", .ctl_name = NET_X25, },
90 .procname = "x25", 90 { }
91 .mode = 0555,
92 .child = x25_table,
93 },
94 { 0, },
95};
96
97static struct ctl_table x25_root_table[] = {
98 {
99 .ctl_name = CTL_NET,
100 .procname = "net",
101 .mode = 0555,
102 .child = x25_dir_table,
103 },
104 { 0, },
105}; 91};
106 92
107void __init x25_register_sysctl(void) 93void __init x25_register_sysctl(void)
108{ 94{
109 x25_table_header = register_sysctl_table(x25_root_table); 95 x25_table_header = register_sysctl_paths(x25_path, x25_table);
110} 96}
111 97
112void x25_unregister_sysctl(void) 98void x25_unregister_sysctl(void)
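[Editorial sketch] register_sysctl_paths() takes the directory chain as a flat ctl_path array, so the two hand-built directory tables ("net" and "x25") collapse into two path entries and only the leaf table keeps real entries. The same conversion target for a hypothetical subsystem:

	/* hypothetical "net/example" sysctl; names are illustrative */
	static int example_timeout = 30;

	static struct ctl_table example_table[] = {
		{
			.procname = "timeout",
			.data = &example_timeout,
			.maxlen = sizeof(int),
			.mode = 0644,
			.proc_handler = &proc_dointvec,
		},
		{ }
	};

	static struct ctl_path example_path[] = {
		{ .procname = "net", .ctl_name = CTL_NET, },
		{ .procname = "example", },
		{ }
	};

	static struct ctl_table_header *example_header;

	static void __init example_register_sysctl(void)
	{
		example_header = register_sysctl_paths(example_path,
						       example_table);
	}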
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index dec404afa113..a21f6646eb3a 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -205,9 +205,7 @@ int x25_create_facilities(unsigned char *buffer,
205 } 205 }
206 206
207 if (dte_facs->calling_len && (facil_mask & X25_MASK_CALLING_AE)) { 207 if (dte_facs->calling_len && (facil_mask & X25_MASK_CALLING_AE)) {
208 unsigned bytecount = (dte_facs->calling_len % 2) ? 208 unsigned bytecount = (dte_facs->calling_len + 1) >> 1;
209 dte_facs->calling_len / 2 + 1 :
210 dte_facs->calling_len / 2;
211 *p++ = X25_FAC_CALLING_AE; 209 *p++ = X25_FAC_CALLING_AE;
212 *p++ = 1 + bytecount; 210 *p++ = 1 + bytecount;
213 *p++ = dte_facs->calling_len; 211 *p++ = dte_facs->calling_len;
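[Editorial note] The new bytecount expression works because (n + 1) >> 1 is n/2 rounded up for any non-negative n, which is exactly the number of octets needed to pack n BCD address digits two per byte; e.g. calling_len = 5 gives (5 + 1) >> 1 = 3, the same as the old 5/2 + 1 branch. A standalone check of the equivalence (plain userspace C, not kernel code):

	#include <assert.h>

	int main(void)
	{
		unsigned int n;

		/* address-digit counts fit in a nibble, so 0..15 covers all */
		for (n = 0; n <= 15; n++) {
			unsigned int old = (n % 2) ? n / 2 + 1 : n / 2;
			assert(((n + 1) >> 1) == old);
		}
		return 0;
	}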
diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index 34478035e05e..056a55f3a871 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -12,7 +12,7 @@
12#include <linux/init.h> 12#include <linux/init.h>
13#include <net/x25.h> 13#include <net/x25.h>
14 14
15struct list_head x25_forward_list = LIST_HEAD_INIT(x25_forward_list); 15LIST_HEAD(x25_forward_list);
16DEFINE_RWLOCK(x25_forward_list_lock); 16DEFINE_RWLOCK(x25_forward_list_lock);
17 17
18int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, 18int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
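[Editorial note] LIST_HEAD(x25_forward_list) is just the compact spelling of the definition it replaces: include/linux/list.h defines LIST_HEAD(name) as struct list_head name = LIST_HEAD_INIT(name), i.e. an empty list whose next and prev point back at itself. Usage is unchanged; a small illustrative example:

	#include <linux/list.h>

	static LIST_HEAD(example_list);	/* statically initialized, empty */

	/* example entry type, illustrative only */
	struct example_entry {
		struct list_head node;
		int value;
	};

	static void example_add(struct example_entry *e)
	{
		list_add(&e->node, &example_list);
	}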
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 1c88762c2794..7d7c3abf38b5 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -247,7 +247,7 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp
247 break; 247 break;
248 } 248 }
249 if (atomic_read(&sk->sk_rmem_alloc) > 249 if (atomic_read(&sk->sk_rmem_alloc) >
250 (sk->sk_rcvbuf / 2)) 250 (sk->sk_rcvbuf >> 1))
251 x25->condition |= X25_COND_OWN_RX_BUSY; 251 x25->condition |= X25_COND_OWN_RX_BUSY;
252 } 252 }
253 /* 253 /*
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 741ce95d4ad1..e4e1b6e49538 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -30,7 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <net/x25.h> 31#include <net/x25.h>
32 32
33static struct list_head x25_neigh_list = LIST_HEAD_INIT(x25_neigh_list); 33static LIST_HEAD(x25_neigh_list);
34static DEFINE_RWLOCK(x25_neigh_list_lock); 34static DEFINE_RWLOCK(x25_neigh_list_lock);
35 35
36static void x25_t20timer_expiry(unsigned long); 36static void x25_t20timer_expiry(unsigned long);
@@ -247,10 +247,7 @@ void x25_link_device_up(struct net_device *dev)
247 return; 247 return;
248 248
249 skb_queue_head_init(&nb->queue); 249 skb_queue_head_init(&nb->queue);
250 250 setup_timer(&nb->t20timer, x25_t20timer_expiry, (unsigned long)nb);
251 init_timer(&nb->t20timer);
252 nb->t20timer.data = (unsigned long)nb;
253 nb->t20timer.function = &x25_t20timer_expiry;
254 251
255 dev_hold(dev); 252 dev_hold(dev);
256 nb->dev = dev; 253 nb->dev = dev;
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 7d55e50c936f..3f52b09bed03 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -41,6 +41,7 @@ found:
41} 41}
42 42
43static void *x25_seq_route_start(struct seq_file *seq, loff_t *pos) 43static void *x25_seq_route_start(struct seq_file *seq, loff_t *pos)
44 __acquires(x25_route_list_lock)
44{ 45{
45 loff_t l = *pos; 46 loff_t l = *pos;
46 47
@@ -70,6 +71,7 @@ out:
70} 71}
71 72
72static void x25_seq_route_stop(struct seq_file *seq, void *v) 73static void x25_seq_route_stop(struct seq_file *seq, void *v)
74 __releases(x25_route_list_lock)
73{ 75{
74 read_unlock_bh(&x25_route_list_lock); 76 read_unlock_bh(&x25_route_list_lock);
75} 77}
@@ -105,6 +107,7 @@ found:
105} 107}
106 108
107static void *x25_seq_socket_start(struct seq_file *seq, loff_t *pos) 109static void *x25_seq_socket_start(struct seq_file *seq, loff_t *pos)
110 __acquires(x25_list_lock)
108{ 111{
109 loff_t l = *pos; 112 loff_t l = *pos;
110 113
@@ -127,6 +130,7 @@ out:
127} 130}
128 131
129static void x25_seq_socket_stop(struct seq_file *seq, void *v) 132static void x25_seq_socket_stop(struct seq_file *seq, void *v)
133 __releases(x25_list_lock)
130{ 134{
131 read_unlock_bh(&x25_list_lock); 135 read_unlock_bh(&x25_list_lock);
132} 136}
@@ -183,6 +187,7 @@ found:
183} 187}
184 188
185static void *x25_seq_forward_start(struct seq_file *seq, loff_t *pos) 189static void *x25_seq_forward_start(struct seq_file *seq, loff_t *pos)
190 __acquires(x25_forward_list_lock)
186{ 191{
187 loff_t l = *pos; 192 loff_t l = *pos;
188 193
@@ -213,6 +218,7 @@ out:
213} 218}
214 219
215static void x25_seq_forward_stop(struct seq_file *seq, void *v) 220static void x25_seq_forward_stop(struct seq_file *seq, void *v)
221 __releases(x25_forward_list_lock)
216{ 222{
217 read_unlock_bh(&x25_forward_list_lock); 223 read_unlock_bh(&x25_forward_list_lock);
218} 224}
@@ -287,7 +293,7 @@ static const struct file_operations x25_seq_route_fops = {
287 .release = seq_release, 293 .release = seq_release,
288}; 294};
289 295
290static struct file_operations x25_seq_forward_fops = { 296static const struct file_operations x25_seq_forward_fops = {
291 .owner = THIS_MODULE, 297 .owner = THIS_MODULE,
292 .open = x25_seq_forward_open, 298 .open = x25_seq_forward_open,
293 .read = seq_read, 299 .read = seq_read,
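[Editorial note] The __acquires()/__releases() annotations added to the seq_file start/stop pairs are sparse checker hints from <linux/compiler.h>: they declare that a function returns holding a lock, or releases a lock it did not take, so "make C=1" can balance locking across function boundaries. The same pattern on a hypothetical lock:

	/* hypothetical lock/seq pair; names are illustrative */
	static DEFINE_RWLOCK(example_lock);

	static void *example_seq_start(struct seq_file *seq, loff_t *pos)
		__acquires(example_lock)
	{
		read_lock_bh(&example_lock);
		return NULL;	/* a real start would return the first element */
	}

	static void example_seq_stop(struct seq_file *seq, void *v)
		__releases(example_lock)
	{
		read_unlock_bh(&example_lock);
	}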
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 86b5b4da097c..2c999ccf504a 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -21,7 +21,7 @@
21#include <linux/init.h> 21#include <linux/init.h>
22#include <net/x25.h> 22#include <net/x25.h>
23 23
24struct list_head x25_route_list = LIST_HEAD_INIT(x25_route_list); 24LIST_HEAD(x25_route_list);
25DEFINE_RWLOCK(x25_route_list_lock); 25DEFINE_RWLOCK(x25_route_list_lock);
26 26
27/* 27/*
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 8d6220aa5d0f..511a5986af3e 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -359,7 +359,7 @@ void x25_check_rbuf(struct sock *sk)
359{ 359{
360 struct x25_sock *x25 = x25_sk(sk); 360 struct x25_sock *x25 = x25_sk(sk);
361 361
362 if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) && 362 if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf >> 1) &&
363 (x25->condition & X25_COND_OWN_RX_BUSY)) { 363 (x25->condition & X25_COND_OWN_RX_BUSY)) {
364 x25->condition &= ~X25_COND_OWN_RX_BUSY; 364 x25->condition &= ~X25_COND_OWN_RX_BUSY;
365 x25->condition &= ~X25_COND_ACK_PENDING; 365 x25->condition &= ~X25_COND_ACK_PENDING;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 2af190dc5b01..d3e3e54db936 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -33,9 +33,7 @@ void x25_init_timers(struct sock *sk)
33{ 33{
34 struct x25_sock *x25 = x25_sk(sk); 34 struct x25_sock *x25 = x25_sk(sk);
35 35
36 init_timer(&x25->timer); 36 setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
37 x25->timer.data = (unsigned long)sk;
38 x25->timer.function = &x25_timer_expiry;
39 37
40 /* initialized by sock_init_data */ 38 /* initialized by sock_init_data */
41 sk->sk_timer.data = (unsigned long)sk; 39 sk->sk_timer.data = (unsigned long)sk;
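[Editorial note] Both x25 hunks above are the mechanical init_timer() conversion: setup_timer(timer, fn, data) initializes the timer and assigns .function and .data in one call, with no behavior change. The shape of the conversion, on a hypothetical structure:

	#include <linux/timer.h>

	/* hypothetical structure; names are illustrative */
	struct example_peer {
		struct timer_list t20timer;
	};

	static void example_expiry(unsigned long data)
	{
		struct example_peer *peer = (struct example_peer *)data;
		/* timer fired for this peer */
	}

	static void example_init(struct example_peer *peer)
	{
		/* replaces: init_timer(); .data = ...; .function = ...; */
		setup_timer(&peer->t20timer, example_expiry,
			    (unsigned long)peer);
	}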
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 577a4f821b98..8f9dbec319be 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -3,6 +3,7 @@
3# 3#
4config XFRM 4config XFRM
5 bool 5 bool
6 select CRYPTO
6 depends on NET 7 depends on NET
7 8
8config XFRM_USER 9config XFRM_USER
@@ -35,6 +36,16 @@ config XFRM_MIGRATE
35 36
36 If unsure, say N. 37 If unsure, say N.
37 38
39config XFRM_STATISTICS
40 bool "Transformation statistics (EXPERIMENTAL)"
41 depends on XFRM && PROC_FS && EXPERIMENTAL
42 ---help---
 43 These statistics are not defined by any SNMP/MIB standard;
 44 they count transformation errors (and near-errors) observed
 45 during packet processing, as an aid to developers.
46
47 If unsure, say N.
48
38config NET_KEY 49config NET_KEY
39 tristate "PF_KEY sockets" 50 tristate "PF_KEY sockets"
40 select XFRM 51 select XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 45744a3d3a51..332cfb0ff566 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,5 +4,6 @@
4 4
5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ 5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
6 xfrm_input.o xfrm_output.o xfrm_algo.o 6 xfrm_input.o xfrm_output.o xfrm_algo.o
7obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
7obj-$(CONFIG_XFRM_USER) += xfrm_user.o 8obj-$(CONFIG_XFRM_USER) += xfrm_user.o
8 9
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 1686f64c4352..b5c5347aed66 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -486,7 +486,6 @@ EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx);
486 */ 486 */
487void xfrm_probe_algs(void) 487void xfrm_probe_algs(void)
488{ 488{
489#ifdef CONFIG_CRYPTO
490 int i, status; 489 int i, status;
491 490
492 BUG_ON(in_softirq()); 491 BUG_ON(in_softirq());
@@ -511,7 +510,6 @@ void xfrm_probe_algs(void)
511 if (calg_list[i].available != status) 510 if (calg_list[i].available != status)
512 calg_list[i].available = status; 511 calg_list[i].available = status;
513 } 512 }
514#endif
515} 513}
516EXPORT_SYMBOL_GPL(xfrm_probe_algs); 514EXPORT_SYMBOL_GPL(xfrm_probe_algs);
517 515
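[Editorial note] Dropping the #ifdef CONFIG_CRYPTO guard is safe because the Kconfig hunk above makes XFRM select CRYPTO, so the crypto API is always built when this file is. Runtime probing then reduces to plain availability calls such as crypto_has_alg() (illustrative; not the exact probes xfrm_probe_algs() makes):

	#include <linux/crypto.h>

	static int example_have_cbc_aes(void)
	{
		/* nonzero if an implementation exists or can be loaded */
		return crypto_has_alg("cbc(aes)", 0, 0);
	}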
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 55ab5792af56..a2023ec52329 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -17,17 +17,14 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)
17 struct hlist_head *n; 17 struct hlist_head *n;
18 18
19 if (sz <= PAGE_SIZE) 19 if (sz <= PAGE_SIZE)
20 n = kmalloc(sz, GFP_KERNEL); 20 n = kzalloc(sz, GFP_KERNEL);
21 else if (hashdist) 21 else if (hashdist)
22 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); 22 n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
23 else 23 else
24 n = (struct hlist_head *) 24 n = (struct hlist_head *)
25 __get_free_pages(GFP_KERNEL | __GFP_NOWARN, 25 __get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
26 get_order(sz)); 26 get_order(sz));
27 27
28 if (n)
29 memset(n, 0, sz);
30
31 return n; 28 return n;
32} 29}
33 30
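[Editorial note] All three branches now ask the allocator for zeroed memory (kzalloc(), or __GFP_ZERO for the vmalloc and page-allocator paths), which is what lets the trailing memset() go; the result is identical. The generic replacement pattern:

	#include <linux/slab.h>

	static void *example_alloc(size_t sz)
	{
		/* old: p = kmalloc(sz, GFP_KERNEL); if (p) memset(p, 0, sz); */
		return kzalloc(sz, GFP_KERNEL);
	}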
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index cb97fda1b6df..039e7019c48a 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -9,6 +9,8 @@
9 9
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/netdevice.h>
13#include <net/dst.h>
12#include <net/ip.h> 14#include <net/ip.h>
13#include <net/xfrm.h> 15#include <net/xfrm.h>
14 16
@@ -81,6 +83,180 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
81} 83}
82EXPORT_SYMBOL(xfrm_parse_spi); 84EXPORT_SYMBOL(xfrm_parse_spi);
83 85
86int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
87{
88 int err;
89
90 err = x->outer_mode->afinfo->extract_input(x, skb);
91 if (err)
92 return err;
93
94 skb->protocol = x->inner_mode->afinfo->eth_proto;
95 return x->inner_mode->input2(x, skb);
96}
97EXPORT_SYMBOL(xfrm_prepare_input);
98
99int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
100{
101 int err;
102 __be32 seq;
103 struct xfrm_state *x;
104 xfrm_address_t *daddr;
105 unsigned int family;
106 int decaps = 0;
107 int async = 0;
108
109 /* A negative encap_type indicates async resumption. */
110 if (encap_type < 0) {
111 async = 1;
112 x = xfrm_input_state(skb);
113 seq = XFRM_SKB_CB(skb)->seq;
114 goto resume;
115 }
116
117 /* Allocate new secpath or COW existing one. */
118 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
119 struct sec_path *sp;
120
121 sp = secpath_dup(skb->sp);
122 if (!sp) {
123 XFRM_INC_STATS(LINUX_MIB_XFRMINERROR);
124 goto drop;
125 }
126 if (skb->sp)
127 secpath_put(skb->sp);
128 skb->sp = sp;
129 }
130
131 daddr = (xfrm_address_t *)(skb_network_header(skb) +
132 XFRM_SPI_SKB_CB(skb)->daddroff);
133 family = XFRM_SPI_SKB_CB(skb)->family;
134
135 seq = 0;
136 if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
137 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
138 goto drop;
139 }
140
141 do {
142 if (skb->sp->len == XFRM_MAX_DEPTH) {
143 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
144 goto drop;
145 }
146
147 x = xfrm_state_lookup(daddr, spi, nexthdr, family);
148 if (x == NULL) {
149 XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
150 xfrm_audit_state_notfound(skb, family, spi, seq);
151 goto drop;
152 }
153
154 skb->sp->xvec[skb->sp->len++] = x;
155
156 spin_lock(&x->lock);
157 if (unlikely(x->km.state != XFRM_STATE_VALID)) {
158 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID);
159 goto drop_unlock;
160 }
161
162 if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
163 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID);
164 goto drop_unlock;
165 }
166
167 if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) {
168 XFRM_INC_STATS(LINUX_MIB_XFRMINSEQOUTOFWINDOW);
169 goto drop_unlock;
170 }
171
172 if (xfrm_state_check_expire(x)) {
173 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEEXPIRED);
174 goto drop_unlock;
175 }
176
177 spin_unlock(&x->lock);
178
179 XFRM_SKB_CB(skb)->seq = seq;
180
181 nexthdr = x->type->input(x, skb);
182
183 if (nexthdr == -EINPROGRESS)
184 return 0;
185
186resume:
187 spin_lock(&x->lock);
188 if (nexthdr <= 0) {
189 if (nexthdr == -EBADMSG) {
190 xfrm_audit_state_icvfail(x, skb,
191 x->type->proto);
192 x->stats.integrity_failed++;
193 }
194 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR);
195 goto drop_unlock;
196 }
197
198 /* only the first xfrm gets the encap type */
199 encap_type = 0;
200
201 if (x->props.replay_window)
202 xfrm_replay_advance(x, seq);
203
204 x->curlft.bytes += skb->len;
205 x->curlft.packets++;
206
207 spin_unlock(&x->lock);
208
209 XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
210
211 if (x->inner_mode->input(x, skb)) {
212 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR);
213 goto drop;
214 }
215
216 if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
217 decaps = 1;
218 break;
219 }
220
221 /*
222 * We need the inner address. However, we only get here for
223 * transport mode so the outer address is identical.
224 */
225 daddr = &x->id.daddr;
226 family = x->outer_mode->afinfo->family;
227
228 err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
229 if (err < 0) {
230 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
231 goto drop;
232 }
233 } while (!err);
234
235 nf_reset(skb);
236
237 if (decaps) {
238 dst_release(skb->dst);
239 skb->dst = NULL;
240 netif_rx(skb);
241 return 0;
242 } else {
243 return x->inner_mode->afinfo->transport_finish(skb, async);
244 }
245
246drop_unlock:
247 spin_unlock(&x->lock);
248drop:
249 kfree_skb(skb);
250 return 0;
251}
252EXPORT_SYMBOL(xfrm_input);
253
254int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
255{
256 return xfrm_input(skb, nexthdr, 0, -1);
257}
258EXPORT_SYMBOL(xfrm_input_resume);
259
84void __init xfrm_input_init(void) 260void __init xfrm_input_init(void)
85{ 261{
86 secpath_cachep = kmem_cache_create("secpath_cache", 262 secpath_cachep = kmem_cache_create("secpath_cache",
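[Editorial sketch] The negative encap_type convention above is how asynchronous transforms re-enter this state machine: a type's input() may return -EINPROGRESS, and its completion callback later calls xfrm_input_resume(), which is xfrm_input(skb, nexthdr, 0, -1) and jumps straight to the resume label with the state taken from the skb. From the point of view of a hypothetical async completion (example_* names are assumptions):

	/* hypothetical helper that reads the decapsulated next header */
	static int example_extract_nexthdr(struct sk_buff *skb);

	/* hypothetical async completion callback */
	static void example_input_done(struct crypto_async_request *base, int err)
	{
		struct sk_buff *skb = base->data;
		int nexthdr = err;

		if (!err)
			nexthdr = example_extract_nexthdr(skb);

		/* values <= 0 take the error path inside xfrm_input() */
		xfrm_input_resume(skb, nexthdr);
	}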
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index f4bfd6c45651..f4a1047a5573 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -12,14 +12,18 @@
12#include <linux/errno.h> 12#include <linux/errno.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/netdevice.h> 14#include <linux/netdevice.h>
15#include <linux/netfilter.h>
15#include <linux/skbuff.h> 16#include <linux/skbuff.h>
16#include <linux/spinlock.h> 17#include <linux/spinlock.h>
17#include <net/dst.h> 18#include <net/dst.h>
18#include <net/xfrm.h> 19#include <net/xfrm.h>
19 20
21static int xfrm_output2(struct sk_buff *skb);
22
20static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) 23static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
21{ 24{
22 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) 25 struct dst_entry *dst = skb->dst;
26 int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
23 - skb_headroom(skb); 27 - skb_headroom(skb);
24 28
25 if (nhead > 0) 29 if (nhead > 0)
@@ -29,54 +33,63 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
29 return 0; 33 return 0;
30} 34}
31 35
32static int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb) 36static int xfrm_output_one(struct sk_buff *skb, int err)
33{
34 int err = xfrm_state_check_expire(x);
35 if (err < 0)
36 goto err;
37 err = xfrm_state_check_space(x, skb);
38err:
39 return err;
40}
41
42int xfrm_output(struct sk_buff *skb)
43{ 37{
44 struct dst_entry *dst = skb->dst; 38 struct dst_entry *dst = skb->dst;
45 struct xfrm_state *x = dst->xfrm; 39 struct xfrm_state *x = dst->xfrm;
46 int err;
47 40
48 if (skb->ip_summed == CHECKSUM_PARTIAL) { 41 if (err <= 0)
49 err = skb_checksum_help(skb); 42 goto resume;
50 if (err)
51 goto error_nolock;
52 }
53 43
54 do { 44 do {
45 err = xfrm_state_check_space(x, skb);
46 if (err) {
47 XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
48 goto error_nolock;
49 }
50
51 err = x->outer_mode->output(x, skb);
52 if (err) {
53 XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR);
54 goto error_nolock;
55 }
56
55 spin_lock_bh(&x->lock); 57 spin_lock_bh(&x->lock);
56 err = xfrm_state_check(x, skb); 58 err = xfrm_state_check_expire(x);
57 if (err) 59 if (err) {
60 XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEEXPIRED);
58 goto error; 61 goto error;
62 }
59 63
60 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { 64 if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
61 XFRM_SKB_CB(skb)->seq = ++x->replay.oseq; 65 XFRM_SKB_CB(skb)->seq = ++x->replay.oseq;
66 if (unlikely(x->replay.oseq == 0)) {
67 x->replay.oseq--;
68 xfrm_audit_state_replay_overflow(x, skb);
69 err = -EOVERFLOW;
70 goto error;
71 }
62 if (xfrm_aevent_is_on()) 72 if (xfrm_aevent_is_on())
63 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); 73 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
64 } 74 }
65 75
66 err = x->outer_mode->output(x, skb);
67 if (err)
68 goto error;
69
70 x->curlft.bytes += skb->len; 76 x->curlft.bytes += skb->len;
71 x->curlft.packets++; 77 x->curlft.packets++;
72 78
73 spin_unlock_bh(&x->lock); 79 spin_unlock_bh(&x->lock);
74 80
75 err = x->type->output(x, skb); 81 err = x->type->output(x, skb);
76 if (err) 82 if (err == -EINPROGRESS)
83 goto out_exit;
84
85resume:
86 if (err) {
87 XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEPROTOERROR);
77 goto error_nolock; 88 goto error_nolock;
89 }
78 90
79 if (!(skb->dst = dst_pop(dst))) { 91 if (!(skb->dst = dst_pop(dst))) {
92 XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
80 err = -EHOSTUNREACH; 93 err = -EHOSTUNREACH;
81 goto error_nolock; 94 goto error_nolock;
82 } 95 }
@@ -86,10 +99,97 @@ int xfrm_output(struct sk_buff *skb)
86 99
87 err = 0; 100 err = 0;
88 101
89error_nolock: 102out_exit:
90 return err; 103 return err;
91error: 104error:
92 spin_unlock_bh(&x->lock); 105 spin_unlock_bh(&x->lock);
93 goto error_nolock; 106error_nolock:
107 kfree_skb(skb);
108 goto out_exit;
109}
110
111int xfrm_output_resume(struct sk_buff *skb, int err)
112{
113 while (likely((err = xfrm_output_one(skb, err)) == 0)) {
114 struct xfrm_state *x;
115
116 nf_reset(skb);
117
118 err = skb->dst->ops->local_out(skb);
119 if (unlikely(err != 1))
120 goto out;
121
122 x = skb->dst->xfrm;
123 if (!x)
124 return dst_output(skb);
125
126 err = nf_hook(x->inner_mode->afinfo->family,
127 NF_INET_POST_ROUTING, skb,
128 NULL, skb->dst->dev, xfrm_output2);
129 if (unlikely(err != 1))
130 goto out;
131 }
132
133 if (err == -EINPROGRESS)
134 err = 0;
135
136out:
137 return err;
138}
139EXPORT_SYMBOL_GPL(xfrm_output_resume);
140
141static int xfrm_output2(struct sk_buff *skb)
142{
143 return xfrm_output_resume(skb, 1);
144}
145
146static int xfrm_output_gso(struct sk_buff *skb)
147{
148 struct sk_buff *segs;
149
150 segs = skb_gso_segment(skb, 0);
151 kfree_skb(skb);
152 if (unlikely(IS_ERR(segs)))
153 return PTR_ERR(segs);
154
155 do {
156 struct sk_buff *nskb = segs->next;
157 int err;
158
159 segs->next = NULL;
160 err = xfrm_output2(segs);
161
162 if (unlikely(err)) {
163 while ((segs = nskb)) {
164 nskb = segs->next;
165 segs->next = NULL;
166 kfree_skb(segs);
167 }
168 return err;
169 }
170
171 segs = nskb;
172 } while (segs);
173
174 return 0;
175}
176
177int xfrm_output(struct sk_buff *skb)
178{
179 int err;
180
181 if (skb_is_gso(skb))
182 return xfrm_output_gso(skb);
183
184 if (skb->ip_summed == CHECKSUM_PARTIAL) {
185 err = skb_checksum_help(skb);
186 if (err) {
187 XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
188 kfree_skb(skb);
189 return err;
190 }
191 }
192
193 return xfrm_output2(skb);
94} 194}
95EXPORT_SYMBOL_GPL(xfrm_output); 195EXPORT_SYMBOL_GPL(xfrm_output);
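[Editorial sketch] xfrm_output() is now only a front end: GSO packets are segmented first, everything else gets its checksum finalized, and xfrm_output2()/xfrm_output_resume() drive one transform per iteration with an NF_INET_POST_ROUTING hook between dst entries. The segment walk in xfrm_output_gso() is the standard skb_gso_segment() consumption idiom; in generic form (example_xmit_one() is a hypothetical per-segment consumer):

	/* hypothetical per-segment consumer; frees its skb on error */
	static int example_xmit_one(struct sk_buff *skb);

	static int example_xmit_gso(struct sk_buff *skb)
	{
		struct sk_buff *segs = skb_gso_segment(skb, 0);

		kfree_skb(skb);		/* each segment is a separate skb now */
		if (IS_ERR(segs))
			return PTR_ERR(segs);

		while (segs) {
			struct sk_buff *next = segs->next;
			int err;

			segs->next = NULL;
			err = example_xmit_one(segs);
			if (err) {
				/* free the segments not yet handed off */
				while ((segs = next) != NULL) {
					next = segs->next;
					segs->next = NULL;
					kfree_skb(segs);
				}
				return err;
			}
			segs = next;
		}
		return 0;
	}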
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 26b846e11bfb..47219f98053f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -13,6 +13,7 @@
13 * 13 *
14 */ 14 */
15 15
16#include <linux/err.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/kmod.h> 18#include <linux/kmod.h>
18#include <linux/list.h> 19#include <linux/list.h>
@@ -23,13 +24,23 @@
23#include <linux/netfilter.h> 24#include <linux/netfilter.h>
24#include <linux/module.h> 25#include <linux/module.h>
25#include <linux/cache.h> 26#include <linux/cache.h>
27#include <linux/audit.h>
28#include <net/dst.h>
26#include <net/xfrm.h> 29#include <net/xfrm.h>
27#include <net/ip.h> 30#include <net/ip.h>
31#ifdef CONFIG_XFRM_STATISTICS
32#include <net/snmp.h>
33#endif
28 34
29#include "xfrm_hash.h" 35#include "xfrm_hash.h"
30 36
31int sysctl_xfrm_larval_drop __read_mostly; 37int sysctl_xfrm_larval_drop __read_mostly;
32 38
39#ifdef CONFIG_XFRM_STATISTICS
40DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
41EXPORT_SYMBOL(xfrm_statistics);
42#endif
43
33DEFINE_MUTEX(xfrm_cfg_mutex); 44DEFINE_MUTEX(xfrm_cfg_mutex);
34EXPORT_SYMBOL(xfrm_cfg_mutex); 45EXPORT_SYMBOL(xfrm_cfg_mutex);
35 46
@@ -49,6 +60,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
49 60
50static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); 61static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
51static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); 62static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
63static void xfrm_init_pmtu(struct dst_entry *dst);
52 64
53static inline int 65static inline int
54__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) 66__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
@@ -84,23 +96,27 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
84 return 0; 96 return 0;
85} 97}
86 98
87int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 99static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
88 unsigned short family) 100 int family)
89{ 101{
90 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 102 xfrm_address_t *saddr = &x->props.saddr;
91 int err = 0; 103 xfrm_address_t *daddr = &x->id.daddr;
104 struct xfrm_policy_afinfo *afinfo;
105 struct dst_entry *dst;
106
107 if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
108 saddr = x->coaddr;
109 if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
110 daddr = x->coaddr;
92 111
112 afinfo = xfrm_policy_get_afinfo(family);
93 if (unlikely(afinfo == NULL)) 113 if (unlikely(afinfo == NULL))
94 return -EAFNOSUPPORT; 114 return ERR_PTR(-EAFNOSUPPORT);
95 115
96 if (likely(afinfo->dst_lookup != NULL)) 116 dst = afinfo->dst_lookup(tos, saddr, daddr);
97 err = afinfo->dst_lookup(dst, fl);
98 else
99 err = -EINVAL;
100 xfrm_policy_put_afinfo(afinfo); 117 xfrm_policy_put_afinfo(afinfo);
101 return err; 118 return dst;
102} 119}
103EXPORT_SYMBOL(xfrm_dst_lookup);
104 120
105static inline unsigned long make_jiffies(long secs) 121static inline unsigned long make_jiffies(long secs)
106{ 122{
@@ -196,9 +212,8 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
196 INIT_HLIST_NODE(&policy->byidx); 212 INIT_HLIST_NODE(&policy->byidx);
197 rwlock_init(&policy->lock); 213 rwlock_init(&policy->lock);
198 atomic_set(&policy->refcnt, 1); 214 atomic_set(&policy->refcnt, 1);
199 init_timer(&policy->timer); 215 setup_timer(&policy->timer, xfrm_policy_timer,
200 policy->timer.data = (unsigned long)policy; 216 (unsigned long)policy);
201 policy->timer.function = xfrm_policy_timer;
202 } 217 }
203 return policy; 218 return policy;
204} 219}
@@ -206,7 +221,7 @@ EXPORT_SYMBOL(xfrm_policy_alloc);
206 221
207/* Destroy xfrm_policy: descendant resources must be released to this moment. */ 222/* Destroy xfrm_policy: descendant resources must be released to this moment. */
208 223
209void __xfrm_policy_destroy(struct xfrm_policy *policy) 224void xfrm_policy_destroy(struct xfrm_policy *policy)
210{ 225{
211 BUG_ON(!policy->dead); 226 BUG_ON(!policy->dead);
212 227
@@ -218,7 +233,7 @@ void __xfrm_policy_destroy(struct xfrm_policy *policy)
218 security_xfrm_policy_free(policy); 233 security_xfrm_policy_free(policy);
219 kfree(policy); 234 kfree(policy);
220} 235}
221EXPORT_SYMBOL(__xfrm_policy_destroy); 236EXPORT_SYMBOL(xfrm_policy_destroy);
222 237
223static void xfrm_policy_gc_kill(struct xfrm_policy *policy) 238static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
224{ 239{
@@ -1230,24 +1245,185 @@ xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short fa
1230 return x; 1245 return x;
1231} 1246}
1232 1247
1233/* Allocate chain of dst_entry's, attach known xfrm's, calculate 1248static inline int xfrm_get_tos(struct flowi *fl, int family)
1234 * all the metrics... Shortly, bundle a bundle. 1249{
1235 */ 1250 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1251 int tos;
1236 1252
1237static int 1253 if (!afinfo)
1238xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, 1254 return -EINVAL;
1239 struct flowi *fl, struct dst_entry **dst_p, 1255
1240 unsigned short family) 1256 tos = afinfo->get_tos(fl);
1257
1258 xfrm_policy_put_afinfo(afinfo);
1259
1260 return tos;
1261}
1262
1263static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1241{ 1264{
1242 int err;
1243 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1265 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1244 if (unlikely(afinfo == NULL)) 1266 struct xfrm_dst *xdst;
1267
1268 if (!afinfo)
1269 return ERR_PTR(-EINVAL);
1270
1271 xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1272
1273 xfrm_policy_put_afinfo(afinfo);
1274
1275 return xdst;
1276}
1277
1278static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1279 int nfheader_len)
1280{
1281 struct xfrm_policy_afinfo *afinfo =
1282 xfrm_policy_get_afinfo(dst->ops->family);
1283 int err;
1284
1285 if (!afinfo)
1245 return -EINVAL; 1286 return -EINVAL;
1246 err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p); 1287
1288 err = afinfo->init_path(path, dst, nfheader_len);
1289
1247 xfrm_policy_put_afinfo(afinfo); 1290 xfrm_policy_put_afinfo(afinfo);
1291
1248 return err; 1292 return err;
1249} 1293}
1250 1294
1295static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1296{
1297 struct xfrm_policy_afinfo *afinfo =
1298 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1299 int err;
1300
1301 if (!afinfo)
1302 return -EINVAL;
1303
1304 err = afinfo->fill_dst(xdst, dev);
1305
1306 xfrm_policy_put_afinfo(afinfo);
1307
1308 return err;
1309}
1310
1311/* Allocate chain of dst_entry's, attach known xfrm's, calculate
1312 * all the metrics... Shortly, bundle a bundle.
1313 */
1314
1315static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1316 struct xfrm_state **xfrm, int nx,
1317 struct flowi *fl,
1318 struct dst_entry *dst)
1319{
1320 unsigned long now = jiffies;
1321 struct net_device *dev;
1322 struct dst_entry *dst_prev = NULL;
1323 struct dst_entry *dst0 = NULL;
1324 int i = 0;
1325 int err;
1326 int header_len = 0;
1327 int nfheader_len = 0;
1328 int trailer_len = 0;
1329 int tos;
1330 int family = policy->selector.family;
1331
1332 tos = xfrm_get_tos(fl, family);
1333 err = tos;
1334 if (tos < 0)
1335 goto put_states;
1336
1337 dst_hold(dst);
1338
1339 for (; i < nx; i++) {
1340 struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1341 struct dst_entry *dst1 = &xdst->u.dst;
1342
1343 err = PTR_ERR(xdst);
1344 if (IS_ERR(xdst)) {
1345 dst_release(dst);
1346 goto put_states;
1347 }
1348
1349 if (!dst_prev)
1350 dst0 = dst1;
1351 else {
1352 dst_prev->child = dst_clone(dst1);
1353 dst1->flags |= DST_NOHASH;
1354 }
1355
1356 xdst->route = dst;
1357 memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1358
1359 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1360 family = xfrm[i]->props.family;
1361 dst = xfrm_dst_lookup(xfrm[i], tos, family);
1362 err = PTR_ERR(dst);
1363 if (IS_ERR(dst))
1364 goto put_states;
1365 } else
1366 dst_hold(dst);
1367
1368 dst1->xfrm = xfrm[i];
1369 xdst->genid = xfrm[i]->genid;
1370
1371 dst1->obsolete = -1;
1372 dst1->flags |= DST_HOST;
1373 dst1->lastuse = now;
1374
1375 dst1->input = dst_discard;
1376 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1377
1378 dst1->next = dst_prev;
1379 dst_prev = dst1;
1380
1381 header_len += xfrm[i]->props.header_len;
1382 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1383 nfheader_len += xfrm[i]->props.header_len;
1384 trailer_len += xfrm[i]->props.trailer_len;
1385 }
1386
1387 dst_prev->child = dst;
1388 dst0->path = dst;
1389
1390 err = -ENODEV;
1391 dev = dst->dev;
1392 if (!dev)
1393 goto free_dst;
1394
 1395 /* Copy neighbour for reachability confirmation */
1396 dst0->neighbour = neigh_clone(dst->neighbour);
1397
1398 xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1399 xfrm_init_pmtu(dst_prev);
1400
1401 for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1402 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1403
1404 err = xfrm_fill_dst(xdst, dev);
1405 if (err)
1406 goto free_dst;
1407
1408 dst_prev->header_len = header_len;
1409 dst_prev->trailer_len = trailer_len;
1410 header_len -= xdst->u.dst.xfrm->props.header_len;
1411 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1412 }
1413
1414out:
1415 return dst0;
1416
1417put_states:
1418 for (; i < nx; i++)
1419 xfrm_state_put(xfrm[i]);
1420free_dst:
1421 if (dst0)
1422 dst_free(dst0);
1423 dst0 = ERR_PTR(err);
1424 goto out;
1425}
1426
1251static int inline 1427static int inline
1252xfrm_dst_alloc_copy(void **target, void *src, int size) 1428xfrm_dst_alloc_copy(void **target, void *src, int size)
1253{ 1429{
@@ -1319,36 +1495,46 @@ restart:
1319 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { 1495 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1320 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); 1496 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1321 err = PTR_ERR(policy); 1497 err = PTR_ERR(policy);
1322 if (IS_ERR(policy)) 1498 if (IS_ERR(policy)) {
1499 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1323 goto dropdst; 1500 goto dropdst;
1501 }
1324 } 1502 }
1325 1503
1326 if (!policy) { 1504 if (!policy) {
1327 /* To accelerate a bit... */ 1505 /* To accelerate a bit... */
1328 if ((dst_orig->flags & DST_NOXFRM) || 1506 if ((dst_orig->flags & DST_NOXFRM) ||
1329 !xfrm_policy_count[XFRM_POLICY_OUT]) 1507 !xfrm_policy_count[XFRM_POLICY_OUT])
1330 return 0; 1508 goto nopol;
1331 1509
1332 policy = flow_cache_lookup(fl, dst_orig->ops->family, 1510 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1333 dir, xfrm_policy_lookup); 1511 dir, xfrm_policy_lookup);
1334 err = PTR_ERR(policy); 1512 err = PTR_ERR(policy);
1335 if (IS_ERR(policy)) 1513 if (IS_ERR(policy)) {
1514 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1336 goto dropdst; 1515 goto dropdst;
1516 }
1337 } 1517 }
1338 1518
1339 if (!policy) 1519 if (!policy)
1340 return 0; 1520 goto nopol;
1341 1521
1342 family = dst_orig->ops->family; 1522 family = dst_orig->ops->family;
1343 policy->curlft.use_time = get_seconds();
1344 pols[0] = policy; 1523 pols[0] = policy;
1345 npols ++; 1524 npols ++;
1346 xfrm_nr += pols[0]->xfrm_nr; 1525 xfrm_nr += pols[0]->xfrm_nr;
1347 1526
1527 err = -ENOENT;
1528 if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1529 goto error;
1530
1531 policy->curlft.use_time = get_seconds();
1532
1348 switch (policy->action) { 1533 switch (policy->action) {
1349 default: 1534 default:
1350 case XFRM_POLICY_BLOCK: 1535 case XFRM_POLICY_BLOCK:
1351 /* Prohibit the flow */ 1536 /* Prohibit the flow */
1537 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1352 err = -EPERM; 1538 err = -EPERM;
1353 goto error; 1539 goto error;
1354 1540
@@ -1368,6 +1554,7 @@ restart:
1368 */ 1554 */
1369 dst = xfrm_find_bundle(fl, policy, family); 1555 dst = xfrm_find_bundle(fl, policy, family);
1370 if (IS_ERR(dst)) { 1556 if (IS_ERR(dst)) {
1557 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1371 err = PTR_ERR(dst); 1558 err = PTR_ERR(dst);
1372 goto error; 1559 goto error;
1373 } 1560 }
@@ -1382,10 +1569,12 @@ restart:
1382 XFRM_POLICY_OUT); 1569 XFRM_POLICY_OUT);
1383 if (pols[1]) { 1570 if (pols[1]) {
1384 if (IS_ERR(pols[1])) { 1571 if (IS_ERR(pols[1])) {
1572 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1385 err = PTR_ERR(pols[1]); 1573 err = PTR_ERR(pols[1]);
1386 goto error; 1574 goto error;
1387 } 1575 }
1388 if (pols[1]->action == XFRM_POLICY_BLOCK) { 1576 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1577 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1389 err = -EPERM; 1578 err = -EPERM;
1390 goto error; 1579 goto error;
1391 } 1580 }
@@ -1416,10 +1605,11 @@ restart:
1416 /* EREMOTE tells the caller to generate 1605 /* EREMOTE tells the caller to generate
1417 * a one-shot blackhole route. 1606 * a one-shot blackhole route.
1418 */ 1607 */
1608 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1419 xfrm_pol_put(policy); 1609 xfrm_pol_put(policy);
1420 return -EREMOTE; 1610 return -EREMOTE;
1421 } 1611 }
1422 if (err == -EAGAIN && flags) { 1612 if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1423 DECLARE_WAITQUEUE(wait, current); 1613 DECLARE_WAITQUEUE(wait, current);
1424 1614
1425 add_wait_queue(&km_waitq, &wait); 1615 add_wait_queue(&km_waitq, &wait);
@@ -1431,6 +1621,7 @@ restart:
1431 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); 1621 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1432 1622
1433 if (nx == -EAGAIN && signal_pending(current)) { 1623 if (nx == -EAGAIN && signal_pending(current)) {
1624 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1434 err = -ERESTART; 1625 err = -ERESTART;
1435 goto error; 1626 goto error;
1436 } 1627 }
@@ -1441,8 +1632,10 @@ restart:
1441 } 1632 }
1442 err = nx; 1633 err = nx;
1443 } 1634 }
1444 if (err < 0) 1635 if (err < 0) {
1636 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1445 goto error; 1637 goto error;
1638 }
1446 } 1639 }
1447 if (nx == 0) { 1640 if (nx == 0) {
1448 /* Flow passes not transformed. */ 1641 /* Flow passes not transformed. */
@@ -1450,13 +1643,10 @@ restart:
1450 return 0; 1643 return 0;
1451 } 1644 }
1452 1645
1453 dst = dst_orig; 1646 dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1454 err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family); 1647 err = PTR_ERR(dst);
1455 1648 if (IS_ERR(dst)) {
1456 if (unlikely(err)) { 1649 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1457 int i;
1458 for (i=0; i<nx; i++)
1459 xfrm_state_put(xfrm[i]);
1460 goto error; 1650 goto error;
1461 } 1651 }
1462 1652
@@ -1477,6 +1667,10 @@ restart:
1477 if (dst) 1667 if (dst)
1478 dst_free(dst); 1668 dst_free(dst);
1479 1669
1670 if (pol_dead)
1671 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
1672 else
1673 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1480 err = -EHOSTUNREACH; 1674 err = -EHOSTUNREACH;
1481 goto error; 1675 goto error;
1482 } 1676 }
@@ -1489,6 +1683,7 @@ restart:
1489 write_unlock_bh(&policy->lock); 1683 write_unlock_bh(&policy->lock);
1490 if (dst) 1684 if (dst)
1491 dst_free(dst); 1685 dst_free(dst);
1686 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1492 goto error; 1687 goto error;
1493 } 1688 }
1494 1689
@@ -1508,6 +1703,12 @@ dropdst:
1508 dst_release(dst_orig); 1703 dst_release(dst_orig);
1509 *dst_p = NULL; 1704 *dst_p = NULL;
1510 return err; 1705 return err;
1706
1707nopol:
1708 err = -ENOENT;
1709 if (flags & XFRM_LOOKUP_ICMP)
1710 goto dropdst;
1711 return 0;
1511} 1712}
1512EXPORT_SYMBOL(__xfrm_lookup); 1713EXPORT_SYMBOL(__xfrm_lookup);
1513 1714
@@ -1591,8 +1792,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1591 return start; 1792 return start;
1592} 1793}
1593 1794
1594int 1795int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1595xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) 1796 unsigned int family, int reverse)
1596{ 1797{
1597 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1798 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1598 int err; 1799 int err;
@@ -1600,12 +1801,12 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family
1600 if (unlikely(afinfo == NULL)) 1801 if (unlikely(afinfo == NULL))
1601 return -EAFNOSUPPORT; 1802 return -EAFNOSUPPORT;
1602 1803
1603 afinfo->decode_session(skb, fl); 1804 afinfo->decode_session(skb, fl, reverse);
1604 err = security_xfrm_decode_session(skb, &fl->secid); 1805 err = security_xfrm_decode_session(skb, &fl->secid);
1605 xfrm_policy_put_afinfo(afinfo); 1806 xfrm_policy_put_afinfo(afinfo);
1606 return err; 1807 return err;
1607} 1808}
1608EXPORT_SYMBOL(xfrm_decode_session); 1809EXPORT_SYMBOL(__xfrm_decode_session);
1609 1810
1610static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) 1811static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1611{ 1812{
@@ -1627,12 +1828,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1627 int npols = 0; 1828 int npols = 0;
1628 int xfrm_nr; 1829 int xfrm_nr;
1629 int pi; 1830 int pi;
1831 int reverse;
1630 struct flowi fl; 1832 struct flowi fl;
1631 u8 fl_dir = policy_to_flow_dir(dir); 1833 u8 fl_dir;
1632 int xerr_idx = -1; 1834 int xerr_idx = -1;
1633 1835
1634 if (xfrm_decode_session(skb, &fl, family) < 0) 1836 reverse = dir & ~XFRM_POLICY_MASK;
1837 dir &= XFRM_POLICY_MASK;
1838 fl_dir = policy_to_flow_dir(dir);
1839
1840 if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1841 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1635 return 0; 1842 return 0;
1843 }
1844
1636 nf_nat_decode_session(skb, &fl, family); 1845 nf_nat_decode_session(skb, &fl, family);
1637 1846
1638 /* First, check used SA against their selectors. */ 1847 /* First, check used SA against their selectors. */
@@ -1641,28 +1850,35 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1641 1850
1642 for (i=skb->sp->len-1; i>=0; i--) { 1851 for (i=skb->sp->len-1; i>=0; i--) {
1643 struct xfrm_state *x = skb->sp->xvec[i]; 1852 struct xfrm_state *x = skb->sp->xvec[i];
1644 if (!xfrm_selector_match(&x->sel, &fl, family)) 1853 if (!xfrm_selector_match(&x->sel, &fl, family)) {
1854 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
1645 return 0; 1855 return 0;
1856 }
1646 } 1857 }
1647 } 1858 }
1648 1859
1649 pol = NULL; 1860 pol = NULL;
1650 if (sk && sk->sk_policy[dir]) { 1861 if (sk && sk->sk_policy[dir]) {
1651 pol = xfrm_sk_policy_lookup(sk, dir, &fl); 1862 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1652 if (IS_ERR(pol)) 1863 if (IS_ERR(pol)) {
1864 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1653 return 0; 1865 return 0;
1866 }
1654 } 1867 }
1655 1868
1656 if (!pol) 1869 if (!pol)
1657 pol = flow_cache_lookup(&fl, family, fl_dir, 1870 pol = flow_cache_lookup(&fl, family, fl_dir,
1658 xfrm_policy_lookup); 1871 xfrm_policy_lookup);
1659 1872
1660 if (IS_ERR(pol)) 1873 if (IS_ERR(pol)) {
1874 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1661 return 0; 1875 return 0;
1876 }
1662 1877
1663 if (!pol) { 1878 if (!pol) {
1664 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { 1879 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1665 xfrm_secpath_reject(xerr_idx, skb, &fl); 1880 xfrm_secpath_reject(xerr_idx, skb, &fl);
1881 XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
1666 return 0; 1882 return 0;
1667 } 1883 }
1668 return 1; 1884 return 1;
@@ -1678,8 +1894,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1678 &fl, family, 1894 &fl, family,
1679 XFRM_POLICY_IN); 1895 XFRM_POLICY_IN);
1680 if (pols[1]) { 1896 if (pols[1]) {
1681 if (IS_ERR(pols[1])) 1897 if (IS_ERR(pols[1])) {
1898 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1682 return 0; 1899 return 0;
1900 }
1683 pols[1]->curlft.use_time = get_seconds(); 1901 pols[1]->curlft.use_time = get_seconds();
1684 npols ++; 1902 npols ++;
1685 } 1903 }
@@ -1700,10 +1918,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1700 1918
1701 for (pi = 0; pi < npols; pi++) { 1919 for (pi = 0; pi < npols; pi++) {
1702 if (pols[pi] != pol && 1920 if (pols[pi] != pol &&
1703 pols[pi]->action != XFRM_POLICY_ALLOW) 1921 pols[pi]->action != XFRM_POLICY_ALLOW) {
1922 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1704 goto reject; 1923 goto reject;
1705 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) 1924 }
1925 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
1926 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
1706 goto reject_error; 1927 goto reject_error;
1928 }
1707 for (i = 0; i < pols[pi]->xfrm_nr; i++) 1929 for (i = 0; i < pols[pi]->xfrm_nr; i++)
1708 tpp[ti++] = &pols[pi]->xfrm_vec[i]; 1930 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1709 } 1931 }
@@ -1725,16 +1947,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1725 if (k < -1) 1947 if (k < -1)
1726 /* "-2 - errored_index" returned */ 1948 /* "-2 - errored_index" returned */
1727 xerr_idx = -(2+k); 1949 xerr_idx = -(2+k);
1950 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1728 goto reject; 1951 goto reject;
1729 } 1952 }
1730 } 1953 }
1731 1954
1732 if (secpath_has_nontransport(sp, k, &xerr_idx)) 1955 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
1956 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1733 goto reject; 1957 goto reject;
1958 }
1734 1959
1735 xfrm_pols_put(pols, npols); 1960 xfrm_pols_put(pols, npols);
1736 return 1; 1961 return 1;
1737 } 1962 }
1963 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1738 1964
1739reject: 1965reject:
1740 xfrm_secpath_reject(xerr_idx, skb, &fl); 1966 xfrm_secpath_reject(xerr_idx, skb, &fl);
@@ -1748,8 +1974,11 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1748{ 1974{
1749 struct flowi fl; 1975 struct flowi fl;
1750 1976
1751 if (xfrm_decode_session(skb, &fl, family) < 0) 1977 if (xfrm_decode_session(skb, &fl, family) < 0) {
1978 /* XXX: we should have something like FWDHDRERROR here. */
1979 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1752 return 0; 1980 return 0;
1981 }
1753 1982
1754 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; 1983 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
1755} 1984}
@@ -1793,7 +2022,7 @@ static int stale_bundle(struct dst_entry *dst)
1793void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) 2022void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
1794{ 2023{
1795 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { 2024 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
1796 dst->dev = init_net.loopback_dev; 2025 dst->dev = dev->nd_net->loopback_dev;
1797 dev_hold(dst->dev); 2026 dev_hold(dst->dev);
1798 dev_put(dev); 2027 dev_put(dev);
1799 } 2028 }
@@ -1882,7 +2111,7 @@ static int xfrm_flush_bundles(void)
1882 return 0; 2111 return 0;
1883} 2112}
1884 2113
1885void xfrm_init_pmtu(struct dst_entry *dst) 2114static void xfrm_init_pmtu(struct dst_entry *dst)
1886{ 2115{
1887 do { 2116 do {
1888 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 2117 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@ -1903,8 +2132,6 @@ void xfrm_init_pmtu(struct dst_entry *dst)
1903 } while ((dst = dst->next)); 2132 } while ((dst = dst->next));
1904} 2133}
1905 2134
1906EXPORT_SYMBOL(xfrm_init_pmtu);
1907
1908/* Check that the bundle accepts the flow and its components are 2135/* Check that the bundle accepts the flow and its components are
1909 * still valid. 2136 * still valid.
1910 */ 2137 */
@@ -2082,6 +2309,16 @@ static struct notifier_block xfrm_dev_notifier = {
2082 0 2309 0
2083}; 2310};
2084 2311
2312#ifdef CONFIG_XFRM_STATISTICS
2313static int __init xfrm_statistics_init(void)
2314{
2315 if (snmp_mib_init((void **)xfrm_statistics,
2316 sizeof(struct linux_xfrm_mib)) < 0)
2317 return -ENOMEM;
2318 return 0;
2319}
2320#endif
2321
2085static void __init xfrm_policy_init(void) 2322static void __init xfrm_policy_init(void)
2086{ 2323{
2087 unsigned int hmask, sz; 2324 unsigned int hmask, sz;
@@ -2118,71 +2355,81 @@ static void __init xfrm_policy_init(void)
 
 void __init xfrm_init(void)
 {
+#ifdef CONFIG_XFRM_STATISTICS
+        xfrm_statistics_init();
+#endif
         xfrm_state_init();
         xfrm_policy_init();
         xfrm_input_init();
+#ifdef CONFIG_XFRM_STATISTICS
+        xfrm_proc_init();
+#endif
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
-                                                struct audit_buffer *audit_buf)
+static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
+                                         struct audit_buffer *audit_buf)
 {
-        if (xp->security)
+        struct xfrm_sec_ctx *ctx = xp->security;
+        struct xfrm_selector *sel = &xp->selector;
+
+        if (ctx)
                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
-                                 xp->security->ctx_alg, xp->security->ctx_doi,
-                                 xp->security->ctx_str);
+                                 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
 
-        switch(xp->selector.family) {
+        switch(sel->family) {
         case AF_INET:
-                audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
-                                 NIPQUAD(xp->selector.saddr.a4),
-                                 NIPQUAD(xp->selector.daddr.a4));
+                audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
+                                 NIPQUAD(sel->saddr.a4));
+                if (sel->prefixlen_s != 32)
+                        audit_log_format(audit_buf, " src_prefixlen=%d",
+                                         sel->prefixlen_s);
+                audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
+                                 NIPQUAD(sel->daddr.a4));
+                if (sel->prefixlen_d != 32)
+                        audit_log_format(audit_buf, " dst_prefixlen=%d",
+                                         sel->prefixlen_d);
                 break;
         case AF_INET6:
-        {
-                struct in6_addr saddr6, daddr6;
-
-                memcpy(&saddr6, xp->selector.saddr.a6,
-                       sizeof(struct in6_addr));
-                memcpy(&daddr6, xp->selector.daddr.a6,
-                       sizeof(struct in6_addr));
-                audit_log_format(audit_buf,
-                                 " src=" NIP6_FMT " dst=" NIP6_FMT,
-                                 NIP6(saddr6), NIP6(daddr6));
-        }
+                audit_log_format(audit_buf, " src=" NIP6_FMT,
+                                 NIP6(*(struct in6_addr *)sel->saddr.a6));
+                if (sel->prefixlen_s != 128)
+                        audit_log_format(audit_buf, " src_prefixlen=%d",
+                                         sel->prefixlen_s);
+                audit_log_format(audit_buf, " dst=" NIP6_FMT,
+                                 NIP6(*(struct in6_addr *)sel->daddr.a6));
+                if (sel->prefixlen_d != 128)
+                        audit_log_format(audit_buf, " dst_prefixlen=%d",
+                                         sel->prefixlen_d);
                 break;
         }
 }
 
-void
-xfrm_audit_policy_add(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
+                           u32 auid, u32 secid)
 {
         struct audit_buffer *audit_buf;
-        extern int audit_enabled;
 
-        if (audit_enabled == 0)
-                return;
-        audit_buf = xfrm_audit_start(auid, sid);
+        audit_buf = xfrm_audit_start("SPD-add");
         if (audit_buf == NULL)
                 return;
-        audit_log_format(audit_buf, " op=SPD-add res=%u", result);
+        xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
+        audit_log_format(audit_buf, " res=%u", result);
         xfrm_audit_common_policyinfo(xp, audit_buf);
         audit_log_end(audit_buf);
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
 
-void
-xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
+void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
+                              u32 auid, u32 secid)
 {
         struct audit_buffer *audit_buf;
-        extern int audit_enabled;
 
-        if (audit_enabled == 0)
-                return;
-        audit_buf = xfrm_audit_start(auid, sid);
+        audit_buf = xfrm_audit_start("SPD-delete");
         if (audit_buf == NULL)
                 return;
-        audit_log_format(audit_buf, " op=SPD-delete res=%u", result);
+        xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
+        audit_log_format(audit_buf, " res=%u", result);
         xfrm_audit_common_policyinfo(xp, audit_buf);
         audit_log_end(audit_buf);
 }
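
Taken together, the selector-logging changes above mean an IPv4 SPD record only carries src_prefixlen/dst_prefixlen fields when the selector is narrower than a host address. A minimal userspace sketch of how such a record renders, assuming NIPQUAD_FMT expands to "%u.%u.%u.%u" as in kernels of this era; the values and the omission of the auid/secid fields (appended by the shared usrinfo helper) are illustrative, not kernel API:

        #include <stdio.h>

        /* Mirrors the IPv4 branch of xfrm_audit_common_policyinfo() above:
         * the prefix length is logged only for non-host selectors. */
        static void log_sel_addr(const char *tag, const unsigned char a[4], int plen)
        {
                printf(" %s=%u.%u.%u.%u", tag, a[0], a[1], a[2], a[3]);
                if (plen != 32)
                        printf(" %s_prefixlen=%d", tag, plen);
        }

        int main(void)
        {
                const unsigned char src[4] = { 10, 0, 0, 0 };
                const unsigned char dst[4] = { 10, 0, 1, 5 };

                printf("op=SPD-add");
                log_sel_addr("src", src, 24);   /* -> src=10.0.0.0 src_prefixlen=24 */
                log_sel_addr("dst", dst, 32);   /* host address: no dst_prefixlen */
                printf(" res=1\n");
                return 0;
        }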
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
new file mode 100644
index 000000000000..31d035415ecd
--- /dev/null
+++ b/net/xfrm/xfrm_proc.c
@@ -0,0 +1,96 @@
+/*
+ * xfrm_proc.c
+ *
+ * Copyright (C)2006-2007 USAGI/WIDE Project
+ *
+ * Authors:     Masahide NAKAMURA <nakam@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/snmp.h>
+#include <net/xfrm.h>
+
+static struct snmp_mib xfrm_mib_list[] = {
+        SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR),
+        SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR),
+        SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR),
+        SNMP_MIB_ITEM("XfrmInNoStates", LINUX_MIB_XFRMINNOSTATES),
+        SNMP_MIB_ITEM("XfrmInStateProtoError", LINUX_MIB_XFRMINSTATEPROTOERROR),
+        SNMP_MIB_ITEM("XfrmInStateModeError", LINUX_MIB_XFRMINSTATEMODEERROR),
+        SNMP_MIB_ITEM("XfrmInSeqOutOfWindow", LINUX_MIB_XFRMINSEQOUTOFWINDOW),
+        SNMP_MIB_ITEM("XfrmInStateExpired", LINUX_MIB_XFRMINSTATEEXPIRED),
+        SNMP_MIB_ITEM("XfrmInStateMismatch", LINUX_MIB_XFRMINSTATEMISMATCH),
+        SNMP_MIB_ITEM("XfrmInStateInvalid", LINUX_MIB_XFRMINSTATEINVALID),
+        SNMP_MIB_ITEM("XfrmInTmplMismatch", LINUX_MIB_XFRMINTMPLMISMATCH),
+        SNMP_MIB_ITEM("XfrmInNoPols", LINUX_MIB_XFRMINNOPOLS),
+        SNMP_MIB_ITEM("XfrmInPolBlock", LINUX_MIB_XFRMINPOLBLOCK),
+        SNMP_MIB_ITEM("XfrmInPolError", LINUX_MIB_XFRMINPOLERROR),
+        SNMP_MIB_ITEM("XfrmOutError", LINUX_MIB_XFRMOUTERROR),
+        SNMP_MIB_ITEM("XfrmOutBundleGenError", LINUX_MIB_XFRMOUTBUNDLEGENERROR),
+        SNMP_MIB_ITEM("XfrmOutBundleCheckError", LINUX_MIB_XFRMOUTBUNDLECHECKERROR),
+        SNMP_MIB_ITEM("XfrmOutNoStates", LINUX_MIB_XFRMOUTNOSTATES),
+        SNMP_MIB_ITEM("XfrmOutStateProtoError", LINUX_MIB_XFRMOUTSTATEPROTOERROR),
+        SNMP_MIB_ITEM("XfrmOutStateModeError", LINUX_MIB_XFRMOUTSTATEMODEERROR),
+        SNMP_MIB_ITEM("XfrmOutStateExpired", LINUX_MIB_XFRMOUTSTATEEXPIRED),
+        SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK),
+        SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD),
+        SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),
+        SNMP_MIB_SENTINEL
+};
+
+static unsigned long
+fold_field(void *mib[], int offt)
+{
+        unsigned long res = 0;
+        int i;
+
+        for_each_possible_cpu(i) {
+                res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
+                res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
+        }
+        return res;
+}
+
+static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
+{
+        int i;
+        for (i=0; xfrm_mib_list[i].name; i++)
+                seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
+                           fold_field((void **)xfrm_statistics,
+                                      xfrm_mib_list[i].entry));
+        return 0;
+}
+
+static int xfrm_statistics_seq_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, xfrm_statistics_seq_show, NULL);
+}
+
+static struct file_operations xfrm_statistics_seq_fops = {
+        .owner   = THIS_MODULE,
+        .open    = xfrm_statistics_seq_open,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+        .release = single_release,
+};
+
+int __init xfrm_proc_init(void)
+{
+        int rc = 0;
+
+        if (!proc_net_fops_create(&init_net, "xfrm_stat", S_IRUGO,
+                                  &xfrm_statistics_seq_fops))
+                goto stat_fail;
+
+ out:
+        return rc;
+
+ stat_fail:
+        rc = -ENOMEM;
+        goto out;
+}
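
The new file above exports the MIB counters through /proc/net/xfrm_stat, one "Name\tValue" row per counter (the "%-24s\t%lu\n" format in xfrm_statistics_seq_show()). A minimal userspace reader, assuming a kernel built with CONFIG_XFRM_STATISTICS so the file exists, might look like this sketch:

        #include <stdio.h>

        int main(void)
        {
                char name[64];
                unsigned long val;
                FILE *f = fopen("/proc/net/xfrm_stat", "r");

                if (!f) {
                        perror("fopen /proc/net/xfrm_stat");
                        return 1;
                }
                /* each row is "%-24s\t%lu\n", per xfrm_statistics_seq_show() */
                while (fscanf(f, "%63s %lu", name, &val) == 2)
                        if (val)
                                printf("%s is non-zero: %lu\n", name, val);
                fclose(f);
                return 0;
        }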
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f26aaaca1fae..3003503d0c94 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -19,6 +19,7 @@
 #include <linux/ipsec.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <linux/audit.h>
 #include <asm/uaccess.h>
 
 #include "xfrm_hash.h"
@@ -60,6 +61,13 @@ static unsigned int xfrm_state_genid;
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
+#ifdef CONFIG_AUDITSYSCALL
+static void xfrm_audit_state_replay(struct xfrm_state *x,
+                                    struct sk_buff *skb, __be32 net_seq);
+#else
+#define xfrm_audit_state_replay(x, s, sq)       do { ; } while (0)
+#endif /* CONFIG_AUDITSYSCALL */
+
 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
                                          xfrm_address_t *saddr,
                                          u32 reqid,
@@ -203,6 +211,7 @@ static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
 }
 
 static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo)
+        __releases(xfrm_state_afinfo_lock)
 {
         write_unlock_bh(&xfrm_state_afinfo_lock);
 }
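
The __releases() annotation added here (and to xfrm_state_put_afinfo() further down) is a hint for the sparse static checker that the function exits with the named lock dropped; it compiles away under gcc. Roughly, paraphrasing include/linux/compiler.h of this era, so treat the exact body as an assumption:

        #ifdef __CHECKER__
        # define __releases(x)  __attribute__((context(x, 1, 0)))
        #else
        # define __releases(x)
        #endif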
@@ -504,12 +513,9 @@ struct xfrm_state *xfrm_state_alloc(void)
         INIT_HLIST_NODE(&x->bydst);
         INIT_HLIST_NODE(&x->bysrc);
         INIT_HLIST_NODE(&x->byspi);
-        init_timer(&x->timer);
-        x->timer.function = xfrm_timer_handler;
-        x->timer.data = (unsigned long)x;
-        init_timer(&x->rtimer);
-        x->rtimer.function = xfrm_replay_timer_handler;
-        x->rtimer.data = (unsigned long)x;
+        setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x);
+        setup_timer(&x->rtimer, xfrm_replay_timer_handler,
+                    (unsigned long)x);
         x->curlft.add_time = get_seconds();
         x->lft.soft_byte_limit = XFRM_INF;
         x->lft.soft_packet_limit = XFRM_INF;
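
The hunk above is a behavior-preserving cleanup: setup_timer() folds the three-step init_timer()/function/data sequence into a single call. For reference, a sketch of the helper as it existed around 2.6.24, paraphrased from include/linux/timer.h, so treat the exact body as an assumption:

        static inline void setup_timer(struct timer_list *timer,
                                       void (*function)(unsigned long),
                                       unsigned long data)
        {
                timer->function = function;
                timer->data = data;
                init_timer(timer);
        }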
@@ -759,7 +765,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                    struct xfrm_policy *pol, int *err,
                    unsigned short family)
 {
-        unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
+        unsigned int h;
         struct hlist_node *entry;
         struct xfrm_state *x, *x0;
         int acquire_in_progress = 0;
@@ -767,6 +773,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
         struct xfrm_state *best = NULL;
 
         spin_lock_bh(&xfrm_state_lock);
+        h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                 if (x->props.family == family &&
                     x->props.reqid == tmpl->reqid &&
@@ -868,11 +875,12 @@ struct xfrm_state *
 xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                     unsigned short family, u8 mode, u8 proto, u32 reqid)
 {
-        unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
+        unsigned int h;
         struct xfrm_state *rx = NULL, *x = NULL;
         struct hlist_node *entry;
 
         spin_lock(&xfrm_state_lock);
+        h = xfrm_dst_hash(daddr, saddr, reqid, family);
         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                 if (x->props.family == family &&
                     x->props.reqid == reqid &&
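
This hunk and the matching one in xfrm_state_find() move the bucket computation under xfrm_state_lock: the bydst hash table can be resized concurrently, so an index derived from the old mask may point at the wrong bucket by the time the list walk starts. A self-contained userspace analogy of the hazard, with names that are illustrative rather than kernel API:

        #include <stdio.h>

        static unsigned int hmask = 7;                  /* table has 8 buckets */

        static unsigned int dst_hash(unsigned int key)
        {
                return key & hmask;                     /* stands in for xfrm_dst_hash() */
        }

        int main(void)
        {
                unsigned int key = 12;
                unsigned int early = dst_hash(key);     /* computed before "locking" */

                hmask = 15;                             /* concurrent resize to 16 buckets */

                unsigned int safe = dst_hash(key);      /* computed under the "lock" */

                printf("early=%u safe=%u\n", early, safe);      /* 4 vs 12: different buckets */
                return 0;
        }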
@@ -1092,7 +1100,7 @@ out:
 EXPORT_SYMBOL(xfrm_state_add);
 
 #ifdef CONFIG_XFRM_MIGRATE
-struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
+static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 {
         int err = -ENOMEM;
         struct xfrm_state *x = xfrm_state_alloc();
@@ -1167,7 +1175,6 @@ struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
         kfree(x);
         return NULL;
 }
-EXPORT_SYMBOL(xfrm_state_clone);
 
 /* xfrm_state_lock is held */
 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
@@ -1609,13 +1616,14 @@ static void xfrm_replay_timer_handler(unsigned long data)
         spin_unlock(&x->lock);
 }
 
-int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
+int xfrm_replay_check(struct xfrm_state *x,
+                      struct sk_buff *skb, __be32 net_seq)
 {
         u32 diff;
         u32 seq = ntohl(net_seq);
 
         if (unlikely(seq == 0))
-                return -EINVAL;
+                goto err;
 
         if (likely(seq > x->replay.seq))
                 return 0;
@@ -1624,14 +1632,18 @@ int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
         if (diff >= min_t(unsigned int, x->props.replay_window,
                           sizeof(x->replay.bitmap) * 8)) {
                 x->stats.replay_window++;
-                return -EINVAL;
+                goto err;
         }
 
         if (x->replay.bitmap & (1U << diff)) {
                 x->stats.replay++;
-                return -EINVAL;
+                goto err;
         }
         return 0;
+
+err:
+        xfrm_audit_state_replay(x, skb, net_seq);
+        return -EINVAL;
 }
 EXPORT_SYMBOL(xfrm_replay_check);
 
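
The change above routes all three failure paths of the anti-replay test through one audited exit. The window logic itself is untouched: a sequence number ahead of the highest seen always passes; one behind it passes only if it still falls inside the window and its bit is not yet set. A self-contained sketch of that test, with a 32-bit bitmap standing in for x->replay.bitmap, `last` for x->replay.seq, and host-order sequence numbers for brevity (the kernel converts with ntohl()):

        #include <stdio.h>

        struct replay { unsigned int last; unsigned int bitmap; };

        static int replay_check(const struct replay *r, unsigned int seq,
                                unsigned int window)
        {
                unsigned int diff;

                if (seq == 0)
                        return -1;              /* audited as SA-replayed-pkt */
                if (seq > r->last)
                        return 0;               /* advances the window: fine */
                diff = r->last - seq;
                if (diff >= window || diff >= 32)
                        return -1;              /* fell off the window */
                if (r->bitmap & (1U << diff))
                        return -1;              /* bit already set: a replay */
                return 0;
        }

        int main(void)
        {
                struct replay r = { .last = 100, .bitmap = 0x1 };       /* seq 100 seen */

                printf("%d %d %d\n",
                       replay_check(&r, 101, 32),       /* 0: new, ahead of window */
                       replay_check(&r, 100, 32),       /* -1: bit 0 set, replayed */
                       replay_check(&r, 60, 32));       /* -1: outside 32-wide window */
                return 0;
        }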
@@ -1657,7 +1669,7 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
 }
 EXPORT_SYMBOL(xfrm_replay_advance);
 
-static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
+static LIST_HEAD(xfrm_km_list);
 static DEFINE_RWLOCK(xfrm_km_lock);
 
 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
@@ -1897,6 +1909,7 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
 }
 
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
+        __releases(xfrm_state_afinfo_lock)
 {
         read_unlock(&xfrm_state_afinfo_lock);
 }
@@ -1996,73 +2009,172 @@ void __init xfrm_state_init(void)
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-static inline void xfrm_audit_common_stateinfo(struct xfrm_state *x,
-                                               struct audit_buffer *audit_buf)
+static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
+                                     struct audit_buffer *audit_buf)
 {
-        if (x->security)
+        struct xfrm_sec_ctx *ctx = x->security;
+        u32 spi = ntohl(x->id.spi);
+
+        if (ctx)
                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
-                                 x->security->ctx_alg, x->security->ctx_doi,
-                                 x->security->ctx_str);
+                                 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
 
         switch(x->props.family) {
         case AF_INET:
-                audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
+                audit_log_format(audit_buf,
+                                 " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT,
                                  NIPQUAD(x->props.saddr.a4),
                                  NIPQUAD(x->id.daddr.a4));
                 break;
         case AF_INET6:
-        {
-                struct in6_addr saddr6, daddr6;
-
-                memcpy(&saddr6, x->props.saddr.a6,
-                       sizeof(struct in6_addr));
-                memcpy(&daddr6, x->id.daddr.a6,
-                       sizeof(struct in6_addr));
-                audit_log_format(audit_buf,
-                                 " src=" NIP6_FMT " dst=" NIP6_FMT,
-                                 NIP6(saddr6), NIP6(daddr6));
-        }
+                audit_log_format(audit_buf,
+                                 " src=" NIP6_FMT " dst=" NIP6_FMT,
+                                 NIP6(*(struct in6_addr *)x->props.saddr.a6),
+                                 NIP6(*(struct in6_addr *)x->id.daddr.a6));
                 break;
         }
+
+        audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
 }
 
-void
-xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid)
+static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
+                                      struct audit_buffer *audit_buf)
+{
+        struct iphdr *iph4;
+        struct ipv6hdr *iph6;
+
+        switch (family) {
+        case AF_INET:
+                iph4 = ip_hdr(skb);
+                audit_log_format(audit_buf,
+                                 " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT,
+                                 NIPQUAD(iph4->saddr),
+                                 NIPQUAD(iph4->daddr));
+                break;
+        case AF_INET6:
+                iph6 = ipv6_hdr(skb);
+                audit_log_format(audit_buf,
+                                 " src=" NIP6_FMT " dst=" NIP6_FMT
+                                 " flowlbl=0x%x%x%x",
+                                 NIP6(iph6->saddr),
+                                 NIP6(iph6->daddr),
+                                 iph6->flow_lbl[0] & 0x0f,
+                                 iph6->flow_lbl[1],
+                                 iph6->flow_lbl[2]);
+                break;
+        }
+}
+
+void xfrm_audit_state_add(struct xfrm_state *x, int result,
+                          u32 auid, u32 secid)
 {
         struct audit_buffer *audit_buf;
-        u32 spi;
-        extern int audit_enabled;
 
-        if (audit_enabled == 0)
+        audit_buf = xfrm_audit_start("SAD-add");
+        if (audit_buf == NULL)
                 return;
-        audit_buf = xfrm_audit_start(auid, sid);
+        xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
+        xfrm_audit_helper_sainfo(x, audit_buf);
+        audit_log_format(audit_buf, " res=%u", result);
+        audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
+
+void xfrm_audit_state_delete(struct xfrm_state *x, int result,
+                             u32 auid, u32 secid)
+{
+        struct audit_buffer *audit_buf;
+
+        audit_buf = xfrm_audit_start("SAD-delete");
         if (audit_buf == NULL)
                 return;
-        audit_log_format(audit_buf, " op=SAD-add res=%u",result);
-        xfrm_audit_common_stateinfo(x, audit_buf);
+        xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
+        xfrm_audit_helper_sainfo(x, audit_buf);
+        audit_log_format(audit_buf, " res=%u", result);
+        audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
+
+void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
+                                      struct sk_buff *skb)
+{
+        struct audit_buffer *audit_buf;
+        u32 spi;
+
+        audit_buf = xfrm_audit_start("SA-replay-overflow");
+        if (audit_buf == NULL)
+                return;
+        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
+        /* don't record the sequence number because it's inherent in this kind
+         * of audit message */
         spi = ntohl(x->id.spi);
         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
         audit_log_end(audit_buf);
 }
-EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
+EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
 
-void
-xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid)
+static void xfrm_audit_state_replay(struct xfrm_state *x,
+                                    struct sk_buff *skb, __be32 net_seq)
 {
         struct audit_buffer *audit_buf;
         u32 spi;
-        extern int audit_enabled;
 
-        if (audit_enabled == 0)
-                return;
-        audit_buf = xfrm_audit_start(auid, sid);
+        audit_buf = xfrm_audit_start("SA-replayed-pkt");
         if (audit_buf == NULL)
                 return;
-        audit_log_format(audit_buf, " op=SAD-delete res=%u",result);
-        xfrm_audit_common_stateinfo(x, audit_buf);
+        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
         spi = ntohl(x->id.spi);
-        audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
+        audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
+                         spi, spi, ntohl(net_seq));
         audit_log_end(audit_buf);
 }
-EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
+
+void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
+{
+        struct audit_buffer *audit_buf;
+
+        audit_buf = xfrm_audit_start("SA-notfound");
+        if (audit_buf == NULL)
+                return;
+        xfrm_audit_helper_pktinfo(skb, family, audit_buf);
+        audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
+
+void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
+                               __be32 net_spi, __be32 net_seq)
+{
+        struct audit_buffer *audit_buf;
+        u32 spi;
+
+        audit_buf = xfrm_audit_start("SA-notfound");
+        if (audit_buf == NULL)
+                return;
+        xfrm_audit_helper_pktinfo(skb, family, audit_buf);
+        spi = ntohl(net_spi);
+        audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
+                         spi, spi, ntohl(net_seq));
+        audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
+
+void xfrm_audit_state_icvfail(struct xfrm_state *x,
+                              struct sk_buff *skb, u8 proto)
+{
+        struct audit_buffer *audit_buf;
+        __be32 net_spi;
+        __be32 net_seq;
+
+        audit_buf = xfrm_audit_start("SA-icv-failure");
+        if (audit_buf == NULL)
+                return;
+        xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
+        if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
+                u32 spi = ntohl(net_spi);
+                audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
+                                 spi, spi, ntohl(net_seq));
+        }
+        audit_log_end(audit_buf);
+}
+EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
 #endif /* CONFIG_AUDITSYSCALL */
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c4f6419b1769..e0ccdf267813 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1043,7 +1043,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
                 return xp;
  error:
         *errp = err;
-        kfree(xp);
+        xfrm_policy_destroy(xp);
         return NULL;
 }
 
@@ -1986,8 +1986,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
         if (x->coaddr)
                 l += nla_total_size(sizeof(*x->coaddr));
 
-        /* Must count this as this may become non-zero behind our back. */
-        l += nla_total_size(sizeof(x->lastused));
+        /* Must count x->lastused as it may become non-zero behind our back. */
+        l += nla_total_size(sizeof(u64));
 
         return l;
 }
@@ -2420,7 +2420,7 @@ static void __exit xfrm_user_exit(void)
         xfrm_unregister_km(&netlink_mgr);
         rcu_assign_pointer(xfrm_nl, NULL);
         synchronize_rcu();
-        sock_release(nlsk->sk_socket);
+        netlink_kernel_release(nlsk);
 }
 
 module_init(xfrm_user_init);