aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/Makefile4
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/appletalk/ddp.c24
-rw-r--r--net/atm/br2684.c7
-rw-r--r--net/atm/clip.c1
-rw-r--r--net/atm/ioctl.c1
-rw-r--r--net/atm/lec.c10
-rw-r--r--net/atm/mpc.c3
-rw-r--r--net/atm/pppoatm.c1
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/raw.c1
-rw-r--r--net/atm/resources.c1
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c7
-rw-r--r--net/ax25/ax25_route.c2
-rw-r--r--net/ax25/ax25_uid.c2
-rw-r--r--net/bluetooth/af_bluetooth.c5
-rw-r--r--net/bluetooth/bnep/core.c6
-rw-r--r--net/bluetooth/bnep/sock.c3
-rw-r--r--net/bluetooth/cmtp/sock.c3
-rw-r--r--net/bluetooth/hci_conn.c2
-rw-r--r--net/bluetooth/hci_sock.c3
-rw-r--r--net/bluetooth/hci_sysfs.c4
-rw-r--r--net/bluetooth/hidp/sock.c3
-rw-r--r--net/bluetooth/l2cap.c10
-rw-r--r--net/bluetooth/rfcomm/sock.c4
-rw-r--r--net/bluetooth/rfcomm/tty.c10
-rw-r--r--net/bluetooth/sco.c9
-rw-r--r--net/bridge/br.c1
-rw-r--r--net/bridge/br_device.c91
-rw-r--r--net/bridge/br_if.c60
-rw-r--r--net/bridge/br_input.c13
-rw-r--r--net/bridge/br_ioctl.c1
-rw-r--r--net/bridge/br_netfilter.c17
-rw-r--r--net/bridge/br_notify.c14
-rw-r--r--net/bridge/br_private.h7
-rw-r--r--net/bridge/br_stp_if.c5
-rw-r--r--net/bridge/br_sysfs_br.c1
-rw-r--r--net/bridge/br_sysfs_if.c5
-rw-r--r--net/bridge/netfilter/Kconfig6
-rw-r--r--net/bridge/netfilter/ebt_ip.c3
-rw-r--r--net/bridge/netfilter/ebt_log.c73
-rw-r--r--net/bridge/netfilter/ebt_stp.c5
-rw-r--r--net/bridge/netfilter/ebt_ulog.c53
-rw-r--r--net/bridge/netfilter/ebtables.c4
-rw-r--r--net/core/datagram.c36
-rw-r--r--net/core/dev.c14
-rw-r--r--net/core/dev_mcast.c2
-rw-r--r--net/core/dv.c3
-rw-r--r--net/core/ethtool.c1
-rw-r--r--net/core/filter.c118
-rw-r--r--net/core/flow.c8
-rw-r--r--net/core/net-sysfs.c105
-rw-r--r--net/core/netpoll.c1
-rw-r--r--net/core/pktgen.c13
-rw-r--r--net/core/scm.c1
-rw-r--r--net/core/skbuff.c42
-rw-r--r--net/core/sock.c22
-rw-r--r--net/core/stream.c10
-rw-r--r--net/core/utils.c4
-rw-r--r--net/core/wireless.c3
-rw-r--r--net/dccp/Makefile4
-rw-r--r--net/dccp/ackvec.c33
-rw-r--r--net/dccp/ackvec.h12
-rw-r--r--net/dccp/ccid.h2
-rw-r--r--net/dccp/dccp.h24
-rw-r--r--net/dccp/diag.c2
-rw-r--r--net/dccp/input.c79
-rw-r--r--net/dccp/ipv4.c306
-rw-r--r--net/dccp/ipv6.c1262
-rw-r--r--net/dccp/ipv6.h37
-rw-r--r--net/dccp/minisocks.c23
-rw-r--r--net/dccp/output.c47
-rw-r--r--net/dccp/proto.c56
-rw-r--r--net/decnet/af_decnet.c7
-rw-r--r--net/decnet/dn_dev.c1
-rw-r--r--net/decnet/dn_neigh.c13
-rw-r--r--net/decnet/dn_nsp_in.c17
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/econet/af_econet.c9
-rw-r--r--net/ethernet/eth.c7
-rw-r--r--net/ieee80211/ieee80211_crypt_wep.c61
-rw-r--r--net/ieee80211/ieee80211_rx.c19
-rw-r--r--net/ieee80211/ieee80211_tx.c2
-rw-r--r--net/ieee80211/ieee80211_wx.c2
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile5
-rw-r--r--net/ipv4/af_inet.c20
-rw-r--r--net/ipv4/ah4.c1
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c1
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_hash.c1
-rw-r--r--net/ipv4/fib_rules.c1
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fib_trie.c8
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/igmp.c17
-rw-r--r--net/ipv4/inet_connection_sock.c25
-rw-r--r--net/ipv4/inet_diag.c251
-rw-r--r--net/ipv4/inet_hashtables.c178
-rw-r--r--net/ipv4/inet_timewait_sock.c5
-rw-r--r--net/ipv4/inetpeer.c7
-rw-r--r--net/ipv4/ip_fragment.c70
-rw-r--r--net/ipv4/ip_gre.c40
-rw-r--r--net/ipv4/ip_input.c16
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_output.c46
-rw-r--r--net/ipv4/ip_sockglue.c20
-rw-r--r--net/ipv4/ipcomp.c1
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipip.c23
-rw-r--r--net/ipv4/ipmr.c27
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c28
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c22
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c7
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c11
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c29
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c29
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_esp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c24
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c3
-rw-r--r--net/ipv4/ipvs/ip_vs_sched.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c2
-rw-r--r--net/ipv4/netfilter.c25
-rw-r--r--net/ipv4/netfilter/Kconfig10
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/arp_tables.c176
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c10
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c36
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c50
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c17
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c25
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c53
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c81
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c38
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c34
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c36
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c36
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c16
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c124
-rw-r--r--net/ipv4/netfilter/ip_tables.c200
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c10
-rw-r--r--net/ipv4/netfilter/ipt_helper.c1
-rw-r--r--net/ipv4/netfilter/ipt_mac.c5
-rw-r--r--net/ipv4/netfilter/ipt_physdev.c1
-rw-r--r--net/ipv4/netfilter/ipt_policy.c170
-rw-r--r--net/ipv4/netfilter/ipt_recent.c20
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c76
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c97
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_bic.c85
-rw-r--r--net/ipv4/tcp_cong.c28
-rw-r--r--net/ipv4/tcp_cubic.c411
-rw-r--r--net/ipv4/tcp_input.c101
-rw-r--r--net/ipv4/tcp_ipv4.c270
-rw-r--r--net/ipv4/tcp_minisocks.c16
-rw-r--r--net/ipv4/tcp_output.c118
-rw-r--r--net/ipv4/tcp_vegas.c4
-rw-r--r--net/ipv4/udp.c24
-rw-r--r--net/ipv4/xfrm4_input.c31
-rw-r--r--net/ipv4/xfrm4_output.c72
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c5
-rw-r--r--net/ipv6/af_inet6.c95
-rw-r--r--net/ipv6/ah6.c1
-rw-r--r--net/ipv6/anycast.c1
-rw-r--r--net/ipv6/datagram.c1
-rw-r--r--net/ipv6/esp6.c1
-rw-r--r--net/ipv6/exthdrs.c23
-rw-r--r--net/ipv6/icmp.c4
-rw-r--r--net/ipv6/inet6_connection_sock.c200
-rw-r--r--net/ipv6/inet6_hashtables.c183
-rw-r--r--net/ipv6/ip6_flowlabel.c3
-rw-r--r--net/ipv6/ip6_input.c23
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/ip6_tunnel.c27
-rw-r--r--net/ipv6/ipcomp6.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c27
-rw-r--r--net/ipv6/mcast.c13
-rw-r--r--net/ipv6/netfilter.c29
-rw-r--r--net/ipv6/netfilter/Kconfig10
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c317
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c1
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c3
-rw-r--r--net/ipv6/netfilter/ip6t_dst.c4
-rw-r--r--net/ipv6/netfilter/ip6t_esp.c3
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c2
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c4
-rw-r--r--net/ipv6/netfilter/ip6t_mac.c5
-rw-r--r--net/ipv6/netfilter/ip6t_policy.c175
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c51
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c77
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c4
-rw-r--r--net/ipv6/raw.c16
-rw-r--r--net/ipv6/reassembly.c11
-rw-r--r--net/ipv6/route.c1
-rw-r--r--net/ipv6/sit.c26
-rw-r--r--net/ipv6/tcp_ipv6.c661
-rw-r--r--net/ipv6/udp.c18
-rw-r--r--net/ipv6/xfrm6_input.c21
-rw-r--r--net/ipv6/xfrm6_output.c76
-rw-r--r--net/ipv6/xfrm6_tunnel.c6
-rw-r--r--net/ipx/af_ipx.c7
-rw-r--r--net/irda/af_irda.c24
-rw-r--r--net/irda/irda_device.c1
-rw-r--r--net/irda/iriap.c2
-rw-r--r--net/irda/irias_object.c12
-rw-r--r--net/irda/irnet/irnet.h1
-rw-r--r--net/key/af_key.c205
-rw-r--r--net/llc/af_llc.c11
-rw-r--r--net/netfilter/Kconfig7
-rw-r--r--net/netfilter/Makefile3
-rw-r--r--net/netfilter/nf_conntrack_core.c239
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c1653
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c95
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c14
-rw-r--r--net/netfilter/nf_conntrack_standalone.c66
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c81
-rw-r--r--net/netlink/af_netlink.c9
-rw-r--r--net/netlink/genetlink.c2
-rw-r--r--net/netrom/af_netrom.c17
-rw-r--r--net/nonet.c5
-rw-r--r--net/packet/af_packet.c13
-rw-r--r--net/rose/af_rose.c4
-rw-r--r--net/rxrpc/connection.c14
-rw-r--r--net/sched/Kconfig2
-rw-r--r--net/sched/Makefile14
-rw-r--r--net/sched/act_api.c4
-rw-r--r--net/sched/act_gact.c (renamed from net/sched/gact.c)3
-rw-r--r--net/sched/act_ipt.c (renamed from net/sched/ipt.c)6
-rw-r--r--net/sched/act_mirred.c (renamed from net/sched/mirred.c)3
-rw-r--r--net/sched/act_pedit.c (renamed from net/sched/pedit.c)5
-rw-r--r--net/sched/act_police.c (renamed from net/sched/police.c)17
-rw-r--r--net/sched/act_simple.c (renamed from net/sched/simple.c)3
-rw-r--r--net/sched/ematch.c1
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_hfsc.c12
-rw-r--r--net/sched/sch_htb.c4
-rw-r--r--net/sched/sch_netem.c49
-rw-r--r--net/sched/sch_prio.c7
-rw-r--r--net/sched/sch_teql.c13
-rw-r--r--net/sctp/associola.c81
-rw-r--r--net/sctp/input.c37
-rw-r--r--net/sctp/ipv6.c4
-rw-r--r--net/sctp/output.c17
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/sm_sideeffect.c29
-rw-r--r--net/sctp/sm_statefuns.c20
-rw-r--r--net/sctp/socket.c696
-rw-r--r--net/sctp/transport.c32
-rw-r--r--net/socket.c245
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c10
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c10
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_seal.c11
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_token.c3
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_unseal.c2
-rw-r--r--net/sunrpc/auth_unix.c2
-rw-r--r--net/sunrpc/cache.c5
-rw-r--r--net/sunrpc/clnt.c114
-rw-r--r--net/sunrpc/pmap_clnt.c17
-rw-r--r--net/sunrpc/rpc_pipe.c69
-rw-r--r--net/sunrpc/sched.c222
-rw-r--r--net/sunrpc/sunrpc_syms.c4
-rw-r--r--net/sunrpc/svc.c9
-rw-r--r--net/sunrpc/svcauth_unix.c14
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/sunrpc/xdr.c21
-rw-r--r--net/sunrpc/xprt.c66
-rw-r--r--net/sunrpc/xprtsock.c31
-rw-r--r--net/unix/af_unix.c59
-rw-r--r--net/unix/garbage.c4
-rw-r--r--net/wanrouter/af_wanpipe.c7
-rw-r--r--net/wanrouter/wanmain.c1
-rw-r--r--net/x25/af_x25.c14
-rw-r--r--net/xfrm/xfrm_algo.c6
-rw-r--r--net/xfrm/xfrm_policy.c105
-rw-r--r--net/xfrm/xfrm_state.c9
-rw-r--r--net/xfrm/xfrm_user.c149
310 files changed, 9571 insertions, 3790 deletions
diff --git a/net/802/Makefile b/net/802/Makefile
index 01861929591a..977704a54f68 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -2,8 +2,6 @@
2# Makefile for the Linux 802.x protocol layers. 2# Makefile for the Linux 802.x protocol layers.
3# 3#
4 4
5obj-y := p8023.o
6
7# Check the p8022 selections against net/core/Makefile. 5# Check the p8022 selections against net/core/Makefile.
8obj-$(CONFIG_SYSCTL) += sysctl_net_802.o 6obj-$(CONFIG_SYSCTL) += sysctl_net_802.o
9obj-$(CONFIG_LLC) += p8022.o psnap.o 7obj-$(CONFIG_LLC) += p8022.o psnap.o
@@ -11,5 +9,5 @@ obj-$(CONFIG_TR) += p8022.o psnap.o tr.o sysctl_net_802.o
11obj-$(CONFIG_NET_FC) += fc.o 9obj-$(CONFIG_NET_FC) += fc.o
12obj-$(CONFIG_FDDI) += fddi.o 10obj-$(CONFIG_FDDI) += fddi.o
13obj-$(CONFIG_HIPPI) += hippi.o 11obj-$(CONFIG_HIPPI) += hippi.o
14obj-$(CONFIG_IPX) += p8022.o psnap.o 12obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o
15obj-$(CONFIG_ATALK) += p8022.o psnap.o 13obj-$(CONFIG_ATALK) += p8022.o psnap.o
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 67465b65abe4..fa76220708ce 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <asm/uaccess.h> /* for copy_from_user */ 21#include <asm/uaccess.h> /* for copy_from_user */
22#include <linux/capability.h>
22#include <linux/module.h> 23#include <linux/module.h>
23#include <linux/netdevice.h> 24#include <linux/netdevice.h>
24#include <linux/skbuff.h> 25#include <linux/skbuff.h>
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f2a8750bbf1d..0f604d227da2 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -214,7 +214,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
214 * This allows the VLAN to have a different MAC than the underlying 214 * This allows the VLAN to have a different MAC than the underlying
215 * device, and still route correctly. 215 * device, and still route correctly.
216 */ 216 */
217 if (memcmp(eth_hdr(skb)->h_dest, skb->dev->dev_addr, ETH_ALEN) == 0) { 217 if (!compare_ether_addr(eth_hdr(skb)->h_dest, skb->dev->dev_addr)) {
218 /* It is for our (changed) MAC-address! */ 218 /* It is for our (changed) MAC-address! */
219 skb->pkt_type = PACKET_HOST; 219 skb->pkt_type = PACKET_HOST;
220 } 220 }
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7982656b9c83..697ac55e29dc 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -52,6 +52,7 @@
52 */ 52 */
53 53
54#include <linux/config.h> 54#include <linux/config.h>
55#include <linux/capability.h>
55#include <linux/module.h> 56#include <linux/module.h>
56#include <linux/if_arp.h> 57#include <linux/if_arp.h>
57#include <linux/termios.h> /* For TIOCOUTQ/INQ */ 58#include <linux/termios.h> /* For TIOCOUTQ/INQ */
@@ -63,7 +64,7 @@
63#include <linux/atalk.h> 64#include <linux/atalk.h>
64 65
65struct datalink_proto *ddp_dl, *aarp_dl; 66struct datalink_proto *ddp_dl, *aarp_dl;
66static struct proto_ops atalk_dgram_ops; 67static const struct proto_ops atalk_dgram_ops;
67 68
68/**************************************************************************\ 69/**************************************************************************\
69* * 70* *
@@ -1763,7 +1764,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
1763 */ 1764 */
1764static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1765static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1765{ 1766{
1766 int rc = -EINVAL; 1767 int rc = -ENOIOCTLCMD;
1767 struct sock *sk = sock->sk; 1768 struct sock *sk = sock->sk;
1768 void __user *argp = (void __user *)arg; 1769 void __user *argp = (void __user *)arg;
1769 1770
@@ -1813,23 +1814,6 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1813 rc = atif_ioctl(cmd, argp); 1814 rc = atif_ioctl(cmd, argp);
1814 rtnl_unlock(); 1815 rtnl_unlock();
1815 break; 1816 break;
1816 /* Physical layer ioctl calls */
1817 case SIOCSIFLINK:
1818 case SIOCGIFHWADDR:
1819 case SIOCSIFHWADDR:
1820 case SIOCGIFFLAGS:
1821 case SIOCSIFFLAGS:
1822 case SIOCGIFTXQLEN:
1823 case SIOCSIFTXQLEN:
1824 case SIOCGIFMTU:
1825 case SIOCGIFCONF:
1826 case SIOCADDMULTI:
1827 case SIOCDELMULTI:
1828 case SIOCGIFCOUNT:
1829 case SIOCGIFINDEX:
1830 case SIOCGIFNAME:
1831 rc = dev_ioctl(cmd, argp);
1832 break;
1833 } 1817 }
1834 1818
1835 return rc; 1819 return rc;
@@ -1841,7 +1825,7 @@ static struct net_proto_family atalk_family_ops = {
1841 .owner = THIS_MODULE, 1825 .owner = THIS_MODULE,
1842}; 1826};
1843 1827
1844static struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { 1828static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = {
1845 .family = PF_APPLETALK, 1829 .family = PF_APPLETALK,
1846 .owner = THIS_MODULE, 1830 .owner = THIS_MODULE,
1847 .release = atalk_release, 1831 .release = atalk_release,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 72f3f7b8de80..680ccb12aae8 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -18,6 +18,7 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary
18#include <net/arp.h> 18#include <net/arp.h>
19#include <linux/atm.h> 19#include <linux/atm.h>
20#include <linux/atmdev.h> 20#include <linux/atmdev.h>
21#include <linux/capability.h>
21#include <linux/seq_file.h> 22#include <linux/seq_file.h>
22 23
23#include <linux/atmbr2684.h> 24#include <linux/atmbr2684.h>
@@ -295,14 +296,14 @@ static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev)
295 unsigned char *rawp; 296 unsigned char *rawp;
296 eth = eth_hdr(skb); 297 eth = eth_hdr(skb);
297 298
298 if (*eth->h_dest & 1) { 299 if (is_multicast_ether_addr(eth->h_dest)) {
299 if (memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0) 300 if (!compare_ether_addr(eth->h_dest, dev->broadcast))
300 skb->pkt_type = PACKET_BROADCAST; 301 skb->pkt_type = PACKET_BROADCAST;
301 else 302 else
302 skb->pkt_type = PACKET_MULTICAST; 303 skb->pkt_type = PACKET_MULTICAST;
303 } 304 }
304 305
305 else if (memcmp(eth->h_dest, dev->dev_addr, ETH_ALEN)) 306 else if (compare_ether_addr(eth->h_dest, dev->dev_addr))
306 skb->pkt_type = PACKET_OTHERHOST; 307 skb->pkt_type = PACKET_OTHERHOST;
307 308
308 if (ntohs(eth->h_proto) >= 1536) 309 if (ntohs(eth->h_proto) >= 1536)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 4f54c9a5e84a..73370de97539 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -19,6 +19,7 @@
19#include <linux/atmdev.h> 19#include <linux/atmdev.h>
20#include <linux/atmclip.h> 20#include <linux/atmclip.h>
21#include <linux/atmarp.h> 21#include <linux/atmarp.h>
22#include <linux/capability.h>
22#include <linux/ip.h> /* for net/route.h */ 23#include <linux/ip.h> /* for net/route.h */
23#include <linux/in.h> /* for struct sockaddr_in */ 24#include <linux/in.h> /* for struct sockaddr_in */
24#include <linux/if.h> /* for IFF_UP */ 25#include <linux/if.h> /* for IFF_UP */
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index a150198b05a3..eb109af7eb4a 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -12,6 +12,7 @@
12#include <linux/atmdev.h> 12#include <linux/atmdev.h>
13#include <linux/atmclip.h> /* CLIP_*ENCAP */ 13#include <linux/atmclip.h> /* CLIP_*ENCAP */
14#include <linux/atmarp.h> /* manifest constants */ 14#include <linux/atmarp.h> /* manifest constants */
15#include <linux/capability.h>
15#include <linux/sonet.h> /* for ioctls */ 16#include <linux/sonet.h> /* for ioctls */
16#include <linux/atmsvc.h> 17#include <linux/atmsvc.h>
17#include <linux/atmmpc.h> 18#include <linux/atmmpc.h>
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ad840b9afba8..c4fc722fef9a 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -7,6 +7,7 @@
7#include <linux/config.h> 7#include <linux/config.h>
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/bitops.h> 9#include <linux/bitops.h>
10#include <linux/capability.h>
10 11
11/* We are ethernet device */ 12/* We are ethernet device */
12#include <linux/if_ether.h> 13#include <linux/if_ether.h>
@@ -1321,7 +1322,7 @@ static int lane2_associate_req (struct net_device *dev, u8 *lan_dst,
1321 struct sk_buff *skb; 1322 struct sk_buff *skb;
1322 struct lec_priv *priv = (struct lec_priv*)dev->priv; 1323 struct lec_priv *priv = (struct lec_priv*)dev->priv;
1323 1324
1324 if ( memcmp(lan_dst, dev->dev_addr, ETH_ALEN) != 0 ) 1325 if (compare_ether_addr(lan_dst, dev->dev_addr))
1325 return (0); /* not our mac address */ 1326 return (0); /* not our mac address */
1326 1327
1327 kfree(priv->tlvs); /* NULL if there was no previous association */ 1328 kfree(priv->tlvs); /* NULL if there was no previous association */
@@ -1798,7 +1799,7 @@ lec_arp_find(struct lec_priv *priv,
1798 1799
1799 to_return = priv->lec_arp_tables[place]; 1800 to_return = priv->lec_arp_tables[place];
1800 while(to_return) { 1801 while(to_return) {
1801 if (memcmp(mac_addr, to_return->mac_addr, ETH_ALEN) == 0) { 1802 if (!compare_ether_addr(mac_addr, to_return->mac_addr)) {
1802 return to_return; 1803 return to_return;
1803 } 1804 }
1804 to_return = to_return->next; 1805 to_return = to_return->next;
@@ -1811,8 +1812,7 @@ make_entry(struct lec_priv *priv, unsigned char *mac_addr)
1811{ 1812{
1812 struct lec_arp_table *to_return; 1813 struct lec_arp_table *to_return;
1813 1814
1814 to_return = (struct lec_arp_table *) kmalloc(sizeof(struct lec_arp_table), 1815 to_return = kmalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
1815 GFP_ATOMIC);
1816 if (!to_return) { 1816 if (!to_return) {
1817 printk("LEC: Arp entry kmalloc failed\n"); 1817 printk("LEC: Arp entry kmalloc failed\n");
1818 return NULL; 1818 return NULL;
@@ -2002,7 +2002,7 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find,
2002 return priv->mcast_vcc; 2002 return priv->mcast_vcc;
2003 break; 2003 break;
2004 case 2: /* LANE2 wants arp for multicast addresses */ 2004 case 2: /* LANE2 wants arp for multicast addresses */
2005 if ( memcmp(mac_to_find, bus_mac, ETH_ALEN) == 0) 2005 if (!compare_ether_addr(mac_to_find, bus_mac))
2006 return priv->mcast_vcc; 2006 return priv->mcast_vcc;
2007 break; 2007 break;
2008 default: 2008 default:
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 526d9531411f..c304ef1513b9 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -3,6 +3,7 @@
3#include <linux/timer.h> 3#include <linux/timer.h>
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/bitops.h> 5#include <linux/bitops.h>
6#include <linux/capability.h>
6#include <linux/seq_file.h> 7#include <linux/seq_file.h>
7 8
8/* We are an ethernet device */ 9/* We are an ethernet device */
@@ -552,7 +553,7 @@ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev)
552 goto non_ip; /* Multi-Protocol Over ATM :-) */ 553 goto non_ip; /* Multi-Protocol Over ATM :-) */
553 554
554 while (i < mpc->number_of_mps_macs) { 555 while (i < mpc->number_of_mps_macs) {
555 if (memcmp(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN), ETH_ALEN) == 0) 556 if (!compare_ether_addr(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN)))
556 if ( send_via_shortcut(skb, mpc) == 0 ) /* try shortcut */ 557 if ( send_via_shortcut(skb, mpc) == 0 ) /* try shortcut */
557 return 0; /* success! */ 558 return 0; /* success! */
558 i++; 559 i++;
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 58f4a2b5aebe..1489067c1e84 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -39,6 +39,7 @@
39#include <linux/skbuff.h> 39#include <linux/skbuff.h>
40#include <linux/atm.h> 40#include <linux/atm.h>
41#include <linux/atmdev.h> 41#include <linux/atmdev.h>
42#include <linux/capability.h>
42#include <linux/ppp_defs.h> 43#include <linux/ppp_defs.h>
43#include <linux/if_ppp.h> 44#include <linux/if_ppp.h>
44#include <linux/ppp_channel.h> 45#include <linux/ppp_channel.h>
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 2684a92da22b..f2c541774dcd 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -102,7 +102,7 @@ static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr,
102} 102}
103 103
104 104
105static struct proto_ops pvc_proto_ops = { 105static const struct proto_ops pvc_proto_ops = {
106 .family = PF_ATMPVC, 106 .family = PF_ATMPVC,
107 .owner = THIS_MODULE, 107 .owner = THIS_MODULE,
108 108
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 4a0466e91aa6..3e57b17ca523 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -6,6 +6,7 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/atmdev.h> 8#include <linux/atmdev.h>
9#include <linux/capability.h>
9#include <linux/kernel.h> 10#include <linux/kernel.h>
10#include <linux/skbuff.h> 11#include <linux/skbuff.h>
11#include <linux/mm.h> 12#include <linux/mm.h>
diff --git a/net/atm/resources.c b/net/atm/resources.c
index c8c459fcb038..224190537c90 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -16,6 +16,7 @@
16#include <linux/kernel.h> /* for barrier */ 16#include <linux/kernel.h> /* for barrier */
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/capability.h>
19#include <linux/delay.h> 20#include <linux/delay.h>
20#include <net/sock.h> /* for struct sock */ 21#include <net/sock.h> /* for struct sock */
21 22
diff --git a/net/atm/svc.c b/net/atm/svc.c
index d7b266136bf6..3a180cfd7b48 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -613,7 +613,7 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
613 return error; 613 return error;
614} 614}
615 615
616static struct proto_ops svc_proto_ops = { 616static const struct proto_ops svc_proto_ops = {
617 .family = PF_ATMSVC, 617 .family = PF_ATMSVC,
618 .owner = THIS_MODULE, 618 .owner = THIS_MODULE,
619 619
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1b683f302657..dbf9b47681f7 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -14,6 +14,7 @@
14 * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) 14 * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
15 */ 15 */
16#include <linux/config.h> 16#include <linux/config.h>
17#include <linux/capability.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/errno.h> 19#include <linux/errno.h>
19#include <linux/types.h> 20#include <linux/types.h>
@@ -54,7 +55,7 @@
54HLIST_HEAD(ax25_list); 55HLIST_HEAD(ax25_list);
55DEFINE_SPINLOCK(ax25_list_lock); 56DEFINE_SPINLOCK(ax25_list_lock);
56 57
57static struct proto_ops ax25_proto_ops; 58static const struct proto_ops ax25_proto_ops;
58 59
59static void ax25_free_sock(struct sock *sk) 60static void ax25_free_sock(struct sock *sk)
60{ 61{
@@ -1827,7 +1828,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1827 break; 1828 break;
1828 1829
1829 default: 1830 default:
1830 res = dev_ioctl(cmd, argp); 1831 res = -ENOIOCTLCMD;
1831 break; 1832 break;
1832 } 1833 }
1833 release_sock(sk); 1834 release_sock(sk);
@@ -1944,7 +1945,7 @@ static struct net_proto_family ax25_family_ops = {
1944 .owner = THIS_MODULE, 1945 .owner = THIS_MODULE,
1945}; 1946};
1946 1947
1947static struct proto_ops ax25_proto_ops = { 1948static const struct proto_ops ax25_proto_ops = {
1948 .family = PF_AX25, 1949 .family = PF_AX25,
1949 .owner = THIS_MODULE, 1950 .owner = THIS_MODULE,
1950 .release = ax25_release, 1951 .release = ax25_release,
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index b1e945bd6ed3..f04f8630fd28 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -11,6 +11,8 @@
11 * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) 11 * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
12 * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) 12 * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr)
13 */ 13 */
14
15#include <linux/capability.h>
14#include <linux/errno.h> 16#include <linux/errno.h>
15#include <linux/types.h> 17#include <linux/types.h>
16#include <linux/socket.h> 18#include <linux/socket.h>
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index d53cc8615865..b8b5854bce9a 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -6,6 +6,8 @@
6 * 6 *
7 * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) 7 * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
8 */ 8 */
9
10#include <linux/capability.h>
9#include <linux/errno.h> 11#include <linux/errno.h>
10#include <linux/types.h> 12#include <linux/types.h>
11#include <linux/socket.h> 13#include <linux/socket.h>
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ea616e3fc98e..fb031fe9be9e 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -287,10 +287,9 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
287 timeo = schedule_timeout(timeo); 287 timeo = schedule_timeout(timeo);
288 lock_sock(sk); 288 lock_sock(sk);
289 289
290 if (sk->sk_err) { 290 err = sock_error(sk);
291 err = sock_error(sk); 291 if (err)
292 break; 292 break;
293 }
294 } 293 }
295 set_current_state(TASK_RUNNING); 294 set_current_state(TASK_RUNNING);
296 remove_wait_queue(sk->sk_sleep, &wait); 295 remove_wait_queue(sk->sk_sleep, &wait);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 682bf20af52d..cbb20c32a6c8 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -75,7 +75,7 @@ static struct bnep_session *__bnep_get_session(u8 *dst)
75 75
76 list_for_each(p, &bnep_session_list) { 76 list_for_each(p, &bnep_session_list) {
77 s = list_entry(p, struct bnep_session, list); 77 s = list_entry(p, struct bnep_session, list);
78 if (!memcmp(dst, s->eh.h_source, ETH_ALEN)) 78 if (!compare_ether_addr(dst, s->eh.h_source))
79 return s; 79 return s;
80 } 80 }
81 return NULL; 81 return NULL;
@@ -420,10 +420,10 @@ static inline int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb)
420 iv[il++] = (struct kvec) { &type, 1 }; 420 iv[il++] = (struct kvec) { &type, 1 };
421 len++; 421 len++;
422 422
423 if (!memcmp(eh->h_dest, s->eh.h_source, ETH_ALEN)) 423 if (!compare_ether_addr(eh->h_dest, s->eh.h_source))
424 type |= 0x01; 424 type |= 0x01;
425 425
426 if (!memcmp(eh->h_source, s->eh.h_dest, ETH_ALEN)) 426 if (!compare_ether_addr(eh->h_source, s->eh.h_dest))
427 type |= 0x02; 427 type |= 0x02;
428 428
429 if (type) 429 if (type)
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 9778c6acd53b..2bfe796cf05d 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -32,6 +32,7 @@
32#include <linux/module.h> 32#include <linux/module.h>
33 33
34#include <linux/types.h> 34#include <linux/types.h>
35#include <linux/capability.h>
35#include <linux/errno.h> 36#include <linux/errno.h>
36#include <linux/kernel.h> 37#include <linux/kernel.h>
37#include <linux/sched.h> 38#include <linux/sched.h>
@@ -146,7 +147,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
146 return 0; 147 return 0;
147} 148}
148 149
149static struct proto_ops bnep_sock_ops = { 150static const struct proto_ops bnep_sock_ops = {
150 .family = PF_BLUETOOTH, 151 .family = PF_BLUETOOTH,
151 .owner = THIS_MODULE, 152 .owner = THIS_MODULE,
152 .release = bnep_sock_release, 153 .release = bnep_sock_release,
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index beb045bf5714..8f8fad23f78a 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -24,6 +24,7 @@
24#include <linux/module.h> 24#include <linux/module.h>
25 25
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/capability.h>
27#include <linux/errno.h> 28#include <linux/errno.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
29#include <linux/sched.h> 30#include <linux/sched.h>
@@ -137,7 +138,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
137 return -EINVAL; 138 return -EINVAL;
138} 139}
139 140
140static struct proto_ops cmtp_sock_ops = { 141static const struct proto_ops cmtp_sock_ops = {
141 .family = PF_BLUETOOTH, 142 .family = PF_BLUETOOTH,
142 .owner = THIS_MODULE, 143 .owner = THIS_MODULE,
143 .release = cmtp_sock_release, 144 .release = cmtp_sock_release,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a31244e58888..f812ed129e58 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -403,7 +403,7 @@ int hci_get_conn_list(void __user *arg)
403 403
404 size = sizeof(req) + req.conn_num * sizeof(*ci); 404 size = sizeof(req) + req.conn_num * sizeof(*ci);
405 405
406 if (!(cl = (void *) kmalloc(size, GFP_KERNEL))) 406 if (!(cl = kmalloc(size, GFP_KERNEL)))
407 return -ENOMEM; 407 return -ENOMEM;
408 408
409 if (!(hdev = hci_dev_get(req.dev_id))) { 409 if (!(hdev = hci_dev_get(req.dev_id))) {
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d6d0a15c099..bdb6458c6bd5 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -28,6 +28,7 @@
28#include <linux/module.h> 28#include <linux/module.h>
29 29
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/capability.h>
31#include <linux/errno.h> 32#include <linux/errno.h>
32#include <linux/kernel.h> 33#include <linux/kernel.h>
33#include <linux/sched.h> 34#include <linux/sched.h>
@@ -575,7 +576,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char
575 return 0; 576 return 0;
576} 577}
577 578
578static struct proto_ops hci_sock_ops = { 579static const struct proto_ops hci_sock_ops = {
579 .family = PF_BLUETOOTH, 580 .family = PF_BLUETOOTH,
580 .owner = THIS_MODULE, 581 .owner = THIS_MODULE,
581 .release = hci_sock_release, 582 .release = hci_sock_release,
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index bd7568ac87fc..0ed38740388c 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -78,7 +78,7 @@ static struct class_device_attribute *bt_attrs[] = {
78}; 78};
79 79
80#ifdef CONFIG_HOTPLUG 80#ifdef CONFIG_HOTPLUG
81static int bt_hotplug(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) 81static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
82{ 82{
83 struct hci_dev *hdev = class_get_devdata(cdev); 83 struct hci_dev *hdev = class_get_devdata(cdev);
84 int n, i = 0; 84 int n, i = 0;
@@ -107,7 +107,7 @@ struct class bt_class = {
107 .name = "bluetooth", 107 .name = "bluetooth",
108 .release = bt_release, 108 .release = bt_release,
109#ifdef CONFIG_HOTPLUG 109#ifdef CONFIG_HOTPLUG
110 .hotplug = bt_hotplug, 110 .uevent = bt_uevent,
111#endif 111#endif
112}; 112};
113 113
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index f8986f881431..b8f67761b886 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -24,6 +24,7 @@
24#include <linux/module.h> 24#include <linux/module.h>
25 25
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/capability.h>
27#include <linux/errno.h> 28#include <linux/errno.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
29#include <linux/sched.h> 30#include <linux/sched.h>
@@ -143,7 +144,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
143 return -EINVAL; 144 return -EINVAL;
144} 145}
145 146
146static struct proto_ops hidp_sock_ops = { 147static const struct proto_ops hidp_sock_ops = {
147 .family = PF_BLUETOOTH, 148 .family = PF_BLUETOOTH,
148 .owner = THIS_MODULE, 149 .owner = THIS_MODULE,
149 .release = hidp_sock_release, 150 .release = hidp_sock_release,
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e3bb11ca4235..f6b4a8085357 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -28,6 +28,7 @@
28#include <linux/module.h> 28#include <linux/module.h>
29 29
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/capability.h>
31#include <linux/errno.h> 32#include <linux/errno.h>
32#include <linux/kernel.h> 33#include <linux/kernel.h>
33#include <linux/sched.h> 34#include <linux/sched.h>
@@ -57,7 +58,7 @@
57 58
58#define VERSION "2.8" 59#define VERSION "2.8"
59 60
60static struct proto_ops l2cap_sock_ops; 61static const struct proto_ops l2cap_sock_ops;
61 62
62static struct bt_sock_list l2cap_sk_list = { 63static struct bt_sock_list l2cap_sk_list = {
63 .lock = RW_LOCK_UNLOCKED 64 .lock = RW_LOCK_UNLOCKED
@@ -767,8 +768,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
767 768
768 BT_DBG("sock %p, sk %p", sock, sk); 769 BT_DBG("sock %p, sk %p", sock, sk);
769 770
770 if (sk->sk_err) 771 err = sock_error(sk);
771 return sock_error(sk); 772 if (err)
773 return err;
772 774
773 if (msg->msg_flags & MSG_OOB) 775 if (msg->msg_flags & MSG_OOB)
774 return -EOPNOTSUPP; 776 return -EOPNOTSUPP;
@@ -2160,7 +2162,7 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf)
2160 2162
2161static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); 2163static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL);
2162 2164
2163static struct proto_ops l2cap_sock_ops = { 2165static const struct proto_ops l2cap_sock_ops = {
2164 .family = PF_BLUETOOTH, 2166 .family = PF_BLUETOOTH,
2165 .owner = THIS_MODULE, 2167 .owner = THIS_MODULE,
2166 .release = l2cap_sock_release, 2168 .release = l2cap_sock_release,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 6c34261b232e..757d2dd3b02f 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -58,7 +58,7 @@
58#define BT_DBG(D...) 58#define BT_DBG(D...)
59#endif 59#endif
60 60
61static struct proto_ops rfcomm_sock_ops; 61static const struct proto_ops rfcomm_sock_ops;
62 62
63static struct bt_sock_list rfcomm_sk_list = { 63static struct bt_sock_list rfcomm_sk_list = {
64 .lock = RW_LOCK_UNLOCKED 64 .lock = RW_LOCK_UNLOCKED
@@ -907,7 +907,7 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf)
907 907
908static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL); 908static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL);
909 909
910static struct proto_ops rfcomm_sock_ops = { 910static const struct proto_ops rfcomm_sock_ops = {
911 .family = PF_BLUETOOTH, 911 .family = PF_BLUETOOTH,
912 .owner = THIS_MODULE, 912 .owner = THIS_MODULE,
913 .release = rfcomm_sock_release, 913 .release = rfcomm_sock_release,
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 158a9c46d863..74368f79ee5d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -34,6 +34,7 @@
34#include <linux/tty_driver.h> 34#include <linux/tty_driver.h>
35#include <linux/tty_flip.h> 35#include <linux/tty_flip.h>
36 36
37#include <linux/capability.h>
37#include <linux/slab.h> 38#include <linux/slab.h>
38#include <linux/skbuff.h> 39#include <linux/skbuff.h>
39 40
@@ -480,13 +481,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
480 BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); 481 BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len);
481 482
482 if (test_bit(TTY_DONT_FLIP, &tty->flags)) { 483 if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
483 register int i; 484 tty_buffer_request_room(tty, skb->len);
484 for (i = 0; i < skb->len; i++) { 485 tty_insert_flip_string(tty, skb->data, skb->len);
485 if (tty->flip.count >= TTY_FLIPBUF_SIZE)
486 tty_flip_buffer_push(tty);
487
488 tty_insert_flip_char(tty, skb->data[i], 0);
489 }
490 tty_flip_buffer_push(tty); 486 tty_flip_buffer_push(tty);
491 } else 487 } else
492 tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len); 488 tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 9cb00dc6c08c..6b61323ce23c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -56,7 +56,7 @@
56 56
57#define VERSION "0.5" 57#define VERSION "0.5"
58 58
59static struct proto_ops sco_sock_ops; 59static const struct proto_ops sco_sock_ops;
60 60
61static struct bt_sock_list sco_sk_list = { 61static struct bt_sock_list sco_sk_list = {
62 .lock = RW_LOCK_UNLOCKED 62 .lock = RW_LOCK_UNLOCKED
@@ -637,8 +637,9 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
637 637
638 BT_DBG("sock %p, sk %p", sock, sk); 638 BT_DBG("sock %p, sk %p", sock, sk);
639 639
640 if (sk->sk_err) 640 err = sock_error(sk);
641 return sock_error(sk); 641 if (err)
642 return err;
642 643
643 if (msg->msg_flags & MSG_OOB) 644 if (msg->msg_flags & MSG_OOB)
644 return -EOPNOTSUPP; 645 return -EOPNOTSUPP;
@@ -913,7 +914,7 @@ static ssize_t sco_sysfs_show(struct class *dev, char *buf)
913 914
914static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); 915static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL);
915 916
916static struct proto_ops sco_sock_ops = { 917static const struct proto_ops sco_sock_ops = {
917 .family = PF_BLUETOOTH, 918 .family = PF_BLUETOOTH,
918 .owner = THIS_MODULE, 919 .owner = THIS_MODULE,
919 .release = sco_sock_release, 920 .release = sco_sock_release,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index f8f184942aaf..188cc1ac49eb 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -67,3 +67,4 @@ EXPORT_SYMBOL(br_should_route_hook);
67module_init(br_init) 67module_init(br_init)
68module_exit(br_deinit) 68module_exit(br_deinit)
69MODULE_LICENSE("GPL"); 69MODULE_LICENSE("GPL");
70MODULE_VERSION(BR_VERSION);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f564ee99782d..0b33a7b3a00c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -15,7 +15,9 @@
15 15
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/netdevice.h> 17#include <linux/netdevice.h>
18#include <linux/module.h> 18#include <linux/etherdevice.h>
19#include <linux/ethtool.h>
20
19#include <asm/uaccess.h> 21#include <asm/uaccess.h>
20#include "br_private.h" 22#include "br_private.h"
21 23
@@ -82,6 +84,87 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
82 return 0; 84 return 0;
83} 85}
84 86
87/* Allow setting mac address of pseudo-bridge to be same as
88 * any of the bound interfaces
89 */
90static int br_set_mac_address(struct net_device *dev, void *p)
91{
92 struct net_bridge *br = netdev_priv(dev);
93 struct sockaddr *addr = p;
94 struct net_bridge_port *port;
95 int err = -EADDRNOTAVAIL;
96
97 spin_lock_bh(&br->lock);
98 list_for_each_entry(port, &br->port_list, list) {
99 if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) {
100 br_stp_change_bridge_id(br, addr->sa_data);
101 err = 0;
102 break;
103 }
104 }
105 spin_unlock_bh(&br->lock);
106
107 return err;
108}
109
110static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
111{
112 strcpy(info->driver, "bridge");
113 strcpy(info->version, BR_VERSION);
114 strcpy(info->fw_version, "N/A");
115 strcpy(info->bus_info, "N/A");
116}
117
118static int br_set_sg(struct net_device *dev, u32 data)
119{
120 struct net_bridge *br = netdev_priv(dev);
121
122 if (data)
123 br->feature_mask |= NETIF_F_SG;
124 else
125 br->feature_mask &= ~NETIF_F_SG;
126
127 br_features_recompute(br);
128 return 0;
129}
130
131static int br_set_tso(struct net_device *dev, u32 data)
132{
133 struct net_bridge *br = netdev_priv(dev);
134
135 if (data)
136 br->feature_mask |= NETIF_F_TSO;
137 else
138 br->feature_mask &= ~NETIF_F_TSO;
139
140 br_features_recompute(br);
141 return 0;
142}
143
144static int br_set_tx_csum(struct net_device *dev, u32 data)
145{
146 struct net_bridge *br = netdev_priv(dev);
147
148 if (data)
149 br->feature_mask |= NETIF_F_IP_CSUM;
150 else
151 br->feature_mask &= ~NETIF_F_IP_CSUM;
152
153 br_features_recompute(br);
154 return 0;
155}
156
157static struct ethtool_ops br_ethtool_ops = {
158 .get_drvinfo = br_getinfo,
159 .get_link = ethtool_op_get_link,
160 .get_sg = ethtool_op_get_sg,
161 .set_sg = br_set_sg,
162 .get_tx_csum = ethtool_op_get_tx_csum,
163 .set_tx_csum = br_set_tx_csum,
164 .get_tso = ethtool_op_get_tso,
165 .set_tso = br_set_tso,
166};
167
85void br_dev_setup(struct net_device *dev) 168void br_dev_setup(struct net_device *dev)
86{ 169{
87 memset(dev->dev_addr, 0, ETH_ALEN); 170 memset(dev->dev_addr, 0, ETH_ALEN);
@@ -96,8 +179,12 @@ void br_dev_setup(struct net_device *dev)
96 dev->change_mtu = br_change_mtu; 179 dev->change_mtu = br_change_mtu;
97 dev->destructor = free_netdev; 180 dev->destructor = free_netdev;
98 SET_MODULE_OWNER(dev); 181 SET_MODULE_OWNER(dev);
182 SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
99 dev->stop = br_dev_stop; 183 dev->stop = br_dev_stop;
100 dev->tx_queue_len = 0; 184 dev->tx_queue_len = 0;
101 dev->set_mac_address = NULL; 185 dev->set_mac_address = br_set_mac_address;
102 dev->priv_flags = IFF_EBRIDGE; 186 dev->priv_flags = IFF_EBRIDGE;
187
188 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
189 | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM;
103} 190}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 975abe254b7a..ba442883e877 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -20,6 +20,7 @@
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/rtnetlink.h> 22#include <linux/rtnetlink.h>
23#include <linux/if_ether.h>
23#include <net/sock.h> 24#include <net/sock.h>
24 25
25#include "br_private.h" 26#include "br_private.h"
@@ -32,9 +33,8 @@
32 * ethtool, use ethtool_ops. Also, since driver might sleep need to 33 * ethtool, use ethtool_ops. Also, since driver might sleep need to
33 * not be holding any locks. 34 * not be holding any locks.
34 */ 35 */
35static int br_initial_port_cost(struct net_device *dev) 36static int port_cost(struct net_device *dev)
36{ 37{
37
38 struct ethtool_cmd ecmd = { ETHTOOL_GSET }; 38 struct ethtool_cmd ecmd = { ETHTOOL_GSET };
39 struct ifreq ifr; 39 struct ifreq ifr;
40 mm_segment_t old_fs; 40 mm_segment_t old_fs;
@@ -58,10 +58,6 @@ static int br_initial_port_cost(struct net_device *dev)
58 return 2; 58 return 2;
59 case SPEED_10: 59 case SPEED_10:
60 return 100; 60 return 100;
61 default:
62 pr_info("bridge: can't decode speed from %s: %d\n",
63 dev->name, ecmd.speed);
64 return 100;
65 } 61 }
66 } 62 }
67 63
@@ -75,6 +71,35 @@ static int br_initial_port_cost(struct net_device *dev)
75 return 100; /* assume old 10Mbps */ 71 return 100; /* assume old 10Mbps */
76} 72}
77 73
74
75/*
76 * Check for port carrier transistions.
77 * Called from work queue to allow for calling functions that
78 * might sleep (such as speed check), and to debounce.
79 */
80static void port_carrier_check(void *arg)
81{
82 struct net_bridge_port *p = arg;
83
84 rtnl_lock();
85 if (netif_carrier_ok(p->dev)) {
86 u32 cost = port_cost(p->dev);
87
88 spin_lock_bh(&p->br->lock);
89 if (p->state == BR_STATE_DISABLED) {
90 p->path_cost = cost;
91 br_stp_enable_port(p);
92 }
93 spin_unlock_bh(&p->br->lock);
94 } else {
95 spin_lock_bh(&p->br->lock);
96 if (p->state != BR_STATE_DISABLED)
97 br_stp_disable_port(p);
98 spin_unlock_bh(&p->br->lock);
99 }
100 rtnl_unlock();
101}
102
78static void destroy_nbp(struct net_bridge_port *p) 103static void destroy_nbp(struct net_bridge_port *p)
79{ 104{
80 struct net_device *dev = p->dev; 105 struct net_device *dev = p->dev;
@@ -102,6 +127,9 @@ static void del_nbp(struct net_bridge_port *p)
102 dev->br_port = NULL; 127 dev->br_port = NULL;
103 dev_set_promiscuity(dev, -1); 128 dev_set_promiscuity(dev, -1);
104 129
130 cancel_delayed_work(&p->carrier_check);
131 flush_scheduled_work();
132
105 spin_lock_bh(&br->lock); 133 spin_lock_bh(&br->lock);
106 br_stp_disable_port(p); 134 br_stp_disable_port(p);
107 spin_unlock_bh(&br->lock); 135 spin_unlock_bh(&br->lock);
@@ -155,6 +183,7 @@ static struct net_device *new_bridge_dev(const char *name)
155 br->bridge_id.prio[1] = 0x00; 183 br->bridge_id.prio[1] = 0x00;
156 memset(br->bridge_id.addr, 0, ETH_ALEN); 184 memset(br->bridge_id.addr, 0, ETH_ALEN);
157 185
186 br->feature_mask = dev->features;
158 br->stp_enabled = 0; 187 br->stp_enabled = 0;
159 br->designated_root = br->bridge_id; 188 br->designated_root = br->bridge_id;
160 br->root_path_cost = 0; 189 br->root_path_cost = 0;
@@ -195,10 +224,9 @@ static int find_portno(struct net_bridge *br)
195 return (index >= BR_MAX_PORTS) ? -EXFULL : index; 224 return (index >= BR_MAX_PORTS) ? -EXFULL : index;
196} 225}
197 226
198/* called with RTNL */ 227/* called with RTNL but without bridge lock */
199static struct net_bridge_port *new_nbp(struct net_bridge *br, 228static struct net_bridge_port *new_nbp(struct net_bridge *br,
200 struct net_device *dev, 229 struct net_device *dev)
201 unsigned long cost)
202{ 230{
203 int index; 231 int index;
204 struct net_bridge_port *p; 232 struct net_bridge_port *p;
@@ -215,12 +243,13 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
215 p->br = br; 243 p->br = br;
216 dev_hold(dev); 244 dev_hold(dev);
217 p->dev = dev; 245 p->dev = dev;
218 p->path_cost = cost; 246 p->path_cost = port_cost(dev);
219 p->priority = 0x8000 >> BR_PORT_BITS; 247 p->priority = 0x8000 >> BR_PORT_BITS;
220 dev->br_port = p; 248 dev->br_port = p;
221 p->port_no = index; 249 p->port_no = index;
222 br_init_port(p); 250 br_init_port(p);
223 p->state = BR_STATE_DISABLED; 251 p->state = BR_STATE_DISABLED;
252 INIT_WORK(&p->carrier_check, port_carrier_check, p);
224 kobject_init(&p->kobj); 253 kobject_init(&p->kobj);
225 254
226 return p; 255 return p;
@@ -295,7 +324,7 @@ int br_del_bridge(const char *name)
295 return ret; 324 return ret;
296} 325}
297 326
298/* Mtu of the bridge pseudo-device 1500 or the minimum of the ports */ 327/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
299int br_min_mtu(const struct net_bridge *br) 328int br_min_mtu(const struct net_bridge *br)
300{ 329{
301 const struct net_bridge_port *p; 330 const struct net_bridge_port *p;
@@ -304,7 +333,7 @@ int br_min_mtu(const struct net_bridge *br)
304 ASSERT_RTNL(); 333 ASSERT_RTNL();
305 334
306 if (list_empty(&br->port_list)) 335 if (list_empty(&br->port_list))
307 mtu = 1500; 336 mtu = ETH_DATA_LEN;
308 else { 337 else {
309 list_for_each_entry(p, &br->port_list, list) { 338 list_for_each_entry(p, &br->port_list, list) {
310 if (!mtu || p->dev->mtu < mtu) 339 if (!mtu || p->dev->mtu < mtu)
@@ -322,9 +351,8 @@ void br_features_recompute(struct net_bridge *br)
322 struct net_bridge_port *p; 351 struct net_bridge_port *p;
323 unsigned long features, checksum; 352 unsigned long features, checksum;
324 353
325 features = NETIF_F_SG | NETIF_F_FRAGLIST 354 features = br->feature_mask &~ NETIF_F_IP_CSUM;
326 | NETIF_F_HIGHDMA | NETIF_F_TSO; 355 checksum = br->feature_mask & NETIF_F_IP_CSUM;
327 checksum = NETIF_F_IP_CSUM; /* least commmon subset */
328 356
329 list_for_each_entry(p, &br->port_list, list) { 357 list_for_each_entry(p, &br->port_list, list) {
330 if (!(p->dev->features 358 if (!(p->dev->features
@@ -351,7 +379,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
351 if (dev->br_port != NULL) 379 if (dev->br_port != NULL)
352 return -EBUSY; 380 return -EBUSY;
353 381
354 if (IS_ERR(p = new_nbp(br, dev, br_initial_port_cost(dev)))) 382 if (IS_ERR(p = new_nbp(br, dev)))
355 return PTR_ERR(p); 383 return PTR_ERR(p);
356 384
357 if ((err = br_fdb_insert(br, p, dev->dev_addr))) 385 if ((err = br_fdb_insert(br, p, dev->dev_addr)))
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index b88220a64cd8..e3a73cead6b6 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -53,6 +53,11 @@ int br_handle_frame_finish(struct sk_buff *skb)
53 /* insert into forwarding database after filtering to avoid spoofing */ 53 /* insert into forwarding database after filtering to avoid spoofing */
54 br_fdb_update(p->br, p, eth_hdr(skb)->h_source); 54 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
55 55
56 if (p->state == BR_STATE_LEARNING) {
57 kfree_skb(skb);
58 goto out;
59 }
60
56 if (br->dev->flags & IFF_PROMISC) { 61 if (br->dev->flags & IFF_PROMISC) {
57 struct sk_buff *skb2; 62 struct sk_buff *skb2;
58 63
@@ -63,7 +68,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
63 } 68 }
64 } 69 }
65 70
66 if (dest[0] & 1) { 71 if (is_multicast_ether_addr(dest)) {
67 br_flood_forward(br, skb, !passedup); 72 br_flood_forward(br, skb, !passedup);
68 if (!passedup) 73 if (!passedup)
69 br_pass_frame_up(br, skb); 74 br_pass_frame_up(br, skb);
@@ -107,9 +112,6 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
107 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 112 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
108 goto err; 113 goto err;
109 114
110 if (p->state == BR_STATE_LEARNING)
111 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
112
113 if (p->br->stp_enabled && 115 if (p->br->stp_enabled &&
114 !memcmp(dest, bridge_ula, 5) && 116 !memcmp(dest, bridge_ula, 5) &&
115 !(dest[5] & 0xF0)) { 117 !(dest[5] & 0xF0)) {
@@ -118,9 +120,10 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
118 NULL, br_stp_handle_bpdu); 120 NULL, br_stp_handle_bpdu);
119 return 1; 121 return 1;
120 } 122 }
123 goto err;
121 } 124 }
122 125
123 else if (p->state == BR_STATE_FORWARDING) { 126 if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
124 if (br_should_route_hook) { 127 if (br_should_route_hook) {
125 if (br_should_route_hook(pskb)) 128 if (br_should_route_hook(pskb))
126 return 0; 129 return 0;
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index b8ce14b22181..159fb8409824 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -13,6 +13,7 @@
13 * 2 of the License, or (at your option) any later version. 13 * 2 of the License, or (at your option) any later version.
14 */ 14 */
15 15
16#include <linux/capability.h>
16#include <linux/kernel.h> 17#include <linux/kernel.h>
17#include <linux/if_bridge.h> 18#include <linux/if_bridge.h>
18#include <linux/netdevice.h> 19#include <linux/netdevice.h>
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 23422bd53a5e..7cac3fb9f809 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -26,6 +26,7 @@
26#include <linux/ip.h> 26#include <linux/ip.h>
27#include <linux/netdevice.h> 27#include <linux/netdevice.h>
28#include <linux/skbuff.h> 28#include <linux/skbuff.h>
29#include <linux/if_arp.h>
29#include <linux/if_ether.h> 30#include <linux/if_ether.h>
30#include <linux/if_vlan.h> 31#include <linux/if_vlan.h>
31#include <linux/netfilter_bridge.h> 32#include <linux/netfilter_bridge.h>
@@ -33,8 +34,11 @@
33#include <linux/netfilter_ipv6.h> 34#include <linux/netfilter_ipv6.h>
34#include <linux/netfilter_arp.h> 35#include <linux/netfilter_arp.h>
35#include <linux/in_route.h> 36#include <linux/in_route.h>
37
36#include <net/ip.h> 38#include <net/ip.h>
37#include <net/ipv6.h> 39#include <net/ipv6.h>
40#include <net/route.h>
41
38#include <asm/uaccess.h> 42#include <asm/uaccess.h>
39#include <asm/checksum.h> 43#include <asm/checksum.h>
40#include "br_private.h" 44#include "br_private.h"
@@ -390,8 +394,9 @@ inhdr_error:
390 * target in particular. Save the original destination IP 394 * target in particular. Save the original destination IP
391 * address to be able to detect DNAT afterwards. */ 395 * address to be able to detect DNAT afterwards. */
392static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, 396static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
393 const struct net_device *in, const struct net_device *out, 397 const struct net_device *in,
394 int (*okfn)(struct sk_buff *)) 398 const struct net_device *out,
399 int (*okfn)(struct sk_buff *))
395{ 400{
396 struct iphdr *iph; 401 struct iphdr *iph;
397 __u32 len; 402 __u32 len;
@@ -408,8 +413,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
408 goto out; 413 goto out;
409 414
410 if (skb->protocol == __constant_htons(ETH_P_8021Q)) { 415 if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
416 u8 *vhdr = skb->data;
411 skb_pull(skb, VLAN_HLEN); 417 skb_pull(skb, VLAN_HLEN);
412 (skb)->nh.raw += VLAN_HLEN; 418 skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
419 skb->nh.raw += VLAN_HLEN;
413 } 420 }
414 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); 421 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
415 } 422 }
@@ -425,8 +432,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
425 goto out; 432 goto out;
426 433
427 if (skb->protocol == __constant_htons(ETH_P_8021Q)) { 434 if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
435 u8 *vhdr = skb->data;
428 skb_pull(skb, VLAN_HLEN); 436 skb_pull(skb, VLAN_HLEN);
429 (skb)->nh.raw += VLAN_HLEN; 437 skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
438 skb->nh.raw += VLAN_HLEN;
430 } 439 }
431 440
432 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 441 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 917311c6828b..a43a9c1d50d7 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -52,17 +52,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
52 br_stp_recalculate_bridge_id(br); 52 br_stp_recalculate_bridge_id(br);
53 break; 53 break;
54 54
55 case NETDEV_CHANGE: /* device is up but carrier changed */ 55 case NETDEV_CHANGE:
56 if (!(br->dev->flags & IFF_UP)) 56 if (br->dev->flags & IFF_UP)
57 break; 57 schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE);
58
59 if (netif_carrier_ok(dev)) {
60 if (p->state == BR_STATE_DISABLED)
61 br_stp_enable_port(p);
62 } else {
63 if (p->state != BR_STATE_DISABLED)
64 br_stp_disable_port(p);
65 }
66 break; 58 break;
67 59
68 case NETDEV_FEAT_CHANGE: 60 case NETDEV_FEAT_CHANGE:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index bdf95a74d8cd..c5bd631ffcd5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -27,6 +27,10 @@
27#define BR_PORT_BITS 10 27#define BR_PORT_BITS 10
28#define BR_MAX_PORTS (1<<BR_PORT_BITS) 28#define BR_MAX_PORTS (1<<BR_PORT_BITS)
29 29
30#define BR_PORT_DEBOUNCE (HZ/10)
31
32#define BR_VERSION "2.1"
33
30typedef struct bridge_id bridge_id; 34typedef struct bridge_id bridge_id;
31typedef struct mac_addr mac_addr; 35typedef struct mac_addr mac_addr;
32typedef __u16 port_id; 36typedef __u16 port_id;
@@ -78,6 +82,7 @@ struct net_bridge_port
78 struct timer_list hold_timer; 82 struct timer_list hold_timer;
79 struct timer_list message_age_timer; 83 struct timer_list message_age_timer;
80 struct kobject kobj; 84 struct kobject kobj;
85 struct work_struct carrier_check;
81 struct rcu_head rcu; 86 struct rcu_head rcu;
82}; 87};
83 88
@@ -90,6 +95,7 @@ struct net_bridge
90 spinlock_t hash_lock; 95 spinlock_t hash_lock;
91 struct hlist_head hash[BR_HASH_SIZE]; 96 struct hlist_head hash[BR_HASH_SIZE];
92 struct list_head age_list; 97 struct list_head age_list;
98 unsigned long feature_mask;
93 99
94 /* STP */ 100 /* STP */
95 bridge_id designated_root; 101 bridge_id designated_root;
@@ -201,6 +207,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br);
201extern void br_stp_enable_port(struct net_bridge_port *p); 207extern void br_stp_enable_port(struct net_bridge_port *p);
202extern void br_stp_disable_port(struct net_bridge_port *p); 208extern void br_stp_disable_port(struct net_bridge_port *p);
203extern void br_stp_recalculate_bridge_id(struct net_bridge *br); 209extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
210extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
204extern void br_stp_set_bridge_priority(struct net_bridge *br, 211extern void br_stp_set_bridge_priority(struct net_bridge *br,
205 u16 newprio); 212 u16 newprio);
206extern void br_stp_set_port_priority(struct net_bridge_port *p, 213extern void br_stp_set_port_priority(struct net_bridge_port *p,
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index ac09b6a23523..cc047f7fb6ef 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -120,8 +120,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
120} 120}
121 121
122/* called under bridge lock */ 122/* called under bridge lock */
123static void br_stp_change_bridge_id(struct net_bridge *br, 123void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
124 const unsigned char *addr)
125{ 124{
126 unsigned char oldaddr[6]; 125 unsigned char oldaddr[6];
127 struct net_bridge_port *p; 126 struct net_bridge_port *p;
@@ -158,7 +157,7 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
158 157
159 list_for_each_entry(p, &br->port_list, list) { 158 list_for_each_entry(p, &br->port_list, list) {
160 if (addr == br_mac_zero || 159 if (addr == br_mac_zero ||
161 compare_ether_addr(p->dev->dev_addr, addr) < 0) 160 memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0)
162 addr = p->dev->dev_addr; 161 addr = p->dev->dev_addr;
163 162
164 } 163 }
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 98cf53c81fad..6f577f16c4c0 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -11,6 +11,7 @@
11 * 2 of the License, or (at your option) any later version. 11 * 2 of the License, or (at your option) any later version.
12 */ 12 */
13 13
14#include <linux/capability.h>
14#include <linux/kernel.h> 15#include <linux/kernel.h>
15#include <linux/netdevice.h> 16#include <linux/netdevice.h>
16#include <linux/if_bridge.h> 17#include <linux/if_bridge.h>
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index f6a19d53eaeb..0ac0355d16dd 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -11,6 +11,7 @@
11 * 2 of the License, or (at your option) any later version. 11 * 2 of the License, or (at your option) any later version.
12 */ 12 */
13 13
14#include <linux/capability.h>
14#include <linux/kernel.h> 15#include <linux/kernel.h>
15#include <linux/netdevice.h> 16#include <linux/netdevice.h>
16#include <linux/if_bridge.h> 17#include <linux/if_bridge.h>
@@ -248,7 +249,7 @@ int br_sysfs_addif(struct net_bridge_port *p)
248 if (err) 249 if (err)
249 goto out2; 250 goto out2;
250 251
251 kobject_hotplug(&p->kobj, KOBJ_ADD); 252 kobject_uevent(&p->kobj, KOBJ_ADD);
252 return 0; 253 return 0;
253 out2: 254 out2:
254 kobject_del(&p->kobj); 255 kobject_del(&p->kobj);
@@ -260,7 +261,7 @@ void br_sysfs_removeif(struct net_bridge_port *p)
260{ 261{
261 pr_debug("br_sysfs_removeif\n"); 262 pr_debug("br_sysfs_removeif\n");
262 sysfs_remove_link(&p->br->ifobj, p->dev->name); 263 sysfs_remove_link(&p->br->ifobj, p->dev->name);
263 kobject_hotplug(&p->kobj, KOBJ_REMOVE); 264 kobject_uevent(&p->kobj, KOBJ_REMOVE);
264 kobject_del(&p->kobj); 265 kobject_del(&p->kobj);
265} 266}
266 267
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index c70b3be23026..b84fc6075fe1 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -196,9 +196,13 @@ config BRIDGE_EBT_LOG
196 To compile it as a module, choose M here. If unsure, say N. 196 To compile it as a module, choose M here. If unsure, say N.
197 197
198config BRIDGE_EBT_ULOG 198config BRIDGE_EBT_ULOG
199 tristate "ebt: ulog support" 199 tristate "ebt: ulog support (OBSOLETE)"
200 depends on BRIDGE_NF_EBTABLES 200 depends on BRIDGE_NF_EBTABLES
201 help 201 help
202 This option enables the old bridge-specific "ebt_ulog" implementation
203 which has been obsoleted by the new "nfnetlink_log" code (see
204 CONFIG_NETFILTER_NETLINK_LOG).
205
202 This option adds the ulog watcher, that you can use in any rule 206 This option adds the ulog watcher, that you can use in any rule
203 in any ebtables table. The packet is passed to a userspace 207 in any ebtables table. The packet is passed to a userspace
204 logging daemon using netlink multicast sockets. This differs 208 logging daemon using netlink multicast sockets. This differs
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 7323805b9726..f158fe67dd60 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -15,6 +15,7 @@
15#include <linux/netfilter_bridge/ebtables.h> 15#include <linux/netfilter_bridge/ebtables.h>
16#include <linux/netfilter_bridge/ebt_ip.h> 16#include <linux/netfilter_bridge/ebt_ip.h>
17#include <linux/ip.h> 17#include <linux/ip.h>
18#include <net/ip.h>
18#include <linux/in.h> 19#include <linux/in.h>
19#include <linux/module.h> 20#include <linux/module.h>
20 21
@@ -51,6 +52,8 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in,
51 if (!(info->bitmask & EBT_IP_DPORT) && 52 if (!(info->bitmask & EBT_IP_DPORT) &&
52 !(info->bitmask & EBT_IP_SPORT)) 53 !(info->bitmask & EBT_IP_SPORT))
53 return EBT_MATCH; 54 return EBT_MATCH;
55 if (ntohs(ih->frag_off) & IP_OFFSET)
56 return EBT_NOMATCH;
54 pptr = skb_header_pointer(skb, ih->ihl*4, 57 pptr = skb_header_pointer(skb, ih->ihl*4,
55 sizeof(_ports), &_ports); 58 sizeof(_ports), &_ports);
56 if (pptr == NULL) 59 if (pptr == NULL)
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 662975be3d1d..9f6e0193ae10 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -3,13 +3,16 @@
3 * 3 *
4 * Authors: 4 * Authors:
5 * Bart De Schuymer <bdschuym@pandora.be> 5 * Bart De Schuymer <bdschuym@pandora.be>
6 * Harald Welte <laforge@netfilter.org>
6 * 7 *
7 * April, 2002 8 * April, 2002
8 * 9 *
9 */ 10 */
10 11
12#include <linux/in.h>
11#include <linux/netfilter_bridge/ebtables.h> 13#include <linux/netfilter_bridge/ebtables.h>
12#include <linux/netfilter_bridge/ebt_log.h> 14#include <linux/netfilter_bridge/ebt_log.h>
15#include <linux/netfilter.h>
13#include <linux/module.h> 16#include <linux/module.h>
14#include <linux/ip.h> 17#include <linux/ip.h>
15#include <linux/if_arp.h> 18#include <linux/if_arp.h>
@@ -55,27 +58,30 @@ static void print_MAC(unsigned char *p)
55} 58}
56 59
57#define myNIPQUAD(a) a[0], a[1], a[2], a[3] 60#define myNIPQUAD(a) a[0], a[1], a[2], a[3]
58static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, 61static void
59 const struct net_device *in, const struct net_device *out, 62ebt_log_packet(unsigned int pf, unsigned int hooknum,
60 const void *data, unsigned int datalen) 63 const struct sk_buff *skb, const struct net_device *in,
64 const struct net_device *out, const struct nf_loginfo *loginfo,
65 const char *prefix)
61{ 66{
62 struct ebt_log_info *info = (struct ebt_log_info *)data; 67 unsigned int bitmask;
63 char level_string[4] = "< >";
64 68
65 level_string[1] = '0' + info->loglevel;
66 spin_lock_bh(&ebt_log_lock); 69 spin_lock_bh(&ebt_log_lock);
67 printk(level_string); 70 printk("<%c>%s IN=%s OUT=%s MAC source = ", '0' + loginfo->u.log.level,
68 printk("%s IN=%s OUT=%s ", info->prefix, in ? in->name : "", 71 prefix, in ? in->name : "", out ? out->name : "");
69 out ? out->name : "");
70 72
71 printk("MAC source = ");
72 print_MAC(eth_hdr(skb)->h_source); 73 print_MAC(eth_hdr(skb)->h_source);
73 printk("MAC dest = "); 74 printk("MAC dest = ");
74 print_MAC(eth_hdr(skb)->h_dest); 75 print_MAC(eth_hdr(skb)->h_dest);
75 76
76 printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto)); 77 printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto));
77 78
78 if ((info->bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == 79 if (loginfo->type == NF_LOG_TYPE_LOG)
80 bitmask = loginfo->u.log.logflags;
81 else
82 bitmask = NF_LOG_MASK;
83
84 if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
79 htons(ETH_P_IP)){ 85 htons(ETH_P_IP)){
80 struct iphdr _iph, *ih; 86 struct iphdr _iph, *ih;
81 87
@@ -84,10 +90,9 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
84 printk(" INCOMPLETE IP header"); 90 printk(" INCOMPLETE IP header");
85 goto out; 91 goto out;
86 } 92 }
87 printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u,", 93 printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP "
88 NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); 94 "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr),
89 printk(" IP tos=0x%02X, IP proto=%d", ih->tos, 95 NIPQUAD(ih->daddr), ih->tos, ih->protocol);
90 ih->protocol);
91 if (ih->protocol == IPPROTO_TCP || 96 if (ih->protocol == IPPROTO_TCP ||
92 ih->protocol == IPPROTO_UDP) { 97 ih->protocol == IPPROTO_UDP) {
93 struct tcpudphdr _ports, *pptr; 98 struct tcpudphdr _ports, *pptr;
@@ -104,7 +109,7 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
104 goto out; 109 goto out;
105 } 110 }
106 111
107 if ((info->bitmask & EBT_LOG_ARP) && 112 if ((bitmask & EBT_LOG_ARP) &&
108 ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || 113 ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) ||
109 (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { 114 (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) {
110 struct arphdr _arph, *ah; 115 struct arphdr _arph, *ah;
@@ -144,6 +149,21 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
144out: 149out:
145 printk("\n"); 150 printk("\n");
146 spin_unlock_bh(&ebt_log_lock); 151 spin_unlock_bh(&ebt_log_lock);
152
153}
154
155static void ebt_log(const struct sk_buff *skb, unsigned int hooknr,
156 const struct net_device *in, const struct net_device *out,
157 const void *data, unsigned int datalen)
158{
159 struct ebt_log_info *info = (struct ebt_log_info *)data;
160 struct nf_loginfo li;
161
162 li.type = NF_LOG_TYPE_LOG;
163 li.u.log.level = info->loglevel;
164 li.u.log.logflags = info->bitmask;
165
166 nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, info->prefix);
147} 167}
148 168
149static struct ebt_watcher log = 169static struct ebt_watcher log =
@@ -154,13 +174,32 @@ static struct ebt_watcher log =
154 .me = THIS_MODULE, 174 .me = THIS_MODULE,
155}; 175};
156 176
177static struct nf_logger ebt_log_logger = {
178 .name = "ebt_log",
179 .logfn = &ebt_log_packet,
180 .me = THIS_MODULE,
181};
182
157static int __init init(void) 183static int __init init(void)
158{ 184{
159 return ebt_register_watcher(&log); 185 int ret;
186
187 ret = ebt_register_watcher(&log);
188 if (ret < 0)
189 return ret;
190 if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) {
191 printk(KERN_WARNING "ebt_log: not logging via system console "
192 "since somebody else already registered for PF_INET\n");
193 /* we cannot make module load fail here, since otherwise
194 * ebtables userspace would abort */
195 }
196
197 return 0;
160} 198}
161 199
162static void __exit fini(void) 200static void __exit fini(void)
163{ 201{
202 nf_log_unregister_logger(&ebt_log_logger);
164 ebt_unregister_watcher(&log); 203 ebt_unregister_watcher(&log);
165} 204}
166 205
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index f8a8cdec16ee..0248c67277ee 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/netfilter_bridge/ebtables.h> 11#include <linux/netfilter_bridge/ebtables.h>
12#include <linux/netfilter_bridge/ebt_stp.h> 12#include <linux/netfilter_bridge/ebt_stp.h>
13#include <linux/etherdevice.h>
13#include <linux/module.h> 14#include <linux/module.h>
14 15
15#define BPDU_TYPE_CONFIG 0 16#define BPDU_TYPE_CONFIG 0
@@ -164,8 +165,8 @@ static int ebt_stp_check(const char *tablename, unsigned int hookmask,
164 if (datalen != len) 165 if (datalen != len)
165 return -EINVAL; 166 return -EINVAL;
166 /* Make sure the match only receives stp frames */ 167 /* Make sure the match only receives stp frames */
167 if (memcmp(e->destmac, bridge_ula, ETH_ALEN) || 168 if (compare_ether_addr(e->destmac, bridge_ula) ||
168 memcmp(e->destmsk, msk, ETH_ALEN) || !(e->bitmask & EBT_DESTMAC)) 169 compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC))
169 return -EINVAL; 170 return -EINVAL;
170 171
171 return 0; 172 return 0;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index aae26ae2e61f..ce617b3dbbb8 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Authors: 4 * Authors:
5 * Bart De Schuymer <bdschuym@pandora.be> 5 * Bart De Schuymer <bdschuym@pandora.be>
6 * Harald Welte <laforge@netfilter.org>
6 * 7 *
7 * November, 2004 8 * November, 2004
8 * 9 *
@@ -115,14 +116,13 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
115 return skb; 116 return skb;
116} 117}
117 118
118static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, 119static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
119 const struct net_device *in, const struct net_device *out, 120 const struct net_device *in, const struct net_device *out,
120 const void *data, unsigned int datalen) 121 const struct ebt_ulog_info *uloginfo, const char *prefix)
121{ 122{
122 ebt_ulog_packet_msg_t *pm; 123 ebt_ulog_packet_msg_t *pm;
123 size_t size, copy_len; 124 size_t size, copy_len;
124 struct nlmsghdr *nlh; 125 struct nlmsghdr *nlh;
125 struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data;
126 unsigned int group = uloginfo->nlgroup; 126 unsigned int group = uloginfo->nlgroup;
127 ebt_ulog_buff_t *ub = &ulog_buffers[group]; 127 ebt_ulog_buff_t *ub = &ulog_buffers[group];
128 spinlock_t *lock = &ub->lock; 128 spinlock_t *lock = &ub->lock;
@@ -216,6 +216,39 @@ alloc_failure:
216 goto unlock; 216 goto unlock;
217} 217}
218 218
219/* this function is registered with the netfilter core */
220static void ebt_log_packet(unsigned int pf, unsigned int hooknum,
221 const struct sk_buff *skb, const struct net_device *in,
222 const struct net_device *out, const struct nf_loginfo *li,
223 const char *prefix)
224{
225 struct ebt_ulog_info loginfo;
226
227 if (!li || li->type != NF_LOG_TYPE_ULOG) {
228 loginfo.nlgroup = EBT_ULOG_DEFAULT_NLGROUP;
229 loginfo.cprange = 0;
230 loginfo.qthreshold = EBT_ULOG_DEFAULT_QTHRESHOLD;
231 loginfo.prefix[0] = '\0';
232 } else {
233 loginfo.nlgroup = li->u.ulog.group;
234 loginfo.cprange = li->u.ulog.copy_len;
235 loginfo.qthreshold = li->u.ulog.qthreshold;
236 strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
237 }
238
239 ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
240}
241
242static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
243 const struct net_device *in, const struct net_device *out,
244 const void *data, unsigned int datalen)
245{
246 struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data;
247
248 ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL);
249}
250
251
219static int ebt_ulog_check(const char *tablename, unsigned int hookmask, 252static int ebt_ulog_check(const char *tablename, unsigned int hookmask,
220 const struct ebt_entry *e, void *data, unsigned int datalen) 253 const struct ebt_entry *e, void *data, unsigned int datalen)
221{ 254{
@@ -240,6 +273,12 @@ static struct ebt_watcher ulog = {
240 .me = THIS_MODULE, 273 .me = THIS_MODULE,
241}; 274};
242 275
276static struct nf_logger ebt_ulog_logger = {
277 .name = EBT_ULOG_WATCHER,
278 .logfn = &ebt_log_packet,
279 .me = THIS_MODULE,
280};
281
243static int __init init(void) 282static int __init init(void)
244{ 283{
245 int i, ret = 0; 284 int i, ret = 0;
@@ -265,6 +304,13 @@ static int __init init(void)
265 else if ((ret = ebt_register_watcher(&ulog))) 304 else if ((ret = ebt_register_watcher(&ulog)))
266 sock_release(ebtulognl->sk_socket); 305 sock_release(ebtulognl->sk_socket);
267 306
307 if (nf_log_register(PF_BRIDGE, &ebt_ulog_logger) < 0) {
308 printk(KERN_WARNING "ebt_ulog: not logging via ulog "
309 "since somebody else already registered for PF_BRIDGE\n");
310 /* we cannot make module load fail here, since otherwise
311 * ebtables userspace would abort */
312 }
313
268 return ret; 314 return ret;
269} 315}
270 316
@@ -273,6 +319,7 @@ static void __exit fini(void)
273 ebt_ulog_buff_t *ub; 319 ebt_ulog_buff_t *ub;
274 int i; 320 int i;
275 321
322 nf_log_unregister_logger(&ebt_ulog_logger);
276 ebt_unregister_watcher(&ulog); 323 ebt_unregister_watcher(&ulog);
277 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { 324 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
278 ub = &ulog_buffers[i]; 325 ub = &ulog_buffers[i];
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f8ffbf6e2333..00729b3604f8 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -944,7 +944,7 @@ static int do_replace(void __user *user, unsigned int len)
944 if (countersize) 944 if (countersize)
945 memset(newinfo->counters, 0, countersize); 945 memset(newinfo->counters, 0, countersize);
946 946
947 newinfo->entries = (char *)vmalloc(tmp.entries_size); 947 newinfo->entries = vmalloc(tmp.entries_size);
948 if (!newinfo->entries) { 948 if (!newinfo->entries) {
949 ret = -ENOMEM; 949 ret = -ENOMEM;
950 goto free_newinfo; 950 goto free_newinfo;
@@ -1146,7 +1146,7 @@ int ebt_register_table(struct ebt_table *table)
1146 if (!newinfo) 1146 if (!newinfo)
1147 return -ENOMEM; 1147 return -ENOMEM;
1148 1148
1149 newinfo->entries = (char *)vmalloc(table->table->entries_size); 1149 newinfo->entries = vmalloc(table->table->entries_size);
1150 if (!(newinfo->entries)) 1150 if (!(newinfo->entries))
1151 goto free_newinfo; 1151 goto free_newinfo;
1152 1152
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 1bcfef51ac58..f8d322e1ea92 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -47,6 +47,7 @@
47#include <linux/rtnetlink.h> 47#include <linux/rtnetlink.h>
48#include <linux/poll.h> 48#include <linux/poll.h>
49#include <linux/highmem.h> 49#include <linux/highmem.h>
50#include <linux/spinlock.h>
50 51
51#include <net/protocol.h> 52#include <net/protocol.h>
52#include <linux/skbuff.h> 53#include <linux/skbuff.h>
@@ -200,6 +201,41 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
200} 201}
201 202
202/** 203/**
204 * skb_kill_datagram - Free a datagram skbuff forcibly
205 * @sk: socket
206 * @skb: datagram skbuff
207 * @flags: MSG_ flags
208 *
209 * This function frees a datagram skbuff that was received by
210 * skb_recv_datagram. The flags argument must match the one
211 * used for skb_recv_datagram.
212 *
213 * If the MSG_PEEK flag is set, and the packet is still on the
214 * receive queue of the socket, it will be taken off the queue
215 * before it is freed.
216 *
217 * This function currently only disables BH when acquiring the
218 * sk_receive_queue lock. Therefore it must not be used in a
219 * context where that lock is acquired in an IRQ context.
220 */
221
222void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
223{
224 if (flags & MSG_PEEK) {
225 spin_lock_bh(&sk->sk_receive_queue.lock);
226 if (skb == skb_peek(&sk->sk_receive_queue)) {
227 __skb_unlink(skb, &sk->sk_receive_queue);
228 atomic_dec(&skb->users);
229 }
230 spin_unlock_bh(&sk->sk_receive_queue.lock);
231 }
232
233 kfree_skb(skb);
234}
235
236EXPORT_SYMBOL(skb_kill_datagram);
237
238/**
203 * skb_copy_datagram_iovec - Copy a datagram to an iovec. 239 * skb_copy_datagram_iovec - Copy a datagram to an iovec.
204 * @skb: buffer to copy 240 * @skb: buffer to copy
205 * @offset: offset in the buffer to start copying from 241 * @offset: offset in the buffer to start copying from
diff --git a/net/core/dev.c b/net/core/dev.c
index a5efc9ae010b..fd070a098f20 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -75,6 +75,7 @@
75#include <asm/uaccess.h> 75#include <asm/uaccess.h>
76#include <asm/system.h> 76#include <asm/system.h>
77#include <linux/bitops.h> 77#include <linux/bitops.h>
78#include <linux/capability.h>
78#include <linux/config.h> 79#include <linux/config.h>
79#include <linux/cpu.h> 80#include <linux/cpu.h>
80#include <linux/types.h> 81#include <linux/types.h>
@@ -626,7 +627,7 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas
626 * Network device names need to be valid file names to 627 * Network device names need to be valid file names to
627 * to allow sysfs to work 628 * to allow sysfs to work
628 */ 629 */
629static int dev_valid_name(const char *name) 630int dev_valid_name(const char *name)
630{ 631{
631 return !(*name == '\0' 632 return !(*name == '\0'
632 || !strcmp(name, ".") 633 || !strcmp(name, ".")
@@ -1092,15 +1093,12 @@ int skb_checksum_help(struct sk_buff *skb, int inward)
1092 goto out; 1093 goto out;
1093 } 1094 }
1094 1095
1095 if (offset > (int)skb->len) 1096 BUG_ON(offset > (int)skb->len);
1096 BUG();
1097 csum = skb_checksum(skb, offset, skb->len-offset, 0); 1097 csum = skb_checksum(skb, offset, skb->len-offset, 0);
1098 1098
1099 offset = skb->tail - skb->h.raw; 1099 offset = skb->tail - skb->h.raw;
1100 if (offset <= 0) 1100 BUG_ON(offset <= 0);
1101 BUG(); 1101 BUG_ON(skb->csum + 2 > offset);
1102 if (skb->csum + 2 > offset)
1103 BUG();
1104 1102
1105 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); 1103 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1106 skb->ip_summed = CHECKSUM_NONE; 1104 skb->ip_summed = CHECKSUM_NONE;
@@ -3270,13 +3268,13 @@ EXPORT_SYMBOL(__dev_get_by_index);
3270EXPORT_SYMBOL(__dev_get_by_name); 3268EXPORT_SYMBOL(__dev_get_by_name);
3271EXPORT_SYMBOL(__dev_remove_pack); 3269EXPORT_SYMBOL(__dev_remove_pack);
3272EXPORT_SYMBOL(__skb_linearize); 3270EXPORT_SYMBOL(__skb_linearize);
3271EXPORT_SYMBOL(dev_valid_name);
3273EXPORT_SYMBOL(dev_add_pack); 3272EXPORT_SYMBOL(dev_add_pack);
3274EXPORT_SYMBOL(dev_alloc_name); 3273EXPORT_SYMBOL(dev_alloc_name);
3275EXPORT_SYMBOL(dev_close); 3274EXPORT_SYMBOL(dev_close);
3276EXPORT_SYMBOL(dev_get_by_flags); 3275EXPORT_SYMBOL(dev_get_by_flags);
3277EXPORT_SYMBOL(dev_get_by_index); 3276EXPORT_SYMBOL(dev_get_by_index);
3278EXPORT_SYMBOL(dev_get_by_name); 3277EXPORT_SYMBOL(dev_get_by_name);
3279EXPORT_SYMBOL(dev_ioctl);
3280EXPORT_SYMBOL(dev_open); 3278EXPORT_SYMBOL(dev_open);
3281EXPORT_SYMBOL(dev_queue_xmit); 3279EXPORT_SYMBOL(dev_queue_xmit);
3282EXPORT_SYMBOL(dev_remove_pack); 3280EXPORT_SYMBOL(dev_remove_pack);
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index cb530eef0e39..05d60850840e 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -158,7 +158,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
158 int err = 0; 158 int err = 0;
159 struct dev_mc_list *dmi, *dmi1; 159 struct dev_mc_list *dmi, *dmi1;
160 160
161 dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC); 161 dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
162 162
163 spin_lock_bh(&dev->xmit_lock); 163 spin_lock_bh(&dev->xmit_lock);
164 for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { 164 for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
diff --git a/net/core/dv.c b/net/core/dv.c
index 3f25f4aa4e66..cf581407538c 100644
--- a/net/core/dv.c
+++ b/net/core/dv.c
@@ -24,6 +24,7 @@
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
25#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
26#include <linux/skbuff.h> 26#include <linux/skbuff.h>
27#include <linux/capability.h>
27#include <linux/errno.h> 28#include <linux/errno.h>
28#include <linux/init.h> 29#include <linux/init.h>
29#include <net/dst.h> 30#include <net/dst.h>
@@ -457,7 +458,7 @@ void divert_frame(struct sk_buff *skb)
457 unsigned char *skb_data_end = skb->data + skb->len; 458 unsigned char *skb_data_end = skb->data + skb->len;
458 459
459 /* Packet is already aimed at us, return */ 460 /* Packet is already aimed at us, return */
460 if (!memcmp(eth, skb->dev->dev_addr, ETH_ALEN)) 461 if (!compare_ether_addr(eth->h_dest, skb->dev->dev_addr))
461 return; 462 return;
462 463
463 /* proto is not IP, do nothing */ 464 /* proto is not IP, do nothing */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0350586e9195..e6f76106a99b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/capability.h>
14#include <linux/errno.h> 15#include <linux/errno.h>
15#include <linux/ethtool.h> 16#include <linux/ethtool.h>
16#include <linux/netdevice.h> 17#include <linux/netdevice.h>
diff --git a/net/core/filter.c b/net/core/filter.c
index 3a10e0bc90e8..9eb9d0017a01 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -13,6 +13,7 @@
13 * 2 of the License, or (at your option) any later version. 13 * 2 of the License, or (at your option) any later version.
14 * 14 *
15 * Andi Kleen - Fix a few bad bugs and races. 15 * Andi Kleen - Fix a few bad bugs and races.
16 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
16 */ 17 */
17 18
18#include <linux/module.h> 19#include <linux/module.h>
@@ -74,7 +75,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
74 * len is the number of filter blocks in the array. 75 * len is the number of filter blocks in the array.
75 */ 76 */
76 77
77int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) 78unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
78{ 79{
79 struct sock_filter *fentry; /* We walk down these */ 80 struct sock_filter *fentry; /* We walk down these */
80 void *ptr; 81 void *ptr;
@@ -240,9 +241,9 @@ load_b:
240 A = X; 241 A = X;
241 continue; 242 continue;
242 case BPF_RET|BPF_K: 243 case BPF_RET|BPF_K:
243 return ((unsigned int)fentry->k); 244 return fentry->k;
244 case BPF_RET|BPF_A: 245 case BPF_RET|BPF_A:
245 return ((unsigned int)A); 246 return A;
246 case BPF_ST: 247 case BPF_ST:
247 mem[fentry->k] = A; 248 mem[fentry->k] = A;
248 continue; 249 continue;
@@ -250,7 +251,7 @@ load_b:
250 mem[fentry->k] = X; 251 mem[fentry->k] = X;
251 continue; 252 continue;
252 default: 253 default:
253 /* Invalid instruction counts as RET */ 254 WARN_ON(1);
254 return 0; 255 return 0;
255 } 256 }
256 257
@@ -283,8 +284,8 @@ load_b:
283 * 284 *
284 * Check the user's filter code. If we let some ugly 285 * Check the user's filter code. If we let some ugly
285 * filter code slip through kaboom! The filter must contain 286 * filter code slip through kaboom! The filter must contain
286 * no references or jumps that are out of range, no illegal instructions 287 * no references or jumps that are out of range, no illegal
287 * and no backward jumps. It must end with a RET instruction 288 * instructions, and must end with a RET instruction.
288 * 289 *
289 * Returns 0 if the rule set is legal or a negative errno code if not. 290 * Returns 0 if the rule set is legal or a negative errno code if not.
290 */ 291 */
@@ -300,38 +301,85 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
300 for (pc = 0; pc < flen; pc++) { 301 for (pc = 0; pc < flen; pc++) {
301 /* all jumps are forward as they are not signed */ 302 /* all jumps are forward as they are not signed */
302 ftest = &filter[pc]; 303 ftest = &filter[pc];
303 if (BPF_CLASS(ftest->code) == BPF_JMP) {
304 /* but they mustn't jump off the end */
305 if (BPF_OP(ftest->code) == BPF_JA) {
306 /*
307 * Note, the large ftest->k might cause loops.
308 * Compare this with conditional jumps below,
309 * where offsets are limited. --ANK (981016)
310 */
311 if (ftest->k >= (unsigned)(flen-pc-1))
312 return -EINVAL;
313 } else {
314 /* for conditionals both must be safe */
315 if (pc + ftest->jt +1 >= flen ||
316 pc + ftest->jf +1 >= flen)
317 return -EINVAL;
318 }
319 }
320 304
321 /* check for division by zero -Kris Katterjohn 2005-10-30 */ 305 /* Only allow valid instructions */
322 if (ftest->code == (BPF_ALU|BPF_DIV|BPF_K) && ftest->k == 0) 306 switch (ftest->code) {
323 return -EINVAL; 307 case BPF_ALU|BPF_ADD|BPF_K:
308 case BPF_ALU|BPF_ADD|BPF_X:
309 case BPF_ALU|BPF_SUB|BPF_K:
310 case BPF_ALU|BPF_SUB|BPF_X:
311 case BPF_ALU|BPF_MUL|BPF_K:
312 case BPF_ALU|BPF_MUL|BPF_X:
313 case BPF_ALU|BPF_DIV|BPF_X:
314 case BPF_ALU|BPF_AND|BPF_K:
315 case BPF_ALU|BPF_AND|BPF_X:
316 case BPF_ALU|BPF_OR|BPF_K:
317 case BPF_ALU|BPF_OR|BPF_X:
318 case BPF_ALU|BPF_LSH|BPF_K:
319 case BPF_ALU|BPF_LSH|BPF_X:
320 case BPF_ALU|BPF_RSH|BPF_K:
321 case BPF_ALU|BPF_RSH|BPF_X:
322 case BPF_ALU|BPF_NEG:
323 case BPF_LD|BPF_W|BPF_ABS:
324 case BPF_LD|BPF_H|BPF_ABS:
325 case BPF_LD|BPF_B|BPF_ABS:
326 case BPF_LD|BPF_W|BPF_LEN:
327 case BPF_LD|BPF_W|BPF_IND:
328 case BPF_LD|BPF_H|BPF_IND:
329 case BPF_LD|BPF_B|BPF_IND:
330 case BPF_LD|BPF_IMM:
331 case BPF_LDX|BPF_W|BPF_LEN:
332 case BPF_LDX|BPF_B|BPF_MSH:
333 case BPF_LDX|BPF_IMM:
334 case BPF_MISC|BPF_TAX:
335 case BPF_MISC|BPF_TXA:
336 case BPF_RET|BPF_K:
337 case BPF_RET|BPF_A:
338 break;
339
340 /* Some instructions need special checks */
324 341
325 /* check that memory operations use valid addresses. */ 342 case BPF_ALU|BPF_DIV|BPF_K:
326 if (ftest->k >= BPF_MEMWORDS) { 343 /* check for division by zero */
327 /* but it might not be a memory operation... */ 344 if (ftest->k == 0)
328 switch (ftest->code) {
329 case BPF_ST:
330 case BPF_STX:
331 case BPF_LD|BPF_MEM:
332 case BPF_LDX|BPF_MEM:
333 return -EINVAL; 345 return -EINVAL;
334 } 346 break;
347
348 case BPF_LD|BPF_MEM:
349 case BPF_LDX|BPF_MEM:
350 case BPF_ST:
351 case BPF_STX:
352 /* check for invalid memory addresses */
353 if (ftest->k >= BPF_MEMWORDS)
354 return -EINVAL;
355 break;
356
357 case BPF_JMP|BPF_JA:
358 /*
359 * Note, the large ftest->k might cause loops.
360 * Compare this with conditional jumps below,
361 * where offsets are limited. --ANK (981016)
362 */
363 if (ftest->k >= (unsigned)(flen-pc-1))
364 return -EINVAL;
365 break;
366
367 case BPF_JMP|BPF_JEQ|BPF_K:
368 case BPF_JMP|BPF_JEQ|BPF_X:
369 case BPF_JMP|BPF_JGE|BPF_K:
370 case BPF_JMP|BPF_JGE|BPF_X:
371 case BPF_JMP|BPF_JGT|BPF_K:
372 case BPF_JMP|BPF_JGT|BPF_X:
373 case BPF_JMP|BPF_JSET|BPF_K:
374 case BPF_JMP|BPF_JSET|BPF_X:
375 /* for conditionals both must be safe */
376 if (pc + ftest->jt + 1 >= flen ||
377 pc + ftest->jf + 1 >= flen)
378 return -EINVAL;
379 break;
380
381 default:
382 return -EINVAL;
335 } 383 }
336 } 384 }
337 385
diff --git a/net/core/flow.c b/net/core/flow.c
index 7e95b39de9fd..c4f25385029f 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -23,6 +23,7 @@
23#include <net/flow.h> 23#include <net/flow.h>
24#include <asm/atomic.h> 24#include <asm/atomic.h>
25#include <asm/semaphore.h> 25#include <asm/semaphore.h>
26#include <linux/security.h>
26 27
27struct flow_cache_entry { 28struct flow_cache_entry {
28 struct flow_cache_entry *next; 29 struct flow_cache_entry *next;
@@ -30,6 +31,7 @@ struct flow_cache_entry {
30 u8 dir; 31 u8 dir;
31 struct flowi key; 32 struct flowi key;
32 u32 genid; 33 u32 genid;
34 u32 sk_sid;
33 void *object; 35 void *object;
34 atomic_t *object_ref; 36 atomic_t *object_ref;
35}; 37};
@@ -162,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
162 return 0; 164 return 0;
163} 165}
164 166
165void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, 167void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
166 flow_resolve_t resolver) 168 flow_resolve_t resolver)
167{ 169{
168 struct flow_cache_entry *fle, **head; 170 struct flow_cache_entry *fle, **head;
@@ -186,6 +188,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
186 for (fle = *head; fle; fle = fle->next) { 188 for (fle = *head; fle; fle = fle->next) {
187 if (fle->family == family && 189 if (fle->family == family &&
188 fle->dir == dir && 190 fle->dir == dir &&
191 fle->sk_sid == sk_sid &&
189 flow_key_compare(key, &fle->key) == 0) { 192 flow_key_compare(key, &fle->key) == 0) {
190 if (fle->genid == atomic_read(&flow_cache_genid)) { 193 if (fle->genid == atomic_read(&flow_cache_genid)) {
191 void *ret = fle->object; 194 void *ret = fle->object;
@@ -210,6 +213,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
210 *head = fle; 213 *head = fle;
211 fle->family = family; 214 fle->family = family;
212 fle->dir = dir; 215 fle->dir = dir;
216 fle->sk_sid = sk_sid;
213 memcpy(&fle->key, key, sizeof(*key)); 217 memcpy(&fle->key, key, sizeof(*key));
214 fle->object = NULL; 218 fle->object = NULL;
215 flow_count(cpu)++; 219 flow_count(cpu)++;
@@ -221,7 +225,7 @@ nocache:
221 void *obj; 225 void *obj;
222 atomic_t *obj_ref; 226 atomic_t *obj_ref;
223 227
224 resolver(key, family, dir, &obj, &obj_ref); 228 resolver(key, sk_sid, family, dir, &obj, &obj_ref);
225 229
226 if (fle) { 230 if (fle) {
227 fle->genid = atomic_read(&flow_cache_genid); 231 fle->genid = atomic_read(&flow_cache_genid);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e2137f3e489d..e8b2acbc8ea2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -9,6 +9,7 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/capability.h>
12#include <linux/config.h> 13#include <linux/config.h>
13#include <linux/kernel.h> 14#include <linux/kernel.h>
14#include <linux/netdevice.h> 15#include <linux/netdevice.h>
@@ -16,6 +17,7 @@
16#include <net/sock.h> 17#include <net/sock.h>
17#include <linux/rtnetlink.h> 18#include <linux/rtnetlink.h>
18#include <linux/wireless.h> 19#include <linux/wireless.h>
20#include <net/iw_handler.h>
19 21
20#define to_class_dev(obj) container_of(obj,struct class_device,kobj) 22#define to_class_dev(obj) container_of(obj,struct class_device,kobj)
21#define to_net_dev(class) container_of(class, struct net_device, class_dev) 23#define to_net_dev(class) container_of(class, struct net_device, class_dev)
@@ -84,16 +86,11 @@ static ssize_t netdev_store(struct class_device *dev,
84 return ret; 86 return ret;
85} 87}
86 88
87/* generate a read-only network device class attribute */ 89NETDEVICE_SHOW(addr_len, fmt_dec);
88#define NETDEVICE_ATTR(field, format_string) \ 90NETDEVICE_SHOW(iflink, fmt_dec);
89NETDEVICE_SHOW(field, format_string) \ 91NETDEVICE_SHOW(ifindex, fmt_dec);
90static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL) \ 92NETDEVICE_SHOW(features, fmt_long_hex);
91 93NETDEVICE_SHOW(type, fmt_dec);
92NETDEVICE_ATTR(addr_len, fmt_dec);
93NETDEVICE_ATTR(iflink, fmt_dec);
94NETDEVICE_ATTR(ifindex, fmt_dec);
95NETDEVICE_ATTR(features, fmt_long_hex);
96NETDEVICE_ATTR(type, fmt_dec);
97 94
98/* use same locking rules as GIFHWADDR ioctl's */ 95/* use same locking rules as GIFHWADDR ioctl's */
99static ssize_t format_addr(char *buf, const unsigned char *addr, int len) 96static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
@@ -136,10 +133,6 @@ static ssize_t show_carrier(struct class_device *dev, char *buf)
136 return -EINVAL; 133 return -EINVAL;
137} 134}
138 135
139static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
140static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL);
141static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
142
143/* read-write attributes */ 136/* read-write attributes */
144NETDEVICE_SHOW(mtu, fmt_dec); 137NETDEVICE_SHOW(mtu, fmt_dec);
145 138
@@ -153,8 +146,6 @@ static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len)
153 return netdev_store(dev, buf, len, change_mtu); 146 return netdev_store(dev, buf, len, change_mtu);
154} 147}
155 148
156static CLASS_DEVICE_ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu);
157
158NETDEVICE_SHOW(flags, fmt_hex); 149NETDEVICE_SHOW(flags, fmt_hex);
159 150
160static int change_flags(struct net_device *net, unsigned long new_flags) 151static int change_flags(struct net_device *net, unsigned long new_flags)
@@ -167,8 +158,6 @@ static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len
167 return netdev_store(dev, buf, len, change_flags); 158 return netdev_store(dev, buf, len, change_flags);
168} 159}
169 160
170static CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags);
171
172NETDEVICE_SHOW(tx_queue_len, fmt_ulong); 161NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
173 162
174static int change_tx_queue_len(struct net_device *net, unsigned long new_len) 163static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
@@ -182,9 +171,6 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz
182 return netdev_store(dev, buf, len, change_tx_queue_len); 171 return netdev_store(dev, buf, len, change_tx_queue_len);
183} 172}
184 173
185static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
186 store_tx_queue_len);
187
188NETDEVICE_SHOW(weight, fmt_dec); 174NETDEVICE_SHOW(weight, fmt_dec);
189 175
190static int change_weight(struct net_device *net, unsigned long new_weight) 176static int change_weight(struct net_device *net, unsigned long new_weight)
@@ -198,24 +184,21 @@ static ssize_t store_weight(struct class_device *dev, const char *buf, size_t le
198 return netdev_store(dev, buf, len, change_weight); 184 return netdev_store(dev, buf, len, change_weight);
199} 185}
200 186
201static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight, 187static struct class_device_attribute net_class_attributes[] = {
202 store_weight); 188 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
203 189 __ATTR(iflink, S_IRUGO, show_iflink, NULL),
204 190 __ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
205static struct class_device_attribute *net_class_attributes[] = { 191 __ATTR(features, S_IRUGO, show_features, NULL),
206 &class_device_attr_ifindex, 192 __ATTR(type, S_IRUGO, show_type, NULL),
207 &class_device_attr_iflink, 193 __ATTR(address, S_IRUGO, show_address, NULL),
208 &class_device_attr_addr_len, 194 __ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
209 &class_device_attr_tx_queue_len, 195 __ATTR(carrier, S_IRUGO, show_carrier, NULL),
210 &class_device_attr_features, 196 __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
211 &class_device_attr_mtu, 197 __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
212 &class_device_attr_flags, 198 __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
213 &class_device_attr_weight, 199 store_tx_queue_len),
214 &class_device_attr_type, 200 __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight),
215 &class_device_attr_address, 201 {}
216 &class_device_attr_broadcast,
217 &class_device_attr_carrier,
218 NULL
219}; 202};
220 203
221/* Show a given an attribute in the statistics group */ 204/* Show a given an attribute in the statistics group */
@@ -313,13 +296,19 @@ static ssize_t wireless_show(struct class_device *cd, char *buf,
313 char *)) 296 char *))
314{ 297{
315 struct net_device *dev = to_net_dev(cd); 298 struct net_device *dev = to_net_dev(cd);
316 const struct iw_statistics *iw; 299 const struct iw_statistics *iw = NULL;
317 ssize_t ret = -EINVAL; 300 ssize_t ret = -EINVAL;
318 301
319 read_lock(&dev_base_lock); 302 read_lock(&dev_base_lock);
320 if (dev_isalive(dev) && dev->get_wireless_stats 303 if (dev_isalive(dev)) {
321 && (iw = dev->get_wireless_stats(dev)) != NULL) 304 if(dev->wireless_handlers &&
322 ret = (*format)(iw, buf); 305 dev->wireless_handlers->get_wireless_stats)
306 iw = dev->wireless_handlers->get_wireless_stats(dev);
307 else if (dev->get_wireless_stats)
308 iw = dev->get_wireless_stats(dev);
309 if (iw != NULL)
310 ret = (*format)(iw, buf);
311 }
323 read_unlock(&dev_base_lock); 312 read_unlock(&dev_base_lock);
324 313
325 return ret; 314 return ret;
@@ -369,14 +358,14 @@ static struct attribute_group wireless_group = {
369#endif 358#endif
370 359
371#ifdef CONFIG_HOTPLUG 360#ifdef CONFIG_HOTPLUG
372static int netdev_hotplug(struct class_device *cd, char **envp, 361static int netdev_uevent(struct class_device *cd, char **envp,
373 int num_envp, char *buf, int size) 362 int num_envp, char *buf, int size)
374{ 363{
375 struct net_device *dev = to_net_dev(cd); 364 struct net_device *dev = to_net_dev(cd);
376 int i = 0; 365 int i = 0;
377 int n; 366 int n;
378 367
379 /* pass interface in env to hotplug. */ 368 /* pass interface to uevent. */
380 envp[i++] = buf; 369 envp[i++] = buf;
381 n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1; 370 n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
382 buf += n; 371 buf += n;
@@ -407,8 +396,9 @@ static void netdev_release(struct class_device *cd)
407static struct class net_class = { 396static struct class net_class = {
408 .name = "net", 397 .name = "net",
409 .release = netdev_release, 398 .release = netdev_release,
399 .class_dev_attrs = net_class_attributes,
410#ifdef CONFIG_HOTPLUG 400#ifdef CONFIG_HOTPLUG
411 .hotplug = netdev_hotplug, 401 .uevent = netdev_uevent,
412#endif 402#endif
413}; 403};
414 404
@@ -420,7 +410,8 @@ void netdev_unregister_sysfs(struct net_device * net)
420 sysfs_remove_group(&class_dev->kobj, &netstat_group); 410 sysfs_remove_group(&class_dev->kobj, &netstat_group);
421 411
422#ifdef WIRELESS_EXT 412#ifdef WIRELESS_EXT
423 if (net->get_wireless_stats) 413 if (net->get_wireless_stats || (net->wireless_handlers &&
414 net->wireless_handlers->get_wireless_stats))
424 sysfs_remove_group(&class_dev->kobj, &wireless_group); 415 sysfs_remove_group(&class_dev->kobj, &wireless_group);
425#endif 416#endif
426 class_device_del(class_dev); 417 class_device_del(class_dev);
@@ -431,8 +422,6 @@ void netdev_unregister_sysfs(struct net_device * net)
431int netdev_register_sysfs(struct net_device *net) 422int netdev_register_sysfs(struct net_device *net)
432{ 423{
433 struct class_device *class_dev = &(net->class_dev); 424 struct class_device *class_dev = &(net->class_dev);
434 int i;
435 struct class_device_attribute *attr;
436 int ret; 425 int ret;
437 426
438 class_dev->class = &net_class; 427 class_dev->class = &net_class;
@@ -442,21 +431,17 @@ int netdev_register_sysfs(struct net_device *net)
442 if ((ret = class_device_register(class_dev))) 431 if ((ret = class_device_register(class_dev)))
443 goto out; 432 goto out;
444 433
445 for (i = 0; (attr = net_class_attributes[i]) != NULL; i++) {
446 if ((ret = class_device_create_file(class_dev, attr)))
447 goto out_unreg;
448 }
449
450
451 if (net->get_stats && 434 if (net->get_stats &&
452 (ret = sysfs_create_group(&class_dev->kobj, &netstat_group))) 435 (ret = sysfs_create_group(&class_dev->kobj, &netstat_group)))
453 goto out_unreg; 436 goto out_unreg;
454 437
455#ifdef WIRELESS_EXT 438#ifdef WIRELESS_EXT
456 if (net->get_wireless_stats && 439 if (net->get_wireless_stats || (net->wireless_handlers &&
457 (ret = sysfs_create_group(&class_dev->kobj, &wireless_group))) 440 net->wireless_handlers->get_wireless_stats)) {
458 goto out_cleanup; 441 ret = sysfs_create_group(&class_dev->kobj, &wireless_group);
459 442 if (ret)
443 goto out_cleanup;
444 }
460 return 0; 445 return 0;
461out_cleanup: 446out_cleanup:
462 if (net->get_stats) 447 if (net->get_stats)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 49424a42a2c0..281a632fa6a6 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -13,6 +13,7 @@
13#include <linux/netdevice.h> 13#include <linux/netdevice.h>
14#include <linux/etherdevice.h> 14#include <linux/etherdevice.h>
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/if_arp.h>
16#include <linux/inetdevice.h> 17#include <linux/inetdevice.h>
17#include <linux/inet.h> 18#include <linux/inet.h>
18#include <linux/interrupt.h> 19#include <linux/interrupt.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7fc3e9e28c34..39063122fbb7 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -116,13 +116,13 @@
116#include <linux/sched.h> 116#include <linux/sched.h>
117#include <linux/slab.h> 117#include <linux/slab.h>
118#include <linux/vmalloc.h> 118#include <linux/vmalloc.h>
119#include <linux/sched.h>
120#include <linux/unistd.h> 119#include <linux/unistd.h>
121#include <linux/string.h> 120#include <linux/string.h>
122#include <linux/ptrace.h> 121#include <linux/ptrace.h>
123#include <linux/errno.h> 122#include <linux/errno.h>
124#include <linux/ioport.h> 123#include <linux/ioport.h>
125#include <linux/interrupt.h> 124#include <linux/interrupt.h>
125#include <linux/capability.h>
126#include <linux/delay.h> 126#include <linux/delay.h>
127#include <linux/timer.h> 127#include <linux/timer.h>
128#include <linux/init.h> 128#include <linux/init.h>
@@ -473,7 +473,6 @@ static char version[] __initdata = VERSION;
473 473
474static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); 474static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i);
475static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); 475static int pktgen_add_device(struct pktgen_thread* t, const char* ifname);
476static struct pktgen_thread* pktgen_find_thread(const char* name);
477static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname); 476static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname);
478static int pktgen_device_event(struct notifier_block *, unsigned long, void *); 477static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
479static void pktgen_run_all_threads(void); 478static void pktgen_run_all_threads(void);
@@ -487,9 +486,9 @@ static unsigned int fmt_ip6(char *s,const char ip[16]);
487 486
488/* Module parameters, defaults. */ 487/* Module parameters, defaults. */
489static int pg_count_d = 1000; /* 1000 pkts by default */ 488static int pg_count_d = 1000; /* 1000 pkts by default */
490static int pg_delay_d = 0; 489static int pg_delay_d;
491static int pg_clone_skb_d = 0; 490static int pg_clone_skb_d;
492static int debug = 0; 491static int debug;
493 492
494static DECLARE_MUTEX(pktgen_sem); 493static DECLARE_MUTEX(pktgen_sem);
495static struct pktgen_thread *pktgen_threads = NULL; 494static struct pktgen_thread *pktgen_threads = NULL;
@@ -2883,7 +2882,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char* ifname)
2883 return add_dev_to_thread(t, pkt_dev); 2882 return add_dev_to_thread(t, pkt_dev);
2884} 2883}
2885 2884
2886static struct pktgen_thread *pktgen_find_thread(const char* name) 2885static struct pktgen_thread * __init pktgen_find_thread(const char* name)
2887{ 2886{
2888 struct pktgen_thread *t = NULL; 2887 struct pktgen_thread *t = NULL;
2889 2888
@@ -2900,7 +2899,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name)
2900 return t; 2899 return t;
2901} 2900}
2902 2901
2903static int pktgen_create_thread(const char* name, int cpu) 2902static int __init pktgen_create_thread(const char* name, int cpu)
2904{ 2903{
2905 struct pktgen_thread *t = NULL; 2904 struct pktgen_thread *t = NULL;
2906 struct proc_dir_entry *pe; 2905 struct proc_dir_entry *pe;
diff --git a/net/core/scm.c b/net/core/scm.c
index e887d19be506..649d01ef35b6 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/signal.h> 13#include <linux/signal.h>
14#include <linux/capability.h>
14#include <linux/errno.h> 15#include <linux/errno.h>
15#include <linux/sched.h> 16#include <linux/sched.h>
16#include <linux/mm.h> 17#include <linux/mm.h>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 83fee37de38e..d0732e9c8560 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
135struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, 135struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
136 int fclone) 136 int fclone)
137{ 137{
138 struct skb_shared_info *shinfo;
138 struct sk_buff *skb; 139 struct sk_buff *skb;
139 u8 *data; 140 u8 *data;
140 141
141 /* Get the HEAD */ 142 /* Get the HEAD */
142 if (fclone) 143 skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache,
143 skb = kmem_cache_alloc(skbuff_fclone_cache, 144 gfp_mask & ~__GFP_DMA);
144 gfp_mask & ~__GFP_DMA);
145 else
146 skb = kmem_cache_alloc(skbuff_head_cache,
147 gfp_mask & ~__GFP_DMA);
148
149 if (!skb) 145 if (!skb)
150 goto out; 146 goto out;
151 147
@@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
162 skb->data = data; 158 skb->data = data;
163 skb->tail = data; 159 skb->tail = data;
164 skb->end = data + size; 160 skb->end = data + size;
161 /* make sure we initialize shinfo sequentially */
162 shinfo = skb_shinfo(skb);
163 atomic_set(&shinfo->dataref, 1);
164 shinfo->nr_frags = 0;
165 shinfo->tso_size = 0;
166 shinfo->tso_segs = 0;
167 shinfo->ufo_size = 0;
168 shinfo->ip6_frag_id = 0;
169 shinfo->frag_list = NULL;
170
165 if (fclone) { 171 if (fclone) {
166 struct sk_buff *child = skb + 1; 172 struct sk_buff *child = skb + 1;
167 atomic_t *fclone_ref = (atomic_t *) (child + 1); 173 atomic_t *fclone_ref = (atomic_t *) (child + 1);
@@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
171 177
172 child->fclone = SKB_FCLONE_UNAVAILABLE; 178 child->fclone = SKB_FCLONE_UNAVAILABLE;
173 } 179 }
174 atomic_set(&(skb_shinfo(skb)->dataref), 1);
175 skb_shinfo(skb)->nr_frags = 0;
176 skb_shinfo(skb)->tso_size = 0;
177 skb_shinfo(skb)->tso_segs = 0;
178 skb_shinfo(skb)->frag_list = NULL;
179 skb_shinfo(skb)->ufo_size = 0;
180 skb_shinfo(skb)->ip6_frag_id = 0;
181out: 180out:
182 return skb; 181 return skb;
183nodata: 182nodata:
@@ -792,8 +791,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
792 int end = offset + skb_shinfo(skb)->frags[i].size; 791 int end = offset + skb_shinfo(skb)->frags[i].size;
793 if (end > len) { 792 if (end > len) {
794 if (skb_cloned(skb)) { 793 if (skb_cloned(skb)) {
795 if (!realloc) 794 BUG_ON(!realloc);
796 BUG();
797 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 795 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
798 return -ENOMEM; 796 return -ENOMEM;
799 } 797 }
@@ -895,8 +893,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
895 struct sk_buff *insp = NULL; 893 struct sk_buff *insp = NULL;
896 894
897 do { 895 do {
898 if (!list) 896 BUG_ON(!list);
899 BUG();
900 897
901 if (list->len <= eat) { 898 if (list->len <= eat) {
902 /* Eaten as whole. */ 899 /* Eaten as whole. */
@@ -1200,8 +1197,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset,
1200 start = end; 1197 start = end;
1201 } 1198 }
1202 } 1199 }
1203 if (len) 1200 BUG_ON(len);
1204 BUG();
1205 1201
1206 return csum; 1202 return csum;
1207} 1203}
@@ -1283,8 +1279,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1283 start = end; 1279 start = end;
1284 } 1280 }
1285 } 1281 }
1286 if (len) 1282 BUG_ON(len);
1287 BUG();
1288 return csum; 1283 return csum;
1289} 1284}
1290 1285
@@ -1298,8 +1293,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1298 else 1293 else
1299 csstart = skb_headlen(skb); 1294 csstart = skb_headlen(skb);
1300 1295
1301 if (csstart > skb_headlen(skb)) 1296 BUG_ON(csstart > skb_headlen(skb));
1302 BUG();
1303 1297
1304 memcpy(to, skb->data, csstart); 1298 memcpy(to, skb->data, csstart);
1305 1299
diff --git a/net/core/sock.c b/net/core/sock.c
index 13cc3be4f056..6e00811d44bc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -91,6 +91,7 @@
91 * 2 of the License, or (at your option) any later version. 91 * 2 of the License, or (at your option) any later version.
92 */ 92 */
93 93
94#include <linux/capability.h>
94#include <linux/config.h> 95#include <linux/config.h>
95#include <linux/errno.h> 96#include <linux/errno.h>
96#include <linux/types.h> 97#include <linux/types.h>
@@ -1488,7 +1489,7 @@ int proto_register(struct proto *prot, int alloc_slab)
1488 } 1489 }
1489 } 1490 }
1490 1491
1491 if (prot->twsk_obj_size) { 1492 if (prot->twsk_prot != NULL) {
1492 static const char mask[] = "tw_sock_%s"; 1493 static const char mask[] = "tw_sock_%s";
1493 1494
1494 timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); 1495 timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
@@ -1497,11 +1498,12 @@ int proto_register(struct proto *prot, int alloc_slab)
1497 goto out_free_request_sock_slab; 1498 goto out_free_request_sock_slab;
1498 1499
1499 sprintf(timewait_sock_slab_name, mask, prot->name); 1500 sprintf(timewait_sock_slab_name, mask, prot->name);
1500 prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, 1501 prot->twsk_prot->twsk_slab =
1501 prot->twsk_obj_size, 1502 kmem_cache_create(timewait_sock_slab_name,
1502 0, SLAB_HWCACHE_ALIGN, 1503 prot->twsk_prot->twsk_obj_size,
1503 NULL, NULL); 1504 0, SLAB_HWCACHE_ALIGN,
1504 if (prot->twsk_slab == NULL) 1505 NULL, NULL);
1506 if (prot->twsk_prot->twsk_slab == NULL)
1505 goto out_free_timewait_sock_slab_name; 1507 goto out_free_timewait_sock_slab_name;
1506 } 1508 }
1507 } 1509 }
@@ -1548,12 +1550,12 @@ void proto_unregister(struct proto *prot)
1548 prot->rsk_prot->slab = NULL; 1550 prot->rsk_prot->slab = NULL;
1549 } 1551 }
1550 1552
1551 if (prot->twsk_slab != NULL) { 1553 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1552 const char *name = kmem_cache_name(prot->twsk_slab); 1554 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1553 1555
1554 kmem_cache_destroy(prot->twsk_slab); 1556 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1555 kfree(name); 1557 kfree(name);
1556 prot->twsk_slab = NULL; 1558 prot->twsk_prot->twsk_slab = NULL;
1557 } 1559 }
1558} 1560}
1559 1561
diff --git a/net/core/stream.c b/net/core/stream.c
index 15bfd03e8024..35e25259fd95 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -55,8 +55,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
55 int done; 55 int done;
56 56
57 do { 57 do {
58 if (sk->sk_err) 58 int err = sock_error(sk);
59 return sock_error(sk); 59 if (err)
60 return err;
60 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) 61 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
61 return -EPIPE; 62 return -EPIPE;
62 if (!*timeo_p) 63 if (!*timeo_p)
@@ -67,6 +68,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
67 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 68 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
68 sk->sk_write_pending++; 69 sk->sk_write_pending++;
69 done = sk_wait_event(sk, timeo_p, 70 done = sk_wait_event(sk, timeo_p,
71 !sk->sk_err &&
70 !((1 << sk->sk_state) & 72 !((1 << sk->sk_state) &
71 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); 73 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
72 finish_wait(sk->sk_sleep, &wait); 74 finish_wait(sk->sk_sleep, &wait);
@@ -137,7 +139,9 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
137 139
138 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 140 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
139 sk->sk_write_pending++; 141 sk->sk_write_pending++;
140 sk_wait_event(sk, &current_timeo, sk_stream_memory_free(sk) && 142 sk_wait_event(sk, &current_timeo, !sk->sk_err &&
143 !(sk->sk_shutdown & SEND_SHUTDOWN) &&
144 sk_stream_memory_free(sk) &&
141 vm_wait); 145 vm_wait);
142 sk->sk_write_pending--; 146 sk->sk_write_pending--;
143 147
diff --git a/net/core/utils.c b/net/core/utils.c
index 7b5970fc9e40..ac1d1fcf8673 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -162,7 +162,7 @@ EXPORT_SYMBOL(net_srandom);
162 * is otherwise not dependent on the TCP/IP stack. 162 * is otherwise not dependent on the TCP/IP stack.
163 */ 163 */
164 164
165__u32 in_aton(const char *str) 165__be32 in_aton(const char *str)
166{ 166{
167 unsigned long l; 167 unsigned long l;
168 unsigned int val; 168 unsigned int val;
@@ -175,7 +175,7 @@ __u32 in_aton(const char *str)
175 if (*str != '\0') 175 if (*str != '\0')
176 { 176 {
177 val = 0; 177 val = 0;
178 while (*str != '\0' && *str != '.') 178 while (*str != '\0' && *str != '.' && *str != '\n')
179 { 179 {
180 val *= 10; 180 val *= 10;
181 val += *str - '0'; 181 val += *str - '0';
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 271ddb35b0b2..2add7ed609e9 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -78,6 +78,7 @@
78#include <linux/seq_file.h> 78#include <linux/seq_file.h>
79#include <linux/init.h> /* for __init */ 79#include <linux/init.h> /* for __init */
80#include <linux/if_arp.h> /* ARPHRD_ETHER */ 80#include <linux/if_arp.h> /* ARPHRD_ETHER */
81#include <linux/etherdevice.h> /* compare_ether_addr */
81 82
82#include <linux/wireless.h> /* Pretty obvious */ 83#include <linux/wireless.h> /* Pretty obvious */
83#include <net/iw_handler.h> /* New driver API */ 84#include <net/iw_handler.h> /* New driver API */
@@ -1506,7 +1507,7 @@ void wireless_spy_update(struct net_device * dev,
1506 1507
1507 /* Update all records that match */ 1508 /* Update all records that match */
1508 for(i = 0; i < spydata->spy_number; i++) 1509 for(i = 0; i < spydata->spy_number; i++)
1509 if(!memcmp(address, spydata->spy_address[i], ETH_ALEN)) { 1510 if(!compare_ether_addr(address, spydata->spy_address[i])) {
1510 memcpy(&(spydata->spy_stat[i]), wstats, 1511 memcpy(&(spydata->spy_stat[i]), wstats,
1511 sizeof(struct iw_quality)); 1512 sizeof(struct iw_quality));
1512 match = i; 1513 match = i;
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 344a8da153fc..87b27fff6e3b 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,3 +1,7 @@
1obj-$(CONFIG_IPV6) += dccp_ipv6.o
2
3dccp_ipv6-y := ipv6.o
4
1obj-$(CONFIG_IP_DCCP) += dccp.o 5obj-$(CONFIG_IP_DCCP) += dccp.o
2 6
3dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ 7dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index c9a62cca22fc..ce9cb77c5c29 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -55,8 +55,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
55 from = av->dccpav_buf + av->dccpav_buf_head; 55 from = av->dccpav_buf + av->dccpav_buf_head;
56 56
57 /* Check if buf_head wraps */ 57 /* Check if buf_head wraps */
58 if (av->dccpav_buf_head + len > av->dccpav_vec_len) { 58 if ((int)av->dccpav_buf_head + len > av->dccpav_vec_len) {
59 const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); 59 const u32 tailsize = av->dccpav_vec_len - av->dccpav_buf_head;
60 60
61 memcpy(to, from, tailsize); 61 memcpy(to, from, tailsize);
62 to += tailsize; 62 to += tailsize;
@@ -93,8 +93,14 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
93struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, 93struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len,
94 const gfp_t priority) 94 const gfp_t priority)
95{ 95{
96 struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); 96 struct dccp_ackvec *av;
97 97
98 BUG_ON(len == 0);
99
100 if (len > DCCP_MAX_ACKVEC_LEN)
101 return NULL;
102
103 av = kmalloc(sizeof(*av) + len, priority);
98 if (av != NULL) { 104 if (av != NULL) {
99 av->dccpav_buf_len = len; 105 av->dccpav_buf_len = len;
100 av->dccpav_buf_head = 106 av->dccpav_buf_head =
@@ -117,13 +123,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
117} 123}
118 124
119static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, 125static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
120 const unsigned int index) 126 const u8 index)
121{ 127{
122 return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; 128 return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
123} 129}
124 130
125static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, 131static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
126 const unsigned int index) 132 const u8 index)
127{ 133{
128 return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; 134 return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
129} 135}
@@ -135,7 +141,7 @@ static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
135 */ 141 */
136static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, 142static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
137 const unsigned int packets, 143 const unsigned int packets,
138 const unsigned char state) 144 const unsigned char state)
139{ 145{
140 unsigned int gap; 146 unsigned int gap;
141 signed long new_head; 147 signed long new_head;
@@ -223,7 +229,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
223 * could reduce the complexity of this scan.) 229 * could reduce the complexity of this scan.)
224 */ 230 */
225 u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); 231 u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
226 unsigned int index = av->dccpav_buf_head; 232 u8 index = av->dccpav_buf_head;
227 233
228 while (1) { 234 while (1) {
229 const u8 len = dccp_ackvec_len(av, index); 235 const u8 len = dccp_ackvec_len(av, index);
@@ -291,7 +297,7 @@ void dccp_ackvec_print(const struct dccp_ackvec *av)
291} 297}
292#endif 298#endif
293 299
294static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) 300static void dccp_ackvec_throw_away_ack_record(struct dccp_ackvec *av)
295{ 301{
296 /* 302 /*
297 * As we're keeping track of the ack vector size (dccpav_vec_len) and 303 * As we're keeping track of the ack vector size (dccpav_vec_len) and
@@ -301,9 +307,10 @@ static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av)
301 * draft-ietf-dccp-spec-11.txt Appendix A. -acme 307 * draft-ietf-dccp-spec-11.txt Appendix A. -acme
302 */ 308 */
303#if 0 309#if 0
304 av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; 310 u32 new_buf_tail = av->dccpav_ack_ptr + 1;
305 if (av->dccpav_buf_tail >= av->dccpav_vec_len) 311 if (new_buf_tail >= av->dccpav_vec_len)
306 av->dccpav_buf_tail -= av->dccpav_vec_len; 312 new_buf_tail -= av->dccpav_vec_len;
313 av->dccpav_buf_tail = new_buf_tail;
307#endif 314#endif
308 av->dccpav_vec_len -= av->dccpav_sent_len; 315 av->dccpav_vec_len -= av->dccpav_sent_len;
309} 316}
@@ -326,7 +333,7 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
326 debug_prefix, 1, 333 debug_prefix, 1,
327 (unsigned long long)av->dccpav_ack_seqno, 334 (unsigned long long)av->dccpav_ack_seqno,
328 (unsigned long long)av->dccpav_ack_ackno); 335 (unsigned long long)av->dccpav_ack_ackno);
329 dccp_ackvec_trow_away_ack_record(av); 336 dccp_ackvec_throw_away_ack_record(av);
330 av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; 337 av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
331 } 338 }
332} 339}
@@ -389,7 +396,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
389 av->dccpav_ack_seqno, 396 av->dccpav_ack_seqno,
390 (unsigned long long) 397 (unsigned long long)
391 av->dccpav_ack_ackno); 398 av->dccpav_ack_ackno);
392 dccp_ackvec_trow_away_ack_record(av); 399 dccp_ackvec_throw_away_ack_record(av);
393 } 400 }
394 /* 401 /*
395 * If dccpav_ack_seqno was not received, no problem 402 * If dccpav_ack_seqno was not received, no problem
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index d0fd6c60c574..f7dfb5f67b87 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -54,16 +54,16 @@
54 * @dccpav_buf - circular buffer of acknowledgeable packets 54 * @dccpav_buf - circular buffer of acknowledgeable packets
55 */ 55 */
56struct dccp_ackvec { 56struct dccp_ackvec {
57 unsigned int dccpav_buf_head;
58 unsigned int dccpav_buf_tail;
59 u64 dccpav_buf_ackno; 57 u64 dccpav_buf_ackno;
60 u64 dccpav_ack_seqno; 58 u64 dccpav_ack_seqno;
61 u64 dccpav_ack_ackno; 59 u64 dccpav_ack_ackno;
62 unsigned int dccpav_ack_ptr;
63 unsigned int dccpav_sent_len;
64 unsigned int dccpav_vec_len;
65 unsigned int dccpav_buf_len;
66 struct timeval dccpav_time; 60 struct timeval dccpav_time;
61 u8 dccpav_buf_head;
62 u8 dccpav_buf_tail;
63 u8 dccpav_ack_ptr;
64 u8 dccpav_sent_len;
65 u8 dccpav_vec_len;
66 u8 dccpav_buf_len;
67 u8 dccpav_buf_nonce; 67 u8 dccpav_buf_nonce;
68 u8 dccpav_ack_nonce; 68 u8 dccpav_ack_nonce;
69 u8 dccpav_buf[0]; 69 u8 dccpav_buf[0];
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index c37eeeaf5c6e..de681c6ad081 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -21,6 +21,8 @@
21 21
22#define CCID_MAX 255 22#define CCID_MAX 255
23 23
24struct tcp_info;
25
24struct ccid { 26struct ccid {
25 unsigned char ccid_id; 27 unsigned char ccid_id;
26 const char *ccid_name; 28 const char *ccid_name;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f97b85d55ad8..93f26dd6e6cb 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -59,7 +59,7 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
59 59
60#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ 60#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
61 61
62extern struct proto dccp_v4_prot; 62extern struct proto dccp_prot;
63 63
64/* is seq1 < seq2 ? */ 64/* is seq1 < seq2 ? */
65static inline int before48(const u64 seq1, const u64 seq2) 65static inline int before48(const u64 seq1, const u64 seq2)
@@ -228,6 +228,9 @@ extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
228extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, 228extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
229 const struct dccp_hdr *dh, const unsigned len); 229 const struct dccp_hdr *dh, const unsigned len);
230 230
231extern int dccp_v4_init_sock(struct sock *sk);
232extern int dccp_v4_destroy_sock(struct sock *sk);
233
231extern void dccp_close(struct sock *sk, long timeout); 234extern void dccp_close(struct sock *sk, long timeout);
232extern struct sk_buff *dccp_make_response(struct sock *sk, 235extern struct sk_buff *dccp_make_response(struct sock *sk,
233 struct dst_entry *dst, 236 struct dst_entry *dst,
@@ -238,6 +241,7 @@ extern struct sk_buff *dccp_make_reset(struct sock *sk,
238 241
239extern int dccp_connect(struct sock *sk); 242extern int dccp_connect(struct sock *sk);
240extern int dccp_disconnect(struct sock *sk, int flags); 243extern int dccp_disconnect(struct sock *sk, int flags);
244extern void dccp_unhash(struct sock *sk);
241extern int dccp_getsockopt(struct sock *sk, int level, int optname, 245extern int dccp_getsockopt(struct sock *sk, int level, int optname,
242 char __user *optval, int __user *optlen); 246 char __user *optval, int __user *optlen);
243extern int dccp_setsockopt(struct sock *sk, int level, int optname, 247extern int dccp_setsockopt(struct sock *sk, int level, int optname,
@@ -249,6 +253,13 @@ extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
249 struct msghdr *msg, size_t len, int nonblock, 253 struct msghdr *msg, size_t len, int nonblock,
250 int flags, int *addr_len); 254 int flags, int *addr_len);
251extern void dccp_shutdown(struct sock *sk, int how); 255extern void dccp_shutdown(struct sock *sk, int how);
256extern int inet_dccp_listen(struct socket *sock, int backlog);
257extern unsigned int dccp_poll(struct file *file, struct socket *sock,
258 poll_table *wait);
259extern void dccp_v4_send_check(struct sock *sk, int len,
260 struct sk_buff *skb);
261extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
262 int addr_len);
252 263
253extern int dccp_v4_checksum(const struct sk_buff *skb, 264extern int dccp_v4_checksum(const struct sk_buff *skb,
254 const u32 saddr, const u32 daddr); 265 const u32 saddr, const u32 daddr);
@@ -256,6 +267,17 @@ extern int dccp_v4_checksum(const struct sk_buff *skb,
256extern int dccp_v4_send_reset(struct sock *sk, 267extern int dccp_v4_send_reset(struct sock *sk,
257 enum dccp_reset_codes code); 268 enum dccp_reset_codes code);
258extern void dccp_send_close(struct sock *sk, const int active); 269extern void dccp_send_close(struct sock *sk, const int active);
270extern int dccp_invalid_packet(struct sk_buff *skb);
271
272static inline int dccp_bad_service_code(const struct sock *sk,
273 const __u32 service)
274{
275 const struct dccp_sock *dp = dccp_sk(sk);
276
277 if (dp->dccps_service == service)
278 return 0;
279 return !dccp_list_has_service(dp->dccps_service_list, service);
280}
259 281
260struct dccp_skb_cb { 282struct dccp_skb_cb {
261 __u8 dccpd_type:4; 283 __u8 dccpd_type:4;
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index f675d8e642d3..3f78c00e3822 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -28,7 +28,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
28 info->tcpi_retransmits = icsk->icsk_retransmits; 28 info->tcpi_retransmits = icsk->icsk_retransmits;
29 info->tcpi_probes = icsk->icsk_probes_out; 29 info->tcpi_probes = icsk->icsk_probes_out;
30 info->tcpi_backoff = icsk->icsk_backoff; 30 info->tcpi_backoff = icsk->icsk_backoff;
31 info->tcpi_pmtu = dp->dccps_pmtu_cookie; 31 info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
32 32
33 if (dp->dccps_options.dccpo_send_ack_vector) 33 if (dp->dccps_options.dccpo_send_ack_vector)
34 info->tcpi_options |= TCPI_OPT_SACK; 34 info->tcpi_options |= TCPI_OPT_SACK;
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 3454d5941900..b6cba72b44e8 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -151,29 +151,12 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
151 return 0; 151 return 0;
152} 152}
153 153
154int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, 154static inline int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
155 const struct dccp_hdr *dh, const unsigned len) 155 const struct dccp_hdr *dh,
156 const unsigned len)
156{ 157{
157 struct dccp_sock *dp = dccp_sk(sk); 158 struct dccp_sock *dp = dccp_sk(sk);
158 159
159 if (dccp_check_seqno(sk, skb))
160 goto discard;
161
162 if (dccp_parse_options(sk, skb))
163 goto discard;
164
165 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
166 dccp_event_ack_recv(sk, skb);
167
168 if (dp->dccps_options.dccpo_send_ack_vector &&
169 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
170 DCCP_SKB_CB(skb)->dccpd_seq,
171 DCCP_ACKVEC_STATE_RECEIVED))
172 goto discard;
173
174 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
175 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
176
177 switch (dccp_hdr(skb)->dccph_type) { 160 switch (dccp_hdr(skb)->dccph_type) {
178 case DCCP_PKT_DATAACK: 161 case DCCP_PKT_DATAACK:
179 case DCCP_PKT_DATA: 162 case DCCP_PKT_DATA:
@@ -250,6 +233,37 @@ discard:
250 return 0; 233 return 0;
251} 234}
252 235
236int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
237 const struct dccp_hdr *dh, const unsigned len)
238{
239 struct dccp_sock *dp = dccp_sk(sk);
240
241 if (dccp_check_seqno(sk, skb))
242 goto discard;
243
244 if (dccp_parse_options(sk, skb))
245 goto discard;
246
247 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
248 dccp_event_ack_recv(sk, skb);
249
250 if (dp->dccps_options.dccpo_send_ack_vector &&
251 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
252 DCCP_SKB_CB(skb)->dccpd_seq,
253 DCCP_ACKVEC_STATE_RECEIVED))
254 goto discard;
255
256 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
257 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
258
259 return __dccp_rcv_established(sk, skb, dh, len);
260discard:
261 __kfree_skb(skb);
262 return 0;
263}
264
265EXPORT_SYMBOL_GPL(dccp_rcv_established);
266
253static int dccp_rcv_request_sent_state_process(struct sock *sk, 267static int dccp_rcv_request_sent_state_process(struct sock *sk,
254 struct sk_buff *skb, 268 struct sk_buff *skb,
255 const struct dccp_hdr *dh, 269 const struct dccp_hdr *dh,
@@ -286,6 +300,12 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
286 goto out_invalid_packet; 300 goto out_invalid_packet;
287 } 301 }
288 302
303 if (dp->dccps_options.dccpo_send_ack_vector &&
304 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
305 DCCP_SKB_CB(skb)->dccpd_seq,
306 DCCP_ACKVEC_STATE_RECEIVED))
307 goto out_invalid_packet; /* FIXME: change error code */
308
289 dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; 309 dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
290 dccp_update_gsr(sk, dp->dccps_isr); 310 dccp_update_gsr(sk, dp->dccps_isr);
291 /* 311 /*
@@ -309,7 +329,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
309 goto out_invalid_packet; 329 goto out_invalid_packet;
310 } 330 }
311 331
312 dccp_sync_mss(sk, dp->dccps_pmtu_cookie); 332 dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
313 333
314 /* 334 /*
315 * Step 10: Process REQUEST state (second part) 335 * Step 10: Process REQUEST state (second part)
@@ -329,7 +349,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
329 dccp_set_state(sk, DCCP_PARTOPEN); 349 dccp_set_state(sk, DCCP_PARTOPEN);
330 350
331 /* Make sure socket is routed, for correct metrics. */ 351 /* Make sure socket is routed, for correct metrics. */
332 inet_sk_rebuild_header(sk); 352 icsk->icsk_af_ops->rebuild_header(sk);
333 353
334 if (!sock_flag(sk, SOCK_DEAD)) { 354 if (!sock_flag(sk, SOCK_DEAD)) {
335 sk->sk_state_change(sk); 355 sk->sk_state_change(sk);
@@ -398,9 +418,9 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
398 418
399 if (dh->dccph_type == DCCP_PKT_DATAACK || 419 if (dh->dccph_type == DCCP_PKT_DATAACK ||
400 dh->dccph_type == DCCP_PKT_DATA) { 420 dh->dccph_type == DCCP_PKT_DATA) {
401 dccp_rcv_established(sk, skb, dh, len); 421 __dccp_rcv_established(sk, skb, dh, len);
402 queued = 1; /* packet was queued 422 queued = 1; /* packet was queued
403 (by dccp_rcv_established) */ 423 (by __dccp_rcv_established) */
404 } 424 }
405 break; 425 break;
406 } 426 }
@@ -444,7 +464,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
444 */ 464 */
445 if (sk->sk_state == DCCP_LISTEN) { 465 if (sk->sk_state == DCCP_LISTEN) {
446 if (dh->dccph_type == DCCP_PKT_REQUEST) { 466 if (dh->dccph_type == DCCP_PKT_REQUEST) {
447 if (dccp_v4_conn_request(sk, skb) < 0) 467 if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
468 skb) < 0)
448 return 1; 469 return 1;
449 470
450 /* FIXME: do congestion control initialization */ 471 /* FIXME: do congestion control initialization */
@@ -471,14 +492,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
471 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 492 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
472 dccp_event_ack_recv(sk, skb); 493 dccp_event_ack_recv(sk, skb);
473 494
474 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
475 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
476
477 if (dp->dccps_options.dccpo_send_ack_vector && 495 if (dp->dccps_options.dccpo_send_ack_vector &&
478 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, 496 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
479 DCCP_SKB_CB(skb)->dccpd_seq, 497 DCCP_SKB_CB(skb)->dccpd_seq,
480 DCCP_ACKVEC_STATE_RECEIVED)) 498 DCCP_ACKVEC_STATE_RECEIVED))
481 goto discard; 499 goto discard;
500
501 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
502 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
482 } 503 }
483 504
484 /* 505 /*
@@ -566,3 +587,5 @@ discard:
566 } 587 }
567 return 0; 588 return 0;
568} 589}
590
591EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 656e13e38cfb..00f983226672 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -19,7 +19,9 @@
19 19
20#include <net/icmp.h> 20#include <net/icmp.h>
21#include <net/inet_hashtables.h> 21#include <net/inet_hashtables.h>
22#include <net/inet_sock.h>
22#include <net/sock.h> 23#include <net/sock.h>
24#include <net/timewait_sock.h>
23#include <net/tcp_states.h> 25#include <net/tcp_states.h>
24#include <net/xfrm.h> 26#include <net/xfrm.h>
25 27
@@ -37,7 +39,8 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo);
37 39
38static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) 40static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
39{ 41{
40 return inet_csk_get_port(&dccp_hashinfo, sk, snum); 42 return inet_csk_get_port(&dccp_hashinfo, sk, snum,
43 inet_csk_bind_conflict);
41} 44}
42 45
43static void dccp_v4_hash(struct sock *sk) 46static void dccp_v4_hash(struct sock *sk)
@@ -45,171 +48,14 @@ static void dccp_v4_hash(struct sock *sk)
45 inet_hash(&dccp_hashinfo, sk); 48 inet_hash(&dccp_hashinfo, sk);
46} 49}
47 50
48static void dccp_v4_unhash(struct sock *sk) 51void dccp_unhash(struct sock *sk)
49{ 52{
50 inet_unhash(&dccp_hashinfo, sk); 53 inet_unhash(&dccp_hashinfo, sk);
51} 54}
52 55
53/* called with local bh disabled */ 56EXPORT_SYMBOL_GPL(dccp_unhash);
54static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
55 struct inet_timewait_sock **twp)
56{
57 struct inet_sock *inet = inet_sk(sk);
58 const u32 daddr = inet->rcv_saddr;
59 const u32 saddr = inet->daddr;
60 const int dif = sk->sk_bound_dev_if;
61 INET_ADDR_COOKIE(acookie, saddr, daddr)
62 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
63 unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
64 struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash);
65 const struct sock *sk2;
66 const struct hlist_node *node;
67 struct inet_timewait_sock *tw;
68
69 prefetch(head->chain.first);
70 write_lock(&head->lock);
71
72 /* Check TIME-WAIT sockets first. */
73 sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
74 tw = inet_twsk(sk2);
75
76 if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
77 goto not_unique;
78 }
79 tw = NULL;
80
81 /* And established part... */
82 sk_for_each(sk2, node, &head->chain) {
83 if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
84 goto not_unique;
85 }
86 57
87 /* Must record num and sport now. Otherwise we will see 58int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
88 * in hash table socket with a funny identity. */
89 inet->num = lport;
90 inet->sport = htons(lport);
91 sk->sk_hash = hash;
92 BUG_TRAP(sk_unhashed(sk));
93 __sk_add_node(sk, &head->chain);
94 sock_prot_inc_use(sk->sk_prot);
95 write_unlock(&head->lock);
96
97 if (twp != NULL) {
98 *twp = tw;
99 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
100 } else if (tw != NULL) {
101 /* Silly. Should hash-dance instead... */
102 inet_twsk_deschedule(tw, &dccp_death_row);
103 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
104
105 inet_twsk_put(tw);
106 }
107
108 return 0;
109
110not_unique:
111 write_unlock(&head->lock);
112 return -EADDRNOTAVAIL;
113}
114
115/*
116 * Bind a port for a connect operation and hash it.
117 */
118static int dccp_v4_hash_connect(struct sock *sk)
119{
120 const unsigned short snum = inet_sk(sk)->num;
121 struct inet_bind_hashbucket *head;
122 struct inet_bind_bucket *tb;
123 int ret;
124
125 if (snum == 0) {
126 int low = sysctl_local_port_range[0];
127 int high = sysctl_local_port_range[1];
128 int remaining = (high - low) + 1;
129 int rover = net_random() % (high - low) + low;
130 struct hlist_node *node;
131 struct inet_timewait_sock *tw = NULL;
132
133 local_bh_disable();
134 do {
135 head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
136 dccp_hashinfo.bhash_size)];
137 spin_lock(&head->lock);
138
139 /* Does not bother with rcv_saddr checks,
140 * because the established check is already
141 * unique enough.
142 */
143 inet_bind_bucket_for_each(tb, node, &head->chain) {
144 if (tb->port == rover) {
145 BUG_TRAP(!hlist_empty(&tb->owners));
146 if (tb->fastreuse >= 0)
147 goto next_port;
148 if (!__dccp_v4_check_established(sk,
149 rover,
150 &tw))
151 goto ok;
152 goto next_port;
153 }
154 }
155
156 tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
157 head, rover);
158 if (tb == NULL) {
159 spin_unlock(&head->lock);
160 break;
161 }
162 tb->fastreuse = -1;
163 goto ok;
164
165 next_port:
166 spin_unlock(&head->lock);
167 if (++rover > high)
168 rover = low;
169 } while (--remaining > 0);
170
171 local_bh_enable();
172
173 return -EADDRNOTAVAIL;
174
175ok:
176 /* All locks still held and bhs disabled */
177 inet_bind_hash(sk, tb, rover);
178 if (sk_unhashed(sk)) {
179 inet_sk(sk)->sport = htons(rover);
180 __inet_hash(&dccp_hashinfo, sk, 0);
181 }
182 spin_unlock(&head->lock);
183
184 if (tw != NULL) {
185 inet_twsk_deschedule(tw, &dccp_death_row);
186 inet_twsk_put(tw);
187 }
188
189 ret = 0;
190 goto out;
191 }
192
193 head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
194 dccp_hashinfo.bhash_size)];
195 tb = inet_csk(sk)->icsk_bind_hash;
196 spin_lock_bh(&head->lock);
197 if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
198 __inet_hash(&dccp_hashinfo, sk, 0);
199 spin_unlock_bh(&head->lock);
200 return 0;
201 } else {
202 spin_unlock(&head->lock);
203 /* No definite answer... Walk to established hash table */
204 ret = __dccp_v4_check_established(sk, snum, NULL);
205out:
206 local_bh_enable();
207 return ret;
208 }
209}
210
211static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
212 int addr_len)
213{ 59{
214 struct inet_sock *inet = inet_sk(sk); 60 struct inet_sock *inet = inet_sk(sk);
215 struct dccp_sock *dp = dccp_sk(sk); 61 struct dccp_sock *dp = dccp_sk(sk);
@@ -259,9 +105,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
259 inet->dport = usin->sin_port; 105 inet->dport = usin->sin_port;
260 inet->daddr = daddr; 106 inet->daddr = daddr;
261 107
262 dp->dccps_ext_header_len = 0; 108 inet_csk(sk)->icsk_ext_hdr_len = 0;
263 if (inet->opt != NULL) 109 if (inet->opt != NULL)
264 dp->dccps_ext_header_len = inet->opt->optlen; 110 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
265 /* 111 /*
266 * Socket identity is still unknown (sport may be zero). 112 * Socket identity is still unknown (sport may be zero).
267 * However we set state to DCCP_REQUESTING and not releasing socket 113 * However we set state to DCCP_REQUESTING and not releasing socket
@@ -269,7 +115,7 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
269 * complete initialization after this. 115 * complete initialization after this.
270 */ 116 */
271 dccp_set_state(sk, DCCP_REQUESTING); 117 dccp_set_state(sk, DCCP_REQUESTING);
272 err = dccp_v4_hash_connect(sk); 118 err = inet_hash_connect(&dccp_death_row, sk);
273 if (err != 0) 119 if (err != 0)
274 goto failure; 120 goto failure;
275 121
@@ -287,16 +133,6 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
287 usin->sin_port); 133 usin->sin_port);
288 dccp_update_gss(sk, dp->dccps_iss); 134 dccp_update_gss(sk, dp->dccps_iss);
289 135
290 /*
291 * SWL and AWL are initially adjusted so that they are not less than
292 * the initial Sequence Numbers received and sent, respectively:
293 * SWL := max(GSR + 1 - floor(W/4), ISR),
294 * AWL := max(GSS - W' + 1, ISS).
295 * These adjustments MUST be applied only at the beginning of the
296 * connection.
297 */
298 dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
299
300 inet->id = dp->dccps_iss ^ jiffies; 136 inet->id = dp->dccps_iss ^ jiffies;
301 137
302 err = dccp_connect(sk); 138 err = dccp_connect(sk);
@@ -316,6 +152,8 @@ failure:
316 goto out; 152 goto out;
317} 153}
318 154
155EXPORT_SYMBOL_GPL(dccp_v4_connect);
156
319/* 157/*
320 * This routine does path mtu discovery as defined in RFC1191. 158 * This routine does path mtu discovery as defined in RFC1191.
321 */ 159 */
@@ -354,7 +192,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
354 mtu = dst_mtu(dst); 192 mtu = dst_mtu(dst);
355 193
356 if (inet->pmtudisc != IP_PMTUDISC_DONT && 194 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
357 dp->dccps_pmtu_cookie > mtu) { 195 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
358 dccp_sync_mss(sk, mtu); 196 dccp_sync_mss(sk, mtu);
359 197
360 /* 198 /*
@@ -606,6 +444,17 @@ out:
606 sock_put(sk); 444 sock_put(sk);
607} 445}
608 446
447/* This routine computes an IPv4 DCCP checksum. */
448void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
449{
450 const struct inet_sock *inet = inet_sk(sk);
451 struct dccp_hdr *dh = dccp_hdr(skb);
452
453 dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr);
454}
455
456EXPORT_SYMBOL_GPL(dccp_v4_send_check);
457
609int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) 458int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
610{ 459{
611 struct sk_buff *skb; 460 struct sk_buff *skb;
@@ -641,16 +490,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
641 dccp_hdr(skb)->dccph_sport); 490 dccp_hdr(skb)->dccph_sport);
642} 491}
643 492
644static inline int dccp_bad_service_code(const struct sock *sk,
645 const __u32 service)
646{
647 const struct dccp_sock *dp = dccp_sk(sk);
648
649 if (dp->dccps_service == service)
650 return 0;
651 return !dccp_list_has_service(dp->dccps_service_list, service);
652}
653
654int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 493int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
655{ 494{
656 struct inet_request_sock *ireq; 495 struct inet_request_sock *ireq;
@@ -662,7 +501,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
662 const __u32 service = dccp_hdr_request(skb)->dccph_req_service; 501 const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
663 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 502 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
664 __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; 503 __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
665 struct dst_entry *dst = NULL;
666 504
667 /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ 505 /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
668 if (((struct rtable *)skb->dst)->rt_flags & 506 if (((struct rtable *)skb->dst)->rt_flags &
@@ -703,7 +541,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
703 ireq = inet_rsk(req); 541 ireq = inet_rsk(req);
704 ireq->loc_addr = daddr; 542 ireq->loc_addr = daddr;
705 ireq->rmt_addr = saddr; 543 ireq->rmt_addr = saddr;
706 /* FIXME: Merge Aristeu's option parsing code when ready */
707 req->rcv_wnd = 100; /* Fake, option parsing will get the 544 req->rcv_wnd = 100; /* Fake, option parsing will get the
708 right value */ 545 right value */
709 ireq->opt = NULL; 546 ireq->opt = NULL;
@@ -721,23 +558,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
721 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); 558 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
722 dreq->dreq_service = service; 559 dreq->dreq_service = service;
723 560
724 if (dccp_v4_send_response(sk, req, dst)) 561 if (dccp_v4_send_response(sk, req, NULL))
725 goto drop_and_free; 562 goto drop_and_free;
726 563
727 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 564 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
728 return 0; 565 return 0;
729 566
730drop_and_free: 567drop_and_free:
731 /* 568 reqsk_free(req);
732 * FIXME: should be reqsk_free after implementing req->rsk_ops
733 */
734 __reqsk_free(req);
735drop: 569drop:
736 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 570 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
737 dcb->dccpd_reset_code = reset_code; 571 dcb->dccpd_reset_code = reset_code;
738 return -1; 572 return -1;
739} 573}
740 574
575EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
576
741/* 577/*
742 * The three way handshake has completed - we got a valid ACK or DATAACK - 578 * The three way handshake has completed - we got a valid ACK or DATAACK -
743 * now create the new socket. 579 * now create the new socket.
@@ -792,6 +628,8 @@ exit:
792 return NULL; 628 return NULL;
793} 629}
794 630
631EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
632
795static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 633static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
796{ 634{
797 const struct dccp_hdr *dh = dccp_hdr(skb); 635 const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -1011,7 +849,9 @@ discard:
1011 return 0; 849 return 0;
1012} 850}
1013 851
1014static inline int dccp_invalid_packet(struct sk_buff *skb) 852EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
853
854int dccp_invalid_packet(struct sk_buff *skb)
1015{ 855{
1016 const struct dccp_hdr *dh; 856 const struct dccp_hdr *dh;
1017 857
@@ -1065,29 +905,30 @@ static inline int dccp_invalid_packet(struct sk_buff *skb)
1065 return 1; 905 return 1;
1066 } 906 }
1067 907
1068 /* If the header checksum is incorrect, drop packet and return */
1069 if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
1070 skb->nh.iph->daddr) < 0) {
1071 LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
1072 "incorrect\n");
1073 return 1;
1074 }
1075
1076 return 0; 908 return 0;
1077} 909}
1078 910
911EXPORT_SYMBOL_GPL(dccp_invalid_packet);
912
1079/* this is called when real data arrives */ 913/* this is called when real data arrives */
1080int dccp_v4_rcv(struct sk_buff *skb) 914int dccp_v4_rcv(struct sk_buff *skb)
1081{ 915{
1082 const struct dccp_hdr *dh; 916 const struct dccp_hdr *dh;
1083 struct sock *sk; 917 struct sock *sk;
1084 int rc;
1085 918
1086 /* Step 1: Check header basics: */ 919 /* Step 1: Check header basics: */
1087 920
1088 if (dccp_invalid_packet(skb)) 921 if (dccp_invalid_packet(skb))
1089 goto discard_it; 922 goto discard_it;
1090 923
924 /* If the header checksum is incorrect, drop packet and return */
925 if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
926 skb->nh.iph->daddr) < 0) {
927 LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n",
928 __FUNCTION__);
929 goto discard_it;
930 }
931
1091 dh = dccp_hdr(skb); 932 dh = dccp_hdr(skb);
1092 933
1093 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); 934 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
@@ -1143,28 +984,11 @@ int dccp_v4_rcv(struct sk_buff *skb)
1143 goto do_time_wait; 984 goto do_time_wait;
1144 } 985 }
1145 986
1146 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { 987 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1147 dccp_pr_debug("xfrm4_policy_check failed\n");
1148 goto discard_and_relse; 988 goto discard_and_relse;
1149 } 989 nf_reset(skb);
1150
1151 if (sk_filter(sk, skb, 0)) {
1152 dccp_pr_debug("sk_filter failed\n");
1153 goto discard_and_relse;
1154 }
1155
1156 skb->dev = NULL;
1157
1158 bh_lock_sock(sk);
1159 rc = 0;
1160 if (!sock_owned_by_user(sk))
1161 rc = dccp_v4_do_rcv(sk, skb);
1162 else
1163 sk_add_backlog(sk, skb);
1164 bh_unlock_sock(sk);
1165 990
1166 sock_put(sk); 991 return sk_receive_skb(sk, skb);
1167 return rc;
1168 992
1169no_dccp_socket: 993no_dccp_socket:
1170 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 994 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -1194,9 +1018,23 @@ do_time_wait:
1194 goto no_dccp_socket; 1018 goto no_dccp_socket;
1195} 1019}
1196 1020
1197static int dccp_v4_init_sock(struct sock *sk) 1021struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
1022 .queue_xmit = ip_queue_xmit,
1023 .send_check = dccp_v4_send_check,
1024 .rebuild_header = inet_sk_rebuild_header,
1025 .conn_request = dccp_v4_conn_request,
1026 .syn_recv_sock = dccp_v4_request_recv_sock,
1027 .net_header_len = sizeof(struct iphdr),
1028 .setsockopt = ip_setsockopt,
1029 .getsockopt = ip_getsockopt,
1030 .addr2sockaddr = inet_csk_addr2sockaddr,
1031 .sockaddr_len = sizeof(struct sockaddr_in),
1032};
1033
1034int dccp_v4_init_sock(struct sock *sk)
1198{ 1035{
1199 struct dccp_sock *dp = dccp_sk(sk); 1036 struct dccp_sock *dp = dccp_sk(sk);
1037 struct inet_connection_sock *icsk = inet_csk(sk);
1200 static int dccp_ctl_socket_init = 1; 1038 static int dccp_ctl_socket_init = 1;
1201 1039
1202 dccp_options_init(&dp->dccps_options); 1040 dccp_options_init(&dp->dccps_options);
@@ -1236,9 +1074,11 @@ static int dccp_v4_init_sock(struct sock *sk)
1236 dccp_ctl_socket_init = 0; 1074 dccp_ctl_socket_init = 0;
1237 1075
1238 dccp_init_xmit_timers(sk); 1076 dccp_init_xmit_timers(sk);
1239 inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; 1077 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
1240 sk->sk_state = DCCP_CLOSED; 1078 sk->sk_state = DCCP_CLOSED;
1241 sk->sk_write_space = dccp_write_space; 1079 sk->sk_write_space = dccp_write_space;
1080 icsk->icsk_af_ops = &dccp_ipv4_af_ops;
1081 icsk->icsk_sync_mss = dccp_sync_mss;
1242 dp->dccps_mss_cache = 536; 1082 dp->dccps_mss_cache = 536;
1243 dp->dccps_role = DCCP_ROLE_UNDEFINED; 1083 dp->dccps_role = DCCP_ROLE_UNDEFINED;
1244 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; 1084 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
@@ -1246,7 +1086,9 @@ static int dccp_v4_init_sock(struct sock *sk)
1246 return 0; 1086 return 0;
1247} 1087}
1248 1088
1249static int dccp_v4_destroy_sock(struct sock *sk) 1089EXPORT_SYMBOL_GPL(dccp_v4_init_sock);
1090
1091int dccp_v4_destroy_sock(struct sock *sk)
1250{ 1092{
1251 struct dccp_sock *dp = dccp_sk(sk); 1093 struct dccp_sock *dp = dccp_sk(sk);
1252 1094
@@ -1279,6 +1121,8 @@ static int dccp_v4_destroy_sock(struct sock *sk)
1279 return 0; 1121 return 0;
1280} 1122}
1281 1123
1124EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock);
1125
1282static void dccp_v4_reqsk_destructor(struct request_sock *req) 1126static void dccp_v4_reqsk_destructor(struct request_sock *req)
1283{ 1127{
1284 kfree(inet_rsk(req)->opt); 1128 kfree(inet_rsk(req)->opt);
@@ -1293,7 +1137,11 @@ static struct request_sock_ops dccp_request_sock_ops = {
1293 .send_reset = dccp_v4_ctl_send_reset, 1137 .send_reset = dccp_v4_ctl_send_reset,
1294}; 1138};
1295 1139
1296struct proto dccp_v4_prot = { 1140static struct timewait_sock_ops dccp_timewait_sock_ops = {
1141 .twsk_obj_size = sizeof(struct inet_timewait_sock),
1142};
1143
1144struct proto dccp_prot = {
1297 .name = "DCCP", 1145 .name = "DCCP",
1298 .owner = THIS_MODULE, 1146 .owner = THIS_MODULE,
1299 .close = dccp_close, 1147 .close = dccp_close,
@@ -1307,7 +1155,7 @@ struct proto dccp_v4_prot = {
1307 .recvmsg = dccp_recvmsg, 1155 .recvmsg = dccp_recvmsg,
1308 .backlog_rcv = dccp_v4_do_rcv, 1156 .backlog_rcv = dccp_v4_do_rcv,
1309 .hash = dccp_v4_hash, 1157 .hash = dccp_v4_hash,
1310 .unhash = dccp_v4_unhash, 1158 .unhash = dccp_unhash,
1311 .accept = inet_csk_accept, 1159 .accept = inet_csk_accept,
1312 .get_port = dccp_v4_get_port, 1160 .get_port = dccp_v4_get_port,
1313 .shutdown = dccp_shutdown, 1161 .shutdown = dccp_shutdown,
@@ -1316,5 +1164,7 @@ struct proto dccp_v4_prot = {
1316 .max_header = MAX_DCCP_HEADER, 1164 .max_header = MAX_DCCP_HEADER,
1317 .obj_size = sizeof(struct dccp_sock), 1165 .obj_size = sizeof(struct dccp_sock),
1318 .rsk_prot = &dccp_request_sock_ops, 1166 .rsk_prot = &dccp_request_sock_ops,
1319 .twsk_obj_size = sizeof(struct inet_timewait_sock), 1167 .twsk_prot = &dccp_timewait_sock_ops,
1320}; 1168};
1169
1170EXPORT_SYMBOL_GPL(dccp_prot);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
new file mode 100644
index 000000000000..df074259f9c3
--- /dev/null
+++ b/net/dccp/ipv6.c
@@ -0,0 +1,1262 @@
1/*
2 * DCCP over IPv6
3 * Linux INET6 implementation
4 *
5 * Based on net/dccp6/ipv6.c
6 *
7 * Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <linux/config.h>
16#include <linux/module.h>
17#include <linux/random.h>
18#include <linux/xfrm.h>
19
20#include <net/addrconf.h>
21#include <net/inet_common.h>
22#include <net/inet_hashtables.h>
23#include <net/inet_sock.h>
24#include <net/inet6_connection_sock.h>
25#include <net/inet6_hashtables.h>
26#include <net/ip6_route.h>
27#include <net/ipv6.h>
28#include <net/protocol.h>
29#include <net/transp_v6.h>
30#include <net/ip6_checksum.h>
31#include <net/xfrm.h>
32
33#include "dccp.h"
34#include "ipv6.h"
35
36static void dccp_v6_ctl_send_reset(struct sk_buff *skb);
37static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
38 struct request_sock *req);
39static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb);
40
41static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
42
43static struct inet_connection_sock_af_ops dccp_ipv6_mapped;
44static struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
45
46static int dccp_v6_get_port(struct sock *sk, unsigned short snum)
47{
48 return inet_csk_get_port(&dccp_hashinfo, sk, snum,
49 inet6_csk_bind_conflict);
50}
51
52static void dccp_v6_hash(struct sock *sk)
53{
54 if (sk->sk_state != DCCP_CLOSED) {
55 if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) {
56 dccp_prot.hash(sk);
57 return;
58 }
59 local_bh_disable();
60 __inet6_hash(&dccp_hashinfo, sk);
61 local_bh_enable();
62 }
63}
64
65static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len,
66 struct in6_addr *saddr,
67 struct in6_addr *daddr,
68 unsigned long base)
69{
70 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base);
71}
72
73static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
74{
75 const struct dccp_hdr *dh = dccp_hdr(skb);
76
77 if (skb->protocol == htons(ETH_P_IPV6))
78 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
79 skb->nh.ipv6h->saddr.s6_addr32,
80 dh->dccph_dport,
81 dh->dccph_sport);
82 else
83 return secure_dccp_sequence_number(skb->nh.iph->daddr,
84 skb->nh.iph->saddr,
85 dh->dccph_dport,
86 dh->dccph_sport);
87}
88
89static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
90 int addr_len)
91{
92 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
93 struct inet_connection_sock *icsk = inet_csk(sk);
94 struct inet_sock *inet = inet_sk(sk);
95 struct ipv6_pinfo *np = inet6_sk(sk);
96 struct dccp_sock *dp = dccp_sk(sk);
97 struct in6_addr *saddr = NULL, *final_p = NULL, final;
98 struct flowi fl;
99 struct dst_entry *dst;
100 int addr_type;
101 int err;
102
103 dp->dccps_role = DCCP_ROLE_CLIENT;
104
105 if (addr_len < SIN6_LEN_RFC2133)
106 return -EINVAL;
107
108 if (usin->sin6_family != AF_INET6)
109 return -EAFNOSUPPORT;
110
111 memset(&fl, 0, sizeof(fl));
112
113 if (np->sndflow) {
114 fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
115 IP6_ECN_flow_init(fl.fl6_flowlabel);
116 if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
117 struct ip6_flowlabel *flowlabel;
118 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
119 if (flowlabel == NULL)
120 return -EINVAL;
121 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
122 fl6_sock_release(flowlabel);
123 }
124 }
125
126 /*
127 * connect() to INADDR_ANY means loopback (BSD'ism).
128 */
129
130 if (ipv6_addr_any(&usin->sin6_addr))
131 usin->sin6_addr.s6_addr[15] = 0x1;
132
133 addr_type = ipv6_addr_type(&usin->sin6_addr);
134
135 if(addr_type & IPV6_ADDR_MULTICAST)
136 return -ENETUNREACH;
137
138 if (addr_type & IPV6_ADDR_LINKLOCAL) {
139 if (addr_len >= sizeof(struct sockaddr_in6) &&
140 usin->sin6_scope_id) {
141 /* If interface is set while binding, indices
142 * must coincide.
143 */
144 if (sk->sk_bound_dev_if &&
145 sk->sk_bound_dev_if != usin->sin6_scope_id)
146 return -EINVAL;
147
148 sk->sk_bound_dev_if = usin->sin6_scope_id;
149 }
150
151 /* Connect to link-local address requires an interface */
152 if (!sk->sk_bound_dev_if)
153 return -EINVAL;
154 }
155
156 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
157 np->flow_label = fl.fl6_flowlabel;
158
159 /*
160 * DCCP over IPv4
161 */
162
163 if (addr_type == IPV6_ADDR_MAPPED) {
164 u32 exthdrlen = icsk->icsk_ext_hdr_len;
165 struct sockaddr_in sin;
166
167 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
168
169 if (__ipv6_only_sock(sk))
170 return -ENETUNREACH;
171
172 sin.sin_family = AF_INET;
173 sin.sin_port = usin->sin6_port;
174 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
175
176 icsk->icsk_af_ops = &dccp_ipv6_mapped;
177 sk->sk_backlog_rcv = dccp_v4_do_rcv;
178
179 err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
180
181 if (err) {
182 icsk->icsk_ext_hdr_len = exthdrlen;
183 icsk->icsk_af_ops = &dccp_ipv6_af_ops;
184 sk->sk_backlog_rcv = dccp_v6_do_rcv;
185 goto failure;
186 } else {
187 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
188 inet->saddr);
189 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
190 inet->rcv_saddr);
191 }
192
193 return err;
194 }
195
196 if (!ipv6_addr_any(&np->rcv_saddr))
197 saddr = &np->rcv_saddr;
198
199 fl.proto = IPPROTO_DCCP;
200 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
201 ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr);
202 fl.oif = sk->sk_bound_dev_if;
203 fl.fl_ip_dport = usin->sin6_port;
204 fl.fl_ip_sport = inet->sport;
205
206 if (np->opt && np->opt->srcrt) {
207 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
208 ipv6_addr_copy(&final, &fl.fl6_dst);
209 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
210 final_p = &final;
211 }
212
213 err = ip6_dst_lookup(sk, &dst, &fl);
214 if (err)
215 goto failure;
216 if (final_p)
217 ipv6_addr_copy(&fl.fl6_dst, final_p);
218
219 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
220 goto failure;
221
222 if (saddr == NULL) {
223 saddr = &fl.fl6_src;
224 ipv6_addr_copy(&np->rcv_saddr, saddr);
225 }
226
227 /* set the source address */
228 ipv6_addr_copy(&np->saddr, saddr);
229 inet->rcv_saddr = LOOPBACK4_IPV6;
230
231 ip6_dst_store(sk, dst, NULL);
232
233 icsk->icsk_ext_hdr_len = 0;
234 if (np->opt)
235 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
236 np->opt->opt_nflen);
237
238 inet->dport = usin->sin6_port;
239
240 dccp_set_state(sk, DCCP_REQUESTING);
241 err = inet6_hash_connect(&dccp_death_row, sk);
242 if (err)
243 goto late_failure;
244 /* FIXME */
245#if 0
246 dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32,
247 np->daddr.s6_addr32,
248 inet->sport,
249 inet->dport);
250#endif
251 err = dccp_connect(sk);
252 if (err)
253 goto late_failure;
254
255 return 0;
256
257late_failure:
258 dccp_set_state(sk, DCCP_CLOSED);
259 __sk_dst_reset(sk);
260failure:
261 inet->dport = 0;
262 sk->sk_route_caps = 0;
263 return err;
264}
265
/*
 * ICMPv6 error handler for DCCP: called by the IPv6 stack when an ICMPv6
 * error message quotes a DCCP segment we sent.  @offset is the offset of
 * the quoted DCCP header within @skb->data; @type/@code are the ICMPv6
 * type and code.
 *
 * Runs in BH context.  Looks up the owning socket, and either adjusts the
 * path MTU (PKT_TOOBIG), drops a matching pending request_sock, or reports
 * the translated error to the socket.
 */
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
			int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	__u64 seq;

	/* Addresses are swapped: the quoted packet's source is our local
	 * address, its destination the peer.
	 */
	sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport,
			  &hdr->saddr, dh->dccph_sport, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	/* Timewait sockets carry no full state; just drop the reference. */
	if (sk->sk_state == DCCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	/* If user context owns the lock we cannot safely touch socket state;
	 * only account the dropped ICMP (non-TOOBIG cases still fall through
	 * to the soft-error path below, as in TCP).
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == DCCP_CLOSED)
		goto out;

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_DCCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		/* Shrink our cached MSS if the path MTU dropped below it. */
		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			dccp_sync_mss(sk, dst_mtu(dst));
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* NOTE(review): reads dccpd_seq from the ICMP skb's control block —
	 * presumably set by dccp_invalid_packet/parsing before this point;
	 * confirm, otherwise the sequence should be parsed from @dh.
	 */
	seq = DCCP_SKB_CB(skb)->dccpd_seq;
	/* Might be for an request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case DCCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, dh->dccph_dport,
					   &hdr->daddr, &hdr->saddr,
					   inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		/* Only react if the error quotes our initial sequence. */
		if (seq != dccp_rsk(req)->dreq_iss) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case DCCP_REQUESTING:
	case DCCP_RESPOND: /* Cannot happen.
			      It can, it SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			/*
			 * Wake people up to see the error
			 * (see connect in sock.c)
			 */
			sk->sk_error_report(sk);

			dccp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	/* Established (or other) states: hard error only if the user asked
	 * for it via IPV6_RECVERR, otherwise record it as a soft error.
	 */
	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
402
403
/*
 * Build and transmit a RESPONSE packet for a pending connection request.
 * Also serves as the rtx_syn_ack handler in dccp6_request_sock_ops.
 * @dst may be supplied by the caller; otherwise it is looked up here.
 * Returns 0 on success (NET_XMIT_CN counts as success), negative errno
 * otherwise.
 */
static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
				 struct dst_entry *dst)
{
	struct inet6_request_sock *ireq6 = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	/* Describe the return flow to the peer that sent the REQUEST. */
	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_DCCP;
	ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = ireq6->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		/* If requested (osrcrt == 2), derive reply options by
		 * inverting the routing header received with the REQUEST.
		 * The resulting opt is locally allocated and freed below.
		 */
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    ireq6->pktopts) {
			struct sk_buff *pktopts = ireq6->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk,
				    (struct ipv6_rt_hdr *)(pktopts->nh.raw +
							   rxopt->srcrt));
		}

		/* With a source route, route to the first hop but remember
		 * the true destination for the header we emit.
		 */
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = dccp_make_response(sk, dst, req);
	if (skb != NULL) {
		struct dccp_hdr *dh = dccp_hdr(skb);
		/* Checksum covers the whole DCCP packet plus the IPv6
		 * pseudo-header addresses.
		 */
		dh->dccph_checksum = dccp_v6_check(dh, skb->len,
						   &ireq6->loc_addr,
						   &ireq6->rmt_addr,
						   csum_partial((char *)dh,
								skb->len,
								skb->csum));
		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	/* Free only options we built here, never the socket's np->opt. */
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
473
474static void dccp_v6_reqsk_destructor(struct request_sock *req)
475{
476 if (inet6_rsk(req)->pktopts != NULL)
477 kfree_skb(inet6_rsk(req)->pktopts);
478}
479
/* Connection-request operations for DCCPv6 mini-sockets. */
static struct request_sock_ops dccp6_request_sock_ops = {
	.family		= AF_INET6,
	.obj_size	= sizeof(struct dccp6_request_sock),
	.rtx_syn_ack	= dccp_v6_send_response,
	.send_ack	= dccp_v6_reqsk_send_ack,
	.destructor	= dccp_v6_reqsk_destructor,
	.send_reset	= dccp_v6_ctl_send_reset,
};
488
/* Timewait-socket sizing for DCCPv6 (see struct dccp6_timewait_sock). */
static struct timewait_sock_ops dccp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct dccp6_timewait_sock),
};
492
493static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
494{
495 struct ipv6_pinfo *np = inet6_sk(sk);
496 struct dccp_hdr *dh = dccp_hdr(skb);
497
498 dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr,
499 len, IPPROTO_DCCP,
500 csum_partial((char *)dh,
501 dh->dccph_doff << 2,
502 skb->csum));
503}
504
/*
 * Send a Reset in reply to @rxskb without an owning socket (e.g. packet
 * for a non-existent connection).  Never resets a Reset, and only replies
 * to unicast destinations.  Runs in BH context; allocates with GFP_ATOMIC.
 */
static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
{
	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
	const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
				       sizeof(struct dccp_hdr_ext) +
				       sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb;
	struct flowi fl;
	u64 seqno;

	/* Never answer a Reset with a Reset (avoids reset wars). */
	if (rxdh->dccph_type == DCCP_PKT_RESET)
		return;

	if (!ipv6_unicast_destination(rxskb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) +
			dccp_hdr_reset_len, GFP_ATOMIC);
	if (skb == NULL)
		return;

	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) +
		    dccp_hdr_reset_len);

	skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_hdr_reset_len);

	/* Swap the send and the receive. */
	dh->dccph_type	= DCCP_PKT_RESET;
	dh->dccph_sport	= rxdh->dccph_dport;
	dh->dccph_dport	= rxdh->dccph_sport;
	dh->dccph_doff	= dccp_hdr_reset_len / 4;
	dh->dccph_x	= 1;	/* 48-bit (extended) sequence numbers */
	dccp_hdr_reset(skb)->dccph_reset_code =
				DCCP_SKB_CB(rxskb)->dccpd_reset_code;

	/* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
	seqno = 0;
	if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
		dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);

	dccp_hdr_set_seq(dh, seqno);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
			 DCCP_SKB_CB(rxskb)->dccpd_seq);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
	/* NOTE(review): the pseudo-header length here is sizeof(*dh), i.e.
	 * the basic DCCP header only, while the packet actually carries
	 * dccp_hdr_reset_len bytes — looks like the checksum covers too
	 * little; confirm against the DCCP checksum rules.
	 */
	dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
					     sizeof(*dh), IPPROTO_DCCP,
					     skb->csum);
	fl.proto = IPPROTO_DCCP;
	fl.oif = inet6_iif(rxskb);
	fl.fl_ip_dport = dh->dccph_dport;
	fl.fl_ip_sport = dh->dccph_sport;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
		if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
			/* ip6_xmit consumes the skb on this path. */
			ip6_xmit(NULL, skb, &fl, NULL, 0);
			DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
			DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
			return;
		}
	}

	kfree_skb(skb);
}
579
/*
 * Send a bare Ack in reply to @rxskb without an owning full socket (used
 * for packets matching a request_sock).  Mirrors dccp_v6_ctl_send_reset()
 * but emits DCCP_PKT_ACK.  Runs in BH context; allocates with GFP_ATOMIC.
 */
static void dccp_v6_ctl_send_ack(struct sk_buff *rxskb)
{
	struct flowi fl;
	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
	const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_ack_bits);
	struct sk_buff *skb;

	skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) +
			dccp_hdr_ack_len, GFP_ATOMIC);
	if (skb == NULL)
		return;

	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) +
		    dccp_hdr_ack_len);

	skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_hdr_ack_len);

	/* Build DCCP header and checksum it. */
	dh->dccph_type	= DCCP_PKT_ACK;
	dh->dccph_sport	= rxdh->dccph_dport;	/* swap ports */
	dh->dccph_dport	= rxdh->dccph_sport;
	dh->dccph_doff	= dccp_hdr_ack_len / 4;
	dh->dccph_x	= 1;	/* 48-bit (extended) sequence numbers */

	dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
			 DCCP_SKB_CB(rxskb)->dccpd_seq);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);

	/* FIXME: calculate checksum, IPv4 also should... */

	fl.proto = IPPROTO_DCCP;
	fl.oif = inet6_iif(rxskb);
	fl.fl_ip_dport = dh->dccph_dport;
	fl.fl_ip_sport = dh->dccph_sport;

	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
		if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
			/* ip6_xmit consumes the skb on this path. */
			ip6_xmit(NULL, skb, &fl, NULL, 0);
			DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
			return;
		}
	}

	kfree_skb(skb);
}
633
/*
 * request_sock_ops->send_ack adapter: the request_sock argument is unused;
 * the reply is built entirely from the received packet.
 */
static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
				   struct request_sock *req)
{
	dccp_v6_ctl_send_ack(skb);
}
639
/*
 * Demultiplex a packet arriving on a LISTEN socket @sk:
 *  - if it matches a pending request_sock, run the third-handshake-step
 *    processing (dccp_check_req);
 *  - if it matches an already-established child, return that socket
 *    (locked) so the caller processes it there;
 *  - a timewait match is dropped (NULL);
 *  - otherwise return @sk itself (treat as a new connection attempt).
 */
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
	const struct ipv6hdr *iph = skb->nh.ipv6h;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet6_csk_search_req(sk, &prev,
							dh->dccph_sport,
							&iph->saddr,
							&iph->daddr,
							inet6_iif(skb));
	if (req != NULL)
		return dccp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&dccp_hashinfo,
					 &iph->saddr, dh->dccph_sport,
					 &iph->daddr, ntohs(dh->dccph_dport),
					 inet6_iif(skb));

	if (nsk != NULL) {
		if (nsk->sk_state != DCCP_TIME_WAIT) {
			/* Caller is responsible for bh_unlock_sock(nsk). */
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

	return sk;
}
671
/*
 * Handle an incoming REQUEST on a LISTEN socket: allocate a request_sock,
 * record the peer addresses and interface, send a RESPONSE, and hash the
 * mini-socket onto the listener's request queue.  IPv4-mapped packets are
 * diverted to dccp_v4_conn_request().  Returns 0 on success; -1 tells the
 * caller to generate a Reset with dcb->dccpd_reset_code.
 */
static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	/* NOTE(review): a full dccp_sock on the stack, handed to
	 * dccp_openreq_init() apparently as scratch state only — confirm
	 * it is never read before being written there.
	 */
	struct dccp_sock dp;
	struct request_sock *req;
	struct dccp_request_sock *dreq;
	struct inet6_request_sock *ireq6;
	struct ipv6_pinfo *np = inet6_sk(sk);
	const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
	__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;

	if (skb->protocol == htons(ETH_P_IP))
		return dccp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (dccp_bad_service_code(sk, service)) {
		reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
		goto drop;
	}
	/*
	 * There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk))
		goto drop;

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(sk->sk_prot->rsk_prot);
	if (req == NULL)
		goto drop;

	/* FIXME: process options */

	dccp_openreq_init(req, &dp, skb);

	ireq6 = inet6_rsk(req);
	ireq = inet_rsk(req);
	ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
	req->rcv_wnd	= 100; /* Fake, option parsing will get the
				  right value */
	ireq6->pktopts	= NULL;

	/* Keep a clone of the REQUEST around if any latched-packet-option
	 * feature is enabled, so IPV6_PKTOPTIONS can report it later.
	 */
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		ireq6->pktopts = skb;
	}
	ireq6->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq6->iif = inet6_iif(skb);

	/*
	 * Step 3: Process LISTEN state
	 *
	 *   Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *
	 *   In fact we defer setting S.GSR, S.SWL, S.SWH to
	 *   dccp_create_openreq_child.
	 */
	dreq = dccp_rsk(req);
	dreq->dreq_isr	   = dcb->dccpd_seq;
	dreq->dreq_iss	   = dccp_v6_init_sequence(sk, skb);
	dreq->dreq_service = service;

	if (dccp_v6_send_response(sk, req, NULL))
		goto drop_and_free;

	inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
	dcb->dccpd_reset_code = reset_code;
	return -1;
}
758
/*
 * Create the child (established) socket once the handshake for @req
 * completes.  Handles both native IPv6 and IPv4-mapped connections; for
 * the mapped case the IPv4 path creates the child and we graft the IPv6
 * bookkeeping onto it.  Returns the new socket, or NULL on failure (the
 * caller then generates a Reset).
 */
static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
					      struct sk_buff *skb,
					      struct request_sock *req,
					      struct dst_entry *dst)
{
	struct inet6_request_sock *ireq6 = inet6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct inet_sock *newinet;
	struct dccp_sock *newdp;
	struct dccp6_sock *newdp6;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = dccp_v4_request_recv_sock(sk, skb, req, dst);
		if (newsk == NULL)
			return NULL;

		newdp6 = (struct dccp6_sock *)newsk;
		newdp = dccp_sk(newsk);
		newinet = inet_sk(newsk);
		newinet->pinet6 = &newdp6->inet6;
		newnp = inet6_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		/* Record the IPv4 endpoints as ::ffff:a.b.c.d. */
		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		/* The child talks IPv4 on the wire: use the mapped af_ops
		 * and the v4 backlog receive handler.
		 */
		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, dccp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	/* Optionally invert the received routing header for reply options;
	 * the result is locally allocated and must be freed if unused.
	 */
	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && ireq6->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk,
		       (struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
					      rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_DCCP;
		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
		/* Source-routed: route to first hop, keep real dst aside. */
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = dccp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, dccp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
				~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newdp6 = (struct dccp6_sock *)newsk;
	newinet = inet_sk(newsk);
	newinet->pinet6 = &newdp6->inet6;
	newdp = dccp_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr);
	newsk->sk_bound_dev_if = ireq6->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (ireq6->pktopts != NULL) {
		newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC);
		kfree_skb(ireq6->pktopts);
		ireq6->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newnp->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);

	dccp_sync_mss(newsk, dst_mtu(dst));

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__inet6_hash(&dccp_hashinfo, newsk);
	inet_inherit_port(&dccp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	/* Free only locally built options, never the listener's np->opt. */
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
945
/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Per-socket receive handler (sk_backlog_rcv / process path) for DCCPv6.
 * Returns 0; the skb is always consumed (processed, queued or freed).
 */
static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, dccp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   dccp_v6_hnd_req and dccp_v6_ctl_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return dccp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
		if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
			goto reset;
		return 0;
	}

	if (sk->sk_state == DCCP_LISTEN) {
		struct sock *nsk = dccp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if(nsk != sk) {
			if (dccp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
		goto reset;
	return 0;

reset:
	dccp_v6_ctl_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
}
1031
/*
 * inet6_protocol handler: entry point for every incoming DCCPv6 packet.
 * Validates the header, looks up the owning socket and hands the packet
 * over (or generates a Reset when no connection exists).  The step
 * comments follow the DCCP specification's packet-processing pseudocode.
 */
static int dccp_v6_rcv(struct sk_buff **pskb)
{
	const struct dccp_hdr *dh;
	struct sk_buff *skb = *pskb;
	struct sock *sk;

	/* Step 1: Check header basics: */

	if (dccp_invalid_packet(skb))
		goto discard_it;

	dh = dccp_hdr(skb);

	/* Cache sequence/ack numbers in the skb control block for the
	 * rest of the receive path.
	 */
	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(skb);
	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;

	if (dccp_packet_without_ack(skb))
		DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
	else
		DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);

	/* Step 2:
	 * 	Look up flow ID in table and get corresponding socket */
	sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
			    dh->dccph_sport,
			    &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
			    inet6_iif(skb));
	/*
	 * Step 2:
	 * 	If no socket ...
	 *		Generate Reset(No Connection) unless P.type == Reset
	 *		Drop packet and return
	 */
	if (sk == NULL)
		goto no_dccp_socket;

	/*
	 * Step 2:
	 * 	... or S.state == TIMEWAIT,
	 *		Generate Reset(No Connection) unless P.type == Reset
	 *		Drop packet and return
	 */

	if (sk->sk_state == DCCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	/* Deliver: locks the socket and runs do_rcv or queues on backlog;
	 * also drops our lookup reference.
	 */
	return sk_receive_skb(sk, skb) ? -1 : 0;

no_dccp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;
	/*
	 * Step 2:
	 *	Generate Reset(No Connection) unless P.type == Reset
	 *	Drop packet and return
	 */
	if (dh->dccph_type != DCCP_PKT_RESET) {
		DCCP_SKB_CB(skb)->dccpd_reset_code =
					DCCP_RESET_CODE_NO_CONNECTION;
		dccp_v6_ctl_send_reset(skb);
	}
discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	/* Timewait sockets get no real processing here yet: drop the
	 * reference and fall back to the no-socket (Reset) path.
	 */
	inet_twsk_put((struct inet_timewait_sock *)sk);
	goto no_dccp_socket;
}
1113
/* Address-family operations for native DCCP over IPv6. */
static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
	.queue_xmit	= inet6_csk_xmit,
	.send_check	= dccp_v6_send_check,
	.rebuild_header	= inet6_sk_rebuild_header,
	.conn_request	= dccp_v6_conn_request,
	.syn_recv_sock	= dccp_v6_request_recv_sock,
	.net_header_len	= sizeof(struct ipv6hdr),
	.setsockopt	= ipv6_setsockopt,
	.getsockopt	= ipv6_getsockopt,
	.addr2sockaddr	= inet6_csk_addr2sockaddr,
	.sockaddr_len	= sizeof(struct sockaddr_in6)
};
1126
1127/*
1128 * DCCP over IPv4 via INET6 API
1129 */
1130static struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
1131 .queue_xmit = ip_queue_xmit,
1132 .send_check = dccp_v4_send_check,
1133 .rebuild_header = inet_sk_rebuild_header,
1134 .conn_request = dccp_v6_conn_request,
1135 .syn_recv_sock = dccp_v6_request_recv_sock,
1136 .net_header_len = sizeof(struct iphdr),
1137 .setsockopt = ipv6_setsockopt,
1138 .getsockopt = ipv6_getsockopt,
1139 .addr2sockaddr = inet6_csk_addr2sockaddr,
1140 .sockaddr_len = sizeof(struct sockaddr_in6)
1141};
1142
1143/* NOTE: A lot of things set to zero explicitly by call to
1144 * sk_alloc() so need not be done here.
1145 */
1146static int dccp_v6_init_sock(struct sock *sk)
1147{
1148 int err = dccp_v4_init_sock(sk);
1149
1150 if (err == 0)
1151 inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
1152
1153 return err;
1154}
1155
/*
 * Socket destructor: tear down the DCCP/IPv4-common state first, then
 * release the IPv6-specific state.
 */
static int dccp_v6_destroy_sock(struct sock *sk)
{
	int err;

	dccp_v4_destroy_sock(sk);
	err = inet6_destroy_sock(sk);
	return err;
}
1161
/* Transport-protocol descriptor for DCCPv6; mostly shares the generic
 * DCCP handlers, with v6-specific connect/init/hash/receive entries.
 */
static struct proto dccp_v6_prot = {
	.name		= "DCCPv6",
	.owner		= THIS_MODULE,
	.close		= dccp_close,
	.connect	= dccp_v6_connect,
	.disconnect	= dccp_disconnect,
	.ioctl		= dccp_ioctl,
	.init		= dccp_v6_init_sock,
	.setsockopt	= dccp_setsockopt,
	.getsockopt	= dccp_getsockopt,
	.sendmsg	= dccp_sendmsg,
	.recvmsg	= dccp_recvmsg,
	.backlog_rcv	= dccp_v6_do_rcv,
	.hash		= dccp_v6_hash,
	.unhash		= dccp_unhash,
	.accept		= inet_csk_accept,
	.get_port	= dccp_v6_get_port,
	.shutdown	= dccp_shutdown,
	.destroy	= dccp_v6_destroy_sock,
	.orphan_count	= &dccp_orphan_count,
	.max_header	= MAX_DCCP_HEADER,
	.obj_size	= sizeof(struct dccp6_sock),
	.rsk_prot	= &dccp6_request_sock_ops,
	.twsk_prot	= &dccp6_timewait_sock_ops,
};
1187
/* IPv6 protocol hook: routes IPPROTO_DCCP packets and ICMPv6 errors
 * into the DCCP receive/error handlers.
 */
static struct inet6_protocol dccp_v6_protocol = {
	.handler	= dccp_v6_rcv,
	.err_handler	= dccp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};
1193
/* BSD-socket-layer operations for AF_INET6 SOCK_DCCP sockets; generic
 * inet6/sock_common handlers plus DCCP's poll and listen.
 */
static struct proto_ops inet6_dccp_ops = {
	.family		= PF_INET6,
	.owner		= THIS_MODULE,
	.release	= inet6_release,
	.bind		= inet6_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet6_getname,
	.poll		= dccp_poll,
	.ioctl		= inet6_ioctl,
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
1214
/* Registration entry tying SOCK_DCCP/IPPROTO_DCCP on AF_INET6 to the
 * protocol and socket ops above (.capability = -1: no capability needed).
 */
static struct inet_protosw dccp_v6_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v6_prot,
	.ops		= &inet6_dccp_ops,
	.capability	= -1,
	.flags		= INET_PROTOSW_ICSK,
};
1223
1224static int __init dccp_v6_init(void)
1225{
1226 int err = proto_register(&dccp_v6_prot, 1);
1227
1228 if (err != 0)
1229 goto out;
1230
1231 err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
1232 if (err != 0)
1233 goto out_unregister_proto;
1234
1235 inet6_register_protosw(&dccp_v6_protosw);
1236out:
1237 return err;
1238out_unregister_proto:
1239 proto_unregister(&dccp_v6_prot);
1240 goto out;
1241}
1242
/* Module exit: undo dccp_v6_init() in strict reverse order — protocol
 * hook first so no new packets arrive, then protosw, then the proto.
 */
static void __exit dccp_v6_exit(void)
{
	inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
	inet6_unregister_protosw(&dccp_v6_protosw);
	proto_unregister(&dccp_v6_prot);
}
1249
1250module_init(dccp_v6_init);
1251module_exit(dccp_v6_exit);
1252
1253/*
1254 * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
1255 * values directly, Also cover the case where the protocol is not specified,
1256 * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
1257 */
1258MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-33-type-6");
1259MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-0-type-6");
1260MODULE_LICENSE("GPL");
1261MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
1262MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
new file mode 100644
index 000000000000..e4d4e9309270
--- /dev/null
+++ b/net/dccp/ipv6.h
@@ -0,0 +1,37 @@
1#ifndef _DCCP_IPV6_H
2#define _DCCP_IPV6_H
3/*
4 * net/dccp/ipv6.h
5 *
6 * An implementation of the DCCP protocol
7 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/config.h>
15#include <linux/dccp.h>
16#include <linux/ipv6.h>
17
18struct dccp6_sock {
19 struct dccp_sock dccp;
20 /*
21 * ipv6_pinfo has to be the last member of dccp6_sock,
22 * see inet6_sk_generic.
23 */
24 struct ipv6_pinfo inet6;
25};
26
27struct dccp6_request_sock {
28 struct dccp_request_sock dccp;
29 struct inet6_request_sock inet6;
30};
31
32struct dccp6_timewait_sock {
33 struct inet_timewait_sock inet;
34 struct inet6_timewait_sock tw6;
35};
36
37#endif /* _DCCP_IPV6_H */
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 1393461898bb..29261fc198e7 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -40,6 +40,8 @@ struct inet_timewait_death_row dccp_death_row = {
40 (unsigned long)&dccp_death_row), 40 (unsigned long)&dccp_death_row),
41}; 41};
42 42
43EXPORT_SYMBOL_GPL(dccp_death_row);
44
43void dccp_time_wait(struct sock *sk, int state, int timeo) 45void dccp_time_wait(struct sock *sk, int state, int timeo)
44{ 46{
45 struct inet_timewait_sock *tw = NULL; 47 struct inet_timewait_sock *tw = NULL;
@@ -50,7 +52,18 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
50 if (tw != NULL) { 52 if (tw != NULL) {
51 const struct inet_connection_sock *icsk = inet_csk(sk); 53 const struct inet_connection_sock *icsk = inet_csk(sk);
52 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); 54 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
53 55#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
56 if (tw->tw_family == PF_INET6) {
57 const struct ipv6_pinfo *np = inet6_sk(sk);
58 struct inet6_timewait_sock *tw6;
59
60 tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
61 tw6 = inet6_twsk((struct sock *)tw);
62 ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
63 ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
64 tw->tw_ipv6only = np->ipv6only;
65 }
66#endif
54 /* Linkage updates. */ 67 /* Linkage updates. */
55 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); 68 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
56 69
@@ -170,6 +183,8 @@ out_free:
170 return newsk; 183 return newsk;
171} 184}
172 185
186EXPORT_SYMBOL_GPL(dccp_create_openreq_child);
187
173/* 188/*
174 * Process an incoming packet for RESPOND sockets represented 189 * Process an incoming packet for RESPOND sockets represented
175 * as an request_sock. 190 * as an request_sock.
@@ -214,7 +229,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
214 goto drop; 229 goto drop;
215 } 230 }
216 231
217 child = dccp_v4_request_recv_sock(sk, skb, req, NULL); 232 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
218 if (child == NULL) 233 if (child == NULL)
219 goto listen_overflow; 234 goto listen_overflow;
220 235
@@ -236,6 +251,8 @@ drop:
236 goto out; 251 goto out;
237} 252}
238 253
254EXPORT_SYMBOL_GPL(dccp_check_req);
255
239/* 256/*
240 * Queue segment on the new socket if the new socket is active, 257 * Queue segment on the new socket if the new socket is active,
241 * otherwise we just shortcircuit this and continue with 258 * otherwise we just shortcircuit this and continue with
@@ -266,3 +283,5 @@ int dccp_child_process(struct sock *parent, struct sock *child,
266 sock_put(child); 283 sock_put(child);
267 return ret; 284 return ret;
268} 285}
286
287EXPORT_SYMBOL_GPL(dccp_child_process);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 74ff87025878..efd7ffb903a1 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -15,6 +15,7 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17 17
18#include <net/inet_sock.h>
18#include <net/sock.h> 19#include <net/sock.h>
19 20
20#include "ackvec.h" 21#include "ackvec.h"
@@ -43,6 +44,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
43{ 44{
44 if (likely(skb != NULL)) { 45 if (likely(skb != NULL)) {
45 const struct inet_sock *inet = inet_sk(sk); 46 const struct inet_sock *inet = inet_sk(sk);
47 const struct inet_connection_sock *icsk = inet_csk(sk);
46 struct dccp_sock *dp = dccp_sk(sk); 48 struct dccp_sock *dp = dccp_sk(sk);
47 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 49 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
48 struct dccp_hdr *dh; 50 struct dccp_hdr *dh;
@@ -108,8 +110,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
108 break; 110 break;
109 } 111 }
110 112
111 dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, 113 icsk->icsk_af_ops->send_check(sk, skb->len, skb);
112 inet->daddr);
113 114
114 if (set_ack) 115 if (set_ack)
115 dccp_event_ack_sent(sk); 116 dccp_event_ack_sent(sk);
@@ -117,7 +118,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
117 DCCP_INC_STATS(DCCP_MIB_OUTSEGS); 118 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
118 119
119 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 120 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
120 err = ip_queue_xmit(skb, 0); 121 err = icsk->icsk_af_ops->queue_xmit(skb, 0);
121 if (err <= 0) 122 if (err <= 0)
122 return err; 123 return err;
123 124
@@ -134,20 +135,13 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
134 135
135unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) 136unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
136{ 137{
138 struct inet_connection_sock *icsk = inet_csk(sk);
137 struct dccp_sock *dp = dccp_sk(sk); 139 struct dccp_sock *dp = dccp_sk(sk);
138 int mss_now; 140 int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
139 141 sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext));
140 /*
141 * FIXME: we really should be using the af_specific thing to support
142 * IPv6.
143 * mss_now = pmtu - tp->af_specific->net_header_len -
144 * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
145 */
146 mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) -
147 sizeof(struct dccp_hdr_ext);
148 142
149 /* Now subtract optional transport overhead */ 143 /* Now subtract optional transport overhead */
150 mss_now -= dp->dccps_ext_header_len; 144 mss_now -= icsk->icsk_ext_hdr_len;
151 145
152 /* 146 /*
153 * FIXME: this should come from the CCID infrastructure, where, say, 147 * FIXME: this should come from the CCID infrastructure, where, say,
@@ -160,12 +154,14 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
160 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; 154 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
161 155
162 /* And store cached results */ 156 /* And store cached results */
163 dp->dccps_pmtu_cookie = pmtu; 157 icsk->icsk_pmtu_cookie = pmtu;
164 dp->dccps_mss_cache = mss_now; 158 dp->dccps_mss_cache = mss_now;
165 159
166 return mss_now; 160 return mss_now;
167} 161}
168 162
163EXPORT_SYMBOL_GPL(dccp_sync_mss);
164
169void dccp_write_space(struct sock *sk) 165void dccp_write_space(struct sock *sk)
170{ 166{
171 read_lock(&sk->sk_callback_lock); 167 read_lock(&sk->sk_callback_lock);
@@ -266,7 +262,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
266 262
267int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) 263int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
268{ 264{
269 if (inet_sk_rebuild_header(sk) != 0) 265 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0)
270 return -EHOSTUNREACH; /* Routing failure or similar. */ 266 return -EHOSTUNREACH; /* Routing failure or similar. */
271 267
272 return dccp_transmit_skb(sk, (skb_cloned(skb) ? 268 return dccp_transmit_skb(sk, (skb_cloned(skb) ?
@@ -321,6 +317,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
321 return skb; 317 return skb;
322} 318}
323 319
320EXPORT_SYMBOL_GPL(dccp_make_response);
321
324struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, 322struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
325 const enum dccp_reset_codes code) 323 const enum dccp_reset_codes code)
326 324
@@ -377,6 +375,7 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
377 */ 375 */
378static inline void dccp_connect_init(struct sock *sk) 376static inline void dccp_connect_init(struct sock *sk)
379{ 377{
378 struct dccp_sock *dp = dccp_sk(sk);
380 struct dst_entry *dst = __sk_dst_get(sk); 379 struct dst_entry *dst = __sk_dst_get(sk);
381 struct inet_connection_sock *icsk = inet_csk(sk); 380 struct inet_connection_sock *icsk = inet_csk(sk);
382 381
@@ -385,10 +384,16 @@ static inline void dccp_connect_init(struct sock *sk)
385 384
386 dccp_sync_mss(sk, dst_mtu(dst)); 385 dccp_sync_mss(sk, dst_mtu(dst));
387 386
388 /* 387 dccp_update_gss(sk, dp->dccps_iss);
389 * FIXME: set dp->{dccps_swh,dccps_swl}, with 388 /*
390 * something like dccp_inc_seq 389 * SWL and AWL are initially adjusted so that they are not less than
391 */ 390 * the initial Sequence Numbers received and sent, respectively:
391 * SWL := max(GSR + 1 - floor(W/4), ISR),
392 * AWL := max(GSS - W' + 1, ISS).
393 * These adjustments MUST be applied only at the beginning of the
394 * connection.
395 */
396 dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
392 397
393 icsk->icsk_retransmits = 0; 398 icsk->icsk_retransmits = 0;
394} 399}
@@ -420,6 +425,8 @@ int dccp_connect(struct sock *sk)
420 return 0; 425 return 0;
421} 426}
422 427
428EXPORT_SYMBOL_GPL(dccp_connect);
429
423void dccp_send_ack(struct sock *sk) 430void dccp_send_ack(struct sock *sk)
424{ 431{
425 /* If we have been reset, we may not send again. */ 432 /* If we have been reset, we may not send again. */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 8a6b2a9e4581..65b11ea90d85 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -24,7 +24,7 @@
24#include <net/checksum.h> 24#include <net/checksum.h>
25 25
26#include <net/inet_common.h> 26#include <net/inet_common.h>
27#include <net/ip.h> 27#include <net/inet_sock.h>
28#include <net/protocol.h> 28#include <net/protocol.h>
29#include <net/sock.h> 29#include <net/sock.h>
30#include <net/xfrm.h> 30#include <net/xfrm.h>
@@ -34,15 +34,18 @@
34#include <linux/timer.h> 34#include <linux/timer.h>
35#include <linux/delay.h> 35#include <linux/delay.h>
36#include <linux/poll.h> 36#include <linux/poll.h>
37#include <linux/dccp.h>
38 37
39#include "ccid.h" 38#include "ccid.h"
40#include "dccp.h" 39#include "dccp.h"
41 40
42DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; 41DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
43 42
43EXPORT_SYMBOL_GPL(dccp_statistics);
44
44atomic_t dccp_orphan_count = ATOMIC_INIT(0); 45atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45 46
47EXPORT_SYMBOL_GPL(dccp_orphan_count);
48
46static struct net_protocol dccp_protocol = { 49static struct net_protocol dccp_protocol = {
47 .handler = dccp_v4_rcv, 50 .handler = dccp_v4_rcv,
48 .err_handler = dccp_v4_err, 51 .err_handler = dccp_v4_err,
@@ -149,6 +152,8 @@ int dccp_disconnect(struct sock *sk, int flags)
149 return err; 152 return err;
150} 153}
151 154
155EXPORT_SYMBOL_GPL(dccp_disconnect);
156
152/* 157/*
153 * Wait for a DCCP event. 158 * Wait for a DCCP event.
154 * 159 *
@@ -156,8 +161,8 @@ int dccp_disconnect(struct sock *sk, int flags)
156 * take care of normal races (between the test and the event) and we don't 161 * take care of normal races (between the test and the event) and we don't
157 * go look at any of the socket buffers directly. 162 * go look at any of the socket buffers directly.
158 */ 163 */
159static unsigned int dccp_poll(struct file *file, struct socket *sock, 164unsigned int dccp_poll(struct file *file, struct socket *sock,
160 poll_table *wait) 165 poll_table *wait)
161{ 166{
162 unsigned int mask; 167 unsigned int mask;
163 struct sock *sk = sock->sk; 168 struct sock *sk = sock->sk;
@@ -205,12 +210,16 @@ static unsigned int dccp_poll(struct file *file, struct socket *sock,
205 return mask; 210 return mask;
206} 211}
207 212
213EXPORT_SYMBOL_GPL(dccp_poll);
214
208int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) 215int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
209{ 216{
210 dccp_pr_debug("entry\n"); 217 dccp_pr_debug("entry\n");
211 return -ENOIOCTLCMD; 218 return -ENOIOCTLCMD;
212} 219}
213 220
221EXPORT_SYMBOL_GPL(dccp_ioctl);
222
214static int dccp_setsockopt_service(struct sock *sk, const u32 service, 223static int dccp_setsockopt_service(struct sock *sk, const u32 service,
215 char __user *optval, int optlen) 224 char __user *optval, int optlen)
216{ 225{
@@ -254,7 +263,9 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
254 int val; 263 int val;
255 264
256 if (level != SOL_DCCP) 265 if (level != SOL_DCCP)
257 return ip_setsockopt(sk, level, optname, optval, optlen); 266 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
267 optname, optval,
268 optlen);
258 269
259 if (optlen < sizeof(int)) 270 if (optlen < sizeof(int))
260 return -EINVAL; 271 return -EINVAL;
@@ -282,6 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
282 return err; 293 return err;
283} 294}
284 295
296EXPORT_SYMBOL_GPL(dccp_setsockopt);
297
285static int dccp_getsockopt_service(struct sock *sk, int len, 298static int dccp_getsockopt_service(struct sock *sk, int len,
286 u32 __user *optval, 299 u32 __user *optval,
287 int __user *optlen) 300 int __user *optlen)
@@ -320,8 +333,9 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
320 int val, len; 333 int val, len;
321 334
322 if (level != SOL_DCCP) 335 if (level != SOL_DCCP)
323 return ip_getsockopt(sk, level, optname, optval, optlen); 336 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
324 337 optname, optval,
338 optlen);
325 if (get_user(len, optlen)) 339 if (get_user(len, optlen))
326 return -EFAULT; 340 return -EFAULT;
327 341
@@ -354,6 +368,8 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
354 return 0; 368 return 0;
355} 369}
356 370
371EXPORT_SYMBOL_GPL(dccp_getsockopt);
372
357int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 373int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
358 size_t len) 374 size_t len)
359{ 375{
@@ -410,6 +426,8 @@ out_discard:
410 goto out_release; 426 goto out_release;
411} 427}
412 428
429EXPORT_SYMBOL_GPL(dccp_sendmsg);
430
413int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 431int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
414 size_t len, int nonblock, int flags, int *addr_len) 432 size_t len, int nonblock, int flags, int *addr_len)
415{ 433{
@@ -507,7 +525,9 @@ out:
507 return len; 525 return len;
508} 526}
509 527
510static int inet_dccp_listen(struct socket *sock, int backlog) 528EXPORT_SYMBOL_GPL(dccp_recvmsg);
529
530int inet_dccp_listen(struct socket *sock, int backlog)
511{ 531{
512 struct sock *sk = sock->sk; 532 struct sock *sk = sock->sk;
513 unsigned char old_state; 533 unsigned char old_state;
@@ -543,6 +563,8 @@ out:
543 return err; 563 return err;
544} 564}
545 565
566EXPORT_SYMBOL_GPL(inet_dccp_listen);
567
546static const unsigned char dccp_new_state[] = { 568static const unsigned char dccp_new_state[] = {
547 /* current state: new state: action: */ 569 /* current state: new state: action: */
548 [0] = DCCP_CLOSED, 570 [0] = DCCP_CLOSED,
@@ -648,12 +670,16 @@ adjudge_to_death:
648 sock_put(sk); 670 sock_put(sk);
649} 671}
650 672
673EXPORT_SYMBOL_GPL(dccp_close);
674
651void dccp_shutdown(struct sock *sk, int how) 675void dccp_shutdown(struct sock *sk, int how)
652{ 676{
653 dccp_pr_debug("entry\n"); 677 dccp_pr_debug("entry\n");
654} 678}
655 679
656static struct proto_ops inet_dccp_ops = { 680EXPORT_SYMBOL_GPL(dccp_shutdown);
681
682static const struct proto_ops inet_dccp_ops = {
657 .family = PF_INET, 683 .family = PF_INET,
658 .owner = THIS_MODULE, 684 .owner = THIS_MODULE,
659 .release = inet_release, 685 .release = inet_release,
@@ -681,11 +707,11 @@ extern struct net_proto_family inet_family_ops;
681static struct inet_protosw dccp_v4_protosw = { 707static struct inet_protosw dccp_v4_protosw = {
682 .type = SOCK_DCCP, 708 .type = SOCK_DCCP,
683 .protocol = IPPROTO_DCCP, 709 .protocol = IPPROTO_DCCP,
684 .prot = &dccp_v4_prot, 710 .prot = &dccp_prot,
685 .ops = &inet_dccp_ops, 711 .ops = &inet_dccp_ops,
686 .capability = -1, 712 .capability = -1,
687 .no_check = 0, 713 .no_check = 0,
688 .flags = 0, 714 .flags = INET_PROTOSW_ICSK,
689}; 715};
690 716
691/* 717/*
@@ -760,13 +786,15 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
760int dccp_debug; 786int dccp_debug;
761module_param(dccp_debug, int, 0444); 787module_param(dccp_debug, int, 0444);
762MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); 788MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
789
790EXPORT_SYMBOL_GPL(dccp_debug);
763#endif 791#endif
764 792
765static int __init dccp_init(void) 793static int __init dccp_init(void)
766{ 794{
767 unsigned long goal; 795 unsigned long goal;
768 int ehash_order, bhash_order, i; 796 int ehash_order, bhash_order, i;
769 int rc = proto_register(&dccp_v4_prot, 1); 797 int rc = proto_register(&dccp_prot, 1);
770 798
771 if (rc) 799 if (rc)
772 goto out; 800 goto out;
@@ -869,7 +897,7 @@ out_free_bind_bucket_cachep:
869 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 897 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
870 dccp_hashinfo.bind_bucket_cachep = NULL; 898 dccp_hashinfo.bind_bucket_cachep = NULL;
871out_proto_unregister: 899out_proto_unregister:
872 proto_unregister(&dccp_v4_prot); 900 proto_unregister(&dccp_prot);
873 goto out; 901 goto out;
874} 902}
875 903
@@ -892,7 +920,7 @@ static void __exit dccp_fini(void)
892 get_order(dccp_hashinfo.ehash_size * 920 get_order(dccp_hashinfo.ehash_size *
893 sizeof(struct inet_ehash_bucket))); 921 sizeof(struct inet_ehash_bucket)));
894 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 922 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
895 proto_unregister(&dccp_v4_prot); 923 proto_unregister(&dccp_prot);
896} 924}
897 925
898module_init(dccp_init); 926module_init(dccp_init);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d402e9020c68..ce4aaf94860d 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -122,6 +122,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
122#include <net/flow.h> 122#include <net/flow.h>
123#include <asm/system.h> 123#include <asm/system.h>
124#include <asm/ioctls.h> 124#include <asm/ioctls.h>
125#include <linux/capability.h>
125#include <linux/mm.h> 126#include <linux/mm.h>
126#include <linux/interrupt.h> 127#include <linux/interrupt.h>
127#include <linux/proc_fs.h> 128#include <linux/proc_fs.h>
@@ -149,7 +150,7 @@ static void dn_keepalive(struct sock *sk);
149#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1) 150#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1)
150 151
151 152
152static struct proto_ops dn_proto_ops; 153static const struct proto_ops dn_proto_ops;
153static DEFINE_RWLOCK(dn_hash_lock); 154static DEFINE_RWLOCK(dn_hash_lock);
154static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; 155static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
155static struct hlist_head dn_wild_sk; 156static struct hlist_head dn_wild_sk;
@@ -1252,7 +1253,7 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1252 break; 1253 break;
1253 1254
1254 default: 1255 default:
1255 err = dev_ioctl(cmd, (void __user *)arg); 1256 err = -ENOIOCTLCMD;
1256 break; 1257 break;
1257 } 1258 }
1258 1259
@@ -2342,7 +2343,7 @@ static struct net_proto_family dn_family_ops = {
2342 .owner = THIS_MODULE, 2343 .owner = THIS_MODULE,
2343}; 2344};
2344 2345
2345static struct proto_ops dn_proto_ops = { 2346static const struct proto_ops dn_proto_ops = {
2346 .family = AF_DECnet, 2347 .family = AF_DECnet,
2347 .owner = THIS_MODULE, 2348 .owner = THIS_MODULE,
2348 .release = dn_release, 2349 .release = dn_release,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 5610bb16dbf9..efbead83ba7f 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -25,6 +25,7 @@
25 */ 25 */
26 26
27#include <linux/config.h> 27#include <linux/config.h>
28#include <linux/capability.h>
28#include <linux/module.h> 29#include <linux/module.h>
29#include <linux/moduleparam.h> 30#include <linux/moduleparam.h>
30#include <linux/init.h> 31#include <linux/init.h>
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 8d0cc3cf3e49..33ab256cfd4a 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -408,11 +408,14 @@ int dn_neigh_router_hello(struct sk_buff *skb)
408 } 408 }
409 } 409 }
410 410
411 if (!dn_db->router) { 411 /* Only use routers in our area */
412 dn_db->router = neigh_clone(neigh); 412 if ((dn_ntohs(src)>>10) == dn_ntohs((decnet_address)>>10)) {
413 } else { 413 if (!dn_db->router) {
414 if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) 414 dn_db->router = neigh_clone(neigh);
415 neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); 415 } else {
416 if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
417 neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
418 }
416 } 419 }
417 write_unlock(&neigh->lock); 420 write_unlock(&neigh->lock);
418 neigh_release(neigh); 421 neigh_release(neigh);
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 369f25b60f3f..44bda85e678f 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -793,7 +793,6 @@ static int dn_nsp_rx_packet(struct sk_buff *skb)
793got_it: 793got_it:
794 if (sk != NULL) { 794 if (sk != NULL) {
795 struct dn_scp *scp = DN_SK(sk); 795 struct dn_scp *scp = DN_SK(sk);
796 int ret;
797 796
798 /* Reset backoff */ 797 /* Reset backoff */
799 scp->nsp_rxtshift = 0; 798 scp->nsp_rxtshift = 0;
@@ -807,21 +806,7 @@ got_it:
807 goto free_out; 806 goto free_out;
808 } 807 }
809 808
810 bh_lock_sock(sk); 809 return sk_receive_skb(sk, skb);
811 ret = NET_RX_SUCCESS;
812 if (decnet_debug_level & 8)
813 printk(KERN_DEBUG "NSP: 0x%02x 0x%02x 0x%04x 0x%04x %d\n",
814 (int)cb->rt_flags, (int)cb->nsp_flags,
815 (int)cb->src_port, (int)cb->dst_port,
816 !!sock_owned_by_user(sk));
817 if (!sock_owned_by_user(sk))
818 ret = dn_nsp_backlog_rcv(sk, skb);
819 else
820 sk_add_backlog(sk, skb);
821 bh_unlock_sock(sk);
822 sock_put(sk);
823
824 return ret;
825 } 810 }
826 811
827 return dn_nsp_no_socket(skb, reason); 812 return dn_nsp_no_socket(skb, reason);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 1ab94c6e22ed..16a5a31e2126 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -26,8 +26,6 @@
26#include <net/dn.h> 26#include <net/dn.h>
27#include <net/dn_route.h> 27#include <net/dn_route.h>
28 28
29#include <linux/netfilter_decnet.h>
30
31static struct sock *dnrmg = NULL; 29static struct sock *dnrmg = NULL;
32 30
33 31
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 34fdac51df96..c792994d7952 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
31#include <linux/if_arp.h> 31#include <linux/if_arp.h>
32#include <linux/wireless.h> 32#include <linux/wireless.h>
33#include <linux/skbuff.h> 33#include <linux/skbuff.h>
34#include <linux/udp.h>
34#include <net/sock.h> 35#include <net/sock.h>
35#include <net/inet_common.h> 36#include <net/inet_common.h>
36#include <linux/stat.h> 37#include <linux/stat.h>
@@ -45,7 +46,7 @@
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46#include <asm/system.h> 47#include <asm/system.h>
47 48
48static struct proto_ops econet_ops; 49static const struct proto_ops econet_ops;
49static struct hlist_head econet_sklist; 50static struct hlist_head econet_sklist;
50static DEFINE_RWLOCK(econet_lock); 51static DEFINE_RWLOCK(econet_lock);
51 52
@@ -56,7 +57,7 @@ static struct net_device *net2dev_map[256];
56#define EC_PORT_IP 0xd2 57#define EC_PORT_IP 0xd2
57 58
58#ifdef CONFIG_ECONET_AUNUDP 59#ifdef CONFIG_ECONET_AUNUDP
59static spinlock_t aun_queue_lock; 60static DEFINE_SPINLOCK(aun_queue_lock);
60static struct socket *udpsock; 61static struct socket *udpsock;
61#define AUN_PORT 0x8000 62#define AUN_PORT 0x8000
62 63
@@ -686,7 +687,7 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
686 break; 687 break;
687 688
688 default: 689 default:
689 return dev_ioctl(cmd, argp); 690 return -ENOIOCTLCMD;
690 } 691 }
691 /*NOTREACHED*/ 692 /*NOTREACHED*/
692 return 0; 693 return 0;
@@ -698,7 +699,7 @@ static struct net_proto_family econet_family_ops = {
698 .owner = THIS_MODULE, 699 .owner = THIS_MODULE,
699}; 700};
700 701
701static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { 702static const struct proto_ops SOCKOPS_WRAPPED(econet_ops) = {
702 .family = PF_ECONET, 703 .family = PF_ECONET,
703 .owner = THIS_MODULE, 704 .owner = THIS_MODULE,
704 .release = econet_release, 705 .release = econet_release,
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index e24577367274..9890fd97e538 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -53,6 +53,7 @@
53#include <linux/errno.h> 53#include <linux/errno.h>
54#include <linux/config.h> 54#include <linux/config.h>
55#include <linux/init.h> 55#include <linux/init.h>
56#include <linux/if_ether.h>
56#include <net/dst.h> 57#include <net/dst.h>
57#include <net/arp.h> 58#include <net/arp.h>
58#include <net/sock.h> 59#include <net/sock.h>
@@ -162,7 +163,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
162 skb_pull(skb,ETH_HLEN); 163 skb_pull(skb,ETH_HLEN);
163 eth = eth_hdr(skb); 164 eth = eth_hdr(skb);
164 165
165 if (*eth->h_dest&1) { 166 if (is_multicast_ether_addr(eth->h_dest)) {
166 if (!compare_ether_addr(eth->h_dest, dev->broadcast)) 167 if (!compare_ether_addr(eth->h_dest, dev->broadcast))
167 skb->pkt_type = PACKET_BROADCAST; 168 skb->pkt_type = PACKET_BROADCAST;
168 else 169 else
@@ -251,7 +252,7 @@ static int eth_mac_addr(struct net_device *dev, void *p)
251 252
252static int eth_change_mtu(struct net_device *dev, int new_mtu) 253static int eth_change_mtu(struct net_device *dev, int new_mtu)
253{ 254{
254 if ((new_mtu < 68) || (new_mtu > 1500)) 255 if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
255 return -EINVAL; 256 return -EINVAL;
256 dev->mtu = new_mtu; 257 dev->mtu = new_mtu;
257 return 0; 258 return 0;
@@ -272,7 +273,7 @@ void ether_setup(struct net_device *dev)
272 273
273 dev->type = ARPHRD_ETHER; 274 dev->type = ARPHRD_ETHER;
274 dev->hard_header_len = ETH_HLEN; 275 dev->hard_header_len = ETH_HLEN;
275 dev->mtu = 1500; /* eth_mtu */ 276 dev->mtu = ETH_DATA_LEN;
276 dev->addr_len = ETH_ALEN; 277 dev->addr_len = ETH_ALEN;
277 dev->tx_queue_len = 1000; /* Ethernet wants good queues */ 278 dev->tx_queue_len = 1000; /* Ethernet wants good queues */
278 dev->flags = IFF_BROADCAST|IFF_MULTICAST; 279 dev->flags = IFF_BROADCAST|IFF_MULTICAST;
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 073aebdf0f67..f8dca31be5dd 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -75,22 +75,14 @@ static void prism2_wep_deinit(void *priv)
75 kfree(priv); 75 kfree(priv);
76} 76}
77 77
78/* Perform WEP encryption on given skb that has at least 4 bytes of headroom 78/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */
79 * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, 79static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, void *priv)
80 * so the payload length increases with 8 bytes.
81 *
82 * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
83 */
84static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
85{ 80{
86 struct prism2_wep_data *wep = priv; 81 struct prism2_wep_data *wep = priv;
87 u32 crc, klen, len; 82 u32 klen, len;
88 u8 key[WEP_KEY_LEN + 3]; 83 u8 *pos;
89 u8 *pos, *icv; 84
90 struct scatterlist sg; 85 if (skb_headroom(skb) < 4 || skb->len < hdr_len)
91
92 if (skb_headroom(skb) < 4 || skb_tailroom(skb) < 4 ||
93 skb->len < hdr_len)
94 return -1; 86 return -1;
95 87
96 len = skb->len - hdr_len; 88 len = skb->len - hdr_len;
@@ -112,15 +104,47 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
112 } 104 }
113 105
114 /* Prepend 24-bit IV to RC4 key and TX frame */ 106 /* Prepend 24-bit IV to RC4 key and TX frame */
115 *pos++ = key[0] = (wep->iv >> 16) & 0xff; 107 *pos++ = (wep->iv >> 16) & 0xff;
116 *pos++ = key[1] = (wep->iv >> 8) & 0xff; 108 *pos++ = (wep->iv >> 8) & 0xff;
117 *pos++ = key[2] = wep->iv & 0xff; 109 *pos++ = wep->iv & 0xff;
118 *pos++ = wep->key_idx << 6; 110 *pos++ = wep->key_idx << 6;
119 111
112 return 0;
113}
114
115/* Perform WEP encryption on given skb that has at least 4 bytes of headroom
116 * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted,
117 * so the payload length increases with 8 bytes.
118 *
119 * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
120 */
121static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
122{
123 struct prism2_wep_data *wep = priv;
124 u32 crc, klen, len;
125 u8 *pos, *icv;
126 struct scatterlist sg;
127 u8 key[WEP_KEY_LEN + 3];
128
129 /* other checks are in prism2_wep_build_iv */
130 if (skb_tailroom(skb) < 4)
131 return -1;
132
133 /* add the IV to the frame */
134 if (prism2_wep_build_iv(skb, hdr_len, priv))
135 return -1;
136
137 /* Copy the IV into the first 3 bytes of the key */
138 memcpy(key, skb->data + hdr_len, 3);
139
120 /* Copy rest of the WEP key (the secret part) */ 140 /* Copy rest of the WEP key (the secret part) */
121 memcpy(key + 3, wep->key, wep->key_len); 141 memcpy(key + 3, wep->key, wep->key_len);
142
143 len = skb->len - hdr_len - 4;
144 pos = skb->data + hdr_len + 4;
145 klen = 3 + wep->key_len;
122 146
123 /* Append little-endian CRC32 and encrypt it to produce ICV */ 147 /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */
124 crc = ~crc32_le(~0, pos, len); 148 crc = ~crc32_le(~0, pos, len);
125 icv = skb_put(skb, 4); 149 icv = skb_put(skb, 4);
126 icv[0] = crc; 150 icv[0] = crc;
@@ -231,6 +255,7 @@ static struct ieee80211_crypto_ops ieee80211_crypt_wep = {
231 .name = "WEP", 255 .name = "WEP",
232 .init = prism2_wep_init, 256 .init = prism2_wep_init,
233 .deinit = prism2_wep_deinit, 257 .deinit = prism2_wep_deinit,
258 .build_iv = prism2_wep_build_iv,
234 .encrypt_mpdu = prism2_wep_encrypt, 259 .encrypt_mpdu = prism2_wep_encrypt,
235 .decrypt_mpdu = prism2_wep_decrypt, 260 .decrypt_mpdu = prism2_wep_decrypt,
236 .encrypt_msdu = NULL, 261 .encrypt_msdu = NULL,
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 03efaacbdb73..5e3380388046 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -76,8 +76,8 @@ static struct ieee80211_frag_entry *ieee80211_frag_cache_find(struct
76 76
77 if (entry->skb != NULL && entry->seq == seq && 77 if (entry->skb != NULL && entry->seq == seq &&
78 (entry->last_frag + 1 == frag || frag == -1) && 78 (entry->last_frag + 1 == frag || frag == -1) &&
79 memcmp(entry->src_addr, src, ETH_ALEN) == 0 && 79 !compare_ether_addr(entry->src_addr, src) &&
80 memcmp(entry->dst_addr, dst, ETH_ALEN) == 0) 80 !compare_ether_addr(entry->dst_addr, dst))
81 return entry; 81 return entry;
82 } 82 }
83 83
@@ -243,12 +243,12 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee,
243 /* check that the frame is unicast frame to us */ 243 /* check that the frame is unicast frame to us */
244 if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == 244 if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
245 IEEE80211_FCTL_TODS && 245 IEEE80211_FCTL_TODS &&
246 memcmp(hdr->addr1, dev->dev_addr, ETH_ALEN) == 0 && 246 !compare_ether_addr(hdr->addr1, dev->dev_addr) &&
247 memcmp(hdr->addr3, dev->dev_addr, ETH_ALEN) == 0) { 247 !compare_ether_addr(hdr->addr3, dev->dev_addr)) {
248 /* ToDS frame with own addr BSSID and DA */ 248 /* ToDS frame with own addr BSSID and DA */
249 } else if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == 249 } else if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
250 IEEE80211_FCTL_FROMDS && 250 IEEE80211_FCTL_FROMDS &&
251 memcmp(hdr->addr1, dev->dev_addr, ETH_ALEN) == 0) { 251 !compare_ether_addr(hdr->addr1, dev->dev_addr)) {
252 /* FromDS frame with own addr as DA */ 252 /* FromDS frame with own addr as DA */
253 } else 253 } else
254 return 0; 254 return 0;
@@ -410,9 +410,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
410 return 1; 410 return 1;
411 } 411 }
412 412
413 if ((is_multicast_ether_addr(hdr->addr1) || 413 if (is_multicast_ether_addr(hdr->addr1)
414 is_broadcast_ether_addr(hdr->addr2)) ? ieee->host_mc_decrypt : 414 ? ieee->host_mc_decrypt : ieee->host_decrypt) {
415 ieee->host_decrypt) {
416 int idx = 0; 415 int idx = 0;
417 if (skb->len >= hdrlen + 3) 416 if (skb->len >= hdrlen + 3)
418 idx = skb->data[hdrlen + 3] >> 6; 417 idx = skb->data[hdrlen + 3] >> 6;
@@ -506,7 +505,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
506 if (ieee->iw_mode == IW_MODE_MASTER && !wds && 505 if (ieee->iw_mode == IW_MODE_MASTER && !wds &&
507 (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == 506 (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
508 IEEE80211_FCTL_FROMDS && ieee->stadev 507 IEEE80211_FCTL_FROMDS && ieee->stadev
509 && memcmp(hdr->addr2, ieee->assoc_ap_addr, ETH_ALEN) == 0) { 508 && !compare_ether_addr(hdr->addr2, ieee->assoc_ap_addr)) {
510 /* Frame from BSSID of the AP for which we are a client */ 509 /* Frame from BSSID of the AP for which we are a client */
511 skb->dev = dev = ieee->stadev; 510 skb->dev = dev = ieee->stadev;
512 stats = hostap_get_stats(dev); 511 stats = hostap_get_stats(dev);
@@ -1232,7 +1231,7 @@ static inline int is_same_network(struct ieee80211_network *src,
1232 * as one network */ 1231 * as one network */
1233 return ((src->ssid_len == dst->ssid_len) && 1232 return ((src->ssid_len == dst->ssid_len) &&
1234 (src->channel == dst->channel) && 1233 (src->channel == dst->channel) &&
1235 !memcmp(src->bssid, dst->bssid, ETH_ALEN) && 1234 !compare_ether_addr(src->bssid, dst->bssid) &&
1236 !memcmp(src->ssid, dst->ssid, src->ssid_len)); 1235 !memcmp(src->ssid, dst->ssid, src->ssid_len));
1237} 1236}
1238 1237
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 445f206e65e0..e5b33c8d5dbc 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -288,7 +288,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
288 /* Determine total amount of storage required for TXB packets */ 288 /* Determine total amount of storage required for TXB packets */
289 bytes = skb->len + SNAP_SIZE + sizeof(u16); 289 bytes = skb->len + SNAP_SIZE + sizeof(u16);
290 290
291 if (host_encrypt) 291 if (host_encrypt || host_build_iv)
292 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA | 292 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
293 IEEE80211_FCTL_PROTECTED; 293 IEEE80211_FCTL_PROTECTED;
294 else 294 else
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 181755f2aa8b..406d5b964905 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -284,7 +284,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
284 }; 284 };
285 int i, key, key_provided, len; 285 int i, key, key_provided, len;
286 struct ieee80211_crypt_data **crypt; 286 struct ieee80211_crypt_data **crypt;
287 int host_crypto = ieee->host_encrypt || ieee->host_decrypt; 287 int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv;
288 288
289 IEEE80211_DEBUG_WX("SET_ENCODE\n"); 289 IEEE80211_DEBUG_WX("SET_ENCODE\n");
290 290
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e55136ae09f4..011cca7ae02b 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -456,6 +456,14 @@ config TCP_CONG_BIC
456 increase provides TCP friendliness. 456 increase provides TCP friendliness.
457 See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/ 457 See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
458 458
459config TCP_CONG_CUBIC
460 tristate "CUBIC TCP"
461 default m
462 ---help---
463 This is version 2.0 of BIC-TCP which uses a cubic growth function
464 among other techniques.
465 See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
466
459config TCP_CONG_WESTWOOD 467config TCP_CONG_WESTWOOD
460 tristate "TCP Westwood+" 468 tristate "TCP Westwood+"
461 default m 469 default m
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f0435d00db6b..35e5f5999092 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -9,7 +9,7 @@ obj-y := route.o inetpeer.o protocol.o \
9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
10 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
11 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ 11 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
12 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o 12 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
13 13
14obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o 14obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
15obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o 15obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
@@ -28,12 +28,13 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
28obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o 28obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
29obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o 29obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
30obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o 30obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
31obj-$(CONFIG_NETFILTER) += netfilter/ 31obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
32obj-$(CONFIG_IP_VS) += ipvs/ 32obj-$(CONFIG_IP_VS) += ipvs/
33obj-$(CONFIG_INET_DIAG) += inet_diag.o 33obj-$(CONFIG_INET_DIAG) += inet_diag.o
34obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o 34obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
35obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o 35obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
36obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o 36obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
37obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
37obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o 38obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
38obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o 39obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
39obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o 40obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d368cf249000..97c276f95b35 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -79,6 +79,7 @@
79#include <linux/string.h> 79#include <linux/string.h>
80#include <linux/sockios.h> 80#include <linux/sockios.h>
81#include <linux/net.h> 81#include <linux/net.h>
82#include <linux/capability.h>
82#include <linux/fcntl.h> 83#include <linux/fcntl.h>
83#include <linux/mm.h> 84#include <linux/mm.h>
84#include <linux/interrupt.h> 85#include <linux/interrupt.h>
@@ -93,6 +94,7 @@
93#include <linux/smp_lock.h> 94#include <linux/smp_lock.h>
94#include <linux/inet.h> 95#include <linux/inet.h>
95#include <linux/igmp.h> 96#include <linux/igmp.h>
97#include <linux/inetdevice.h>
96#include <linux/netdevice.h> 98#include <linux/netdevice.h>
97#include <net/ip.h> 99#include <net/ip.h>
98#include <net/protocol.h> 100#include <net/protocol.h>
@@ -302,6 +304,7 @@ lookup_protocol:
302 sk->sk_reuse = 1; 304 sk->sk_reuse = 1;
303 305
304 inet = inet_sk(sk); 306 inet = inet_sk(sk);
307 inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
305 308
306 if (SOCK_RAW == sock->type) { 309 if (SOCK_RAW == sock->type) {
307 inet->num = protocol; 310 inet->num = protocol;
@@ -775,16 +778,16 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
775 err = devinet_ioctl(cmd, (void __user *)arg); 778 err = devinet_ioctl(cmd, (void __user *)arg);
776 break; 779 break;
777 default: 780 default:
778 if (!sk->sk_prot->ioctl || 781 if (sk->sk_prot->ioctl)
779 (err = sk->sk_prot->ioctl(sk, cmd, arg)) == 782 err = sk->sk_prot->ioctl(sk, cmd, arg);
780 -ENOIOCTLCMD) 783 else
781 err = dev_ioctl(cmd, (void __user *)arg); 784 err = -ENOIOCTLCMD;
782 break; 785 break;
783 } 786 }
784 return err; 787 return err;
785} 788}
786 789
787struct proto_ops inet_stream_ops = { 790const struct proto_ops inet_stream_ops = {
788 .family = PF_INET, 791 .family = PF_INET,
789 .owner = THIS_MODULE, 792 .owner = THIS_MODULE,
790 .release = inet_release, 793 .release = inet_release,
@@ -805,7 +808,7 @@ struct proto_ops inet_stream_ops = {
805 .sendpage = tcp_sendpage 808 .sendpage = tcp_sendpage
806}; 809};
807 810
808struct proto_ops inet_dgram_ops = { 811const struct proto_ops inet_dgram_ops = {
809 .family = PF_INET, 812 .family = PF_INET,
810 .owner = THIS_MODULE, 813 .owner = THIS_MODULE,
811 .release = inet_release, 814 .release = inet_release,
@@ -830,7 +833,7 @@ struct proto_ops inet_dgram_ops = {
830 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without 833 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
831 * udp_poll 834 * udp_poll
832 */ 835 */
833static struct proto_ops inet_sockraw_ops = { 836static const struct proto_ops inet_sockraw_ops = {
834 .family = PF_INET, 837 .family = PF_INET,
835 .owner = THIS_MODULE, 838 .owner = THIS_MODULE,
836 .release = inet_release, 839 .release = inet_release,
@@ -869,7 +872,8 @@ static struct inet_protosw inetsw_array[] =
869 .ops = &inet_stream_ops, 872 .ops = &inet_stream_ops,
870 .capability = -1, 873 .capability = -1,
871 .no_check = 0, 874 .no_check = 0,
872 .flags = INET_PROTOSW_PERMANENT, 875 .flags = INET_PROTOSW_PERMANENT |
876 INET_PROTOSW_ICSK,
873 }, 877 },
874 878
875 { 879 {
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 035ad2c9e1ba..aed537fa2c88 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -6,6 +6,7 @@
6#include <linux/crypto.h> 6#include <linux/crypto.h>
7#include <linux/pfkeyv2.h> 7#include <linux/pfkeyv2.h>
8#include <net/icmp.h> 8#include <net/icmp.h>
9#include <net/protocol.h>
9#include <asm/scatterlist.h> 10#include <asm/scatterlist.h>
10 11
11 12
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b425748f02d7..accdefedfed7 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -79,6 +79,7 @@
79#include <linux/string.h> 79#include <linux/string.h>
80#include <linux/kernel.h> 80#include <linux/kernel.h>
81#include <linux/sched.h> 81#include <linux/sched.h>
82#include <linux/capability.h>
82#include <linux/config.h> 83#include <linux/config.h>
83#include <linux/socket.h> 84#include <linux/socket.h>
84#include <linux/sockios.h> 85#include <linux/sockios.h>
@@ -86,6 +87,7 @@
86#include <linux/in.h> 87#include <linux/in.h>
87#include <linux/mm.h> 88#include <linux/mm.h>
88#include <linux/inet.h> 89#include <linux/inet.h>
90#include <linux/inetdevice.h>
89#include <linux/netdevice.h> 91#include <linux/netdevice.h>
90#include <linux/etherdevice.h> 92#include <linux/etherdevice.h>
91#include <linux/fddidevice.h> 93#include <linux/fddidevice.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 04a6fe3e95a2..95b9d81ac488 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -32,6 +32,7 @@
32#include <asm/uaccess.h> 32#include <asm/uaccess.h>
33#include <asm/system.h> 33#include <asm/system.h>
34#include <linux/bitops.h> 34#include <linux/bitops.h>
35#include <linux/capability.h>
35#include <linux/module.h> 36#include <linux/module.h>
36#include <linux/types.h> 37#include <linux/types.h>
37#include <linux/kernel.h> 38#include <linux/kernel.h>
@@ -58,6 +59,7 @@
58#endif 59#endif
59#include <linux/kmod.h> 60#include <linux/kmod.h>
60 61
62#include <net/arp.h>
61#include <net/ip.h> 63#include <net/ip.h>
62#include <net/route.h> 64#include <net/route.h>
63#include <net/ip_fib.h> 65#include <net/ip_fib.h>
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b18ce66e7b7..73bfcae8af9c 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -9,6 +9,7 @@
9#include <linux/pfkeyv2.h> 9#include <linux/pfkeyv2.h>
10#include <linux/random.h> 10#include <linux/random.h>
11#include <net/icmp.h> 11#include <net/icmp.h>
12#include <net/protocol.h>
12#include <net/udp.h> 13#include <net/udp.h>
13 14
14/* decapsulation data for use when post-processing */ 15/* decapsulation data for use when post-processing */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 19b1b984d687..5b25fc0d980c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -20,6 +20,7 @@
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21#include <asm/system.h> 21#include <asm/system.h>
22#include <linux/bitops.h> 22#include <linux/bitops.h>
23#include <linux/capability.h>
23#include <linux/types.h> 24#include <linux/types.h>
24#include <linux/kernel.h> 25#include <linux/kernel.h>
25#include <linux/sched.h> 26#include <linux/sched.h>
@@ -30,6 +31,7 @@
30#include <linux/errno.h> 31#include <linux/errno.h>
31#include <linux/in.h> 32#include <linux/in.h>
32#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/inetdevice.h>
33#include <linux/netdevice.h> 35#include <linux/netdevice.h>
34#include <linux/if_arp.h> 36#include <linux/if_arp.h>
35#include <linux/skbuff.h> 37#include <linux/skbuff.h>
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 7ea0209cb169..e2890ec8159e 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -29,6 +29,7 @@
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/in.h> 30#include <linux/in.h>
31#include <linux/inet.h> 31#include <linux/inet.h>
32#include <linux/inetdevice.h>
32#include <linux/netdevice.h> 33#include <linux/netdevice.h>
33#include <linux/if_arp.h> 34#include <linux/if_arp.h>
34#include <linux/proc_fs.h> 35#include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0b298bbc1518..0dd4d06e456d 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -33,6 +33,7 @@
33#include <linux/errno.h> 33#include <linux/errno.h>
34#include <linux/in.h> 34#include <linux/in.h>
35#include <linux/inet.h> 35#include <linux/inet.h>
36#include <linux/inetdevice.h>
36#include <linux/netdevice.h> 37#include <linux/netdevice.h>
37#include <linux/if_arp.h> 38#include <linux/if_arp.h>
38#include <linux/proc_fs.h> 39#include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6d2a6ac070e3..ef4724de7350 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/in.h> 30#include <linux/in.h>
31#include <linux/inet.h> 31#include <linux/inet.h>
32#include <linux/inetdevice.h>
32#include <linux/netdevice.h> 33#include <linux/netdevice.h>
33#include <linux/if_arp.h> 34#include <linux/if_arp.h>
34#include <linux/proc_fs.h> 35#include <linux/proc_fs.h>
@@ -36,6 +37,7 @@
36#include <linux/netlink.h> 37#include <linux/netlink.h>
37#include <linux/init.h> 38#include <linux/init.h>
38 39
40#include <net/arp.h>
39#include <net/ip.h> 41#include <net/ip.h>
40#include <net/protocol.h> 42#include <net/protocol.h>
41#include <net/route.h> 43#include <net/route.h>
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 705e3ce86df9..e320b32373e5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -41,6 +41,13 @@
41 * modify it under the terms of the GNU General Public License 41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version 42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 *
45 * Substantial contributions to this work comes from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
44 */ 51 */
45 52
46#define VERSION "0.404" 53#define VERSION "0.404"
@@ -59,6 +66,7 @@
59#include <linux/errno.h> 66#include <linux/errno.h>
60#include <linux/in.h> 67#include <linux/in.h>
61#include <linux/inet.h> 68#include <linux/inet.h>
69#include <linux/inetdevice.h>
62#include <linux/netdevice.h> 70#include <linux/netdevice.h>
63#include <linux/if_arp.h> 71#include <linux/if_arp.h>
64#include <linux/proc_fs.h> 72#include <linux/proc_fs.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 92e23b2ad4d2..105039eb7629 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -73,6 +73,7 @@
73#include <linux/socket.h> 73#include <linux/socket.h>
74#include <linux/in.h> 74#include <linux/in.h>
75#include <linux/inet.h> 75#include <linux/inet.h>
76#include <linux/inetdevice.h>
76#include <linux/netdevice.h> 77#include <linux/netdevice.h>
77#include <linux/string.h> 78#include <linux/string.h>
78#include <linux/netfilter_ipv4.h> 79#include <linux/netfilter_ipv4.h>
@@ -898,8 +899,7 @@ static void icmp_address_reply(struct sk_buff *skb)
898 u32 _mask, *mp; 899 u32 _mask, *mp;
899 900
900 mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); 901 mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
901 if (mp == NULL) 902 BUG_ON(mp == NULL);
902 BUG();
903 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 903 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
904 if (*mp == ifa->ifa_mask && 904 if (*mp == ifa->ifa_mask &&
905 inet_ifa_match(rt->rt_src, ifa)) 905 inet_ifa_match(rt->rt_src, ifa))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4a195c724f01..192092b89e53 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -91,6 +91,8 @@
91#include <linux/if_arp.h> 91#include <linux/if_arp.h>
92#include <linux/rtnetlink.h> 92#include <linux/rtnetlink.h>
93#include <linux/times.h> 93#include <linux/times.h>
94
95#include <net/arp.h>
94#include <net/ip.h> 96#include <net/ip.h>
95#include <net/protocol.h> 97#include <net/protocol.h>
96#include <net/route.h> 98#include <net/route.h>
@@ -973,7 +975,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
973 * for deleted items allows change reports to use common code with 975 * for deleted items allows change reports to use common code with
974 * non-deleted or query-response MCA's. 976 * non-deleted or query-response MCA's.
975 */ 977 */
976 pmc = (struct ip_mc_list *)kmalloc(sizeof(*pmc), GFP_KERNEL); 978 pmc = kmalloc(sizeof(*pmc), GFP_KERNEL);
977 if (!pmc) 979 if (!pmc)
978 return; 980 return;
979 memset(pmc, 0, sizeof(*pmc)); 981 memset(pmc, 0, sizeof(*pmc));
@@ -1153,7 +1155,7 @@ void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
1153 } 1155 }
1154 } 1156 }
1155 1157
1156 im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL); 1158 im = kmalloc(sizeof(*im), GFP_KERNEL);
1157 if (!im) 1159 if (!im)
1158 goto out; 1160 goto out;
1159 1161
@@ -1474,7 +1476,7 @@ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
1474 psf_prev = psf; 1476 psf_prev = psf;
1475 } 1477 }
1476 if (!psf) { 1478 if (!psf) {
1477 psf = (struct ip_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC); 1479 psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
1478 if (!psf) 1480 if (!psf)
1479 return -ENOBUFS; 1481 return -ENOBUFS;
1480 memset(psf, 0, sizeof(*psf)); 1482 memset(psf, 0, sizeof(*psf));
@@ -1657,7 +1659,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1657 err = -ENOBUFS; 1659 err = -ENOBUFS;
1658 if (count >= sysctl_igmp_max_memberships) 1660 if (count >= sysctl_igmp_max_memberships)
1659 goto done; 1661 goto done;
1660 iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL); 1662 iml = sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
1661 if (iml == NULL) 1663 if (iml == NULL)
1662 goto done; 1664 goto done;
1663 1665
@@ -1821,8 +1823,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1821 1823
1822 if (psl) 1824 if (psl)
1823 count += psl->sl_max; 1825 count += psl->sl_max;
1824 newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk, 1826 newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
1825 IP_SFLSIZE(count), GFP_KERNEL);
1826 if (!newpsl) { 1827 if (!newpsl) {
1827 err = -ENOBUFS; 1828 err = -ENOBUFS;
1828 goto done; 1829 goto done;
@@ -1905,8 +1906,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1905 goto done; 1906 goto done;
1906 } 1907 }
1907 if (msf->imsf_numsrc) { 1908 if (msf->imsf_numsrc) {
1908 newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk, 1909 newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
1909 IP_SFLSIZE(msf->imsf_numsrc), GFP_KERNEL); 1910 GFP_KERNEL);
1910 if (!newpsl) { 1911 if (!newpsl) {
1911 err = -ENOBUFS; 1912 err = -ENOBUFS;
1912 goto done; 1913 goto done;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3fe021f1a566..ae20281d8deb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
37 */ 37 */
38int sysctl_local_port_range[2] = { 1024, 4999 }; 38int sysctl_local_port_range[2] = { 1024, 4999 };
39 39
40static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) 40int inet_csk_bind_conflict(const struct sock *sk,
41 const struct inet_bind_bucket *tb)
41{ 42{
42 const u32 sk_rcv_saddr = inet_rcv_saddr(sk); 43 const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
43 struct sock *sk2; 44 struct sock *sk2;
@@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke
62 return node != NULL; 63 return node != NULL;
63} 64}
64 65
66EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
67
65/* Obtain a reference to a local port for the given sock, 68/* Obtain a reference to a local port for the given sock,
66 * if snum is zero it means select any available local port. 69 * if snum is zero it means select any available local port.
67 */ 70 */
68int inet_csk_get_port(struct inet_hashinfo *hashinfo, 71int inet_csk_get_port(struct inet_hashinfo *hashinfo,
69 struct sock *sk, unsigned short snum) 72 struct sock *sk, unsigned short snum,
73 int (*bind_conflict)(const struct sock *sk,
74 const struct inet_bind_bucket *tb))
70{ 75{
71 struct inet_bind_hashbucket *head; 76 struct inet_bind_hashbucket *head;
72 struct hlist_node *node; 77 struct hlist_node *node;
@@ -125,7 +130,7 @@ tb_found:
125 goto success; 130 goto success;
126 } else { 131 } else {
127 ret = 1; 132 ret = 1;
128 if (inet_csk_bind_conflict(sk, tb)) 133 if (bind_conflict(sk, tb))
129 goto fail_unlock; 134 goto fail_unlock;
130 } 135 }
131 } 136 }
@@ -380,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
380EXPORT_SYMBOL_GPL(inet_csk_search_req); 385EXPORT_SYMBOL_GPL(inet_csk_search_req);
381 386
382void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 387void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
383 const unsigned timeout) 388 unsigned long timeout)
384{ 389{
385 struct inet_connection_sock *icsk = inet_csk(sk); 390 struct inet_connection_sock *icsk = inet_csk(sk);
386 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 391 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
@@ -631,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk)
631} 636}
632 637
633EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 638EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
639
640void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
641{
642 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
643 const struct inet_sock *inet = inet_sk(sk);
644
645 sin->sin_family = AF_INET;
646 sin->sin_addr.s_addr = inet->daddr;
647 sin->sin_port = inet->dport;
648}
649
650EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 39061ed53cfd..457db99c76df 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -50,9 +50,10 @@ static struct sock *idiagnl;
50#define INET_DIAG_PUT(skb, attrtype, attrlen) \ 50#define INET_DIAG_PUT(skb, attrtype, attrlen) \
51 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) 51 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
52 52
53static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, 53static int inet_csk_diag_fill(struct sock *sk,
54 int ext, u32 pid, u32 seq, u16 nlmsg_flags, 54 struct sk_buff *skb,
55 const struct nlmsghdr *unlh) 55 int ext, u32 pid, u32 seq, u16 nlmsg_flags,
56 const struct nlmsghdr *unlh)
56{ 57{
57 const struct inet_sock *inet = inet_sk(sk); 58 const struct inet_sock *inet = inet_sk(sk);
58 const struct inet_connection_sock *icsk = inet_csk(sk); 59 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -70,20 +71,22 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
70 nlh->nlmsg_flags = nlmsg_flags; 71 nlh->nlmsg_flags = nlmsg_flags;
71 72
72 r = NLMSG_DATA(nlh); 73 r = NLMSG_DATA(nlh);
73 if (sk->sk_state != TCP_TIME_WAIT) { 74 BUG_ON(sk->sk_state == TCP_TIME_WAIT);
74 if (ext & (1 << (INET_DIAG_MEMINFO - 1))) 75
75 minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, 76 if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
76 sizeof(*minfo)); 77 minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
77 if (ext & (1 << (INET_DIAG_INFO - 1))) 78
78 info = INET_DIAG_PUT(skb, INET_DIAG_INFO, 79 if (ext & (1 << (INET_DIAG_INFO - 1)))
79 handler->idiag_info_size); 80 info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
80 81 handler->idiag_info_size);
81 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { 82
82 size_t len = strlen(icsk->icsk_ca_ops->name); 83 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
83 strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), 84 const size_t len = strlen(icsk->icsk_ca_ops->name);
84 icsk->icsk_ca_ops->name); 85
85 } 86 strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
87 icsk->icsk_ca_ops->name);
86 } 88 }
89
87 r->idiag_family = sk->sk_family; 90 r->idiag_family = sk->sk_family;
88 r->idiag_state = sk->sk_state; 91 r->idiag_state = sk->sk_state;
89 r->idiag_timer = 0; 92 r->idiag_timer = 0;
@@ -93,37 +96,6 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
93 r->id.idiag_cookie[0] = (u32)(unsigned long)sk; 96 r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
94 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); 97 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
95 98
96 if (r->idiag_state == TCP_TIME_WAIT) {
97 const struct inet_timewait_sock *tw = inet_twsk(sk);
98 long tmo = tw->tw_ttd - jiffies;
99 if (tmo < 0)
100 tmo = 0;
101
102 r->id.idiag_sport = tw->tw_sport;
103 r->id.idiag_dport = tw->tw_dport;
104 r->id.idiag_src[0] = tw->tw_rcv_saddr;
105 r->id.idiag_dst[0] = tw->tw_daddr;
106 r->idiag_state = tw->tw_substate;
107 r->idiag_timer = 3;
108 r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
109 r->idiag_rqueue = 0;
110 r->idiag_wqueue = 0;
111 r->idiag_uid = 0;
112 r->idiag_inode = 0;
113#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
114 if (r->idiag_family == AF_INET6) {
115 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
116
117 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
118 &tcp6tw->tw_v6_rcv_saddr);
119 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
120 &tcp6tw->tw_v6_daddr);
121 }
122#endif
123 nlh->nlmsg_len = skb->tail - b;
124 return skb->len;
125 }
126
127 r->id.idiag_sport = inet->sport; 99 r->id.idiag_sport = inet->sport;
128 r->id.idiag_dport = inet->dport; 100 r->id.idiag_dport = inet->dport;
129 r->id.idiag_src[0] = inet->rcv_saddr; 101 r->id.idiag_src[0] = inet->rcv_saddr;
@@ -185,7 +157,75 @@ nlmsg_failure:
185 return -1; 157 return -1;
186} 158}
187 159
188static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) 160static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
161 struct sk_buff *skb, int ext, u32 pid,
162 u32 seq, u16 nlmsg_flags,
163 const struct nlmsghdr *unlh)
164{
165 long tmo;
166 struct inet_diag_msg *r;
167 const unsigned char *previous_tail = skb->tail;
168 struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
169 unlh->nlmsg_type, sizeof(*r));
170
171 r = NLMSG_DATA(nlh);
172 BUG_ON(tw->tw_state != TCP_TIME_WAIT);
173
174 nlh->nlmsg_flags = nlmsg_flags;
175
176 tmo = tw->tw_ttd - jiffies;
177 if (tmo < 0)
178 tmo = 0;
179
180 r->idiag_family = tw->tw_family;
181 r->idiag_state = tw->tw_state;
182 r->idiag_timer = 0;
183 r->idiag_retrans = 0;
184 r->id.idiag_if = tw->tw_bound_dev_if;
185 r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
186 r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
187 r->id.idiag_sport = tw->tw_sport;
188 r->id.idiag_dport = tw->tw_dport;
189 r->id.idiag_src[0] = tw->tw_rcv_saddr;
190 r->id.idiag_dst[0] = tw->tw_daddr;
191 r->idiag_state = tw->tw_substate;
192 r->idiag_timer = 3;
193 r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
194 r->idiag_rqueue = 0;
195 r->idiag_wqueue = 0;
196 r->idiag_uid = 0;
197 r->idiag_inode = 0;
198#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
199 if (tw->tw_family == AF_INET6) {
200 const struct inet6_timewait_sock *tw6 =
201 inet6_twsk((struct sock *)tw);
202
203 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
204 &tw6->tw_v6_rcv_saddr);
205 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
206 &tw6->tw_v6_daddr);
207 }
208#endif
209 nlh->nlmsg_len = skb->tail - previous_tail;
210 return skb->len;
211nlmsg_failure:
212 skb_trim(skb, previous_tail - skb->data);
213 return -1;
214}
215
216static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
217 int ext, u32 pid, u32 seq, u16 nlmsg_flags,
218 const struct nlmsghdr *unlh)
219{
220 if (sk->sk_state == TCP_TIME_WAIT)
221 return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
222 skb, ext, pid, seq, nlmsg_flags,
223 unlh);
224 return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh);
225}
226
227static int inet_diag_get_exact(struct sk_buff *in_skb,
228 const struct nlmsghdr *nlh)
189{ 229{
190 int err; 230 int err;
191 struct sock *sk; 231 struct sock *sk;
@@ -235,7 +275,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nl
235 if (!rep) 275 if (!rep)
236 goto out; 276 goto out;
237 277
238 if (inet_diag_fill(rep, sk, req->idiag_ext, 278 if (sk_diag_fill(sk, rep, req->idiag_ext,
239 NETLINK_CB(in_skb).pid, 279 NETLINK_CB(in_skb).pid,
240 nlh->nlmsg_seq, 0, nlh) <= 0) 280 nlh->nlmsg_seq, 0, nlh) <= 0)
241 BUG(); 281 BUG();
@@ -283,7 +323,7 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
283 323
284 324
285static int inet_diag_bc_run(const void *bc, int len, 325static int inet_diag_bc_run(const void *bc, int len,
286 const struct inet_diag_entry *entry) 326 const struct inet_diag_entry *entry)
287{ 327{
288 while (len > 0) { 328 while (len > 0) {
289 int yes = 1; 329 int yes = 1;
@@ -322,7 +362,7 @@ static int inet_diag_bc_run(const void *bc, int len,
322 yes = 0; 362 yes = 0;
323 break; 363 break;
324 } 364 }
325 365
326 if (cond->prefix_len == 0) 366 if (cond->prefix_len == 0)
327 break; 367 break;
328 368
@@ -331,7 +371,8 @@ static int inet_diag_bc_run(const void *bc, int len,
331 else 371 else
332 addr = entry->daddr; 372 addr = entry->daddr;
333 373
334 if (bitstring_match(addr, cond->addr, cond->prefix_len)) 374 if (bitstring_match(addr, cond->addr,
375 cond->prefix_len))
335 break; 376 break;
336 if (entry->family == AF_INET6 && 377 if (entry->family == AF_INET6 &&
337 cond->family == AF_INET) { 378 cond->family == AF_INET) {
@@ -346,7 +387,7 @@ static int inet_diag_bc_run(const void *bc, int len,
346 } 387 }
347 } 388 }
348 389
349 if (yes) { 390 if (yes) {
350 len -= op->yes; 391 len -= op->yes;
351 bc += op->yes; 392 bc += op->yes;
352 } else { 393 } else {
@@ -407,14 +448,15 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
407 default: 448 default:
408 return -EINVAL; 449 return -EINVAL;
409 } 450 }
410 bc += op->yes; 451 bc += op->yes;
411 len -= op->yes; 452 len -= op->yes;
412 } 453 }
413 return len == 0 ? 0 : -EINVAL; 454 return len == 0 ? 0 : -EINVAL;
414} 455}
415 456
416static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, 457static int inet_csk_diag_dump(struct sock *sk,
417 struct netlink_callback *cb) 458 struct sk_buff *skb,
459 struct netlink_callback *cb)
418{ 460{
419 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 461 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
420 462
@@ -444,14 +486,50 @@ static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
444 return 0; 486 return 0;
445 } 487 }
446 488
447 return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid, 489 return inet_csk_diag_fill(sk, skb, r->idiag_ext,
448 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 490 NETLINK_CB(cb->skb).pid,
491 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
492}
493
494static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
495 struct sk_buff *skb,
496 struct netlink_callback *cb)
497{
498 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
499
500 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
501 struct inet_diag_entry entry;
502 struct rtattr *bc = (struct rtattr *)(r + 1);
503
504 entry.family = tw->tw_family;
505#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
506 if (tw->tw_family == AF_INET6) {
507 struct inet6_timewait_sock *tw6 =
508 inet6_twsk((struct sock *)tw);
509 entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
510 entry.daddr = tw6->tw_v6_daddr.s6_addr32;
511 } else
512#endif
513 {
514 entry.saddr = &tw->tw_rcv_saddr;
515 entry.daddr = &tw->tw_daddr;
516 }
517 entry.sport = tw->tw_num;
518 entry.dport = ntohs(tw->tw_dport);
519 entry.userlocks = 0;
520
521 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
522 return 0;
523 }
524
525 return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
526 NETLINK_CB(cb->skb).pid,
527 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
449} 528}
450 529
451static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, 530static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
452 struct request_sock *req, 531 struct request_sock *req, u32 pid, u32 seq,
453 u32 pid, u32 seq, 532 const struct nlmsghdr *unlh)
454 const struct nlmsghdr *unlh)
455{ 533{
456 const struct inet_request_sock *ireq = inet_rsk(req); 534 const struct inet_request_sock *ireq = inet_rsk(req);
457 struct inet_sock *inet = inet_sk(sk); 535 struct inet_sock *inet = inet_sk(sk);
@@ -489,9 +567,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
489#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 567#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
490 if (r->idiag_family == AF_INET6) { 568 if (r->idiag_family == AF_INET6) {
491 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, 569 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
492 &tcp6_rsk(req)->loc_addr); 570 &inet6_rsk(req)->loc_addr);
493 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, 571 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
494 &tcp6_rsk(req)->rmt_addr); 572 &inet6_rsk(req)->rmt_addr);
495 } 573 }
496#endif 574#endif
497 nlh->nlmsg_len = skb->tail - b; 575 nlh->nlmsg_len = skb->tail - b;
@@ -504,7 +582,7 @@ nlmsg_failure:
504} 582}
505 583
506static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, 584static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
507 struct netlink_callback *cb) 585 struct netlink_callback *cb)
508{ 586{
509 struct inet_diag_entry entry; 587 struct inet_diag_entry entry;
510 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 588 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
@@ -553,13 +631,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
553 entry.saddr = 631 entry.saddr =
554#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 632#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
555 (entry.family == AF_INET6) ? 633 (entry.family == AF_INET6) ?
556 tcp6_rsk(req)->loc_addr.s6_addr32 : 634 inet6_rsk(req)->loc_addr.s6_addr32 :
557#endif 635#endif
558 &ireq->loc_addr; 636 &ireq->loc_addr;
559 entry.daddr = 637 entry.daddr =
560#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 638#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
561 (entry.family == AF_INET6) ? 639 (entry.family == AF_INET6) ?
562 tcp6_rsk(req)->rmt_addr.s6_addr32 : 640 inet6_rsk(req)->rmt_addr.s6_addr32 :
563#endif 641#endif
564 &ireq->rmt_addr; 642 &ireq->rmt_addr;
565 entry.dport = ntohs(ireq->rmt_port); 643 entry.dport = ntohs(ireq->rmt_port);
@@ -599,7 +677,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
599 handler = inet_diag_table[cb->nlh->nlmsg_type]; 677 handler = inet_diag_table[cb->nlh->nlmsg_type];
600 BUG_ON(handler == NULL); 678 BUG_ON(handler == NULL);
601 hashinfo = handler->idiag_hashinfo; 679 hashinfo = handler->idiag_hashinfo;
602 680
603 s_i = cb->args[1]; 681 s_i = cb->args[1];
604 s_num = num = cb->args[2]; 682 s_num = num = cb->args[2];
605 683
@@ -630,7 +708,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
630 cb->args[3] > 0) 708 cb->args[3] > 0)
631 goto syn_recv; 709 goto syn_recv;
632 710
633 if (inet_diag_dump_sock(skb, sk, cb) < 0) { 711 if (inet_csk_diag_dump(sk, skb, cb) < 0) {
634 inet_listen_unlock(hashinfo); 712 inet_listen_unlock(hashinfo);
635 goto done; 713 goto done;
636 } 714 }
@@ -672,7 +750,6 @@ skip_listen_ht:
672 s_num = 0; 750 s_num = 0;
673 751
674 read_lock_bh(&head->lock); 752 read_lock_bh(&head->lock);
675
676 num = 0; 753 num = 0;
677 sk_for_each(sk, node, &head->chain) { 754 sk_for_each(sk, node, &head->chain) {
678 struct inet_sock *inet = inet_sk(sk); 755 struct inet_sock *inet = inet_sk(sk);
@@ -684,9 +761,10 @@ skip_listen_ht:
684 if (r->id.idiag_sport != inet->sport && 761 if (r->id.idiag_sport != inet->sport &&
685 r->id.idiag_sport) 762 r->id.idiag_sport)
686 goto next_normal; 763 goto next_normal;
687 if (r->id.idiag_dport != inet->dport && r->id.idiag_dport) 764 if (r->id.idiag_dport != inet->dport &&
765 r->id.idiag_dport)
688 goto next_normal; 766 goto next_normal;
689 if (inet_diag_dump_sock(skb, sk, cb) < 0) { 767 if (inet_csk_diag_dump(sk, skb, cb) < 0) {
690 read_unlock_bh(&head->lock); 768 read_unlock_bh(&head->lock);
691 goto done; 769 goto done;
692 } 770 }
@@ -695,19 +773,20 @@ next_normal:
695 } 773 }
696 774
697 if (r->idiag_states & TCPF_TIME_WAIT) { 775 if (r->idiag_states & TCPF_TIME_WAIT) {
698 sk_for_each(sk, node, 776 struct inet_timewait_sock *tw;
777
778 inet_twsk_for_each(tw, node,
699 &hashinfo->ehash[i + hashinfo->ehash_size].chain) { 779 &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
700 struct inet_sock *inet = inet_sk(sk);
701 780
702 if (num < s_num) 781 if (num < s_num)
703 goto next_dying; 782 goto next_dying;
704 if (r->id.idiag_sport != inet->sport && 783 if (r->id.idiag_sport != tw->tw_sport &&
705 r->id.idiag_sport) 784 r->id.idiag_sport)
706 goto next_dying; 785 goto next_dying;
707 if (r->id.idiag_dport != inet->dport && 786 if (r->id.idiag_dport != tw->tw_dport &&
708 r->id.idiag_dport) 787 r->id.idiag_dport)
709 goto next_dying; 788 goto next_dying;
710 if (inet_diag_dump_sock(skb, sk, cb) < 0) { 789 if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
711 read_unlock_bh(&head->lock); 790 read_unlock_bh(&head->lock);
712 goto done; 791 goto done;
713 } 792 }
@@ -724,8 +803,7 @@ done:
724 return skb->len; 803 return skb->len;
725} 804}
726 805
727static __inline__ int 806static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
728inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
729{ 807{
730 if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) 808 if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
731 return 0; 809 return 0;
@@ -755,9 +833,8 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
755 } 833 }
756 return netlink_dump_start(idiagnl, skb, nlh, 834 return netlink_dump_start(idiagnl, skb, nlh,
757 inet_diag_dump, NULL); 835 inet_diag_dump, NULL);
758 } else { 836 } else
759 return inet_diag_get_exact(skb, nlh); 837 return inet_diag_get_exact(skb, nlh);
760 }
761 838
762err_inval: 839err_inval:
763 return -EINVAL; 840 return -EINVAL;
@@ -766,15 +843,15 @@ err_inval:
766 843
767static inline void inet_diag_rcv_skb(struct sk_buff *skb) 844static inline void inet_diag_rcv_skb(struct sk_buff *skb)
768{ 845{
769 int err;
770 struct nlmsghdr * nlh;
771
772 if (skb->len >= NLMSG_SPACE(0)) { 846 if (skb->len >= NLMSG_SPACE(0)) {
773 nlh = (struct nlmsghdr *)skb->data; 847 int err;
774 if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) 848 struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
849
850 if (nlh->nlmsg_len < sizeof(*nlh) ||
851 skb->len < nlh->nlmsg_len)
775 return; 852 return;
776 err = inet_diag_rcv_msg(skb, nlh); 853 err = inet_diag_rcv_msg(skb, nlh);
777 if (err || nlh->nlmsg_flags & NLM_F_ACK) 854 if (err || nlh->nlmsg_flags & NLM_F_ACK)
778 netlink_ack(skb, nlh, err); 855 netlink_ack(skb, nlh, err);
779 } 856 }
780} 857}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8d29fe736d2..33228115cda4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,12 +15,14 @@
15 15
16#include <linux/config.h> 16#include <linux/config.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/random.h>
18#include <linux/sched.h> 19#include <linux/sched.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
20#include <linux/wait.h> 21#include <linux/wait.h>
21 22
22#include <net/inet_connection_sock.h> 23#include <net/inet_connection_sock.h>
23#include <net/inet_hashtables.h> 24#include <net/inet_hashtables.h>
25#include <net/ip.h>
24 26
25/* 27/*
26 * Allocate and initialize a new local port bind bucket. 28 * Allocate and initialize a new local port bind bucket.
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
163} 165}
164 166
165EXPORT_SYMBOL_GPL(__inet_lookup_listener); 167EXPORT_SYMBOL_GPL(__inet_lookup_listener);
168
169/* called with local bh disabled */
170static int __inet_check_established(struct inet_timewait_death_row *death_row,
171 struct sock *sk, __u16 lport,
172 struct inet_timewait_sock **twp)
173{
174 struct inet_hashinfo *hinfo = death_row->hashinfo;
175 struct inet_sock *inet = inet_sk(sk);
176 u32 daddr = inet->rcv_saddr;
177 u32 saddr = inet->daddr;
178 int dif = sk->sk_bound_dev_if;
179 INET_ADDR_COOKIE(acookie, saddr, daddr)
180 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
181 unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
182 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
183 struct sock *sk2;
184 const struct hlist_node *node;
185 struct inet_timewait_sock *tw;
186
187 prefetch(head->chain.first);
188 write_lock(&head->lock);
189
190 /* Check TIME-WAIT sockets first. */
191 sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
192 tw = inet_twsk(sk2);
193
194 if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
195 if (twsk_unique(sk, sk2, twp))
196 goto unique;
197 else
198 goto not_unique;
199 }
200 }
201 tw = NULL;
202
203 /* And established part... */
204 sk_for_each(sk2, node, &head->chain) {
205 if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
206 goto not_unique;
207 }
208
209unique:
210 /* Must record num and sport now. Otherwise we will see
211 * in hash table socket with a funny identity. */
212 inet->num = lport;
213 inet->sport = htons(lport);
214 sk->sk_hash = hash;
215 BUG_TRAP(sk_unhashed(sk));
216 __sk_add_node(sk, &head->chain);
217 sock_prot_inc_use(sk->sk_prot);
218 write_unlock(&head->lock);
219
220 if (twp) {
221 *twp = tw;
222 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
223 } else if (tw) {
224 /* Silly. Should hash-dance instead... */
225 inet_twsk_deschedule(tw, death_row);
226 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
227
228 inet_twsk_put(tw);
229 }
230
231 return 0;
232
233not_unique:
234 write_unlock(&head->lock);
235 return -EADDRNOTAVAIL;
236}
237
238static inline u32 inet_sk_port_offset(const struct sock *sk)
239{
240 const struct inet_sock *inet = inet_sk(sk);
241 return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr,
242 inet->dport);
243}
244
245/*
246 * Bind a port for a connect operation and hash it.
247 */
248int inet_hash_connect(struct inet_timewait_death_row *death_row,
249 struct sock *sk)
250{
251 struct inet_hashinfo *hinfo = death_row->hashinfo;
252 const unsigned short snum = inet_sk(sk)->num;
253 struct inet_bind_hashbucket *head;
254 struct inet_bind_bucket *tb;
255 int ret;
256
257 if (!snum) {
258 int low = sysctl_local_port_range[0];
259 int high = sysctl_local_port_range[1];
260 int range = high - low;
261 int i;
262 int port;
263 static u32 hint;
264 u32 offset = hint + inet_sk_port_offset(sk);
265 struct hlist_node *node;
266 struct inet_timewait_sock *tw = NULL;
267
268 local_bh_disable();
269 for (i = 1; i <= range; i++) {
270 port = low + (i + offset) % range;
271 head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
272 spin_lock(&head->lock);
273
274 /* Does not bother with rcv_saddr checks,
275 * because the established check is already
276 * unique enough.
277 */
278 inet_bind_bucket_for_each(tb, node, &head->chain) {
279 if (tb->port == port) {
280 BUG_TRAP(!hlist_empty(&tb->owners));
281 if (tb->fastreuse >= 0)
282 goto next_port;
283 if (!__inet_check_established(death_row,
284 sk, port,
285 &tw))
286 goto ok;
287 goto next_port;
288 }
289 }
290
291 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
292 if (!tb) {
293 spin_unlock(&head->lock);
294 break;
295 }
296 tb->fastreuse = -1;
297 goto ok;
298
299 next_port:
300 spin_unlock(&head->lock);
301 }
302 local_bh_enable();
303
304 return -EADDRNOTAVAIL;
305
306ok:
307 hint += i;
308
309 /* Head lock still held and bh's disabled */
310 inet_bind_hash(sk, tb, port);
311 if (sk_unhashed(sk)) {
312 inet_sk(sk)->sport = htons(port);
313 __inet_hash(hinfo, sk, 0);
314 }
315 spin_unlock(&head->lock);
316
317 if (tw) {
318 inet_twsk_deschedule(tw, death_row);;
319 inet_twsk_put(tw);
320 }
321
322 ret = 0;
323 goto out;
324 }
325
326 head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
327 tb = inet_csk(sk)->icsk_bind_hash;
328 spin_lock_bh(&head->lock);
329 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
330 __inet_hash(hinfo, sk, 0);
331 spin_unlock_bh(&head->lock);
332 return 0;
333 } else {
334 spin_unlock(&head->lock);
335 /* No definite answer... Walk to established hash table */
336 ret = __inet_check_established(death_row, sk, snum, NULL);
337out:
338 local_bh_enable();
339 return ret;
340 }
341}
342
343EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a010e9a68811..417f126c749e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
90 90
91struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) 91struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
92{ 92{
93 struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, 93 struct inet_timewait_sock *tw =
94 SLAB_ATOMIC); 94 kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
95 SLAB_ATOMIC);
95 if (tw != NULL) { 96 if (tw != NULL) {
96 const struct inet_sock *inet = inet_sk(sk); 97 const struct inet_sock *inet = inet_sk(sk);
97 98
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2fc3fd38924f..2160874ce7aa 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -304,8 +304,7 @@ static void unlink_from_pool(struct inet_peer *p)
304 /* look for a node to insert instead of p */ 304 /* look for a node to insert instead of p */
305 struct inet_peer *t; 305 struct inet_peer *t;
306 t = lookup_rightempty(p); 306 t = lookup_rightempty(p);
307 if (*stackptr[-1] != t) 307 BUG_ON(*stackptr[-1] != t);
308 BUG();
309 **--stackptr = t->avl_left; 308 **--stackptr = t->avl_left;
310 /* t is removed, t->v4daddr > x->v4daddr for any 309 /* t is removed, t->v4daddr > x->v4daddr for any
311 * x in p->avl_left subtree. 310 * x in p->avl_left subtree.
@@ -314,8 +313,7 @@ static void unlink_from_pool(struct inet_peer *p)
314 t->avl_left = p->avl_left; 313 t->avl_left = p->avl_left;
315 t->avl_right = p->avl_right; 314 t->avl_right = p->avl_right;
316 t->avl_height = p->avl_height; 315 t->avl_height = p->avl_height;
317 if (delp[1] != &p->avl_left) 316 BUG_ON(delp[1] != &p->avl_left);
318 BUG();
319 delp[1] = &t->avl_left; /* was &p->avl_left */ 317 delp[1] = &t->avl_left; /* was &p->avl_left */
320 } 318 }
321 peer_avl_rebalance(stack, stackptr); 319 peer_avl_rebalance(stack, stackptr);
@@ -401,6 +399,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create)
401 return NULL; 399 return NULL;
402 n->v4daddr = daddr; 400 n->v4daddr = daddr;
403 atomic_set(&n->refcnt, 1); 401 atomic_set(&n->refcnt, 1);
402 atomic_set(&n->rid, 0);
404 n->ip_id_count = secure_ip_id(daddr); 403 n->ip_id_count = secure_ip_id(daddr);
405 n->tcp_ts_stamp = 0; 404 n->tcp_ts_stamp = 0;
406 405
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8ce0ce2ee48e..2a8adda15e11 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -22,6 +22,7 @@
22 * Patrick McHardy : LRU queue of frag heads for evictor. 22 * Patrick McHardy : LRU queue of frag heads for evictor.
23 */ 23 */
24 24
25#include <linux/compiler.h>
25#include <linux/config.h> 26#include <linux/config.h>
26#include <linux/module.h> 27#include <linux/module.h>
27#include <linux/types.h> 28#include <linux/types.h>
@@ -38,6 +39,7 @@
38#include <net/ip.h> 39#include <net/ip.h>
39#include <net/icmp.h> 40#include <net/icmp.h>
40#include <net/checksum.h> 41#include <net/checksum.h>
42#include <net/inetpeer.h>
41#include <linux/tcp.h> 43#include <linux/tcp.h>
42#include <linux/udp.h> 44#include <linux/udp.h>
43#include <linux/inet.h> 45#include <linux/inet.h>
@@ -56,6 +58,8 @@
56int sysctl_ipfrag_high_thresh = 256*1024; 58int sysctl_ipfrag_high_thresh = 256*1024;
57int sysctl_ipfrag_low_thresh = 192*1024; 59int sysctl_ipfrag_low_thresh = 192*1024;
58 60
61int sysctl_ipfrag_max_dist = 64;
62
59/* Important NOTE! Fragment queue must be destroyed before MSL expires. 63/* Important NOTE! Fragment queue must be destroyed before MSL expires.
60 * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. 64 * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
61 */ 65 */
@@ -89,8 +93,10 @@ struct ipq {
89 spinlock_t lock; 93 spinlock_t lock;
90 atomic_t refcnt; 94 atomic_t refcnt;
91 struct timer_list timer; /* when will this queue expire? */ 95 struct timer_list timer; /* when will this queue expire? */
92 int iif;
93 struct timeval stamp; 96 struct timeval stamp;
97 int iif;
98 unsigned int rid;
99 struct inet_peer *peer;
94}; 100};
95 101
96/* Hash table. */ 102/* Hash table. */
@@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work)
195 BUG_TRAP(qp->last_in&COMPLETE); 201 BUG_TRAP(qp->last_in&COMPLETE);
196 BUG_TRAP(del_timer(&qp->timer) == 0); 202 BUG_TRAP(del_timer(&qp->timer) == 0);
197 203
204 if (qp->peer)
205 inet_putpeer(qp->peer);
206
198 /* Release all fragment data. */ 207 /* Release all fragment data. */
199 fp = qp->fragments; 208 fp = qp->fragments;
200 while (fp) { 209 while (fp) {
@@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
353 qp->meat = 0; 362 qp->meat = 0;
354 qp->fragments = NULL; 363 qp->fragments = NULL;
355 qp->iif = 0; 364 qp->iif = 0;
365 qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
356 366
357 /* Initialize a timer for this entry. */ 367 /* Initialize a timer for this entry. */
358 init_timer(&qp->timer); 368 init_timer(&qp->timer);
@@ -373,7 +383,7 @@ out_nomem:
373 */ 383 */
374static inline struct ipq *ip_find(struct iphdr *iph, u32 user) 384static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
375{ 385{
376 __u16 id = iph->id; 386 __be16 id = iph->id;
377 __u32 saddr = iph->saddr; 387 __u32 saddr = iph->saddr;
378 __u32 daddr = iph->daddr; 388 __u32 daddr = iph->daddr;
379 __u8 protocol = iph->protocol; 389 __u8 protocol = iph->protocol;
@@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
398 return ip_frag_create(hash, iph, user); 408 return ip_frag_create(hash, iph, user);
399} 409}
400 410
411/* Is the fragment too far ahead to be part of ipq? */
412static inline int ip_frag_too_far(struct ipq *qp)
413{
414 struct inet_peer *peer = qp->peer;
415 unsigned int max = sysctl_ipfrag_max_dist;
416 unsigned int start, end;
417
418 int rc;
419
420 if (!peer || !max)
421 return 0;
422
423 start = qp->rid;
424 end = atomic_inc_return(&peer->rid);
425 qp->rid = end;
426
427 rc = qp->fragments && (end - start) > max;
428
429 if (rc) {
430 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
431 }
432
433 return rc;
434}
435
436static int ip_frag_reinit(struct ipq *qp)
437{
438 struct sk_buff *fp;
439
440 if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
441 atomic_inc(&qp->refcnt);
442 return -ETIMEDOUT;
443 }
444
445 fp = qp->fragments;
446 do {
447 struct sk_buff *xp = fp->next;
448 frag_kfree_skb(fp, NULL);
449 fp = xp;
450 } while (fp);
451
452 qp->last_in = 0;
453 qp->len = 0;
454 qp->meat = 0;
455 qp->fragments = NULL;
456 qp->iif = 0;
457
458 return 0;
459}
460
401/* Add new segment to existing queue. */ 461/* Add new segment to existing queue. */
402static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) 462static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
403{ 463{
@@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
408 if (qp->last_in & COMPLETE) 468 if (qp->last_in & COMPLETE)
409 goto err; 469 goto err;
410 470
471 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
472 unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
473 ipq_kill(qp);
474 goto err;
475 }
476
411 offset = ntohs(skb->nh.iph->frag_off); 477 offset = ntohs(skb->nh.iph->frag_off);
412 flags = offset & ~IP_OFFSET; 478 flags = offset & ~IP_OFFSET;
413 offset &= IP_OFFSET; 479 offset &= IP_OFFSET;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 46f9d9cf7a5f..abe23923e4e7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -10,6 +10,7 @@
10 * 10 *
11 */ 11 */
12 12
13#include <linux/capability.h>
13#include <linux/config.h> 14#include <linux/config.h>
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/types.h> 16#include <linux/types.h>
@@ -28,6 +29,7 @@
28#include <linux/inetdevice.h> 29#include <linux/inetdevice.h>
29#include <linux/igmp.h> 30#include <linux/igmp.h>
30#include <linux/netfilter_ipv4.h> 31#include <linux/netfilter_ipv4.h>
32#include <linux/if_ether.h>
31 33
32#include <net/sock.h> 34#include <net/sock.h>
33#include <net/ip.h> 35#include <net/ip.h>
@@ -187,7 +189,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
187 } 189 }
188 190
189 if (ipgre_fb_tunnel_dev->flags&IFF_UP) 191 if (ipgre_fb_tunnel_dev->flags&IFF_UP)
190 return ipgre_fb_tunnel_dev->priv; 192 return netdev_priv(ipgre_fb_tunnel_dev);
191 return NULL; 193 return NULL;
192} 194}
193 195
@@ -277,7 +279,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
277 return NULL; 279 return NULL;
278 280
279 dev->init = ipgre_tunnel_init; 281 dev->init = ipgre_tunnel_init;
280 nt = dev->priv; 282 nt = netdev_priv(dev);
281 nt->parms = *parms; 283 nt->parms = *parms;
282 284
283 if (register_netdevice(dev) < 0) { 285 if (register_netdevice(dev) < 0) {
@@ -285,9 +287,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
285 goto failed; 287 goto failed;
286 } 288 }
287 289
288 nt = dev->priv;
289 nt->parms = *parms;
290
291 dev_hold(dev); 290 dev_hold(dev);
292 ipgre_tunnel_link(nt); 291 ipgre_tunnel_link(nt);
293 return nt; 292 return nt;
@@ -298,7 +297,7 @@ failed:
298 297
299static void ipgre_tunnel_uninit(struct net_device *dev) 298static void ipgre_tunnel_uninit(struct net_device *dev)
300{ 299{
301 ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv); 300 ipgre_tunnel_unlink(netdev_priv(dev));
302 dev_put(dev); 301 dev_put(dev);
303} 302}
304 303
@@ -517,7 +516,7 @@ out:
517 skb2->dst->ops->update_pmtu(skb2->dst, rel_info); 516 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
518 rel_info = htonl(rel_info); 517 rel_info = htonl(rel_info);
519 } else if (type == ICMP_TIME_EXCEEDED) { 518 } else if (type == ICMP_TIME_EXCEEDED) {
520 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; 519 struct ip_tunnel *t = netdev_priv(skb2->dev);
521 if (t->parms.iph.ttl) { 520 if (t->parms.iph.ttl) {
522 rel_type = ICMP_DEST_UNREACH; 521 rel_type = ICMP_DEST_UNREACH;
523 rel_code = ICMP_HOST_UNREACH; 522 rel_code = ICMP_HOST_UNREACH;
@@ -668,7 +667,7 @@ drop_nolock:
668 667
669static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 668static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
670{ 669{
671 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 670 struct ip_tunnel *tunnel = netdev_priv(dev);
672 struct net_device_stats *stats = &tunnel->stat; 671 struct net_device_stats *stats = &tunnel->stat;
673 struct iphdr *old_iph = skb->nh.iph; 672 struct iphdr *old_iph = skb->nh.iph;
674 struct iphdr *tiph; 673 struct iphdr *tiph;
@@ -831,6 +830,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
831 skb->h.raw = skb->nh.raw; 830 skb->h.raw = skb->nh.raw;
832 skb->nh.raw = skb_push(skb, gre_hlen); 831 skb->nh.raw = skb_push(skb, gre_hlen);
833 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 832 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
833 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
834 dst_release(skb->dst); 834 dst_release(skb->dst);
835 skb->dst = &rt->u.dst; 835 skb->dst = &rt->u.dst;
836 836
@@ -913,7 +913,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
913 t = ipgre_tunnel_locate(&p, 0); 913 t = ipgre_tunnel_locate(&p, 0);
914 } 914 }
915 if (t == NULL) 915 if (t == NULL)
916 t = (struct ip_tunnel*)dev->priv; 916 t = netdev_priv(dev);
917 memcpy(&p, &t->parms, sizeof(p)); 917 memcpy(&p, &t->parms, sizeof(p));
918 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 918 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
919 err = -EFAULT; 919 err = -EFAULT;
@@ -953,7 +953,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
953 } else { 953 } else {
954 unsigned nflags=0; 954 unsigned nflags=0;
955 955
956 t = (struct ip_tunnel*)dev->priv; 956 t = netdev_priv(dev);
957 957
958 if (MULTICAST(p.iph.daddr)) 958 if (MULTICAST(p.iph.daddr))
959 nflags = IFF_BROADCAST; 959 nflags = IFF_BROADCAST;
@@ -1002,7 +1002,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1002 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) 1002 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
1003 goto done; 1003 goto done;
1004 err = -EPERM; 1004 err = -EPERM;
1005 if (t == ipgre_fb_tunnel_dev->priv) 1005 if (t == netdev_priv(ipgre_fb_tunnel_dev))
1006 goto done; 1006 goto done;
1007 dev = t->dev; 1007 dev = t->dev;
1008 } 1008 }
@@ -1019,12 +1019,12 @@ done:
1019 1019
1020static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) 1020static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1021{ 1021{
1022 return &(((struct ip_tunnel*)dev->priv)->stat); 1022 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1023} 1023}
1024 1024
1025static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 1025static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1026{ 1026{
1027 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 1027 struct ip_tunnel *tunnel = netdev_priv(dev);
1028 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) 1028 if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1029 return -EINVAL; 1029 return -EINVAL;
1030 dev->mtu = new_mtu; 1030 dev->mtu = new_mtu;
@@ -1064,7 +1064,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1064static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, 1064static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
1065 void *daddr, void *saddr, unsigned len) 1065 void *daddr, void *saddr, unsigned len)
1066{ 1066{
1067 struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1067 struct ip_tunnel *t = netdev_priv(dev);
1068 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1068 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1069 u16 *p = (u16*)(iph+1); 1069 u16 *p = (u16*)(iph+1);
1070 1070
@@ -1091,7 +1091,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh
1091 1091
1092static int ipgre_open(struct net_device *dev) 1092static int ipgre_open(struct net_device *dev)
1093{ 1093{
1094 struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1094 struct ip_tunnel *t = netdev_priv(dev);
1095 1095
1096 if (MULTICAST(t->parms.iph.daddr)) { 1096 if (MULTICAST(t->parms.iph.daddr)) {
1097 struct flowi fl = { .oif = t->parms.link, 1097 struct flowi fl = { .oif = t->parms.link,
@@ -1115,7 +1115,7 @@ static int ipgre_open(struct net_device *dev)
1115 1115
1116static int ipgre_close(struct net_device *dev) 1116static int ipgre_close(struct net_device *dev)
1117{ 1117{
1118 struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1118 struct ip_tunnel *t = netdev_priv(dev);
1119 if (MULTICAST(t->parms.iph.daddr) && t->mlink) { 1119 if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
1120 struct in_device *in_dev = inetdev_by_index(t->mlink); 1120 struct in_device *in_dev = inetdev_by_index(t->mlink);
1121 if (in_dev) { 1121 if (in_dev) {
@@ -1140,7 +1140,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
1140 1140
1141 dev->type = ARPHRD_IPGRE; 1141 dev->type = ARPHRD_IPGRE;
1142 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1142 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1143 dev->mtu = 1500 - sizeof(struct iphdr) - 4; 1143 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1144 dev->flags = IFF_NOARP; 1144 dev->flags = IFF_NOARP;
1145 dev->iflink = 0; 1145 dev->iflink = 0;
1146 dev->addr_len = 4; 1146 dev->addr_len = 4;
@@ -1152,10 +1152,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
1152 struct ip_tunnel *tunnel; 1152 struct ip_tunnel *tunnel;
1153 struct iphdr *iph; 1153 struct iphdr *iph;
1154 int hlen = LL_MAX_HEADER; 1154 int hlen = LL_MAX_HEADER;
1155 int mtu = 1500; 1155 int mtu = ETH_DATA_LEN;
1156 int addend = sizeof(struct iphdr) + 4; 1156 int addend = sizeof(struct iphdr) + 4;
1157 1157
1158 tunnel = (struct ip_tunnel*)dev->priv; 1158 tunnel = netdev_priv(dev);
1159 iph = &tunnel->parms.iph; 1159 iph = &tunnel->parms.iph;
1160 1160
1161 tunnel->dev = dev; 1161 tunnel->dev = dev;
@@ -1219,7 +1219,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
1219 1219
1220static int __init ipgre_fb_tunnel_init(struct net_device *dev) 1220static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1221{ 1221{
1222 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 1222 struct ip_tunnel *tunnel = netdev_priv(dev);
1223 struct iphdr *iph = &tunnel->parms.iph; 1223 struct iphdr *iph = &tunnel->parms.iph;
1224 1224
1225 tunnel->dev = dev; 1225 tunnel->dev = dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 473d0f2b2e0d..18d7fad474d7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -128,6 +128,7 @@
128#include <linux/sockios.h> 128#include <linux/sockios.h>
129#include <linux/in.h> 129#include <linux/in.h>
130#include <linux/inet.h> 130#include <linux/inet.h>
131#include <linux/inetdevice.h>
131#include <linux/netdevice.h> 132#include <linux/netdevice.h>
132#include <linux/etherdevice.h> 133#include <linux/etherdevice.h>
133 134
@@ -184,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb)
184 raw_rcv(last, skb2); 185 raw_rcv(last, skb2);
185 } 186 }
186 last = sk; 187 last = sk;
187 nf_reset(skb);
188 } 188 }
189 } 189 }
190 190
@@ -203,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
203 203
204 __skb_pull(skb, ihl); 204 __skb_pull(skb, ihl);
205 205
206 /* Free reference early: we don't need it any more, and it may
207 hold ip_conntrack module loaded indefinitely. */
208 nf_reset(skb);
209
210 /* Point into the IP datagram, just past the header. */ 206 /* Point into the IP datagram, just past the header. */
211 skb->h.raw = skb->data; 207 skb->h.raw = skb->data;
212 208
@@ -231,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
231 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { 227 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
232 int ret; 228 int ret;
233 229
234 if (!ipprot->no_policy && 230 if (!ipprot->no_policy) {
235 !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 231 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
236 kfree_skb(skb); 232 kfree_skb(skb);
237 goto out; 233 goto out;
234 }
235 nf_reset(skb);
238 } 236 }
239 ret = ipprot->handler(skb); 237 ret = ipprot->handler(skb);
240 if (ret < 0) { 238 if (ret < 0) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index dbe12da8d8b3..9bebad07bf2e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -11,6 +11,7 @@
11 * 11 *
12 */ 12 */
13 13
14#include <linux/capability.h>
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/types.h> 16#include <linux/types.h>
16#include <asm/uaccess.h> 17#include <asm/uaccess.h>
@@ -22,6 +23,7 @@
22#include <net/sock.h> 23#include <net/sock.h>
23#include <net/ip.h> 24#include <net/ip.h>
24#include <net/icmp.h> 25#include <net/icmp.h>
26#include <net/route.h>
25 27
26/* 28/*
27 * Write options to IP header, record destination address to 29 * Write options to IP header, record destination address to
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eba64e2bd397..3324fbfe528a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -69,6 +69,7 @@
69#include <net/ip.h> 69#include <net/ip.h>
70#include <net/protocol.h> 70#include <net/protocol.h>
71#include <net/route.h> 71#include <net/route.h>
72#include <net/xfrm.h>
72#include <linux/skbuff.h> 73#include <linux/skbuff.h>
73#include <net/sock.h> 74#include <net/sock.h>
74#include <net/arp.h> 75#include <net/arp.h>
@@ -85,6 +86,8 @@
85 86
86int sysctl_ip_default_ttl = IPDEFTTL; 87int sysctl_ip_default_ttl = IPDEFTTL;
87 88
89static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
90
88/* Generate a checksum for an outgoing IP datagram. */ 91/* Generate a checksum for an outgoing IP datagram. */
89__inline__ void ip_send_check(struct iphdr *iph) 92__inline__ void ip_send_check(struct iphdr *iph)
90{ 93{
@@ -202,13 +205,16 @@ static inline int ip_finish_output2(struct sk_buff *skb)
202 205
203static inline int ip_finish_output(struct sk_buff *skb) 206static inline int ip_finish_output(struct sk_buff *skb)
204{ 207{
205 struct net_device *dev = skb->dst->dev; 208#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
206 209 /* Policy lookup after SNAT yielded a new policy */
207 skb->dev = dev; 210 if (skb->dst->xfrm != NULL)
208 skb->protocol = htons(ETH_P_IP); 211 return xfrm4_output_finish(skb);
209 212#endif
210 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, 213 if (skb->len > dst_mtu(skb->dst) &&
211 ip_finish_output2); 214 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
215 return ip_fragment(skb, ip_finish_output2);
216 else
217 return ip_finish_output2(skb);
212} 218}
213 219
214int ip_mc_output(struct sk_buff *skb) 220int ip_mc_output(struct sk_buff *skb)
@@ -265,21 +271,21 @@ int ip_mc_output(struct sk_buff *skb)
265 newskb->dev, ip_dev_loopback_xmit); 271 newskb->dev, ip_dev_loopback_xmit);
266 } 272 }
267 273
268 if (skb->len > dst_mtu(&rt->u.dst)) 274 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
269 return ip_fragment(skb, ip_finish_output); 275 ip_finish_output);
270 else
271 return ip_finish_output(skb);
272} 276}
273 277
274int ip_output(struct sk_buff *skb) 278int ip_output(struct sk_buff *skb)
275{ 279{
280 struct net_device *dev = skb->dst->dev;
281
276 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 282 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
277 283
278 if (skb->len > dst_mtu(skb->dst) && 284 skb->dev = dev;
279 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) 285 skb->protocol = htons(ETH_P_IP);
280 return ip_fragment(skb, ip_finish_output); 286
281 else 287 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
282 return ip_finish_output(skb); 288 ip_finish_output);
283} 289}
284 290
285int ip_queue_xmit(struct sk_buff *skb, int ipfragok) 291int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
@@ -411,7 +417,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
411 * single device frame, and queue such a frame for sending. 417 * single device frame, and queue such a frame for sending.
412 */ 418 */
413 419
414int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) 420static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
415{ 421{
416 struct iphdr *iph; 422 struct iphdr *iph;
417 int raw = 0; 423 int raw = 0;
@@ -420,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
420 struct sk_buff *skb2; 426 struct sk_buff *skb2;
421 unsigned int mtu, hlen, left, len, ll_rs; 427 unsigned int mtu, hlen, left, len, ll_rs;
422 int offset; 428 int offset;
423 int not_last_frag; 429 __be16 not_last_frag;
424 struct rtable *rt = (struct rtable*)skb->dst; 430 struct rtable *rt = (struct rtable*)skb->dst;
425 int err = 0; 431 int err = 0;
426 432
@@ -445,6 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
445 451
446 hlen = iph->ihl * 4; 452 hlen = iph->ihl * 4;
447 mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ 453 mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */
454 IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
448 455
449 /* When frag_list is given, use it. First, check its validity: 456 /* When frag_list is given, use it. First, check its validity:
450 * some transformers could create wrong frag_list or break existing 457 * some transformers could create wrong frag_list or break existing
@@ -1181,7 +1188,7 @@ int ip_push_pending_frames(struct sock *sk)
1181 struct ip_options *opt = NULL; 1188 struct ip_options *opt = NULL;
1182 struct rtable *rt = inet->cork.rt; 1189 struct rtable *rt = inet->cork.rt;
1183 struct iphdr *iph; 1190 struct iphdr *iph;
1184 int df = 0; 1191 __be16 df = 0;
1185 __u8 ttl; 1192 __u8 ttl;
1186 int err = 0; 1193 int err = 0;
1187 1194
@@ -1392,7 +1399,6 @@ void __init ip_init(void)
1392#endif 1399#endif
1393} 1400}
1394 1401
1395EXPORT_SYMBOL(ip_fragment);
1396EXPORT_SYMBOL(ip_generic_getfrag); 1402EXPORT_SYMBOL(ip_generic_getfrag);
1397EXPORT_SYMBOL(ip_queue_xmit); 1403EXPORT_SYMBOL(ip_queue_xmit);
1398EXPORT_SYMBOL(ip_send_check); 1404EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4f2d87257309..2bf8d782f678 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -25,12 +25,12 @@
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <linux/ip.h> 26#include <linux/ip.h>
27#include <linux/icmp.h> 27#include <linux/icmp.h>
28#include <linux/inetdevice.h>
28#include <linux/netdevice.h> 29#include <linux/netdevice.h>
29#include <net/sock.h> 30#include <net/sock.h>
30#include <net/ip.h> 31#include <net/ip.h>
31#include <net/icmp.h> 32#include <net/icmp.h>
32#include <net/tcp.h> 33#include <net/tcp_states.h>
33#include <linux/tcp.h>
34#include <linux/udp.h> 34#include <linux/udp.h>
35#include <linux/igmp.h> 35#include <linux/igmp.h>
36#include <linux/netfilter.h> 36#include <linux/netfilter.h>
@@ -427,8 +427,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
427 err = ip_options_get_from_user(&opt, optval, optlen); 427 err = ip_options_get_from_user(&opt, optval, optlen);
428 if (err) 428 if (err)
429 break; 429 break;
430 if (sk->sk_type == SOCK_STREAM) { 430 if (inet->is_icsk) {
431 struct tcp_sock *tp = tcp_sk(sk); 431 struct inet_connection_sock *icsk = inet_csk(sk);
432#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 432#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
433 if (sk->sk_family == PF_INET || 433 if (sk->sk_family == PF_INET ||
434 (!((1 << sk->sk_state) & 434 (!((1 << sk->sk_state) &
@@ -436,10 +436,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
436 inet->daddr != LOOPBACK4_IPV6)) { 436 inet->daddr != LOOPBACK4_IPV6)) {
437#endif 437#endif
438 if (inet->opt) 438 if (inet->opt)
439 tp->ext_header_len -= inet->opt->optlen; 439 icsk->icsk_ext_hdr_len -= inet->opt->optlen;
440 if (opt) 440 if (opt)
441 tp->ext_header_len += opt->optlen; 441 icsk->icsk_ext_hdr_len += opt->optlen;
442 tcp_sync_mss(sk, tp->pmtu_cookie); 442 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
443#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 443#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
444 } 444 }
445#endif 445#endif
@@ -621,7 +621,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
621 err = -ENOBUFS; 621 err = -ENOBUFS;
622 break; 622 break;
623 } 623 }
624 msf = (struct ip_msfilter *)kmalloc(optlen, GFP_KERNEL); 624 msf = kmalloc(optlen, GFP_KERNEL);
625 if (msf == 0) { 625 if (msf == 0) {
626 err = -ENOBUFS; 626 err = -ENOBUFS;
627 break; 627 break;
@@ -778,7 +778,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
778 err = -ENOBUFS; 778 err = -ENOBUFS;
779 break; 779 break;
780 } 780 }
781 gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL); 781 gsf = kmalloc(optlen,GFP_KERNEL);
782 if (gsf == 0) { 782 if (gsf == 0) {
783 err = -ENOBUFS; 783 err = -ENOBUFS;
784 break; 784 break;
@@ -798,7 +798,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
798 goto mc_msf_out; 798 goto mc_msf_out;
799 } 799 }
800 msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); 800 msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
801 msf = (struct ip_msfilter *)kmalloc(msize,GFP_KERNEL); 801 msf = kmalloc(msize,GFP_KERNEL);
802 if (msf == 0) { 802 if (msf == 0) {
803 err = -ENOBUFS; 803 err = -ENOBUFS;
804 goto mc_msf_out; 804 goto mc_msf_out;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index fc718df17b40..d64e2ec8da7b 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -28,6 +28,7 @@
28#include <net/xfrm.h> 28#include <net/xfrm.h>
29#include <net/icmp.h> 29#include <net/icmp.h>
30#include <net/ipcomp.h> 30#include <net/ipcomp.h>
31#include <net/protocol.h>
31 32
32struct ipcomp_tfms { 33struct ipcomp_tfms {
33 struct list_head list; 34 struct list_head list;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e8674baaa8d9..bb3613ec448c 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -42,6 +42,7 @@
42#include <linux/in.h> 42#include <linux/in.h>
43#include <linux/if.h> 43#include <linux/if.h>
44#include <linux/inet.h> 44#include <linux/inet.h>
45#include <linux/inetdevice.h>
45#include <linux/netdevice.h> 46#include <linux/netdevice.h>
46#include <linux/if_arp.h> 47#include <linux/if_arp.h>
47#include <linux/skbuff.h> 48#include <linux/skbuff.h>
@@ -58,6 +59,7 @@
58#include <net/arp.h> 59#include <net/arp.h>
59#include <net/ip.h> 60#include <net/ip.h>
60#include <net/ipconfig.h> 61#include <net/ipconfig.h>
62#include <net/route.h>
61 63
62#include <asm/uaccess.h> 64#include <asm/uaccess.h>
63#include <net/checksum.h> 65#include <net/checksum.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c05c1df0bb04..e5cbe72c6b80 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -93,6 +93,7 @@
93 */ 93 */
94 94
95 95
96#include <linux/capability.h>
96#include <linux/config.h> 97#include <linux/config.h>
97#include <linux/module.h> 98#include <linux/module.h>
98#include <linux/types.h> 99#include <linux/types.h>
@@ -108,6 +109,7 @@
108#include <linux/mroute.h> 109#include <linux/mroute.h>
109#include <linux/init.h> 110#include <linux/init.h>
110#include <linux/netfilter_ipv4.h> 111#include <linux/netfilter_ipv4.h>
112#include <linux/if_ether.h>
111 113
112#include <net/sock.h> 114#include <net/sock.h>
113#include <net/ip.h> 115#include <net/ip.h>
@@ -243,7 +245,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
243 if (dev == NULL) 245 if (dev == NULL)
244 return NULL; 246 return NULL;
245 247
246 nt = dev->priv; 248 nt = netdev_priv(dev);
247 SET_MODULE_OWNER(dev); 249 SET_MODULE_OWNER(dev);
248 dev->init = ipip_tunnel_init; 250 dev->init = ipip_tunnel_init;
249 nt->parms = *parms; 251 nt->parms = *parms;
@@ -268,7 +270,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
268 tunnels_wc[0] = NULL; 270 tunnels_wc[0] = NULL;
269 write_unlock_bh(&ipip_lock); 271 write_unlock_bh(&ipip_lock);
270 } else 272 } else
271 ipip_tunnel_unlink((struct ip_tunnel*)dev->priv); 273 ipip_tunnel_unlink(netdev_priv(dev));
272 dev_put(dev); 274 dev_put(dev);
273} 275}
274 276
@@ -442,7 +444,7 @@ out:
442 skb2->dst->ops->update_pmtu(skb2->dst, rel_info); 444 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
443 rel_info = htonl(rel_info); 445 rel_info = htonl(rel_info);
444 } else if (type == ICMP_TIME_EXCEEDED) { 446 } else if (type == ICMP_TIME_EXCEEDED) {
445 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; 447 struct ip_tunnel *t = netdev_priv(skb2->dev);
446 if (t->parms.iph.ttl) { 448 if (t->parms.iph.ttl) {
447 rel_type = ICMP_DEST_UNREACH; 449 rel_type = ICMP_DEST_UNREACH;
448 rel_code = ICMP_HOST_UNREACH; 450 rel_code = ICMP_HOST_UNREACH;
@@ -513,7 +515,7 @@ out:
513 515
514static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 516static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
515{ 517{
516 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 518 struct ip_tunnel *tunnel = netdev_priv(dev);
517 struct net_device_stats *stats = &tunnel->stat; 519 struct net_device_stats *stats = &tunnel->stat;
518 struct iphdr *tiph = &tunnel->parms.iph; 520 struct iphdr *tiph = &tunnel->parms.iph;
519 u8 tos = tunnel->parms.iph.tos; 521 u8 tos = tunnel->parms.iph.tos;
@@ -620,6 +622,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
620 skb->h.raw = skb->nh.raw; 622 skb->h.raw = skb->nh.raw;
621 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 623 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
622 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 624 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
625 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
623 dst_release(skb->dst); 626 dst_release(skb->dst);
624 skb->dst = &rt->u.dst; 627 skb->dst = &rt->u.dst;
625 628
@@ -672,7 +675,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
672 t = ipip_tunnel_locate(&p, 0); 675 t = ipip_tunnel_locate(&p, 0);
673 } 676 }
674 if (t == NULL) 677 if (t == NULL)
675 t = (struct ip_tunnel*)dev->priv; 678 t = netdev_priv(dev);
676 memcpy(&p, &t->parms, sizeof(p)); 679 memcpy(&p, &t->parms, sizeof(p));
677 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 680 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
678 err = -EFAULT; 681 err = -EFAULT;
@@ -709,7 +712,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
709 err = -EINVAL; 712 err = -EINVAL;
710 break; 713 break;
711 } 714 }
712 t = (struct ip_tunnel*)dev->priv; 715 t = netdev_priv(dev);
713 ipip_tunnel_unlink(t); 716 ipip_tunnel_unlink(t);
714 t->parms.iph.saddr = p.iph.saddr; 717 t->parms.iph.saddr = p.iph.saddr;
715 t->parms.iph.daddr = p.iph.daddr; 718 t->parms.iph.daddr = p.iph.daddr;
@@ -763,7 +766,7 @@ done:
763 766
764static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) 767static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
765{ 768{
766 return &(((struct ip_tunnel*)dev->priv)->stat); 769 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
767} 770}
768 771
769static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 772static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
@@ -786,7 +789,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
786 789
787 dev->type = ARPHRD_TUNNEL; 790 dev->type = ARPHRD_TUNNEL;
788 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 791 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
789 dev->mtu = 1500 - sizeof(struct iphdr); 792 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
790 dev->flags = IFF_NOARP; 793 dev->flags = IFF_NOARP;
791 dev->iflink = 0; 794 dev->iflink = 0;
792 dev->addr_len = 4; 795 dev->addr_len = 4;
@@ -798,7 +801,7 @@ static int ipip_tunnel_init(struct net_device *dev)
798 struct ip_tunnel *tunnel; 801 struct ip_tunnel *tunnel;
799 struct iphdr *iph; 802 struct iphdr *iph;
800 803
801 tunnel = (struct ip_tunnel*)dev->priv; 804 tunnel = netdev_priv(dev);
802 iph = &tunnel->parms.iph; 805 iph = &tunnel->parms.iph;
803 806
804 tunnel->dev = dev; 807 tunnel->dev = dev;
@@ -836,7 +839,7 @@ static int ipip_tunnel_init(struct net_device *dev)
836 839
837static int __init ipip_fb_tunnel_init(struct net_device *dev) 840static int __init ipip_fb_tunnel_init(struct net_device *dev)
838{ 841{
839 struct ip_tunnel *tunnel = dev->priv; 842 struct ip_tunnel *tunnel = netdev_priv(dev);
840 struct iphdr *iph = &tunnel->parms.iph; 843 struct iphdr *iph = &tunnel->parms.iph;
841 844
842 tunnel->dev = dev; 845 tunnel->dev = dev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 302b7eb507c9..5c94c222e3f3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -33,6 +33,7 @@
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
34#include <linux/types.h> 34#include <linux/types.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/capability.h>
36#include <linux/errno.h> 37#include <linux/errno.h>
37#include <linux/timer.h> 38#include <linux/timer.h>
38#include <linux/mm.h> 39#include <linux/mm.h>
@@ -49,9 +50,11 @@
49#include <linux/seq_file.h> 50#include <linux/seq_file.h>
50#include <linux/mroute.h> 51#include <linux/mroute.h>
51#include <linux/init.h> 52#include <linux/init.h>
53#include <linux/if_ether.h>
52#include <net/ip.h> 54#include <net/ip.h>
53#include <net/protocol.h> 55#include <net/protocol.h>
54#include <linux/skbuff.h> 56#include <linux/skbuff.h>
57#include <net/route.h>
55#include <net/sock.h> 58#include <net/sock.h>
56#include <net/icmp.h> 59#include <net/icmp.h>
57#include <net/udp.h> 60#include <net/udp.h>
@@ -176,8 +179,8 @@ static int reg_vif_num = -1;
176static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 179static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
177{ 180{
178 read_lock(&mrt_lock); 181 read_lock(&mrt_lock);
179 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len; 182 ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
180 ((struct net_device_stats*)dev->priv)->tx_packets++; 183 ((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
181 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); 184 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
182 read_unlock(&mrt_lock); 185 read_unlock(&mrt_lock);
183 kfree_skb(skb); 186 kfree_skb(skb);
@@ -186,13 +189,13 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
186 189
187static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) 190static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
188{ 191{
189 return (struct net_device_stats*)dev->priv; 192 return (struct net_device_stats*)netdev_priv(dev);
190} 193}
191 194
192static void reg_vif_setup(struct net_device *dev) 195static void reg_vif_setup(struct net_device *dev)
193{ 196{
194 dev->type = ARPHRD_PIMREG; 197 dev->type = ARPHRD_PIMREG;
195 dev->mtu = 1500 - sizeof(struct iphdr) - 8; 198 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
196 dev->flags = IFF_NOARP; 199 dev->flags = IFF_NOARP;
197 dev->hard_start_xmit = reg_vif_xmit; 200 dev->hard_start_xmit = reg_vif_xmit;
198 dev->get_stats = reg_vif_get_stats; 201 dev->get_stats = reg_vif_get_stats;
@@ -1147,8 +1150,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1147 if (vif->flags & VIFF_REGISTER) { 1150 if (vif->flags & VIFF_REGISTER) {
1148 vif->pkt_out++; 1151 vif->pkt_out++;
1149 vif->bytes_out+=skb->len; 1152 vif->bytes_out+=skb->len;
1150 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; 1153 ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
1151 ((struct net_device_stats*)vif->dev->priv)->tx_packets++; 1154 ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
1152 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); 1155 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1153 kfree_skb(skb); 1156 kfree_skb(skb);
1154 return; 1157 return;
@@ -1208,8 +1211,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1208 if (vif->flags & VIFF_TUNNEL) { 1211 if (vif->flags & VIFF_TUNNEL) {
1209 ip_encap(skb, vif->local, vif->remote); 1212 ip_encap(skb, vif->local, vif->remote);
1210 /* FIXME: extra output firewall step used to be here. --RR */ 1213 /* FIXME: extra output firewall step used to be here. --RR */
1211 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++; 1214 ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
1212 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len; 1215 ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
1213 } 1216 }
1214 1217
1215 IPCB(skb)->flags |= IPSKB_FORWARDED; 1218 IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1465,8 +1468,8 @@ int pim_rcv_v1(struct sk_buff * skb)
1465 skb->pkt_type = PACKET_HOST; 1468 skb->pkt_type = PACKET_HOST;
1466 dst_release(skb->dst); 1469 dst_release(skb->dst);
1467 skb->dst = NULL; 1470 skb->dst = NULL;
1468 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; 1471 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1469 ((struct net_device_stats*)reg_dev->priv)->rx_packets++; 1472 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1470 nf_reset(skb); 1473 nf_reset(skb);
1471 netif_rx(skb); 1474 netif_rx(skb);
1472 dev_put(reg_dev); 1475 dev_put(reg_dev);
@@ -1520,8 +1523,8 @@ static int pim_rcv(struct sk_buff * skb)
1520 skb->ip_summed = 0; 1523 skb->ip_summed = 0;
1521 skb->pkt_type = PACKET_HOST; 1524 skb->pkt_type = PACKET_HOST;
1522 dst_release(skb->dst); 1525 dst_release(skb->dst);
1523 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; 1526 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
1524 ((struct net_device_stats*)reg_dev->priv)->rx_packets++; 1527 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
1525 skb->dst = NULL; 1528 skb->dst = NULL;
1526 nf_reset(skb); 1529 nf_reset(skb);
1527 netif_rx(skb); 1530 netif_rx(skb);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index d7eb680101c2..9b176a942ac5 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -224,34 +224,6 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
224} 224}
225 225
226 226
227#if 0000
228/*
229 * Get reference to app by name (called from user context)
230 */
231struct ip_vs_app *ip_vs_app_get_by_name(char *appname)
232{
233 struct ip_vs_app *app, *a = NULL;
234
235 down(&__ip_vs_app_mutex);
236
237 list_for_each_entry(ent, &ip_vs_app_list, a_list) {
238 if (strcmp(app->name, appname))
239 continue;
240
241 /* softirq may call ip_vs_app_get too, so the caller
242 must disable softirq on the current CPU */
243 if (ip_vs_app_get(app))
244 a = app;
245 break;
246 }
247
248 up(&__ip_vs_app_mutex);
249
250 return a;
251}
252#endif
253
254
255/* 227/*
256 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) 228 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
257 */ 229 */
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 2a3a8c59c655..87b83813cf2c 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -24,7 +24,11 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/interrupt.h>
28#include <linux/in.h>
29#include <linux/net.h>
27#include <linux/kernel.h> 30#include <linux/kernel.h>
31#include <linux/module.h>
28#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
29#include <linux/proc_fs.h> /* for proc_net_* */ 33#include <linux/proc_fs.h> /* for proc_net_* */
30#include <linux/seq_file.h> 34#include <linux/seq_file.h>
@@ -219,7 +223,7 @@ struct ip_vs_conn *ip_vs_conn_in_get
219 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) 223 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
220 cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); 224 cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
221 225
222 IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", 226 IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
223 ip_vs_proto_name(protocol), 227 ip_vs_proto_name(protocol),
224 NIPQUAD(s_addr), ntohs(s_port), 228 NIPQUAD(s_addr), ntohs(s_port),
225 NIPQUAD(d_addr), ntohs(d_port), 229 NIPQUAD(d_addr), ntohs(d_port),
@@ -254,7 +258,7 @@ struct ip_vs_conn *ip_vs_ct_in_get
254 out: 258 out:
255 ct_read_unlock(hash); 259 ct_read_unlock(hash);
256 260
257 IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", 261 IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
258 ip_vs_proto_name(protocol), 262 ip_vs_proto_name(protocol),
259 NIPQUAD(s_addr), ntohs(s_port), 263 NIPQUAD(s_addr), ntohs(s_port),
260 NIPQUAD(d_addr), ntohs(d_port), 264 NIPQUAD(d_addr), ntohs(d_port),
@@ -295,7 +299,7 @@ struct ip_vs_conn *ip_vs_conn_out_get
295 299
296 ct_read_unlock(hash); 300 ct_read_unlock(hash);
297 301
298 IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", 302 IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
299 ip_vs_proto_name(protocol), 303 ip_vs_proto_name(protocol),
300 NIPQUAD(s_addr), ntohs(s_port), 304 NIPQUAD(s_addr), ntohs(s_port),
301 NIPQUAD(d_addr), ntohs(d_port), 305 NIPQUAD(d_addr), ntohs(d_port),
@@ -391,8 +395,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
391 cp->flags |= atomic_read(&dest->conn_flags); 395 cp->flags |= atomic_read(&dest->conn_flags);
392 cp->dest = dest; 396 cp->dest = dest;
393 397
394 IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " 398 IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
395 "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", 399 "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
400 "dest->refcnt:%d\n",
396 ip_vs_proto_name(cp->protocol), 401 ip_vs_proto_name(cp->protocol),
397 NIPQUAD(cp->caddr), ntohs(cp->cport), 402 NIPQUAD(cp->caddr), ntohs(cp->cport),
398 NIPQUAD(cp->vaddr), ntohs(cp->vport), 403 NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -430,8 +435,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
430 if (!dest) 435 if (!dest)
431 return; 436 return;
432 437
433 IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " 438 IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
434 "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", 439 "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
440 "dest->refcnt:%d\n",
435 ip_vs_proto_name(cp->protocol), 441 ip_vs_proto_name(cp->protocol),
436 NIPQUAD(cp->caddr), ntohs(cp->cport), 442 NIPQUAD(cp->caddr), ntohs(cp->cport),
437 NIPQUAD(cp->vaddr), ntohs(cp->vport), 443 NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -571,7 +577,7 @@ static void ip_vs_conn_expire(unsigned long data)
571 ip_vs_conn_hash(cp); 577 ip_vs_conn_hash(cp);
572 578
573 expire_later: 579 expire_later:
574 IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n", 580 IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
575 atomic_read(&cp->refcnt)-1, 581 atomic_read(&cp->refcnt)-1,
576 atomic_read(&cp->n_control)); 582 atomic_read(&cp->n_control));
577 583
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 1a0843cd58a9..3f47ad8e1cad 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -426,7 +426,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
426 return NULL; 426 return NULL;
427 427
428 IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " 428 IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
429 "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n", 429 "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
430 ip_vs_fwd_tag(cp), 430 ip_vs_fwd_tag(cp),
431 NIPQUAD(cp->caddr), ntohs(cp->cport), 431 NIPQUAD(cp->caddr), ntohs(cp->cport),
432 NIPQUAD(cp->vaddr), ntohs(cp->vport), 432 NIPQUAD(cp->vaddr), ntohs(cp->vport),
@@ -532,11 +532,8 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
532{ 532{
533 if (!((*pskb)->ipvs_property)) 533 if (!((*pskb)->ipvs_property))
534 return NF_ACCEPT; 534 return NF_ACCEPT;
535
536 /* The packet was sent from IPVS, exit this chain */ 535 /* The packet was sent from IPVS, exit this chain */
537 (*okfn)(*pskb); 536 return NF_STOP;
538
539 return NF_STOLEN;
540} 537}
541 538
542u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) 539u16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9bdcf31b760e..7f0288b25fa1 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -23,6 +23,7 @@
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/capability.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
27#include <linux/sysctl.h> 28#include <linux/sysctl.h>
28#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
@@ -35,6 +36,7 @@
35#include <linux/netfilter_ipv4.h> 36#include <linux/netfilter_ipv4.h>
36 37
37#include <net/ip.h> 38#include <net/ip.h>
39#include <net/route.h>
38#include <net/sock.h> 40#include <net/sock.h>
39 41
40#include <asm/uaccess.h> 42#include <asm/uaccess.h>
@@ -447,7 +449,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
447 out: 449 out:
448 read_unlock(&__ip_vs_svc_lock); 450 read_unlock(&__ip_vs_svc_lock);
449 451
450 IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", 452 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
451 fwmark, ip_vs_proto_name(protocol), 453 fwmark, ip_vs_proto_name(protocol),
452 NIPQUAD(vaddr), ntohs(vport), 454 NIPQUAD(vaddr), ntohs(vport),
453 svc?"hit":"not hit"); 455 svc?"hit":"not hit");
@@ -597,7 +599,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
597 */ 599 */
598 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 600 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
599 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " 601 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
600 "refcnt=%d\n", 602 "dest->refcnt=%d\n",
601 dest->vfwmark, 603 dest->vfwmark,
602 NIPQUAD(dest->addr), ntohs(dest->port), 604 NIPQUAD(dest->addr), ntohs(dest->port),
603 atomic_read(&dest->refcnt)); 605 atomic_read(&dest->refcnt));
@@ -804,7 +806,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
804 dest = ip_vs_trash_get_dest(svc, daddr, dport); 806 dest = ip_vs_trash_get_dest(svc, daddr, dport);
805 if (dest != NULL) { 807 if (dest != NULL) {
806 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " 808 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
807 "refcnt=%d, service %u/%u.%u.%u.%u:%u\n", 809 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
808 NIPQUAD(daddr), ntohs(dport), 810 NIPQUAD(daddr), ntohs(dport),
809 atomic_read(&dest->refcnt), 811 atomic_read(&dest->refcnt),
810 dest->vfwmark, 812 dest->vfwmark,
@@ -949,7 +951,8 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
949 atomic_dec(&dest->svc->refcnt); 951 atomic_dec(&dest->svc->refcnt);
950 kfree(dest); 952 kfree(dest);
951 } else { 953 } else {
952 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n", 954 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
955 "dest->refcnt=%d\n",
953 NIPQUAD(dest->addr), ntohs(dest->port), 956 NIPQUAD(dest->addr), ntohs(dest->port),
954 atomic_read(&dest->refcnt)); 957 atomic_read(&dest->refcnt));
955 list_add(&dest->n_list, &ip_vs_dest_trash); 958 list_add(&dest->n_list, &ip_vs_dest_trash);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index f3bc320dce93..9fee19c4c617 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -37,8 +37,10 @@
37 * 37 *
38 */ 38 */
39 39
40#include <linux/ip.h>
40#include <linux/module.h> 41#include <linux/module.h>
41#include <linux/kernel.h> 42#include <linux/kernel.h>
43#include <linux/skbuff.h>
42 44
43#include <net/ip_vs.h> 45#include <net/ip_vs.h>
44 46
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 67b3e2fc1fa1..c453e1e57f4b 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -13,8 +13,12 @@
13 * Changes: 13 * Changes:
14 * 14 *
15 */ 15 */
16#include <linux/config.h>
16#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/jiffies.h>
19#include <linux/slab.h>
17#include <linux/types.h> 20#include <linux/types.h>
21#include <linux/interrupt.h>
18 22
19#include <net/ip_vs.h> 23#include <net/ip_vs.h>
20 24
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 561cda326fa8..6e5cb92a5c83 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -41,8 +41,10 @@
41 * me to write this module. 41 * me to write this module.
42 */ 42 */
43 43
44#include <linux/ip.h>
44#include <linux/module.h> 45#include <linux/module.h>
45#include <linux/kernel.h> 46#include <linux/kernel.h>
47#include <linux/skbuff.h>
46 48
47/* for sysctl */ 49/* for sysctl */
48#include <linux/fs.h> 50#include <linux/fs.h>
@@ -228,33 +230,6 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
228} 230}
229 231
230 232
231#if 0000
232/*
233 * Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
234 * returns bool success.
235 */
236static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
237 struct ip_vs_lblc_entry *en)
238{
239 if (list_empty(&en->list)) {
240 IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
241 "called from %p\n", __builtin_return_address(0));
242 return 0;
243 }
244
245 /*
246 * Remove it from the table
247 */
248 write_lock(&tbl->lock);
249 list_del(&en->list);
250 INIT_LIST_HEAD(&en->list);
251 write_unlock(&tbl->lock);
252
253 return 1;
254}
255#endif
256
257
258/* 233/*
259 * Get ip_vs_lblc_entry associated with supplied parameters. 234 * Get ip_vs_lblc_entry associated with supplied parameters.
260 */ 235 */
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index ce456dbf09a5..32ba37ba72d8 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -39,8 +39,10 @@
39 * 39 *
40 */ 40 */
41 41
42#include <linux/ip.h>
42#include <linux/module.h> 43#include <linux/module.h>
43#include <linux/kernel.h> 44#include <linux/kernel.h>
45#include <linux/skbuff.h>
44 46
45/* for sysctl */ 47/* for sysctl */
46#include <linux/fs.h> 48#include <linux/fs.h>
@@ -414,33 +416,6 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
414} 416}
415 417
416 418
417#if 0000
418/*
419 * Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table.
420 * returns bool success.
421 */
422static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl,
423 struct ip_vs_lblcr_entry *en)
424{
425 if (list_empty(&en->list)) {
426 IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, "
427 "called from %p\n", __builtin_return_address(0));
428 return 0;
429 }
430
431 /*
432 * Remove it from the table
433 */
434 write_lock(&tbl->lock);
435 list_del(&en->list);
436 INIT_LIST_HEAD(&en->list);
437 write_unlock(&tbl->lock);
438
439 return 1;
440}
441#endif
442
443
444/* 419/*
445 * Get ip_vs_lblcr_entry associated with supplied parameters. 420 * Get ip_vs_lblcr_entry associated with supplied parameters.
446 */ 421 */
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 453e94a0bbd7..8b0505b09317 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -12,6 +12,8 @@
12 * 12 *
13 */ 13 */
14 14
15#include <linux/in.h>
16#include <linux/ip.h>
15#include <linux/module.h> 17#include <linux/module.h>
16#include <linux/kernel.h> 18#include <linux/kernel.h>
17#include <linux/netfilter.h> 19#include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index 478e5c7c7e8e..c36ccf057a19 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -12,6 +12,8 @@
12 * 12 *
13 */ 13 */
14 14
15#include <linux/in.h>
16#include <linux/ip.h>
15#include <linux/module.h> 17#include <linux/module.h>
16#include <linux/kernel.h> 18#include <linux/kernel.h>
17#include <linux/netfilter.h> 19#include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 0e878fd6215c..bc28b1160a3a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -275,28 +275,6 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
275 [IP_VS_TCP_S_LAST] = 2*HZ, 275 [IP_VS_TCP_S_LAST] = 2*HZ,
276}; 276};
277 277
278
279#if 0
280
281/* FIXME: This is going to die */
282
283static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = {
284 [IP_VS_TCP_S_NONE] = 2*HZ,
285 [IP_VS_TCP_S_ESTABLISHED] = 8*60*HZ,
286 [IP_VS_TCP_S_SYN_SENT] = 60*HZ,
287 [IP_VS_TCP_S_SYN_RECV] = 10*HZ,
288 [IP_VS_TCP_S_FIN_WAIT] = 60*HZ,
289 [IP_VS_TCP_S_TIME_WAIT] = 60*HZ,
290 [IP_VS_TCP_S_CLOSE] = 10*HZ,
291 [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ,
292 [IP_VS_TCP_S_LAST_ACK] = 30*HZ,
293 [IP_VS_TCP_S_LISTEN] = 2*60*HZ,
294 [IP_VS_TCP_S_SYNACK] = 100*HZ,
295 [IP_VS_TCP_S_LAST] = 2*HZ,
296};
297
298#endif
299
300static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { 278static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
301 [IP_VS_TCP_S_NONE] = "NONE", 279 [IP_VS_TCP_S_NONE] = "NONE",
302 [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED", 280 [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED",
@@ -448,7 +426,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
448 struct ip_vs_dest *dest = cp->dest; 426 struct ip_vs_dest *dest = cp->dest;
449 427
450 IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" 428 IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
451 "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n", 429 "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
452 pp->name, 430 pp->name,
453 (state_off==TCP_DIR_OUTPUT)?"output ":"input ", 431 (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
454 th->syn? 'S' : '.', 432 th->syn? 'S' : '.',
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ae5f2e0aefa..89d9175d8f28 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -15,8 +15,11 @@
15 * 15 *
16 */ 16 */
17 17
18#include <linux/in.h>
19#include <linux/ip.h>
18#include <linux/kernel.h> 20#include <linux/kernel.h>
19#include <linux/netfilter_ipv4.h> 21#include <linux/netfilter_ipv4.h>
22#include <linux/udp.h>
20 23
21#include <net/ip_vs.h> 24#include <net/ip_vs.h>
22 25
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 0f7c56a225bd..8bc42b76223d 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -22,6 +22,7 @@
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/sched.h> 23#include <linux/sched.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/interrupt.h>
25#include <asm/string.h> 26#include <asm/string.h>
26#include <linux/kmod.h> 27#include <linux/kmod.h>
27 28
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 6f7c50e44a39..7775e6cc68be 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -34,8 +34,10 @@
34 * 34 *
35 */ 35 */
36 36
37#include <linux/ip.h>
37#include <linux/module.h> 38#include <linux/module.h>
38#include <linux/kernel.h> 39#include <linux/kernel.h>
40#include <linux/skbuff.h>
39 41
40#include <net/ip_vs.h> 42#include <net/ip_vs.h>
41 43
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 2e5ced3d8062..1bca714bda3d 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -21,12 +21,14 @@
21 21
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/inetdevice.h>
24#include <linux/net.h> 25#include <linux/net.h>
25#include <linux/completion.h> 26#include <linux/completion.h>
26#include <linux/delay.h> 27#include <linux/delay.h>
27#include <linux/skbuff.h> 28#include <linux/skbuff.h>
28#include <linux/in.h> 29#include <linux/in.h>
29#include <linux/igmp.h> /* for ip_mc_join_group */ 30#include <linux/igmp.h> /* for ip_mc_join_group */
31#include <linux/udp.h>
30 32
31#include <net/ip.h> 33#include <net/ip.h>
32#include <net/sock.h> 34#include <net/sock.h>
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 3b87482049cf..52c12e9edbbc 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
322 struct net_device *tdev; /* Device to other host */ 322 struct net_device *tdev; /* Device to other host */
323 struct iphdr *old_iph = skb->nh.iph; 323 struct iphdr *old_iph = skb->nh.iph;
324 u8 tos = old_iph->tos; 324 u8 tos = old_iph->tos;
325 u16 df = old_iph->frag_off; 325 __be16 df = old_iph->frag_off;
326 struct iphdr *iph; /* Our new IP header */ 326 struct iphdr *iph; /* Our new IP header */
327 int max_headroom; /* The extra header space needed */ 327 int max_headroom; /* The extra header space needed */
328 int mtu; 328 int mtu;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ae0779d82c5d..52a3d7c57907 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -1,17 +1,11 @@
1/* IPv4 specific functions of netfilter core */ 1/* IPv4 specific functions of netfilter core */
2
3#include <linux/config.h>
4#ifdef CONFIG_NETFILTER
5
6#include <linux/kernel.h> 2#include <linux/kernel.h>
7#include <linux/netfilter.h> 3#include <linux/netfilter.h>
8#include <linux/netfilter_ipv4.h> 4#include <linux/netfilter_ipv4.h>
9
10#include <linux/tcp.h>
11#include <linux/udp.h>
12#include <linux/icmp.h>
13#include <net/route.h>
14#include <linux/ip.h> 5#include <linux/ip.h>
6#include <net/route.h>
7#include <net/xfrm.h>
8#include <net/ip.h>
15 9
16/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ 10/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
17int ip_route_me_harder(struct sk_buff **pskb) 11int ip_route_me_harder(struct sk_buff **pskb)
@@ -33,7 +27,6 @@ int ip_route_me_harder(struct sk_buff **pskb)
33#ifdef CONFIG_IP_ROUTE_FWMARK 27#ifdef CONFIG_IP_ROUTE_FWMARK
34 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; 28 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
35#endif 29#endif
36 fl.proto = iph->protocol;
37 if (ip_route_output_key(&rt, &fl) != 0) 30 if (ip_route_output_key(&rt, &fl) != 0)
38 return -1; 31 return -1;
39 32
@@ -60,6 +53,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
60 if ((*pskb)->dst->error) 53 if ((*pskb)->dst->error)
61 return -1; 54 return -1;
62 55
56#ifdef CONFIG_XFRM
57 if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
58 xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
59 if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
60 return -1;
61#endif
62
63 /* Change in oif may mean change in hh_len. */ 63 /* Change in oif may mean change in hh_len. */
64 hh_len = (*pskb)->dst->dev->hard_header_len; 64 hh_len = (*pskb)->dst->dev->hard_header_len;
65 if (skb_headroom(*pskb) < hh_len) { 65 if (skb_headroom(*pskb) < hh_len) {
@@ -78,6 +78,9 @@ int ip_route_me_harder(struct sk_buff **pskb)
78} 78}
79EXPORT_SYMBOL(ip_route_me_harder); 79EXPORT_SYMBOL(ip_route_me_harder);
80 80
81void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
82EXPORT_SYMBOL(ip_nat_decode_session);
83
81/* 84/*
82 * Extra routing may needed on local out, as the QUEUE target never 85 * Extra routing may needed on local out, as the QUEUE target never
83 * returns control to the table. 86 * returns control to the table.
@@ -135,5 +138,3 @@ static void fini(void)
135 138
136module_init(init); 139module_init(init);
137module_exit(fini); 140module_exit(fini);
138
139#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 88a60650e6b8..a9893ec03e02 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -487,6 +487,16 @@ config IP_NF_MATCH_STRING
487 487
488 To compile it as a module, choose M here. If unsure, say N. 488 To compile it as a module, choose M here. If unsure, say N.
489 489
490config IP_NF_MATCH_POLICY
491 tristate "IPsec policy match support"
492 depends on IP_NF_IPTABLES && XFRM
493 help
494 Policy matching allows you to match packets based on the
495 IPsec policy that was used during decapsulation/will
496 be used during encapsulation.
497
498 To compile it as a module, choose M here. If unsure, say N.
499
490# `filter', generic and specific targets 500# `filter', generic and specific targets
491config IP_NF_FILTER 501config IP_NF_FILTER
492 tristate "Packet filtering" 502 tristate "Packet filtering"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index d0a447e520a2..549b01a648b3 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
72obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o 72obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
73obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o 73obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
74obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o 74obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
75obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
75obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o 76obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
76obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o 77obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
77 78
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3c2e9639bba6..b6d5284c8020 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -13,6 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/capability.h>
16#include <linux/if_arp.h> 17#include <linux/if_arp.h>
17#include <linux/kmod.h> 18#include <linux/kmod.h>
18#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
@@ -68,19 +69,14 @@ struct arpt_table_info {
68 unsigned int initial_entries; 69 unsigned int initial_entries;
69 unsigned int hook_entry[NF_ARP_NUMHOOKS]; 70 unsigned int hook_entry[NF_ARP_NUMHOOKS];
70 unsigned int underflow[NF_ARP_NUMHOOKS]; 71 unsigned int underflow[NF_ARP_NUMHOOKS];
71 char entries[0] __attribute__((aligned(SMP_CACHE_BYTES))); 72 void *entries[NR_CPUS];
72}; 73};
73 74
74static LIST_HEAD(arpt_target); 75static LIST_HEAD(arpt_target);
75static LIST_HEAD(arpt_tables); 76static LIST_HEAD(arpt_tables);
77#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
76#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) 78#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
77 79
78#ifdef CONFIG_SMP
79#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
80#else
81#define TABLE_OFFSET(t,p) 0
82#endif
83
84static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, 80static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
85 char *hdr_addr, int len) 81 char *hdr_addr, int len)
86{ 82{
@@ -269,9 +265,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
269 outdev = out ? out->name : nulldevname; 265 outdev = out ? out->name : nulldevname;
270 266
271 read_lock_bh(&table->lock); 267 read_lock_bh(&table->lock);
272 table_base = (void *)table->private->entries 268 table_base = (void *)table->private->entries[smp_processor_id()];
273 + TABLE_OFFSET(table->private,
274 smp_processor_id());
275 e = get_entry(table_base, table->private->hook_entry[hook]); 269 e = get_entry(table_base, table->private->hook_entry[hook]);
276 back = get_entry(table_base, table->private->underflow[hook]); 270 back = get_entry(table_base, table->private->underflow[hook]);
277 271
@@ -462,7 +456,8 @@ static inline int unconditional(const struct arpt_arp *arp)
462/* Figures out from what hook each rule can be called: returns 0 if 456/* Figures out from what hook each rule can be called: returns 0 if
463 * there are loops. Puts hook bitmask in comefrom. 457 * there are loops. Puts hook bitmask in comefrom.
464 */ 458 */
465static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks) 459static int mark_source_chains(struct arpt_table_info *newinfo,
460 unsigned int valid_hooks, void *entry0)
466{ 461{
467 unsigned int hook; 462 unsigned int hook;
468 463
@@ -472,7 +467,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
472 for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { 467 for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
473 unsigned int pos = newinfo->hook_entry[hook]; 468 unsigned int pos = newinfo->hook_entry[hook];
474 struct arpt_entry *e 469 struct arpt_entry *e
475 = (struct arpt_entry *)(newinfo->entries + pos); 470 = (struct arpt_entry *)(entry0 + pos);
476 471
477 if (!(valid_hooks & (1 << hook))) 472 if (!(valid_hooks & (1 << hook)))
478 continue; 473 continue;
@@ -514,13 +509,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
514 goto next; 509 goto next;
515 510
516 e = (struct arpt_entry *) 511 e = (struct arpt_entry *)
517 (newinfo->entries + pos); 512 (entry0 + pos);
518 } while (oldpos == pos + e->next_offset); 513 } while (oldpos == pos + e->next_offset);
519 514
520 /* Move along one */ 515 /* Move along one */
521 size = e->next_offset; 516 size = e->next_offset;
522 e = (struct arpt_entry *) 517 e = (struct arpt_entry *)
523 (newinfo->entries + pos + size); 518 (entry0 + pos + size);
524 e->counters.pcnt = pos; 519 e->counters.pcnt = pos;
525 pos += size; 520 pos += size;
526 } else { 521 } else {
@@ -537,7 +532,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali
537 newpos = pos + e->next_offset; 532 newpos = pos + e->next_offset;
538 } 533 }
539 e = (struct arpt_entry *) 534 e = (struct arpt_entry *)
540 (newinfo->entries + newpos); 535 (entry0 + newpos);
541 e->counters.pcnt = pos; 536 e->counters.pcnt = pos;
542 pos = newpos; 537 pos = newpos;
543 } 538 }
@@ -689,6 +684,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
689static int translate_table(const char *name, 684static int translate_table(const char *name,
690 unsigned int valid_hooks, 685 unsigned int valid_hooks,
691 struct arpt_table_info *newinfo, 686 struct arpt_table_info *newinfo,
687 void *entry0,
692 unsigned int size, 688 unsigned int size,
693 unsigned int number, 689 unsigned int number,
694 const unsigned int *hook_entries, 690 const unsigned int *hook_entries,
@@ -710,11 +706,11 @@ static int translate_table(const char *name,
710 i = 0; 706 i = 0;
711 707
712 /* Walk through entries, checking offsets. */ 708 /* Walk through entries, checking offsets. */
713 ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 709 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
714 check_entry_size_and_hooks, 710 check_entry_size_and_hooks,
715 newinfo, 711 newinfo,
716 newinfo->entries, 712 entry0,
717 newinfo->entries + size, 713 entry0 + size,
718 hook_entries, underflows, &i); 714 hook_entries, underflows, &i);
719 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); 715 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
720 if (ret != 0) 716 if (ret != 0)
@@ -743,29 +739,26 @@ static int translate_table(const char *name,
743 } 739 }
744 } 740 }
745 741
746 if (!mark_source_chains(newinfo, valid_hooks)) { 742 if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
747 duprintf("Looping hook\n"); 743 duprintf("Looping hook\n");
748 return -ELOOP; 744 return -ELOOP;
749 } 745 }
750 746
751 /* Finally, each sanity check must pass */ 747 /* Finally, each sanity check must pass */
752 i = 0; 748 i = 0;
753 ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 749 ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
754 check_entry, name, size, &i); 750 check_entry, name, size, &i);
755 751
756 if (ret != 0) { 752 if (ret != 0) {
757 ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 753 ARPT_ENTRY_ITERATE(entry0, newinfo->size,
758 cleanup_entry, &i); 754 cleanup_entry, &i);
759 return ret; 755 return ret;
760 } 756 }
761 757
762 /* And one copy for every other CPU */ 758 /* And one copy for every other CPU */
763 for_each_cpu(i) { 759 for_each_cpu(i) {
764 if (i == 0) 760 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
765 continue; 761 memcpy(newinfo->entries[i], entry0, newinfo->size);
766 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
767 newinfo->entries,
768 SMP_ALIGN(newinfo->size));
769 } 762 }
770 763
771 return ret; 764 return ret;
@@ -807,15 +800,42 @@ static inline int add_entry_to_counter(const struct arpt_entry *e,
807 return 0; 800 return 0;
808} 801}
809 802
803static inline int set_entry_to_counter(const struct arpt_entry *e,
804 struct arpt_counters total[],
805 unsigned int *i)
806{
807 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
808
809 (*i)++;
810 return 0;
811}
812
810static void get_counters(const struct arpt_table_info *t, 813static void get_counters(const struct arpt_table_info *t,
811 struct arpt_counters counters[]) 814 struct arpt_counters counters[])
812{ 815{
813 unsigned int cpu; 816 unsigned int cpu;
814 unsigned int i; 817 unsigned int i;
818 unsigned int curcpu;
819
820 /* Instead of clearing (by a previous call to memset())
821 * the counters and using adds, we set the counters
822 * with data used by 'current' CPU
823 * We dont care about preemption here.
824 */
825 curcpu = raw_smp_processor_id();
826
827 i = 0;
828 ARPT_ENTRY_ITERATE(t->entries[curcpu],
829 t->size,
830 set_entry_to_counter,
831 counters,
832 &i);
815 833
816 for_each_cpu(cpu) { 834 for_each_cpu(cpu) {
835 if (cpu == curcpu)
836 continue;
817 i = 0; 837 i = 0;
818 ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 838 ARPT_ENTRY_ITERATE(t->entries[cpu],
819 t->size, 839 t->size,
820 add_entry_to_counter, 840 add_entry_to_counter,
821 counters, 841 counters,
@@ -831,6 +851,7 @@ static int copy_entries_to_user(unsigned int total_size,
831 struct arpt_entry *e; 851 struct arpt_entry *e;
832 struct arpt_counters *counters; 852 struct arpt_counters *counters;
833 int ret = 0; 853 int ret = 0;
854 void *loc_cpu_entry;
834 855
835 /* We need atomic snapshot of counters: rest doesn't change 856 /* We need atomic snapshot of counters: rest doesn't change
836 * (other than comefrom, which userspace doesn't care 857 * (other than comefrom, which userspace doesn't care
@@ -843,13 +864,13 @@ static int copy_entries_to_user(unsigned int total_size,
843 return -ENOMEM; 864 return -ENOMEM;
844 865
845 /* First, sum counters... */ 866 /* First, sum counters... */
846 memset(counters, 0, countersize);
847 write_lock_bh(&table->lock); 867 write_lock_bh(&table->lock);
848 get_counters(table->private, counters); 868 get_counters(table->private, counters);
849 write_unlock_bh(&table->lock); 869 write_unlock_bh(&table->lock);
850 870
851 /* ... then copy entire thing from CPU 0... */ 871 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
852 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 872 /* ... then copy entire thing ... */
873 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
853 ret = -EFAULT; 874 ret = -EFAULT;
854 goto free_counters; 875 goto free_counters;
855 } 876 }
@@ -859,7 +880,7 @@ static int copy_entries_to_user(unsigned int total_size,
859 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 880 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
860 struct arpt_entry_target *t; 881 struct arpt_entry_target *t;
861 882
862 e = (struct arpt_entry *)(table->private->entries + off); 883 e = (struct arpt_entry *)(loc_cpu_entry + off);
863 if (copy_to_user(userptr + off 884 if (copy_to_user(userptr + off
864 + offsetof(struct arpt_entry, counters), 885 + offsetof(struct arpt_entry, counters),
865 &counters[num], 886 &counters[num],
@@ -911,6 +932,47 @@ static int get_entries(const struct arpt_get_entries *entries,
911 return ret; 932 return ret;
912} 933}
913 934
935static void free_table_info(struct arpt_table_info *info)
936{
937 int cpu;
938 for_each_cpu(cpu) {
939 if (info->size <= PAGE_SIZE)
940 kfree(info->entries[cpu]);
941 else
942 vfree(info->entries[cpu]);
943 }
944 kfree(info);
945}
946
947static struct arpt_table_info *alloc_table_info(unsigned int size)
948{
949 struct arpt_table_info *newinfo;
950 int cpu;
951
952 newinfo = kzalloc(sizeof(struct arpt_table_info), GFP_KERNEL);
953 if (!newinfo)
954 return NULL;
955
956 newinfo->size = size;
957
958 for_each_cpu(cpu) {
959 if (size <= PAGE_SIZE)
960 newinfo->entries[cpu] = kmalloc_node(size,
961 GFP_KERNEL,
962 cpu_to_node(cpu));
963 else
964 newinfo->entries[cpu] = vmalloc_node(size,
965 cpu_to_node(cpu));
966
967 if (newinfo->entries[cpu] == NULL) {
968 free_table_info(newinfo);
969 return NULL;
970 }
971 }
972
973 return newinfo;
974}
975
914static int do_replace(void __user *user, unsigned int len) 976static int do_replace(void __user *user, unsigned int len)
915{ 977{
916 int ret; 978 int ret;
@@ -918,6 +980,7 @@ static int do_replace(void __user *user, unsigned int len)
918 struct arpt_table *t; 980 struct arpt_table *t;
919 struct arpt_table_info *newinfo, *oldinfo; 981 struct arpt_table_info *newinfo, *oldinfo;
920 struct arpt_counters *counters; 982 struct arpt_counters *counters;
983 void *loc_cpu_entry, *loc_cpu_old_entry;
921 984
922 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 985 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
923 return -EFAULT; 986 return -EFAULT;
@@ -930,13 +993,13 @@ static int do_replace(void __user *user, unsigned int len)
930 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) 993 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
931 return -ENOMEM; 994 return -ENOMEM;
932 995
933 newinfo = vmalloc(sizeof(struct arpt_table_info) 996 newinfo = alloc_table_info(tmp.size);
934 + SMP_ALIGN(tmp.size) *
935 (highest_possible_processor_id()+1));
936 if (!newinfo) 997 if (!newinfo)
937 return -ENOMEM; 998 return -ENOMEM;
938 999
939 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 1000 /* choose the copy that is on our node/cpu */
1001 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1002 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
940 tmp.size) != 0) { 1003 tmp.size) != 0) {
941 ret = -EFAULT; 1004 ret = -EFAULT;
942 goto free_newinfo; 1005 goto free_newinfo;
@@ -947,10 +1010,9 @@ static int do_replace(void __user *user, unsigned int len)
947 ret = -ENOMEM; 1010 ret = -ENOMEM;
948 goto free_newinfo; 1011 goto free_newinfo;
949 } 1012 }
950 memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters));
951 1013
952 ret = translate_table(tmp.name, tmp.valid_hooks, 1014 ret = translate_table(tmp.name, tmp.valid_hooks,
953 newinfo, tmp.size, tmp.num_entries, 1015 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
954 tmp.hook_entry, tmp.underflow); 1016 tmp.hook_entry, tmp.underflow);
955 if (ret != 0) 1017 if (ret != 0)
956 goto free_newinfo_counters; 1018 goto free_newinfo_counters;
@@ -989,8 +1051,10 @@ static int do_replace(void __user *user, unsigned int len)
989 /* Get the old counters. */ 1051 /* Get the old counters. */
990 get_counters(oldinfo, counters); 1052 get_counters(oldinfo, counters);
991 /* Decrease module usage counts and free resource */ 1053 /* Decrease module usage counts and free resource */
992 ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 1054 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
993 vfree(oldinfo); 1055 ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1056
1057 free_table_info(oldinfo);
994 if (copy_to_user(tmp.counters, counters, 1058 if (copy_to_user(tmp.counters, counters,
995 sizeof(struct arpt_counters) * tmp.num_counters) != 0) 1059 sizeof(struct arpt_counters) * tmp.num_counters) != 0)
996 ret = -EFAULT; 1060 ret = -EFAULT;
@@ -1002,11 +1066,11 @@ static int do_replace(void __user *user, unsigned int len)
1002 module_put(t->me); 1066 module_put(t->me);
1003 up(&arpt_mutex); 1067 up(&arpt_mutex);
1004 free_newinfo_counters_untrans: 1068 free_newinfo_counters_untrans:
1005 ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL); 1069 ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1006 free_newinfo_counters: 1070 free_newinfo_counters:
1007 vfree(counters); 1071 vfree(counters);
1008 free_newinfo: 1072 free_newinfo:
1009 vfree(newinfo); 1073 free_table_info(newinfo);
1010 return ret; 1074 return ret;
1011} 1075}
1012 1076
@@ -1030,6 +1094,7 @@ static int do_add_counters(void __user *user, unsigned int len)
1030 struct arpt_counters_info tmp, *paddc; 1094 struct arpt_counters_info tmp, *paddc;
1031 struct arpt_table *t; 1095 struct arpt_table *t;
1032 int ret = 0; 1096 int ret = 0;
1097 void *loc_cpu_entry;
1033 1098
1034 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1099 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1035 return -EFAULT; 1100 return -EFAULT;
@@ -1059,7 +1124,9 @@ static int do_add_counters(void __user *user, unsigned int len)
1059 } 1124 }
1060 1125
1061 i = 0; 1126 i = 0;
1062 ARPT_ENTRY_ITERATE(t->private->entries, 1127 /* Choose the copy that is on our node */
1128 loc_cpu_entry = t->private->entries[smp_processor_id()];
1129 ARPT_ENTRY_ITERATE(loc_cpu_entry,
1063 t->private->size, 1130 t->private->size,
1064 add_counter_to_entry, 1131 add_counter_to_entry,
1065 paddc->counters, 1132 paddc->counters,
@@ -1220,30 +1287,32 @@ int arpt_register_table(struct arpt_table *table,
1220 struct arpt_table_info *newinfo; 1287 struct arpt_table_info *newinfo;
1221 static struct arpt_table_info bootstrap 1288 static struct arpt_table_info bootstrap
1222 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1289 = { 0, 0, 0, { 0 }, { 0 }, { } };
1290 void *loc_cpu_entry;
1223 1291
1224 newinfo = vmalloc(sizeof(struct arpt_table_info) 1292 newinfo = alloc_table_info(repl->size);
1225 + SMP_ALIGN(repl->size) *
1226 (highest_possible_processor_id()+1));
1227 if (!newinfo) { 1293 if (!newinfo) {
1228 ret = -ENOMEM; 1294 ret = -ENOMEM;
1229 return ret; 1295 return ret;
1230 } 1296 }
1231 memcpy(newinfo->entries, repl->entries, repl->size); 1297
1298 /* choose the copy on our node/cpu */
1299 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1300 memcpy(loc_cpu_entry, repl->entries, repl->size);
1232 1301
1233 ret = translate_table(table->name, table->valid_hooks, 1302 ret = translate_table(table->name, table->valid_hooks,
1234 newinfo, repl->size, 1303 newinfo, loc_cpu_entry, repl->size,
1235 repl->num_entries, 1304 repl->num_entries,
1236 repl->hook_entry, 1305 repl->hook_entry,
1237 repl->underflow); 1306 repl->underflow);
1238 duprintf("arpt_register_table: translate table gives %d\n", ret); 1307 duprintf("arpt_register_table: translate table gives %d\n", ret);
1239 if (ret != 0) { 1308 if (ret != 0) {
1240 vfree(newinfo); 1309 free_table_info(newinfo);
1241 return ret; 1310 return ret;
1242 } 1311 }
1243 1312
1244 ret = down_interruptible(&arpt_mutex); 1313 ret = down_interruptible(&arpt_mutex);
1245 if (ret != 0) { 1314 if (ret != 0) {
1246 vfree(newinfo); 1315 free_table_info(newinfo);
1247 return ret; 1316 return ret;
1248 } 1317 }
1249 1318
@@ -1272,20 +1341,23 @@ int arpt_register_table(struct arpt_table *table,
1272 return ret; 1341 return ret;
1273 1342
1274 free_unlock: 1343 free_unlock:
1275 vfree(newinfo); 1344 free_table_info(newinfo);
1276 goto unlock; 1345 goto unlock;
1277} 1346}
1278 1347
1279void arpt_unregister_table(struct arpt_table *table) 1348void arpt_unregister_table(struct arpt_table *table)
1280{ 1349{
1350 void *loc_cpu_entry;
1351
1281 down(&arpt_mutex); 1352 down(&arpt_mutex);
1282 LIST_DELETE(&arpt_tables, table); 1353 LIST_DELETE(&arpt_tables, table);
1283 up(&arpt_mutex); 1354 up(&arpt_mutex);
1284 1355
1285 /* Decrease module usage counts and free resources */ 1356 /* Decrease module usage counts and free resources */
1286 ARPT_ENTRY_ITERATE(table->private->entries, table->private->size, 1357 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1358 ARPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1287 cleanup_entry, NULL); 1359 cleanup_entry, NULL);
1288 vfree(table->private); 1360 free_table_info(table->private);
1289} 1361}
1290 1362
1291/* The built-in targets: standard (NULL) and error. */ 1363/* The built-in targets: standard (NULL) and error. */
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index e52847fa10f5..84e4f79b7ffa 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -18,11 +18,13 @@
18 * 18 *
19 */ 19 */
20 20
21#include <linux/in.h>
21#include <linux/kernel.h> 22#include <linux/kernel.h>
22#include <linux/module.h> 23#include <linux/module.h>
23#include <linux/netfilter.h> 24#include <linux/netfilter.h>
24#include <linux/ip.h> 25#include <linux/ip.h>
25#include <linux/moduleparam.h> 26#include <linux/moduleparam.h>
27#include <linux/udp.h>
26#include <net/checksum.h> 28#include <net/checksum.h>
27#include <net/udp.h> 29#include <net/udp.h>
28 30
@@ -34,7 +36,7 @@ static unsigned int master_timeout = 300;
34MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); 36MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
35MODULE_DESCRIPTION("Amanda connection tracking module"); 37MODULE_DESCRIPTION("Amanda connection tracking module");
36MODULE_LICENSE("GPL"); 38MODULE_LICENSE("GPL");
37module_param(master_timeout, int, 0600); 39module_param(master_timeout, uint, 0600);
38MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); 40MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
39 41
40static const char *conns[] = { "DATA ", "MESG ", "INDEX " }; 42static const char *conns[] = { "DATA ", "MESG ", "INDEX " };
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 68b173bcda60..e627e5856172 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -34,7 +34,7 @@ static int ports_c;
34module_param_array(ports, ushort, &ports_c, 0400); 34module_param_array(ports, ushort, &ports_c, 0400);
35 35
36static int loose; 36static int loose;
37module_param(loose, int, 0600); 37module_param(loose, bool, 0600);
38 38
39unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb, 39unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
40 enum ip_conntrack_info ctinfo, 40 enum ip_conntrack_info ctinfo,
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 4108a5e12b3c..d716bba798f2 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -762,7 +762,7 @@ static struct ip_conntrack_helper pptp = {
762 .help = conntrack_pptp_help 762 .help = conntrack_pptp_help
763}; 763};
764 764
765extern void __exit ip_ct_proto_gre_fini(void); 765extern void ip_ct_proto_gre_fini(void);
766extern int __init ip_ct_proto_gre_init(void); 766extern int __init ip_ct_proto_gre_init(void);
767 767
768/* ip_conntrack_pptp initialization */ 768/* ip_conntrack_pptp initialization */
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index d7c40421d0d1..c51a2cf71b4b 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -36,7 +36,7 @@
36#define MAX_PORTS 8 36#define MAX_PORTS 8
37static unsigned short ports[MAX_PORTS]; 37static unsigned short ports[MAX_PORTS];
38static int ports_c; 38static int ports_c;
39static int max_dcc_channels = 8; 39static unsigned int max_dcc_channels = 8;
40static unsigned int dcc_timeout = 300; 40static unsigned int dcc_timeout = 300;
41/* This is slow, but it's simple. --RR */ 41/* This is slow, but it's simple. --RR */
42static char *irc_buffer; 42static char *irc_buffer;
@@ -54,9 +54,9 @@ MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
54MODULE_LICENSE("GPL"); 54MODULE_LICENSE("GPL");
55module_param_array(ports, ushort, &ports_c, 0400); 55module_param_array(ports, ushort, &ports_c, 0400);
56MODULE_PARM_DESC(ports, "port numbers of IRC servers"); 56MODULE_PARM_DESC(ports, "port numbers of IRC servers");
57module_param(max_dcc_channels, int, 0400); 57module_param(max_dcc_channels, uint, 0400);
58MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session"); 58MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
59module_param(dcc_timeout, int, 0400); 59module_param(dcc_timeout, uint, 0400);
60MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels"); 60MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
61 61
62static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " }; 62static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
@@ -254,10 +254,6 @@ static int __init init(void)
254 printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n"); 254 printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
255 return -EBUSY; 255 return -EBUSY;
256 } 256 }
257 if (dcc_timeout < 0) {
258 printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
259 return -EBUSY;
260 }
261 257
262 irc_buffer = kmalloc(65536, GFP_KERNEL); 258 irc_buffer = kmalloc(65536, GFP_KERNEL);
263 if (!irc_buffer) 259 if (!irc_buffer)
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 186646eb249f..4e68e16a2612 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -37,7 +37,7 @@ MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
37MODULE_LICENSE("GPL"); 37MODULE_LICENSE("GPL");
38 38
39static unsigned int timeout = 3; 39static unsigned int timeout = 3;
40module_param(timeout, int, 0600); 40module_param(timeout, uint, 0400);
41MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 41MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
42 42
43static int help(struct sk_buff **pskb, 43static int help(struct sk_buff **pskb,
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 91fe8f2e38ff..c9ebbe0d2d9c 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -79,6 +79,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
79 const struct ip_conntrack_tuple *tuple) 79 const struct ip_conntrack_tuple *tuple)
80{ 80{
81 struct nfattr *nest_parms; 81 struct nfattr *nest_parms;
82 int ret;
82 83
83 nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); 84 nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
84 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); 85 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
@@ -86,10 +87,10 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
86 NFA_NEST_END(skb, nest_parms); 87 NFA_NEST_END(skb, nest_parms);
87 88
88 nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); 89 nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
89 ctnetlink_dump_tuples_proto(skb, tuple); 90 ret = ctnetlink_dump_tuples_proto(skb, tuple);
90 NFA_NEST_END(skb, nest_parms); 91 NFA_NEST_END(skb, nest_parms);
91 92
92 return 0; 93 return ret;
93 94
94nfattr_failure: 95nfattr_failure:
95 return -1; 96 return -1;
@@ -160,7 +161,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
160 return 0; 161 return 0;
161 162
162 nest_helper = NFA_NEST(skb, CTA_HELP); 163 nest_helper = NFA_NEST(skb, CTA_HELP);
163 NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name); 164 NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
164 165
165 if (ct->helper->to_nfattr) 166 if (ct->helper->to_nfattr)
166 ct->helper->to_nfattr(skb, ct); 167 ct->helper->to_nfattr(skb, ct);
@@ -229,7 +230,7 @@ nfattr_failure:
229static inline int 230static inline int
230ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) 231ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
231{ 232{
232 unsigned int use = htonl(atomic_read(&ct->ct_general.use)); 233 u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
233 234
234 NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); 235 NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
235 return 0; 236 return 0;
@@ -311,29 +312,22 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
311 if (events & IPCT_DESTROY) { 312 if (events & IPCT_DESTROY) {
312 type = IPCTNL_MSG_CT_DELETE; 313 type = IPCTNL_MSG_CT_DELETE;
313 group = NFNLGRP_CONNTRACK_DESTROY; 314 group = NFNLGRP_CONNTRACK_DESTROY;
314 goto alloc_skb; 315 } else if (events & (IPCT_NEW | IPCT_RELATED)) {
315 }
316 if (events & (IPCT_NEW | IPCT_RELATED)) {
317 type = IPCTNL_MSG_CT_NEW; 316 type = IPCTNL_MSG_CT_NEW;
318 flags = NLM_F_CREATE|NLM_F_EXCL; 317 flags = NLM_F_CREATE|NLM_F_EXCL;
319 /* dump everything */ 318 /* dump everything */
320 events = ~0UL; 319 events = ~0UL;
321 group = NFNLGRP_CONNTRACK_NEW; 320 group = NFNLGRP_CONNTRACK_NEW;
322 goto alloc_skb; 321 } else if (events & (IPCT_STATUS |
323 }
324 if (events & (IPCT_STATUS |
325 IPCT_PROTOINFO | 322 IPCT_PROTOINFO |
326 IPCT_HELPER | 323 IPCT_HELPER |
327 IPCT_HELPINFO | 324 IPCT_HELPINFO |
328 IPCT_NATINFO)) { 325 IPCT_NATINFO)) {
329 type = IPCTNL_MSG_CT_NEW; 326 type = IPCTNL_MSG_CT_NEW;
330 group = NFNLGRP_CONNTRACK_UPDATE; 327 group = NFNLGRP_CONNTRACK_UPDATE;
331 goto alloc_skb; 328 } else
332 } 329 return NOTIFY_DONE;
333 330
334 return NOTIFY_DONE;
335
336alloc_skb:
337 /* FIXME: Check if there are any listeners before, don't hurt performance */ 331 /* FIXME: Check if there are any listeners before, don't hurt performance */
338 332
339 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 333 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
@@ -1037,6 +1031,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
1037 return err; 1031 return err;
1038 } 1032 }
1039 1033
1034#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
1035 if (cda[CTA_MARK-1])
1036 ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
1037#endif
1038
1040 ct->helper = ip_conntrack_helper_find_get(rtuple); 1039 ct->helper = ip_conntrack_helper_find_get(rtuple);
1041 1040
1042 add_timer(&ct->timeout); 1041 add_timer(&ct->timeout);
@@ -1045,11 +1044,6 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
1045 if (ct->helper) 1044 if (ct->helper)
1046 ip_conntrack_helper_put(ct->helper); 1045 ip_conntrack_helper_put(ct->helper);
1047 1046
1048#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
1049 if (cda[CTA_MARK-1])
1050 ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
1051#endif
1052
1053 DEBUGP("conntrack with id %u inserted\n", ct->id); 1047 DEBUGP("conntrack with id %u inserted\n", ct->id);
1054 return 0; 1048 return 0;
1055 1049
@@ -1209,7 +1203,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1209 unsigned int type; 1203 unsigned int type;
1210 unsigned char *b; 1204 unsigned char *b;
1211 int flags = 0; 1205 int flags = 0;
1212 u16 proto;
1213 1206
1214 if (events & IPEXP_NEW) { 1207 if (events & IPEXP_NEW) {
1215 type = IPCTNL_MSG_EXP_NEW; 1208 type = IPCTNL_MSG_EXP_NEW;
@@ -1236,7 +1229,6 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1236 goto nfattr_failure; 1229 goto nfattr_failure;
1237 1230
1238 nlh->nlmsg_len = skb->tail - b; 1231 nlh->nlmsg_len = skb->tail - b;
1239 proto = exp->tuple.dst.protonum;
1240 nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); 1232 nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
1241 return NOTIFY_DONE; 1233 return NOTIFY_DONE;
1242 1234
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index 88c3712bd251..f891308b5e4c 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
12#include <linux/netfilter.h> 12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 13#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
14 14
15unsigned long ip_ct_generic_timeout = 600*HZ; 15unsigned int ip_ct_generic_timeout = 600*HZ;
16 16
17static int generic_pkt_to_tuple(const struct sk_buff *skb, 17static int generic_pkt_to_tuple(const struct sk_buff *skb,
18 unsigned int dataoff, 18 unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 744abb9d377a..c777abf16cb7 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -31,6 +31,7 @@
31#include <linux/ip.h> 31#include <linux/ip.h>
32#include <linux/in.h> 32#include <linux/in.h>
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/seq_file.h>
34 35
35static DEFINE_RWLOCK(ip_ct_gre_lock); 36static DEFINE_RWLOCK(ip_ct_gre_lock);
36#define ASSERT_READ_LOCK(x) 37#define ASSERT_READ_LOCK(x)
@@ -308,7 +309,10 @@ int __init ip_ct_proto_gre_init(void)
308 return ip_conntrack_protocol_register(&gre); 309 return ip_conntrack_protocol_register(&gre);
309} 310}
310 311
311void __exit ip_ct_proto_gre_fini(void) 312/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
313 * init() code on errors.
314 */
315void ip_ct_proto_gre_fini(void)
312{ 316{
313 struct list_head *pos, *n; 317 struct list_head *pos, *n;
314 318
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 5f9925db608e..3021af0910f1 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -16,13 +16,12 @@
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <net/ip.h> 17#include <net/ip.h>
18#include <net/checksum.h> 18#include <net/checksum.h>
19#include <linux/netfilter.h>
20#include <linux/netfilter_ipv4.h> 19#include <linux/netfilter_ipv4.h>
21#include <linux/netfilter_ipv4/ip_conntrack.h> 20#include <linux/netfilter_ipv4/ip_conntrack.h>
22#include <linux/netfilter_ipv4/ip_conntrack_core.h> 21#include <linux/netfilter_ipv4/ip_conntrack_core.h>
23#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 22#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
24 23
25unsigned long ip_ct_icmp_timeout = 30*HZ; 24unsigned int ip_ct_icmp_timeout = 30*HZ;
26 25
27#if 0 26#if 0
28#define DEBUGP printk 27#define DEBUGP printk
@@ -47,20 +46,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
47 return 1; 46 return 1;
48} 47}
49 48
49/* Add 1; spaces filled with 0. */
50static const u_int8_t invmap[] = {
51 [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
52 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
53 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
54 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
55 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
56 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
57 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
58 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
59};
60
50static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple, 61static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
51 const struct ip_conntrack_tuple *orig) 62 const struct ip_conntrack_tuple *orig)
52{ 63{
53 /* Add 1; spaces filled with 0. */
54 static const u_int8_t invmap[]
55 = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
56 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
57 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
58 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
59 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
60 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
61 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
62 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
63
64 if (orig->dst.u.icmp.type >= sizeof(invmap) 64 if (orig->dst.u.icmp.type >= sizeof(invmap)
65 || !invmap[orig->dst.u.icmp.type]) 65 || !invmap[orig->dst.u.icmp.type])
66 return 0; 66 return 0;
@@ -110,17 +110,17 @@ static int icmp_packet(struct ip_conntrack *ct,
110 return NF_ACCEPT; 110 return NF_ACCEPT;
111} 111}
112 112
113static const u_int8_t valid_new[] = {
114 [ICMP_ECHO] = 1,
115 [ICMP_TIMESTAMP] = 1,
116 [ICMP_INFO_REQUEST] = 1,
117 [ICMP_ADDRESS] = 1
118};
119
120/* Called when a new connection for this protocol found. */ 113/* Called when a new connection for this protocol found. */
121static int icmp_new(struct ip_conntrack *conntrack, 114static int icmp_new(struct ip_conntrack *conntrack,
122 const struct sk_buff *skb) 115 const struct sk_buff *skb)
123{ 116{
117 static const u_int8_t valid_new[] = {
118 [ICMP_ECHO] = 1,
119 [ICMP_TIMESTAMP] = 1,
120 [ICMP_INFO_REQUEST] = 1,
121 [ICMP_ADDRESS] = 1
122 };
123
124 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) 124 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
125 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { 125 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
126 /* Can't create a new ICMP `conn' with this. */ 126 /* Can't create a new ICMP `conn' with this. */
@@ -279,10 +279,6 @@ static int icmp_tuple_to_nfattr(struct sk_buff *skb,
279 NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), 279 NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
280 &t->dst.u.icmp.code); 280 &t->dst.u.icmp.code);
281 281
282 if (t->dst.u.icmp.type >= sizeof(valid_new)
283 || !valid_new[t->dst.u.icmp.type])
284 return -EINVAL;
285
286 return 0; 282 return 0;
287 283
288nfattr_failure: 284nfattr_failure:
@@ -295,7 +291,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
295 if (!tb[CTA_PROTO_ICMP_TYPE-1] 291 if (!tb[CTA_PROTO_ICMP_TYPE-1]
296 || !tb[CTA_PROTO_ICMP_CODE-1] 292 || !tb[CTA_PROTO_ICMP_CODE-1]
297 || !tb[CTA_PROTO_ICMP_ID-1]) 293 || !tb[CTA_PROTO_ICMP_ID-1])
298 return -1; 294 return -EINVAL;
299 295
300 tuple->dst.u.icmp.type = 296 tuple->dst.u.icmp.type =
301 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); 297 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
@@ -304,6 +300,10 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
304 tuple->src.u.icmp.id = 300 tuple->src.u.icmp.id =
305 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); 301 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
306 302
303 if (tuple->dst.u.icmp.type >= sizeof(invmap)
304 || !invmap[tuple->dst.u.icmp.type])
305 return -EINVAL;
306
307 return 0; 307 return 0;
308} 308}
309#endif 309#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 977fb59d4563..be602e8aeab0 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -16,6 +16,7 @@
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/timer.h> 18#include <linux/timer.h>
19#include <linux/interrupt.h>
19#include <linux/netfilter.h> 20#include <linux/netfilter.h>
20#include <linux/module.h> 21#include <linux/module.h>
21#include <linux/in.h> 22#include <linux/in.h>
@@ -57,15 +58,15 @@ static const char *sctp_conntrack_names[] = {
57#define HOURS * 60 MINS 58#define HOURS * 60 MINS
58#define DAYS * 24 HOURS 59#define DAYS * 24 HOURS
59 60
60static unsigned long ip_ct_sctp_timeout_closed = 10 SECS; 61static unsigned int ip_ct_sctp_timeout_closed = 10 SECS;
61static unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS; 62static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS;
62static unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS; 63static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
63static unsigned long ip_ct_sctp_timeout_established = 5 DAYS; 64static unsigned int ip_ct_sctp_timeout_established = 5 DAYS;
64static unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; 65static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
65static unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; 66static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
66static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; 67static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
67 68
68static const unsigned long * sctp_timeouts[] 69static const unsigned int * sctp_timeouts[]
69= { NULL, /* SCTP_CONNTRACK_NONE */ 70= { NULL, /* SCTP_CONNTRACK_NONE */
70 &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ 71 &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
71 &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ 72 &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index e7fa29e576dc..e0dc37063545 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -32,7 +32,6 @@
32 32
33#include <net/tcp.h> 33#include <net/tcp.h>
34 34
35#include <linux/netfilter.h>
36#include <linux/netfilter_ipv4.h> 35#include <linux/netfilter_ipv4.h>
37#include <linux/netfilter_ipv4/ip_conntrack.h> 36#include <linux/netfilter_ipv4/ip_conntrack.h>
38#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 37#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -85,21 +84,21 @@ static const char *tcp_conntrack_names[] = {
85#define HOURS * 60 MINS 84#define HOURS * 60 MINS
86#define DAYS * 24 HOURS 85#define DAYS * 24 HOURS
87 86
88unsigned long ip_ct_tcp_timeout_syn_sent = 2 MINS; 87unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS;
89unsigned long ip_ct_tcp_timeout_syn_recv = 60 SECS; 88unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS;
90unsigned long ip_ct_tcp_timeout_established = 5 DAYS; 89unsigned int ip_ct_tcp_timeout_established = 5 DAYS;
91unsigned long ip_ct_tcp_timeout_fin_wait = 2 MINS; 90unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS;
92unsigned long ip_ct_tcp_timeout_close_wait = 60 SECS; 91unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS;
93unsigned long ip_ct_tcp_timeout_last_ack = 30 SECS; 92unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS;
94unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS; 93unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS;
95unsigned long ip_ct_tcp_timeout_close = 10 SECS; 94unsigned int ip_ct_tcp_timeout_close = 10 SECS;
96 95
97/* RFC1122 says the R2 limit should be at least 100 seconds. 96/* RFC1122 says the R2 limit should be at least 100 seconds.
98 Linux uses 15 packets as limit, which corresponds 97 Linux uses 15 packets as limit, which corresponds
99 to ~13-30min depending on RTO. */ 98 to ~13-30min depending on RTO. */
100unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS; 99unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS;
101 100
102static const unsigned long * tcp_timeouts[] 101static const unsigned int * tcp_timeouts[]
103= { NULL, /* TCP_CONNTRACK_NONE */ 102= { NULL, /* TCP_CONNTRACK_NONE */
104 &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ 103 &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
105 &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ 104 &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
@@ -995,7 +994,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
995 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) 994 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
996 && conntrack->proto.tcp.last_index == TCP_ACK_SET)) 995 && conntrack->proto.tcp.last_index == TCP_ACK_SET))
997 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { 996 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
998 /* RST sent to invalid SYN or ACK we had let trough 997 /* RST sent to invalid SYN or ACK we had let through
999 * at a) and c) above: 998 * at a) and c) above:
1000 * 999 *
1001 * a) SYN was in window then 1000 * a) SYN was in window then
@@ -1006,7 +1005,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
1006 * segments we ignored. */ 1005 * segments we ignored. */
1007 goto in_window; 1006 goto in_window;
1008 } 1007 }
1009 /* Just fall trough */ 1008 /* Just fall through */
1010 default: 1009 default:
1011 /* Keep compilers happy. */ 1010 /* Keep compilers happy. */
1012 break; 1011 break;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index f2dcac7c7660..55b7d3210adf 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -11,15 +11,15 @@
11#include <linux/timer.h> 11#include <linux/timer.h>
12#include <linux/netfilter.h> 12#include <linux/netfilter.h>
13#include <linux/in.h> 13#include <linux/in.h>
14#include <linux/ip.h>
14#include <linux/udp.h> 15#include <linux/udp.h>
15#include <linux/seq_file.h> 16#include <linux/seq_file.h>
16#include <net/checksum.h> 17#include <net/checksum.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter_ipv4.h> 18#include <linux/netfilter_ipv4.h>
19#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 19#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
20 20
21unsigned long ip_ct_udp_timeout = 30*HZ; 21unsigned int ip_ct_udp_timeout = 30*HZ;
22unsigned long ip_ct_udp_timeout_stream = 180*HZ; 22unsigned int ip_ct_udp_timeout_stream = 180*HZ;
23 23
24static int udp_pkt_to_tuple(const struct sk_buff *skb, 24static int udp_pkt_to_tuple(const struct sk_buff *skb,
25 unsigned int dataoff, 25 unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index dd476b191f4b..9dec1293f67a 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -27,6 +27,7 @@
27#endif 27#endif
28#include <net/checksum.h> 28#include <net/checksum.h>
29#include <net/ip.h> 29#include <net/ip.h>
30#include <net/route.h>
30 31
31#define ASSERT_READ_LOCK(x) 32#define ASSERT_READ_LOCK(x)
32#define ASSERT_WRITE_LOCK(x) 33#define ASSERT_WRITE_LOCK(x)
@@ -450,30 +451,6 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
450 return NF_ACCEPT; 451 return NF_ACCEPT;
451} 452}
452 453
453static unsigned int ip_refrag(unsigned int hooknum,
454 struct sk_buff **pskb,
455 const struct net_device *in,
456 const struct net_device *out,
457 int (*okfn)(struct sk_buff *))
458{
459 struct rtable *rt = (struct rtable *)(*pskb)->dst;
460
461 /* We've seen it coming out the other side: confirm */
462 if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
463 return NF_DROP;
464
465 /* Local packets are never produced too large for their
466 interface. We degfragment them at LOCAL_OUT, however,
467 so we have to refragment them here. */
468 if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
469 !skb_shinfo(*pskb)->tso_size) {
470 /* No hook can be after us, so this should be OK. */
471 ip_fragment(*pskb, okfn);
472 return NF_STOLEN;
473 }
474 return NF_ACCEPT;
475}
476
477static unsigned int ip_conntrack_local(unsigned int hooknum, 454static unsigned int ip_conntrack_local(unsigned int hooknum,
478 struct sk_buff **pskb, 455 struct sk_buff **pskb,
479 const struct net_device *in, 456 const struct net_device *in,
@@ -543,7 +520,7 @@ static struct nf_hook_ops ip_conntrack_helper_in_ops = {
543 520
544/* Refragmenter; last chance. */ 521/* Refragmenter; last chance. */
545static struct nf_hook_ops ip_conntrack_out_ops = { 522static struct nf_hook_ops ip_conntrack_out_ops = {
546 .hook = ip_refrag, 523 .hook = ip_confirm,
547 .owner = THIS_MODULE, 524 .owner = THIS_MODULE,
548 .pf = PF_INET, 525 .pf = PF_INET,
549 .hooknum = NF_IP_POST_ROUTING, 526 .hooknum = NF_IP_POST_ROUTING,
@@ -567,28 +544,28 @@ extern int ip_conntrack_max;
567extern unsigned int ip_conntrack_htable_size; 544extern unsigned int ip_conntrack_htable_size;
568 545
569/* From ip_conntrack_proto_tcp.c */ 546/* From ip_conntrack_proto_tcp.c */
570extern unsigned long ip_ct_tcp_timeout_syn_sent; 547extern unsigned int ip_ct_tcp_timeout_syn_sent;
571extern unsigned long ip_ct_tcp_timeout_syn_recv; 548extern unsigned int ip_ct_tcp_timeout_syn_recv;
572extern unsigned long ip_ct_tcp_timeout_established; 549extern unsigned int ip_ct_tcp_timeout_established;
573extern unsigned long ip_ct_tcp_timeout_fin_wait; 550extern unsigned int ip_ct_tcp_timeout_fin_wait;
574extern unsigned long ip_ct_tcp_timeout_close_wait; 551extern unsigned int ip_ct_tcp_timeout_close_wait;
575extern unsigned long ip_ct_tcp_timeout_last_ack; 552extern unsigned int ip_ct_tcp_timeout_last_ack;
576extern unsigned long ip_ct_tcp_timeout_time_wait; 553extern unsigned int ip_ct_tcp_timeout_time_wait;
577extern unsigned long ip_ct_tcp_timeout_close; 554extern unsigned int ip_ct_tcp_timeout_close;
578extern unsigned long ip_ct_tcp_timeout_max_retrans; 555extern unsigned int ip_ct_tcp_timeout_max_retrans;
579extern int ip_ct_tcp_loose; 556extern int ip_ct_tcp_loose;
580extern int ip_ct_tcp_be_liberal; 557extern int ip_ct_tcp_be_liberal;
581extern int ip_ct_tcp_max_retrans; 558extern int ip_ct_tcp_max_retrans;
582 559
583/* From ip_conntrack_proto_udp.c */ 560/* From ip_conntrack_proto_udp.c */
584extern unsigned long ip_ct_udp_timeout; 561extern unsigned int ip_ct_udp_timeout;
585extern unsigned long ip_ct_udp_timeout_stream; 562extern unsigned int ip_ct_udp_timeout_stream;
586 563
587/* From ip_conntrack_proto_icmp.c */ 564/* From ip_conntrack_proto_icmp.c */
588extern unsigned long ip_ct_icmp_timeout; 565extern unsigned int ip_ct_icmp_timeout;
589 566
590/* From ip_conntrack_proto_icmp.c */ 567/* From ip_conntrack_proto_icmp.c */
591extern unsigned long ip_ct_generic_timeout; 568extern unsigned int ip_ct_generic_timeout;
592 569
593/* Log invalid packets of a given protocol */ 570/* Log invalid packets of a given protocol */
594static int log_invalid_proto_min = 0; 571static int log_invalid_proto_min = 0;
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index d83757a70d9f..b8daab3c64af 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -171,7 +171,7 @@ static int __init init(void)
171/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ 171/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
172static int warn_set(const char *val, struct kernel_param *kp) 172static int warn_set(const char *val, struct kernel_param *kp)
173{ 173{
174 printk(KERN_INFO __stringify(KBUILD_MODNAME) 174 printk(KERN_INFO KBUILD_MODNAME
175 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); 175 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
176 return 0; 176 return 0;
177} 177}
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index e546203f5662..ac004895781a 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -148,14 +148,14 @@ pptp_outbound_pkt(struct sk_buff **pskb,
148{ 148{
149 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; 149 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
150 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; 150 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
151 151 u_int16_t msg, new_callid;
152 u_int16_t msg, *cid = NULL, new_callid; 152 unsigned int cid_off;
153 153
154 new_callid = htons(ct_pptp_info->pns_call_id); 154 new_callid = htons(ct_pptp_info->pns_call_id);
155 155
156 switch (msg = ntohs(ctlh->messageType)) { 156 switch (msg = ntohs(ctlh->messageType)) {
157 case PPTP_OUT_CALL_REQUEST: 157 case PPTP_OUT_CALL_REQUEST:
158 cid = &pptpReq->ocreq.callID; 158 cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
159 /* FIXME: ideally we would want to reserve a call ID 159 /* FIXME: ideally we would want to reserve a call ID
160 * here. current netfilter NAT core is not able to do 160 * here. current netfilter NAT core is not able to do
161 * this :( For now we use TCP source port. This breaks 161 * this :( For now we use TCP source port. This breaks
@@ -172,10 +172,10 @@ pptp_outbound_pkt(struct sk_buff **pskb,
172 ct_pptp_info->pns_call_id = ntohs(new_callid); 172 ct_pptp_info->pns_call_id = ntohs(new_callid);
173 break; 173 break;
174 case PPTP_IN_CALL_REPLY: 174 case PPTP_IN_CALL_REPLY:
175 cid = &pptpReq->icreq.callID; 175 cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
176 break; 176 break;
177 case PPTP_CALL_CLEAR_REQUEST: 177 case PPTP_CALL_CLEAR_REQUEST:
178 cid = &pptpReq->clrreq.callID; 178 cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
179 break; 179 break;
180 default: 180 default:
181 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, 181 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
@@ -197,18 +197,15 @@ pptp_outbound_pkt(struct sk_buff **pskb,
197 197
198 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass 198 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
199 * down to here */ 199 * down to here */
200
201 IP_NF_ASSERT(cid);
202
203 DEBUGP("altering call id from 0x%04x to 0x%04x\n", 200 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
204 ntohs(*cid), ntohs(new_callid)); 201 ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_callid));
205 202
206 /* mangle packet */ 203 /* mangle packet */
207 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 204 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
208 (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 205 cid_off + sizeof(struct pptp_pkt_hdr) +
209 sizeof(new_callid), 206 sizeof(struct PptpControlHeader),
210 (char *)&new_callid, 207 sizeof(new_callid), (char *)&new_callid,
211 sizeof(new_callid)) == 0) 208 sizeof(new_callid)) == 0)
212 return NF_DROP; 209 return NF_DROP;
213 210
214 return NF_ACCEPT; 211 return NF_ACCEPT;
@@ -299,31 +296,30 @@ pptp_inbound_pkt(struct sk_buff **pskb,
299 union pptp_ctrl_union *pptpReq) 296 union pptp_ctrl_union *pptpReq)
300{ 297{
301 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; 298 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
302 u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; 299 u_int16_t msg, new_cid = 0, new_pcid;
303 300 unsigned int pcid_off, cid_off = 0;
304 int ret = NF_ACCEPT, rv;
305 301
306 new_pcid = htons(nat_pptp_info->pns_call_id); 302 new_pcid = htons(nat_pptp_info->pns_call_id);
307 303
308 switch (msg = ntohs(ctlh->messageType)) { 304 switch (msg = ntohs(ctlh->messageType)) {
309 case PPTP_OUT_CALL_REPLY: 305 case PPTP_OUT_CALL_REPLY:
310 pcid = &pptpReq->ocack.peersCallID; 306 pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
311 cid = &pptpReq->ocack.callID; 307 cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
312 break; 308 break;
313 case PPTP_IN_CALL_CONNECT: 309 case PPTP_IN_CALL_CONNECT:
314 pcid = &pptpReq->iccon.peersCallID; 310 pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
315 break; 311 break;
316 case PPTP_IN_CALL_REQUEST: 312 case PPTP_IN_CALL_REQUEST:
317 /* only need to nat in case PAC is behind NAT box */ 313 /* only need to nat in case PAC is behind NAT box */
318 break; 314 return NF_ACCEPT;
319 case PPTP_WAN_ERROR_NOTIFY: 315 case PPTP_WAN_ERROR_NOTIFY:
320 pcid = &pptpReq->wanerr.peersCallID; 316 pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
321 break; 317 break;
322 case PPTP_CALL_DISCONNECT_NOTIFY: 318 case PPTP_CALL_DISCONNECT_NOTIFY:
323 pcid = &pptpReq->disc.callID; 319 pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
324 break; 320 break;
325 case PPTP_SET_LINK_INFO: 321 case PPTP_SET_LINK_INFO:
326 pcid = &pptpReq->setlink.peersCallID; 322 pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
327 break; 323 break;
328 324
329 default: 325 default:
@@ -345,35 +341,26 @@ pptp_inbound_pkt(struct sk_buff **pskb,
345 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ 341 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
346 342
347 /* mangle packet */ 343 /* mangle packet */
348 IP_NF_ASSERT(pcid);
349 DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", 344 DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
350 ntohs(*pcid), ntohs(new_pcid)); 345 ntohs(*(u_int16_t *)pptpReq + pcid_off), ntohs(new_pcid));
351 346
352 rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 347 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
353 (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 348 pcid_off + sizeof(struct pptp_pkt_hdr) +
354 sizeof(new_pcid), (char *)&new_pcid, 349 sizeof(struct PptpControlHeader),
355 sizeof(new_pcid)); 350 sizeof(new_pcid), (char *)&new_pcid,
356 if (rv != NF_ACCEPT) 351 sizeof(new_pcid)) == 0)
357 return rv; 352 return NF_DROP;
358 353
359 if (new_cid) { 354 if (new_cid) {
360 IP_NF_ASSERT(cid);
361 DEBUGP("altering call id from 0x%04x to 0x%04x\n", 355 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
362 ntohs(*cid), ntohs(new_cid)); 356 ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_cid));
363 rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 357 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
364 (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 358 cid_off + sizeof(struct pptp_pkt_hdr) +
365 sizeof(new_cid), 359 sizeof(struct PptpControlHeader),
366 (char *)&new_cid, 360 sizeof(new_cid), (char *)&new_cid,
367 sizeof(new_cid)); 361 sizeof(new_cid)) == 0)
368 if (rv != NF_ACCEPT) 362 return NF_DROP;
369 return rv;
370 } 363 }
371
372 /* check for earlier return value of 'switch' above */
373 if (ret != NF_ACCEPT)
374 return ret;
375
376 /* great, at least we don't need to resize packets */
377 return NF_ACCEPT; 364 return NF_ACCEPT;
378} 365}
379 366
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index de31942babe3..461c833eaca1 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -113,7 +113,7 @@ static int __init init(void)
113/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ 113/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
114static int warn_set(const char *val, struct kernel_param *kp) 114static int warn_set(const char *val, struct kernel_param *kp)
115{ 115{
116 printk(KERN_INFO __stringify(KBUILD_MODNAME) 116 printk(KERN_INFO KBUILD_MODNAME
117 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); 117 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
118 return 0; 118 return 0;
119} 119}
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index f7cad7cf1aec..6c4899d8046a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -151,42 +151,6 @@ gre_manip_pkt(struct sk_buff **pskb,
151 return 1; 151 return 1;
152} 152}
153 153
154/* print out a nat tuple */
155static unsigned int
156gre_print(char *buffer,
157 const struct ip_conntrack_tuple *match,
158 const struct ip_conntrack_tuple *mask)
159{
160 unsigned int len = 0;
161
162 if (mask->src.u.gre.key)
163 len += sprintf(buffer + len, "srckey=0x%x ",
164 ntohl(match->src.u.gre.key));
165
166 if (mask->dst.u.gre.key)
167 len += sprintf(buffer + len, "dstkey=0x%x ",
168 ntohl(match->src.u.gre.key));
169
170 return len;
171}
172
173/* print a range of keys */
174static unsigned int
175gre_print_range(char *buffer, const struct ip_nat_range *range)
176{
177 if (range->min.gre.key != 0
178 || range->max.gre.key != 0xFFFF) {
179 if (range->min.gre.key == range->max.gre.key)
180 return sprintf(buffer, "key 0x%x ",
181 ntohl(range->min.gre.key));
182 else
183 return sprintf(buffer, "keys 0x%u-0x%u ",
184 ntohl(range->min.gre.key),
185 ntohl(range->max.gre.key));
186 } else
187 return 0;
188}
189
190/* nat helper struct */ 154/* nat helper struct */
191static struct ip_nat_protocol gre = { 155static struct ip_nat_protocol gre = {
192 .name = "GRE", 156 .name = "GRE",
@@ -194,8 +158,6 @@ static struct ip_nat_protocol gre = {
194 .manip_pkt = gre_manip_pkt, 158 .manip_pkt = gre_manip_pkt,
195 .in_range = gre_in_range, 159 .in_range = gre_in_range,
196 .unique_tuple = gre_unique_tuple, 160 .unique_tuple = gre_unique_tuple,
197 .print = gre_print,
198 .print_range = gre_print_range,
199#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 161#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
200 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 162 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
201 .range_to_nfattr = ip_nat_port_range_to_nfattr, 163 .range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 938719043999..31a3f4ccb99c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -74,38 +74,6 @@ icmp_manip_pkt(struct sk_buff **pskb,
74 return 1; 74 return 1;
75} 75}
76 76
77static unsigned int
78icmp_print(char *buffer,
79 const struct ip_conntrack_tuple *match,
80 const struct ip_conntrack_tuple *mask)
81{
82 unsigned int len = 0;
83
84 if (mask->src.u.icmp.id)
85 len += sprintf(buffer + len, "id=%u ",
86 ntohs(match->src.u.icmp.id));
87
88 if (mask->dst.u.icmp.type)
89 len += sprintf(buffer + len, "type=%u ",
90 ntohs(match->dst.u.icmp.type));
91
92 if (mask->dst.u.icmp.code)
93 len += sprintf(buffer + len, "code=%u ",
94 ntohs(match->dst.u.icmp.code));
95
96 return len;
97}
98
99static unsigned int
100icmp_print_range(char *buffer, const struct ip_nat_range *range)
101{
102 if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF)
103 return sprintf(buffer, "id %u-%u ",
104 ntohs(range->min.icmp.id),
105 ntohs(range->max.icmp.id));
106 else return 0;
107}
108
109struct ip_nat_protocol ip_nat_protocol_icmp = { 77struct ip_nat_protocol ip_nat_protocol_icmp = {
110 .name = "ICMP", 78 .name = "ICMP",
111 .protonum = IPPROTO_ICMP, 79 .protonum = IPPROTO_ICMP,
@@ -113,8 +81,6 @@ struct ip_nat_protocol ip_nat_protocol_icmp = {
113 .manip_pkt = icmp_manip_pkt, 81 .manip_pkt = icmp_manip_pkt,
114 .in_range = icmp_in_range, 82 .in_range = icmp_in_range,
115 .unique_tuple = icmp_unique_tuple, 83 .unique_tuple = icmp_unique_tuple,
116 .print = icmp_print,
117 .print_range = icmp_print_range,
118#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 84#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
119 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 85 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
120 .range_to_nfattr = ip_nat_port_range_to_nfattr, 86 .range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index 1d381bf68574..a3d14079eba6 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -136,40 +136,6 @@ tcp_manip_pkt(struct sk_buff **pskb,
136 return 1; 136 return 1;
137} 137}
138 138
139static unsigned int
140tcp_print(char *buffer,
141 const struct ip_conntrack_tuple *match,
142 const struct ip_conntrack_tuple *mask)
143{
144 unsigned int len = 0;
145
146 if (mask->src.u.tcp.port)
147 len += sprintf(buffer + len, "srcpt=%u ",
148 ntohs(match->src.u.tcp.port));
149
150
151 if (mask->dst.u.tcp.port)
152 len += sprintf(buffer + len, "dstpt=%u ",
153 ntohs(match->dst.u.tcp.port));
154
155 return len;
156}
157
158static unsigned int
159tcp_print_range(char *buffer, const struct ip_nat_range *range)
160{
161 if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) {
162 if (range->min.tcp.port == range->max.tcp.port)
163 return sprintf(buffer, "port %u ",
164 ntohs(range->min.tcp.port));
165 else
166 return sprintf(buffer, "ports %u-%u ",
167 ntohs(range->min.tcp.port),
168 ntohs(range->max.tcp.port));
169 }
170 else return 0;
171}
172
173struct ip_nat_protocol ip_nat_protocol_tcp = { 139struct ip_nat_protocol ip_nat_protocol_tcp = {
174 .name = "TCP", 140 .name = "TCP",
175 .protonum = IPPROTO_TCP, 141 .protonum = IPPROTO_TCP,
@@ -177,8 +143,6 @@ struct ip_nat_protocol ip_nat_protocol_tcp = {
177 .manip_pkt = tcp_manip_pkt, 143 .manip_pkt = tcp_manip_pkt,
178 .in_range = tcp_in_range, 144 .in_range = tcp_in_range,
179 .unique_tuple = tcp_unique_tuple, 145 .unique_tuple = tcp_unique_tuple,
180 .print = tcp_print,
181 .print_range = tcp_print_range,
182#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 146#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
183 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 147 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
184 .range_to_nfattr = ip_nat_port_range_to_nfattr, 148 .range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index c4906e1aa24a..ec6053fdc867 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -122,40 +122,6 @@ udp_manip_pkt(struct sk_buff **pskb,
122 return 1; 122 return 1;
123} 123}
124 124
125static unsigned int
126udp_print(char *buffer,
127 const struct ip_conntrack_tuple *match,
128 const struct ip_conntrack_tuple *mask)
129{
130 unsigned int len = 0;
131
132 if (mask->src.u.udp.port)
133 len += sprintf(buffer + len, "srcpt=%u ",
134 ntohs(match->src.u.udp.port));
135
136
137 if (mask->dst.u.udp.port)
138 len += sprintf(buffer + len, "dstpt=%u ",
139 ntohs(match->dst.u.udp.port));
140
141 return len;
142}
143
144static unsigned int
145udp_print_range(char *buffer, const struct ip_nat_range *range)
146{
147 if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) {
148 if (range->min.udp.port == range->max.udp.port)
149 return sprintf(buffer, "port %u ",
150 ntohs(range->min.udp.port));
151 else
152 return sprintf(buffer, "ports %u-%u ",
153 ntohs(range->min.udp.port),
154 ntohs(range->max.udp.port));
155 }
156 else return 0;
157}
158
159struct ip_nat_protocol ip_nat_protocol_udp = { 125struct ip_nat_protocol ip_nat_protocol_udp = {
160 .name = "UDP", 126 .name = "UDP",
161 .protonum = IPPROTO_UDP, 127 .protonum = IPPROTO_UDP,
@@ -163,8 +129,6 @@ struct ip_nat_protocol ip_nat_protocol_udp = {
163 .manip_pkt = udp_manip_pkt, 129 .manip_pkt = udp_manip_pkt,
164 .in_range = udp_in_range, 130 .in_range = udp_in_range,
165 .unique_tuple = udp_unique_tuple, 131 .unique_tuple = udp_unique_tuple,
166 .print = udp_print,
167 .print_range = udp_print_range,
168#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 132#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
169 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 133 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
170 .range_to_nfattr = ip_nat_port_range_to_nfattr, 134 .range_to_nfattr = ip_nat_port_range_to_nfattr,
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index f0099a646a0b..3bf049517246 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -46,26 +46,10 @@ unknown_manip_pkt(struct sk_buff **pskb,
46 return 1; 46 return 1;
47} 47}
48 48
49static unsigned int
50unknown_print(char *buffer,
51 const struct ip_conntrack_tuple *match,
52 const struct ip_conntrack_tuple *mask)
53{
54 return 0;
55}
56
57static unsigned int
58unknown_print_range(char *buffer, const struct ip_nat_range *range)
59{
60 return 0;
61}
62
63struct ip_nat_protocol ip_nat_unknown_protocol = { 49struct ip_nat_protocol ip_nat_unknown_protocol = {
64 .name = "unknown", 50 .name = "unknown",
65 /* .me isn't set: getting a ref to this cannot fail. */ 51 /* .me isn't set: getting a ref to this cannot fail. */
66 .manip_pkt = unknown_manip_pkt, 52 .manip_pkt = unknown_manip_pkt,
67 .in_range = unknown_in_range, 53 .in_range = unknown_in_range,
68 .unique_tuple = unknown_unique_tuple, 54 .unique_tuple = unknown_unique_tuple,
69 .print = unknown_print,
70 .print_range = unknown_print_range
71}; 55};
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 8acb7ed40b47..4f95d477805c 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -44,6 +44,7 @@
44 * 44 *
45 */ 45 */
46#include <linux/config.h> 46#include <linux/config.h>
47#include <linux/in.h>
47#include <linux/module.h> 48#include <linux/module.h>
48#include <linux/types.h> 49#include <linux/types.h>
49#include <linux/kernel.h> 50#include <linux/kernel.h>
@@ -53,6 +54,7 @@
53#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 54#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
54#include <linux/netfilter_ipv4/ip_nat_helper.h> 55#include <linux/netfilter_ipv4/ip_nat_helper.h>
55#include <linux/ip.h> 56#include <linux/ip.h>
57#include <linux/udp.h>
56#include <net/checksum.h> 58#include <net/checksum.h>
57#include <net/udp.h> 59#include <net/udp.h>
58#include <asm/uaccess.h> 60#include <asm/uaccess.h>
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 30cd4e18c129..8b8a1f00bbf4 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -55,6 +55,44 @@
55 : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ 55 : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
56 : "*ERROR*"))) 56 : "*ERROR*")))
57 57
58#ifdef CONFIG_XFRM
59static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
60{
61 struct ip_conntrack *ct;
62 struct ip_conntrack_tuple *t;
63 enum ip_conntrack_info ctinfo;
64 enum ip_conntrack_dir dir;
65 unsigned long statusbit;
66
67 ct = ip_conntrack_get(skb, &ctinfo);
68 if (ct == NULL)
69 return;
70 dir = CTINFO2DIR(ctinfo);
71 t = &ct->tuplehash[dir].tuple;
72
73 if (dir == IP_CT_DIR_ORIGINAL)
74 statusbit = IPS_DST_NAT;
75 else
76 statusbit = IPS_SRC_NAT;
77
78 if (ct->status & statusbit) {
79 fl->fl4_dst = t->dst.ip;
80 if (t->dst.protonum == IPPROTO_TCP ||
81 t->dst.protonum == IPPROTO_UDP)
82 fl->fl_ip_dport = t->dst.u.tcp.port;
83 }
84
85 statusbit ^= IPS_NAT_MASK;
86
87 if (ct->status & statusbit) {
88 fl->fl4_src = t->src.ip;
89 if (t->dst.protonum == IPPROTO_TCP ||
90 t->dst.protonum == IPPROTO_UDP)
91 fl->fl_ip_sport = t->src.u.tcp.port;
92 }
93}
94#endif
95
58static unsigned int 96static unsigned int
59ip_nat_fn(unsigned int hooknum, 97ip_nat_fn(unsigned int hooknum,
60 struct sk_buff **pskb, 98 struct sk_buff **pskb,
@@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum,
162 const struct net_device *out, 200 const struct net_device *out,
163 int (*okfn)(struct sk_buff *)) 201 int (*okfn)(struct sk_buff *))
164{ 202{
165 u_int32_t saddr, daddr; 203 struct ip_conntrack *ct;
204 enum ip_conntrack_info ctinfo;
166 unsigned int ret; 205 unsigned int ret;
167 206
168 saddr = (*pskb)->nh.iph->saddr;
169 daddr = (*pskb)->nh.iph->daddr;
170
171 ret = ip_nat_fn(hooknum, pskb, in, out, okfn); 207 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
172 if (ret != NF_DROP && ret != NF_STOLEN 208 if (ret != NF_DROP && ret != NF_STOLEN
173 && ((*pskb)->nh.iph->saddr != saddr 209 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
174 || (*pskb)->nh.iph->daddr != daddr)) { 210 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
175 dst_release((*pskb)->dst); 211
176 (*pskb)->dst = NULL; 212 if (ct->tuplehash[dir].tuple.src.ip !=
213 ct->tuplehash[!dir].tuple.dst.ip) {
214 dst_release((*pskb)->dst);
215 (*pskb)->dst = NULL;
216 }
177 } 217 }
178 return ret; 218 return ret;
179} 219}
@@ -185,29 +225,30 @@ ip_nat_out(unsigned int hooknum,
185 const struct net_device *out, 225 const struct net_device *out,
186 int (*okfn)(struct sk_buff *)) 226 int (*okfn)(struct sk_buff *))
187{ 227{
228 struct ip_conntrack *ct;
229 enum ip_conntrack_info ctinfo;
230 unsigned int ret;
231
188 /* root is playing with raw sockets. */ 232 /* root is playing with raw sockets. */
189 if ((*pskb)->len < sizeof(struct iphdr) 233 if ((*pskb)->len < sizeof(struct iphdr)
190 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) 234 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
191 return NF_ACCEPT; 235 return NF_ACCEPT;
192 236
193 /* We can hit fragment here; forwarded packets get 237 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
194 defragmented by connection tracking coming in, then 238 if (ret != NF_DROP && ret != NF_STOLEN
195 fragmented (grr) by the forward code. 239 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
196 240 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
197 In future: If we have nfct != NULL, AND we have NAT 241
198 initialized, AND there is no helper, then we can do full 242 if (ct->tuplehash[dir].tuple.src.ip !=
199 NAPT on the head, and IP-address-only NAT on the rest. 243 ct->tuplehash[!dir].tuple.dst.ip
200 244#ifdef CONFIG_XFRM
201 I'm starting to have nightmares about fragments. */ 245 || ct->tuplehash[dir].tuple.src.u.all !=
202 246 ct->tuplehash[!dir].tuple.dst.u.all
203 if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { 247#endif
204 *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT); 248 )
205 249 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
206 if (!*pskb)
207 return NF_STOLEN;
208 } 250 }
209 251 return ret;
210 return ip_nat_fn(hooknum, pskb, in, out, okfn);
211} 252}
212 253
213static unsigned int 254static unsigned int
@@ -217,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum,
217 const struct net_device *out, 258 const struct net_device *out,
218 int (*okfn)(struct sk_buff *)) 259 int (*okfn)(struct sk_buff *))
219{ 260{
220 u_int32_t saddr, daddr; 261 struct ip_conntrack *ct;
262 enum ip_conntrack_info ctinfo;
221 unsigned int ret; 263 unsigned int ret;
222 264
223 /* root is playing with raw sockets. */ 265 /* root is playing with raw sockets. */
@@ -225,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum,
225 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) 267 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
226 return NF_ACCEPT; 268 return NF_ACCEPT;
227 269
228 saddr = (*pskb)->nh.iph->saddr;
229 daddr = (*pskb)->nh.iph->daddr;
230
231 ret = ip_nat_fn(hooknum, pskb, in, out, okfn); 270 ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
232 if (ret != NF_DROP && ret != NF_STOLEN 271 if (ret != NF_DROP && ret != NF_STOLEN
233 && ((*pskb)->nh.iph->saddr != saddr 272 && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
234 || (*pskb)->nh.iph->daddr != daddr)) 273 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
235 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; 274
275 if (ct->tuplehash[dir].tuple.dst.ip !=
276 ct->tuplehash[!dir].tuple.src.ip
277#ifdef CONFIG_XFRM
278 || ct->tuplehash[dir].tuple.dst.u.all !=
279 ct->tuplehash[dir].tuple.src.u.all
280#endif
281 )
282 return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
283 }
236 return ret; 284 return ret;
237} 285}
238 286
@@ -320,10 +368,14 @@ static int init_or_cleanup(int init)
320 368
321 if (!init) goto cleanup; 369 if (!init) goto cleanup;
322 370
371#ifdef CONFIG_XFRM
372 BUG_ON(ip_nat_decode_session != NULL);
373 ip_nat_decode_session = nat_decode_session;
374#endif
323 ret = ip_nat_rule_init(); 375 ret = ip_nat_rule_init();
324 if (ret < 0) { 376 if (ret < 0) {
325 printk("ip_nat_init: can't setup rules.\n"); 377 printk("ip_nat_init: can't setup rules.\n");
326 goto cleanup_nothing; 378 goto cleanup_decode_session;
327 } 379 }
328 ret = nf_register_hook(&ip_nat_in_ops); 380 ret = nf_register_hook(&ip_nat_in_ops);
329 if (ret < 0) { 381 if (ret < 0) {
@@ -371,7 +423,11 @@ static int init_or_cleanup(int init)
371 nf_unregister_hook(&ip_nat_in_ops); 423 nf_unregister_hook(&ip_nat_in_ops);
372 cleanup_rule_init: 424 cleanup_rule_init:
373 ip_nat_rule_cleanup(); 425 ip_nat_rule_cleanup();
374 cleanup_nothing: 426 cleanup_decode_session:
427#ifdef CONFIG_XFRM
428 ip_nat_decode_session = NULL;
429 synchronize_net();
430#endif
375 return ret; 431 return ret;
376} 432}
377 433
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 45886c8475e8..877bc96d3336 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -14,6 +14,7 @@
14 */ 14 */
15#include <linux/config.h> 15#include <linux/config.h>
16#include <linux/cache.h> 16#include <linux/cache.h>
17#include <linux/capability.h>
17#include <linux/skbuff.h> 18#include <linux/skbuff.h>
18#include <linux/kmod.h> 19#include <linux/kmod.h>
19#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
@@ -83,11 +84,6 @@ static DECLARE_MUTEX(ipt_mutex);
83 context stops packets coming through and allows user context to read 84 context stops packets coming through and allows user context to read
84 the counters or update the rules. 85 the counters or update the rules.
85 86
86 To be cache friendly on SMP, we arrange them like so:
87 [ n-entries ]
88 ... cache-align padding ...
89 [ n-entries ]
90
91 Hence the start of any table is given by get_table() below. */ 87 Hence the start of any table is given by get_table() below. */
92 88
93/* The table itself */ 89/* The table itself */
@@ -105,20 +101,15 @@ struct ipt_table_info
105 unsigned int underflow[NF_IP_NUMHOOKS]; 101 unsigned int underflow[NF_IP_NUMHOOKS];
106 102
107 /* ipt_entry tables: one per CPU */ 103 /* ipt_entry tables: one per CPU */
108 char entries[0] ____cacheline_aligned; 104 void *entries[NR_CPUS];
109}; 105};
110 106
111static LIST_HEAD(ipt_target); 107static LIST_HEAD(ipt_target);
112static LIST_HEAD(ipt_match); 108static LIST_HEAD(ipt_match);
113static LIST_HEAD(ipt_tables); 109static LIST_HEAD(ipt_tables);
110#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
114#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) 111#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
115 112
116#ifdef CONFIG_SMP
117#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
118#else
119#define TABLE_OFFSET(t,p) 0
120#endif
121
122#if 0 113#if 0
123#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) 114#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
124#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) 115#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
@@ -290,8 +281,7 @@ ipt_do_table(struct sk_buff **pskb,
290 281
291 read_lock_bh(&table->lock); 282 read_lock_bh(&table->lock);
292 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 283 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
293 table_base = (void *)table->private->entries 284 table_base = (void *)table->private->entries[smp_processor_id()];
294 + TABLE_OFFSET(table->private, smp_processor_id());
295 e = get_entry(table_base, table->private->hook_entry[hook]); 285 e = get_entry(table_base, table->private->hook_entry[hook]);
296 286
297#ifdef CONFIG_NETFILTER_DEBUG 287#ifdef CONFIG_NETFILTER_DEBUG
@@ -563,7 +553,8 @@ unconditional(const struct ipt_ip *ip)
563/* Figures out from what hook each rule can be called: returns 0 if 553/* Figures out from what hook each rule can be called: returns 0 if
564 there are loops. Puts hook bitmask in comefrom. */ 554 there are loops. Puts hook bitmask in comefrom. */
565static int 555static int
566mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) 556mark_source_chains(struct ipt_table_info *newinfo,
557 unsigned int valid_hooks, void *entry0)
567{ 558{
568 unsigned int hook; 559 unsigned int hook;
569 560
@@ -572,7 +563,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
572 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { 563 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
573 unsigned int pos = newinfo->hook_entry[hook]; 564 unsigned int pos = newinfo->hook_entry[hook];
574 struct ipt_entry *e 565 struct ipt_entry *e
575 = (struct ipt_entry *)(newinfo->entries + pos); 566 = (struct ipt_entry *)(entry0 + pos);
576 567
577 if (!(valid_hooks & (1 << hook))) 568 if (!(valid_hooks & (1 << hook)))
578 continue; 569 continue;
@@ -622,13 +613,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
622 goto next; 613 goto next;
623 614
624 e = (struct ipt_entry *) 615 e = (struct ipt_entry *)
625 (newinfo->entries + pos); 616 (entry0 + pos);
626 } while (oldpos == pos + e->next_offset); 617 } while (oldpos == pos + e->next_offset);
627 618
628 /* Move along one */ 619 /* Move along one */
629 size = e->next_offset; 620 size = e->next_offset;
630 e = (struct ipt_entry *) 621 e = (struct ipt_entry *)
631 (newinfo->entries + pos + size); 622 (entry0 + pos + size);
632 e->counters.pcnt = pos; 623 e->counters.pcnt = pos;
633 pos += size; 624 pos += size;
634 } else { 625 } else {
@@ -645,7 +636,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
645 newpos = pos + e->next_offset; 636 newpos = pos + e->next_offset;
646 } 637 }
647 e = (struct ipt_entry *) 638 e = (struct ipt_entry *)
648 (newinfo->entries + newpos); 639 (entry0 + newpos);
649 e->counters.pcnt = pos; 640 e->counters.pcnt = pos;
650 pos = newpos; 641 pos = newpos;
651 } 642 }
@@ -855,6 +846,7 @@ static int
855translate_table(const char *name, 846translate_table(const char *name,
856 unsigned int valid_hooks, 847 unsigned int valid_hooks,
857 struct ipt_table_info *newinfo, 848 struct ipt_table_info *newinfo,
849 void *entry0,
858 unsigned int size, 850 unsigned int size,
859 unsigned int number, 851 unsigned int number,
860 const unsigned int *hook_entries, 852 const unsigned int *hook_entries,
@@ -875,11 +867,11 @@ translate_table(const char *name,
875 duprintf("translate_table: size %u\n", newinfo->size); 867 duprintf("translate_table: size %u\n", newinfo->size);
876 i = 0; 868 i = 0;
877 /* Walk through entries, checking offsets. */ 869 /* Walk through entries, checking offsets. */
878 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 870 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
879 check_entry_size_and_hooks, 871 check_entry_size_and_hooks,
880 newinfo, 872 newinfo,
881 newinfo->entries, 873 entry0,
882 newinfo->entries + size, 874 entry0 + size,
883 hook_entries, underflows, &i); 875 hook_entries, underflows, &i);
884 if (ret != 0) 876 if (ret != 0)
885 return ret; 877 return ret;
@@ -907,27 +899,24 @@ translate_table(const char *name,
907 } 899 }
908 } 900 }
909 901
910 if (!mark_source_chains(newinfo, valid_hooks)) 902 if (!mark_source_chains(newinfo, valid_hooks, entry0))
911 return -ELOOP; 903 return -ELOOP;
912 904
913 /* Finally, each sanity check must pass */ 905 /* Finally, each sanity check must pass */
914 i = 0; 906 i = 0;
915 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 907 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
916 check_entry, name, size, &i); 908 check_entry, name, size, &i);
917 909
918 if (ret != 0) { 910 if (ret != 0) {
919 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, 911 IPT_ENTRY_ITERATE(entry0, newinfo->size,
920 cleanup_entry, &i); 912 cleanup_entry, &i);
921 return ret; 913 return ret;
922 } 914 }
923 915
924 /* And one copy for every other CPU */ 916 /* And one copy for every other CPU */
925 for_each_cpu(i) { 917 for_each_cpu(i) {
926 if (i == 0) 918 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
927 continue; 919 memcpy(newinfo->entries[i], entry0, newinfo->size);
928 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
929 newinfo->entries,
930 SMP_ALIGN(newinfo->size));
931 } 920 }
932 921
933 return ret; 922 return ret;
@@ -943,15 +932,12 @@ replace_table(struct ipt_table *table,
943 932
944#ifdef CONFIG_NETFILTER_DEBUG 933#ifdef CONFIG_NETFILTER_DEBUG
945 { 934 {
946 struct ipt_entry *table_base; 935 int cpu;
947 unsigned int i;
948 936
949 for_each_cpu(i) { 937 for_each_cpu(cpu) {
950 table_base = 938 struct ipt_entry *table_base = newinfo->entries[cpu];
951 (void *)newinfo->entries 939 if (table_base)
952 + TABLE_OFFSET(newinfo, i); 940 table_base->comefrom = 0xdead57ac;
953
954 table_base->comefrom = 0xdead57ac;
955 } 941 }
956 } 942 }
957#endif 943#endif
@@ -986,16 +972,44 @@ add_entry_to_counter(const struct ipt_entry *e,
986 return 0; 972 return 0;
987} 973}
988 974
975static inline int
976set_entry_to_counter(const struct ipt_entry *e,
977 struct ipt_counters total[],
978 unsigned int *i)
979{
980 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
981
982 (*i)++;
983 return 0;
984}
985
989static void 986static void
990get_counters(const struct ipt_table_info *t, 987get_counters(const struct ipt_table_info *t,
991 struct ipt_counters counters[]) 988 struct ipt_counters counters[])
992{ 989{
993 unsigned int cpu; 990 unsigned int cpu;
994 unsigned int i; 991 unsigned int i;
992 unsigned int curcpu;
993
994 /* Instead of clearing (by a previous call to memset())
995 * the counters and using adds, we set the counters
996 * with data used by 'current' CPU
997 * We dont care about preemption here.
998 */
999 curcpu = raw_smp_processor_id();
1000
1001 i = 0;
1002 IPT_ENTRY_ITERATE(t->entries[curcpu],
1003 t->size,
1004 set_entry_to_counter,
1005 counters,
1006 &i);
995 1007
996 for_each_cpu(cpu) { 1008 for_each_cpu(cpu) {
1009 if (cpu == curcpu)
1010 continue;
997 i = 0; 1011 i = 0;
998 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 1012 IPT_ENTRY_ITERATE(t->entries[cpu],
999 t->size, 1013 t->size,
1000 add_entry_to_counter, 1014 add_entry_to_counter,
1001 counters, 1015 counters,
@@ -1012,24 +1026,29 @@ copy_entries_to_user(unsigned int total_size,
1012 struct ipt_entry *e; 1026 struct ipt_entry *e;
1013 struct ipt_counters *counters; 1027 struct ipt_counters *counters;
1014 int ret = 0; 1028 int ret = 0;
1029 void *loc_cpu_entry;
1015 1030
1016 /* We need atomic snapshot of counters: rest doesn't change 1031 /* We need atomic snapshot of counters: rest doesn't change
1017 (other than comefrom, which userspace doesn't care 1032 (other than comefrom, which userspace doesn't care
1018 about). */ 1033 about). */
1019 countersize = sizeof(struct ipt_counters) * table->private->number; 1034 countersize = sizeof(struct ipt_counters) * table->private->number;
1020 counters = vmalloc(countersize); 1035 counters = vmalloc_node(countersize, numa_node_id());
1021 1036
1022 if (counters == NULL) 1037 if (counters == NULL)
1023 return -ENOMEM; 1038 return -ENOMEM;
1024 1039
1025 /* First, sum counters... */ 1040 /* First, sum counters... */
1026 memset(counters, 0, countersize);
1027 write_lock_bh(&table->lock); 1041 write_lock_bh(&table->lock);
1028 get_counters(table->private, counters); 1042 get_counters(table->private, counters);
1029 write_unlock_bh(&table->lock); 1043 write_unlock_bh(&table->lock);
1030 1044
1031 /* ... then copy entire thing from CPU 0... */ 1045 /* choose the copy that is on our node/cpu, ...
1032 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 1046 * This choice is lazy (because current thread is
1047 * allowed to migrate to another cpu)
1048 */
1049 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1050 /* ... then copy entire thing ... */
1051 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1033 ret = -EFAULT; 1052 ret = -EFAULT;
1034 goto free_counters; 1053 goto free_counters;
1035 } 1054 }
@@ -1041,7 +1060,7 @@ copy_entries_to_user(unsigned int total_size,
1041 struct ipt_entry_match *m; 1060 struct ipt_entry_match *m;
1042 struct ipt_entry_target *t; 1061 struct ipt_entry_target *t;
1043 1062
1044 e = (struct ipt_entry *)(table->private->entries + off); 1063 e = (struct ipt_entry *)(loc_cpu_entry + off);
1045 if (copy_to_user(userptr + off 1064 if (copy_to_user(userptr + off
1046 + offsetof(struct ipt_entry, counters), 1065 + offsetof(struct ipt_entry, counters),
1047 &counters[num], 1066 &counters[num],
@@ -1110,6 +1129,45 @@ get_entries(const struct ipt_get_entries *entries,
1110 return ret; 1129 return ret;
1111} 1130}
1112 1131
1132static void free_table_info(struct ipt_table_info *info)
1133{
1134 int cpu;
1135 for_each_cpu(cpu) {
1136 if (info->size <= PAGE_SIZE)
1137 kfree(info->entries[cpu]);
1138 else
1139 vfree(info->entries[cpu]);
1140 }
1141 kfree(info);
1142}
1143
1144static struct ipt_table_info *alloc_table_info(unsigned int size)
1145{
1146 struct ipt_table_info *newinfo;
1147 int cpu;
1148
1149 newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL);
1150 if (!newinfo)
1151 return NULL;
1152
1153 newinfo->size = size;
1154
1155 for_each_cpu(cpu) {
1156 if (size <= PAGE_SIZE)
1157 newinfo->entries[cpu] = kmalloc_node(size,
1158 GFP_KERNEL,
1159 cpu_to_node(cpu));
1160 else
1161 newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu));
1162 if (newinfo->entries[cpu] == 0) {
1163 free_table_info(newinfo);
1164 return NULL;
1165 }
1166 }
1167
1168 return newinfo;
1169}
1170
1113static int 1171static int
1114do_replace(void __user *user, unsigned int len) 1172do_replace(void __user *user, unsigned int len)
1115{ 1173{
@@ -1118,6 +1176,7 @@ do_replace(void __user *user, unsigned int len)
1118 struct ipt_table *t; 1176 struct ipt_table *t;
1119 struct ipt_table_info *newinfo, *oldinfo; 1177 struct ipt_table_info *newinfo, *oldinfo;
1120 struct ipt_counters *counters; 1178 struct ipt_counters *counters;
1179 void *loc_cpu_entry, *loc_cpu_old_entry;
1121 1180
1122 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1181 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1123 return -EFAULT; 1182 return -EFAULT;
@@ -1130,13 +1189,13 @@ do_replace(void __user *user, unsigned int len)
1130 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) 1189 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1131 return -ENOMEM; 1190 return -ENOMEM;
1132 1191
1133 newinfo = vmalloc(sizeof(struct ipt_table_info) 1192 newinfo = alloc_table_info(tmp.size);
1134 + SMP_ALIGN(tmp.size) *
1135 (highest_possible_processor_id()+1));
1136 if (!newinfo) 1193 if (!newinfo)
1137 return -ENOMEM; 1194 return -ENOMEM;
1138 1195
1139 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 1196 /* choose the copy that is our node/cpu */
1197 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1198 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1140 tmp.size) != 0) { 1199 tmp.size) != 0) {
1141 ret = -EFAULT; 1200 ret = -EFAULT;
1142 goto free_newinfo; 1201 goto free_newinfo;
@@ -1147,10 +1206,9 @@ do_replace(void __user *user, unsigned int len)
1147 ret = -ENOMEM; 1206 ret = -ENOMEM;
1148 goto free_newinfo; 1207 goto free_newinfo;
1149 } 1208 }
1150 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1151 1209
1152 ret = translate_table(tmp.name, tmp.valid_hooks, 1210 ret = translate_table(tmp.name, tmp.valid_hooks,
1153 newinfo, tmp.size, tmp.num_entries, 1211 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1154 tmp.hook_entry, tmp.underflow); 1212 tmp.hook_entry, tmp.underflow);
1155 if (ret != 0) 1213 if (ret != 0)
1156 goto free_newinfo_counters; 1214 goto free_newinfo_counters;
@@ -1189,8 +1247,9 @@ do_replace(void __user *user, unsigned int len)
1189 /* Get the old counters. */ 1247 /* Get the old counters. */
1190 get_counters(oldinfo, counters); 1248 get_counters(oldinfo, counters);
1191 /* Decrease module usage counts and free resource */ 1249 /* Decrease module usage counts and free resource */
1192 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 1250 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1193 vfree(oldinfo); 1251 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1252 free_table_info(oldinfo);
1194 if (copy_to_user(tmp.counters, counters, 1253 if (copy_to_user(tmp.counters, counters,
1195 sizeof(struct ipt_counters) * tmp.num_counters) != 0) 1254 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1196 ret = -EFAULT; 1255 ret = -EFAULT;
@@ -1202,11 +1261,11 @@ do_replace(void __user *user, unsigned int len)
1202 module_put(t->me); 1261 module_put(t->me);
1203 up(&ipt_mutex); 1262 up(&ipt_mutex);
1204 free_newinfo_counters_untrans: 1263 free_newinfo_counters_untrans:
1205 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); 1264 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1206 free_newinfo_counters: 1265 free_newinfo_counters:
1207 vfree(counters); 1266 vfree(counters);
1208 free_newinfo: 1267 free_newinfo:
1209 vfree(newinfo); 1268 free_table_info(newinfo);
1210 return ret; 1269 return ret;
1211} 1270}
1212 1271
@@ -1239,6 +1298,7 @@ do_add_counters(void __user *user, unsigned int len)
1239 struct ipt_counters_info tmp, *paddc; 1298 struct ipt_counters_info tmp, *paddc;
1240 struct ipt_table *t; 1299 struct ipt_table *t;
1241 int ret = 0; 1300 int ret = 0;
1301 void *loc_cpu_entry;
1242 1302
1243 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1303 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1244 return -EFAULT; 1304 return -EFAULT;
@@ -1246,7 +1306,7 @@ do_add_counters(void __user *user, unsigned int len)
1246 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) 1306 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1247 return -EINVAL; 1307 return -EINVAL;
1248 1308
1249 paddc = vmalloc(len); 1309 paddc = vmalloc_node(len, numa_node_id());
1250 if (!paddc) 1310 if (!paddc)
1251 return -ENOMEM; 1311 return -ENOMEM;
1252 1312
@@ -1268,7 +1328,9 @@ do_add_counters(void __user *user, unsigned int len)
1268 } 1328 }
1269 1329
1270 i = 0; 1330 i = 0;
1271 IPT_ENTRY_ITERATE(t->private->entries, 1331 /* Choose the copy that is on our node */
1332 loc_cpu_entry = t->private->entries[raw_smp_processor_id()];
1333 IPT_ENTRY_ITERATE(loc_cpu_entry,
1272 t->private->size, 1334 t->private->size,
1273 add_counter_to_entry, 1335 add_counter_to_entry,
1274 paddc->counters, 1336 paddc->counters,
@@ -1460,28 +1522,31 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1460 struct ipt_table_info *newinfo; 1522 struct ipt_table_info *newinfo;
1461 static struct ipt_table_info bootstrap 1523 static struct ipt_table_info bootstrap
1462 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1524 = { 0, 0, 0, { 0 }, { 0 }, { } };
1525 void *loc_cpu_entry;
1463 1526
1464 newinfo = vmalloc(sizeof(struct ipt_table_info) 1527 newinfo = alloc_table_info(repl->size);
1465 + SMP_ALIGN(repl->size) *
1466 (highest_possible_processor_id()+1));
1467 if (!newinfo) 1528 if (!newinfo)
1468 return -ENOMEM; 1529 return -ENOMEM;
1469 1530
1470 memcpy(newinfo->entries, repl->entries, repl->size); 1531 /* choose the copy on our node/cpu
1532 * but dont care of preemption
1533 */
1534 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1535 memcpy(loc_cpu_entry, repl->entries, repl->size);
1471 1536
1472 ret = translate_table(table->name, table->valid_hooks, 1537 ret = translate_table(table->name, table->valid_hooks,
1473 newinfo, repl->size, 1538 newinfo, loc_cpu_entry, repl->size,
1474 repl->num_entries, 1539 repl->num_entries,
1475 repl->hook_entry, 1540 repl->hook_entry,
1476 repl->underflow); 1541 repl->underflow);
1477 if (ret != 0) { 1542 if (ret != 0) {
1478 vfree(newinfo); 1543 free_table_info(newinfo);
1479 return ret; 1544 return ret;
1480 } 1545 }
1481 1546
1482 ret = down_interruptible(&ipt_mutex); 1547 ret = down_interruptible(&ipt_mutex);
1483 if (ret != 0) { 1548 if (ret != 0) {
1484 vfree(newinfo); 1549 free_table_info(newinfo);
1485 return ret; 1550 return ret;
1486 } 1551 }
1487 1552
@@ -1510,20 +1575,23 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1510 return ret; 1575 return ret;
1511 1576
1512 free_unlock: 1577 free_unlock:
1513 vfree(newinfo); 1578 free_table_info(newinfo);
1514 goto unlock; 1579 goto unlock;
1515} 1580}
1516 1581
1517void ipt_unregister_table(struct ipt_table *table) 1582void ipt_unregister_table(struct ipt_table *table)
1518{ 1583{
1584 void *loc_cpu_entry;
1585
1519 down(&ipt_mutex); 1586 down(&ipt_mutex);
1520 LIST_DELETE(&ipt_tables, table); 1587 LIST_DELETE(&ipt_tables, table);
1521 up(&ipt_mutex); 1588 up(&ipt_mutex);
1522 1589
1523 /* Decrease module usage counts and free resources */ 1590 /* Decrease module usage counts and free resources */
1524 IPT_ENTRY_ITERATE(table->private->entries, table->private->size, 1591 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1592 IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1525 cleanup_entry, NULL); 1593 cleanup_entry, NULL);
1526 vfree(table->private); 1594 free_table_info(table->private);
1527} 1595}
1528 1596
1529/* Returns 1 if the port is matched by the range, 0 otherwise */ 1597/* Returns 1 if the port is matched by the range, 0 otherwise */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 275a174c6fe6..27860510ca6d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/inetdevice.h>
14#include <linux/ip.h> 15#include <linux/ip.h>
15#include <linux/timer.h> 16#include <linux/timer.h>
16#include <linux/module.h> 17#include <linux/module.h>
@@ -18,6 +19,7 @@
18#include <net/protocol.h> 19#include <net/protocol.h>
19#include <net/ip.h> 20#include <net/ip.h>
20#include <net/checksum.h> 21#include <net/checksum.h>
22#include <net/route.h>
21#include <linux/netfilter_ipv4.h> 23#include <linux/netfilter_ipv4.h>
22#include <linux/netfilter_ipv4/ip_nat_rule.h> 24#include <linux/netfilter_ipv4/ip_nat_rule.h>
23#include <linux/netfilter_ipv4/ip_tables.h> 25#include <linux/netfilter_ipv4/ip_tables.h>
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f057025a719e..6693526ae128 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -203,7 +203,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
203 sizeof(struct tcphdr), 0)); 203 sizeof(struct tcphdr), 0));
204 204
205 /* Adjust IP TTL, DF */ 205 /* Adjust IP TTL, DF */
206 nskb->nh.iph->ttl = MAXTTL; 206 nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
207 /* Set DF, id = 0 */ 207 /* Set DF, id = 0 */
208 nskb->nh.iph->frag_off = htons(IP_DF); 208 nskb->nh.iph->frag_off = htons(IP_DF);
209 nskb->nh.iph->id = 0; 209 nskb->nh.iph->id = 0;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 2883ccd8a91d..38641cd06123 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -77,15 +77,15 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
77#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) 77#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
78 78
79static unsigned int nlbufsiz = 4096; 79static unsigned int nlbufsiz = 4096;
80module_param(nlbufsiz, uint, 0600); /* FIXME: Check size < 128k --RR */ 80module_param(nlbufsiz, uint, 0400);
81MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); 81MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
82 82
83static unsigned int flushtimeout = 10; 83static unsigned int flushtimeout = 10;
84module_param(flushtimeout, int, 0600); 84module_param(flushtimeout, uint, 0600);
85MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); 85MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
86 86
87static unsigned int nflog = 1; 87static int nflog = 1;
88module_param(nflog, int, 0400); 88module_param(nflog, bool, 0400);
89MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); 89MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
90 90
91/* global data structures */ 91/* global data structures */
@@ -376,7 +376,7 @@ static int __init init(void)
376 376
377 DEBUGP("ipt_ULOG: init module\n"); 377 DEBUGP("ipt_ULOG: init module\n");
378 378
379 if (nlbufsiz >= 128*1024) { 379 if (nlbufsiz > 128*1024) {
380 printk("Netlink buffer has to be <= 128kB\n"); 380 printk("Netlink buffer has to be <= 128kB\n");
381 return -EINVAL; 381 return -EINVAL;
382 } 382 }
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
index bf14e1c7798a..aef649e393af 100644
--- a/net/ipv4/netfilter/ipt_helper.c
+++ b/net/ipv4/netfilter/ipt_helper.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter.h> 15#include <linux/netfilter.h>
16#include <linux/interrupt.h>
16#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) 17#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
17#include <linux/netfilter_ipv4/ip_conntrack.h> 18#include <linux/netfilter_ipv4/ip_conntrack.h>
18#include <linux/netfilter_ipv4/ip_conntrack_core.h> 19#include <linux/netfilter_ipv4/ip_conntrack_core.h>
diff --git a/net/ipv4/netfilter/ipt_mac.c b/net/ipv4/netfilter/ipt_mac.c
index 11a459e33f25..1b9bb4559f80 100644
--- a/net/ipv4/netfilter/ipt_mac.c
+++ b/net/ipv4/netfilter/ipt_mac.c
@@ -11,6 +11,7 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/if_ether.h> 13#include <linux/if_ether.h>
14#include <linux/etherdevice.h>
14 15
15#include <linux/netfilter_ipv4/ipt_mac.h> 16#include <linux/netfilter_ipv4/ipt_mac.h>
16#include <linux/netfilter_ipv4/ip_tables.h> 17#include <linux/netfilter_ipv4/ip_tables.h>
@@ -33,8 +34,8 @@ match(const struct sk_buff *skb,
33 return (skb->mac.raw >= skb->head 34 return (skb->mac.raw >= skb->head
34 && (skb->mac.raw + ETH_HLEN) <= skb->data 35 && (skb->mac.raw + ETH_HLEN) <= skb->data
35 /* If so, compare... */ 36 /* If so, compare... */
36 && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN) 37 && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
37 == 0) ^ info->invert)); 38 ^ info->invert));
38} 39}
39 40
40static int 41static int
diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c
index 1a53924041fc..03f554857a4d 100644
--- a/net/ipv4/netfilter/ipt_physdev.c
+++ b/net/ipv4/netfilter/ipt_physdev.c
@@ -9,6 +9,7 @@
9 */ 9 */
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/netdevice.h>
12#include <linux/skbuff.h> 13#include <linux/skbuff.h>
13#include <linux/netfilter_ipv4/ipt_physdev.h> 14#include <linux/netfilter_ipv4/ipt_physdev.h>
14#include <linux/netfilter_ipv4/ip_tables.h> 15#include <linux/netfilter_ipv4/ip_tables.h>
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
new file mode 100644
index 000000000000..709debcc69c9
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_policy.c
@@ -0,0 +1,170 @@
1/* IP tables module for matching IPsec policy
2 *
3 * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/config.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/init.h>
15#include <net/xfrm.h>
16
17#include <linux/netfilter_ipv4.h>
18#include <linux/netfilter_ipv4/ip_tables.h>
19#include <linux/netfilter_ipv4/ipt_policy.h>
20
21MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
22MODULE_DESCRIPTION("IPtables IPsec policy matching module");
23MODULE_LICENSE("GPL");
24
25
26static inline int
27match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
28{
29#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
30
31 return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
32 MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
33 MATCH(proto, x->id.proto) &&
34 MATCH(mode, x->props.mode) &&
35 MATCH(spi, x->id.spi) &&
36 MATCH(reqid, x->props.reqid);
37}
38
39static int
40match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
41{
42 const struct ipt_policy_elem *e;
43 struct sec_path *sp = skb->sp;
44 int strict = info->flags & IPT_POLICY_MATCH_STRICT;
45 int i, pos;
46
47 if (sp == NULL)
48 return -1;
49 if (strict && info->len != sp->len)
50 return 0;
51
52 for (i = sp->len - 1; i >= 0; i--) {
53 pos = strict ? i - sp->len + 1 : 0;
54 if (pos >= info->len)
55 return 0;
56 e = &info->pol[pos];
57
58 if (match_xfrm_state(sp->x[i].xvec, e)) {
59 if (!strict)
60 return 1;
61 } else if (strict)
62 return 0;
63 }
64
65 return strict ? 1 : 0;
66}
67
68static int
69match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
70{
71 const struct ipt_policy_elem *e;
72 struct dst_entry *dst = skb->dst;
73 int strict = info->flags & IPT_POLICY_MATCH_STRICT;
74 int i, pos;
75
76 if (dst->xfrm == NULL)
77 return -1;
78
79 for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
80 pos = strict ? i : 0;
81 if (pos >= info->len)
82 return 0;
83 e = &info->pol[pos];
84
85 if (match_xfrm_state(dst->xfrm, e)) {
86 if (!strict)
87 return 1;
88 } else if (strict)
89 return 0;
90 }
91
92 return strict ? 1 : 0;
93}
94
95static int match(const struct sk_buff *skb,
96 const struct net_device *in,
97 const struct net_device *out,
98 const void *matchinfo, int offset, int *hotdrop)
99{
100 const struct ipt_policy_info *info = matchinfo;
101 int ret;
102
103 if (info->flags & IPT_POLICY_MATCH_IN)
104 ret = match_policy_in(skb, info);
105 else
106 ret = match_policy_out(skb, info);
107
108 if (ret < 0)
109 ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0;
110 else if (info->flags & IPT_POLICY_MATCH_NONE)
111 ret = 0;
112
113 return ret;
114}
115
116static int checkentry(const char *tablename, const struct ipt_ip *ip,
117 void *matchinfo, unsigned int matchsize,
118 unsigned int hook_mask)
119{
120 struct ipt_policy_info *info = matchinfo;
121
122 if (matchsize != IPT_ALIGN(sizeof(*info))) {
123 printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
124 matchsize, IPT_ALIGN(sizeof(*info)));
125 return 0;
126 }
127 if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) {
128 printk(KERN_ERR "ipt_policy: neither incoming nor "
129 "outgoing policy selected\n");
130 return 0;
131 }
132 if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
133 && info->flags & IPT_POLICY_MATCH_OUT) {
134 printk(KERN_ERR "ipt_policy: output policy not valid in "
135 "PRE_ROUTING and INPUT\n");
136 return 0;
137 }
138 if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
139 && info->flags & IPT_POLICY_MATCH_IN) {
140 printk(KERN_ERR "ipt_policy: input policy not valid in "
141 "POST_ROUTING and OUTPUT\n");
142 return 0;
143 }
144 if (info->len > IPT_POLICY_MAX_ELEM) {
145 printk(KERN_ERR "ipt_policy: too many policy elements\n");
146 return 0;
147 }
148
149 return 1;
150}
151
152static struct ipt_match policy_match = {
153 .name = "policy",
154 .match = match,
155 .checkentry = checkentry,
156 .me = THIS_MODULE,
157};
158
159static int __init init(void)
160{
161 return ipt_register_match(&policy_match);
162}
163
164static void __exit fini(void)
165{
166 ipt_unregister_match(&policy_match);
167}
168
169module_init(init);
170module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 261cbb4d4c49..5ddccb18c65e 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -24,10 +24,10 @@
24#define HASH_LOG 9 24#define HASH_LOG 9
25 25
26/* Defaults, these can be overridden on the module command-line. */ 26/* Defaults, these can be overridden on the module command-line. */
27static int ip_list_tot = 100; 27static unsigned int ip_list_tot = 100;
28static int ip_pkt_list_tot = 20; 28static unsigned int ip_pkt_list_tot = 20;
29static int ip_list_hash_size = 0; 29static unsigned int ip_list_hash_size = 0;
30static int ip_list_perms = 0644; 30static unsigned int ip_list_perms = 0644;
31#ifdef DEBUG 31#ifdef DEBUG
32static int debug = 1; 32static int debug = 1;
33#endif 33#endif
@@ -38,13 +38,13 @@ KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. htt
38MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>"); 38MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
39MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER); 39MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
40MODULE_LICENSE("GPL"); 40MODULE_LICENSE("GPL");
41module_param(ip_list_tot, int, 0400); 41module_param(ip_list_tot, uint, 0400);
42module_param(ip_pkt_list_tot, int, 0400); 42module_param(ip_pkt_list_tot, uint, 0400);
43module_param(ip_list_hash_size, int, 0400); 43module_param(ip_list_hash_size, uint, 0400);
44module_param(ip_list_perms, int, 0400); 44module_param(ip_list_perms, uint, 0400);
45#ifdef DEBUG 45#ifdef DEBUG
46module_param(debug, int, 0600); 46module_param(debug, bool, 0600);
47MODULE_PARM_DESC(debug,"debugging level, defaults to 1"); 47MODULE_PARM_DESC(debug,"enable debugging output");
48#endif 48#endif
49MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list"); 49MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
50MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember"); 50MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember");
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8202c1c0afad..0c56c52a3831 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -22,6 +22,7 @@
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <linux/icmp.h> 23#include <linux/icmp.h>
24#include <linux/sysctl.h> 24#include <linux/sysctl.h>
25#include <net/route.h>
25#include <net/ip.h> 26#include <net/ip.h>
26 27
27#include <linux/netfilter_ipv4.h> 28#include <linux/netfilter_ipv4.h>
@@ -180,30 +181,6 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
180 return NF_ACCEPT; 181 return NF_ACCEPT;
181} 182}
182 183
183static unsigned int ipv4_refrag(unsigned int hooknum,
184 struct sk_buff **pskb,
185 const struct net_device *in,
186 const struct net_device *out,
187 int (*okfn)(struct sk_buff *))
188{
189 struct rtable *rt = (struct rtable *)(*pskb)->dst;
190
191 /* We've seen it coming out the other side: confirm */
192 if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
193 return NF_DROP;
194
195 /* Local packets are never produced too large for their
196 interface. We degfragment them at LOCAL_OUT, however,
197 so we have to refragment them here. */
198 if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
199 !skb_shinfo(*pskb)->tso_size) {
200 /* No hook can be after us, so this should be OK. */
201 ip_fragment(*pskb, okfn);
202 return NF_STOLEN;
203 }
204 return NF_ACCEPT;
205}
206
207static unsigned int ipv4_conntrack_in(unsigned int hooknum, 184static unsigned int ipv4_conntrack_in(unsigned int hooknum,
208 struct sk_buff **pskb, 185 struct sk_buff **pskb,
209 const struct net_device *in, 186 const struct net_device *in,
@@ -283,7 +260,7 @@ static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
283 260
284/* Refragmenter; last chance. */ 261/* Refragmenter; last chance. */
285static struct nf_hook_ops ipv4_conntrack_out_ops = { 262static struct nf_hook_ops ipv4_conntrack_out_ops = {
286 .hook = ipv4_refrag, 263 .hook = ipv4_confirm,
287 .owner = THIS_MODULE, 264 .owner = THIS_MODULE,
288 .pf = PF_INET, 265 .pf = PF_INET,
289 .hooknum = NF_IP_POST_ROUTING, 266 .hooknum = NF_IP_POST_ROUTING,
@@ -300,7 +277,7 @@ static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
300 277
301#ifdef CONFIG_SYSCTL 278#ifdef CONFIG_SYSCTL
302/* From nf_conntrack_proto_icmp.c */ 279/* From nf_conntrack_proto_icmp.c */
303extern unsigned long nf_ct_icmp_timeout; 280extern unsigned int nf_ct_icmp_timeout;
304static struct ctl_table_header *nf_ct_ipv4_sysctl_header; 281static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
305 282
306static ctl_table nf_ct_sysctl_table[] = { 283static ctl_table nf_ct_sysctl_table[] = {
@@ -392,6 +369,48 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
392 return -ENOENT; 369 return -ENOENT;
393} 370}
394 371
372#if defined(CONFIG_NF_CT_NETLINK) || \
373 defined(CONFIG_NF_CT_NETLINK_MODULE)
374
375#include <linux/netfilter/nfnetlink.h>
376#include <linux/netfilter/nfnetlink_conntrack.h>
377
378static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
379 const struct nf_conntrack_tuple *tuple)
380{
381 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
382 &tuple->src.u3.ip);
383 NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
384 &tuple->dst.u3.ip);
385 return 0;
386
387nfattr_failure:
388 return -1;
389}
390
391static const size_t cta_min_ip[CTA_IP_MAX] = {
392 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
393 [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
394};
395
396static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
397 struct nf_conntrack_tuple *t)
398{
399 if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])
400 return -EINVAL;
401
402 if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
403 return -EINVAL;
404
405 t->src.u3.ip =
406 *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
407 t->dst.u3.ip =
408 *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
409
410 return 0;
411}
412#endif
413
395static struct nf_sockopt_ops so_getorigdst = { 414static struct nf_sockopt_ops so_getorigdst = {
396 .pf = PF_INET, 415 .pf = PF_INET,
397 .get_optmin = SO_ORIGINAL_DST, 416 .get_optmin = SO_ORIGINAL_DST,
@@ -408,6 +427,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
408 .print_conntrack = ipv4_print_conntrack, 427 .print_conntrack = ipv4_print_conntrack,
409 .prepare = ipv4_prepare, 428 .prepare = ipv4_prepare,
410 .get_features = ipv4_get_features, 429 .get_features = ipv4_get_features,
430#if defined(CONFIG_NF_CT_NETLINK) || \
431 defined(CONFIG_NF_CT_NETLINK_MODULE)
432 .tuple_to_nfattr = ipv4_tuple_to_nfattr,
433 .nfattr_to_tuple = ipv4_nfattr_to_tuple,
434#endif
411 .me = THIS_MODULE, 435 .me = THIS_MODULE,
412}; 436};
413 437
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7ddb5c08f7b8..52dc175be39a 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -50,20 +50,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
50 return 1; 50 return 1;
51} 51}
52 52
53/* Add 1; spaces filled with 0. */
54static const u_int8_t invmap[] = {
55 [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
56 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
57 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
58 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
59 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
60 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
61 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
62 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
63};
64
53static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, 65static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
54 const struct nf_conntrack_tuple *orig) 66 const struct nf_conntrack_tuple *orig)
55{ 67{
56 /* Add 1; spaces filled with 0. */
57 static u_int8_t invmap[]
58 = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
59 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
60 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
61 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
62 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
63 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
64 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
65 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
66
67 if (orig->dst.u.icmp.type >= sizeof(invmap) 68 if (orig->dst.u.icmp.type >= sizeof(invmap)
68 || !invmap[orig->dst.u.icmp.type]) 69 || !invmap[orig->dst.u.icmp.type])
69 return 0; 70 return 0;
@@ -120,11 +121,12 @@ static int icmp_packet(struct nf_conn *ct,
120static int icmp_new(struct nf_conn *conntrack, 121static int icmp_new(struct nf_conn *conntrack,
121 const struct sk_buff *skb, unsigned int dataoff) 122 const struct sk_buff *skb, unsigned int dataoff)
122{ 123{
123 static u_int8_t valid_new[] 124 static const u_int8_t valid_new[] = {
124 = { [ICMP_ECHO] = 1, 125 [ICMP_ECHO] = 1,
125 [ICMP_TIMESTAMP] = 1, 126 [ICMP_TIMESTAMP] = 1,
126 [ICMP_INFO_REQUEST] = 1, 127 [ICMP_INFO_REQUEST] = 1,
127 [ICMP_ADDRESS] = 1 }; 128 [ICMP_ADDRESS] = 1
129 };
128 130
129 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) 131 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
130 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { 132 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
@@ -168,7 +170,7 @@ icmp_error_message(struct sk_buff *skb,
168 return -NF_ACCEPT; 170 return -NF_ACCEPT;
169 } 171 }
170 172
171 innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol); 173 innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol);
172 dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); 174 dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
173 /* Are they talking about one of our connections? */ 175 /* Are they talking about one of our connections? */
174 if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, 176 if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
@@ -281,6 +283,60 @@ checksum_skipped:
281 return icmp_error_message(skb, ctinfo, hooknum); 283 return icmp_error_message(skb, ctinfo, hooknum);
282} 284}
283 285
286#if defined(CONFIG_NF_CT_NETLINK) || \
287 defined(CONFIG_NF_CT_NETLINK_MODULE)
288
289#include <linux/netfilter/nfnetlink.h>
290#include <linux/netfilter/nfnetlink_conntrack.h>
291
292static int icmp_tuple_to_nfattr(struct sk_buff *skb,
293 const struct nf_conntrack_tuple *t)
294{
295 NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
296 &t->src.u.icmp.id);
297 NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
298 &t->dst.u.icmp.type);
299 NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
300 &t->dst.u.icmp.code);
301
302 return 0;
303
304nfattr_failure:
305 return -1;
306}
307
308static const size_t cta_min_proto[CTA_PROTO_MAX] = {
309 [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
310 [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
311 [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t)
312};
313
314static int icmp_nfattr_to_tuple(struct nfattr *tb[],
315 struct nf_conntrack_tuple *tuple)
316{
317 if (!tb[CTA_PROTO_ICMP_TYPE-1]
318 || !tb[CTA_PROTO_ICMP_CODE-1]
319 || !tb[CTA_PROTO_ICMP_ID-1])
320 return -EINVAL;
321
322 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
323 return -EINVAL;
324
325 tuple->dst.u.icmp.type =
326 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
327 tuple->dst.u.icmp.code =
328 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
329 tuple->src.u.icmp.id =
330 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
331
332 if (tuple->dst.u.icmp.type >= sizeof(invmap)
333 || !invmap[tuple->dst.u.icmp.type])
334 return -EINVAL;
335
336 return 0;
337}
338#endif
339
284struct nf_conntrack_protocol nf_conntrack_protocol_icmp = 340struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
285{ 341{
286 .list = { NULL, NULL }, 342 .list = { NULL, NULL },
@@ -295,7 +351,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
295 .new = icmp_new, 351 .new = icmp_new,
296 .error = icmp_error, 352 .error = icmp_error,
297 .destroy = NULL, 353 .destroy = NULL,
298 .me = NULL 354 .me = NULL,
355#if defined(CONFIG_NF_CT_NETLINK) || \
356 defined(CONFIG_NF_CT_NETLINK_MODULE)
357 .tuple_to_nfattr = icmp_tuple_to_nfattr,
358 .nfattr_to_tuple = icmp_nfattr_to_tuple,
359#endif
299}; 360};
300 361
301EXPORT_SYMBOL(nf_conntrack_protocol_icmp); 362EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 0d7dc668db46..39d49dc333a7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
38#include <net/protocol.h> 38#include <net/protocol.h>
39#include <net/tcp.h> 39#include <net/tcp.h>
40#include <net/udp.h> 40#include <net/udp.h>
41#include <linux/inetdevice.h>
41#include <linux/proc_fs.h> 42#include <linux/proc_fs.h>
42#include <linux/seq_file.h> 43#include <linux/seq_file.h>
43#include <net/sock.h> 44#include <net/sock.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4b0d7e4d6269..165a4d81efa4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
255 kfree_skb(skb); 255 kfree_skb(skb);
256 return NET_RX_DROP; 256 return NET_RX_DROP;
257 } 257 }
258 nf_reset(skb);
258 259
259 skb_push(skb, skb->data - skb->nh.raw); 260 skb_push(skb, skb->data - skb->nh.raw);
260 261
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a34e60ea48a1..e20be3331f67 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
173 struct request_sock *req, 173 struct request_sock *req,
174 struct dst_entry *dst) 174 struct dst_entry *dst)
175{ 175{
176 struct tcp_sock *tp = tcp_sk(sk); 176 struct inet_connection_sock *icsk = inet_csk(sk);
177 struct sock *child; 177 struct sock *child;
178 178
179 child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); 179 child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
180 if (child) 180 if (child)
181 inet_csk_reqsk_queue_add(sk, req, child); 181 inet_csk_reqsk_queue_add(sk, req, child);
182 else 182 else
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 01444a02b48b..16984d4a8a06 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -12,6 +12,7 @@
12#include <linux/sysctl.h> 12#include <linux/sysctl.h>
13#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/igmp.h> 14#include <linux/igmp.h>
15#include <linux/inetdevice.h>
15#include <net/snmp.h> 16#include <net/snmp.h>
16#include <net/icmp.h> 17#include <net/icmp.h>
17#include <net/ip.h> 18#include <net/ip.h>
@@ -22,6 +23,7 @@
22extern int sysctl_ip_nonlocal_bind; 23extern int sysctl_ip_nonlocal_bind;
23 24
24#ifdef CONFIG_SYSCTL 25#ifdef CONFIG_SYSCTL
26static int zero;
25static int tcp_retr1_max = 255; 27static int tcp_retr1_max = 255;
26static int ip_local_port_range_min[] = { 1, 1 }; 28static int ip_local_port_range_min[] = { 1, 1 };
27static int ip_local_port_range_max[] = { 65535, 65535 }; 29static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -614,6 +616,15 @@ ctl_table ipv4_table[] = {
614 .strategy = &sysctl_jiffies 616 .strategy = &sysctl_jiffies
615 }, 617 },
616 { 618 {
619 .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
620 .procname = "ipfrag_max_dist",
621 .data = &sysctl_ipfrag_max_dist,
622 .maxlen = sizeof(int),
623 .mode = 0644,
624 .proc_handler = &proc_dointvec_minmax,
625 .extra1 = &zero
626 },
627 {
617 .ctl_name = NET_TCP_NO_METRICS_SAVE, 628 .ctl_name = NET_TCP_NO_METRICS_SAVE,
618 .procname = "tcp_no_metrics_save", 629 .procname = "tcp_no_metrics_save",
619 .data = &sysctl_tcp_nometrics_save, 630 .data = &sysctl_tcp_nometrics_save,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ef98b14ac56d..00aa80e93243 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
1696 int err = 0; 1696 int err = 0;
1697 1697
1698 if (level != SOL_TCP) 1698 if (level != SOL_TCP)
1699 return tp->af_specific->setsockopt(sk, level, optname, 1699 return icsk->icsk_af_ops->setsockopt(sk, level, optname,
1700 optval, optlen); 1700 optval, optlen);
1701 1701
1702 /* This is a string value all the others are int's */ 1702 /* This is a string value all the others are int's */
1703 if (optname == TCP_CONGESTION) { 1703 if (optname == TCP_CONGESTION) {
@@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
1914 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); 1914 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
1915 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); 1915 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
1916 1916
1917 info->tcpi_pmtu = tp->pmtu_cookie; 1917 info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
1918 info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; 1918 info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
1919 info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; 1919 info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
1920 info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; 1920 info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
@@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
1939 int val, len; 1939 int val, len;
1940 1940
1941 if (level != SOL_TCP) 1941 if (level != SOL_TCP)
1942 return tp->af_specific->getsockopt(sk, level, optname, 1942 return icsk->icsk_af_ops->getsockopt(sk, level, optname,
1943 optval, optlen); 1943 optval, optlen);
1944 1944
1945 if (get_user(len, optlen)) 1945 if (get_user(len, optlen))
1946 return -EFAULT; 1946 return -EFAULT;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 1d0cd86621b1..035f2092d73a 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -30,8 +30,6 @@ static int fast_convergence = 1;
30static int max_increment = 16; 30static int max_increment = 16;
31static int low_window = 14; 31static int low_window = 14;
32static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ 32static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
33static int low_utilization_threshold = 153;
34static int low_utilization_period = 2;
35static int initial_ssthresh = 100; 33static int initial_ssthresh = 100;
36static int smooth_part = 20; 34static int smooth_part = 20;
37 35
@@ -43,10 +41,6 @@ module_param(low_window, int, 0644);
43MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)"); 41MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)");
44module_param(beta, int, 0644); 42module_param(beta, int, 0644);
45MODULE_PARM_DESC(beta, "beta for multiplicative increase"); 43MODULE_PARM_DESC(beta, "beta for multiplicative increase");
46module_param(low_utilization_threshold, int, 0644);
47MODULE_PARM_DESC(low_utilization_threshold, "percent (scaled by 1024) for low utilization mode");
48module_param(low_utilization_period, int, 0644);
49MODULE_PARM_DESC(low_utilization_period, "if average delay exceeds then goto to low utilization mode (seconds)");
50module_param(initial_ssthresh, int, 0644); 44module_param(initial_ssthresh, int, 0644);
51MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); 45MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
52module_param(smooth_part, int, 0644); 46module_param(smooth_part, int, 0644);
@@ -60,11 +54,6 @@ struct bictcp {
60 u32 loss_cwnd; /* congestion window at last loss */ 54 u32 loss_cwnd; /* congestion window at last loss */
61 u32 last_cwnd; /* the last snd_cwnd */ 55 u32 last_cwnd; /* the last snd_cwnd */
62 u32 last_time; /* time when updated last_cwnd */ 56 u32 last_time; /* time when updated last_cwnd */
63 u32 delay_min; /* min delay */
64 u32 delay_max; /* max delay */
65 u32 last_delay;
66 u8 low_utilization;/* 0: high; 1: low */
67 u32 low_utilization_start; /* starting time of low utilization detection*/
68 u32 epoch_start; /* beginning of an epoch */ 57 u32 epoch_start; /* beginning of an epoch */
69#define ACK_RATIO_SHIFT 4 58#define ACK_RATIO_SHIFT 4
70 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ 59 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
@@ -77,11 +66,6 @@ static inline void bictcp_reset(struct bictcp *ca)
77 ca->loss_cwnd = 0; 66 ca->loss_cwnd = 0;
78 ca->last_cwnd = 0; 67 ca->last_cwnd = 0;
79 ca->last_time = 0; 68 ca->last_time = 0;
80 ca->delay_min = 0;
81 ca->delay_max = 0;
82 ca->last_delay = 0;
83 ca->low_utilization = 0;
84 ca->low_utilization_start = 0;
85 ca->epoch_start = 0; 69 ca->epoch_start = 0;
86 ca->delayed_ack = 2 << ACK_RATIO_SHIFT; 70 ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
87} 71}
@@ -143,8 +127,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
143 } 127 }
144 128
145 /* if in slow start or link utilization is very low */ 129 /* if in slow start or link utilization is very low */
146 if ( ca->loss_cwnd == 0 || 130 if (ca->loss_cwnd == 0) {
147 (cwnd > ca->loss_cwnd && ca->low_utilization)) {
148 if (ca->cnt > 20) /* increase cwnd 5% per RTT */ 131 if (ca->cnt > 20) /* increase cwnd 5% per RTT */
149 ca->cnt = 20; 132 ca->cnt = 20;
150 } 133 }
@@ -154,69 +137,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
154 ca->cnt = 1; 137 ca->cnt = 1;
155} 138}
156 139
157
158/* Detect low utilization in congestion avoidance */
159static inline void bictcp_low_utilization(struct sock *sk, int flag)
160{
161 const struct tcp_sock *tp = tcp_sk(sk);
162 struct bictcp *ca = inet_csk_ca(sk);
163 u32 dist, delay;
164
165 /* No time stamp */
166 if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
167 /* Discard delay samples right after fast recovery */
168 tcp_time_stamp < ca->epoch_start + HZ ||
169 /* this delay samples may not be accurate */
170 flag == 0) {
171 ca->last_delay = 0;
172 goto notlow;
173 }
174
175 delay = ca->last_delay<<3; /* use the same scale as tp->srtt*/
176 ca->last_delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
177 if (delay == 0) /* no previous delay sample */
178 goto notlow;
179
180 /* first time call or link delay decreases */
181 if (ca->delay_min == 0 || ca->delay_min > delay) {
182 ca->delay_min = ca->delay_max = delay;
183 goto notlow;
184 }
185
186 if (ca->delay_max < delay)
187 ca->delay_max = delay;
188
189 /* utilization is low, if avg delay < dist*threshold
190 for checking_period time */
191 dist = ca->delay_max - ca->delay_min;
192 if (dist <= ca->delay_min>>6 ||
193 tp->srtt - ca->delay_min >= (dist*low_utilization_threshold)>>10)
194 goto notlow;
195
196 if (ca->low_utilization_start == 0) {
197 ca->low_utilization = 0;
198 ca->low_utilization_start = tcp_time_stamp;
199 } else if ((s32)(tcp_time_stamp - ca->low_utilization_start)
200 > low_utilization_period*HZ) {
201 ca->low_utilization = 1;
202 }
203
204 return;
205
206 notlow:
207 ca->low_utilization = 0;
208 ca->low_utilization_start = 0;
209
210}
211
212static void bictcp_cong_avoid(struct sock *sk, u32 ack, 140static void bictcp_cong_avoid(struct sock *sk, u32 ack,
213 u32 seq_rtt, u32 in_flight, int data_acked) 141 u32 seq_rtt, u32 in_flight, int data_acked)
214{ 142{
215 struct tcp_sock *tp = tcp_sk(sk); 143 struct tcp_sock *tp = tcp_sk(sk);
216 struct bictcp *ca = inet_csk_ca(sk); 144 struct bictcp *ca = inet_csk_ca(sk);
217 145
218 bictcp_low_utilization(sk, data_acked);
219
220 if (!tcp_is_cwnd_limited(sk, in_flight)) 146 if (!tcp_is_cwnd_limited(sk, in_flight))
221 return; 147 return;
222 148
@@ -249,11 +175,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
249 175
250 ca->epoch_start = 0; /* end of epoch */ 176 ca->epoch_start = 0; /* end of epoch */
251 177
252 /* in case of wrong delay_max*/
253 if (ca->delay_min > 0 && ca->delay_max > ca->delay_min)
254 ca->delay_max = ca->delay_min
255 + ((ca->delay_max - ca->delay_min)* 90) / 100;
256
257 /* Wmax and fast convergence */ 178 /* Wmax and fast convergence */
258 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) 179 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
259 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) 180 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
@@ -289,14 +210,14 @@ static void bictcp_state(struct sock *sk, u8 new_state)
289 bictcp_reset(inet_csk_ca(sk)); 210 bictcp_reset(inet_csk_ca(sk));
290} 211}
291 212
292/* Track delayed acknowledgement ratio using sliding window 213/* Track delayed acknowledgment ratio using sliding window
293 * ratio = (15*ratio + sample) / 16 214 * ratio = (15*ratio + sample) / 16
294 */ 215 */
295static void bictcp_acked(struct sock *sk, u32 cnt) 216static void bictcp_acked(struct sock *sk, u32 cnt)
296{ 217{
297 const struct inet_connection_sock *icsk = inet_csk(sk); 218 const struct inet_connection_sock *icsk = inet_csk(sk);
298 219
299 if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { 220 if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
300 struct bictcp *ca = inet_csk_ca(sk); 221 struct bictcp *ca = inet_csk_ca(sk);
301 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; 222 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
302 ca->delayed_ack += cnt; 223 ca->delayed_ack += cnt;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index c7cc62c8dc12..e688c687d62d 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
174 return err; 174 return err;
175} 175}
176 176
177
178/*
179 * Linear increase during slow start
180 */
181void tcp_slow_start(struct tcp_sock *tp)
182{
183 if (sysctl_tcp_abc) {
184 /* RFC3465: Slow Start
185 * TCP sender SHOULD increase cwnd by the number of
186 * previously unacknowledged bytes ACKed by each incoming
187 * acknowledgment, provided the increase is not more than L
188 */
189 if (tp->bytes_acked < tp->mss_cache)
190 return;
191
192 /* We MAY increase by 2 if discovered delayed ack */
193 if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
194 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
195 tp->snd_cwnd++;
196 }
197 }
198 tp->bytes_acked = 0;
199
200 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
201 tp->snd_cwnd++;
202}
203EXPORT_SYMBOL_GPL(tcp_slow_start);
204
177/* 205/*
178 * TCP Reno congestion control 206 * TCP Reno congestion control
179 * This is special case used for fallback as well. 207 * This is special case used for fallback as well.
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
new file mode 100644
index 000000000000..31a4986dfbf7
--- /dev/null
+++ b/net/ipv4/tcp_cubic.c
@@ -0,0 +1,411 @@
1/*
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
3 *
4 * This is from the implementation of CUBIC TCP in
5 * Injong Rhee, Lisong Xu.
6 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant
7 * in PFLDnet 2005
8 * Available from:
9 * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
10 *
11 * Unless CUBIC is enabled and congestion window is large
12 * this behaves the same as the original Reno.
13 */
14
15#include <linux/config.h>
16#include <linux/mm.h>
17#include <linux/module.h>
18#include <net/tcp.h>
19#include <asm/div64.h>
20
21#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
22 * max_cwnd = snd_cwnd * beta
23 */
24#define BICTCP_B 4 /*
25 * In binary search,
26 * go to point (max+min)/N
27 */
28#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
29
30static int fast_convergence = 1;
31static int max_increment = 16;
32static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
33static int initial_ssthresh = 100;
34static int bic_scale = 41;
35static int tcp_friendliness = 1;
36
37static u32 cube_rtt_scale;
38static u32 beta_scale;
39static u64 cube_factor;
40
41/* Note parameters that are used for precomputing scale factors are read-only */
42module_param(fast_convergence, int, 0644);
43MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
44module_param(max_increment, int, 0644);
45MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
46module_param(beta, int, 0444);
47MODULE_PARM_DESC(beta, "beta for multiplicative increase");
48module_param(initial_ssthresh, int, 0644);
49MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
50module_param(bic_scale, int, 0444);
51MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
52module_param(tcp_friendliness, int, 0644);
53MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
54
55#include <asm/div64.h>
56
57/* BIC TCP Parameters */
58struct bictcp {
59 u32 cnt; /* increase cwnd by 1 after ACKs */
60 u32 last_max_cwnd; /* last maximum snd_cwnd */
61 u32 loss_cwnd; /* congestion window at last loss */
62 u32 last_cwnd; /* the last snd_cwnd */
63 u32 last_time; /* time when updated last_cwnd */
64 u32 bic_origin_point;/* origin point of bic function */
65 u32 bic_K; /* time to origin point from the beginning of the current epoch */
66 u32 delay_min; /* min delay */
67 u32 epoch_start; /* beginning of an epoch */
68 u32 ack_cnt; /* number of acks */
69 u32 tcp_cwnd; /* estimated tcp cwnd */
70#define ACK_RATIO_SHIFT 4
71 u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
72};
73
74static inline void bictcp_reset(struct bictcp *ca)
75{
76 ca->cnt = 0;
77 ca->last_max_cwnd = 0;
78 ca->loss_cwnd = 0;
79 ca->last_cwnd = 0;
80 ca->last_time = 0;
81 ca->bic_origin_point = 0;
82 ca->bic_K = 0;
83 ca->delay_min = 0;
84 ca->epoch_start = 0;
85 ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
86 ca->ack_cnt = 0;
87 ca->tcp_cwnd = 0;
88}
89
90static void bictcp_init(struct sock *sk)
91{
92 bictcp_reset(inet_csk_ca(sk));
93 if (initial_ssthresh)
94 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
95}
96
97/* 64bit divisor, dividend and result. dynamic precision */
98static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
99{
100 u_int32_t d = divisor;
101
102 if (divisor > 0xffffffffULL) {
103 unsigned int shift = fls(divisor >> 32);
104
105 d = divisor >> shift;
106 dividend >>= shift;
107 }
108
109 /* avoid 64 bit division if possible */
110 if (dividend >> 32)
111 do_div(dividend, d);
112 else
113 dividend = (uint32_t) dividend / d;
114
115 return dividend;
116}
117
118/*
119 * calculate the cubic root of x using Newton-Raphson
120 */
121static u32 cubic_root(u64 a)
122{
123 u32 x, x1;
124
125 /* Initial estimate is based on:
126 * cbrt(x) = exp(log(x) / 3)
127 */
128 x = 1u << (fls64(a)/3);
129
130 /*
131 * Iteration based on:
132 * 2
133 * x = ( 2 * x + a / x ) / 3
134 * k+1 k k
135 */
136 do {
137 x1 = x;
138 x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
139 } while (abs(x1 - x) > 1);
140
141 return x;
142}
143
144/*
145 * Compute congestion window to use.
146 */
147static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
148{
149 u64 offs;
150 u32 delta, t, bic_target, min_cnt, max_cnt;
151
152 ca->ack_cnt++; /* count the number of ACKs */
153
154 if (ca->last_cwnd == cwnd &&
155 (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
156 return;
157
158 ca->last_cwnd = cwnd;
159 ca->last_time = tcp_time_stamp;
160
161 if (ca->epoch_start == 0) {
162 ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */
163 ca->ack_cnt = 1; /* start counting */
164 ca->tcp_cwnd = cwnd; /* syn with cubic */
165
166 if (ca->last_max_cwnd <= cwnd) {
167 ca->bic_K = 0;
168 ca->bic_origin_point = cwnd;
169 } else {
170 /* Compute new K based on
171 * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
172 */
173 ca->bic_K = cubic_root(cube_factor
174 * (ca->last_max_cwnd - cwnd));
175 ca->bic_origin_point = ca->last_max_cwnd;
176 }
177 }
178
179 /* cubic function - calc*/
180 /* calculate c * time^3 / rtt,
181 * while considering overflow in calculation of time^3
182 * (so time^3 is done by using 64 bit)
183 * and without the support of division of 64bit numbers
184 * (so all divisions are done by using 32 bit)
185 * also NOTE the unit of those veriables
186 * time = (t - K) / 2^bictcp_HZ
187 * c = bic_scale >> 10
188 * rtt = (srtt >> 3) / HZ
189 * !!! The following code does not have overflow problems,
190 * if the cwnd < 1 million packets !!!
191 */
192
193 /* change the unit from HZ to bictcp_HZ */
194 t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
195 << BICTCP_HZ) / HZ;
196
197 if (t < ca->bic_K) /* t - K */
198 offs = ca->bic_K - t;
199 else
200 offs = t - ca->bic_K;
201
202 /* c/rtt * (t-K)^3 */
203 delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
204 if (t < ca->bic_K) /* below origin*/
205 bic_target = ca->bic_origin_point - delta;
206 else /* above origin*/
207 bic_target = ca->bic_origin_point + delta;
208
209 /* cubic function - calc bictcp_cnt*/
210 if (bic_target > cwnd) {
211 ca->cnt = cwnd / (bic_target - cwnd);
212 } else {
213 ca->cnt = 100 * cwnd; /* very small increment*/
214 }
215
216 if (ca->delay_min > 0) {
217 /* max increment = Smax * rtt / 0.1 */
218 min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
219 if (ca->cnt < min_cnt)
220 ca->cnt = min_cnt;
221 }
222
223 /* slow start and low utilization */
224 if (ca->loss_cwnd == 0) /* could be aggressive in slow start */
225 ca->cnt = 50;
226
227 /* TCP Friendly */
228 if (tcp_friendliness) {
229 u32 scale = beta_scale;
230 delta = (cwnd * scale) >> 3;
231 while (ca->ack_cnt > delta) { /* update tcp cwnd */
232 ca->ack_cnt -= delta;
233 ca->tcp_cwnd++;
234 }
235
236 if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */
237 delta = ca->tcp_cwnd - cwnd;
238 max_cnt = cwnd / delta;
239 if (ca->cnt > max_cnt)
240 ca->cnt = max_cnt;
241 }
242 }
243
244 ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
245 if (ca->cnt == 0) /* cannot be zero */
246 ca->cnt = 1;
247}
248
249
250/* Keep track of minimum rtt */
251static inline void measure_delay(struct sock *sk)
252{
253 const struct tcp_sock *tp = tcp_sk(sk);
254 struct bictcp *ca = inet_csk_ca(sk);
255 u32 delay;
256
257 /* No time stamp */
258 if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ||
259 /* Discard delay samples right after fast recovery */
260 (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
261 return;
262
263 delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
264 if (delay == 0)
265 delay = 1;
266
267 /* first time call or link delay decreases */
268 if (ca->delay_min == 0 || ca->delay_min > delay)
269 ca->delay_min = delay;
270}
271
272static void bictcp_cong_avoid(struct sock *sk, u32 ack,
273 u32 seq_rtt, u32 in_flight, int data_acked)
274{
275 struct tcp_sock *tp = tcp_sk(sk);
276 struct bictcp *ca = inet_csk_ca(sk);
277
278 if (data_acked)
279 measure_delay(sk);
280
281 if (!tcp_is_cwnd_limited(sk, in_flight))
282 return;
283
284 if (tp->snd_cwnd <= tp->snd_ssthresh)
285 tcp_slow_start(tp);
286 else {
287 bictcp_update(ca, tp->snd_cwnd);
288
289 /* In dangerous area, increase slowly.
290 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
291 */
292 if (tp->snd_cwnd_cnt >= ca->cnt) {
293 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
294 tp->snd_cwnd++;
295 tp->snd_cwnd_cnt = 0;
296 } else
297 tp->snd_cwnd_cnt++;
298 }
299
300}
301
302static u32 bictcp_recalc_ssthresh(struct sock *sk)
303{
304 const struct tcp_sock *tp = tcp_sk(sk);
305 struct bictcp *ca = inet_csk_ca(sk);
306
307 ca->epoch_start = 0; /* end of epoch */
308
309 /* Wmax and fast convergence */
310 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
311 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
312 / (2 * BICTCP_BETA_SCALE);
313 else
314 ca->last_max_cwnd = tp->snd_cwnd;
315
316 ca->loss_cwnd = tp->snd_cwnd;
317
318 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
319}
320
321static u32 bictcp_undo_cwnd(struct sock *sk)
322{
323 struct bictcp *ca = inet_csk_ca(sk);
324
325 return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);
326}
327
328static u32 bictcp_min_cwnd(struct sock *sk)
329{
330 return tcp_sk(sk)->snd_ssthresh;
331}
332
333static void bictcp_state(struct sock *sk, u8 new_state)
334{
335 if (new_state == TCP_CA_Loss)
336 bictcp_reset(inet_csk_ca(sk));
337}
338
339/* Track delayed acknowledgment ratio using sliding window
340 * ratio = (15*ratio + sample) / 16
341 */
342static void bictcp_acked(struct sock *sk, u32 cnt)
343{
344 const struct inet_connection_sock *icsk = inet_csk(sk);
345
346 if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
347 struct bictcp *ca = inet_csk_ca(sk);
348 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
349 ca->delayed_ack += cnt;
350 }
351}
352
353
354static struct tcp_congestion_ops cubictcp = {
355 .init = bictcp_init,
356 .ssthresh = bictcp_recalc_ssthresh,
357 .cong_avoid = bictcp_cong_avoid,
358 .set_state = bictcp_state,
359 .undo_cwnd = bictcp_undo_cwnd,
360 .min_cwnd = bictcp_min_cwnd,
361 .pkts_acked = bictcp_acked,
362 .owner = THIS_MODULE,
363 .name = "cubic",
364};
365
366static int __init cubictcp_register(void)
367{
368 BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
369
370 /* Precompute a bunch of the scaling factors that are used per-packet
371 * based on SRTT of 100ms
372 */
373
374 beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
375
376 cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */
377
378 /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
379 * so K = cubic_root( (wmax-cwnd)*rtt/c )
380 * the unit of K is bictcp_HZ=2^10, not HZ
381 *
382 * c = bic_scale >> 10
383 * rtt = 100ms
384 *
385 * the following code has been designed and tested for
386 * cwnd < 1 million packets
387 * RTT < 100 seconds
388 * HZ < 1,000,00 (corresponding to 10 nano-second)
389 */
390
391 /* 1/c * 2^2*bictcp_HZ * srtt */
392 cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */
393
394 /* divide by bic_scale and by constant Srtt (100ms) */
395 do_div(cube_factor, bic_scale * 10);
396
397 return tcp_register_congestion_control(&cubictcp);
398}
399
400static void __exit cubictcp_unregister(void)
401{
402 tcp_unregister_congestion_control(&cubictcp);
403}
404
405module_init(cubictcp_register);
406module_exit(cubictcp_unregister);
407
408MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
409MODULE_LICENSE("GPL");
410MODULE_DESCRIPTION("CUBIC TCP");
411MODULE_VERSION("2.0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf2e23086bce..a97ed5416c28 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1;
115/* Adapt the MSS value used to make delayed ack decision to the 115/* Adapt the MSS value used to make delayed ack decision to the
116 * real world. 116 * real world.
117 */ 117 */
118static inline void tcp_measure_rcv_mss(struct sock *sk, 118static void tcp_measure_rcv_mss(struct sock *sk,
119 const struct sk_buff *skb) 119 const struct sk_buff *skb)
120{ 120{
121 struct inet_connection_sock *icsk = inet_csk(sk); 121 struct inet_connection_sock *icsk = inet_csk(sk);
122 const unsigned int lss = icsk->icsk_ack.last_seg_size; 122 const unsigned int lss = icsk->icsk_ack.last_seg_size;
@@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
246 return 0; 246 return 0;
247} 247}
248 248
249static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, 249static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
250 struct sk_buff *skb) 250 struct sk_buff *skb)
251{ 251{
252 /* Check #1 */ 252 /* Check #1 */
253 if (tp->rcv_ssthresh < tp->window_clamp && 253 if (tp->rcv_ssthresh < tp->window_clamp &&
@@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
342} 342}
343 343
344
345/* Initialize RCV_MSS value.
346 * RCV_MSS is an our guess about MSS used by the peer.
347 * We haven't any direct information about the MSS.
348 * It's better to underestimate the RCV_MSS rather than overestimate.
349 * Overestimations make us ACKing less frequently than needed.
350 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
351 */
352void tcp_initialize_rcv_mss(struct sock *sk)
353{
354 struct tcp_sock *tp = tcp_sk(sk);
355 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
356
357 hint = min(hint, tp->rcv_wnd/2);
358 hint = min(hint, TCP_MIN_RCVMSS);
359 hint = max(hint, TCP_MIN_MSS);
360
361 inet_csk(sk)->icsk_ack.rcv_mss = hint;
362}
363
344/* Receiver "autotuning" code. 364/* Receiver "autotuning" code.
345 * 365 *
346 * The algorithm for RTT estimation w/o timestamps is based on 366 * The algorithm for RTT estimation w/o timestamps is based on
@@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
735 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 755 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
736} 756}
737 757
758/* Set slow start threshold and cwnd not falling to slow start */
759void tcp_enter_cwr(struct sock *sk)
760{
761 struct tcp_sock *tp = tcp_sk(sk);
762
763 tp->prior_ssthresh = 0;
764 tp->bytes_acked = 0;
765 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
766 tp->undo_marker = 0;
767 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
768 tp->snd_cwnd = min(tp->snd_cwnd,
769 tcp_packets_in_flight(tp) + 1U);
770 tp->snd_cwnd_cnt = 0;
771 tp->high_seq = tp->snd_nxt;
772 tp->snd_cwnd_stamp = tcp_time_stamp;
773 TCP_ECN_queue_cwr(tp);
774
775 tcp_set_ca_state(sk, TCP_CA_CWR);
776 }
777}
778
738/* Initialize metrics on socket. */ 779/* Initialize metrics on socket. */
739 780
740static void tcp_init_metrics(struct sock *sk) 781static void tcp_init_metrics(struct sock *sk)
@@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
2070 tcp_ack_no_tstamp(sk, seq_rtt, flag); 2111 tcp_ack_no_tstamp(sk, seq_rtt, flag);
2071} 2112}
2072 2113
2073static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, 2114static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
2074 u32 in_flight, int good) 2115 u32 in_flight, int good)
2075{ 2116{
2076 const struct inet_connection_sock *icsk = inet_csk(sk); 2117 const struct inet_connection_sock *icsk = inet_csk(sk);
2077 icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); 2118 icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
@@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
2082 * RFC2988 recommends to restart timer to now+rto. 2123 * RFC2988 recommends to restart timer to now+rto.
2083 */ 2124 */
2084 2125
2085static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) 2126static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
2086{ 2127{
2087 if (!tp->packets_out) { 2128 if (!tp->packets_out) {
2088 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 2129 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
2147 return acked; 2188 return acked;
2148} 2189}
2149 2190
2150static inline u32 tcp_usrtt(const struct sk_buff *skb) 2191static u32 tcp_usrtt(const struct sk_buff *skb)
2151{ 2192{
2152 struct timeval tv, now; 2193 struct timeval tv, now;
2153 2194
@@ -2342,7 +2383,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
2342 2383
2343 if (nwin > tp->max_window) { 2384 if (nwin > tp->max_window) {
2344 tp->max_window = nwin; 2385 tp->max_window = nwin;
2345 tcp_sync_mss(sk, tp->pmtu_cookie); 2386 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
2346 } 2387 }
2347 } 2388 }
2348 } 2389 }
@@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2583/* Fast parse options. This hopes to only see timestamps. 2624/* Fast parse options. This hopes to only see timestamps.
2584 * If it is wrong it falls back on tcp_parse_options(). 2625 * If it is wrong it falls back on tcp_parse_options().
2585 */ 2626 */
2586static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, 2627static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
2587 struct tcp_sock *tp) 2628 struct tcp_sock *tp)
2588{ 2629{
2589 if (th->doff == sizeof(struct tcphdr)>>2) { 2630 if (th->doff == sizeof(struct tcphdr)>>2) {
2590 tp->rx_opt.saw_tstamp = 0; 2631 tp->rx_opt.saw_tstamp = 0;
@@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
2804 } 2845 }
2805} 2846}
2806 2847
2807static __inline__ int 2848static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2808tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2809{ 2849{
2810 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { 2850 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
2811 if (before(seq, sp->start_seq)) 2851 if (before(seq, sp->start_seq))
@@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2817 return 0; 2857 return 0;
2818} 2858}
2819 2859
2820static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) 2860static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
2821{ 2861{
2822 if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { 2862 if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
2823 if (before(seq, tp->rcv_nxt)) 2863 if (before(seq, tp->rcv_nxt))
@@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
2832 } 2872 }
2833} 2873}
2834 2874
2835static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) 2875static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
2836{ 2876{
2837 if (!tp->rx_opt.dsack) 2877 if (!tp->rx_opt.dsack)
2838 tcp_dsack_set(tp, seq, end_seq); 2878 tcp_dsack_set(tp, seq, end_seq);
@@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
2890 } 2930 }
2891} 2931}
2892 2932
2893static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) 2933static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
2894{ 2934{
2895 __u32 tmp; 2935 __u32 tmp;
2896 2936
@@ -3307,7 +3347,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3307 int offset = start - TCP_SKB_CB(skb)->seq; 3347 int offset = start - TCP_SKB_CB(skb)->seq;
3308 int size = TCP_SKB_CB(skb)->end_seq - start; 3348 int size = TCP_SKB_CB(skb)->end_seq - start;
3309 3349
3310 if (offset < 0) BUG(); 3350 BUG_ON(offset < 0);
3311 if (size > 0) { 3351 if (size > 0) {
3312 size = min(copy, size); 3352 size = min(copy, size);
3313 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size)) 3353 if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
@@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk)
3455 tp->snd_cwnd_stamp = tcp_time_stamp; 3495 tp->snd_cwnd_stamp = tcp_time_stamp;
3456} 3496}
3457 3497
3458static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) 3498static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
3459{ 3499{
3460 /* If the user specified a specific send buffer setting, do 3500 /* If the user specified a specific send buffer setting, do
3461 * not modify it. 3501 * not modify it.
@@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk)
3502 sk->sk_write_space(sk); 3542 sk->sk_write_space(sk);
3503} 3543}
3504 3544
3505static inline void tcp_check_space(struct sock *sk) 3545static void tcp_check_space(struct sock *sk)
3506{ 3546{
3507 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { 3547 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
3508 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); 3548 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
@@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk)
3512 } 3552 }
3513} 3553}
3514 3554
3515static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) 3555static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
3516{ 3556{
3517 tcp_push_pending_frames(sk, tp); 3557 tcp_push_pending_frames(sk, tp);
3518 tcp_check_space(sk); 3558 tcp_check_space(sk);
@@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
3544 } 3584 }
3545} 3585}
3546 3586
3547static __inline__ void tcp_ack_snd_check(struct sock *sk) 3587static inline void tcp_ack_snd_check(struct sock *sk)
3548{ 3588{
3549 if (!inet_csk_ack_scheduled(sk)) { 3589 if (!inet_csk_ack_scheduled(sk)) {
3550 /* We sent a data segment already. */ 3590 /* We sent a data segment already. */
@@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3692 return result; 3732 return result;
3693} 3733}
3694 3734
3695static __inline__ int 3735static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3696tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3697{ 3736{
3698 return skb->ip_summed != CHECKSUM_UNNECESSARY && 3737 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
3699 __tcp_checksum_complete_user(sk, skb); 3738 __tcp_checksum_complete_user(sk, skb);
@@ -3967,12 +4006,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
3967 struct tcphdr *th, unsigned len) 4006 struct tcphdr *th, unsigned len)
3968{ 4007{
3969 struct tcp_sock *tp = tcp_sk(sk); 4008 struct tcp_sock *tp = tcp_sk(sk);
4009 struct inet_connection_sock *icsk = inet_csk(sk);
3970 int saved_clamp = tp->rx_opt.mss_clamp; 4010 int saved_clamp = tp->rx_opt.mss_clamp;
3971 4011
3972 tcp_parse_options(skb, &tp->rx_opt, 0); 4012 tcp_parse_options(skb, &tp->rx_opt, 0);
3973 4013
3974 if (th->ack) { 4014 if (th->ack) {
3975 struct inet_connection_sock *icsk;
3976 /* rfc793: 4015 /* rfc793:
3977 * "If the state is SYN-SENT then 4016 * "If the state is SYN-SENT then
3978 * first check the ACK bit 4017 * first check the ACK bit
@@ -4061,7 +4100,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4061 if (tp->rx_opt.sack_ok && sysctl_tcp_fack) 4100 if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
4062 tp->rx_opt.sack_ok |= 2; 4101 tp->rx_opt.sack_ok |= 2;
4063 4102
4064 tcp_sync_mss(sk, tp->pmtu_cookie); 4103 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
4065 tcp_initialize_rcv_mss(sk); 4104 tcp_initialize_rcv_mss(sk);
4066 4105
4067 /* Remember, tcp_poll() does not lock socket! 4106 /* Remember, tcp_poll() does not lock socket!
@@ -4072,7 +4111,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4072 tcp_set_state(sk, TCP_ESTABLISHED); 4111 tcp_set_state(sk, TCP_ESTABLISHED);
4073 4112
4074 /* Make sure socket is routed, for correct metrics. */ 4113 /* Make sure socket is routed, for correct metrics. */
4075 tp->af_specific->rebuild_header(sk); 4114 icsk->icsk_af_ops->rebuild_header(sk);
4076 4115
4077 tcp_init_metrics(sk); 4116 tcp_init_metrics(sk);
4078 4117
@@ -4098,8 +4137,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4098 sk_wake_async(sk, 0, POLL_OUT); 4137 sk_wake_async(sk, 0, POLL_OUT);
4099 } 4138 }
4100 4139
4101 icsk = inet_csk(sk);
4102
4103 if (sk->sk_write_pending || 4140 if (sk->sk_write_pending ||
4104 icsk->icsk_accept_queue.rskq_defer_accept || 4141 icsk->icsk_accept_queue.rskq_defer_accept ||
4105 icsk->icsk_ack.pingpong) { 4142 icsk->icsk_ack.pingpong) {
@@ -4173,7 +4210,7 @@ discard:
4173 if (tp->ecn_flags&TCP_ECN_OK) 4210 if (tp->ecn_flags&TCP_ECN_OK)
4174 sock_set_flag(sk, SOCK_NO_LARGESEND); 4211 sock_set_flag(sk, SOCK_NO_LARGESEND);
4175 4212
4176 tcp_sync_mss(sk, tp->pmtu_cookie); 4213 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
4177 tcp_initialize_rcv_mss(sk); 4214 tcp_initialize_rcv_mss(sk);
4178 4215
4179 4216
@@ -4220,6 +4257,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4220 struct tcphdr *th, unsigned len) 4257 struct tcphdr *th, unsigned len)
4221{ 4258{
4222 struct tcp_sock *tp = tcp_sk(sk); 4259 struct tcp_sock *tp = tcp_sk(sk);
4260 struct inet_connection_sock *icsk = inet_csk(sk);
4223 int queued = 0; 4261 int queued = 0;
4224 4262
4225 tp->rx_opt.saw_tstamp = 0; 4263 tp->rx_opt.saw_tstamp = 0;
@@ -4236,7 +4274,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4236 goto discard; 4274 goto discard;
4237 4275
4238 if(th->syn) { 4276 if(th->syn) {
4239 if(tp->af_specific->conn_request(sk, skb) < 0) 4277 if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
4240 return 1; 4278 return 1;
4241 4279
4242 /* Now we have several options: In theory there is 4280 /* Now we have several options: In theory there is
@@ -4349,7 +4387,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4349 /* Make sure socket is routed, for 4387 /* Make sure socket is routed, for
4350 * correct metrics. 4388 * correct metrics.
4351 */ 4389 */
4352 tp->af_specific->rebuild_header(sk); 4390 icsk->icsk_af_ops->rebuild_header(sk);
4353 4391
4354 tcp_init_metrics(sk); 4392 tcp_init_metrics(sk);
4355 4393
@@ -4475,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc);
4475EXPORT_SYMBOL(tcp_parse_options); 4513EXPORT_SYMBOL(tcp_parse_options);
4476EXPORT_SYMBOL(tcp_rcv_established); 4514EXPORT_SYMBOL(tcp_rcv_established);
4477EXPORT_SYMBOL(tcp_rcv_state_process); 4515EXPORT_SYMBOL(tcp_rcv_state_process);
4516EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4d5021e1929b..6ea353907af5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -69,6 +69,7 @@
69#include <net/transp_v6.h> 69#include <net/transp_v6.h>
70#include <net/ipv6.h> 70#include <net/ipv6.h>
71#include <net/inet_common.h> 71#include <net/inet_common.h>
72#include <net/timewait_sock.h>
72#include <net/xfrm.h> 73#include <net/xfrm.h>
73 74
74#include <linux/inet.h> 75#include <linux/inet.h>
@@ -86,8 +87,7 @@ int sysctl_tcp_low_latency;
86/* Socket used for sending RSTs */ 87/* Socket used for sending RSTs */
87static struct socket *tcp_socket; 88static struct socket *tcp_socket;
88 89
89void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 90void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
90 struct sk_buff *skb);
91 91
92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 92struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
93 .lhash_lock = RW_LOCK_UNLOCKED, 93 .lhash_lock = RW_LOCK_UNLOCKED,
@@ -97,7 +97,8 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
97 97
98static int tcp_v4_get_port(struct sock *sk, unsigned short snum) 98static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
99{ 99{
100 return inet_csk_get_port(&tcp_hashinfo, sk, snum); 100 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
101 inet_csk_bind_conflict);
101} 102}
102 103
103static void tcp_v4_hash(struct sock *sk) 104static void tcp_v4_hash(struct sock *sk)
@@ -118,202 +119,38 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
118 skb->h.th->source); 119 skb->h.th->source);
119} 120}
120 121
121/* called with local bh disabled */ 122int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
122static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
123 struct inet_timewait_sock **twp)
124{ 123{
125 struct inet_sock *inet = inet_sk(sk); 124 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
126 u32 daddr = inet->rcv_saddr; 125 struct tcp_sock *tp = tcp_sk(sk);
127 u32 saddr = inet->daddr;
128 int dif = sk->sk_bound_dev_if;
129 INET_ADDR_COOKIE(acookie, saddr, daddr)
130 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
131 unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
132 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
133 struct sock *sk2;
134 const struct hlist_node *node;
135 struct inet_timewait_sock *tw;
136
137 prefetch(head->chain.first);
138 write_lock(&head->lock);
139
140 /* Check TIME-WAIT sockets first. */
141 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
142 tw = inet_twsk(sk2);
143
144 if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
145 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
146 struct tcp_sock *tp = tcp_sk(sk);
147
148 /* With PAWS, it is safe from the viewpoint
149 of data integrity. Even without PAWS it
150 is safe provided sequence spaces do not
151 overlap i.e. at data rates <= 80Mbit/sec.
152
153 Actually, the idea is close to VJ's one,
154 only timestamp cache is held not per host,
155 but per port pair and TW bucket is used
156 as state holder.
157 126
158 If TW bucket has been already destroyed we 127 /* With PAWS, it is safe from the viewpoint
159 fall back to VJ's scheme and use initial 128 of data integrity. Even without PAWS it is safe provided sequence
160 timestamp retrieved from peer table. 129 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
161 */
162 if (tcptw->tw_ts_recent_stamp &&
163 (!twp || (sysctl_tcp_tw_reuse &&
164 xtime.tv_sec -
165 tcptw->tw_ts_recent_stamp > 1))) {
166 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
167 if (tp->write_seq == 0)
168 tp->write_seq = 1;
169 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
170 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
171 sock_hold(sk2);
172 goto unique;
173 } else
174 goto not_unique;
175 }
176 }
177 tw = NULL;
178 130
179 /* And established part... */ 131 Actually, the idea is close to VJ's one, only timestamp cache is
180 sk_for_each(sk2, node, &head->chain) { 132 held not per host, but per port pair and TW bucket is used as state
181 if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) 133 holder.
182 goto not_unique;
183 }
184 134
185unique: 135 If TW bucket has been already destroyed we fall back to VJ's scheme
186 /* Must record num and sport now. Otherwise we will see 136 and use initial timestamp retrieved from peer table.
187 * in hash table socket with a funny identity. */ 137 */
188 inet->num = lport; 138 if (tcptw->tw_ts_recent_stamp &&
189 inet->sport = htons(lport); 139 (twp == NULL || (sysctl_tcp_tw_reuse &&
190 sk->sk_hash = hash; 140 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
191 BUG_TRAP(sk_unhashed(sk)); 141 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
192 __sk_add_node(sk, &head->chain); 142 if (tp->write_seq == 0)
193 sock_prot_inc_use(sk->sk_prot); 143 tp->write_seq = 1;
194 write_unlock(&head->lock); 144 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
195 145 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
196 if (twp) { 146 sock_hold(sktw);
197 *twp = tw; 147 return 1;
198 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
199 } else if (tw) {
200 /* Silly. Should hash-dance instead... */
201 inet_twsk_deschedule(tw, &tcp_death_row);
202 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
203
204 inet_twsk_put(tw);
205 } 148 }
206 149
207 return 0; 150 return 0;
208
209not_unique:
210 write_unlock(&head->lock);
211 return -EADDRNOTAVAIL;
212} 151}
213 152
214static inline u32 connect_port_offset(const struct sock *sk) 153EXPORT_SYMBOL_GPL(tcp_twsk_unique);
215{
216 const struct inet_sock *inet = inet_sk(sk);
217
218 return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr,
219 inet->dport);
220}
221
222/*
223 * Bind a port for a connect operation and hash it.
224 */
225static inline int tcp_v4_hash_connect(struct sock *sk)
226{
227 const unsigned short snum = inet_sk(sk)->num;
228 struct inet_bind_hashbucket *head;
229 struct inet_bind_bucket *tb;
230 int ret;
231
232 if (!snum) {
233 int low = sysctl_local_port_range[0];
234 int high = sysctl_local_port_range[1];
235 int range = high - low;
236 int i;
237 int port;
238 static u32 hint;
239 u32 offset = hint + connect_port_offset(sk);
240 struct hlist_node *node;
241 struct inet_timewait_sock *tw = NULL;
242
243 local_bh_disable();
244 for (i = 1; i <= range; i++) {
245 port = low + (i + offset) % range;
246 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
247 spin_lock(&head->lock);
248
249 /* Does not bother with rcv_saddr checks,
250 * because the established check is already
251 * unique enough.
252 */
253 inet_bind_bucket_for_each(tb, node, &head->chain) {
254 if (tb->port == port) {
255 BUG_TRAP(!hlist_empty(&tb->owners));
256 if (tb->fastreuse >= 0)
257 goto next_port;
258 if (!__tcp_v4_check_established(sk,
259 port,
260 &tw))
261 goto ok;
262 goto next_port;
263 }
264 }
265
266 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
267 if (!tb) {
268 spin_unlock(&head->lock);
269 break;
270 }
271 tb->fastreuse = -1;
272 goto ok;
273
274 next_port:
275 spin_unlock(&head->lock);
276 }
277 local_bh_enable();
278
279 return -EADDRNOTAVAIL;
280
281ok:
282 hint += i;
283
284 /* Head lock still held and bh's disabled */
285 inet_bind_hash(sk, tb, port);
286 if (sk_unhashed(sk)) {
287 inet_sk(sk)->sport = htons(port);
288 __inet_hash(&tcp_hashinfo, sk, 0);
289 }
290 spin_unlock(&head->lock);
291
292 if (tw) {
293 inet_twsk_deschedule(tw, &tcp_death_row);;
294 inet_twsk_put(tw);
295 }
296
297 ret = 0;
298 goto out;
299 }
300
301 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
302 tb = inet_csk(sk)->icsk_bind_hash;
303 spin_lock_bh(&head->lock);
304 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
305 __inet_hash(&tcp_hashinfo, sk, 0);
306 spin_unlock_bh(&head->lock);
307 return 0;
308 } else {
309 spin_unlock(&head->lock);
310 /* No definite answer... Walk to established hash table */
311 ret = __tcp_v4_check_established(sk, snum, NULL);
312out:
313 local_bh_enable();
314 return ret;
315 }
316}
317 154
318/* This will initiate an outgoing connection. */ 155/* This will initiate an outgoing connection. */
319int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 156int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -383,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
383 inet->dport = usin->sin_port; 220 inet->dport = usin->sin_port;
384 inet->daddr = daddr; 221 inet->daddr = daddr;
385 222
386 tp->ext_header_len = 0; 223 inet_csk(sk)->icsk_ext_hdr_len = 0;
387 if (inet->opt) 224 if (inet->opt)
388 tp->ext_header_len = inet->opt->optlen; 225 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
389 226
390 tp->rx_opt.mss_clamp = 536; 227 tp->rx_opt.mss_clamp = 536;
391 228
@@ -395,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
395 * complete initialization after this. 232 * complete initialization after this.
396 */ 233 */
397 tcp_set_state(sk, TCP_SYN_SENT); 234 tcp_set_state(sk, TCP_SYN_SENT);
398 err = tcp_v4_hash_connect(sk); 235 err = inet_hash_connect(&tcp_death_row, sk);
399 if (err) 236 if (err)
400 goto failure; 237 goto failure;
401 238
@@ -433,12 +270,10 @@ failure:
433/* 270/*
434 * This routine does path mtu discovery as defined in RFC1191. 271 * This routine does path mtu discovery as defined in RFC1191.
435 */ 272 */
436static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, 273static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
437 u32 mtu)
438{ 274{
439 struct dst_entry *dst; 275 struct dst_entry *dst;
440 struct inet_sock *inet = inet_sk(sk); 276 struct inet_sock *inet = inet_sk(sk);
441 struct tcp_sock *tp = tcp_sk(sk);
442 277
443 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs 278 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
444 * send out by Linux are always <576bytes so they should go through 279 * send out by Linux are always <576bytes so they should go through
@@ -467,7 +302,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
467 mtu = dst_mtu(dst); 302 mtu = dst_mtu(dst);
468 303
469 if (inet->pmtudisc != IP_PMTUDISC_DONT && 304 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
470 tp->pmtu_cookie > mtu) { 305 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
471 tcp_sync_mss(sk, mtu); 306 tcp_sync_mss(sk, mtu);
472 307
473 /* Resend the TCP packet because it's 308 /* Resend the TCP packet because it's
@@ -644,10 +479,10 @@ out:
644} 479}
645 480
646/* This routine computes an IPv4 TCP checksum. */ 481/* This routine computes an IPv4 TCP checksum. */
647void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 482void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
648 struct sk_buff *skb)
649{ 483{
650 struct inet_sock *inet = inet_sk(sk); 484 struct inet_sock *inet = inet_sk(sk);
485 struct tcphdr *th = skb->h.th;
651 486
652 if (skb->ip_summed == CHECKSUM_HW) { 487 if (skb->ip_summed == CHECKSUM_HW) {
653 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); 488 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
@@ -826,7 +661,8 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
826 kfree(inet_rsk(req)->opt); 661 kfree(inet_rsk(req)->opt);
827} 662}
828 663
829static inline void syn_flood_warning(struct sk_buff *skb) 664#ifdef CONFIG_SYN_COOKIES
665static void syn_flood_warning(struct sk_buff *skb)
830{ 666{
831 static unsigned long warntime; 667 static unsigned long warntime;
832 668
@@ -837,12 +673,13 @@ static inline void syn_flood_warning(struct sk_buff *skb)
837 ntohs(skb->h.th->dest)); 673 ntohs(skb->h.th->dest));
838 } 674 }
839} 675}
676#endif
840 677
841/* 678/*
842 * Save and compile IPv4 options into the request_sock if needed. 679 * Save and compile IPv4 options into the request_sock if needed.
843 */ 680 */
844static inline struct ip_options *tcp_v4_save_options(struct sock *sk, 681static struct ip_options *tcp_v4_save_options(struct sock *sk,
845 struct sk_buff *skb) 682 struct sk_buff *skb)
846{ 683{
847 struct ip_options *opt = &(IPCB(skb)->opt); 684 struct ip_options *opt = &(IPCB(skb)->opt);
848 struct ip_options *dopt = NULL; 685 struct ip_options *dopt = NULL;
@@ -869,6 +706,11 @@ struct request_sock_ops tcp_request_sock_ops = {
869 .send_reset = tcp_v4_send_reset, 706 .send_reset = tcp_v4_send_reset,
870}; 707};
871 708
709static struct timewait_sock_ops tcp_timewait_sock_ops = {
710 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
711 .twsk_unique = tcp_twsk_unique,
712};
713
872int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 714int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
873{ 715{
874 struct inet_request_sock *ireq; 716 struct inet_request_sock *ireq;
@@ -1053,9 +895,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1053 ireq->opt = NULL; 895 ireq->opt = NULL;
1054 newinet->mc_index = inet_iif(skb); 896 newinet->mc_index = inet_iif(skb);
1055 newinet->mc_ttl = skb->nh.iph->ttl; 897 newinet->mc_ttl = skb->nh.iph->ttl;
1056 newtp->ext_header_len = 0; 898 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1057 if (newinet->opt) 899 if (newinet->opt)
1058 newtp->ext_header_len = newinet->opt->optlen; 900 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1059 newinet->id = newtp->write_seq ^ jiffies; 901 newinet->id = newtp->write_seq ^ jiffies;
1060 902
1061 tcp_sync_mss(newsk, dst_mtu(dst)); 903 tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1238,6 +1080,7 @@ process:
1238 1080
1239 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1081 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1240 goto discard_and_relse; 1082 goto discard_and_relse;
1083 nf_reset(skb);
1241 1084
1242 if (sk_filter(sk, skb, 0)) 1085 if (sk_filter(sk, skb, 0))
1243 goto discard_and_relse; 1086 goto discard_and_relse;
@@ -1314,16 +1157,6 @@ do_time_wait:
1314 goto discard_it; 1157 goto discard_it;
1315} 1158}
1316 1159
1317static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1318{
1319 struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
1320 struct inet_sock *inet = inet_sk(sk);
1321
1322 sin->sin_family = AF_INET;
1323 sin->sin_addr.s_addr = inet->daddr;
1324 sin->sin_port = inet->dport;
1325}
1326
1327/* VJ's idea. Save last timestamp seen from this destination 1160/* VJ's idea. Save last timestamp seen from this destination
1328 * and hold it at least for normal timewait interval to use for duplicate 1161 * and hold it at least for normal timewait interval to use for duplicate
1329 * segment detection in subsequent connections, before they enter synchronized 1162 * segment detection in subsequent connections, before they enter synchronized
@@ -1382,7 +1215,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1382 return 0; 1215 return 0;
1383} 1216}
1384 1217
1385struct tcp_func ipv4_specific = { 1218struct inet_connection_sock_af_ops ipv4_specific = {
1386 .queue_xmit = ip_queue_xmit, 1219 .queue_xmit = ip_queue_xmit,
1387 .send_check = tcp_v4_send_check, 1220 .send_check = tcp_v4_send_check,
1388 .rebuild_header = inet_sk_rebuild_header, 1221 .rebuild_header = inet_sk_rebuild_header,
@@ -1392,7 +1225,7 @@ struct tcp_func ipv4_specific = {
1392 .net_header_len = sizeof(struct iphdr), 1225 .net_header_len = sizeof(struct iphdr),
1393 .setsockopt = ip_setsockopt, 1226 .setsockopt = ip_setsockopt,
1394 .getsockopt = ip_getsockopt, 1227 .getsockopt = ip_getsockopt,
1395 .addr2sockaddr = v4_addr2sockaddr, 1228 .addr2sockaddr = inet_csk_addr2sockaddr,
1396 .sockaddr_len = sizeof(struct sockaddr_in), 1229 .sockaddr_len = sizeof(struct sockaddr_in),
1397}; 1230};
1398 1231
@@ -1433,7 +1266,8 @@ static int tcp_v4_init_sock(struct sock *sk)
1433 sk->sk_write_space = sk_stream_write_space; 1266 sk->sk_write_space = sk_stream_write_space;
1434 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 1267 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1435 1268
1436 tp->af_specific = &ipv4_specific; 1269 icsk->icsk_af_ops = &ipv4_specific;
1270 icsk->icsk_sync_mss = tcp_sync_mss;
1437 1271
1438 sk->sk_sndbuf = sysctl_tcp_wmem[1]; 1272 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1439 sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 1273 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -1989,7 +1823,7 @@ struct proto tcp_prot = {
1989 .sysctl_rmem = sysctl_tcp_rmem, 1823 .sysctl_rmem = sysctl_tcp_rmem,
1990 .max_header = MAX_TCP_HEADER, 1824 .max_header = MAX_TCP_HEADER,
1991 .obj_size = sizeof(struct tcp_sock), 1825 .obj_size = sizeof(struct tcp_sock),
1992 .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1826 .twsk_prot = &tcp_timewait_sock_ops,
1993 .rsk_prot = &tcp_request_sock_ops, 1827 .rsk_prot = &tcp_request_sock_ops,
1994}; 1828};
1995 1829
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1b66a2ac4321..2b9b7f6c7f7c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -274,18 +274,18 @@ kill:
274void tcp_time_wait(struct sock *sk, int state, int timeo) 274void tcp_time_wait(struct sock *sk, int state, int timeo)
275{ 275{
276 struct inet_timewait_sock *tw = NULL; 276 struct inet_timewait_sock *tw = NULL;
277 const struct inet_connection_sock *icsk = inet_csk(sk);
277 const struct tcp_sock *tp = tcp_sk(sk); 278 const struct tcp_sock *tp = tcp_sk(sk);
278 int recycle_ok = 0; 279 int recycle_ok = 0;
279 280
280 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) 281 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
281 recycle_ok = tp->af_specific->remember_stamp(sk); 282 recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
282 283
283 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) 284 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
284 tw = inet_twsk_alloc(sk, state); 285 tw = inet_twsk_alloc(sk, state);
285 286
286 if (tw != NULL) { 287 if (tw != NULL) {
287 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 288 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
288 const struct inet_connection_sock *icsk = inet_csk(sk);
289 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); 289 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
290 290
291 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; 291 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
@@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
298#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 298#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
299 if (tw->tw_family == PF_INET6) { 299 if (tw->tw_family == PF_INET6) {
300 struct ipv6_pinfo *np = inet6_sk(sk); 300 struct ipv6_pinfo *np = inet6_sk(sk);
301 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); 301 struct inet6_timewait_sock *tw6;
302 302
303 ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); 303 tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
304 ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); 304 tw6 = inet6_twsk((struct sock *)tw);
305 ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
306 ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
305 tw->tw_ipv6only = np->ipv6only; 307 tw->tw_ipv6only = np->ipv6only;
306 } 308 }
307#endif 309#endif
@@ -456,7 +458,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
456 struct request_sock **prev) 458 struct request_sock **prev)
457{ 459{
458 struct tcphdr *th = skb->h.th; 460 struct tcphdr *th = skb->h.th;
459 struct tcp_sock *tp = tcp_sk(sk);
460 u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 461 u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
461 int paws_reject = 0; 462 int paws_reject = 0;
462 struct tcp_options_received tmp_opt; 463 struct tcp_options_received tmp_opt;
@@ -613,7 +614,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
613 * ESTABLISHED STATE. If it will be dropped after 614 * ESTABLISHED STATE. If it will be dropped after
614 * socket is created, wait for troubles. 615 * socket is created, wait for troubles.
615 */ 616 */
616 child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); 617 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
618 req, NULL);
617 if (child == NULL) 619 if (child == NULL)
618 goto listen_overflow; 620 goto listen_overflow;
619 621
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b7325e0b406a..a7623ead39a8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1;
51 */ 51 */
52int sysctl_tcp_tso_win_divisor = 3; 52int sysctl_tcp_tso_win_divisor = 3;
53 53
54static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, 54static void update_send_head(struct sock *sk, struct tcp_sock *tp,
55 struct sk_buff *skb) 55 struct sk_buff *skb)
56{ 56{
57 sk->sk_send_head = skb->next; 57 sk->sk_send_head = skb->next;
58 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) 58 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
@@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
124 tp->snd_cwnd_used = 0; 124 tp->snd_cwnd_used = 0;
125} 125}
126 126
127static inline void tcp_event_data_sent(struct tcp_sock *tp, 127static void tcp_event_data_sent(struct tcp_sock *tp,
128 struct sk_buff *skb, struct sock *sk) 128 struct sk_buff *skb, struct sock *sk)
129{ 129{
130 struct inet_connection_sock *icsk = inet_csk(sk); 130 struct inet_connection_sock *icsk = inet_csk(sk);
131 const u32 now = tcp_time_stamp; 131 const u32 now = tcp_time_stamp;
@@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
142 icsk->icsk_ack.pingpong = 1; 142 icsk->icsk_ack.pingpong = 1;
143} 143}
144 144
145static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) 145static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
146{ 146{
147 tcp_dec_quickack_mode(sk, pkts); 147 tcp_dec_quickack_mode(sk, pkts);
148 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 148 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
@@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
212 * value can be stuffed directly into th->window for an outgoing 212 * value can be stuffed directly into th->window for an outgoing
213 * frame. 213 * frame.
214 */ 214 */
215static __inline__ u16 tcp_select_window(struct sock *sk) 215static u16 tcp_select_window(struct sock *sk)
216{ 216{
217 struct tcp_sock *tp = tcp_sk(sk); 217 struct tcp_sock *tp = tcp_sk(sk);
218 u32 cur_win = tcp_receive_window(tp); 218 u32 cur_win = tcp_receive_window(tp);
@@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
250 return new_win; 250 return new_win;
251} 251}
252 252
253static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
254 __u32 tstamp)
255{
256 if (tp->rx_opt.tstamp_ok) {
257 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
258 (TCPOPT_NOP << 16) |
259 (TCPOPT_TIMESTAMP << 8) |
260 TCPOLEN_TIMESTAMP);
261 *ptr++ = htonl(tstamp);
262 *ptr++ = htonl(tp->rx_opt.ts_recent);
263 }
264 if (tp->rx_opt.eff_sacks) {
265 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
266 int this_sack;
267
268 *ptr++ = htonl((TCPOPT_NOP << 24) |
269 (TCPOPT_NOP << 16) |
270 (TCPOPT_SACK << 8) |
271 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
272 TCPOLEN_SACK_PERBLOCK)));
273 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
274 *ptr++ = htonl(sp[this_sack].start_seq);
275 *ptr++ = htonl(sp[this_sack].end_seq);
276 }
277 if (tp->rx_opt.dsack) {
278 tp->rx_opt.dsack = 0;
279 tp->rx_opt.eff_sacks--;
280 }
281 }
282}
283
284/* Construct a tcp options header for a SYN or SYN_ACK packet.
285 * If this is every changed make sure to change the definition of
286 * MAX_SYN_SIZE to match the new maximum number of options that you
287 * can generate.
288 */
289static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
290 int offer_wscale, int wscale, __u32 tstamp,
291 __u32 ts_recent)
292{
293 /* We always get an MSS option.
294 * The option bytes which will be seen in normal data
295 * packets should timestamps be used, must be in the MSS
296 * advertised. But we subtract them from tp->mss_cache so
297 * that calculations in tcp_sendmsg are simpler etc.
298 * So account for this fact here if necessary. If we
299 * don't do this correctly, as a receiver we won't
300 * recognize data packets as being full sized when we
301 * should, and thus we won't abide by the delayed ACK
302 * rules correctly.
303 * SACKs don't matter, we never delay an ACK when we
304 * have any of those going out.
305 */
306 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
307 if (ts) {
308 if(sack)
309 *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
310 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
311 else
312 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
313 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
314 *ptr++ = htonl(tstamp); /* TSVAL */
315 *ptr++ = htonl(ts_recent); /* TSECR */
316 } else if(sack)
317 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
318 (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
319 if (offer_wscale)
320 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
321}
253 322
254/* This routine actually transmits TCP packets queued in by 323/* This routine actually transmits TCP packets queued in by
255 * tcp_do_sendmsg(). This is used by both the initial 324 * tcp_do_sendmsg(). This is used by both the initial
@@ -371,7 +440,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
371 TCP_ECN_send(sk, tp, skb, tcp_header_size); 440 TCP_ECN_send(sk, tp, skb, tcp_header_size);
372 } 441 }
373 442
374 tp->af_specific->send_check(sk, th, skb->len, skb); 443 icsk->icsk_af_ops->send_check(sk, skb->len, skb);
375 444
376 if (likely(tcb->flags & TCPCB_FLAG_ACK)) 445 if (likely(tcb->flags & TCPCB_FLAG_ACK))
377 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 446 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
@@ -381,7 +450,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
381 450
382 TCP_INC_STATS(TCP_MIB_OUTSEGS); 451 TCP_INC_STATS(TCP_MIB_OUTSEGS);
383 452
384 err = tp->af_specific->queue_xmit(skb, 0); 453 err = icsk->icsk_af_ops->queue_xmit(skb, 0);
385 if (unlikely(err <= 0)) 454 if (unlikely(err <= 0))
386 return err; 455 return err;
387 456
@@ -621,7 +690,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
621 It is minimum of user_mss and mss received with SYN. 690 It is minimum of user_mss and mss received with SYN.
622 It also does not include TCP options. 691 It also does not include TCP options.
623 692
624 tp->pmtu_cookie is last pmtu, seen by this function. 693 inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function.
625 694
626 tp->mss_cache is current effective sending mss, including 695 tp->mss_cache is current effective sending mss, including
627 all tcp options except for SACKs. It is evaluated, 696 all tcp options except for SACKs. It is evaluated,
@@ -631,26 +700,26 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
631 NOTE1. rfc1122 clearly states that advertised MSS 700 NOTE1. rfc1122 clearly states that advertised MSS
632 DOES NOT include either tcp or ip options. 701 DOES NOT include either tcp or ip options.
633 702
634 NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside 703 NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
635 this function. --ANK (980731) 704 are READ ONLY outside this function. --ANK (980731)
636 */ 705 */
637 706
638unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) 707unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
639{ 708{
640 struct tcp_sock *tp = tcp_sk(sk); 709 struct tcp_sock *tp = tcp_sk(sk);
641 int mss_now; 710 struct inet_connection_sock *icsk = inet_csk(sk);
642
643 /* Calculate base mss without TCP options: 711 /* Calculate base mss without TCP options:
644 It is MMS_S - sizeof(tcphdr) of rfc1122 712 It is MMS_S - sizeof(tcphdr) of rfc1122
645 */ 713 */
646 mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); 714 int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
715 sizeof(struct tcphdr));
647 716
648 /* Clamp it (mss_clamp does not include tcp options) */ 717 /* Clamp it (mss_clamp does not include tcp options) */
649 if (mss_now > tp->rx_opt.mss_clamp) 718 if (mss_now > tp->rx_opt.mss_clamp)
650 mss_now = tp->rx_opt.mss_clamp; 719 mss_now = tp->rx_opt.mss_clamp;
651 720
652 /* Now subtract optional transport overhead */ 721 /* Now subtract optional transport overhead */
653 mss_now -= tp->ext_header_len; 722 mss_now -= icsk->icsk_ext_hdr_len;
654 723
655 /* Then reserve room for full set of TCP options and 8 bytes of data */ 724 /* Then reserve room for full set of TCP options and 8 bytes of data */
656 if (mss_now < 48) 725 if (mss_now < 48)
@@ -664,7 +733,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
664 mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); 733 mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
665 734
666 /* And store cached results */ 735 /* And store cached results */
667 tp->pmtu_cookie = pmtu; 736 icsk->icsk_pmtu_cookie = pmtu;
668 tp->mss_cache = mss_now; 737 tp->mss_cache = mss_now;
669 738
670 return mss_now; 739 return mss_now;
@@ -694,7 +763,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
694 763
695 if (dst) { 764 if (dst) {
696 u32 mtu = dst_mtu(dst); 765 u32 mtu = dst_mtu(dst);
697 if (mtu != tp->pmtu_cookie) 766 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
698 mss_now = tcp_sync_mss(sk, mtu); 767 mss_now = tcp_sync_mss(sk, mtu);
699 } 768 }
700 769
@@ -705,9 +774,10 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
705 xmit_size_goal = mss_now; 774 xmit_size_goal = mss_now;
706 775
707 if (doing_tso) { 776 if (doing_tso) {
708 xmit_size_goal = 65535 - 777 xmit_size_goal = (65535 -
709 tp->af_specific->net_header_len - 778 inet_csk(sk)->icsk_af_ops->net_header_len -
710 tp->ext_header_len - tp->tcp_header_len; 779 inet_csk(sk)->icsk_ext_hdr_len -
780 tp->tcp_header_len);
711 781
712 if (tp->max_window && 782 if (tp->max_window &&
713 (xmit_size_goal > (tp->max_window >> 1))) 783 (xmit_size_goal > (tp->max_window >> 1)))
@@ -723,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
723 793
724/* Congestion window validation. (RFC2861) */ 794/* Congestion window validation. (RFC2861) */
725 795
726static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) 796static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
727{ 797{
728 __u32 packets_out = tp->packets_out; 798 __u32 packets_out = tp->packets_out;
729 799
@@ -772,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
772/* This must be invoked the first time we consider transmitting 842/* This must be invoked the first time we consider transmitting
773 * SKB onto the wire. 843 * SKB onto the wire.
774 */ 844 */
775static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 845static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
776{ 846{
777 int tso_segs = tcp_skb_pcount(skb); 847 int tso_segs = tcp_skb_pcount(skb);
778 848
@@ -1422,7 +1492,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1422 (sysctl_tcp_retrans_collapse != 0)) 1492 (sysctl_tcp_retrans_collapse != 0))
1423 tcp_retrans_try_collapse(sk, skb, cur_mss); 1493 tcp_retrans_try_collapse(sk, skb, cur_mss);
1424 1494
1425 if(tp->af_specific->rebuild_header(sk)) 1495 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1426 return -EHOSTUNREACH; /* Routing failure or similar. */ 1496 return -EHOSTUNREACH; /* Routing failure or similar. */
1427 1497
1428 /* Some Solaris stacks overoptimize and ignore the FIN on a 1498 /* Some Solaris stacks overoptimize and ignore the FIN on a
@@ -1793,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1793/* 1863/*
1794 * Do all connect socket setups that can be done AF independent. 1864 * Do all connect socket setups that can be done AF independent.
1795 */ 1865 */
1796static inline void tcp_connect_init(struct sock *sk) 1866static void tcp_connect_init(struct sock *sk)
1797{ 1867{
1798 struct dst_entry *dst = __sk_dst_get(sk); 1868 struct dst_entry *dst = __sk_dst_get(sk);
1799 struct tcp_sock *tp = tcp_sk(sk); 1869 struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 13e7e6e8df16..3b7403495052 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -330,6 +330,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
330 vegas->cntRTT = 0; 330 vegas->cntRTT = 0;
331 vegas->minRTT = 0x7fffffff; 331 vegas->minRTT = 0x7fffffff;
332 } 332 }
333 /* Use normal slow start */
334 else if (tp->snd_cwnd <= tp->snd_ssthresh)
335 tcp_slow_start(tp);
336
333} 337}
334 338
335/* Extract info for Tcp socket info provided via netlink. */ 339/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2422a5f7195d..00840474a449 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -86,6 +86,7 @@
86#include <linux/module.h> 86#include <linux/module.h>
87#include <linux/socket.h> 87#include <linux/socket.h>
88#include <linux/sockios.h> 88#include <linux/sockios.h>
89#include <linux/igmp.h>
89#include <linux/in.h> 90#include <linux/in.h>
90#include <linux/errno.h> 91#include <linux/errno.h>
91#include <linux/timer.h> 92#include <linux/timer.h>
@@ -846,20 +847,7 @@ out:
846csum_copy_err: 847csum_copy_err:
847 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 848 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
848 849
849 /* Clear queue. */ 850 skb_kill_datagram(sk, skb, flags);
850 if (flags&MSG_PEEK) {
851 int clear = 0;
852 spin_lock_bh(&sk->sk_receive_queue.lock);
853 if (skb == skb_peek(&sk->sk_receive_queue)) {
854 __skb_unlink(skb, &sk->sk_receive_queue);
855 clear = 1;
856 }
857 spin_unlock_bh(&sk->sk_receive_queue.lock);
858 if (clear)
859 kfree_skb(skb);
860 }
861
862 skb_free_datagram(sk, skb);
863 851
864 if (noblock) 852 if (noblock)
865 return -EAGAIN; 853 return -EAGAIN;
@@ -1001,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1001 kfree_skb(skb); 989 kfree_skb(skb);
1002 return -1; 990 return -1;
1003 } 991 }
992 nf_reset(skb);
1004 993
1005 if (up->encap_type) { 994 if (up->encap_type) {
1006 /* 995 /*
@@ -1094,7 +1083,7 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
1094 * Otherwise, csum completion requires chacksumming packet body, 1083 * Otherwise, csum completion requires chacksumming packet body,
1095 * including udp header and folding it to skb->csum. 1084 * including udp header and folding it to skb->csum.
1096 */ 1085 */
1097static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, 1086static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
1098 unsigned short ulen, u32 saddr, u32 daddr) 1087 unsigned short ulen, u32 saddr, u32 daddr)
1099{ 1088{
1100 if (uh->check == 0) { 1089 if (uh->check == 0) {
@@ -1108,7 +1097,6 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
1108 /* Probably, we should checksum udp header (it should be in cache 1097 /* Probably, we should checksum udp header (it should be in cache
1109 * in any case) and data in tiny packets (< rx copybreak). 1098 * in any case) and data in tiny packets (< rx copybreak).
1110 */ 1099 */
1111 return 0;
1112} 1100}
1113 1101
1114/* 1102/*
@@ -1141,8 +1129,7 @@ int udp_rcv(struct sk_buff *skb)
1141 if (pskb_trim_rcsum(skb, ulen)) 1129 if (pskb_trim_rcsum(skb, ulen))
1142 goto short_packet; 1130 goto short_packet;
1143 1131
1144 if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) 1132 udp_checksum_init(skb, uh, ulen, saddr, daddr);
1145 goto csum_error;
1146 1133
1147 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1134 if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1148 return udp_v4_mcast_deliver(skb, uh, saddr, daddr); 1135 return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
@@ -1163,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb)
1163 1150
1164 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1151 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1165 goto drop; 1152 goto drop;
1153 nf_reset(skb);
1166 1154
1167 /* No socket. Drop packet silently, if checksum is wrong */ 1155 /* No socket. Drop packet silently, if checksum is wrong */
1168 if (udp_checksum_complete(skb)) 1156 if (udp_checksum_complete(skb))
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 2d3849c38a0f..850d919591d1 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -11,6 +11,8 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv4.h>
14#include <net/inet_ecn.h> 16#include <net/inet_ecn.h>
15#include <net/ip.h> 17#include <net/ip.h>
16#include <net/xfrm.h> 18#include <net/xfrm.h>
@@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
45 return xfrm_parse_spi(skb, nexthdr, spi, seq); 47 return xfrm_parse_spi(skb, nexthdr, spi, seq);
46} 48}
47 49
50#ifdef CONFIG_NETFILTER
51static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
52{
53 struct iphdr *iph = skb->nh.iph;
54
55 if (skb->dst == NULL) {
56 if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
57 skb->dev))
58 goto drop;
59 }
60 return dst_input(skb);
61drop:
62 kfree_skb(skb);
63 return NET_RX_DROP;
64}
65#endif
66
48int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) 67int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
49{ 68{
50 int err; 69 int err;
@@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
137 memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state)); 156 memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
138 skb->sp->len += xfrm_nr; 157 skb->sp->len += xfrm_nr;
139 158
159 nf_reset(skb);
160
140 if (decaps) { 161 if (decaps) {
141 if (!(skb->dev->flags&IFF_LOOPBACK)) { 162 if (!(skb->dev->flags&IFF_LOOPBACK)) {
142 dst_release(skb->dst); 163 dst_release(skb->dst);
@@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
145 netif_rx(skb); 166 netif_rx(skb);
146 return 0; 167 return 0;
147 } else { 168 } else {
169#ifdef CONFIG_NETFILTER
170 __skb_push(skb, skb->data - skb->nh.raw);
171 skb->nh.iph->tot_len = htons(skb->len);
172 ip_send_check(skb->nh.iph);
173
174 NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
175 xfrm4_rcv_encap_finish);
176 return 0;
177#else
148 return -skb->nh.iph->protocol; 178 return -skb->nh.iph->protocol;
179#endif
149 } 180 }
150 181
151drop_unlock: 182drop_unlock:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 66620a95942a..d4df0ddd424b 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,8 +8,10 @@
8 * 2 of the License, or (at your option) any later version. 8 * 2 of the License, or (at your option) any later version.
9 */ 9 */
10 10
11#include <linux/compiler.h>
11#include <linux/skbuff.h> 12#include <linux/skbuff.h>
12#include <linux/spinlock.h> 13#include <linux/spinlock.h>
14#include <linux/netfilter_ipv4.h>
13#include <net/inet_ecn.h> 15#include <net/inet_ecn.h>
14#include <net/ip.h> 16#include <net/ip.h>
15#include <net/xfrm.h> 17#include <net/xfrm.h>
@@ -95,7 +97,7 @@ out:
95 return ret; 97 return ret;
96} 98}
97 99
98int xfrm4_output(struct sk_buff *skb) 100static int xfrm4_output_one(struct sk_buff *skb)
99{ 101{
100 struct dst_entry *dst = skb->dst; 102 struct dst_entry *dst = skb->dst;
101 struct xfrm_state *x = dst->xfrm; 103 struct xfrm_state *x = dst->xfrm;
@@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb)
113 goto error_nolock; 115 goto error_nolock;
114 } 116 }
115 117
116 spin_lock_bh(&x->lock); 118 do {
117 err = xfrm_state_check(x, skb); 119 spin_lock_bh(&x->lock);
118 if (err) 120 err = xfrm_state_check(x, skb);
119 goto error; 121 if (err)
122 goto error;
120 123
121 xfrm4_encap(skb); 124 xfrm4_encap(skb);
122 125
123 err = x->type->output(x, skb); 126 err = x->type->output(x, skb);
124 if (err) 127 if (err)
125 goto error; 128 goto error;
126 129
127 x->curlft.bytes += skb->len; 130 x->curlft.bytes += skb->len;
128 x->curlft.packets++; 131 x->curlft.packets++;
129 132
130 spin_unlock_bh(&x->lock); 133 spin_unlock_bh(&x->lock);
131 134
132 if (!(skb->dst = dst_pop(dst))) { 135 if (!(skb->dst = dst_pop(dst))) {
133 err = -EHOSTUNREACH; 136 err = -EHOSTUNREACH;
134 goto error_nolock; 137 goto error_nolock;
135 } 138 }
136 err = NET_XMIT_BYPASS; 139 dst = skb->dst;
140 x = dst->xfrm;
141 } while (x && !x->props.mode);
142
143 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
144 err = 0;
137 145
138out_exit: 146out_exit:
139 return err; 147 return err;
@@ -143,3 +151,33 @@ error_nolock:
143 kfree_skb(skb); 151 kfree_skb(skb);
144 goto out_exit; 152 goto out_exit;
145} 153}
154
155int xfrm4_output_finish(struct sk_buff *skb)
156{
157 int err;
158
159 while (likely((err = xfrm4_output_one(skb)) == 0)) {
160 nf_reset(skb);
161
162 err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
163 skb->dst->dev, dst_output);
164 if (unlikely(err != 1))
165 break;
166
167 if (!skb->dst->xfrm)
168 return dst_output(skb);
169
170 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
171 skb->dst->dev, xfrm4_output_finish);
172 if (unlikely(err != 1))
173 break;
174 }
175
176 return err;
177}
178
179int xfrm4_output(struct sk_buff *skb)
180{
181 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
182 xfrm4_output_finish);
183}
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 6460eec834b7..41877abd22e6 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,10 +8,11 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ 8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
9 protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ 9 protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
10 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ 10 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
11 ip6_flowlabel.o ipv6_syms.o netfilter.o 11 ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o
12 12
13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ 13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
14 xfrm6_output.o 14 xfrm6_output.o
15ipv6-$(CONFIG_NETFILTER) += netfilter.o
15ipv6-objs += $(ipv6-y) 16ipv6-objs += $(ipv6-y)
16 17
17obj-$(CONFIG_INET6_AH) += ah6.o 18obj-$(CONFIG_INET6_AH) += ah6.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a60585fd85ad..7129d4239755 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -58,6 +58,7 @@
58#ifdef CONFIG_SYSCTL 58#ifdef CONFIG_SYSCTL
59#include <linux/sysctl.h> 59#include <linux/sysctl.h>
60#endif 60#endif
61#include <linux/capability.h>
61#include <linux/delay.h> 62#include <linux/delay.h>
62#include <linux/notifier.h> 63#include <linux/notifier.h>
63#include <linux/string.h> 64#include <linux/string.h>
@@ -1195,7 +1196,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *
1195int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) 1196int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
1196{ 1197{
1197 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; 1198 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
1198 const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); 1199 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
1199 u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; 1200 u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
1200 u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); 1201 u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
1201 int sk_ipv6only = ipv6_only_sock(sk); 1202 int sk_ipv6only = ipv6_only_sock(sk);
@@ -1228,7 +1229,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
1228 1229
1229/* Gets referenced address, destroys ifaddr */ 1230/* Gets referenced address, destroys ifaddr */
1230 1231
1231void addrconf_dad_stop(struct inet6_ifaddr *ifp) 1232static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
1232{ 1233{
1233 if (ifp->flags&IFA_F_PERMANENT) { 1234 if (ifp->flags&IFA_F_PERMANENT) {
1234 spin_lock_bh(&ifp->lock); 1235 spin_lock_bh(&ifp->lock);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d9546380fa04..064ffab82a9f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -22,6 +22,7 @@
22 22
23 23
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/capability.h>
25#include <linux/config.h> 26#include <linux/config.h>
26#include <linux/errno.h> 27#include <linux/errno.h>
27#include <linux/types.h> 28#include <linux/types.h>
@@ -167,6 +168,7 @@ lookup_protocol:
167 sk->sk_reuse = 1; 168 sk->sk_reuse = 1;
168 169
169 inet = inet_sk(sk); 170 inet = inet_sk(sk);
171 inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
170 172
171 if (SOCK_RAW == sock->type) { 173 if (SOCK_RAW == sock->type) {
172 inet->num = protocol; 174 inet->num = protocol;
@@ -389,6 +391,8 @@ int inet6_destroy_sock(struct sock *sk)
389 return 0; 391 return 0;
390} 392}
391 393
394EXPORT_SYMBOL_GPL(inet6_destroy_sock);
395
392/* 396/*
393 * This does both peername and sockname. 397 * This does both peername and sockname.
394 */ 398 */
@@ -431,7 +435,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
431int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 435int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
432{ 436{
433 struct sock *sk = sock->sk; 437 struct sock *sk = sock->sk;
434 int err = -EINVAL;
435 438
436 switch(cmd) 439 switch(cmd)
437 { 440 {
@@ -450,16 +453,15 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
450 case SIOCSIFDSTADDR: 453 case SIOCSIFDSTADDR:
451 return addrconf_set_dstaddr((void __user *) arg); 454 return addrconf_set_dstaddr((void __user *) arg);
452 default: 455 default:
453 if (!sk->sk_prot->ioctl || 456 if (!sk->sk_prot->ioctl)
454 (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD) 457 return -ENOIOCTLCMD;
455 return(dev_ioctl(cmd,(void __user *) arg)); 458 return sk->sk_prot->ioctl(sk, cmd, arg);
456 return err;
457 } 459 }
458 /*NOTREACHED*/ 460 /*NOTREACHED*/
459 return(0); 461 return(0);
460} 462}
461 463
462struct proto_ops inet6_stream_ops = { 464const struct proto_ops inet6_stream_ops = {
463 .family = PF_INET6, 465 .family = PF_INET6,
464 .owner = THIS_MODULE, 466 .owner = THIS_MODULE,
465 .release = inet6_release, 467 .release = inet6_release,
@@ -480,7 +482,7 @@ struct proto_ops inet6_stream_ops = {
480 .sendpage = tcp_sendpage 482 .sendpage = tcp_sendpage
481}; 483};
482 484
483struct proto_ops inet6_dgram_ops = { 485const struct proto_ops inet6_dgram_ops = {
484 .family = PF_INET6, 486 .family = PF_INET6,
485 .owner = THIS_MODULE, 487 .owner = THIS_MODULE,
486 .release = inet6_release, 488 .release = inet6_release,
@@ -508,7 +510,7 @@ static struct net_proto_family inet6_family_ops = {
508}; 510};
509 511
510/* Same as inet6_dgram_ops, sans udp_poll. */ 512/* Same as inet6_dgram_ops, sans udp_poll. */
511static struct proto_ops inet6_sockraw_ops = { 513static const struct proto_ops inet6_sockraw_ops = {
512 .family = PF_INET6, 514 .family = PF_INET6,
513 .owner = THIS_MODULE, 515 .owner = THIS_MODULE,
514 .release = inet6_release, 516 .release = inet6_release,
@@ -609,17 +611,90 @@ inet6_unregister_protosw(struct inet_protosw *p)
609 } 611 }
610} 612}
611 613
614int inet6_sk_rebuild_header(struct sock *sk)
615{
616 int err;
617 struct dst_entry *dst;
618 struct ipv6_pinfo *np = inet6_sk(sk);
619
620 dst = __sk_dst_check(sk, np->dst_cookie);
621
622 if (dst == NULL) {
623 struct inet_sock *inet = inet_sk(sk);
624 struct in6_addr *final_p = NULL, final;
625 struct flowi fl;
626
627 memset(&fl, 0, sizeof(fl));
628 fl.proto = sk->sk_protocol;
629 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
630 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
631 fl.fl6_flowlabel = np->flow_label;
632 fl.oif = sk->sk_bound_dev_if;
633 fl.fl_ip_dport = inet->dport;
634 fl.fl_ip_sport = inet->sport;
635
636 if (np->opt && np->opt->srcrt) {
637 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
638 ipv6_addr_copy(&final, &fl.fl6_dst);
639 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
640 final_p = &final;
641 }
642
643 err = ip6_dst_lookup(sk, &dst, &fl);
644 if (err) {
645 sk->sk_route_caps = 0;
646 return err;
647 }
648 if (final_p)
649 ipv6_addr_copy(&fl.fl6_dst, final_p);
650
651 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
652 sk->sk_err_soft = -err;
653 return err;
654 }
655
656 ip6_dst_store(sk, dst, NULL);
657 sk->sk_route_caps = dst->dev->features &
658 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
659 }
660
661 return 0;
662}
663
664EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
665
666int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
667{
668 struct ipv6_pinfo *np = inet6_sk(sk);
669 struct inet6_skb_parm *opt = IP6CB(skb);
670
671 if (np->rxopt.all) {
672 if ((opt->hop && (np->rxopt.bits.hopopts ||
673 np->rxopt.bits.ohopopts)) ||
674 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) &&
675 np->rxopt.bits.rxflow) ||
676 (opt->srcrt && (np->rxopt.bits.srcrt ||
677 np->rxopt.bits.osrcrt)) ||
678 ((opt->dst1 || opt->dst0) &&
679 (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
680 return 1;
681 }
682 return 0;
683}
684
685EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
686
612int 687int
613snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) 688snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
614{ 689{
615 if (ptr == NULL) 690 if (ptr == NULL)
616 return -EINVAL; 691 return -EINVAL;
617 692
618 ptr[0] = __alloc_percpu(mibsize, mibalign); 693 ptr[0] = __alloc_percpu(mibsize);
619 if (!ptr[0]) 694 if (!ptr[0])
620 goto err0; 695 goto err0;
621 696
622 ptr[1] = __alloc_percpu(mibsize, mibalign); 697 ptr[1] = __alloc_percpu(mibsize);
623 if (!ptr[1]) 698 if (!ptr[1])
624 goto err1; 699 goto err1;
625 700
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f3629730eb15..13cc7f895583 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -33,6 +33,7 @@
33#include <linux/string.h> 33#include <linux/string.h>
34#include <net/icmp.h> 34#include <net/icmp.h>
35#include <net/ipv6.h> 35#include <net/ipv6.h>
36#include <net/protocol.h>
36#include <net/xfrm.h> 37#include <net/xfrm.h>
37#include <asm/scatterlist.h> 38#include <asm/scatterlist.h>
38 39
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 6b7294047238..65e73ac0d6d0 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -13,6 +13,7 @@
13 * 2 of the License, or (at your option) any later version. 13 * 2 of the License, or (at your option) any later version.
14 */ 14 */
15 15
16#include <linux/capability.h>
16#include <linux/config.h> 17#include <linux/config.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/errno.h> 19#include <linux/errno.h>
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c4a3a993acb7..99a6eb23378b 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -13,6 +13,7 @@
13 * 2 of the License, or (at your option) any later version. 13 * 2 of the License, or (at your option) any later version.
14 */ 14 */
15 15
16#include <linux/capability.h>
16#include <linux/errno.h> 17#include <linux/errno.h>
17#include <linux/types.h> 18#include <linux/types.h>
18#include <linux/kernel.h> 19#include <linux/kernel.h>
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bfbe9970793..6de8ee1a5ad9 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
36#include <linux/random.h> 36#include <linux/random.h>
37#include <net/icmp.h> 37#include <net/icmp.h>
38#include <net/ipv6.h> 38#include <net/ipv6.h>
39#include <net/protocol.h>
39#include <linux/icmpv6.h> 40#include <linux/icmpv6.h>
40 41
41static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) 42static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index be6faf311387..2a1e7e45b890 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -152,7 +152,7 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = {
152 {-1, NULL} 152 {-1, NULL}
153}; 153};
154 154
155static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) 155static int ipv6_destopt_rcv(struct sk_buff **skbp)
156{ 156{
157 struct sk_buff *skb = *skbp; 157 struct sk_buff *skb = *skbp;
158 struct inet6_skb_parm *opt = IP6CB(skb); 158 struct inet6_skb_parm *opt = IP6CB(skb);
@@ -169,7 +169,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
169 169
170 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { 170 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
171 skb->h.raw += ((skb->h.raw[1]+1)<<3); 171 skb->h.raw += ((skb->h.raw[1]+1)<<3);
172 *nhoffp = opt->dst1; 172 opt->nhoff = opt->dst1;
173 return 1; 173 return 1;
174 } 174 }
175 175
@@ -192,7 +192,7 @@ void __init ipv6_destopt_init(void)
192 NONE header. No data in packet. 192 NONE header. No data in packet.
193 ********************************/ 193 ********************************/
194 194
195static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp) 195static int ipv6_nodata_rcv(struct sk_buff **skbp)
196{ 196{
197 struct sk_buff *skb = *skbp; 197 struct sk_buff *skb = *skbp;
198 198
@@ -215,7 +215,7 @@ void __init ipv6_nodata_init(void)
215 Routing header. 215 Routing header.
216 ********************************/ 216 ********************************/
217 217
218static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) 218static int ipv6_rthdr_rcv(struct sk_buff **skbp)
219{ 219{
220 struct sk_buff *skb = *skbp; 220 struct sk_buff *skb = *skbp;
221 struct inet6_skb_parm *opt = IP6CB(skb); 221 struct inet6_skb_parm *opt = IP6CB(skb);
@@ -249,7 +249,7 @@ looped_back:
249 skb->h.raw += (hdr->hdrlen + 1) << 3; 249 skb->h.raw += (hdr->hdrlen + 1) << 3;
250 opt->dst0 = opt->dst1; 250 opt->dst0 = opt->dst1;
251 opt->dst1 = 0; 251 opt->dst1 = 0;
252 *nhoffp = (&hdr->nexthdr) - skb->nh.raw; 252 opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
253 return 1; 253 return 1;
254 } 254 }
255 255
@@ -413,6 +413,8 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
413 return opt; 413 return opt;
414} 414}
415 415
416EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
417
416/********************************** 418/**********************************
417 Hop-by-hop options. 419 Hop-by-hop options.
418 **********************************/ 420 **********************************/
@@ -485,9 +487,14 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
485 487
486int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff) 488int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
487{ 489{
488 IP6CB(skb)->hop = sizeof(struct ipv6hdr); 490 struct inet6_skb_parm *opt = IP6CB(skb);
489 if (ip6_parse_tlv(tlvprochopopt_lst, skb)) 491
492 opt->hop = sizeof(struct ipv6hdr);
493 if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
494 skb->h.raw += (skb->h.raw[1]+1)<<3;
495 opt->nhoff = sizeof(struct ipv6hdr);
490 return sizeof(struct ipv6hdr); 496 return sizeof(struct ipv6hdr);
497 }
491 return -1; 498 return -1;
492} 499}
493 500
@@ -579,6 +586,8 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
579 return opt2; 586 return opt2;
580} 587}
581 588
589EXPORT_SYMBOL_GPL(ipv6_dup_options);
590
582static int ipv6_renew_option(void *ohdr, 591static int ipv6_renew_option(void *ohdr,
583 struct ipv6_opt_hdr __user *newopt, int newoptlen, 592 struct ipv6_opt_hdr __user *newopt, int newoptlen,
584 int inherit, 593 int inherit,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ec6a2b549bb..53c81fcd20ba 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -79,7 +79,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
79static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; 79static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
80#define icmpv6_socket __get_cpu_var(__icmpv6_socket) 80#define icmpv6_socket __get_cpu_var(__icmpv6_socket)
81 81
82static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); 82static int icmpv6_rcv(struct sk_buff **pskb);
83 83
84static struct inet6_protocol icmpv6_protocol = { 84static struct inet6_protocol icmpv6_protocol = {
85 .handler = icmpv6_rcv, 85 .handler = icmpv6_rcv,
@@ -581,7 +581,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
581 * Handle icmp messages 581 * Handle icmp messages
582 */ 582 */
583 583
584static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 584static int icmpv6_rcv(struct sk_buff **pskb)
585{ 585{
586 struct sk_buff *skb = *pskb; 586 struct sk_buff *skb = *pskb;
587 struct net_device *dev = skb->dev; 587 struct net_device *dev = skb->dev;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
new file mode 100644
index 000000000000..f8f3a37a1494
--- /dev/null
+++ b/net/ipv6/inet6_connection_sock.c
@@ -0,0 +1,200 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Support for INET6 connection oriented protocols.
7 *
8 * Authors: See the TCPv6 sources
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or(at your option) any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/module.h>
18#include <linux/in6.h>
19#include <linux/ipv6.h>
20#include <linux/jhash.h>
21
22#include <net/addrconf.h>
23#include <net/inet_connection_sock.h>
24#include <net/inet_ecn.h>
25#include <net/inet_hashtables.h>
26#include <net/ip6_route.h>
27#include <net/sock.h>
28#include <net/inet6_connection_sock.h>
29
30int inet6_csk_bind_conflict(const struct sock *sk,
31 const struct inet_bind_bucket *tb)
32{
33 const struct sock *sk2;
34 const struct hlist_node *node;
35
36 /* We must walk the whole port owner list in this case. -DaveM */
37 sk_for_each_bound(sk2, node, &tb->owners) {
38 if (sk != sk2 &&
39 (!sk->sk_bound_dev_if ||
40 !sk2->sk_bound_dev_if ||
41 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
42 (!sk->sk_reuse || !sk2->sk_reuse ||
43 sk2->sk_state == TCP_LISTEN) &&
44 ipv6_rcv_saddr_equal(sk, sk2))
45 break;
46 }
47
48 return node != NULL;
49}
50
51EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
52
53/*
54 * request_sock (formerly open request) hash tables.
55 */
56static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport,
57 const u32 rnd, const u16 synq_hsize)
58{
59 u32 a = raddr->s6_addr32[0];
60 u32 b = raddr->s6_addr32[1];
61 u32 c = raddr->s6_addr32[2];
62
63 a += JHASH_GOLDEN_RATIO;
64 b += JHASH_GOLDEN_RATIO;
65 c += rnd;
66 __jhash_mix(a, b, c);
67
68 a += raddr->s6_addr32[3];
69 b += (u32)rport;
70 __jhash_mix(a, b, c);
71
72 return c & (synq_hsize - 1);
73}
74
75struct request_sock *inet6_csk_search_req(const struct sock *sk,
76 struct request_sock ***prevp,
77 const __u16 rport,
78 const struct in6_addr *raddr,
79 const struct in6_addr *laddr,
80 const int iif)
81{
82 const struct inet_connection_sock *icsk = inet_csk(sk);
83 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
84 struct request_sock *req, **prev;
85
86 for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
87 lopt->hash_rnd,
88 lopt->nr_table_entries)];
89 (req = *prev) != NULL;
90 prev = &req->dl_next) {
91 const struct inet6_request_sock *treq = inet6_rsk(req);
92
93 if (inet_rsk(req)->rmt_port == rport &&
94 req->rsk_ops->family == AF_INET6 &&
95 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
96 ipv6_addr_equal(&treq->loc_addr, laddr) &&
97 (!treq->iif || treq->iif == iif)) {
98 BUG_TRAP(req->sk == NULL);
99 *prevp = prev;
100 return req;
101 }
102 }
103
104 return NULL;
105}
106
107EXPORT_SYMBOL_GPL(inet6_csk_search_req);
108
109void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
110 struct request_sock *req,
111 const unsigned long timeout)
112{
113 struct inet_connection_sock *icsk = inet_csk(sk);
114 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
115 const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
116 inet_rsk(req)->rmt_port,
117 lopt->hash_rnd, lopt->nr_table_entries);
118
119 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
120 inet_csk_reqsk_queue_added(sk, timeout);
121}
122
123EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
124
125void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
126{
127 struct ipv6_pinfo *np = inet6_sk(sk);
128 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
129
130 sin6->sin6_family = AF_INET6;
131 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
132 sin6->sin6_port = inet_sk(sk)->dport;
133 /* We do not store received flowlabel for TCP */
134 sin6->sin6_flowinfo = 0;
135 sin6->sin6_scope_id = 0;
136 if (sk->sk_bound_dev_if &&
137 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
138 sin6->sin6_scope_id = sk->sk_bound_dev_if;
139}
140
141EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
142
143int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
144{
145 struct sock *sk = skb->sk;
146 struct inet_sock *inet = inet_sk(sk);
147 struct ipv6_pinfo *np = inet6_sk(sk);
148 struct flowi fl;
149 struct dst_entry *dst;
150 struct in6_addr *final_p = NULL, final;
151
152 memset(&fl, 0, sizeof(fl));
153 fl.proto = sk->sk_protocol;
154 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
155 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
156 fl.fl6_flowlabel = np->flow_label;
157 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
158 fl.oif = sk->sk_bound_dev_if;
159 fl.fl_ip_sport = inet->sport;
160 fl.fl_ip_dport = inet->dport;
161
162 if (np->opt && np->opt->srcrt) {
163 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
164 ipv6_addr_copy(&final, &fl.fl6_dst);
165 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
166 final_p = &final;
167 }
168
169 dst = __sk_dst_check(sk, np->dst_cookie);
170
171 if (dst == NULL) {
172 int err = ip6_dst_lookup(sk, &dst, &fl);
173
174 if (err) {
175 sk->sk_err_soft = -err;
176 return err;
177 }
178
179 if (final_p)
180 ipv6_addr_copy(&fl.fl6_dst, final_p);
181
182 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
183 sk->sk_route_caps = 0;
184 return err;
185 }
186
187 ip6_dst_store(sk, dst, NULL);
188 sk->sk_route_caps = dst->dev->features &
189 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
190 }
191
192 skb->dst = dst_clone(dst);
193
194 /* Restore final destination back after routing done */
195 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
196
197 return ip6_xmit(sk, skb, &fl, np->opt, 0);
198}
199
200EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 01d5f46d4e40..4154f3a8b6cf 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -5,7 +5,8 @@
5 * 5 *
6 * Generic INET6 transport hashtables 6 * Generic INET6 transport hashtables
7 * 7 *
8 * Authors: Lotsa people, from code originally in tcp 8 * Authors: Lotsa people, from code originally in tcp, generalised here
9 * by Arnaldo Carvalho de Melo <acme@mandriva.com>
9 * 10 *
10 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -14,12 +15,13 @@
14 */ 15 */
15 16
16#include <linux/config.h> 17#include <linux/config.h>
17
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/random.h>
19 20
20#include <net/inet_connection_sock.h> 21#include <net/inet_connection_sock.h>
21#include <net/inet_hashtables.h> 22#include <net/inet_hashtables.h>
22#include <net/inet6_hashtables.h> 23#include <net/inet6_hashtables.h>
24#include <net/ip.h>
23 25
24struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, 26struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
25 const struct in6_addr *daddr, 27 const struct in6_addr *daddr,
@@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
79} 81}
80 82
81EXPORT_SYMBOL_GPL(inet6_lookup); 83EXPORT_SYMBOL_GPL(inet6_lookup);
84
85static int __inet6_check_established(struct inet_timewait_death_row *death_row,
86 struct sock *sk, const __u16 lport,
87 struct inet_timewait_sock **twp)
88{
89 struct inet_hashinfo *hinfo = death_row->hashinfo;
90 const struct inet_sock *inet = inet_sk(sk);
91 const struct ipv6_pinfo *np = inet6_sk(sk);
92 const struct in6_addr *daddr = &np->rcv_saddr;
93 const struct in6_addr *saddr = &np->daddr;
94 const int dif = sk->sk_bound_dev_if;
95 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
96 const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr,
97 inet->dport);
98 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
99 struct sock *sk2;
100 const struct hlist_node *node;
101 struct inet_timewait_sock *tw;
102
103 prefetch(head->chain.first);
104 write_lock(&head->lock);
105
106 /* Check TIME-WAIT sockets first. */
107 sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
108 const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
109
110 tw = inet_twsk(sk2);
111
112 if(*((__u32 *)&(tw->tw_dport)) == ports &&
113 sk2->sk_family == PF_INET6 &&
114 ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) &&
115 ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
116 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
117 if (twsk_unique(sk, sk2, twp))
118 goto unique;
119 else
120 goto not_unique;
121 }
122 }
123 tw = NULL;
124
125 /* And established part... */
126 sk_for_each(sk2, node, &head->chain) {
127 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
128 goto not_unique;
129 }
130
131unique:
132 BUG_TRAP(sk_unhashed(sk));
133 __sk_add_node(sk, &head->chain);
134 sk->sk_hash = hash;
135 sock_prot_inc_use(sk->sk_prot);
136 write_unlock(&head->lock);
137
138 if (twp != NULL) {
139 *twp = tw;
140 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
141 } else if (tw != NULL) {
142 /* Silly. Should hash-dance instead... */
143 inet_twsk_deschedule(tw, death_row);
144 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
145
146 inet_twsk_put(tw);
147 }
148 return 0;
149
150not_unique:
151 write_unlock(&head->lock);
152 return -EADDRNOTAVAIL;
153}
154
155static inline u32 inet6_sk_port_offset(const struct sock *sk)
156{
157 const struct inet_sock *inet = inet_sk(sk);
158 const struct ipv6_pinfo *np = inet6_sk(sk);
159 return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
160 np->daddr.s6_addr32,
161 inet->dport);
162}
163
164int inet6_hash_connect(struct inet_timewait_death_row *death_row,
165 struct sock *sk)
166{
167 struct inet_hashinfo *hinfo = death_row->hashinfo;
168 const unsigned short snum = inet_sk(sk)->num;
169 struct inet_bind_hashbucket *head;
170 struct inet_bind_bucket *tb;
171 int ret;
172
173 if (snum == 0) {
174 const int low = sysctl_local_port_range[0];
175 const int high = sysctl_local_port_range[1];
176 const int range = high - low;
177 int i, port;
178 static u32 hint;
179 const u32 offset = hint + inet6_sk_port_offset(sk);
180 struct hlist_node *node;
181 struct inet_timewait_sock *tw = NULL;
182
183 local_bh_disable();
184 for (i = 1; i <= range; i++) {
185 port = low + (i + offset) % range;
186 head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
187 spin_lock(&head->lock);
188
189 /* Does not bother with rcv_saddr checks,
190 * because the established check is already
191 * unique enough.
192 */
193 inet_bind_bucket_for_each(tb, node, &head->chain) {
194 if (tb->port == port) {
195 BUG_TRAP(!hlist_empty(&tb->owners));
196 if (tb->fastreuse >= 0)
197 goto next_port;
198 if (!__inet6_check_established(death_row,
199 sk, port,
200 &tw))
201 goto ok;
202 goto next_port;
203 }
204 }
205
206 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
207 head, port);
208 if (!tb) {
209 spin_unlock(&head->lock);
210 break;
211 }
212 tb->fastreuse = -1;
213 goto ok;
214
215 next_port:
216 spin_unlock(&head->lock);
217 }
218 local_bh_enable();
219
220 return -EADDRNOTAVAIL;
221
222ok:
223 hint += i;
224
225 /* Head lock still held and bh's disabled */
226 inet_bind_hash(sk, tb, port);
227 if (sk_unhashed(sk)) {
228 inet_sk(sk)->sport = htons(port);
229 __inet6_hash(hinfo, sk);
230 }
231 spin_unlock(&head->lock);
232
233 if (tw) {
234 inet_twsk_deschedule(tw, death_row);
235 inet_twsk_put(tw);
236 }
237
238 ret = 0;
239 goto out;
240 }
241
242 head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
243 tb = inet_csk(sk)->icsk_bind_hash;
244 spin_lock_bh(&head->lock);
245
246 if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
247 __inet6_hash(hinfo, sk);
248 spin_unlock_bh(&head->lock);
249 return 0;
250 } else {
251 spin_unlock(&head->lock);
252 /* No definite answer... Walk to established hash table */
253 ret = __inet6_check_established(death_row, sk, snum, NULL);
254out:
255 local_bh_enable();
256 return ret;
257 }
258}
259
260EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1cf02765fb5c..964ad9d1276d 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -9,6 +9,7 @@
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */ 10 */
11 11
12#include <linux/capability.h>
12#include <linux/config.h> 13#include <linux/config.h>
13#include <linux/errno.h> 14#include <linux/errno.h>
14#include <linux/types.h> 15#include <linux/types.h>
@@ -200,6 +201,8 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
200 return NULL; 201 return NULL;
201} 202}
202 203
204EXPORT_SYMBOL_GPL(fl6_sock_lookup);
205
203void fl6_free_socklist(struct sock *sk) 206void fl6_free_socklist(struct sock *sk)
204{ 207{
205 struct ipv6_pinfo *np = inet6_sk(sk); 208 struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a6026d2787d2..29f73592e68e 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -48,7 +48,7 @@
48 48
49 49
50 50
51static inline int ip6_rcv_finish( struct sk_buff *skb) 51inline int ip6_rcv_finish( struct sk_buff *skb)
52{ 52{
53 if (skb->dst == NULL) 53 if (skb->dst == NULL)
54 ip6_route_input(skb); 54 ip6_route_input(skb);
@@ -97,6 +97,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
97 if (hdr->version != 6) 97 if (hdr->version != 6)
98 goto err; 98 goto err;
99 99
100 skb->h.raw = (u8 *)(hdr + 1);
101 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
102
100 pkt_len = ntohs(hdr->payload_len); 103 pkt_len = ntohs(hdr->payload_len);
101 104
102 /* pkt_len may be zero if Jumbo payload option is present */ 105 /* pkt_len may be zero if Jumbo payload option is present */
@@ -111,8 +114,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
111 } 114 }
112 115
113 if (hdr->nexthdr == NEXTHDR_HOP) { 116 if (hdr->nexthdr == NEXTHDR_HOP) {
114 skb->h.raw = (u8*)(hdr+1); 117 if (ipv6_parse_hopopts(skb, IP6CB(skb)->nhoff) < 0) {
115 if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) {
116 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); 118 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
117 return 0; 119 return 0;
118 } 120 }
@@ -143,26 +145,15 @@ static inline int ip6_input_finish(struct sk_buff *skb)
143 int nexthdr; 145 int nexthdr;
144 u8 hash; 146 u8 hash;
145 147
146 skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
147
148 /* 148 /*
149 * Parse extension headers 149 * Parse extension headers
150 */ 150 */
151 151
152 nexthdr = skb->nh.ipv6h->nexthdr;
153 nhoff = offsetof(struct ipv6hdr, nexthdr);
154
155 /* Skip hop-by-hop options, they are already parsed. */
156 if (nexthdr == NEXTHDR_HOP) {
157 nhoff = sizeof(struct ipv6hdr);
158 nexthdr = skb->h.raw[0];
159 skb->h.raw += (skb->h.raw[1]+1)<<3;
160 }
161
162 rcu_read_lock(); 152 rcu_read_lock();
163resubmit: 153resubmit:
164 if (!pskb_pull(skb, skb->h.raw - skb->data)) 154 if (!pskb_pull(skb, skb->h.raw - skb->data))
165 goto discard; 155 goto discard;
156 nhoff = IP6CB(skb)->nhoff;
166 nexthdr = skb->nh.raw[nhoff]; 157 nexthdr = skb->nh.raw[nhoff];
167 158
168 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); 159 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
@@ -194,7 +185,7 @@ resubmit:
194 !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 185 !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
195 goto discard; 186 goto discard;
196 187
197 ret = ipprot->handler(&skb, &nhoff); 188 ret = ipprot->handler(&skb);
198 if (ret > 0) 189 if (ret > 0)
199 goto resubmit; 190 goto resubmit;
200 else if (ret == 0) 191 else if (ret == 0)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8523c76ebf76..efa3e72cfcfa 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -226,6 +226,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
226 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 226 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
227 ipv6_addr_copy(&hdr->daddr, first_hop); 227 ipv6_addr_copy(&hdr->daddr, first_hop);
228 228
229 skb->priority = sk->sk_priority;
230
229 mtu = dst_mtu(dst); 231 mtu = dst_mtu(dst);
230 if ((skb->len <= mtu) || ipfragok) { 232 if ((skb->len <= mtu) || ipfragok) {
231 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 233 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
@@ -775,6 +777,8 @@ out_err_release:
775 return err; 777 return err;
776} 778}
777 779
780EXPORT_SYMBOL_GPL(ip6_dst_lookup);
781
778static inline int ip6_ufo_append_data(struct sock *sk, 782static inline int ip6_ufo_append_data(struct sock *sk,
779 int getfrag(void *from, char *to, int offset, int len, 783 int getfrag(void *from, char *to, int offset, int len,
780 int odd, struct sk_buff *skb), 784 int odd, struct sk_buff *skb),
@@ -1180,6 +1184,8 @@ int ip6_push_pending_frames(struct sock *sk)
1180 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1184 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1181 ipv6_addr_copy(&hdr->daddr, final_dst); 1185 ipv6_addr_copy(&hdr->daddr, final_dst);
1182 1186
1187 skb->priority = sk->sk_priority;
1188
1183 skb->dst = dst_clone(&rt->u.dst); 1189 skb->dst = dst_clone(&rt->u.dst);
1184 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 1190 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1185 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); 1191 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e315d0f80af1..92ead3cf956b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -21,6 +21,7 @@
21 21
22#include <linux/config.h> 22#include <linux/config.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/capability.h>
24#include <linux/errno.h> 25#include <linux/errno.h>
25#include <linux/types.h> 26#include <linux/types.h>
26#include <linux/sockios.h> 27#include <linux/sockios.h>
@@ -243,7 +244,7 @@ ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
243 if (dev == NULL) 244 if (dev == NULL)
244 return -ENOMEM; 245 return -ENOMEM;
245 246
246 t = dev->priv; 247 t = netdev_priv(dev);
247 dev->init = ip6ip6_tnl_dev_init; 248 dev->init = ip6ip6_tnl_dev_init;
248 t->parms = *p; 249 t->parms = *p;
249 250
@@ -308,7 +309,7 @@ ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
308static void 309static void
309ip6ip6_tnl_dev_uninit(struct net_device *dev) 310ip6ip6_tnl_dev_uninit(struct net_device *dev)
310{ 311{
311 struct ip6_tnl *t = dev->priv; 312 struct ip6_tnl *t = netdev_priv(dev);
312 313
313 if (dev == ip6ip6_fb_tnl_dev) { 314 if (dev == ip6ip6_fb_tnl_dev) {
314 write_lock_bh(&ip6ip6_lock); 315 write_lock_bh(&ip6ip6_lock);
@@ -510,7 +511,7 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
510 **/ 511 **/
511 512
512static int 513static int
513ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 514ip6ip6_rcv(struct sk_buff **pskb)
514{ 515{
515 struct sk_buff *skb = *pskb; 516 struct sk_buff *skb = *pskb;
516 struct ipv6hdr *ipv6h; 517 struct ipv6hdr *ipv6h;
@@ -623,7 +624,7 @@ ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
623static int 624static int
624ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 625ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
625{ 626{
626 struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; 627 struct ip6_tnl *t = netdev_priv(dev);
627 struct net_device_stats *stats = &t->stat; 628 struct net_device_stats *stats = &t->stat;
628 struct ipv6hdr *ipv6h = skb->nh.ipv6h; 629 struct ipv6hdr *ipv6h = skb->nh.ipv6h;
629 struct ipv6_txoptions *opt = NULL; 630 struct ipv6_txoptions *opt = NULL;
@@ -933,11 +934,11 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
933 break; 934 break;
934 } 935 }
935 if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV) 936 if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
936 t = (struct ip6_tnl *) dev->priv; 937 t = netdev_priv(dev);
937 else if (err) 938 else if (err)
938 break; 939 break;
939 } else 940 } else
940 t = (struct ip6_tnl *) dev->priv; 941 t = netdev_priv(dev);
941 942
942 memcpy(&p, &t->parms, sizeof (p)); 943 memcpy(&p, &t->parms, sizeof (p));
943 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { 944 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
@@ -955,7 +956,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
955 break; 956 break;
956 } 957 }
957 if (!create && dev != ip6ip6_fb_tnl_dev) { 958 if (!create && dev != ip6ip6_fb_tnl_dev) {
958 t = (struct ip6_tnl *) dev->priv; 959 t = netdev_priv(dev);
959 } 960 }
960 if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) { 961 if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
961 break; 962 break;
@@ -991,12 +992,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
991 err = ip6ip6_tnl_locate(&p, &t, 0); 992 err = ip6ip6_tnl_locate(&p, &t, 0);
992 if (err) 993 if (err)
993 break; 994 break;
994 if (t == ip6ip6_fb_tnl_dev->priv) { 995 if (t == netdev_priv(ip6ip6_fb_tnl_dev)) {
995 err = -EPERM; 996 err = -EPERM;
996 break; 997 break;
997 } 998 }
998 } else { 999 } else {
999 t = (struct ip6_tnl *) dev->priv; 1000 t = netdev_priv(dev);
1000 } 1001 }
1001 err = unregister_netdevice(t->dev); 1002 err = unregister_netdevice(t->dev);
1002 break; 1003 break;
@@ -1016,7 +1017,7 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1016static struct net_device_stats * 1017static struct net_device_stats *
1017ip6ip6_tnl_get_stats(struct net_device *dev) 1018ip6ip6_tnl_get_stats(struct net_device *dev)
1018{ 1019{
1019 return &(((struct ip6_tnl *) dev->priv)->stat); 1020 return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
1020} 1021}
1021 1022
1022/** 1023/**
@@ -1073,7 +1074,7 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev)
1073static inline void 1074static inline void
1074ip6ip6_tnl_dev_init_gen(struct net_device *dev) 1075ip6ip6_tnl_dev_init_gen(struct net_device *dev)
1075{ 1076{
1076 struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; 1077 struct ip6_tnl *t = netdev_priv(dev);
1077 t->fl.proto = IPPROTO_IPV6; 1078 t->fl.proto = IPPROTO_IPV6;
1078 t->dev = dev; 1079 t->dev = dev;
1079 strcpy(t->parms.name, dev->name); 1080 strcpy(t->parms.name, dev->name);
@@ -1087,7 +1088,7 @@ ip6ip6_tnl_dev_init_gen(struct net_device *dev)
1087static int 1088static int
1088ip6ip6_tnl_dev_init(struct net_device *dev) 1089ip6ip6_tnl_dev_init(struct net_device *dev)
1089{ 1090{
1090 struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; 1091 struct ip6_tnl *t = netdev_priv(dev);
1091 ip6ip6_tnl_dev_init_gen(dev); 1092 ip6ip6_tnl_dev_init_gen(dev);
1092 ip6ip6_tnl_link_config(t); 1093 ip6ip6_tnl_link_config(t);
1093 return 0; 1094 return 0;
@@ -1103,7 +1104,7 @@ ip6ip6_tnl_dev_init(struct net_device *dev)
1103static int 1104static int
1104ip6ip6_fb_tnl_dev_init(struct net_device *dev) 1105ip6ip6_fb_tnl_dev_init(struct net_device *dev)
1105{ 1106{
1106 struct ip6_tnl *t = dev->priv; 1107 struct ip6_tnl *t = netdev_priv(dev);
1107 ip6ip6_tnl_dev_init_gen(dev); 1108 ip6ip6_tnl_dev_init_gen(dev);
1108 dev_hold(dev); 1109 dev_hold(dev);
1109 tnls_wc[0] = t; 1110 tnls_wc[0] = t;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 55917fb17094..626dd39685f2 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -47,6 +47,7 @@
47#include <linux/rtnetlink.h> 47#include <linux/rtnetlink.h>
48#include <net/icmp.h> 48#include <net/icmp.h>
49#include <net/ipv6.h> 49#include <net/ipv6.h>
50#include <net/protocol.h>
50#include <linux/ipv6.h> 51#include <linux/ipv6.h>
51#include <linux/icmpv6.h> 52#include <linux/icmpv6.h>
52 53
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3620718defe6..f7142ba519ab 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -26,6 +26,7 @@
26 */ 26 */
27 27
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/capability.h>
29#include <linux/config.h> 30#include <linux/config.h>
30#include <linux/errno.h> 31#include <linux/errno.h>
31#include <linux/types.h> 32#include <linux/types.h>
@@ -163,17 +164,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
163 sk_refcnt_debug_dec(sk); 164 sk_refcnt_debug_dec(sk);
164 165
165 if (sk->sk_protocol == IPPROTO_TCP) { 166 if (sk->sk_protocol == IPPROTO_TCP) {
166 struct tcp_sock *tp = tcp_sk(sk); 167 struct inet_connection_sock *icsk = inet_csk(sk);
167 168
168 local_bh_disable(); 169 local_bh_disable();
169 sock_prot_dec_use(sk->sk_prot); 170 sock_prot_dec_use(sk->sk_prot);
170 sock_prot_inc_use(&tcp_prot); 171 sock_prot_inc_use(&tcp_prot);
171 local_bh_enable(); 172 local_bh_enable();
172 sk->sk_prot = &tcp_prot; 173 sk->sk_prot = &tcp_prot;
173 tp->af_specific = &ipv4_specific; 174 icsk->icsk_af_ops = &ipv4_specific;
174 sk->sk_socket->ops = &inet_stream_ops; 175 sk->sk_socket->ops = &inet_stream_ops;
175 sk->sk_family = PF_INET; 176 sk->sk_family = PF_INET;
176 tcp_sync_mss(sk, tp->pmtu_cookie); 177 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
177 } else { 178 } else {
178 local_bh_disable(); 179 local_bh_disable();
179 sock_prot_dec_use(sk->sk_prot); 180 sock_prot_dec_use(sk->sk_prot);
@@ -317,14 +318,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
317 } 318 }
318 319
319 retv = 0; 320 retv = 0;
320 if (sk->sk_type == SOCK_STREAM) { 321 if (inet_sk(sk)->is_icsk) {
321 if (opt) { 322 if (opt) {
322 struct tcp_sock *tp = tcp_sk(sk); 323 struct inet_connection_sock *icsk = inet_csk(sk);
323 if (!((1 << sk->sk_state) & 324 if (!((1 << sk->sk_state) &
324 (TCPF_LISTEN | TCPF_CLOSE)) 325 (TCPF_LISTEN | TCPF_CLOSE))
325 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { 326 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
326 tp->ext_header_len = opt->opt_flen + opt->opt_nflen; 327 icsk->icsk_ext_hdr_len =
327 tcp_sync_mss(sk, tp->pmtu_cookie); 328 opt->opt_flen + opt->opt_nflen;
329 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
328 } 330 }
329 } 331 }
330 opt = xchg(&np->opt, opt); 332 opt = xchg(&np->opt, opt);
@@ -380,14 +382,15 @@ sticky_done:
380 goto done; 382 goto done;
381update: 383update:
382 retv = 0; 384 retv = 0;
383 if (sk->sk_type == SOCK_STREAM) { 385 if (inet_sk(sk)->is_icsk) {
384 if (opt) { 386 if (opt) {
385 struct tcp_sock *tp = tcp_sk(sk); 387 struct inet_connection_sock *icsk = inet_csk(sk);
386 if (!((1 << sk->sk_state) & 388 if (!((1 << sk->sk_state) &
387 (TCPF_LISTEN | TCPF_CLOSE)) 389 (TCPF_LISTEN | TCPF_CLOSE))
388 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { 390 && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
389 tp->ext_header_len = opt->opt_flen + opt->opt_nflen; 391 icsk->icsk_ext_hdr_len =
390 tcp_sync_mss(sk, tp->pmtu_cookie); 392 opt->opt_flen + opt->opt_nflen;
393 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
391 } 394 }
392 } 395 }
393 opt = xchg(&np->opt, opt); 396 opt = xchg(&np->opt, opt);
@@ -547,7 +550,7 @@ done:
547 retv = -ENOBUFS; 550 retv = -ENOBUFS;
548 break; 551 break;
549 } 552 }
550 gsf = (struct group_filter *)kmalloc(optlen,GFP_KERNEL); 553 gsf = kmalloc(optlen,GFP_KERNEL);
551 if (gsf == 0) { 554 if (gsf == 0) {
552 retv = -ENOBUFS; 555 retv = -ENOBUFS;
553 break; 556 break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f829a4ad3ccc..cc3e9f560867 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -224,7 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
224 224
225 mc_lst->ifindex = dev->ifindex; 225 mc_lst->ifindex = dev->ifindex;
226 mc_lst->sfmode = MCAST_EXCLUDE; 226 mc_lst->sfmode = MCAST_EXCLUDE;
227 mc_lst->sflock = RW_LOCK_UNLOCKED; 227 rwlock_init(&mc_lst->sflock);
228 mc_lst->sflist = NULL; 228 mc_lst->sflist = NULL;
229 229
230 /* 230 /*
@@ -449,8 +449,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
449 449
450 if (psl) 450 if (psl)
451 count += psl->sl_max; 451 count += psl->sl_max;
452 newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk, 452 newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC);
453 IP6_SFLSIZE(count), GFP_ATOMIC);
454 if (!newpsl) { 453 if (!newpsl) {
455 err = -ENOBUFS; 454 err = -ENOBUFS;
456 goto done; 455 goto done;
@@ -535,8 +534,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
535 goto done; 534 goto done;
536 } 535 }
537 if (gsf->gf_numsrc) { 536 if (gsf->gf_numsrc) {
538 newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk, 537 newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
539 IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC); 538 GFP_ATOMIC);
540 if (!newpsl) { 539 if (!newpsl) {
541 err = -ENOBUFS; 540 err = -ENOBUFS;
542 goto done; 541 goto done;
@@ -768,7 +767,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
768 * for deleted items allows change reports to use common code with 767 * for deleted items allows change reports to use common code with
769 * non-deleted or query-response MCA's. 768 * non-deleted or query-response MCA's.
770 */ 769 */
771 pmc = (struct ifmcaddr6 *)kmalloc(sizeof(*pmc), GFP_ATOMIC); 770 pmc = kmalloc(sizeof(*pmc), GFP_ATOMIC);
772 if (!pmc) 771 if (!pmc)
773 return; 772 return;
774 memset(pmc, 0, sizeof(*pmc)); 773 memset(pmc, 0, sizeof(*pmc));
@@ -1937,7 +1936,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
1937 psf_prev = psf; 1936 psf_prev = psf;
1938 } 1937 }
1939 if (!psf) { 1938 if (!psf) {
1940 psf = (struct ip6_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC); 1939 psf = kmalloc(sizeof(*psf), GFP_ATOMIC);
1941 if (!psf) 1940 if (!psf)
1942 return -ENOBUFS; 1941 return -ENOBUFS;
1943 memset(psf, 0, sizeof(*psf)); 1942 memset(psf, 0, sizeof(*psf));
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index f8626ebf90fd..d750cfc019dc 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -1,15 +1,12 @@
1#include <linux/config.h>
2#include <linux/init.h>
3
4#ifdef CONFIG_NETFILTER
5
6#include <linux/kernel.h> 1#include <linux/kernel.h>
2#include <linux/init.h>
7#include <linux/ipv6.h> 3#include <linux/ipv6.h>
8#include <linux/netfilter.h> 4#include <linux/netfilter.h>
9#include <linux/netfilter_ipv6.h> 5#include <linux/netfilter_ipv6.h>
10#include <net/dst.h> 6#include <net/dst.h>
11#include <net/ipv6.h> 7#include <net/ipv6.h>
12#include <net/ip6_route.h> 8#include <net/ip6_route.h>
9#include <net/xfrm.h>
13 10
14int ip6_route_me_harder(struct sk_buff *skb) 11int ip6_route_me_harder(struct sk_buff *skb)
15{ 12{
@@ -21,11 +18,17 @@ int ip6_route_me_harder(struct sk_buff *skb)
21 { .ip6_u = 18 { .ip6_u =
22 { .daddr = iph->daddr, 19 { .daddr = iph->daddr,
23 .saddr = iph->saddr, } }, 20 .saddr = iph->saddr, } },
24 .proto = iph->nexthdr,
25 }; 21 };
26 22
27 dst = ip6_route_output(skb->sk, &fl); 23 dst = ip6_route_output(skb->sk, &fl);
28 24
25#ifdef CONFIG_XFRM
26 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
27 xfrm_decode_session(skb, &fl, AF_INET6) == 0)
28 if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
29 return -1;
30#endif
31
29 if (dst->error) { 32 if (dst->error) {
30 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 33 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
31 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); 34 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
@@ -87,18 +90,10 @@ int __init ipv6_netfilter_init(void)
87 return nf_register_queue_rerouter(PF_INET6, &ip6_reroute); 90 return nf_register_queue_rerouter(PF_INET6, &ip6_reroute);
88} 91}
89 92
93/* This can be called from inet6_init() on errors, so it cannot
94 * be marked __exit. -DaveM
95 */
90void ipv6_netfilter_fini(void) 96void ipv6_netfilter_fini(void)
91{ 97{
92 nf_unregister_queue_rerouter(PF_INET6); 98 nf_unregister_queue_rerouter(PF_INET6);
93} 99}
94
95#else /* CONFIG_NETFILTER */
96int __init ipv6_netfilter_init(void)
97{
98 return 0;
99}
100
101void ipv6_netfilter_fini(void)
102{
103}
104#endif /* CONFIG_NETFILTER */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 04912f9b35c3..105dd69ee9fb 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -179,6 +179,16 @@ config IP6_NF_MATCH_PHYSDEV
179 179
180 To compile it as a module, choose M here. If unsure, say N. 180 To compile it as a module, choose M here. If unsure, say N.
181 181
182config IP6_NF_MATCH_POLICY
183 tristate "IPsec policy match support"
184 depends on IP6_NF_IPTABLES && XFRM
185 help
186 Policy matching allows you to match packets based on the
187 IPsec policy that was used during decapsulation/will
188 be used during encapsulation.
189
190 To compile it as a module, choose M here. If unsure, say N.
191
182# The targets 192# The targets
183config IP6_NF_FILTER 193config IP6_NF_FILTER
184 tristate "Packet filtering" 194 tristate "Packet filtering"
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 9ab5b2ca1f59..c0c809b426e8 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
13obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o 13obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
14obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o 14obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
15obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o 15obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o
16obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o
16obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o 17obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
17obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o 18obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o
18obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o 19obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 95d469271c4d..1390370186d9 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -14,7 +14,10 @@
14 * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu> 14 * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
15 * - new extension header parser code 15 * - new extension header parser code
16 */ 16 */
17
18#include <linux/capability.h>
17#include <linux/config.h> 19#include <linux/config.h>
20#include <linux/in.h>
18#include <linux/skbuff.h> 21#include <linux/skbuff.h>
19#include <linux/kmod.h> 22#include <linux/kmod.h>
20#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
@@ -86,11 +89,6 @@ static DECLARE_MUTEX(ip6t_mutex);
86 context stops packets coming through and allows user context to read 89 context stops packets coming through and allows user context to read
87 the counters or update the rules. 90 the counters or update the rules.
88 91
89 To be cache friendly on SMP, we arrange them like so:
90 [ n-entries ]
91 ... cache-align padding ...
92 [ n-entries ]
93
94 Hence the start of any table is given by get_table() below. */ 92 Hence the start of any table is given by get_table() below. */
95 93
96/* The table itself */ 94/* The table itself */
@@ -108,33 +106,29 @@ struct ip6t_table_info
108 unsigned int underflow[NF_IP6_NUMHOOKS]; 106 unsigned int underflow[NF_IP6_NUMHOOKS];
109 107
110 /* ip6t_entry tables: one per CPU */ 108 /* ip6t_entry tables: one per CPU */
111 char entries[0] ____cacheline_aligned; 109 void *entries[NR_CPUS];
112}; 110};
113 111
114static LIST_HEAD(ip6t_target); 112static LIST_HEAD(ip6t_target);
115static LIST_HEAD(ip6t_match); 113static LIST_HEAD(ip6t_match);
116static LIST_HEAD(ip6t_tables); 114static LIST_HEAD(ip6t_tables);
115#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
117#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) 116#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
118 117
119#ifdef CONFIG_SMP
120#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
121#else
122#define TABLE_OFFSET(t,p) 0
123#endif
124
125#if 0 118#if 0
126#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) 119#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
127#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) 120#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
128#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) 121#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
129#endif 122#endif
130 123
131static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask, 124int
132 struct in6_addr addr2) 125ip6_masked_addrcmp(const struct in6_addr *addr1, const struct in6_addr *mask,
126 const struct in6_addr *addr2)
133{ 127{
134 int i; 128 int i;
135 for( i = 0; i < 16; i++){ 129 for( i = 0; i < 16; i++){
136 if((addr1.s6_addr[i] & mask.s6_addr[i]) != 130 if((addr1->s6_addr[i] & mask->s6_addr[i]) !=
137 (addr2.s6_addr[i] & mask.s6_addr[i])) 131 (addr2->s6_addr[i] & mask->s6_addr[i]))
138 return 1; 132 return 1;
139 } 133 }
140 return 0; 134 return 0;
@@ -168,10 +162,10 @@ ip6_packet_match(const struct sk_buff *skb,
168 162
169#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg)) 163#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
170 164
171 if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src), 165 if (FWINV(ip6_masked_addrcmp(&ipv6->saddr, &ip6info->smsk,
172 IP6T_INV_SRCIP) 166 &ip6info->src), IP6T_INV_SRCIP)
173 || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst), 167 || FWINV(ip6_masked_addrcmp(&ipv6->daddr, &ip6info->dmsk,
174 IP6T_INV_DSTIP)) { 168 &ip6info->dst), IP6T_INV_DSTIP)) {
175 dprintf("Source or dest mismatch.\n"); 169 dprintf("Source or dest mismatch.\n");
176/* 170/*
177 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, 171 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
@@ -214,69 +208,21 @@ ip6_packet_match(const struct sk_buff *skb,
214 208
215 /* look for the desired protocol header */ 209 /* look for the desired protocol header */
216 if((ip6info->flags & IP6T_F_PROTO)) { 210 if((ip6info->flags & IP6T_F_PROTO)) {
217 u_int8_t currenthdr = ipv6->nexthdr; 211 int protohdr;
218 struct ipv6_opt_hdr _hdr, *hp; 212 unsigned short _frag_off;
219 u_int16_t ptr; /* Header offset in skb */
220 u_int16_t hdrlen; /* Header */
221 u_int16_t _fragoff = 0, *fp = NULL;
222
223 ptr = IPV6_HDR_LEN;
224
225 while (ip6t_ext_hdr(currenthdr)) {
226 /* Is there enough space for the next ext header? */
227 if (skb->len - ptr < IPV6_OPTHDR_LEN)
228 return 0;
229
230 /* NONE or ESP: there isn't protocol part */
231 /* If we want to count these packets in '-p all',
232 * we will change the return 0 to 1*/
233 if ((currenthdr == IPPROTO_NONE) ||
234 (currenthdr == IPPROTO_ESP))
235 break;
236 213
237 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); 214 protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
238 BUG_ON(hp == NULL); 215 if (protohdr < 0)
239 216 return 0;
240 /* Size calculation */
241 if (currenthdr == IPPROTO_FRAGMENT) {
242 fp = skb_header_pointer(skb,
243 ptr+offsetof(struct frag_hdr,
244 frag_off),
245 sizeof(_fragoff),
246 &_fragoff);
247 if (fp == NULL)
248 return 0;
249
250 _fragoff = ntohs(*fp) & ~0x7;
251 hdrlen = 8;
252 } else if (currenthdr == IPPROTO_AH)
253 hdrlen = (hp->hdrlen+2)<<2;
254 else
255 hdrlen = ipv6_optlen(hp);
256
257 currenthdr = hp->nexthdr;
258 ptr += hdrlen;
259 /* ptr is too large */
260 if ( ptr > skb->len )
261 return 0;
262 if (_fragoff) {
263 if (ip6t_ext_hdr(currenthdr))
264 return 0;
265 break;
266 }
267 }
268
269 *protoff = ptr;
270 *fragoff = _fragoff;
271 217
272 /* currenthdr contains the protocol header */ 218 *fragoff = _frag_off;
273 219
274 dprintf("Packet protocol %hi ?= %s%hi.\n", 220 dprintf("Packet protocol %hi ?= %s%hi.\n",
275 currenthdr, 221 protohdr,
276 ip6info->invflags & IP6T_INV_PROTO ? "!":"", 222 ip6info->invflags & IP6T_INV_PROTO ? "!":"",
277 ip6info->proto); 223 ip6info->proto);
278 224
279 if (ip6info->proto == currenthdr) { 225 if (ip6info->proto == protohdr) {
280 if(ip6info->invflags & IP6T_INV_PROTO) { 226 if(ip6info->invflags & IP6T_INV_PROTO) {
281 return 0; 227 return 0;
282 } 228 }
@@ -376,8 +322,7 @@ ip6t_do_table(struct sk_buff **pskb,
376 322
377 read_lock_bh(&table->lock); 323 read_lock_bh(&table->lock);
378 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 324 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
379 table_base = (void *)table->private->entries 325 table_base = (void *)table->private->entries[smp_processor_id()];
380 + TABLE_OFFSET(table->private, smp_processor_id());
381 e = get_entry(table_base, table->private->hook_entry[hook]); 326 e = get_entry(table_base, table->private->hook_entry[hook]);
382 327
383#ifdef CONFIG_NETFILTER_DEBUG 328#ifdef CONFIG_NETFILTER_DEBUG
@@ -649,7 +594,8 @@ unconditional(const struct ip6t_ip6 *ipv6)
649/* Figures out from what hook each rule can be called: returns 0 if 594/* Figures out from what hook each rule can be called: returns 0 if
650 there are loops. Puts hook bitmask in comefrom. */ 595 there are loops. Puts hook bitmask in comefrom. */
651static int 596static int
652mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) 597mark_source_chains(struct ip6t_table_info *newinfo,
598 unsigned int valid_hooks, void *entry0)
653{ 599{
654 unsigned int hook; 600 unsigned int hook;
655 601
@@ -658,7 +604,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
658 for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { 604 for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
659 unsigned int pos = newinfo->hook_entry[hook]; 605 unsigned int pos = newinfo->hook_entry[hook];
660 struct ip6t_entry *e 606 struct ip6t_entry *e
661 = (struct ip6t_entry *)(newinfo->entries + pos); 607 = (struct ip6t_entry *)(entry0 + pos);
662 608
663 if (!(valid_hooks & (1 << hook))) 609 if (!(valid_hooks & (1 << hook)))
664 continue; 610 continue;
@@ -708,13 +654,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
708 goto next; 654 goto next;
709 655
710 e = (struct ip6t_entry *) 656 e = (struct ip6t_entry *)
711 (newinfo->entries + pos); 657 (entry0 + pos);
712 } while (oldpos == pos + e->next_offset); 658 } while (oldpos == pos + e->next_offset);
713 659
714 /* Move along one */ 660 /* Move along one */
715 size = e->next_offset; 661 size = e->next_offset;
716 e = (struct ip6t_entry *) 662 e = (struct ip6t_entry *)
717 (newinfo->entries + pos + size); 663 (entry0 + pos + size);
718 e->counters.pcnt = pos; 664 e->counters.pcnt = pos;
719 pos += size; 665 pos += size;
720 } else { 666 } else {
@@ -731,7 +677,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
731 newpos = pos + e->next_offset; 677 newpos = pos + e->next_offset;
732 } 678 }
733 e = (struct ip6t_entry *) 679 e = (struct ip6t_entry *)
734 (newinfo->entries + newpos); 680 (entry0 + newpos);
735 e->counters.pcnt = pos; 681 e->counters.pcnt = pos;
736 pos = newpos; 682 pos = newpos;
737 } 683 }
@@ -941,6 +887,7 @@ static int
941translate_table(const char *name, 887translate_table(const char *name,
942 unsigned int valid_hooks, 888 unsigned int valid_hooks,
943 struct ip6t_table_info *newinfo, 889 struct ip6t_table_info *newinfo,
890 void *entry0,
944 unsigned int size, 891 unsigned int size,
945 unsigned int number, 892 unsigned int number,
946 const unsigned int *hook_entries, 893 const unsigned int *hook_entries,
@@ -961,11 +908,11 @@ translate_table(const char *name,
961 duprintf("translate_table: size %u\n", newinfo->size); 908 duprintf("translate_table: size %u\n", newinfo->size);
962 i = 0; 909 i = 0;
963 /* Walk through entries, checking offsets. */ 910 /* Walk through entries, checking offsets. */
964 ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 911 ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
965 check_entry_size_and_hooks, 912 check_entry_size_and_hooks,
966 newinfo, 913 newinfo,
967 newinfo->entries, 914 entry0,
968 newinfo->entries + size, 915 entry0 + size,
969 hook_entries, underflows, &i); 916 hook_entries, underflows, &i);
970 if (ret != 0) 917 if (ret != 0)
971 return ret; 918 return ret;
@@ -993,27 +940,24 @@ translate_table(const char *name,
993 } 940 }
994 } 941 }
995 942
996 if (!mark_source_chains(newinfo, valid_hooks)) 943 if (!mark_source_chains(newinfo, valid_hooks, entry0))
997 return -ELOOP; 944 return -ELOOP;
998 945
999 /* Finally, each sanity check must pass */ 946 /* Finally, each sanity check must pass */
1000 i = 0; 947 i = 0;
1001 ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 948 ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
1002 check_entry, name, size, &i); 949 check_entry, name, size, &i);
1003 950
1004 if (ret != 0) { 951 if (ret != 0) {
1005 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, 952 IP6T_ENTRY_ITERATE(entry0, newinfo->size,
1006 cleanup_entry, &i); 953 cleanup_entry, &i);
1007 return ret; 954 return ret;
1008 } 955 }
1009 956
1010 /* And one copy for every other CPU */ 957 /* And one copy for every other CPU */
1011 for_each_cpu(i) { 958 for_each_cpu(i) {
1012 if (i == 0) 959 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
1013 continue; 960 memcpy(newinfo->entries[i], entry0, newinfo->size);
1014 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
1015 newinfo->entries,
1016 SMP_ALIGN(newinfo->size));
1017 } 961 }
1018 962
1019 return ret; 963 return ret;
@@ -1029,15 +973,12 @@ replace_table(struct ip6t_table *table,
1029 973
1030#ifdef CONFIG_NETFILTER_DEBUG 974#ifdef CONFIG_NETFILTER_DEBUG
1031 { 975 {
1032 struct ip6t_entry *table_base; 976 int cpu;
1033 unsigned int i;
1034 977
1035 for_each_cpu(i) { 978 for_each_cpu(cpu) {
1036 table_base = 979 struct ip6t_entry *table_base = newinfo->entries[cpu];
1037 (void *)newinfo->entries 980 if (table_base)
1038 + TABLE_OFFSET(newinfo, i); 981 table_base->comefrom = 0xdead57ac;
1039
1040 table_base->comefrom = 0xdead57ac;
1041 } 982 }
1042 } 983 }
1043#endif 984#endif
@@ -1072,16 +1013,44 @@ add_entry_to_counter(const struct ip6t_entry *e,
1072 return 0; 1013 return 0;
1073} 1014}
1074 1015
1016static inline int
1017set_entry_to_counter(const struct ip6t_entry *e,
1018 struct ip6t_counters total[],
1019 unsigned int *i)
1020{
1021 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
1022
1023 (*i)++;
1024 return 0;
1025}
1026
1075static void 1027static void
1076get_counters(const struct ip6t_table_info *t, 1028get_counters(const struct ip6t_table_info *t,
1077 struct ip6t_counters counters[]) 1029 struct ip6t_counters counters[])
1078{ 1030{
1079 unsigned int cpu; 1031 unsigned int cpu;
1080 unsigned int i; 1032 unsigned int i;
1033 unsigned int curcpu;
1034
1035 /* Instead of clearing (by a previous call to memset())
1036 * the counters and using adds, we set the counters
1037 * with data used by 'current' CPU
1038 * We dont care about preemption here.
1039 */
1040 curcpu = raw_smp_processor_id();
1041
1042 i = 0;
1043 IP6T_ENTRY_ITERATE(t->entries[curcpu],
1044 t->size,
1045 set_entry_to_counter,
1046 counters,
1047 &i);
1081 1048
1082 for_each_cpu(cpu) { 1049 for_each_cpu(cpu) {
1050 if (cpu == curcpu)
1051 continue;
1083 i = 0; 1052 i = 0;
1084 IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 1053 IP6T_ENTRY_ITERATE(t->entries[cpu],
1085 t->size, 1054 t->size,
1086 add_entry_to_counter, 1055 add_entry_to_counter,
1087 counters, 1056 counters,
@@ -1098,6 +1067,7 @@ copy_entries_to_user(unsigned int total_size,
1098 struct ip6t_entry *e; 1067 struct ip6t_entry *e;
1099 struct ip6t_counters *counters; 1068 struct ip6t_counters *counters;
1100 int ret = 0; 1069 int ret = 0;
1070 void *loc_cpu_entry;
1101 1071
1102 /* We need atomic snapshot of counters: rest doesn't change 1072 /* We need atomic snapshot of counters: rest doesn't change
1103 (other than comefrom, which userspace doesn't care 1073 (other than comefrom, which userspace doesn't care
@@ -1109,13 +1079,13 @@ copy_entries_to_user(unsigned int total_size,
1109 return -ENOMEM; 1079 return -ENOMEM;
1110 1080
1111 /* First, sum counters... */ 1081 /* First, sum counters... */
1112 memset(counters, 0, countersize);
1113 write_lock_bh(&table->lock); 1082 write_lock_bh(&table->lock);
1114 get_counters(table->private, counters); 1083 get_counters(table->private, counters);
1115 write_unlock_bh(&table->lock); 1084 write_unlock_bh(&table->lock);
1116 1085
1117 /* ... then copy entire thing from CPU 0... */ 1086 /* choose the copy that is on ourc node/cpu */
1118 if (copy_to_user(userptr, table->private->entries, total_size) != 0) { 1087 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1088 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1119 ret = -EFAULT; 1089 ret = -EFAULT;
1120 goto free_counters; 1090 goto free_counters;
1121 } 1091 }
@@ -1127,7 +1097,7 @@ copy_entries_to_user(unsigned int total_size,
1127 struct ip6t_entry_match *m; 1097 struct ip6t_entry_match *m;
1128 struct ip6t_entry_target *t; 1098 struct ip6t_entry_target *t;
1129 1099
1130 e = (struct ip6t_entry *)(table->private->entries + off); 1100 e = (struct ip6t_entry *)(loc_cpu_entry + off);
1131 if (copy_to_user(userptr + off 1101 if (copy_to_user(userptr + off
1132 + offsetof(struct ip6t_entry, counters), 1102 + offsetof(struct ip6t_entry, counters),
1133 &counters[num], 1103 &counters[num],
@@ -1196,6 +1166,46 @@ get_entries(const struct ip6t_get_entries *entries,
1196 return ret; 1166 return ret;
1197} 1167}
1198 1168
1169static void free_table_info(struct ip6t_table_info *info)
1170{
1171 int cpu;
1172 for_each_cpu(cpu) {
1173 if (info->size <= PAGE_SIZE)
1174 kfree(info->entries[cpu]);
1175 else
1176 vfree(info->entries[cpu]);
1177 }
1178 kfree(info);
1179}
1180
1181static struct ip6t_table_info *alloc_table_info(unsigned int size)
1182{
1183 struct ip6t_table_info *newinfo;
1184 int cpu;
1185
1186 newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL);
1187 if (!newinfo)
1188 return NULL;
1189
1190 newinfo->size = size;
1191
1192 for_each_cpu(cpu) {
1193 if (size <= PAGE_SIZE)
1194 newinfo->entries[cpu] = kmalloc_node(size,
1195 GFP_KERNEL,
1196 cpu_to_node(cpu));
1197 else
1198 newinfo->entries[cpu] = vmalloc_node(size,
1199 cpu_to_node(cpu));
1200 if (newinfo->entries[cpu] == NULL) {
1201 free_table_info(newinfo);
1202 return NULL;
1203 }
1204 }
1205
1206 return newinfo;
1207}
1208
1199static int 1209static int
1200do_replace(void __user *user, unsigned int len) 1210do_replace(void __user *user, unsigned int len)
1201{ 1211{
@@ -1204,6 +1214,7 @@ do_replace(void __user *user, unsigned int len)
1204 struct ip6t_table *t; 1214 struct ip6t_table *t;
1205 struct ip6t_table_info *newinfo, *oldinfo; 1215 struct ip6t_table_info *newinfo, *oldinfo;
1206 struct ip6t_counters *counters; 1216 struct ip6t_counters *counters;
1217 void *loc_cpu_entry, *loc_cpu_old_entry;
1207 1218
1208 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1219 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1209 return -EFAULT; 1220 return -EFAULT;
@@ -1212,13 +1223,13 @@ do_replace(void __user *user, unsigned int len)
1212 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) 1223 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1213 return -ENOMEM; 1224 return -ENOMEM;
1214 1225
1215 newinfo = vmalloc(sizeof(struct ip6t_table_info) 1226 newinfo = alloc_table_info(tmp.size);
1216 + SMP_ALIGN(tmp.size) *
1217 (highest_possible_processor_id()+1));
1218 if (!newinfo) 1227 if (!newinfo)
1219 return -ENOMEM; 1228 return -ENOMEM;
1220 1229
1221 if (copy_from_user(newinfo->entries, user + sizeof(tmp), 1230 /* choose the copy that is on our node/cpu */
1231 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1232 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1222 tmp.size) != 0) { 1233 tmp.size) != 0) {
1223 ret = -EFAULT; 1234 ret = -EFAULT;
1224 goto free_newinfo; 1235 goto free_newinfo;
@@ -1229,10 +1240,9 @@ do_replace(void __user *user, unsigned int len)
1229 ret = -ENOMEM; 1240 ret = -ENOMEM;
1230 goto free_newinfo; 1241 goto free_newinfo;
1231 } 1242 }
1232 memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
1233 1243
1234 ret = translate_table(tmp.name, tmp.valid_hooks, 1244 ret = translate_table(tmp.name, tmp.valid_hooks,
1235 newinfo, tmp.size, tmp.num_entries, 1245 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1236 tmp.hook_entry, tmp.underflow); 1246 tmp.hook_entry, tmp.underflow);
1237 if (ret != 0) 1247 if (ret != 0)
1238 goto free_newinfo_counters; 1248 goto free_newinfo_counters;
@@ -1271,8 +1281,9 @@ do_replace(void __user *user, unsigned int len)
1271 /* Get the old counters. */ 1281 /* Get the old counters. */
1272 get_counters(oldinfo, counters); 1282 get_counters(oldinfo, counters);
1273 /* Decrease module usage counts and free resource */ 1283 /* Decrease module usage counts and free resource */
1274 IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); 1284 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1275 vfree(oldinfo); 1285 IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1286 free_table_info(oldinfo);
1276 if (copy_to_user(tmp.counters, counters, 1287 if (copy_to_user(tmp.counters, counters,
1277 sizeof(struct ip6t_counters) * tmp.num_counters) != 0) 1288 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
1278 ret = -EFAULT; 1289 ret = -EFAULT;
@@ -1284,11 +1295,11 @@ do_replace(void __user *user, unsigned int len)
1284 module_put(t->me); 1295 module_put(t->me);
1285 up(&ip6t_mutex); 1296 up(&ip6t_mutex);
1286 free_newinfo_counters_untrans: 1297 free_newinfo_counters_untrans:
1287 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); 1298 IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1288 free_newinfo_counters: 1299 free_newinfo_counters:
1289 vfree(counters); 1300 vfree(counters);
1290 free_newinfo: 1301 free_newinfo:
1291 vfree(newinfo); 1302 free_table_info(newinfo);
1292 return ret; 1303 return ret;
1293} 1304}
1294 1305
@@ -1321,6 +1332,7 @@ do_add_counters(void __user *user, unsigned int len)
1321 struct ip6t_counters_info tmp, *paddc; 1332 struct ip6t_counters_info tmp, *paddc;
1322 struct ip6t_table *t; 1333 struct ip6t_table *t;
1323 int ret = 0; 1334 int ret = 0;
1335 void *loc_cpu_entry;
1324 1336
1325 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) 1337 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1326 return -EFAULT; 1338 return -EFAULT;
@@ -1350,7 +1362,9 @@ do_add_counters(void __user *user, unsigned int len)
1350 } 1362 }
1351 1363
1352 i = 0; 1364 i = 0;
1353 IP6T_ENTRY_ITERATE(t->private->entries, 1365 /* Choose the copy that is on our node */
1366 loc_cpu_entry = t->private->entries[smp_processor_id()];
1367 IP6T_ENTRY_ITERATE(loc_cpu_entry,
1354 t->private->size, 1368 t->private->size,
1355 add_counter_to_entry, 1369 add_counter_to_entry,
1356 paddc->counters, 1370 paddc->counters,
@@ -1543,28 +1557,29 @@ int ip6t_register_table(struct ip6t_table *table,
1543 struct ip6t_table_info *newinfo; 1557 struct ip6t_table_info *newinfo;
1544 static struct ip6t_table_info bootstrap 1558 static struct ip6t_table_info bootstrap
1545 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1559 = { 0, 0, 0, { 0 }, { 0 }, { } };
1560 void *loc_cpu_entry;
1546 1561
1547 newinfo = vmalloc(sizeof(struct ip6t_table_info) 1562 newinfo = alloc_table_info(repl->size);
1548 + SMP_ALIGN(repl->size) *
1549 (highest_possible_processor_id()+1));
1550 if (!newinfo) 1563 if (!newinfo)
1551 return -ENOMEM; 1564 return -ENOMEM;
1552 1565
1553 memcpy(newinfo->entries, repl->entries, repl->size); 1566 /* choose the copy on our node/cpu */
1567 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1568 memcpy(loc_cpu_entry, repl->entries, repl->size);
1554 1569
1555 ret = translate_table(table->name, table->valid_hooks, 1570 ret = translate_table(table->name, table->valid_hooks,
1556 newinfo, repl->size, 1571 newinfo, loc_cpu_entry, repl->size,
1557 repl->num_entries, 1572 repl->num_entries,
1558 repl->hook_entry, 1573 repl->hook_entry,
1559 repl->underflow); 1574 repl->underflow);
1560 if (ret != 0) { 1575 if (ret != 0) {
1561 vfree(newinfo); 1576 free_table_info(newinfo);
1562 return ret; 1577 return ret;
1563 } 1578 }
1564 1579
1565 ret = down_interruptible(&ip6t_mutex); 1580 ret = down_interruptible(&ip6t_mutex);
1566 if (ret != 0) { 1581 if (ret != 0) {
1567 vfree(newinfo); 1582 free_table_info(newinfo);
1568 return ret; 1583 return ret;
1569 } 1584 }
1570 1585
@@ -1593,20 +1608,23 @@ int ip6t_register_table(struct ip6t_table *table,
1593 return ret; 1608 return ret;
1594 1609
1595 free_unlock: 1610 free_unlock:
1596 vfree(newinfo); 1611 free_table_info(newinfo);
1597 goto unlock; 1612 goto unlock;
1598} 1613}
1599 1614
1600void ip6t_unregister_table(struct ip6t_table *table) 1615void ip6t_unregister_table(struct ip6t_table *table)
1601{ 1616{
1617 void *loc_cpu_entry;
1618
1602 down(&ip6t_mutex); 1619 down(&ip6t_mutex);
1603 LIST_DELETE(&ip6t_tables, table); 1620 LIST_DELETE(&ip6t_tables, table);
1604 up(&ip6t_mutex); 1621 up(&ip6t_mutex);
1605 1622
1606 /* Decrease module usage counts and free resources */ 1623 /* Decrease module usage counts and free resources */
1607 IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, 1624 loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1625 IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1608 cleanup_entry, NULL); 1626 cleanup_entry, NULL);
1609 vfree(table->private); 1627 free_table_info(table->private);
1610} 1628}
1611 1629
1612/* Returns 1 if the port is matched by the range, 0 otherwise */ 1630/* Returns 1 if the port is matched by the range, 0 otherwise */
@@ -2035,26 +2053,39 @@ static void __exit fini(void)
2035} 2053}
2036 2054
2037/* 2055/*
2038 * find specified header up to transport protocol header. 2056 * find the offset to specified header or the protocol number of last header
2039 * If found target header, the offset to the header is set to *offset 2057 * if target < 0. "last header" is transport protocol header, ESP, or
2040 * and return 0. otherwise, return -1. 2058 * "No next header".
2059 *
2060 * If target header is found, its offset is set in *offset and return protocol
2061 * number. Otherwise, return -1.
2062 *
2063 * Note that non-1st fragment is special case that "the protocol number
2064 * of last header" is "next header" field in Fragment header. In this case,
2065 * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
2066 * isn't NULL.
2041 * 2067 *
2042 * Notes: - non-1st Fragment Header isn't skipped.
2043 * - ESP header isn't skipped.
2044 * - The target header may be trancated.
2045 */ 2068 */
2046int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) 2069int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
2070 int target, unsigned short *fragoff)
2047{ 2071{
2048 unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; 2072 unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
2049 u8 nexthdr = skb->nh.ipv6h->nexthdr; 2073 u8 nexthdr = skb->nh.ipv6h->nexthdr;
2050 unsigned int len = skb->len - start; 2074 unsigned int len = skb->len - start;
2051 2075
2076 if (fragoff)
2077 *fragoff = 0;
2078
2052 while (nexthdr != target) { 2079 while (nexthdr != target) {
2053 struct ipv6_opt_hdr _hdr, *hp; 2080 struct ipv6_opt_hdr _hdr, *hp;
2054 unsigned int hdrlen; 2081 unsigned int hdrlen;
2055 2082
2056 if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) 2083 if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
2084 if (target < 0)
2085 break;
2057 return -1; 2086 return -1;
2087 }
2088
2058 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); 2089 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
2059 if (hp == NULL) 2090 if (hp == NULL)
2060 return -1; 2091 return -1;
@@ -2068,8 +2099,17 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
2068 if (fp == NULL) 2099 if (fp == NULL)
2069 return -1; 2100 return -1;
2070 2101
2071 if (ntohs(*fp) & ~0x7) 2102 _frag_off = ntohs(*fp) & ~0x7;
2103 if (_frag_off) {
2104 if (target < 0 &&
2105 ((!ipv6_ext_hdr(hp->nexthdr)) ||
2106 nexthdr == NEXTHDR_NONE)) {
2107 if (fragoff)
2108 *fragoff = _frag_off;
2109 return hp->nexthdr;
2110 }
2072 return -1; 2111 return -1;
2112 }
2073 hdrlen = 8; 2113 hdrlen = 8;
2074 } else if (nexthdr == NEXTHDR_AUTH) 2114 } else if (nexthdr == NEXTHDR_AUTH)
2075 hdrlen = (hp->hdrlen + 2) << 2; 2115 hdrlen = (hp->hdrlen + 2) << 2;
@@ -2082,7 +2122,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
2082 } 2122 }
2083 2123
2084 *offset = start; 2124 *offset = start;
2085 return 0; 2125 return nexthdr;
2086} 2126}
2087 2127
2088EXPORT_SYMBOL(ip6t_register_table); 2128EXPORT_SYMBOL(ip6t_register_table);
@@ -2094,6 +2134,7 @@ EXPORT_SYMBOL(ip6t_register_target);
2094EXPORT_SYMBOL(ip6t_unregister_target); 2134EXPORT_SYMBOL(ip6t_unregister_target);
2095EXPORT_SYMBOL(ip6t_ext_hdr); 2135EXPORT_SYMBOL(ip6t_ext_hdr);
2096EXPORT_SYMBOL(ipv6_find_hdr); 2136EXPORT_SYMBOL(ipv6_find_hdr);
2137EXPORT_SYMBOL(ip6_masked_addrcmp);
2097 2138
2098module_init(init); 2139module_init(init);
2099module_exit(fini); 2140module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0cd1d1bd9033..ae4653bfd654 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/moduleparam.h> 14#include <linux/moduleparam.h>
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/if_arp.h>
16#include <linux/ip.h> 17#include <linux/ip.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
18#include <linux/icmpv6.h> 19#include <linux/icmpv6.h>
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index dde37793d20b..f5c1a7ff4a1f 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ip.h>
12#include <linux/ipv6.h> 13#include <linux/ipv6.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <net/checksum.h> 15#include <net/checksum.h>
@@ -53,7 +54,7 @@ match(const struct sk_buff *skb,
53 unsigned int ptr; 54 unsigned int ptr;
54 unsigned int hdrlen = 0; 55 unsigned int hdrlen = 0;
55 56
56 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) 57 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0)
57 return 0; 58 return 0;
58 59
59 ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); 60 ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index c450a635e54b..48cf5f9efc95 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -71,9 +71,9 @@ match(const struct sk_buff *skb,
71 unsigned int optlen; 71 unsigned int optlen;
72 72
73#if HOPBYHOP 73#if HOPBYHOP
74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) 74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
75#else 75#else
76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) 76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
77#endif 77#endif
78 return 0; 78 return 0;
79 79
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index 24bc0cde43a1..e1828f6d0a40 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ip.h>
12#include <linux/ipv6.h> 13#include <linux/ipv6.h>
13#include <linux/types.h> 14#include <linux/types.h>
14#include <net/checksum.h> 15#include <net/checksum.h>
@@ -55,7 +56,7 @@ match(const struct sk_buff *skb,
55 /* Make sure this isn't an evil packet */ 56 /* Make sure this isn't an evil packet */
56 /*DEBUGP("ipv6_esp entered \n");*/ 57 /*DEBUGP("ipv6_esp entered \n");*/
57 58
58 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) 59 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP, NULL) < 0)
59 return 0; 60 return 0;
60 61
61 eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); 62 eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 085d5f8eea29..d1549b268669 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -52,7 +52,7 @@ match(const struct sk_buff *skb,
52 const struct ip6t_frag *fraginfo = matchinfo; 52 const struct ip6t_frag *fraginfo = matchinfo;
53 unsigned int ptr; 53 unsigned int ptr;
54 54
55 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) 55 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0)
56 return 0; 56 return 0;
57 57
58 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); 58 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 1d09485111d0..e3bc8e2700e7 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -71,9 +71,9 @@ match(const struct sk_buff *skb,
71 unsigned int optlen; 71 unsigned int optlen;
72 72
73#if HOPBYHOP 73#if HOPBYHOP
74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) 74 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
75#else 75#else
76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) 76 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
77#endif 77#endif
78 return 0; 78 return 0;
79 79
diff --git a/net/ipv6/netfilter/ip6t_mac.c b/net/ipv6/netfilter/ip6t_mac.c
index 526d43e37234..c848152315bc 100644
--- a/net/ipv6/netfilter/ip6t_mac.c
+++ b/net/ipv6/netfilter/ip6t_mac.c
@@ -11,6 +11,7 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/if_ether.h> 13#include <linux/if_ether.h>
14#include <linux/etherdevice.h>
14 15
15#include <linux/netfilter_ipv6/ip6t_mac.h> 16#include <linux/netfilter_ipv6/ip6t_mac.h>
16#include <linux/netfilter_ipv6/ip6_tables.h> 17#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -34,8 +35,8 @@ match(const struct sk_buff *skb,
34 return (skb->mac.raw >= skb->head 35 return (skb->mac.raw >= skb->head
35 && (skb->mac.raw + ETH_HLEN) <= skb->data 36 && (skb->mac.raw + ETH_HLEN) <= skb->data
36 /* If so, compare... */ 37 /* If so, compare... */
37 && ((memcmp(eth_hdr(skb)->h_source, info->srcaddr, ETH_ALEN) 38 && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
38 == 0) ^ info->invert)); 39 ^ info->invert));
39} 40}
40 41
41static int 42static int
diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c
new file mode 100644
index 000000000000..13fedad48c1d
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_policy.c
@@ -0,0 +1,175 @@
1/* IP tables module for matching IPsec policy
2 *
3 * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/config.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/init.h>
15#include <net/xfrm.h>
16
17#include <linux/netfilter_ipv6.h>
18#include <linux/netfilter_ipv6/ip6_tables.h>
19#include <linux/netfilter_ipv6/ip6t_policy.h>
20
21MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
22MODULE_DESCRIPTION("IPtables IPsec policy matching module");
23MODULE_LICENSE("GPL");
24
25
26static inline int
27match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e)
28{
29#define MATCH_ADDR(x,y,z) (!e->match.x || \
30 ((ip6_masked_addrcmp((z), &e->x, &e->y)) == 0) ^ e->invert.x)
31#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
32
33 return MATCH_ADDR(saddr, smask, (struct in6_addr *)&x->props.saddr.a6) &&
34 MATCH_ADDR(daddr, dmask, (struct in6_addr *)&x->id.daddr.a6) &&
35 MATCH(proto, x->id.proto) &&
36 MATCH(mode, x->props.mode) &&
37 MATCH(spi, x->id.spi) &&
38 MATCH(reqid, x->props.reqid);
39}
40
41static int
42match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info)
43{
44 const struct ip6t_policy_elem *e;
45 struct sec_path *sp = skb->sp;
46 int strict = info->flags & IP6T_POLICY_MATCH_STRICT;
47 int i, pos;
48
49 if (sp == NULL)
50 return -1;
51 if (strict && info->len != sp->len)
52 return 0;
53
54 for (i = sp->len - 1; i >= 0; i--) {
55 pos = strict ? i - sp->len + 1 : 0;
56 if (pos >= info->len)
57 return 0;
58 e = &info->pol[pos];
59
60 if (match_xfrm_state(sp->x[i].xvec, e)) {
61 if (!strict)
62 return 1;
63 } else if (strict)
64 return 0;
65 }
66
67 return strict ? 1 : 0;
68}
69
70static int
71match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info)
72{
73 const struct ip6t_policy_elem *e;
74 struct dst_entry *dst = skb->dst;
75 int strict = info->flags & IP6T_POLICY_MATCH_STRICT;
76 int i, pos;
77
78 if (dst->xfrm == NULL)
79 return -1;
80
81 for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
82 pos = strict ? i : 0;
83 if (pos >= info->len)
84 return 0;
85 e = &info->pol[pos];
86
87 if (match_xfrm_state(dst->xfrm, e)) {
88 if (!strict)
89 return 1;
90 } else if (strict)
91 return 0;
92 }
93
94 return strict ? 1 : 0;
95}
96
97static int match(const struct sk_buff *skb,
98 const struct net_device *in,
99 const struct net_device *out,
100 const void *matchinfo,
101 int offset,
102 unsigned int protoff,
103 int *hotdrop)
104{
105 const struct ip6t_policy_info *info = matchinfo;
106 int ret;
107
108 if (info->flags & IP6T_POLICY_MATCH_IN)
109 ret = match_policy_in(skb, info);
110 else
111 ret = match_policy_out(skb, info);
112
113 if (ret < 0)
114 ret = info->flags & IP6T_POLICY_MATCH_NONE ? 1 : 0;
115 else if (info->flags & IP6T_POLICY_MATCH_NONE)
116 ret = 0;
117
118 return ret;
119}
120
121static int checkentry(const char *tablename, const struct ip6t_ip6 *ip,
122 void *matchinfo, unsigned int matchsize,
123 unsigned int hook_mask)
124{
125 struct ip6t_policy_info *info = matchinfo;
126
127 if (matchsize != IP6T_ALIGN(sizeof(*info))) {
128 printk(KERN_ERR "ip6t_policy: matchsize %u != %zu\n",
129 matchsize, IP6T_ALIGN(sizeof(*info)));
130 return 0;
131 }
132 if (!(info->flags & (IP6T_POLICY_MATCH_IN|IP6T_POLICY_MATCH_OUT))) {
133 printk(KERN_ERR "ip6t_policy: neither incoming nor "
134 "outgoing policy selected\n");
135 return 0;
136 }
137 if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN)
138 && info->flags & IP6T_POLICY_MATCH_OUT) {
139 printk(KERN_ERR "ip6t_policy: output policy not valid in "
140 "PRE_ROUTING and INPUT\n");
141 return 0;
142 }
143 if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT)
144 && info->flags & IP6T_POLICY_MATCH_IN) {
145 printk(KERN_ERR "ip6t_policy: input policy not valid in "
146 "POST_ROUTING and OUTPUT\n");
147 return 0;
148 }
149 if (info->len > IP6T_POLICY_MAX_ELEM) {
150 printk(KERN_ERR "ip6t_policy: too many policy elements\n");
151 return 0;
152 }
153
154 return 1;
155}
156
157static struct ip6t_match policy_match = {
158 .name = "policy",
159 .match = match,
160 .checkentry = checkentry,
161 .me = THIS_MODULE,
162};
163
164static int __init init(void)
165{
166 return ip6t_register_match(&policy_match);
167}
168
169static void __exit fini(void)
170{
171 ip6t_unregister_match(&policy_match);
172}
173
174module_init(init);
175module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index beb2fd5cebbb..c1e770e45543 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -58,7 +58,7 @@ match(const struct sk_buff *skb,
58 unsigned int ret = 0; 58 unsigned int ret = 0;
59 struct in6_addr *ap, _addr; 59 struct in6_addr *ap, _addr;
60 60
61 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) 61 if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0)
62 return 0; 62 return 0;
63 63
64 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); 64 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 753a3ae8502b..e57d6fc9957a 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -335,10 +335,10 @@ static struct nf_hook_ops ipv6_conntrack_local_in_ops = {
335#ifdef CONFIG_SYSCTL 335#ifdef CONFIG_SYSCTL
336 336
337/* From nf_conntrack_proto_icmpv6.c */ 337/* From nf_conntrack_proto_icmpv6.c */
338extern unsigned long nf_ct_icmpv6_timeout; 338extern unsigned int nf_ct_icmpv6_timeout;
339 339
340/* From nf_conntrack_frag6.c */ 340/* From nf_conntrack_frag6.c */
341extern unsigned long nf_ct_frag6_timeout; 341extern unsigned int nf_ct_frag6_timeout;
342extern unsigned int nf_ct_frag6_low_thresh; 342extern unsigned int nf_ct_frag6_low_thresh;
343extern unsigned int nf_ct_frag6_high_thresh; 343extern unsigned int nf_ct_frag6_high_thresh;
344 344
@@ -401,6 +401,48 @@ static ctl_table nf_ct_net_table[] = {
401}; 401};
402#endif 402#endif
403 403
404#if defined(CONFIG_NF_CT_NETLINK) || \
405 defined(CONFIG_NF_CT_NETLINK_MODULE)
406
407#include <linux/netfilter/nfnetlink.h>
408#include <linux/netfilter/nfnetlink_conntrack.h>
409
410static int ipv6_tuple_to_nfattr(struct sk_buff *skb,
411 const struct nf_conntrack_tuple *tuple)
412{
413 NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
414 &tuple->src.u3.ip6);
415 NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
416 &tuple->dst.u3.ip6);
417 return 0;
418
419nfattr_failure:
420 return -1;
421}
422
423static const size_t cta_min_ip[CTA_IP_MAX] = {
424 [CTA_IP_V6_SRC-1] = sizeof(u_int32_t)*4,
425 [CTA_IP_V6_DST-1] = sizeof(u_int32_t)*4,
426};
427
428static int ipv6_nfattr_to_tuple(struct nfattr *tb[],
429 struct nf_conntrack_tuple *t)
430{
431 if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1])
432 return -EINVAL;
433
434 if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
435 return -EINVAL;
436
437 memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]),
438 sizeof(u_int32_t) * 4);
439 memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]),
440 sizeof(u_int32_t) * 4);
441
442 return 0;
443}
444#endif
445
404struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { 446struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
405 .l3proto = PF_INET6, 447 .l3proto = PF_INET6,
406 .name = "ipv6", 448 .name = "ipv6",
@@ -409,6 +451,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
409 .print_tuple = ipv6_print_tuple, 451 .print_tuple = ipv6_print_tuple,
410 .print_conntrack = ipv6_print_conntrack, 452 .print_conntrack = ipv6_print_conntrack,
411 .prepare = ipv6_prepare, 453 .prepare = ipv6_prepare,
454#if defined(CONFIG_NF_CT_NETLINK) || \
455 defined(CONFIG_NF_CT_NETLINK_MODULE)
456 .tuple_to_nfattr = ipv6_tuple_to_nfattr,
457 .nfattr_to_tuple = ipv6_nfattr_to_tuple,
458#endif
412 .get_features = ipv6_get_features, 459 .get_features = ipv6_get_features,
413 .me = THIS_MODULE, 460 .me = THIS_MODULE,
414}; 461};
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a7e03cfacd06..09945c333055 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -57,17 +57,17 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
57 return 1; 57 return 1;
58} 58}
59 59
60/* Add 1; spaces filled with 0. */
61static u_int8_t invmap[] = {
62 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
63 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
64 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
65 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
66};
67
60static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, 68static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
61 const struct nf_conntrack_tuple *orig) 69 const struct nf_conntrack_tuple *orig)
62{ 70{
63 /* Add 1; spaces filled with 0. */
64 static u_int8_t invmap[] = {
65 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
66 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
67 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
68 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
69 };
70
71 int type = orig->dst.u.icmp.type - 128; 71 int type = orig->dst.u.icmp.type - 128;
72 if (type < 0 || type >= sizeof(invmap) || !invmap[type]) 72 if (type < 0 || type >= sizeof(invmap) || !invmap[type])
73 return 0; 73 return 0;
@@ -185,7 +185,7 @@ icmpv6_error_message(struct sk_buff *skb,
185 return -NF_ACCEPT; 185 return -NF_ACCEPT;
186 } 186 }
187 187
188 inproto = nf_ct_find_proto(PF_INET6, inprotonum); 188 inproto = __nf_ct_proto_find(PF_INET6, inprotonum);
189 189
190 /* Are they talking about one of our connections? */ 190 /* Are they talking about one of our connections? */
191 if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, 191 if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
@@ -255,6 +255,60 @@ skipped:
255 return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); 255 return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
256} 256}
257 257
258#if defined(CONFIG_NF_CT_NETLINK) || \
259 defined(CONFIG_NF_CT_NETLINK_MODULE)
260
261#include <linux/netfilter/nfnetlink.h>
262#include <linux/netfilter/nfnetlink_conntrack.h>
263static int icmpv6_tuple_to_nfattr(struct sk_buff *skb,
264 const struct nf_conntrack_tuple *t)
265{
266 NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t),
267 &t->src.u.icmp.id);
268 NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t),
269 &t->dst.u.icmp.type);
270 NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t),
271 &t->dst.u.icmp.code);
272
273 return 0;
274
275nfattr_failure:
276 return -1;
277}
278
279static const size_t cta_min_proto[CTA_PROTO_MAX] = {
280 [CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t),
281 [CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t),
282 [CTA_PROTO_ICMPV6_ID-1] = sizeof(u_int16_t)
283};
284
285static int icmpv6_nfattr_to_tuple(struct nfattr *tb[],
286 struct nf_conntrack_tuple *tuple)
287{
288 if (!tb[CTA_PROTO_ICMPV6_TYPE-1]
289 || !tb[CTA_PROTO_ICMPV6_CODE-1]
290 || !tb[CTA_PROTO_ICMPV6_ID-1])
291 return -EINVAL;
292
293 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
294 return -EINVAL;
295
296 tuple->dst.u.icmp.type =
297 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]);
298 tuple->dst.u.icmp.code =
299 *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]);
300 tuple->src.u.icmp.id =
301 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]);
302
303 if (tuple->dst.u.icmp.type < 128
304 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
305 || !invmap[tuple->dst.u.icmp.type - 128])
306 return -EINVAL;
307
308 return 0;
309}
310#endif
311
258struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = 312struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
259{ 313{
260 .l3proto = PF_INET6, 314 .l3proto = PF_INET6,
@@ -267,6 +321,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
267 .packet = icmpv6_packet, 321 .packet = icmpv6_packet,
268 .new = icmpv6_new, 322 .new = icmpv6_new,
269 .error = icmpv6_error, 323 .error = icmpv6_error,
324#if defined(CONFIG_NF_CT_NETLINK) || \
325 defined(CONFIG_NF_CT_NETLINK_MODULE)
326 .tuple_to_nfattr = icmpv6_tuple_to_nfattr,
327 .nfattr_to_tuple = icmpv6_nfattr_to_tuple,
328#endif
270}; 329};
271 330
272EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); 331EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c2c52af9e560..f3e5ffbd592f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -98,7 +98,7 @@ struct nf_ct_frag6_queue
98#define FRAG6Q_HASHSZ 64 98#define FRAG6Q_HASHSZ 64
99 99
100static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; 100static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
101static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; 101static DEFINE_RWLOCK(nf_ct_frag6_lock);
102static u32 nf_ct_frag6_hash_rnd; 102static u32 nf_ct_frag6_hash_rnd;
103static LIST_HEAD(nf_ct_frag6_lru_list); 103static LIST_HEAD(nf_ct_frag6_lru_list);
104int nf_ct_frag6_nqueues = 0; 104int nf_ct_frag6_nqueues = 0;
@@ -371,7 +371,7 @@ nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct
371 init_timer(&fq->timer); 371 init_timer(&fq->timer);
372 fq->timer.function = nf_ct_frag6_expire; 372 fq->timer.function = nf_ct_frag6_expire;
373 fq->timer.data = (long) fq; 373 fq->timer.data = (long) fq;
374 fq->lock = SPIN_LOCK_UNLOCKED; 374 spin_lock_init(&fq->lock);
375 atomic_set(&fq->refcnt, 1); 375 atomic_set(&fq->refcnt, 1);
376 376
377 return nf_ct_frag6_intern(hash, fq); 377 return nf_ct_frag6_intern(hash, fq);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a66900cda2af..66f1d12ea578 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -32,6 +32,7 @@
32#include <linux/icmpv6.h> 32#include <linux/icmpv6.h>
33#include <linux/netfilter.h> 33#include <linux/netfilter.h>
34#include <linux/netfilter_ipv6.h> 34#include <linux/netfilter_ipv6.h>
35#include <linux/skbuff.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36#include <asm/ioctls.h> 37#include <asm/ioctls.h>
37#include <asm/bug.h> 38#include <asm/bug.h>
@@ -433,25 +434,14 @@ out:
433 return err; 434 return err;
434 435
435csum_copy_err: 436csum_copy_err:
436 /* Clear queue. */ 437 skb_kill_datagram(sk, skb, flags);
437 if (flags&MSG_PEEK) {
438 int clear = 0;
439 spin_lock_bh(&sk->sk_receive_queue.lock);
440 if (skb == skb_peek(&sk->sk_receive_queue)) {
441 __skb_unlink(skb, &sk->sk_receive_queue);
442 clear = 1;
443 }
444 spin_unlock_bh(&sk->sk_receive_queue.lock);
445 if (clear)
446 kfree_skb(skb);
447 }
448 438
449 /* Error for blocking case is chosen to masquerade 439 /* Error for blocking case is chosen to masquerade
450 as some normal condition. 440 as some normal condition.
451 */ 441 */
452 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; 442 err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
453 /* FIXME: increment a raw6 drops counter here */ 443 /* FIXME: increment a raw6 drops counter here */
454 goto out_free; 444 goto out;
455} 445}
456 446
457static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, 447static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5d316cb72ec9..15e1456b3f18 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -581,7 +581,6 @@ err:
581 * the last and the first frames arrived and all the bits are here. 581 * the last and the first frames arrived and all the bits are here.
582 */ 582 */
583static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, 583static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
584 unsigned int *nhoffp,
585 struct net_device *dev) 584 struct net_device *dev)
586{ 585{
587 struct sk_buff *fp, *head = fq->fragments; 586 struct sk_buff *fp, *head = fq->fragments;
@@ -654,6 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
654 head->dev = dev; 653 head->dev = dev;
655 skb_set_timestamp(head, &fq->stamp); 654 skb_set_timestamp(head, &fq->stamp);
656 head->nh.ipv6h->payload_len = htons(payload_len); 655 head->nh.ipv6h->payload_len = htons(payload_len);
656 IP6CB(head)->nhoff = nhoff;
657 657
658 *skb_in = head; 658 *skb_in = head;
659 659
@@ -663,7 +663,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
663 663
664 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); 664 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
665 fq->fragments = NULL; 665 fq->fragments = NULL;
666 *nhoffp = nhoff;
667 return 1; 666 return 1;
668 667
669out_oversize: 668out_oversize:
@@ -678,7 +677,7 @@ out_fail:
678 return -1; 677 return -1;
679} 678}
680 679
681static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) 680static int ipv6_frag_rcv(struct sk_buff **skbp)
682{ 681{
683 struct sk_buff *skb = *skbp; 682 struct sk_buff *skb = *skbp;
684 struct net_device *dev = skb->dev; 683 struct net_device *dev = skb->dev;
@@ -710,7 +709,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
710 skb->h.raw += sizeof(struct frag_hdr); 709 skb->h.raw += sizeof(struct frag_hdr);
711 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); 710 IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
712 711
713 *nhoffp = (u8*)fhdr - skb->nh.raw; 712 IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
714 return 1; 713 return 1;
715 } 714 }
716 715
@@ -722,11 +721,11 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
722 721
723 spin_lock(&fq->lock); 722 spin_lock(&fq->lock);
724 723
725 ip6_frag_queue(fq, skb, fhdr, *nhoffp); 724 ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
726 725
727 if (fq->last_in == (FIRST_IN|LAST_IN) && 726 if (fq->last_in == (FIRST_IN|LAST_IN) &&
728 fq->meat == fq->len) 727 fq->meat == fq->len)
729 ret = ip6_frag_reasm(fq, skbp, nhoffp, dev); 728 ret = ip6_frag_reasm(fq, skbp, dev);
730 729
731 spin_unlock(&fq->lock); 730 spin_unlock(&fq->lock);
732 fq_put(fq, NULL); 731 fq_put(fq, NULL);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 66140f13d119..e0d3ad02ffb5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -24,6 +24,7 @@
24 * reachable. otherwise, round-robin the list. 24 * reachable. otherwise, round-robin the list.
25 */ 25 */
26 26
27#include <linux/capability.h>
27#include <linux/config.h> 28#include <linux/config.h>
28#include <linux/errno.h> 29#include <linux/errno.h>
29#include <linux/types.h> 30#include <linux/types.h>
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index c3123c9e1a8e..c2d3e17beae6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -20,6 +20,7 @@
20 20
21#include <linux/config.h> 21#include <linux/config.h>
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/capability.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
24#include <linux/types.h> 25#include <linux/types.h>
25#include <linux/socket.h> 26#include <linux/socket.h>
@@ -33,6 +34,7 @@
33#include <asm/uaccess.h> 34#include <asm/uaccess.h>
34#include <linux/init.h> 35#include <linux/init.h>
35#include <linux/netfilter_ipv4.h> 36#include <linux/netfilter_ipv4.h>
37#include <linux/if_ether.h>
36 38
37#include <net/sock.h> 39#include <net/sock.h>
38#include <net/snmp.h> 40#include <net/snmp.h>
@@ -183,7 +185,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
183 if (dev == NULL) 185 if (dev == NULL)
184 return NULL; 186 return NULL;
185 187
186 nt = dev->priv; 188 nt = netdev_priv(dev);
187 dev->init = ipip6_tunnel_init; 189 dev->init = ipip6_tunnel_init;
188 nt->parms = *parms; 190 nt->parms = *parms;
189 191
@@ -209,7 +211,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
209 write_unlock_bh(&ipip6_lock); 211 write_unlock_bh(&ipip6_lock);
210 dev_put(dev); 212 dev_put(dev);
211 } else { 213 } else {
212 ipip6_tunnel_unlink((struct ip_tunnel*)dev->priv); 214 ipip6_tunnel_unlink(netdev_priv(dev));
213 dev_put(dev); 215 dev_put(dev);
214 } 216 }
215} 217}
@@ -345,7 +347,7 @@ out:
345 rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0); 347 rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);
346 348
347 if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { 349 if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
348 struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv; 350 struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
349 if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { 351 if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
350 rel_type = ICMPV6_DEST_UNREACH; 352 rel_type = ICMPV6_DEST_UNREACH;
351 rel_code = ICMPV6_ADDR_UNREACH; 353 rel_code = ICMPV6_ADDR_UNREACH;
@@ -380,6 +382,7 @@ static int ipip6_rcv(struct sk_buff *skb)
380 skb->mac.raw = skb->nh.raw; 382 skb->mac.raw = skb->nh.raw;
381 skb->nh.raw = skb->data; 383 skb->nh.raw = skb->data;
382 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 384 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
385 IPCB(skb)->flags = 0;
383 skb->protocol = htons(ETH_P_IPV6); 386 skb->protocol = htons(ETH_P_IPV6);
384 skb->pkt_type = PACKET_HOST; 387 skb->pkt_type = PACKET_HOST;
385 tunnel->stat.rx_packets++; 388 tunnel->stat.rx_packets++;
@@ -422,7 +425,7 @@ static inline u32 try_6to4(struct in6_addr *v6dst)
422 425
423static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 426static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
424{ 427{
425 struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 428 struct ip_tunnel *tunnel = netdev_priv(dev);
426 struct net_device_stats *stats = &tunnel->stat; 429 struct net_device_stats *stats = &tunnel->stat;
427 struct iphdr *tiph = &tunnel->parms.iph; 430 struct iphdr *tiph = &tunnel->parms.iph;
428 struct ipv6hdr *iph6 = skb->nh.ipv6h; 431 struct ipv6hdr *iph6 = skb->nh.ipv6h;
@@ -551,6 +554,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
551 skb->h.raw = skb->nh.raw; 554 skb->h.raw = skb->nh.raw;
552 skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); 555 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
553 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 556 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
557 IPCB(skb)->flags = 0;
554 dst_release(skb->dst); 558 dst_release(skb->dst);
555 skb->dst = &rt->u.dst; 559 skb->dst = &rt->u.dst;
556 560
@@ -607,7 +611,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
607 t = ipip6_tunnel_locate(&p, 0); 611 t = ipip6_tunnel_locate(&p, 0);
608 } 612 }
609 if (t == NULL) 613 if (t == NULL)
610 t = (struct ip_tunnel*)dev->priv; 614 t = netdev_priv(dev);
611 memcpy(&p, &t->parms, sizeof(p)); 615 memcpy(&p, &t->parms, sizeof(p));
612 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 616 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
613 err = -EFAULT; 617 err = -EFAULT;
@@ -644,7 +648,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
644 err = -EINVAL; 648 err = -EINVAL;
645 break; 649 break;
646 } 650 }
647 t = (struct ip_tunnel*)dev->priv; 651 t = netdev_priv(dev);
648 ipip6_tunnel_unlink(t); 652 ipip6_tunnel_unlink(t);
649 t->parms.iph.saddr = p.iph.saddr; 653 t->parms.iph.saddr = p.iph.saddr;
650 t->parms.iph.daddr = p.iph.daddr; 654 t->parms.iph.daddr = p.iph.daddr;
@@ -680,7 +684,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
680 if ((t = ipip6_tunnel_locate(&p, 0)) == NULL) 684 if ((t = ipip6_tunnel_locate(&p, 0)) == NULL)
681 goto done; 685 goto done;
682 err = -EPERM; 686 err = -EPERM;
683 if (t == ipip6_fb_tunnel_dev->priv) 687 if (t == netdev_priv(ipip6_fb_tunnel_dev))
684 goto done; 688 goto done;
685 dev = t->dev; 689 dev = t->dev;
686 } 690 }
@@ -697,7 +701,7 @@ done:
697 701
698static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) 702static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev)
699{ 703{
700 return &(((struct ip_tunnel*)dev->priv)->stat); 704 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
701} 705}
702 706
703static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) 707static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
@@ -720,7 +724,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
720 724
721 dev->type = ARPHRD_SIT; 725 dev->type = ARPHRD_SIT;
722 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 726 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
723 dev->mtu = 1500 - sizeof(struct iphdr); 727 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
724 dev->flags = IFF_NOARP; 728 dev->flags = IFF_NOARP;
725 dev->iflink = 0; 729 dev->iflink = 0;
726 dev->addr_len = 4; 730 dev->addr_len = 4;
@@ -732,7 +736,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
732 struct ip_tunnel *tunnel; 736 struct ip_tunnel *tunnel;
733 struct iphdr *iph; 737 struct iphdr *iph;
734 738
735 tunnel = (struct ip_tunnel*)dev->priv; 739 tunnel = netdev_priv(dev);
736 iph = &tunnel->parms.iph; 740 iph = &tunnel->parms.iph;
737 741
738 tunnel->dev = dev; 742 tunnel->dev = dev;
@@ -772,7 +776,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
772 776
773static int __init ipip6_fb_tunnel_init(struct net_device *dev) 777static int __init ipip6_fb_tunnel_init(struct net_device *dev)
774{ 778{
775 struct ip_tunnel *tunnel = dev->priv; 779 struct ip_tunnel *tunnel = netdev_priv(dev);
776 struct iphdr *iph = &tunnel->parms.iph; 780 struct iphdr *iph = &tunnel->parms.iph;
777 781
778 tunnel->dev = dev; 782 tunnel->dev = dev;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8827389abaf7..66d04004afda 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -48,6 +48,7 @@
48#include <net/tcp.h> 48#include <net/tcp.h>
49#include <net/ndisc.h> 49#include <net/ndisc.h>
50#include <net/inet6_hashtables.h> 50#include <net/inet6_hashtables.h>
51#include <net/inet6_connection_sock.h>
51#include <net/ipv6.h> 52#include <net/ipv6.h>
52#include <net/transp_v6.h> 53#include <net/transp_v6.h>
53#include <net/addrconf.h> 54#include <net/addrconf.h>
@@ -59,232 +60,45 @@
59#include <net/addrconf.h> 60#include <net/addrconf.h>
60#include <net/snmp.h> 61#include <net/snmp.h>
61#include <net/dsfield.h> 62#include <net/dsfield.h>
63#include <net/timewait_sock.h>
62 64
63#include <asm/uaccess.h> 65#include <asm/uaccess.h>
64 66
65#include <linux/proc_fs.h> 67#include <linux/proc_fs.h>
66#include <linux/seq_file.h> 68#include <linux/seq_file.h>
67 69
70/* Socket used for sending RSTs and ACKs */
71static struct socket *tcp6_socket;
72
68static void tcp_v6_send_reset(struct sk_buff *skb); 73static void tcp_v6_send_reset(struct sk_buff *skb);
69static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); 74static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
70static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 75static void tcp_v6_send_check(struct sock *sk, int len,
71 struct sk_buff *skb); 76 struct sk_buff *skb);
72 77
73static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 78static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75
76static struct tcp_func ipv6_mapped;
77static struct tcp_func ipv6_specific;
78
79static inline int tcp_v6_bind_conflict(const struct sock *sk,
80 const struct inet_bind_bucket *tb)
81{
82 const struct sock *sk2;
83 const struct hlist_node *node;
84
85 /* We must walk the whole port owner list in this case. -DaveM */
86 sk_for_each_bound(sk2, node, &tb->owners) {
87 if (sk != sk2 &&
88 (!sk->sk_bound_dev_if ||
89 !sk2->sk_bound_dev_if ||
90 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
91 (!sk->sk_reuse || !sk2->sk_reuse ||
92 sk2->sk_state == TCP_LISTEN) &&
93 ipv6_rcv_saddr_equal(sk, sk2))
94 break;
95 }
96 79
97 return node != NULL; 80static struct inet_connection_sock_af_ops ipv6_mapped;
98} 81static struct inet_connection_sock_af_ops ipv6_specific;
99 82
100/* Grrr, addr_type already calculated by caller, but I don't want
101 * to add some silly "cookie" argument to this method just for that.
102 * But it doesn't matter, the recalculation is in the rarest path
103 * this function ever takes.
104 */
105static int tcp_v6_get_port(struct sock *sk, unsigned short snum) 83static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
106{ 84{
107 struct inet_bind_hashbucket *head; 85 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
108 struct inet_bind_bucket *tb; 86 inet6_csk_bind_conflict);
109 struct hlist_node *node;
110 int ret;
111
112 local_bh_disable();
113 if (snum == 0) {
114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1;
117 int rover = net_random() % (high - low) + low;
118
119 do {
120 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
121 spin_lock(&head->lock);
122 inet_bind_bucket_for_each(tb, node, &head->chain)
123 if (tb->port == rover)
124 goto next;
125 break;
126 next:
127 spin_unlock(&head->lock);
128 if (++rover > high)
129 rover = low;
130 } while (--remaining > 0);
131
132 /* Exhausted local port range during search? It is not
133 * possible for us to be holding one of the bind hash
134 * locks if this test triggers, because if 'remaining'
135 * drops to zero, we broke out of the do/while loop at
136 * the top level, not from the 'break;' statement.
137 */
138 ret = 1;
139 if (unlikely(remaining <= 0))
140 goto fail;
141
142 /* OK, here is the one we will use. */
143 snum = rover;
144 } else {
145 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
146 spin_lock(&head->lock);
147 inet_bind_bucket_for_each(tb, node, &head->chain)
148 if (tb->port == snum)
149 goto tb_found;
150 }
151 tb = NULL;
152 goto tb_not_found;
153tb_found:
154 if (tb && !hlist_empty(&tb->owners)) {
155 if (tb->fastreuse > 0 && sk->sk_reuse &&
156 sk->sk_state != TCP_LISTEN) {
157 goto success;
158 } else {
159 ret = 1;
160 if (tcp_v6_bind_conflict(sk, tb))
161 goto fail_unlock;
162 }
163 }
164tb_not_found:
165 ret = 1;
166 if (tb == NULL) {
167 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
168 if (tb == NULL)
169 goto fail_unlock;
170 }
171 if (hlist_empty(&tb->owners)) {
172 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
173 tb->fastreuse = 1;
174 else
175 tb->fastreuse = 0;
176 } else if (tb->fastreuse &&
177 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
178 tb->fastreuse = 0;
179
180success:
181 if (!inet_csk(sk)->icsk_bind_hash)
182 inet_bind_hash(sk, tb, snum);
183 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
184 ret = 0;
185
186fail_unlock:
187 spin_unlock(&head->lock);
188fail:
189 local_bh_enable();
190 return ret;
191}
192
193static __inline__ void __tcp_v6_hash(struct sock *sk)
194{
195 struct hlist_head *list;
196 rwlock_t *lock;
197
198 BUG_TRAP(sk_unhashed(sk));
199
200 if (sk->sk_state == TCP_LISTEN) {
201 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
202 lock = &tcp_hashinfo.lhash_lock;
203 inet_listen_wlock(&tcp_hashinfo);
204 } else {
205 unsigned int hash;
206 sk->sk_hash = hash = inet6_sk_ehashfn(sk);
207 hash &= (tcp_hashinfo.ehash_size - 1);
208 list = &tcp_hashinfo.ehash[hash].chain;
209 lock = &tcp_hashinfo.ehash[hash].lock;
210 write_lock(lock);
211 }
212
213 __sk_add_node(sk, list);
214 sock_prot_inc_use(sk->sk_prot);
215 write_unlock(lock);
216} 87}
217 88
218
219static void tcp_v6_hash(struct sock *sk) 89static void tcp_v6_hash(struct sock *sk)
220{ 90{
221 if (sk->sk_state != TCP_CLOSE) { 91 if (sk->sk_state != TCP_CLOSE) {
222 struct tcp_sock *tp = tcp_sk(sk); 92 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
223
224 if (tp->af_specific == &ipv6_mapped) {
225 tcp_prot.hash(sk); 93 tcp_prot.hash(sk);
226 return; 94 return;
227 } 95 }
228 local_bh_disable(); 96 local_bh_disable();
229 __tcp_v6_hash(sk); 97 __inet6_hash(&tcp_hashinfo, sk);
230 local_bh_enable(); 98 local_bh_enable();
231 } 99 }
232} 100}
233 101
234/*
235 * Open request hash tables.
236 */
237
238static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
239{
240 u32 a, b, c;
241
242 a = raddr->s6_addr32[0];
243 b = raddr->s6_addr32[1];
244 c = raddr->s6_addr32[2];
245
246 a += JHASH_GOLDEN_RATIO;
247 b += JHASH_GOLDEN_RATIO;
248 c += rnd;
249 __jhash_mix(a, b, c);
250
251 a += raddr->s6_addr32[3];
252 b += (u32) rport;
253 __jhash_mix(a, b, c);
254
255 return c & (TCP_SYNQ_HSIZE - 1);
256}
257
258static struct request_sock *tcp_v6_search_req(const struct sock *sk,
259 struct request_sock ***prevp,
260 __u16 rport,
261 struct in6_addr *raddr,
262 struct in6_addr *laddr,
263 int iif)
264{
265 const struct inet_connection_sock *icsk = inet_csk(sk);
266 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
267 struct request_sock *req, **prev;
268
269 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
270 (req = *prev) != NULL;
271 prev = &req->dl_next) {
272 const struct tcp6_request_sock *treq = tcp6_rsk(req);
273
274 if (inet_rsk(req)->rmt_port == rport &&
275 req->rsk_ops->family == AF_INET6 &&
276 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
277 ipv6_addr_equal(&treq->loc_addr, laddr) &&
278 (!treq->iif || treq->iif == iif)) {
279 BUG_TRAP(req->sk == NULL);
280 *prevp = prev;
281 return req;
282 }
283 }
284
285 return NULL;
286}
287
288static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, 102static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
289 struct in6_addr *saddr, 103 struct in6_addr *saddr,
290 struct in6_addr *daddr, 104 struct in6_addr *daddr,
@@ -308,195 +122,12 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
308 } 122 }
309} 123}
310 124
311static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
312 struct inet_timewait_sock **twp)
313{
314 struct inet_sock *inet = inet_sk(sk);
315 const struct ipv6_pinfo *np = inet6_sk(sk);
316 const struct in6_addr *daddr = &np->rcv_saddr;
317 const struct in6_addr *saddr = &np->daddr;
318 const int dif = sk->sk_bound_dev_if;
319 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
320 unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
321 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
322 struct sock *sk2;
323 const struct hlist_node *node;
324 struct inet_timewait_sock *tw;
325
326 prefetch(head->chain.first);
327 write_lock(&head->lock);
328
329 /* Check TIME-WAIT sockets first. */
330 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
331 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
332
333 tw = inet_twsk(sk2);
334
335 if(*((__u32 *)&(tw->tw_dport)) == ports &&
336 sk2->sk_family == PF_INET6 &&
337 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
338 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
339 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
340 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
341 struct tcp_sock *tp = tcp_sk(sk);
342
343 if (tcptw->tw_ts_recent_stamp &&
344 (!twp ||
345 (sysctl_tcp_tw_reuse &&
346 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
347 /* See comment in tcp_ipv4.c */
348 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
349 if (!tp->write_seq)
350 tp->write_seq = 1;
351 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
352 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
353 sock_hold(sk2);
354 goto unique;
355 } else
356 goto not_unique;
357 }
358 }
359 tw = NULL;
360
361 /* And established part... */
362 sk_for_each(sk2, node, &head->chain) {
363 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
364 goto not_unique;
365 }
366
367unique:
368 BUG_TRAP(sk_unhashed(sk));
369 __sk_add_node(sk, &head->chain);
370 sk->sk_hash = hash;
371 sock_prot_inc_use(sk->sk_prot);
372 write_unlock(&head->lock);
373
374 if (twp) {
375 *twp = tw;
376 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
377 } else if (tw) {
378 /* Silly. Should hash-dance instead... */
379 inet_twsk_deschedule(tw, &tcp_death_row);
380 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
381
382 inet_twsk_put(tw);
383 }
384 return 0;
385
386not_unique:
387 write_unlock(&head->lock);
388 return -EADDRNOTAVAIL;
389}
390
391static inline u32 tcpv6_port_offset(const struct sock *sk)
392{
393 const struct inet_sock *inet = inet_sk(sk);
394 const struct ipv6_pinfo *np = inet6_sk(sk);
395
396 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
397 np->daddr.s6_addr32,
398 inet->dport);
399}
400
401static int tcp_v6_hash_connect(struct sock *sk)
402{
403 unsigned short snum = inet_sk(sk)->num;
404 struct inet_bind_hashbucket *head;
405 struct inet_bind_bucket *tb;
406 int ret;
407
408 if (!snum) {
409 int low = sysctl_local_port_range[0];
410 int high = sysctl_local_port_range[1];
411 int range = high - low;
412 int i;
413 int port;
414 static u32 hint;
415 u32 offset = hint + tcpv6_port_offset(sk);
416 struct hlist_node *node;
417 struct inet_timewait_sock *tw = NULL;
418
419 local_bh_disable();
420 for (i = 1; i <= range; i++) {
421 port = low + (i + offset) % range;
422 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
423 spin_lock(&head->lock);
424
425 /* Does not bother with rcv_saddr checks,
426 * because the established check is already
427 * unique enough.
428 */
429 inet_bind_bucket_for_each(tb, node, &head->chain) {
430 if (tb->port == port) {
431 BUG_TRAP(!hlist_empty(&tb->owners));
432 if (tb->fastreuse >= 0)
433 goto next_port;
434 if (!__tcp_v6_check_established(sk,
435 port,
436 &tw))
437 goto ok;
438 goto next_port;
439 }
440 }
441
442 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
443 if (!tb) {
444 spin_unlock(&head->lock);
445 break;
446 }
447 tb->fastreuse = -1;
448 goto ok;
449
450 next_port:
451 spin_unlock(&head->lock);
452 }
453 local_bh_enable();
454
455 return -EADDRNOTAVAIL;
456
457ok:
458 hint += i;
459
460 /* Head lock still held and bh's disabled */
461 inet_bind_hash(sk, tb, port);
462 if (sk_unhashed(sk)) {
463 inet_sk(sk)->sport = htons(port);
464 __tcp_v6_hash(sk);
465 }
466 spin_unlock(&head->lock);
467
468 if (tw) {
469 inet_twsk_deschedule(tw, &tcp_death_row);
470 inet_twsk_put(tw);
471 }
472
473 ret = 0;
474 goto out;
475 }
476
477 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
478 tb = inet_csk(sk)->icsk_bind_hash;
479 spin_lock_bh(&head->lock);
480
481 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
482 __tcp_v6_hash(sk);
483 spin_unlock_bh(&head->lock);
484 return 0;
485 } else {
486 spin_unlock(&head->lock);
487 /* No definite answer... Walk to established hash table */
488 ret = __tcp_v6_check_established(sk, snum, NULL);
489out:
490 local_bh_enable();
491 return ret;
492 }
493}
494
495static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 125static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
496 int addr_len) 126 int addr_len)
497{ 127{
498 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 128 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
499 struct inet_sock *inet = inet_sk(sk); 129 struct inet_sock *inet = inet_sk(sk);
130 struct inet_connection_sock *icsk = inet_csk(sk);
500 struct ipv6_pinfo *np = inet6_sk(sk); 131 struct ipv6_pinfo *np = inet6_sk(sk);
501 struct tcp_sock *tp = tcp_sk(sk); 132 struct tcp_sock *tp = tcp_sk(sk);
502 struct in6_addr *saddr = NULL, *final_p = NULL, final; 133 struct in6_addr *saddr = NULL, *final_p = NULL, final;
@@ -571,7 +202,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
571 */ 202 */
572 203
573 if (addr_type == IPV6_ADDR_MAPPED) { 204 if (addr_type == IPV6_ADDR_MAPPED) {
574 u32 exthdrlen = tp->ext_header_len; 205 u32 exthdrlen = icsk->icsk_ext_hdr_len;
575 struct sockaddr_in sin; 206 struct sockaddr_in sin;
576 207
577 SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); 208 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
@@ -583,14 +214,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
583 sin.sin_port = usin->sin6_port; 214 sin.sin_port = usin->sin6_port;
584 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 215 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
585 216
586 tp->af_specific = &ipv6_mapped; 217 icsk->icsk_af_ops = &ipv6_mapped;
587 sk->sk_backlog_rcv = tcp_v4_do_rcv; 218 sk->sk_backlog_rcv = tcp_v4_do_rcv;
588 219
589 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 220 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
590 221
591 if (err) { 222 if (err) {
592 tp->ext_header_len = exthdrlen; 223 icsk->icsk_ext_hdr_len = exthdrlen;
593 tp->af_specific = &ipv6_specific; 224 icsk->icsk_af_ops = &ipv6_specific;
594 sk->sk_backlog_rcv = tcp_v6_do_rcv; 225 sk->sk_backlog_rcv = tcp_v6_do_rcv;
595 goto failure; 226 goto failure;
596 } else { 227 } else {
@@ -643,16 +274,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
643 sk->sk_route_caps = dst->dev->features & 274 sk->sk_route_caps = dst->dev->features &
644 ~(NETIF_F_IP_CSUM | NETIF_F_TSO); 275 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
645 276
646 tp->ext_header_len = 0; 277 icsk->icsk_ext_hdr_len = 0;
647 if (np->opt) 278 if (np->opt)
648 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; 279 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
280 np->opt->opt_nflen);
649 281
650 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 282 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
651 283
652 inet->dport = usin->sin6_port; 284 inet->dport = usin->sin6_port;
653 285
654 tcp_set_state(sk, TCP_SYN_SENT); 286 tcp_set_state(sk, TCP_SYN_SENT);
655 err = tcp_v6_hash_connect(sk); 287 err = inet6_hash_connect(&tcp_death_row, sk);
656 if (err) 288 if (err)
657 goto late_failure; 289 goto late_failure;
658 290
@@ -758,7 +390,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
758 } else 390 } else
759 dst_hold(dst); 391 dst_hold(dst);
760 392
761 if (tp->pmtu_cookie > dst_mtu(dst)) { 393 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
762 tcp_sync_mss(sk, dst_mtu(dst)); 394 tcp_sync_mss(sk, dst_mtu(dst));
763 tcp_simple_retransmit(sk); 395 tcp_simple_retransmit(sk);
764 } /* else let the usual retransmit timer handle it */ 396 } /* else let the usual retransmit timer handle it */
@@ -775,8 +407,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
775 if (sock_owned_by_user(sk)) 407 if (sock_owned_by_user(sk))
776 goto out; 408 goto out;
777 409
778 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, 410 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
779 &hdr->saddr, inet6_iif(skb)); 411 &hdr->saddr, inet6_iif(skb));
780 if (!req) 412 if (!req)
781 goto out; 413 goto out;
782 414
@@ -822,7 +454,7 @@ out:
822static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, 454static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
823 struct dst_entry *dst) 455 struct dst_entry *dst)
824{ 456{
825 struct tcp6_request_sock *treq = tcp6_rsk(req); 457 struct inet6_request_sock *treq = inet6_rsk(req);
826 struct ipv6_pinfo *np = inet6_sk(sk); 458 struct ipv6_pinfo *np = inet6_sk(sk);
827 struct sk_buff * skb; 459 struct sk_buff * skb;
828 struct ipv6_txoptions *opt = NULL; 460 struct ipv6_txoptions *opt = NULL;
@@ -888,8 +520,8 @@ done:
888 520
889static void tcp_v6_reqsk_destructor(struct request_sock *req) 521static void tcp_v6_reqsk_destructor(struct request_sock *req)
890{ 522{
891 if (tcp6_rsk(req)->pktopts) 523 if (inet6_rsk(req)->pktopts)
892 kfree_skb(tcp6_rsk(req)->pktopts); 524 kfree_skb(inet6_rsk(req)->pktopts);
893} 525}
894 526
895static struct request_sock_ops tcp6_request_sock_ops = { 527static struct request_sock_ops tcp6_request_sock_ops = {
@@ -901,26 +533,15 @@ static struct request_sock_ops tcp6_request_sock_ops = {
901 .send_reset = tcp_v6_send_reset 533 .send_reset = tcp_v6_send_reset
902}; 534};
903 535
904static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) 536static struct timewait_sock_ops tcp6_timewait_sock_ops = {
905{ 537 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
906 struct ipv6_pinfo *np = inet6_sk(sk); 538 .twsk_unique = tcp_twsk_unique,
907 struct inet6_skb_parm *opt = IP6CB(skb); 539};
908
909 if (np->rxopt.all) {
910 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
911 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
912 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
913 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
914 return 1;
915 }
916 return 0;
917}
918
919 540
920static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 541static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
921 struct sk_buff *skb)
922{ 542{
923 struct ipv6_pinfo *np = inet6_sk(sk); 543 struct ipv6_pinfo *np = inet6_sk(sk);
544 struct tcphdr *th = skb->h.th;
924 545
925 if (skb->ip_summed == CHECKSUM_HW) { 546 if (skb->ip_summed == CHECKSUM_HW) {
926 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); 547 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
@@ -993,7 +614,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
993 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { 614 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
994 615
995 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { 616 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
996 ip6_xmit(NULL, buff, &fl, NULL, 0); 617 ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
997 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 618 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
998 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); 619 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
999 return; 620 return;
@@ -1057,7 +678,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
1057 678
1058 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { 679 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1059 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { 680 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
1060 ip6_xmit(NULL, buff, &fl, NULL, 0); 681 ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
1061 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 682 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1062 return; 683 return;
1063 } 684 }
@@ -1091,8 +712,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1091 struct sock *nsk; 712 struct sock *nsk;
1092 713
1093 /* Find possible connection requests. */ 714 /* Find possible connection requests. */
1094 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, 715 req = inet6_csk_search_req(sk, &prev, th->source,
1095 &skb->nh.ipv6h->daddr, inet6_iif(skb)); 716 &skb->nh.ipv6h->saddr,
717 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1096 if (req) 718 if (req)
1097 return tcp_check_req(sk, skb, req, prev); 719 return tcp_check_req(sk, skb, req, prev);
1098 720
@@ -1116,23 +738,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1116 return sk; 738 return sk;
1117} 739}
1118 740
1119static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1120{
1121 struct inet_connection_sock *icsk = inet_csk(sk);
1122 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1123 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1124
1125 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1126 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1127}
1128
1129
1130/* FIXME: this is substantially similar to the ipv4 code. 741/* FIXME: this is substantially similar to the ipv4 code.
1131 * Can some kind of merge be done? -- erics 742 * Can some kind of merge be done? -- erics
1132 */ 743 */
1133static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 744static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1134{ 745{
1135 struct tcp6_request_sock *treq; 746 struct inet6_request_sock *treq;
1136 struct ipv6_pinfo *np = inet6_sk(sk); 747 struct ipv6_pinfo *np = inet6_sk(sk);
1137 struct tcp_options_received tmp_opt; 748 struct tcp_options_received tmp_opt;
1138 struct tcp_sock *tp = tcp_sk(sk); 749 struct tcp_sock *tp = tcp_sk(sk);
@@ -1157,7 +768,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1157 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 768 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1158 goto drop; 769 goto drop;
1159 770
1160 req = reqsk_alloc(&tcp6_request_sock_ops); 771 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1161 if (req == NULL) 772 if (req == NULL)
1162 goto drop; 773 goto drop;
1163 774
@@ -1170,7 +781,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1170 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 781 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1171 tcp_openreq_init(req, &tmp_opt, skb); 782 tcp_openreq_init(req, &tmp_opt, skb);
1172 783
1173 treq = tcp6_rsk(req); 784 treq = inet6_rsk(req);
1174 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); 785 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1175 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); 786 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1176 TCP_ECN_create_request(req, skb->h.th); 787 TCP_ECN_create_request(req, skb->h.th);
@@ -1196,8 +807,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1196 if (tcp_v6_send_synack(sk, req, NULL)) 807 if (tcp_v6_send_synack(sk, req, NULL))
1197 goto drop; 808 goto drop;
1198 809
1199 tcp_v6_synq_add(sk, req); 810 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1200
1201 return 0; 811 return 0;
1202 812
1203drop: 813drop:
@@ -1212,7 +822,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1212 struct request_sock *req, 822 struct request_sock *req,
1213 struct dst_entry *dst) 823 struct dst_entry *dst)
1214{ 824{
1215 struct tcp6_request_sock *treq = tcp6_rsk(req); 825 struct inet6_request_sock *treq = inet6_rsk(req);
1216 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 826 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1217 struct tcp6_sock *newtcp6sk; 827 struct tcp6_sock *newtcp6sk;
1218 struct inet_sock *newinet; 828 struct inet_sock *newinet;
@@ -1247,7 +857,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1247 857
1248 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); 858 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1249 859
1250 newtp->af_specific = &ipv6_mapped; 860 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1251 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 861 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1252 newnp->pktoptions = NULL; 862 newnp->pktoptions = NULL;
1253 newnp->opt = NULL; 863 newnp->opt = NULL;
@@ -1261,10 +871,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1261 */ 871 */
1262 872
1263 /* It is tricky place. Until this moment IPv4 tcp 873 /* It is tricky place. Until this moment IPv4 tcp
1264 worked with IPv6 af_tcp.af_specific. 874 worked with IPv6 icsk.icsk_af_ops.
1265 Sync it now. 875 Sync it now.
1266 */ 876 */
1267 tcp_sync_mss(newsk, newtp->pmtu_cookie); 877 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1268 878
1269 return newsk; 879 return newsk;
1270 } 880 }
@@ -1371,10 +981,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1371 sock_kfree_s(sk, opt, opt->tot_len); 981 sock_kfree_s(sk, opt, opt->tot_len);
1372 } 982 }
1373 983
1374 newtp->ext_header_len = 0; 984 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1375 if (newnp->opt) 985 if (newnp->opt)
1376 newtp->ext_header_len = newnp->opt->opt_nflen + 986 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1377 newnp->opt->opt_flen; 987 newnp->opt->opt_flen);
1378 988
1379 tcp_sync_mss(newsk, dst_mtu(dst)); 989 tcp_sync_mss(newsk, dst_mtu(dst));
1380 newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 990 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
@@ -1382,7 +992,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1382 992
1383 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; 993 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1384 994
1385 __tcp_v6_hash(newsk); 995 __inet6_hash(&tcp_hashinfo, newsk);
1386 inet_inherit_port(&tcp_hashinfo, sk, newsk); 996 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1387 997
1388 return newsk; 998 return newsk;
@@ -1546,7 +1156,7 @@ ipv6_pktoptions:
1546 return 0; 1156 return 0;
1547} 1157}
1548 1158
1549static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 1159static int tcp_v6_rcv(struct sk_buff **pskb)
1550{ 1160{
1551 struct sk_buff *skb = *pskb; 1161 struct sk_buff *skb = *pskb;
1552 struct tcphdr *th; 1162 struct tcphdr *th;
@@ -1679,139 +1289,16 @@ do_time_wait:
1679 goto discard_it; 1289 goto discard_it;
1680} 1290}
1681 1291
1682static int tcp_v6_rebuild_header(struct sock *sk)
1683{
1684 int err;
1685 struct dst_entry *dst;
1686 struct ipv6_pinfo *np = inet6_sk(sk);
1687
1688 dst = __sk_dst_check(sk, np->dst_cookie);
1689
1690 if (dst == NULL) {
1691 struct inet_sock *inet = inet_sk(sk);
1692 struct in6_addr *final_p = NULL, final;
1693 struct flowi fl;
1694
1695 memset(&fl, 0, sizeof(fl));
1696 fl.proto = IPPROTO_TCP;
1697 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1698 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1699 fl.fl6_flowlabel = np->flow_label;
1700 fl.oif = sk->sk_bound_dev_if;
1701 fl.fl_ip_dport = inet->dport;
1702 fl.fl_ip_sport = inet->sport;
1703
1704 if (np->opt && np->opt->srcrt) {
1705 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1706 ipv6_addr_copy(&final, &fl.fl6_dst);
1707 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1708 final_p = &final;
1709 }
1710
1711 err = ip6_dst_lookup(sk, &dst, &fl);
1712 if (err) {
1713 sk->sk_route_caps = 0;
1714 return err;
1715 }
1716 if (final_p)
1717 ipv6_addr_copy(&fl.fl6_dst, final_p);
1718
1719 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1720 sk->sk_err_soft = -err;
1721 return err;
1722 }
1723
1724 ip6_dst_store(sk, dst, NULL);
1725 sk->sk_route_caps = dst->dev->features &
1726 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1727 }
1728
1729 return 0;
1730}
1731
1732static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1733{
1734 struct sock *sk = skb->sk;
1735 struct inet_sock *inet = inet_sk(sk);
1736 struct ipv6_pinfo *np = inet6_sk(sk);
1737 struct flowi fl;
1738 struct dst_entry *dst;
1739 struct in6_addr *final_p = NULL, final;
1740
1741 memset(&fl, 0, sizeof(fl));
1742 fl.proto = IPPROTO_TCP;
1743 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1744 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1745 fl.fl6_flowlabel = np->flow_label;
1746 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1747 fl.oif = sk->sk_bound_dev_if;
1748 fl.fl_ip_sport = inet->sport;
1749 fl.fl_ip_dport = inet->dport;
1750
1751 if (np->opt && np->opt->srcrt) {
1752 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1753 ipv6_addr_copy(&final, &fl.fl6_dst);
1754 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1755 final_p = &final;
1756 }
1757
1758 dst = __sk_dst_check(sk, np->dst_cookie);
1759
1760 if (dst == NULL) {
1761 int err = ip6_dst_lookup(sk, &dst, &fl);
1762
1763 if (err) {
1764 sk->sk_err_soft = -err;
1765 return err;
1766 }
1767
1768 if (final_p)
1769 ipv6_addr_copy(&fl.fl6_dst, final_p);
1770
1771 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1772 sk->sk_route_caps = 0;
1773 return err;
1774 }
1775
1776 ip6_dst_store(sk, dst, NULL);
1777 sk->sk_route_caps = dst->dev->features &
1778 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1779 }
1780
1781 skb->dst = dst_clone(dst);
1782
1783 /* Restore final destination back after routing done */
1784 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1785
1786 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1787}
1788
1789static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1790{
1791 struct ipv6_pinfo *np = inet6_sk(sk);
1792 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1793
1794 sin6->sin6_family = AF_INET6;
1795 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1796 sin6->sin6_port = inet_sk(sk)->dport;
1797 /* We do not store received flowlabel for TCP */
1798 sin6->sin6_flowinfo = 0;
1799 sin6->sin6_scope_id = 0;
1800 if (sk->sk_bound_dev_if &&
1801 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1802 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1803}
1804
1805static int tcp_v6_remember_stamp(struct sock *sk) 1292static int tcp_v6_remember_stamp(struct sock *sk)
1806{ 1293{
1807 /* Alas, not yet... */ 1294 /* Alas, not yet... */
1808 return 0; 1295 return 0;
1809} 1296}
1810 1297
1811static struct tcp_func ipv6_specific = { 1298static struct inet_connection_sock_af_ops ipv6_specific = {
1812 .queue_xmit = tcp_v6_xmit, 1299 .queue_xmit = inet6_csk_xmit,
1813 .send_check = tcp_v6_send_check, 1300 .send_check = tcp_v6_send_check,
1814 .rebuild_header = tcp_v6_rebuild_header, 1301 .rebuild_header = inet6_sk_rebuild_header,
1815 .conn_request = tcp_v6_conn_request, 1302 .conn_request = tcp_v6_conn_request,
1816 .syn_recv_sock = tcp_v6_syn_recv_sock, 1303 .syn_recv_sock = tcp_v6_syn_recv_sock,
1817 .remember_stamp = tcp_v6_remember_stamp, 1304 .remember_stamp = tcp_v6_remember_stamp,
@@ -1819,7 +1306,7 @@ static struct tcp_func ipv6_specific = {
1819 1306
1820 .setsockopt = ipv6_setsockopt, 1307 .setsockopt = ipv6_setsockopt,
1821 .getsockopt = ipv6_getsockopt, 1308 .getsockopt = ipv6_getsockopt,
1822 .addr2sockaddr = v6_addr2sockaddr, 1309 .addr2sockaddr = inet6_csk_addr2sockaddr,
1823 .sockaddr_len = sizeof(struct sockaddr_in6) 1310 .sockaddr_len = sizeof(struct sockaddr_in6)
1824}; 1311};
1825 1312
@@ -1827,7 +1314,7 @@ static struct tcp_func ipv6_specific = {
1827 * TCP over IPv4 via INET6 API 1314 * TCP over IPv4 via INET6 API
1828 */ 1315 */
1829 1316
1830static struct tcp_func ipv6_mapped = { 1317static struct inet_connection_sock_af_ops ipv6_mapped = {
1831 .queue_xmit = ip_queue_xmit, 1318 .queue_xmit = ip_queue_xmit,
1832 .send_check = tcp_v4_send_check, 1319 .send_check = tcp_v4_send_check,
1833 .rebuild_header = inet_sk_rebuild_header, 1320 .rebuild_header = inet_sk_rebuild_header,
@@ -1838,7 +1325,7 @@ static struct tcp_func ipv6_mapped = {
1838 1325
1839 .setsockopt = ipv6_setsockopt, 1326 .setsockopt = ipv6_setsockopt,
1840 .getsockopt = ipv6_getsockopt, 1327 .getsockopt = ipv6_getsockopt,
1841 .addr2sockaddr = v6_addr2sockaddr, 1328 .addr2sockaddr = inet6_csk_addr2sockaddr,
1842 .sockaddr_len = sizeof(struct sockaddr_in6) 1329 .sockaddr_len = sizeof(struct sockaddr_in6)
1843}; 1330};
1844 1331
@@ -1877,8 +1364,9 @@ static int tcp_v6_init_sock(struct sock *sk)
1877 1364
1878 sk->sk_state = TCP_CLOSE; 1365 sk->sk_state = TCP_CLOSE;
1879 1366
1880 tp->af_specific = &ipv6_specific; 1367 icsk->icsk_af_ops = &ipv6_specific;
1881 icsk->icsk_ca_ops = &tcp_init_congestion_ops; 1368 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1369 icsk->icsk_sync_mss = tcp_sync_mss;
1882 sk->sk_write_space = sk_stream_write_space; 1370 sk->sk_write_space = sk_stream_write_space;
1883 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 1371 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1884 1372
@@ -1900,14 +1388,13 @@ static int tcp_v6_destroy_sock(struct sock *sk)
1900static void get_openreq6(struct seq_file *seq, 1388static void get_openreq6(struct seq_file *seq,
1901 struct sock *sk, struct request_sock *req, int i, int uid) 1389 struct sock *sk, struct request_sock *req, int i, int uid)
1902{ 1390{
1903 struct in6_addr *dest, *src;
1904 int ttd = req->expires - jiffies; 1391 int ttd = req->expires - jiffies;
1392 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1393 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1905 1394
1906 if (ttd < 0) 1395 if (ttd < 0)
1907 ttd = 0; 1396 ttd = 0;
1908 1397
1909 src = &tcp6_rsk(req)->loc_addr;
1910 dest = &tcp6_rsk(req)->rmt_addr;
1911 seq_printf(seq, 1398 seq_printf(seq,
1912 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 1399 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1913 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", 1400 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -1988,14 +1475,14 @@ static void get_timewait6_sock(struct seq_file *seq,
1988{ 1475{
1989 struct in6_addr *dest, *src; 1476 struct in6_addr *dest, *src;
1990 __u16 destp, srcp; 1477 __u16 destp, srcp;
1991 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); 1478 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1992 int ttd = tw->tw_ttd - jiffies; 1479 int ttd = tw->tw_ttd - jiffies;
1993 1480
1994 if (ttd < 0) 1481 if (ttd < 0)
1995 ttd = 0; 1482 ttd = 0;
1996 1483
1997 dest = &tcp6tw->tw_v6_daddr; 1484 dest = &tw6->tw_v6_daddr;
1998 src = &tcp6tw->tw_v6_rcv_saddr; 1485 src = &tw6->tw_v6_rcv_saddr;
1999 destp = ntohs(tw->tw_dport); 1486 destp = ntohs(tw->tw_dport);
2000 srcp = ntohs(tw->tw_sport); 1487 srcp = ntohs(tw->tw_sport);
2001 1488
@@ -2093,7 +1580,7 @@ struct proto tcpv6_prot = {
2093 .sysctl_rmem = sysctl_tcp_rmem, 1580 .sysctl_rmem = sysctl_tcp_rmem,
2094 .max_header = MAX_TCP_HEADER, 1581 .max_header = MAX_TCP_HEADER,
2095 .obj_size = sizeof(struct tcp6_sock), 1582 .obj_size = sizeof(struct tcp6_sock),
2096 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1583 .twsk_prot = &tcp6_timewait_sock_ops,
2097 .rsk_prot = &tcp6_request_sock_ops, 1584 .rsk_prot = &tcp6_request_sock_ops,
2098}; 1585};
2099 1586
@@ -2110,13 +1597,27 @@ static struct inet_protosw tcpv6_protosw = {
2110 .ops = &inet6_stream_ops, 1597 .ops = &inet6_stream_ops,
2111 .capability = -1, 1598 .capability = -1,
2112 .no_check = 0, 1599 .no_check = 0,
2113 .flags = INET_PROTOSW_PERMANENT, 1600 .flags = INET_PROTOSW_PERMANENT |
1601 INET_PROTOSW_ICSK,
2114}; 1602};
2115 1603
2116void __init tcpv6_init(void) 1604void __init tcpv6_init(void)
2117{ 1605{
1606 int err;
1607
2118 /* register inet6 protocol */ 1608 /* register inet6 protocol */
2119 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0) 1609 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2120 printk(KERN_ERR "tcpv6_init: Could not register protocol\n"); 1610 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2121 inet6_register_protosw(&tcpv6_protosw); 1611 inet6_register_protosw(&tcpv6_protosw);
1612
1613 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_TCP, &tcp6_socket);
1614 if (err < 0)
1615 panic("Failed to create the TCPv6 control socket.\n");
1616 tcp6_socket->sk->sk_allocation = GFP_ATOMIC;
1617
1618 /* Unhash it so that IP input processing does not even
1619 * see it, we do not wish this socket to see incoming
1620 * packets.
1621 */
1622 tcp6_socket->sk->sk_prot->unhash(tcp6_socket->sk);
2122} 1623}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5cc8731eb55b..c47648892c04 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -36,6 +36,7 @@
36#include <linux/ipv6.h> 36#include <linux/ipv6.h>
37#include <linux/icmpv6.h> 37#include <linux/icmpv6.h>
38#include <linux/init.h> 38#include <linux/init.h>
39#include <linux/skbuff.h>
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40 41
41#include <net/sock.h> 42#include <net/sock.h>
@@ -300,20 +301,7 @@ out:
300 return err; 301 return err;
301 302
302csum_copy_err: 303csum_copy_err:
303 /* Clear queue. */ 304 skb_kill_datagram(sk, skb, flags);
304 if (flags&MSG_PEEK) {
305 int clear = 0;
306 spin_lock_bh(&sk->sk_receive_queue.lock);
307 if (skb == skb_peek(&sk->sk_receive_queue)) {
308 __skb_unlink(skb, &sk->sk_receive_queue);
309 clear = 1;
310 }
311 spin_unlock_bh(&sk->sk_receive_queue.lock);
312 if (clear)
313 kfree_skb(skb);
314 }
315
316 skb_free_datagram(sk, skb);
317 305
318 if (flags & MSG_DONTWAIT) { 306 if (flags & MSG_DONTWAIT) {
319 UDP6_INC_STATS_USER(UDP_MIB_INERRORS); 307 UDP6_INC_STATS_USER(UDP_MIB_INERRORS);
@@ -447,7 +435,7 @@ out:
447 read_unlock(&udp_hash_lock); 435 read_unlock(&udp_hash_lock);
448} 436}
449 437
450static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 438static int udpv6_rcv(struct sk_buff **pskb)
451{ 439{
452 struct sk_buff *skb = *pskb; 440 struct sk_buff *skb = *pskb;
453 struct sock *sk; 441 struct sock *sk;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 28c29d78338e..1ca2da68ef69 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -11,6 +11,8 @@
11 11
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv6.h>
14#include <net/dsfield.h> 16#include <net/dsfield.h>
15#include <net/inet_ecn.h> 17#include <net/inet_ecn.h>
16#include <net/ip.h> 18#include <net/ip.h>
@@ -26,7 +28,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
26 IP6_ECN_set_ce(inner_iph); 28 IP6_ECN_set_ce(inner_iph);
27} 29}
28 30
29int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) 31int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi)
30{ 32{
31 struct sk_buff *skb = *pskb; 33 struct sk_buff *skb = *pskb;
32 int err; 34 int err;
@@ -38,7 +40,7 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
38 int nexthdr; 40 int nexthdr;
39 unsigned int nhoff; 41 unsigned int nhoff;
40 42
41 nhoff = *nhoffp; 43 nhoff = IP6CB(skb)->nhoff;
42 nexthdr = skb->nh.raw[nhoff]; 44 nexthdr = skb->nh.raw[nhoff];
43 45
44 seq = 0; 46 seq = 0;
@@ -121,6 +123,8 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
121 skb->sp->len += xfrm_nr; 123 skb->sp->len += xfrm_nr;
122 skb->ip_summed = CHECKSUM_NONE; 124 skb->ip_summed = CHECKSUM_NONE;
123 125
126 nf_reset(skb);
127
124 if (decaps) { 128 if (decaps) {
125 if (!(skb->dev->flags&IFF_LOOPBACK)) { 129 if (!(skb->dev->flags&IFF_LOOPBACK)) {
126 dst_release(skb->dst); 130 dst_release(skb->dst);
@@ -129,7 +133,16 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
129 netif_rx(skb); 133 netif_rx(skb);
130 return -1; 134 return -1;
131 } else { 135 } else {
136#ifdef CONFIG_NETFILTER
137 skb->nh.ipv6h->payload_len = htons(skb->len);
138 __skb_push(skb, skb->data - skb->nh.raw);
139
140 NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
141 ip6_rcv_finish);
142 return -1;
143#else
132 return 1; 144 return 1;
145#endif
133 } 146 }
134 147
135drop_unlock: 148drop_unlock:
@@ -144,7 +157,7 @@ drop:
144 157
145EXPORT_SYMBOL(xfrm6_rcv_spi); 158EXPORT_SYMBOL(xfrm6_rcv_spi);
146 159
147int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 160int xfrm6_rcv(struct sk_buff **pskb)
148{ 161{
149 return xfrm6_rcv_spi(pskb, nhoffp, 0); 162 return xfrm6_rcv_spi(pskb, 0);
150} 163}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6b9867717d11..80242172a5df 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -9,9 +9,11 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/compiler.h>
12#include <linux/skbuff.h> 13#include <linux/skbuff.h>
13#include <linux/spinlock.h> 14#include <linux/spinlock.h>
14#include <linux/icmpv6.h> 15#include <linux/icmpv6.h>
16#include <linux/netfilter_ipv6.h>
15#include <net/dsfield.h> 17#include <net/dsfield.h>
16#include <net/inet_ecn.h> 18#include <net/inet_ecn.h>
17#include <net/ipv6.h> 19#include <net/ipv6.h>
@@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
92 return ret; 94 return ret;
93} 95}
94 96
95int xfrm6_output(struct sk_buff *skb) 97static int xfrm6_output_one(struct sk_buff *skb)
96{ 98{
97 struct dst_entry *dst = skb->dst; 99 struct dst_entry *dst = skb->dst;
98 struct xfrm_state *x = dst->xfrm; 100 struct xfrm_state *x = dst->xfrm;
@@ -110,29 +112,35 @@ int xfrm6_output(struct sk_buff *skb)
110 goto error_nolock; 112 goto error_nolock;
111 } 113 }
112 114
113 spin_lock_bh(&x->lock); 115 do {
114 err = xfrm_state_check(x, skb); 116 spin_lock_bh(&x->lock);
115 if (err) 117 err = xfrm_state_check(x, skb);
116 goto error; 118 if (err)
119 goto error;
117 120
118 xfrm6_encap(skb); 121 xfrm6_encap(skb);
119 122
120 err = x->type->output(x, skb); 123 err = x->type->output(x, skb);
121 if (err) 124 if (err)
122 goto error; 125 goto error;
123 126
124 x->curlft.bytes += skb->len; 127 x->curlft.bytes += skb->len;
125 x->curlft.packets++; 128 x->curlft.packets++;
126 129
127 spin_unlock_bh(&x->lock); 130 spin_unlock_bh(&x->lock);
128 131
129 skb->nh.raw = skb->data; 132 skb->nh.raw = skb->data;
130 133
131 if (!(skb->dst = dst_pop(dst))) { 134 if (!(skb->dst = dst_pop(dst))) {
132 err = -EHOSTUNREACH; 135 err = -EHOSTUNREACH;
133 goto error_nolock; 136 goto error_nolock;
134 } 137 }
135 err = NET_XMIT_BYPASS; 138 dst = skb->dst;
139 x = dst->xfrm;
140 } while (x && !x->props.mode);
141
142 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
143 err = 0;
136 144
137out_exit: 145out_exit:
138 return err; 146 return err;
@@ -142,3 +150,33 @@ error_nolock:
142 kfree_skb(skb); 150 kfree_skb(skb);
143 goto out_exit; 151 goto out_exit;
144} 152}
153
154static int xfrm6_output_finish(struct sk_buff *skb)
155{
156 int err;
157
158 while (likely((err = xfrm6_output_one(skb)) == 0)) {
159 nf_reset(skb);
160
161 err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
162 skb->dst->dev, dst_output);
163 if (unlikely(err != 1))
164 break;
165
166 if (!skb->dst->xfrm)
167 return dst_output(skb);
168
169 err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
170 skb->dst->dev, xfrm6_output_finish);
171 if (unlikely(err != 1))
172 break;
173 }
174
175 return err;
176}
177
178int xfrm6_output(struct sk_buff *skb)
179{
180 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
181 xfrm6_output_finish);
182}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index fbef7826a74f..da09ff258648 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -397,7 +397,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler)
397 397
398EXPORT_SYMBOL(xfrm6_tunnel_deregister); 398EXPORT_SYMBOL(xfrm6_tunnel_deregister);
399 399
400static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 400static int xfrm6_tunnel_rcv(struct sk_buff **pskb)
401{ 401{
402 struct sk_buff *skb = *pskb; 402 struct sk_buff *skb = *pskb;
403 struct xfrm6_tunnel *handler = xfrm6_tunnel_handler; 403 struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
@@ -405,11 +405,11 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
405 u32 spi; 405 u32 spi;
406 406
407 /* device-like_ip6ip6_handler() */ 407 /* device-like_ip6ip6_handler() */
408 if (handler && handler->handler(pskb, nhoffp) == 0) 408 if (handler && handler->handler(pskb) == 0)
409 return 0; 409 return 0;
410 410
411 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); 411 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
412 return xfrm6_rcv_spi(pskb, nhoffp, spi); 412 return xfrm6_rcv_spi(pskb, spi);
413} 413}
414 414
415static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 415static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 34b3bb868409..0fb513a34d11 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -29,6 +29,7 @@
29 */ 29 */
30 30
31#include <linux/config.h> 31#include <linux/config.h>
32#include <linux/capability.h>
32#include <linux/errno.h> 33#include <linux/errno.h>
33#include <linux/if_arp.h> 34#include <linux/if_arp.h>
34#include <linux/if_ether.h> 35#include <linux/if_ether.h>
@@ -75,7 +76,7 @@ static struct datalink_proto *pEII_datalink;
75static struct datalink_proto *p8023_datalink; 76static struct datalink_proto *p8023_datalink;
76static struct datalink_proto *pSNAP_datalink; 77static struct datalink_proto *pSNAP_datalink;
77 78
78static struct proto_ops ipx_dgram_ops; 79static const struct proto_ops ipx_dgram_ops;
79 80
80LIST_HEAD(ipx_interfaces); 81LIST_HEAD(ipx_interfaces);
81DEFINE_SPINLOCK(ipx_interfaces_lock); 82DEFINE_SPINLOCK(ipx_interfaces_lock);
@@ -1884,7 +1885,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1884 rc = -EINVAL; 1885 rc = -EINVAL;
1885 break; 1886 break;
1886 default: 1887 default:
1887 rc = dev_ioctl(cmd, argp); 1888 rc = -ENOIOCTLCMD;
1888 break; 1889 break;
1889 } 1890 }
1890 1891
@@ -1901,7 +1902,7 @@ static struct net_proto_family ipx_family_ops = {
1901 .owner = THIS_MODULE, 1902 .owner = THIS_MODULE,
1902}; 1903};
1903 1904
1904static struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { 1905static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
1905 .family = PF_IPX, 1906 .family = PF_IPX,
1906 .owner = THIS_MODULE, 1907 .owner = THIS_MODULE,
1907 .release = ipx_release, 1908 .release = ipx_release,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 6f92f9c62990..759445648667 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -43,6 +43,7 @@
43 ********************************************************************/ 43 ********************************************************************/
44 44
45#include <linux/config.h> 45#include <linux/config.h>
46#include <linux/capability.h>
46#include <linux/module.h> 47#include <linux/module.h>
47#include <linux/types.h> 48#include <linux/types.h>
48#include <linux/socket.h> 49#include <linux/socket.h>
@@ -62,12 +63,12 @@
62 63
63static int irda_create(struct socket *sock, int protocol); 64static int irda_create(struct socket *sock, int protocol);
64 65
65static struct proto_ops irda_stream_ops; 66static const struct proto_ops irda_stream_ops;
66static struct proto_ops irda_seqpacket_ops; 67static const struct proto_ops irda_seqpacket_ops;
67static struct proto_ops irda_dgram_ops; 68static const struct proto_ops irda_dgram_ops;
68 69
69#ifdef CONFIG_IRDA_ULTRA 70#ifdef CONFIG_IRDA_ULTRA
70static struct proto_ops irda_ultra_ops; 71static const struct proto_ops irda_ultra_ops;
71#define ULTRA_MAX_DATA 382 72#define ULTRA_MAX_DATA 382
72#endif /* CONFIG_IRDA_ULTRA */ 73#endif /* CONFIG_IRDA_ULTRA */
73 74
@@ -1438,8 +1439,9 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1438 /* 1439 /*
1439 * POSIX 1003.1g mandates this order. 1440 * POSIX 1003.1g mandates this order.
1440 */ 1441 */
1441 if (sk->sk_err) 1442 ret = sock_error(sk);
1442 ret = sock_error(sk); 1443 if (ret)
1444 break;
1443 else if (sk->sk_shutdown & RCV_SHUTDOWN) 1445 else if (sk->sk_shutdown & RCV_SHUTDOWN)
1444 ; 1446 ;
1445 else if (noblock) 1447 else if (noblock)
@@ -1821,7 +1823,7 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1821 return -EINVAL; 1823 return -EINVAL;
1822 default: 1824 default:
1823 IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __FUNCTION__); 1825 IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __FUNCTION__);
1824 return dev_ioctl(cmd, (void __user *) arg); 1826 return -ENOIOCTLCMD;
1825 } 1827 }
1826 1828
1827 /*NOTREACHED*/ 1829 /*NOTREACHED*/
@@ -2463,7 +2465,7 @@ static struct net_proto_family irda_family_ops = {
2463 .owner = THIS_MODULE, 2465 .owner = THIS_MODULE,
2464}; 2466};
2465 2467
2466static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { 2468static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
2467 .family = PF_IRDA, 2469 .family = PF_IRDA,
2468 .owner = THIS_MODULE, 2470 .owner = THIS_MODULE,
2469 .release = irda_release, 2471 .release = irda_release,
@@ -2484,7 +2486,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
2484 .sendpage = sock_no_sendpage, 2486 .sendpage = sock_no_sendpage,
2485}; 2487};
2486 2488
2487static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { 2489static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
2488 .family = PF_IRDA, 2490 .family = PF_IRDA,
2489 .owner = THIS_MODULE, 2491 .owner = THIS_MODULE,
2490 .release = irda_release, 2492 .release = irda_release,
@@ -2505,7 +2507,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
2505 .sendpage = sock_no_sendpage, 2507 .sendpage = sock_no_sendpage,
2506}; 2508};
2507 2509
2508static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { 2510static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
2509 .family = PF_IRDA, 2511 .family = PF_IRDA,
2510 .owner = THIS_MODULE, 2512 .owner = THIS_MODULE,
2511 .release = irda_release, 2513 .release = irda_release,
@@ -2527,7 +2529,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
2527}; 2529};
2528 2530
2529#ifdef CONFIG_IRDA_ULTRA 2531#ifdef CONFIG_IRDA_ULTRA
2530static struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { 2532static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = {
2531 .family = PF_IRDA, 2533 .family = PF_IRDA,
2532 .owner = THIS_MODULE, 2534 .owner = THIS_MODULE,
2533 .release = irda_release, 2535 .release = irda_release,
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index 70543d89438b..890bac0d4a56 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -33,6 +33,7 @@
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/proc_fs.h> 34#include <linux/proc_fs.h>
35#include <linux/skbuff.h> 35#include <linux/skbuff.h>
36#include <linux/capability.h>
36#include <linux/if.h> 37#include <linux/if.h>
37#include <linux/if_ether.h> 38#include <linux/if_ether.h>
38#include <linux/if_arp.h> 39#include <linux/if_arp.h>
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index b8bb78af8b8a..254f90746900 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -364,7 +364,7 @@ static void iriap_disconnect_request(struct iriap_cb *self)
364/* 364/*
365 * Function iriap_getvaluebyclass (addr, name, attr) 365 * Function iriap_getvaluebyclass (addr, name, attr)
366 * 366 *
367 * Retreive all values from attribute in all objects with given class 367 * Retrieve all values from attribute in all objects with given class
368 * name 368 * name
369 */ 369 */
370int iriap_getvaluebyclass_request(struct iriap_cb *self, 370int iriap_getvaluebyclass_request(struct iriap_cb *self,
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c
index 75f2666e8630..c6d169fbdceb 100644
--- a/net/irda/irias_object.c
+++ b/net/irda/irias_object.c
@@ -82,8 +82,7 @@ struct ias_object *irias_new_object( char *name, int id)
82 82
83 IRDA_DEBUG( 4, "%s()\n", __FUNCTION__); 83 IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
84 84
85 obj = (struct ias_object *) kmalloc(sizeof(struct ias_object), 85 obj = kmalloc(sizeof(struct ias_object), GFP_ATOMIC);
86 GFP_ATOMIC);
87 if (obj == NULL) { 86 if (obj == NULL) {
88 IRDA_WARNING("%s(), Unable to allocate object!\n", 87 IRDA_WARNING("%s(), Unable to allocate object!\n",
89 __FUNCTION__); 88 __FUNCTION__);
@@ -348,8 +347,7 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
348 IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); 347 IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
349 IRDA_ASSERT(name != NULL, return;); 348 IRDA_ASSERT(name != NULL, return;);
350 349
351 attrib = (struct ias_attrib *) kmalloc(sizeof(struct ias_attrib), 350 attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
352 GFP_ATOMIC);
353 if (attrib == NULL) { 351 if (attrib == NULL) {
354 IRDA_WARNING("%s: Unable to allocate attribute!\n", 352 IRDA_WARNING("%s: Unable to allocate attribute!\n",
355 __FUNCTION__); 353 __FUNCTION__);
@@ -385,8 +383,7 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
385 IRDA_ASSERT(name != NULL, return;); 383 IRDA_ASSERT(name != NULL, return;);
386 IRDA_ASSERT(octets != NULL, return;); 384 IRDA_ASSERT(octets != NULL, return;);
387 385
388 attrib = (struct ias_attrib *) kmalloc(sizeof(struct ias_attrib), 386 attrib = kmalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
389 GFP_ATOMIC);
390 if (attrib == NULL) { 387 if (attrib == NULL) {
391 IRDA_WARNING("%s: Unable to allocate attribute!\n", 388 IRDA_WARNING("%s: Unable to allocate attribute!\n",
392 __FUNCTION__); 389 __FUNCTION__);
@@ -420,8 +417,7 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
420 IRDA_ASSERT(name != NULL, return;); 417 IRDA_ASSERT(name != NULL, return;);
421 IRDA_ASSERT(value != NULL, return;); 418 IRDA_ASSERT(value != NULL, return;);
422 419
423 attrib = (struct ias_attrib *) kmalloc(sizeof( struct ias_attrib), 420 attrib = kmalloc(sizeof( struct ias_attrib), GFP_ATOMIC);
424 GFP_ATOMIC);
425 if (attrib == NULL) { 421 if (attrib == NULL) {
426 IRDA_WARNING("%s: Unable to allocate attribute!\n", 422 IRDA_WARNING("%s: Unable to allocate attribute!\n",
427 __FUNCTION__); 423 __FUNCTION__);
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index b391cb3893d4..e4fe1e80029c 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -248,6 +248,7 @@
248#include <linux/netdevice.h> 248#include <linux/netdevice.h>
249#include <linux/miscdevice.h> 249#include <linux/miscdevice.h>
250#include <linux/poll.h> 250#include <linux/poll.h>
251#include <linux/capability.h>
251#include <linux/config.h> 252#include <linux/config.h>
252#include <linux/ctype.h> /* isspace() */ 253#include <linux/ctype.h> /* isspace() */
253#include <asm/uaccess.h> 254#include <asm/uaccess.h>
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 39031684b65c..43f1ce74187d 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -15,6 +15,7 @@
15 */ 15 */
16 16
17#include <linux/config.h> 17#include <linux/config.h>
18#include <linux/capability.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/kernel.h> 20#include <linux/kernel.h>
20#include <linux/socket.h> 21#include <linux/socket.h>
@@ -113,7 +114,7 @@ static __inline__ void pfkey_unlock_table(void)
113} 114}
114 115
115 116
116static struct proto_ops pfkey_ops; 117static const struct proto_ops pfkey_ops;
117 118
118static void pfkey_insert(struct sock *sk) 119static void pfkey_insert(struct sock *sk)
119{ 120{
@@ -297,8 +298,7 @@ static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk)
297 err = EINTR; 298 err = EINTR;
298 if (err >= 512) 299 if (err >= 512)
299 err = EINVAL; 300 err = EINVAL;
300 if (err <= 0 || err >= 256) 301 BUG_ON(err <= 0 || err >= 256);
301 BUG();
302 302
303 hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); 303 hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
304 pfkey_hdr_dup(hdr, orig); 304 pfkey_hdr_dup(hdr, orig);
@@ -336,6 +336,7 @@ static u8 sadb_ext_min_len[] = {
336 [SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port), 336 [SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port),
337 [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), 337 [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port),
338 [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), 338 [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address),
339 [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx),
339}; 340};
340 341
341/* Verify sadb_address_{len,prefixlen} against sa_family. */ 342/* Verify sadb_address_{len,prefixlen} against sa_family. */
@@ -383,6 +384,55 @@ static int verify_address_len(void *p)
383 return 0; 384 return 0;
384} 385}
385 386
387static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx)
388{
389 int len = 0;
390
391 len += sizeof(struct sadb_x_sec_ctx);
392 len += sec_ctx->sadb_x_ctx_len;
393 len += sizeof(uint64_t) - 1;
394 len /= sizeof(uint64_t);
395
396 return len;
397}
398
399static inline int verify_sec_ctx_len(void *p)
400{
401 struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p;
402 int len;
403
404 if (sec_ctx->sadb_x_ctx_len > PAGE_SIZE)
405 return -EINVAL;
406
407 len = pfkey_sec_ctx_len(sec_ctx);
408
409 if (sec_ctx->sadb_x_sec_len != len)
410 return -EINVAL;
411
412 return 0;
413}
414
415static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx)
416{
417 struct xfrm_user_sec_ctx *uctx = NULL;
418 int ctx_size = sec_ctx->sadb_x_ctx_len;
419
420 uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL);
421
422 if (!uctx)
423 return NULL;
424
425 uctx->len = pfkey_sec_ctx_len(sec_ctx);
426 uctx->exttype = sec_ctx->sadb_x_sec_exttype;
427 uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi;
428 uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg;
429 uctx->ctx_len = sec_ctx->sadb_x_ctx_len;
430 memcpy(uctx + 1, sec_ctx + 1,
431 uctx->ctx_len);
432
433 return uctx;
434}
435
386static int present_and_same_family(struct sadb_address *src, 436static int present_and_same_family(struct sadb_address *src,
387 struct sadb_address *dst) 437 struct sadb_address *dst)
388{ 438{
@@ -438,6 +488,10 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h
438 if (verify_address_len(p)) 488 if (verify_address_len(p))
439 return -EINVAL; 489 return -EINVAL;
440 } 490 }
491 if (ext_type == SADB_X_EXT_SEC_CTX) {
492 if (verify_sec_ctx_len(p))
493 return -EINVAL;
494 }
441 ext_hdrs[ext_type-1] = p; 495 ext_hdrs[ext_type-1] = p;
442 } 496 }
443 p += ext_len; 497 p += ext_len;
@@ -586,6 +640,9 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
586 struct sadb_key *key; 640 struct sadb_key *key;
587 struct sadb_x_sa2 *sa2; 641 struct sadb_x_sa2 *sa2;
588 struct sockaddr_in *sin; 642 struct sockaddr_in *sin;
643 struct sadb_x_sec_ctx *sec_ctx;
644 struct xfrm_sec_ctx *xfrm_ctx;
645 int ctx_size = 0;
589#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 646#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
590 struct sockaddr_in6 *sin6; 647 struct sockaddr_in6 *sin6;
591#endif 648#endif
@@ -609,6 +666,12 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
609 sizeof(struct sadb_address)*2 + 666 sizeof(struct sadb_address)*2 +
610 sockaddr_size*2 + 667 sockaddr_size*2 +
611 sizeof(struct sadb_x_sa2); 668 sizeof(struct sadb_x_sa2);
669
670 if ((xfrm_ctx = x->security)) {
671 ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
672 size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
673 }
674
612 /* identity & sensitivity */ 675 /* identity & sensitivity */
613 676
614 if ((x->props.family == AF_INET && 677 if ((x->props.family == AF_INET &&
@@ -899,6 +962,20 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
899 n_port->sadb_x_nat_t_port_reserved = 0; 962 n_port->sadb_x_nat_t_port_reserved = 0;
900 } 963 }
901 964
965 /* security context */
966 if (xfrm_ctx) {
967 sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
968 sizeof(struct sadb_x_sec_ctx) + ctx_size);
969 sec_ctx->sadb_x_sec_len =
970 (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
971 sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
972 sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
973 sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
974 sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
975 memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
976 xfrm_ctx->ctx_len);
977 }
978
902 return skb; 979 return skb;
903} 980}
904 981
@@ -909,6 +986,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
909 struct sadb_lifetime *lifetime; 986 struct sadb_lifetime *lifetime;
910 struct sadb_sa *sa; 987 struct sadb_sa *sa;
911 struct sadb_key *key; 988 struct sadb_key *key;
989 struct sadb_x_sec_ctx *sec_ctx;
912 uint16_t proto; 990 uint16_t proto;
913 int err; 991 int err;
914 992
@@ -993,6 +1071,21 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
993 x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; 1071 x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime;
994 x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; 1072 x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
995 } 1073 }
1074
1075 sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
1076 if (sec_ctx != NULL) {
1077 struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
1078
1079 if (!uctx)
1080 goto out;
1081
1082 err = security_xfrm_state_alloc(x, uctx);
1083 kfree(uctx);
1084
1085 if (err)
1086 goto out;
1087 }
1088
996 key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; 1089 key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
997 if (sa->sadb_sa_auth) { 1090 if (sa->sadb_sa_auth) {
998 int keysize = 0; 1091 int keysize = 0;
@@ -1720,6 +1813,18 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
1720 return 0; 1813 return 0;
1721} 1814}
1722 1815
1816static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp)
1817{
1818 struct xfrm_sec_ctx *xfrm_ctx = xp->security;
1819
1820 if (xfrm_ctx) {
1821 int len = sizeof(struct sadb_x_sec_ctx);
1822 len += xfrm_ctx->ctx_len;
1823 return PFKEY_ALIGN8(len);
1824 }
1825 return 0;
1826}
1827
1723static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) 1828static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
1724{ 1829{
1725 int sockaddr_size = pfkey_sockaddr_size(xp->family); 1830 int sockaddr_size = pfkey_sockaddr_size(xp->family);
@@ -1733,7 +1838,8 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
1733 (sockaddr_size * 2) + 1838 (sockaddr_size * 2) +
1734 sizeof(struct sadb_x_policy) + 1839 sizeof(struct sadb_x_policy) +
1735 (xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) + 1840 (xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) +
1736 (socklen * 2))); 1841 (socklen * 2))) +
1842 pfkey_xfrm_policy2sec_ctx_size(xp);
1737} 1843}
1738 1844
1739static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) 1845static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
@@ -1757,6 +1863,8 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
1757 struct sadb_lifetime *lifetime; 1863 struct sadb_lifetime *lifetime;
1758 struct sadb_x_policy *pol; 1864 struct sadb_x_policy *pol;
1759 struct sockaddr_in *sin; 1865 struct sockaddr_in *sin;
1866 struct sadb_x_sec_ctx *sec_ctx;
1867 struct xfrm_sec_ctx *xfrm_ctx;
1760#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 1868#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1761 struct sockaddr_in6 *sin6; 1869 struct sockaddr_in6 *sin6;
1762#endif 1870#endif
@@ -1941,6 +2049,21 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
1941 } 2049 }
1942 } 2050 }
1943 } 2051 }
2052
2053 /* security context */
2054 if ((xfrm_ctx = xp->security)) {
2055 int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp);
2056
2057 sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, ctx_size);
2058 sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t);
2059 sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
2060 sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
2061 sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
2062 sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
2063 memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
2064 xfrm_ctx->ctx_len);
2065 }
2066
1944 hdr->sadb_msg_len = size / sizeof(uint64_t); 2067 hdr->sadb_msg_len = size / sizeof(uint64_t);
1945 hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); 2068 hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
1946} 2069}
@@ -1976,12 +2099,13 @@ out:
1976 2099
1977static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2100static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1978{ 2101{
1979 int err; 2102 int err = 0;
1980 struct sadb_lifetime *lifetime; 2103 struct sadb_lifetime *lifetime;
1981 struct sadb_address *sa; 2104 struct sadb_address *sa;
1982 struct sadb_x_policy *pol; 2105 struct sadb_x_policy *pol;
1983 struct xfrm_policy *xp; 2106 struct xfrm_policy *xp;
1984 struct km_event c; 2107 struct km_event c;
2108 struct sadb_x_sec_ctx *sec_ctx;
1985 2109
1986 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 2110 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1987 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || 2111 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2028,6 +2152,22 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2028 if (xp->selector.dport) 2152 if (xp->selector.dport)
2029 xp->selector.dport_mask = ~0; 2153 xp->selector.dport_mask = ~0;
2030 2154
2155 sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
2156 if (sec_ctx != NULL) {
2157 struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
2158
2159 if (!uctx) {
2160 err = -ENOBUFS;
2161 goto out;
2162 }
2163
2164 err = security_xfrm_policy_alloc(xp, uctx);
2165 kfree(uctx);
2166
2167 if (err)
2168 goto out;
2169 }
2170
2031 xp->lft.soft_byte_limit = XFRM_INF; 2171 xp->lft.soft_byte_limit = XFRM_INF;
2032 xp->lft.hard_byte_limit = XFRM_INF; 2172 xp->lft.hard_byte_limit = XFRM_INF;
2033 xp->lft.soft_packet_limit = XFRM_INF; 2173 xp->lft.soft_packet_limit = XFRM_INF;
@@ -2051,10 +2191,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2051 2191
2052 err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, 2192 err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
2053 hdr->sadb_msg_type != SADB_X_SPDUPDATE); 2193 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
2054 if (err) { 2194
2055 kfree(xp); 2195 if (err)
2056 return err; 2196 goto out;
2057 }
2058 2197
2059 if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) 2198 if (hdr->sadb_msg_type == SADB_X_SPDUPDATE)
2060 c.event = XFRM_MSG_UPDPOLICY; 2199 c.event = XFRM_MSG_UPDPOLICY;
@@ -2069,6 +2208,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2069 return 0; 2208 return 0;
2070 2209
2071out: 2210out:
2211 security_xfrm_policy_free(xp);
2072 kfree(xp); 2212 kfree(xp);
2073 return err; 2213 return err;
2074} 2214}
@@ -2078,9 +2218,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2078 int err; 2218 int err;
2079 struct sadb_address *sa; 2219 struct sadb_address *sa;
2080 struct sadb_x_policy *pol; 2220 struct sadb_x_policy *pol;
2081 struct xfrm_policy *xp; 2221 struct xfrm_policy *xp, tmp;
2082 struct xfrm_selector sel; 2222 struct xfrm_selector sel;
2083 struct km_event c; 2223 struct km_event c;
2224 struct sadb_x_sec_ctx *sec_ctx;
2084 2225
2085 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 2226 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
2086 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || 2227 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2109,7 +2250,24 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2109 if (sel.dport) 2250 if (sel.dport)
2110 sel.dport_mask = ~0; 2251 sel.dport_mask = ~0;
2111 2252
2112 xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1); 2253 sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
2254 memset(&tmp, 0, sizeof(struct xfrm_policy));
2255
2256 if (sec_ctx != NULL) {
2257 struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
2258
2259 if (!uctx)
2260 return -ENOMEM;
2261
2262 err = security_xfrm_policy_alloc(&tmp, uctx);
2263 kfree(uctx);
2264
2265 if (err)
2266 return err;
2267 }
2268
2269 xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1);
2270 security_xfrm_policy_free(&tmp);
2113 if (xp == NULL) 2271 if (xp == NULL)
2114 return -ENOENT; 2272 return -ENOENT;
2115 2273
@@ -2660,6 +2818,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
2660{ 2818{
2661 struct xfrm_policy *xp; 2819 struct xfrm_policy *xp;
2662 struct sadb_x_policy *pol = (struct sadb_x_policy*)data; 2820 struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
2821 struct sadb_x_sec_ctx *sec_ctx;
2663 2822
2664 switch (family) { 2823 switch (family) {
2665 case AF_INET: 2824 case AF_INET:
@@ -2709,10 +2868,32 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
2709 (*dir = parse_ipsecrequests(xp, pol)) < 0) 2868 (*dir = parse_ipsecrequests(xp, pol)) < 0)
2710 goto out; 2869 goto out;
2711 2870
2871 /* security context too */
2872 if (len >= (pol->sadb_x_policy_len*8 +
2873 sizeof(struct sadb_x_sec_ctx))) {
2874 char *p = (char *)pol;
2875 struct xfrm_user_sec_ctx *uctx;
2876
2877 p += pol->sadb_x_policy_len*8;
2878 sec_ctx = (struct sadb_x_sec_ctx *)p;
2879 if (len < pol->sadb_x_policy_len*8 +
2880 sec_ctx->sadb_x_sec_len)
2881 goto out;
2882 if ((*dir = verify_sec_ctx_len(p)))
2883 goto out;
2884 uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
2885 *dir = security_xfrm_policy_alloc(xp, uctx);
2886 kfree(uctx);
2887
2888 if (*dir)
2889 goto out;
2890 }
2891
2712 *dir = pol->sadb_x_policy_dir-1; 2892 *dir = pol->sadb_x_policy_dir-1;
2713 return xp; 2893 return xp;
2714 2894
2715out: 2895out:
2896 security_xfrm_policy_free(xp);
2716 kfree(xp); 2897 kfree(xp);
2717 return NULL; 2898 return NULL;
2718} 2899}
@@ -2946,7 +3127,7 @@ out:
2946 return err; 3127 return err;
2947} 3128}
2948 3129
2949static struct proto_ops pfkey_ops = { 3130static const struct proto_ops pfkey_ops = {
2950 .family = PF_KEY, 3131 .family = PF_KEY,
2951 .owner = THIS_MODULE, 3132 .owner = THIS_MODULE,
2952 /* Operations that make no sense on pfkey sockets. */ 3133 /* Operations that make no sense on pfkey sockets. */
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c3f0b0783453..8171c53bc0ed 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -36,7 +36,7 @@
36static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; 36static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
37static u16 llc_ui_sap_link_no_max[256]; 37static u16 llc_ui_sap_link_no_max[256];
38static struct sockaddr_llc llc_ui_addrnull; 38static struct sockaddr_llc llc_ui_addrnull;
39static struct proto_ops llc_ui_ops; 39static const struct proto_ops llc_ui_ops;
40 40
41static int llc_ui_wait_for_conn(struct sock *sk, long timeout); 41static int llc_ui_wait_for_conn(struct sock *sk, long timeout);
42static int llc_ui_wait_for_disc(struct sock *sk, long timeout); 42static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
@@ -566,10 +566,9 @@ static int llc_wait_data(struct sock *sk, long timeo)
566 /* 566 /*
567 * POSIX 1003.1g mandates this order. 567 * POSIX 1003.1g mandates this order.
568 */ 568 */
569 if (sk->sk_err) { 569 rc = sock_error(sk);
570 rc = sock_error(sk); 570 if (rc)
571 break; 571 break;
572 }
573 rc = 0; 572 rc = 0;
574 if (sk->sk_shutdown & RCV_SHUTDOWN) 573 if (sk->sk_shutdown & RCV_SHUTDOWN)
575 break; 574 break;
@@ -960,7 +959,7 @@ out:
960static int llc_ui_ioctl(struct socket *sock, unsigned int cmd, 959static int llc_ui_ioctl(struct socket *sock, unsigned int cmd,
961 unsigned long arg) 960 unsigned long arg)
962{ 961{
963 return dev_ioctl(cmd, (void __user *)arg); 962 return -ENOIOCTLCMD;
964} 963}
965 964
966/** 965/**
@@ -1099,7 +1098,7 @@ static struct net_proto_family llc_ui_family_ops = {
1099 .owner = THIS_MODULE, 1098 .owner = THIS_MODULE,
1100}; 1099};
1101 1100
1102static struct proto_ops llc_ui_ops = { 1101static const struct proto_ops llc_ui_ops = {
1103 .family = PF_LLC, 1102 .family = PF_LLC,
1104 .owner = THIS_MODULE, 1103 .owner = THIS_MODULE,
1105 .release = llc_ui_release, 1104 .release = llc_ui_release,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 794c41d19b28..7d55f9cbd853 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -95,4 +95,11 @@ config NF_CONNTRACK_FTP
95 95
96 To compile it as a module, choose M here. If unsure, say N. 96 To compile it as a module, choose M here. If unsure, say N.
97 97
98config NF_CT_NETLINK
99 tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
100 depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK
101 depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
102 help
103 This option enables support for a netlink-based userspace interface
104
98endmenu 105endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 55f019ad2c08..cb2183145c37 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -13,3 +13,6 @@ obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
13 13
14# SCTP protocol connection tracking 14# SCTP protocol connection tracking
15obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o 15obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
16
17# netlink interface for nf_conntrack
18obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a7c7b490cf22..62bb509f05d4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -82,6 +82,8 @@ unsigned int nf_ct_log_invalid;
82static LIST_HEAD(unconfirmed); 82static LIST_HEAD(unconfirmed);
83static int nf_conntrack_vmalloc; 83static int nf_conntrack_vmalloc;
84 84
85static unsigned int nf_conntrack_next_id = 1;
86static unsigned int nf_conntrack_expect_next_id = 1;
85#ifdef CONFIG_NF_CONNTRACK_EVENTS 87#ifdef CONFIG_NF_CONNTRACK_EVENTS
86struct notifier_block *nf_conntrack_chain; 88struct notifier_block *nf_conntrack_chain;
87struct notifier_block *nf_conntrack_expect_chain; 89struct notifier_block *nf_conntrack_expect_chain;
@@ -184,7 +186,7 @@ DECLARE_MUTEX(nf_ct_cache_mutex);
184 186
185extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; 187extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
186struct nf_conntrack_protocol * 188struct nf_conntrack_protocol *
187nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) 189__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
188{ 190{
189 if (unlikely(nf_ct_protos[l3proto] == NULL)) 191 if (unlikely(nf_ct_protos[l3proto] == NULL))
190 return &nf_conntrack_generic_protocol; 192 return &nf_conntrack_generic_protocol;
@@ -192,6 +194,50 @@ nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol)
192 return nf_ct_protos[l3proto][protocol]; 194 return nf_ct_protos[l3proto][protocol];
193} 195}
194 196
197/* this is guaranteed to always return a valid protocol helper, since
198 * it falls back to generic_protocol */
199struct nf_conntrack_protocol *
200nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
201{
202 struct nf_conntrack_protocol *p;
203
204 preempt_disable();
205 p = __nf_ct_proto_find(l3proto, protocol);
206 if (p) {
207 if (!try_module_get(p->me))
208 p = &nf_conntrack_generic_protocol;
209 }
210 preempt_enable();
211
212 return p;
213}
214
215void nf_ct_proto_put(struct nf_conntrack_protocol *p)
216{
217 module_put(p->me);
218}
219
220struct nf_conntrack_l3proto *
221nf_ct_l3proto_find_get(u_int16_t l3proto)
222{
223 struct nf_conntrack_l3proto *p;
224
225 preempt_disable();
226 p = __nf_ct_l3proto_find(l3proto);
227 if (p) {
228 if (!try_module_get(p->me))
229 p = &nf_conntrack_generic_l3proto;
230 }
231 preempt_enable();
232
233 return p;
234}
235
236void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
237{
238 module_put(p->me);
239}
240
195static int nf_conntrack_hash_rnd_initted; 241static int nf_conntrack_hash_rnd_initted;
196static unsigned int nf_conntrack_hash_rnd; 242static unsigned int nf_conntrack_hash_rnd;
197 243
@@ -384,7 +430,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
384} 430}
385 431
386/* nf_conntrack_expect helper functions */ 432/* nf_conntrack_expect helper functions */
387static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) 433void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
388{ 434{
389 ASSERT_WRITE_LOCK(&nf_conntrack_lock); 435 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
390 NF_CT_ASSERT(!timer_pending(&exp->timeout)); 436 NF_CT_ASSERT(!timer_pending(&exp->timeout));
@@ -404,6 +450,33 @@ static void expectation_timed_out(unsigned long ul_expect)
404 nf_conntrack_expect_put(exp); 450 nf_conntrack_expect_put(exp);
405} 451}
406 452
453struct nf_conntrack_expect *
454__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
455{
456 struct nf_conntrack_expect *i;
457
458 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
459 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
460 atomic_inc(&i->use);
461 return i;
462 }
463 }
464 return NULL;
465}
466
467/* Just find a expectation corresponding to a tuple. */
468struct nf_conntrack_expect *
469nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
470{
471 struct nf_conntrack_expect *i;
472
473 read_lock_bh(&nf_conntrack_lock);
474 i = __nf_conntrack_expect_find(tuple);
475 read_unlock_bh(&nf_conntrack_lock);
476
477 return i;
478}
479
407/* If an expectation for this connection is found, it gets delete from 480/* If an expectation for this connection is found, it gets delete from
408 * global list then returned. */ 481 * global list then returned. */
409static struct nf_conntrack_expect * 482static struct nf_conntrack_expect *
@@ -432,7 +505,7 @@ find_expectation(const struct nf_conntrack_tuple *tuple)
432} 505}
433 506
434/* delete all expectations for this conntrack */ 507/* delete all expectations for this conntrack */
435static void remove_expectations(struct nf_conn *ct) 508void nf_ct_remove_expectations(struct nf_conn *ct)
436{ 509{
437 struct nf_conntrack_expect *i, *tmp; 510 struct nf_conntrack_expect *i, *tmp;
438 511
@@ -462,7 +535,7 @@ clean_from_lists(struct nf_conn *ct)
462 LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); 535 LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
463 536
464 /* Destroy all pending expectations */ 537 /* Destroy all pending expectations */
465 remove_expectations(ct); 538 nf_ct_remove_expectations(ct);
466} 539}
467 540
468static void 541static void
@@ -482,12 +555,11 @@ destroy_conntrack(struct nf_conntrack *nfct)
482 /* To make sure we don't get any weird locking issues here: 555 /* To make sure we don't get any weird locking issues here:
483 * destroy_conntrack() MUST NOT be called with a write lock 556 * destroy_conntrack() MUST NOT be called with a write lock
484 * to nf_conntrack_lock!!! -HW */ 557 * to nf_conntrack_lock!!! -HW */
485 l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); 558 l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
486 if (l3proto && l3proto->destroy) 559 if (l3proto && l3proto->destroy)
487 l3proto->destroy(ct); 560 l3proto->destroy(ct);
488 561
489 proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, 562 proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
490 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
491 if (proto && proto->destroy) 563 if (proto && proto->destroy)
492 proto->destroy(ct); 564 proto->destroy(ct);
493 565
@@ -499,7 +571,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
499 * except TFTP can create an expectation on the first packet, 571 * except TFTP can create an expectation on the first packet,
500 * before connection is in the list, so we need to clean here, 572 * before connection is in the list, so we need to clean here,
501 * too. */ 573 * too. */
502 remove_expectations(ct); 574 nf_ct_remove_expectations(ct);
503 575
504 /* We overload first tuple to link into unconfirmed list. */ 576 /* We overload first tuple to link into unconfirmed list. */
505 if (!nf_ct_is_confirmed(ct)) { 577 if (!nf_ct_is_confirmed(ct)) {
@@ -540,7 +612,7 @@ conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
540 && nf_ct_tuple_equal(tuple, &i->tuple); 612 && nf_ct_tuple_equal(tuple, &i->tuple);
541} 613}
542 614
543static struct nf_conntrack_tuple_hash * 615struct nf_conntrack_tuple_hash *
544__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, 616__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
545 const struct nf_conn *ignored_conntrack) 617 const struct nf_conn *ignored_conntrack)
546{ 618{
@@ -575,6 +647,29 @@ nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
575 return h; 647 return h;
576} 648}
577 649
650static void __nf_conntrack_hash_insert(struct nf_conn *ct,
651 unsigned int hash,
652 unsigned int repl_hash)
653{
654 ct->id = ++nf_conntrack_next_id;
655 list_prepend(&nf_conntrack_hash[hash],
656 &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
657 list_prepend(&nf_conntrack_hash[repl_hash],
658 &ct->tuplehash[IP_CT_DIR_REPLY].list);
659}
660
661void nf_conntrack_hash_insert(struct nf_conn *ct)
662{
663 unsigned int hash, repl_hash;
664
665 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
666 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
667
668 write_lock_bh(&nf_conntrack_lock);
669 __nf_conntrack_hash_insert(ct, hash, repl_hash);
670 write_unlock_bh(&nf_conntrack_lock);
671}
672
578/* Confirm a connection given skb; places it in hash table */ 673/* Confirm a connection given skb; places it in hash table */
579int 674int
580__nf_conntrack_confirm(struct sk_buff **pskb) 675__nf_conntrack_confirm(struct sk_buff **pskb)
@@ -621,10 +716,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
621 /* Remove from unconfirmed list */ 716 /* Remove from unconfirmed list */
622 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); 717 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
623 718
624 list_prepend(&nf_conntrack_hash[hash], 719 __nf_conntrack_hash_insert(ct, hash, repl_hash);
625 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
626 list_prepend(&nf_conntrack_hash[repl_hash],
627 &ct->tuplehash[IP_CT_DIR_REPLY]);
628 /* Timer relative to confirmation time, not original 720 /* Timer relative to confirmation time, not original
629 setting time, otherwise we'd get timer wrap in 721 setting time, otherwise we'd get timer wrap in
630 weird delay cases. */ 722 weird delay cases. */
@@ -708,13 +800,41 @@ static inline int helper_cmp(const struct nf_conntrack_helper *i,
708} 800}
709 801
710static struct nf_conntrack_helper * 802static struct nf_conntrack_helper *
711nf_ct_find_helper(const struct nf_conntrack_tuple *tuple) 803__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
712{ 804{
713 return LIST_FIND(&helpers, helper_cmp, 805 return LIST_FIND(&helpers, helper_cmp,
714 struct nf_conntrack_helper *, 806 struct nf_conntrack_helper *,
715 tuple); 807 tuple);
716} 808}
717 809
810struct nf_conntrack_helper *
811nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple)
812{
813 struct nf_conntrack_helper *helper;
814
815 /* need nf_conntrack_lock to assure that helper exists until
816 * try_module_get() is called */
817 read_lock_bh(&nf_conntrack_lock);
818
819 helper = __nf_ct_helper_find(tuple);
820 if (helper) {
821 /* need to increase module usage count to assure helper will
822 * not go away while the caller is e.g. busy putting a
823 * conntrack in the hash that uses the helper */
824 if (!try_module_get(helper->me))
825 helper = NULL;
826 }
827
828 read_unlock_bh(&nf_conntrack_lock);
829
830 return helper;
831}
832
833void nf_ct_helper_put(struct nf_conntrack_helper *helper)
834{
835 module_put(helper->me);
836}
837
718static struct nf_conn * 838static struct nf_conn *
719__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, 839__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
720 const struct nf_conntrack_tuple *repl, 840 const struct nf_conntrack_tuple *repl,
@@ -744,7 +864,7 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
744 /* find features needed by this conntrack. */ 864 /* find features needed by this conntrack. */
745 features = l3proto->get_features(orig); 865 features = l3proto->get_features(orig);
746 read_lock_bh(&nf_conntrack_lock); 866 read_lock_bh(&nf_conntrack_lock);
747 if (nf_ct_find_helper(repl) != NULL) 867 if (__nf_ct_helper_find(repl) != NULL)
748 features |= NF_CT_F_HELP; 868 features |= NF_CT_F_HELP;
749 read_unlock_bh(&nf_conntrack_lock); 869 read_unlock_bh(&nf_conntrack_lock);
750 870
@@ -794,7 +914,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
794{ 914{
795 struct nf_conntrack_l3proto *l3proto; 915 struct nf_conntrack_l3proto *l3proto;
796 916
797 l3proto = nf_ct_find_l3proto(orig->src.l3num); 917 l3proto = __nf_ct_l3proto_find(orig->src.l3num);
798 return __nf_conntrack_alloc(orig, repl, l3proto); 918 return __nf_conntrack_alloc(orig, repl, l3proto);
799} 919}
800 920
@@ -853,7 +973,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
853 nf_conntrack_get(&conntrack->master->ct_general); 973 nf_conntrack_get(&conntrack->master->ct_general);
854 NF_CT_STAT_INC(expect_new); 974 NF_CT_STAT_INC(expect_new);
855 } else { 975 } else {
856 conntrack->helper = nf_ct_find_helper(&repl_tuple); 976 conntrack->helper = __nf_ct_helper_find(&repl_tuple);
857 977
858 NF_CT_STAT_INC(new); 978 NF_CT_STAT_INC(new);
859 } 979 }
@@ -947,13 +1067,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
947 return NF_ACCEPT; 1067 return NF_ACCEPT;
948 } 1068 }
949 1069
950 l3proto = nf_ct_find_l3proto((u_int16_t)pf); 1070 l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
951 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { 1071 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
952 DEBUGP("not prepared to track yet or error occured\n"); 1072 DEBUGP("not prepared to track yet or error occured\n");
953 return -ret; 1073 return -ret;
954 } 1074 }
955 1075
956 proto = nf_ct_find_proto((u_int16_t)pf, protonum); 1076 proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
957 1077
958 /* It may be an special packet, error, unclean... 1078 /* It may be an special packet, error, unclean...
959 * inverse of the return code tells to the netfilter 1079 * inverse of the return code tells to the netfilter
@@ -1002,9 +1122,9 @@ int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1002 const struct nf_conntrack_tuple *orig) 1122 const struct nf_conntrack_tuple *orig)
1003{ 1123{
1004 return nf_ct_invert_tuple(inverse, orig, 1124 return nf_ct_invert_tuple(inverse, orig,
1005 nf_ct_find_l3proto(orig->src.l3num), 1125 __nf_ct_l3proto_find(orig->src.l3num),
1006 nf_ct_find_proto(orig->src.l3num, 1126 __nf_ct_proto_find(orig->src.l3num,
1007 orig->dst.protonum)); 1127 orig->dst.protonum));
1008} 1128}
1009 1129
1010/* Would two expected things clash? */ 1130/* Would two expected things clash? */
@@ -1096,6 +1216,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
1096 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; 1216 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
1097 add_timer(&exp->timeout); 1217 add_timer(&exp->timeout);
1098 1218
1219 exp->id = ++nf_conntrack_expect_next_id;
1099 atomic_inc(&exp->use); 1220 atomic_inc(&exp->use);
1100 NF_CT_STAT_INC(expect_create); 1221 NF_CT_STAT_INC(expect_create);
1101} 1222}
@@ -1129,6 +1250,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
1129int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) 1250int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
1130{ 1251{
1131 struct nf_conntrack_expect *i; 1252 struct nf_conntrack_expect *i;
1253 struct nf_conn *master = expect->master;
1132 int ret; 1254 int ret;
1133 1255
1134 DEBUGP("nf_conntrack_expect_related %p\n", related_to); 1256 DEBUGP("nf_conntrack_expect_related %p\n", related_to);
@@ -1149,9 +1271,9 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
1149 } 1271 }
1150 } 1272 }
1151 /* Will be over limit? */ 1273 /* Will be over limit? */
1152 if (expect->master->helper->max_expected && 1274 if (master->helper->max_expected &&
1153 expect->master->expecting >= expect->master->helper->max_expected) 1275 master->expecting >= master->helper->max_expected)
1154 evict_oldest_expect(expect->master); 1276 evict_oldest_expect(master);
1155 1277
1156 nf_conntrack_expect_insert(expect); 1278 nf_conntrack_expect_insert(expect);
1157 nf_conntrack_expect_event(IPEXP_NEW, expect); 1279 nf_conntrack_expect_event(IPEXP_NEW, expect);
@@ -1175,7 +1297,7 @@ void nf_conntrack_alter_reply(struct nf_conn *conntrack,
1175 1297
1176 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; 1298 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1177 if (!conntrack->master && conntrack->expecting == 0) 1299 if (!conntrack->master && conntrack->expecting == 0)
1178 conntrack->helper = nf_ct_find_helper(newreply); 1300 conntrack->helper = __nf_ct_helper_find(newreply);
1179 write_unlock_bh(&nf_conntrack_lock); 1301 write_unlock_bh(&nf_conntrack_lock);
1180} 1302}
1181 1303
@@ -1200,6 +1322,19 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
1200 return 0; 1322 return 0;
1201} 1323}
1202 1324
1325struct nf_conntrack_helper *
1326__nf_conntrack_helper_find_byname(const char *name)
1327{
1328 struct nf_conntrack_helper *h;
1329
1330 list_for_each_entry(h, &helpers, list) {
1331 if (!strcmp(h->name, name))
1332 return h;
1333 }
1334
1335 return NULL;
1336}
1337
1203static inline int unhelp(struct nf_conntrack_tuple_hash *i, 1338static inline int unhelp(struct nf_conntrack_tuple_hash *i,
1204 const struct nf_conntrack_helper *me) 1339 const struct nf_conntrack_helper *me)
1205{ 1340{
@@ -1283,6 +1418,51 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
1283 nf_conntrack_event_cache(event, skb); 1418 nf_conntrack_event_cache(event, skb);
1284} 1419}
1285 1420
1421#if defined(CONFIG_NF_CT_NETLINK) || \
1422 defined(CONFIG_NF_CT_NETLINK_MODULE)
1423
1424#include <linux/netfilter/nfnetlink.h>
1425#include <linux/netfilter/nfnetlink_conntrack.h>
1426
1427/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
1428 * in ip_conntrack_core, since we don't want the protocols to autoload
1429 * or depend on ctnetlink */
1430int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
1431 const struct nf_conntrack_tuple *tuple)
1432{
1433 NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
1434 &tuple->src.u.tcp.port);
1435 NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
1436 &tuple->dst.u.tcp.port);
1437 return 0;
1438
1439nfattr_failure:
1440 return -1;
1441}
1442
1443static const size_t cta_min_proto[CTA_PROTO_MAX] = {
1444 [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
1445 [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t)
1446};
1447
1448int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
1449 struct nf_conntrack_tuple *t)
1450{
1451 if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
1452 return -EINVAL;
1453
1454 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
1455 return -EINVAL;
1456
1457 t->src.u.tcp.port =
1458 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
1459 t->dst.u.tcp.port =
1460 *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
1461
1462 return 0;
1463}
1464#endif
1465
1286/* Used by ipt_REJECT and ip6t_REJECT. */ 1466/* Used by ipt_REJECT and ip6t_REJECT. */
1287void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) 1467void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1288{ 1468{
@@ -1365,6 +1545,11 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
1365 get_order(sizeof(struct list_head) * size)); 1545 get_order(sizeof(struct list_head) * size));
1366} 1546}
1367 1547
1548void nf_conntrack_flush()
1549{
1550 nf_ct_iterate_cleanup(kill_all, NULL);
1551}
1552
1368/* Mishearing the voices in his head, our hero wonders how he's 1553/* Mishearing the voices in his head, our hero wonders how he's
1369 supposed to kill the mall. */ 1554 supposed to kill the mall. */
1370void nf_conntrack_cleanup(void) 1555void nf_conntrack_cleanup(void)
@@ -1378,7 +1563,7 @@ void nf_conntrack_cleanup(void)
1378 1563
1379 nf_ct_event_cache_flush(); 1564 nf_ct_event_cache_flush();
1380 i_see_dead_people: 1565 i_see_dead_people:
1381 nf_ct_iterate_cleanup(kill_all, NULL); 1566 nf_conntrack_flush();
1382 if (atomic_read(&nf_conntrack_count) != 0) { 1567 if (atomic_read(&nf_conntrack_count) != 0) {
1383 schedule(); 1568 schedule();
1384 goto i_see_dead_people; 1569 goto i_see_dead_people;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 65080e269f27..d5a6eaf4a1de 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -44,7 +44,7 @@ static unsigned int ports_c;
44module_param_array(ports, ushort, &ports_c, 0400); 44module_param_array(ports, ushort, &ports_c, 0400);
45 45
46static int loose; 46static int loose;
47module_param(loose, int, 0600); 47module_param(loose, bool, 0600);
48 48
49unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, 49unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
50 enum ip_conntrack_info ctinfo, 50 enum ip_conntrack_info ctinfo,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
new file mode 100644
index 000000000000..73ab16bc7d40
--- /dev/null
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -0,0 +1,1653 @@
1/* Connection tracking via netlink socket. Allows for user space
2 * protocol helpers and general trouble making from userspace.
3 *
4 * (C) 2001 by Jay Schulist <jschlst@samba.org>
5 * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
6 * (C) 2003 by Patrick Mchardy <kaber@trash.net>
7 * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
8 *
9 * I've reworked this stuff to use attributes instead of conntrack
10 * structures. 5.44 am. I need more tea. --pablo 05/07/11.
11 *
12 * Initial connection tracking via netlink development funded and
13 * generally made possible by Network Robots, Inc. (www.networkrobots.com)
14 *
15 * Further development of this code funded by Astaro AG (http://www.astaro.com)
16 *
17 * This software may be used and distributed according to the terms
18 * of the GNU General Public License, incorporated herein by reference.
19 *
20 * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14)
21 */
22
23#include <linux/init.h>
24#include <linux/module.h>
25#include <linux/kernel.h>
26#include <linux/types.h>
27#include <linux/timer.h>
28#include <linux/skbuff.h>
29#include <linux/errno.h>
30#include <linux/netlink.h>
31#include <linux/spinlock.h>
32#include <linux/notifier.h>
33
34#include <linux/netfilter.h>
35#include <net/netfilter/nf_conntrack.h>
36#include <net/netfilter/nf_conntrack_core.h>
37#include <net/netfilter/nf_conntrack_helper.h>
38#include <net/netfilter/nf_conntrack_l3proto.h>
39#include <net/netfilter/nf_conntrack_protocol.h>
40#include <linux/netfilter_ipv4/ip_nat_protocol.h>
41
42#include <linux/netfilter/nfnetlink.h>
43#include <linux/netfilter/nfnetlink_conntrack.h>
44
45MODULE_LICENSE("GPL");
46
47static char __initdata version[] = "0.92";
48
49#if 0
50#define DEBUGP printk
51#else
52#define DEBUGP(format, args...)
53#endif
54
55
56static inline int
57ctnetlink_dump_tuples_proto(struct sk_buff *skb,
58 const struct nf_conntrack_tuple *tuple)
59{
60 struct nf_conntrack_protocol *proto;
61 int ret = 0;
62
63 NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
64
65 /* If no protocol helper is found, this function will return the
66 * generic protocol helper, so proto won't *ever* be NULL */
67 proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
68 if (likely(proto->tuple_to_nfattr))
69 ret = proto->tuple_to_nfattr(skb, tuple);
70
71 nf_ct_proto_put(proto);
72
73 return ret;
74
75nfattr_failure:
76 return -1;
77}
78
79static inline int
80ctnetlink_dump_tuples(struct sk_buff *skb,
81 const struct nf_conntrack_tuple *tuple)
82{
83 struct nfattr *nest_parms;
84 struct nf_conntrack_l3proto *l3proto;
85 int ret = 0;
86
87 l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
88
89 nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
90 if (likely(l3proto->tuple_to_nfattr))
91 ret = l3proto->tuple_to_nfattr(skb, tuple);
92 NFA_NEST_END(skb, nest_parms);
93
94 nf_ct_l3proto_put(l3proto);
95
96 if (unlikely(ret < 0))
97 return ret;
98
99 nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
100 ret = ctnetlink_dump_tuples_proto(skb, tuple);
101 NFA_NEST_END(skb, nest_parms);
102
103 return ret;
104
105nfattr_failure:
106 return -1;
107}
108
109static inline int
110ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
111{
112 u_int32_t status = htonl((u_int32_t) ct->status);
113 NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
114 return 0;
115
116nfattr_failure:
117 return -1;
118}
119
120static inline int
121ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
122{
123 long timeout_l = ct->timeout.expires - jiffies;
124 u_int32_t timeout;
125
126 if (timeout_l < 0)
127 timeout = 0;
128 else
129 timeout = htonl(timeout_l / HZ);
130
131 NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
132 return 0;
133
134nfattr_failure:
135 return -1;
136}
137
138static inline int
139ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
140{
141 struct nf_conntrack_protocol *proto = nf_ct_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
142 struct nfattr *nest_proto;
143 int ret;
144
145 if (!proto->to_nfattr) {
146 nf_ct_proto_put(proto);
147 return 0;
148 }
149
150 nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
151
152 ret = proto->to_nfattr(skb, nest_proto, ct);
153
154 nf_ct_proto_put(proto);
155
156 NFA_NEST_END(skb, nest_proto);
157
158 return ret;
159
160nfattr_failure:
161 return -1;
162}
163
164static inline int
165ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
166{
167 struct nfattr *nest_helper;
168
169 if (!ct->helper)
170 return 0;
171
172 nest_helper = NFA_NEST(skb, CTA_HELP);
173 NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
174
175 if (ct->helper->to_nfattr)
176 ct->helper->to_nfattr(skb, ct);
177
178 NFA_NEST_END(skb, nest_helper);
179
180 return 0;
181
182nfattr_failure:
183 return -1;
184}
185
186#ifdef CONFIG_NF_CT_ACCT
187static inline int
188ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
189 enum ip_conntrack_dir dir)
190{
191 enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
192 struct nfattr *nest_count = NFA_NEST(skb, type);
193 u_int32_t tmp;
194
195 tmp = htonl(ct->counters[dir].packets);
196 NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
197
198 tmp = htonl(ct->counters[dir].bytes);
199 NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
200
201 NFA_NEST_END(skb, nest_count);
202
203 return 0;
204
205nfattr_failure:
206 return -1;
207}
208#else
209#define ctnetlink_dump_counters(a, b, c) (0)
210#endif
211
212#ifdef CONFIG_NF_CONNTRACK_MARK
213static inline int
214ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
215{
216 u_int32_t mark = htonl(ct->mark);
217
218 NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
219 return 0;
220
221nfattr_failure:
222 return -1;
223}
224#else
225#define ctnetlink_dump_mark(a, b) (0)
226#endif
227
228static inline int
229ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
230{
231 u_int32_t id = htonl(ct->id);
232 NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
233 return 0;
234
235nfattr_failure:
236 return -1;
237}
238
239static inline int
240ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
241{
242 u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
243
244 NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
245 return 0;
246
247nfattr_failure:
248 return -1;
249}
250
251#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
252
253static int
254ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
255 int event, int nowait,
256 const struct nf_conn *ct)
257{
258 struct nlmsghdr *nlh;
259 struct nfgenmsg *nfmsg;
260 struct nfattr *nest_parms;
261 unsigned char *b;
262
263 b = skb->tail;
264
265 event |= NFNL_SUBSYS_CTNETLINK << 8;
266 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
267 nfmsg = NLMSG_DATA(nlh);
268
269 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
270 nfmsg->nfgen_family =
271 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
272 nfmsg->version = NFNETLINK_V0;
273 nfmsg->res_id = 0;
274
275 nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
276 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
277 goto nfattr_failure;
278 NFA_NEST_END(skb, nest_parms);
279
280 nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
281 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
282 goto nfattr_failure;
283 NFA_NEST_END(skb, nest_parms);
284
285 if (ctnetlink_dump_status(skb, ct) < 0 ||
286 ctnetlink_dump_timeout(skb, ct) < 0 ||
287 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
288 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
289 ctnetlink_dump_protoinfo(skb, ct) < 0 ||
290 ctnetlink_dump_helpinfo(skb, ct) < 0 ||
291 ctnetlink_dump_mark(skb, ct) < 0 ||
292 ctnetlink_dump_id(skb, ct) < 0 ||
293 ctnetlink_dump_use(skb, ct) < 0)
294 goto nfattr_failure;
295
296 nlh->nlmsg_len = skb->tail - b;
297 return skb->len;
298
299nlmsg_failure:
300nfattr_failure:
301 skb_trim(skb, b - skb->data);
302 return -1;
303}
304
305#ifdef CONFIG_NF_CONNTRACK_EVENTS
306static int ctnetlink_conntrack_event(struct notifier_block *this,
307 unsigned long events, void *ptr)
308{
309 struct nlmsghdr *nlh;
310 struct nfgenmsg *nfmsg;
311 struct nfattr *nest_parms;
312 struct nf_conn *ct = (struct nf_conn *)ptr;
313 struct sk_buff *skb;
314 unsigned int type;
315 unsigned char *b;
316 unsigned int flags = 0, group;
317
318 /* ignore our fake conntrack entry */
319 if (ct == &nf_conntrack_untracked)
320 return NOTIFY_DONE;
321
322 if (events & IPCT_DESTROY) {
323 type = IPCTNL_MSG_CT_DELETE;
324 group = NFNLGRP_CONNTRACK_DESTROY;
325 } else if (events & (IPCT_NEW | IPCT_RELATED)) {
326 type = IPCTNL_MSG_CT_NEW;
327 flags = NLM_F_CREATE|NLM_F_EXCL;
328 /* dump everything */
329 events = ~0UL;
330 group = NFNLGRP_CONNTRACK_NEW;
331 } else if (events & (IPCT_STATUS |
332 IPCT_PROTOINFO |
333 IPCT_HELPER |
334 IPCT_HELPINFO |
335 IPCT_NATINFO)) {
336 type = IPCTNL_MSG_CT_NEW;
337 group = NFNLGRP_CONNTRACK_UPDATE;
338 } else
339 return NOTIFY_DONE;
340
341 /* FIXME: Check if there are any listeners before, don't hurt performance */
342
343 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
344 if (!skb)
345 return NOTIFY_DONE;
346
347 b = skb->tail;
348
349 type |= NFNL_SUBSYS_CTNETLINK << 8;
350 nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
351 nfmsg = NLMSG_DATA(nlh);
352
353 nlh->nlmsg_flags = flags;
354 nfmsg->nfgen_family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
355 nfmsg->version = NFNETLINK_V0;
356 nfmsg->res_id = 0;
357
358 nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
359 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
360 goto nfattr_failure;
361 NFA_NEST_END(skb, nest_parms);
362
363 nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
364 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
365 goto nfattr_failure;
366 NFA_NEST_END(skb, nest_parms);
367
368 /* NAT stuff is now a status flag */
369 if ((events & IPCT_STATUS || events & IPCT_NATINFO)
370 && ctnetlink_dump_status(skb, ct) < 0)
371 goto nfattr_failure;
372 if (events & IPCT_REFRESH
373 && ctnetlink_dump_timeout(skb, ct) < 0)
374 goto nfattr_failure;
375 if (events & IPCT_PROTOINFO
376 && ctnetlink_dump_protoinfo(skb, ct) < 0)
377 goto nfattr_failure;
378 if (events & IPCT_HELPINFO
379 && ctnetlink_dump_helpinfo(skb, ct) < 0)
380 goto nfattr_failure;
381
382 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
383 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
384 goto nfattr_failure;
385
386 nlh->nlmsg_len = skb->tail - b;
387 nfnetlink_send(skb, 0, group, 0);
388 return NOTIFY_DONE;
389
390nlmsg_failure:
391nfattr_failure:
392 kfree_skb(skb);
393 return NOTIFY_DONE;
394}
395#endif /* CONFIG_NF_CONNTRACK_EVENTS */
396
397static int ctnetlink_done(struct netlink_callback *cb)
398{
399 DEBUGP("entered %s\n", __FUNCTION__);
400 return 0;
401}
402
403#define L3PROTO(ct) ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num
404
405static int
406ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
407{
408 struct nf_conn *ct = NULL;
409 struct nf_conntrack_tuple_hash *h;
410 struct list_head *i;
411 u_int32_t *id = (u_int32_t *) &cb->args[1];
412 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
413 u_int8_t l3proto = nfmsg->nfgen_family;
414
415 DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__,
416 cb->args[0], *id);
417
418 read_lock_bh(&nf_conntrack_lock);
419 for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
420 list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
421 h = (struct nf_conntrack_tuple_hash *) i;
422 if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
423 continue;
424 ct = nf_ct_tuplehash_to_ctrack(h);
425 /* Dump entries of a given L3 protocol number.
426 * If it is not specified, ie. l3proto == 0,
427 * then dump everything. */
428 if (l3proto && L3PROTO(ct) != l3proto)
429 continue;
430 if (ct->id <= *id)
431 continue;
432 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
433 cb->nlh->nlmsg_seq,
434 IPCTNL_MSG_CT_NEW,
435 1, ct) < 0)
436 goto out;
437 *id = ct->id;
438 }
439 }
440out:
441 read_unlock_bh(&nf_conntrack_lock);
442
443 DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
444
445 return skb->len;
446}
447
448#ifdef CONFIG_NF_CT_ACCT
449static int
450ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
451{
452 struct nf_conn *ct = NULL;
453 struct nf_conntrack_tuple_hash *h;
454 struct list_head *i;
455 u_int32_t *id = (u_int32_t *) &cb->args[1];
456 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
457 u_int8_t l3proto = nfmsg->nfgen_family;
458
459 DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__,
460 cb->args[0], *id);
461
462 write_lock_bh(&nf_conntrack_lock);
463 for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
464 list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
465 h = (struct nf_conntrack_tuple_hash *) i;
466 if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
467 continue;
468 ct = nf_ct_tuplehash_to_ctrack(h);
469 if (l3proto && L3PROTO(ct) != l3proto)
470 continue;
471 if (ct->id <= *id)
472 continue;
473 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
474 cb->nlh->nlmsg_seq,
475 IPCTNL_MSG_CT_NEW,
476 1, ct) < 0)
477 goto out;
478 *id = ct->id;
479
480 memset(&ct->counters, 0, sizeof(ct->counters));
481 }
482 }
483out:
484 write_unlock_bh(&nf_conntrack_lock);
485
486 DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
487
488 return skb->len;
489}
490#endif
491
492static inline int
493ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple)
494{
495 struct nfattr *tb[CTA_IP_MAX];
496 struct nf_conntrack_l3proto *l3proto;
497 int ret = 0;
498
499 DEBUGP("entered %s\n", __FUNCTION__);
500
501 nfattr_parse_nested(tb, CTA_IP_MAX, attr);
502
503 l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
504
505 if (likely(l3proto->nfattr_to_tuple))
506 ret = l3proto->nfattr_to_tuple(tb, tuple);
507
508 nf_ct_l3proto_put(l3proto);
509
510 DEBUGP("leaving\n");
511
512 return ret;
513}
514
515static const size_t cta_min_proto[CTA_PROTO_MAX] = {
516 [CTA_PROTO_NUM-1] = sizeof(u_int8_t),
517};
518
519static inline int
520ctnetlink_parse_tuple_proto(struct nfattr *attr,
521 struct nf_conntrack_tuple *tuple)
522{
523 struct nfattr *tb[CTA_PROTO_MAX];
524 struct nf_conntrack_protocol *proto;
525 int ret = 0;
526
527 DEBUGP("entered %s\n", __FUNCTION__);
528
529 nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
530
531 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
532 return -EINVAL;
533
534 if (!tb[CTA_PROTO_NUM-1])
535 return -EINVAL;
536 tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
537
538 proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
539
540 if (likely(proto->nfattr_to_tuple))
541 ret = proto->nfattr_to_tuple(tb, tuple);
542
543 nf_ct_proto_put(proto);
544
545 return ret;
546}
547
548static inline int
549ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple,
550 enum ctattr_tuple type, u_int8_t l3num)
551{
552 struct nfattr *tb[CTA_TUPLE_MAX];
553 int err;
554
555 DEBUGP("entered %s\n", __FUNCTION__);
556
557 memset(tuple, 0, sizeof(*tuple));
558
559 nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
560
561 if (!tb[CTA_TUPLE_IP-1])
562 return -EINVAL;
563
564 tuple->src.l3num = l3num;
565
566 err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
567 if (err < 0)
568 return err;
569
570 if (!tb[CTA_TUPLE_PROTO-1])
571 return -EINVAL;
572
573 err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
574 if (err < 0)
575 return err;
576
577 /* orig and expect tuples get DIR_ORIGINAL */
578 if (type == CTA_TUPLE_REPLY)
579 tuple->dst.dir = IP_CT_DIR_REPLY;
580 else
581 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
582
583 NF_CT_DUMP_TUPLE(tuple);
584
585 DEBUGP("leaving\n");
586
587 return 0;
588}
589
590#ifdef CONFIG_IP_NF_NAT_NEEDED
591static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
592 [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t),
593 [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t),
594};
595
596static int ctnetlink_parse_nat_proto(struct nfattr *attr,
597 const struct nf_conn *ct,
598 struct ip_nat_range *range)
599{
600 struct nfattr *tb[CTA_PROTONAT_MAX];
601 struct ip_nat_protocol *npt;
602
603 DEBUGP("entered %s\n", __FUNCTION__);
604
605 nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
606
607 if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
608 return -EINVAL;
609
610 npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
611
612 if (!npt->nfattr_to_range) {
613 ip_nat_proto_put(npt);
614 return 0;
615 }
616
617 /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
618 if (npt->nfattr_to_range(tb, range) > 0)
619 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
620
621 ip_nat_proto_put(npt);
622
623 DEBUGP("leaving\n");
624 return 0;
625}
626
627static const size_t cta_min_nat[CTA_NAT_MAX] = {
628 [CTA_NAT_MINIP-1] = sizeof(u_int32_t),
629 [CTA_NAT_MAXIP-1] = sizeof(u_int32_t),
630};
631
632static inline int
633ctnetlink_parse_nat(struct nfattr *cda[],
634 const struct nf_conn *ct, struct ip_nat_range *range)
635{
636 struct nfattr *tb[CTA_NAT_MAX];
637 int err;
638
639 DEBUGP("entered %s\n", __FUNCTION__);
640
641 memset(range, 0, sizeof(*range));
642
643 nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]);
644
645 if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
646 return -EINVAL;
647
648 if (tb[CTA_NAT_MINIP-1])
649 range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
650
651 if (!tb[CTA_NAT_MAXIP-1])
652 range->max_ip = range->min_ip;
653 else
654 range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
655
656 if (range->min_ip)
657 range->flags |= IP_NAT_RANGE_MAP_IPS;
658
659 if (!tb[CTA_NAT_PROTO-1])
660 return 0;
661
662 err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
663 if (err < 0)
664 return err;
665
666 DEBUGP("leaving\n");
667 return 0;
668}
669#endif
670
671static inline int
672ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
673{
674 struct nfattr *tb[CTA_HELP_MAX];
675
676 DEBUGP("entered %s\n", __FUNCTION__);
677
678 nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
679
680 if (!tb[CTA_HELP_NAME-1])
681 return -EINVAL;
682
683 *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
684
685 return 0;
686}
687
688static const size_t cta_min[CTA_MAX] = {
689 [CTA_STATUS-1] = sizeof(u_int32_t),
690 [CTA_TIMEOUT-1] = sizeof(u_int32_t),
691 [CTA_MARK-1] = sizeof(u_int32_t),
692 [CTA_USE-1] = sizeof(u_int32_t),
693 [CTA_ID-1] = sizeof(u_int32_t)
694};
695
696static int
697ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
698 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
699{
700 struct nf_conntrack_tuple_hash *h;
701 struct nf_conntrack_tuple tuple;
702 struct nf_conn *ct;
703 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
704 u_int8_t u3 = nfmsg->nfgen_family;
705 int err = 0;
706
707 DEBUGP("entered %s\n", __FUNCTION__);
708
709 if (nfattr_bad_size(cda, CTA_MAX, cta_min))
710 return -EINVAL;
711
712 if (cda[CTA_TUPLE_ORIG-1])
713 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
714 else if (cda[CTA_TUPLE_REPLY-1])
715 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
716 else {
717 /* Flush the whole table */
718 nf_conntrack_flush();
719 return 0;
720 }
721
722 if (err < 0)
723 return err;
724
725 h = nf_conntrack_find_get(&tuple, NULL);
726 if (!h) {
727 DEBUGP("tuple not found in conntrack hash\n");
728 return -ENOENT;
729 }
730
731 ct = nf_ct_tuplehash_to_ctrack(h);
732
733 if (cda[CTA_ID-1]) {
734 u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
735 if (ct->id != id) {
736 nf_ct_put(ct);
737 return -ENOENT;
738 }
739 }
740 if (del_timer(&ct->timeout))
741 ct->timeout.function((unsigned long)ct);
742
743 nf_ct_put(ct);
744 DEBUGP("leaving\n");
745
746 return 0;
747}
748
749static int
750ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
751 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
752{
753 struct nf_conntrack_tuple_hash *h;
754 struct nf_conntrack_tuple tuple;
755 struct nf_conn *ct;
756 struct sk_buff *skb2 = NULL;
757 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
758 u_int8_t u3 = nfmsg->nfgen_family;
759 int err = 0;
760
761 DEBUGP("entered %s\n", __FUNCTION__);
762
763 if (nlh->nlmsg_flags & NLM_F_DUMP) {
764 u32 rlen;
765
766 if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
767 IPCTNL_MSG_CT_GET_CTRZERO) {
768#ifdef CONFIG_NF_CT_ACCT
769 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
770 ctnetlink_dump_table_w,
771 ctnetlink_done)) != 0)
772 return -EINVAL;
773#else
774 return -ENOTSUPP;
775#endif
776 } else {
777 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
778 ctnetlink_dump_table,
779 ctnetlink_done)) != 0)
780 return -EINVAL;
781 }
782
783 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
784 if (rlen > skb->len)
785 rlen = skb->len;
786 skb_pull(skb, rlen);
787 return 0;
788 }
789
790 if (nfattr_bad_size(cda, CTA_MAX, cta_min))
791 return -EINVAL;
792
793 if (cda[CTA_TUPLE_ORIG-1])
794 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
795 else if (cda[CTA_TUPLE_REPLY-1])
796 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
797 else
798 return -EINVAL;
799
800 if (err < 0)
801 return err;
802
803 h = nf_conntrack_find_get(&tuple, NULL);
804 if (!h) {
805 DEBUGP("tuple not found in conntrack hash");
806 return -ENOENT;
807 }
808 DEBUGP("tuple found\n");
809 ct = nf_ct_tuplehash_to_ctrack(h);
810
811 err = -ENOMEM;
812 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
813 if (!skb2) {
814 nf_ct_put(ct);
815 return -ENOMEM;
816 }
817 NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
818
819 err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
820 IPCTNL_MSG_CT_NEW, 1, ct);
821 nf_ct_put(ct);
822 if (err <= 0)
823 goto free;
824
825 err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
826 if (err < 0)
827 goto out;
828
829 DEBUGP("leaving\n");
830 return 0;
831
832free:
833 kfree_skb(skb2);
834out:
835 return err;
836}
837
838static inline int
839ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[])
840{
841 unsigned long d;
842 unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
843 d = ct->status ^ status;
844
845 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
846 /* unchangeable */
847 return -EINVAL;
848
849 if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
850 /* SEEN_REPLY bit can only be set */
851 return -EINVAL;
852
853
854 if (d & IPS_ASSURED && !(status & IPS_ASSURED))
855 /* ASSURED bit can only be set */
856 return -EINVAL;
857
858 if (cda[CTA_NAT-1]) {
859#ifndef CONFIG_IP_NF_NAT_NEEDED
860 return -EINVAL;
861#else
862 unsigned int hooknum;
863 struct ip_nat_range range;
864
865 if (ctnetlink_parse_nat(cda, ct, &range) < 0)
866 return -EINVAL;
867
868 DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n",
869 NIPQUAD(range.min_ip), NIPQUAD(range.max_ip),
870 htons(range.min.all), htons(range.max.all));
871
872 /* This is tricky but it works. ip_nat_setup_info needs the
873 * hook number as parameter, so let's do the correct
874 * conversion and run away */
875 if (status & IPS_SRC_NAT_DONE)
876 hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */
877 else if (status & IPS_DST_NAT_DONE)
878 hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */
879 else
880 return -EINVAL; /* Missing NAT flags */
881
882 DEBUGP("NAT status: %lu\n",
883 status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
884
885 if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
886 return -EEXIST;
887 ip_nat_setup_info(ct, &range, hooknum);
888
889 DEBUGP("NAT status after setup_info: %lu\n",
890 ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
891#endif
892 }
893
894 /* Be careful here, modifying NAT bits can screw up things,
895 * so don't let users modify them directly if they don't pass
896 * ip_nat_range. */
897 ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
898 return 0;
899}
900
901
902static inline int
903ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[])
904{
905 struct nf_conntrack_helper *helper;
906 char *helpname;
907 int err;
908
909 DEBUGP("entered %s\n", __FUNCTION__);
910
911 /* don't change helper of sibling connections */
912 if (ct->master)
913 return -EINVAL;
914
915 err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
916 if (err < 0)
917 return err;
918
919 helper = __nf_conntrack_helper_find_byname(helpname);
920 if (!helper) {
921 if (!strcmp(helpname, ""))
922 helper = NULL;
923 else
924 return -EINVAL;
925 }
926
927 if (ct->helper) {
928 if (!helper) {
929 /* we had a helper before ... */
930 nf_ct_remove_expectations(ct);
931 ct->helper = NULL;
932 } else {
933 /* need to zero data of old helper */
934 memset(&ct->help, 0, sizeof(ct->help));
935 }
936 }
937
938 ct->helper = helper;
939
940 return 0;
941}
942
943static inline int
944ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[])
945{
946 u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
947
948 if (!del_timer(&ct->timeout))
949 return -ETIME;
950
951 ct->timeout.expires = jiffies + timeout * HZ;
952 add_timer(&ct->timeout);
953
954 return 0;
955}
956
957static inline int
958ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[])
959{
960 struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
961 struct nf_conntrack_protocol *proto;
962 u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
963 u_int16_t l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
964 int err = 0;
965
966 nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
967
968 proto = nf_ct_proto_find_get(l3num, npt);
969
970 if (proto->from_nfattr)
971 err = proto->from_nfattr(tb, ct);
972 nf_ct_proto_put(proto);
973
974 return err;
975}
976
977static int
978ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[])
979{
980 int err;
981
982 DEBUGP("entered %s\n", __FUNCTION__);
983
984 if (cda[CTA_HELP-1]) {
985 err = ctnetlink_change_helper(ct, cda);
986 if (err < 0)
987 return err;
988 }
989
990 if (cda[CTA_TIMEOUT-1]) {
991 err = ctnetlink_change_timeout(ct, cda);
992 if (err < 0)
993 return err;
994 }
995
996 if (cda[CTA_STATUS-1]) {
997 err = ctnetlink_change_status(ct, cda);
998 if (err < 0)
999 return err;
1000 }
1001
1002 if (cda[CTA_PROTOINFO-1]) {
1003 err = ctnetlink_change_protoinfo(ct, cda);
1004 if (err < 0)
1005 return err;
1006 }
1007
1008#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
1009 if (cda[CTA_MARK-1])
1010 ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
1011#endif
1012
1013 DEBUGP("all done\n");
1014 return 0;
1015}
1016
1017static int
1018ctnetlink_create_conntrack(struct nfattr *cda[],
1019 struct nf_conntrack_tuple *otuple,
1020 struct nf_conntrack_tuple *rtuple)
1021{
1022 struct nf_conn *ct;
1023 int err = -EINVAL;
1024
1025 DEBUGP("entered %s\n", __FUNCTION__);
1026
1027 ct = nf_conntrack_alloc(otuple, rtuple);
1028 if (ct == NULL || IS_ERR(ct))
1029 return -ENOMEM;
1030
1031 if (!cda[CTA_TIMEOUT-1])
1032 goto err;
1033 ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
1034
1035 ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
1036 ct->status |= IPS_CONFIRMED;
1037
1038 err = ctnetlink_change_status(ct, cda);
1039 if (err < 0)
1040 goto err;
1041
1042 if (cda[CTA_PROTOINFO-1]) {
1043 err = ctnetlink_change_protoinfo(ct, cda);
1044 if (err < 0)
1045 return err;
1046 }
1047
1048#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
1049 if (cda[CTA_MARK-1])
1050 ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
1051#endif
1052
1053 ct->helper = nf_ct_helper_find_get(rtuple);
1054
1055 add_timer(&ct->timeout);
1056 nf_conntrack_hash_insert(ct);
1057
1058 if (ct->helper)
1059 nf_ct_helper_put(ct->helper);
1060
1061 DEBUGP("conntrack with id %u inserted\n", ct->id);
1062 return 0;
1063
1064err:
1065 nf_conntrack_free(ct);
1066 return err;
1067}
1068
1069static int
1070ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1071 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
1072{
1073 struct nf_conntrack_tuple otuple, rtuple;
1074 struct nf_conntrack_tuple_hash *h = NULL;
1075 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
1076 u_int8_t u3 = nfmsg->nfgen_family;
1077 int err = 0;
1078
1079 DEBUGP("entered %s\n", __FUNCTION__);
1080
1081 if (nfattr_bad_size(cda, CTA_MAX, cta_min))
1082 return -EINVAL;
1083
1084 if (cda[CTA_TUPLE_ORIG-1]) {
1085 err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
1086 if (err < 0)
1087 return err;
1088 }
1089
1090 if (cda[CTA_TUPLE_REPLY-1]) {
1091 err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3);
1092 if (err < 0)
1093 return err;
1094 }
1095
1096 write_lock_bh(&nf_conntrack_lock);
1097 if (cda[CTA_TUPLE_ORIG-1])
1098 h = __nf_conntrack_find(&otuple, NULL);
1099 else if (cda[CTA_TUPLE_REPLY-1])
1100 h = __nf_conntrack_find(&rtuple, NULL);
1101
1102 if (h == NULL) {
1103 write_unlock_bh(&nf_conntrack_lock);
1104 DEBUGP("no such conntrack, create new\n");
1105 err = -ENOENT;
1106 if (nlh->nlmsg_flags & NLM_F_CREATE)
1107 err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
1108 return err;
1109 }
1110 /* implicit 'else' */
1111
1112 /* we only allow nat config for new conntracks */
1113 if (cda[CTA_NAT-1]) {
1114 err = -EINVAL;
1115 goto out_unlock;
1116 }
1117
1118 /* We manipulate the conntrack inside the global conntrack table lock,
1119 * so there's no need to increase the refcount */
1120 DEBUGP("conntrack found\n");
1121 err = -EEXIST;
1122 if (!(nlh->nlmsg_flags & NLM_F_EXCL))
1123 err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda);
1124
1125out_unlock:
1126 write_unlock_bh(&nf_conntrack_lock);
1127 return err;
1128}
1129
1130/***********************************************************************
1131 * EXPECT
1132 ***********************************************************************/
1133
1134static inline int
1135ctnetlink_exp_dump_tuple(struct sk_buff *skb,
1136 const struct nf_conntrack_tuple *tuple,
1137 enum ctattr_expect type)
1138{
1139 struct nfattr *nest_parms = NFA_NEST(skb, type);
1140
1141 if (ctnetlink_dump_tuples(skb, tuple) < 0)
1142 goto nfattr_failure;
1143
1144 NFA_NEST_END(skb, nest_parms);
1145
1146 return 0;
1147
1148nfattr_failure:
1149 return -1;
1150}
1151
1152static inline int
1153ctnetlink_exp_dump_expect(struct sk_buff *skb,
1154 const struct nf_conntrack_expect *exp)
1155{
1156 struct nf_conn *master = exp->master;
1157 u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
1158 u_int32_t id = htonl(exp->id);
1159
1160 if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
1161 goto nfattr_failure;
1162 if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0)
1163 goto nfattr_failure;
1164 if (ctnetlink_exp_dump_tuple(skb,
1165 &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
1166 CTA_EXPECT_MASTER) < 0)
1167 goto nfattr_failure;
1168
1169 NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
1170 NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);
1171
1172 return 0;
1173
1174nfattr_failure:
1175 return -1;
1176}
1177
/* Build one complete expectation message (nlmsghdr + nfgenmsg +
 * attributes) for @exp into @skb.  Returns skb->len on success; on
 * failure the partially written message is trimmed off again and -1 is
 * returned.  NLMSG_PUT jumps to nlmsg_failure when the skb is full. */
static int
ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
		    int event,
		    int nowait,
		    const struct nf_conntrack_expect *exp)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	unsigned char *b;

	/* remember the message start for length patching and rollback */
	b = skb->tail;

	/* message type carries the subsystem id in the high byte */
	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);

	/* during a dump (nowait && pid) more messages will follow */
	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
	nfmsg->nfgen_family = exp->tuple.src.l3num;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
		goto nfattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
nfattr_failure:
	/* undo the partial message */
	skb_trim(skb, b - skb->data);
	return -1;
}
1210
#ifdef CONFIG_NF_CONNTRACK_EVENTS
/* Expectation event notifier: broadcast a newly registered expectation
 * to NFNLGRP_CONNTRACK_EXP_NEW listeners.  May run in atomic context,
 * hence GFP_ATOMIC; all failures are swallowed (NOTIFY_DONE is
 * returned unconditionally). */
static int ctnetlink_expect_event(struct notifier_block *this,
				  unsigned long events, void *ptr)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
	struct sk_buff *skb;
	unsigned int type;
	unsigned char *b;
	int flags = 0;

	/* only new expectations are reported; ignore everything else */
	if (events & IPEXP_NEW) {
		type = IPCTNL_MSG_EXP_NEW;
		flags = NLM_F_CREATE|NLM_F_EXCL;
	} else
		return NOTIFY_DONE;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
	if (!skb)
		return NOTIFY_DONE;

	b = skb->tail;

	/* Expectation messages belong to the _EXP subsystem.  Using
	 * NFNL_SUBSYS_CTNETLINK here mislabelled the event and was
	 * inconsistent with ctnetlink_exp_fill_info(). */
	type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
	nfmsg = NLMSG_DATA(nlh);

	nlh->nlmsg_flags = flags;
	nfmsg->nfgen_family = exp->tuple.src.l3num;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
		goto nfattr_failure;

	nlh->nlmsg_len = skb->tail - b;
	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
	return NOTIFY_DONE;

nlmsg_failure:
nfattr_failure:
	kfree_skb(skb);
	return NOTIFY_DONE;
}
#endif
1257
1258static int
1259ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
1260{
1261 struct nf_conntrack_expect *exp = NULL;
1262 struct list_head *i;
1263 u_int32_t *id = (u_int32_t *) &cb->args[0];
1264 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
1265 u_int8_t l3proto = nfmsg->nfgen_family;
1266
1267 DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id);
1268
1269 read_lock_bh(&nf_conntrack_lock);
1270 list_for_each_prev(i, &nf_conntrack_expect_list) {
1271 exp = (struct nf_conntrack_expect *) i;
1272 if (l3proto && exp->tuple.src.l3num != l3proto)
1273 continue;
1274 if (exp->id <= *id)
1275 continue;
1276 if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
1277 cb->nlh->nlmsg_seq,
1278 IPCTNL_MSG_EXP_NEW,
1279 1, exp) < 0)
1280 goto out;
1281 *id = exp->id;
1282 }
1283out:
1284 read_unlock_bh(&nf_conntrack_lock);
1285
1286 DEBUGP("leaving, last id=%llu\n", *id);
1287
1288 return skb->len;
1289}
1290
/* Minimum payload size for each CTA_EXPECT_* attribute; enforced with
 * nfattr_bad_size() before any attribute data is dereferenced. */
static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
	[CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t),
	[CTA_EXPECT_ID-1] = sizeof(u_int32_t)
};
1295
/* IPCTNL_MSG_EXP_GET handler.  With NLM_F_DUMP the request is handed
 * to netlink_dump_start() and consumed; otherwise a single expectation
 * is looked up by its master tuple (and optional id) and unicast back
 * to the requester. */
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_expect *exp;
	struct sk_buff *skb2;
	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
	u_int8_t u3 = nfmsg->nfgen_family;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	/* reject attributes shorter than their declared minimum size */
	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
		return -EINVAL;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		u32 rlen;

		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
						ctnetlink_exp_dump_table,
						ctnetlink_done)) != 0)
			return -EINVAL;
		/* strip the processed message from the request skb */
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;
		skb_pull(skb, rlen);
		return 0;
	}

	/* a single-entry GET requires the master tuple as lookup key */
	if (cda[CTA_EXPECT_MASTER-1])
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
	else
		return -EINVAL;

	if (err < 0)
		return err;

	/* takes a reference on the expectation; it is dropped on every
	 * exit path below */
	exp = nf_conntrack_expect_find(&tuple);
	if (!exp)
		return -ENOENT;

	/* when the caller supplied an id, it must match as well */
	if (cda[CTA_EXPECT_ID-1]) {
		u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
		if (exp->id != ntohl(id)) {
			nf_conntrack_expect_put(exp);
			return -ENOENT;
		}
	}

	err = -ENOMEM;
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb2)
		goto out;
	NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;

	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
				      1, exp);
	if (err <= 0)
		goto free;

	nf_conntrack_expect_put(exp);

	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);

free:
	kfree_skb(skb2);
out:
	nf_conntrack_expect_put(exp);
	return err;
}
1368
/* IPCTNL_MSG_EXP_DELETE handler.  Three modes, selected by the
 * attributes present:
 *   - CTA_EXPECT_TUPLE (+ optional id): delete a single expectation;
 *   - CTA_EXPECT_HELP_NAME: delete all expectations of one helper;
 *   - neither: flush every expectation. */
static int
ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct nf_conntrack_expect *exp, *tmp;
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_helper *h;
	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
	u_int8_t u3 = nfmsg->nfgen_family;
	int err;

	/* reject attributes shorter than their declared minimum size */
	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
		return -EINVAL;

	if (cda[CTA_EXPECT_TUPLE-1]) {
		/* delete a single expect by tuple */
		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
		if (err < 0)
			return err;

		/* bump usage count to 2 */
		exp = nf_conntrack_expect_find(&tuple);
		if (!exp)
			return -ENOENT;

		/* optional id must match the found entry */
		if (cda[CTA_EXPECT_ID-1]) {
			u_int32_t id =
				*(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
			if (exp->id != ntohl(id)) {
				nf_conntrack_expect_put(exp);
				return -ENOENT;
			}
		}

		/* after list removal, usage count == 1 */
		nf_conntrack_unexpect_related(exp);
		/* have to put what we 'get' above.
		 * after this line usage count == 0 */
		nf_conntrack_expect_put(exp);
	} else if (cda[CTA_EXPECT_HELP_NAME-1]) {
		char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);

		/* delete all expectations for this helper */
		write_lock_bh(&nf_conntrack_lock);
		h = __nf_conntrack_helper_find_byname(name);
		if (!h) {
			write_unlock_bh(&nf_conntrack_lock);
			return -EINVAL;
		}
		list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list,
					 list) {
			/* only tear down entries whose timer we stopped
			 * ourselves (presumably expiry handles the rest
			 * — NOTE(review): confirm against timer code) */
			if (exp->master->helper == h
			    && del_timer(&exp->timeout)) {
				nf_ct_unlink_expect(exp);
				nf_conntrack_expect_put(exp);
			}
		}
		write_unlock_bh(&nf_conntrack_lock);
	} else {
		/* This basically means we have to flush everything*/
		write_lock_bh(&nf_conntrack_lock);
		list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list,
					 list) {
			if (del_timer(&exp->timeout)) {
				nf_ct_unlink_expect(exp);
				nf_conntrack_expect_put(exp);
			}
		}
		write_unlock_bh(&nf_conntrack_lock);
	}

	return 0;
}
/* Modifying an existing expectation is not implemented: a NEW request
 * that matches an existing entry (without NLM_F_EXCL) fails with
 * -EOPNOTSUPP. */
static int
ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nfattr *cda[])
{
	return -EOPNOTSUPP;
}
1447
/* Create a new expectation from CTA_EXPECT_TUPLE, _MASK and _MASTER
 * (the caller has already verified all three attributes are present).
 * The master conntrack must exist and have a helper attached. */
static int
ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3)
{
	struct nf_conntrack_tuple tuple, mask, master_tuple;
	struct nf_conntrack_tuple_hash *h = NULL;
	struct nf_conntrack_expect *exp;
	struct nf_conn *ct;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	/* caller guarantees that those three CTA_EXPECT_* exist */
	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
	if (err < 0)
		return err;
	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
	if (err < 0)
		return err;
	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
	if (err < 0)
		return err;

	/* Look for master conntrack of this expectation; takes a
	 * reference, dropped at out: below */
	h = nf_conntrack_find_get(&master_tuple, NULL);
	if (!h)
		return -ENOENT;
	ct = nf_ct_tuplehash_to_ctrack(h);

	if (!ct->helper) {
		/* such conntrack hasn't got any helper, abort */
		err = -EINVAL;
		goto out;
	}

	exp = nf_conntrack_expect_alloc(ct);
	if (!exp) {
		err = -ENOMEM;
		goto out;
	}

	exp->expectfn = NULL;
	exp->flags = 0;
	exp->master = ct;
	memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
	memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple));

	/* registers the expectation; drop our allocation reference
	 * regardless of the outcome */
	err = nf_conntrack_expect_related(exp);
	nf_conntrack_expect_put(exp);

out:
	nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
	return err;
}
1501
/* IPCTNL_MSG_EXP_NEW handler: create an expectation when none with the
 * given tuple exists (requires NLM_F_CREATE), otherwise attempt a
 * change — which ctnetlink_change_expect() rejects — or fail with
 * -EEXIST under NLM_F_EXCL. */
static int
ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_expect *exp;
	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
	u_int8_t u3 = nfmsg->nfgen_family;
	int err = 0;

	DEBUGP("entered %s\n", __FUNCTION__);

	/* reject attributes shorter than their declared minimum size */
	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
		return -EINVAL;

	/* all three attributes are mandatory for create and change */
	if (!cda[CTA_EXPECT_TUPLE-1]
	    || !cda[CTA_EXPECT_MASK-1]
	    || !cda[CTA_EXPECT_MASTER-1])
		return -EINVAL;

	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
	if (err < 0)
		return err;

	write_lock_bh(&nf_conntrack_lock);
	exp = __nf_conntrack_expect_find(&tuple);

	if (!exp) {
		/* the lock is released before creation — NOTE(review):
		 * another writer could insert the same tuple in between */
		write_unlock_bh(&nf_conntrack_lock);
		err = -ENOENT;
		if (nlh->nlmsg_flags & NLM_F_CREATE)
			err = ctnetlink_create_expect(cda, u3);
		return err;
	}

	err = -EEXIST;
	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
		err = ctnetlink_change_expect(exp, cda);
	write_unlock_bh(&nf_conntrack_lock);

	DEBUGP("leaving\n");

	return err;
}
1546
#ifdef CONFIG_NF_CONNTRACK_EVENTS
/* notifier forwarding conntrack state-change events to nfnetlink */
static struct notifier_block ctnl_notifier = {
	.notifier_call	= ctnetlink_conntrack_event,
};

/* notifier forwarding new-expectation events to nfnetlink */
static struct notifier_block ctnl_notifier_exp = {
	.notifier_call	= ctnetlink_expect_event,
};
#endif
1556
/* message-type -> handler dispatch table for the conntrack subsystem;
 * GET and GET_CTRZERO share one handler */
static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
					    .attr_count = CTA_MAX, },
	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
					    .attr_count = CTA_MAX, },
	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
					    .attr_count = CTA_MAX, },
	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
					    .attr_count = CTA_MAX, },
};
1567
/* message-type -> handler dispatch table for the expectation subsystem */
static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
					    .attr_count = CTA_EXPECT_MAX, },
	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
					    .attr_count = CTA_EXPECT_MAX, },
	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
					    .attr_count = CTA_EXPECT_MAX, },
};
1576
/* nfnetlink subsystem descriptor for conntrack messages */
static struct nfnetlink_subsystem ctnl_subsys = {
	.name				= "conntrack",
	.subsys_id			= NFNL_SUBSYS_CTNETLINK,
	.cb_count			= IPCTNL_MSG_MAX,
	.cb				= ctnl_cb,
};
1583
/* nfnetlink subsystem descriptor for expectation messages */
static struct nfnetlink_subsystem ctnl_exp_subsys = {
	.name				= "conntrack_expect",
	.subsys_id			= NFNL_SUBSYS_CTNETLINK_EXP,
	.cb_count			= IPCTNL_MSG_EXP_MAX,
	.cb				= ctnl_exp_cb,
};
1590
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
/* this module also registers the expectation subsystem
 * (ctnl_exp_subsys), so it must autoload for that subsys id too */
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
1592
/* Module init: register both nfnetlink subsystems and, when conntrack
 * events are enabled, both event notifiers.  On any failure the steps
 * already taken are unwound in reverse order via the goto chain. */
static int __init ctnetlink_init(void)
{
	int ret;

	printk("ctnetlink v%s: registering with nfnetlink.\n", version);
	ret = nfnetlink_subsys_register(&ctnl_subsys);
	if (ret < 0) {
		printk("ctnetlink_init: cannot register with nfnetlink.\n");
		goto err_out;
	}

	ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
	if (ret < 0) {
		printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
		goto err_unreg_subsys;
	}

#ifdef CONFIG_NF_CONNTRACK_EVENTS
	ret = nf_conntrack_register_notifier(&ctnl_notifier);
	if (ret < 0) {
		printk("ctnetlink_init: cannot register notifier.\n");
		goto err_unreg_exp_subsys;
	}

	ret = nf_conntrack_expect_register_notifier(&ctnl_notifier_exp);
	if (ret < 0) {
		printk("ctnetlink_init: cannot expect register notifier.\n");
		goto err_unreg_notifier;
	}
#endif

	return 0;

#ifdef CONFIG_NF_CONNTRACK_EVENTS
err_unreg_notifier:
	nf_conntrack_unregister_notifier(&ctnl_notifier);
err_unreg_exp_subsys:
	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
#endif
err_unreg_subsys:
	nfnetlink_subsys_unregister(&ctnl_subsys);
err_out:
	return ret;
}
1637
1638static void __exit ctnetlink_exit(void)
1639{
1640 printk("ctnetlink: unregistering from nfnetlink.\n");
1641
1642#ifdef CONFIG_NF_CONNTRACK_EVENTS
1643 nf_conntrack_unregister_notifier(&ctnl_notifier_exp);
1644 nf_conntrack_unregister_notifier(&ctnl_notifier);
1645#endif
1646
1647 nfnetlink_subsys_unregister(&ctnl_exp_subsys);
1648 nfnetlink_subsys_unregister(&ctnl_subsys);
1649 return;
1650}
1651
/* module entry/exit points */
module_init(ctnetlink_init);
module_exit(ctnetlink_exit);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 36425f6c833f..46bc27e2756d 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,7 +17,7 @@
17#include <linux/netfilter.h> 17#include <linux/netfilter.h>
18#include <net/netfilter/nf_conntrack_protocol.h> 18#include <net/netfilter/nf_conntrack_protocol.h>
19 19
20unsigned long nf_ct_generic_timeout = 600*HZ; 20unsigned int nf_ct_generic_timeout = 600*HZ;
21 21
22static int generic_pkt_to_tuple(const struct sk_buff *skb, 22static int generic_pkt_to_tuple(const struct sk_buff *skb,
23 unsigned int dataoff, 23 unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3a600f77b4e0..cf798e61e379 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -62,15 +62,15 @@ static const char *sctp_conntrack_names[] = {
62#define HOURS * 60 MINS 62#define HOURS * 60 MINS
63#define DAYS * 24 HOURS 63#define DAYS * 24 HOURS
64 64
65static unsigned long nf_ct_sctp_timeout_closed = 10 SECS; 65static unsigned int nf_ct_sctp_timeout_closed = 10 SECS;
66static unsigned long nf_ct_sctp_timeout_cookie_wait = 3 SECS; 66static unsigned int nf_ct_sctp_timeout_cookie_wait = 3 SECS;
67static unsigned long nf_ct_sctp_timeout_cookie_echoed = 3 SECS; 67static unsigned int nf_ct_sctp_timeout_cookie_echoed = 3 SECS;
68static unsigned long nf_ct_sctp_timeout_established = 5 DAYS; 68static unsigned int nf_ct_sctp_timeout_established = 5 DAYS;
69static unsigned long nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; 69static unsigned int nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
70static unsigned long nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; 70static unsigned int nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
71static unsigned long nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; 71static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
72 72
73static unsigned long * sctp_timeouts[] 73static unsigned int * sctp_timeouts[]
74= { NULL, /* SCTP_CONNTRACK_NONE */ 74= { NULL, /* SCTP_CONNTRACK_NONE */
75 &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ 75 &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
76 &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ 76 &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 6035633d8225..df99138c3b3b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -93,21 +93,21 @@ static const char *tcp_conntrack_names[] = {
93#define HOURS * 60 MINS 93#define HOURS * 60 MINS
94#define DAYS * 24 HOURS 94#define DAYS * 24 HOURS
95 95
96unsigned long nf_ct_tcp_timeout_syn_sent = 2 MINS; 96unsigned int nf_ct_tcp_timeout_syn_sent = 2 MINS;
97unsigned long nf_ct_tcp_timeout_syn_recv = 60 SECS; 97unsigned int nf_ct_tcp_timeout_syn_recv = 60 SECS;
98unsigned long nf_ct_tcp_timeout_established = 5 DAYS; 98unsigned int nf_ct_tcp_timeout_established = 5 DAYS;
99unsigned long nf_ct_tcp_timeout_fin_wait = 2 MINS; 99unsigned int nf_ct_tcp_timeout_fin_wait = 2 MINS;
100unsigned long nf_ct_tcp_timeout_close_wait = 60 SECS; 100unsigned int nf_ct_tcp_timeout_close_wait = 60 SECS;
101unsigned long nf_ct_tcp_timeout_last_ack = 30 SECS; 101unsigned int nf_ct_tcp_timeout_last_ack = 30 SECS;
102unsigned long nf_ct_tcp_timeout_time_wait = 2 MINS; 102unsigned int nf_ct_tcp_timeout_time_wait = 2 MINS;
103unsigned long nf_ct_tcp_timeout_close = 10 SECS; 103unsigned int nf_ct_tcp_timeout_close = 10 SECS;
104 104
105/* RFC1122 says the R2 limit should be at least 100 seconds. 105/* RFC1122 says the R2 limit should be at least 100 seconds.
106 Linux uses 15 packets as limit, which corresponds 106 Linux uses 15 packets as limit, which corresponds
107 to ~13-30min depending on RTO. */ 107 to ~13-30min depending on RTO. */
108unsigned long nf_ct_tcp_timeout_max_retrans = 5 MINS; 108unsigned int nf_ct_tcp_timeout_max_retrans = 5 MINS;
109 109
110static unsigned long * tcp_timeouts[] 110static unsigned int * tcp_timeouts[]
111= { NULL, /* TCP_CONNTRACK_NONE */ 111= { NULL, /* TCP_CONNTRACK_NONE */
112 &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ 112 &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
113 &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ 113 &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
@@ -988,7 +988,7 @@ static int tcp_packet(struct nf_conn *conntrack,
988 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) 988 || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
989 && conntrack->proto.tcp.last_index == TCP_ACK_SET)) 989 && conntrack->proto.tcp.last_index == TCP_ACK_SET))
990 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { 990 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
991 /* RST sent to invalid SYN or ACK we had let trough 991 /* RST sent to invalid SYN or ACK we had let through
992 * at a) and c) above: 992 * at a) and c) above:
993 * 993 *
994 * a) SYN was in window then 994 * a) SYN was in window then
@@ -999,7 +999,7 @@ static int tcp_packet(struct nf_conn *conntrack,
999 * segments we ignored. */ 999 * segments we ignored. */
1000 goto in_window; 1000 goto in_window;
1001 } 1001 }
1002 /* Just fall trough */ 1002 /* Just fall through */
1003 default: 1003 default:
1004 /* Keep compilers happy. */ 1004 /* Keep compilers happy. */
1005 break; 1005 break;
@@ -1147,6 +1147,63 @@ static int tcp_new(struct nf_conn *conntrack,
1147 receiver->td_scale); 1147 receiver->td_scale);
1148 return 1; 1148 return 1;
1149} 1149}
1150
1151#if defined(CONFIG_NF_CT_NETLINK) || \
1152 defined(CONFIG_NF_CT_NETLINK_MODULE)
1153
1154#include <linux/netfilter/nfnetlink.h>
1155#include <linux/netfilter/nfnetlink_conntrack.h>
1156
1157static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
1158 const struct nf_conn *ct)
1159{
1160 struct nfattr *nest_parms;
1161
1162 read_lock_bh(&tcp_lock);
1163 nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
1164 NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
1165 &ct->proto.tcp.state);
1166 read_unlock_bh(&tcp_lock);
1167
1168 NFA_NEST_END(skb, nest_parms);
1169
1170 return 0;
1171
1172nfattr_failure:
1173 read_unlock_bh(&tcp_lock);
1174 return -1;
1175}
1176
1177static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
1178 [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
1179};
1180
1181static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
1182{
1183 struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
1184 struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
1185
1186 /* updates could not contain anything about the private
1187 * protocol info, in that case skip the parsing */
1188 if (!attr)
1189 return 0;
1190
1191 nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
1192
1193 if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
1194 return -EINVAL;
1195
1196 if (!tb[CTA_PROTOINFO_TCP_STATE-1])
1197 return -EINVAL;
1198
1199 write_lock_bh(&tcp_lock);
1200 ct->proto.tcp.state =
1201 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
1202 write_unlock_bh(&tcp_lock);
1203
1204 return 0;
1205}
1206#endif
1150 1207
1151struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = 1208struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
1152{ 1209{
@@ -1160,6 +1217,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
1160 .packet = tcp_packet, 1217 .packet = tcp_packet,
1161 .new = tcp_new, 1218 .new = tcp_new,
1162 .error = tcp_error4, 1219 .error = tcp_error4,
1220#if defined(CONFIG_NF_CT_NETLINK) || \
1221 defined(CONFIG_NF_CT_NETLINK_MODULE)
1222 .to_nfattr = tcp_to_nfattr,
1223 .from_nfattr = nfattr_to_tcp,
1224 .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
1225 .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
1226#endif
1163}; 1227};
1164 1228
1165struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = 1229struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
@@ -1174,6 +1238,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
1174 .packet = tcp_packet, 1238 .packet = tcp_packet,
1175 .new = tcp_new, 1239 .new = tcp_new,
1176 .error = tcp_error6, 1240 .error = tcp_error6,
1241#if defined(CONFIG_NF_CT_NETLINK) || \
1242 defined(CONFIG_NF_CT_NETLINK_MODULE)
1243 .to_nfattr = tcp_to_nfattr,
1244 .from_nfattr = nfattr_to_tcp,
1245 .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
1246 .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
1247#endif
1177}; 1248};
1178 1249
1179EXPORT_SYMBOL(nf_conntrack_protocol_tcp4); 1250EXPORT_SYMBOL(nf_conntrack_protocol_tcp4);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3cae7ce420dd..4264dd079a16 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -27,8 +27,8 @@
27#include <linux/netfilter_ipv6.h> 27#include <linux/netfilter_ipv6.h>
28#include <net/netfilter/nf_conntrack_protocol.h> 28#include <net/netfilter/nf_conntrack_protocol.h>
29 29
30unsigned long nf_ct_udp_timeout = 30*HZ; 30unsigned int nf_ct_udp_timeout = 30*HZ;
31unsigned long nf_ct_udp_timeout_stream = 180*HZ; 31unsigned int nf_ct_udp_timeout_stream = 180*HZ;
32 32
33static int udp_pkt_to_tuple(const struct sk_buff *skb, 33static int udp_pkt_to_tuple(const struct sk_buff *skb,
34 unsigned int dataoff, 34 unsigned int dataoff,
@@ -196,6 +196,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp4 =
196 .packet = udp_packet, 196 .packet = udp_packet,
197 .new = udp_new, 197 .new = udp_new,
198 .error = udp_error4, 198 .error = udp_error4,
199#if defined(CONFIG_NF_CT_NETLINK) || \
200 defined(CONFIG_NF_CT_NETLINK_MODULE)
201 .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
202 .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
203#endif
199}; 204};
200 205
201struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = 206struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
@@ -210,6 +215,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
210 .packet = udp_packet, 215 .packet = udp_packet,
211 .new = udp_new, 216 .new = udp_new,
212 .error = udp_error6, 217 .error = udp_error6,
218#if defined(CONFIG_NF_CT_NETLINK) || \
219 defined(CONFIG_NF_CT_NETLINK_MODULE)
220 .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr,
221 .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple,
222#endif
213}; 223};
214 224
215EXPORT_SYMBOL(nf_conntrack_protocol_udp4); 225EXPORT_SYMBOL(nf_conntrack_protocol_udp4);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5af381f9fe3d..3531d142f693 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -161,14 +161,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
161 if (NF_CT_DIRECTION(hash)) 161 if (NF_CT_DIRECTION(hash))
162 return 0; 162 return 0;
163 163
164 l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] 164 l3proto = __nf_ct_l3proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
165 .tuple.src.l3num); 165 .tuple.src.l3num);
166 166
167 NF_CT_ASSERT(l3proto); 167 NF_CT_ASSERT(l3proto);
168 proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] 168 proto = __nf_ct_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
169 .tuple.src.l3num, 169 .tuple.src.l3num,
170 conntrack->tuplehash[IP_CT_DIR_ORIGINAL] 170 conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
171 .tuple.dst.protonum); 171 .tuple.dst.protonum);
172 NF_CT_ASSERT(proto); 172 NF_CT_ASSERT(proto);
173 173
174 if (seq_printf(s, "%-8s %u %-8s %u %ld ", 174 if (seq_printf(s, "%-8s %u %-8s %u %ld ",
@@ -307,9 +307,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
307 expect->tuple.src.l3num, 307 expect->tuple.src.l3num,
308 expect->tuple.dst.protonum); 308 expect->tuple.dst.protonum);
309 print_tuple(s, &expect->tuple, 309 print_tuple(s, &expect->tuple,
310 nf_ct_find_l3proto(expect->tuple.src.l3num), 310 __nf_ct_l3proto_find(expect->tuple.src.l3num),
311 nf_ct_find_proto(expect->tuple.src.l3num, 311 __nf_ct_proto_find(expect->tuple.src.l3num,
312 expect->tuple.dst.protonum)); 312 expect->tuple.dst.protonum));
313 return seq_putc(s, '\n'); 313 return seq_putc(s, '\n');
314} 314}
315 315
@@ -431,25 +431,25 @@ extern int nf_conntrack_max;
431extern unsigned int nf_conntrack_htable_size; 431extern unsigned int nf_conntrack_htable_size;
432 432
433/* From nf_conntrack_proto_tcp.c */ 433/* From nf_conntrack_proto_tcp.c */
434extern unsigned long nf_ct_tcp_timeout_syn_sent; 434extern unsigned int nf_ct_tcp_timeout_syn_sent;
435extern unsigned long nf_ct_tcp_timeout_syn_recv; 435extern unsigned int nf_ct_tcp_timeout_syn_recv;
436extern unsigned long nf_ct_tcp_timeout_established; 436extern unsigned int nf_ct_tcp_timeout_established;
437extern unsigned long nf_ct_tcp_timeout_fin_wait; 437extern unsigned int nf_ct_tcp_timeout_fin_wait;
438extern unsigned long nf_ct_tcp_timeout_close_wait; 438extern unsigned int nf_ct_tcp_timeout_close_wait;
439extern unsigned long nf_ct_tcp_timeout_last_ack; 439extern unsigned int nf_ct_tcp_timeout_last_ack;
440extern unsigned long nf_ct_tcp_timeout_time_wait; 440extern unsigned int nf_ct_tcp_timeout_time_wait;
441extern unsigned long nf_ct_tcp_timeout_close; 441extern unsigned int nf_ct_tcp_timeout_close;
442extern unsigned long nf_ct_tcp_timeout_max_retrans; 442extern unsigned int nf_ct_tcp_timeout_max_retrans;
443extern int nf_ct_tcp_loose; 443extern int nf_ct_tcp_loose;
444extern int nf_ct_tcp_be_liberal; 444extern int nf_ct_tcp_be_liberal;
445extern int nf_ct_tcp_max_retrans; 445extern int nf_ct_tcp_max_retrans;
446 446
447/* From nf_conntrack_proto_udp.c */ 447/* From nf_conntrack_proto_udp.c */
448extern unsigned long nf_ct_udp_timeout; 448extern unsigned int nf_ct_udp_timeout;
449extern unsigned long nf_ct_udp_timeout_stream; 449extern unsigned int nf_ct_udp_timeout_stream;
450 450
451/* From nf_conntrack_proto_generic.c */ 451/* From nf_conntrack_proto_generic.c */
452extern unsigned long nf_ct_generic_timeout; 452extern unsigned int nf_ct_generic_timeout;
453 453
454/* Log invalid packets of a given protocol */ 454/* Log invalid packets of a given protocol */
455static int log_invalid_proto_min = 0; 455static int log_invalid_proto_min = 0;
@@ -847,7 +847,11 @@ EXPORT_SYMBOL(nf_conntrack_helper_unregister);
847EXPORT_SYMBOL(nf_ct_iterate_cleanup); 847EXPORT_SYMBOL(nf_ct_iterate_cleanup);
848EXPORT_SYMBOL(__nf_ct_refresh_acct); 848EXPORT_SYMBOL(__nf_ct_refresh_acct);
849EXPORT_SYMBOL(nf_ct_protos); 849EXPORT_SYMBOL(nf_ct_protos);
850EXPORT_SYMBOL(nf_ct_find_proto); 850EXPORT_SYMBOL(__nf_ct_proto_find);
851EXPORT_SYMBOL(nf_ct_proto_find_get);
852EXPORT_SYMBOL(nf_ct_proto_put);
853EXPORT_SYMBOL(nf_ct_l3proto_find_get);
854EXPORT_SYMBOL(nf_ct_l3proto_put);
851EXPORT_SYMBOL(nf_ct_l3protos); 855EXPORT_SYMBOL(nf_ct_l3protos);
852EXPORT_SYMBOL(nf_conntrack_expect_alloc); 856EXPORT_SYMBOL(nf_conntrack_expect_alloc);
853EXPORT_SYMBOL(nf_conntrack_expect_put); 857EXPORT_SYMBOL(nf_conntrack_expect_put);
@@ -867,3 +871,21 @@ EXPORT_SYMBOL(nf_ct_get_tuple);
867EXPORT_SYMBOL(nf_ct_invert_tuple); 871EXPORT_SYMBOL(nf_ct_invert_tuple);
868EXPORT_SYMBOL(nf_conntrack_in); 872EXPORT_SYMBOL(nf_conntrack_in);
869EXPORT_SYMBOL(__nf_conntrack_attach); 873EXPORT_SYMBOL(__nf_conntrack_attach);
874EXPORT_SYMBOL(nf_conntrack_alloc);
875EXPORT_SYMBOL(nf_conntrack_free);
876EXPORT_SYMBOL(nf_conntrack_flush);
877EXPORT_SYMBOL(nf_ct_remove_expectations);
878EXPORT_SYMBOL(nf_ct_helper_find_get);
879EXPORT_SYMBOL(nf_ct_helper_put);
880EXPORT_SYMBOL(__nf_conntrack_helper_find_byname);
881EXPORT_SYMBOL(__nf_conntrack_find);
882EXPORT_SYMBOL(nf_ct_unlink_expect);
883EXPORT_SYMBOL(nf_conntrack_hash_insert);
884EXPORT_SYMBOL(__nf_conntrack_expect_find);
885EXPORT_SYMBOL(nf_conntrack_expect_find);
886EXPORT_SYMBOL(nf_conntrack_expect_list);
887#if defined(CONFIG_NF_CT_NETLINK) || \
888 defined(CONFIG_NF_CT_NETLINK_MODULE)
889EXPORT_SYMBOL(nf_ct_port_tuple_to_nfattr);
890EXPORT_SYMBOL(nf_ct_port_nfattr_to_tuple);
891#endif
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index cba63729313d..e10512e229b6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -151,7 +151,7 @@ instance_create(u_int16_t group_num, int pid)
151 goto out_unlock; 151 goto out_unlock;
152 152
153 INIT_HLIST_NODE(&inst->hlist); 153 INIT_HLIST_NODE(&inst->hlist);
154 inst->lock = SPIN_LOCK_UNLOCKED; 154 spin_lock_init(&inst->lock);
155 /* needs to be two, since we _put() after creation */ 155 /* needs to be two, since we _put() after creation */
156 atomic_set(&inst->use, 2); 156 atomic_set(&inst->use, 2);
157 157
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f28460b61e47..18ed9c5d209c 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -148,7 +148,7 @@ instance_create(u_int16_t queue_num, int pid)
148 atomic_set(&inst->id_sequence, 0); 148 atomic_set(&inst->id_sequence, 0);
149 /* needs to be two, since we _put() after creation */ 149 /* needs to be two, since we _put() after creation */
150 atomic_set(&inst->use, 2); 150 atomic_set(&inst->use, 2);
151 inst->lock = SPIN_LOCK_UNLOCKED; 151 spin_lock_init(&inst->lock);
152 INIT_LIST_HEAD(&inst->queue_list); 152 INIT_LIST_HEAD(&inst->queue_list);
153 153
154 if (!try_module_get(THIS_MODULE)) 154 if (!try_module_get(THIS_MODULE))
@@ -345,6 +345,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
345 struct nfqnl_msg_packet_hdr pmsg; 345 struct nfqnl_msg_packet_hdr pmsg;
346 struct nlmsghdr *nlh; 346 struct nlmsghdr *nlh;
347 struct nfgenmsg *nfmsg; 347 struct nfgenmsg *nfmsg;
348 struct nf_info *entinf = entry->info;
349 struct sk_buff *entskb = entry->skb;
350 struct net_device *indev;
351 struct net_device *outdev;
348 unsigned int tmp_uint; 352 unsigned int tmp_uint;
349 353
350 QDEBUG("entered\n"); 354 QDEBUG("entered\n");
@@ -361,6 +365,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
361 + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw)) 365 + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw))
362 + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); 366 + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp));
363 367
368 outdev = entinf->outdev;
369
364 spin_lock_bh(&queue->lock); 370 spin_lock_bh(&queue->lock);
365 371
366 switch (queue->copy_mode) { 372 switch (queue->copy_mode) {
@@ -370,15 +376,15 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
370 break; 376 break;
371 377
372 case NFQNL_COPY_PACKET: 378 case NFQNL_COPY_PACKET:
373 if (entry->skb->ip_summed == CHECKSUM_HW && 379 if (entskb->ip_summed == CHECKSUM_HW &&
374 (*errp = skb_checksum_help(entry->skb, 380 (*errp = skb_checksum_help(entskb,
375 entry->info->outdev == NULL))) { 381 outdev == NULL))) {
376 spin_unlock_bh(&queue->lock); 382 spin_unlock_bh(&queue->lock);
377 return NULL; 383 return NULL;
378 } 384 }
379 if (queue->copy_range == 0 385 if (queue->copy_range == 0
380 || queue->copy_range > entry->skb->len) 386 || queue->copy_range > entskb->len)
381 data_len = entry->skb->len; 387 data_len = entskb->len;
382 else 388 else
383 data_len = queue->copy_range; 389 data_len = queue->copy_range;
384 390
@@ -402,29 +408,30 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
402 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, 408 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
403 sizeof(struct nfgenmsg)); 409 sizeof(struct nfgenmsg));
404 nfmsg = NLMSG_DATA(nlh); 410 nfmsg = NLMSG_DATA(nlh);
405 nfmsg->nfgen_family = entry->info->pf; 411 nfmsg->nfgen_family = entinf->pf;
406 nfmsg->version = NFNETLINK_V0; 412 nfmsg->version = NFNETLINK_V0;
407 nfmsg->res_id = htons(queue->queue_num); 413 nfmsg->res_id = htons(queue->queue_num);
408 414
409 pmsg.packet_id = htonl(entry->id); 415 pmsg.packet_id = htonl(entry->id);
410 pmsg.hw_protocol = htons(entry->skb->protocol); 416 pmsg.hw_protocol = htons(entskb->protocol);
411 pmsg.hook = entry->info->hook; 417 pmsg.hook = entinf->hook;
412 418
413 NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); 419 NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
414 420
415 if (entry->info->indev) { 421 indev = entinf->indev;
416 tmp_uint = htonl(entry->info->indev->ifindex); 422 if (indev) {
423 tmp_uint = htonl(indev->ifindex);
417#ifndef CONFIG_BRIDGE_NETFILTER 424#ifndef CONFIG_BRIDGE_NETFILTER
418 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); 425 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint);
419#else 426#else
420 if (entry->info->pf == PF_BRIDGE) { 427 if (entinf->pf == PF_BRIDGE) {
421 /* Case 1: indev is physical input device, we need to 428 /* Case 1: indev is physical input device, we need to
422 * look for bridge group (when called from 429 * look for bridge group (when called from
423 * netfilter_bridge) */ 430 * netfilter_bridge) */
424 NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), 431 NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint),
425 &tmp_uint); 432 &tmp_uint);
426 /* this is the bridge group "brX" */ 433 /* this is the bridge group "brX" */
427 tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex); 434 tmp_uint = htonl(indev->br_port->br->dev->ifindex);
428 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), 435 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
429 &tmp_uint); 436 &tmp_uint);
430 } else { 437 } else {
@@ -432,9 +439,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
432 * physical device (when called from ipv4) */ 439 * physical device (when called from ipv4) */
433 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), 440 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
434 &tmp_uint); 441 &tmp_uint);
435 if (entry->skb->nf_bridge 442 if (entskb->nf_bridge
436 && entry->skb->nf_bridge->physindev) { 443 && entskb->nf_bridge->physindev) {
437 tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex); 444 tmp_uint = htonl(entskb->nf_bridge->physindev->ifindex);
438 NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, 445 NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
439 sizeof(tmp_uint), &tmp_uint); 446 sizeof(tmp_uint), &tmp_uint);
440 } 447 }
@@ -442,19 +449,19 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
442#endif 449#endif
443 } 450 }
444 451
445 if (entry->info->outdev) { 452 if (outdev) {
446 tmp_uint = htonl(entry->info->outdev->ifindex); 453 tmp_uint = htonl(outdev->ifindex);
447#ifndef CONFIG_BRIDGE_NETFILTER 454#ifndef CONFIG_BRIDGE_NETFILTER
448 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); 455 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint);
449#else 456#else
450 if (entry->info->pf == PF_BRIDGE) { 457 if (entinf->pf == PF_BRIDGE) {
451 /* Case 1: outdev is physical output device, we need to 458 /* Case 1: outdev is physical output device, we need to
452 * look for bridge group (when called from 459 * look for bridge group (when called from
453 * netfilter_bridge) */ 460 * netfilter_bridge) */
454 NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), 461 NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint),
455 &tmp_uint); 462 &tmp_uint);
456 /* this is the bridge group "brX" */ 463 /* this is the bridge group "brX" */
457 tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex); 464 tmp_uint = htonl(outdev->br_port->br->dev->ifindex);
458 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), 465 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
459 &tmp_uint); 466 &tmp_uint);
460 } else { 467 } else {
@@ -462,9 +469,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
462 * physical output device (when called from ipv4) */ 469 * physical output device (when called from ipv4) */
463 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), 470 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
464 &tmp_uint); 471 &tmp_uint);
465 if (entry->skb->nf_bridge 472 if (entskb->nf_bridge
466 && entry->skb->nf_bridge->physoutdev) { 473 && entskb->nf_bridge->physoutdev) {
467 tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex); 474 tmp_uint = htonl(entskb->nf_bridge->physoutdev->ifindex);
468 NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, 475 NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
469 sizeof(tmp_uint), &tmp_uint); 476 sizeof(tmp_uint), &tmp_uint);
470 } 477 }
@@ -472,27 +479,27 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
472#endif 479#endif
473 } 480 }
474 481
475 if (entry->skb->nfmark) { 482 if (entskb->nfmark) {
476 tmp_uint = htonl(entry->skb->nfmark); 483 tmp_uint = htonl(entskb->nfmark);
477 NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); 484 NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
478 } 485 }
479 486
480 if (entry->info->indev && entry->skb->dev 487 if (indev && entskb->dev
481 && entry->skb->dev->hard_header_parse) { 488 && entskb->dev->hard_header_parse) {
482 struct nfqnl_msg_packet_hw phw; 489 struct nfqnl_msg_packet_hw phw;
483 490
484 phw.hw_addrlen = 491 phw.hw_addrlen =
485 entry->skb->dev->hard_header_parse(entry->skb, 492 entskb->dev->hard_header_parse(entskb,
486 phw.hw_addr); 493 phw.hw_addr);
487 phw.hw_addrlen = htons(phw.hw_addrlen); 494 phw.hw_addrlen = htons(phw.hw_addrlen);
488 NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); 495 NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
489 } 496 }
490 497
491 if (entry->skb->tstamp.off_sec) { 498 if (entskb->tstamp.off_sec) {
492 struct nfqnl_msg_packet_timestamp ts; 499 struct nfqnl_msg_packet_timestamp ts;
493 500
494 ts.sec = cpu_to_be64(entry->skb->tstamp.off_sec); 501 ts.sec = cpu_to_be64(entskb->tstamp.off_sec);
495 ts.usec = cpu_to_be64(entry->skb->tstamp.off_usec); 502 ts.usec = cpu_to_be64(entskb->tstamp.off_usec);
496 503
497 NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); 504 NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
498 } 505 }
@@ -510,7 +517,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
510 nfa->nfa_type = NFQA_PAYLOAD; 517 nfa->nfa_type = NFQA_PAYLOAD;
511 nfa->nfa_len = size; 518 nfa->nfa_len = size;
512 519
513 if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len)) 520 if (skb_copy_bits(entskb, 0, NFA_DATA(nfa), data_len))
514 BUG(); 521 BUG();
515 } 522 }
516 523
@@ -667,12 +674,14 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
667static int 674static int
668dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) 675dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex)
669{ 676{
670 if (entry->info->indev) 677 struct nf_info *entinf = entry->info;
671 if (entry->info->indev->ifindex == ifindex) 678
679 if (entinf->indev)
680 if (entinf->indev->ifindex == ifindex)
672 return 1; 681 return 1;
673 682
674 if (entry->info->outdev) 683 if (entinf->outdev)
675 if (entry->info->outdev->ifindex == ifindex) 684 if (entinf->outdev->ifindex == ifindex)
676 return 1; 685 return 1;
677 686
678 return 0; 687 return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 96020d7087e8..2101b45d2ec6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -24,6 +24,7 @@
24#include <linux/config.h> 24#include <linux/config.h>
25#include <linux/module.h> 25#include <linux/module.h>
26 26
27#include <linux/capability.h>
27#include <linux/kernel.h> 28#include <linux/kernel.h>
28#include <linux/init.h> 29#include <linux/init.h>
29#include <linux/signal.h> 30#include <linux/signal.h>
@@ -293,7 +294,7 @@ static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
293 return 0; 294 return 0;
294} 295}
295 296
296static struct proto_ops netlink_ops; 297static const struct proto_ops netlink_ops;
297 298
298static int netlink_insert(struct sock *sk, u32 pid) 299static int netlink_insert(struct sock *sk, u32 pid)
299{ 300{
@@ -402,7 +403,7 @@ static int netlink_create(struct socket *sock, int protocol)
402 groups = nl_table[protocol].groups; 403 groups = nl_table[protocol].groups;
403 netlink_unlock_table(); 404 netlink_unlock_table();
404 405
405 if ((err = __netlink_create(sock, protocol) < 0)) 406 if ((err = __netlink_create(sock, protocol)) < 0)
406 goto out_module; 407 goto out_module;
407 408
408 nlk = nlk_sk(sock->sk); 409 nlk = nlk_sk(sock->sk);
@@ -1422,7 +1423,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1422 while (skb->len >= nlmsg_total_size(0)) { 1423 while (skb->len >= nlmsg_total_size(0)) {
1423 nlh = (struct nlmsghdr *) skb->data; 1424 nlh = (struct nlmsghdr *) skb->data;
1424 1425
1425 if (skb->len < nlh->nlmsg_len) 1426 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1426 return 0; 1427 return 0;
1427 1428
1428 total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len); 1429 total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
@@ -1656,7 +1657,7 @@ int netlink_unregister_notifier(struct notifier_block *nb)
1656 return notifier_chain_unregister(&netlink_chain, nb); 1657 return notifier_chain_unregister(&netlink_chain, nb);
1657} 1658}
1658 1659
1659static struct proto_ops netlink_ops = { 1660static const struct proto_ops netlink_ops = {
1660 .family = PF_NETLINK, 1661 .family = PF_NETLINK,
1661 .owner = THIS_MODULE, 1662 .owner = THIS_MODULE,
1662 .release = netlink_release, 1663 .release = netlink_release,
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 287cfcc56951..3b1378498d50 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -441,7 +441,7 @@ errout:
441} 441}
442 442
443static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, 443static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
444 int seq, int cmd) 444 int seq, u8 cmd)
445{ 445{
446 struct sk_buff *skb; 446 struct sk_buff *skb;
447 int err; 447 int err;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index e5d82d711cae..d44981f5a619 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -11,6 +11,7 @@
11#include <linux/config.h> 11#include <linux/config.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/moduleparam.h> 13#include <linux/moduleparam.h>
14#include <linux/capability.h>
14#include <linux/errno.h> 15#include <linux/errno.h>
15#include <linux/types.h> 16#include <linux/types.h>
16#include <linux/socket.h> 17#include <linux/socket.h>
@@ -63,7 +64,7 @@ static unsigned short circuit = 0x101;
63static HLIST_HEAD(nr_list); 64static HLIST_HEAD(nr_list);
64static DEFINE_SPINLOCK(nr_list_lock); 65static DEFINE_SPINLOCK(nr_list_lock);
65 66
66static struct proto_ops nr_proto_ops; 67static const struct proto_ops nr_proto_ops;
67 68
68/* 69/*
69 * Socket removal during an interrupt is now safe. 70 * Socket removal during an interrupt is now safe.
@@ -1166,10 +1167,11 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1166 void __user *argp = (void __user *)arg; 1167 void __user *argp = (void __user *)arg;
1167 int ret; 1168 int ret;
1168 1169
1169 lock_sock(sk);
1170 switch (cmd) { 1170 switch (cmd) {
1171 case TIOCOUTQ: { 1171 case TIOCOUTQ: {
1172 long amount; 1172 long amount;
1173
1174 lock_sock(sk);
1173 amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); 1175 amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
1174 if (amount < 0) 1176 if (amount < 0)
1175 amount = 0; 1177 amount = 0;
@@ -1180,6 +1182,8 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1180 case TIOCINQ: { 1182 case TIOCINQ: {
1181 struct sk_buff *skb; 1183 struct sk_buff *skb;
1182 long amount = 0L; 1184 long amount = 0L;
1185
1186 lock_sock(sk);
1183 /* These two are safe on a single CPU system as only user tasks fiddle here */ 1187 /* These two are safe on a single CPU system as only user tasks fiddle here */
1184 if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) 1188 if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
1185 amount = skb->len; 1189 amount = skb->len;
@@ -1188,6 +1192,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1188 } 1192 }
1189 1193
1190 case SIOCGSTAMP: 1194 case SIOCGSTAMP:
1195 lock_sock(sk);
1191 ret = sock_get_timestamp(sk, argp); 1196 ret = sock_get_timestamp(sk, argp);
1192 release_sock(sk); 1197 release_sock(sk);
1193 return ret; 1198 return ret;
@@ -1202,21 +1207,17 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1202 case SIOCSIFNETMASK: 1207 case SIOCSIFNETMASK:
1203 case SIOCGIFMETRIC: 1208 case SIOCGIFMETRIC:
1204 case SIOCSIFMETRIC: 1209 case SIOCSIFMETRIC:
1205 release_sock(sk);
1206 return -EINVAL; 1210 return -EINVAL;
1207 1211
1208 case SIOCADDRT: 1212 case SIOCADDRT:
1209 case SIOCDELRT: 1213 case SIOCDELRT:
1210 case SIOCNRDECOBS: 1214 case SIOCNRDECOBS:
1211 release_sock(sk);
1212 if (!capable(CAP_NET_ADMIN)) return -EPERM; 1215 if (!capable(CAP_NET_ADMIN)) return -EPERM;
1213 return nr_rt_ioctl(cmd, argp); 1216 return nr_rt_ioctl(cmd, argp);
1214 1217
1215 default: 1218 default:
1216 release_sock(sk); 1219 return -ENOIOCTLCMD;
1217 return dev_ioctl(cmd, argp);
1218 } 1220 }
1219 release_sock(sk);
1220 1221
1221 return 0; 1222 return 0;
1222} 1223}
@@ -1337,7 +1338,7 @@ static struct net_proto_family nr_family_ops = {
1337 .owner = THIS_MODULE, 1338 .owner = THIS_MODULE,
1338}; 1339};
1339 1340
1340static struct proto_ops nr_proto_ops = { 1341static const struct proto_ops nr_proto_ops = {
1341 .family = PF_NETROM, 1342 .family = PF_NETROM,
1342 .owner = THIS_MODULE, 1343 .owner = THIS_MODULE,
1343 .release = nr_release, 1344 .release = nr_release,
diff --git a/net/nonet.c b/net/nonet.c
index e5241dceaa57..1230f0ae832e 100644
--- a/net/nonet.c
+++ b/net/nonet.c
@@ -14,11 +14,6 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16 16
17void __init sock_init(void)
18{
19 printk(KERN_INFO "Linux NoNET1.0 for Linux 2.6\n");
20}
21
22static int sock_no_open(struct inode *irrelevant, struct file *dontcare) 17static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
23{ 18{
24 return -ENXIO; 19 return -ENXIO;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3e2462760413..ee93abc71cb8 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -53,6 +53,7 @@
53#include <linux/types.h> 53#include <linux/types.h>
54#include <linux/sched.h> 54#include <linux/sched.h>
55#include <linux/mm.h> 55#include <linux/mm.h>
56#include <linux/capability.h>
56#include <linux/fcntl.h> 57#include <linux/fcntl.h>
57#include <linux/socket.h> 58#include <linux/socket.h>
58#include <linux/in.h> 59#include <linux/in.h>
@@ -251,10 +252,10 @@ static void packet_sock_destruct(struct sock *sk)
251} 252}
252 253
253 254
254static struct proto_ops packet_ops; 255static const struct proto_ops packet_ops;
255 256
256#ifdef CONFIG_SOCK_PACKET 257#ifdef CONFIG_SOCK_PACKET
257static struct proto_ops packet_ops_spkt; 258static const struct proto_ops packet_ops_spkt;
258 259
259static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 260static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
260{ 261{
@@ -1237,7 +1238,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1237 goto done; 1238 goto done;
1238 1239
1239 err = -ENOBUFS; 1240 err = -ENOBUFS;
1240 i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL); 1241 i = kmalloc(sizeof(*i), GFP_KERNEL);
1241 if (i == NULL) 1242 if (i == NULL)
1242 goto done; 1243 goto done;
1243 1244
@@ -1521,7 +1522,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
1521#endif 1522#endif
1522 1523
1523 default: 1524 default:
1524 return dev_ioctl(cmd, (void __user *)arg); 1525 return -ENOIOCTLCMD;
1525 } 1526 }
1526 return 0; 1527 return 0;
1527} 1528}
@@ -1784,7 +1785,7 @@ out:
1784 1785
1785 1786
1786#ifdef CONFIG_SOCK_PACKET 1787#ifdef CONFIG_SOCK_PACKET
1787static struct proto_ops packet_ops_spkt = { 1788static const struct proto_ops packet_ops_spkt = {
1788 .family = PF_PACKET, 1789 .family = PF_PACKET,
1789 .owner = THIS_MODULE, 1790 .owner = THIS_MODULE,
1790 .release = packet_release, 1791 .release = packet_release,
@@ -1806,7 +1807,7 @@ static struct proto_ops packet_ops_spkt = {
1806}; 1807};
1807#endif 1808#endif
1808 1809
1809static struct proto_ops packet_ops = { 1810static const struct proto_ops packet_ops = {
1810 .family = PF_PACKET, 1811 .family = PF_PACKET,
1811 .owner = THIS_MODULE, 1812 .owner = THIS_MODULE,
1812 .release = packet_release, 1813 .release = packet_release,
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 829fdbc4400b..ea65396d1619 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -9,7 +9,9 @@
9 * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net) 9 * Copyright (C) Terry Dawson VK2KTJ (terry@animats.net)
10 * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi) 10 * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi)
11 */ 11 */
12
12#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/capability.h>
13#include <linux/module.h> 15#include <linux/module.h>
14#include <linux/moduleparam.h> 16#include <linux/moduleparam.h>
15#include <linux/init.h> 17#include <linux/init.h>
@@ -1320,7 +1322,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1320 return 0; 1322 return 0;
1321 1323
1322 default: 1324 default:
1323 return dev_ioctl(cmd, argp); 1325 return -ENOIOCTLCMD;
1324 } 1326 }
1325 1327
1326 return 0; 1328 return 0;
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
index 2ba14a75dbbe..0e0a4553499f 100644
--- a/net/rxrpc/connection.c
+++ b/net/rxrpc/connection.c
@@ -220,6 +220,7 @@ int rxrpc_connection_lookup(struct rxrpc_peer *peer,
220{ 220{
221 struct rxrpc_connection *conn, *candidate = NULL; 221 struct rxrpc_connection *conn, *candidate = NULL;
222 struct list_head *_p; 222 struct list_head *_p;
223 struct sk_buff *pkt = msg->pkt;
223 int ret, fresh = 0; 224 int ret, fresh = 0;
224 __be32 x_epoch, x_connid; 225 __be32 x_epoch, x_connid;
225 __be16 x_port, x_servid; 226 __be16 x_port, x_servid;
@@ -229,10 +230,10 @@ int rxrpc_connection_lookup(struct rxrpc_peer *peer,
229 _enter("%p{{%hu}},%u,%hu", 230 _enter("%p{{%hu}},%u,%hu",
230 peer, 231 peer,
231 peer->trans->port, 232 peer->trans->port,
232 ntohs(msg->pkt->h.uh->source), 233 ntohs(pkt->h.uh->source),
233 ntohs(msg->hdr.serviceId)); 234 ntohs(msg->hdr.serviceId));
234 235
235 x_port = msg->pkt->h.uh->source; 236 x_port = pkt->h.uh->source;
236 x_epoch = msg->hdr.epoch; 237 x_epoch = msg->hdr.epoch;
237 x_clflag = msg->hdr.flags & RXRPC_CLIENT_INITIATED; 238 x_clflag = msg->hdr.flags & RXRPC_CLIENT_INITIATED;
238 x_connid = htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK); 239 x_connid = htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
@@ -267,7 +268,7 @@ int rxrpc_connection_lookup(struct rxrpc_peer *peer,
267 /* fill in the specifics */ 268 /* fill in the specifics */
268 candidate->addr.sin_family = AF_INET; 269 candidate->addr.sin_family = AF_INET;
269 candidate->addr.sin_port = x_port; 270 candidate->addr.sin_port = x_port;
270 candidate->addr.sin_addr.s_addr = msg->pkt->nh.iph->saddr; 271 candidate->addr.sin_addr.s_addr = pkt->nh.iph->saddr;
271 candidate->in_epoch = x_epoch; 272 candidate->in_epoch = x_epoch;
272 candidate->out_epoch = x_epoch; 273 candidate->out_epoch = x_epoch;
273 candidate->in_clientflag = RXRPC_CLIENT_INITIATED; 274 candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
@@ -675,6 +676,7 @@ int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
675 struct rxrpc_message *msg) 676 struct rxrpc_message *msg)
676{ 677{
677 struct rxrpc_message *pmsg; 678 struct rxrpc_message *pmsg;
679 struct dst_entry *dst;
678 struct list_head *_p; 680 struct list_head *_p;
679 unsigned cix, seq; 681 unsigned cix, seq;
680 int ret = 0; 682 int ret = 0;
@@ -710,10 +712,10 @@ int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
710 712
711 call->pkt_rcv_count++; 713 call->pkt_rcv_count++;
712 714
713 if (msg->pkt->dst && msg->pkt->dst->dev) 715 dst = msg->pkt->dst;
716 if (dst && dst->dev)
714 conn->peer->if_mtu = 717 conn->peer->if_mtu =
715 msg->pkt->dst->dev->mtu - 718 dst->dev->mtu - dst->dev->hard_header_len;
716 msg->pkt->dst->dev->hard_header_len;
717 719
718 /* queue on the call in seq order */ 720 /* queue on the call in seq order */
719 rxrpc_get_message(msg); 721 rxrpc_get_message(msg);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 55cd5327fbd7..8a260d43ceef 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -411,7 +411,7 @@ config NET_EMATCH_META
411 tristate "Metadata" 411 tristate "Metadata"
412 depends on NET_EMATCH 412 depends on NET_EMATCH
413 ---help--- 413 ---help---
414 Say Y here if you want to be ablt to classify packets based on 414 Say Y here if you want to be able to classify packets based on
415 metadata such as load average, netfilter attributes, socket 415 metadata such as load average, netfilter attributes, socket
416 attributes and routing decisions. 416 attributes and routing decisions.
417 417
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e48d0d456b3e..0f06aec66094 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -7,13 +7,13 @@ obj-y := sch_generic.o
7obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o sch_blackhole.o 7obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o sch_blackhole.o
8obj-$(CONFIG_NET_CLS) += cls_api.o 8obj-$(CONFIG_NET_CLS) += cls_api.o
9obj-$(CONFIG_NET_CLS_ACT) += act_api.o 9obj-$(CONFIG_NET_CLS_ACT) += act_api.o
10obj-$(CONFIG_NET_ACT_POLICE) += police.o 10obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
11obj-$(CONFIG_NET_CLS_POLICE) += police.o 11obj-$(CONFIG_NET_CLS_POLICE) += act_police.o
12obj-$(CONFIG_NET_ACT_GACT) += gact.o 12obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
13obj-$(CONFIG_NET_ACT_MIRRED) += mirred.o 13obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
14obj-$(CONFIG_NET_ACT_IPT) += ipt.o 14obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
15obj-$(CONFIG_NET_ACT_PEDIT) += pedit.o 15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
16obj-$(CONFIG_NET_ACT_SIMP) += simple.o 16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 17obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
18obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 18obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
19obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o 19obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2ce1cb2aa2ed..792ce59940ec 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
165 while ((a = act) != NULL) { 165 while ((a = act) != NULL) {
166repeat: 166repeat:
167 if (a->ops && a->ops->act) { 167 if (a->ops && a->ops->act) {
168 ret = a->ops->act(&skb, a, res); 168 ret = a->ops->act(skb, a, res);
169 if (TC_MUNGED & skb->tc_verd) { 169 if (TC_MUNGED & skb->tc_verd) {
170 /* copied already, allow trampling */ 170 /* copied already, allow trampling */
171 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); 171 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -290,7 +290,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
290 if (a_o == NULL) { 290 if (a_o == NULL) {
291#ifdef CONFIG_KMOD 291#ifdef CONFIG_KMOD
292 rtnl_unlock(); 292 rtnl_unlock();
293 request_module(act_name); 293 request_module("act_%s", act_name);
294 rtnl_lock(); 294 rtnl_lock();
295 295
296 a_o = tc_lookup_action_n(act_name); 296 a_o = tc_lookup_action_n(act_name);
diff --git a/net/sched/gact.c b/net/sched/act_gact.c
index d1c6d542912a..a1e68f78dcc2 100644
--- a/net/sched/gact.c
+++ b/net/sched/act_gact.c
@@ -135,10 +135,9 @@ tcf_gact_cleanup(struct tc_action *a, int bind)
135} 135}
136 136
137static int 137static int
138tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) 138tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
139{ 139{
140 struct tcf_gact *p = PRIV(a, gact); 140 struct tcf_gact *p = PRIV(a, gact);
141 struct sk_buff *skb = *pskb;
142 int action = TC_ACT_SHOT; 141 int action = TC_ACT_SHOT;
143 142
144 spin_lock(&p->lock); 143 spin_lock(&p->lock);
diff --git a/net/sched/ipt.c b/net/sched/act_ipt.c
index f50136eed211..b5001939b74b 100644
--- a/net/sched/ipt.c
+++ b/net/sched/act_ipt.c
@@ -201,11 +201,10 @@ tcf_ipt_cleanup(struct tc_action *a, int bind)
201} 201}
202 202
203static int 203static int
204tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) 204tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
205{ 205{
206 int ret = 0, result = 0; 206 int ret = 0, result = 0;
207 struct tcf_ipt *p = PRIV(a, ipt); 207 struct tcf_ipt *p = PRIV(a, ipt);
208 struct sk_buff *skb = *pskb;
209 208
210 if (skb_cloned(skb)) { 209 if (skb_cloned(skb)) {
211 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 210 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -222,6 +221,9 @@ tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
222 worry later - danger - this API seems to have changed 221 worry later - danger - this API seems to have changed
223 from earlier kernels */ 222 from earlier kernels */
224 223
224 /* iptables targets take a double skb pointer in case the skb
225 * needs to be replaced. We don't own the skb, so this must not
226 * happen. The pskb_expand_head above should make sure of this */
225 ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, 227 ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL,
226 p->hook, p->t->data, NULL); 228 p->hook, p->t->data, NULL);
227 switch (ret) { 229 switch (ret) {
diff --git a/net/sched/mirred.c b/net/sched/act_mirred.c
index 20d06916dc0b..4fcccbd50885 100644
--- a/net/sched/mirred.c
+++ b/net/sched/act_mirred.c
@@ -158,12 +158,11 @@ tcf_mirred_cleanup(struct tc_action *a, int bind)
158} 158}
159 159
160static int 160static int
161tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) 161tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
162{ 162{
163 struct tcf_mirred *p = PRIV(a, mirred); 163 struct tcf_mirred *p = PRIV(a, mirred);
164 struct net_device *dev; 164 struct net_device *dev;
165 struct sk_buff *skb2 = NULL; 165 struct sk_buff *skb2 = NULL;
166 struct sk_buff *skb = *pskb;
167 u32 at = G_TC_AT(skb->tc_verd); 166 u32 at = G_TC_AT(skb->tc_verd);
168 167
169 spin_lock(&p->lock); 168 spin_lock(&p->lock);
diff --git a/net/sched/pedit.c b/net/sched/act_pedit.c
index 767d24f4610e..1742a68e0122 100644
--- a/net/sched/pedit.c
+++ b/net/sched/act_pedit.c
@@ -130,10 +130,9 @@ tcf_pedit_cleanup(struct tc_action *a, int bind)
130} 130}
131 131
132static int 132static int
133tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) 133tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
134{ 134{
135 struct tcf_pedit *p = PRIV(a, pedit); 135 struct tcf_pedit *p = PRIV(a, pedit);
136 struct sk_buff *skb = *pskb;
137 int i, munged = 0; 136 int i, munged = 0;
138 u8 *pptr; 137 u8 *pptr;
139 138
@@ -246,10 +245,12 @@ tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
246 t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); 245 t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
247 t.expires = jiffies_to_clock_t(p->tm.expires); 246 t.expires = jiffies_to_clock_t(p->tm.expires);
248 RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); 247 RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
248 kfree(opt);
249 return skb->len; 249 return skb->len;
250 250
251rtattr_failure: 251rtattr_failure:
252 skb_trim(skb, b - skb->data); 252 skb_trim(skb, b - skb->data);
253 kfree(opt);
253 return -1; 254 return -1;
254} 255}
255 256
diff --git a/net/sched/police.c b/net/sched/act_police.c
index eb39fb2f39b6..fa877f8f652c 100644
--- a/net/sched/police.c
+++ b/net/sched/act_police.c
@@ -284,11 +284,10 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind)
284 return 0; 284 return 0;
285} 285}
286 286
287static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a, 287static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
288 struct tcf_result *res) 288 struct tcf_result *res)
289{ 289{
290 psched_time_t now; 290 psched_time_t now;
291 struct sk_buff *skb = *pskb;
292 struct tcf_police *p = PRIV(a); 291 struct tcf_police *p = PRIV(a);
293 long toks; 292 long toks;
294 long ptoks = 0; 293 long ptoks = 0;
@@ -408,7 +407,7 @@ police_cleanup_module(void)
408module_init(police_init_module); 407module_init(police_init_module);
409module_exit(police_cleanup_module); 408module_exit(police_cleanup_module);
410 409
411#endif 410#else /* CONFIG_NET_CLS_ACT */
412 411
413struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) 412struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
414{ 413{
@@ -545,6 +544,7 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *p)
545 spin_unlock(&p->lock); 544 spin_unlock(&p->lock);
546 return p->action; 545 return p->action;
547} 546}
547EXPORT_SYMBOL(tcf_police);
548 548
549int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) 549int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
550{ 550{
@@ -601,13 +601,4 @@ errout:
601 return -1; 601 return -1;
602} 602}
603 603
604 604#endif /* CONFIG_NET_CLS_ACT */
605EXPORT_SYMBOL(tcf_police);
606EXPORT_SYMBOL(tcf_police_destroy);
607EXPORT_SYMBOL(tcf_police_dump);
608EXPORT_SYMBOL(tcf_police_dump_stats);
609EXPORT_SYMBOL(tcf_police_hash);
610EXPORT_SYMBOL(tcf_police_ht);
611EXPORT_SYMBOL(tcf_police_locate);
612EXPORT_SYMBOL(tcf_police_lookup);
613EXPORT_SYMBOL(tcf_police_new_index);
diff --git a/net/sched/simple.c b/net/sched/act_simple.c
index 8a6ae4f491e8..e5f2e1f431e2 100644
--- a/net/sched/simple.c
+++ b/net/sched/act_simple.c
@@ -44,9 +44,8 @@ static DEFINE_RWLOCK(simp_lock);
44#include <net/pkt_act.h> 44#include <net/pkt_act.h>
45#include <net/act_generic.h> 45#include <net/act_generic.h>
46 46
47static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) 47static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
48{ 48{
49 struct sk_buff *skb = *pskb;
50 struct tcf_defact *p = PRIV(a, defact); 49 struct tcf_defact *p = PRIV(a, defact);
51 50
52 spin_lock(&p->lock); 51 spin_lock(&p->lock);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 64b047c65568..5cb956b721e8 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -92,7 +92,6 @@
92#include <linux/rtnetlink.h> 92#include <linux/rtnetlink.h>
93#include <linux/skbuff.h> 93#include <linux/skbuff.h>
94#include <net/pkt_cls.h> 94#include <net/pkt_cls.h>
95#include <config/net/ematch/stack.h>
96 95
97static LIST_HEAD(ematch_ops); 96static LIST_HEAD(ematch_ops);
98static DEFINE_RWLOCK(ematch_mod_lock); 97static DEFINE_RWLOCK(ematch_mod_lock);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 09453f997d8c..6cd81708bf71 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -257,7 +257,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
257 (cl = cbq_class_lookup(q, prio)) != NULL) 257 (cl = cbq_class_lookup(q, prio)) != NULL)
258 return cl; 258 return cl;
259 259
260 *qerr = NET_XMIT_DROP; 260 *qerr = NET_XMIT_BYPASS;
261 for (;;) { 261 for (;;) {
262 int result = 0; 262 int result = 0;
263 defmap = head->defaults; 263 defmap = head->defaults;
@@ -413,7 +413,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
413 q->rx_class = cl; 413 q->rx_class = cl;
414#endif 414#endif
415 if (cl == NULL) { 415 if (cl == NULL) {
416 if (ret == NET_XMIT_DROP) 416 if (ret == NET_XMIT_BYPASS)
417 sch->qstats.drops++; 417 sch->qstats.drops++;
418 kfree_skb(skb); 418 kfree_skb(skb);
419 return ret; 419 return ret;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c26764bc4103..91132f6871d7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -208,7 +208,7 @@ struct hfsc_sched
208do { \ 208do { \
209 struct timeval tv; \ 209 struct timeval tv; \
210 do_gettimeofday(&tv); \ 210 do_gettimeofday(&tv); \
211 (stamp) = 1000000ULL * tv.tv_sec + tv.tv_usec; \ 211 (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \
212} while (0) 212} while (0)
213#endif 213#endif
214 214
@@ -502,8 +502,8 @@ d2dx(u32 d)
502 u64 dx; 502 u64 dx;
503 503
504 dx = ((u64)d * PSCHED_JIFFIE2US(HZ)); 504 dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
505 dx += 1000000 - 1; 505 dx += USEC_PER_SEC - 1;
506 do_div(dx, 1000000); 506 do_div(dx, USEC_PER_SEC);
507 return dx; 507 return dx;
508} 508}
509 509
@@ -523,7 +523,7 @@ dx2d(u64 dx)
523{ 523{
524 u64 d; 524 u64 d;
525 525
526 d = dx * 1000000; 526 d = dx * USEC_PER_SEC;
527 do_div(d, PSCHED_JIFFIE2US(HZ)); 527 do_div(d, PSCHED_JIFFIE2US(HZ));
528 return (u32)d; 528 return (u32)d;
529} 529}
@@ -1227,7 +1227,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1227 if (cl->level == 0) 1227 if (cl->level == 0)
1228 return cl; 1228 return cl;
1229 1229
1230 *qerr = NET_XMIT_DROP; 1230 *qerr = NET_XMIT_BYPASS;
1231 tcf = q->root.filter_list; 1231 tcf = q->root.filter_list;
1232 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { 1232 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
1233#ifdef CONFIG_NET_CLS_ACT 1233#ifdef CONFIG_NET_CLS_ACT
@@ -1643,7 +1643,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1643 1643
1644 cl = hfsc_classify(skb, sch, &err); 1644 cl = hfsc_classify(skb, sch, &err);
1645 if (cl == NULL) { 1645 if (cl == NULL) {
1646 if (err == NET_XMIT_DROP) 1646 if (err == NET_XMIT_BYPASS)
1647 sch->qstats.drops++; 1647 sch->qstats.drops++;
1648 kfree_skb(skb); 1648 kfree_skb(skb);
1649 return err; 1649 return err;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 558cc087e602..3ec95df4a85e 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -321,7 +321,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
321 if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) 321 if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
322 return cl; 322 return cl;
323 323
324 *qerr = NET_XMIT_DROP; 324 *qerr = NET_XMIT_BYPASS;
325 tcf = q->filter_list; 325 tcf = q->filter_list;
326 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { 326 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
327#ifdef CONFIG_NET_CLS_ACT 327#ifdef CONFIG_NET_CLS_ACT
@@ -724,7 +724,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
724 } 724 }
725#ifdef CONFIG_NET_CLS_ACT 725#ifdef CONFIG_NET_CLS_ACT
726 } else if (!cl) { 726 } else if (!cl) {
727 if (ret == NET_XMIT_DROP) 727 if (ret == NET_XMIT_BYPASS)
728 sch->qstats.drops++; 728 sch->qstats.drops++;
729 kfree_skb (skb); 729 kfree_skb (skb);
730 return ret; 730 return ret;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 82fb07aa06a5..ba5283204837 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -25,7 +25,7 @@
25 25
26#include <net/pkt_sched.h> 26#include <net/pkt_sched.h>
27 27
28#define VERSION "1.1" 28#define VERSION "1.2"
29 29
30/* Network Emulation Queuing algorithm. 30/* Network Emulation Queuing algorithm.
31 ==================================== 31 ====================================
@@ -65,11 +65,12 @@ struct netem_sched_data {
65 u32 jitter; 65 u32 jitter;
66 u32 duplicate; 66 u32 duplicate;
67 u32 reorder; 67 u32 reorder;
68 u32 corrupt;
68 69
69 struct crndstate { 70 struct crndstate {
70 unsigned long last; 71 unsigned long last;
71 unsigned long rho; 72 unsigned long rho;
72 } delay_cor, loss_cor, dup_cor, reorder_cor; 73 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
73 74
74 struct disttable { 75 struct disttable {
75 u32 size; 76 u32 size;
@@ -183,6 +184,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
183 q->duplicate = dupsave; 184 q->duplicate = dupsave;
184 } 185 }
185 186
187 /*
188 * Randomized packet corruption.
189 * Make copy if needed since we are modifying
190 * If packet is going to be hardware checksummed, then
191 * do it now in software before we mangle it.
192 */
193 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
194 if (!(skb = skb_unshare(skb, GFP_ATOMIC))
195 || (skb->ip_summed == CHECKSUM_HW
196 && skb_checksum_help(skb, 0))) {
197 sch->qstats.drops++;
198 return NET_XMIT_DROP;
199 }
200
201 skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
202 }
203
186 if (q->gap == 0 /* not doing reordering */ 204 if (q->gap == 0 /* not doing reordering */
187 || q->counter < q->gap /* inside last reordering gap */ 205 || q->counter < q->gap /* inside last reordering gap */
188 || q->reorder < get_crandom(&q->reorder_cor)) { 206 || q->reorder < get_crandom(&q->reorder_cor)) {
@@ -382,6 +400,20 @@ static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
382 return 0; 400 return 0;
383} 401}
384 402
403static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
404{
405 struct netem_sched_data *q = qdisc_priv(sch);
406 const struct tc_netem_corrupt *r = RTA_DATA(attr);
407
408 if (RTA_PAYLOAD(attr) != sizeof(*r))
409 return -EINVAL;
410
411 q->corrupt = r->probability;
412 init_crandom(&q->corrupt_cor, r->correlation);
413 return 0;
414}
415
416/* Parse netlink message to set options */
385static int netem_change(struct Qdisc *sch, struct rtattr *opt) 417static int netem_change(struct Qdisc *sch, struct rtattr *opt)
386{ 418{
387 struct netem_sched_data *q = qdisc_priv(sch); 419 struct netem_sched_data *q = qdisc_priv(sch);
@@ -432,13 +464,19 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
432 if (ret) 464 if (ret)
433 return ret; 465 return ret;
434 } 466 }
467
435 if (tb[TCA_NETEM_REORDER-1]) { 468 if (tb[TCA_NETEM_REORDER-1]) {
436 ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); 469 ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
437 if (ret) 470 if (ret)
438 return ret; 471 return ret;
439 } 472 }
440 }
441 473
474 if (tb[TCA_NETEM_CORRUPT-1]) {
475 ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
476 if (ret)
477 return ret;
478 }
479 }
442 480
443 return 0; 481 return 0;
444} 482}
@@ -564,6 +602,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
564 struct tc_netem_qopt qopt; 602 struct tc_netem_qopt qopt;
565 struct tc_netem_corr cor; 603 struct tc_netem_corr cor;
566 struct tc_netem_reorder reorder; 604 struct tc_netem_reorder reorder;
605 struct tc_netem_corrupt corrupt;
567 606
568 qopt.latency = q->latency; 607 qopt.latency = q->latency;
569 qopt.jitter = q->jitter; 608 qopt.jitter = q->jitter;
@@ -582,6 +621,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
582 reorder.correlation = q->reorder_cor.rho; 621 reorder.correlation = q->reorder_cor.rho;
583 RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); 622 RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
584 623
624 corrupt.probability = q->corrupt;
625 corrupt.correlation = q->corrupt_cor.rho;
626 RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
627
585 rta->rta_len = skb->tail - b; 628 rta->rta_len = skb->tail - b;
586 629
587 return skb->len; 630 return skb->len;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3ac0f495bad0..5b3a3e48ed92 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -54,7 +54,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
54 u32 band = skb->priority; 54 u32 band = skb->priority;
55 struct tcf_result res; 55 struct tcf_result res;
56 56
57 *qerr = NET_XMIT_DROP; 57 *qerr = NET_XMIT_BYPASS;
58 if (TC_H_MAJ(skb->priority) != sch->handle) { 58 if (TC_H_MAJ(skb->priority) != sch->handle) {
59#ifdef CONFIG_NET_CLS_ACT 59#ifdef CONFIG_NET_CLS_ACT
60 switch (tc_classify(skb, q->filter_list, &res)) { 60 switch (tc_classify(skb, q->filter_list, &res)) {
@@ -91,7 +91,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
91 qdisc = prio_classify(skb, sch, &ret); 91 qdisc = prio_classify(skb, sch, &ret);
92#ifdef CONFIG_NET_CLS_ACT 92#ifdef CONFIG_NET_CLS_ACT
93 if (qdisc == NULL) { 93 if (qdisc == NULL) {
94 if (ret == NET_XMIT_DROP) 94
95 if (ret == NET_XMIT_BYPASS)
95 sch->qstats.drops++; 96 sch->qstats.drops++;
96 kfree_skb(skb); 97 kfree_skb(skb);
97 return ret; 98 return ret;
@@ -118,7 +119,7 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
118 qdisc = prio_classify(skb, sch, &ret); 119 qdisc = prio_classify(skb, sch, &ret);
119#ifdef CONFIG_NET_CLS_ACT 120#ifdef CONFIG_NET_CLS_ACT
120 if (qdisc == NULL) { 121 if (qdisc == NULL) {
121 if (ret == NET_XMIT_DROP) 122 if (ret == NET_XMIT_BYPASS)
122 sch->qstats.drops++; 123 sch->qstats.drops++;
123 kfree_skb(skb); 124 kfree_skb(skb);
124 return ret; 125 return ret;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6cf0342706b5..79b8ef34c6e4 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -22,6 +22,7 @@
22#include <linux/in.h> 22#include <linux/in.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <linux/if_arp.h>
25#include <linux/if_ether.h> 26#include <linux/if_ether.h>
26#include <linux/inet.h> 27#include <linux/inet.h>
27#include <linux/netdevice.h> 28#include <linux/netdevice.h>
@@ -273,7 +274,7 @@ teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *de
273 274
274static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) 275static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
275{ 276{
276 struct teql_master *master = (void*)dev->priv; 277 struct teql_master *master = netdev_priv(dev);
277 struct Qdisc *start, *q; 278 struct Qdisc *start, *q;
278 int busy; 279 int busy;
279 int nores; 280 int nores;
@@ -349,7 +350,7 @@ drop:
349static int teql_master_open(struct net_device *dev) 350static int teql_master_open(struct net_device *dev)
350{ 351{
351 struct Qdisc * q; 352 struct Qdisc * q;
352 struct teql_master *m = (void*)dev->priv; 353 struct teql_master *m = netdev_priv(dev);
353 int mtu = 0xFFFE; 354 int mtu = 0xFFFE;
354 unsigned flags = IFF_NOARP|IFF_MULTICAST; 355 unsigned flags = IFF_NOARP|IFF_MULTICAST;
355 356
@@ -396,13 +397,13 @@ static int teql_master_close(struct net_device *dev)
396 397
397static struct net_device_stats *teql_master_stats(struct net_device *dev) 398static struct net_device_stats *teql_master_stats(struct net_device *dev)
398{ 399{
399 struct teql_master *m = (void*)dev->priv; 400 struct teql_master *m = netdev_priv(dev);
400 return &m->stats; 401 return &m->stats;
401} 402}
402 403
403static int teql_master_mtu(struct net_device *dev, int new_mtu) 404static int teql_master_mtu(struct net_device *dev, int new_mtu)
404{ 405{
405 struct teql_master *m = (void*)dev->priv; 406 struct teql_master *m = netdev_priv(dev);
406 struct Qdisc *q; 407 struct Qdisc *q;
407 408
408 if (new_mtu < 68) 409 if (new_mtu < 68)
@@ -422,7 +423,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
422 423
423static __init void teql_master_setup(struct net_device *dev) 424static __init void teql_master_setup(struct net_device *dev)
424{ 425{
425 struct teql_master *master = dev->priv; 426 struct teql_master *master = netdev_priv(dev);
426 struct Qdisc_ops *ops = &master->qops; 427 struct Qdisc_ops *ops = &master->qops;
427 428
428 master->dev = dev; 429 master->dev = dev;
@@ -475,7 +476,7 @@ static int __init teql_init(void)
475 break; 476 break;
476 } 477 }
477 478
478 master = dev->priv; 479 master = netdev_priv(dev);
479 480
480 strlcpy(master->qops.id, dev->name, IFNAMSIZ); 481 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
481 err = register_qdisc(&master->qops); 482 err = register_qdisc(&master->qops);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index dec68a604773..9d05e13e92f6 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -110,7 +110,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
110 asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; 110 asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000;
111 asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) 111 asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000)
112 * 1000; 112 * 1000;
113 asoc->pmtu = 0;
114 asoc->frag_point = 0; 113 asoc->frag_point = 0;
115 114
116 /* Set the association max_retrans and RTO values from the 115 /* Set the association max_retrans and RTO values from the
@@ -123,6 +122,25 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
123 122
124 asoc->overall_error_count = 0; 123 asoc->overall_error_count = 0;
125 124
125 /* Initialize the association's heartbeat interval based on the
126 * sock configured value.
127 */
128 asoc->hbinterval = msecs_to_jiffies(sp->hbinterval);
129
130 /* Initialize path max retrans value. */
131 asoc->pathmaxrxt = sp->pathmaxrxt;
132
133 /* Initialize default path MTU. */
134 asoc->pathmtu = sp->pathmtu;
135
136 /* Set association default SACK delay */
137 asoc->sackdelay = msecs_to_jiffies(sp->sackdelay);
138
139 /* Set the association default flags controlling
140 * Heartbeat, SACK delay, and Path MTU Discovery.
141 */
142 asoc->param_flags = sp->param_flags;
143
126 /* Initialize the maximum mumber of new data packets that can be sent 144 /* Initialize the maximum mumber of new data packets that can be sent
127 * in a burst. 145 * in a burst.
128 */ 146 */
@@ -144,8 +162,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
144 = 5 * asoc->rto_max; 162 = 5 * asoc->rto_max;
145 163
146 asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; 164 asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
147 asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = 165 asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
148 SCTP_DEFAULT_TIMEOUT_SACK;
149 asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = 166 asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
150 sp->autoclose * HZ; 167 sp->autoclose * HZ;
151 168
@@ -540,23 +557,46 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
540 557
541 sctp_transport_set_owner(peer, asoc); 558 sctp_transport_set_owner(peer, asoc);
542 559
560 /* Initialize the peer's heartbeat interval based on the
561 * association configured value.
562 */
563 peer->hbinterval = asoc->hbinterval;
564
565 /* Set the path max_retrans. */
566 peer->pathmaxrxt = asoc->pathmaxrxt;
567
568 /* Initialize the peer's SACK delay timeout based on the
569 * association configured value.
570 */
571 peer->sackdelay = asoc->sackdelay;
572
573 /* Enable/disable heartbeat, SACK delay, and path MTU discovery
574 * based on association setting.
575 */
576 peer->param_flags = asoc->param_flags;
577
543 /* Initialize the pmtu of the transport. */ 578 /* Initialize the pmtu of the transport. */
544 sctp_transport_pmtu(peer); 579 if (peer->param_flags & SPP_PMTUD_ENABLE)
580 sctp_transport_pmtu(peer);
581 else if (asoc->pathmtu)
582 peer->pathmtu = asoc->pathmtu;
583 else
584 peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
545 585
546 /* If this is the first transport addr on this association, 586 /* If this is the first transport addr on this association,
547 * initialize the association PMTU to the peer's PMTU. 587 * initialize the association PMTU to the peer's PMTU.
548 * If not and the current association PMTU is higher than the new 588 * If not and the current association PMTU is higher than the new
549 * peer's PMTU, reset the association PMTU to the new peer's PMTU. 589 * peer's PMTU, reset the association PMTU to the new peer's PMTU.
550 */ 590 */
551 if (asoc->pmtu) 591 if (asoc->pathmtu)
552 asoc->pmtu = min_t(int, peer->pmtu, asoc->pmtu); 592 asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu);
553 else 593 else
554 asoc->pmtu = peer->pmtu; 594 asoc->pathmtu = peer->pathmtu;
555 595
556 SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " 596 SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to "
557 "%d\n", asoc, asoc->pmtu); 597 "%d\n", asoc, asoc->pathmtu);
558 598
559 asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); 599 asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu);
560 600
561 /* The asoc->peer.port might not be meaningful yet, but 601 /* The asoc->peer.port might not be meaningful yet, but
562 * initialize the packet structure anyway. 602 * initialize the packet structure anyway.
@@ -574,7 +614,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
574 * (for example, implementations MAY use the size of the 614 * (for example, implementations MAY use the size of the
575 * receiver advertised window). 615 * receiver advertised window).
576 */ 616 */
577 peer->cwnd = min(4*asoc->pmtu, max_t(__u32, 2*asoc->pmtu, 4380)); 617 peer->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380));
578 618
579 /* At this point, we may not have the receiver's advertised window, 619 /* At this point, we may not have the receiver's advertised window,
580 * so initialize ssthresh to the default value and it will be set 620 * so initialize ssthresh to the default value and it will be set
@@ -585,17 +625,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
585 peer->partial_bytes_acked = 0; 625 peer->partial_bytes_acked = 0;
586 peer->flight_size = 0; 626 peer->flight_size = 0;
587 627
588 /* By default, enable heartbeat for peer address. */
589 peer->hb_allowed = 1;
590
591 /* Initialize the peer's heartbeat interval based on the
592 * sock configured value.
593 */
594 peer->hb_interval = msecs_to_jiffies(sp->paddrparam.spp_hbinterval);
595
596 /* Set the path max_retrans. */
597 peer->max_retrans = sp->paddrparam.spp_pathmaxrxt;
598
599 /* Set the transport's RTO.initial value */ 628 /* Set the transport's RTO.initial value */
600 peer->rto = asoc->rto_initial; 629 peer->rto = asoc->rto_initial;
601 630
@@ -1155,18 +1184,18 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
1155 /* Get the lowest pmtu of all the transports. */ 1184 /* Get the lowest pmtu of all the transports. */
1156 list_for_each(pos, &asoc->peer.transport_addr_list) { 1185 list_for_each(pos, &asoc->peer.transport_addr_list) {
1157 t = list_entry(pos, struct sctp_transport, transports); 1186 t = list_entry(pos, struct sctp_transport, transports);
1158 if (!pmtu || (t->pmtu < pmtu)) 1187 if (!pmtu || (t->pathmtu < pmtu))
1159 pmtu = t->pmtu; 1188 pmtu = t->pathmtu;
1160 } 1189 }
1161 1190
1162 if (pmtu) { 1191 if (pmtu) {
1163 struct sctp_sock *sp = sctp_sk(asoc->base.sk); 1192 struct sctp_sock *sp = sctp_sk(asoc->base.sk);
1164 asoc->pmtu = pmtu; 1193 asoc->pathmtu = pmtu;
1165 asoc->frag_point = sctp_frag_point(sp, pmtu); 1194 asoc->frag_point = sctp_frag_point(sp, pmtu);
1166 } 1195 }
1167 1196
1168 SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", 1197 SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n",
1169 __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point); 1198 __FUNCTION__, asoc, asoc->pathmtu, asoc->frag_point);
1170} 1199}
1171 1200
1172/* Should we send a SACK to update our peer? */ 1201/* Should we send a SACK to update our peer? */
@@ -1179,7 +1208,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc)
1179 case SCTP_STATE_SHUTDOWN_SENT: 1208 case SCTP_STATE_SHUTDOWN_SENT:
1180 if ((asoc->rwnd > asoc->a_rwnd) && 1209 if ((asoc->rwnd > asoc->a_rwnd) &&
1181 ((asoc->rwnd - asoc->a_rwnd) >= 1210 ((asoc->rwnd - asoc->a_rwnd) >=
1182 min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pmtu))) 1211 min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu)))
1183 return 1; 1212 return 1;
1184 break; 1213 break;
1185 default: 1214 default:
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b24ff2c1aef5..4aa6fc60357c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -225,6 +225,7 @@ int sctp_rcv(struct sk_buff *skb)
225 225
226 if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) 226 if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
227 goto discard_release; 227 goto discard_release;
228 nf_reset(skb);
228 229
229 ret = sk_filter(sk, skb, 1); 230 ret = sk_filter(sk, skb, 1);
230 if (ret) 231 if (ret)
@@ -305,18 +306,36 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
305void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, 306void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
306 struct sctp_transport *t, __u32 pmtu) 307 struct sctp_transport *t, __u32 pmtu)
307{ 308{
308 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { 309 if (sock_owned_by_user(sk) || !t || (t->pathmtu == pmtu))
309 printk(KERN_WARNING "%s: Reported pmtu %d too low, " 310 return;
310 "using default minimum of %d\n", __FUNCTION__, pmtu,
311 SCTP_DEFAULT_MINSEGMENT);
312 pmtu = SCTP_DEFAULT_MINSEGMENT;
313 }
314 311
315 if (!sock_owned_by_user(sk) && t && (t->pmtu != pmtu)) { 312 if (t->param_flags & SPP_PMTUD_ENABLE) {
316 t->pmtu = pmtu; 313 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
314 printk(KERN_WARNING "%s: Reported pmtu %d too low, "
315 "using default minimum of %d\n",
316 __FUNCTION__, pmtu,
317 SCTP_DEFAULT_MINSEGMENT);
318 /* Use default minimum segment size and disable
319 * pmtu discovery on this transport.
320 */
321 t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
322 t->param_flags = (t->param_flags & ~SPP_HB) |
323 SPP_PMTUD_DISABLE;
324 } else {
325 t->pathmtu = pmtu;
326 }
327
328 /* Update association pmtu. */
317 sctp_assoc_sync_pmtu(asoc); 329 sctp_assoc_sync_pmtu(asoc);
318 sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
319 } 330 }
331
332 /* Retransmit with the new pmtu setting.
333 * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
334 * Needed will never be sent, but if a message was sent before
335 * PMTU discovery was disabled that was larger than the PMTU, it
336 * would not be fragmented, so it must be re-transmitted fragmented.
337 */
338 sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
320} 339}
321 340
322/* 341/*
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index fa3be2b8fb5f..04c7fab4edc4 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -866,7 +866,7 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
866 return 2; 866 return 2;
867} 867}
868 868
869static struct proto_ops inet6_seqpacket_ops = { 869static const struct proto_ops inet6_seqpacket_ops = {
870 .family = PF_INET6, 870 .family = PF_INET6,
871 .owner = THIS_MODULE, 871 .owner = THIS_MODULE,
872 .release = inet6_release, 872 .release = inet6_release,
@@ -905,7 +905,7 @@ static struct inet_protosw sctpv6_stream_protosw = {
905 .flags = SCTP_PROTOSW_FLAG, 905 .flags = SCTP_PROTOSW_FLAG,
906}; 906};
907 907
908static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) 908static int sctp6_rcv(struct sk_buff **pskb)
909{ 909{
910 return sctp_rcv(*pskb) ? -1 : 0; 910 return sctp_rcv(*pskb) ? -1 : 0;
911} 911}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 931371633464..a40991ef72c9 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -234,8 +234,8 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
234 goto finish; 234 goto finish;
235 235
236 pmtu = ((packet->transport->asoc) ? 236 pmtu = ((packet->transport->asoc) ?
237 (packet->transport->asoc->pmtu) : 237 (packet->transport->asoc->pathmtu) :
238 (packet->transport->pmtu)); 238 (packet->transport->pathmtu));
239 239
240 too_big = (psize + chunk_len > pmtu); 240 too_big = (psize + chunk_len > pmtu);
241 241
@@ -482,7 +482,9 @@ int sctp_packet_transmit(struct sctp_packet *packet)
482 if (!dst || (dst->obsolete > 1)) { 482 if (!dst || (dst->obsolete > 1)) {
483 dst_release(dst); 483 dst_release(dst);
484 sctp_transport_route(tp, NULL, sctp_sk(sk)); 484 sctp_transport_route(tp, NULL, sctp_sk(sk));
485 sctp_assoc_sync_pmtu(asoc); 485 if (asoc->param_flags & SPP_PMTUD_ENABLE) {
486 sctp_assoc_sync_pmtu(asoc);
487 }
486 } 488 }
487 489
488 nskb->dst = dst_clone(tp->dst); 490 nskb->dst = dst_clone(tp->dst);
@@ -492,7 +494,10 @@ int sctp_packet_transmit(struct sctp_packet *packet)
492 SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", 494 SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n",
493 nskb->len); 495 nskb->len);
494 496
495 (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); 497 if (tp->param_flags & SPP_PMTUD_ENABLE)
498 (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok);
499 else
500 (*tp->af_specific->sctp_xmit)(nskb, tp, 1);
496 501
497out: 502out:
498 packet->size = packet->overhead; 503 packet->size = packet->overhead;
@@ -577,7 +582,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
577 * if ((flightsize + Max.Burst * MTU) < cwnd) 582 * if ((flightsize + Max.Burst * MTU) < cwnd)
578 * cwnd = flightsize + Max.Burst * MTU 583 * cwnd = flightsize + Max.Burst * MTU
579 */ 584 */
580 max_burst_bytes = asoc->max_burst * asoc->pmtu; 585 max_burst_bytes = asoc->max_burst * asoc->pathmtu;
581 if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { 586 if ((transport->flight_size + max_burst_bytes) < transport->cwnd) {
582 transport->cwnd = transport->flight_size + max_burst_bytes; 587 transport->cwnd = transport->flight_size + max_burst_bytes;
583 SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " 588 SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: "
@@ -622,7 +627,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
622 * data will fit or delay in hopes of bundling a full 627 * data will fit or delay in hopes of bundling a full
623 * sized packet. 628 * sized packet.
624 */ 629 */
625 if (len < asoc->pmtu - packet->overhead) { 630 if (len < asoc->pathmtu - packet->overhead) {
626 retval = SCTP_XMIT_NAGLE_DELAY; 631 retval = SCTP_XMIT_NAGLE_DELAY;
627 goto finish; 632 goto finish;
628 } 633 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f775d78aa59d..de693b43c8ea 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -54,6 +54,7 @@
54#include <net/protocol.h> 54#include <net/protocol.h>
55#include <net/ip.h> 55#include <net/ip.h>
56#include <net/ipv6.h> 56#include <net/ipv6.h>
57#include <net/route.h>
57#include <net/sctp/sctp.h> 58#include <net/sctp/sctp.h>
58#include <net/addrconf.h> 59#include <net/addrconf.h>
59#include <net/inet_common.h> 60#include <net/inet_common.h>
@@ -829,7 +830,7 @@ static struct notifier_block sctp_inetaddr_notifier = {
829}; 830};
830 831
831/* Socket operations. */ 832/* Socket operations. */
832static struct proto_ops inet_seqpacket_ops = { 833static const struct proto_ops inet_seqpacket_ops = {
833 .family = PF_INET, 834 .family = PF_INET,
834 .owner = THIS_MODULE, 835 .owner = THIS_MODULE,
835 .release = inet_release, /* Needs to be wrapped... */ 836 .release = inet_release, /* Needs to be wrapped... */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f9573eba5c7a..556c495c6922 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1287,7 +1287,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
1287 - (bodysize % SCTP_COOKIE_MULTIPLE); 1287 - (bodysize % SCTP_COOKIE_MULTIPLE);
1288 *cookie_len = headersize + bodysize; 1288 *cookie_len = headersize + bodysize;
1289 1289
1290 retval = (sctp_cookie_param_t *)kmalloc(*cookie_len, GFP_ATOMIC); 1290 retval = kmalloc(*cookie_len, GFP_ATOMIC);
1291 1291
1292 if (!retval) { 1292 if (!retval) {
1293 *cookie_len = 0; 1293 *cookie_len = 0;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 823947170a33..b8b38aba92b3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -157,9 +157,12 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
157{ 157{
158 __u32 ctsn, max_tsn_seen; 158 __u32 ctsn, max_tsn_seen;
159 struct sctp_chunk *sack; 159 struct sctp_chunk *sack;
160 struct sctp_transport *trans = asoc->peer.last_data_from;
160 int error = 0; 161 int error = 0;
161 162
162 if (force) 163 if (force ||
164 (!trans && (asoc->param_flags & SPP_SACKDELAY_DISABLE)) ||
165 (trans && (trans->param_flags & SPP_SACKDELAY_DISABLE)))
163 asoc->peer.sack_needed = 1; 166 asoc->peer.sack_needed = 1;
164 167
165 ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); 168 ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
@@ -189,7 +192,22 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
189 if (!asoc->peer.sack_needed) { 192 if (!asoc->peer.sack_needed) {
190 /* We will need a SACK for the next packet. */ 193 /* We will need a SACK for the next packet. */
191 asoc->peer.sack_needed = 1; 194 asoc->peer.sack_needed = 1;
192 goto out; 195
196 /* Set the SACK delay timeout based on the
197 * SACK delay for the last transport
198 * data was received from, or the default
199 * for the association.
200 */
201 if (trans)
202 asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] =
203 trans->sackdelay;
204 else
205 asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] =
206 asoc->sackdelay;
207
208 /* Restart the SACK timer. */
209 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
210 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
193 } else { 211 } else {
194 if (asoc->a_rwnd > asoc->rwnd) 212 if (asoc->a_rwnd > asoc->rwnd)
195 asoc->a_rwnd = asoc->rwnd; 213 asoc->a_rwnd = asoc->rwnd;
@@ -205,7 +223,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
205 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 223 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
206 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); 224 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
207 } 225 }
208out: 226
209 return error; 227 return error;
210nomem: 228nomem:
211 error = -ENOMEM; 229 error = -ENOMEM;
@@ -415,7 +433,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
415 asoc->overall_error_count++; 433 asoc->overall_error_count++;
416 434
417 if (transport->state != SCTP_INACTIVE && 435 if (transport->state != SCTP_INACTIVE &&
418 (transport->error_count++ >= transport->max_retrans)) { 436 (transport->error_count++ >= transport->pathmaxrxt)) {
419 SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", 437 SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p",
420 " transport IP: port:%d failed.\n", 438 " transport IP: port:%d failed.\n",
421 asoc, 439 asoc,
@@ -1232,8 +1250,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1232 case SCTP_CMD_TIMER_START: 1250 case SCTP_CMD_TIMER_START:
1233 timer = &asoc->timers[cmd->obj.to]; 1251 timer = &asoc->timers[cmd->obj.to];
1234 timeout = asoc->timeouts[cmd->obj.to]; 1252 timeout = asoc->timeouts[cmd->obj.to];
1235 if (!timeout) 1253 BUG_ON(!timeout);
1236 BUG();
1237 1254
1238 timer->expires = jiffies + timeout; 1255 timer->expires = jiffies + timeout;
1239 sctp_association_hold(asoc); 1256 sctp_association_hold(asoc);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 475bfb4972d9..557a7d90b92a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -900,7 +900,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
900 * HEARTBEAT is sent (see Section 8.3). 900 * HEARTBEAT is sent (see Section 8.3).
901 */ 901 */
902 902
903 if (transport->hb_allowed) { 903 if (transport->param_flags & SPP_HB_ENABLE) {
904 if (SCTP_DISPOSITION_NOMEM == 904 if (SCTP_DISPOSITION_NOMEM ==
905 sctp_sf_heartbeat(ep, asoc, type, arg, 905 sctp_sf_heartbeat(ep, asoc, type, arg,
906 commands)) 906 commands))
@@ -1051,7 +1051,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1051 return SCTP_DISPOSITION_DISCARD; 1051 return SCTP_DISPOSITION_DISCARD;
1052 } 1052 }
1053 1053
1054 max_interval = link->hb_interval + link->rto; 1054 max_interval = link->hbinterval + link->rto;
1055 1055
1056 /* Check if the timestamp looks valid. */ 1056 /* Check if the timestamp looks valid. */
1057 if (time_after(hbinfo->sent_at, jiffies) || 1057 if (time_after(hbinfo->sent_at, jiffies) ||
@@ -2691,14 +2691,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
2691 * document allow. However, an SCTP transmitter MUST NOT be 2691 * document allow. However, an SCTP transmitter MUST NOT be
2692 * more aggressive than the following algorithms allow. 2692 * more aggressive than the following algorithms allow.
2693 */ 2693 */
2694 if (chunk->end_of_packet) { 2694 if (chunk->end_of_packet)
2695 sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); 2695 sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
2696 2696
2697 /* Start the SACK timer. */
2698 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
2699 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
2700 }
2701
2702 return SCTP_DISPOSITION_CONSUME; 2697 return SCTP_DISPOSITION_CONSUME;
2703 2698
2704discard_force: 2699discard_force:
@@ -2721,13 +2716,9 @@ discard_force:
2721 return SCTP_DISPOSITION_DISCARD; 2716 return SCTP_DISPOSITION_DISCARD;
2722 2717
2723discard_noforce: 2718discard_noforce:
2724 if (chunk->end_of_packet) { 2719 if (chunk->end_of_packet)
2725 sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); 2720 sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
2726 2721
2727 /* Start the SACK timer. */
2728 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
2729 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
2730 }
2731 return SCTP_DISPOSITION_DISCARD; 2722 return SCTP_DISPOSITION_DISCARD;
2732consume: 2723consume:
2733 return SCTP_DISPOSITION_CONSUME; 2724 return SCTP_DISPOSITION_CONSUME;
@@ -3442,9 +3433,6 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
3442 * send another. 3433 * send another.
3443 */ 3434 */
3444 sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); 3435 sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE());
3445 /* Start the SACK timer. */
3446 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
3447 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
3448 3436
3449 return SCTP_DISPOSITION_CONSUME; 3437 return SCTP_DISPOSITION_CONSUME;
3450 3438
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9df888e932c5..c98ee375ba5e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -63,6 +63,7 @@
63#include <linux/wait.h> 63#include <linux/wait.h>
64#include <linux/time.h> 64#include <linux/time.h>
65#include <linux/ip.h> 65#include <linux/ip.h>
66#include <linux/capability.h>
66#include <linux/fcntl.h> 67#include <linux/fcntl.h>
67#include <linux/poll.h> 68#include <linux/poll.h>
68#include <linux/init.h> 69#include <linux/init.h>
@@ -860,7 +861,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
860 return -EFAULT; 861 return -EFAULT;
861 862
862 /* Alloc space for the address array in kernel memory. */ 863 /* Alloc space for the address array in kernel memory. */
863 kaddrs = (struct sockaddr *)kmalloc(addrs_size, GFP_KERNEL); 864 kaddrs = kmalloc(addrs_size, GFP_KERNEL);
864 if (unlikely(!kaddrs)) 865 if (unlikely(!kaddrs))
865 return -ENOMEM; 866 return -ENOMEM;
866 867
@@ -1150,7 +1151,7 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
1150 return -EFAULT; 1151 return -EFAULT;
1151 1152
1152 /* Alloc space for the address array in kernel memory. */ 1153 /* Alloc space for the address array in kernel memory. */
1153 kaddrs = (struct sockaddr *)kmalloc(addrs_size, GFP_KERNEL); 1154 kaddrs = kmalloc(addrs_size, GFP_KERNEL);
1154 if (unlikely(!kaddrs)) 1155 if (unlikely(!kaddrs))
1155 return -ENOMEM; 1156 return -ENOMEM;
1156 1157
@@ -1941,107 +1942,379 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
1941 * address's parameters: 1942 * address's parameters:
1942 * 1943 *
1943 * struct sctp_paddrparams { 1944 * struct sctp_paddrparams {
1944 * sctp_assoc_t spp_assoc_id; 1945 * sctp_assoc_t spp_assoc_id;
1945 * struct sockaddr_storage spp_address; 1946 * struct sockaddr_storage spp_address;
1946 * uint32_t spp_hbinterval; 1947 * uint32_t spp_hbinterval;
1947 * uint16_t spp_pathmaxrxt; 1948 * uint16_t spp_pathmaxrxt;
1948 * }; 1949 * uint32_t spp_pathmtu;
1949 * 1950 * uint32_t spp_sackdelay;
1950 * spp_assoc_id - (UDP style socket) This is filled in the application, 1951 * uint32_t spp_flags;
1951 * and identifies the association for this query. 1952 * };
1953 *
1954 * spp_assoc_id - (one-to-many style socket) This is filled in the
1955 * application, and identifies the association for
1956 * this query.
1952 * spp_address - This specifies which address is of interest. 1957 * spp_address - This specifies which address is of interest.
1953 * spp_hbinterval - This contains the value of the heartbeat interval, 1958 * spp_hbinterval - This contains the value of the heartbeat interval,
1954 * in milliseconds. A value of 0, when modifying the 1959 * in milliseconds. If a value of zero
1955 * parameter, specifies that the heartbeat on this 1960 * is present in this field then no changes are to
1956 * address should be disabled. A value of UINT32_MAX 1961 * be made to this parameter.
1957 * (4294967295), when modifying the parameter,
1958 * specifies that a heartbeat should be sent
1959 * immediately to the peer address, and the current
1960 * interval should remain unchanged.
1961 * spp_pathmaxrxt - This contains the maximum number of 1962 * spp_pathmaxrxt - This contains the maximum number of
1962 * retransmissions before this address shall be 1963 * retransmissions before this address shall be
1963 * considered unreachable. 1964 * considered unreachable. If a value of zero
1965 * is present in this field then no changes are to
1966 * be made to this parameter.
1967 * spp_pathmtu - When Path MTU discovery is disabled the value
1968 * specified here will be the "fixed" path mtu.
1969 * Note that if the spp_address field is empty
1970 * then all associations on this address will
1971 * have this fixed path mtu set upon them.
1972 *
1973 * spp_sackdelay - When delayed sack is enabled, this value specifies
1974 * the number of milliseconds that sacks will be delayed
1975 * for. This value will apply to all addresses of an
1976 * association if the spp_address field is empty. Note
1977 * also, that if delayed sack is enabled and this
1978 * value is set to 0, no change is made to the last
1979 * recorded delayed sack timer value.
1980 *
1981 * spp_flags - These flags are used to control various features
1982 * on an association. The flag field may contain
1983 * zero or more of the following options.
1984 *
1985 * SPP_HB_ENABLE - Enable heartbeats on the
1986 * specified address. Note that if the address
1987 * field is empty all addresses for the association
1988 * have heartbeats enabled upon them.
1989 *
1990 * SPP_HB_DISABLE - Disable heartbeats on the
1991 * speicifed address. Note that if the address
1992 * field is empty all addresses for the association
1993 * will have their heartbeats disabled. Note also
1994 * that SPP_HB_ENABLE and SPP_HB_DISABLE are
1995 * mutually exclusive, only one of these two should
1996 * be specified. Enabling both fields will have
1997 * undetermined results.
1998 *
1999 * SPP_HB_DEMAND - Request a user initiated heartbeat
2000 * to be made immediately.
2001 *
2002 * SPP_PMTUD_ENABLE - This field will enable PMTU
2003 * discovery upon the specified address. Note that
2004 * if the address feild is empty then all addresses
2005 * on the association are effected.
2006 *
2007 * SPP_PMTUD_DISABLE - This field will disable PMTU
2008 * discovery upon the specified address. Note that
2009 * if the address feild is empty then all addresses
2010 * on the association are effected. Not also that
2011 * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually
2012 * exclusive. Enabling both will have undetermined
2013 * results.
2014 *
2015 * SPP_SACKDELAY_ENABLE - Setting this flag turns
2016 * on delayed sack. The time specified in spp_sackdelay
2017 * is used to specify the sack delay for this address. Note
2018 * that if spp_address is empty then all addresses will
2019 * enable delayed sack and take on the sack delay
2020 * value specified in spp_sackdelay.
2021 * SPP_SACKDELAY_DISABLE - Setting this flag turns
2022 * off delayed sack. If the spp_address field is blank then
2023 * delayed sack is disabled for the entire association. Note
2024 * also that this field is mutually exclusive to
2025 * SPP_SACKDELAY_ENABLE, setting both will have undefined
2026 * results.
1964 */ 2027 */
2028int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
2029 struct sctp_transport *trans,
2030 struct sctp_association *asoc,
2031 struct sctp_sock *sp,
2032 int hb_change,
2033 int pmtud_change,
2034 int sackdelay_change)
2035{
2036 int error;
2037
2038 if (params->spp_flags & SPP_HB_DEMAND && trans) {
2039 error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans);
2040 if (error)
2041 return error;
2042 }
2043
2044 if (params->spp_hbinterval) {
2045 if (trans) {
2046 trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
2047 } else if (asoc) {
2048 asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
2049 } else {
2050 sp->hbinterval = params->spp_hbinterval;
2051 }
2052 }
2053
2054 if (hb_change) {
2055 if (trans) {
2056 trans->param_flags =
2057 (trans->param_flags & ~SPP_HB) | hb_change;
2058 } else if (asoc) {
2059 asoc->param_flags =
2060 (asoc->param_flags & ~SPP_HB) | hb_change;
2061 } else {
2062 sp->param_flags =
2063 (sp->param_flags & ~SPP_HB) | hb_change;
2064 }
2065 }
2066
2067 if (params->spp_pathmtu) {
2068 if (trans) {
2069 trans->pathmtu = params->spp_pathmtu;
2070 sctp_assoc_sync_pmtu(asoc);
2071 } else if (asoc) {
2072 asoc->pathmtu = params->spp_pathmtu;
2073 sctp_frag_point(sp, params->spp_pathmtu);
2074 } else {
2075 sp->pathmtu = params->spp_pathmtu;
2076 }
2077 }
2078
2079 if (pmtud_change) {
2080 if (trans) {
2081 int update = (trans->param_flags & SPP_PMTUD_DISABLE) &&
2082 (params->spp_flags & SPP_PMTUD_ENABLE);
2083 trans->param_flags =
2084 (trans->param_flags & ~SPP_PMTUD) | pmtud_change;
2085 if (update) {
2086 sctp_transport_pmtu(trans);
2087 sctp_assoc_sync_pmtu(asoc);
2088 }
2089 } else if (asoc) {
2090 asoc->param_flags =
2091 (asoc->param_flags & ~SPP_PMTUD) | pmtud_change;
2092 } else {
2093 sp->param_flags =
2094 (sp->param_flags & ~SPP_PMTUD) | pmtud_change;
2095 }
2096 }
2097
2098 if (params->spp_sackdelay) {
2099 if (trans) {
2100 trans->sackdelay =
2101 msecs_to_jiffies(params->spp_sackdelay);
2102 } else if (asoc) {
2103 asoc->sackdelay =
2104 msecs_to_jiffies(params->spp_sackdelay);
2105 } else {
2106 sp->sackdelay = params->spp_sackdelay;
2107 }
2108 }
2109
2110 if (sackdelay_change) {
2111 if (trans) {
2112 trans->param_flags =
2113 (trans->param_flags & ~SPP_SACKDELAY) |
2114 sackdelay_change;
2115 } else if (asoc) {
2116 asoc->param_flags =
2117 (asoc->param_flags & ~SPP_SACKDELAY) |
2118 sackdelay_change;
2119 } else {
2120 sp->param_flags =
2121 (sp->param_flags & ~SPP_SACKDELAY) |
2122 sackdelay_change;
2123 }
2124 }
2125
2126 if (params->spp_pathmaxrxt) {
2127 if (trans) {
2128 trans->pathmaxrxt = params->spp_pathmaxrxt;
2129 } else if (asoc) {
2130 asoc->pathmaxrxt = params->spp_pathmaxrxt;
2131 } else {
2132 sp->pathmaxrxt = params->spp_pathmaxrxt;
2133 }
2134 }
2135
2136 return 0;
2137}
2138
1965static int sctp_setsockopt_peer_addr_params(struct sock *sk, 2139static int sctp_setsockopt_peer_addr_params(struct sock *sk,
1966 char __user *optval, int optlen) 2140 char __user *optval, int optlen)
1967{ 2141{
1968 struct sctp_paddrparams params; 2142 struct sctp_paddrparams params;
1969 struct sctp_transport *trans; 2143 struct sctp_transport *trans = NULL;
2144 struct sctp_association *asoc = NULL;
2145 struct sctp_sock *sp = sctp_sk(sk);
1970 int error; 2146 int error;
2147 int hb_change, pmtud_change, sackdelay_change;
1971 2148
1972 if (optlen != sizeof(struct sctp_paddrparams)) 2149 if (optlen != sizeof(struct sctp_paddrparams))
1973 return -EINVAL; 2150 return - EINVAL;
2151
1974 if (copy_from_user(&params, optval, optlen)) 2152 if (copy_from_user(&params, optval, optlen))
1975 return -EFAULT; 2153 return -EFAULT;
1976 2154
1977 /* 2155 /* Validate flags and value parameters. */
1978 * API 7. Socket Options (setting the default value for the endpoint) 2156 hb_change = params.spp_flags & SPP_HB;
1979 * All options that support specific settings on an association by 2157 pmtud_change = params.spp_flags & SPP_PMTUD;
1980 * filling in either an association id variable or a sockaddr_storage 2158 sackdelay_change = params.spp_flags & SPP_SACKDELAY;
1981 * SHOULD also support setting of the same value for the entire endpoint 2159
1982 * (i.e. future associations). To accomplish this the following logic is 2160 if (hb_change == SPP_HB ||
1983 * used when setting one of these options: 2161 pmtud_change == SPP_PMTUD ||
1984 2162 sackdelay_change == SPP_SACKDELAY ||
1985 * c) If neither the sockaddr_storage or association identification is 2163 params.spp_sackdelay > 500 ||
1986 * set i.e. the sockaddr_storage is set to all 0's (INADDR_ANY) and 2164 (params.spp_pathmtu
1987 * the association identification is 0, the settings are a default 2165 && params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT))
1988 * and to be applied to the endpoint (all future associations). 2166 return -EINVAL;
1989 */
1990 2167
1991 /* update default value for endpoint (all future associations) */ 2168 /* If an address other than INADDR_ANY is specified, and
1992 if (!params.spp_assoc_id && 2169 * no transport is found, then the request is invalid.
1993 sctp_is_any(( union sctp_addr *)&params.spp_address)) { 2170 */
1994 /* Manual heartbeat on an endpoint is invalid. */ 2171 if (!sctp_is_any(( union sctp_addr *)&params.spp_address)) {
1995 if (0xffffffff == params.spp_hbinterval) 2172 trans = sctp_addr_id2transport(sk, &params.spp_address,
2173 params.spp_assoc_id);
2174 if (!trans)
1996 return -EINVAL; 2175 return -EINVAL;
1997 else if (params.spp_hbinterval)
1998 sctp_sk(sk)->paddrparam.spp_hbinterval =
1999 params.spp_hbinterval;
2000 if (params.spp_pathmaxrxt)
2001 sctp_sk(sk)->paddrparam.spp_pathmaxrxt =
2002 params.spp_pathmaxrxt;
2003 return 0;
2004 } 2176 }
2005 2177
2006 trans = sctp_addr_id2transport(sk, &params.spp_address, 2178 /* Get association, if assoc_id != 0 and the socket is a one
2007 params.spp_assoc_id); 2179 * to many style socket, and an association was not found, then
2008 if (!trans) 2180 * the id was invalid.
2181 */
2182 asoc = sctp_id2assoc(sk, params.spp_assoc_id);
2183 if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP))
2009 return -EINVAL; 2184 return -EINVAL;
2010 2185
2011 /* Applications can enable or disable heartbeats for any peer address 2186 /* Heartbeat demand can only be sent on a transport or
2012 * of an association, modify an address's heartbeat interval, force a 2187 * association, but not a socket.
2013 * heartbeat to be sent immediately, and adjust the address's maximum
2014 * number of retransmissions sent before an address is considered
2015 * unreachable.
2016 *
2017 * The value of the heartbeat interval, in milliseconds. A value of
2018 * UINT32_MAX (4294967295), when modifying the parameter, specifies
2019 * that a heartbeat should be sent immediately to the peer address,
2020 * and the current interval should remain unchanged.
2021 */ 2188 */
2022 if (0xffffffff == params.spp_hbinterval) { 2189 if (params.spp_flags & SPP_HB_DEMAND && !trans && !asoc)
2023 error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); 2190 return -EINVAL;
2024 if (error) 2191
2025 return error; 2192 /* Process parameters. */
2026 } else { 2193 error = sctp_apply_peer_addr_params(&params, trans, asoc, sp,
2027 /* The value of the heartbeat interval, in milliseconds. A value of 0, 2194 hb_change, pmtud_change,
2028 * when modifying the parameter, specifies that the heartbeat on this 2195 sackdelay_change);
2029 * address should be disabled. 2196
2197 if (error)
2198 return error;
2199
2200 /* If changes are for association, also apply parameters to each
2201 * transport.
2030 */ 2202 */
2031 if (params.spp_hbinterval) { 2203 if (!trans && asoc) {
2032 trans->hb_allowed = 1; 2204 struct list_head *pos;
2033 trans->hb_interval = 2205
2034 msecs_to_jiffies(params.spp_hbinterval); 2206 list_for_each(pos, &asoc->peer.transport_addr_list) {
2035 } else 2207 trans = list_entry(pos, struct sctp_transport,
2036 trans->hb_allowed = 0; 2208 transports);
2209 sctp_apply_peer_addr_params(&params, trans, asoc, sp,
2210 hb_change, pmtud_change,
2211 sackdelay_change);
2212 }
2037 } 2213 }
2038 2214
2039 /* spp_pathmaxrxt contains the maximum number of retransmissions 2215 return 0;
2040 * before this address shall be considered unreachable. 2216}
2041 */ 2217
2042 if (params.spp_pathmaxrxt) 2218/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
2043 trans->max_retrans = params.spp_pathmaxrxt; 2219 *
2220 * This options will get or set the delayed ack timer. The time is set
2221 * in milliseconds. If the assoc_id is 0, then this sets or gets the
2222 * endpoints default delayed ack timer value. If the assoc_id field is
2223 * non-zero, then the set or get effects the specified association.
2224 *
2225 * struct sctp_assoc_value {
2226 * sctp_assoc_t assoc_id;
2227 * uint32_t assoc_value;
2228 * };
2229 *
2230 * assoc_id - This parameter, indicates which association the
2231 * user is preforming an action upon. Note that if
2232 * this field's value is zero then the endpoints
2233 * default value is changed (effecting future
2234 * associations only).
2235 *
2236 * assoc_value - This parameter contains the number of milliseconds
2237 * that the user is requesting the delayed ACK timer
2238 * be set to. Note that this value is defined in
2239 * the standard to be between 200 and 500 milliseconds.
2240 *
2241 * Note: a value of zero will leave the value alone,
2242 * but disable SACK delay. A non-zero value will also
2243 * enable SACK delay.
2244 */
2044 2245
2246static int sctp_setsockopt_delayed_ack_time(struct sock *sk,
2247 char __user *optval, int optlen)
2248{
2249 struct sctp_assoc_value params;
2250 struct sctp_transport *trans = NULL;
2251 struct sctp_association *asoc = NULL;
2252 struct sctp_sock *sp = sctp_sk(sk);
2253
2254 if (optlen != sizeof(struct sctp_assoc_value))
2255 return - EINVAL;
2256
2257 if (copy_from_user(&params, optval, optlen))
2258 return -EFAULT;
2259
2260 /* Validate value parameter. */
2261 if (params.assoc_value > 500)
2262 return -EINVAL;
2263
2264 /* Get association, if assoc_id != 0 and the socket is a one
2265 * to many style socket, and an association was not found, then
2266 * the id was invalid.
2267 */
2268 asoc = sctp_id2assoc(sk, params.assoc_id);
2269 if (!asoc && params.assoc_id && sctp_style(sk, UDP))
2270 return -EINVAL;
2271
2272 if (params.assoc_value) {
2273 if (asoc) {
2274 asoc->sackdelay =
2275 msecs_to_jiffies(params.assoc_value);
2276 asoc->param_flags =
2277 (asoc->param_flags & ~SPP_SACKDELAY) |
2278 SPP_SACKDELAY_ENABLE;
2279 } else {
2280 sp->sackdelay = params.assoc_value;
2281 sp->param_flags =
2282 (sp->param_flags & ~SPP_SACKDELAY) |
2283 SPP_SACKDELAY_ENABLE;
2284 }
2285 } else {
2286 if (asoc) {
2287 asoc->param_flags =
2288 (asoc->param_flags & ~SPP_SACKDELAY) |
2289 SPP_SACKDELAY_DISABLE;
2290 } else {
2291 sp->param_flags =
2292 (sp->param_flags & ~SPP_SACKDELAY) |
2293 SPP_SACKDELAY_DISABLE;
2294 }
2295 }
2296
2297 /* If change is for association, also apply to each transport. */
2298 if (asoc) {
2299 struct list_head *pos;
2300
2301 list_for_each(pos, &asoc->peer.transport_addr_list) {
2302 trans = list_entry(pos, struct sctp_transport,
2303 transports);
2304 if (params.assoc_value) {
2305 trans->sackdelay =
2306 msecs_to_jiffies(params.assoc_value);
2307 trans->param_flags =
2308 (trans->param_flags & ~SPP_SACKDELAY) |
2309 SPP_SACKDELAY_ENABLE;
2310 } else {
2311 trans->param_flags =
2312 (trans->param_flags & ~SPP_SACKDELAY) |
2313 SPP_SACKDELAY_DISABLE;
2314 }
2315 }
2316 }
2317
2045 return 0; 2318 return 0;
2046} 2319}
2047 2320
@@ -2334,7 +2607,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl
2334 /* Update the frag_point of the existing associations. */ 2607 /* Update the frag_point of the existing associations. */
2335 list_for_each(pos, &(sp->ep->asocs)) { 2608 list_for_each(pos, &(sp->ep->asocs)) {
2336 asoc = list_entry(pos, struct sctp_association, asocs); 2609 asoc = list_entry(pos, struct sctp_association, asocs);
2337 asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); 2610 asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu);
2338 } 2611 }
2339 2612
2340 return 0; 2613 return 0;
@@ -2491,6 +2764,10 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
2491 retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); 2764 retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen);
2492 break; 2765 break;
2493 2766
2767 case SCTP_DELAYED_ACK_TIME:
2768 retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen);
2769 break;
2770
2494 case SCTP_INITMSG: 2771 case SCTP_INITMSG:
2495 retval = sctp_setsockopt_initmsg(sk, optval, optlen); 2772 retval = sctp_setsockopt_initmsg(sk, optval, optlen);
2496 break; 2773 break;
@@ -2715,8 +2992,13 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
2715 /* Default Peer Address Parameters. These defaults can 2992 /* Default Peer Address Parameters. These defaults can
2716 * be modified via SCTP_PEER_ADDR_PARAMS 2993 * be modified via SCTP_PEER_ADDR_PARAMS
2717 */ 2994 */
2718 sp->paddrparam.spp_hbinterval = jiffies_to_msecs(sctp_hb_interval); 2995 sp->hbinterval = jiffies_to_msecs(sctp_hb_interval);
2719 sp->paddrparam.spp_pathmaxrxt = sctp_max_retrans_path; 2996 sp->pathmaxrxt = sctp_max_retrans_path;
2997 sp->pathmtu = 0; // allow default discovery
2998 sp->sackdelay = sctp_sack_timeout;
2999 sp->param_flags = SPP_HB_ENABLE |
3000 SPP_PMTUD_ENABLE |
3001 SPP_SACKDELAY_ENABLE;
2720 3002
2721 /* If enabled no SCTP message fragmentation will be performed. 3003 /* If enabled no SCTP message fragmentation will be performed.
2722 * Configure through SCTP_DISABLE_FRAGMENTS socket option. 3004 * Configure through SCTP_DISABLE_FRAGMENTS socket option.
@@ -2865,7 +3147,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
2865 status.sstat_primary.spinfo_cwnd = transport->cwnd; 3147 status.sstat_primary.spinfo_cwnd = transport->cwnd;
2866 status.sstat_primary.spinfo_srtt = transport->srtt; 3148 status.sstat_primary.spinfo_srtt = transport->srtt;
2867 status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); 3149 status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto);
2868 status.sstat_primary.spinfo_mtu = transport->pmtu; 3150 status.sstat_primary.spinfo_mtu = transport->pathmtu;
2869 3151
2870 if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN) 3152 if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN)
2871 status.sstat_primary.spinfo_state = SCTP_ACTIVE; 3153 status.sstat_primary.spinfo_state = SCTP_ACTIVE;
@@ -2924,7 +3206,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
2924 pinfo.spinfo_cwnd = transport->cwnd; 3206 pinfo.spinfo_cwnd = transport->cwnd;
2925 pinfo.spinfo_srtt = transport->srtt; 3207 pinfo.spinfo_srtt = transport->srtt;
2926 pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); 3208 pinfo.spinfo_rto = jiffies_to_msecs(transport->rto);
2927 pinfo.spinfo_mtu = transport->pmtu; 3209 pinfo.spinfo_mtu = transport->pathmtu;
2928 3210
2929 if (pinfo.spinfo_state == SCTP_UNKNOWN) 3211 if (pinfo.spinfo_state == SCTP_UNKNOWN)
2930 pinfo.spinfo_state = SCTP_ACTIVE; 3212 pinfo.spinfo_state = SCTP_ACTIVE;
@@ -3086,69 +3368,227 @@ out:
3086 * address's parameters: 3368 * address's parameters:
3087 * 3369 *
3088 * struct sctp_paddrparams { 3370 * struct sctp_paddrparams {
3089 * sctp_assoc_t spp_assoc_id; 3371 * sctp_assoc_t spp_assoc_id;
3090 * struct sockaddr_storage spp_address; 3372 * struct sockaddr_storage spp_address;
3091 * uint32_t spp_hbinterval; 3373 * uint32_t spp_hbinterval;
3092 * uint16_t spp_pathmaxrxt; 3374 * uint16_t spp_pathmaxrxt;
3093 * }; 3375 * uint32_t spp_pathmtu;
3094 * 3376 * uint32_t spp_sackdelay;
3095 * spp_assoc_id - (UDP style socket) This is filled in the application, 3377 * uint32_t spp_flags;
3096 * and identifies the association for this query. 3378 * };
3379 *
3380 * spp_assoc_id - (one-to-many style socket) This is filled in the
3381 * application, and identifies the association for
3382 * this query.
3097 * spp_address - This specifies which address is of interest. 3383 * spp_address - This specifies which address is of interest.
3098 * spp_hbinterval - This contains the value of the heartbeat interval, 3384 * spp_hbinterval - This contains the value of the heartbeat interval,
3099 * in milliseconds. A value of 0, when modifying the 3385 * in milliseconds. If a value of zero
3100 * parameter, specifies that the heartbeat on this 3386 * is present in this field then no changes are to
3101 * address should be disabled. A value of UINT32_MAX 3387 * be made to this parameter.
3102 * (4294967295), when modifying the parameter,
3103 * specifies that a heartbeat should be sent
3104 * immediately to the peer address, and the current
3105 * interval should remain unchanged.
3106 * spp_pathmaxrxt - This contains the maximum number of 3388 * spp_pathmaxrxt - This contains the maximum number of
3107 * retransmissions before this address shall be 3389 * retransmissions before this address shall be
3108 * considered unreachable. 3390 * considered unreachable. If a value of zero
3391 * is present in this field then no changes are to
3392 * be made to this parameter.
3393 * spp_pathmtu - When Path MTU discovery is disabled the value
3394 * specified here will be the "fixed" path mtu.
3395 * Note that if the spp_address field is empty
3396 * then all associations on this address will
3397 * have this fixed path mtu set upon them.
3398 *
3399 * spp_sackdelay - When delayed sack is enabled, this value specifies
3400 * the number of milliseconds that sacks will be delayed
3401 * for. This value will apply to all addresses of an
3402 * association if the spp_address field is empty. Note
3403 * also, that if delayed sack is enabled and this
3404 * value is set to 0, no change is made to the last
3405 * recorded delayed sack timer value.
3406 *
3407 * spp_flags - These flags are used to control various features
3408 * on an association. The flag field may contain
3409 * zero or more of the following options.
3410 *
3411 * SPP_HB_ENABLE - Enable heartbeats on the
3412 * specified address. Note that if the address
3413 * field is empty all addresses for the association
3414 * have heartbeats enabled upon them.
3415 *
3416 * SPP_HB_DISABLE - Disable heartbeats on the
3417 * speicifed address. Note that if the address
3418 * field is empty all addresses for the association
3419 * will have their heartbeats disabled. Note also
3420 * that SPP_HB_ENABLE and SPP_HB_DISABLE are
3421 * mutually exclusive, only one of these two should
3422 * be specified. Enabling both fields will have
3423 * undetermined results.
3424 *
3425 * SPP_HB_DEMAND - Request a user initiated heartbeat
3426 * to be made immediately.
3427 *
3428 * SPP_PMTUD_ENABLE - This field will enable PMTU
3429 * discovery upon the specified address. Note that
3430 * if the address feild is empty then all addresses
3431 * on the association are effected.
3432 *
3433 * SPP_PMTUD_DISABLE - This field will disable PMTU
3434 * discovery upon the specified address. Note that
3435 * if the address feild is empty then all addresses
3436 * on the association are effected. Not also that
3437 * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually
3438 * exclusive. Enabling both will have undetermined
3439 * results.
3440 *
3441 * SPP_SACKDELAY_ENABLE - Setting this flag turns
3442 * on delayed sack. The time specified in spp_sackdelay
3443 * is used to specify the sack delay for this address. Note
3444 * that if spp_address is empty then all addresses will
3445 * enable delayed sack and take on the sack delay
3446 * value specified in spp_sackdelay.
3447 * SPP_SACKDELAY_DISABLE - Setting this flag turns
3448 * off delayed sack. If the spp_address field is blank then
3449 * delayed sack is disabled for the entire association. Note
3450 * also that this field is mutually exclusive to
3451 * SPP_SACKDELAY_ENABLE, setting both will have undefined
3452 * results.
3109 */ 3453 */
3110static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, 3454static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
3111 char __user *optval, int __user *optlen) 3455 char __user *optval, int __user *optlen)
3112{ 3456{
3113 struct sctp_paddrparams params; 3457 struct sctp_paddrparams params;
3114 struct sctp_transport *trans; 3458 struct sctp_transport *trans = NULL;
3459 struct sctp_association *asoc = NULL;
3460 struct sctp_sock *sp = sctp_sk(sk);
3115 3461
3116 if (len != sizeof(struct sctp_paddrparams)) 3462 if (len != sizeof(struct sctp_paddrparams))
3117 return -EINVAL; 3463 return -EINVAL;
3464
3118 if (copy_from_user(&params, optval, len)) 3465 if (copy_from_user(&params, optval, len))
3119 return -EFAULT; 3466 return -EFAULT;
3120 3467
3121 /* If no association id is specified retrieve the default value 3468 /* If an address other than INADDR_ANY is specified, and
3122 * for the endpoint that will be used for all future associations 3469 * no transport is found, then the request is invalid.
3123 */ 3470 */
3124 if (!params.spp_assoc_id && 3471 if (!sctp_is_any(( union sctp_addr *)&params.spp_address)) {
3125 sctp_is_any(( union sctp_addr *)&params.spp_address)) { 3472 trans = sctp_addr_id2transport(sk, &params.spp_address,
3126 params.spp_hbinterval = sctp_sk(sk)->paddrparam.spp_hbinterval; 3473 params.spp_assoc_id);
3127 params.spp_pathmaxrxt = sctp_sk(sk)->paddrparam.spp_pathmaxrxt; 3474 if (!trans) {
3128 3475 SCTP_DEBUG_PRINTK("Failed no transport\n");
3129 goto done; 3476 return -EINVAL;
3477 }
3130 } 3478 }
3131 3479
3132 trans = sctp_addr_id2transport(sk, &params.spp_address, 3480 /* Get association, if assoc_id != 0 and the socket is a one
3133 params.spp_assoc_id); 3481 * to many style socket, and an association was not found, then
3134 if (!trans) 3482 * the id was invalid.
3483 */
3484 asoc = sctp_id2assoc(sk, params.spp_assoc_id);
3485 if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) {
3486 SCTP_DEBUG_PRINTK("Failed no association\n");
3135 return -EINVAL; 3487 return -EINVAL;
3488 }
3136 3489
3137 /* The value of the heartbeat interval, in milliseconds. A value of 0, 3490 if (trans) {
3138 * when modifying the parameter, specifies that the heartbeat on this 3491 /* Fetch transport values. */
3139 * address should be disabled. 3492 params.spp_hbinterval = jiffies_to_msecs(trans->hbinterval);
3140 */ 3493 params.spp_pathmtu = trans->pathmtu;
3141 if (!trans->hb_allowed) 3494 params.spp_pathmaxrxt = trans->pathmaxrxt;
3142 params.spp_hbinterval = 0; 3495 params.spp_sackdelay = jiffies_to_msecs(trans->sackdelay);
3143 else 3496
3144 params.spp_hbinterval = jiffies_to_msecs(trans->hb_interval); 3497 /*draft-11 doesn't say what to return in spp_flags*/
3498 params.spp_flags = trans->param_flags;
3499 } else if (asoc) {
3500 /* Fetch association values. */
3501 params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval);
3502 params.spp_pathmtu = asoc->pathmtu;
3503 params.spp_pathmaxrxt = asoc->pathmaxrxt;
3504 params.spp_sackdelay = jiffies_to_msecs(asoc->sackdelay);
3505
3506 /*draft-11 doesn't say what to return in spp_flags*/
3507 params.spp_flags = asoc->param_flags;
3508 } else {
3509 /* Fetch socket values. */
3510 params.spp_hbinterval = sp->hbinterval;
3511 params.spp_pathmtu = sp->pathmtu;
3512 params.spp_sackdelay = sp->sackdelay;
3513 params.spp_pathmaxrxt = sp->pathmaxrxt;
3514
3515 /*draft-11 doesn't say what to return in spp_flags*/
3516 params.spp_flags = sp->param_flags;
3517 }
3145 3518
3146 /* spp_pathmaxrxt contains the maximum number of retransmissions 3519 if (copy_to_user(optval, &params, len))
3147 * before this address shall be considered unreachable. 3520 return -EFAULT;
3148 */ 3521
3149 params.spp_pathmaxrxt = trans->max_retrans; 3522 if (put_user(len, optlen))
3523 return -EFAULT;
3524
3525 return 0;
3526}
3527
3528/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
3529 *
3530 * This options will get or set the delayed ack timer. The time is set
3531 * in milliseconds. If the assoc_id is 0, then this sets or gets the
3532 * endpoints default delayed ack timer value. If the assoc_id field is
3533 * non-zero, then the set or get effects the specified association.
3534 *
3535 * struct sctp_assoc_value {
3536 * sctp_assoc_t assoc_id;
3537 * uint32_t assoc_value;
3538 * };
3539 *
3540 * assoc_id - This parameter, indicates which association the
3541 * user is preforming an action upon. Note that if
3542 * this field's value is zero then the endpoints
3543 * default value is changed (effecting future
3544 * associations only).
3545 *
3546 * assoc_value - This parameter contains the number of milliseconds
3547 * that the user is requesting the delayed ACK timer
3548 * be set to. Note that this value is defined in
3549 * the standard to be between 200 and 500 milliseconds.
3550 *
3551 * Note: a value of zero will leave the value alone,
3552 * but disable SACK delay. A non-zero value will also
3553 * enable SACK delay.
3554 */
3555static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len,
3556 char __user *optval,
3557 int __user *optlen)
3558{
3559 struct sctp_assoc_value params;
3560 struct sctp_association *asoc = NULL;
3561 struct sctp_sock *sp = sctp_sk(sk);
3562
3563 if (len != sizeof(struct sctp_assoc_value))
3564 return - EINVAL;
3565
3566 if (copy_from_user(&params, optval, len))
3567 return -EFAULT;
3568
3569 /* Get association, if assoc_id != 0 and the socket is a one
3570 * to many style socket, and an association was not found, then
3571 * the id was invalid.
3572 */
3573 asoc = sctp_id2assoc(sk, params.assoc_id);
3574 if (!asoc && params.assoc_id && sctp_style(sk, UDP))
3575 return -EINVAL;
3576
3577 if (asoc) {
3578 /* Fetch association values. */
3579 if (asoc->param_flags & SPP_SACKDELAY_ENABLE)
3580 params.assoc_value = jiffies_to_msecs(
3581 asoc->sackdelay);
3582 else
3583 params.assoc_value = 0;
3584 } else {
3585 /* Fetch socket values. */
3586 if (sp->param_flags & SPP_SACKDELAY_ENABLE)
3587 params.assoc_value = sp->sackdelay;
3588 else
3589 params.assoc_value = 0;
3590 }
3150 3591
3151done:
3152 if (copy_to_user(optval, &params, len)) 3592 if (copy_to_user(optval, &params, len))
3153 return -EFAULT; 3593 return -EFAULT;
3154 3594
@@ -4015,6 +4455,10 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
4015 retval = sctp_getsockopt_peer_addr_params(sk, len, optval, 4455 retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
4016 optlen); 4456 optlen);
4017 break; 4457 break;
4458 case SCTP_DELAYED_ACK_TIME:
4459 retval = sctp_getsockopt_delayed_ack_time(sk, len, optval,
4460 optlen);
4461 break;
4018 case SCTP_INITMSG: 4462 case SCTP_INITMSG:
4019 retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); 4463 retval = sctp_getsockopt_initmsg(sk, len, optval, optlen);
4020 break; 4464 break;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 268ddaf2dc0f..68d73e2dd155 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -86,10 +86,13 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
86 peer->init_sent_count = 0; 86 peer->init_sent_count = 0;
87 87
88 peer->state = SCTP_ACTIVE; 88 peer->state = SCTP_ACTIVE;
89 peer->hb_allowed = 0; 89 peer->param_flags = SPP_HB_DISABLE |
90 SPP_PMTUD_ENABLE |
91 SPP_SACKDELAY_ENABLE;
92 peer->hbinterval = 0;
90 93
91 /* Initialize the default path max_retrans. */ 94 /* Initialize the default path max_retrans. */
92 peer->max_retrans = sctp_max_retrans_path; 95 peer->pathmaxrxt = sctp_max_retrans_path;
93 peer->error_count = 0; 96 peer->error_count = 0;
94 97
95 INIT_LIST_HEAD(&peer->transmitted); 98 INIT_LIST_HEAD(&peer->transmitted);
@@ -229,10 +232,10 @@ void sctp_transport_pmtu(struct sctp_transport *transport)
229 dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); 232 dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL);
230 233
231 if (dst) { 234 if (dst) {
232 transport->pmtu = dst_mtu(dst); 235 transport->pathmtu = dst_mtu(dst);
233 dst_release(dst); 236 dst_release(dst);
234 } else 237 } else
235 transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; 238 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
236} 239}
237 240
238/* Caches the dst entry and source address for a transport's destination 241/* Caches the dst entry and source address for a transport's destination
@@ -254,8 +257,11 @@ void sctp_transport_route(struct sctp_transport *transport,
254 af->get_saddr(asoc, dst, daddr, &transport->saddr); 257 af->get_saddr(asoc, dst, daddr, &transport->saddr);
255 258
256 transport->dst = dst; 259 transport->dst = dst;
260 if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
261 return;
262 }
257 if (dst) { 263 if (dst) {
258 transport->pmtu = dst_mtu(dst); 264 transport->pathmtu = dst_mtu(dst);
259 265
260 /* Initialize sk->sk_rcv_saddr, if the transport is the 266 /* Initialize sk->sk_rcv_saddr, if the transport is the
261 * association's active path for getsockname(). 267 * association's active path for getsockname().
@@ -264,7 +270,7 @@ void sctp_transport_route(struct sctp_transport *transport,
264 opt->pf->af->to_sk_saddr(&transport->saddr, 270 opt->pf->af->to_sk_saddr(&transport->saddr,
265 asoc->base.sk); 271 asoc->base.sk);
266 } else 272 } else
267 transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; 273 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
268} 274}
269 275
270/* Hold a reference to a transport. */ 276/* Hold a reference to a transport. */
@@ -369,7 +375,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
369 375
370 ssthresh = transport->ssthresh; 376 ssthresh = transport->ssthresh;
371 pba = transport->partial_bytes_acked; 377 pba = transport->partial_bytes_acked;
372 pmtu = transport->asoc->pmtu; 378 pmtu = transport->asoc->pathmtu;
373 379
374 if (cwnd <= ssthresh) { 380 if (cwnd <= ssthresh) {
375 /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less 381 /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
@@ -441,8 +447,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
441 * partial_bytes_acked = 0 447 * partial_bytes_acked = 0
442 */ 448 */
443 transport->ssthresh = max(transport->cwnd/2, 449 transport->ssthresh = max(transport->cwnd/2,
444 4*transport->asoc->pmtu); 450 4*transport->asoc->pathmtu);
445 transport->cwnd = transport->asoc->pmtu; 451 transport->cwnd = transport->asoc->pathmtu;
446 break; 452 break;
447 453
448 case SCTP_LOWER_CWND_FAST_RTX: 454 case SCTP_LOWER_CWND_FAST_RTX:
@@ -459,7 +465,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
459 * partial_bytes_acked = 0 465 * partial_bytes_acked = 0
460 */ 466 */
461 transport->ssthresh = max(transport->cwnd/2, 467 transport->ssthresh = max(transport->cwnd/2,
462 4*transport->asoc->pmtu); 468 4*transport->asoc->pathmtu);
463 transport->cwnd = transport->ssthresh; 469 transport->cwnd = transport->ssthresh;
464 break; 470 break;
465 471
@@ -479,7 +485,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
479 if ((jiffies - transport->last_time_ecne_reduced) > 485 if ((jiffies - transport->last_time_ecne_reduced) >
480 transport->rtt) { 486 transport->rtt) {
481 transport->ssthresh = max(transport->cwnd/2, 487 transport->ssthresh = max(transport->cwnd/2,
482 4*transport->asoc->pmtu); 488 4*transport->asoc->pathmtu);
483 transport->cwnd = transport->ssthresh; 489 transport->cwnd = transport->ssthresh;
484 transport->last_time_ecne_reduced = jiffies; 490 transport->last_time_ecne_reduced = jiffies;
485 } 491 }
@@ -496,7 +502,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
496 */ 502 */
497 if ((jiffies - transport->last_time_used) > transport->rto) 503 if ((jiffies - transport->last_time_used) > transport->rto)
498 transport->cwnd = max(transport->cwnd/2, 504 transport->cwnd = max(transport->cwnd/2,
499 4*transport->asoc->pmtu); 505 4*transport->asoc->pathmtu);
500 break; 506 break;
501 }; 507 };
502 508
@@ -511,7 +517,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
511unsigned long sctp_transport_timeout(struct sctp_transport *t) 517unsigned long sctp_transport_timeout(struct sctp_transport *t)
512{ 518{
513 unsigned long timeout; 519 unsigned long timeout;
514 timeout = t->hb_interval + t->rto + sctp_jitter(t->rto); 520 timeout = t->hbinterval + t->rto + sctp_jitter(t->rto);
515 timeout += jiffies; 521 timeout += jiffies;
516 return timeout; 522 return timeout;
517} 523}
diff --git a/net/socket.c b/net/socket.c
index 3145103cdf54..b38a263853c3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -640,154 +640,150 @@ static void sock_aio_dtor(struct kiocb *iocb)
640 kfree(iocb->private); 640 kfree(iocb->private);
641} 641}
642 642
643/* 643static ssize_t sock_sendpage(struct file *file, struct page *page,
644 * Read data from a socket. ubuf is a user mode pointer. We make sure the user 644 int offset, size_t size, loff_t *ppos, int more)
645 * area ubuf...ubuf+size-1 is writable before asking the protocol.
646 */
647
648static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
649 size_t size, loff_t pos)
650{ 645{
651 struct sock_iocb *x, siocb;
652 struct socket *sock; 646 struct socket *sock;
653 int flags; 647 int flags;
654 648
655 if (pos != 0) 649 sock = file->private_data;
656 return -ESPIPE;
657 if (size==0) /* Match SYS5 behaviour */
658 return 0;
659 650
660 if (is_sync_kiocb(iocb)) 651 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
661 x = &siocb; 652 if (more)
662 else { 653 flags |= MSG_MORE;
663 x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL); 654
664 if (!x) 655 return sock->ops->sendpage(sock, page, offset, size, flags);
665 return -ENOMEM; 656}
657
658static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
659 char __user *ubuf, size_t size, struct sock_iocb *siocb)
660{
661 if (!is_sync_kiocb(iocb)) {
662 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
663 if (!siocb)
664 return NULL;
666 iocb->ki_dtor = sock_aio_dtor; 665 iocb->ki_dtor = sock_aio_dtor;
667 } 666 }
668 iocb->private = x;
669 x->kiocb = iocb;
670 sock = iocb->ki_filp->private_data;
671 667
672 x->async_msg.msg_name = NULL; 668 siocb->kiocb = iocb;
673 x->async_msg.msg_namelen = 0; 669 siocb->async_iov.iov_base = ubuf;
674 x->async_msg.msg_iov = &x->async_iov; 670 siocb->async_iov.iov_len = size;
675 x->async_msg.msg_iovlen = 1;
676 x->async_msg.msg_control = NULL;
677 x->async_msg.msg_controllen = 0;
678 x->async_iov.iov_base = ubuf;
679 x->async_iov.iov_len = size;
680 flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
681 671
682 return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags); 672 iocb->private = siocb;
673 return siocb;
683} 674}
684 675
676static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
677 struct file *file, struct iovec *iov, unsigned long nr_segs)
678{
679 struct socket *sock = file->private_data;
680 size_t size = 0;
681 int i;
685 682
686/* 683 for (i = 0 ; i < nr_segs ; i++)
687 * Write data to a socket. We verify that the user area ubuf..ubuf+size-1 684 size += iov[i].iov_len;
688 * is readable by the user process.
689 */
690 685
691static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, 686 msg->msg_name = NULL;
692 size_t size, loff_t pos) 687 msg->msg_namelen = 0;
688 msg->msg_control = NULL;
689 msg->msg_controllen = 0;
690 msg->msg_iov = (struct iovec *) iov;
691 msg->msg_iovlen = nr_segs;
692 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
693
694 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
695}
696
697static ssize_t sock_readv(struct file *file, const struct iovec *iov,
698 unsigned long nr_segs, loff_t *ppos)
693{ 699{
694 struct sock_iocb *x, siocb; 700 struct kiocb iocb;
695 struct socket *sock; 701 struct sock_iocb siocb;
696 702 struct msghdr msg;
703 int ret;
704
705 init_sync_kiocb(&iocb, NULL);
706 iocb.private = &siocb;
707
708 ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
709 if (-EIOCBQUEUED == ret)
710 ret = wait_on_sync_kiocb(&iocb);
711 return ret;
712}
713
714static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
715 size_t count, loff_t pos)
716{
717 struct sock_iocb siocb, *x;
718
697 if (pos != 0) 719 if (pos != 0)
698 return -ESPIPE; 720 return -ESPIPE;
699 if(size==0) /* Match SYS5 behaviour */ 721 if (count == 0) /* Match SYS5 behaviour */
700 return 0; 722 return 0;
701 723
702 if (is_sync_kiocb(iocb)) 724 x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
703 x = &siocb; 725 if (!x)
704 else { 726 return -ENOMEM;
705 x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL); 727 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
706 if (!x) 728 &x->async_iov, 1);
707 return -ENOMEM;
708 iocb->ki_dtor = sock_aio_dtor;
709 }
710 iocb->private = x;
711 x->kiocb = iocb;
712 sock = iocb->ki_filp->private_data;
713
714 x->async_msg.msg_name = NULL;
715 x->async_msg.msg_namelen = 0;
716 x->async_msg.msg_iov = &x->async_iov;
717 x->async_msg.msg_iovlen = 1;
718 x->async_msg.msg_control = NULL;
719 x->async_msg.msg_controllen = 0;
720 x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
721 if (sock->type == SOCK_SEQPACKET)
722 x->async_msg.msg_flags |= MSG_EOR;
723 x->async_iov.iov_base = (void __user *)ubuf;
724 x->async_iov.iov_len = size;
725
726 return __sock_sendmsg(iocb, sock, &x->async_msg, size);
727} 729}
728 730
729static ssize_t sock_sendpage(struct file *file, struct page *page, 731static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
730 int offset, size_t size, loff_t *ppos, int more) 732 struct file *file, struct iovec *iov, unsigned long nr_segs)
731{ 733{
732 struct socket *sock; 734 struct socket *sock = file->private_data;
733 int flags; 735 size_t size = 0;
736 int i;
734 737
735 sock = file->private_data; 738 for (i = 0 ; i < nr_segs ; i++)
739 size += iov[i].iov_len;
736 740
737 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; 741 msg->msg_name = NULL;
738 if (more) 742 msg->msg_namelen = 0;
739 flags |= MSG_MORE; 743 msg->msg_control = NULL;
744 msg->msg_controllen = 0;
745 msg->msg_iov = (struct iovec *) iov;
746 msg->msg_iovlen = nr_segs;
747 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
748 if (sock->type == SOCK_SEQPACKET)
749 msg->msg_flags |= MSG_EOR;
740 750
741 return sock->ops->sendpage(sock, page, offset, size, flags); 751 return __sock_sendmsg(iocb, sock, msg, size);
742} 752}
743 753
744static int sock_readv_writev(int type, 754static ssize_t sock_writev(struct file *file, const struct iovec *iov,
745 struct file * file, const struct iovec * iov, 755 unsigned long nr_segs, loff_t *ppos)
746 long count, size_t size)
747{ 756{
748 struct msghdr msg; 757 struct msghdr msg;
749 struct socket *sock; 758 struct kiocb iocb;
759 struct sock_iocb siocb;
760 int ret;
750 761
751 sock = file->private_data; 762 init_sync_kiocb(&iocb, NULL);
763 iocb.private = &siocb;
752 764
753 msg.msg_name = NULL; 765 ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
754 msg.msg_namelen = 0; 766 if (-EIOCBQUEUED == ret)
755 msg.msg_control = NULL; 767 ret = wait_on_sync_kiocb(&iocb);
756 msg.msg_controllen = 0; 768 return ret;
757 msg.msg_iov = (struct iovec *) iov; 769}
758 msg.msg_iovlen = count;
759 msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
760 770
761 /* read() does a VERIFY_WRITE */ 771static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
762 if (type == VERIFY_WRITE) 772 size_t count, loff_t pos)
763 return sock_recvmsg(sock, &msg, size, msg.msg_flags); 773{
774 struct sock_iocb siocb, *x;
764 775
765 if (sock->type == SOCK_SEQPACKET) 776 if (pos != 0)
766 msg.msg_flags |= MSG_EOR; 777 return -ESPIPE;
778 if (count == 0) /* Match SYS5 behaviour */
779 return 0;
767 780
768 return sock_sendmsg(sock, &msg, size); 781 x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
769} 782 if (!x)
783 return -ENOMEM;
770 784
771static ssize_t sock_readv(struct file *file, const struct iovec *vector, 785 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
772 unsigned long count, loff_t *ppos) 786 &x->async_iov, 1);
773{
774 size_t tot_len = 0;
775 int i;
776 for (i = 0 ; i < count ; i++)
777 tot_len += vector[i].iov_len;
778 return sock_readv_writev(VERIFY_WRITE,
779 file, vector, count, tot_len);
780}
781
782static ssize_t sock_writev(struct file *file, const struct iovec *vector,
783 unsigned long count, loff_t *ppos)
784{
785 size_t tot_len = 0;
786 int i;
787 for (i = 0 ; i < count ; i++)
788 tot_len += vector[i].iov_len;
789 return sock_readv_writev(VERIFY_READ,
790 file, vector, count, tot_len);
791} 787}
792 788
793 789
@@ -904,6 +900,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
904 break; 900 break;
905 default: 901 default:
906 err = sock->ops->ioctl(sock, cmd, arg); 902 err = sock->ops->ioctl(sock, cmd, arg);
903
904 /*
905 * If this ioctl is unknown try to hand it down
906 * to the NIC driver.
907 */
908 if (err == -ENOIOCTLCMD)
909 err = dev_ioctl(cmd, argp);
907 break; 910 break;
908 } 911 }
909 return err; 912 return err;
@@ -990,7 +993,7 @@ static int sock_fasync(int fd, struct file *filp, int on)
990 993
991 if (on) 994 if (on)
992 { 995 {
993 fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); 996 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
994 if(fna==NULL) 997 if(fna==NULL)
995 return -ENOMEM; 998 return -ENOMEM;
996 } 999 }
@@ -2036,7 +2039,7 @@ int sock_unregister(int family)
2036 return 0; 2039 return 0;
2037} 2040}
2038 2041
2039void __init sock_init(void) 2042static int __init sock_init(void)
2040{ 2043{
2041 /* 2044 /*
2042 * Initialize sock SLAB cache. 2045 * Initialize sock SLAB cache.
@@ -2044,12 +2047,10 @@ void __init sock_init(void)
2044 2047
2045 sk_init(); 2048 sk_init();
2046 2049
2047#ifdef SLAB_SKB
2048 /* 2050 /*
2049 * Initialize skbuff SLAB cache 2051 * Initialize skbuff SLAB cache
2050 */ 2052 */
2051 skb_init(); 2053 skb_init();
2052#endif
2053 2054
2054 /* 2055 /*
2055 * Initialize the protocols module. 2056 * Initialize the protocols module.
@@ -2058,15 +2059,19 @@ void __init sock_init(void)
2058 init_inodecache(); 2059 init_inodecache();
2059 register_filesystem(&sock_fs_type); 2060 register_filesystem(&sock_fs_type);
2060 sock_mnt = kern_mount(&sock_fs_type); 2061 sock_mnt = kern_mount(&sock_fs_type);
2061 /* The real protocol initialization is performed when 2062
2062 * do_initcalls is run. 2063 /* The real protocol initialization is performed in later initcalls.
2063 */ 2064 */
2064 2065
2065#ifdef CONFIG_NETFILTER 2066#ifdef CONFIG_NETFILTER
2066 netfilter_init(); 2067 netfilter_init();
2067#endif 2068#endif
2069
2070 return 0;
2068} 2071}
2069 2072
2073core_initcall(sock_init); /* early initcall */
2074
2070#ifdef CONFIG_PROC_FS 2075#ifdef CONFIG_PROC_FS
2071void socket_seq_show(struct seq_file *seq) 2076void socket_seq_show(struct seq_file *seq)
2072{ 2077{
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8c7756036e95..9ac1b8c26c01 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -94,7 +94,7 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
94 struct rpc_cred_cache *new; 94 struct rpc_cred_cache *new;
95 int i; 95 int i;
96 96
97 new = (struct rpc_cred_cache *)kmalloc(sizeof(*new), GFP_KERNEL); 97 new = kmalloc(sizeof(*new), GFP_KERNEL);
98 if (!new) 98 if (!new)
99 return -ENOMEM; 99 return -ENOMEM;
100 for (i = 0; i < RPC_CREDCACHE_NR; i++) 100 for (i = 0; i < RPC_CREDCACHE_NR; i++)
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 5f1f806a0b11..129e2bd36aff 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -97,13 +97,17 @@ get_key(const void *p, const void *end, struct crypto_tfm **res)
97 alg_mode = CRYPTO_TFM_MODE_CBC; 97 alg_mode = CRYPTO_TFM_MODE_CBC;
98 break; 98 break;
99 default: 99 default:
100 dprintk("RPC: get_key: unsupported algorithm %d\n", alg); 100 printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
101 goto out_err_free_key; 101 goto out_err_free_key;
102 } 102 }
103 if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) 103 if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
104 printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
104 goto out_err_free_key; 105 goto out_err_free_key;
105 if (crypto_cipher_setkey(*res, key.data, key.len)) 106 }
107 if (crypto_cipher_setkey(*res, key.data, key.len)) {
108 printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
106 goto out_err_free_tfm; 109 goto out_err_free_tfm;
110 }
107 111
108 kfree(key.data); 112 kfree(key.data);
109 return p; 113 return p;
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 39b3edc14694..58400807d4df 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -111,14 +111,18 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
111 setkey = 0; 111 setkey = 0;
112 break; 112 break;
113 default: 113 default:
114 dprintk("RPC: SPKM3 get_key: unsupported algorithm %d", *resalg); 114 dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg);
115 goto out_err_free_key; 115 goto out_err_free_key;
116 } 116 }
117 if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) 117 if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
118 printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name);
118 goto out_err_free_key; 119 goto out_err_free_key;
120 }
119 if (setkey) { 121 if (setkey) {
120 if (crypto_cipher_setkey(*res, key.data, key.len)) 122 if (crypto_cipher_setkey(*res, key.data, key.len)) {
123 printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name);
121 goto out_err_free_tfm; 124 goto out_err_free_tfm;
125 }
122 } 126 }
123 127
124 if(key.len > 0) 128 if(key.len > 0)
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index d1e12b25d6e2..86fbf7c3e39c 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -59,7 +59,7 @@ spkm3_make_token(struct spkm3_ctx *ctx,
59 char tokhdrbuf[25]; 59 char tokhdrbuf[25];
60 struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; 60 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
61 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; 61 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
62 int tmsglen, tokenlen = 0; 62 int tokenlen = 0;
63 unsigned char *ptr; 63 unsigned char *ptr;
64 s32 now; 64 s32 now;
65 int ctxelen = 0, ctxzbit = 0; 65 int ctxelen = 0, ctxzbit = 0;
@@ -92,24 +92,23 @@ spkm3_make_token(struct spkm3_ctx *ctx,
92 } 92 }
93 93
94 if (toktype == SPKM_MIC_TOK) { 94 if (toktype == SPKM_MIC_TOK) {
95 tmsglen = 0;
96 /* Calculate checksum over the mic-header */ 95 /* Calculate checksum over the mic-header */
97 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); 96 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
98 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, 97 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
99 ctxelen, ctxzbit); 98 ctxelen, ctxzbit);
100 99
101 if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, 100 if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len,
102 text, &md5cksum)) 101 text, 0, &md5cksum))
103 goto out_err; 102 goto out_err;
104 103
105 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); 104 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
106 tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1; 105 tokenlen = 10 + ctxelen + 1 + md5elen + 1;
107 106
108 /* Create token header using generic routines */ 107 /* Create token header using generic routines */
109 token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen); 108 token->len = g_token_size(&ctx->mech_used, tokenlen);
110 109
111 ptr = token->data; 110 ptr = token->data;
112 g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr); 111 g_make_token_header(&ctx->mech_used, tokenlen, &ptr);
113 112
114 spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); 113 spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
115 } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ 114 } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 1f824578d773..af0d7ce74686 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -182,6 +182,7 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct
182 * *tokp points to the beginning of the SPKM_MIC token described 182 * *tokp points to the beginning of the SPKM_MIC token described
183 * in rfc 2025, section 3.2.1: 183 * in rfc 2025, section 3.2.1:
184 * 184 *
185 * toklen is the inner token length
185 */ 186 */
186void 187void
187spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit) 188spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
@@ -189,7 +190,7 @@ spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hd
189 unsigned char *ict = *tokp; 190 unsigned char *ict = *tokp;
190 191
191 *(u8 *)ict++ = 0xa4; 192 *(u8 *)ict++ = 0xa4;
192 *(u8 *)ict++ = toklen - 2; 193 *(u8 *)ict++ = toklen;
193 memcpy(ict, mic_hdr->data, mic_hdr->len); 194 memcpy(ict, mic_hdr->data, mic_hdr->len);
194 ict += mic_hdr->len; 195 ict += mic_hdr->len;
195 196
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 241d5b30dfcb..96851b0ba1ba 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -95,7 +95,7 @@ spkm3_read_token(struct spkm3_ctx *ctx,
95 ret = GSS_S_DEFECTIVE_TOKEN; 95 ret = GSS_S_DEFECTIVE_TOKEN;
96 code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, 96 code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2,
97 mic_hdrlen + 2, 97 mic_hdrlen + 2,
98 message_buffer, &md5cksum); 98 message_buffer, 0, &md5cksum);
99 99
100 if (code) 100 if (code)
101 goto out; 101 goto out;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 890fb5ea0dcb..1b3ed4fd1987 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -70,7 +70,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
70 dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", 70 dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
71 acred->uid, acred->gid); 71 acred->uid, acred->gid);
72 72
73 if (!(cred = (struct unx_cred *) kmalloc(sizeof(*cred), GFP_KERNEL))) 73 if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
74 return ERR_PTR(-ENOMEM); 74 return ERR_PTR(-ENOMEM);
75 75
76 atomic_set(&cred->uc_count, 1); 76 atomic_set(&cred->uc_count, 1);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f509e9992767..dcaa0c4453ff 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -575,12 +575,11 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
575 if (rp->q.list.next == &cd->queue) { 575 if (rp->q.list.next == &cd->queue) {
576 spin_unlock(&queue_lock); 576 spin_unlock(&queue_lock);
577 up(&queue_io_sem); 577 up(&queue_io_sem);
578 if (rp->offset) 578 BUG_ON(rp->offset);
579 BUG();
580 return 0; 579 return 0;
581 } 580 }
582 rq = container_of(rp->q.list.next, struct cache_request, q.list); 581 rq = container_of(rp->q.list.next, struct cache_request, q.list);
583 if (rq->q.reader) BUG(); 582 BUG_ON(rq->q.reader);
584 if (rp->offset == 0) 583 if (rp->offset == 0)
585 rq->readers++; 584 rq->readers++;
586 spin_unlock(&queue_lock); 585 spin_unlock(&queue_lock);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 61c3abeaccae..d2f0550c4ba0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -118,7 +118,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
118 goto out_err; 118 goto out_err;
119 119
120 err = -ENOMEM; 120 err = -ENOMEM;
121 clnt = (struct rpc_clnt *) kmalloc(sizeof(*clnt), GFP_KERNEL); 121 clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
122 if (!clnt) 122 if (!clnt)
123 goto out_err; 123 goto out_err;
124 memset(clnt, 0, sizeof(*clnt)); 124 memset(clnt, 0, sizeof(*clnt));
@@ -225,7 +225,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
225{ 225{
226 struct rpc_clnt *new; 226 struct rpc_clnt *new;
227 227
228 new = (struct rpc_clnt *)kmalloc(sizeof(*new), GFP_KERNEL); 228 new = kmalloc(sizeof(*new), GFP_KERNEL);
229 if (!new) 229 if (!new)
230 goto out_no_clnt; 230 goto out_no_clnt;
231 memcpy(new, clnt, sizeof(*new)); 231 memcpy(new, clnt, sizeof(*new));
@@ -268,7 +268,8 @@ rpc_shutdown_client(struct rpc_clnt *clnt)
268 clnt->cl_oneshot = 0; 268 clnt->cl_oneshot = 0;
269 clnt->cl_dead = 0; 269 clnt->cl_dead = 0;
270 rpc_killall_tasks(clnt); 270 rpc_killall_tasks(clnt);
271 sleep_on_timeout(&destroy_wait, 1*HZ); 271 wait_event_timeout(destroy_wait,
272 !atomic_read(&clnt->cl_users), 1*HZ);
272 } 273 }
273 274
274 if (atomic_read(&clnt->cl_users) < 0) { 275 if (atomic_read(&clnt->cl_users) < 0) {
@@ -374,19 +375,23 @@ out:
374 * Default callback for async RPC calls 375 * Default callback for async RPC calls
375 */ 376 */
376static void 377static void
377rpc_default_callback(struct rpc_task *task) 378rpc_default_callback(struct rpc_task *task, void *data)
378{ 379{
379} 380}
380 381
382static const struct rpc_call_ops rpc_default_ops = {
383 .rpc_call_done = rpc_default_callback,
384};
385
381/* 386/*
382 * Export the signal mask handling for synchronous code that 387 * Export the signal mask handling for synchronous code that
383 * sleeps on RPC calls 388 * sleeps on RPC calls
384 */ 389 */
385#define RPC_INTR_SIGNALS (sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGKILL)) 390#define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM))
386 391
387static void rpc_save_sigmask(sigset_t *oldset, int intr) 392static void rpc_save_sigmask(sigset_t *oldset, int intr)
388{ 393{
389 unsigned long sigallow = 0; 394 unsigned long sigallow = sigmask(SIGKILL);
390 sigset_t sigmask; 395 sigset_t sigmask;
391 396
392 /* Block all signals except those listed in sigallow */ 397 /* Block all signals except those listed in sigallow */
@@ -432,7 +437,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
432 BUG_ON(flags & RPC_TASK_ASYNC); 437 BUG_ON(flags & RPC_TASK_ASYNC);
433 438
434 status = -ENOMEM; 439 status = -ENOMEM;
435 task = rpc_new_task(clnt, NULL, flags); 440 task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
436 if (task == NULL) 441 if (task == NULL)
437 goto out; 442 goto out;
438 443
@@ -442,14 +447,15 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
442 rpc_call_setup(task, msg, 0); 447 rpc_call_setup(task, msg, 0);
443 448
444 /* Set up the call info struct and execute the task */ 449 /* Set up the call info struct and execute the task */
445 if (task->tk_status == 0) { 450 status = task->tk_status;
451 if (status == 0) {
452 atomic_inc(&task->tk_count);
446 status = rpc_execute(task); 453 status = rpc_execute(task);
447 } else { 454 if (status == 0)
448 status = task->tk_status; 455 status = task->tk_status;
449 rpc_release_task(task);
450 } 456 }
451
452 rpc_restore_sigmask(&oldset); 457 rpc_restore_sigmask(&oldset);
458 rpc_release_task(task);
453out: 459out:
454 return status; 460 return status;
455} 461}
@@ -459,7 +465,7 @@ out:
459 */ 465 */
460int 466int
461rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, 467rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
462 rpc_action callback, void *data) 468 const struct rpc_call_ops *tk_ops, void *data)
463{ 469{
464 struct rpc_task *task; 470 struct rpc_task *task;
465 sigset_t oldset; 471 sigset_t oldset;
@@ -472,12 +478,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
472 flags |= RPC_TASK_ASYNC; 478 flags |= RPC_TASK_ASYNC;
473 479
474 /* Create/initialize a new RPC task */ 480 /* Create/initialize a new RPC task */
475 if (!callback)
476 callback = rpc_default_callback;
477 status = -ENOMEM; 481 status = -ENOMEM;
478 if (!(task = rpc_new_task(clnt, callback, flags))) 482 if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
479 goto out; 483 goto out;
480 task->tk_calldata = data;
481 484
482 /* Mask signals on GSS_AUTH upcalls */ 485 /* Mask signals on GSS_AUTH upcalls */
483 rpc_task_sigmask(task, &oldset); 486 rpc_task_sigmask(task, &oldset);
@@ -511,7 +514,7 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
511 if (task->tk_status == 0) 514 if (task->tk_status == 0)
512 task->tk_action = call_start; 515 task->tk_action = call_start;
513 else 516 else
514 task->tk_action = NULL; 517 task->tk_action = rpc_exit_task;
515} 518}
516 519
517void 520void
@@ -536,6 +539,18 @@ size_t rpc_max_payload(struct rpc_clnt *clnt)
536} 539}
537EXPORT_SYMBOL(rpc_max_payload); 540EXPORT_SYMBOL(rpc_max_payload);
538 541
542/**
543 * rpc_force_rebind - force transport to check that remote port is unchanged
544 * @clnt: client to rebind
545 *
546 */
547void rpc_force_rebind(struct rpc_clnt *clnt)
548{
549 if (clnt->cl_autobind)
550 clnt->cl_port = 0;
551}
552EXPORT_SYMBOL(rpc_force_rebind);
553
539/* 554/*
540 * Restart an (async) RPC call. Usually called from within the 555 * Restart an (async) RPC call. Usually called from within the
541 * exit handler. 556 * exit handler.
@@ -642,24 +657,26 @@ call_reserveresult(struct rpc_task *task)
642 657
643/* 658/*
644 * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. 659 * 2. Allocate the buffer. For details, see sched.c:rpc_malloc.
645 * (Note: buffer memory is freed in rpc_task_release). 660 * (Note: buffer memory is freed in xprt_release).
646 */ 661 */
647static void 662static void
648call_allocate(struct rpc_task *task) 663call_allocate(struct rpc_task *task)
649{ 664{
665 struct rpc_rqst *req = task->tk_rqstp;
666 struct rpc_xprt *xprt = task->tk_xprt;
650 unsigned int bufsiz; 667 unsigned int bufsiz;
651 668
652 dprintk("RPC: %4d call_allocate (status %d)\n", 669 dprintk("RPC: %4d call_allocate (status %d)\n",
653 task->tk_pid, task->tk_status); 670 task->tk_pid, task->tk_status);
654 task->tk_action = call_bind; 671 task->tk_action = call_bind;
655 if (task->tk_buffer) 672 if (req->rq_buffer)
656 return; 673 return;
657 674
658 /* FIXME: compute buffer requirements more exactly using 675 /* FIXME: compute buffer requirements more exactly using
659 * auth->au_wslack */ 676 * auth->au_wslack */
660 bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; 677 bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;
661 678
662 if (rpc_malloc(task, bufsiz << 1) != NULL) 679 if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL)
663 return; 680 return;
664 printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 681 printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task);
665 682
@@ -702,14 +719,14 @@ call_encode(struct rpc_task *task)
702 task->tk_pid, task->tk_status); 719 task->tk_pid, task->tk_status);
703 720
704 /* Default buffer setup */ 721 /* Default buffer setup */
705 bufsiz = task->tk_bufsize >> 1; 722 bufsiz = req->rq_bufsize >> 1;
706 sndbuf->head[0].iov_base = (void *)task->tk_buffer; 723 sndbuf->head[0].iov_base = (void *)req->rq_buffer;
707 sndbuf->head[0].iov_len = bufsiz; 724 sndbuf->head[0].iov_len = bufsiz;
708 sndbuf->tail[0].iov_len = 0; 725 sndbuf->tail[0].iov_len = 0;
709 sndbuf->page_len = 0; 726 sndbuf->page_len = 0;
710 sndbuf->len = 0; 727 sndbuf->len = 0;
711 sndbuf->buflen = bufsiz; 728 sndbuf->buflen = bufsiz;
712 rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); 729 rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz);
713 rcvbuf->head[0].iov_len = bufsiz; 730 rcvbuf->head[0].iov_len = bufsiz;
714 rcvbuf->tail[0].iov_len = 0; 731 rcvbuf->tail[0].iov_len = 0;
715 rcvbuf->page_len = 0; 732 rcvbuf->page_len = 0;
@@ -849,8 +866,7 @@ call_connect_status(struct rpc_task *task)
849 } 866 }
850 867
851 /* Something failed: remote service port may have changed */ 868 /* Something failed: remote service port may have changed */
852 if (clnt->cl_autobind) 869 rpc_force_rebind(clnt);
853 clnt->cl_port = 0;
854 870
855 switch (status) { 871 switch (status) {
856 case -ENOTCONN: 872 case -ENOTCONN:
@@ -892,7 +908,7 @@ call_transmit(struct rpc_task *task)
892 if (task->tk_status < 0) 908 if (task->tk_status < 0)
893 return; 909 return;
894 if (!task->tk_msg.rpc_proc->p_decode) { 910 if (!task->tk_msg.rpc_proc->p_decode) {
895 task->tk_action = NULL; 911 task->tk_action = rpc_exit_task;
896 rpc_wake_up_task(task); 912 rpc_wake_up_task(task);
897 } 913 }
898 return; 914 return;
@@ -931,8 +947,7 @@ call_status(struct rpc_task *task)
931 break; 947 break;
932 case -ECONNREFUSED: 948 case -ECONNREFUSED:
933 case -ENOTCONN: 949 case -ENOTCONN:
934 if (clnt->cl_autobind) 950 rpc_force_rebind(clnt);
935 clnt->cl_port = 0;
936 task->tk_action = call_bind; 951 task->tk_action = call_bind;
937 break; 952 break;
938 case -EAGAIN: 953 case -EAGAIN:
@@ -943,8 +958,7 @@ call_status(struct rpc_task *task)
943 rpc_exit(task, status); 958 rpc_exit(task, status);
944 break; 959 break;
945 default: 960 default:
946 if (clnt->cl_chatty) 961 printk("%s: RPC call returned error %d\n",
947 printk("%s: RPC call returned error %d\n",
948 clnt->cl_protname, -status); 962 clnt->cl_protname, -status);
949 rpc_exit(task, status); 963 rpc_exit(task, status);
950 break; 964 break;
@@ -979,20 +993,18 @@ call_timeout(struct rpc_task *task)
979 993
980 dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); 994 dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
981 if (RPC_IS_SOFT(task)) { 995 if (RPC_IS_SOFT(task)) {
982 if (clnt->cl_chatty) 996 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
983 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
984 clnt->cl_protname, clnt->cl_server); 997 clnt->cl_protname, clnt->cl_server);
985 rpc_exit(task, -EIO); 998 rpc_exit(task, -EIO);
986 return; 999 return;
987 } 1000 }
988 1001
989 if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { 1002 if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
990 task->tk_flags |= RPC_CALL_MAJORSEEN; 1003 task->tk_flags |= RPC_CALL_MAJORSEEN;
991 printk(KERN_NOTICE "%s: server %s not responding, still trying\n", 1004 printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
992 clnt->cl_protname, clnt->cl_server); 1005 clnt->cl_protname, clnt->cl_server);
993 } 1006 }
994 if (clnt->cl_autobind) 1007 rpc_force_rebind(clnt);
995 clnt->cl_port = 0;
996 1008
997retry: 1009retry:
998 clnt->cl_stats->rpcretrans++; 1010 clnt->cl_stats->rpcretrans++;
@@ -1014,7 +1026,7 @@ call_decode(struct rpc_task *task)
1014 dprintk("RPC: %4d call_decode (status %d)\n", 1026 dprintk("RPC: %4d call_decode (status %d)\n",
1015 task->tk_pid, task->tk_status); 1027 task->tk_pid, task->tk_status);
1016 1028
1017 if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) { 1029 if (task->tk_flags & RPC_CALL_MAJORSEEN) {
1018 printk(KERN_NOTICE "%s: server %s OK\n", 1030 printk(KERN_NOTICE "%s: server %s OK\n",
1019 clnt->cl_protname, clnt->cl_server); 1031 clnt->cl_protname, clnt->cl_server);
1020 task->tk_flags &= ~RPC_CALL_MAJORSEEN; 1032 task->tk_flags &= ~RPC_CALL_MAJORSEEN;
@@ -1039,13 +1051,14 @@ call_decode(struct rpc_task *task)
1039 sizeof(req->rq_rcv_buf)) != 0); 1051 sizeof(req->rq_rcv_buf)) != 0);
1040 1052
1041 /* Verify the RPC header */ 1053 /* Verify the RPC header */
1042 if (!(p = call_verify(task))) { 1054 p = call_verify(task);
1043 if (task->tk_action == NULL) 1055 if (IS_ERR(p)) {
1044 return; 1056 if (p == ERR_PTR(-EAGAIN))
1045 goto out_retry; 1057 goto out_retry;
1058 return;
1046 } 1059 }
1047 1060
1048 task->tk_action = NULL; 1061 task->tk_action = rpc_exit_task;
1049 1062
1050 if (decode) 1063 if (decode)
1051 task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, 1064 task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
@@ -1138,7 +1151,7 @@ call_verify(struct rpc_task *task)
1138 1151
1139 if ((n = ntohl(*p++)) != RPC_REPLY) { 1152 if ((n = ntohl(*p++)) != RPC_REPLY) {
1140 printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); 1153 printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
1141 goto out_retry; 1154 goto out_garbage;
1142 } 1155 }
1143 if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { 1156 if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
1144 if (--len < 0) 1157 if (--len < 0)
@@ -1168,7 +1181,7 @@ call_verify(struct rpc_task *task)
1168 task->tk_pid); 1181 task->tk_pid);
1169 rpcauth_invalcred(task); 1182 rpcauth_invalcred(task);
1170 task->tk_action = call_refresh; 1183 task->tk_action = call_refresh;
1171 return NULL; 1184 goto out_retry;
1172 case RPC_AUTH_BADCRED: 1185 case RPC_AUTH_BADCRED:
1173 case RPC_AUTH_BADVERF: 1186 case RPC_AUTH_BADVERF:
1174 /* possibly garbled cred/verf? */ 1187 /* possibly garbled cred/verf? */
@@ -1178,7 +1191,7 @@ call_verify(struct rpc_task *task)
1178 dprintk("RPC: %4d call_verify: retry garbled creds\n", 1191 dprintk("RPC: %4d call_verify: retry garbled creds\n",
1179 task->tk_pid); 1192 task->tk_pid);
1180 task->tk_action = call_bind; 1193 task->tk_action = call_bind;
1181 return NULL; 1194 goto out_retry;
1182 case RPC_AUTH_TOOWEAK: 1195 case RPC_AUTH_TOOWEAK:
1183 printk(KERN_NOTICE "call_verify: server requires stronger " 1196 printk(KERN_NOTICE "call_verify: server requires stronger "
1184 "authentication.\n"); 1197 "authentication.\n");
@@ -1193,7 +1206,7 @@ call_verify(struct rpc_task *task)
1193 } 1206 }
1194 if (!(p = rpcauth_checkverf(task, p))) { 1207 if (!(p = rpcauth_checkverf(task, p))) {
1195 printk(KERN_WARNING "call_verify: auth check failed\n"); 1208 printk(KERN_WARNING "call_verify: auth check failed\n");
1196 goto out_retry; /* bad verifier, retry */ 1209 goto out_garbage; /* bad verifier, retry */
1197 } 1210 }
1198 len = p - (u32 *)iov->iov_base - 1; 1211 len = p - (u32 *)iov->iov_base - 1;
1199 if (len < 0) 1212 if (len < 0)
@@ -1230,23 +1243,24 @@ call_verify(struct rpc_task *task)
1230 /* Also retry */ 1243 /* Also retry */
1231 } 1244 }
1232 1245
1233out_retry: 1246out_garbage:
1234 task->tk_client->cl_stats->rpcgarbage++; 1247 task->tk_client->cl_stats->rpcgarbage++;
1235 if (task->tk_garb_retry) { 1248 if (task->tk_garb_retry) {
1236 task->tk_garb_retry--; 1249 task->tk_garb_retry--;
1237 dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); 1250 dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
1238 task->tk_action = call_bind; 1251 task->tk_action = call_bind;
1239 return NULL; 1252out_retry:
1253 return ERR_PTR(-EAGAIN);
1240 } 1254 }
1241 printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__); 1255 printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
1242out_eio: 1256out_eio:
1243 error = -EIO; 1257 error = -EIO;
1244out_err: 1258out_err:
1245 rpc_exit(task, error); 1259 rpc_exit(task, error);
1246 return NULL; 1260 return ERR_PTR(error);
1247out_overflow: 1261out_overflow:
1248 printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); 1262 printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
1249 goto out_retry; 1263 goto out_garbage;
1250} 1264}
1251 1265
1252static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) 1266static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj)
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index a398575f94b8..8139ce68e915 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -90,8 +90,7 @@ bailout:
90 map->pm_binding = 0; 90 map->pm_binding = 0;
91 rpc_wake_up(&map->pm_bindwait); 91 rpc_wake_up(&map->pm_bindwait);
92 spin_unlock(&pmap_lock); 92 spin_unlock(&pmap_lock);
93 task->tk_status = -EIO; 93 rpc_exit(task, -EIO);
94 task->tk_action = NULL;
95} 94}
96 95
97#ifdef CONFIG_ROOT_NFS 96#ifdef CONFIG_ROOT_NFS
@@ -132,21 +131,22 @@ static void
132pmap_getport_done(struct rpc_task *task) 131pmap_getport_done(struct rpc_task *task)
133{ 132{
134 struct rpc_clnt *clnt = task->tk_client; 133 struct rpc_clnt *clnt = task->tk_client;
134 struct rpc_xprt *xprt = task->tk_xprt;
135 struct rpc_portmap *map = clnt->cl_pmap; 135 struct rpc_portmap *map = clnt->cl_pmap;
136 136
137 dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", 137 dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
138 task->tk_pid, task->tk_status, clnt->cl_port); 138 task->tk_pid, task->tk_status, clnt->cl_port);
139
140 xprt->ops->set_port(xprt, 0);
139 if (task->tk_status < 0) { 141 if (task->tk_status < 0) {
140 /* Make the calling task exit with an error */ 142 /* Make the calling task exit with an error */
141 task->tk_action = NULL; 143 task->tk_action = rpc_exit_task;
142 } else if (clnt->cl_port == 0) { 144 } else if (clnt->cl_port == 0) {
143 /* Program not registered */ 145 /* Program not registered */
144 task->tk_status = -EACCES; 146 rpc_exit(task, -EACCES);
145 task->tk_action = NULL;
146 } else { 147 } else {
147 /* byte-swap port number first */ 148 xprt->ops->set_port(xprt, clnt->cl_port);
148 clnt->cl_port = htons(clnt->cl_port); 149 clnt->cl_port = htons(clnt->cl_port);
149 clnt->cl_xprt->addr.sin_port = clnt->cl_port;
150 } 150 }
151 spin_lock(&pmap_lock); 151 spin_lock(&pmap_lock);
152 map->pm_binding = 0; 152 map->pm_binding = 0;
@@ -207,7 +207,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
207 xprt = xprt_create_proto(proto, srvaddr, NULL); 207 xprt = xprt_create_proto(proto, srvaddr, NULL);
208 if (IS_ERR(xprt)) 208 if (IS_ERR(xprt))
209 return (struct rpc_clnt *)xprt; 209 return (struct rpc_clnt *)xprt;
210 xprt->addr.sin_port = htons(RPC_PMAP_PORT); 210 xprt->ops->set_port(xprt, RPC_PMAP_PORT);
211 if (!privileged) 211 if (!privileged)
212 xprt->resvport = 0; 212 xprt->resvport = 0;
213 213
@@ -217,7 +217,6 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
217 RPC_AUTH_UNIX); 217 RPC_AUTH_UNIX);
218 if (!IS_ERR(clnt)) { 218 if (!IS_ERR(clnt)) {
219 clnt->cl_softrtry = 1; 219 clnt->cl_softrtry = 1;
220 clnt->cl_chatty = 1;
221 clnt->cl_oneshot = 1; 220 clnt->cl_oneshot = 1;
222 } 221 }
223 return clnt; 222 return clnt;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 16a2458f38f7..9764c80ab0b2 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -69,10 +69,13 @@ rpc_timeout_upcall_queue(void *data)
69 struct rpc_inode *rpci = (struct rpc_inode *)data; 69 struct rpc_inode *rpci = (struct rpc_inode *)data;
70 struct inode *inode = &rpci->vfs_inode; 70 struct inode *inode = &rpci->vfs_inode;
71 71
72 down(&inode->i_sem); 72 mutex_lock(&inode->i_mutex);
73 if (rpci->ops == NULL)
74 goto out;
73 if (rpci->nreaders == 0 && !list_empty(&rpci->pipe)) 75 if (rpci->nreaders == 0 && !list_empty(&rpci->pipe))
74 __rpc_purge_upcall(inode, -ETIMEDOUT); 76 __rpc_purge_upcall(inode, -ETIMEDOUT);
75 up(&inode->i_sem); 77out:
78 mutex_unlock(&inode->i_mutex);
76} 79}
77 80
78int 81int
@@ -81,7 +84,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
81 struct rpc_inode *rpci = RPC_I(inode); 84 struct rpc_inode *rpci = RPC_I(inode);
82 int res = -EPIPE; 85 int res = -EPIPE;
83 86
84 down(&inode->i_sem); 87 mutex_lock(&inode->i_mutex);
85 if (rpci->ops == NULL) 88 if (rpci->ops == NULL)
86 goto out; 89 goto out;
87 if (rpci->nreaders) { 90 if (rpci->nreaders) {
@@ -97,7 +100,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
97 res = 0; 100 res = 0;
98 } 101 }
99out: 102out:
100 up(&inode->i_sem); 103 mutex_unlock(&inode->i_mutex);
101 wake_up(&rpci->waitq); 104 wake_up(&rpci->waitq);
102 return res; 105 return res;
103} 106}
@@ -113,9 +116,7 @@ rpc_close_pipes(struct inode *inode)
113{ 116{
114 struct rpc_inode *rpci = RPC_I(inode); 117 struct rpc_inode *rpci = RPC_I(inode);
115 118
116 cancel_delayed_work(&rpci->queue_timeout); 119 mutex_lock(&inode->i_mutex);
117 flush_scheduled_work();
118 down(&inode->i_sem);
119 if (rpci->ops != NULL) { 120 if (rpci->ops != NULL) {
120 rpci->nreaders = 0; 121 rpci->nreaders = 0;
121 __rpc_purge_list(rpci, &rpci->in_upcall, -EPIPE); 122 __rpc_purge_list(rpci, &rpci->in_upcall, -EPIPE);
@@ -126,7 +127,9 @@ rpc_close_pipes(struct inode *inode)
126 rpci->ops = NULL; 127 rpci->ops = NULL;
127 } 128 }
128 rpc_inode_setowner(inode, NULL); 129 rpc_inode_setowner(inode, NULL);
129 up(&inode->i_sem); 130 mutex_unlock(&inode->i_mutex);
131 cancel_delayed_work(&rpci->queue_timeout);
132 flush_scheduled_work();
130} 133}
131 134
132static struct inode * 135static struct inode *
@@ -151,7 +154,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
151 struct rpc_inode *rpci = RPC_I(inode); 154 struct rpc_inode *rpci = RPC_I(inode);
152 int res = -ENXIO; 155 int res = -ENXIO;
153 156
154 down(&inode->i_sem); 157 mutex_lock(&inode->i_mutex);
155 if (rpci->ops != NULL) { 158 if (rpci->ops != NULL) {
156 if (filp->f_mode & FMODE_READ) 159 if (filp->f_mode & FMODE_READ)
157 rpci->nreaders ++; 160 rpci->nreaders ++;
@@ -159,17 +162,17 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
159 rpci->nwriters ++; 162 rpci->nwriters ++;
160 res = 0; 163 res = 0;
161 } 164 }
162 up(&inode->i_sem); 165 mutex_unlock(&inode->i_mutex);
163 return res; 166 return res;
164} 167}
165 168
166static int 169static int
167rpc_pipe_release(struct inode *inode, struct file *filp) 170rpc_pipe_release(struct inode *inode, struct file *filp)
168{ 171{
169 struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); 172 struct rpc_inode *rpci = RPC_I(inode);
170 struct rpc_pipe_msg *msg; 173 struct rpc_pipe_msg *msg;
171 174
172 down(&inode->i_sem); 175 mutex_lock(&inode->i_mutex);
173 if (rpci->ops == NULL) 176 if (rpci->ops == NULL)
174 goto out; 177 goto out;
175 msg = (struct rpc_pipe_msg *)filp->private_data; 178 msg = (struct rpc_pipe_msg *)filp->private_data;
@@ -187,7 +190,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
187 if (rpci->ops->release_pipe) 190 if (rpci->ops->release_pipe)
188 rpci->ops->release_pipe(inode); 191 rpci->ops->release_pipe(inode);
189out: 192out:
190 up(&inode->i_sem); 193 mutex_unlock(&inode->i_mutex);
191 return 0; 194 return 0;
192} 195}
193 196
@@ -199,7 +202,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
199 struct rpc_pipe_msg *msg; 202 struct rpc_pipe_msg *msg;
200 int res = 0; 203 int res = 0;
201 204
202 down(&inode->i_sem); 205 mutex_lock(&inode->i_mutex);
203 if (rpci->ops == NULL) { 206 if (rpci->ops == NULL) {
204 res = -EPIPE; 207 res = -EPIPE;
205 goto out_unlock; 208 goto out_unlock;
@@ -226,7 +229,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
226 rpci->ops->destroy_msg(msg); 229 rpci->ops->destroy_msg(msg);
227 } 230 }
228out_unlock: 231out_unlock:
229 up(&inode->i_sem); 232 mutex_unlock(&inode->i_mutex);
230 return res; 233 return res;
231} 234}
232 235
@@ -237,11 +240,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
237 struct rpc_inode *rpci = RPC_I(inode); 240 struct rpc_inode *rpci = RPC_I(inode);
238 int res; 241 int res;
239 242
240 down(&inode->i_sem); 243 mutex_lock(&inode->i_mutex);
241 res = -EPIPE; 244 res = -EPIPE;
242 if (rpci->ops != NULL) 245 if (rpci->ops != NULL)
243 res = rpci->ops->downcall(filp, buf, len); 246 res = rpci->ops->downcall(filp, buf, len);
244 up(&inode->i_sem); 247 mutex_unlock(&inode->i_mutex);
245 return res; 248 return res;
246} 249}
247 250
@@ -319,7 +322,7 @@ rpc_info_open(struct inode *inode, struct file *file)
319 322
320 if (!ret) { 323 if (!ret) {
321 struct seq_file *m = file->private_data; 324 struct seq_file *m = file->private_data;
322 down(&inode->i_sem); 325 mutex_lock(&inode->i_mutex);
323 clnt = RPC_I(inode)->private; 326 clnt = RPC_I(inode)->private;
324 if (clnt) { 327 if (clnt) {
325 atomic_inc(&clnt->cl_users); 328 atomic_inc(&clnt->cl_users);
@@ -328,7 +331,7 @@ rpc_info_open(struct inode *inode, struct file *file)
328 single_release(inode, file); 331 single_release(inode, file);
329 ret = -EINVAL; 332 ret = -EINVAL;
330 } 333 }
331 up(&inode->i_sem); 334 mutex_unlock(&inode->i_mutex);
332 } 335 }
333 return ret; 336 return ret;
334} 337}
@@ -488,11 +491,11 @@ rpc_depopulate(struct dentry *parent)
488 struct dentry *dentry, *dvec[10]; 491 struct dentry *dentry, *dvec[10];
489 int n = 0; 492 int n = 0;
490 493
491 down(&dir->i_sem); 494 mutex_lock(&dir->i_mutex);
492repeat: 495repeat:
493 spin_lock(&dcache_lock); 496 spin_lock(&dcache_lock);
494 list_for_each_safe(pos, next, &parent->d_subdirs) { 497 list_for_each_safe(pos, next, &parent->d_subdirs) {
495 dentry = list_entry(pos, struct dentry, d_child); 498 dentry = list_entry(pos, struct dentry, d_u.d_child);
496 spin_lock(&dentry->d_lock); 499 spin_lock(&dentry->d_lock);
497 if (!d_unhashed(dentry)) { 500 if (!d_unhashed(dentry)) {
498 dget_locked(dentry); 501 dget_locked(dentry);
@@ -516,7 +519,7 @@ repeat:
516 } while (n); 519 } while (n);
517 goto repeat; 520 goto repeat;
518 } 521 }
519 up(&dir->i_sem); 522 mutex_unlock(&dir->i_mutex);
520} 523}
521 524
522static int 525static int
@@ -529,7 +532,7 @@ rpc_populate(struct dentry *parent,
529 struct dentry *dentry; 532 struct dentry *dentry;
530 int mode, i; 533 int mode, i;
531 534
532 down(&dir->i_sem); 535 mutex_lock(&dir->i_mutex);
533 for (i = start; i < eof; i++) { 536 for (i = start; i < eof; i++) {
534 dentry = d_alloc_name(parent, files[i].name); 537 dentry = d_alloc_name(parent, files[i].name);
535 if (!dentry) 538 if (!dentry)
@@ -549,10 +552,10 @@ rpc_populate(struct dentry *parent,
549 dir->i_nlink++; 552 dir->i_nlink++;
550 d_add(dentry, inode); 553 d_add(dentry, inode);
551 } 554 }
552 up(&dir->i_sem); 555 mutex_unlock(&dir->i_mutex);
553 return 0; 556 return 0;
554out_bad: 557out_bad:
555 up(&dir->i_sem); 558 mutex_unlock(&dir->i_mutex);
556 printk(KERN_WARNING "%s: %s failed to populate directory %s\n", 559 printk(KERN_WARNING "%s: %s failed to populate directory %s\n",
557 __FILE__, __FUNCTION__, parent->d_name.name); 560 __FILE__, __FUNCTION__, parent->d_name.name);
558 return -ENOMEM; 561 return -ENOMEM;
@@ -606,7 +609,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
606 if ((error = rpc_lookup_parent(path, nd)) != 0) 609 if ((error = rpc_lookup_parent(path, nd)) != 0)
607 return ERR_PTR(error); 610 return ERR_PTR(error);
608 dir = nd->dentry->d_inode; 611 dir = nd->dentry->d_inode;
609 down(&dir->i_sem); 612 mutex_lock(&dir->i_mutex);
610 dentry = lookup_hash(nd); 613 dentry = lookup_hash(nd);
611 if (IS_ERR(dentry)) 614 if (IS_ERR(dentry))
612 goto out_err; 615 goto out_err;
@@ -617,7 +620,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
617 } 620 }
618 return dentry; 621 return dentry;
619out_err: 622out_err:
620 up(&dir->i_sem); 623 mutex_unlock(&dir->i_mutex);
621 rpc_release_path(nd); 624 rpc_release_path(nd);
622 return dentry; 625 return dentry;
623} 626}
@@ -643,7 +646,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
643 if (error) 646 if (error)
644 goto err_depopulate; 647 goto err_depopulate;
645out: 648out:
646 up(&dir->i_sem); 649 mutex_unlock(&dir->i_mutex);
647 rpc_release_path(&nd); 650 rpc_release_path(&nd);
648 return dentry; 651 return dentry;
649err_depopulate: 652err_depopulate:
@@ -668,7 +671,7 @@ rpc_rmdir(char *path)
668 if ((error = rpc_lookup_parent(path, &nd)) != 0) 671 if ((error = rpc_lookup_parent(path, &nd)) != 0)
669 return error; 672 return error;
670 dir = nd.dentry->d_inode; 673 dir = nd.dentry->d_inode;
671 down(&dir->i_sem); 674 mutex_lock(&dir->i_mutex);
672 dentry = lookup_hash(&nd); 675 dentry = lookup_hash(&nd);
673 if (IS_ERR(dentry)) { 676 if (IS_ERR(dentry)) {
674 error = PTR_ERR(dentry); 677 error = PTR_ERR(dentry);
@@ -678,7 +681,7 @@ rpc_rmdir(char *path)
678 error = __rpc_rmdir(dir, dentry); 681 error = __rpc_rmdir(dir, dentry);
679 dput(dentry); 682 dput(dentry);
680out_release: 683out_release:
681 up(&dir->i_sem); 684 mutex_unlock(&dir->i_mutex);
682 rpc_release_path(&nd); 685 rpc_release_path(&nd);
683 return error; 686 return error;
684} 687}
@@ -707,7 +710,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
707 rpci->ops = ops; 710 rpci->ops = ops;
708 inode_dir_notify(dir, DN_CREATE); 711 inode_dir_notify(dir, DN_CREATE);
709out: 712out:
710 up(&dir->i_sem); 713 mutex_unlock(&dir->i_mutex);
711 rpc_release_path(&nd); 714 rpc_release_path(&nd);
712 return dentry; 715 return dentry;
713err_dput: 716err_dput:
@@ -729,7 +732,7 @@ rpc_unlink(char *path)
729 if ((error = rpc_lookup_parent(path, &nd)) != 0) 732 if ((error = rpc_lookup_parent(path, &nd)) != 0)
730 return error; 733 return error;
731 dir = nd.dentry->d_inode; 734 dir = nd.dentry->d_inode;
732 down(&dir->i_sem); 735 mutex_lock(&dir->i_mutex);
733 dentry = lookup_hash(&nd); 736 dentry = lookup_hash(&nd);
734 if (IS_ERR(dentry)) { 737 if (IS_ERR(dentry)) {
735 error = PTR_ERR(dentry); 738 error = PTR_ERR(dentry);
@@ -743,7 +746,7 @@ rpc_unlink(char *path)
743 dput(dentry); 746 dput(dentry);
744 inode_dir_notify(dir, DN_DELETE); 747 inode_dir_notify(dir, DN_DELETE);
745out_release: 748out_release:
746 up(&dir->i_sem); 749 mutex_unlock(&dir->i_mutex);
747 rpc_release_path(&nd); 750 rpc_release_path(&nd);
748 return error; 751 return error;
749} 752}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 54e60a657500..7415406aa1ae 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -41,8 +41,6 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
41 41
42static void __rpc_default_timer(struct rpc_task *task); 42static void __rpc_default_timer(struct rpc_task *task);
43static void rpciod_killall(void); 43static void rpciod_killall(void);
44static void rpc_free(struct rpc_task *task);
45
46static void rpc_async_schedule(void *); 44static void rpc_async_schedule(void *);
47 45
48/* 46/*
@@ -264,6 +262,35 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
264} 262}
265EXPORT_SYMBOL(rpc_init_wait_queue); 263EXPORT_SYMBOL(rpc_init_wait_queue);
266 264
265static int rpc_wait_bit_interruptible(void *word)
266{
267 if (signal_pending(current))
268 return -ERESTARTSYS;
269 schedule();
270 return 0;
271}
272
273/*
274 * Mark an RPC call as having completed by clearing the 'active' bit
275 */
276static inline void rpc_mark_complete_task(struct rpc_task *task)
277{
278 rpc_clear_active(task);
279 wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
280}
281
282/*
283 * Allow callers to wait for completion of an RPC call
284 */
285int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
286{
287 if (action == NULL)
288 action = rpc_wait_bit_interruptible;
289 return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
290 action, TASK_INTERRUPTIBLE);
291}
292EXPORT_SYMBOL(__rpc_wait_for_completion_task);
293
267/* 294/*
268 * Make an RPC task runnable. 295 * Make an RPC task runnable.
269 * 296 *
@@ -299,10 +326,7 @@ static void rpc_make_runnable(struct rpc_task *task)
299static inline void 326static inline void
300rpc_schedule_run(struct rpc_task *task) 327rpc_schedule_run(struct rpc_task *task)
301{ 328{
302 /* Don't run a child twice! */ 329 rpc_set_active(task);
303 if (RPC_IS_ACTIVATED(task))
304 return;
305 task->tk_active = 1;
306 rpc_make_runnable(task); 330 rpc_make_runnable(task);
307} 331}
308 332
@@ -324,8 +348,7 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
324 } 348 }
325 349
326 /* Mark the task as being activated if so needed */ 350 /* Mark the task as being activated if so needed */
327 if (!RPC_IS_ACTIVATED(task)) 351 rpc_set_active(task);
328 task->tk_active = 1;
329 352
330 __rpc_add_wait_queue(q, task); 353 __rpc_add_wait_queue(q, task);
331 354
@@ -555,36 +578,29 @@ __rpc_atrun(struct rpc_task *task)
555} 578}
556 579
557/* 580/*
558 * Helper that calls task->tk_exit if it exists and then returns 581 * Helper to call task->tk_ops->rpc_call_prepare
559 * true if we should exit __rpc_execute.
560 */ 582 */
561static inline int __rpc_do_exit(struct rpc_task *task) 583static void rpc_prepare_task(struct rpc_task *task)
562{ 584{
563 if (task->tk_exit != NULL) { 585 task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
564 lock_kernel();
565 task->tk_exit(task);
566 unlock_kernel();
567 /* If tk_action is non-null, we should restart the call */
568 if (task->tk_action != NULL) {
569 if (!RPC_ASSASSINATED(task)) {
570 /* Release RPC slot and buffer memory */
571 xprt_release(task);
572 rpc_free(task);
573 return 0;
574 }
575 printk(KERN_ERR "RPC: dead task tried to walk away.\n");
576 }
577 }
578 return 1;
579} 586}
580 587
581static int rpc_wait_bit_interruptible(void *word) 588/*
589 * Helper that calls task->tk_ops->rpc_call_done if it exists
590 */
591void rpc_exit_task(struct rpc_task *task)
582{ 592{
583 if (signal_pending(current)) 593 task->tk_action = NULL;
584 return -ERESTARTSYS; 594 if (task->tk_ops->rpc_call_done != NULL) {
585 schedule(); 595 task->tk_ops->rpc_call_done(task, task->tk_calldata);
586 return 0; 596 if (task->tk_action != NULL) {
597 WARN_ON(RPC_ASSASSINATED(task));
598 /* Always release the RPC slot and buffer memory */
599 xprt_release(task);
600 }
601 }
587} 602}
603EXPORT_SYMBOL(rpc_exit_task);
588 604
589/* 605/*
590 * This is the RPC `scheduler' (or rather, the finite state machine). 606 * This is the RPC `scheduler' (or rather, the finite state machine).
@@ -631,12 +647,11 @@ static int __rpc_execute(struct rpc_task *task)
631 * by someone else. 647 * by someone else.
632 */ 648 */
633 if (!RPC_IS_QUEUED(task)) { 649 if (!RPC_IS_QUEUED(task)) {
634 if (task->tk_action != NULL) { 650 if (task->tk_action == NULL)
635 lock_kernel();
636 task->tk_action(task);
637 unlock_kernel();
638 } else if (__rpc_do_exit(task))
639 break; 651 break;
652 lock_kernel();
653 task->tk_action(task);
654 unlock_kernel();
640 } 655 }
641 656
642 /* 657 /*
@@ -676,9 +691,9 @@ static int __rpc_execute(struct rpc_task *task)
676 dprintk("RPC: %4d sync task resuming\n", task->tk_pid); 691 dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
677 } 692 }
678 693
679 dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status); 694 dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status);
680 status = task->tk_status; 695 /* Wake up anyone who is waiting for task completion */
681 696 rpc_mark_complete_task(task);
682 /* Release all resources associated with the task */ 697 /* Release all resources associated with the task */
683 rpc_release_task(task); 698 rpc_release_task(task);
684 return status; 699 return status;
@@ -696,9 +711,7 @@ static int __rpc_execute(struct rpc_task *task)
696int 711int
697rpc_execute(struct rpc_task *task) 712rpc_execute(struct rpc_task *task)
698{ 713{
699 BUG_ON(task->tk_active); 714 rpc_set_active(task);
700
701 task->tk_active = 1;
702 rpc_set_running(task); 715 rpc_set_running(task);
703 return __rpc_execute(task); 716 return __rpc_execute(task);
704} 717}
@@ -708,17 +721,19 @@ static void rpc_async_schedule(void *arg)
708 __rpc_execute((struct rpc_task *)arg); 721 __rpc_execute((struct rpc_task *)arg);
709} 722}
710 723
711/* 724/**
712 * Allocate memory for RPC purposes. 725 * rpc_malloc - allocate an RPC buffer
726 * @task: RPC task that will use this buffer
727 * @size: requested byte size
713 * 728 *
714 * We try to ensure that some NFS reads and writes can always proceed 729 * We try to ensure that some NFS reads and writes can always proceed
715 * by using a mempool when allocating 'small' buffers. 730 * by using a mempool when allocating 'small' buffers.
716 * In order to avoid memory starvation triggering more writebacks of 731 * In order to avoid memory starvation triggering more writebacks of
717 * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. 732 * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
718 */ 733 */
719void * 734void * rpc_malloc(struct rpc_task *task, size_t size)
720rpc_malloc(struct rpc_task *task, size_t size)
721{ 735{
736 struct rpc_rqst *req = task->tk_rqstp;
722 gfp_t gfp; 737 gfp_t gfp;
723 738
724 if (task->tk_flags & RPC_TASK_SWAPPER) 739 if (task->tk_flags & RPC_TASK_SWAPPER)
@@ -727,42 +742,52 @@ rpc_malloc(struct rpc_task *task, size_t size)
727 gfp = GFP_NOFS; 742 gfp = GFP_NOFS;
728 743
729 if (size > RPC_BUFFER_MAXSIZE) { 744 if (size > RPC_BUFFER_MAXSIZE) {
730 task->tk_buffer = kmalloc(size, gfp); 745 req->rq_buffer = kmalloc(size, gfp);
731 if (task->tk_buffer) 746 if (req->rq_buffer)
732 task->tk_bufsize = size; 747 req->rq_bufsize = size;
733 } else { 748 } else {
734 task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); 749 req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
735 if (task->tk_buffer) 750 if (req->rq_buffer)
736 task->tk_bufsize = RPC_BUFFER_MAXSIZE; 751 req->rq_bufsize = RPC_BUFFER_MAXSIZE;
737 } 752 }
738 return task->tk_buffer; 753 return req->rq_buffer;
739} 754}
740 755
741static void 756/**
742rpc_free(struct rpc_task *task) 757 * rpc_free - free buffer allocated via rpc_malloc
758 * @task: RPC task with a buffer to be freed
759 *
760 */
761void rpc_free(struct rpc_task *task)
743{ 762{
744 if (task->tk_buffer) { 763 struct rpc_rqst *req = task->tk_rqstp;
745 if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) 764
746 mempool_free(task->tk_buffer, rpc_buffer_mempool); 765 if (req->rq_buffer) {
766 if (req->rq_bufsize == RPC_BUFFER_MAXSIZE)
767 mempool_free(req->rq_buffer, rpc_buffer_mempool);
747 else 768 else
748 kfree(task->tk_buffer); 769 kfree(req->rq_buffer);
749 task->tk_buffer = NULL; 770 req->rq_buffer = NULL;
750 task->tk_bufsize = 0; 771 req->rq_bufsize = 0;
751 } 772 }
752} 773}
753 774
754/* 775/*
755 * Creation and deletion of RPC task structures 776 * Creation and deletion of RPC task structures
756 */ 777 */
757void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags) 778void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
758{ 779{
759 memset(task, 0, sizeof(*task)); 780 memset(task, 0, sizeof(*task));
760 init_timer(&task->tk_timer); 781 init_timer(&task->tk_timer);
761 task->tk_timer.data = (unsigned long) task; 782 task->tk_timer.data = (unsigned long) task;
762 task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; 783 task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
784 atomic_set(&task->tk_count, 1);
763 task->tk_client = clnt; 785 task->tk_client = clnt;
764 task->tk_flags = flags; 786 task->tk_flags = flags;
765 task->tk_exit = callback; 787 task->tk_ops = tk_ops;
788 if (tk_ops->rpc_call_prepare != NULL)
789 task->tk_action = rpc_prepare_task;
790 task->tk_calldata = calldata;
766 791
767 /* Initialize retry counters */ 792 /* Initialize retry counters */
768 task->tk_garb_retry = 2; 793 task->tk_garb_retry = 2;
@@ -791,6 +816,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
791 list_add_tail(&task->tk_task, &all_tasks); 816 list_add_tail(&task->tk_task, &all_tasks);
792 spin_unlock(&rpc_sched_lock); 817 spin_unlock(&rpc_sched_lock);
793 818
819 BUG_ON(task->tk_ops == NULL);
820
794 dprintk("RPC: %4d new task procpid %d\n", task->tk_pid, 821 dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
795 current->pid); 822 current->pid);
796} 823}
@@ -801,8 +828,7 @@ rpc_alloc_task(void)
801 return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); 828 return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
802} 829}
803 830
804static void 831static void rpc_free_task(struct rpc_task *task)
805rpc_default_free_task(struct rpc_task *task)
806{ 832{
807 dprintk("RPC: %4d freeing task\n", task->tk_pid); 833 dprintk("RPC: %4d freeing task\n", task->tk_pid);
808 mempool_free(task, rpc_task_mempool); 834 mempool_free(task, rpc_task_mempool);
@@ -813,8 +839,7 @@ rpc_default_free_task(struct rpc_task *task)
813 * clean up after an allocation failure, as the client may 839 * clean up after an allocation failure, as the client may
814 * have specified "oneshot". 840 * have specified "oneshot".
815 */ 841 */
816struct rpc_task * 842struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
817rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
818{ 843{
819 struct rpc_task *task; 844 struct rpc_task *task;
820 845
@@ -822,10 +847,7 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
822 if (!task) 847 if (!task)
823 goto cleanup; 848 goto cleanup;
824 849
825 rpc_init_task(task, clnt, callback, flags); 850 rpc_init_task(task, clnt, flags, tk_ops, calldata);
826
827 /* Replace tk_release */
828 task->tk_release = rpc_default_free_task;
829 851
830 dprintk("RPC: %4d allocated task\n", task->tk_pid); 852 dprintk("RPC: %4d allocated task\n", task->tk_pid);
831 task->tk_flags |= RPC_TASK_DYNAMIC; 853 task->tk_flags |= RPC_TASK_DYNAMIC;
@@ -845,11 +867,15 @@ cleanup:
845 867
846void rpc_release_task(struct rpc_task *task) 868void rpc_release_task(struct rpc_task *task)
847{ 869{
848 dprintk("RPC: %4d release task\n", task->tk_pid); 870 const struct rpc_call_ops *tk_ops = task->tk_ops;
871 void *calldata = task->tk_calldata;
849 872
850#ifdef RPC_DEBUG 873#ifdef RPC_DEBUG
851 BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); 874 BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
852#endif 875#endif
876 if (!atomic_dec_and_test(&task->tk_count))
877 return;
878 dprintk("RPC: %4d release task\n", task->tk_pid);
853 879
854 /* Remove from global task list */ 880 /* Remove from global task list */
855 spin_lock(&rpc_sched_lock); 881 spin_lock(&rpc_sched_lock);
@@ -857,7 +883,6 @@ void rpc_release_task(struct rpc_task *task)
857 spin_unlock(&rpc_sched_lock); 883 spin_unlock(&rpc_sched_lock);
858 884
859 BUG_ON (RPC_IS_QUEUED(task)); 885 BUG_ON (RPC_IS_QUEUED(task));
860 task->tk_active = 0;
861 886
862 /* Synchronously delete any running timer */ 887 /* Synchronously delete any running timer */
863 rpc_delete_timer(task); 888 rpc_delete_timer(task);
@@ -867,7 +892,6 @@ void rpc_release_task(struct rpc_task *task)
867 xprt_release(task); 892 xprt_release(task);
868 if (task->tk_msg.rpc_cred) 893 if (task->tk_msg.rpc_cred)
869 rpcauth_unbindcred(task); 894 rpcauth_unbindcred(task);
870 rpc_free(task);
871 if (task->tk_client) { 895 if (task->tk_client) {
872 rpc_release_client(task->tk_client); 896 rpc_release_client(task->tk_client);
873 task->tk_client = NULL; 897 task->tk_client = NULL;
@@ -876,11 +900,34 @@ void rpc_release_task(struct rpc_task *task)
876#ifdef RPC_DEBUG 900#ifdef RPC_DEBUG
877 task->tk_magic = 0; 901 task->tk_magic = 0;
878#endif 902#endif
879 if (task->tk_release) 903 if (task->tk_flags & RPC_TASK_DYNAMIC)
880 task->tk_release(task); 904 rpc_free_task(task);
905 if (tk_ops->rpc_release)
906 tk_ops->rpc_release(calldata);
881} 907}
882 908
883/** 909/**
910 * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
911 * @clnt - pointer to RPC client
912 * @flags - RPC flags
913 * @ops - RPC call ops
914 * @data - user call data
915 */
916struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
917 const struct rpc_call_ops *ops,
918 void *data)
919{
920 struct rpc_task *task;
921 task = rpc_new_task(clnt, flags, ops, data);
922 if (task == NULL)
923 return ERR_PTR(-ENOMEM);
924 atomic_inc(&task->tk_count);
925 rpc_execute(task);
926 return task;
927}
928EXPORT_SYMBOL(rpc_run_task);
929
930/**
884 * rpc_find_parent - find the parent of a child task. 931 * rpc_find_parent - find the parent of a child task.
885 * @child: child task 932 * @child: child task
886 * 933 *
@@ -890,12 +937,11 @@ void rpc_release_task(struct rpc_task *task)
890 * 937 *
891 * Caller must hold childq.lock 938 * Caller must hold childq.lock
892 */ 939 */
893static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) 940static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent)
894{ 941{
895 struct rpc_task *task, *parent; 942 struct rpc_task *task;
896 struct list_head *le; 943 struct list_head *le;
897 944
898 parent = (struct rpc_task *) child->tk_calldata;
899 task_for_each(task, le, &childq.tasks[0]) 945 task_for_each(task, le, &childq.tasks[0])
900 if (task == parent) 946 if (task == parent)
901 return parent; 947 return parent;
@@ -903,18 +949,22 @@ static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
903 return NULL; 949 return NULL;
904} 950}
905 951
906static void rpc_child_exit(struct rpc_task *child) 952static void rpc_child_exit(struct rpc_task *child, void *calldata)
907{ 953{
908 struct rpc_task *parent; 954 struct rpc_task *parent;
909 955
910 spin_lock_bh(&childq.lock); 956 spin_lock_bh(&childq.lock);
911 if ((parent = rpc_find_parent(child)) != NULL) { 957 if ((parent = rpc_find_parent(child, calldata)) != NULL) {
912 parent->tk_status = child->tk_status; 958 parent->tk_status = child->tk_status;
913 __rpc_wake_up_task(parent); 959 __rpc_wake_up_task(parent);
914 } 960 }
915 spin_unlock_bh(&childq.lock); 961 spin_unlock_bh(&childq.lock);
916} 962}
917 963
964static const struct rpc_call_ops rpc_child_ops = {
965 .rpc_call_done = rpc_child_exit,
966};
967
918/* 968/*
919 * Note: rpc_new_task releases the client after a failure. 969 * Note: rpc_new_task releases the client after a failure.
920 */ 970 */
@@ -923,11 +973,9 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
923{ 973{
924 struct rpc_task *task; 974 struct rpc_task *task;
925 975
926 task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD); 976 task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent);
927 if (!task) 977 if (!task)
928 goto fail; 978 goto fail;
929 task->tk_exit = rpc_child_exit;
930 task->tk_calldata = parent;
931 return task; 979 return task;
932 980
933fail: 981fail:
@@ -1063,7 +1111,7 @@ void rpc_show_tasks(void)
1063 return; 1111 return;
1064 } 1112 }
1065 printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " 1113 printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
1066 "-rpcwait -action- --exit--\n"); 1114 "-rpcwait -action- ---ops--\n");
1067 alltask_for_each(t, le, &all_tasks) { 1115 alltask_for_each(t, le, &all_tasks) {
1068 const char *rpc_waitq = "none"; 1116 const char *rpc_waitq = "none";
1069 1117
@@ -1078,7 +1126,7 @@ void rpc_show_tasks(void)
1078 (t->tk_client ? t->tk_client->cl_prog : 0), 1126 (t->tk_client ? t->tk_client->cl_prog : 0),
1079 t->tk_rqstp, t->tk_timeout, 1127 t->tk_rqstp, t->tk_timeout,
1080 rpc_waitq, 1128 rpc_waitq,
1081 t->tk_action, t->tk_exit); 1129 t->tk_action, t->tk_ops);
1082 } 1130 }
1083 spin_unlock(&rpc_sched_lock); 1131 spin_unlock(&rpc_sched_lock);
1084} 1132}
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index a03d4b600c92..9f7373203592 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -30,8 +30,6 @@ EXPORT_SYMBOL(rpc_init_task);
30EXPORT_SYMBOL(rpc_sleep_on); 30EXPORT_SYMBOL(rpc_sleep_on);
31EXPORT_SYMBOL(rpc_wake_up_next); 31EXPORT_SYMBOL(rpc_wake_up_next);
32EXPORT_SYMBOL(rpc_wake_up_task); 32EXPORT_SYMBOL(rpc_wake_up_task);
33EXPORT_SYMBOL(rpc_new_child);
34EXPORT_SYMBOL(rpc_run_child);
35EXPORT_SYMBOL(rpciod_down); 33EXPORT_SYMBOL(rpciod_down);
36EXPORT_SYMBOL(rpciod_up); 34EXPORT_SYMBOL(rpciod_up);
37EXPORT_SYMBOL(rpc_new_task); 35EXPORT_SYMBOL(rpc_new_task);
@@ -45,7 +43,6 @@ EXPORT_SYMBOL(rpc_clone_client);
45EXPORT_SYMBOL(rpc_bind_new_program); 43EXPORT_SYMBOL(rpc_bind_new_program);
46EXPORT_SYMBOL(rpc_destroy_client); 44EXPORT_SYMBOL(rpc_destroy_client);
47EXPORT_SYMBOL(rpc_shutdown_client); 45EXPORT_SYMBOL(rpc_shutdown_client);
48EXPORT_SYMBOL(rpc_release_client);
49EXPORT_SYMBOL(rpc_killall_tasks); 46EXPORT_SYMBOL(rpc_killall_tasks);
50EXPORT_SYMBOL(rpc_call_sync); 47EXPORT_SYMBOL(rpc_call_sync);
51EXPORT_SYMBOL(rpc_call_async); 48EXPORT_SYMBOL(rpc_call_async);
@@ -120,7 +117,6 @@ EXPORT_SYMBOL(unix_domain_find);
120 117
121/* Generic XDR */ 118/* Generic XDR */
122EXPORT_SYMBOL(xdr_encode_string); 119EXPORT_SYMBOL(xdr_encode_string);
123EXPORT_SYMBOL(xdr_decode_string);
124EXPORT_SYMBOL(xdr_decode_string_inplace); 120EXPORT_SYMBOL(xdr_decode_string_inplace);
125EXPORT_SYMBOL(xdr_decode_netobj); 121EXPORT_SYMBOL(xdr_decode_netobj);
126EXPORT_SYMBOL(xdr_encode_netobj); 122EXPORT_SYMBOL(xdr_encode_netobj);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e4296c8b861e..b08419e1fc68 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -32,7 +32,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
32 int vers; 32 int vers;
33 unsigned int xdrsize; 33 unsigned int xdrsize;
34 34
35 if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL))) 35 if (!(serv = kmalloc(sizeof(*serv), GFP_KERNEL)))
36 return NULL; 36 return NULL;
37 memset(serv, 0, sizeof(*serv)); 37 memset(serv, 0, sizeof(*serv));
38 serv->sv_name = prog->pg_name; 38 serv->sv_name = prog->pg_name;
@@ -122,8 +122,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
122 rqstp->rq_argused = 0; 122 rqstp->rq_argused = 0;
123 rqstp->rq_resused = 0; 123 rqstp->rq_resused = 0;
124 arghi = 0; 124 arghi = 0;
125 if (pages > RPCSVC_MAXPAGES) 125 BUG_ON(pages > RPCSVC_MAXPAGES);
126 BUG();
127 while (pages) { 126 while (pages) {
128 struct page *p = alloc_page(GFP_KERNEL); 127 struct page *p = alloc_page(GFP_KERNEL);
129 if (!p) 128 if (!p)
@@ -167,8 +166,8 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
167 memset(rqstp, 0, sizeof(*rqstp)); 166 memset(rqstp, 0, sizeof(*rqstp));
168 init_waitqueue_head(&rqstp->rq_wait); 167 init_waitqueue_head(&rqstp->rq_wait);
169 168
170 if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL)) 169 if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
171 || !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL)) 170 || !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
172 || !svc_init_buffer(rqstp, serv->sv_bufsz)) 171 || !svc_init_buffer(rqstp, serv->sv_bufsz))
173 goto out_thread; 172 goto out_thread;
174 173
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index cac2e774dd81..3e6c694bbad1 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -101,10 +101,22 @@ static void ip_map_put(struct cache_head *item, struct cache_detail *cd)
101 } 101 }
102} 102}
103 103
104#if IP_HASHBITS == 8
105/* hash_long on a 64 bit machine is currently REALLY BAD for
106 * IP addresses in reverse-endian (i.e. on a little-endian machine).
107 * So use a trivial but reliable hash instead
108 */
109static inline int hash_ip(unsigned long ip)
110{
111 int hash = ip ^ (ip>>16);
112 return (hash ^ (hash>>8)) & 0xff;
113}
114#endif
115
104static inline int ip_map_hash(struct ip_map *item) 116static inline int ip_map_hash(struct ip_map *item)
105{ 117{
106 return hash_str(item->m_class, IP_HASHBITS) ^ 118 return hash_str(item->m_class, IP_HASHBITS) ^
107 hash_long((unsigned long)item->m_addr.s_addr, IP_HASHBITS); 119 hash_ip((unsigned long)item->m_addr.s_addr);
108} 120}
109static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp) 121static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp)
110{ 122{
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c6a51911e71e..e67613e4eb18 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -758,7 +758,7 @@ svc_tcp_accept(struct svc_sock *svsk)
758 struct svc_serv *serv = svsk->sk_server; 758 struct svc_serv *serv = svsk->sk_server;
759 struct socket *sock = svsk->sk_sock; 759 struct socket *sock = svsk->sk_sock;
760 struct socket *newsock; 760 struct socket *newsock;
761 struct proto_ops *ops; 761 const struct proto_ops *ops;
762 struct svc_sock *newsvsk; 762 struct svc_sock *newsvsk;
763 int err, slen; 763 int err, slen;
764 764
@@ -1026,7 +1026,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
1026 } else { 1026 } else {
1027 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1027 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1028 svsk->sk_server->sv_name, -len); 1028 svsk->sk_server->sv_name, -len);
1029 svc_sock_received(svsk); 1029 goto err_delete;
1030 } 1030 }
1031 1031
1032 return len; 1032 return len;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index aaf08cdd19f0..ca4bfa57e116 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -93,27 +93,6 @@ xdr_encode_string(u32 *p, const char *string)
93} 93}
94 94
95u32 * 95u32 *
96xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
97{
98 unsigned int len;
99 char *string;
100
101 if ((len = ntohl(*p++)) > maxlen)
102 return NULL;
103 if (lenp)
104 *lenp = len;
105 if ((len % 4) != 0) {
106 string = (char *) p;
107 } else {
108 string = (char *) (p - 1);
109 memmove(string, p, len);
110 }
111 string[len] = '\0';
112 *sp = string;
113 return p + XDR_QUADLEN(len);
114}
115
116u32 *
117xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen) 96xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
118{ 97{
119 unsigned int len; 98 unsigned int len;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6dda3860351f..8ff2c8acb223 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -119,6 +119,17 @@ out_sleep:
119 return 0; 119 return 0;
120} 120}
121 121
122static void xprt_clear_locked(struct rpc_xprt *xprt)
123{
124 xprt->snd_task = NULL;
125 if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) {
126 smp_mb__before_clear_bit();
127 clear_bit(XPRT_LOCKED, &xprt->state);
128 smp_mb__after_clear_bit();
129 } else
130 schedule_work(&xprt->task_cleanup);
131}
132
122/* 133/*
123 * xprt_reserve_xprt_cong - serialize write access to transports 134 * xprt_reserve_xprt_cong - serialize write access to transports
124 * @task: task that is requesting access to the transport 135 * @task: task that is requesting access to the transport
@@ -145,9 +156,7 @@ int xprt_reserve_xprt_cong(struct rpc_task *task)
145 } 156 }
146 return 1; 157 return 1;
147 } 158 }
148 smp_mb__before_clear_bit(); 159 xprt_clear_locked(xprt);
149 clear_bit(XPRT_LOCKED, &xprt->state);
150 smp_mb__after_clear_bit();
151out_sleep: 160out_sleep:
152 dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); 161 dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt);
153 task->tk_timeout = 0; 162 task->tk_timeout = 0;
@@ -193,9 +202,7 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
193 return; 202 return;
194 203
195out_unlock: 204out_unlock:
196 smp_mb__before_clear_bit(); 205 xprt_clear_locked(xprt);
197 clear_bit(XPRT_LOCKED, &xprt->state);
198 smp_mb__after_clear_bit();
199} 206}
200 207
201static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) 208static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
@@ -222,9 +229,7 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
222 return; 229 return;
223 } 230 }
224out_unlock: 231out_unlock:
225 smp_mb__before_clear_bit(); 232 xprt_clear_locked(xprt);
226 clear_bit(XPRT_LOCKED, &xprt->state);
227 smp_mb__after_clear_bit();
228} 233}
229 234
230/** 235/**
@@ -237,10 +242,7 @@ out_unlock:
237void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 242void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
238{ 243{
239 if (xprt->snd_task == task) { 244 if (xprt->snd_task == task) {
240 xprt->snd_task = NULL; 245 xprt_clear_locked(xprt);
241 smp_mb__before_clear_bit();
242 clear_bit(XPRT_LOCKED, &xprt->state);
243 smp_mb__after_clear_bit();
244 __xprt_lock_write_next(xprt); 246 __xprt_lock_write_next(xprt);
245 } 247 }
246} 248}
@@ -256,10 +258,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
256void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) 258void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
257{ 259{
258 if (xprt->snd_task == task) { 260 if (xprt->snd_task == task) {
259 xprt->snd_task = NULL; 261 xprt_clear_locked(xprt);
260 smp_mb__before_clear_bit();
261 clear_bit(XPRT_LOCKED, &xprt->state);
262 smp_mb__after_clear_bit();
263 __xprt_lock_write_next_cong(xprt); 262 __xprt_lock_write_next_cong(xprt);
264 } 263 }
265} 264}
@@ -535,10 +534,6 @@ void xprt_connect(struct rpc_task *task)
535 dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, 534 dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
536 xprt, (xprt_connected(xprt) ? "is" : "is not")); 535 xprt, (xprt_connected(xprt) ? "is" : "is not"));
537 536
538 if (xprt->shutdown) {
539 task->tk_status = -EIO;
540 return;
541 }
542 if (!xprt->addr.sin_port) { 537 if (!xprt->addr.sin_port) {
543 task->tk_status = -EIO; 538 task->tk_status = -EIO;
544 return; 539 return;
@@ -687,9 +682,6 @@ int xprt_prepare_transmit(struct rpc_task *task)
687 682
688 dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid); 683 dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid);
689 684
690 if (xprt->shutdown)
691 return -EIO;
692
693 spin_lock_bh(&xprt->transport_lock); 685 spin_lock_bh(&xprt->transport_lock);
694 if (req->rq_received && !req->rq_bytes_sent) { 686 if (req->rq_received && !req->rq_bytes_sent) {
695 err = req->rq_received; 687 err = req->rq_received;
@@ -814,11 +806,9 @@ void xprt_reserve(struct rpc_task *task)
814 struct rpc_xprt *xprt = task->tk_xprt; 806 struct rpc_xprt *xprt = task->tk_xprt;
815 807
816 task->tk_status = -EIO; 808 task->tk_status = -EIO;
817 if (!xprt->shutdown) { 809 spin_lock(&xprt->reserve_lock);
818 spin_lock(&xprt->reserve_lock); 810 do_xprt_reserve(task);
819 do_xprt_reserve(task); 811 spin_unlock(&xprt->reserve_lock);
820 spin_unlock(&xprt->reserve_lock);
821 }
822} 812}
823 813
824static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) 814static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -838,6 +828,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
838 req->rq_timeout = xprt->timeout.to_initval; 828 req->rq_timeout = xprt->timeout.to_initval;
839 req->rq_task = task; 829 req->rq_task = task;
840 req->rq_xprt = xprt; 830 req->rq_xprt = xprt;
831 req->rq_buffer = NULL;
832 req->rq_bufsize = 0;
841 req->rq_xid = xprt_alloc_xid(xprt); 833 req->rq_xid = xprt_alloc_xid(xprt);
842 req->rq_release_snd_buf = NULL; 834 req->rq_release_snd_buf = NULL;
843 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, 835 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
@@ -863,10 +855,11 @@ void xprt_release(struct rpc_task *task)
863 if (!list_empty(&req->rq_list)) 855 if (!list_empty(&req->rq_list))
864 list_del(&req->rq_list); 856 list_del(&req->rq_list);
865 xprt->last_used = jiffies; 857 xprt->last_used = jiffies;
866 if (list_empty(&xprt->recv) && !xprt->shutdown) 858 if (list_empty(&xprt->recv))
867 mod_timer(&xprt->timer, 859 mod_timer(&xprt->timer,
868 xprt->last_used + xprt->idle_timeout); 860 xprt->last_used + xprt->idle_timeout);
869 spin_unlock_bh(&xprt->transport_lock); 861 spin_unlock_bh(&xprt->transport_lock);
862 xprt->ops->buf_free(task);
870 task->tk_rqstp = NULL; 863 task->tk_rqstp = NULL;
871 if (req->rq_release_snd_buf) 864 if (req->rq_release_snd_buf)
872 req->rq_release_snd_buf(req); 865 req->rq_release_snd_buf(req);
@@ -974,16 +967,6 @@ struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rp
974 return xprt; 967 return xprt;
975} 968}
976 969
977static void xprt_shutdown(struct rpc_xprt *xprt)
978{
979 xprt->shutdown = 1;
980 rpc_wake_up(&xprt->sending);
981 rpc_wake_up(&xprt->resend);
982 xprt_wake_pending_tasks(xprt, -EIO);
983 rpc_wake_up(&xprt->backlog);
984 del_timer_sync(&xprt->timer);
985}
986
987/** 970/**
988 * xprt_destroy - destroy an RPC transport, killing off all requests. 971 * xprt_destroy - destroy an RPC transport, killing off all requests.
989 * @xprt: transport to destroy 972 * @xprt: transport to destroy
@@ -992,7 +975,8 @@ static void xprt_shutdown(struct rpc_xprt *xprt)
992int xprt_destroy(struct rpc_xprt *xprt) 975int xprt_destroy(struct rpc_xprt *xprt)
993{ 976{
994 dprintk("RPC: destroying transport %p\n", xprt); 977 dprintk("RPC: destroying transport %p\n", xprt);
995 xprt_shutdown(xprt); 978 xprt->shutdown = 1;
979 del_timer_sync(&xprt->timer);
996 xprt->ops->destroy(xprt); 980 xprt->ops->destroy(xprt);
997 kfree(xprt); 981 kfree(xprt);
998 982
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 77e8800d4127..c458f8d1d6d1 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -28,6 +28,7 @@
28#include <linux/udp.h> 28#include <linux/udp.h>
29#include <linux/tcp.h> 29#include <linux/tcp.h>
30#include <linux/sunrpc/clnt.h> 30#include <linux/sunrpc/clnt.h>
31#include <linux/sunrpc/sched.h>
31#include <linux/file.h> 32#include <linux/file.h>
32 33
33#include <net/sock.h> 34#include <net/sock.h>
@@ -424,7 +425,7 @@ static void xs_close(struct rpc_xprt *xprt)
424 struct sock *sk = xprt->inet; 425 struct sock *sk = xprt->inet;
425 426
426 if (!sk) 427 if (!sk)
427 return; 428 goto clear_close_wait;
428 429
429 dprintk("RPC: xs_close xprt %p\n", xprt); 430 dprintk("RPC: xs_close xprt %p\n", xprt);
430 431
@@ -441,6 +442,10 @@ static void xs_close(struct rpc_xprt *xprt)
441 sk->sk_no_check = 0; 442 sk->sk_no_check = 0;
442 443
443 sock_release(sock); 444 sock_release(sock);
445clear_close_wait:
446 smp_mb__before_clear_bit();
447 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
448 smp_mb__after_clear_bit();
444} 449}
445 450
446/** 451/**
@@ -800,9 +805,13 @@ static void xs_tcp_state_change(struct sock *sk)
800 case TCP_SYN_SENT: 805 case TCP_SYN_SENT:
801 case TCP_SYN_RECV: 806 case TCP_SYN_RECV:
802 break; 807 break;
808 case TCP_CLOSE_WAIT:
809 /* Try to schedule an autoclose RPC calls */
810 set_bit(XPRT_CLOSE_WAIT, &xprt->state);
811 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
812 schedule_work(&xprt->task_cleanup);
803 default: 813 default:
804 xprt_disconnect(xprt); 814 xprt_disconnect(xprt);
805 break;
806 } 815 }
807 out: 816 out:
808 read_unlock(&sk->sk_callback_lock); 817 read_unlock(&sk->sk_callback_lock);
@@ -920,6 +929,18 @@ static void xs_udp_timer(struct rpc_task *task)
920 xprt_adjust_cwnd(task, -ETIMEDOUT); 929 xprt_adjust_cwnd(task, -ETIMEDOUT);
921} 930}
922 931
932/**
933 * xs_set_port - reset the port number in the remote endpoint address
934 * @xprt: generic transport
935 * @port: new port number
936 *
937 */
938static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
939{
940 dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);
941 xprt->addr.sin_port = htons(port);
942}
943
923static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) 944static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
924{ 945{
925 struct sockaddr_in myaddr = { 946 struct sockaddr_in myaddr = {
@@ -1160,7 +1181,10 @@ static struct rpc_xprt_ops xs_udp_ops = {
1160 .set_buffer_size = xs_udp_set_buffer_size, 1181 .set_buffer_size = xs_udp_set_buffer_size,
1161 .reserve_xprt = xprt_reserve_xprt_cong, 1182 .reserve_xprt = xprt_reserve_xprt_cong,
1162 .release_xprt = xprt_release_xprt_cong, 1183 .release_xprt = xprt_release_xprt_cong,
1184 .set_port = xs_set_port,
1163 .connect = xs_connect, 1185 .connect = xs_connect,
1186 .buf_alloc = rpc_malloc,
1187 .buf_free = rpc_free,
1164 .send_request = xs_udp_send_request, 1188 .send_request = xs_udp_send_request,
1165 .set_retrans_timeout = xprt_set_retrans_timeout_rtt, 1189 .set_retrans_timeout = xprt_set_retrans_timeout_rtt,
1166 .timer = xs_udp_timer, 1190 .timer = xs_udp_timer,
@@ -1172,7 +1196,10 @@ static struct rpc_xprt_ops xs_udp_ops = {
1172static struct rpc_xprt_ops xs_tcp_ops = { 1196static struct rpc_xprt_ops xs_tcp_ops = {
1173 .reserve_xprt = xprt_reserve_xprt, 1197 .reserve_xprt = xprt_reserve_xprt,
1174 .release_xprt = xprt_release_xprt, 1198 .release_xprt = xprt_release_xprt,
1199 .set_port = xs_set_port,
1175 .connect = xs_connect, 1200 .connect = xs_connect,
1201 .buf_alloc = rpc_malloc,
1202 .buf_free = rpc_free,
1176 .send_request = xs_tcp_send_request, 1203 .send_request = xs_tcp_send_request,
1177 .set_retrans_timeout = xprt_set_retrans_timeout_def, 1204 .set_retrans_timeout = xprt_set_retrans_timeout_def,
1178 .close = xs_close, 1205 .close = xs_close,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index acc73ba8bade..1b5989b1b670 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -121,7 +121,7 @@
121int sysctl_unix_max_dgram_qlen = 10; 121int sysctl_unix_max_dgram_qlen = 10;
122 122
123struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; 123struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
124DEFINE_RWLOCK(unix_table_lock); 124DEFINE_SPINLOCK(unix_table_lock);
125static atomic_t unix_nr_socks = ATOMIC_INIT(0); 125static atomic_t unix_nr_socks = ATOMIC_INIT(0);
126 126
127#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) 127#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
@@ -130,7 +130,7 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0);
130 130
131/* 131/*
132 * SMP locking strategy: 132 * SMP locking strategy:
133 * hash table is protected with rwlock unix_table_lock 133 * hash table is protected with spinlock unix_table_lock
134 * each socket state is protected by separate rwlock. 134 * each socket state is protected by separate rwlock.
135 */ 135 */
136 136
@@ -214,16 +214,16 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
214 214
215static inline void unix_remove_socket(struct sock *sk) 215static inline void unix_remove_socket(struct sock *sk)
216{ 216{
217 write_lock(&unix_table_lock); 217 spin_lock(&unix_table_lock);
218 __unix_remove_socket(sk); 218 __unix_remove_socket(sk);
219 write_unlock(&unix_table_lock); 219 spin_unlock(&unix_table_lock);
220} 220}
221 221
222static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) 222static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
223{ 223{
224 write_lock(&unix_table_lock); 224 spin_lock(&unix_table_lock);
225 __unix_insert_socket(list, sk); 225 __unix_insert_socket(list, sk);
226 write_unlock(&unix_table_lock); 226 spin_unlock(&unix_table_lock);
227} 227}
228 228
229static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, 229static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
@@ -250,11 +250,11 @@ static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
250{ 250{
251 struct sock *s; 251 struct sock *s;
252 252
253 read_lock(&unix_table_lock); 253 spin_lock(&unix_table_lock);
254 s = __unix_find_socket_byname(sunname, len, type, hash); 254 s = __unix_find_socket_byname(sunname, len, type, hash);
255 if (s) 255 if (s)
256 sock_hold(s); 256 sock_hold(s);
257 read_unlock(&unix_table_lock); 257 spin_unlock(&unix_table_lock);
258 return s; 258 return s;
259} 259}
260 260
@@ -263,7 +263,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
263 struct sock *s; 263 struct sock *s;
264 struct hlist_node *node; 264 struct hlist_node *node;
265 265
266 read_lock(&unix_table_lock); 266 spin_lock(&unix_table_lock);
267 sk_for_each(s, node, 267 sk_for_each(s, node,
268 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 268 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
269 struct dentry *dentry = unix_sk(s)->dentry; 269 struct dentry *dentry = unix_sk(s)->dentry;
@@ -276,7 +276,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
276 } 276 }
277 s = NULL; 277 s = NULL;
278found: 278found:
279 read_unlock(&unix_table_lock); 279 spin_unlock(&unix_table_lock);
280 return s; 280 return s;
281} 281}
282 282
@@ -473,7 +473,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *,
473static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, 473static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
474 struct msghdr *, size_t); 474 struct msghdr *, size_t);
475 475
476static struct proto_ops unix_stream_ops = { 476static const struct proto_ops unix_stream_ops = {
477 .family = PF_UNIX, 477 .family = PF_UNIX,
478 .owner = THIS_MODULE, 478 .owner = THIS_MODULE,
479 .release = unix_release, 479 .release = unix_release,
@@ -494,7 +494,7 @@ static struct proto_ops unix_stream_ops = {
494 .sendpage = sock_no_sendpage, 494 .sendpage = sock_no_sendpage,
495}; 495};
496 496
497static struct proto_ops unix_dgram_ops = { 497static const struct proto_ops unix_dgram_ops = {
498 .family = PF_UNIX, 498 .family = PF_UNIX,
499 .owner = THIS_MODULE, 499 .owner = THIS_MODULE,
500 .release = unix_release, 500 .release = unix_release,
@@ -515,7 +515,7 @@ static struct proto_ops unix_dgram_ops = {
515 .sendpage = sock_no_sendpage, 515 .sendpage = sock_no_sendpage,
516}; 516};
517 517
518static struct proto_ops unix_seqpacket_ops = { 518static const struct proto_ops unix_seqpacket_ops = {
519 .family = PF_UNIX, 519 .family = PF_UNIX,
520 .owner = THIS_MODULE, 520 .owner = THIS_MODULE,
521 .release = unix_release, 521 .release = unix_release,
@@ -564,7 +564,7 @@ static struct sock * unix_create1(struct socket *sock)
564 u = unix_sk(sk); 564 u = unix_sk(sk);
565 u->dentry = NULL; 565 u->dentry = NULL;
566 u->mnt = NULL; 566 u->mnt = NULL;
567 rwlock_init(&u->lock); 567 spin_lock_init(&u->lock);
568 atomic_set(&u->inflight, sock ? 0 : -1); 568 atomic_set(&u->inflight, sock ? 0 : -1);
569 init_MUTEX(&u->readsem); /* single task reading lock */ 569 init_MUTEX(&u->readsem); /* single task reading lock */
570 init_waitqueue_head(&u->peer_wait); 570 init_waitqueue_head(&u->peer_wait);
@@ -642,12 +642,12 @@ retry:
642 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); 642 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
643 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); 643 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
644 644
645 write_lock(&unix_table_lock); 645 spin_lock(&unix_table_lock);
646 ordernum = (ordernum+1)&0xFFFFF; 646 ordernum = (ordernum+1)&0xFFFFF;
647 647
648 if (__unix_find_socket_byname(addr->name, addr->len, sock->type, 648 if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
649 addr->hash)) { 649 addr->hash)) {
650 write_unlock(&unix_table_lock); 650 spin_unlock(&unix_table_lock);
651 /* Sanity yield. It is unusual case, but yet... */ 651 /* Sanity yield. It is unusual case, but yet... */
652 if (!(ordernum&0xFF)) 652 if (!(ordernum&0xFF))
653 yield(); 653 yield();
@@ -658,7 +658,7 @@ retry:
658 __unix_remove_socket(sk); 658 __unix_remove_socket(sk);
659 u->addr = addr; 659 u->addr = addr;
660 __unix_insert_socket(&unix_socket_table[addr->hash], sk); 660 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
661 write_unlock(&unix_table_lock); 661 spin_unlock(&unix_table_lock);
662 err = 0; 662 err = 0;
663 663
664out: up(&u->readsem); 664out: up(&u->readsem);
@@ -784,14 +784,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
784 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0); 784 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
785 if (err) 785 if (err)
786 goto out_mknod_dput; 786 goto out_mknod_dput;
787 up(&nd.dentry->d_inode->i_sem); 787 mutex_unlock(&nd.dentry->d_inode->i_mutex);
788 dput(nd.dentry); 788 dput(nd.dentry);
789 nd.dentry = dentry; 789 nd.dentry = dentry;
790 790
791 addr->hash = UNIX_HASH_SIZE; 791 addr->hash = UNIX_HASH_SIZE;
792 } 792 }
793 793
794 write_lock(&unix_table_lock); 794 spin_lock(&unix_table_lock);
795 795
796 if (!sunaddr->sun_path[0]) { 796 if (!sunaddr->sun_path[0]) {
797 err = -EADDRINUSE; 797 err = -EADDRINUSE;
@@ -814,7 +814,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
814 __unix_insert_socket(list, sk); 814 __unix_insert_socket(list, sk);
815 815
816out_unlock: 816out_unlock:
817 write_unlock(&unix_table_lock); 817 spin_unlock(&unix_table_lock);
818out_up: 818out_up:
819 up(&u->readsem); 819 up(&u->readsem);
820out: 820out:
@@ -823,7 +823,7 @@ out:
823out_mknod_dput: 823out_mknod_dput:
824 dput(dentry); 824 dput(dentry);
825out_mknod_unlock: 825out_mknod_unlock:
826 up(&nd.dentry->d_inode->i_sem); 826 mutex_unlock(&nd.dentry->d_inode->i_mutex);
827 path_release(&nd); 827 path_release(&nd);
828out_mknod_parent: 828out_mknod_parent:
829 if (err==-EEXIST) 829 if (err==-EEXIST)
@@ -1063,10 +1063,12 @@ restart:
1063 /* Set credentials */ 1063 /* Set credentials */
1064 sk->sk_peercred = other->sk_peercred; 1064 sk->sk_peercred = other->sk_peercred;
1065 1065
1066 sock_hold(newsk);
1067 unix_peer(sk) = newsk;
1068 sock->state = SS_CONNECTED; 1066 sock->state = SS_CONNECTED;
1069 sk->sk_state = TCP_ESTABLISHED; 1067 sk->sk_state = TCP_ESTABLISHED;
1068 sock_hold(newsk);
1069
1070 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1071 unix_peer(sk) = newsk;
1070 1072
1071 unix_state_wunlock(sk); 1073 unix_state_wunlock(sk);
1072 1074
@@ -1414,7 +1416,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1414 } else { 1416 } else {
1415 sunaddr = NULL; 1417 sunaddr = NULL;
1416 err = -ENOTCONN; 1418 err = -ENOTCONN;
1417 other = unix_peer_get(sk); 1419 other = unix_peer(sk);
1418 if (!other) 1420 if (!other)
1419 goto out_err; 1421 goto out_err;
1420 } 1422 }
@@ -1476,7 +1478,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1476 other->sk_data_ready(other, size); 1478 other->sk_data_ready(other, size);
1477 sent+=size; 1479 sent+=size;
1478 } 1480 }
1479 sock_put(other);
1480 1481
1481 scm_destroy(siocb->scm); 1482 scm_destroy(siocb->scm);
1482 siocb->scm = NULL; 1483 siocb->scm = NULL;
@@ -1491,8 +1492,6 @@ pipe_err:
1491 send_sig(SIGPIPE,current,0); 1492 send_sig(SIGPIPE,current,0);
1492 err = -EPIPE; 1493 err = -EPIPE;
1493out_err: 1494out_err:
1494 if (other)
1495 sock_put(other);
1496 scm_destroy(siocb->scm); 1495 scm_destroy(siocb->scm);
1497 siocb->scm = NULL; 1496 siocb->scm = NULL;
1498 return sent ? : err; 1497 return sent ? : err;
@@ -1860,7 +1859,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1860 } 1859 }
1861 1860
1862 default: 1861 default:
1863 err = dev_ioctl(cmd, (void __user *)arg); 1862 err = -ENOIOCTLCMD;
1864 break; 1863 break;
1865 } 1864 }
1866 return err; 1865 return err;
@@ -1917,7 +1916,7 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos)
1917 1916
1918static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 1917static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1919{ 1918{
1920 read_lock(&unix_table_lock); 1919 spin_lock(&unix_table_lock);
1921 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); 1920 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1922} 1921}
1923 1922
@@ -1932,7 +1931,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1932 1931
1933static void unix_seq_stop(struct seq_file *seq, void *v) 1932static void unix_seq_stop(struct seq_file *seq, void *v)
1934{ 1933{
1935 read_unlock(&unix_table_lock); 1934 spin_unlock(&unix_table_lock);
1936} 1935}
1937 1936
1938static int unix_seq_show(struct seq_file *seq, void *v) 1937static int unix_seq_show(struct seq_file *seq, void *v)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 6ffc64e1712d..411802bd4d37 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -182,7 +182,7 @@ void unix_gc(void)
182 if (down_trylock(&unix_gc_sem)) 182 if (down_trylock(&unix_gc_sem))
183 return; 183 return;
184 184
185 read_lock(&unix_table_lock); 185 spin_lock(&unix_table_lock);
186 186
187 forall_unix_sockets(i, s) 187 forall_unix_sockets(i, s)
188 { 188 {
@@ -301,7 +301,7 @@ void unix_gc(void)
301 } 301 }
302 u->gc_tree = GC_ORPHAN; 302 u->gc_tree = GC_ORPHAN;
303 } 303 }
304 read_unlock(&unix_table_lock); 304 spin_unlock(&unix_table_lock);
305 305
306 /* 306 /*
307 * Here we are. Hitlist is filled. Die. 307 * Here we are. Hitlist is filled. Die.
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
index 59fec59b2132..8b9bf4a763b5 100644
--- a/net/wanrouter/af_wanpipe.c
+++ b/net/wanrouter/af_wanpipe.c
@@ -36,6 +36,7 @@
36#include <linux/types.h> 36#include <linux/types.h>
37#include <linux/sched.h> 37#include <linux/sched.h>
38#include <linux/mm.h> 38#include <linux/mm.h>
39#include <linux/capability.h>
39#include <linux/fcntl.h> 40#include <linux/fcntl.h>
40#include <linux/socket.h> 41#include <linux/socket.h>
41#include <linux/in.h> 42#include <linux/in.h>
@@ -181,7 +182,7 @@ struct wanpipe_opt
181#endif 182#endif
182 183
183static int sk_count; 184static int sk_count;
184extern struct proto_ops wanpipe_ops; 185extern const struct proto_ops wanpipe_ops;
185static unsigned long find_free_critical; 186static unsigned long find_free_critical;
186 187
187static void wanpipe_unlink_driver(struct sock *sk); 188static void wanpipe_unlink_driver(struct sock *sk);
@@ -1839,7 +1840,7 @@ static int wanpipe_ioctl(struct socket *sock, unsigned int cmd, unsigned long ar
1839#endif 1840#endif
1840 1841
1841 default: 1842 default:
1842 return dev_ioctl(cmd,(void __user *) arg); 1843 return -ENOIOCTLCMD;
1843 } 1844 }
1844 /*NOTREACHED*/ 1845 /*NOTREACHED*/
1845} 1846}
@@ -2546,7 +2547,7 @@ static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr
2546 return 0; 2547 return 0;
2547} 2548}
2548 2549
2549struct proto_ops wanpipe_ops = { 2550const struct proto_ops wanpipe_ops = {
2550 .family = PF_WANPIPE, 2551 .family = PF_WANPIPE,
2551 .owner = THIS_MODULE, 2552 .owner = THIS_MODULE,
2552 .release = wanpipe_release, 2553 .release = wanpipe_release,
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index bcf7b3faa76a..c34833dc7cc1 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -44,6 +44,7 @@
44 44
45#include <linux/config.h> 45#include <linux/config.h>
46#include <linux/stddef.h> /* offsetof(), etc. */ 46#include <linux/stddef.h> /* offsetof(), etc. */
47#include <linux/capability.h>
47#include <linux/errno.h> /* return codes */ 48#include <linux/errno.h> /* return codes */
48#include <linux/kernel.h> 49#include <linux/kernel.h>
49#include <linux/init.h> 50#include <linux/init.h>
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 020d73cc8414..72b6ff3299ba 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -37,6 +37,7 @@
37 37
38#include <linux/config.h> 38#include <linux/config.h>
39#include <linux/module.h> 39#include <linux/module.h>
40#include <linux/capability.h>
40#include <linux/errno.h> 41#include <linux/errno.h>
41#include <linux/kernel.h> 42#include <linux/kernel.h>
42#include <linux/sched.h> 43#include <linux/sched.h>
@@ -64,7 +65,7 @@ int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2;
64HLIST_HEAD(x25_list); 65HLIST_HEAD(x25_list);
65DEFINE_RWLOCK(x25_list_lock); 66DEFINE_RWLOCK(x25_list_lock);
66 67
67static struct proto_ops x25_proto_ops; 68static const struct proto_ops x25_proto_ops;
68 69
69static struct x25_address null_x25_address = {" "}; 70static struct x25_address null_x25_address = {" "};
70 71
@@ -540,12 +541,7 @@ static struct sock *x25_make_new(struct sock *osk)
540 sk->sk_state = TCP_ESTABLISHED; 541 sk->sk_state = TCP_ESTABLISHED;
541 sk->sk_sleep = osk->sk_sleep; 542 sk->sk_sleep = osk->sk_sleep;
542 sk->sk_backlog_rcv = osk->sk_backlog_rcv; 543 sk->sk_backlog_rcv = osk->sk_backlog_rcv;
543 544 sock_copy_flags(sk, osk);
544 if (sock_flag(osk, SOCK_ZAPPED))
545 sock_set_flag(sk, SOCK_ZAPPED);
546
547 if (sock_flag(osk, SOCK_DBG))
548 sock_set_flag(sk, SOCK_DBG);
549 545
550 ox25 = x25_sk(osk); 546 ox25 = x25_sk(osk);
551 x25->t21 = ox25->t21; 547 x25->t21 = ox25->t21;
@@ -1378,7 +1374,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1378 } 1374 }
1379 1375
1380 default: 1376 default:
1381 rc = dev_ioctl(cmd, argp); 1377 rc = -ENOIOCTLCMD;
1382 break; 1378 break;
1383 } 1379 }
1384 1380
@@ -1391,7 +1387,7 @@ static struct net_proto_family x25_family_ops = {
1391 .owner = THIS_MODULE, 1387 .owner = THIS_MODULE,
1392}; 1388};
1393 1389
1394static struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { 1390static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
1395 .family = AF_X25, 1391 .family = AF_X25,
1396 .owner = THIS_MODULE, 1392 .owner = THIS_MODULE,
1397 .release = x25_release, 1393 .release = x25_release,
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 2f4531fcaca2..6ed3302312fb 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -540,8 +540,7 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
540 start = end; 540 start = end;
541 } 541 }
542 } 542 }
543 if (len) 543 BUG_ON(len);
544 BUG();
545} 544}
546EXPORT_SYMBOL_GPL(skb_icv_walk); 545EXPORT_SYMBOL_GPL(skb_icv_walk);
547 546
@@ -610,8 +609,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
610 start = end; 609 start = end;
611 } 610 }
612 } 611 }
613 if (len) 612 BUG_ON(len);
614 BUG();
615 return elt; 613 return elt;
616} 614}
617EXPORT_SYMBOL_GPL(skb_to_sgvec); 615EXPORT_SYMBOL_GPL(skb_to_sgvec);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d19e274b9c4a..077bbf9fb9b7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -10,7 +10,7 @@
10 * YOSHIFUJI Hideaki 10 * YOSHIFUJI Hideaki
11 * Split up af-specific portion 11 * Split up af-specific portion
12 * Derek Atkins <derek@ihtfp.com> Add the post_input processor 12 * Derek Atkins <derek@ihtfp.com> Add the post_input processor
13 * 13 *
14 */ 14 */
15 15
16#include <asm/bug.h> 16#include <asm/bug.h>
@@ -22,6 +22,7 @@
22#include <linux/workqueue.h> 22#include <linux/workqueue.h>
23#include <linux/notifier.h> 23#include <linux/notifier.h>
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
25#include <linux/netfilter.h>
25#include <linux/module.h> 26#include <linux/module.h>
26#include <net/xfrm.h> 27#include <net/xfrm.h>
27#include <net/ip.h> 28#include <net/ip.h>
@@ -247,15 +248,14 @@ EXPORT_SYMBOL(xfrm_policy_alloc);
247 248
248void __xfrm_policy_destroy(struct xfrm_policy *policy) 249void __xfrm_policy_destroy(struct xfrm_policy *policy)
249{ 250{
250 if (!policy->dead) 251 BUG_ON(!policy->dead);
251 BUG();
252 252
253 if (policy->bundles) 253 BUG_ON(policy->bundles);
254 BUG();
255 254
256 if (del_timer(&policy->timer)) 255 if (del_timer(&policy->timer))
257 BUG(); 256 BUG();
258 257
258 security_xfrm_policy_free(policy);
259 kfree(policy); 259 kfree(policy);
260} 260}
261EXPORT_SYMBOL(__xfrm_policy_destroy); 261EXPORT_SYMBOL(__xfrm_policy_destroy);
@@ -350,7 +350,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
350 350
351 write_lock_bh(&xfrm_policy_lock); 351 write_lock_bh(&xfrm_policy_lock);
352 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { 352 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
353 if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) { 353 if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
354 xfrm_sec_ctx_match(pol->security, policy->security)) {
354 if (excl) { 355 if (excl) {
355 write_unlock_bh(&xfrm_policy_lock); 356 write_unlock_bh(&xfrm_policy_lock);
356 return -EEXIST; 357 return -EEXIST;
@@ -416,14 +417,15 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
416} 417}
417EXPORT_SYMBOL(xfrm_policy_insert); 418EXPORT_SYMBOL(xfrm_policy_insert);
418 419
419struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, 420struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
420 int delete) 421 struct xfrm_sec_ctx *ctx, int delete)
421{ 422{
422 struct xfrm_policy *pol, **p; 423 struct xfrm_policy *pol, **p;
423 424
424 write_lock_bh(&xfrm_policy_lock); 425 write_lock_bh(&xfrm_policy_lock);
425 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { 426 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
426 if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) { 427 if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
428 (xfrm_sec_ctx_match(ctx, pol->security))) {
427 xfrm_pol_hold(pol); 429 xfrm_pol_hold(pol);
428 if (delete) 430 if (delete)
429 *p = pol->next; 431 *p = pol->next;
@@ -438,7 +440,7 @@ struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
438 } 440 }
439 return pol; 441 return pol;
440} 442}
441EXPORT_SYMBOL(xfrm_policy_bysel); 443EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
442 444
443struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) 445struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
444{ 446{
@@ -519,7 +521,7 @@ EXPORT_SYMBOL(xfrm_policy_walk);
519 521
520/* Find policy to apply to this flow. */ 522/* Find policy to apply to this flow. */
521 523
522static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, 524static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
523 void **objp, atomic_t **obj_refp) 525 void **objp, atomic_t **obj_refp)
524{ 526{
525 struct xfrm_policy *pol; 527 struct xfrm_policy *pol;
@@ -533,9 +535,12 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
533 continue; 535 continue;
534 536
535 match = xfrm_selector_match(sel, fl, family); 537 match = xfrm_selector_match(sel, fl, family);
538
536 if (match) { 539 if (match) {
537 xfrm_pol_hold(pol); 540 if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
538 break; 541 xfrm_pol_hold(pol);
542 break;
543 }
539 } 544 }
540 } 545 }
541 read_unlock_bh(&xfrm_policy_lock); 546 read_unlock_bh(&xfrm_policy_lock);
@@ -543,15 +548,37 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
543 *obj_refp = &pol->refcnt; 548 *obj_refp = &pol->refcnt;
544} 549}
545 550
546static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) 551static inline int policy_to_flow_dir(int dir)
552{
553 if (XFRM_POLICY_IN == FLOW_DIR_IN &&
554 XFRM_POLICY_OUT == FLOW_DIR_OUT &&
555 XFRM_POLICY_FWD == FLOW_DIR_FWD)
556 return dir;
557 switch (dir) {
558 default:
559 case XFRM_POLICY_IN:
560 return FLOW_DIR_IN;
561 case XFRM_POLICY_OUT:
562 return FLOW_DIR_OUT;
563 case XFRM_POLICY_FWD:
564 return FLOW_DIR_FWD;
565 };
566}
567
568static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
547{ 569{
548 struct xfrm_policy *pol; 570 struct xfrm_policy *pol;
549 571
550 read_lock_bh(&xfrm_policy_lock); 572 read_lock_bh(&xfrm_policy_lock);
551 if ((pol = sk->sk_policy[dir]) != NULL) { 573 if ((pol = sk->sk_policy[dir]) != NULL) {
552 int match = xfrm_selector_match(&pol->selector, fl, 574 int match = xfrm_selector_match(&pol->selector, fl,
553 sk->sk_family); 575 sk->sk_family);
576 int err = 0;
577
554 if (match) 578 if (match)
579 err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
580
581 if (match && !err)
555 xfrm_pol_hold(pol); 582 xfrm_pol_hold(pol);
556 else 583 else
557 pol = NULL; 584 pol = NULL;
@@ -624,6 +651,10 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
624 651
625 if (newp) { 652 if (newp) {
626 newp->selector = old->selector; 653 newp->selector = old->selector;
654 if (security_xfrm_policy_clone(old, newp)) {
655 kfree(newp);
656 return NULL; /* ENOMEM */
657 }
627 newp->lft = old->lft; 658 newp->lft = old->lft;
628 newp->curlft = old->curlft; 659 newp->curlft = old->curlft;
629 newp->action = old->action; 660 newp->action = old->action;
@@ -735,22 +766,6 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
735 return err; 766 return err;
736} 767}
737 768
738static inline int policy_to_flow_dir(int dir)
739{
740 if (XFRM_POLICY_IN == FLOW_DIR_IN &&
741 XFRM_POLICY_OUT == FLOW_DIR_OUT &&
742 XFRM_POLICY_FWD == FLOW_DIR_FWD)
743 return dir;
744 switch (dir) {
745 default:
746 case XFRM_POLICY_IN:
747 return FLOW_DIR_IN;
748 case XFRM_POLICY_OUT:
749 return FLOW_DIR_OUT;
750 case XFRM_POLICY_FWD:
751 return FLOW_DIR_FWD;
752 };
753}
754 769
755static int stale_bundle(struct dst_entry *dst); 770static int stale_bundle(struct dst_entry *dst);
756 771
@@ -769,19 +784,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
769 int err; 784 int err;
770 u32 genid; 785 u32 genid;
771 u16 family = dst_orig->ops->family; 786 u16 family = dst_orig->ops->family;
787 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
788 u32 sk_sid = security_sk_sid(sk, fl, dir);
772restart: 789restart:
773 genid = atomic_read(&flow_cache_genid); 790 genid = atomic_read(&flow_cache_genid);
774 policy = NULL; 791 policy = NULL;
775 if (sk && sk->sk_policy[1]) 792 if (sk && sk->sk_policy[1])
776 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); 793 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
777 794
778 if (!policy) { 795 if (!policy) {
779 /* To accelerate a bit... */ 796 /* To accelerate a bit... */
780 if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) 797 if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
781 return 0; 798 return 0;
782 799
783 policy = flow_cache_lookup(fl, family, 800 policy = flow_cache_lookup(fl, sk_sid, family, dir,
784 policy_to_flow_dir(XFRM_POLICY_OUT),
785 xfrm_policy_lookup); 801 xfrm_policy_lookup);
786 } 802 }
787 803
@@ -934,8 +950,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
934 return start; 950 return start;
935} 951}
936 952
937static int 953int
938_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) 954xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
939{ 955{
940 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 956 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
941 957
@@ -946,6 +962,7 @@ _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
946 xfrm_policy_put_afinfo(afinfo); 962 xfrm_policy_put_afinfo(afinfo);
947 return 0; 963 return 0;
948} 964}
965EXPORT_SYMBOL(xfrm_decode_session);
949 966
950static inline int secpath_has_tunnel(struct sec_path *sp, int k) 967static inline int secpath_has_tunnel(struct sec_path *sp, int k)
951{ 968{
@@ -962,16 +979,21 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
962{ 979{
963 struct xfrm_policy *pol; 980 struct xfrm_policy *pol;
964 struct flowi fl; 981 struct flowi fl;
982 u8 fl_dir = policy_to_flow_dir(dir);
983 u32 sk_sid;
965 984
966 if (_decode_session(skb, &fl, family) < 0) 985 if (xfrm_decode_session(skb, &fl, family) < 0)
967 return 0; 986 return 0;
987 nf_nat_decode_session(skb, &fl, family);
988
989 sk_sid = security_sk_sid(sk, &fl, fl_dir);
968 990
969 /* First, check used SA against their selectors. */ 991 /* First, check used SA against their selectors. */
970 if (skb->sp) { 992 if (skb->sp) {
971 int i; 993 int i;
972 994
973 for (i=skb->sp->len-1; i>=0; i--) { 995 for (i=skb->sp->len-1; i>=0; i--) {
974 struct sec_decap_state *xvec = &(skb->sp->x[i]); 996 struct sec_decap_state *xvec = &(skb->sp->x[i]);
975 if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) 997 if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
976 return 0; 998 return 0;
977 999
@@ -986,11 +1008,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
986 1008
987 pol = NULL; 1009 pol = NULL;
988 if (sk && sk->sk_policy[dir]) 1010 if (sk && sk->sk_policy[dir])
989 pol = xfrm_sk_policy_lookup(sk, dir, &fl); 1011 pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
990 1012
991 if (!pol) 1013 if (!pol)
992 pol = flow_cache_lookup(&fl, family, 1014 pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
993 policy_to_flow_dir(dir),
994 xfrm_policy_lookup); 1015 xfrm_policy_lookup);
995 1016
996 if (!pol) 1017 if (!pol)
@@ -1035,7 +1056,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1035{ 1056{
1036 struct flowi fl; 1057 struct flowi fl;
1037 1058
1038 if (_decode_session(skb, &fl, family) < 0) 1059 if (xfrm_decode_session(skb, &fl, family) < 0)
1039 return 0; 1060 return 0;
1040 1061
1041 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; 1062 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 479effc97666..e12d0be5f976 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -10,7 +10,7 @@
10 * Split up af-specific functions 10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com> 11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation 12 * Add UDP Encapsulation
13 * 13 *
14 */ 14 */
15 15
16#include <linux/workqueue.h> 16#include <linux/workqueue.h>
@@ -70,6 +70,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
70 x->type->destructor(x); 70 x->type->destructor(x);
71 xfrm_put_type(x->type); 71 xfrm_put_type(x->type);
72 } 72 }
73 security_xfrm_state_free(x);
73 kfree(x); 74 kfree(x);
74} 75}
75 76
@@ -343,7 +344,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
343 selector. 344 selector.
344 */ 345 */
345 if (x->km.state == XFRM_STATE_VALID) { 346 if (x->km.state == XFRM_STATE_VALID) {
346 if (!xfrm_selector_match(&x->sel, fl, family)) 347 if (!xfrm_selector_match(&x->sel, fl, family) ||
348 !xfrm_sec_ctx_match(pol->security, x->security))
347 continue; 349 continue;
348 if (!best || 350 if (!best ||
349 best->km.dying > x->km.dying || 351 best->km.dying > x->km.dying ||
@@ -354,7 +356,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
354 acquire_in_progress = 1; 356 acquire_in_progress = 1;
355 } else if (x->km.state == XFRM_STATE_ERROR || 357 } else if (x->km.state == XFRM_STATE_ERROR ||
356 x->km.state == XFRM_STATE_EXPIRED) { 358 x->km.state == XFRM_STATE_EXPIRED) {
357 if (xfrm_selector_match(&x->sel, fl, family)) 359 if (xfrm_selector_match(&x->sel, fl, family) &&
360 xfrm_sec_ctx_match(pol->security, x->security))
358 error = -ESRCH; 361 error = -ESRCH;
359 } 362 }
360 } 363 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0cdd9a07e043..ac87a09ba83e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -7,7 +7,7 @@
7 * Kazunori MIYAZAWA @USAGI 7 * Kazunori MIYAZAWA @USAGI
8 * Kunihiro Ishiguro <kunihiro@ipinfusion.com> 8 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * IPv6 support 9 * IPv6 support
10 * 10 *
11 */ 11 */
12 12
13#include <linux/module.h> 13#include <linux/module.h>
@@ -88,6 +88,34 @@ static int verify_encap_tmpl(struct rtattr **xfrma)
88 return 0; 88 return 0;
89} 89}
90 90
91
92static inline int verify_sec_ctx_len(struct rtattr **xfrma)
93{
94 struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1];
95 struct xfrm_user_sec_ctx *uctx;
96 int len = 0;
97
98 if (!rt)
99 return 0;
100
101 if (rt->rta_len < sizeof(*uctx))
102 return -EINVAL;
103
104 uctx = RTA_DATA(rt);
105
106 if (uctx->ctx_len > PAGE_SIZE)
107 return -EINVAL;
108
109 len += sizeof(struct xfrm_user_sec_ctx);
110 len += uctx->ctx_len;
111
112 if (uctx->len != len)
113 return -EINVAL;
114
115 return 0;
116}
117
118
91static int verify_newsa_info(struct xfrm_usersa_info *p, 119static int verify_newsa_info(struct xfrm_usersa_info *p,
92 struct rtattr **xfrma) 120 struct rtattr **xfrma)
93{ 121{
@@ -145,6 +173,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
145 goto out; 173 goto out;
146 if ((err = verify_encap_tmpl(xfrma))) 174 if ((err = verify_encap_tmpl(xfrma)))
147 goto out; 175 goto out;
176 if ((err = verify_sec_ctx_len(xfrma)))
177 goto out;
148 178
149 err = -EINVAL; 179 err = -EINVAL;
150 switch (p->mode) { 180 switch (p->mode) {
@@ -209,6 +239,30 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a
209 return 0; 239 return 0;
210} 240}
211 241
242
243static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp)
244{
245 struct xfrm_sec_ctx *xfrm_ctx = xp->security;
246 int len = 0;
247
248 if (xfrm_ctx) {
249 len += sizeof(struct xfrm_user_sec_ctx);
250 len += xfrm_ctx->ctx_len;
251 }
252 return len;
253}
254
255static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg)
256{
257 struct xfrm_user_sec_ctx *uctx;
258
259 if (!u_arg)
260 return 0;
261
262 uctx = RTA_DATA(u_arg);
263 return security_xfrm_state_alloc(x, uctx);
264}
265
212static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) 266static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
213{ 267{
214 memcpy(&x->id, &p->id, sizeof(x->id)); 268 memcpy(&x->id, &p->id, sizeof(x->id));
@@ -253,6 +307,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
253 if (err) 307 if (err)
254 goto error; 308 goto error;
255 309
310 if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1])))
311 goto error;
312
256 x->km.seq = p->seq; 313 x->km.seq = p->seq;
257 314
258 return x; 315 return x;
@@ -272,11 +329,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
272 int err; 329 int err;
273 struct km_event c; 330 struct km_event c;
274 331
275 err = verify_newsa_info(p, (struct rtattr **) xfrma); 332 err = verify_newsa_info(p, (struct rtattr **)xfrma);
276 if (err) 333 if (err)
277 return err; 334 return err;
278 335
279 x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err); 336 x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err);
280 if (!x) 337 if (!x)
281 return err; 338 return err;
282 339
@@ -390,6 +447,19 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
390 if (x->encap) 447 if (x->encap)
391 RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); 448 RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
392 449
450 if (x->security) {
451 int ctx_size = sizeof(struct xfrm_sec_ctx) +
452 x->security->ctx_len;
453 struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
454 struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
455
456 uctx->exttype = XFRMA_SEC_CTX;
457 uctx->len = ctx_size;
458 uctx->ctx_doi = x->security->ctx_doi;
459 uctx->ctx_alg = x->security->ctx_alg;
460 uctx->ctx_len = x->security->ctx_len;
461 memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len);
462 }
393 nlh->nlmsg_len = skb->tail - b; 463 nlh->nlmsg_len = skb->tail - b;
394out: 464out:
395 sp->this_idx++; 465 sp->this_idx++;
@@ -603,6 +673,18 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
603 return verify_policy_dir(p->dir); 673 return verify_policy_dir(p->dir);
604} 674}
605 675
676static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma)
677{
678 struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1];
679 struct xfrm_user_sec_ctx *uctx;
680
681 if (!rt)
682 return 0;
683
684 uctx = RTA_DATA(rt);
685 return security_xfrm_policy_alloc(pol, uctx);
686}
687
606static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, 688static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
607 int nr) 689 int nr)
608{ 690{
@@ -681,7 +763,10 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
681 } 763 }
682 764
683 copy_from_user_policy(xp, p); 765 copy_from_user_policy(xp, p);
684 err = copy_from_user_tmpl(xp, xfrma); 766
767 if (!(err = copy_from_user_tmpl(xp, xfrma)))
768 err = copy_from_user_sec_ctx(xp, xfrma);
769
685 if (err) { 770 if (err) {
686 *errp = err; 771 *errp = err;
687 kfree(xp); 772 kfree(xp);
@@ -702,8 +787,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
702 err = verify_newpolicy_info(p); 787 err = verify_newpolicy_info(p);
703 if (err) 788 if (err)
704 return err; 789 return err;
790 err = verify_sec_ctx_len((struct rtattr **)xfrma);
791 if (err)
792 return err;
705 793
706 xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err); 794 xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err);
707 if (!xp) 795 if (!xp)
708 return err; 796 return err;
709 797
@@ -714,6 +802,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
714 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; 802 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
715 err = xfrm_policy_insert(p->dir, xp, excl); 803 err = xfrm_policy_insert(p->dir, xp, excl);
716 if (err) { 804 if (err) {
805 security_xfrm_policy_free(xp);
717 kfree(xp); 806 kfree(xp);
718 return err; 807 return err;
719 } 808 }
@@ -761,6 +850,27 @@ rtattr_failure:
761 return -1; 850 return -1;
762} 851}
763 852
853static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
854{
855 if (xp->security) {
856 int ctx_size = sizeof(struct xfrm_sec_ctx) +
857 xp->security->ctx_len;
858 struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
859 struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
860
861 uctx->exttype = XFRMA_SEC_CTX;
862 uctx->len = ctx_size;
863 uctx->ctx_doi = xp->security->ctx_doi;
864 uctx->ctx_alg = xp->security->ctx_alg;
865 uctx->ctx_len = xp->security->ctx_len;
866 memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len);
867 }
868 return 0;
869
870 rtattr_failure:
871 return -1;
872}
873
764static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) 874static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
765{ 875{
766 struct xfrm_dump_info *sp = ptr; 876 struct xfrm_dump_info *sp = ptr;
@@ -782,6 +892,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
782 copy_to_user_policy(xp, p, dir); 892 copy_to_user_policy(xp, p, dir);
783 if (copy_to_user_tmpl(xp, skb) < 0) 893 if (copy_to_user_tmpl(xp, skb) < 0)
784 goto nlmsg_failure; 894 goto nlmsg_failure;
895 if (copy_to_user_sec_ctx(xp, skb))
896 goto nlmsg_failure;
785 897
786 nlh->nlmsg_len = skb->tail - b; 898 nlh->nlmsg_len = skb->tail - b;
787out: 899out:
@@ -852,8 +964,25 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
852 964
853 if (p->index) 965 if (p->index)
854 xp = xfrm_policy_byid(p->dir, p->index, delete); 966 xp = xfrm_policy_byid(p->dir, p->index, delete);
855 else 967 else {
856 xp = xfrm_policy_bysel(p->dir, &p->sel, delete); 968 struct rtattr **rtattrs = (struct rtattr **)xfrma;
969 struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
970 struct xfrm_policy tmp;
971
972 err = verify_sec_ctx_len(rtattrs);
973 if (err)
974 return err;
975
976 memset(&tmp, 0, sizeof(struct xfrm_policy));
977 if (rt) {
978 struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
979
980 if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
981 return err;
982 }
983 xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete);
984 security_xfrm_policy_free(&tmp);
985 }
857 if (xp == NULL) 986 if (xp == NULL)
858 return -ENOENT; 987 return -ENOENT;
859 988
@@ -1224,6 +1353,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
1224 1353
1225 if (copy_to_user_tmpl(xp, skb) < 0) 1354 if (copy_to_user_tmpl(xp, skb) < 0)
1226 goto nlmsg_failure; 1355 goto nlmsg_failure;
1356 if (copy_to_user_sec_ctx(xp, skb))
1357 goto nlmsg_failure;
1227 1358
1228 nlh->nlmsg_len = skb->tail - b; 1359 nlh->nlmsg_len = skb->tail - b;
1229 return skb->len; 1360 return skb->len;
@@ -1241,6 +1372,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
1241 1372
1242 len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); 1373 len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
1243 len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); 1374 len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
1375 len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
1244 skb = alloc_skb(len, GFP_ATOMIC); 1376 skb = alloc_skb(len, GFP_ATOMIC);
1245 if (skb == NULL) 1377 if (skb == NULL)
1246 return -ENOMEM; 1378 return -ENOMEM;
@@ -1324,6 +1456,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
1324 copy_to_user_policy(xp, &upe->pol, dir); 1456 copy_to_user_policy(xp, &upe->pol, dir);
1325 if (copy_to_user_tmpl(xp, skb) < 0) 1457 if (copy_to_user_tmpl(xp, skb) < 0)
1326 goto nlmsg_failure; 1458 goto nlmsg_failure;
1459 if (copy_to_user_sec_ctx(xp, skb))
1460 goto nlmsg_failure;
1327 upe->hard = !!hard; 1461 upe->hard = !!hard;
1328 1462
1329 nlh->nlmsg_len = skb->tail - b; 1463 nlh->nlmsg_len = skb->tail - b;
@@ -1341,6 +1475,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
1341 1475
1342 len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); 1476 len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
1343 len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); 1477 len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
1478 len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
1344 skb = alloc_skb(len, GFP_ATOMIC); 1479 skb = alloc_skb(len, GFP_ATOMIC);
1345 if (skb == NULL) 1480 if (skb == NULL)
1346 return -ENOMEM; 1481 return -ENOMEM;