Diffstat (limited to 'net')
-rw-r--r--  net/Kconfig | 5
-rw-r--r--  net/Makefile | 1
-rw-r--r--  net/atm/atm_sysfs.c | 1
-rw-r--r--  net/atm/mpc.c | 11
-rw-r--r--  net/atm/mpc.h | 8
-rw-r--r--  net/bridge/br_forward.c | 5
-rw-r--r--  net/bridge/br_netfilter.c | 50
-rw-r--r--  net/bridge/br_netlink.c | 31
-rw-r--r--  net/bridge/netfilter/ebtables.c | 97
-rw-r--r--  net/core/Makefile | 1
-rw-r--r--  net/core/datagram.c | 4
-rw-r--r--  net/core/dev.c | 18
-rw-r--r--  net/core/dev_mcast.c | 3
-rw-r--r--  net/core/fib_rules.c | 421
-rw-r--r--  net/core/filter.c | 8
-rw-r--r--  net/core/flow.c | 13
-rw-r--r--  net/core/neighbour.c | 596
-rw-r--r--  net/core/netpoll.c | 2
-rw-r--r--  net/core/pktgen.c | 4
-rw-r--r--  net/core/rtnetlink.c | 572
-rw-r--r--  net/core/skbuff.c | 23
-rw-r--r--  net/core/sock.c | 34
-rw-r--r--  net/core/utils.c | 213
-rw-r--r--  net/core/wireless.c | 4
-rw-r--r--  net/dccp/ackvec.c | 20
-rw-r--r--  net/dccp/ackvec.h | 4
-rw-r--r--  net/dccp/ccids/Kconfig | 8
-rw-r--r--  net/dccp/ccids/ccid2.c | 204
-rw-r--r--  net/dccp/ccids/ccid2.h | 9
-rw-r--r--  net/dccp/ccids/ccid3.c | 12
-rw-r--r--  net/dccp/dccp.h | 2
-rw-r--r--  net/dccp/feat.h | 5
-rw-r--r--  net/dccp/ipv4.c | 14
-rw-r--r--  net/dccp/ipv6.c | 15
-rw-r--r--  net/dccp/output.c | 90
-rw-r--r--  net/dccp/proto.c | 16
-rw-r--r--  net/dccp/sysctl.c | 8
-rw-r--r--  net/decnet/Kconfig | 1
-rw-r--r--  net/decnet/af_decnet.c | 1
-rw-r--r--  net/decnet/dn_dev.c | 29
-rw-r--r--  net/decnet/dn_fib.c | 76
-rw-r--r--  net/decnet/dn_nsp_in.c | 2
-rw-r--r--  net/decnet/dn_route.c | 19
-rw-r--r--  net/decnet/dn_rules.c | 511
-rw-r--r--  net/decnet/dn_table.c | 163
-rw-r--r--  net/ethernet/eth.c | 196
-rw-r--r--  net/ipv4/Kconfig | 1
-rw-r--r--  net/ipv4/Makefile | 1
-rw-r--r--  net/ipv4/af_inet.c | 11
-rw-r--r--  net/ipv4/ah4.c | 4
-rw-r--r--  net/ipv4/cipso_ipv4.c | 1607
-rw-r--r--  net/ipv4/devinet.c | 246
-rw-r--r--  net/ipv4/esp4.c | 15
-rw-r--r--  net/ipv4/fib_frontend.c | 472
-rw-r--r--  net/ipv4/fib_hash.c | 126
-rw-r--r--  net/ipv4/fib_lookup.h | 13
-rw-r--r--  net/ipv4/fib_rules.c | 620
-rw-r--r--  net/ipv4/fib_semantics.c | 518
-rw-r--r--  net/ipv4/fib_trie.c | 110
-rw-r--r--  net/ipv4/icmp.c | 16
-rw-r--r--  net/ipv4/igmp.c | 6
-rw-r--r--  net/ipv4/inet_connection_sock.c | 3
-rw-r--r--  net/ipv4/inet_hashtables.c | 33
-rw-r--r--  net/ipv4/inetpeer.c | 5
-rw-r--r--  net/ipv4/ip_fragment.c | 12
-rw-r--r--  net/ipv4/ip_gre.c | 27
-rw-r--r--  net/ipv4/ip_options.c | 20
-rw-r--r--  net/ipv4/ip_output.c | 25
-rw-r--r--  net/ipv4/ipcomp.c | 8
-rw-r--r--  net/ipv4/ipconfig.c | 1
-rw-r--r--  net/ipv4/ipip.c | 22
-rw-r--r--  net/ipv4/ipmr.c | 12
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_tcp.c | 8
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_udp.c | 8
-rw-r--r--  net/ipv4/netfilter.c | 2
-rw-r--r--  net/ipv4/netfilter/Kconfig | 22
-rw-r--r--  net/ipv4/netfilter/Makefile | 2
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 18
-rw-r--r--  net/ipv4/netfilter/arpt_mangle.c | 4
-rw-r--r--  net/ipv4/netfilter/arptable_filter.c | 2
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c | 215
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 516
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 1
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_netlink.c | 76
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_generic.c | 2
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_gre.c | 52
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 2
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 14
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 31
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_udp.c | 7
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_sip.c | 1
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_standalone.c | 5
-rw-r--r--  net/ipv4/netfilter/ip_nat_core.c | 56
-rw-r--r--  net/ipv4/netfilter/ip_nat_helper.c | 63
-rw-r--r--  net/ipv4/netfilter/ip_nat_helper_pptp.c | 188
-rw-r--r--  net/ipv4/netfilter/ip_nat_proto_gre.c | 27
-rw-r--r--  net/ipv4/netfilter/ip_nat_proto_icmp.c | 8
-rw-r--r--  net/ipv4/netfilter/ip_nat_proto_tcp.c | 7
-rw-r--r--  net/ipv4/netfilter/ip_nat_proto_udp.c | 15
-rw-r--r--  net/ipv4/netfilter/ip_nat_rule.c | 14
-rw-r--r--  net/ipv4/netfilter/ip_nat_standalone.c | 13
-rw-r--r--  net/ipv4/netfilter/ip_queue.c | 16
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 184
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 7
-rw-r--r--  net/ipv4/netfilter/ipt_DSCP.c | 96
-rw-r--r--  net/ipv4/netfilter/ipt_ECN.c | 48
-rw-r--r--  net/ipv4/netfilter/ipt_LOG.c | 4
-rw-r--r--  net/ipv4/netfilter/ipt_MASQUERADE.c | 4
-rw-r--r--  net/ipv4/netfilter/ipt_NETMAP.c | 4
-rw-r--r--  net/ipv4/netfilter/ipt_REDIRECT.c | 4
-rw-r--r--  net/ipv4/netfilter/ipt_REJECT.c | 6
-rw-r--r--  net/ipv4/netfilter/ipt_SAME.c | 7
-rw-r--r--  net/ipv4/netfilter/ipt_TCPMSS.c | 135
-rw-r--r--  net/ipv4/netfilter/ipt_TOS.c | 26
-rw-r--r--  net/ipv4/netfilter/ipt_TTL.c | 12
-rw-r--r--  net/ipv4/netfilter/ipt_ULOG.c | 3
-rw-r--r--  net/ipv4/netfilter/ipt_ah.c | 1
-rw-r--r--  net/ipv4/netfilter/ipt_dscp.c | 54
-rw-r--r--  net/ipv4/netfilter/ipt_ecn.c | 3
-rw-r--r--  net/ipv4/netfilter/ipt_hashlimit.c | 33
-rw-r--r--  net/ipv4/netfilter/ipt_owner.c | 1
-rw-r--r--  net/ipv4/netfilter/ipt_recent.c | 13
-rw-r--r--  net/ipv4/netfilter/iptable_filter.c | 4
-rw-r--r--  net/ipv4/netfilter/iptable_mangle.c | 4
-rw-r--r--  net/ipv4/netfilter/iptable_raw.c | 2
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2
-rw-r--r--  net/ipv4/proc.c | 2
-rw-r--r--  net/ipv4/raw.c | 4
-rw-r--r--  net/ipv4/route.c | 166
-rw-r--r--  net/ipv4/syncookies.c | 5
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 35
-rw-r--r--  net/ipv4/tcp.c | 21
-rw-r--r--  net/ipv4/tcp_bic.c | 2
-rw-r--r--  net/ipv4/tcp_cubic.c | 2
-rw-r--r--  net/ipv4/tcp_highspeed.c | 2
-rw-r--r--  net/ipv4/tcp_htcp.c | 2
-rw-r--r--  net/ipv4/tcp_hybla.c | 2
-rw-r--r--  net/ipv4/tcp_input.c | 40
-rw-r--r--  net/ipv4/tcp_ipv4.c | 25
-rw-r--r--  net/ipv4/tcp_lp.c | 3
-rw-r--r--  net/ipv4/tcp_minisocks.c | 4
-rw-r--r--  net/ipv4/tcp_output.c | 30
-rw-r--r--  net/ipv4/tcp_timer.c | 16
-rw-r--r--  net/ipv4/tcp_vegas.c | 2
-rw-r--r--  net/ipv4/tcp_veno.c | 3
-rw-r--r--  net/ipv4/tcp_westwood.c | 2
-rw-r--r--  net/ipv4/udp.c | 120
-rw-r--r--  net/ipv4/xfrm4_input.c | 2
-rw-r--r--  net/ipv4/xfrm4_mode_transport.c | 4
-rw-r--r--  net/ipv4/xfrm4_mode_tunnel.c | 3
-rw-r--r--  net/ipv4/xfrm4_output.c | 10
-rw-r--r--  net/ipv4/xfrm4_policy.c | 26
-rw-r--r--  net/ipv4/xfrm4_state.c | 84
-rw-r--r--  net/ipv4/xfrm4_tunnel.c | 2
-rw-r--r--  net/ipv6/Kconfig | 44
-rw-r--r--  net/ipv6/Makefile | 4
-rw-r--r--  net/ipv6/addrconf.c | 663
-rw-r--r--  net/ipv6/af_inet6.c | 21
-rw-r--r--  net/ipv6/ah6.c | 89
-rw-r--r--  net/ipv6/anycast.c | 8
-rw-r--r--  net/ipv6/datagram.c | 20
-rw-r--r--  net/ipv6/esp6.c | 16
-rw-r--r--  net/ipv6/exthdrs.c | 233
-rw-r--r--  net/ipv6/fib6_rules.c | 305
-rw-r--r--  net/ipv6/icmp.c | 31
-rw-r--r--  net/ipv6/inet6_connection_sock.c | 3
-rw-r--r--  net/ipv6/ip6_fib.c | 463
-rw-r--r--  net/ipv6/ip6_input.c | 2
-rw-r--r--  net/ipv6/ip6_output.c | 106
-rw-r--r--  net/ipv6/ipcomp6.c | 7
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 10
-rw-r--r--  net/ipv6/ipv6_syms.c | 3
-rw-r--r--  net/ipv6/mcast.c | 2
-rw-r--r--  net/ipv6/mip6.c | 519
-rw-r--r--  net/ipv6/ndisc.c | 53
-rw-r--r--  net/ipv6/netfilter.c | 2
-rw-r--r--  net/ipv6/netfilter/Makefile | 2
-rw-r--r--  net/ipv6/netfilter/ip6_queue.c | 16
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 24
-rw-r--r--  net/ipv6/netfilter/ip6t_HL.c | 9
-rw-r--r--  net/ipv6/netfilter/ip6t_LOG.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6t_REJECT.c | 9
-rw-r--r--  net/ipv6/netfilter/ip6t_ah.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6t_dst.c | 220
-rw-r--r--  net/ipv6/netfilter/ip6t_frag.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6t_hbh.c | 49
-rw-r--r--  net/ipv6/netfilter/ip6t_ipv6header.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6t_owner.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6t_rt.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6table_filter.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6table_mangle.c | 12
-rw-r--r--  net/ipv6/netfilter/ip6table_raw.c | 2
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 12
-rw-r--r--  net/ipv6/raw.c | 49
-rw-r--r--  net/ipv6/reassembly.c | 14
-rw-r--r--  net/ipv6/route.c | 1059
-rw-r--r--  net/ipv6/tcp_ipv6.c | 23
-rw-r--r--  net/ipv6/udp.c | 104
-rw-r--r--  net/ipv6/xfrm6_input.c | 110
-rw-r--r--  net/ipv6/xfrm6_mode_ro.c | 93
-rw-r--r--  net/ipv6/xfrm6_mode_transport.c | 5
-rw-r--r--  net/ipv6/xfrm6_mode_tunnel.c | 3
-rw-r--r--  net/ipv6/xfrm6_output.c | 18
-rw-r--r--  net/ipv6/xfrm6_policy.c | 87
-rw-r--r--  net/ipv6/xfrm6_state.c | 189
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 2
-rw-r--r--  net/key/af_key.c | 61
-rw-r--r--  net/netfilter/Kconfig | 23
-rw-r--r--  net/netfilter/Makefile | 2
-rw-r--r--  net/netfilter/core.c | 24
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 211
-rw-r--r--  net/netfilter/nf_conntrack_ftp.c | 98
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 80
-rw-r--r--  net/netfilter/nf_conntrack_proto_generic.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c | 14
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c | 31
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c | 7
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 3
-rw-r--r--  net/netfilter/nf_internals.h | 2
-rw-r--r--  net/netfilter/nf_queue.c | 80
-rw-r--r--  net/netfilter/nfnetlink_queue.c | 10
-rw-r--r--  net/netfilter/x_tables.c | 269
-rw-r--r--  net/netfilter/xt_CLASSIFY.c | 63
-rw-r--r--  net/netfilter/xt_CONNMARK.c | 148
-rw-r--r--  net/netfilter/xt_CONNSECMARK.c | 61
-rw-r--r--  net/netfilter/xt_DSCP.c | 118
-rw-r--r--  net/netfilter/xt_MARK.c | 120
-rw-r--r--  net/netfilter/xt_NFQUEUE.c | 71
-rw-r--r--  net/netfilter/xt_NOTRACK.c | 50
-rw-r--r--  net/netfilter/xt_SECMARK.c | 59
-rw-r--r--  net/netfilter/xt_comment.c | 45
-rw-r--r--  net/netfilter/xt_connbytes.c | 48
-rw-r--r--  net/netfilter/xt_connmark.c | 88
-rw-r--r--  net/netfilter/xt_conntrack.c | 185
-rw-r--r--  net/netfilter/xt_dccp.c | 52
-rw-r--r--  net/netfilter/xt_dscp.c | 103
-rw-r--r--  net/netfilter/xt_esp.c | 52
-rw-r--r--  net/netfilter/xt_helper.c | 55
-rw-r--r--  net/netfilter/xt_length.c | 43
-rw-r--r--  net/netfilter/xt_limit.c | 111
-rw-r--r--  net/netfilter/xt_mac.c | 52
-rw-r--r--  net/netfilter/xt_mark.c | 80
-rw-r--r--  net/netfilter/xt_multiport.c | 115
-rw-r--r--  net/netfilter/xt_physdev.c | 50
-rw-r--r--  net/netfilter/xt_pkttype.c | 44
-rw-r--r--  net/netfilter/xt_policy.c | 52
-rw-r--r--  net/netfilter/xt_quota.c | 53
-rw-r--r--  net/netfilter/xt_sctp.c | 52
-rw-r--r--  net/netfilter/xt_state.c | 56
-rw-r--r--  net/netfilter/xt_statistic.c | 55
-rw-r--r--  net/netfilter/xt_string.c | 54
-rw-r--r--  net/netfilter/xt_tcpmss.c | 97
-rw-r--r--  net/netfilter/xt_tcpudp.c | 109
-rw-r--r--  net/netlabel/Kconfig | 14
-rw-r--r--  net/netlabel/Makefile | 16
-rw-r--r--  net/netlabel/netlabel_cipso_v4.c | 542
-rw-r--r--  net/netlabel/netlabel_cipso_v4.h | 217
-rw-r--r--  net/netlabel/netlabel_domainhash.c | 513
-rw-r--r--  net/netlabel/netlabel_domainhash.h | 67
-rw-r--r--  net/netlabel/netlabel_kapi.c | 231
-rw-r--r--  net/netlabel/netlabel_mgmt.c | 624
-rw-r--r--  net/netlabel/netlabel_mgmt.h | 246
-rw-r--r--  net/netlabel/netlabel_unlabeled.c | 253
-rw-r--r--  net/netlabel/netlabel_unlabeled.h | 98
-rw-r--r--  net/netlabel/netlabel_user.c | 158
-rw-r--r--  net/netlabel/netlabel_user.h | 215
-rw-r--r--  net/netlink/af_netlink.c | 80
-rw-r--r--  net/netlink/attr.c | 124
-rw-r--r--  net/netlink/genetlink.c | 54
-rw-r--r--  net/packet/af_packet.c | 45
-rw-r--r--  net/sched/act_api.c | 253
-rw-r--r--  net/sched/act_gact.c | 142
-rw-r--r--  net/sched/act_ipt.c | 179
-rw-r--r--  net/sched/act_mirred.c | 159
-rw-r--r--  net/sched/act_pedit.c | 166
-rw-r--r--  net/sched/act_police.c | 508
-rw-r--r--  net/sched/act_simple.c | 183
-rw-r--r--  net/sched/cls_fw.c | 25
-rw-r--r--  net/sched/sch_htb.c | 1363
-rw-r--r--  net/sched/sch_netem.c | 4
-rw-r--r--  net/sctp/input.c | 10
-rw-r--r--  net/sctp/inqueue.c | 4
-rw-r--r--  net/sctp/ipv6.c | 7
-rw-r--r--  net/sctp/outqueue.c | 6
-rw-r--r--  net/sctp/proc.c | 17
-rw-r--r--  net/sctp/protocol.c | 11
-rw-r--r--  net/sctp/sm_statefuns.c | 174
-rw-r--r--  net/sctp/socket.c | 33
-rw-r--r--  net/sctp/sysctl.c | 140
-rw-r--r--  net/sctp/transport.c | 2
-rw-r--r--  net/socket.c | 1028
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 7
-rw-r--r--  net/sunrpc/clnt.c | 194
-rw-r--r--  net/sunrpc/pmap_clnt.c | 266
-rw-r--r--  net/sunrpc/rpc_pipe.c | 46
-rw-r--r--  net/sunrpc/sched.c | 99
-rw-r--r--  net/sunrpc/socklib.c | 2
-rw-r--r--  net/sunrpc/sunrpc_syms.c | 3
-rw-r--r--  net/sunrpc/svcsock.c | 38
-rw-r--r--  net/sunrpc/timer.c | 2
-rw-r--r--  net/sunrpc/xprt.c | 86
-rw-r--r--  net/sunrpc/xprtsock.c | 121
-rw-r--r--  net/unix/af_unix.c | 7
-rw-r--r--  net/xfrm/Kconfig | 16
-rw-r--r--  net/xfrm/Makefile | 3
-rw-r--r--  net/xfrm/xfrm_hash.c | 41
-rw-r--r--  net/xfrm/xfrm_hash.h | 128
-rw-r--r--  net/xfrm/xfrm_input.c | 4
-rw-r--r--  net/xfrm/xfrm_policy.c | 901
-rw-r--r--  net/xfrm/xfrm_state.c | 640
-rw-r--r--  net/xfrm/xfrm_user.c | 348
313 files changed, 19091 insertions(+), 9897 deletions(-)
diff --git a/net/Kconfig b/net/Kconfig
index 4959a4e1e0fe..6528a935622c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -249,6 +249,11 @@ source "net/ieee80211/Kconfig"
 config WIRELESS_EXT
 	bool
 
+source "net/netlabel/Kconfig"
+
+config FIB_RULES
+	bool
+
 endif # if NET
 endmenu # Networking
 
diff --git a/net/Makefile b/net/Makefile
index 065796f5fb17..ad4d14f4bb29 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_IP_DCCP) += dccp/
 obj-$(CONFIG_IP_SCTP) += sctp/
 obj-$(CONFIG_IEEE80211) += ieee80211/
 obj-$(CONFIG_TIPC) += tipc/
+obj-$(CONFIG_NETLABEL) += netlabel/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL) += sysctl_net.o
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5df4b9a068bb..c0a4ae28fcfa 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -1,6 +1,5 @@
 /* ATM driver model support. */
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/kobject.h>
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 00704661e83f..b87c2a88bdce 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -98,11 +98,6 @@ static struct notifier_block mpoa_notifier = {
 	0
 };
 
-#ifdef CONFIG_PROC_FS
-extern int mpc_proc_init(void);
-extern void mpc_proc_clean(void);
-#endif
-
 struct mpoa_client *mpcs = NULL; /* FIXME */
 static struct atm_mpoa_qos *qos_head = NULL;
 static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
@@ -1439,12 +1434,8 @@ static __init int atm_mpoa_init(void)
 {
 	register_atm_ioctl(&atm_ioctl_ops);
 
-#ifdef CONFIG_PROC_FS
 	if (mpc_proc_init() != 0)
 		printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n");
-	else
-		printk(KERN_INFO "mpoa: /proc/mpoa initialized\n");
-#endif
 
 	printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n");
 
@@ -1457,9 +1448,7 @@ static void __exit atm_mpoa_cleanup(void)
 	struct atm_mpoa_qos *qos, *nextqos;
 	struct lec_priv *priv;
 
-#ifdef CONFIG_PROC_FS
 	mpc_proc_clean();
-#endif
 
 	del_timer(&mpc_timer);
 	unregister_netdevice_notifier(&mpoa_notifier);
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index 863ddf6079e1..3c7981a229e8 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -50,4 +50,12 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos);
 struct seq_file;
 void atm_mpoa_disp_qos(struct seq_file *m);
 
+#ifdef CONFIG_PROC_FS
+int mpc_proc_init(void);
+void mpc_proc_clean(void);
+#else
+#define mpc_proc_init() (0)
+#define mpc_proc_clean() do { } while(0)
+#endif
+
 #endif /* _MPC_H_ */
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 864fbbc7b24d..191b861e5e53 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -38,13 +38,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 		kfree_skb(skb);
 	else {
-#ifdef CONFIG_BRIDGE_NETFILTER
 		/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
 		if (nf_bridge_maybe_copy_header(skb))
 			kfree_skb(skb);
-		else
-#endif
-		{
+		else {
 			skb_push(skb, ETH_HLEN);
 
 			dev_queue_xmit(skb);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 05b3de888243..ac181be13d83 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -53,10 +53,10 @@
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *brnf_sysctl_header;
-static int brnf_call_iptables = 1;
-static int brnf_call_ip6tables = 1;
-static int brnf_call_arptables = 1;
-static int brnf_filter_vlan_tagged = 1;
+static int brnf_call_iptables __read_mostly = 1;
+static int brnf_call_ip6tables __read_mostly = 1;
+static int brnf_call_arptables __read_mostly = 1;
+static int brnf_filter_vlan_tagged __read_mostly = 1;
 #else
 #define brnf_filter_vlan_tagged 1
 #endif
@@ -127,14 +127,37 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
 
 static inline void nf_bridge_save_header(struct sk_buff *skb)
 {
-	int header_size = 16;
+	int header_size = ETH_HLEN;
 
 	if (skb->protocol == htons(ETH_P_8021Q))
-		header_size = 18;
+		header_size += VLAN_HLEN;
 
 	memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
 }
 
+/*
+ * When forwarding bridge frames, we save a copy of the original
+ * header before processing.
+ */
+int nf_bridge_copy_header(struct sk_buff *skb)
+{
+	int err;
+	int header_size = ETH_HLEN;
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		header_size += VLAN_HLEN;
+
+	err = skb_cow(skb, header_size);
+	if (err)
+		return err;
+
+	memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		__skb_push(skb, VLAN_HLEN);
+	return 0;
+}
+
 /* PF_BRIDGE/PRE_ROUTING *********************************************/
 /* Undo the changes made for ip6tables PREROUTING and continue the
  * bridge PRE_ROUTING hook. */
@@ -695,16 +718,6 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
 	else
 		pf = PF_INET6;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	/* Sometimes we get packets with NULL ->dst here (for example,
-	 * running a dhcp client daemon triggers this). This should now
-	 * be fixed, but let's keep the check around. */
-	if (skb->dst == NULL) {
-		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
-		return NF_ACCEPT;
-	}
-#endif
-
 	nf_bridge = skb->nf_bridge;
 	nf_bridge->physoutdev = skb->dev;
 	realindev = nf_bridge->physindev;
@@ -786,7 +799,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 	 * keep the check just to be sure... */
 	if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
 		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
-		       "bad mac.raw pointer.");
+		       "bad mac.raw pointer.\n");
 		goto print_error;
 	}
 #endif
@@ -804,7 +817,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	if (skb->dst == NULL) {
-		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
+		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
 		goto print_error;
 	}
 #endif
@@ -841,6 +854,7 @@ print_error:
 	}
 	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
 	       skb->data);
+	dump_stack();
 	return NF_ACCEPT;
 #endif
 }
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 53086fb75089..8f661195d09d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -12,6 +12,7 @@
 
 #include <linux/kernel.h>
 #include <linux/rtnetlink.h>
+#include <net/netlink.h>
 #include "br_private.h"
 
 /*
@@ -76,26 +77,24 @@ rtattr_failure:
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
 	struct sk_buff *skb;
-	int err = -ENOMEM;
+	int payload = sizeof(struct ifinfomsg) + 128;
+	int err = -ENOBUFS;
 
 	pr_debug("bridge notify event=%d\n", event);
-	skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128),
-			GFP_ATOMIC);
-	if (!skb)
-		goto err_out;
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
 
-	err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0);
+	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+errout:
 	if (err < 0)
-		goto err_kfree;
-
-	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
-	return;
-
-err_kfree:
-	kfree_skb(skb);
-err_out:
-	netlink_set_err(rtnl, 0, RTNLGRP_LINK, err);
+		rtnl_set_sk_err(RTNLGRP_LINK, err);
 }
 
 /*
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3a13ed643459..3df55b2bd91d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -24,6 +24,7 @@
 #include <linux/vmalloc.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
@@ -31,36 +32,9 @@
 /* needed for logical [in,out]-dev filtering */
 #include "../br_private.h"
 
-/* list_named_find */
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-#include <linux/netfilter_ipv4/listhelp.h>
-#include <linux/mutex.h>
-
-#if 0
-/* use this for remote debugging
- * Copyright (C) 1998 by Ori Pomerantz
- * Print the string to the appropriate tty, the one
- * the current task uses
- */
-static void print_string(char *str)
-{
-	struct tty_struct *my_tty;
-
-	/* The tty for the current task */
-	my_tty = current->signal->tty;
-	if (my_tty != NULL) {
-		my_tty->driver->write(my_tty, 0, str, strlen(str));
-		my_tty->driver->write(my_tty, 0, "\015\012", 2);
-	}
-}
-
-#define BUGPRINT(args) print_string(args);
-#else
 #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
 	"report to author: "format, ## args)
 /* #define BUGPRINT(format, args...) */
-#endif
 #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\
 	": out of memory: "format, ## args)
 /* #define MEMPRINT(format, args...) */
@@ -299,18 +273,22 @@ static inline void *
 find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
 	struct mutex *mutex)
 {
-	void *ret;
+	struct {
+		struct list_head list;
+		char name[EBT_FUNCTION_MAXNAMELEN];
+	} *e;
 
 	*error = mutex_lock_interruptible(mutex);
 	if (*error != 0)
 		return NULL;
 
-	ret = list_named_find(head, name);
-	if (!ret) {
-		*error = -ENOENT;
-		mutex_unlock(mutex);
+	list_for_each_entry(e, head, list) {
+		if (strcmp(e->name, name) == 0)
+			return e;
 	}
-	return ret;
+	*error = -ENOENT;
+	mutex_unlock(mutex);
+	return NULL;
 }
 
 #ifndef CONFIG_KMOD
@@ -1064,15 +1042,19 @@ free_newinfo:
 
 int ebt_register_target(struct ebt_target *target)
 {
+	struct ebt_target *t;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_targets, target)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(t, &ebt_targets, list) {
+		if (strcmp(t->name, target->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&target->list, &ebt_targets);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1081,21 +1063,25 @@ int ebt_register_target(struct ebt_target *target)
 void ebt_unregister_target(struct ebt_target *target)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_targets, target);
+	list_del(&target->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_match(struct ebt_match *match)
 {
+	struct ebt_match *m;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_matches, match)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(m, &ebt_matches, list) {
+		if (strcmp(m->name, match->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&match->list, &ebt_matches);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1104,21 +1090,25 @@ int ebt_register_match(struct ebt_match *match)
 void ebt_unregister_match(struct ebt_match *match)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_matches, match);
+	list_del(&match->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_watcher(struct ebt_watcher *watcher)
 {
+	struct ebt_watcher *w;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_watchers, watcher)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(w, &ebt_watchers, list) {
+		if (strcmp(w->name, watcher->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&watcher->list, &ebt_watchers);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1127,13 +1117,14 @@ int ebt_register_watcher(struct ebt_watcher *watcher)
 void ebt_unregister_watcher(struct ebt_watcher *watcher)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_watchers, watcher);
+	list_del(&watcher->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_table(struct ebt_table *table)
 {
 	struct ebt_table_info *newinfo;
+	struct ebt_table *t;
 	int ret, i, countersize;
 
 	if (!table || !table->table ||!table->table->entries ||
@@ -1179,10 +1170,12 @@ int ebt_register_table(struct ebt_table *table)
 	if (ret != 0)
 		goto free_chainstack;
 
-	if (list_named_find(&ebt_tables, table->name)) {
-		ret = -EEXIST;
-		BUGPRINT("Table name already exists\n");
-		goto free_unlock;
+	list_for_each_entry(t, &ebt_tables, list) {
+		if (strcmp(t->name, table->name) == 0) {
+			ret = -EEXIST;
+			BUGPRINT("Table name already exists\n");
+			goto free_unlock;
+		}
 	}
 
 	/* Hold a reference count if the chains aren't empty */
@@ -1190,7 +1183,7 @@ int ebt_register_table(struct ebt_table *table)
 		ret = -ENOENT;
 		goto free_unlock;
 	}
-	list_prepend(&ebt_tables, table);
+	list_add(&table->list, &ebt_tables);
 	mutex_unlock(&ebt_mutex);
 	return 0;
 free_unlock:
@@ -1216,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table)
 		return;
 	}
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_tables, table);
+	list_del(&table->list);
 	mutex_unlock(&ebt_mutex);
 	vfree(table->private->entries);
 	if (table->private->chainstack) {
@@ -1486,7 +1479,7 @@ static int __init ebtables_init(void)
 	int ret;
 
 	mutex_lock(&ebt_mutex);
-	list_named_insert(&ebt_targets, &ebt_standard_target);
+	list_add(&ebt_standard_target.list, &ebt_targets);
 	mutex_unlock(&ebt_mutex);
 	if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0)
 		return ret;
diff --git a/net/core/Makefile b/net/core/Makefile
index 2645ba428d48..119568077dab 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_WIRELESS_EXT) += wireless.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index aecddcc30401..f558c61aecc7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb)
 
 	sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
 	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
@@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 			goto fault;
 		if ((unsigned short)csum_fold(csum))
 			goto csum_error;
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		iov->iov_len -= chunk;
 		iov->iov_base += chunk;
diff --git a/net/core/dev.c b/net/core/dev.c
index d4a1ec3bded5..14de297d024d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -640,6 +640,8 @@ int dev_valid_name(const char *name)
 {
 	if (*name == '\0')
 		return 0;
+	if (strlen(name) >= IFNAMSIZ)
+		return 0;
 	if (!strcmp(name, ".") || !strcmp(name, ".."))
 		return 0;
 
@@ -1166,12 +1168,12 @@ EXPORT_SYMBOL(netif_device_attach);
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
  */
-int skb_checksum_help(struct sk_buff *skb, int inward)
+int skb_checksum_help(struct sk_buff *skb)
 {
 	unsigned int csum;
 	int ret = 0, offset = skb->h.raw - skb->data;
 
-	if (inward)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		goto out_set_summed;
 
 	if (unlikely(skb_shinfo(skb)->gso_size)) {
@@ -1223,7 +1225,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	skb->mac_len = skb->nh.raw - skb->data;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 			return ERR_PTR(err);
@@ -1232,7 +1234,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
-			if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 				err = ptype->gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -1444,11 +1446,11 @@ int dev_queue_xmit(struct sk_buff *skb)
 	/* If packet is not checksummed and device does not support
 	 * checksumming for this protocol, complete checksumming here.
 	 */
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    (!(dev->features & NETIF_F_GEN_CSUM) &&
 	     (!(dev->features & NETIF_F_IP_CSUM) ||
 	      skb->protocol != htons(ETH_P_IP))))
-		if (skb_checksum_help(skb, 0))
+		if (skb_checksum_help(skb))
 			goto out_kfree_skb;
 
 gso:
@@ -3191,13 +3193,15 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 	struct net_device *dev;
 	int alloc_size;
 
+	BUG_ON(strlen(name) >= sizeof(dev->name));
+
 	/* ensure 32-byte alignment of both the device and private area */
 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
-		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
 		return NULL;
 	}
 
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index c57d887da2ef..b22648d04d36 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -21,8 +21,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
new file mode 100644
index 000000000000..a99d87d82b7f
--- /dev/null
+++ b/net/core/fib_rules.c
@@ -0,0 +1,421 @@
+/*
+ * net/core/fib_rules.c		Generic Routing Rules
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <net/fib_rules.h>
+
+static LIST_HEAD(rules_ops);
+static DEFINE_SPINLOCK(rules_mod_lock);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid);
+
+static struct fib_rules_ops *lookup_rules_ops(int family)
+{
+	struct fib_rules_ops *ops;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &rules_ops, list) {
+		if (ops->family == family) {
+			if (!try_module_get(ops->owner))
+				ops = NULL;
+			rcu_read_unlock();
+			return ops;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static void rules_ops_put(struct fib_rules_ops *ops)
+{
+	if (ops)
+		module_put(ops->owner);
+}
+
+int fib_rules_register(struct fib_rules_ops *ops)
+{
+	int err = -EEXIST;
+	struct fib_rules_ops *o;
+
+	if (ops->rule_size < sizeof(struct fib_rule))
+		return -EINVAL;
+
+	if (ops->match == NULL || ops->configure == NULL ||
+	    ops->compare == NULL || ops->fill == NULL ||
+	    ops->action == NULL)
+		return -EINVAL;
+
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list)
+		if (ops->family == o->family)
+			goto errout;
+
+	list_add_tail_rcu(&ops->list, &rules_ops);
+	err = 0;
+errout:
+	spin_unlock(&rules_mod_lock);
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_register);
+
+static void cleanup_ops(struct fib_rules_ops *ops)
+{
+	struct fib_rule *rule, *tmp;
+
+	list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
+		list_del_rcu(&rule->list);
+		fib_rule_put(rule);
+	}
+}
+
+int fib_rules_unregister(struct fib_rules_ops *ops)
+{
+	int err = 0;
+	struct fib_rules_ops *o;
+
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list) {
+		if (o == ops) {
+			list_del_rcu(&o->list);
+			cleanup_ops(ops);
+			goto out;
+		}
+	}
+
+	err = -ENOENT;
+out:
+	spin_unlock(&rules_mod_lock);
+
+	synchronize_rcu();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_unregister);
+
+int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
+		     int flags, struct fib_lookup_arg *arg)
+{
+	struct fib_rule *rule;
+	int err;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+		if (rule->ifindex && (rule->ifindex != fl->iif))
+			continue;
+
+		if (!ops->match(rule, fl, flags))
+			continue;
+
+		err = ops->action(rule, fl, flags, arg);
+		if (err != -EAGAIN) {
+			fib_rule_get(rule);
+			arg->rule = rule;
+			goto out;
+		}
+	}
+
+	err = -ENETUNREACH;
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_lookup);
+
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule, *r, *last = NULL;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = -EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	rule = kzalloc(ops->rule_size, GFP_KERNEL);
+	if (rule == NULL) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	if (tb[FRA_PRIORITY])
+		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+
+	if (tb[FRA_IFNAME]) {
+		struct net_device *dev;
+
+		rule->ifindex = -1;
+		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(rule->ifname);
+		if (dev)
+			rule->ifindex = dev->ifindex;
+	}
+
+	rule->action = frh->action;
+	rule->flags = frh->flags;
+	rule->table = frh_get_table(frh, tb);
+
+	if (!rule->pref && ops->default_pref)
+		rule->pref = ops->default_pref();
+
+	err = ops->configure(rule, skb, nlh, frh, tb);
+	if (err < 0)
+		goto errout_free;
+
+	list_for_each_entry(r, ops->rules_list, list) {
+		if (r->pref > rule->pref)
+			break;
+		last = r;
+	}
+
+	fib_rule_get(rule);
+
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, ops->rules_list);
+
+	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	rules_ops_put(ops);
+	return 0;
+
+errout_free:
+	kfree(rule);
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = -EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	list_for_each_entry(rule, ops->rules_list, list) {
+		if (frh->action && (frh->action != rule->action))
+			continue;
+
+		if (frh->table && (frh_get_table(frh, tb) != rule->table))
+			continue;
+
+		if (tb[FRA_PRIORITY] &&
+		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
+			continue;
+
+		if (tb[FRA_IFNAME] &&
+		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
+			continue;
+
+		if (!ops->compare(rule, frh, tb))
+			continue;
+
+		if (rule->flags & FIB_RULE_PERMANENT) {
+			err = -EPERM;
+			goto errout;
+		}
+
+		list_del_rcu(&rule->list);
+		synchronize_rcu();
+		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+				   NETLINK_CB(skb).pid);
+		fib_rule_put(rule);
+		rules_ops_put(ops);
+		return 0;
+	}
+
+	err = -ENOENT;
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
+			    u32 pid, u32 seq, int type, int flags,
+			    struct fib_rules_ops *ops)
+{
+	struct nlmsghdr *nlh;
+	struct fib_rule_hdr *frh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
+	if (nlh == NULL)
+		return -1;
+
+	frh = nlmsg_data(nlh);
+	frh->table = rule->table;
+	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
+	frh->res1 = 0;
+	frh->res2 = 0;
+	frh->action = rule->action;
+	frh->flags = rule->flags;
+
+	if (rule->ifname[0])
+		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+
+	if (rule->pref)
+		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
+
+	if (ops->fill(rule, skb, nlh, frh) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
+}
+
+int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+{
+	int idx = 0;
+	struct fib_rule *rule;
+	struct fib_rules_ops *ops;
+
+	ops = lookup_rules_ops(family);
+	if (ops == NULL)
+		return -EAFNOSUPPORT;
+
+	rcu_read_lock();
+	list_for_each_entry(rule, ops->rules_list, list) {
+		if (idx < cb->args[0])
+			goto skip;
+
+		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
+				     NLM_F_MULTI, ops) < 0)
+			break;
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+	rules_ops_put(ops);
+
+	return skb->len;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(ops->nlgroup, err);
+}
+
+static void attach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list) {
+		if (rule->ifindex == -1 &&
+		    strcmp(dev->name, rule->ifname) == 0)
+			rule->ifindex = dev->ifindex;
+	}
+}
+
+static void detach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list)
+		if (rule->ifindex == dev->ifindex)
+			rule->ifindex = -1;
+}
+
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event,
+			   void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct fib_rules_ops *ops;
+
+	ASSERT_RTNL();
+	rcu_read_lock();
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			attach_rules(ops->rules_list, dev);
+		break;
+
+	case NETDEV_UNREGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			detach_rules(ops->rules_list, dev);
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fib_rules_notifier = {
+	.notifier_call = fib_rules_event,
+};
+
+static int __init fib_rules_init(void)
+{
+	return register_netdevice_notifier(&fib_rules_notifier);
+}
+
+subsys_initcall(fib_rules_init);
diff --git a/net/core/filter.c b/net/core/filter.c
index 5b4486a60cf6..6732782a5a40 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (!err) {
 		struct sk_filter *old_fp;
 
-		spin_lock_bh(&sk->sk_lock.slock);
-		old_fp = sk->sk_filter;
-		sk->sk_filter = fp;
-		spin_unlock_bh(&sk->sk_lock.slock);
+		rcu_read_lock_bh();
+		old_fp = rcu_dereference(sk->sk_filter);
+		rcu_assign_pointer(sk->sk_filter, fp);
+		rcu_read_unlock_bh();
 		fp = old_fp;
 	}
 
diff --git a/net/core/flow.c b/net/core/flow.c
index 2191af5f26ac..f23e7e386543 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -32,7 +32,6 @@ struct flow_cache_entry {
 	u8 dir;
 	struct flowi key;
 	u32 genid;
-	u32 sk_sid;
 	void *object;
 	atomic_t *object_ref;
 };
@@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
 	struct flow_cache_entry *fle, **head;
@@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
-		    fle->sk_sid == sk_sid &&
 		    flow_key_compare(key, &fle->key) == 0) {
 			if (fle->genid == atomic_read(&flow_cache_genid)) {
 				void *ret = fle->object;
@@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
 			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
-			fle->sk_sid = sk_sid;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
 			flow_count(cpu)++;
@@ -226,7 +223,7 @@ nocache:
 		void *obj;
 		atomic_t *obj_ref;
 
-		resolver(key, sk_sid, family, dir, &obj, &obj_ref);
+		resolver(key, family, dir, &obj, &obj_ref);
 
 		if (fle) {
 			fle->genid = atomic_read(&flow_cache_genid);
@@ -346,12 +343,8 @@ static int __init flow_cache_init(void)
 
 	flow_cachep = kmem_cache_create("flow_cache",
 					sizeof(struct flow_cache_entry),
-					0, SLAB_HWCACHE_ALIGN,
+					0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					NULL, NULL);
-
-	if (!flow_cachep)
-		panic("NET: failed to allocate flow cache slab\n");
-
 	flow_hash_shift = 10;
 	flow_lwm = 2 * flow_hash_size;
 	flow_hwm = 4 * flow_hash_size;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index fe2113f54e2b..b6c69e1463e8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -30,6 +30,7 @@
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/netevent.h>
+#include <net/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
@@ -888,7 +889,7 @@ out_unlock_bh:
 	return rc;
 }
 
-static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
@@ -1338,14 +1339,10 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 		neigh_rand_reach_time(tbl->parms.base_reachable_time);
 
 	if (!tbl->kmem_cachep)
-		tbl->kmem_cachep = kmem_cache_create(tbl->id,
-						     tbl->entry_size,
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-
-	if (!tbl->kmem_cachep)
-		panic("cannot create neighbour cache");
-
+		tbl->kmem_cachep =
+			kmem_cache_create(tbl->id, tbl->entry_size, 0,
+					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					  NULL, NULL);
 	tbl->stats = alloc_percpu(struct neigh_statistics);
 	if (!tbl->stats)
 		panic("cannot create neighbour cache statistics");
@@ -1440,48 +1437,62 @@ int neigh_table_clear(struct neigh_table *tbl)
 
 int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
-	struct rtattr **nda = arg;
+	struct ndmsg *ndm;
+	struct nlattr *dst_attr;
 	struct neigh_table *tbl;
 	struct net_device *dev = NULL;
-	int err = -ENODEV;
+	int err = -EINVAL;
 
-	if (ndm->ndm_ifindex &&
-	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+	if (nlmsg_len(nlh) < sizeof(*ndm))
 		goto out;
 
+	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
+	if (dst_attr == NULL)
+		goto out;
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = dev_get_by_index(ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+	}
+
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-		struct rtattr *dst_attr = nda[NDA_DST - 1];
-		struct neighbour *n;
+		struct neighbour *neigh;
 
 		if (tbl->family != ndm->ndm_family)
			continue;
 		read_unlock(&neigh_tbl_lock);
 
-		err = -EINVAL;
-		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+		if (nla_len(dst_attr) < tbl->key_len)
 			goto out_dev_put;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev);
+			err = pneigh_delete(tbl, nla_data(dst_attr), dev);
 			goto out_dev_put;
 		}
 
-		if (!dev)
-			goto out;
+		if (dev == NULL)
+			goto out_dev_put;
 
-		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-		if (n) {
-			err = neigh_update(n, NULL, NUD_FAILED,
-					   NEIGH_UPDATE_F_OVERRIDE|
-					   NEIGH_UPDATE_F_ADMIN);
-			neigh_release(n);
+		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+		if (neigh == NULL) {
+			err = -ENOENT;
+			goto out_dev_put;
 		}
+
+		err = neigh_update(neigh, NULL, NUD_FAILED,
+				   NEIGH_UPDATE_F_OVERRIDE |
+				   NEIGH_UPDATE_F_ADMIN);
+		neigh_release(neigh);
 		goto out_dev_put;
 	}
 	read_unlock(&neigh_tbl_lock);
-	err = -EADDRNOTAVAIL;
+	err = -EAFNOSUPPORT;
+
 out_dev_put:
 	if (dev)
 		dev_put(dev);
@@ -1491,76 +1502,93 @@ out:
 
 int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
-	struct rtattr **nda = arg;
+	struct ndmsg *ndm;
+	struct nlattr *tb[NDA_MAX+1];
 	struct neigh_table *tbl;
 	struct net_device *dev = NULL;
-	int err = -ENODEV;
+	int err;
 
-	if (ndm->ndm_ifindex &&
-	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+	if (err < 0)
 		goto out;
 
+	err = -EINVAL;
+	if (tb[NDA_DST] == NULL)
+		goto out;
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = dev_get_by_index(ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+
+		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+			goto out_dev_put;
+	}
+
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-		struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1];
-		struct rtattr *dst_attr = nda[NDA_DST - 1];
-		int override = 1;
-		struct neighbour *n;
+		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+		struct neighbour *neigh;
+		void *dst, *lladdr;
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
 		read_unlock(&neigh_tbl_lock);
 
-		err = -EINVAL;
-		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+		if (nla_len(tb[NDA_DST]) < tbl->key_len)
 			goto out_dev_put;
+		dst = nla_data(tb[NDA_DST]);
+		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
+			struct pneigh_entry *pn;
+
 			err = -ENOBUFS;
-			if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1))
+			pn = pneigh_lookup(tbl, dst, dev, 1);
+			if (pn) {
+				pn->flags = ndm->ndm_flags;
 				err = 0;
+			}
 			goto out_dev_put;
 		}
 
-		err = -EINVAL;
-		if (!dev)
-			goto out;
-		if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len)
+		if (dev == NULL)
 			goto out_dev_put;
 
-		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-		if (n) {
-			if (nlh->nlmsg_flags & NLM_F_EXCL) {
-				err = -EEXIST;
-				neigh_release(n);
+		neigh = neigh_lookup(tbl, dst, dev);
+		if (neigh == NULL) {
+			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+				err = -ENOENT;
+				goto out_dev_put;
+			}
+
+			neigh = __neigh_lookup_errno(tbl, dst, dev);
+			if (IS_ERR(neigh)) {
+				err = PTR_ERR(neigh);
 				goto out_dev_put;
 			}
-
-			override = nlh->nlmsg_flags & NLM_F_REPLACE;
-		} else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
-			err = -ENOENT;
-			goto out_dev_put;
 		} else {
-			n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev);
-			if (IS_ERR(n)) {
-				err = PTR_ERR(n);
+			if (nlh->nlmsg_flags & NLM_F_EXCL) {
+				err = -EEXIST;
+				neigh_release(neigh);
 				goto out_dev_put;
 			}
+
+			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
 		}
 
-		err = neigh_update(n,
-				   lladdr_attr ? RTA_DATA(lladdr_attr) : NULL,
-				   ndm->ndm_state,
-				   (override ? NEIGH_UPDATE_F_OVERRIDE : 0) |
-				   NEIGH_UPDATE_F_ADMIN);
-
-		neigh_release(n);
+		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+		neigh_release(neigh);
 		goto out_dev_put;
 	}
 
 	read_unlock(&neigh_tbl_lock);
-	err = -EADDRNOTAVAIL;
+	err = -EAFNOSUPPORT;
+
 out_dev_put:
 	if (dev)
 		dev_put(dev);
@@ -1570,56 +1598,59 @@ out:
 
 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 {
-	struct rtattr *nest = NULL;
+	struct nlattr *nest;
 
-	nest = RTA_NEST(skb, NDTA_PARMS);
+	nest = nla_nest_start(skb, NDTA_PARMS);
+	if (nest == NULL)
+		return -ENOBUFS;
 
 	if (parms->dev)
1578 RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); 1608 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1579 1609
1580 RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); 1610 NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1581 RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); 1611 NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1582 RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); 1612 NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1583 RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); 1613 NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1584 RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); 1614 NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1585 RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); 1615 NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1586 RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); 1616 NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1587 RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, 1617 NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1588 parms->base_reachable_time); 1618 parms->base_reachable_time);
1589 RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); 1619 NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1590 RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); 1620 NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1591 RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); 1621 NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1592 RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); 1622 NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1593 RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); 1623 NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1594 RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); 1624 NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1595 1625
1596 return RTA_NEST_END(skb, nest); 1626 return nla_nest_end(skb, nest);
1597 1627
1598rtattr_failure: 1628nla_put_failure:
1599 return RTA_NEST_CANCEL(skb, nest); 1629 return nla_nest_cancel(skb, nest);
1600} 1630}
1601 1631
1602static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, 1632static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1603 struct netlink_callback *cb) 1633 u32 pid, u32 seq, int type, int flags)
1604{ 1634{
1605 struct nlmsghdr *nlh; 1635 struct nlmsghdr *nlh;
1606 struct ndtmsg *ndtmsg; 1636 struct ndtmsg *ndtmsg;
1607 1637
1608 nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), 1638 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1609 NLM_F_MULTI); 1639 if (nlh == NULL)
1640 return -ENOBUFS;
1610 1641
1611 ndtmsg = NLMSG_DATA(nlh); 1642 ndtmsg = nlmsg_data(nlh);
1612 1643
1613 read_lock_bh(&tbl->lock); 1644 read_lock_bh(&tbl->lock);
1614 ndtmsg->ndtm_family = tbl->family; 1645 ndtmsg->ndtm_family = tbl->family;
1615 ndtmsg->ndtm_pad1 = 0; 1646 ndtmsg->ndtm_pad1 = 0;
1616 ndtmsg->ndtm_pad2 = 0; 1647 ndtmsg->ndtm_pad2 = 0;
1617 1648
1618 RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); 1649 NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1619 RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); 1650 NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1620 RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); 1651 NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1621 RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); 1652 NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1622 RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); 1653 NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1623 1654
1624 { 1655 {
1625 unsigned long now = jiffies; 1656 unsigned long now = jiffies;
@@ -1638,7 +1669,7 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
1638 .ndtc_proxy_qlen = tbl->proxy_queue.qlen, 1669 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1639 }; 1670 };
1640 1671
1641 RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); 1672 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1642 } 1673 }
1643 1674
1644 { 1675 {
@@ -1663,55 +1694,50 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
1663 ndst.ndts_forced_gc_runs += st->forced_gc_runs; 1694 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1664 } 1695 }
1665 1696
1666 RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); 1697 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1667 } 1698 }
1668 1699
1669 BUG_ON(tbl->parms.dev); 1700 BUG_ON(tbl->parms.dev);
1670 if (neightbl_fill_parms(skb, &tbl->parms) < 0) 1701 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1671 goto rtattr_failure; 1702 goto nla_put_failure;
1672 1703
1673 read_unlock_bh(&tbl->lock); 1704 read_unlock_bh(&tbl->lock);
1674 return NLMSG_END(skb, nlh); 1705 return nlmsg_end(skb, nlh);
1675 1706
1676rtattr_failure: 1707nla_put_failure:
1677 read_unlock_bh(&tbl->lock); 1708 read_unlock_bh(&tbl->lock);
1678 return NLMSG_CANCEL(skb, nlh); 1709 return nlmsg_cancel(skb, nlh);
1679
1680nlmsg_failure:
1681 return -1;
1682} 1710}
1683 1711
1684static int neightbl_fill_param_info(struct neigh_table *tbl, 1712static int neightbl_fill_param_info(struct sk_buff *skb,
1713 struct neigh_table *tbl,
1685 struct neigh_parms *parms, 1714 struct neigh_parms *parms,
1686 struct sk_buff *skb, 1715 u32 pid, u32 seq, int type,
1687 struct netlink_callback *cb) 1716 unsigned int flags)
1688{ 1717{
1689 struct ndtmsg *ndtmsg; 1718 struct ndtmsg *ndtmsg;
1690 struct nlmsghdr *nlh; 1719 struct nlmsghdr *nlh;
1691 1720
1692 nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), 1721 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1693 NLM_F_MULTI); 1722 if (nlh == NULL)
1723 return -ENOBUFS;
1694 1724
1695 ndtmsg = NLMSG_DATA(nlh); 1725 ndtmsg = nlmsg_data(nlh);
1696 1726
1697 read_lock_bh(&tbl->lock); 1727 read_lock_bh(&tbl->lock);
1698 ndtmsg->ndtm_family = tbl->family; 1728 ndtmsg->ndtm_family = tbl->family;
1699 ndtmsg->ndtm_pad1 = 0; 1729 ndtmsg->ndtm_pad1 = 0;
1700 ndtmsg->ndtm_pad2 = 0; 1730 ndtmsg->ndtm_pad2 = 0;
1701 RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1702 1731
1703 if (neightbl_fill_parms(skb, parms) < 0) 1732 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1704 goto rtattr_failure; 1733 neightbl_fill_parms(skb, parms) < 0)
1734 goto errout;
1705 1735
1706 read_unlock_bh(&tbl->lock); 1736 read_unlock_bh(&tbl->lock);
1707 return NLMSG_END(skb, nlh); 1737 return nlmsg_end(skb, nlh);
1708 1738errout:
1709rtattr_failure:
1710 read_unlock_bh(&tbl->lock); 1739 read_unlock_bh(&tbl->lock);
1711 return NLMSG_CANCEL(skb, nlh); 1740 return nlmsg_cancel(skb, nlh);
1712
1713nlmsg_failure:
1714 return -1;
1715} 1741}
1716 1742
1717static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, 1743static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
@@ -1727,28 +1753,61 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1727 return NULL; 1753 return NULL;
1728} 1754}
1729 1755
1756static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
1757 [NDTA_NAME] = { .type = NLA_STRING },
1758 [NDTA_THRESH1] = { .type = NLA_U32 },
1759 [NDTA_THRESH2] = { .type = NLA_U32 },
1760 [NDTA_THRESH3] = { .type = NLA_U32 },
1761 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1762 [NDTA_PARMS] = { .type = NLA_NESTED },
1763};
1764
1765static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
1766 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1767 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1768 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1769 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1770 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1771 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1772 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1773 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1774 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1775 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1776 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1777 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1778 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1779};
1780
1730int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1781int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1731{ 1782{
1732 struct neigh_table *tbl; 1783 struct neigh_table *tbl;
1733 struct ndtmsg *ndtmsg = NLMSG_DATA(nlh); 1784 struct ndtmsg *ndtmsg;
1734 struct rtattr **tb = arg; 1785 struct nlattr *tb[NDTA_MAX+1];
1735 int err = -EINVAL; 1786 int err;
1736 1787
1737 if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1])) 1788 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1738 return -EINVAL; 1789 nl_neightbl_policy);
1790 if (err < 0)
1791 goto errout;
1792
1793 if (tb[NDTA_NAME] == NULL) {
1794 err = -EINVAL;
1795 goto errout;
1796 }
1739 1797
1798 ndtmsg = nlmsg_data(nlh);
1740 read_lock(&neigh_tbl_lock); 1799 read_lock(&neigh_tbl_lock);
1741 for (tbl = neigh_tables; tbl; tbl = tbl->next) { 1800 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1742 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) 1801 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1743 continue; 1802 continue;
1744 1803
1745 if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id)) 1804 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1746 break; 1805 break;
1747 } 1806 }
1748 1807
1749 if (tbl == NULL) { 1808 if (tbl == NULL) {
1750 err = -ENOENT; 1809 err = -ENOENT;
1751 goto errout; 1810 goto errout_locked;
1752 } 1811 }
1753 1812
1754 /* 1813 /*
@@ -1757,165 +1816,178 @@ int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1757 */ 1816 */
1758 write_lock_bh(&tbl->lock); 1817 write_lock_bh(&tbl->lock);
1759 1818
1760 if (tb[NDTA_THRESH1 - 1]) 1819 if (tb[NDTA_PARMS]) {
1761 tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]); 1820 struct nlattr *tbp[NDTPA_MAX+1];
1762
1763 if (tb[NDTA_THRESH2 - 1])
1764 tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
1765
1766 if (tb[NDTA_THRESH3 - 1])
1767 tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
1768
1769 if (tb[NDTA_GC_INTERVAL - 1])
1770 tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
1771
1772 if (tb[NDTA_PARMS - 1]) {
1773 struct rtattr *tbp[NDTPA_MAX];
1774 struct neigh_parms *p; 1821 struct neigh_parms *p;
1775 u32 ifindex = 0; 1822 int i, ifindex = 0;
1776 1823
1777 if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0) 1824 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1778 goto rtattr_failure; 1825 nl_ntbl_parm_policy);
1826 if (err < 0)
1827 goto errout_tbl_lock;
1779 1828
1780 if (tbp[NDTPA_IFINDEX - 1]) 1829 if (tbp[NDTPA_IFINDEX])
1781 ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]); 1830 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1782 1831
1783 p = lookup_neigh_params(tbl, ifindex); 1832 p = lookup_neigh_params(tbl, ifindex);
1784 if (p == NULL) { 1833 if (p == NULL) {
1785 err = -ENOENT; 1834 err = -ENOENT;
1786 goto rtattr_failure; 1835 goto errout_tbl_lock;
1787 } 1836 }
1788
1789 if (tbp[NDTPA_QUEUE_LEN - 1])
1790 p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
1791
1792 if (tbp[NDTPA_PROXY_QLEN - 1])
1793 p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
1794
1795 if (tbp[NDTPA_APP_PROBES - 1])
1796 p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
1797 1837
1798 if (tbp[NDTPA_UCAST_PROBES - 1]) 1838 for (i = 1; i <= NDTPA_MAX; i++) {
1799 p->ucast_probes = 1839 if (tbp[i] == NULL)
1800 RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]); 1840 continue;
1801
1802 if (tbp[NDTPA_MCAST_PROBES - 1])
1803 p->mcast_probes =
1804 RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
1805
1806 if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
1807 p->base_reachable_time =
1808 RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
1809
1810 if (tbp[NDTPA_GC_STALETIME - 1])
1811 p->gc_staletime =
1812 RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
1813 1841
1814 if (tbp[NDTPA_DELAY_PROBE_TIME - 1]) 1842 switch (i) {
1815 p->delay_probe_time = 1843 case NDTPA_QUEUE_LEN:
1816 RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]); 1844 p->queue_len = nla_get_u32(tbp[i]);
1845 break;
1846 case NDTPA_PROXY_QLEN:
1847 p->proxy_qlen = nla_get_u32(tbp[i]);
1848 break;
1849 case NDTPA_APP_PROBES:
1850 p->app_probes = nla_get_u32(tbp[i]);
1851 break;
1852 case NDTPA_UCAST_PROBES:
1853 p->ucast_probes = nla_get_u32(tbp[i]);
1854 break;
1855 case NDTPA_MCAST_PROBES:
1856 p->mcast_probes = nla_get_u32(tbp[i]);
1857 break;
1858 case NDTPA_BASE_REACHABLE_TIME:
1859 p->base_reachable_time = nla_get_msecs(tbp[i]);
1860 break;
1861 case NDTPA_GC_STALETIME:
1862 p->gc_staletime = nla_get_msecs(tbp[i]);
1863 break;
1864 case NDTPA_DELAY_PROBE_TIME:
1865 p->delay_probe_time = nla_get_msecs(tbp[i]);
1866 break;
1867 case NDTPA_RETRANS_TIME:
1868 p->retrans_time = nla_get_msecs(tbp[i]);
1869 break;
1870 case NDTPA_ANYCAST_DELAY:
1871 p->anycast_delay = nla_get_msecs(tbp[i]);
1872 break;
1873 case NDTPA_PROXY_DELAY:
1874 p->proxy_delay = nla_get_msecs(tbp[i]);
1875 break;
1876 case NDTPA_LOCKTIME:
1877 p->locktime = nla_get_msecs(tbp[i]);
1878 break;
1879 }
1880 }
1881 }
1817 1882
1818 if (tbp[NDTPA_RETRANS_TIME - 1]) 1883 if (tb[NDTA_THRESH1])
1819 p->retrans_time = 1884 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
1820 RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
1821 1885
1822 if (tbp[NDTPA_ANYCAST_DELAY - 1]) 1886 if (tb[NDTA_THRESH2])
1823 p->anycast_delay = 1887 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
1824 RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
1825 1888
1826 if (tbp[NDTPA_PROXY_DELAY - 1]) 1889 if (tb[NDTA_THRESH3])
1827 p->proxy_delay = 1890 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
1828 RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
1829 1891
1830 if (tbp[NDTPA_LOCKTIME - 1]) 1892 if (tb[NDTA_GC_INTERVAL])
1831 p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]); 1893 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
1832 }
1833 1894
1834 err = 0; 1895 err = 0;
1835 1896
1836rtattr_failure: 1897errout_tbl_lock:
1837 write_unlock_bh(&tbl->lock); 1898 write_unlock_bh(&tbl->lock);
1838errout: 1899errout_locked:
1839 read_unlock(&neigh_tbl_lock); 1900 read_unlock(&neigh_tbl_lock);
1901errout:
1840 return err; 1902 return err;
1841} 1903}
1842 1904
1843int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) 1905int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1844{ 1906{
1845 int idx, family; 1907 int family, tidx, nidx = 0;
1846 int s_idx = cb->args[0]; 1908 int tbl_skip = cb->args[0];
1909 int neigh_skip = cb->args[1];
1847 struct neigh_table *tbl; 1910 struct neigh_table *tbl;
1848 1911
1849 family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; 1912 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
1850 1913
1851 read_lock(&neigh_tbl_lock); 1914 read_lock(&neigh_tbl_lock);
1852 for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) { 1915 for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
1853 struct neigh_parms *p; 1916 struct neigh_parms *p;
1854 1917
1855 if (idx < s_idx || (family && tbl->family != family)) 1918 if (tidx < tbl_skip || (family && tbl->family != family))
1856 continue; 1919 continue;
1857 1920
1858 if (neightbl_fill_info(tbl, skb, cb) <= 0) 1921 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
1922 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
1923 NLM_F_MULTI) <= 0)
1859 break; 1924 break;
1860 1925
1861 for (++idx, p = tbl->parms.next; p; p = p->next, idx++) { 1926 for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
1862 if (idx < s_idx) 1927 if (nidx < neigh_skip)
1863 continue; 1928 continue;
1864 1929
1865 if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0) 1930 if (neightbl_fill_param_info(skb, tbl, p,
1931 NETLINK_CB(cb->skb).pid,
1932 cb->nlh->nlmsg_seq,
1933 RTM_NEWNEIGHTBL,
1934 NLM_F_MULTI) <= 0)
1866 goto out; 1935 goto out;
1867 } 1936 }
1868 1937
1938 neigh_skip = 0;
1869 } 1939 }
1870out: 1940out:
1871 read_unlock(&neigh_tbl_lock); 1941 read_unlock(&neigh_tbl_lock);
1872 cb->args[0] = idx; 1942 cb->args[0] = tidx;
1943 cb->args[1] = nidx;
1873 1944
1874 return skb->len; 1945 return skb->len;
1875} 1946}
1876 1947
1877static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, 1948static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
1878 u32 pid, u32 seq, int event, unsigned int flags) 1949 u32 pid, u32 seq, int type, unsigned int flags)
1879{ 1950{
1880 unsigned long now = jiffies; 1951 unsigned long now = jiffies;
1881 unsigned char *b = skb->tail;
1882 struct nda_cacheinfo ci; 1952 struct nda_cacheinfo ci;
1883 int locked = 0; 1953 struct nlmsghdr *nlh;
1884 u32 probes; 1954 struct ndmsg *ndm;
1885 struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event, 1955
1886 sizeof(struct ndmsg), flags); 1956 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
1887 struct ndmsg *ndm = NLMSG_DATA(nlh); 1957 if (nlh == NULL)
1958 return -ENOBUFS;
1888 1959
1889 ndm->ndm_family = n->ops->family; 1960 ndm = nlmsg_data(nlh);
1961 ndm->ndm_family = neigh->ops->family;
1890 ndm->ndm_pad1 = 0; 1962 ndm->ndm_pad1 = 0;
1891 ndm->ndm_pad2 = 0; 1963 ndm->ndm_pad2 = 0;
1892 ndm->ndm_flags = n->flags; 1964 ndm->ndm_flags = neigh->flags;
1893 ndm->ndm_type = n->type; 1965 ndm->ndm_type = neigh->type;
1894 ndm->ndm_ifindex = n->dev->ifindex; 1966 ndm->ndm_ifindex = neigh->dev->ifindex;
1895 RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); 1967
1896 read_lock_bh(&n->lock); 1968 NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
1897 locked = 1;
1898 ndm->ndm_state = n->nud_state;
1899 if (n->nud_state & NUD_VALID)
1900 RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
1901 ci.ndm_used = now - n->used;
1902 ci.ndm_confirmed = now - n->confirmed;
1903 ci.ndm_updated = now - n->updated;
1904 ci.ndm_refcnt = atomic_read(&n->refcnt) - 1;
1905 probes = atomic_read(&n->probes);
1906 read_unlock_bh(&n->lock);
1907 locked = 0;
1908 RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
1909 RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes);
1910 nlh->nlmsg_len = skb->tail - b;
1911 return skb->len;
1912 1969
1913nlmsg_failure: 1970 read_lock_bh(&neigh->lock);
1914rtattr_failure: 1971 ndm->ndm_state = neigh->nud_state;
1915 if (locked) 1972 if ((neigh->nud_state & NUD_VALID) &&
1916 read_unlock_bh(&n->lock); 1973 nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
1917 skb_trim(skb, b - skb->data); 1974 read_unlock_bh(&neigh->lock);
1918 return -1; 1975 goto nla_put_failure;
1976 }
1977
1978 ci.ndm_used = now - neigh->used;
1979 ci.ndm_confirmed = now - neigh->confirmed;
1980 ci.ndm_updated = now - neigh->updated;
1981 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
1982 read_unlock_bh(&neigh->lock);
1983
1984 NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
1985 NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
1986
1987 return nlmsg_end(skb, nlh);
1988
1989nla_put_failure:
1990 return nlmsg_cancel(skb, nlh);
1919} 1991}
1920 1992
1921 1993
@@ -1959,7 +2031,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1959 int t, family, s_t; 2031 int t, family, s_t;
1960 2032
1961 read_lock(&neigh_tbl_lock); 2033 read_lock(&neigh_tbl_lock);
1962 family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; 2034 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
1963 s_t = cb->args[0]; 2035 s_t = cb->args[0];
1964 2036
1965 for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { 2037 for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
@@ -2338,41 +2410,35 @@ static struct file_operations neigh_stat_seq_fops = {
2338#endif /* CONFIG_PROC_FS */ 2410#endif /* CONFIG_PROC_FS */
2339 2411
2340#ifdef CONFIG_ARPD 2412#ifdef CONFIG_ARPD
2341void neigh_app_ns(struct neighbour *n) 2413static void __neigh_notify(struct neighbour *n, int type, int flags)
2342{ 2414{
2343 struct nlmsghdr *nlh; 2415 struct sk_buff *skb;
2344 int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); 2416 int err = -ENOBUFS;
2345 struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
2346 2417
2347 if (!skb) 2418 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
2348 return; 2419 if (skb == NULL)
2420 goto errout;
2349 2421
2350 if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) { 2422 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2423 if (err < 0) {
2351 kfree_skb(skb); 2424 kfree_skb(skb);
2352 return; 2425 goto errout;
2353 } 2426 }
2354 nlh = (struct nlmsghdr *)skb->data; 2427
2355 nlh->nlmsg_flags = NLM_F_REQUEST; 2428 err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2356 NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; 2429errout:
2357 netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); 2430 if (err < 0)
2431 rtnl_set_sk_err(RTNLGRP_NEIGH, err);
2358} 2432}
2359 2433
2360static void neigh_app_notify(struct neighbour *n) 2434void neigh_app_ns(struct neighbour *n)
2361{ 2435{
2362 struct nlmsghdr *nlh; 2436 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2363 int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); 2437}
2364 struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
2365
2366 if (!skb)
2367 return;
2368 2438
2369 if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) { 2439static void neigh_app_notify(struct neighbour *n)
2370 kfree_skb(skb); 2440{
2371 return; 2441 __neigh_notify(n, RTM_NEWNEIGH, 0);
2372 }
2373 nlh = (struct nlmsghdr *)skb->data;
2374 NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
2375 netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
2376} 2442}
2377 2443
2378#endif /* CONFIG_ARPD */ 2444#endif /* CONFIG_ARPD */
@@ -2386,7 +2452,7 @@ static struct neigh_sysctl_table {
2386 ctl_table neigh_neigh_dir[2]; 2452 ctl_table neigh_neigh_dir[2];
2387 ctl_table neigh_proto_dir[2]; 2453 ctl_table neigh_proto_dir[2];
2388 ctl_table neigh_root_dir[2]; 2454 ctl_table neigh_root_dir[2];
2389} neigh_sysctl_template = { 2455} neigh_sysctl_template __read_mostly = {
2390 .neigh_vars = { 2456 .neigh_vars = {
2391 { 2457 {
2392 .ctl_name = NET_NEIGH_MCAST_SOLICIT, 2458 .ctl_name = NET_NEIGH_MCAST_SOLICIT,
@@ -2659,7 +2725,6 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
2659#endif /* CONFIG_SYSCTL */ 2725#endif /* CONFIG_SYSCTL */
2660 2726
2661EXPORT_SYMBOL(__neigh_event_send); 2727EXPORT_SYMBOL(__neigh_event_send);
2662EXPORT_SYMBOL(neigh_add);
2663EXPORT_SYMBOL(neigh_changeaddr); 2728EXPORT_SYMBOL(neigh_changeaddr);
2664EXPORT_SYMBOL(neigh_compat_output); 2729EXPORT_SYMBOL(neigh_compat_output);
2665EXPORT_SYMBOL(neigh_connected_output); 2730EXPORT_SYMBOL(neigh_connected_output);
@@ -2679,11 +2744,8 @@ EXPORT_SYMBOL(neigh_table_clear);
2679EXPORT_SYMBOL(neigh_table_init); 2744EXPORT_SYMBOL(neigh_table_init);
2680EXPORT_SYMBOL(neigh_table_init_no_netlink); 2745EXPORT_SYMBOL(neigh_table_init_no_netlink);
2681EXPORT_SYMBOL(neigh_update); 2746EXPORT_SYMBOL(neigh_update);
2682EXPORT_SYMBOL(neigh_update_hhs);
2683EXPORT_SYMBOL(pneigh_enqueue); 2747EXPORT_SYMBOL(pneigh_enqueue);
2684EXPORT_SYMBOL(pneigh_lookup); 2748EXPORT_SYMBOL(pneigh_lookup);
2685EXPORT_SYMBOL(neightbl_dump_info);
2686EXPORT_SYMBOL(neightbl_set);
2687 2749
2688#ifdef CONFIG_ARPD 2750#ifdef CONFIG_ARPD
2689EXPORT_SYMBOL(neigh_app_ns); 2751EXPORT_SYMBOL(neigh_app_ns);
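
The neighbour.c hunks above all perform the same conversion: open-coded NLMSG_NEW/RTA_PUT sequences with manual skb_trim() unwinding become nlmsg_put()/NLA_PUT*() with a single nla_put_failure label. A minimal sketch of the resulting fill pattern, using a hypothetical struct mymsg, struct my_obj and MYA_FOO attribute (not real kernel symbols):

	static int my_fill_info(struct sk_buff *skb, struct my_obj *obj,
				u32 pid, u32 seq, int type, unsigned int flags)
	{
		struct nlmsghdr *nlh;
		struct mymsg *m;

		/* reserves aligned space for the netlink + family header;
		 * returns NULL when the skb has insufficient tailroom */
		nlh = nlmsg_put(skb, pid, seq, type, sizeof(*m), flags);
		if (nlh == NULL)
			return -ENOBUFS;

		m = nlmsg_data(nlh);
		m->my_family = AF_UNSPEC;

		/* the NLA_PUT* macros jump to nla_put_failure on overflow */
		NLA_PUT_U32(skb, MYA_FOO, obj->foo);

		return nlmsg_end(skb, nlh);	/* patches nlmsg_len */

	nla_put_failure:
		return nlmsg_cancel(skb, nlh);	/* trims skb back to nlh */
	}

The same shape carries through to notifications: __neigh_notify() above builds the message with such a fill routine and hands delivery and error reporting to rtnl_notify()/rtnl_set_sk_err().
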
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 471da451cd48..ead5920c26d6 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -110,7 +110,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
110 110
111 psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); 111 psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
112 112
113 if (skb->ip_summed == CHECKSUM_HW && 113 if (skb->ip_summed == CHECKSUM_COMPLETE &&
114 !(u16)csum_fold(csum_add(psum, skb->csum))) 114 !(u16)csum_fold(csum_add(psum, skb->csum)))
115 return 0; 115 return 0;
116 116
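
For context on the rename in this hunk: the old CHECKSUM_HW value did double duty for both directions, so it was split into CHECKSUM_COMPLETE (receive: the NIC left a full ones-complement sum of the packet in skb->csum) and CHECKSUM_PARTIAL (transmit: the stack filled in the pseudo-header sum and the NIC finishes the job). A condensed sketch of the receive-side verification above, assuming saddr, daddr and ulen come from the UDP pseudo-header as in checksum_udp():

	/* RX path: fold the hardware sum against the pseudo-header sum */
	unsigned int psum = csum_tcpudp_nofold(saddr, daddr, ulen,
					       IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !(u16)csum_fold(csum_add(psum, skb->csum)))
		return 0;			/* verified by hardware */

	skb->csum = psum;
	return __skb_checksum_complete(skb);	/* software fallback */
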
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6a7320b39ed0..72145d4a2600 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1786,7 +1786,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
1786 * use ipv6_get_lladdr if/when it gets exported 1786
1787 */ 1787 */
1788 1788
1789 read_lock(&addrconf_lock); 1789 rcu_read_lock();
1790 if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { 1790 if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) {
1791 struct inet6_ifaddr *ifp; 1791 struct inet6_ifaddr *ifp;
1792 1792
@@ -1805,7 +1805,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
1805 } 1805 }
1806 read_unlock_bh(&idev->lock); 1806 read_unlock_bh(&idev->lock);
1807 } 1807 }
1808 read_unlock(&addrconf_lock); 1808 rcu_read_unlock();
1809 if (err) 1809 if (err)
1810 printk("pktgen: ERROR: IPv6 link address not availble.\n"); 1810 printk("pktgen: ERROR: IPv6 link address not availble.\n");
1811 } 1811 }
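
The locking change here tracks addrconf dropping its global addrconf_lock rwlock in favour of RCU; readers of the per-device inet6_dev now take only the RCU read lock. A minimal sketch of the reader side, assuming dev is a net_device the caller already holds:

	struct inet6_dev *idev;

	rcu_read_lock();
	idev = __in6_dev_get(dev);	/* RCU-protected, no refcount taken */
	if (idev) {
		/* idev is only stable inside this RCU section; as pktgen
		 * does above, take idev->lock before walking addr_list */
		read_lock_bh(&idev->lock);
		/* ... scan ifp = idev->addr_list ... */
		read_unlock_bh(&idev->lock);
	}
	rcu_read_unlock();
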
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 30cc1ba6ed5c..d8e25e08cb7e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,6 +35,7 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/security.h> 36#include <linux/security.h>
37#include <linux/mutex.h> 37#include <linux/mutex.h>
38#include <linux/if_addr.h>
38 39
39#include <asm/uaccess.h> 40#include <asm/uaccess.h>
40#include <asm/system.h> 41#include <asm/system.h>
@@ -49,6 +50,7 @@
49#include <net/udp.h> 50#include <net/udp.h>
50#include <net/sock.h> 51#include <net/sock.h>
51#include <net/pkt_sched.h> 52#include <net/pkt_sched.h>
53#include <net/fib_rules.h>
52#include <net/netlink.h> 54#include <net/netlink.h>
53#ifdef CONFIG_NET_WIRELESS_RTNETLINK 55#ifdef CONFIG_NET_WIRELESS_RTNETLINK
54#include <linux/wireless.h> 56#include <linux/wireless.h>
@@ -56,6 +58,7 @@
56#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ 58#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
57 59
58static DEFINE_MUTEX(rtnl_mutex); 60static DEFINE_MUTEX(rtnl_mutex);
61static struct sock *rtnl;
59 62
60void rtnl_lock(void) 63void rtnl_lock(void)
61{ 64{
@@ -93,8 +96,6 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
93 return 0; 96 return 0;
94} 97}
95 98
96struct sock *rtnl;
97
98struct rtnetlink_link * rtnetlink_links[NPROTO]; 99struct rtnetlink_link * rtnetlink_links[NPROTO];
99 100
100static const int rtm_min[RTM_NR_FAMILIES] = 101static const int rtm_min[RTM_NR_FAMILIES] =
@@ -102,8 +103,7 @@ static const int rtm_min[RTM_NR_FAMILIES] =
102 [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 103 [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
103 [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), 104 [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
104 [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), 105 [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
105 [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), 106 [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
106 [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
107 [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), 107 [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
108 [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), 108 [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
109 [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), 109 [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
@@ -111,7 +111,6 @@ static const int rtm_min[RTM_NR_FAMILIES] =
111 [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), 111 [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
112 [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), 112 [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
113 [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), 113 [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
114 [RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)),
115}; 114};
116 115
117static const int rta_max[RTM_NR_FAMILIES] = 116static const int rta_max[RTM_NR_FAMILIES] =
@@ -119,13 +118,11 @@ static const int rta_max[RTM_NR_FAMILIES] =
119 [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, 118 [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX,
120 [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, 119 [RTM_FAM(RTM_NEWADDR)] = IFA_MAX,
121 [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, 120 [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX,
122 [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, 121 [RTM_FAM(RTM_NEWRULE)] = FRA_MAX,
123 [RTM_FAM(RTM_NEWRULE)] = RTA_MAX,
124 [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, 122 [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX,
125 [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, 123 [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX,
126 [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, 124 [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX,
127 [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, 125 [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX,
128 [RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX,
129}; 126};
130 127
131void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) 128void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -168,24 +165,52 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
168 return err; 165 return err;
169} 166}
170 167
168int rtnl_unicast(struct sk_buff *skb, u32 pid)
169{
170 return nlmsg_unicast(rtnl, skb, pid);
171}
172
173int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
174 struct nlmsghdr *nlh, gfp_t flags)
175{
176 int report = 0;
177
178 if (nlh)
179 report = nlmsg_report(nlh);
180
181 return nlmsg_notify(rtnl, skb, pid, group, report, flags);
182}
183
184void rtnl_set_sk_err(u32 group, int error)
185{
186 netlink_set_err(rtnl, 0, group, error);
187}
188
171int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) 189int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
172{ 190{
173 struct rtattr *mx = (struct rtattr*)skb->tail; 191 struct nlattr *mx;
174 int i; 192 int i, valid = 0;
193
194 mx = nla_nest_start(skb, RTA_METRICS);
195 if (mx == NULL)
196 return -ENOBUFS;
197
198 for (i = 0; i < RTAX_MAX; i++) {
199 if (metrics[i]) {
200 valid++;
201 NLA_PUT_U32(skb, i+1, metrics[i]);
202 }
203 }
175 204
176 RTA_PUT(skb, RTA_METRICS, 0, NULL); 205 if (!valid) {
177 for (i=0; i<RTAX_MAX; i++) { 206 nla_nest_cancel(skb, mx);
178 if (metrics[i]) 207 return 0;
179 RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
180 } 208 }
181 mx->rta_len = skb->tail - (u8*)mx;
182 if (mx->rta_len == RTA_LENGTH(0))
183 skb_trim(skb, (u8*)mx - skb->data);
184 return 0;
185 209
186rtattr_failure: 210 return nla_nest_end(skb, mx);
187 skb_trim(skb, (u8*)mx - skb->data); 211
188 return -1; 212nla_put_failure:
213 return nla_nest_cancel(skb, mx);
189} 214}
190 215
191 216
@@ -216,41 +241,73 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
216 } 241 }
217} 242}
218 243
219static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 244static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
220 int type, u32 pid, u32 seq, u32 change, 245 struct net_device_stats *b)
221 unsigned int flags)
222{ 246{
223 struct ifinfomsg *r; 247 a->rx_packets = b->rx_packets;
224 struct nlmsghdr *nlh; 248 a->tx_packets = b->tx_packets;
225 unsigned char *b = skb->tail; 249 a->rx_bytes = b->rx_bytes;
226 250 a->tx_bytes = b->tx_bytes;
227 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags); 251 a->rx_errors = b->rx_errors;
228 r = NLMSG_DATA(nlh); 252 a->tx_errors = b->tx_errors;
229 r->ifi_family = AF_UNSPEC; 253 a->rx_dropped = b->rx_dropped;
230 r->__ifi_pad = 0; 254 a->tx_dropped = b->tx_dropped;
231 r->ifi_type = dev->type; 255
232 r->ifi_index = dev->ifindex; 256 a->multicast = b->multicast;
233 r->ifi_flags = dev_get_flags(dev); 257 a->collisions = b->collisions;
234 r->ifi_change = change; 258
235 259 a->rx_length_errors = b->rx_length_errors;
236 RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); 260 a->rx_over_errors = b->rx_over_errors;
237 261 a->rx_crc_errors = b->rx_crc_errors;
238 if (1) { 262 a->rx_frame_errors = b->rx_frame_errors;
239 u32 txqlen = dev->tx_queue_len; 263 a->rx_fifo_errors = b->rx_fifo_errors;
240 RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen); 264 a->rx_missed_errors = b->rx_missed_errors;
241 } 265
266 a->tx_aborted_errors = b->tx_aborted_errors;
267 a->tx_carrier_errors = b->tx_carrier_errors;
268 a->tx_fifo_errors = b->tx_fifo_errors;
269 a->tx_heartbeat_errors = b->tx_heartbeat_errors;
270 a->tx_window_errors = b->tx_window_errors;
271
272 a->rx_compressed = b->rx_compressed;
273 a->tx_compressed = b->tx_compressed;
274};
242 275
243 if (1) { 276static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
244 u32 weight = dev->weight; 277 void *iwbuf, int iwbuflen, int type, u32 pid,
245 RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight); 278 u32 seq, u32 change, unsigned int flags)
246 } 279{
280 struct ifinfomsg *ifm;
281 struct nlmsghdr *nlh;
282
283 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
284 if (nlh == NULL)
285 return -ENOBUFS;
286
287 ifm = nlmsg_data(nlh);
288 ifm->ifi_family = AF_UNSPEC;
289 ifm->__ifi_pad = 0;
290 ifm->ifi_type = dev->type;
291 ifm->ifi_index = dev->ifindex;
292 ifm->ifi_flags = dev_get_flags(dev);
293 ifm->ifi_change = change;
294
295 NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
296 NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
297 NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
298 NLA_PUT_U8(skb, IFLA_OPERSTATE,
299 netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
300 NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
301 NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
302
303 if (dev->ifindex != dev->iflink)
304 NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
305
306 if (dev->master)
307 NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
247 308
248 if (1) { 309 if (dev->qdisc_sleeping)
249 u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN; 310 NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
250 u8 link_mode = dev->link_mode;
251 RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate);
252 RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode);
253 }
254 311
255 if (1) { 312 if (1) {
256 struct rtnl_link_ifmap map = { 313 struct rtnl_link_ifmap map = {
@@ -261,58 +318,38 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
261 .dma = dev->dma, 318 .dma = dev->dma,
262 .port = dev->if_port, 319 .port = dev->if_port,
263 }; 320 };
264 RTA_PUT(skb, IFLA_MAP, sizeof(map), &map); 321 NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
265 } 322 }
266 323
267 if (dev->addr_len) { 324 if (dev->addr_len) {
268 RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); 325 NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
269 RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); 326 NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
270 }
271
272 if (1) {
273 u32 mtu = dev->mtu;
274 RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
275 }
276
277 if (dev->ifindex != dev->iflink) {
278 u32 iflink = dev->iflink;
279 RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
280 }
281
282 if (dev->qdisc_sleeping)
283 RTA_PUT(skb, IFLA_QDISC,
284 strlen(dev->qdisc_sleeping->ops->id) + 1,
285 dev->qdisc_sleeping->ops->id);
286
287 if (dev->master) {
288 u32 master = dev->master->ifindex;
289 RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
290 } 327 }
291 328
292 if (dev->get_stats) { 329 if (dev->get_stats) {
293 unsigned long *stats = (unsigned long*)dev->get_stats(dev); 330 struct net_device_stats *stats = dev->get_stats(dev);
294 if (stats) { 331 if (stats) {
295 struct rtattr *a; 332 struct nlattr *attr;
296 __u32 *s; 333
297 int i; 334 attr = nla_reserve(skb, IFLA_STATS,
298 int n = sizeof(struct rtnl_link_stats)/4; 335 sizeof(struct rtnl_link_stats));
299 336 if (attr == NULL)
300 a = __RTA_PUT(skb, IFLA_STATS, n*4); 337 goto nla_put_failure;
301 s = RTA_DATA(a); 338
302 for (i=0; i<n; i++) 339 copy_rtnl_link_stats(nla_data(attr), stats);
303 s[i] = stats[i];
304 } 340 }
305 } 341 }
306 nlh->nlmsg_len = skb->tail - b;
307 return skb->len;
308 342
309nlmsg_failure: 343 if (iwbuf)
310rtattr_failure: 344 NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf);
311 skb_trim(skb, b - skb->data); 345
312 return -1; 346 return nlmsg_end(skb, nlh);
347
348nla_put_failure:
349 return nlmsg_cancel(skb, nlh);
313} 350}
314 351
315static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 352static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
316{ 353{
317 int idx; 354 int idx;
318 int s_idx = cb->args[0]; 355 int s_idx = cb->args[0];
@@ -322,10 +359,9 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
322 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { 359 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
323 if (idx < s_idx) 360 if (idx < s_idx)
324 continue; 361 continue;
325 if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, 362 if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
326 NETLINK_CB(cb->skb).pid, 363 NETLINK_CB(cb->skb).pid,
327 cb->nlh->nlmsg_seq, 0, 364 cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
328 NLM_F_MULTI) <= 0)
329 break; 365 break;
330 } 366 }
331 read_unlock(&dev_base_lock); 367 read_unlock(&dev_base_lock);
@@ -334,52 +370,70 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
334 return skb->len; 370 return skb->len;
335} 371}
336 372
337static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 373static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
374 [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
375 [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
376 [IFLA_MTU] = { .type = NLA_U32 },
377 [IFLA_TXQLEN] = { .type = NLA_U32 },
378 [IFLA_WEIGHT] = { .type = NLA_U32 },
379 [IFLA_OPERSTATE] = { .type = NLA_U8 },
380 [IFLA_LINKMODE] = { .type = NLA_U8 },
381};
382
383static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
338{ 384{
339 struct ifinfomsg *ifm = NLMSG_DATA(nlh); 385 struct ifinfomsg *ifm;
340 struct rtattr **ida = arg;
341 struct net_device *dev; 386 struct net_device *dev;
342 int err, send_addr_notify = 0; 387 int err, send_addr_notify = 0, modified = 0;
388 struct nlattr *tb[IFLA_MAX+1];
389 char ifname[IFNAMSIZ];
390
391 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
392 if (err < 0)
393 goto errout;
394
395 if (tb[IFLA_IFNAME])
396 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
397 else
398 ifname[0] = '\0';
343 399
400 err = -EINVAL;
401 ifm = nlmsg_data(nlh);
344 if (ifm->ifi_index >= 0) 402 if (ifm->ifi_index >= 0)
345 dev = dev_get_by_index(ifm->ifi_index); 403 dev = dev_get_by_index(ifm->ifi_index);
346 else if (ida[IFLA_IFNAME - 1]) { 404 else if (tb[IFLA_IFNAME])
347 char ifname[IFNAMSIZ];
348
349 if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
350 IFNAMSIZ) >= IFNAMSIZ)
351 return -EINVAL;
352 dev = dev_get_by_name(ifname); 405 dev = dev_get_by_name(ifname);
353 } else 406 else
354 return -EINVAL; 407 goto errout;
355 408
356 if (!dev) 409 if (dev == NULL) {
357 return -ENODEV; 410 err = -ENODEV;
411 goto errout;
412 }
358 413
359 err = -EINVAL; 414 if (tb[IFLA_ADDRESS] &&
415 nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
416 goto errout_dev;
360 417
361 if (ifm->ifi_flags) 418 if (tb[IFLA_BROADCAST] &&
362 dev_change_flags(dev, ifm->ifi_flags); 419 nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
420 goto errout_dev;
363 421
364 if (ida[IFLA_MAP - 1]) { 422 if (tb[IFLA_MAP]) {
365 struct rtnl_link_ifmap *u_map; 423 struct rtnl_link_ifmap *u_map;
366 struct ifmap k_map; 424 struct ifmap k_map;
367 425
368 if (!dev->set_config) { 426 if (!dev->set_config) {
369 err = -EOPNOTSUPP; 427 err = -EOPNOTSUPP;
370 goto out; 428 goto errout_dev;
371 } 429 }
372 430
373 if (!netif_device_present(dev)) { 431 if (!netif_device_present(dev)) {
374 err = -ENODEV; 432 err = -ENODEV;
375 goto out; 433 goto errout_dev;
376 } 434 }
377
378 if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
379 goto out;
380
381 u_map = RTA_DATA(ida[IFLA_MAP - 1]);
382 435
436 u_map = nla_data(tb[IFLA_MAP]);
383 k_map.mem_start = (unsigned long) u_map->mem_start; 437 k_map.mem_start = (unsigned long) u_map->mem_start;
384 k_map.mem_end = (unsigned long) u_map->mem_end; 438 k_map.mem_end = (unsigned long) u_map->mem_end;
385 k_map.base_addr = (unsigned short) u_map->base_addr; 439 k_map.base_addr = (unsigned short) u_map->base_addr;
@@ -388,200 +442,175 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
388 k_map.port = (unsigned char) u_map->port; 442 k_map.port = (unsigned char) u_map->port;
389 443
390 err = dev->set_config(dev, &k_map); 444 err = dev->set_config(dev, &k_map);
445 if (err < 0)
446 goto errout_dev;
391 447
392 if (err) 448 modified = 1;
393 goto out;
394 } 449 }
395 450
396 if (ida[IFLA_ADDRESS - 1]) { 451 if (tb[IFLA_ADDRESS]) {
397 struct sockaddr *sa; 452 struct sockaddr *sa;
398 int len; 453 int len;
399 454
400 if (!dev->set_mac_address) { 455 if (!dev->set_mac_address) {
401 err = -EOPNOTSUPP; 456 err = -EOPNOTSUPP;
402 goto out; 457 goto errout_dev;
403 } 458 }
459
404 if (!netif_device_present(dev)) { 460 if (!netif_device_present(dev)) {
405 err = -ENODEV; 461 err = -ENODEV;
406 goto out; 462 goto errout_dev;
407 } 463 }
408 if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
409 goto out;
410 464
411 len = sizeof(sa_family_t) + dev->addr_len; 465 len = sizeof(sa_family_t) + dev->addr_len;
412 sa = kmalloc(len, GFP_KERNEL); 466 sa = kmalloc(len, GFP_KERNEL);
413 if (!sa) { 467 if (!sa) {
414 err = -ENOMEM; 468 err = -ENOMEM;
415 goto out; 469 goto errout_dev;
416 } 470 }
417 sa->sa_family = dev->type; 471 sa->sa_family = dev->type;
418 memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]), 472 memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
419 dev->addr_len); 473 dev->addr_len);
420 err = dev->set_mac_address(dev, sa); 474 err = dev->set_mac_address(dev, sa);
421 kfree(sa); 475 kfree(sa);
422 if (err) 476 if (err)
423 goto out; 477 goto errout_dev;
424 send_addr_notify = 1; 478 send_addr_notify = 1;
479 modified = 1;
425 } 480 }
426 481
427 if (ida[IFLA_BROADCAST - 1]) { 482 if (tb[IFLA_MTU]) {
428 if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len)) 483 err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
429 goto out; 484 if (err < 0)
430 memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]), 485 goto errout_dev;
431 dev->addr_len); 486 modified = 1;
432 send_addr_notify = 1;
433 } 487 }
434 488
435 if (ida[IFLA_MTU - 1]) { 489 /*
436 if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32))) 490 * Interface selected by interface index but interface
437 goto out; 491 * name provided implies that a name change has been
438 err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1]))); 492 * requested.
439 493 */
440 if (err) 494 if (ifm->ifi_index >= 0 && ifname[0]) {
441 goto out; 495 err = dev_change_name(dev, ifname);
442 496 if (err < 0)
497 goto errout_dev;
498 modified = 1;
443 } 499 }
444 500
445 if (ida[IFLA_TXQLEN - 1]) { 501#ifdef CONFIG_NET_WIRELESS_RTNETLINK
446 if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32))) 502 if (tb[IFLA_WIRELESS]) {
447 goto out; 503 /* Call Wireless Extensions.
504 * Various stuff checked in there... */
505 err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
506 nla_len(tb[IFLA_WIRELESS]));
507 if (err < 0)
508 goto errout_dev;
509 }
510#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
448 511
449 dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1])); 512 if (tb[IFLA_BROADCAST]) {
513 nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
514 send_addr_notify = 1;
450 } 515 }
451 516
452 if (ida[IFLA_WEIGHT - 1]) {
453 if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
454 goto out;
455 517
456 dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1])); 518 if (ifm->ifi_flags)
457 } 519 dev_change_flags(dev, ifm->ifi_flags);
458 520
459 if (ida[IFLA_OPERSTATE - 1]) { 521 if (tb[IFLA_TXQLEN])
460 if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) 522 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
461 goto out;
462 523
463 set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1]))); 524 if (tb[IFLA_WEIGHT])
464 } 525 dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
465 526
466 if (ida[IFLA_LINKMODE - 1]) { 527 if (tb[IFLA_OPERSTATE])
467 if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) 528 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
468 goto out;
469 529
530 if (tb[IFLA_LINKMODE]) {
470 write_lock_bh(&dev_base_lock); 531 write_lock_bh(&dev_base_lock);
471 dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1])); 532 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
472 write_unlock_bh(&dev_base_lock); 533 write_unlock_bh(&dev_base_lock);
473 } 534 }
474 535
475 if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
476 char ifname[IFNAMSIZ];
477
478 if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
479 IFNAMSIZ) >= IFNAMSIZ)
480 goto out;
481 err = dev_change_name(dev, ifname);
482 if (err)
483 goto out;
484 }
485
486#ifdef CONFIG_NET_WIRELESS_RTNETLINK
487 if (ida[IFLA_WIRELESS - 1]) {
488
489 /* Call Wireless Extensions.
490 * Various stuff checked in there... */
491 err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len);
492 if (err)
493 goto out;
494 }
495#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
496
497 err = 0; 536 err = 0;
498 537
499out: 538errout_dev:
539 if (err < 0 && modified && net_ratelimit())
540 printk(KERN_WARNING "A link change request failed with "
541 "some changes comitted already. Interface %s may "
542 "have been left with an inconsistent configuration, "
543 "please check.\n", dev->name);
544
500 if (send_addr_notify) 545 if (send_addr_notify)
501 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 546 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
502 547
503 dev_put(dev); 548 dev_put(dev);
549errout:
504 return err; 550 return err;
505} 551}
506 552
507#ifdef CONFIG_NET_WIRELESS_RTNETLINK 553static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
508static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg)
509{ 554{
510 struct ifinfomsg *ifm = NLMSG_DATA(in_nlh); 555 struct ifinfomsg *ifm;
511 struct rtattr **ida = arg; 556 struct nlattr *tb[IFLA_MAX+1];
512 struct net_device *dev; 557 struct net_device *dev = NULL;
513 struct ifinfomsg *r; 558 struct sk_buff *nskb;
514 struct nlmsghdr *nlh; 559 char *iw_buf = NULL, *iw = NULL;
515 int err = -ENOBUFS;
516 struct sk_buff *skb;
517 unsigned char *b;
518 char *iw_buf = NULL;
519 int iw_buf_len = 0; 560 int iw_buf_len = 0;
561 int err, payload;
520 562
521 if (ifm->ifi_index >= 0) 563 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
564 if (err < 0)
565 goto errout;
566
567 ifm = nlmsg_data(nlh);
568 if (ifm->ifi_index >= 0) {
522 dev = dev_get_by_index(ifm->ifi_index); 569 dev = dev_get_by_index(ifm->ifi_index);
523 else 570 if (dev == NULL)
571 return -ENODEV;
572 } else
524 return -EINVAL; 573 return -EINVAL;
525 if (!dev)
526 return -ENODEV;
527 574
528#ifdef CONFIG_NET_WIRELESS_RTNETLINK
529 if (ida[IFLA_WIRELESS - 1]) {
530 575
576#ifdef CONFIG_NET_WIRELESS_RTNETLINK
577 if (tb[IFLA_WIRELESS]) {
531 /* Call Wireless Extensions. We need to know the size before 578 /* Call Wireless Extensions. We need to know the size before
532 * we can alloc. Various stuff checked in there... */ 579 * we can alloc. Various stuff checked in there... */
533 err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len); 580 err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
534 if (err) 581 nla_len(tb[IFLA_WIRELESS]),
535 goto out; 582 &iw_buf, &iw_buf_len);
583 if (err < 0)
584 goto errout;
585
586 iw += IW_EV_POINT_OFF;
536 } 587 }
537#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ 588#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
538 589
539 /* Create a skb big enough to include all the data. 590 payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) +
540 * Some requests are way bigger than 4k... Jean II */ 591 nla_total_size(iw_buf_len));
541 skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)), 592 nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
542 GFP_KERNEL); 593 if (nskb == NULL) {
543 if (!skb) 594 err = -ENOBUFS;
544 goto out; 595 goto errout;
545 b = skb->tail; 596 }
546 597
547 /* Put in the message the usual good stuff */ 598 err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
548 nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq, 599 NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
549 RTM_NEWLINK, sizeof(*r)); 600 if (err <= 0) {
550 r = NLMSG_DATA(nlh); 601 kfree_skb(nskb);
551 r->ifi_family = AF_UNSPEC; 602 goto errout;
552 r->__ifi_pad = 0; 603 }
553 r->ifi_type = dev->type; 604
554 r->ifi_index = dev->ifindex; 605 err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
555 r->ifi_flags = dev->flags; 606errout:
556 r->ifi_change = 0; 607 kfree(iw_buf);
557
558 /* Put the wireless payload if it exist */
559 if(iw_buf != NULL)
560 RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len,
561 iw_buf + IW_EV_POINT_OFF);
562
563 nlh->nlmsg_len = skb->tail - b;
564
565 /* Needed ? */
566 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
567
568 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
569 if (err > 0)
570 err = 0;
571out:
572 if(iw_buf != NULL)
573 kfree(iw_buf);
574 dev_put(dev); 608 dev_put(dev);
575 return err;
576 609
577rtattr_failure: 610 return err;
578nlmsg_failure:
579 kfree_skb(skb);
580 goto out;
581} 611}
582#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
583 612
584static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) 613static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
585{ 614{
586 int idx; 615 int idx;
587 int s_idx = cb->family; 616 int s_idx = cb->family;
@@ -608,20 +637,22 @@ static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
608void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) 637void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
609{ 638{
610 struct sk_buff *skb; 639 struct sk_buff *skb;
611 int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + 640 int err = -ENOBUFS;
612 sizeof(struct rtnl_link_ifmap) +
613 sizeof(struct rtnl_link_stats) + 128);
614 641
615 skb = alloc_skb(size, GFP_KERNEL); 642 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
616 if (!skb) 643 if (skb == NULL)
617 return; 644 goto errout;
618 645
619 if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) { 646 err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
647 if (err < 0) {
620 kfree_skb(skb); 648 kfree_skb(skb);
621 return; 649 goto errout;
622 } 650 }
623 NETLINK_CB(skb).dst_group = RTNLGRP_LINK; 651
624 netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); 652 err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
653errout:
654 if (err < 0)
655 rtnl_set_sk_err(RTNLGRP_LINK, err);
625} 656}
626 657
627 /* Protected by RTNL semaphore. */ 658
@@ -746,18 +777,19 @@ static void rtnetlink_rcv(struct sock *sk, int len)
746 777
747static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = 778static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
748{ 779{
749 [RTM_GETLINK - RTM_BASE] = { 780 [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink,
750#ifdef CONFIG_NET_WIRELESS_RTNETLINK 781 .dumpit = rtnl_dump_ifinfo },
751 .doit = do_getlink, 782 [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink },
752#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ 783 [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all },
753 .dumpit = rtnetlink_dump_ifinfo }, 784 [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all },
754 [RTM_SETLINK - RTM_BASE] = { .doit = do_setlink },
755 [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
756 [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
757 [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, 785 [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
758 [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, 786 [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
759 [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, 787 [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
760 [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, 788#ifdef CONFIG_FIB_RULES
789 [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule },
790 [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule },
791#endif
792 [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all },
761 [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, 793 [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
762 [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, 794 [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set },
763}; 795};
@@ -817,7 +849,9 @@ EXPORT_SYMBOL(rtattr_strlcpy);
817EXPORT_SYMBOL(rtattr_parse); 849EXPORT_SYMBOL(rtattr_parse);
818EXPORT_SYMBOL(rtnetlink_links); 850EXPORT_SYMBOL(rtnetlink_links);
819EXPORT_SYMBOL(rtnetlink_put_metrics); 851EXPORT_SYMBOL(rtnetlink_put_metrics);
820EXPORT_SYMBOL(rtnl);
821EXPORT_SYMBOL(rtnl_lock); 852EXPORT_SYMBOL(rtnl_lock);
822EXPORT_SYMBOL(rtnl_trylock); 853EXPORT_SYMBOL(rtnl_trylock);
823EXPORT_SYMBOL(rtnl_unlock); 854EXPORT_SYMBOL(rtnl_unlock);
855EXPORT_SYMBOL(rtnl_unicast);
856EXPORT_SYMBOL(rtnl_notify);
857EXPORT_SYMBOL(rtnl_set_sk_err);
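
The rtmsg_ifinfo() conversion above is the template for all the netlink notifiers touched by this series: allocate with nlmsg_new() using the generic NLMSG_GOODSIZE instead of a hand-computed upper bound, fill the message, then hand off to rtnl_notify(); any failure funnels into rtnl_set_sk_err(), which marks every listener in the group with the error so userspace knows it missed an event. A minimal sketch of the same skeleton; example_notify() and example_fill_msg() are made-up names, everything else follows the patch:

	static void example_notify(struct net_device *dev, unsigned change)
	{
		struct sk_buff *skb;
		int err = -ENOBUFS;

		/* NLMSG_GOODSIZE: roughly a page minus skb overhead, enough
		 * for most messages and cheaper than sizing each exactly. */
		skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
		if (skb == NULL)
			goto errout;

		err = example_fill_msg(skb, dev, change);	/* hypothetical fill routine */
		if (err < 0) {
			kfree_skb(skb);
			goto errout;
		}

		err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
	errout:
		if (err < 0)
			/* listeners see ENOBUFS on their next read and can resync */
			rtnl_set_sk_err(RTNLGRP_LINK, err);
	}
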
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c54f3664bce5..c448c7f6fde2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1397,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1397 unsigned int csum; 1397 unsigned int csum;
1398 long csstart; 1398 long csstart;
1399 1399
1400 if (skb->ip_summed == CHECKSUM_HW) 1400 if (skb->ip_summed == CHECKSUM_PARTIAL)
1401 csstart = skb->h.raw - skb->data; 1401 csstart = skb->h.raw - skb->data;
1402 else 1402 else
1403 csstart = skb_headlen(skb); 1403 csstart = skb_headlen(skb);
@@ -1411,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1411 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, 1411 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
1412 skb->len - csstart, 0); 1412 skb->len - csstart, 0);
1413 1413
1414 if (skb->ip_summed == CHECKSUM_HW) { 1414 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1415 long csstuff = csstart + skb->csum; 1415 long csstuff = csstart + skb->csum;
1416 1416
1417 *((unsigned short *)(to + csstuff)) = csum_fold(csum); 1417 *((unsigned short *)(to + csstuff)) = csum_fold(csum);
@@ -1898,10 +1898,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
1898 * @len: length of data pulled 1898 * @len: length of data pulled
1899 * 1899 *
1900 * This function performs an skb_pull on the packet and updates 1900 * This function performs an skb_pull on the packet and updates
1901 * the CHECKSUM_HW checksum. It should be used on receive 1901 * the CHECKSUM_COMPLETE checksum. It should be used on
1902 * path processing instead of skb_pull unless you know that the 1902 * receive path processing instead of skb_pull unless you know
1903 * checksum difference is zero (e.g., a valid IP header) or you 1903 * that the checksum difference is zero (e.g., a valid IP header)
1904 * are setting ip_summed to CHECKSUM_NONE. 1904 * or you are setting ip_summed to CHECKSUM_NONE.
1905 */ 1905 */
1906unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) 1906unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
1907{ 1907{
@@ -1994,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
1994 frag = skb_shinfo(nskb)->frags; 1994 frag = skb_shinfo(nskb)->frags;
1995 k = 0; 1995 k = 0;
1996 1996
1997 nskb->ip_summed = CHECKSUM_HW; 1997 nskb->ip_summed = CHECKSUM_PARTIAL;
1998 nskb->csum = skb->csum; 1998 nskb->csum = skb->csum;
1999 memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); 1999 memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
2000 2000
@@ -2046,19 +2046,14 @@ void __init skb_init(void)
2046 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 2046 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
2047 sizeof(struct sk_buff), 2047 sizeof(struct sk_buff),
2048 0, 2048 0,
2049 SLAB_HWCACHE_ALIGN, 2049 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2050 NULL, NULL); 2050 NULL, NULL);
2051 if (!skbuff_head_cache)
2052 panic("cannot create skbuff cache");
2053
2054 skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", 2051 skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
2055 (2*sizeof(struct sk_buff)) + 2052 (2*sizeof(struct sk_buff)) +
2056 sizeof(atomic_t), 2053 sizeof(atomic_t),
2057 0, 2054 0,
2058 SLAB_HWCACHE_ALIGN, 2055 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2059 NULL, NULL); 2056 NULL, NULL);
2060 if (!skbuff_fclone_cache)
2061 panic("cannot create skbuff cache");
2062} 2057}
2063 2058
2064EXPORT_SYMBOL(___pskb_trim); 2059EXPORT_SYMBOL(___pskb_trim);
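
The skb_init() hunk drops the open-coded failure checks by passing SLAB_PANIC: kmem_cache_create() then panics internally if the cache cannot be built, which is the right behaviour for caches the stack cannot run without. A sketch of the idiom; example_obj and example_cache are placeholders, and the six-argument kmem_cache_create() of this kernel generation (with ctor/dtor) is assumed:

	struct example_obj {
		int data;
	};

	static struct kmem_cache *example_cache;

	static void __init example_cache_init(void)
	{
		/* SLAB_PANIC folds "if (!cache) panic(...)" into the
		 * allocator, so no return-value check is needed here. */
		example_cache = kmem_cache_create("example_cache",
						  sizeof(struct example_obj), 0,
						  SLAB_HWCACHE_ALIGN | SLAB_PANIC,
						  NULL, NULL);
	}
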
diff --git a/net/core/sock.c b/net/core/sock.c
index 51fcfbc041a7..b77e155cbe6c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -187,13 +187,13 @@ static struct lock_class_key af_callback_keys[AF_MAX];
187#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 187#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
188 188
189/* Run time adjustable parameters. */ 189/* Run time adjustable parameters. */
190__u32 sysctl_wmem_max = SK_WMEM_MAX; 190__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
191__u32 sysctl_rmem_max = SK_RMEM_MAX; 191__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
192__u32 sysctl_wmem_default = SK_WMEM_MAX; 192__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
193__u32 sysctl_rmem_default = SK_RMEM_MAX; 193__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
194 194
195/* Maximal space eaten by iovec or ancillary data plus some space */ 195/* Maximal space eaten by iovec or ancillary data plus some space */
196int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); 196int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
197 197
198static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 198static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
199{ 199{
@@ -247,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
247 goto out; 247 goto out;
248 } 248 }
249 249
250 /* It would be deadlock, if sock_queue_rcv_skb is used 250 err = sk_filter(sk, skb);
251 with socket lock! We assume that users of this
252 function are lock free.
253 */
254 err = sk_filter(sk, skb, 1);
255 if (err) 251 if (err)
256 goto out; 252 goto out;
257 253
@@ -278,7 +274,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
278{ 274{
279 int rc = NET_RX_SUCCESS; 275 int rc = NET_RX_SUCCESS;
280 276
281 if (sk_filter(sk, skb, 0)) 277 if (sk_filter(sk, skb))
282 goto discard_and_relse; 278 goto discard_and_relse;
283 279
284 skb->dev = NULL; 280 skb->dev = NULL;
@@ -606,15 +602,15 @@ set_rcvbuf:
606 break; 602 break;
607 603
608 case SO_DETACH_FILTER: 604 case SO_DETACH_FILTER:
609 spin_lock_bh(&sk->sk_lock.slock); 605 rcu_read_lock_bh();
610 filter = sk->sk_filter; 606 filter = rcu_dereference(sk->sk_filter);
611 if (filter) { 607 if (filter) {
612 sk->sk_filter = NULL; 608 rcu_assign_pointer(sk->sk_filter, NULL);
613 spin_unlock_bh(&sk->sk_lock.slock);
614 sk_filter_release(sk, filter); 609 sk_filter_release(sk, filter);
610 rcu_read_unlock_bh();
615 break; 611 break;
616 } 612 }
617 spin_unlock_bh(&sk->sk_lock.slock); 613 rcu_read_unlock_bh();
618 ret = -ENONET; 614 ret = -ENONET;
619 break; 615 break;
620 616
@@ -884,10 +880,10 @@ void sk_free(struct sock *sk)
884 if (sk->sk_destruct) 880 if (sk->sk_destruct)
885 sk->sk_destruct(sk); 881 sk->sk_destruct(sk);
886 882
887 filter = sk->sk_filter; 883 filter = rcu_dereference(sk->sk_filter);
888 if (filter) { 884 if (filter) {
889 sk_filter_release(sk, filter); 885 sk_filter_release(sk, filter);
890 sk->sk_filter = NULL; 886 rcu_assign_pointer(sk->sk_filter, NULL);
891 } 887 }
892 888
893 sock_disable_timestamp(sk); 889 sock_disable_timestamp(sk);
@@ -911,7 +907,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
911 if (newsk != NULL) { 907 if (newsk != NULL) {
912 struct sk_filter *filter; 908 struct sk_filter *filter;
913 909
914 memcpy(newsk, sk, sk->sk_prot->obj_size); 910 sock_copy(newsk, sk);
915 911
916 /* SANITY */ 912 /* SANITY */
917 sk_node_init(&newsk->sk_node); 913 sk_node_init(&newsk->sk_node);
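
The sock.c hunks switch sk->sk_filter from the per-socket slock to RCU: readers run the filter under rcu_read_lock_bh() with rcu_dereference(), and SO_DETACH_FILTER publishes NULL with rcu_assign_pointer() before dropping its reference, so no reader can see a freed filter. A condensed sketch of both sides; the example_* wrappers are invented, the helpers they call are the ones used in the hunks above:

	static unsigned int example_run_filter(struct sock *sk, struct sk_buff *skb)
	{
		struct sk_filter *fp;
		unsigned int pkt_len = skb->len;

		rcu_read_lock_bh();			/* readers take no lock */
		fp = rcu_dereference(sk->sk_filter);
		if (fp != NULL)
			pkt_len = sk_run_filter(skb, fp->insns, fp->len);
		rcu_read_unlock_bh();
		return pkt_len;				/* 0 means drop */
	}

	static void example_detach_filter(struct sock *sk)
	{
		struct sk_filter *fp;

		rcu_read_lock_bh();
		fp = rcu_dereference(sk->sk_filter);
		if (fp != NULL) {
			/* unpublish first, then drop the reference */
			rcu_assign_pointer(sk->sk_filter, NULL);
			sk_filter_release(sk, fp);
		}
		rcu_read_unlock_bh();
	}
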
diff --git a/net/core/utils.c b/net/core/utils.c
index e31c90e05594..2682490777de 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -4,6 +4,7 @@
4 * Authors: 4 * Authors:
5 * net_random Alan Cox 5 * net_random Alan Cox
6 * net_ratelimit Andy Kleen 6 * net_ratelimit Andy Kleen
7 * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
7 * 8 *
8 * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 9 * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
9 * 10 *
@@ -191,3 +192,215 @@ __be32 in_aton(const char *str)
191} 192}
192 193
193EXPORT_SYMBOL(in_aton); 194EXPORT_SYMBOL(in_aton);
195
196#define IN6PTON_XDIGIT 0x00010000
197#define IN6PTON_DIGIT 0x00020000
198#define IN6PTON_COLON_MASK 0x00700000
199#define IN6PTON_COLON_1 0x00100000 /* single : requested */
200#define IN6PTON_COLON_2 0x00200000 /* second : requested */
201#define IN6PTON_COLON_1_2 0x00400000 /* :: requested */
202#define IN6PTON_DOT 0x00800000 /* . */
203#define IN6PTON_DELIM 0x10000000
204#define IN6PTON_NULL 0x20000000 /* first/tail */
205#define IN6PTON_UNKNOWN 0x40000000
206
207static inline int digit2bin(char c, char delim)
208{
209 if (c == delim || c == '\0')
210 return IN6PTON_DELIM;
211 if (c == '.')
212 return IN6PTON_DOT;
213 if (c >= '0' && c <= '9')
214 return (IN6PTON_DIGIT | (c - '0'));
215 return IN6PTON_UNKNOWN;
216}
217
218static inline int xdigit2bin(char c, char delim)
219{
220 if (c == delim || c == '\0')
221 return IN6PTON_DELIM;
222 if (c == ':')
223 return IN6PTON_COLON_MASK;
224 if (c == '.')
225 return IN6PTON_DOT;
226 if (c >= '0' && c <= '9')
227 return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0'));
228 if (c >= 'a' && c <= 'f')
229 return (IN6PTON_XDIGIT | (c - 'a' + 10));
230 if (c >= 'A' && c <= 'F')
231 return (IN6PTON_XDIGIT | (c - 'A' + 10));
232 return IN6PTON_UNKNOWN;
233}
234
235int in4_pton(const char *src, int srclen,
236 u8 *dst,
237 char delim, const char **end)
238{
239 const char *s;
240 u8 *d;
241 u8 dbuf[4];
242 int ret = 0;
243 int i;
244 int w = 0;
245
246 if (srclen < 0)
247 srclen = strlen(src);
248 s = src;
249 d = dbuf;
250 i = 0;
251 while(1) {
252 int c;
253 c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
254 if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) {
255 goto out;
256 }
257 if (c & (IN6PTON_DOT | IN6PTON_DELIM)) {
258 if (w == 0)
259 goto out;
260 *d++ = w & 0xff;
261 w = 0;
262 i++;
263 if (c & IN6PTON_DELIM) {
264 if (i != 4)
265 goto out;
266 break;
267 }
268 goto cont;
269 }
270 w = (w * 10) + c;
271 if ((w & 0xffff) > 255) {
272 goto out;
273 }
274cont:
275 if (i >= 4)
276 goto out;
277 s++;
278 srclen--;
279 }
280 ret = 1;
281 memcpy(dst, dbuf, sizeof(dbuf));
282out:
283 if (end)
284 *end = s;
285 return ret;
286}
287
288EXPORT_SYMBOL(in4_pton);
289
290int in6_pton(const char *src, int srclen,
291 u8 *dst,
292 char delim, const char **end)
293{
294 const char *s, *tok = NULL;
295 u8 *d, *dc = NULL;
296 u8 dbuf[16];
297 int ret = 0;
298 int i;
299 int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL;
300 int w = 0;
301
302 memset(dbuf, 0, sizeof(dbuf));
303
304 s = src;
305 d = dbuf;
306 if (srclen < 0)
307 srclen = strlen(src);
308
309 while (1) {
310 int c;
311
312 c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
313 if (!(c & state))
314 goto out;
315 if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
316 /* process one 16-bit word */
317 if (!(state & IN6PTON_NULL)) {
318 *d++ = (w >> 8) & 0xff;
319 *d++ = w & 0xff;
320 }
321 w = 0;
322 if (c & IN6PTON_DELIM) {
323 /* We've processed last word */
324 break;
325 }
326 /*
327 * COLON_1 => XDIGIT
328 * COLON_2 => XDIGIT|DELIM
329 * COLON_1_2 => COLON_2
330 */
331 switch (state & IN6PTON_COLON_MASK) {
332 case IN6PTON_COLON_2:
333 dc = d;
334 state = IN6PTON_XDIGIT | IN6PTON_DELIM;
335 if (dc - dbuf >= sizeof(dbuf))
336 state |= IN6PTON_NULL;
337 break;
338 case IN6PTON_COLON_1|IN6PTON_COLON_1_2:
339 state = IN6PTON_XDIGIT | IN6PTON_COLON_2;
340 break;
341 case IN6PTON_COLON_1:
342 state = IN6PTON_XDIGIT;
343 break;
344 case IN6PTON_COLON_1_2:
345 state = IN6PTON_COLON_2;
346 break;
347 default:
348 state = 0;
349 }
350 tok = s + 1;
351 goto cont;
352 }
353
354 if (c & IN6PTON_DOT) {
355 ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s);
356 if (ret > 0) {
357 d += 4;
358 break;
359 }
360 goto out;
361 }
362
363 w = (w << 4) | (0xff & c);
364 state = IN6PTON_COLON_1 | IN6PTON_DELIM;
365 if (!(w & 0xf000)) {
366 state |= IN6PTON_XDIGIT;
367 }
368 if (!dc && d + 2 < dbuf + sizeof(dbuf)) {
369 state |= IN6PTON_COLON_1_2;
370 state &= ~IN6PTON_DELIM;
371 }
372 if (d + 2 >= dbuf + sizeof(dbuf)) {
373 state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2);
374 }
375cont:
376 if ((dc && d + 4 < dbuf + sizeof(dbuf)) ||
377 d + 4 == dbuf + sizeof(dbuf)) {
378 state |= IN6PTON_DOT;
379 }
380 if (d >= dbuf + sizeof(dbuf)) {
381 state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK);
382 }
383 s++;
384 srclen--;
385 }
386
387 i = 15; d--;
388
389 if (dc) {
390 while(d >= dc)
391 dst[i--] = *d--;
392 while(i >= dc - dbuf)
393 dst[i--] = 0;
394 while(i >= 0)
395 dst[i--] = *d--;
396 } else
397 memcpy(dst, dbuf, sizeof(dbuf));
398
399 ret = 1;
400out:
401 if (end)
402 *end = s;
403 return ret;
404}
405
406EXPORT_SYMBOL(in6_pton);
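
A hypothetical caller of the two new helpers. Both return 1 on success and 0 on failure, take srclen < 0 to mean strlen(src), stop at delim (or NUL), and optionally report the stopping point through *end; in6_pton() additionally handles '::' compression and a trailing dotted quad by calling in4_pton() itself:

	static int example_parse_addrs(void)
	{
		u8 v4[4], v6[16];
		const char *end;

		if (in4_pton("192.0.2.1", -1, v4, '\0', &end) == 0)
			return -EINVAL;		/* malformed dotted quad */

		if (in6_pton("2001:db8::c000:201", -1, v6, '\0', NULL) == 0)
			return -EINVAL;		/* '::' expands to zero words */

		if (in6_pton("::ffff:192.0.2.1", -1, v6, '\0', NULL) == 0)
			return -EINVAL;		/* embedded IPv4 goes via in4_pton */

		return 0;
	}
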
diff --git a/net/core/wireless.c b/net/core/wireless.c
index de0bde4b51dd..3168fca312f7 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -72,7 +72,6 @@
72 72
73/***************************** INCLUDES *****************************/ 73/***************************** INCLUDES *****************************/
74 74
75#include <linux/config.h> /* Not needed ??? */
76#include <linux/module.h> 75#include <linux/module.h>
77#include <linux/types.h> /* off_t */ 76#include <linux/types.h> /* off_t */
78#include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */ 77#include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */
@@ -86,6 +85,7 @@
86 85
87#include <linux/wireless.h> /* Pretty obvious */ 86#include <linux/wireless.h> /* Pretty obvious */
88#include <net/iw_handler.h> /* New driver API */ 87#include <net/iw_handler.h> /* New driver API */
88#include <net/netlink.h>
89 89
90#include <asm/uaccess.h> /* copy_to_user() */ 90#include <asm/uaccess.h> /* copy_to_user() */
91 91
@@ -1850,7 +1850,7 @@ static void wireless_nlevent_process(unsigned long data)
1850 struct sk_buff *skb; 1850 struct sk_buff *skb;
1851 1851
1852 while ((skb = skb_dequeue(&wireless_nlevent_queue))) 1852 while ((skb = skb_dequeue(&wireless_nlevent_queue)))
1853 netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); 1853 rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
1854} 1854}
1855 1855
1856static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); 1856static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 8c211c58893b..4d176d33983f 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -142,14 +142,13 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
142 struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); 142 struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
143 143
144 if (av != NULL) { 144 if (av != NULL) {
145 av->dccpav_buf_head = 145 av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
146 av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
147 av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; 146 av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1;
148 av->dccpav_buf_nonce = 0; 147 av->dccpav_buf_nonce = 0;
149 av->dccpav_ack_ptr = 0; 148 av->dccpav_ack_ptr = 0;
150 av->dccpav_time.tv_sec = 0; 149 av->dccpav_time.tv_sec = 0;
151 av->dccpav_time.tv_usec = 0; 150 av->dccpav_time.tv_usec = 0;
152 av->dccpav_sent_len = av->dccpav_vec_len = 0; 151 av->dccpav_vec_len = 0;
153 INIT_LIST_HEAD(&av->dccpav_records); 152 INIT_LIST_HEAD(&av->dccpav_records);
154 } 153 }
155 154
@@ -353,11 +352,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
353{ 352{
354 struct dccp_ackvec_record *next; 353 struct dccp_ackvec_record *next;
355 354
356 av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1; 355 /* sort out vector length */
357 if (av->dccpav_buf_tail == 0) 356 if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr)
358 av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; 357 av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head;
359 358 else
360 av->dccpav_vec_len -= avr->dccpavr_sent_len; 359 av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1
360 - av->dccpav_buf_head
361 + avr->dccpavr_ack_ptr;
361 362
362 /* free records */ 363 /* free records */
363 list_for_each_entry_safe_from(avr, next, &av->dccpav_records, 364 list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
@@ -434,8 +435,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
434 break; 435 break;
435found: 436found:
436 if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { 437 if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
437 const u8 state = (*vector & 438 const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
438 DCCP_ACKVEC_STATE_MASK) >> 6;
439 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { 439 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
440#ifdef CONFIG_IP_DCCP_DEBUG 440#ifdef CONFIG_IP_DCCP_DEBUG
441 struct dccp_sock *dp = dccp_sk(sk); 441 struct dccp_sock *dp = dccp_sk(sk);
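
The throw_record change above retires the explicit buf_tail pointer: the live vector length is now recomputed from the head index and the record's ack pointer, with one branch for the wrapped case. The same circular-distance computation in isolation, with generic names (buflen standing in for DCCP_MAX_ACKVEC_LEN):

	/* number of live bytes between head and ptr in a ring of size buflen */
	static inline int example_ring_len(int head, int ptr, int buflen)
	{
		if (head <= ptr)
			return ptr - head;		/* no wrap */
		return buflen - 1 - head + ptr;		/* wrapped past the end */
	}
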
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 0adf4b56c34c..2424effac7f6 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -54,9 +54,7 @@ struct dccp_ackvec {
54 struct list_head dccpav_records; 54 struct list_head dccpav_records;
55 struct timeval dccpav_time; 55 struct timeval dccpav_time;
56 u8 dccpav_buf_head; 56 u8 dccpav_buf_head;
57 u8 dccpav_buf_tail;
58 u8 dccpav_ack_ptr; 57 u8 dccpav_ack_ptr;
59 u8 dccpav_sent_len;
60 u8 dccpav_vec_len; 58 u8 dccpav_vec_len;
61 u8 dccpav_buf_nonce; 59 u8 dccpav_buf_nonce;
62 u8 dccpav_ack_nonce; 60 u8 dccpav_ack_nonce;
@@ -107,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
107 105
108static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) 106static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
109{ 107{
110 return av->dccpav_sent_len != av->dccpav_vec_len; 108 return av->dccpav_vec_len;
111} 109}
112#else /* CONFIG_IP_DCCP_ACKVEC */ 110#else /* CONFIG_IP_DCCP_ACKVEC */
113static inline int dccp_ackvec_init(void) 111static inline int dccp_ackvec_init(void)
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index ca00191628f7..32752f750447 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -30,6 +30,14 @@ config IP_DCCP_CCID2
30 30
31 If in doubt, say M. 31 If in doubt, say M.
32 32
33config IP_DCCP_CCID2_DEBUG
34 bool "CCID2 debug"
35 depends on IP_DCCP_CCID2
36 ---help---
37 Enable CCID2 debug messages.
38
39 If in doubt, say N.
40
33config IP_DCCP_CCID3 41config IP_DCCP_CCID3
34 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" 42 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
35 depends on IP_DCCP 43 depends on IP_DCCP
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e9615627dcd6..457dd3db7f41 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -27,7 +27,6 @@
27 * 27 *
28 * BUGS: 28 * BUGS:
29 * - sequence number wrapping 29 * - sequence number wrapping
30 * - jiffies wrapping
31 */ 30 */
32 31
33#include "../ccid.h" 32#include "../ccid.h"
@@ -36,8 +35,7 @@
36 35
37static int ccid2_debug; 36static int ccid2_debug;
38 37
39#undef CCID2_DEBUG 38#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
40#ifdef CCID2_DEBUG
41#define ccid2_pr_debug(format, a...) \ 39#define ccid2_pr_debug(format, a...) \
42 do { if (ccid2_debug) \ 40 do { if (ccid2_debug) \
43 printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ 41 printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
@@ -46,9 +44,7 @@ static int ccid2_debug;
46#define ccid2_pr_debug(format, a...) 44#define ccid2_pr_debug(format, a...)
47#endif 45#endif
48 46
49static const int ccid2_seq_len = 128; 47#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
50
51#ifdef CCID2_DEBUG
52static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) 48static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
53{ 49{
54 int len = 0; 50 int len = 0;
@@ -71,8 +67,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
71 67
72 /* packets are sent sequentially */ 68 /* packets are sent sequentially */
73 BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); 69 BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
74 BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent); 70 BUG_ON(time_before(seqp->ccid2s_sent,
75 BUG_ON(len > ccid2_seq_len); 71 prev->ccid2s_sent));
76 72
77 seqp = prev; 73 seqp = prev;
78 } 74 }
@@ -84,16 +80,57 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
84 do { 80 do {
85 seqp = seqp->ccid2s_prev; 81 seqp = seqp->ccid2s_prev;
86 len++; 82 len++;
87 BUG_ON(len > ccid2_seq_len);
88 } while (seqp != hctx->ccid2hctx_seqh); 83 } while (seqp != hctx->ccid2hctx_seqh);
89 84
90 BUG_ON(len != ccid2_seq_len);
91 ccid2_pr_debug("total len=%d\n", len); 85 ccid2_pr_debug("total len=%d\n", len);
86 BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
92} 87}
93#else 88#else
94#define ccid2_hc_tx_check_sanity(hctx) do {} while (0) 89#define ccid2_hc_tx_check_sanity(hctx) do {} while (0)
95#endif 90#endif
96 91
92static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
93 gfp_t gfp)
94{
95 struct ccid2_seq *seqp;
96 int i;
97
98 /* check if we have space to preserve the pointer to the buffer */
99 if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) /
100 sizeof(struct ccid2_seq*)))
101 return -ENOMEM;
102
103 /* allocate buffer and initialize linked list */
104 seqp = kmalloc(sizeof(*seqp) * num, gfp);
105 if (seqp == NULL)
106 return -ENOMEM;
107
108 for (i = 0; i < (num - 1); i++) {
109 seqp[i].ccid2s_next = &seqp[i + 1];
110 seqp[i + 1].ccid2s_prev = &seqp[i];
111 }
112 seqp[num - 1].ccid2s_next = seqp;
113 seqp->ccid2s_prev = &seqp[num - 1];
114
115 /* This is the first allocation. Initialize the head and tail. */
116 if (hctx->ccid2hctx_seqbufc == 0)
117 hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
118 else {
119 /* link the existing list with the one we just created */
120 hctx->ccid2hctx_seqh->ccid2s_next = seqp;
121 seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
122
123 hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1];
124 seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt;
125 }
126
127 /* store the original pointer to the buffer so we can free it */
128 hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
129 hctx->ccid2hctx_seqbufc++;
130
131 return 0;
132}
133
97static int ccid2_hc_tx_send_packet(struct sock *sk, 134static int ccid2_hc_tx_send_packet(struct sock *sk,
98 struct sk_buff *skb, int len) 135 struct sk_buff *skb, int len)
99{ 136{
@@ -122,7 +159,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk,
122 } 159 }
123 } 160 }
124 161
125 return 100; /* XXX */ 162 return 1; /* XXX CCID should dequeue when ready instead of polling */
126} 163}
127 164
128static void ccid2_change_l_ack_ratio(struct sock *sk, int val) 165static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
@@ -150,10 +187,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
150 dp->dccps_l_ack_ratio = val; 187 dp->dccps_l_ack_ratio = val;
151} 188}
152 189
153static void ccid2_change_cwnd(struct sock *sk, int val) 190static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val)
154{ 191{
155 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
156
157 if (val == 0) 192 if (val == 0)
158 val = 1; 193 val = 1;
159 194
@@ -164,6 +199,17 @@ static void ccid2_change_cwnd(struct sock *sk, int val)
164 hctx->ccid2hctx_cwnd = val; 199 hctx->ccid2hctx_cwnd = val;
165} 200}
166 201
202static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
203{
204 ccid2_pr_debug("change SRTT to %ld\n", val);
205 hctx->ccid2hctx_srtt = val;
206}
207
208static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
209{
210 hctx->ccid2hctx_pipe = val;
211}
212
167static void ccid2_start_rto_timer(struct sock *sk); 213static void ccid2_start_rto_timer(struct sock *sk);
168 214
169static void ccid2_hc_tx_rto_expire(unsigned long data) 215static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -193,11 +239,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
193 ccid2_start_rto_timer(sk); 239 ccid2_start_rto_timer(sk);
194 240
195 /* adjust pipe, cwnd etc */ 241 /* adjust pipe, cwnd etc */
196 hctx->ccid2hctx_pipe = 0; 242 ccid2_change_pipe(hctx, 0);
197 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; 243 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
198 if (hctx->ccid2hctx_ssthresh < 2) 244 if (hctx->ccid2hctx_ssthresh < 2)
199 hctx->ccid2hctx_ssthresh = 2; 245 hctx->ccid2hctx_ssthresh = 2;
200 ccid2_change_cwnd(sk, 1); 246 ccid2_change_cwnd(hctx, 1);
201 247
202 /* clear state about stuff we sent */ 248 /* clear state about stuff we sent */
203 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; 249 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
@@ -232,13 +278,14 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
232{ 278{
233 struct dccp_sock *dp = dccp_sk(sk); 279 struct dccp_sock *dp = dccp_sk(sk);
234 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 280 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
281 struct ccid2_seq *next;
235 u64 seq; 282 u64 seq;
236 283
237 ccid2_hc_tx_check_sanity(hctx); 284 ccid2_hc_tx_check_sanity(hctx);
238 285
239 BUG_ON(!hctx->ccid2hctx_sendwait); 286 BUG_ON(!hctx->ccid2hctx_sendwait);
240 hctx->ccid2hctx_sendwait = 0; 287 hctx->ccid2hctx_sendwait = 0;
241 hctx->ccid2hctx_pipe++; 288 ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
242 BUG_ON(hctx->ccid2hctx_pipe < 0); 289 BUG_ON(hctx->ccid2hctx_pipe < 0);
243 290
244 /* There is an issue. What if another packet is sent between 291 /* There is an issue. What if another packet is sent between
@@ -251,15 +298,23 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
251 hctx->ccid2hctx_seqh->ccid2s_seq = seq; 298 hctx->ccid2hctx_seqh->ccid2s_seq = seq;
252 hctx->ccid2hctx_seqh->ccid2s_acked = 0; 299 hctx->ccid2hctx_seqh->ccid2s_acked = 0;
253 hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; 300 hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
254 hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next;
255 301
256 ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, 302 next = hctx->ccid2hctx_seqh->ccid2s_next;
257 hctx->ccid2hctx_pipe); 303 /* check if we need to alloc more space */
304 if (next == hctx->ccid2hctx_seqt) {
305 int rc;
306
307 ccid2_pr_debug("allocating more space in history\n");
308 rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL);
309 BUG_ON(rc); /* XXX what do we do? */
258 310
259 if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) { 311 next = hctx->ccid2hctx_seqh->ccid2s_next;
260 /* XXX allocate more space */ 312 BUG_ON(next == hctx->ccid2hctx_seqt);
261 WARN_ON(1);
262 } 313 }
314 hctx->ccid2hctx_seqh = next;
315
316 ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
317 hctx->ccid2hctx_pipe);
263 318
264 hctx->ccid2hctx_sent++; 319 hctx->ccid2hctx_sent++;
265 320
@@ -295,7 +350,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
295 if (!timer_pending(&hctx->ccid2hctx_rtotimer)) 350 if (!timer_pending(&hctx->ccid2hctx_rtotimer))
296 ccid2_start_rto_timer(sk); 351 ccid2_start_rto_timer(sk);
297 352
298#ifdef CCID2_DEBUG 353#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
299 ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); 354 ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
300 ccid2_pr_debug("Sent: seq=%llu\n", seq); 355 ccid2_pr_debug("Sent: seq=%llu\n", seq);
301 do { 356 do {
@@ -398,7 +453,7 @@ static inline void ccid2_new_ack(struct sock *sk,
398 /* increase every 2 acks */ 453 /* increase every 2 acks */
399 hctx->ccid2hctx_ssacks++; 454 hctx->ccid2hctx_ssacks++;
400 if (hctx->ccid2hctx_ssacks == 2) { 455 if (hctx->ccid2hctx_ssacks == 2) {
401 ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); 456 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
402 hctx->ccid2hctx_ssacks = 0; 457 hctx->ccid2hctx_ssacks = 0;
403 *maxincr = *maxincr - 1; 458 *maxincr = *maxincr - 1;
404 } 459 }
@@ -411,26 +466,28 @@ static inline void ccid2_new_ack(struct sock *sk,
411 hctx->ccid2hctx_acks++; 466 hctx->ccid2hctx_acks++;
412 467
413 if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) { 468 if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
414 ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); 469 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
415 hctx->ccid2hctx_acks = 0; 470 hctx->ccid2hctx_acks = 0;
416 } 471 }
417 } 472 }
418 473
419 /* update RTO */ 474 /* update RTO */
420 if (hctx->ccid2hctx_srtt == -1 || 475 if (hctx->ccid2hctx_srtt == -1 ||
421 (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) { 476 time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
422 unsigned long r = jiffies - seqp->ccid2s_sent; 477 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
423 int s; 478 int s;
424 479
425 /* first measurement */ 480 /* first measurement */
426 if (hctx->ccid2hctx_srtt == -1) { 481 if (hctx->ccid2hctx_srtt == -1) {
427 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", 482 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
428 r, jiffies, seqp->ccid2s_seq); 483 r, jiffies, seqp->ccid2s_seq);
429 hctx->ccid2hctx_srtt = r; 484 ccid2_change_srtt(hctx, r);
430 hctx->ccid2hctx_rttvar = r >> 1; 485 hctx->ccid2hctx_rttvar = r >> 1;
431 } else { 486 } else {
432 /* RTTVAR */ 487 /* RTTVAR */
433 long tmp = hctx->ccid2hctx_srtt - r; 488 long tmp = hctx->ccid2hctx_srtt - r;
489 long srtt;
490
434 if (tmp < 0) 491 if (tmp < 0)
435 tmp *= -1; 492 tmp *= -1;
436 493
@@ -440,10 +497,12 @@ static inline void ccid2_new_ack(struct sock *sk,
440 hctx->ccid2hctx_rttvar += tmp; 497 hctx->ccid2hctx_rttvar += tmp;
441 498
442 /* SRTT */ 499 /* SRTT */
443 hctx->ccid2hctx_srtt *= 7; 500 srtt = hctx->ccid2hctx_srtt;
444 hctx->ccid2hctx_srtt >>= 3; 501 srtt *= 7;
502 srtt >>= 3;
445 tmp = r >> 3; 503 tmp = r >> 3;
446 hctx->ccid2hctx_srtt += tmp; 504 srtt += tmp;
505 ccid2_change_srtt(hctx, srtt);
447 } 506 }
448 s = hctx->ccid2hctx_rttvar << 2; 507 s = hctx->ccid2hctx_rttvar << 2;
449 /* clock granularity is 1 when based on jiffies */ 508 /* clock granularity is 1 when based on jiffies */
@@ -479,13 +538,29 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
479{ 538{
480 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 539 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
481 540
482 hctx->ccid2hctx_pipe--; 541 ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1);
483 BUG_ON(hctx->ccid2hctx_pipe < 0); 542 BUG_ON(hctx->ccid2hctx_pipe < 0);
484 543
485 if (hctx->ccid2hctx_pipe == 0) 544 if (hctx->ccid2hctx_pipe == 0)
486 ccid2_hc_tx_kill_rto_timer(sk); 545 ccid2_hc_tx_kill_rto_timer(sk);
487} 546}
488 547
548static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
549 struct ccid2_seq *seqp)
550{
551 if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
552 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
553 return;
554 }
555
556 hctx->ccid2hctx_last_cong = jiffies;
557
558 ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1);
559 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
560 if (hctx->ccid2hctx_ssthresh < 2)
561 hctx->ccid2hctx_ssthresh = 2;
562}
563
489static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 564static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
490{ 565{
491 struct dccp_sock *dp = dccp_sk(sk); 566 struct dccp_sock *dp = dccp_sk(sk);
@@ -496,7 +571,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
496 unsigned char veclen; 571 unsigned char veclen;
497 int offset = 0; 572 int offset = 0;
498 int done = 0; 573 int done = 0;
499 int loss = 0;
500 unsigned int maxincr = 0; 574 unsigned int maxincr = 0;
501 575
502 ccid2_hc_tx_check_sanity(hctx); 576 ccid2_hc_tx_check_sanity(hctx);
@@ -582,15 +656,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
582 * run length 656 * run length
583 */ 657 */
584 while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { 658 while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
585 const u8 state = (*vector & 659 const u8 state = *vector &
586 DCCP_ACKVEC_STATE_MASK) >> 6; 660 DCCP_ACKVEC_STATE_MASK;
587 661
588 /* new packet received or marked */ 662 /* new packet received or marked */
589 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && 663 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
590 !seqp->ccid2s_acked) { 664 !seqp->ccid2s_acked) {
591 if (state == 665 if (state ==
592 DCCP_ACKVEC_STATE_ECN_MARKED) { 666 DCCP_ACKVEC_STATE_ECN_MARKED) {
593 loss = 1; 667 ccid2_congestion_event(hctx,
668 seqp);
594 } else 669 } else
595 ccid2_new_ack(sk, seqp, 670 ccid2_new_ack(sk, seqp,
596 &maxincr); 671 &maxincr);
@@ -642,7 +717,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
642 /* check for lost packets */ 717 /* check for lost packets */
643 while (1) { 718 while (1) {
644 if (!seqp->ccid2s_acked) { 719 if (!seqp->ccid2s_acked) {
645 loss = 1; 720 ccid2_pr_debug("Packet lost: %llu\n",
721 seqp->ccid2s_seq);
722 /* XXX need to traverse from tail -> head in
723 * order to detect multiple congestion events in
724 * one ack vector.
725 */
726 ccid2_congestion_event(hctx, seqp);
646 ccid2_hc_tx_dec_pipe(sk); 727 ccid2_hc_tx_dec_pipe(sk);
647 } 728 }
648 if (seqp == hctx->ccid2hctx_seqt) 729 if (seqp == hctx->ccid2hctx_seqt)
@@ -661,53 +742,33 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
661 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; 742 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
662 } 743 }
663 744
664 if (loss) {
665 /* XXX do bit shifts guarantee a 0 as the new bit? */
666 ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1);
667 hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
668 if (hctx->ccid2hctx_ssthresh < 2)
669 hctx->ccid2hctx_ssthresh = 2;
670 }
671
672 ccid2_hc_tx_check_sanity(hctx); 745 ccid2_hc_tx_check_sanity(hctx);
673} 746}
674 747
675static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 748static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
676{ 749{
677 struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); 750 struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
678 int seqcount = ccid2_seq_len;
679 int i;
680 751
681 /* XXX init variables with proper values */ 752 ccid2_change_cwnd(hctx, 1);
682 hctx->ccid2hctx_cwnd = 1; 753 /* Initialize ssthresh to infinity. This means that we will exit the
683 hctx->ccid2hctx_ssthresh = 10; 754 * initial slow-start after the first packet loss. This is what we
755 * want.
756 */
757 hctx->ccid2hctx_ssthresh = ~0;
684 hctx->ccid2hctx_numdupack = 3; 758 hctx->ccid2hctx_numdupack = 3;
759 hctx->ccid2hctx_seqbufc = 0;
685 760
686 /* XXX init ~ to window size... */ 761 /* XXX init ~ to window size... */
687 hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) * 762 if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0)
688 seqcount, gfp_any());
689 if (hctx->ccid2hctx_seqbuf == NULL)
690 return -ENOMEM; 763 return -ENOMEM;
691 764
692 for (i = 0; i < (seqcount - 1); i++) {
693 hctx->ccid2hctx_seqbuf[i].ccid2s_next =
694 &hctx->ccid2hctx_seqbuf[i + 1];
695 hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev =
696 &hctx->ccid2hctx_seqbuf[i];
697 }
698 hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next =
699 hctx->ccid2hctx_seqbuf;
700 hctx->ccid2hctx_seqbuf->ccid2s_prev =
701 &hctx->ccid2hctx_seqbuf[seqcount - 1];
702
703 hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf;
704 hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
705 hctx->ccid2hctx_sent = 0; 765 hctx->ccid2hctx_sent = 0;
706 hctx->ccid2hctx_rto = 3 * HZ; 766 hctx->ccid2hctx_rto = 3 * HZ;
707 hctx->ccid2hctx_srtt = -1; 767 ccid2_change_srtt(hctx, -1);
708 hctx->ccid2hctx_rttvar = -1; 768 hctx->ccid2hctx_rttvar = -1;
709 hctx->ccid2hctx_lastrtt = 0; 769 hctx->ccid2hctx_lastrtt = 0;
710 hctx->ccid2hctx_rpdupack = -1; 770 hctx->ccid2hctx_rpdupack = -1;
771 hctx->ccid2hctx_last_cong = jiffies;
711 772
712 hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; 773 hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
713 hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; 774 hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
@@ -720,10 +781,13 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
720static void ccid2_hc_tx_exit(struct sock *sk) 781static void ccid2_hc_tx_exit(struct sock *sk)
721{ 782{
722 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); 783 struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
784 int i;
723 785
724 ccid2_hc_tx_kill_rto_timer(sk); 786 ccid2_hc_tx_kill_rto_timer(sk);
725 kfree(hctx->ccid2hctx_seqbuf); 787
726 hctx->ccid2hctx_seqbuf = NULL; 788 for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
789 kfree(hctx->ccid2hctx_seqbuf[i]);
790 hctx->ccid2hctx_seqbufc = 0;
727} 791}
728 792
729static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) 793static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
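
Two recurring fixes in the ccid2.c hunks are worth calling out. First, raw jiffies arithmetic such as "jiffies - lastrtt >= srtt" is replaced with time_after()/time_before(), which compare via signed subtraction and therefore stay correct across a jiffies wrap; this is what retires the "jiffies wrapping" entry from the BUGS list. Second, the SRTT update is the usual RFC 2988 exponential average, srtt = (7*srtt + r)/8, now computed in a local so ccid2_change_srtt() sees a single update. A sketch of the wrap-safe test only, mirroring the lastrtt check in ccid2_new_ack():

	/* illustrative only: srtt == -1 means no RTT measurement yet */
	static int example_rtt_sample_due(unsigned long lastrtt, long srtt)
	{
		/* wrong across a wrap:  jiffies - lastrtt >= srtt  */
		return srtt == -1 || time_after(jiffies, lastrtt + srtt);
	}
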
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 451a87464fa5..5b2ef4acb300 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -35,6 +35,9 @@ struct ccid2_seq {
35 struct ccid2_seq *ccid2s_next; 35 struct ccid2_seq *ccid2s_next;
36}; 36};
37 37
38#define CCID2_SEQBUF_LEN 256
39#define CCID2_SEQBUF_MAX 128
40
38/** struct ccid2_hc_tx_sock - CCID2 TX half connection 41/** struct ccid2_hc_tx_sock - CCID2 TX half connection
39 * 42 *
40 * @ccid2hctx_ssacks - ACKs recv in slow start 43 * @ccid2hctx_ssacks - ACKs recv in slow start
@@ -50,10 +53,11 @@ struct ccid2_hc_tx_sock {
50 int ccid2hctx_cwnd; 53 int ccid2hctx_cwnd;
51 int ccid2hctx_ssacks; 54 int ccid2hctx_ssacks;
52 int ccid2hctx_acks; 55 int ccid2hctx_acks;
53 int ccid2hctx_ssthresh; 56 unsigned int ccid2hctx_ssthresh;
54 int ccid2hctx_pipe; 57 int ccid2hctx_pipe;
55 int ccid2hctx_numdupack; 58 int ccid2hctx_numdupack;
56 struct ccid2_seq *ccid2hctx_seqbuf; 59 struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
60 int ccid2hctx_seqbufc;
57 struct ccid2_seq *ccid2hctx_seqh; 61 struct ccid2_seq *ccid2hctx_seqh;
58 struct ccid2_seq *ccid2hctx_seqt; 62 struct ccid2_seq *ccid2hctx_seqt;
59 long ccid2hctx_rto; 63 long ccid2hctx_rto;
@@ -67,6 +71,7 @@ struct ccid2_hc_tx_sock {
67 u64 ccid2hctx_rpseq; 71 u64 ccid2hctx_rpseq;
68 int ccid2hctx_rpdupack; 72 int ccid2hctx_rpdupack;
69 int ccid2hctx_sendwait; 73 int ccid2hctx_sendwait;
74 unsigned long ccid2hctx_last_cong;
70}; 75};
71 76
72struct ccid2_hc_rx_sock { 77struct ccid2_hc_rx_sock {
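
The header change replaces the single ccid2hctx_seqbuf array with up to CCID2_SEQBUF_MAX chunk pointers, each chunk holding CCID2_SEQBUF_LEN nodes. ccid2_hc_tx_alloc_seq() in the ccid2.c diff above links every new chunk into the one circular list, so the history ring grows on demand without reallocating or copying nodes already in flight. The splice step, reduced to generic invented names:

	struct example_node {
		struct example_node *next, *prev;
	};

	/* chunk[0..n-1] is already linked internally; insert the whole run
	 * between the ring's current head node and tail node. */
	static void example_splice_chunk(struct example_node *head,
					 struct example_node *tail,
					 struct example_node *chunk, int n)
	{
		head->next = &chunk[0];
		chunk[0].prev = head;
		chunk[n - 1].next = tail;
		tail->prev = &chunk[n - 1];
	}
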
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 090bc39e8199..195aa9566228 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -900,7 +900,7 @@ found:
900static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) 900static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
901{ 901{
902 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 902 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
903 struct dccp_li_hist_entry *next, *head; 903 struct dccp_li_hist_entry *head;
904 u64 seq_temp; 904 u64 seq_temp;
905 905
906 if (list_empty(&hcrx->ccid3hcrx_li_hist)) { 906 if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
@@ -908,15 +908,15 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
908 &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) 908 &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
909 return; 909 return;
910 910
911 next = (struct dccp_li_hist_entry *) 911 head = list_entry(hcrx->ccid3hcrx_li_hist.next,
912 hcrx->ccid3hcrx_li_hist.next; 912 struct dccp_li_hist_entry, dccplih_node);
913 next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); 913 head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
914 } else { 914 } else {
915 struct dccp_li_hist_entry *entry; 915 struct dccp_li_hist_entry *entry;
916 struct list_head *tail; 916 struct list_head *tail;
917 917
918 head = (struct dccp_li_hist_entry *) 918 head = list_entry(hcrx->ccid3hcrx_li_hist.next,
919 hcrx->ccid3hcrx_li_hist.next; 919 struct dccp_li_hist_entry, dccplih_node);
920 /* FIXME win count check removed as was wrong */ 920 /* FIXME win count check removed as was wrong */
921 /* should make this check with receive history */ 921 /* should make this check with receive history */
922 /* and compare there as per section 10.2 of RFC4342 */ 922 /* and compare there as per section 10.2 of RFC4342 */
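
The replacement of raw casts with list_entry() in the ccid3 hunk is more than style: list_entry() is container_of(), so it recovers the enclosing structure from the embedded list_head no matter where dccplih_node sits in the struct, whereas the old cast silently required the member to be first. A generic illustration with an invented entry type:

	struct example_entry {
		u64			interval;
		struct list_head	node;	/* need not be the first member */
	};

	static struct example_entry *example_first(struct list_head *list)
	{
		/* works for any member offset; a raw cast of list->next would not */
		return list_entry(list->next, struct example_entry, node);
	}
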
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a5c5475724c0..0a21be437ed3 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -130,7 +130,7 @@ extern void dccp_send_delayed_ack(struct sock *sk);
130extern void dccp_send_sync(struct sock *sk, const u64 seq, 130extern void dccp_send_sync(struct sock *sk, const u64 seq,
131 const enum dccp_pkt_type pkt_type); 131 const enum dccp_pkt_type pkt_type);
132 132
133extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); 133extern void dccp_write_xmit(struct sock *sk, int block);
134extern void dccp_write_space(struct sock *sk); 134extern void dccp_write_space(struct sock *sk);
135 135
136extern void dccp_init_xmit_timers(struct sock *sk); 136extern void dccp_init_xmit_timers(struct sock *sk);
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index b44c45504fb6..cee553d416ca 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -27,5 +27,10 @@ extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
27extern int dccp_feat_init(struct dccp_minisock *dmsk); 27extern int dccp_feat_init(struct dccp_minisock *dmsk);
28 28
29extern int dccp_feat_default_sequence_window; 29extern int dccp_feat_default_sequence_window;
30extern int dccp_feat_default_rx_ccid;
31extern int dccp_feat_default_tx_ccid;
32extern int dccp_feat_default_ack_ratio;
33extern int dccp_feat_default_send_ack_vector;
34extern int dccp_feat_default_send_ndp_count;
30 35
31#endif /* _DCCP_FEAT_H */ 36#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7f56f7e8f571..9a1a76a7dc41 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -501,6 +501,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
501 501
502 dccp_openreq_init(req, &dp, skb); 502 dccp_openreq_init(req, &dp, skb);
503 503
504 if (security_inet_conn_request(sk, skb, req))
505 goto drop_and_free;
506
504 ireq = inet_rsk(req); 507 ireq = inet_rsk(req);
505 ireq->loc_addr = daddr; 508 ireq->loc_addr = daddr;
506 ireq->rmt_addr = saddr; 509 ireq->rmt_addr = saddr;
@@ -605,10 +608,10 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
605 if (req != NULL) 608 if (req != NULL)
606 return dccp_check_req(sk, skb, req, prev); 609 return dccp_check_req(sk, skb, req, prev);
607 610
608 nsk = __inet_lookup_established(&dccp_hashinfo, 611 nsk = inet_lookup_established(&dccp_hashinfo,
609 iph->saddr, dh->dccph_sport, 612 iph->saddr, dh->dccph_sport,
610 iph->daddr, ntohs(dh->dccph_dport), 613 iph->daddr, dh->dccph_dport,
611 inet_iif(skb)); 614 inet_iif(skb));
612 if (nsk != NULL) { 615 if (nsk != NULL) {
613 if (nsk->sk_state != DCCP_TIME_WAIT) { 616 if (nsk->sk_state != DCCP_TIME_WAIT) {
614 bh_lock_sock(nsk); 617 bh_lock_sock(nsk);
@@ -678,6 +681,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
678 } 681 }
679 }; 682 };
680 683
684 security_skb_classify_flow(skb, &fl);
681 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 685 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
682 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 686 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
683 return NULL; 687 return NULL;
@@ -921,7 +925,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
921 * Look up flow ID in table and get corresponding socket */ 925 * Look up flow ID in table and get corresponding socket */
922 sk = __inet_lookup(&dccp_hashinfo, 926 sk = __inet_lookup(&dccp_hashinfo,
923 skb->nh.iph->saddr, dh->dccph_sport, 927 skb->nh.iph->saddr, dh->dccph_sport,
924 skb->nh.iph->daddr, ntohs(dh->dccph_dport), 928 skb->nh.iph->daddr, dh->dccph_dport,
925 inet_iif(skb)); 929 inet_iif(skb));
926 930
927 /* 931 /*
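
The security_inet_conn_request() and security_*_classify_flow() calls threaded through the DCCP paths here (and in the ipv6.c diff below) are LSM hooks: the flow key is labelled before the routing/xfrm lookup sees it, so SELinux or IPsec policy can match on the flow, and they compile to no-ops when no security module is active. The output-path shape, as a sketch with the flow setup elided; example_route_output() is not a real function:

	/* fl is assumed to be already filled in by the caller */
	static struct dst_entry *example_route_output(struct sock *sk,
						      struct sk_buff *skb,
						      struct flowi *fl)
	{
		struct rtable *rt;

		security_skb_classify_flow(skb, fl);	/* attach security label */
		if (ip_route_output_flow(&rt, fl, sk, 0))
			return NULL;	/* no route, or policy rejected the flow */
		return &rt->u.dst;
	}
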
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 610c722ac27f..7a47399cf31f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
201 fl.oif = sk->sk_bound_dev_if; 201 fl.oif = sk->sk_bound_dev_if;
202 fl.fl_ip_dport = usin->sin6_port; 202 fl.fl_ip_dport = usin->sin6_port;
203 fl.fl_ip_sport = inet->sport; 203 fl.fl_ip_sport = inet->sport;
204 security_sk_classify_flow(sk, &fl);
204 205
205 if (np->opt != NULL && np->opt->srcrt != NULL) { 206 if (np->opt != NULL && np->opt->srcrt != NULL) {
206 const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; 207 const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -230,7 +231,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
230 ipv6_addr_copy(&np->saddr, saddr); 231 ipv6_addr_copy(&np->saddr, saddr);
231 inet->rcv_saddr = LOOPBACK4_IPV6; 232 inet->rcv_saddr = LOOPBACK4_IPV6;
232 233
233 __ip6_dst_store(sk, dst, NULL); 234 __ip6_dst_store(sk, dst, NULL, NULL);
234 235
235 icsk->icsk_ext_hdr_len = 0; 236 icsk->icsk_ext_hdr_len = 0;
236 if (np->opt != NULL) 237 if (np->opt != NULL)
@@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
322 fl.oif = sk->sk_bound_dev_if; 323 fl.oif = sk->sk_bound_dev_if;
323 fl.fl_ip_dport = inet->dport; 324 fl.fl_ip_dport = inet->dport;
324 fl.fl_ip_sport = inet->sport; 325 fl.fl_ip_sport = inet->sport;
326 security_sk_classify_flow(sk, &fl);
325 327
326 err = ip6_dst_lookup(sk, &dst, &fl); 328 err = ip6_dst_lookup(sk, &dst, &fl);
327 if (err) { 329 if (err) {
@@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
422 fl.oif = ireq6->iif; 424 fl.oif = ireq6->iif;
423 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 425 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
424 fl.fl_ip_sport = inet_sk(sk)->sport; 426 fl.fl_ip_sport = inet_sk(sk)->sport;
427 security_req_classify_flow(req, &fl);
425 428
426 if (dst == NULL) { 429 if (dst == NULL) {
427 opt = np->opt; 430 opt = np->opt;
@@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
566 fl.oif = inet6_iif(rxskb); 569 fl.oif = inet6_iif(rxskb);
567 fl.fl_ip_dport = dh->dccph_dport; 570 fl.fl_ip_dport = dh->dccph_dport;
568 fl.fl_ip_sport = dh->dccph_sport; 571 fl.fl_ip_sport = dh->dccph_sport;
572 security_skb_classify_flow(rxskb, &fl);
569 573
570 /* sk = NULL, but it is safe for now. RST socket required. */ 574 /* sk = NULL, but it is safe for now. RST socket required. */
571 if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { 575 if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
@@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb,
622 fl.oif = inet6_iif(rxskb); 626 fl.oif = inet6_iif(rxskb);
623 fl.fl_ip_dport = dh->dccph_dport; 627 fl.fl_ip_dport = dh->dccph_dport;
624 fl.fl_ip_sport = dh->dccph_sport; 628 fl.fl_ip_sport = dh->dccph_sport;
629 security_req_classify_flow(req, &fl);
625 630
626 if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { 631 if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
627 if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { 632 if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
@@ -704,6 +709,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
704 709
705 dccp_openreq_init(req, &dp, skb); 710 dccp_openreq_init(req, &dp, skb);
706 711
712 if (security_inet_conn_request(sk, skb, req))
713 goto drop_and_free;
714
707 ireq6 = inet6_rsk(req); 715 ireq6 = inet6_rsk(req);
708 ireq = inet_rsk(req); 716 ireq = inet_rsk(req);
709 ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); 717 ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
@@ -842,6 +850,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
842 fl.oif = sk->sk_bound_dev_if; 850 fl.oif = sk->sk_bound_dev_if;
843 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 851 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
844 fl.fl_ip_sport = inet_sk(sk)->sport; 852 fl.fl_ip_sport = inet_sk(sk)->sport;
853 security_sk_classify_flow(sk, &fl);
845 854
846 if (ip6_dst_lookup(sk, &dst, &fl)) 855 if (ip6_dst_lookup(sk, &dst, &fl))
847 goto out; 856 goto out;
@@ -863,7 +872,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
863 * comment in that function for the gory details. -acme 872 * comment in that function for the gory details. -acme
864 */ 873 */
865 874
866 __ip6_dst_store(newsk, dst, NULL); 875 __ip6_dst_store(newsk, dst, NULL, NULL);
867 newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | 876 newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
868 NETIF_F_TSO); 877 NETIF_F_TSO);
869 newdp6 = (struct dccp6_sock *)newsk; 878 newdp6 = (struct dccp6_sock *)newsk;
@@ -961,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
961 if (skb->protocol == htons(ETH_P_IP)) 970 if (skb->protocol == htons(ETH_P_IP))
962 return dccp_v4_do_rcv(sk, skb); 971 return dccp_v4_do_rcv(sk, skb);
963 972
964 if (sk_filter(sk, skb, 0)) 973 if (sk_filter(sk, skb))
965 goto discard; 974 goto discard;
966 975
967 /* 976 /*
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 58669beee132..7102e3aed4ca 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -198,7 +198,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
198 while (1) { 198 while (1) {
199 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 199 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
200 200
201 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 201 if (sk->sk_err)
202 goto do_error; 202 goto do_error;
203 if (!*timeo) 203 if (!*timeo)
204 goto do_nonblock; 204 goto do_nonblock;
@@ -234,37 +234,72 @@ do_interrupted:
234 goto out; 234 goto out;
235} 235}
236 236
237int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) 237static void dccp_write_xmit_timer(unsigned long data) {
238 struct sock *sk = (struct sock *)data;
239 struct dccp_sock *dp = dccp_sk(sk);
240
241 bh_lock_sock(sk);
242 if (sock_owned_by_user(sk))
243 sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
244 else
245 dccp_write_xmit(sk, 0);
246 bh_unlock_sock(sk);
247 sock_put(sk);
248}
249
250void dccp_write_xmit(struct sock *sk, int block)
238{ 251{
239 const struct dccp_sock *dp = dccp_sk(sk); 252 struct dccp_sock *dp = dccp_sk(sk);
240 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, 253 struct sk_buff *skb;
254 long timeo = 30000; /* If a packet is taking longer than this
255 we have other issues */
256
257 while ((skb = skb_peek(&sk->sk_write_queue))) {
258 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
241 skb->len); 259 skb->len);
242 260
243 if (err > 0) 261 if (err > 0) {
244 err = dccp_wait_for_ccid(sk, skb, timeo); 262 if (!block) {
263 sk_reset_timer(sk, &dp->dccps_xmit_timer,
264 msecs_to_jiffies(err)+jiffies);
265 break;
266 } else
267 err = dccp_wait_for_ccid(sk, skb, &timeo);
268 if (err) {
269 printk(KERN_CRIT "%s:err at dccp_wait_for_ccid"
270 " %d\n", __FUNCTION__, err);
271 dump_stack();
272 }
273 }
245 274
246 if (err == 0) { 275 skb_dequeue(&sk->sk_write_queue);
247 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 276 if (err == 0) {
248 const int len = skb->len; 277 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
278 const int len = skb->len;
249 279
250 if (sk->sk_state == DCCP_PARTOPEN) { 280 if (sk->sk_state == DCCP_PARTOPEN) {
251 /* See 8.1.5. Handshake Completion */ 281 /* See 8.1.5. Handshake Completion */
252 inet_csk_schedule_ack(sk); 282 inet_csk_schedule_ack(sk);
253 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 283 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
254 inet_csk(sk)->icsk_rto, 284 inet_csk(sk)->icsk_rto,
255 DCCP_RTO_MAX); 285 DCCP_RTO_MAX);
256 dcb->dccpd_type = DCCP_PKT_DATAACK; 286 dcb->dccpd_type = DCCP_PKT_DATAACK;
257 } else if (dccp_ack_pending(sk)) 287 } else if (dccp_ack_pending(sk))
258 dcb->dccpd_type = DCCP_PKT_DATAACK; 288 dcb->dccpd_type = DCCP_PKT_DATAACK;
259 else 289 else
260 dcb->dccpd_type = DCCP_PKT_DATA; 290 dcb->dccpd_type = DCCP_PKT_DATA;
261 291
262 err = dccp_transmit_skb(sk, skb); 292 err = dccp_transmit_skb(sk, skb);
263 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); 293 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
264 } else 294 if (err) {
265 kfree_skb(skb); 295 printk(KERN_CRIT "%s:err from "
266 296 "ccid_hc_tx_packet_sent %d\n",
267 return err; 297 __FUNCTION__, err);
298 dump_stack();
299 }
300 } else
301 kfree(skb);
302 }
268} 303}
269 304
270int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) 305int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -426,6 +461,9 @@ static inline void dccp_connect_init(struct sock *sk)
426 dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); 461 dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
427 462
428 icsk->icsk_retransmits = 0; 463 icsk->icsk_retransmits = 0;
464 init_timer(&dp->dccps_xmit_timer);
465 dp->dccps_xmit_timer.data = (unsigned long)sk;
466 dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
429} 467}
430 468
431int dccp_connect(struct sock *sk) 469int dccp_connect(struct sock *sk)
@@ -560,8 +598,10 @@ void dccp_send_close(struct sock *sk, const int active)
560 DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; 598 DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
561 599
562 if (active) { 600 if (active) {
601 dccp_write_xmit(sk, 1);
563 dccp_skb_entail(sk, skb); 602 dccp_skb_entail(sk, skb);
564 dccp_transmit_skb(sk, skb_clone(skb, prio)); 603 dccp_transmit_skb(sk, skb_clone(skb, prio));
604 /* FIXME do we need a retransmit timer here? */
565 } else 605 } else
566 dccp_transmit_skb(sk, skb); 606 dccp_transmit_skb(sk, skb);
567} 607}
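
The rewritten dccp_write_xmit() turns the CCID's positive return value into a delay hint: blocking callers (dccp_send_close()) still sleep in dccp_wait_for_ccid(), but the common sendmsg path now queues the skb and retries from timer context via the new dccps_xmit_timer instead of sleeping. The handler also re-arms itself by one jiffy when the socket is owned by user context, the standard trick for backing off a locked socket. The deferral step in isolation; names follow the patch, the function itself is illustrative:

	static void example_xmit_or_defer(struct sock *sk, struct sk_buff *skb)
	{
		struct dccp_sock *dp = dccp_sk(sk);
		int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
						 skb->len);

		if (err > 0)
			/* CCID says "not yet": retry roughly err msecs from now */
			sk_reset_timer(sk, &dp->dccps_xmit_timer,
				       jiffies + msecs_to_jiffies(err));
	}
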
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6f14bb5a28d4..962df0ea31aa 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -662,17 +662,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
662 if (rc != 0) 662 if (rc != 0)
663 goto out_discard; 663 goto out_discard;
664 664
665 rc = dccp_write_xmit(sk, skb, &timeo); 665 skb_queue_tail(&sk->sk_write_queue, skb);
666 /* 666 dccp_write_xmit(sk,0);
667 * XXX we don't use sk_write_queue, so just discard the packet.
668 * Current plan however is to _use_ sk_write_queue with
669 * an algorithm similar to tcp_sendmsg, where the main difference
670 * is that in DCCP we have to respect packet boundaries, so
671 * no coalescing of skbs.
672 *
673 * This bug was _quickly_ found & fixed by just looking at an OSTRA
674 * generated callgraph 8) -acme
675 */
676out_release: 667out_release:
677 release_sock(sk); 668 release_sock(sk);
678 return rc ? : len; 669 return rc ? : len;
@@ -846,6 +837,7 @@ static int dccp_close_state(struct sock *sk)
846 837
847void dccp_close(struct sock *sk, long timeout) 838void dccp_close(struct sock *sk, long timeout)
848{ 839{
840 struct dccp_sock *dp = dccp_sk(sk);
849 struct sk_buff *skb; 841 struct sk_buff *skb;
850 int state; 842 int state;
851 843
@@ -862,6 +854,8 @@ void dccp_close(struct sock *sk, long timeout)
862 goto adjudge_to_death; 854 goto adjudge_to_death;
863 } 855 }
864 856
857 sk_stop_timer(sk, &dp->dccps_xmit_timer);
858
865 /* 859 /*
866 * We need to flush the recv. buffs. We do this only on the 860 * We need to flush the recv. buffs. We do this only on the
867 * descriptor close, not protocol-sourced closes, because the 861 * descriptor close, not protocol-sourced closes, because the
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index c1ba9451bc3d..38bc157876f3 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -11,18 +11,12 @@
11 11
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/sysctl.h> 13#include <linux/sysctl.h>
14#include "feat.h"
14 15
15#ifndef CONFIG_SYSCTL 16#ifndef CONFIG_SYSCTL
16#error This file should not be compiled without CONFIG_SYSCTL defined 17#error This file should not be compiled without CONFIG_SYSCTL defined
17#endif 18#endif
18 19
19extern int dccp_feat_default_sequence_window;
20extern int dccp_feat_default_rx_ccid;
21extern int dccp_feat_default_tx_ccid;
22extern int dccp_feat_default_ack_ratio;
23extern int dccp_feat_default_send_ack_vector;
24extern int dccp_feat_default_send_ndp_count;
25
26static struct ctl_table dccp_default_table[] = { 20static struct ctl_table dccp_default_table[] = {
27 { 21 {
28 .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, 22 .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW,
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 92f2ec46fd22..36e72cb145b0 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -27,6 +27,7 @@ config DECNET
27config DECNET_ROUTER 27config DECNET_ROUTER
28 bool "DECnet: router support (EXPERIMENTAL)" 28 bool "DECnet: router support (EXPERIMENTAL)"
29 depends on DECNET && EXPERIMENTAL 29 depends on DECNET && EXPERIMENTAL
30 select FIB_RULES
30 ---help--- 31 ---help---
31 Add support for turning your DECnet Endnode into a level 1 or 2 32 Add support for turning your DECnet Endnode into a level 1 or 2
32 router. This is an experimental, but functional option. If you 33 router. This is an experimental, but functional option. If you
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 5486247735f6..70e027375682 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -130,6 +130,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
 #include <linux/poll.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_nsp.h>
 #include <net/dn_dev.h>
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 476455fbdb03..01861feb608d 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -34,6 +34,7 @@
 #include <linux/seq_file.h>
 #include <linux/timer.h>
 #include <linux/string.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/skbuff.h>
@@ -45,6 +46,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_dev.h>
 #include <net/dn_route.h>
@@ -744,20 +746,23 @@ rtattr_failure:
 static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+	int payload = sizeof(struct ifaddrmsg) + 128;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS);
-		return;
-	}
-	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+	skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
 }
 
 static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1417,8 +1422,6 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
 	[RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, },
 	[RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute,
 				      .dumpit = dn_fib_dump, },
-	[RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, },
-	[RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, },
 	[RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, },
 #else
 	[RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute,
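
The rtmsg_ifa() rewrite above follows the alloc/fill/rtnl_notify() idiom used throughout this series: a single error label reports any failure to multicast listeners exactly once. A condensed sketch of the pattern, assuming a hypothetical fill_message() helper in place of dn_dev_fill_ifaddr():

static void example_notify(void)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = alloc_skb(nlmsg_total_size(sizeof(struct ifaddrmsg) + 128),
			GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fill_message(skb);	/* hypothetical fill helper */
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
errout:
	if (err < 0)			/* one error report for all failure paths */
		rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
}
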
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index fa20e2efcfc1..1cf010124ec5 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -34,6 +34,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_route.h>
 #include <net/dn_fib.h>
@@ -54,11 +55,9 @@
 
 #define endfor_nexthops(fi) }
 
-extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
 static DEFINE_SPINLOCK(dn_fib_multipath_lock);
 static struct dn_fib_info *dn_fib_info_list;
-static DEFINE_RWLOCK(dn_fib_info_lock);
+static DEFINE_SPINLOCK(dn_fib_info_lock);
 
 static struct
 {
@@ -79,6 +78,9 @@ static struct
 	[RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
 };
 
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
+static int dn_fib_sync_up(struct net_device *dev);
+
 void dn_fib_free_info(struct dn_fib_info *fi)
 {
 	if (fi->fib_dead == 0) {
@@ -96,7 +98,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
 
 void dn_fib_release_info(struct dn_fib_info *fi)
 {
-	write_lock(&dn_fib_info_lock);
+	spin_lock(&dn_fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		if (fi->fib_next)
 			fi->fib_next->fib_prev = fi->fib_prev;
@@ -107,7 +109,7 @@ void dn_fib_release_info(struct dn_fib_info *fi)
 		fi->fib_dead = 1;
 		dn_fib_info_put(fi);
 	}
-	write_unlock(&dn_fib_info_lock);
+	spin_unlock(&dn_fib_info_lock);
 }
 
 static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
@@ -378,13 +380,13 @@ link_it:
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock(&dn_fib_info_lock);
+	spin_lock(&dn_fib_info_lock);
 	fi->fib_next = dn_fib_info_list;
 	fi->fib_prev = NULL;
 	if (dn_fib_info_list)
 		dn_fib_info_list->fib_prev = fi;
 	dn_fib_info_list = fi;
-	write_unlock(&dn_fib_info_lock);
+	spin_unlock(&dn_fib_info_lock);
 	return fi;
 
 err_inval:
@@ -490,7 +492,8 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
 		if (attr) {
 			if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
 				return -EINVAL;
-			if (i != RTA_MULTIPATH && i != RTA_METRICS)
+			if (i != RTA_MULTIPATH && i != RTA_METRICS &&
+			    i != RTA_TABLE)
 				rta[i-1] = (struct rtattr *)RTA_DATA(attr);
 		}
 	}
@@ -507,7 +510,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = dn_fib_get_table(r->rtm_table, 0);
+	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0);
 	if (tb)
 		return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
 
@@ -523,46 +526,13 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = dn_fib_get_table(r->rtm_table, 1);
+	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1);
 	if (tb)
 		return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
 
 	return -ENOBUFS;
 }
 
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	int t;
-	int s_t;
-	struct dn_fib_table *tb;
-
-	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
-		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
-			return dn_cache_dump(skb, cb);
-
-	s_t = cb->args[0];
-	if (s_t == 0)
-		s_t = cb->args[0] = RT_MIN_TABLE;
-
-	for(t = s_t; t <= RT_TABLE_MAX; t++) {
-		if (t < s_t)
-			continue;
-		if (t > s_t)
-			memset(&cb->args[1], 0,
-			       sizeof(cb->args) - sizeof(cb->args[0]));
-		tb = dn_fib_get_table(t, 0);
-		if (tb == NULL)
-			continue;
-		if (tb->dump(tb, skb, cb) < 0)
-			break;
-	}
-
-	cb->args[0] = t;
-
-	return skb->len;
-}
-
 static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
 {
 	struct dn_fib_table *tb;
@@ -682,7 +652,7 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event,
 	return NOTIFY_DONE;
 }
 
-int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
 {
 	int ret = 0;
 	int scope = RT_SCOPE_NOWHERE;
@@ -726,7 +696,7 @@ int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
 }
 
 
-int dn_fib_sync_up(struct net_device *dev)
+static int dn_fib_sync_up(struct net_device *dev)
 {
 	int ret = 0;
 
@@ -760,22 +730,6 @@ int dn_fib_sync_up(struct net_device *dev)
 	return ret;
 }
 
-void dn_fib_flush(void)
-{
-	int flushed = 0;
-	struct dn_fib_table *tb;
-	int id;
-
-	for(id = RT_TABLE_MAX; id > 0; id--) {
-		if ((tb = dn_fib_get_table(id, 0)) == NULL)
-			continue;
-		flushed += tb->flush(tb);
-	}
-
-	if (flushed)
-		dn_rt_cache_flush(-1);
-}
-
 static struct notifier_block dn_fib_dnaddr_notifier = {
 	.notifier_call = dn_fib_dnaddr_event,
 };
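
The dn_fib_info_lock conversion above reflects that the list is only ever touched by writers (insert and remove), so the reader/writer split bought nothing and a plain spinlock is the lighter primitive. A minimal sketch of the post-patch locking, assuming the same doubly linked dn_fib_info list:

static DEFINE_SPINLOCK(example_info_lock);

static void example_link_info(struct dn_fib_info *fi)
{
	spin_lock(&example_info_lock);	/* every caller modifies, none merely reads */
	fi->fib_next = dn_fib_info_list;
	fi->fib_prev = NULL;
	if (dn_fib_info_list)
		dn_fib_info_list->fib_prev = fi;
	dn_fib_info_list = fi;
	spin_unlock(&example_info_lock);
}
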
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 86f7f3b28e70..72ecc6e62ec4 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig
 		goto out;
 	}
 
-	err = sk_filter(sk, skb, 0);
+	err = sk_filter(sk, skb);
 	if (err)
 		goto out;
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 743e9fcf7c5a..dd0761e3d280 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -80,6 +80,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_dev.h>
 #include <net/dn_nsp.h>
@@ -1284,7 +1285,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 		dev_hold(out_dev);
 
 		if (res.r)
-			src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags);
+			src_map = fl.fld_src; /* no NAT support for now */
 
 		gateway = DN_FIB_RES_GW(res);
 		if (res.type == RTN_NAT) {
@@ -1485,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	r->rtm_src_len = 0;
 	r->rtm_tos = 0;
 	r->rtm_table = RT_TABLE_MAIN;
+	RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type = rt->rt_type;
 	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	r->rtm_scope = RT_SCOPE_UNIVERSE;
@@ -1609,9 +1611,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-
-	return err;
+	return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 
 out_free:
 	kfree_skb(skb);
@@ -1781,14 +1781,9 @@ void __init dn_route_init(void)
 {
 	int i, goal, order;
 
-	dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache",
-						   sizeof(struct dn_route),
-						   0, SLAB_HWCACHE_ALIGN,
-						   NULL, NULL);
-
-	if (!dn_dst_ops.kmem_cachep)
-		panic("DECnet: Failed to allocate dn_dst_cache\n");
-
+	dn_dst_ops.kmem_cachep =
+		kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 	init_timer(&dn_route_timer);
 	dn_route_timer.function = dn_dst_check_expire;
 	dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
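
SLAB_PANIC makes kmem_cache_create() itself panic on allocation failure, which is why the explicit NULL check could be dropped above. A sketch against the 2.6.18-era six-argument API (cache name and struct are illustrative):

static kmem_cache_t *example_cachep;

static void __init example_cache_init(void)
{
	/* With SLAB_PANIC there is no need to test the return value:
	 * failure here is unrecoverable at boot anyway. */
	example_cachep = kmem_cache_create("example_cache",
					   sizeof(struct dn_route), 0,
					   SLAB_HWCACHE_ALIGN | SLAB_PANIC,
					   NULL, NULL);
}
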
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 6986be754ef2..3e0c882c90bf 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -11,259 +11,213 @@
  *
  *
  * Changes:
+ *          Steve Whitehouse <steve@chygwyn.com>
+ *              Updated for Thomas Graf's generic rules
  *
  */
-#include <linux/string.h>
 #include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
 #include <linux/init.h>
-#include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
 #include <linux/netdevice.h>
-#include <linux/timer.h>
 #include <linux/spinlock.h>
-#include <linux/in_route.h>
 #include <linux/list.h>
 #include <linux/rcupdate.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
 
+static struct fib_rules_ops dn_fib_rules_ops;
+
 struct dn_fib_rule
 {
-	struct hlist_node	r_hlist;
-	atomic_t		r_clntref;
-	u32			r_preference;
-	unsigned char		r_table;
-	unsigned char		r_action;
-	unsigned char		r_dst_len;
-	unsigned char		r_src_len;
-	__le16			r_src;
-	__le16			r_srcmask;
-	__le16			r_dst;
-	__le16			r_dstmask;
-	__le16			r_srcmap;
-	u8			r_flags;
+	struct fib_rule		common;
+	unsigned char		dst_len;
+	unsigned char		src_len;
+	__le16			src;
+	__le16			srcmask;
+	__le16			dst;
+	__le16			dstmask;
+	__le16			srcmap;
+	u8			flags;
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	u32			r_fwmark;
+	u32			fwmark;
+	u32			fwmask;
 #endif
-	int			r_ifindex;
-	char			r_ifname[IFNAMSIZ];
-	int			r_dead;
-	struct rcu_head		rcu;
 };
 
 static struct dn_fib_rule default_rule = {
-	.r_clntref =		ATOMIC_INIT(2),
-	.r_preference =		0x7fff,
-	.r_table =		RT_TABLE_MAIN,
-	.r_action =		RTN_UNICAST
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7fff,
+		.table =	RT_TABLE_MAIN,
+		.action =	FR_ACT_TO_TBL,
+	},
 };
 
-static struct hlist_head dn_fib_rules;
+static LIST_HEAD(dn_fib_rules);
+
 
-int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct dn_fib_rule *r;
-	struct hlist_node *node;
-	int err = -ESRCH;
-
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) &&
-		    rtm->rtm_src_len == r->r_src_len &&
-		    rtm->rtm_dst_len == r->r_dst_len &&
-		    (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) &&
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-		    (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
-		    (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
-		    (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
-		    (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
-		    (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
-
-			err = -EPERM;
-			if (r == &default_rule)
-				break;
-
-			hlist_del_rcu(&r->r_hlist);
-			r->r_dead = 1;
-			dn_fib_rule_put(r);
-			err = 0;
-			break;
-		}
-	}
+	struct fib_lookup_arg arg = {
+		.result = res,
+	};
+	int err;
+
+	err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg);
+	res->r = arg.rule;
 
 	return err;
 }
 
-static inline void dn_fib_rule_put_rcu(struct rcu_head *head)
+static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
+			      int flags, struct fib_lookup_arg *arg)
 {
-	struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu);
-	kfree(r);
-}
+	int err = -EAGAIN;
+	struct dn_fib_table *tbl;
 
-void dn_fib_rule_put(struct dn_fib_rule *r)
-{
-	if (atomic_dec_and_test(&r->r_clntref)) {
-		if (r->r_dead)
-			call_rcu(&r->rcu, dn_fib_rule_put_rcu);
-		else
-			printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n");
+	switch(rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+
+	case FR_ACT_UNREACHABLE:
+		err = -ENETUNREACH;
+		goto errout;
+
+	case FR_ACT_PROHIBIT:
+		err = -EACCES;
+		goto errout;
+
+	case FR_ACT_BLACKHOLE:
+	default:
+		err = -EINVAL;
+		goto errout;
 	}
+
+	tbl = dn_fib_get_table(rule->table, 0);
+	if (tbl == NULL)
+		goto errout;
+
+	err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result);
+	if (err > 0)
+		err = -EAGAIN;
+errout:
+	return err;
 }
 
+static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .type = NLA_U16 },
+	[FRA_DST]	= { .type = NLA_U16 },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FWMASK]	= { .type = NLA_U32 },
+	[FRA_TABLE]	= { .type = NLA_U32 },
+};
 
-int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct dn_fib_rule *r, *new_r, *last = NULL;
-	struct hlist_node *node = NULL;
-	unsigned char table_id;
-
-	if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16)
-		return -EINVAL;
-
-	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
-		return -EINVAL;
-
-	if (rtm->rtm_type == RTN_NAT)
-		return -EINVAL;
-
-	table_id = rtm->rtm_table;
-	if (table_id == RT_TABLE_UNSPEC) {
-		struct dn_fib_table *tb;
-		if (rtm->rtm_type == RTN_UNICAST) {
-			if ((tb = dn_fib_empty_table()) == NULL)
-				return -ENOBUFS;
-			table_id = tb->n;
-		}
-	}
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+	u16 daddr = fl->fld_dst;
+	u16 saddr = fl->fld_src;
+
+	if (((saddr ^ r->src) & r->srcmask) ||
+	    ((daddr ^ r->dst) & r->dstmask))
+		return 0;
 
-	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
-	if (!new_r)
-		return -ENOMEM;
-
-	if (rta[RTA_SRC-1])
-		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2);
-	if (rta[RTA_DST-1])
-		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2);
-	if (rta[RTA_GATEWAY-1])
-		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2);
-	new_r->r_src_len = rtm->rtm_src_len;
-	new_r->r_dst_len = rtm->rtm_dst_len;
-	new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len);
-	new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len);
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (rta[RTA_PROTOINFO-1])
-		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
+	if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask)
+		return 0;
 #endif
-	new_r->r_action = rtm->rtm_type;
-	new_r->r_flags = rtm->rtm_flags;
-	if (rta[RTA_PRIORITY-1])
-		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
-	new_r->r_table = table_id;
-	if (rta[RTA_IIF-1]) {
-		struct net_device *dev;
-		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
-		new_r->r_ifindex = -1;
-		dev = dev_get_by_name(new_r->r_ifname);
-		if (dev) {
-			new_r->r_ifindex = dev->ifindex;
-			dev_put(dev);
-		}
-	}
 
-	r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist);
-	if (!new_r->r_preference) {
-		if (r && r->r_hlist.next != NULL) {
-			r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist);
-			if (r->r_preference)
-				new_r->r_preference = r->r_preference - 1;
+	return 1;
+}
+
+static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+				 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+				 struct nlattr **tb)
+{
+	int err = -EINVAL;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+	if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos)
+		goto errout;
+
+	if (rule->table == RT_TABLE_UNSPEC) {
+		if (rule->action == FR_ACT_TO_TBL) {
+			struct dn_fib_table *table;
+
+			table = dn_fib_empty_table();
+			if (table == NULL) {
+				err = -ENOBUFS;
+				goto errout;
+			}
+
+			rule->table = table->n;
 		}
 	}
 
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_preference > new_r->r_preference)
-			break;
-		last = r;
+	if (tb[FRA_SRC])
+		r->src = nla_get_u16(tb[FRA_SRC]);
+
+	if (tb[FRA_DST])
+		r->dst = nla_get_u16(tb[FRA_DST]);
+
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (tb[FRA_FWMARK]) {
+		r->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+		if (r->fwmark)
+			/* compatibility: if the mark value is non-zero all bits
+			 * are compared unless a mask is explicitly specified.
+			 */
+			r->fwmask = 0xFFFFFFFF;
 	}
-	atomic_inc(&new_r->r_clntref);
 
-	if (last)
-		hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist);
-	else
-		hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist);
-	return 0;
-}
+	if (tb[FRA_FWMASK])
+		r->fwmask = nla_get_u32(tb[FRA_FWMASK]);
+#endif
 
+	r->src_len = frh->src_len;
+	r->srcmask = dnet_make_mask(r->src_len);
+	r->dst_len = frh->dst_len;
+	r->dstmask = dnet_make_mask(r->dst_len);
+	err = 0;
+errout:
+	return err;
+}
 
-int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res)
+static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
 {
-	struct dn_fib_rule *r, *policy;
-	struct dn_fib_table *tb;
-	__le16 saddr = flp->fld_src;
-	__le16 daddr = flp->fld_dst;
-	struct hlist_node *node;
-	int err;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+	if (frh->src_len && (r->src_len != frh->src_len))
+		return 0;
 
-	rcu_read_lock();
+	if (frh->dst_len && (r->dst_len != frh->dst_len))
+		return 0;
 
-	hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) {
-		if (((saddr^r->r_src) & r->r_srcmask) ||
-		    ((daddr^r->r_dst) & r->r_dstmask) ||
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-		    (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) ||
+	if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
+
+	if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+		return 0;
 #endif
-		    (r->r_ifindex && r->r_ifindex != flp->iif))
-			continue;
-
-		switch(r->r_action) {
-		case RTN_UNICAST:
-		case RTN_NAT:
-			policy = r;
-			break;
-		case RTN_UNREACHABLE:
-			rcu_read_unlock();
-			return -ENETUNREACH;
-		default:
-		case RTN_BLACKHOLE:
-			rcu_read_unlock();
-			return -EINVAL;
-		case RTN_PROHIBIT:
-			rcu_read_unlock();
-			return -EACCES;
-		}
 
-		if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL)
-			continue;
-		err = tb->lookup(tb, flp, res);
-		if (err == 0) {
-			res->r = policy;
-			if (policy)
-				atomic_inc(&policy->r_clntref);
-			rcu_read_unlock();
-			return 0;
-		}
-		if (err < 0 && err != -EAGAIN) {
-			rcu_read_unlock();
-			return err;
-		}
-	}
+	if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC])))
+		return 0;
+
+	if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST])))
+		return 0;
 
-	rcu_read_unlock();
-	return -ESRCH;
+	return 1;
 }
 
 unsigned dnet_addr_type(__le16 addr)
@@ -271,7 +225,7 @@ unsigned dnet_addr_type(__le16 addr)
 	struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
 	struct dn_fib_res res;
 	unsigned ret = RTN_UNICAST;
-	struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL];
+	struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
 
 	res.r = NULL;
 
@@ -284,142 +238,79 @@ unsigned dnet_addr_type(__le16 addr)
 	return ret;
 }
 
-__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags)
+static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			    struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
 {
-	struct dn_fib_rule *r = res->r;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
-	if (r->r_action == RTN_NAT) {
-		int addrtype = dnet_addr_type(r->r_srcmap);
+	frh->family = AF_DECnet;
+	frh->dst_len = r->dst_len;
+	frh->src_len = r->src_len;
+	frh->tos = 0;
 
-		if (addrtype == RTN_NAT) {
-			saddr = (saddr&~r->r_srcmask)|r->r_srcmap;
-			*flags |= RTCF_SNAT;
-		} else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) {
-			saddr = r->r_srcmap;
-			*flags |= RTCF_MASQ;
-		}
-	}
-	return saddr;
-}
-
-static void dn_fib_rules_detach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct dn_fib_rule *r;
-
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_ifindex == dev->ifindex)
-			r->r_ifindex = -1;
-	}
-}
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (r->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark);
+	if (r->fwmask || r->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask);
+#endif
+	if (r->dst_len)
+		NLA_PUT_U16(skb, FRA_DST, r->dst);
+	if (r->src_len)
+		NLA_PUT_U16(skb, FRA_SRC, r->src);
 
-static void dn_fib_rules_attach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct dn_fib_rule *r;
+	return 0;
 
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
-			r->r_ifindex = dev->ifindex;
-	}
+nla_put_failure:
+	return -ENOBUFS;
 }
 
-static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+static u32 dn_fib_rule_default_pref(void)
 {
-	struct net_device *dev = ptr;
-
-	switch(event) {
-		case NETDEV_UNREGISTER:
-			dn_fib_rules_detach(dev);
-			dn_fib_sync_down(0, dev, 1);
-		case NETDEV_REGISTER:
-			dn_fib_rules_attach(dev);
-			dn_fib_sync_up(dev);
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&dn_fib_rules)) {
+		pos = dn_fib_rules.next;
+		if (pos->next != &dn_fib_rules) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
 	}
 
-	return NOTIFY_DONE;
-}
-
-
-static struct notifier_block dn_fib_rules_notifier = {
-	.notifier_call = dn_fib_rules_event,
-};
-
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
-			    struct netlink_callback *cb, unsigned int flags)
-{
-	struct rtmsg *rtm;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
-
-
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
-	rtm->rtm_family = AF_DECnet;
-	rtm->rtm_dst_len = r->r_dst_len;
-	rtm->rtm_src_len = r->r_src_len;
-	rtm->rtm_tos = 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (r->r_fwmark)
-		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
-#endif
-	rtm->rtm_table = r->r_table;
-	rtm->rtm_protocol = 0;
-	rtm->rtm_scope = 0;
-	rtm->rtm_type = r->r_action;
-	rtm->rtm_flags = r->r_flags;
-
-	if (r->r_dst_len)
-		RTA_PUT(skb, RTA_DST, 2, &r->r_dst);
-	if (r->r_src_len)
-		RTA_PUT(skb, RTA_SRC, 2, &r->r_src);
-	if (r->r_ifname[0])
-		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
-	if (r->r_preference)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
-	if (r->r_srcmap)
-		RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return 0;
 }
 
 int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx = 0;
-	int s_idx = cb->args[0];
-	struct dn_fib_rule *r;
-	struct hlist_node *node;
-
-	rcu_read_lock();
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (idx < s_idx)
-			goto next;
-		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
-			break;
-next:
-		idx++;
-	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
-
-	return skb->len;
+	return fib_rules_dump(skb, cb, AF_DECnet);
 }
 
+static struct fib_rules_ops dn_fib_rules_ops = {
+	.family		= AF_DECnet,
+	.rule_size	= sizeof(struct dn_fib_rule),
+	.action		= dn_fib_rule_action,
+	.match		= dn_fib_rule_match,
+	.configure	= dn_fib_rule_configure,
+	.compare	= dn_fib_rule_compare,
+	.fill		= dn_fib_rule_fill,
+	.default_pref	= dn_fib_rule_default_pref,
+	.nlgroup	= RTNLGRP_DECnet_RULE,
+	.policy		= dn_fib_rule_policy,
+	.rules_list	= &dn_fib_rules,
+	.owner		= THIS_MODULE,
+};
+
 void __init dn_fib_rules_init(void)
 {
-	INIT_HLIST_HEAD(&dn_fib_rules);
-	hlist_add_head(&default_rule.r_hlist, &dn_fib_rules);
-	register_netdevice_notifier(&dn_fib_rules_notifier);
+	list_add_tail(&default_rule.common.list, &dn_fib_rules);
+	fib_rules_register(&dn_fib_rules_ops);
 }
 
 void __exit dn_fib_rules_cleanup(void)
 {
-	unregister_netdevice_notifier(&dn_fib_rules_notifier);
+	fib_rules_unregister(&dn_fib_rules_ops);
 }
 
 
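
For orientation, the dn_fib_rules_ops registered above is driven by the generic rules core roughly as follows — a paraphrase of fib_rules_lookup(), not the literal implementation:

/* Walk the rules in preference order; the first rule whose match()
 * accepts the flow has its action() applied.  -EAGAIN from action()
 * (e.g. an empty table) falls through to the next rule. */
static int example_rules_lookup(struct fib_rules_ops *ops, struct flowi *flp,
				int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();
	list_for_each_entry_rcu(rule, ops->rules_list, list) {
		if (!ops->match(rule, flp, flags))
			continue;
		err = ops->action(rule, flp, flags, arg);
		if (err != -EAGAIN) {
			fib_rule_get(rule);
			arg->rule = rule;
			goto out;
		}
	}
	err = -ESRCH;	/* no rule matched */
out:
	rcu_read_unlock();
	return err;
}
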
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index e926c952e363..317904bb5896 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -30,6 +30,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_route.h>
 #include <net/dn_fib.h>
@@ -74,9 +75,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
 for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
 
 #define RT_TABLE_MIN 1
-
+#define DN_FIB_TABLE_HASHSZ 256
+static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
 static DEFINE_RWLOCK(dn_fib_tables_lock);
-struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1];
 
 static kmem_cache_t *dn_hash_kmem __read_mostly;
 static int dn_fib_hash_zombies;
@@ -263,7 +264,7 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
 }
 
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
+			u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
 			struct dn_fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
@@ -277,6 +278,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = 0;
 	rtm->rtm_table = tb_id;
+	RTA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_flags = fi->fib_flags;
 	rtm->rtm_scope = scope;
 	rtm->rtm_type = type;
@@ -326,29 +328,29 @@ rtattr_failure:
 }
 
 
-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
+static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
 			struct nlmsghdr *nlh, struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
 	u32 pid = req ? req->pid : 0;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256);
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb)
-		return;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
 
-	if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
-			     f->fn_type, f->fn_scope, &f->fn_key, z,
-			     DN_FIB_INFO(f), 0) < 0) {
+	err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
+			       f->fn_type, f->fn_scope, &f->fn_key, z,
+			       DN_FIB_INFO(f), 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE;
-	if (nlh->nlmsg_flags & NLM_F_ECHO)
-		atomic_inc(&skb->users);
-	netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL);
-	if (nlh->nlmsg_flags & NLM_F_ECHO)
-		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+	err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
 }
 
 static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
@@ -359,7 +361,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 {
 	int i, s_i;
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	for(i = 0; f; i++, f = f->fn_next) {
 		if (i < s_i)
 			continue;
@@ -372,11 +374,11 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
 				f->fn_scope, &f->fn_key, dz->dz_order,
 				f->fn_info, NLM_F_MULTI) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -387,20 +389,20 @@ static __inline__ int dn_hash_dump_zone(struct sk_buff *skb,
 {
 	int h, s_h;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 	for(h = 0; h < dz->dz_divisor; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
 		if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
 			continue;
 		if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -411,26 +413,63 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
 	struct dn_zone *dz;
 	struct dn_hash *table = (struct dn_hash *)tb->data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 	read_lock(&dn_fib_tables_lock);
 	for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
 		if (m < s_m)
 			continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
 		if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			read_unlock(&dn_fib_tables_lock);
 			return -1;
 		}
 	}
 	read_unlock(&dn_fib_tables_lock);
-	cb->args[1] = m;
+	cb->args[2] = m;
 
 	return skb->len;
 }
 
+int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
+	struct dn_fib_table *tb;
+	struct hlist_node *node;
+	int dumped = 0;
+
+	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+	    ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+		return dn_cache_dump(skb, cb);
+
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+
+	for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) {
+			if (e < s_e)
+				goto next;
+			if (dumped)
+				memset(&cb->args[2], 0, sizeof(cb->args) -
+						 2 * sizeof(cb->args[0]));
+			if (tb->dump(tb, skb, cb) < 0)
+				goto out;
+			dumped = 1;
+next:
+			e++;
+		}
+	}
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
+
+	return skb->len;
+}
+
 static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
 {
 	struct dn_hash *table = (struct dn_hash *)tb->data;
@@ -739,9 +778,11 @@ out:
 }
 
 
-struct dn_fib_table *dn_fib_get_table(int n, int create)
+struct dn_fib_table *dn_fib_get_table(u32 n, int create)
 {
 	struct dn_fib_table *t;
+	struct hlist_node *node;
+	unsigned int h;
 
 	if (n < RT_TABLE_MIN)
 		return NULL;
@@ -749,8 +790,15 @@ struct dn_fib_table *dn_fib_get_table(int n, int create)
 	if (n > RT_TABLE_MAX)
 		return NULL;
 
-	if (dn_fib_tables[n])
-		return dn_fib_tables[n];
+	h = n & (DN_FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) {
+		if (t->n == n) {
+			rcu_read_unlock();
+			return t;
+		}
+	}
+	rcu_read_unlock();
 
 	if (!create)
 		return NULL;
@@ -771,33 +819,37 @@ struct dn_fib_table *dn_fib_get_table(int n, int create)
 	t->flush = dn_fib_table_flush;
 	t->dump = dn_fib_table_dump;
 	memset(t->data, 0, sizeof(struct dn_hash));
-	dn_fib_tables[n] = t;
+	hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
 
 	return t;
 }
 
-static void dn_fib_del_tree(int n)
-{
-	struct dn_fib_table *t;
-
-	write_lock(&dn_fib_tables_lock);
-	t = dn_fib_tables[n];
-	dn_fib_tables[n] = NULL;
-	write_unlock(&dn_fib_tables_lock);
-
-	kfree(t);
-}
-
 struct dn_fib_table *dn_fib_empty_table(void)
 {
-	int id;
+	u32 id;
 
 	for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
-		if (dn_fib_tables[id] == NULL)
+		if (dn_fib_get_table(id, 0) == NULL)
 			return dn_fib_get_table(id, 1);
 	return NULL;
 }
 
+void dn_fib_flush(void)
+{
+	int flushed = 0;
+	struct dn_fib_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
+
+	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist)
+			flushed += tb->flush(tb);
+	}
+
+	if (flushed)
+		dn_rt_cache_flush(-1);
+}
+
 void __init dn_fib_table_init(void)
 {
 	dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
@@ -808,10 +860,17 @@ void __init dn_fib_table_init(void)
 
 void __exit dn_fib_table_cleanup(void)
 {
-	int i;
-
-	for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i)
-		dn_fib_del_tree(i);
+	struct dn_fib_table *t;
+	struct hlist_node *node, *next;
+	unsigned int h;
 
-	return;
+	write_lock(&dn_fib_tables_lock);
+	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h],
+					  hlist) {
+			hlist_del(&t->hlist);
+			kfree(t);
+		}
+	}
+	write_unlock(&dn_fib_tables_lock);
 }
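
The cb->args[] re-indexing scattered through the dump path above is easiest to read as a single resume-state map; a summary sketch (slot meanings inferred from the patch):

/*
 * netlink dump resume state after this patch -- dn_fib_dump() now owns
 * the two outer slots, pushing the per-table state down by one:
 *
 *   cb->args[0]  table hash bucket (h)       -- dn_fib_dump()
 *   cb->args[1]  table index in bucket (e)   -- dn_fib_dump()
 *   cb->args[2]  zone index (m)              -- dn_fib_table_dump()
 *   cb->args[3]  zone hash bucket (h)        -- dn_hash_dump_zone()
 *   cb->args[4]  entry index in bucket (i)   -- dn_hash_dump_bucket()
 */
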
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 387c71c584ee..43863933f27f 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -64,81 +64,79 @@
 
 __setup("ether=", netdev_boot_setup);
 
-/*
- *	Create the Ethernet MAC header for an arbitrary protocol layer
+/**
+ * eth_header - create the Ethernet header
+ * @skb:	buffer to alter
+ * @dev:	source device
+ * @type:	Ethernet type field
+ * @daddr: destination address (NULL leave destination address)
+ * @saddr: source address (NULL use device source address)
+ * @len:   packet length (<= skb->len)
  *
- *	saddr=NULL	means use device source address
- *	daddr=NULL	means leave destination address (eg unresolved arp)
+ *
+ * Set the protocol type. For a packet of type ETH_P_802_3 we put the length
+ * in here instead. It is up to the 802.2 layer to carry protocol information.
  */
-
 int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 	       void *daddr, void *saddr, unsigned len)
 {
-	struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
+	struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
 
-	/*
-	 * Set the protocol type. For a packet of type ETH_P_802_3 we put the length
-	 * in here instead. It is up to the 802.2 layer to carry protocol information.
-	 */
-
-	if(type!=ETH_P_802_3)
+	if (type != ETH_P_802_3)
 		eth->h_proto = htons(type);
 	else
 		eth->h_proto = htons(len);
 
 	/*
 	 *	Set the source hardware address.
 	 */
 
-	if(!saddr)
+	if (!saddr)
 		saddr = dev->dev_addr;
-	memcpy(eth->h_source,saddr,dev->addr_len);
+	memcpy(eth->h_source, saddr, dev->addr_len);
 
-	if(daddr)
-	{
-		memcpy(eth->h_dest,daddr,dev->addr_len);
+	if (daddr) {
+		memcpy(eth->h_dest, daddr, dev->addr_len);
 		return ETH_HLEN;
 	}
 
 	/*
 	 *	Anyway, the loopback-device should never use this function...
 	 */
 
-	if (dev->flags & (IFF_LOOPBACK|IFF_NOARP))
-	{
+	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) {
 		memset(eth->h_dest, 0, dev->addr_len);
 		return ETH_HLEN;
 	}
 
 	return -ETH_HLEN;
 }
 
-
-/*
- * Rebuild the Ethernet MAC header. This is called after an ARP
- * (or in future other address resolution) has completed on this
- * sk_buff. We now let ARP fill in the other fields.
+/**
+ * eth_rebuild_header- rebuild the Ethernet MAC header.
+ * @skb: socket buffer to update
  *
- * This routine CANNOT use cached dst->neigh!
- * Really, it is used only when dst->neigh is wrong.
+ * This is called after an ARP or IPV6 ndisc it's resolution on this
+ * sk_buff. We now let protocol (ARP) fill in the other fields.
+ *
+ * This routine CANNOT use cached dst->neigh!
+ * Really, it is used only when dst->neigh is wrong.
  */
-
 int eth_rebuild_header(struct sk_buff *skb)
 {
 	struct ethhdr *eth = (struct ethhdr *)skb->data;
 	struct net_device *dev = skb->dev;
 
-	switch (eth->h_proto)
-	{
+	switch (eth->h_proto) {
 #ifdef CONFIG_INET
 	case __constant_htons(ETH_P_IP):
 		return arp_find(eth->h_dest, skb);
 #endif
 	default:
 		printk(KERN_DEBUG
 		       "%s: unable to resolve type %X addresses.\n",
 		       dev->name, (int)eth->h_proto);
 
 		memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
 		break;
 	}
@@ -146,62 +144,70 @@ int eth_rebuild_header(struct sk_buff *skb)
 	return 0;
 }
 
-
-/*
- *	Determine the packet's protocol ID. The rule here is that we
- *	assume 802.3 if the type field is short enough to be a length.
- *	This is normal practice and works for any 'now in use' protocol.
+/**
+ * eth_type_trans - determine the packet's protocol ID.
+ * @skb: received socket data
+ * @dev: receiving network device
+ *
+ * The rule here is that we
+ * assume 802.3 if the type field is short enough to be a length.
+ * This is normal practice and works for any 'now in use' protocol.
  */
-
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
 	skb->mac.raw = skb->data;
-	skb_pull(skb,ETH_HLEN);
+	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		if (!compare_ether_addr(eth->h_dest, dev->broadcast))
 			skb->pkt_type = PACKET_BROADCAST;
 		else
 			skb->pkt_type = PACKET_MULTICAST;
 	}
 
 	/*
 	 *	This ALLMULTI check should be redundant by 1.4
 	 *	so don't forget to remove it.
 	 *
 	 *	Seems, you forgot to remove it. All silly devices
 	 *	seems to set IFF_PROMISC.
 	 */
 
-	else if(1 /*dev->flags&IFF_PROMISC*/) {
+	else if (1 /*dev->flags&IFF_PROMISC */ ) {
 		if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr)))
 			skb->pkt_type = PACKET_OTHERHOST;
 	}
 
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
 
 	rawp = skb->data;
 
 	/*
 	 *	This is a magic hack to spot IPX packets. Older Novell breaks
 	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
 	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
 	 *	won't work for fault tolerant netware but does for the rest.
 	 */
 	if (*(unsigned short *)rawp == 0xFFFF)
 		return htons(ETH_P_802_3);
 
 	/*
 	 *	Real 802.2 LLC
 	 */
 	return htons(ETH_P_802_2);
 }
+EXPORT_SYMBOL(eth_type_trans);
 
+/**
+ * eth_header_parse - extract hardware address from packet
+ * @skb: packet to extract header from
+ * @haddr: destination buffer
+ */
 static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
 	struct ethhdr *eth = eth_hdr(skb);
@@ -209,14 +215,20 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
 	return ETH_ALEN;
 }
 
+/**
+ * eth_header_cache - fill cache entry from neighbour
+ * @neigh: source neighbour
+ * @hh: destination cache entry
+ * Create an Ethernet header template from the neighbour.
+ */
 int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 {
 	unsigned short type = hh->hh_type;
 	struct ethhdr *eth;
 	struct net_device *dev = neigh->dev;
 
-	eth = (struct ethhdr*)
-		(((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
+	eth = (struct ethhdr *)
+	    (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
 
 	if (type == __constant_htons(ETH_P_802_3))
 		return -1;
@@ -228,27 +240,47 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 	return 0;
 }
 
-/*
+/**
+ * eth_header_cache_update - update cache entry
+ * @hh: destination cache entry
+ * @dev: network device
+ * @haddr: new hardware address
+ *
  * Called by Address Resolution module to notify changes in address.
  */
-
-void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
+void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev,
+			     unsigned char *haddr)
 {
-	memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
+	memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
 	       haddr, dev->addr_len);
 }
 
-EXPORT_SYMBOL(eth_type_trans);
-
+/**
+ * eth_mac_addr - set new Ethernet hardware address
+ * @dev: network device
+ * @p: socket address
+ * Change hardware address of device.
+ *
+ * This doesn't change hardware matching, so needs to be overridden
+ * for most real devices.
+ */
 static int eth_mac_addr(struct net_device *dev, void *p)
 {
-	struct sockaddr *addr=p;
+	struct sockaddr *addr = p;
 	if (netif_running(dev))
 		return -EBUSY;
-	memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
 	return 0;
 }
 
+/**
+ * eth_change_mtu - set new MTU size
+ * @dev: network device
+ * @new_mtu: new Maximum Transfer Unit
+ *
+ * Allow changing MTU size. Needs to be overridden for devices
+ * supporting jumbo frames.
+ */
 static int eth_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
@@ -257,8 +289,10 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-/*
- * Fill in the fields of the device structure with ethernet-generic values.
+/**
+ * ether_setup - setup Ethernet network device
+ * @dev: network device
+ * Fill in the fields of the device structure with Ethernet-generic values.
  */
 void ether_setup(struct net_device *dev)
 {
@@ -277,21 +311,21 @@ void ether_setup(struct net_device *dev)
 	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */
 	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
 
-	memset(dev->broadcast,0xFF, ETH_ALEN);
+	memset(dev->broadcast, 0xFF, ETH_ALEN);
 
 }
 EXPORT_SYMBOL(ether_setup);
 
 /**
- * alloc_etherdev - Allocates and sets up an ethernet device
+ * alloc_etherdev - Allocates and sets up an Ethernet device
  * @sizeof_priv: Size of additional driver-private structure to be allocated
- *	for this ethernet device
+ *	for this Ethernet device
  *
- * Fill in the fields of the device structure with ethernet-generic
+ * Fill in the fields of the device structure with Ethernet-generic
  * values. Basically does everything except registering the device.
  *
  * Constructs a new net device, complete with a private data area of
- * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * size (sizeof_priv).  A 32-byte (not bit) alignment is enforced for
  * this private data area.
  */
-
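
As a usage note for the kernel-doc added above, a hypothetical driver probe built on these helpers (struct my_priv, my_hw_addr and the surrounding error handling are illustrative only):

static int example_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_etherdev(sizeof(struct my_priv));	/* runs ether_setup() internally */
	if (dev == NULL)
		return -ENOMEM;

	memcpy(dev->dev_addr, my_hw_addr, ETH_ALEN);	/* e.g. read from EEPROM */

	err = register_netdev(dev);
	if (err)
		free_netdev(dev);
	return err;
}
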
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 3b5d504a74be..1650b64415aa 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -88,6 +88,7 @@ config IP_FIB_HASH
 config IP_MULTIPLE_TABLES
 	bool "IP: policy routing"
 	depends on IP_ADVANCED_ROUTER
+	select FIB_RULES
 	---help---
 	  Normally, a router decides what to do with a received packet based
 	  solely on the packet's final destination address. If you say Y here,
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4878fc5be85f..f66049e28aeb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
47obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o 47obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
48obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o 48obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o 49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
50obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
50 51
51obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ 52obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
52 xfrm4_output.o 53 xfrm4_output.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c84a32070f8d..fdd89e37b9aa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -67,7 +67,6 @@
67 * 2 of the License, or (at your option) any later version. 67 * 2 of the License, or (at your option) any later version.
68 */ 68 */
69 69
70#include <linux/config.h>
71#include <linux/err.h> 70#include <linux/err.h>
72#include <linux/errno.h> 71#include <linux/errno.h>
73#include <linux/types.h> 72#include <linux/types.h>
@@ -392,7 +391,7 @@ int inet_release(struct socket *sock)
392} 391}
393 392
394/* It is off by default, see below. */ 393/* It is off by default, see below. */
395int sysctl_ip_nonlocal_bind; 394int sysctl_ip_nonlocal_bind __read_mostly;
396 395
397int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 396int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
398{ 397{
@@ -988,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
988 * Shall we try to damage output packets if routing dev changes? 987 * Shall we try to damage output packets if routing dev changes?
989 */ 988 */
990 989
991int sysctl_ip_dynaddr; 990int sysctl_ip_dynaddr __read_mostly;
992 991
993static int inet_sk_reselect_saddr(struct sock *sk) 992static int inet_sk_reselect_saddr(struct sock *sk)
994{ 993{
@@ -1074,6 +1073,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1074 }, 1073 },
1075 }; 1074 };
1076 1075
1076 security_sk_classify_flow(sk, &fl);
1077 err = ip_route_output_flow(&rt, &fl, sk, 0); 1077 err = ip_route_output_flow(&rt, &fl, sk, 0);
1078} 1078}
1079 if (!err) 1079 if (!err)
@@ -1254,10 +1254,7 @@ static int __init inet_init(void)
1254 struct list_head *r; 1254 struct list_head *r;
1255 int rc = -EINVAL; 1255 int rc = -EINVAL;
1256 1256
1257 if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { 1257 BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
1258 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
1259 goto out;
1260 }
1261 1258
1262 rc = proto_register(&tcp_prot, 1); 1259 rc = proto_register(&tcp_prot, 1);
1263 if (rc) 1260 if (rc)
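The BUILD_BUG_ON() conversion above turns the old runtime sanity check into a compile-time failure. The macro's contemporary definition in include/linux/kernel.h is approximately the following (shown for illustration):

/* A negative array size is a compile error, so the build breaks as soon
 * as the condition becomes true, with no boot-time panic path needed. */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

If struct inet_skb_parm ever outgrows skb->cb, the kernel now fails to compile instead of panicking during inet_init().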
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 2b98943e6b02..99542977e47e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
35 switch (*optptr) { 35 switch (*optptr) {
36 case IPOPT_SEC: 36 case IPOPT_SEC:
37 case 0x85: /* Some "Extended Security" crap. */ 37 case 0x85: /* Some "Extended Security" crap. */
38 case 0x86: /* Another "Commercial Security" crap. */ 38 case IPOPT_CIPSO:
39 case IPOPT_RA: 39 case IPOPT_RA:
40 case 0x80|21: /* RFC1770 */ 40 case 0x80|21: /* RFC1770 */
41 break; 41 break;
@@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x)
265 goto error; 265 goto error;
266 266
267 x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); 267 x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
268 if (x->props.mode) 268 if (x->props.mode == XFRM_MODE_TUNNEL)
269 x->props.header_len += sizeof(struct iphdr); 269 x->props.header_len += sizeof(struct iphdr);
270 x->data = ahp; 270 x->data = ahp;
271 271
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
new file mode 100644
index 000000000000..80a2a0911b49
--- /dev/null
+++ b/net/ipv4/cipso_ipv4.c
@@ -0,0 +1,1607 @@
1/*
2 * CIPSO - Commercial IP Security Option
3 *
4 * This is an implementation of the CIPSO 2.2 protocol as specified in
5 * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
6 * FIPS-188; copies of both documents can be found in the Documentation
7 * directory. While CIPSO never became a full IETF RFC standard, many vendors
8 * have chosen to adopt the protocol, and over the years it has become a
9 * de-facto standard for labeled networking.
10 *
11 * Author: Paul Moore <paul.moore@hp.com>
12 *
13 */
14
15/*
16 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
26 * the GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 *
32 */
33
34#include <linux/init.h>
35#include <linux/types.h>
36#include <linux/rcupdate.h>
37#include <linux/list.h>
38#include <linux/spinlock.h>
39#include <linux/string.h>
40#include <linux/jhash.h>
41#include <net/ip.h>
42#include <net/icmp.h>
43#include <net/tcp.h>
44#include <net/netlabel.h>
45#include <net/cipso_ipv4.h>
46#include <asm/bug.h>
47
48struct cipso_v4_domhsh_entry {
49 char *domain;
50 u32 valid;
51 struct list_head list;
52 struct rcu_head rcu;
53};
54
55/* List of available DOI definitions */
56/* XXX - Updates should be minimal so having a single lock for the
57 * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
58 * okay. */
59/* XXX - This currently assumes a minimal number of different DOIs in use;
60 * if in practice there are a lot of different DOIs this list should
61 * probably be turned into a hash table or something similar so we
62 * can do quick lookups. */
63static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
64static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
65
66/* Label mapping cache */
67int cipso_v4_cache_enabled = 1;
68int cipso_v4_cache_bucketsize = 10;
69#define CIPSO_V4_CACHE_BUCKETBITS 7
70#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS)
71#define CIPSO_V4_CACHE_REORDERLIMIT 10
72struct cipso_v4_map_cache_bkt {
73 spinlock_t lock;
74 u32 size;
75 struct list_head list;
76};
77struct cipso_v4_map_cache_entry {
78 u32 hash;
79 unsigned char *key;
80 size_t key_len;
81
82 struct netlbl_lsm_cache lsm_data;
83
84 u32 activity;
85 struct list_head list;
86};
87static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
88
89/* Restricted bitmap (tag #1) flags */
90int cipso_v4_rbm_optfmt = 0;
91int cipso_v4_rbm_strictvalid = 1;
92
93/*
94 * Helper Functions
95 */
96
97/**
98 * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit
99 * @bitmap: the bitmap
100 * @bitmap_len: length in bits
101 * @offset: starting offset
102 * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit
103 *
104 * Description:
105 * Starting at @offset, walk the bitmap from left to right until either the
106 * desired bit is found or we reach the end. Return the bit offset, -1 if
107 * not found, or -2 if error.
108 */
109static int cipso_v4_bitmap_walk(const unsigned char *bitmap,
110 u32 bitmap_len,
111 u32 offset,
112 u8 state)
113{
114 u32 bit_spot;
115 u32 byte_offset;
116 unsigned char bitmask;
117 unsigned char byte;
118
119 /* gcc always rounds to zero when doing integer division */
120 byte_offset = offset / 8;
121 byte = bitmap[byte_offset];
122 bit_spot = offset;
123 bitmask = 0x80 >> (offset % 8);
124
125 while (bit_spot < bitmap_len) {
126 if ((state && (byte & bitmask) == bitmask) ||
127 (state == 0 && (byte & bitmask) == 0))
128 return bit_spot;
129
130 bit_spot++;
131 bitmask >>= 1;
132 if (bitmask == 0) {
133 byte = bitmap[++byte_offset];
134 bitmask = 0x80;
135 }
136 }
137
138 return -1;
139}
140
141/**
142 * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap
143 * @bitmap: the bitmap
144 * @bit: the bit
145 * @state: if non-zero, set the bit (1) else clear the bit (0)
146 *
147 * Description:
148 * Set or clear a single bit in the bitmap according to @state. This
149 * helper returns no value.
150 */
151static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
152 u32 bit,
153 u8 state)
154{
155 u32 byte_spot;
156 u8 bitmask;
157
158 /* gcc always rounds to zero when doing integer division */
159 byte_spot = bit / 8;
160 bitmask = 0x80 >> (bit % 8);
161 if (state)
162 bitmap[byte_spot] |= bitmask;
163 else
164 bitmap[byte_spot] &= ~bitmask;
165}
166
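A small illustrative self-check of the two helpers above (hypothetical, not part of the patch); the key point is that bit offset 0 is the most significant bit of byte 0, i.e. the mask is 0x80 >> (offset % 8).

static void cipso_v4_bitmap_example(void)
{
	unsigned char bitmap[1] = { 0x20 };	/* only bit offset 2 is set */

	WARN_ON(cipso_v4_bitmap_walk(bitmap, 8, 0, 1) != 2);
	WARN_ON(cipso_v4_bitmap_walk(bitmap, 8, 3, 1) != -1);

	cipso_v4_bitmap_setbit(bitmap, 5, 1);	/* 0x20 | (0x80 >> 5) */
	WARN_ON(bitmap[0] != 0x24);
}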
167/**
168 * cipso_v4_doi_domhsh_free - Frees a domain list entry
169 * @entry: the entry's RCU field
170 *
171 * Description:
172 * This function is designed to be used as a callback to the call_rcu()
173 * function so that the memory allocated to a domain list entry can be released
174 * safely.
175 *
176 */
177static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
178{
179 struct cipso_v4_domhsh_entry *ptr;
180
181 ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
182 kfree(ptr->domain);
183 kfree(ptr);
184}
185
186/**
187 * cipso_v4_cache_entry_free - Frees a cache entry
188 * @entry: the entry to free
189 *
190 * Description:
191 * This function frees the memory associated with a cache entry.
192 *
193 */
194static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry)
195{
196 if (entry->lsm_data.free)
197 entry->lsm_data.free(entry->lsm_data.data);
198 kfree(entry->key);
199 kfree(entry);
200}
201
202/**
203 * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache
204 * @key: the hash key
205 * @key_len: the length of the key in bytes
206 *
207 * Description:
208 * The CIPSO tag hashing function. Returns a 32-bit hash value.
209 *
210 */
211static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
212{
213 return jhash(key, key_len, 0);
214}
215
216/*
217 * Label Mapping Cache Functions
218 */
219
220/**
221 * cipso_v4_cache_init - Initialize the CIPSO cache
222 *
223 * Description:
224 * Initializes the CIPSO label mapping cache; this function should be called
225 * before any of the other functions defined in this file. Returns zero on
226 * success, negative values on error.
227 *
228 */
229static int cipso_v4_cache_init(void)
230{
231 u32 iter;
232
233 cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS,
234 sizeof(struct cipso_v4_map_cache_bkt),
235 GFP_KERNEL);
236 if (cipso_v4_cache == NULL)
237 return -ENOMEM;
238
239 for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
240 spin_lock_init(&cipso_v4_cache[iter].lock);
241 cipso_v4_cache[iter].size = 0;
242 INIT_LIST_HEAD(&cipso_v4_cache[iter].list);
243 }
244
245 return 0;
246}
247
248/**
249 * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
250 *
251 * Description:
252 * Invalidates and frees any entries in the CIPSO cache. This function
253 * cannot fail and returns no value.
254 *
255 */
256void cipso_v4_cache_invalidate(void)
257{
258 struct cipso_v4_map_cache_entry *entry, *tmp_entry;
259 u32 iter;
260
261 for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
262 spin_lock(&cipso_v4_cache[iter].lock);
263 list_for_each_entry_safe(entry,
264 tmp_entry,
265 &cipso_v4_cache[iter].list, list) {
266 list_del(&entry->list);
267 cipso_v4_cache_entry_free(entry);
268 }
269 cipso_v4_cache[iter].size = 0;
270 spin_unlock(&cipso_v4_cache[iter].lock);
271 }
272
273 return;
274}
275
276/**
277 * cipso_v4_cache_check - Check the CIPSO cache for a label mapping
278 * @key: the buffer to check
279 * @key_len: buffer length in bytes
280 * @secattr: the security attribute struct to use
281 *
282 * Description:
283 * This function checks the cache to see if a label mapping already exists for
284 * the given key. If there is a match then the cache is adjusted and the
285 * @secattr struct is populated with the correct LSM security attributes. The
286 * cache is adjusted in the following manner if the entry is not already the
287 * first in the cache bucket:
288 *
289 * 1. The cache entry's activity counter is incremented
290 * 2. The previous (higher ranking) entry's activity counter is decremented
291 * 3. If the difference between the two activity counters is greater than
292 * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped
293 *
294 * Returns zero on success, -ENOENT for a cache miss, and other negative values
295 * on error.
296 *
297 */
298static int cipso_v4_cache_check(const unsigned char *key,
299 u32 key_len,
300 struct netlbl_lsm_secattr *secattr)
301{
302 u32 bkt;
303 struct cipso_v4_map_cache_entry *entry;
304 struct cipso_v4_map_cache_entry *prev_entry = NULL;
305 u32 hash;
306
307 if (!cipso_v4_cache_enabled)
308 return -ENOENT;
309
310 hash = cipso_v4_map_cache_hash(key, key_len);
311 bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1);
312 spin_lock(&cipso_v4_cache[bkt].lock);
313 list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
314 if (entry->hash == hash &&
315 entry->key_len == key_len &&
316 memcmp(entry->key, key, key_len) == 0) {
317 entry->activity += 1;
318 secattr->cache.free = entry->lsm_data.free;
319 secattr->cache.data = entry->lsm_data.data;
320 if (prev_entry == NULL) {
321 spin_unlock(&cipso_v4_cache[bkt].lock);
322 return 0;
323 }
324
325 if (prev_entry->activity > 0)
326 prev_entry->activity -= 1;
327 if (entry->activity > prev_entry->activity &&
328 entry->activity - prev_entry->activity >
329 CIPSO_V4_CACHE_REORDERLIMIT) {
330 __list_del(entry->list.prev, entry->list.next);
331 __list_add(&entry->list,
332 prev_entry->list.prev,
333 &prev_entry->list);
334 }
335
336 spin_unlock(&cipso_v4_cache[bkt].lock);
337 return 0;
338 }
339 prev_entry = entry;
340 }
341 spin_unlock(&cipso_v4_cache[bkt].lock);
342
343 return -ENOENT;
344}
345
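A walk-through of the reordering heuristic in cipso_v4_cache_check(), with made-up activity counts, may help:

/*
 * Example: with CIPSO_V4_CACHE_REORDERLIMIT == 10 and a bucket list of
 *
 *   A(activity=3) -> B(activity=14)
 *
 * a hit on B first yields A(2) -> B(15) (steps 1 and 2 above); since
 * 15 - 2 = 13 > 10, step 3 swaps the entries to B(15) -> A(2), so
 * frequently hit entries bubble toward the head of the bucket and are
 * found earlier on subsequent lookups.
 */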
346/**
347 * cipso_v4_cache_add - Add an entry to the CIPSO cache
348 * @skb: the packet
349 * @secattr: the packet's security attributes
350 *
351 * Description:
352 * Add a new entry into the CIPSO label mapping cache. Add the new entry to
353 * the head of the cache bucket's list; if the cache bucket is out of room,
354 * remove the last entry in the list first. It is important to note that
355 * there is currently no checking for duplicate keys. Returns zero on
356 * success, negative values on failure.
357 *
358 */
359int cipso_v4_cache_add(const struct sk_buff *skb,
360 const struct netlbl_lsm_secattr *secattr)
361{
362 int ret_val = -EPERM;
363 u32 bkt;
364 struct cipso_v4_map_cache_entry *entry = NULL;
365 struct cipso_v4_map_cache_entry *old_entry = NULL;
366 unsigned char *cipso_ptr;
367 u32 cipso_ptr_len;
368
369 if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
370 return 0;
371
372 cipso_ptr = CIPSO_V4_OPTPTR(skb);
373 cipso_ptr_len = cipso_ptr[1];
374
375 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
376 if (entry == NULL)
377 return -ENOMEM;
378 entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC);
379 if (entry->key == NULL) {
380 ret_val = -ENOMEM;
381 goto cache_add_failure;
382 }
383 memcpy(entry->key, cipso_ptr, cipso_ptr_len);
384 entry->key_len = cipso_ptr_len;
385 entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
386 entry->lsm_data.free = secattr->cache.free;
387 entry->lsm_data.data = secattr->cache.data;
388
389 bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
390 spin_lock(&cipso_v4_cache[bkt].lock);
391 if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
392 list_add(&entry->list, &cipso_v4_cache[bkt].list);
393 cipso_v4_cache[bkt].size += 1;
394 } else {
395 old_entry = list_entry(cipso_v4_cache[bkt].list.prev,
396 struct cipso_v4_map_cache_entry, list);
397 list_del(&old_entry->list);
398 list_add(&entry->list, &cipso_v4_cache[bkt].list);
399 cipso_v4_cache_entry_free(old_entry);
400 }
401 spin_unlock(&cipso_v4_cache[bkt].lock);
402
403 return 0;
404
405cache_add_failure:
406 if (entry)
407 cipso_v4_cache_entry_free(entry);
408 return ret_val;
409}
410
411/*
412 * DOI List Functions
413 */
414
415/**
416 * cipso_v4_doi_search - Searches for a DOI definition
417 * @doi: the DOI to search for
418 *
419 * Description:
420 * Search the DOI definition list for a DOI definition with a DOI value that
421 * matches @doi. The caller is responsible for calling rcu_read_[un]lock().
422 * Returns a pointer to the DOI definition on success and NULL on failure.
423 */
424static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
425{
426 struct cipso_v4_doi *iter;
427
428 list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
429 if (iter->doi == doi && iter->valid)
430 return iter;
431 return NULL;
432}
433
434/**
435 * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine
436 * @doi_def: the DOI structure
437 *
438 * Description:
439 * The caller defines a new DOI for use by the CIPSO engine and calls this
440 * function to add it to the list of acceptable domains. The caller must
441 * ensure that the mapping table specified in @doi_def->map meets all of the
442 * requirements of the mapping type (see cipso_ipv4.h for details). Returns
443 * zero on success and non-zero on failure.
444 *
445 */
446int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
447{
448 if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
449 return -EINVAL;
450
451 doi_def->valid = 1;
452 INIT_RCU_HEAD(&doi_def->rcu);
453 INIT_LIST_HEAD(&doi_def->dom_list);
454
455 rcu_read_lock();
456 if (cipso_v4_doi_search(doi_def->doi) != NULL)
457 goto doi_add_failure_rlock;
458 spin_lock(&cipso_v4_doi_list_lock);
459 if (cipso_v4_doi_search(doi_def->doi) != NULL)
460 goto doi_add_failure_slock;
461 list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list);
462 spin_unlock(&cipso_v4_doi_list_lock);
463 rcu_read_unlock();
464
465 return 0;
466
467doi_add_failure_slock:
468 spin_unlock(&cipso_v4_doi_list_lock);
469doi_add_failure_rlock:
470 rcu_read_unlock();
471 return -EEXIST;
472}
473
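As a usage sketch (hypothetical; real callers live in the NetLabel code), registering a pass-through DOI could look like this. It assumes CIPSO_V4_TAG_INVALID terminates the tags[] array, matching how the array is scanned elsewhere in this file.

#include <linux/slab.h>

static int cipso_v4_example_register_doi(void)
{
	struct cipso_v4_doi *doi_def;

	doi_def = kzalloc(sizeof(*doi_def), GFP_KERNEL);
	if (doi_def == NULL)
		return -ENOMEM;

	doi_def->doi = 16;			/* arbitrary example DOI value */
	doi_def->type = CIPSO_V4_MAP_PASS;	/* pass levels/categories through */
	doi_def->tags[0] = CIPSO_V4_TAG_RBITMAP;
	doi_def->tags[1] = CIPSO_V4_TAG_INVALID;

	return cipso_v4_doi_add(doi_def);	/* -EEXIST if the DOI is known */
}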
474/**
475 * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
476 * @doi: the DOI value
477 * @callback: the DOI cleanup/free callback
478 *
479 * Description:
480 * Removes a DOI definition from the CIPSO engine; @callback is called to
481 * free any memory. The NetLabel routines will be called to release their own
482 * LSM domain mappings as well as our own domain list. Returns zero on
483 * success and negative values on failure.
484 *
485 */
486int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head))
487{
488 struct cipso_v4_doi *doi_def;
489 struct cipso_v4_domhsh_entry *dom_iter;
490
491 rcu_read_lock();
492 if (cipso_v4_doi_search(doi) != NULL) {
493 spin_lock(&cipso_v4_doi_list_lock);
494 doi_def = cipso_v4_doi_search(doi);
495 if (doi_def == NULL) {
496 spin_unlock(&cipso_v4_doi_list_lock);
497 rcu_read_unlock();
498 return -ENOENT;
499 }
500 doi_def->valid = 0;
501 list_del_rcu(&doi_def->list);
502 spin_unlock(&cipso_v4_doi_list_lock);
503 list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
504 if (dom_iter->valid)
505 netlbl_domhsh_remove(dom_iter->domain);
506 cipso_v4_cache_invalidate();
507 rcu_read_unlock();
508
509 call_rcu(&doi_def->rcu, callback);
510 return 0;
511 }
512 rcu_read_unlock();
513
514 return -ENOENT;
515}
516
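A sketch of a matching @callback (hypothetical; the standard-map table layout is inferred from its use in the dump functions below): by the time it runs the RCU grace period has elapsed, so the definition can be freed directly.

static void cipso_v4_example_doi_free(struct rcu_head *entry)
{
	struct cipso_v4_doi *doi_def;

	doi_def = container_of(entry, struct cipso_v4_doi, rcu);
	if (doi_def->type == CIPSO_V4_MAP_STD) {
		kfree(doi_def->map.std->lvl.local);
		kfree(doi_def->map.std->lvl.cipso);
		kfree(doi_def->map.std->cat.local);
		kfree(doi_def->map.std->cat.cipso);
		kfree(doi_def->map.std);
	}
	kfree(doi_def);
}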
517/**
518 * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
519 * @doi: the DOI value
520 *
521 * Description:
522 * Searches for a valid DOI definition and if one is found it is returned to
523 * the caller. Otherwise NULL is returned. The caller must ensure that
524 * rcu_read_lock() is held while accessing the returned definition.
525 *
526 */
527struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
528{
529 return cipso_v4_doi_search(doi);
530}
531
532/**
533 * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff
534 * @headroom: the amount of headroom to allocate for the sk_buff
535 *
536 * Description:
537 * Dump a list of all the configured DOI values into a sk_buff. The returned
538 * sk_buff has room at the front of the sk_buff for @headroom bytes. See
539 * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This
540 * function may fail if another process is changing the DOI list at the same
541 * time. Returns a pointer to a sk_buff on success, NULL on error.
542 *
543 */
544struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
545{
546 struct sk_buff *skb = NULL;
547 struct cipso_v4_doi *iter;
548 u32 doi_cnt = 0;
549 ssize_t buf_len;
550
551 buf_len = NETLBL_LEN_U32;
552 rcu_read_lock();
553 list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
554 if (iter->valid) {
555 doi_cnt += 1;
556 buf_len += 2 * NETLBL_LEN_U32;
557 }
558
559 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
560 if (skb == NULL)
561 goto doi_dump_all_failure;
562
563 if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
564 goto doi_dump_all_failure;
565 buf_len -= NETLBL_LEN_U32;
566 list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
567 if (iter->valid) {
568 if (buf_len < 2 * NETLBL_LEN_U32)
569 goto doi_dump_all_failure;
570 if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
571 goto doi_dump_all_failure;
572 if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
573 goto doi_dump_all_failure;
574 buf_len -= 2 * NETLBL_LEN_U32;
575 }
576 rcu_read_unlock();
577
578 return skb;
579
580doi_dump_all_failure:
581 rcu_read_unlock();
582 kfree(skb);
583 return NULL;
584}
585
586/**
587 * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
588 * @doi: the DOI value
589 * @headroom: the amount of headroom to allocate for the sk_buff
590 *
591 * Description:
592 * Look up the DOI definition matching @doi and dump its contents into a
593 * sk_buff. The returned sk_buff has room at the front of the sk_buff for
594 * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message
595 * format. This function may fail if another process is changing the DOI list
596 * at the same time. Returns a pointer to a sk_buff on success, NULL on error.
597 *
598 */
599struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
600{
601 struct sk_buff *skb = NULL;
602 struct cipso_v4_doi *iter;
603 u32 tag_cnt = 0;
604 u32 lvl_cnt = 0;
605 u32 cat_cnt = 0;
606 ssize_t buf_len;
607 ssize_t tmp;
608
609 rcu_read_lock();
610 iter = cipso_v4_doi_getdef(doi);
611 if (iter == NULL)
612 goto doi_dump_failure;
613 buf_len = NETLBL_LEN_U32;
614 switch (iter->type) {
615 case CIPSO_V4_MAP_PASS:
616 buf_len += NETLBL_LEN_U32;
617 while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
618 iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
619 tag_cnt += 1;
620 buf_len += NETLBL_LEN_U8;
621 }
622 break;
623 case CIPSO_V4_MAP_STD:
624 buf_len += 3 * NETLBL_LEN_U32;
625 while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
626 iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
627 tag_cnt += 1;
628 buf_len += NETLBL_LEN_U8;
629 }
630 for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
631 if (iter->map.std->lvl.local[tmp] !=
632 CIPSO_V4_INV_LVL) {
633 lvl_cnt += 1;
634 buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
635 }
636 for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
637 if (iter->map.std->cat.local[tmp] !=
638 CIPSO_V4_INV_CAT) {
639 cat_cnt += 1;
640 buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
641 }
642 break;
643 }
644
645 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
646 if (skb == NULL)
647 goto doi_dump_failure;
648
649 if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
650 goto doi_dump_failure;
651 buf_len -= NETLBL_LEN_U32;
652 if (iter != cipso_v4_doi_getdef(doi))
653 goto doi_dump_failure;
654 switch (iter->type) {
655 case CIPSO_V4_MAP_PASS:
656 if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
657 goto doi_dump_failure;
658 buf_len -= NETLBL_LEN_U32;
659 for (tmp = 0;
660 tmp < CIPSO_V4_TAG_MAXCNT &&
661 iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
662 tmp++) {
663 if (buf_len < NETLBL_LEN_U8)
664 goto doi_dump_failure;
665 if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
666 goto doi_dump_failure;
667 buf_len -= NETLBL_LEN_U8;
668 }
669 break;
670 case CIPSO_V4_MAP_STD:
671 if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
672 goto doi_dump_failure;
673 if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
674 goto doi_dump_failure;
675 if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
676 goto doi_dump_failure;
677 buf_len -= 3 * NETLBL_LEN_U32;
678 for (tmp = 0;
679 tmp < CIPSO_V4_TAG_MAXCNT &&
680 iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
681 tmp++) {
682 if (buf_len < NETLBL_LEN_U8)
683 goto doi_dump_failure;
684 if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
685 goto doi_dump_failure;
686 buf_len -= NETLBL_LEN_U8;
687 }
688 for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
689 if (iter->map.std->lvl.local[tmp] !=
690 CIPSO_V4_INV_LVL) {
691 if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
692 goto doi_dump_failure;
693 if (nla_put_u32(skb, NLA_U32, tmp) != 0)
694 goto doi_dump_failure;
695 if (nla_put_u8(skb,
696 NLA_U8,
697 iter->map.std->lvl.local[tmp]) != 0)
698 goto doi_dump_failure;
699 buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
700 }
701 for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
702 if (iter->map.std->cat.local[tmp] !=
703 CIPSO_V4_INV_CAT) {
704 if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
705 goto doi_dump_failure;
706 if (nla_put_u32(skb, NLA_U32, tmp) != 0)
707 goto doi_dump_failure;
708 if (nla_put_u16(skb,
709 NLA_U16,
710 iter->map.std->cat.local[tmp]) != 0)
711 goto doi_dump_failure;
712 buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
713 }
714 break;
715 }
716 rcu_read_unlock();
717
718 return skb;
719
720doi_dump_failure:
721 rcu_read_unlock();
722 kfree(skb);
723 return NULL;
724}
725
726/**
727 * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
728 * @doi_def: the DOI definition
729 * @domain: the domain to add
730 *
731 * Description:
732 * Adds the @domain to the DOI specified by @doi_def; this function
733 * should only be called by external functions (i.e. NetLabel). This function
734 * does allocate memory. Returns zero on success, negative values on failure.
735 *
736 */
737int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
738{
739 struct cipso_v4_domhsh_entry *iter;
740 struct cipso_v4_domhsh_entry *new_dom;
741
742 new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
743 if (new_dom == NULL)
744 return -ENOMEM;
745 if (domain) {
746 new_dom->domain = kstrdup(domain, GFP_KERNEL);
747 if (new_dom->domain == NULL) {
748 kfree(new_dom);
749 return -ENOMEM;
750 }
751 }
752 new_dom->valid = 1;
753 INIT_RCU_HEAD(&new_dom->rcu);
754
755 rcu_read_lock();
756 spin_lock(&cipso_v4_doi_list_lock);
757 list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
758 if (iter->valid &&
759 ((domain != NULL && iter->domain != NULL &&
760 strcmp(iter->domain, domain) == 0) ||
761 (domain == NULL && iter->domain == NULL))) {
762 spin_unlock(&cipso_v4_doi_list_lock);
763 rcu_read_unlock();
764 kfree(new_dom->domain);
765 kfree(new_dom);
766 return -EEXIST;
767 }
768 list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
769 spin_unlock(&cipso_v4_doi_list_lock);
770 rcu_read_unlock();
771
772 return 0;
773}
774
775/**
776 * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
777 * @doi_def: the DOI definition
778 * @domain: the domain to remove
779 *
780 * Description:
781 * Removes the @domain from the DOI specified by @doi_def; this function
782 * should only be called by external functions (i.e. NetLabel). Returns zero
783 * on success and negative values on error.
784 *
785 */
786int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
787 const char *domain)
788{
789 struct cipso_v4_domhsh_entry *iter;
790
791 rcu_read_lock();
792 spin_lock(&cipso_v4_doi_list_lock);
793 list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
794 if (iter->valid &&
795 ((domain != NULL && iter->domain != NULL &&
796 strcmp(iter->domain, domain) == 0) ||
797 (domain == NULL && iter->domain == NULL))) {
798 iter->valid = 0;
799 list_del_rcu(&iter->list);
800 spin_unlock(&cipso_v4_doi_list_lock);
801 rcu_read_unlock();
802 call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
803
804 return 0;
805 }
806 spin_unlock(&cipso_v4_doi_list_lock);
807 rcu_read_unlock();
808
809 return -ENOENT;
810}
811
812/*
813 * Label Mapping Functions
814 */
815
816/**
817 * cipso_v4_map_lvl_valid - Checks to see if the given level is understood
818 * @doi_def: the DOI definition
819 * @level: the level to check
820 *
821 * Description:
822 * Checks the given level against the given DOI definition and returns a
823 * negative value if the level does not have a valid mapping and a zero value
824 * if the level is defined by the DOI.
825 *
826 */
827static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
828{
829 switch (doi_def->type) {
830 case CIPSO_V4_MAP_PASS:
831 return 0;
832 case CIPSO_V4_MAP_STD:
833 if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
834 return 0;
835 break;
836 }
837
838 return -EFAULT;
839}
840
841/**
842 * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network
843 * @doi_def: the DOI definition
844 * @host_lvl: the host MLS level
845 * @net_lvl: the network/CIPSO MLS level
846 *
847 * Description:
848 * Perform a label mapping to translate a local MLS level to the correct
849 * CIPSO level using the given DOI definition. Returns zero on success,
850 * negative values otherwise.
851 *
852 */
853static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
854 u32 host_lvl,
855 u32 *net_lvl)
856{
857 switch (doi_def->type) {
858 case CIPSO_V4_MAP_PASS:
859 *net_lvl = host_lvl;
860 return 0;
861 case CIPSO_V4_MAP_STD:
862 if (host_lvl < doi_def->map.std->lvl.local_size) {
863 *net_lvl = doi_def->map.std->lvl.local[host_lvl];
864 return 0;
865 }
866 break;
867 }
868
869 return -EINVAL;
870}
871
872/**
873 * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host
874 * @doi_def: the DOI definition
875 * @net_lvl: the network/CIPSO MLS level
876 * @host_lvl: the host MLS level
877 *
878 * Description:
879 * Perform a label mapping to translate a CIPSO level to the correct local MLS
880 * level using the given DOI definition. Returns zero on success, negative
881 * values otherwise.
882 *
883 */
884static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
885 u32 net_lvl,
886 u32 *host_lvl)
887{
888 struct cipso_v4_std_map_tbl *map_tbl;
889
890 switch (doi_def->type) {
891 case CIPSO_V4_MAP_PASS:
892 *host_lvl = net_lvl;
893 return 0;
894 case CIPSO_V4_MAP_STD:
895 map_tbl = doi_def->map.std;
896 if (net_lvl < map_tbl->lvl.cipso_size &&
897 map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
898 *host_lvl = doi_def->map.std->lvl.cipso[net_lvl];
899 return 0;
900 }
901 break;
902 }
903
904 return -EINVAL;
905}
906
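A worked example of the two level-mapping directions, using a made-up CIPSO_V4_MAP_STD table:

/*
 * Suppose lvl.local[] = { 0, 5, 9 } (lvl.local_size == 3) and lvl.cipso[]
 * is the inverse table.  Then:
 *
 *   cipso_v4_map_lvl_hton(doi, 1, &lvl)  stores lvl = 5
 *   cipso_v4_map_lvl_hton(doi, 7, &lvl)  returns -EINVAL (7 >= local_size)
 *   cipso_v4_map_lvl_ntoh(doi, 5, &lvl)  stores lvl = 1
 *
 * For CIPSO_V4_MAP_PASS both directions simply copy the level through.
 */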
907/**
908 * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid
909 * @doi_def: the DOI definition
910 * @bitmap: category bitmap
911 * @bitmap_len: bitmap length in bytes
912 *
913 * Description:
914 * Checks the given category bitmap against the given DOI definition and
915 * returns a negative value if any of the categories in the bitmap do not have
916 * a valid mapping and a zero value if all of the categories are valid.
917 *
918 */
919static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
920 const unsigned char *bitmap,
921 u32 bitmap_len)
922{
923 int cat = -1;
924 u32 bitmap_len_bits = bitmap_len * 8;
925 u32 cipso_cat_size = doi_def->map.std->cat.cipso_size;
926 u32 *cipso_array = doi_def->map.std->cat.cipso;
927
928 switch (doi_def->type) {
929 case CIPSO_V4_MAP_PASS:
930 return 0;
931 case CIPSO_V4_MAP_STD:
932 for (;;) {
933 cat = cipso_v4_bitmap_walk(bitmap,
934 bitmap_len_bits,
935 cat + 1,
936 1);
937 if (cat < 0)
938 break;
939 if (cat >= cipso_cat_size ||
940 cipso_array[cat] >= CIPSO_V4_INV_CAT)
941 return -EFAULT;
942 }
943
944 if (cat == -1)
945 return 0;
946 break;
947 }
948
949 return -EFAULT;
950}
951
952/**
953 * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network
954 * @doi_def: the DOI definition
955 * @host_cat: the category bitmap in host format
956 * @host_cat_len: the length of the host's category bitmap in bytes
957 * @net_cat: the zero'd out category bitmap in network/CIPSO format
958 * @net_cat_len: the length of the CIPSO bitmap in bytes
959 *
960 * Description:
961 * Perform a label mapping to translate a local MLS category bitmap to the
962 * correct CIPSO bitmap using the given DOI definition. Returns the minimum
963 * size in bytes of the network bitmap on success, negative values otherwise.
964 *
965 */
966static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
967 const unsigned char *host_cat,
968 u32 host_cat_len,
969 unsigned char *net_cat,
970 u32 net_cat_len)
971{
972 int host_spot = -1;
973 u32 net_spot;
974 u32 net_spot_max = 0;
975 u32 host_clen_bits = host_cat_len * 8;
976 u32 net_clen_bits = net_cat_len * 8;
977 u32 host_cat_size = doi_def->map.std->cat.local_size;
978 u32 *host_cat_array = doi_def->map.std->cat.local;
979
980 switch (doi_def->type) {
981 case CIPSO_V4_MAP_PASS:
982 net_spot_max = host_cat_len;
983 while (net_spot_max > 0 && host_cat[net_spot_max - 1] == 0)
984 net_spot_max--;
985 if (net_spot_max > net_cat_len)
986 return -EINVAL;
987 memcpy(net_cat, host_cat, net_spot_max);
988 return net_spot_max;
989 case CIPSO_V4_MAP_STD:
990 for (;;) {
991 host_spot = cipso_v4_bitmap_walk(host_cat,
992 host_clen_bits,
993 host_spot + 1,
994 1);
995 if (host_spot < 0)
996 break;
997 if (host_spot >= host_cat_size)
998 return -EPERM;
999
1000 net_spot = host_cat_array[host_spot];
1001 if (net_spot >= net_clen_bits)
1002 return -ENOSPC;
1003 cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
1004
1005 if (net_spot > net_spot_max)
1006 net_spot_max = net_spot;
1007 }
1008
1009 if (host_spot == -2)
1010 return -EFAULT;
1011
1012 if (++net_spot_max % 8)
1013 return net_spot_max / 8 + 1;
1014 return net_spot_max / 8;
1015 }
1016
1017 return -EINVAL;
1018}
1019
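The return value computation at the end of the CIPSO_V4_MAP_STD case deserves a worked example:

/*
 * If the highest CIPSO category bit written was net_spot_max == 10, the
 * bitmap needs bits 0..10, i.e. 11 bits.  ++net_spot_max yields 11;
 * 11 % 8 != 0, so the function returns 11 / 8 + 1 == 2, the minimum
 * bitmap length in whole bytes.
 */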
1020/**
1021 * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host
1022 * @doi_def: the DOI definition
1023 * @net_cat: the category bitmap in network/CIPSO format
1024 * @net_cat_len: the length of the CIPSO bitmap in bytes
1025 * @host_cat: the zero'd out category bitmap in host format
1026 * @host_cat_len: the length of the host's category bitmap in bytes
1027 *
1028 * Description:
1029 * Perform a label mapping to translate a CIPSO bitmap to the correct local
1030 * MLS category bitmap using the given DOI definition. Returns the minimum
1031 * size in bytes of the host bitmap on success, negative values otherwise.
1032 *
1033 */
1034static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
1035 const unsigned char *net_cat,
1036 u32 net_cat_len,
1037 unsigned char *host_cat,
1038 u32 host_cat_len)
1039{
1040 u32 host_spot;
1041 u32 host_spot_max = 0;
1042 int net_spot = -1;
1043 u32 net_clen_bits = net_cat_len * 8;
1044 u32 host_clen_bits = host_cat_len * 8;
1045 u32 net_cat_size = doi_def->map.std->cat.cipso_size;
1046 u32 *net_cat_array = doi_def->map.std->cat.cipso;
1047
1048 switch (doi_def->type) {
1049 case CIPSO_V4_MAP_PASS:
1050 if (net_cat_len > host_cat_len)
1051 return -EINVAL;
1052 memcpy(host_cat, net_cat, net_cat_len);
1053 return net_cat_len;
1054 case CIPSO_V4_MAP_STD:
1055 for (;;) {
1056 net_spot = cipso_v4_bitmap_walk(net_cat,
1057 net_clen_bits,
1058 net_spot + 1,
1059 1);
1060 if (net_spot < 0)
1061 break;
1062 if (net_spot >= net_cat_size ||
1063 net_cat_array[net_spot] >= CIPSO_V4_INV_CAT)
1064 return -EPERM;
1065
1066 host_spot = net_cat_array[net_spot];
1067 if (host_spot >= host_clen_bits)
1068 return -ENOSPC;
1069 cipso_v4_bitmap_setbit(host_cat, host_spot, 1);
1070
1071 if (host_spot > host_spot_max)
1072 host_spot_max = host_spot;
1073 }
1074
1075 if (net_spot == -2)
1076 return -EFAULT;
1077
1078 if (++host_spot_max % 8)
1079 return host_spot_max / 8 + 1;
1080 return host_spot_max / 8;
1081 }
1082
1083 return -EINVAL;
1084}
1085
1086/*
1087 * Protocol Handling Functions
1088 */
1089
1090#define CIPSO_V4_HDR_LEN 6
1091
1092/**
1093 * cipso_v4_gentag_hdr - Generate a CIPSO option header
1094 * @doi_def: the DOI definition
1095 * @len: the total tag length in bytes
1096 * @buf: the CIPSO option buffer
1097 *
1098 * Description:
1099 * Write a CIPSO header into the beginning of @buf. Return zero on success,
1100 * negative values on failure.
1101 *
1102 */
1103static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
1104 u32 len,
1105 unsigned char *buf)
1106{
1107 if (CIPSO_V4_HDR_LEN + len > 40)
1108 return -ENOSPC;
1109
1110 buf[0] = IPOPT_CIPSO;
1111 buf[1] = CIPSO_V4_HDR_LEN + len;
1112 *(u32 *)&buf[2] = htonl(doi_def->doi);
1113
1114 return 0;
1115}
1116
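The resulting option header, shown here for an illustrative DOI of 3 (IPOPT_CIPSO is option type 134):

/*
 *   buf[0]    = IPOPT_CIPSO               option type (134)
 *   buf[1]    = CIPSO_V4_HDR_LEN + len    total option length
 *   buf[2..5] = htonl(3)                  the DOI in network byte order
 *
 * The tag(s) follow at buf[6]; the check against 40 reflects the maximum
 * size of the IPv4 options area (60-byte header minus the fixed 20).
 */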
1117#define CIPSO_V4_TAG1_CAT_LEN 30
1118
1119/**
1120 * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
1121 * @doi_def: the DOI definition
1122 * @secattr: the security attributes
1123 * @buffer: the option buffer
1124 * @buffer_len: length of buffer in bytes
1125 *
1126 * Description:
1127 * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The
1128 * actual buffer length may be larger than the indicated size due to
1129 * translation between host and network category bitmaps. Returns zero on
1130 * success, negative values on failure.
1131 *
1132 */
1133static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
1134 const struct netlbl_lsm_secattr *secattr,
1135 unsigned char **buffer,
1136 u32 *buffer_len)
1137{
1138 int ret_val = -EPERM;
1139 unsigned char *buf = NULL;
1140 u32 buf_len;
1141 u32 level;
1142
1143 if (secattr->mls_cat) {
1144 buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN,
1145 GFP_ATOMIC);
1146 if (buf == NULL)
1147 return -ENOMEM;
1148
1149 ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
1150 secattr->mls_cat,
1151 secattr->mls_cat_len,
1152 &buf[CIPSO_V4_HDR_LEN + 4],
1153 CIPSO_V4_TAG1_CAT_LEN);
1154 if (ret_val < 0)
1155 goto gentag_failure;
1156
1157 /* This will send packets using the "optimized" format when
1158 * possible as specified in section 3.4.2.6 of the
1159 * CIPSO draft. */
1160 if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10))
1161 ret_val = 10;
1162
1163 buf_len = 4 + ret_val;
1164 } else {
1165 buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC);
1166 if (buf == NULL)
1167 return -ENOMEM;
1168 buf_len = 4;
1169 }
1170
1171 ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
1172 if (ret_val != 0)
1173 goto gentag_failure;
1174
1175 ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf);
1176 if (ret_val != 0)
1177 goto gentag_failure;
1178
1179 buf[CIPSO_V4_HDR_LEN] = 0x01;
1180 buf[CIPSO_V4_HDR_LEN + 1] = buf_len;
1181 buf[CIPSO_V4_HDR_LEN + 3] = level;
1182
1183 *buffer = buf;
1184 *buffer_len = CIPSO_V4_HDR_LEN + buf_len;
1185
1186 return 0;
1187
1188gentag_failure:
1189 kfree(buf);
1190 return ret_val;
1191}
1192
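Per the stores at the end of cipso_v4_gentag_rbm(), the generated type #1 tag is laid out as follows:

/*
 *   tag[0]   = 0x01         tag type (restricted bitmap)
 *   tag[1]   = buf_len      tag length: 4 + category-bitmap bytes
 *   tag[2]   = 0x00         alignment octet (left zeroed by kzalloc())
 *   tag[3]   = level        the mapped sensitivity level
 *   tag[4..] = categories   the mapped category bitmap, if any
 */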
1193/**
1194 * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
1195 * @doi_def: the DOI definition
1196 * @tag: the CIPSO tag
1197 * @secattr: the security attributes
1198 *
1199 * Description:
1200 * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
1201 * attributes in @secattr. Return zero on success, negative values on
1202 * failure.
1203 *
1204 */
1205static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
1206 const unsigned char *tag,
1207 struct netlbl_lsm_secattr *secattr)
1208{
1209 int ret_val;
1210 u8 tag_len = tag[1];
1211 u32 level;
1212
1213 ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
1214 if (ret_val != 0)
1215 return ret_val;
1216 secattr->mls_lvl = level;
1217 secattr->mls_lvl_vld = 1;
1218
1219 if (tag_len > 4) {
1220 switch (doi_def->type) {
1221 case CIPSO_V4_MAP_PASS:
1222 secattr->mls_cat_len = tag_len - 4;
1223 break;
1224 case CIPSO_V4_MAP_STD:
1225 secattr->mls_cat_len =
1226 doi_def->map.std->cat.local_size;
1227 break;
1228 }
1229 secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC);
1230 if (secattr->mls_cat == NULL)
1231 return -ENOMEM;
1232
1233 ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def,
1234 &tag[4],
1235 tag_len - 4,
1236 secattr->mls_cat,
1237 secattr->mls_cat_len);
1238 if (ret_val < 0) {
1239 kfree(secattr->mls_cat);
1240 return ret_val;
1241 }
1242 secattr->mls_cat_len = ret_val;
1243 }
1244
1245 return 0;
1246}
1247
1248/**
1249 * cipso_v4_validate - Validate a CIPSO option
1250 * @option: the start of the option, on error it is set to point to the error
1251 *
1252 * Description:
1253 * This routine is called to validate a CIPSO option; it checks all of the
1254 * fields to ensure that they are at least valid, see the draft snippet below
1255 * for details. If the option is valid then a zero value is returned and
1256 * the value of @option is unchanged. If the option is invalid then a
1257 * non-zero value is returned and @option is adjusted to point to the
1258 * offending portion of the option. From the IETF draft ...
1259 *
1260 * "If any field within the CIPSO options, such as the DOI identifier, is not
1261 * recognized the IP datagram is discarded and an ICMP 'parameter problem'
1262 * (type 12) is generated and returned. The ICMP code field is set to 'bad
1263 * parameter' (code 0) and the pointer is set to the start of the CIPSO field
1264 * that is unrecognized."
1265 *
1266 */
1267int cipso_v4_validate(unsigned char **option)
1268{
1269 unsigned char *opt = *option;
1270 unsigned char *tag;
1271 unsigned char opt_iter;
1272 unsigned char err_offset = 0;
1273 u8 opt_len;
1274 u8 tag_len;
1275 struct cipso_v4_doi *doi_def = NULL;
1276 u32 tag_iter;
1277
1278 /* caller already checks for length values that are too large */
1279 opt_len = opt[1];
1280 if (opt_len < 8) {
1281 err_offset = 1;
1282 goto validate_return;
1283 }
1284
1285 rcu_read_lock();
1286 doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
1287 if (doi_def == NULL) {
1288 err_offset = 2;
1289 goto validate_return_locked;
1290 }
1291
1292 opt_iter = 6;
1293 tag = opt + opt_iter;
1294 while (opt_iter < opt_len) {
1295 for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
1296 if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID ||
1297 ++tag_iter == CIPSO_V4_TAG_MAXCNT) {
1298 err_offset = opt_iter;
1299 goto validate_return_locked;
1300 }
1301
1302 tag_len = tag[1];
1303 if (tag_len > (opt_len - opt_iter)) {
1304 err_offset = opt_iter + 1;
1305 goto validate_return_locked;
1306 }
1307
1308 switch (tag[0]) {
1309 case CIPSO_V4_TAG_RBITMAP:
1310 if (tag_len < 4) {
1311 err_offset = opt_iter + 1;
1312 goto validate_return_locked;
1313 }
1314
1315 /* We are already going to do all the verification
1316 * necessary at the socket layer so from our point of
1317 * view it is safe to turn these checks off (and less
1318 * work), however, the CIPSO draft says we should do
1319 * all the CIPSO validations here but it doesn't
1320 * really specify _exactly_ what we need to validate
1321 * ... so, just make it a sysctl tunable. */
1322 if (cipso_v4_rbm_strictvalid) {
1323 if (cipso_v4_map_lvl_valid(doi_def,
1324 tag[3]) < 0) {
1325 err_offset = opt_iter + 3;
1326 goto validate_return_locked;
1327 }
1328 if (tag_len > 4 &&
1329 cipso_v4_map_cat_rbm_valid(doi_def,
1330 &tag[4],
1331 tag_len - 4) < 0) {
1332 err_offset = opt_iter + 4;
1333 goto validate_return_locked;
1334 }
1335 }
1336 break;
1337 default:
1338 err_offset = opt_iter;
1339 goto validate_return_locked;
1340 }
1341
1342 tag += tag_len;
1343 opt_iter += tag_len;
1344 }
1345
1346validate_return_locked:
1347 rcu_read_unlock();
1348validate_return:
1349 *option = opt + err_offset;
1350 return err_offset;
1351}
1352
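For concreteness, an option that passes these checks, assuming DOI 3 is registered with CIPSO_V4_TAG_RBITMAP in its tag list and a level mapping that accepts level 7:

/*
 *   86 0a 00 00 00 03 01 04 00 07
 *   |  |  \---DOI---/ |  |  |  +- level 7
 *   |  |              |  |  +---- alignment octet
 *   |  |              |  +------- tag length 4 (no categories)
 *   |  |              +---------- tag type 1 (restricted bitmap)
 *   |  +------------------------- option length 10 (>= 8)
 *   +---------------------------- option type IPOPT_CIPSO (0x86)
 */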
1353/**
1354 * cipso_v4_error - Send the correct response for a bad packet
1355 * @skb: the packet
1356 * @error: the error code
1357 * @gateway: CIPSO gateway flag
1358 *
1359 * Description:
1360 * Based on the error code given in @error, send an ICMP error message back to
1361 * the originating host. From the IETF draft ...
1362 *
1363 * "If the contents of the CIPSO [option] are valid but the security label is
1364 * outside of the configured host or port label range, the datagram is
1365 * discarded and an ICMP 'destination unreachable' (type 3) is generated and
1366 * returned. The code field of the ICMP is set to 'communication with
1367 * destination network administratively prohibited' (code 9) or to
1368 * 'communication with destination host administratively prohibited'
1369 * (code 10). The value of the code is dependent on whether the originator
1370 * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The
1371 * recipient of the ICMP message MUST be able to handle either value. The
1372 * same procedure is performed if a CIPSO [option] can not be added to an
1373 * IP packet because it is too large to fit in the IP options area."
1374 *
1375 * "If the error is triggered by receipt of an ICMP message, the message is
1376 * discarded and no response is permitted (consistent with general ICMP
1377 * processing rules)."
1378 *
1379 */
1380void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
1381{
1382 if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
1383 return;
1384
1385 if (gateway)
1386 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
1387 else
1388 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
1389}
1390
1391/**
1392 * cipso_v4_socket_setattr - Add a CIPSO option to a socket
1393 * @sock: the socket
1394 * @doi_def: the CIPSO DOI to use
1395 * @secattr: the specific security attributes of the socket
1396 *
1397 * Description:
1398 * Set the CIPSO option on the given socket using the DOI definition and
1399 * security attributes passed to the function. This function requires
1400 * exclusive access to @sock->sk, which means it either needs to be in the
1401 * process of being created or locked via lock_sock(sock->sk). Returns zero on
1402 * success and negative values on failure.
1403 *
1404 */
1405int cipso_v4_socket_setattr(const struct socket *sock,
1406 const struct cipso_v4_doi *doi_def,
1407 const struct netlbl_lsm_secattr *secattr)
1408{
1409 int ret_val = -EPERM;
1410 u32 iter;
1411 unsigned char *buf = NULL;
1412 u32 buf_len = 0;
1413 u32 opt_len;
1414 struct ip_options *opt = NULL;
1415 struct sock *sk;
1416 struct inet_sock *sk_inet;
1417 struct inet_connection_sock *sk_conn;
1418
1419 /* In the case of sock_create_lite(), the sock->sk field is not
1420 * defined yet but it is not a problem as the only users of these
1421 * "lite" PF_INET sockets are functions which do an accept() call
1422 * afterwards so we will label the socket as part of the accept(). */
1423 sk = sock->sk;
1424 if (sk == NULL)
1425 return 0;
1426
1427 /* XXX - This code assumes only one tag per CIPSO option which isn't
1428 * really a good assumption to make but since we only support the MAC
1429 * tags right now it is a safe assumption. */
1430 iter = 0;
1431 do {
1432 switch (doi_def->tags[iter]) {
1433 case CIPSO_V4_TAG_RBITMAP:
1434 ret_val = cipso_v4_gentag_rbm(doi_def,
1435 secattr,
1436 &buf,
1437 &buf_len);
1438 break;
1439 default:
1440 ret_val = -EPERM;
1441 goto socket_setattr_failure;
1442 }
1443
1444 iter++;
1445 } while (ret_val != 0 &&
1446 iter < CIPSO_V4_TAG_MAXCNT &&
1447 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
1448 if (ret_val != 0)
1449 goto socket_setattr_failure;
1450
1451 /* We can't use ip_options_get() directly because it makes a call to
1452 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
1453 * we can't block here. */
1454 opt_len = (buf_len + 3) & ~3;
1455 opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
1456 if (opt == NULL) {
1457 ret_val = -ENOMEM;
1458 goto socket_setattr_failure;
1459 }
1460 memcpy(opt->__data, buf, buf_len);
1461 opt->optlen = opt_len;
1462 opt->is_data = 1;
1463 kfree(buf);
1464 buf = NULL;
1465 ret_val = ip_options_compile(opt, NULL);
1466 if (ret_val != 0)
1467 goto socket_setattr_failure;
1468
1469 sk_inet = inet_sk(sk);
1470 if (sk_inet->is_icsk) {
1471 sk_conn = inet_csk(sk);
1472 if (sk_inet->opt)
1473 sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
1474 sk_conn->icsk_ext_hdr_len += opt->optlen;
1475 sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
1476 }
1477 opt = xchg(&sk_inet->opt, opt);
1478 kfree(opt);
1479
1480 return 0;
1481
1482socket_setattr_failure:
1483 kfree(buf);
1484 kfree(opt);
1485 return ret_val;
1486}
1487
1488/**
1489 * cipso_v4_socket_getattr - Get the security attributes from a socket
1490 * @sock: the socket
1491 * @secattr: the security attributes
1492 *
1493 * Description:
1494 * Query @sock to see if there is a CIPSO option attached to the socket and if
1495 * there is return the CIPSO security attributes in @secattr. Returns zero on
1496 * success and negative values on failure.
1497 *
1498 */
1499int cipso_v4_socket_getattr(const struct socket *sock,
1500 struct netlbl_lsm_secattr *secattr)
1501{
1502 int ret_val = -ENOMSG;
1503 struct sock *sk;
1504 struct inet_sock *sk_inet;
1505 unsigned char *cipso_ptr;
1506 u32 doi;
1507 struct cipso_v4_doi *doi_def;
1508
1509 sk = sock->sk;
1510 lock_sock(sk);
1511 sk_inet = inet_sk(sk);
1512 if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
1513 goto socket_getattr_return;
1514 cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
1515 sizeof(struct iphdr);
1516 ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
1517 if (ret_val == 0)
1518 goto socket_getattr_return;
1519
1520 doi = ntohl(*(u32 *)&cipso_ptr[2]);
1521 rcu_read_lock();
1522 doi_def = cipso_v4_doi_getdef(doi);
1523 if (doi_def == NULL) {
1524 rcu_read_unlock();
1525 goto socket_getattr_return;
1526 }
1527 switch (cipso_ptr[6]) {
1528 case CIPSO_V4_TAG_RBITMAP:
1529 ret_val = cipso_v4_parsetag_rbm(doi_def,
1530 &cipso_ptr[6],
1531 secattr);
1532 break;
1533 }
1534 rcu_read_unlock();
1535
1536socket_getattr_return:
1537 release_sock(sk);
1538 return ret_val;
1539}
1540
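The pointer arithmetic above is worth a short worked example: ip_options records option offsets relative to the start of the IP header, while opt->__data begins after the fixed header.

/*
 * With a standard 20-byte IP header, opt->cipso == 20 means the option
 * starts at the first byte after the fixed header, so
 *
 *   __data + 20 - sizeof(struct iphdr) == __data + 0
 *
 * points at the option type octet (and cipso_ptr[1] at its length).
 */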
1541/**
1542 * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
1543 * @skb: the packet
1544 * @secattr: the security attributes
1545 *
1546 * Description:
1547 * Parse the given packet's CIPSO option and return the security attributes.
1548 * Returns zero on success and negative values on failure.
1549 *
1550 */
1551int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
1552 struct netlbl_lsm_secattr *secattr)
1553{
1554 int ret_val = -ENOMSG;
1555 unsigned char *cipso_ptr;
1556 u32 doi;
1557 struct cipso_v4_doi *doi_def;
1558
1559 if (!CIPSO_V4_OPTEXIST(skb))
1560 return -ENOMSG;
1561 cipso_ptr = CIPSO_V4_OPTPTR(skb);
1562 if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
1563 return 0;
1564
1565 doi = ntohl(*(u32 *)&cipso_ptr[2]);
1566 rcu_read_lock();
1567 doi_def = cipso_v4_doi_getdef(doi);
1568 if (doi_def == NULL)
1569 goto skbuff_getattr_return;
1570 switch (cipso_ptr[6]) {
1571 case CIPSO_V4_TAG_RBITMAP:
1572 ret_val = cipso_v4_parsetag_rbm(doi_def,
1573 &cipso_ptr[6],
1574 secattr);
1575 break;
1576 }
1577
1578skbuff_getattr_return:
1579 rcu_read_unlock();
1580 return ret_val;
1581}
1582
1583/*
1584 * Setup Functions
1585 */
1586
1587/**
1588 * cipso_v4_init - Initialize the CIPSO module
1589 *
1590 * Description:
1591 * Initialize the CIPSO module and prepare it for use. Returns zero on success
1592 * and negative values on failure.
1593 *
1594 */
1595static int __init cipso_v4_init(void)
1596{
1597 int ret_val;
1598
1599 ret_val = cipso_v4_cache_init();
1600 if (ret_val != 0)
1601 panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n",
1602 ret_val);
1603
1604 return 0;
1605}
1606
1607subsys_initcall(cipso_v4_init);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a6cc31d911eb..8e8d1f17d77a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -43,6 +43,7 @@
43#include <linux/in.h> 43#include <linux/in.h>
44#include <linux/errno.h> 44#include <linux/errno.h>
45#include <linux/interrupt.h> 45#include <linux/interrupt.h>
46#include <linux/if_addr.h>
46#include <linux/if_ether.h> 47#include <linux/if_ether.h>
47#include <linux/inet.h> 48#include <linux/inet.h>
48#include <linux/netdevice.h> 49#include <linux/netdevice.h>
@@ -62,6 +63,7 @@
62#include <net/ip.h> 63#include <net/ip.h>
63#include <net/route.h> 64#include <net/route.h>
64#include <net/ip_fib.h> 65#include <net/ip_fib.h>
66#include <net/netlink.h>
65 67
66struct ipv4_devconf ipv4_devconf = { 68struct ipv4_devconf ipv4_devconf = {
67 .accept_redirects = 1, 69 .accept_redirects = 1,
@@ -78,7 +80,15 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
78 .accept_source_route = 1, 80 .accept_source_route = 1,
79}; 81};
80 82
81static void rtmsg_ifa(int event, struct in_ifaddr *); 83static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
84 [IFA_LOCAL] = { .type = NLA_U32 },
85 [IFA_ADDRESS] = { .type = NLA_U32 },
86 [IFA_BROADCAST] = { .type = NLA_U32 },
87 [IFA_ANYCAST] = { .type = NLA_U32 },
88 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
89};
90
91static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
82 92
83static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 93static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
84static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 94static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -229,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
229 return 0; 239 return 0;
230} 240}
231 241
232static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 242static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
233 int destroy) 243 int destroy, struct nlmsghdr *nlh, u32 pid)
234{ 244{
235 struct in_ifaddr *promote = NULL; 245 struct in_ifaddr *promote = NULL;
236 struct in_ifaddr *ifa, *ifa1 = *ifap; 246 struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -263,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
263 if (!do_promote) { 273 if (!do_promote) {
264 *ifap1 = ifa->ifa_next; 274 *ifap1 = ifa->ifa_next;
265 275
266 rtmsg_ifa(RTM_DELADDR, ifa); 276 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
267 blocking_notifier_call_chain(&inetaddr_chain, 277 blocking_notifier_call_chain(&inetaddr_chain,
268 NETDEV_DOWN, ifa); 278 NETDEV_DOWN, ifa);
269 inet_free_ifa(ifa); 279 inet_free_ifa(ifa);
@@ -288,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
288 is valid, it will try to restore deleted routes... Grr. 298 is valid, it will try to restore deleted routes... Grr.
289 So that, this order is correct. 299 So that, this order is correct.
290 */ 300 */
291 rtmsg_ifa(RTM_DELADDR, ifa1); 301 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
292 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 302 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
293 303
294 if (promote) { 304 if (promote) {
@@ -300,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
300 } 310 }
301 311
302 promote->ifa_flags &= ~IFA_F_SECONDARY; 312 promote->ifa_flags &= ~IFA_F_SECONDARY;
303 rtmsg_ifa(RTM_NEWADDR, promote); 313 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
304 blocking_notifier_call_chain(&inetaddr_chain, 314 blocking_notifier_call_chain(&inetaddr_chain,
305 NETDEV_UP, promote); 315 NETDEV_UP, promote);
306 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { 316 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
@@ -319,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
319 } 329 }
320} 330}
321 331
322static int inet_insert_ifa(struct in_ifaddr *ifa) 332static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333 int destroy)
334{
335 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
336}
337
338static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
339 u32 pid)
323{ 340{
324 struct in_device *in_dev = ifa->ifa_dev; 341 struct in_device *in_dev = ifa->ifa_dev;
325 struct in_ifaddr *ifa1, **ifap, **last_primary; 342 struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -364,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
364 /* Send message first, then call notifier. 381 /* Send message first, then call notifier.
365 Notifier will trigger FIB update, so that 382 Notifier will trigger FIB update, so that
366 listeners of netlink will know about new ifaddr */ 383 listeners of netlink will know about new ifaddr */
367 rtmsg_ifa(RTM_NEWADDR, ifa); 384 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
368 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 385 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
369 386
370 return 0; 387 return 0;
371} 388}
372 389
390static int inet_insert_ifa(struct in_ifaddr *ifa)
391{
392 return __inet_insert_ifa(ifa, NULL, 0);
393}
394
373static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 395static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
374{ 396{
375 struct in_device *in_dev = __in_dev_get_rtnl(dev); 397 struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -421,87 +443,134 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
421 443
422static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 444static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
423{ 445{
424 struct rtattr **rta = arg; 446 struct nlattr *tb[IFA_MAX+1];
425 struct in_device *in_dev; 447 struct in_device *in_dev;
426 struct ifaddrmsg *ifm = NLMSG_DATA(nlh); 448 struct ifaddrmsg *ifm;
427 struct in_ifaddr *ifa, **ifap; 449 struct in_ifaddr *ifa, **ifap;
450 int err = -EINVAL;
428 451
429 ASSERT_RTNL(); 452 ASSERT_RTNL();
430 453
431 if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) 454 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
432 goto out; 455 if (err < 0)
456 goto errout;
457
458 ifm = nlmsg_data(nlh);
459 in_dev = inetdev_by_index(ifm->ifa_index);
460 if (in_dev == NULL) {
461 err = -ENODEV;
462 goto errout;
463 }
464
433 __in_dev_put(in_dev); 465 __in_dev_put(in_dev);
434 466
435 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 467 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
436 ifap = &ifa->ifa_next) { 468 ifap = &ifa->ifa_next) {
437 if ((rta[IFA_LOCAL - 1] && 469 if (tb[IFA_LOCAL] &&
438 memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), 470 ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL]))
439 &ifa->ifa_local, 4)) || 471 continue;
440 (rta[IFA_LABEL - 1] && 472
441 rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) || 473 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
442 (rta[IFA_ADDRESS - 1] && 474 continue;
443 (ifm->ifa_prefixlen != ifa->ifa_prefixlen || 475
444 !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]), 476 if (tb[IFA_ADDRESS] &&
445 ifa)))) 477 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
478 !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa)))
446 continue; 479 continue;
447 inet_del_ifa(in_dev, ifap, 1); 480
481 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
448 return 0; 482 return 0;
449 } 483 }
450out: 484
451 return -EADDRNOTAVAIL; 485 err = -EADDRNOTAVAIL;
486errout:
487 return err;
452} 488}
453 489
454static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 490static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
455{ 491{
456 struct rtattr **rta = arg; 492 struct nlattr *tb[IFA_MAX+1];
493 struct in_ifaddr *ifa;
494 struct ifaddrmsg *ifm;
457 struct net_device *dev; 495 struct net_device *dev;
458 struct in_device *in_dev; 496 struct in_device *in_dev;
459 struct ifaddrmsg *ifm = NLMSG_DATA(nlh); 497 int err = -EINVAL;
460 struct in_ifaddr *ifa;
461 int rc = -EINVAL;
462 498
463 ASSERT_RTNL(); 499 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
500 if (err < 0)
501 goto errout;
464 502
465 if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) 503 ifm = nlmsg_data(nlh);
466 goto out; 504 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
505 goto errout;
467 506
468 rc = -ENODEV; 507 dev = __dev_get_by_index(ifm->ifa_index);
469 if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) 508 if (dev == NULL) {
470 goto out; 509 err = -ENODEV;
510 goto errout;
511 }
471 512
472 rc = -ENOBUFS; 513 in_dev = __in_dev_get_rtnl(dev);
473 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { 514 if (in_dev == NULL) {
474 in_dev = inetdev_init(dev); 515 in_dev = inetdev_init(dev);
475 if (!in_dev) 516 if (in_dev == NULL) {
476 goto out; 517 err = -ENOBUFS;
518 goto errout;
519 }
477 } 520 }
478 521
479 if ((ifa = inet_alloc_ifa()) == NULL) 522 ifa = inet_alloc_ifa();
480 goto out; 523 if (ifa == NULL) {
524 /*
 525		 * A potential in_dev allocation can be left alive, it stays
 526		 * assigned to its device and is destroyed with it.
527 */
528 err = -ENOBUFS;
529 goto errout;
530 }
531
532 in_dev_hold(in_dev);
533
534 if (tb[IFA_ADDRESS] == NULL)
535 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
481 536
482 if (!rta[IFA_ADDRESS - 1])
483 rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
484 memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
485 memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
486 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 537 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
487 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 538 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
488 if (rta[IFA_BROADCAST - 1])
489 memcpy(&ifa->ifa_broadcast,
490 RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
491 if (rta[IFA_ANYCAST - 1])
492 memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
493 ifa->ifa_flags = ifm->ifa_flags; 539 ifa->ifa_flags = ifm->ifa_flags;
494 ifa->ifa_scope = ifm->ifa_scope; 540 ifa->ifa_scope = ifm->ifa_scope;
495 in_dev_hold(in_dev); 541 ifa->ifa_dev = in_dev;
496 ifa->ifa_dev = in_dev; 542
497 if (rta[IFA_LABEL - 1]) 543 ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]);
498 rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ); 544 ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]);
545
546 if (tb[IFA_BROADCAST])
547 ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]);
548
549 if (tb[IFA_ANYCAST])
550 ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]);
551
552 if (tb[IFA_LABEL])
553 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
499 else 554 else
500 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 555 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
501 556
502 rc = inet_insert_ifa(ifa); 557 return ifa;
503out: 558
504 return rc; 559errout:
560 return ERR_PTR(err);
561}
562
563static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
564{
565 struct in_ifaddr *ifa;
566
567 ASSERT_RTNL();
568
569 ifa = rtm_to_ifaddr(nlh);
570 if (IS_ERR(ifa))
571 return PTR_ERR(ifa);
572
573 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
505} 574}
506 575
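rtm_to_ifaddr() above reports failure through the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention: a negative errno is encoded into the never-valid top page of pointer space, which is what lets inet_rtm_newaddr() collapse to a three-line body. A userspace re-implementation of the idiom, assuming the kernel's MAX_ERRNO of 4095; make_addr() is an invented stand-in:

#include <stdio.h>

static inline void *ERR_PTR(long error) { return (void *) error; }
static inline long PTR_ERR(const void *ptr) { return (long) ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* the top 4095 addresses are reserved for encoded errnos */
	return (unsigned long) ptr >= (unsigned long) -4095;
}

static void *make_addr(int fail)
{
	static int addr = 42;			/* stand-in for a real object */

	return fail ? ERR_PTR(-12 /* ENOMEM */) : &addr;
}

int main(void)
{
	void *p = make_addr(1);

	if (IS_ERR(p))
		printf("failed: errno %ld\n", -PTR_ERR(p));
	return 0;
}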
507/* 576/*
@@ -1056,32 +1125,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1056{ 1125{
1057 struct ifaddrmsg *ifm; 1126 struct ifaddrmsg *ifm;
1058 struct nlmsghdr *nlh; 1127 struct nlmsghdr *nlh;
1059 unsigned char *b = skb->tail;
1060 1128
1061 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); 1129 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1062 ifm = NLMSG_DATA(nlh); 1130 if (nlh == NULL)
1131 return -ENOBUFS;
1132
1133 ifm = nlmsg_data(nlh);
1063 ifm->ifa_family = AF_INET; 1134 ifm->ifa_family = AF_INET;
1064 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1135 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1065 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; 1136 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1066 ifm->ifa_scope = ifa->ifa_scope; 1137 ifm->ifa_scope = ifa->ifa_scope;
1067 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1138 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1139
1068 if (ifa->ifa_address) 1140 if (ifa->ifa_address)
1069 RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); 1141 NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address);
1142
1070 if (ifa->ifa_local) 1143 if (ifa->ifa_local)
1071 RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); 1144 NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local);
1145
1072 if (ifa->ifa_broadcast) 1146 if (ifa->ifa_broadcast)
1073 RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); 1147 NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1148
1074 if (ifa->ifa_anycast) 1149 if (ifa->ifa_anycast)
1075 RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); 1150 NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1151
1076 if (ifa->ifa_label[0]) 1152 if (ifa->ifa_label[0])
1077 RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); 1153 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1078 nlh->nlmsg_len = skb->tail - b;
1079 return skb->len;
1080 1154
1081nlmsg_failure: 1155 return nlmsg_end(skb, nlh);
1082rtattr_failure: 1156
1083 skb_trim(skb, b - skb->data); 1157nla_put_failure:
1084 return -1; 1158 return nlmsg_cancel(skb, nlh);
1085} 1159}
1086 1160
1087static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1161static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1127,19 +1201,27 @@ done:
1127 return skb->len; 1201 return skb->len;
1128} 1202}
1129 1203
1130static void rtmsg_ifa(int event, struct in_ifaddr* ifa) 1204static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1205 u32 pid)
1131{ 1206{
1132 int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128); 1207 struct sk_buff *skb;
1133 struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); 1208 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1209 int err = -ENOBUFS;
1210
1211 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
1212 if (skb == NULL)
1213 goto errout;
1134 1214
1135 if (!skb) 1215 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1136 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); 1216 if (err < 0) {
1137 else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
1138 kfree_skb(skb); 1217 kfree_skb(skb);
1139 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); 1218 goto errout;
1140 } else {
1141 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
1142 } 1219 }
1220
1221 err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1222errout:
1223 if (err < 0)
1224 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1143} 1225}
1144 1226
1145static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { 1227static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -1151,9 +1233,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1151 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, 1233 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
1152 .dumpit = inet_dump_fib, }, 1234 .dumpit = inet_dump_fib, },
1153#ifdef CONFIG_IP_MULTIPLE_TABLES 1235#ifdef CONFIG_IP_MULTIPLE_TABLES
1154 [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, 1236 [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
1155 [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, },
1156 [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, },
1157#endif 1237#endif
1158}; 1238};
1159 1239
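The devinet.c conversion above replaces open-coded rtattr offset arithmetic (rta[IFA_LOCAL - 1] plus 4-byte memcpy) with nlmsg_parse() filling a type-indexed table and typed accessors such as nla_get_u32() and nla_strlcpy(). A minimal userspace sketch of that table-parse pattern follows; the demo_* names are invented, a little-endian host is assumed for the raw bytes, and the real nla_parse() additionally enforces the per-type policy passed alongside:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct nlattr { uint16_t nla_len; uint16_t nla_type; };

#define NLA_ALIGN(n) (((n) + 3) & ~3)
#define NLA_HDRLEN   ((int) NLA_ALIGN(sizeof(struct nlattr)))

static void demo_parse(const void *buf, int len,
		       const struct nlattr *tb[], int maxtype)
{
	const struct nlattr *nla = buf;

	memset(tb, 0, sizeof(*tb) * (maxtype + 1));
	/* nla_len includes the 4-byte header; payloads are 4-byte aligned */
	while (len >= NLA_HDRLEN &&
	       nla->nla_len >= NLA_HDRLEN && nla->nla_len <= len) {
		if (nla->nla_type <= maxtype)
			tb[nla->nla_type] = nla;	/* last one wins */
		len -= NLA_ALIGN(nla->nla_len);
		nla = (const struct nlattr *)
			((const char *) nla + NLA_ALIGN(nla->nla_len));
	}
}

static uint32_t demo_get_u32(const struct nlattr *nla)
{
	uint32_t v;

	memcpy(&v, (const char *) nla + NLA_HDRLEN, sizeof(v));
	return v;
}

int main(void)
{
	/* one u32 attribute: len 8, type 2 (say, IFA_LOCAL), value 127.0.0.1 */
	unsigned char buf[8] = { 8, 0, 2, 0, 0x7f, 0x00, 0x00, 0x01 };
	const struct nlattr *tb[3];

	demo_parse(buf, sizeof(buf), tb, 2);
	if (tb[2])
		printf("attr 2 = 0x%08x\n", demo_get_u32(tb[2]));
	return 0;
}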
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b428489f6ccd..13b29360d102 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -95,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
95 esph->seq_no = htonl(++x->replay.oseq); 95 esph->seq_no = htonl(++x->replay.oseq);
96 xfrm_aevent_doreplay(x); 96 xfrm_aevent_doreplay(x);
97 97
98 if (esp->conf.ivlen) 98 if (esp->conf.ivlen) {
99 if (unlikely(!esp->conf.ivinitted)) {
100 get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
101 esp->conf.ivinitted = 1;
102 }
99 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); 103 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
104 }
100 105
101 do { 106 do {
102 struct scatterlist *sg = &esp->sgbuf[0]; 107 struct scatterlist *sg = &esp->sgbuf[0];
@@ -248,7 +253,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
248 * as per draft-ietf-ipsec-udp-encaps-06, 253 * as per draft-ietf-ipsec-udp-encaps-06,
249 * section 3.1.2 254 * section 3.1.2
250 */ 255 */
251 if (!x->props.mode) 256 if (x->props.mode == XFRM_MODE_TRANSPORT)
252 skb->ip_summed = CHECKSUM_UNNECESSARY; 257 skb->ip_summed = CHECKSUM_UNNECESSARY;
253 } 258 }
254 259
@@ -267,7 +272,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
267 struct esp_data *esp = x->data; 272 struct esp_data *esp = x->data;
268 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 273 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
269 274
270 if (x->props.mode) { 275 if (x->props.mode == XFRM_MODE_TUNNEL) {
271 mtu = ALIGN(mtu + 2, blksize); 276 mtu = ALIGN(mtu + 2, blksize);
272 } else { 277 } else {
273 /* The worst case. */ 278 /* The worst case. */
@@ -378,12 +383,12 @@ static int esp_init_state(struct xfrm_state *x)
378 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); 383 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
379 if (unlikely(esp->conf.ivec == NULL)) 384 if (unlikely(esp->conf.ivec == NULL))
380 goto error; 385 goto error;
381 get_random_bytes(esp->conf.ivec, esp->conf.ivlen); 386 esp->conf.ivinitted = 0;
382 } 387 }
383 if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) 388 if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
384 goto error; 389 goto error;
385 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; 390 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
386 if (x->props.mode) 391 if (x->props.mode == XFRM_MODE_TUNNEL)
387 x->props.header_len += sizeof(struct iphdr); 392 x->props.header_len += sizeof(struct iphdr);
388 if (x->encap) { 393 if (x->encap) {
389 struct xfrm_encap_tmpl *encap = x->encap; 394 struct xfrm_encap_tmpl *encap = x->encap;
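The esp4.c hunks replace the boolean x->props.mode tests with explicit XFRM_MODE_TRANSPORT/XFRM_MODE_TUNNEL comparisons and, more subtly, stop drawing from the RNG in esp_init_state(): the IV buffer is now filled lazily on the first esp_output() via the new conf.ivinitted flag, so creating an SA no longer consumes entropy it may never use. The one-shot pattern in isolation, as a userspace sketch in which rng_fill() stands in for get_random_bytes():

#include <stdio.h>
#include <stdlib.h>

struct esp_conf { unsigned char iv[8]; int ivlen; int ivinitted; };

static void rng_fill(unsigned char *buf, int len)	/* get_random_bytes() stand-in */
{
	for (int i = 0; i < len; i++)
		buf[i] = rand() & 0xff;
}

static void esp_output_iv(struct esp_conf *c)
{
	if (c->ivlen && !c->ivinitted) {	/* only the first packet pays */
		rng_fill(c->iv, c->ivlen);
		c->ivinitted = 1;
	}
	/* ... hand c->iv to the block cipher and encrypt ... */
}

int main(void)
{
	struct esp_conf c = { .ivlen = 8 };	/* ivinitted starts 0, as in esp_init_state() */

	esp_output_iv(&c);			/* initializes the IV */
	esp_output_iv(&c);			/* re-init skipped */
	printf("iv[0]=%02x initted=%d\n", c.iv[0], c.ivinitted);
	return 0;
}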
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ba2a70745a63..cfb527c060e4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,10 +32,12 @@
32#include <linux/inet.h> 32#include <linux/inet.h>
33#include <linux/inetdevice.h> 33#include <linux/inetdevice.h>
34#include <linux/netdevice.h> 34#include <linux/netdevice.h>
35#include <linux/if_addr.h>
35#include <linux/if_arp.h> 36#include <linux/if_arp.h>
36#include <linux/skbuff.h> 37#include <linux/skbuff.h>
37#include <linux/netlink.h> 38#include <linux/netlink.h>
38#include <linux/init.h> 39#include <linux/init.h>
40#include <linux/list.h>
39 41
40#include <net/ip.h> 42#include <net/ip.h>
41#include <net/protocol.h> 43#include <net/protocol.h>
@@ -50,48 +52,67 @@
50 52
51#ifndef CONFIG_IP_MULTIPLE_TABLES 53#ifndef CONFIG_IP_MULTIPLE_TABLES
52 54
53#define RT_TABLE_MIN RT_TABLE_MAIN
54
55struct fib_table *ip_fib_local_table; 55struct fib_table *ip_fib_local_table;
56struct fib_table *ip_fib_main_table; 56struct fib_table *ip_fib_main_table;
57 57
58#else 58#define FIB_TABLE_HASHSZ 1
59static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59 60
60#define RT_TABLE_MIN 1 61#else
61 62
62struct fib_table *fib_tables[RT_TABLE_MAX+1]; 63#define FIB_TABLE_HASHSZ 256
64static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
63 65
64struct fib_table *__fib_new_table(int id) 66struct fib_table *fib_new_table(u32 id)
65{ 67{
66 struct fib_table *tb; 68 struct fib_table *tb;
69 unsigned int h;
67 70
71 if (id == 0)
72 id = RT_TABLE_MAIN;
73 tb = fib_get_table(id);
74 if (tb)
75 return tb;
68 tb = fib_hash_init(id); 76 tb = fib_hash_init(id);
69 if (!tb) 77 if (!tb)
70 return NULL; 78 return NULL;
71 fib_tables[id] = tb; 79 h = id & (FIB_TABLE_HASHSZ - 1);
80 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
72 return tb; 81 return tb;
73} 82}
74 83
84struct fib_table *fib_get_table(u32 id)
85{
86 struct fib_table *tb;
87 struct hlist_node *node;
88 unsigned int h;
75 89
90 if (id == 0)
91 id = RT_TABLE_MAIN;
92 h = id & (FIB_TABLE_HASHSZ - 1);
93 rcu_read_lock();
94 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
95 if (tb->tb_id == id) {
96 rcu_read_unlock();
97 return tb;
98 }
99 }
100 rcu_read_unlock();
101 return NULL;
102}
76#endif /* CONFIG_IP_MULTIPLE_TABLES */ 103#endif /* CONFIG_IP_MULTIPLE_TABLES */
77 104
78
79static void fib_flush(void) 105static void fib_flush(void)
80{ 106{
81 int flushed = 0; 107 int flushed = 0;
82#ifdef CONFIG_IP_MULTIPLE_TABLES
83 struct fib_table *tb; 108 struct fib_table *tb;
84 int id; 109 struct hlist_node *node;
110 unsigned int h;
85 111
86 for (id = RT_TABLE_MAX; id>0; id--) { 112 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
87 if ((tb = fib_get_table(id))==NULL) 113 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
88 continue; 114 flushed += tb->tb_flush(tb);
89 flushed += tb->tb_flush(tb);
90 } 115 }
91#else /* CONFIG_IP_MULTIPLE_TABLES */
92 flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
93 flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
94#endif /* CONFIG_IP_MULTIPLE_TABLES */
95 116
96 if (flushed) 117 if (flushed)
97 rt_cache_flush(-1); 118 rt_cache_flush(-1);
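With table ids widening to u32 (RTA_TABLE can name tables beyond 255), the fixed fib_tables[RT_TABLE_MAX+1] array can no longer back fib_new_table()/fib_get_table(); both CONFIG_IP_MULTIPLE_TABLES variants now share a hash of hlist buckets keyed by id & (FIB_TABLE_HASHSZ - 1), which also lets fib_flush() walk every table uniformly. A compact userspace model of the bucketing, with plain singly linked lists standing in for the kernel's hlist/RCU machinery and invented demo_* names:

#include <stdio.h>
#include <stdlib.h>

#define FIB_TABLE_HASHSZ 256		/* must stay a power of two for the mask */

struct demo_table {
	unsigned int tb_id;
	struct demo_table *next;
};

static struct demo_table *hash[FIB_TABLE_HASHSZ];

static struct demo_table *demo_get_table(unsigned int id)
{
	for (struct demo_table *t = hash[id & (FIB_TABLE_HASHSZ - 1)]; t; t = t->next)
		if (t->tb_id == id)
			return t;
	return NULL;
}

static struct demo_table *demo_new_table(unsigned int id)
{
	struct demo_table *t = demo_get_table(id);
	unsigned int h;

	if (t)
		return t;			/* idempotent, like fib_new_table() */
	t = calloc(1, sizeof(*t));
	if (t == NULL)
		return NULL;
	t->tb_id = id;
	h = id & (FIB_TABLE_HASHSZ - 1);
	t->next = hash[h];			/* push onto the bucket chain */
	hash[h] = t;
	return t;
}

int main(void)
{
	demo_new_table(254);			/* RT_TABLE_MAIN */
	demo_new_table(255);			/* RT_TABLE_LOCAL */
	printf("%s\n", demo_get_table(254) ? "found main" : "missing");
	return 0;
}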
@@ -232,42 +253,190 @@ e_inval:
232 253
233#ifndef CONFIG_IP_NOSIOCRT 254#ifndef CONFIG_IP_NOSIOCRT
234 255
256static inline u32 sk_extract_addr(struct sockaddr *addr)
257{
258 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
259}
260
261static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
262{
263 struct nlattr *nla;
264
265 nla = (struct nlattr *) ((char *) mx + len);
266 nla->nla_type = type;
267 nla->nla_len = nla_attr_size(4);
268 *(u32 *) nla_data(nla) = value;
269
270 return len + nla_total_size(4);
271}
272
273static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
274 struct fib_config *cfg)
275{
276 u32 addr;
277 int plen;
278
279 memset(cfg, 0, sizeof(*cfg));
280
281 if (rt->rt_dst.sa_family != AF_INET)
282 return -EAFNOSUPPORT;
283
284 /*
285 * Check mask for validity:
286 * a) it must be contiguous.
287 * b) destination must have all host bits clear.
 288	 * c) if the application forgot to set the correct family (AF_INET),
 289	 *    reject the request unless it is absolutely clear, i.e.
290 * both family and mask are zero.
291 */
292 plen = 32;
293 addr = sk_extract_addr(&rt->rt_dst);
294 if (!(rt->rt_flags & RTF_HOST)) {
295 u32 mask = sk_extract_addr(&rt->rt_genmask);
296
297 if (rt->rt_genmask.sa_family != AF_INET) {
298 if (mask || rt->rt_genmask.sa_family)
299 return -EAFNOSUPPORT;
300 }
301
302 if (bad_mask(mask, addr))
303 return -EINVAL;
304
305 plen = inet_mask_len(mask);
306 }
307
308 cfg->fc_dst_len = plen;
309 cfg->fc_dst = addr;
310
311 if (cmd != SIOCDELRT) {
312 cfg->fc_nlflags = NLM_F_CREATE;
313 cfg->fc_protocol = RTPROT_BOOT;
314 }
315
316 if (rt->rt_metric)
317 cfg->fc_priority = rt->rt_metric - 1;
318
319 if (rt->rt_flags & RTF_REJECT) {
320 cfg->fc_scope = RT_SCOPE_HOST;
321 cfg->fc_type = RTN_UNREACHABLE;
322 return 0;
323 }
324
325 cfg->fc_scope = RT_SCOPE_NOWHERE;
326 cfg->fc_type = RTN_UNICAST;
327
328 if (rt->rt_dev) {
329 char *colon;
330 struct net_device *dev;
331 char devname[IFNAMSIZ];
332
333 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
334 return -EFAULT;
335
336 devname[IFNAMSIZ-1] = 0;
337 colon = strchr(devname, ':');
338 if (colon)
339 *colon = 0;
340 dev = __dev_get_by_name(devname);
341 if (!dev)
342 return -ENODEV;
343 cfg->fc_oif = dev->ifindex;
344 if (colon) {
345 struct in_ifaddr *ifa;
346 struct in_device *in_dev = __in_dev_get_rtnl(dev);
347 if (!in_dev)
348 return -ENODEV;
349 *colon = ':';
350 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
351 if (strcmp(ifa->ifa_label, devname) == 0)
352 break;
353 if (ifa == NULL)
354 return -ENODEV;
355 cfg->fc_prefsrc = ifa->ifa_local;
356 }
357 }
358
359 addr = sk_extract_addr(&rt->rt_gateway);
360 if (rt->rt_gateway.sa_family == AF_INET && addr) {
361 cfg->fc_gw = addr;
362 if (rt->rt_flags & RTF_GATEWAY &&
363 inet_addr_type(addr) == RTN_UNICAST)
364 cfg->fc_scope = RT_SCOPE_UNIVERSE;
365 }
366
367 if (cmd == SIOCDELRT)
368 return 0;
369
370 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
371 return -EINVAL;
372
373 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
374 cfg->fc_scope = RT_SCOPE_LINK;
375
376 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
377 struct nlattr *mx;
378 int len = 0;
379
380 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
381 if (mx == NULL)
382 return -ENOMEM;
383
384 if (rt->rt_flags & RTF_MTU)
385 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
386
387 if (rt->rt_flags & RTF_WINDOW)
388 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
389
390 if (rt->rt_flags & RTF_IRTT)
391 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
392
393 cfg->fc_mx = mx;
394 cfg->fc_mx_len = len;
395 }
396
397 return 0;
398}
399
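rtentry_to_fib_config() keeps the old ioctl-path sanity rules: the netmask must be contiguous ones, the destination must not set bits outside it, and the prefix length then falls out of the mask. The helpers it leans on, bad_mask() and inet_mask_len(), live elsewhere in the tree; a standalone sketch of their logic, in host byte order for readability:

#include <stdio.h>
#include <stdint.h>

static int demo_bad_mask(uint32_t mask, uint32_t addr)
{
	uint32_t hole = ~mask;

	if (addr & hole)			/* host bits set in the destination */
		return 1;
	return (hole & (hole + 1)) != 0;	/* non-zero iff the mask has a hole */
}

static int demo_mask_len(uint32_t mask)
{
	int len = 0;

	while (mask) {				/* count the leading ones of a contiguous mask */
		len++;
		mask <<= 1;
	}
	return len;
}

int main(void)
{
	printf("%d\n", demo_bad_mask(0xffffff00, 0x0a000100));	/* 10.0.1.0/24: ok (0) */
	printf("%d\n", demo_bad_mask(0xffffff00, 0x0a000101));	/* host bit set: bad (1) */
	printf("%d\n", demo_mask_len(0xffffff00));		/* 24 */
	return 0;
}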
235/* 400/*
236 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 401 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
237 */ 402 */
238 403
239int ip_rt_ioctl(unsigned int cmd, void __user *arg) 404int ip_rt_ioctl(unsigned int cmd, void __user *arg)
240{ 405{
406 struct fib_config cfg;
407 struct rtentry rt;
241 int err; 408 int err;
242 struct kern_rta rta;
243 struct rtentry r;
244 struct {
245 struct nlmsghdr nlh;
246 struct rtmsg rtm;
247 } req;
248 409
249 switch (cmd) { 410 switch (cmd) {
250 case SIOCADDRT: /* Add a route */ 411 case SIOCADDRT: /* Add a route */
251 case SIOCDELRT: /* Delete a route */ 412 case SIOCDELRT: /* Delete a route */
252 if (!capable(CAP_NET_ADMIN)) 413 if (!capable(CAP_NET_ADMIN))
253 return -EPERM; 414 return -EPERM;
254 if (copy_from_user(&r, arg, sizeof(struct rtentry))) 415
416 if (copy_from_user(&rt, arg, sizeof(rt)))
255 return -EFAULT; 417 return -EFAULT;
418
256 rtnl_lock(); 419 rtnl_lock();
257 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); 420 err = rtentry_to_fib_config(cmd, &rt, &cfg);
258 if (err == 0) { 421 if (err == 0) {
422 struct fib_table *tb;
423
259 if (cmd == SIOCDELRT) { 424 if (cmd == SIOCDELRT) {
260 struct fib_table *tb = fib_get_table(req.rtm.rtm_table); 425 tb = fib_get_table(cfg.fc_table);
261 err = -ESRCH;
262 if (tb) 426 if (tb)
263 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); 427 err = tb->tb_delete(tb, &cfg);
428 else
429 err = -ESRCH;
264 } else { 430 } else {
265 struct fib_table *tb = fib_new_table(req.rtm.rtm_table); 431 tb = fib_new_table(cfg.fc_table);
266 err = -ENOBUFS;
267 if (tb) 432 if (tb)
268 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); 433 err = tb->tb_insert(tb, &cfg);
434 else
435 err = -ENOBUFS;
269 } 436 }
270 kfree(rta.rta_mx); 437
438 /* allocated by rtentry_to_fib_config() */
439 kfree(cfg.fc_mx);
271 } 440 }
272 rtnl_unlock(); 441 rtnl_unlock();
273 return err; 442 return err;
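The caller side of this path is unchanged: route(8)-era programs still hand the kernel a struct rtentry through SIOCADDRT/SIOCDELRT, which now lands in rtentry_to_fib_config() instead of fib_convert_rtentry(). A minimal (privileged) userspace invocation, with error handling trimmed to the essentials:

#include <arpa/inet.h>
#include <net/route.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

static void set_addr(struct sockaddr *sa, in_addr_t a)
{
	struct sockaddr_in *sin = (struct sockaddr_in *) sa;

	memset(sin, 0, sizeof(*sin));
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = a;
}

int main(void)
{
	struct rtentry rt;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&rt, 0, sizeof(rt));
	set_addr(&rt.rt_dst,     inet_addr("10.1.0.0"));
	set_addr(&rt.rt_genmask, inet_addr("255.255.0.0"));
	set_addr(&rt.rt_gateway, inet_addr("192.168.0.1"));
	rt.rt_flags = RTF_UP | RTF_GATEWAY;	/* RTF_GATEWAY maps to cfg->fc_gw */

	/* rtentry_to_fib_config() turns this into a fib_config for tb_insert() */
	if (ioctl(fd, SIOCADDRT, &rt) < 0)
		perror("SIOCADDRT");
	close(fd);
	return 0;
}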
@@ -284,77 +453,169 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
284 453
285#endif 454#endif
286 455
287static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) 456struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
457 [RTA_DST] = { .type = NLA_U32 },
458 [RTA_SRC] = { .type = NLA_U32 },
459 [RTA_IIF] = { .type = NLA_U32 },
460 [RTA_OIF] = { .type = NLA_U32 },
461 [RTA_GATEWAY] = { .type = NLA_U32 },
462 [RTA_PRIORITY] = { .type = NLA_U32 },
463 [RTA_PREFSRC] = { .type = NLA_U32 },
464 [RTA_METRICS] = { .type = NLA_NESTED },
465 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
466 [RTA_PROTOINFO] = { .type = NLA_U32 },
467 [RTA_FLOW] = { .type = NLA_U32 },
468 [RTA_MP_ALGO] = { .type = NLA_U32 },
469};
470
471static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
472 struct fib_config *cfg)
288{ 473{
289 int i; 474 struct nlattr *attr;
290 475 int err, remaining;
291 for (i=1; i<=RTA_MAX; i++, rta++) { 476 struct rtmsg *rtm;
292 struct rtattr *attr = *rta; 477
293 if (attr) { 478 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
294 if (RTA_PAYLOAD(attr) < 4) 479 if (err < 0)
295 return -EINVAL; 480 goto errout;
296 if (i != RTA_MULTIPATH && i != RTA_METRICS) 481
297 *rta = (struct rtattr*)RTA_DATA(attr); 482 memset(cfg, 0, sizeof(*cfg));
483
484 rtm = nlmsg_data(nlh);
485 cfg->fc_family = rtm->rtm_family;
486 cfg->fc_dst_len = rtm->rtm_dst_len;
487 cfg->fc_src_len = rtm->rtm_src_len;
488 cfg->fc_tos = rtm->rtm_tos;
489 cfg->fc_table = rtm->rtm_table;
490 cfg->fc_protocol = rtm->rtm_protocol;
491 cfg->fc_scope = rtm->rtm_scope;
492 cfg->fc_type = rtm->rtm_type;
493 cfg->fc_flags = rtm->rtm_flags;
494 cfg->fc_nlflags = nlh->nlmsg_flags;
495
496 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
497 cfg->fc_nlinfo.nlh = nlh;
498
499 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
500 switch (attr->nla_type) {
501 case RTA_DST:
502 cfg->fc_dst = nla_get_u32(attr);
503 break;
504 case RTA_SRC:
505 cfg->fc_src = nla_get_u32(attr);
506 break;
507 case RTA_OIF:
508 cfg->fc_oif = nla_get_u32(attr);
509 break;
510 case RTA_GATEWAY:
511 cfg->fc_gw = nla_get_u32(attr);
512 break;
513 case RTA_PRIORITY:
514 cfg->fc_priority = nla_get_u32(attr);
515 break;
516 case RTA_PREFSRC:
517 cfg->fc_prefsrc = nla_get_u32(attr);
518 break;
519 case RTA_METRICS:
520 cfg->fc_mx = nla_data(attr);
521 cfg->fc_mx_len = nla_len(attr);
522 break;
523 case RTA_MULTIPATH:
524 cfg->fc_mp = nla_data(attr);
525 cfg->fc_mp_len = nla_len(attr);
526 break;
527 case RTA_FLOW:
528 cfg->fc_flow = nla_get_u32(attr);
529 break;
530 case RTA_MP_ALGO:
531 cfg->fc_mp_alg = nla_get_u32(attr);
532 break;
533 case RTA_TABLE:
534 cfg->fc_table = nla_get_u32(attr);
535 break;
298 } 536 }
299 } 537 }
538
300 return 0; 539 return 0;
540errout:
541 return err;
301} 542}
302 543
303int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 544int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
304{ 545{
305 struct fib_table * tb; 546 struct fib_config cfg;
306 struct rtattr **rta = arg; 547 struct fib_table *tb;
307 struct rtmsg *r = NLMSG_DATA(nlh); 548 int err;
308 549
309 if (inet_check_attr(r, rta)) 550 err = rtm_to_fib_config(skb, nlh, &cfg);
310 return -EINVAL; 551 if (err < 0)
552 goto errout;
311 553
312 tb = fib_get_table(r->rtm_table); 554 tb = fib_get_table(cfg.fc_table);
313 if (tb) 555 if (tb == NULL) {
314 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); 556 err = -ESRCH;
315 return -ESRCH; 557 goto errout;
558 }
559
560 err = tb->tb_delete(tb, &cfg);
561errout:
562 return err;
316} 563}
317 564
318int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 565int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
319{ 566{
320 struct fib_table * tb; 567 struct fib_config cfg;
321 struct rtattr **rta = arg; 568 struct fib_table *tb;
322 struct rtmsg *r = NLMSG_DATA(nlh); 569 int err;
323 570
324 if (inet_check_attr(r, rta)) 571 err = rtm_to_fib_config(skb, nlh, &cfg);
325 return -EINVAL; 572 if (err < 0)
573 goto errout;
326 574
327 tb = fib_new_table(r->rtm_table); 575 tb = fib_new_table(cfg.fc_table);
328 if (tb) 576 if (tb == NULL) {
329 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); 577 err = -ENOBUFS;
330 return -ENOBUFS; 578 goto errout;
579 }
580
581 err = tb->tb_insert(tb, &cfg);
582errout:
583 return err;
331} 584}
332 585
333int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 586int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
334{ 587{
335 int t; 588 unsigned int h, s_h;
336 int s_t; 589 unsigned int e = 0, s_e;
337 struct fib_table *tb; 590 struct fib_table *tb;
591 struct hlist_node *node;
592 int dumped = 0;
338 593
339 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && 594 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
340 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) 595 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
341 return ip_rt_dump(skb, cb); 596 return ip_rt_dump(skb, cb);
342 597
343 s_t = cb->args[0]; 598 s_h = cb->args[0];
344 if (s_t == 0) 599 s_e = cb->args[1];
345 s_t = cb->args[0] = RT_TABLE_MIN; 600
346 601 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
347 for (t=s_t; t<=RT_TABLE_MAX; t++) { 602 e = 0;
348 if (t < s_t) continue; 603 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
349 if (t > s_t) 604 if (e < s_e)
350 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 605 goto next;
351 if ((tb = fib_get_table(t))==NULL) 606 if (dumped)
352 continue; 607 memset(&cb->args[2], 0, sizeof(cb->args) -
353 if (tb->tb_dump(tb, skb, cb) < 0) 608 2 * sizeof(cb->args[0]));
354 break; 609 if (tb->tb_dump(tb, skb, cb) < 0)
610 goto out;
611 dumped = 1;
612next:
613 e++;
614 }
355 } 615 }
356 616out:
357 cb->args[0] = t; 617 cb->args[1] = e;
618 cb->args[0] = h;
358 619
359 return skb->len; 620 return skb->len;
360} 621}
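inet_dump_fib() now resumes across two dimensions, the hash bucket in cb->args[0] and the entry within it in cb->args[1], leaving cb->args[2..] to the per-table dumpers; that is why fn_hash_dump() and friends shift their cursor slots by one further down. The resume idiom in miniature, as a userspace sketch where a fixed budget plays the role of the skb filling up:

#include <stdio.h>

#define NBUCKETS 4
static const int bucket_len[NBUCKETS] = { 2, 0, 3, 1 };

static int fill(long args[2], int budget)
{
	int h, e;

	/* args[1] is reset to 0 whenever we advance to the next bucket */
	for (h = args[0]; h < NBUCKETS; h++, args[1] = 0) {
		for (e = args[1]; e < bucket_len[h]; e++) {
			if (budget-- == 0) {		/* "skb full": park the cursor */
				args[0] = h;
				args[1] = e;
				return 1;		/* more to come */
			}
			printf("entry %d.%d\n", h, e);
		}
	}
	args[0] = h;
	return 0;					/* drained */
}

int main(void)
{
	long args[2] = { 0, 0 };

	while (fill(args, 2))				/* resume until done */
		puts("-- resumed --");
	return 0;
}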
@@ -366,17 +627,19 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
366 only when netlink is already locked. 627 only when netlink is already locked.
367 */ 628 */
368 629
369static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) 630static void fib_magic(int cmd, int type, u32 dst, int dst_len,
631 struct in_ifaddr *ifa)
370{ 632{
371 struct fib_table * tb; 633 struct fib_table *tb;
372 struct { 634 struct fib_config cfg = {
373 struct nlmsghdr nlh; 635 .fc_protocol = RTPROT_KERNEL,
374 struct rtmsg rtm; 636 .fc_type = type,
375 } req; 637 .fc_dst = dst,
376 struct kern_rta rta; 638 .fc_dst_len = dst_len,
377 639 .fc_prefsrc = ifa->ifa_local,
378 memset(&req.rtm, 0, sizeof(req.rtm)); 640 .fc_oif = ifa->ifa_dev->dev->ifindex,
379 memset(&rta, 0, sizeof(rta)); 641 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
642 };
380 643
381 if (type == RTN_UNICAST) 644 if (type == RTN_UNICAST)
382 tb = fib_new_table(RT_TABLE_MAIN); 645 tb = fib_new_table(RT_TABLE_MAIN);
@@ -386,26 +649,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr
386 if (tb == NULL) 649 if (tb == NULL)
387 return; 650 return;
388 651
389 req.nlh.nlmsg_len = sizeof(req); 652 cfg.fc_table = tb->tb_id;
390 req.nlh.nlmsg_type = cmd;
391 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
392 req.nlh.nlmsg_pid = 0;
393 req.nlh.nlmsg_seq = 0;
394 653
395 req.rtm.rtm_dst_len = dst_len; 654 if (type != RTN_LOCAL)
396 req.rtm.rtm_table = tb->tb_id; 655 cfg.fc_scope = RT_SCOPE_LINK;
397 req.rtm.rtm_protocol = RTPROT_KERNEL; 656 else
398 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); 657 cfg.fc_scope = RT_SCOPE_HOST;
399 req.rtm.rtm_type = type;
400
401 rta.rta_dst = &dst;
402 rta.rta_prefsrc = &ifa->ifa_local;
403 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
404 658
405 if (cmd == RTM_NEWROUTE) 659 if (cmd == RTM_NEWROUTE)
406 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); 660 tb->tb_insert(tb, &cfg);
407 else 661 else
408 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); 662 tb->tb_delete(tb, &cfg);
409} 663}
410 664
411void fib_add_ifaddr(struct in_ifaddr *ifa) 665void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -652,11 +906,17 @@ static struct notifier_block fib_netdev_notifier = {
652 906
653void __init ip_fib_init(void) 907void __init ip_fib_init(void)
654{ 908{
909 unsigned int i;
910
911 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
912 INIT_HLIST_HEAD(&fib_table_hash[i]);
655#ifndef CONFIG_IP_MULTIPLE_TABLES 913#ifndef CONFIG_IP_MULTIPLE_TABLES
656 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); 914 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
915 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
657 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); 916 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
917 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
658#else 918#else
659 fib_rules_init(); 919 fib4_rules_init();
660#endif 920#endif
661 921
662 register_netdevice_notifier(&fib_netdev_notifier); 922 register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 72c633b357cf..88133b383dc5 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
379 return NULL; 379 return NULL;
380} 380}
381 381
382static int 382static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
383fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
384 struct nlmsghdr *n, struct netlink_skb_parms *req)
385{ 383{
386 struct fn_hash *table = (struct fn_hash *) tb->tb_data; 384 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
387 struct fib_node *new_f, *f; 385 struct fib_node *new_f, *f;
388 struct fib_alias *fa, *new_fa; 386 struct fib_alias *fa, *new_fa;
389 struct fn_zone *fz; 387 struct fn_zone *fz;
390 struct fib_info *fi; 388 struct fib_info *fi;
391 int z = r->rtm_dst_len; 389 u8 tos = cfg->fc_tos;
392 int type = r->rtm_type;
393 u8 tos = r->rtm_tos;
394 u32 key; 390 u32 key;
395 int err; 391 int err;
396 392
397 if (z > 32) 393 if (cfg->fc_dst_len > 32)
398 return -EINVAL; 394 return -EINVAL;
399 fz = table->fn_zones[z]; 395
400 if (!fz && !(fz = fn_new_zone(table, z))) 396 fz = table->fn_zones[cfg->fc_dst_len];
397 if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
401 return -ENOBUFS; 398 return -ENOBUFS;
402 399
403 key = 0; 400 key = 0;
404 if (rta->rta_dst) { 401 if (cfg->fc_dst) {
405 u32 dst; 402 if (cfg->fc_dst & ~FZ_MASK(fz))
406 memcpy(&dst, rta->rta_dst, 4);
407 if (dst & ~FZ_MASK(fz))
408 return -EINVAL; 403 return -EINVAL;
409 key = fz_key(dst, fz); 404 key = fz_key(cfg->fc_dst, fz);
410 } 405 }
411 406
412 if ((fi = fib_create_info(r, rta, n, &err)) == NULL) 407 fi = fib_create_info(cfg);
413 return err; 408 if (IS_ERR(fi))
409 return PTR_ERR(fi);
414 410
415 if (fz->fz_nent > (fz->fz_divisor<<1) && 411 if (fz->fz_nent > (fz->fz_divisor<<1) &&
416 fz->fz_divisor < FZ_MAX_DIVISOR && 412 fz->fz_divisor < FZ_MAX_DIVISOR &&
417 (z==32 || (1<<z) > fz->fz_divisor)) 413 (cfg->fc_dst_len == 32 ||
414 (1 << cfg->fc_dst_len) > fz->fz_divisor))
418 fn_rehash_zone(fz); 415 fn_rehash_zone(fz);
419 416
420 f = fib_find_node(fz, key); 417 f = fib_find_node(fz, key);
@@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
440 struct fib_alias *fa_orig; 437 struct fib_alias *fa_orig;
441 438
442 err = -EEXIST; 439 err = -EEXIST;
443 if (n->nlmsg_flags & NLM_F_EXCL) 440 if (cfg->fc_nlflags & NLM_F_EXCL)
444 goto out; 441 goto out;
445 442
446 if (n->nlmsg_flags & NLM_F_REPLACE) { 443 if (cfg->fc_nlflags & NLM_F_REPLACE) {
447 struct fib_info *fi_drop; 444 struct fib_info *fi_drop;
448 u8 state; 445 u8 state;
449 446
450 write_lock_bh(&fib_hash_lock); 447 write_lock_bh(&fib_hash_lock);
451 fi_drop = fa->fa_info; 448 fi_drop = fa->fa_info;
452 fa->fa_info = fi; 449 fa->fa_info = fi;
453 fa->fa_type = type; 450 fa->fa_type = cfg->fc_type;
454 fa->fa_scope = r->rtm_scope; 451 fa->fa_scope = cfg->fc_scope;
455 state = fa->fa_state; 452 state = fa->fa_state;
456 fa->fa_state &= ~FA_S_ACCESSED; 453 fa->fa_state &= ~FA_S_ACCESSED;
457 fib_hash_genid++; 454 fib_hash_genid++;
@@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
474 break; 471 break;
475 if (fa->fa_info->fib_priority != fi->fib_priority) 472 if (fa->fa_info->fib_priority != fi->fib_priority)
476 break; 473 break;
477 if (fa->fa_type == type && 474 if (fa->fa_type == cfg->fc_type &&
478 fa->fa_scope == r->rtm_scope && 475 fa->fa_scope == cfg->fc_scope &&
479 fa->fa_info == fi) 476 fa->fa_info == fi)
480 goto out; 477 goto out;
481 } 478 }
482 if (!(n->nlmsg_flags & NLM_F_APPEND)) 479 if (!(cfg->fc_nlflags & NLM_F_APPEND))
483 fa = fa_orig; 480 fa = fa_orig;
484 } 481 }
485 482
486 err = -ENOENT; 483 err = -ENOENT;
487 if (!(n->nlmsg_flags&NLM_F_CREATE)) 484 if (!(cfg->fc_nlflags & NLM_F_CREATE))
488 goto out; 485 goto out;
489 486
490 err = -ENOBUFS; 487 err = -ENOBUFS;
@@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
506 503
507 new_fa->fa_info = fi; 504 new_fa->fa_info = fi;
508 new_fa->fa_tos = tos; 505 new_fa->fa_tos = tos;
509 new_fa->fa_type = type; 506 new_fa->fa_type = cfg->fc_type;
510 new_fa->fa_scope = r->rtm_scope; 507 new_fa->fa_scope = cfg->fc_scope;
511 new_fa->fa_state = 0; 508 new_fa->fa_state = 0;
512 509
513 /* 510 /*
@@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
526 fz->fz_nent++; 523 fz->fz_nent++;
527 rt_cache_flush(-1); 524 rt_cache_flush(-1);
528 525
529 rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); 526 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
527 &cfg->fc_nlinfo);
530 return 0; 528 return 0;
531 529
532out_free_new_fa: 530out_free_new_fa:
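Note that fn_hash_insert() keeps the netlink flag semantics bit-for-bit; it merely reads them from cfg->fc_nlflags now. A condensed userspace model of that decision logic (the enum names are invented; the kernel returns -EEXIST/-ENOENT instead, and NLM_F_APPEND only chooses where in the duplicate chain the new alias lands):

#include <stdio.h>

#define NLM_F_REPLACE 0x100
#define NLM_F_EXCL    0x200
#define NLM_F_CREATE  0x400
#define NLM_F_APPEND  0x800

enum verdict { REPLACE_IN_PLACE, ADD_NEW, ERR_EEXIST, ERR_ENOENT };

static enum verdict route_insert(int same_key_exists, int exact_dup,
				 unsigned int nlflags)
{
	if (same_key_exists) {
		if (nlflags & NLM_F_EXCL)
			return ERR_EEXIST;	/* caller demanded uniqueness */
		if (nlflags & NLM_F_REPLACE)
			return REPLACE_IN_PLACE; /* swap fa_info, keep position */
		if (exact_dup)
			return ERR_EEXIST;	/* identical entry, nothing to do */
	}
	if (!(nlflags & NLM_F_CREATE))
		return ERR_ENOENT;		/* no entry and not allowed to add one */
	return ADD_NEW;
}

int main(void)
{
	printf("%d\n", route_insert(1, 0, NLM_F_EXCL));		/* ERR_EEXIST */
	printf("%d\n", route_insert(0, 0, NLM_F_CREATE));	/* ADD_NEW */
	return 0;
}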
@@ -537,30 +535,25 @@ out:
537} 535}
538 536
539 537
540static int 538static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
541fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
542 struct nlmsghdr *n, struct netlink_skb_parms *req)
543{ 539{
544 struct fn_hash *table = (struct fn_hash*)tb->tb_data; 540 struct fn_hash *table = (struct fn_hash*)tb->tb_data;
545 struct fib_node *f; 541 struct fib_node *f;
546 struct fib_alias *fa, *fa_to_delete; 542 struct fib_alias *fa, *fa_to_delete;
547 int z = r->rtm_dst_len;
548 struct fn_zone *fz; 543 struct fn_zone *fz;
549 u32 key; 544 u32 key;
550 u8 tos = r->rtm_tos;
551 545
552 if (z > 32) 546 if (cfg->fc_dst_len > 32)
553 return -EINVAL; 547 return -EINVAL;
554 if ((fz = table->fn_zones[z]) == NULL) 548
549 if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL)
555 return -ESRCH; 550 return -ESRCH;
556 551
557 key = 0; 552 key = 0;
558 if (rta->rta_dst) { 553 if (cfg->fc_dst) {
559 u32 dst; 554 if (cfg->fc_dst & ~FZ_MASK(fz))
560 memcpy(&dst, rta->rta_dst, 4);
561 if (dst & ~FZ_MASK(fz))
562 return -EINVAL; 555 return -EINVAL;
563 key = fz_key(dst, fz); 556 key = fz_key(cfg->fc_dst, fz);
564 } 557 }
565 558
566 f = fib_find_node(fz, key); 559 f = fib_find_node(fz, key);
@@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
568 if (!f) 561 if (!f)
569 fa = NULL; 562 fa = NULL;
570 else 563 else
571 fa = fib_find_alias(&f->fn_alias, tos, 0); 564 fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
572 if (!fa) 565 if (!fa)
573 return -ESRCH; 566 return -ESRCH;
574 567
@@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
577 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { 570 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
578 struct fib_info *fi = fa->fa_info; 571 struct fib_info *fi = fa->fa_info;
579 572
580 if (fa->fa_tos != tos) 573 if (fa->fa_tos != cfg->fc_tos)
581 break; 574 break;
582 575
583 if ((!r->rtm_type || 576 if ((!cfg->fc_type ||
584 fa->fa_type == r->rtm_type) && 577 fa->fa_type == cfg->fc_type) &&
585 (r->rtm_scope == RT_SCOPE_NOWHERE || 578 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
586 fa->fa_scope == r->rtm_scope) && 579 fa->fa_scope == cfg->fc_scope) &&
587 (!r->rtm_protocol || 580 (!cfg->fc_protocol ||
588 fi->fib_protocol == r->rtm_protocol) && 581 fi->fib_protocol == cfg->fc_protocol) &&
589 fib_nh_match(r, n, rta, fi) == 0) { 582 fib_nh_match(cfg, fi) == 0) {
590 fa_to_delete = fa; 583 fa_to_delete = fa;
591 break; 584 break;
592 } 585 }
@@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
596 int kill_fn; 589 int kill_fn;
597 590
598 fa = fa_to_delete; 591 fa = fa_to_delete;
599 rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); 592 rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
593 tb->tb_id, &cfg->fc_nlinfo);
600 594
601 kill_fn = 0; 595 kill_fn = 0;
602 write_lock_bh(&fib_hash_lock); 596 write_lock_bh(&fib_hash_lock);
@@ -684,7 +678,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
684 struct fib_node *f; 678 struct fib_node *f;
685 int i, s_i; 679 int i, s_i;
686 680
687 s_i = cb->args[3]; 681 s_i = cb->args[4];
688 i = 0; 682 i = 0;
689 hlist_for_each_entry(f, node, head, fn_hash) { 683 hlist_for_each_entry(f, node, head, fn_hash) {
690 struct fib_alias *fa; 684 struct fib_alias *fa;
@@ -699,19 +693,19 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
699 tb->tb_id, 693 tb->tb_id,
700 fa->fa_type, 694 fa->fa_type,
701 fa->fa_scope, 695 fa->fa_scope,
702 &f->fn_key, 696 f->fn_key,
703 fz->fz_order, 697 fz->fz_order,
704 fa->fa_tos, 698 fa->fa_tos,
705 fa->fa_info, 699 fa->fa_info,
706 NLM_F_MULTI) < 0) { 700 NLM_F_MULTI) < 0) {
707 cb->args[3] = i; 701 cb->args[4] = i;
708 return -1; 702 return -1;
709 } 703 }
710 next: 704 next:
711 i++; 705 i++;
712 } 706 }
713 } 707 }
714 cb->args[3] = i; 708 cb->args[4] = i;
715 return skb->len; 709 return skb->len;
716} 710}
717 711
@@ -722,21 +716,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
722{ 716{
723 int h, s_h; 717 int h, s_h;
724 718
725 s_h = cb->args[2]; 719 s_h = cb->args[3];
726 for (h=0; h < fz->fz_divisor; h++) { 720 for (h=0; h < fz->fz_divisor; h++) {
727 if (h < s_h) continue; 721 if (h < s_h) continue;
728 if (h > s_h) 722 if (h > s_h)
729 memset(&cb->args[3], 0, 723 memset(&cb->args[4], 0,
730 sizeof(cb->args) - 3*sizeof(cb->args[0])); 724 sizeof(cb->args) - 4*sizeof(cb->args[0]));
731 if (fz->fz_hash == NULL || 725 if (fz->fz_hash == NULL ||
732 hlist_empty(&fz->fz_hash[h])) 726 hlist_empty(&fz->fz_hash[h]))
733 continue; 727 continue;
734 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { 728 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
735 cb->args[2] = h; 729 cb->args[3] = h;
736 return -1; 730 return -1;
737 } 731 }
738 } 732 }
739 cb->args[2] = h; 733 cb->args[3] = h;
740 return skb->len; 734 return skb->len;
741} 735}
742 736
@@ -746,28 +740,28 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
746 struct fn_zone *fz; 740 struct fn_zone *fz;
747 struct fn_hash *table = (struct fn_hash*)tb->tb_data; 741 struct fn_hash *table = (struct fn_hash*)tb->tb_data;
748 742
749 s_m = cb->args[1]; 743 s_m = cb->args[2];
750 read_lock(&fib_hash_lock); 744 read_lock(&fib_hash_lock);
751 for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { 745 for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
752 if (m < s_m) continue; 746 if (m < s_m) continue;
753 if (m > s_m) 747 if (m > s_m)
754 memset(&cb->args[2], 0, 748 memset(&cb->args[3], 0,
755 sizeof(cb->args) - 2*sizeof(cb->args[0])); 749 sizeof(cb->args) - 3*sizeof(cb->args[0]));
756 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { 750 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
757 cb->args[1] = m; 751 cb->args[2] = m;
758 read_unlock(&fib_hash_lock); 752 read_unlock(&fib_hash_lock);
759 return -1; 753 return -1;
760 } 754 }
761 } 755 }
762 read_unlock(&fib_hash_lock); 756 read_unlock(&fib_hash_lock);
763 cb->args[1] = m; 757 cb->args[2] = m;
764 return skb->len; 758 return skb->len;
765} 759}
766 760
767#ifdef CONFIG_IP_MULTIPLE_TABLES 761#ifdef CONFIG_IP_MULTIPLE_TABLES
768struct fib_table * fib_hash_init(int id) 762struct fib_table * fib_hash_init(u32 id)
769#else 763#else
770struct fib_table * __init fib_hash_init(int id) 764struct fib_table * __init fib_hash_init(u32 id)
771#endif 765#endif
772{ 766{
773 struct fib_table *tb; 767 struct fib_table *tb;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ef6609ea0eb7..fd6f7769f8ab 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -23,19 +23,14 @@ extern int fib_semantic_match(struct list_head *head,
23 struct fib_result *res, __u32 zone, __u32 mask, 23 struct fib_result *res, __u32 zone, __u32 mask,
24 int prefixlen); 24 int prefixlen);
25extern void fib_release_info(struct fib_info *); 25extern void fib_release_info(struct fib_info *);
26extern struct fib_info *fib_create_info(const struct rtmsg *r, 26extern struct fib_info *fib_create_info(struct fib_config *cfg);
27 struct kern_rta *rta, 27extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
28 const struct nlmsghdr *,
29 int *err);
30extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
31 struct kern_rta *rta, struct fib_info *fi);
32extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 28extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
33 u8 tb_id, u8 type, u8 scope, void *dst, 29 u32 tb_id, u8 type, u8 scope, u32 dst,
34 int dst_len, u8 tos, struct fib_info *fi, 30 int dst_len, u8 tos, struct fib_info *fi,
35 unsigned int); 31 unsigned int);
36extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, 32extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
37 int z, int tb_id, 33 int dst_len, u32 tb_id, struct nl_info *info);
38 struct nlmsghdr *n, struct netlink_skb_parms *req);
39extern struct fib_alias *fib_find_alias(struct list_head *fah, 34extern struct fib_alias *fib_find_alias(struct list_head *fah,
40 u8 tos, u32 prio); 35 u8 tos, u32 prio);
41extern int fib_detect_death(struct fib_info *fi, int order, 36extern int fib_detect_death(struct fib_info *fi, int order,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 79b04718bdfd..52b2adae4f22 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -5,9 +5,8 @@
5 * 5 *
6 * IPv4 Forwarding Information Base: policy rules. 6 * IPv4 Forwarding Information Base: policy rules.
7 * 7 *
8 * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 * Thomas Graf <tgraf@suug.ch>
11 * 10 *
12 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -19,463 +18,350 @@
19 * Marc Boucher : routing by fwmark 18 * Marc Boucher : routing by fwmark
20 */ 19 */
21 20
22#include <asm/uaccess.h>
23#include <asm/system.h>
24#include <linux/bitops.h>
25#include <linux/types.h> 21#include <linux/types.h>
26#include <linux/kernel.h> 22#include <linux/kernel.h>
27#include <linux/sched.h>
28#include <linux/mm.h>
29#include <linux/string.h>
30#include <linux/socket.h>
31#include <linux/sockios.h>
32#include <linux/errno.h>
33#include <linux/in.h>
34#include <linux/inet.h>
35#include <linux/inetdevice.h>
36#include <linux/netdevice.h> 23#include <linux/netdevice.h>
37#include <linux/if_arp.h>
38#include <linux/proc_fs.h>
39#include <linux/skbuff.h>
40#include <linux/netlink.h> 24#include <linux/netlink.h>
25#include <linux/inetdevice.h>
41#include <linux/init.h> 26#include <linux/init.h>
42#include <linux/list.h> 27#include <linux/list.h>
43#include <linux/rcupdate.h> 28#include <linux/rcupdate.h>
44
45#include <net/ip.h> 29#include <net/ip.h>
46#include <net/protocol.h>
47#include <net/route.h> 30#include <net/route.h>
48#include <net/tcp.h> 31#include <net/tcp.h>
49#include <net/sock.h>
50#include <net/ip_fib.h> 32#include <net/ip_fib.h>
33#include <net/fib_rules.h>
51 34
52#define FRprintk(a...) 35static struct fib_rules_ops fib4_rules_ops;
53 36
54struct fib_rule 37struct fib4_rule
55{ 38{
56 struct hlist_node hlist; 39 struct fib_rule common;
57 atomic_t r_clntref; 40 u8 dst_len;
58 u32 r_preference; 41 u8 src_len;
59 unsigned char r_table; 42 u8 tos;
60 unsigned char r_action; 43 u32 src;
61 unsigned char r_dst_len; 44 u32 srcmask;
62 unsigned char r_src_len; 45 u32 dst;
63 u32 r_src; 46 u32 dstmask;
64 u32 r_srcmask;
65 u32 r_dst;
66 u32 r_dstmask;
67 u32 r_srcmap;
68 u8 r_flags;
69 u8 r_tos;
70#ifdef CONFIG_IP_ROUTE_FWMARK 47#ifdef CONFIG_IP_ROUTE_FWMARK
71 u32 r_fwmark; 48 u32 fwmark;
49 u32 fwmask;
72#endif 50#endif
73 int r_ifindex;
74#ifdef CONFIG_NET_CLS_ROUTE 51#ifdef CONFIG_NET_CLS_ROUTE
75 __u32 r_tclassid; 52 u32 tclassid;
76#endif 53#endif
77 char r_ifname[IFNAMSIZ];
78 int r_dead;
79 struct rcu_head rcu;
80}; 54};
81 55
82static struct fib_rule default_rule = { 56static struct fib4_rule default_rule = {
83 .r_clntref = ATOMIC_INIT(2), 57 .common = {
84 .r_preference = 0x7FFF, 58 .refcnt = ATOMIC_INIT(2),
85 .r_table = RT_TABLE_DEFAULT, 59 .pref = 0x7FFF,
86 .r_action = RTN_UNICAST, 60 .table = RT_TABLE_DEFAULT,
61 .action = FR_ACT_TO_TBL,
62 },
87}; 63};
88 64
89static struct fib_rule main_rule = { 65static struct fib4_rule main_rule = {
90 .r_clntref = ATOMIC_INIT(2), 66 .common = {
91 .r_preference = 0x7FFE, 67 .refcnt = ATOMIC_INIT(2),
92 .r_table = RT_TABLE_MAIN, 68 .pref = 0x7FFE,
93 .r_action = RTN_UNICAST, 69 .table = RT_TABLE_MAIN,
70 .action = FR_ACT_TO_TBL,
71 },
94}; 72};
95 73
96static struct fib_rule local_rule = { 74static struct fib4_rule local_rule = {
97 .r_clntref = ATOMIC_INIT(2), 75 .common = {
98 .r_table = RT_TABLE_LOCAL, 76 .refcnt = ATOMIC_INIT(2),
99 .r_action = RTN_UNICAST, 77 .table = RT_TABLE_LOCAL,
78 .action = FR_ACT_TO_TBL,
79 .flags = FIB_RULE_PERMANENT,
80 },
100}; 81};
101 82
102static struct hlist_head fib_rules; 83static LIST_HEAD(fib4_rules);
103 84
104/* writer func called from netlink -- rtnl_sem hold*/ 85#ifdef CONFIG_NET_CLS_ROUTE
105 86u32 fib_rules_tclass(struct fib_result *res)
106static void rtmsg_rule(int, struct fib_rule *);
107
108int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
109{ 87{
110 struct rtattr **rta = arg; 88 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
111 struct rtmsg *rtm = NLMSG_DATA(nlh);
112 struct fib_rule *r;
113 struct hlist_node *node;
114 int err = -ESRCH;
115
116 hlist_for_each_entry(r, node, &fib_rules, hlist) {
117 if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
118 rtm->rtm_src_len == r->r_src_len &&
119 rtm->rtm_dst_len == r->r_dst_len &&
120 (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
121 rtm->rtm_tos == r->r_tos &&
122#ifdef CONFIG_IP_ROUTE_FWMARK
123 (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
124#endif
125 (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
126 (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
127 (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
128 (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
129 err = -EPERM;
130 if (r == &local_rule)
131 break;
132
133 hlist_del_rcu(&r->hlist);
134 r->r_dead = 1;
135 rtmsg_rule(RTM_DELRULE, r);
136 fib_rule_put(r);
137 err = 0;
138 break;
139 }
140 }
141 return err;
142} 89}
90#endif
143 91
144/* Allocate new unique table id */ 92int fib_lookup(struct flowi *flp, struct fib_result *res)
145
146static struct fib_table *fib_empty_table(void)
147{ 93{
148 int id; 94 struct fib_lookup_arg arg = {
95 .result = res,
96 };
97 int err;
149 98
150 for (id = 1; id <= RT_TABLE_MAX; id++) 99 err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
151 if (fib_tables[id] == NULL) 100 res->r = arg.rule;
152 return __fib_new_table(id);
153 return NULL;
154}
155 101
156static inline void fib_rule_put_rcu(struct rcu_head *head) 102 return err;
157{
158 struct fib_rule *r = container_of(head, struct fib_rule, rcu);
159 kfree(r);
160} 103}
161 104
162void fib_rule_put(struct fib_rule *r) 105static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
106 int flags, struct fib_lookup_arg *arg)
163{ 107{
164 if (atomic_dec_and_test(&r->r_clntref)) { 108 int err = -EAGAIN;
165 if (r->r_dead) 109 struct fib_table *tbl;
166 call_rcu(&r->rcu, fib_rule_put_rcu); 110
167 else 111 switch (rule->action) {
168 printk("Freeing alive rule %p\n", r); 112 case FR_ACT_TO_TBL:
113 break;
114
115 case FR_ACT_UNREACHABLE:
116 err = -ENETUNREACH;
117 goto errout;
118
119 case FR_ACT_PROHIBIT:
120 err = -EACCES;
121 goto errout;
122
123 case FR_ACT_BLACKHOLE:
124 default:
125 err = -EINVAL;
126 goto errout;
169 } 127 }
128
129 if ((tbl = fib_get_table(rule->table)) == NULL)
130 goto errout;
131
132 err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
133 if (err > 0)
134 err = -EAGAIN;
135errout:
136 return err;
170} 137}
171 138
172/* writer func called from netlink -- rtnl_sem hold*/
173 139
174int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 140void fib_select_default(const struct flowi *flp, struct fib_result *res)
175{ 141{
176 struct rtattr **rta = arg; 142 if (res->r && res->r->action == FR_ACT_TO_TBL &&
177 struct rtmsg *rtm = NLMSG_DATA(nlh); 143 FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
178 struct fib_rule *r, *new_r, *last = NULL; 144 struct fib_table *tb;
179 struct hlist_node *node = NULL; 145 if ((tb = fib_get_table(res->r->table)) != NULL)
180 unsigned char table_id; 146 tb->tb_select_default(tb, flp, res);
181
182 if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
183 (rtm->rtm_tos & ~IPTOS_TOS_MASK))
184 return -EINVAL;
185
186 if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
187 return -EINVAL;
188
189 table_id = rtm->rtm_table;
190 if (table_id == RT_TABLE_UNSPEC) {
191 struct fib_table *table;
192 if (rtm->rtm_type == RTN_UNICAST) {
193 if ((table = fib_empty_table()) == NULL)
194 return -ENOBUFS;
195 table_id = table->tb_id;
196 }
197 } 147 }
148}
198 149
199 new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); 150static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
200 if (!new_r) 151{
201 return -ENOMEM; 152 struct fib4_rule *r = (struct fib4_rule *) rule;
202 153 u32 daddr = fl->fl4_dst;
203 if (rta[RTA_SRC-1]) 154 u32 saddr = fl->fl4_src;
204 memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
205 if (rta[RTA_DST-1])
206 memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
207 if (rta[RTA_GATEWAY-1])
208 memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
209 new_r->r_src_len = rtm->rtm_src_len;
210 new_r->r_dst_len = rtm->rtm_dst_len;
211 new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
212 new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
213 new_r->r_tos = rtm->rtm_tos;
214#ifdef CONFIG_IP_ROUTE_FWMARK
215 if (rta[RTA_PROTOINFO-1])
216 memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
217#endif
218 new_r->r_action = rtm->rtm_type;
219 new_r->r_flags = rtm->rtm_flags;
220 if (rta[RTA_PRIORITY-1])
221 memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
222 new_r->r_table = table_id;
223 if (rta[RTA_IIF-1]) {
224 struct net_device *dev;
225 rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
226 new_r->r_ifindex = -1;
227 dev = __dev_get_by_name(new_r->r_ifname);
228 if (dev)
229 new_r->r_ifindex = dev->ifindex;
230 }
231#ifdef CONFIG_NET_CLS_ROUTE
232 if (rta[RTA_FLOW-1])
233 memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
234#endif
235 r = container_of(fib_rules.first, struct fib_rule, hlist);
236 155
237 if (!new_r->r_preference) { 156 if (((saddr ^ r->src) & r->srcmask) ||
238 if (r && r->hlist.next != NULL) { 157 ((daddr ^ r->dst) & r->dstmask))
239 r = container_of(r->hlist.next, struct fib_rule, hlist); 158 return 0;
240 if (r->r_preference)
241 new_r->r_preference = r->r_preference - 1;
242 }
243 }
244 159
245 hlist_for_each_entry(r, node, &fib_rules, hlist) { 160 if (r->tos && (r->tos != fl->fl4_tos))
246 if (r->r_preference > new_r->r_preference) 161 return 0;
247 break;
248 last = r;
249 }
250 atomic_inc(&new_r->r_clntref);
251 162
252 if (last) 163#ifdef CONFIG_IP_ROUTE_FWMARK
253 hlist_add_after_rcu(&last->hlist, &new_r->hlist); 164 if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask)
254 else 165 return 0;
255 hlist_add_before_rcu(&new_r->hlist, &r->hlist); 166#endif
256 167
257 rtmsg_rule(RTM_NEWRULE, new_r); 168 return 1;
258 return 0;
259} 169}
260 170
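fib4_rule_match() carries over the classic branch-free prefix test from the old fib_lookup(): the xor exposes differing bits, the mask discards everything past the prefix. A standalone check, in host byte order for the demo values:

#include <stdio.h>
#include <stdint.h>

static int prefix_match(uint32_t addr, uint32_t pfx, uint32_t mask)
{
	return ((addr ^ pfx) & mask) == 0;	/* 0 surviving bits == match */
}

int main(void)
{
	uint32_t mask24 = 0xffffff00;		/* a /24 */

	printf("%d\n", prefix_match(0x0a000105, 0x0a000100, mask24));	/* 1: same /24 */
	printf("%d\n", prefix_match(0x0a000205, 0x0a000100, mask24));	/* 0: differs */
	return 0;
}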
261#ifdef CONFIG_NET_CLS_ROUTE 171static struct fib_table *fib_empty_table(void)
262u32 fib_rules_tclass(struct fib_result *res)
263{ 172{
264 if (res->r) 173 u32 id;
265 return res->r->r_tclassid; 174
266 return 0; 175 for (id = 1; id <= RT_TABLE_MAX; id++)
176 if (fib_get_table(id) == NULL)
177 return fib_new_table(id);
178 return NULL;
267} 179}
268#endif
269 180
270/* callers should hold rtnl semaphore */ 181static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
182 [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
183 [FRA_PRIORITY] = { .type = NLA_U32 },
184 [FRA_SRC] = { .type = NLA_U32 },
185 [FRA_DST] = { .type = NLA_U32 },
186 [FRA_FWMARK] = { .type = NLA_U32 },
187 [FRA_FWMASK] = { .type = NLA_U32 },
188 [FRA_FLOW] = { .type = NLA_U32 },
189 [FRA_TABLE] = { .type = NLA_U32 },
190};
271 191
272static void fib_rules_detach(struct net_device *dev) 192static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
193 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
194 struct nlattr **tb)
273{ 195{
274 struct hlist_node *node; 196 int err = -EINVAL;
275 struct fib_rule *r; 197 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
198
199 if (frh->src_len > 32 || frh->dst_len > 32 ||
200 (frh->tos & ~IPTOS_TOS_MASK))
201 goto errout;
202
203 if (rule->table == RT_TABLE_UNSPEC) {
204 if (rule->action == FR_ACT_TO_TBL) {
205 struct fib_table *table;
276 206
277 hlist_for_each_entry(r, node, &fib_rules, hlist) { 207 table = fib_empty_table();
278 if (r->r_ifindex == dev->ifindex) 208 if (table == NULL) {
279 r->r_ifindex = -1; 209 err = -ENOBUFS;
210 goto errout;
211 }
280 212
213 rule->table = table->tb_id;
214 }
281 } 215 }
282}
283 216
284/* callers should hold rtnl semaphore */ 217 if (tb[FRA_SRC])
218 rule4->src = nla_get_u32(tb[FRA_SRC]);
285 219
286static void fib_rules_attach(struct net_device *dev) 220 if (tb[FRA_DST])
287{ 221 rule4->dst = nla_get_u32(tb[FRA_DST]);
288 struct hlist_node *node;
289 struct fib_rule *r;
290 222
291 hlist_for_each_entry(r, node, &fib_rules, hlist) { 223#ifdef CONFIG_IP_ROUTE_FWMARK
292 if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) 224 if (tb[FRA_FWMARK]) {
293 r->r_ifindex = dev->ifindex; 225 rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
226 if (rule4->fwmark)
227 /* compatibility: if the mark value is non-zero all bits
228 * are compared unless a mask is explicitly specified.
229 */
230 rule4->fwmask = 0xFFFFFFFF;
294 } 231 }
232
233 if (tb[FRA_FWMASK])
234 rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]);
235#endif
236
237#ifdef CONFIG_NET_CLS_ROUTE
238 if (tb[FRA_FLOW])
239 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
240#endif
241
242 rule4->src_len = frh->src_len;
243 rule4->srcmask = inet_make_mask(rule4->src_len);
244 rule4->dst_len = frh->dst_len;
245 rule4->dstmask = inet_make_mask(rule4->dst_len);
246 rule4->tos = frh->tos;
247
248 err = 0;
249errout:
250 return err;
295} 251}
296 252
297int fib_lookup(const struct flowi *flp, struct fib_result *res) 253static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
254 struct nlattr **tb)
298{ 255{
299 int err; 256 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
300 struct fib_rule *r, *policy;
301 struct fib_table *tb;
302 struct hlist_node *node;
303 257
304 u32 daddr = flp->fl4_dst; 258 if (frh->src_len && (rule4->src_len != frh->src_len))
305 u32 saddr = flp->fl4_src; 259 return 0;
306 260
307FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", 261 if (frh->dst_len && (rule4->dst_len != frh->dst_len))
308 NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); 262 return 0;
309 263
310 rcu_read_lock(); 264 if (frh->tos && (rule4->tos != frh->tos))
265 return 0;
311 266
312 hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) {
313 if (((saddr^r->r_src) & r->r_srcmask) ||
314 ((daddr^r->r_dst) & r->r_dstmask) ||
315 (r->r_tos && r->r_tos != flp->fl4_tos) ||
316#ifdef CONFIG_IP_ROUTE_FWMARK 267#ifdef CONFIG_IP_ROUTE_FWMARK
317 (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || 268 if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
269 return 0;
270
271 if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK])))
272 return 0;
318#endif 273#endif
319 (r->r_ifindex && r->r_ifindex != flp->iif))
320 continue;
321
322FRprintk("tb %d r %d ", r->r_table, r->r_action);
323 switch (r->r_action) {
324 case RTN_UNICAST:
325 policy = r;
326 break;
327 case RTN_UNREACHABLE:
328 rcu_read_unlock();
329 return -ENETUNREACH;
330 default:
331 case RTN_BLACKHOLE:
332 rcu_read_unlock();
333 return -EINVAL;
334 case RTN_PROHIBIT:
335 rcu_read_unlock();
336 return -EACCES;
337 }
338 274
339 if ((tb = fib_get_table(r->r_table)) == NULL) 275#ifdef CONFIG_NET_CLS_ROUTE
340 continue; 276 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
341 err = tb->tb_lookup(tb, flp, res); 277 return 0;
342 if (err == 0) { 278#endif
343 res->r = policy;
344 if (policy)
345 atomic_inc(&policy->r_clntref);
346 rcu_read_unlock();
347 return 0;
348 }
349 if (err < 0 && err != -EAGAIN) {
350 rcu_read_unlock();
351 return err;
352 }
353 }
354FRprintk("FAILURE\n");
355 rcu_read_unlock();
356 return -ENETUNREACH;
357}
358 279
359void fib_select_default(const struct flowi *flp, struct fib_result *res) 280 if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC])))
360{ 281 return 0;
361 if (res->r && res->r->r_action == RTN_UNICAST &&
362 FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
363 struct fib_table *tb;
364 if ((tb = fib_get_table(res->r->r_table)) != NULL)
365 tb->tb_select_default(tb, flp, res);
366 }
367}
368 282
369static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) 283 if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST])))
370{ 284 return 0;
371 struct net_device *dev = ptr;
372 285
373 if (event == NETDEV_UNREGISTER) 286 return 1;
374 fib_rules_detach(dev);
375 else if (event == NETDEV_REGISTER)
376 fib_rules_attach(dev);
377 return NOTIFY_DONE;
378} 287}
379 288
289static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
290 struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
291{
292 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
380 293
381static struct notifier_block fib_rules_notifier = { 294 frh->family = AF_INET;
382 .notifier_call =fib_rules_event, 295 frh->dst_len = rule4->dst_len;
383}; 296 frh->src_len = rule4->src_len;
297 frh->tos = rule4->tos;
384 298
385static __inline__ int inet_fill_rule(struct sk_buff *skb,
386 struct fib_rule *r,
387 u32 pid, u32 seq, int event,
388 unsigned int flags)
389{
390 struct rtmsg *rtm;
391 struct nlmsghdr *nlh;
392 unsigned char *b = skb->tail;
393
394 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
395 rtm = NLMSG_DATA(nlh);
396 rtm->rtm_family = AF_INET;
397 rtm->rtm_dst_len = r->r_dst_len;
398 rtm->rtm_src_len = r->r_src_len;
399 rtm->rtm_tos = r->r_tos;
400#ifdef CONFIG_IP_ROUTE_FWMARK 299#ifdef CONFIG_IP_ROUTE_FWMARK
401 if (r->r_fwmark) 300 if (rule4->fwmark)
402 RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); 301 NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
302
303 if (rule4->fwmask || rule4->fwmark)
304 NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask);
403#endif 305#endif
404 rtm->rtm_table = r->r_table; 306
405 rtm->rtm_protocol = 0; 307 if (rule4->dst_len)
406 rtm->rtm_scope = 0; 308 NLA_PUT_U32(skb, FRA_DST, rule4->dst);
407 rtm->rtm_type = r->r_action; 309
408 rtm->rtm_flags = r->r_flags; 310 if (rule4->src_len)
409 311 NLA_PUT_U32(skb, FRA_SRC, rule4->src);
410 if (r->r_dst_len) 312
411 RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
412 if (r->r_src_len)
413 RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
414 if (r->r_ifname[0])
415 RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
416 if (r->r_preference)
417 RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
418 if (r->r_srcmap)
419 RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
420#ifdef CONFIG_NET_CLS_ROUTE 313#ifdef CONFIG_NET_CLS_ROUTE
421 if (r->r_tclassid) 314 if (rule4->tclassid)
422 RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); 315 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
423#endif 316#endif
424 nlh->nlmsg_len = skb->tail - b; 317 return 0;
425 return skb->len;
426 318
427nlmsg_failure: 319nla_put_failure:
428rtattr_failure: 320 return -ENOBUFS;
429 skb_trim(skb, b - skb->data);
430 return -1;
431} 321}
432 322
433/* callers should hold rtnl semaphore */ 323int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
434
435static void rtmsg_rule(int event, struct fib_rule *r)
436{ 324{
437 int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128); 325 return fib_rules_dump(skb, cb, AF_INET);
438 struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
439
440 if (!skb)
441 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
442 else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) {
443 kfree_skb(skb);
444 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
445 } else {
446 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
447 }
448} 326}
449 327
450int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) 328static u32 fib4_rule_default_pref(void)
451{ 329{
452 int idx = 0; 330 struct list_head *pos;
453 int s_idx = cb->args[0]; 331 struct fib_rule *rule;
454 struct fib_rule *r; 332
455 struct hlist_node *node; 333 if (!list_empty(&fib4_rules)) {
456 334 pos = fib4_rules.next;
457 rcu_read_lock(); 335 if (pos->next != &fib4_rules) {
458 hlist_for_each_entry(r, node, &fib_rules, hlist) { 336 rule = list_entry(pos->next, struct fib_rule, list);
459 if (idx < s_idx) 337 if (rule->pref)
460 goto next; 338 return rule->pref - 1;
461 if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, 339 }
462 cb->nlh->nlmsg_seq,
463 RTM_NEWRULE, NLM_F_MULTI) < 0)
464 break;
465next:
466 idx++;
467 } 340 }
468 rcu_read_unlock();
469 cb->args[0] = idx;
470 341
471 return skb->len; 342 return 0;
472} 343}
473 344
474void __init fib_rules_init(void) 345static struct fib_rules_ops fib4_rules_ops = {
346 .family = AF_INET,
347 .rule_size = sizeof(struct fib4_rule),
348 .action = fib4_rule_action,
349 .match = fib4_rule_match,
350 .configure = fib4_rule_configure,
351 .compare = fib4_rule_compare,
352 .fill = fib4_rule_fill,
353 .default_pref = fib4_rule_default_pref,
354 .nlgroup = RTNLGRP_IPV4_RULE,
355 .policy = fib4_rule_policy,
356 .rules_list = &fib4_rules,
357 .owner = THIS_MODULE,
358};
359
360void __init fib4_rules_init(void)
475{ 361{
476 INIT_HLIST_HEAD(&fib_rules); 362 list_add_tail(&local_rule.common.list, &fib4_rules);
477 hlist_add_head(&local_rule.hlist, &fib_rules); 363 list_add_tail(&main_rule.common.list, &fib4_rules);
478 hlist_add_after(&local_rule.hlist, &main_rule.hlist); 364 list_add_tail(&default_rule.common.list, &fib4_rules);
479 hlist_add_after(&main_rule.hlist, &default_rule.hlist); 365
480 register_netdevice_notifier(&fib_rules_notifier); 366 fib_rules_register(&fib4_rules_ops);
481} 367}
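The rewrite above strips fib_rules.c down to AF_INET specifics: the private hlist bookkeeping, the hand-rolled RTA_PUT marshalling, and the netdev notifier all move into the generic fib_rules framework, and the family now supplies only selector matching, configure/compare/fill callbacks, and an nla_policy. A minimal sketch of the glue a protocol family registers under this framework follows; "myfam" is hypothetical, and a real user must also provide the action/configure/compare/fill/default_pref callbacks that fib4_rules_ops fills in above:

/* Sketch only: per-family glue for the generic fib_rules framework,
 * modeled on fib4_rules_ops above. "myfam" is a hypothetical family. */
static LIST_HEAD(myfam_rules);

static int myfam_rule_match(struct fib_rule *rule, struct flowi *fl,
			    int flags)
{
	/* family-specific selector comparison, cf. fib4_rule_match():
	 * return 0 to skip this rule, 1 to apply its action. */
	return 1;
}

static struct fib_rules_ops myfam_rules_ops = {
	.family		= AF_INET,		/* the registering family */
	.rule_size	= sizeof(struct fib_rule),
	.match		= myfam_rule_match,
	.nlgroup	= RTNLGRP_IPV4_RULE,	/* notification group */
	.rules_list	= &myfam_rules,
	.owner		= THIS_MODULE,
};

void __init myfam_rules_init(void)
{
	fib_rules_register(&myfam_rules_ops);	/* framework handles the rest */
}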
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 51738000f3dc..2ead09543f68 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -33,7 +33,6 @@
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 #include <linux/init.h>
 
 #include <net/arp.h>
@@ -44,12 +43,14 @@
 #include <net/sock.h>
 #include <net/ip_fib.h>
 #include <net/ip_mp_alg.h>
+#include <net/netlink.h>
+#include <net/nexthop.h>
 
 #include "fib_lookup.h"
 
 #define FSprintk(a...)
 
-static DEFINE_RWLOCK(fib_info_lock);
+static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
 static unsigned int fib_hash_size;
@@ -159,7 +160,7 @@ void free_fib_info(struct fib_info *fi)
 
 void fib_release_info(struct fib_info *fi)
 {
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		hlist_del(&fi->fib_hash);
 		if (fi->fib_prefsrc)
@@ -172,7 +173,7 @@ void fib_release_info(struct fib_info *fi)
 		fi->fib_dead = 1;
 		fib_info_put(fi);
 	}
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 }
 
 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -254,7 +255,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
 	struct fib_nh *nh;
 	unsigned int hash;
 
-	read_lock(&fib_info_lock);
+	spin_lock(&fib_info_lock);
 
 	hash = fib_devindex_hashfn(dev->ifindex);
 	head = &fib_info_devhash[hash];
@@ -262,41 +263,41 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
 		if (nh->nh_dev == dev &&
 		    nh->nh_gw == gw &&
 		    !(nh->nh_flags&RTNH_F_DEAD)) {
-			read_unlock(&fib_info_lock);
+			spin_unlock(&fib_info_lock);
 			return 0;
 		}
 	}
 
-	read_unlock(&fib_info_lock);
+	spin_unlock(&fib_info_lock);
 
 	return -1;
 }
 
 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-	       int z, int tb_id,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+	       int dst_len, u32 tb_id, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : n->nlmsg_pid;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb)
-		return;
-
-	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
-			  fa->fa_type, fa->fa_scope, &key, z,
-			  fa->fa_tos,
-			  fa->fa_info, 0) < 0) {
+	int payload = sizeof(struct rtmsg) + 256;
+	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
+			    fa->fa_type, fa->fa_scope, key, dst_len,
+			    fa->fa_tos, fa->fa_info, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
-	if (n->nlmsg_flags&NLM_F_ECHO)
-		atomic_inc(&skb->users);
-	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
-	if (n->nlmsg_flags&NLM_F_ECHO)
-		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+	err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
+			  info->nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
 }
 
 /* Return the first fib alias matching TOS with
@@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
-static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
-{
-	while (RTA_OK(attr,attrlen)) {
-		if (attr->rta_type == type)
-			return *(u32*)RTA_DATA(attr);
-		attr = RTA_NEXT(attr, attrlen);
-	}
-	return 0;
-}
-
-static int
-fib_count_nexthops(struct rtattr *rta)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
 {
 	int nhs = 0;
-	struct rtnexthop *nhp = RTA_DATA(rta);
-	int nhlen = RTA_PAYLOAD(rta);
 
-	while (nhlen >= (int)sizeof(struct rtnexthop)) {
-		if ((nhlen -= nhp->rtnh_len) < 0)
-			return 0;
+	while (rtnh_ok(rtnh, remaining)) {
 		nhs++;
-		nhp = RTNH_NEXT(nhp);
-	};
-	return nhs;
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
+
+	/* leftover implies invalid nexthop configuration, discard it */
+	return remaining > 0 ? 0 : nhs;
 }
 
-static int
-fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+		       int remaining, struct fib_config *cfg)
 {
-	struct rtnexthop *nhp = RTA_DATA(rta);
-	int nhlen = RTA_PAYLOAD(rta);
-
 	change_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		int attrlen;
+
+		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
-		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
-		nh->nh_oif = nhp->rtnh_ifindex;
-		nh->nh_weight = nhp->rtnh_hops + 1;
-		if (attrlen) {
-			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+
+		nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+		nh->nh_oif = rtnh->rtnh_ifindex;
+		nh->nh_weight = rtnh->rtnh_hops + 1;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			nh->nh_gw = nla ? nla_get_u32(nla) : 0;
 #ifdef CONFIG_NET_CLS_ROUTE
-			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 #endif
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
+
 	return 0;
 }
 
 #endif
 
-int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
-		 struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	struct rtnexthop *nhp;
-	int nhlen;
+	struct rtnexthop *rtnh;
+	int remaining;
 #endif
 
-	if (rta->rta_priority &&
-	    *rta->rta_priority != fi->fib_priority)
+	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
-	if (rta->rta_oif || rta->rta_gw) {
-		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
-		    (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
+	if (cfg->fc_oif || cfg->fc_gw) {
+		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
+		    (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
 			return 0;
 		return 1;
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (rta->rta_mp == NULL)
+	if (cfg->fc_mp == NULL)
 		return 0;
-	nhp = RTA_DATA(rta->rta_mp);
-	nhlen = RTA_PAYLOAD(rta->rta_mp);
+
+	rtnh = cfg->fc_mp;
+	remaining = cfg->fc_mp_len;
 
 	for_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		u32 gw;
+		int attrlen;
 
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
-		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+
+		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
 			return 1;
-		if (attrlen) {
-			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
-			if (gw && gw != nh->nh_gw)
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen < 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla && nla_get_u32(nla) != nh->nh_gw)
 				return 1;
 #ifdef CONFIG_NET_CLS_ROUTE
-			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
-			if (gw && gw != nh->nh_tclassid)
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
 				return 1;
 #endif
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
 #endif
 	return 0;
@@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
 	|-> {local prefix} (terminal node)
  */
 
-static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
+static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
+			struct fib_nh *nh)
 {
 	int err;
 
@@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
 	if (nh->nh_flags&RTNH_F_ONLINK) {
 		struct net_device *dev;
 
-		if (r->rtm_scope >= RT_SCOPE_LINK)
+		if (cfg->fc_scope >= RT_SCOPE_LINK)
 			return -EINVAL;
 		if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
 			return -EINVAL;
@@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
 			return 0;
 	}
 	{
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = nh->nh_gw,
-						.scope = r->rtm_scope + 1 } },
-				    .oif = nh->nh_oif };
+		struct flowi fl = {
+			.nl_u = {
+				.ip4_u = {
+					.daddr = nh->nh_gw,
+					.scope = cfg->fc_scope + 1,
+				},
+			},
+			.oif = nh->nh_oif,
+		};
 
 		/* It is not necessary, but requires a bit of thinking */
 		if (fl.fl4_scope < RT_SCOPE_LINK)
@@ -598,7 +603,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	unsigned int old_size = fib_hash_size;
 	unsigned int i, bytes;
 
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	old_info_hash = fib_info_hash;
 	old_laddrhash = fib_info_laddrhash;
 	fib_hash_size = new_size;
@@ -639,46 +644,35 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	}
 	fib_info_laddrhash = new_laddrhash;
 
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 
 	bytes = old_size * sizeof(struct hlist_head *);
 	fib_hash_free(old_info_hash, bytes);
 	fib_hash_free(old_laddrhash, bytes);
 }
 
-struct fib_info *
-fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
-		const struct nlmsghdr *nlh, int *errp)
+struct fib_info *fib_create_info(struct fib_config *cfg)
 {
 	int err;
 	struct fib_info *fi = NULL;
 	struct fib_info *ofi;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int nhs = 1;
-#else
-	const int nhs = 1;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	u32 mp_alg = IP_MP_ALG_NONE;
-#endif
 
 	/* Fast check to catch the most weird cases */
-	if (fib_props[r->rtm_type].scope > r->rtm_scope)
+	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
 		goto err_inval;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (rta->rta_mp) {
-		nhs = fib_count_nexthops(rta->rta_mp);
+	if (cfg->fc_mp) {
+		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
 		if (nhs == 0)
 			goto err_inval;
 	}
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rta->rta_mp_alg) {
-		mp_alg = *rta->rta_mp_alg;
-
-		if (mp_alg < IP_MP_ALG_NONE ||
-		    mp_alg > IP_MP_ALG_MAX)
+	if (cfg->fc_mp_alg) {
+		if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
+		    cfg->fc_mp_alg > IP_MP_ALG_MAX)
 			goto err_inval;
 	}
 #endif
@@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 		goto failure;
 	fib_info_cnt++;
 
-	fi->fib_protocol = r->rtm_protocol;
+	fi->fib_protocol = cfg->fc_protocol;
+	fi->fib_flags = cfg->fc_flags;
+	fi->fib_priority = cfg->fc_priority;
+	fi->fib_prefsrc = cfg->fc_prefsrc;
 
 	fi->fib_nhs = nhs;
 	change_nexthops(fi) {
 		nh->nh_parent = fi;
 	} endfor_nexthops(fi)
 
-	fi->fib_flags = r->rtm_flags;
-	if (rta->rta_priority)
-		fi->fib_priority = *rta->rta_priority;
-	if (rta->rta_mx) {
-		int attrlen = RTA_PAYLOAD(rta->rta_mx);
-		struct rtattr *attr = RTA_DATA(rta->rta_mx);
-
-		while (RTA_OK(attr, attrlen)) {
-			unsigned flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > RTAX_MAX)
+	if (cfg->fc_mx) {
+		struct nlattr *nla;
+		int remaining;
+
+		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+			int type = nla->nla_type;
+
+			if (type) {
+				if (type > RTAX_MAX)
 					goto err_inval;
-				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
+				fi->fib_metrics[type - 1] = nla_get_u32(nla);
 			}
-			attr = RTA_NEXT(attr, attrlen);
 		}
 	}
-	if (rta->rta_prefsrc)
-		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
 
-	if (rta->rta_mp) {
+	if (cfg->fc_mp) {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+		if (err != 0)
 			goto failure;
-		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
 			goto err_inval;
-		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
+		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
 			goto err_inval;
 #ifdef CONFIG_NET_CLS_ROUTE
-		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
+		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
 			goto err_inval;
 #endif
 #else
@@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 #endif
 	} else {
 		struct fib_nh *nh = fi->fib_nh;
-		if (rta->rta_oif)
-			nh->nh_oif = *rta->rta_oif;
-		if (rta->rta_gw)
-			memcpy(&nh->nh_gw, rta->rta_gw, 4);
+
+		nh->nh_oif = cfg->fc_oif;
+		nh->nh_gw = cfg->fc_gw;
+		nh->nh_flags = cfg->fc_flags;
 #ifdef CONFIG_NET_CLS_ROUTE
-		if (rta->rta_flow)
-			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
+		nh->nh_tclassid = cfg->fc_flow;
 #endif
-		nh->nh_flags = r->rtm_flags;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		nh->nh_weight = 1;
 #endif
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	fi->fib_mp_alg = mp_alg;
+	fi->fib_mp_alg = cfg->fc_mp_alg;
 #endif
 
-	if (fib_props[r->rtm_type].error) {
-		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+	if (fib_props[cfg->fc_type].error) {
+		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
 			goto err_inval;
 		goto link_it;
 	}
 
-	if (r->rtm_scope > RT_SCOPE_HOST)
+	if (cfg->fc_scope > RT_SCOPE_HOST)
 		goto err_inval;
 
-	if (r->rtm_scope == RT_SCOPE_HOST) {
+	if (cfg->fc_scope == RT_SCOPE_HOST) {
 		struct fib_nh *nh = fi->fib_nh;
 
 		/* Local address is added. */
@@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 			goto failure;
 	} else {
 		change_nexthops(fi) {
-			if ((err = fib_check_nh(r, fi, nh)) != 0)
+			if ((err = fib_check_nh(cfg, fi, nh)) != 0)
 				goto failure;
 		} endfor_nexthops(fi)
 	}
 
 	if (fi->fib_prefsrc) {
-		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
-		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
+		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+		    fi->fib_prefsrc != cfg->fc_dst)
 			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
 				goto err_inval;
 	}
@@ -820,7 +811,7 @@ link_it:
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	hlist_add_head(&fi->fib_hash,
 		       &fib_info_hash[fib_info_hashfn(fi)]);
 	if (fi->fib_prefsrc) {
@@ -839,19 +830,19 @@ link_it:
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 	return fi;
 
 err_inval:
 	err = -EINVAL;
 
failure:
-	*errp = err;
 	if (fi) {
 		fi->fib_dead = 1;
 		free_fib_info(fi);
 	}
-	return NULL;
+
+	return ERR_PTR(err);
 }
 
 /* Note! fib_semantic_match intentionally uses RCU list functions. */
@@ -937,224 +928,89 @@ u32 __fib_res_prefsrc(struct fib_result *res)
 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
 }
 
-int
-fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
-	      struct fib_info *fi, unsigned int flags)
+int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+		  u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
+		  struct fib_info *fi, unsigned int flags)
 {
+	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = dst_len;
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = tos;
 	rtm->rtm_table = tb_id;
+	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
 	rtm->rtm_scope = scope;
-	if (rtm->rtm_dst_len)
-		RTA_PUT(skb, RTA_DST, 4, dst);
 	rtm->rtm_protocol = fi->fib_protocol;
+
+	if (rtm->rtm_dst_len)
+		NLA_PUT_U32(skb, RTA_DST, dst);
+
 	if (fi->fib_priority)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
+		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
+
 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	if (fi->fib_prefsrc)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
+		NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc);
+
 	if (fi->fib_nhs == 1) {
 		if (fi->fib_nh->nh_gw)
-			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
+			NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
+
 		if (fi->fib_nh->nh_oif)
-			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
 #ifdef CONFIG_NET_CLS_ROUTE
 		if (fi->fib_nh[0].nh_tclassid)
-			RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
+			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 #endif
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (fi->fib_nhs > 1) {
-		struct rtnexthop *nhp;
-		struct rtattr *mp_head;
-		if (skb_tailroom(skb) <= RTA_SPACE(0))
-			goto rtattr_failure;
-		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
+		struct rtnexthop *rtnh;
+		struct nlattr *mp;
+
+		mp = nla_nest_start(skb, RTA_MULTIPATH);
+		if (mp == NULL)
+			goto nla_put_failure;
 
 		for_nexthops(fi) {
-			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
-				goto rtattr_failure;
-			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
-			nhp->rtnh_flags = nh->nh_flags & 0xFF;
-			nhp->rtnh_hops = nh->nh_weight-1;
-			nhp->rtnh_ifindex = nh->nh_oif;
+			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+			if (rtnh == NULL)
+				goto nla_put_failure;
+
+			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+			rtnh->rtnh_hops = nh->nh_weight - 1;
+			rtnh->rtnh_ifindex = nh->nh_oif;
+
 			if (nh->nh_gw)
-				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+				NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw);
#ifdef CONFIG_NET_CLS_ROUTE
 			if (nh->nh_tclassid)
-				RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
+				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
 #endif
-			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+			/* length of rtnetlink header + attributes */
+			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
 		} endfor_nexthops(fi);
-		mp_head->rta_type = RTA_MULTIPATH;
-		mp_head->rta_len = skb->tail - (u8*)mp_head;
+
+		nla_nest_end(skb, mp);
 	}
 #endif
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	return nlmsg_end(skb, nlh);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-#ifndef CONFIG_IP_NOSIOCRT
-
-int
-fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
-		    struct kern_rta *rta, struct rtentry *r)
-{
-	int plen;
-	u32 *ptr;
-
-	memset(rtm, 0, sizeof(*rtm));
-	memset(rta, 0, sizeof(*rta));
-
-	if (r->rt_dst.sa_family != AF_INET)
-		return -EAFNOSUPPORT;
-
-	/* Check mask for validity:
-	   a) it must be contiguous.
-	   b) destination must have all host bits clear.
-	   c) if application forgot to set correct family (AF_INET),
-	      reject request unless it is absolutely clear i.e.
-	      both family and mask are zero.
-	 */
-	plen = 32;
-	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
-	if (!(r->rt_flags&RTF_HOST)) {
-		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
-		if (r->rt_genmask.sa_family != AF_INET) {
-			if (mask || r->rt_genmask.sa_family)
-				return -EAFNOSUPPORT;
-		}
-		if (bad_mask(mask, *ptr))
-			return -EINVAL;
-		plen = inet_mask_len(mask);
-	}
-
-	nl->nlmsg_flags = NLM_F_REQUEST;
-	nl->nlmsg_pid = 0;
-	nl->nlmsg_seq = 0;
-	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
-	if (cmd == SIOCDELRT) {
-		nl->nlmsg_type = RTM_DELROUTE;
-		nl->nlmsg_flags = 0;
-	} else {
-		nl->nlmsg_type = RTM_NEWROUTE;
-		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
-		rtm->rtm_protocol = RTPROT_BOOT;
-	}
-
-	rtm->rtm_dst_len = plen;
-	rta->rta_dst = ptr;
-
-	if (r->rt_metric) {
-		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
-		rta->rta_priority = (u32*)&r->rt_pad3;
-	}
-	if (r->rt_flags&RTF_REJECT) {
-		rtm->rtm_scope = RT_SCOPE_HOST;
-		rtm->rtm_type = RTN_UNREACHABLE;
-		return 0;
-	}
-	rtm->rtm_scope = RT_SCOPE_NOWHERE;
-	rtm->rtm_type = RTN_UNICAST;
-
-	if (r->rt_dev) {
-		char *colon;
-		struct net_device *dev;
-		char devname[IFNAMSIZ];
-
-		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
-			return -EFAULT;
-		devname[IFNAMSIZ-1] = 0;
-		colon = strchr(devname, ':');
-		if (colon)
-			*colon = 0;
-		dev = __dev_get_by_name(devname);
-		if (!dev)
-			return -ENODEV;
-		rta->rta_oif = &dev->ifindex;
-		if (colon) {
-			struct in_ifaddr *ifa;
-			struct in_device *in_dev = __in_dev_get_rtnl(dev);
-			if (!in_dev)
-				return -ENODEV;
-			*colon = ':';
-			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
-				if (strcmp(ifa->ifa_label, devname) == 0)
-					break;
-			if (ifa == NULL)
-				return -ENODEV;
-			rta->rta_prefsrc = &ifa->ifa_local;
-		}
-	}
-
-	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
-	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
-		rta->rta_gw = ptr;
-		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
-			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
-	}
-
-	if (cmd == SIOCDELRT)
-		return 0;
-
-	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
-		return -EINVAL;
-
-	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
-		rtm->rtm_scope = RT_SCOPE_LINK;
-
-	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
-		struct rtattr *rec;
-		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
-		if (mx == NULL)
-			return -ENOMEM;
-		rta->rta_mx = mx;
-		mx->rta_type = RTA_METRICS;
-		mx->rta_len = RTA_LENGTH(0);
-		if (r->rt_flags&RTF_MTU) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_ADVMSS;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
-		}
-		if (r->rt_flags&RTF_WINDOW) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_WINDOW;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_window;
-		}
-		if (r->rt_flags&RTF_IRTT) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_RTT;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
-		}
-	}
-	return 0;
-}
-
-#endif
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
+}
 
 /*
    Update FIB if:
    - local address disappeared -> we must delete all the entries
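fib_dump_info() above is the canonical example of the <net/netlink.h> fill pattern that replaces NLMSG_NEW/RTA_PUT throughout this series: nlmsg_put() reserves the message header and returns NULL when the skb has no room, NLA_PUT_U32() jumps to a local nla_put_failure label on shortage, nlmsg_cancel() trims a half-built message away, and nlmsg_end() patches the final message length. A condensed sketch of that skeleton; myattr_fill() and the choice of RTA_PRIORITY are illustrative only:

/* Sketch of the nlmsg_put()/NLA_PUT_U32() fill pattern used by
 * fib_dump_info() above. myattr_fill() is a hypothetical helper. */
static int myattr_fill(struct sk_buff *skb, u32 pid, u32 seq, u32 value)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), 0);
	if (nlh == NULL)
		return -ENOBUFS;

	rtm = nlmsg_data(nlh);
	memset(rtm, 0, sizeof(*rtm));
	rtm->rtm_family = AF_INET;

	NLA_PUT_U32(skb, RTA_PRIORITY, value);	/* jumps on lack of space */

	return nlmsg_end(skb, nlh);	/* patch nlmsg_len, return skb->len */

nla_put_failure:
	return nlmsg_cancel(skb, nlh);	/* trim the partial message away */
}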
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 01801c0f885d..9c3ff6ba6e21 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1124,17 +1124,14 @@ err:
 	return fa_head;
 }
 
-static int
-fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *new_fa;
 	struct list_head *fa_head = NULL;
 	struct fib_info *fi;
-	int plen = r->rtm_dst_len;
-	int type = r->rtm_type;
-	u8 tos = r->rtm_tos;
+	int plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
 	u32 key, mask;
 	int err;
 	struct leaf *l;
@@ -1142,13 +1139,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 	if (plen > 32)
 		return -EINVAL;
 
-	key = 0;
-	if (rta->rta_dst)
-		memcpy(&key, rta->rta_dst, 4);
-
-	key = ntohl(key);
+	key = ntohl(cfg->fc_dst);
 
-	pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
+	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
 
 	mask = ntohl(inet_make_mask(plen));
 
@@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 
 	key = key & mask;
 
-	fi = fib_create_info(r, rta, nlhdr, &err);
-
-	if (!fi)
+	fi = fib_create_info(cfg);
+	if (IS_ERR(fi)) {
+		err = PTR_ERR(fi);
 		goto err;
+	}
 
 	l = fib_find_node(t, key);
 	fa = NULL;
@@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		struct fib_alias *fa_orig;
 
 		err = -EEXIST;
-		if (nlhdr->nlmsg_flags & NLM_F_EXCL)
+		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
-		if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
+		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
@@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 			fi_drop = fa->fa_info;
 			new_fa->fa_tos = fa->fa_tos;
 			new_fa->fa_info = fi;
-			new_fa->fa_type = type;
-			new_fa->fa_scope = r->rtm_scope;
+			new_fa->fa_type = cfg->fc_type;
+			new_fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			new_fa->fa_state &= ~FA_S_ACCESSED;
 
@@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
-			if (fa->fa_type == type &&
-			    fa->fa_scope == r->rtm_scope &&
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi) {
 				goto out;
 			}
 		}
-		if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
+		if (!(cfg->fc_nlflags & NLM_F_APPEND))
 			fa = fa_orig;
 	}
 	err = -ENOENT;
-	if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
+	if (!(cfg->fc_nlflags & NLM_F_CREATE))
 		goto out;
 
 	err = -ENOBUFS;
@@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
-	new_fa->fa_type = type;
-	new_fa->fa_scope = r->rtm_scope;
+	new_fa->fa_type = cfg->fc_type;
+	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 	/*
 	 * Insert new entry to the list.
@@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 			(fa ? &fa->fa_list : fa_head));
 
 	rt_cache_flush(-1);
-	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
+	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
+		  &cfg->fc_nlinfo);
 succeeded:
 	return 0;
 
@@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key)
 	return 1;
 }
 
-static int
-fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	u32 key, mask;
-	int plen = r->rtm_dst_len;
-	u8 tos = r->rtm_tos;
+	int plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
 	struct fib_alias *fa, *fa_to_delete;
 	struct list_head *fa_head;
 	struct leaf *l;
 	struct leaf_info *li;
 
-
 	if (plen > 32)
 		return -EINVAL;
 
-	key = 0;
-	if (rta->rta_dst)
-		memcpy(&key, rta->rta_dst, 4);
-
-	key = ntohl(key);
+	key = ntohl(cfg->fc_dst);
 	mask = ntohl(inet_make_mask(plen));
 
 	if (key & ~mask)
@@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		if (fa->fa_tos != tos)
 			break;
 
-		if ((!r->rtm_type ||
-		     fa->fa_type == r->rtm_type) &&
-		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == r->rtm_scope) &&
-		    (!r->rtm_protocol ||
-		     fi->fib_protocol == r->rtm_protocol) &&
-		    fib_nh_match(r, nlhdr, rta, fi) == 0) {
+		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
+		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+		     fa->fa_scope == cfg->fc_scope) &&
+		    (!cfg->fc_protocol ||
+		     fi->fib_protocol == cfg->fc_protocol) &&
+		    fib_nh_match(cfg, fi) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
@@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 		return -ESRCH;
 
 	fa = fa_to_delete;
-	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
+	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
+		  &cfg->fc_nlinfo);
 
 	l = fib_find_node(t, key);
 	li = find_leaf_info(l, plen);
@@ -1848,7 +1836,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
 
 	u32 xkey = htonl(key);
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	i = 0;
 
 	/* rcu_read_lock is hold by caller */
@@ -1866,16 +1854,16 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
 				  tb->tb_id,
 				  fa->fa_type,
 				  fa->fa_scope,
-				  &xkey,
+				  xkey,
 				  plen,
 				  fa->fa_tos,
 				  fa->fa_info, 0) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 		i++;
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -1886,14 +1874,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
 	struct list_head *fa_head;
 	struct leaf *l = NULL;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 
 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0,
-			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0,
+			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 
 		fa_head = get_fa_head(l, plen);
 
@@ -1904,11 +1892,11 @@
 			continue;
 
 		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -1917,23 +1905,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
 	int m, s_m;
 	struct trie *t = (struct trie *) tb->tb_data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 
 	rcu_read_lock();
 	for (m = 0; m <= 32; m++) {
 		if (m < s_m)
 			continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0,
-			       sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0,
+			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
 		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			goto out;
 		}
 	}
 	rcu_read_unlock();
-	cb->args[1] = m;
+	cb->args[2] = m;
 	return skb->len;
 out:
 	rcu_read_unlock();
@@ -1943,9 +1931,9 @@ out:
 /* Fix more generic FIB names for init later */
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
 #else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
 #endif
 {
 	struct fib_table *tb;
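Note how every dump cursor in the trie moved up one slot (args[1]→args[2], args[2]→args[3], args[3]→args[4]); the freed low slots are presumably claimed by the caller now that a single route dump can walk more than one table. The underlying resumption contract is unchanged either way: a dump callback fills the skb until it runs out of room, records its position in cb->args[], and is invoked again on the next read. A sketch; MYTABLE_SIZE and fill_one_entry() are hypothetical:

/* Sketch: cb->args[] as a resume cursor for a netlink dump.
 * MYTABLE_SIZE and fill_one_entry() are illustrative only. */
static int mytable_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int i, s_i = cb->args[2];	/* where the previous pass stopped */

	for (i = s_i; i < MYTABLE_SIZE; i++) {
		if (fill_one_entry(skb, cb, i) < 0) {
			/* skb full: remember position, resume on next call */
			cb->args[2] = i;
			return -1;
		}
	}
	cb->args[2] = i;
	return skb->len;
}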
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4c86ac3d882d..c2ad07e48ab4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = {
 };
 
 /* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts = 1;
+int sysctl_icmp_echo_ignore_all __read_mostly;
+int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
 
 /* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses = 1;
+int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
 
 /*
  *	Configurable global rate limit.
@@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1;
  *	time exceeded (11), parameter problem (12)
  */
 
-int sysctl_icmp_ratelimit = 1 * HZ;
-int sysctl_icmp_ratemask = 0x1818;
-int sysctl_icmp_errors_use_inbound_ifaddr;
+int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
+int sysctl_icmp_ratemask __read_mostly = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
 
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
@@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 					.saddr = rt->rt_spec_dst,
 					.tos = RT_TOS(skb->nh.iph->tos) } },
 				    .proto = IPPROTO_ICMP };
+		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			goto out_unlock;
 	}
@@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 				}
 			}
 		};
+		security_skb_classify_flow(skb_in, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			goto out_unlock;
 	}
@@ -928,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb)
 	ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		/* fall through */
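CHECKSUM_HW is renamed to CHECKSUM_COMPLETE here and in the files below: the value now says explicitly that the device delivered a checksum over the complete packet in skb->csum, which the stack validates by folding it to 16 bits (a valid sum folds to zero). A sketch of the receive-side idiom these hunks use; my_checksum_ok() is illustrative, and icmp_rcv() itself open-codes the same switch:

/* Sketch of the ip_summed idiom above; my_checksum_ok() is hypothetical. */
static int my_checksum_ok(struct sk_buff *skb)
{
	switch (skb->ip_summed) {
	case CHECKSUM_UNNECESSARY:
		return 1;			/* already verified by the device */
	case CHECKSUM_COMPLETE:
		if (!(u16)csum_fold(skb->csum))
			return 1;		/* hardware sum checks out */
		/* fall through: distrust it and recompute in software */
	default:
		skb->csum = 0;
		return __skb_checksum_complete(skb) == 0;
	}
}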
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8e8117c19e4d..58be8227b0cb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb)
 		goto drop;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		/* fall through */
@@ -1397,8 +1397,8 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
 /*
  *	Join a socket to a group
  */
-int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
-int sysctl_igmp_max_msf = IP_MAX_MSF;
+int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
+int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
 
 
 static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e50a1bfd7ccc..07204391d083 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 			    { .sport = inet_sk(sk)->sport,
 			      .dport = ireq->rmt_port } } };
 
+	security_req_classify_flow(req, &fl);
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
@@ -509,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 
 		/* Deinitialize accept_queue to trap illegal accesses. */
 		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+
+		security_inet_csk_clone(newsk, req);
 	}
 	return newsk;
 }
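Both hunks in this file, like the icmp.c ones above, wedge an LSM hook in between building the struct flowi and handing it to the routing code, so a security module (SELinux being the in-tree user) can attach its label to the flow before any route or IPsec lookup consults it. The placement pattern, sketched; my_route_for_skb() is illustrative:

/* Sketch: classify the flow right after constructing it and before
 * the routing decision. my_route_for_skb() is a hypothetical helper. */
static struct rtable *my_route_for_skb(struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi fl = {
		.nl_u = { .ip4_u = { .daddr = skb->nh.iph->saddr } },
		.proto = IPPROTO_ICMP,
	};

	security_skb_classify_flow(skb, &fl);	/* let the LSM label fl */
	if (ip_route_output_key(&rt, &fl))
		return NULL;
	return rt;
}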
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 95fac5532994..fb296c9a7f3f 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock);
 * remote address for the connection. So always assume those are both
 * wildcarded during the search since they can never be otherwise.
 */
-struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
-				    const unsigned short hnum, const int dif)
+static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+					      const u32 daddr,
+					      const unsigned short hnum,
+					      const int dif)
 {
 	struct sock *result = NULL, *sk;
 	const struct hlist_node *node;
@@ -159,6 +161,33 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
 	return result;
 }
 
+/* Optimize the common listener case. */
+struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+				    const u32 daddr, const unsigned short hnum,
+				    const int dif)
+{
+	struct sock *sk = NULL;
+	const struct hlist_head *head;
+
+	read_lock(&hashinfo->lhash_lock);
+	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	if (!hlist_empty(head)) {
+		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+		if (inet->num == hnum && !sk->sk_node.next &&
+		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+		    !sk->sk_bound_dev_if)
+			goto sherry_cache;
+		sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
+	}
+	if (sk) {
+sherry_cache:
+		sock_hold(sk);
+	}
+	read_unlock(&hashinfo->lhash_lock);
+	return sk;
+}
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
 /* called with local bh disabled */
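The new __inet_lookup_listener() wrapper takes lhash_lock once and short-circuits the common case: when the chain for this port hash holds a single socket that is wildcard enough to accept the packet, it is returned without the scored walk in inet_lookup_listener_slow(). The fast-path predicate, restated as a sketch; fast_path_listener() is a hypothetical helper and sk must be the head of a listening chain:

/* Sketch of the fast-path test above; fast_path_listener() is
 * illustrative only. */
static inline int fast_path_listener(const struct sock *sk, u32 daddr,
				     unsigned short hnum)
{
	const struct inet_sock *inet = inet_sk(sk);

	return inet->num == hnum &&		/* bound to the right port */
	       !sk->sk_node.next &&		/* sole socket in the chain */
	       (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
	       (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
	       !sk->sk_bound_dev_if;		/* not bound to one device */
}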
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 03ff62ebcfeb..a675602ef295 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -126,12 +126,9 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_HWCACHE_ALIGN,
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 			NULL, NULL);
 
-	if (!peer_cachep)
-		panic("cannot create inet_peer_cache");
-
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
 	 */
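SLAB_PANIC folds each caller's hand-rolled "allocation failed at boot" panic into the allocator flags, so the call site can assume success. The same refactoring in plain C, with a hypothetical xcreate() wrapper:

    #include <stdio.h>
    #include <stdlib.h>

    /* Before: every caller checks and aborts by hand.
     * After:  the wrapper aborts centrally, callers assume success. */
    static void *xcreate(size_t size)
    {
        void *p = calloc(1, size);
        if (!p) {
            fprintf(stderr, "cannot create cache\n");
            abort();                 /* the SLAB_PANIC analogue */
        }
        return p;
    }

    int main(void)
    {
        void *cache = xcreate(128);  /* no NULL check needed here */
        free(cache);
        return 0;
    }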
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b84b53a47526..165d72859ddf 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -54,15 +54,15 @@
  * even the most extreme cases without allowing an attacker to measurably
  * harm machine performance.
  */
-int sysctl_ipfrag_high_thresh = 256*1024;
-int sysctl_ipfrag_low_thresh = 192*1024;
+int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
+int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
 
-int sysctl_ipfrag_max_dist = 64;
+int sysctl_ipfrag_max_dist __read_mostly = 64;
 
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
-int sysctl_ipfrag_time = IP_FRAG_TIME;
+int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
 
 struct ipfrag_skb_cb
 {
@@ -130,7 +130,7 @@ static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
 }
 
 static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
+int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ipfrag_secret_rebuild(unsigned long dummy)
 {
@@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 	head->len += fp->len;
 	if (head->ip_summed != fp->ip_summed)
 		head->ip_summed = CHECKSUM_NONE;
-	else if (head->ip_summed == CHECKSUM_HW)
+	else if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_add(head->csum, fp->csum);
 	head->truesize += fp->truesize;
 	atomic_sub(fp->truesize, &ip_frag_mem);
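__read_mostly places a variable in a separate section so rarely-written tunables do not share cache lines with hot, frequently-written data, where every writer would invalidate readers' cached copies. A user-space approximation of the same separation using C11 alignment (illustrative only):

    #include <stdalign.h>
    #include <stdio.h>

    /* Read-mostly tunable: give it its own cache line so stores to the
     * hot counter below never invalidate it on other CPUs. */
    static alignas(64) int frag_high_thresh = 256 * 1024;

    /* Hot, frequently-written state lives on a different line. */
    static alignas(64) unsigned long frag_mem_used;

    int main(void)
    {
        frag_mem_used += 1500;
        printf("%d %lu\n", frag_high_thresh, frag_mem_used);
        return 0;
    }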
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0f9b3a31997b..f5fba051df3d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -393,7 +393,8 @@ out:
 	int code = skb->h.icmph->code;
 	int rel_type = 0;
 	int rel_code = 0;
-	int rel_info = 0;
+	__be32 rel_info = 0;
+	__u32 n = 0;
 	u16 flags;
 	int grehlen = (iph->ihl<<2) + 4;
 	struct sk_buff *skb2;
@@ -422,14 +423,16 @@ out:
 	default:
 		return;
 	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < (iph->ihl<<2))
+		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		if (n < (iph->ihl<<2))
 			return;
 
 		/* So... This guy found something strange INSIDE encapsulated
 		   packet. Well, he is fool, but what can we do ?
 		 */
 		rel_type = ICMP_PARAMETERPROB;
-		rel_info = skb->h.icmph->un.gateway - grehlen;
+		n -= grehlen;
+		rel_info = htonl(n << 24);
 		break;
 
 	case ICMP_DEST_UNREACH:
@@ -440,13 +443,14 @@ out:
 		return;
 	case ICMP_FRAG_NEEDED:
 		/* And it is the only really necessary thing :-) */
-		rel_info = ntohs(skb->h.icmph->un.frag.mtu);
-		if (rel_info < grehlen+68)
+		n = ntohs(skb->h.icmph->un.frag.mtu);
+		if (n < grehlen+68)
 			return;
-		rel_info -= grehlen;
+		n -= grehlen;
 		/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-		if (rel_info > ntohs(eiph->tot_len))
+		if (n > ntohs(eiph->tot_len))
 			return;
+		rel_info = htonl(n);
 		break;
 	default:
 		/* All others are translated to HOST_UNREACH.
@@ -508,12 +512,11 @@ out:
 
 	/* change mtu on this route */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (rel_info > dst_mtu(skb2->dst)) {
+		if (n > dst_mtu(skb2->dst)) {
 			kfree_skb(skb2);
 			return;
 		}
-		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
-		rel_info = htonl(rel_info);
+		skb2->dst->ops->update_pmtu(skb2->dst, n);
 	} else if (type == ICMP_TIME_EXCEEDED) {
 		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
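The PARAMETERPROB rework treats icmph->un.gateway as the 32-bit big-endian quantity it is on the wire: the problem pointer is its most significant byte, extracted with ntohl() >> 24 and re-packed with htonl(n << 24), while host-order arithmetic happens in the temporary n. A self-contained demonstration (the 8-byte header is a made-up stand-in for grehlen):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Pretend the ICMP header carried pointer = 20 in the top
         * byte of the 32-bit un.gateway field, as on the wire. */
        uint32_t wire = htonl(20u << 24);

        uint32_t n = ntohl(wire) >> 24;    /* host-order pointer: 20 */
        n -= 8;                            /* strip a hypothetical 8-byte
                                              encapsulation header */
        uint32_t rel = htonl(n << 24);     /* back to wire format */

        printf("inner pointer=%u, relayed=0x%08x\n", n, (unsigned)rel);
        return 0;
    }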
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 406056edc02b..e7437c091326 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -24,6 +24,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/route.h>
+#include <net/cipso_ipv4.h>
 
 /*
  * Write options to IP header, record destination address to
@@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 			dopt->is_strictroute = sopt->is_strictroute;
 		}
 	}
+	if (sopt->cipso) {
+		optlen = sptr[sopt->cipso+1];
+		dopt->cipso = dopt->optlen+sizeof(struct iphdr);
+		memcpy(dptr, sptr+sopt->cipso, optlen);
+		dptr += optlen;
+		dopt->optlen += optlen;
+	}
 	while (dopt->optlen & 3) {
 		*dptr++ = IPOPT_END;
 		dopt->optlen++;
@@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 			if (optptr[2] == 0 && optptr[3] == 0)
 				opt->router_alert = optptr - iph;
 			break;
+		      case IPOPT_CIPSO:
+			if (opt->cipso) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			opt->cipso = optptr - iph;
+			if (cipso_v4_validate(&optptr)) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			break;
 		      case IPOPT_SEC:
 		      case IPOPT_SID:
 		      default:
@@ -506,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp,
 	opt->__data[optlen++] = IPOPT_END;
 	opt->optlen = optlen;
 	opt->is_data = 1;
-	opt->is_setbyuser = 1;
 	if (optlen && ip_options_compile(opt, NULL)) {
 		kfree(opt);
 		return -EINVAL;
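The CIPSO branch in ip_options_echo copies the option as a type-length-value unit: the length byte sits at sptr[sopt->cipso + 1], and the running optlen is later padded to a 4-byte boundary with IPOPT_END. The copy-and-pad step in isolation (buffer contents hypothetical):

    #include <stdio.h>
    #include <string.h>

    #define IPOPT_END 0

    int main(void)
    {
        /* source options: type=134 (CIPSO), len=6, 4 payload bytes */
        unsigned char sopt[] = { 134, 6, 0, 0, 0, 1 };
        unsigned char dopt[40] = { 0 };
        unsigned int off = 0, optlen, total;

        optlen = sopt[off + 1];            /* TLV length byte */
        memcpy(dopt, sopt + off, optlen);

        total = optlen;
        while (total & 3)                  /* pad to 32-bit boundary */
            dopt[total++] = IPOPT_END;

        printf("copied %u, padded to %u bytes\n", optlen, total);
        return 0;
    }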
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a2ede167e045..97aee76fb746 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,7 +83,7 @@
 #include <linux/netlink.h>
 #include <linux/tcp.h>
 
-int sysctl_ip_default_ttl = IPDEFTTL;
+int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
 
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
@@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 		 * keep trying until route appears or the connection times
 		 * itself out.
 		 */
+		security_sk_classify_flow(sk, &fl);
 		if (ip_route_output_flow(&rt, &fl, sk, 0))
 			goto no_route;
 	}
@@ -425,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	int ptr;
 	struct net_device *dev;
 	struct sk_buff *skb2;
-	unsigned int mtu, hlen, left, len, ll_rs;
+	unsigned int mtu, hlen, left, len, ll_rs, pad;
 	int offset;
 	__be16 not_last_frag;
 	struct rtable *rt = (struct rtable*)skb->dst;
@@ -555,14 +556,13 @@ slow_path:
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = raw + hlen;		/* Where to start from */
 
-#ifdef CONFIG_BRIDGE_NETFILTER
 	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
-	 * we need to make room for the encapsulating header */
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
-	mtu -= nf_bridge_pad(skb);
-#else
-	ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
-#endif
+	 * we need to make room for the encapsulating header
+	 */
+	pad = nf_bridge_pad(skb);
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+	mtu -= pad;
 
 	/*
 	 *	Fragment the datagram.
 	 */
@@ -679,7 +679,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
 {
 	struct iovec *iov = from;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
 			return -EFAULT;
 	} else {
@@ -735,7 +735,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 		sk->sk_sndmsg_off = 0;
 	}
@@ -843,7 +843,7 @@ int ip_append_data(struct sock *sk,
 	    length + fragheaderlen <= mtu &&
 	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
 	    !exthdrlen)
-		csummode = CHECKSUM_HW;
+		csummode = CHECKSUM_PARTIAL;
 
 	inet->cork.length += length;
 	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
@@ -1366,6 +1366,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 			    { .sport = skb->h.th->dest,
 			      .dport = skb->h.th->source } },
 		    .proto = sk->sk_protocol };
+		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			return;
 	}
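The renames split the ambiguous CHECKSUM_HW into self-describing states: CHECKSUM_PARTIAL marks an outgoing packet whose checksum the device will finish, CHECKSUM_COMPLETE an incoming packet for which the device already provided the full ones'-complement sum. The fold step at the heart of both, reduced to a standalone function:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    /* Ones'-complement sum of 16-bit words, folded to 16 bits. */
    static uint16_t csum(const uint8_t *data, size_t len)
    {
        uint32_t sum = 0;
        while (len > 1) { sum += (data[0] << 8) | data[1]; data += 2; len -= 2; }
        if (len)          sum += data[0] << 8;       /* odd trailing byte */
        while (sum >> 16) sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    int main(void)
    {
        uint8_t payload[] = { 0x45, 0x00, 0x00, 0x1c };
        printf("checksum = 0x%04x\n", csum(payload, sizeof(payload)));
        return 0;
    }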
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 5bb9c9f03fb6..17342430a843 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 
 out_ok:
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		ip_send_check(iph);
 	return 0;
 }
@@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 	t->id.daddr.a4 = x->id.daddr.a4;
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET;
-	t->props.mode = 1;
+	t->props.mode = XFRM_MODE_TUNNEL;
 	t->props.saddr.a4 = x->props.saddr.a4;
 	t->props.flags = x->props.flags;
 
@@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
 		goto out;
 
 	x->props.header_len = 0;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 
 	mutex_lock(&ipcomp_resource_mutex);
@@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
 		goto error;
 	mutex_unlock(&ipcomp_resource_mutex);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
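Comparing x->props.mode against XFRM_MODE_TUNNEL instead of truth-testing a magic 1 costs nothing and stays correct when more modes exist. The hazard in miniature (enum values illustrative, not the kernel's):

    #include <stdio.h>

    enum xfrm_mode { MODE_TRANSPORT = 0, MODE_TUNNEL = 1, MODE_BEET = 4 };

    int main(void)
    {
        enum xfrm_mode mode = MODE_BEET;

        if (mode)                   /* old test: any non-zero mode matches */
            printf("treated as tunnel (wrong)\n");
        if (mode == MODE_TUNNEL)    /* new test: only tunnel mode matches */
            printf("tunnel\n");
        else
            printf("not tunnel\n");
        return 0;
    }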
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cb8a92f18ef6..1fbb38415b19 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -31,7 +31,6 @@
  *  -- Josef Siemes <jsiemes@web.de>, Aug 2002
  */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 76ab50b0d6ef..0c4556529228 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -341,7 +341,8 @@ out:
 	int code = skb->h.icmph->code;
 	int rel_type = 0;
 	int rel_code = 0;
-	int rel_info = 0;
+	__be32 rel_info = 0;
+	__u32 n = 0;
 	struct sk_buff *skb2;
 	struct flowi fl;
 	struct rtable *rt;
@@ -354,14 +355,15 @@ out:
 	default:
 		return 0;
 	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < hlen)
+		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		if (n < hlen)
 			return 0;
 
 		/* So... This guy found something strange INSIDE encapsulated
 		   packet. Well, he is fool, but what can we do ?
 		 */
 		rel_type = ICMP_PARAMETERPROB;
-		rel_info = skb->h.icmph->un.gateway - hlen;
+		rel_info = htonl((n - hlen) << 24);
 		break;
 
 	case ICMP_DEST_UNREACH:
@@ -372,13 +374,14 @@ out:
 		return 0;
 	case ICMP_FRAG_NEEDED:
 		/* And it is the only really necessary thing :-) */
-		rel_info = ntohs(skb->h.icmph->un.frag.mtu);
-		if (rel_info < hlen+68)
+		n = ntohs(skb->h.icmph->un.frag.mtu);
+		if (n < hlen+68)
 			return 0;
-		rel_info -= hlen;
+		n -= hlen;
 		/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-		if (rel_info > ntohs(eiph->tot_len))
+		if (n > ntohs(eiph->tot_len))
 			return 0;
+		rel_info = htonl(n);
 		break;
 	default:
 		/* All others are translated to HOST_UNREACH.
@@ -440,12 +443,11 @@ out:
 
 	/* change mtu on this route */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (rel_info > dst_mtu(skb2->dst)) {
+		if (n > dst_mtu(skb2->dst)) {
 			kfree_skb(skb2);
 			return 0;
 		}
-		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
-		rel_info = htonl(rel_info);
+		skb2->dst->ops->update_pmtu(skb2->dst, n);
 	} else if (type == ICMP_TIME_EXCEEDED) {
 		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 85893eef6b16..ba49588da242 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 		e = NLMSG_DATA(nlh);
 		e->error = -ETIMEDOUT;
 		memset(&e->msg, 0, sizeof(e->msg));
-		netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+		rtnl_unicast(skb, NETLINK_CB(skb).pid);
 	} else
 		kfree_skb(skb);
 }
@@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 
 	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (skb->nh.iph->version == 0) {
-			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 				e->error = -EMSGSIZE;
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
-			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+			rtnl_unicast(skb, NETLINK_CB(skb).pid);
 		} else
 			ip_mr_forward(skb, c, 0);
 	}
@@ -1899,11 +1900,8 @@ void __init ip_mr_init(void)
 {
 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
 				       sizeof(struct mfc_cache),
-				       0, SLAB_HWCACHE_ALIGN,
+				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 				       NULL, NULL);
-	if (!mrt_cachep)
-		panic("cannot allocate ip_mrt_cache");
-
 	init_timer(&ipmr_expire_timer);
 	ipmr_expire_timer.function=ipmr_expire_process;
 	register_netdevice_notifier(&ip_mr_notifier);
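Both call sites now go through rtnl_unicast(), which hides the rtnetlink socket and the MSG_DONTWAIT flag behind one helper. The wrapper introduced alongside this patchset plausibly reduces to a one-liner like the sketch below — a guess at its shape for illustration, not verified here:

    /* Hypothetical shape of the helper, for illustration only. */
    int rtnl_unicast(struct sk_buff *skb, u32 pid)
    {
            return netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
    }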
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index bc28b1160a3a..820e8318d10d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 	switch (skb->ip_summed) {
 	case CHECKSUM_NONE:
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
 				      skb->len - tcphoff,
 				      skb->nh.iph->protocol, skb->csum)) {
@@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 		}
 		break;
 	default:
-		/* CHECKSUM_UNNECESSARY */
+		/* No need to checksum. */
 		break;
 	}
 
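The switch in tcp_csum_check relies on deliberate fall-through: CHECKSUM_NONE first computes the full packet sum, then falls into the CHECKSUM_COMPLETE arm, which verifies it against the pseudo-header. The control flow in a compilable sketch (all names hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    enum { CSUM_NONE, CSUM_COMPLETE, CSUM_UNNECESSARY };

    static uint32_t sum_packet(void) { return 0xffff; } /* stand-in */
    static int verify(uint32_t sum)  { return sum == 0xffff; }

    int main(void)
    {
        int state = CSUM_NONE;
        uint32_t sum = 0;

        switch (state) {
        case CSUM_NONE:
            sum = sum_packet();
            /* fall through: the freshly computed sum is verified below */
        case CSUM_COMPLETE:
            printf(verify(sum) ? "ok\n" : "bad csum\n");
            break;
        default:        /* CSUM_UNNECESSARY: hardware already vouched */
            printf("skipped\n");
        }
        return 0;
    }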
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 89d9175d8f28..90c8166c0ec1 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 	case CHECKSUM_NONE:
 		skb->csum = skb_checksum(skb, udphoff,
 					 skb->len - udphoff, 0);
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (csum_tcpudp_magic(skb->nh.iph->saddr,
 				      skb->nh.iph->daddr,
 				      skb->len - udphoff,
@@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 		}
 		break;
 	default:
-		/* CHECKSUM_UNNECESSARY */
+		/* No need to checksum. */
 		break;
 	}
 }
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6a9e34b794bc..f88347de21a9 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 	unsigned int csum = 0;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
 			break;
 		if ((protocol == 0 && !(u16)csum_fold(skb->csum)) ||
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ef0b5aac5838..a55b8ff70ded 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_DSCP
-	tristate "DSCP match support"
-	depends on IP_NF_IPTABLES
-	help
-	  This option adds a `DSCP' match, which allows you to match against
-	  the IPv4 header DSCP field (DSCP codepoint).
-
-	  The DSCP codepoint can have any value between 0x0 and 0x4f.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_AH
 	tristate "AH match support"
 	depends on IP_NF_IPTABLES
@@ -568,17 +557,6 @@ config IP_NF_TARGET_ECN
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_DSCP
-	tristate "DSCP target support"
-	depends on IP_NF_MANGLE
-	help
-	  This option adds a `DSCP' match, which allows you to match against
-	  the IPv4 header DSCP field (DSCP codepoint).
-
-	  The DSCP codepoint can have any value between 0x0 and 0x4f.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_TARGET_TTL
 	tristate 'TTL target support'
 	depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3ded4a3af59c..09aaed1a8063 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
 obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
 obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
@@ -68,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
 obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8d1d7a6e72a5..85f0d73ebfb4 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -56,8 +56,6 @@ do { \
 #define ARP_NF_ASSERT(x)
 #endif
 
-#include <linux/netfilter_ipv4/listhelp.h>
-
 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 				      char *hdr_addr, int len)
 {
@@ -208,8 +206,7 @@ static unsigned int arpt_error(struct sk_buff **pskb,
 				   const struct net_device *out,
 				   unsigned int hooknum,
 				   const struct xt_target *target,
-				   const void *targinfo,
-				   void *userinfo)
+				   const void *targinfo)
 {
 	if (net_ratelimit())
 		printk("arp_tables: error: '%s'\n", (char *)targinfo);
@@ -226,8 +223,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 			   unsigned int hook,
 			   const struct net_device *in,
 			   const struct net_device *out,
-			   struct arpt_table *table,
-			   void *userdata)
+			   struct arpt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ];
 	unsigned int verdict = NF_DROP;
@@ -302,8 +298,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 						     in, out,
 						     hook,
 						     t->u.kernel.target,
-						     t->data,
-						     userdata);
+						     t->data);
 
 			/* Target might have changed stuff. */
 			arp = (*pskb)->nh.arph;
@@ -490,12 +485,10 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
 	if (t->u.kernel.target == &arpt_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
-			goto out;
+			goto err;
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
 						      e->comefrom)) {
 		duprintf("arp_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -562,8 +555,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
 
 	t = arpt_get_target(e);
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-					    t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
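The dropped userinfo/userdata and targinfosize parameters were unused, so a target's hooks now take only what they need. Judging from the call sites in this diff, the prototypes change roughly as sketched below (a reconstruction of their shape, not copied from a header):

    /* before */
    int  (*checkentry)(const char *table, const void *entry,
                       const struct xt_target *target, void *targinfo,
                       unsigned int targinfosize, unsigned int hook_mask);
    void (*destroy)(const struct xt_target *target, void *targinfo,
                    unsigned int targinfosize);

    /* after */
    int  (*checkentry)(const char *table, const void *entry,
                       const struct xt_target *target, void *targinfo,
                       unsigned int hook_mask);
    void (*destroy)(const struct xt_target *target, void *targinfo);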
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index a58325c1ceb9..d12b1df252a1 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -11,7 +11,7 @@ static unsigned int
 target(struct sk_buff **pskb,
        const struct net_device *in, const struct net_device *out,
        unsigned int hooknum, const struct xt_target *target,
-       const void *targinfo, void *userinfo)
+       const void *targinfo)
 {
 	const struct arpt_mangle *mangle = targinfo;
 	struct arphdr *arp;
@@ -67,7 +67,7 @@ target(struct sk_buff **pskb,
 
 static int
 checkentry(const char *tablename, const void *e, const struct xt_target *target,
-           void *targinfo, unsigned int targinfosize, unsigned int hook_mask)
+           void *targinfo, unsigned int hook_mask)
 {
 	const struct arpt_mangle *mangle = targinfo;
 
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index d7c472faa53b..7edea2a1696c 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook,
 			       const struct net_device *out,
 			       int (*okfn)(struct sk_buff *))
 {
-	return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return arpt_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index aa459177c3f8..c432b3163609 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -47,7 +47,6 @@
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #define IP_CONNTRACK_VERSION	"2.4"
 
@@ -64,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0);
 
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
 static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size = 0;
-int ip_conntrack_max;
-struct list_head *ip_conntrack_hash;
+unsigned int ip_conntrack_htable_size __read_mostly = 0;
+int ip_conntrack_max __read_mostly;
+struct list_head *ip_conntrack_hash __read_mostly;
 static kmem_cache_t *ip_conntrack_cachep __read_mostly;
 static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid;
+unsigned int ip_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc;
+static int ip_conntrack_vmalloc __read_mostly;
 
 static unsigned int ip_conntrack_next_id;
 static unsigned int ip_conntrack_expect_next_id;
@@ -294,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct)
 static void
 clean_from_lists(struct ip_conntrack *ct)
 {
-	unsigned int ho, hr;
-
 	DEBUGP("clean_from_lists(%p)\n", ct);
 	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
-
-	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
 
 	/* Destroy all pending expectations */
 	ip_ct_remove_expectations(ct);
@@ -313,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
 	struct ip_conntrack_protocol *proto;
+	struct ip_conntrack_helper *helper;
 
 	DEBUGP("destroy_conntrack(%p)\n", ct);
 	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -321,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	ip_conntrack_event(IPCT_DESTROY, ct);
 	set_bit(IPS_DYING_BIT, &ct->status);
 
+	helper = ct->helper;
+	if (helper && helper->destroy)
+		helper->destroy(ct);
+
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to ip_conntrack_lock!!! -HW */
@@ -367,16 +366,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	ip_conntrack_put(ct);
 }
 
-static inline int
-conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
-		    const struct ip_conntrack_tuple *tuple,
-		    const struct ip_conntrack *ignored_conntrack)
-{
-	ASSERT_READ_LOCK(&ip_conntrack_lock);
-	return tuplehash_to_ctrack(i) != ignored_conntrack
-		&& ip_ct_tuple_equal(tuple, &i->tuple);
-}
-
 struct ip_conntrack_tuple_hash *
 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
 		    const struct ip_conntrack *ignored_conntrack)
@@ -386,7 +375,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
 
 	ASSERT_READ_LOCK(&ip_conntrack_lock);
 	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
-		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+		if (tuplehash_to_ctrack(h) != ignored_conntrack &&
+		    ip_ct_tuple_equal(tuple, &h->tuple)) {
 			CONNTRACK_STAT_INC(found);
 			return h;
 		}
@@ -417,10 +407,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
 			       unsigned int repl_hash)
 {
 	ct->id = ++ip_conntrack_next_id;
-	list_prepend(&ip_conntrack_hash[hash],
-		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&ip_conntrack_hash[repl_hash],
-		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+		 &ip_conntrack_hash[hash]);
+	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+		 &ip_conntrack_hash[repl_hash]);
 }
 
 void ip_conntrack_hash_insert(struct ip_conntrack *ct)
@@ -440,6 +430,7 @@ int
 __ip_conntrack_confirm(struct sk_buff **pskb)
 {
 	unsigned int hash, repl_hash;
+	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
 
@@ -470,43 +461,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&ip_conntrack_hash[hash],
-		       conntrack_tuple_cmp,
-		       struct ip_conntrack_tuple_hash *,
-		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
-			  conntrack_tuple_cmp,
-			  struct ip_conntrack_tuple_hash *,
-			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
-		/* Remove from unconfirmed list */
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_for_each_entry(h, &ip_conntrack_hash[hash], list)
+		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      &h->tuple))
+			goto out;
+	list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
+		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				      &h->tuple))
+			goto out;
 
-		__ip_conntrack_hash_insert(ct, hash, repl_hash);
-		/* Timer relative to confirmation time, not original
-		   setting time, otherwise we'd get timer wrap in
-		   weird delay cases. */
-		ct->timeout.expires += jiffies;
-		add_timer(&ct->timeout);
-		atomic_inc(&ct->ct_general.use);
-		set_bit(IPS_CONFIRMED_BIT, &ct->status);
-		CONNTRACK_STAT_INC(insert);
-		write_unlock_bh(&ip_conntrack_lock);
-		if (ct->helper)
-			ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+	/* Remove from unconfirmed list */
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+	__ip_conntrack_hash_insert(ct, hash, repl_hash);
+	/* Timer relative to confirmation time, not original
+	   setting time, otherwise we'd get timer wrap in
+	   weird delay cases. */
+	ct->timeout.expires += jiffies;
+	add_timer(&ct->timeout);
+	atomic_inc(&ct->ct_general.use);
+	set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	CONNTRACK_STAT_INC(insert);
+	write_unlock_bh(&ip_conntrack_lock);
+	if (ct->helper)
+		ip_conntrack_event_cache(IPCT_HELPER, *pskb);
 #ifdef CONFIG_IP_NF_NAT_NEEDED
 	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
 	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
 		ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
 #endif
 	ip_conntrack_event_cache(master_ct(ct) ?
 				 IPCT_RELATED : IPCT_NEW, *pskb);
 
 	return NF_ACCEPT;
-	}
 
+out:
 	CONNTRACK_STAT_INC(insert_failed);
 	write_unlock_bh(&ip_conntrack_lock);
-
 	return NF_DROP;
 }
 
@@ -527,23 +518,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
-{
-	return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
-}
-
 static int early_drop(struct list_head *chain)
 {
 	/* Traverse backwards: gives us oldest, which is roughly LRU */
 	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct = NULL;
+	struct ip_conntrack *ct = NULL, *tmp;
 	int dropped = 0;
 
 	read_lock_bh(&ip_conntrack_lock);
-	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
-	if (h) {
-		ct = tuplehash_to_ctrack(h);
-		atomic_inc(&ct->ct_general.use);
+	list_for_each_entry_reverse(h, chain, list) {
+		tmp = tuplehash_to_ctrack(h);
+		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+			ct = tmp;
+			atomic_inc(&ct->ct_general.use);
+			break;
+		}
 	}
 	read_unlock_bh(&ip_conntrack_lock);
 
@@ -559,18 +548,16 @@ static int early_drop(struct list_head *chain)
 	return dropped;
 }
 
-static inline int helper_cmp(const struct ip_conntrack_helper *i,
-			     const struct ip_conntrack_tuple *rtuple)
-{
-	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
 static struct ip_conntrack_helper *
 __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
-			 struct ip_conntrack_helper *,
-			 tuple);
+	struct ip_conntrack_helper *h;
+
+	list_for_each_entry(h, &helpers, list) {
+		if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+			return h;
+	}
+	return NULL;
 }
 
 struct ip_conntrack_helper *
@@ -640,11 +627,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
 		ip_conntrack_hash_rnd_initted = 1;
 	}
 
+	/* We don't want any race condition at early drop stage */
+	atomic_inc(&ip_conntrack_count);
+
 	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+	    && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
 		if (!early_drop(&ip_conntrack_hash[hash])) {
+			atomic_dec(&ip_conntrack_count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "ip_conntrack: table full, dropping"
@@ -656,6 +647,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
 	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
+		atomic_dec(&ip_conntrack_count);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -669,8 +661,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
 
-	atomic_inc(&ip_conntrack_count);
-
 	return conntrack;
 }
 
@@ -1062,7 +1052,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
 {
 	BUG_ON(me->timeout == 0);
 	write_lock_bh(&ip_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_add(&me->list, &helpers);
 	write_unlock_bh(&ip_conntrack_lock);
 
 	return 0;
@@ -1081,24 +1071,24 @@ __ip_conntrack_helper_find_byname(const char *name)
 	return NULL;
 }
 
-static inline int unhelp(struct ip_conntrack_tuple_hash *i,
-			 const struct ip_conntrack_helper *me)
+static inline void unhelp(struct ip_conntrack_tuple_hash *i,
+			  const struct ip_conntrack_helper *me)
 {
 	if (tuplehash_to_ctrack(i)->helper == me) {
 		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
 		tuplehash_to_ctrack(i)->helper = NULL;
 	}
-	return 0;
 }
 
 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
 {
 	unsigned int i;
+	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack_expect *exp, *tmp;
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&ip_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	list_del(&me->list);
 
 	/* Get rid of expectations */
 	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
@@ -1108,10 +1098,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
 		}
 	}
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
-	for (i = 0; i < ip_conntrack_htable_size; i++)
-		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
-			    struct ip_conntrack_tuple_hash *, me);
+	list_for_each_entry(h, &unconfirmed, list)
+		unhelp(h, me);
+	for (i = 0; i < ip_conntrack_htable_size; i++) {
+		list_for_each_entry(h, &ip_conntrack_hash[i], list)
+			unhelp(h, me);
+	}
 	write_unlock_bh(&ip_conntrack_lock);
 
 	/* Someone could be still looking at the helper in a bh. */
@@ -1237,46 +1229,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 	nf_conntrack_get(nskb->nfct);
 }
 
-static inline int
-do_iter(const struct ip_conntrack_tuple_hash *i,
-	int (*iter)(struct ip_conntrack *i, void *data),
-	void *data)
-{
-	return iter(tuplehash_to_ctrack(i), data);
-}
-
 /* Bring out ya dead! */
-static struct ip_conntrack_tuple_hash *
+static struct ip_conntrack *
 get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
 		void *data, unsigned int *bucket)
 {
-	struct ip_conntrack_tuple_hash *h = NULL;
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct;
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
-				struct ip_conntrack_tuple_hash *, iter, data);
-		if (h)
-			break;
+		list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
+			ct = tuplehash_to_ctrack(h);
+			if (iter(ct, data))
+				goto found;
+		}
+	}
+	list_for_each_entry(h, &unconfirmed, list) {
+		ct = tuplehash_to_ctrack(h);
+		if (iter(ct, data))
+			goto found;
 	}
-	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
-				struct ip_conntrack_tuple_hash *, iter, data);
-	if (h)
-		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
 	write_unlock_bh(&ip_conntrack_lock);
+	return NULL;
 
-	return h;
+found:
+	atomic_inc(&ct->ct_general.use);
+	write_unlock_bh(&ip_conntrack_lock);
+	return ct;
 }
 
 void
 ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
 {
-	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct;
 	unsigned int bucket = 0;
 
-	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
-		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
 			death_by_timeout((unsigned long)ct);
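All the LIST_FIND/LIST_DELETE conversions in this file replace callback-style helpers with open-coded list_for_each_entry, which recovers the containing object from its embedded list node via container_of. The core of that mechanism, reduced to standalone C with a simplified singly-linked node:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct node { struct node *next; };
    struct conn { int id; struct node link; };

    int main(void)
    {
        struct conn a = { 1, { NULL } }, b = { 2, { NULL } };
        a.link.next = &b.link;

        /* Walk embedded nodes, recovering each containing struct conn. */
        for (struct node *n = &a.link; n; n = n->next) {
            struct conn *c = container_of(n, struct conn, link);
            printf("conn %d\n", c->id);
        }
        return 0;
    }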
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index b020a33e65e9..fb0aee691721 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -20,11 +20,11 @@
20 * - We can only support one single call within each session 20 * - We can only support one single call within each session
21 * 21 *
22 * TODO: 22 * TODO:
23 * - testing of incoming PPTP calls 23 * - testing of incoming PPTP calls
24 * 24 *
25 * Changes: 25 * Changes:
26 * 2002-02-05 - Version 1.3 26 * 2002-02-05 - Version 1.3
27 * - Call ip_conntrack_unexpect_related() from 27 * - Call ip_conntrack_unexpect_related() from
28 * pptp_destroy_siblings() to destroy expectations in case 28 * pptp_destroy_siblings() to destroy expectations in case
29 * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen 29 * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
30 * (Philip Craig <philipc@snapgear.com>) 30 * (Philip Craig <philipc@snapgear.com>)
@@ -80,7 +80,7 @@ int
80 struct PptpControlHeader *ctlh, 80 struct PptpControlHeader *ctlh,
81 union pptp_ctrl_union *pptpReq); 81 union pptp_ctrl_union *pptpReq);
82 82
83int 83void
84(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, 84(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
85 struct ip_conntrack_expect *expect_reply); 85 struct ip_conntrack_expect *expect_reply);
86 86
@@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct,
141 invert_tuplepr(&inv_t, &exp->tuple); 141 invert_tuplepr(&inv_t, &exp->tuple);
142 DEBUGP("trying to unexpect other dir: "); 142 DEBUGP("trying to unexpect other dir: ");
143 DUMP_TUPLE(&inv_t); 143 DUMP_TUPLE(&inv_t);
144 144
145 exp_other = ip_conntrack_expect_find(&inv_t); 145 exp_other = ip_conntrack_expect_find(&inv_t);
146 if (exp_other) { 146 if (exp_other) {
147 /* delete other expectation. */ 147 /* delete other expectation. */
@@ -194,15 +194,16 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
194{ 194{
195 struct ip_conntrack_tuple t; 195 struct ip_conntrack_tuple t;
196 196
197 /* Since ct->sibling_list has literally rusted away in 2.6.11, 197 ip_ct_gre_keymap_destroy(ct);
198 /* Since ct->sibling_list has literally rusted away in 2.6.11,
198 * we now need another way to find out about our sibling 199 * we now need another way to find out about our sibling
199 * contrack and expects... -HW */ 200 * contrack and expects... -HW */
200 201
201 /* try original (pns->pac) tuple */ 202 /* try original (pns->pac) tuple */
202 memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); 203 memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
203 t.dst.protonum = IPPROTO_GRE; 204 t.dst.protonum = IPPROTO_GRE;
204 t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); 205 t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
205 t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); 206 t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
206 207
207 if (!destroy_sibling_or_exp(&t)) 208 if (!destroy_sibling_or_exp(&t))
208 DEBUGP("failed to timeout original pns->pac ct/exp\n"); 209 DEBUGP("failed to timeout original pns->pac ct/exp\n");
@@ -210,8 +211,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
210 /* try reply (pac->pns) tuple */ 211 /* try reply (pac->pns) tuple */
211 memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); 212 memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
212 t.dst.protonum = IPPROTO_GRE; 213 t.dst.protonum = IPPROTO_GRE;
213 t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); 214 t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
214 t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); 215 t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
215 216
216 if (!destroy_sibling_or_exp(&t)) 217 if (!destroy_sibling_or_exp(&t))
217 DEBUGP("failed to timeout reply pac->pns ct/exp\n"); 218 DEBUGP("failed to timeout reply pac->pns ct/exp\n");
@@ -219,94 +220,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
219 220
220/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ 221/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
221static inline int 222static inline int
222exp_gre(struct ip_conntrack *master, 223exp_gre(struct ip_conntrack *ct,
223 u_int32_t seq,
224 __be16 callid, 224 __be16 callid,
225 __be16 peer_callid) 225 __be16 peer_callid)
226{ 226{
227 struct ip_conntrack_tuple inv_tuple;
228 struct ip_conntrack_tuple exp_tuples[] = {
229 /* tuple in original direction, PNS->PAC */
230 { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
231 .u = { .gre = { .key = peer_callid } }
232 },
233 .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
234 .u = { .gre = { .key = callid } },
235 .protonum = IPPROTO_GRE
236 },
237 },
238 /* tuple in reply direction, PAC->PNS */
239 { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
240 .u = { .gre = { .key = callid } }
241 },
242 .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
243 .u = { .gre = { .key = peer_callid } },
244 .protonum = IPPROTO_GRE
245 },
246 }
247 };
248 struct ip_conntrack_expect *exp_orig, *exp_reply; 227 struct ip_conntrack_expect *exp_orig, *exp_reply;
249 int ret = 1; 228 int ret = 1;
250 229
251 exp_orig = ip_conntrack_expect_alloc(master); 230 exp_orig = ip_conntrack_expect_alloc(ct);
252 if (exp_orig == NULL) 231 if (exp_orig == NULL)
253 goto out; 232 goto out;
254 233
255 exp_reply = ip_conntrack_expect_alloc(master); 234 exp_reply = ip_conntrack_expect_alloc(ct);
256 if (exp_reply == NULL) 235 if (exp_reply == NULL)
257 goto out_put_orig; 236 goto out_put_orig;
258 237
259 memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); 238 /* original direction, PNS->PAC */
239 exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
240 exp_orig->tuple.src.u.gre.key = peer_callid;
241 exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
242 exp_orig->tuple.dst.u.gre.key = callid;
243 exp_orig->tuple.dst.protonum = IPPROTO_GRE;
260 244
261 exp_orig->mask.src.ip = 0xffffffff; 245 exp_orig->mask.src.ip = 0xffffffff;
262 exp_orig->mask.src.u.all = 0; 246 exp_orig->mask.src.u.all = 0;
263 exp_orig->mask.dst.u.all = 0;
264 exp_orig->mask.dst.u.gre.key = htons(0xffff); 247 exp_orig->mask.dst.u.gre.key = htons(0xffff);
265 exp_orig->mask.dst.ip = 0xffffffff; 248 exp_orig->mask.dst.ip = 0xffffffff;
266 exp_orig->mask.dst.protonum = 0xff; 249 exp_orig->mask.dst.protonum = 0xff;
267 250
268 exp_orig->master = master; 251 exp_orig->master = ct;
269 exp_orig->expectfn = pptp_expectfn; 252 exp_orig->expectfn = pptp_expectfn;
270 exp_orig->flags = 0; 253 exp_orig->flags = 0;
271 254
272 /* both expectations are identical apart from tuple */ 255 /* both expectations are identical apart from tuple */
273 memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); 256 memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
274 memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
275 257
276 if (ip_nat_pptp_hook_exp_gre) 258 /* reply direction, PAC->PNS */
277 ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); 259 exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
278 else { 260 exp_reply->tuple.src.u.gre.key = callid;
279 261 exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
280 DEBUGP("calling expect_related PNS->PAC"); 262 exp_reply->tuple.dst.u.gre.key = peer_callid;
281 DUMP_TUPLE(&exp_orig->tuple); 263 exp_reply->tuple.dst.protonum = IPPROTO_GRE;
282
283 if (ip_conntrack_expect_related(exp_orig) != 0) {
284 DEBUGP("cannot expect_related()\n");
285 goto out_put_both;
286 }
287 264
288 DEBUGP("calling expect_related PAC->PNS"); 265 if (ip_nat_pptp_hook_exp_gre)
289 DUMP_TUPLE(&exp_reply->tuple); 266 ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
290 267 if (ip_conntrack_expect_related(exp_orig) != 0)
291 if (ip_conntrack_expect_related(exp_reply) != 0) { 268 goto out_put_both;
292 DEBUGP("cannot expect_related()\n"); 269 if (ip_conntrack_expect_related(exp_reply) != 0)
293 goto out_unexpect_orig; 270 goto out_unexpect_orig;
294 } 271
295 272 /* Add GRE keymap entries */
296 /* Add GRE keymap entries */ 273 if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
297 if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { 274 goto out_unexpect_both;
298 DEBUGP("cannot keymap_add() exp\n"); 275 if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
299 goto out_unexpect_both; 276 ip_ct_gre_keymap_destroy(ct);
300 } 277 goto out_unexpect_both;
301
302 invert_tuplepr(&inv_tuple, &exp_reply->tuple);
303 if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
304 ip_ct_gre_keymap_destroy(master);
305 DEBUGP("cannot keymap_add() exp_inv\n");
306 goto out_unexpect_both;
307 }
308 ret = 0;
309 } 278 }
279 ret = 0;
310 280
311out_put_both: 281out_put_both:
312 ip_conntrack_expect_put(exp_reply); 282 ip_conntrack_expect_put(exp_reply);
@@ -322,73 +292,36 @@ out_unexpect_orig:
322 goto out_put_both; 292 goto out_put_both;
323} 293}
324 294
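
The rewritten exp_gre() above replaces the tuple-array-plus-memcpy construction with direct field assignments, drops the now-unneeded seq argument, and derives both GRE keymap entries straight from the expectation tuples (flagging the reply direction) instead of going through invert_tuplepr(). The NAT hook, now void, only rewrites the expectations before ip_conntrack_expect_related() runs for both directions. A standalone sketch of the two flows being expected, assuming the usual roles (PNS = client, PAC = server) and made-up values:

/* The two GRE flows per PPTP call: each direction is keyed by the
 * receiver's call ID. Types and values are illustrative. */
#include <stdio.h>
#include <stdint.h>

struct flow { const char *src, *dst; uint16_t gre_key; };

int main(void)
{
	uint16_t pac_callid = 7;	/* "callid" in exp_gre()      */
	uint16_t pns_callid = 9;	/* "peer_callid" in exp_gre() */

	struct flow orig  = { "PNS", "PAC", pac_callid };	/* original dir */
	struct flow reply = { "PAC", "PNS", pns_callid };	/* reply dir    */

	printf("%s->%s key=%u\n", orig.src, orig.dst, (unsigned)orig.gre_key);
	printf("%s->%s key=%u\n", reply.src, reply.dst, (unsigned)reply.gre_key);
	return 0;
}
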
325static inline int 295static inline int
326pptp_inbound_pkt(struct sk_buff **pskb, 296pptp_inbound_pkt(struct sk_buff **pskb,
327 struct tcphdr *tcph, 297 struct PptpControlHeader *ctlh,
328 unsigned int nexthdr_off, 298 union pptp_ctrl_union *pptpReq,
329 unsigned int datalen, 299 unsigned int reqlen,
330 struct ip_conntrack *ct, 300 struct ip_conntrack *ct,
331 enum ip_conntrack_info ctinfo) 301 enum ip_conntrack_info ctinfo)
332{ 302{
333 struct PptpControlHeader _ctlh, *ctlh;
334 unsigned int reqlen;
335 union pptp_ctrl_union _pptpReq, *pptpReq;
336 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; 303 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
337 u_int16_t msg; 304 u_int16_t msg;
338 __be16 *cid, *pcid; 305 __be16 cid = 0, pcid = 0;
339 u_int32_t seq;
340
341 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
342 if (!ctlh) {
343 DEBUGP("error during skb_header_pointer\n");
344 return NF_ACCEPT;
345 }
346 nexthdr_off += sizeof(_ctlh);
347 datalen -= sizeof(_ctlh);
348
349 reqlen = datalen;
350 if (reqlen > sizeof(*pptpReq))
351 reqlen = sizeof(*pptpReq);
352 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
353 if (!pptpReq) {
354 DEBUGP("error during skb_header_pointer\n");
355 return NF_ACCEPT;
356 }
357 306
358 msg = ntohs(ctlh->messageType); 307 msg = ntohs(ctlh->messageType);
359 DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); 308 DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
360 309
361 switch (msg) { 310 switch (msg) {
362 case PPTP_START_SESSION_REPLY: 311 case PPTP_START_SESSION_REPLY:
363 if (reqlen < sizeof(_pptpReq.srep)) {
364 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
365 break;
366 }
367
368 /* server confirms new control session */ 312 /* server confirms new control session */
369 if (info->sstate < PPTP_SESSION_REQUESTED) { 313 if (info->sstate < PPTP_SESSION_REQUESTED)
370 DEBUGP("%s without START_SESS_REQUEST\n", 314 goto invalid;
371 pptp_msg_name[msg]);
372 break;
373 }
374 if (pptpReq->srep.resultCode == PPTP_START_OK) 315 if (pptpReq->srep.resultCode == PPTP_START_OK)
375 info->sstate = PPTP_SESSION_CONFIRMED; 316 info->sstate = PPTP_SESSION_CONFIRMED;
376 else 317 else
377 info->sstate = PPTP_SESSION_ERROR; 318 info->sstate = PPTP_SESSION_ERROR;
378 break; 319 break;
379 320
380 case PPTP_STOP_SESSION_REPLY: 321 case PPTP_STOP_SESSION_REPLY:
381 if (reqlen < sizeof(_pptpReq.strep)) {
382 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
383 break;
384 }
385
386 /* server confirms end of control session */ 322 /* server confirms end of control session */
387 if (info->sstate > PPTP_SESSION_STOPREQ) { 323 if (info->sstate > PPTP_SESSION_STOPREQ)
388 DEBUGP("%s without STOP_SESS_REQUEST\n", 324 goto invalid;
389 pptp_msg_name[msg]);
390 break;
391 }
392 if (pptpReq->strep.resultCode == PPTP_STOP_OK) 325 if (pptpReq->strep.resultCode == PPTP_STOP_OK)
393 info->sstate = PPTP_SESSION_NONE; 326 info->sstate = PPTP_SESSION_NONE;
394 else 327 else
@@ -396,116 +329,64 @@ pptp_inbound_pkt(struct sk_buff **pskb,
396 break; 329 break;
397 330
398 case PPTP_OUT_CALL_REPLY: 331 case PPTP_OUT_CALL_REPLY:
399 if (reqlen < sizeof(_pptpReq.ocack)) {
400 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
401 break;
402 }
403
404 /* server accepted call, we now expect GRE frames */ 332 /* server accepted call, we now expect GRE frames */
405 if (info->sstate != PPTP_SESSION_CONFIRMED) { 333 if (info->sstate != PPTP_SESSION_CONFIRMED)
406 DEBUGP("%s but no session\n", pptp_msg_name[msg]); 334 goto invalid;
407 break;
408 }
409 if (info->cstate != PPTP_CALL_OUT_REQ && 335 if (info->cstate != PPTP_CALL_OUT_REQ &&
410 info->cstate != PPTP_CALL_OUT_CONF) { 336 info->cstate != PPTP_CALL_OUT_CONF)
411 DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); 337 goto invalid;
412 break; 338
413 } 339 cid = pptpReq->ocack.callID;
414 if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { 340 pcid = pptpReq->ocack.peersCallID;
341 if (info->pns_call_id != pcid)
342 goto invalid;
343 DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
344 ntohs(cid), ntohs(pcid));
345
346 if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
347 info->cstate = PPTP_CALL_OUT_CONF;
348 info->pac_call_id = cid;
349 exp_gre(ct, cid, pcid);
350 } else
415 info->cstate = PPTP_CALL_NONE; 351 info->cstate = PPTP_CALL_NONE;
416 break;
417 }
418
419 cid = &pptpReq->ocack.callID;
420 pcid = &pptpReq->ocack.peersCallID;
421
422 info->pac_call_id = ntohs(*cid);
423
424 if (htons(info->pns_call_id) != *pcid) {
425 DEBUGP("%s for unknown callid %u\n",
426 pptp_msg_name[msg], ntohs(*pcid));
427 break;
428 }
429
430 DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
431 ntohs(*cid), ntohs(*pcid));
432
433 info->cstate = PPTP_CALL_OUT_CONF;
434
435 seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
436 + sizeof(struct PptpControlHeader)
437 + ((void *)pcid - (void *)pptpReq);
438
439 if (exp_gre(ct, seq, *cid, *pcid) != 0)
440 printk("ip_conntrack_pptp: error during exp_gre\n");
441 break; 352 break;
442 353
443 case PPTP_IN_CALL_REQUEST: 354 case PPTP_IN_CALL_REQUEST:
444 if (reqlen < sizeof(_pptpReq.icack)) {
445 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
446 break;
447 }
448
449 /* server tells us about incoming call request */ 355 /* server tells us about incoming call request */
450 if (info->sstate != PPTP_SESSION_CONFIRMED) { 356 if (info->sstate != PPTP_SESSION_CONFIRMED)
451 DEBUGP("%s but no session\n", pptp_msg_name[msg]); 357 goto invalid;
452 break; 358
453 } 359 cid = pptpReq->icreq.callID;
454 pcid = &pptpReq->icack.peersCallID; 360 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
455 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
456 info->cstate = PPTP_CALL_IN_REQ; 361 info->cstate = PPTP_CALL_IN_REQ;
457 info->pac_call_id = ntohs(*pcid); 362 info->pac_call_id = cid;
458 break; 363 break;
459 364
460 case PPTP_IN_CALL_CONNECT: 365 case PPTP_IN_CALL_CONNECT:
461 if (reqlen < sizeof(_pptpReq.iccon)) {
462 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
463 break;
464 }
465
466 /* server tells us about incoming call established */ 366 /* server tells us about incoming call established */
467 if (info->sstate != PPTP_SESSION_CONFIRMED) { 367 if (info->sstate != PPTP_SESSION_CONFIRMED)
468 DEBUGP("%s but no session\n", pptp_msg_name[msg]); 368 goto invalid;
469 break; 369 if (info->cstate != PPTP_CALL_IN_REP &&
470 } 370 info->cstate != PPTP_CALL_IN_CONF)
471 if (info->cstate != PPTP_CALL_IN_REP 371 goto invalid;
472 && info->cstate != PPTP_CALL_IN_CONF) {
473 DEBUGP("%s but never sent IN_CALL_REPLY\n",
474 pptp_msg_name[msg]);
475 break;
476 }
477 372
478 pcid = &pptpReq->iccon.peersCallID; 373 pcid = pptpReq->iccon.peersCallID;
479 cid = &info->pac_call_id; 374 cid = info->pac_call_id;
480 375
481 if (info->pns_call_id != ntohs(*pcid)) { 376 if (info->pns_call_id != pcid)
482 DEBUGP("%s for unknown CallID %u\n", 377 goto invalid;
483 pptp_msg_name[msg], ntohs(*pcid));
484 break;
485 }
486 378
487 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); 379 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
488 info->cstate = PPTP_CALL_IN_CONF; 380 info->cstate = PPTP_CALL_IN_CONF;
489 381
490 /* we expect a GRE connection from PAC to PNS */ 382 /* we expect a GRE connection from PAC to PNS */
491 seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) 383 exp_gre(ct, cid, pcid);
492 + sizeof(struct PptpControlHeader)
493 + ((void *)pcid - (void *)pptpReq);
494
495 if (exp_gre(ct, seq, *cid, *pcid) != 0)
496 printk("ip_conntrack_pptp: error during exp_gre\n");
497
498 break; 384 break;
499 385
500 case PPTP_CALL_DISCONNECT_NOTIFY: 386 case PPTP_CALL_DISCONNECT_NOTIFY:
501 if (reqlen < sizeof(_pptpReq.disc)) {
502 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
503 break;
504 }
505
506 /* server confirms disconnect */ 387 /* server confirms disconnect */
507 cid = &pptpReq->disc.callID; 388 cid = pptpReq->disc.callID;
508 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); 389 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
509 info->cstate = PPTP_CALL_NONE; 390 info->cstate = PPTP_CALL_NONE;
510 391
511 /* untrack this call id, unexpect GRE packets */ 392 /* untrack this call id, unexpect GRE packets */
@@ -513,54 +394,39 @@ pptp_inbound_pkt(struct sk_buff **pskb,
513 break; 394 break;
514 395
515 case PPTP_WAN_ERROR_NOTIFY: 396 case PPTP_WAN_ERROR_NOTIFY:
516 break;
517
518 case PPTP_ECHO_REQUEST: 397 case PPTP_ECHO_REQUEST:
519 case PPTP_ECHO_REPLY: 398 case PPTP_ECHO_REPLY:
520 /* I don't have to explain these ;) */ 399 /* I don't have to explain these ;) */
521 break; 400 break;
522 default: 401 default:
523 DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) 402 goto invalid;
524 ? pptp_msg_name[msg]:pptp_msg_name[0], msg);
525 break;
526 } 403 }
527 404
528
529 if (ip_nat_pptp_hook_inbound) 405 if (ip_nat_pptp_hook_inbound)
530 return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, 406 return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
531 pptpReq); 407 pptpReq);
532
533 return NF_ACCEPT; 408 return NF_ACCEPT;
534 409
410invalid:
411 DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
412 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
413 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
414 msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
415 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
416 return NF_ACCEPT;
535} 417}
536 418
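
Every state-machine violation in pptp_inbound_pkt() now funnels into the single invalid: exit, which prints one uniform debug line carrying the full context (message type, both call IDs, both states) instead of a different DEBUGP per case; pptp_outbound_pkt() below gets the same treatment. A minimal sketch of the pattern, nothing assumed beyond standard C:

/* One shared "invalid:" exit for all state checks. */
#include <stdio.h>

enum sstate { S_NONE, S_REQUESTED, S_CONFIRMED };

static int handle_msg(enum sstate s, int msg)
{
	switch (msg) {
	case 1:				/* e.g. START_SESSION_REPLY */
		if (s < S_REQUESTED)
			goto invalid;
		return 0;
	case 2:				/* e.g. OUT_CALL_REPLY */
		if (s != S_CONFIRMED)
			goto invalid;
		return 0;
	default:
		goto invalid;
	}
invalid:
	/* one place, one format: full context in a single line */
	fprintf(stderr, "invalid msg=%d sstate=%d\n", msg, s);
	return 0;			/* still accepted, like NF_ACCEPT */
}

int main(void)
{
	handle_msg(S_NONE, 2);
	return 0;
}
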
537static inline int 419static inline int
538pptp_outbound_pkt(struct sk_buff **pskb, 420pptp_outbound_pkt(struct sk_buff **pskb,
539 struct tcphdr *tcph, 421 struct PptpControlHeader *ctlh,
540 unsigned int nexthdr_off, 422 union pptp_ctrl_union *pptpReq,
541 unsigned int datalen, 423 unsigned int reqlen,
542 struct ip_conntrack *ct, 424 struct ip_conntrack *ct,
543 enum ip_conntrack_info ctinfo) 425 enum ip_conntrack_info ctinfo)
544{ 426{
545 struct PptpControlHeader _ctlh, *ctlh;
546 unsigned int reqlen;
547 union pptp_ctrl_union _pptpReq, *pptpReq;
548 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; 427 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
549 u_int16_t msg; 428 u_int16_t msg;
550 __be16 *cid, *pcid; 429 __be16 cid = 0, pcid = 0;
551
552 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
553 if (!ctlh)
554 return NF_ACCEPT;
555 nexthdr_off += sizeof(_ctlh);
556 datalen -= sizeof(_ctlh);
557
558 reqlen = datalen;
559 if (reqlen > sizeof(*pptpReq))
560 reqlen = sizeof(*pptpReq);
561 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
562 if (!pptpReq)
563 return NF_ACCEPT;
564 430
565 msg = ntohs(ctlh->messageType); 431 msg = ntohs(ctlh->messageType);
566 DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); 432 DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
@@ -568,10 +434,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
568 switch (msg) { 434 switch (msg) {
569 case PPTP_START_SESSION_REQUEST: 435 case PPTP_START_SESSION_REQUEST:
570 /* client requests for new control session */ 436 /* client requests for new control session */
571 if (info->sstate != PPTP_SESSION_NONE) { 437 if (info->sstate != PPTP_SESSION_NONE)
572 DEBUGP("%s but we already have one", 438 goto invalid;
573 pptp_msg_name[msg]);
574 }
575 info->sstate = PPTP_SESSION_REQUESTED; 439 info->sstate = PPTP_SESSION_REQUESTED;
576 break; 440 break;
577 case PPTP_STOP_SESSION_REQUEST: 441 case PPTP_STOP_SESSION_REQUEST:
@@ -580,123 +444,115 @@ pptp_outbound_pkt(struct sk_buff **pskb,
580 break; 444 break;
581 445
582 case PPTP_OUT_CALL_REQUEST: 446 case PPTP_OUT_CALL_REQUEST:
583 if (reqlen < sizeof(_pptpReq.ocreq)) {
584 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
585 /* FIXME: break; */
586 }
587
588 /* client initiating connection to server */ 447 /* client initiating connection to server */
589 if (info->sstate != PPTP_SESSION_CONFIRMED) { 448 if (info->sstate != PPTP_SESSION_CONFIRMED)
590 DEBUGP("%s but no session\n", 449 goto invalid;
591 pptp_msg_name[msg]);
592 break;
593 }
594 info->cstate = PPTP_CALL_OUT_REQ; 450 info->cstate = PPTP_CALL_OUT_REQ;
595 /* track PNS call id */ 451 /* track PNS call id */
596 cid = &pptpReq->ocreq.callID; 452 cid = pptpReq->ocreq.callID;
597 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); 453 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
598 info->pns_call_id = ntohs(*cid); 454 info->pns_call_id = cid;
599 break; 455 break;
600 case PPTP_IN_CALL_REPLY: 456 case PPTP_IN_CALL_REPLY:
601 if (reqlen < sizeof(_pptpReq.icack)) {
602 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
603 break;
604 }
605
606 /* client answers incoming call */ 457 /* client answers incoming call */
607 if (info->cstate != PPTP_CALL_IN_REQ 458 if (info->cstate != PPTP_CALL_IN_REQ &&
608 && info->cstate != PPTP_CALL_IN_REP) { 459 info->cstate != PPTP_CALL_IN_REP)
609 DEBUGP("%s without incall_req\n", 460 goto invalid;
610 pptp_msg_name[msg]); 461
611 break; 462 cid = pptpReq->icack.callID;
612 } 463 pcid = pptpReq->icack.peersCallID;
613 if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { 464 if (info->pac_call_id != pcid)
465 goto invalid;
466 DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
467 ntohs(cid), ntohs(pcid));
468
469 if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
470 /* part two of the three-way handshake */
471 info->cstate = PPTP_CALL_IN_REP;
472 info->pns_call_id = cid;
473 } else
614 info->cstate = PPTP_CALL_NONE; 474 info->cstate = PPTP_CALL_NONE;
615 break;
616 }
617 pcid = &pptpReq->icack.peersCallID;
618 if (info->pac_call_id != ntohs(*pcid)) {
619 DEBUGP("%s for unknown call %u\n",
620 pptp_msg_name[msg], ntohs(*pcid));
621 break;
622 }
623 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
624 /* part two of the three-way handshake */
625 info->cstate = PPTP_CALL_IN_REP;
626 info->pns_call_id = ntohs(pptpReq->icack.callID);
627 break; 475 break;
628 476
629 case PPTP_CALL_CLEAR_REQUEST: 477 case PPTP_CALL_CLEAR_REQUEST:
630 /* client requests hangup of call */ 478 /* client requests hangup of call */
631 if (info->sstate != PPTP_SESSION_CONFIRMED) { 479 if (info->sstate != PPTP_SESSION_CONFIRMED)
632 DEBUGP("CLEAR_CALL but no session\n"); 480 goto invalid;
633 break;
634 }
635 /* FUTURE: iterate over all calls and check if 481 /* FUTURE: iterate over all calls and check if
636 * call ID is valid. We don't do this without newnat, 482 * call ID is valid. We don't do this without newnat,
637 * because we only know about last call */ 483 * because we only know about last call */
638 info->cstate = PPTP_CALL_CLEAR_REQ; 484 info->cstate = PPTP_CALL_CLEAR_REQ;
639 break; 485 break;
640 case PPTP_SET_LINK_INFO: 486 case PPTP_SET_LINK_INFO:
641 break;
642 case PPTP_ECHO_REQUEST: 487 case PPTP_ECHO_REQUEST:
643 case PPTP_ECHO_REPLY: 488 case PPTP_ECHO_REPLY:
644 /* I don't have to explain these ;) */ 489 /* I don't have to explain these ;) */
645 break; 490 break;
646 default: 491 default:
647 DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? 492 goto invalid;
648 pptp_msg_name[msg]:pptp_msg_name[0], msg);
649 /* unknown: no need to create GRE masq table entry */
650 break;
651 } 493 }
652 494
653 if (ip_nat_pptp_hook_outbound) 495 if (ip_nat_pptp_hook_outbound)
654 return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, 496 return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
655 pptpReq); 497 pptpReq);
498 return NF_ACCEPT;
656 499
500invalid:
501 DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
502 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
503 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
504 msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
505 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
657 return NF_ACCEPT; 506 return NF_ACCEPT;
658} 507}
659 508
509static const unsigned int pptp_msg_size[] = {
510 [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
511 [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
512 [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
513 [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
514 [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
515 [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
516 [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
517 [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
518 [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
519 [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
520 [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
521 [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
522 [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
523};
660 524
661/* track caller id inside control connection, call expect_related */ 525/* track caller id inside control connection, call expect_related */
662static int 526static int
663conntrack_pptp_help(struct sk_buff **pskb, 527conntrack_pptp_help(struct sk_buff **pskb,
664 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) 528 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
665 529
666{ 530{
667 struct pptp_pkt_hdr _pptph, *pptph;
668 struct tcphdr _tcph, *tcph;
669 u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
670 u_int32_t datalen;
671 int dir = CTINFO2DIR(ctinfo); 531 int dir = CTINFO2DIR(ctinfo);
672 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; 532 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
673 unsigned int nexthdr_off; 533 struct tcphdr _tcph, *tcph;
674 534 struct pptp_pkt_hdr _pptph, *pptph;
535 struct PptpControlHeader _ctlh, *ctlh;
536 union pptp_ctrl_union _pptpReq, *pptpReq;
537 unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
538 unsigned int datalen, reqlen, nexthdr_off;
675 int oldsstate, oldcstate; 539 int oldsstate, oldcstate;
676 int ret; 540 int ret;
541 u_int16_t msg;
677 542
678 /* don't do any tracking before tcp handshake complete */ 543 /* don't do any tracking before tcp handshake complete */
679 if (ctinfo != IP_CT_ESTABLISHED 544 if (ctinfo != IP_CT_ESTABLISHED
680 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { 545 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
681 DEBUGP("ctinfo = %u, skipping\n", ctinfo); 546 DEBUGP("ctinfo = %u, skipping\n", ctinfo);
682 return NF_ACCEPT; 547 return NF_ACCEPT;
683 } 548 }
684 549
685 nexthdr_off = (*pskb)->nh.iph->ihl*4; 550 nexthdr_off = (*pskb)->nh.iph->ihl*4;
686 tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); 551 tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
687 BUG_ON(!tcph); 552 BUG_ON(!tcph);
688 nexthdr_off += tcph->doff * 4; 553 nexthdr_off += tcph->doff * 4;
689 datalen = tcplen - tcph->doff * 4; 554 datalen = tcplen - tcph->doff * 4;
690 555
691 if (tcph->fin || tcph->rst) {
692 DEBUGP("RST/FIN received, timeouting GRE\n");
693 /* can't do this after real newnat */
694 info->cstate = PPTP_CALL_NONE;
695
696 /* untrack this call id, unexpect GRE packets */
697 pptp_destroy_siblings(ct);
698 }
699
700 pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); 556 pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
701 if (!pptph) { 557 if (!pptph) {
702 DEBUGP("no full PPTP header, can't track\n"); 558 DEBUGP("no full PPTP header, can't track\n");
@@ -712,6 +568,23 @@ conntrack_pptp_help(struct sk_buff **pskb,
712 return NF_ACCEPT; 568 return NF_ACCEPT;
713 } 569 }
714 570
571 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
572 if (!ctlh)
573 return NF_ACCEPT;
574 nexthdr_off += sizeof(_ctlh);
575 datalen -= sizeof(_ctlh);
576
577 reqlen = datalen;
578 msg = ntohs(ctlh->messageType);
579 if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
580 return NF_ACCEPT;
581 if (reqlen > sizeof(*pptpReq))
582 reqlen = sizeof(*pptpReq);
583
584 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
585 if (!pptpReq)
586 return NF_ACCEPT;
587
715 oldsstate = info->sstate; 588 oldsstate = info->sstate;
716 oldcstate = info->cstate; 589 oldcstate = info->cstate;
717 590
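
The scattered per-case "reqlen < sizeof(...)" checks in both parse functions are replaced by the pptp_msg_size[] table plus the single length check above: conntrack_pptp_help() now pulls the control header and request once, validates the length against the table, and hands checked pointers down to the inbound/outbound handlers. A userspace sketch of the table-driven validation, with invented message types:

/* Table-driven length validation; message types are made up. */
#include <stdio.h>
#include <stddef.h>

struct msg_a { char body[8]; };
struct msg_b { char body[24]; };

enum { MSG_A = 1, MSG_B = 2, MSG_MAX = MSG_B };

static const size_t msg_size[] = {
	[MSG_A] = sizeof(struct msg_a),
	[MSG_B] = sizeof(struct msg_b),
};

/* mirrors: if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) */
static int too_short(unsigned msg, size_t reqlen)
{
	return msg > 0 && msg <= MSG_MAX && reqlen < msg_size[msg];
}

int main(void)
{
	printf("MSG_A, 8 bytes:  %s\n", too_short(MSG_A, 8) ? "drop" : "ok");
	printf("MSG_B, 8 bytes:  %s\n", too_short(MSG_B, 8) ? "drop" : "ok");
	return 0;
}
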
@@ -721,11 +594,11 @@ conntrack_pptp_help(struct sk_buff **pskb,
721 * established from PNS->PAC. However, RFC makes no guarantee */ 594 * established from PNS->PAC. However, RFC makes no guarantee */
722 if (dir == IP_CT_DIR_ORIGINAL) 595 if (dir == IP_CT_DIR_ORIGINAL)
723 /* client -> server (PNS -> PAC) */ 596 /* client -> server (PNS -> PAC) */
724 ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, 597 ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
725 ctinfo); 598 ctinfo);
726 else 599 else
727 /* server -> client (PAC -> PNS) */ 600 /* server -> client (PAC -> PNS) */
728 ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, 601 ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
729 ctinfo); 602 ctinfo);
730 DEBUGP("sstate: %d->%d, cstate: %d->%d\n", 603 DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
731 oldsstate, info->sstate, oldcstate, info->cstate); 604 oldsstate, info->sstate, oldcstate, info->cstate);
@@ -735,30 +608,31 @@ conntrack_pptp_help(struct sk_buff **pskb,
735} 608}
736 609
737/* control protocol helper */ 610/* control protocol helper */
738static struct ip_conntrack_helper pptp = { 611static struct ip_conntrack_helper pptp = {
739 .list = { NULL, NULL }, 612 .list = { NULL, NULL },
740 .name = "pptp", 613 .name = "pptp",
741 .me = THIS_MODULE, 614 .me = THIS_MODULE,
742 .max_expected = 2, 615 .max_expected = 2,
743 .timeout = 5 * 60, 616 .timeout = 5 * 60,
744 .tuple = { .src = { .ip = 0, 617 .tuple = { .src = { .ip = 0,
745 .u = { .tcp = { .port = 618 .u = { .tcp = { .port =
746 __constant_htons(PPTP_CONTROL_PORT) } } 619 __constant_htons(PPTP_CONTROL_PORT) } }
747 }, 620 },
748 .dst = { .ip = 0, 621 .dst = { .ip = 0,
749 .u = { .all = 0 }, 622 .u = { .all = 0 },
750 .protonum = IPPROTO_TCP 623 .protonum = IPPROTO_TCP
751 } 624 }
752 }, 625 },
753 .mask = { .src = { .ip = 0, 626 .mask = { .src = { .ip = 0,
754 .u = { .tcp = { .port = __constant_htons(0xffff) } } 627 .u = { .tcp = { .port = __constant_htons(0xffff) } }
755 }, 628 },
756 .dst = { .ip = 0, 629 .dst = { .ip = 0,
757 .u = { .all = 0 }, 630 .u = { .all = 0 },
758 .protonum = 0xff 631 .protonum = 0xff
759 } 632 }
760 }, 633 },
761 .help = conntrack_pptp_help 634 .help = conntrack_pptp_help,
635 .destroy = pptp_destroy_siblings,
762}; 636};
763 637
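
Two related changes meet here: the FIN/RST sniffing removed from conntrack_pptp_help() earlier in this file, and the new .destroy = pptp_destroy_siblings callback (with ip_ct_gre_keymap_destroy() folded into pptp_destroy_siblings itself). GRE siblings and keymaps are now torn down whenever the control connection dies, timeouts included, not only when a FIN or RST happens to be observed. A rough userspace sketch of the lifecycle, with invented types:

/* Cleanup tied to connection destruction, not to packet contents. */
#include <stdio.h>
#include <stdlib.h>

struct conn;

struct helper {
	const char *name;
	void (*destroy)(struct conn *);	/* runs once, on any teardown path */
};

struct conn { const struct helper *h; };

static void pptp_destroy(struct conn *c)
{
	printf("destroying GRE siblings/keymaps of %p\n", (void *)c);
}

static const struct helper pptp = { .name = "pptp", .destroy = pptp_destroy };

static void conn_destroy(struct conn *c)	/* timeout, FIN, RST: all end here */
{
	if (c->h && c->h->destroy)
		c->h->destroy(c);
	free(c);
}

int main(void)
{
	struct conn *c = malloc(sizeof(*c));
	if (!c)
		return 1;
	c->h = &pptp;
	conn_destroy(c);
	return 0;
}
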
764extern void ip_ct_proto_gre_fini(void); 638extern void ip_ct_proto_gre_fini(void);
@@ -768,7 +642,7 @@ extern int __init ip_ct_proto_gre_init(void);
768static int __init ip_conntrack_helper_pptp_init(void) 642static int __init ip_conntrack_helper_pptp_init(void)
769{ 643{
770 int retcode; 644 int retcode;
771 645
772 retcode = ip_ct_proto_gre_init(); 646 retcode = ip_ct_proto_gre_init();
773 if (retcode < 0) 647 if (retcode < 0)
774 return retcode; 648 return retcode;
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index a566a81325b2..3d0b438783db 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -21,6 +21,7 @@
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/inetdevice.h> 23#include <linux/inetdevice.h>
24#include <linux/if_addr.h>
24#include <linux/in.h> 25#include <linux/in.h>
25#include <linux/ip.h> 26#include <linux/ip.h>
26#include <net/route.h> 27#include <net/route.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 0d4cc92391fa..52eddea27e93 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
329 /* dump everything */ 329 /* dump everything */
330 events = ~0UL; 330 events = ~0UL;
331 group = NFNLGRP_CONNTRACK_NEW; 331 group = NFNLGRP_CONNTRACK_NEW;
332 } else if (events & (IPCT_STATUS | 332 } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
333 IPCT_PROTOINFO |
334 IPCT_HELPER |
335 IPCT_HELPINFO |
336 IPCT_NATINFO)) {
337 type = IPCTNL_MSG_CT_NEW; 333 type = IPCTNL_MSG_CT_NEW;
338 group = NFNLGRP_CONNTRACK_UPDATE; 334 group = NFNLGRP_CONNTRACK_UPDATE;
339 } else 335 } else
@@ -385,6 +381,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
385 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) 381 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
386 goto nfattr_failure; 382 goto nfattr_failure;
387 383
384 if (events & IPCT_MARK
385 && ctnetlink_dump_mark(skb, ct) < 0)
386 goto nfattr_failure;
387
388 nlh->nlmsg_len = skb->tail - b; 388 nlh->nlmsg_len = skb->tail - b;
389 nfnetlink_send(skb, 0, group, 0); 389 nfnetlink_send(skb, 0, group, 0);
390 return NOTIFY_DONE; 390 return NOTIFY_DONE;
@@ -436,6 +436,11 @@ restart:
436 cb->args[1] = (unsigned long)ct; 436 cb->args[1] = (unsigned long)ct;
437 goto out; 437 goto out;
438 } 438 }
439#ifdef CONFIG_NF_CT_ACCT
440 if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
441 IPCTNL_MSG_CT_GET_CTRZERO)
442 memset(&ct->counters, 0, sizeof(ct->counters));
443#endif
439 } 444 }
440 if (cb->args[1]) { 445 if (cb->args[1]) {
441 cb->args[1] = 0; 446 cb->args[1] = 0;
@@ -451,46 +456,6 @@ out:
451 return skb->len; 456 return skb->len;
452} 457}
453 458
454#ifdef CONFIG_IP_NF_CT_ACCT
455static int
456ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
457{
458 struct ip_conntrack *ct = NULL;
459 struct ip_conntrack_tuple_hash *h;
460 struct list_head *i;
461 u_int32_t *id = (u_int32_t *) &cb->args[1];
462
463 DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__,
464 cb->args[0], *id);
465
466 write_lock_bh(&ip_conntrack_lock);
467 for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
468 list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
469 h = (struct ip_conntrack_tuple_hash *) i;
470 if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
471 continue;
472 ct = tuplehash_to_ctrack(h);
473 if (ct->id <= *id)
474 continue;
475 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
476 cb->nlh->nlmsg_seq,
477 IPCTNL_MSG_CT_NEW,
478 1, ct) < 0)
479 goto out;
480 *id = ct->id;
481
482 memset(&ct->counters, 0, sizeof(ct->counters));
483 }
484 }
485out:
486 write_unlock_bh(&ip_conntrack_lock);
487
488 DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
489
490 return skb->len;
491}
492#endif
493
494static const size_t cta_min_ip[CTA_IP_MAX] = { 459static const size_t cta_min_ip[CTA_IP_MAX] = {
495 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), 460 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
496 [CTA_IP_V4_DST-1] = sizeof(u_int32_t), 461 [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
@@ -775,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
775 if (msg->nfgen_family != AF_INET) 740 if (msg->nfgen_family != AF_INET)
776 return -EAFNOSUPPORT; 741 return -EAFNOSUPPORT;
777 742
778 if (NFNL_MSG_TYPE(nlh->nlmsg_type) == 743#ifndef CONFIG_IP_NF_CT_ACCT
779 IPCTNL_MSG_CT_GET_CTRZERO) { 744 if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
780#ifdef CONFIG_IP_NF_CT_ACCT
781 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
782 ctnetlink_dump_table_w,
783 ctnetlink_done)) != 0)
784 return -EINVAL;
785#else
786 return -ENOTSUPP; 745 return -ENOTSUPP;
787#endif 746#endif
788 } else { 747 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
789 if ((*errp = netlink_dump_start(ctnl, skb, nlh, 748 ctnetlink_dump_table,
790 ctnetlink_dump_table, 749 ctnetlink_done)) != 0)
791 ctnetlink_done)) != 0)
792 return -EINVAL; 750 return -EINVAL;
793 }
794 751
795 rlen = NLMSG_ALIGN(nlh->nlmsg_len); 752 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
796 if (rlen > skb->len) 753 if (rlen > skb->len)
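
With the counter reset moved into the shared dump loop above (guarded by the request type), the near-duplicate ctnetlink_dump_table_w() can be deleted outright: ctnetlink_get_conntrack() now always starts the same ctnetlink_dump_table dump and merely rejects CTRZERO up front when accounting is compiled out. A compact sketch of the consolidation, types invented:

/* One dump path for both GET and GET_CTRZERO. */
#include <stdio.h>

struct ct_entry { unsigned id; unsigned long pkts, bytes; };

enum req_type { REQ_GET, REQ_GET_CTRZERO };

static void dump_table(struct ct_entry *tbl, int n, enum req_type type)
{
	for (int i = 0; i < n; i++) {
		printf("ct %u: pkts=%lu bytes=%lu\n",
		       tbl[i].id, tbl[i].pkts, tbl[i].bytes);
		if (type == REQ_GET_CTRZERO) {
			/* where the kernel memsets ct->counters */
			tbl[i].pkts = 0;
			tbl[i].bytes = 0;
		}
	}
}

int main(void)
{
	struct ct_entry tbl[2] = { { 1, 10, 1000 }, { 2, 3, 300 } };
	dump_table(tbl, 2, REQ_GET_CTRZERO);	/* dump and zero */
	dump_table(tbl, 2, REQ_GET);		/* reads back zeroed */
	return 0;
}
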
@@ -1253,6 +1210,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1253 } else 1210 } else
1254 return NOTIFY_DONE; 1211 return NOTIFY_DONE;
1255 1212
1213 if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
1214 return NOTIFY_DONE;
1215
1256 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 1216 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
1257 if (!skb) 1217 if (!skb)
1258 return NOTIFY_DONE; 1218 return NOTIFY_DONE;
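
The nfnetlink_has_listeners() check added to ctnetlink_expect_event() short-circuits the whole event path when no socket is subscribed to the group, so the alloc_skb() and message construction that follow never run for an audience of zero. A trivial sketch of the pattern:

/* Do the expensive work only if someone is subscribed. */
#include <stdio.h>
#include <stdlib.h>

static int subscribers;	/* stand-in for the netlink group's listener count */

static void notify(const char *event)
{
	if (!subscribers)
		return;			/* cheap early exit */

	char *buf = malloc(128);	/* the work we want to avoid */
	if (!buf)
		return;
	snprintf(buf, 128, "EXP event: %s", event);
	puts(buf);
	free(buf);
}

int main(void)
{
	notify("new");		/* dropped: nobody listening */
	subscribers = 1;
	notify("new");		/* delivered */
	return 0;
}
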
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index f891308b5e4c..36f2b5e5d80a 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
12#include <linux/netfilter.h> 12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 13#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
14 14
15unsigned int ip_ct_generic_timeout = 600*HZ; 15unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
16 16
17static int generic_pkt_to_tuple(const struct sk_buff *skb, 17static int generic_pkt_to_tuple(const struct sk_buff *skb,
18 unsigned int dataoff, 18 unsigned int dataoff,
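
The __read_mostly annotations added throughout these protocol helpers (and repeated below for ICMP, SCTP, TCP and UDP) move rarely-written tunables into a dedicated section, so they stop sharing cache lines with frequently-dirtied data. A userspace approximation; the section name below is an assumption and has varied across kernel versions:

/* Rough userspace stand-in for __read_mostly (section name assumed). */
#include <stdio.h>

#define __read_mostly __attribute__((__section__(".data.read_mostly")))

static unsigned int generic_timeout __read_mostly = 600 * 100;	/* 600*HZ, assuming HZ=100 */

int main(void)
{
	printf("timeout = %u jiffies\n", generic_timeout);
	return 0;
}
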
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 4ee016c427b4..5fe026f467d3 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -1,15 +1,15 @@
1/* 1/*
2 * ip_conntrack_proto_gre.c - Version 3.0 2 * ip_conntrack_proto_gre.c - Version 3.0
3 * 3 *
4 * Connection tracking protocol helper module for GRE. 4 * Connection tracking protocol helper module for GRE.
5 * 5 *
6 * GRE is a generic encapsulation protocol, which is generally not very 6 * GRE is a generic encapsulation protocol, which is generally not very
7 * suited for NAT, as it has no protocol-specific part as port numbers. 7 * suited for NAT, as it has no protocol-specific part as port numbers.
8 * 8 *
9 * It has an optional key field, which may help us distinguishing two 9 * It has an optional key field, which may help us distinguishing two
10 * connections between the same two hosts. 10 * connections between the same two hosts.
11 * 11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 * 13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory 14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves us for the same purpose as the key 15 * field called "CallID", which serves us for the same purpose as the key
@@ -37,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock);
37#define ASSERT_READ_LOCK(x) 37#define ASSERT_READ_LOCK(x)
38#define ASSERT_WRITE_LOCK(x) 38#define ASSERT_WRITE_LOCK(x)
39 39
40#include <linux/netfilter_ipv4/listhelp.h>
41#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 40#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
42#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 41#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
43#include <linux/netfilter_ipv4/ip_conntrack_core.h> 42#include <linux/netfilter_ipv4/ip_conntrack_core.h>
@@ -62,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
62#define DEBUGP(x, args...) 61#define DEBUGP(x, args...)
63#define DUMP_TUPLE_GRE(x) 62#define DUMP_TUPLE_GRE(x)
64#endif 63#endif
65 64
66/* GRE KEYMAP HANDLING FUNCTIONS */ 65/* GRE KEYMAP HANDLING FUNCTIONS */
67static LIST_HEAD(gre_keymap_list); 66static LIST_HEAD(gre_keymap_list);
68 67
@@ -82,12 +81,14 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
82 __be16 key = 0; 81 __be16 key = 0;
83 82
84 read_lock_bh(&ip_ct_gre_lock); 83 read_lock_bh(&ip_ct_gre_lock);
85 km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, 84 list_for_each_entry(km, &gre_keymap_list, list) {
86 struct ip_ct_gre_keymap *, t); 85 if (gre_key_cmpfn(km, t)) {
87 if (km) 86 key = km->tuple.src.u.gre.key;
88 key = km->tuple.src.u.gre.key; 87 break;
88 }
89 }
89 read_unlock_bh(&ip_ct_gre_lock); 90 read_unlock_bh(&ip_ct_gre_lock);
90 91
91 DEBUGP("lookup src key 0x%x up key for ", key); 92 DEBUGP("lookup src key 0x%x up key for ", key);
92 DUMP_TUPLE_GRE(t); 93 DUMP_TUPLE_GRE(t);
93 94
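
This hunk shows the recurring pattern of the listhelp.h removal: the LIST_FIND() callback macro becomes an open-coded list_for_each_entry() walk with the same predicate, minus the function-pointer indirection; the keymap_add() hunk below repeats it. A freestanding sketch of the shape, using a plain singly linked list in place of the kernel's list_head:

/* Open-coded search replacing a LIST_FIND()-style helper (simplified). */
#include <stdio.h>

struct keymap { unsigned short key; struct keymap *next; };

static int cmp(const struct keymap *km, unsigned short wanted)
{
	return km->key == wanted;	/* plays the role of gre_key_cmpfn */
}

int main(void)
{
	struct keymap c = { 30, NULL }, b = { 20, &c }, a = { 10, &b };
	unsigned short key = 0;

	for (struct keymap *km = &a; km; km = km->next) {	/* the open-coded walk */
		if (cmp(km, 20)) {
			key = km->key;
			break;
		}
	}
	printf("key=%u\n", (unsigned)key);
	return 0;
}
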
@@ -99,28 +100,25 @@ int
99ip_ct_gre_keymap_add(struct ip_conntrack *ct, 100ip_ct_gre_keymap_add(struct ip_conntrack *ct,
100 struct ip_conntrack_tuple *t, int reply) 101 struct ip_conntrack_tuple *t, int reply)
101{ 102{
102 struct ip_ct_gre_keymap **exist_km, *km, *old; 103 struct ip_ct_gre_keymap **exist_km, *km;
103 104
104 if (!ct->helper || strcmp(ct->helper->name, "pptp")) { 105 if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
105 DEBUGP("refusing to add GRE keymap to non-pptp session\n"); 106 DEBUGP("refusing to add GRE keymap to non-pptp session\n");
106 return -1; 107 return -1;
107 } 108 }
108 109
109 if (!reply) 110 if (!reply)
110 exist_km = &ct->help.ct_pptp_info.keymap_orig; 111 exist_km = &ct->help.ct_pptp_info.keymap_orig;
111 else 112 else
112 exist_km = &ct->help.ct_pptp_info.keymap_reply; 113 exist_km = &ct->help.ct_pptp_info.keymap_reply;
113 114
114 if (*exist_km) { 115 if (*exist_km) {
115 /* check whether it's a retransmission */ 116 /* check whether it's a retransmission */
116 old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, 117 list_for_each_entry(km, &gre_keymap_list, list) {
117 struct ip_ct_gre_keymap *, t); 118 if (gre_key_cmpfn(km, t) && km == *exist_km)
118 if (old == *exist_km) { 119 return 0;
119 DEBUGP("retransmission\n");
120 return 0;
121 } 120 }
122 121 DEBUGP("trying to override keymap_%s for ct %p\n",
123 DEBUGP("trying to override keymap_%s for ct %p\n",
124 reply? "reply":"orig", ct); 122 reply? "reply":"orig", ct);
125 return -EEXIST; 123 return -EEXIST;
126 } 124 }
@@ -136,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct,
136 DUMP_TUPLE_GRE(&km->tuple); 134 DUMP_TUPLE_GRE(&km->tuple);
137 135
138 write_lock_bh(&ip_ct_gre_lock); 136 write_lock_bh(&ip_ct_gre_lock);
139 list_append(&gre_keymap_list, km); 137 list_add_tail(&km->list, &gre_keymap_list);
140 write_unlock_bh(&ip_ct_gre_lock); 138 write_unlock_bh(&ip_ct_gre_lock);
141 139
142 return 0; 140 return 0;
@@ -154,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
154 152
155 write_lock_bh(&ip_ct_gre_lock); 153 write_lock_bh(&ip_ct_gre_lock);
156 if (ct->help.ct_pptp_info.keymap_orig) { 154 if (ct->help.ct_pptp_info.keymap_orig) {
157 DEBUGP("removing %p from list\n", 155 DEBUGP("removing %p from list\n",
158 ct->help.ct_pptp_info.keymap_orig); 156 ct->help.ct_pptp_info.keymap_orig);
159 list_del(&ct->help.ct_pptp_info.keymap_orig->list); 157 list_del(&ct->help.ct_pptp_info.keymap_orig->list);
160 kfree(ct->help.ct_pptp_info.keymap_orig); 158 kfree(ct->help.ct_pptp_info.keymap_orig);
@@ -222,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb,
222static int gre_print_tuple(struct seq_file *s, 220static int gre_print_tuple(struct seq_file *s,
223 const struct ip_conntrack_tuple *tuple) 221 const struct ip_conntrack_tuple *tuple)
224{ 222{
225 return seq_printf(s, "srckey=0x%x dstkey=0x%x ", 223 return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
226 ntohs(tuple->src.u.gre.key), 224 ntohs(tuple->src.u.gre.key),
227 ntohs(tuple->dst.u.gre.key)); 225 ntohs(tuple->dst.u.gre.key));
228} 226}
@@ -252,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct,
252 } else 250 } else
253 ip_ct_refresh_acct(ct, conntrackinfo, skb, 251 ip_ct_refresh_acct(ct, conntrackinfo, skb,
254 ct->proto.gre.timeout); 252 ct->proto.gre.timeout);
255 253
256 return NF_ACCEPT; 254 return NF_ACCEPT;
257} 255}
258 256
259/* Called when a new connection for this protocol found. */ 257/* Called when a new connection for this protocol found. */
260static int gre_new(struct ip_conntrack *ct, 258static int gre_new(struct ip_conntrack *ct,
261 const struct sk_buff *skb) 259 const struct sk_buff *skb)
262{ 260{
263 DEBUGP(": "); 261 DEBUGP(": ");
264 DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 262 DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
265 263
@@ -285,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct)
285} 283}
286 284
287/* protocol helper struct */ 285/* protocol helper struct */
288static struct ip_conntrack_protocol gre = { 286static struct ip_conntrack_protocol gre = {
289 .proto = IPPROTO_GRE, 287 .proto = IPPROTO_GRE,
290 .name = "gre", 288 .name = "gre",
291 .pkt_to_tuple = gre_pkt_to_tuple, 289 .pkt_to_tuple = gre_pkt_to_tuple,
292 .invert_tuple = gre_invert_tuple, 290 .invert_tuple = gre_invert_tuple,
293 .print_tuple = gre_print_tuple, 291 .print_tuple = gre_print_tuple,
@@ -325,7 +323,7 @@ void ip_ct_proto_gre_fini(void)
325 } 323 }
326 write_unlock_bh(&ip_ct_gre_lock); 324 write_unlock_bh(&ip_ct_gre_lock);
327 325
328 ip_conntrack_protocol_unregister(&gre); 326 ip_conntrack_protocol_unregister(&gre);
329} 327}
330 328
331EXPORT_SYMBOL(ip_ct_gre_keymap_add); 329EXPORT_SYMBOL(ip_ct_gre_keymap_add);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 23f1c504586d..09c40ebe3345 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -21,7 +21,7 @@
21#include <linux/netfilter_ipv4/ip_conntrack_core.h> 21#include <linux/netfilter_ipv4/ip_conntrack_core.h>
22#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 22#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
23 23
24unsigned int ip_ct_icmp_timeout = 30*HZ; 24unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
25 25
26#if 0 26#if 0
27#define DEBUGP printk 27#define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 2d3612cd5f18..b908a4842e18 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = {
58#define HOURS * 60 MINS 58#define HOURS * 60 MINS
59#define DAYS * 24 HOURS 59#define DAYS * 24 HOURS
60 60
61static unsigned int ip_ct_sctp_timeout_closed = 10 SECS; 61static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS;
62static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS; 62static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
63static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS; 63static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
64static unsigned int ip_ct_sctp_timeout_established = 5 DAYS; 64static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS;
65static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; 65static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
66static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; 66static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
67static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; 67static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
68 68
69static const unsigned int * sctp_timeouts[] 69static const unsigned int * sctp_timeouts[]
70= { NULL, /* SCTP_CONNTRACK_NONE */ 70= { NULL, /* SCTP_CONNTRACK_NONE */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index fb920e76ec10..03ae9a04cb37 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -48,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock);
48/* "Be conservative in what you do, 48/* "Be conservative in what you do,
49 be liberal in what you accept from others." 49 be liberal in what you accept from others."
50 If it's non-zero, we mark only out of window RST segments as INVALID. */ 50 If it's non-zero, we mark only out of window RST segments as INVALID. */
51int ip_ct_tcp_be_liberal = 0; 51int ip_ct_tcp_be_liberal __read_mostly = 0;
52 52
53/* When connection is picked up from the middle, how many packets are required 53/* When connection is picked up from the middle, how many packets are required
54 to pass in each direction when we assume we are in sync - if any side uses 54 to pass in each direction when we assume we are in sync - if any side uses
55 window scaling, we lost the game. 55 window scaling, we lost the game.
56 If it is set to zero, we disable picking up already established 56 If it is set to zero, we disable picking up already established
57 connections. */ 57 connections. */
58int ip_ct_tcp_loose = 3; 58int ip_ct_tcp_loose __read_mostly = 3;
59 59
60/* Max number of the retransmitted packets without receiving an (acceptable) 60/* Max number of the retransmitted packets without receiving an (acceptable)
61 ACK from the destination. If this number is reached, a shorter timer 61 ACK from the destination. If this number is reached, a shorter timer
62 will be started. */ 62 will be started. */
63int ip_ct_tcp_max_retrans = 3; 63int ip_ct_tcp_max_retrans __read_mostly = 3;
64 64
65 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more 65 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
66 closely. They're more complex. --RR */ 66 closely. They're more complex. --RR */
@@ -83,19 +83,19 @@ static const char *tcp_conntrack_names[] = {
83#define HOURS * 60 MINS 83#define HOURS * 60 MINS
84#define DAYS * 24 HOURS 84#define DAYS * 24 HOURS
85 85
86unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; 86unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
87unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; 87unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
88unsigned int ip_ct_tcp_timeout_established = 5 DAYS; 88unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS;
89unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; 89unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
90unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; 90unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
91unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; 91unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
92unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; 92unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
93unsigned int ip_ct_tcp_timeout_close = 10 SECS; 93unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS;
94 94
95/* RFC1122 says the R2 limit should be at least 100 seconds. 95/* RFC1122 says the R2 limit should be at least 100 seconds.
96 Linux uses 15 packets as limit, which corresponds 96 Linux uses 15 packets as limit, which corresponds
97 to ~13-30min depending on RTO. */ 97 to ~13-30min depending on RTO. */
98unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; 98unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
99 99
100static const unsigned int * tcp_timeouts[] 100static const unsigned int * tcp_timeouts[]
101= { NULL, /* TCP_CONNTRACK_NONE */ 101= { NULL, /* TCP_CONNTRACK_NONE */
@@ -731,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
731 if (state->last_dir == dir 731 if (state->last_dir == dir
732 && state->last_seq == seq 732 && state->last_seq == seq
733 && state->last_ack == ack 733 && state->last_ack == ack
734 && state->last_end == end) 734 && state->last_end == end
735 && state->last_win == win)
735 state->retrans++; 736 state->retrans++;
736 else { 737 else {
737 state->last_dir = dir; 738 state->last_dir = dir;
738 state->last_seq = seq; 739 state->last_seq = seq;
739 state->last_ack = ack; 740 state->last_ack = ack;
740 state->last_end = end; 741 state->last_end = end;
742 state->last_win = win;
741 state->retrans = 0; 743 state->retrans = 0;
742 } 744 }
743 } 745 }
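
The retransmission detector in tcp_in_window() now records and compares the advertised window as well: a segment that repeats (dir, seq, ack, end) but carries a different window is a window update with new information, so it restarts the retrans counter instead of pushing the connection toward the shortened max-retrans timeout. A standalone sketch of the updated check:

/* A segment counts as a retransmission only if all five fields repeat. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct last_seg { int dir; uint32_t seq, ack, end; uint16_t win; int retrans; };

static void track(struct last_seg *s, int dir, uint32_t seq, uint32_t ack,
		  uint32_t end, uint16_t win)
{
	if (s->dir == dir && s->seq == seq && s->ack == ack &&
	    s->end == end && s->win == win) {
		s->retrans++;
	} else {
		s->dir = dir; s->seq = seq; s->ack = ack;
		s->end = end; s->win = win;
		s->retrans = 0;
	}
}

int main(void)
{
	struct last_seg s;
	memset(&s, 0, sizeof(s));
	track(&s, 0, 1000, 500, 1100, 4096);
	track(&s, 0, 1000, 500, 1100, 4096);	/* true retransmission */
	track(&s, 0, 1000, 500, 1100, 8192);	/* window update: not one */
	printf("retrans=%d\n", s.retrans);	/* 0: counter restarted */
	return 0;
}
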
@@ -865,8 +867,7 @@ static int tcp_error(struct sk_buff *skb,
865 867
866 /* Checksum invalid? Ignore. 868 /* Checksum invalid? Ignore.
867 * We skip checking packets on the outgoing path 869 * We skip checking packets on the outgoing path
868 * because the semantic of CHECKSUM_HW is different there 870 * because it is assumed to be correct.
869 * and moreover root might send raw packets.
870 */ 871 */
871 /* FIXME: Source route IP option packets --RR */ 872 /* FIXME: Source route IP option packets --RR */
872 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && 873 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 9b2c16b4d2ff..d0e8a16970ec 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -18,8 +18,8 @@
18#include <linux/netfilter_ipv4.h> 18#include <linux/netfilter_ipv4.h>
19#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 19#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
20 20
21unsigned int ip_ct_udp_timeout = 30*HZ; 21unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
22unsigned int ip_ct_udp_timeout_stream = 180*HZ; 22unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
23 23
24static int udp_pkt_to_tuple(const struct sk_buff *skb, 24static int udp_pkt_to_tuple(const struct sk_buff *skb,
25 unsigned int dataoff, 25 unsigned int dataoff,
@@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
117 117
118 /* Checksum invalid? Ignore. 118 /* Checksum invalid? Ignore.
119 * We skip checking packets on the outgoing path 119 * We skip checking packets on the outgoing path
120 * because the semantic of CHECKSUM_HW is different there 120 * because the checksum is assumed to be correct.
121 * and moreover root might send raw packets.
122 * FIXME: Source route IP option packets --RR */ 121 * FIXME: Source route IP option packets --RR */
123 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && 122 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
124 nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { 123 nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index 4f222d6be009..2893e9c74850 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -8,7 +8,6 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10
11#include <linux/config.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/ctype.h> 12#include <linux/ctype.h>
14#include <linux/skbuff.h> 13#include <linux/skbuff.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7a9fa04a467a..02135756562e 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -35,7 +35,6 @@
35#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 35#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
36#include <linux/netfilter_ipv4/ip_conntrack_core.h> 36#include <linux/netfilter_ipv4/ip_conntrack_core.h>
37#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 37#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
38#include <linux/netfilter_ipv4/listhelp.h>
39 38
40#if 0 39#if 0
41#define DEBUGP printk 40#define DEBUGP printk
@@ -534,7 +533,7 @@ static struct nf_hook_ops ip_conntrack_ops[] = {
534 533
535/* Sysctl support */ 534/* Sysctl support */
536 535
537int ip_conntrack_checksum = 1; 536int ip_conntrack_checksum __read_mostly = 1;
538 537
539#ifdef CONFIG_SYSCTL 538#ifdef CONFIG_SYSCTL
540 539
@@ -563,7 +562,7 @@ extern unsigned int ip_ct_udp_timeout_stream;
563/* From ip_conntrack_proto_icmp.c */ 562/* From ip_conntrack_proto_icmp.c */
564extern unsigned int ip_ct_icmp_timeout; 563extern unsigned int ip_ct_icmp_timeout;
565 564
566/* From ip_conntrack_proto_icmp.c */ 565/* From ip_conntrack_proto_generic.c */
567extern unsigned int ip_ct_generic_timeout; 566extern unsigned int ip_ct_generic_timeout;
568 567
569/* Log invalid packets of a given protocol */ 568/* Log invalid packets of a given protocol */
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1741d555ad0d..71f3e09cbc84 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -22,9 +22,6 @@
22#include <linux/udp.h> 22#include <linux/udp.h>
23#include <linux/jhash.h> 23#include <linux/jhash.h>
24 24
25#define ASSERT_READ_LOCK(x)
26#define ASSERT_WRITE_LOCK(x)
27
28#include <linux/netfilter_ipv4/ip_conntrack.h> 25#include <linux/netfilter_ipv4/ip_conntrack.h>
29#include <linux/netfilter_ipv4/ip_conntrack_core.h> 26#include <linux/netfilter_ipv4/ip_conntrack_core.h>
30#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 27#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -33,7 +30,6 @@
33#include <linux/netfilter_ipv4/ip_nat_core.h> 30#include <linux/netfilter_ipv4/ip_nat_core.h>
34#include <linux/netfilter_ipv4/ip_nat_helper.h> 31#include <linux/netfilter_ipv4/ip_nat_helper.h>
35#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 32#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
36#include <linux/netfilter_ipv4/listhelp.h>
37 33
38#if 0 34#if 0
39#define DEBUGP printk 35#define DEBUGP printk
@@ -101,18 +97,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
101 write_unlock_bh(&ip_nat_lock); 97 write_unlock_bh(&ip_nat_lock);
102} 98}
103 99
104/* We do checksum mangling, so if they were wrong before they're still
105 * wrong. Also works for incomplete packets (eg. ICMP dest
106 * unreachables.) */
107u_int16_t
108ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
109{
110 u_int32_t diffs[] = { oldvalinv, newval };
111 return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
112 oldcheck^0xFFFF));
113}
114EXPORT_SYMBOL(ip_nat_cheat_check);
115
116/* Is this tuple already taken? (not by us) */ 100/* Is this tuple already taken? (not by us) */
117int 101int
118ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, 102ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -378,12 +362,12 @@ manip_pkt(u_int16_t proto,
378 iph = (void *)(*pskb)->data + iphdroff; 362 iph = (void *)(*pskb)->data + iphdroff;
379 363
380 if (maniptype == IP_NAT_MANIP_SRC) { 364 if (maniptype == IP_NAT_MANIP_SRC) {
381 iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, 365 iph->check = nf_csum_update(~iph->saddr, target->src.ip,
382 iph->check); 366 iph->check);
383 iph->saddr = target->src.ip; 367 iph->saddr = target->src.ip;
384 } else { 368 } else {
385 iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, 369 iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
386 iph->check); 370 iph->check);
387 iph->daddr = target->dst.ip; 371 iph->daddr = target->dst.ip;
388 } 372 }
389 return 1; 373 return 1;
@@ -423,10 +407,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct,
423EXPORT_SYMBOL_GPL(ip_nat_packet); 407EXPORT_SYMBOL_GPL(ip_nat_packet);
424 408
425/* Dir is direction ICMP is coming from (opposite to packet it contains) */ 409/* Dir is direction ICMP is coming from (opposite to packet it contains) */
426int ip_nat_icmp_reply_translation(struct sk_buff **pskb, 410int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
427 struct ip_conntrack *ct, 411 enum ip_conntrack_info ctinfo,
428 enum ip_nat_manip_type manip, 412 unsigned int hooknum,
429 enum ip_conntrack_dir dir) 413 struct sk_buff **pskb)
430{ 414{
431 struct { 415 struct {
432 struct icmphdr icmp; 416 struct icmphdr icmp;
@@ -434,7 +418,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
 	} *inside;
 	struct ip_conntrack_tuple inner, target;
 	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
+	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
 
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -443,12 +429,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
-		hdrlen = (*pskb)->nh.iph->ihl * 4;
-		if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
-						(*pskb)->len - hdrlen, 0)))
-			return 0;
-	}
+	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+		return 0;
 
 	/* Must be RELATED */
 	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -487,12 +469,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
 					 !manip))
 		return 0;
 
-	/* Reloading "inside" here since manip_pkt inner. */
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-	inside->icmp.checksum = 0;
-	inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
-						       (*pskb)->len - hdrlen,
-						       0));
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+		/* Reloading "inside" here since manip_pkt inner. */
+		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside->icmp.checksum = 0;
+		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+							       (*pskb)->len - hdrlen,
+							       0));
+	}
 
 	/* Change outer to look the reply to an incoming packet
 	 * (proto 0 means don't invert per-proto part). */
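
With the reworked signature above, ip_nat_icmp_reply_translation() derives the packet direction and manip type itself, via CTINFO2DIR(ctinfo) and HOOK2MANIP(hooknum), instead of trusting its caller. A simplified rendering of what those two helpers compute (the real ones are macros in the conntrack/NAT headers; this sketch assumes those kernel enums and hook constants):

    /* Sketch only: ctinfo values at or above IP_CT_IS_REPLY describe
     * packets flowing in the reply direction. */
    static inline enum ip_conntrack_dir ctinfo2dir(enum ip_conntrack_info ctinfo)
    {
        return ctinfo >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL;
    }

    /* Sketch only: source manip (SNAT) is applied at POST_ROUTING and
     * LOCAL_IN, destination manip (DNAT) at PRE_ROUTING and LOCAL_OUT. */
    static inline enum ip_nat_manip_type hook2manip(unsigned int hooknum)
    {
        return (hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN)
               ? IP_NAT_MANIP_SRC : IP_NAT_MANIP_DST;
    }
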
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index cbcaa45370ae..7f6a75984f6c 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -27,16 +27,12 @@
 #include <net/tcp.h>
 #include <net/udp.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -165,7 +161,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
 {
 	struct iphdr *iph;
 	struct tcphdr *tcph;
-	int datalen;
+	int oldlen, datalen;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return 0;
@@ -180,13 +176,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
 	iph = (*pskb)->nh.iph;
 	tcph = (void *)iph + iph->ihl*4;
 
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
 			match_offset, match_len, rep_buffer, rep_len);
 
 	datalen = (*pskb)->len - iph->ihl*4;
-	tcph->check = 0;
-	tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
-				   csum_partial((char *)tcph, datalen, 0));
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+		tcph->check = 0;
+		tcph->check = tcp_v4_check(tcph, datalen,
+					   iph->saddr, iph->daddr,
+					   csum_partial((char *)tcph,
+							datalen, 0));
+	} else
+		tcph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   tcph->check, 1);
 
 	if (rep_len != match_len) {
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
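
The branch added above picks between two checksum strategies: if the device will finalize the checksum later (CHECKSUM_PARTIAL), only the pseudo-header's length word needs correcting, which nf_proto_csum_update() does incrementally; otherwise the whole TCP checksum is recomputed in software. The htons(oldlen) ^ 0xFFFF argument is the precomputed one's-complement of the old length, i.e. the ~m term of RFC 1624. A sketch of the decision, reusing the csum_replace16() sketch from earlier (predicate and helper names are hypothetical):

    /* Sketch: fix a transport checksum after the payload length
     * changed from oldlen to datalen (names illustrative). */
    if (!device_will_finish_csum)   /* i.e. not CHECKSUM_PARTIAL */
        tcp_check = full_tcp_checksum(iph, tcph, datalen);  /* hypothetical */
    else
        tcp_check = csum_replace16(tcp_check,
                                   htons(oldlen), htons(datalen));
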
@@ -221,6 +226,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
 {
 	struct iphdr *iph;
 	struct udphdr *udph;
+	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
 	iph = (*pskb)->nh.iph;
@@ -238,22 +244,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
 
 	iph = (*pskb)->nh.iph;
 	udph = (void *)iph + iph->ihl*4;
+
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
 			match_offset, match_len, rep_buffer, rep_len);
 
 	/* update the length of the UDP packet */
-	udph->len = htons((*pskb)->len - iph->ihl*4);
+	datalen = (*pskb)->len - iph->ihl*4;
+	udph->len = htons(datalen);
 
 	/* fix udp checksum if udp checksum was previously calculated */
-	if (udph->check) {
-		int datalen = (*pskb)->len - iph->ihl * 4;
+	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+		return 1;
+
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		udph->check = 0;
 		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 						datalen, IPPROTO_UDP,
 						csum_partial((char *)udph,
 							     datalen, 0));
-	}
-
+		if (!udph->check)
+			udph->check = -1;
+	} else
+		udph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   udph->check, 1);
 	return 1;
 }
 EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
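
Two UDP-over-IPv4 quirks drive the extra tests above: a checksum field of 0 on the wire means "no checksum was computed", so a datagram that arrived without one can be passed on unchanged (unless hardware is still due to fill the checksum in); and a genuinely computed checksum that works out to 0 must be transmitted as 0xFFFF, which encodes the same one's-complement value. A self-contained sketch of that finalization rule (illustrative only):

    #include <stdint.h>

    /* Decide the on-wire UDP checksum after mangling a datagram,
     * mirroring the logic above (sketch, not kernel code). */
    uint16_t udp_check_on_wire(uint16_t oldcheck, uint16_t computed)
    {
        if (oldcheck == 0)
            return 0;          /* sender opted out of checksumming */
        if (computed == 0)
            return 0xFFFF;     /* computed zero is sent as all-ones */
        return computed;
    }
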
@@ -293,11 +309,14 @@ sack_adjust(struct sk_buff *skb,
 			ntohl(sack->start_seq), new_start_seq,
 			ntohl(sack->end_seq), new_end_seq);
 
-		tcph->check =
-			ip_nat_cheat_check(~sack->start_seq, new_start_seq,
-					   ip_nat_cheat_check(~sack->end_seq,
-							      new_end_seq,
-							      tcph->check));
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->start_seq,
+						   new_start_seq,
+						   tcph->check, 0);
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->end_seq,
+						   new_end_seq,
+						   tcph->check, 0);
 		sack->start_seq = new_start_seq;
 		sack->end_seq = new_end_seq;
 		sackoff += sizeof(*sack);
@@ -381,10 +400,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
 	newack = ntohl(tcph->ack_seq) - other_way->offset_before;
 	newack = htonl(newack);
 
-	tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
-					 ip_nat_cheat_check(~tcph->ack_seq,
-							    newack,
-							    tcph->check));
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
+					   tcph->check, 0);
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
+					   tcph->check, 0);
 
 	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
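
ip_nat_seq_adjust() exists because a helper that grows or shrinks the TCP payload (as the PPTP and FTP helpers do) leaves every later segment's sequence and acknowledgment numbers off by the accumulated delta. The paired nf_proto_csum_update() calls above repair the checksum incrementally for 32-bit fields, which the Internet checksum simply treats as two 16-bit words. A sketch building on the earlier csum_replace16() sketch:

    #include <stdint.h>

    static uint16_t csum_replace16(uint16_t check,
                                   uint16_t oldval, uint16_t newval);

    /* Illustrative 32-bit incremental update: fold the high and low
     * halves of the old and new values separately. */
    static uint16_t csum_replace32(uint16_t check,
                                   uint32_t oldval, uint32_t newval)
    {
        check = csum_replace16(check, oldval >> 16, newval >> 16);
        return csum_replace16(check, oldval & 0xFFFF, newval & 0xFFFF);
    }
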
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 1d149964dc38..2ff578807123 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -32,7 +32,7 @@
  * 2005-06-10 - Version 3.0
  *   - kernel >= 2.6.11 version,
  *     funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- * 
+ *
  */
 
 #include <linux/module.h>
@@ -85,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
 		DEBUGP("we are PNS->PAC\n");
 		/* therefore, build tuple for PAC->PNS */
 		t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
-		t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
+		t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
 		t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
-		t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
+		t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
 		t.dst.protonum = IPPROTO_GRE;
 	} else {
 		DEBUGP("we are PAC->PNS\n");
 		/* build tuple for PNS->PAC */
 		t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-		t.src.u.gre.key =
-			htons(master->nat.help.nat_pptp_info.pns_call_id);
+		t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
 		t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-		t.dst.u.gre.key =
-			htons(master->nat.help.nat_pptp_info.pac_call_id);
+		t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
 		t.dst.protonum = IPPROTO_GRE;
 	}
 
@@ -149,51 +147,52 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 {
 	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_callid;
+	u_int16_t msg;
+	__be16 new_callid;
 	unsigned int cid_off;
 
-	new_callid = htons(ct_pptp_info->pns_call_id);
+	new_callid = ct_pptp_info->pns_call_id;
 
 	switch (msg = ntohs(ctlh->messageType)) {
 	case PPTP_OUT_CALL_REQUEST:
 		cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
 		/* FIXME: ideally we would want to reserve a call ID
 		 * here.  current netfilter NAT core is not able to do
 		 * this :( For now we use TCP source port. This breaks
 		 * multiple calls within one control session */
 
 		/* save original call ID in nat_info */
 		nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
 
 		/* don't use tcph->source since we are at a DSTmanip
 		 * hook (e.g. PREROUTING) and pkt is not mangled yet */
 		new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
 
 		/* save new call ID in ct info */
-		ct_pptp_info->pns_call_id = ntohs(new_callid);
+		ct_pptp_info->pns_call_id = new_callid;
 		break;
 	case PPTP_IN_CALL_REPLY:
-		cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
+		cid_off = offsetof(union pptp_ctrl_union, icack.callID);
 		break;
 	case PPTP_CALL_CLEAR_REQUEST:
 		cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
 		break;
 	default:
 		DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
 		       (msg <= PPTP_MSG_MAX)?
 		       pptp_msg_name[msg]:pptp_msg_name[0]);
 		/* fall through */
 
 	case PPTP_SET_LINK_INFO:
 		/* only need to NAT in case PAC is behind NAT box */
 	case PPTP_START_SESSION_REQUEST:
 	case PPTP_START_SESSION_REPLY:
 	case PPTP_STOP_SESSION_REQUEST:
 	case PPTP_STOP_SESSION_REPLY:
 	case PPTP_ECHO_REQUEST:
 	case PPTP_ECHO_REPLY:
 		/* no need to alter packet */
 		return NF_ACCEPT;
 	}
 
 	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
@@ -212,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 	return NF_ACCEPT;
 }
 
-static int
+static void
 pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	     struct ip_conntrack_expect *expect_reply)
 {
-	struct ip_ct_pptp_master *ct_pptp_info =
-				&expect_orig->master->help.ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info =
-				&expect_orig->master->nat.help.nat_pptp_info;
-
 	struct ip_conntrack *ct = expect_orig->master;
-
-	struct ip_conntrack_tuple inv_t;
-	struct ip_conntrack_tuple *orig_t, *reply_t;
+	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
 
 	/* save original PAC call ID in nat_info */
 	nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
 
-	/* alter expectation */
-	orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
 	/* alter expectation for PNS->PAC direction */
-	invert_tuplepr(&inv_t, &expect_orig->tuple);
-	expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id);
-	expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
-	expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
+	expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
+	expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
 	expect_orig->dir = IP_CT_DIR_ORIGINAL;
-	inv_t.src.ip = reply_t->src.ip;
-	inv_t.dst.ip = reply_t->dst.ip;
-	inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
-	inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
-
-	if (!ip_conntrack_expect_related(expect_orig)) {
-		DEBUGP("successfully registered expect\n");
-	} else {
-		DEBUGP("can't expect_related(expect_orig)\n");
-		return 1;
-	}
 
 	/* alter expectation for PAC->PNS direction */
-	invert_tuplepr(&inv_t, &expect_reply->tuple);
-	expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
-	expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
-	expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
+	expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
+	expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
 	expect_reply->dir = IP_CT_DIR_REPLY;
-	inv_t.src.ip = orig_t->src.ip;
-	inv_t.dst.ip = orig_t->dst.ip;
-	inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
-	inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
-
-	if (!ip_conntrack_expect_related(expect_reply)) {
-		DEBUGP("successfully registered expect\n");
-	} else {
-		DEBUGP("can't expect_related(expect_reply)\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		return 1;
-	}
-
-	if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
-		DEBUGP("can't register original keymap\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		ip_conntrack_unexpect_related(expect_reply);
-		return 1;
-	}
-
-	if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
-		DEBUGP("can't register reply keymap\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		ip_conntrack_unexpect_related(expect_reply);
-		ip_ct_gre_keymap_destroy(ct);
-		return 1;
-	}
-
-	return 0;
 }
 
 /* inbound packets == from PAC to PNS */
@@ -297,15 +244,15 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		 union pptp_ctrl_union *pptpReq)
 {
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_cid = 0, new_pcid;
-	unsigned int pcid_off, cid_off = 0;
+	u_int16_t msg;
+	__be16 new_pcid;
+	unsigned int pcid_off;
 
-	new_pcid = htons(nat_pptp_info->pns_call_id);
+	new_pcid = nat_pptp_info->pns_call_id;
 
 	switch (msg = ntohs(ctlh->messageType)) {
 	case PPTP_OUT_CALL_REPLY:
 		pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
-		cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
 		break;
 	case PPTP_IN_CALL_CONNECT:
 		pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
@@ -324,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 		break;
 
 	default:
-		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? 
+		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
 		       pptp_msg_name[msg]:pptp_msg_name[0]);
 		/* fall through */
 
@@ -351,17 +298,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 				     sizeof(new_pcid), (char *)&new_pcid,
 				     sizeof(new_pcid)) == 0)
 		return NF_DROP;
-
-	if (new_cid) {
-		DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-			ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid));
-		if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-					     cid_off + sizeof(struct pptp_pkt_hdr) +
-					     sizeof(struct PptpControlHeader),
-					     sizeof(new_cid), (char *)&new_cid,
-					     sizeof(new_cid)) == 0)
-			return NF_DROP;
-	}
 	return NF_ACCEPT;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 38acfdf540eb..bf91f9312b3c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -6,10 +6,10 @@
  * GRE is a generic encapsulation protocol, which is generally not very
  * suited for NAT, as it has no protocol-specific part as port numbers.
- * 
+ *
  * It has an optional key field, which may help us distinguishing two
  * connections between the same two hosts.
- * 
+ *
  * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- * 
+ *
  * PPTP is built on top of a modified version of GRE, and has a mandatory
  * field called "CallID", which serves us for the same purpose as the key
@@ -60,14 +60,14 @@ gre_in_range(const struct ip_conntrack_tuple *tuple,
 }
 
 /* generate unique tuple ... */
 static int
 gre_unique_tuple(struct ip_conntrack_tuple *tuple,
 		 const struct ip_nat_range *range,
 		 enum ip_nat_manip_type maniptype,
 		 const struct ip_conntrack *conntrack)
 {
 	static u_int16_t key;
-	u_int16_t *keyptr;
+	__be16 *keyptr;
 	unsigned int min, i, range_size;
 
 	if (maniptype == IP_NAT_MANIP_SRC)
@@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.gre.key) - min + 1;
 	}
 
-	DEBUGP("min = %u, range_size = %u\n", min, range_size); 
+	DEBUGP("min = %u, range_size = %u\n", min, range_size);
 
 	for (i = 0; i < range_size; i++, key++) {
 		*keyptr = htons(min + key % range_size);
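
The loop above starts from a static counter so successive connections are handed different keys, and a collision simply advances to the next candidate in the configured range. A self-contained sketch of the same search, where taken() stands in for ip_nat_used_tuple():

    #include <stdbool.h>
    #include <stdint.h>

    extern bool taken(uint16_t candidate);   /* placeholder predicate */

    static uint16_t next_key;   /* persists across calls, like 'key' above */

    /* Return 1 and store a free key from [min, min + range_size),
     * or 0 if the whole range is in use (illustrative sketch). */
    int pick_free_key(uint16_t min, uint16_t range_size, uint16_t *out)
    {
        unsigned int i;

        for (i = 0; i < range_size; i++, next_key++) {
            uint16_t candidate = min + next_key % range_size;
            if (!taken(candidate)) {
                *out = candidate;
                return 1;
            }
        }
        return 0;
    }
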
@@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb,
 	greh = (void *)(*pskb)->data + hdroff;
 	pgreh = (struct gre_hdr_pptp *) greh;
 
-	/* we only have destination manip of a packet, since 'source key' 
+	/* we only have destination manip of a packet, since 'source key'
 	 * is not present in the packet itself */
 	if (maniptype == IP_NAT_MANIP_DST) {
 		/* key manipulation is always dest */
@@ -129,15 +129,16 @@ gre_manip_pkt(struct sk_buff **pskb,
 		}
 		if (greh->csum) {
 			/* FIXME: Never tested this code... */
-			*(gre_csum(greh)) = 
-				ip_nat_cheat_check(~*(gre_key(greh)),
-						   tuple->dst.u.gre.key,
-						   *(gre_csum(greh)));
+			*(gre_csum(greh)) =
+				nf_proto_csum_update(*pskb,
+						     ~*(gre_key(greh)),
+						     tuple->dst.u.gre.key,
+						     *(gre_csum(greh)), 0);
 		}
 		*(gre_key(greh)) = tuple->dst.u.gre.key;
 		break;
 	case GRE_VERSION_PPTP:
 		DEBUGP("call_id -> 0x%04x\n",
 			ntohs(tuple->dst.u.gre.key));
 		pgreh->call_id = tuple->dst.u.gre.key;
 		break;
@@ -151,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb,
 }
 
 /* nat helper struct */
-static struct ip_nat_protocol gre = { 
-	.name		= "GRE", 
+static struct ip_nat_protocol gre = {
+	.name		= "GRE",
 	.protonum	= IPPROTO_GRE,
 	.manip_pkt	= gre_manip_pkt,
 	.in_range	= gre_in_range,
@@ -163,7 +164,7 @@ static struct ip_nat_protocol gre = {
 	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
 #endif
 };
- 
+
 int __init ip_nat_proto_gre_init(void)
 {
 	return ip_nat_protocol_register(&gre);
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 31a3f4ccb99c..ec50cc295317 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb,
 		return 0;
 
 	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-
-	hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
-					   tuple->src.u.icmp.id,
-					   hdr->checksum);
+	hdr->checksum = nf_proto_csum_update(*pskb,
+					     hdr->un.echo.id ^ 0xFFFF,
+					     tuple->src.u.icmp.id,
+					     hdr->checksum, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index a3d14079eba6..72a6307bd2db 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-	hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(oldport ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+	hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
+	hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport,
+					  hdr->check, 0);
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index ec6053fdc867..5da196ae758c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb,
 		newport = tuple->dst.u.udp.port;
 		portptr = &hdr->dest;
 	}
-	if (hdr->check) /* 0 is a special case meaning no checksum */
-		hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(*portptr ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+
+	if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
+		hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
+						  hdr->check, 1);
+		hdr->check = nf_proto_csum_update(*pskb,
+						  *portptr ^ 0xFFFF, newport,
+						  hdr->check, 0);
+		if (!hdr->check)
+			hdr->check = -1;
+	}
 	*portptr = newport;
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 1aba926c1cb0..7b703839aa58 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -19,14 +19,10 @@
 #include <net/route.h>
 #include <linux/bitops.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -104,8 +100,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
 				    const struct net_device *out,
 				    unsigned int hooknum,
 				    const struct ipt_target *target,
-				    const void *targinfo,
-				    void *userinfo)
+				    const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
@@ -147,8 +142,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 				    const struct net_device *out,
 				    unsigned int hooknum,
 				    const struct ipt_target *target,
-				    const void *targinfo,
-				    void *userinfo)
+				    const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
@@ -174,7 +168,6 @@ static int ipt_snat_checkentry(const char *tablename,
 			       const void *entry,
 			       const struct ipt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hook_mask)
 {
 	struct ip_nat_multi_range_compat *mr = targinfo;
@@ -191,7 +184,6 @@ static int ipt_dnat_checkentry(const char *tablename,
 			       const void *entry,
 			       const struct ipt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hook_mask)
 {
 	struct ip_nat_multi_range_compat *mr = targinfo;
@@ -255,7 +247,7 @@ int ip_nat_rule_find(struct sk_buff **pskb,
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
 
 	if (ret == NF_ACCEPT) {
 		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 17de077a7901..9c577db62047 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -30,9 +30,6 @@
 #include <net/checksum.h>
 #include <linux/spinlock.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
@@ -40,7 +37,6 @@
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -110,11 +106,6 @@ ip_nat_fn(unsigned int hooknum,
 	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
 		       & htons(IP_MF|IP_OFFSET)));
 
-	/* If we had a hardware checksum before, it's now invalid */
-	if ((*pskb)->ip_summed == CHECKSUM_HW)
-		if (skb_checksum_help(*pskb, (out == NULL)))
-			return NF_DROP;
-
 	ct = ip_conntrack_get(*pskb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
 	   have dropped it.  Hence it's the user's responsibilty to
@@ -145,8 +136,8 @@ ip_nat_fn(unsigned int hooknum,
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
-			if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
-							   CTINFO2DIR(ctinfo)))
+			if (!ip_nat_icmp_reply_translation(ct, ctinfo,
+							   hooknum, pskb))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 198ac36db861..7edad790478a 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -52,15 +52,15 @@ struct ipq_queue_entry {
 
 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
 static unsigned int queue_dropped = 0;
 static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
@@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 		break;
 
 	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
+		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
 		}
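
The rewrite above reflects the split of the old CHECKSUM_HW value into two distinct states: CHECKSUM_PARTIAL (transmit path: the device will finish the checksum later) and CHECKSUM_COMPLETE (receive path: the device already summed the payload into skb->csum). Either way, a packet copied to user space must carry a finished checksum, hence skb_checksum_help(). A compact model of the test, with simplified names:

    #include <stdbool.h>

    enum csum_state {
        CSUM_NONE,          /* software must compute everything */
        CSUM_PARTIAL,       /* tx: hardware will finish the checksum */
        CSUM_COMPLETE,      /* rx: hardware left a raw payload sum */
        CSUM_UNNECESSARY,   /* rx: hardware already verified it */
    };

    /* Sketch of the condition above: both deferred and raw-sum states
     * need software help before the bytes leave the kernel. */
    static bool needs_checksum_help(enum csum_state s)
    {
        return s == CSUM_PARTIAL || s == CSUM_COMPLETE;
    }
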
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 048514f15f2f..800067d69a9a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -180,8 +180,7 @@ ipt_error(struct sk_buff **pskb,
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	if (net_ratelimit())
 		printk("ip_tables: error: `%s'\n", (char *)targinfo);
@@ -217,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb,
 	     unsigned int hook,
 	     const struct net_device *in,
 	     const struct net_device *out,
-	     struct ipt_table *table,
-	     void *userdata)
+	     struct ipt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	u_int16_t offset;
@@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb,
 						     in, out,
 						     hook,
 						     t->u.kernel.target,
-						     t->data,
-						     userdata);
+						     t->data);
 
 #ifdef CONFIG_NETFILTER_DEBUG
 			if (((struct ipt_entry *)table_base)->comefrom
@@ -467,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
 		return 1;
 
 	if (m->u.kernel.match->destroy)
-		m->u.kernel.match->destroy(m->u.kernel.match, m->data,
-					   m->u.match_size - sizeof(*m));
+		m->u.kernel.match->destroy(m->u.kernel.match, m->data);
 	module_put(m->u.kernel.match->me);
 	return 0;
 }
@@ -521,7 +517,6 @@ check_match(struct ipt_entry_match *m,
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ip, match, m->data,
-					      m->u.match_size - sizeof(*m),
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
@@ -578,12 +573,10 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 	if (t->u.kernel.target == &ipt_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
-			goto cleanup_matches;
+			goto err;
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
 						      e->comefrom)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -655,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
 	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
 	t = ipt_get_target(e);
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-					    t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
@@ -950,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset)
 	return delta;
 }
 
-struct compat_ipt_standard_target
+static void compat_standard_from_user(void *dst, void *src)
 {
-	struct compat_xt_entry_target target;
-	compat_int_t verdict;
-};
-
-struct compat_ipt_standard
-{
-	struct compat_ipt_entry entry;
-	struct compat_ipt_standard_target target;
-};
+	int v = *(compat_int_t *)src;
 
-#define IPT_ST_LEN		XT_ALIGN(sizeof(struct ipt_standard_target))
-#define IPT_ST_COMPAT_LEN	COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target))
-#define IPT_ST_OFFSET		(IPT_ST_LEN - IPT_ST_COMPAT_LEN)
+	if (v > 0)
+		v += compat_calc_jump(v);
+	memcpy(dst, &v, sizeof(v));
+}
 
-static int compat_ipt_standard_fn(void *target,
-		void **dstptr, int *size, int convert)
+static int compat_standard_to_user(void __user *dst, void *src)
 {
-	struct compat_ipt_standard_target compat_st, *pcompat_st;
-	struct ipt_standard_target st, *pst;
-	int ret;
+	compat_int_t cv = *(int *)src;
 
-	ret = 0;
-	switch (convert) {
-	case COMPAT_TO_USER:
-		pst = target;
-		memcpy(&compat_st.target, &pst->target,
-		       sizeof(compat_st.target));
-		compat_st.verdict = pst->verdict;
-		if (compat_st.verdict > 0)
-			compat_st.verdict -=
-				compat_calc_jump(compat_st.verdict);
-		compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN;
-		if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN))
-			ret = -EFAULT;
-		*size -= IPT_ST_OFFSET;
-		*dstptr += IPT_ST_COMPAT_LEN;
-		break;
-	case COMPAT_FROM_USER:
-		pcompat_st = target;
-		memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN);
-		st.verdict = pcompat_st->verdict;
-		if (st.verdict > 0)
-			st.verdict += compat_calc_jump(st.verdict);
-		st.target.u.user.target_size = IPT_ST_LEN;
-		memcpy(*dstptr, &st, IPT_ST_LEN);
-		*size += IPT_ST_OFFSET;
-		*dstptr += IPT_ST_LEN;
-		break;
-	case COMPAT_CALC_SIZE:
-		*size += IPT_ST_OFFSET;
-		break;
-	default:
-		ret = -ENOPROTOOPT;
-		break;
-	}
-	return ret;
+	if (cv > 0)
+		cv -= compat_calc_jump(cv);
+	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
 }
 
 static inline int
 compat_calc_match(struct ipt_entry_match *m, int * size)
 {
-	if (m->u.kernel.match->compat)
-		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
-	else
-		xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+	*size += xt_compat_match_offset(m->u.kernel.match);
 	return 0;
 }
 
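
The compat helpers above exist because a 32-bit iptables ruleset uses smaller entry, match and target layouts than the 64-bit kernel's, so a standard target's verdict, when positive, is a byte offset into the ruleset and must be shifted by the size difference accumulated before that point; compat_calc_jump() supplies that delta, and the from_user/to_user pair apply it in opposite directions. A sketch of the two conversions, where delta_at() stands in for compat_calc_jump():

    #include <stdint.h>

    extern int32_t delta_at(int32_t offset);  /* placeholder */

    /* Positive verdicts are jump offsets; negative ones are codes
     * such as ACCEPT/DROP and pass through untouched (sketch). */
    int32_t verdict_from_compat(int32_t v)
    {
        return v > 0 ? v + delta_at(v) : v;
    }

    int32_t verdict_to_compat(int32_t v)
    {
        return v > 0 ? v - delta_at(v) : v;
    }
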
@@ -1031,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
 	entry_offset = (void *)e - base;
 	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
 	t = ipt_get_target(e);
-	if (t->u.kernel.target->compat)
-		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
-	else
-		xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+	off += xt_compat_target_offset(t->u.kernel.target);
 	newinfo->size -= off;
 	ret = compat_add_offset(entry_offset, off);
 	if (ret)
@@ -1420,17 +1364,13 @@ struct compat_ipt_replace {
 };
 
 static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
-		void __user **dstptr, compat_uint_t *size)
+		void * __user *dstptr, compat_uint_t *size)
 {
-	if (m->u.kernel.match->compat)
-		return m->u.kernel.match->compat(m, dstptr, size,
-				COMPAT_TO_USER);
-	else
-		return xt_compat_match(m, dstptr, size, COMPAT_TO_USER);
+	return xt_compat_match_to_user(m, dstptr, size);
 }
 
 static int compat_copy_entry_to_user(struct ipt_entry *e,
-		void __user **dstptr, compat_uint_t *size)
+		void * __user *dstptr, compat_uint_t *size)
 {
 	struct ipt_entry_target __user *t;
 	struct compat_ipt_entry __user *ce;
@@ -1450,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
 	if (ret)
 		goto out;
 	t = ipt_get_target(e);
-	if (t->u.kernel.target->compat)
-		ret = t->u.kernel.target->compat(t, dstptr, size,
-				COMPAT_TO_USER);
-	else
-		ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER);
+	ret = xt_compat_target_to_user(t, dstptr, size);
 	if (ret)
 		goto out;
 	ret = -EFAULT;
@@ -1486,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m,
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
-
-	if (m->u.kernel.match->compat)
-		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
-	else
-		xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+	*size += xt_compat_match_offset(match);
 
 	(*i)++;
 	return 0;
@@ -1537,7 +1469,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
 				e->comefrom, &off, &j);
 	if (ret != 0)
-		goto out;
+		goto cleanup_matches;
 
 	t = ipt_get_target(e);
 	target = try_then_request_module(xt_find_target(AF_INET,
@@ -1547,14 +1479,11 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	if (IS_ERR(target) || !target) {
 		duprintf("check_entry: `%s' not found\n", t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
-		goto out;
+		goto cleanup_matches;
 	}
 	t->u.kernel.target = target;
 
-	if (t->u.kernel.target->compat)
-		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
-	else
-		xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+	off += xt_compat_target_offset(target);
 	*size += off;
 	ret = compat_add_offset(entry_offset, off);
 	if (ret)
@@ -1574,14 +1503,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 
 	(*i)++;
 	return 0;
+
 out:
+	module_put(t->u.kernel.target->me);
+cleanup_matches:
 	IPT_MATCH_ITERATE(e, cleanup_match, &j);
 	return ret;
 }
 
 static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
 	void **dstptr, compat_uint_t *size, const char *name,
-	const struct ipt_ip *ip, unsigned int hookmask)
+	const struct ipt_ip *ip, unsigned int hookmask, int *i)
 {
 	struct ipt_entry_match *dm;
 	struct ipt_match *match;
@@ -1589,26 +1521,28 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
 
 	dm = (struct ipt_entry_match *)*dstptr;
 	match = m->u.kernel.match;
-	if (match->compat)
-		match->compat(m, dstptr, size, COMPAT_FROM_USER);
-	else
-		xt_compat_match(m, dstptr, size, COMPAT_FROM_USER);
+	xt_compat_match_from_user(m, dstptr, size);
 
 	ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm),
 			     name, hookmask, ip->proto,
 			     ip->invflags & IPT_INV_PROTO);
 	if (ret)
-		return ret;
+		goto err;
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ip, match, dm->data,
-					      dm->u.match_size - sizeof(*dm),
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err;
 	}
+	(*i)++;
 	return 0;
+
+err:
+	module_put(m->u.kernel.match->me);
+	return ret;
 }
 
 static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
@@ -1619,25 +1553,23 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	struct ipt_target *target;
 	struct ipt_entry *de;
 	unsigned int origsize;
-	int ret, h;
+	int ret, h, j;
 
 	ret = 0;
 	origsize = *size;
 	de = (struct ipt_entry *)*dstptr;
 	memcpy(de, e, sizeof(struct ipt_entry));
 
+	j = 0;
 	*dstptr += sizeof(struct compat_ipt_entry);
 	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
-			name, &de->ip, de->comefrom);
+			name, &de->ip, de->comefrom, &j);
 	if (ret)
-		goto out;
+		goto cleanup_matches;
 	de->target_offset = e->target_offset - (origsize - *size);
 	t = ipt_get_target(e);
 	target = t->u.kernel.target;
-	if (target->compat)
-		target->compat(t, dstptr, size, COMPAT_FROM_USER);
-	else
-		xt_compat_target(t, dstptr, size, COMPAT_FROM_USER);
+	xt_compat_target_from_user(t, dstptr, size);
 
 	de->next_offset = e->next_offset - (origsize - *size);
 	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
@@ -1653,22 +1585,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 			     name, e->comefrom, e->ip.proto,
 			     e->ip.invflags & IPT_INV_PROTO);
 	if (ret)
-		goto out;
+		goto err;
 
 	ret = -EINVAL;
 	if (t->u.kernel.target == &ipt_standard_target) {
 		if (!standard_check(t, *size))
-			goto out;
+			goto err;
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, de, target,
-				t->data, t->u.target_size - sizeof(*t),
-				de->comefrom)) {
+				t->data, de->comefrom)) {
 		duprintf("ip_tables: compat: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
-		goto out;
+		goto err;
 	}
 	ret = 0;
-out:
+	return ret;
+
+err:
+	module_put(t->u.kernel.target->me);
+cleanup_matches:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
 	return ret;
 }
 
@@ -1989,6 +1925,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
 	return ret;
 }
 
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
 static int
 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 {
@@ -2002,8 +1940,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		ret = compat_get_entries(user, len);
 		break;
 	default:
-		duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
-		ret = -EINVAL;
+		ret = do_ipt_get_ctl(sk, cmd, user, len);
 	}
 	return ret;
 }
@@ -2185,7 +2122,6 @@ icmp_checkentry(const char *tablename,
 		const void *info,
 		const struct xt_match *match,
 		void *matchinfo,
-		unsigned int matchsize,
 		unsigned int hook_mask)
 {
 	const struct ipt_icmp *icmpinfo = matchinfo;
@@ -2200,7 +2136,9 @@ static struct ipt_target ipt_standard_target = {
 	.targetsize	= sizeof(int),
 	.family		= AF_INET,
 #ifdef CONFIG_COMPAT
-	.compat		= &compat_ipt_standard_fn,
+	.compatsize	= sizeof(compat_int_t),
+	.compat_from_user = compat_standard_from_user,
+	.compat_to_user	= compat_standard_to_user,
 #endif
 };
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d994c5f5744c..41589665fc5d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -302,8 +302,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 	enum ip_conntrack_info ctinfo;
@@ -373,7 +372,6 @@ checkentry(const char *tablename,
 	   const void *e_void,
 	   const struct xt_target *target,
 	   void *targinfo,
-	   unsigned int targinfosize,
 	   unsigned int hook_mask)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
@@ -450,8 +448,7 @@ checkentry(const char *tablename,
 }
 
 /* drop reference count of cluster config when rule is deleted */
-static void destroy(const struct xt_target *target, void *targinfo,
-		    unsigned int targinfosize)
+static void destroy(const struct xt_target *target, void *targinfo)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
deleted file mode 100644
index c8e971288dfe..000000000000
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ /dev/null
@@ -1,96 +0,0 @@
1/* iptables module for setting the IPv4 DSCP field, Version 1.8
2 *
3 * (C) 2002 by Harald Welte <laforge@netfilter.org>
4 * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * See RFC2474 for a description of the DSCP field within the IP Header.
11 *
12 * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
13*/
14
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/ip.h>
18#include <net/checksum.h>
19
20#include <linux/netfilter_ipv4/ip_tables.h>
21#include <linux/netfilter_ipv4/ipt_DSCP.h>
22
23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
24MODULE_DESCRIPTION("iptables DSCP modification module");
25MODULE_LICENSE("GPL");
26
27static unsigned int
28target(struct sk_buff **pskb,
29 const struct net_device *in,
30 const struct net_device *out,
31 unsigned int hooknum,
32 const struct xt_target *target,
33 const void *targinfo,
34 void *userinfo)
35{
36 const struct ipt_DSCP_info *dinfo = targinfo;
37 u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
38
39
40 if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
41 u_int16_t diffs[2];
42
43 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
44 return NF_DROP;
45
46 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
47 (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
48 | sh_dscp;
49 diffs[1] = htons((*pskb)->nh.iph->tos);
50 (*pskb)->nh.iph->check
51 = csum_fold(csum_partial((char *)diffs,
52 sizeof(diffs),
53 (*pskb)->nh.iph->check
54 ^ 0xFFFF));
55 }
56 return IPT_CONTINUE;
57}
58
59static int
60checkentry(const char *tablename,
61 const void *e_void,
62 const struct xt_target *target,
63 void *targinfo,
64 unsigned int targinfosize,
65 unsigned int hook_mask)
66{
67 const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
68
69 if ((dscp > IPT_DSCP_MAX)) {
70 printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
71 return 0;
72 }
73 return 1;
74}
75
76static struct ipt_target ipt_dscp_reg = {
77 .name = "DSCP",
78 .target = target,
79 .targetsize = sizeof(struct ipt_DSCP_info),
80 .table = "mangle",
81 .checkentry = checkentry,
82 .me = THIS_MODULE,
83};
84
85static int __init ipt_dscp_init(void)
86{
87 return ipt_register_target(&ipt_dscp_reg);
88}
89
90static void __exit ipt_dscp_fini(void)
91{
92 ipt_unregister_target(&ipt_dscp_reg);
93}
94
95module_init(ipt_dscp_init);
96module_exit(ipt_dscp_fini);
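
The deleted module's only real work was the shift-and-mask manipulation of the six DSCP bits in the TOS byte (plus the incremental checksum fix-up, shown for ipt_ECN below). A standalone sketch of that bit manipulation; the shift and mask here are hard-coded assumptions mirroring IPT_DSCP_SHIFT (2) and IPT_DSCP_MASK (0xfc), not authoritative definitions.

/*
 * Standalone sketch of the TOS/DSCP bit manipulation the removed
 * module performed.
 */
#include <stdio.h>
#include <stdint.h>

#define DSCP_SHIFT 2
#define DSCP_MASK  0xfc        /* upper six bits of the TOS byte */

static uint8_t set_dscp(uint8_t tos, uint8_t dscp)
{
        uint8_t sh_dscp = (uint8_t)((dscp << DSCP_SHIFT) & DSCP_MASK);

        /* Keep the two low (ECN) bits, replace the six DSCP bits. */
        return (uint8_t)((tos & ~DSCP_MASK) | sh_dscp);
}

int main(void)
{
        uint8_t tos = 0x13;             /* DSCP 0x04, ECN bits 11 */

        printf("old tos=0x%02x new tos=0x%02x\n", tos, set_dscp(tos, 0x2e));
        return 0;
}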
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4adf5c9d34f5..23f9c7ebe7eb 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -27,32 +27,28 @@ MODULE_DESCRIPTION("iptables ECN modification module");
27static inline int 27static inline int
28set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) 28set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
29{ 29{
30 if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK) 30 struct iphdr *iph = (*pskb)->nh.iph;
31 != (einfo->ip_ect & IPT_ECN_IP_MASK)) { 31 u_int16_t oldtos;
32 u_int16_t diffs[2];
33 32
33 if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
34 if (!skb_make_writable(pskb, sizeof(struct iphdr))) 34 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
35 return 0; 35 return 0;
36 36 iph = (*pskb)->nh.iph;
37 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; 37 oldtos = iph->tos;
38 (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK; 38 iph->tos &= ~IPT_ECN_IP_MASK;
39 (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); 39 iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
40 diffs[1] = htons((*pskb)->nh.iph->tos); 40 iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos,
41 (*pskb)->nh.iph->check 41 iph->check);
42 = csum_fold(csum_partial((char *)diffs,
43 sizeof(diffs),
44 (*pskb)->nh.iph->check
45 ^0xFFFF));
46 } 42 }
47 return 1; 43 return 1;
48} 44}
49 45
50/* Return 0 if there was an error. */ 46/* Return 0 if there was an error. */
51static inline int 47static inline int
52set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) 48set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
53{ 49{
54 struct tcphdr _tcph, *tcph; 50 struct tcphdr _tcph, *tcph;
55 u_int16_t diffs[2]; 51 u_int16_t oldval;
56 52
57 /* Not enough header? */ 53 /* Not enough header? */
58 tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, 54 tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -70,22 +66,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
70 return 0; 66 return 0;
71 tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; 67 tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
72 68
73 if ((*pskb)->ip_summed == CHECKSUM_HW && 69 oldval = ((u_int16_t *)tcph)[6];
74 skb_checksum_help(*pskb, inward))
75 return 0;
76
77 diffs[0] = ((u_int16_t *)tcph)[6];
78 if (einfo->operation & IPT_ECN_OP_SET_ECE) 70 if (einfo->operation & IPT_ECN_OP_SET_ECE)
79 tcph->ece = einfo->proto.tcp.ece; 71 tcph->ece = einfo->proto.tcp.ece;
80 if (einfo->operation & IPT_ECN_OP_SET_CWR) 72 if (einfo->operation & IPT_ECN_OP_SET_CWR)
81 tcph->cwr = einfo->proto.tcp.cwr; 73 tcph->cwr = einfo->proto.tcp.cwr;
82 diffs[1] = ((u_int16_t *)tcph)[6];
83 diffs[0] = diffs[0] ^ 0xFFFF;
84 74
85 if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) 75 tcph->check = nf_proto_csum_update((*pskb),
86 tcph->check = csum_fold(csum_partial((char *)diffs, 76 oldval ^ 0xFFFF,
87 sizeof(diffs), 77 ((u_int16_t *)tcph)[6],
88 tcph->check^0xFFFF)); 78 tcph->check, 0);
89 return 1; 79 return 1;
90} 80}
91 81
@@ -95,8 +85,7 @@ target(struct sk_buff **pskb,
95 const struct net_device *out, 85 const struct net_device *out,
96 unsigned int hooknum, 86 unsigned int hooknum,
97 const struct xt_target *target, 87 const struct xt_target *target,
98 const void *targinfo, 88 const void *targinfo)
99 void *userinfo)
100{ 89{
101 const struct ipt_ECN_info *einfo = targinfo; 90 const struct ipt_ECN_info *einfo = targinfo;
102 91
@@ -106,7 +95,7 @@ target(struct sk_buff **pskb,
106 95
107 if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) 96 if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
108 && (*pskb)->nh.iph->protocol == IPPROTO_TCP) 97 && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
109 if (!set_ect_tcp(pskb, einfo, (out == NULL))) 98 if (!set_ect_tcp(pskb, einfo))
110 return NF_DROP; 99 return NF_DROP;
111 100
112 return IPT_CONTINUE; 101 return IPT_CONTINUE;
@@ -117,7 +106,6 @@ checkentry(const char *tablename,
117 const void *e_void, 106 const void *e_void,
118 const struct xt_target *target, 107 const struct xt_target *target,
119 void *targinfo, 108 void *targinfo,
120 unsigned int targinfosize,
121 unsigned int hook_mask) 109 unsigned int hook_mask)
122{ 110{
123 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; 111 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
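
The recurring change in ipt_ECN (and in ipt_TOS, ipt_TTL and ipt_TCPMSS below) replaces the open-coded diffs[]/csum_partial() idiom with nf_csum_update()/nf_proto_csum_update(), which apply the RFC 1624 incremental one's-complement checksum update. A self-contained userspace sketch of that update, cross-checked against a full recomputation; the helper names are illustrative, not the kernel functions.

/*
 * Incremental one's-complement checksum update (RFC 1624, eqn. 3):
 * given the old checksum, the inverted old 16-bit value and the new
 * value, compute the new checksum without re-summing everything.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t csum_fold(uint32_t sum)
{
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
}

/* HC' = ~(~HC + ~m + m') */
static uint16_t csum_update(uint16_t old_csum, uint16_t oldval, uint16_t newval)
{
        uint32_t sum = (uint16_t)~old_csum;

        sum += (uint16_t)~oldval;
        sum += newval;
        return (uint16_t)~csum_fold(sum);
}

/* Full checksum over 16-bit words, for cross-checking. */
static uint16_t csum_full(const uint16_t *words, int n)
{
        uint32_t sum = 0;

        while (n--)
                sum += *words++;
        return (uint16_t)~csum_fold(sum);
}

int main(void)
{
        uint16_t hdr[4] = { 0x4500, 0x0054, 0x1c46, 0x4000 };
        uint16_t old = csum_full(hdr, 4);

        hdr[0] = 0x4502;        /* e.g. flip an ECN bit in the TOS byte */
        printf("incremental=0x%04x full=0x%04x\n",
               csum_update(old, 0x4500, 0x4502), csum_full(hdr, 4));
        return 0;
}

Both printed values agree (0x5e63 for this input), which is exactly the property the kernel helpers rely on when patching TOS, TTL or TCP header words in place.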
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b98f7b08b084..7dc820df8bc5 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb,
416 const struct net_device *out, 416 const struct net_device *out,
417 unsigned int hooknum, 417 unsigned int hooknum,
418 const struct xt_target *target, 418 const struct xt_target *target,
419 const void *targinfo, 419 const void *targinfo)
420 void *userinfo)
421{ 420{
422 const struct ipt_log_info *loginfo = targinfo; 421 const struct ipt_log_info *loginfo = targinfo;
423 struct nf_loginfo li; 422 struct nf_loginfo li;
@@ -440,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename,
440 const void *e, 439 const void *e,
441 const struct xt_target *target, 440 const struct xt_target *target,
442 void *targinfo, 441 void *targinfo,
443 unsigned int targinfosize,
444 unsigned int hook_mask) 442 unsigned int hook_mask)
445{ 443{
446 const struct ipt_log_info *loginfo = targinfo; 444 const struct ipt_log_info *loginfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ebd94f2abf0d..bc65168a3437 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -42,7 +42,6 @@ masquerade_check(const char *tablename,
42 const void *e, 42 const void *e,
43 const struct xt_target *target, 43 const struct xt_target *target,
44 void *targinfo, 44 void *targinfo,
45 unsigned int targinfosize,
46 unsigned int hook_mask) 45 unsigned int hook_mask)
47{ 46{
48 const struct ip_nat_multi_range_compat *mr = targinfo; 47 const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -64,8 +63,7 @@ masquerade_target(struct sk_buff **pskb,
64 const struct net_device *out, 63 const struct net_device *out,
65 unsigned int hooknum, 64 unsigned int hooknum,
66 const struct xt_target *target, 65 const struct xt_target *target,
67 const void *targinfo, 66 const void *targinfo)
68 void *userinfo)
69{ 67{
70 struct ip_conntrack *ct; 68 struct ip_conntrack *ct;
71 enum ip_conntrack_info ctinfo; 69 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 736c4b5a86a7..beb2914225ff 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -33,7 +33,6 @@ check(const char *tablename,
33 const void *e, 33 const void *e,
34 const struct xt_target *target, 34 const struct xt_target *target,
35 void *targinfo, 35 void *targinfo,
36 unsigned int targinfosize,
37 unsigned int hook_mask) 36 unsigned int hook_mask)
38{ 37{
39 const struct ip_nat_multi_range_compat *mr = targinfo; 38 const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -55,8 +54,7 @@ target(struct sk_buff **pskb,
55 const struct net_device *out, 54 const struct net_device *out,
56 unsigned int hooknum, 55 unsigned int hooknum,
57 const struct xt_target *target, 56 const struct xt_target *target,
58 const void *targinfo, 57 const void *targinfo)
59 void *userinfo)
60{ 58{
61 struct ip_conntrack *ct; 59 struct ip_conntrack *ct;
62 enum ip_conntrack_info ctinfo; 60 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f290463232de..f03d43671c6d 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -36,7 +36,6 @@ redirect_check(const char *tablename,
36 const void *e, 36 const void *e,
37 const struct xt_target *target, 37 const struct xt_target *target,
38 void *targinfo, 38 void *targinfo,
39 unsigned int targinfosize,
40 unsigned int hook_mask) 39 unsigned int hook_mask)
41{ 40{
42 const struct ip_nat_multi_range_compat *mr = targinfo; 41 const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -58,8 +57,7 @@ redirect_target(struct sk_buff **pskb,
58 const struct net_device *out, 57 const struct net_device *out,
59 unsigned int hooknum, 58 unsigned int hooknum,
60 const struct xt_target *target, 59 const struct xt_target *target,
61 const void *targinfo, 60 const void *targinfo)
62 void *userinfo)
63{ 61{
64 struct ip_conntrack *ct; 62 struct ip_conntrack *ct;
65 enum ip_conntrack_info ctinfo; 63 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 269bc2067cb8..b81821edd893 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb,
90 fl.proto = IPPROTO_TCP; 90 fl.proto = IPPROTO_TCP;
91 fl.fl_ip_sport = tcph->dest; 91 fl.fl_ip_sport = tcph->dest;
92 fl.fl_ip_dport = tcph->source; 92 fl.fl_ip_dport = tcph->source;
93 security_skb_classify_flow(skb, &fl);
93 94
94 xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); 95 xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
95 96
@@ -184,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
184 tcph->urg_ptr = 0; 185 tcph->urg_ptr = 0;
185 186
186 /* Adjust TCP checksum */ 187 /* Adjust TCP checksum */
188 nskb->ip_summed = CHECKSUM_NONE;
187 tcph->check = 0; 189 tcph->check = 0;
188 tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), 190 tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
189 nskb->nh.iph->saddr, 191 nskb->nh.iph->saddr,
@@ -226,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb,
226 const struct net_device *out, 228 const struct net_device *out,
227 unsigned int hooknum, 229 unsigned int hooknum,
228 const struct xt_target *target, 230 const struct xt_target *target,
229 const void *targinfo, 231 const void *targinfo)
230 void *userinfo)
231{ 232{
232 const struct ipt_reject_info *reject = targinfo; 233 const struct ipt_reject_info *reject = targinfo;
233 234
@@ -275,7 +276,6 @@ static int check(const char *tablename,
275 const void *e_void, 276 const void *e_void,
276 const struct xt_target *target, 277 const struct xt_target *target,
277 void *targinfo, 278 void *targinfo,
278 unsigned int targinfosize,
279 unsigned int hook_mask) 279 unsigned int hook_mask)
280{ 280{
281 const struct ipt_reject_info *rejinfo = targinfo; 281 const struct ipt_reject_info *rejinfo = targinfo;
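
send_reset() above now clears ip_summed to CHECKSUM_NONE before recomputing the RST's checksum from scratch over the IPv4 pseudo-header. A standalone sketch of that full recomputation; tcp_checksum() here is an invented stand-in, not the kernel's tcp_v4_check() signature.

/*
 * Full TCP checksum: sum the IPv4 pseudo-header (saddr, daddr,
 * protocol 6, TCP length) plus the TCP segment with its checksum
 * field zeroed, then fold and complement.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t sum16(const uint8_t *p, size_t n, uint32_t sum)
{
        while (n > 1) { sum += (uint32_t)(p[0] << 8 | p[1]); p += 2; n -= 2; }
        if (n)          sum += (uint32_t)(p[0] << 8);
        return sum;
}

static uint16_t tcp_checksum(uint32_t saddr, uint32_t daddr,
                             const uint8_t *tcp, uint16_t len)
{
        uint32_t sum = 0;

        /* Pseudo-header: addresses, protocol, TCP length. */
        sum += (saddr >> 16) + (saddr & 0xffff);
        sum += (daddr >> 16) + (daddr & 0xffff);
        sum += 6 + len;
        sum = sum16(tcp, len, sum);
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}

int main(void)
{
        uint8_t rst[20] = { 0 };        /* minimal TCP header, check = 0 */

        rst[12] = 5 << 4;               /* data offset = 5 words */
        rst[13] = 0x04;                 /* RST flag */
        printf("tcp check = 0x%04x\n",
               tcp_checksum(0x0a000001, 0x0a000002, rst, sizeof(rst)));
        return 0;
}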
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7169b09b5a67..efbcb1198832 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -52,7 +52,6 @@ same_check(const char *tablename,
52 const void *e, 52 const void *e,
53 const struct xt_target *target, 53 const struct xt_target *target,
54 void *targinfo, 54 void *targinfo,
55 unsigned int targinfosize,
56 unsigned int hook_mask) 55 unsigned int hook_mask)
57{ 56{
58 unsigned int count, countess, rangeip, index = 0; 57 unsigned int count, countess, rangeip, index = 0;
@@ -116,8 +115,7 @@ same_check(const char *tablename,
116} 115}
117 116
118static void 117static void
119same_destroy(const struct xt_target *target, void *targinfo, 118same_destroy(const struct xt_target *target, void *targinfo)
120 unsigned int targinfosize)
121{ 119{
122 struct ipt_same_info *mr = targinfo; 120 struct ipt_same_info *mr = targinfo;
123 121
@@ -133,8 +131,7 @@ same_target(struct sk_buff **pskb,
133 const struct net_device *out, 131 const struct net_device *out,
134 unsigned int hooknum, 132 unsigned int hooknum,
135 const struct xt_target *target, 133 const struct xt_target *target,
136 const void *targinfo, 134 const void *targinfo)
137 void *userinfo)
138{ 135{
139 struct ip_conntrack *ct; 136 struct ip_conntrack *ct;
140 enum ip_conntrack_info ctinfo; 137 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index ef2fe5b3f0d8..4246c4321e5b 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -21,26 +21,14 @@ MODULE_LICENSE("GPL");
21MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 21MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
22MODULE_DESCRIPTION("iptables TCP MSS modification module"); 22MODULE_DESCRIPTION("iptables TCP MSS modification module");
23 23
24#if 0
25#define DEBUGP printk
26#else
27#define DEBUGP(format, args...)
28#endif
29
30static u_int16_t
31cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
32{
33 u_int32_t diffs[] = { oldvalinv, newval };
34 return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
35 oldcheck^0xFFFF));
36}
37
38static inline unsigned int 24static inline unsigned int
39optlen(const u_int8_t *opt, unsigned int offset) 25optlen(const u_int8_t *opt, unsigned int offset)
40{ 26{
41 /* Beware zero-length options: make finite progress */ 27 /* Beware zero-length options: make finite progress */
42 if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1; 28 if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
43 else return opt[offset+1]; 29 return 1;
30 else
31 return opt[offset+1];
44} 32}
45 33
46static unsigned int 34static unsigned int
@@ -49,8 +37,7 @@ ipt_tcpmss_target(struct sk_buff **pskb,
49 const struct net_device *out, 37 const struct net_device *out,
50 unsigned int hooknum, 38 unsigned int hooknum,
51 const struct xt_target *target, 39 const struct xt_target *target,
52 const void *targinfo, 40 const void *targinfo)
53 void *userinfo)
54{ 41{
55 const struct ipt_tcpmss_info *tcpmssinfo = targinfo; 42 const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
56 struct tcphdr *tcph; 43 struct tcphdr *tcph;
@@ -62,13 +49,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
62 if (!skb_make_writable(pskb, (*pskb)->len)) 49 if (!skb_make_writable(pskb, (*pskb)->len))
63 return NF_DROP; 50 return NF_DROP;
64 51
65 if ((*pskb)->ip_summed == CHECKSUM_HW &&
66 skb_checksum_help(*pskb, out == NULL))
67 return NF_DROP;
68
69 iph = (*pskb)->nh.iph; 52 iph = (*pskb)->nh.iph;
70 tcplen = (*pskb)->len - iph->ihl*4; 53 tcplen = (*pskb)->len - iph->ihl*4;
71
72 tcph = (void *)iph + iph->ihl*4; 54 tcph = (void *)iph + iph->ihl*4;
73 55
74 /* Since it passed flags test in tcp match, we know it is 56 /* Since it passed flags test in tcp match, we know it is
@@ -84,54 +66,41 @@ ipt_tcpmss_target(struct sk_buff **pskb,
84 return NF_DROP; 66 return NF_DROP;
85 } 67 }
86 68
87 if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { 69 if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
88 if(!(*pskb)->dst) { 70 if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) +
71 sizeof(struct tcphdr)) {
89 if (net_ratelimit()) 72 if (net_ratelimit())
90 printk(KERN_ERR 73 printk(KERN_ERR "ipt_tcpmss_target: "
91 "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); 74 "unknown or invalid path-MTU (%d)\n",
75 dst_mtu((*pskb)->dst));
92 return NF_DROP; /* or IPT_CONTINUE ?? */ 76 return NF_DROP; /* or IPT_CONTINUE ?? */
93 } 77 }
94 78
95 if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { 79 newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) -
96 if (net_ratelimit()) 80 sizeof(struct tcphdr);
97 printk(KERN_ERR
98 "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
99 return NF_DROP; /* or IPT_CONTINUE ?? */
100 }
101
102 newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
103 } else 81 } else
104 newmss = tcpmssinfo->mss; 82 newmss = tcpmssinfo->mss;
105 83
106 opt = (u_int8_t *)tcph; 84 opt = (u_int8_t *)tcph;
107 for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ 85 for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
108 if ((opt[i] == TCPOPT_MSS) && 86 if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
109 ((tcph->doff*4 - i) >= TCPOLEN_MSS) && 87 opt[i+1] == TCPOLEN_MSS) {
110 (opt[i+1] == TCPOLEN_MSS)) {
111 u_int16_t oldmss; 88 u_int16_t oldmss;
112 89
113 oldmss = (opt[i+2] << 8) | opt[i+3]; 90 oldmss = (opt[i+2] << 8) | opt[i+3];
114 91
115 if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && 92 if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
116 (oldmss <= newmss)) 93 oldmss <= newmss)
117 return IPT_CONTINUE; 94 return IPT_CONTINUE;
118 95
119 opt[i+2] = (newmss & 0xff00) >> 8; 96 opt[i+2] = (newmss & 0xff00) >> 8;
120 opt[i+3] = (newmss & 0x00ff); 97 opt[i+3] = (newmss & 0x00ff);
121 98
122 tcph->check = cheat_check(htons(oldmss)^0xFFFF, 99 tcph->check = nf_proto_csum_update(*pskb,
123 htons(newmss), 100 htons(oldmss)^0xFFFF,
124 tcph->check); 101 htons(newmss),
125 102 tcph->check, 0);
126 DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" 103 return IPT_CONTINUE;
127 "->%u.%u.%u.%u:%hu changed TCP MSS option"
128 " (from %u to %u)\n",
129 NIPQUAD((*pskb)->nh.iph->saddr),
130 ntohs(tcph->source),
131 NIPQUAD((*pskb)->nh.iph->daddr),
132 ntohs(tcph->dest),
133 oldmss, newmss);
134 goto retmodified;
135 } 104 }
136 } 105 }
137 106
@@ -143,13 +112,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
143 112
144 newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), 113 newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
145 TCPOLEN_MSS, GFP_ATOMIC); 114 TCPOLEN_MSS, GFP_ATOMIC);
146 if (!newskb) { 115 if (!newskb)
147 if (net_ratelimit())
148 printk(KERN_ERR "ipt_tcpmss_target:"
149 " unable to allocate larger skb\n");
150 return NF_DROP; 116 return NF_DROP;
151 }
152
153 kfree_skb(*pskb); 117 kfree_skb(*pskb);
154 *pskb = newskb; 118 *pskb = newskb;
155 iph = (*pskb)->nh.iph; 119 iph = (*pskb)->nh.iph;
@@ -161,36 +125,29 @@ ipt_tcpmss_target(struct sk_buff **pskb,
161 opt = (u_int8_t *)tcph + sizeof(struct tcphdr); 125 opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
162 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); 126 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
163 127
164 tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, 128 tcph->check = nf_proto_csum_update(*pskb,
165 htons(tcplen + TCPOLEN_MSS), tcph->check); 129 htons(tcplen) ^ 0xFFFF,
166 tcplen += TCPOLEN_MSS; 130 htons(tcplen + TCPOLEN_MSS),
167 131 tcph->check, 1);
168 opt[0] = TCPOPT_MSS; 132 opt[0] = TCPOPT_MSS;
169 opt[1] = TCPOLEN_MSS; 133 opt[1] = TCPOLEN_MSS;
170 opt[2] = (newmss & 0xff00) >> 8; 134 opt[2] = (newmss & 0xff00) >> 8;
171 opt[3] = (newmss & 0x00ff); 135 opt[3] = (newmss & 0x00ff);
172 136
173 tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); 137 tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt),
138 tcph->check, 0);
174 139
175 oldval = ((u_int16_t *)tcph)[6]; 140 oldval = ((u_int16_t *)tcph)[6];
176 tcph->doff += TCPOLEN_MSS/4; 141 tcph->doff += TCPOLEN_MSS/4;
177 tcph->check = cheat_check(oldval ^ 0xFFFF, 142 tcph->check = nf_proto_csum_update(*pskb,
178 ((u_int16_t *)tcph)[6], tcph->check); 143 oldval ^ 0xFFFF,
144 ((u_int16_t *)tcph)[6],
145 tcph->check, 0);
179 146
180 newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); 147 newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
181 iph->check = cheat_check(iph->tot_len ^ 0xFFFF, 148 iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
182 newtotlen, iph->check); 149 newtotlen, iph->check);
183 iph->tot_len = newtotlen; 150 iph->tot_len = newtotlen;
184
185 DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
186 "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
187 NIPQUAD((*pskb)->nh.iph->saddr),
188 ntohs(tcph->source),
189 NIPQUAD((*pskb)->nh.iph->daddr),
190 ntohs(tcph->dest),
191 newmss);
192
193 retmodified:
194 return IPT_CONTINUE; 151 return IPT_CONTINUE;
195} 152}
196 153
@@ -200,9 +157,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
200{ 157{
201 const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; 158 const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
202 159
203 if (strcmp(m->u.kernel.match->name, "tcp") == 0 160 if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
204 && (tcpinfo->flg_cmp & TH_SYN) 161 tcpinfo->flg_cmp & TH_SYN &&
205 && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) 162 !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
206 return 1; 163 return 1;
207 164
208 return 0; 165 return 0;
@@ -214,17 +171,17 @@ ipt_tcpmss_checkentry(const char *tablename,
214 const void *e_void, 171 const void *e_void,
215 const struct xt_target *target, 172 const struct xt_target *target,
216 void *targinfo, 173 void *targinfo,
217 unsigned int targinfosize,
218 unsigned int hook_mask) 174 unsigned int hook_mask)
219{ 175{
220 const struct ipt_tcpmss_info *tcpmssinfo = targinfo; 176 const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
221 const struct ipt_entry *e = e_void; 177 const struct ipt_entry *e = e_void;
222 178
223 if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && 179 if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
224 ((hook_mask & ~((1 << NF_IP_FORWARD) 180 (hook_mask & ~((1 << NF_IP_FORWARD) |
225 | (1 << NF_IP_LOCAL_OUT) 181 (1 << NF_IP_LOCAL_OUT) |
226 | (1 << NF_IP_POST_ROUTING))) != 0)) { 182 (1 << NF_IP_POST_ROUTING))) != 0) {
227 printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); 183 printk("TCPMSS: path-MTU clamping only supported in "
184 "FORWARD, OUTPUT and POSTROUTING hooks\n");
228 return 0; 185 return 0;
229 } 186 }
230 187
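
Beyond the checksum conversion, ipt_tcpmss_target() keeps its option walk: step through the TCP options area, making finite progress even on zero-length or malformed options, and rewrite an existing MSS option in place. A userspace sketch of that walk and clamp, using the standard option kind/length values (2 and 4) and an invented packet buffer.

/*
 * Scan TCP options with the "zero-length options make finite
 * progress" rule, find an MSS option (kind 2, length 4) and clamp
 * its value, mirroring the optlen()/loop structure above.
 */
#include <stdio.h>
#include <stdint.h>

#define TCPOPT_NOP  1
#define TCPOPT_MSS  2
#define TCPOLEN_MSS 4

static unsigned int optlen(const uint8_t *opt, unsigned int off)
{
        /* EOL/NOP are one byte; a declared length of 0 must not loop. */
        if (opt[off] <= TCPOPT_NOP || opt[off + 1] == 0)
                return 1;
        return opt[off + 1];
}

static int clamp_mss(uint8_t *opt, unsigned int len, uint16_t newmss)
{
        unsigned int i;

        for (i = 0; i < len; i += optlen(opt, i)) {
                if (opt[i] == TCPOPT_MSS && len - i >= TCPOLEN_MSS &&
                    opt[i + 1] == TCPOLEN_MSS) {
                        uint16_t oldmss = (uint16_t)((opt[i + 2] << 8) | opt[i + 3]);

                        if (oldmss <= newmss)   /* already small enough */
                                return 0;
                        opt[i + 2] = newmss >> 8;
                        opt[i + 3] = newmss & 0xff;
                        return 1;
                }
        }
        return -1;              /* no MSS option present */
}

int main(void)
{
        /* NOP, NOP, MSS=1460 */
        uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4 };

        printf("rewritten: %d, mss now %u\n",
               clamp_mss(opts, sizeof(opts), 1400),
               (opts[4] << 8) | opts[5]);
        return 0;
}

In the kernel, each byte pair rewritten this way is then fed to nf_proto_csum_update() as an old/new word, as the hunk above shows.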
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 1c7a5ca399b3..471a4c438b0a 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -26,27 +26,20 @@ target(struct sk_buff **pskb,
26 const struct net_device *out, 26 const struct net_device *out,
27 unsigned int hooknum, 27 unsigned int hooknum,
28 const struct xt_target *target, 28 const struct xt_target *target,
29 const void *targinfo, 29 const void *targinfo)
30 void *userinfo)
31{ 30{
32 const struct ipt_tos_target_info *tosinfo = targinfo; 31 const struct ipt_tos_target_info *tosinfo = targinfo;
32 struct iphdr *iph = (*pskb)->nh.iph;
33 u_int16_t oldtos;
33 34
34 if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { 35 if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
35 u_int16_t diffs[2];
36
37 if (!skb_make_writable(pskb, sizeof(struct iphdr))) 36 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
38 return NF_DROP; 37 return NF_DROP;
39 38 iph = (*pskb)->nh.iph;
40 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; 39 oldtos = iph->tos;
41 (*pskb)->nh.iph->tos 40 iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
42 = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK) 41 iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos,
43 | tosinfo->tos; 42 iph->check);
44 diffs[1] = htons((*pskb)->nh.iph->tos);
45 (*pskb)->nh.iph->check
46 = csum_fold(csum_partial((char *)diffs,
47 sizeof(diffs),
48 (*pskb)->nh.iph->check
49 ^0xFFFF));
50 } 43 }
51 return IPT_CONTINUE; 44 return IPT_CONTINUE;
52} 45}
@@ -56,7 +49,6 @@ checkentry(const char *tablename,
56 const void *e_void, 49 const void *e_void,
57 const struct xt_target *target, 50 const struct xt_target *target,
58 void *targinfo, 51 void *targinfo,
59 unsigned int targinfosize,
60 unsigned int hook_mask) 52 unsigned int hook_mask)
61{ 53{
62 const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; 54 const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index f48892ae0be5..96e79cc6d0f2 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -23,11 +23,10 @@ static unsigned int
23ipt_ttl_target(struct sk_buff **pskb, 23ipt_ttl_target(struct sk_buff **pskb,
24 const struct net_device *in, const struct net_device *out, 24 const struct net_device *in, const struct net_device *out,
25 unsigned int hooknum, const struct xt_target *target, 25 unsigned int hooknum, const struct xt_target *target,
26 const void *targinfo, void *userinfo) 26 const void *targinfo)
27{ 27{
28 struct iphdr *iph; 28 struct iphdr *iph;
29 const struct ipt_TTL_info *info = targinfo; 29 const struct ipt_TTL_info *info = targinfo;
30 u_int16_t diffs[2];
31 int new_ttl; 30 int new_ttl;
32 31
33 if (!skb_make_writable(pskb, (*pskb)->len)) 32 if (!skb_make_writable(pskb, (*pskb)->len))
@@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb,
55 } 54 }
56 55
57 if (new_ttl != iph->ttl) { 56 if (new_ttl != iph->ttl) {
58 diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; 57 iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF,
58 ntohs(new_ttl << 8),
59 iph->check);
59 iph->ttl = new_ttl; 60 iph->ttl = new_ttl;
60 diffs[1] = htons(((unsigned)iph->ttl) << 8);
61 iph->check = csum_fold(csum_partial((char *)diffs,
62 sizeof(diffs),
63 iph->check^0xFFFF));
64 } 61 }
65 62
66 return IPT_CONTINUE; 63 return IPT_CONTINUE;
@@ -70,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename,
70 const void *e, 67 const void *e,
71 const struct xt_target *target, 68 const struct xt_target *target,
72 void *targinfo, 69 void *targinfo,
73 unsigned int targinfosize,
74 unsigned int hook_mask) 70 unsigned int hook_mask)
75{ 71{
76 struct ipt_TTL_info *info = targinfo; 72 struct ipt_TTL_info *info = targinfo;
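
The ttl << 8 in the new code reflects the IPv4 header layout: the TTL byte shares its 16-bit checksum word with the protocol byte, and since the protocol byte is unchanged its contribution cancels out of the incremental update. A small standalone cross-check of that cancellation.

/*
 * Show that the one's-complement delta is the same whether the
 * unchanged protocol byte is included in the old/new words or not.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t fold(uint32_t s)
{
        while (s >> 16)
                s = (s & 0xffff) + (s >> 16);
        return (uint16_t)s;
}

int main(void)
{
        uint8_t  ttl = 64, proto = 6, new_ttl = 63;
        uint16_t oldword = (uint16_t)(ttl << 8 | proto);
        uint16_t newword = (uint16_t)(new_ttl << 8 | proto);
        uint16_t a = fold((uint16_t)~oldword + (uint32_t)newword);
        uint16_t b = fold((uint16_t)(~(ttl << 8) & 0xffff) + (uint32_t)(new_ttl << 8));

        /* Same delta with or without the protocol byte. */
        printf("delta with proto: 0x%04x, without: 0x%04x\n", a, b);
        return 0;
}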
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d46fd677fa11..2b104ea54f48 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
308 const struct net_device *out, 308 const struct net_device *out,
309 unsigned int hooknum, 309 unsigned int hooknum,
310 const struct xt_target *target, 310 const struct xt_target *target,
311 const void *targinfo, void *userinfo) 311 const void *targinfo)
312{ 312{
313 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; 313 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
314 314
@@ -346,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename,
346 const void *e, 346 const void *e,
347 const struct xt_target *target, 347 const struct xt_target *target,
348 void *targinfo, 348 void *targinfo,
349 unsigned int targinfosize,
350 unsigned int hookmask) 349 unsigned int hookmask)
351{ 350{
352 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; 351 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 2927135873d7..1798f86bc534 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -74,7 +74,6 @@ checkentry(const char *tablename,
74 const void *ip_void, 74 const void *ip_void,
75 const struct xt_match *match, 75 const struct xt_match *match,
76 void *matchinfo, 76 void *matchinfo,
77 unsigned int matchinfosize,
78 unsigned int hook_mask) 77 unsigned int hook_mask)
79{ 78{
80 const struct ipt_ah *ahinfo = matchinfo; 79 const struct ipt_ah *ahinfo = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
deleted file mode 100644
index 47177591aeb6..000000000000
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ /dev/null
@@ -1,54 +0,0 @@
1/* IP tables module for matching the value of the IPv4 DSCP field
2 *
3 * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
4 *
5 * (C) 2002 by Harald Welte <laforge@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14
15#include <linux/netfilter_ipv4/ipt_dscp.h>
16#include <linux/netfilter_ipv4/ip_tables.h>
17
18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
19MODULE_DESCRIPTION("iptables DSCP matching module");
20MODULE_LICENSE("GPL");
21
22static int match(const struct sk_buff *skb,
23 const struct net_device *in, const struct net_device *out,
24 const struct xt_match *match, const void *matchinfo,
25 int offset, unsigned int protoff, int *hotdrop)
26{
27 const struct ipt_dscp_info *info = matchinfo;
28 const struct iphdr *iph = skb->nh.iph;
29
30 u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
31
32 return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
33}
34
35static struct ipt_match dscp_match = {
36 .name = "dscp",
37 .match = match,
38 .matchsize = sizeof(struct ipt_dscp_info),
39 .me = THIS_MODULE,
40};
41
42static int __init ipt_dscp_init(void)
43{
44 return ipt_register_match(&dscp_match);
45}
46
47static void __exit ipt_dscp_fini(void)
48{
49 ipt_unregister_match(&dscp_match);
50
51}
52
53module_init(ipt_dscp_init);
54module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b28250414933..dafbdec0efc0 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb,
88 88
89static int checkentry(const char *tablename, const void *ip_void, 89static int checkentry(const char *tablename, const void *ip_void,
90 const struct xt_match *match, 90 const struct xt_match *match,
91 void *matchinfo, unsigned int matchsize, 91 void *matchinfo, unsigned int hook_mask)
92 unsigned int hook_mask)
93{ 92{
94 const struct ipt_ecn_info *info = matchinfo; 93 const struct ipt_ecn_info *info = matchinfo;
95 const struct ipt_ip *ip = ip_void; 94 const struct ipt_ip *ip = ip_void;
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 3bd2368e1fc9..4f73a61aa3dd 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -478,7 +478,6 @@ hashlimit_checkentry(const char *tablename,
478 const void *inf, 478 const void *inf,
479 const struct xt_match *match, 479 const struct xt_match *match,
480 void *matchinfo, 480 void *matchinfo,
481 unsigned int matchsize,
482 unsigned int hook_mask) 481 unsigned int hook_mask)
483{ 482{
484 struct ipt_hashlimit_info *r = matchinfo; 483 struct ipt_hashlimit_info *r = matchinfo;
@@ -529,18 +528,46 @@ hashlimit_checkentry(const char *tablename,
529} 528}
530 529
531static void 530static void
532hashlimit_destroy(const struct xt_match *match, void *matchinfo, 531hashlimit_destroy(const struct xt_match *match, void *matchinfo)
533 unsigned int matchsize)
534{ 532{
535 struct ipt_hashlimit_info *r = matchinfo; 533 struct ipt_hashlimit_info *r = matchinfo;
536 534
537 htable_put(r->hinfo); 535 htable_put(r->hinfo);
538} 536}
539 537
538#ifdef CONFIG_COMPAT
539struct compat_ipt_hashlimit_info {
540 char name[IFNAMSIZ];
541 struct hashlimit_cfg cfg;
542 compat_uptr_t hinfo;
543 compat_uptr_t master;
544};
545
546static void compat_from_user(void *dst, void *src)
547{
548 int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
549
550 memcpy(dst, src, off);
551 memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off);
552}
553
554static int compat_to_user(void __user *dst, void *src)
555{
556 int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
557
558 return copy_to_user(dst, src, off) ? -EFAULT : 0;
559}
560#endif
561
540static struct ipt_match ipt_hashlimit = { 562static struct ipt_match ipt_hashlimit = {
541 .name = "hashlimit", 563 .name = "hashlimit",
542 .match = hashlimit_match, 564 .match = hashlimit_match,
543 .matchsize = sizeof(struct ipt_hashlimit_info), 565 .matchsize = sizeof(struct ipt_hashlimit_info),
566#ifdef CONFIG_COMPAT
567 .compatsize = sizeof(struct compat_ipt_hashlimit_info),
568 .compat_from_user = compat_from_user,
569 .compat_to_user = compat_to_user,
570#endif
544 .checkentry = hashlimit_checkentry, 571 .checkentry = hashlimit_checkentry,
545 .destroy = hashlimit_destroy, 572 .destroy = hashlimit_destroy,
546 .me = THIS_MODULE 573 .me = THIS_MODULE
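
The new CONFIG_COMPAT handlers rely on the fact that only the trailing kernel-pointer fields of the hashlimit info structure differ between 32-bit and 64-bit layouts, so offsetof() marks the boundary: copy everything before it, zero or skip everything after. A userspace sketch of the same trick with an invented stand-in struct, not the kernel's layout.

/*
 * Compat translation idea: shared leading fields are copied verbatim,
 * the kernel-internal pointer tail is scrubbed (from user) or
 * withheld (to user).
 */
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>

struct info {                   /* "native" 64-bit layout */
        char     name[16];
        uint32_t cfg;
        void    *hinfo;         /* kernel-internal, opaque to userspace */
};

struct compat_info {            /* what a 32-bit userspace would pass */
        char     name[16];
        uint32_t cfg;
        uint32_t hinfo;         /* 32-bit pointer placeholder */
};

static void compat_from_user(struct info *dst, const struct compat_info *src)
{
        size_t off = offsetof(struct compat_info, hinfo);

        memcpy(dst, src, off);                  /* fields shared by both layouts */
        memset((char *)dst + off, 0,
               sizeof(struct info) - off);      /* zero padding + pointer tail   */
}

int main(void)
{
        struct compat_info u = { .name = "limit", .cfg = 42, .hinfo = 0xdeadbeef };
        struct info k;

        compat_from_user(&k, &u);
        printf("name=%s cfg=%u hinfo=%p\n", k.name, k.cfg, k.hinfo);
        return 0;
}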
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5ac6ac023b5e..78c336f12a9e 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -56,7 +56,6 @@ checkentry(const char *tablename,
56 const void *ip, 56 const void *ip,
57 const struct xt_match *match, 57 const struct xt_match *match,
58 void *matchinfo, 58 void *matchinfo,
59 unsigned int matchsize,
60 unsigned int hook_mask) 59 unsigned int hook_mask)
61{ 60{
62 const struct ipt_owner_info *info = matchinfo; 61 const struct ipt_owner_info *info = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 61a2139f9cfd..32ae8d7ac506 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -35,14 +35,20 @@ static unsigned int ip_list_tot = 100;
35static unsigned int ip_pkt_list_tot = 20; 35static unsigned int ip_pkt_list_tot = 20;
36static unsigned int ip_list_hash_size = 0; 36static unsigned int ip_list_hash_size = 0;
37static unsigned int ip_list_perms = 0644; 37static unsigned int ip_list_perms = 0644;
38static unsigned int ip_list_uid = 0;
39static unsigned int ip_list_gid = 0;
38module_param(ip_list_tot, uint, 0400); 40module_param(ip_list_tot, uint, 0400);
39module_param(ip_pkt_list_tot, uint, 0400); 41module_param(ip_pkt_list_tot, uint, 0400);
40module_param(ip_list_hash_size, uint, 0400); 42module_param(ip_list_hash_size, uint, 0400);
41module_param(ip_list_perms, uint, 0400); 43module_param(ip_list_perms, uint, 0400);
44module_param(ip_list_uid, uint, 0400);
45module_param(ip_list_gid, uint, 0400);
42MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); 46MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
43MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); 47MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
44MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); 48MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
45MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); 49MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
50 MODULE_PARM_DESC(ip_list_uid, "owner of /proc/net/ipt_recent/* files");
51 MODULE_PARM_DESC(ip_list_gid, "owning group of /proc/net/ipt_recent/* files");
46 52
47 53
48struct recent_entry { 54struct recent_entry {
@@ -232,7 +238,7 @@ out:
232static int 238static int
233ipt_recent_checkentry(const char *tablename, const void *ip, 239ipt_recent_checkentry(const char *tablename, const void *ip,
234 const struct xt_match *match, void *matchinfo, 240 const struct xt_match *match, void *matchinfo,
235 unsigned int matchsize, unsigned int hook_mask) 241 unsigned int hook_mask)
236{ 242{
237 const struct ipt_recent_info *info = matchinfo; 243 const struct ipt_recent_info *info = matchinfo;
238 struct recent_table *t; 244 struct recent_table *t;
@@ -274,6 +280,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip,
274 goto out; 280 goto out;
275 } 281 }
276 t->proc->proc_fops = &recent_fops; 282 t->proc->proc_fops = &recent_fops;
283 t->proc->uid = ip_list_uid;
284 t->proc->gid = ip_list_gid;
277 t->proc->data = t; 285 t->proc->data = t;
278#endif 286#endif
279 spin_lock_bh(&recent_lock); 287 spin_lock_bh(&recent_lock);
@@ -286,8 +294,7 @@ out:
286} 294}
287 295
288static void 296static void
289ipt_recent_destroy(const struct xt_match *match, void *matchinfo, 297ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
290 unsigned int matchsize)
291{ 298{
292 const struct ipt_recent_info *info = matchinfo; 299 const struct ipt_recent_info *info = matchinfo;
293 struct recent_table *t; 300 struct recent_table *t;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7f417484bfbf..e2e7dd8d7903 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -90,7 +90,7 @@ ipt_hook(unsigned int hook,
90 const struct net_device *out, 90 const struct net_device *out,
91 int (*okfn)(struct sk_buff *)) 91 int (*okfn)(struct sk_buff *))
92{ 92{
93 return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); 93 return ipt_do_table(pskb, hook, in, out, &packet_filter);
94} 94}
95 95
96static unsigned int 96static unsigned int
@@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook,
108 return NF_ACCEPT; 108 return NF_ACCEPT;
109 } 109 }
110 110
111 return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); 111 return ipt_do_table(pskb, hook, in, out, &packet_filter);
112} 112}
113 113
114static struct nf_hook_ops ipt_ops[] = { 114static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 4e7998beda63..79336cb42527 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook,
119 const struct net_device *out, 119 const struct net_device *out,
120 int (*okfn)(struct sk_buff *)) 120 int (*okfn)(struct sk_buff *))
121{ 121{
122 return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); 122 return ipt_do_table(pskb, hook, in, out, &packet_mangler);
123} 123}
124 124
125static unsigned int 125static unsigned int
@@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook,
148 daddr = (*pskb)->nh.iph->daddr; 148 daddr = (*pskb)->nh.iph->daddr;
149 tos = (*pskb)->nh.iph->tos; 149 tos = (*pskb)->nh.iph->tos;
150 150
151 ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); 151 ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
152 /* Reroute for ANY change. */ 152 /* Reroute for ANY change. */
153 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE 153 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
154 && ((*pskb)->nh.iph->saddr != saddr 154 && ((*pskb)->nh.iph->saddr != saddr
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 7912cce1e1b8..bcbeb4aeacd9 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -95,7 +95,7 @@ ipt_hook(unsigned int hook,
95 const struct net_device *out, 95 const struct net_device *out,
96 int (*okfn)(struct sk_buff *)) 96 int (*okfn)(struct sk_buff *))
97{ 97{
98 return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); 98 return ipt_do_table(pskb, hook, in, out, &packet_raw);
99} 99}
100 100
101/* 'raw' is the very first table. */ 101/* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 663a73ee3f2f..790f00d500c3 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -25,7 +25,7 @@
25#include <net/netfilter/nf_conntrack_protocol.h> 25#include <net/netfilter/nf_conntrack_protocol.h>
26#include <net/netfilter/nf_conntrack_core.h> 26#include <net/netfilter/nf_conntrack_core.h>
27 27
28unsigned long nf_ct_icmp_timeout = 30*HZ; 28unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
29 29
30#if 0 30#if 0
31#define DEBUGP printk 31#define DEBUGP printk
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d61e2a9d394d..9c6cbe3d9fb8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = {
173 SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), 173 SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
174 SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), 174 SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
175 SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), 175 SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
176 SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
177 SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
176 SNMP_MIB_SENTINEL 178 SNMP_MIB_SENTINEL
177}; 179};
178 180
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 62b2762a2420..0e935b4c8741 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -38,8 +38,7 @@
38 * as published by the Free Software Foundation; either version 38 * as published by the Free Software Foundation; either version
39 * 2 of the License, or (at your option) any later version. 39 * 2 of the License, or (at your option) any later version.
40 */ 40 */
41 41
42#include <linux/config.h>
43#include <linux/types.h> 42#include <linux/types.h>
44#include <asm/atomic.h> 43#include <asm/atomic.h>
45#include <asm/byteorder.h> 44#include <asm/byteorder.h>
@@ -484,6 +483,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
484 if (!inet->hdrincl) 483 if (!inet->hdrincl)
485 raw_probe_proto_opt(&fl, msg); 484 raw_probe_proto_opt(&fl, msg);
486 485
486 security_sk_classify_flow(sk, &fl);
487 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); 487 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
488 } 488 }
489 if (err) 489 if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b873cbcdd0b8..20ffe8e88c0f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2639,51 +2639,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2639{ 2639{
2640 struct rtable *rt = (struct rtable*)skb->dst; 2640 struct rtable *rt = (struct rtable*)skb->dst;
2641 struct rtmsg *r; 2641 struct rtmsg *r;
2642 struct nlmsghdr *nlh; 2642 struct nlmsghdr *nlh;
2643 unsigned char *b = skb->tail;
2644 struct rta_cacheinfo ci; 2643 struct rta_cacheinfo ci;
2645#ifdef CONFIG_IP_MROUTE 2644
2646 struct rtattr *eptr; 2645 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
2647#endif 2646 if (nlh == NULL)
2648 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); 2647 return -ENOBUFS;
2649 r = NLMSG_DATA(nlh); 2648
2649 r = nlmsg_data(nlh);
2650 r->rtm_family = AF_INET; 2650 r->rtm_family = AF_INET;
2651 r->rtm_dst_len = 32; 2651 r->rtm_dst_len = 32;
2652 r->rtm_src_len = 0; 2652 r->rtm_src_len = 0;
2653 r->rtm_tos = rt->fl.fl4_tos; 2653 r->rtm_tos = rt->fl.fl4_tos;
2654 r->rtm_table = RT_TABLE_MAIN; 2654 r->rtm_table = RT_TABLE_MAIN;
2655 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2655 r->rtm_type = rt->rt_type; 2656 r->rtm_type = rt->rt_type;
2656 r->rtm_scope = RT_SCOPE_UNIVERSE; 2657 r->rtm_scope = RT_SCOPE_UNIVERSE;
2657 r->rtm_protocol = RTPROT_UNSPEC; 2658 r->rtm_protocol = RTPROT_UNSPEC;
2658 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; 2659 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2659 if (rt->rt_flags & RTCF_NOTIFY) 2660 if (rt->rt_flags & RTCF_NOTIFY)
2660 r->rtm_flags |= RTM_F_NOTIFY; 2661 r->rtm_flags |= RTM_F_NOTIFY;
2661 RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); 2662
2663 NLA_PUT_U32(skb, RTA_DST, rt->rt_dst);
2664
2662 if (rt->fl.fl4_src) { 2665 if (rt->fl.fl4_src) {
2663 r->rtm_src_len = 32; 2666 r->rtm_src_len = 32;
2664 RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src); 2667 NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src);
2665 } 2668 }
2666 if (rt->u.dst.dev) 2669 if (rt->u.dst.dev)
2667 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); 2670 NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
2668#ifdef CONFIG_NET_CLS_ROUTE 2671#ifdef CONFIG_NET_CLS_ROUTE
2669 if (rt->u.dst.tclassid) 2672 if (rt->u.dst.tclassid)
2670 RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); 2673 NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
2671#endif 2674#endif
2672#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 2675#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2673 if (rt->rt_multipath_alg != IP_MP_ALG_NONE) { 2676 if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
2674 __u32 alg = rt->rt_multipath_alg; 2677 NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
2675
2676 RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
2677 }
2678#endif 2678#endif
2679 if (rt->fl.iif) 2679 if (rt->fl.iif)
2680 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); 2680 NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst);
2681 else if (rt->rt_src != rt->fl.fl4_src) 2681 else if (rt->rt_src != rt->fl.fl4_src)
2682 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); 2682 NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src);
2683
2683 if (rt->rt_dst != rt->rt_gateway) 2684 if (rt->rt_dst != rt->rt_gateway)
2684 RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); 2685 NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway);
2686
2685 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2687 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2686 goto rtattr_failure; 2688 goto nla_put_failure;
2689
2687 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); 2690 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2688 ci.rta_used = rt->u.dst.__use; 2691 ci.rta_used = rt->u.dst.__use;
2689 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); 2692 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
@@ -2700,10 +2703,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2700 ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; 2703 ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
2701 } 2704 }
2702 } 2705 }
2703#ifdef CONFIG_IP_MROUTE 2706
2704 eptr = (struct rtattr*)skb->tail;
2705#endif
2706 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2707 if (rt->fl.iif) { 2707 if (rt->fl.iif) {
2708#ifdef CONFIG_IP_MROUTE 2708#ifdef CONFIG_IP_MROUTE
2709 u32 dst = rt->rt_dst; 2709 u32 dst = rt->rt_dst;
@@ -2715,41 +2715,46 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2715 if (!nowait) { 2715 if (!nowait) {
2716 if (err == 0) 2716 if (err == 0)
2717 return 0; 2717 return 0;
2718 goto nlmsg_failure; 2718 goto nla_put_failure;
2719 } else { 2719 } else {
2720 if (err == -EMSGSIZE) 2720 if (err == -EMSGSIZE)
2721 goto nlmsg_failure; 2721 goto nla_put_failure;
2722 ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; 2722 ci.rta_error = err;
2723 } 2723 }
2724 } 2724 }
2725 } else 2725 } else
2726#endif 2726#endif
2727 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); 2727 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
2728 } 2728 }
2729 2729
2730 nlh->nlmsg_len = skb->tail - b; 2730 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2731 return skb->len; 2731
2732 return nlmsg_end(skb, nlh);
2732 2733
2733nlmsg_failure: 2734nla_put_failure:
2734rtattr_failure: 2735 return nlmsg_cancel(skb, nlh);
2735 skb_trim(skb, b - skb->data);
2736 return -1;
2737} 2736}
2738 2737
2739int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2738int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2740{ 2739{
2741 struct rtattr **rta = arg; 2740 struct rtmsg *rtm;
2742 struct rtmsg *rtm = NLMSG_DATA(nlh); 2741 struct nlattr *tb[RTA_MAX+1];
2743 struct rtable *rt = NULL; 2742 struct rtable *rt = NULL;
2744 u32 dst = 0; 2743 u32 dst, src, iif;
2745 u32 src = 0; 2744 int err;
2746 int iif = 0;
2747 int err = -ENOBUFS;
2748 struct sk_buff *skb; 2745 struct sk_buff *skb;
2749 2746
2747 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2748 if (err < 0)
2749 goto errout;
2750
2751 rtm = nlmsg_data(nlh);
2752
2750 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2753 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2751 if (!skb) 2754 if (skb == NULL) {
2752 goto out; 2755 err = -ENOBUFS;
2756 goto errout;
2757 }
2753 2758
2754 /* Reserve room for dummy headers, this skb can pass 2759 /* Reserve room for dummy headers, this skb can pass
2755 through a good chunk of the routing engine. 2760 through a good chunk of the routing engine.
@@ -2760,62 +2765,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2760 skb->nh.iph->protocol = IPPROTO_ICMP; 2765 skb->nh.iph->protocol = IPPROTO_ICMP;
2761 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); 2766 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2762 2767
2763 if (rta[RTA_SRC - 1]) 2768 src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0;
2764 memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); 2769 dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0;
2765 if (rta[RTA_DST - 1]) 2770 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2766 memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
2767 if (rta[RTA_IIF - 1])
2768 memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
2769 2771
2770 if (iif) { 2772 if (iif) {
2771 struct net_device *dev = __dev_get_by_index(iif); 2773 struct net_device *dev;
2772 err = -ENODEV; 2774
2773 if (!dev) 2775 dev = __dev_get_by_index(iif);
2774 goto out_free; 2776 if (dev == NULL) {
2777 err = -ENODEV;
2778 goto errout_free;
2779 }
2780
2775 skb->protocol = htons(ETH_P_IP); 2781 skb->protocol = htons(ETH_P_IP);
2776 skb->dev = dev; 2782 skb->dev = dev;
2777 local_bh_disable(); 2783 local_bh_disable();
2778 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); 2784 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2779 local_bh_enable(); 2785 local_bh_enable();
2780 rt = (struct rtable*)skb->dst; 2786
2781 if (!err && rt->u.dst.error) 2787 rt = (struct rtable*) skb->dst;
2788 if (err == 0 && rt->u.dst.error)
2782 err = -rt->u.dst.error; 2789 err = -rt->u.dst.error;
2783 } else { 2790 } else {
2784 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, 2791 struct flowi fl = {
2785 .saddr = src, 2792 .nl_u = {
2786 .tos = rtm->rtm_tos } } }; 2793 .ip4_u = {
2787 int oif = 0; 2794 .daddr = dst,
2788 if (rta[RTA_OIF - 1]) 2795 .saddr = src,
2789 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); 2796 .tos = rtm->rtm_tos,
2790 fl.oif = oif; 2797 },
2798 },
2799 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2800 };
2791 err = ip_route_output_key(&rt, &fl); 2801 err = ip_route_output_key(&rt, &fl);
2792 } 2802 }
2803
2793 if (err) 2804 if (err)
2794 goto out_free; 2805 goto errout_free;
2795 2806
2796 skb->dst = &rt->u.dst; 2807 skb->dst = &rt->u.dst;
2797 if (rtm->rtm_flags & RTM_F_NOTIFY) 2808 if (rtm->rtm_flags & RTM_F_NOTIFY)
2798 rt->rt_flags |= RTCF_NOTIFY; 2809 rt->rt_flags |= RTCF_NOTIFY;
2799 2810
2800 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2801
2802 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2811 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2803 RTM_NEWROUTE, 0, 0); 2812 RTM_NEWROUTE, 0, 0);
2804 if (!err) 2813 if (err <= 0)
2805 goto out_free; 2814 goto errout_free;
2806 if (err < 0) {
2807 err = -EMSGSIZE;
2808 goto out_free;
2809 }
2810 2815
2811 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 2816 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2812 if (err > 0) 2817errout:
2813 err = 0; 2818 return err;
2814out: return err;
2815 2819
2816out_free: 2820errout_free:
2817 kfree_skb(skb); 2821 kfree_skb(skb);
2818 goto out; 2822 goto errout;
2819} 2823}
2820 2824
2821int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) 2825int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -3143,13 +3147,9 @@ int __init ip_rt_init(void)
3143 } 3147 }
3144#endif 3148#endif
3145 3149
3146 ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", 3150 ipv4_dst_ops.kmem_cachep =
3147 sizeof(struct rtable), 3151 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
3148 0, SLAB_HWCACHE_ALIGN, 3152 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
3149 NULL, NULL);
3150
3151 if (!ipv4_dst_ops.kmem_cachep)
3152 panic("IP: failed to allocate ip_dst_cache\n");
3153 3153
3154 rt_hash_table = (struct rt_hash_bucket *) 3154 rt_hash_table = (struct rt_hash_bucket *)
3155 alloc_large_system_hash("IP route cache", 3155 alloc_large_system_hash("IP route cache",
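
rt_fill_info() above moves from the old RTA_PUT/NLMSG_NEW macros to the nlmsg_put()/nla_put_u32()/nlmsg_end() API: reserve a header, append type-length-value attributes, then patch the final length in (or cancel the partial message on failure). A much-simplified userspace sketch of that build pattern over a flat buffer; the helpers are stand-ins, not the kernel's netlink implementation.

/*
 * TLV message building in the put/end style: the header is written
 * last, once the total length is known.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define ALIGN4(n) (((n) + 3u) & ~3u)

struct msg {
        uint8_t  buf[256];
        uint32_t len;           /* bytes used so far */
};

/* Reserve space for a 4-byte "header" holding the total message length. */
static uint32_t *msg_put(struct msg *m)
{
        uint32_t *hdr = (uint32_t *)m->buf;

        m->len = sizeof(*hdr);
        return hdr;
}

static int nla_put_u32(struct msg *m, uint16_t type, uint32_t value)
{
        uint16_t alen = 4 + sizeof(value);          /* TL header + payload */
        uint32_t need = ALIGN4(alen);

        if (m->len + need > sizeof(m->buf))
                return -1;                          /* caller would cancel */
        memcpy(m->buf + m->len, &type, sizeof(type));
        memcpy(m->buf + m->len + 2, &alen, sizeof(alen));
        memcpy(m->buf + m->len + 4, &value, sizeof(value));
        m->len += need;
        return 0;
}

static uint32_t msg_end(struct msg *m, uint32_t *hdr)
{
        *hdr = m->len;          /* patch the total length into the header */
        return m->len;
}

int main(void)
{
        struct msg m;
        uint32_t *hdr = msg_put(&m);

        nla_put_u32(&m, 1 /* RTA_DST-like */, 0x0a000001);
        nla_put_u32(&m, 4 /* RTA_OIF-like */, 2);
        printf("message length %u\n", msg_end(&m, hdr));
        return 0;
}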
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e20be3331f67..661e0a4bca72 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
214 if (!req) 214 if (!req)
215 goto out; 215 goto out;
216 216
217 if (security_inet_conn_request(sk, skb, req)) {
218 reqsk_free(req);
219 goto out;
220 }
217 ireq = inet_rsk(req); 221 ireq = inet_rsk(req);
218 treq = tcp_rsk(req); 222 treq = tcp_rsk(req);
219 treq->rcv_isn = htonl(skb->h.th->seq) - 1; 223 treq->rcv_isn = htonl(skb->h.th->seq) - 1;
@@ -259,6 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
259 .uli_u = { .ports = 263 .uli_u = { .ports =
260 { .sport = skb->h.th->dest, 264 { .sport = skb->h.th->dest,
261 .dport = skb->h.th->source } } }; 265 .dport = skb->h.th->source } } };
266 security_req_classify_flow(req, &fl);
262 if (ip_route_output_key(&rt, &fl)) { 267 if (ip_route_output_key(&rt, &fl)) {
263 reqsk_free(req); 268 reqsk_free(req);
264 goto out; 269 goto out;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 70cea9d08a38..19b2071ff319 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -17,6 +17,7 @@
17#include <net/ip.h> 17#include <net/ip.h>
18#include <net/route.h> 18#include <net/route.h>
19#include <net/tcp.h> 19#include <net/tcp.h>
20#include <net/cipso_ipv4.h>
20 21
21/* From af_inet.c */ 22/* From af_inet.c */
22extern int sysctl_ip_nonlocal_bind; 23extern int sysctl_ip_nonlocal_bind;
@@ -697,6 +698,40 @@ ctl_table ipv4_table[] = {
697 .mode = 0644, 698 .mode = 0644,
698 .proc_handler = &proc_dointvec 699 .proc_handler = &proc_dointvec
699 }, 700 },
701#ifdef CONFIG_NETLABEL
702 {
703 .ctl_name = NET_CIPSOV4_CACHE_ENABLE,
704 .procname = "cipso_cache_enable",
705 .data = &cipso_v4_cache_enabled,
706 .maxlen = sizeof(int),
707 .mode = 0644,
708 .proc_handler = &proc_dointvec,
709 },
710 {
711 .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE,
712 .procname = "cipso_cache_bucket_size",
713 .data = &cipso_v4_cache_bucketsize,
714 .maxlen = sizeof(int),
715 .mode = 0644,
716 .proc_handler = &proc_dointvec,
717 },
718 {
719 .ctl_name = NET_CIPSOV4_RBM_OPTFMT,
720 .procname = "cipso_rbm_optfmt",
721 .data = &cipso_v4_rbm_optfmt,
722 .maxlen = sizeof(int),
723 .mode = 0644,
724 .proc_handler = &proc_dointvec,
725 },
726 {
727 .ctl_name = NET_CIPSOV4_RBM_STRICTVALID,
728 .procname = "cipso_rbm_strictvalid",
729 .data = &cipso_v4_rbm_strictvalid,
730 .maxlen = sizeof(int),
731 .mode = 0644,
732 .proc_handler = &proc_dointvec,
733 },
734#endif /* CONFIG_NETLABEL */
700 { .ctl_name = 0 } 735 { .ctl_name = 0 }
701}; 736};
702 737
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 934396bb1376..66e9a729f6df 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -268,7 +268,7 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
-int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
 
@@ -568,7 +568,7 @@ new_segment:
 		skb->truesize += copy;
 		sk->sk_wmem_queued += copy;
 		sk->sk_forward_alloc -= copy;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		tp->write_seq += copy;
 		TCP_SKB_CB(skb)->end_seq += copy;
 		skb_shinfo(skb)->gso_segs = 0;
@@ -723,7 +723,7 @@ new_segment:
 		 * Check whether we can use HW checksum.
 		 */
 		if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_PARTIAL;
 
 		skb_entail(sk, tp, skb);
 		copy = size_goal;
@@ -955,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		     * receive buffer and there was a small segment
 		     * in queue.
 		     */
-		    (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
-		     !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+		    (copied > 0 &&
+		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
+		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
+		       !icsk->icsk_ack.pingpong)) &&
+		     !atomic_read(&sk->sk_rmem_alloc)))
 			time_to_ack = 1;
 	}
 
@@ -2205,7 +2208,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	th->fin = th->psh = 0;
 
 	th->check = ~csum_fold(th->check + delta);
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 						   skb->csum));
 
@@ -2219,7 +2222,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 
 	delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
 	th->check = ~csum_fold(th->check + delta);
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 						   skb->csum));
 
@@ -2254,9 +2257,7 @@ void __init tcp_init(void)
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
-				  SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (!tcp_hashinfo.bind_bucket_cachep)
-		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	/* Size and allocate the main established and bind bucket
 	 * hash tables.
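
Editorial note: the CHECKSUM_HW renames above split one overloaded value into two. CHECKSUM_PARTIAL is the transmit case (software filled skb->csum with an offset and the device must finish the ones-complement sum); CHECKSUM_COMPLETE is the receive case (the device handed back the full sum of the packet in skb->csum). A minimal userspace sketch of the ones-complement arithmetic both paths rely on; csum_add() and csum_fold() here are simplified stand-ins, not the kernel implementations:

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Sum 16-bit big-endian words into a 32-bit accumulator. */
static uint32_t csum_add(uint32_t sum, const uint8_t *buf, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)buf[i] << 8 | buf[i + 1];
	if (len & 1)
		sum += (uint32_t)buf[len - 1] << 8;
	return sum;
}

/* Fold carries back in and complement, as the final checksum step. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t pkt[] = { 0xde, 0xad, 0xbe, 0xef, 0x00, 0x00, 0x12, 0x34 };
	uint16_t check;

	/* CHECKSUM_PARTIAL side: software defers the final fold/insert;
	 * here we play the device and fill the check field (bytes 4-5). */
	check = csum_fold(csum_add(0, pkt, sizeof(pkt)));
	pkt[4] = check >> 8;
	pkt[5] = check & 0xff;

	/* CHECKSUM_COMPLETE side: given the full sum of the packet,
	 * a fold yielding 0 means the checksum verifies. */
	printf("verify: %u (0 means good)\n",
	       (unsigned)csum_fold(csum_add(0, pkt, sizeof(pkt))));
	return 0;
}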
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b0134ab08379..5730333cd0ac 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -231,7 +231,7 @@ static struct tcp_congestion_ops bictcp = {
 
 static int __init bictcp_register(void)
 {
-	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&bictcp);
 }
 
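
Editorial note: the BUG_ON to BUILD_BUG_ON conversions in these congestion modules turn a runtime crash into a compile failure; the private-state size check involves only constants, so it can be rejected at build time. A self-contained sketch of the negative-array-size idiom behind it (the macro body is the well-known trick written out here, and ca_state/CA_PRIV_SIZE are invented stand-ins, not the kernel definitions):

#include <stdio.h>

/* Fails to compile when cond is true: the array gets a negative size. */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

struct ca_state { int a, b, c; };          /* hypothetical per-socket state */
#define CA_PRIV_SIZE (16 * sizeof(void *)) /* hypothetical size budget */

int main(void)
{
	/* Compiles only while the state fits the reserved scratch area;
	 * grow ca_state past CA_PRIV_SIZE and the build breaks here. */
	BUILD_BUG_ON(sizeof(struct ca_state) > CA_PRIV_SIZE);
	puts("ca_state fits in the reserved area");
	return 0;
}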
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 2be27980ca78..a60ef38d75c6 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = {
 
 static int __init cubictcp_register(void)
 {
-	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
 
 	/* Precompute a bunch of the scaling factors that are used per-packet
 	 * based on SRTT of 100ms
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index fa3e1aad660c..c4fc811bf377 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
 
 static int __init hstcp_register(void)
 {
-	BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_highspeed);
 }
 
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 6edfe5e4510e..682e7d5b6f2f 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = {
 
 static int __init htcp_register(void)
 {
-	BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
 	BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
 	return tcp_register_congestion_control(&htcp);
 }
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 7406e0c5fb8e..59e691d26f64 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = {
 
 static int __init hybla_register(void)
 {
-	BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_hybla);
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 159fa3f1ba67..b3def0df14fb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,24 +72,24 @@
 #include <asm/unaligned.h>
 #include <net/netdma.h>
 
-int sysctl_tcp_timestamps = 1;
-int sysctl_tcp_window_scaling = 1;
-int sysctl_tcp_sack = 1;
-int sysctl_tcp_fack = 1;
-int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
-int sysctl_tcp_ecn;
-int sysctl_tcp_dsack = 1;
-int sysctl_tcp_app_win = 31;
-int sysctl_tcp_adv_win_scale = 2;
+int sysctl_tcp_timestamps __read_mostly = 1;
+int sysctl_tcp_window_scaling __read_mostly = 1;
+int sysctl_tcp_sack __read_mostly = 1;
+int sysctl_tcp_fack __read_mostly = 1;
+int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_dsack __read_mostly = 1;
+int sysctl_tcp_app_win __read_mostly = 31;
+int sysctl_tcp_adv_win_scale __read_mostly = 2;
 
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-int sysctl_tcp_max_orphans = NR_FILE;
-int sysctl_tcp_frto;
-int sysctl_tcp_nometrics_save;
+int sysctl_tcp_stdurg __read_mostly;
+int sysctl_tcp_rfc1337 __read_mostly;
+int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
+int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_nometrics_save __read_mostly;
 
-int sysctl_tcp_moderate_rcvbuf = 1;
-int sysctl_tcp_abc;
+int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
+int sysctl_tcp_abc __read_mostly;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data. */
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update. */
@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 		/* skb->len may jitter because of SACKs, even if peer
 		 * sends good full-sized frames.
 		 */
-		len = skb->len;
+		len = skb_shinfo(skb)->gso_size ?: skb->len;
 		if (len >= icsk->icsk_ack.rcv_mss) {
 			icsk->icsk_ack.rcv_mss = len;
 		} else {
@@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 				return;
 			}
 		}
+		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
+			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
 		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
 	}
 }
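
Editorial note: this hunk cooperates with the tcp_cleanup_rbuf() change in the tcp.c diff further up. tcp_measure_rcv_mss() now promotes a still-pending PUSHED flag to PUSHED2, and tcp_cleanup_rbuf() treats PUSHED2 as a stronger hint that an ACK should go out even on an interactive (pingpong) session. A small sketch of the flag promotion and the resulting decision, with invented flag values and struct standing in for the icsk definitions:

#include <stdio.h>

/* Invented stand-ins for the inet_connection_sock ack-state bits. */
#define ACK_PUSHED  0x04
#define ACK_PUSHED2 0x08

struct ack_state {
	unsigned int pending;
	int pingpong;           /* interactive-session heuristic */
};

/* Mirrors the tcp_measure_rcv_mss() hunk: a second PSH-like event
 * while PUSHED is still pending escalates it to PUSHED2. */
static void note_pushed(struct ack_state *s)
{
	if (s->pending & ACK_PUSHED)
		s->pending |= ACK_PUSHED2;
	s->pending |= ACK_PUSHED;
}

/* Mirrors the tcp_cleanup_rbuf() condition: PUSHED2 forces the ACK
 * even in pingpong mode; plain PUSHED only fires outside it. */
static int time_to_ack(const struct ack_state *s)
{
	return (s->pending & ACK_PUSHED2) ||
	       ((s->pending & ACK_PUSHED) && !s->pingpong);
}

int main(void)
{
	struct ack_state s = { .pending = 0, .pingpong = 1 };

	note_pushed(&s);
	printf("after one push:   ack=%d\n", time_to_ack(&s)); /* 0 */
	note_pushed(&s);
	printf("after two pushes: ack=%d\n", time_to_ack(&s)); /* 1 */
	return 0;
}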
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4b04c3edd4a9..39b179856082 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -78,8 +78,8 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-int sysctl_tcp_tw_reuse;
-int sysctl_tcp_low_latency;
+int sysctl_tcp_tw_reuse __read_mostly;
+int sysctl_tcp_low_latency __read_mostly;
 
 /* Check TCP sequence numbers in ICMP packets. */
 #define ICMP_MIN_LENGTH 8
@@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcphdr *th = skb->h.th;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
 		skb->csum = offsetof(struct tcphdr, check);
 	} else {
@@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 	th->check = 0;
 	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
 	skb->csum = offsetof(struct tcphdr, check);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
 }
 
@@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	tcp_openreq_init(req, &tmp_opt, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
@@ -948,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
-	nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
-					th->source, skb->nh.iph->daddr,
-					ntohs(th->dest), inet_iif(skb));
+	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
+				      th->source, skb->nh.iph->daddr,
+				      th->dest, inet_iif(skb));
 
 	if (nsk) {
 		if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -970,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 
 static int tcp_v4_checksum_init(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
 				  skb->nh.iph->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1087,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->sacked = 0;
 
 	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
-			   skb->nh.iph->daddr, ntohs(th->dest),
+			   skb->nh.iph->daddr, th->dest,
 			   inet_iif(skb));
 
 	if (!sk)
@@ -1101,7 +1104,7 @@ process:
 		goto discard_and_relse;
 	nf_reset(skb);
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
 	skb->dev = NULL;
@@ -1165,7 +1168,7 @@ do_time_wait:
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
 							skb->nh.iph->daddr,
-							ntohs(th->dest),
+							th->dest,
 							inet_iif(skb));
 		if (sk2) {
 			inet_twsk_deschedule((struct inet_timewait_sock *)sk,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 48f28d617ce6..308fb7e071c5 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -35,7 +35,6 @@
  * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <net/tcp.h>
 
@@ -328,7 +327,7 @@ static struct tcp_congestion_ops tcp_lp = {
 
 static int __init tcp_lp_register(void)
 {
-	BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_lp);
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624e2b2c7f53..0163d9826907 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -34,8 +34,8 @@
 #define SYNC_INIT 1
 #endif
 
-int sysctl_tcp_syncookies = SYNC_INIT;
-int sysctl_tcp_abort_on_overflow;
+int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+int sysctl_tcp_abort_on_overflow __read_mostly;
 
 struct inet_timewait_death_row tcp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4f3ffe1b3b4..061edfae0c29 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -43,24 +43,24 @@
 #include <linux/smp_lock.h>
 
 /* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse = 1;
+int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
 /* People can turn this on to work with those rare, broken TCPs that
  * interpret the window field as a signed quantity.
  */
-int sysctl_tcp_workaround_signed_windows = 0;
+int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 
 /* This limits the percentage of the congestion window which we
  * will allow a single TSO frame to consume.  Building TSO frames
 * which are too large can cause TCP streams to be bursty.
 */
-int sysctl_tcp_tso_win_divisor = 3;
+int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing = 0;
-int sysctl_tcp_base_mss = 512;
+int sysctl_tcp_mtu_probing __read_mostly = 0;
+int sysctl_tcp_base_mss __read_mostly = 512;
 
 /* By default, RFC2861 behavior.  */
-int sysctl_tcp_slow_start_after_idle = 1;
+int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
 static void update_send_head(struct sock *sk, struct tcp_sock *tp,
 			     struct sk_buff *skb)
@@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
-	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
+	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Copy and checksum data tail into the new buffer. */
 		buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
 						       nsize, 0);
@@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 
 		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
 	} else {
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb_split(skb, buff, len);
 	}
 
@@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 		__pskb_trim_head(skb, len - skb_headlen(skb));
 
 	TCP_SKB_CB(skb)->seq += len;
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 
 	skb->truesize	     -= len;
 	sk->sk_wmem_queued   -= len;
@@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	/* This packet was never sent out yet, so no SACK bits. */
 	TCP_SKB_CB(buff)->sacked = 0;
 
-	buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 
 	/* Fix up tso_factor for both original and new SKB.  */
@@ -1206,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
-		nskb->ip_summed = CHECKSUM_HW;
+	nskb->ip_summed = skb->ip_summed;
 
 	len = 0;
 	while (len < probe_size) {
@@ -1231,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk)
 				   ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 		if (!skb_shinfo(skb)->nr_frags) {
 			skb_pull(skb, copy);
-			if (skb->ip_summed != CHECKSUM_HW)
+			if (skb->ip_summed != CHECKSUM_PARTIAL)
 				skb->csum = csum_partial(skb->data, skb->len, 0);
 		} else {
 			__pskb_trim_head(skb, copy);
@@ -1572,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 
 	memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
-	if (next_skb->ip_summed == CHECKSUM_HW)
-		skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = next_skb->ip_summed;
 
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
 
 	/* Update sequence range on original skb. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7c1bde3cd6cb..fb09ade5897b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -23,14 +23,14 @@
 #include <linux/module.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
-int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
-int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
-int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
-int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
-int sysctl_tcp_retries1 = TCP_RETR1;
-int sysctl_tcp_retries2 = TCP_RETR2;
-int sysctl_tcp_orphan_retries;
+int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
+int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
+int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
+int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
+int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
+int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
+int sysctl_tcp_orphan_retries __read_mostly;
 
 static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
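
Editorial note: the __read_mostly annotations threaded through these files move rarely-written sysctl knobs into a dedicated linker section so they share cache lines with other read-mostly data rather than with frequently-written variables, avoiding false sharing on SMP. A hedged userspace approximation of the mechanism; the section name below is illustrative, not the kernel's actual layout (the kernel macro targets its own read-mostly section via the linker script):

#include <stdio.h>

/* Userspace approximation: group read-mostly globals in one section
 * so the linker lays them out together, away from hot written data. */
#define __read_mostly __attribute__((section(".data.read_mostly")))

int tunable_a __read_mostly = 1;
int tunable_b __read_mostly = 31;
int hot_counter;        /* frequently written; placed elsewhere */

int main(void)
{
	/* tunable_a and tunable_b land adjacently in their own section. */
	printf("a=%p b=%p counter=%p\n",
	       (void *)&tunable_a, (void *)&tunable_b, (void *)&hot_counter);
	return 0;
}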
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 490360b5b4bf..a3b7aa015a2f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = {
 
 static int __init tcp_vegas_register(void)
 {
-	BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
 	tcp_register_congestion_control(&tcp_vegas);
 	return 0;
 }
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 11b42a7135c1..ce57bf302f6c 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -9,7 +9,6 @@
  *    See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
  */
 
-#include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -213,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = {
 
 static int __init tcp_veno_register(void)
 {
-	BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
 	tcp_register_congestion_control(&tcp_veno);
 	return 0;
 }
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 5446312ffd2a..4f42a86c77f3 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = {
 
 static int __init tcp_westwood_register(void)
 {
-	BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_westwood);
 }
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f136cec96d95..77e265d7bb8f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -118,14 +118,33 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
-/* Shared by v4/v6 udp. */
-int udp_port_rover;
+static int udp_port_rover;
 
-static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+static inline int udp_lport_inuse(u16 num)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+
+	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
+		if (inet_sk(sk)->num == num)
+			return 1;
+	return 0;
+}
+
+/**
+ *  udp_get_port  -  common port lookup for IPv4 and IPv6
+ *
+ *  @sk:          socket struct in question
+ *  @snum:        port number to look up
+ *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
+ */
+int udp_get_port(struct sock *sk, unsigned short snum,
+		 int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2))
 {
 	struct hlist_node *node;
+	struct hlist_head *head;
 	struct sock *sk2;
-	struct inet_sock *inet = inet_sk(sk);
+	int    error = 1;
 
 	write_lock_bh(&udp_hash_lock);
 	if (snum == 0) {
@@ -137,11 +156,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
 		best_size_so_far = 32767;
 		best = result = udp_port_rover;
 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-			struct hlist_head *list;
 			int size;
 
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-			if (hlist_empty(list)) {
+			head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			if (hlist_empty(head)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
 						((result - sysctl_local_port_range[0]) &
@@ -149,12 +167,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
 				goto gotit;
 			}
 			size = 0;
-			sk_for_each(sk2, node, list)
-				if (++size >= best_size_so_far)
-					goto next;
-			best_size_so_far = size;
-			best = result;
-		next:;
+			sk_for_each(sk2, node, head)
+				if (++size < best_size_so_far) {
+					best_size_so_far = size;
+					best = result;
+				}
 		}
 		result = best;
 		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
@@ -170,38 +187,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
 gotit:
 		udp_port_rover = snum = result;
 	} else {
-		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-			struct inet_sock *inet2 = inet_sk(sk2);
-
-			if (inet2->num == snum &&
-			    sk2 != sk &&
-			    !ipv6_only_sock(sk2) &&
-			    (!sk2->sk_bound_dev_if ||
-			     !sk->sk_bound_dev_if ||
-			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (!inet2->rcv_saddr ||
-			     !inet->rcv_saddr ||
-			     inet2->rcv_saddr == inet->rcv_saddr) &&
-			    (!sk2->sk_reuse || !sk->sk_reuse))
+		head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+
+		sk_for_each(sk2, node, head)
+			if (inet_sk(sk2)->num == snum &&
+			    sk2 != sk &&
+			    (!sk2->sk_reuse || !sk->sk_reuse) &&
+			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+			    (*saddr_cmp)(sk, sk2))
 				goto fail;
-		}
 	}
-	inet->num = snum;
+	inet_sk(sk)->num = snum;
 	if (sk_unhashed(sk)) {
-		struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
-
-		sk_add_node(sk, h);
+		head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+		sk_add_node(sk, head);
 		sock_prot_inc_use(sk->sk_prot);
 	}
-	write_unlock_bh(&udp_hash_lock);
-	return 0;
-
+	error = 0;
 fail:
 	write_unlock_bh(&udp_hash_lock);
-	return 1;
+	return error;
+}
+
+static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+{
+	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
+
+	return (!ipv6_only_sock(sk2) &&
+		(!inet1->rcv_saddr || !inet2->rcv_saddr ||
+		 inet1->rcv_saddr == inet2->rcv_saddr));
 }
 
+static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+	return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
+}
+
+
 static void udp_v4_hash(struct sock *sk)
 {
 	BUG();
@@ -429,7 +452,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
 	/*
 	 * Only one fragment on the socket.
 	 */
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		skb->csum = offsetof(struct udphdr, check);
 		uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
 				up->len, IPPROTO_UDP, 0);
@@ -448,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
 		 * fragments on the socket so that all csums of sk_buffs
 		 * should be together.
 		 */
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			int offset = (unsigned char *)uh - skb->data;
 			skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
@@ -603,6 +626,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				    .uli_u = { .ports =
 					       { .sport = inet->sport,
 						 .dport = dport } } };
+		security_sk_classify_flow(sk, &fl);
 		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
 		if (err)
 			goto out;
@@ -661,6 +685,16 @@ out:
 		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
 		return len;
 	}
+	/*
+	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
+	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
+	 * we don't have a good statistic (IpOutDiscards but it can be too many
+	 * things).  We could add another new stat but at least for now that
+	 * seems like overkill.
+	 */
+	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+	}
 	return err;
 
 do_confirm:
@@ -980,6 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
+	int rc;
 
 	/*
 	 * Charge it to the socket, dropping if the queue is full.
@@ -1026,7 +1061,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	if (sock_queue_rcv_skb(sk,skb)<0) {
+	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
 		UDP_INC_STATS_BH(UDP_MIB_INERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -1087,7 +1125,7 @@ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
 {
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else if (skb->ip_summed == CHECKSUM_HW) {
+	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
@@ -1581,7 +1619,7 @@ EXPORT_SYMBOL(udp_disconnect);
 EXPORT_SYMBOL(udp_hash);
 EXPORT_SYMBOL(udp_hash_lock);
 EXPORT_SYMBOL(udp_ioctl);
-EXPORT_SYMBOL(udp_port_rover);
+EXPORT_SYMBOL(udp_get_port);
 EXPORT_SYMBOL(udp_prot);
 EXPORT_SYMBOL(udp_sendmsg);
 EXPORT_SYMBOL(udp_poll);
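
Editorial note: the udp.c refactor above hoists the port-binding loop out of IPv4 by parameterizing its one address-family-specific step, the bound-address conflict test, as a callback; IPv6 can then reuse udp_get_port() with its own comparator. A reduced sketch of the same shape; the types, table, and function names here are invented for illustration:

#include <stdio.h>

/* Invented miniature socket table for illustration. */
struct mini_sock {
	unsigned short num;     /* bound port, 0 = unbound */
	unsigned int addr;      /* bound local address, 0 = any */
};

static struct mini_sock table[8];

typedef int (*saddr_cmp_t)(const struct mini_sock *a,
			   const struct mini_sock *b);

/* Family-specific piece, passed in: do the two bindings collide? */
static int v4_rcv_saddr_equal(const struct mini_sock *a,
			      const struct mini_sock *b)
{
	return !a->addr || !b->addr || a->addr == b->addr;
}

/* Family-independent piece: scan for a conflict, then claim the port. */
static int get_port(struct mini_sock *sk, unsigned short snum,
		    saddr_cmp_t saddr_cmp)
{
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].num == snum && &table[i] != sk &&
		    saddr_cmp(sk, &table[i]))
			return 1;       /* in use */
	sk->num = snum;
	return 0;
}

int main(void)
{
	table[0] = (struct mini_sock){ .num = 53, .addr = 0x7f000001 };

	struct mini_sock sk = { .addr = 0x7f000001 };
	printf("bind 53, same addr:  %s\n",
	       get_port(&sk, 53, v4_rcv_saddr_equal) ? "busy" : "ok");

	sk.addr = 0x0a000001;   /* different local address: no conflict */
	printf("bind 53, other addr: %s\n",
	       get_port(&sk, 53, v4_rcv_saddr_equal) ? "busy" : "ok");
	return 0;
}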
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 817ed84511a6..040e8475f295 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 		if (x->mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode) {
+		if (x->props.mode == XFRM_MODE_TUNNEL) {
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index a9e6b3dd19c9..92676b7e4034 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -21,9 +21,8 @@
  * On exit, skb->h will be set to the start of the payload to be processed
  * by x->type->output and skb->nh will be set to the top IP header.
  */
-static int xfrm4_transport_output(struct sk_buff *skb)
+static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_state *x;
 	struct iphdr *iph;
 	int ihl;
 
@@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb)
 	ihl = iph->ihl * 4;
 	skb->h.raw += ihl;
 
-	x = skb->dst->xfrm;
 	skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
 	return 0;
 }
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 13cafbe56ce3..e23c21d31a53 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
  * On exit, skb->h will be set to the start of the payload to be processed
  * by x->type->output and skb->nh will be set to the top IP header.
  */
-static int xfrm4_tunnel_output(struct sk_buff *skb)
+static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
 	struct iphdr *iph, *top_iph;
 	int flags;
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d16f863cf687..04403fb01a58 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -48,13 +48,13 @@ static int xfrm4_output_one(struct sk_buff *skb)
 	struct xfrm_state *x = dst->xfrm;
 	int err;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
-		err = skb_checksum_help(skb, 0);
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
 		if (err)
 			goto error_nolock;
 	}
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = xfrm4_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
@@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
 	if (err)
 		goto error;
 
-	err = x->mode->output(skb);
+	err = x->mode->output(x, skb);
 	if (err)
 		goto error;
 
@@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
 		}
 		dst = skb->dst;
 		x = dst->xfrm;
-	} while (x && !x->props.mode);
+	} while (x && (x->props.mode != XFRM_MODE_TUNNEL));
 
 	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
 	err = 0;
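
Editorial note: across the xfrm4 files, tests on x->props.mode change from truthiness to explicit comparisons (== XFRM_MODE_TUNNEL, != XFRM_MODE_TRANSPORT) because mode stops being a two-value field: with route optimization modes joining transport and tunnel, "if (mode)" would silently treat every new mode as tunnel. A tiny sketch of the failure the explicit form avoids; the enum values are invented to mirror the pattern:

#include <stdio.h>

/* Illustrative mode enum; only the first two existed originally. */
enum mode { MODE_TRANSPORT = 0, MODE_TUNNEL = 1, MODE_ROUTEOPT = 2 };

int main(void)
{
	enum mode m = MODE_ROUTEOPT;

	/* Old style: anything non-zero is "tunnel" - wrong for new modes. */
	printf("truthy test says tunnel:   %d\n", m ? 1 : 0);         /* 1 */
	/* New style: only a genuine tunnel matches. */
	printf("explicit test says tunnel: %d\n", m == MODE_TUNNEL);  /* 0 */
	return 0;
}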
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 8f50eae47d03..eabcd27b1767 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
 	return __ip_route_output_key((struct rtable**)dst, fl);
 }
 
+static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+	struct rtable *rt;
+	struct flowi fl_tunnel = {
+		.nl_u = {
+			.ip4_u = {
+				.daddr = daddr->a4,
+			},
+		},
+	};
+
+	if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
+		saddr->a4 = rt->rt_src;
+		dst_release(&rt->u.dst);
+		return 0;
+	}
+	return -EHOSTUNREACH;
+}
+
 static struct dst_entry *
 __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 {
@@ -33,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
 		    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
 		    xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
-		    xfrm_bundle_ok(xdst, fl, AF_INET)) {
+		    xfrm_bundle_ok(xdst, fl, AF_INET, 0)) {
 			dst_clone(dst);
 			break;
 		}
@@ -93,10 +112,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 		xdst = (struct xfrm_dst *)dst1;
 		xdst->route = &rt->u.dst;
+		xdst->genid = xfrm[i]->genid;
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
-		if (xfrm[i]->props.mode) {
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			remote = xfrm[i]->id.daddr.a4;
 			local  = xfrm[i]->props.saddr.a4;
 			tunnel = 1;
@@ -135,6 +155,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
 		dst_prev->header_len	= header_len;
+		dst_prev->nfheader_len	= 0;
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
@@ -296,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 	.family			= AF_INET,
 	.dst_ops		= &xfrm4_dst_ops,
 	.dst_lookup		= xfrm4_dst_lookup,
+	.get_saddr		= xfrm4_get_saddr,
 	.find_bundle		= __xfrm4_find_bundle,
 	.bundle_create		= __xfrm4_bundle_create,
 	.decode_session		= _decode_session4,
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 81e1751c966e..fe2034494d08 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,99 +42,15 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->props.saddr = tmpl->saddr;
 	if (x->props.saddr.a4 == 0)
 		x->props.saddr.a4 = saddr->a4;
-	if (tmpl->mode && x->props.saddr.a4 == 0) {
-		struct rtable *rt;
-		struct flowi fl_tunnel = {
-			.nl_u = {
-				.ip4_u = {
-					.daddr = x->id.daddr.a4,
-				}
-			}
-		};
-		if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
-				     &fl_tunnel, AF_INET)) {
-			x->props.saddr.a4 = rt->rt_src;
-			dst_release(&rt->u.dst);
-		}
-	}
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = AF_INET;
 }
 
-static struct xfrm_state *
-__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
-{
-	unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
-	struct xfrm_state *x;
-
-	list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
-		if (x->props.family == AF_INET &&
-		    spi == x->id.spi &&
-		    daddr->a4 == x->id.daddr.a4 &&
-		    proto == x->id.proto) {
-			xfrm_state_hold(x);
-			return x;
-		}
-	}
-	return NULL;
-}
-
-static struct xfrm_state *
-__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto,
-		 xfrm_address_t *daddr, xfrm_address_t *saddr,
-		 int create)
-{
-	struct xfrm_state *x, *x0;
-	unsigned h = __xfrm4_dst_hash(daddr);
-
-	x0 = NULL;
-
-	list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
-		if (x->props.family == AF_INET &&
-		    daddr->a4 == x->id.daddr.a4 &&
-		    mode == x->props.mode &&
-		    proto == x->id.proto &&
-		    saddr->a4 == x->props.saddr.a4 &&
-		    reqid == x->props.reqid &&
-		    x->km.state == XFRM_STATE_ACQ &&
-		    !x->id.spi) {
-			x0 = x;
-			break;
-		}
-	}
-	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
-		x0->sel.daddr.a4 = daddr->a4;
-		x0->sel.saddr.a4 = saddr->a4;
-		x0->sel.prefixlen_d = 32;
-		x0->sel.prefixlen_s = 32;
-		x0->props.saddr.a4 = saddr->a4;
-		x0->km.state = XFRM_STATE_ACQ;
-		x0->id.daddr.a4 = daddr->a4;
-		x0->id.proto = proto;
-		x0->props.family = AF_INET;
-		x0->props.mode = mode;
-		x0->props.reqid = reqid;
-		x0->props.family = AF_INET;
-		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-		xfrm_state_hold(x0);
-		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
-		add_timer(&x0->timer);
-		xfrm_state_hold(x0);
-		list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
-		wake_up(&km_waitq);
-	}
-	if (x0)
-		xfrm_state_hold(x0);
-	return x0;
-}
-
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
-	.state_lookup		= __xfrm4_state_lookup,
-	.find_acq		= __xfrm4_find_acq,
 };
 
 void __init xfrm4_state_init(void)
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index f8ceaa127c83..f110af5b1319 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb)
 
 static int ipip_init_state(struct xfrm_state *x)
 {
-	if (!x->props.mode)
+	if (x->props.mode != XFRM_MODE_TUNNEL)
 		return -EINVAL;
 
 	if (x->encap)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 0ba06c0c5d39..a2d211da2aba 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -98,6 +98,15 @@ config INET6_IPCOMP
 
 	  If unsure, say Y.
 
+config IPV6_MIP6
+	bool "IPv6: Mobility (EXPERIMENTAL)"
+	depends on IPV6 && EXPERIMENTAL
+	select XFRM
+	---help---
+	  Support for IPv6 Mobility described in RFC 3775.
+
+	  If unsure, say N.
+
 config INET6_XFRM_TUNNEL
 	tristate
 	select INET6_TUNNEL
@@ -127,6 +136,13 @@ config INET6_XFRM_MODE_TUNNEL
 
 	  If unsure, say Y.
 
+config INET6_XFRM_MODE_ROUTEOPTIMIZATION
+	tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
+	depends on IPV6 && EXPERIMENTAL
+	select XFRM
+	---help---
+	  Support for MIPv6 route optimization mode.
+
 config IPV6_TUNNEL
 	tristate "IPv6: IPv6-in-IPv6 tunnel"
 	select INET6_TUNNEL
@@ -136,3 +152,31 @@ config IPV6_TUNNEL
 
 	  If unsure, say N.
 
+config IPV6_SUBTREES
+	bool "IPv6: source address based routing"
+	depends on IPV6 && EXPERIMENTAL
+	---help---
+	  Enable routing by source address or prefix.
+
+	  The destination address is still the primary routing key, so mixing
+	  normal and source prefix specific routes in the same routing table
+	  may sometimes lead to unintended routing behavior.  This can be
+	  avoided by defining different routing tables for the normal and
+	  source prefix specific routes.
+
+	  If unsure, say N.
+
+config IPV6_MULTIPLE_TABLES
+	bool "IPv6: Multiple Routing Tables"
+	depends on IPV6 && EXPERIMENTAL
+	select FIB_RULES
+	---help---
+	  Support multiple routing tables.
+
+config IPV6_ROUTE_FWMARK
+	bool "IPv6: use netfilter MARK value as routing key"
+	depends on IPV6_MULTIPLE_TABLES && NETFILTER
+	---help---
+	  If you say Y here, you will be able to specify different routes for
+	  packets with different mark values (see iptables(8), MARK target).
+
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 386e0a626948..0213c6612b58 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -13,6 +13,9 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
+ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+
 ipv6-objs += $(ipv6-y)
 
 obj-$(CONFIG_INET6_AH) += ah6.o
@@ -22,6 +25,7 @@ obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
 obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
 obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
 obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
+obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
 
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c7852b38e03e..c18676352397 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -48,6 +48,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_arcnet.h>
 #include <linux/if_infiniband.h>
@@ -72,6 +73,7 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 
@@ -117,9 +119,6 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
 static struct inet6_ifaddr		*inet6_addr_lst[IN6_ADDR_HSIZE];
 static DEFINE_RWLOCK(addrconf_hash_lock);
 
-/* Protects inet6 devices */
-DEFINE_RWLOCK(addrconf_lock);
-
 static void addrconf_verify(unsigned long);
 
 static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
@@ -144,7 +143,7 @@ static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *de
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
-struct ipv6_devconf ipv6_devconf = {
+struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -173,9 +172,10 @@ struct ipv6_devconf ipv6_devconf = {
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.proxy_ndp		= 0,
 };
 
-static struct ipv6_devconf ipv6_devconf_dflt = {
+static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -203,6 +203,7 @@ static struct ipv6_devconf ipv6_devconf_dflt = {
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.proxy_ndp		= 0,
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -314,6 +315,12 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 
 /* Nobody refers to this device, we may destroy it. */
 
+static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+{
+	struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+	kfree(idev);
+}
+
 void in6_dev_finish_destroy(struct inet6_dev *idev)
 {
 	struct net_device *dev = idev->dev;
@@ -328,7 +335,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 		return;
 	}
 	snmp6_free_dev(idev);
-	kfree(idev);
+	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 
 static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
@@ -404,9 +411,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	if (netif_carrier_ok(dev))
 		ndev->if_flags |= IF_READY;
 
-	write_lock_bh(&addrconf_lock);
-	dev->ip6_ptr = ndev;
-	write_unlock_bh(&addrconf_lock);
+	/* protected by rtnl_lock */
+	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
 	ipv6_mc_init_dev(ndev);
 	ndev->tstamp = jiffies;
@@ -470,7 +476,7 @@ static void addrconf_forward_change(void)
 
 	read_lock(&dev_base_lock);
 	for (dev=dev_base; dev; dev=dev->next) {
-		read_lock(&addrconf_lock);
+		rcu_read_lock();
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
@@ -478,7 +484,7 @@ static void addrconf_forward_change(void)
 			if (changed)
 				dev_forward_change(idev);
 		}
-		read_unlock(&addrconf_lock);
+		rcu_read_unlock();
 	}
 	read_unlock(&dev_base_lock);
 }
@@ -539,7 +545,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	int hash;
 	int err = 0;
 
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	if (idev->dead) {
 		err = -ENODEV;			/*XXX*/
 		goto out2;
@@ -608,7 +614,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	in6_ifa_hold(ifa);
 	write_unlock(&idev->lock);
 out2:
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 
 	if (likely(err == 0))
 		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
@@ -734,7 +740,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
 		if (onlink == 0) {
-			ip6_del_rt(rt, NULL, NULL, NULL);
+			ip6_del_rt(rt);
 			rt = NULL;
 		} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
 			rt->rt6i_expires = expires;
@@ -911,7 +917,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 	memset(&hiscore, 0, sizeof(hiscore));
 
 	read_lock(&dev_base_lock);
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 
 	for (dev = dev_base; dev; dev=dev->next) {
 		struct inet6_dev *idev;
@@ -1032,9 +1038,27 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 				continue;
 			}
 
-			/* Rule 4: Prefer home address -- not implemented yet */
+			/* Rule 4: Prefer home address */
+#ifdef CONFIG_IPV6_MIP6
+			if (hiscore.rule < 4) {
+				if (ifa_result->flags & IFA_F_HOMEADDRESS)
+					hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
+				hiscore.rule++;
+			}
+			if (ifa->flags & IFA_F_HOMEADDRESS) {
+				score.attrs |= IPV6_SADDR_SCORE_HOA;
+				if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) {
+					score.rule = 4;
+					goto record_it;
+				}
+			} else {
+				if (hiscore.attrs & IPV6_SADDR_SCORE_HOA)
+					continue;
+			}
+#else
 			if (hiscore.rule < 4)
 				hiscore.rule++;
+#endif
 
 			/* Rule 5: Prefer outgoing interface */
 			if (hiscore.rule < 5) {
@@ -1123,7 +1147,7 @@ record_it:
 		}
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	read_unlock(&dev_base_lock);
 
 	if (!ifa_result)
@@ -1147,7 +1171,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 	struct inet6_dev *idev;
 	int err = -EADDRNOTAVAIL;
 
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 	if ((idev = __in6_dev_get(dev)) != NULL) {
 		struct inet6_ifaddr *ifp;
 
@@ -1161,7 +1185,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 		}
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -1462,7 +1486,7 @@ static void ipv6_regen_rndid(unsigned long data)
 	struct inet6_dev *idev = (struct inet6_dev *) data;
 	unsigned long expires;
 
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	write_lock_bh(&idev->lock);
 
 	if (idev->dead)
@@ -1486,7 +1510,7 @@ static void ipv6_regen_rndid(unsigned long data)
 
 out:
 	write_unlock_bh(&idev->lock);
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 	in6_dev_put(idev);
 }
@@ -1507,59 +1531,56 @@ static void
1507addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, 1531addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1508 unsigned long expires, u32 flags) 1532 unsigned long expires, u32 flags)
1509{ 1533{
1510 struct in6_rtmsg rtmsg; 1534 struct fib6_config cfg = {
1535 .fc_table = RT6_TABLE_PREFIX,
1536 .fc_metric = IP6_RT_PRIO_ADDRCONF,
1537 .fc_ifindex = dev->ifindex,
1538 .fc_expires = expires,
1539 .fc_dst_len = plen,
1540 .fc_flags = RTF_UP | flags,
1541 };
1511 1542
1512 memset(&rtmsg, 0, sizeof(rtmsg)); 1543 ipv6_addr_copy(&cfg.fc_dst, pfx);
1513 ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx);
1514 rtmsg.rtmsg_dst_len = plen;
1515 rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
1516 rtmsg.rtmsg_ifindex = dev->ifindex;
1517 rtmsg.rtmsg_info = expires;
1518 rtmsg.rtmsg_flags = RTF_UP|flags;
1519 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1520 1544
1521 /* Prevent useless cloning on PtP SIT. 1545 /* Prevent useless cloning on PtP SIT.
1522 This thing is done here expecting that the whole 1546 This thing is done here expecting that the whole
 1523 class of non-broadcast devices does not need cloning. 1547 class of non-broadcast devices does not need cloning.
1524 */ 1548 */
1525 if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) 1549 if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
1526 rtmsg.rtmsg_flags |= RTF_NONEXTHOP; 1550 cfg.fc_flags |= RTF_NONEXTHOP;
1527 1551
1528 ip6_route_add(&rtmsg, NULL, NULL, NULL); 1552 ip6_route_add(&cfg);
1529} 1553}
1530 1554
1531/* Create "default" multicast route to the interface */ 1555/* Create "default" multicast route to the interface */
1532 1556
1533static void addrconf_add_mroute(struct net_device *dev) 1557static void addrconf_add_mroute(struct net_device *dev)
1534{ 1558{
1535 struct in6_rtmsg rtmsg; 1559 struct fib6_config cfg = {
1560 .fc_table = RT6_TABLE_LOCAL,
1561 .fc_metric = IP6_RT_PRIO_ADDRCONF,
1562 .fc_ifindex = dev->ifindex,
1563 .fc_dst_len = 8,
1564 .fc_flags = RTF_UP,
1565 };
1566
1567 ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
1536 1568
1537 memset(&rtmsg, 0, sizeof(rtmsg)); 1569 ip6_route_add(&cfg);
1538 ipv6_addr_set(&rtmsg.rtmsg_dst,
1539 htonl(0xFF000000), 0, 0, 0);
1540 rtmsg.rtmsg_dst_len = 8;
1541 rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
1542 rtmsg.rtmsg_ifindex = dev->ifindex;
1543 rtmsg.rtmsg_flags = RTF_UP;
1544 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1545 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1546} 1570}
1547 1571
1548static void sit_route_add(struct net_device *dev) 1572static void sit_route_add(struct net_device *dev)
1549{ 1573{
1550 struct in6_rtmsg rtmsg; 1574 struct fib6_config cfg = {
1551 1575 .fc_table = RT6_TABLE_MAIN,
1552 memset(&rtmsg, 0, sizeof(rtmsg)); 1576 .fc_metric = IP6_RT_PRIO_ADDRCONF,
1553 1577 .fc_ifindex = dev->ifindex,
1554 rtmsg.rtmsg_type = RTMSG_NEWROUTE; 1578 .fc_dst_len = 96,
1555 rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; 1579 .fc_flags = RTF_UP | RTF_NONEXTHOP,
1580 };
1556 1581
1557 /* prefix length - 96 bits "::d.d.d.d" */ 1582 /* prefix length - 96 bits "::d.d.d.d" */
1558 rtmsg.rtmsg_dst_len = 96; 1583 ip6_route_add(&cfg);
1559 rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
1560 rtmsg.rtmsg_ifindex = dev->ifindex;
1561
1562 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1563} 1584}
1564 1585
1565static void addrconf_add_lroute(struct net_device *dev) 1586static void addrconf_add_lroute(struct net_device *dev)
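All three helpers above get the same mechanical conversion: the RTMSG_NEWROUTE/in6_rtmsg setup becomes a designated-initializer fib6_config, which drops the memset() (unnamed members are zero-initialized) and, now that policy routing exists, lets each caller name its target table explicitly. The shape of the pattern for an arbitrary prefix, using only fields this diff shows:

static void example_add_prefix_route(struct net_device *dev,
				     struct in6_addr *pfx, int plen)
{
	struct fib6_config cfg = {
		.fc_table   = RT6_TABLE_PREFIX,		/* explicit table */
		.fc_metric  = IP6_RT_PRIO_ADDRCONF,
		.fc_ifindex = dev->ifindex,
		.fc_dst_len = plen,
		.fc_flags   = RTF_UP,
		/* every other member starts out zero */
	};

	ipv6_addr_copy(&cfg.fc_dst, pfx);
	ip6_route_add(&cfg);
}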
@@ -1660,7 +1681,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1660 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { 1681 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
1661 if (rt->rt6i_flags&RTF_EXPIRES) { 1682 if (rt->rt6i_flags&RTF_EXPIRES) {
1662 if (valid_lft == 0) { 1683 if (valid_lft == 0) {
1663 ip6_del_rt(rt, NULL, NULL, NULL); 1684 ip6_del_rt(rt);
1664 rt = NULL; 1685 rt = NULL;
1665 } else { 1686 } else {
1666 rt->rt6i_expires = jiffies + rt_expires; 1687 rt->rt6i_expires = jiffies + rt_expires;
@@ -1870,12 +1891,11 @@ err_exit:
1870 * Manual configuration of address on an interface 1891 * Manual configuration of address on an interface
1871 */ 1892 */
1872static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, 1893static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
1873 __u32 prefered_lft, __u32 valid_lft) 1894 __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft)
1874{ 1895{
1875 struct inet6_ifaddr *ifp; 1896 struct inet6_ifaddr *ifp;
1876 struct inet6_dev *idev; 1897 struct inet6_dev *idev;
1877 struct net_device *dev; 1898 struct net_device *dev;
1878 __u8 ifa_flags = 0;
1879 int scope; 1899 int scope;
1880 1900
1881 ASSERT_RTNL(); 1901 ASSERT_RTNL();
@@ -1887,9 +1907,6 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
1887 if ((dev = __dev_get_by_index(ifindex)) == NULL) 1907 if ((dev = __dev_get_by_index(ifindex)) == NULL)
1888 return -ENODEV; 1908 return -ENODEV;
1889 1909
1890 if (!(dev->flags&IFF_UP))
1891 return -ENETDOWN;
1892
1893 if ((idev = addrconf_add_dev(dev)) == NULL) 1910 if ((idev = addrconf_add_dev(dev)) == NULL)
1894 return -ENOBUFS; 1911 return -ENOBUFS;
1895 1912
@@ -1971,7 +1988,7 @@ int addrconf_add_ifaddr(void __user *arg)
1971 1988
1972 rtnl_lock(); 1989 rtnl_lock();
1973 err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, 1990 err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen,
1974 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 1991 IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1975 rtnl_unlock(); 1992 rtnl_unlock();
1976 return err; 1993 return err;
1977} 1994}
@@ -2344,10 +2361,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2344 Do not dev_put! 2361 Do not dev_put!
2345 */ 2362 */
2346 if (how == 1) { 2363 if (how == 1) {
2347 write_lock_bh(&addrconf_lock);
2348 dev->ip6_ptr = NULL;
2349 idev->dead = 1; 2364 idev->dead = 1;
2350 write_unlock_bh(&addrconf_lock); 2365
2366 /* protected by rtnl_lock */
2367 rcu_assign_pointer(dev->ip6_ptr, NULL);
2351 2368
2352 /* Step 1.5: remove snmp6 entry */ 2369 /* Step 1.5: remove snmp6 entry */
2353 snmp6_unregister_dev(idev); 2370 snmp6_unregister_dev(idev);
@@ -2514,7 +2531,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2514 spin_lock_bh(&ifp->lock); 2531 spin_lock_bh(&ifp->lock);
2515 2532
2516 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || 2533 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
2517 !(ifp->flags&IFA_F_TENTATIVE)) { 2534 !(ifp->flags&IFA_F_TENTATIVE) ||
2535 ifp->flags & IFA_F_NODAD) {
2518 ifp->flags &= ~IFA_F_TENTATIVE; 2536 ifp->flags &= ~IFA_F_TENTATIVE;
2519 spin_unlock_bh(&ifp->lock); 2537 spin_unlock_bh(&ifp->lock);
2520 read_unlock_bh(&idev->lock); 2538 read_unlock_bh(&idev->lock);
@@ -2759,6 +2777,26 @@ void if6_proc_exit(void)
2759} 2777}
2760#endif /* CONFIG_PROC_FS */ 2778#endif /* CONFIG_PROC_FS */
2761 2779
2780#ifdef CONFIG_IPV6_MIP6
2781/* Check if address is a home address configured on any interface. */
2782int ipv6_chk_home_addr(struct in6_addr *addr)
2783{
2784 int ret = 0;
2785 struct inet6_ifaddr * ifp;
2786 u8 hash = ipv6_addr_hash(addr);
2787 read_lock_bh(&addrconf_hash_lock);
2788 for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
2789 if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
2790 (ifp->flags & IFA_F_HOMEADDRESS)) {
2791 ret = 1;
2792 break;
2793 }
2794 }
2795 read_unlock_bh(&addrconf_hash_lock);
2796 return ret;
2797}
2798#endif
2799
2762/* 2800/*
2763 * Periodic address status verification 2801 * Periodic address status verification
2764 */ 2802 */
@@ -2869,66 +2907,68 @@ restart:
2869 spin_unlock_bh(&addrconf_verify_lock); 2907 spin_unlock_bh(&addrconf_verify_lock);
2870} 2908}
2871 2909
2910static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
2911{
2912 struct in6_addr *pfx = NULL;
2913
2914 if (addr)
2915 pfx = nla_data(addr);
2916
2917 if (local) {
2918 if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
2919 pfx = NULL;
2920 else
2921 pfx = nla_data(local);
2922 }
2923
2924 return pfx;
2925}
2926
2927static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = {
2928 [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) },
2929 [IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
2930 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
2931};
2932
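The nla_policy table is what lets the rewritten handlers drop their hand-rolled RTA_PAYLOAD length checks: nlmsg_parse() validates every attribute against the policy and indexes it into tb[] in one pass. A sketch of a doit handler built on that, reusing extract_addr() from above:

static int example_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			     void *arg)
{
	struct nlattr *tb[IFA_MAX+1];
	struct ifaddrmsg *ifm;
	struct in6_addr *pfx;
	int err;

	/* returns a negative errno if any attribute is shorter than
	 * its .len in ifa_ipv6_policy */
	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
	if (err < 0)
		return err;

	ifm = nlmsg_data(nlh);
	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
	if (pfx == NULL)
		return -EINVAL;	/* no address, or ADDRESS != LOCAL */

	/* ... act on ifm->ifa_index, ifm->ifa_prefixlen and pfx ... */
	return 0;
}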
2872static int 2933static int
2873inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 2934inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2874{ 2935{
2875 struct rtattr **rta = arg; 2936 struct ifaddrmsg *ifm;
2876 struct ifaddrmsg *ifm = NLMSG_DATA(nlh); 2937 struct nlattr *tb[IFA_MAX+1];
2877 struct in6_addr *pfx; 2938 struct in6_addr *pfx;
2939 int err;
2878 2940
2879 pfx = NULL; 2941 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
2880 if (rta[IFA_ADDRESS-1]) { 2942 if (err < 0)
2881 if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) 2943 return err;
2882 return -EINVAL; 2944
2883 pfx = RTA_DATA(rta[IFA_ADDRESS-1]); 2945 ifm = nlmsg_data(nlh);
2884 } 2946 pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
2885 if (rta[IFA_LOCAL-1]) {
2886 if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) ||
2887 (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))))
2888 return -EINVAL;
2889 pfx = RTA_DATA(rta[IFA_LOCAL-1]);
2890 }
2891 if (pfx == NULL) 2947 if (pfx == NULL)
2892 return -EINVAL; 2948 return -EINVAL;
2893 2949
2894 return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); 2950 return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
2895} 2951}
2896 2952
2897static int 2953static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
2898inet6_addr_modify(int ifindex, struct in6_addr *pfx, 2954 u32 prefered_lft, u32 valid_lft)
2899 __u32 prefered_lft, __u32 valid_lft)
2900{ 2955{
2901 struct inet6_ifaddr *ifp = NULL;
2902 struct net_device *dev;
2903 int ifa_flags = 0;
2904
2905 if ((dev = __dev_get_by_index(ifindex)) == NULL)
2906 return -ENODEV;
2907
2908 if (!(dev->flags&IFF_UP))
2909 return -ENETDOWN;
2910
2911 if (!valid_lft || (prefered_lft > valid_lft)) 2956 if (!valid_lft || (prefered_lft > valid_lft))
2912 return -EINVAL; 2957 return -EINVAL;
2913 2958
2914 ifp = ipv6_get_ifaddr(pfx, dev, 1);
2915 if (ifp == NULL)
2916 return -ENOENT;
2917
2918 if (valid_lft == INFINITY_LIFE_TIME) 2959 if (valid_lft == INFINITY_LIFE_TIME)
2919 ifa_flags = IFA_F_PERMANENT; 2960 ifa_flags |= IFA_F_PERMANENT;
2920 else if (valid_lft >= 0x7FFFFFFF/HZ) 2961 else if (valid_lft >= 0x7FFFFFFF/HZ)
2921 valid_lft = 0x7FFFFFFF/HZ; 2962 valid_lft = 0x7FFFFFFF/HZ;
2922 2963
2923 if (prefered_lft == 0) 2964 if (prefered_lft == 0)
2924 ifa_flags = IFA_F_DEPRECATED; 2965 ifa_flags |= IFA_F_DEPRECATED;
2925 else if ((prefered_lft >= 0x7FFFFFFF/HZ) && 2966 else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
2926 (prefered_lft != INFINITY_LIFE_TIME)) 2967 (prefered_lft != INFINITY_LIFE_TIME))
2927 prefered_lft = 0x7FFFFFFF/HZ; 2968 prefered_lft = 0x7FFFFFFF/HZ;
2928 2969
2929 spin_lock_bh(&ifp->lock); 2970 spin_lock_bh(&ifp->lock);
2930 ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; 2971 ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
2931
2932 ifp->tstamp = jiffies; 2972 ifp->tstamp = jiffies;
2933 ifp->valid_lft = valid_lft; 2973 ifp->valid_lft = valid_lft;
2934 ifp->prefered_lft = prefered_lft; 2974 ifp->prefered_lft = prefered_lft;
@@ -2936,7 +2976,6 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx,
2936 spin_unlock_bh(&ifp->lock); 2976 spin_unlock_bh(&ifp->lock);
2937 if (!(ifp->flags&IFA_F_TENTATIVE)) 2977 if (!(ifp->flags&IFA_F_TENTATIVE))
2938 ipv6_ifa_notify(0, ifp); 2978 ipv6_ifa_notify(0, ifp);
2939 in6_ifa_put(ifp);
2940 2979
2941 addrconf_verify(0); 2980 addrconf_verify(0);
2942 2981
@@ -2946,172 +2985,189 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx,
2946static int 2985static int
2947inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 2986inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2948{ 2987{
2949 struct rtattr **rta = arg; 2988 struct ifaddrmsg *ifm;
2950 struct ifaddrmsg *ifm = NLMSG_DATA(nlh); 2989 struct nlattr *tb[IFA_MAX+1];
2951 struct in6_addr *pfx; 2990 struct in6_addr *pfx;
2952 __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; 2991 struct inet6_ifaddr *ifa;
2992 struct net_device *dev;
2993 u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
2994 u8 ifa_flags;
2995 int err;
2953 2996
2954 pfx = NULL; 2997 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
2955 if (rta[IFA_ADDRESS-1]) { 2998 if (err < 0)
2956 if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) 2999 return err;
2957 return -EINVAL; 3000
2958 pfx = RTA_DATA(rta[IFA_ADDRESS-1]); 3001 ifm = nlmsg_data(nlh);
2959 } 3002 pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
2960 if (rta[IFA_LOCAL-1]) {
2961 if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) ||
2962 (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))))
2963 return -EINVAL;
2964 pfx = RTA_DATA(rta[IFA_LOCAL-1]);
2965 }
2966 if (pfx == NULL) 3003 if (pfx == NULL)
2967 return -EINVAL; 3004 return -EINVAL;
2968 3005
2969 if (rta[IFA_CACHEINFO-1]) { 3006 if (tb[IFA_CACHEINFO]) {
2970 struct ifa_cacheinfo *ci; 3007 struct ifa_cacheinfo *ci;
2971 if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) 3008
2972 return -EINVAL; 3009 ci = nla_data(tb[IFA_CACHEINFO]);
2973 ci = RTA_DATA(rta[IFA_CACHEINFO-1]);
2974 valid_lft = ci->ifa_valid; 3010 valid_lft = ci->ifa_valid;
2975 prefered_lft = ci->ifa_prefered; 3011 preferred_lft = ci->ifa_prefered;
3012 } else {
3013 preferred_lft = INFINITY_LIFE_TIME;
3014 valid_lft = INFINITY_LIFE_TIME;
2976 } 3015 }
2977 3016
2978 if (nlh->nlmsg_flags & NLM_F_REPLACE) { 3017 dev = __dev_get_by_index(ifm->ifa_index);
2979 int ret; 3018 if (dev == NULL)
2980 ret = inet6_addr_modify(ifm->ifa_index, pfx, 3019 return -ENODEV;
2981 prefered_lft, valid_lft); 3020
2982 if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) 3021 /* We ignore other flags so far. */
2983 return ret; 3022 ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
3023
3024 ifa = ipv6_get_ifaddr(pfx, dev, 1);
3025 if (ifa == NULL) {
3026 /*
3027 * It would be best to check for !NLM_F_CREATE here but
 3028 * userspace already relies on not having to provide this.
3029 */
3030 return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
3031 ifa_flags, preferred_lft, valid_lft);
2984 } 3032 }
2985 3033
2986 return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, 3034 if (nlh->nlmsg_flags & NLM_F_EXCL ||
2987 prefered_lft, valid_lft); 3035 !(nlh->nlmsg_flags & NLM_F_REPLACE))
3036 err = -EEXIST;
3037 else
3038 err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
3039
3040 in6_ifa_put(ifa);
3041
3042 return err;
3043}
3044
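The rewritten newaddr handler makes the netlink flag semantics explicit: an unknown address is always added (NLM_F_CREATE is treated as implied, since old userspace never set it), while an existing one is modified only when NLM_F_REPLACE is present and NLM_F_EXCL is not. Reduced to a decision function (names hypothetical):

enum addr_verdict { ADDR_ADD, ADDR_MODIFY };

static int example_newaddr_verdict(int exists, __u16 nlmsg_flags)
{
	if (!exists)
		return ADDR_ADD;	/* create even without NLM_F_CREATE */

	if ((nlmsg_flags & NLM_F_EXCL) || !(nlmsg_flags & NLM_F_REPLACE))
		return -EEXIST;		/* exclusive add, or replace not asked */

	return ADDR_MODIFY;
}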
3045static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
3046 u8 scope, int ifindex)
3047{
3048 struct ifaddrmsg *ifm;
2988 3049
3050 ifm = nlmsg_data(nlh);
3051 ifm->ifa_family = AF_INET6;
3052 ifm->ifa_prefixlen = prefixlen;
3053 ifm->ifa_flags = flags;
3054 ifm->ifa_scope = scope;
3055 ifm->ifa_index = ifindex;
2989} 3056}
2990 3057
2991/* Maximum length of ifa_cacheinfo attributes */ 3058static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
2992#define INET6_IFADDR_RTA_SPACE \ 3059 unsigned long tstamp, u32 preferred, u32 valid)
2993 RTA_SPACE(16) /* IFA_ADDRESS */ + \ 3060{
2994 RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */ 3061 struct ifa_cacheinfo ci;
3062
3063 ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
3064 + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
3065 ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
3066 + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
3067 ci.ifa_prefered = preferred;
3068 ci.ifa_valid = valid;
3069
3070 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
3071}
3072
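The split arithmetic in put_cacheinfo() is an overflow guard, not obfuscation: computing delta * 100 / HZ directly can wrap a 32-bit intermediate when the uptime delta is large, so whole seconds and the sub-second remainder are scaled separately and summed. The same computation isolated (helper name hypothetical; TIME_DELTA's jiffies-wraparound handling is elided):

/* delta / HZ * 100        scales the whole seconds,
 * delta % HZ * 100 / HZ   scales the remainder (< HZ, so no overflow);
 * the sum equals delta * 100 / HZ without the 32-bit wrap. */
static u32 example_jiffies_to_centisecs(unsigned long delta)
{
	return (u32)(delta / HZ * 100 + delta % HZ * 100 / HZ);
}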
3073static inline int rt_scope(int ifa_scope)
3074{
3075 if (ifa_scope & IFA_HOST)
3076 return RT_SCOPE_HOST;
3077 else if (ifa_scope & IFA_LINK)
3078 return RT_SCOPE_LINK;
3079 else if (ifa_scope & IFA_SITE)
3080 return RT_SCOPE_SITE;
3081 else
3082 return RT_SCOPE_UNIVERSE;
3083}
3084
3085static inline int inet6_ifaddr_msgsize(void)
3086{
3087 return nlmsg_total_size(sizeof(struct ifaddrmsg) +
3088 nla_total_size(16) +
3089 nla_total_size(sizeof(struct ifa_cacheinfo)) +
3090 128);
3091}
2995 3092
2996static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, 3093static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
2997 u32 pid, u32 seq, int event, unsigned int flags) 3094 u32 pid, u32 seq, int event, unsigned int flags)
2998{ 3095{
2999 struct ifaddrmsg *ifm;
3000 struct nlmsghdr *nlh; 3096 struct nlmsghdr *nlh;
3001 struct ifa_cacheinfo ci; 3097 u32 preferred, valid;
3002 unsigned char *b = skb->tail; 3098
3099 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
3100 if (nlh == NULL)
3101 return -ENOBUFS;
3102
3103 put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
3104 ifa->idev->dev->ifindex);
3003 3105
3004 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
3005 ifm = NLMSG_DATA(nlh);
3006 ifm->ifa_family = AF_INET6;
3007 ifm->ifa_prefixlen = ifa->prefix_len;
3008 ifm->ifa_flags = ifa->flags;
3009 ifm->ifa_scope = RT_SCOPE_UNIVERSE;
3010 if (ifa->scope&IFA_HOST)
3011 ifm->ifa_scope = RT_SCOPE_HOST;
3012 else if (ifa->scope&IFA_LINK)
3013 ifm->ifa_scope = RT_SCOPE_LINK;
3014 else if (ifa->scope&IFA_SITE)
3015 ifm->ifa_scope = RT_SCOPE_SITE;
3016 ifm->ifa_index = ifa->idev->dev->ifindex;
3017 RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
3018 if (!(ifa->flags&IFA_F_PERMANENT)) { 3106 if (!(ifa->flags&IFA_F_PERMANENT)) {
3019 ci.ifa_prefered = ifa->prefered_lft; 3107 preferred = ifa->prefered_lft;
3020 ci.ifa_valid = ifa->valid_lft; 3108 valid = ifa->valid_lft;
3021 if (ci.ifa_prefered != INFINITY_LIFE_TIME) { 3109 if (preferred != INFINITY_LIFE_TIME) {
3022 long tval = (jiffies - ifa->tstamp)/HZ; 3110 long tval = (jiffies - ifa->tstamp)/HZ;
3023 ci.ifa_prefered -= tval; 3111 preferred -= tval;
3024 if (ci.ifa_valid != INFINITY_LIFE_TIME) 3112 if (valid != INFINITY_LIFE_TIME)
3025 ci.ifa_valid -= tval; 3113 valid -= tval;
3026 } 3114 }
3027 } else { 3115 } else {
3028 ci.ifa_prefered = INFINITY_LIFE_TIME; 3116 preferred = INFINITY_LIFE_TIME;
3029 ci.ifa_valid = INFINITY_LIFE_TIME; 3117 valid = INFINITY_LIFE_TIME;
3030 } 3118 }
3031 ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100
3032 + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
3033 ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100
3034 + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
3035 RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
3036 nlh->nlmsg_len = skb->tail - b;
3037 return skb->len;
3038 3119
3039nlmsg_failure: 3120 if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
3040rtattr_failure: 3121 put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
3041 skb_trim(skb, b - skb->data); 3122 return nlmsg_cancel(skb, nlh);
3042 return -1; 3123
3124 return nlmsg_end(skb, nlh);
3043} 3125}
3044 3126
3045static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, 3127static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3046 u32 pid, u32 seq, int event, u16 flags) 3128 u32 pid, u32 seq, int event, u16 flags)
3047{ 3129{
3048 struct ifaddrmsg *ifm;
3049 struct nlmsghdr *nlh; 3130 struct nlmsghdr *nlh;
3050 struct ifa_cacheinfo ci; 3131 u8 scope = RT_SCOPE_UNIVERSE;
3051 unsigned char *b = skb->tail; 3132 int ifindex = ifmca->idev->dev->ifindex;
3052
3053 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
3054 ifm = NLMSG_DATA(nlh);
3055 ifm->ifa_family = AF_INET6;
3056 ifm->ifa_prefixlen = 128;
3057 ifm->ifa_flags = IFA_F_PERMANENT;
3058 ifm->ifa_scope = RT_SCOPE_UNIVERSE;
3059 if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE)
3060 ifm->ifa_scope = RT_SCOPE_SITE;
3061 ifm->ifa_index = ifmca->idev->dev->ifindex;
3062 RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
3063 ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ
3064 * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ
3065 * 100 / HZ);
3066 ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ
3067 * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ
3068 * 100 / HZ);
3069 ci.ifa_prefered = INFINITY_LIFE_TIME;
3070 ci.ifa_valid = INFINITY_LIFE_TIME;
3071 RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
3072 nlh->nlmsg_len = skb->tail - b;
3073 return skb->len;
3074 3133
3075nlmsg_failure: 3134 if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
3076rtattr_failure: 3135 scope = RT_SCOPE_SITE;
3077 skb_trim(skb, b - skb->data); 3136
3078 return -1; 3137 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
3138 if (nlh == NULL)
3139 return -ENOBUFS;
3140
3141 put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
3142 if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
3143 put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
3144 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
3145 return nlmsg_cancel(skb, nlh);
3146
3147 return nlmsg_end(skb, nlh);
3079} 3148}
3080 3149
3081static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, 3150static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3082 u32 pid, u32 seq, int event, unsigned int flags) 3151 u32 pid, u32 seq, int event, unsigned int flags)
3083{ 3152{
3084 struct ifaddrmsg *ifm;
3085 struct nlmsghdr *nlh; 3153 struct nlmsghdr *nlh;
3086 struct ifa_cacheinfo ci; 3154 u8 scope = RT_SCOPE_UNIVERSE;
3087 unsigned char *b = skb->tail; 3155 int ifindex = ifaca->aca_idev->dev->ifindex;
3088
3089 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
3090 ifm = NLMSG_DATA(nlh);
3091 ifm->ifa_family = AF_INET6;
3092 ifm->ifa_prefixlen = 128;
3093 ifm->ifa_flags = IFA_F_PERMANENT;
3094 ifm->ifa_scope = RT_SCOPE_UNIVERSE;
3095 if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE)
3096 ifm->ifa_scope = RT_SCOPE_SITE;
3097 ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
3098 RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
3099 ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ
3100 * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ
3101 * 100 / HZ);
3102 ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ
3103 * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ
3104 * 100 / HZ);
3105 ci.ifa_prefered = INFINITY_LIFE_TIME;
3106 ci.ifa_valid = INFINITY_LIFE_TIME;
3107 RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
3108 nlh->nlmsg_len = skb->tail - b;
3109 return skb->len;
3110 3156
3111nlmsg_failure: 3157 if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
3112rtattr_failure: 3158 scope = RT_SCOPE_SITE;
3113 skb_trim(skb, b - skb->data); 3159
3114 return -1; 3160 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
3161 if (nlh == NULL)
3162 return -ENOBUFS;
3163
3164 put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
3165 if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
3166 put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
3167 INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
3168 return nlmsg_cancel(skb, nlh);
3169
3170 return nlmsg_end(skb, nlh);
3115} 3171}
3116 3172
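The three fill routines above now share one shape: nlmsg_put() reserves the header, put_ifaddrmsg() and the nla_put()/put_cacheinfo() calls append the payload, and any attribute that does not fit unwinds through nlmsg_cancel(), which trims the skb back to where the message started. The skeleton, with the per-type payload elided:

static int example_fill(struct sk_buff *skb, struct in6_addr *addr,
			u32 pid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, event,
			sizeof(struct ifaddrmsg), flags);
	if (nlh == NULL)
		return -ENOBUFS;		/* no room for the header */

	/* ... put_ifaddrmsg(nlh, ...) and further attributes ... */
	if (nla_put(skb, IFA_ADDRESS, 16, addr) < 0)
		return nlmsg_cancel(skb, nlh);	/* trim the partial message */

	return nlmsg_end(skb, nlh);		/* finalize nlmsg_len */
}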
3117enum addr_type_t 3173enum addr_type_t
@@ -3222,79 +3278,74 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
3222 return inet6_dump_addr(skb, cb, type); 3278 return inet6_dump_addr(skb, cb, type);
3223} 3279}
3224 3280
3225static int inet6_rtm_getaddr(struct sk_buff *in_skb, 3281static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3226 struct nlmsghdr* nlh, void *arg) 3282 void *arg)
3227{ 3283{
3228 struct rtattr **rta = arg; 3284 struct ifaddrmsg *ifm;
3229 struct ifaddrmsg *ifm = NLMSG_DATA(nlh); 3285 struct nlattr *tb[IFA_MAX+1];
3230 struct in6_addr *addr = NULL; 3286 struct in6_addr *addr = NULL;
3231 struct net_device *dev = NULL; 3287 struct net_device *dev = NULL;
3232 struct inet6_ifaddr *ifa; 3288 struct inet6_ifaddr *ifa;
3233 struct sk_buff *skb; 3289 struct sk_buff *skb;
3234 int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE);
3235 int err; 3290 int err;
3236 3291
3237 if (rta[IFA_ADDRESS-1]) { 3292 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
3238 if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) 3293 if (err < 0)
3239 return -EINVAL; 3294 goto errout;
3240 addr = RTA_DATA(rta[IFA_ADDRESS-1]); 3295
3241 } 3296 addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
3242 if (rta[IFA_LOCAL-1]) { 3297 if (addr == NULL) {
3243 if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || 3298 err = -EINVAL;
3244 (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) 3299 goto errout;
3245 return -EINVAL;
3246 addr = RTA_DATA(rta[IFA_LOCAL-1]);
3247 } 3300 }
3248 if (addr == NULL)
3249 return -EINVAL;
3250 3301
3302 ifm = nlmsg_data(nlh);
3251 if (ifm->ifa_index) 3303 if (ifm->ifa_index)
3252 dev = __dev_get_by_index(ifm->ifa_index); 3304 dev = __dev_get_by_index(ifm->ifa_index);
3253 3305
3254 if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) 3306 if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
3255 return -EADDRNOTAVAIL; 3307 err = -EADDRNOTAVAIL;
3308 goto errout;
3309 }
3256 3310
3257 if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { 3311 if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
3258 err = -ENOBUFS; 3312 err = -ENOBUFS;
3259 goto out; 3313 goto errout_ifa;
3260 } 3314 }
3261 3315
3262 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
3263 err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, 3316 err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
3264 nlh->nlmsg_seq, RTM_NEWADDR, 0); 3317 nlh->nlmsg_seq, RTM_NEWADDR, 0);
3265 if (err < 0) { 3318 if (err < 0) {
3266 err = -EMSGSIZE; 3319 kfree_skb(skb);
3267 goto out_free; 3320 goto errout_ifa;
3268 } 3321 }
3269 3322
3270 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 3323 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
3271 if (err > 0) 3324errout_ifa:
3272 err = 0;
3273out:
3274 in6_ifa_put(ifa); 3325 in6_ifa_put(ifa);
3326errout:
3275 return err; 3327 return err;
3276out_free:
3277 kfree_skb(skb);
3278 goto out;
3279} 3328}
3280 3329
3281static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) 3330static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
3282{ 3331{
3283 struct sk_buff *skb; 3332 struct sk_buff *skb;
3284 int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); 3333 int err = -ENOBUFS;
3285 3334
3286 skb = alloc_skb(size, GFP_ATOMIC); 3335 skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
3287 if (!skb) { 3336 if (skb == NULL)
3288 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); 3337 goto errout;
3289 return; 3338
3290 } 3339 err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
3291 if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { 3340 if (err < 0) {
3292 kfree_skb(skb); 3341 kfree_skb(skb);
3293 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); 3342 goto errout;
3294 return;
3295 } 3343 }
3296 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; 3344
3297 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); 3345 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3346errout:
3347 if (err < 0)
3348 rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
3298} 3349}
3299 3350
3300static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, 3351static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -3329,6 +3380,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
3329 array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; 3380 array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
3330#endif 3381#endif
3331#endif 3382#endif
3383 array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
3332} 3384}
3333 3385
3334/* Maximum length of ifinfomsg attributes */ 3386/* Maximum length of ifinfomsg attributes */
@@ -3435,20 +3487,23 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
3435void inet6_ifinfo_notify(int event, struct inet6_dev *idev) 3487void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3436{ 3488{
3437 struct sk_buff *skb; 3489 struct sk_buff *skb;
3438 int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE); 3490 int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE;
3491 int err = -ENOBUFS;
3439 3492
3440 skb = alloc_skb(size, GFP_ATOMIC); 3493 skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
3441 if (!skb) { 3494 if (skb == NULL)
3442 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); 3495 goto errout;
3443 return; 3496
3444 } 3497 err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
3445 if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { 3498 if (err < 0) {
3446 kfree_skb(skb); 3499 kfree_skb(skb);
3447 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); 3500 goto errout;
3448 return;
3449 } 3501 }
3450 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; 3502
 3451 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); 3503 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
 3504errout:
 3505 if (err < 0)
 3506 rtnl_set_sk_err(RTNLGRP_IPV6_IFINFO, err);
3452} 3507}
3453 3508
3454/* Maximum length of prefix_cacheinfo attributes */ 3509/* Maximum length of prefix_cacheinfo attributes */
@@ -3500,20 +3555,23 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3500 struct prefix_info *pinfo) 3555 struct prefix_info *pinfo)
3501{ 3556{
3502 struct sk_buff *skb; 3557 struct sk_buff *skb;
3503 int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE); 3558 int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE;
3559 int err = -ENOBUFS;
3504 3560
3505 skb = alloc_skb(size, GFP_ATOMIC); 3561 skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
3506 if (!skb) { 3562 if (skb == NULL)
3507 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); 3563 goto errout;
3508 return; 3564
3509 } 3565 err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
3510 if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { 3566 if (err < 0) {
3511 kfree_skb(skb); 3567 kfree_skb(skb);
3512 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); 3568 goto errout;
3513 return;
3514 } 3569 }
3515 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; 3570
3516 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); 3571 err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
3572errout:
3573 if (err < 0)
3574 rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
3517} 3575}
3518 3576
3519static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { 3577static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -3528,6 +3586,9 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
3528 [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, 3586 [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
3529 [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, 3587 [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
3530 .dumpit = inet6_dump_fib, }, 3588 .dumpit = inet6_dump_fib, },
3589#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3590 [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, },
3591#endif
3531}; 3592};
3532 3593
3533static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 3594static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -3536,7 +3597,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3536 3597
3537 switch (event) { 3598 switch (event) {
3538 case RTM_NEWADDR: 3599 case RTM_NEWADDR:
3539 ip6_ins_rt(ifp->rt, NULL, NULL, NULL); 3600 ip6_ins_rt(ifp->rt);
3540 if (ifp->idev->cnf.forwarding) 3601 if (ifp->idev->cnf.forwarding)
3541 addrconf_join_anycast(ifp); 3602 addrconf_join_anycast(ifp);
3542 break; 3603 break;
@@ -3545,7 +3606,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3545 addrconf_leave_anycast(ifp); 3606 addrconf_leave_anycast(ifp);
3546 addrconf_leave_solict(ifp->idev, &ifp->addr); 3607 addrconf_leave_solict(ifp->idev, &ifp->addr);
3547 dst_hold(&ifp->rt->u.dst); 3608 dst_hold(&ifp->rt->u.dst);
3548 if (ip6_del_rt(ifp->rt, NULL, NULL, NULL)) 3609 if (ip6_del_rt(ifp->rt))
3549 dst_free(&ifp->rt->u.dst); 3610 dst_free(&ifp->rt->u.dst);
3550 break; 3611 break;
3551 } 3612 }
@@ -3553,10 +3614,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3553 3614
3554static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 3615static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3555{ 3616{
3556 read_lock_bh(&addrconf_lock); 3617 rcu_read_lock_bh();
3557 if (likely(ifp->idev->dead == 0)) 3618 if (likely(ifp->idev->dead == 0))
3558 __ipv6_ifa_notify(event, ifp); 3619 __ipv6_ifa_notify(event, ifp);
3559 read_unlock_bh(&addrconf_lock); 3620 rcu_read_unlock_bh();
3560} 3621}
3561 3622
3562#ifdef CONFIG_SYSCTL 3623#ifdef CONFIG_SYSCTL
@@ -3653,7 +3714,7 @@ static struct addrconf_sysctl_table
3653 ctl_table addrconf_conf_dir[2]; 3714 ctl_table addrconf_conf_dir[2];
3654 ctl_table addrconf_proto_dir[2]; 3715 ctl_table addrconf_proto_dir[2];
3655 ctl_table addrconf_root_dir[2]; 3716 ctl_table addrconf_root_dir[2];
3656} addrconf_sysctl = { 3717} addrconf_sysctl __read_mostly = {
3657 .sysctl_header = NULL, 3718 .sysctl_header = NULL,
3658 .addrconf_vars = { 3719 .addrconf_vars = {
3659 { 3720 {
@@ -3843,6 +3904,14 @@ static struct addrconf_sysctl_table
3843#endif 3904#endif
3844#endif 3905#endif
3845 { 3906 {
3907 .ctl_name = NET_IPV6_PROXY_NDP,
3908 .procname = "proxy_ndp",
3909 .data = &ipv6_devconf.proxy_ndp,
3910 .maxlen = sizeof(int),
3911 .mode = 0644,
3912 .proc_handler = &proc_dointvec,
3913 },
3914 {
3846 .ctl_name = 0, /* sentinel */ 3915 .ctl_name = 0, /* sentinel */
3847 } 3916 }
3848 }, 3917 },
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ac85e9c532c2..bf6e8aff19d4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -59,6 +59,9 @@
59#ifdef CONFIG_IPV6_TUNNEL 59#ifdef CONFIG_IPV6_TUNNEL
60#include <net/ip6_tunnel.h> 60#include <net/ip6_tunnel.h>
61#endif 61#endif
62#ifdef CONFIG_IPV6_MIP6
63#include <net/mip6.h>
64#endif
62 65
63#include <asm/uaccess.h> 66#include <asm/uaccess.h>
64#include <asm/system.h> 67#include <asm/system.h>
@@ -67,7 +70,7 @@ MODULE_AUTHOR("Cast of dozens");
67MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); 70MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
68MODULE_LICENSE("GPL"); 71MODULE_LICENSE("GPL");
69 72
70int sysctl_ipv6_bindv6only; 73int sysctl_ipv6_bindv6only __read_mostly;
71 74
72/* The inetsw table contains everything that inet_create needs to 75/* The inetsw table contains everything that inet_create needs to
73 * build a new socket. 76 * build a new socket.
@@ -637,6 +640,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
637 fl.oif = sk->sk_bound_dev_if; 640 fl.oif = sk->sk_bound_dev_if;
638 fl.fl_ip_dport = inet->dport; 641 fl.fl_ip_dport = inet->dport;
639 fl.fl_ip_sport = inet->sport; 642 fl.fl_ip_sport = inet->sport;
643 security_sk_classify_flow(sk, &fl);
640 644
641 if (np->opt && np->opt->srcrt) { 645 if (np->opt && np->opt->srcrt) {
642 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; 646 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
@@ -658,7 +662,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
658 return err; 662 return err;
659 } 663 }
660 664
661 __ip6_dst_store(sk, dst, NULL); 665 __ip6_dst_store(sk, dst, NULL, NULL);
662 } 666 }
663 667
664 return 0; 668 return 0;
@@ -757,6 +761,8 @@ static int __init inet6_init(void)
757 struct list_head *r; 761 struct list_head *r;
758 int err; 762 int err;
759 763
764 BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
765
760#ifdef MODULE 766#ifdef MODULE
761#if 0 /* FIXME --RR */ 767#if 0 /* FIXME --RR */
762 if (!mod_member_present(&__this_module, can_unload)) 768 if (!mod_member_present(&__this_module, can_unload))
@@ -766,11 +772,6 @@ static int __init inet6_init(void)
766#endif 772#endif
767#endif 773#endif
768 774
769 if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) {
770 printk(KERN_CRIT "inet6_proto_init: size fault\n");
771 return -EINVAL;
772 }
773
774 err = proto_register(&tcpv6_prot, 1); 775 err = proto_register(&tcpv6_prot, 1);
775 if (err) 776 if (err)
776 goto out; 777 goto out;
@@ -856,6 +857,9 @@ static int __init inet6_init(void)
856 ipv6_frag_init(); 857 ipv6_frag_init();
857 ipv6_nodata_init(); 858 ipv6_nodata_init();
858 ipv6_destopt_init(); 859 ipv6_destopt_init();
860#ifdef CONFIG_IPV6_MIP6
861 mip6_init();
862#endif
859 863
860 /* Init v6 transport protocols. */ 864 /* Init v6 transport protocols. */
861 udpv6_init(); 865 udpv6_init();
@@ -919,6 +923,9 @@ static void __exit inet6_exit(void)
919 tcp6_proc_exit(); 923 tcp6_proc_exit();
920 raw6_proc_exit(); 924 raw6_proc_exit();
921#endif 925#endif
926#ifdef CONFIG_IPV6_MIP6
927 mip6_fini();
928#endif
922 /* Cleanup code parts. */ 929 /* Cleanup code parts. */
923 sit_cleanup(); 930 sit_cleanup();
924 ip6_flowlabel_cleanup(); 931 ip6_flowlabel_cleanup();
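Moving the inet6_skb_parm size check into BUILD_BUG_ON turns a boot-time printk-and-fail into a compile-time failure. The idiom works by making the condition produce an invalid type when true; a self-contained sketch using a re-derived macro (the kernel's real BUILD_BUG_ON in kernel.h is equivalent in effect):

/* A negative array size is a compile error, so this only builds
 * when 'condition' is false. */
#define EXAMPLE_BUILD_BUG_ON(condition) \
	((void)sizeof(char[1 - 2 * !!(condition)]))

static int __init example_init(void)
{
	struct sk_buff *dummy_skb = NULL;

	/* compiles to nothing; no runtime check remains */
	EXAMPLE_BUILD_BUG_ON(sizeof(struct inet6_skb_parm) >
			     sizeof(dummy_skb->cb));
	return 0;
}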
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 00ffa7bc6c9f..b0d83e8e4252 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -74,6 +74,66 @@ bad:
74 return 0; 74 return 0;
75} 75}
76 76
77#ifdef CONFIG_IPV6_MIP6
78/**
79 * ipv6_rearrange_destopt - rearrange IPv6 destination options header
80 * @iph: IPv6 header
 81 * @destopt: destination options header
82 */
83static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt)
84{
85 u8 *opt = (u8 *)destopt;
86 int len = ipv6_optlen(destopt);
87 int off = 0;
88 int optlen = 0;
89
90 off += 2;
91 len -= 2;
92
93 while (len > 0) {
94
95 switch (opt[off]) {
96
97 case IPV6_TLV_PAD0:
98 optlen = 1;
99 break;
100 default:
101 if (len < 2)
102 goto bad;
103 optlen = opt[off+1]+2;
104 if (len < optlen)
105 goto bad;
106
 107 /* Swap the source address in @iph with the address in the
 108 * home address option, so @iph carries the final source.
109 * See 11.3.2 of RFC 3775 for details.
110 */
111 if (opt[off] == IPV6_TLV_HAO) {
112 struct in6_addr final_addr;
113 struct ipv6_destopt_hao *hao;
114
115 hao = (struct ipv6_destopt_hao *)&opt[off];
116 if (hao->length != sizeof(hao->addr)) {
117 if (net_ratelimit())
118 printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
119 goto bad;
120 }
121 ipv6_addr_copy(&final_addr, &hao->addr);
122 ipv6_addr_copy(&hao->addr, &iph->saddr);
123 ipv6_addr_copy(&iph->saddr, &final_addr);
124 }
125 break;
126 }
127
128 off += optlen;
129 len -= optlen;
130 }
131 /* Note: ok if len == 0 */
132bad:
133 return;
134}
135#endif
136
77/** 137/**
78 * ipv6_rearrange_rthdr - rearrange IPv6 routing header 138 * ipv6_rearrange_rthdr - rearrange IPv6 routing header
79 * @iph: IPv6 header 139 * @iph: IPv6 header
@@ -113,7 +173,7 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
113 ipv6_addr_copy(&iph->daddr, &final_addr); 173 ipv6_addr_copy(&iph->daddr, &final_addr);
114} 174}
115 175
116static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) 176static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
117{ 177{
118 union { 178 union {
119 struct ipv6hdr *iph; 179 struct ipv6hdr *iph;
@@ -128,8 +188,12 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
128 188
129 while (exthdr.raw < end) { 189 while (exthdr.raw < end) {
130 switch (nexthdr) { 190 switch (nexthdr) {
131 case NEXTHDR_HOP:
132 case NEXTHDR_DEST: 191 case NEXTHDR_DEST:
192#ifdef CONFIG_IPV6_MIP6
193 if (dir == XFRM_POLICY_OUT)
194 ipv6_rearrange_destopt(iph, exthdr.opth);
195#endif
196 case NEXTHDR_HOP:
133 if (!zero_out_mutable_opts(exthdr.opth)) { 197 if (!zero_out_mutable_opts(exthdr.opth)) {
134 LIMIT_NETDEBUG( 198 LIMIT_NETDEBUG(
135 KERN_WARNING "overrun %sopts\n", 199 KERN_WARNING "overrun %sopts\n",
@@ -164,6 +228,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
164 u8 nexthdr; 228 u8 nexthdr;
165 char tmp_base[8]; 229 char tmp_base[8];
166 struct { 230 struct {
231#ifdef CONFIG_IPV6_MIP6
232 struct in6_addr saddr;
233#endif
167 struct in6_addr daddr; 234 struct in6_addr daddr;
168 char hdrs[0]; 235 char hdrs[0];
169 } *tmp_ext; 236 } *tmp_ext;
@@ -188,10 +255,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
188 err = -ENOMEM; 255 err = -ENOMEM;
189 goto error; 256 goto error;
190 } 257 }
258#ifdef CONFIG_IPV6_MIP6
259 memcpy(tmp_ext, &top_iph->saddr, extlen);
260#else
191 memcpy(tmp_ext, &top_iph->daddr, extlen); 261 memcpy(tmp_ext, &top_iph->daddr, extlen);
262#endif
192 err = ipv6_clear_mutable_options(top_iph, 263 err = ipv6_clear_mutable_options(top_iph,
193 extlen - sizeof(*tmp_ext) + 264 extlen - sizeof(*tmp_ext) +
194 sizeof(*top_iph)); 265 sizeof(*top_iph),
266 XFRM_POLICY_OUT);
195 if (err) 267 if (err)
196 goto error_free_iph; 268 goto error_free_iph;
197 } 269 }
@@ -222,7 +294,11 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
222 294
223 memcpy(top_iph, tmp_base, sizeof(tmp_base)); 295 memcpy(top_iph, tmp_base, sizeof(tmp_base));
224 if (tmp_ext) { 296 if (tmp_ext) {
297#ifdef CONFIG_IPV6_MIP6
298 memcpy(&top_iph->saddr, tmp_ext, extlen);
299#else
225 memcpy(&top_iph->daddr, tmp_ext, extlen); 300 memcpy(&top_iph->daddr, tmp_ext, extlen);
301#endif
226error_free_iph: 302error_free_iph:
227 kfree(tmp_ext); 303 kfree(tmp_ext);
228 } 304 }
@@ -282,7 +358,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
282 if (!tmp_hdr) 358 if (!tmp_hdr)
283 goto out; 359 goto out;
284 memcpy(tmp_hdr, skb->nh.raw, hdr_len); 360 memcpy(tmp_hdr, skb->nh.raw, hdr_len);
285 if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len)) 361 if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
286 goto free_out; 362 goto free_out;
287 skb->nh.ipv6h->priority = 0; 363 skb->nh.ipv6h->priority = 0;
288 skb->nh.ipv6h->flow_lbl[0] = 0; 364 skb->nh.ipv6h->flow_lbl[0] = 0;
@@ -398,7 +474,7 @@ static int ah6_init_state(struct xfrm_state *x)
398 goto error; 474 goto error;
399 475
400 x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); 476 x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
401 if (x->props.mode) 477 if (x->props.mode == XFRM_MODE_TUNNEL)
402 x->props.header_len += sizeof(struct ipv6hdr); 478 x->props.header_len += sizeof(struct ipv6hdr);
403 x->data = ahp; 479 x->data = ahp;
404 480
@@ -435,7 +511,8 @@ static struct xfrm_type ah6_type =
435 .init_state = ah6_init_state, 511 .init_state = ah6_init_state,
436 .destructor = ah6_destroy, 512 .destructor = ah6_destroy,
437 .input = ah6_input, 513 .input = ah6_input,
438 .output = ah6_output 514 .output = ah6_output,
515 .hdr_offset = xfrm6_find_1stfragopt,
439}; 516};
440 517
441static struct inet6_protocol ah6_protocol = { 518static struct inet6_protocol ah6_protocol = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f6881d7a0385..a9604764e015 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -56,7 +56,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev)
56 int onlink; 56 int onlink;
57 57
58 onlink = 0; 58 onlink = 0;
59 read_lock(&addrconf_lock); 59 rcu_read_lock();
60 idev = __in6_dev_get(dev); 60 idev = __in6_dev_get(dev);
61 if (idev) { 61 if (idev) {
62 read_lock_bh(&idev->lock); 62 read_lock_bh(&idev->lock);
@@ -68,7 +68,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev)
68 } 68 }
69 read_unlock_bh(&idev->lock); 69 read_unlock_bh(&idev->lock);
70 } 70 }
71 read_unlock(&addrconf_lock); 71 rcu_read_unlock();
72 return onlink; 72 return onlink;
73} 73}
74 74
@@ -335,7 +335,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
335 write_unlock_bh(&idev->lock); 335 write_unlock_bh(&idev->lock);
336 336
337 dst_hold(&rt->u.dst); 337 dst_hold(&rt->u.dst);
338 if (ip6_ins_rt(rt, NULL, NULL, NULL)) 338 if (ip6_ins_rt(rt))
339 dst_release(&rt->u.dst); 339 dst_release(&rt->u.dst);
340 340
341 addrconf_join_solict(dev, &aca->aca_addr); 341 addrconf_join_solict(dev, &aca->aca_addr);
@@ -378,7 +378,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
378 addrconf_leave_solict(idev, &aca->aca_addr); 378 addrconf_leave_solict(idev, &aca->aca_addr);
379 379
380 dst_hold(&aca->aca_rt->u.dst); 380 dst_hold(&aca->aca_rt->u.dst);
381 if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL)) 381 if (ip6_del_rt(aca->aca_rt))
382 dst_free(&aca->aca_rt->u.dst); 382 dst_free(&aca->aca_rt->u.dst);
383 else 383 else
384 dst_release(&aca->aca_rt->u.dst); 384 dst_release(&aca->aca_rt->u.dst);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 3b55b4c8e2d1..7206747022fc 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -156,6 +156,8 @@ ipv4_connected:
156 if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) 156 if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
157 fl.oif = np->mcast_oif; 157 fl.oif = np->mcast_oif;
158 158
159 security_sk_classify_flow(sk, &fl);
160
159 if (flowlabel) { 161 if (flowlabel) {
160 if (flowlabel->opt && flowlabel->opt->srcrt) { 162 if (flowlabel->opt && flowlabel->opt->srcrt) {
161 struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; 163 struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
@@ -191,7 +193,12 @@ ipv4_connected:
191 193
192 ip6_dst_store(sk, dst, 194 ip6_dst_store(sk, dst,
193 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? 195 ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
194 &np->daddr : NULL); 196 &np->daddr : NULL,
197#ifdef CONFIG_IPV6_SUBTREES
198 ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
199 &np->saddr :
200#endif
201 NULL);
195 202
196 sk->sk_state = TCP_ESTABLISHED; 203 sk->sk_state = TCP_ESTABLISHED;
197out: 204out:
@@ -641,10 +648,13 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
641 648
642 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); 649 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
643 650
644 /* 651 switch (rthdr->type) {
645 * TYPE 0 652 case IPV6_SRCRT_TYPE_0:
646 */ 653#ifdef CONFIG_IPV6_MIP6
647 if (rthdr->type) { 654 case IPV6_SRCRT_TYPE_2:
655#endif
656 break;
657 default:
648 err = -EINVAL; 658 err = -EINVAL;
649 goto exit_f; 659 goto exit_f;
650 } 660 }
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2ebfd281e721..e78680a9985b 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -99,8 +99,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
99 esph->seq_no = htonl(++x->replay.oseq); 99 esph->seq_no = htonl(++x->replay.oseq);
100 xfrm_aevent_doreplay(x); 100 xfrm_aevent_doreplay(x);
101 101
102 if (esp->conf.ivlen) 102 if (esp->conf.ivlen) {
103 if (unlikely(!esp->conf.ivinitted)) {
104 get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
105 esp->conf.ivinitted = 1;
106 }
103 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); 107 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
108 }
104 109
105 do { 110 do {
106 struct scatterlist *sg = &esp->sgbuf[0]; 111 struct scatterlist *sg = &esp->sgbuf[0];
@@ -237,7 +242,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
237 struct esp_data *esp = x->data; 242 struct esp_data *esp = x->data;
238 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 243 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
239 244
240 if (x->props.mode) { 245 if (x->props.mode == XFRM_MODE_TUNNEL) {
241 mtu = ALIGN(mtu + 2, blksize); 246 mtu = ALIGN(mtu + 2, blksize);
242 } else { 247 } else {
243 /* The worst case. */ 248 /* The worst case. */
@@ -353,12 +358,12 @@ static int esp6_init_state(struct xfrm_state *x)
353 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); 358 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
354 if (unlikely(esp->conf.ivec == NULL)) 359 if (unlikely(esp->conf.ivec == NULL))
355 goto error; 360 goto error;
356 get_random_bytes(esp->conf.ivec, esp->conf.ivlen); 361 esp->conf.ivinitted = 0;
357 } 362 }
358 if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) 363 if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
359 goto error; 364 goto error;
360 x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; 365 x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
361 if (x->props.mode) 366 if (x->props.mode == XFRM_MODE_TUNNEL)
362 x->props.header_len += sizeof(struct ipv6hdr); 367 x->props.header_len += sizeof(struct ipv6hdr);
363 x->data = esp; 368 x->data = esp;
364 return 0; 369 return 0;
@@ -379,7 +384,8 @@ static struct xfrm_type esp6_type =
379 .destructor = esp6_destroy, 384 .destructor = esp6_destroy,
380 .get_max_size = esp6_get_max_size, 385 .get_max_size = esp6_get_max_size,
381 .input = esp6_input, 386 .input = esp6_input,
382 .output = esp6_output 387 .output = esp6_output,
388 .hdr_offset = xfrm6_find_1stfragopt,
383}; 389};
384 390
385static struct inet6_protocol esp6_protocol = { 391static struct inet6_protocol esp6_protocol = {
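The ivinitted flag defers IV generation from esp6_init_state() to the first transmitted packet, so get_random_bytes() runs once the entropy pool has had time to fill rather than at state-creation time. The lazy-init shape in isolation (struct name hypothetical; in the real path the check runs under the xfrm state lock, which is what makes the unsynchronized flag safe):

struct example_iv_state {
	u8 *ivec;		/* allocated at init time */
	int ivlen;
	int ivinitted;		/* 0 until the first packet */
};

static void example_iv_lazy_init(struct example_iv_state *iv)
{
	if (unlikely(!iv->ivinitted)) {		/* first packet only */
		get_random_bytes(iv->ivec, iv->ivlen);
		iv->ivinitted = 1;
	}
	/* ... hand iv->ivec to the cipher as before ... */
}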
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 86dac106873b..88c96b10684c 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -43,9 +43,54 @@
43#include <net/ndisc.h> 43#include <net/ndisc.h>
44#include <net/ip6_route.h> 44#include <net/ip6_route.h>
45#include <net/addrconf.h> 45#include <net/addrconf.h>
46#ifdef CONFIG_IPV6_MIP6
47#include <net/xfrm.h>
48#endif
46 49
47#include <asm/uaccess.h> 50#include <asm/uaccess.h>
48 51
52int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
53{
54 int packet_len = skb->tail - skb->nh.raw;
55 struct ipv6_opt_hdr *hdr;
56 int len;
57
58 if (offset + 2 > packet_len)
59 goto bad;
60 hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
61 len = ((hdr->hdrlen + 1) << 3);
62
63 if (offset + len > packet_len)
64 goto bad;
65
66 offset += 2;
67 len -= 2;
68
69 while (len > 0) {
70 int opttype = skb->nh.raw[offset];
71 int optlen;
72
73 if (opttype == type)
74 return offset;
75
76 switch (opttype) {
77 case IPV6_TLV_PAD0:
78 optlen = 1;
79 break;
80 default:
81 optlen = skb->nh.raw[offset + 1] + 2;
82 if (optlen > len)
83 goto bad;
84 break;
85 }
86 offset += optlen;
87 len -= optlen;
88 }
89 /* not_found */
90 bad:
91 return -1;
92}
93
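ipv6_find_tlv() gives the MIP6 code a bounds-checked way to locate a single option inside an extension header whose offset is already known (for example from IP6CB(skb)). A hypothetical caller looking up the home address option:

/* 'destopt_off' is assumed to be the offset of a destination options
 * header within the packet, measured from skb->nh.raw. */
static struct ipv6_destopt_hao *example_find_hao(struct sk_buff *skb,
						 int destopt_off)
{
	int off = ipv6_find_tlv(skb, destopt_off, IPV6_TLV_HAO);

	if (off < 0)
		return NULL;	/* option absent, or TLV area malformed */

	return (struct ipv6_destopt_hao *)(skb->nh.raw + off);
}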
49/* 94/*
50 * Parsing tlv encoded headers. 95 * Parsing tlv encoded headers.
51 * 96 *
@@ -56,7 +101,7 @@
56 101
57struct tlvtype_proc { 102struct tlvtype_proc {
58 int type; 103 int type;
59 int (*func)(struct sk_buff *skb, int offset); 104 int (*func)(struct sk_buff **skbp, int offset);
60}; 105};
61 106
62/********************* 107/*********************
@@ -65,8 +110,10 @@ struct tlvtype_proc {
65 110
66/* An unknown option is detected, decide what to do */ 111/* An unknown option is detected, decide what to do */
67 112
68static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) 113static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
69{ 114{
115 struct sk_buff *skb = *skbp;
116
70 switch ((skb->nh.raw[optoff] & 0xC0) >> 6) { 117 switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
71 case 0: /* ignore */ 118 case 0: /* ignore */
72 return 1; 119 return 1;
@@ -91,8 +138,9 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
91 138
92/* Parse tlv encoded option header (hop-by-hop or destination) */ 139/* Parse tlv encoded option header (hop-by-hop or destination) */
93 140
94static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) 141static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
95{ 142{
143 struct sk_buff *skb = *skbp;
96 struct tlvtype_proc *curr; 144 struct tlvtype_proc *curr;
97 int off = skb->h.raw - skb->nh.raw; 145 int off = skb->h.raw - skb->nh.raw;
98 int len = ((skb->h.raw[1]+1)<<3); 146 int len = ((skb->h.raw[1]+1)<<3);
@@ -122,13 +170,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
122 /* type specific length/alignment 170 /* type specific length/alignment
123 checks will be performed in the 171 checks will be performed in the
124 func(). */ 172 func(). */
125 if (curr->func(skb, off) == 0) 173 if (curr->func(skbp, off) == 0)
126 return 0; 174 return 0;
127 break; 175 break;
128 } 176 }
129 } 177 }
130 if (curr->type < 0) { 178 if (curr->type < 0) {
131 if (ip6_tlvopt_unknown(skb, off) == 0) 179 if (ip6_tlvopt_unknown(skbp, off) == 0)
132 return 0; 180 return 0;
133 } 181 }
134 break; 182 break;
@@ -147,8 +195,85 @@ bad:
147 Destination options header. 195 Destination options header.
148 *****************************/ 196 *****************************/
149 197
198#ifdef CONFIG_IPV6_MIP6
199static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
200{
201 struct sk_buff *skb = *skbp;
202 struct ipv6_destopt_hao *hao;
203 struct inet6_skb_parm *opt = IP6CB(skb);
204 struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
205 struct in6_addr tmp_addr;
206 int ret;
207
208 if (opt->dsthao) {
209 LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
210 goto discard;
211 }
212 opt->dsthao = opt->dst1;
213 opt->dst1 = 0;
214
215 hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
216
217 if (hao->length != 16) {
218 LIMIT_NETDEBUG(
219 KERN_DEBUG "hao invalid option length = %d\n", hao->length);
220 goto discard;
221 }
222
223 if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
224 LIMIT_NETDEBUG(
 225 KERN_DEBUG "hao is not a unicast addr: " NIP6_FMT "\n", NIP6(hao->addr));
226 goto discard;
227 }
228
229 ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
230 (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
231 if (unlikely(ret < 0))
232 goto discard;
233
234 if (skb_cloned(skb)) {
235 struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
236 struct inet6_skb_parm *opt2;
237
238 if (skb2 == NULL)
239 goto discard;
240
241 opt2 = IP6CB(skb2);
242 memcpy(opt2, opt, sizeof(*opt2));
243
244 kfree_skb(skb);
245
 246 /* update all the variables used below to point at the copied skbuff */
247 *skbp = skb = skb2;
248 hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
249 ipv6h = (struct ipv6hdr *)skb2->nh.raw;
250 }
251
252 if (skb->ip_summed == CHECKSUM_COMPLETE)
253 skb->ip_summed = CHECKSUM_NONE;
254
255 ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
256 ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
257 ipv6_addr_copy(&hao->addr, &tmp_addr);
258
259 if (skb->tstamp.off_sec == 0)
260 __net_timestamp(skb);
261
262 return 1;
263
264 discard:
265 kfree_skb(skb);
266 return 0;
267}
268#endif
269
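The data rewrite in ipv6_dest_hao() is why every TLV handler in this file now takes struct sk_buff **: a cloned skb shares its data, so a handler that mutates the payload must first substitute a private copy and propagate the new pointer back to its callers. That copy-on-write step in isolation:

static int example_make_writable(struct sk_buff **skbp)
{
	struct sk_buff *skb = *skbp;

	if (skb_cloned(skb)) {
		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);

		if (skb2 == NULL)
			return -ENOMEM;		/* caller drops the packet */

		kfree_skb(skb);			/* release the shared one */
		*skbp = skb2;			/* callers now see the copy */
	}
	/* *skbp is now safe to modify in place */
	return 0;
}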
150static struct tlvtype_proc tlvprocdestopt_lst[] = { 270static struct tlvtype_proc tlvprocdestopt_lst[] = {
151 /* No destination options are defined now */ 271#ifdef CONFIG_IPV6_MIP6
272 {
273 .type = IPV6_TLV_HAO,
274 .func = ipv6_dest_hao,
275 },
276#endif
152 {-1, NULL} 277 {-1, NULL}
153}; 278};
154 279
@@ -156,6 +281,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
156{ 281{
157 struct sk_buff *skb = *skbp; 282 struct sk_buff *skb = *skbp;
158 struct inet6_skb_parm *opt = IP6CB(skb); 283 struct inet6_skb_parm *opt = IP6CB(skb);
284#ifdef CONFIG_IPV6_MIP6
285 __u16 dstbuf;
286#endif
159 287
160 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || 288 if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
161 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { 289 !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
@@ -166,10 +294,19 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
166 294
167 opt->lastopt = skb->h.raw - skb->nh.raw; 295 opt->lastopt = skb->h.raw - skb->nh.raw;
168 opt->dst1 = skb->h.raw - skb->nh.raw; 296 opt->dst1 = skb->h.raw - skb->nh.raw;
297#ifdef CONFIG_IPV6_MIP6
298 dstbuf = opt->dst1;
299#endif
169 300
170 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { 301 if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
302 skb = *skbp;
171 skb->h.raw += ((skb->h.raw[1]+1)<<3); 303 skb->h.raw += ((skb->h.raw[1]+1)<<3);
304 opt = IP6CB(skb);
305#ifdef CONFIG_IPV6_MIP6
306 opt->nhoff = dstbuf;
307#else
172 opt->nhoff = opt->dst1; 308 opt->nhoff = opt->dst1;
309#endif
173 return 1; 310 return 1;
174 } 311 }
175 312
@@ -219,7 +356,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
219{ 356{
220 struct sk_buff *skb = *skbp; 357 struct sk_buff *skb = *skbp;
221 struct inet6_skb_parm *opt = IP6CB(skb); 358 struct inet6_skb_parm *opt = IP6CB(skb);
222 struct in6_addr *addr; 359 struct in6_addr *addr = NULL;
223 struct in6_addr daddr; 360 struct in6_addr daddr;
224 int n, i; 361 int n, i;
225 362
@@ -244,6 +381,23 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
244 381
245looped_back: 382looped_back:
246 if (hdr->segments_left == 0) { 383 if (hdr->segments_left == 0) {
384 switch (hdr->type) {
385#ifdef CONFIG_IPV6_MIP6
386 case IPV6_SRCRT_TYPE_2:
387 /* Silently discard type 2 header unless it was
 388 * processed by our own MIP6 code
389 */
390 if (!addr) {
391 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
392 kfree_skb(skb);
393 return -1;
394 }
395 break;
396#endif
397 default:
398 break;
399 }
400
247 opt->lastopt = skb->h.raw - skb->nh.raw; 401 opt->lastopt = skb->h.raw - skb->nh.raw;
248 opt->srcrt = skb->h.raw - skb->nh.raw; 402 opt->srcrt = skb->h.raw - skb->nh.raw;
249 skb->h.raw += (hdr->hdrlen + 1) << 3; 403 skb->h.raw += (hdr->hdrlen + 1) << 3;
@@ -253,17 +407,29 @@ looped_back:
 		return 1;
 	}
 
-	if (hdr->type != IPV6_SRCRT_TYPE_0) {
+	switch (hdr->type) {
+	case IPV6_SRCRT_TYPE_0:
+		if (hdr->hdrlen & 0x01) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+			return -1;
+		}
+		break;
+#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
+		/* Silently discard invalid RTH type 2 */
+		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		break;
+#endif
+	default:
 		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
 		return -1;
 	}
-
-	if (hdr->hdrlen & 0x01) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
-		return -1;
-	}
 
 	/*
 	 * This is the routing header forwarding algorithm from
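The switch above accepts a type 0 header only with an even hdrlen (each 16-byte address occupies two 8-byte units) and a type 2 header only in its single legal shape: exactly one address (hdrlen == 2) and one remaining segment. A standalone sketch of the same checks, with the constants inlined (this is an illustration, not the kernel's struct layout):

#include <stdio.h>
#include <stdint.h>

#define IPV6_SRCRT_TYPE_0 0
#define IPV6_SRCRT_TYPE_2 2

struct rt_hdr {                 /* leading fields of an IPv6 routing header */
	uint8_t nexthdr;
	uint8_t hdrlen;         /* length in 8-byte units, excluding first 8 */
	uint8_t type;
	uint8_t segments_left;
};

static int rthdr_valid(const struct rt_hdr *h)
{
	switch (h->type) {
	case IPV6_SRCRT_TYPE_0:
		return (h->hdrlen & 0x01) == 0;   /* whole 16-byte addresses */
	case IPV6_SRCRT_TYPE_2:
		return h->hdrlen == 2 && h->segments_left == 1;
	default:
		return 0;                         /* unknown type: reject */
	}
}

int main(void)
{
	struct rt_hdr ok2 = { 0, 2, IPV6_SRCRT_TYPE_2, 1 };
	struct rt_hdr bad = { 0, 3, IPV6_SRCRT_TYPE_0, 1 };

	printf("type 2: %d, odd type 0: %d\n", rthdr_valid(&ok2), rthdr_valid(&bad));
	return 0;
}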
@@ -294,7 +460,7 @@ looped_back:
 		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
 	}
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 
 	i = n - --hdr->segments_left;
@@ -303,6 +469,27 @@ looped_back:
 	addr = rthdr->addr;
 	addr += i - 1;
 
+	switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
+		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
+				     (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+				     IPPROTO_ROUTING) < 0) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		if (!ipv6_chk_home_addr(addr)) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		break;
+#endif
+	default:
+		break;
+	}
+
 	if (ipv6_addr_is_multicast(addr)) {
 		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
@@ -421,8 +608,10 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
 
 /* Router Alert as of RFC 2711 */
 
-static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 {
+	struct sk_buff *skb = *skbp;
+
 	if (skb->nh.raw[optoff+1] == 2) {
 		IP6CB(skb)->ra = optoff;
 		return 1;
@@ -435,8 +624,9 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
 
 /* Jumbo payload */
 
-static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
 {
+	struct sk_buff *skb = *skbp;
 	u32 pkt_len;
 
 	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
@@ -485,8 +675,9 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
 	{ -1, }
 };
 
-int ipv6_parse_hopopts(struct sk_buff *skb)
+int ipv6_parse_hopopts(struct sk_buff **skbp)
 {
+	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
@@ -502,8 +693,10 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
 	}
 
 	opt->hop = sizeof(struct ipv6hdr);
-	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
+	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
+		skb = *skbp;
 		skb->h.raw += (skb->h.raw[1]+1)<<3;
+		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
 	}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
new file mode 100644
index 000000000000..34f5bfaddfc2
--- /dev/null
+++ b/net/ipv6/fib6_rules.c
@@ -0,0 +1,305 @@
1/*
2 * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules
3 *
4 * Copyright (C)2003-2006 Helsinki University of Technology
5 * Copyright (C)2003-2006 USAGI/WIDE Project
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation, version 2.
10 *
11 * Authors
12 * Thomas Graf <tgraf@suug.ch>
13 * Ville Nuorvala <vnuorval@tcs.hut.fi>
14 */
15
16#include <linux/config.h>
17#include <linux/netdevice.h>
18
19#include <net/fib_rules.h>
20#include <net/ipv6.h>
21#include <net/ip6_route.h>
22#include <net/netlink.h>
23
24struct fib6_rule
25{
26 struct fib_rule common;
27 struct rt6key src;
28 struct rt6key dst;
29#ifdef CONFIG_IPV6_ROUTE_FWMARK
30 u32 fwmark;
31 u32 fwmask;
32#endif
33 u8 tclass;
34};
35
36static struct fib_rules_ops fib6_rules_ops;
37
38static struct fib6_rule main_rule = {
39 .common = {
40 .refcnt = ATOMIC_INIT(2),
41 .pref = 0x7FFE,
42 .action = FR_ACT_TO_TBL,
43 .table = RT6_TABLE_MAIN,
44 },
45};
46
47static struct fib6_rule local_rule = {
48 .common = {
49 .refcnt = ATOMIC_INIT(2),
50 .pref = 0,
51 .action = FR_ACT_TO_TBL,
52 .table = RT6_TABLE_LOCAL,
53 .flags = FIB_RULE_PERMANENT,
54 },
55};
56
57static LIST_HEAD(fib6_rules);
58
59struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
60 pol_lookup_t lookup)
61{
62 struct fib_lookup_arg arg = {
63 .lookup_ptr = lookup,
64 };
65
66 fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg);
67 if (arg.rule)
68 fib_rule_put(arg.rule);
69
70 if (arg.result)
71 return (struct dst_entry *) arg.result;
72
73 dst_hold(&ip6_null_entry.u.dst);
74 return &ip6_null_entry.u.dst;
75}
76
77static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
78 int flags, struct fib_lookup_arg *arg)
79{
80 struct rt6_info *rt = NULL;
81 struct fib6_table *table;
82 pol_lookup_t lookup = arg->lookup_ptr;
83
84 switch (rule->action) {
85 case FR_ACT_TO_TBL:
86 break;
87 case FR_ACT_UNREACHABLE:
88 rt = &ip6_null_entry;
89 goto discard_pkt;
90 default:
91 case FR_ACT_BLACKHOLE:
92 rt = &ip6_blk_hole_entry;
93 goto discard_pkt;
94 case FR_ACT_PROHIBIT:
95 rt = &ip6_prohibit_entry;
96 goto discard_pkt;
97 }
98
99 table = fib6_get_table(rule->table);
100 if (table)
101 rt = lookup(table, flp, flags);
102
103 if (rt != &ip6_null_entry)
104 goto out;
105 dst_release(&rt->u.dst);
106 rt = NULL;
107 goto out;
108
109discard_pkt:
110 dst_hold(&rt->u.dst);
111out:
112 arg->result = rt;
113 return rt == NULL ? -EAGAIN : 0;
114}
115
116
117static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
118{
119 struct fib6_rule *r = (struct fib6_rule *) rule;
120
121 if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
122 return 0;
123
124 if ((flags & RT6_LOOKUP_F_HAS_SADDR) &&
125 !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
126 return 0;
127
128 if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff))
129 return 0;
130
131#ifdef CONFIG_IPV6_ROUTE_FWMARK
132 if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask)
133 return 0;
134#endif
135
136 return 1;
137}
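fib6_rule_match() reduces to prefix comparisons: a rule matches when the flow's addresses agree with the rule's source/destination keys on the first plen bits. A userspace sketch of such a bitwise prefix test, equivalent in spirit to the kernel's ipv6_prefix_equal() (error handling omitted):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Return 1 if the first plen bits of a and b are equal. */
static int prefix_equal(const struct in6_addr *a, const struct in6_addr *b,
			unsigned int plen)
{
	unsigned int bytes = plen / 8, bits = plen % 8;

	if (memcmp(a, b, bytes) != 0)
		return 0;
	if (bits == 0)
		return 1;
	/* compare the remaining high-order bits of the next byte */
	return ((a->s6_addr[bytes] ^ b->s6_addr[bytes]) &
		(0xffu << (8 - bits))) == 0;
}

int main(void)
{
	struct in6_addr x, y;

	inet_pton(AF_INET6, "2001:db8:1::1", &x);
	inet_pton(AF_INET6, "2001:db8:1:ffff::2", &y);

	printf("/48 match: %d, /64 match: %d\n",
	       prefix_equal(&x, &y, 48), prefix_equal(&x, &y, 64));
	return 0;
}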
138
139static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
140 [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
141 [FRA_PRIORITY] = { .type = NLA_U32 },
142 [FRA_SRC] = { .len = sizeof(struct in6_addr) },
143 [FRA_DST] = { .len = sizeof(struct in6_addr) },
144 [FRA_FWMARK] = { .type = NLA_U32 },
145 [FRA_FWMASK] = { .type = NLA_U32 },
146 [FRA_TABLE] = { .type = NLA_U32 },
147};
148
149static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
150 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
151 struct nlattr **tb)
152{
153 int err = -EINVAL;
154 struct fib6_rule *rule6 = (struct fib6_rule *) rule;
155
156 if (frh->src_len > 128 || frh->dst_len > 128 ||
157 (frh->tos & ~IPV6_FLOWINFO_MASK))
158 goto errout;
159
160 if (rule->action == FR_ACT_TO_TBL) {
161 if (rule->table == RT6_TABLE_UNSPEC)
162 goto errout;
163
164 if (fib6_new_table(rule->table) == NULL) {
165 err = -ENOBUFS;
166 goto errout;
167 }
168 }
169
170 if (tb[FRA_SRC])
171 nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
172 sizeof(struct in6_addr));
173
174 if (tb[FRA_DST])
175 nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
176 sizeof(struct in6_addr));
177
178#ifdef CONFIG_IPV6_ROUTE_FWMARK
179 if (tb[FRA_FWMARK]) {
180 rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]);
181 if (rule6->fwmark) {
182 /*
183 * if the mark value is non-zero,
184 * all bits are compared by default
185 * unless a mask is explicitly specified.
186 */
187 rule6->fwmask = 0xFFFFFFFF;
188 }
189 }
190
191 if (tb[FRA_FWMASK])
192 rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]);
193#endif
194
195 rule6->src.plen = frh->src_len;
196 rule6->dst.plen = frh->dst_len;
197 rule6->tclass = frh->tos;
198
199 err = 0;
200errout:
201 return err;
202}
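The fwmark test used by these rules is ((rule->fwmark ^ flow_mark) & rule->fwmask) == 0, i.e. only the bits selected by the mask must agree, and the configure path above defaults the mask to all-ones whenever a non-zero mark is given without an explicit FRA_FWMASK. A few worked cases (values are illustrative):

#include <stdio.h>
#include <stdint.h>

/* Match iff the flow mark equals the rule mark on every bit set in fwmask. */
static int mark_match(uint32_t fwmark, uint32_t fwmask, uint32_t flow_mark)
{
	return ((fwmark ^ flow_mark) & fwmask) == 0;
}

int main(void)
{
	/* mark 0x11 with the default mask 0xffffffff: exact match required */
	printf("%d\n", mark_match(0x11, 0xffffffff, 0x11));  /* 1 */
	printf("%d\n", mark_match(0x11, 0xffffffff, 0x111)); /* 0 */
	/* explicit mask 0xff: only the low byte is compared */
	printf("%d\n", mark_match(0x11, 0xff, 0x4711));      /* 1 */
	return 0;
}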
203
204static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
206{
207 struct fib6_rule *rule6 = (struct fib6_rule *) rule;
208
209 if (frh->src_len && (rule6->src.plen != frh->src_len))
210 return 0;
211
212 if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
213 return 0;
214
215 if (frh->tos && (rule6->tclass != frh->tos))
216 return 0;
217
218 if (tb[FRA_SRC] &&
219 nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
220 return 0;
221
222 if (tb[FRA_DST] &&
223 nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
224 return 0;
225
226#ifdef CONFIG_IPV6_ROUTE_FWMARK
227 if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK])))
228 return 0;
229
230 if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK])))
231 return 0;
232#endif
233
234 return 1;
235}
236
237static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
238 struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
239{
240 struct fib6_rule *rule6 = (struct fib6_rule *) rule;
241
242 frh->family = AF_INET6;
243 frh->dst_len = rule6->dst.plen;
244 frh->src_len = rule6->src.plen;
245 frh->tos = rule6->tclass;
246
247 if (rule6->dst.plen)
248 NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr),
249 &rule6->dst.addr);
250
251 if (rule6->src.plen)
252 NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
253 &rule6->src.addr);
254
255#ifdef CONFIG_IPV6_ROUTE_FWMARK
256 if (rule6->fwmark)
257 NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark);
258
259 if (rule6->fwmask || rule6->fwmark)
260 NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask);
261#endif
262
263 return 0;
264
265nla_put_failure:
266 return -ENOBUFS;
267}
268
269int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
270{
271 return fib_rules_dump(skb, cb, AF_INET6);
272}
273
274static u32 fib6_rule_default_pref(void)
275{
276 return 0x3FFF;
277}
278
279static struct fib_rules_ops fib6_rules_ops = {
280 .family = AF_INET6,
281 .rule_size = sizeof(struct fib6_rule),
282 .action = fib6_rule_action,
283 .match = fib6_rule_match,
284 .configure = fib6_rule_configure,
285 .compare = fib6_rule_compare,
286 .fill = fib6_rule_fill,
287 .default_pref = fib6_rule_default_pref,
288 .nlgroup = RTNLGRP_IPV6_RULE,
289 .policy = fib6_rule_policy,
290 .rules_list = &fib6_rules,
291 .owner = THIS_MODULE,
292};
293
294void __init fib6_rules_init(void)
295{
296 list_add_tail(&local_rule.common.list, &fib6_rules);
297 list_add_tail(&main_rule.common.list, &fib6_rules);
298
299 fib_rules_register(&fib6_rules_ops);
300}
301
302void fib6_rules_cleanup(void)
303{
304 fib_rules_unregister(&fib6_rules_ops);
305}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 356a8a7ef22a..4ec876066b3f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -151,7 +151,7 @@ static int is_ineligible(struct sk_buff *skb)
 	return 0;
 }
 
-static int sysctl_icmpv6_time = 1*HZ;
+static int sysctl_icmpv6_time __read_mostly = 1*HZ;
 
 /*
  *	Check the ICMP output rate limit
@@ -273,6 +273,29 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
 	return 0;
 }
 
+#ifdef CONFIG_IPV6_MIP6
+static void mip6_addr_swap(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct ipv6_destopt_hao *hao;
+	struct in6_addr tmp;
+	int off;
+
+	if (opt->dsthao) {
+		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+		if (likely(off >= 0)) {
+			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+			ipv6_addr_copy(&tmp, &iph->saddr);
+			ipv6_addr_copy(&iph->saddr, &hao->addr);
+			ipv6_addr_copy(&hao->addr, &tmp);
+		}
+	}
+}
+#else
+static inline void mip6_addr_swap(struct sk_buff *skb) {}
+#endif
+
 /*
  *	Send an ICMP message in response to a packet in error
  */
@@ -350,6 +373,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		return;
 	}
 
+	mip6_addr_swap(skb);
+
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_ICMPV6;
 	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
@@ -358,6 +383,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	fl.oif = iif;
 	fl.fl_icmp_type = type;
 	fl.fl_icmp_code = code;
+	security_skb_classify_flow(skb, &fl);
 
 	if (icmpv6_xmit_lock())
 		return;
@@ -472,6 +498,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = skb->dev->ifindex;
 	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+	security_skb_classify_flow(skb, &fl);
 
 	if (icmpv6_xmit_lock())
 		return;
@@ -604,7 +631,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 
 	/* Perform checksum. */
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
 				     skb->csum))
 			break;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index bf491077b822..827f41d1478b 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -157,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_sport = inet->sport;
 	fl.fl_ip_dport = inet->dport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -185,7 +186,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 			return err;
 		}
 
-		__ip6_dst_store(sk, dst, NULL);
+		__ip6_dst_store(sk, dst, NULL, NULL);
 	}
 
 	skb->dst = dst_clone(dst);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 764221220afd..8fcae7a6510b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -18,6 +18,7 @@
  *	Yuji SEKIYA @USAGI:	Support default route on router node;
  *				remove ip6_null_entry from the top of
  *				routing table.
+ *	Ville Nuorvala:		Fixed routing subtrees.
  */
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -26,6 +27,7 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/init.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
@@ -68,19 +70,19 @@ struct fib6_cleaner_t
 	void *arg;
 };
 
-DEFINE_RWLOCK(fib6_walker_lock);
-
+static DEFINE_RWLOCK(fib6_walker_lock);
 
 #ifdef CONFIG_IPV6_SUBTREES
 #define FWS_INIT FWS_S
-#define SUBTREE(fn) ((fn)->subtree)
 #else
 #define FWS_INIT FWS_L
-#define SUBTREE(fn) NULL
 #endif
 
 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
+static struct rt6_info * fib6_find_prefix(struct fib6_node *fn);
 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+static int fib6_walk(struct fib6_walker_t *w);
+static int fib6_walk_continue(struct fib6_walker_t *w);
 
 /*
  *	A routing update causes an increase of the serial number on the
@@ -93,13 +95,31 @@ static __u32 rt_sernum;
 
 static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
 
-struct fib6_walker_t fib6_walker_list = {
+static struct fib6_walker_t fib6_walker_list = {
 	.prev	= &fib6_walker_list,
 	.next	= &fib6_walker_list,
 };
 
 #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
 
+static inline void fib6_walker_link(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	w->next = fib6_walker_list.next;
+	w->prev = &fib6_walker_list;
+	w->next->prev = w;
+	w->prev->next = w;
+	write_unlock_bh(&fib6_walker_lock);
+}
+
+static inline void fib6_walker_unlink(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	w->next->prev = w->prev;
+	w->prev->next = w->next;
+	w->prev = w->next = w;
+	write_unlock_bh(&fib6_walker_lock);
+}
 static __inline__ u32 fib6_new_sernum(void)
 {
 	u32 n = ++rt_sernum;
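fib6_walker_link()/fib6_walker_unlink() keep active walkers on a circular doubly linked list anchored by a sentinel, so insertion and removal are O(1) with no head/tail special cases (the kernel versions additionally hold fib6_walker_lock). A minimal single-threaded sketch of the same list discipline:

#include <stdio.h>

struct walker {
	struct walker *prev, *next;
	int id;
};

/* sentinel: an empty list points at itself */
static struct walker walker_list = { &walker_list, &walker_list, 0 };

static void walker_link(struct walker *w)
{
	w->next = walker_list.next;
	w->prev = &walker_list;
	w->next->prev = w;
	w->prev->next = w;
}

static void walker_unlink(struct walker *w)
{
	w->next->prev = w->prev;
	w->prev->next = w->next;
	w->prev = w->next = w;   /* leave w self-linked, safe to re-unlink */
}

int main(void)
{
	struct walker a = { 0, 0, 1 }, b = { 0, 0, 2 };
	struct walker *it;

	walker_link(&a);
	walker_link(&b);
	walker_unlink(&a);

	for (it = walker_list.next; it != &walker_list; it = it->next)
		printf("walker %d\n", it->id);   /* prints only walker 2 */
	return 0;
}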
@@ -147,6 +167,253 @@ static __inline__ void rt6_release(struct rt6_info *rt)
 	dst_free(&rt->u.dst);
 }
 
170static struct fib6_table fib6_main_tbl = {
171 .tb6_id = RT6_TABLE_MAIN,
172 .tb6_lock = RW_LOCK_UNLOCKED,
173 .tb6_root = {
174 .leaf = &ip6_null_entry,
175 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
176 },
177};
178
179#ifdef CONFIG_IPV6_MULTIPLE_TABLES
180#define FIB_TABLE_HASHSZ 256
181#else
182#define FIB_TABLE_HASHSZ 1
183#endif
184static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
185
186static void fib6_link_table(struct fib6_table *tb)
187{
188 unsigned int h;
189
190 h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
191
192 /*
193	 * No protection necessary, this is the only list mutation
194 * operation, tables never disappear once they exist.
195 */
196 hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
197}
198
199#ifdef CONFIG_IPV6_MULTIPLE_TABLES
200static struct fib6_table fib6_local_tbl = {
201 .tb6_id = RT6_TABLE_LOCAL,
202 .tb6_lock = RW_LOCK_UNLOCKED,
203 .tb6_root = {
204 .leaf = &ip6_null_entry,
205 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
206 },
207};
208
209static struct fib6_table *fib6_alloc_table(u32 id)
210{
211 struct fib6_table *table;
212
213 table = kzalloc(sizeof(*table), GFP_ATOMIC);
214 if (table != NULL) {
215 table->tb6_id = id;
216 table->tb6_lock = RW_LOCK_UNLOCKED;
217 table->tb6_root.leaf = &ip6_null_entry;
218 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
219 }
220
221 return table;
222}
223
224struct fib6_table *fib6_new_table(u32 id)
225{
226 struct fib6_table *tb;
227
228 if (id == 0)
229 id = RT6_TABLE_MAIN;
230 tb = fib6_get_table(id);
231 if (tb)
232 return tb;
233
234 tb = fib6_alloc_table(id);
235 if (tb != NULL)
236 fib6_link_table(tb);
237
238 return tb;
239}
240
241struct fib6_table *fib6_get_table(u32 id)
242{
243 struct fib6_table *tb;
244 struct hlist_node *node;
245 unsigned int h;
246
247 if (id == 0)
248 id = RT6_TABLE_MAIN;
249 h = id & (FIB_TABLE_HASHSZ - 1);
250 rcu_read_lock();
251 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) {
252 if (tb->tb6_id == id) {
253 rcu_read_unlock();
254 return tb;
255 }
256 }
257 rcu_read_unlock();
258
259 return NULL;
260}
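fib6_get_table() indexes a power-of-two hash array with id & (FIB_TABLE_HASHSZ - 1) and scans one short chain; since tables are never freed, readers only need RCU in the real code. A plain single-threaded sketch (no RCU, illustrative names) of the same create-once/lookup pattern:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define TABLE_HASHSZ 256   /* must stay a power of two for the mask trick */

struct table {
	uint32_t id;
	struct table *next;  /* hash-chain link */
};

static struct table *table_hash[TABLE_HASHSZ];

static struct table *table_get(uint32_t id)
{
	struct table *t;

	for (t = table_hash[id & (TABLE_HASHSZ - 1)]; t; t = t->next)
		if (t->id == id)
			return t;
	return NULL;
}

static struct table *table_new(uint32_t id)
{
	struct table *t = table_get(id);

	if (t)
		return t;            /* create-once semantics */
	t = calloc(1, sizeof(*t));
	t->id = id;
	t->next = table_hash[id & (TABLE_HASHSZ - 1)];
	table_hash[id & (TABLE_HASHSZ - 1)] = t;
	return t;
}

int main(void)
{
	table_new(254);              /* e.g. the main table id */
	printf("found: %d\n", table_get(254) != NULL);
	printf("missing: %d\n", table_get(99) != NULL);
	return 0;
}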
261
262static void __init fib6_tables_init(void)
263{
264 fib6_link_table(&fib6_main_tbl);
265 fib6_link_table(&fib6_local_tbl);
266}
267
268#else
269
270struct fib6_table *fib6_new_table(u32 id)
271{
272 return fib6_get_table(id);
273}
274
275struct fib6_table *fib6_get_table(u32 id)
276{
277 return &fib6_main_tbl;
278}
279
280struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
281 pol_lookup_t lookup)
282{
283 return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
284}
285
286static void __init fib6_tables_init(void)
287{
288 fib6_link_table(&fib6_main_tbl);
289}
290
291#endif
292
293static int fib6_dump_node(struct fib6_walker_t *w)
294{
295 int res;
296 struct rt6_info *rt;
297
298 for (rt = w->leaf; rt; rt = rt->u.next) {
299 res = rt6_dump_route(rt, w->args);
300 if (res < 0) {
301 /* Frame is full, suspend walking */
302 w->leaf = rt;
303 return 1;
304 }
305 BUG_TRAP(res!=0);
306 }
307 w->leaf = NULL;
308 return 0;
309}
310
311static void fib6_dump_end(struct netlink_callback *cb)
312{
313 struct fib6_walker_t *w = (void*)cb->args[2];
314
315 if (w) {
316 cb->args[2] = 0;
317 kfree(w);
318 }
319 cb->done = (void*)cb->args[3];
320 cb->args[1] = 3;
321}
322
323static int fib6_dump_done(struct netlink_callback *cb)
324{
325 fib6_dump_end(cb);
326 return cb->done ? cb->done(cb) : 0;
327}
328
329static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
330 struct netlink_callback *cb)
331{
332 struct fib6_walker_t *w;
333 int res;
334
335 w = (void *)cb->args[2];
336 w->root = &table->tb6_root;
337
338 if (cb->args[4] == 0) {
339 read_lock_bh(&table->tb6_lock);
340 res = fib6_walk(w);
341 read_unlock_bh(&table->tb6_lock);
342 if (res > 0)
343 cb->args[4] = 1;
344 } else {
345 read_lock_bh(&table->tb6_lock);
346 res = fib6_walk_continue(w);
347 read_unlock_bh(&table->tb6_lock);
348 if (res != 0) {
349 if (res < 0)
350 fib6_walker_unlink(w);
351 goto end;
352 }
353 fib6_walker_unlink(w);
354 cb->args[4] = 0;
355 }
356end:
357 return res;
358}
359
360int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
361{
362 unsigned int h, s_h;
363 unsigned int e = 0, s_e;
364 struct rt6_rtnl_dump_arg arg;
365 struct fib6_walker_t *w;
366 struct fib6_table *tb;
367 struct hlist_node *node;
368 int res = 0;
369
370 s_h = cb->args[0];
371 s_e = cb->args[1];
372
373 w = (void *)cb->args[2];
374 if (w == NULL) {
375 /* New dump:
376 *
377 * 1. hook callback destructor.
378 */
379 cb->args[3] = (long)cb->done;
380 cb->done = fib6_dump_done;
381
382 /*
383 * 2. allocate and initialize walker.
384 */
385 w = kzalloc(sizeof(*w), GFP_ATOMIC);
386 if (w == NULL)
387 return -ENOMEM;
388 w->func = fib6_dump_node;
389 cb->args[2] = (long)w;
390 }
391
392 arg.skb = skb;
393 arg.cb = cb;
394 w->args = &arg;
395
396 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
397 e = 0;
398 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) {
399 if (e < s_e)
400 goto next;
401 res = fib6_dump_table(tb, skb, cb);
402 if (res != 0)
403 goto out;
404next:
405 e++;
406 }
407 }
408out:
409 cb->args[1] = e;
410 cb->args[0] = h;
411
412 res = res < 0 ? res : skb->len;
413 if (res <= 0)
414 fib6_dump_end(cb);
415 return res;
416}
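inet6_dump_fib() has to fill one netlink skb at a time, so the walk is made resumable: the hash slot and chain position live in cb->args, and a heap-allocated walker remembers where inside a tree the previous dump stopped (cb->args[4] flags a suspended walk). The same pattern, reduced to a userspace iterator that emits a bounded batch per call (a sketch of the idea, not the netlink API):

#include <stdio.h>

struct dump_state {
	int pos;    /* next element to emit, persisted between calls */
};

/* Emit at most batch elements of 0..total-1; return 1 while suspended,
 * 0 when the walk is complete (mirrors fib6_walk/fib6_walk_continue). */
static int dump_continue(struct dump_state *st, int total, int batch)
{
	int emitted = 0;

	while (st->pos < total && emitted < batch) {
		printf("entry %d\n", st->pos);
		st->pos++;
		emitted++;
	}
	return st->pos < total;
}

int main(void)
{
	struct dump_state st = { 0 };

	while (dump_continue(&st, 7, 3))
		printf("-- buffer full, resuming --\n");
	return 0;
}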
 
 /*
  *	Routing Table
@@ -343,7 +610,7 @@ insert_above:
  */
 
 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
-			    struct nlmsghdr *nlh, struct netlink_skb_parms *req)
+			    struct nl_info *info)
 {
 	struct rt6_info *iter = NULL;
 	struct rt6_info **ins;
@@ -398,7 +665,7 @@ out:
 	*ins = rt;
 	rt->rt6i_node = fn;
 	atomic_inc(&rt->rt6i_ref);
-	inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
+	inet6_rt_notify(RTM_NEWROUTE, rt, info);
 	rt6_stats.fib_rt_entries++;
 
 	if ((fn->fn_flags & RTN_RTINFO) == 0) {
@@ -428,10 +695,9 @@ void fib6_force_start_gc(void)
  *	with source addr info in sub-trees
  */
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt,
-	     struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 {
-	struct fib6_node *fn;
+	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
 
 	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
@@ -440,6 +706,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	if (fn == NULL)
 		goto out;
 
+	pn = fn;
+
 #ifdef CONFIG_IPV6_SUBTREES
 	if (rt->rt6i_src.plen) {
 		struct fib6_node *sn;
@@ -485,10 +753,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			/* Now link new subtree to main tree */
 			sfn->parent = fn;
 			fn->subtree = sfn;
-			if (fn->leaf == NULL) {
-				fn->leaf = rt;
-				atomic_inc(&rt->rt6i_ref);
-			}
 		} else {
 			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
 					sizeof(struct in6_addr), rt->rt6i_src.plen,
@@ -498,21 +762,42 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 				goto st_failure;
 		}
 
+		if (fn->leaf == NULL) {
+			fn->leaf = rt;
+			atomic_inc(&rt->rt6i_ref);
+		}
 		fn = sn;
 	}
 #endif
 
-	err = fib6_add_rt2node(fn, rt, nlh, req);
+	err = fib6_add_rt2node(fn, rt, info);
 
 	if (err == 0) {
 		fib6_start_gc(rt);
 		if (!(rt->rt6i_flags&RTF_CACHE))
-			fib6_prune_clones(fn, rt);
+			fib6_prune_clones(pn, rt);
 	}
 
 out:
-	if (err)
+	if (err) {
+#ifdef CONFIG_IPV6_SUBTREES
+		/*
+		 * If fib6_add_1 has cleared the old leaf pointer in the
+		 * super-tree leaf node we have to find a new one for it.
+		 */
+		if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
+			pn->leaf = fib6_find_prefix(pn);
+#if RT6_DEBUG >= 2
+			if (!pn->leaf) {
+				BUG_TRAP(pn->leaf != NULL);
+				pn->leaf = &ip6_null_entry;
+			}
+#endif
+			atomic_inc(&pn->leaf->rt6i_ref);
+		}
+#endif
 		dst_free(&rt->u.dst);
+	}
 	return err;
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -543,6 +828,9 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 	struct fib6_node *fn;
 	int dir;
 
+	if (unlikely(args->offset == 0))
+		return NULL;
+
 	/*
 	 *	Descend on a tree
 	 */
@@ -564,33 +852,26 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 			break;
 	}
 
-	while ((fn->fn_flags & RTN_ROOT) == 0) {
-#ifdef CONFIG_IPV6_SUBTREES
-		if (fn->subtree) {
-			struct fib6_node *st;
-			struct lookup_args *narg;
-
-			narg = args + 1;
-
-			if (narg->addr) {
-				st = fib6_lookup_1(fn->subtree, narg);
-
-				if (st && !(st->fn_flags & RTN_ROOT))
-					return st;
-			}
-		}
-#endif
-
-		if (fn->fn_flags & RTN_RTINFO) {
+	while(fn) {
+		if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
 			struct rt6key *key;
 
 			key = (struct rt6key *) ((u8 *) fn->leaf +
 						 args->offset);
 
-			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
-				return fn;
+			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
+#ifdef CONFIG_IPV6_SUBTREES
+				if (fn->subtree)
+					fn = fib6_lookup_1(fn->subtree, args + 1);
+#endif
+				if (!fn || fn->fn_flags & RTN_RTINFO)
+					return fn;
+			}
 		}
 
+		if (fn->fn_flags & RTN_ROOT)
+			break;
+
 		fn = fn->parent;
 	}
 
@@ -600,18 +881,24 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
 struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
 			       struct in6_addr *saddr)
 {
-	struct lookup_args args[2];
 	struct fib6_node *fn;
-
-	args[0].offset = offsetof(struct rt6_info, rt6i_dst);
-	args[0].addr = daddr;
-
+	struct lookup_args args[] = {
+		{
+			.offset = offsetof(struct rt6_info, rt6i_dst),
+			.addr = daddr,
+		},
 #ifdef CONFIG_IPV6_SUBTREES
-	args[1].offset = offsetof(struct rt6_info, rt6i_src);
-	args[1].addr = saddr;
+		{
+			.offset = offsetof(struct rt6_info, rt6i_src),
+			.addr = saddr,
+		},
 #endif
+		{
+			.offset = 0,	/* sentinel */
+		}
+	};
 
-	fn = fib6_lookup_1(root, args);
+	fn = fib6_lookup_1(root, daddr ? args : args + 1);
 
 	if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
 		fn = root;
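The rewritten fib6_lookup() replaces the fixed two-element array with a sentinel-terminated one (.offset == 0 marks the end), which lets fib6_lookup_1() recurse into subtrees with args + 1 and stop safely, and lets the daddr ? args : args + 1 trick skip the destination key entirely. A small sketch of that sentinel-array idiom:

#include <stdio.h>
#include <stddef.h>

struct lookup_arg {
	size_t offset;          /* 0 terminates the array */
	const char *name;
};

static void consume(const struct lookup_arg *a)
{
	for (; a->offset != 0; a++)     /* the sentinel stops the walk */
		printf("match on %s (offset %zu)\n", a->name, a->offset);
}

int main(void)
{
	struct lookup_arg args[] = {
		{ 16, "dst" },
		{ 32, "src" },
		{ 0 },                  /* sentinel */
	};
	int have_dst = 0;

	/* skip the first key when no destination is supplied */
	consume(have_dst ? args : args + 1);
	return 0;
}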
@@ -667,10 +954,8 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src_len) {
 		BUG_TRAP(saddr!=NULL);
-		if (fn == NULL)
-			fn = fn->subtree;
-		if (fn)
-			fn = fib6_locate_1(fn, saddr, src_len,
+		if (fn && fn->subtree)
+			fn = fib6_locate_1(fn->subtree, saddr, src_len,
 					   offsetof(struct rt6_info, rt6i_src));
 	}
 #endif
@@ -699,7 +984,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
 		if(fn->right)
 			return fn->right->leaf;
 
-		fn = SUBTREE(fn);
+		fn = FIB6_SUBTREE(fn);
 	}
 	return NULL;
 }
@@ -730,7 +1015,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 		if (fn->right) child = fn->right, children |= 1;
 		if (fn->left) child = fn->left, children |= 2;
 
-		if (children == 3 || SUBTREE(fn)
+		if (children == 3 || FIB6_SUBTREE(fn)
 #ifdef CONFIG_IPV6_SUBTREES
 		    /* Subtree root (i.e. fn) may have one child */
 		    || (children && fn->fn_flags&RTN_ROOT)
@@ -749,9 +1034,9 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 
 		pn = fn->parent;
 #ifdef CONFIG_IPV6_SUBTREES
-		if (SUBTREE(pn) == fn) {
+		if (FIB6_SUBTREE(pn) == fn) {
 			BUG_TRAP(fn->fn_flags&RTN_ROOT);
-			SUBTREE(pn) = NULL;
+			FIB6_SUBTREE(pn) = NULL;
 			nstate = FWS_L;
 		} else {
 			BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
@@ -799,7 +1084,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 		read_unlock(&fib6_walker_lock);
 
 		node_free(fn);
-		if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
+		if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
 			return pn;
 
 		rt6_release(pn->leaf);
@@ -809,7 +1094,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
 }
 
 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
-			   struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+			   struct nl_info *info)
 {
 	struct fib6_walker_t *w;
 	struct rt6_info *rt = *rtp;
@@ -865,11 +1150,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 		if (atomic_read(&rt->rt6i_ref) != 1) BUG();
 	}
 
-	inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
+	inet6_rt_notify(RTM_DELROUTE, rt, info);
 	rt6_release(rt);
 }
 
-int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_del(struct rt6_info *rt, struct nl_info *info)
 {
 	struct fib6_node *fn = rt->rt6i_node;
 	struct rt6_info **rtp;
@@ -885,8 +1170,18 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
 
 	BUG_TRAP(fn->fn_flags&RTN_RTINFO);
 
-	if (!(rt->rt6i_flags&RTF_CACHE))
-		fib6_prune_clones(fn, rt);
+	if (!(rt->rt6i_flags&RTF_CACHE)) {
+		struct fib6_node *pn = fn;
+#ifdef CONFIG_IPV6_SUBTREES
+		/* clones of this route might be in another subtree */
+		if (rt->rt6i_src.plen) {
+			while (!(pn->fn_flags&RTN_ROOT))
+				pn = pn->parent;
+			pn = pn->parent;
+		}
+#endif
+		fib6_prune_clones(pn, rt);
+	}
 
 	/*
 	 *	Walk the leaf entries looking for ourself
@@ -894,7 +1189,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
 
 	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
 		if (*rtp == rt) {
-			fib6_del_route(fn, rtp, nlh, _rtattr, req);
+			fib6_del_route(fn, rtp, info);
 			return 0;
 		}
 	}
@@ -925,7 +1220,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne
  *	<0  -> walk is terminated by an error.
  */
 
-int fib6_walk_continue(struct fib6_walker_t *w)
+static int fib6_walk_continue(struct fib6_walker_t *w)
 {
 	struct fib6_node *fn, *pn;
 
@@ -942,8 +1237,8 @@ int fib6_walk_continue(struct fib6_walker_t *w)
 		switch (w->state) {
 #ifdef CONFIG_IPV6_SUBTREES
 		case FWS_S:
-			if (SUBTREE(fn)) {
-				w->node = SUBTREE(fn);
+			if (FIB6_SUBTREE(fn)) {
+				w->node = FIB6_SUBTREE(fn);
 				continue;
 			}
 			w->state = FWS_L;
@@ -977,7 +1272,7 @@ int fib6_walk_continue(struct fib6_walker_t *w)
 			pn = fn->parent;
 			w->node = pn;
 #ifdef CONFIG_IPV6_SUBTREES
-			if (SUBTREE(pn) == fn) {
+			if (FIB6_SUBTREE(pn) == fn) {
 				BUG_TRAP(fn->fn_flags&RTN_ROOT);
 				w->state = FWS_L;
 				continue;
@@ -999,7 +1294,7 @@ int fib6_walk_continue(struct fib6_walker_t *w)
 	}
 }
 
-int fib6_walk(struct fib6_walker_t *w)
+static int fib6_walk(struct fib6_walker_t *w)
 {
 	int res;
 
@@ -1023,7 +1318,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
-			res = fib6_del(rt, NULL, NULL, NULL);
+			res = fib6_del(rt, NULL);
 			if (res) {
 #if RT6_DEBUG >= 2
 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
@@ -1049,9 +1344,9 @@ static int fib6_clean_node(struct fib6_walker_t *w)
  *	ignoring pure split nodes) will be scanned.
  */
 
-void fib6_clean_tree(struct fib6_node *root,
-		     int (*func)(struct rt6_info *, void *arg),
-		     int prune, void *arg)
+static void fib6_clean_tree(struct fib6_node *root,
+			    int (*func)(struct rt6_info *, void *arg),
+			    int prune, void *arg)
 {
 	struct fib6_cleaner_t c;
 
@@ -1064,6 +1359,25 @@ void fib6_clean_tree(struct fib6_node *root,
 	fib6_walk(&c.w);
 }
 
+void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+		    int prune, void *arg)
+{
+	struct fib6_table *table;
+	struct hlist_node *node;
+	unsigned int h;
+
+	rcu_read_lock();
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry_rcu(table, node, &fib_table_hash[h],
+					 tb6_hlist) {
+			write_lock_bh(&table->tb6_lock);
+			fib6_clean_tree(&table->tb6_root, func, prune, arg);
+			write_unlock_bh(&table->tb6_lock);
+		}
+	}
+	rcu_read_unlock();
+}
+
 static int fib6_prune_clone(struct rt6_info *rt, void *arg)
 {
 	if (rt->rt6i_flags & RTF_CACHE) {
@@ -1142,11 +1456,8 @@ void fib6_run_gc(unsigned long dummy)
 	}
 	gc_args.more = 0;
 
-
-	write_lock_bh(&rt6_lock);
 	ndisc_dst_gc(&gc_args.more);
-	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(fib6_age, 0, NULL);
 
 	if (gc_args.more)
 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
@@ -1161,10 +1472,10 @@ void __init fib6_init(void)
 {
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL, NULL);
-	if (!fib6_node_kmem)
-		panic("cannot create fib6_nodes cache");
+
+	fib6_tables_init();
 }
 
 void fib6_gc_cleanup(void)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 25c2a9e03895..6b8e6d76a58b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -111,7 +111,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
-		if (ipv6_parse_hopopts(skb) < 0) {
+		if (ipv6_parse_hopopts(&skb) < 0) {
 			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
 			return 0;
 		}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4fb47a252913..66716911962e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -308,6 +308,56 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 	return 0;
 }
 
+static int ip6_forward_proxy_check(struct sk_buff *skb)
+{
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	u8 nexthdr = hdr->nexthdr;
+	int offset;
+
+	if (ipv6_ext_hdr(nexthdr)) {
+		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
+		if (offset < 0)
+			return 0;
+	} else
+		offset = sizeof(struct ipv6hdr);
+
+	if (nexthdr == IPPROTO_ICMPV6) {
+		struct icmp6hdr *icmp6;
+
+		if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+			return 0;
+
+		icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+
+		switch (icmp6->icmp6_type) {
+		case NDISC_ROUTER_SOLICITATION:
+		case NDISC_ROUTER_ADVERTISEMENT:
+		case NDISC_NEIGHBOUR_SOLICITATION:
+		case NDISC_NEIGHBOUR_ADVERTISEMENT:
+		case NDISC_REDIRECT:
+			/* A unicast neighbor discovery message destined to
+			 * the proxied address must be passed to the input
+			 * function.
+			 */
+			return 1;
+		default:
+			break;
+		}
+	}
+
+	/*
+	 * The proxying router can't forward traffic sent to a link-local
+	 * address, so signal the sender and discard the packet. This
+	 * behavior is clarified by the MIPv6 specification.
+	 */
+	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
+		dst_link_failure(skb);
+		return -1;
+	}
+
+	return 0;
+}
+
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
 	return dst_output(skb);
@@ -362,6 +412,18 @@ int ip6_forward(struct sk_buff *skb)
 		return -ETIMEDOUT;
 	}
 
+	/* XXX: idev->cnf.proxy_ndp? */
+	if (ipv6_devconf.proxy_ndp &&
+	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
+		int proxied = ip6_forward_proxy_check(skb);
+		if (proxied > 0)
+			return ip6_input(skb);
+		else if (proxied < 0) {
+			IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+			goto drop;
+		}
+	}
+
 	if (!xfrm6_route_forward(skb)) {
 		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
 		goto drop;
@@ -475,17 +537,25 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 		switch (**nexthdr) {
 
 		case NEXTHDR_HOP:
+			break;
 		case NEXTHDR_ROUTING:
+			found_rhdr = 1;
+			break;
 		case NEXTHDR_DEST:
-			if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
-			if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
-			offset += ipv6_optlen(exthdr);
-			*nexthdr = &exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+#ifdef CONFIG_IPV6_MIP6
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+				break;
+#endif
+			if (found_rhdr)
+				return offset;
 			break;
 		default :
 			return offset;
 		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
 	}
 
 	return offset;
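After the restructure, ip6_find_1stfragopt() advances through hop-by-hop, routing, and destination options uniformly at the bottom of the loop and returns the first offset where a fragment header may be inserted (after any routing header, before everything else). A simplified userspace walk over the same idea on a fake header chain (illustrative constants and layout, not the kernel parser):

#include <stdio.h>
#include <stdint.h>

#define NEXTHDR_HOP     0
#define NEXTHDR_TCP     6
#define NEXTHDR_ROUTING 43
#define NEXTHDR_DEST    60

struct opt_hdr {
	uint8_t nexthdr;
	uint8_t hdrlen;              /* (hdrlen + 1) * 8 bytes total */
};

/* Return the byte offset (past a 40-byte IPv6 header) where a fragment
 * header could go: after any routing header, before the payload. */
static unsigned int find_1stfragopt(const uint8_t *pkt, unsigned int len,
				    uint8_t first)
{
	unsigned int offset = 40;
	uint8_t nexthdr = first;
	int found_rhdr = 0;

	while (offset + 1 < len) {
		const struct opt_hdr *eh = (const struct opt_hdr *)(pkt + offset);

		switch (nexthdr) {
		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}
		nexthdr = eh->nexthdr;
		offset += (eh->hdrlen + 1) * 8;
	}
	return offset;
}

int main(void)
{
	uint8_t pkt[128] = { 0 };

	/* hop-by-hop (8 bytes) at 40, then routing (24 bytes) at 48 */
	pkt[40] = NEXTHDR_ROUTING; pkt[41] = 0;
	pkt[48] = NEXTHDR_TCP;     pkt[49] = 2;

	printf("insert at %u\n", find_1stfragopt(pkt, sizeof(pkt), NEXTHDR_HOP));
	return 0;
}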
@@ -726,6 +796,14 @@ fail:
 	return err;
 }
 
+static inline int ip6_rt_check(struct rt6key *rt_key,
+			       struct in6_addr *fl_addr,
+			       struct in6_addr *addr_cache)
+{
+	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
+		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
+}
+
 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 					  struct dst_entry *dst,
 					  struct flowi *fl)
@@ -741,8 +819,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 	 * that we do not support routing by source, TOS,
 	 * and MSG_DONTROUTE		--ANK (980726)
 	 *
-	 * 1. If route was host route, check that
-	 *    cached destination is current.
+	 * 1. ip6_rt_check(): If route was host route,
+	 *    check that cached destination is current.
 	 *    If it is network route, we still may
 	 *    check its validity using saved pointer
 	 *    to the last used address: daddr_cache.
@@ -753,11 +831,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 	 *    sockets.
 	 * 2. oif also should be the same.
 	 */
-	if (((rt->rt6i_dst.plen != 128 ||
-	      !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
-	     && (np->daddr_cache == NULL ||
-		 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
-	    || (fl->oif && fl->oif != dst->dev->ifindex)) {
+	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
+#ifdef CONFIG_IPV6_SUBTREES
+	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
+#endif
+	    (fl->oif && fl->oif != dst->dev->ifindex)) {
 		dst_release(dst);
 		dst = NULL;
 	}
@@ -866,7 +944,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 	/* initialize protocol header pointer */
 	skb->h.raw = skb->data + fragheaderlen;
 
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	skb->csum = 0;
 	sk->sk_sndmsg_off = 0;
 }
@@ -963,7 +1041,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 
 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 
-	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+	fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
 
 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
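The fragmentation arithmetic deserves a worked case: fragheaderlen is everything repeated in every fragment (IPv6 header, per-fragment extension headers, and now dst.nfheader_len), and maxfraglen rounds the remaining room down to a multiple of 8 and reserves the 8-byte fragment header itself. For a 1500-byte MTU with no extension headers (assumed values):

#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1500;
	unsigned int ipv6hdr = 40, frag_hdr = 8;
	unsigned int nfheader_len = 0, opt_nflen = 0;  /* assumed: no ext hdrs */

	unsigned int fragheaderlen = ipv6hdr + nfheader_len + opt_nflen;
	unsigned int maxfraglen = ((mtu - fragheaderlen) & ~7u)
				  + fragheaderlen - frag_hdr;

	/* (1500-40) & ~7 = 1456; 1456 + 40 - 8 = 1488 bytes per fragment */
	printf("fragheaderlen=%u maxfraglen=%u\n", fragheaderlen, maxfraglen);
	return 0;
}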
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index a81e9e9d93bd..ad9c6e824e62 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET6;
-	t->props.mode = 1;
+	t->props.mode = XFRM_MODE_TUNNEL;
 	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
 
 	if (xfrm_init_state(t))
@@ -417,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 		goto out;
 
 	x->props.header_len = 0;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct ipv6hdr);
 
 	mutex_lock(&ipcomp6_resource_mutex);
@@ -429,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x)
 		goto error;
 	mutex_unlock(&ipcomp6_resource_mutex);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp6_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
@@ -461,6 +461,7 @@ static struct xfrm_type ipcomp6_type =
 	.destructor	= ipcomp6_destroy,
 	.input		= ipcomp6_input,
 	.output		= ipcomp6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct inet6_protocol ipcomp6_protocol =
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a5eaaf693abf..4f3bb7fcc8b5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -407,8 +407,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		/* routing header option needs extra check */
 		if (optname == IPV6_RTHDR && opt->srcrt) {
 			struct ipv6_rt_hdr *rthdr = opt->srcrt;
-			if (rthdr->type)
+			switch (rthdr->type) {
+			case IPV6_SRCRT_TYPE_0:
+#ifdef CONFIG_IPV6_MIP6
+			case IPV6_SRCRT_TYPE_2:
+#endif
+				break;
+			default:
 				goto sticky_done;
+			}
+
 			if ((rthdr->hdrlen & 1) ||
 			    (rthdr->hdrlen >> 1) != rthdr->segments_left)
 				goto sticky_done;
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index dd4d1ce77769..0e8e0676a033 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -14,7 +14,6 @@ EXPORT_SYMBOL(ndisc_mc_map);
 EXPORT_SYMBOL(register_inet6addr_notifier);
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
 EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(addrconf_lock);
 EXPORT_SYMBOL(ipv6_setsockopt);
 EXPORT_SYMBOL(ipv6_getsockopt);
 EXPORT_SYMBOL(inet6_register_protosw);
@@ -31,6 +30,8 @@ EXPORT_SYMBOL(ipv6_chk_addr);
 EXPORT_SYMBOL(in6_dev_finish_destroy);
 #ifdef CONFIG_XFRM
 EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_input_addr);
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
 #endif
 EXPORT_SYMBOL(rt6_lookup);
 EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 639eb20c9f1f..3b114e3fa2f8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -171,7 +171,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 
 #define IPV6_MLD_MAX_MSF	64
 
-int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
+int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
 
 /*
  *	socket join on multicast group
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
new file mode 100644
index 000000000000..99d116caecda
--- /dev/null
+++ b/net/ipv6/mip6.c
@@ -0,0 +1,519 @@
1/*
2 * Copyright (C)2003-2006 Helsinki University of Technology
3 * Copyright (C)2003-2006 USAGI/WIDE Project
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19/*
20 * Authors:
21 * Noriaki TAKAMIYA @USAGI
22 * Masahide NAKAMURA @USAGI
23 */
24
25#include <linux/config.h>
26#include <linux/module.h>
27#include <linux/skbuff.h>
28#include <linux/time.h>
29#include <linux/ipv6.h>
30#include <linux/icmpv6.h>
31#include <net/sock.h>
32#include <net/ipv6.h>
33#include <net/ip6_checksum.h>
34#include <net/xfrm.h>
35#include <net/mip6.h>
36
37static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr)
38{
39 return x->coaddr;
40}
41
42static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
43{
44 return (n - len + 16) & 0x7;
45}
46
47static inline void *mip6_padn(__u8 *data, __u8 padlen)
48{
49 if (!data)
50 return NULL;
51 if (padlen == 1) {
52 data[0] = MIP6_OPT_PAD_1;
53 } else if (padlen > 1) {
54 data[0] = MIP6_OPT_PAD_N;
55 data[1] = padlen - 2;
56 if (padlen > 2)
57 memset(data+2, 0, data[1]);
58 }
59 return data + padlen;
60}
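calc_padlen() computes the PadN needed to place the next option at alignment 8n + offset; mip6_padn() then writes a Pad1 or PadN option of exactly that many bytes. The HAO must sit at alignment 8n+6 and the destination-options header itself occupies 2 bytes, so 4 bytes of padding are required, which the standalone arithmetic below reproduces (a sketch, not kernel code):

#include <stdio.h>

/* Padding so that an option starting after `len` bytes lands on
 * alignment 8k + n (same formula as calc_padlen in mip6.c). */
static unsigned int calc_padlen(unsigned int len, unsigned int n)
{
	return (n - len + 16) & 0x7;
}

int main(void)
{
	/* dest-opts header is 2 bytes; the HAO needs 8n+6 alignment */
	unsigned int pad = calc_padlen(2, 6);

	printf("PadN length: %u\n", pad);          /* 4 */
	printf("HAO offset:  %u\n", 2 + pad);      /* 6 == 8*0 + 6 */
	return 0;
}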
61
62static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos)
63{
64 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
65}
66
67static int mip6_mh_len(int type)
68{
69 int len = 0;
70
71 switch (type) {
72 case IP6_MH_TYPE_BRR:
73 len = 0;
74 break;
75 case IP6_MH_TYPE_HOTI:
76 case IP6_MH_TYPE_COTI:
77 case IP6_MH_TYPE_BU:
78 case IP6_MH_TYPE_BACK:
79 len = 1;
80 break;
81 case IP6_MH_TYPE_HOT:
82 case IP6_MH_TYPE_COT:
83 case IP6_MH_TYPE_BERROR:
84 len = 2;
85 break;
86 }
87 return len;
88}
89
90int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
91{
92 struct ip6_mh *mh;
93 int mhlen;
94
95 if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) ||
96 !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3)))
97 return -1;
98
99 mh = (struct ip6_mh *)skb->h.raw;
100
101 if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
102 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
103 mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
104 mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
105 return -1;
106 }
107 mhlen = (mh->ip6mh_hdrlen + 1) << 3;
108
109 if (skb->ip_summed == CHECKSUM_COMPLETE) {
110 skb->ip_summed = CHECKSUM_UNNECESSARY;
111 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
112 &skb->nh.ipv6h->daddr,
113 mhlen, IPPROTO_MH,
114 skb->csum)) {
115 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n");
116 skb->ip_summed = CHECKSUM_NONE;
117 }
118 }
119 if (skb->ip_summed == CHECKSUM_NONE) {
120 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
121 &skb->nh.ipv6h->daddr,
122 mhlen, IPPROTO_MH,
123 skb_checksum(skb, 0, mhlen, 0))) {
124 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed "
125 "[" NIP6_FMT " > " NIP6_FMT "]\n",
126 NIP6(skb->nh.ipv6h->saddr),
127 NIP6(skb->nh.ipv6h->daddr));
128 return -1;
129 }
130 skb->ip_summed = CHECKSUM_UNNECESSARY;
131 }
132
133 if (mh->ip6mh_proto != IPPROTO_NONE) {
134 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
135 mh->ip6mh_proto);
136 mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
137 return -1;
138 }
139
140 return 0;
141}

struct mip6_report_rate_limiter {
    spinlock_t lock;
    struct timeval stamp;
    int iif;
    struct in6_addr src;
    struct in6_addr dst;
};

static struct mip6_report_rate_limiter mip6_report_rl = {
    .lock = SPIN_LOCK_UNLOCKED
};

static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
{
    struct ipv6hdr *iph = skb->nh.ipv6h;
    struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;

    if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
        !ipv6_addr_any((struct in6_addr *)x->coaddr))
        return -ENOENT;

    return destopt->nexthdr;
}

/* Destination Option Header is inserted.
 * IP Header's src address is replaced with Home Address Option in
 * Destination Option Header.
 */
static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
{
    struct ipv6hdr *iph;
    struct ipv6_destopt_hdr *dstopt;
    struct ipv6_destopt_hao *hao;
    u8 nexthdr;
    int len;

    iph = (struct ipv6hdr *)skb->data;
    iph->payload_len = htons(skb->len - sizeof(*iph));

    nexthdr = *skb->nh.raw;
    *skb->nh.raw = IPPROTO_DSTOPTS;

    dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
    dstopt->nexthdr = nexthdr;

    hao = mip6_padn((char *)(dstopt + 1),
                    calc_padlen(sizeof(*dstopt), 6));

    hao->type = IPV6_TLV_HAO;
    hao->length = sizeof(*hao) - 2;
    BUG_TRAP(hao->length == 16);

    len = ((char *)hao - (char *)dstopt) + sizeof(*hao);

    memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr));
    memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));

    BUG_TRAP(len == x->props.header_len);
    dstopt->hdrlen = (x->props.header_len >> 3) - 1;

    return 0;
}
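/*
 * Resulting layout (24 bytes total): nexthdr(1) + hdrlen(1), a 4-byte
 * PadN option, then the 18-byte HAO (type, length, 16-byte home
 * address).  This is what the BUG_TRAPs above assert against
 * x->props.header_len computed in mip6_destopt_init_state() below.
 */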

static inline int mip6_report_rl_allow(struct timeval *stamp,
                                       struct in6_addr *dst,
                                       struct in6_addr *src, int iif)
{
    int allow = 0;

    spin_lock_bh(&mip6_report_rl.lock);
    if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec ||
        mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
        mip6_report_rl.iif != iif ||
        !ipv6_addr_equal(&mip6_report_rl.src, src) ||
        !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
        mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
        mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
        mip6_report_rl.iif = iif;
        ipv6_addr_copy(&mip6_report_rl.src, src);
        ipv6_addr_copy(&mip6_report_rl.dst, dst);
        allow = 1;
    }
    spin_unlock_bh(&mip6_report_rl.lock);
    return allow;
}
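/*
 * In other words: one report is allowed per distinct
 * (timestamp, iif, src, dst) tuple; duplicates are suppressed until
 * any field of the tuple changes.
 */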

static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
{
    struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
    struct ipv6_destopt_hao *hao = NULL;
    struct xfrm_selector sel;
    int offset;
    struct timeval stamp;
    int err = 0;

    if (unlikely(fl->proto == IPPROTO_MH &&
                 fl->fl_mh_type <= IP6_MH_TYPE_MAX))
        goto out;

    if (likely(opt->dsthao)) {
        offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
        if (likely(offset >= 0))
            hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
    }

    skb_get_timestamp(skb, &stamp);

    if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr,
                              hao ? &hao->addr : &skb->nh.ipv6h->saddr,
                              opt->iif))
        goto out;

    memset(&sel, 0, sizeof(sel));
    memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr,
           sizeof(sel.daddr));
    sel.prefixlen_d = 128;
    memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr,
           sizeof(sel.saddr));
    sel.prefixlen_s = 128;
    sel.family = AF_INET6;
    sel.proto = fl->proto;
    sel.dport = xfrm_flowi_dport(fl);
    if (sel.dport)
        sel.dport_mask = ~((__u16)0);
    sel.sport = xfrm_flowi_sport(fl);
    if (sel.sport)
        sel.sport_mask = ~((__u16)0);
    sel.ifindex = fl->oif;

    err = km_report(IPPROTO_DSTOPTS, &sel,
                    (hao ? (xfrm_address_t *)&hao->addr : NULL));

 out:
    return err;
}

static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
                               u8 **nexthdr)
{
    u16 offset = sizeof(struct ipv6hdr);
    struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
    unsigned int packet_len = skb->tail - skb->nh.raw;
    int found_rhdr = 0;

    *nexthdr = &skb->nh.ipv6h->nexthdr;

    while (offset + 1 <= packet_len) {

        switch (**nexthdr) {
        case NEXTHDR_HOP:
            break;
        case NEXTHDR_ROUTING:
            found_rhdr = 1;
            break;
        case NEXTHDR_DEST:
            /*
             * HAO MUST NOT appear more than once.
             * XXX: It is better to try to find by the end of
             * XXX: packet if HAO exists.
             */
            if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
                LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n");
                return offset;
            }

            if (found_rhdr)
                return offset;

            break;
        default:
            return offset;
        }

        offset += ipv6_optlen(exthdr);
        *nexthdr = &exthdr->nexthdr;
        exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
    }

    return offset;
}

static int mip6_destopt_init_state(struct xfrm_state *x)
{
    if (x->id.spi) {
        printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
               x->id.spi);
        return -EINVAL;
    }
    if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
        printk(KERN_INFO "%s: state's mode is not %u: %u\n",
               __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
        return -EINVAL;
    }

    x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
        calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
        sizeof(struct ipv6_destopt_hao);
    BUG_TRAP(x->props.header_len == 24);

    return 0;
}

/*
 * Do nothing about destroying since it has no specific operation for
 * destination options header unlike IPsec protocols.
 */
static void mip6_destopt_destroy(struct xfrm_state *x)
{
}

static struct xfrm_type mip6_destopt_type =
{
    .description = "MIP6DESTOPT",
    .owner = THIS_MODULE,
    .proto = IPPROTO_DSTOPTS,
    .flags = XFRM_TYPE_NON_FRAGMENT,
    .init_state = mip6_destopt_init_state,
    .destructor = mip6_destopt_destroy,
    .input = mip6_destopt_input,
    .output = mip6_destopt_output,
    .reject = mip6_destopt_reject,
    .hdr_offset = mip6_destopt_offset,
    .local_addr = mip6_xfrm_addr,
};

static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
{
    struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;

    if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
        !ipv6_addr_any((struct in6_addr *)x->coaddr))
        return -ENOENT;

    return rt2->rt_hdr.nexthdr;
}

/* Routing Header type 2 is inserted.
 * IP Header's dst address is replaced with Routing Header's Home Address.
 */
static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
{
    struct ipv6hdr *iph;
    struct rt2_hdr *rt2;
    u8 nexthdr;

    iph = (struct ipv6hdr *)skb->data;
    iph->payload_len = htons(skb->len - sizeof(*iph));

    nexthdr = *skb->nh.raw;
    *skb->nh.raw = IPPROTO_ROUTING;

    rt2 = (struct rt2_hdr *)skb->h.raw;
    rt2->rt_hdr.nexthdr = nexthdr;
    rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
    rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
    rt2->rt_hdr.segments_left = 1;
    memset(&rt2->reserved, 0, sizeof(rt2->reserved));

    BUG_TRAP(rt2->rt_hdr.hdrlen == 2);

    memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
    memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));

    return 0;
}
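/*
 * struct rt2_hdr is a fixed 24 bytes (4-byte routing header, 4
 * reserved bytes, 16-byte home address), hence hdrlen == (24 >> 3) - 1
 * == 2, as asserted above.
 */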

static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
                             u8 **nexthdr)
{
    u16 offset = sizeof(struct ipv6hdr);
    struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
    unsigned int packet_len = skb->tail - skb->nh.raw;
    int found_rhdr = 0;

    *nexthdr = &skb->nh.ipv6h->nexthdr;

    while (offset + 1 <= packet_len) {

        switch (**nexthdr) {
        case NEXTHDR_HOP:
            break;
        case NEXTHDR_ROUTING:
            if (offset + 3 <= packet_len) {
                struct ipv6_rt_hdr *rt;
                rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
                if (rt->type != 0)
                    return offset;
            }
            found_rhdr = 1;
            break;
        case NEXTHDR_DEST:
            if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                return offset;

            if (found_rhdr)
                return offset;

            break;
        default:
            return offset;
        }

        offset += ipv6_optlen(exthdr);
        *nexthdr = &exthdr->nexthdr;
        exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
    }

    return offset;
}

static int mip6_rthdr_init_state(struct xfrm_state *x)
{
    if (x->id.spi) {
        printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
               x->id.spi);
        return -EINVAL;
    }
    if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
        printk(KERN_INFO "%s: state's mode is not %u: %u\n",
               __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
        return -EINVAL;
    }

    x->props.header_len = sizeof(struct rt2_hdr);

    return 0;
}

/*
 * Do nothing about destroying since it has no specific operation for routing
 * header type 2 unlike IPsec protocols.
 */
static void mip6_rthdr_destroy(struct xfrm_state *x)
{
}

static struct xfrm_type mip6_rthdr_type =
{
    .description = "MIP6RT",
    .owner = THIS_MODULE,
    .proto = IPPROTO_ROUTING,
    .flags = XFRM_TYPE_NON_FRAGMENT,
    .init_state = mip6_rthdr_init_state,
    .destructor = mip6_rthdr_destroy,
    .input = mip6_rthdr_input,
    .output = mip6_rthdr_output,
    .hdr_offset = mip6_rthdr_offset,
    .remote_addr = mip6_xfrm_addr,
};

int __init mip6_init(void)
{
    printk(KERN_INFO "Mobile IPv6\n");

    if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
        printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__);
        goto mip6_destopt_xfrm_fail;
    }
    if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
        printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
        goto mip6_rthdr_xfrm_fail;
    }
    return 0;

 mip6_rthdr_xfrm_fail:
    xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
 mip6_destopt_xfrm_fail:
    return -EAGAIN;
}

void __exit mip6_fini(void)
{
    if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
        printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
    if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
        printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b50055b9278d..0304b5fe8d6a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -62,6 +62,7 @@
 #include <linux/sysctl.h>
 #endif
 
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
@@ -411,7 +412,8 @@ static void pndisc_destructor(struct pneigh_entry *n)
  */
 
 static inline void ndisc_flow_init(struct flowi *fl, u8 type,
-                                   struct in6_addr *saddr, struct in6_addr *daddr)
+                                   struct in6_addr *saddr, struct in6_addr *daddr,
+                                   int oif)
 {
     memset(fl, 0, sizeof(*fl));
     ipv6_addr_copy(&fl->fl6_src, saddr);
@@ -419,6 +421,8 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
     fl->proto = IPPROTO_ICMPV6;
     fl->fl_icmp_type = type;
     fl->fl_icmp_code = 0;
+    fl->oif = oif;
+    security_sk_classify_flow(ndisc_socket->sk, fl);
 }
 
 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
@@ -450,7 +454,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
         src_addr = &tmpaddr;
     }
 
-    ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
+    ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
+                    dev->ifindex);
 
     dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
     if (!dst)
@@ -491,7 +496,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
     msg->icmph.icmp6_unused = 0;
     msg->icmph.icmp6_router = router;
     msg->icmph.icmp6_solicited = solicited;
-    msg->icmph.icmp6_override = !!override;
+    msg->icmph.icmp6_override = override;
 
     /* Set the target address. */
     ipv6_addr_copy(&msg->target, solicited_addr);
@@ -540,7 +545,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
         saddr = &addr_buf;
     }
 
-    ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
+    ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr,
+                    dev->ifindex);
 
     dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
     if (!dst)
@@ -615,7 +621,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
     int len;
     int err;
 
-    ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
+    ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
+                    dev->ifindex);
 
     dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
     if (!dst)
@@ -729,8 +736,10 @@ static void ndisc_recv_ns(struct sk_buff *skb)
     struct inet6_ifaddr *ifp;
     struct inet6_dev *idev = NULL;
     struct neighbour *neigh;
+    struct pneigh_entry *pneigh = NULL;
     int dad = ipv6_addr_any(saddr);
     int inc;
+    int is_router;
 
     if (ipv6_addr_is_multicast(&msg->target)) {
         ND_PRINTK2(KERN_WARNING
@@ -815,7 +824,9 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 
     if (ipv6_chk_acast_addr(dev, &msg->target) ||
         (idev->cnf.forwarding &&
-         pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
+         (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
+         (pneigh = pneigh_lookup(&nd_tbl,
+                                 &msg->target, dev, 0)) != NULL)) {
         if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
             skb->pkt_type != PACKET_HOST &&
             inc != 0 &&
@@ -836,12 +847,14 @@ static void ndisc_recv_ns(struct sk_buff *skb)
         goto out;
     }
 
+    is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding);
+
     if (dad) {
         struct in6_addr maddr;
 
         ipv6_addr_all_nodes(&maddr);
         ndisc_send_na(dev, NULL, &maddr, &msg->target,
-                      idev->cnf.forwarding, 0, (ifp != NULL), 1);
+                      is_router, 0, (ifp != NULL), 1);
         goto out;
     }
 
@@ -862,7 +875,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
                      NEIGH_UPDATE_F_OVERRIDE);
     if (neigh || !dev->hard_header) {
         ndisc_send_na(dev, neigh, saddr, &msg->target,
-                      idev->cnf.forwarding,
+                      is_router,
                       1, (ifp != NULL && inc), inc);
         if (neigh)
             neigh_release(neigh);
@@ -945,6 +958,20 @@ static void ndisc_recv_na(struct sk_buff *skb)
         if (neigh->nud_state & NUD_FAILED)
             goto out;
 
+        /*
+         * Don't update the neighbor cache entry on a proxy NA from
+         * ourselves because either the proxied node is off link or it
+         * has already sent a NA to us.
+         */
+        if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
+            ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
+            pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
+            /* XXX: idev->cnf.prixy_ndp */
+            WARN_ON(skb->dst != NULL &&
+                    ((struct rt6_info *)skb->dst)->rt6i_idev);
+            goto out;
+        }
+
         neigh_update(neigh, lladdr,
                      msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
@@ -959,7 +986,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
         struct rt6_info *rt;
         rt = rt6_get_dflt_router(saddr, dev);
         if (rt)
-            ip6_del_rt(rt, NULL, NULL, NULL);
+            ip6_del_rt(rt);
     }
 
 out:
@@ -1112,7 +1139,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 
     if (rt && lifetime == 0) {
         neigh_clone(neigh);
-        ip6_del_rt(rt, NULL, NULL, NULL);
+        ip6_del_rt(rt);
         rt = NULL;
     }
 
@@ -1344,7 +1371,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 
     neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
     if (neigh) {
-        rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr,
+        rt6_redirect(dest, &skb->nh.ipv6h->daddr,
+                     &skb->nh.ipv6h->saddr, neigh, lladdr,
                      on_link);
         neigh_release(neigh);
     }
@@ -1380,7 +1408,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
         return;
     }
 
-    ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
+    ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr,
+                    dev->ifindex);
 
     dst = ip6_route_output(NULL, &fl);
     if (dst == NULL)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 395a417ba955..580b1aba6722 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
     unsigned int csum = 0;
 
     switch (skb->ip_summed) {
-    case CHECKSUM_HW:
+    case CHECKSUM_COMPLETE:
         if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN)
             break;
         if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index eeeb57d4c9c5..ac1dfebde175 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -5,7 +5,7 @@
 # Link order matters here.
 obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
 obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
-obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
+obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
 obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
 obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 968a14be0d05..9510c24ca8d2 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -56,15 +56,15 @@ struct ipq_queue_entry {
 
 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
 static unsigned int queue_dropped = 0;
 static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
@@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
         break;
 
     case IPQ_COPY_PACKET:
-        if (entry->skb->ip_summed == CHECKSUM_HW &&
-            (*errp = skb_checksum_help(entry->skb,
-                                       entry->info->outdev == NULL))) {
+        if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+             entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+            (*errp = skb_checksum_help(entry->skb))) {
             read_unlock_bh(&queue_lock);
             return NULL;
         }
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c9d6b23cd3f7..4ab368fa0b8f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -70,9 +70,6 @@ do { \
 #define IP_NF_ASSERT(x)
 #endif
 
-
-#include <linux/netfilter_ipv4/listhelp.h>
-
 #if 0
 /* All the better to debug you with... */
 #define static
@@ -220,8 +217,7 @@ ip6t_error(struct sk_buff **pskb,
            const struct net_device *out,
            unsigned int hooknum,
            const struct xt_target *target,
-           const void *targinfo,
-           void *userinfo)
+           const void *targinfo)
 {
     if (net_ratelimit())
         printk("ip6_tables: error: `%s'\n", (char *)targinfo);
@@ -258,8 +254,7 @@ ip6t_do_table(struct sk_buff **pskb,
               unsigned int hook,
               const struct net_device *in,
               const struct net_device *out,
-              struct xt_table *table,
-              void *userdata)
+              struct xt_table *table)
 {
     static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
     int offset = 0;
@@ -349,8 +344,7 @@ ip6t_do_table(struct sk_buff **pskb,
                                                      in, out,
                                                      hook,
                                                      t->u.kernel.target,
-                                                     t->data,
-                                                     userdata);
+                                                     t->data);
 
 #ifdef CONFIG_NETFILTER_DEBUG
             if (((struct ip6t_entry *)table_base)->comefrom
@@ -507,8 +501,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
         return 1;
 
     if (m->u.kernel.match->destroy)
-        m->u.kernel.match->destroy(m->u.kernel.match, m->data,
-                                   m->u.match_size - sizeof(*m));
+        m->u.kernel.match->destroy(m->u.kernel.match, m->data);
     module_put(m->u.kernel.match->me);
     return 0;
 }
@@ -561,7 +554,6 @@ check_match(struct ip6t_entry_match *m,
 
     if (m->u.kernel.match->checkentry
         && !m->u.kernel.match->checkentry(name, ipv6, match, m->data,
-                                          m->u.match_size - sizeof(*m),
                                           hookmask)) {
         duprintf("ip_tables: check failed for `%s'.\n",
                  m->u.kernel.match->name);
@@ -618,12 +610,10 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
     if (t->u.kernel.target == &ip6t_standard_target) {
         if (!standard_check(t, size)) {
             ret = -EINVAL;
-            goto cleanup_matches;
+            goto err;
         }
     } else if (t->u.kernel.target->checkentry
                && !t->u.kernel.target->checkentry(name, e, target, t->data,
-                                                  t->u.target_size
-                                                  - sizeof(*t),
                                                   e->comefrom)) {
         duprintf("ip_tables: check failed for `%s'.\n",
                  t->u.kernel.target->name);
@@ -695,8 +685,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i)
     IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
     t = ip6t_get_target(e);
     if (t->u.kernel.target->destroy)
-        t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-                                    t->u.target_size - sizeof(*t));
+        t->u.kernel.target->destroy(t->u.kernel.target, t->data);
     module_put(t->u.kernel.target->me);
     return 0;
 }
@@ -1352,7 +1341,6 @@ icmp6_checkentry(const char *tablename,
                  const void *entry,
                  const struct xt_match *match,
                  void *matchinfo,
-                 unsigned int matchsize,
                  unsigned int hook_mask)
 {
     const struct ip6t_icmp *icmpinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index b8eff8ee69b1..435750f664dd 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -22,11 +22,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
                                    const struct net_device *out,
                                    unsigned int hooknum,
                                    const struct xt_target *target,
-                                   const void *targinfo, void *userinfo)
+                                   const void *targinfo)
 {
     struct ipv6hdr *ip6h;
     const struct ip6t_HL_info *info = targinfo;
-    u_int16_t diffs[2];
     int new_hl;
 
     if (!skb_make_writable(pskb, (*pskb)->len))
@@ -53,11 +52,8 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
         break;
     }
 
-    if (new_hl != ip6h->hop_limit) {
-        diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF;
+    if (new_hl != ip6h->hop_limit)
         ip6h->hop_limit = new_hl;
-        diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8);
-    }
 
     return IP6T_CONTINUE;
 }
@@ -66,7 +62,6 @@ static int ip6t_hl_checkentry(const char *tablename,
         const void *entry,
         const struct xt_target *target,
         void *targinfo,
-        unsigned int targinfosize,
         unsigned int hook_mask)
 {
     struct ip6t_HL_info *info = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 73c6300109d6..0cf537d30185 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -427,8 +427,7 @@ ip6t_log_target(struct sk_buff **pskb,
                 const struct net_device *out,
                 unsigned int hooknum,
                 const struct xt_target *target,
-                const void *targinfo,
-                void *userinfo)
+                const void *targinfo)
 {
     const struct ip6t_log_info *loginfo = targinfo;
     struct nf_loginfo li;
@@ -452,7 +451,6 @@ static int ip6t_log_checkentry(const char *tablename,
                                const void *entry,
                                const struct xt_target *target,
                                void *targinfo,
-                               unsigned int targinfosize,
                                unsigned int hook_mask)
 {
     const struct ip6t_log_info *loginfo = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 8629ba195d2d..311eae82feb3 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -96,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb)
     ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
     fl.fl_ip_sport = otcph.dest;
     fl.fl_ip_dport = otcph.source;
+    security_skb_classify_flow(oldskb, &fl);
     dst = ip6_route_output(NULL, &fl);
     if (dst == NULL)
         return;
@@ -179,8 +180,7 @@ static unsigned int reject6_target(struct sk_buff **pskb,
                                    const struct net_device *out,
                                    unsigned int hooknum,
                                    const struct xt_target *target,
-                                   const void *targinfo,
-                                   void *userinfo)
+                                   const void *targinfo)
 {
     const struct ip6t_reject_info *reject = targinfo;
 
@@ -223,7 +223,6 @@ static int check(const char *tablename,
                  const void *entry,
                  const struct xt_target *target,
                  void *targinfo,
-                 unsigned int targinfosize,
                  unsigned int hook_mask)
 {
     const struct ip6t_reject_info *rejinfo = targinfo;
@@ -256,9 +255,7 @@ static struct ip6t_target ip6t_reject_reg = {
 
 static int __init ip6t_reject_init(void)
 {
-    if (ip6t_register_target(&ip6t_reject_reg))
-        return -EINVAL;
-    return 0;
+    return ip6t_register_target(&ip6t_reject_reg);
 }
 
 static void __exit ip6t_reject_fini(void)
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 2f7bb20c758b..ec1b1608156c 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -102,7 +102,6 @@ checkentry(const char *tablename,
            const void *entry,
            const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchinfosize,
            unsigned int hook_mask)
 {
     const struct ip6t_ah *ahinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
deleted file mode 100644
index 9422413d0571..000000000000
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/* Kernel module to match Hop-by-Hop and Destination parameters. */
-
-/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <net/checksum.h>
-#include <net/ipv6.h>
-
-#include <asm/byteorder.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_opts.h>
-
-#define HOPBYHOP 0
-
-MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
-MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/*
- * (Type & 0xC0) >> 6
- *     0   -> ignorable
- *     1   -> must drop the packet
- *     2   -> send ICMP PARM PROB regardless and drop packet
- *     3   -> Send ICMP if not a multicast address and drop packet
- * (Type & 0x20) >> 5
- *     0   -> invariant
- *     1   -> can change the routing
- * (Type & 0x1F) Type
- *     0   -> Pad1 (only 1 byte!)
- *     1   -> PadN LENGTH info (total length = length + 2)
- *     C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k )
- *     5   -> RTALERT 2 x x
- */
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-    struct ipv6_opt_hdr _optsh, *oh;
-    const struct ip6t_opts *optinfo = matchinfo;
-    unsigned int temp;
-    unsigned int ptr;
-    unsigned int hdrlen = 0;
-    unsigned int ret = 0;
-    u8 _opttype, *tp = NULL;
-    u8 _optlen, *lp = NULL;
-    unsigned int optlen;
-
-#if HOPBYHOP
-    if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
-#else
-    if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
-#endif
-        return 0;
-
-    oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-    if (oh == NULL) {
-        *hotdrop = 1;
-        return 0;
-    }
-
-    hdrlen = ipv6_optlen(oh);
-    if (skb->len - ptr < hdrlen) {
-        /* Packet smaller than it's length field */
-        return 0;
-    }
-
-    DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
-
-    DEBUGP("len %02X %04X %02X ",
-           optinfo->hdrlen, hdrlen,
-           (!(optinfo->flags & IP6T_OPTS_LEN) ||
-            ((optinfo->hdrlen == hdrlen) ^
-             !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
-
-    ret = (oh != NULL) &&
-          (!(optinfo->flags & IP6T_OPTS_LEN) ||
-           ((optinfo->hdrlen == hdrlen) ^
-            !!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
-
-    ptr += 2;
-    hdrlen -= 2;
-    if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
-        return ret;
-    } else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
-        DEBUGP("Not strict - not implemented");
-    } else {
-        DEBUGP("Strict ");
-        DEBUGP("#%d ", optinfo->optsnr);
-        for (temp = 0; temp < optinfo->optsnr; temp++) {
-            /* type field exists ? */
-            if (hdrlen < 1)
-                break;
-            tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
-                                    &_opttype);
-            if (tp == NULL)
-                break;
-
-            /* Type check */
-            if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
-                DEBUGP("Tbad %02X %02X\n",
-                       *tp,
-                       (optinfo->opts[temp] & 0xFF00) >> 8);
-                return 0;
-            } else {
-                DEBUGP("Tok ");
-            }
-            /* Length check */
-            if (*tp) {
-                u16 spec_len;
-
-                /* length field exists ? */
-                if (hdrlen < 2)
-                    break;
-                lp = skb_header_pointer(skb, ptr + 1,
-                                        sizeof(_optlen),
-                                        &_optlen);
-                if (lp == NULL)
-                    break;
-                spec_len = optinfo->opts[temp] & 0x00FF;
-
-                if (spec_len != 0x00FF && spec_len != *lp) {
-                    DEBUGP("Lbad %02X %04X\n", *lp,
-                           spec_len);
-                    return 0;
-                }
-                DEBUGP("Lok ");
-                optlen = *lp + 2;
-            } else {
-                DEBUGP("Pad1\n");
-                optlen = 1;
-            }
-
-            /* Step to the next */
-            DEBUGP("len%04X \n", optlen);
-
-            if ((ptr > skb->len - optlen || hdrlen < optlen) &&
-                (temp < optinfo->optsnr - 1)) {
-                DEBUGP("new pointer is too large! \n");
-                break;
-            }
-            ptr += optlen;
-            hdrlen -= optlen;
-        }
-        if (temp == optinfo->optsnr)
-            return ret;
-        else
-            return 0;
-    }
-
-    return 0;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-           const void *info,
-           const struct xt_match *match,
-           void *matchinfo,
-           unsigned int matchinfosize,
-           unsigned int hook_mask)
-{
-    const struct ip6t_opts *optsinfo = matchinfo;
-
-    if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-        DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
-        return 0;
-    }
-    return 1;
-}
-
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
-    .name = "hbh",
-#else
-    .name = "dst",
-#endif
-    .match = match,
-    .matchsize = sizeof(struct ip6t_opts),
-    .checkentry = checkentry,
-    .me = THIS_MODULE,
-};
-
-static int __init ip6t_dst_init(void)
-{
-    return ip6t_register_match(&opts_match);
-}
-
-static void __exit ip6t_dst_fini(void)
-{
-    ip6t_unregister_match(&opts_match);
-}
-
-module_init(ip6t_dst_init);
-module_exit(ip6t_dst_fini);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 06768c84bd31..78d9c8b9e28a 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -119,7 +119,6 @@ checkentry(const char *tablename,
            const void *ip,
            const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchinfosize,
            unsigned int hook_mask)
 {
     const struct ip6t_frag *fraginfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 374f1be85c0d..d32a205e3af2 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -19,15 +19,10 @@
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_opts.h>
 
-#define HOPBYHOP 1
-
 MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
+MODULE_DESCRIPTION("IPv6 opts match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+MODULE_ALIAS("ip6t_dst");
 
 #if 0
 #define DEBUGP printk
@@ -71,11 +66,7 @@ match(const struct sk_buff *skb,
     u8 _optlen, *lp = NULL;
     unsigned int optlen;
 
-#if HOPBYHOP
-    if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
-#else
-    if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
-#endif
+    if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0)
         return 0;
 
     oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
@@ -182,7 +173,6 @@ checkentry(const char *tablename,
            const void *entry,
            const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchinfosize,
            unsigned int hook_mask)
 {
     const struct ip6t_opts *optsinfo = matchinfo;
@@ -194,26 +184,35 @@ checkentry(const char *tablename,
     return 1;
 }
 
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
-    .name = "hbh",
-#else
-    .name = "dst",
-#endif
-    .match = match,
-    .matchsize = sizeof(struct ip6t_opts),
-    .checkentry = checkentry,
-    .me = THIS_MODULE,
+static struct xt_match opts_match[] = {
+    {
+        .name = "hbh",
+        .family = AF_INET6,
+        .match = match,
+        .matchsize = sizeof(struct ip6t_opts),
+        .checkentry = checkentry,
+        .me = THIS_MODULE,
+        .data = NEXTHDR_HOP,
+    },
+    {
+        .name = "dst",
+        .family = AF_INET6,
+        .match = match,
+        .matchsize = sizeof(struct ip6t_opts),
+        .checkentry = checkentry,
+        .me = THIS_MODULE,
+        .data = NEXTHDR_DEST,
+    },
 };
 
 static int __init ip6t_hbh_init(void)
 {
-    return ip6t_register_match(&opts_match);
+    return xt_register_matches(opts_match, ARRAY_SIZE(opts_match));
 }
 
 static void __exit ip6t_hbh_fini(void)
 {
-    ip6t_unregister_match(&opts_match);
+    xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match));
 }
 
 module_init(ip6t_hbh_init);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 9375eeb1369f..3093c398002f 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -128,7 +128,6 @@ ipv6header_checkentry(const char *tablename,
                       const void *ip,
                       const struct xt_match *match,
                       void *matchinfo,
-                      unsigned int matchsize,
                       unsigned int hook_mask)
 {
     const struct ip6t_ipv6header_info *info = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 5d047990cd44..4eb9bbc4ebc3 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -57,7 +57,6 @@ checkentry(const char *tablename,
            const void *ip,
            const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchsize,
            unsigned int hook_mask)
 {
     const struct ip6t_owner_info *info = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index fbb0184a41d8..bcb2e168a5bc 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -197,7 +197,6 @@ checkentry(const char *tablename,
            const void *entry,
            const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchinfosize,
            unsigned int hook_mask)
 {
     const struct ip6t_rt *rtinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 60976c0c58e8..2fc07c74decf 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -108,7 +108,7 @@ ip6t_hook(unsigned int hook,
           const struct net_device *out,
           int (*okfn)(struct sk_buff *))
 {
-    return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+    return ip6t_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
@@ -128,7 +128,7 @@ ip6t_local_out_hook(unsigned int hook,
     }
 #endif
 
-    return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+    return ip6t_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 03a13eab1dae..386ea260e767 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -138,7 +138,7 @@ ip6t_route_hook(unsigned int hook,
                 const struct net_device *out,
                 int (*okfn)(struct sk_buff *))
 {
-    return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+    return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
@@ -174,18 +174,14 @@ ip6t_local_hook(unsigned int hook,
     /* flowlabel and prio (includes version, which shouldn't change either */
     flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
 
-    ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+    ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
 
     if (ret != NF_DROP && ret != NF_STOLEN
         && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
         || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
         || (*pskb)->nfmark != nfmark
-        || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) {
-
-        /* something which could affect routing has changed */
-
-        DEBUGP("ip6table_mangle: we'd need to re-route a packet\n");
-    }
+        || (*pskb)->nh.ipv6h->hop_limit != hop_limit))
+        return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
 
     return ret;
 }
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 61a7c58e99f8..b4154da575c0 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -122,7 +122,7 @@ ip6t_hook(unsigned int hook,
           const struct net_device *out,
           int (*okfn)(struct sk_buff *))
 {
-    return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL);
+    return ip6t_do_table(pskb, hook, in, out, &packet_raw);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c2ab38ff46af..e5e53fff9e38 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -335,7 +335,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = {
 /* From nf_conntrack_proto_icmpv6.c */
 extern unsigned int nf_ct_icmpv6_timeout;
 
-/* From nf_conntrack_frag6.c */
+/* From nf_conntrack_reasm.c */
 extern unsigned int nf_ct_frag6_timeout;
 extern unsigned int nf_ct_frag6_low_thresh;
 extern unsigned int nf_ct_frag6_high_thresh;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index ef18a7b7014b..34d447208ffd 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -33,7 +33,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 
-unsigned long nf_ct_icmpv6_timeout = 30*HZ;
+unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 00d5583807f7..bf93c1ea6be9 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -54,9 +54,9 @@
 #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */
 #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
 
-unsigned int nf_ct_frag6_high_thresh = 256*1024;
-unsigned int nf_ct_frag6_low_thresh = 192*1024;
-unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
+unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024;
+unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024;
+unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT;
 
 struct nf_ct_frag6_skb_cb
 {
@@ -408,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
         return -1;
     }
 
-    if (skb->ip_summed == CHECKSUM_HW)
+    if (skb->ip_summed == CHECKSUM_COMPLETE)
         skb->csum = csum_sub(skb->csum,
                              csum_partial(skb->nh.raw,
                                           (u8*)(fhdr + 1) - skb->nh.raw,
@@ -640,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
         head->len += fp->len;
         if (head->ip_summed != fp->ip_summed)
             head->ip_summed = CHECKSUM_NONE;
-        else if (head->ip_summed == CHECKSUM_HW)
+        else if (head->ip_summed == CHECKSUM_COMPLETE)
             head->csum = csum_add(head->csum, fp->csum);
         head->truesize += fp->truesize;
         atomic_sub(fp->truesize, &nf_ct_frag6_mem);
@@ -652,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
     head->nh.ipv6h->payload_len = htons(payload_len);
 
     /* Yes, and fold redundant checksum back. 8) */
-    if (head->ip_summed == CHECKSUM_HW)
+    if (head->ip_summed == CHECKSUM_COMPLETE)
         head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
 
     fq->fragments = NULL;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 15b862d8acab..d09329ca3267 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -50,6 +50,9 @@
 #include <net/udp.h>
 #include <net/inet_common.h>
 #include <net/tcp_states.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
 
 #include <net/rawv6.h>
 #include <net/xfrm.h>
@@ -169,8 +172,32 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
     sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
 
     while (sk) {
+        int filtered;
+
         delivered = 1;
-        if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
+        switch (nexthdr) {
+        case IPPROTO_ICMPV6:
+            filtered = icmpv6_filter(sk, skb);
+            break;
+#ifdef CONFIG_IPV6_MIP6
+        case IPPROTO_MH:
+            /* XXX: To validate MH only once for each packet,
+             * this is placed here. It should be after checking
+             * xfrm policy, however it doesn't. The checking xfrm
+             * policy is placed in rawv6_rcv() because it is
+             * required for each socket.
+             */
+            filtered = mip6_mh_filter(sk, skb);
+            break;
+#endif
+        default:
+            filtered = 0;
+            break;
+        }
+
+        if (filtered < 0)
+            break;
+        if (filtered == 0) {
             struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 
             /* Not releasing hash table! */
@@ -334,7 +361,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
     if (!rp->checksum)
         skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-    if (skb->ip_summed == CHECKSUM_HW) {
+    if (skb->ip_summed == CHECKSUM_COMPLETE) {
         skb_postpull_rcsum(skb, skb->nh.raw,
                            skb->h.raw - skb->nh.raw);
         if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
@@ -582,6 +609,9 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
     struct iovec *iov;
     u8 __user *type = NULL;
     u8 __user *code = NULL;
+#ifdef CONFIG_IPV6_MIP6
+    u8 len = 0;
+#endif
     int probed = 0;
     int i;
 
@@ -613,6 +643,20 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
                 probed = 1;
             }
             break;
+#ifdef CONFIG_IPV6_MIP6
+        case IPPROTO_MH:
+            if (iov->iov_base && iov->iov_len < 1)
+                break;
+            /* check if type field is readable or not. */
+            if (iov->iov_len > 2 - len) {
+                u8 __user *p = iov->iov_base;
+                get_user(fl->fl_mh_type, &p[2 - len]);
+                probed = 1;
+            } else
+                len += iov->iov_len;
+
+            break;
+#endif
         default:
             probed = 1;
             break;
@@ -759,6 +803,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 
     if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
         fl.oif = np->mcast_oif;
+    security_sk_classify_flow(sk, &fl);
 
     err = ip6_dst_lookup(sk, &dst, &fl);
     if (err)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4e299c69e1c6..f39bbedd1327 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -53,10 +53,10 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 
-int sysctl_ip6frag_high_thresh = 256*1024;
-int sysctl_ip6frag_low_thresh = 192*1024;
+int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
+int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
 
-int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
 
 struct ip6frag_skb_cb
 {
@@ -152,7 +152,7 @@ static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
 }
 
 static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval = 10 * 60 * HZ;
+int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ip6_frag_secret_rebuild(unsigned long dummy)
 {
@@ -433,7 +433,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
         return;
     }
 
-    if (skb->ip_summed == CHECKSUM_HW)
+    if (skb->ip_summed == CHECKSUM_COMPLETE)
         skb->csum = csum_sub(skb->csum,
                              csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
 
@@ -647,7 +647,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
         head->len += fp->len;
         if (head->ip_summed != fp->ip_summed)
             head->ip_summed = CHECKSUM_NONE;
-        else if (head->ip_summed == CHECKSUM_HW)
+        else if (head->ip_summed == CHECKSUM_COMPLETE)
             head->csum = csum_add(head->csum, fp->csum);
         head->truesize += fp->truesize;
         atomic_sub(fp->truesize, &ip6_frag_mem);
@@ -662,7 +662,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
     *skb_in = head;
 
     /* Yes, and fold redundant checksum back. 8) */
-    if (head->ip_summed == CHECKSUM_HW)
+    if (head->ip_summed == CHECKSUM_COMPLETE)
         head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
 
     IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d9baca062d24..d6b4b4f48d18 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -22,6 +22,8 @@
22 * routers in REACHABLE, STALE, DELAY or PROBE states). 22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably) 23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list. 24 * reachable. otherwise, round-robin the list.
25 * Ville Nuorvala
26 * Fixed routing subtrees.
25 */ 27 */
26 28
27#include <linux/capability.h> 29#include <linux/capability.h>
@@ -35,7 +37,6 @@
35#include <linux/netdevice.h> 37#include <linux/netdevice.h>
36#include <linux/in6.h> 38#include <linux/in6.h>
37#include <linux/init.h> 39#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h> 40#include <linux/if_arp.h>
40 41
41#ifdef CONFIG_PROC_FS 42#ifdef CONFIG_PROC_FS
@@ -54,6 +55,7 @@
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/netevent.h>
+#include <net/netlink.h>
 
 #include <asm/uaccess.h>
 
@@ -74,9 +76,6 @@
 
 #define CLONE_OFFLINK_ROUTE 0
 
-#define RT6_SELECT_F_IFACE	0x1
-#define RT6_SELECT_F_REACHABLE	0x2
-
 static int ip6_rt_max_size = 4096;
 static int ip6_rt_gc_min_interval = HZ / 2;
 static int ip6_rt_gc_timeout = 60*HZ;
@@ -140,15 +139,49 @@ struct rt6_info ip6_null_entry = {
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-struct fib6_node ip6_routing_table = {
-	.leaf		= &ip6_null_entry,
-	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
-};
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
-/* Protects all the ip6 fib */
+struct rt6_info ip6_prohibit_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -EACCES,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_prohibit_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
 
-DEFINE_RWLOCK(rt6_lock);
+struct rt6_info ip6_blk_hole_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -EINVAL,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
 
+#endif
 
 /* allocate dst with ip6_dst_ops */
 static __inline__ struct rt6_info *ip6_dst_alloc(void)
@@ -188,8 +221,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 			time_after(jiffies, rt->rt6i_expires));
 }
 
+static inline int rt6_need_strict(struct in6_addr *daddr)
+{
+	return (ipv6_addr_type(daddr) &
+		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+}
+
 /*
- *	Route lookup. Any rt6_lock is implied.
+ *	Route lookup. Any table->tb6_lock is implied.
  */
 
 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
@@ -298,7 +337,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	int m, n;
 
 	m = rt6_check_dev(rt, oif);
-	if (!m && (strict & RT6_SELECT_F_IFACE))
+	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 		return -1;
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
@@ -306,7 +345,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	n = rt6_check_neigh(rt);
 	if (n > 1)
 		m |= 16;
-	else if (!n && strict & RT6_SELECT_F_REACHABLE)
+	else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
 		return -1;
 	return m;
 }
@@ -346,7 +385,7 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
 	}
 
 	if (!match &&
-	    (strict & RT6_SELECT_F_REACHABLE) &&
+	    (strict & RT6_LOOKUP_F_REACHABLE) &&
 	    last && last != rt0) {
 		/* no entries matched; do round-robin */
 		static DEFINE_SPINLOCK(lock);
@@ -417,7 +456,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
 
 	if (rt && !lifetime) {
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		rt = NULL;
 	}
 
423 462
@@ -441,44 +480,95 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
441} 480}
442#endif 481#endif
443 482
444struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, 483#define BACKTRACK(saddr) \
445 int oif, int strict) 484do { \
485 if (rt == &ip6_null_entry) { \
486 struct fib6_node *pn; \
487 while (fn) { \
488 if (fn->fn_flags & RTN_TL_ROOT) \
489 goto out; \
490 pn = fn->parent; \
491 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
492 fn = fib6_lookup(pn->subtree, NULL, saddr); \
493 else \
494 fn = pn; \
495 if (fn->fn_flags & RTN_RTINFO) \
496 goto restart; \
497 } \
498 } \
499} while(0)
500
501static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502 struct flowi *fl, int flags)
446{ 503{
447 struct fib6_node *fn; 504 struct fib6_node *fn;
448 struct rt6_info *rt; 505 struct rt6_info *rt;
449 506
450 read_lock_bh(&rt6_lock); 507 read_lock_bh(&table->tb6_lock);
451 fn = fib6_lookup(&ip6_routing_table, daddr, saddr); 508 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
452 rt = rt6_device_match(fn->leaf, oif, strict); 509restart:
510 rt = fn->leaf;
511 rt = rt6_device_match(rt, fl->oif, flags);
512 BACKTRACK(&fl->fl6_src);
513out:
453 dst_hold(&rt->u.dst); 514 dst_hold(&rt->u.dst);
454 rt->u.dst.__use++; 515 read_unlock_bh(&table->tb6_lock);
455 read_unlock_bh(&rt6_lock);
456 516
457 rt->u.dst.lastuse = jiffies; 517 rt->u.dst.lastuse = jiffies;
458 if (rt->u.dst.error == 0) 518 rt->u.dst.__use++;
459 return rt; 519
460 dst_release(&rt->u.dst); 520 return rt;
521
522}
523
524struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525 int oif, int strict)
526{
527 struct flowi fl = {
528 .oif = oif,
529 .nl_u = {
530 .ip6_u = {
531 .daddr = *daddr,
532 /* TODO: saddr */
533 },
534 },
535 };
536 struct dst_entry *dst;
537 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
538
539 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
540 if (dst->error == 0)
541 return (struct rt6_info *) dst;
542
543 dst_release(dst);
544
461 return NULL; 545 return NULL;
462} 546}
463 547
464/* ip6_ins_rt is called with FREE rt6_lock. 548/* ip6_ins_rt is called with FREE table->tb6_lock.
465 It takes new route entry, the addition fails by any reason the 549 It takes new route entry, the addition fails by any reason the
466 route is freed. In any case, if caller does not hold it, it may 550 route is freed. In any case, if caller does not hold it, it may
467 be destroyed. 551 be destroyed.
468 */ 552 */
469 553
470int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, 554static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
471 void *_rtattr, struct netlink_skb_parms *req)
472{ 555{
473 int err; 556 int err;
557 struct fib6_table *table;
474 558
475 write_lock_bh(&rt6_lock); 559 table = rt->rt6i_table;
476 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req); 560 write_lock_bh(&table->tb6_lock);
477 write_unlock_bh(&rt6_lock); 561 err = fib6_add(&table->tb6_root, rt, info);
562 write_unlock_bh(&table->tb6_lock);
478 563
479 return err; 564 return err;
480} 565}
481 566
567int ip6_ins_rt(struct rt6_info *rt)
568{
569 return __ip6_ins_rt(rt, NULL);
570}
571
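[editor's note] With multiple tables, the former single-table entry points become per-table resolvers (ip6_pol_route_lookup() above) that fib6_rule_lookup() invokes while walking the policy rules. The sketch below shows only the shape of that callback dispatch, with stand-in types and data; it is not the kernel's rule engine.

/* Sketch of the dispatch pattern only -- not the kernel implementation.
 * A rule walker calls the supplied per-table resolver until one of the
 * tables yields a usable route. */
#include <stddef.h>
#include <stdio.h>

struct table { int id; const char *route; };
struct flow  { int oif; const char *dst; };

typedef const char *(*lookup_fn)(struct table *, struct flow *, int flags);

static const char *pol_route_lookup(struct table *t, struct flow *fl, int flags)
{
	(void)fl; (void)flags;
	return t->route;	/* pretend to search t for fl->dst */
}

static const char *rule_lookup(struct flow *fl, int flags, lookup_fn lookup)
{
	static struct table local_tbl = { 255, "local route" };
	static struct table main_tbl  = { 254, "main route" };
	struct table *order[] = { &local_tbl, &main_tbl };	/* rule priority */

	for (size_t i = 0; i < sizeof(order) / sizeof(order[0]); i++) {
		const char *rt = lookup(order[i], fl, flags);
		if (rt)
			return rt;
	}
	return "null entry";
}

int main(void)
{
	struct flow fl = { .oif = 2, .dst = "2001:db8::1" };
	puts(rule_lookup(&fl, 0, pol_route_lookup));
	return 0;
}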
 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
 				      struct in6_addr *saddr)
 {
@@ -532,51 +622,39 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 	return rt;
 }
 
-#define BACKTRACK() \
-if (rt == &ip6_null_entry) { \
-	while ((fn = fn->parent) != NULL) { \
-		if (fn->fn_flags & RTN_ROOT) { \
-			goto out; \
-		} \
-		if (fn->fn_flags & RTN_RTINFO) \
-			goto restart; \
-	} \
-}
-
-
-void ip6_route_input(struct sk_buff *skb)
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+					    struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
-	int strict;
+	int strict = 0;
 	int attempts = 3;
 	int err;
-	int reachable = RT6_SELECT_F_REACHABLE;
+	int reachable = RT6_LOOKUP_F_REACHABLE;
 
-	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+	strict |= flags & RT6_LOOKUP_F_IFACE;
 
 relookup:
-	read_lock_bh(&rt6_lock);
+	read_lock_bh(&table->tb6_lock);
 
 restart_2:
-	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
-			 &skb->nh.ipv6h->saddr);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-	rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
-	BACKTRACK();
+	rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
+	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-		nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
+		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
 	else {
 #if CLONE_OFFLINK_ROUTE
-		nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
+		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
 #else
 		goto out2;
 #endif
@@ -587,7 +665,7 @@ restart:
 
 	dst_hold(&rt->u.dst);
 	if (nrt) {
-		err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
+		err = ip6_ins_rt(nrt);
 		if (!err)
 			goto out2;
 	}
@@ -596,7 +674,7 @@ restart:
 		goto out2;
 
 	/*
-	 * Race condition! In the gap, when rt6_lock was
+	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
 	dst_release(&rt->u.dst);
@@ -608,40 +686,63 @@ out:
 		goto restart_2;
 	}
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 out2:
 	rt->u.dst.lastuse = jiffies;
 	rt->u.dst.__use++;
-	skb->dst = (struct dst_entry *) rt;
-	return;
+
+	return rt;
 }
 
-struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+void ip6_route_input(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct flowi fl = {
+		.iif = skb->dev->ifindex,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = iph->daddr,
+				.saddr = iph->saddr,
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+				.fwmark = skb->nfmark,
+#endif
+				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
+			},
+		},
+		.proto = iph->nexthdr,
+	};
+	int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
+
+	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
+}
+
+static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
+					     struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
-	int strict;
+	int strict = 0;
 	int attempts = 3;
 	int err;
-	int reachable = RT6_SELECT_F_REACHABLE;
+	int reachable = RT6_LOOKUP_F_REACHABLE;
 
-	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+	strict |= flags & RT6_LOOKUP_F_IFACE;
 
 relookup:
-	read_lock_bh(&rt6_lock);
+	read_lock_bh(&table->tb6_lock);
 
 restart_2:
-	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
 	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
-	BACKTRACK();
+	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
@@ -658,7 +759,7 @@ restart:
 
 	dst_hold(&rt->u.dst);
 	if (nrt) {
-		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
+		err = ip6_ins_rt(nrt);
 		if (!err)
 			goto out2;
 	}
@@ -667,7 +768,7 @@ restart:
 		goto out2;
 
 	/*
-	 * Race condition! In the gap, when rt6_lock was
+	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
 	dst_release(&rt->u.dst);
@@ -679,11 +780,21 @@ out:
 		goto restart_2;
 	}
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 out2:
 	rt->u.dst.lastuse = jiffies;
 	rt->u.dst.__use++;
-	return &rt->u.dst;
+	return rt;
+}
+
+struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+{
+	int flags = 0;
+
+	if (rt6_need_strict(&fl->fl6_dst))
+		flags |= RT6_LOOKUP_F_IFACE;
+
+	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
 }
 
 
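[editor's note] After this hunk, ip6_route_input() and ip6_route_output() are thin wrappers: each builds a struct flowi key and defers to a shared per-table resolver, with RT6_LOOKUP_F_IFACE set when the destination is link-local or multicast (rt6_need_strict()). A rough userspace sketch of that split, under assumed stand-in types:

/* One resolver, two thin entry points -- illustrative only. */
#include <stdio.h>
#include <string.h>

struct flow { int iif, oif; const char *dst; };

/* link-local/multicast destinations only make sense on one interface */
static int need_strict(const char *dst)
{
	return strncmp(dst, "ff", 2) == 0 || strncmp(dst, "fe8", 3) == 0;
}

static const char *pol_route(const struct flow *fl, int strict)
{
	printf("resolve %s (iif=%d oif=%d strict=%d)\n",
	       fl->dst, fl->iif, fl->oif, strict);
	return "rt";
}

static const char *route_input(int ifindex, const char *dst)
{
	struct flow fl = { .iif = ifindex, .dst = dst };
	return pol_route(&fl, need_strict(dst));
}

static const char *route_output(int oif, const char *dst)
{
	struct flow fl = { .oif = oif, .dst = dst };
	return pol_route(&fl, need_strict(dst));
}

int main(void)
{
	route_input(2, "ff02::1");	/* multicast: strict */
	route_output(0, "2001:db8::1");	/* global: not strict */
	return 0;
}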
@@ -709,7 +820,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE)
-			ip6_del_rt(rt, NULL, NULL, NULL);
+			ip6_del_rt(rt);
 		else
 			dst_release(dst);
 	}
@@ -747,8 +858,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 	}
 }
 
-/* Protected by rt6_lock. */
-static struct dst_entry *ndisc_dst_gc_list;
 static int ipv6_get_mtu(struct net_device *dev);
 
 static inline unsigned int ipv6_advmss(unsigned int mtu)
@@ -769,6 +878,9 @@ static inline unsigned int ipv6_advmss(unsigned int mtu)
 	return mtu;
 }
 
+static struct dst_entry *ndisc_dst_gc_list;
+static DEFINE_SPINLOCK(ndisc_lock);
+
 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
 				  struct neighbour *neigh,
 				  struct in6_addr *addr,
@@ -809,10 +921,10 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
 	rt->rt6i_dst.plen = 128;
 #endif
 
-	write_lock_bh(&rt6_lock);
+	spin_lock_bh(&ndisc_lock);
 	rt->u.dst.next = ndisc_dst_gc_list;
 	ndisc_dst_gc_list = &rt->u.dst;
-	write_unlock_bh(&rt6_lock);
+	spin_unlock_bh(&ndisc_lock);
 
 	fib6_force_start_gc();
 
@@ -826,8 +938,11 @@ int ndisc_dst_gc(int *more)
 	int freed;
 
 	next = NULL;
+	freed = 0;
+
+	spin_lock_bh(&ndisc_lock);
 	pprev = &ndisc_dst_gc_list;
-	freed = 0;
+
 	while ((dst = *pprev) != NULL) {
 		if (!atomic_read(&dst->__refcnt)) {
 			*pprev = dst->next;
@@ -839,6 +954,8 @@ int ndisc_dst_gc(int *more)
 		}
 	}
 
+	spin_unlock_bh(&ndisc_lock);
+
 	return freed;
 }
 
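[editor's note] ndisc_dst_gc() above keeps the pointer-to-pointer walk it already used, now under its own ndisc_lock instead of the (removed) global rt6_lock. The idiom deletes from a singly linked list without a separate "prev" variable; a standalone illustration:

/* Sketch of the unlink idiom ndisc_dst_gc() uses: walk a singly linked
 * list through a pointer-to-pointer so a node can be removed without
 * tracking a previous node.  Standalone stand-ins, not kernel types. */
#include <stdio.h>
#include <stdlib.h>

struct node { int refcnt; struct node *next; };

static int gc(struct node **head)
{
	struct node **pprev = head, *n;
	int freed = 0;

	while ((n = *pprev) != NULL) {
		if (n->refcnt == 0) {
			*pprev = n->next;	/* splice out, pprev stays put */
			free(n);
			freed++;
		} else {
			pprev = &n->next;	/* keep node, advance */
		}
	}
	return freed;
}

int main(void)
{
	struct node *head = NULL;
	for (int i = 0; i < 4; i++) {
		struct node *n = malloc(sizeof(*n));
		n->refcnt = i & 1;	/* every other node still referenced */
		n->next = head;
		head = n;
	}
	printf("freed %d\n", gc(&head));	/* prints: freed 2 */
	return 0;
}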
844 961
@@ -899,28 +1016,24 @@ int ipv6_get_hoplimit(struct net_device *dev)
899 * 1016 *
900 */ 1017 */
901 1018
902int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 1019int ip6_route_add(struct fib6_config *cfg)
903 void *_rtattr, struct netlink_skb_parms *req)
904{ 1020{
905 int err; 1021 int err;
906 struct rtmsg *r;
907 struct rtattr **rta;
908 struct rt6_info *rt = NULL; 1022 struct rt6_info *rt = NULL;
909 struct net_device *dev = NULL; 1023 struct net_device *dev = NULL;
910 struct inet6_dev *idev = NULL; 1024 struct inet6_dev *idev = NULL;
1025 struct fib6_table *table;
911 int addr_type; 1026 int addr_type;
912 1027
913 rta = (struct rtattr **) _rtattr; 1028 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
914
915 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
916 return -EINVAL; 1029 return -EINVAL;
917#ifndef CONFIG_IPV6_SUBTREES 1030#ifndef CONFIG_IPV6_SUBTREES
918 if (rtmsg->rtmsg_src_len) 1031 if (cfg->fc_src_len)
919 return -EINVAL; 1032 return -EINVAL;
920#endif 1033#endif
921 if (rtmsg->rtmsg_ifindex) { 1034 if (cfg->fc_ifindex) {
922 err = -ENODEV; 1035 err = -ENODEV;
923 dev = dev_get_by_index(rtmsg->rtmsg_ifindex); 1036 dev = dev_get_by_index(cfg->fc_ifindex);
924 if (!dev) 1037 if (!dev)
925 goto out; 1038 goto out;
926 idev = in6_dev_get(dev); 1039 idev = in6_dev_get(dev);
@@ -928,8 +1041,14 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 			goto out;
 	}
 
-	if (rtmsg->rtmsg_metric == 0)
-		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
+	if (cfg->fc_metric == 0)
+		cfg->fc_metric = IP6_RT_PRIO_USER;
+
+	table = fib6_new_table(cfg->fc_table);
+	if (table == NULL) {
+		err = -ENOBUFS;
+		goto out;
+	}
 
 	rt = ip6_dst_alloc();
@@ -939,14 +1058,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	}
 
 	rt->u.dst.obsolete = -1;
-	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
-	if (nlh && (r = NLMSG_DATA(nlh))) {
-		rt->rt6i_protocol = r->rtm_protocol;
-	} else {
-		rt->rt6i_protocol = RTPROT_BOOT;
-	}
+	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
 
-	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
+	if (cfg->fc_protocol == RTPROT_UNSPEC)
+		cfg->fc_protocol = RTPROT_BOOT;
+	rt->rt6i_protocol = cfg->fc_protocol;
+
+	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
 	if (addr_type & IPV6_ADDR_MULTICAST)
 		rt->u.dst.input = ip6_mc_input;
@@ -955,24 +1073,22 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 
 	rt->u.dst.output = ip6_output;
 
-	ipv6_addr_prefix(&rt->rt6i_dst.addr,
-			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
-	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
+	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
+	rt->rt6i_dst.plen = cfg->fc_dst_len;
 	if (rt->rt6i_dst.plen == 128)
 		rt->u.dst.flags = DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
-	ipv6_addr_prefix(&rt->rt6i_src.addr,
-			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
-	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
+	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
+	rt->rt6i_src.plen = cfg->fc_src_len;
 #endif
 
-	rt->rt6i_metric = rtmsg->rtmsg_metric;
+	rt->rt6i_metric = cfg->fc_metric;
 
 	/* We cannot add true routes via loopback here,
 	   they would result in kernel looping; promote them to reject routes
 	 */
-	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
+	if ((cfg->fc_flags & RTF_REJECT) ||
 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
 		/* hold loopback dev/idev if we haven't done so. */
 		if (dev != &loopback_dev) {
@@ -995,12 +1111,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 		goto install_route;
 	}
 
-	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
+	if (cfg->fc_flags & RTF_GATEWAY) {
 		struct in6_addr *gw_addr;
 		int gwa_type;
 
-		gw_addr = &rtmsg->rtmsg_gateway;
-		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
+		gw_addr = &cfg->fc_gateway;
+		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
 		gwa_type = ipv6_addr_type(gw_addr);
 
 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1017,7 +1133,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 			if (!(gwa_type&IPV6_ADDR_UNICAST))
 				goto out;
 
-			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
+			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
 
 			err = -EHOSTUNREACH;
 			if (grt == NULL)
@@ -1049,7 +1165,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 	if (dev == NULL)
 		goto out;
 
-	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
+	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
 		if (IS_ERR(rt->rt6i_nexthop)) {
 			err = PTR_ERR(rt->rt6i_nexthop);
@@ -1058,24 +1174,24 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
 		}
 	}
 
-	rt->rt6i_flags = rtmsg->rtmsg_flags;
+	rt->rt6i_flags = cfg->fc_flags;
 
 install_route:
-	if (rta && rta[RTA_METRICS-1]) {
-		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
-		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
-
-		while (RTA_OK(attr, attrlen)) {
-			unsigned flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > RTAX_MAX) {
+	if (cfg->fc_mx) {
+		struct nlattr *nla;
+		int remaining;
+
+		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+			int type = nla->nla_type;
+
+			if (type) {
+				if (type > RTAX_MAX) {
 					err = -EINVAL;
 					goto out;
 				}
-				rt->u.dst.metrics[flavor-1] =
-					*(u32 *)RTA_DATA(attr);
+
+				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
 			}
-			attr = RTA_NEXT(attr, attrlen);
 		}
 	}
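[editor's note] The metrics loop above moves from the old RTA_OK()/RTA_NEXT() macros to nla_for_each_attr(). Both walk the same wire format: a sequence of length/type-prefixed attributes padded to 4-byte boundaries. The sketch below hand-rolls that walk in userspace to show what the iteration macro hides; the layout is simplified and the names are local to the example.

/* Simplified TLV walk in the spirit of nla_for_each_attr(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct attr { uint16_t len; uint16_t type; };	/* payload follows */

#define ALIGN4(x) (((x) + 3u) & ~3u)

static void walk(const uint8_t *buf, size_t remaining)
{
	while (remaining >= sizeof(struct attr)) {
		struct attr a;
		memcpy(&a, buf, sizeof(a));
		if (a.len < sizeof(a) || a.len > remaining)
			break;			/* malformed: stop early */
		if (a.len >= sizeof(a) + sizeof(uint32_t)) {
			uint32_t v;
			memcpy(&v, buf + sizeof(a), sizeof(v));
			printf("metric type %u = %u\n", a.type, v);
		}
		size_t step = ALIGN4(a.len);
		if (step >= remaining)
			break;
		buf += step;
		remaining -= step;
	}
}

int main(void)
{
	uint8_t buf[16] = { 0 };
	struct attr a = { .len = 8, .type = 2 };	/* made-up metric id */
	uint32_t val = 1280;
	memcpy(buf, &a, sizeof(a));
	memcpy(buf + 4, &val, sizeof(val));
	a.type = 4; val = 600;
	memcpy(buf + 8, &a, sizeof(a));
	memcpy(buf + 12, &val, sizeof(val));
	walk(buf, sizeof(buf));
	return 0;
}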
1081 1197
@@ -1087,7 +1203,8 @@ install_route:
1087 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1203 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1088 rt->u.dst.dev = dev; 1204 rt->u.dst.dev = dev;
1089 rt->rt6i_idev = idev; 1205 rt->rt6i_idev = idev;
1090 return ip6_ins_rt(rt, nlh, _rtattr, req); 1206 rt->rt6i_table = table;
1207 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1091 1208
1092out: 1209out:
1093 if (dev) 1210 if (dev)
@@ -1099,51 +1216,65 @@ out:
 	return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 {
 	int err;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
+	if (rt == &ip6_null_entry)
+		return -ENOENT;
 
-	err = fib6_del(rt, nlh, _rtattr, req);
+	table = rt->rt6i_table;
+	write_lock_bh(&table->tb6_lock);
+
+	err = fib6_del(rt, info);
 	dst_release(&rt->u.dst);
 
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int ip6_del_rt(struct rt6_info *rt)
 {
+	return __ip6_del_rt(rt, NULL);
+}
+
+static int ip6_route_del(struct fib6_config *cfg)
+{
+	struct fib6_table *table;
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 	int err = -ESRCH;
 
-	read_lock_bh(&rt6_lock);
+	table = fib6_get_table(cfg->fc_table);
+	if (table == NULL)
+		return err;
+
+	read_lock_bh(&table->tb6_lock);
 
-	fn = fib6_locate(&ip6_routing_table,
-			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
-			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
+	fn = fib6_locate(&table->tb6_root,
+			 &cfg->fc_dst, cfg->fc_dst_len,
+			 &cfg->fc_src, cfg->fc_src_len);
 
 	if (fn) {
 		for (rt = fn->leaf; rt; rt = rt->u.next) {
-			if (rtmsg->rtmsg_ifindex &&
+			if (cfg->fc_ifindex &&
 			    (rt->rt6i_dev == NULL ||
-			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
+			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
 				continue;
-			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
-			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+			if (cfg->fc_flags & RTF_GATEWAY &&
+			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
 				continue;
-			if (rtmsg->rtmsg_metric &&
-			    rtmsg->rtmsg_metric != rt->rt6i_metric)
+			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 				continue;
 			dst_hold(&rt->u.dst);
-			read_unlock_bh(&rt6_lock);
+			read_unlock_bh(&table->tb6_lock);
 
-			return ip6_del_rt(rt, nlh, _rtattr, req);
+			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
@@ -1151,13 +1282,18 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
 /*
  *	Handle redirects
  */
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
-		  struct neighbour *neigh, u8 *lladdr, int on_link)
+struct ip6rd_flowi {
+	struct flowi fl;
+	struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
+					     struct flowi *fl,
+					     int flags)
 {
-	struct rt6_info *rt, *nrt = NULL;
-	int strict;
+	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
+	struct rt6_info *rt;
 	struct fib6_node *fn;
-	struct netevent_redirect netevent;
 
 	/*
 	 * Get the "current" route for this destination and
@@ -1169,10 +1305,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
 	 * is a bit fuzzy and one might need to check all possible
 	 * routes.
 	 */
-	strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
 
-	read_lock_bh(&rt6_lock);
-	fn = fib6_lookup(&ip6_routing_table, dest, NULL);
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
 	for (rt = fn->leaf; rt; rt = rt->u.next) {
 		/*
@@ -1187,29 +1322,60 @@ restart:
 			continue;
 		if (!(rt->rt6i_flags & RTF_GATEWAY))
 			continue;
-		if (neigh->dev != rt->rt6i_dev)
+		if (fl->oif != rt->rt6i_dev->ifindex)
 			continue;
-		if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
+		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
 			continue;
 		break;
 	}
-	if (rt)
-		dst_hold(&rt->u.dst);
-	else if (strict) {
-		while ((fn = fn->parent) != NULL) {
-			if (fn->fn_flags & RTN_ROOT)
-				break;
-			if (fn->fn_flags & RTN_RTINFO)
-				goto restart;
-		}
-	}
-	read_unlock_bh(&rt6_lock);
-
-	if (!rt) {
+
+	if (!rt)
+		rt = &ip6_null_entry;
+	BACKTRACK(&fl->fl6_src);
+out:
+	dst_hold(&rt->u.dst);
+
+	read_unlock_bh(&table->tb6_lock);
+
+	return rt;
+};
+
+static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
+					   struct in6_addr *src,
+					   struct in6_addr *gateway,
+					   struct net_device *dev)
+{
+	struct ip6rd_flowi rdfl = {
+		.fl = {
+			.oif = dev->ifindex,
+			.nl_u = {
+				.ip6_u = {
+					.daddr = *dest,
+					.saddr = *src,
+				},
+			},
+		},
+		.gateway = *gateway,
+	};
+	int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
+
+	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
+}
+
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
+		  struct in6_addr *saddr,
+		  struct neighbour *neigh, u8 *lladdr, int on_link)
+{
+	struct rt6_info *rt, *nrt = NULL;
+	struct netevent_redirect netevent;
+
+	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+
+	if (rt == &ip6_null_entry) {
 		if (net_ratelimit())
 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
 			       "for redirect target\n");
-		return;
+		goto out;
 	}
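[editor's note] ip6rd_flowi above is a "wider key" trick: the generic lookup API only passes a struct flowi *, so the redirect path embeds the flowi as the first member of a larger struct and the callback casts back to recover the extra gateway field. A standalone illustration of the pattern, with stand-in types:

/* Embedding-as-first-member downcast; illustrative types only. */
#include <stdio.h>

struct flowi    { const char *dst; };
struct rd_flowi { struct flowi fl; const char *gateway; };

static void redirect_lookup(struct flowi *fl)
{
	/* safe: fl is the first member of struct rd_flowi */
	struct rd_flowi *rdfl = (struct rd_flowi *)fl;
	printf("dst %s via gw %s\n", fl->dst, rdfl->gateway);
}

static void generic_lookup(struct flowi *fl, void (*fn)(struct flowi *))
{
	fn(fl);		/* API is fixed: only a flowi pointer crosses it */
}

int main(void)
{
	struct rd_flowi rdfl = { .fl = { .dst = "2001:db8::1" },
				 .gateway = "fe80::1" };
	generic_lookup(&rdfl.fl, redirect_lookup);
	return 0;
}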
 
 	/*
@@ -1252,7 +1418,7 @@ restart:
 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
 
-	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
+	if (ip6_ins_rt(nrt))
 		goto out;
 
 	netevent.old = &rt->u.dst;
@@ -1260,7 +1426,7 @@ restart:
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags&RTF_CACHE) {
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		return;
 	}
 
@@ -1342,7 +1508,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
-		ip6_ins_rt(nrt, NULL, NULL, NULL);
+		ip6_ins_rt(nrt);
 	}
 out:
 	dst_release(&rt->u.dst);
@@ -1378,6 +1544,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 #ifdef CONFIG_IPV6_SUBTREES
 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 #endif
+		rt->rt6i_table = ort->rt6i_table;
 	}
 	return rt;
 }
@@ -1388,9 +1555,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
+
+	table = fib6_get_table(RT6_TABLE_INFO);
+	if (table == NULL)
+		return NULL;
 
-	write_lock_bh(&rt6_lock);
-	fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
+	write_lock_bh(&table->tb6_lock);
+	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
 	if (!fn)
 		goto out;
 
@@ -1405,7 +1577,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle
 			break;
 	}
 out:
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1413,21 +1585,23 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
 				  struct in6_addr *gwaddr, int ifindex,
 				  unsigned pref)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table	= RT6_TABLE_INFO,
+		.fc_metric	= 1024,
+		.fc_ifindex	= ifindex,
+		.fc_dst_len	= prefixlen,
+		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
+				  RTF_UP | RTF_PREF(pref),
+	};
+
+	ipv6_addr_copy(&cfg.fc_dst, prefix);
+	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
-	rtmsg.rtmsg_dst_len = prefixlen;
-	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
-	rtmsg.rtmsg_metric = 1024;
-	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
 	/* We should treat it as a default route if prefix length is 0. */
 	if (!prefixlen)
-		rtmsg.rtmsg_flags |= RTF_DEFAULT;
-	rtmsg.rtmsg_ifindex = ifindex;
+		cfg.fc_flags |= RTF_DEFAULT;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&cfg);
 
 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
 }
@@ -1436,12 +1610,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle
 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
 {
 	struct rt6_info *rt;
-	struct fib6_node *fn;
+	struct fib6_table *table;
 
-	fn = &ip6_routing_table;
+	table = fib6_get_table(RT6_TABLE_DFLT);
+	if (table == NULL)
+		return NULL;
 
-	write_lock_bh(&rt6_lock);
-	for (rt = fn->leaf; rt; rt=rt->u.next) {
+	write_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
 		if (dev == rt->rt6i_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
@@ -1449,7 +1625,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 	}
 	if (rt)
 		dst_hold(&rt->u.dst);
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1457,43 +1633,65 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
 				     struct net_device *dev,
 				     unsigned int pref)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table	= RT6_TABLE_DFLT,
+		.fc_metric	= 1024,
+		.fc_ifindex	= dev->ifindex,
+		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
+				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+	};
 
-	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
-	rtmsg.rtmsg_metric = 1024;
-	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
-			    RTF_PREF(pref);
+	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
-	rtmsg.rtmsg_ifindex = dev->ifindex;
+	ip6_route_add(&cfg);
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
 void rt6_purge_dflt_routers(void)
 {
 	struct rt6_info *rt;
+	struct fib6_table *table;
+
+	/* NOTE: Keep consistent with rt6_get_dflt_router */
+	table = fib6_get_table(RT6_TABLE_DFLT);
+	if (table == NULL)
+		return;
 
 restart:
-	read_lock_bh(&rt6_lock);
-	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
+	read_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
 			dst_hold(&rt->u.dst);
-
-			read_unlock_bh(&rt6_lock);
-
-			ip6_del_rt(rt, NULL, NULL, NULL);
-
+			read_unlock_bh(&table->tb6_lock);
+			ip6_del_rt(rt);
 			goto restart;
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
+}
+
+static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
+				 struct fib6_config *cfg)
+{
+	memset(cfg, 0, sizeof(*cfg));
+
+	cfg->fc_table = RT6_TABLE_MAIN;
+	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
+	cfg->fc_metric = rtmsg->rtmsg_metric;
+	cfg->fc_expires = rtmsg->rtmsg_info;
+	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
+	cfg->fc_src_len = rtmsg->rtmsg_src_len;
+	cfg->fc_flags = rtmsg->rtmsg_flags;
+
+	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
+	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
+	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
 }
 
 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 {
+	struct fib6_config cfg;
 	struct in6_rtmsg rtmsg;
 	int err;
 
@@ -1506,14 +1704,16 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 				   sizeof(struct in6_rtmsg));
 		if (err)
 			return -EFAULT;
 
+		rtmsg_to_fib6_config(&rtmsg, &cfg);
+
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_add(&cfg);
 			break;
 		case SIOCDELRT:
-			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_del(&cfg);
 			break;
 		default:
 			err = -EINVAL;
@@ -1587,6 +1787,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
 	rt->rt6i_dst.plen = 128;
+	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
 
 	atomic_set(&rt->u.dst.__refcnt, 1);
 
@@ -1605,9 +1806,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
 
 void rt6_ifdown(struct net_device *dev)
 {
-	write_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(fib6_ifdown, 0, dev);
 }
 
 struct rt6_mtu_change_arg
@@ -1657,80 +1856,114 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 
 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 {
-	struct rt6_mtu_change_arg arg;
+	struct rt6_mtu_change_arg arg = {
+		.dev = dev,
+		.mtu = mtu,
+	};
 
-	arg.dev = dev;
-	arg.mtu = mtu;
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
+	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
 }
 
-static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
-			      struct in6_rtmsg *rtmsg)
+static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
+	[RTA_GATEWAY]		= { .len = sizeof(struct in6_addr) },
+	[RTA_OIF]		= { .type = NLA_U32 },
+	[RTA_IIF]		= { .type = NLA_U32 },
+	[RTA_PRIORITY]		= { .type = NLA_U32 },
+	[RTA_METRICS]		= { .type = NLA_NESTED },
+};
+
+static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct fib6_config *cfg)
 {
-	memset(rtmsg, 0, sizeof(*rtmsg));
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
+	int err;
 
-	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
-	rtmsg->rtmsg_src_len = r->rtm_src_len;
-	rtmsg->rtmsg_flags = RTF_UP;
-	if (r->rtm_type == RTN_UNREACHABLE)
-		rtmsg->rtmsg_flags |= RTF_REJECT;
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+	if (err < 0)
+		goto errout;
 
-	if (rta[RTA_GATEWAY-1]) {
-		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
-		rtmsg->rtmsg_flags |= RTF_GATEWAY;
-	}
-	if (rta[RTA_DST-1]) {
-		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
+	err = -EINVAL;
+	rtm = nlmsg_data(nlh);
+	memset(cfg, 0, sizeof(*cfg));
+
+	cfg->fc_table = rtm->rtm_table;
+	cfg->fc_dst_len = rtm->rtm_dst_len;
+	cfg->fc_src_len = rtm->rtm_src_len;
+	cfg->fc_flags = RTF_UP;
+	cfg->fc_protocol = rtm->rtm_protocol;
+
+	if (rtm->rtm_type == RTN_UNREACHABLE)
+		cfg->fc_flags |= RTF_REJECT;
+
+	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+	cfg->fc_nlinfo.nlh = nlh;
+
+	if (tb[RTA_GATEWAY]) {
+		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
+		cfg->fc_flags |= RTF_GATEWAY;
 	}
-	if (rta[RTA_SRC-1]) {
-		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
+
+	if (tb[RTA_DST]) {
+		int plen = (rtm->rtm_dst_len + 7) >> 3;
+
+		if (nla_len(tb[RTA_DST]) < plen)
+			goto errout;
+
+		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
 	}
-	if (rta[RTA_OIF-1]) {
-		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+
+	if (tb[RTA_SRC]) {
+		int plen = (rtm->rtm_src_len + 7) >> 3;
+
+		if (nla_len(tb[RTA_SRC]) < plen)
+			goto errout;
+
+		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
 	}
-	if (rta[RTA_PRIORITY-1]) {
-		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
+
+	if (tb[RTA_OIF])
+		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
+
+	if (tb[RTA_PRIORITY])
+		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
+
+	if (tb[RTA_METRICS]) {
+		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
+		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
 	}
-	return 0;
+
+	if (tb[RTA_TABLE])
+		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
+
+	err = 0;
+errout:
+	return err;
 }
 
 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtmsg *r = NLMSG_DATA(nlh);
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg;
+	int err;
 
-	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	return ip6_route_del(&cfg);
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtmsg *r = NLMSG_DATA(nlh);
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg;
+	int err;
 
-	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
-}
+	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
 
-struct rt6_rtnl_dump_arg
-{
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-};
+	return ip6_route_add(&cfg);
+}
 
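[editor's note] rtm_to_fib6_config() above leans on nlmsg_parse() with an nla_policy table: every attribute is type- and length-checked against a declarative policy before the handler reads it, replacing the per-attribute if/RTA_LENGTH checks the old inet6_rtm_to_rtmsg() carried. A toy version of that policy check follows, with made-up attribute ids; it is not the kernel's netlink API.

/* Declarative attribute validation, sketched. */
#include <stdio.h>

enum { A_GATEWAY = 1, A_OIF, A_MAX = A_OIF };
enum { T_ANY, T_U32, T_FIXED16 };

static const struct { int type; } policy[A_MAX + 1] = {
	[A_GATEWAY] = { T_FIXED16 },	/* sizeof(struct in6_addr) */
	[A_OIF]     = { T_U32 },
};

static int validate(int id, unsigned len)
{
	if (id <= 0 || id > A_MAX)
		return 0;	/* unknown ids rejected in this toy version */
	switch (policy[id].type) {
	case T_U32:	return len == 4;
	case T_FIXED16:	return len == 16;
	default:	return 1;
	}
}

int main(void)
{
	printf("gw/16 -> %d, oif/2 -> %d\n",
	       validate(A_GATEWAY, 16), validate(A_OIF, 2));
	return 0;
}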
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
@@ -1738,9 +1971,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 int prefix, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
 	struct rta_cacheinfo ci;
+	u32 table;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -1749,13 +1982,21 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		}
 	}
 
-	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET6;
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
 	rtm->rtm_src_len = rt->rt6i_src.plen;
 	rtm->rtm_tos = 0;
-	rtm->rtm_table = RT_TABLE_MAIN;
+	if (rt->rt6i_table)
+		table = rt->rt6i_table->tb6_id;
+	else
+		table = RT6_TABLE_UNSPEC;
+	rtm->rtm_table = table;
+	NLA_PUT_U32(skb, RTA_TABLE, table);
 	if (rt->rt6i_flags&RTF_REJECT)
 		rtm->rtm_type = RTN_UNREACHABLE;
 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
@@ -1776,31 +2017,35 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		rtm->rtm_flags |= RTM_F_CLONED;
 
 	if (dst) {
-		RTA_PUT(skb, RTA_DST, 16, dst);
+		NLA_PUT(skb, RTA_DST, 16, dst);
 		rtm->rtm_dst_len = 128;
 	} else if (rtm->rtm_dst_len)
-		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src) {
-		RTA_PUT(skb, RTA_SRC, 16, src);
+		NLA_PUT(skb, RTA_SRC, 16, src);
 		rtm->rtm_src_len = 128;
 	} else if (rtm->rtm_src_len)
-		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 #endif
 	if (iif)
-		RTA_PUT(skb, RTA_IIF, 4, &iif);
+		NLA_PUT_U32(skb, RTA_IIF, iif);
 	else if (dst) {
 		struct in6_addr saddr_buf;
 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
-			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
+
 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	if (rt->u.dst.neighbour)
-		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+
 	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
-	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
+		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+
+	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	if (rt->rt6i_expires)
 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
@@ -1812,23 +2057,21 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 	ci.rta_id = 0;
 	ci.rta_ts = 0;
 	ci.rta_tsage = 0;
-	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
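[editor's note] rt6_fill_node() now follows the nlmsg_put()/nlmsg_end()/nlmsg_cancel() protocol: start a message, append attributes, and on any NLA_PUT failure roll the buffer back to where the message began instead of hand-trimming with skb_trim(). The same begin/commit/rollback shape, sketched over a plain buffer (not the kernel's nlmsg_* API):

/* Begin/commit/rollback message construction, sketched. */
#include <stdio.h>
#include <string.h>

struct msg { char buf[64]; size_t len, mark; };

static void msg_begin(struct msg *m)
{
	m->mark = m->len;	/* remember where this message starts */
}

static int msg_put(struct msg *m, const char *s)
{
	size_t n = strlen(s);
	if (m->len + n > sizeof(m->buf))
		return 0;	/* no room: caller jumps to cancel */
	memcpy(m->buf + m->len, s, n);
	m->len += n;
	return 1;
}

static void msg_cancel(struct msg *m) { m->len = m->mark; }

int main(void)
{
	struct msg m = { .len = 0 };
	msg_begin(&m);
	if (!msg_put(&m, "RTA_DST ") || !msg_put(&m, "RTA_OIF"))
		msg_cancel(&m);	/* a partial message never leaks out */
	printf("%.*s\n", (int)m.len, m.buf);
	return 0;
}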
-static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	int prefix;
 
-	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
-		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
+	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
+		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
 	} else
 		prefix = 0;
@@ -1838,189 +2081,108 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 		     prefix, NLM_F_MULTI);
 }
 
-static int fib6_dump_node(struct fib6_walker_t *w)
+int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
-	int res;
+	struct nlattr *tb[RTA_MAX+1];
 	struct rt6_info *rt;
+	struct sk_buff *skb;
+	struct rtmsg *rtm;
+	struct flowi fl;
+	int err, iif = 0;
 
-	for (rt = w->leaf; rt; rt = rt->u.next) {
-		res = rt6_dump_route(rt, w->args);
-		if (res < 0) {
-			/* Frame is full, suspend walking */
-			w->leaf = rt;
-			return 1;
-		}
-		BUG_TRAP(res!=0);
-	}
-	w->leaf = NULL;
-	return 0;
-}
-
-static void fib6_dump_end(struct netlink_callback *cb)
-{
-	struct fib6_walker_t *w = (void*)cb->args[0];
-
-	if (w) {
-		cb->args[0] = 0;
-		fib6_walker_unlink(w);
-		kfree(w);
-	}
-	cb->done = (void*)cb->args[1];
-	cb->args[1] = 0;
-}
-
-static int fib6_dump_done(struct netlink_callback *cb)
-{
-	fib6_dump_end(cb);
-	return cb->done ? cb->done(cb) : 0;
-}
-
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct rt6_rtnl_dump_arg arg;
-	struct fib6_walker_t *w;
-	int res;
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+	if (err < 0)
+		goto errout;
 
-	arg.skb = skb;
-	arg.cb = cb;
+	err = -EINVAL;
+	memset(&fl, 0, sizeof(fl));
 
-	w = (void*)cb->args[0];
-	if (w == NULL) {
-		/* New dump:
-		 *
-		 * 1. hook callback destructor.
-		 */
-		cb->args[1] = (long)cb->done;
-		cb->done = fib6_dump_done;
+	if (tb[RTA_SRC]) {
+		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+			goto errout;
 
-		/*
-		 * 2. allocate and initialize walker.
-		 */
-		w = kzalloc(sizeof(*w), GFP_ATOMIC);
-		if (w == NULL)
-			return -ENOMEM;
-		RT6_TRACE("dump<%p", w);
-		w->root = &ip6_routing_table;
-		w->func = fib6_dump_node;
-		w->args = &arg;
-		cb->args[0] = (long)w;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk(w);
-		read_unlock_bh(&rt6_lock);
-	} else {
-		w->args = &arg;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk_continue(w);
-		read_unlock_bh(&rt6_lock);
+		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
 	}
-#if RT6_DEBUG >= 3
-	if (res <= 0 && skb->len == 0)
-		RT6_TRACE("%p>dump end\n", w);
-#endif
-	res = res < 0 ? res : skb->len;
-	/* res < 0 is an error. (really, impossible)
-	   res == 0 means that dump is complete, but skb still can contain data.
-	   res > 0 dump is not complete, but frame is full.
-	 */
-	/* Destroy walker, if dump of this table is complete. */
-	if (res <= 0)
-		fib6_dump_end(cb);
-	return res;
-}
-
-int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
-{
-	struct rtattr **rta = arg;
-	int iif = 0;
-	int err = -ENOBUFS;
-	struct sk_buff *skb;
-	struct flowi fl;
-	struct rt6_info *rt;
 
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (skb == NULL)
-		goto out;
+	if (tb[RTA_DST]) {
+		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
+			goto errout;
 
-	/* Reserve room for dummy headers, this skb can pass
-	   through good chunk of routing engine.
-	 */
-	skb->mac.raw = skb->data;
-	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+	}
 
-	memset(&fl, 0, sizeof(fl));
-	if (rta[RTA_SRC-1])
-		ipv6_addr_copy(&fl.fl6_src,
-			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
-	if (rta[RTA_DST-1])
-		ipv6_addr_copy(&fl.fl6_dst,
-			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
+	if (tb[RTA_IIF])
+		iif = nla_get_u32(tb[RTA_IIF]);
 
-	if (rta[RTA_IIF-1])
-		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+	if (tb[RTA_OIF])
+		fl.oif = nla_get_u32(tb[RTA_OIF]);
 
 	if (iif) {
 		struct net_device *dev;
 		dev = __dev_get_by_index(iif);
 		if (!dev) {
 			err = -ENODEV;
-			goto out_free;
+			goto errout;
 		}
 	}
 
-	fl.oif = 0;
-	if (rta[RTA_OIF-1])
-		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
-	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
+	/* Reserve room for dummy headers, this skb can pass
+	   through good chunk of routing engine.
+	 */
+	skb->mac.raw = skb->data;
+	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
+	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
 	skb->dst = &rt->u.dst;
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-	err = rt6_fill_node(skb, rt,
-			    &fl.fl6_dst, &fl.fl6_src,
-			    iif,
+	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
 			    nlh->nlmsg_seq, 0, 0);
 	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
+		kfree_skb(skb);
+		goto errout;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout:
 	return err;
-out_free:
-	kfree_skb(skb);
-	goto out;
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
-		     struct netlink_skb_parms *req)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-	u32 pid = current->pid;
-	u32 seq = 0;
-
-	if (req)
-		pid = req->pid;
-	if (nlh)
-		seq = nlh->nlmsg_seq;
-
-	skb = alloc_skb(size, gfp_any());
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
-		return;
+	u32 pid = 0, seq = 0;
+	struct nlmsghdr *nlh = NULL;
+	int payload = sizeof(struct rtmsg) + 256;
+	int err = -ENOBUFS;
+
+	if (info) {
+		pid = info->pid;
+		nlh = info->nlh;
+		if (nlh)
+			seq = nlh->nlmsg_seq;
 	}
-	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
+
+	skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
+	if (skb == NULL)
+		goto errout;
+
+	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
+
+	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
 }
 
 /*
@@ -2096,16 +2258,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2096 2258
2097static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) 2259static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2098{ 2260{
2099 struct rt6_proc_arg arg;
2100 arg.buffer = buffer;
2101 arg.offset = offset;
2102 arg.length = length;
2103 arg.skip = 0;
2104 arg.len = 0;
2105
2106 read_lock_bh(&rt6_lock);
2107 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2108 read_unlock_bh(&rt6_lock);
2261 struct rt6_proc_arg arg = {
2262 .buffer = buffer,
2263 .offset = offset,
2264 .length = length,
2265 };
2266
2267 fib6_clean_all(rt6_info_route, 0, &arg);
2109 2268
2110 *start = buffer; 2269 *start = buffer;
2111 if (offset) 2270 if (offset)
@@ -2260,13 +2419,9 @@ void __init ip6_route_init(void)
2260{ 2419{
2261 struct proc_dir_entry *p; 2420 struct proc_dir_entry *p;
2262 2421
2263 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2264 sizeof(struct rt6_info),
2265 0, SLAB_HWCACHE_ALIGN,
2266 NULL, NULL);
2267 if (!ip6_dst_ops.kmem_cachep)
2268 panic("cannot create ip6_dst_cache");
2269
2422 ip6_dst_ops.kmem_cachep =
2423 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2424 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2270 fib6_init(); 2425 fib6_init();
2271#ifdef CONFIG_PROC_FS 2426#ifdef CONFIG_PROC_FS
2272 p = proc_net_create("ipv6_route", 0, rt6_proc_info); 2427 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
@@ -2278,10 +2433,16 @@ void __init ip6_route_init(void)
2278#ifdef CONFIG_XFRM 2433#ifdef CONFIG_XFRM
2279 xfrm6_init(); 2434 xfrm6_init();
2280#endif 2435#endif
2436#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2437 fib6_rules_init();
2438#endif
2281} 2439}
2282 2440
2283void ip6_route_cleanup(void) 2441void ip6_route_cleanup(void)
2284{ 2442{
2443#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2444 fib6_rules_cleanup();
2445#endif
2285#ifdef CONFIG_PROC_FS 2446#ifdef CONFIG_PROC_FS
2286 proc_net_remove("ipv6_route"); 2447 proc_net_remove("ipv6_route");
2287 proc_net_remove("rt6_stats"); 2448 proc_net_remove("rt6_stats");
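A note on the walker code removed from route.c above: a netlink dump callback is called repeatedly, parks its cursor in cb->args[], and the removed comment spells out the contract exactly — a negative result is an error, 0 means the dump is complete (the buffer may still hold data), and a positive result means the frame filled up and the dump will resume on the next call. A minimal user-space sketch of that resumable-iteration shape; every name in it (dump_table, struct dump_state) is invented for illustration and none of this is kernel API:

    #include <stdio.h>
    #include <string.h>

    struct dump_state { size_t next; };      /* plays the role of cb->args[0] */

    /* Returns bytes written (> 0: frame full, call again) or 0 when the
     * walk is finished -- possibly with data still left in the buffer. */
    static int dump_table(const char *const *table, size_t n,
                          struct dump_state *st, char *buf, size_t buflen)
    {
        size_t used = 0;

        while (st->next < n) {
            size_t need = strlen(table[st->next]) + 1;
            if (used + need > buflen)
                return (int)used;            /* frame full: resume later */
            memcpy(buf + used, table[st->next], need);
            used += need;
            st->next++;
        }
        return 0;                            /* dump complete */
    }

    int main(void)
    {
        const char *routes[] = { "2001:db8::/32", "fe80::/64", "::/0" };
        struct dump_state st = { 0 };
        char buf[16];
        int res;

        while ((res = dump_table(routes, 3, &st, buf, sizeof(buf))) > 0)
            printf("partial frame, %d bytes, resuming\n", res);
        printf("dump complete\n");
        return 0;
    }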
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 802a1a6b1037..2546fc9f0a78 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -251,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
251 final_p = &final; 251 final_p = &final;
252 } 252 }
253 253
254 security_sk_classify_flow(sk, &fl);
255
254 err = ip6_dst_lookup(sk, &dst, &fl); 256 err = ip6_dst_lookup(sk, &dst, &fl);
255 if (err) 257 if (err)
256 goto failure; 258 goto failure;
@@ -270,7 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
270 inet->rcv_saddr = LOOPBACK4_IPV6; 272 inet->rcv_saddr = LOOPBACK4_IPV6;
271 273
272 sk->sk_gso_type = SKB_GSO_TCPV6; 274 sk->sk_gso_type = SKB_GSO_TCPV6;
273 __ip6_dst_store(sk, dst, NULL); 275 __ip6_dst_store(sk, dst, NULL, NULL);
274 276
275 icsk->icsk_ext_hdr_len = 0; 277 icsk->icsk_ext_hdr_len = 0;
276 if (np->opt) 278 if (np->opt)
@@ -374,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
374 fl.oif = sk->sk_bound_dev_if; 376 fl.oif = sk->sk_bound_dev_if;
375 fl.fl_ip_dport = inet->dport; 377 fl.fl_ip_dport = inet->dport;
376 fl.fl_ip_sport = inet->sport; 378 fl.fl_ip_sport = inet->sport;
379 security_skb_classify_flow(skb, &fl);
377 380
378 if ((err = ip6_dst_lookup(sk, &dst, &fl))) { 381 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
379 sk->sk_err_soft = -err; 382 sk->sk_err_soft = -err;
@@ -467,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
467 fl.oif = treq->iif; 470 fl.oif = treq->iif;
468 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 471 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
469 fl.fl_ip_sport = inet_sk(sk)->sport; 472 fl.fl_ip_sport = inet_sk(sk)->sport;
473 security_req_classify_flow(req, &fl);
470 474
471 if (dst == NULL) { 475 if (dst == NULL) {
472 opt = np->opt; 476 opt = np->opt;
@@ -541,7 +545,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
541 struct ipv6_pinfo *np = inet6_sk(sk); 545 struct ipv6_pinfo *np = inet6_sk(sk);
542 struct tcphdr *th = skb->h.th; 546 struct tcphdr *th = skb->h.th;
543 547
544 if (skb->ip_summed == CHECKSUM_HW) { 548 if (skb->ip_summed == CHECKSUM_PARTIAL) {
545 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); 549 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
546 skb->csum = offsetof(struct tcphdr, check); 550 skb->csum = offsetof(struct tcphdr, check);
547 } else { 551 } else {
@@ -566,7 +570,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
566 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, 570 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
567 IPPROTO_TCP, 0); 571 IPPROTO_TCP, 0);
568 skb->csum = offsetof(struct tcphdr, check); 572 skb->csum = offsetof(struct tcphdr, check);
569 skb->ip_summed = CHECKSUM_HW; 573 skb->ip_summed = CHECKSUM_PARTIAL;
570 return 0; 574 return 0;
571} 575}
572 576
@@ -625,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
625 fl.oif = inet6_iif(skb); 629 fl.oif = inet6_iif(skb);
626 fl.fl_ip_dport = t1->dest; 630 fl.fl_ip_dport = t1->dest;
627 fl.fl_ip_sport = t1->source; 631 fl.fl_ip_sport = t1->source;
632 security_skb_classify_flow(skb, &fl);
628 633
629 /* sk = NULL, but it is safe for now. RST socket required. */ 634 /* sk = NULL, but it is safe for now. RST socket required. */
630 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { 635 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
@@ -691,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
691 fl.oif = inet6_iif(skb); 696 fl.oif = inet6_iif(skb);
692 fl.fl_ip_dport = t1->dest; 697 fl.fl_ip_dport = t1->dest;
693 fl.fl_ip_sport = t1->source; 698 fl.fl_ip_sport = t1->source;
699 security_skb_classify_flow(skb, &fl);
694 700
695 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { 701 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
696 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { 702 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
@@ -820,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
820 826
821 tcp_rsk(req)->snt_isn = isn; 827 tcp_rsk(req)->snt_isn = isn;
822 828
829 security_inet_conn_request(sk, skb, req);
830
823 if (tcp_v6_send_synack(sk, req, NULL)) 831 if (tcp_v6_send_synack(sk, req, NULL))
824 goto drop; 832 goto drop;
825 833
@@ -923,6 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
923 fl.oif = sk->sk_bound_dev_if; 931 fl.oif = sk->sk_bound_dev_if;
924 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 932 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
925 fl.fl_ip_sport = inet_sk(sk)->sport; 933 fl.fl_ip_sport = inet_sk(sk)->sport;
934 security_req_classify_flow(req, &fl);
926 935
927 if (ip6_dst_lookup(sk, &dst, &fl)) 936 if (ip6_dst_lookup(sk, &dst, &fl))
928 goto out; 937 goto out;
@@ -945,7 +954,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
945 */ 954 */
946 955
947 newsk->sk_gso_type = SKB_GSO_TCPV6; 956 newsk->sk_gso_type = SKB_GSO_TCPV6;
948 __ip6_dst_store(newsk, dst, NULL); 957 __ip6_dst_store(newsk, dst, NULL, NULL);
949 958
950 newtcp6sk = (struct tcp6_sock *)newsk; 959 newtcp6sk = (struct tcp6_sock *)newsk;
951 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; 960 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1024,7 +1033,7 @@ out:
1024 1033
1025static int tcp_v6_checksum_init(struct sk_buff *skb) 1034static int tcp_v6_checksum_init(struct sk_buff *skb)
1026{ 1035{
1027 if (skb->ip_summed == CHECKSUM_HW) { 1036 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1028 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1037 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1029 &skb->nh.ipv6h->daddr,skb->csum)) { 1038 &skb->nh.ipv6h->daddr,skb->csum)) {
1030 skb->ip_summed = CHECKSUM_UNNECESSARY; 1039 skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1066,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1066 if (skb->protocol == htons(ETH_P_IP)) 1075 if (skb->protocol == htons(ETH_P_IP))
1067 return tcp_v4_do_rcv(sk, skb); 1076 return tcp_v4_do_rcv(sk, skb);
1068 1077
1069 if (sk_filter(sk, skb, 0)) 1078 if (sk_filter(sk, skb))
1070 goto discard; 1079 goto discard;
1071 1080
1072 /* 1081 /*
@@ -1223,7 +1232,7 @@ process:
1223 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1232 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1224 goto discard_and_relse; 1233 goto discard_and_relse;
1225 1234
1226 if (sk_filter(sk, skb, 0)) 1235 if (sk_filter(sk, skb))
1227 goto discard_and_relse; 1236 goto discard_and_relse;
1228 1237
1229 skb->dev = NULL; 1238 skb->dev = NULL;
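The CHECKSUM_HW renames in this file split one ambiguous value into two directional ones: CHECKSUM_PARTIAL on transmit (the stack primes the checksum field with the pseudo-header sum and records the field's offset in skb->csum for hardware to finish) and CHECKSUM_COMPLETE on receive (hardware hands over the full one's-complement sum of the packet in skb->csum). The arithmetic both rely on is plain 16-bit one's-complement summation; a self-contained toy illustration, not kernel code:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Fold a byte range into a 16-bit one's-complement sum. */
    static uint32_t csum_add(uint32_t sum, const uint8_t *p, size_t len)
    {
        while (len > 1) {
            sum += (uint32_t)((p[0] << 8) | p[1]);
            p += 2;
            len -= 2;
        }
        if (len)
            sum += (uint32_t)(p[0] << 8);
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return sum;
    }

    int main(void)
    {
        uint8_t seg[4] = { 0x12, 0x34, 0x00, 0x00 };  /* checksum at [2..3] */
        uint16_t check = (uint16_t)~csum_add(0, seg, sizeof(seg));

        seg[2] = check >> 8;              /* sender stores the complement */
        seg[3] = check & 0xff;

        /* receiver side: the sum over the whole segment folds to 0xffff */
        printf("verify: %s\n",
               csum_add(0, seg, sizeof(seg)) == 0xffff ? "ok" : "bad");
        return 0;
    }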
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d54f246411e..9662561701d1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -61,81 +61,9 @@
61 61
62DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; 62DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
63 63
64/* Grrr, addr_type already calculated by caller, but I don't want
65 * to add some silly "cookie" argument to this method just for that.
66 */
67static int udp_v6_get_port(struct sock *sk, unsigned short snum)
68{
69 struct sock *sk2;
70 struct hlist_node *node;
71
72 write_lock_bh(&udp_hash_lock);
73 if (snum == 0) {
74 int best_size_so_far, best, result, i;
75
76 if (udp_port_rover > sysctl_local_port_range[1] ||
77 udp_port_rover < sysctl_local_port_range[0])
78 udp_port_rover = sysctl_local_port_range[0];
79 best_size_so_far = 32767;
80 best = result = udp_port_rover;
81 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
82 int size;
83 struct hlist_head *list;
84
85 list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
86 if (hlist_empty(list)) {
87 if (result > sysctl_local_port_range[1])
88 result = sysctl_local_port_range[0] +
89 ((result - sysctl_local_port_range[0]) &
90 (UDP_HTABLE_SIZE - 1));
91 goto gotit;
92 }
93 size = 0;
94 sk_for_each(sk2, node, list)
95 if (++size >= best_size_so_far)
96 goto next;
97 best_size_so_far = size;
98 best = result;
99 next:;
100 }
101 result = best;
102 for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
103 if (result > sysctl_local_port_range[1])
104 result = sysctl_local_port_range[0]
105 + ((result - sysctl_local_port_range[0]) &
106 (UDP_HTABLE_SIZE - 1));
107 if (!udp_lport_inuse(result))
108 break;
109 }
110 if (i >= (1 << 16) / UDP_HTABLE_SIZE)
111 goto fail;
112gotit:
113 udp_port_rover = snum = result;
114 } else {
115 sk_for_each(sk2, node,
116 &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
117 if (inet_sk(sk2)->num == snum &&
118 sk2 != sk &&
119 (!sk2->sk_bound_dev_if ||
120 !sk->sk_bound_dev_if ||
121 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
122 (!sk2->sk_reuse || !sk->sk_reuse) &&
123 ipv6_rcv_saddr_equal(sk, sk2))
124 goto fail;
125 }
126 }
127
128 inet_sk(sk)->num = snum;
129 if (sk_unhashed(sk)) {
130 sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
131 sock_prot_inc_use(sk->sk_prot);
132 }
133 write_unlock_bh(&udp_hash_lock);
134 return 0;
135
136fail:
137 write_unlock_bh(&udp_hash_lock);
138 return 1;
139}
64static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
65{
66 return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
67}
140 68
141static void udp_v6_hash(struct sock *sk) 69static void udp_v6_hash(struct sock *sk)
@@ -345,6 +273,8 @@ out:
345 273
346static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 274static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
347{ 275{
276 int rc;
277
348 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 278 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
349 kfree_skb(skb); 279 kfree_skb(skb);
350 return -1; 280 return -1;
@@ -356,7 +286,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
356 return 0; 286 return 0;
357 } 287 }
358 288
359 if (sock_queue_rcv_skb(sk,skb)<0) { 289 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
290 /* Note that an ENOMEM error is charged twice */
291 if (rc == -ENOMEM)
292 UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
360 UDP6_INC_STATS_BH(UDP_MIB_INERRORS); 293 UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
361 kfree_skb(skb); 294 kfree_skb(skb);
362 return 0; 295 return 0;
@@ -475,7 +408,7 @@ static int udpv6_rcv(struct sk_buff **pskb)
475 uh = skb->h.uh; 408 uh = skb->h.uh;
476 } 409 }
477 410
478 if (skb->ip_summed == CHECKSUM_HW && 411 if (skb->ip_summed == CHECKSUM_COMPLETE &&
479 !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) 412 !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
480 skb->ip_summed = CHECKSUM_UNNECESSARY; 413 skb->ip_summed = CHECKSUM_UNNECESSARY;
481 414
@@ -782,6 +715,8 @@ do_udp_sendmsg:
782 connected = 0; 715 connected = 0;
783 } 716 }
784 717
718 security_sk_classify_flow(sk, fl);
719
785 err = ip6_sk_dst_lookup(sk, &dst, fl); 720 err = ip6_sk_dst_lookup(sk, &dst, fl);
786 if (err) 721 if (err)
787 goto out; 722 goto out;
@@ -840,7 +775,12 @@ do_append_data:
840 if (connected) { 775 if (connected) {
841 ip6_dst_store(sk, dst, 776 ip6_dst_store(sk, dst,
842 ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? 777 ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
843 &np->daddr : NULL); 778 &np->daddr : NULL,
779#ifdef CONFIG_IPV6_SUBTREES
780 ipv6_addr_equal(&fl->fl6_src, &np->saddr) ?
781 &np->saddr :
782#endif
783 NULL);
844 } else { 784 } else {
845 dst_release(dst); 785 dst_release(dst);
846 } 786 }
@@ -855,6 +795,16 @@ out:
855 UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); 795 UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
856 return len; 796 return len;
857 } 797 }
798 /*
799 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
800 * ENOBUFS might not be good (it's not tunable per se), but otherwise
801 * we don't have a good statistic (IpOutDiscards but it can be too many
802 * things). We could add another new stat but at least for now that
803 * seems like overkill.
804 */
805 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
806 UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
807 }
858 return err; 808 return err;
859 809
860do_confirm: 810do_confirm:
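The udp_v6_get_port rewrite above is a de-duplication: the open-coded port search is dropped in favor of the shared udp_get_port(), parameterized with ipv6_rcv_saddr_equal so that only the address-comparison step differs per family. The shape of that refactor, reduced to a user-space sketch with invented types (struct bind_entry, port_in_use — nothing here is kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    struct bind_entry { int port; unsigned addr; };

    typedef bool (*saddr_cmp_t)(const struct bind_entry *a,
                                const struct bind_entry *b);

    /* Family-specific piece: the address comparison (wildcard matches all). */
    static bool v4_saddr_equal(const struct bind_entry *a,
                               const struct bind_entry *b)
    {
        return a->addr == b->addr || a->addr == 0 || b->addr == 0;
    }

    /* Generic piece: one copy of the conflict walk for every family. */
    static bool port_in_use(const struct bind_entry *tab, int n,
                            const struct bind_entry *want, saddr_cmp_t cmp)
    {
        for (int i = 0; i < n; i++)
            if (tab[i].port == want->port && cmp(&tab[i], want))
                return true;
        return false;
    }

    int main(void)
    {
        struct bind_entry tab[] = { { 53, 0x7f000001 } };
        struct bind_entry want = { 53, 0 };      /* wildcard address */
        printf("conflict: %d\n", port_in_use(tab, 1, &want, v4_saddr_equal));
        return 0;
    }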
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0405d74ff910..a40a05789013 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi)
72 if (x->mode->input(x, skb)) 72 if (x->mode->input(x, skb))
73 goto drop; 73 goto drop;
74 74
75 if (x->props.mode) { /* XXX */ 75 if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */
76 decaps = 1; 76 decaps = 1;
77 break; 77 break;
78 } 78 }
@@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb)
138{ 138{
139 return xfrm6_rcv_spi(*pskb, 0); 139 return xfrm6_rcv_spi(*pskb, 0);
140} 140}
141
142int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
143 xfrm_address_t *saddr, u8 proto)
144{
145 struct xfrm_state *x = NULL;
146 int wildcard = 0;
147 struct in6_addr any;
148 xfrm_address_t *xany;
149 struct xfrm_state *xfrm_vec_one = NULL;
150 int nh = 0;
151 int i = 0;
152
153 ipv6_addr_set(&any, 0, 0, 0, 0);
154 xany = (xfrm_address_t *)&any;
155
156 for (i = 0; i < 3; i++) {
157 xfrm_address_t *dst, *src;
158 switch (i) {
159 case 0:
160 dst = daddr;
161 src = saddr;
162 break;
163 case 1:
164 /* lookup state with wild-card source address */
165 wildcard = 1;
166 dst = daddr;
167 src = xany;
168 break;
169 case 2:
170 default:
171 /* lookup state with wild-card addresses */
172 wildcard = 1; /* XXX */
173 dst = xany;
174 src = xany;
175 break;
176 }
177
178 x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6);
179 if (!x)
180 continue;
181
182 spin_lock(&x->lock);
183
184 if (wildcard) {
185 if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) {
186 spin_unlock(&x->lock);
187 xfrm_state_put(x);
188 x = NULL;
189 continue;
190 }
191 }
192
193 if (unlikely(x->km.state != XFRM_STATE_VALID)) {
194 spin_unlock(&x->lock);
195 xfrm_state_put(x);
196 x = NULL;
197 continue;
198 }
199 if (xfrm_state_check_expire(x)) {
200 spin_unlock(&x->lock);
201 xfrm_state_put(x);
202 x = NULL;
203 continue;
204 }
205
206 nh = x->type->input(x, skb);
207 if (nh <= 0) {
208 spin_unlock(&x->lock);
209 xfrm_state_put(x);
210 x = NULL;
211 continue;
212 }
213
214 x->curlft.bytes += skb->len;
215 x->curlft.packets++;
216
217 spin_unlock(&x->lock);
218
219 xfrm_vec_one = x;
220 break;
221 }
222
223 if (!xfrm_vec_one)
224 goto drop;
225
226 /* Allocate new secpath or COW existing one. */
227 if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
228 struct sec_path *sp;
229 sp = secpath_dup(skb->sp);
230 if (!sp)
231 goto drop;
232 if (skb->sp)
233 secpath_put(skb->sp);
234 skb->sp = sp;
235 }
236
237 if (1 + skb->sp->len > XFRM_MAX_DEPTH)
238 goto drop;
239
240 skb->sp->xvec[skb->sp->len] = xfrm_vec_one;
241 skb->sp->len ++;
242
243 return 1;
244drop:
245 if (xfrm_vec_one)
246 xfrm_state_put(xfrm_vec_one);
247 return -1;
248}
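xfrm6_input_addr above tries progressively wider lookup keys — (daddr, saddr), then (daddr, any), then (any, any) — and only accepts a wildcard hit when the state opted in via XFRM_STATE_WILDRECV. A reduced user-space sketch of that widening-lookup idea, with strings standing in for xfrm_address_t and all names invented:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define ANY "::"

    struct sa_entry { const char *dst, *src; bool wildrecv; };

    static const struct sa_entry *sa_find(const struct sa_entry *tab, int n,
                                          const char *dst, const char *src)
    {
        /* Pass 0: exact; pass 1: wildcard source; pass 2: wildcard both. */
        const char *keys[3][2] = { { dst, src }, { dst, ANY }, { ANY, ANY } };

        for (int pass = 0; pass < 3; pass++) {
            for (int i = 0; i < n; i++) {
                if (strcmp(tab[i].dst, keys[pass][0]) ||
                    strcmp(tab[i].src, keys[pass][1]))
                    continue;
                if (pass > 0 && !tab[i].wildrecv)
                    continue;       /* wildcard hits need explicit opt-in */
                return &tab[i];
            }
        }
        return NULL;
    }

    int main(void)
    {
        struct sa_entry tab[] = { { "2001:db8::1", ANY, true } };
        printf("%s\n", sa_find(tab, 1, "2001:db8::1", "2001:db8::2")
                       ? "matched via wildcard" : "no state");
        return 0;
    }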
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
new file mode 100644
index 000000000000..6031c16d46ca
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -0,0 +1,93 @@
1/*
2 * xfrm6_mode_ro.c - Route optimization mode for IPv6.
3 *
4 * Copyright (C)2003-2006 Helsinki University of Technology
5 * Copyright (C)2003-2006 USAGI/WIDE Project
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21/*
22 * Authors:
23 * Noriaki TAKAMIYA @USAGI
24 * Masahide NAKAMURA @USAGI
25 */
26
27#include <linux/init.h>
28#include <linux/kernel.h>
29#include <linux/module.h>
30#include <linux/skbuff.h>
31#include <linux/stringify.h>
32#include <net/ipv6.h>
33#include <net/xfrm.h>
34
35/* Add route optimization header space.
36 *
37 * The IP header and mutable extension headers will be moved forward to make
38 * space for the route optimization header.
39 *
40 * On exit, skb->h will be set to the start of the encapsulation header to be
41 * filled in by x->type->output and skb->nh will be set to the nextheader field
42 * of the extension header directly preceding the encapsulation header, or in
43 * its absence, that of the top IP header. The value of skb->data will always
44 * point to the top IP header.
45 */
46static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
47{
48 struct ipv6hdr *iph;
49 u8 *prevhdr;
50 int hdr_len;
51
52 skb_push(skb, x->props.header_len);
53 iph = skb->nh.ipv6h;
54
55 hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
56 skb->nh.raw = prevhdr - x->props.header_len;
57 skb->h.raw = skb->data + hdr_len;
58 memmove(skb->data, iph, hdr_len);
59 return 0;
60}
61
62/*
63 * Do nothing about routing optimization header unlike IPsec.
64 */
65static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb)
66{
67 return 0;
68}
69
70static struct xfrm_mode xfrm6_ro_mode = {
71 .input = xfrm6_ro_input,
72 .output = xfrm6_ro_output,
73 .owner = THIS_MODULE,
74 .encap = XFRM_MODE_ROUTEOPTIMIZATION,
75};
76
77static int __init xfrm6_ro_init(void)
78{
79 return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);
80}
81
82static void __exit xfrm6_ro_exit(void)
83{
84 int err;
85
86 err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
87 BUG_ON(err);
88}
89
90module_init(xfrm6_ro_init);
91module_exit(xfrm6_ro_exit);
92MODULE_LICENSE("GPL");
93MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
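The output hook above opens room for the encapsulation header by pushing the packet start down by header_len and sliding the existing IP header back to the front, so the gap lands directly behind it. The same pointer arithmetic on a plain byte array — illustrative only, the real code works on skb offsets:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        /* layout: [room for new hdr][ip hdr "IPIP"][payload "DATA"] */
        char pkt[16] = "....IPIPDATA";
        int header_len = 4, hdr_len = 4;

        char *data = pkt + 4 - header_len;   /* ~ skb_push(skb, header_len) */
        memmove(data, pkt + 4, hdr_len);     /* ~ memmove(skb->data, iph, hdr_len) */

        /* the gap for the encapsulation header now sits after the IP header */
        memcpy(data + hdr_len, "RO??", header_len);
        printf("%.12s\n", data);             /* prints: IPIPRO??DATA */
        return 0;
    }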
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 711d713e36d8..3a4b39b12bad 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -25,9 +25,8 @@
25 * its absence, that of the top IP header. The value of skb->data will always 25 * its absence, that of the top IP header. The value of skb->data will always
26 * point to the top IP header. 26 * point to the top IP header.
27 */ 27 */
28static int xfrm6_transport_output(struct sk_buff *skb) 28static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
29{ 29{
30 struct xfrm_state *x = skb->dst->xfrm;
31 struct ipv6hdr *iph; 30 struct ipv6hdr *iph;
32 u8 *prevhdr; 31 u8 *prevhdr;
33 int hdr_len; 32 int hdr_len;
@@ -35,7 +34,7 @@ static int xfrm6_transport_output(struct sk_buff *skb)
35 skb_push(skb, x->props.header_len); 34 skb_push(skb, x->props.header_len);
36 iph = skb->nh.ipv6h; 35 iph = skb->nh.ipv6h;
37 36
38 hdr_len = ip6_find_1stfragopt(skb, &prevhdr); 37 hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
39 skb->nh.raw = prevhdr - x->props.header_len; 38 skb->nh.raw = prevhdr - x->props.header_len;
40 skb->h.raw = skb->data + hdr_len; 39 skb->h.raw = skb->data + hdr_len;
41 memmove(skb->data, iph, hdr_len); 40 memmove(skb->data, iph, hdr_len);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 8af79be2edca..5e7d8a7d6414 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -37,10 +37,9 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
37 * its absence, that of the top IP header. The value of skb->data will always 37 * its absence, that of the top IP header. The value of skb->data will always
38 * point to the top IP header. 38 * point to the top IP header.
39 */ 39 */
40static int xfrm6_tunnel_output(struct sk_buff *skb) 40static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
41{ 41{
42 struct dst_entry *dst = skb->dst; 42 struct dst_entry *dst = skb->dst;
43 struct xfrm_state *x = dst->xfrm;
44 struct ipv6hdr *iph, *top_iph; 43 struct ipv6hdr *iph, *top_iph;
45 int dsfield; 44 int dsfield;
46 45
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index c8c8b44a0f58..c260ea104c52 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -17,6 +17,12 @@
17#include <net/ipv6.h> 17#include <net/ipv6.h>
18#include <net/xfrm.h> 18#include <net/xfrm.h>
19 19
20int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
21 u8 **prevhdr)
22{
23 return ip6_find_1stfragopt(skb, prevhdr);
24}
25
20static int xfrm6_tunnel_check_size(struct sk_buff *skb) 26static int xfrm6_tunnel_check_size(struct sk_buff *skb)
21{ 27{
22 int mtu, ret = 0; 28 int mtu, ret = 0;
@@ -41,13 +47,13 @@ static int xfrm6_output_one(struct sk_buff *skb)
41 struct xfrm_state *x = dst->xfrm; 47 struct xfrm_state *x = dst->xfrm;
42 int err; 48 int err;
43 49
44 if (skb->ip_summed == CHECKSUM_HW) { 50 if (skb->ip_summed == CHECKSUM_PARTIAL) {
45 err = skb_checksum_help(skb, 0); 51 err = skb_checksum_help(skb);
46 if (err) 52 if (err)
47 goto error_nolock; 53 goto error_nolock;
48 } 54 }
49 55
50 if (x->props.mode) { 56 if (x->props.mode == XFRM_MODE_TUNNEL) {
51 err = xfrm6_tunnel_check_size(skb); 57 err = xfrm6_tunnel_check_size(skb);
52 if (err) 58 if (err)
53 goto error_nolock; 59 goto error_nolock;
@@ -59,7 +65,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
59 if (err) 65 if (err)
60 goto error; 66 goto error;
61 67
62 err = x->mode->output(skb); 68 err = x->mode->output(x, skb);
63 if (err) 69 if (err)
64 goto error; 70 goto error;
65 71
@@ -69,6 +75,8 @@ static int xfrm6_output_one(struct sk_buff *skb)
69 75
70 x->curlft.bytes += skb->len; 76 x->curlft.bytes += skb->len;
71 x->curlft.packets++; 77 x->curlft.packets++;
78 if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
79 x->lastused = (u64)xtime.tv_sec;
72 80
73 spin_unlock_bh(&x->lock); 81 spin_unlock_bh(&x->lock);
74 82
@@ -80,7 +88,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
80 } 88 }
81 dst = skb->dst; 89 dst = skb->dst;
82 x = dst->xfrm; 90 x = dst->xfrm;
83 } while (x && !x->props.mode); 91 } while (x && (x->props.mode != XFRM_MODE_TUNNEL));
84 92
85 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; 93 IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
86 err = 0; 94 err = 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 73cd250aecbb..6a252e2134d1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -18,6 +18,9 @@
18#include <net/ip.h> 18#include <net/ip.h>
19#include <net/ipv6.h> 19#include <net/ipv6.h>
20#include <net/ip6_route.h> 20#include <net/ip6_route.h>
21#ifdef CONFIG_IPV6_MIP6
22#include <net/mip6.h>
23#endif
21 24
22static struct dst_ops xfrm6_dst_ops; 25static struct dst_ops xfrm6_dst_ops;
23static struct xfrm_policy_afinfo xfrm6_policy_afinfo; 26static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
@@ -31,6 +34,26 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
31 return err; 34 return err;
32} 35}
33 36
37static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
38{
39 struct rt6_info *rt;
40 struct flowi fl_tunnel = {
41 .nl_u = {
42 .ip6_u = {
43 .daddr = *(struct in6_addr *)&daddr->a6,
44 },
45 },
46 };
47
48 if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
49 ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6,
50 (struct in6_addr *)&saddr->a6);
51 dst_release(&rt->u.dst);
52 return 0;
53 }
54 return -EHOSTUNREACH;
55}
56
34static struct dst_entry * 57static struct dst_entry *
35__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) 58__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
36{ 59{
@@ -50,7 +73,9 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
50 xdst->u.rt6.rt6i_src.plen); 73 xdst->u.rt6.rt6i_src.plen);
51 if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && 74 if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
52 ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && 75 ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
53 xfrm_bundle_ok(xdst, fl, AF_INET6)) { 76 xfrm_bundle_ok(xdst, fl, AF_INET6,
77 (xdst->u.rt6.rt6i_dst.plen != 128 ||
78 xdst->u.rt6.rt6i_src.plen != 128))) {
54 dst_clone(dst); 79 dst_clone(dst);
55 break; 80 break;
56 } 81 }
@@ -59,6 +84,40 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
59 return dst; 84 return dst;
60} 85}
61 86
87static inline struct in6_addr*
88__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr)
89{
90 return (x->type->remote_addr) ?
91 (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) :
92 (struct in6_addr*)&x->id.daddr;
93}
94
95static inline struct in6_addr*
96__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr)
97{
98 return (x->type->local_addr) ?
99 (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) :
100 (struct in6_addr*)&x->props.saddr;
101}
102
103static inline void
104__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x)
105{
106 if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
107 *nflen += x->props.header_len;
108 else
109 *len += x->props.header_len;
110}
111
112static inline void
113__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x)
114{
115 if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)
116 *nflen -= x->props.header_len;
117 else
118 *len -= x->props.header_len;
119}
120
62/* Allocate chain of dst_entry's, attach known xfrm's, calculate 121/* Allocate chain of dst_entry's, attach known xfrm's, calculate
63 * all the metrics... Shortly, bundle a bundle. 122 * all the metrics... Shortly, bundle a bundle.
64 */ 123 */
@@ -83,6 +142,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
83 int i; 142 int i;
84 int err = 0; 143 int err = 0;
85 int header_len = 0; 144 int header_len = 0;
145 int nfheader_len = 0;
86 int trailer_len = 0; 146 int trailer_len = 0;
87 147
88 dst = dst_prev = NULL; 148 dst = dst_prev = NULL;
@@ -109,17 +169,18 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
109 169
110 xdst = (struct xfrm_dst *)dst1; 170 xdst = (struct xfrm_dst *)dst1;
111 xdst->route = &rt->u.dst; 171 xdst->route = &rt->u.dst;
172 xdst->genid = xfrm[i]->genid;
112 if (rt->rt6i_node) 173 if (rt->rt6i_node)
113 xdst->route_cookie = rt->rt6i_node->fn_sernum; 174 xdst->route_cookie = rt->rt6i_node->fn_sernum;
114 175
115 dst1->next = dst_prev; 176 dst1->next = dst_prev;
116 dst_prev = dst1; 177 dst_prev = dst1;
117 if (xfrm[i]->props.mode) { 178 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
118 remote = (struct in6_addr*)&xfrm[i]->id.daddr; 179 remote = __xfrm6_bundle_addr_remote(xfrm[i], remote);
119 local = (struct in6_addr*)&xfrm[i]->props.saddr; 180 local = __xfrm6_bundle_addr_local(xfrm[i], local);
120 tunnel = 1; 181 tunnel = 1;
121 } 182 }
122 header_len += xfrm[i]->props.header_len; 183 __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
123 trailer_len += xfrm[i]->props.trailer_len; 184 trailer_len += xfrm[i]->props.trailer_len;
124 185
125 if (tunnel) { 186 if (tunnel) {
@@ -154,6 +215,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
154 dst_prev->flags |= DST_HOST; 215 dst_prev->flags |= DST_HOST;
155 dst_prev->lastuse = jiffies; 216 dst_prev->lastuse = jiffies;
156 dst_prev->header_len = header_len; 217 dst_prev->header_len = header_len;
218 dst_prev->nfheader_len = nfheader_len;
157 dst_prev->trailer_len = trailer_len; 219 dst_prev->trailer_len = trailer_len;
158 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); 220 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
159 221
@@ -172,7 +234,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
172 x->u.rt6.rt6i_src = rt0->rt6i_src; 234 x->u.rt6.rt6i_src = rt0->rt6i_src;
173 x->u.rt6.rt6i_idev = rt0->rt6i_idev; 235 x->u.rt6.rt6i_idev = rt0->rt6i_idev;
174 in6_dev_hold(rt0->rt6i_idev); 236 in6_dev_hold(rt0->rt6i_idev);
175 header_len -= x->u.dst.xfrm->props.header_len; 237 __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm);
176 trailer_len -= x->u.dst.xfrm->props.trailer_len; 238 trailer_len -= x->u.dst.xfrm->props.trailer_len;
177 } 239 }
178 240
@@ -232,6 +294,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
232 fl->proto = nexthdr; 294 fl->proto = nexthdr;
233 return; 295 return;
234 296
297#ifdef CONFIG_IPV6_MIP6
298 case IPPROTO_MH:
299 if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
300 struct ip6_mh *mh;
301 mh = (struct ip6_mh *)exthdr;
302
303 fl->fl_mh_type = mh->ip6mh_type;
304 }
305 fl->proto = nexthdr;
306 return;
307#endif
308
235 /* XXX Why are there these headers? */ 309 /* XXX Why are there these headers? */
236 case IPPROTO_AH: 310 case IPPROTO_AH:
237 case IPPROTO_ESP: 311 case IPPROTO_ESP:
@@ -308,6 +382,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
308 .family = AF_INET6, 382 .family = AF_INET6,
309 .dst_ops = &xfrm6_dst_ops, 383 .dst_ops = &xfrm6_dst_ops,
310 .dst_lookup = xfrm6_dst_lookup, 384 .dst_lookup = xfrm6_dst_lookup,
385 .get_saddr = xfrm6_get_saddr,
311 .find_bundle = __xfrm6_find_bundle, 386 .find_bundle = __xfrm6_find_bundle,
312 .bundle_create = __xfrm6_bundle_create, 387 .bundle_create = __xfrm6_bundle_create,
313 .decode_session = _decode_session6, 388 .decode_session = _decode_session6,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b33296b3f6de..711bfafb2472 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -42,102 +42,135 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
42 memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); 42 memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
43 if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) 43 if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
44 memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); 44 memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
45 if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) {
46 struct rt6_info *rt;
47 struct flowi fl_tunnel = {
48 .nl_u = {
49 .ip6_u = {
50 .daddr = *(struct in6_addr *)daddr,
51 }
52 }
53 };
54 if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
55 &fl_tunnel, AF_INET6)) {
56 ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr,
57 (struct in6_addr *)&x->props.saddr);
58 dst_release(&rt->u.dst);
59 }
60 }
61 x->props.mode = tmpl->mode; 45 x->props.mode = tmpl->mode;
62 x->props.reqid = tmpl->reqid; 46 x->props.reqid = tmpl->reqid;
63 x->props.family = AF_INET6; 47 x->props.family = AF_INET6;
64} 48}
65 49
66static struct xfrm_state *
67__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
68{
69 unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
70 struct xfrm_state *x;
71
72 list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
73 if (x->props.family == AF_INET6 &&
74 spi == x->id.spi &&
75 ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
76 proto == x->id.proto) {
77 xfrm_state_hold(x);
78 return x;
79 }
80 }
81 return NULL;
82}
83
84static struct xfrm_state *
85__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto,
86 xfrm_address_t *daddr, xfrm_address_t *saddr,
87 int create)
88{
89 struct xfrm_state *x, *x0;
90 unsigned h = __xfrm6_dst_hash(daddr);
91
92 x0 = NULL;
93
94 list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
95 if (x->props.family == AF_INET6 &&
96 ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
97 mode == x->props.mode &&
98 proto == x->id.proto &&
99 ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
100 reqid == x->props.reqid &&
101 x->km.state == XFRM_STATE_ACQ &&
102 !x->id.spi) {
103 x0 = x;
104 break;
105 }
106 }
107 if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
108 ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6,
109 (struct in6_addr *)daddr);
110 ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6,
111 (struct in6_addr *)saddr);
112 x0->sel.prefixlen_d = 128;
113 x0->sel.prefixlen_s = 128;
114 ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6,
115 (struct in6_addr *)saddr);
116 x0->km.state = XFRM_STATE_ACQ;
117 ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6,
118 (struct in6_addr *)daddr);
119 x0->id.proto = proto;
120 x0->props.family = AF_INET6;
121 x0->props.mode = mode;
122 x0->props.reqid = reqid;
123 x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
124 xfrm_state_hold(x0);
125 x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
126 add_timer(&x0->timer);
127 xfrm_state_hold(x0);
128 list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
129 wake_up(&km_waitq);
130 }
131 if (x0)
132 xfrm_state_hold(x0);
133 return x0;
134}
50static int
51__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
52{
53 int i;
54 int j = 0;
55
56 /* Rule 1: select IPsec transport except AH */
57 for (i = 0; i < n; i++) {
58 if (src[i]->props.mode == XFRM_MODE_TRANSPORT &&
59 src[i]->id.proto != IPPROTO_AH) {
60 dst[j++] = src[i];
61 src[i] = NULL;
62 }
63 }
64 if (j == n)
65 goto end;
66
67 /* Rule 2: select MIPv6 RO or inbound trigger */
68#ifdef CONFIG_IPV6_MIP6
69 for (i = 0; i < n; i++) {
70 if (src[i] &&
71 (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION ||
72 src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) {
73 dst[j++] = src[i];
74 src[i] = NULL;
75 }
76 }
77 if (j == n)
78 goto end;
79#endif
80
81 /* Rule 3: select IPsec transport AH */
82 for (i = 0; i < n; i++) {
83 if (src[i] &&
84 src[i]->props.mode == XFRM_MODE_TRANSPORT &&
85 src[i]->id.proto == IPPROTO_AH) {
86 dst[j++] = src[i];
87 src[i] = NULL;
88 }
89 }
90 if (j == n)
91 goto end;
92
93 /* Rule 4: select IPsec tunnel */
94 for (i = 0; i < n; i++) {
95 if (src[i] &&
96 src[i]->props.mode == XFRM_MODE_TUNNEL) {
97 dst[j++] = src[i];
98 src[i] = NULL;
99 }
100 }
101 if (likely(j == n))
102 goto end;
103
104 /* Final rule */
105 for (i = 0; i < n; i++) {
106 if (src[i]) {
107 dst[j++] = src[i];
108 src[i] = NULL;
109 }
110 }
111
112 end:
113 return 0;
114}
115
116static int
117__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
118{
119 int i;
120 int j = 0;
121
122 /* Rule 1: select IPsec transport */
123 for (i = 0; i < n; i++) {
124 if (src[i]->mode == XFRM_MODE_TRANSPORT) {
125 dst[j++] = src[i];
126 src[i] = NULL;
127 }
128 }
129 if (j == n)
130 goto end;
131
132 /* Rule 2: select MIPv6 RO or inbound trigger */
133#ifdef CONFIG_IPV6_MIP6
134 for (i = 0; i < n; i++) {
135 if (src[i] &&
136 (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION ||
137 src[i]->mode == XFRM_MODE_IN_TRIGGER)) {
138 dst[j++] = src[i];
139 src[i] = NULL;
140 }
141 }
142 if (j == n)
143 goto end;
144#endif
145
146 /* Rule 3: select IPsec tunnel */
147 for (i = 0; i < n; i++) {
148 if (src[i] &&
149 src[i]->mode == XFRM_MODE_TUNNEL) {
150 dst[j++] = src[i];
151 src[i] = NULL;
152 }
153 }
154 if (likely(j == n))
155 goto end;
156
157 /* Final rule */
158 for (i = 0; i < n; i++) {
159 if (src[i]) {
160 dst[j++] = src[i];
161 src[i] = NULL;
162 }
163 }
164
165 end:
166 return 0;
167}
135 168
136static struct xfrm_state_afinfo xfrm6_state_afinfo = { 169static struct xfrm_state_afinfo xfrm6_state_afinfo = {
137 .family = AF_INET6, 170 .family = AF_INET6,
138 .init_tempsel = __xfrm6_init_tempsel, 171 .init_tempsel = __xfrm6_init_tempsel,
139 .state_lookup = __xfrm6_state_lookup, 172 .tmpl_sort = __xfrm6_tmpl_sort,
140 .find_acq = __xfrm6_find_acq, 173 .state_sort = __xfrm6_state_sort,
141}; 174};
142 175
143void __init xfrm6_state_init(void) 176void __init xfrm6_state_init(void)
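__xfrm6_state_sort and __xfrm6_tmpl_sort above are the same algorithm twice: a stable multi-pass partition, where each pass sweeps the remaining entries, appends those matching the next rule in priority order, and NULLs them out of the source array so later passes skip them. A generic user-space rendering of that scheme; all names here are invented:

    #include <stdbool.h>
    #include <stdio.h>

    enum mode { TRANSPORT, TUNNEL };
    struct state { enum mode mode; int id; };

    typedef bool (*rule_fn)(const struct state *);

    static bool is_transport(const struct state *s) { return s->mode == TRANSPORT; }
    static bool is_tunnel(const struct state *s)    { return s->mode == TUNNEL; }
    static bool any_state(const struct state *s)    { (void)s; return true; }

    /* Stable: relative order within each rule class is preserved. */
    static void rule_sort(struct state **dst, struct state **src, int n,
                          const rule_fn *rules, int nrules)
    {
        int j = 0;

        for (int r = 0; r < nrules && j < n; r++)
            for (int i = 0; i < n; i++)
                if (src[i] && rules[r](src[i])) {
                    dst[j++] = src[i];
                    src[i] = NULL;       /* consumed, skip in later passes */
                }
    }

    int main(void)
    {
        struct state a = { TUNNEL, 1 }, b = { TRANSPORT, 2 }, c = { TUNNEL, 3 };
        struct state *src[] = { &a, &b, &c }, *dst[3];
        const rule_fn rules[] = { is_transport, is_tunnel, any_state };

        rule_sort(dst, src, 3, rules, 3);
        for (int i = 0; i < 3; i++)
            printf("%d ", dst[i]->id);   /* prints: 2 1 3 */
        printf("\n");
        return 0;
    }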
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index c8f9369c2a87..59685ee8f700 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -307,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
307 307
308static int xfrm6_tunnel_init_state(struct xfrm_state *x) 308static int xfrm6_tunnel_init_state(struct xfrm_state *x)
309{ 309{
310 if (!x->props.mode) 310 if (x->props.mode != XFRM_MODE_TUNNEL)
311 return -EINVAL; 311 return -EINVAL;
312 312
313 if (x->encap) 313 if (x->encap)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3a95b2ee4690..83b443ddc72f 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1731,7 +1731,8 @@ static u32 gen_reqid(void)
1731 ++reqid; 1731 ++reqid;
1732 if (reqid == 0) 1732 if (reqid == 0)
1733 reqid = IPSEC_MANUAL_REQID_MAX+1; 1733 reqid = IPSEC_MANUAL_REQID_MAX+1;
1734 if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST) 1734 if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid,
1735 (void*)&reqid) != -EEXIST)
1735 return reqid; 1736 return reqid;
1736 } while (reqid != start); 1737 } while (reqid != start);
1737 return 0; 1738 return 0;
@@ -1765,7 +1766,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
1765 } 1766 }
1766 1767
1767 /* addresses present only in tunnel mode */ 1768 /* addresses present only in tunnel mode */
1768 if (t->mode) { 1769 if (t->mode == XFRM_MODE_TUNNEL) {
1769 switch (xp->family) { 1770 switch (xp->family) {
1770 case AF_INET: 1771 case AF_INET:
1771 sin = (void*)(rq+1); 1772 sin = (void*)(rq+1);
@@ -1997,7 +1998,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
1997 int req_size; 1998 int req_size;
1998 1999
1999 req_size = sizeof(struct sadb_x_ipsecrequest); 2000 req_size = sizeof(struct sadb_x_ipsecrequest);
2000 if (t->mode) 2001 if (t->mode == XFRM_MODE_TUNNEL)
2001 req_size += 2*socklen; 2002 req_size += 2*socklen;
2002 else 2003 else
2003 size -= 2*socklen; 2004 size -= 2*socklen;
@@ -2013,7 +2014,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
2013 if (t->optional) 2014 if (t->optional)
2014 rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; 2015 rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
2015 rq->sadb_x_ipsecrequest_reqid = t->reqid; 2016 rq->sadb_x_ipsecrequest_reqid = t->reqid;
2016 if (t->mode) { 2017 if (t->mode == XFRM_MODE_TUNNEL) {
2017 switch (xp->family) { 2018 switch (xp->family) {
2018 case AF_INET: 2019 case AF_INET:
2019 sin = (void*)(rq+1); 2020 sin = (void*)(rq+1);
@@ -2268,7 +2269,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2268 return err; 2269 return err;
2269 } 2270 }
2270 2271
2271 xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); 2272 xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1,
2273 &sel, tmp.security, 1);
2272 security_xfrm_policy_free(&tmp); 2274 security_xfrm_policy_free(&tmp);
2273 if (xp == NULL) 2275 if (xp == NULL)
2274 return -ENOENT; 2276 return -ENOENT;
@@ -2330,7 +2332,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2330 if (dir >= XFRM_POLICY_MAX) 2332 if (dir >= XFRM_POLICY_MAX)
2331 return -EINVAL; 2333 return -EINVAL;
2332 2334
2333 xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, 2335 xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id,
2334 hdr->sadb_msg_type == SADB_X_SPDDELETE2); 2336 hdr->sadb_msg_type == SADB_X_SPDDELETE2);
2335 if (xp == NULL) 2337 if (xp == NULL)
2336 return -ENOENT; 2338 return -ENOENT;
@@ -2378,7 +2380,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
2378{ 2380{
2379 struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; 2381 struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk };
2380 2382
2381 return xfrm_policy_walk(dump_sp, &data); 2383 return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data);
2382} 2384}
2383 2385
2384static int key_notify_policy_flush(struct km_event *c) 2386static int key_notify_policy_flush(struct km_event *c)
@@ -2405,7 +2407,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2405{ 2407{
2406 struct km_event c; 2408 struct km_event c;
2407 2409
2408 xfrm_policy_flush(); 2410 xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN);
2411 c.data.type = XFRM_POLICY_TYPE_MAIN;
2409 c.event = XFRM_MSG_FLUSHPOLICY; 2412 c.event = XFRM_MSG_FLUSHPOLICY;
2410 c.pid = hdr->sadb_msg_pid; 2413 c.pid = hdr->sadb_msg_pid;
2411 c.seq = hdr->sadb_msg_seq; 2414 c.seq = hdr->sadb_msg_seq;
@@ -2667,6 +2670,9 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
2667 2670
2668static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) 2671static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
2669{ 2672{
2673 if (xp && xp->type != XFRM_POLICY_TYPE_MAIN)
2674 return 0;
2675
2670 switch (c->event) { 2676 switch (c->event) {
2671 case XFRM_MSG_POLEXPIRE: 2677 case XFRM_MSG_POLEXPIRE:
2672 return key_notify_policy_expire(xp, c); 2678 return key_notify_policy_expire(xp, c);
@@ -2675,6 +2681,8 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e
2675 case XFRM_MSG_UPDPOLICY: 2681 case XFRM_MSG_UPDPOLICY:
2676 return key_notify_policy(xp, dir, c); 2682 return key_notify_policy(xp, dir, c);
2677 case XFRM_MSG_FLUSHPOLICY: 2683 case XFRM_MSG_FLUSHPOLICY:
2684 if (c->data.type != XFRM_POLICY_TYPE_MAIN)
2685 break;
2678 return key_notify_policy_flush(c); 2686 return key_notify_policy_flush(c);
2679 default: 2687 default:
2680 printk("pfkey: Unknown policy event %d\n", c->event); 2688 printk("pfkey: Unknown policy event %d\n", c->event);
@@ -2708,6 +2716,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
2708#endif 2716#endif
2709 int sockaddr_size; 2717 int sockaddr_size;
2710 int size; 2718 int size;
2719 struct sadb_x_sec_ctx *sec_ctx;
2720 struct xfrm_sec_ctx *xfrm_ctx;
2721 int ctx_size = 0;
2711 2722
2712 sockaddr_size = pfkey_sockaddr_size(x->props.family); 2723 sockaddr_size = pfkey_sockaddr_size(x->props.family);
2713 if (!sockaddr_size) 2724 if (!sockaddr_size)
@@ -2723,6 +2734,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
2723 else if (x->id.proto == IPPROTO_ESP) 2734 else if (x->id.proto == IPPROTO_ESP)
2724 size += count_esp_combs(t); 2735 size += count_esp_combs(t);
2725 2736
2737 if ((xfrm_ctx = x->security)) {
2738 ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
2739 size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
2740 }
2741
2726 skb = alloc_skb(size + 16, GFP_ATOMIC); 2742 skb = alloc_skb(size + 16, GFP_ATOMIC);
2727 if (skb == NULL) 2743 if (skb == NULL)
2728 return -ENOMEM; 2744 return -ENOMEM;
@@ -2818,17 +2834,31 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
2818 else if (x->id.proto == IPPROTO_ESP) 2834 else if (x->id.proto == IPPROTO_ESP)
2819 dump_esp_combs(skb, t); 2835 dump_esp_combs(skb, t);
2820 2836
2837 /* security context */
2838 if (xfrm_ctx) {
2839 sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
2840 sizeof(struct sadb_x_sec_ctx) + ctx_size);
2841 sec_ctx->sadb_x_sec_len =
2842 (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
2843 sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
2844 sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
2845 sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
2846 sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
2847 memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
2848 xfrm_ctx->ctx_len);
2849 }
2850
2821 return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); 2851 return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
2822} 2852}
2823 2853
2824static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, 2854static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
2825 u8 *data, int len, int *dir) 2855 u8 *data, int len, int *dir)
2826{ 2856{
2827 struct xfrm_policy *xp; 2857 struct xfrm_policy *xp;
2828 struct sadb_x_policy *pol = (struct sadb_x_policy*)data; 2858 struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
2829 struct sadb_x_sec_ctx *sec_ctx; 2859 struct sadb_x_sec_ctx *sec_ctx;
2830 2860
2831 switch (family) { 2861 switch (sk->sk_family) {
2832 case AF_INET: 2862 case AF_INET:
2833 if (opt != IP_IPSEC_POLICY) { 2863 if (opt != IP_IPSEC_POLICY) {
2834 *dir = -EOPNOTSUPP; 2864 *dir = -EOPNOTSUPP;
@@ -2869,7 +2899,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
2869 xp->lft.hard_byte_limit = XFRM_INF; 2899 xp->lft.hard_byte_limit = XFRM_INF;
2870 xp->lft.soft_packet_limit = XFRM_INF; 2900 xp->lft.soft_packet_limit = XFRM_INF;
2871 xp->lft.hard_packet_limit = XFRM_INF; 2901 xp->lft.hard_packet_limit = XFRM_INF;
2872 xp->family = family; 2902 xp->family = sk->sk_family;
2873 2903
2874 xp->xfrm_nr = 0; 2904 xp->xfrm_nr = 0;
2875 if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && 2905 if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
@@ -2885,8 +2915,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
2885 p += pol->sadb_x_policy_len*8; 2915 p += pol->sadb_x_policy_len*8;
2886 sec_ctx = (struct sadb_x_sec_ctx *)p; 2916 sec_ctx = (struct sadb_x_sec_ctx *)p;
2887 if (len < pol->sadb_x_policy_len*8 + 2917 if (len < pol->sadb_x_policy_len*8 +
2888 sec_ctx->sadb_x_sec_len) 2918 sec_ctx->sadb_x_sec_len) {
2919 *dir = -EINVAL;
2889 goto out; 2920 goto out;
2921 }
2890 if ((*dir = verify_sec_ctx_len(p))) 2922 if ((*dir = verify_sec_ctx_len(p)))
2891 goto out; 2923 goto out;
2892 uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); 2924 uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
@@ -2896,6 +2928,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
2896 if (*dir) 2928 if (*dir)
2897 goto out; 2929 goto out;
2898 } 2930 }
2931 else {
2932 *dir = security_xfrm_sock_policy_alloc(xp, sk);
2933 if (*dir)
2934 goto out;
2935 }
2899 2936
2900 *dir = pol->sadb_x_policy_dir-1; 2937 *dir = pol->sadb_x_policy_dir-1;
2901 return xp; 2938 return xp;
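In the acquire message above, the security-context extension length is expressed in the PF_KEY unit of 8-byte words, which is why ctx_len is first rounded up with PFKEY_ALIGN8 before being added to the fixed header. A standalone sketch of that sizing arithmetic — the rounding macro is re-derived here in equivalent form, and struct toy_ext is only a stand-in for sadb_x_sec_ctx:

    #include <stdint.h>
    #include <stdio.h>

    #define PFKEY_ALIGN8(a) (((a) + 7u) & ~7u)   /* round up to 8-byte unit */

    struct toy_ext {                             /* 8-byte fixed part */
        uint16_t len, exttype;
        uint32_t doi;
    };

    int main(void)
    {
        unsigned ctx_len = 13;                       /* e.g. a label string */
        unsigned ctx_size = PFKEY_ALIGN8(ctx_len);   /* 16 */
        unsigned bytes = sizeof(struct toy_ext) + ctx_size;

        /* sadb_x_sec_len counts 64-bit words, as in the hunk above */
        printf("payload %u -> %u bytes -> sadb len %u\n",
               ctx_len, bytes, bytes / (unsigned)sizeof(uint64_t));
        return 0;
    }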
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a9894ddfd72a..0a28d2c5c44f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -148,6 +148,18 @@ config NETFILTER_XT_TARGET_CONNMARK
148 <file:Documentation/modules.txt>. The module will be called 148 <file:Documentation/modules.txt>. The module will be called
149 ipt_CONNMARK.o. If unsure, say `N'. 149 ipt_CONNMARK.o. If unsure, say `N'.
150 150
151config NETFILTER_XT_TARGET_DSCP
152 tristate '"DSCP" target support'
153 depends on NETFILTER_XTABLES
154 depends on IP_NF_MANGLE || IP6_NF_MANGLE
155 help
156 This option adds a `DSCP' target, which allows you to manipulate
157 the IPv4/IPv6 header DSCP field (differentiated services codepoint).
158
159 The DSCP field can have any value between 0x0 and 0x3f inclusive.
160
161 To compile it as a module, choose M here. If unsure, say N.
162
151config NETFILTER_XT_TARGET_MARK 163config NETFILTER_XT_TARGET_MARK
152 tristate '"MARK" target support' 164 tristate '"MARK" target support'
153 depends on NETFILTER_XTABLES 165 depends on NETFILTER_XTABLES
@@ -263,6 +275,17 @@ config NETFILTER_XT_MATCH_DCCP
263 If you want to compile it as a module, say M here and read 275 If you want to compile it as a module, say M here and read
264 <file:Documentation/modules.txt>. If unsure, say `N'. 276 <file:Documentation/modules.txt>. If unsure, say `N'.
265 277
278config NETFILTER_XT_MATCH_DSCP
279 tristate '"DSCP" match support'
280 depends on NETFILTER_XTABLES
281 help
282 This option adds a `DSCP' match, which allows you to match against
283 the IPv4/IPv6 header DSCP field (differentiated services codepoint).
284
285 The DSCP field can have any value between 0x0 and 0x3f inclusive.
286
287 To compile it as a module, choose M here. If unsure, say N.
288
266config NETFILTER_XT_MATCH_ESP 289config NETFILTER_XT_MATCH_ESP
267 tristate '"ESP" match support' 290 tristate '"ESP" match support'
268 depends on NETFILTER_XTABLES 291 depends on NETFILTER_XTABLES
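Both help texts bound DSCP at 0x3f because the codepoint is the upper six bits of the IPv4 TOS byte (equally, of the IPv6 traffic class), with the low two bits left to ECN. The shift-and-mask involved, shown standalone — constants written out for illustration, not the modules' actual source:

    #include <stdint.h>
    #include <stdio.h>

    #define DSCP_SHIFT 2
    #define DSCP_MASK  0xfc          /* top six bits of the TOS byte */

    static uint8_t tos_to_dscp(uint8_t tos)
    {
        return (tos & DSCP_MASK) >> DSCP_SHIFT;
    }

    static uint8_t dscp_set(uint8_t tos, uint8_t dscp)
    {
        return (uint8_t)((dscp << DSCP_SHIFT) | (tos & ~DSCP_MASK));
    }

    int main(void)
    {
        uint8_t tos = dscp_set(0x00, 0x2e);     /* EF = 46 = 0x2e */
        printf("tos=0x%02x dscp=0x%02x\n", tos, tos_to_dscp(tos));
        return 0;
    }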
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6fa4b7580458..a74be492fd0a 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
25# targets 25# targets
26obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o 26obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
27obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o 27obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
28obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
28obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o 29obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
29obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o 30obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
30obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o 31obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
@@ -37,6 +38,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
37obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o 38obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
38obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o 39obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
39obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o 40obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
41obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
40obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o 42obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
41obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o 43obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
42obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o 44obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5d29d5e23624..d80b935b3a92 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -182,7 +182,7 @@ next_hook:
182 ret = -EPERM; 182 ret = -EPERM;
183 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { 183 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
184 NFDEBUG("nf_hook: Verdict = QUEUE.\n"); 184 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
185 if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, 185 if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
186 verdict >> NF_VERDICT_BITS)) 186 verdict >> NF_VERDICT_BITS))
187 goto next_hook; 187 goto next_hook;
188 } 188 }
@@ -222,6 +222,28 @@ copy_skb:
222} 222}
223EXPORT_SYMBOL(skb_make_writable); 223EXPORT_SYMBOL(skb_make_writable);
224 224
225u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum)
226{
227 u_int32_t diff[] = { oldval, newval };
228
229 return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum));
230}
231EXPORT_SYMBOL(nf_csum_update);
232
233u_int16_t nf_proto_csum_update(struct sk_buff *skb,
234 u_int32_t oldval, u_int32_t newval,
235 u_int16_t csum, int pseudohdr)
236{
237 if (skb->ip_summed != CHECKSUM_PARTIAL) {
238 csum = nf_csum_update(oldval, newval, csum);
239 if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
240 skb->csum = nf_csum_update(oldval, newval, skb->csum);
241 } else if (pseudohdr)
242 csum = ~nf_csum_update(oldval, newval, ~csum);
243
244 return csum;
245}
246EXPORT_SYMBOL(nf_proto_csum_update);
225 247
226/* This does not belong here, but locally generated errors need it if connection 248/* This does not belong here, but locally generated errors need it if connection
227 tracking in use: without this, connection may not be in hash table, and hence 249 tracking in use: without this, connection may not be in hash table, and hence
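nf_csum_update and nf_proto_csum_update above perform an incremental checksum update instead of re-summing the whole packet when NAT rewrites a field. The identity they lean on is RFC 1624's HC' = ~(~HC + ~m + m') in one's-complement arithmetic; demonstrated standalone on a toy header (invented names, plain uint16 words in place of the kernel's csum helpers):

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
    }

    static uint16_t csum(const uint16_t *w, int n)      /* full recompute */
    {
        uint32_t sum = 0;
        while (n--)
            sum += *w++;
        return (uint16_t)~fold(sum);
    }

    static uint16_t csum_update(uint16_t hc, uint16_t m, uint16_t m_new)
    {
        /* RFC 1624, eqn 3: HC' = ~(~HC + ~m + m') */
        return (uint16_t)~fold((uint32_t)(uint16_t)~hc +
                               (uint16_t)~m + m_new);
    }

    int main(void)
    {
        uint16_t hdr[4] = { 0x4500, 0x0054, 0xc0a8, 0x0001 };
        uint16_t hc = csum(hdr, 4);

        uint16_t old = hdr[3];
        hdr[3] = 0x0002;                  /* rewrite one field, NAT-style */
        uint16_t inc = csum_update(hc, old, hdr[3]);

        /* both methods agree */
        printf("incremental=0x%04x full=0x%04x\n", inc, csum(hdr, 4));
        return 0;
    }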
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8f2261965a68..093b3ddc513c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -57,7 +57,6 @@
 #include <net/netfilter/nf_conntrack_protocol.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
@@ -74,17 +73,17 @@ atomic_t nf_conntrack_count = ATOMIC_INIT(0);
 
 void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
 LIST_HEAD(nf_conntrack_expect_list);
-struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
-struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
+struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly;
+struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly;
 static LIST_HEAD(helpers);
-unsigned int nf_conntrack_htable_size = 0;
-int nf_conntrack_max;
-struct list_head *nf_conntrack_hash;
-static kmem_cache_t *nf_conntrack_expect_cachep;
+unsigned int nf_conntrack_htable_size __read_mostly = 0;
+int nf_conntrack_max __read_mostly;
+struct list_head *nf_conntrack_hash __read_mostly;
+static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly;
 struct nf_conn nf_conntrack_untracked;
-unsigned int nf_ct_log_invalid;
+unsigned int nf_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
-static int nf_conntrack_vmalloc;
+static int nf_conntrack_vmalloc __read_mostly;
 
 static unsigned int nf_conntrack_next_id;
 static unsigned int nf_conntrack_expect_next_id;
@@ -539,15 +538,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
 static void
 clean_from_lists(struct nf_conn *ct)
 {
-	unsigned int ho, hr;
-
 	DEBUGP("clean_from_lists(%p)\n", ct);
 	ASSERT_WRITE_LOCK(&nf_conntrack_lock);
-
-	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
 
 	/* Destroy all pending expectations */
 	nf_ct_remove_expectations(ct);
@@ -617,16 +611,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	nf_ct_put(ct);
 }
 
-static inline int
-conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
-		    const struct nf_conntrack_tuple *tuple,
-		    const struct nf_conn *ignored_conntrack)
-{
-	ASSERT_READ_LOCK(&nf_conntrack_lock);
-	return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
-		&& nf_ct_tuple_equal(tuple, &i->tuple);
-}
-
 struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 		    const struct nf_conn *ignored_conntrack)
@@ -636,7 +620,8 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 
 	ASSERT_READ_LOCK(&nf_conntrack_lock);
 	list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
-		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
+		    nf_ct_tuple_equal(tuple, &h->tuple)) {
 			NF_CT_STAT_INC(found);
 			return h;
 		}
@@ -667,10 +652,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 				       unsigned int repl_hash)
 {
 	ct->id = ++nf_conntrack_next_id;
-	list_prepend(&nf_conntrack_hash[hash],
-		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&nf_conntrack_hash[repl_hash],
-		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+		 &nf_conntrack_hash[hash]);
+	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+		 &nf_conntrack_hash[repl_hash]);
 }
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
@@ -690,7 +675,9 @@ int
 __nf_conntrack_confirm(struct sk_buff **pskb)
 {
 	unsigned int hash, repl_hash;
+	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
+	struct nf_conn_help *help;
 	enum ip_conntrack_info ctinfo;
 
 	ct = nf_ct_get(*pskb, &ctinfo);
@@ -720,41 +707,41 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&nf_conntrack_hash[hash],
-		       conntrack_tuple_cmp,
-		       struct nf_conntrack_tuple_hash *,
-		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&nf_conntrack_hash[repl_hash],
-			  conntrack_tuple_cmp,
-			  struct nf_conntrack_tuple_hash *,
-			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
-		struct nf_conn_help *help;
-		/* Remove from unconfirmed list */
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_for_each_entry(h, &nf_conntrack_hash[hash], list)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      &h->tuple))
+			goto out;
+	list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				      &h->tuple))
+			goto out;
 
-		__nf_conntrack_hash_insert(ct, hash, repl_hash);
-		/* Timer relative to confirmation time, not original
-		   setting time, otherwise we'd get timer wrap in
-		   weird delay cases. */
-		ct->timeout.expires += jiffies;
-		add_timer(&ct->timeout);
-		atomic_inc(&ct->ct_general.use);
-		set_bit(IPS_CONFIRMED_BIT, &ct->status);
-		NF_CT_STAT_INC(insert);
-		write_unlock_bh(&nf_conntrack_lock);
-		help = nfct_help(ct);
-		if (help && help->helper)
-			nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+	/* Remove from unconfirmed list */
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+	__nf_conntrack_hash_insert(ct, hash, repl_hash);
+	/* Timer relative to confirmation time, not original
+	   setting time, otherwise we'd get timer wrap in
+	   weird delay cases. */
+	ct->timeout.expires += jiffies;
+	add_timer(&ct->timeout);
+	atomic_inc(&ct->ct_general.use);
+	set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	NF_CT_STAT_INC(insert);
+	write_unlock_bh(&nf_conntrack_lock);
+	help = nfct_help(ct);
+	if (help && help->helper)
+		nf_conntrack_event_cache(IPCT_HELPER, *pskb);
 #ifdef CONFIG_NF_NAT_NEEDED
 	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
 	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
 		nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
 #endif
 	nf_conntrack_event_cache(master_ct(ct) ?
 				 IPCT_RELATED : IPCT_NEW, *pskb);
 	return NF_ACCEPT;
-	}
 
+out:
 	NF_CT_STAT_INC(insert_failed);
 	write_unlock_bh(&nf_conntrack_lock);
 	return NF_DROP;
@@ -777,24 +764,21 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
-{
-	return !(test_bit(IPS_ASSURED_BIT,
-			  &nf_ct_tuplehash_to_ctrack(i)->status));
-}
-
 static int early_drop(struct list_head *chain)
 {
 	/* Traverse backwards: gives us oldest, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
-	struct nf_conn *ct = NULL;
+	struct nf_conn *ct = NULL, *tmp;
 	int dropped = 0;
 
 	read_lock_bh(&nf_conntrack_lock);
-	h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
-	if (h) {
-		ct = nf_ct_tuplehash_to_ctrack(h);
-		atomic_inc(&ct->ct_general.use);
+	list_for_each_entry_reverse(h, chain, list) {
+		tmp = nf_ct_tuplehash_to_ctrack(h);
+		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+			ct = tmp;
+			atomic_inc(&ct->ct_general.use);
+			break;
+		}
 	}
 	read_unlock_bh(&nf_conntrack_lock);
 
@@ -810,18 +794,16 @@ static int early_drop(struct list_head *chain)
 	return dropped;
 }
 
-static inline int helper_cmp(const struct nf_conntrack_helper *i,
-			     const struct nf_conntrack_tuple *rtuple)
-{
-	return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
 static struct nf_conntrack_helper *
 __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
-			 struct nf_conntrack_helper *,
-			 tuple);
+	struct nf_conntrack_helper *h;
+
+	list_for_each_entry(h, &helpers, list) {
+		if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+			return h;
+	}
+	return NULL;
 }
 
 struct nf_conntrack_helper *
@@ -866,11 +848,15 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 		nf_conntrack_hash_rnd_initted = 1;
 	}
 
+	/* We don't want any race condition at early drop stage */
+	atomic_inc(&nf_conntrack_count);
+
 	if (nf_conntrack_max
-	    && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
+	    && atomic_read(&nf_conntrack_count) > nf_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
 		if (!early_drop(&nf_conntrack_hash[hash])) {
+			atomic_dec(&nf_conntrack_count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "nf_conntrack: table full, dropping"
@@ -921,10 +907,12 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	init_timer(&conntrack->timeout);
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
+	read_unlock_bh(&nf_ct_cache_lock);
 
-	atomic_inc(&nf_conntrack_count);
+	return conntrack;
 out:
 	read_unlock_bh(&nf_ct_cache_lock);
+	atomic_dec(&nf_conntrack_count);
 	return conntrack;
 }
 
@@ -1323,7 +1311,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 		return ret;
 	}
 	write_lock_bh(&nf_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_add(&me->list, &helpers);
 	write_unlock_bh(&nf_conntrack_lock);
 
 	return 0;
@@ -1342,8 +1330,8 @@ __nf_conntrack_helper_find_byname(const char *name)
 	return NULL;
 }
 
-static inline int unhelp(struct nf_conntrack_tuple_hash *i,
+static inline void unhelp(struct nf_conntrack_tuple_hash *i,
 			 const struct nf_conntrack_helper *me)
 {
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
 	struct nf_conn_help *help = nfct_help(ct);
@@ -1352,17 +1340,17 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 		nf_conntrack_event(IPCT_HELPER, ct);
 		help->helper = NULL;
 	}
-	return 0;
 }
 
 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 {
 	unsigned int i;
+	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp, *tmp;
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&nf_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	list_del(&me->list);
 
 	/* Get rid of expectations */
 	list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
@@ -1374,10 +1362,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
-	for (i = 0; i < nf_conntrack_htable_size; i++)
-		LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
-			    struct nf_conntrack_tuple_hash *, me);
+	list_for_each_entry(h, &unconfirmed, list)
+		unhelp(h, me);
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
+		list_for_each_entry(h, &nf_conntrack_hash[i], list)
+			unhelp(h, me);
+	}
 	write_unlock_bh(&nf_conntrack_lock);
 
 	/* Someone could be still looking at the helper in a bh. */
@@ -1510,37 +1500,40 @@ do_iter(const struct nf_conntrack_tuple_hash *i,
 }
 
 /* Bring out ya dead! */
-static struct nf_conntrack_tuple_hash *
+static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 		void *data, unsigned int *bucket)
 {
-	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
 
 	write_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
-				struct nf_conntrack_tuple_hash *, iter, data);
-		if (h)
-			break;
+		list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) {
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (iter(ct, data))
+				goto found;
+		}
 	}
-	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
-				struct nf_conntrack_tuple_hash *, iter, data);
-	if (h)
-		atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
+	list_for_each_entry(h, &unconfirmed, list) {
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		if (iter(ct, data))
+			goto found;
+	}
+	return NULL;
+found:
+	atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
 	write_unlock_bh(&nf_conntrack_lock);
-
-	return h;
+	return ct;
 }
 
 void
 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
 {
-	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
 	unsigned int bucket = 0;
 
-	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
-		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
 			death_by_timeout((unsigned long)ct);
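
The nf_conntrack_core.c hunks above are one mechanical conversion: the private LIST_FIND()/LIST_DELETE()/list_prepend() helpers from listhelp.h are replaced by the generic list_head API. The core idea, an intrusive node embedded in the object and recovered with a container_of()-style macro, can be shown in isolation; every struct name in this sketch is invented for the example.

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Prepend, matching what list_add() does on a list head. */
static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

struct tuple { int src, dst; };
struct tuple_hash { struct list_head list; struct tuple t; };

int main(void)
{
	struct list_head bucket = { &bucket, &bucket };
	struct tuple_hash a = { .t = { 1, 2 } }, b = { .t = { 3, 4 } };
	struct list_head *pos;

	list_add(&a.list, &bucket);
	list_add(&b.list, &bucket);

	/* Open-coded equivalent of list_for_each_entry(): */
	for (pos = bucket.next; pos != &bucket; pos = pos->next) {
		struct tuple_hash *h = container_of(pos, struct tuple_hash, list);
		printf("%d -> %d\n", h->t.src, h->t.dst);
	}
	return 0;
}
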
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 960972d225f9..0c17a5bd112b 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -21,6 +21,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/ctype.h>
+#include <linux/inet.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
 
@@ -111,101 +112,14 @@ static struct ftp_search {
 	},
 };
 
-/* This code is based on inet_pton() in glibc-2.2.4 */
 static int
 get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
 {
-	static const char xdigits[] = "0123456789abcdef";
-	u_int8_t tmp[16], *tp, *endp, *colonp;
-	int ch, saw_xdigit;
-	u_int32_t val;
-	size_t clen = 0;
-
-	tp = memset(tmp, '\0', sizeof(tmp));
-	endp = tp + sizeof(tmp);
-	colonp = NULL;
-
-	/* Leading :: requires some special handling. */
-	if (*src == ':'){
-		if (*++src != ':') {
-			DEBUGP("invalid \":\" at the head of addr\n");
-			return 0;
-		}
-		clen++;
-	}
-
-	saw_xdigit = 0;
-	val = 0;
-	while ((clen < dlen) && (*src != term)) {
-		const char *pch;
-
-		ch = tolower(*src++);
-		clen++;
-
-		pch = strchr(xdigits, ch);
-		if (pch != NULL) {
-			val <<= 4;
-			val |= (pch - xdigits);
-			if (val > 0xffff)
-				return 0;
-
-			saw_xdigit = 1;
-			continue;
-		}
-		if (ch != ':') {
-			DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
-			return 0;
-		}
-
-		if (!saw_xdigit) {
-			if (colonp) {
-				DEBUGP("invalid location of \"::\".\n");
-				return 0;
-			}
-			colonp = tp;
-			continue;
-		} else if (*src == term) {
-			DEBUGP("trancated IPv6 addr\n");
-			return 0;
-		}
-
-		if (tp + 2 > endp)
-			return 0;
-		*tp++ = (u_int8_t) (val >> 8) & 0xff;
-		*tp++ = (u_int8_t) val & 0xff;
-
-		saw_xdigit = 0;
-		val = 0;
-		continue;
-	}
-	if (saw_xdigit) {
-		if (tp + 2 > endp)
-			return 0;
-		*tp++ = (u_int8_t) (val >> 8) & 0xff;
-		*tp++ = (u_int8_t) val & 0xff;
-	}
-	if (colonp != NULL) {
-		/*
-		 * Since some memmove()'s erroneously fail to handle
-		 * overlapping regions, we'll do the shift by hand.
-		 */
-		const int n = tp - colonp;
-		int i;
-
-		if (tp == endp)
-			return 0;
-
-		for (i = 1; i <= n; i++) {
-			endp[- i] = colonp[n - i];
-			colonp[n - i] = 0;
-		}
-		tp = endp;
-	}
-	if (tp != endp || (*src != term))
-		return 0;
-
-	memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
-	return clen;
+	const char *end;
+	int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end);
+	if (ret > 0)
+		return (int)(end - src);
+	return 0;
 }
 
 static int try_number(const char *data, size_t dlen, u_int32_t array[],
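
get_ipv6_addr() now delegates to in6_pton() instead of carrying a private copy of the glibc parser. In user space the equivalent move is to lean on inet_pton(3); the only extra work is isolating the address that ends at a protocol-specific terminator. A minimal sketch, assuming a '|'-delimited EPRT-style field; the helper name is ours, only inet_pton() is library API.

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

/* Parse an IPv6 address that runs up to 'term'; return the number of
 * characters consumed, or 0 on error, the same contract as the
 * patched get_ipv6_addr(). */
static int parse_ipv6(const char *src, size_t dlen, unsigned char dst[16],
		      char term)
{
	char buf[INET6_ADDRSTRLEN];
	const char *end = memchr(src, term, dlen);

	if (!end || (size_t)(end - src) >= sizeof(buf))
		return 0;
	memcpy(buf, src, end - src);
	buf[end - src] = '\0';
	return inet_pton(AF_INET6, buf, dst) == 1 ? (int)(end - src) : 0;
}

int main(void)
{
	unsigned char addr[16];
	const char *wire = "2001:db8::1|2222|";	/* hypothetical wire data */
	int n = parse_ipv6(wire, strlen(wire), addr, '|');

	printf("consumed %d chars, first byte %02x\n", n, addr[0]);
	return 0;
}
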
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6527d4e048d8..1721f7c78c77 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -339,11 +339,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		/* dump everything */
 		events = ~0UL;
 		group = NFNLGRP_CONNTRACK_NEW;
-	} else if (events & (IPCT_STATUS |
-			     IPCT_PROTOINFO |
-			     IPCT_HELPER |
-			     IPCT_HELPINFO |
-			     IPCT_NATINFO)) {
+	} else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
 	} else
@@ -395,6 +391,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
 		goto nfattr_failure;
 
+	if (events & IPCT_MARK
+	    && ctnetlink_dump_mark(skb, ct) < 0)
+		goto nfattr_failure;
+
 	nlh->nlmsg_len = skb->tail - b;
 	nfnetlink_send(skb, 0, group, 0);
 	return NOTIFY_DONE;
@@ -455,6 +455,11 @@ restart:
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
+#ifdef CONFIG_NF_CT_ACCT
+			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+						IPCTNL_MSG_CT_GET_CTRZERO)
+				memset(&ct->counters, 0, sizeof(ct->counters));
+#endif
 		}
 		if (cb->args[1]) {
 			cb->args[1] = 0;
@@ -470,50 +475,6 @@ out:
 	return skb->len;
 }
 
-#ifdef CONFIG_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct nf_conn *ct = NULL;
-	struct nf_conntrack_tuple_hash *h;
-	struct list_head *i;
-	u_int32_t *id = (u_int32_t *) &cb->args[1];
-	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
-	u_int8_t l3proto = nfmsg->nfgen_family;
-
-	DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__,
-	       cb->args[0], *id);
-
-	write_lock_bh(&nf_conntrack_lock);
-	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
-			h = (struct nf_conntrack_tuple_hash *) i;
-			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
-				continue;
-			ct = nf_ct_tuplehash_to_ctrack(h);
-			if (l3proto && L3PROTO(ct) != l3proto)
-				continue;
-			if (ct->id <= *id)
-				continue;
-			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
-						cb->nlh->nlmsg_seq,
-						IPCTNL_MSG_CT_NEW,
-						1, ct) < 0)
-				goto out;
-			*id = ct->id;
-
-			memset(&ct->counters, 0, sizeof(ct->counters));
-		}
-	}
-out:
-	write_unlock_bh(&nf_conntrack_lock);
-
-	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
-	return skb->len;
-}
-#endif
 
 static inline int
 ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple)
 {
@@ -788,22 +749,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		u32 rlen;
 
-		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
-					IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_NF_CT_ACCT
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table_w,
-						ctnetlink_done)) != 0)
-				return -EINVAL;
-#else
+#ifndef CONFIG_NF_CT_ACCT
+		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
 			return -ENOTSUPP;
 #endif
-		} else {
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-							ctnetlink_dump_table,
-							ctnetlink_done)) != 0)
+		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+						ctnetlink_dump_table,
+						ctnetlink_done)) != 0)
 			return -EINVAL;
-		}
 
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (rlen > skb->len)
@@ -1274,6 +1227,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	} else
 		return NOTIFY_DONE;
 
+	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+		return NOTIFY_DONE;
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
 	if (!skb)
 		return NOTIFY_DONE;
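
Two ctnetlink simplifications land in this file: the separate CTRZERO dump function is folded into the common dump loop, which now zeroes counters inline when the request asks for it, and expectation events are skipped outright when no netlink group has listeners. The fold can be modelled in a few lines; the types below are invented stand-ins.

#include <stdio.h>
#include <string.h>

struct conn { unsigned long pkts, bytes; };

/* One dump path; 'zero' mimics the IPCTNL_MSG_CT_GET_CTRZERO case. */
static void dump_table(struct conn *tbl, int n, int zero)
{
	for (int i = 0; i < n; i++) {
		printf("conn %d: %lu pkts, %lu bytes\n",
		       i, tbl[i].pkts, tbl[i].bytes);
		if (zero)
			memset(&tbl[i], 0, sizeof(tbl[i]));
	}
}

int main(void)
{
	struct conn tbl[2] = { { 5, 700 }, { 9, 1200 } };

	dump_table(tbl, 2, 1);	/* read-and-reset */
	dump_table(tbl, 2, 0);	/* all zero now */
	return 0;
}
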
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 46bc27e2756d..26408bb0955b 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack_protocol.h>
 
-unsigned int nf_ct_generic_timeout = 600*HZ;
+unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
 
 static int generic_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
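
Many of the conntrack tunables in the surrounding hunks gain a __read_mostly annotation. The tag is only a section attribute: rarely-written globals get grouped away from frequently-written ones, so they do not share (and false-share) cache lines with hot data. Outside the kernel the same idea looks like the sketch below; the section name mirrors the kernel's, the variable is invented.

#define __read_mostly __attribute__((__section__(".data.read_mostly")))

/* Tuned once at startup, read on every packet. */
__read_mostly int timeout_secs = 600;

int main(void) { return timeout_secs != 600; }
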
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 9bd8a7877fd5..af568777372b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -64,13 +64,13 @@ static const char *sctp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-static unsigned int nf_ct_sctp_timeout_closed = 10 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_wait = 3 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_echoed = 3 SECS;
-static unsigned int nf_ct_sctp_timeout_established = 5 DAYS;
-static unsigned int nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
+static unsigned int nf_ct_sctp_timeout_closed __read_mostly = 10 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
+static unsigned int nf_ct_sctp_timeout_established __read_mostly = 5 DAYS;
+static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
 
 static unsigned int * sctp_timeouts[]
 = { NULL,	/* SCTP_CONNTRACK_NONE */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index af8adcba23a7..238bbb5b72ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -57,19 +57,19 @@ static DEFINE_RWLOCK(tcp_lock);
57/* "Be conservative in what you do, 57/* "Be conservative in what you do,
58 be liberal in what you accept from others." 58 be liberal in what you accept from others."
59 If it's non-zero, we mark only out of window RST segments as INVALID. */ 59 If it's non-zero, we mark only out of window RST segments as INVALID. */
60int nf_ct_tcp_be_liberal = 0; 60int nf_ct_tcp_be_liberal __read_mostly = 0;
61 61
62/* When connection is picked up from the middle, how many packets are required 62/* When connection is picked up from the middle, how many packets are required
63 to pass in each direction when we assume we are in sync - if any side uses 63 to pass in each direction when we assume we are in sync - if any side uses
64 window scaling, we lost the game. 64 window scaling, we lost the game.
65 If it is set to zero, we disable picking up already established 65 If it is set to zero, we disable picking up already established
66 connections. */ 66 connections. */
67int nf_ct_tcp_loose = 3; 67int nf_ct_tcp_loose __read_mostly = 3;
68 68
69/* Max number of the retransmitted packets without receiving an (acceptable) 69/* Max number of the retransmitted packets without receiving an (acceptable)
70 ACK from the destination. If this number is reached, a shorter timer 70 ACK from the destination. If this number is reached, a shorter timer
71 will be started. */ 71 will be started. */
72int nf_ct_tcp_max_retrans = 3; 72int nf_ct_tcp_max_retrans __read_mostly = 3;
73 73
74 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more 74 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
75 closely. They're more complex. --RR */ 75 closely. They're more complex. --RR */
@@ -92,19 +92,19 @@ static const char *tcp_conntrack_names[] = {
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-unsigned int nf_ct_tcp_timeout_syn_sent = 2 MINS;
-unsigned int nf_ct_tcp_timeout_syn_recv = 60 SECS;
-unsigned int nf_ct_tcp_timeout_established = 5 DAYS;
-unsigned int nf_ct_tcp_timeout_fin_wait = 2 MINS;
-unsigned int nf_ct_tcp_timeout_close_wait = 60 SECS;
-unsigned int nf_ct_tcp_timeout_last_ack = 30 SECS;
-unsigned int nf_ct_tcp_timeout_time_wait = 2 MINS;
-unsigned int nf_ct_tcp_timeout_close = 10 SECS;
+unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
+unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
+unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS;
+unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
+unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
+unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
+unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
+unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS;
 
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds
    to ~13-30min depending on RTO. */
-unsigned int nf_ct_tcp_timeout_max_retrans = 5 MINS;
+unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
 
 static unsigned int * tcp_timeouts[]
 = { NULL,	/* TCP_CONNTRACK_NONE */
@@ -688,13 +688,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 		if (state->last_dir == dir
 		    && state->last_seq == seq
 		    && state->last_ack == ack
-		    && state->last_end == end)
+		    && state->last_end == end
+		    && state->last_win == win)
 			state->retrans++;
 		else {
 			state->last_dir = dir;
 			state->last_seq = seq;
 			state->last_ack = ack;
 			state->last_end = end;
+			state->last_win = win;
 			state->retrans = 0;
 		}
 	}
@@ -823,8 +825,7 @@ static int tcp_error(struct sk_buff *skb,
 
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (nf_conntrack_checksum &&
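
The tcp_in_window() change above tightens retransmission detection: a repeated segment only counts as a retransmit if the advertised window repeats too, so pure window-update ACKs no longer inflate the retrans counter. A toy model of the heuristic; the field names mirror struct ip_ct_tcp, everything else is invented for the sketch.

#include <stdio.h>

struct last_seen { int dir; unsigned seq, ack, end, win, retrans; };

static void track(struct last_seen *s, int dir,
		  unsigned seq, unsigned ack, unsigned end, unsigned win)
{
	/* Same direction and same seq/ack/end/win: a true retransmit. */
	if (s->dir == dir && s->seq == seq && s->ack == ack &&
	    s->end == end && s->win == win)
		s->retrans++;
	else {
		s->dir = dir; s->seq = seq; s->ack = ack;
		s->end = end; s->win = win;
		s->retrans = 0;
	}
}

int main(void)
{
	struct last_seen s = { -1 };

	track(&s, 0, 1000, 1, 1500, 512);
	track(&s, 0, 1000, 1, 1500, 512);	/* true retransmit */
	track(&s, 0, 1000, 1, 1500, 1024);	/* window update, not counted */
	printf("retrans=%u\n", s.retrans);	/* prints 0 after the update */
	return 0;
}
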
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index ae07ebe3ab37..d28981cf9af5 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -27,8 +27,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_protocol.h>
 
-unsigned int nf_ct_udp_timeout = 30*HZ;
-unsigned int nf_ct_udp_timeout_stream = 180*HZ;
+unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
+unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
 
 static int udp_pkt_to_tuple(const struct sk_buff *skb,
 			    unsigned int dataoff,
@@ -131,8 +131,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff,
 
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
 	if (nf_conntrack_checksum &&
 	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 4ef836699962..5954f6773810 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -37,7 +37,6 @@
 #include <net/netfilter/nf_conntrack_protocol.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -428,7 +427,7 @@ static struct file_operations ct_cpu_seq_fops = {
 
 /* Sysctl support */
 
-int nf_conntrack_checksum = 1;
+int nf_conntrack_checksum __read_mostly = 1;
 
 #ifdef CONFIG_SYSCTL
 
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 86e392bfe833..a981971ce1d5 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -23,7 +23,7 @@ extern unsigned int nf_iterate(struct list_head *head,
 			       int hook_thresh);
 
 /* nf_queue.c */
-extern int nf_queue(struct sk_buff **skb,
+extern int nf_queue(struct sk_buff *skb,
 		    struct list_head *elem,
 		    int pf, unsigned int hook,
 		    struct net_device *indev,
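
The prototype change from struct sk_buff ** to struct sk_buff * reflects an ownership change: a callee that might replace the buffer must be able to hand the new pointer back, but once nf_queue() always consumes the skb, a plain pointer is enough. The double-pointer idiom in isolation; all names below are local to the sketch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A callee that may swap the buffer needs the caller's pointer. */
static void grow(char **buf, size_t *len)
{
	char *bigger = realloc(*buf, *len * 2);
	if (bigger) {
		*buf = bigger;	/* caller's pointer is updated in place */
		*len *= 2;
	}
}

int main(void)
{
	size_t len = 8;
	char *buf = malloc(len);

	strcpy(buf, "payload");
	grow(&buf, &len);
	printf("len=%zu data=%s\n", len, buf);
	free(buf);
	return 0;
}
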
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 662a869593bf..4d8936ed581d 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -74,13 +74,13 @@ EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
  * Any packet that leaves via this function must come back
  * through nf_reinject().
  */
-int nf_queue(struct sk_buff **skb,
+static int __nf_queue(struct sk_buff *skb,
 	     struct list_head *elem,
 	     int pf, unsigned int hook,
 	     struct net_device *indev,
 	     struct net_device *outdev,
 	     int (*okfn)(struct sk_buff *),
 	     unsigned int queuenum)
 {
 	int status;
 	struct nf_info *info;
@@ -94,14 +94,14 @@ int nf_queue(struct sk_buff **skb,
 	read_lock(&queue_handler_lock);
 	if (!queue_handler[pf]) {
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
 	afinfo = nf_get_afinfo(pf);
 	if (!afinfo) {
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
@@ -109,9 +109,9 @@ int nf_queue(struct sk_buff **skb,
 	if (!info) {
 		if (net_ratelimit())
 			printk(KERN_ERR "OOM queueing packet %p\n",
-			       *skb);
+			       skb);
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
@@ -130,15 +130,15 @@ int nf_queue(struct sk_buff **skb,
 	if (outdev) dev_hold(outdev);
 
 #ifdef CONFIG_BRIDGE_NETFILTER
-	if ((*skb)->nf_bridge) {
-		physindev = (*skb)->nf_bridge->physindev;
+	if (skb->nf_bridge) {
+		physindev = skb->nf_bridge->physindev;
 		if (physindev) dev_hold(physindev);
-		physoutdev = (*skb)->nf_bridge->physoutdev;
+		physoutdev = skb->nf_bridge->physoutdev;
 		if (physoutdev) dev_hold(physoutdev);
 	}
 #endif
-	afinfo->saveroute(*skb, info);
-	status = queue_handler[pf]->outfn(*skb, info, queuenum,
+	afinfo->saveroute(skb, info);
+	status = queue_handler[pf]->outfn(skb, info, queuenum,
 					  queue_handler[pf]->data);
 
 	read_unlock(&queue_handler_lock);
@@ -153,7 +153,7 @@ int nf_queue(struct sk_buff **skb,
 #endif
 		module_put(info->elem->owner);
 		kfree(info);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 
 		return 1;
 	}
@@ -161,6 +161,46 @@ int nf_queue(struct sk_buff **skb,
 	return 1;
 }
 
+int nf_queue(struct sk_buff *skb,
+	     struct list_head *elem,
+	     int pf, unsigned int hook,
+	     struct net_device *indev,
+	     struct net_device *outdev,
+	     int (*okfn)(struct sk_buff *),
+	     unsigned int queuenum)
+{
+	struct sk_buff *segs;
+
+	if (!skb_is_gso(skb))
+		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+				  queuenum);
+
+	switch (pf) {
+	case AF_INET:
+		skb->protocol = htons(ETH_P_IP);
+		break;
+	case AF_INET6:
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	}
+
+	segs = skb_gso_segment(skb, 0);
+	kfree_skb(skb);
+	if (unlikely(IS_ERR(segs)))
+		return 1;
+
+	do {
+		struct sk_buff *nskb = segs->next;
+
+		segs->next = NULL;
+		if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
+				queuenum))
+			kfree_skb(segs);
+		segs = nskb;
+	} while (segs);
+	return 1;
+}
+
 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 		 unsigned int verdict)
 {
@@ -224,9 +264,9 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 	case NF_STOLEN:
 		break;
 	case NF_QUEUE:
-		if (!nf_queue(&skb, elem, info->pf, info->hook,
-			      info->indev, info->outdev, info->okfn,
-			      verdict >> NF_VERDICT_BITS))
+		if (!__nf_queue(skb, elem, info->pf, info->hook,
+				info->indev, info->outdev, info->okfn,
+				verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 		break;
 	default:
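
nf_queue() now fronts __nf_queue() with a GSO-aware wrapper: an oversized buffer is segmented via skb_gso_segment() and each segment is queued on its own, unlinking it from the chain first. The chain walk can be modelled in plain C; struct seg below stands in for struct sk_buff and nothing here is kernel API.

#include <stdio.h>
#include <stdlib.h>

struct seg { int len; struct seg *next; };

/* Returns nonzero if the segment was handed off (its consumer will
 * free it); 0 means the caller must free it, as in __nf_queue(). */
static int queue_one(struct seg *s)
{
	printf("queued segment of %d bytes\n", s->len);
	return 1;
}

int main(void)
{
	/* Pretend skb_gso_segment() produced three segments. */
	struct seg *head = NULL, **tail = &head;
	for (int i = 0; i < 3; i++) {
		struct seg *s = malloc(sizeof(*s));
		s->len = 1460;
		s->next = NULL;
		*tail = s;
		tail = &s->next;
	}

	/* The loop nf_queue() now runs over the segment chain. */
	struct seg *s = head;
	do {
		struct seg *next = s->next;

		s->next = NULL;	/* detach before handing off */
		if (!queue_one(s))
			free(s);
		s = next;
	} while (s);
	return 0;
}
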
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 49ef41e34c48..8eb2473d83e1 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		break;
 
 	case NFQNL_COPY_PACKET:
-		if (entskb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entskb,
-					       outdev == NULL))) {
+		if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
+		     entskb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entskb))) {
 			spin_unlock_bh(&queue->lock);
 			return NULL;
 		}
@@ -584,7 +584,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
 			queue->queue_dropped++;
 			status = -ENOSPC;
 			if (net_ratelimit())
-				printk(KERN_WARNING "ip_queue: full at %d entries, "
+				printk(KERN_WARNING "nf_queue: full at %d entries, "
 				       "dropping packets(s). Dropped: %d\n",
 				       queue->queue_total, queue->queue_dropped);
 			goto err_out_free_nskb;
@@ -635,7 +635,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
 					  diff,
 					  GFP_ATOMIC);
 		if (newskb == NULL) {
-			printk(KERN_WARNING "ip_queue: OOM "
+			printk(KERN_WARNING "nf_queue: OOM "
 			       "in mangle, dropping packet\n");
 			return -ENOMEM;
 		}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 174e8f970095..58522fc65d33 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -81,12 +81,42 @@ xt_unregister_target(struct xt_target *target)
 	int af = target->family;
 
 	mutex_lock(&xt[af].mutex);
-	LIST_DELETE(&xt[af].target, target);
+	list_del(&target->list);
 	mutex_unlock(&xt[af].mutex);
 }
 EXPORT_SYMBOL(xt_unregister_target);
 
 int
+xt_register_targets(struct xt_target *target, unsigned int n)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < n; i++) {
+		err = xt_register_target(&target[i]);
+		if (err)
+			goto err;
+	}
+	return err;
+
+err:
+	if (i > 0)
+		xt_unregister_targets(target, i);
+	return err;
+}
+EXPORT_SYMBOL(xt_register_targets);
+
+void
+xt_unregister_targets(struct xt_target *target, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		xt_unregister_target(&target[i]);
+}
+EXPORT_SYMBOL(xt_unregister_targets);
+
+int
 xt_register_match(struct xt_match *match)
 {
 	int ret, af = match->family;
@@ -108,11 +138,41 @@ xt_unregister_match(struct xt_match *match)
 	int af = match->family;
 
 	mutex_lock(&xt[af].mutex);
-	LIST_DELETE(&xt[af].match, match);
+	list_del(&match->list);
 	mutex_unlock(&xt[af].mutex);
 }
 EXPORT_SYMBOL(xt_unregister_match);
 
+int
+xt_register_matches(struct xt_match *match, unsigned int n)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < n; i++) {
+		err = xt_register_match(&match[i]);
+		if (err)
+			goto err;
+	}
+	return err;
+
+err:
+	if (i > 0)
+		xt_unregister_matches(match, i);
+	return err;
+}
+EXPORT_SYMBOL(xt_register_matches);
+
+void
+xt_unregister_matches(struct xt_match *match, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		xt_unregister_match(&match[i]);
+}
+EXPORT_SYMBOL(xt_unregister_matches);
+
 
 /*
  * These are weird, but module loading must not be done with mutex
@@ -273,52 +333,65 @@ int xt_check_match(const struct xt_match *match, unsigned short family,
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_match(void *match, void **dstptr, int *size, int convert)
+int xt_compat_match_offset(struct xt_match *match)
 {
-	struct xt_match *m;
-	struct compat_xt_entry_match *pcompat_m;
-	struct xt_entry_match *pm;
-	u_int16_t msize;
-	int off, ret;
+	u_int16_t csize = match->compatsize ? : match->matchsize;
+	return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_offset);
 
-	ret = 0;
-	m = ((struct xt_entry_match *)match)->u.kernel.match;
-	off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize);
-	switch (convert) {
-	case COMPAT_TO_USER:
-		pm = (struct xt_entry_match *)match;
-		msize = pm->u.user.match_size;
-		if (copy_to_user(*dstptr, pm, msize)) {
-			ret = -EFAULT;
-			break;
-		}
-		msize -= off;
-		if (put_user(msize, (u_int16_t *)*dstptr))
-			ret = -EFAULT;
-		*size -= off;
-		*dstptr += msize;
-		break;
-	case COMPAT_FROM_USER:
-		pcompat_m = (struct compat_xt_entry_match *)match;
-		pm = (struct xt_entry_match *)*dstptr;
-		msize = pcompat_m->u.user.match_size;
-		memcpy(pm, pcompat_m, msize);
-		msize += off;
-		pm->u.user.match_size = msize;
-		*size += off;
-		*dstptr += msize;
-		break;
-	case COMPAT_CALC_SIZE:
-		*size += off;
-		break;
-	default:
-		ret = -ENOPROTOOPT;
-		break;
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+			       int *size)
+{
+	struct xt_match *match = m->u.kernel.match;
+	struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
+	int pad, off = xt_compat_match_offset(match);
+	u_int16_t msize = cm->u.user.match_size;
+
+	m = *dstptr;
+	memcpy(m, cm, sizeof(*cm));
+	if (match->compat_from_user)
+		match->compat_from_user(m->data, cm->data);
+	else
+		memcpy(m->data, cm->data, msize - sizeof(*cm));
+	pad = XT_ALIGN(match->matchsize) - match->matchsize;
+	if (pad > 0)
+		memset(m->data + match->matchsize, 0, pad);
+
+	msize += off;
+	m->u.user.match_size = msize;
+
+	*size += off;
+	*dstptr += msize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
+
+int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
+			    int *size)
+{
+	struct xt_match *match = m->u.kernel.match;
+	struct compat_xt_entry_match __user *cm = *dstptr;
+	int off = xt_compat_match_offset(match);
+	u_int16_t msize = m->u.user.match_size - off;
+
+	if (copy_to_user(cm, m, sizeof(*cm)) ||
+	    put_user(msize, &cm->u.user.match_size))
+		return -EFAULT;
+
+	if (match->compat_to_user) {
+		if (match->compat_to_user((void __user *)cm->data, m->data))
+			return -EFAULT;
+	} else {
+		if (copy_to_user(cm->data, m->data, msize - sizeof(*cm)))
+			return -EFAULT;
 	}
-	return ret;
+
+	*size -= off;
+	*dstptr += msize;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(xt_compat_match);
-#endif
+EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+#endif /* CONFIG_COMPAT */
 
 int xt_check_target(const struct xt_target *target, unsigned short family,
 		    unsigned int size, const char *table, unsigned int hook_mask,
@@ -350,51 +423,64 @@ int xt_check_target(const struct xt_target *target, unsigned short family,
 EXPORT_SYMBOL_GPL(xt_check_target);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_target(void *target, void **dstptr, int *size, int convert)
+int xt_compat_target_offset(struct xt_target *target)
 {
-	struct xt_target *t;
-	struct compat_xt_entry_target *pcompat;
-	struct xt_entry_target *pt;
-	u_int16_t tsize;
-	int off, ret;
+	u_int16_t csize = target->compatsize ? : target->targetsize;
+	return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_offset);
 
-	ret = 0;
-	t = ((struct xt_entry_target *)target)->u.kernel.target;
-	off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize);
-	switch (convert) {
-	case COMPAT_TO_USER:
-		pt = (struct xt_entry_target *)target;
-		tsize = pt->u.user.target_size;
-		if (copy_to_user(*dstptr, pt, tsize)) {
-			ret = -EFAULT;
-			break;
-		}
-		tsize -= off;
-		if (put_user(tsize, (u_int16_t *)*dstptr))
-			ret = -EFAULT;
-		*size -= off;
-		*dstptr += tsize;
-		break;
-	case COMPAT_FROM_USER:
-		pcompat = (struct compat_xt_entry_target *)target;
-		pt = (struct xt_entry_target *)*dstptr;
-		tsize = pcompat->u.user.target_size;
-		memcpy(pt, pcompat, tsize);
-		tsize += off;
-		pt->u.user.target_size = tsize;
-		*size += off;
-		*dstptr += tsize;
-		break;
-	case COMPAT_CALC_SIZE:
-		*size += off;
-		break;
-	default:
-		ret = -ENOPROTOOPT;
-		break;
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+				int *size)
+{
+	struct xt_target *target = t->u.kernel.target;
+	struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
+	int pad, off = xt_compat_target_offset(target);
+	u_int16_t tsize = ct->u.user.target_size;
+
+	t = *dstptr;
+	memcpy(t, ct, sizeof(*ct));
+	if (target->compat_from_user)
+		target->compat_from_user(t->data, ct->data);
+	else
+		memcpy(t->data, ct->data, tsize - sizeof(*ct));
+	pad = XT_ALIGN(target->targetsize) - target->targetsize;
+	if (pad > 0)
+		memset(t->data + target->targetsize, 0, pad);
+
+	tsize += off;
+	t->u.user.target_size = tsize;
+
+	*size += off;
+	*dstptr += tsize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
+
+int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
+			     int *size)
+{
+	struct xt_target *target = t->u.kernel.target;
+	struct compat_xt_entry_target __user *ct = *dstptr;
+	int off = xt_compat_target_offset(target);
+	u_int16_t tsize = t->u.user.target_size - off;
+
+	if (copy_to_user(ct, t, sizeof(*ct)) ||
+	    put_user(tsize, &ct->u.user.target_size))
+		return -EFAULT;
+
+	if (target->compat_to_user) {
+		if (target->compat_to_user((void __user *)ct->data, t->data))
+			return -EFAULT;
+	} else {
+		if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct)))
+			return -EFAULT;
 	}
-	return ret;
+
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(xt_compat_target);
+EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
 #endif
 
 struct xt_table_info *xt_alloc_table_info(unsigned int size)
@@ -515,15 +601,18 @@ int xt_register_table(struct xt_table *table,
 {
 	int ret;
 	struct xt_table_info *private;
+	struct xt_table *t;
 
 	ret = mutex_lock_interruptible(&xt[table->af].mutex);
 	if (ret != 0)
 		return ret;
 
 	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&xt[table->af].tables, table->name)) {
-		ret = -EEXIST;
-		goto unlock;
+	list_for_each_entry(t, &xt[table->af].tables, list) {
+		if (strcmp(t->name, table->name) == 0) {
+			ret = -EEXIST;
+			goto unlock;
+		}
 	}
 
 	/* Simplifies replace_table code. */
@@ -538,7 +627,7 @@ int xt_register_table(struct xt_table *table,
 	/* save number of initial entries */
 	private->initial_entries = private->number;
 
-	list_prepend(&xt[table->af].tables, table);
+	list_add(&table->list, &xt[table->af].tables);
 
 	ret = 0;
  unlock:
@@ -553,7 +642,7 @@ void *xt_unregister_table(struct xt_table *table)
 
 	mutex_lock(&xt[table->af].mutex);
 	private = table->private;
-	LIST_DELETE(&xt[table->af].tables, table);
+	list_del(&table->list);
 	mutex_unlock(&xt[table->af].mutex);
 
 	return private;
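
Beyond the list_head conversion, x_tables.c gains xt_register_targets()/xt_register_matches(): batch wrappers that register an array and, on the first failure, unwind everything registered so far, so the caller sees all-or-nothing semantics. The pattern in isolation; register_one() and unregister_one() are placeholders, not kernel API.

#include <stdio.h>

static int register_one(int id)
{
	if (id == 2)
		return -1;	/* simulate a failure on the third entry */
	printf("reg %d\n", id);
	return 0;
}

static void unregister_one(int id)
{
	printf("unreg %d\n", id);
}

static int register_many(const int *ids, unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = register_one(ids[i]);
		if (err)
			goto unwind;
	}
	return 0;

unwind:
	/* Roll back ids[0..i-1] in reverse order. */
	while (i-- > 0)
		unregister_one(ids[i]);
	return err;
}

int main(void)
{
	int ids[3] = { 0, 1, 2 };
	printf("result: %d\n", register_many(ids, 3));
	return 0;
}
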
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index e54e57730012..50de965bb104 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -29,8 +29,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct xt_classify_target_info *clinfo = targinfo;
 
@@ -40,47 +39,41 @@ target(struct sk_buff **pskb,
 	return XT_CONTINUE;
 }
 
-static struct xt_target classify_reg = {
-	.name = "CLASSIFY",
-	.target = target,
-	.targetsize = sizeof(struct xt_classify_target_info),
-	.table = "mangle",
-	.hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-		 (1 << NF_IP_POST_ROUTING),
-	.family = AF_INET,
-	.me = THIS_MODULE,
+static struct xt_target xt_classify_target[] = {
+	{
+		.family = AF_INET,
+		.name = "CLASSIFY",
+		.target = target,
+		.targetsize = sizeof(struct xt_classify_target_info),
+		.table = "mangle",
+		.hooks = (1 << NF_IP_LOCAL_OUT) |
+			 (1 << NF_IP_FORWARD) |
+			 (1 << NF_IP_POST_ROUTING),
+		.me = THIS_MODULE,
+	},
+	{
+		.name = "CLASSIFY",
+		.family = AF_INET6,
+		.target = target,
+		.targetsize = sizeof(struct xt_classify_target_info),
+		.table = "mangle",
+		.hooks = (1 << NF_IP_LOCAL_OUT) |
+			 (1 << NF_IP_FORWARD) |
+			 (1 << NF_IP_POST_ROUTING),
+		.me = THIS_MODULE,
+	},
 };
-static struct xt_target classify6_reg = {
-	.name = "CLASSIFY",
-	.target = target,
-	.targetsize = sizeof(struct xt_classify_target_info),
-	.table = "mangle",
-	.hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-		 (1 << NF_IP_POST_ROUTING),
-	.family = AF_INET6,
-	.me = THIS_MODULE,
-};
-
 
 static int __init xt_classify_init(void)
 {
-	int ret;
-
-	ret = xt_register_target(&classify_reg);
-	if (ret)
-		return ret;
-
-	ret = xt_register_target(&classify6_reg);
-	if (ret)
-		xt_unregister_target(&classify_reg);
-
-	return ret;
+	return xt_register_targets(xt_classify_target,
+				   ARRAY_SIZE(xt_classify_target));
 }
 
 static void __exit xt_classify_fini(void)
 {
-	xt_unregister_target(&classify_reg);
-	xt_unregister_target(&classify6_reg);
+	xt_unregister_targets(xt_classify_target,
+			      ARRAY_SIZE(xt_classify_target));
 }
 
 module_init(xt_classify_init);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 60c375d36f01..c01524f817f0 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -38,8 +38,7 @@ target(struct sk_buff **pskb,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct xt_connmark_target_info *markinfo = targinfo;
 	u_int32_t diff;
@@ -49,24 +48,37 @@ target(struct sk_buff **pskb,
49 u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); 48 u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
50 49
51 if (ctmark) { 50 if (ctmark) {
52 switch(markinfo->mode) { 51 switch(markinfo->mode) {
53 case XT_CONNMARK_SET: 52 case XT_CONNMARK_SET:
54 newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; 53 newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
55 if (newmark != *ctmark) 54 if (newmark != *ctmark) {
56 *ctmark = newmark; 55 *ctmark = newmark;
57 break; 56#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
58 case XT_CONNMARK_SAVE: 57 ip_conntrack_event_cache(IPCT_MARK, *pskb);
59 newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); 58#else
60 if (*ctmark != newmark) 59 nf_conntrack_event_cache(IPCT_MARK, *pskb);
61 *ctmark = newmark; 60#endif
62 break; 61 }
63 case XT_CONNMARK_RESTORE: 62 break;
64 nfmark = (*pskb)->nfmark; 63 case XT_CONNMARK_SAVE:
65 diff = (*ctmark ^ nfmark) & markinfo->mask; 64 newmark = (*ctmark & ~markinfo->mask) |
66 if (diff != 0) 65 ((*pskb)->nfmark & markinfo->mask);
67 (*pskb)->nfmark = nfmark ^ diff; 66 if (*ctmark != newmark) {
68 break; 67 *ctmark = newmark;
69 } 68#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
69 ip_conntrack_event_cache(IPCT_MARK, *pskb);
70#else
71 nf_conntrack_event_cache(IPCT_MARK, *pskb);
72#endif
73 }
74 break;
75 case XT_CONNMARK_RESTORE:
76 nfmark = (*pskb)->nfmark;
77 diff = (*ctmark ^ nfmark) & markinfo->mask;
78 if (diff != 0)
79 (*pskb)->nfmark = nfmark ^ diff;
80 break;
81 }
70 } 82 }
71 83
72 return XT_CONTINUE; 84 return XT_CONTINUE;
@@ -77,65 +89,91 @@ checkentry(const char *tablename,
77 const void *entry, 89 const void *entry,
78 const struct xt_target *target, 90 const struct xt_target *target,
79 void *targinfo, 91 void *targinfo,
80 unsigned int targinfosize,
81 unsigned int hook_mask) 92 unsigned int hook_mask)
82{ 93{
83 struct xt_connmark_target_info *matchinfo = targinfo; 94 struct xt_connmark_target_info *matchinfo = targinfo;
84 95
85 if (matchinfo->mode == XT_CONNMARK_RESTORE) { 96 if (matchinfo->mode == XT_CONNMARK_RESTORE) {
86 if (strcmp(tablename, "mangle") != 0) { 97 if (strcmp(tablename, "mangle") != 0) {
87 printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename); 98 printk(KERN_WARNING "CONNMARK: restore can only be "
88 return 0; 99 "called from \"mangle\" table, not \"%s\"\n",
89 } 100 tablename);
101 return 0;
102 }
90 } 103 }
91
92 if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { 104 if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
93 printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); 105 printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
94 return 0; 106 return 0;
95 } 107 }
96
97 return 1; 108 return 1;
98} 109}
99 110
100static struct xt_target connmark_reg = { 111#ifdef CONFIG_COMPAT
101 .name = "CONNMARK", 112struct compat_xt_connmark_target_info {
102 .target = target, 113 compat_ulong_t mark, mask;
103 .targetsize = sizeof(struct xt_connmark_target_info), 114 u_int8_t mode;
104 .checkentry = checkentry, 115 u_int8_t __pad1;
105 .family = AF_INET, 116 u_int16_t __pad2;
106 .me = THIS_MODULE
107}; 117};
108 118
109static struct xt_target connmark6_reg = { 119static void compat_from_user(void *dst, void *src)
110 .name = "CONNMARK", 120{
111 .target = target, 121 struct compat_xt_connmark_target_info *cm = src;
112 .targetsize = sizeof(struct xt_connmark_target_info), 122 struct xt_connmark_target_info m = {
113 .checkentry = checkentry, 123 .mark = cm->mark,
114 .family = AF_INET6, 124 .mask = cm->mask,
115 .me = THIS_MODULE 125 .mode = cm->mode,
126 };
127 memcpy(dst, &m, sizeof(m));
128}
129
130static int compat_to_user(void __user *dst, void *src)
131{
132 struct xt_connmark_target_info *m = src;
133 struct compat_xt_connmark_target_info cm = {
134 .mark = m->mark,
135 .mask = m->mask,
136 .mode = m->mode,
137 };
138 return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
139}
140#endif /* CONFIG_COMPAT */
141
142static struct xt_target xt_connmark_target[] = {
143 {
144 .name = "CONNMARK",
145 .family = AF_INET,
146 .checkentry = checkentry,
147 .target = target,
148 .targetsize = sizeof(struct xt_connmark_target_info),
149#ifdef CONFIG_COMPAT
150 .compatsize = sizeof(struct compat_xt_connmark_target_info),
151 .compat_from_user = compat_from_user,
152 .compat_to_user = compat_to_user,
153#endif
154 .me = THIS_MODULE
155 },
156 {
157 .name = "CONNMARK",
158 .family = AF_INET6,
159 .checkentry = checkentry,
160 .target = target,
161 .targetsize = sizeof(struct xt_connmark_target_info),
162 .me = THIS_MODULE
163 },
116}; 164};
117 165
118static int __init xt_connmark_init(void) 166static int __init xt_connmark_init(void)
119{ 167{
120 int ret;
121
122 need_conntrack(); 168 need_conntrack();
123 169 return xt_register_targets(xt_connmark_target,
124 ret = xt_register_target(&connmark_reg); 170 ARRAY_SIZE(xt_connmark_target));
125 if (ret)
126 return ret;
127
128 ret = xt_register_target(&connmark6_reg);
129 if (ret)
130 xt_unregister_target(&connmark_reg);
131
132 return ret;
133} 171}
134 172
135static void __exit xt_connmark_fini(void) 173static void __exit xt_connmark_fini(void)
136{ 174{
137 xt_unregister_target(&connmark_reg); 175 xt_unregister_targets(xt_connmark_target,
138 xt_unregister_target(&connmark6_reg); 176 ARRAY_SIZE(xt_connmark_target));
139} 177}
140 178
141module_init(xt_connmark_init); 179module_init(xt_connmark_init);
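
Two things change here beyond the array conversion. First, a mark update now pushes an IPCT_MARK event through the conntrack event cache, so ctnetlink listeners learn about the new mark. Second, the CONFIG_COMPAT hooks translate the targinfo between 32-bit and native layouts: xt_connmark_target_info stores mark/mask as unsigned long, which is 8 bytes on a 64-bit kernel but 4 bytes in a 32-bit iptables binary. An illustrative comparison of the two layouts (sizes assume an LP64 kernel driven by ILP32 userspace, say x86-64 plus i386 iptables; explanatory sketch only):

	struct xt_connmark_target_info {	/* native 64-bit layout */
		unsigned long mark, mask;	/* 8 bytes each */
		u_int8_t mode;
	};					/* padded out to 24 bytes */

	struct compat_xt_connmark_target_info {	/* what 32-bit userspace passes */
		compat_ulong_t mark, mask;	/* 4 bytes each */
		u_int8_t mode;
		u_int8_t __pad1;
		u_int16_t __pad2;
	};					/* 12 bytes */

compat_from_user()/compat_to_user() above do nothing more than copy field by field between the two.
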
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 8c011e020769..467386266674 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -66,7 +66,7 @@ static void secmark_restore(struct sk_buff *skb)
66static unsigned int target(struct sk_buff **pskb, const struct net_device *in, 66static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
67 const struct net_device *out, unsigned int hooknum, 67 const struct net_device *out, unsigned int hooknum,
68 const struct xt_target *target, 68 const struct xt_target *target,
69 const void *targinfo, void *userinfo) 69 const void *targinfo)
70{ 70{
71 struct sk_buff *skb = *pskb; 71 struct sk_buff *skb = *pskb;
72 const struct xt_connsecmark_target_info *info = targinfo; 72 const struct xt_connsecmark_target_info *info = targinfo;
@@ -89,7 +89,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
89 89
90static int checkentry(const char *tablename, const void *entry, 90static int checkentry(const char *tablename, const void *entry,
91 const struct xt_target *target, void *targinfo, 91 const struct xt_target *target, void *targinfo,
92 unsigned int targinfosize, unsigned int hook_mask) 92 unsigned int hook_mask)
93{ 93{
94 struct xt_connsecmark_target_info *info = targinfo; 94 struct xt_connsecmark_target_info *info = targinfo;
95 95
@@ -106,49 +106,38 @@ static int checkentry(const char *tablename, const void *entry,
106 return 1; 106 return 1;
107} 107}
108 108
109static struct xt_target ipt_connsecmark_reg = { 109static struct xt_target xt_connsecmark_target[] = {
110 .name = "CONNSECMARK", 110 {
111 .target = target, 111 .name = "CONNSECMARK",
112 .targetsize = sizeof(struct xt_connsecmark_target_info), 112 .family = AF_INET,
113 .table = "mangle", 113 .checkentry = checkentry,
114 .checkentry = checkentry, 114 .target = target,
115 .me = THIS_MODULE, 115 .targetsize = sizeof(struct xt_connsecmark_target_info),
116 .family = AF_INET, 116 .table = "mangle",
117 .revision = 0, 117 .me = THIS_MODULE,
118}; 118 },
119 119 {
120static struct xt_target ip6t_connsecmark_reg = { 120 .name = "CONNSECMARK",
121 .name = "CONNSECMARK", 121 .family = AF_INET6,
122 .target = target, 122 .checkentry = checkentry,
123 .targetsize = sizeof(struct xt_connsecmark_target_info), 123 .target = target,
124 .table = "mangle", 124 .targetsize = sizeof(struct xt_connsecmark_target_info),
125 .checkentry = checkentry, 125 .table = "mangle",
126 .me = THIS_MODULE, 126 .me = THIS_MODULE,
127 .family = AF_INET6, 127 },
128 .revision = 0,
129}; 128};
130 129
131static int __init xt_connsecmark_init(void) 130static int __init xt_connsecmark_init(void)
132{ 131{
133 int err;
134
135 need_conntrack(); 132 need_conntrack();
136 133 return xt_register_targets(xt_connsecmark_target,
137 err = xt_register_target(&ipt_connsecmark_reg); 134 ARRAY_SIZE(xt_connsecmark_target));
138 if (err)
139 return err;
140
141 err = xt_register_target(&ip6t_connsecmark_reg);
142 if (err)
143 xt_unregister_target(&ipt_connsecmark_reg);
144
145 return err;
146} 135}
147 136
148static void __exit xt_connsecmark_fini(void) 137static void __exit xt_connsecmark_fini(void)
149{ 138{
150 xt_unregister_target(&ip6t_connsecmark_reg); 139 xt_unregister_targets(xt_connsecmark_target,
151 xt_unregister_target(&ipt_connsecmark_reg); 140 ARRAY_SIZE(xt_connsecmark_target));
152} 141}
153 142
154module_init(xt_connsecmark_init); 143module_init(xt_connsecmark_init);
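
A small cleanup folded into this file: the explicit .revision = 0 initializers are gone. Static objects are zero-initialized in C, so an omitted designator leaves the field at 0 anyway:

	static struct xt_target t = { .name = "CONNSECMARK" };
	/* t.revision == 0, identical to spelling out .revision = 0 */

Where a module genuinely carries several revisions, as xt_MARK does below, the field stays spelled out.
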
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
new file mode 100644
index 000000000000..a7cc75aeb38d
--- /dev/null
+++ b/net/netfilter/xt_DSCP.c
@@ -0,0 +1,118 @@
1/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
2 *
3 * (C) 2002 by Harald Welte <laforge@netfilter.org>
4 * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * See RFC2474 for a description of the DSCP field within the IP Header.
11 *
12 * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
13*/
14
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/ip.h>
18#include <linux/ipv6.h>
19#include <net/dsfield.h>
20
21#include <linux/netfilter/x_tables.h>
22#include <linux/netfilter/xt_DSCP.h>
23
24MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
25MODULE_DESCRIPTION("x_tables DSCP modification module");
26MODULE_LICENSE("GPL");
27MODULE_ALIAS("ipt_DSCP");
28MODULE_ALIAS("ip6t_DSCP");
29
30static unsigned int target(struct sk_buff **pskb,
31 const struct net_device *in,
32 const struct net_device *out,
33 unsigned int hooknum,
34 const struct xt_target *target,
35 const void *targinfo)
36{
37 const struct xt_DSCP_info *dinfo = targinfo;
38 u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
39
40 if (dscp != dinfo->dscp) {
41 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
42 return NF_DROP;
43
44 ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK),
45 dinfo->dscp << XT_DSCP_SHIFT);
46
47 }
48 return XT_CONTINUE;
49}
50
51static unsigned int target6(struct sk_buff **pskb,
52 const struct net_device *in,
53 const struct net_device *out,
54 unsigned int hooknum,
55 const struct xt_target *target,
56 const void *targinfo)
57{
58 const struct xt_DSCP_info *dinfo = targinfo;
59 u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
60
61 if (dscp != dinfo->dscp) {
62 if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
63 return NF_DROP;
64
65 ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK),
66 dinfo->dscp << XT_DSCP_SHIFT);
67 }
68 return XT_CONTINUE;
69}
70
71static int checkentry(const char *tablename,
72 const void *e_void,
73 const struct xt_target *target,
74 void *targinfo,
75 unsigned int hook_mask)
76{
77 const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
78
 79	if (dscp > XT_DSCP_MAX) {
80 printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
81 return 0;
82 }
83 return 1;
84}
85
86static struct xt_target xt_dscp_target[] = {
87 {
88 .name = "DSCP",
89 .family = AF_INET,
90 .checkentry = checkentry,
91 .target = target,
92 .targetsize = sizeof(struct xt_DSCP_info),
93 .table = "mangle",
94 .me = THIS_MODULE,
95 },
96 {
97 .name = "DSCP",
98 .family = AF_INET6,
99 .checkentry = checkentry,
100 .target = target6,
101 .targetsize = sizeof(struct xt_DSCP_info),
102 .table = "mangle",
103 .me = THIS_MODULE,
104 },
105};
106
107static int __init xt_dscp_target_init(void)
108{
109 return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
110}
111
112static void __exit xt_dscp_target_fini(void)
113{
114 xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
115}
116
117module_init(xt_dscp_target_init);
118module_exit(xt_dscp_target_fini);
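
On the IPv4 side, rewriting the DS field also means patching the header checksum, which ipv4_change_dsfield() does incrementally instead of recomputing the whole sum. A simplified restatement of what that helper has to do (paraphrased for illustration, not copied from include/net/dsfield.h):

	static void change_dsfield(struct iphdr *iph, u_int8_t mask, u_int8_t value)
	{
		u_int32_t check = ntohs(iph->check);
		u_int8_t dsfield = (iph->tos & mask) | value;

		check += iph->tos;		/* back out the old TOS byte */
		if ((check + 1) >> 16)
			check = (check + 1) & 0xffff;
		check -= dsfield;		/* account for the new byte */
		check += check >> 16;		/* fold the carry */
		iph->check = htons(check);
		iph->tos = dsfield;
	}

IPv6 has no header checksum, so ipv6_change_dsfield() only needs to splice the six DSCP bits into the version/traffic-class word.
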
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index ee9c34edc76c..c6e860a7114f 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -27,8 +27,7 @@ target_v0(struct sk_buff **pskb,
27 const struct net_device *out, 27 const struct net_device *out,
28 unsigned int hooknum, 28 unsigned int hooknum,
29 const struct xt_target *target, 29 const struct xt_target *target,
30 const void *targinfo, 30 const void *targinfo)
31 void *userinfo)
32{ 31{
33 const struct xt_mark_target_info *markinfo = targinfo; 32 const struct xt_mark_target_info *markinfo = targinfo;
34 33
@@ -44,8 +43,7 @@ target_v1(struct sk_buff **pskb,
44 const struct net_device *out, 43 const struct net_device *out,
45 unsigned int hooknum, 44 unsigned int hooknum,
46 const struct xt_target *target, 45 const struct xt_target *target,
47 const void *targinfo, 46 const void *targinfo)
48 void *userinfo)
49{ 47{
50 const struct xt_mark_target_info_v1 *markinfo = targinfo; 48 const struct xt_mark_target_info_v1 *markinfo = targinfo;
51 int mark = 0; 49 int mark = 0;
@@ -76,7 +74,6 @@ checkentry_v0(const char *tablename,
76 const void *entry, 74 const void *entry,
77 const struct xt_target *target, 75 const struct xt_target *target,
78 void *targinfo, 76 void *targinfo,
79 unsigned int targinfosize,
80 unsigned int hook_mask) 77 unsigned int hook_mask)
81{ 78{
82 struct xt_mark_target_info *markinfo = targinfo; 79 struct xt_mark_target_info *markinfo = targinfo;
@@ -93,7 +90,6 @@ checkentry_v1(const char *tablename,
93 const void *entry, 90 const void *entry,
94 const struct xt_target *target, 91 const struct xt_target *target,
95 void *targinfo, 92 void *targinfo,
96 unsigned int targinfosize,
97 unsigned int hook_mask) 93 unsigned int hook_mask)
98{ 94{
99 struct xt_mark_target_info_v1 *markinfo = targinfo; 95 struct xt_mark_target_info_v1 *markinfo = targinfo;
@@ -112,65 +108,81 @@ checkentry_v1(const char *tablename,
112 return 1; 108 return 1;
113} 109}
114 110
115static struct xt_target ipt_mark_reg_v0 = { 111#ifdef CONFIG_COMPAT
116 .name = "MARK", 112struct compat_xt_mark_target_info_v1 {
117 .target = target_v0, 113 compat_ulong_t mark;
118 .targetsize = sizeof(struct xt_mark_target_info), 114 u_int8_t mode;
119 .table = "mangle", 115 u_int8_t __pad1;
120 .checkentry = checkentry_v0, 116 u_int16_t __pad2;
121 .me = THIS_MODULE,
122 .family = AF_INET,
123 .revision = 0,
124}; 117};
125 118
126static struct xt_target ipt_mark_reg_v1 = { 119static void compat_from_user_v1(void *dst, void *src)
127 .name = "MARK", 120{
128 .target = target_v1, 121 struct compat_xt_mark_target_info_v1 *cm = src;
129 .targetsize = sizeof(struct xt_mark_target_info_v1), 122 struct xt_mark_target_info_v1 m = {
130 .table = "mangle", 123 .mark = cm->mark,
131 .checkentry = checkentry_v1, 124 .mode = cm->mode,
132 .me = THIS_MODULE, 125 };
133 .family = AF_INET, 126 memcpy(dst, &m, sizeof(m));
134 .revision = 1, 127}
135};
136 128
137static struct xt_target ip6t_mark_reg_v0 = { 129static int compat_to_user_v1(void __user *dst, void *src)
138 .name = "MARK", 130{
139 .target = target_v0, 131 struct xt_mark_target_info_v1 *m = src;
140 .targetsize = sizeof(struct xt_mark_target_info), 132 struct compat_xt_mark_target_info_v1 cm = {
141 .table = "mangle", 133 .mark = m->mark,
142 .checkentry = checkentry_v0, 134 .mode = m->mode,
143 .me = THIS_MODULE, 135 };
144 .family = AF_INET6, 136 return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
145 .revision = 0, 137}
138#endif /* CONFIG_COMPAT */
139
140static struct xt_target xt_mark_target[] = {
141 {
142 .name = "MARK",
143 .family = AF_INET,
144 .revision = 0,
145 .checkentry = checkentry_v0,
146 .target = target_v0,
147 .targetsize = sizeof(struct xt_mark_target_info),
148 .table = "mangle",
149 .me = THIS_MODULE,
150 },
151 {
152 .name = "MARK",
153 .family = AF_INET,
154 .revision = 1,
155 .checkentry = checkentry_v1,
156 .target = target_v1,
157 .targetsize = sizeof(struct xt_mark_target_info_v1),
158#ifdef CONFIG_COMPAT
159 .compatsize = sizeof(struct compat_xt_mark_target_info_v1),
160 .compat_from_user = compat_from_user_v1,
161 .compat_to_user = compat_to_user_v1,
162#endif
163 .table = "mangle",
164 .me = THIS_MODULE,
165 },
166 {
167 .name = "MARK",
168 .family = AF_INET6,
169 .revision = 0,
170 .checkentry = checkentry_v0,
171 .target = target_v0,
172 .targetsize = sizeof(struct xt_mark_target_info),
173 .table = "mangle",
174 .me = THIS_MODULE,
175 },
146}; 176};
147 177
148static int __init xt_mark_init(void) 178static int __init xt_mark_init(void)
149{ 179{
150 int err; 180 return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
151
152 err = xt_register_target(&ipt_mark_reg_v0);
153 if (err)
154 return err;
155
156 err = xt_register_target(&ipt_mark_reg_v1);
157 if (err)
158 xt_unregister_target(&ipt_mark_reg_v0);
159
160 err = xt_register_target(&ip6t_mark_reg_v0);
161 if (err) {
162 xt_unregister_target(&ipt_mark_reg_v0);
163 xt_unregister_target(&ipt_mark_reg_v1);
164 }
165
166 return err;
167} 181}
168 182
169static void __exit xt_mark_fini(void) 183static void __exit xt_mark_fini(void)
170{ 184{
171 xt_unregister_target(&ipt_mark_reg_v0); 185 xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
172 xt_unregister_target(&ipt_mark_reg_v1);
173 xt_unregister_target(&ip6t_mark_reg_v0);
174} 186}
175 187
176module_init(xt_mark_init); 188module_init(xt_mark_init);
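
Only revision 1 grows compat hooks here. For orientation, the three modes that distinguish the v1 targinfo, reconstructed from the unchanged body of target_v1() (shown for context, not part of this hunk):

	switch (markinfo->mode) {
	case XT_MARK_SET:	/* --set-mark: overwrite nfmark */
		mark = markinfo->mark;
		break;
	case XT_MARK_AND:	/* --and-mark: clear bits */
		mark = (*pskb)->nfmark & markinfo->mark;
		break;
	case XT_MARK_OR:	/* --or-mark: set bits */
		mark = (*pskb)->nfmark | markinfo->mark;
		break;
	}
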
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 86ccceb61fdd..db9b896e57c8 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -29,65 +29,46 @@ target(struct sk_buff **pskb,
29 const struct net_device *out, 29 const struct net_device *out,
30 unsigned int hooknum, 30 unsigned int hooknum,
31 const struct xt_target *target, 31 const struct xt_target *target,
32 const void *targinfo, 32 const void *targinfo)
33 void *userinfo)
34{ 33{
35 const struct xt_NFQ_info *tinfo = targinfo; 34 const struct xt_NFQ_info *tinfo = targinfo;
36 35
37 return NF_QUEUE_NR(tinfo->queuenum); 36 return NF_QUEUE_NR(tinfo->queuenum);
38} 37}
39 38
40static struct xt_target ipt_NFQ_reg = { 39static struct xt_target xt_nfqueue_target[] = {
41 .name = "NFQUEUE", 40 {
42 .target = target, 41 .name = "NFQUEUE",
43 .targetsize = sizeof(struct xt_NFQ_info), 42 .family = AF_INET,
44 .family = AF_INET, 43 .target = target,
45 .me = THIS_MODULE, 44 .targetsize = sizeof(struct xt_NFQ_info),
46}; 45 .me = THIS_MODULE,
47 46 },
48static struct xt_target ip6t_NFQ_reg = { 47 {
49 .name = "NFQUEUE", 48 .name = "NFQUEUE",
50 .target = target, 49 .family = AF_INET6,
51 .targetsize = sizeof(struct xt_NFQ_info), 50 .target = target,
52 .family = AF_INET6, 51 .targetsize = sizeof(struct xt_NFQ_info),
53 .me = THIS_MODULE, 52 .me = THIS_MODULE,
54}; 53 },
55 54 {
56static struct xt_target arpt_NFQ_reg = { 55 .name = "NFQUEUE",
57 .name = "NFQUEUE", 56 .family = NF_ARP,
58 .target = target, 57 .target = target,
59 .targetsize = sizeof(struct xt_NFQ_info), 58 .targetsize = sizeof(struct xt_NFQ_info),
60 .family = NF_ARP, 59 .me = THIS_MODULE,
61 .me = THIS_MODULE, 60 },
62}; 61};
63 62
64static int __init xt_nfqueue_init(void) 63static int __init xt_nfqueue_init(void)
65{ 64{
66 int ret; 65 return xt_register_targets(xt_nfqueue_target,
67 ret = xt_register_target(&ipt_NFQ_reg); 66 ARRAY_SIZE(xt_nfqueue_target));
68 if (ret)
69 return ret;
70 ret = xt_register_target(&ip6t_NFQ_reg);
71 if (ret)
72 goto out_ip;
73 ret = xt_register_target(&arpt_NFQ_reg);
74 if (ret)
75 goto out_ip6;
76
77 return ret;
78out_ip6:
79 xt_unregister_target(&ip6t_NFQ_reg);
80out_ip:
81 xt_unregister_target(&ipt_NFQ_reg);
82
83 return ret;
84} 67}
85 68
86static void __exit xt_nfqueue_fini(void) 69static void __exit xt_nfqueue_fini(void)
87{ 70{
 88	xt_unregister_target(&arpt_NFQ_reg); 71	xt_unregister_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target));
89 xt_unregister_target(&ip6t_NFQ_reg);
90 xt_unregister_target(&ipt_NFQ_reg);
91} 72}
92 73
93module_init(xt_nfqueue_init); 74module_init(xt_nfqueue_init);
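
The target body itself never changed: it returns NF_QUEUE_NR(queuenum), which smuggles the queue number into the upper half of the verdict word. Roughly how that encoding works (paraphrased from the netfilter verdict definitions of this era; treat exact names and masks as assumptions):

	#define NF_QUEUE	3	/* base verdict */
	#define NF_QUEUE_NR(x)	((((x) << 16) & 0xffff0000) | NF_QUEUE)

	/* queue number 0 degenerates to plain NF_QUEUE, preserving the
	 * old single-queue behaviour */
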
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 98f4b5363ce8..6d00dcaed238 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -16,8 +16,7 @@ target(struct sk_buff **pskb,
16 const struct net_device *out, 16 const struct net_device *out,
17 unsigned int hooknum, 17 unsigned int hooknum,
18 const struct xt_target *target, 18 const struct xt_target *target,
19 const void *targinfo, 19 const void *targinfo)
20 void *userinfo)
21{ 20{
22 /* Previously seen (loopback)? Ignore. */ 21 /* Previously seen (loopback)? Ignore. */
23 if ((*pskb)->nfct != NULL) 22 if ((*pskb)->nfct != NULL)
@@ -34,43 +33,32 @@ target(struct sk_buff **pskb,
34 return XT_CONTINUE; 33 return XT_CONTINUE;
35} 34}
36 35
37static struct xt_target notrack_reg = { 36static struct xt_target xt_notrack_target[] = {
38 .name = "NOTRACK", 37 {
39 .target = target, 38 .name = "NOTRACK",
40 .targetsize = 0, 39 .family = AF_INET,
41 .table = "raw", 40 .target = target,
42 .family = AF_INET, 41 .table = "raw",
43 .me = THIS_MODULE, 42 .me = THIS_MODULE,
44}; 43 },
45 44 {
46static struct xt_target notrack6_reg = { 45 .name = "NOTRACK",
47 .name = "NOTRACK", 46 .family = AF_INET6,
48 .target = target, 47 .target = target,
49 .targetsize = 0, 48 .table = "raw",
50 .table = "raw", 49 .me = THIS_MODULE,
51 .family = AF_INET6, 50 },
52 .me = THIS_MODULE,
53}; 51};
54 52
55static int __init xt_notrack_init(void) 53static int __init xt_notrack_init(void)
56{ 54{
57 int ret; 55 return xt_register_targets(xt_notrack_target,
58 56 ARRAY_SIZE(xt_notrack_target));
59 ret = xt_register_target(&notrack_reg);
60 if (ret)
61 return ret;
62
63 ret = xt_register_target(&notrack6_reg);
64 if (ret)
65 xt_unregister_target(&notrack_reg);
66
67 return ret;
68} 57}
69 58
70static void __exit xt_notrack_fini(void) 59static void __exit xt_notrack_fini(void)
71{ 60{
72 xt_unregister_target(&notrack6_reg); 61 xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target));
73 xt_unregister_target(&notrack_reg);
74} 62}
75 63
76module_init(xt_notrack_init); 64module_init(xt_notrack_init);
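
For context, the part of target() the hunk above leaves untouched attaches the shared "untracked" dummy entry, so the packet carries a non-NULL nfct and the conntrack core skips it; roughly (reconstructed, exact helper and symbol names are assumptions):

	(*pskb)->nfct = &nf_conntrack_untracked.ct_general;	/* dummy entry */
	(*pskb)->nfctinfo = IP_CT_NEW;
	nf_conntrack_get((*pskb)->nfct);			/* hold a reference */
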
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index de9537ad9a7c..add752196290 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -31,7 +31,7 @@ static u8 mode;
31static unsigned int target(struct sk_buff **pskb, const struct net_device *in, 31static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
32 const struct net_device *out, unsigned int hooknum, 32 const struct net_device *out, unsigned int hooknum,
33 const struct xt_target *target, 33 const struct xt_target *target,
34 const void *targinfo, void *userinfo) 34 const void *targinfo)
35{ 35{
36 u32 secmark = 0; 36 u32 secmark = 0;
37 const struct xt_secmark_target_info *info = targinfo; 37 const struct xt_secmark_target_info *info = targinfo;
@@ -85,7 +85,7 @@ static int checkentry_selinux(struct xt_secmark_target_info *info)
85 85
86static int checkentry(const char *tablename, const void *entry, 86static int checkentry(const char *tablename, const void *entry,
87 const struct xt_target *target, void *targinfo, 87 const struct xt_target *target, void *targinfo,
88 unsigned int targinfosize, unsigned int hook_mask) 88 unsigned int hook_mask)
89{ 89{
90 struct xt_secmark_target_info *info = targinfo; 90 struct xt_secmark_target_info *info = targinfo;
91 91
@@ -111,47 +111,36 @@ static int checkentry(const char *tablename, const void *entry,
111 return 1; 111 return 1;
112} 112}
113 113
114static struct xt_target ipt_secmark_reg = { 114static struct xt_target xt_secmark_target[] = {
115 .name = "SECMARK", 115 {
116 .target = target, 116 .name = "SECMARK",
117 .targetsize = sizeof(struct xt_secmark_target_info), 117 .family = AF_INET,
118 .table = "mangle", 118 .checkentry = checkentry,
119 .checkentry = checkentry, 119 .target = target,
120 .me = THIS_MODULE, 120 .targetsize = sizeof(struct xt_secmark_target_info),
121 .family = AF_INET, 121 .table = "mangle",
122 .revision = 0, 122 .me = THIS_MODULE,
123}; 123 },
124 124 {
125static struct xt_target ip6t_secmark_reg = { 125 .name = "SECMARK",
126 .name = "SECMARK", 126 .family = AF_INET6,
127 .target = target, 127 .checkentry = checkentry,
128 .targetsize = sizeof(struct xt_secmark_target_info), 128 .target = target,
129 .table = "mangle", 129 .targetsize = sizeof(struct xt_secmark_target_info),
130 .checkentry = checkentry, 130 .table = "mangle",
131 .me = THIS_MODULE, 131 .me = THIS_MODULE,
132 .family = AF_INET6, 132 },
133 .revision = 0,
134}; 133};
135 134
136static int __init xt_secmark_init(void) 135static int __init xt_secmark_init(void)
137{ 136{
138 int err; 137 return xt_register_targets(xt_secmark_target,
139 138 ARRAY_SIZE(xt_secmark_target));
140 err = xt_register_target(&ipt_secmark_reg);
141 if (err)
142 return err;
143
144 err = xt_register_target(&ip6t_secmark_reg);
145 if (err)
146 xt_unregister_target(&ipt_secmark_reg);
147
148 return err;
149} 139}
150 140
151static void __exit xt_secmark_fini(void) 141static void __exit xt_secmark_fini(void)
152{ 142{
153 xt_unregister_target(&ip6t_secmark_reg); 143 xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target));
154 xt_unregister_target(&ipt_secmark_reg);
155} 144}
156 145
157module_init(xt_secmark_init); 146module_init(xt_secmark_init);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 197609cb06d7..7db492d65220 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -29,41 +29,32 @@ match(const struct sk_buff *skb,
29 return 1; 29 return 1;
30} 30}
31 31
32static struct xt_match comment_match = { 32static struct xt_match xt_comment_match[] = {
33 .name = "comment", 33 {
34 .match = match, 34 .name = "comment",
35 .matchsize = sizeof(struct xt_comment_info), 35 .family = AF_INET,
36 .family = AF_INET, 36 .match = match,
37 .me = THIS_MODULE 37 .matchsize = sizeof(struct xt_comment_info),
38}; 38 .me = THIS_MODULE
39 39 },
40static struct xt_match comment6_match = { 40 {
41 .name = "comment", 41 .name = "comment",
42 .match = match, 42 .family = AF_INET6,
43 .matchsize = sizeof(struct xt_comment_info), 43 .match = match,
44 .family = AF_INET6, 44 .matchsize = sizeof(struct xt_comment_info),
45 .me = THIS_MODULE 45 .me = THIS_MODULE
46 },
46}; 47};
47 48
48static int __init xt_comment_init(void) 49static int __init xt_comment_init(void)
49{ 50{
50 int ret; 51 return xt_register_matches(xt_comment_match,
51 52 ARRAY_SIZE(xt_comment_match));
52 ret = xt_register_match(&comment_match);
53 if (ret)
54 return ret;
55
56 ret = xt_register_match(&comment6_match);
57 if (ret)
58 xt_unregister_match(&comment_match);
59
60 return ret;
61} 53}
62 54
63static void __exit xt_comment_fini(void) 55static void __exit xt_comment_fini(void)
64{ 56{
65 xt_unregister_match(&comment_match); 57 xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match));
66 xt_unregister_match(&comment6_match);
67} 58}
68 59
69module_init(xt_comment_init); 60module_init(xt_comment_init);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 1396fe2d07c1..dcc497ea8183 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -125,7 +125,6 @@ static int check(const char *tablename,
125 const void *ip, 125 const void *ip,
126 const struct xt_match *match, 126 const struct xt_match *match,
127 void *matchinfo, 127 void *matchinfo,
128 unsigned int matchsize,
129 unsigned int hook_mask) 128 unsigned int hook_mask)
130{ 129{
131 const struct xt_connbytes_info *sinfo = matchinfo; 130 const struct xt_connbytes_info *sinfo = matchinfo;
@@ -143,40 +142,35 @@ static int check(const char *tablename,
143 return 1; 142 return 1;
144} 143}
145 144
146static struct xt_match connbytes_match = { 145static struct xt_match xt_connbytes_match[] = {
147 .name = "connbytes", 146 {
148 .match = match, 147 .name = "connbytes",
149 .checkentry = check, 148 .family = AF_INET,
150 .matchsize = sizeof(struct xt_connbytes_info), 149 .checkentry = check,
151 .family = AF_INET, 150 .match = match,
152 .me = THIS_MODULE 151 .matchsize = sizeof(struct xt_connbytes_info),
153}; 152 .me = THIS_MODULE
154static struct xt_match connbytes6_match = { 153 },
155 .name = "connbytes", 154 {
156 .match = match, 155 .name = "connbytes",
157 .checkentry = check, 156 .family = AF_INET6,
158 .matchsize = sizeof(struct xt_connbytes_info), 157 .checkentry = check,
159 .family = AF_INET6, 158 .match = match,
160 .me = THIS_MODULE 159 .matchsize = sizeof(struct xt_connbytes_info),
160 .me = THIS_MODULE
161 },
161}; 162};
162 163
163static int __init xt_connbytes_init(void) 164static int __init xt_connbytes_init(void)
164{ 165{
165 int ret; 166 return xt_register_matches(xt_connbytes_match,
166 ret = xt_register_match(&connbytes_match); 167 ARRAY_SIZE(xt_connbytes_match));
167 if (ret)
168 return ret;
169
170 ret = xt_register_match(&connbytes6_match);
171 if (ret)
172 xt_unregister_match(&connbytes_match);
173 return ret;
174} 168}
175 169
176static void __exit xt_connbytes_fini(void) 170static void __exit xt_connbytes_fini(void)
177{ 171{
178 xt_unregister_match(&connbytes_match); 172 xt_unregister_matches(xt_connbytes_match,
179 xt_unregister_match(&connbytes6_match); 173 ARRAY_SIZE(xt_connbytes_match));
180} 174}
181 175
182module_init(xt_connbytes_init); 176module_init(xt_connbytes_init);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 56324c8aff0a..92a5726ef237 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -55,7 +55,6 @@ checkentry(const char *tablename,
55 const void *ip, 55 const void *ip,
56 const struct xt_match *match, 56 const struct xt_match *match,
57 void *matchinfo, 57 void *matchinfo,
58 unsigned int matchsize,
59 unsigned int hook_mask) 58 unsigned int hook_mask)
60{ 59{
61 struct xt_connmark_info *cm = matchinfo; 60 struct xt_connmark_info *cm = matchinfo;
@@ -75,53 +74,80 @@ checkentry(const char *tablename,
75} 74}
76 75
77static void 76static void
78destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) 77destroy(const struct xt_match *match, void *matchinfo)
79{ 78{
80#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 79#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
81 nf_ct_l3proto_module_put(match->family); 80 nf_ct_l3proto_module_put(match->family);
82#endif 81#endif
83} 82}
84 83
85static struct xt_match connmark_match = { 84#ifdef CONFIG_COMPAT
86 .name = "connmark", 85struct compat_xt_connmark_info {
87 .match = match, 86 compat_ulong_t mark, mask;
88 .matchsize = sizeof(struct xt_connmark_info), 87 u_int8_t invert;
89 .checkentry = checkentry, 88 u_int8_t __pad1;
90 .destroy = destroy, 89 u_int16_t __pad2;
91 .family = AF_INET,
92 .me = THIS_MODULE
93}; 90};
94 91
95static struct xt_match connmark6_match = { 92static void compat_from_user(void *dst, void *src)
96 .name = "connmark", 93{
97 .match = match, 94 struct compat_xt_connmark_info *cm = src;
98 .matchsize = sizeof(struct xt_connmark_info), 95 struct xt_connmark_info m = {
99 .checkentry = checkentry, 96 .mark = cm->mark,
100 .destroy = destroy, 97 .mask = cm->mask,
101 .family = AF_INET6, 98 .invert = cm->invert,
102 .me = THIS_MODULE 99 };
100 memcpy(dst, &m, sizeof(m));
101}
102
103static int compat_to_user(void __user *dst, void *src)
104{
105 struct xt_connmark_info *m = src;
106 struct compat_xt_connmark_info cm = {
107 .mark = m->mark,
108 .mask = m->mask,
109 .invert = m->invert,
110 };
111 return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
112}
113#endif /* CONFIG_COMPAT */
114
115static struct xt_match xt_connmark_match[] = {
116 {
117 .name = "connmark",
118 .family = AF_INET,
119 .checkentry = checkentry,
120 .match = match,
121 .destroy = destroy,
122 .matchsize = sizeof(struct xt_connmark_info),
123#ifdef CONFIG_COMPAT
124 .compatsize = sizeof(struct compat_xt_connmark_info),
125 .compat_from_user = compat_from_user,
126 .compat_to_user = compat_to_user,
127#endif
128 .me = THIS_MODULE
129 },
130 {
131 .name = "connmark",
132 .family = AF_INET6,
133 .checkentry = checkentry,
134 .match = match,
135 .destroy = destroy,
136 .matchsize = sizeof(struct xt_connmark_info),
137 .me = THIS_MODULE
138 },
103}; 139};
104 140
105static int __init xt_connmark_init(void) 141static int __init xt_connmark_init(void)
106{ 142{
107 int ret;
108
109 need_conntrack(); 143 need_conntrack();
110 144 return xt_register_matches(xt_connmark_match,
111 ret = xt_register_match(&connmark_match); 145 ARRAY_SIZE(xt_connmark_match));
112 if (ret)
113 return ret;
114
115 ret = xt_register_match(&connmark6_match);
116 if (ret)
117 xt_unregister_match(&connmark_match);
118 return ret;
119} 146}
120 147
121static void __exit xt_connmark_fini(void) 148static void __exit xt_connmark_fini(void)
122{ 149{
 123	xt_unregister_match(&connmark6_match); 150	xt_unregister_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match));
124 xt_unregister_match(&connmark_match);
125} 151}
126 152
127module_init(xt_connmark_init); 153module_init(xt_connmark_init);
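
The match side mirrors the CONNMARK target's compat treatment, since xt_connmark_info likewise keeps mark/mask as unsigned long. The test it ultimately performs, for orientation (reconstructed from the unchanged match() body, not part of this hunk):

	const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);

	if (!ctmark)
		return 0;
	return ((*ctmark & info->mask) == info->mark) ^ info->invert;
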
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 145489a4c3f2..0ea501a2fda5 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -45,7 +45,7 @@ match(const struct sk_buff *skb,
45 45
46 ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); 46 ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
47 47
48#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) 48#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
49 49
50 if (ct == &ip_conntrack_untracked) 50 if (ct == &ip_conntrack_untracked)
51 statebit = XT_CONNTRACK_STATE_UNTRACKED; 51 statebit = XT_CONNTRACK_STATE_UNTRACKED;
@@ -54,63 +54,72 @@ match(const struct sk_buff *skb,
54 else 54 else
55 statebit = XT_CONNTRACK_STATE_INVALID; 55 statebit = XT_CONNTRACK_STATE_INVALID;
56 56
57 if(sinfo->flags & XT_CONNTRACK_STATE) { 57 if (sinfo->flags & XT_CONNTRACK_STATE) {
58 if (ct) { 58 if (ct) {
59 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip != 59 if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
60 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
61 statebit |= XT_CONNTRACK_STATE_SNAT; 60 statebit |= XT_CONNTRACK_STATE_SNAT;
62 61 if (test_bit(IPS_DST_NAT_BIT, &ct->status))
63 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
64 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
65 statebit |= XT_CONNTRACK_STATE_DNAT; 62 statebit |= XT_CONNTRACK_STATE_DNAT;
66 } 63 }
67 64 if (FWINV((statebit & sinfo->statemask) == 0,
68 if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) 65 XT_CONNTRACK_STATE))
69 return 0;
70 }
71
72 if(sinfo->flags & XT_CONNTRACK_PROTO) {
73 if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
74 return 0;
75 }
76
77 if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
78 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
79 return 0; 66 return 0;
80 } 67 }
81 68
82 if(sinfo->flags & XT_CONNTRACK_ORIGDST) { 69 if (ct == NULL) {
83 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) 70 if (sinfo->flags & ~XT_CONNTRACK_STATE)
84 return 0; 71 return 0;
72 return 1;
85 } 73 }
86 74
87 if(sinfo->flags & XT_CONNTRACK_REPLSRC) { 75 if (sinfo->flags & XT_CONNTRACK_PROTO &&
88 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) 76 FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
89 return 0; 77 sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
90 } 78 XT_CONNTRACK_PROTO))
79 return 0;
80
81 if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
82 FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
83 sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
84 sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
85 XT_CONNTRACK_ORIGSRC))
86 return 0;
91 87
92 if(sinfo->flags & XT_CONNTRACK_REPLDST) { 88 if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
93 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) 89 FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
94 return 0; 90 sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
95 } 91 sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
92 XT_CONNTRACK_ORIGDST))
93 return 0;
96 94
97 if(sinfo->flags & XT_CONNTRACK_STATUS) { 95 if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
98 if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) 96 FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
99 return 0; 97 sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
100 } 98 sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
99 XT_CONNTRACK_REPLSRC))
100 return 0;
101 101
102 if(sinfo->flags & XT_CONNTRACK_EXPIRES) { 102 if (sinfo->flags & XT_CONNTRACK_REPLDST &&
103 unsigned long expires; 103 FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
104 sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
105 sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
106 XT_CONNTRACK_REPLDST))
107 return 0;
104 108
105 if(!ct) 109 if (sinfo->flags & XT_CONNTRACK_STATUS &&
106 return 0; 110 FWINV((ct->status & sinfo->statusmask) == 0,
111 XT_CONNTRACK_STATUS))
112 return 0;
107 113
108 expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; 114 if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
115 unsigned long expires = timer_pending(&ct->timeout) ?
116 (ct->timeout.expires - jiffies)/HZ : 0;
109 117
110 if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) 118 if (FWINV(!(expires >= sinfo->expires_min &&
119 expires <= sinfo->expires_max),
120 XT_CONNTRACK_EXPIRES))
111 return 0; 121 return 0;
112 } 122 }
113
114 return 1; 123 return 1;
115} 124}
116 125
@@ -141,63 +150,72 @@ match(const struct sk_buff *skb,
141 else 150 else
142 statebit = XT_CONNTRACK_STATE_INVALID; 151 statebit = XT_CONNTRACK_STATE_INVALID;
143 152
144 if(sinfo->flags & XT_CONNTRACK_STATE) { 153 if (sinfo->flags & XT_CONNTRACK_STATE) {
145 if (ct) { 154 if (ct) {
146 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip != 155 if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
147 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
148 statebit |= XT_CONNTRACK_STATE_SNAT; 156 statebit |= XT_CONNTRACK_STATE_SNAT;
149 157 if (test_bit(IPS_DST_NAT_BIT, &ct->status))
150 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
151 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
152 statebit |= XT_CONNTRACK_STATE_DNAT; 158 statebit |= XT_CONNTRACK_STATE_DNAT;
153 } 159 }
154 160 if (FWINV((statebit & sinfo->statemask) == 0,
155 if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) 161 XT_CONNTRACK_STATE))
156 return 0;
157 }
158
159 if(sinfo->flags & XT_CONNTRACK_PROTO) {
160 if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
161 return 0;
162 }
163
164 if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
165 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
166 return 0; 162 return 0;
167 } 163 }
168 164
169 if(sinfo->flags & XT_CONNTRACK_ORIGDST) { 165 if (ct == NULL) {
170 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) 166 if (sinfo->flags & ~XT_CONNTRACK_STATE)
171 return 0; 167 return 0;
168 return 1;
172 } 169 }
173 170
174 if(sinfo->flags & XT_CONNTRACK_REPLSRC) { 171 if (sinfo->flags & XT_CONNTRACK_PROTO &&
175 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) 172 FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
176 return 0; 173 sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
177 } 174 XT_CONNTRACK_PROTO))
175 return 0;
176
177 if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
178 FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip &
179 sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
180 sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
181 XT_CONNTRACK_ORIGSRC))
182 return 0;
178 183
179 if(sinfo->flags & XT_CONNTRACK_REPLDST) { 184 if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
180 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) 185 FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip &
181 return 0; 186 sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
182 } 187 sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
188 XT_CONNTRACK_ORIGDST))
189 return 0;
183 190
184 if(sinfo->flags & XT_CONNTRACK_STATUS) { 191 if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
185 if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) 192 FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip &
186 return 0; 193 sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
187 } 194 sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
195 XT_CONNTRACK_REPLSRC))
196 return 0;
188 197
189 if(sinfo->flags & XT_CONNTRACK_EXPIRES) { 198 if (sinfo->flags & XT_CONNTRACK_REPLDST &&
190 unsigned long expires; 199 FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip &
200 sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
201 sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
202 XT_CONNTRACK_REPLDST))
203 return 0;
191 204
192 if(!ct) 205 if (sinfo->flags & XT_CONNTRACK_STATUS &&
193 return 0; 206 FWINV((ct->status & sinfo->statusmask) == 0,
207 XT_CONNTRACK_STATUS))
208 return 0;
194 209
 195	expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; 210	if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
211 unsigned long expires = timer_pending(&ct->timeout) ?
212 (ct->timeout.expires - jiffies)/HZ : 0;
196 213
197 if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) 214 if (FWINV(!(expires >= sinfo->expires_min &&
215 expires <= sinfo->expires_max),
216 XT_CONNTRACK_EXPIRES))
198 return 0; 217 return 0;
199 } 218 }
200
201 return 1; 219 return 1;
202} 220}
203 221
@@ -208,7 +226,6 @@ checkentry(const char *tablename,
208 const void *ip, 226 const void *ip,
209 const struct xt_match *match, 227 const struct xt_match *match,
210 void *matchinfo, 228 void *matchinfo,
211 unsigned int matchsize,
212 unsigned int hook_mask) 229 unsigned int hook_mask)
213{ 230{
214#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 231#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -221,8 +238,7 @@ checkentry(const char *tablename,
221 return 1; 238 return 1;
222} 239}
223 240
224static void 241static void destroy(const struct xt_match *match, void *matchinfo)
225destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
226{ 242{
227#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 243#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
228 nf_ct_l3proto_module_put(match->family); 244 nf_ct_l3proto_module_put(match->family);
@@ -241,11 +257,8 @@ static struct xt_match conntrack_match = {
241 257
242static int __init xt_conntrack_init(void) 258static int __init xt_conntrack_init(void)
243{ 259{
244 int ret;
245 need_conntrack(); 260 need_conntrack();
246 ret = xt_register_match(&conntrack_match); 261 return xt_register_match(&conntrack_match);
247
248 return ret;
249} 262}
250 263
251static void __exit xt_conntrack_fini(void) 264static void __exit xt_conntrack_fini(void)
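
The interesting change in this file is semantic, not cosmetic. SNAT/DNAT detection now reads the connection's status bits instead of comparing the original and reply tuples, and a packet without a conntrack entry takes an early exit: if any test other than STATE was requested the match fails, otherwise the already-evaluated STATE test decides. The status bits are the ones the NAT core sets when it installs a binding (bit positions as in nf_conntrack_common.h, quoted from memory):

	IPS_SRC_NAT_BIT = 4,	/* source NAT binding installed */
	IPS_DST_NAT_BIT = 5,	/* destination NAT binding installed */

Besides being cheaper than two tuple comparisons, the bit test also catches port-only NAT, where the addresses stay equal and the old comparison saw nothing.
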
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 2e2f825dad4c..3e6cf430e518 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -131,7 +131,6 @@ checkentry(const char *tablename,
131 const void *inf, 131 const void *inf,
132 const struct xt_match *match, 132 const struct xt_match *match,
133 void *matchinfo, 133 void *matchinfo,
134 unsigned int matchsize,
135 unsigned int hook_mask) 134 unsigned int hook_mask)
136{ 135{
137 const struct xt_dccp_info *info = matchinfo; 136 const struct xt_dccp_info *info = matchinfo;
@@ -141,27 +140,26 @@ checkentry(const char *tablename,
141 && !(info->invflags & ~info->flags); 140 && !(info->invflags & ~info->flags);
142} 141}
143 142
144static struct xt_match dccp_match = 143static struct xt_match xt_dccp_match[] = {
145{ 144 {
146 .name = "dccp", 145 .name = "dccp",
147 .match = match, 146 .family = AF_INET,
148 .matchsize = sizeof(struct xt_dccp_info), 147 .checkentry = checkentry,
149 .proto = IPPROTO_DCCP, 148 .match = match,
150 .checkentry = checkentry, 149 .matchsize = sizeof(struct xt_dccp_info),
151 .family = AF_INET, 150 .proto = IPPROTO_DCCP,
152 .me = THIS_MODULE, 151 .me = THIS_MODULE,
152 },
153 {
154 .name = "dccp",
155 .family = AF_INET6,
156 .checkentry = checkentry,
157 .match = match,
158 .matchsize = sizeof(struct xt_dccp_info),
159 .proto = IPPROTO_DCCP,
160 .me = THIS_MODULE,
161 },
153}; 162};
154static struct xt_match dccp6_match =
155{
156 .name = "dccp",
157 .match = match,
158 .matchsize = sizeof(struct xt_dccp_info),
159 .proto = IPPROTO_DCCP,
160 .checkentry = checkentry,
161 .family = AF_INET6,
162 .me = THIS_MODULE,
163};
164
165 163
166static int __init xt_dccp_init(void) 164static int __init xt_dccp_init(void)
167{ 165{
@@ -173,27 +171,19 @@ static int __init xt_dccp_init(void)
173 dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); 171 dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
174 if (!dccp_optbuf) 172 if (!dccp_optbuf)
175 return -ENOMEM; 173 return -ENOMEM;
176 ret = xt_register_match(&dccp_match); 174 ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
177 if (ret) 175 if (ret)
178 goto out_kfree; 176 goto out_kfree;
179 ret = xt_register_match(&dccp6_match);
180 if (ret)
181 goto out_unreg;
182
183 return ret; 177 return ret;
184 178
185out_unreg:
186 xt_unregister_match(&dccp_match);
187out_kfree: 179out_kfree:
188 kfree(dccp_optbuf); 180 kfree(dccp_optbuf);
189
190 return ret; 181 return ret;
191} 182}
192 183
193static void __exit xt_dccp_fini(void) 184static void __exit xt_dccp_fini(void)
194{ 185{
195 xt_unregister_match(&dccp6_match); 186 xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
196 xt_unregister_match(&dccp_match);
197 kfree(dccp_optbuf); 187 kfree(dccp_optbuf);
198} 188}
199 189
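
A note on the scratch buffer this init function keeps across the conversion: the DCCP data-offset field is eight bits and counts 32-bit words, so a DCCP header, and with it the option area the match walks, can never exceed 255 * 4 bytes (an inference from the DCCP header format, not something this hunk states):

	/* doff is 8 bits of 32-bit words: header <= 255 * 4 bytes, so one
	 * 256 * 4 byte allocation covers any packet's options */
	dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
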
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
new file mode 100644
index 000000000000..26c7f4ad102a
--- /dev/null
+++ b/net/netfilter/xt_dscp.c
@@ -0,0 +1,103 @@
1/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
2 *
3 * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
4 *
5 * (C) 2002 by Harald Welte <laforge@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/ip.h>
15#include <linux/ipv6.h>
16#include <net/dsfield.h>
17
18#include <linux/netfilter/xt_dscp.h>
19#include <linux/netfilter/x_tables.h>
20
21MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
22MODULE_DESCRIPTION("x_tables DSCP matching module");
23MODULE_LICENSE("GPL");
24MODULE_ALIAS("ipt_dscp");
25MODULE_ALIAS("ip6t_dscp");
26
27static int match(const struct sk_buff *skb,
28 const struct net_device *in,
29 const struct net_device *out,
30 const struct xt_match *match,
31 const void *matchinfo,
32 int offset,
33 unsigned int protoff,
34 int *hotdrop)
35{
36 const struct xt_dscp_info *info = matchinfo;
37 u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT;
38
39 return (dscp == info->dscp) ^ !!info->invert;
40}
41
42static int match6(const struct sk_buff *skb,
43 const struct net_device *in,
44 const struct net_device *out,
45 const struct xt_match *match,
46 const void *matchinfo,
47 int offset,
48 unsigned int protoff,
49 int *hotdrop)
50{
51 const struct xt_dscp_info *info = matchinfo;
52 u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT;
53
54 return (dscp == info->dscp) ^ !!info->invert;
55}
56
57static int checkentry(const char *tablename,
58 const void *info,
59 const struct xt_match *match,
60 void *matchinfo,
61 unsigned int hook_mask)
62{
63 const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
64
65 if (dscp > XT_DSCP_MAX) {
66 printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp);
67 return 0;
68 }
69
70 return 1;
71}
72
73static struct xt_match xt_dscp_match[] = {
74 {
75 .name = "dscp",
76 .family = AF_INET,
77 .checkentry = checkentry,
78 .match = match,
79 .matchsize = sizeof(struct xt_dscp_info),
80 .me = THIS_MODULE,
81 },
82 {
83 .name = "dscp",
84 .family = AF_INET6,
85 .checkentry = checkentry,
86 .match = match6,
87 .matchsize = sizeof(struct xt_dscp_info),
88 .me = THIS_MODULE,
89 },
90};
91
92static int __init xt_dscp_match_init(void)
93{
94 return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
95}
96
97static void __exit xt_dscp_match_fini(void)
98{
99 xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
100}
101
102module_init(xt_dscp_match_init);
103module_exit(xt_dscp_match_fini);
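
Both matches shift the recovered dsfield right before comparing because DSCP is only the upper six bits of the old TOS byte. In constants (values as in linux/netfilter/xt_dscp.h, quoted from memory):

	/* tos byte = DSCP << 2 | ECN bits */
	#define XT_DSCP_MASK	0xfc
	#define XT_DSCP_SHIFT	2
	#define XT_DSCP_MAX	0x3f	/* valid dscp values are 0..63 */

A rule using it would look like "iptables -A FORWARD -m dscp --dscp 0x20 -j ACCEPT" (option spelling per the contemporary iptables extension).
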
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 9dad6281e0c1..7c95f149d942 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -79,7 +79,6 @@ checkentry(const char *tablename,
79 const void *ip_void, 79 const void *ip_void,
80 const struct xt_match *match, 80 const struct xt_match *match,
81 void *matchinfo, 81 void *matchinfo,
82 unsigned int matchinfosize,
83 unsigned int hook_mask) 82 unsigned int hook_mask)
84{ 83{
85 const struct xt_esp *espinfo = matchinfo; 84 const struct xt_esp *espinfo = matchinfo;
@@ -92,44 +91,35 @@ checkentry(const char *tablename,
92 return 1; 91 return 1;
93} 92}
94 93
95static struct xt_match esp_match = { 94static struct xt_match xt_esp_match[] = {
96 .name = "esp", 95 {
97 .family = AF_INET, 96 .name = "esp",
98 .proto = IPPROTO_ESP, 97 .family = AF_INET,
99 .match = &match, 98 .checkentry = checkentry,
100 .matchsize = sizeof(struct xt_esp), 99 .match = match,
101 .checkentry = &checkentry, 100 .matchsize = sizeof(struct xt_esp),
102 .me = THIS_MODULE, 101 .proto = IPPROTO_ESP,
103}; 102 .me = THIS_MODULE,
104 103 },
105static struct xt_match esp6_match = { 104 {
106 .name = "esp", 105 .name = "esp",
107 .family = AF_INET6, 106 .family = AF_INET6,
108 .proto = IPPROTO_ESP, 107 .checkentry = checkentry,
109 .match = &match, 108 .match = match,
110 .matchsize = sizeof(struct xt_esp), 109 .matchsize = sizeof(struct xt_esp),
111 .checkentry = &checkentry, 110 .proto = IPPROTO_ESP,
112 .me = THIS_MODULE, 111 .me = THIS_MODULE,
112 },
113}; 113};
114 114
115static int __init xt_esp_init(void) 115static int __init xt_esp_init(void)
116{ 116{
117 int ret; 117 return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
118 ret = xt_register_match(&esp_match);
119 if (ret)
120 return ret;
121
122 ret = xt_register_match(&esp6_match);
123 if (ret)
124 xt_unregister_match(&esp_match);
125
126 return ret;
127} 118}
128 119
129static void __exit xt_esp_cleanup(void) 120static void __exit xt_esp_cleanup(void)
130{ 121{
131 xt_unregister_match(&esp_match); 122 xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
132 xt_unregister_match(&esp6_match);
133} 123}
134 124
135module_init(xt_esp_init); 125module_init(xt_esp_init);
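
Besides the array conversion, this hunk drops the stray ampersands from the .match and .checkentry initializers. That is purely cosmetic: a C function designator decays to a pointer, so both spellings produce the same value.

	static struct xt_match a = { .match = match };	/* idiomatic */
	static struct xt_match b = { .match = &match };	/* equivalent */
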
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 799c2a43e3b9..5d7818b73e3a 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -139,7 +139,6 @@ static int check(const char *tablename,
139 const void *inf, 139 const void *inf,
140 const struct xt_match *match, 140 const struct xt_match *match,
141 void *matchinfo, 141 void *matchinfo,
142 unsigned int matchsize,
143 unsigned int hook_mask) 142 unsigned int hook_mask)
144{ 143{
145 struct xt_helper_info *info = matchinfo; 144 struct xt_helper_info *info = matchinfo;
@@ -156,52 +155,44 @@ static int check(const char *tablename,
156} 155}
157 156
158static void 157static void
159destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) 158destroy(const struct xt_match *match, void *matchinfo)
160{ 159{
161#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 160#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
162 nf_ct_l3proto_module_put(match->family); 161 nf_ct_l3proto_module_put(match->family);
163#endif 162#endif
164} 163}
165 164
166static struct xt_match helper_match = { 165static struct xt_match xt_helper_match[] = {
167 .name = "helper", 166 {
168 .match = match, 167 .name = "helper",
169 .matchsize = sizeof(struct xt_helper_info), 168 .family = AF_INET,
170 .checkentry = check, 169 .checkentry = check,
171 .destroy = destroy, 170 .match = match,
172 .family = AF_INET, 171 .destroy = destroy,
173 .me = THIS_MODULE, 172 .matchsize = sizeof(struct xt_helper_info),
174}; 173 .me = THIS_MODULE,
175static struct xt_match helper6_match = { 174 },
176 .name = "helper", 175 {
177 .match = match, 176 .name = "helper",
178 .matchsize = sizeof(struct xt_helper_info), 177 .family = AF_INET6,
179 .checkentry = check, 178 .checkentry = check,
180 .destroy = destroy, 179 .match = match,
181 .family = AF_INET6, 180 .destroy = destroy,
182 .me = THIS_MODULE, 181 .matchsize = sizeof(struct xt_helper_info),
182 .me = THIS_MODULE,
183 },
183}; 184};
184 185
185static int __init xt_helper_init(void) 186static int __init xt_helper_init(void)
186{ 187{
187 int ret;
188 need_conntrack(); 188 need_conntrack();
189 189 return xt_register_matches(xt_helper_match,
190 ret = xt_register_match(&helper_match); 190 ARRAY_SIZE(xt_helper_match));
191 if (ret < 0)
192 return ret;
193
194 ret = xt_register_match(&helper6_match);
195 if (ret < 0)
196 xt_unregister_match(&helper_match);
197
198 return ret;
199} 191}
200 192
201static void __exit xt_helper_fini(void) 193static void __exit xt_helper_fini(void)
202{ 194{
203 xt_unregister_match(&helper_match); 195 xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match));
204 xt_unregister_match(&helper6_match);
205} 196}
206 197
207module_init(xt_helper_init); 198module_init(xt_helper_init);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 109132c9a146..67fd30d9f303 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -52,39 +52,32 @@ match6(const struct sk_buff *skb,
52 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; 52 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
53} 53}
54 54
55static struct xt_match length_match = { 55static struct xt_match xt_length_match[] = {
56 .name = "length", 56 {
57 .match = match, 57 .name = "length",
58 .matchsize = sizeof(struct xt_length_info), 58 .family = AF_INET,
59 .family = AF_INET, 59 .match = match,
60 .me = THIS_MODULE, 60 .matchsize = sizeof(struct xt_length_info),
61}; 61 .me = THIS_MODULE,
62 62 },
63static struct xt_match length6_match = { 63 {
64 .name = "length", 64 .name = "length",
65 .match = match6, 65 .family = AF_INET6,
66 .matchsize = sizeof(struct xt_length_info), 66 .match = match6,
67 .family = AF_INET6, 67 .matchsize = sizeof(struct xt_length_info),
68 .me = THIS_MODULE, 68 .me = THIS_MODULE,
69 },
69}; 70};
70 71
71static int __init xt_length_init(void) 72static int __init xt_length_init(void)
72{ 73{
73 int ret; 74 return xt_register_matches(xt_length_match,
74 ret = xt_register_match(&length_match); 75 ARRAY_SIZE(xt_length_match));
75 if (ret)
76 return ret;
77 ret = xt_register_match(&length6_match);
78 if (ret)
79 xt_unregister_match(&length_match);
80
81 return ret;
82} 76}
83 77
84static void __exit xt_length_fini(void) 78static void __exit xt_length_fini(void)
85{ 79{
86 xt_unregister_match(&length_match); 80 xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match));
87 xt_unregister_match(&length6_match);
88} 81}
89 82
90module_init(xt_length_init); 83module_init(xt_length_init);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index ce7fdb7e4e07..fda7b7dec27d 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -110,7 +110,6 @@ ipt_limit_checkentry(const char *tablename,
110 const void *inf, 110 const void *inf,
111 const struct xt_match *match, 111 const struct xt_match *match,
112 void *matchinfo, 112 void *matchinfo,
113 unsigned int matchsize,
114 unsigned int hook_mask) 113 unsigned int hook_mask)
115{ 114{
116 struct xt_rateinfo *r = matchinfo; 115 struct xt_rateinfo *r = matchinfo;
@@ -123,55 +122,95 @@ ipt_limit_checkentry(const char *tablename,
123 return 0; 122 return 0;
124 } 123 }
125 124
126 /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
127 128. */
128 r->prev = jiffies;
129 r->credit = user2credits(r->avg * r->burst); /* Credits full. */
130 r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
131 r->cost = user2credits(r->avg);
132
133 /* For SMP, we only want to use one set of counters. */ 125 /* For SMP, we only want to use one set of counters. */
134 r->master = r; 126 r->master = r;
135 127 if (r->cost == 0) {
128 /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
129 128. */
130 r->prev = jiffies;
131 r->credit = user2credits(r->avg * r->burst); /* Credits full. */
132 r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
133 r->cost = user2credits(r->avg);
134 }
136 return 1; 135 return 1;
137} 136}
138 137
139static struct xt_match ipt_limit_reg = { 138#ifdef CONFIG_COMPAT
140 .name = "limit", 139struct compat_xt_rateinfo {
141 .match = ipt_limit_match, 140 u_int32_t avg;
142 .matchsize = sizeof(struct xt_rateinfo), 141 u_int32_t burst;
143 .checkentry = ipt_limit_checkentry, 142
144 .family = AF_INET, 143 compat_ulong_t prev;
145 .me = THIS_MODULE, 144 u_int32_t credit;
145 u_int32_t credit_cap, cost;
146
147 u_int32_t master;
146}; 148};
147static struct xt_match limit6_reg = { 149
148 .name = "limit", 150/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
149 .match = ipt_limit_match, 151 * master pointer, which does not need to be preserved. */
150 .matchsize = sizeof(struct xt_rateinfo), 152static void compat_from_user(void *dst, void *src)
151 .checkentry = ipt_limit_checkentry, 153{
152 .family = AF_INET6, 154 struct compat_xt_rateinfo *cm = src;
153 .me = THIS_MODULE, 155 struct xt_rateinfo m = {
156 .avg = cm->avg,
157 .burst = cm->burst,
158 .prev = cm->prev | (unsigned long)cm->master << 32,
159 .credit = cm->credit,
160 .credit_cap = cm->credit_cap,
161 .cost = cm->cost,
162 };
163 memcpy(dst, &m, sizeof(m));
164}
165
166static int compat_to_user(void __user *dst, void *src)
167{
168 struct xt_rateinfo *m = src;
169 struct compat_xt_rateinfo cm = {
170 .avg = m->avg,
171 .burst = m->burst,
172 .prev = m->prev,
173 .credit = m->credit,
174 .credit_cap = m->credit_cap,
175 .cost = m->cost,
176 .master = m->prev >> 32,
177 };
178 return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
179}
180#endif /* CONFIG_COMPAT */
181
182static struct xt_match xt_limit_match[] = {
183 {
184 .name = "limit",
185 .family = AF_INET,
186 .checkentry = ipt_limit_checkentry,
187 .match = ipt_limit_match,
188 .matchsize = sizeof(struct xt_rateinfo),
189#ifdef CONFIG_COMPAT
190 .compatsize = sizeof(struct compat_xt_rateinfo),
191 .compat_from_user = compat_from_user,
192 .compat_to_user = compat_to_user,
193#endif
194 .me = THIS_MODULE,
195 },
196 {
197 .name = "limit",
198 .family = AF_INET6,
199 .checkentry = ipt_limit_checkentry,
200 .match = ipt_limit_match,
201 .matchsize = sizeof(struct xt_rateinfo),
202 .me = THIS_MODULE,
203 },
154}; 204};
155 205
156static int __init xt_limit_init(void) 206static int __init xt_limit_init(void)
157{ 207{
158 int ret; 208 return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
159
160 ret = xt_register_match(&ipt_limit_reg);
161 if (ret)
162 return ret;
163
164 ret = xt_register_match(&limit6_reg);
165 if (ret)
166 xt_unregister_match(&ipt_limit_reg);
167
168 return ret;
169} 209}
170 210
171static void __exit xt_limit_fini(void) 211static void __exit xt_limit_fini(void)
172{ 212{
173 xt_unregister_match(&ipt_limit_reg); 213 xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
174 xt_unregister_match(&limit6_reg);
175} 214}
176 215
177module_init(xt_limit_init); 216module_init(xt_limit_init);
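
The compat handlers above keep the full 64-bit "prev" jiffies value across the 32-bit layout by carrying its low half in cm->prev and stashing its high half in the otherwise-meaningless cm->master slot, as the comment in the diff notes. A standalone round-trip sketch with stand-in fixed-width types (userspace C, purely illustrative):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t prev = 0x123456789abcdef0ULL;	/* 64-bit jiffies value */

	/* compat_to_user direction: split across two 32-bit fields */
	uint32_t compat_prev   = (uint32_t)prev;	  /* cm->prev   */
	uint32_t compat_master = (uint32_t)(prev >> 32);  /* cm->master */

	/* compat_from_user direction: recombine */
	uint64_t rebuilt = compat_prev | (uint64_t)compat_master << 32;

	assert(rebuilt == prev);
	return 0;
}
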
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 356290ffe386..425fc21e31f5 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -43,43 +43,37 @@ match(const struct sk_buff *skb,
43 ^ info->invert)); 43 ^ info->invert));
44} 44}
45 45
46static struct xt_match mac_match = { 46static struct xt_match xt_mac_match[] = {
47 .name = "mac", 47 {
48 .match = match, 48 .name = "mac",
49 .matchsize = sizeof(struct xt_mac_info), 49 .family = AF_INET,
50 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | 50 .match = match,
51 (1 << NF_IP_FORWARD), 51 .matchsize = sizeof(struct xt_mac_info),
52 .family = AF_INET, 52 .hooks = (1 << NF_IP_PRE_ROUTING) |
53 .me = THIS_MODULE, 53 (1 << NF_IP_LOCAL_IN) |
54}; 54 (1 << NF_IP_FORWARD),
55static struct xt_match mac6_match = { 55 .me = THIS_MODULE,
56 .name = "mac", 56 },
57 .match = match, 57 {
58 .matchsize = sizeof(struct xt_mac_info), 58 .name = "mac",
59 .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | 59 .family = AF_INET6,
60 (1 << NF_IP_FORWARD), 60 .match = match,
61 .family = AF_INET6, 61 .matchsize = sizeof(struct xt_mac_info),
62 .me = THIS_MODULE, 62 .hooks = (1 << NF_IP_PRE_ROUTING) |
63 (1 << NF_IP_LOCAL_IN) |
64 (1 << NF_IP_FORWARD),
65 .me = THIS_MODULE,
66 },
63}; 67};
64 68
65static int __init xt_mac_init(void) 69static int __init xt_mac_init(void)
66{ 70{
67 int ret; 71 return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
68 ret = xt_register_match(&mac_match);
69 if (ret)
70 return ret;
71
72 ret = xt_register_match(&mac6_match);
73 if (ret)
74 xt_unregister_match(&mac_match);
75
76 return ret;
77} 72}
78 73
79static void __exit xt_mac_fini(void) 74static void __exit xt_mac_fini(void)
80{ 75{
81 xt_unregister_match(&mac_match); 76 xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
82 xt_unregister_match(&mac6_match);
83} 77}
84 78
85module_init(xt_mac_init); 79module_init(xt_mac_init);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 876bc5797738..934dddfbcd23 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -39,7 +39,6 @@ checkentry(const char *tablename,
39 const void *entry, 39 const void *entry,
40 const struct xt_match *match, 40 const struct xt_match *match,
41 void *matchinfo, 41 void *matchinfo,
42 unsigned int matchsize,
43 unsigned int hook_mask) 42 unsigned int hook_mask)
44{ 43{
45 const struct xt_mark_info *minfo = matchinfo; 44 const struct xt_mark_info *minfo = matchinfo;
@@ -51,42 +50,69 @@ checkentry(const char *tablename,
51 return 1; 50 return 1;
52} 51}
53 52
54static struct xt_match mark_match = { 53#ifdef CONFIG_COMPAT
55 .name = "mark", 54struct compat_xt_mark_info {
56 .match = match, 55 compat_ulong_t mark, mask;
57 .matchsize = sizeof(struct xt_mark_info), 56 u_int8_t invert;
58 .checkentry = checkentry, 57 u_int8_t __pad1;
59 .family = AF_INET, 58 u_int16_t __pad2;
60 .me = THIS_MODULE,
61}; 59};
62 60
63static struct xt_match mark6_match = { 61static void compat_from_user(void *dst, void *src)
64 .name = "mark", 62{
65 .match = match, 63 struct compat_xt_mark_info *cm = src;
66 .matchsize = sizeof(struct xt_mark_info), 64 struct xt_mark_info m = {
67 .checkentry = checkentry, 65 .mark = cm->mark,
68 .family = AF_INET6, 66 .mask = cm->mask,
69 .me = THIS_MODULE, 67 .invert = cm->invert,
70}; 68 };
69 memcpy(dst, &m, sizeof(m));
70}
71 71
72static int __init xt_mark_init(void) 72static int compat_to_user(void __user *dst, void *src)
73{ 73{
74 int ret; 74 struct xt_mark_info *m = src;
75 ret = xt_register_match(&mark_match); 75 struct compat_xt_mark_info cm = {
76 if (ret) 76 .mark = m->mark,
77 return ret; 77 .mask = m->mask,
78 .invert = m->invert,
79 };
80 return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
81}
82#endif /* CONFIG_COMPAT */
78 83
79 ret = xt_register_match(&mark6_match); 84static struct xt_match xt_mark_match[] = {
80 if (ret) 85 {
81 xt_unregister_match(&mark_match); 86 .name = "mark",
87 .family = AF_INET,
88 .checkentry = checkentry,
89 .match = match,
90 .matchsize = sizeof(struct xt_mark_info),
91#ifdef CONFIG_COMPAT
92 .compatsize = sizeof(struct compat_xt_mark_info),
93 .compat_from_user = compat_from_user,
94 .compat_to_user = compat_to_user,
95#endif
96 .me = THIS_MODULE,
97 },
98 {
99 .name = "mark",
100 .family = AF_INET6,
101 .checkentry = checkentry,
102 .match = match,
103 .matchsize = sizeof(struct xt_mark_info),
104 .me = THIS_MODULE,
105 },
106};
82 107
83 return ret; 108static int __init xt_mark_init(void)
109{
110 return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
84} 111}
85 112
86static void __exit xt_mark_fini(void) 113static void __exit xt_mark_fini(void)
87{ 114{
88 xt_unregister_match(&mark_match); 115 xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
89 xt_unregister_match(&mark6_match);
90} 116}
91 117
92module_init(xt_mark_init); 118module_init(xt_mark_init);
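
The explicit __pad1/__pad2 members in compat_xt_mark_info are there because the native xt_mark_info uses unsigned long, whose size and trailing padding differ between a 64-bit kernel and 32-bit userspace. A userspace sketch with stand-in fixed-width types (the real structs are kernel-internal) makes the two layouts visible:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins: uint64_t mimics the kernel's unsigned long on 64-bit,
 * uint32_t mimics compat_ulong_t; only the sizes matter here. */
struct mark_info_native { uint64_t mark, mask; uint8_t invert; };
struct mark_info_compat {
	uint32_t mark, mask;
	uint8_t  invert;
	uint8_t  __pad1;
	uint16_t __pad2;	/* pads to a deterministic 12 bytes */
};

int main(void)
{
	printf("native (64-bit) layout: %zu bytes\n",
	       sizeof(struct mark_info_native));	/* 24 */
	printf("compat (32-bit) layout: %zu bytes\n",
	       sizeof(struct mark_info_compat));	/* 12 */
	return 0;
}
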
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 1ff0a25396e7..d3aefd380930 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -176,7 +176,6 @@ checkentry(const char *tablename,
176 const void *info, 176 const void *info,
177 const struct xt_match *match, 177 const struct xt_match *match,
178 void *matchinfo, 178 void *matchinfo,
179 unsigned int matchsize,
180 unsigned int hook_mask) 179 unsigned int hook_mask)
181{ 180{
182 const struct ipt_ip *ip = info; 181 const struct ipt_ip *ip = info;
@@ -191,7 +190,6 @@ checkentry_v1(const char *tablename,
191 const void *info, 190 const void *info,
192 const struct xt_match *match, 191 const struct xt_match *match,
193 void *matchinfo, 192 void *matchinfo,
194 unsigned int matchsize,
195 unsigned int hook_mask) 193 unsigned int hook_mask)
196{ 194{
197 const struct ipt_ip *ip = info; 195 const struct ipt_ip *ip = info;
@@ -206,7 +204,6 @@ checkentry6(const char *tablename,
206 const void *info, 204 const void *info,
207 const struct xt_match *match, 205 const struct xt_match *match,
208 void *matchinfo, 206 void *matchinfo,
209 unsigned int matchsize,
210 unsigned int hook_mask) 207 unsigned int hook_mask)
211{ 208{
212 const struct ip6t_ip6 *ip = info; 209 const struct ip6t_ip6 *ip = info;
@@ -221,7 +218,6 @@ checkentry6_v1(const char *tablename,
221 const void *info, 218 const void *info,
222 const struct xt_match *match, 219 const struct xt_match *match,
223 void *matchinfo, 220 void *matchinfo,
224 unsigned int matchsize,
225 unsigned int hook_mask) 221 unsigned int hook_mask)
226{ 222{
227 const struct ip6t_ip6 *ip = info; 223 const struct ip6t_ip6 *ip = info;
@@ -231,84 +227,55 @@ checkentry6_v1(const char *tablename,
231 multiinfo->count); 227 multiinfo->count);
232} 228}
233 229
234static struct xt_match multiport_match = { 230static struct xt_match xt_multiport_match[] = {
235 .name = "multiport", 231 {
236 .revision = 0, 232 .name = "multiport",
237 .matchsize = sizeof(struct xt_multiport), 233 .family = AF_INET,
238 .match = &match, 234 .revision = 0,
239 .checkentry = &checkentry, 235 .checkentry = checkentry,
240 .family = AF_INET, 236 .match = match,
241 .me = THIS_MODULE, 237 .matchsize = sizeof(struct xt_multiport),
242}; 238 .me = THIS_MODULE,
243 239 },
244static struct xt_match multiport_match_v1 = { 240 {
245 .name = "multiport", 241 .name = "multiport",
246 .revision = 1, 242 .family = AF_INET,
247 .matchsize = sizeof(struct xt_multiport_v1), 243 .revision = 1,
248 .match = &match_v1, 244 .checkentry = checkentry_v1,
249 .checkentry = &checkentry_v1, 245 .match = match_v1,
250 .family = AF_INET, 246 .matchsize = sizeof(struct xt_multiport_v1),
251 .me = THIS_MODULE, 247 .me = THIS_MODULE,
252}; 248 },
253 249 {
254static struct xt_match multiport6_match = { 250 .name = "multiport",
255 .name = "multiport", 251 .family = AF_INET6,
256 .revision = 0, 252 .revision = 0,
257 .matchsize = sizeof(struct xt_multiport), 253 .checkentry = checkentry6,
258 .match = &match, 254 .match = match,
259 .checkentry = &checkentry6, 255 .matchsize = sizeof(struct xt_multiport),
260 .family = AF_INET6, 256 .me = THIS_MODULE,
261 .me = THIS_MODULE, 257 },
262}; 258 {
263 259 .name = "multiport",
264static struct xt_match multiport6_match_v1 = { 260 .family = AF_INET6,
265 .name = "multiport", 261 .revision = 1,
266 .revision = 1, 262 .checkentry = checkentry6_v1,
267 .matchsize = sizeof(struct xt_multiport_v1), 263 .match = match_v1,
268 .match = &match_v1, 264 .matchsize = sizeof(struct xt_multiport_v1),
269 .checkentry = &checkentry6_v1, 265 .me = THIS_MODULE,
270 .family = AF_INET6, 266 },
271 .me = THIS_MODULE,
272}; 267};
273 268
274static int __init xt_multiport_init(void) 269static int __init xt_multiport_init(void)
275{ 270{
276 int ret; 271 return xt_register_matches(xt_multiport_match,
277 272 ARRAY_SIZE(xt_multiport_match));
278 ret = xt_register_match(&multiport_match);
279 if (ret)
280 goto out;
281
282 ret = xt_register_match(&multiport_match_v1);
283 if (ret)
284 goto out_unreg_multi_v0;
285
286 ret = xt_register_match(&multiport6_match);
287 if (ret)
288 goto out_unreg_multi_v1;
289
290 ret = xt_register_match(&multiport6_match_v1);
291 if (ret)
292 goto out_unreg_multi6_v0;
293
294 return ret;
295
296out_unreg_multi6_v0:
297 xt_unregister_match(&multiport6_match);
298out_unreg_multi_v1:
299 xt_unregister_match(&multiport_match_v1);
300out_unreg_multi_v0:
301 xt_unregister_match(&multiport_match);
302out:
303 return ret;
304} 273}
305 274
306static void __exit xt_multiport_fini(void) 275static void __exit xt_multiport_fini(void)
307{ 276{
308 xt_unregister_match(&multiport_match); 277 xt_unregister_matches(xt_multiport_match,
309 xt_unregister_match(&multiport_match_v1); 278 ARRAY_SIZE(xt_multiport_match));
310 xt_unregister_match(&multiport6_match);
311 xt_unregister_match(&multiport6_match_v1);
312} 279}
313 280
314module_init(xt_multiport_init); 281module_init(xt_multiport_init);
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 63a965467465..fd8f954cded5 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -106,7 +106,6 @@ checkentry(const char *tablename,
106 const void *ip, 106 const void *ip,
107 const struct xt_match *match, 107 const struct xt_match *match,
108 void *matchinfo, 108 void *matchinfo,
109 unsigned int matchsize,
110 unsigned int hook_mask) 109 unsigned int hook_mask)
111{ 110{
112 const struct xt_physdev_info *info = matchinfo; 111 const struct xt_physdev_info *info = matchinfo;
@@ -132,43 +131,34 @@ checkentry(const char *tablename,
132 return 1; 131 return 1;
133} 132}
134 133
135static struct xt_match physdev_match = { 134static struct xt_match xt_physdev_match[] = {
136 .name = "physdev", 135 {
137 .match = match, 136 .name = "physdev",
138 .matchsize = sizeof(struct xt_physdev_info), 137 .family = AF_INET,
139 .checkentry = checkentry, 138 .checkentry = checkentry,
140 .family = AF_INET, 139 .match = match,
141 .me = THIS_MODULE, 140 .matchsize = sizeof(struct xt_physdev_info),
142}; 141 .me = THIS_MODULE,
143 142 },
144static struct xt_match physdev6_match = { 143 {
145 .name = "physdev", 144 .name = "physdev",
146 .match = match, 145 .family = AF_INET6,
147 .matchsize = sizeof(struct xt_physdev_info), 146 .checkentry = checkentry,
148 .checkentry = checkentry, 147 .match = match,
149 .family = AF_INET6, 148 .matchsize = sizeof(struct xt_physdev_info),
150 .me = THIS_MODULE, 149 .me = THIS_MODULE,
150 },
151}; 151};
152 152
153static int __init xt_physdev_init(void) 153static int __init xt_physdev_init(void)
154{ 154{
155 int ret; 155 return xt_register_matches(xt_physdev_match,
156 156 ARRAY_SIZE(xt_physdev_match));
157 ret = xt_register_match(&physdev_match);
158 if (ret < 0)
159 return ret;
160
161 ret = xt_register_match(&physdev6_match);
162 if (ret < 0)
163 xt_unregister_match(&physdev_match);
164
165 return ret;
166} 157}
167 158
168static void __exit xt_physdev_fini(void) 159static void __exit xt_physdev_fini(void)
169{ 160{
170 xt_unregister_match(&physdev_match); 161 xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match));
171 xt_unregister_match(&physdev6_match);
172} 162}
173 163
174module_init(xt_physdev_init); 164module_init(xt_physdev_init);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index d2f5320a80bf..16e7b0804287 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -43,40 +43,32 @@ static int match(const struct sk_buff *skb,
43 return (type == info->pkttype) ^ info->invert; 43 return (type == info->pkttype) ^ info->invert;
44} 44}
45 45
46static struct xt_match pkttype_match = { 46static struct xt_match xt_pkttype_match[] = {
47 .name = "pkttype", 47 {
48 .match = match, 48 .name = "pkttype",
49 .matchsize = sizeof(struct xt_pkttype_info), 49 .family = AF_INET,
50 .family = AF_INET, 50 .match = match,
51 .me = THIS_MODULE, 51 .matchsize = sizeof(struct xt_pkttype_info),
52}; 52 .me = THIS_MODULE,
53 53 },
54static struct xt_match pkttype6_match = { 54 {
55 .name = "pkttype", 55 .name = "pkttype",
56 .match = match, 56 .family = AF_INET6,
57 .matchsize = sizeof(struct xt_pkttype_info), 57 .match = match,
58 .family = AF_INET6, 58 .matchsize = sizeof(struct xt_pkttype_info),
59 .me = THIS_MODULE, 59 .me = THIS_MODULE,
60 },
60}; 61};
61 62
62static int __init xt_pkttype_init(void) 63static int __init xt_pkttype_init(void)
63{ 64{
64 int ret; 65 return xt_register_matches(xt_pkttype_match,
65 ret = xt_register_match(&pkttype_match); 66 ARRAY_SIZE(xt_pkttype_match));
66 if (ret)
67 return ret;
68
69 ret = xt_register_match(&pkttype6_match);
70 if (ret)
71 xt_unregister_match(&pkttype_match);
72
73 return ret;
74} 67}
75 68
76static void __exit xt_pkttype_fini(void) 69static void __exit xt_pkttype_fini(void)
77{ 70{
78 xt_unregister_match(&pkttype_match); 71 xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match));
79 xt_unregister_match(&pkttype6_match);
80} 72}
81 73
82module_init(xt_pkttype_init); 74module_init(xt_pkttype_init);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index ba1ca03abad3..46bde2b1e1e0 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -135,8 +135,7 @@ static int match(const struct sk_buff *skb,
135 135
136static int checkentry(const char *tablename, const void *ip_void, 136static int checkentry(const char *tablename, const void *ip_void,
137 const struct xt_match *match, 137 const struct xt_match *match,
138 void *matchinfo, unsigned int matchsize, 138 void *matchinfo, unsigned int hook_mask)
139 unsigned int hook_mask)
140{ 139{
141 struct xt_policy_info *info = matchinfo; 140 struct xt_policy_info *info = matchinfo;
142 141
@@ -165,43 +164,34 @@ static int checkentry(const char *tablename, const void *ip_void,
165 return 1; 164 return 1;
166} 165}
167 166
168static struct xt_match policy_match = { 167static struct xt_match xt_policy_match[] = {
169 .name = "policy", 168 {
170 .family = AF_INET, 169 .name = "policy",
171 .match = match, 170 .family = AF_INET,
172 .matchsize = sizeof(struct xt_policy_info), 171 .checkentry = checkentry,
173 .checkentry = checkentry, 172 .match = match,
174 .family = AF_INET, 173 .matchsize = sizeof(struct xt_policy_info),
175 .me = THIS_MODULE, 174 .me = THIS_MODULE,
176}; 175 },
177 176 {
178static struct xt_match policy6_match = { 177 .name = "policy",
179 .name = "policy", 178 .family = AF_INET6,
180 .family = AF_INET6, 179 .checkentry = checkentry,
181 .match = match, 180 .match = match,
182 .matchsize = sizeof(struct xt_policy_info), 181 .matchsize = sizeof(struct xt_policy_info),
183 .checkentry = checkentry, 182 .me = THIS_MODULE,
184 .family = AF_INET6, 183 },
185 .me = THIS_MODULE,
186}; 184};
187 185
188static int __init init(void) 186static int __init init(void)
189{ 187{
190 int ret; 188 return xt_register_matches(xt_policy_match,
191 189 ARRAY_SIZE(xt_policy_match));
192 ret = xt_register_match(&policy_match);
193 if (ret)
194 return ret;
195 ret = xt_register_match(&policy6_match);
196 if (ret)
197 xt_unregister_match(&policy_match);
198 return ret;
199} 190}
200 191
201static void __exit fini(void) 192static void __exit fini(void)
202{ 193{
203 xt_unregister_match(&policy6_match); 194 xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match));
204 xt_unregister_match(&policy_match);
205} 195}
206 196
207module_init(init); 197module_init(init);
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index be8d3c26b568..b75fa2c70e66 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -41,7 +41,7 @@ match(const struct sk_buff *skb,
41static int 41static int
42checkentry(const char *tablename, const void *entry, 42checkentry(const char *tablename, const void *entry,
43 const struct xt_match *match, void *matchinfo, 43 const struct xt_match *match, void *matchinfo,
44 unsigned int matchsize, unsigned int hook_mask) 44 unsigned int hook_mask)
45{ 45{
46 struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; 46 struct xt_quota_info *q = (struct xt_quota_info *)matchinfo;
47 47
@@ -52,46 +52,33 @@ checkentry(const char *tablename, const void *entry,
52 return 1; 52 return 1;
53} 53}
54 54
55static struct xt_match quota_match = { 55static struct xt_match xt_quota_match[] = {
56 .name = "quota", 56 {
57 .family = AF_INET, 57 .name = "quota",
58 .match = match, 58 .family = AF_INET,
59 .matchsize = sizeof(struct xt_quota_info), 59 .checkentry = checkentry,
60 .checkentry = checkentry, 60 .match = match,
61 .me = THIS_MODULE 61 .matchsize = sizeof(struct xt_quota_info),
62}; 62 .me = THIS_MODULE
63 63 },
64static struct xt_match quota_match6 = { 64 {
65 .name = "quota", 65 .name = "quota",
66 .family = AF_INET6, 66 .family = AF_INET6,
67 .match = match, 67 .checkentry = checkentry,
68 .matchsize = sizeof(struct xt_quota_info), 68 .match = match,
69 .checkentry = checkentry, 69 .matchsize = sizeof(struct xt_quota_info),
70 .me = THIS_MODULE 70 .me = THIS_MODULE
71 },
71}; 72};
72 73
73static int __init xt_quota_init(void) 74static int __init xt_quota_init(void)
74{ 75{
75 int ret; 76 return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
76
77 ret = xt_register_match(&quota_match);
78 if (ret)
79 goto err1;
80 ret = xt_register_match(&quota_match6);
81 if (ret)
82 goto err2;
83 return ret;
84
85err2:
86 xt_unregister_match(&quota_match);
87err1:
88 return ret;
89} 77}
90 78
91static void __exit xt_quota_fini(void) 79static void __exit xt_quota_fini(void)
92{ 80{
93 xt_unregister_match(&quota_match6); 81 xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
94 xt_unregister_match(&quota_match);
95} 82}
96 83
97module_init(xt_quota_init); 84module_init(xt_quota_init);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 843383e01d41..7956acaaa24b 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -163,7 +163,6 @@ checkentry(const char *tablename,
163 const void *inf, 163 const void *inf,
164 const struct xt_match *match, 164 const struct xt_match *match,
165 void *matchinfo, 165 void *matchinfo,
166 unsigned int matchsize,
167 unsigned int hook_mask) 166 unsigned int hook_mask)
168{ 167{
169 const struct xt_sctp_info *info = matchinfo; 168 const struct xt_sctp_info *info = matchinfo;
@@ -178,44 +177,35 @@ checkentry(const char *tablename,
178 | SCTP_CHUNK_MATCH_ONLY))); 177 | SCTP_CHUNK_MATCH_ONLY)));
179} 178}
180 179
181static struct xt_match sctp_match = { 180static struct xt_match xt_sctp_match[] = {
182 .name = "sctp", 181 {
183 .match = match, 182 .name = "sctp",
184 .matchsize = sizeof(struct xt_sctp_info), 183 .family = AF_INET,
185 .proto = IPPROTO_SCTP, 184 .checkentry = checkentry,
186 .checkentry = checkentry, 185 .match = match,
187 .family = AF_INET, 186 .matchsize = sizeof(struct xt_sctp_info),
188 .me = THIS_MODULE 187 .proto = IPPROTO_SCTP,
189}; 188 .me = THIS_MODULE
190 189 },
191static struct xt_match sctp6_match = { 190 {
192 .name = "sctp", 191 .name = "sctp",
193 .match = match, 192 .family = AF_INET6,
194 .matchsize = sizeof(struct xt_sctp_info), 193 .checkentry = checkentry,
195 .proto = IPPROTO_SCTP, 194 .match = match,
196 .checkentry = checkentry, 195 .matchsize = sizeof(struct xt_sctp_info),
197 .family = AF_INET6, 196 .proto = IPPROTO_SCTP,
198 .me = THIS_MODULE 197 .me = THIS_MODULE
198 },
199}; 199};
200 200
201static int __init xt_sctp_init(void) 201static int __init xt_sctp_init(void)
202{ 202{
203 int ret; 203 return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
204 ret = xt_register_match(&sctp_match);
205 if (ret)
206 return ret;
207
208 ret = xt_register_match(&sctp6_match);
209 if (ret)
210 xt_unregister_match(&sctp_match);
211
212 return ret;
213} 204}
214 205
215static void __exit xt_sctp_fini(void) 206static void __exit xt_sctp_fini(void)
216{ 207{
217 xt_unregister_match(&sctp6_match); 208 xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
218 xt_unregister_match(&sctp_match);
219} 209}
220 210
221module_init(xt_sctp_init); 211module_init(xt_sctp_init);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index f9e304dc4504..d9010b16a1f9 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -48,7 +48,6 @@ static int check(const char *tablename,
48 const void *inf, 48 const void *inf,
49 const struct xt_match *match, 49 const struct xt_match *match,
50 void *matchinfo, 50 void *matchinfo,
51 unsigned int matchsize,
52 unsigned int hook_mask) 51 unsigned int hook_mask)
53{ 52{
54#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 53#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -62,54 +61,43 @@ static int check(const char *tablename,
62} 61}
63 62
64static void 63static void
65destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) 64destroy(const struct xt_match *match, void *matchinfo)
66{ 65{
67#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 66#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
68 nf_ct_l3proto_module_put(match->family); 67 nf_ct_l3proto_module_put(match->family);
69#endif 68#endif
70} 69}
71 70
72static struct xt_match state_match = { 71static struct xt_match xt_state_match[] = {
73 .name = "state", 72 {
74 .match = match, 73 .name = "state",
75 .checkentry = check, 74 .family = AF_INET,
76 .destroy = destroy, 75 .checkentry = check,
77 .matchsize = sizeof(struct xt_state_info), 76 .match = match,
78 .family = AF_INET, 77 .destroy = destroy,
79 .me = THIS_MODULE, 78 .matchsize = sizeof(struct xt_state_info),
80}; 79 .me = THIS_MODULE,
81 80 },
82static struct xt_match state6_match = { 81 {
83 .name = "state", 82 .name = "state",
84 .match = match, 83 .family = AF_INET6,
85 .checkentry = check, 84 .checkentry = check,
86 .destroy = destroy, 85 .match = match,
87 .matchsize = sizeof(struct xt_state_info), 86 .destroy = destroy,
88 .family = AF_INET6, 87 .matchsize = sizeof(struct xt_state_info),
89 .me = THIS_MODULE, 88 .me = THIS_MODULE,
89 },
90}; 90};
91 91
92static int __init xt_state_init(void) 92static int __init xt_state_init(void)
93{ 93{
94 int ret;
95
96 need_conntrack(); 94 need_conntrack();
97 95 return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
98 ret = xt_register_match(&state_match);
99 if (ret < 0)
100 return ret;
101
102 ret = xt_register_match(&state6_match);
103 if (ret < 0)
104 xt_unregister_match(&state_match);
105
106 return ret;
107} 96}
108 97
109static void __exit xt_state_fini(void) 98static void __exit xt_state_fini(void)
110{ 99{
111 xt_unregister_match(&state_match); 100 xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
112 xt_unregister_match(&state6_match);
113} 101}
114 102
115module_init(xt_state_init); 103module_init(xt_state_init);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index de1037f58596..091a9f89f5d5 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -55,7 +55,7 @@ match(const struct sk_buff *skb,
55static int 55static int
56checkentry(const char *tablename, const void *entry, 56checkentry(const char *tablename, const void *entry,
57 const struct xt_match *match, void *matchinfo, 57 const struct xt_match *match, void *matchinfo,
58 unsigned int matchsize, unsigned int hook_mask) 58 unsigned int hook_mask)
59{ 59{
60 struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; 60 struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
61 61
@@ -66,46 +66,35 @@ checkentry(const char *tablename, const void *entry,
66 return 1; 66 return 1;
67} 67}
68 68
69static struct xt_match statistic_match = { 69static struct xt_match xt_statistic_match[] = {
70 .name = "statistic", 70 {
71 .match = match, 71 .name = "statistic",
72 .matchsize = sizeof(struct xt_statistic_info), 72 .family = AF_INET,
73 .checkentry = checkentry, 73 .checkentry = checkentry,
74 .family = AF_INET, 74 .match = match,
75 .me = THIS_MODULE, 75 .matchsize = sizeof(struct xt_statistic_info),
76}; 76 .me = THIS_MODULE,
77 77 },
78static struct xt_match statistic_match6 = { 78 {
79 .name = "statistic", 79 .name = "statistic",
80 .match = match, 80 .family = AF_INET6,
81 .matchsize = sizeof(struct xt_statistic_info), 81 .checkentry = checkentry,
82 .checkentry = checkentry, 82 .match = match,
83 .family = AF_INET6, 83 .matchsize = sizeof(struct xt_statistic_info),
84 .me = THIS_MODULE, 84 .me = THIS_MODULE,
85 },
85}; 86};
86 87
87static int __init xt_statistic_init(void) 88static int __init xt_statistic_init(void)
88{ 89{
89 int ret; 90 return xt_register_matches(xt_statistic_match,
90 91 ARRAY_SIZE(xt_statistic_match));
91 ret = xt_register_match(&statistic_match);
92 if (ret)
93 goto err1;
94
95 ret = xt_register_match(&statistic_match6);
96 if (ret)
97 goto err2;
98 return ret;
99err2:
100 xt_unregister_match(&statistic_match);
101err1:
102 return ret;
103} 92}
104 93
105static void __exit xt_statistic_fini(void) 94static void __exit xt_statistic_fini(void)
106{ 95{
107 xt_unregister_match(&statistic_match6); 96 xt_unregister_matches(xt_statistic_match,
108 xt_unregister_match(&statistic_match); 97 ARRAY_SIZE(xt_statistic_match));
109} 98}
110 99
111module_init(xt_statistic_init); 100module_init(xt_statistic_init);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 275330fcdaaa..4453252400aa 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -46,7 +46,6 @@ static int checkentry(const char *tablename,
46 const void *ip, 46 const void *ip,
47 const struct xt_match *match, 47 const struct xt_match *match,
48 void *matchinfo, 48 void *matchinfo,
49 unsigned int matchsize,
50 unsigned int hook_mask) 49 unsigned int hook_mask)
51{ 50{
52 struct xt_string_info *conf = matchinfo; 51 struct xt_string_info *conf = matchinfo;
@@ -69,49 +68,40 @@ static int checkentry(const char *tablename,
69 return 1; 68 return 1;
70} 69}
71 70
72static void destroy(const struct xt_match *match, void *matchinfo, 71static void destroy(const struct xt_match *match, void *matchinfo)
73 unsigned int matchsize)
74{ 72{
75 textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); 73 textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
76} 74}
77 75
78static struct xt_match string_match = { 76static struct xt_match xt_string_match[] = {
79 .name = "string", 77 {
80 .match = match, 78 .name = "string",
81 .matchsize = sizeof(struct xt_string_info), 79 .family = AF_INET,
82 .checkentry = checkentry, 80 .checkentry = checkentry,
83 .destroy = destroy, 81 .match = match,
84 .family = AF_INET, 82 .destroy = destroy,
85 .me = THIS_MODULE 83 .matchsize = sizeof(struct xt_string_info),
86}; 84 .me = THIS_MODULE
87static struct xt_match string6_match = { 85 },
88 .name = "string", 86 {
89 .match = match, 87 .name = "string",
90 .matchsize = sizeof(struct xt_string_info), 88 .family = AF_INET6,
91 .checkentry = checkentry, 89 .checkentry = checkentry,
92 .destroy = destroy, 90 .match = match,
93 .family = AF_INET6, 91 .destroy = destroy,
94 .me = THIS_MODULE 92 .matchsize = sizeof(struct xt_string_info),
93 .me = THIS_MODULE
94 },
95}; 95};
96 96
97static int __init xt_string_init(void) 97static int __init xt_string_init(void)
98{ 98{
99 int ret; 99 return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
100
101 ret = xt_register_match(&string_match);
102 if (ret)
103 return ret;
104 ret = xt_register_match(&string6_match);
105 if (ret)
106 xt_unregister_match(&string_match);
107
108 return ret;
109} 100}
110 101
111static void __exit xt_string_fini(void) 102static void __exit xt_string_fini(void)
112{ 103{
113 xt_unregister_match(&string_match); 104 xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
114 xt_unregister_match(&string6_match);
115} 105}
116 106
117module_init(xt_string_init); 107module_init(xt_string_init);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index cf7d335cadcd..a3682fe2f192 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -18,21 +18,22 @@
18#include <linux/netfilter_ipv4/ip_tables.h> 18#include <linux/netfilter_ipv4/ip_tables.h>
19#include <linux/netfilter_ipv6/ip6_tables.h> 19#include <linux/netfilter_ipv6/ip6_tables.h>
20 20
21#define TH_SYN 0x02
22
23MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 22MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
25MODULE_DESCRIPTION("iptables TCP MSS match module"); 23MODULE_DESCRIPTION("iptables TCP MSS match module");
26MODULE_ALIAS("ipt_tcpmss"); 24MODULE_ALIAS("ipt_tcpmss");
27 25
28/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */ 26static int
29static inline int 27match(const struct sk_buff *skb,
30mssoption_match(u_int16_t min, u_int16_t max, 28 const struct net_device *in,
31 const struct sk_buff *skb, 29 const struct net_device *out,
32 unsigned int protoff, 30 const struct xt_match *match,
33 int invert, 31 const void *matchinfo,
34 int *hotdrop) 32 int offset,
33 unsigned int protoff,
34 int *hotdrop)
35{ 35{
36 const struct xt_tcpmss_match_info *info = matchinfo;
36 struct tcphdr _tcph, *th; 37 struct tcphdr _tcph, *th;
37 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ 38 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
38 u8 _opt[15 * 4 - sizeof(_tcph)], *op; 39 u8 _opt[15 * 4 - sizeof(_tcph)], *op;
@@ -64,72 +65,50 @@ mssoption_match(u_int16_t min, u_int16_t max,
64 65
65 mssval = (op[i+2] << 8) | op[i+3]; 66 mssval = (op[i+2] << 8) | op[i+3];
66 67
67 return (mssval >= min && mssval <= max) ^ invert; 68 return (mssval >= info->mss_min &&
69 mssval <= info->mss_max) ^ info->invert;
68 } 70 }
69 if (op[i] < 2) i++; 71 if (op[i] < 2)
70 else i += op[i+1]?:1; 72 i++;
73 else
74 i += op[i+1] ? : 1;
71 } 75 }
72out: 76out:
73 return invert; 77 return info->invert;
74 78
75 dropit: 79dropit:
76 *hotdrop = 1; 80 *hotdrop = 1;
77 return 0; 81 return 0;
78} 82}
79 83
80static int 84static struct xt_match xt_tcpmss_match[] = {
81match(const struct sk_buff *skb, 85 {
82 const struct net_device *in, 86 .name = "tcpmss",
83 const struct net_device *out, 87 .family = AF_INET,
84 const struct xt_match *match, 88 .match = match,
85 const void *matchinfo, 89 .matchsize = sizeof(struct xt_tcpmss_match_info),
86 int offset, 90 .proto = IPPROTO_TCP,
87 unsigned int protoff, 91 .me = THIS_MODULE,
88 int *hotdrop) 92 },
89{ 93 {
90 const struct xt_tcpmss_match_info *info = matchinfo; 94 .name = "tcpmss",
91 95 .family = AF_INET6,
92 return mssoption_match(info->mss_min, info->mss_max, skb, protoff, 96 .match = match,
93 info->invert, hotdrop); 97 .matchsize = sizeof(struct xt_tcpmss_match_info),
94} 98 .proto = IPPROTO_TCP,
95 99 .me = THIS_MODULE,
96static struct xt_match tcpmss_match = { 100 },
97 .name = "tcpmss",
98 .match = match,
99 .matchsize = sizeof(struct xt_tcpmss_match_info),
100 .proto = IPPROTO_TCP,
101 .family = AF_INET,
102 .me = THIS_MODULE,
103};
104
105static struct xt_match tcpmss6_match = {
106 .name = "tcpmss",
107 .match = match,
108 .matchsize = sizeof(struct xt_tcpmss_match_info),
109 .proto = IPPROTO_TCP,
110 .family = AF_INET6,
111 .me = THIS_MODULE,
112}; 101};
113 102
114
115static int __init xt_tcpmss_init(void) 103static int __init xt_tcpmss_init(void)
116{ 104{
117 int ret; 105 return xt_register_matches(xt_tcpmss_match,
118 ret = xt_register_match(&tcpmss_match); 106 ARRAY_SIZE(xt_tcpmss_match));
119 if (ret)
120 return ret;
121
122 ret = xt_register_match(&tcpmss6_match);
123 if (ret)
124 xt_unregister_match(&tcpmss_match);
125
126 return ret;
127} 107}
128 108
129static void __exit xt_tcpmss_fini(void) 109static void __exit xt_tcpmss_fini(void)
130{ 110{
131 xt_unregister_match(&tcpmss6_match); 111 xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match));
132 xt_unregister_match(&tcpmss_match);
133} 112}
134 113
135module_init(xt_tcpmss_init); 114module_init(xt_tcpmss_init);
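
The match function folded in above scans the TCP option list for the MSS option. The list is a sequence of (kind, length, data) options, except that kind 0 (end of list) and kind 1 (NOP) occupy a single byte; MSS is kind 2, length 4, carrying a 16-bit big-endian value. A self-contained sketch of the same walk (find_mss() is a hypothetical helper, not part of the patch):

#include <stdint.h>
#include <stdio.h>

static int find_mss(const uint8_t *opt, unsigned int len, uint16_t *mss)
{
	unsigned int i = 0;

	while (i < len) {
		if (opt[i] == 0)		/* kind 0: end of option list */
			return 0;
		if (opt[i] == 1) {		/* kind 1: NOP, single byte */
			i++;
			continue;
		}
		if (i + 1 >= len || opt[i + 1] < 2)
			return 0;		/* truncated or malformed */
		if (opt[i] == 2 && opt[i + 1] == 4 && i + 3 < len) {
			*mss = (opt[i + 2] << 8) | opt[i + 3];
			return 1;
		}
		i += opt[i + 1];		/* skip over this option */
	}
	return 0;
}

int main(void)
{
	const uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4 }; /* NOP NOP MSS=1460 */
	uint16_t mss;

	if (find_mss(opts, sizeof(opts), &mss))
		printf("MSS option: %u\n", mss);	/* prints 1460 */
	return 0;
}
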
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index a9a63aa68936..e76a68e0bc66 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -141,7 +141,6 @@ tcp_checkentry(const char *tablename,
141 const void *info, 141 const void *info,
142 const struct xt_match *match, 142 const struct xt_match *match,
143 void *matchinfo, 143 void *matchinfo,
144 unsigned int matchsize,
145 unsigned int hook_mask) 144 unsigned int hook_mask)
146{ 145{
147 const struct xt_tcp *tcpinfo = matchinfo; 146 const struct xt_tcp *tcpinfo = matchinfo;
@@ -190,7 +189,6 @@ udp_checkentry(const char *tablename,
190 const void *info, 189 const void *info,
191 const struct xt_match *match, 190 const struct xt_match *match,
192 void *matchinfo, 191 void *matchinfo,
193 unsigned int matchsize,
194 unsigned int hook_mask) 192 unsigned int hook_mask)
195{ 193{
196 const struct xt_tcp *udpinfo = matchinfo; 194 const struct xt_tcp *udpinfo = matchinfo;
@@ -199,81 +197,54 @@ udp_checkentry(const char *tablename,
199 return !(udpinfo->invflags & ~XT_UDP_INV_MASK); 197 return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
200} 198}
201 199
202static struct xt_match tcp_matchstruct = { 200static struct xt_match xt_tcpudp_match[] = {
203 .name = "tcp", 201 {
204 .match = tcp_match, 202 .name = "tcp",
205 .matchsize = sizeof(struct xt_tcp), 203 .family = AF_INET,
206 .proto = IPPROTO_TCP, 204 .checkentry = tcp_checkentry,
207 .family = AF_INET, 205 .match = tcp_match,
208 .checkentry = tcp_checkentry, 206 .matchsize = sizeof(struct xt_tcp),
209 .me = THIS_MODULE, 207 .proto = IPPROTO_TCP,
210}; 208 .me = THIS_MODULE,
211 209 },
212static struct xt_match tcp6_matchstruct = { 210 {
213 .name = "tcp", 211 .name = "tcp",
214 .match = tcp_match, 212 .family = AF_INET6,
215 .matchsize = sizeof(struct xt_tcp), 213 .checkentry = tcp_checkentry,
216 .proto = IPPROTO_TCP, 214 .match = tcp_match,
217 .family = AF_INET6, 215 .matchsize = sizeof(struct xt_tcp),
218 .checkentry = tcp_checkentry, 216 .proto = IPPROTO_TCP,
219 .me = THIS_MODULE, 217 .me = THIS_MODULE,
220}; 218 },
221 219 {
222static struct xt_match udp_matchstruct = { 220 .name = "udp",
223 .name = "udp", 221 .family = AF_INET,
224 .match = udp_match, 222 .checkentry = udp_checkentry,
225 .matchsize = sizeof(struct xt_udp), 223 .match = udp_match,
226 .proto = IPPROTO_UDP, 224 .matchsize = sizeof(struct xt_udp),
227 .family = AF_INET, 225 .proto = IPPROTO_UDP,
228 .checkentry = udp_checkentry, 226 .me = THIS_MODULE,
229 .me = THIS_MODULE, 227 },
230}; 228 {
231static struct xt_match udp6_matchstruct = { 229 .name = "udp",
232 .name = "udp", 230 .family = AF_INET6,
233 .match = udp_match, 231 .checkentry = udp_checkentry,
234 .matchsize = sizeof(struct xt_udp), 232 .match = udp_match,
235 .proto = IPPROTO_UDP, 233 .matchsize = sizeof(struct xt_udp),
236 .family = AF_INET6, 234 .proto = IPPROTO_UDP,
237 .checkentry = udp_checkentry, 235 .me = THIS_MODULE,
238 .me = THIS_MODULE, 236 },
239}; 237};
240 238
241static int __init xt_tcpudp_init(void) 239static int __init xt_tcpudp_init(void)
242{ 240{
243 int ret; 241 return xt_register_matches(xt_tcpudp_match,
244 ret = xt_register_match(&tcp_matchstruct); 242 ARRAY_SIZE(xt_tcpudp_match));
245 if (ret)
246 return ret;
247
248 ret = xt_register_match(&tcp6_matchstruct);
249 if (ret)
250 goto out_unreg_tcp;
251
252 ret = xt_register_match(&udp_matchstruct);
253 if (ret)
254 goto out_unreg_tcp6;
255
256 ret = xt_register_match(&udp6_matchstruct);
257 if (ret)
258 goto out_unreg_udp;
259
260 return ret;
261
262out_unreg_udp:
263 xt_unregister_match(&udp_matchstruct);
264out_unreg_tcp6:
265 xt_unregister_match(&tcp6_matchstruct);
266out_unreg_tcp:
267 xt_unregister_match(&tcp_matchstruct);
268 return ret;
269} 243}
270 244
271static void __exit xt_tcpudp_fini(void) 245static void __exit xt_tcpudp_fini(void)
272{ 246{
273 xt_unregister_match(&udp6_matchstruct); 247 xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match));
274 xt_unregister_match(&udp_matchstruct);
275 xt_unregister_match(&tcp6_matchstruct);
276 xt_unregister_match(&tcp_matchstruct);
277} 248}
278 249
279module_init(xt_tcpudp_init); 250module_init(xt_tcpudp_init);
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
new file mode 100644
index 000000000000..fe23cb7f1e87
--- /dev/null
+++ b/net/netlabel/Kconfig
@@ -0,0 +1,14 @@
1#
2# NetLabel configuration
3#
4
5config NETLABEL
6 bool "NetLabel subsystem support"
7 depends on NET && SECURITY
8 default n
9 ---help---
10 NetLabel provides support for explicit network packet labeling
11 protocols such as CIPSO and RIPSO. For more information see
12 Documentation/netlabel.
13
14 If you are unsure, say N.
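
NETLABEL is a bool symbol depending on NET and SECURITY, so the subsystem is built in statically rather than as a module; a minimal .config fragment to enable it (assuming the rest of the configuration is already in place):

CONFIG_NET=y
CONFIG_SECURITY=y
CONFIG_NETLABEL=y
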
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
new file mode 100644
index 000000000000..8af18c0a47d9
--- /dev/null
+++ b/net/netlabel/Makefile
@@ -0,0 +1,16 @@
1#
2# Makefile for the NetLabel subsystem.
3#
4# Feb 9, 2006, Paul Moore <paul.moore@hp.com>
5#
6
7# base objects
8obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o
9
10# management objects
11obj-y += netlabel_mgmt.o
12
13# protocol modules
14obj-y += netlabel_unlabeled.o
15obj-y += netlabel_cipso_v4.o
16
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
new file mode 100644
index 000000000000..a4f40adc447b
--- /dev/null
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -0,0 +1,542 @@
1/*
2 * NetLabel CIPSO/IPv4 Support
3 *
4 * This file defines the CIPSO/IPv4 functions for the NetLabel system. The
5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#include <linux/types.h>
32#include <linux/socket.h>
33#include <linux/string.h>
34#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/netlink.h>
37#include <net/genetlink.h>
38#include <net/netlabel.h>
39#include <net/cipso_ipv4.h>
40
41#include "netlabel_user.h"
42#include "netlabel_cipso_v4.h"
43
44/* NetLabel Generic NETLINK CIPSOv4 family */
45static struct genl_family netlbl_cipsov4_gnl_family = {
46 .id = GENL_ID_GENERATE,
47 .hdrsize = 0,
48 .name = NETLBL_NLTYPE_CIPSOV4_NAME,
49 .version = NETLBL_PROTO_VERSION,
50 .maxattr = 0,
51};
52
53
54/*
55 * Helper Functions
56 */
57
58/**
59 * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
60 * @entry: the entry's RCU field
61 *
62 * Description:
63 * This function is designed to be used as a callback to the call_rcu()
64 * function so that the memory allocated to the DOI definition can be released
65 * safely.
66 *
67 */
68static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
69{
70 struct cipso_v4_doi *ptr;
71
72 ptr = container_of(entry, struct cipso_v4_doi, rcu);
73 switch (ptr->type) {
74 case CIPSO_V4_MAP_STD:
75 kfree(ptr->map.std->lvl.cipso);
76 kfree(ptr->map.std->lvl.local);
77 kfree(ptr->map.std->cat.cipso);
78 kfree(ptr->map.std->cat.local);
79 break;
80 }
81 kfree(ptr);
82}
83
84
85/*
86 * NetLabel Command Handlers
87 */
88
89/**
90 * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition
91 * @doi: the DOI value
92 * @msg: the ADD message data
93 * @msg_size: the size of the ADD message buffer
94 *
95 * Description:
96 * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message
97 * and add it to the CIPSO V4 engine. Return zero on success and non-zero on
98 * error.
99 *
100 */
101static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
102{
103 int ret_val = -EINVAL;
104 int msg_len = msg_size;
105 u32 num_tags;
106 u32 num_lvls;
107 u32 num_cats;
108 struct cipso_v4_doi *doi_def = NULL;
109 u32 iter;
110 u32 tmp_val_a;
111 u32 tmp_val_b;
112
113 if (msg_len < NETLBL_LEN_U32)
114 goto add_std_failure;
115 num_tags = netlbl_getinc_u32(&msg, &msg_len);
116 if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
117 goto add_std_failure;
118
119 doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
120 if (doi_def == NULL) {
121 ret_val = -ENOMEM;
122 goto add_std_failure;
123 }
124 doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
125 if (doi_def->map.std == NULL) {
126 ret_val = -ENOMEM;
127 goto add_std_failure;
128 }
129 doi_def->type = CIPSO_V4_MAP_STD;
130
131 for (iter = 0; iter < num_tags; iter++) {
132 if (msg_len < NETLBL_LEN_U8)
133 goto add_std_failure;
134 doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
135 switch (doi_def->tags[iter]) {
136 case CIPSO_V4_TAG_RBITMAP:
137 break;
138 default:
139 goto add_std_failure;
140 }
141 }
142 if (iter < CIPSO_V4_TAG_MAXCNT)
143 doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
144
145 if (msg_len < 6 * NETLBL_LEN_U32)
146 goto add_std_failure;
147
148 num_lvls = netlbl_getinc_u32(&msg, &msg_len);
149 if (num_lvls == 0)
150 goto add_std_failure;
151 doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len);
152 if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS)
153 goto add_std_failure;
154 doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
155 sizeof(u32),
156 GFP_KERNEL);
157 if (doi_def->map.std->lvl.local == NULL) {
158 ret_val = -ENOMEM;
159 goto add_std_failure;
160 }
161 doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len);
162 if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
163 goto add_std_failure;
164 doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
165 sizeof(u32),
166 GFP_KERNEL);
167 if (doi_def->map.std->lvl.cipso == NULL) {
168 ret_val = -ENOMEM;
169 goto add_std_failure;
170 }
171
172 num_cats = netlbl_getinc_u32(&msg, &msg_len);
173 doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len);
174 if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS)
175 goto add_std_failure;
176 doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size,
177 sizeof(u32),
178 GFP_KERNEL);
179 if (doi_def->map.std->cat.local == NULL) {
180 ret_val = -ENOMEM;
181 goto add_std_failure;
182 }
183 doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len);
184 if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS)
185 goto add_std_failure;
186 doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size,
187 sizeof(u32),
188 GFP_KERNEL);
189 if (doi_def->map.std->cat.cipso == NULL) {
190 ret_val = -ENOMEM;
191 goto add_std_failure;
192 }
193
194 if (msg_len <
195 num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) +
196 num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16))
197 goto add_std_failure;
198
199 for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++)
200 doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL;
201 for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++)
202 doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL;
203 for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++)
204 doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT;
205 for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++)
206 doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT;
207
208 for (iter = 0; iter < num_lvls; iter++) {
209 tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
210 tmp_val_b = netlbl_getinc_u8(&msg, &msg_len);
211
212 if (tmp_val_a >= doi_def->map.std->lvl.local_size ||
213 tmp_val_b >= doi_def->map.std->lvl.cipso_size)
214 goto add_std_failure;
215
216 doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a;
217 doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b;
218 }
219
220 for (iter = 0; iter < num_cats; iter++) {
221 tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
222 tmp_val_b = netlbl_getinc_u16(&msg, &msg_len);
223
224 if (tmp_val_a >= doi_def->map.std->cat.local_size ||
225 tmp_val_b >= doi_def->map.std->cat.cipso_size)
226 goto add_std_failure;
227
228 doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a;
229 doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b;
230 }
231
232 doi_def->doi = doi;
233 ret_val = cipso_v4_doi_add(doi_def);
234 if (ret_val != 0)
235 goto add_std_failure;
236 return 0;
237
238add_std_failure:
239 if (doi_def)
240 netlbl_cipsov4_doi_free(&doi_def->rcu);
241 return ret_val;
242}
243
244/**
245 * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition
246 * @doi: the DOI value
247 * @msg: the ADD message data
248 * @msg_size: the size of the ADD message buffer
249 *
250 * Description:
251 * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message
252 * and add it to the CIPSO V4 engine. Return zero on success and non-zero on
253 * error.
254 *
255 */
256static int netlbl_cipsov4_add_pass(u32 doi,
257 struct nlattr *msg,
258 size_t msg_size)
259{
260 int ret_val = -EINVAL;
261 int msg_len = msg_size;
262 u32 num_tags;
263 struct cipso_v4_doi *doi_def = NULL;
264 u32 iter;
265
266 if (msg_len < NETLBL_LEN_U32)
267 goto add_pass_failure;
268 num_tags = netlbl_getinc_u32(&msg, &msg_len);
269 if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
270 goto add_pass_failure;
271
272 doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
273 if (doi_def == NULL) {
274 ret_val = -ENOMEM;
275 goto add_pass_failure;
276 }
277 doi_def->type = CIPSO_V4_MAP_PASS;
278
279 for (iter = 0; iter < num_tags; iter++) {
280 if (msg_len < NETLBL_LEN_U8)
281 goto add_pass_failure;
282 doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
283 switch (doi_def->tags[iter]) {
284 case CIPSO_V4_TAG_RBITMAP:
285 break;
286 default:
287 goto add_pass_failure;
288 }
289 }
290 if (iter < CIPSO_V4_TAG_MAXCNT)
291 doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
292
293 doi_def->doi = doi;
294 ret_val = cipso_v4_doi_add(doi_def);
295 if (ret_val != 0)
296 goto add_pass_failure;
297 return 0;
298
299add_pass_failure:
300 if (doi_def)
301 netlbl_cipsov4_doi_free(&doi_def->rcu);
302 return ret_val;
303}
304
305/**
306 * netlbl_cipsov4_add - Handle an ADD message
307 * @skb: the NETLINK buffer
308 * @info: the Generic NETLINK info block
309 *
310 * Description:
311 * Create a new DOI definition based on the given ADD message and add it to the
312 * CIPSO V4 engine. Returns zero on success, negative values on failure.
313 *
314 */
315static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
316
317{
318 int ret_val = -EINVAL;
319 u32 doi;
320 u32 map_type;
321 int msg_len = netlbl_netlink_payload_len(skb);
322 struct nlattr *msg = netlbl_netlink_payload_data(skb);
323
324 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
325 if (ret_val != 0)
326 goto add_return;
327
328 if (msg_len < 2 * NETLBL_LEN_U32)
329 goto add_return;
330
331 doi = netlbl_getinc_u32(&msg, &msg_len);
332 map_type = netlbl_getinc_u32(&msg, &msg_len);
333 switch (map_type) {
334 case CIPSO_V4_MAP_STD:
335 ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len);
336 break;
337 case CIPSO_V4_MAP_PASS:
338 ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len);
339 break;
340 }
341
342add_return:
343 netlbl_netlink_send_ack(info,
344 netlbl_cipsov4_gnl_family.id,
345 NLBL_CIPSOV4_C_ACK,
346 -ret_val);
347 return ret_val;
348}
349
350/**
351 * netlbl_cipsov4_list - Handle a LIST message
352 * @skb: the NETLINK buffer
353 * @info: the Generic NETLINK info block
354 *
355 * Description:
356 * Process a user generated LIST message and respond accordingly. Returns
357 * zero on success and negative values on error.
358 *
359 */
360static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
361{
362 int ret_val = -EINVAL;
363 u32 doi;
364 struct nlattr *msg = netlbl_netlink_payload_data(skb);
365 struct sk_buff *ans_skb;
366
367 if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32)
368 goto list_failure;
369
370 doi = nla_get_u32(msg);
371 ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN));
372 if (ans_skb == NULL) {
373 ret_val = -ENOMEM;
374 goto list_failure;
375 }
376 netlbl_netlink_hdr_push(ans_skb,
377 info->snd_pid,
378 0,
379 netlbl_cipsov4_gnl_family.id,
380 NLBL_CIPSOV4_C_LIST);
381
382 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
383 if (ret_val != 0)
384 goto list_failure;
385
386 return 0;
387
388list_failure:
389 netlbl_netlink_send_ack(info,
390 netlbl_cipsov4_gnl_family.id,
391 NLBL_CIPSOV4_C_ACK,
392 -ret_val);
393 return ret_val;
394}
395
396/**
397 * netlbl_cipsov4_listall - Handle a LISTALL message
398 * @skb: the NETLINK buffer
399 * @info: the Generic NETLINK info block
400 *
401 * Description:
402 * Process a user generated LISTALL message and respond accordingly. Returns
403 * zero on success and negative values on error.
404 *
405 */
406static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info)
407{
408 int ret_val = -EINVAL;
409 struct sk_buff *ans_skb;
410
411 ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN));
412 if (ans_skb == NULL) {
413 ret_val = -ENOMEM;
414 goto listall_failure;
415 }
416 netlbl_netlink_hdr_push(ans_skb,
417 info->snd_pid,
418 0,
419 netlbl_cipsov4_gnl_family.id,
420 NLBL_CIPSOV4_C_LISTALL);
421
422 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
423 if (ret_val != 0)
424 goto listall_failure;
425
426 return 0;
427
428listall_failure:
429 netlbl_netlink_send_ack(info,
430 netlbl_cipsov4_gnl_family.id,
431 NLBL_CIPSOV4_C_ACK,
432 -ret_val);
433 return ret_val;
434}
435
436/**
437 * netlbl_cipsov4_remove - Handle a REMOVE message
438 * @skb: the NETLINK buffer
439 * @info: the Generic NETLINK info block
440 *
441 * Description:
442 * Process a user generated REMOVE message and respond accordingly. Returns
443 * zero on success, negative values on failure.
444 *
445 */
446static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
447{
448 int ret_val;
449 u32 doi;
450 struct nlattr *msg = netlbl_netlink_payload_data(skb);
451
452 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
453 if (ret_val != 0)
454 goto remove_return;
455
456 if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) {
457 ret_val = -EINVAL;
458 goto remove_return;
459 }
460
461 doi = nla_get_u32(msg);
462 ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
463
464remove_return:
465 netlbl_netlink_send_ack(info,
466 netlbl_cipsov4_gnl_family.id,
467 NLBL_CIPSOV4_C_ACK,
468 -ret_val);
469 return ret_val;
470}
471
472/*
473 * NetLabel Generic NETLINK Command Definitions
474 */
475
476static struct genl_ops netlbl_cipsov4_genl_c_add = {
477 .cmd = NLBL_CIPSOV4_C_ADD,
478 .flags = 0,
479 .doit = netlbl_cipsov4_add,
480 .dumpit = NULL,
481};
482
483static struct genl_ops netlbl_cipsov4_genl_c_remove = {
484 .cmd = NLBL_CIPSOV4_C_REMOVE,
485 .flags = 0,
486 .doit = netlbl_cipsov4_remove,
487 .dumpit = NULL,
488};
489
490static struct genl_ops netlbl_cipsov4_genl_c_list = {
491 .cmd = NLBL_CIPSOV4_C_LIST,
492 .flags = 0,
493 .doit = netlbl_cipsov4_list,
494 .dumpit = NULL,
495};
496
497static struct genl_ops netlbl_cipsov4_genl_c_listall = {
498 .cmd = NLBL_CIPSOV4_C_LISTALL,
499 .flags = 0,
500 .doit = netlbl_cipsov4_listall,
501 .dumpit = NULL,
502};
503
504/*
505 * NetLabel Generic NETLINK Protocol Functions
506 */
507
508/**
509 * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component
510 *
511 * Description:
512 * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK
513 * mechanism. Returns zero on success, negative values on failure.
514 *
515 */
516int netlbl_cipsov4_genl_init(void)
517{
518 int ret_val;
519
520 ret_val = genl_register_family(&netlbl_cipsov4_gnl_family);
521 if (ret_val != 0)
522 return ret_val;
523
524 ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
525 &netlbl_cipsov4_genl_c_add);
526 if (ret_val != 0)
527 return ret_val;
528 ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
529 &netlbl_cipsov4_genl_c_remove);
530 if (ret_val != 0)
531 return ret_val;
532 ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
533 &netlbl_cipsov4_genl_c_list);
534 if (ret_val != 0)
535 return ret_val;
536 ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
537 &netlbl_cipsov4_genl_c_listall);
538 if (ret_val != 0)
539 return ret_val;
540
541 return 0;
542}
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
new file mode 100644
index 000000000000..4c6ff4b93004
--- /dev/null
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -0,0 +1,217 @@
1/*
2 * NetLabel CIPSO/IPv4 Support
3 *
4 * This file defines the CIPSO/IPv4 functions for the NetLabel system. The
5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#ifndef _NETLABEL_CIPSO_V4
32#define _NETLABEL_CIPSO_V4
33
34#include <net/netlabel.h>
35
36/*
37 * The following NetLabel payloads are supported by the CIPSO subsystem, all
38 * of which are preceded by the nlmsghdr struct.
39 *
40 * o ACK:
41 * Sent by the kernel in response to an application's message; applications
42 * should never send this message.
43 *
44 * +----------------------+-----------------------+
45 * | seq number (32 bits) | return code (32 bits) |
46 * +----------------------+-----------------------+
47 *
48 * seq number: the sequence number of the original message, taken from the
49 * nlmsghdr structure
50 * return code: return value, based on errno values
51 *
52 * o ADD:
53 * Sent by an application to add a new DOI mapping table; after completion
54 * of the task the kernel should ACK this message.
55 *
56 * +---------------+--------------------+---------------------+
57 * | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ...
58 * +---------------+--------------------+---------------------+
59 *
60 * +-----------------+
61 * | tag #X (8 bits) | ... repeated
62 * +-----------------+
63 *
64 * +-------------- ---- --- -- -
65 * | mapping data
66 * +-------------- ---- --- -- -
67 *
68 * DOI: the DOI value
69 * map type: the mapping table type (defined in the cipso_ipv4.h header
70 * as CIPSO_V4_MAP_*)
71 * tag count: the number of tags, must be greater than zero
72 * tag: the CIPSO tag for the DOI, tags listed first are given
73 * higher priority when sending packets
74 * mapping data: specific to the map type (see below)
75 *
76 * CIPSO_V4_MAP_STD
77 *
78 * +------------------+-----------------------+----------------------+
79 * | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ...
80 * +------------------+-----------------------+----------------------+
81 *
82 * +----------------------+---------------------+---------------------+
83 * | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ...
84 * +----------------------+---------------------+---------------------+
85 *
86 * +--------------------------+-------------------------+
87 * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
88 * +--------------------------+-------------------------+
89 *
90 * +-----------------------------+-----------------------------+
91 * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
92 * +-----------------------------+-----------------------------+
93 *
94 * levels: the number of level mappings
95 * max l level: the highest local level
96 * max r level: the highest remote/CIPSO level
97 * categories: the number of category mappings
98 * max l cat: the highest local category
99 * max r cat: the highest remote/CIPSO category
100 * local level: the local part of a level mapping
101 * CIPSO level: the remote/CIPSO part of a level mapping
102 * local category: the local part of a category mapping
103 * CIPSO category: the remote/CIPSO part of a category mapping
104 *
105 * CIPSO_V4_MAP_PASS
106 *
107 * No mapping data is needed for this map type.
108 *
109 * o REMOVE:
110 * Sent by an application to remove a specific DOI mapping table from the
111 * CIPSO V4 system. The kernel should ACK this message.
112 *
113 * +---------------+
114 * | DOI (32 bits) |
115 * +---------------+
116 *
117 * DOI: the DOI value
118 *
119 * o LIST:
120 * Sent by an application to list the details of a DOI definition. The
121 * kernel should send an ACK on error or a response as indicated below. The
122 * application generated message format is shown below.
123 *
124 * +---------------+
125 * | DOI (32 bits) |
126 * +---------------+
127 *
128 * DOI: the DOI value
129 *
130 * The valid response message format depends on the type of the DOI mapping,
131 * the known formats are shown below.
132 *
133 * +--------------------+
134 * | map type (32 bits) | ...
135 * +--------------------+
136 *
137 * map type: the DOI mapping table type (defined in the cipso_ipv4.h
138 * header as CIPSO_V4_MAP_*)
139 *
140 * (map type == CIPSO_V4_MAP_STD)
141 *
142 * +----------------+------------------+----------------------+
143 * | tags (32 bits) | levels (32 bits) | categories (32 bits) | ...
144 * +----------------+------------------+----------------------+
145 *
146 * +-----------------+
147 * | tag #X (8 bits) | ... repeated
148 * +-----------------+
149 *
150 * +--------------------------+-------------------------+
151 * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
152 * +--------------------------+-------------------------+
153 *
154 * +-----------------------------+-----------------------------+
155 * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
156 * +-----------------------------+-----------------------------+
157 *
158 * tags: the number of CIPSO tag types
159 * levels: the number of level mappings
160 * categories: the number of category mappings
161 * tag: the tag number, tags listed first are given higher
162 * priority when sending packets
163 * local level: the local part of a level mapping
164 * CIPSO level: the remote/CIPSO part of a level mapping
165 * local category: the local part of a category mapping
166 * CIPSO category: the remote/CIPSO part of a category mapping
167 *
168 * (map type == CIPSO_V4_MAP_PASS)
169 *
170 * +----------------+
171 * | tags (32 bits) | ...
172 * +----------------+
173 *
174 * +-----------------+
175 * | tag #X (8 bits) | ... repeated
176 * +-----------------+
177 *
178 * tags: the number of CIPSO tag types
179 * tag: the tag number, tags listed first are given higher
180 * priority when sending packets
181 *
182 * o LISTALL:
183 * This message is sent by an application to list the valid DOIs on the
184 * system. There is no payload and the kernel should respond with an ACK
185 * or the following message.
186 *
187 * +---------------------+------------------+-----------------------+
188 * | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) |
189 * +---------------------+------------------+-----------------------+
190 *
191 * +-----------------------+
192 * | map type #X (32 bits) | ...
193 * +-----------------------+
194 *
195 * DOI count: the number of DOIs
196 * DOI: the DOI value
197 * map type: the DOI mapping table type (defined in the cipso_ipv4.h
198 * header as CIPSO_V4_MAP_*)
199 *
200 */
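
/*
 * As an illustration of the REMOVE payload described above, the sketch
 * below shows how a userspace tool might assemble the message: an
 * nlmsghdr, a genlmsghdr, and the 32-bit DOI. The DOI travels inside a
 * netlink attribute since the kernel reads it with nla_get_u32(); the
 * attribute type value (3, mirroring the kernel's NLA_U32) and the
 * already-resolved Generic NETLINK family id are assumptions for this
 * sketch, not part of the interface definition.
 */
#include <string.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>

/* Build nlmsghdr + genlmsghdr + one u32 attribute carrying the DOI;
 * returns the total message length, or -1 if the buffer is too small. */
static int cipsov4_remove_build(char *buf, size_t buf_len,
                                __u16 family_id, __u32 doi)
{
	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
	struct genlmsghdr *ghdr;
	struct nlattr *nla;

	if (buf_len < NLMSG_SPACE(GENL_HDRLEN + NLA_HDRLEN + sizeof(doi)))
		return -1;
	memset(buf, 0, buf_len);

	nlh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN + NLA_HDRLEN + sizeof(doi));
	nlh->nlmsg_type = family_id;  /* resolved via CTRL_CMD_GETFAMILY */
	nlh->nlmsg_flags = NLM_F_REQUEST;

	ghdr = NLMSG_DATA(nlh);
	ghdr->cmd = 3;      /* NLBL_CIPSOV4_C_REMOVE, from the enum below */
	ghdr->version = 1;  /* NETLBL_PROTO_VERSION, assumed to be 1 */

	nla = (struct nlattr *)((char *)ghdr + GENL_HDRLEN);
	nla->nla_type = 3;  /* mirrors the kernel's NLA_U32 (assumption) */
	nla->nla_len = NLA_HDRLEN + sizeof(doi);
	memcpy((char *)nla + NLA_HDRLEN, &doi, sizeof(doi));

	return nlh->nlmsg_len;
}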
201
202/* NetLabel CIPSOv4 commands */
203enum {
204 NLBL_CIPSOV4_C_UNSPEC,
205 NLBL_CIPSOV4_C_ACK,
206 NLBL_CIPSOV4_C_ADD,
207 NLBL_CIPSOV4_C_REMOVE,
208 NLBL_CIPSOV4_C_LIST,
209 NLBL_CIPSOV4_C_LISTALL,
210 __NLBL_CIPSOV4_C_MAX,
211};
212#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)
213
214/* NetLabel protocol functions */
215int netlbl_cipsov4_genl_init(void);
216
217#endif
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
new file mode 100644
index 000000000000..0489a1378101
--- /dev/null
+++ b/net/netlabel/netlabel_domainhash.c
@@ -0,0 +1,513 @@
1/*
2 * NetLabel Domain Hash Table
3 *
4 * This file manages the domain hash table that NetLabel uses to determine
5 * which network labeling protocol to use for a given domain. The NetLabel
6 * system manages static and dynamic label mappings for network protocols such
7 * as CIPSO and RIPSO.
8 *
9 * Author: Paul Moore <paul.moore@hp.com>
10 *
11 */
12
13/*
14 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
24 * the GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 */
31
32#include <linux/types.h>
33#include <linux/rcupdate.h>
34#include <linux/list.h>
35#include <linux/skbuff.h>
36#include <linux/spinlock.h>
37#include <linux/string.h>
38#include <net/netlabel.h>
39#include <net/cipso_ipv4.h>
40#include <asm/bug.h>
41
42#include "netlabel_mgmt.h"
43#include "netlabel_domainhash.h"
44
45struct netlbl_domhsh_tbl {
46 struct list_head *tbl;
47 u32 size;
48};
49
50/* Domain hash table */
51/* XXX - updates should be so rare that having one spinlock for the entire
52 * hash table should be okay */
53static DEFINE_SPINLOCK(netlbl_domhsh_lock);
54static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
55
56/* Default domain mapping */
57static DEFINE_SPINLOCK(netlbl_domhsh_def_lock);
58static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
59
60/*
61 * Domain Hash Table Helper Functions
62 */
63
64/**
65 * netlbl_domhsh_free_entry - Frees a domain hash table entry
66 * @entry: the entry's RCU field
67 *
68 * Description:
69 * This function is designed to be used as a callback to the call_rcu()
70 * function so that the memory allocated to a hash table entry can be released
71 * safely.
72 *
73 */
74static void netlbl_domhsh_free_entry(struct rcu_head *entry)
75{
76 struct netlbl_dom_map *ptr;
77
78 ptr = container_of(entry, struct netlbl_dom_map, rcu);
79 kfree(ptr->domain);
80 kfree(ptr);
81}
82
83/**
84 * netlbl_domhsh_hash - Hashing function for the domain hash table
85 * @domain: the domain name to hash
86 *
87 * Description:
88 * This is the hashing function for the domain hash table, it returns the
89 * correct bucket number for the domain. The caller is responsible for
90 * calling the rcu_read_[un]lock() functions.
91 *
92 */
93static u32 netlbl_domhsh_hash(const char *key)
94{
95 u32 iter;
96 u32 val;
97 u32 len;
98
99 /* This is taken (with slight modification) from
100 * security/selinux/ss/symtab.c:symhash() */
101
102 for (iter = 0, val = 0, len = strlen(key); iter < len; iter++)
103 val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter];
104 return val & (rcu_dereference(netlbl_domhsh)->size - 1);
105}
106
107/**
108 * netlbl_domhsh_search - Search for a domain entry
109 * @domain: the domain
110 * @def: return default if no match is found
111 *
112 * Description:
113 * Searches the domain hash table and returns a pointer to the hash table
114 * entry if found, otherwise NULL is returned. If @def is non-zero and a
115 * match is not found in the domain hash table the default mapping is returned
116 * if it exists. The caller is responsible for the RCU hash table locks
117 * (i.e. the caller must call rcu_read_[un]lock()).
118 *
119 */
120static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
121{
122 u32 bkt;
123 struct netlbl_dom_map *iter;
124
125 if (domain != NULL) {
126 bkt = netlbl_domhsh_hash(domain);
127 list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list)
128 if (iter->valid && strcmp(iter->domain, domain) == 0)
129 return iter;
130 }
131
132 if (def != 0) {
133 iter = rcu_dereference(netlbl_domhsh_def);
134 if (iter != NULL && iter->valid)
135 return iter;
136 }
137
138 return NULL;
139}
140
141/*
142 * Domain Hash Table Functions
143 */
144
145/**
146 * netlbl_domhsh_init - Init for the domain hash
147 * @size: the number of bits to use for the hash buckets
148 *
149 * Description:
150 * Initializes the domain hash table; it should be called only by
151 * netlbl_user_init() during initialization. Returns zero on success, non-zero
152 * values on error.
153 *
154 */
155int netlbl_domhsh_init(u32 size)
156{
157 u32 iter;
158 struct netlbl_domhsh_tbl *hsh_tbl;
159
160 if (size == 0)
161 return -EINVAL;
162
163 hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL);
164 if (hsh_tbl == NULL)
165 return -ENOMEM;
166 hsh_tbl->size = 1 << size;
167 hsh_tbl->tbl = kcalloc(hsh_tbl->size,
168 sizeof(struct list_head),
169 GFP_KERNEL);
170 if (hsh_tbl->tbl == NULL) {
171 kfree(hsh_tbl);
172 return -ENOMEM;
173 }
174 for (iter = 0; iter < hsh_tbl->size; iter++)
175 INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
176
177 rcu_read_lock();
178 spin_lock(&netlbl_domhsh_lock);
179 rcu_assign_pointer(netlbl_domhsh, hsh_tbl);
180 spin_unlock(&netlbl_domhsh_lock);
181 rcu_read_unlock();
182
183 return 0;
184}
185
186/**
187 * netlbl_domhsh_add - Adds a entry to the domain hash table
188 * @entry: the entry to add
189 *
190 * Description:
191 * Adds a new entry to the domain hash table and handles any updates to the
192 * lower level protocol handler (i.e. CIPSO). Returns zero on success,
193 * negative on failure.
194 *
195 */
196int netlbl_domhsh_add(struct netlbl_dom_map *entry)
197{
198 int ret_val;
199 u32 bkt;
200
201 switch (entry->type) {
202 case NETLBL_NLTYPE_UNLABELED:
203 ret_val = 0;
204 break;
205 case NETLBL_NLTYPE_CIPSOV4:
206 ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
207 entry->domain);
208 break;
209 default:
210 return -EINVAL;
211 }
212 if (ret_val != 0)
213 return ret_val;
214
215 entry->valid = 1;
216 INIT_RCU_HEAD(&entry->rcu);
217
218 ret_val = 0;
219 rcu_read_lock();
220 if (entry->domain != NULL) {
221 bkt = netlbl_domhsh_hash(entry->domain);
222 spin_lock(&netlbl_domhsh_lock);
223 if (netlbl_domhsh_search(entry->domain, 0) == NULL)
224 list_add_tail_rcu(&entry->list,
225 &netlbl_domhsh->tbl[bkt]);
226 else
227 ret_val = -EEXIST;
228 spin_unlock(&netlbl_domhsh_lock);
229 } else if (entry->domain == NULL) {
230 INIT_LIST_HEAD(&entry->list);
231 spin_lock(&netlbl_domhsh_def_lock);
232 if (rcu_dereference(netlbl_domhsh_def) == NULL)
233 rcu_assign_pointer(netlbl_domhsh_def, entry);
234 else
235 ret_val = -EEXIST;
236 spin_unlock(&netlbl_domhsh_def_lock);
237 } else
238 ret_val = -EINVAL;
239 rcu_read_unlock();
240
241 if (ret_val != 0) {
242 switch (entry->type) {
243 case NETLBL_NLTYPE_CIPSOV4:
244 if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
245 entry->domain) != 0)
246 BUG();
247 break;
248 }
249 }
250
251 return ret_val;
252}
253
254/**
255 * netlbl_domhsh_add_default - Adds the default entry to the domain hash table
256 * @entry: the entry to add
257 *
258 * Description:
259 * Adds a new default entry to the domain hash table and handles any updates
260 * to the lower level protocol handler (i.e. CIPSO). Returns zero on success,
261 * negative on failure.
262 *
263 */
264int netlbl_domhsh_add_default(struct netlbl_dom_map *entry)
265{
266 return netlbl_domhsh_add(entry);
267}
268
269/**
270 * netlbl_domhsh_remove - Removes an entry from the domain hash table
271 * @domain: the domain to remove
272 *
273 * Description:
274 * Removes an entry from the domain hash table and handles any updates to the
275 * lower level protocol handler (i.e. CIPSO). Returns zero on success,
276 * negative on failure.
277 *
278 */
279int netlbl_domhsh_remove(const char *domain)
280{
281 int ret_val = -ENOENT;
282 struct netlbl_dom_map *entry;
283
284 rcu_read_lock();
285 if (domain != NULL)
286 entry = netlbl_domhsh_search(domain, 0);
287 else
288 entry = netlbl_domhsh_search(domain, 1);
289 if (entry == NULL)
290 goto remove_return;
291 switch (entry->type) {
292 case NETLBL_NLTYPE_UNLABELED:
293 break;
294 case NETLBL_NLTYPE_CIPSOV4:
295 ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
296 entry->domain);
297 if (ret_val != 0)
298 goto remove_return;
299 break;
300 }
301 ret_val = 0;
302 if (entry != rcu_dereference(netlbl_domhsh_def)) {
303 spin_lock(&netlbl_domhsh_lock);
304 if (entry->valid) {
305 entry->valid = 0;
306 list_del_rcu(&entry->list);
307 } else
308 ret_val = -ENOENT;
309 spin_unlock(&netlbl_domhsh_lock);
310 } else {
311 spin_lock(&netlbl_domhsh_def_lock);
312 if (entry->valid) {
313 entry->valid = 0;
314 rcu_assign_pointer(netlbl_domhsh_def, NULL);
315 } else
316 ret_val = -ENOENT;
317 spin_unlock(&netlbl_domhsh_def_lock);
318 }
319 if (ret_val == 0)
320 call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
321
322remove_return:
323 rcu_read_unlock();
324 return ret_val;
325}
326
327/**
328 * netlbl_domhsh_remove_default - Removes the default entry from the table
329 *
330 * Description:
331 * Removes/resets the default entry for the domain hash table and handles any
332 * updates to the lower level protocol handler (i.e. CIPSO). Returns zero on
333 * success, non-zero on failure.
334 *
335 */
336int netlbl_domhsh_remove_default(void)
337{
338 return netlbl_domhsh_remove(NULL);
339}
340
341/**
342 * netlbl_domhsh_getentry - Get an entry from the domain hash table
343 * @domain: the domain name to search for
344 *
345 * Description:
346 * Look through the domain hash table searching for an entry to match @domain,
347 * return a pointer to the entry or NULL if no match is found. The caller is
348 * responsible for ensuring that rcu_read_[un]lock() is called.
349 *
350 */
351struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
352{
353 return netlbl_domhsh_search(domain, 1);
354}
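
/*
 * A minimal sketch of the calling convention for the lookup above: the
 * lookup must be bracketed by rcu_read_lock()/rcu_read_unlock() and the
 * returned pointer must not be used outside the read-side critical
 * section. The function name and fallback value are illustrative only.
 */
static u32 example_domain_type(const char *domain)
{
	struct netlbl_dom_map *entry;
	u32 type = NETLBL_NLTYPE_UNLABELED;  /* illustrative fallback */

	rcu_read_lock();
	entry = netlbl_domhsh_getentry(domain);  /* may return the default */
	if (entry != NULL)
		type = entry->type;
	rcu_read_unlock();

	return type;
}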
355
356/**
357 * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff
358 *
359 * Description:
360 * Dump the domain hash table into a buffer suitable for returning to an
361 * application in response to a NetLabel management DOMAIN message. This
362 * function may fail if another process is growing the hash table at the same
363 * time. The returned sk_buff has room at the front of the sk_buff for
364 * @headroom bytes. See netlabel.h for the DOMAIN message format. Returns a
365 * pointer to a sk_buff on success, NULL on error.
366 *
367 */
368struct sk_buff *netlbl_domhsh_dump(size_t headroom)
369{
370 struct sk_buff *skb = NULL;
371 ssize_t buf_len;
372 u32 bkt_iter;
373 u32 dom_cnt = 0;
374 struct netlbl_domhsh_tbl *hsh_tbl;
375 struct netlbl_dom_map *list_iter;
376 ssize_t tmp_len;
377
378 buf_len = NETLBL_LEN_U32;
379 rcu_read_lock();
380 hsh_tbl = rcu_dereference(netlbl_domhsh);
381 for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
382 list_for_each_entry_rcu(list_iter,
383 &hsh_tbl->tbl[bkt_iter], list) {
384 buf_len += NETLBL_LEN_U32 +
385 nla_total_size(strlen(list_iter->domain) + 1);
386 switch (list_iter->type) {
387 case NETLBL_NLTYPE_UNLABELED:
388 break;
389 case NETLBL_NLTYPE_CIPSOV4:
390 buf_len += 2 * NETLBL_LEN_U32;
391 break;
392 }
393 dom_cnt++;
394 }
395
396 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
397 if (skb == NULL)
398 goto dump_failure;
399
400 if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0)
401 goto dump_failure;
402 buf_len -= NETLBL_LEN_U32;
403 hsh_tbl = rcu_dereference(netlbl_domhsh);
404 for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
405 list_for_each_entry_rcu(list_iter,
406 &hsh_tbl->tbl[bkt_iter], list) {
407 tmp_len = nla_total_size(strlen(list_iter->domain) +
408 1);
409 if (buf_len < NETLBL_LEN_U32 + tmp_len)
410 goto dump_failure;
411 if (nla_put_string(skb,
412 NLA_STRING,
413 list_iter->domain) != 0)
414 goto dump_failure;
415 if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0)
416 goto dump_failure;
417 buf_len -= NETLBL_LEN_U32 + tmp_len;
418 switch (list_iter->type) {
419 case NETLBL_NLTYPE_UNLABELED:
420 break;
421 case NETLBL_NLTYPE_CIPSOV4:
422 if (buf_len < 2 * NETLBL_LEN_U32)
423 goto dump_failure;
424 if (nla_put_u32(skb,
425 NLA_U32,
426 list_iter->type_def.cipsov4->type) != 0)
427 goto dump_failure;
428 if (nla_put_u32(skb,
429 NLA_U32,
430 list_iter->type_def.cipsov4->doi) != 0)
431 goto dump_failure;
432 buf_len -= 2 * NETLBL_LEN_U32;
433 break;
434 }
435 }
436 rcu_read_unlock();
437
438 return skb;
439
440dump_failure:
441 rcu_read_unlock();
442 kfree_skb(skb);
443 return NULL;
444}
445
446/**
447 * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff
448 *
449 * Description:
450 * Dump the default domain mapping into a buffer suitable for returning to an
451 * application in response to a NetLabel management DEFDOMAIN message. This
452 * function may fail if another process is changing the default domain mapping
453 * at the same time. The returned sk_buff has room at the front of the
454 * sk_buff for @headroom bytes. See netlabel.h for the DEFDOMAIN message
455 * format. Returns a pointer to a sk_buff on success, NULL on error.
456 *
457 */
458struct sk_buff *netlbl_domhsh_dump_default(size_t headroom)
459{
460 struct sk_buff *skb;
461 ssize_t buf_len;
462 struct netlbl_dom_map *entry;
463
464 buf_len = NETLBL_LEN_U32;
465 rcu_read_lock();
466 entry = rcu_dereference(netlbl_domhsh_def);
467 if (entry != NULL)
468 switch (entry->type) {
469 case NETLBL_NLTYPE_UNLABELED:
470 break;
471 case NETLBL_NLTYPE_CIPSOV4:
472 buf_len += 2 * NETLBL_LEN_U32;
473 break;
474 }
475
476 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
477 if (skb == NULL)
478 goto dump_default_failure;
479
480 if (entry != rcu_dereference(netlbl_domhsh_def))
481 goto dump_default_failure;
482 if (entry != NULL) {
483 if (nla_put_u32(skb, NLA_U32, entry->type) != 0)
484 goto dump_default_failure;
485 buf_len -= NETLBL_LEN_U32;
486 switch (entry->type) {
487 case NETLBL_NLTYPE_UNLABELED:
488 break;
489 case NETLBL_NLTYPE_CIPSOV4:
490 if (buf_len < 2 * NETLBL_LEN_U32)
491 goto dump_default_failure;
492 if (nla_put_u32(skb,
493 NLA_U32,
494 entry->type_def.cipsov4->type) != 0)
495 goto dump_default_failure;
496 if (nla_put_u32(skb,
497 NLA_U32,
498 entry->type_def.cipsov4->doi) != 0)
499 goto dump_default_failure;
500 buf_len -= 2 * NETLBL_LEN_U32;
501 break;
502 }
503 } else
504 nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE);
505 rcu_read_unlock();
506
507 return skb;
508
509dump_default_failure:
510 rcu_read_unlock();
511 kfree_skb(skb);
512 return NULL;
513}
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
new file mode 100644
index 000000000000..99a2287de246
--- /dev/null
+++ b/net/netlabel/netlabel_domainhash.h
@@ -0,0 +1,67 @@
1/*
2 * NetLabel Domain Hash Table
3 *
4 * This file manages the domain hash table that NetLabel uses to determine
5 * which network labeling protocol to use for a given domain. The NetLabel
6 * system manages static and dynamic label mappings for network protocols such
7 * as CIPSO and RIPSO.
8 *
9 * Author: Paul Moore <paul.moore@hp.com>
10 *
11 */
12
13/*
14 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
24 * the GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 */
31
32#ifndef _NETLABEL_DOMAINHASH_H
33#define _NETLABEL_DOMAINHASH_H
34
35#include <linux/types.h>
36#include <linux/rcupdate.h>
37#include <linux/list.h>
38
39/* Domain hash table size */
40/* XXX - currently this number is an uneducated guess */
41#define NETLBL_DOMHSH_BITSIZE 7
42
43/* Domain mapping definition struct */
44struct netlbl_dom_map {
45 char *domain;
46 u32 type;
47 union {
48 struct cipso_v4_doi *cipsov4;
49 } type_def;
50
51 u32 valid;
52 struct list_head list;
53 struct rcu_head rcu;
54};
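
/*
 * A minimal sketch showing how a caller might populate this structure
 * for a CIPSOv4 mapping before handing it to netlbl_domhsh_add(); the
 * function name, the domain string, and the abbreviated error handling
 * are illustrative assumptions (kzalloc()/kstrdup() from linux/slab.h).
 */
static inline int example_map_domain(struct cipso_v4_doi *doi_def)
{
	struct netlbl_dom_map *entry;
	int ret_val;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (entry == NULL)
		return -ENOMEM;
	entry->domain = kstrdup("example_t", GFP_KERNEL);
	if (entry->domain == NULL) {
		kfree(entry);
		return -ENOMEM;
	}
	entry->type = NETLBL_NLTYPE_CIPSOV4;
	entry->type_def.cipsov4 = doi_def;

	ret_val = netlbl_domhsh_add(entry);
	if (ret_val != 0) {
		kfree(entry->domain);
		kfree(entry);
	}
	return ret_val;
}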
55
56/* init function */
57int netlbl_domhsh_init(u32 size);
58
59/* Manipulate the domain hash table */
60int netlbl_domhsh_add(struct netlbl_dom_map *entry);
61int netlbl_domhsh_add_default(struct netlbl_dom_map *entry);
62int netlbl_domhsh_remove_default(void);
63struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
64struct sk_buff *netlbl_domhsh_dump(size_t headroom);
65struct sk_buff *netlbl_domhsh_dump_default(size_t headroom);
66
67#endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
new file mode 100644
index 000000000000..0fd8aaafe23f
--- /dev/null
+++ b/net/netlabel/netlabel_kapi.c
@@ -0,0 +1,231 @@
1/*
2 * NetLabel Kernel API
3 *
4 * This file defines the kernel API for the NetLabel system. The NetLabel
5 * system manages static and dynamic label mappings for network protocols such
6 * as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#include <linux/init.h>
32#include <linux/types.h>
33#include <net/ip.h>
34#include <net/netlabel.h>
35#include <net/cipso_ipv4.h>
36#include <asm/bug.h>
37
38#include "netlabel_domainhash.h"
39#include "netlabel_unlabeled.h"
40#include "netlabel_user.h"
41
42/*
43 * LSM Functions
44 */
45
46/**
47 * netlbl_socket_setattr - Label a socket using the correct protocol
48 * @sock: the socket to label
49 * @secattr: the security attributes
50 *
51 * Description:
52 * Attach the correct label to the given socket using the security attributes
53 * specified in @secattr. This function requires exclusive access to
54 * @sock->sk, which means it either needs to be in the process of being
55 * created or locked via lock_sock(sock->sk). Returns zero on success,
56 * negative values on failure.
57 *
58 */
59int netlbl_socket_setattr(const struct socket *sock,
60 const struct netlbl_lsm_secattr *secattr)
61{
62 int ret_val = -ENOENT;
63 struct netlbl_dom_map *dom_entry;
64
65 rcu_read_lock();
66 dom_entry = netlbl_domhsh_getentry(secattr->domain);
67 if (dom_entry == NULL)
68 goto socket_setattr_return;
69 switch (dom_entry->type) {
70 case NETLBL_NLTYPE_CIPSOV4:
71 ret_val = cipso_v4_socket_setattr(sock,
72 dom_entry->type_def.cipsov4,
73 secattr);
74 break;
75 case NETLBL_NLTYPE_UNLABELED:
76 ret_val = 0;
77 break;
78 default:
79 ret_val = -ENOENT;
80 }
81
82socket_setattr_return:
83 rcu_read_unlock();
84 return ret_val;
85}
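
/*
 * A sketch of the locking contract noted above, as seen from a
 * hypothetical LSM hook labeling an already-created socket; building
 * the secattr is left to the LSM and is not shown here.
 */
static int example_lsm_sock_label(struct socket *sock,
				  struct netlbl_lsm_secattr *secattr)
{
	int ret_val;

	lock_sock(sock->sk);
	ret_val = netlbl_socket_setattr(sock, secattr);
	release_sock(sock->sk);

	return ret_val;
}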
86
87/**
88 * netlbl_socket_getattr - Determine the security attributes of a socket
89 * @sock: the socket
90 * @secattr: the security attributes
91 *
92 * Description:
93 * Examines the given socket to see if any NetLabel style labeling has been
94 * applied to the socket, if so it parses the socket label and returns the
95 * security attributes in @secattr. Returns zero on success, negative values
96 * on failure.
97 *
98 */
99int netlbl_socket_getattr(const struct socket *sock,
100 struct netlbl_lsm_secattr *secattr)
101{
102 int ret_val;
103
104 ret_val = cipso_v4_socket_getattr(sock, secattr);
105 if (ret_val == 0)
106 return 0;
107
108 return netlbl_unlabel_getattr(secattr);
109}
110
111/**
112 * netlbl_skbuff_getattr - Determine the security attributes of a packet
113 * @skb: the packet
114 * @secattr: the security attributes
115 *
116 * Description:
117 * Examines the given packet to see if a recognized form of packet labeling
118 * is present, if so it parses the packet label and returns the security
119 * attributes in @secattr. Returns zero on success, negative values on
120 * failure.
121 *
122 */
123int netlbl_skbuff_getattr(const struct sk_buff *skb,
124 struct netlbl_lsm_secattr *secattr)
125{
126 int ret_val;
127
128 ret_val = cipso_v4_skbuff_getattr(skb, secattr);
129 if (ret_val == 0)
130 return 0;
131
132 return netlbl_unlabel_getattr(secattr);
133}
134
135/**
136 * netlbl_skbuff_err - Handle a LSM error on a sk_buff
137 * @skb: the packet
138 * @error: the error code
139 *
140 * Description:
141 * Deal with an LSM problem when handling the packet in @skb; typically this
142 * is a permission denied problem (-EACCES). The correct action is determined
143 * according to the packet's labeling protocol.
144 *
145 */
146void netlbl_skbuff_err(struct sk_buff *skb, int error)
147{
148 if (CIPSO_V4_OPTEXIST(skb))
149 cipso_v4_error(skb, error, 0);
150}
151
152/**
153 * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches
154 *
155 * Description:
156 * For all of the NetLabel protocols that support some form of label mapping
157 * cache, invalidate the cache.
158 *
159 *
160 */
161void netlbl_cache_invalidate(void)
162{
163 cipso_v4_cache_invalidate();
164}
165
166/**
167 * netlbl_cache_add - Add an entry to a NetLabel protocol cache
168 * @skb: the packet
169 * @secattr: the packet's security attributes
170 *
171 * Description:
172 * Add the LSM security attributes for the given packet to the underlying
173 * NetLabel protocol's label mapping cache. Returns zero on success, negative
174 * values on error.
175 *
176 */
177int netlbl_cache_add(const struct sk_buff *skb,
178 const struct netlbl_lsm_secattr *secattr)
179{
180 if (secattr->cache.data == NULL)
181 return -ENOMSG;
182
183 if (CIPSO_V4_OPTEXIST(skb))
184 return cipso_v4_cache_add(skb, secattr);
185
186 return -ENOMSG;
187}
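
/*
 * The cache API above is intended to be driven from the receive path;
 * a sketch of the pattern with a hypothetical caller name. After a
 * successful parse the LSM primes the protocol cache so later packets
 * carrying the same label can skip the full parse.
 */
static void example_lsm_recv(const struct sk_buff *skb,
			     struct netlbl_lsm_secattr *secattr)
{
	if (netlbl_skbuff_getattr(skb, secattr) == 0)
		netlbl_cache_add(skb, secattr);  /* -ENOMSG is harmless here */
}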
188
189/*
190 * Setup Functions
191 */
192
193/**
194 * netlbl_init - Initialize NetLabel
195 *
196 * Description:
197 * Perform the required NetLabel initialization before first use.
198 *
199 */
200static int __init netlbl_init(void)
201{
202 int ret_val;
203
204 printk(KERN_INFO "NetLabel: Initializing\n");
205 printk(KERN_INFO "NetLabel: domain hash size = %u\n",
206 (1 << NETLBL_DOMHSH_BITSIZE));
207 printk(KERN_INFO "NetLabel: protocols ="
208 " UNLABELED"
209 " CIPSOv4"
210 "\n");
211
212 ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE);
213 if (ret_val != 0)
214 goto init_failure;
215
216 ret_val = netlbl_netlink_init();
217 if (ret_val != 0)
218 goto init_failure;
219
220 ret_val = netlbl_unlabel_defconf();
221 if (ret_val != 0)
222 goto init_failure;
223 printk(KERN_INFO "NetLabel: unlabeled traffic allowed by default\n");
224
225 return 0;
226
227init_failure:
228 panic("NetLabel: failed to initialize properly (%d)\n", ret_val);
229}
230
231subsys_initcall(netlbl_init);
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
new file mode 100644
index 000000000000..85bc11a1fc46
--- /dev/null
+++ b/net/netlabel/netlabel_mgmt.c
@@ -0,0 +1,624 @@
1/*
2 * NetLabel Management Support
3 *
4 * This file defines the management functions for the NetLabel system. The
5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#include <linux/types.h>
32#include <linux/socket.h>
33#include <linux/string.h>
34#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/netlink.h>
37#include <net/genetlink.h>
38#include <net/netlabel.h>
39#include <net/cipso_ipv4.h>
40
41#include "netlabel_domainhash.h"
42#include "netlabel_user.h"
43#include "netlabel_mgmt.h"
44
45/* NetLabel Generic NETLINK management family */
46static struct genl_family netlbl_mgmt_gnl_family = {
47 .id = GENL_ID_GENERATE,
48 .hdrsize = 0,
49 .name = NETLBL_NLTYPE_MGMT_NAME,
50 .version = NETLBL_PROTO_VERSION,
51 .maxattr = 0,
52};
53
54
55/*
56 * NetLabel Command Handlers
57 */
58
59/**
60 * netlbl_mgmt_add - Handle an ADD message
61 * @skb: the NETLINK buffer
62 * @info: the Generic NETLINK info block
63 *
64 * Description:
65 * Process a user generated ADD message and add the domains from the message
66 * to the hash table. See netlabel.h for a description of the message format.
67 * Returns zero on success, negative values on failure.
68 *
69 */
70static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
71{
72 int ret_val = -EINVAL;
73 struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
74 int msg_len = netlbl_netlink_payload_len(skb);
75 u32 count;
76 struct netlbl_dom_map *entry = NULL;
77 u32 iter;
78 u32 tmp_val;
79 int tmp_size;
80
81 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
82 if (ret_val != 0)
83 goto add_failure;
84
85 if (msg_len < NETLBL_LEN_U32)
86 goto add_failure;
87 count = netlbl_getinc_u32(&msg_ptr, &msg_len);
88
89 for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) {
90 if (msg_len <= 0) {
91 ret_val = -EINVAL;
92 goto add_failure;
93 }
94 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
95 if (entry == NULL) {
96 ret_val = -ENOMEM;
97 goto add_failure;
98 }
99 tmp_size = nla_len(msg_ptr);
100 if (tmp_size <= 0 || tmp_size > msg_len) {
101 ret_val = -EINVAL;
102 goto add_failure;
103 }
104 entry->domain = kmalloc(tmp_size, GFP_KERNEL);
105 if (entry->domain == NULL) {
106 ret_val = -ENOMEM;
107 goto add_failure;
108 }
109 nla_strlcpy(entry->domain, msg_ptr, tmp_size);
110 entry->domain[tmp_size - 1] = '\0';
111 msg_ptr = nla_next(msg_ptr, &msg_len);
112
113 if (msg_len < NETLBL_LEN_U32) {
114 ret_val = -EINVAL;
115 goto add_failure;
116 }
117 tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
118 entry->type = tmp_val;
119 switch (tmp_val) {
120 case NETLBL_NLTYPE_UNLABELED:
121 ret_val = netlbl_domhsh_add(entry);
122 break;
123 case NETLBL_NLTYPE_CIPSOV4:
124 if (msg_len < NETLBL_LEN_U32) {
125 ret_val = -EINVAL;
126 goto add_failure;
127 }
128 tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
129 /* Ideally we would hold rcu_read_lock() for as
130 * long as we keep the returned DOI definition,
131 * but since the entry is always deleted when the
132 * CIPSO DOI is deleted we do not keep the lock. */
133 rcu_read_lock();
134 entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
135 if (entry->type_def.cipsov4 == NULL) {
136 rcu_read_unlock();
137 ret_val = -EINVAL;
138 goto add_failure;
139 }
140 ret_val = netlbl_domhsh_add(entry);
141 rcu_read_unlock();
142 break;
143 default:
144 ret_val = -EINVAL;
145 }
146 if (ret_val != 0)
147 goto add_failure;
148 }
149
150 netlbl_netlink_send_ack(info,
151 netlbl_mgmt_gnl_family.id,
152 NLBL_MGMT_C_ACK,
153 NETLBL_E_OK);
154 return 0;
155
156add_failure:
157 if (entry)
158 kfree(entry->domain);
159 kfree(entry);
160 netlbl_netlink_send_ack(info,
161 netlbl_mgmt_gnl_family.id,
162 NLBL_MGMT_C_ACK,
163 -ret_val);
164 return ret_val;
165}
166
167/**
168 * netlbl_mgmt_remove - Handle a REMOVE message
169 * @skb: the NETLINK buffer
170 * @info: the Generic NETLINK info block
171 *
172 * Description:
173 * Process a user generated REMOVE message and remove the specified domain
174 * mappings. Returns zero on success, negative values on failure.
175 *
176 */
177static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
178{
179 int ret_val = -EINVAL;
180 struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
181 int msg_len = netlbl_netlink_payload_len(skb);
182 u32 count;
183 u32 iter;
184 int tmp_size;
185 unsigned char *domain;
186
187 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
188 if (ret_val != 0)
189 goto remove_return;
190
191 if (msg_len < NETLBL_LEN_U32)
192 goto remove_return;
193 count = netlbl_getinc_u32(&msg_ptr, &msg_len);
194
195 for (iter = 0; iter < count && msg_len > 0; iter++) {
196 if (msg_len <= 0) {
197 ret_val = -EINVAL;
198 goto remove_return;
199 }
200 tmp_size = nla_len(msg_ptr);
201 domain = nla_data(msg_ptr);
202 if (tmp_size <= 0 || tmp_size > msg_len ||
203 domain[tmp_size - 1] != '\0') {
204 ret_val = -EINVAL;
205 goto remove_return;
206 }
207 ret_val = netlbl_domhsh_remove(domain);
208 if (ret_val != 0)
209 goto remove_return;
210 msg_ptr = nla_next(msg_ptr, &msg_len);
211 }
212
213 ret_val = 0;
214
215remove_return:
216 netlbl_netlink_send_ack(info,
217 netlbl_mgmt_gnl_family.id,
218 NLBL_MGMT_C_ACK,
219 -ret_val);
220 return ret_val;
221}
222
223/**
224 * netlbl_mgmt_list - Handle a LIST message
225 * @skb: the NETLINK buffer
226 * @info: the Generic NETLINK info block
227 *
228 * Description:
229 * Process a user generated LIST message and dump the domain hash table in a
230 * form suitable for use in a kernel generated LIST message. Returns zero on
231 * success, negative values on failure.
232 *
233 */
234static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info)
235{
236 int ret_val = -ENOMEM;
237 struct sk_buff *ans_skb;
238
239 ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN));
240 if (ans_skb == NULL)
241 goto list_failure;
242 netlbl_netlink_hdr_push(ans_skb,
243 info->snd_pid,
244 0,
245 netlbl_mgmt_gnl_family.id,
246 NLBL_MGMT_C_LIST);
247
248 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
249 if (ret_val != 0)
250 goto list_failure;
251
252 return 0;
253
254list_failure:
255 netlbl_netlink_send_ack(info,
256 netlbl_mgmt_gnl_family.id,
257 NLBL_MGMT_C_ACK,
258 -ret_val);
259 return ret_val;
260}
261
262/**
263 * netlbl_mgmt_adddef - Handle an ADDDEF message
264 * @skb: the NETLINK buffer
265 * @info: the Generic NETLINK info block
266 *
267 * Description:
268 * Process a user generated ADDDEF message and respond accordingly. Returns
269 * zero on success, negative values on failure.
270 *
271 */
272static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
273{
274 int ret_val = -EINVAL;
275 struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
276 int msg_len = netlbl_netlink_payload_len(skb);
277 struct netlbl_dom_map *entry = NULL;
278 u32 tmp_val;
279
280 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
281 if (ret_val != 0)
282 goto adddef_failure;
283
284 if (msg_len < NETLBL_LEN_U32)
285 goto adddef_failure;
286 tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
287
288 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
289 if (entry == NULL) {
290 ret_val = -ENOMEM;
291 goto adddef_failure;
292 }
293
294 entry->type = tmp_val;
295 switch (entry->type) {
296 case NETLBL_NLTYPE_UNLABELED:
297 ret_val = netlbl_domhsh_add_default(entry);
298 break;
299 case NETLBL_NLTYPE_CIPSOV4:
300 if (msg_len < NETLBL_LEN_U32) {
301 ret_val = -EINVAL;
302 goto adddef_failure;
303 }
304 tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
305 /* Ideally we would hold rcu_read_lock() while we
306 * keep the returned DOI definition, but since the
307 * entry is always deleted when the CIPSO DOI is
308 * deleted we skip holding the lock. */
309 rcu_read_lock();
310 entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
311 if (entry->type_def.cipsov4 == NULL) {
312 rcu_read_unlock();
313 ret_val = -EINVAL;
314 goto adddef_failure;
315 }
316 ret_val = netlbl_domhsh_add_default(entry);
317 rcu_read_unlock();
318 break;
319 default:
320 ret_val = -EINVAL;
321 }
322 if (ret_val != 0)
323 goto adddef_failure;
324
325 netlbl_netlink_send_ack(info,
326 netlbl_mgmt_gnl_family.id,
327 NLBL_MGMT_C_ACK,
328 NETLBL_E_OK);
329 return 0;
330
331adddef_failure:
332 kfree(entry);
333 netlbl_netlink_send_ack(info,
334 netlbl_mgmt_gnl_family.id,
335 NLBL_MGMT_C_ACK,
336 -ret_val);
337 return ret_val;
338}
339
340/**
341 * netlbl_mgmt_removedef - Handle a REMOVEDEF message
342 * @skb: the NETLINK buffer
343 * @info: the Generic NETLINK info block
344 *
345 * Description:
346 * Process a user generated REMOVEDEF message and remove the default domain
347 * mapping. Returns zero on success, negative values on failure.
348 *
349 */
350static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
351{
352 int ret_val;
353
354 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
355 if (ret_val != 0)
356 goto removedef_return;
357
358 ret_val = netlbl_domhsh_remove_default();
359
360removedef_return:
361 netlbl_netlink_send_ack(info,
362 netlbl_mgmt_gnl_family.id,
363 NLBL_MGMT_C_ACK,
364 -ret_val);
365 return ret_val;
366}
367
368/**
369 * netlbl_mgmt_listdef - Handle a LISTDEF message
370 * @skb: the NETLINK buffer
371 * @info: the Generic NETLINK info block
372 *
373 * Description:
374 * Process a user generated LISTDEF message and dump the default domain
375 * mapping in a form suitable for use in a kernel generated LISTDEF message.
376 * Returns zero on success, negative values on failure.
377 *
378 */
379static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
380{
381 int ret_val = -ENOMEM;
382 struct sk_buff *ans_skb;
383
384 ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN));
385 if (ans_skb == NULL)
386 goto listdef_failure;
387 netlbl_netlink_hdr_push(ans_skb,
388 info->snd_pid,
389 0,
390 netlbl_mgmt_gnl_family.id,
391 NLBL_MGMT_C_LISTDEF);
392
393 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
394 if (ret_val != 0)
395 goto listdef_failure;
396
397 return 0;
398
399listdef_failure:
400 netlbl_netlink_send_ack(info,
401 netlbl_mgmt_gnl_family.id,
402 NLBL_MGMT_C_ACK,
403 -ret_val);
404 return ret_val;
405}
406
407/**
408 * netlbl_mgmt_modules - Handle a MODULES message
409 * @skb: the NETLINK buffer
410 * @info: the Generic NETLINK info block
411 *
412 * Description:
413 * Process a user generated MODULES message and respond accordingly. Returns
414 * zero on success, negative values on failure.
415 */
416static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info)
417{
418 int ret_val = -ENOMEM;
419 size_t data_size;
420 u32 mod_count;
421 struct sk_buff *ans_skb = NULL;
422
423 /* unlabeled + cipsov4 */
424 mod_count = 2;
425
426 data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32;
427 ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
428 if (ans_skb == NULL)
429 goto modules_failure;
430
431 if (netlbl_netlink_hdr_put(ans_skb,
432 info->snd_pid,
433 0,
434 netlbl_mgmt_gnl_family.id,
435 NLBL_MGMT_C_MODULES) == NULL)
436 goto modules_failure;
437
438 ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count);
439 if (ret_val != 0)
440 goto modules_failure;
441 ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED);
442 if (ret_val != 0)
443 goto modules_failure;
444 ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4);
445 if (ret_val != 0)
446 goto modules_failure;
447
448 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
449 if (ret_val != 0)
450 goto modules_failure;
451
452 return 0;
453
454modules_failure:
455 kfree_skb(ans_skb);
456 netlbl_netlink_send_ack(info,
457 netlbl_mgmt_gnl_family.id,
458 NLBL_MGMT_C_ACK,
459 -ret_val);
460 return ret_val;
461}
462
463/**
464 * netlbl_mgmt_version - Handle a VERSION message
465 * @skb: the NETLINK buffer
466 * @info: the Generic NETLINK info block
467 *
468 * Description:
469 * Process a user generated VERSION message and respond accordingly. Returns
470 * zero on success, negative values on failure.
471 *
472 */
473static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
474{
475 int ret_val = -ENOMEM;
476 struct sk_buff *ans_skb = NULL;
477
478 ans_skb = netlbl_netlink_alloc_skb(0,
479 GENL_HDRLEN + NETLBL_LEN_U32,
480 GFP_KERNEL);
481 if (ans_skb == NULL)
482 goto version_failure;
483 if (netlbl_netlink_hdr_put(ans_skb,
484 info->snd_pid,
485 0,
486 netlbl_mgmt_gnl_family.id,
487 NLBL_MGMT_C_VERSION) == NULL)
488 goto version_failure;
489
490 ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION);
491 if (ret_val != 0)
492 goto version_failure;
493
494 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
495 if (ret_val != 0)
496 goto version_failure;
497
498 return 0;
499
500version_failure:
501 kfree_skb(ans_skb);
502 netlbl_netlink_send_ack(info,
503 netlbl_mgmt_gnl_family.id,
504 NLBL_MGMT_C_ACK,
505 -ret_val);
506 return ret_val;
507}
508
509
510/*
511 * NetLabel Generic NETLINK Command Definitions
512 */
513
514static struct genl_ops netlbl_mgmt_genl_c_add = {
515 .cmd = NLBL_MGMT_C_ADD,
516 .flags = 0,
517 .doit = netlbl_mgmt_add,
518 .dumpit = NULL,
519};
520
521static struct genl_ops netlbl_mgmt_genl_c_remove = {
522 .cmd = NLBL_MGMT_C_REMOVE,
523 .flags = 0,
524 .doit = netlbl_mgmt_remove,
525 .dumpit = NULL,
526};
527
528static struct genl_ops netlbl_mgmt_genl_c_list = {
529 .cmd = NLBL_MGMT_C_LIST,
530 .flags = 0,
531 .doit = netlbl_mgmt_list,
532 .dumpit = NULL,
533};
534
535static struct genl_ops netlbl_mgmt_genl_c_adddef = {
536 .cmd = NLBL_MGMT_C_ADDDEF,
537 .flags = 0,
538 .doit = netlbl_mgmt_adddef,
539 .dumpit = NULL,
540};
541
542static struct genl_ops netlbl_mgmt_genl_c_removedef = {
543 .cmd = NLBL_MGMT_C_REMOVEDEF,
544 .flags = 0,
545 .doit = netlbl_mgmt_removedef,
546 .dumpit = NULL,
547};
548
549static struct genl_ops netlbl_mgmt_genl_c_listdef = {
550 .cmd = NLBL_MGMT_C_LISTDEF,
551 .flags = 0,
552 .doit = netlbl_mgmt_listdef,
553 .dumpit = NULL,
554};
555
556static struct genl_ops netlbl_mgmt_genl_c_modules = {
557 .cmd = NLBL_MGMT_C_MODULES,
558 .flags = 0,
559 .doit = netlbl_mgmt_modules,
560 .dumpit = NULL,
561};
562
563static struct genl_ops netlbl_mgmt_genl_c_version = {
564 .cmd = NLBL_MGMT_C_VERSION,
565 .flags = 0,
566 .doit = netlbl_mgmt_version,
567 .dumpit = NULL,
568};
569
570/*
571 * NetLabel Generic NETLINK Protocol Functions
572 */
573
574/**
575 * netlbl_mgmt_genl_init - Register the NetLabel management component
576 *
577 * Description:
578 * Register the NetLabel management component with the Generic NETLINK
579 * mechanism. Returns zero on success, negative values on failure.
580 *
581 */
582int netlbl_mgmt_genl_init(void)
583{
584 int ret_val;
585
586 ret_val = genl_register_family(&netlbl_mgmt_gnl_family);
587 if (ret_val != 0)
588 return ret_val;
589
590 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
591 &netlbl_mgmt_genl_c_add);
592 if (ret_val != 0)
593 return ret_val;
594 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
595 &netlbl_mgmt_genl_c_remove);
596 if (ret_val != 0)
597 return ret_val;
598 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
599 &netlbl_mgmt_genl_c_list);
600 if (ret_val != 0)
601 return ret_val;
602 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
603 &netlbl_mgmt_genl_c_adddef);
604 if (ret_val != 0)
605 return ret_val;
606 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
607 &netlbl_mgmt_genl_c_removedef);
608 if (ret_val != 0)
609 return ret_val;
610 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
611 &netlbl_mgmt_genl_c_listdef);
612 if (ret_val != 0)
613 return ret_val;
614 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
615 &netlbl_mgmt_genl_c_modules);
616 if (ret_val != 0)
617 return ret_val;
618 ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
619 &netlbl_mgmt_genl_c_version);
620 if (ret_val != 0)
621 return ret_val;
622
623 return 0;
624}
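
/*
 * The eight near-identical registrations above could equally be table
 * driven; a sketch of that alternative, assuming the same
 * genl_register_ops() semantics and ARRAY_SIZE() from linux/kernel.h.
 * Behavior should be identical; the choice is purely stylistic.
 */
static struct genl_ops *netlbl_mgmt_genl_ops[] = {
	&netlbl_mgmt_genl_c_add,
	&netlbl_mgmt_genl_c_remove,
	&netlbl_mgmt_genl_c_list,
	&netlbl_mgmt_genl_c_adddef,
	&netlbl_mgmt_genl_c_removedef,
	&netlbl_mgmt_genl_c_listdef,
	&netlbl_mgmt_genl_c_modules,
	&netlbl_mgmt_genl_c_version,
};

static int netlbl_mgmt_genl_init_alt(void)
{
	int ret_val;
	unsigned int iter;

	ret_val = genl_register_family(&netlbl_mgmt_gnl_family);
	if (ret_val != 0)
		return ret_val;
	for (iter = 0; iter < ARRAY_SIZE(netlbl_mgmt_genl_ops); iter++) {
		ret_val = genl_register_ops(&netlbl_mgmt_gnl_family,
					    netlbl_mgmt_genl_ops[iter]);
		if (ret_val != 0)
			return ret_val;
	}
	return 0;
}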
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
new file mode 100644
index 000000000000..fd6c6acbfa08
--- /dev/null
+++ b/net/netlabel/netlabel_mgmt.h
@@ -0,0 +1,246 @@
1/*
2 * NetLabel Management Support
3 *
4 * This file defines the management functions for the NetLabel system. The
5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#ifndef _NETLABEL_MGMT_H
32#define _NETLABEL_MGMT_H
33
34#include <net/netlabel.h>
35
36/*
37 * The following NetLabel payloads are supported by the management interface,
38 * all of which are preceded by the nlmsghdr struct.
39 *
40 * o ACK:
41 * Sent by the kernel in response to an application's message; applications
42 * should never send this message.
43 *
44 * +----------------------+-----------------------+
45 * | seq number (32 bits) | return code (32 bits) |
46 * +----------------------+-----------------------+
47 *
48 * seq number: the sequence number of the original message, taken from the
49 * nlmsghdr structure
50 * return code: return value, based on errno values
51 *
52 * o ADD:
53 * Sent by an application to add a domain mapping to the NetLabel system.
54 * The kernel should respond with an ACK.
55 *
56 * +-------------------+
57 * | domains (32 bits) | ...
58 * +-------------------+
59 *
60 * domains: the number of domains in the message
61 *
62 * +--------------------------+-------------------------+
63 * | domain string (variable) | protocol type (32 bits) | ...
64 * +--------------------------+-------------------------+
65 *
66 * +-------------- ---- --- -- -
67 * | mapping data ... repeated
68 * +-------------- ---- --- -- -
69 *
70 * domain string: the domain string, NULL terminated
71 * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
72 * mapping data: specific to the map type (see below)
73 *
74 * NETLBL_NLTYPE_UNLABELED
75 *
76 * No mapping data for this protocol type.
77 *
78 * NETLBL_NLTYPE_CIPSOV4
79 *
80 * +---------------+
81 * | doi (32 bits) |
82 * +---------------+
83 *
84 * doi: the CIPSO DOI value
85 *
86 * o REMOVE:
87 * Sent by an application to remove a domain mapping from the NetLabel
88 * system. The kernel should ACK this message.
89 *
90 * +-------------------+
91 * | domains (32 bits) | ...
92 * +-------------------+
93 *
94 * domains: the number of domains in the message
95 *
96 * +--------------------------+
97 * | domain string (variable) | ...
98 * +--------------------------+
99 *
100 * domain string: the domain string, NULL terminated
101 *
102 * o LIST:
103 * This message can be sent either from an application or by the kernel in
104 * response to an application generated LIST message. When sent by an
105 * application there is no payload. The kernel should respond to a LIST
106 * message either with a LIST message on success or an ACK message on
107 * failure.
108 *
109 * +-------------------+
110 * | domains (32 bits) | ...
111 * +-------------------+
112 *
113 * domains: the number of domains in the message
114 *
115 * +--------------------------+
116 * | domain string (variable) | ...
117 * +--------------------------+
118 *
119 * +-------------------------+-------------- ---- --- -- -
120 * | protocol type (32 bits) | mapping data ... repeated
121 * +-------------------------+-------------- ---- --- -- -
122 *
123 * domain string: the domain string, NULL terminated
124 * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
125 * mapping data: specific to the map type (see below)
126 *
127 * NETLBL_NLTYPE_UNLABELED
128 *
129 * No mapping data for this protocol type.
130 *
131 * NETLBL_NLTYPE_CIPSOV4
132 *
133 * +----------------+---------------+
134 * | type (32 bits) | doi (32 bits) |
135 * +----------------+---------------+
136 *
137 * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
138 * as CIPSO_V4_MAP_*)
139 * doi: the CIPSO DOI value
140 *
141 * o ADDDEF:
142 * Sent by an application to set the default domain mapping for the NetLabel
143 * system. The kernel should respond with an ACK.
144 *
145 * +-------------------------+-------------- ---- --- -- -
146 * | protocol type (32 bits) | mapping data ... repeated
147 * +-------------------------+-------------- ---- --- -- -
148 *
149 * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
150 * mapping data: specific to the map type (see below)
151 *
152 * NETLBL_NLTYPE_UNLABELED
153 *
154 * No mapping data for this protocol type.
155 *
156 * NETLBL_NLTYPE_CIPSOV4
157 *
158 * +---------------+
159 * | doi (32 bits) |
160 * +---------------+
161 *
162 * doi: the CIPSO DOI value
163 *
164 * o REMOVEDEF:
165 * Sent by an application to remove the default domain mapping from the
166 * NetLabel system, there is no payload. The kernel should ACK this message.
167 *
168 * o LISTDEF:
169 * This message can be sent either from an application or by the kernel in
170 * response to an application generated LISTDEF message. When sent by an
171 * application there is no payload. The kernel should respond to a
172 * LISTDEF message either with a LISTDEF message on success or an ACK message
173 * on failure.
174 *
175 * +-------------------------+-------------- ---- --- -- -
176 * | protocol type (32 bits) | mapping data ... repeated
177 * +-------------------------+-------------- ---- --- -- -
178 *
179 * protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
180 * mapping data: specific to the map type (see below)
181 *
182 * NETLBL_NLTYPE_UNLABELED
183 *
184 * No mapping data for this protocol type.
185 *
186 * NETLBL_NLTYPE_CIPSOV4
187 *
188 * +----------------+---------------+
189 * | type (32 bits) | doi (32 bits) |
190 * +----------------+---------------+
191 *
192 * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
193 * as CIPSO_V4_MAP_*)
194 * doi: the CIPSO DOI value
195 *
196 * o MODULES:
197 * Sent by an application to request a list of configured NetLabel modules
198 * in the kernel. When sent by an application there is no payload.
199 *
200 * +-------------------+
201 * | modules (32 bits) | ...
202 * +-------------------+
203 *
 204 * modules: the number of modules in the message; if this is an
 205 * application generated message and the value is zero then the
 206 * kernel returns a list of the configured modules
207 *
208 * +------------------+
209 * | module (32 bits) | ... repeated
210 * +------------------+
211 *
212 * module: the module number as defined by NETLBL_NLTYPE_*
213 *
214 * o VERSION:
 215 * Sent by an application to request the NetLabel protocol version. When
 216 * sent by an application there is no payload. This message type is also
 217 * used by the kernel to respond to a VERSION request.
218 *
219 * +-------------------+
220 * | version (32 bits) |
221 * +-------------------+
222 *
223 * version: the protocol version number
224 *
225 */
226
227/* NetLabel Management commands */
228enum {
229 NLBL_MGMT_C_UNSPEC,
230 NLBL_MGMT_C_ACK,
231 NLBL_MGMT_C_ADD,
232 NLBL_MGMT_C_REMOVE,
233 NLBL_MGMT_C_LIST,
234 NLBL_MGMT_C_ADDDEF,
235 NLBL_MGMT_C_REMOVEDEF,
236 NLBL_MGMT_C_LISTDEF,
237 NLBL_MGMT_C_MODULES,
238 NLBL_MGMT_C_VERSION,
239 __NLBL_MGMT_C_MAX,
240};
241#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)
242
243/* NetLabel protocol functions */
244int netlbl_mgmt_genl_init(void);
245
246#endif
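
The message layouts documented above map directly onto Generic NETLINK
requests. As a minimal userspace sketch, the payload-less VERSION command
could be built as below. This is an illustration under stated assumptions,
not code from this patch: the helper name is hypothetical, the family id is
dynamically assigned and would have to be resolved through a GENL_ID_CTRL
lookup first, and NLBL_MGMT_C_VERSION / NETLBL_PROTO_VERSION would need to
be mirrored from the NetLabel headers above.

	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/genetlink.h>

	/* Hypothetical helper: send a NLBL_MGMT_C_VERSION request on an
	 * already bound NETLINK_GENERIC socket; family_id is assumed to
	 * have been resolved via the GENL_ID_CTRL controller beforehand. */
	static int nlbl_mgmt_version_request(int sock, __u16 family_id)
	{
		struct {
			struct nlmsghdr nlh;
			struct genlmsghdr genl;
		} req;

		memset(&req, 0, sizeof(req));
		req.nlh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
		req.nlh.nlmsg_type = family_id;
		req.nlh.nlmsg_flags = NLM_F_REQUEST;
		req.nlh.nlmsg_pid = getpid();
		req.genl.cmd = NLBL_MGMT_C_VERSION;	/* no payload */
		req.genl.version = NETLBL_PROTO_VERSION;

		return send(sock, &req, req.nlh.nlmsg_len, 0);
	}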
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
new file mode 100644
index 000000000000..785f4960e0d3
--- /dev/null
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -0,0 +1,253 @@
1/*
2 * NetLabel Unlabeled Support
3 *
4 * This file defines functions for dealing with unlabeled packets for the
5 * NetLabel system. The NetLabel system manages static and dynamic label
6 * mappings for network protocols such as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#include <linux/types.h>
32#include <linux/rcupdate.h>
33#include <linux/list.h>
34#include <linux/spinlock.h>
35#include <linux/socket.h>
36#include <linux/string.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
39#include <net/netlink.h>
40#include <net/genetlink.h>
41
42#include <net/netlabel.h>
43#include <asm/bug.h>
44
45#include "netlabel_user.h"
46#include "netlabel_domainhash.h"
47#include "netlabel_unlabeled.h"
48
49/* Accept unlabeled packets flag */
50static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0);
51
52/* NetLabel Generic NETLINK CIPSOv4 family */
53static struct genl_family netlbl_unlabel_gnl_family = {
54 .id = GENL_ID_GENERATE,
55 .hdrsize = 0,
56 .name = NETLBL_NLTYPE_UNLABELED_NAME,
57 .version = NETLBL_PROTO_VERSION,
58 .maxattr = 0,
59};
60
61
62/*
63 * NetLabel Command Handlers
64 */
65
66/**
67 * netlbl_unlabel_accept - Handle an ACCEPT message
68 * @skb: the NETLINK buffer
69 * @info: the Generic NETLINK info block
70 *
71 * Description:
72 * Process a user generated ACCEPT message and set the accept flag accordingly.
73 * Returns zero on success, negative values on failure.
74 *
75 */
76static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
77{
78 int ret_val;
79 struct nlattr *data = netlbl_netlink_payload_data(skb);
80 u32 value;
81
82 ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
83 if (ret_val != 0)
84 return ret_val;
85
86 if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) {
87 value = nla_get_u32(data);
88 if (value == 1 || value == 0) {
89 atomic_set(&netlabel_unlabel_accept_flg, value);
90 netlbl_netlink_send_ack(info,
91 netlbl_unlabel_gnl_family.id,
92 NLBL_UNLABEL_C_ACK,
93 NETLBL_E_OK);
94 return 0;
95 }
96 }
97
98 netlbl_netlink_send_ack(info,
99 netlbl_unlabel_gnl_family.id,
100 NLBL_UNLABEL_C_ACK,
101 EINVAL);
102 return -EINVAL;
103}
104
105/**
106 * netlbl_unlabel_list - Handle a LIST message
107 * @skb: the NETLINK buffer
108 * @info: the Generic NETLINK info block
109 *
110 * Description:
111 * Process a user generated LIST message and respond with the current status.
112 * Returns zero on success, negative values on failure.
113 *
114 */
115static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
116{
117 int ret_val = -ENOMEM;
118 struct sk_buff *ans_skb;
119
120 ans_skb = netlbl_netlink_alloc_skb(0,
121 GENL_HDRLEN + NETLBL_LEN_U32,
122 GFP_KERNEL);
123 if (ans_skb == NULL)
124 goto list_failure;
125
126 if (netlbl_netlink_hdr_put(ans_skb,
127 info->snd_pid,
128 0,
129 netlbl_unlabel_gnl_family.id,
130 NLBL_UNLABEL_C_LIST) == NULL)
131 goto list_failure;
132
133 ret_val = nla_put_u32(ans_skb,
134 NLA_U32,
135 atomic_read(&netlabel_unlabel_accept_flg));
136 if (ret_val != 0)
137 goto list_failure;
138
139 ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
140 if (ret_val != 0)
141 goto list_failure;
142
143 return 0;
144
145list_failure:
146 netlbl_netlink_send_ack(info,
147 netlbl_unlabel_gnl_family.id,
148 NLBL_UNLABEL_C_ACK,
149 -ret_val);
150 return ret_val;
151}
152
153
154/*
155 * NetLabel Generic NETLINK Command Definitions
156 */
157
158static struct genl_ops netlbl_unlabel_genl_c_accept = {
159 .cmd = NLBL_UNLABEL_C_ACCEPT,
160 .flags = 0,
161 .doit = netlbl_unlabel_accept,
162 .dumpit = NULL,
163};
164
165static struct genl_ops netlbl_unlabel_genl_c_list = {
166 .cmd = NLBL_UNLABEL_C_LIST,
167 .flags = 0,
168 .doit = netlbl_unlabel_list,
169 .dumpit = NULL,
170};
171
172
173/*
174 * NetLabel Generic NETLINK Protocol Functions
175 */
176
177/**
178 * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component
179 *
180 * Description:
181 * Register the unlabeled packet NetLabel component with the Generic NETLINK
182 * mechanism. Returns zero on success, negative values on failure.
183 *
184 */
185int netlbl_unlabel_genl_init(void)
186{
187 int ret_val;
188
189 ret_val = genl_register_family(&netlbl_unlabel_gnl_family);
190 if (ret_val != 0)
191 return ret_val;
192
193 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
194 &netlbl_unlabel_genl_c_accept);
195 if (ret_val != 0)
196 return ret_val;
197
198 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
199 &netlbl_unlabel_genl_c_list);
200 if (ret_val != 0)
201 return ret_val;
202
203 return 0;
204}
205
206/*
207 * NetLabel KAPI Hooks
208 */
209
210/**
 211 * netlbl_unlabel_getattr - Get the security attributes for an unlabeled packet
212 * @secattr: the security attributes
213 *
214 * Description:
 215 * Determine the security attributes, if any, for an unlabeled packet and return
216 * them in @secattr. Returns zero on success and negative values on failure.
217 *
218 */
219int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
220{
221 if (atomic_read(&netlabel_unlabel_accept_flg) == 1) {
222 memset(secattr, 0, sizeof(*secattr));
223 return 0;
224 }
225
226 return -ENOMSG;
227}
228
229/**
230 * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets
231 *
232 * Description:
233 * Set the default NetLabel configuration to allow incoming unlabeled packets
234 * and to send unlabeled network traffic by default.
235 *
236 */
237int netlbl_unlabel_defconf(void)
238{
239 int ret_val;
240 struct netlbl_dom_map *entry;
241
242 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
243 if (entry == NULL)
244 return -ENOMEM;
245 entry->type = NETLBL_NLTYPE_UNLABELED;
246 ret_val = netlbl_domhsh_add_default(entry);
247 if (ret_val != 0)
248 return ret_val;
249
250 atomic_set(&netlabel_unlabel_accept_flg, 1);
251
252 return 0;
253}
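
A hedged sketch of how a caller, such as an LSM receive hook, might consume
netlbl_unlabel_getattr() above for a packet that carried no on-wire label;
the wrapper function and its policy decision are illustrative assumptions,
not part of this patch.

	/* Illustrative only: refuse unlabeled traffic unless the accept
	 * flag (toggled via the ACCEPT command or via
	 * netlbl_unlabel_defconf()) has been set. */
	static int example_secattr_from_unlabeled(struct netlbl_lsm_secattr *secattr)
	{
		int ret_val;

		ret_val = netlbl_unlabel_getattr(secattr);
		if (ret_val != 0)
			return ret_val;	/* -ENOMSG: unlabeled packets refused */

		/* @secattr has been zeroed: the packet carries no attributes */
		return 0;
	}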
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
new file mode 100644
index 000000000000..f300e54e14b6
--- /dev/null
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -0,0 +1,98 @@
1/*
2 * NetLabel Unlabeled Support
3 *
4 * This file defines functions for dealing with unlabeled packets for the
5 * NetLabel system. The NetLabel system manages static and dynamic label
6 * mappings for network protocols such as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#ifndef _NETLABEL_UNLABELED_H
32#define _NETLABEL_UNLABELED_H
33
34#include <net/netlabel.h>
35
36/*
37 * The following NetLabel payloads are supported by the Unlabeled subsystem.
38 *
39 * o ACK:
 40 * Sent by the kernel in response to an application's message; applications
 41 * should never send this message.
42 *
43 * +----------------------+-----------------------+
44 * | seq number (32 bits) | return code (32 bits) |
45 * +----------------------+-----------------------+
46 *
47 * seq number: the sequence number of the original message, taken from the
48 * nlmsghdr structure
49 * return code: return value, based on errno values
50 *
 51 * o ACCEPT:
 52 * This message is sent from an application to specify if the kernel should
 53 * allow unlabeled packets to pass if they do not match any of the static
 54 * mappings defined in the unlabeled module.
55 *
56 * +-----------------+
57 * | allow (32 bits) |
58 * +-----------------+
59 *
 60 * allow: if true (1) then allow the packets to pass; if false (0) then
 61 * reject the packets
62 *
 63 * o LIST:
64 * This message can be sent either from an application or by the kernel in
65 * response to an application generated LIST message. When sent by an
66 * application there is no payload. The kernel should respond to a LIST
67 * message either with a LIST message on success or an ACK message on
68 * failure.
69 *
70 * +-----------------------+
71 * | accept flag (32 bits) |
72 * +-----------------------+
73 *
 74 * accept flag: if true (1) then unlabeled packets are allowed to pass;
 75 * if false (0) then unlabeled packets are rejected
76 *
77 */
78
79/* NetLabel Unlabeled commands */
80enum {
81 NLBL_UNLABEL_C_UNSPEC,
82 NLBL_UNLABEL_C_ACK,
83 NLBL_UNLABEL_C_ACCEPT,
84 NLBL_UNLABEL_C_LIST,
85 __NLBL_UNLABEL_C_MAX,
86};
87#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
88
89/* NetLabel protocol functions */
90int netlbl_unlabel_genl_init(void);
91
 92/* Process incoming unlabeled network packets */
93int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr);
94
95/* Set the default configuration to allow Unlabeled packets */
96int netlbl_unlabel_defconf(void);
97
98#endif
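
For orientation, a userspace sketch of the ACCEPT message documented above.
It is an assumption-laden illustration: the family id must be resolved at
runtime, and the single-u32-attribute payload mirrors what
netlbl_unlabel_accept() parses on the kernel side, including the kernel's
use of the NLA_U32 type constant (value 3 in net/netlink.h) as the
attribute id.

	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/genetlink.h>

	/* Hypothetical helper: toggle the accept flag (1 pass, 0 reject) */
	static int nlbl_unlabel_accept_request(int sock, __u16 family_id,
					       __u32 allow)
	{
		struct {
			struct nlmsghdr nlh;
			struct genlmsghdr genl;
			struct nlattr nla;
			__u32 allow;
		} req;

		memset(&req, 0, sizeof(req));
		req.nlh.nlmsg_len = sizeof(req);
		req.nlh.nlmsg_type = family_id;	/* assumed: resolved via CTRL */
		req.nlh.nlmsg_flags = NLM_F_REQUEST;
		req.nlh.nlmsg_pid = getpid();
		req.genl.cmd = NLBL_UNLABEL_C_ACCEPT;
		req.genl.version = NETLBL_PROTO_VERSION;
		req.nla.nla_type = 3;	/* NLA_U32, matching the kernel side */
		req.nla.nla_len = NLA_HDRLEN + sizeof(__u32);
		req.allow = allow;

		return send(sock, &req, sizeof(req), 0);
	}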
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
new file mode 100644
index 000000000000..73cbe66e42ff
--- /dev/null
+++ b/net/netlabel/netlabel_user.c
@@ -0,0 +1,158 @@
1/*
2 * NetLabel NETLINK Interface
3 *
4 * This file defines the NETLINK interface for the NetLabel system. The
5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#include <linux/init.h>
32#include <linux/types.h>
33#include <linux/list.h>
34#include <linux/socket.h>
35#include <net/sock.h>
36#include <net/netlink.h>
37#include <net/genetlink.h>
38#include <net/netlabel.h>
39#include <asm/bug.h>
40
41#include "netlabel_mgmt.h"
42#include "netlabel_unlabeled.h"
43#include "netlabel_cipso_v4.h"
44#include "netlabel_user.h"
45
46/*
47 * NetLabel NETLINK Setup Functions
48 */
49
50/**
51 * netlbl_netlink_init - Initialize the NETLINK communication channel
52 *
53 * Description:
54 * Call out to the NetLabel components so they can register their families and
55 * commands with the Generic NETLINK mechanism. Returns zero on success and
56 * non-zero on failure.
57 *
58 */
59int netlbl_netlink_init(void)
60{
61 int ret_val;
62
63 ret_val = netlbl_mgmt_genl_init();
64 if (ret_val != 0)
65 return ret_val;
66
67 ret_val = netlbl_cipsov4_genl_init();
68 if (ret_val != 0)
69 return ret_val;
70
71 ret_val = netlbl_unlabel_genl_init();
72 if (ret_val != 0)
73 return ret_val;
74
75 return 0;
76}
77
78/*
79 * NetLabel Common Protocol Functions
80 */
81
82/**
83 * netlbl_netlink_send_ack - Send an ACK message
84 * @info: the generic NETLINK information
85 * @genl_family: the generic NETLINK family ID value
86 * @ack_cmd: the generic NETLINK family ACK command value
87 * @ret_code: return code to use
88 *
89 * Description:
90 * This function sends an ACK message to the sender of the NETLINK message
91 * specified by @info.
92 *
93 */
94void netlbl_netlink_send_ack(const struct genl_info *info,
95 u32 genl_family,
96 u8 ack_cmd,
97 u32 ret_code)
98{
99 size_t data_size;
100 struct sk_buff *skb;
101
102 data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32;
103 skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
104 if (skb == NULL)
105 return;
106
107 if (netlbl_netlink_hdr_put(skb,
108 info->snd_pid,
109 0,
110 genl_family,
111 ack_cmd) == NULL)
112 goto send_ack_failure;
113
114 if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0)
115 goto send_ack_failure;
116 if (nla_put_u32(skb, NLA_U32, ret_code) != 0)
117 goto send_ack_failure;
118
119 netlbl_netlink_snd(skb, info->snd_pid);
120 return;
121
122send_ack_failure:
123 kfree_skb(skb);
124}
125
126/*
127 * NETLINK I/O Functions
128 */
129
130/**
131 * netlbl_netlink_snd - Send a NetLabel message
132 * @skb: NetLabel message
133 * @pid: destination PID
134 *
135 * Description:
136 * Sends a unicast NetLabel message over the NETLINK socket.
137 *
138 */
139int netlbl_netlink_snd(struct sk_buff *skb, u32 pid)
140{
141 return genlmsg_unicast(skb, pid);
142}
143
144/**
 145 * netlbl_netlink_snd_multicast - Send a multicast NetLabel message
146 * @skb: NetLabel message
147 * @pid: sending PID
148 * @group: multicast group id
149 *
150 * Description:
151 * Sends a multicast NetLabel message over the NETLINK socket to all members
152 * of @group except @pid.
153 *
154 */
155int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group)
156{
157 return genlmsg_multicast(skb, pid, group, GFP_KERNEL);
158}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
new file mode 100644
index 000000000000..385a6c7488c6
--- /dev/null
+++ b/net/netlabel/netlabel_user.h
@@ -0,0 +1,215 @@
1/*
2 * NetLabel NETLINK Interface
3 *
4 * This file defines the NETLINK interface for the NetLabel system. The
5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO.
7 *
8 * Author: Paul Moore <paul.moore@hp.com>
9 *
10 */
11
12/*
13 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
23 * the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 *
29 */
30
31#ifndef _NETLABEL_USER_H
32#define _NETLABEL_USER_H
33
34#include <linux/types.h>
35#include <linux/skbuff.h>
36#include <linux/capability.h>
37#include <net/netlink.h>
38#include <net/genetlink.h>
39#include <net/netlabel.h>
40
41/* NetLabel NETLINK helper functions */
42
43/**
44 * netlbl_netlink_cap_check - Check the NETLINK msg capabilities
45 * @skb: the NETLINK buffer
46 * @req_cap: the required capability
47 *
48 * Description:
49 * Check the NETLINK buffer's capabilities against the required capabilities.
50 * Returns zero on success, negative values on failure.
51 *
52 */
53static inline int netlbl_netlink_cap_check(const struct sk_buff *skb,
54 kernel_cap_t req_cap)
55{
56 if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap))
57 return 0;
58 return -EPERM;
59}
60
61/**
62 * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on
63 * @nla: the attribute
64 * @rem_len: remaining length
65 *
66 * Description:
 67 * Return the u8 value pointed to by @nla and advance @nla to the next attribute.
68 *
69 */
70static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len)
71{
72 u8 val = nla_get_u8(*nla);
73 *nla = nla_next(*nla, rem_len);
74 return val;
75}
76
77/**
78 * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on
79 * @nla: the attribute
80 * @rem_len: remaining length
81 *
82 * Description:
 83 * Return the u16 value pointed to by @nla and advance @nla to the next attribute.
84 *
85 */
86static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len)
87{
88 u16 val = nla_get_u16(*nla);
89 *nla = nla_next(*nla, rem_len);
90 return val;
91}
92
93/**
94 * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on
95 * @nla: the attribute
96 * @rem_len: remaining length
97 *
98 * Description:
 99 * Return the u32 value pointed to by @nla and advance @nla to the next attribute.
100 *
101 */
102static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len)
103{
104 u32 val = nla_get_u32(*nla);
105 *nla = nla_next(*nla, rem_len);
106 return val;
107}
108
109/**
 110 * netlbl_netlink_hdr_put - Write the NETLINK headers into a sk_buff
 111 * @skb: the packet
 112 * @pid: the PID of the recipient
113 * @seq: the sequence number
114 * @type: the generic NETLINK message family type
115 * @cmd: command
116 *
117 * Description:
118 * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
119 * struct to the packet. Returns a pointer to the start of the payload buffer
120 * on success or NULL on failure.
121 *
122 */
123static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
124 u32 pid,
125 u32 seq,
126 int type,
127 u8 cmd)
128{
129 return genlmsg_put(skb,
130 pid,
131 seq,
132 type,
133 0,
134 0,
135 cmd,
136 NETLBL_PROTO_VERSION);
137}
138
139/**
 140 * netlbl_netlink_hdr_push - Write the NETLINK headers into a sk_buff
 141 * @skb: the packet
 142 * @pid: the PID of the recipient
143 * @seq: the sequence number
144 * @type: the generic NETLINK message family type
145 * @cmd: command
146 *
147 * Description:
148 * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
149 * struct to the packet.
150 *
151 */
152static inline void netlbl_netlink_hdr_push(struct sk_buff *skb,
153 u32 pid,
154 u32 seq,
155 int type,
156 u8 cmd)
157
158{
159 struct nlmsghdr *nlh;
160 struct genlmsghdr *hdr;
161
162 nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN));
163 nlh->nlmsg_type = type;
164 nlh->nlmsg_len = skb->len;
165 nlh->nlmsg_flags = 0;
166 nlh->nlmsg_pid = pid;
167 nlh->nlmsg_seq = seq;
168
169 hdr = nlmsg_data(nlh);
170 hdr->cmd = cmd;
171 hdr->version = NETLBL_PROTO_VERSION;
172 hdr->reserved = 0;
173}
174
175/**
176 * netlbl_netlink_payload_len - Return the length of the payload
177 * @skb: the NETLINK buffer
178 *
179 * Description:
180 * This function returns the length of the NetLabel payload.
181 *
182 */
183static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb)
184{
185 return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN;
186}
187
188/**
189 * netlbl_netlink_payload_data - Returns a pointer to the start of the payload
190 * @skb: the NETLINK buffer
191 *
192 * Description:
193 * This function returns a pointer to the start of the NetLabel payload.
194 *
195 */
196static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb)
197{
198 return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) +
199 GENL_HDRLEN;
200}
201
202/* NetLabel common protocol functions */
203
204void netlbl_netlink_send_ack(const struct genl_info *info,
205 u32 genl_family,
206 u8 ack_cmd,
207 u32 ret_code);
208
209/* NetLabel NETLINK I/O functions */
210
211int netlbl_netlink_init(void);
212int netlbl_netlink_snd(struct sk_buff *skb, u32 pid);
213int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group);
214
215#endif
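
The netlbl_getinc_*() helpers above are intended for walking a packed run of
attributes in a single pass. A short sketch of that pattern, where @skb is
assumed to be the received message and the attribute order (a u32 DOI
followed by a u8 mapping type) is chosen purely for illustration:

	struct nlattr *nla = netlbl_netlink_payload_data(skb);
	int rem_len = netlbl_netlink_payload_len(skb);
	u32 doi;
	u8 map_type;

	/* each helper returns the current value and steps @nla forward */
	doi = netlbl_getinc_u32(&nla, &rem_len);
	map_type = netlbl_getinc_u8(&nla, &rem_len);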
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8b85036ba8e3..d56e0d21f919 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1147,7 +1147,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1147 if (len > sk->sk_sndbuf - 32) 1147 if (len > sk->sk_sndbuf - 32)
1148 goto out; 1148 goto out;
1149 err = -ENOBUFS; 1149 err = -ENOBUFS;
1150 skb = alloc_skb(len, GFP_KERNEL); 1150 skb = nlmsg_new(len, GFP_KERNEL);
1151 if (skb==NULL) 1151 if (skb==NULL)
1152 goto out; 1152 goto out;
1153 1153
@@ -1341,19 +1341,18 @@ static int netlink_dump(struct sock *sk)
1341 struct netlink_callback *cb; 1341 struct netlink_callback *cb;
1342 struct sk_buff *skb; 1342 struct sk_buff *skb;
1343 struct nlmsghdr *nlh; 1343 struct nlmsghdr *nlh;
1344 int len; 1344 int len, err = -ENOBUFS;
1345 1345
1346 skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); 1346 skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
1347 if (!skb) 1347 if (!skb)
1348 return -ENOBUFS; 1348 goto errout;
1349 1349
1350 spin_lock(&nlk->cb_lock); 1350 spin_lock(&nlk->cb_lock);
1351 1351
1352 cb = nlk->cb; 1352 cb = nlk->cb;
1353 if (cb == NULL) { 1353 if (cb == NULL) {
1354 spin_unlock(&nlk->cb_lock); 1354 err = -EINVAL;
1355 kfree_skb(skb); 1355 goto errout_skb;
1356 return -EINVAL;
1357 } 1356 }
1358 1357
1359 len = cb->dump(skb, cb); 1358 len = cb->dump(skb, cb);
@@ -1365,8 +1364,12 @@ static int netlink_dump(struct sock *sk)
1365 return 0; 1364 return 0;
1366 } 1365 }
1367 1366
1368 nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); 1367 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1369 memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); 1368 if (!nlh)
1369 goto errout_skb;
1370
1371 memcpy(nlmsg_data(nlh), &len, sizeof(len));
1372
1370 skb_queue_tail(&sk->sk_receive_queue, skb); 1373 skb_queue_tail(&sk->sk_receive_queue, skb);
1371 sk->sk_data_ready(sk, skb->len); 1374 sk->sk_data_ready(sk, skb->len);
1372 1375
@@ -1378,8 +1381,11 @@ static int netlink_dump(struct sock *sk)
1378 netlink_destroy_callback(cb); 1381 netlink_destroy_callback(cb);
1379 return 0; 1382 return 0;
1380 1383
1381nlmsg_failure: 1384errout_skb:
1382 return -ENOBUFS; 1385 spin_unlock(&nlk->cb_lock);
1386 kfree_skb(skb);
1387errout:
1388 return err;
1383} 1389}
1384 1390
1385int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1391int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1431,11 +1437,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1431 int size; 1437 int size;
1432 1438
1433 if (err == 0) 1439 if (err == 0)
1434 size = NLMSG_SPACE(sizeof(struct nlmsgerr)); 1440 size = nlmsg_total_size(sizeof(*errmsg));
1435 else 1441 else
1436 size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len)); 1442 size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh));
1437 1443
1438 skb = alloc_skb(size, GFP_KERNEL); 1444 skb = nlmsg_new(size, GFP_KERNEL);
1439 if (!skb) { 1445 if (!skb) {
1440 struct sock *sk; 1446 struct sock *sk;
1441 1447
@@ -1451,16 +1457,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1451 1457
1452 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 1458 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1453 NLMSG_ERROR, sizeof(struct nlmsgerr), 0); 1459 NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
1454 errmsg = NLMSG_DATA(rep); 1460 errmsg = nlmsg_data(rep);
1455 errmsg->error = err; 1461 errmsg->error = err;
1456 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); 1462 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1457 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1463 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1458} 1464}
1459 1465
1460static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, 1466static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1461 struct nlmsghdr *, int *)) 1467 struct nlmsghdr *, int *))
1462{ 1468{
1463 unsigned int total_len;
1464 struct nlmsghdr *nlh; 1469 struct nlmsghdr *nlh;
1465 int err; 1470 int err;
1466 1471
@@ -1470,8 +1475,6 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1470 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) 1475 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1471 return 0; 1476 return 0;
1472 1477
1473 total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
1474
1475 if (cb(skb, nlh, &err) < 0) { 1478 if (cb(skb, nlh, &err) < 0) {
1476 /* Not an error, but we have to interrupt processing 1479 /* Not an error, but we have to interrupt processing
1477 * here. Note: that in this case we do not pull 1480 * here. Note: that in this case we do not pull
@@ -1483,7 +1486,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1483 } else if (nlh->nlmsg_flags & NLM_F_ACK) 1486 } else if (nlh->nlmsg_flags & NLM_F_ACK)
1484 netlink_ack(skb, nlh, 0); 1487 netlink_ack(skb, nlh, 0);
1485 1488
1486 skb_pull(skb, total_len); 1489 netlink_queue_skip(nlh, skb);
1487 } 1490 }
1488 1491
1489 return 0; 1492 return 0;
@@ -1546,6 +1549,38 @@ void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
1546 skb_pull(skb, msglen); 1549 skb_pull(skb, msglen);
1547} 1550}
1548 1551
1552/**
1553 * nlmsg_notify - send a notification netlink message
1554 * @sk: netlink socket to use
1555 * @skb: notification message
1556 * @pid: destination netlink pid for reports or 0
1557 * @group: destination multicast group or 0
1558 * @report: 1 to report back, 0 to disable
1559 * @flags: allocation flags
1560 */
1561int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1562 unsigned int group, int report, gfp_t flags)
1563{
1564 int err = 0;
1565
1566 if (group) {
1567 int exclude_pid = 0;
1568
1569 if (report) {
1570 atomic_inc(&skb->users);
1571 exclude_pid = pid;
1572 }
1573
1574 /* errors reported via destination sk->sk_err */
1575 nlmsg_multicast(sk, skb, exclude_pid, group, flags);
1576 }
1577
1578 if (report)
1579 err = nlmsg_unicast(sk, skb, pid);
1580
1581 return err;
1582}
1583
1549#ifdef CONFIG_PROC_FS 1584#ifdef CONFIG_PROC_FS
1550struct nl_seq_iter { 1585struct nl_seq_iter {
1551 int link; 1586 int link;
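
nlmsg_notify() above folds the common "multicast to listeners, optionally
echo to the requester" pattern into one call. A hedged caller sketch
follows; the rtnl socket, the chosen group, and the surrounding request
variables are assumptions for illustration:

	/* broadcast the change; when the request carried NLM_F_ECHO, also
	 * unicast the message back to the sending pid */
	err = nlmsg_notify(rtnl, skb, NETLINK_CB(in_skb).pid, RTNLGRP_NEIGH,
			   nlh->nlmsg_flags & NLM_F_ECHO, GFP_KERNEL);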
@@ -1727,8 +1762,6 @@ static struct net_proto_family netlink_family_ops = {
1727 .owner = THIS_MODULE, /* for consistency 8) */ 1762 .owner = THIS_MODULE, /* for consistency 8) */
1728}; 1763};
1729 1764
1730extern void netlink_skb_parms_too_large(void);
1731
1732static int __init netlink_proto_init(void) 1765static int __init netlink_proto_init(void)
1733{ 1766{
1734 struct sk_buff *dummy_skb; 1767 struct sk_buff *dummy_skb;
@@ -1740,8 +1773,7 @@ static int __init netlink_proto_init(void)
1740 if (err != 0) 1773 if (err != 0)
1741 goto out; 1774 goto out;
1742 1775
1743 if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) 1776 BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
1744 netlink_skb_parms_too_large();
1745 1777
1746 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); 1778 nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
1747 if (!nl_table) 1779 if (!nl_table)
@@ -1799,4 +1831,4 @@ EXPORT_SYMBOL(netlink_set_err);
1799EXPORT_SYMBOL(netlink_set_nonroot); 1831EXPORT_SYMBOL(netlink_set_nonroot);
1800EXPORT_SYMBOL(netlink_unicast); 1832EXPORT_SYMBOL(netlink_unicast);
1801EXPORT_SYMBOL(netlink_unregister_notifier); 1833EXPORT_SYMBOL(netlink_unregister_notifier);
1802 1834EXPORT_SYMBOL(nlmsg_notify);
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index dddbd15135a8..004139557e09 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -20,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
20 [NLA_U16] = sizeof(u16), 20 [NLA_U16] = sizeof(u16),
21 [NLA_U32] = sizeof(u32), 21 [NLA_U32] = sizeof(u32),
22 [NLA_U64] = sizeof(u64), 22 [NLA_U64] = sizeof(u64),
23 [NLA_STRING] = 1,
24 [NLA_NESTED] = NLA_HDRLEN, 23 [NLA_NESTED] = NLA_HDRLEN,
25}; 24};
26 25
@@ -28,7 +27,7 @@ static int validate_nla(struct nlattr *nla, int maxtype,
28 struct nla_policy *policy) 27 struct nla_policy *policy)
29{ 28{
30 struct nla_policy *pt; 29 struct nla_policy *pt;
31 int minlen = 0; 30 int minlen = 0, attrlen = nla_len(nla);
32 31
33 if (nla->nla_type <= 0 || nla->nla_type > maxtype) 32 if (nla->nla_type <= 0 || nla->nla_type > maxtype)
34 return 0; 33 return 0;
@@ -37,16 +36,46 @@ static int validate_nla(struct nlattr *nla, int maxtype,
37 36
38 BUG_ON(pt->type > NLA_TYPE_MAX); 37 BUG_ON(pt->type > NLA_TYPE_MAX);
39 38
40 if (pt->minlen) 39 switch (pt->type) {
41 minlen = pt->minlen; 40 case NLA_FLAG:
42 else if (pt->type != NLA_UNSPEC) 41 if (attrlen > 0)
43 minlen = nla_attr_minlen[pt->type]; 42 return -ERANGE;
43 break;
44 44
45 if (pt->type == NLA_FLAG && nla_len(nla) > 0) 45 case NLA_NUL_STRING:
46 return -ERANGE; 46 if (pt->len)
47 minlen = min_t(int, attrlen, pt->len + 1);
48 else
49 minlen = attrlen;
47 50
48 if (nla_len(nla) < minlen) 51 if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL)
49 return -ERANGE; 52 return -EINVAL;
53 /* fall through */
54
55 case NLA_STRING:
56 if (attrlen < 1)
57 return -ERANGE;
58
59 if (pt->len) {
60 char *buf = nla_data(nla);
61
62 if (buf[attrlen - 1] == '\0')
63 attrlen--;
64
65 if (attrlen > pt->len)
66 return -ERANGE;
67 }
68 break;
69
70 default:
71 if (pt->len)
72 minlen = pt->len;
73 else if (pt->type != NLA_UNSPEC)
74 minlen = nla_attr_minlen[pt->type];
75
76 if (attrlen < minlen)
77 return -ERANGE;
78 }
50 79
51 return 0; 80 return 0;
52} 81}
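
The new NLA_NUL_STRING case requires a NUL terminator within the first
pt->len + 1 bytes (or anywhere in the attribute when no length cap is set).
A policy entry exercising it might look like the sketch below; the
EXAMPLE_ATTR_* names are placeholders, and the genetlink controller later in
this patch applies the same pattern to CTRL_ATTR_FAMILY_NAME:

	static struct nla_policy example_policy[EXAMPLE_ATTR_MAX + 1] = {
		/* must contain a NUL; at most IFNAMSIZ - 1 chars precede it */
		[EXAMPLE_ATTR_IFNAME] = { .type = NLA_NUL_STRING,
					  .len = IFNAMSIZ - 1 },
	};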
@@ -255,6 +284,26 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
255} 284}
256 285
257/** 286/**
287 * __nla_reserve_nohdr - reserve room for attribute without header
288 * @skb: socket buffer to reserve room on
289 * @attrlen: length of attribute payload
290 *
291 * Reserves room for attribute payload without a header.
292 *
293 * The caller is responsible to ensure that the skb provides enough
294 * tailroom for the payload.
295 */
296void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
297{
298 void *start;
299
300 start = skb_put(skb, NLA_ALIGN(attrlen));
301 memset(start, 0, NLA_ALIGN(attrlen));
302
303 return start;
304}
305
306/**
258 * nla_reserve - reserve room for attribute on the skb 307 * nla_reserve - reserve room for attribute on the skb
259 * @skb: socket buffer to reserve room on 308 * @skb: socket buffer to reserve room on
260 * @attrtype: attribute type 309 * @attrtype: attribute type
@@ -275,6 +324,24 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
275} 324}
276 325
277/** 326/**
 327 * nla_reserve_nohdr - reserve room for attribute without header
 328 * @skb: socket buffer to reserve room on
 329 * @attrlen: length of attribute payload
330 *
331 * Reserves room for attribute payload without a header.
332 *
333 * Returns NULL if the tailroom of the skb is insufficient to store
334 * the attribute payload.
335 */
336void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
337{
338 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
339 return NULL;
340
341 return __nla_reserve_nohdr(skb, attrlen);
342}
343
344/**
278 * __nla_put - Add a netlink attribute to a socket buffer 345 * __nla_put - Add a netlink attribute to a socket buffer
279 * @skb: socket buffer to add attribute to 346 * @skb: socket buffer to add attribute to
280 * @attrtype: attribute type 347 * @attrtype: attribute type
@@ -293,6 +360,22 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
293 memcpy(nla_data(nla), data, attrlen); 360 memcpy(nla_data(nla), data, attrlen);
294} 361}
295 362
363/**
364 * __nla_put_nohdr - Add a netlink attribute without header
365 * @skb: socket buffer to add attribute to
366 * @attrlen: length of attribute payload
367 * @data: head of attribute payload
368 *
369 * The caller is responsible to ensure that the skb provides enough
370 * tailroom for the attribute payload.
371 */
372void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
373{
374 void *start;
375
376 start = __nla_reserve_nohdr(skb, attrlen);
377 memcpy(start, data, attrlen);
378}
296 379
297/** 380/**
298 * nla_put - Add a netlink attribute to a socket buffer 381 * nla_put - Add a netlink attribute to a socket buffer
@@ -313,15 +396,36 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
313 return 0; 396 return 0;
314} 397}
315 398
399/**
400 * nla_put_nohdr - Add a netlink attribute without header
401 * @skb: socket buffer to add attribute to
402 * @attrlen: length of attribute payload
403 * @data: head of attribute payload
404 *
405 * Returns -1 if the tailroom of the skb is insufficient to store
406 * the attribute payload.
407 */
408int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
409{
410 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
411 return -1;
412
413 __nla_put_nohdr(skb, attrlen, data);
414 return 0;
415}
316 416
317EXPORT_SYMBOL(nla_validate); 417EXPORT_SYMBOL(nla_validate);
318EXPORT_SYMBOL(nla_parse); 418EXPORT_SYMBOL(nla_parse);
319EXPORT_SYMBOL(nla_find); 419EXPORT_SYMBOL(nla_find);
320EXPORT_SYMBOL(nla_strlcpy); 420EXPORT_SYMBOL(nla_strlcpy);
321EXPORT_SYMBOL(__nla_reserve); 421EXPORT_SYMBOL(__nla_reserve);
422EXPORT_SYMBOL(__nla_reserve_nohdr);
322EXPORT_SYMBOL(nla_reserve); 423EXPORT_SYMBOL(nla_reserve);
424EXPORT_SYMBOL(nla_reserve_nohdr);
323EXPORT_SYMBOL(__nla_put); 425EXPORT_SYMBOL(__nla_put);
426EXPORT_SYMBOL(__nla_put_nohdr);
324EXPORT_SYMBOL(nla_put); 427EXPORT_SYMBOL(nla_put);
428EXPORT_SYMBOL(nla_put_nohdr);
325EXPORT_SYMBOL(nla_memcpy); 429EXPORT_SYMBOL(nla_memcpy);
326EXPORT_SYMBOL(nla_memcmp); 430EXPORT_SYMBOL(nla_memcmp);
327EXPORT_SYMBOL(nla_strcmp); 431EXPORT_SYMBOL(nla_strcmp);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a298f77cc3e3..49bc2db7982b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -387,7 +387,10 @@ static void genl_rcv(struct sock *sk, int len)
387static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, 387static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
388 u32 flags, struct sk_buff *skb, u8 cmd) 388 u32 flags, struct sk_buff *skb, u8 cmd)
389{ 389{
390 struct nlattr *nla_ops;
391 struct genl_ops *ops;
390 void *hdr; 392 void *hdr;
393 int idx = 1;
391 394
392 hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, 395 hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
393 family->version); 396 family->version);
@@ -396,6 +399,37 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
396 399
397 NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); 400 NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name);
398 NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); 401 NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id);
402 NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version);
403 NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize);
404 NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr);
405
406 nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS);
407 if (nla_ops == NULL)
408 goto nla_put_failure;
409
410 list_for_each_entry(ops, &family->ops_list, ops_list) {
411 struct nlattr *nest;
412
413 nest = nla_nest_start(skb, idx++);
414 if (nest == NULL)
415 goto nla_put_failure;
416
417 NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd);
418 NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags);
419
420 if (ops->policy)
421 NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY);
422
423 if (ops->doit)
424 NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT);
425
426 if (ops->dumpit)
427 NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT);
428
429 nla_nest_end(skb, nest);
430 }
431
432 nla_nest_end(skb, nla_ops);
399 433
400 return genlmsg_end(skb, hdr); 434 return genlmsg_end(skb, hdr);
401 435
@@ -411,6 +445,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
411 int chains_to_skip = cb->args[0]; 445 int chains_to_skip = cb->args[0];
412 int fams_to_skip = cb->args[1]; 446 int fams_to_skip = cb->args[1];
413 447
448 if (chains_to_skip != 0)
449 genl_lock();
450
414 for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { 451 for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
415 if (i < chains_to_skip) 452 if (i < chains_to_skip)
416 continue; 453 continue;
@@ -428,6 +465,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
428 } 465 }
429 466
430errout: 467errout:
468 if (chains_to_skip != 0)
469 genl_unlock();
470
431 cb->args[0] = i; 471 cb->args[0] = i;
432 cb->args[1] = n; 472 cb->args[1] = n;
433 473
@@ -440,7 +480,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
440 struct sk_buff *skb; 480 struct sk_buff *skb;
441 int err; 481 int err;
442 482
443 skb = nlmsg_new(NLMSG_GOODSIZE); 483 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
444 if (skb == NULL) 484 if (skb == NULL)
445 return ERR_PTR(-ENOBUFS); 485 return ERR_PTR(-ENOBUFS);
446 486
@@ -455,7 +495,8 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
455 495
456static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { 496static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = {
457 [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, 497 [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 },
458 [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING }, 498 [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING,
499 .len = GENL_NAMSIZ - 1 },
459}; 500};
460 501
461static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) 502static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
@@ -470,12 +511,9 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
470 } 511 }
471 512
472 if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { 513 if (info->attrs[CTRL_ATTR_FAMILY_NAME]) {
473 char name[GENL_NAMSIZ]; 514 char *name;
474
475 if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME],
476 GENL_NAMSIZ) >= GENL_NAMSIZ)
477 goto errout;
478 515
516 name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]);
479 res = genl_family_find_byname(name); 517 res = genl_family_find_byname(name);
480 } 518 }
481 519
@@ -510,7 +548,7 @@ static int genl_ctrl_event(int event, void *data)
510 if (IS_ERR(msg)) 548 if (IS_ERR(msg))
511 return PTR_ERR(msg); 549 return PTR_ERR(msg);
512 550
513 genlmsg_multicast(msg, 0, GENL_ID_CTRL); 551 genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL);
514 break; 552 break;
515 } 553 }
516 554
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4172a5235916..f4ccb90e6739 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -427,21 +427,24 @@ out_unlock:
427} 427}
428#endif 428#endif
429 429
430static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res) 430static inline int run_filter(struct sk_buff *skb, struct sock *sk,
431 unsigned *snaplen)
431{ 432{
432 struct sk_filter *filter; 433 struct sk_filter *filter;
434 int err = 0;
433 435
434 bh_lock_sock(sk); 436 rcu_read_lock_bh();
435 filter = sk->sk_filter; 437 filter = rcu_dereference(sk->sk_filter);
436 /* 438 if (filter != NULL) {
437 * Our caller already checked that filter != NULL but we need to 439 err = sk_run_filter(skb, filter->insns, filter->len);
438 * verify that under bh_lock_sock() to be safe 440 if (!err)
439 */ 441 err = -EPERM;
440 if (likely(filter != NULL)) 442 else if (*snaplen > err)
441 res = sk_run_filter(skb, filter->insns, filter->len); 443 *snaplen = err;
442 bh_unlock_sock(sk); 444 }
445 rcu_read_unlock_bh();
443 446
444 return res; 447 return err;
445} 448}
446 449
447/* 450/*
@@ -491,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
491 494
492 snaplen = skb->len; 495 snaplen = skb->len;
493 496
494 if (sk->sk_filter) { 497 if (run_filter(skb, sk, &snaplen) < 0)
495 unsigned res = run_filter(skb, sk, snaplen); 498 goto drop_n_restore;
496 if (res == 0)
497 goto drop_n_restore;
498 if (snaplen > res)
499 snaplen = res;
500 }
501 499
502 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 500 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
503 (unsigned)sk->sk_rcvbuf) 501 (unsigned)sk->sk_rcvbuf)
@@ -586,20 +584,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
586 else if (skb->pkt_type == PACKET_OUTGOING) { 584 else if (skb->pkt_type == PACKET_OUTGOING) {
587 /* Special case: outgoing packets have ll header at head */ 585 /* Special case: outgoing packets have ll header at head */
588 skb_pull(skb, skb->nh.raw - skb->data); 586 skb_pull(skb, skb->nh.raw - skb->data);
589 if (skb->ip_summed == CHECKSUM_HW) 587 if (skb->ip_summed == CHECKSUM_PARTIAL)
590 status |= TP_STATUS_CSUMNOTREADY; 588 status |= TP_STATUS_CSUMNOTREADY;
591 } 589 }
592 } 590 }
593 591
594 snaplen = skb->len; 592 snaplen = skb->len;
595 593
596 if (sk->sk_filter) { 594 if (run_filter(skb, sk, &snaplen) < 0)
597 unsigned res = run_filter(skb, sk, snaplen); 595 goto drop_n_restore;
598 if (res == 0)
599 goto drop_n_restore;
600 if (snaplen > res)
601 snaplen = res;
602 }
603 596
604 if (sk->sk_type == SOCK_DGRAM) { 597 if (sk->sk_type == SOCK_DGRAM) {
605 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; 598 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a2587b52e531..835070e9169c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -33,16 +33,230 @@
33#include <net/sch_generic.h> 33#include <net/sch_generic.h>
34#include <net/act_api.h> 34#include <net/act_api.h>
35 35
36#if 0 /* control */ 36void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
37#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args) 37{
38#else 38 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
39#define DPRINTK(format, args...) 39 struct tcf_common **p1p;
40
41 for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
42 if (*p1p == p) {
43 write_lock_bh(hinfo->lock);
44 *p1p = p->tcfc_next;
45 write_unlock_bh(hinfo->lock);
46#ifdef CONFIG_NET_ESTIMATOR
47 gen_kill_estimator(&p->tcfc_bstats,
48 &p->tcfc_rate_est);
40#endif 49#endif
41#if 0 /* data */ 50 kfree(p);
42#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args) 51 return;
43#else 52 }
44#define D2PRINTK(format, args...) 53 }
54 BUG_TRAP(0);
55}
56EXPORT_SYMBOL(tcf_hash_destroy);
57
58int tcf_hash_release(struct tcf_common *p, int bind,
59 struct tcf_hashinfo *hinfo)
60{
61 int ret = 0;
62
63 if (p) {
64 if (bind)
65 p->tcfc_bindcnt--;
66
67 p->tcfc_refcnt--;
68 if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
69 tcf_hash_destroy(p, hinfo);
70 ret = 1;
71 }
72 }
73 return ret;
74}
75EXPORT_SYMBOL(tcf_hash_release);
76
77static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
78 struct tc_action *a, struct tcf_hashinfo *hinfo)
79{
80 struct tcf_common *p;
 81 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 82 struct rtattr *r;
83
84 read_lock(hinfo->lock);
85
86 s_i = cb->args[0];
87
88 for (i = 0; i < (hinfo->hmask + 1); i++) {
89 p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
90
91 for (; p; p = p->tcfc_next) {
92 index++;
93 if (index < s_i)
94 continue;
95 a->priv = p;
96 a->order = n_i;
97 r = (struct rtattr*) skb->tail;
98 RTA_PUT(skb, a->order, 0, NULL);
99 err = tcf_action_dump_1(skb, a, 0, 0);
100 if (err < 0) {
101 index--;
102 skb_trim(skb, (u8*)r - skb->data);
103 goto done;
104 }
105 r->rta_len = skb->tail - (u8*)r;
106 n_i++;
107 if (n_i >= TCA_ACT_MAX_PRIO)
108 goto done;
109 }
110 }
111done:
112 read_unlock(hinfo->lock);
113 if (n_i)
114 cb->args[0] += n_i;
115 return n_i;
116
117rtattr_failure:
118 skb_trim(skb, (u8*)r - skb->data);
119 goto done;
120}
121
122static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
123 struct tcf_hashinfo *hinfo)
124{
125 struct tcf_common *p, *s_p;
 126 struct rtattr *r;
 127 int i = 0, n_i = 0;
128
129 r = (struct rtattr*) skb->tail;
130 RTA_PUT(skb, a->order, 0, NULL);
131 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
132 for (i = 0; i < (hinfo->hmask + 1); i++) {
133 p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
134
135 while (p != NULL) {
136 s_p = p->tcfc_next;
137 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
138 module_put(a->ops->owner);
139 n_i++;
140 p = s_p;
141 }
142 }
143 RTA_PUT(skb, TCA_FCNT, 4, &n_i);
144 r->rta_len = skb->tail - (u8*)r;
145
146 return n_i;
147rtattr_failure:
148 skb_trim(skb, (u8*)r - skb->data);
149 return -EINVAL;
150}
151
152int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
153 int type, struct tc_action *a)
154{
155 struct tcf_hashinfo *hinfo = a->ops->hinfo;
156
157 if (type == RTM_DELACTION) {
158 return tcf_del_walker(skb, a, hinfo);
159 } else if (type == RTM_GETACTION) {
160 return tcf_dump_walker(skb, cb, a, hinfo);
161 } else {
162 printk("tcf_generic_walker: unknown action %d\n", type);
163 return -EINVAL;
164 }
165}
166EXPORT_SYMBOL(tcf_generic_walker);
167
168struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
169{
170 struct tcf_common *p;
171
172 read_lock(hinfo->lock);
173 for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
174 p = p->tcfc_next) {
175 if (p->tcfc_index == index)
176 break;
177 }
178 read_unlock(hinfo->lock);
179
180 return p;
181}
182EXPORT_SYMBOL(tcf_hash_lookup);
183
184u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo)
185{
186 u32 val = *idx_gen;
187
188 do {
189 if (++val == 0)
190 val = 1;
191 } while (tcf_hash_lookup(val, hinfo));
192
193 return (*idx_gen = val);
194}
195EXPORT_SYMBOL(tcf_hash_new_index);
196
197int tcf_hash_search(struct tc_action *a, u32 index)
198{
199 struct tcf_hashinfo *hinfo = a->ops->hinfo;
200 struct tcf_common *p = tcf_hash_lookup(index, hinfo);
201
202 if (p) {
203 a->priv = p;
204 return 1;
205 }
206 return 0;
207}
208EXPORT_SYMBOL(tcf_hash_search);
209
210struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
211 struct tcf_hashinfo *hinfo)
212{
213 struct tcf_common *p = NULL;
214 if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
215 if (bind) {
216 p->tcfc_bindcnt++;
217 p->tcfc_refcnt++;
218 }
219 a->priv = p;
220 }
221 return p;
222}
223EXPORT_SYMBOL(tcf_hash_check);
224
225struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
226{
227 struct tcf_common *p = kzalloc(size, GFP_KERNEL);
228
229 if (unlikely(!p))
230 return p;
231 p->tcfc_refcnt = 1;
232 if (bind)
233 p->tcfc_bindcnt = 1;
234
235 spin_lock_init(&p->tcfc_lock);
236 p->tcfc_stats_lock = &p->tcfc_lock;
237 p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
238 p->tcfc_tm.install = jiffies;
239 p->tcfc_tm.lastuse = jiffies;
240#ifdef CONFIG_NET_ESTIMATOR
241 if (est)
242 gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
243 p->tcfc_stats_lock, est);
45#endif 244#endif
245 a->priv = (void *) p;
246 return p;
247}
248EXPORT_SYMBOL(tcf_hash_create);
249
250void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
251{
252 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
253
254 write_lock_bh(hinfo->lock);
255 p->tcfc_next = hinfo->htab[h];
256 hinfo->htab[h] = p;
257 write_unlock_bh(hinfo->lock);
258}
259EXPORT_SYMBOL(tcf_hash_insert);
46 260
47static struct tc_action_ops *act_base = NULL; 261static struct tc_action_ops *act_base = NULL;
48static DEFINE_RWLOCK(act_mod_lock); 262static DEFINE_RWLOCK(act_mod_lock);
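
The tcf_hash_*() helpers above generalize the per-action hash code that
net/pkt_act.h used to stamp out. The expected init-path lifecycle, distilled
from the act_gact conversion later in this patch (the my_* names are
placeholders for an action's own table, index generator and private struct):

	pc = tcf_hash_check(parm->index, a, bind, &my_hash_info);
	if (pc == NULL) {
		pc = tcf_hash_create(parm->index, est, a, sizeof(struct my_act),
				     bind, &my_idx_gen, &my_hash_info);
		if (unlikely(pc == NULL))
			return -ENOMEM;
		ret = ACT_P_CREATED;
	} else if (!ovr) {
		tcf_hash_release(pc, bind, &my_hash_info);
		return -EEXIST;
	}
	/* fill in the action's private fields under the entry's lock */
	if (ret == ACT_P_CREATED)
		tcf_hash_insert(pc, &my_hash_info);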
@@ -155,9 +369,6 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
155 369
156 if (skb->tc_verd & TC_NCLS) { 370 if (skb->tc_verd & TC_NCLS) {
157 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 371 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
158 D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n",
159 skb, skb->input_dev ? skb->input_dev->name : "xxx",
160 skb->dev->name);
161 ret = TC_ACT_OK; 372 ret = TC_ACT_OK;
162 goto exec_done; 373 goto exec_done;
163 } 374 }
@@ -187,8 +398,6 @@ void tcf_action_destroy(struct tc_action *act, int bind)
187 398
188 for (a = act; a; a = act) { 399 for (a = act; a; a = act) {
189 if (a->ops && a->ops->cleanup) { 400 if (a->ops && a->ops->cleanup) {
190 DPRINTK("tcf_action_destroy destroying %p next %p\n",
191 a, a->next);
192 if (a->ops->cleanup(a, bind) == ACT_P_DELETED) 401 if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
193 module_put(a->ops->owner); 402 module_put(a->ops->owner);
194 act = act->next; 403 act = act->next;
@@ -331,7 +540,6 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
331 if (*err != ACT_P_CREATED) 540 if (*err != ACT_P_CREATED)
332 module_put(a_o->owner); 541 module_put(a_o->owner);
333 a->ops = a_o; 542 a->ops = a_o;
334 DPRINTK("tcf_action_init_1: successfull %s\n", act_name);
335 543
336 *err = 0; 544 *err = 0;
337 return a; 545 return a;
@@ -392,12 +600,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
392 if (compat_mode) { 600 if (compat_mode) {
393 if (a->type == TCA_OLD_COMPAT) 601 if (a->type == TCA_OLD_COMPAT)
394 err = gnet_stats_start_copy_compat(skb, 0, 602 err = gnet_stats_start_copy_compat(skb, 0,
395 TCA_STATS, TCA_XSTATS, h->stats_lock, &d); 603 TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d);
396 else 604 else
397 return 0; 605 return 0;
398 } else 606 } else
399 err = gnet_stats_start_copy(skb, TCA_ACT_STATS, 607 err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
400 h->stats_lock, &d); 608 h->tcf_stats_lock, &d);
401 609
402 if (err < 0) 610 if (err < 0)
403 goto errout; 611 goto errout;
@@ -406,11 +614,11 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
406 if (a->ops->get_stats(skb, a) < 0) 614 if (a->ops->get_stats(skb, a) < 0)
407 goto errout; 615 goto errout;
408 616
409 if (gnet_stats_copy_basic(&d, &h->bstats) < 0 || 617 if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
410#ifdef CONFIG_NET_ESTIMATOR 618#ifdef CONFIG_NET_ESTIMATOR
411 gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 || 619 gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
412#endif 620#endif
413 gnet_stats_copy_queue(&d, &h->qstats) < 0) 621 gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
414 goto errout; 622 goto errout;
415 623
416 if (gnet_stats_finish_copy(&d) < 0) 624 if (gnet_stats_finish_copy(&d) < 0)
@@ -459,7 +667,6 @@ static int
459act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) 667act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
460{ 668{
461 struct sk_buff *skb; 669 struct sk_buff *skb;
462 int err = 0;
463 670
464 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 671 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
465 if (!skb) 672 if (!skb)
@@ -468,10 +675,8 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
468 kfree_skb(skb); 675 kfree_skb(skb);
469 return -EINVAL; 676 return -EINVAL;
470 } 677 }
471 err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); 678
472 if (err > 0) 679 return rtnl_unicast(skb, pid);
473 err = 0;
474 return err;
475} 680}
476 681
477static struct tc_action * 682static struct tc_action *
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e75a147ad60f..6cff56696a81 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -34,48 +34,43 @@
34#include <linux/tc_act/tc_gact.h> 34#include <linux/tc_act/tc_gact.h>
35#include <net/tc_act/tc_gact.h> 35#include <net/tc_act/tc_gact.h>
36 36
37/* use generic hash table */ 37#define GACT_TAB_MASK 15
38#define MY_TAB_SIZE 16 38static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1];
39#define MY_TAB_MASK 15 39static u32 gact_idx_gen;
40
41static u32 idx_gen;
42static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE];
43static DEFINE_RWLOCK(gact_lock); 40static DEFINE_RWLOCK(gact_lock);
44 41
45/* ovewrride the defaults */ 42static struct tcf_hashinfo gact_hash_info = {
46#define tcf_st tcf_gact 43 .htab = tcf_gact_ht,
47#define tc_st tc_gact 44 .hmask = GACT_TAB_MASK,
48#define tcf_t_lock gact_lock 45 .lock = &gact_lock,
49#define tcf_ht tcf_gact_ht 46};
50
51#define CONFIG_NET_ACT_INIT 1
52#include <net/pkt_act.h>
53 47
54#ifdef CONFIG_GACT_PROB 48#ifdef CONFIG_GACT_PROB
55static int gact_net_rand(struct tcf_gact *p) 49static int gact_net_rand(struct tcf_gact *gact)
56{ 50{
57 if (net_random()%p->pval) 51 if (net_random() % gact->tcfg_pval)
58 return p->action; 52 return gact->tcf_action;
59 return p->paction; 53 return gact->tcfg_paction;
60} 54}
61 55
62static int gact_determ(struct tcf_gact *p) 56static int gact_determ(struct tcf_gact *gact)
63{ 57{
64 if (p->bstats.packets%p->pval) 58 if (gact->tcf_bstats.packets % gact->tcfg_pval)
65 return p->action; 59 return gact->tcf_action;
66 return p->paction; 60 return gact->tcfg_paction;
67} 61}
68 62
69typedef int (*g_rand)(struct tcf_gact *p); 63typedef int (*g_rand)(struct tcf_gact *gact);
70static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; 64static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
71#endif 65#endif /* CONFIG_GACT_PROB */
72 66
73static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, 67static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
74 struct tc_action *a, int ovr, int bind) 68 struct tc_action *a, int ovr, int bind)
75{ 69{
76 struct rtattr *tb[TCA_GACT_MAX]; 70 struct rtattr *tb[TCA_GACT_MAX];
77 struct tc_gact *parm; 71 struct tc_gact *parm;
78 struct tcf_gact *p; 72 struct tcf_gact *gact;
73 struct tcf_common *pc;
79 int ret = 0; 74 int ret = 0;
80 75
81 if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) 76 if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0)
@@ -94,105 +89,106 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
94 return -EOPNOTSUPP; 89 return -EOPNOTSUPP;
95#endif 90#endif
96 91
97 p = tcf_hash_check(parm->index, a, ovr, bind); 92 pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
98 if (p == NULL) { 93 if (!pc) {
99 p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); 94 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
100 if (p == NULL) 95 bind, &gact_idx_gen, &gact_hash_info);
96 if (unlikely(!pc))
101 return -ENOMEM; 97 return -ENOMEM;
102 ret = ACT_P_CREATED; 98 ret = ACT_P_CREATED;
103 } else { 99 } else {
104 if (!ovr) { 100 if (!ovr) {
105 tcf_hash_release(p, bind); 101 tcf_hash_release(pc, bind, &gact_hash_info);
106 return -EEXIST; 102 return -EEXIST;
107 } 103 }
108 } 104 }
109 105
110 spin_lock_bh(&p->lock); 106 gact = to_gact(pc);
111 p->action = parm->action; 107
108 spin_lock_bh(&gact->tcf_lock);
109 gact->tcf_action = parm->action;
112#ifdef CONFIG_GACT_PROB 110#ifdef CONFIG_GACT_PROB
113 if (tb[TCA_GACT_PROB-1] != NULL) { 111 if (tb[TCA_GACT_PROB-1] != NULL) {
114 struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); 112 struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]);
115 p->paction = p_parm->paction; 113 gact->tcfg_paction = p_parm->paction;
116 p->pval = p_parm->pval; 114 gact->tcfg_pval = p_parm->pval;
117 p->ptype = p_parm->ptype; 115 gact->tcfg_ptype = p_parm->ptype;
118 } 116 }
119#endif 117#endif
120 spin_unlock_bh(&p->lock); 118 spin_unlock_bh(&gact->tcf_lock);
121 if (ret == ACT_P_CREATED) 119 if (ret == ACT_P_CREATED)
122 tcf_hash_insert(p); 120 tcf_hash_insert(pc, &gact_hash_info);
123 return ret; 121 return ret;
124} 122}
125 123
126static int 124static int tcf_gact_cleanup(struct tc_action *a, int bind)
127tcf_gact_cleanup(struct tc_action *a, int bind)
128{ 125{
129 struct tcf_gact *p = PRIV(a, gact); 126 struct tcf_gact *gact = a->priv;
130 127
131 if (p != NULL) 128 if (gact)
132 return tcf_hash_release(p, bind); 129 return tcf_hash_release(&gact->common, bind, &gact_hash_info);
133 return 0; 130 return 0;
134} 131}
135 132
136static int 133static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
137tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
138{ 134{
139 struct tcf_gact *p = PRIV(a, gact); 135 struct tcf_gact *gact = a->priv;
140 int action = TC_ACT_SHOT; 136 int action = TC_ACT_SHOT;
141 137
142 spin_lock(&p->lock); 138 spin_lock(&gact->tcf_lock);
143#ifdef CONFIG_GACT_PROB 139#ifdef CONFIG_GACT_PROB
144 if (p->ptype && gact_rand[p->ptype] != NULL) 140 if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
145 action = gact_rand[p->ptype](p); 141 action = gact_rand[gact->tcfg_ptype](gact);
146 else 142 else
147 action = p->action; 143 action = gact->tcf_action;
148#else 144#else
149 action = p->action; 145 action = gact->tcf_action;
150#endif 146#endif
151 p->bstats.bytes += skb->len; 147 gact->tcf_bstats.bytes += skb->len;
152 p->bstats.packets++; 148 gact->tcf_bstats.packets++;
153 if (action == TC_ACT_SHOT) 149 if (action == TC_ACT_SHOT)
154 p->qstats.drops++; 150 gact->tcf_qstats.drops++;
155 p->tm.lastuse = jiffies; 151 gact->tcf_tm.lastuse = jiffies;
156 spin_unlock(&p->lock); 152 spin_unlock(&gact->tcf_lock);
157 153
158 return action; 154 return action;
159} 155}
160 156
161static int 157static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
162tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
163{ 158{
164 unsigned char *b = skb->tail; 159 unsigned char *b = skb->tail;
165 struct tc_gact opt; 160 struct tc_gact opt;
166 struct tcf_gact *p = PRIV(a, gact); 161 struct tcf_gact *gact = a->priv;
167 struct tcf_t t; 162 struct tcf_t t;
168 163
169 opt.index = p->index; 164 opt.index = gact->tcf_index;
170 opt.refcnt = p->refcnt - ref; 165 opt.refcnt = gact->tcf_refcnt - ref;
171 opt.bindcnt = p->bindcnt - bind; 166 opt.bindcnt = gact->tcf_bindcnt - bind;
172 opt.action = p->action; 167 opt.action = gact->tcf_action;
173 RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); 168 RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
174#ifdef CONFIG_GACT_PROB 169#ifdef CONFIG_GACT_PROB
175 if (p->ptype) { 170 if (gact->tcfg_ptype) {
176 struct tc_gact_p p_opt; 171 struct tc_gact_p p_opt;
177 p_opt.paction = p->paction; 172 p_opt.paction = gact->tcfg_paction;
178 p_opt.pval = p->pval; 173 p_opt.pval = gact->tcfg_pval;
179 p_opt.ptype = p->ptype; 174 p_opt.ptype = gact->tcfg_ptype;
180 RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); 175 RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
181 } 176 }
182#endif 177#endif
183 t.install = jiffies_to_clock_t(jiffies - p->tm.install); 178 t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
184 t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); 179 t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
185 t.expires = jiffies_to_clock_t(p->tm.expires); 180 t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
186 RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); 181 RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
187 return skb->len; 182 return skb->len;
188 183
189 rtattr_failure: 184rtattr_failure:
190 skb_trim(skb, b - skb->data); 185 skb_trim(skb, b - skb->data);
191 return -1; 186 return -1;
192} 187}
193 188
194static struct tc_action_ops act_gact_ops = { 189static struct tc_action_ops act_gact_ops = {
195 .kind = "gact", 190 .kind = "gact",
191 .hinfo = &gact_hash_info,
196 .type = TCA_ACT_GACT, 192 .type = TCA_ACT_GACT,
197 .capab = TCA_CAP_NONE, 193 .capab = TCA_CAP_NONE,
198 .owner = THIS_MODULE, 194 .owner = THIS_MODULE,
@@ -208,8 +204,7 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
208MODULE_DESCRIPTION("Generic Classifier actions"); 204MODULE_DESCRIPTION("Generic Classifier actions");
209MODULE_LICENSE("GPL"); 205MODULE_LICENSE("GPL");
210 206
211static int __init 207static int __init gact_init_module(void)
212gact_init_module(void)
213{ 208{
214#ifdef CONFIG_GACT_PROB 209#ifdef CONFIG_GACT_PROB
215 printk("GACT probability on\n"); 210 printk("GACT probability on\n");
@@ -219,8 +214,7 @@ gact_init_module(void)
219 return tcf_register_action(&act_gact_ops); 214 return tcf_register_action(&act_gact_ops);
220} 215}
221 216
222static void __exit 217static void __exit gact_cleanup_module(void)
223gact_cleanup_module(void)
224{ 218{
225 tcf_unregister_action(&act_gact_ops); 219 tcf_unregister_action(&act_gact_ops);
226} 220}
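The field renames in the gact hunks (p->action to gact->tcf_action, p->pval to gact->tcfg_pval, and so on) work because the shared action state now lives in a struct tcf_common embedded at the head of struct tcf_gact, and to_gact() recovers the wrapper from the tcf_common pointer that the generic helpers pass around. A compilable sketch of that embed-and-cast pattern follows; the cast is an assumption about what to_gact() amounts to, and it holds only while the common member stays first.

/* Sketch of the embedded-common pattern the diff relies on: tcf_common
 * sits first in each private action struct, so a tcf_common pointer can
 * be converted back to the wrapper. to_gact() in the patch is assumed
 * to do the equivalent of this cast. */
#include <stddef.h>
#include <assert.h>

struct tcf_common { unsigned int tcfc_index; };

struct tcf_gact {
	struct tcf_common common;     /* must stay the first member */
	int               tcfg_paction;
};

static struct tcf_gact *to_gact(struct tcf_common *pc)
{
	return (struct tcf_gact *)pc; /* valid only because common is first */
}

int main(void)
{
	struct tcf_gact g = { { 7 }, 0 };
	struct tcf_common *pc = &g.common;

	assert(to_gact(pc) == &g);
	assert(offsetof(struct tcf_gact, common) == 0);
	return 0;
}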
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d799e01248c4..d8c9310da6e5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -38,25 +38,19 @@
38 38
39#include <linux/netfilter_ipv4/ip_tables.h> 39#include <linux/netfilter_ipv4/ip_tables.h>
40 40
41/* use generic hash table */
42#define MY_TAB_SIZE 16
43#define MY_TAB_MASK 15
44 41
45static u32 idx_gen; 42#define IPT_TAB_MASK 15
46static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE]; 43static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1];
47/* ipt hash table lock */ 44static u32 ipt_idx_gen;
48static DEFINE_RWLOCK(ipt_lock); 45static DEFINE_RWLOCK(ipt_lock);
49 46
50/* ovewrride the defaults */ 47static struct tcf_hashinfo ipt_hash_info = {
51#define tcf_st tcf_ipt 48 .htab = tcf_ipt_ht,
52#define tcf_t_lock ipt_lock 49 .hmask = IPT_TAB_MASK,
53#define tcf_ht tcf_ipt_ht 50 .lock = &ipt_lock,
54 51};
55#define CONFIG_NET_ACT_INIT
56#include <net/pkt_act.h>
57 52
58static int 53static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
59ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
60{ 54{
61 struct ipt_target *target; 55 struct ipt_target *target;
62 int ret = 0; 56 int ret = 0;
@@ -65,7 +59,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
65 if (!target) 59 if (!target)
66 return -ENOENT; 60 return -ENOENT;
67 61
68 DPRINTK("ipt_init_target: found %s\n", target->name);
69 t->u.kernel.target = target; 62 t->u.kernel.target = target;
70 63
71 ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), 64 ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
@@ -76,10 +69,7 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
76 if (t->u.kernel.target->checkentry 69 if (t->u.kernel.target->checkentry
77 && !t->u.kernel.target->checkentry(table, NULL, 70 && !t->u.kernel.target->checkentry(table, NULL,
78 t->u.kernel.target, t->data, 71 t->u.kernel.target, t->data,
79 t->u.target_size - sizeof(*t),
80 hook)) { 72 hook)) {
81 DPRINTK("ipt_init_target: check failed for `%s'.\n",
82 t->u.kernel.target->name);
83 module_put(t->u.kernel.target->me); 73 module_put(t->u.kernel.target->me);
84 ret = -EINVAL; 74 ret = -EINVAL;
85 } 75 }
@@ -87,40 +77,37 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
87 return ret; 77 return ret;
88} 78}
89 79
90static void 80static void ipt_destroy_target(struct ipt_entry_target *t)
91ipt_destroy_target(struct ipt_entry_target *t)
92{ 81{
93 if (t->u.kernel.target->destroy) 82 if (t->u.kernel.target->destroy)
94 t->u.kernel.target->destroy(t->u.kernel.target, t->data, 83 t->u.kernel.target->destroy(t->u.kernel.target, t->data);
95 t->u.target_size - sizeof(*t));
96 module_put(t->u.kernel.target->me); 84 module_put(t->u.kernel.target->me);
97} 85}
98 86
99static int 87static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
100tcf_ipt_release(struct tcf_ipt *p, int bind)
101{ 88{
102 int ret = 0; 89 int ret = 0;
103 if (p) { 90 if (ipt) {
104 if (bind) 91 if (bind)
105 p->bindcnt--; 92 ipt->tcf_bindcnt--;
106 p->refcnt--; 93 ipt->tcf_refcnt--;
107 if (p->bindcnt <= 0 && p->refcnt <= 0) { 94 if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
108 ipt_destroy_target(p->t); 95 ipt_destroy_target(ipt->tcfi_t);
109 kfree(p->tname); 96 kfree(ipt->tcfi_tname);
110 kfree(p->t); 97 kfree(ipt->tcfi_t);
111 tcf_hash_destroy(p); 98 tcf_hash_destroy(&ipt->common, &ipt_hash_info);
112 ret = ACT_P_DELETED; 99 ret = ACT_P_DELETED;
113 } 100 }
114 } 101 }
115 return ret; 102 return ret;
116} 103}
117 104
118static int 105static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
119tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, 106 struct tc_action *a, int ovr, int bind)
120 int ovr, int bind)
121{ 107{
122 struct rtattr *tb[TCA_IPT_MAX]; 108 struct rtattr *tb[TCA_IPT_MAX];
123 struct tcf_ipt *p; 109 struct tcf_ipt *ipt;
110 struct tcf_common *pc;
124 struct ipt_entry_target *td, *t; 111 struct ipt_entry_target *td, *t;
125 char *tname; 112 char *tname;
126 int ret = 0, err; 113 int ret = 0, err;
@@ -144,49 +131,51 @@ tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
144 RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) 131 RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
145 index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]); 132 index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
146 133
147 p = tcf_hash_check(index, a, ovr, bind); 134 pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
148 if (p == NULL) { 135 if (!pc) {
149 p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind); 136 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
150 if (p == NULL) 137 &ipt_idx_gen, &ipt_hash_info);
138 if (unlikely(!pc))
151 return -ENOMEM; 139 return -ENOMEM;
152 ret = ACT_P_CREATED; 140 ret = ACT_P_CREATED;
153 } else { 141 } else {
154 if (!ovr) { 142 if (!ovr) {
155 tcf_ipt_release(p, bind); 143 tcf_ipt_release(to_ipt(pc), bind);
156 return -EEXIST; 144 return -EEXIST;
157 } 145 }
158 } 146 }
147 ipt = to_ipt(pc);
159 148
160 hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); 149 hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);
161 150
162 err = -ENOMEM; 151 err = -ENOMEM;
163 tname = kmalloc(IFNAMSIZ, GFP_KERNEL); 152 tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
164 if (tname == NULL) 153 if (unlikely(!tname))
165 goto err1; 154 goto err1;
166 if (tb[TCA_IPT_TABLE - 1] == NULL || 155 if (tb[TCA_IPT_TABLE - 1] == NULL ||
167 rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) 156 rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
168 strcpy(tname, "mangle"); 157 strcpy(tname, "mangle");
169 158
170 t = kmalloc(td->u.target_size, GFP_KERNEL); 159 t = kmalloc(td->u.target_size, GFP_KERNEL);
171 if (t == NULL) 160 if (unlikely(!t))
172 goto err2; 161 goto err2;
173 memcpy(t, td, td->u.target_size); 162 memcpy(t, td, td->u.target_size);
174 163
175 if ((err = ipt_init_target(t, tname, hook)) < 0) 164 if ((err = ipt_init_target(t, tname, hook)) < 0)
176 goto err3; 165 goto err3;
177 166
178 spin_lock_bh(&p->lock); 167 spin_lock_bh(&ipt->tcf_lock);
179 if (ret != ACT_P_CREATED) { 168 if (ret != ACT_P_CREATED) {
180 ipt_destroy_target(p->t); 169 ipt_destroy_target(ipt->tcfi_t);
181 kfree(p->tname); 170 kfree(ipt->tcfi_tname);
182 kfree(p->t); 171 kfree(ipt->tcfi_t);
183 } 172 }
184 p->tname = tname; 173 ipt->tcfi_tname = tname;
185 p->t = t; 174 ipt->tcfi_t = t;
186 p->hook = hook; 175 ipt->tcfi_hook = hook;
187 spin_unlock_bh(&p->lock); 176 spin_unlock_bh(&ipt->tcf_lock);
188 if (ret == ACT_P_CREATED) 177 if (ret == ACT_P_CREATED)
189 tcf_hash_insert(p); 178 tcf_hash_insert(pc, &ipt_hash_info);
190 return ret; 179 return ret;
191 180
192err3: 181err3:
@@ -194,33 +183,32 @@ err3:
194err2: 183err2:
195 kfree(tname); 184 kfree(tname);
196err1: 185err1:
197 kfree(p); 186 kfree(pc);
198 return err; 187 return err;
199} 188}
200 189
201static int 190static int tcf_ipt_cleanup(struct tc_action *a, int bind)
202tcf_ipt_cleanup(struct tc_action *a, int bind)
203{ 191{
204 struct tcf_ipt *p = PRIV(a, ipt); 192 struct tcf_ipt *ipt = a->priv;
205 return tcf_ipt_release(p, bind); 193 return tcf_ipt_release(ipt, bind);
206} 194}
207 195
208static int 196static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
209tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) 197 struct tcf_result *res)
210{ 198{
211 int ret = 0, result = 0; 199 int ret = 0, result = 0;
212 struct tcf_ipt *p = PRIV(a, ipt); 200 struct tcf_ipt *ipt = a->priv;
213 201
214 if (skb_cloned(skb)) { 202 if (skb_cloned(skb)) {
215 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 203 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
216 return TC_ACT_UNSPEC; 204 return TC_ACT_UNSPEC;
217 } 205 }
218 206
219 spin_lock(&p->lock); 207 spin_lock(&ipt->tcf_lock);
220 208
221 p->tm.lastuse = jiffies; 209 ipt->tcf_tm.lastuse = jiffies;
222 p->bstats.bytes += skb->len; 210 ipt->tcf_bstats.bytes += skb->len;
223 p->bstats.packets++; 211 ipt->tcf_bstats.packets++;
224 212
225 /* yes, we have to worry about both in and out dev 213 /* yes, we have to worry about both in and out dev
226 worry later - danger - this API seems to have changed 214 worry later - danger - this API seems to have changed
@@ -229,16 +217,17 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
229 /* iptables targets take a double skb pointer in case the skb 217 /* iptables targets take a double skb pointer in case the skb
230 * needs to be replaced. We don't own the skb, so this must not 218 * needs to be replaced. We don't own the skb, so this must not
231 * happen. The pskb_expand_head above should make sure of this */ 219 * happen. The pskb_expand_head above should make sure of this */
232 ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook, 220 ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
233 p->t->u.kernel.target, p->t->data, 221 ipt->tcfi_hook,
234 NULL); 222 ipt->tcfi_t->u.kernel.target,
223 ipt->tcfi_t->data);
235 switch (ret) { 224 switch (ret) {
236 case NF_ACCEPT: 225 case NF_ACCEPT:
237 result = TC_ACT_OK; 226 result = TC_ACT_OK;
238 break; 227 break;
239 case NF_DROP: 228 case NF_DROP:
240 result = TC_ACT_SHOT; 229 result = TC_ACT_SHOT;
241 p->qstats.drops++; 230 ipt->tcf_qstats.drops++;
242 break; 231 break;
243 case IPT_CONTINUE: 232 case IPT_CONTINUE:
244 result = TC_ACT_PIPE; 233 result = TC_ACT_PIPE;
@@ -249,53 +238,46 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
249 result = TC_POLICE_OK; 238 result = TC_POLICE_OK;
250 break; 239 break;
251 } 240 }
252 spin_unlock(&p->lock); 241 spin_unlock(&ipt->tcf_lock);
253 return result; 242 return result;
254 243
255} 244}
256 245
257static int 246static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
258tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
259{ 247{
248 unsigned char *b = skb->tail;
249 struct tcf_ipt *ipt = a->priv;
260 struct ipt_entry_target *t; 250 struct ipt_entry_target *t;
261 struct tcf_t tm; 251 struct tcf_t tm;
262 struct tc_cnt c; 252 struct tc_cnt c;
263 unsigned char *b = skb->tail;
264 struct tcf_ipt *p = PRIV(a, ipt);
265 253
266 /* for simple targets kernel size == user size 254 /* for simple targets kernel size == user size
267 ** user name = target name 255 ** user name = target name
268 ** for foolproof you need to not assume this 256 ** for foolproof you need to not assume this
269 */ 257 */
270 258
271 t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC); 259 t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
272 if (t == NULL) 260 if (unlikely(!t))
273 goto rtattr_failure; 261 goto rtattr_failure;
274 262
275 c.bindcnt = p->bindcnt - bind; 263 c.bindcnt = ipt->tcf_bindcnt - bind;
276 c.refcnt = p->refcnt - ref; 264 c.refcnt = ipt->tcf_refcnt - ref;
277 memcpy(t, p->t, p->t->u.user.target_size); 265 memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size);
278 strcpy(t->u.user.name, p->t->u.kernel.target->name); 266 strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
279 267
280 DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname, 268 RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
281 strlen(p->tname)); 269 RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
282 DPRINTK("\tdump target name %s size %d size user %d " 270 RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
283 "data[0] %x data[1] %x\n", p->t->u.kernel.target->name,
284 p->t->u.target_size, p->t->u.user.target_size,
285 p->t->data[0], p->t->data[1]);
286 RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t);
287 RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index);
288 RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook);
289 RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); 271 RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
290 RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname); 272 RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname);
291 tm.install = jiffies_to_clock_t(jiffies - p->tm.install); 273 tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
292 tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); 274 tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
293 tm.expires = jiffies_to_clock_t(p->tm.expires); 275 tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
294 RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); 276 RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
295 kfree(t); 277 kfree(t);
296 return skb->len; 278 return skb->len;
297 279
298 rtattr_failure: 280rtattr_failure:
299 skb_trim(skb, b - skb->data); 281 skb_trim(skb, b - skb->data);
300 kfree(t); 282 kfree(t);
301 return -1; 283 return -1;
@@ -303,6 +285,7 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
303 285
304static struct tc_action_ops act_ipt_ops = { 286static struct tc_action_ops act_ipt_ops = {
305 .kind = "ipt", 287 .kind = "ipt",
288 .hinfo = &ipt_hash_info,
306 .type = TCA_ACT_IPT, 289 .type = TCA_ACT_IPT,
307 .capab = TCA_CAP_NONE, 290 .capab = TCA_CAP_NONE,
308 .owner = THIS_MODULE, 291 .owner = THIS_MODULE,
@@ -318,14 +301,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
318MODULE_DESCRIPTION("Iptables target actions"); 301MODULE_DESCRIPTION("Iptables target actions");
319MODULE_LICENSE("GPL"); 302MODULE_LICENSE("GPL");
320 303
321static int __init 304static int __init ipt_init_module(void)
322ipt_init_module(void)
323{ 305{
324 return tcf_register_action(&act_ipt_ops); 306 return tcf_register_action(&act_ipt_ops);
325} 307}
326 308
327static void __exit 309static void __exit ipt_cleanup_module(void)
328ipt_cleanup_module(void)
329{ 310{
330 tcf_unregister_action(&act_ipt_ops); 311 tcf_unregister_action(&act_ipt_ops);
331} 312}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fc562047ecc5..483897271f15 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -39,46 +39,39 @@
39#include <linux/etherdevice.h> 39#include <linux/etherdevice.h>
40#include <linux/if_arp.h> 40#include <linux/if_arp.h>
41 41
42 42#define MIRRED_TAB_MASK 7
43/* use generic hash table */ 43static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
44#define MY_TAB_SIZE 8 44static u32 mirred_idx_gen;
45#define MY_TAB_MASK (MY_TAB_SIZE - 1)
46static u32 idx_gen;
47static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE];
48static DEFINE_RWLOCK(mirred_lock); 45static DEFINE_RWLOCK(mirred_lock);
49 46
50/* ovewrride the defaults */ 47static struct tcf_hashinfo mirred_hash_info = {
51#define tcf_st tcf_mirred 48 .htab = tcf_mirred_ht,
52#define tc_st tc_mirred 49 .hmask = MIRRED_TAB_MASK,
53#define tcf_t_lock mirred_lock 50 .lock = &mirred_lock,
54#define tcf_ht tcf_mirred_ht 51};
55
56#define CONFIG_NET_ACT_INIT 1
57#include <net/pkt_act.h>
58 52
59static inline int 53static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
60tcf_mirred_release(struct tcf_mirred *p, int bind)
61{ 54{
62 if (p) { 55 if (m) {
63 if (bind) 56 if (bind)
64 p->bindcnt--; 57 m->tcf_bindcnt--;
65 p->refcnt--; 58 m->tcf_refcnt--;
66 if(!p->bindcnt && p->refcnt <= 0) { 59 if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
67 dev_put(p->dev); 60 dev_put(m->tcfm_dev);
68 tcf_hash_destroy(p); 61 tcf_hash_destroy(&m->common, &mirred_hash_info);
69 return 1; 62 return 1;
70 } 63 }
71 } 64 }
72 return 0; 65 return 0;
73} 66}
74 67
75static int 68static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est,
76tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, 69 struct tc_action *a, int ovr, int bind)
77 int ovr, int bind)
78{ 70{
79 struct rtattr *tb[TCA_MIRRED_MAX]; 71 struct rtattr *tb[TCA_MIRRED_MAX];
80 struct tc_mirred *parm; 72 struct tc_mirred *parm;
81 struct tcf_mirred *p; 73 struct tcf_mirred *m;
74 struct tcf_common *pc;
82 struct net_device *dev = NULL; 75 struct net_device *dev = NULL;
83 int ret = 0; 76 int ret = 0;
84 int ok_push = 0; 77 int ok_push = 0;
@@ -110,64 +103,62 @@ tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
110 } 103 }
111 } 104 }
112 105
113 p = tcf_hash_check(parm->index, a, ovr, bind); 106 pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info);
114 if (p == NULL) { 107 if (!pc) {
115 if (!parm->ifindex) 108 if (!parm->ifindex)
116 return -EINVAL; 109 return -EINVAL;
117 p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); 110 pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
118 if (p == NULL) 111 &mirred_idx_gen, &mirred_hash_info);
112 if (unlikely(!pc))
119 return -ENOMEM; 113 return -ENOMEM;
120 ret = ACT_P_CREATED; 114 ret = ACT_P_CREATED;
121 } else { 115 } else {
122 if (!ovr) { 116 if (!ovr) {
123 tcf_mirred_release(p, bind); 117 tcf_mirred_release(to_mirred(pc), bind);
124 return -EEXIST; 118 return -EEXIST;
125 } 119 }
126 } 120 }
121 m = to_mirred(pc);
127 122
128 spin_lock_bh(&p->lock); 123 spin_lock_bh(&m->tcf_lock);
129 p->action = parm->action; 124 m->tcf_action = parm->action;
130 p->eaction = parm->eaction; 125 m->tcfm_eaction = parm->eaction;
131 if (parm->ifindex) { 126 if (parm->ifindex) {
132 p->ifindex = parm->ifindex; 127 m->tcfm_ifindex = parm->ifindex;
133 if (ret != ACT_P_CREATED) 128 if (ret != ACT_P_CREATED)
134 dev_put(p->dev); 129 dev_put(m->tcfm_dev);
135 p->dev = dev; 130 m->tcfm_dev = dev;
136 dev_hold(dev); 131 dev_hold(dev);
137 p->ok_push = ok_push; 132 m->tcfm_ok_push = ok_push;
138 } 133 }
139 spin_unlock_bh(&p->lock); 134 spin_unlock_bh(&m->tcf_lock);
140 if (ret == ACT_P_CREATED) 135 if (ret == ACT_P_CREATED)
141 tcf_hash_insert(p); 136 tcf_hash_insert(pc, &mirred_hash_info);
142 137
143 DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s "
144 "ifindex %d\n", parm->index, parm->action, parm->eaction,
145 dev->name, parm->ifindex);
146 return ret; 138 return ret;
147} 139}
148 140
149static int 141static int tcf_mirred_cleanup(struct tc_action *a, int bind)
150tcf_mirred_cleanup(struct tc_action *a, int bind)
151{ 142{
152 struct tcf_mirred *p = PRIV(a, mirred); 143 struct tcf_mirred *m = a->priv;
153 144
154 if (p != NULL) 145 if (m)
155 return tcf_mirred_release(p, bind); 146 return tcf_mirred_release(m, bind);
156 return 0; 147 return 0;
157} 148}
158 149
159static int 150static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
160tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) 151 struct tcf_result *res)
161{ 152{
162 struct tcf_mirred *p = PRIV(a, mirred); 153 struct tcf_mirred *m = a->priv;
163 struct net_device *dev; 154 struct net_device *dev;
164 struct sk_buff *skb2 = NULL; 155 struct sk_buff *skb2 = NULL;
165 u32 at = G_TC_AT(skb->tc_verd); 156 u32 at = G_TC_AT(skb->tc_verd);
166 157
167 spin_lock(&p->lock); 158 spin_lock(&m->tcf_lock);
168 159
169 dev = p->dev; 160 dev = m->tcfm_dev;
170 p->tm.lastuse = jiffies; 161 m->tcf_tm.lastuse = jiffies;
171 162
172 if (!(dev->flags&IFF_UP) ) { 163 if (!(dev->flags&IFF_UP) ) {
173 if (net_ratelimit()) 164 if (net_ratelimit())
@@ -176,10 +167,10 @@ tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
176bad_mirred: 167bad_mirred:
177 if (skb2 != NULL) 168 if (skb2 != NULL)
178 kfree_skb(skb2); 169 kfree_skb(skb2);
179 p->qstats.overlimits++; 170 m->tcf_qstats.overlimits++;
180 p->bstats.bytes += skb->len; 171 m->tcf_bstats.bytes += skb->len;
181 p->bstats.packets++; 172 m->tcf_bstats.packets++;
182 spin_unlock(&p->lock); 173 spin_unlock(&m->tcf_lock);
183 /* should we be asking for packet to be dropped? 174 /* should we be asking for packet to be dropped?
184 * may make sense for redirect case only 175 * may make sense for redirect case only
185 */ 176 */
@@ -189,59 +180,59 @@ bad_mirred:
189 skb2 = skb_clone(skb, GFP_ATOMIC); 180 skb2 = skb_clone(skb, GFP_ATOMIC);
190 if (skb2 == NULL) 181 if (skb2 == NULL)
191 goto bad_mirred; 182 goto bad_mirred;
192 if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) { 183 if (m->tcfm_eaction != TCA_EGRESS_MIRROR &&
184 m->tcfm_eaction != TCA_EGRESS_REDIR) {
193 if (net_ratelimit()) 185 if (net_ratelimit())
194 printk("tcf_mirred unknown action %d\n", p->eaction); 186 printk("tcf_mirred unknown action %d\n",
187 m->tcfm_eaction);
195 goto bad_mirred; 188 goto bad_mirred;
196 } 189 }
197 190
198 p->bstats.bytes += skb2->len; 191 m->tcf_bstats.bytes += skb2->len;
199 p->bstats.packets++; 192 m->tcf_bstats.packets++;
200 if (!(at & AT_EGRESS)) 193 if (!(at & AT_EGRESS))
201 if (p->ok_push) 194 if (m->tcfm_ok_push)
202 skb_push(skb2, skb2->dev->hard_header_len); 195 skb_push(skb2, skb2->dev->hard_header_len);
203 196
204 /* mirror is always swallowed */ 197 /* mirror is always swallowed */
205 if (p->eaction != TCA_EGRESS_MIRROR) 198 if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
206 skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); 199 skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
207 200
208 skb2->dev = dev; 201 skb2->dev = dev;
209 skb2->input_dev = skb->dev; 202 skb2->input_dev = skb->dev;
210 dev_queue_xmit(skb2); 203 dev_queue_xmit(skb2);
211 spin_unlock(&p->lock); 204 spin_unlock(&m->tcf_lock);
212 return p->action; 205 return m->tcf_action;
213} 206}
214 207
215static int 208static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
216tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
217{ 209{
218 unsigned char *b = skb->tail; 210 unsigned char *b = skb->tail;
211 struct tcf_mirred *m = a->priv;
219 struct tc_mirred opt; 212 struct tc_mirred opt;
220 struct tcf_mirred *p = PRIV(a, mirred);
221 struct tcf_t t; 213 struct tcf_t t;
222 214
223 opt.index = p->index; 215 opt.index = m->tcf_index;
224 opt.action = p->action; 216 opt.action = m->tcf_action;
225 opt.refcnt = p->refcnt - ref; 217 opt.refcnt = m->tcf_refcnt - ref;
226 opt.bindcnt = p->bindcnt - bind; 218 opt.bindcnt = m->tcf_bindcnt - bind;
227 opt.eaction = p->eaction; 219 opt.eaction = m->tcfm_eaction;
228 opt.ifindex = p->ifindex; 220 opt.ifindex = m->tcfm_ifindex;
229 DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n",
230 p->index, p->action, p->eaction, p->ifindex);
231 RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); 221 RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
232 t.install = jiffies_to_clock_t(jiffies - p->tm.install); 222 t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
233 t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); 223 t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
234 t.expires = jiffies_to_clock_t(p->tm.expires); 224 t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
235 RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); 225 RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
236 return skb->len; 226 return skb->len;
237 227
238 rtattr_failure: 228rtattr_failure:
239 skb_trim(skb, b - skb->data); 229 skb_trim(skb, b - skb->data);
240 return -1; 230 return -1;
241} 231}
242 232
243static struct tc_action_ops act_mirred_ops = { 233static struct tc_action_ops act_mirred_ops = {
244 .kind = "mirred", 234 .kind = "mirred",
235 .hinfo = &mirred_hash_info,
245 .type = TCA_ACT_MIRRED, 236 .type = TCA_ACT_MIRRED,
246 .capab = TCA_CAP_NONE, 237 .capab = TCA_CAP_NONE,
247 .owner = THIS_MODULE, 238 .owner = THIS_MODULE,
@@ -257,15 +248,13 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002)");
257MODULE_DESCRIPTION("Device Mirror/redirect actions"); 248MODULE_DESCRIPTION("Device Mirror/redirect actions");
258MODULE_LICENSE("GPL"); 249MODULE_LICENSE("GPL");
259 250
260static int __init 251static int __init mirred_init_module(void)
261mirred_init_module(void)
262{ 252{
263 printk("Mirror/redirect action on\n"); 253 printk("Mirror/redirect action on\n");
264 return tcf_register_action(&act_mirred_ops); 254 return tcf_register_action(&act_mirred_ops);
265} 255}
266 256
267static void __exit 257static void __exit mirred_cleanup_module(void)
268mirred_cleanup_module(void)
269{ 258{
270 tcf_unregister_action(&act_mirred_ops); 259 tcf_unregister_action(&act_mirred_ops);
271} 260}
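tcf_mirred_release() keeps the two-counter accounting the old code had, only with renamed fields: an unbind decrements tcf_bindcnt, every release decrements tcf_refcnt, and the device reference and hash entry are dropped only once both counts are exhausted. A small sketch of that logic with plain ints; the kernel serializes the counters with the per-action lock, which is omitted here.

/* Sketch of the bind/ref accounting in tcf_mirred_release(): the action
 * is destroyed only when both the binding count and the reference count
 * have dropped. */
#include <stdio.h>

struct act { int bindcnt, refcnt; };

static int release(struct act *a, int bind)
{
	if (!a)
		return 0;
	if (bind)
		a->bindcnt--;
	a->refcnt--;
	if (a->bindcnt <= 0 && a->refcnt <= 0) {
		/* dev_put() + tcf_hash_destroy() in the real code */
		return 1; /* caller learns the action is gone */
	}
	return 0;
}

int main(void)
{
	struct act a = { .bindcnt = 1, .refcnt = 2 };
	int first = release(&a, 1);   /* unbind: counts 0/1, keep */
	int second = release(&a, 0);  /* last ref: counts 0/0, destroy */

	printf("%d %d\n", first, second); /* prints "0 1" */
	return 0;
}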
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index f257475e0e0c..8ac65c219b98 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -33,32 +33,25 @@
33#include <linux/tc_act/tc_pedit.h> 33#include <linux/tc_act/tc_pedit.h>
34#include <net/tc_act/tc_pedit.h> 34#include <net/tc_act/tc_pedit.h>
35 35
36 36#define PEDIT_TAB_MASK 15
37#define PEDIT_DEB 1 37static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1];
38 38static u32 pedit_idx_gen;
39/* use generic hash table */
40#define MY_TAB_SIZE 16
41#define MY_TAB_MASK 15
42static u32 idx_gen;
43static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE];
44static DEFINE_RWLOCK(pedit_lock); 39static DEFINE_RWLOCK(pedit_lock);
45 40
46#define tcf_st tcf_pedit 41static struct tcf_hashinfo pedit_hash_info = {
47#define tc_st tc_pedit 42 .htab = tcf_pedit_ht,
48#define tcf_t_lock pedit_lock 43 .hmask = PEDIT_TAB_MASK,
49#define tcf_ht tcf_pedit_ht 44 .lock = &pedit_lock,
50 45};
51#define CONFIG_NET_ACT_INIT 1
52#include <net/pkt_act.h>
53 46
54static int 47static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est,
55tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, 48 struct tc_action *a, int ovr, int bind)
56 int ovr, int bind)
57{ 49{
58 struct rtattr *tb[TCA_PEDIT_MAX]; 50 struct rtattr *tb[TCA_PEDIT_MAX];
59 struct tc_pedit *parm; 51 struct tc_pedit *parm;
60 int ret = 0; 52 int ret = 0;
61 struct tcf_pedit *p; 53 struct tcf_pedit *p;
54 struct tcf_common *pc;
62 struct tc_pedit_key *keys = NULL; 55 struct tc_pedit_key *keys = NULL;
63 int ksize; 56 int ksize;
64 57
@@ -73,54 +66,56 @@ tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
73 if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) 66 if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize)
74 return -EINVAL; 67 return -EINVAL;
75 68
76 p = tcf_hash_check(parm->index, a, ovr, bind); 69 pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
77 if (p == NULL) { 70 if (!pc) {
78 if (!parm->nkeys) 71 if (!parm->nkeys)
79 return -EINVAL; 72 return -EINVAL;
80 p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); 73 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
81 if (p == NULL) 74 &pedit_idx_gen, &pedit_hash_info);
75 if (unlikely(!pc))
82 return -ENOMEM; 76 return -ENOMEM;
77 p = to_pedit(pc);
83 keys = kmalloc(ksize, GFP_KERNEL); 78 keys = kmalloc(ksize, GFP_KERNEL);
84 if (keys == NULL) { 79 if (keys == NULL) {
85 kfree(p); 80 kfree(pc);
86 return -ENOMEM; 81 return -ENOMEM;
87 } 82 }
88 ret = ACT_P_CREATED; 83 ret = ACT_P_CREATED;
89 } else { 84 } else {
85 p = to_pedit(pc);
90 if (!ovr) { 86 if (!ovr) {
91 tcf_hash_release(p, bind); 87 tcf_hash_release(pc, bind, &pedit_hash_info);
92 return -EEXIST; 88 return -EEXIST;
93 } 89 }
94 if (p->nkeys && p->nkeys != parm->nkeys) { 90 if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
95 keys = kmalloc(ksize, GFP_KERNEL); 91 keys = kmalloc(ksize, GFP_KERNEL);
96 if (keys == NULL) 92 if (keys == NULL)
97 return -ENOMEM; 93 return -ENOMEM;
98 } 94 }
99 } 95 }
100 96
101 spin_lock_bh(&p->lock); 97 spin_lock_bh(&p->tcf_lock);
102 p->flags = parm->flags; 98 p->tcfp_flags = parm->flags;
103 p->action = parm->action; 99 p->tcf_action = parm->action;
104 if (keys) { 100 if (keys) {
105 kfree(p->keys); 101 kfree(p->tcfp_keys);
106 p->keys = keys; 102 p->tcfp_keys = keys;
107 p->nkeys = parm->nkeys; 103 p->tcfp_nkeys = parm->nkeys;
108 } 104 }
109 memcpy(p->keys, parm->keys, ksize); 105 memcpy(p->tcfp_keys, parm->keys, ksize);
110 spin_unlock_bh(&p->lock); 106 spin_unlock_bh(&p->tcf_lock);
111 if (ret == ACT_P_CREATED) 107 if (ret == ACT_P_CREATED)
112 tcf_hash_insert(p); 108 tcf_hash_insert(pc, &pedit_hash_info);
113 return ret; 109 return ret;
114} 110}
115 111
116static int 112static int tcf_pedit_cleanup(struct tc_action *a, int bind)
117tcf_pedit_cleanup(struct tc_action *a, int bind)
118{ 113{
119 struct tcf_pedit *p = PRIV(a, pedit); 114 struct tcf_pedit *p = a->priv;
120 115
121 if (p != NULL) { 116 if (p) {
122 struct tc_pedit_key *keys = p->keys; 117 struct tc_pedit_key *keys = p->tcfp_keys;
123 if (tcf_hash_release(p, bind)) { 118 if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) {
124 kfree(keys); 119 kfree(keys);
125 return 1; 120 return 1;
126 } 121 }
@@ -128,30 +123,30 @@ tcf_pedit_cleanup(struct tc_action *a, int bind)
128 return 0; 123 return 0;
129} 124}
130 125
131static int 126static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
132tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) 127 struct tcf_result *res)
133{ 128{
134 struct tcf_pedit *p = PRIV(a, pedit); 129 struct tcf_pedit *p = a->priv;
135 int i, munged = 0; 130 int i, munged = 0;
136 u8 *pptr; 131 u8 *pptr;
137 132
138 if (!(skb->tc_verd & TC_OK2MUNGE)) { 133 if (!(skb->tc_verd & TC_OK2MUNGE)) {
139 /* should we set skb->cloned? */ 134 /* should we set skb->cloned? */
140 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 135 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
141 return p->action; 136 return p->tcf_action;
142 } 137 }
143 } 138 }
144 139
145 pptr = skb->nh.raw; 140 pptr = skb->nh.raw;
146 141
147 spin_lock(&p->lock); 142 spin_lock(&p->tcf_lock);
148 143
149 p->tm.lastuse = jiffies; 144 p->tcf_tm.lastuse = jiffies;
150 145
151 if (p->nkeys > 0) { 146 if (p->tcfp_nkeys > 0) {
152 struct tc_pedit_key *tkey = p->keys; 147 struct tc_pedit_key *tkey = p->tcfp_keys;
153 148
154 for (i = p->nkeys; i > 0; i--, tkey++) { 149 for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
155 u32 *ptr; 150 u32 *ptr;
156 int offset = tkey->off; 151 int offset = tkey->off;
157 152
@@ -169,7 +164,8 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
169 printk("offset must be on 32 bit boundaries\n"); 164 printk("offset must be on 32 bit boundaries\n");
170 goto bad; 165 goto bad;
171 } 166 }
172 if (skb->len < 0 || (offset > 0 && offset > skb->len)) { 167 if (skb->len < 0 ||
168 (offset > 0 && offset > skb->len)) {
173 printk("offset %d cant exceed pkt length %d\n", 169 printk("offset %d cant exceed pkt length %d\n",
174 offset, skb->len); 170 offset, skb->len);
175 goto bad; 171 goto bad;
@@ -185,63 +181,47 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
185 skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); 181 skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
186 goto done; 182 goto done;
187 } else { 183 } else {
188 printk("pedit BUG: index %d\n",p->index); 184 printk("pedit BUG: index %d\n", p->tcf_index);
189 } 185 }
190 186
191bad: 187bad:
192 p->qstats.overlimits++; 188 p->tcf_qstats.overlimits++;
193done: 189done:
194 p->bstats.bytes += skb->len; 190 p->tcf_bstats.bytes += skb->len;
195 p->bstats.packets++; 191 p->tcf_bstats.packets++;
196 spin_unlock(&p->lock); 192 spin_unlock(&p->tcf_lock);
197 return p->action; 193 return p->tcf_action;
198} 194}
199 195
200static int 196static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
201tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref) 197 int bind, int ref)
202{ 198{
203 unsigned char *b = skb->tail; 199 unsigned char *b = skb->tail;
200 struct tcf_pedit *p = a->priv;
204 struct tc_pedit *opt; 201 struct tc_pedit *opt;
205 struct tcf_pedit *p = PRIV(a, pedit);
206 struct tcf_t t; 202 struct tcf_t t;
207 int s; 203 int s;
208 204
209 s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key); 205 s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
210 206
211 /* netlink spinlocks held above us - must use ATOMIC */ 207 /* netlink spinlocks held above us - must use ATOMIC */
212 opt = kzalloc(s, GFP_ATOMIC); 208 opt = kzalloc(s, GFP_ATOMIC);
213 if (opt == NULL) 209 if (unlikely(!opt))
214 return -ENOBUFS; 210 return -ENOBUFS;
215 211
216 memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key)); 212 memcpy(opt->keys, p->tcfp_keys,
217 opt->index = p->index; 213 p->tcfp_nkeys * sizeof(struct tc_pedit_key));
218 opt->nkeys = p->nkeys; 214 opt->index = p->tcf_index;
219 opt->flags = p->flags; 215 opt->nkeys = p->tcfp_nkeys;
220 opt->action = p->action; 216 opt->flags = p->tcfp_flags;
221 opt->refcnt = p->refcnt - ref; 217 opt->action = p->tcf_action;
222 opt->bindcnt = p->bindcnt - bind; 218 opt->refcnt = p->tcf_refcnt - ref;
223 219 opt->bindcnt = p->tcf_bindcnt - bind;
224
225#ifdef PEDIT_DEB
226 {
227 /* Debug - get rid of later */
228 int i;
229 struct tc_pedit_key *key = opt->keys;
230
231 for (i=0; i<opt->nkeys; i++, key++) {
232 printk( "\n key #%d",i);
233 printk( " at %d: val %08x mask %08x",
234 (unsigned int)key->off,
235 (unsigned int)key->val,
236 (unsigned int)key->mask);
237 }
238 }
239#endif
240 220
241 RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); 221 RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
242 t.install = jiffies_to_clock_t(jiffies - p->tm.install); 222 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
243 t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); 223 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
244 t.expires = jiffies_to_clock_t(p->tm.expires); 224 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
245 RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); 225 RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
246 kfree(opt); 226 kfree(opt);
247 return skb->len; 227 return skb->len;
@@ -252,9 +232,9 @@ rtattr_failure:
252 return -1; 232 return -1;
253} 233}
254 234
255static 235static struct tc_action_ops act_pedit_ops = {
256struct tc_action_ops act_pedit_ops = {
257 .kind = "pedit", 236 .kind = "pedit",
237 .hinfo = &pedit_hash_info,
258 .type = TCA_ACT_PEDIT, 238 .type = TCA_ACT_PEDIT,
259 .capab = TCA_CAP_NONE, 239 .capab = TCA_CAP_NONE,
260 .owner = THIS_MODULE, 240 .owner = THIS_MODULE,
@@ -270,14 +250,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
270MODULE_DESCRIPTION("Generic Packet Editor actions"); 250MODULE_DESCRIPTION("Generic Packet Editor actions");
271MODULE_LICENSE("GPL"); 251MODULE_LICENSE("GPL");
272 252
273static int __init 253static int __init pedit_init_module(void)
274pedit_init_module(void)
275{ 254{
276 return tcf_register_action(&act_pedit_ops); 255 return tcf_register_action(&act_pedit_ops);
277} 256}
278 257
279static void __exit 258static void __exit pedit_cleanup_module(void)
280pedit_cleanup_module(void)
281{ 259{
282 tcf_unregister_action(&act_pedit_ops); 260 tcf_unregister_action(&act_pedit_ops);
283} 261}
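The checks retained in tcf_pedit() reject offsets that are not on 32-bit boundaries or that run past the packet, and each key then rewrites a single aligned word through its mask. The sketch below mirrors those bounds checks from the diff; the keep-masked-bits-then-XOR formula is an assumption about the unchanged body of the key loop, so treat it as illustrative.

/* Sketch of one pedit key application: validate the offset, then
 * rewrite one 32-bit word. The write uses (word & mask) ^ val, which is
 * assumed here to match the kernel loop (mask selects original bits to
 * keep, val is XORed on top). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct pedit_key { int off; uint32_t val, mask; };

static int apply_key(unsigned char *pkt, int len, const struct pedit_key *k)
{
	uint32_t word;

	if (k->off < 0 || (k->off & 3) || k->off + 4 > len)
		return -1;                /* misaligned or out of range */
	memcpy(&word, pkt + k->off, 4);
	word = (word & k->mask) ^ k->val; /* assumed kernel formula */
	memcpy(pkt + k->off, &word, 4);
	return 0;
}

int main(void)
{
	unsigned char pkt[8] = { 0 };
	struct pedit_key k = { .off = 4, .val = 0xdeadbeef,
			       .mask = 0x0000ffff };

	if (apply_key(pkt, sizeof(pkt), &k) == 0)
		printf("bytes 4..7: %02x %02x %02x %02x\n",
		       pkt[4], pkt[5], pkt[6], pkt[7]);
	return 0;
}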
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index da905d7b4b40..fed47b658837 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -32,43 +32,27 @@
32#include <net/sock.h> 32#include <net/sock.h>
33#include <net/act_api.h> 33#include <net/act_api.h>
34 34
35#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) 35#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
36#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) 36#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
37#define PRIV(a) ((struct tcf_police *) (a)->priv)
38
39/* use generic hash table */
40#define MY_TAB_SIZE 16
41#define MY_TAB_MASK 15
42static u32 idx_gen;
43static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
44/* Policer hash table lock */
45static DEFINE_RWLOCK(police_lock);
46
47/* Each policer is serialized by its individual spinlock */
48 37
49static __inline__ unsigned tcf_police_hash(u32 index) 38#define POL_TAB_MASK 15
50{ 39static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
51 return index&0xF; 40static u32 police_idx_gen;
52} 41static DEFINE_RWLOCK(police_lock);
53 42
54static __inline__ struct tcf_police * tcf_police_lookup(u32 index) 43static struct tcf_hashinfo police_hash_info = {
55{ 44 .htab = tcf_police_ht,
56 struct tcf_police *p; 45 .hmask = POL_TAB_MASK,
46 .lock = &police_lock,
47};
57 48
58 read_lock(&police_lock); 49/* Each policer is serialized by its individual spinlock */
59 for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
60 if (p->index == index)
61 break;
62 }
63 read_unlock(&police_lock);
64 return p;
65}
66 50
67#ifdef CONFIG_NET_CLS_ACT 51#ifdef CONFIG_NET_CLS_ACT
68static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 52static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
69 int type, struct tc_action *a) 53 int type, struct tc_action *a)
70{ 54{
71 struct tcf_police *p; 55 struct tcf_common *p;
72 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 56 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
73 struct rtattr *r; 57 struct rtattr *r;
74 58
@@ -76,10 +60,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
76 60
77 s_i = cb->args[0]; 61 s_i = cb->args[0];
78 62
79 for (i = 0; i < MY_TAB_SIZE; i++) { 63 for (i = 0; i < (POL_TAB_MASK + 1); i++) {
80 p = tcf_police_ht[tcf_police_hash(i)]; 64 p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
81 65
82 for (; p; p = p->next) { 66 for (; p; p = p->tcfc_next) {
83 index++; 67 index++;
84 if (index < s_i) 68 if (index < s_i)
85 continue; 69 continue;
@@ -110,48 +94,26 @@ rtattr_failure:
110 skb_trim(skb, (u8*)r - skb->data); 94 skb_trim(skb, (u8*)r - skb->data);
111 goto done; 95 goto done;
112} 96}
113
114static inline int
115tcf_act_police_hash_search(struct tc_action *a, u32 index)
116{
117 struct tcf_police *p = tcf_police_lookup(index);
118
119 if (p != NULL) {
120 a->priv = p;
121 return 1;
122 } else {
123 return 0;
124 }
125}
126#endif 97#endif
127 98
128static inline u32 tcf_police_new_index(void)
129{
130 do {
131 if (++idx_gen == 0)
132 idx_gen = 1;
133 } while (tcf_police_lookup(idx_gen));
134
135 return idx_gen;
136}
137
138void tcf_police_destroy(struct tcf_police *p) 99void tcf_police_destroy(struct tcf_police *p)
139{ 100{
140 unsigned h = tcf_police_hash(p->index); 101 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
141 struct tcf_police **p1p; 102 struct tcf_common **p1p;
142 103
143 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { 104 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
144 if (*p1p == p) { 105 if (*p1p == &p->common) {
145 write_lock_bh(&police_lock); 106 write_lock_bh(&police_lock);
146 *p1p = p->next; 107 *p1p = p->tcf_next;
147 write_unlock_bh(&police_lock); 108 write_unlock_bh(&police_lock);
148#ifdef CONFIG_NET_ESTIMATOR 109#ifdef CONFIG_NET_ESTIMATOR
149 gen_kill_estimator(&p->bstats, &p->rate_est); 110 gen_kill_estimator(&p->tcf_bstats,
111 &p->tcf_rate_est);
150#endif 112#endif
151 if (p->R_tab) 113 if (p->tcfp_R_tab)
152 qdisc_put_rtab(p->R_tab); 114 qdisc_put_rtab(p->tcfp_R_tab);
153 if (p->P_tab) 115 if (p->tcfp_P_tab)
154 qdisc_put_rtab(p->P_tab); 116 qdisc_put_rtab(p->tcfp_P_tab);
155 kfree(p); 117 kfree(p);
156 return; 118 return;
157 } 119 }
@@ -167,7 +129,7 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
167 int ret = 0, err; 129 int ret = 0, err;
168 struct rtattr *tb[TCA_POLICE_MAX]; 130 struct rtattr *tb[TCA_POLICE_MAX];
169 struct tc_police *parm; 131 struct tc_police *parm;
170 struct tcf_police *p; 132 struct tcf_police *police;
171 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 133 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
172 134
173 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 135 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
@@ -185,27 +147,32 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
185 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 147 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
186 return -EINVAL; 148 return -EINVAL;
187 149
188 if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { 150 if (parm->index) {
189 a->priv = p; 151 struct tcf_common *pc;
190 if (bind) { 152
191 p->bindcnt += 1; 153 pc = tcf_hash_lookup(parm->index, &police_hash_info);
192 p->refcnt += 1; 154 if (pc != NULL) {
155 a->priv = pc;
156 police = to_police(pc);
157 if (bind) {
158 police->tcf_bindcnt += 1;
159 police->tcf_refcnt += 1;
160 }
161 if (ovr)
162 goto override;
163 return ret;
193 } 164 }
194 if (ovr)
195 goto override;
196 return ret;
197 } 165 }
198 166
199 p = kzalloc(sizeof(*p), GFP_KERNEL); 167 police = kzalloc(sizeof(*police), GFP_KERNEL);
200 if (p == NULL) 168 if (police == NULL)
201 return -ENOMEM; 169 return -ENOMEM;
202
203 ret = ACT_P_CREATED; 170 ret = ACT_P_CREATED;
204 p->refcnt = 1; 171 police->tcf_refcnt = 1;
205 spin_lock_init(&p->lock); 172 spin_lock_init(&police->tcf_lock);
206 p->stats_lock = &p->lock; 173 police->tcf_stats_lock = &police->tcf_lock;
207 if (bind) 174 if (bind)
208 p->bindcnt = 1; 175 police->tcf_bindcnt = 1;
209override: 176override:
210 if (parm->rate.rate) { 177 if (parm->rate.rate) {
211 err = -ENOMEM; 178 err = -ENOMEM;
@@ -215,67 +182,71 @@ override:
215 if (parm->peakrate.rate) { 182 if (parm->peakrate.rate) {
216 P_tab = qdisc_get_rtab(&parm->peakrate, 183 P_tab = qdisc_get_rtab(&parm->peakrate,
217 tb[TCA_POLICE_PEAKRATE-1]); 184 tb[TCA_POLICE_PEAKRATE-1]);
218 if (p->P_tab == NULL) { 185 if (P_tab == NULL) {
219 qdisc_put_rtab(R_tab); 186 qdisc_put_rtab(R_tab);
220 goto failure; 187 goto failure;
221 } 188 }
222 } 189 }
223 } 190 }
224 /* No failure allowed after this point */ 191 /* No failure allowed after this point */
225 spin_lock_bh(&p->lock); 192 spin_lock_bh(&police->tcf_lock);
226 if (R_tab != NULL) { 193 if (R_tab != NULL) {
227 qdisc_put_rtab(p->R_tab); 194 qdisc_put_rtab(police->tcfp_R_tab);
228 p->R_tab = R_tab; 195 police->tcfp_R_tab = R_tab;
229 } 196 }
230 if (P_tab != NULL) { 197 if (P_tab != NULL) {
231 qdisc_put_rtab(p->P_tab); 198 qdisc_put_rtab(police->tcfp_P_tab);
232 p->P_tab = P_tab; 199 police->tcfp_P_tab = P_tab;
233 } 200 }
234 201
235 if (tb[TCA_POLICE_RESULT-1]) 202 if (tb[TCA_POLICE_RESULT-1])
236 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 203 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
237 p->toks = p->burst = parm->burst; 204 police->tcfp_toks = police->tcfp_burst = parm->burst;
238 p->mtu = parm->mtu; 205 police->tcfp_mtu = parm->mtu;
239 if (p->mtu == 0) { 206 if (police->tcfp_mtu == 0) {
240 p->mtu = ~0; 207 police->tcfp_mtu = ~0;
241 if (p->R_tab) 208 if (police->tcfp_R_tab)
242 p->mtu = 255<<p->R_tab->rate.cell_log; 209 police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
243 } 210 }
244 if (p->P_tab) 211 if (police->tcfp_P_tab)
245 p->ptoks = L2T_P(p, p->mtu); 212 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
246 p->action = parm->action; 213 police->tcf_action = parm->action;
247 214
248#ifdef CONFIG_NET_ESTIMATOR 215#ifdef CONFIG_NET_ESTIMATOR
249 if (tb[TCA_POLICE_AVRATE-1]) 216 if (tb[TCA_POLICE_AVRATE-1])
250 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 217 police->tcfp_ewma_rate =
218 *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
251 if (est) 219 if (est)
252 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); 220 gen_replace_estimator(&police->tcf_bstats,
221 &police->tcf_rate_est,
222 police->tcf_stats_lock, est);
253#endif 223#endif
254 224
255 spin_unlock_bh(&p->lock); 225 spin_unlock_bh(&police->tcf_lock);
256 if (ret != ACT_P_CREATED) 226 if (ret != ACT_P_CREATED)
257 return ret; 227 return ret;
258 228
259 PSCHED_GET_TIME(p->t_c); 229 PSCHED_GET_TIME(police->tcfp_t_c);
260 p->index = parm->index ? : tcf_police_new_index(); 230 police->tcf_index = parm->index ? parm->index :
261 h = tcf_police_hash(p->index); 231 tcf_hash_new_index(&police_idx_gen, &police_hash_info);
232 h = tcf_hash(police->tcf_index, POL_TAB_MASK);
262 write_lock_bh(&police_lock); 233 write_lock_bh(&police_lock);
263 p->next = tcf_police_ht[h]; 234 police->tcf_next = tcf_police_ht[h];
264 tcf_police_ht[h] = p; 235 tcf_police_ht[h] = &police->common;
265 write_unlock_bh(&police_lock); 236 write_unlock_bh(&police_lock);
266 237
267 a->priv = p; 238 a->priv = police;
268 return ret; 239 return ret;
269 240
270failure: 241failure:
271 if (ret == ACT_P_CREATED) 242 if (ret == ACT_P_CREATED)
272 kfree(p); 243 kfree(police);
273 return err; 244 return err;
274} 245}
275 246
276static int tcf_act_police_cleanup(struct tc_action *a, int bind) 247static int tcf_act_police_cleanup(struct tc_action *a, int bind)
277{ 248{
278 struct tcf_police *p = PRIV(a); 249 struct tcf_police *p = a->priv;
279 250
280 if (p != NULL) 251 if (p != NULL)
281 return tcf_police_release(p, bind); 252 return tcf_police_release(p, bind);
@@ -285,86 +256,87 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind)
285static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, 256static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
286 struct tcf_result *res) 257 struct tcf_result *res)
287{ 258{
259 struct tcf_police *police = a->priv;
288 psched_time_t now; 260 psched_time_t now;
289 struct tcf_police *p = PRIV(a);
290 long toks; 261 long toks;
291 long ptoks = 0; 262 long ptoks = 0;
292 263
293 spin_lock(&p->lock); 264 spin_lock(&police->tcf_lock);
294 265
295 p->bstats.bytes += skb->len; 266 police->tcf_bstats.bytes += skb->len;
296 p->bstats.packets++; 267 police->tcf_bstats.packets++;
297 268
298#ifdef CONFIG_NET_ESTIMATOR 269#ifdef CONFIG_NET_ESTIMATOR
299 if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { 270 if (police->tcfp_ewma_rate &&
300 p->qstats.overlimits++; 271 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
301 spin_unlock(&p->lock); 272 police->tcf_qstats.overlimits++;
302 return p->action; 273 spin_unlock(&police->tcf_lock);
274 return police->tcf_action;
303 } 275 }
304#endif 276#endif
305 277
306 if (skb->len <= p->mtu) { 278 if (skb->len <= police->tcfp_mtu) {
307 if (p->R_tab == NULL) { 279 if (police->tcfp_R_tab == NULL) {
308 spin_unlock(&p->lock); 280 spin_unlock(&police->tcf_lock);
309 return p->result; 281 return police->tcfp_result;
310 } 282 }
311 283
312 PSCHED_GET_TIME(now); 284 PSCHED_GET_TIME(now);
313 285
314 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); 286 toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
315 287 police->tcfp_burst);
316 if (p->P_tab) { 288 if (police->tcfp_P_tab) {
317 ptoks = toks + p->ptoks; 289 ptoks = toks + police->tcfp_ptoks;
318 if (ptoks > (long)L2T_P(p, p->mtu)) 290 if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
319 ptoks = (long)L2T_P(p, p->mtu); 291 ptoks = (long)L2T_P(police, police->tcfp_mtu);
320 ptoks -= L2T_P(p, skb->len); 292 ptoks -= L2T_P(police, skb->len);
321 } 293 }
322 toks += p->toks; 294 toks += police->tcfp_toks;
323 if (toks > (long)p->burst) 295 if (toks > (long)police->tcfp_burst)
324 toks = p->burst; 296 toks = police->tcfp_burst;
325 toks -= L2T(p, skb->len); 297 toks -= L2T(police, skb->len);
326
327 if ((toks|ptoks) >= 0) { 298 if ((toks|ptoks) >= 0) {
328 p->t_c = now; 299 police->tcfp_t_c = now;
329 p->toks = toks; 300 police->tcfp_toks = toks;
330 p->ptoks = ptoks; 301 police->tcfp_ptoks = ptoks;
331 spin_unlock(&p->lock); 302 spin_unlock(&police->tcf_lock);
332 return p->result; 303 return police->tcfp_result;
333 } 304 }
334 } 305 }
335 306
336 p->qstats.overlimits++; 307 police->tcf_qstats.overlimits++;
337 spin_unlock(&p->lock); 308 spin_unlock(&police->tcf_lock);
338 return p->action; 309 return police->tcf_action;
339} 310}
340 311
341static int 312static int
342tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 313tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
343{ 314{
344 unsigned char *b = skb->tail; 315 unsigned char *b = skb->tail;
316 struct tcf_police *police = a->priv;
345 struct tc_police opt; 317 struct tc_police opt;
346 struct tcf_police *p = PRIV(a); 318
347 319 opt.index = police->tcf_index;
348 opt.index = p->index; 320 opt.action = police->tcf_action;
349 opt.action = p->action; 321 opt.mtu = police->tcfp_mtu;
350 opt.mtu = p->mtu; 322 opt.burst = police->tcfp_burst;
351 opt.burst = p->burst; 323 opt.refcnt = police->tcf_refcnt - ref;
352 opt.refcnt = p->refcnt - ref; 324 opt.bindcnt = police->tcf_bindcnt - bind;
353 opt.bindcnt = p->bindcnt - bind; 325 if (police->tcfp_R_tab)
354 if (p->R_tab) 326 opt.rate = police->tcfp_R_tab->rate;
355 opt.rate = p->R_tab->rate;
356 else 327 else
357 memset(&opt.rate, 0, sizeof(opt.rate)); 328 memset(&opt.rate, 0, sizeof(opt.rate));
358 if (p->P_tab) 329 if (police->tcfp_P_tab)
359 opt.peakrate = p->P_tab->rate; 330 opt.peakrate = police->tcfp_P_tab->rate;
360 else 331 else
361 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 332 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
362 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 333 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
363 if (p->result) 334 if (police->tcfp_result)
364 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); 335 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
336 &police->tcfp_result);
365#ifdef CONFIG_NET_ESTIMATOR 337#ifdef CONFIG_NET_ESTIMATOR
366 if (p->ewma_rate) 338 if (police->tcfp_ewma_rate)
367 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); 339 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
368#endif 340#endif
369 return skb->len; 341 return skb->len;
370 342
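The renamed policer fast path is a dual token bucket: toks accrues rate credit since tcfp_t_c and is capped at the configured burst, ptoks does the same against the peak rate capped at L2T_P(mtu), and a packet conforms only if both buckets stay non-negative after being charged for its length. Below is a userspace sketch of that conformance test; time units and the L2T/L2T_P rate-table lookups are collapsed into a trivial cost function, so this models the control flow, not the psched arithmetic.

/* Sketch of the dual-token-bucket test in tcf_act_police(): refill both
 * buckets from elapsed time, cap them, charge the packet, and conform
 * only if neither bucket went negative. cost() stands in for the
 * qdisc rate-table lookups. */
#include <stdio.h>

struct policer {
	long toks, ptoks;   /* current rate / peak-rate credit */
	long burst, pburst; /* caps for each bucket */
	long t_c;           /* timestamp of last update */
};

static long cost(long len) { return len; } /* stand-in for L2T/L2T_P */

static int conforms(struct policer *p, long now, long len)
{
	long toks = p->toks + (now - p->t_c);  /* refill from elapsed time */
	long ptoks = p->ptoks + (now - p->t_c);

	if (toks > p->burst)
		toks = p->burst;
	if (ptoks > p->pburst)
		ptoks = p->pburst;
	toks -= cost(len);
	ptoks -= cost(len);
	if ((toks | ptoks) >= 0) {             /* both non-negative */
		p->t_c = now;
		p->toks = toks;
		p->ptoks = ptoks;
		return 1;
	}
	return 0;                              /* over limit: no update */
}

int main(void)
{
	struct policer p = { .toks = 100, .ptoks = 20, .burst = 100,
			     .pburst = 20, .t_c = 0 };

	printf("%d\n", conforms(&p, 0, 15)); /* fits both buckets: 1 */
	printf("%d\n", conforms(&p, 0, 15)); /* peak bucket empty: 0 */
	return 0;
}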
@@ -379,13 +351,14 @@ MODULE_LICENSE("GPL");
379 351
380static struct tc_action_ops act_police_ops = { 352static struct tc_action_ops act_police_ops = {
381 .kind = "police", 353 .kind = "police",
354 .hinfo = &police_hash_info,
382 .type = TCA_ID_POLICE, 355 .type = TCA_ID_POLICE,
383 .capab = TCA_CAP_NONE, 356 .capab = TCA_CAP_NONE,
384 .owner = THIS_MODULE, 357 .owner = THIS_MODULE,
385 .act = tcf_act_police, 358 .act = tcf_act_police,
386 .dump = tcf_act_police_dump, 359 .dump = tcf_act_police_dump,
387 .cleanup = tcf_act_police_cleanup, 360 .cleanup = tcf_act_police_cleanup,
388 .lookup = tcf_act_police_hash_search, 361 .lookup = tcf_hash_search,
389 .init = tcf_act_police_locate, 362 .init = tcf_act_police_locate,
390 .walk = tcf_act_police_walker 363 .walk = tcf_act_police_walker
391}; 364};
@@ -407,10 +380,39 @@ module_exit(police_cleanup_module);
407 380
408#else /* CONFIG_NET_CLS_ACT */ 381#else /* CONFIG_NET_CLS_ACT */
409 382
410struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) 383static struct tcf_common *tcf_police_lookup(u32 index)
411{ 384{
412 unsigned h; 385 struct tcf_hashinfo *hinfo = &police_hash_info;
413 struct tcf_police *p; 386 struct tcf_common *p;
387
388 read_lock(hinfo->lock);
389 for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
390 p = p->tcfc_next) {
391 if (p->tcfc_index == index)
392 break;
393 }
394 read_unlock(hinfo->lock);
395
396 return p;
397}
398
399static u32 tcf_police_new_index(void)
400{
401 u32 *idx_gen = &police_idx_gen;
402 u32 val = *idx_gen;
403
404 do {
405 if (++val == 0)
406 val = 1;
407 } while (tcf_police_lookup(val));
408
409 return (*idx_gen = val);
410}
411
412struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
413{
414 unsigned int h;
415 struct tcf_police *police;
414 struct rtattr *tb[TCA_POLICE_MAX]; 416 struct rtattr *tb[TCA_POLICE_MAX];
415 struct tc_police *parm; 417 struct tc_police *parm;
416 418
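With the open-coded lookup gone from the shared path, the !CONFIG_NET_CLS_ACT branch gains private copies: tcf_police_lookup() walks one bucket of the shared table, and tcf_police_new_index() probes for the next unused index, wrapping past zero because index 0 means auto-assign. A compilable sketch of the same probe loop over a stub lookup; lookup_stub() is a placeholder for tcf_police_lookup().

/* Userspace sketch of tcf_police_new_index(): advance the generator,
 * skip index 0 (reserved for "pick one for me"), and keep probing until
 * an unused index is found. */
#include <stdio.h>

static unsigned int police_idx_gen;

static int lookup_stub(unsigned int index)
{
	return index == 1 || index == 2; /* pretend 1 and 2 are taken */
}

static unsigned int new_index(void)
{
	unsigned int val = police_idx_gen;

	do {
		if (++val == 0)
			val = 1;         /* never hand out index 0 */
	} while (lookup_stub(val));

	return police_idx_gen = val;
}

int main(void)
{
	unsigned int a = new_index();    /* skips 1 and 2: returns 3 */
	unsigned int b = new_index();    /* returns 4 */

	printf("%u %u\n", a, b);
	return 0;
}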
@@ -423,149 +425,158 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
423 425
424 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 426 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
425 427
426 if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { 428 if (parm->index) {
427 p->refcnt++; 429 struct tcf_common *pc;
428 return p;
429 }
430 430
431 p = kzalloc(sizeof(*p), GFP_KERNEL); 431 pc = tcf_police_lookup(parm->index);
432 if (p == NULL) 432 if (pc) {
433 police = to_police(pc);
434 police->tcf_refcnt++;
435 return police;
436 }
437 }
438 police = kzalloc(sizeof(*police), GFP_KERNEL);
439 if (unlikely(!police))
433 return NULL; 440 return NULL;
434 441
435 p->refcnt = 1; 442 police->tcf_refcnt = 1;
436 spin_lock_init(&p->lock); 443 spin_lock_init(&police->tcf_lock);
437 p->stats_lock = &p->lock; 444 police->tcf_stats_lock = &police->tcf_lock;
438 if (parm->rate.rate) { 445 if (parm->rate.rate) {
439 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 446 police->tcfp_R_tab =
440 if (p->R_tab == NULL) 447 qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
448 if (police->tcfp_R_tab == NULL)
441 goto failure; 449 goto failure;
442 if (parm->peakrate.rate) { 450 if (parm->peakrate.rate) {
443 p->P_tab = qdisc_get_rtab(&parm->peakrate, 451 police->tcfp_P_tab =
444 tb[TCA_POLICE_PEAKRATE-1]); 452 qdisc_get_rtab(&parm->peakrate,
445 if (p->P_tab == NULL) 453 tb[TCA_POLICE_PEAKRATE-1]);
454 if (police->tcfp_P_tab == NULL)
446 goto failure; 455 goto failure;
447 } 456 }
448 } 457 }
449 if (tb[TCA_POLICE_RESULT-1]) { 458 if (tb[TCA_POLICE_RESULT-1]) {
450 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 459 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
451 goto failure; 460 goto failure;
452 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 461 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
453 } 462 }
454#ifdef CONFIG_NET_ESTIMATOR 463#ifdef CONFIG_NET_ESTIMATOR
455 if (tb[TCA_POLICE_AVRATE-1]) { 464 if (tb[TCA_POLICE_AVRATE-1]) {
456 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) 465 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
457 goto failure; 466 goto failure;
458 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 467 police->tcfp_ewma_rate =
468 *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
459 } 469 }
460#endif 470#endif
461 p->toks = p->burst = parm->burst; 471 police->tcfp_toks = police->tcfp_burst = parm->burst;
462 p->mtu = parm->mtu; 472 police->tcfp_mtu = parm->mtu;
463 if (p->mtu == 0) { 473 if (police->tcfp_mtu == 0) {
464 p->mtu = ~0; 474 police->tcfp_mtu = ~0;
465 if (p->R_tab) 475 if (police->tcfp_R_tab)
466 p->mtu = 255<<p->R_tab->rate.cell_log; 476 police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
467 } 477 }
468 if (p->P_tab) 478 if (police->tcfp_P_tab)
469 p->ptoks = L2T_P(p, p->mtu); 479 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
470 PSCHED_GET_TIME(p->t_c); 480 PSCHED_GET_TIME(police->tcfp_t_c);
471 p->index = parm->index ? : tcf_police_new_index(); 481 police->tcf_index = parm->index ? parm->index :
472 p->action = parm->action; 482 tcf_police_new_index();
483 police->tcf_action = parm->action;
473#ifdef CONFIG_NET_ESTIMATOR 484#ifdef CONFIG_NET_ESTIMATOR
474 if (est) 485 if (est)
475 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); 486 gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est,
487 police->tcf_stats_lock, est);
476#endif 488#endif
477 h = tcf_police_hash(p->index); 489 h = tcf_hash(police->tcf_index, POL_TAB_MASK);
478 write_lock_bh(&police_lock); 490 write_lock_bh(&police_lock);
479 p->next = tcf_police_ht[h]; 491 police->tcf_next = tcf_police_ht[h];
480 tcf_police_ht[h] = p; 492 tcf_police_ht[h] = &police->common;
481 write_unlock_bh(&police_lock); 493 write_unlock_bh(&police_lock);
482 return p; 494 return police;
483 495
484failure: 496failure:
485 if (p->R_tab) 497 if (police->tcfp_R_tab)
486 qdisc_put_rtab(p->R_tab); 498 qdisc_put_rtab(police->tcfp_R_tab);
487 kfree(p); 499 kfree(police);
488 return NULL; 500 return NULL;
489} 501}
490 502
491int tcf_police(struct sk_buff *skb, struct tcf_police *p) 503int tcf_police(struct sk_buff *skb, struct tcf_police *police)
492{ 504{
493 psched_time_t now; 505 psched_time_t now;
494 long toks; 506 long toks;
495 long ptoks = 0; 507 long ptoks = 0;
496 508
497 spin_lock(&p->lock); 509 spin_lock(&police->tcf_lock);
498 510
499 p->bstats.bytes += skb->len; 511 police->tcf_bstats.bytes += skb->len;
500 p->bstats.packets++; 512 police->tcf_bstats.packets++;
501 513
502#ifdef CONFIG_NET_ESTIMATOR 514#ifdef CONFIG_NET_ESTIMATOR
503 if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { 515 if (police->tcfp_ewma_rate &&
504 p->qstats.overlimits++; 516 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
505 spin_unlock(&p->lock); 517 police->tcf_qstats.overlimits++;
506 return p->action; 518 spin_unlock(&police->tcf_lock);
519 return police->tcf_action;
507 } 520 }
508#endif 521#endif
509 522 if (skb->len <= police->tcfp_mtu) {
510 if (skb->len <= p->mtu) { 523 if (police->tcfp_R_tab == NULL) {
511 if (p->R_tab == NULL) { 524 spin_unlock(&police->tcf_lock);
512 spin_unlock(&p->lock); 525 return police->tcfp_result;
513 return p->result;
514 } 526 }
515 527
516 PSCHED_GET_TIME(now); 528 PSCHED_GET_TIME(now);
517 529 toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
518 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); 530 police->tcfp_burst);
519 531 if (police->tcfp_P_tab) {
520 if (p->P_tab) { 532 ptoks = toks + police->tcfp_ptoks;
521 ptoks = toks + p->ptoks; 533 if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
522 if (ptoks > (long)L2T_P(p, p->mtu)) 534 ptoks = (long)L2T_P(police, police->tcfp_mtu);
523 ptoks = (long)L2T_P(p, p->mtu); 535 ptoks -= L2T_P(police, skb->len);
524 ptoks -= L2T_P(p, skb->len);
525 } 536 }
526 toks += p->toks; 537 toks += police->tcfp_toks;
527 if (toks > (long)p->burst) 538 if (toks > (long)police->tcfp_burst)
528 toks = p->burst; 539 toks = police->tcfp_burst;
529 toks -= L2T(p, skb->len); 540 toks -= L2T(police, skb->len);
530
531 if ((toks|ptoks) >= 0) { 541 if ((toks|ptoks) >= 0) {
532 p->t_c = now; 542 police->tcfp_t_c = now;
533 p->toks = toks; 543 police->tcfp_toks = toks;
534 p->ptoks = ptoks; 544 police->tcfp_ptoks = ptoks;
535 spin_unlock(&p->lock); 545 spin_unlock(&police->tcf_lock);
536 return p->result; 546 return police->tcfp_result;
537 } 547 }
538 } 548 }
539 549
540 p->qstats.overlimits++; 550 police->tcf_qstats.overlimits++;
541 spin_unlock(&p->lock); 551 spin_unlock(&police->tcf_lock);
542 return p->action; 552 return police->tcf_action;
543} 553}
544EXPORT_SYMBOL(tcf_police); 554EXPORT_SYMBOL(tcf_police);
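
tcf_police() charges each packet against a dual token bucket: the time elapsed since the last checkpoint becomes credit (clamped to the burst), the optional peak-rate bucket is additionally capped at the cost of one MTU-sized packet, and the packet conforms only if both buckets stay non-negative. A standalone sketch of the same arithmetic, assuming both buckets are configured and substituting an assumed linear cost() for the L2T()/L2T_P() rate-table lookups:

    #include <stdio.h>

    struct bucket {
            long burst;        /* bucket depth, in time units */
            long mtu_cost;     /* peak-bucket cap: cost of one MTU packet */
            long toks;         /* credit in the rate bucket */
            long ptoks;        /* credit in the peak-rate bucket */
            long long t_c;     /* timestamp of the last update */
    };

    static long cost(int len)  /* assumed linear rate, not a real rtab */
    {
            return (long)len * 8;
    }

    /* Returns 1 if the packet conforms, 0 if it exceeds the rate. */
    static int conforms(struct bucket *b, long long now, int len)
    {
            long toks = (long)(now - b->t_c);
            long ptoks;

            if (toks > b->burst)            /* the PSCHED_TDIFF_SAFE clamp */
                    toks = b->burst;

            ptoks = toks + b->ptoks;
            if (ptoks > b->mtu_cost)
                    ptoks = b->mtu_cost;
            ptoks -= cost(len);

            toks += b->toks;
            if (toks > b->burst)
                    toks = b->burst;
            toks -= cost(len);

            if ((toks | ptoks) >= 0) {      /* both buckets non-negative */
                    b->t_c = now;
                    b->toks = toks;
                    b->ptoks = ptoks;
                    return 1;
            }
            return 0;
    }

    int main(void)
    {
            struct bucket b = { .burst = 10000, .mtu_cost = 12000,
                                .toks = 10000, .ptoks = 12000 };

            printf("%d\n", conforms(&b, 0, 100));   /* prints 1 */
            return 0;
    }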
545 555
546int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) 556int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
547{ 557{
548 unsigned char *b = skb->tail; 558 unsigned char *b = skb->tail;
549 struct tc_police opt; 559 struct tc_police opt;
550 560
551 opt.index = p->index; 561 opt.index = police->tcf_index;
552 opt.action = p->action; 562 opt.action = police->tcf_action;
553 opt.mtu = p->mtu; 563 opt.mtu = police->tcfp_mtu;
554 opt.burst = p->burst; 564 opt.burst = police->tcfp_burst;
555 if (p->R_tab) 565 if (police->tcfp_R_tab)
556 opt.rate = p->R_tab->rate; 566 opt.rate = police->tcfp_R_tab->rate;
557 else 567 else
558 memset(&opt.rate, 0, sizeof(opt.rate)); 568 memset(&opt.rate, 0, sizeof(opt.rate));
559 if (p->P_tab) 569 if (police->tcfp_P_tab)
560 opt.peakrate = p->P_tab->rate; 570 opt.peakrate = police->tcfp_P_tab->rate;
561 else 571 else
562 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 572 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
563 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 573 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
564 if (p->result) 574 if (police->tcfp_result)
565 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); 575 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
576 &police->tcfp_result);
566#ifdef CONFIG_NET_ESTIMATOR 577#ifdef CONFIG_NET_ESTIMATOR
567 if (p->ewma_rate) 578 if (police->tcfp_ewma_rate)
568 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); 579 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
569#endif 580#endif
570 return skb->len; 581 return skb->len;
571 582
@@ -574,19 +585,20 @@ rtattr_failure:
574 return -1; 585 return -1;
575} 586}
576 587
577int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) 588int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police)
578{ 589{
579 struct gnet_dump d; 590 struct gnet_dump d;
580 591
581 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 592 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
582 TCA_XSTATS, p->stats_lock, &d) < 0) 593 TCA_XSTATS, police->tcf_stats_lock,
594 &d) < 0)
583 goto errout; 595 goto errout;
584 596
585 if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || 597 if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 ||
586#ifdef CONFIG_NET_ESTIMATOR 598#ifdef CONFIG_NET_ESTIMATOR
587 gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || 599 gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 ||
588#endif 600#endif
589 gnet_stats_copy_queue(&d, &p->qstats) < 0) 601 gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0)
590 goto errout; 602 goto errout;
591 603
592 if (gnet_stats_finish_copy(&d) < 0) 604 if (gnet_stats_finish_copy(&d) < 0)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 17105c82537f..901571a67707 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -20,54 +20,175 @@
20 20
21#define TCA_ACT_SIMP 22 21#define TCA_ACT_SIMP 22
22 22
23/* XXX: Hide all these common elements under some macro
24 * probably
25*/
26#include <linux/tc_act/tc_defact.h> 23#include <linux/tc_act/tc_defact.h>
27#include <net/tc_act/tc_defact.h> 24#include <net/tc_act/tc_defact.h>
28 25
29/* use generic hash table with 8 buckets */ 26#define SIMP_TAB_MASK 7
30#define MY_TAB_SIZE 8 27static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
31#define MY_TAB_MASK (MY_TAB_SIZE - 1) 28static u32 simp_idx_gen;
32static u32 idx_gen;
33static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE];
34static DEFINE_RWLOCK(simp_lock); 29static DEFINE_RWLOCK(simp_lock);
35 30
36/* override the defaults */ 31static struct tcf_hashinfo simp_hash_info = {
37#define tcf_st tcf_defact 32 .htab = tcf_simp_ht,
38#define tc_st tc_defact 33 .hmask = SIMP_TAB_MASK,
39#define tcf_t_lock simp_lock 34 .lock = &simp_lock,
40#define tcf_ht tcf_simp_ht 35};
41
42#define CONFIG_NET_ACT_INIT 1
43#include <net/pkt_act.h>
44#include <net/act_generic.h>
45 36
46static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) 37static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
47{ 38{
48 struct tcf_defact *p = PRIV(a, defact); 39 struct tcf_defact *d = a->priv;
49 40
50 spin_lock(&p->lock); 41 spin_lock(&d->tcf_lock);
51 p->tm.lastuse = jiffies; 42 d->tcf_tm.lastuse = jiffies;
52 p->bstats.bytes += skb->len; 43 d->tcf_bstats.bytes += skb->len;
53 p->bstats.packets++; 44 d->tcf_bstats.packets++;
54 45
55 /* print policy string followed by _ then packet count 46 /* print policy string followed by _ then packet count
 56 * For example, if this was the 3rd packet and the string was "hello" 47 * For example, if this was the 3rd packet and the string was "hello"
57 * then it would look like "hello_3" (without quotes) 48 * then it would look like "hello_3" (without quotes)
58 **/ 49 **/
59 printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets); 50 printk("simple: %s_%d\n",
60 spin_unlock(&p->lock); 51 (char *)d->tcfd_defdata, d->tcf_bstats.packets);
61 return p->action; 52 spin_unlock(&d->tcf_lock);
53 return d->tcf_action;
54}
55
56static int tcf_simp_release(struct tcf_defact *d, int bind)
57{
58 int ret = 0;
59 if (d) {
60 if (bind)
61 d->tcf_bindcnt--;
62 d->tcf_refcnt--;
63 if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) {
64 kfree(d->tcfd_defdata);
65 tcf_hash_destroy(&d->common, &simp_hash_info);
66 ret = 1;
67 }
68 }
69 return ret;
70}
71
72static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
73{
74 d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL);
75 if (unlikely(!d->tcfd_defdata))
76 return -ENOMEM;
77 d->tcfd_datalen = datalen;
78 memcpy(d->tcfd_defdata, defdata, datalen);
79 return 0;
80}
81
82static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
83{
84 kfree(d->tcfd_defdata);
85 return alloc_defdata(d, datalen, defdata);
86}
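
alloc_defdata()/realloc_defdata() keep a private copy of the netlink-supplied default data. A userspace sketch of the same own-your-copy pattern; unlike realloc_defdata() above, it copies before freeing, so the previous value survives an allocation failure (arguably the safer ordering):

    #include <stdlib.h>
    #include <string.h>

    struct defact {
            void   *defdata;
            size_t  datalen;
    };

    static int set_defdata(struct defact *d, const void *src, size_t len)
    {
            void *copy = malloc(len);

            if (!copy)
                    return -1;              /* kernel: -ENOMEM */
            memcpy(copy, src, len);
            free(d->defdata);               /* drop the old value, if any */
            d->defdata = copy;
            d->datalen = len;
            return 0;
    }

    int main(void)
    {
            struct defact d = { 0 };

            if (set_defdata(&d, "hello", 6))    /* the alloc_defdata() case */
                    return 1;
            return set_defdata(&d, "bye", 4);   /* the realloc_defdata() case */
    }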
87
88static int tcf_simp_init(struct rtattr *rta, struct rtattr *est,
89 struct tc_action *a, int ovr, int bind)
90{
91 struct rtattr *tb[TCA_DEF_MAX];
92 struct tc_defact *parm;
93 struct tcf_defact *d;
94 struct tcf_common *pc;
95 void *defdata;
96 u32 datalen = 0;
97 int ret = 0;
98
99 if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0)
100 return -EINVAL;
101
102 if (tb[TCA_DEF_PARMS - 1] == NULL ||
103 RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm))
104 return -EINVAL;
105
106 parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]);
107 defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]);
108 if (defdata == NULL)
109 return -EINVAL;
110
111 datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]);
112 if (datalen <= 0)
113 return -EINVAL;
114
115 pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
116 if (!pc) {
117 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
118 &simp_idx_gen, &simp_hash_info);
119 if (unlikely(!pc))
120 return -ENOMEM;
121
122 d = to_defact(pc);
123 ret = alloc_defdata(d, datalen, defdata);
124 if (ret < 0) {
125 kfree(pc);
126 return ret;
127 }
128 ret = ACT_P_CREATED;
129 } else {
130 d = to_defact(pc);
131 if (!ovr) {
132 tcf_simp_release(d, bind);
133 return -EEXIST;
134 }
135 realloc_defdata(d, datalen, defdata);
136 }
137
138 spin_lock_bh(&d->tcf_lock);
139 d->tcf_action = parm->action;
140 spin_unlock_bh(&d->tcf_lock);
141
142 if (ret == ACT_P_CREATED)
143 tcf_hash_insert(pc, &simp_hash_info);
144 return ret;
145}
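
tcf_simp_init() follows the standard create-or-bind idiom for actions: look up the index, re-bind and re-reference an existing entry on a hit, allocate and insert on a miss, and flag ACT_P_CREATED only in the latter case. A single-bucket model where lookup() and check_or_create() are illustrative stand-ins for tcf_hash_check()/tcf_hash_create(), not the real helpers:

    #include <stdlib.h>

    struct entry {
            unsigned int index;
            int refcnt, bindcnt;
            struct entry *next;
    };

    static struct entry *table;     /* one bucket is enough for the sketch */

    static struct entry *lookup(unsigned int index)
    {
            struct entry *e;

            for (e = table; e; e = e->next)
                    if (e->index == index)
                            return e;
            return NULL;
    }

    /* Bind to an existing entry, or create and insert a fresh one. */
    static struct entry *check_or_create(unsigned int index, int bind)
    {
            struct entry *e = lookup(index);

            if (e) {                        /* hit: reuse and re-bind */
                    if (bind)
                            e->bindcnt++;
                    e->refcnt++;
                    return e;
            }
            e = calloc(1, sizeof(*e));      /* miss: the ACT_P_CREATED path */
            if (!e)
                    return NULL;
            e->index = index;
            e->refcnt = 1;
            e->bindcnt = bind ? 1 : 0;
            e->next = table;
            table = e;
            return e;
    }

    int main(void)
    {
            struct entry *a = check_or_create(1, 1);   /* created */
            struct entry *b = check_or_create(1, 1);   /* found, re-bound */

            return (a && a == b && a->refcnt == 2) ? 0 : 1;
    }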
146
147static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
148{
149 struct tcf_defact *d = a->priv;
150
151 if (d)
152 return tcf_simp_release(d, bind);
153 return 0;
154}
155
156static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
157 int bind, int ref)
158{
159 unsigned char *b = skb->tail;
160 struct tcf_defact *d = a->priv;
161 struct tc_defact opt;
162 struct tcf_t t;
163
164 opt.index = d->tcf_index;
165 opt.refcnt = d->tcf_refcnt - ref;
166 opt.bindcnt = d->tcf_bindcnt - bind;
167 opt.action = d->tcf_action;
168 RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
169 RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
170 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
171 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
172 t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
173 RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
174 return skb->len;
175
176rtattr_failure:
177 skb_trim(skb, b - skb->data);
178 return -1;
62} 179}
63 180
64static struct tc_action_ops act_simp_ops = { 181static struct tc_action_ops act_simp_ops = {
65 .kind = "simple", 182 .kind = "simple",
66 .type = TCA_ACT_SIMP, 183 .hinfo = &simp_hash_info,
67 .capab = TCA_CAP_NONE, 184 .type = TCA_ACT_SIMP,
68 .owner = THIS_MODULE, 185 .capab = TCA_CAP_NONE,
69 .act = tcf_simp, 186 .owner = THIS_MODULE,
70 tca_use_default_ops 187 .act = tcf_simp,
188 .dump = tcf_simp_dump,
189 .cleanup = tcf_simp_cleanup,
190 .init = tcf_simp_init,
191 .walk = tcf_generic_walker,
71}; 192};
72 193
73MODULE_AUTHOR("Jamal Hadi Salim(2005)"); 194MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index e6973d9b686d..e54acc6bcccd 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -50,6 +50,7 @@
50struct fw_head 50struct fw_head
51{ 51{
52 struct fw_filter *ht[HTSIZE]; 52 struct fw_filter *ht[HTSIZE];
53 u32 mask;
53}; 54};
54 55
55struct fw_filter 56struct fw_filter
@@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
101 struct fw_filter *f; 102 struct fw_filter *f;
102 int r; 103 int r;
103#ifdef CONFIG_NETFILTER 104#ifdef CONFIG_NETFILTER
104 u32 id = skb->nfmark; 105 u32 id = skb->nfmark & head->mask;
105#else 106#else
106 u32 id = 0; 107 u32 id = 0;
107#endif 108#endif
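
With TCA_FW_MASK, fw_classify() keys its lookup on skb->nfmark & head->mask, so only the masked bits of the mark select a filter and other users of the 32-bit mark stay out of the way. An illustrative sketch; the HTSIZE value and modulo hash here are placeholders, not the actual cls_fw ones:

    #include <stdio.h>

    #define HTSIZE 256      /* assumed table size for the sketch */

    static unsigned int fw_slot(unsigned int mark, unsigned int mask)
    {
            unsigned int id = mark & mask;  /* strip bits owned by others */
            return id % HTSIZE;             /* stand-in for the real hash */
    }

    int main(void)
    {
            /* low byte carries the class id, high bits belong to someone else */
            printf("%u\n", fw_slot(0xdead0042, 0x000000ff));   /* prints 66 */
            return 0;
    }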
@@ -209,7 +210,9 @@ static int
209fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, 210fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
210 struct rtattr **tb, struct rtattr **tca, unsigned long base) 211 struct rtattr **tb, struct rtattr **tca, unsigned long base)
211{ 212{
213 struct fw_head *head = (struct fw_head *)tp->root;
212 struct tcf_exts e; 214 struct tcf_exts e;
215 u32 mask;
213 int err; 216 int err;
214 217
215 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map); 218 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map);
@@ -232,6 +235,15 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
232 } 235 }
233#endif /* CONFIG_NET_CLS_IND */ 236#endif /* CONFIG_NET_CLS_IND */
234 237
238 if (tb[TCA_FW_MASK-1]) {
239 if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
240 goto errout;
241 mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
242 if (mask != head->mask)
243 goto errout;
244 } else if (head->mask != 0xFFFFFFFF)
245 goto errout;
246
235 tcf_exts_change(tp, &f->exts, &e); 247 tcf_exts_change(tp, &f->exts, &e);
236 248
237 return 0; 249 return 0;
@@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
267 return -EINVAL; 279 return -EINVAL;
268 280
269 if (head == NULL) { 281 if (head == NULL) {
282 u32 mask = 0xFFFFFFFF;
283 if (tb[TCA_FW_MASK-1]) {
284 if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
285 return -EINVAL;
286 mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
287 }
288
270 head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); 289 head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
271 if (head == NULL) 290 if (head == NULL)
272 return -ENOBUFS; 291 return -ENOBUFS;
292 head->mask = mask;
273 293
274 tcf_tree_lock(tp); 294 tcf_tree_lock(tp);
275 tp->root = head; 295 tp->root = head;
@@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
330static int fw_dump(struct tcf_proto *tp, unsigned long fh, 350static int fw_dump(struct tcf_proto *tp, unsigned long fh,
331 struct sk_buff *skb, struct tcmsg *t) 351 struct sk_buff *skb, struct tcmsg *t)
332{ 352{
353 struct fw_head *head = (struct fw_head *)tp->root;
333 struct fw_filter *f = (struct fw_filter*)fh; 354 struct fw_filter *f = (struct fw_filter*)fh;
334 unsigned char *b = skb->tail; 355 unsigned char *b = skb->tail;
335 struct rtattr *rta; 356 struct rtattr *rta;
@@ -351,6 +372,8 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
351 if (strlen(f->indev)) 372 if (strlen(f->indev))
352 RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); 373 RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
353#endif /* CONFIG_NET_CLS_IND */ 374#endif /* CONFIG_NET_CLS_IND */
375 if (head->mask != 0xFFFFFFFF)
376 RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask);
354 377
355 if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) 378 if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
356 goto rtattr_failure; 379 goto rtattr_failure;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 880a3394a51f..bb3ddd4784b1 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1,4 +1,4 @@
1/* vim: ts=8 sw=8 1/*
2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version 2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
@@ -68,218 +68,165 @@
68 one less than their parent. 68 one less than their parent.
69*/ 69*/
70 70
71#define HTB_HSIZE 16 /* classid hash size */ 71#define HTB_HSIZE 16 /* classid hash size */
72#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ 72#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */
73#undef HTB_DEBUG /* compile debugging support (activated by tc tool) */ 73#define HTB_RATECM 1 /* whether to use rate computer */
74#define HTB_RATECM 1 /* whether to use rate computer */ 74#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */
 75#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ 75#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
76#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
77#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
 78#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
79 76
80#if HTB_VER >> 16 != TC_HTB_PROTOVER 77#if HTB_VER >> 16 != TC_HTB_PROTOVER
81#error "Mismatched sch_htb.c and pkt_sch.h" 78#error "Mismatched sch_htb.c and pkt_sch.h"
82#endif 79#endif
83 80
84/* debugging support; S is subsystem, these are defined:
85 0 - netlink messages
86 1 - enqueue
87 2 - drop & requeue
88 3 - dequeue main
89 4 - dequeue one prio DRR part
90 5 - dequeue class accounting
91 6 - class overlimit status computation
92 7 - hint tree
93 8 - event queue
94 10 - rate estimator
95 11 - classifier
96 12 - fast dequeue cache
97
98 L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
99 q->debug uint32 contains 16 2-bit fields one for subsystem starting
100 from LSB
101 */
102#ifdef HTB_DEBUG
103#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
104#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
105 printk(KERN_DEBUG FMT,##ARG)
106#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
107#define HTB_PASSQ q,
108#define HTB_ARGQ struct htb_sched *q,
109#define static
110#undef __inline__
111#define __inline__
112#undef inline
113#define inline
114#define HTB_CMAGIC 0xFEFAFEF1
115#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
116 if ((N)->rb_color == -1) break; \
117 rb_erase(N,R); \
118 (N)->rb_color = -1; } while (0)
119#else
120#define HTB_DBG_COND(S,L) (0)
121#define HTB_DBG(S,L,FMT,ARG...)
122#define HTB_PASSQ
123#define HTB_ARGQ
124#define HTB_CHCL(cl)
125#define htb_safe_rb_erase(N,R) rb_erase(N,R)
126#endif
127
128
129/* used internally to keep status of a single class */ 81/* used internally to keep status of a single class */
130enum htb_cmode { 82enum htb_cmode {
131 HTB_CANT_SEND, /* class can't send and can't borrow */ 83 HTB_CANT_SEND, /* class can't send and can't borrow */
132 HTB_MAY_BORROW, /* class can't send but may borrow */ 84 HTB_MAY_BORROW, /* class can't send but may borrow */
133 HTB_CAN_SEND /* class can send */ 85 HTB_CAN_SEND /* class can send */
134}; 86};
135 87
136/* interior & leaf nodes; props specific to leaves are marked L: */ 88/* interior & leaf nodes; props specific to leaves are marked L: */
137struct htb_class 89struct htb_class {
138{ 90 /* general class parameters */
139#ifdef HTB_DEBUG 91 u32 classid;
140 unsigned magic; 92 struct gnet_stats_basic bstats;
141#endif 93 struct gnet_stats_queue qstats;
142 /* general class parameters */ 94 struct gnet_stats_rate_est rate_est;
143 u32 classid; 95 struct tc_htb_xstats xstats; /* our special stats */
144 struct gnet_stats_basic bstats; 96 int refcnt; /* usage count of this class */
145 struct gnet_stats_queue qstats;
146 struct gnet_stats_rate_est rate_est;
147 struct tc_htb_xstats xstats;/* our special stats */
148 int refcnt; /* usage count of this class */
149 97
150#ifdef HTB_RATECM 98#ifdef HTB_RATECM
151 /* rate measurement counters */ 99 /* rate measurement counters */
152 unsigned long rate_bytes,sum_bytes; 100 unsigned long rate_bytes, sum_bytes;
153 unsigned long rate_packets,sum_packets; 101 unsigned long rate_packets, sum_packets;
154#endif 102#endif
155 103
156 /* topology */ 104 /* topology */
157 int level; /* our level (see above) */ 105 int level; /* our level (see above) */
158 struct htb_class *parent; /* parent class */ 106 struct htb_class *parent; /* parent class */
159 struct list_head hlist; /* classid hash list item */ 107 struct hlist_node hlist; /* classid hash list item */
160 struct list_head sibling; /* sibling list item */ 108 struct list_head sibling; /* sibling list item */
161 struct list_head children; /* children list */ 109 struct list_head children; /* children list */
162 110
163 union { 111 union {
164 struct htb_class_leaf { 112 struct htb_class_leaf {
165 struct Qdisc *q; 113 struct Qdisc *q;
166 int prio; 114 int prio;
167 int aprio; 115 int aprio;
168 int quantum; 116 int quantum;
169 int deficit[TC_HTB_MAXDEPTH]; 117 int deficit[TC_HTB_MAXDEPTH];
170 struct list_head drop_list; 118 struct list_head drop_list;
171 } leaf; 119 } leaf;
172 struct htb_class_inner { 120 struct htb_class_inner {
173 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ 121 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
174 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ 122 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
175 /* When class changes from state 1->2 and disconnects from 123 /* When class changes from state 1->2 and disconnects from
176 parent's feed then we lost ptr value and start from the 124 parent's feed then we lost ptr value and start from the
177 first child again. Here we store classid of the 125 first child again. Here we store classid of the
178 last valid ptr (used when ptr is NULL). */ 126 last valid ptr (used when ptr is NULL). */
179 u32 last_ptr_id[TC_HTB_NUMPRIO]; 127 u32 last_ptr_id[TC_HTB_NUMPRIO];
180 } inner; 128 } inner;
181 } un; 129 } un;
182 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ 130 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
183 struct rb_node pq_node; /* node for event queue */ 131 struct rb_node pq_node; /* node for event queue */
184 unsigned long pq_key; /* the same type as jiffies global */ 132 unsigned long pq_key; /* the same type as jiffies global */
185 133
186 int prio_activity; /* for which prios are we active */ 134 int prio_activity; /* for which prios are we active */
187 enum htb_cmode cmode; /* current mode of the class */ 135 enum htb_cmode cmode; /* current mode of the class */
188 136
189 /* class attached filters */ 137 /* class attached filters */
190 struct tcf_proto *filter_list; 138 struct tcf_proto *filter_list;
191 int filter_cnt; 139 int filter_cnt;
192 140
193 int warned; /* only one warning about non work conserving .. */ 141 int warned; /* only one warning about non work conserving .. */
194 142
195 /* token bucket parameters */ 143 /* token bucket parameters */
196 struct qdisc_rate_table *rate; /* rate table of the class itself */ 144 struct qdisc_rate_table *rate; /* rate table of the class itself */
197 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ 145 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
198 long buffer,cbuffer; /* token bucket depth/rate */ 146 long buffer, cbuffer; /* token bucket depth/rate */
199 psched_tdiff_t mbuffer; /* max wait time */ 147 psched_tdiff_t mbuffer; /* max wait time */
200 long tokens,ctokens; /* current number of tokens */ 148 long tokens, ctokens; /* current number of tokens */
201 psched_time_t t_c; /* checkpoint time */ 149 psched_time_t t_c; /* checkpoint time */
202}; 150};
203 151
204/* TODO: maybe compute rate when size is too large .. or drop ? */ 152/* TODO: maybe compute rate when size is too large .. or drop ? */
205static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate, 153static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
206 int size) 154 int size)
207{ 155{
208 int slot = size >> rate->rate.cell_log; 156 int slot = size >> rate->rate.cell_log;
209 if (slot > 255) { 157 if (slot > 255) {
210 cl->xstats.giants++; 158 cl->xstats.giants++;
211 slot = 255; 159 slot = 255;
212 } 160 }
213 return rate->data[slot]; 161 return rate->data[slot];
214} 162}
215 163
216struct htb_sched 164struct htb_sched {
217{ 165 struct list_head root; /* root classes list */
218 struct list_head root; /* root classes list */ 166 struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */
219 struct list_head hash[HTB_HSIZE]; /* hashed by classid */ 167 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
220 struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */ 168
221 169 /* self list - roots of self generating tree */
222 /* self list - roots of self generating tree */ 170 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
223 struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; 171 int row_mask[TC_HTB_MAXDEPTH];
224 int row_mask[TC_HTB_MAXDEPTH]; 172 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
225 struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; 173 u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
226 u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; 174
227 175 /* self wait list - roots of wait PQs per row */
228 /* self wait list - roots of wait PQs per row */ 176 struct rb_root wait_pq[TC_HTB_MAXDEPTH];
229 struct rb_root wait_pq[TC_HTB_MAXDEPTH]; 177
230 178 /* time of nearest event per level (row) */
231 /* time of nearest event per level (row) */ 179 unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
232 unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; 180
233 181 /* cached value of jiffies in dequeue */
234 /* cached value of jiffies in dequeue */ 182 unsigned long jiffies;
235 unsigned long jiffies; 183
236 184 /* whether we hit non-work conserving class during this dequeue; we use */
237 /* whether we hit non-work conserving class during this dequeue; we use */ 185 int nwc_hit; /* this to disable mindelay complaint in dequeue */
238 int nwc_hit; /* this to disable mindelay complaint in dequeue */ 186
239 187 int defcls; /* class where unclassified flows go to */
240 int defcls; /* class where unclassified flows go to */ 188
241 u32 debug; /* subsystem debug levels */ 189 /* filters for qdisc itself */
242 190 struct tcf_proto *filter_list;
243 /* filters for qdisc itself */ 191 int filter_cnt;
244 struct tcf_proto *filter_list; 192
245 int filter_cnt; 193 int rate2quantum; /* quant = rate / rate2quantum */
246 194 psched_time_t now; /* cached dequeue time */
247 int rate2quantum; /* quant = rate / rate2quantum */ 195 struct timer_list timer; /* send delay timer */
248 psched_time_t now; /* cached dequeue time */
249 struct timer_list timer; /* send delay timer */
250#ifdef HTB_RATECM 196#ifdef HTB_RATECM
251 struct timer_list rttim; /* rate computer timer */ 197 struct timer_list rttim; /* rate computer timer */
252 int recmp_bucket; /* which hash bucket to recompute next */ 198 int recmp_bucket; /* which hash bucket to recompute next */
253#endif 199#endif
254
255 /* non shaped skbs; let them go directly thru */
256 struct sk_buff_head direct_queue;
257 int direct_qlen; /* max qlen of above */
258 200
259 long direct_pkts; 201 /* non shaped skbs; let them go directly thru */
202 struct sk_buff_head direct_queue;
203 int direct_qlen; /* max qlen of above */
204
205 long direct_pkts;
260}; 206};
261 207
262/* compute hash of size HTB_HSIZE for given handle */ 208/* compute hash of size HTB_HSIZE for given handle */
263static __inline__ int htb_hash(u32 h) 209static inline int htb_hash(u32 h)
264{ 210{
265#if HTB_HSIZE != 16 211#if HTB_HSIZE != 16
266 #error "Declare new hash for your HTB_HSIZE" 212#error "Declare new hash for your HTB_HSIZE"
267#endif 213#endif
268 h ^= h>>8; /* stolen from cbq_hash */ 214 h ^= h >> 8; /* stolen from cbq_hash */
269 h ^= h>>4; 215 h ^= h >> 4;
270 return h & 0xf; 216 return h & 0xf;
271} 217}
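
htb_hash() folds the 32-bit handle into 16 buckets with two xor-shifts, copied out below as a standalone function you can experiment with:

    #include <stdio.h>

    static int htb_hash(unsigned int h)
    {
            h ^= h >> 8;    /* fold the upper bytes into the low nibble */
            h ^= h >> 4;
            return h & 0xf;
    }

    int main(void)
    {
            printf("%d %d\n", htb_hash(0x10001), htb_hash(0x10010));
            return 0;
    }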
272 218
273/* find class in global hash table using given handle */ 219/* find class in global hash table using given handle */
274static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch) 220static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
275{ 221{
276 struct htb_sched *q = qdisc_priv(sch); 222 struct htb_sched *q = qdisc_priv(sch);
277 struct list_head *p; 223 struct hlist_node *p;
278 if (TC_H_MAJ(handle) != sch->handle) 224 struct htb_class *cl;
225
226 if (TC_H_MAJ(handle) != sch->handle)
279 return NULL; 227 return NULL;
280 228
281 list_for_each (p,q->hash+htb_hash(handle)) { 229 hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
282 struct htb_class *cl = list_entry(p,struct htb_class,hlist);
283 if (cl->classid == handle) 230 if (cl->classid == handle)
284 return cl; 231 return cl;
285 } 232 }
@@ -304,7 +251,8 @@ static inline u32 htb_classid(struct htb_class *cl)
304 return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC; 251 return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
305} 252}
306 253
307static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) 254static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
255 int *qerr)
308{ 256{
309 struct htb_sched *q = qdisc_priv(sch); 257 struct htb_sched *q = qdisc_priv(sch);
310 struct htb_class *cl; 258 struct htb_class *cl;
@@ -316,8 +264,8 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
316 note that nfmark can be used too by attaching filter fw with no 264 note that nfmark can be used too by attaching filter fw with no
317 rules in it */ 265 rules in it */
318 if (skb->priority == sch->handle) 266 if (skb->priority == sch->handle)
319 return HTB_DIRECT; /* X:0 (direct flow) selected */ 267 return HTB_DIRECT; /* X:0 (direct flow) selected */
320 if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) 268 if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
321 return cl; 269 return cl;
322 270
323 *qerr = NET_XMIT_BYPASS; 271 *qerr = NET_XMIT_BYPASS;
@@ -326,7 +274,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
326#ifdef CONFIG_NET_CLS_ACT 274#ifdef CONFIG_NET_CLS_ACT
327 switch (result) { 275 switch (result) {
328 case TC_ACT_QUEUED: 276 case TC_ACT_QUEUED:
329 case TC_ACT_STOLEN: 277 case TC_ACT_STOLEN:
330 *qerr = NET_XMIT_SUCCESS; 278 *qerr = NET_XMIT_SUCCESS;
331 case TC_ACT_SHOT: 279 case TC_ACT_SHOT:
332 return NULL; 280 return NULL;
@@ -335,97 +283,44 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
335 if (result == TC_POLICE_SHOT) 283 if (result == TC_POLICE_SHOT)
336 return HTB_DIRECT; 284 return HTB_DIRECT;
337#endif 285#endif
338 if ((cl = (void*)res.class) == NULL) { 286 if ((cl = (void *)res.class) == NULL) {
339 if (res.classid == sch->handle) 287 if (res.classid == sch->handle)
340 return HTB_DIRECT; /* X:0 (direct flow) */ 288 return HTB_DIRECT; /* X:0 (direct flow) */
341 if ((cl = htb_find(res.classid,sch)) == NULL) 289 if ((cl = htb_find(res.classid, sch)) == NULL)
342 break; /* filter selected invalid classid */ 290 break; /* filter selected invalid classid */
343 } 291 }
344 if (!cl->level) 292 if (!cl->level)
345 return cl; /* we hit leaf; return it */ 293 return cl; /* we hit leaf; return it */
346 294
347 /* we have got inner class; apply inner filter chain */ 295 /* we have got inner class; apply inner filter chain */
348 tcf = cl->filter_list; 296 tcf = cl->filter_list;
349 } 297 }
350 /* classification failed; try to use default class */ 298 /* classification failed; try to use default class */
351 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch); 299 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
352 if (!cl || cl->level) 300 if (!cl || cl->level)
353 return HTB_DIRECT; /* bad default .. this is safe bet */ 301 return HTB_DIRECT; /* bad default .. this is safe bet */
354 return cl; 302 return cl;
355} 303}
356 304
357#ifdef HTB_DEBUG
358static void htb_next_rb_node(struct rb_node **n);
359#define HTB_DUMTREE(root,memb) if(root) { \
360 struct rb_node *n = (root)->rb_node; \
361 while (n->rb_left) n = n->rb_left; \
362 while (n) { \
363 struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
364 printk(" %x",cl->classid); htb_next_rb_node (&n); \
365 } }
366
367static void htb_debug_dump (struct htb_sched *q)
368{
369 int i,p;
370 printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
371 /* rows */
372 for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
373 printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
374 for (p=0;p<TC_HTB_NUMPRIO;p++) {
375 if (!q->row[i][p].rb_node) continue;
376 printk(" p%d:",p);
377 HTB_DUMTREE(q->row[i]+p,node[p]);
378 }
379 printk("\n");
380 }
381 /* classes */
382 for (i = 0; i < HTB_HSIZE; i++) {
383 struct list_head *l;
384 list_for_each (l,q->hash+i) {
385 struct htb_class *cl = list_entry(l,struct htb_class,hlist);
386 long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
387 printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
388 "pa=%x f:",
389 cl->classid,cl->cmode,cl->tokens,cl->ctokens,
390 cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
391 cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
392 if (cl->level)
393 for (p=0;p<TC_HTB_NUMPRIO;p++) {
394 if (!cl->un.inner.feed[p].rb_node) continue;
395 printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
396 HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
397 }
398 printk("\n");
399 }
400 }
401}
402#endif
403/** 305/**
404 * htb_add_to_id_tree - adds class to the round robin list 306 * htb_add_to_id_tree - adds class to the round robin list
405 * 307 *
406 * Routine adds class to the list (actually tree) sorted by classid. 308 * Routine adds class to the list (actually tree) sorted by classid.
407 * Make sure that class is not already on such list for given prio. 309 * Make sure that class is not already on such list for given prio.
408 */ 310 */
409static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root, 311static void htb_add_to_id_tree(struct rb_root *root,
410 struct htb_class *cl,int prio) 312 struct htb_class *cl, int prio)
411{ 313{
412 struct rb_node **p = &root->rb_node, *parent = NULL; 314 struct rb_node **p = &root->rb_node, *parent = NULL;
413 HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio); 315
414#ifdef HTB_DEBUG
415 if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
416 HTB_CHCL(cl);
417 if (*p) {
418 struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
419 HTB_CHCL(x);
420 }
421#endif
422 while (*p) { 316 while (*p) {
423 struct htb_class *c; parent = *p; 317 struct htb_class *c;
318 parent = *p;
424 c = rb_entry(parent, struct htb_class, node[prio]); 319 c = rb_entry(parent, struct htb_class, node[prio]);
425 HTB_CHCL(c); 320
426 if (cl->classid > c->classid) 321 if (cl->classid > c->classid)
427 p = &parent->rb_right; 322 p = &parent->rb_right;
428 else 323 else
429 p = &parent->rb_left; 324 p = &parent->rb_left;
430 } 325 }
431 rb_link_node(&cl->node[prio], parent, p); 326 rb_link_node(&cl->node[prio], parent, p);
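
htb_add_to_id_tree() descends to the slot ordered by classid and links the node there; the kernel then relies on rb_insert_color() (just past this hunk) to rebalance. As a sketch, the same ordering walk on a plain, unbalanced binary tree:

    struct node {
            unsigned int classid;
            struct node *left, *right;
    };

    /* Walk to the classid-ordered slot and link the node there. */
    static void add_to_id_tree(struct node **root, struct node *n)
    {
            struct node **p = root;

            while (*p) {
                    if (n->classid > (*p)->classid)
                            p = &(*p)->right;
                    else
                            p = &(*p)->left;
            }
            n->left = n->right = NULL;
            *p = n;
    }

    int main(void)
    {
            struct node *root = NULL;
            struct node a = { .classid = 2 }, b = { .classid = 1 };

            add_to_id_tree(&root, &a);
            add_to_id_tree(&root, &b);      /* ends up as a.left */
            return (root == &a && root->left == &b) ? 0 : 1;
    }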
@@ -439,17 +334,11 @@ static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
439 * change its mode in cl->pq_key microseconds. Make sure that class is not 334 * change its mode in cl->pq_key microseconds. Make sure that class is not
440 * already in the queue. 335 * already in the queue.
441 */ 336 */
442static void htb_add_to_wait_tree (struct htb_sched *q, 337static void htb_add_to_wait_tree(struct htb_sched *q,
443 struct htb_class *cl,long delay,int debug_hint) 338 struct htb_class *cl, long delay)
444{ 339{
445 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; 340 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
446 HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key); 341
447#ifdef HTB_DEBUG
448 if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
449 HTB_CHCL(cl);
450 if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
451 printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
452#endif
453 cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); 342 cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
454 if (cl->pq_key == q->jiffies) 343 if (cl->pq_key == q->jiffies)
455 cl->pq_key++; 344 cl->pq_key++;
@@ -457,13 +346,14 @@ static void htb_add_to_wait_tree (struct htb_sched *q,
457 /* update the nearest event cache */ 346 /* update the nearest event cache */
458 if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) 347 if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
459 q->near_ev_cache[cl->level] = cl->pq_key; 348 q->near_ev_cache[cl->level] = cl->pq_key;
460 349
461 while (*p) { 350 while (*p) {
462 struct htb_class *c; parent = *p; 351 struct htb_class *c;
352 parent = *p;
463 c = rb_entry(parent, struct htb_class, pq_node); 353 c = rb_entry(parent, struct htb_class, pq_node);
464 if (time_after_eq(cl->pq_key, c->pq_key)) 354 if (time_after_eq(cl->pq_key, c->pq_key))
465 p = &parent->rb_right; 355 p = &parent->rb_right;
466 else 356 else
467 p = &parent->rb_left; 357 p = &parent->rb_left;
468 } 358 }
469 rb_link_node(&cl->pq_node, parent, p); 359 rb_link_node(&cl->pq_node, parent, p);
@@ -476,7 +366,7 @@ static void htb_add_to_wait_tree (struct htb_sched *q,
476 * When we are past last key we return NULL. 366 * When we are past last key we return NULL.
477 * Average complexity is 2 steps per call. 367 * Average complexity is 2 steps per call.
478 */ 368 */
479static void htb_next_rb_node(struct rb_node **n) 369static inline void htb_next_rb_node(struct rb_node **n)
480{ 370{
481 *n = rb_next(*n); 371 *n = rb_next(*n);
482} 372}
@@ -487,42 +377,51 @@ static void htb_next_rb_node(struct rb_node **n)
487 * The class is added to row at priorities marked in mask. 377 * The class is added to row at priorities marked in mask.
488 * It does nothing if mask == 0. 378 * It does nothing if mask == 0.
489 */ 379 */
490static inline void htb_add_class_to_row(struct htb_sched *q, 380static inline void htb_add_class_to_row(struct htb_sched *q,
491 struct htb_class *cl,int mask) 381 struct htb_class *cl, int mask)
492{ 382{
493 HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
494 cl->classid,mask,q->row_mask[cl->level]);
495 HTB_CHCL(cl);
496 q->row_mask[cl->level] |= mask; 383 q->row_mask[cl->level] |= mask;
497 while (mask) { 384 while (mask) {
498 int prio = ffz(~mask); 385 int prio = ffz(~mask);
499 mask &= ~(1 << prio); 386 mask &= ~(1 << prio);
500 htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio); 387 htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
388 }
389}
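
The mask loops above peel off one priority per pass with ffz(~mask), i.e. the index of the lowest set bit. The equivalent walk in userspace C, using a GCC/Clang builtin:

    #include <stdio.h>

    static void for_each_prio(int mask)
    {
            while (mask) {
                    int prio = __builtin_ctz(mask); /* == ffz(~mask), mask != 0 */

                    mask &= ~(1 << prio);           /* clear the handled bit */
                    printf("prio %d\n", prio);
            }
    }

    int main(void)
    {
            for_each_prio(0x29);    /* visits prios 0, 3, 5 */
            return 0;
    }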
390
391/* If this triggers, it is a bug in this code, but it need not be fatal */
392static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
393{
394 if (RB_EMPTY_NODE(rb)) {
395 WARN_ON(1);
396 } else {
397 rb_erase(rb, root);
398 RB_CLEAR_NODE(rb);
501 } 399 }
502} 400}
503 401
402
504/** 403/**
505 * htb_remove_class_from_row - removes class from its row 404 * htb_remove_class_from_row - removes class from its row
506 * 405 *
507 * The class is removed from row at priorities marked in mask. 406 * The class is removed from row at priorities marked in mask.
508 * It does nothing if mask == 0. 407 * It does nothing if mask == 0.
509 */ 408 */
510static __inline__ void htb_remove_class_from_row(struct htb_sched *q, 409static inline void htb_remove_class_from_row(struct htb_sched *q,
511 struct htb_class *cl,int mask) 410 struct htb_class *cl, int mask)
512{ 411{
513 int m = 0; 412 int m = 0;
514 HTB_CHCL(cl); 413
515 while (mask) { 414 while (mask) {
516 int prio = ffz(~mask); 415 int prio = ffz(~mask);
416
517 mask &= ~(1 << prio); 417 mask &= ~(1 << prio);
518 if (q->ptr[cl->level][prio] == cl->node+prio) 418 if (q->ptr[cl->level][prio] == cl->node + prio)
519 htb_next_rb_node(q->ptr[cl->level]+prio); 419 htb_next_rb_node(q->ptr[cl->level] + prio);
520 htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio); 420
521 if (!q->row[cl->level][prio].rb_node) 421 htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
422 if (!q->row[cl->level][prio].rb_node)
522 m |= 1 << prio; 423 m |= 1 << prio;
523 } 424 }
524 HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
525 cl->classid,mask,q->row_mask[cl->level],m);
526 q->row_mask[cl->level] &= ~m; 425 q->row_mask[cl->level] &= ~m;
527} 426}
528 427
@@ -533,34 +432,31 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
533 * for priorities it is participating on. cl->cmode must be new 432 * for priorities it is participating on. cl->cmode must be new
534 * (activated) mode. It does nothing if cl->prio_activity == 0. 433 * (activated) mode. It does nothing if cl->prio_activity == 0.
535 */ 434 */
536static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) 435static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
537{ 436{
538 struct htb_class *p = cl->parent; 437 struct htb_class *p = cl->parent;
539 long m,mask = cl->prio_activity; 438 long m, mask = cl->prio_activity;
540 HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
541 HTB_CHCL(cl);
542 439
543 while (cl->cmode == HTB_MAY_BORROW && p && mask) { 440 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
544 HTB_CHCL(p); 441 m = mask;
545 m = mask; while (m) { 442 while (m) {
546 int prio = ffz(~m); 443 int prio = ffz(~m);
547 m &= ~(1 << prio); 444 m &= ~(1 << prio);
548 445
549 if (p->un.inner.feed[prio].rb_node) 446 if (p->un.inner.feed[prio].rb_node)
550 /* parent already has its feed in use so that 447 /* parent already has its feed in use so that
551 reset bit in mask as parent is already ok */ 448 reset bit in mask as parent is already ok */
552 mask &= ~(1 << prio); 449 mask &= ~(1 << prio);
553 450
554 htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio); 451 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
555 } 452 }
556 HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
557 p->classid,p->prio_activity,mask,p->cmode);
558 p->prio_activity |= mask; 453 p->prio_activity |= mask;
559 cl = p; p = cl->parent; 454 cl = p;
560 HTB_CHCL(cl); 455 p = cl->parent;
456
561 } 457 }
562 if (cl->cmode == HTB_CAN_SEND && mask) 458 if (cl->cmode == HTB_CAN_SEND && mask)
563 htb_add_class_to_row(q,cl,mask); 459 htb_add_class_to_row(q, cl, mask);
564} 460}
565 461
566/** 462/**
@@ -573,39 +469,52 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
573static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) 469static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
574{ 470{
575 struct htb_class *p = cl->parent; 471 struct htb_class *p = cl->parent;
576 long m,mask = cl->prio_activity; 472 long m, mask = cl->prio_activity;
577 HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
578 HTB_CHCL(cl);
579 473
580 while (cl->cmode == HTB_MAY_BORROW && p && mask) { 474 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
581 m = mask; mask = 0; 475 m = mask;
476 mask = 0;
582 while (m) { 477 while (m) {
583 int prio = ffz(~m); 478 int prio = ffz(~m);
584 m &= ~(1 << prio); 479 m &= ~(1 << prio);
585 480
586 if (p->un.inner.ptr[prio] == cl->node+prio) { 481 if (p->un.inner.ptr[prio] == cl->node + prio) {
587 /* we are removing child which is pointed to from 482 /* we are removing child which is pointed to from
588 parent feed - forget the pointer but remember 483 parent feed - forget the pointer but remember
589 classid */ 484 classid */
590 p->un.inner.last_ptr_id[prio] = cl->classid; 485 p->un.inner.last_ptr_id[prio] = cl->classid;
591 p->un.inner.ptr[prio] = NULL; 486 p->un.inner.ptr[prio] = NULL;
592 } 487 }
593 488
594 htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio); 489 htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
595 490
596 if (!p->un.inner.feed[prio].rb_node) 491 if (!p->un.inner.feed[prio].rb_node)
597 mask |= 1 << prio; 492 mask |= 1 << prio;
598 } 493 }
599 HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", 494
600 p->classid,p->prio_activity,mask,p->cmode);
601 p->prio_activity &= ~mask; 495 p->prio_activity &= ~mask;
602 cl = p; p = cl->parent; 496 cl = p;
603 HTB_CHCL(cl); 497 p = cl->parent;
498
604 } 499 }
605 if (cl->cmode == HTB_CAN_SEND && mask) 500 if (cl->cmode == HTB_CAN_SEND && mask)
606 htb_remove_class_from_row(q,cl,mask); 501 htb_remove_class_from_row(q, cl, mask);
607} 502}
608 503
504#if HTB_HYSTERESIS
505static inline long htb_lowater(const struct htb_class *cl)
506{
507 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
508}
509static inline long htb_hiwater(const struct htb_class *cl)
510{
511 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
512}
513#else
514#define htb_lowater(cl) (0)
515#define htb_hiwater(cl) (0)
516#endif
517
609/** 518/**
610 * htb_class_mode - computes and returns current class mode 519 * htb_class_mode - computes and returns current class mode
611 * 520 *
@@ -617,28 +526,21 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
617 * 0 .. -cl->{c,}buffer range. It is meant to limit number of 526 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
618 * mode transitions per time unit. The speed gain is about 1/6. 527 * mode transitions per time unit. The speed gain is about 1/6.
619 */ 528 */
620static __inline__ enum htb_cmode 529static inline enum htb_cmode
621htb_class_mode(struct htb_class *cl,long *diff) 530htb_class_mode(struct htb_class *cl, long *diff)
622{ 531{
623 long toks; 532 long toks;
624 533
625 if ((toks = (cl->ctokens + *diff)) < ( 534 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
626#if HTB_HYSTERESIS 535 *diff = -toks;
627 cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 536 return HTB_CANT_SEND;
628#endif 537 }
629 0)) { 538
630 *diff = -toks; 539 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
631 return HTB_CANT_SEND; 540 return HTB_CAN_SEND;
632 }
633 if ((toks = (cl->tokens + *diff)) >= (
634#if HTB_HYSTERESIS
635 cl->cmode == HTB_CAN_SEND ? -cl->buffer :
636#endif
637 0))
638 return HTB_CAN_SEND;
639 541
640 *diff = -toks; 542 *diff = -toks;
641 return HTB_MAY_BORROW; 543 return HTB_MAY_BORROW;
642} 544}
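
htb_class_mode() now takes its hysteresis thresholds from htb_lowater()/htb_hiwater(): a class that ran dry must refill past -cbuffer, not merely 0, before its mode may change again, which damps mode flapping (the comment above cites roughly a 1/6 speed gain). A standalone model of the computation, with illustrative values:

    #include <stdio.h>

    enum cmode { CANT_SEND, MAY_BORROW, CAN_SEND };

    struct cls {
            long tokens, ctokens;   /* own-rate and ceil-rate credit */
            long buffer, cbuffer;   /* bucket depths */
            enum cmode cmode;
    };

    static long lowater(const struct cls *cl)
    {
            return cl->cmode != CANT_SEND ? -cl->cbuffer : 0;
    }

    static long hiwater(const struct cls *cl)
    {
            return cl->cmode == CAN_SEND ? -cl->buffer : 0;
    }

    static enum cmode class_mode(const struct cls *cl, long *diff)
    {
            long toks;

            if ((toks = cl->ctokens + *diff) < lowater(cl)) {
                    *diff = -toks;
                    return CANT_SEND;
            }
            if ((toks = cl->tokens + *diff) >= hiwater(cl))
                    return CAN_SEND;

            *diff = -toks;
            return MAY_BORROW;
    }

    int main(void)
    {
            struct cls cl = { .tokens = -10, .ctokens = 50,
                              .buffer = 100, .cbuffer = 100,
                              .cmode = CAN_SEND };
            long diff = 0;

            /* tokens slightly negative but above -buffer: stays CAN_SEND */
            printf("%d\n", class_mode(&cl, &diff));     /* prints 2 */
            return 0;
    }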
643 545
644/** 546/**
@@ -650,24 +552,21 @@ htb_class_mode(struct htb_class *cl,long *diff)
650 * be different from old one and cl->pq_key has to be valid if changing 552 * be different from old one and cl->pq_key has to be valid if changing
651 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). 553 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
652 */ 554 */
653static void 555static void
654htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) 556htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
655{ 557{
656 enum htb_cmode new_mode = htb_class_mode(cl,diff); 558 enum htb_cmode new_mode = htb_class_mode(cl, diff);
657
658 HTB_CHCL(cl);
659 HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);
660 559
661 if (new_mode == cl->cmode) 560 if (new_mode == cl->cmode)
662 return; 561 return;
663 562
664 if (cl->prio_activity) { /* not necessary: speed optimization */ 563 if (cl->prio_activity) { /* not necessary: speed optimization */
665 if (cl->cmode != HTB_CANT_SEND) 564 if (cl->cmode != HTB_CANT_SEND)
666 htb_deactivate_prios(q,cl); 565 htb_deactivate_prios(q, cl);
667 cl->cmode = new_mode; 566 cl->cmode = new_mode;
668 if (new_mode != HTB_CANT_SEND) 567 if (new_mode != HTB_CANT_SEND)
669 htb_activate_prios(q,cl); 568 htb_activate_prios(q, cl);
670 } else 569 } else
671 cl->cmode = new_mode; 570 cl->cmode = new_mode;
672} 571}
673 572
@@ -678,14 +577,15 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
678 * for the prio. It can be called on already active leaf safely. 577 * for the prio. It can be called on already active leaf safely.
679 * It also adds leaf into droplist. 578 * It also adds leaf into droplist.
680 */ 579 */
681static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) 580static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
682{ 581{
683 BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); 582 BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
684 HTB_CHCL(cl); 583
685 if (!cl->prio_activity) { 584 if (!cl->prio_activity) {
686 cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); 585 cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
687 htb_activate_prios(q,cl); 586 htb_activate_prios(q, cl);
688 list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio); 587 list_add_tail(&cl->un.leaf.drop_list,
588 q->drops + cl->un.leaf.aprio);
689 } 589 }
690} 590}
691 591
@@ -695,120 +595,120 @@ static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
 695 * Make sure that leaf is active. In other words it can't be called 595 * Make sure that leaf is active. In other words it can't be called
696 * with non-active leaf. It also removes class from the drop list. 596 * with non-active leaf. It also removes class from the drop list.
697 */ 597 */
698static __inline__ void 598static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
699htb_deactivate(struct htb_sched *q,struct htb_class *cl)
700{ 599{
701 BUG_TRAP(cl->prio_activity); 600 BUG_TRAP(cl->prio_activity);
702 HTB_CHCL(cl); 601
703 htb_deactivate_prios(q,cl); 602 htb_deactivate_prios(q, cl);
704 cl->prio_activity = 0; 603 cl->prio_activity = 0;
705 list_del_init(&cl->un.leaf.drop_list); 604 list_del_init(&cl->un.leaf.drop_list);
706} 605}
707 606
708static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) 607static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
709{ 608{
710 int ret; 609 int ret;
711 struct htb_sched *q = qdisc_priv(sch); 610 struct htb_sched *q = qdisc_priv(sch);
712 struct htb_class *cl = htb_classify(skb,sch,&ret); 611 struct htb_class *cl = htb_classify(skb, sch, &ret);
713 612
714 if (cl == HTB_DIRECT) { 613 if (cl == HTB_DIRECT) {
715 /* enqueue to helper queue */ 614 /* enqueue to helper queue */
716 if (q->direct_queue.qlen < q->direct_qlen) { 615 if (q->direct_queue.qlen < q->direct_qlen) {
717 __skb_queue_tail(&q->direct_queue, skb); 616 __skb_queue_tail(&q->direct_queue, skb);
718 q->direct_pkts++; 617 q->direct_pkts++;
719 } else { 618 } else {
720 kfree_skb(skb); 619 kfree_skb(skb);
721 sch->qstats.drops++; 620 sch->qstats.drops++;
722 return NET_XMIT_DROP; 621 return NET_XMIT_DROP;
723 } 622 }
724#ifdef CONFIG_NET_CLS_ACT 623#ifdef CONFIG_NET_CLS_ACT
725 } else if (!cl) { 624 } else if (!cl) {
726 if (ret == NET_XMIT_BYPASS) 625 if (ret == NET_XMIT_BYPASS)
727 sch->qstats.drops++; 626 sch->qstats.drops++;
728 kfree_skb (skb); 627 kfree_skb(skb);
729 return ret; 628 return ret;
730#endif 629#endif
731 } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { 630 } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
732 sch->qstats.drops++; 631 NET_XMIT_SUCCESS) {
733 cl->qstats.drops++; 632 sch->qstats.drops++;
734 return NET_XMIT_DROP; 633 cl->qstats.drops++;
735 } else { 634 return NET_XMIT_DROP;
736 cl->bstats.packets++; cl->bstats.bytes += skb->len; 635 } else {
737 htb_activate (q,cl); 636 cl->bstats.packets++;
738 } 637 cl->bstats.bytes += skb->len;
739 638 htb_activate(q, cl);
740 sch->q.qlen++; 639 }
741 sch->bstats.packets++; sch->bstats.bytes += skb->len; 640
742 HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); 641 sch->q.qlen++;
743 return NET_XMIT_SUCCESS; 642 sch->bstats.packets++;
643 sch->bstats.bytes += skb->len;
644 return NET_XMIT_SUCCESS;
744} 645}
745 646
746/* TODO: requeuing packet charges it to policers again !! */ 647/* TODO: requeuing packet charges it to policers again !! */
747static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) 648static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
748{ 649{
749 struct htb_sched *q = qdisc_priv(sch); 650 struct htb_sched *q = qdisc_priv(sch);
750 int ret = NET_XMIT_SUCCESS; 651 int ret = NET_XMIT_SUCCESS;
751 struct htb_class *cl = htb_classify(skb,sch, &ret); 652 struct htb_class *cl = htb_classify(skb, sch, &ret);
752 struct sk_buff *tskb; 653 struct sk_buff *tskb;
753 654
754 if (cl == HTB_DIRECT || !cl) { 655 if (cl == HTB_DIRECT || !cl) {
755 /* enqueue to helper queue */ 656 /* enqueue to helper queue */
756 if (q->direct_queue.qlen < q->direct_qlen && cl) { 657 if (q->direct_queue.qlen < q->direct_qlen && cl) {
757 __skb_queue_head(&q->direct_queue, skb); 658 __skb_queue_head(&q->direct_queue, skb);
758 } else { 659 } else {
759 __skb_queue_head(&q->direct_queue, skb); 660 __skb_queue_head(&q->direct_queue, skb);
760 tskb = __skb_dequeue_tail(&q->direct_queue); 661 tskb = __skb_dequeue_tail(&q->direct_queue);
761 kfree_skb (tskb); 662 kfree_skb(tskb);
762 sch->qstats.drops++; 663 sch->qstats.drops++;
763 return NET_XMIT_CN; 664 return NET_XMIT_CN;
764 } 665 }
765 } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { 666 } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
766 sch->qstats.drops++; 667 NET_XMIT_SUCCESS) {
767 cl->qstats.drops++; 668 sch->qstats.drops++;
768 return NET_XMIT_DROP; 669 cl->qstats.drops++;
769 } else 670 return NET_XMIT_DROP;
770 htb_activate (q,cl); 671 } else
771 672 htb_activate(q, cl);
772 sch->q.qlen++; 673
773 sch->qstats.requeues++; 674 sch->q.qlen++;
774 HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); 675 sch->qstats.requeues++;
775 return NET_XMIT_SUCCESS; 676 return NET_XMIT_SUCCESS;
776} 677}
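
Both branches above treat the direct queue as a bounded FIFO but differ when it is full: htb_enqueue tail-drops the new packet and returns NET_XMIT_DROP, while htb_requeue puts the returned packet back at the head and drops the tail packet to make room, returning NET_XMIT_CN (congestion notification). A minimal user-space model of the two policies, with an int array standing in for sk_buff_head (all names here are illustrative, not kernel API):

/* Bounded direct-queue policies modelled on htb_enqueue/htb_requeue. */
#include <stdio.h>
#include <string.h>

#define CAP 2                               /* q->direct_qlen analogue */
enum { XMIT_SUCCESS, XMIT_DROP, XMIT_CN };

struct dq { int pkt[CAP + 1]; int len; };   /* pkt[0] is the head */

static int enqueue(struct dq *q, int p)     /* full: drop the newcomer */
{
	if (q->len < CAP) {
		q->pkt[q->len++] = p;       /* __skb_queue_tail() */
		return XMIT_SUCCESS;
	}
	return XMIT_DROP;                   /* kfree_skb(skb) */
}

static int requeue(struct dq *q, int p)     /* full: drop the tail packet */
{
	memmove(q->pkt + 1, q->pkt, q->len * sizeof(int));
	q->pkt[0] = p;                      /* __skb_queue_head() */
	if (q->len < CAP) {
		q->len++;
		return XMIT_SUCCESS;
	}
	return XMIT_CN;     /* old tail shifted past len, i.e. freed */
}

int main(void)
{
	struct dq q = { .len = 0 };

	enqueue(&q, 1);
	enqueue(&q, 2);
	printf("enqueue on full queue -> %d (DROP)\n", enqueue(&q, 3));
	printf("requeue on full queue -> %d (CN)\n", requeue(&q, 9));
	printf("next packet out: %d\n", q.pkt[0]);  /* 9, the requeued one */
	return 0;
}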
777 678
778static void htb_timer(unsigned long arg) 679static void htb_timer(unsigned long arg)
779{ 680{
780 struct Qdisc *sch = (struct Qdisc*)arg; 681 struct Qdisc *sch = (struct Qdisc *)arg;
781 sch->flags &= ~TCQ_F_THROTTLED; 682 sch->flags &= ~TCQ_F_THROTTLED;
782 wmb(); 683 wmb();
783 netif_schedule(sch->dev); 684 netif_schedule(sch->dev);
784} 685}
785 686
786#ifdef HTB_RATECM 687#ifdef HTB_RATECM
787#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 688#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
788static void htb_rate_timer(unsigned long arg) 689static void htb_rate_timer(unsigned long arg)
789{ 690{
790 struct Qdisc *sch = (struct Qdisc*)arg; 691 struct Qdisc *sch = (struct Qdisc *)arg;
791 struct htb_sched *q = qdisc_priv(sch); 692 struct htb_sched *q = qdisc_priv(sch);
792 struct list_head *p; 693 struct hlist_node *p;
694 struct htb_class *cl;
695
793 696
794 /* lock queue so that we can muck with it */ 697 /* lock queue so that we can muck with it */
795 HTB_QLOCK(sch); 698 spin_lock_bh(&sch->dev->queue_lock);
796 HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);
797 699
798 q->rttim.expires = jiffies + HZ; 700 q->rttim.expires = jiffies + HZ;
799 add_timer(&q->rttim); 701 add_timer(&q->rttim);
800 702
 801 /* scan and recompute one bucket at a time */ 703 /* scan and recompute one bucket at a time */
802 if (++q->recmp_bucket >= HTB_HSIZE) 704 if (++q->recmp_bucket >= HTB_HSIZE)
803 q->recmp_bucket = 0; 705 q->recmp_bucket = 0;
804 list_for_each (p,q->hash+q->recmp_bucket) { 706
805 struct htb_class *cl = list_entry(p,struct htb_class,hlist); 707 hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
806 HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n", 708 RT_GEN(cl->sum_bytes, cl->rate_bytes);
807 cl->classid,cl->sum_bytes,cl->sum_packets); 709 RT_GEN(cl->sum_packets, cl->rate_packets);
808 RT_GEN (cl->sum_bytes,cl->rate_bytes);
809 RT_GEN (cl->sum_packets,cl->rate_packets);
810 } 710 }
811 HTB_QUNLOCK(sch); 711 spin_unlock_bh(&sch->dev->queue_lock);
812} 712}
813#endif 713#endif
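
The RT_GEN macro above is a fixed-point exponentially weighted moving average: each pass it folds the interval's byte/packet sum D into the running estimate R as R += D - R/HTB_EWMAC and clears D, so R settles near D*HTB_EWMAC. That is also why htb_dump_class_stats later divides rate_bytes by HTB_EWMAC*HTB_HSIZE: each class is revisited only once every HTB_HSIZE timer ticks. A standalone model of the update (the constant matches this file's definition, but treat the numbers as illustrative):

/* Model of RT_GEN's EWMA; converges so that r / HTB_EWMAC -> d. */
#include <stdio.h>

#define HTB_EWMAC 2    /* smoothing constant from sch_htb.c */

static void rt_gen(unsigned long *d, unsigned long *r)
{
	*r += *d - (*r / HTB_EWMAC);
	*d = 0;
}

int main(void)
{
	unsigned long sum_bytes, rate_bytes = 0;

	for (int tick = 0; tick < 8; tick++) {
		sum_bytes = 1500;               /* bytes seen this interval */
		rt_gen(&sum_bytes, &rate_bytes);
		printf("tick %d: estimate %lu bytes/interval\n",
		       tick, rate_bytes / HTB_EWMAC);
	}
	return 0;
}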
814 714
@@ -823,12 +723,11 @@ static void htb_rate_timer(unsigned long arg)
 823 * CAN_SEND) because we can use a more precise clock than the event queue here. 723 * CAN_SEND) because we can use a more precise clock than the event queue here.
 824 * In such a case we remove the class from the event queue first. 724 * In such a case we remove the class from the event queue first.
825 */ 725 */
826static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, 726static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
827 int level,int bytes) 727 int level, int bytes)
828{ 728{
829 long toks,diff; 729 long toks, diff;
830 enum htb_cmode old_mode; 730 enum htb_cmode old_mode;
831 HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);
832 731
833#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ 732#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
834 if (toks > cl->B) toks = cl->B; \ 733 if (toks > cl->B) toks = cl->B; \
@@ -837,47 +736,31 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
837 cl->T = toks 736 cl->T = toks
838 737
839 while (cl) { 738 while (cl) {
840 HTB_CHCL(cl); 739 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
841 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
842#ifdef HTB_DEBUG
843 if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
844 if (net_ratelimit())
845 printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
846 cl->classid, diff,
847#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
848 q->now.tv_sec * 1000000ULL + q->now.tv_usec,
849 cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
850#else
851 (unsigned long long) q->now,
852 (unsigned long long) cl->t_c,
853#endif
854 q->jiffies);
855 diff = 1000;
856 }
857#endif
858 if (cl->level >= level) { 740 if (cl->level >= level) {
859 if (cl->level == level) cl->xstats.lends++; 741 if (cl->level == level)
860 HTB_ACCNT (tokens,buffer,rate); 742 cl->xstats.lends++;
743 HTB_ACCNT(tokens, buffer, rate);
861 } else { 744 } else {
862 cl->xstats.borrows++; 745 cl->xstats.borrows++;
863 cl->tokens += diff; /* we moved t_c; update tokens */ 746 cl->tokens += diff; /* we moved t_c; update tokens */
864 } 747 }
865 HTB_ACCNT (ctokens,cbuffer,ceil); 748 HTB_ACCNT(ctokens, cbuffer, ceil);
866 cl->t_c = q->now; 749 cl->t_c = q->now;
867 HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);
868 750
869 old_mode = cl->cmode; diff = 0; 751 old_mode = cl->cmode;
870 htb_change_class_mode(q,cl,&diff); 752 diff = 0;
753 htb_change_class_mode(q, cl, &diff);
871 if (old_mode != cl->cmode) { 754 if (old_mode != cl->cmode) {
872 if (old_mode != HTB_CAN_SEND) 755 if (old_mode != HTB_CAN_SEND)
873 htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); 756 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
874 if (cl->cmode != HTB_CAN_SEND) 757 if (cl->cmode != HTB_CAN_SEND)
875 htb_add_to_wait_tree (q,cl,diff,1); 758 htb_add_to_wait_tree(q, cl, diff);
876 } 759 }
877
878#ifdef HTB_RATECM 760#ifdef HTB_RATECM
879 /* update rate counters */ 761 /* update rate counters */
880 cl->sum_bytes += bytes; cl->sum_packets++; 762 cl->sum_bytes += bytes;
763 cl->sum_packets++;
881#endif 764#endif
882 765
883 /* update byte stats except for leaves which are already updated */ 766 /* update byte stats except for leaves which are already updated */
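
HTB_ACCNT above performs one token-bucket step: add the elapsed time diff to the class's stored tokens, clamp at the configured burst buffer, and, on the macro line that falls outside this hunk, subtract the transmitted packet's cost as looked up in the qdisc rate table; the result may go negative, which is what later demotes the class mode. A sketch of the same arithmetic, assuming a flat per-byte cost instead of the kernel's rate-table lookup:

/* Clamped-refill-then-charge step modelled on HTB_ACCNT. */
#include <stdio.h>

struct bucket {
	long tokens;        /* accumulated credit, in time units */
	long buffer;        /* burst cap (cl->buffer / cl->cbuffer) */
	long cost_per_byte; /* stand-in for the rate-table lookup */
};

static void accnt(struct bucket *b, long diff, long bytes)
{
	long toks = diff + b->tokens;       /* refill by elapsed time */

	if (toks > b->buffer)
		toks = b->buffer;           /* clamp at burst size */
	toks -= bytes * b->cost_per_byte;   /* charge the packet */
	b->tokens = toks;                   /* negative => throttled */
}

int main(void)
{
	struct bucket b = { .tokens = 0, .buffer = 10000, .cost_per_byte = 1 };

	accnt(&b, 5000, 1500);
	printf("tokens after charge: %ld\n", b.tokens);   /* 3500 */
	accnt(&b, 0, 6000);
	printf("tokens after burst:  %ld\n", b.tokens);   /* -2500 */
	return 0;
}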
@@ -896,60 +779,46 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
896 * next pending event (0 for no event in pq). 779 * next pending event (0 for no event in pq).
 897 * Note: Applied are events whose cl->pq_key <= jiffies. 780 * Note: Applied are events whose cl->pq_key <= jiffies.
898 */ 781 */
899static long htb_do_events(struct htb_sched *q,int level) 782static long htb_do_events(struct htb_sched *q, int level)
900{ 783{
901 int i; 784 int i;
902 HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n", 785
903 level,q->wait_pq[level].rb_node,q->row_mask[level]);
904 for (i = 0; i < 500; i++) { 786 for (i = 0; i < 500; i++) {
905 struct htb_class *cl; 787 struct htb_class *cl;
906 long diff; 788 long diff;
907 struct rb_node *p = q->wait_pq[level].rb_node; 789 struct rb_node *p = q->wait_pq[level].rb_node;
908 if (!p) return 0; 790 if (!p)
909 while (p->rb_left) p = p->rb_left; 791 return 0;
792 while (p->rb_left)
793 p = p->rb_left;
910 794
911 cl = rb_entry(p, struct htb_class, pq_node); 795 cl = rb_entry(p, struct htb_class, pq_node);
912 if (time_after(cl->pq_key, q->jiffies)) { 796 if (time_after(cl->pq_key, q->jiffies)) {
913 HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
914 return cl->pq_key - q->jiffies; 797 return cl->pq_key - q->jiffies;
915 } 798 }
916 htb_safe_rb_erase(p,q->wait_pq+level); 799 htb_safe_rb_erase(p, q->wait_pq + level);
917 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); 800 diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
918#ifdef HTB_DEBUG 801 htb_change_class_mode(q, cl, &diff);
919 if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
920 if (net_ratelimit())
921 printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
922 cl->classid, diff,
923#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
924 q->now.tv_sec * 1000000ULL + q->now.tv_usec,
925 cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
926#else
927 (unsigned long long) q->now,
928 (unsigned long long) cl->t_c,
929#endif
930 q->jiffies);
931 diff = 1000;
932 }
933#endif
934 htb_change_class_mode(q,cl,&diff);
935 if (cl->cmode != HTB_CAN_SEND) 802 if (cl->cmode != HTB_CAN_SEND)
936 htb_add_to_wait_tree (q,cl,diff,2); 803 htb_add_to_wait_tree(q, cl, diff);
937 } 804 }
938 if (net_ratelimit()) 805 if (net_ratelimit())
939 printk(KERN_WARNING "htb: too many events !\n"); 806 printk(KERN_WARNING "htb: too many events !\n");
940 return HZ/10; 807 return HZ / 10;
941} 808}
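
htb_do_events therefore walks the level's wait queue (an rbtree keyed by pq_key, the jiffy at which a throttled class may change mode again), applies every event that is already due, and returns the delay until the next pending one; the 500-iteration cap guards against livelock and falls back to an HZ/10 retry. The same contract in miniature, with a sorted array standing in for the rbtree:

/* Apply due events, return the delay to the next one (0 = none). */
#include <stdio.h>

#define MAX_EVENTS 500          /* same safety cap as htb_do_events */

static long do_events(unsigned long *key, int *n, unsigned long now)
{
	for (int i = 0; i < MAX_EVENTS; i++) {
		if (*n == 0)
			return 0;               /* no event pending */
		if (key[0] > now)
			return key[0] - now;    /* delay to next event */
		/* "apply" the event, then pop the minimum key */
		for (int j = 1; j < *n; j++)
			key[j - 1] = key[j];
		(*n)--;
	}
	return 1;   /* too many events: retry soon (HZ/10 upstream) */
}

int main(void)
{
	unsigned long key[] = { 5, 7, 12 };
	int n = 3;

	printf("delay=%ld pending=%d\n", do_events(key, &n, 8), n);
	/* prints: delay=4 pending=1 */
	return 0;
}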
942 809
 943/* Returns class->node+prio from id-tree where the class's id is >= id. NULL 810/* Returns class->node+prio from id-tree where the class's id is >= id. NULL
 944 if no such one exists. */ 811 if no such one exists. */
945static struct rb_node * 812static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
946htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) 813 u32 id)
947{ 814{
948 struct rb_node *r = NULL; 815 struct rb_node *r = NULL;
949 while (n) { 816 while (n) {
950 struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]); 817 struct htb_class *cl =
951 if (id == cl->classid) return n; 818 rb_entry(n, struct htb_class, node[prio]);
952 819 if (id == cl->classid)
820 return n;
821
953 if (id > cl->classid) { 822 if (id > cl->classid) {
954 n = n->rb_right; 823 n = n->rb_right;
955 } else { 824 } else {
@@ -965,49 +834,49 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
965 * 834 *
 966 * Find the leaf the current feed pointer points to. 835 * Find the leaf the current feed pointer points to.
967 */ 836 */
968static struct htb_class * 837static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
969htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid) 838 struct rb_node **pptr, u32 * pid)
970{ 839{
971 int i; 840 int i;
972 struct { 841 struct {
973 struct rb_node *root; 842 struct rb_node *root;
974 struct rb_node **pptr; 843 struct rb_node **pptr;
975 u32 *pid; 844 u32 *pid;
976 } stk[TC_HTB_MAXDEPTH],*sp = stk; 845 } stk[TC_HTB_MAXDEPTH], *sp = stk;
977 846
978 BUG_TRAP(tree->rb_node); 847 BUG_TRAP(tree->rb_node);
979 sp->root = tree->rb_node; 848 sp->root = tree->rb_node;
980 sp->pptr = pptr; 849 sp->pptr = pptr;
981 sp->pid = pid; 850 sp->pid = pid;
982 851
983 for (i = 0; i < 65535; i++) { 852 for (i = 0; i < 65535; i++) {
984 HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid); 853 if (!*sp->pptr && *sp->pid) {
985
986 if (!*sp->pptr && *sp->pid) {
987 /* ptr was invalidated but id is valid - try to recover 854 /* ptr was invalidated but id is valid - try to recover
988 the original or next ptr */ 855 the original or next ptr */
989 *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid); 856 *sp->pptr =
857 htb_id_find_next_upper(prio, sp->root, *sp->pid);
990 } 858 }
 991 *sp->pid = 0; /* ptr is valid now, so remove this hint as it 859 *sp->pid = 0; /* ptr is valid now, so remove this hint as it
992 can become out of date quickly */ 860 can become out of date quickly */
993 if (!*sp->pptr) { /* we are at right end; rewind & go up */ 861 if (!*sp->pptr) { /* we are at right end; rewind & go up */
994 *sp->pptr = sp->root; 862 *sp->pptr = sp->root;
995 while ((*sp->pptr)->rb_left) 863 while ((*sp->pptr)->rb_left)
996 *sp->pptr = (*sp->pptr)->rb_left; 864 *sp->pptr = (*sp->pptr)->rb_left;
997 if (sp > stk) { 865 if (sp > stk) {
998 sp--; 866 sp--;
999 BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL; 867 BUG_TRAP(*sp->pptr);
1000 htb_next_rb_node (sp->pptr); 868 if (!*sp->pptr)
869 return NULL;
870 htb_next_rb_node(sp->pptr);
1001 } 871 }
1002 } else { 872 } else {
1003 struct htb_class *cl; 873 struct htb_class *cl;
1004 cl = rb_entry(*sp->pptr,struct htb_class,node[prio]); 874 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
1005 HTB_CHCL(cl); 875 if (!cl->level)
1006 if (!cl->level)
1007 return cl; 876 return cl;
1008 (++sp)->root = cl->un.inner.feed[prio].rb_node; 877 (++sp)->root = cl->un.inner.feed[prio].rb_node;
1009 sp->pptr = cl->un.inner.ptr+prio; 878 sp->pptr = cl->un.inner.ptr + prio;
1010 sp->pid = cl->un.inner.last_ptr_id+prio; 879 sp->pid = cl->un.inner.last_ptr_id + prio;
1011 } 880 }
1012 } 881 }
1013 BUG_TRAP(0); 882 BUG_TRAP(0);
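
Stripped of the ptr/pid hint recovery, htb_lookup_leaf is an iterative descent with an explicit stack: take the leftmost active entry of the current tree, return it if it is a leaf class, otherwise push its inner feed tree and repeat; the kernel version also rewinds (sp--) when it exhausts a subtree's right edge, which this sketch omits. A minimal model over a hypothetical binary tree, not the kernel's rbtree:

/* Explicit-stack descent to a leaf, modelled on htb_lookup_leaf. */
#include <stdio.h>

#define MAXDEPTH 8                 /* stands in for TC_HTB_MAXDEPTH */

struct node {
	struct node *left;         /* entries ordered by classid */
	struct node *feed;         /* inner class: tree of active children */
	int leaf_id;               /* meaningful only when feed == NULL */
};

static struct node *lookup_leaf(struct node *root)
{
	struct node *stk[MAXDEPTH], **sp = stk;

	*sp = root;
	while (*sp) {
		struct node *n = *sp;

		while (n->left)
			n = n->left;            /* leftmost active entry */
		if (!n->feed)
			return n;               /* found a leaf class */
		if (sp == stk + MAXDEPTH - 1)
			return NULL;            /* depth bound exceeded */
		*++sp = n->feed;                /* descend into feed tree */
	}
	return NULL;
}

int main(void)
{
	struct node leaf  = { .leaf_id = 42 };
	struct node inner = { .feed = &leaf };

	printf("leaf id: %d\n", lookup_leaf(&inner)->leaf_id);
	return 0;
}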
@@ -1016,21 +885,21 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32
1016 885
1017/* dequeues packet at given priority and level; call only if 886/* dequeues packet at given priority and level; call only if
1018 you are sure that there is active class at prio/level */ 887 you are sure that there is active class at prio/level */
1019static struct sk_buff * 888static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
1020htb_dequeue_tree(struct htb_sched *q,int prio,int level) 889 int level)
1021{ 890{
1022 struct sk_buff *skb = NULL; 891 struct sk_buff *skb = NULL;
1023 struct htb_class *cl,*start; 892 struct htb_class *cl, *start;
1024 /* look initial class up in the row */ 893 /* look initial class up in the row */
1025 start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio, 894 start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
1026 q->ptr[level]+prio,q->last_ptr_id[level]+prio); 895 q->ptr[level] + prio,
1027 896 q->last_ptr_id[level] + prio);
897
1028 do { 898 do {
1029next: 899next:
1030 BUG_TRAP(cl); 900 BUG_TRAP(cl);
1031 if (!cl) return NULL; 901 if (!cl)
1032 HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n", 902 return NULL;
1033 prio,level,cl->classid,cl->un.leaf.deficit[level]);
1034 903
 1035 /* a class can be empty - it is unlikely but can happen if the leaf 904 /* a class can be empty - it is unlikely but can happen if the leaf
 1036 qdisc drops packets in its enqueue routine or if someone used 905 qdisc drops packets in its enqueue routine or if someone used
@@ -1038,64 +907,69 @@ next:
 1038 simply deactivate and skip such a class */ 907 simply deactivate and skip such a class */
1039 if (unlikely(cl->un.leaf.q->q.qlen == 0)) { 908 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
1040 struct htb_class *next; 909 struct htb_class *next;
1041 htb_deactivate(q,cl); 910 htb_deactivate(q, cl);
1042 911
1043 /* row/level might become empty */ 912 /* row/level might become empty */
1044 if ((q->row_mask[level] & (1 << prio)) == 0) 913 if ((q->row_mask[level] & (1 << prio)) == 0)
1045 return NULL; 914 return NULL;
1046 915
1047 next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio, 916 next = htb_lookup_leaf(q->row[level] + prio,
1048 prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio); 917 prio, q->ptr[level] + prio,
918 q->last_ptr_id[level] + prio);
1049 919
1050 if (cl == start) /* fix start if we just deleted it */ 920 if (cl == start) /* fix start if we just deleted it */
1051 start = next; 921 start = next;
1052 cl = next; 922 cl = next;
1053 goto next; 923 goto next;
1054 } 924 }
1055 925
1056 if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) 926 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
927 if (likely(skb != NULL))
1057 break; 928 break;
1058 if (!cl->warned) { 929 if (!cl->warned) {
1059 printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid); 930 printk(KERN_WARNING
931 "htb: class %X isn't work conserving ?!\n",
932 cl->classid);
1060 cl->warned = 1; 933 cl->warned = 1;
1061 } 934 }
1062 q->nwc_hit++; 935 q->nwc_hit++;
1063 htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); 936 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
1064 cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio, 937 ptr[0]) + prio);
1065 q->last_ptr_id[level]+prio); 938 cl = htb_lookup_leaf(q->row[level] + prio, prio,
939 q->ptr[level] + prio,
940 q->last_ptr_id[level] + prio);
1066 941
1067 } while (cl != start); 942 } while (cl != start);
1068 943
1069 if (likely(skb != NULL)) { 944 if (likely(skb != NULL)) {
1070 if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { 945 if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
1071 HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
1072 level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
1073 cl->un.leaf.deficit[level] += cl->un.leaf.quantum; 946 cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
1074 htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); 947 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
948 ptr[0]) + prio);
1075 } 949 }
 1076 /* this used to be after charge_class but this constellation 950 /* this used to be after charge_class but this constellation
1077 gives us slightly better performance */ 951 gives us slightly better performance */
1078 if (!cl->un.leaf.q->q.qlen) 952 if (!cl->un.leaf.q->q.qlen)
1079 htb_deactivate (q,cl); 953 htb_deactivate(q, cl);
1080 htb_charge_class (q,cl,level,skb->len); 954 htb_charge_class(q, cl, level, skb->len);
1081 } 955 }
1082 return skb; 956 return skb;
1083} 957}
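
The deficit bookkeeping at the end of htb_dequeue_tree is classic deficit round robin: a leaf spends deficit[level] as its packets leave, and once the balance goes negative it is granted another quantum while the row pointer advances so the next class gets a turn. The step in isolation:

/* One deficit-round-robin charge, as in htb_dequeue_tree. */
#include <stdio.h>

struct leaf {
	int deficit;
	int quantum;
};

/* returns 1 when the scheduler should advance to the next class */
static int drr_charge(struct leaf *cl, int pkt_len)
{
	cl->deficit -= pkt_len;
	if (cl->deficit < 0) {
		cl->deficit += cl->quantum;   /* grant a fresh quantum */
		return 1;                     /* htb_next_rb_node(...) */
	}
	return 0;
}

int main(void)
{
	struct leaf cl = { .deficit = 1000, .quantum = 1500 };

	printf("advance=%d deficit=%d\n", drr_charge(&cl, 1200), cl.deficit);
	/* prints: advance=1 deficit=1300 */
	return 0;
}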
1084 958
1085static void htb_delay_by(struct Qdisc *sch,long delay) 959static void htb_delay_by(struct Qdisc *sch, long delay)
1086{ 960{
1087 struct htb_sched *q = qdisc_priv(sch); 961 struct htb_sched *q = qdisc_priv(sch);
1088 if (delay <= 0) delay = 1; 962 if (delay <= 0)
1089 if (unlikely(delay > 5*HZ)) { 963 delay = 1;
964 if (unlikely(delay > 5 * HZ)) {
1090 if (net_ratelimit()) 965 if (net_ratelimit())
1091 printk(KERN_INFO "HTB delay %ld > 5sec\n", delay); 966 printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
1092 delay = 5*HZ; 967 delay = 5 * HZ;
1093 } 968 }
 1094 /* why don't we use jiffies here? because expires can be in the past */ 969 /* why don't we use jiffies here? because expires can be in the past */
1095 mod_timer(&q->timer, q->jiffies + delay); 970 mod_timer(&q->timer, q->jiffies + delay);
1096 sch->flags |= TCQ_F_THROTTLED; 971 sch->flags |= TCQ_F_THROTTLED;
1097 sch->qstats.overlimits++; 972 sch->qstats.overlimits++;
1098 HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
1099} 973}
1100 974
1101static struct sk_buff *htb_dequeue(struct Qdisc *sch) 975static struct sk_buff *htb_dequeue(struct Qdisc *sch)
@@ -1104,22 +978,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
1104 struct htb_sched *q = qdisc_priv(sch); 978 struct htb_sched *q = qdisc_priv(sch);
1105 int level; 979 int level;
1106 long min_delay; 980 long min_delay;
1107#ifdef HTB_DEBUG
1108 int evs_used = 0;
1109#endif
1110 981
1111 q->jiffies = jiffies; 982 q->jiffies = jiffies;
1112 HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
1113 sch->q.qlen);
1114 983
1115 /* try to dequeue direct packets as high prio (!) to minimize cpu work */ 984 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
1116 if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) { 985 skb = __skb_dequeue(&q->direct_queue);
986 if (skb != NULL) {
1117 sch->flags &= ~TCQ_F_THROTTLED; 987 sch->flags &= ~TCQ_F_THROTTLED;
1118 sch->q.qlen--; 988 sch->q.qlen--;
1119 return skb; 989 return skb;
1120 } 990 }
1121 991
1122 if (!sch->q.qlen) goto fin; 992 if (!sch->q.qlen)
993 goto fin;
1123 PSCHED_GET_TIME(q->now); 994 PSCHED_GET_TIME(q->now);
1124 995
1125 min_delay = LONG_MAX; 996 min_delay = LONG_MAX;
@@ -1129,21 +1000,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
1129 int m; 1000 int m;
1130 long delay; 1001 long delay;
1131 if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { 1002 if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
1132 delay = htb_do_events(q,level); 1003 delay = htb_do_events(q, level);
1133 q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ); 1004 q->near_ev_cache[level] =
1134#ifdef HTB_DEBUG 1005 q->jiffies + (delay ? delay : HZ);
1135 evs_used++;
1136#endif
1137 } else 1006 } else
1138 delay = q->near_ev_cache[level] - q->jiffies; 1007 delay = q->near_ev_cache[level] - q->jiffies;
1139 1008
1140 if (delay && min_delay > delay) 1009 if (delay && min_delay > delay)
1141 min_delay = delay; 1010 min_delay = delay;
1142 m = ~q->row_mask[level]; 1011 m = ~q->row_mask[level];
1143 while (m != (int)(-1)) { 1012 while (m != (int)(-1)) {
1144 int prio = ffz (m); 1013 int prio = ffz(m);
1145 m |= 1 << prio; 1014 m |= 1 << prio;
1146 skb = htb_dequeue_tree(q,prio,level); 1015 skb = htb_dequeue_tree(q, prio, level);
1147 if (likely(skb != NULL)) { 1016 if (likely(skb != NULL)) {
1148 sch->q.qlen--; 1017 sch->q.qlen--;
1149 sch->flags &= ~TCQ_F_THROTTLED; 1018 sch->flags &= ~TCQ_F_THROTTLED;
@@ -1151,40 +1020,28 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
1151 } 1020 }
1152 } 1021 }
1153 } 1022 }
1154#ifdef HTB_DEBUG 1023 htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
1155 if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
1156 if (min_delay == LONG_MAX) {
1157 printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
1158 evs_used,q->jiffies,jiffies);
1159 htb_debug_dump(q);
1160 } else
1161 printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
1162 "too small rate\n",min_delay);
1163 }
1164#endif
1165 htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
1166fin: 1024fin:
1167 HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
1168 return skb; 1025 return skb;
1169} 1026}
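
The priority scan inside htb_dequeue relies on a bitmask idiom: m starts as the complement of row_mask[level], ffz(m) (find first zero bit) then yields the lowest active priority, and setting that bit in m marks it visited until m becomes all ones. The same idiom in portable C, with ffz modelled by a GCC builtin:

/* Row-mask priority scan, as in htb_dequeue's inner loop. */
#include <stdio.h>

static int ffz(unsigned int m)
{
	return __builtin_ctz(~m);       /* lowest zero bit of m */
}

int main(void)
{
	unsigned int row_mask = 0x05;   /* prios 0 and 2 have active classes */
	unsigned int m = ~row_mask;

	while (m != ~0u) {
		int prio = ffz(m);

		m |= 1u << prio;        /* mark this prio as visited */
		printf("dequeue prio %d\n", prio);  /* prints 0, then 2 */
	}
	return 0;
}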
1170 1027
 1171/* try to drop from each class (by prio) until one succeeds */ 1028/* try to drop from each class (by prio) until one succeeds */
1172static unsigned int htb_drop(struct Qdisc* sch) 1029static unsigned int htb_drop(struct Qdisc *sch)
1173{ 1030{
1174 struct htb_sched *q = qdisc_priv(sch); 1031 struct htb_sched *q = qdisc_priv(sch);
1175 int prio; 1032 int prio;
1176 1033
1177 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { 1034 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
1178 struct list_head *p; 1035 struct list_head *p;
1179 list_for_each (p,q->drops+prio) { 1036 list_for_each(p, q->drops + prio) {
1180 struct htb_class *cl = list_entry(p, struct htb_class, 1037 struct htb_class *cl = list_entry(p, struct htb_class,
1181 un.leaf.drop_list); 1038 un.leaf.drop_list);
1182 unsigned int len; 1039 unsigned int len;
1183 if (cl->un.leaf.q->ops->drop && 1040 if (cl->un.leaf.q->ops->drop &&
1184 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { 1041 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
1185 sch->q.qlen--; 1042 sch->q.qlen--;
1186 if (!cl->un.leaf.q->q.qlen) 1043 if (!cl->un.leaf.q->q.qlen)
1187 htb_deactivate (q,cl); 1044 htb_deactivate(q, cl);
1188 return len; 1045 return len;
1189 } 1046 }
1190 } 1047 }
@@ -1194,29 +1051,25 @@ static unsigned int htb_drop(struct Qdisc* sch)
1194 1051
1195/* reset all classes */ 1052/* reset all classes */
 1196/* always called under BH & queue lock */ 1053/* always called under BH & queue lock */
1197static void htb_reset(struct Qdisc* sch) 1054static void htb_reset(struct Qdisc *sch)
1198{ 1055{
1199 struct htb_sched *q = qdisc_priv(sch); 1056 struct htb_sched *q = qdisc_priv(sch);
1200 int i; 1057 int i;
1201 HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);
1202 1058
1203 for (i = 0; i < HTB_HSIZE; i++) { 1059 for (i = 0; i < HTB_HSIZE; i++) {
1204 struct list_head *p; 1060 struct hlist_node *p;
1205 list_for_each (p,q->hash+i) { 1061 struct htb_class *cl;
1206 struct htb_class *cl = list_entry(p,struct htb_class,hlist); 1062
1063 hlist_for_each_entry(cl, p, q->hash + i, hlist) {
1207 if (cl->level) 1064 if (cl->level)
1208 memset(&cl->un.inner,0,sizeof(cl->un.inner)); 1065 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
1209 else { 1066 else {
1210 if (cl->un.leaf.q) 1067 if (cl->un.leaf.q)
1211 qdisc_reset(cl->un.leaf.q); 1068 qdisc_reset(cl->un.leaf.q);
1212 INIT_LIST_HEAD(&cl->un.leaf.drop_list); 1069 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1213 } 1070 }
1214 cl->prio_activity = 0; 1071 cl->prio_activity = 0;
1215 cl->cmode = HTB_CAN_SEND; 1072 cl->cmode = HTB_CAN_SEND;
1216#ifdef HTB_DEBUG
1217 cl->pq_node.rb_color = -1;
1218 memset(cl->node,255,sizeof(cl->node));
1219#endif
1220 1073
1221 } 1074 }
1222 } 1075 }
@@ -1224,12 +1077,12 @@ static void htb_reset(struct Qdisc* sch)
1224 del_timer(&q->timer); 1077 del_timer(&q->timer);
1225 __skb_queue_purge(&q->direct_queue); 1078 __skb_queue_purge(&q->direct_queue);
1226 sch->q.qlen = 0; 1079 sch->q.qlen = 0;
1227 memset(q->row,0,sizeof(q->row)); 1080 memset(q->row, 0, sizeof(q->row));
1228 memset(q->row_mask,0,sizeof(q->row_mask)); 1081 memset(q->row_mask, 0, sizeof(q->row_mask));
1229 memset(q->wait_pq,0,sizeof(q->wait_pq)); 1082 memset(q->wait_pq, 0, sizeof(q->wait_pq));
1230 memset(q->ptr,0,sizeof(q->ptr)); 1083 memset(q->ptr, 0, sizeof(q->ptr));
1231 for (i = 0; i < TC_HTB_NUMPRIO; i++) 1084 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1232 INIT_LIST_HEAD(q->drops+i); 1085 INIT_LIST_HEAD(q->drops + i);
1233} 1086}
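
The list_head to hlist_head conversion running through htb_reset, htb_init, htb_walk and htb_change_class shrinks every bucket head of the HTB_HSIZE class hash to a single pointer; the node side keeps a pprev back-pointer so unlinking still needs no bucket scan, and the era's hlist_for_each_entry takes the extra struct hlist_node cursor visible in this diff. A minimal intrusive-chain model of the container_of pattern such iterators rely on (pprev omitted for brevity):

/* Intrusive hash-chain walk in the style of hlist_for_each_entry. */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct hnode { struct hnode *next; };
struct hclass { unsigned int classid; struct hnode hash; };

static void add_head(struct hnode **bucket, struct hnode *n)
{
	n->next = *bucket;              /* hlist_add_head analogue */
	*bucket = n;
}

int main(void)
{
	struct hclass a = { .classid = 0x10001 };
	struct hclass b = { .classid = 0x10002 };
	struct hnode *bucket = NULL;

	add_head(&bucket, &a.hash);
	add_head(&bucket, &b.hash);

	for (struct hnode *p = bucket; p; p = p->next) {
		struct hclass *cl = container_of(p, struct hclass, hash);
		printf("class %X\n", cl->classid);  /* 10002, then 10001 */
	}
	return 0;
}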
1234 1087
1235static int htb_init(struct Qdisc *sch, struct rtattr *opt) 1088static int htb_init(struct Qdisc *sch, struct rtattr *opt)
@@ -1238,36 +1091,31 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1238 struct rtattr *tb[TCA_HTB_INIT]; 1091 struct rtattr *tb[TCA_HTB_INIT];
1239 struct tc_htb_glob *gopt; 1092 struct tc_htb_glob *gopt;
1240 int i; 1093 int i;
1241#ifdef HTB_DEBUG
1242 printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
1243 HTB_VER >> 16,HTB_VER & 0xffff);
1244#endif
1245 if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || 1094 if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
1246 tb[TCA_HTB_INIT-1] == NULL || 1095 tb[TCA_HTB_INIT - 1] == NULL ||
1247 RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) { 1096 RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
1248 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); 1097 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
1249 return -EINVAL; 1098 return -EINVAL;
1250 } 1099 }
1251 gopt = RTA_DATA(tb[TCA_HTB_INIT-1]); 1100 gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
1252 if (gopt->version != HTB_VER >> 16) { 1101 if (gopt->version != HTB_VER >> 16) {
1253 printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n", 1102 printk(KERN_ERR
1254 HTB_VER >> 16,HTB_VER & 0xffff,gopt->version); 1103 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1104 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1255 return -EINVAL; 1105 return -EINVAL;
1256 } 1106 }
1257 q->debug = gopt->debug;
1258 HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
1259 1107
1260 INIT_LIST_HEAD(&q->root); 1108 INIT_LIST_HEAD(&q->root);
1261 for (i = 0; i < HTB_HSIZE; i++) 1109 for (i = 0; i < HTB_HSIZE; i++)
1262 INIT_LIST_HEAD(q->hash+i); 1110 INIT_HLIST_HEAD(q->hash + i);
1263 for (i = 0; i < TC_HTB_NUMPRIO; i++) 1111 for (i = 0; i < TC_HTB_NUMPRIO; i++)
1264 INIT_LIST_HEAD(q->drops+i); 1112 INIT_LIST_HEAD(q->drops + i);
1265 1113
1266 init_timer(&q->timer); 1114 init_timer(&q->timer);
1267 skb_queue_head_init(&q->direct_queue); 1115 skb_queue_head_init(&q->direct_queue);
1268 1116
1269 q->direct_qlen = sch->dev->tx_queue_len; 1117 q->direct_qlen = sch->dev->tx_queue_len;
1270 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ 1118 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1271 q->direct_qlen = 2; 1119 q->direct_qlen = 2;
1272 q->timer.function = htb_timer; 1120 q->timer.function = htb_timer;
1273 q->timer.data = (unsigned long)sch; 1121 q->timer.data = (unsigned long)sch;
@@ -1289,80 +1137,72 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
1289static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) 1137static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1290{ 1138{
1291 struct htb_sched *q = qdisc_priv(sch); 1139 struct htb_sched *q = qdisc_priv(sch);
1292 unsigned char *b = skb->tail; 1140 unsigned char *b = skb->tail;
1293 struct rtattr *rta; 1141 struct rtattr *rta;
1294 struct tc_htb_glob gopt; 1142 struct tc_htb_glob gopt;
1295 HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle); 1143 spin_lock_bh(&sch->dev->queue_lock);
1296 HTB_QLOCK(sch);
1297 gopt.direct_pkts = q->direct_pkts; 1144 gopt.direct_pkts = q->direct_pkts;
1298 1145
1299#ifdef HTB_DEBUG
1300 if (HTB_DBG_COND(0,2))
1301 htb_debug_dump(q);
1302#endif
1303 gopt.version = HTB_VER; 1146 gopt.version = HTB_VER;
1304 gopt.rate2quantum = q->rate2quantum; 1147 gopt.rate2quantum = q->rate2quantum;
1305 gopt.defcls = q->defcls; 1148 gopt.defcls = q->defcls;
1306 gopt.debug = q->debug; 1149 gopt.debug = 0;
1307 rta = (struct rtattr*)b; 1150 rta = (struct rtattr *)b;
1308 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1151 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1309 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); 1152 RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1310 rta->rta_len = skb->tail - b; 1153 rta->rta_len = skb->tail - b;
1311 HTB_QUNLOCK(sch); 1154 spin_unlock_bh(&sch->dev->queue_lock);
1312 return skb->len; 1155 return skb->len;
1313rtattr_failure: 1156rtattr_failure:
1314 HTB_QUNLOCK(sch); 1157 spin_unlock_bh(&sch->dev->queue_lock);
1315 skb_trim(skb, skb->tail - skb->data); 1158 skb_trim(skb, skb->tail - skb->data);
1316 return -1; 1159 return -1;
1317} 1160}
1318 1161
1319static int htb_dump_class(struct Qdisc *sch, unsigned long arg, 1162static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1320 struct sk_buff *skb, struct tcmsg *tcm) 1163 struct sk_buff *skb, struct tcmsg *tcm)
1321{ 1164{
1322#ifdef HTB_DEBUG 1165 struct htb_class *cl = (struct htb_class *)arg;
1323 struct htb_sched *q = qdisc_priv(sch); 1166 unsigned char *b = skb->tail;
1324#endif
1325 struct htb_class *cl = (struct htb_class*)arg;
1326 unsigned char *b = skb->tail;
1327 struct rtattr *rta; 1167 struct rtattr *rta;
1328 struct tc_htb_opt opt; 1168 struct tc_htb_opt opt;
1329 1169
1330 HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid); 1170 spin_lock_bh(&sch->dev->queue_lock);
1331
1332 HTB_QLOCK(sch);
1333 tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; 1171 tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
1334 tcm->tcm_handle = cl->classid; 1172 tcm->tcm_handle = cl->classid;
1335 if (!cl->level && cl->un.leaf.q) 1173 if (!cl->level && cl->un.leaf.q)
1336 tcm->tcm_info = cl->un.leaf.q->handle; 1174 tcm->tcm_info = cl->un.leaf.q->handle;
1337 1175
1338 rta = (struct rtattr*)b; 1176 rta = (struct rtattr *)b;
1339 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1177 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1340 1178
1341 memset (&opt,0,sizeof(opt)); 1179 memset(&opt, 0, sizeof(opt));
1342 1180
1343 opt.rate = cl->rate->rate; opt.buffer = cl->buffer; 1181 opt.rate = cl->rate->rate;
1344 opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer; 1182 opt.buffer = cl->buffer;
1345 opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio; 1183 opt.ceil = cl->ceil->rate;
1346 opt.level = cl->level; 1184 opt.cbuffer = cl->cbuffer;
1185 opt.quantum = cl->un.leaf.quantum;
1186 opt.prio = cl->un.leaf.prio;
1187 opt.level = cl->level;
1347 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); 1188 RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1348 rta->rta_len = skb->tail - b; 1189 rta->rta_len = skb->tail - b;
1349 HTB_QUNLOCK(sch); 1190 spin_unlock_bh(&sch->dev->queue_lock);
1350 return skb->len; 1191 return skb->len;
1351rtattr_failure: 1192rtattr_failure:
1352 HTB_QUNLOCK(sch); 1193 spin_unlock_bh(&sch->dev->queue_lock);
1353 skb_trim(skb, b - skb->data); 1194 skb_trim(skb, b - skb->data);
1354 return -1; 1195 return -1;
1355} 1196}
1356 1197
1357static int 1198static int
1358htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, 1199htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1359 struct gnet_dump *d)
1360{ 1200{
1361 struct htb_class *cl = (struct htb_class*)arg; 1201 struct htb_class *cl = (struct htb_class *)arg;
1362 1202
1363#ifdef HTB_RATECM 1203#ifdef HTB_RATECM
1364 cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE); 1204 cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
1365 cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE); 1205 cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
1366#endif 1206#endif
1367 1207
1368 if (!cl->level && cl->un.leaf.q) 1208 if (!cl->level && cl->un.leaf.q)
@@ -1379,21 +1219,22 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1379} 1219}
1380 1220
1381static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1221static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1382 struct Qdisc **old) 1222 struct Qdisc **old)
1383{ 1223{
1384 struct htb_class *cl = (struct htb_class*)arg; 1224 struct htb_class *cl = (struct htb_class *)arg;
1385 1225
1386 if (cl && !cl->level) { 1226 if (cl && !cl->level) {
1387 if (new == NULL && (new = qdisc_create_dflt(sch->dev, 1227 if (new == NULL && (new = qdisc_create_dflt(sch->dev,
1388 &pfifo_qdisc_ops)) == NULL) 1228 &pfifo_qdisc_ops))
1389 return -ENOBUFS; 1229 == NULL)
1230 return -ENOBUFS;
1390 sch_tree_lock(sch); 1231 sch_tree_lock(sch);
1391 if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { 1232 if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
1392 if (cl->prio_activity) 1233 if (cl->prio_activity)
1393 htb_deactivate (qdisc_priv(sch),cl); 1234 htb_deactivate(qdisc_priv(sch), cl);
1394 1235
 1395 /* TODO: is it correct? Why doesn't CBQ do it? */ 1236 /* TODO: is it correct? Why doesn't CBQ do it? */
1396 sch->q.qlen -= (*old)->q.qlen; 1237 sch->q.qlen -= (*old)->q.qlen;
1397 qdisc_reset(*old); 1238 qdisc_reset(*old);
1398 } 1239 }
1399 sch_tree_unlock(sch); 1240 sch_tree_unlock(sch);
@@ -1402,20 +1243,16 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1402 return -ENOENT; 1243 return -ENOENT;
1403} 1244}
1404 1245
1405static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg) 1246static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1406{ 1247{
1407 struct htb_class *cl = (struct htb_class*)arg; 1248 struct htb_class *cl = (struct htb_class *)arg;
1408 return (cl && !cl->level) ? cl->un.leaf.q : NULL; 1249 return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1409} 1250}
1410 1251
1411static unsigned long htb_get(struct Qdisc *sch, u32 classid) 1252static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1412{ 1253{
1413#ifdef HTB_DEBUG 1254 struct htb_class *cl = htb_find(classid, sch);
1414 struct htb_sched *q = qdisc_priv(sch); 1255 if (cl)
1415#endif
1416 struct htb_class *cl = htb_find(classid,sch);
1417 HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
1418 if (cl)
1419 cl->refcnt++; 1256 cl->refcnt++;
1420 return (unsigned long)cl; 1257 return (unsigned long)cl;
1421} 1258}
@@ -1430,10 +1267,9 @@ static void htb_destroy_filters(struct tcf_proto **fl)
1430 } 1267 }
1431} 1268}
1432 1269
1433static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) 1270static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1434{ 1271{
1435 struct htb_sched *q = qdisc_priv(sch); 1272 struct htb_sched *q = qdisc_priv(sch);
1436 HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
1437 if (!cl->level) { 1273 if (!cl->level) {
1438 BUG_TRAP(cl->un.leaf.q); 1274 BUG_TRAP(cl->un.leaf.q);
1439 sch->q.qlen -= cl->un.leaf.q->q.qlen; 1275 sch->q.qlen -= cl->un.leaf.q->q.qlen;
@@ -1441,45 +1277,45 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
1441 } 1277 }
1442 qdisc_put_rtab(cl->rate); 1278 qdisc_put_rtab(cl->rate);
1443 qdisc_put_rtab(cl->ceil); 1279 qdisc_put_rtab(cl->ceil);
1444 1280
1445 htb_destroy_filters (&cl->filter_list); 1281 htb_destroy_filters(&cl->filter_list);
1446 1282
1447 while (!list_empty(&cl->children)) 1283 while (!list_empty(&cl->children))
1448 htb_destroy_class (sch,list_entry(cl->children.next, 1284 htb_destroy_class(sch, list_entry(cl->children.next,
1449 struct htb_class,sibling)); 1285 struct htb_class, sibling));
1450 1286
1451 /* note: this delete may happen twice (see htb_delete) */ 1287 /* note: this delete may happen twice (see htb_delete) */
1452 list_del(&cl->hlist); 1288 if (!hlist_unhashed(&cl->hlist))
1289 hlist_del(&cl->hlist);
1453 list_del(&cl->sibling); 1290 list_del(&cl->sibling);
1454 1291
1455 if (cl->prio_activity) 1292 if (cl->prio_activity)
1456 htb_deactivate (q,cl); 1293 htb_deactivate(q, cl);
1457 1294
1458 if (cl->cmode != HTB_CAN_SEND) 1295 if (cl->cmode != HTB_CAN_SEND)
1459 htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); 1296 htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1460 1297
1461 kfree(cl); 1298 kfree(cl);
1462} 1299}
1463 1300
 1464/* always called under BH & queue lock */ 1301/* always called under BH & queue lock */
1465static void htb_destroy(struct Qdisc* sch) 1302static void htb_destroy(struct Qdisc *sch)
1466{ 1303{
1467 struct htb_sched *q = qdisc_priv(sch); 1304 struct htb_sched *q = qdisc_priv(sch);
1468 HTB_DBG(0,1,"htb_destroy q=%p\n",q);
1469 1305
1470 del_timer_sync (&q->timer); 1306 del_timer_sync(&q->timer);
1471#ifdef HTB_RATECM 1307#ifdef HTB_RATECM
1472 del_timer_sync (&q->rttim); 1308 del_timer_sync(&q->rttim);
1473#endif 1309#endif
1474 /* This line used to be after htb_destroy_class call below 1310 /* This line used to be after htb_destroy_class call below
1475 and surprisingly it worked in 2.4. But it must precede it 1311 and surprisingly it worked in 2.4. But it must precede it
 1476 because a filter needs its target class alive to be able to call 1312 because a filter needs its target class alive to be able to call
1477 unbind_filter on it (without Oops). */ 1313 unbind_filter on it (without Oops). */
1478 htb_destroy_filters(&q->filter_list); 1314 htb_destroy_filters(&q->filter_list);
1479 1315
1480 while (!list_empty(&q->root)) 1316 while (!list_empty(&q->root))
1481 htb_destroy_class (sch,list_entry(q->root.next, 1317 htb_destroy_class(sch, list_entry(q->root.next,
1482 struct htb_class,sibling)); 1318 struct htb_class, sibling));
1483 1319
1484 __skb_queue_purge(&q->direct_queue); 1320 __skb_queue_purge(&q->direct_queue);
1485} 1321}
@@ -1487,24 +1323,25 @@ static void htb_destroy(struct Qdisc* sch)
1487static int htb_delete(struct Qdisc *sch, unsigned long arg) 1323static int htb_delete(struct Qdisc *sch, unsigned long arg)
1488{ 1324{
1489 struct htb_sched *q = qdisc_priv(sch); 1325 struct htb_sched *q = qdisc_priv(sch);
1490 struct htb_class *cl = (struct htb_class*)arg; 1326 struct htb_class *cl = (struct htb_class *)arg;
1491 HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
1492 1327
 1493 // TODO: why don't we allow deleting a subtree? references? does the 1328 // TODO: why don't we allow deleting a subtree? references? does the
 1494 // tc subsys guarantee us that in htb_destroy it holds no class 1329 // tc subsys guarantee us that in htb_destroy it holds no class
 1495 // refs so that we can remove children safely there? 1330 // refs so that we can remove children safely there?
1496 if (!list_empty(&cl->children) || cl->filter_cnt) 1331 if (!list_empty(&cl->children) || cl->filter_cnt)
1497 return -EBUSY; 1332 return -EBUSY;
1498 1333
1499 sch_tree_lock(sch); 1334 sch_tree_lock(sch);
1500 1335
1501 /* delete from hash and active; remainder in destroy_class */ 1336 /* delete from hash and active; remainder in destroy_class */
1502 list_del_init(&cl->hlist); 1337 if (!hlist_unhashed(&cl->hlist))
1338 hlist_del(&cl->hlist);
1339
1503 if (cl->prio_activity) 1340 if (cl->prio_activity)
1504 htb_deactivate (q,cl); 1341 htb_deactivate(q, cl);
1505 1342
1506 if (--cl->refcnt == 0) 1343 if (--cl->refcnt == 0)
1507 htb_destroy_class(sch,cl); 1344 htb_destroy_class(sch, cl);
1508 1345
1509 sch_tree_unlock(sch); 1346 sch_tree_unlock(sch);
1510 return 0; 1347 return 0;
@@ -1512,45 +1349,46 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1512 1349
1513static void htb_put(struct Qdisc *sch, unsigned long arg) 1350static void htb_put(struct Qdisc *sch, unsigned long arg)
1514{ 1351{
1515#ifdef HTB_DEBUG 1352 struct htb_class *cl = (struct htb_class *)arg;
1516 struct htb_sched *q = qdisc_priv(sch);
1517#endif
1518 struct htb_class *cl = (struct htb_class*)arg;
1519 HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
1520 1353
1521 if (--cl->refcnt == 0) 1354 if (--cl->refcnt == 0)
1522 htb_destroy_class(sch,cl); 1355 htb_destroy_class(sch, cl);
1523} 1356}
1524 1357
1525static int htb_change_class(struct Qdisc *sch, u32 classid, 1358static int htb_change_class(struct Qdisc *sch, u32 classid,
1526 u32 parentid, struct rtattr **tca, unsigned long *arg) 1359 u32 parentid, struct rtattr **tca,
1360 unsigned long *arg)
1527{ 1361{
1528 int err = -EINVAL; 1362 int err = -EINVAL;
1529 struct htb_sched *q = qdisc_priv(sch); 1363 struct htb_sched *q = qdisc_priv(sch);
1530 struct htb_class *cl = (struct htb_class*)*arg,*parent; 1364 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1531 struct rtattr *opt = tca[TCA_OPTIONS-1]; 1365 struct rtattr *opt = tca[TCA_OPTIONS - 1];
1532 struct qdisc_rate_table *rtab = NULL, *ctab = NULL; 1366 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1533 struct rtattr *tb[TCA_HTB_RTAB]; 1367 struct rtattr *tb[TCA_HTB_RTAB];
1534 struct tc_htb_opt *hopt; 1368 struct tc_htb_opt *hopt;
1535 1369
1536 /* extract all subattrs from opt attr */ 1370 /* extract all subattrs from opt attr */
1537 if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) || 1371 if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
1538 tb[TCA_HTB_PARMS-1] == NULL || 1372 tb[TCA_HTB_PARMS - 1] == NULL ||
1539 RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt)) 1373 RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
1540 goto failure; 1374 goto failure;
1541
1542 parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
1543 1375
1544 hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]); 1376 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1545 HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); 1377
1546 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]); 1378 hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);
1547 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]); 1379
1548 if (!rtab || !ctab) goto failure; 1380 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
1381 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
1382 if (!rtab || !ctab)
1383 goto failure;
1549 1384
1550 if (!cl) { /* new class */ 1385 if (!cl) { /* new class */
1551 struct Qdisc *new_q; 1386 struct Qdisc *new_q;
1387 int prio;
1388
1552 /* check for valid classid */ 1389 /* check for valid classid */
1553 if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch)) 1390 if (!classid || TC_H_MAJ(classid ^ sch->handle)
1391 || htb_find(classid, sch))
1554 goto failure; 1392 goto failure;
1555 1393
1556 /* check maximal depth */ 1394 /* check maximal depth */
@@ -1561,15 +1399,16 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1561 err = -ENOBUFS; 1399 err = -ENOBUFS;
1562 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) 1400 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1563 goto failure; 1401 goto failure;
1564 1402
1565 cl->refcnt = 1; 1403 cl->refcnt = 1;
1566 INIT_LIST_HEAD(&cl->sibling); 1404 INIT_LIST_HEAD(&cl->sibling);
1567 INIT_LIST_HEAD(&cl->hlist); 1405 INIT_HLIST_NODE(&cl->hlist);
1568 INIT_LIST_HEAD(&cl->children); 1406 INIT_LIST_HEAD(&cl->children);
1569 INIT_LIST_HEAD(&cl->un.leaf.drop_list); 1407 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1570#ifdef HTB_DEBUG 1408 RB_CLEAR_NODE(&cl->pq_node);
1571 cl->magic = HTB_CMAGIC; 1409
1572#endif 1410 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1411 RB_CLEAR_NODE(&cl->node[prio]);
1573 1412
1574 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1413 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 1575 so it can't be used inside of sch_tree_lock 1414 so it can't be used inside of sch_tree_lock
@@ -1579,53 +1418,53 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1579 if (parent && !parent->level) { 1418 if (parent && !parent->level) {
1580 /* turn parent into inner node */ 1419 /* turn parent into inner node */
1581 sch->q.qlen -= parent->un.leaf.q->q.qlen; 1420 sch->q.qlen -= parent->un.leaf.q->q.qlen;
1582 qdisc_destroy (parent->un.leaf.q); 1421 qdisc_destroy(parent->un.leaf.q);
1583 if (parent->prio_activity) 1422 if (parent->prio_activity)
1584 htb_deactivate (q,parent); 1423 htb_deactivate(q, parent);
1585 1424
1586 /* remove from evt list because of level change */ 1425 /* remove from evt list because of level change */
1587 if (parent->cmode != HTB_CAN_SEND) { 1426 if (parent->cmode != HTB_CAN_SEND) {
1588 htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/); 1427 htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1589 parent->cmode = HTB_CAN_SEND; 1428 parent->cmode = HTB_CAN_SEND;
1590 } 1429 }
1591 parent->level = (parent->parent ? parent->parent->level 1430 parent->level = (parent->parent ? parent->parent->level
1592 : TC_HTB_MAXDEPTH) - 1; 1431 : TC_HTB_MAXDEPTH) - 1;
1593 memset (&parent->un.inner,0,sizeof(parent->un.inner)); 1432 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1594 } 1433 }
1595 /* leaf (we) needs elementary qdisc */ 1434 /* leaf (we) needs elementary qdisc */
1596 cl->un.leaf.q = new_q ? new_q : &noop_qdisc; 1435 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1597 1436
1598 cl->classid = classid; cl->parent = parent; 1437 cl->classid = classid;
1438 cl->parent = parent;
1599 1439
1600 /* set class to be in HTB_CAN_SEND state */ 1440 /* set class to be in HTB_CAN_SEND state */
1601 cl->tokens = hopt->buffer; 1441 cl->tokens = hopt->buffer;
1602 cl->ctokens = hopt->cbuffer; 1442 cl->ctokens = hopt->cbuffer;
1603 cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */ 1443 cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */
1604 PSCHED_GET_TIME(cl->t_c); 1444 PSCHED_GET_TIME(cl->t_c);
1605 cl->cmode = HTB_CAN_SEND; 1445 cl->cmode = HTB_CAN_SEND;
1606 1446
1607 /* attach to the hash list and parent's family */ 1447 /* attach to the hash list and parent's family */
1608 list_add_tail(&cl->hlist, q->hash+htb_hash(classid)); 1448 hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
1609 list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); 1449 list_add_tail(&cl->sibling,
1610#ifdef HTB_DEBUG 1450 parent ? &parent->children : &q->root);
1611 { 1451 } else
1612 int i; 1452 sch_tree_lock(sch);
1613 for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
1614 cl->pq_node.rb_color = -1;
1615 }
1616#endif
1617 } else sch_tree_lock(sch);
1618 1453
 1619 /* there used to be a nasty bug here; we have to check that the node 1454 /* there used to be a nasty bug here; we have to check that the node
 1620 is really a leaf before changing cl->un.leaf! */ 1455 is really a leaf before changing cl->un.leaf! */
1621 if (!cl->level) { 1456 if (!cl->level) {
1622 cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; 1457 cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
1623 if (!hopt->quantum && cl->un.leaf.quantum < 1000) { 1458 if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
1624 printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid); 1459 printk(KERN_WARNING
1460 "HTB: quantum of class %X is small. Consider r2q change.\n",
1461 cl->classid);
1625 cl->un.leaf.quantum = 1000; 1462 cl->un.leaf.quantum = 1000;
1626 } 1463 }
1627 if (!hopt->quantum && cl->un.leaf.quantum > 200000) { 1464 if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
1628 printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid); 1465 printk(KERN_WARNING
1466 "HTB: quantum of class %X is big. Consider r2q change.\n",
1467 cl->classid);
1629 cl->un.leaf.quantum = 200000; 1468 cl->un.leaf.quantum = 200000;
1630 } 1469 }
1631 if (hopt->quantum) 1470 if (hopt->quantum)
@@ -1636,16 +1475,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1636 1475
1637 cl->buffer = hopt->buffer; 1476 cl->buffer = hopt->buffer;
1638 cl->cbuffer = hopt->cbuffer; 1477 cl->cbuffer = hopt->cbuffer;
1639 if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab; 1478 if (cl->rate)
1640 if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab; 1479 qdisc_put_rtab(cl->rate);
1480 cl->rate = rtab;
1481 if (cl->ceil)
1482 qdisc_put_rtab(cl->ceil);
1483 cl->ceil = ctab;
1641 sch_tree_unlock(sch); 1484 sch_tree_unlock(sch);
1642 1485
1643 *arg = (unsigned long)cl; 1486 *arg = (unsigned long)cl;
1644 return 0; 1487 return 0;
1645 1488
1646failure: 1489failure:
1647 if (rtab) qdisc_put_rtab(rtab); 1490 if (rtab)
1648 if (ctab) qdisc_put_rtab(ctab); 1491 qdisc_put_rtab(rtab);
1492 if (ctab)
1493 qdisc_put_rtab(ctab);
1649 return err; 1494 return err;
1650} 1495}
1651 1496
@@ -1654,28 +1499,28 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1654 struct htb_sched *q = qdisc_priv(sch); 1499 struct htb_sched *q = qdisc_priv(sch);
1655 struct htb_class *cl = (struct htb_class *)arg; 1500 struct htb_class *cl = (struct htb_class *)arg;
1656 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list; 1501 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1657 HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl); 1502
1658 return fl; 1503 return fl;
1659} 1504}
1660 1505
1661static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, 1506static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1662 u32 classid) 1507 u32 classid)
1663{ 1508{
1664 struct htb_sched *q = qdisc_priv(sch); 1509 struct htb_sched *q = qdisc_priv(sch);
1665 struct htb_class *cl = htb_find (classid,sch); 1510 struct htb_class *cl = htb_find(classid, sch);
1666 HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt); 1511
1667 /*if (cl && !cl->level) return 0; 1512 /*if (cl && !cl->level) return 0;
1668 The line above used to be there to prevent attaching filters to 1513 The line above used to be there to prevent attaching filters to
1669 leaves. But at least tc_index filter uses this just to get class 1514 leaves. But at least tc_index filter uses this just to get class
 1670 for other reasons, so we have to allow it. 1515 for other reasons, so we have to allow it.
1671 ---- 1516 ----
1672 19.6.2002 As Werner explained it is ok - bind filter is just 1517 19.6.2002 As Werner explained it is ok - bind filter is just
1673 another way to "lock" the class - unlike "get" this lock can 1518 another way to "lock" the class - unlike "get" this lock can
1674 be broken by class during destroy IIUC. 1519 be broken by class during destroy IIUC.
1675 */ 1520 */
1676 if (cl) 1521 if (cl)
1677 cl->filter_cnt++; 1522 cl->filter_cnt++;
1678 else 1523 else
1679 q->filter_cnt++; 1524 q->filter_cnt++;
1680 return (unsigned long)cl; 1525 return (unsigned long)cl;
1681} 1526}
@@ -1684,10 +1529,10 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1684{ 1529{
1685 struct htb_sched *q = qdisc_priv(sch); 1530 struct htb_sched *q = qdisc_priv(sch);
1686 struct htb_class *cl = (struct htb_class *)arg; 1531 struct htb_class *cl = (struct htb_class *)arg;
1687 HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt); 1532
1688 if (cl) 1533 if (cl)
1689 cl->filter_cnt--; 1534 cl->filter_cnt--;
1690 else 1535 else
1691 q->filter_cnt--; 1536 q->filter_cnt--;
1692} 1537}
1693 1538
@@ -1700,9 +1545,10 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1700 return; 1545 return;
1701 1546
1702 for (i = 0; i < HTB_HSIZE; i++) { 1547 for (i = 0; i < HTB_HSIZE; i++) {
1703 struct list_head *p; 1548 struct hlist_node *p;
1704 list_for_each (p,q->hash+i) { 1549 struct htb_class *cl;
1705 struct htb_class *cl = list_entry(p,struct htb_class,hlist); 1550
1551 hlist_for_each_entry(cl, p, q->hash + i, hlist) {
1706 if (arg->count < arg->skip) { 1552 if (arg->count < arg->skip) {
1707 arg->count++; 1553 arg->count++;
1708 continue; 1554 continue;
@@ -1750,12 +1596,13 @@ static struct Qdisc_ops htb_qdisc_ops = {
1750 1596
1751static int __init htb_module_init(void) 1597static int __init htb_module_init(void)
1752{ 1598{
1753 return register_qdisc(&htb_qdisc_ops); 1599 return register_qdisc(&htb_qdisc_ops);
1754} 1600}
1755static void __exit htb_module_exit(void) 1601static void __exit htb_module_exit(void)
1756{ 1602{
1757 unregister_qdisc(&htb_qdisc_ops); 1603 unregister_qdisc(&htb_qdisc_ops);
1758} 1604}
1605
1759module_init(htb_module_init) 1606module_init(htb_module_init)
1760module_exit(htb_module_exit) 1607module_exit(htb_module_exit)
1761MODULE_LICENSE("GPL"); 1608MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a08ec4c7c55d..45939bafbdf8 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
192 */ 192 */
193 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 193 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
194 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) 194 if (!(skb = skb_unshare(skb, GFP_ATOMIC))
195 || (skb->ip_summed == CHECKSUM_HW 195 || (skb->ip_summed == CHECKSUM_PARTIAL
196 && skb_checksum_help(skb, 0))) { 196 && skb_checksum_help(skb))) {
197 sch->qstats.drops++; 197 sch->qstats.drops++;
198 return NET_XMIT_DROP; 198 return NET_XMIT_DROP;
199 } 199 }
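
The CHECKSUM_HW to CHECKSUM_PARTIAL rename aside, the test exists because such skbs carry a checksum the hardware has not filled in yet: corrupting the payload and then letting the NIC checksum it would produce a checksum valid over the corrupted bytes, hiding exactly the damage netem was asked to emulate, so skb_checksum_help() finalizes it in software first. The invariant in miniature, with a plain RFC 1071 Internet checksum standing in for the kernel helper:

/* Finalize the checksum before corrupting, so receivers detect it. */
#include <stdio.h>
#include <stdint.h>

static uint16_t csum(const uint8_t *p, int len)
{
	uint32_t sum = 0;

	for (int i = 0; i < len; i += 2)
		sum += (p[i] << 8) | (i + 1 < len ? p[i + 1] : 0);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);   /* fold carries */
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t pkt[8] = { 1, 2, 3, 4, 5, 6, 0, 0 };  /* last 2: csum field */
	uint16_t c = csum(pkt, 8);      /* finalize checksum first... */

	pkt[3] ^= 0x80;                 /* ...then flip a bit, like netem */
	pkt[6] = c >> 8;
	pkt[7] = c & 0xff;
	printf("verify: %#x (nonzero => corruption detectable)\n",
	       csum(pkt, 8));
	return 0;
}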
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 42b66e74bbb5..03f65de75d88 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb)
228 goto discard_release; 228 goto discard_release;
229 nf_reset(skb); 229 nf_reset(skb);
230 230
231 if (sk_filter(sk, skb, 1)) 231 if (sk_filter(sk, skb))
232 goto discard_release; 232 goto discard_release;
233 233
234 /* Create an SCTP packet structure. */ 234 /* Create an SCTP packet structure. */
@@ -255,10 +255,13 @@ int sctp_rcv(struct sk_buff *skb)
255 */ 255 */
256 sctp_bh_lock_sock(sk); 256 sctp_bh_lock_sock(sk);
257 257
258 if (sock_owned_by_user(sk)) 258 if (sock_owned_by_user(sk)) {
259 SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
259 sctp_add_backlog(sk, skb); 260 sctp_add_backlog(sk, skb);
260 else 261 } else {
262 SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
261 sctp_inq_push(&chunk->rcvr->inqueue, chunk); 263 sctp_inq_push(&chunk->rcvr->inqueue, chunk);
264 }
262 265
263 sctp_bh_unlock_sock(sk); 266 sctp_bh_unlock_sock(sk);
264 267
@@ -271,6 +274,7 @@ int sctp_rcv(struct sk_buff *skb)
271 return 0; 274 return 0;
272 275
273discard_it: 276discard_it:
277 SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS);
274 kfree_skb(skb); 278 kfree_skb(skb);
275 return 0; 279 return 0;
276 280
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cf0c767d43ae..cf6deed7e849 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue)
87/* Put a new packet in an SCTP inqueue. 87/* Put a new packet in an SCTP inqueue.
88 * We assume that packet->sctp_hdr is set and in host byte order. 88 * We assume that packet->sctp_hdr is set and in host byte order.
89 */ 89 */
90void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) 90void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
91{ 91{
92 /* Directly call the packet handling routine. */ 92 /* Directly call the packet handling routine. */
93 93
@@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet)
96 * Eventually, we should clean up inqueue to not rely 96 * Eventually, we should clean up inqueue to not rely
97 * on the BH related data structures. 97 * on the BH related data structures.
98 */ 98 */
99 list_add_tail(&packet->list, &q->in_chunk_list); 99 list_add_tail(&chunk->list, &q->in_chunk_list);
100 q->immediate.func(q->immediate.data); 100 q->immediate.func(q->immediate.data);
101} 101}
102 102
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 99c0cefc04e0..249e5033c1a8 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -78,7 +78,6 @@
78 78
79#include <asm/uaccess.h> 79#include <asm/uaccess.h>
80 80
81extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *);
82static struct notifier_block sctp_inet6addr_notifier = { 81static struct notifier_block sctp_inet6addr_notifier = {
83 .notifier_call = sctp_inetaddr_event, 82 .notifier_call = sctp_inetaddr_event,
84}; 83};
@@ -322,9 +321,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
322 struct inet6_ifaddr *ifp; 321 struct inet6_ifaddr *ifp;
323 struct sctp_sockaddr_entry *addr; 322 struct sctp_sockaddr_entry *addr;
324 323
325 read_lock(&addrconf_lock); 324 rcu_read_lock();
326 if ((in6_dev = __in6_dev_get(dev)) == NULL) { 325 if ((in6_dev = __in6_dev_get(dev)) == NULL) {
327 read_unlock(&addrconf_lock); 326 rcu_read_unlock();
328 return; 327 return;
329 } 328 }
330 329
@@ -343,7 +342,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
343 } 342 }
344 343
345 read_unlock(&in6_dev->lock); 344 read_unlock(&in6_dev->lock);
346 read_unlock(&addrconf_lock); 345 rcu_read_unlock();
347} 346}
348 347
 349/* Initialize a sockaddr_storage from an incoming skb. */ 348
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 30b710c54e64..37074a39ecbb 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -467,6 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
467 467
468 switch(reason) { 468 switch(reason) {
469 case SCTP_RTXR_T3_RTX: 469 case SCTP_RTXR_T3_RTX:
470 SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS);
470 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); 471 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
471 /* Update the retran path if the T3-rtx timer has expired for 472 /* Update the retran path if the T3-rtx timer has expired for
472 * the current retran path. 473 * the current retran path.
@@ -475,12 +476,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
475 sctp_assoc_update_retran_path(transport->asoc); 476 sctp_assoc_update_retran_path(transport->asoc);
476 break; 477 break;
477 case SCTP_RTXR_FAST_RTX: 478 case SCTP_RTXR_FAST_RTX:
479 SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
478 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); 480 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
479 fast_retransmit = 1; 481 fast_retransmit = 1;
480 break; 482 break;
481 case SCTP_RTXR_PMTUD: 483 case SCTP_RTXR_PMTUD:
482 default: 484 SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
483 break; 485 break;
486 default:
487 BUG();
484 } 488 }
485 489
486 sctp_retransmit_mark(q, transport, fast_retransmit); 490 sctp_retransmit_mark(q, transport, fast_retransmit);
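
[Annotation] With the PMTUD case split out and the default case turned into BUG(), the reason switch becomes exhaustive, and each retransmission cause maps to exactly one counter. Schematically (omitting the cwnd adjustments shown above):

        switch (reason) {
        case SCTP_RTXR_T3_RTX:
                SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS);
                break;
        case SCTP_RTXR_FAST_RTX:
                SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
                break;
        case SCTP_RTXR_PMTUD:
                SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
                break;
        default:
                BUG();  /* an unknown reason is a programming error */
        }
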
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 5b3b0e0ae7e5..a356d8d310a9 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -57,6 +57,21 @@ static struct snmp_mib sctp_snmp_list[] = {
57 SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), 57 SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
58 SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), 58 SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
59 SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), 59 SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
60 SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS),
61 SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS),
62 SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS),
63 SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS),
64 SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS),
65 SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS),
66 SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS),
67 SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS),
68 SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS),
69 SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS),
70 SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS),
71 SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ),
72 SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG),
73 SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS),
74 SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS),
60 SNMP_MIB_SENTINEL 75 SNMP_MIB_SENTINEL
61}; 76};
62 77
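
[Annotation] These counters surface through /proc/net/sctp/snmp as one name/value pair per line. A hypothetical userspace spot check, not part of the patch, that prints the newly added timer and retransmit counters:

        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                char line[128];
                FILE *f = fopen("/proc/net/sctp/snmp", "r");

                if (!f)
                        return 1;
                /* the new entries all end in "Expireds" or "Retransmits" */
                while (fgets(line, sizeof(line), f))
                        if (strstr(line, "Expireds") || strstr(line, "Retransmits"))
                                fputs(line, stdout);
                fclose(f);
                return 0;
        }
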
@@ -328,8 +343,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
328 "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", 343 "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ",
329 assoc, sk, sctp_sk(sk)->type, sk->sk_state, 344 assoc, sk, sctp_sk(sk)->type, sk->sk_state,
330 assoc->state, hash, assoc->assoc_id, 345 assoc->state, hash, assoc->assoc_id,
331 (sk->sk_rcvbuf - assoc->rwnd),
332 assoc->sndbuf_used, 346 assoc->sndbuf_used,
347 (sk->sk_rcvbuf - assoc->rwnd),
333 sock_i_uid(sk), sock_i_ino(sk), 348 sock_i_uid(sk), sock_i_ino(sk),
334 epb->bind_addr.port, 349 epb->bind_addr.port,
335 assoc->peer.port); 350 assoc->peer.port);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ab03a27a76e..fac7674438a4 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -61,7 +61,7 @@
61#include <net/inet_ecn.h> 61#include <net/inet_ecn.h>
62 62
63/* Global data structures. */ 63/* Global data structures. */
64struct sctp_globals sctp_globals; 64struct sctp_globals sctp_globals __read_mostly;
65struct proc_dir_entry *proc_net_sctp; 65struct proc_dir_entry *proc_net_sctp;
66DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; 66DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
67 67
@@ -82,13 +82,6 @@ static struct sctp_af *sctp_af_v6_specific;
82kmem_cache_t *sctp_chunk_cachep __read_mostly; 82kmem_cache_t *sctp_chunk_cachep __read_mostly;
83kmem_cache_t *sctp_bucket_cachep __read_mostly; 83kmem_cache_t *sctp_bucket_cachep __read_mostly;
84 84
85extern int sctp_snmp_proc_init(void);
86extern int sctp_snmp_proc_exit(void);
87extern int sctp_eps_proc_init(void);
88extern int sctp_eps_proc_exit(void);
89extern int sctp_assocs_proc_init(void);
90extern int sctp_assocs_proc_exit(void);
91
92/* Return the address of the control sock. */ 85/* Return the address of the control sock. */
93struct sock *sctp_get_ctl_sock(void) 86struct sock *sctp_get_ctl_sock(void)
94{ 87{
@@ -1049,7 +1042,7 @@ SCTP_STATIC __init int sctp_init(void)
1049 sctp_rto_beta = SCTP_RTO_BETA; 1042 sctp_rto_beta = SCTP_RTO_BETA;
1050 1043
1051 /* Valid.Cookie.Life - 60 seconds */ 1044 /* Valid.Cookie.Life - 60 seconds */
1052 sctp_valid_cookie_life = 60 * HZ; 1045 sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE;
1053 1046
1054 /* Whether Cookie Preservative is enabled(1) or not(0) */ 1047 /* Whether Cookie Preservative is enabled(1) or not(0) */
1055 sctp_cookie_preserve_enable = 1; 1048 sctp_cookie_preserve_enable = 1;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5b5ae7958322..1c42fe983a5b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -187,10 +187,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
187 */ 187 */
188 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, 188 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
189 0, 0, 0, GFP_ATOMIC); 189 0, 0, 0, GFP_ATOMIC);
190 if (!ev) 190 if (ev)
191 goto nomem; 191 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
192 192 SCTP_ULPEVENT(ev));
193 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
194 193
195 /* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint 194 /* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint
196 * will verify that it is in SHUTDOWN-ACK-SENT state, if it is 195 * will verify that it is in SHUTDOWN-ACK-SENT state, if it is
@@ -215,9 +214,6 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
215 sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); 214 sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
216 215
217 return SCTP_DISPOSITION_DELETE_TCB; 216 return SCTP_DISPOSITION_DELETE_TCB;
218
219nomem:
220 return SCTP_DISPOSITION_NOMEM;
221} 217}
222 218
223/* 219/*
@@ -347,8 +343,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
347 GFP_ATOMIC)) 343 GFP_ATOMIC))
348 goto nomem_init; 344 goto nomem_init;
349 345
350 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
351
352 /* B) "Z" shall respond immediately with an INIT ACK chunk. */ 346 /* B) "Z" shall respond immediately with an INIT ACK chunk. */
353 347
354 /* If there are errors need to be reported for unknown parameters, 348 /* If there are errors need to be reported for unknown parameters,
@@ -360,11 +354,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
360 sizeof(sctp_chunkhdr_t); 354 sizeof(sctp_chunkhdr_t);
361 355
362 if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0) 356 if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0)
363 goto nomem_ack; 357 goto nomem_init;
364 358
365 repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); 359 repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
366 if (!repl) 360 if (!repl)
367 goto nomem_ack; 361 goto nomem_init;
368 362
369 /* If there are errors need to be reported for unknown parameters, 363 /* If there are errors need to be reported for unknown parameters,
370 * include them in the outgoing INIT ACK as "Unrecognized parameter" 364 * include them in the outgoing INIT ACK as "Unrecognized parameter"
@@ -388,6 +382,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
388 sctp_chunk_free(err_chunk); 382 sctp_chunk_free(err_chunk);
389 } 383 }
390 384
385 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
386
391 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); 387 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
392 388
393 /* 389 /*
@@ -400,12 +396,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
400 396
401 return SCTP_DISPOSITION_DELETE_TCB; 397 return SCTP_DISPOSITION_DELETE_TCB;
402 398
403nomem_ack:
404 if (err_chunk)
405 sctp_chunk_free(err_chunk);
406nomem_init: 399nomem_init:
407 sctp_association_free(new_asoc); 400 sctp_association_free(new_asoc);
408nomem: 401nomem:
402 if (err_chunk)
403 sctp_chunk_free(err_chunk);
409 return SCTP_DISPOSITION_NOMEM; 404 return SCTP_DISPOSITION_NOMEM;
410} 405}
411 406
@@ -600,7 +595,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
600 struct sctp_association *new_asoc; 595 struct sctp_association *new_asoc;
601 sctp_init_chunk_t *peer_init; 596 sctp_init_chunk_t *peer_init;
602 struct sctp_chunk *repl; 597 struct sctp_chunk *repl;
603 struct sctp_ulpevent *ev; 598 struct sctp_ulpevent *ev, *ai_ev = NULL;
604 int error = 0; 599 int error = 0;
605 struct sctp_chunk *err_chk_p; 600 struct sctp_chunk *err_chk_p;
606 601
@@ -659,20 +654,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
659 }; 654 };
660 } 655 }
661 656
662 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
663 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
664 SCTP_STATE(SCTP_STATE_ESTABLISHED));
665 SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
666 SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
667 sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
668 657
669 if (new_asoc->autoclose) 658 /* Delay state machine commands until later.
670 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, 659 *
671 SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); 660 * Re-build the bind address for the association is done in
672
673 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
674
675 /* Re-build the bind address for the association is done in
676 * the sctp_unpack_cookie() already. 661 * the sctp_unpack_cookie() already.
677 */ 662 */
678 /* This is a brand-new association, so these are not yet side 663 /* This is a brand-new association, so these are not yet side
@@ -687,9 +672,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
687 672
688 repl = sctp_make_cookie_ack(new_asoc, chunk); 673 repl = sctp_make_cookie_ack(new_asoc, chunk);
689 if (!repl) 674 if (!repl)
690 goto nomem_repl; 675 goto nomem_init;
691
692 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
693 676
694 /* RFC 2960 5.1 Normal Establishment of an Association 677 /* RFC 2960 5.1 Normal Establishment of an Association
695 * 678 *
@@ -704,28 +687,53 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
704 if (!ev) 687 if (!ev)
705 goto nomem_ev; 688 goto nomem_ev;
706 689
707 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
708
709 /* Sockets API Draft Section 5.3.1.6 690 /* Sockets API Draft Section 5.3.1.6
 710 * When a peer sends an Adaptation Layer Indication parameter, SCTP 691 * When a peer sends an Adaptation Layer Indication parameter, SCTP
 711 * delivers this notification to inform the application of the 692 * delivers this notification to inform the application of the
 712 * peer's requested adaptation layer. 693 * peer's requested adaptation layer.
713 */ 694 */
714 if (new_asoc->peer.adaption_ind) { 695 if (new_asoc->peer.adaption_ind) {
715 ev = sctp_ulpevent_make_adaption_indication(new_asoc, 696 ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc,
716 GFP_ATOMIC); 697 GFP_ATOMIC);
717 if (!ev) 698 if (!ai_ev)
718 goto nomem_ev; 699 goto nomem_aiev;
700 }
701
702 /* Add all the state machine commands now since we've created
 703 * everything. This way we don't introduce memory corruption
 704 * during side-effect processing and correctly count established
705 * associations.
706 */
707 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
708 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
709 SCTP_STATE(SCTP_STATE_ESTABLISHED));
710 SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
711 SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
712 sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
713
714 if (new_asoc->autoclose)
715 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
716 SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
719 717
718 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
719
720 /* This will send the COOKIE ACK */
721 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
722
723 /* Queue the ASSOC_CHANGE event */
724 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
725
726 /* Send up the Adaptation Layer Indication event */
727 if (ai_ev)
720 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, 728 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
721 SCTP_ULPEVENT(ev)); 729 SCTP_ULPEVENT(ai_ev));
722 }
723 730
724 return SCTP_DISPOSITION_CONSUME; 731 return SCTP_DISPOSITION_CONSUME;
725 732
733nomem_aiev:
734 sctp_ulpevent_free(ev);
726nomem_ev: 735nomem_ev:
727 sctp_chunk_free(repl); 736 sctp_chunk_free(repl);
728nomem_repl:
729nomem_init: 737nomem_init:
730 sctp_association_free(new_asoc); 738 sctp_association_free(new_asoc);
731nomem: 739nomem:
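
[Annotation] The reshuffling in this handler, and in the handlers that follow, applies one rule: every allocation that can fail happens before the first sctp_add_cmd_sf() call, so no error path ever has to retract a queued side effect. In outline, with the calls used above:

        repl = sctp_make_cookie_ack(new_asoc, chunk);
        if (!repl)
                goto nomem_init;
        ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0,
                                             new_asoc->c.sinit_num_ostreams,
                                             new_asoc->c.sinit_max_instreams,
                                             GFP_ATOMIC);
        if (!ev)
                goto nomem_ev;
        /* nothing can fail from here on: queue all commands at once */
        sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
        sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
        sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
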
@@ -1360,10 +1368,8 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
1360 if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, 1368 if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
1361 sctp_source(chunk), 1369 sctp_source(chunk),
1362 (sctp_init_chunk_t *)chunk->chunk_hdr, 1370 (sctp_init_chunk_t *)chunk->chunk_hdr,
1363 GFP_ATOMIC)) { 1371 GFP_ATOMIC))
1364 retval = SCTP_DISPOSITION_NOMEM; 1372 goto nomem;
1365 goto nomem_init;
1366 }
1367 1373
1368 /* Make sure no new addresses are being added during the 1374 /* Make sure no new addresses are being added during the
1369 * restart. Do not do this check for COOKIE-WAIT state, 1375 * restart. Do not do this check for COOKIE-WAIT state,
@@ -1374,7 +1380,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
1374 if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, 1380 if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk,
1375 commands)) { 1381 commands)) {
1376 retval = SCTP_DISPOSITION_CONSUME; 1382 retval = SCTP_DISPOSITION_CONSUME;
1377 goto cleanup_asoc; 1383 goto nomem_retval;
1378 } 1384 }
1379 } 1385 }
1380 1386
@@ -1430,17 +1436,17 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
1430 sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); 1436 sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
1431 retval = SCTP_DISPOSITION_CONSUME; 1437 retval = SCTP_DISPOSITION_CONSUME;
1432 1438
1439 return retval;
1440
1441nomem:
1442 retval = SCTP_DISPOSITION_NOMEM;
1443nomem_retval:
1444 if (new_asoc)
1445 sctp_association_free(new_asoc);
1433cleanup: 1446cleanup:
1434 if (err_chunk) 1447 if (err_chunk)
1435 sctp_chunk_free(err_chunk); 1448 sctp_chunk_free(err_chunk);
1436 return retval; 1449 return retval;
1437nomem:
1438 retval = SCTP_DISPOSITION_NOMEM;
1439 goto cleanup;
1440nomem_init:
1441cleanup_asoc:
1442 sctp_association_free(new_asoc);
1443 goto cleanup;
1444} 1450}
1445 1451
1446/* 1452/*
@@ -1611,15 +1617,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
1611 */ 1617 */
1612 sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); 1618 sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
1613 1619
1614 /* Update the content of current association. */
1615 sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
1616
1617 repl = sctp_make_cookie_ack(new_asoc, chunk); 1620 repl = sctp_make_cookie_ack(new_asoc, chunk);
1618 if (!repl) 1621 if (!repl)
1619 goto nomem; 1622 goto nomem;
1620 1623
1621 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
1622
1623 /* Report association restart to upper layer. */ 1624 /* Report association restart to upper layer. */
1624 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, 1625 ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
1625 new_asoc->c.sinit_num_ostreams, 1626 new_asoc->c.sinit_num_ostreams,
@@ -1628,6 +1629,9 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
1628 if (!ev) 1629 if (!ev)
1629 goto nomem_ev; 1630 goto nomem_ev;
1630 1631
1632 /* Update the content of current association. */
1633 sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
1634 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
1631 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); 1635 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
1632 return SCTP_DISPOSITION_CONSUME; 1636 return SCTP_DISPOSITION_CONSUME;
1633 1637
@@ -1751,7 +1755,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
1751 sctp_cmd_seq_t *commands, 1755 sctp_cmd_seq_t *commands,
1752 struct sctp_association *new_asoc) 1756 struct sctp_association *new_asoc)
1753{ 1757{
1754 struct sctp_ulpevent *ev = NULL; 1758 struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
1755 struct sctp_chunk *repl; 1759 struct sctp_chunk *repl;
1756 1760
1757 /* Clarification from Implementor's Guide: 1761 /* Clarification from Implementor's Guide:
@@ -1778,29 +1782,25 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
1778 * SCTP user upon reception of a valid COOKIE 1782 * SCTP user upon reception of a valid COOKIE
1779 * ECHO chunk. 1783 * ECHO chunk.
1780 */ 1784 */
1781 ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, 1785 ev = sctp_ulpevent_make_assoc_change(asoc, 0,
1782 SCTP_COMM_UP, 0, 1786 SCTP_COMM_UP, 0,
1783 new_asoc->c.sinit_num_ostreams, 1787 asoc->c.sinit_num_ostreams,
1784 new_asoc->c.sinit_max_instreams, 1788 asoc->c.sinit_max_instreams,
1785 GFP_ATOMIC); 1789 GFP_ATOMIC);
1786 if (!ev) 1790 if (!ev)
1787 goto nomem; 1791 goto nomem;
1788 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
1789 SCTP_ULPEVENT(ev));
1790 1792
1791 /* Sockets API Draft Section 5.3.1.6 1793 /* Sockets API Draft Section 5.3.1.6
 1792 * When a peer sends an Adaptation Layer Indication parameter, 1794 * When a peer sends an Adaptation Layer Indication parameter,
 1793 * SCTP delivers this notification to inform the application 1795 * SCTP delivers this notification to inform the application
 1794 * of the peer's requested adaptation layer. 1796 * of the peer's requested adaptation layer.
1795 */ 1797 */
1796 if (new_asoc->peer.adaption_ind) { 1798 if (asoc->peer.adaption_ind) {
1797 ev = sctp_ulpevent_make_adaption_indication(new_asoc, 1799 ai_ev = sctp_ulpevent_make_adaption_indication(asoc,
1798 GFP_ATOMIC); 1800 GFP_ATOMIC);
1799 if (!ev) 1801 if (!ai_ev)
1800 goto nomem; 1802 goto nomem;
1801 1803
1802 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
1803 SCTP_ULPEVENT(ev));
1804 } 1804 }
1805 } 1805 }
1806 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); 1806 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
@@ -1809,12 +1809,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
1809 if (!repl) 1809 if (!repl)
1810 goto nomem; 1810 goto nomem;
1811 1811
1812 if (ev)
1813 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
1814 SCTP_ULPEVENT(ev));
1815 if (ai_ev)
1816 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
1817 SCTP_ULPEVENT(ai_ev));
1818
1812 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); 1819 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
1813 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); 1820 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
1814 1821
1815 return SCTP_DISPOSITION_CONSUME; 1822 return SCTP_DISPOSITION_CONSUME;
1816 1823
1817nomem: 1824nomem:
1825 if (ai_ev)
1826 sctp_ulpevent_free(ai_ev);
1818 if (ev) 1827 if (ev)
1819 sctp_ulpevent_free(ev); 1828 sctp_ulpevent_free(ev);
1820 return SCTP_DISPOSITION_NOMEM; 1829 return SCTP_DISPOSITION_NOMEM;
@@ -2663,9 +2672,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
2663 break; 2672 break;
2664 case SCTP_IERROR_HIGH_TSN: 2673 case SCTP_IERROR_HIGH_TSN:
2665 case SCTP_IERROR_BAD_STREAM: 2674 case SCTP_IERROR_BAD_STREAM:
2675 SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
2666 goto discard_noforce; 2676 goto discard_noforce;
2667 case SCTP_IERROR_DUP_TSN: 2677 case SCTP_IERROR_DUP_TSN:
2668 case SCTP_IERROR_IGNORE_TSN: 2678 case SCTP_IERROR_IGNORE_TSN:
2679 SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
2669 goto discard_force; 2680 goto discard_force;
2670 case SCTP_IERROR_NO_DATA: 2681 case SCTP_IERROR_NO_DATA:
2671 goto consume; 2682 goto consume;
@@ -3017,7 +3028,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
3017 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) 3028 if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
3018 return sctp_sf_violation_chunklen(ep, asoc, type, arg, 3029 return sctp_sf_violation_chunklen(ep, asoc, type, arg,
3019 commands); 3030 commands);
3020
3021 /* 10.2 H) SHUTDOWN COMPLETE notification 3031 /* 10.2 H) SHUTDOWN COMPLETE notification
3022 * 3032 *
3023 * When SCTP completes the shutdown procedures (section 9.2) this 3033 * When SCTP completes the shutdown procedures (section 9.2) this
@@ -3028,6 +3038,14 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
3028 if (!ev) 3038 if (!ev)
3029 goto nomem; 3039 goto nomem;
3030 3040
3041 /* ...send a SHUTDOWN COMPLETE chunk to its peer, */
3042 reply = sctp_make_shutdown_complete(asoc, chunk);
3043 if (!reply)
3044 goto nomem_chunk;
3045
3046 /* Do all the commands now (after allocation), so that we
 3047 * have consistent state if memory allocation fails
3048 */
3031 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); 3049 sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
3032 3050
3033 /* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall 3051 /* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall
@@ -3039,11 +3057,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
3039 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 3057 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
3040 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); 3058 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
3041 3059
3042 /* ...send a SHUTDOWN COMPLETE chunk to its peer, */
3043 reply = sctp_make_shutdown_complete(asoc, chunk);
3044 if (!reply)
3045 goto nomem;
3046
3047 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, 3060 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
3048 SCTP_STATE(SCTP_STATE_CLOSED)); 3061 SCTP_STATE(SCTP_STATE_CLOSED));
3049 SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); 3062 SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
@@ -3054,6 +3067,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
3054 sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); 3067 sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
3055 return SCTP_DISPOSITION_DELETE_TCB; 3068 return SCTP_DISPOSITION_DELETE_TCB;
3056 3069
3070nomem_chunk:
3071 sctp_ulpevent_free(ev);
3057nomem: 3072nomem:
3058 return SCTP_DISPOSITION_NOMEM; 3073 return SCTP_DISPOSITION_NOMEM;
3059} 3074}
@@ -3652,6 +3667,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
3652 void *arg, 3667 void *arg,
3653 sctp_cmd_seq_t *commands) 3668 sctp_cmd_seq_t *commands)
3654{ 3669{
3670 SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS);
3655 sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); 3671 sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
3656 3672
3657 return SCTP_DISPOSITION_CONSUME; 3673 return SCTP_DISPOSITION_CONSUME;
@@ -4548,6 +4564,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
4548{ 4564{
4549 struct sctp_transport *transport = arg; 4565 struct sctp_transport *transport = arg;
4550 4566
4567 SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
4568
4551 if (asoc->overall_error_count >= asoc->max_retrans) { 4569 if (asoc->overall_error_count >= asoc->max_retrans) {
4552 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, 4570 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
4553 SCTP_ERROR(ETIMEDOUT)); 4571 SCTP_ERROR(ETIMEDOUT));
@@ -4616,6 +4634,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
4616 void *arg, 4634 void *arg,
4617 sctp_cmd_seq_t *commands) 4635 sctp_cmd_seq_t *commands)
4618{ 4636{
4637 SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS);
4619 sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); 4638 sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
4620 return SCTP_DISPOSITION_CONSUME; 4639 return SCTP_DISPOSITION_CONSUME;
4621} 4640}
@@ -4650,6 +4669,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
4650 int attempts = asoc->init_err_counter + 1; 4669 int attempts = asoc->init_err_counter + 1;
4651 4670
4652 SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); 4671 SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
4672 SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS);
4653 4673
4654 if (attempts <= asoc->max_init_attempts) { 4674 if (attempts <= asoc->max_init_attempts) {
4655 bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; 4675 bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
@@ -4709,6 +4729,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
4709 int attempts = asoc->init_err_counter + 1; 4729 int attempts = asoc->init_err_counter + 1;
4710 4730
4711 SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); 4731 SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
4732 SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS);
4712 4733
4713 if (attempts <= asoc->max_init_attempts) { 4734 if (attempts <= asoc->max_init_attempts) {
4714 repl = sctp_make_cookie_echo(asoc, NULL); 4735 repl = sctp_make_cookie_echo(asoc, NULL);
@@ -4753,6 +4774,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
4753 struct sctp_chunk *reply = NULL; 4774 struct sctp_chunk *reply = NULL;
4754 4775
4755 SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); 4776 SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
4777 SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
4778
4756 if (asoc->overall_error_count >= asoc->max_retrans) { 4779 if (asoc->overall_error_count >= asoc->max_retrans) {
4757 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, 4780 sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
4758 SCTP_ERROR(ETIMEDOUT)); 4781 SCTP_ERROR(ETIMEDOUT));
@@ -4814,6 +4837,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
4814 struct sctp_chunk *chunk = asoc->addip_last_asconf; 4837 struct sctp_chunk *chunk = asoc->addip_last_asconf;
4815 struct sctp_transport *transport = chunk->transport; 4838 struct sctp_transport *transport = chunk->transport;
4816 4839
4840 SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS);
4841
4817 /* ADDIP 4.1 B1) Increment the error counters and perform path failure 4842 /* ADDIP 4.1 B1) Increment the error counters and perform path failure
4818 * detection on the appropriate destination address as defined in 4843 * detection on the appropriate destination address as defined in
4819 * RFC2960 [5] section 8.1 and 8.2. 4844 * RFC2960 [5] section 8.1 and 8.2.
@@ -4880,6 +4905,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
4880 struct sctp_chunk *reply = NULL; 4905 struct sctp_chunk *reply = NULL;
4881 4906
4882 SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); 4907 SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
4908 SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
4883 4909
4884 reply = sctp_make_abort(asoc, NULL, 0); 4910 reply = sctp_make_abort(asoc, NULL, 0);
4885 if (!reply) 4911 if (!reply)
@@ -4910,6 +4936,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
4910{ 4936{
4911 int disposition; 4937 int disposition;
4912 4938
4939 SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS);
4940
4913 /* From 9.2 Shutdown of an Association 4941 /* From 9.2 Shutdown of an Association
4914 * Upon receipt of the SHUTDOWN primitive from its upper 4942 * Upon receipt of the SHUTDOWN primitive from its upper
4915 * layer, the endpoint enters SHUTDOWN-PENDING state and 4943 * layer, the endpoint enters SHUTDOWN-PENDING state and
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 85caf7963886..79c3e072cf28 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2081,13 +2081,13 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
2081 * SPP_SACKDELAY_ENABLE, setting both will have undefined 2081 * SPP_SACKDELAY_ENABLE, setting both will have undefined
2082 * results. 2082 * results.
2083 */ 2083 */
2084int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, 2084static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
2085 struct sctp_transport *trans, 2085 struct sctp_transport *trans,
2086 struct sctp_association *asoc, 2086 struct sctp_association *asoc,
2087 struct sctp_sock *sp, 2087 struct sctp_sock *sp,
2088 int hb_change, 2088 int hb_change,
2089 int pmtud_change, 2089 int pmtud_change,
2090 int sackdelay_change) 2090 int sackdelay_change)
2091{ 2091{
2092 int error; 2092 int error;
2093 2093
@@ -2970,7 +2970,7 @@ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err)
2970 goto out; 2970 goto out;
2971 } 2971 }
2972 2972
2973 timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK); 2973 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
2974 2974
2975 error = sctp_wait_for_accept(sk, timeo); 2975 error = sctp_wait_for_accept(sk, timeo);
2976 if (error) 2976 if (error)
@@ -3045,14 +3045,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3045 sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; 3045 sp->initmsg.sinit_num_ostreams = sctp_max_outstreams;
3046 sp->initmsg.sinit_max_instreams = sctp_max_instreams; 3046 sp->initmsg.sinit_max_instreams = sctp_max_instreams;
3047 sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; 3047 sp->initmsg.sinit_max_attempts = sctp_max_retrans_init;
3048 sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max); 3048 sp->initmsg.sinit_max_init_timeo = sctp_rto_max;
3049 3049
3050 /* Initialize default RTO related parameters. These parameters can 3050 /* Initialize default RTO related parameters. These parameters can
 3051 * be modified with the SCTP_RTOINFO socket option. 3051 * be modified with the SCTP_RTOINFO socket option.
3052 */ 3052 */
3053 sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial); 3053 sp->rtoinfo.srto_initial = sctp_rto_initial;
3054 sp->rtoinfo.srto_max = jiffies_to_msecs(sctp_rto_max); 3054 sp->rtoinfo.srto_max = sctp_rto_max;
3055 sp->rtoinfo.srto_min = jiffies_to_msecs(sctp_rto_min); 3055 sp->rtoinfo.srto_min = sctp_rto_min;
3056 3056
3057 /* Initialize default association related parameters. These parameters 3057 /* Initialize default association related parameters. These parameters
3058 * can be modified with the SCTP_ASSOCINFO socket option. 3058 * can be modified with the SCTP_ASSOCINFO socket option.
@@ -3061,8 +3061,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3061 sp->assocparams.sasoc_number_peer_destinations = 0; 3061 sp->assocparams.sasoc_number_peer_destinations = 0;
3062 sp->assocparams.sasoc_peer_rwnd = 0; 3062 sp->assocparams.sasoc_peer_rwnd = 0;
3063 sp->assocparams.sasoc_local_rwnd = 0; 3063 sp->assocparams.sasoc_local_rwnd = 0;
3064 sp->assocparams.sasoc_cookie_life = 3064 sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life;
3065 jiffies_to_msecs(sctp_valid_cookie_life);
3066 3065
3067 /* Initialize default event subscriptions. By default, all the 3066 /* Initialize default event subscriptions. By default, all the
3068 * options are off. 3067 * options are off.
@@ -3072,10 +3071,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3072 /* Default Peer Address Parameters. These defaults can 3071 /* Default Peer Address Parameters. These defaults can
3073 * be modified via SCTP_PEER_ADDR_PARAMS 3072 * be modified via SCTP_PEER_ADDR_PARAMS
3074 */ 3073 */
3075 sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); 3074 sp->hbinterval = sctp_hb_interval;
3076 sp->pathmaxrxt = sctp_max_retrans_path; 3075 sp->pathmaxrxt = sctp_max_retrans_path;
3077 sp->pathmtu = 0; // allow default discovery 3076 sp->pathmtu = 0; // allow default discovery
3078 sp->sackdelay = jiffies_to_msecs(sctp_sack_timeout); 3077 sp->sackdelay = sctp_sack_timeout;
3079 sp->param_flags = SPP_HB_ENABLE | 3078 sp->param_flags = SPP_HB_ENABLE |
3080 SPP_PMTUD_ENABLE | 3079 SPP_PMTUD_ENABLE |
3081 SPP_SACKDELAY_ENABLE; 3080 SPP_SACKDELAY_ENABLE;
@@ -5619,6 +5618,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
5619 /* Copy the bind_addr list from the original endpoint to the new 5618 /* Copy the bind_addr list from the original endpoint to the new
5620 * endpoint so that we can handle restarts properly 5619 * endpoint so that we can handle restarts properly
5621 */ 5620 */
5621 if (PF_INET6 == assoc->base.sk->sk_family)
5622 flags = SCTP_ADDR6_ALLOWED;
5622 if (assoc->peer.ipv4_address) 5623 if (assoc->peer.ipv4_address)
5623 flags |= SCTP_ADDR4_PEERSUPP; 5624 flags |= SCTP_ADDR4_PEERSUPP;
5624 if (assoc->peer.ipv6_address) 5625 if (assoc->peer.ipv6_address)
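
[Annotation] For context, the flag assembly this hunk extends; the diff context clips the tail, so the final line below is reconstructed as an assumption (SCTP_ADDR6_PEERSUPP mirroring the IPv4 case):

        flags = 0;
        if (PF_INET6 == assoc->base.sk->sk_family)
                flags = SCTP_ADDR6_ALLOWED;     /* new in this hunk: mark v6 sockets */
        if (assoc->peer.ipv4_address)
                flags |= SCTP_ADDR4_PEERSUPP;
        if (assoc->peer.ipv6_address)
                flags |= SCTP_ADDR6_PEERSUPP;   /* assumed continuation, clipped above */
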
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index dc6f3ff32358..633cd178654b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -45,9 +45,10 @@
45#include <net/sctp/sctp.h> 45#include <net/sctp/sctp.h>
46#include <linux/sysctl.h> 46#include <linux/sysctl.h>
47 47
48static ctl_handler sctp_sysctl_jiffies_ms; 48static int zero = 0;
49static long rto_timer_min = 1; 49static int one = 1;
50static long rto_timer_max = 86400000; /* One day */ 50static int timer_max = 86400000; /* ms in one day */
51static int int_max = INT_MAX;
51static long sack_timer_min = 1; 52static long sack_timer_min = 1;
52static long sack_timer_max = 500; 53static long sack_timer_max = 500;
53 54
@@ -56,45 +57,45 @@ static ctl_table sctp_table[] = {
56 .ctl_name = NET_SCTP_RTO_INITIAL, 57 .ctl_name = NET_SCTP_RTO_INITIAL,
57 .procname = "rto_initial", 58 .procname = "rto_initial",
58 .data = &sctp_rto_initial, 59 .data = &sctp_rto_initial,
59 .maxlen = sizeof(long), 60 .maxlen = sizeof(unsigned int),
60 .mode = 0644, 61 .mode = 0644,
61 .proc_handler = &proc_doulongvec_ms_jiffies_minmax, 62 .proc_handler = &proc_dointvec_minmax,
62 .strategy = &sctp_sysctl_jiffies_ms, 63 .strategy = &sysctl_intvec,
63 .extra1 = &rto_timer_min, 64 .extra1 = &one,
64 .extra2 = &rto_timer_max 65 .extra2 = &timer_max
65 }, 66 },
66 { 67 {
67 .ctl_name = NET_SCTP_RTO_MIN, 68 .ctl_name = NET_SCTP_RTO_MIN,
68 .procname = "rto_min", 69 .procname = "rto_min",
69 .data = &sctp_rto_min, 70 .data = &sctp_rto_min,
70 .maxlen = sizeof(long), 71 .maxlen = sizeof(unsigned int),
71 .mode = 0644, 72 .mode = 0644,
72 .proc_handler = &proc_doulongvec_ms_jiffies_minmax, 73 .proc_handler = &proc_dointvec_minmax,
73 .strategy = &sctp_sysctl_jiffies_ms, 74 .strategy = &sysctl_intvec,
74 .extra1 = &rto_timer_min, 75 .extra1 = &one,
75 .extra2 = &rto_timer_max 76 .extra2 = &timer_max
76 }, 77 },
77 { 78 {
78 .ctl_name = NET_SCTP_RTO_MAX, 79 .ctl_name = NET_SCTP_RTO_MAX,
79 .procname = "rto_max", 80 .procname = "rto_max",
80 .data = &sctp_rto_max, 81 .data = &sctp_rto_max,
81 .maxlen = sizeof(long), 82 .maxlen = sizeof(unsigned int),
82 .mode = 0644, 83 .mode = 0644,
83 .proc_handler = &proc_doulongvec_ms_jiffies_minmax, 84 .proc_handler = &proc_dointvec_minmax,
84 .strategy = &sctp_sysctl_jiffies_ms, 85 .strategy = &sysctl_intvec,
85 .extra1 = &rto_timer_min, 86 .extra1 = &one,
86 .extra2 = &rto_timer_max 87 .extra2 = &timer_max
87 }, 88 },
88 { 89 {
89 .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, 90 .ctl_name = NET_SCTP_VALID_COOKIE_LIFE,
90 .procname = "valid_cookie_life", 91 .procname = "valid_cookie_life",
91 .data = &sctp_valid_cookie_life, 92 .data = &sctp_valid_cookie_life,
92 .maxlen = sizeof(long), 93 .maxlen = sizeof(unsigned int),
93 .mode = 0644, 94 .mode = 0644,
94 .proc_handler = &proc_doulongvec_ms_jiffies_minmax, 95 .proc_handler = &proc_dointvec_minmax,
95 .strategy = &sctp_sysctl_jiffies_ms, 96 .strategy = &sysctl_intvec,
96 .extra1 = &rto_timer_min, 97 .extra1 = &one,
97 .extra2 = &rto_timer_max 98 .extra2 = &timer_max
98 }, 99 },
99 { 100 {
100 .ctl_name = NET_SCTP_MAX_BURST, 101 .ctl_name = NET_SCTP_MAX_BURST,
@@ -102,7 +103,10 @@ static ctl_table sctp_table[] = {
102 .data = &sctp_max_burst, 103 .data = &sctp_max_burst,
103 .maxlen = sizeof(int), 104 .maxlen = sizeof(int),
104 .mode = 0644, 105 .mode = 0644,
105 .proc_handler = &proc_dointvec 106 .proc_handler = &proc_dointvec_minmax,
107 .strategy = &sysctl_intvec,
108 .extra1 = &zero,
109 .extra2 = &int_max
106 }, 110 },
107 { 111 {
108 .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, 112 .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS,
@@ -110,7 +114,10 @@ static ctl_table sctp_table[] = {
110 .data = &sctp_max_retrans_association, 114 .data = &sctp_max_retrans_association,
111 .maxlen = sizeof(int), 115 .maxlen = sizeof(int),
112 .mode = 0644, 116 .mode = 0644,
113 .proc_handler = &proc_dointvec 117 .proc_handler = &proc_dointvec_minmax,
118 .strategy = &sysctl_intvec,
119 .extra1 = &one,
120 .extra2 = &int_max
114 }, 121 },
115 { 122 {
116 .ctl_name = NET_SCTP_SNDBUF_POLICY, 123 .ctl_name = NET_SCTP_SNDBUF_POLICY,
@@ -118,7 +125,8 @@ static ctl_table sctp_table[] = {
118 .data = &sctp_sndbuf_policy, 125 .data = &sctp_sndbuf_policy,
119 .maxlen = sizeof(int), 126 .maxlen = sizeof(int),
120 .mode = 0644, 127 .mode = 0644,
121 .proc_handler = &proc_dointvec 128 .proc_handler = &proc_dointvec,
129 .strategy = &sysctl_intvec
122 }, 130 },
123 { 131 {
124 .ctl_name = NET_SCTP_RCVBUF_POLICY, 132 .ctl_name = NET_SCTP_RCVBUF_POLICY,
@@ -126,7 +134,8 @@ static ctl_table sctp_table[] = {
126 .data = &sctp_rcvbuf_policy, 134 .data = &sctp_rcvbuf_policy,
127 .maxlen = sizeof(int), 135 .maxlen = sizeof(int),
128 .mode = 0644, 136 .mode = 0644,
129 .proc_handler = &proc_dointvec 137 .proc_handler = &proc_dointvec,
138 .strategy = &sysctl_intvec
130 }, 139 },
131 { 140 {
132 .ctl_name = NET_SCTP_PATH_MAX_RETRANS, 141 .ctl_name = NET_SCTP_PATH_MAX_RETRANS,
@@ -134,7 +143,10 @@ static ctl_table sctp_table[] = {
134 .data = &sctp_max_retrans_path, 143 .data = &sctp_max_retrans_path,
135 .maxlen = sizeof(int), 144 .maxlen = sizeof(int),
136 .mode = 0644, 145 .mode = 0644,
137 .proc_handler = &proc_dointvec 146 .proc_handler = &proc_dointvec_minmax,
147 .strategy = &sysctl_intvec,
148 .extra1 = &one,
149 .extra2 = &int_max
138 }, 150 },
139 { 151 {
140 .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, 152 .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS,
@@ -142,18 +154,21 @@ static ctl_table sctp_table[] = {
142 .data = &sctp_max_retrans_init, 154 .data = &sctp_max_retrans_init,
143 .maxlen = sizeof(int), 155 .maxlen = sizeof(int),
144 .mode = 0644, 156 .mode = 0644,
145 .proc_handler = &proc_dointvec 157 .proc_handler = &proc_dointvec_minmax,
158 .strategy = &sysctl_intvec,
159 .extra1 = &one,
160 .extra2 = &int_max
146 }, 161 },
147 { 162 {
148 .ctl_name = NET_SCTP_HB_INTERVAL, 163 .ctl_name = NET_SCTP_HB_INTERVAL,
149 .procname = "hb_interval", 164 .procname = "hb_interval",
150 .data = &sctp_hb_interval, 165 .data = &sctp_hb_interval,
151 .maxlen = sizeof(long), 166 .maxlen = sizeof(unsigned int),
152 .mode = 0644, 167 .mode = 0644,
153 .proc_handler = &proc_doulongvec_ms_jiffies_minmax, 168 .proc_handler = &proc_dointvec_minmax,
154 .strategy = &sctp_sysctl_jiffies_ms, 169 .strategy = &sysctl_intvec,
155 .extra1 = &rto_timer_min, 170 .extra1 = &one,
156 .extra2 = &rto_timer_max 171 .extra2 = &timer_max
157 }, 172 },
158 { 173 {
159 .ctl_name = NET_SCTP_PRESERVE_ENABLE, 174 .ctl_name = NET_SCTP_PRESERVE_ENABLE,
@@ -161,23 +176,26 @@ static ctl_table sctp_table[] = {
161 .data = &sctp_cookie_preserve_enable, 176 .data = &sctp_cookie_preserve_enable,
162 .maxlen = sizeof(int), 177 .maxlen = sizeof(int),
163 .mode = 0644, 178 .mode = 0644,
164 .proc_handler = &proc_dointvec 179 .proc_handler = &proc_dointvec,
180 .strategy = &sysctl_intvec
165 }, 181 },
166 { 182 {
167 .ctl_name = NET_SCTP_RTO_ALPHA, 183 .ctl_name = NET_SCTP_RTO_ALPHA,
168 .procname = "rto_alpha_exp_divisor", 184 .procname = "rto_alpha_exp_divisor",
169 .data = &sctp_rto_alpha, 185 .data = &sctp_rto_alpha,
170 .maxlen = sizeof(int), 186 .maxlen = sizeof(int),
171 .mode = 0644, 187 .mode = 0444,
172 .proc_handler = &proc_dointvec 188 .proc_handler = &proc_dointvec,
189 .strategy = &sysctl_intvec
173 }, 190 },
174 { 191 {
175 .ctl_name = NET_SCTP_RTO_BETA, 192 .ctl_name = NET_SCTP_RTO_BETA,
176 .procname = "rto_beta_exp_divisor", 193 .procname = "rto_beta_exp_divisor",
177 .data = &sctp_rto_beta, 194 .data = &sctp_rto_beta,
178 .maxlen = sizeof(int), 195 .maxlen = sizeof(int),
179 .mode = 0644, 196 .mode = 0444,
180 .proc_handler = &proc_dointvec 197 .proc_handler = &proc_dointvec,
198 .strategy = &sysctl_intvec
181 }, 199 },
182 { 200 {
183 .ctl_name = NET_SCTP_ADDIP_ENABLE, 201 .ctl_name = NET_SCTP_ADDIP_ENABLE,
@@ -185,7 +203,8 @@ static ctl_table sctp_table[] = {
185 .data = &sctp_addip_enable, 203 .data = &sctp_addip_enable,
186 .maxlen = sizeof(int), 204 .maxlen = sizeof(int),
187 .mode = 0644, 205 .mode = 0644,
188 .proc_handler = &proc_dointvec 206 .proc_handler = &proc_dointvec,
207 .strategy = &sysctl_intvec
189 }, 208 },
190 { 209 {
191 .ctl_name = NET_SCTP_PRSCTP_ENABLE, 210 .ctl_name = NET_SCTP_PRSCTP_ENABLE,
@@ -193,7 +212,8 @@ static ctl_table sctp_table[] = {
193 .data = &sctp_prsctp_enable, 212 .data = &sctp_prsctp_enable,
194 .maxlen = sizeof(int), 213 .maxlen = sizeof(int),
195 .mode = 0644, 214 .mode = 0644,
196 .proc_handler = &proc_dointvec 215 .proc_handler = &proc_dointvec,
216 .strategy = &sysctl_intvec
197 }, 217 },
198 { 218 {
199 .ctl_name = NET_SCTP_SACK_TIMEOUT, 219 .ctl_name = NET_SCTP_SACK_TIMEOUT,
@@ -201,8 +221,8 @@ static ctl_table sctp_table[] = {
201 .data = &sctp_sack_timeout, 221 .data = &sctp_sack_timeout,
202 .maxlen = sizeof(long), 222 .maxlen = sizeof(long),
203 .mode = 0644, 223 .mode = 0644,
204 .proc_handler = &proc_doulongvec_ms_jiffies_minmax, 224 .proc_handler = &proc_dointvec_minmax,
205 .strategy = &sctp_sysctl_jiffies_ms, 225 .strategy = &sysctl_intvec,
206 .extra1 = &sack_timer_min, 226 .extra1 = &sack_timer_min,
207 .extra2 = &sack_timer_max, 227 .extra2 = &sack_timer_max,
208 }, 228 },
@@ -242,37 +262,3 @@ void sctp_sysctl_unregister(void)
242{ 262{
243 unregister_sysctl_table(sctp_sysctl_header); 263 unregister_sysctl_table(sctp_sysctl_header);
244} 264}
245
246/* Strategy function to convert jiffies to milliseconds. */
247static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen,
248 void __user *oldval, size_t __user *oldlenp,
249 void __user *newval, size_t newlen, void **context) {
250
251 if (oldval) {
252 size_t olen;
253
254 if (oldlenp) {
255 if (get_user(olen, oldlenp))
256 return -EFAULT;
257
258 if (olen != sizeof (int))
259 return -EINVAL;
260 }
261 if (put_user((*(int *)(table->data) * 1000) / HZ,
262 (int __user *)oldval) ||
263 (oldlenp && put_user(sizeof (int), oldlenp)))
264 return -EFAULT;
265 }
266 if (newval && newlen) {
267 int new;
268
269 if (newlen != sizeof (int))
270 return -EINVAL;
271
272 if (get_user(new, (int __user *)newval))
273 return -EFAULT;
274
275 *(int *)(table->data) = (new * HZ) / 1000;
276 }
277 return 1;
278}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2763aa93de1a..3e5936a5f671 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -75,7 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
75 * parameter 'RTO.Initial'. 75 * parameter 'RTO.Initial'.
76 */ 76 */
77 peer->rtt = 0; 77 peer->rtt = 0;
78 peer->rto = sctp_rto_initial; 78 peer->rto = msecs_to_jiffies(sctp_rto_initial);
79 peer->rttvar = 0; 79 peer->rttvar = 0;
80 peer->srtt = 0; 80 peer->srtt = 0;
81 peer->rto_pending = 0; 81 peer->rto_pending = 0;
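
[Annotation] This one-liner is the consumer side of the sysctl conversion above: tunables are now stored in milliseconds and converted to jiffies only where a timer actually needs them. Sketch of the convention, with an illustrative default (RFC 2960 sets RTO.Initial to 3 seconds):

        /* sysctl-visible tunable, kept in milliseconds */
        unsigned int sctp_rto_initial = 3000;   /* illustrative: 3 s */

        /* conversion happens at the point of use, not in the sysctl handler */
        peer->rto = msecs_to_jiffies(sctp_rto_initial);
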
diff --git a/net/socket.c b/net/socket.c
index 6d261bf206fc..1bc4167e0da8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -42,7 +42,7 @@
42 * Andi Kleen : Some small cleanups, optimizations, 42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug. 43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) 44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
45 * Tigran Aivazian : Made listen(2) backlog sanity checks 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
46 * protocol-independent 46 * protocol-independent
47 * 47 *
48 * 48 *
@@ -53,17 +53,17 @@
53 * 53 *
54 * 54 *
55 * This module is effectively the top level interface to the BSD socket 55 * This module is effectively the top level interface to the BSD socket
56 * paradigm. 56 * paradigm.
57 * 57 *
58 * Based upon Swansea University Computer Society NET3.039 58 * Based upon Swansea University Computer Society NET3.039
59 */ 59 */
60 60
61#include <linux/mm.h> 61#include <linux/mm.h>
62#include <linux/smp_lock.h>
63#include <linux/socket.h> 62#include <linux/socket.h>
64#include <linux/file.h> 63#include <linux/file.h>
65#include <linux/net.h> 64#include <linux/net.h>
66#include <linux/interrupt.h> 65#include <linux/interrupt.h>
66#include <linux/rcupdate.h>
67#include <linux/netdevice.h> 67#include <linux/netdevice.h>
68#include <linux/proc_fs.h> 68#include <linux/proc_fs.h>
69#include <linux/seq_file.h> 69#include <linux/seq_file.h>
@@ -96,25 +96,24 @@
96 96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare); 97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
98static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, 98static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
99 size_t size, loff_t pos); 99 size_t size, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, 100static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
101 size_t size, loff_t pos); 101 size_t size, loff_t pos);
102static int sock_mmap(struct file *file, struct vm_area_struct * vma); 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
103 103
104static int sock_close(struct inode *inode, struct file *file); 104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file, 105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait); 106 struct poll_table_struct *wait);
107static long sock_ioctl(struct file *file, 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
108 unsigned int cmd, unsigned long arg);
109#ifdef CONFIG_COMPAT 108#ifdef CONFIG_COMPAT
110static long compat_sock_ioctl(struct file *file, 109static long compat_sock_ioctl(struct file *file,
111 unsigned int cmd, unsigned long arg); 110 unsigned int cmd, unsigned long arg);
112#endif 111#endif
113static int sock_fasync(int fd, struct file *filp, int on); 112static int sock_fasync(int fd, struct file *filp, int on);
114static ssize_t sock_readv(struct file *file, const struct iovec *vector, 113static ssize_t sock_readv(struct file *file, const struct iovec *vector,
115 unsigned long count, loff_t *ppos); 114 unsigned long count, loff_t *ppos);
116static ssize_t sock_writev(struct file *file, const struct iovec *vector, 115static ssize_t sock_writev(struct file *file, const struct iovec *vector,
117 unsigned long count, loff_t *ppos); 116 unsigned long count, loff_t *ppos);
118static ssize_t sock_sendpage(struct file *file, struct page *page, 117static ssize_t sock_sendpage(struct file *file, struct page *page,
119 int offset, size_t size, loff_t *ppos, int more); 118 int offset, size_t size, loff_t *ppos, int more);
120 119
@@ -147,52 +146,8 @@ static struct file_operations socket_file_ops = {
147 * The protocol list. Each protocol is registered in here. 146 * The protocol list. Each protocol is registered in here.
148 */ 147 */
149 148
150static struct net_proto_family *net_families[NPROTO];
151
152#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
153static atomic_t net_family_lockct = ATOMIC_INIT(0);
154static DEFINE_SPINLOCK(net_family_lock); 149static DEFINE_SPINLOCK(net_family_lock);
155 150static const struct net_proto_family *net_families[NPROTO] __read_mostly;
156/* The strategy is: modifications net_family vector are short, do not
157 sleep and veeery rare, but read access should be free of any exclusive
158 locks.
159 */
160
161static void net_family_write_lock(void)
162{
163 spin_lock(&net_family_lock);
164 while (atomic_read(&net_family_lockct) != 0) {
165 spin_unlock(&net_family_lock);
166
167 yield();
168
169 spin_lock(&net_family_lock);
170 }
171}
172
173static __inline__ void net_family_write_unlock(void)
174{
175 spin_unlock(&net_family_lock);
176}
177
178static __inline__ void net_family_read_lock(void)
179{
180 atomic_inc(&net_family_lockct);
181 spin_unlock_wait(&net_family_lock);
182}
183
184static __inline__ void net_family_read_unlock(void)
185{
186 atomic_dec(&net_family_lockct);
187}
188
189#else
190#define net_family_write_lock() do { } while(0)
191#define net_family_write_unlock() do { } while(0)
192#define net_family_read_lock() do { } while(0)
193#define net_family_read_unlock() do { } while(0)
194#endif
195
196 151
197/* 152/*
198 * Statistics counters of the socket lists 153 * Statistics counters of the socket lists
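
[Annotation] The forty-odd lines of hand-rolled reader counting deleted above are replaced by RCU (hence the new linux/rcupdate.h include): lookups run under rcu_read_lock() and a module reference pins the entry past the read section, while registration keeps using net_family_lock. A sketch of a read-side lookup under that scheme; the exact call site lies outside this excerpt, so treat the shape as an assumption:

        rcu_read_lock();
        pf = rcu_dereference(net_families[family]);
        if (pf && try_module_get(pf->owner)) {
                rcu_read_unlock();
                err = pf->create(sock, protocol);       /* called outside the RCU section */
                module_put(pf->owner);
        } else {
                rcu_read_unlock();
                err = -EAFNOSUPPORT;
        }
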
@@ -201,19 +156,20 @@ static __inline__ void net_family_read_unlock(void)
201static DEFINE_PER_CPU(int, sockets_in_use) = 0; 156static DEFINE_PER_CPU(int, sockets_in_use) = 0;
202 157
203/* 158/*
204 * Support routines. Move socket addresses back and forth across the kernel/user 159 * Support routines.
205 * divide and look after the messy bits. 160 * Move socket addresses back and forth across the kernel/user
161 * divide and look after the messy bits.
206 */ 162 */
207 163
208#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 164#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
209 16 for IP, 16 for IPX, 165 16 for IP, 16 for IPX,
210 24 for IPv6, 166 24 for IPv6,
211 about 80 for AX.25 167 about 80 for AX.25
212 must be at least one bigger than 168 must be at least one bigger than
213 the AF_UNIX size (see net/unix/af_unix.c 169 the AF_UNIX size (see net/unix/af_unix.c
214 :unix_mkname()). 170 :unix_mkname()).
215 */ 171 */
216 172
217/** 173/**
218 * move_addr_to_kernel - copy a socket address into kernel space 174 * move_addr_to_kernel - copy a socket address into kernel space
219 * @uaddr: Address in user space 175 * @uaddr: Address in user space
@@ -227,11 +183,11 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0;
227 183
228int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) 184int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
229{ 185{
230 if(ulen<0||ulen>MAX_SOCK_ADDR) 186 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
231 return -EINVAL; 187 return -EINVAL;
232 if(ulen==0) 188 if (ulen == 0)
233 return 0; 189 return 0;
234 if(copy_from_user(kaddr,uaddr,ulen)) 190 if (copy_from_user(kaddr, uaddr, ulen))
235 return -EFAULT; 191 return -EFAULT;
236 return audit_sockaddr(ulen, kaddr); 192 return audit_sockaddr(ulen, kaddr);
237} 193}
@@ -252,51 +208,52 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
252 * length of the data is written over the length limit the user 208 * length of the data is written over the length limit the user
253 * specified. Zero is returned for a success. 209 * specified. Zero is returned for a success.
254 */ 210 */
255 211
256int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen) 212int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
213 int __user *ulen)
257{ 214{
258 int err; 215 int err;
259 int len; 216 int len;
260 217
261 if((err=get_user(len, ulen))) 218 err = get_user(len, ulen);
219 if (err)
262 return err; 220 return err;
263 if(len>klen) 221 if (len > klen)
264 len=klen; 222 len = klen;
265 if(len<0 || len> MAX_SOCK_ADDR) 223 if (len < 0 || len > MAX_SOCK_ADDR)
266 return -EINVAL; 224 return -EINVAL;
267 if(len) 225 if (len) {
268 {
269 if (audit_sockaddr(klen, kaddr)) 226 if (audit_sockaddr(klen, kaddr))
270 return -ENOMEM; 227 return -ENOMEM;
271 if(copy_to_user(uaddr,kaddr,len)) 228 if (copy_to_user(uaddr, kaddr, len))
272 return -EFAULT; 229 return -EFAULT;
273 } 230 }
274 /* 231 /*
275 * "fromlen shall refer to the value before truncation.." 232 * "fromlen shall refer to the value before truncation.."
276 * 1003.1g 233 * 1003.1g
277 */ 234 */
278 return __put_user(klen, ulen); 235 return __put_user(klen, ulen);
279} 236}
280 237
281#define SOCKFS_MAGIC 0x534F434B 238#define SOCKFS_MAGIC 0x534F434B
282 239
283static kmem_cache_t * sock_inode_cachep __read_mostly; 240static kmem_cache_t *sock_inode_cachep __read_mostly;
284 241
285static struct inode *sock_alloc_inode(struct super_block *sb) 242static struct inode *sock_alloc_inode(struct super_block *sb)
286{ 243{
287 struct socket_alloc *ei; 244 struct socket_alloc *ei;
288 ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); 245
246 ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
289 if (!ei) 247 if (!ei)
290 return NULL; 248 return NULL;
291 init_waitqueue_head(&ei->socket.wait); 249 init_waitqueue_head(&ei->socket.wait);
292 250
293 ei->socket.fasync_list = NULL; 251 ei->socket.fasync_list = NULL;
294 ei->socket.state = SS_UNCONNECTED; 252 ei->socket.state = SS_UNCONNECTED;
295 ei->socket.flags = 0; 253 ei->socket.flags = 0;
296 ei->socket.ops = NULL; 254 ei->socket.ops = NULL;
297 ei->socket.sk = NULL; 255 ei->socket.sk = NULL;
298 ei->socket.file = NULL; 256 ei->socket.file = NULL;
299 ei->socket.flags = 0;
300 257
301 return &ei->vfs_inode; 258 return &ei->vfs_inode;
302} 259}
@@ -307,22 +264,25 @@ static void sock_destroy_inode(struct inode *inode)
307 container_of(inode, struct socket_alloc, vfs_inode)); 264 container_of(inode, struct socket_alloc, vfs_inode));
308} 265}
309 266
310static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) 267static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
311{ 268{
312 struct socket_alloc *ei = (struct socket_alloc *) foo; 269 struct socket_alloc *ei = (struct socket_alloc *)foo;
313 270
314 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 271 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
315 SLAB_CTOR_CONSTRUCTOR) 272 == SLAB_CTOR_CONSTRUCTOR)
316 inode_init_once(&ei->vfs_inode); 273 inode_init_once(&ei->vfs_inode);
317} 274}
318 275
319static int init_inodecache(void) 276static int init_inodecache(void)
320{ 277{
321 sock_inode_cachep = kmem_cache_create("sock_inode_cache", 278 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
322 sizeof(struct socket_alloc), 279 sizeof(struct socket_alloc),
323 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 280 0,
324 SLAB_MEM_SPREAD), 281 (SLAB_HWCACHE_ALIGN |
325 init_once, NULL); 282 SLAB_RECLAIM_ACCOUNT |
283 SLAB_MEM_SPREAD),
284 init_once,
285 NULL);
326 if (sock_inode_cachep == NULL) 286 if (sock_inode_cachep == NULL)
327 return -ENOMEM; 287 return -ENOMEM;
328 return 0; 288 return 0;
@@ -335,7 +295,8 @@ static struct super_operations sockfs_ops = {
335}; 295};
336 296
337static int sockfs_get_sb(struct file_system_type *fs_type, 297static int sockfs_get_sb(struct file_system_type *fs_type,
338 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 298 int flags, const char *dev_name, void *data,
299 struct vfsmount *mnt)
339{ 300{
340 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 301 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
341 mnt); 302 mnt);
@@ -348,12 +309,13 @@ static struct file_system_type sock_fs_type = {
348 .get_sb = sockfs_get_sb, 309 .get_sb = sockfs_get_sb,
349 .kill_sb = kill_anon_super, 310 .kill_sb = kill_anon_super,
350}; 311};
312
351static int sockfs_delete_dentry(struct dentry *dentry) 313static int sockfs_delete_dentry(struct dentry *dentry)
352{ 314{
353 return 1; 315 return 1;
354} 316}
355static struct dentry_operations sockfs_dentry_operations = { 317static struct dentry_operations sockfs_dentry_operations = {
356 .d_delete = sockfs_delete_dentry, 318 .d_delete = sockfs_delete_dentry,
357}; 319};
358 320
359/* 321/*
@@ -477,10 +439,12 @@ struct socket *sockfd_lookup(int fd, int *err)
477 struct file *file; 439 struct file *file;
478 struct socket *sock; 440 struct socket *sock;
479 441
480 if (!(file = fget(fd))) { 442 file = fget(fd);
443 if (!file) {
481 *err = -EBADF; 444 *err = -EBADF;
482 return NULL; 445 return NULL;
483 } 446 }
447
484 sock = sock_from_file(file, err); 448 sock = sock_from_file(file, err);
485 if (!sock) 449 if (!sock)
486 fput(file); 450 fput(file);
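
The pattern being untangled here is the usual file-reference discipline: fget() pins the struct file behind the descriptor, and every exit path that does not hand the socket onward must drop that pin. A hedged caller-side sketch using the same era's helpers:

    int err;
    struct socket *sock = sockfd_lookup(fd, &err);

    if (!sock)
            return err;             /* err was set to -EBADF or -ENOTSOCK */
    /* ... operate on sock; the underlying file cannot go away ... */
    sockfd_put(sock);               /* drops the fget() reference */
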
@@ -505,7 +469,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
505 469
506/** 470/**
507 * sock_alloc - allocate a socket 471 * sock_alloc - allocate a socket
508 * 472 *
509 * Allocate a new inode and socket object. The two are bound together 473 * Allocate a new inode and socket object. The two are bound together
510 * and initialised. The socket is then returned. If we are out of inodes 474 * and initialised. The socket is then returned. If we are out of inodes
511 * NULL is returned. 475 * NULL is returned.
@@ -513,8 +477,8 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
513 477
514static struct socket *sock_alloc(void) 478static struct socket *sock_alloc(void)
515{ 479{
516 struct inode * inode; 480 struct inode *inode;
517 struct socket * sock; 481 struct socket *sock;
518 482
519 inode = new_inode(sock_mnt->mnt_sb); 483 inode = new_inode(sock_mnt->mnt_sb);
520 if (!inode) 484 if (!inode)
@@ -522,7 +486,7 @@ static struct socket *sock_alloc(void)
522 486
523 sock = SOCKET_I(inode); 487 sock = SOCKET_I(inode);
524 488
525 inode->i_mode = S_IFSOCK|S_IRWXUGO; 489 inode->i_mode = S_IFSOCK | S_IRWXUGO;
526 inode->i_uid = current->fsuid; 490 inode->i_uid = current->fsuid;
527 inode->i_gid = current->fsgid; 491 inode->i_gid = current->fsgid;
528 492
@@ -536,7 +500,7 @@ static struct socket *sock_alloc(void)
536 * a back door. Remember to keep it shut otherwise you'll let the 500 * a back door. Remember to keep it shut otherwise you'll let the
537 * creepy crawlies in. 501 * creepy crawlies in.
538 */ 502 */
539 503
540static int sock_no_open(struct inode *irrelevant, struct file *dontcare) 504static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
541{ 505{
542 return -ENXIO; 506 return -ENXIO;
@@ -553,9 +517,9 @@ const struct file_operations bad_sock_fops = {
553 * 517 *
554 * The socket is released from the protocol stack if it has a release 518 * The socket is released from the protocol stack if it has a release
555 * callback, and the inode is then released if the socket is bound to 519 * callback, and the inode is then released if the socket is bound to
556 * an inode, not a file. 520 * an inode, not a file.
557 */ 521 */
558 522
559void sock_release(struct socket *sock) 523void sock_release(struct socket *sock)
560{ 524{
561 if (sock->ops) { 525 if (sock->ops) {
@@ -575,10 +539,10 @@ void sock_release(struct socket *sock)
575 iput(SOCK_INODE(sock)); 539 iput(SOCK_INODE(sock));
576 return; 540 return;
577 } 541 }
578 sock->file=NULL; 542 sock->file = NULL;
579} 543}
580 544
581static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 545static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
582 struct msghdr *msg, size_t size) 546 struct msghdr *msg, size_t size)
583{ 547{
584 struct sock_iocb *si = kiocb_to_siocb(iocb); 548 struct sock_iocb *si = kiocb_to_siocb(iocb);
@@ -621,14 +585,14 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
621 * the following is safe, since for compiler definitions of kvec and 585 * the following is safe, since for compiler definitions of kvec and
622 * iovec are identical, yielding the same in-core layout and alignment 586 * iovec are identical, yielding the same in-core layout and alignment
623 */ 587 */
624 msg->msg_iov = (struct iovec *)vec, 588 msg->msg_iov = (struct iovec *)vec;
625 msg->msg_iovlen = num; 589 msg->msg_iovlen = num;
626 result = sock_sendmsg(sock, msg, size); 590 result = sock_sendmsg(sock, msg, size);
627 set_fs(oldfs); 591 set_fs(oldfs);
628 return result; 592 return result;
629} 593}
630 594
631static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 595static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
632 struct msghdr *msg, size_t size, int flags) 596 struct msghdr *msg, size_t size, int flags)
633{ 597{
634 int err; 598 int err;
@@ -647,14 +611,14 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
647 return sock->ops->recvmsg(iocb, sock, msg, size, flags); 611 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
648} 612}
649 613
650int sock_recvmsg(struct socket *sock, struct msghdr *msg, 614int sock_recvmsg(struct socket *sock, struct msghdr *msg,
651 size_t size, int flags) 615 size_t size, int flags)
652{ 616{
653 struct kiocb iocb; 617 struct kiocb iocb;
654 struct sock_iocb siocb; 618 struct sock_iocb siocb;
655 int ret; 619 int ret;
656 620
657 init_sync_kiocb(&iocb, NULL); 621 init_sync_kiocb(&iocb, NULL);
658 iocb.private = &siocb; 622 iocb.private = &siocb;
659 ret = __sock_recvmsg(&iocb, sock, msg, size, flags); 623 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
660 if (-EIOCBQUEUED == ret) 624 if (-EIOCBQUEUED == ret)
@@ -662,9 +626,8 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
662 return ret; 626 return ret;
663} 627}
664 628
665int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 629int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
666 struct kvec *vec, size_t num, 630 struct kvec *vec, size_t num, size_t size, int flags)
667 size_t size, int flags)
668{ 631{
669 mm_segment_t oldfs = get_fs(); 632 mm_segment_t oldfs = get_fs();
670 int result; 633 int result;
@@ -674,8 +637,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
674 * the following is safe, since for compiler definitions of kvec and 637 * the following is safe, since for compiler definitions of kvec and
675 * iovec are identical, yielding the same in-core layout and alignment 638 * iovec are identical, yielding the same in-core layout and alignment
676 */ 639 */
677 msg->msg_iov = (struct iovec *)vec, 640 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
678 msg->msg_iovlen = num;
679 result = sock_recvmsg(sock, msg, size, flags); 641 result = sock_recvmsg(sock, msg, size, flags);
680 set_fs(oldfs); 642 set_fs(oldfs);
681 return result; 643 return result;
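
kernel_sendmsg() and kernel_recvmsg() exist so in-kernel callers can pass kvecs that already point at kernel memory; the set_fs(KERNEL_DS)/set_fs(oldfs) bracket above is what makes the user-copy paths accept them. A usage sketch (buf and buflen are hypothetical, error handling elided):

    struct kvec vec = { .iov_base = buf, .iov_len = buflen };
    struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
    int n;

    n = kernel_sendmsg(sock, &msg, &vec, 1, buflen);

    /* The send may advance the vector, so re-initialise before reuse. */
    vec.iov_base = buf;
    vec.iov_len = buflen;
    n = kernel_recvmsg(sock, &msg, &vec, 1, buflen, MSG_DONTWAIT);
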
@@ -702,7 +664,8 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
702} 664}
703 665
704static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 666static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
705 char __user *ubuf, size_t size, struct sock_iocb *siocb) 667 char __user *ubuf, size_t size,
668 struct sock_iocb *siocb)
706{ 669{
707 if (!is_sync_kiocb(iocb)) { 670 if (!is_sync_kiocb(iocb)) {
708 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 671 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -720,20 +683,21 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
720} 683}
721 684
722static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 685static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
723 struct file *file, struct iovec *iov, unsigned long nr_segs) 686 struct file *file, struct iovec *iov,
687 unsigned long nr_segs)
724{ 688{
725 struct socket *sock = file->private_data; 689 struct socket *sock = file->private_data;
726 size_t size = 0; 690 size_t size = 0;
727 int i; 691 int i;
728 692
729 for (i = 0 ; i < nr_segs ; i++) 693 for (i = 0; i < nr_segs; i++)
730 size += iov[i].iov_len; 694 size += iov[i].iov_len;
731 695
732 msg->msg_name = NULL; 696 msg->msg_name = NULL;
733 msg->msg_namelen = 0; 697 msg->msg_namelen = 0;
734 msg->msg_control = NULL; 698 msg->msg_control = NULL;
735 msg->msg_controllen = 0; 699 msg->msg_controllen = 0;
736 msg->msg_iov = (struct iovec *) iov; 700 msg->msg_iov = (struct iovec *)iov;
737 msg->msg_iovlen = nr_segs; 701 msg->msg_iovlen = nr_segs;
738 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 702 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
739 703
@@ -748,7 +712,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov,
748 struct msghdr msg; 712 struct msghdr msg;
749 int ret; 713 int ret;
750 714
751 init_sync_kiocb(&iocb, NULL); 715 init_sync_kiocb(&iocb, NULL);
752 iocb.private = &siocb; 716 iocb.private = &siocb;
753 717
754 ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); 718 ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
@@ -758,7 +722,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov,
758} 722}
759 723
760static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, 724static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
761 size_t count, loff_t pos) 725 size_t count, loff_t pos)
762{ 726{
763 struct sock_iocb siocb, *x; 727 struct sock_iocb siocb, *x;
764 728
@@ -771,24 +735,25 @@ static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
771 if (!x) 735 if (!x)
772 return -ENOMEM; 736 return -ENOMEM;
773 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, 737 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
774 &x->async_iov, 1); 738 &x->async_iov, 1);
775} 739}
776 740
777static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 741static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
778 struct file *file, struct iovec *iov, unsigned long nr_segs) 742 struct file *file, struct iovec *iov,
743 unsigned long nr_segs)
779{ 744{
780 struct socket *sock = file->private_data; 745 struct socket *sock = file->private_data;
781 size_t size = 0; 746 size_t size = 0;
782 int i; 747 int i;
783 748
784 for (i = 0 ; i < nr_segs ; i++) 749 for (i = 0; i < nr_segs; i++)
785 size += iov[i].iov_len; 750 size += iov[i].iov_len;
786 751
787 msg->msg_name = NULL; 752 msg->msg_name = NULL;
788 msg->msg_namelen = 0; 753 msg->msg_namelen = 0;
789 msg->msg_control = NULL; 754 msg->msg_control = NULL;
790 msg->msg_controllen = 0; 755 msg->msg_controllen = 0;
791 msg->msg_iov = (struct iovec *) iov; 756 msg->msg_iov = (struct iovec *)iov;
792 msg->msg_iovlen = nr_segs; 757 msg->msg_iovlen = nr_segs;
793 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 758 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
794 if (sock->type == SOCK_SEQPACKET) 759 if (sock->type == SOCK_SEQPACKET)
@@ -815,7 +780,7 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov,
815} 780}
816 781
817static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, 782static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
818 size_t count, loff_t pos) 783 size_t count, loff_t pos)
819{ 784{
820 struct sock_iocb siocb, *x; 785 struct sock_iocb siocb, *x;
821 786
@@ -829,46 +794,48 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
829 return -ENOMEM; 794 return -ENOMEM;
830 795
831 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, 796 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
832 &x->async_iov, 1); 797 &x->async_iov, 1);
833} 798}
834 799
835
836/* 800/*
837 * Atomic setting of ioctl hooks to avoid race 801 * Atomic setting of ioctl hooks to avoid race
838 * with module unload. 802 * with module unload.
839 */ 803 */
840 804
841static DEFINE_MUTEX(br_ioctl_mutex); 805static DEFINE_MUTEX(br_ioctl_mutex);
842static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL; 806static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
843 807
844void brioctl_set(int (*hook)(unsigned int, void __user *)) 808void brioctl_set(int (*hook) (unsigned int, void __user *))
845{ 809{
846 mutex_lock(&br_ioctl_mutex); 810 mutex_lock(&br_ioctl_mutex);
847 br_ioctl_hook = hook; 811 br_ioctl_hook = hook;
848 mutex_unlock(&br_ioctl_mutex); 812 mutex_unlock(&br_ioctl_mutex);
849} 813}
814
850EXPORT_SYMBOL(brioctl_set); 815EXPORT_SYMBOL(brioctl_set);
851 816
852static DEFINE_MUTEX(vlan_ioctl_mutex); 817static DEFINE_MUTEX(vlan_ioctl_mutex);
853static int (*vlan_ioctl_hook)(void __user *arg); 818static int (*vlan_ioctl_hook) (void __user *arg);
854 819
855void vlan_ioctl_set(int (*hook)(void __user *)) 820void vlan_ioctl_set(int (*hook) (void __user *))
856{ 821{
857 mutex_lock(&vlan_ioctl_mutex); 822 mutex_lock(&vlan_ioctl_mutex);
858 vlan_ioctl_hook = hook; 823 vlan_ioctl_hook = hook;
859 mutex_unlock(&vlan_ioctl_mutex); 824 mutex_unlock(&vlan_ioctl_mutex);
860} 825}
826
861EXPORT_SYMBOL(vlan_ioctl_set); 827EXPORT_SYMBOL(vlan_ioctl_set);
862 828
863static DEFINE_MUTEX(dlci_ioctl_mutex); 829static DEFINE_MUTEX(dlci_ioctl_mutex);
864static int (*dlci_ioctl_hook)(unsigned int, void __user *); 830static int (*dlci_ioctl_hook) (unsigned int, void __user *);
865 831
866void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)) 832void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
867{ 833{
868 mutex_lock(&dlci_ioctl_mutex); 834 mutex_lock(&dlci_ioctl_mutex);
869 dlci_ioctl_hook = hook; 835 dlci_ioctl_hook = hook;
870 mutex_unlock(&dlci_ioctl_mutex); 836 mutex_unlock(&dlci_ioctl_mutex);
871} 837}
838
872EXPORT_SYMBOL(dlci_ioctl_set); 839EXPORT_SYMBOL(dlci_ioctl_set);
873 840
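
Each setter above serialises against callers through its mutex, which is what makes hook replacement safe against module unload: sock_ioctl() only dereferences br_ioctl_hook while holding br_ioctl_mutex. A client module would, as a sketch (handler body hypothetical), install the hook on load and clear it before its text disappears:

    static int example_br_ioctl(unsigned int cmd, void __user *arg)
    {
            return -EOPNOTSUPP;             /* hypothetical handler body */
    }

    static int __init example_init(void)
    {
            brioctl_set(example_br_ioctl);
            return 0;
    }

    static void __exit example_exit(void)
    {
            brioctl_set(NULL);              /* clear before unload completes */
    }
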
874/* 841/*
@@ -890,8 +857,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
890 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 857 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
891 err = dev_ioctl(cmd, argp); 858 err = dev_ioctl(cmd, argp);
892 } else 859 } else
893#endif /* CONFIG_WIRELESS_EXT */ 860#endif /* CONFIG_WIRELESS_EXT */
894 switch (cmd) { 861 switch (cmd) {
895 case FIOSETOWN: 862 case FIOSETOWN:
896 case SIOCSPGRP: 863 case SIOCSPGRP:
897 err = -EFAULT; 864 err = -EFAULT;
@@ -901,7 +868,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
901 break; 868 break;
902 case FIOGETOWN: 869 case FIOGETOWN:
903 case SIOCGPGRP: 870 case SIOCGPGRP:
904 err = put_user(sock->file->f_owner.pid, (int __user *)argp); 871 err = put_user(sock->file->f_owner.pid,
872 (int __user *)argp);
905 break; 873 break;
906 case SIOCGIFBR: 874 case SIOCGIFBR:
907 case SIOCSIFBR: 875 case SIOCSIFBR:
@@ -912,7 +880,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
912 request_module("bridge"); 880 request_module("bridge");
913 881
914 mutex_lock(&br_ioctl_mutex); 882 mutex_lock(&br_ioctl_mutex);
915 if (br_ioctl_hook) 883 if (br_ioctl_hook)
916 err = br_ioctl_hook(cmd, argp); 884 err = br_ioctl_hook(cmd, argp);
917 mutex_unlock(&br_ioctl_mutex); 885 mutex_unlock(&br_ioctl_mutex);
918 break; 886 break;
@@ -929,7 +897,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
929 break; 897 break;
930 case SIOCGIFDIVERT: 898 case SIOCGIFDIVERT:
931 case SIOCSIFDIVERT: 899 case SIOCSIFDIVERT:
932 /* Convert this to call through a hook */ 900 /* Convert this to call through a hook */
933 err = divert_ioctl(cmd, argp); 901 err = divert_ioctl(cmd, argp);
934 break; 902 break;
935 case SIOCADDDLCI: 903 case SIOCADDDLCI:
@@ -954,7 +922,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
954 if (err == -ENOIOCTLCMD) 922 if (err == -ENOIOCTLCMD)
955 err = dev_ioctl(cmd, argp); 923 err = dev_ioctl(cmd, argp);
956 break; 924 break;
957 } 925 }
958 return err; 926 return err;
959} 927}
960 928
@@ -962,7 +930,7 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res)
962{ 930{
963 int err; 931 int err;
964 struct socket *sock = NULL; 932 struct socket *sock = NULL;
965 933
966 err = security_socket_create(family, type, protocol, 1); 934 err = security_socket_create(family, type, protocol, 1);
967 if (err) 935 if (err)
968 goto out; 936 goto out;
@@ -973,26 +941,33 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res)
973 goto out; 941 goto out;
974 } 942 }
975 943
976 security_socket_post_create(sock, family, type, protocol, 1);
977 sock->type = type; 944 sock->type = type;
945 err = security_socket_post_create(sock, family, type, protocol, 1);
946 if (err)
947 goto out_release;
948
978out: 949out:
979 *res = sock; 950 *res = sock;
980 return err; 951 return err;
952out_release:
953 sock_release(sock);
954 sock = NULL;
955 goto out;
981} 956}
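
The functional change in this hunk: the return value of security_socket_post_create() is now checked, and a failure unwinds through sock_release() instead of being ignored. This is the standard goto-unwind shape used throughout the file; in the abstract (step names are hypothetical):

    err = step_one();
    if (err)
            goto out;
    err = step_two();                   /* e.g. the LSM post-create hook */
    if (err)
            goto out_undo_one;
    return 0;

    out_undo_one:
            undo_step_one();            /* here: sock_release(sock) */
    out:
            return err;
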
982 957
983/* No kernel lock held - perfect */ 958/* No kernel lock held - perfect */
984static unsigned int sock_poll(struct file *file, poll_table * wait) 959static unsigned int sock_poll(struct file *file, poll_table *wait)
985{ 960{
986 struct socket *sock; 961 struct socket *sock;
987 962
988 /* 963 /*
989 * We can't return errors to poll, so it's either yes or no. 964 * We can't return errors to poll, so it's either yes or no.
990 */ 965 */
991 sock = file->private_data; 966 sock = file->private_data;
992 return sock->ops->poll(file, sock, wait); 967 return sock->ops->poll(file, sock, wait);
993} 968}
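
As the comment says, poll cannot report errors; a poll method only registers wait queues and returns a readiness mask. The conventional shape of such a method, sketched with hypothetical names:

    static DECLARE_WAIT_QUEUE_HEAD(example_wq);
    static int example_ready;

    static unsigned int example_poll(struct file *file, poll_table *wait)
    {
            poll_wait(file, &example_wq, wait);     /* registers, never blocks */
            return example_ready ? (POLLIN | POLLRDNORM) : 0;
    }
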
994 969
995static int sock_mmap(struct file * file, struct vm_area_struct * vma) 970static int sock_mmap(struct file *file, struct vm_area_struct *vma)
996{ 971{
997 struct socket *sock = file->private_data; 972 struct socket *sock = file->private_data;
998 973
@@ -1002,12 +977,11 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma)
1002static int sock_close(struct inode *inode, struct file *filp) 977static int sock_close(struct inode *inode, struct file *filp)
1003{ 978{
1004 /* 979 /*
1005 * It is possible the inode is NULL if we are 980 * It is possible the inode is NULL if we are
1006 * closing an unfinished socket. 981 * closing an unfinished socket.
1007 */ 982 */
1008 983
1009 if (!inode) 984 if (!inode) {
1010 {
1011 printk(KERN_DEBUG "sock_close: NULL inode\n"); 985 printk(KERN_DEBUG "sock_close: NULL inode\n");
1012 return 0; 986 return 0;
1013 } 987 }
@@ -1033,57 +1007,52 @@ static int sock_close(struct inode *inode, struct file *filp)
1033 1007
1034static int sock_fasync(int fd, struct file *filp, int on) 1008static int sock_fasync(int fd, struct file *filp, int on)
1035{ 1009{
1036 struct fasync_struct *fa, *fna=NULL, **prev; 1010 struct fasync_struct *fa, *fna = NULL, **prev;
1037 struct socket *sock; 1011 struct socket *sock;
1038 struct sock *sk; 1012 struct sock *sk;
1039 1013
1040 if (on) 1014 if (on) {
1041 {
1042 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); 1015 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
1043 if(fna==NULL) 1016 if (fna == NULL)
1044 return -ENOMEM; 1017 return -ENOMEM;
1045 } 1018 }
1046 1019
1047 sock = filp->private_data; 1020 sock = filp->private_data;
1048 1021
1049 if ((sk=sock->sk) == NULL) { 1022 sk = sock->sk;
1023 if (sk == NULL) {
1050 kfree(fna); 1024 kfree(fna);
1051 return -EINVAL; 1025 return -EINVAL;
1052 } 1026 }
1053 1027
1054 lock_sock(sk); 1028 lock_sock(sk);
1055 1029
1056 prev=&(sock->fasync_list); 1030 prev = &(sock->fasync_list);
1057 1031
1058 for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev) 1032 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1059 if (fa->fa_file==filp) 1033 if (fa->fa_file == filp)
1060 break; 1034 break;
1061 1035
1062 if(on) 1036 if (on) {
1063 { 1037 if (fa != NULL) {
1064 if(fa!=NULL)
1065 {
1066 write_lock_bh(&sk->sk_callback_lock); 1038 write_lock_bh(&sk->sk_callback_lock);
1067 fa->fa_fd=fd; 1039 fa->fa_fd = fd;
1068 write_unlock_bh(&sk->sk_callback_lock); 1040 write_unlock_bh(&sk->sk_callback_lock);
1069 1041
1070 kfree(fna); 1042 kfree(fna);
1071 goto out; 1043 goto out;
1072 } 1044 }
1073 fna->fa_file=filp; 1045 fna->fa_file = filp;
1074 fna->fa_fd=fd; 1046 fna->fa_fd = fd;
1075 fna->magic=FASYNC_MAGIC; 1047 fna->magic = FASYNC_MAGIC;
1076 fna->fa_next=sock->fasync_list; 1048 fna->fa_next = sock->fasync_list;
1077 write_lock_bh(&sk->sk_callback_lock); 1049 write_lock_bh(&sk->sk_callback_lock);
1078 sock->fasync_list=fna; 1050 sock->fasync_list = fna;
1079 write_unlock_bh(&sk->sk_callback_lock); 1051 write_unlock_bh(&sk->sk_callback_lock);
1080 } 1052 } else {
1081 else 1053 if (fa != NULL) {
1082 {
1083 if (fa!=NULL)
1084 {
1085 write_lock_bh(&sk->sk_callback_lock); 1054 write_lock_bh(&sk->sk_callback_lock);
1086 *prev=fa->fa_next; 1055 *prev = fa->fa_next;
1087 write_unlock_bh(&sk->sk_callback_lock); 1056 write_unlock_bh(&sk->sk_callback_lock);
1088 kfree(fa); 1057 kfree(fa);
1089 } 1058 }
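
Most drivers obtain exactly this add/update/remove behaviour from fasync_helper() instead of open-coding the list walk; the socket code keeps the manual version so the updates can happen under sk->sk_callback_lock. The helper-based equivalent, as a sketch:

    static struct fasync_struct *example_fasync_list;

    static int example_fasync(int fd, struct file *filp, int on)
    {
            return fasync_helper(fd, filp, on, &example_fasync_list);
    }

    /* On readiness, signal the registered owners: */
    kill_fasync(&example_fasync_list, SIGIO, POLL_IN);
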
@@ -1100,10 +1069,9 @@ int sock_wake_async(struct socket *sock, int how, int band)
1100{ 1069{
1101 if (!sock || !sock->fasync_list) 1070 if (!sock || !sock->fasync_list)
1102 return -1; 1071 return -1;
1103 switch (how) 1072 switch (how) {
1104 {
1105 case 1: 1073 case 1:
1106 1074
1107 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1075 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1108 break; 1076 break;
1109 goto call_kill; 1077 goto call_kill;
@@ -1112,7 +1080,7 @@ int sock_wake_async(struct socket *sock, int how, int band)
1112 break; 1080 break;
1113 /* fall through */ 1081 /* fall through */
1114 case 0: 1082 case 0:
1115 call_kill: 1083call_kill:
1116 __kill_fasync(sock->fasync_list, SIGIO, band); 1084 __kill_fasync(sock->fasync_list, SIGIO, band);
1117 break; 1085 break;
1118 case 3: 1086 case 3:
@@ -1121,13 +1089,15 @@ int sock_wake_async(struct socket *sock, int how, int band)
1121 return 0; 1089 return 0;
1122} 1090}
1123 1091
1124static int __sock_create(int family, int type, int protocol, struct socket **res, int kern) 1092static int __sock_create(int family, int type, int protocol,
1093 struct socket **res, int kern)
1125{ 1094{
1126 int err; 1095 int err;
1127 struct socket *sock; 1096 struct socket *sock;
1097 const struct net_proto_family *pf;
1128 1098
1129 /* 1099 /*
1130 * Check protocol is in range 1100 * Check protocol is in range
1131 */ 1101 */
1132 if (family < 0 || family >= NPROTO) 1102 if (family < 0 || family >= NPROTO)
1133 return -EAFNOSUPPORT; 1103 return -EAFNOSUPPORT;
@@ -1140,10 +1110,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
1140 deadlock in module load. 1110 deadlock in module load.
1141 */ 1111 */
1142 if (family == PF_INET && type == SOCK_PACKET) { 1112 if (family == PF_INET && type == SOCK_PACKET) {
1143 static int warned; 1113 static int warned;
1144 if (!warned) { 1114 if (!warned) {
1145 warned = 1; 1115 warned = 1;
1146 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm); 1116 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1117 current->comm);
1147 } 1118 }
1148 family = PF_PACKET; 1119 family = PF_PACKET;
1149 } 1120 }
@@ -1151,79 +1122,84 @@ static int __sock_create(int family, int type, int protocol, struct socket **res
1151 err = security_socket_create(family, type, protocol, kern); 1122 err = security_socket_create(family, type, protocol, kern);
1152 if (err) 1123 if (err)
1153 return err; 1124 return err;
1154 1125
1126 /*
1127 * Allocate the socket and allow the family to set things up. If
1128 * the protocol is 0, the family is instructed to select an appropriate
1129 * default.
1130 */
1131 sock = sock_alloc();
1132 if (!sock) {
1133 if (net_ratelimit())
1134 printk(KERN_WARNING "socket: no more sockets\n");
1135 return -ENFILE; /* Not exactly a match, but it's the
1136 closest POSIX thing */
1137 }
1138
1139 sock->type = type;
1140
1155#if defined(CONFIG_KMOD) 1141#if defined(CONFIG_KMOD)
1156 /* Attempt to load a protocol module if the find failed. 1142 /* Attempt to load a protocol module if the find failed.
1157 * 1143 *
1158 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 1144 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1159 * requested real, full-featured networking support upon configuration. 1145 * requested real, full-featured networking support upon configuration.
1160 * Otherwise module support will break! 1146 * Otherwise module support will break!
1161 */ 1147 */
1162 if (net_families[family]==NULL) 1148 if (net_families[family] == NULL)
1163 { 1149 request_module("net-pf-%d", family);
1164 request_module("net-pf-%d",family);
1165 }
1166#endif 1150#endif
1167 1151
1168 net_family_read_lock(); 1152 rcu_read_lock();
1169 if (net_families[family] == NULL) { 1153 pf = rcu_dereference(net_families[family]);
1170 err = -EAFNOSUPPORT; 1154 err = -EAFNOSUPPORT;
1171 goto out; 1155 if (!pf)
1172 } 1156 goto out_release;
1173
1174/*
1175 * Allocate the socket and allow the family to set things up. If
1176 * the protocol is 0, the family is instructed to select an appropriate
1177 * default.
1178 */
1179
1180 if (!(sock = sock_alloc())) {
1181 if (net_ratelimit())
1182 printk(KERN_WARNING "socket: no more sockets\n");
1183 err = -ENFILE; /* Not exactly a match, but it's the
1184 closest POSIX thing */
1185 goto out;
1186 }
1187
1188 sock->type = type;
1189 1157
1190 /* 1158 /*
1191 * We will call the ->create function, that possibly is in a loadable 1159 * We will call the ->create function, that possibly is in a loadable
1192 * module, so we have to bump that loadable module refcnt first. 1160 * module, so we have to bump that loadable module refcnt first.
1193 */ 1161 */
1194 err = -EAFNOSUPPORT; 1162 if (!try_module_get(pf->owner))
1195 if (!try_module_get(net_families[family]->owner))
1196 goto out_release; 1163 goto out_release;
1197 1164
1198 if ((err = net_families[family]->create(sock, protocol)) < 0) { 1165 /* Now protected by module ref count */
1199 sock->ops = NULL; 1166 rcu_read_unlock();
1167
1168 err = pf->create(sock, protocol);
1169 if (err < 0)
1200 goto out_module_put; 1170 goto out_module_put;
1201 }
1202 1171
1203 /* 1172 /*
1204 * Now to bump the refcnt of the [loadable] module that owns this 1173 * Now to bump the refcnt of the [loadable] module that owns this
1205 * socket at sock_release time we decrement its refcnt. 1174 * socket at sock_release time we decrement its refcnt.
1206 */ 1175 */
1207 if (!try_module_get(sock->ops->owner)) { 1176 if (!try_module_get(sock->ops->owner))
1208 sock->ops = NULL; 1177 goto out_module_busy;
1209 goto out_module_put; 1178
1210 }
1211 /* 1179 /*
1212 * Now that we're done with the ->create function, the [loadable] 1180 * Now that we're done with the ->create function, the [loadable]
1213 * module can have its refcnt decremented 1181 * module can have its refcnt decremented
1214 */ 1182 */
1215 module_put(net_families[family]->owner); 1183 module_put(pf->owner);
1184 err = security_socket_post_create(sock, family, type, protocol, kern);
1185 if (err)
1186 goto out_release;
1216 *res = sock; 1187 *res = sock;
1217 security_socket_post_create(sock, family, type, protocol, kern);
1218 1188
1219out: 1189 return 0;
1220 net_family_read_unlock(); 1190
1221 return err; 1191out_module_busy:
1192 err = -EAFNOSUPPORT;
1222out_module_put: 1193out_module_put:
1223 module_put(net_families[family]->owner); 1194 sock->ops = NULL;
1224out_release: 1195 module_put(pf->owner);
1196out_sock_release:
1225 sock_release(sock); 1197 sock_release(sock);
1226 goto out; 1198 return err;
1199
1200out_release:
1201 rcu_read_unlock();
1202 goto out_sock_release;
1227} 1203}
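
This is the substantive change of the patch: the net_family read lock is gone and the family table is now read under RCU. The ordering is what makes it safe — the pointer is fetched with rcu_dereference() inside the read-side section, try_module_get() pins the owning module before the section ends, and only then may the code sleep in ->create(). Distilled (the table name is hypothetical):

    const struct net_proto_family *pf;

    rcu_read_lock();
    pf = rcu_dereference(families[family]);
    if (!pf || !try_module_get(pf->owner)) {
            rcu_read_unlock();
            return -EAFNOSUPPORT;
    }
    rcu_read_unlock();          /* pf stays valid: its module is pinned */

    err = pf->create(sock, protocol);   /* may sleep now */
    module_put(pf->owner);
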
1228 1204
1229int sock_create(int family, int type, int protocol, struct socket **res) 1205int sock_create(int family, int type, int protocol, struct socket **res)
@@ -1262,7 +1238,8 @@ out_release:
1262 * Create a pair of connected sockets. 1238 * Create a pair of connected sockets.
1263 */ 1239 */
1264 1240
1265asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec) 1241asmlinkage long sys_socketpair(int family, int type, int protocol,
1242 int __user *usockvec)
1266{ 1243{
1267 struct socket *sock1, *sock2; 1244 struct socket *sock1, *sock2;
1268 int fd1, fd2, err; 1245 int fd1, fd2, err;
@@ -1281,7 +1258,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
1281 goto out_release_1; 1258 goto out_release_1;
1282 1259
1283 err = sock1->ops->socketpair(sock1, sock2); 1260 err = sock1->ops->socketpair(sock1, sock2);
1284 if (err < 0) 1261 if (err < 0)
1285 goto out_release_both; 1262 goto out_release_both;
1286 1263
1287 fd1 = fd2 = -1; 1264 fd1 = fd2 = -1;
@@ -1300,7 +1277,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
1300 * Not kernel problem. 1277 * Not kernel problem.
1301 */ 1278 */
1302 1279
1303 err = put_user(fd1, &usockvec[0]); 1280 err = put_user(fd1, &usockvec[0]);
1304 if (!err) 1281 if (!err)
1305 err = put_user(fd2, &usockvec[1]); 1282 err = put_user(fd2, &usockvec[1]);
1306 if (!err) 1283 if (!err)
@@ -1311,19 +1288,18 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u
1311 return err; 1288 return err;
1312 1289
1313out_close_1: 1290out_close_1:
1314 sock_release(sock2); 1291 sock_release(sock2);
1315 sys_close(fd1); 1292 sys_close(fd1);
1316 return err; 1293 return err;
1317 1294
1318out_release_both: 1295out_release_both:
1319 sock_release(sock2); 1296 sock_release(sock2);
1320out_release_1: 1297out_release_1:
1321 sock_release(sock1); 1298 sock_release(sock1);
1322out: 1299out:
1323 return err; 1300 return err;
1324} 1301}
1325 1302
1326
1327/* 1303/*
1328 * Bind a name to a socket. Nothing much to do here since it's 1304 * Bind a name to a socket. Nothing much to do here since it's
1329 * the protocol's responsibility to handle the local address. 1305 * the protocol's responsibility to handle the local address.
@@ -1338,35 +1314,39 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1338 char address[MAX_SOCK_ADDR]; 1314 char address[MAX_SOCK_ADDR];
1339 int err, fput_needed; 1315 int err, fput_needed;
1340 1316
1341 if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) 1317 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1342 { 1318 if(sock) {
1343 if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) { 1319 err = move_addr_to_kernel(umyaddr, addrlen, address);
1344 err = security_socket_bind(sock, (struct sockaddr *)address, addrlen); 1320 if (err >= 0) {
1321 err = security_socket_bind(sock,
1322 (struct sockaddr *)address,
1323 addrlen);
1345 if (!err) 1324 if (!err)
1346 err = sock->ops->bind(sock, 1325 err = sock->ops->bind(sock,
1347 (struct sockaddr *)address, addrlen); 1326 (struct sockaddr *)
1327 address, addrlen);
1348 } 1328 }
1349 fput_light(sock->file, fput_needed); 1329 fput_light(sock->file, fput_needed);
1350 } 1330 }
1351 return err; 1331 return err;
1352} 1332}
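
sys_bind() now reads as the lookup-light pattern shared by most syscalls in this file: sockfd_lookup_light() avoids a full fget() when the descriptor table cannot change underneath us, and fput_light() drops the reference only if one was actually taken. The common skeleton:

    int err, fput_needed;
    struct socket *sock;

    sock = sockfd_lookup_light(fd, &err, &fput_needed);
    if (!sock)
            return err;
    err = do_the_operation(sock);       /* per-syscall body, hypothetical */
    fput_light(sock->file, fput_needed);
    return err;
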
1353 1333
1354
1355/* 1334/*
1356 * Perform a listen. Basically, we allow the protocol to do anything 1335 * Perform a listen. Basically, we allow the protocol to do anything
1357 * necessary for a listen, and if that works, we mark the socket as 1336 * necessary for a listen, and if that works, we mark the socket as
1358 * ready for listening. 1337 * ready for listening.
1359 */ 1338 */
1360 1339
1361int sysctl_somaxconn = SOMAXCONN; 1340int sysctl_somaxconn __read_mostly = SOMAXCONN;
1362 1341
1363asmlinkage long sys_listen(int fd, int backlog) 1342asmlinkage long sys_listen(int fd, int backlog)
1364{ 1343{
1365 struct socket *sock; 1344 struct socket *sock;
1366 int err, fput_needed; 1345 int err, fput_needed;
1367 1346
1368 if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { 1347 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1369 if ((unsigned) backlog > sysctl_somaxconn) 1348 if (sock) {
1349 if ((unsigned)backlog > sysctl_somaxconn)
1370 backlog = sysctl_somaxconn; 1350 backlog = sysctl_somaxconn;
1371 1351
1372 err = security_socket_listen(sock, backlog); 1352 err = security_socket_listen(sock, backlog);
@@ -1378,7 +1358,6 @@ asmlinkage long sys_listen(int fd, int backlog)
1378 return err; 1358 return err;
1379} 1359}
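
Because of the clamp above, user space may pass any backlog and the kernel silently bounds it by the somaxconn sysctl. From the application side (plain C, a sketch):

    /* net.core.somaxconn caps the effective backlog; with the default
     * of 128, listen(fd, 4096) behaves exactly like listen(fd, 128). */
    if (listen(fd, SOMAXCONN) < 0)
            perror("listen");
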
1380 1360
1381
1382/* 1361/*
1383 * For accept, we attempt to create a new socket, set up the link 1362 * For accept, we attempt to create a new socket, set up the link
1384 * with the client, wake up the client, then return the new 1363 * with the client, wake up the client, then return the new
@@ -1391,7 +1370,8 @@ asmlinkage long sys_listen(int fd, int backlog)
1391 * clean when we restructure accept also. 1370 * clean when we restructure accept also.
1392 */ 1371 */
1393 1372
1394asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) 1373asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1374 int __user *upeer_addrlen)
1395{ 1375{
1396 struct socket *sock, *newsock; 1376 struct socket *sock, *newsock;
1397 struct file *newfile; 1377 struct file *newfile;
@@ -1403,7 +1383,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _
1403 goto out; 1383 goto out;
1404 1384
1405 err = -ENFILE; 1385 err = -ENFILE;
1406 if (!(newsock = sock_alloc())) 1386 if (!(newsock = sock_alloc()))
1407 goto out_put; 1387 goto out_put;
1408 1388
1409 newsock->type = sock->type; 1389 newsock->type = sock->type;
@@ -1435,11 +1415,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _
1435 goto out_fd; 1415 goto out_fd;
1436 1416
1437 if (upeer_sockaddr) { 1417 if (upeer_sockaddr) {
1438 if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) { 1418 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1419 &len, 2) < 0) {
1439 err = -ECONNABORTED; 1420 err = -ECONNABORTED;
1440 goto out_fd; 1421 goto out_fd;
1441 } 1422 }
1442 err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen); 1423 err = move_addr_to_user(address, len, upeer_sockaddr,
1424 upeer_addrlen);
1443 if (err < 0) 1425 if (err < 0)
1444 goto out_fd; 1426 goto out_fd;
1445 } 1427 }
@@ -1461,7 +1443,6 @@ out_fd:
1461 goto out_put; 1443 goto out_put;
1462} 1444}
1463 1445
1464
1465/* 1446/*
1466 * Attempt to connect to a socket with the server address. The address 1447 * Attempt to connect to a socket with the server address. The address
1467 * is in user space so we verify it is OK and move it to kernel space. 1448 * is in user space so we verify it is OK and move it to kernel space.
@@ -1474,7 +1455,8 @@ out_fd:
1474 * include the -EINPROGRESS status for such sockets. 1455 * include the -EINPROGRESS status for such sockets.
1475 */ 1456 */
1476 1457
1477asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) 1458asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1459 int addrlen)
1478{ 1460{
1479 struct socket *sock; 1461 struct socket *sock;
1480 char address[MAX_SOCK_ADDR]; 1462 char address[MAX_SOCK_ADDR];
@@ -1487,11 +1469,12 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrl
1487 if (err < 0) 1469 if (err < 0)
1488 goto out_put; 1470 goto out_put;
1489 1471
1490 err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); 1472 err =
1473 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1491 if (err) 1474 if (err)
1492 goto out_put; 1475 goto out_put;
1493 1476
1494 err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen, 1477 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1495 sock->file->f_flags); 1478 sock->file->f_flags);
1496out_put: 1479out_put:
1497 fput_light(sock->file, fput_needed); 1480 fput_light(sock->file, fput_needed);
@@ -1504,12 +1487,13 @@ out:
1504 * name to user space. 1487 * name to user space.
1505 */ 1488 */
1506 1489
1507asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) 1490asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1491 int __user *usockaddr_len)
1508{ 1492{
1509 struct socket *sock; 1493 struct socket *sock;
1510 char address[MAX_SOCK_ADDR]; 1494 char address[MAX_SOCK_ADDR];
1511 int len, err, fput_needed; 1495 int len, err, fput_needed;
1512 1496
1513 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1497 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1514 if (!sock) 1498 if (!sock)
1515 goto out; 1499 goto out;
@@ -1534,22 +1518,27 @@ out:
1534 * name to user space. 1518 * name to user space.
1535 */ 1519 */
1536 1520
1537asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) 1521asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1522 int __user *usockaddr_len)
1538{ 1523{
1539 struct socket *sock; 1524 struct socket *sock;
1540 char address[MAX_SOCK_ADDR]; 1525 char address[MAX_SOCK_ADDR];
1541 int len, err, fput_needed; 1526 int len, err, fput_needed;
1542 1527
1543 if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { 1528 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1529 if (sock != NULL) {
1544 err = security_socket_getpeername(sock); 1530 err = security_socket_getpeername(sock);
1545 if (err) { 1531 if (err) {
1546 fput_light(sock->file, fput_needed); 1532 fput_light(sock->file, fput_needed);
1547 return err; 1533 return err;
1548 } 1534 }
1549 1535
1550 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1); 1536 err =
1537 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1538 1);
1551 if (!err) 1539 if (!err)
1552 err=move_addr_to_user(address,len, usockaddr, usockaddr_len); 1540 err = move_addr_to_user(address, len, usockaddr,
1541 usockaddr_len);
1553 fput_light(sock->file, fput_needed); 1542 fput_light(sock->file, fput_needed);
1554 } 1543 }
1555 return err; 1544 return err;
@@ -1561,8 +1550,9 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int _
1561 * the protocol. 1550 * the protocol.
1562 */ 1551 */
1563 1552
1564asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags, 1553asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1565 struct sockaddr __user *addr, int addr_len) 1554 unsigned flags, struct sockaddr __user *addr,
1555 int addr_len)
1566{ 1556{
1567 struct socket *sock; 1557 struct socket *sock;
1568 char address[MAX_SOCK_ADDR]; 1558 char address[MAX_SOCK_ADDR];
@@ -1579,54 +1569,55 @@ asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flag
1579 sock = sock_from_file(sock_file, &err); 1569 sock = sock_from_file(sock_file, &err);
1580 if (!sock) 1570 if (!sock)
1581 goto out_put; 1571 goto out_put;
1582 iov.iov_base=buff; 1572 iov.iov_base = buff;
1583 iov.iov_len=len; 1573 iov.iov_len = len;
1584 msg.msg_name=NULL; 1574 msg.msg_name = NULL;
1585 msg.msg_iov=&iov; 1575 msg.msg_iov = &iov;
1586 msg.msg_iovlen=1; 1576 msg.msg_iovlen = 1;
1587 msg.msg_control=NULL; 1577 msg.msg_control = NULL;
1588 msg.msg_controllen=0; 1578 msg.msg_controllen = 0;
1589 msg.msg_namelen=0; 1579 msg.msg_namelen = 0;
1590 if (addr) { 1580 if (addr) {
1591 err = move_addr_to_kernel(addr, addr_len, address); 1581 err = move_addr_to_kernel(addr, addr_len, address);
1592 if (err < 0) 1582 if (err < 0)
1593 goto out_put; 1583 goto out_put;
1594 msg.msg_name=address; 1584 msg.msg_name = address;
1595 msg.msg_namelen=addr_len; 1585 msg.msg_namelen = addr_len;
1596 } 1586 }
1597 if (sock->file->f_flags & O_NONBLOCK) 1587 if (sock->file->f_flags & O_NONBLOCK)
1598 flags |= MSG_DONTWAIT; 1588 flags |= MSG_DONTWAIT;
1599 msg.msg_flags = flags; 1589 msg.msg_flags = flags;
1600 err = sock_sendmsg(sock, &msg, len); 1590 err = sock_sendmsg(sock, &msg, len);
1601 1591
1602out_put: 1592out_put:
1603 fput_light(sock_file, fput_needed); 1593 fput_light(sock_file, fput_needed);
1604 return err; 1594 return err;
1605} 1595}
1606 1596
1607/* 1597/*
1608 * Send a datagram down a socket. 1598 * Send a datagram down a socket.
1609 */ 1599 */
1610 1600
1611asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags) 1601asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1612{ 1602{
1613 return sys_sendto(fd, buff, len, flags, NULL, 0); 1603 return sys_sendto(fd, buff, len, flags, NULL, 0);
1614} 1604}
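
As the wrapper shows, send() is literally sendto() with no destination; the same identity holds at the user API level:

    /* Equivalent for a connected socket: */
    send(fd, buf, len, 0);
    sendto(fd, buf, len, 0, NULL, 0);
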
1615 1605
1616/* 1606/*
1617 * Receive a frame from the socket and optionally record the address of the 1607 * Receive a frame from the socket and optionally record the address of the
1618 * sender. We verify the buffers are writable and if needed move the 1608 * sender. We verify the buffers are writable and if needed move the
1619 * sender address from kernel to user space. 1609 * sender address from kernel to user space.
1620 */ 1610 */
1621 1611
1622asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags, 1612asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1623 struct sockaddr __user *addr, int __user *addr_len) 1613 unsigned flags, struct sockaddr __user *addr,
1614 int __user *addr_len)
1624{ 1615{
1625 struct socket *sock; 1616 struct socket *sock;
1626 struct iovec iov; 1617 struct iovec iov;
1627 struct msghdr msg; 1618 struct msghdr msg;
1628 char address[MAX_SOCK_ADDR]; 1619 char address[MAX_SOCK_ADDR];
1629 int err,err2; 1620 int err, err2;
1630 struct file *sock_file; 1621 struct file *sock_file;
1631 int fput_needed; 1622 int fput_needed;
1632 1623
@@ -1638,23 +1629,22 @@ asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned f
1638 if (!sock) 1629 if (!sock)
1639 goto out; 1630 goto out;
1640 1631
1641 msg.msg_control=NULL; 1632 msg.msg_control = NULL;
1642 msg.msg_controllen=0; 1633 msg.msg_controllen = 0;
1643 msg.msg_iovlen=1; 1634 msg.msg_iovlen = 1;
1644 msg.msg_iov=&iov; 1635 msg.msg_iov = &iov;
1645 iov.iov_len=size; 1636 iov.iov_len = size;
1646 iov.iov_base=ubuf; 1637 iov.iov_base = ubuf;
1647 msg.msg_name=address; 1638 msg.msg_name = address;
1648 msg.msg_namelen=MAX_SOCK_ADDR; 1639 msg.msg_namelen = MAX_SOCK_ADDR;
1649 if (sock->file->f_flags & O_NONBLOCK) 1640 if (sock->file->f_flags & O_NONBLOCK)
1650 flags |= MSG_DONTWAIT; 1641 flags |= MSG_DONTWAIT;
1651 err=sock_recvmsg(sock, &msg, size, flags); 1642 err = sock_recvmsg(sock, &msg, size, flags);
1652 1643
1653 if(err >= 0 && addr != NULL) 1644 if (err >= 0 && addr != NULL) {
1654 { 1645 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1655 err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len); 1646 if (err2 < 0)
1656 if(err2<0) 1647 err = err2;
1657 err=err2;
1658 } 1648 }
1659out: 1649out:
1660 fput_light(sock_file, fput_needed); 1650 fput_light(sock_file, fput_needed);
@@ -1662,10 +1652,11 @@ out:
1662} 1652}
1663 1653
1664/* 1654/*
1665 * Receive a datagram from a socket. 1655 * Receive a datagram from a socket.
1666 */ 1656 */
1667 1657
1668asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags) 1658asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1659 unsigned flags)
1669{ 1660{
1670 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 1661 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1671} 1662}
@@ -1675,24 +1666,29 @@ asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags
1675 * to pass the user mode parameter for the protocols to sort out. 1666 * to pass the user mode parameter for the protocols to sort out.
1676 */ 1667 */
1677 1668
1678asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen) 1669asmlinkage long sys_setsockopt(int fd, int level, int optname,
1670 char __user *optval, int optlen)
1679{ 1671{
1680 int err, fput_needed; 1672 int err, fput_needed;
1681 struct socket *sock; 1673 struct socket *sock;
1682 1674
1683 if (optlen < 0) 1675 if (optlen < 0)
1684 return -EINVAL; 1676 return -EINVAL;
1685 1677
1686 if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) 1678 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1687 { 1679 if (sock != NULL) {
1688 err = security_socket_setsockopt(sock,level,optname); 1680 err = security_socket_setsockopt(sock, level, optname);
1689 if (err) 1681 if (err)
1690 goto out_put; 1682 goto out_put;
1691 1683
1692 if (level == SOL_SOCKET) 1684 if (level == SOL_SOCKET)
1693 err=sock_setsockopt(sock,level,optname,optval,optlen); 1685 err =
1686 sock_setsockopt(sock, level, optname, optval,
1687 optlen);
1694 else 1688 else
1695 err=sock->ops->setsockopt(sock, level, optname, optval, optlen); 1689 err =
1690 sock->ops->setsockopt(sock, level, optname, optval,
1691 optlen);
1696out_put: 1692out_put:
1697 fput_light(sock->file, fput_needed); 1693 fput_light(sock->file, fput_needed);
1698 } 1694 }
@@ -1704,27 +1700,32 @@ out_put:
1704 * to pass a user mode parameter for the protocols to sort out. 1700 * to pass a user mode parameter for the protocols to sort out.
1705 */ 1701 */
1706 1702
1707asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) 1703asmlinkage long sys_getsockopt(int fd, int level, int optname,
1704 char __user *optval, int __user *optlen)
1708{ 1705{
1709 int err, fput_needed; 1706 int err, fput_needed;
1710 struct socket *sock; 1707 struct socket *sock;
1711 1708
1712 if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { 1709 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1710 if (sock != NULL) {
1713 err = security_socket_getsockopt(sock, level, optname); 1711 err = security_socket_getsockopt(sock, level, optname);
1714 if (err) 1712 if (err)
1715 goto out_put; 1713 goto out_put;
1716 1714
1717 if (level == SOL_SOCKET) 1715 if (level == SOL_SOCKET)
1718 err=sock_getsockopt(sock,level,optname,optval,optlen); 1716 err =
1717 sock_getsockopt(sock, level, optname, optval,
1718 optlen);
1719 else 1719 else
1720 err=sock->ops->getsockopt(sock, level, optname, optval, optlen); 1720 err =
1721 sock->ops->getsockopt(sock, level, optname, optval,
1722 optlen);
1721out_put: 1723out_put:
1722 fput_light(sock->file, fput_needed); 1724 fput_light(sock->file, fput_needed);
1723 } 1725 }
1724 return err; 1726 return err;
1725} 1727}
1726 1728
1727
1728/* 1729/*
1729 * Shutdown a socket. 1730 * Shutdown a socket.
1730 */ 1731 */
@@ -1734,8 +1735,8 @@ asmlinkage long sys_shutdown(int fd, int how)
1734 int err, fput_needed; 1735 int err, fput_needed;
1735 struct socket *sock; 1736 struct socket *sock;
1736 1737
1737 if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) 1738 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1738 { 1739 if (sock != NULL) {
1739 err = security_socket_shutdown(sock, how); 1740 err = security_socket_shutdown(sock, how);
1740 if (!err) 1741 if (!err)
1741 err = sock->ops->shutdown(sock, how); 1742 err = sock->ops->shutdown(sock, how);
@@ -1744,41 +1745,42 @@ asmlinkage long sys_shutdown(int fd, int how)
1744 return err; 1745 return err;
1745} 1746}
1746 1747
1747/* A couple of helpful macros for getting the address of the 32/64 bit 1748/* A couple of helpful macros for getting the address of the 32/64 bit
1748 * fields which are the same type (int / unsigned) on our platforms. 1749 * fields which are the same type (int / unsigned) on our platforms.
1749 */ 1750 */
1750#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) 1751#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1751#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1752#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1752#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1753#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1753 1754
1754
1755/* 1755/*
1756 * BSD sendmsg interface 1756 * BSD sendmsg interface
1757 */ 1757 */
1758 1758
1759asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) 1759asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1760{ 1760{
1761 struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; 1761 struct compat_msghdr __user *msg_compat =
1762 (struct compat_msghdr __user *)msg;
1762 struct socket *sock; 1763 struct socket *sock;
1763 char address[MAX_SOCK_ADDR]; 1764 char address[MAX_SOCK_ADDR];
1764 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1765 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1765 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1766 unsigned char ctl[sizeof(struct cmsghdr) + 20]
1766 __attribute__ ((aligned (sizeof(__kernel_size_t)))); 1767 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1767 /* 20 is size of ipv6_pktinfo */ 1768 /* 20 is size of ipv6_pktinfo */
1768 unsigned char *ctl_buf = ctl; 1769 unsigned char *ctl_buf = ctl;
1769 struct msghdr msg_sys; 1770 struct msghdr msg_sys;
1770 int err, ctl_len, iov_size, total_len; 1771 int err, ctl_len, iov_size, total_len;
1771 int fput_needed; 1772 int fput_needed;
1772 1773
1773 err = -EFAULT; 1774 err = -EFAULT;
1774 if (MSG_CMSG_COMPAT & flags) { 1775 if (MSG_CMSG_COMPAT & flags) {
1775 if (get_compat_msghdr(&msg_sys, msg_compat)) 1776 if (get_compat_msghdr(&msg_sys, msg_compat))
1776 return -EFAULT; 1777 return -EFAULT;
1777 } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1778 }
1779 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1778 return -EFAULT; 1780 return -EFAULT;
1779 1781
1780 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1782 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1781 if (!sock) 1783 if (!sock)
1782 goto out; 1784 goto out;
1783 1785
1784 /* do not move before msg_sys is valid */ 1786 /* do not move before msg_sys is valid */
@@ -1786,7 +1788,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1786 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1788 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1787 goto out_put; 1789 goto out_put;
1788 1790
1789 /* Check whether to allocate the iovec area*/ 1791 /* Check whether to allocate the iovec area */
1790 err = -ENOMEM; 1792 err = -ENOMEM;
1791 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1793 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1792 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1794 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1800,7 +1802,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1800 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); 1802 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1801 } else 1803 } else
1802 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); 1804 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1803 if (err < 0) 1805 if (err < 0)
1804 goto out_freeiov; 1806 goto out_freeiov;
1805 total_len = err; 1807 total_len = err;
1806 1808
@@ -1808,18 +1810,19 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1808 1810
1809 if (msg_sys.msg_controllen > INT_MAX) 1811 if (msg_sys.msg_controllen > INT_MAX)
1810 goto out_freeiov; 1812 goto out_freeiov;
1811 ctl_len = msg_sys.msg_controllen; 1813 ctl_len = msg_sys.msg_controllen;
1812 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1814 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1813 err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl)); 1815 err =
1816 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1817 sizeof(ctl));
1814 if (err) 1818 if (err)
1815 goto out_freeiov; 1819 goto out_freeiov;
1816 ctl_buf = msg_sys.msg_control; 1820 ctl_buf = msg_sys.msg_control;
1817 ctl_len = msg_sys.msg_controllen; 1821 ctl_len = msg_sys.msg_controllen;
1818 } else if (ctl_len) { 1822 } else if (ctl_len) {
1819 if (ctl_len > sizeof(ctl)) 1823 if (ctl_len > sizeof(ctl)) {
1820 {
1821 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1824 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1822 if (ctl_buf == NULL) 1825 if (ctl_buf == NULL)
1823 goto out_freeiov; 1826 goto out_freeiov;
1824 } 1827 }
1825 err = -EFAULT; 1828 err = -EFAULT;
@@ -1828,7 +1831,8 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1828 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1831 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1829 * checking falls down on this. 1832 * checking falls down on this.
1830 */ 1833 */
1831 if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len)) 1834 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1835 ctl_len))
1832 goto out_freectl; 1836 goto out_freectl;
1833 msg_sys.msg_control = ctl_buf; 1837 msg_sys.msg_control = ctl_buf;
1834 } 1838 }
@@ -1839,14 +1843,14 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1839 err = sock_sendmsg(sock, &msg_sys, total_len); 1843 err = sock_sendmsg(sock, &msg_sys, total_len);
1840 1844
1841out_freectl: 1845out_freectl:
1842 if (ctl_buf != ctl) 1846 if (ctl_buf != ctl)
1843 sock_kfree_s(sock->sk, ctl_buf, ctl_len); 1847 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1844out_freeiov: 1848out_freeiov:
1845 if (iov != iovstack) 1849 if (iov != iovstack)
1846 sock_kfree_s(sock->sk, iov, iov_size); 1850 sock_kfree_s(sock->sk, iov, iov_size);
1847out_put: 1851out_put:
1848 fput_light(sock->file, fput_needed); 1852 fput_light(sock->file, fput_needed);
1849out: 1853out:
1850 return err; 1854 return err;
1851} 1855}
1852 1856
@@ -1854,12 +1858,14 @@ out:
1854 * BSD recvmsg interface 1858 * BSD recvmsg interface
1855 */ 1859 */
1856 1860
1857asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags) 1861asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1862 unsigned int flags)
1858{ 1863{
1859 struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; 1864 struct compat_msghdr __user *msg_compat =
1865 (struct compat_msghdr __user *)msg;
1860 struct socket *sock; 1866 struct socket *sock;
1861 struct iovec iovstack[UIO_FASTIOV]; 1867 struct iovec iovstack[UIO_FASTIOV];
1862 struct iovec *iov=iovstack; 1868 struct iovec *iov = iovstack;
1863 struct msghdr msg_sys; 1869 struct msghdr msg_sys;
1864 unsigned long cmsg_ptr; 1870 unsigned long cmsg_ptr;
1865 int err, iov_size, total_len, len; 1871 int err, iov_size, total_len, len;
@@ -1871,13 +1877,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
1871 /* user mode address pointers */ 1877 /* user mode address pointers */
1872 struct sockaddr __user *uaddr; 1878 struct sockaddr __user *uaddr;
1873 int __user *uaddr_len; 1879 int __user *uaddr_len;
1874 1880
1875 if (MSG_CMSG_COMPAT & flags) { 1881 if (MSG_CMSG_COMPAT & flags) {
1876 if (get_compat_msghdr(&msg_sys, msg_compat)) 1882 if (get_compat_msghdr(&msg_sys, msg_compat))
1877 return -EFAULT; 1883 return -EFAULT;
1878 } else 1884 }
1879 if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr))) 1885 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1880 return -EFAULT; 1886 return -EFAULT;
1881 1887
1882 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1888 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1883 if (!sock) 1889 if (!sock)
@@ -1886,8 +1892,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
1886 err = -EMSGSIZE; 1892 err = -EMSGSIZE;
1887 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1893 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1888 goto out_put; 1894 goto out_put;
1889 1895
1890 /* Check whether to allocate the iovec area*/ 1896 /* Check whether to allocate the iovec area */
1891 err = -ENOMEM; 1897 err = -ENOMEM;
1892 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1898 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1893 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1899 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1897,11 +1903,11 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
1897 } 1903 }
1898 1904
1899 /* 1905 /*
1900 * Save the user-mode address (verify_iovec will change the 1906 * Save the user-mode address (verify_iovec will change the
1901 * kernel msghdr to use the kernel address space) 1907 * kernel msghdr to use the kernel address space)
1902 */ 1908 */
1903 1909
1904 uaddr = (void __user *) msg_sys.msg_name; 1910 uaddr = (void __user *)msg_sys.msg_name;
1905 uaddr_len = COMPAT_NAMELEN(msg); 1911 uaddr_len = COMPAT_NAMELEN(msg);
1906 if (MSG_CMSG_COMPAT & flags) { 1912 if (MSG_CMSG_COMPAT & flags) {
1907 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); 1913 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
@@ -1909,13 +1915,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
1909 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); 1915 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1910 if (err < 0) 1916 if (err < 0)
1911 goto out_freeiov; 1917 goto out_freeiov;
1912 total_len=err; 1918 total_len = err;
1913 1919
1914 cmsg_ptr = (unsigned long)msg_sys.msg_control; 1920 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1915 msg_sys.msg_flags = 0; 1921 msg_sys.msg_flags = 0;
1916 if (MSG_CMSG_COMPAT & flags) 1922 if (MSG_CMSG_COMPAT & flags)
1917 msg_sys.msg_flags = MSG_CMSG_COMPAT; 1923 msg_sys.msg_flags = MSG_CMSG_COMPAT;
1918 1924
1919 if (sock->file->f_flags & O_NONBLOCK) 1925 if (sock->file->f_flags & O_NONBLOCK)
1920 flags |= MSG_DONTWAIT; 1926 flags |= MSG_DONTWAIT;
1921 err = sock_recvmsg(sock, &msg_sys, total_len, flags); 1927 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
@@ -1924,7 +1930,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
1924 len = err; 1930 len = err;
1925 1931
1926 if (uaddr != NULL) { 1932 if (uaddr != NULL) {
1927 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len); 1933 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1934 uaddr_len);
1928 if (err < 0) 1935 if (err < 0)
1929 goto out_freeiov; 1936 goto out_freeiov;
1930 } 1937 }
@@ -1933,10 +1940,10 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
1933 if (err) 1940 if (err)
1934 goto out_freeiov; 1941 goto out_freeiov;
1935 if (MSG_CMSG_COMPAT & flags) 1942 if (MSG_CMSG_COMPAT & flags)
1936 err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 1943 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1937 &msg_compat->msg_controllen); 1944 &msg_compat->msg_controllen);
1938 else 1945 else
1939 err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 1946 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1940 &msg->msg_controllen); 1947 &msg->msg_controllen);
1941 if (err) 1948 if (err)
1942 goto out_freeiov; 1949 goto out_freeiov;
@@ -1955,163 +1962,187 @@ out:
1955 1962
1956/* Argument list sizes for sys_socketcall */ 1963/* Argument list sizes for sys_socketcall */
1957#define AL(x) ((x) * sizeof(unsigned long)) 1964#define AL(x) ((x) * sizeof(unsigned long))
1958static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 1965static const unsigned char nargs[18]={
1959 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 1966 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
1960 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; 1967 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
1968 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
1969};
1970
1961#undef AL 1971#undef AL
1962 1972
1963/* 1973/*
1964 * System call vectors. 1974 * System call vectors.
1965 * 1975 *
1966 * Argument checking cleaned up. Saved 20% in size. 1976 * Argument checking cleaned up. Saved 20% in size.
1967 * This function doesn't need to set the kernel lock because 1977 * This function doesn't need to set the kernel lock because
1968 * it is set by the callees. 1978 * it is set by the callees.
1969 */ 1979 */
1970 1980
1971asmlinkage long sys_socketcall(int call, unsigned long __user *args) 1981asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1972{ 1982{
1973 unsigned long a[6]; 1983 unsigned long a[6];
1974 unsigned long a0,a1; 1984 unsigned long a0, a1;
1975 int err; 1985 int err;
1976 1986
1977 if(call<1||call>SYS_RECVMSG) 1987 if (call < 1 || call > SYS_RECVMSG)
1978 return -EINVAL; 1988 return -EINVAL;
1979 1989
1980 /* copy_from_user should be SMP safe. */ 1990 /* copy_from_user should be SMP safe. */
1981 if (copy_from_user(a, args, nargs[call])) 1991 if (copy_from_user(a, args, nargs[call]))
1982 return -EFAULT; 1992 return -EFAULT;
1983 1993
1984 err = audit_socketcall(nargs[call]/sizeof(unsigned long), a); 1994 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
1985 if (err) 1995 if (err)
1986 return err; 1996 return err;
1987 1997
1988 a0=a[0]; 1998 a0 = a[0];
1989 a1=a[1]; 1999 a1 = a[1];
1990 2000
1991 switch(call) 2001 switch (call) {
1992 { 2002 case SYS_SOCKET:
1993 case SYS_SOCKET: 2003 err = sys_socket(a0, a1, a[2]);
1994 err = sys_socket(a0,a1,a[2]); 2004 break;
1995 break; 2005 case SYS_BIND:
1996 case SYS_BIND: 2006 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
1997 err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]); 2007 break;
1998 break; 2008 case SYS_CONNECT:
1999 case SYS_CONNECT: 2009 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2000 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); 2010 break;
2001 break; 2011 case SYS_LISTEN:
2002 case SYS_LISTEN: 2012 err = sys_listen(a0, a1);
2003 err = sys_listen(a0,a1); 2013 break;
2004 break; 2014 case SYS_ACCEPT:
2005 case SYS_ACCEPT: 2015 err =
2006 err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); 2016 sys_accept(a0, (struct sockaddr __user *)a1,
2007 break; 2017 (int __user *)a[2]);
2008 case SYS_GETSOCKNAME: 2018 break;
2009 err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); 2019 case SYS_GETSOCKNAME:
2010 break; 2020 err =
2011 case SYS_GETPEERNAME: 2021 sys_getsockname(a0, (struct sockaddr __user *)a1,
2012 err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]); 2022 (int __user *)a[2]);
2013 break; 2023 break;
2014 case SYS_SOCKETPAIR: 2024 case SYS_GETPEERNAME:
2015 err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]); 2025 err =
2016 break; 2026 sys_getpeername(a0, (struct sockaddr __user *)a1,
2017 case SYS_SEND: 2027 (int __user *)a[2]);
2018 err = sys_send(a0, (void __user *)a1, a[2], a[3]); 2028 break;
2019 break; 2029 case SYS_SOCKETPAIR:
2020 case SYS_SENDTO: 2030 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2021 err = sys_sendto(a0,(void __user *)a1, a[2], a[3], 2031 break;
2022 (struct sockaddr __user *)a[4], a[5]); 2032 case SYS_SEND:
2023 break; 2033 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2024 case SYS_RECV: 2034 break;
2025 err = sys_recv(a0, (void __user *)a1, a[2], a[3]); 2035 case SYS_SENDTO:
2026 break; 2036 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2027 case SYS_RECVFROM: 2037 (struct sockaddr __user *)a[4], a[5]);
2028 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 2038 break;
2029 (struct sockaddr __user *)a[4], (int __user *)a[5]); 2039 case SYS_RECV:
2030 break; 2040 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2031 case SYS_SHUTDOWN: 2041 break;
2032 err = sys_shutdown(a0,a1); 2042 case SYS_RECVFROM:
2033 break; 2043 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2034 case SYS_SETSOCKOPT: 2044 (struct sockaddr __user *)a[4],
2035 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); 2045 (int __user *)a[5]);
2036 break; 2046 break;
2037 case SYS_GETSOCKOPT: 2047 case SYS_SHUTDOWN:
2038 err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]); 2048 err = sys_shutdown(a0, a1);
2039 break; 2049 break;
2040 case SYS_SENDMSG: 2050 case SYS_SETSOCKOPT:
2041 err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]); 2051 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2042 break; 2052 break;
2043 case SYS_RECVMSG: 2053 case SYS_GETSOCKOPT:
2044 err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]); 2054 err =
2045 break; 2055 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2046 default: 2056 (int __user *)a[4]);
2047 err = -EINVAL; 2057 break;
2048 break; 2058 case SYS_SENDMSG:
2059 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2060 break;
2061 case SYS_RECVMSG:
2062 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2063 break;
2064 default:
2065 err = -EINVAL;
2066 break;
2049 } 2067 }
2050 return err; 2068 return err;
2051} 2069}
2052 2070
2053#endif /* __ARCH_WANT_SYS_SOCKETCALL */ 2071#endif /* __ARCH_WANT_SYS_SOCKETCALL */
2054 2072
2055/* 2073/**
2074 * sock_register - add a socket protocol handler
2075 * @ops: description of protocol
2076 *
2056 * This function is called by a protocol handler that wants to 2077 * This function is called by a protocol handler that wants to
2057 * advertise its address family, and have it linked into the 2078 * advertise its address family, and have it linked into the
2058 * SOCKET module. 2079 * socket interface. The value ops->family corresponds to the
2080 * socket system call protocol family.
2059 */ 2081 */
2060 2082int sock_register(const struct net_proto_family *ops)
2061int sock_register(struct net_proto_family *ops)
2062{ 2083{
2063 int err; 2084 int err;
2064 2085
2065 if (ops->family >= NPROTO) { 2086 if (ops->family >= NPROTO) {
2066 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); 2087 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2088 NPROTO);
2067 return -ENOBUFS; 2089 return -ENOBUFS;
2068 } 2090 }
2069 net_family_write_lock(); 2091
2070 err = -EEXIST; 2092 spin_lock(&net_family_lock);
2071 if (net_families[ops->family] == NULL) { 2093 if (net_families[ops->family])
2072 net_families[ops->family]=ops; 2094 err = -EEXIST;
2095 else {
2096 net_families[ops->family] = ops;
2073 err = 0; 2097 err = 0;
2074 } 2098 }
2075 net_family_write_unlock(); 2099 spin_unlock(&net_family_lock);
2076 printk(KERN_INFO "NET: Registered protocol family %d\n", 2100
2077 ops->family); 2101 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
2078 return err; 2102 return err;
2079} 2103}
2080 2104
2081/* 2105/**
2106 * sock_unregister - remove a protocol handler
2107 * @family: protocol family to remove
2108 *
2082 * This function is called by a protocol handler that wants to 2109 * This function is called by a protocol handler that wants to
2083 * remove its address family, and have it unlinked from the 2110 * remove its address family, and have it unlinked from the
2084 * SOCKET module. 2111 * new socket creation.
2112 *
 2113 * If the protocol handler is a module, then it can use module reference
 2114 * counts to protect against new references. If the protocol handler is not
 2115 * a module, then it needs to provide its own protection in
2116 * the ops->create routine.
2085 */ 2117 */
2086 2118void sock_unregister(int family)
2087int sock_unregister(int family)
2088{ 2119{
2089 if (family < 0 || family >= NPROTO) 2120 BUG_ON(family < 0 || family >= NPROTO);
2090 return -1;
2091 2121
2092 net_family_write_lock(); 2122 spin_lock(&net_family_lock);
2093 net_families[family]=NULL; 2123 net_families[family] = NULL;
2094 net_family_write_unlock(); 2124 spin_unlock(&net_family_lock);
2095 printk(KERN_INFO "NET: Unregistered protocol family %d\n", 2125
2096 family); 2126 synchronize_rcu();
2097 return 0; 2127
2128 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
2098} 2129}
2099 2130
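[Editor's note] Taken together, sock_register() and sock_unregister() above form the full lifecycle of an address family. A hedged sketch of a protocol module using them; AF_DEMO, demo_create(), and the module hooks are hypothetical placeholders, not part of this patch:

static int demo_create(struct socket *sock, int protocol)
{
	/* A real family would allocate a struct sock and set sock->ops. */
	return -EAFNOSUPPORT;
}

static const struct net_proto_family demo_family_ops = {
	.family = AF_DEMO,		/* hypothetical family number */
	.create = demo_create,
	.owner	= THIS_MODULE,
};

static int __init demo_init(void)
{
	/* -EEXIST if the slot is taken, -ENOBUFS if family >= NPROTO */
	return sock_register(&demo_family_ops);
}

static void __exit demo_exit(void)
{
	/* BUG()s on an out-of-range family; synchronize_rcu() runs first */
	sock_unregister(AF_DEMO);
}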
2100static int __init sock_init(void) 2131static int __init sock_init(void)
2101{ 2132{
2102 /* 2133 /*
2103 * Initialize sock SLAB cache. 2134 * Initialize sock SLAB cache.
2104 */ 2135 */
2105 2136
2106 sk_init(); 2137 sk_init();
2107 2138
2108 /* 2139 /*
2109 * Initialize skbuff SLAB cache 2140 * Initialize skbuff SLAB cache
2110 */ 2141 */
2111 skb_init(); 2142 skb_init();
2112 2143
2113 /* 2144 /*
2114 * Initialize the protocols module. 2145 * Initialize the protocols module.
2115 */ 2146 */
2116 2147
2117 init_inodecache(); 2148 init_inodecache();
@@ -2137,7 +2168,7 @@ void socket_seq_show(struct seq_file *seq)
2137 int counter = 0; 2168 int counter = 0;
2138 2169
2139 for_each_possible_cpu(cpu) 2170 for_each_possible_cpu(cpu)
2140 counter += per_cpu(sockets_in_use, cpu); 2171 counter += per_cpu(sockets_in_use, cpu);
2141 2172
2142 /* It can be negative, by the way. 8) */ 2173 /* It can be negative, by the way. 8) */
2143 if (counter < 0) 2174 if (counter < 0)
@@ -2145,11 +2176,11 @@ void socket_seq_show(struct seq_file *seq)
2145 2176
2146 seq_printf(seq, "sockets: used %d\n", counter); 2177 seq_printf(seq, "sockets: used %d\n", counter);
2147} 2178}
2148#endif /* CONFIG_PROC_FS */ 2179#endif /* CONFIG_PROC_FS */
2149 2180
2150#ifdef CONFIG_COMPAT 2181#ifdef CONFIG_COMPAT
2151static long compat_sock_ioctl(struct file *file, unsigned cmd, 2182static long compat_sock_ioctl(struct file *file, unsigned cmd,
2152 unsigned long arg) 2183 unsigned long arg)
2153{ 2184{
2154 struct socket *sock = file->private_data; 2185 struct socket *sock = file->private_data;
2155 int ret = -ENOIOCTLCMD; 2186 int ret = -ENOIOCTLCMD;
@@ -2161,6 +2192,109 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd,
2161} 2192}
2162#endif 2193#endif
2163 2194
2195int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2196{
2197 return sock->ops->bind(sock, addr, addrlen);
2198}
2199
2200int kernel_listen(struct socket *sock, int backlog)
2201{
2202 return sock->ops->listen(sock, backlog);
2203}
2204
2205int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2206{
2207 struct sock *sk = sock->sk;
2208 int err;
2209
2210 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2211 newsock);
2212 if (err < 0)
2213 goto done;
2214
2215 err = sock->ops->accept(sock, *newsock, flags);
2216 if (err < 0) {
2217 sock_release(*newsock);
2218 goto done;
2219 }
2220
2221 (*newsock)->ops = sock->ops;
2222
2223done:
2224 return err;
2225}
2226
2227int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2228 int flags)
2229{
2230 return sock->ops->connect(sock, addr, addrlen, flags);
2231}
2232
2233int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2234 int *addrlen)
2235{
2236 return sock->ops->getname(sock, addr, addrlen, 0);
2237}
2238
2239int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2240 int *addrlen)
2241{
2242 return sock->ops->getname(sock, addr, addrlen, 1);
2243}
2244
2245int kernel_getsockopt(struct socket *sock, int level, int optname,
2246 char *optval, int *optlen)
2247{
2248 mm_segment_t oldfs = get_fs();
2249 int err;
2250
2251 set_fs(KERNEL_DS);
2252 if (level == SOL_SOCKET)
2253 err = sock_getsockopt(sock, level, optname, optval, optlen);
2254 else
2255 err = sock->ops->getsockopt(sock, level, optname, optval,
2256 optlen);
2257 set_fs(oldfs);
2258 return err;
2259}
2260
2261int kernel_setsockopt(struct socket *sock, int level, int optname,
2262 char *optval, int optlen)
2263{
2264 mm_segment_t oldfs = get_fs();
2265 int err;
2266
2267 set_fs(KERNEL_DS);
2268 if (level == SOL_SOCKET)
2269 err = sock_setsockopt(sock, level, optname, optval, optlen);
2270 else
2271 err = sock->ops->setsockopt(sock, level, optname, optval,
2272 optlen);
2273 set_fs(oldfs);
2274 return err;
2275}
2276
2277int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2278 size_t size, int flags)
2279{
2280 if (sock->ops->sendpage)
2281 return sock->ops->sendpage(sock, page, offset, size, flags);
2282
2283 return sock_no_sendpage(sock, page, offset, size, flags);
2284}
2285
2286int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2287{
2288 mm_segment_t oldfs = get_fs();
2289 int err;
2290
2291 set_fs(KERNEL_DS);
2292 err = sock->ops->ioctl(sock, cmd, arg);
2293 set_fs(oldfs);
2294
2295 return err;
2296}
2297
2164/* ABI emulation layers need these two */ 2298/* ABI emulation layers need these two */
2165EXPORT_SYMBOL(move_addr_to_kernel); 2299EXPORT_SYMBOL(move_addr_to_kernel);
2166EXPORT_SYMBOL(move_addr_to_user); 2300EXPORT_SYMBOL(move_addr_to_user);
@@ -2177,3 +2311,13 @@ EXPORT_SYMBOL(sock_wake_async);
2177EXPORT_SYMBOL(sockfd_lookup); 2311EXPORT_SYMBOL(sockfd_lookup);
2178EXPORT_SYMBOL(kernel_sendmsg); 2312EXPORT_SYMBOL(kernel_sendmsg);
2179EXPORT_SYMBOL(kernel_recvmsg); 2313EXPORT_SYMBOL(kernel_recvmsg);
2314EXPORT_SYMBOL(kernel_bind);
2315EXPORT_SYMBOL(kernel_listen);
2316EXPORT_SYMBOL(kernel_accept);
2317EXPORT_SYMBOL(kernel_connect);
2318EXPORT_SYMBOL(kernel_getsockname);
2319EXPORT_SYMBOL(kernel_getpeername);
2320EXPORT_SYMBOL(kernel_getsockopt);
2321EXPORT_SYMBOL(kernel_setsockopt);
2322EXPORT_SYMBOL(kernel_sendpage);
2323EXPORT_SYMBOL(kernel_sock_ioctl);
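[Editor's note] The kernel_* wrappers exported above give in-kernel users one audited path instead of calling sock->ops methods directly (the svcsock.c conversion further below is the first consumer). A minimal sketch of a kernel-space TCP listener built on them; sock_create_kern() and the INADDR_ANY setup are assumptions drawn from the contemporary socket API, not part of this diff:

static int demo_listen(struct socket **res, unsigned short port)
{
	struct sockaddr_in sin = {
		.sin_family	 = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),	/* assumption */
		.sin_port	 = htons(port),
	};
	struct socket *sock;
	int err;

	/* sock_create_kern() is assumed from the surrounding 2.6 API */
	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0)
		return err;

	err = kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin));
	if (err == 0)
		err = kernel_listen(sock, 64);
	if (err < 0) {
		sock_release(sock);
		return err;
	}

	*res = sock;
	return 0;
}

From here, kernel_accept() hands back a fully wired struct socket, which is exactly what svc_tcp_accept() below now relies on.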
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index ef1cf5b476c8..6eed3e166ba3 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -88,7 +88,6 @@ struct gss_auth {
88 struct list_head upcalls; 88 struct list_head upcalls;
89 struct rpc_clnt *client; 89 struct rpc_clnt *client;
90 struct dentry *dentry; 90 struct dentry *dentry;
91 char path[48];
92 spinlock_t lock; 91 spinlock_t lock;
93}; 92};
94 93
@@ -690,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
690 if (err) 689 if (err)
691 goto err_put_mech; 690 goto err_put_mech;
692 691
693 snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", 692 gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
694 clnt->cl_pathname, 693 clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
695 gss_auth->mech->gm_name);
696 gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
697 if (IS_ERR(gss_auth->dentry)) { 694 if (IS_ERR(gss_auth->dentry)) {
698 err = PTR_ERR(gss_auth->dentry); 695 err = PTR_ERR(gss_auth->dentry);
699 goto err_put_mech; 696 goto err_put_mech;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 3e19d321067a..084a0ad5c64e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -97,17 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
97 } 97 }
98} 98}
99 99
100/* 100static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor)
101 * Create an RPC client
102 * FIXME: This should also take a flags argument (as in task->tk_flags).
103 * It's called (among others) from pmap_create_client, which may in
104 * turn be called by an async task. In this case, rpciod should not be
105 * made to sleep too long.
106 */
107struct rpc_clnt *
108rpc_new_client(struct rpc_xprt *xprt, char *servname,
109 struct rpc_program *program, u32 vers,
110 rpc_authflavor_t flavor)
111{ 101{
112 struct rpc_version *version; 102 struct rpc_version *version;
113 struct rpc_clnt *clnt = NULL; 103 struct rpc_clnt *clnt = NULL;
@@ -147,16 +137,12 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
147 clnt->cl_procinfo = version->procs; 137 clnt->cl_procinfo = version->procs;
148 clnt->cl_maxproc = version->nrprocs; 138 clnt->cl_maxproc = version->nrprocs;
149 clnt->cl_protname = program->name; 139 clnt->cl_protname = program->name;
150 clnt->cl_pmap = &clnt->cl_pmap_default;
151 clnt->cl_port = xprt->addr.sin_port;
152 clnt->cl_prog = program->number; 140 clnt->cl_prog = program->number;
153 clnt->cl_vers = version->number; 141 clnt->cl_vers = version->number;
154 clnt->cl_prot = xprt->prot;
155 clnt->cl_stats = program->stats; 142 clnt->cl_stats = program->stats;
156 clnt->cl_metrics = rpc_alloc_iostats(clnt); 143 clnt->cl_metrics = rpc_alloc_iostats(clnt);
157 rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
158 144
159 if (!clnt->cl_port) 145 if (!xprt_bound(clnt->cl_xprt))
160 clnt->cl_autobind = 1; 146 clnt->cl_autobind = 1;
161 147
162 clnt->cl_rtt = &clnt->cl_rtt_default; 148 clnt->cl_rtt = &clnt->cl_rtt_default;
@@ -191,40 +177,71 @@ out_no_path:
191 kfree(clnt->cl_server); 177 kfree(clnt->cl_server);
192 kfree(clnt); 178 kfree(clnt);
193out_err: 179out_err:
194 xprt_destroy(xprt); 180 xprt_put(xprt);
195out_no_xprt: 181out_no_xprt:
196 return ERR_PTR(err); 182 return ERR_PTR(err);
197} 183}
198 184
199/** 185/*
200 * Create an RPC client 186 * rpc_create - create an RPC client and transport with one call
201 * @xprt - pointer to xprt struct 187 * @args: rpc_clnt create argument structure
202 * @servname - name of server
203 * @info - rpc_program
204 * @version - rpc_program version
205 * @authflavor - rpc_auth flavour to use
206 * 188 *
207 * Creates an RPC client structure, then pings the server in order to 189 * Creates and initializes an RPC transport and an RPC client.
208 * determine if it is up, and if it supports this program and version.
209 * 190 *
210 * This function should never be called by asynchronous tasks such as 191 * It can ping the server in order to determine if it is up, and to see if
211 * the portmapper. 192 * it supports this program and version. RPC_CLNT_CREATE_NOPING disables
193 * this behavior so asynchronous tasks can also use rpc_create.
212 */ 194 */
213struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, 195struct rpc_clnt *rpc_create(struct rpc_create_args *args)
214 struct rpc_program *info, u32 version, rpc_authflavor_t authflavor)
215{ 196{
197 struct rpc_xprt *xprt;
216 struct rpc_clnt *clnt; 198 struct rpc_clnt *clnt;
217 int err; 199
218 200 xprt = xprt_create_transport(args->protocol, args->address,
219 clnt = rpc_new_client(xprt, servname, info, version, authflavor); 201 args->addrsize, args->timeout);
202 if (IS_ERR(xprt))
203 return (struct rpc_clnt *)xprt;
204
205 /*
206 * By default, kernel RPC client connects from a reserved port.
207 * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
208 * but it is always enabled for rpciod, which handles the connect
209 * operation.
210 */
211 xprt->resvport = 1;
212 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
213 xprt->resvport = 0;
214
215 dprintk("RPC: creating %s client for %s (xprt %p)\n",
216 args->program->name, args->servername, xprt);
217
218 clnt = rpc_new_client(xprt, args->servername, args->program,
219 args->version, args->authflavor);
220 if (IS_ERR(clnt)) 220 if (IS_ERR(clnt))
221 return clnt; 221 return clnt;
222 err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); 222
223 if (err == 0) 223 if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
224 return clnt; 224 int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
225 rpc_shutdown_client(clnt); 225 if (err != 0) {
226 return ERR_PTR(err); 226 rpc_shutdown_client(clnt);
227 return ERR_PTR(err);
228 }
229 }
230
231 clnt->cl_softrtry = 1;
232 if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
233 clnt->cl_softrtry = 0;
234
235 if (args->flags & RPC_CLNT_CREATE_INTR)
236 clnt->cl_intr = 1;
237 if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
238 clnt->cl_autobind = 1;
239 if (args->flags & RPC_CLNT_CREATE_ONESHOT)
240 clnt->cl_oneshot = 1;
241
242 return clnt;
227} 243}
244EXPORT_SYMBOL_GPL(rpc_create);
228 245
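[Editor's note] rpc_create() above folds transport creation, the reserved-port default, the optional ping, and the flag-driven client options into a single call. A hedged caller sketch; demo_program, the version number, and the flag choice are illustrative assumptions:

static struct rpc_clnt *demo_create_client(struct sockaddr_in *srvaddr)
{
	struct rpc_create_args args = {
		.protocol   = IPPROTO_TCP,
		.address    = (struct sockaddr *)srvaddr,
		.addrsize   = sizeof(*srvaddr),
		.servername = "demo-server",
		.program    = &demo_program,	/* hypothetical rpc_program */
		.version    = 1,
		.authflavor = RPC_AUTH_UNIX,
		.flags	    = RPC_CLNT_CREATE_AUTOBIND,
	};

	/* On failure this is an ERR_PTR(), never NULL. */
	return rpc_create(&args);
}

The reworked pmap_create() later in this series has exactly this shape, passing RPC_CLNT_CREATE_ONESHOT | RPC_CLNT_CREATE_NOPING.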
229/* 246/*
230 * This function clones the RPC client structure. It allows us to share the 247 * This function clones the RPC client structure. It allows us to share the
@@ -244,8 +261,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
244 atomic_set(&new->cl_users, 0); 261 atomic_set(&new->cl_users, 0);
245 new->cl_parent = clnt; 262 new->cl_parent = clnt;
246 atomic_inc(&clnt->cl_count); 263 atomic_inc(&clnt->cl_count);
247 /* Duplicate portmapper */ 264 new->cl_xprt = xprt_get(clnt->cl_xprt);
248 rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
249 /* Turn off autobind on clones */ 265 /* Turn off autobind on clones */
250 new->cl_autobind = 0; 266 new->cl_autobind = 0;
251 new->cl_oneshot = 0; 267 new->cl_oneshot = 0;
@@ -255,8 +271,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
255 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); 271 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
256 if (new->cl_auth) 272 if (new->cl_auth)
257 atomic_inc(&new->cl_auth->au_count); 273 atomic_inc(&new->cl_auth->au_count);
258 new->cl_pmap = &new->cl_pmap_default; 274 new->cl_metrics = rpc_alloc_iostats(clnt);
259 new->cl_metrics = rpc_alloc_iostats(clnt);
260 return new; 275 return new;
261out_no_clnt: 276out_no_clnt:
262 printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); 277 printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -323,15 +338,12 @@ rpc_destroy_client(struct rpc_clnt *clnt)
323 rpc_rmdir(clnt->cl_dentry); 338 rpc_rmdir(clnt->cl_dentry);
324 rpc_put_mount(); 339 rpc_put_mount();
325 } 340 }
326 if (clnt->cl_xprt) {
327 xprt_destroy(clnt->cl_xprt);
328 clnt->cl_xprt = NULL;
329 }
330 if (clnt->cl_server != clnt->cl_inline_name) 341 if (clnt->cl_server != clnt->cl_inline_name)
331 kfree(clnt->cl_server); 342 kfree(clnt->cl_server);
332out_free: 343out_free:
333 rpc_free_iostats(clnt->cl_metrics); 344 rpc_free_iostats(clnt->cl_metrics);
334 clnt->cl_metrics = NULL; 345 clnt->cl_metrics = NULL;
346 xprt_put(clnt->cl_xprt);
335 kfree(clnt); 347 kfree(clnt);
336 return 0; 348 return 0;
337} 349}
@@ -540,6 +552,40 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
540 task->tk_action = rpc_exit_task; 552 task->tk_action = rpc_exit_task;
541} 553}
542 554
555/**
556 * rpc_peeraddr - extract remote peer address from clnt's xprt
557 * @clnt: RPC client structure
558 * @buf: target buffer
 559 * @bufsize: length of target buffer
560 *
561 * Returns the number of bytes that are actually in the stored address.
562 */
563size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
564{
565 size_t bytes;
566 struct rpc_xprt *xprt = clnt->cl_xprt;
567
568 bytes = sizeof(xprt->addr);
569 if (bytes > bufsize)
570 bytes = bufsize;
571 memcpy(buf, &clnt->cl_xprt->addr, bytes);
572 return xprt->addrlen;
573}
574EXPORT_SYMBOL_GPL(rpc_peeraddr);
575
576/**
577 * rpc_peeraddr2str - return remote peer address in printable format
578 * @clnt: RPC client structure
579 * @format: address format
580 *
581 */
582char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
583{
584 struct rpc_xprt *xprt = clnt->cl_xprt;
585 return xprt->ops->print_addr(xprt, format);
586}
587EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
588
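[Editor's note] rpc_peeraddr() copies at most bufsize bytes yet returns the full stored address length, so a caller can detect truncation by comparing the two. A small sketch exercising both helpers (the sockaddr_storage buffer is an assumption):

static void demo_log_peer(struct rpc_clnt *clnt)
{
	struct sockaddr_storage peer;
	size_t len;

	len = rpc_peeraddr(clnt, (struct sockaddr *)&peer, sizeof(peer));
	printk(KERN_DEBUG "RPC peer %s (%zu address bytes)\n",
	       rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), len);
}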
543void 589void
544rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) 590rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
545{ 591{
@@ -560,7 +606,7 @@ size_t rpc_max_payload(struct rpc_clnt *clnt)
560{ 606{
561 return clnt->cl_xprt->max_payload; 607 return clnt->cl_xprt->max_payload;
562} 608}
563EXPORT_SYMBOL(rpc_max_payload); 609EXPORT_SYMBOL_GPL(rpc_max_payload);
564 610
565/** 611/**
566 * rpc_force_rebind - force transport to check that remote port is unchanged 612 * rpc_force_rebind - force transport to check that remote port is unchanged
@@ -570,9 +616,9 @@ EXPORT_SYMBOL(rpc_max_payload);
570void rpc_force_rebind(struct rpc_clnt *clnt) 616void rpc_force_rebind(struct rpc_clnt *clnt)
571{ 617{
572 if (clnt->cl_autobind) 618 if (clnt->cl_autobind)
573 clnt->cl_port = 0; 619 xprt_clear_bound(clnt->cl_xprt);
574} 620}
575EXPORT_SYMBOL(rpc_force_rebind); 621EXPORT_SYMBOL_GPL(rpc_force_rebind);
576 622
577/* 623/*
578 * Restart an (async) RPC call. Usually called from within the 624 * Restart an (async) RPC call. Usually called from within the
@@ -781,16 +827,16 @@ call_encode(struct rpc_task *task)
781static void 827static void
782call_bind(struct rpc_task *task) 828call_bind(struct rpc_task *task)
783{ 829{
784 struct rpc_clnt *clnt = task->tk_client; 830 struct rpc_xprt *xprt = task->tk_xprt;
785 831
786 dprintk("RPC: %4d call_bind (status %d)\n", 832 dprintk("RPC: %4d call_bind (status %d)\n",
787 task->tk_pid, task->tk_status); 833 task->tk_pid, task->tk_status);
788 834
789 task->tk_action = call_connect; 835 task->tk_action = call_connect;
790 if (!clnt->cl_port) { 836 if (!xprt_bound(xprt)) {
791 task->tk_action = call_bind_status; 837 task->tk_action = call_bind_status;
792 task->tk_timeout = task->tk_xprt->bind_timeout; 838 task->tk_timeout = xprt->bind_timeout;
793 rpc_getport(task, clnt); 839 xprt->ops->rpcbind(task);
794 } 840 }
795} 841}
796 842
@@ -815,15 +861,11 @@ call_bind_status(struct rpc_task *task)
815 dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", 861 dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n",
816 task->tk_pid); 862 task->tk_pid);
817 rpc_delay(task, 3*HZ); 863 rpc_delay(task, 3*HZ);
818 goto retry_bind; 864 goto retry_timeout;
819 case -ETIMEDOUT: 865 case -ETIMEDOUT:
820 dprintk("RPC: %4d rpcbind request timed out\n", 866 dprintk("RPC: %4d rpcbind request timed out\n",
821 task->tk_pid); 867 task->tk_pid);
822 if (RPC_IS_SOFT(task)) { 868 goto retry_timeout;
823 status = -EIO;
824 break;
825 }
826 goto retry_bind;
827 case -EPFNOSUPPORT: 869 case -EPFNOSUPPORT:
828 dprintk("RPC: %4d remote rpcbind service unavailable\n", 870 dprintk("RPC: %4d remote rpcbind service unavailable\n",
829 task->tk_pid); 871 task->tk_pid);
@@ -836,16 +878,13 @@ call_bind_status(struct rpc_task *task)
836 dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", 878 dprintk("RPC: %4d unrecognized rpcbind error (%d)\n",
837 task->tk_pid, -task->tk_status); 879 task->tk_pid, -task->tk_status);
838 status = -EIO; 880 status = -EIO;
839 break;
840 } 881 }
841 882
842 rpc_exit(task, status); 883 rpc_exit(task, status);
843 return; 884 return;
844 885
845retry_bind: 886retry_timeout:
846 task->tk_status = 0; 887 task->tk_action = call_timeout;
847 task->tk_action = call_bind;
848 return;
849} 888}
850 889
851/* 890/*
@@ -893,14 +932,16 @@ call_connect_status(struct rpc_task *task)
893 932
894 switch (status) { 933 switch (status) {
895 case -ENOTCONN: 934 case -ENOTCONN:
896 case -ETIMEDOUT:
897 case -EAGAIN: 935 case -EAGAIN:
898 task->tk_action = call_bind; 936 task->tk_action = call_bind;
899 break; 937 if (!RPC_IS_SOFT(task))
900 default: 938 return;
901 rpc_exit(task, -EIO); 939 /* if soft mounted, test if we've timed out */
902 break; 940 case -ETIMEDOUT:
941 task->tk_action = call_timeout;
942 return;
903 } 943 }
944 rpc_exit(task, -EIO);
904} 945}
905 946
906/* 947/*
@@ -982,6 +1023,14 @@ call_status(struct rpc_task *task)
982 1023
983 task->tk_status = 0; 1024 task->tk_status = 0;
984 switch(status) { 1025 switch(status) {
1026 case -EHOSTDOWN:
1027 case -EHOSTUNREACH:
1028 case -ENETUNREACH:
1029 /*
1030 * Delay any retries for 3 seconds, then handle as if it
1031 * were a timeout.
1032 */
1033 rpc_delay(task, 3*HZ);
985 case -ETIMEDOUT: 1034 case -ETIMEDOUT:
986 task->tk_action = call_timeout; 1035 task->tk_action = call_timeout;
987 break; 1036 break;
@@ -1001,7 +1050,6 @@ call_status(struct rpc_task *task)
1001 printk("%s: RPC call returned error %d\n", 1050 printk("%s: RPC call returned error %d\n",
1002 clnt->cl_protname, -status); 1051 clnt->cl_protname, -status);
1003 rpc_exit(task, status); 1052 rpc_exit(task, status);
1004 break;
1005 } 1053 }
1006} 1054}
1007 1055
@@ -1069,10 +1117,10 @@ call_decode(struct rpc_task *task)
1069 clnt->cl_stats->rpcretrans++; 1117 clnt->cl_stats->rpcretrans++;
1070 goto out_retry; 1118 goto out_retry;
1071 } 1119 }
1072 printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n", 1120 dprintk("%s: too small RPC reply size (%d bytes)\n",
1073 clnt->cl_protname, task->tk_status); 1121 clnt->cl_protname, task->tk_status);
1074 rpc_exit(task, -EIO); 1122 task->tk_action = call_timeout;
1075 return; 1123 goto out_retry;
1076 } 1124 }
1077 1125
1078 /* 1126 /*
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 623180f224c9..c04609d3476a 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -1,7 +1,9 @@
1/* 1/*
2 * linux/net/sunrpc/pmap.c 2 * linux/net/sunrpc/pmap_clnt.c
3 * 3 *
4 * Portmapper client. 4 * In-kernel RPC portmapper client.
5 *
6 * Portmapper supports version 2 of the rpcbind protocol (RFC 1833).
5 * 7 *
6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 8 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
7 */ 9 */
@@ -13,7 +15,6 @@
13#include <linux/uio.h> 15#include <linux/uio.h>
14#include <linux/in.h> 16#include <linux/in.h>
15#include <linux/sunrpc/clnt.h> 17#include <linux/sunrpc/clnt.h>
16#include <linux/sunrpc/xprt.h>
17#include <linux/sunrpc/sched.h> 18#include <linux/sunrpc/sched.h>
18 19
19#ifdef RPC_DEBUG 20#ifdef RPC_DEBUG
@@ -24,80 +25,141 @@
24#define PMAP_UNSET 2 25#define PMAP_UNSET 2
25#define PMAP_GETPORT 3 26#define PMAP_GETPORT 3
26 27
28struct portmap_args {
29 u32 pm_prog;
30 u32 pm_vers;
31 u32 pm_prot;
32 unsigned short pm_port;
33 struct rpc_xprt * pm_xprt;
34};
35
27static struct rpc_procinfo pmap_procedures[]; 36static struct rpc_procinfo pmap_procedures[];
28static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); 37static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int);
29static void pmap_getport_done(struct rpc_task *); 38static void pmap_getport_done(struct rpc_task *, void *);
30static struct rpc_program pmap_program; 39static struct rpc_program pmap_program;
31static DEFINE_SPINLOCK(pmap_lock);
32 40
33/* 41static void pmap_getport_prepare(struct rpc_task *task, void *calldata)
34 * Obtain the port for a given RPC service on a given host. This one can
35 * be called for an ongoing RPC request.
36 */
37void
38rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
39{ 42{
40 struct rpc_portmap *map = clnt->cl_pmap; 43 struct portmap_args *map = calldata;
41 struct sockaddr_in *sap = &clnt->cl_xprt->addr;
42 struct rpc_message msg = { 44 struct rpc_message msg = {
43 .rpc_proc = &pmap_procedures[PMAP_GETPORT], 45 .rpc_proc = &pmap_procedures[PMAP_GETPORT],
44 .rpc_argp = map, 46 .rpc_argp = map,
45 .rpc_resp = &clnt->cl_port, 47 .rpc_resp = &map->pm_port,
46 .rpc_cred = NULL
47 }; 48 };
49
50 rpc_call_setup(task, &msg, 0);
51}
52
53static inline struct portmap_args *pmap_map_alloc(void)
54{
55 return kmalloc(sizeof(struct portmap_args), GFP_NOFS);
56}
57
58static inline void pmap_map_free(struct portmap_args *map)
59{
60 kfree(map);
61}
62
63static void pmap_map_release(void *data)
64{
65 pmap_map_free(data);
66}
67
68static const struct rpc_call_ops pmap_getport_ops = {
69 .rpc_call_prepare = pmap_getport_prepare,
70 .rpc_call_done = pmap_getport_done,
71 .rpc_release = pmap_map_release,
72};
73
74static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status)
75{
76 xprt_clear_binding(xprt);
77 rpc_wake_up_status(&xprt->binding, status);
78}
79
80/**
81 * rpc_getport - obtain the port for a given RPC service on a given host
82 * @task: task that is waiting for portmapper request
83 *
84 * This one can be called for an ongoing RPC request, and can be used in
85 * an async (rpciod) context.
86 */
87void rpc_getport(struct rpc_task *task)
88{
89 struct rpc_clnt *clnt = task->tk_client;
90 struct rpc_xprt *xprt = task->tk_xprt;
91 struct sockaddr_in addr;
92 struct portmap_args *map;
48 struct rpc_clnt *pmap_clnt; 93 struct rpc_clnt *pmap_clnt;
49 struct rpc_task *child; 94 struct rpc_task *child;
95 int status;
50 96
51 dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n", 97 dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n",
52 task->tk_pid, clnt->cl_server, 98 task->tk_pid, clnt->cl_server,
53 map->pm_prog, map->pm_vers, map->pm_prot); 99 clnt->cl_prog, clnt->cl_vers, xprt->prot);
54 100
55 /* Autobind on cloned rpc clients is discouraged */ 101 /* Autobind on cloned rpc clients is discouraged */
56 BUG_ON(clnt->cl_parent != clnt); 102 BUG_ON(clnt->cl_parent != clnt);
57 103
58 spin_lock(&pmap_lock); 104 if (xprt_test_and_set_binding(xprt)) {
59 if (map->pm_binding) { 105 task->tk_status = -EACCES; /* tell caller to check again */
60 rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); 106 rpc_sleep_on(&xprt->binding, task, NULL, NULL);
61 spin_unlock(&pmap_lock);
62 return; 107 return;
63 } 108 }
64 map->pm_binding = 1;
65 spin_unlock(&pmap_lock);
66 109
67 pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); 110 /* Someone else may have bound if we slept */
68 if (IS_ERR(pmap_clnt)) { 111 status = 0;
69 task->tk_status = PTR_ERR(pmap_clnt); 112 if (xprt_bound(xprt))
113 goto bailout_nofree;
114
115 status = -ENOMEM;
116 map = pmap_map_alloc();
117 if (!map)
118 goto bailout_nofree;
119 map->pm_prog = clnt->cl_prog;
120 map->pm_vers = clnt->cl_vers;
121 map->pm_prot = xprt->prot;
122 map->pm_port = 0;
123 map->pm_xprt = xprt_get(xprt);
124
125 rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
126 pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
127 status = PTR_ERR(pmap_clnt);
128 if (IS_ERR(pmap_clnt))
70 goto bailout; 129 goto bailout;
71 }
72 task->tk_status = 0;
73 130
74 /* 131 status = -EIO;
75 * Note: rpc_new_child will release client after a failure. 132 child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
76 */ 133 if (IS_ERR(child))
77 if (!(child = rpc_new_child(pmap_clnt, task)))
78 goto bailout; 134 goto bailout;
135 rpc_release_task(child);
79 136
80 /* Setup the call info struct */ 137 rpc_sleep_on(&xprt->binding, task, NULL, NULL);
81 rpc_call_setup(child, &msg, 0);
82 138
83 /* ... and run the child task */
84 task->tk_xprt->stat.bind_count++; 139 task->tk_xprt->stat.bind_count++;
85 rpc_run_child(task, child, pmap_getport_done);
86 return; 140 return;
87 141
88bailout: 142bailout:
89 spin_lock(&pmap_lock); 143 pmap_map_free(map);
90 map->pm_binding = 0; 144 xprt_put(xprt);
91 rpc_wake_up(&map->pm_bindwait); 145bailout_nofree:
92 spin_unlock(&pmap_lock); 146 task->tk_status = status;
93 rpc_exit(task, -EIO); 147 pmap_wake_portmap_waiters(xprt, status);
94} 148}
95 149
96#ifdef CONFIG_ROOT_NFS 150#ifdef CONFIG_ROOT_NFS
97int 151/**
98rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) 152 * rpc_getport_external - obtain the port for a given RPC service on a given host
153 * @sin: address of remote peer
154 * @prog: RPC program number to bind
155 * @vers: RPC version number to bind
156 * @prot: transport protocol to use to make this request
157 *
158 * This one is called from outside the RPC client in a synchronous task context.
159 */
160int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
99{ 161{
100 struct rpc_portmap map = { 162 struct portmap_args map = {
101 .pm_prog = prog, 163 .pm_prog = prog,
102 .pm_vers = vers, 164 .pm_vers = vers,
103 .pm_prot = prot, 165 .pm_prot = prot,
@@ -112,7 +174,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
112 char hostname[32]; 174 char hostname[32];
113 int status; 175 int status;
114 176
115 dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n", 177 dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
116 NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); 178 NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
117 179
118 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); 180 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
@@ -132,45 +194,53 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
132} 194}
133#endif 195#endif
134 196
135static void 197/*
136pmap_getport_done(struct rpc_task *task) 198 * Portmapper child task invokes this callback via tk_exit.
199 */
200static void pmap_getport_done(struct rpc_task *child, void *data)
137{ 201{
138 struct rpc_clnt *clnt = task->tk_client; 202 struct portmap_args *map = data;
139 struct rpc_xprt *xprt = task->tk_xprt; 203 struct rpc_xprt *xprt = map->pm_xprt;
140 struct rpc_portmap *map = clnt->cl_pmap; 204 int status = child->tk_status;
141 205
142 dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", 206 if (status < 0) {
143 task->tk_pid, task->tk_status, clnt->cl_port); 207 /* Portmapper not available */
144 208 xprt->ops->set_port(xprt, 0);
145 xprt->ops->set_port(xprt, 0); 209 } else if (map->pm_port == 0) {
146 if (task->tk_status < 0) { 210 /* Requested RPC service wasn't registered */
147 /* Make the calling task exit with an error */ 211 xprt->ops->set_port(xprt, 0);
148 task->tk_action = rpc_exit_task; 212 status = -EACCES;
149 } else if (clnt->cl_port == 0) {
150 /* Program not registered */
151 rpc_exit(task, -EACCES);
152 } else { 213 } else {
153 xprt->ops->set_port(xprt, clnt->cl_port); 214 /* Succeeded */
154 clnt->cl_port = htons(clnt->cl_port); 215 xprt->ops->set_port(xprt, map->pm_port);
216 xprt_set_bound(xprt);
217 status = 0;
155 } 218 }
156 spin_lock(&pmap_lock); 219
157 map->pm_binding = 0; 220 dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n",
158 rpc_wake_up(&map->pm_bindwait); 221 child->tk_pid, status, map->pm_port);
159 spin_unlock(&pmap_lock); 222
223 pmap_wake_portmap_waiters(xprt, status);
224 xprt_put(xprt);
160} 225}
161 226
162/* 227/**
163 * Set or unset a port registration with the local portmapper. 228 * rpc_register - set or unset a port registration with the local portmapper
229 * @prog: RPC program number to bind
230 * @vers: RPC version number to bind
231 * @prot: transport protocol to use to make this request
232 * @port: port value to register
233 * @okay: result code
234 *
164 * port == 0 means unregister, port != 0 means register. 235 * port == 0 means unregister, port != 0 means register.
165 */ 236 */
166int 237int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
167rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
168{ 238{
169 struct sockaddr_in sin = { 239 struct sockaddr_in sin = {
170 .sin_family = AF_INET, 240 .sin_family = AF_INET,
171 .sin_addr.s_addr = htonl(INADDR_LOOPBACK), 241 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
172 }; 242 };
173 struct rpc_portmap map = { 243 struct portmap_args map = {
174 .pm_prog = prog, 244 .pm_prog = prog,
175 .pm_vers = vers, 245 .pm_vers = vers,
176 .pm_prot = prot, 246 .pm_prot = prot,
@@ -184,7 +254,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
184 struct rpc_clnt *pmap_clnt; 254 struct rpc_clnt *pmap_clnt;
185 int error = 0; 255 int error = 0;
186 256
187 dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", 257 dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n",
188 prog, vers, prot, port); 258 prog, vers, prot, port);
189 259
190 pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); 260 pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
@@ -207,38 +277,32 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
207 return error; 277 return error;
208} 278}
209 279
210static struct rpc_clnt * 280static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
211pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
212{ 281{
213 struct rpc_xprt *xprt; 282 struct rpc_create_args args = {
214 struct rpc_clnt *clnt; 283 .protocol = proto,
215 284 .address = (struct sockaddr *)srvaddr,
216 /* printk("pmap: create xprt\n"); */ 285 .addrsize = sizeof(*srvaddr),
217 xprt = xprt_create_proto(proto, srvaddr, NULL); 286 .servername = hostname,
218 if (IS_ERR(xprt)) 287 .program = &pmap_program,
219 return (struct rpc_clnt *)xprt; 288 .version = RPC_PMAP_VERSION,
220 xprt->ops->set_port(xprt, RPC_PMAP_PORT); 289 .authflavor = RPC_AUTH_UNIX,
290 .flags = (RPC_CLNT_CREATE_ONESHOT |
291 RPC_CLNT_CREATE_NOPING),
292 };
293
294 srvaddr->sin_port = htons(RPC_PMAP_PORT);
221 if (!privileged) 295 if (!privileged)
222 xprt->resvport = 0; 296 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
223 297 return rpc_create(&args);
224 /* printk("pmap: create clnt\n"); */
225 clnt = rpc_new_client(xprt, hostname,
226 &pmap_program, RPC_PMAP_VERSION,
227 RPC_AUTH_UNIX);
228 if (!IS_ERR(clnt)) {
229 clnt->cl_softrtry = 1;
230 clnt->cl_oneshot = 1;
231 }
232 return clnt;
233} 298}
234 299
235/* 300/*
236 * XDR encode/decode functions for PMAP 301 * XDR encode/decode functions for PMAP
237 */ 302 */
238static int 303static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map)
239xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
240{ 304{
241 dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n", 305 dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n",
242 map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); 306 map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
243 *p++ = htonl(map->pm_prog); 307 *p++ = htonl(map->pm_prog);
244 *p++ = htonl(map->pm_vers); 308 *p++ = htonl(map->pm_vers);
@@ -249,15 +313,13 @@ xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
249 return 0; 313 return 0;
250} 314}
251 315
252static int 316static int xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
253xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
254{ 317{
255 *portp = (unsigned short) ntohl(*p++); 318 *portp = (unsigned short) ntohl(*p++);
256 return 0; 319 return 0;
257} 320}
258 321
259static int 322static int xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
260xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
261{ 323{
262 *boolp = (unsigned int) ntohl(*p++); 324 *boolp = (unsigned int) ntohl(*p++);
263 return 0; 325 return 0;
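[Editor's note] For reference, xdr_encode_mapping() above emits the PMAP v2 "mapping" argument of RFC 1833: four big-endian 32-bit words. A sketch of that wire layout as a struct, illustrative only since the real code writes the words directly through the u32 pointer:

struct pmap_mapping_wire {
	__be32 prog;	/* RPC program number */
	__be32 vers;	/* RPC program version */
	__be32 prot;	/* IPPROTO_UDP or IPPROTO_TCP */
	__be32 port;	/* 0 in GETPORT requests */
};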
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0b1a1ac8a4bc..dfa504fe383f 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -327,10 +327,8 @@ rpc_show_info(struct seq_file *m, void *v)
327 seq_printf(m, "RPC server: %s\n", clnt->cl_server); 327 seq_printf(m, "RPC server: %s\n", clnt->cl_server);
328 seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, 328 seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
329 clnt->cl_prog, clnt->cl_vers); 329 clnt->cl_prog, clnt->cl_vers);
330 seq_printf(m, "address: %u.%u.%u.%u\n", 330 seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
331 NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); 331 seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
332 seq_printf(m, "protocol: %s\n",
333 clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
334 return 0; 332 return 0;
335} 333}
336 334
@@ -623,17 +621,13 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
623} 621}
624 622
625static struct dentry * 623static struct dentry *
626rpc_lookup_negative(char *path, struct nameidata *nd) 624rpc_lookup_create(struct dentry *parent, const char *name, int len)
627{ 625{
626 struct inode *dir = parent->d_inode;
628 struct dentry *dentry; 627 struct dentry *dentry;
629 struct inode *dir;
630 int error;
631 628
632 if ((error = rpc_lookup_parent(path, nd)) != 0)
633 return ERR_PTR(error);
634 dir = nd->dentry->d_inode;
635 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 629 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
636 dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); 630 dentry = lookup_one_len(name, parent, len);
637 if (IS_ERR(dentry)) 631 if (IS_ERR(dentry))
638 goto out_err; 632 goto out_err;
639 if (dentry->d_inode) { 633 if (dentry->d_inode) {
@@ -644,7 +638,20 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
644 return dentry; 638 return dentry;
645out_err: 639out_err:
646 mutex_unlock(&dir->i_mutex); 640 mutex_unlock(&dir->i_mutex);
647 rpc_release_path(nd); 641 return dentry;
642}
643
644static struct dentry *
645rpc_lookup_negative(char *path, struct nameidata *nd)
646{
647 struct dentry *dentry;
648 int error;
649
650 if ((error = rpc_lookup_parent(path, nd)) != 0)
651 return ERR_PTR(error);
652 dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len);
653 if (IS_ERR(dentry))
654 rpc_release_path(nd);
648 return dentry; 655 return dentry;
649} 656}
650 657
@@ -703,18 +710,17 @@ rpc_rmdir(struct dentry *dentry)
703} 710}
704 711
705struct dentry * 712struct dentry *
706rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) 713rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags)
707{ 714{
708 struct nameidata nd;
709 struct dentry *dentry; 715 struct dentry *dentry;
710 struct inode *dir, *inode; 716 struct inode *dir, *inode;
711 struct rpc_inode *rpci; 717 struct rpc_inode *rpci;
712 718
713 dentry = rpc_lookup_negative(path, &nd); 719 dentry = rpc_lookup_create(parent, name, strlen(name));
714 if (IS_ERR(dentry)) 720 if (IS_ERR(dentry))
715 return dentry; 721 return dentry;
716 dir = nd.dentry->d_inode; 722 dir = parent->d_inode;
717 inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); 723 inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
718 if (!inode) 724 if (!inode)
719 goto err_dput; 725 goto err_dput;
720 inode->i_ino = iunique(dir->i_sb, 100); 726 inode->i_ino = iunique(dir->i_sb, 100);
@@ -728,13 +734,13 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
728 dget(dentry); 734 dget(dentry);
729out: 735out:
730 mutex_unlock(&dir->i_mutex); 736 mutex_unlock(&dir->i_mutex);
731 rpc_release_path(&nd);
732 return dentry; 737 return dentry;
733err_dput: 738err_dput:
734 dput(dentry); 739 dput(dentry);
735 dentry = ERR_PTR(-ENOMEM); 740 dentry = ERR_PTR(-ENOMEM);
736 printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", 741 printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n",
737 __FILE__, __FUNCTION__, path, -ENOMEM); 742 __FILE__, __FUNCTION__, parent->d_name.name, name,
743 -ENOMEM);
738 goto out; 744 goto out;
739} 745}
740 746
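[Editor's note] rpc_mkpipe() now takes a parent dentry plus a name rather than a path string, which is what lets the auth_gss caller above drop its 48-byte path buffer. A hedged usage sketch, where demo_pipe_ops is a hypothetical struct rpc_pipe_ops:

static struct dentry *demo_make_pipe(struct rpc_clnt *clnt, void *private)
{
	/* Creates "demo" under the client's rpc_pipefs directory. */
	return rpc_mkpipe(clnt->cl_dentry, "demo", private,
			  &demo_pipe_ops, RPC_PIPE_WAIT_FOR_OPEN);
}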
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5c3eee768504..6390461a9756 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -21,7 +21,6 @@
21#include <linux/mutex.h> 21#include <linux/mutex.h>
22 22
23#include <linux/sunrpc/clnt.h> 23#include <linux/sunrpc/clnt.h>
24#include <linux/sunrpc/xprt.h>
25 24
26#ifdef RPC_DEBUG 25#ifdef RPC_DEBUG
27#define RPCDBG_FACILITY RPCDBG_SCHED 26#define RPCDBG_FACILITY RPCDBG_SCHED
@@ -45,12 +44,6 @@ static void rpciod_killall(void);
45static void rpc_async_schedule(void *); 44static void rpc_async_schedule(void *);
46 45
47/* 46/*
48 * RPC tasks that create another task (e.g. for contacting the portmapper)
49 * will wait on this queue for their child's completion
50 */
51static RPC_WAITQ(childq, "childq");
52
53/*
54 * RPC tasks sit here while waiting for conditions to improve. 47 * RPC tasks sit here while waiting for conditions to improve.
55 */ 48 */
56static RPC_WAITQ(delay_queue, "delayq"); 49static RPC_WAITQ(delay_queue, "delayq");
@@ -324,16 +317,6 @@ static void rpc_make_runnable(struct rpc_task *task)
324} 317}
325 318
326/* 319/*
327 * Place a newly initialized task on the workqueue.
328 */
329static inline void
330rpc_schedule_run(struct rpc_task *task)
331{
332 rpc_set_active(task);
333 rpc_make_runnable(task);
334}
335
336/*
337 * Prepare for sleeping on a wait queue. 320 * Prepare for sleeping on a wait queue.
338 * By always appending tasks to the list we ensure FIFO behavior. 321 * By always appending tasks to the list we ensure FIFO behavior.
339 * NB: An RPC task will only receive interrupt-driven events as long 322 * NB: An RPC task will only receive interrupt-driven events as long
@@ -559,24 +542,20 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
559 spin_unlock_bh(&queue->lock); 542 spin_unlock_bh(&queue->lock);
560} 543}
561 544
545static void __rpc_atrun(struct rpc_task *task)
546{
547 rpc_wake_up_task(task);
548}
549
562/* 550/*
563 * Run a task at a later time 551 * Run a task at a later time
564 */ 552 */
565static void __rpc_atrun(struct rpc_task *); 553void rpc_delay(struct rpc_task *task, unsigned long delay)
566void
567rpc_delay(struct rpc_task *task, unsigned long delay)
568{ 554{
569 task->tk_timeout = delay; 555 task->tk_timeout = delay;
570 rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); 556 rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
571} 557}
572 558
573static void
574__rpc_atrun(struct rpc_task *task)
575{
576 task->tk_status = 0;
577 rpc_wake_up_task(task);
578}
579
580/* 559/*
581 * Helper to call task->tk_ops->rpc_call_prepare 560 * Helper to call task->tk_ops->rpc_call_prepare
582 */ 561 */
@@ -933,72 +912,6 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
933} 912}
934EXPORT_SYMBOL(rpc_run_task); 913EXPORT_SYMBOL(rpc_run_task);
935 914
936/**
937 * rpc_find_parent - find the parent of a child task.
938 * @child: child task
939 * @parent: parent task
940 *
941 * Checks that the parent task is still sleeping on the
942 * queue 'childq'. If so returns a pointer to the parent.
943 * Upon failure returns NULL.
944 *
945 * Caller must hold childq.lock
946 */
947static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent)
948{
949 struct rpc_task *task;
950 struct list_head *le;
951
952 task_for_each(task, le, &childq.tasks[0])
953 if (task == parent)
954 return parent;
955
956 return NULL;
957}
958
959static void rpc_child_exit(struct rpc_task *child, void *calldata)
960{
961 struct rpc_task *parent;
962
963 spin_lock_bh(&childq.lock);
964 if ((parent = rpc_find_parent(child, calldata)) != NULL) {
965 parent->tk_status = child->tk_status;
966 __rpc_wake_up_task(parent);
967 }
968 spin_unlock_bh(&childq.lock);
969}
970
971static const struct rpc_call_ops rpc_child_ops = {
972 .rpc_call_done = rpc_child_exit,
973};
974
975/*
976 * Note: rpc_new_task releases the client after a failure.
977 */
978struct rpc_task *
979rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
980{
981 struct rpc_task *task;
982
983 task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent);
984 if (!task)
985 goto fail;
986 return task;
987
988fail:
989 parent->tk_status = -ENOMEM;
990 return NULL;
991}
992
993void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
994{
995 spin_lock_bh(&childq.lock);
996 /* N.B. Is it possible for the child to have already finished? */
997 __rpc_sleep_on(&childq, task, func, NULL);
998 rpc_schedule_run(child);
999 spin_unlock_bh(&childq.lock);
1000}
1001
1002/* 915/*
1003 * Kill all tasks for the given client. 916 * Kill all tasks for the given client.
1004 * XXX: kill their descendants as well? 917 * XXX: kill their descendants as well?
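[Editor's note] With childq, rpc_new_child(), and rpc_run_child() gone, asynchronous helpers follow the rpc_run_task()/rpc_call_ops pattern the portmapper now uses. A minimal sketch under that assumption; the demo_* names are hypothetical:

static void demo_done(struct rpc_task *task, void *calldata)
{
	/* Inspect task->tk_status and hand results back to the waiter. */
}

static void demo_release(void *calldata)
{
	kfree(calldata);
}

static const struct rpc_call_ops demo_ops = {
	.rpc_call_done = demo_done,
	.rpc_release   = demo_release,
};

static int demo_start(struct rpc_clnt *clnt, void *calldata)
{
	struct rpc_task *task;

	task = rpc_run_task(clnt, RPC_TASK_ASYNC, &demo_ops, calldata);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_release_task(task);		/* drop our ref; rpciod runs it */
	return 0;
}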
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index eb330d4f66d6..6f17527b9e69 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
168 return -1; 168 return -1;
169 if ((unsigned short)csum_fold(desc.csum)) 169 if ((unsigned short)csum_fold(desc.csum))
170 return -1; 170 return -1;
171 if (unlikely(skb->ip_summed == CHECKSUM_HW)) 171 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
172 netdev_rx_csum_fault(skb->dev); 172 netdev_rx_csum_fault(skb->dev);
173 return 0; 173 return 0;
174no_checksum: 174no_checksum:
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index f38f939ce95f..26c0531d7e25 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -36,8 +36,6 @@ EXPORT_SYMBOL(rpc_wake_up_status);
36EXPORT_SYMBOL(rpc_release_task); 36EXPORT_SYMBOL(rpc_release_task);
37 37
38/* RPC client functions */ 38/* RPC client functions */
39EXPORT_SYMBOL(rpc_create_client);
40EXPORT_SYMBOL(rpc_new_client);
41EXPORT_SYMBOL(rpc_clone_client); 39EXPORT_SYMBOL(rpc_clone_client);
42EXPORT_SYMBOL(rpc_bind_new_program); 40EXPORT_SYMBOL(rpc_bind_new_program);
43EXPORT_SYMBOL(rpc_destroy_client); 41EXPORT_SYMBOL(rpc_destroy_client);
@@ -57,7 +55,6 @@ EXPORT_SYMBOL(rpc_queue_upcall);
57EXPORT_SYMBOL(rpc_mkpipe); 55EXPORT_SYMBOL(rpc_mkpipe);
58 56
59/* Client transport */ 57/* Client transport */
60EXPORT_SYMBOL(xprt_create_proto);
61EXPORT_SYMBOL(xprt_set_timeout); 58EXPORT_SYMBOL(xprt_set_timeout);
62 59
63/* Client credential cache */ 60/* Client credential cache */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d9a95732df46..953aff89bcac 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
388 /* send head */ 388 /* send head */
389 if (slen == xdr->head[0].iov_len) 389 if (slen == xdr->head[0].iov_len)
390 flags = 0; 390 flags = 0;
391 len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); 391 len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
392 if (len != xdr->head[0].iov_len) 392 if (len != xdr->head[0].iov_len)
393 goto out; 393 goto out;
394 slen -= xdr->head[0].iov_len; 394 slen -= xdr->head[0].iov_len;
@@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
400 while (pglen > 0) { 400 while (pglen > 0) {
401 if (slen == size) 401 if (slen == size)
402 flags = 0; 402 flags = 0;
403 result = sock->ops->sendpage(sock, *ppage, base, size, flags); 403 result = kernel_sendpage(sock, *ppage, base, size, flags);
404 if (result > 0) 404 if (result > 0)
405 len += result; 405 len += result;
406 if (result != size) 406 if (result != size)
@@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
413 } 413 }
414 /* send tail */ 414 /* send tail */
415 if (xdr->tail[0].iov_len) { 415 if (xdr->tail[0].iov_len) {
416 result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], 416 result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage],
417 ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), 417 ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1),
418 xdr->tail[0].iov_len, 0); 418 xdr->tail[0].iov_len, 0);
419 419
@@ -434,13 +434,10 @@ out:
434static int 434static int
435svc_recv_available(struct svc_sock *svsk) 435svc_recv_available(struct svc_sock *svsk)
436{ 436{
437 mm_segment_t oldfs;
438 struct socket *sock = svsk->sk_sock; 437 struct socket *sock = svsk->sk_sock;
439 int avail, err; 438 int avail, err;
440 439
441 oldfs = get_fs(); set_fs(KERNEL_DS); 440 err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
442 err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
443 set_fs(oldfs);
444 441
445 return (err >= 0)? avail : err; 442 return (err >= 0)? avail : err;
446} 443}
@@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
472 * at accept time. FIXME 469 * at accept time. FIXME
473 */ 470 */
474 alen = sizeof(rqstp->rq_addr); 471 alen = sizeof(rqstp->rq_addr);
475 sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); 472 kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen);
476 473
477 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", 474 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
478 rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); 475 rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
@@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk)
758 struct svc_serv *serv = svsk->sk_server; 755 struct svc_serv *serv = svsk->sk_server;
759 struct socket *sock = svsk->sk_sock; 756 struct socket *sock = svsk->sk_sock;
760 struct socket *newsock; 757 struct socket *newsock;
761 const struct proto_ops *ops;
762 struct svc_sock *newsvsk; 758 struct svc_sock *newsvsk;
763 int err, slen; 759 int err, slen;
764 760
@@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk)
766 if (!sock) 762 if (!sock)
767 return; 763 return;
768 764
769 err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); 765 clear_bit(SK_CONN, &svsk->sk_flags);
770 if (err) { 766 err = kernel_accept(sock, &newsock, O_NONBLOCK);
767 if (err < 0) {
771 if (err == -ENOMEM) 768 if (err == -ENOMEM)
772 printk(KERN_WARNING "%s: no more sockets!\n", 769 printk(KERN_WARNING "%s: no more sockets!\n",
773 serv->sv_name); 770 serv->sv_name);
774 return; 771 else if (err != -EAGAIN && net_ratelimit())
775 }
776
777 dprintk("svc: tcp_accept %p allocated\n", newsock);
778 newsock->ops = ops = sock->ops;
779
780 clear_bit(SK_CONN, &svsk->sk_flags);
781 if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
782 if (err != -EAGAIN && net_ratelimit())
783 printk(KERN_WARNING "%s: accept failed (err %d)!\n", 772 printk(KERN_WARNING "%s: accept failed (err %d)!\n",
784 serv->sv_name, -err); 773 serv->sv_name, -err);
785 goto failed; /* aborted connection or whatever */ 774 return;
786 } 775 }
776
787 set_bit(SK_CONN, &svsk->sk_flags); 777 set_bit(SK_CONN, &svsk->sk_flags);
788 svc_sock_enqueue(svsk); 778 svc_sock_enqueue(svsk);
789 779
790 slen = sizeof(sin); 780 slen = sizeof(sin);
791 err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1); 781 err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen);
792 if (err < 0) { 782 if (err < 0) {
793 if (net_ratelimit()) 783 if (net_ratelimit())
794 printk(KERN_WARNING "%s: peername failed (err %d)!\n", 784 printk(KERN_WARNING "%s: peername failed (err %d)!\n",
@@ -1406,14 +1396,14 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
1406 if (sin != NULL) { 1396 if (sin != NULL) {
1407 if (type == SOCK_STREAM) 1397 if (type == SOCK_STREAM)
1408 sock->sk->sk_reuse = 1; /* allow address reuse */ 1398 sock->sk->sk_reuse = 1; /* allow address reuse */
1409 error = sock->ops->bind(sock, (struct sockaddr *) sin, 1399 error = kernel_bind(sock, (struct sockaddr *) sin,
1410 sizeof(*sin)); 1400 sizeof(*sin));
1411 if (error < 0) 1401 if (error < 0)
1412 goto bummer; 1402 goto bummer;
1413 } 1403 }
1414 1404
1415 if (protocol == IPPROTO_TCP) { 1405 if (protocol == IPPROTO_TCP) {
1416 if ((error = sock->ops->listen(sock, 64)) < 0) 1406 if ((error = kernel_listen(sock, 64)) < 0)
1417 goto bummer; 1407 goto bummer;
1418 } 1408 }
1419 1409
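
The svcsock.c hunks above swap direct sock->ops->sendpage()/ioctl()/accept()/getname() calls, and the set_fs(KERNEL_DS) dance, for the kernel_* socket helpers. A minimal sketch of the wrapper pattern those helpers follow, assuming the net/socket.c definitions that accompany this series:

#include <linux/net.h>
#include <net/sock.h>

/* Sketch of the wrapper pattern (the canonical definition lives in
 * net/socket.c): dispatch to the protocol's method when it exists,
 * otherwise fall back to the sendmsg-based emulation. */
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
                    size_t size, int flags)
{
        if (sock->ops->sendpage)
                return sock->ops->sendpage(sock, page, offset, size, flags);
        return sock_no_sendpage(sock, page, offset, size, flags);
}

kernel_sock_ioctl() performs the set_fs() switch internally in the same spirit, which is why svc_recv_available() can drop its mm_segment_t handling above.
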
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index bcbdf6430d5c..8142fdb8a930 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -19,8 +19,6 @@
19#include <linux/unistd.h> 19#include <linux/unistd.h>
20 20
21#include <linux/sunrpc/clnt.h> 21#include <linux/sunrpc/clnt.h>
22#include <linux/sunrpc/xprt.h>
23#include <linux/sunrpc/timer.h>
24 22
25#define RPC_RTO_MAX (60*HZ) 23#define RPC_RTO_MAX (60*HZ)
26#define RPC_RTO_INIT (HZ/5) 24#define RPC_RTO_INIT (HZ/5)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e8c2bc4977f3..1f786f68729d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -534,7 +534,7 @@ void xprt_connect(struct rpc_task *task)
534 dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, 534 dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
535 xprt, (xprt_connected(xprt) ? "is" : "is not")); 535 xprt, (xprt_connected(xprt) ? "is" : "is not"));
536 536
537 if (!xprt->addr.sin_port) { 537 if (!xprt_bound(xprt)) {
538 task->tk_status = -EIO; 538 task->tk_status = -EIO;
539 return; 539 return;
540 } 540 }
@@ -585,13 +585,6 @@ static void xprt_connect_status(struct rpc_task *task)
585 task->tk_pid, -task->tk_status, task->tk_client->cl_server); 585 task->tk_pid, -task->tk_status, task->tk_client->cl_server);
586 xprt_release_write(xprt, task); 586 xprt_release_write(xprt, task);
587 task->tk_status = -EIO; 587 task->tk_status = -EIO;
588 return;
589 }
590
591 /* if soft mounted, just cause this RPC to fail */
592 if (RPC_IS_SOFT(task)) {
593 xprt_release_write(xprt, task);
594 task->tk_status = -EIO;
595 } 588 }
596} 589}
597 590
@@ -829,6 +822,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
829 req->rq_bufsize = 0; 822 req->rq_bufsize = 0;
830 req->rq_xid = xprt_alloc_xid(xprt); 823 req->rq_xid = xprt_alloc_xid(xprt);
831 req->rq_release_snd_buf = NULL; 824 req->rq_release_snd_buf = NULL;
825 xprt_reset_majortimeo(req);
832 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, 826 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
833 req, ntohl(req->rq_xid)); 827 req, ntohl(req->rq_xid));
834} 828}
@@ -887,16 +881,32 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
887 to->to_exponential = 0; 881 to->to_exponential = 0;
888} 882}
889 883
890static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) 884/**
885 * xprt_create_transport - create an RPC transport
886 * @proto: requested transport protocol
887 * @ap: remote peer address
888 * @size: length of address
889 * @to: timeout parameters
890 *
891 */
892struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
891{ 893{
892 int result; 894 int result;
893 struct rpc_xprt *xprt; 895 struct rpc_xprt *xprt;
894 struct rpc_rqst *req; 896 struct rpc_rqst *req;
895 897
896 if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) 898 if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) {
899 dprintk("RPC: xprt_create_transport: no memory\n");
897 return ERR_PTR(-ENOMEM); 900 return ERR_PTR(-ENOMEM);
898 901 }
899 xprt->addr = *ap; 902 if (size <= sizeof(xprt->addr)) {
903 memcpy(&xprt->addr, ap, size);
904 xprt->addrlen = size;
905 } else {
906 kfree(xprt);
907 dprintk("RPC: xprt_create_transport: address too large\n");
908 return ERR_PTR(-EBADF);
909 }
900 910
901 switch (proto) { 911 switch (proto) {
902 case IPPROTO_UDP: 912 case IPPROTO_UDP:
@@ -908,14 +918,15 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
908 default: 918 default:
909 printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", 919 printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
910 proto); 920 proto);
911 result = -EIO; 921 return ERR_PTR(-EIO);
912 break;
913 } 922 }
914 if (result) { 923 if (result) {
915 kfree(xprt); 924 kfree(xprt);
925 dprintk("RPC: xprt_create_transport: failed, %d\n", result);
916 return ERR_PTR(result); 926 return ERR_PTR(result);
917 } 927 }
918 928
929 kref_init(&xprt->kref);
919 spin_lock_init(&xprt->transport_lock); 930 spin_lock_init(&xprt->transport_lock);
920 spin_lock_init(&xprt->reserve_lock); 931 spin_lock_init(&xprt->reserve_lock);
921 932
@@ -928,6 +939,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
928 xprt->last_used = jiffies; 939 xprt->last_used = jiffies;
929 xprt->cwnd = RPC_INITCWND; 940 xprt->cwnd = RPC_INITCWND;
930 941
942 rpc_init_wait_queue(&xprt->binding, "xprt_binding");
931 rpc_init_wait_queue(&xprt->pending, "xprt_pending"); 943 rpc_init_wait_queue(&xprt->pending, "xprt_pending");
932 rpc_init_wait_queue(&xprt->sending, "xprt_sending"); 944 rpc_init_wait_queue(&xprt->sending, "xprt_sending");
933 rpc_init_wait_queue(&xprt->resend, "xprt_resend"); 945 rpc_init_wait_queue(&xprt->resend, "xprt_resend");
@@ -941,41 +953,43 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc
941 953
942 dprintk("RPC: created transport %p with %u slots\n", xprt, 954 dprintk("RPC: created transport %p with %u slots\n", xprt,
943 xprt->max_reqs); 955 xprt->max_reqs);
944
945 return xprt;
946}
947 956
948/**
949 * xprt_create_proto - create an RPC client transport
950 * @proto: requested transport protocol
951 * @sap: remote peer's address
952 * @to: timeout parameters for new transport
953 *
954 */
955struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
956{
957 struct rpc_xprt *xprt;
958
959 xprt = xprt_setup(proto, sap, to);
960 if (IS_ERR(xprt))
961 dprintk("RPC: xprt_create_proto failed\n");
962 else
963 dprintk("RPC: xprt_create_proto created xprt %p\n", xprt);
964 return xprt; 957 return xprt;
965} 958}
966 959
967/** 960/**
968 * xprt_destroy - destroy an RPC transport, killing off all requests. 961 * xprt_destroy - destroy an RPC transport, killing off all requests.
969 * @xprt: transport to destroy 962 * @kref: kref for the transport to destroy
970 * 963 *
971 */ 964 */
972int xprt_destroy(struct rpc_xprt *xprt) 965static void xprt_destroy(struct kref *kref)
973{ 966{
967 struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref);
968
974 dprintk("RPC: destroying transport %p\n", xprt); 969 dprintk("RPC: destroying transport %p\n", xprt);
975 xprt->shutdown = 1; 970 xprt->shutdown = 1;
976 del_timer_sync(&xprt->timer); 971 del_timer_sync(&xprt->timer);
977 xprt->ops->destroy(xprt); 972 xprt->ops->destroy(xprt);
978 kfree(xprt); 973 kfree(xprt);
974}
979 975
980 return 0; 976/**
977 * xprt_put - release a reference to an RPC transport.
978 * @xprt: pointer to the transport
979 *
980 */
981void xprt_put(struct rpc_xprt *xprt)
982{
983 kref_put(&xprt->kref, xprt_destroy);
984}
985
986/**
987 * xprt_get - return a reference to an RPC transport.
988 * @xprt: pointer to the transport
989 *
990 */
991struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
992{
993 kref_get(&xprt->kref);
994 return xprt;
981} 995}
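
The xprt.c hunks move transport lifetime to a kref: xprt_destroy() becomes the kref release callback, and callers now go through xprt_get()/xprt_put(). A minimal sketch of the same pattern on a hypothetical object (names are illustrative, not from the patch):

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct obj {
        struct kref kref;
        /* ... payload ... */
};

static void obj_release(struct kref *kref)
{
        struct obj *o = container_of(kref, struct obj, kref);

        kfree(o);                       /* runs once, on the final put */
}

static struct obj *obj_alloc(void)
{
        struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

        if (o)
                kref_init(&o->kref);    /* refcount starts at 1 */
        return o;
}

static struct obj *obj_get(struct obj *o)
{
        kref_get(&o->kref);             /* caller already holds a ref */
        return o;
}

static void obj_put(struct obj *o)
{
        kref_put(&o->kref, obj_release);
}
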
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 441bd53f5eca..9b62923a9c06 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -125,6 +125,47 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
125} 125}
126#endif 126#endif
127 127
128static void xs_format_peer_addresses(struct rpc_xprt *xprt)
129{
130 struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
131 char *buf;
132
133 buf = kzalloc(20, GFP_KERNEL);
134 if (buf) {
135 snprintf(buf, 20, "%u.%u.%u.%u",
136 NIPQUAD(addr->sin_addr.s_addr));
137 }
138 xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
139
140 buf = kzalloc(8, GFP_KERNEL);
141 if (buf) {
142 snprintf(buf, 8, "%u",
143 ntohs(addr->sin_port));
144 }
145 xprt->address_strings[RPC_DISPLAY_PORT] = buf;
146
147 if (xprt->prot == IPPROTO_UDP)
148 xprt->address_strings[RPC_DISPLAY_PROTO] = "udp";
149 else
150 xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp";
151
152 buf = kzalloc(48, GFP_KERNEL);
153 if (buf) {
154 snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
155 NIPQUAD(addr->sin_addr.s_addr),
156 ntohs(addr->sin_port),
157 xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
158 }
159 xprt->address_strings[RPC_DISPLAY_ALL] = buf;
160}
161
162static void xs_free_peer_addresses(struct rpc_xprt *xprt)
163{
164 kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
165 kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
166 kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
167}
168
128#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) 169#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
129 170
130static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) 171static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len)
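
The buffer sizes chosen in xs_format_peer_addresses() are comfortably safe; worked out here rather than stated in the patch:

/*
 * Worked sizing (ours, not stated in the patch):
 *   RPC_DISPLAY_ADDR:  "255.255.255.255"          15 chars + NUL = 16 <= 20
 *   RPC_DISPLAY_PORT:  "65535"                     5 chars + NUL =  6 <=  8
 *   RPC_DISPLAY_ALL:   "addr=255.255.255.255 port=65535 proto=udp"
 *                                                 41 chars + NUL = 42 <= 48
 * and snprintf() would truncate safely even if these bounds were wrong.
 */
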
@@ -174,7 +215,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
174 struct page **ppage = xdr->pages; 215 struct page **ppage = xdr->pages;
175 unsigned int len, pglen = xdr->page_len; 216 unsigned int len, pglen = xdr->page_len;
176 int err, ret = 0; 217 int err, ret = 0;
177 ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
178 218
179 if (unlikely(!sock)) 219 if (unlikely(!sock))
180 return -ENOTCONN; 220 return -ENOTCONN;
@@ -207,7 +247,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
207 base &= ~PAGE_CACHE_MASK; 247 base &= ~PAGE_CACHE_MASK;
208 } 248 }
209 249
210 sendpage = sock->ops->sendpage ? : sock_no_sendpage;
211 do { 250 do {
212 int flags = XS_SENDMSG_FLAGS; 251 int flags = XS_SENDMSG_FLAGS;
213 252
@@ -220,10 +259,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a
220 if (pglen != len || xdr->tail[0].iov_len != 0) 259 if (pglen != len || xdr->tail[0].iov_len != 0)
221 flags |= MSG_MORE; 260 flags |= MSG_MORE;
222 261
223 /* Hmm... We might be dealing with highmem pages */ 262 err = kernel_sendpage(sock, *ppage, base, len, flags);
224 if (PageHighMem(*ppage))
225 sendpage = sock_no_sendpage;
226 err = sendpage(sock, *ppage, base, len, flags);
227 if (ret == 0) 263 if (ret == 0)
228 ret = err; 264 ret = err;
229 else if (err > 0) 265 else if (err > 0)
@@ -300,7 +336,7 @@ static int xs_udp_send_request(struct rpc_task *task)
300 336
301 req->rq_xtime = jiffies; 337 req->rq_xtime = jiffies;
302 status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, 338 status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr,
303 sizeof(xprt->addr), xdr, req->rq_bytes_sent); 339 xprt->addrlen, xdr, req->rq_bytes_sent);
304 340
305 dprintk("RPC: xs_udp_send_request(%u) = %d\n", 341 dprintk("RPC: xs_udp_send_request(%u) = %d\n",
306 xdr->len - req->rq_bytes_sent, status); 342 xdr->len - req->rq_bytes_sent, status);
@@ -490,6 +526,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
490 526
491 xprt_disconnect(xprt); 527 xprt_disconnect(xprt);
492 xs_close(xprt); 528 xs_close(xprt);
529 xs_free_peer_addresses(xprt);
493 kfree(xprt->slot); 530 kfree(xprt->slot);
494} 531}
495 532
@@ -965,6 +1002,19 @@ static unsigned short xs_get_random_port(void)
965} 1002}
966 1003
967/** 1004/**
1005 * xs_print_peer_address - format an IPv4 address for printing
1006 * @xprt: generic transport
1007 * @format: flags field indicating which parts of the address to render
1008 */
1009static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format)
1010{
1011 if (xprt->address_strings[format] != NULL)
1012 return xprt->address_strings[format];
1013 else
1014 return "unprintable";
1015}
1016
1017/**
968 * xs_set_port - reset the port number in the remote endpoint address 1018 * xs_set_port - reset the port number in the remote endpoint address
969 * @xprt: generic transport 1019 * @xprt: generic transport
970 * @port: new port number 1020 * @port: new port number
@@ -972,8 +1022,11 @@ static unsigned short xs_get_random_port(void)
972 */ 1022 */
973static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) 1023static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
974{ 1024{
1025 struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
1026
975 dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); 1027 dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);
976 xprt->addr.sin_port = htons(port); 1028
1029 sap->sin_port = htons(port);
977} 1030}
978 1031
979static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) 1032static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
@@ -986,7 +1039,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
986 1039
987 do { 1040 do {
988 myaddr.sin_port = htons(port); 1041 myaddr.sin_port = htons(port);
989 err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, 1042 err = kernel_bind(sock, (struct sockaddr *) &myaddr,
990 sizeof(myaddr)); 1043 sizeof(myaddr));
991 if (err == 0) { 1044 if (err == 0) {
992 xprt->port = port; 1045 xprt->port = port;
@@ -1016,11 +1069,9 @@ static void xs_udp_connect_worker(void *args)
1016 struct socket *sock = xprt->sock; 1069 struct socket *sock = xprt->sock;
1017 int err, status = -EIO; 1070 int err, status = -EIO;
1018 1071
1019 if (xprt->shutdown || xprt->addr.sin_port == 0) 1072 if (xprt->shutdown || !xprt_bound(xprt))
1020 goto out; 1073 goto out;
1021 1074
1022 dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt);
1023
1024 /* Start by resetting any existing state */ 1075 /* Start by resetting any existing state */
1025 xs_close(xprt); 1076 xs_close(xprt);
1026 1077
@@ -1034,6 +1085,9 @@ static void xs_udp_connect_worker(void *args)
1034 goto out; 1085 goto out;
1035 } 1086 }
1036 1087
1088 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1089 xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
1090
1037 if (!xprt->inet) { 1091 if (!xprt->inet) {
1038 struct sock *sk = sock->sk; 1092 struct sock *sk = sock->sk;
1039 1093
@@ -1081,7 +1135,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
1081 */ 1135 */
1082 memset(&any, 0, sizeof(any)); 1136 memset(&any, 0, sizeof(any));
1083 any.sa_family = AF_UNSPEC; 1137 any.sa_family = AF_UNSPEC;
1084 result = sock->ops->connect(sock, &any, sizeof(any), 0); 1138 result = kernel_connect(sock, &any, sizeof(any), 0);
1085 if (result) 1139 if (result)
1086 dprintk("RPC: AF_UNSPEC connect return code %d\n", 1140 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1087 result); 1141 result);
@@ -1099,11 +1153,9 @@ static void xs_tcp_connect_worker(void *args)
1099 struct socket *sock = xprt->sock; 1153 struct socket *sock = xprt->sock;
1100 int err, status = -EIO; 1154 int err, status = -EIO;
1101 1155
1102 if (xprt->shutdown || xprt->addr.sin_port == 0) 1156 if (xprt->shutdown || !xprt_bound(xprt))
1103 goto out; 1157 goto out;
1104 1158
1105 dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt);
1106
1107 if (!xprt->sock) { 1159 if (!xprt->sock) {
1108 /* start from scratch */ 1160 /* start from scratch */
1109 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1161 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
@@ -1119,6 +1171,9 @@ static void xs_tcp_connect_worker(void *args)
1119 /* "close" the socket, preserving the local port */ 1171 /* "close" the socket, preserving the local port */
1120 xs_tcp_reuse_connection(xprt); 1172 xs_tcp_reuse_connection(xprt);
1121 1173
1174 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1175 xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
1176
1122 if (!xprt->inet) { 1177 if (!xprt->inet) {
1123 struct sock *sk = sock->sk; 1178 struct sock *sk = sock->sk;
1124 1179
@@ -1151,8 +1206,8 @@ static void xs_tcp_connect_worker(void *args)
1151 /* Tell the socket layer to start connecting... */ 1206 /* Tell the socket layer to start connecting... */
1152 xprt->stat.connect_count++; 1207 xprt->stat.connect_count++;
1153 xprt->stat.connect_start = jiffies; 1208 xprt->stat.connect_start = jiffies;
1154 status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, 1209 status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
1155 sizeof(xprt->addr), O_NONBLOCK); 1210 xprt->addrlen, O_NONBLOCK);
1156 dprintk("RPC: %p connect status %d connected %d sock state %d\n", 1211 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
1157 xprt, -status, xprt_connected(xprt), sock->sk->sk_state); 1212 xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
1158 if (status < 0) { 1213 if (status < 0) {
@@ -1260,8 +1315,10 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
1260 1315
1261static struct rpc_xprt_ops xs_udp_ops = { 1316static struct rpc_xprt_ops xs_udp_ops = {
1262 .set_buffer_size = xs_udp_set_buffer_size, 1317 .set_buffer_size = xs_udp_set_buffer_size,
1318 .print_addr = xs_print_peer_address,
1263 .reserve_xprt = xprt_reserve_xprt_cong, 1319 .reserve_xprt = xprt_reserve_xprt_cong,
1264 .release_xprt = xprt_release_xprt_cong, 1320 .release_xprt = xprt_release_xprt_cong,
1321 .rpcbind = rpc_getport,
1265 .set_port = xs_set_port, 1322 .set_port = xs_set_port,
1266 .connect = xs_connect, 1323 .connect = xs_connect,
1267 .buf_alloc = rpc_malloc, 1324 .buf_alloc = rpc_malloc,
@@ -1276,8 +1333,10 @@ static struct rpc_xprt_ops xs_udp_ops = {
1276}; 1333};
1277 1334
1278static struct rpc_xprt_ops xs_tcp_ops = { 1335static struct rpc_xprt_ops xs_tcp_ops = {
1336 .print_addr = xs_print_peer_address,
1279 .reserve_xprt = xprt_reserve_xprt, 1337 .reserve_xprt = xprt_reserve_xprt,
1280 .release_xprt = xs_tcp_release_xprt, 1338 .release_xprt = xs_tcp_release_xprt,
1339 .rpcbind = rpc_getport,
1281 .set_port = xs_set_port, 1340 .set_port = xs_set_port,
1282 .connect = xs_connect, 1341 .connect = xs_connect,
1283 .buf_alloc = rpc_malloc, 1342 .buf_alloc = rpc_malloc,
@@ -1298,8 +1357,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
1298int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) 1357int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1299{ 1358{
1300 size_t slot_table_size; 1359 size_t slot_table_size;
1301 1360 struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
1302 dprintk("RPC: setting up udp-ipv4 transport...\n");
1303 1361
1304 xprt->max_reqs = xprt_udp_slot_table_entries; 1362 xprt->max_reqs = xprt_udp_slot_table_entries;
1305 slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); 1363 slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
@@ -1307,10 +1365,12 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1307 if (xprt->slot == NULL) 1365 if (xprt->slot == NULL)
1308 return -ENOMEM; 1366 return -ENOMEM;
1309 1367
1310 xprt->prot = IPPROTO_UDP; 1368 if (ntohs(addr->sin_port) != 0)
1369 xprt_set_bound(xprt);
1311 xprt->port = xs_get_random_port(); 1370 xprt->port = xs_get_random_port();
1371
1372 xprt->prot = IPPROTO_UDP;
1312 xprt->tsh_size = 0; 1373 xprt->tsh_size = 0;
1313 xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
1314 /* XXX: header size can vary due to auth type, IPv6, etc. */ 1374 /* XXX: header size can vary due to auth type, IPv6, etc. */
1315 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 1375 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
1316 1376
@@ -1327,6 +1387,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1327 else 1387 else
1328 xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); 1388 xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
1329 1389
1390 xs_format_peer_addresses(xprt);
1391 dprintk("RPC: set up transport to address %s\n",
1392 xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
1393
1330 return 0; 1394 return 0;
1331} 1395}
1332 1396
@@ -1339,8 +1403,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1339int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) 1403int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1340{ 1404{
1341 size_t slot_table_size; 1405 size_t slot_table_size;
1342 1406 struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
1343 dprintk("RPC: setting up tcp-ipv4 transport...\n");
1344 1407
1345 xprt->max_reqs = xprt_tcp_slot_table_entries; 1408 xprt->max_reqs = xprt_tcp_slot_table_entries;
1346 slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); 1409 slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
@@ -1348,10 +1411,12 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1348 if (xprt->slot == NULL) 1411 if (xprt->slot == NULL)
1349 return -ENOMEM; 1412 return -ENOMEM;
1350 1413
1351 xprt->prot = IPPROTO_TCP; 1414 if (ntohs(addr->sin_port) != 0)
1415 xprt_set_bound(xprt);
1352 xprt->port = xs_get_random_port(); 1416 xprt->port = xs_get_random_port();
1417
1418 xprt->prot = IPPROTO_TCP;
1353 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); 1419 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
1354 xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
1355 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 1420 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
1356 1421
1357 INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); 1422 INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
@@ -1367,5 +1432,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1367 else 1432 else
1368 xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); 1433 xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
1369 1434
1435 xs_format_peer_addresses(xprt);
1436 dprintk("RPC: set up transport to address %s\n",
1437 xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
1438
1370 return 0; 1439 return 0;
1371} 1440}
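
Both connect workers now gate on xprt_bound() instead of peeking at addr.sin_port, and the setup paths call xprt_set_bound() when a nonzero port is supplied. A sketch of what such a predicate pair plausibly looks like, assuming the XPRT_BOUND bit and xprt->state word this series introduces (names here are assumptions, not the authoritative header):

#include <linux/sunrpc/xprt.h>

/* Assumed shape of the pair; treat as illustration only. */
static inline int my_xprt_bound(struct rpc_xprt *xprt)
{
        return test_bit(XPRT_BOUND, &xprt->state);
}

static inline void my_xprt_set_bound(struct rpc_xprt *xprt)
{
        set_bit(XPRT_BOUND, &xprt->state);
}
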
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index de6ec519272e..b43a27828df5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,7 +117,7 @@
117#include <net/checksum.h> 117#include <net/checksum.h>
118#include <linux/security.h> 118#include <linux/security.h>
119 119
120int sysctl_unix_max_dgram_qlen = 10; 120int sysctl_unix_max_dgram_qlen __read_mostly = 10;
121 121
122struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; 122struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
123DEFINE_SPINLOCK(unix_table_lock); 123DEFINE_SPINLOCK(unix_table_lock);
@@ -2060,10 +2060,7 @@ static int __init af_unix_init(void)
2060 int rc = -1; 2060 int rc = -1;
2061 struct sk_buff *dummy_skb; 2061 struct sk_buff *dummy_skb;
2062 2062
2063 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { 2063 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2064 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2065 goto out;
2066 }
2067 2064
2068 rc = proto_register(&unix_proto, 1); 2065 rc = proto_register(&unix_proto, 1);
2069 if (rc != 0) { 2066 if (rc != 0) {
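
The af_unix.c hunk converts a boot-time size check into a compile-time one. A sketch of the mechanism behind BUILD_BUG_ON() (the real macro lives in <linux/kernel.h>; this reimplementation is only illustrative):

/* A true condition produces a negative array size, which no compiler
 * accepts, so the check fails at build time instead of at init. */
#define MY_BUILD_BUG_ON(cond)   ((void)sizeof(char[1 - 2 * !!(cond)]))

static inline void my_build_bug_example(void)
{
        MY_BUILD_BUG_ON(sizeof(int) > sizeof(long));    /* false: compiles */
        /* MY_BUILD_BUG_ON(1);  would abort the build */
}
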
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 0c1c04322baf..0faab6332586 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -6,14 +6,24 @@ config XFRM
6 depends on NET 6 depends on NET
7 7
8config XFRM_USER 8config XFRM_USER
9 tristate "IPsec user configuration interface" 9 tristate "Transformation user configuration interface"
10 depends on INET && XFRM 10 depends on INET && XFRM
11 ---help--- 11 ---help---
12 Support for IPsec user configuration interface used 12 Support for the Transformation (XFRM) user configuration interface
13 by native Linux tools. 13 for protocols like IPsec, used by native Linux tools.
14 14
15 If unsure, say Y. 15 If unsure, say Y.
16 16
17config XFRM_SUB_POLICY
18 bool "Transformation sub policy support (EXPERIMENTAL)"
19 depends on XFRM && EXPERIMENTAL
20 ---help---
21 Support sub policies for developers. By combining a sub policy with
22 the main one, two policies can be applied to the same packet at once.
23 The policy with the shorter lifetime in the kernel should be the sub.
24
25 If unsure, say N.
26
17config NET_KEY 27config NET_KEY
18 tristate "PF_KEY sockets" 28 tristate "PF_KEY sockets"
19 select XFRM 29 select XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 693aac1aa833..de3c1a625a46 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -2,6 +2,7 @@
2# Makefile for the XFRM subsystem. 2# Makefile for the XFRM subsystem.
3# 3#
4 4
5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o 5obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
6 xfrm_input.o xfrm_algo.o
6obj-$(CONFIG_XFRM_USER) += xfrm_user.o 7obj-$(CONFIG_XFRM_USER) += xfrm_user.o
7 8
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
new file mode 100644
index 000000000000..37643bb8768a
--- /dev/null
+++ b/net/xfrm/xfrm_hash.c
@@ -0,0 +1,41 @@
1/* xfrm_hash.c: Common hash table code.
2 *
3 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
4 */
5
6#include <linux/kernel.h>
7#include <linux/mm.h>
8#include <linux/bootmem.h>
9#include <linux/vmalloc.h>
10#include <linux/slab.h>
11#include <linux/xfrm.h>
12
13#include "xfrm_hash.h"
14
15struct hlist_head *xfrm_hash_alloc(unsigned int sz)
16{
17 struct hlist_head *n;
18
19 if (sz <= PAGE_SIZE)
20 n = kmalloc(sz, GFP_KERNEL);
21 else if (hashdist)
22 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
23 else
24 n = (struct hlist_head *)
25 __get_free_pages(GFP_KERNEL, get_order(sz));
26
27 if (n)
28 memset(n, 0, sz);
29
30 return n;
31}
32
33void xfrm_hash_free(struct hlist_head *n, unsigned int sz)
34{
35 if (sz <= PAGE_SIZE)
36 kfree(n);
37 else if (hashdist)
38 vfree(n);
39 else
40 free_pages((unsigned long)n, get_order(sz));
41}
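
xfrm_hash_alloc() picks kmalloc(), __vmalloc() or direct page allocation by table size (and the hashdist boot parameter); the free path must mirror that decision, so callers always pass the byte size back in. A hypothetical caller, mirroring the doubling done by the resize functions later in this patch:

#include <linux/list.h>

#include "xfrm_hash.h"

/* Hypothetical helper: double a table.  The size is recomputed on
 * free because xfrm_hash_free() picks its strategy from it. */
static struct hlist_head *grow_table(struct hlist_head *old,
                                     unsigned int *hmask)
{
        unsigned int osize = (*hmask + 1) * sizeof(struct hlist_head);
        struct hlist_head *n = xfrm_hash_alloc(osize * 2);

        if (!n)
                return old;             /* keep the old table on failure */
        /* ... rehash every entry from old into n here ... */
        xfrm_hash_free(old, osize);
        *hmask = ((*hmask + 1) << 1) - 1;
        return n;
}
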
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
new file mode 100644
index 000000000000..d3abb0b7dc62
--- /dev/null
+++ b/net/xfrm/xfrm_hash.h
@@ -0,0 +1,128 @@
1#ifndef _XFRM_HASH_H
2#define _XFRM_HASH_H
3
4#include <linux/xfrm.h>
5#include <linux/socket.h>
6
7static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
8{
9 return ntohl(addr->a4);
10}
11
12static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
13{
14 return ntohl(addr->a6[2] ^ addr->a6[3]);
15}
16
17static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
18{
19 return ntohl(daddr->a4 ^ saddr->a4);
20}
21
22static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
23{
24 return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
25 saddr->a6[2] ^ saddr->a6[3]);
26}
27
28static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr,
29 u32 reqid, unsigned short family,
30 unsigned int hmask)
31{
32 unsigned int h = family ^ reqid;
33 switch (family) {
34 case AF_INET:
35 h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
36 break;
37 case AF_INET6:
38 h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
39 break;
40 }
41 return (h ^ (h >> 16)) & hmask;
42}
43
44static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr,
45 unsigned short family,
46 unsigned int hmask)
47{
48 unsigned int h = family;
49 switch (family) {
50 case AF_INET:
51 h ^= __xfrm4_addr_hash(saddr);
52 break;
53 case AF_INET6:
54 h ^= __xfrm6_addr_hash(saddr);
55 break;
56 };
57 return (h ^ (h >> 16)) & hmask;
58}
59
60static inline unsigned int
61__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family,
62 unsigned int hmask)
63{
64 unsigned int h = spi ^ proto;
65 switch (family) {
66 case AF_INET:
67 h ^= __xfrm4_addr_hash(daddr);
68 break;
69 case AF_INET6:
70 h ^= __xfrm6_addr_hash(daddr);
71 break;
72 }
73 return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
74}
75
76static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
77{
78 return (index ^ (index >> 8)) & hmask;
79}
80
81static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask)
82{
83 xfrm_address_t *daddr = &sel->daddr;
84 xfrm_address_t *saddr = &sel->saddr;
85 unsigned int h = 0;
86
87 switch (family) {
88 case AF_INET:
89 if (sel->prefixlen_d != 32 ||
90 sel->prefixlen_s != 32)
91 return hmask + 1;
92
93 h = __xfrm4_daddr_saddr_hash(daddr, saddr);
94 break;
95
96 case AF_INET6:
97 if (sel->prefixlen_d != 128 ||
98 sel->prefixlen_s != 128)
99 return hmask + 1;
100
101 h = __xfrm6_daddr_saddr_hash(daddr, saddr);
102 break;
103 };
104 h ^= (h >> 16);
105 return h & hmask;
106}
107
108static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask)
109{
110 unsigned int h = 0;
111
112 switch (family) {
113 case AF_INET:
114 h = __xfrm4_daddr_saddr_hash(daddr, saddr);
115 break;
116
117 case AF_INET6:
118 h = __xfrm6_daddr_saddr_hash(daddr, saddr);
119 break;
120 };
121 h ^= (h >> 16);
122 return h & hmask;
123}
124
125extern struct hlist_head *xfrm_hash_alloc(unsigned int sz);
126extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz);
127
128#endif /* _XFRM_HASH_H */
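
Every hash above folds the high half into the low half and masks, and hmask is always 2^n - 1, so the AND replaces a modulo and a resize merely doubles n. A worked example (ours, not from the patch):

/*
 * For daddr 10.0.0.1 and saddr 10.0.0.2, __xfrm4_daddr_saddr_hash()
 * computes ntohl(0x0a000001 ^ 0x0a000002) = 0x00000003; the fold
 * h ^ (h >> 16) leaves 3, and with a 256-bucket table (hmask = 0xff)
 * the entry lands in bucket 3.
 */
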
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 891a6090cc09..dfc90bb1cf1f 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -82,8 +82,6 @@ void __init xfrm_input_init(void)
82{ 82{
83 secpath_cachep = kmem_cache_create("secpath_cache", 83 secpath_cachep = kmem_cache_create("secpath_cache",
84 sizeof(struct sec_path), 84 sizeof(struct sec_path),
85 0, SLAB_HWCACHE_ALIGN, 85 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
86 NULL, NULL); 86 NULL, NULL);
87 if (!secpath_cachep)
88 panic("XFRM: failed to allocate secpath_cache\n");
89} 87}
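
The xfrm_input.c hunk relies on SLAB_PANIC making the allocator panic internally on failure, so the explicit NULL check becomes dead code. A sketch of the equivalence, assuming the six-argument kmem_cache_create() of this tree:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static kmem_cache_t *my_cachep;

static void __init my_old_way(void)
{
        my_cachep = kmem_cache_create("my_cache", 64, 0,
                                      SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!my_cachep)
                panic("my_cache: allocation failed\n");
}

static void __init my_new_way(void)
{
        /* SLAB_PANIC folds the failure check into the allocator. */
        my_cachep = kmem_cache_create("my_cache", 64, 0,
                                      SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                                      NULL, NULL);
}
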
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 3da67ca2c3ce..b6e2e79d7261 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -22,16 +22,19 @@
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/netfilter.h> 23#include <linux/netfilter.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/cache.h>
25#include <net/xfrm.h> 26#include <net/xfrm.h>
26#include <net/ip.h> 27#include <net/ip.h>
27 28
29#include "xfrm_hash.h"
30
28DEFINE_MUTEX(xfrm_cfg_mutex); 31DEFINE_MUTEX(xfrm_cfg_mutex);
29EXPORT_SYMBOL(xfrm_cfg_mutex); 32EXPORT_SYMBOL(xfrm_cfg_mutex);
30 33
31static DEFINE_RWLOCK(xfrm_policy_lock); 34static DEFINE_RWLOCK(xfrm_policy_lock);
32 35
33struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; 36unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
34EXPORT_SYMBOL(xfrm_policy_list); 37EXPORT_SYMBOL(xfrm_policy_count);
35 38
36static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); 39static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
37static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; 40static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
@@ -39,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
39static kmem_cache_t *xfrm_dst_cache __read_mostly; 42static kmem_cache_t *xfrm_dst_cache __read_mostly;
40 43
41static struct work_struct xfrm_policy_gc_work; 44static struct work_struct xfrm_policy_gc_work;
42static struct list_head xfrm_policy_gc_list = 45static HLIST_HEAD(xfrm_policy_gc_list);
43 LIST_HEAD_INIT(xfrm_policy_gc_list);
44static DEFINE_SPINLOCK(xfrm_policy_gc_lock); 46static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
45 47
46static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); 48static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
@@ -310,8 +312,10 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
310 policy = kzalloc(sizeof(struct xfrm_policy), gfp); 312 policy = kzalloc(sizeof(struct xfrm_policy), gfp);
311 313
312 if (policy) { 314 if (policy) {
313 atomic_set(&policy->refcnt, 1); 315 INIT_HLIST_NODE(&policy->bydst);
316 INIT_HLIST_NODE(&policy->byidx);
314 rwlock_init(&policy->lock); 317 rwlock_init(&policy->lock);
318 atomic_set(&policy->refcnt, 1);
315 init_timer(&policy->timer); 319 init_timer(&policy->timer);
316 policy->timer.data = (unsigned long)policy; 320 policy->timer.data = (unsigned long)policy;
317 policy->timer.function = xfrm_policy_timer; 321 policy->timer.function = xfrm_policy_timer;
@@ -357,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
357static void xfrm_policy_gc_task(void *data) 361static void xfrm_policy_gc_task(void *data)
358{ 362{
359 struct xfrm_policy *policy; 363 struct xfrm_policy *policy;
360 struct list_head *entry, *tmp; 364 struct hlist_node *entry, *tmp;
361 struct list_head gc_list = LIST_HEAD_INIT(gc_list); 365 struct hlist_head gc_list;
362 366
363 spin_lock_bh(&xfrm_policy_gc_lock); 367 spin_lock_bh(&xfrm_policy_gc_lock);
364 list_splice_init(&xfrm_policy_gc_list, &gc_list); 368 gc_list.first = xfrm_policy_gc_list.first;
369 INIT_HLIST_HEAD(&xfrm_policy_gc_list);
365 spin_unlock_bh(&xfrm_policy_gc_lock); 370 spin_unlock_bh(&xfrm_policy_gc_lock);
366 371
367 list_for_each_safe(entry, tmp, &gc_list) { 372 hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
368 policy = list_entry(entry, struct xfrm_policy, list);
369 xfrm_policy_gc_kill(policy); 373 xfrm_policy_gc_kill(policy);
370 }
371} 374}
372 375
373/* Rule must be locked. Release descendant resources, announce 376
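
The GC list moves from list_head to hlist_head, and since hlist has no list_splice() the splice is open-coded as a first-pointer steal. That leaves the first node's pprev pointing at the old (now reinitialized) head, which is harmless here because the GC task only walks forward. A general-purpose steal would also repair pprev; sketch (ours):

#include <linux/list.h>

static inline void hlist_steal(struct hlist_head *src,
                               struct hlist_head *dst)
{
        dst->first = src->first;
        if (dst->first)
                dst->first->pprev = &dst->first;        /* repair back-link */
        INIT_HLIST_HEAD(src);
}
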
@@ -389,70 +392,275 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
389 } 392 }
390 393
391 spin_lock(&xfrm_policy_gc_lock); 394 spin_lock(&xfrm_policy_gc_lock);
392 list_add(&policy->list, &xfrm_policy_gc_list); 395 hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
393 spin_unlock(&xfrm_policy_gc_lock); 396 spin_unlock(&xfrm_policy_gc_lock);
394 397
395 schedule_work(&xfrm_policy_gc_work); 398 schedule_work(&xfrm_policy_gc_work);
396} 399}
397 400
401struct xfrm_policy_hash {
402 struct hlist_head *table;
403 unsigned int hmask;
404};
405
406static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
407static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
408static struct hlist_head *xfrm_policy_byidx __read_mostly;
409static unsigned int xfrm_idx_hmask __read_mostly;
410static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
411
412static inline unsigned int idx_hash(u32 index)
413{
414 return __idx_hash(index, xfrm_idx_hmask);
415}
416
417static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
418{
419 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
420 unsigned int hash = __sel_hash(sel, family, hmask);
421
422 return (hash == hmask + 1 ?
423 &xfrm_policy_inexact[dir] :
424 xfrm_policy_bydst[dir].table + hash);
425}
426
427static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
428{
429 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
430 unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
431
432 return xfrm_policy_bydst[dir].table + hash;
433}
434
435static void xfrm_dst_hash_transfer(struct hlist_head *list,
436 struct hlist_head *ndsttable,
437 unsigned int nhashmask)
438{
439 struct hlist_node *entry, *tmp;
440 struct xfrm_policy *pol;
441
442 hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
443 unsigned int h;
444
445 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
446 pol->family, nhashmask);
447 hlist_add_head(&pol->bydst, ndsttable+h);
448 }
449}
450
451static void xfrm_idx_hash_transfer(struct hlist_head *list,
452 struct hlist_head *nidxtable,
453 unsigned int nhashmask)
454{
455 struct hlist_node *entry, *tmp;
456 struct xfrm_policy *pol;
457
458 hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
459 unsigned int h;
460
461 h = __idx_hash(pol->index, nhashmask);
462 hlist_add_head(&pol->byidx, nidxtable+h);
463 }
464}
465
466static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
467{
468 return ((old_hmask + 1) << 1) - 1;
469}
470
471static void xfrm_bydst_resize(int dir)
472{
473 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
474 unsigned int nhashmask = xfrm_new_hash_mask(hmask);
475 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
476 struct hlist_head *odst = xfrm_policy_bydst[dir].table;
477 struct hlist_head *ndst = xfrm_hash_alloc(nsize);
478 int i;
479
480 if (!ndst)
481 return;
482
483 write_lock_bh(&xfrm_policy_lock);
484
485 for (i = hmask; i >= 0; i--)
486 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
487
488 xfrm_policy_bydst[dir].table = ndst;
489 xfrm_policy_bydst[dir].hmask = nhashmask;
490
491 write_unlock_bh(&xfrm_policy_lock);
492
493 xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
494}
495
496static void xfrm_byidx_resize(int total)
497{
498 unsigned int hmask = xfrm_idx_hmask;
499 unsigned int nhashmask = xfrm_new_hash_mask(hmask);
500 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
501 struct hlist_head *oidx = xfrm_policy_byidx;
502 struct hlist_head *nidx = xfrm_hash_alloc(nsize);
503 int i;
504
505 if (!nidx)
506 return;
507
508 write_lock_bh(&xfrm_policy_lock);
509
510 for (i = hmask; i >= 0; i--)
511 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
512
513 xfrm_policy_byidx = nidx;
514 xfrm_idx_hmask = nhashmask;
515
516 write_unlock_bh(&xfrm_policy_lock);
517
518 xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
519}
520
521static inline int xfrm_bydst_should_resize(int dir, int *total)
522{
523 unsigned int cnt = xfrm_policy_count[dir];
524 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
525
526 if (total)
527 *total += cnt;
528
529 if ((hmask + 1) < xfrm_policy_hashmax &&
530 cnt > hmask)
531 return 1;
532
533 return 0;
534}
535
536static inline int xfrm_byidx_should_resize(int total)
537{
538 unsigned int hmask = xfrm_idx_hmask;
539
540 if ((hmask + 1) < xfrm_policy_hashmax &&
541 total > hmask)
542 return 1;
543
544 return 0;
545}
546
547static DEFINE_MUTEX(hash_resize_mutex);
548
549static void xfrm_hash_resize(void *__unused)
550{
551 int dir, total;
552
553 mutex_lock(&hash_resize_mutex);
554
555 total = 0;
556 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
557 if (xfrm_bydst_should_resize(dir, &total))
558 xfrm_bydst_resize(dir);
559 }
560 if (xfrm_byidx_should_resize(total))
561 xfrm_byidx_resize(total);
562
563 mutex_unlock(&hash_resize_mutex);
564}
565
566static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
567
398/* Generate new index... KAME seems to generate them ordered by cost 568/* Generate new index... KAME seems to generate them ordered by cost
399 * of an absolute inpredictability of ordering of rules. This will not pass. */ 569 * of an absolute inpredictability of ordering of rules. This will not pass. */
400static u32 xfrm_gen_index(int dir) 570static u32 xfrm_gen_index(u8 type, int dir)
401{ 571{
402 u32 idx;
403 struct xfrm_policy *p;
404 static u32 idx_generator; 572 static u32 idx_generator;
405 573
406 for (;;) { 574 for (;;) {
575 struct hlist_node *entry;
576 struct hlist_head *list;
577 struct xfrm_policy *p;
578 u32 idx;
579 int found;
580
407 idx = (idx_generator | dir); 581 idx = (idx_generator | dir);
408 idx_generator += 8; 582 idx_generator += 8;
409 if (idx == 0) 583 if (idx == 0)
410 idx = 8; 584 idx = 8;
411 for (p = xfrm_policy_list[dir]; p; p = p->next) { 585 list = xfrm_policy_byidx + idx_hash(idx);
412 if (p->index == idx) 586 found = 0;
587 hlist_for_each_entry(p, entry, list, byidx) {
588 if (p->index == idx) {
589 found = 1;
413 break; 590 break;
591 }
414 } 592 }
415 if (!p) 593 if (!found)
416 return idx; 594 return idx;
417 } 595 }
418} 596}
419 597
598static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
599{
600 u32 *p1 = (u32 *) s1;
601 u32 *p2 = (u32 *) s2;
602 int len = sizeof(struct xfrm_selector) / sizeof(u32);
603 int i;
604
605 for (i = 0; i < len; i++) {
606 if (p1[i] != p2[i])
607 return 1;
608 }
609
610 return 0;
611}
612
420int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) 613int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
421{ 614{
422 struct xfrm_policy *pol, **p; 615 struct xfrm_policy *pol;
423 struct xfrm_policy *delpol = NULL; 616 struct xfrm_policy *delpol;
424 struct xfrm_policy **newpos = NULL; 617 struct hlist_head *chain;
618 struct hlist_node *entry, *newpos, *last;
425 struct dst_entry *gc_list; 619 struct dst_entry *gc_list;
426 620
427 write_lock_bh(&xfrm_policy_lock); 621 write_lock_bh(&xfrm_policy_lock);
428 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { 622 chain = policy_hash_bysel(&policy->selector, policy->family, dir);
429 if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && 623 delpol = NULL;
624 newpos = NULL;
625 last = NULL;
626 hlist_for_each_entry(pol, entry, chain, bydst) {
627 if (!delpol &&
628 pol->type == policy->type &&
629 !selector_cmp(&pol->selector, &policy->selector) &&
430 xfrm_sec_ctx_match(pol->security, policy->security)) { 630 xfrm_sec_ctx_match(pol->security, policy->security)) {
431 if (excl) { 631 if (excl) {
432 write_unlock_bh(&xfrm_policy_lock); 632 write_unlock_bh(&xfrm_policy_lock);
433 return -EEXIST; 633 return -EEXIST;
434 } 634 }
435 *p = pol->next;
436 delpol = pol; 635 delpol = pol;
437 if (policy->priority > pol->priority) 636 if (policy->priority > pol->priority)
438 continue; 637 continue;
439 } else if (policy->priority >= pol->priority) { 638 } else if (policy->priority >= pol->priority) {
440 p = &pol->next; 639 last = &pol->bydst;
441 continue; 640 continue;
442 } 641 }
443 if (!newpos) 642 if (!newpos)
444 newpos = p; 643 newpos = &pol->bydst;
445 if (delpol) 644 if (delpol)
446 break; 645 break;
447 p = &pol->next; 646 last = &pol->bydst;
448 } 647 }
648 if (!newpos)
649 newpos = last;
449 if (newpos) 650 if (newpos)
450 p = newpos; 651 hlist_add_after(newpos, &policy->bydst);
652 else
653 hlist_add_head(&policy->bydst, chain);
451 xfrm_pol_hold(policy); 654 xfrm_pol_hold(policy);
452 policy->next = *p; 655 xfrm_policy_count[dir]++;
453 *p = policy;
454 atomic_inc(&flow_cache_genid); 656 atomic_inc(&flow_cache_genid);
455 policy->index = delpol ? delpol->index : xfrm_gen_index(dir); 657 if (delpol) {
658 hlist_del(&delpol->bydst);
659 hlist_del(&delpol->byidx);
660 xfrm_policy_count[dir]--;
661 }
662 policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
663 hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
456 policy->curlft.add_time = (unsigned long)xtime.tv_sec; 664 policy->curlft.add_time = (unsigned long)xtime.tv_sec;
457 policy->curlft.use_time = 0; 665 policy->curlft.use_time = 0;
458 if (!mod_timer(&policy->timer, jiffies + HZ)) 666 if (!mod_timer(&policy->timer, jiffies + HZ))
@@ -461,10 +669,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
461 669
462 if (delpol) 670 if (delpol)
463 xfrm_policy_kill(delpol); 671 xfrm_policy_kill(delpol);
672 else if (xfrm_bydst_should_resize(dir, NULL))
673 schedule_work(&xfrm_hash_work);
464 674
465 read_lock_bh(&xfrm_policy_lock); 675 read_lock_bh(&xfrm_policy_lock);
466 gc_list = NULL; 676 gc_list = NULL;
467 for (policy = policy->next; policy; policy = policy->next) { 677 entry = &policy->bydst;
678 hlist_for_each_entry_continue(policy, entry, bydst) {
468 struct dst_entry *dst; 679 struct dst_entry *dst;
469 680
470 write_lock(&policy->lock); 681 write_lock(&policy->lock);
@@ -493,87 +704,146 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
493} 704}
494EXPORT_SYMBOL(xfrm_policy_insert); 705EXPORT_SYMBOL(xfrm_policy_insert);
495 706
496struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, 707struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
708 struct xfrm_selector *sel,
497 struct xfrm_sec_ctx *ctx, int delete) 709 struct xfrm_sec_ctx *ctx, int delete)
498{ 710{
499 struct xfrm_policy *pol, **p; 711 struct xfrm_policy *pol, *ret;
712 struct hlist_head *chain;
713 struct hlist_node *entry;
500 714
501 write_lock_bh(&xfrm_policy_lock); 715 write_lock_bh(&xfrm_policy_lock);
502 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { 716 chain = policy_hash_bysel(sel, sel->family, dir);
503 if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && 717 ret = NULL;
504 (xfrm_sec_ctx_match(ctx, pol->security))) { 718 hlist_for_each_entry(pol, entry, chain, bydst) {
719 if (pol->type == type &&
720 !selector_cmp(sel, &pol->selector) &&
721 xfrm_sec_ctx_match(ctx, pol->security)) {
505 xfrm_pol_hold(pol); 722 xfrm_pol_hold(pol);
506 if (delete) 723 if (delete) {
507 *p = pol->next; 724 hlist_del(&pol->bydst);
725 hlist_del(&pol->byidx);
726 xfrm_policy_count[dir]--;
727 }
728 ret = pol;
508 break; 729 break;
509 } 730 }
510 } 731 }
511 write_unlock_bh(&xfrm_policy_lock); 732 write_unlock_bh(&xfrm_policy_lock);
512 733
513 if (pol && delete) { 734 if (ret && delete) {
514 atomic_inc(&flow_cache_genid); 735 atomic_inc(&flow_cache_genid);
515 xfrm_policy_kill(pol); 736 xfrm_policy_kill(ret);
516 } 737 }
517 return pol; 738 return ret;
518} 739}
519EXPORT_SYMBOL(xfrm_policy_bysel_ctx); 740EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
520 741
521struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) 742struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
522{ 743{
523 struct xfrm_policy *pol, **p; 744 struct xfrm_policy *pol, *ret;
745 struct hlist_head *chain;
746 struct hlist_node *entry;
524 747
525 write_lock_bh(&xfrm_policy_lock); 748 write_lock_bh(&xfrm_policy_lock);
526 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { 749 chain = xfrm_policy_byidx + idx_hash(id);
527 if (pol->index == id) { 750 ret = NULL;
751 hlist_for_each_entry(pol, entry, chain, byidx) {
752 if (pol->type == type && pol->index == id) {
528 xfrm_pol_hold(pol); 753 xfrm_pol_hold(pol);
529 if (delete) 754 if (delete) {
530 *p = pol->next; 755 hlist_del(&pol->bydst);
756 hlist_del(&pol->byidx);
757 xfrm_policy_count[dir]--;
758 }
759 ret = pol;
531 break; 760 break;
532 } 761 }
533 } 762 }
534 write_unlock_bh(&xfrm_policy_lock); 763 write_unlock_bh(&xfrm_policy_lock);
535 764
536 if (pol && delete) { 765 if (ret && delete) {
537 atomic_inc(&flow_cache_genid); 766 atomic_inc(&flow_cache_genid);
538 xfrm_policy_kill(pol); 767 xfrm_policy_kill(ret);
539 } 768 }
540 return pol; 769 return ret;
541} 770}
542EXPORT_SYMBOL(xfrm_policy_byid); 771EXPORT_SYMBOL(xfrm_policy_byid);
543 772
544void xfrm_policy_flush(void) 773void xfrm_policy_flush(u8 type)
545{ 774{
546 struct xfrm_policy *xp;
547 int dir; 775 int dir;
548 776
549 write_lock_bh(&xfrm_policy_lock); 777 write_lock_bh(&xfrm_policy_lock);
550 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 778 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
551 while ((xp = xfrm_policy_list[dir]) != NULL) { 779 struct xfrm_policy *pol;
552 xfrm_policy_list[dir] = xp->next; 780 struct hlist_node *entry;
781 int i;
782
783 again1:
784 hlist_for_each_entry(pol, entry,
785 &xfrm_policy_inexact[dir], bydst) {
786 if (pol->type != type)
787 continue;
788 hlist_del(&pol->bydst);
789 hlist_del(&pol->byidx);
553 write_unlock_bh(&xfrm_policy_lock); 790 write_unlock_bh(&xfrm_policy_lock);
554 791
555 xfrm_policy_kill(xp); 792 xfrm_policy_kill(pol);
556 793
557 write_lock_bh(&xfrm_policy_lock); 794 write_lock_bh(&xfrm_policy_lock);
795 goto again1;
558 } 796 }
797
798 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
799 again2:
800 hlist_for_each_entry(pol, entry,
801 xfrm_policy_bydst[dir].table + i,
802 bydst) {
803 if (pol->type != type)
804 continue;
805 hlist_del(&pol->bydst);
806 hlist_del(&pol->byidx);
807 write_unlock_bh(&xfrm_policy_lock);
808
809 xfrm_policy_kill(pol);
810
811 write_lock_bh(&xfrm_policy_lock);
812 goto again2;
813 }
814 }
815
816 xfrm_policy_count[dir] = 0;
559 } 817 }
560 atomic_inc(&flow_cache_genid); 818 atomic_inc(&flow_cache_genid);
561 write_unlock_bh(&xfrm_policy_lock); 819 write_unlock_bh(&xfrm_policy_lock);
562} 820}
563EXPORT_SYMBOL(xfrm_policy_flush); 821EXPORT_SYMBOL(xfrm_policy_flush);
564 822
565int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), 823int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
566 void *data) 824 void *data)
567{ 825{
568 struct xfrm_policy *xp; 826 struct xfrm_policy *pol;
569 int dir; 827 struct hlist_node *entry;
570 int count = 0; 828 int dir, count, error;
571 int error = 0;
572 829
573 read_lock_bh(&xfrm_policy_lock); 830 read_lock_bh(&xfrm_policy_lock);
831 count = 0;
574 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { 832 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
575 for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) 833 struct hlist_head *table = xfrm_policy_bydst[dir].table;
576 count++; 834 int i;
835
836 hlist_for_each_entry(pol, entry,
837 &xfrm_policy_inexact[dir], bydst) {
838 if (pol->type == type)
839 count++;
840 }
841 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
842 hlist_for_each_entry(pol, entry, table + i, bydst) {
843 if (pol->type == type)
844 count++;
845 }
846 }
577 } 847 }
578 848
579 if (count == 0) { 849 if (count == 0) {
@@ -582,13 +852,28 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
582 } 852 }
583 853
584 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { 854 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
585 for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { 855 struct hlist_head *table = xfrm_policy_bydst[dir].table;
586 error = func(xp, dir%XFRM_POLICY_MAX, --count, data); 856 int i;
857
858 hlist_for_each_entry(pol, entry,
859 &xfrm_policy_inexact[dir], bydst) {
860 if (pol->type != type)
861 continue;
862 error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
587 if (error) 863 if (error)
588 goto out; 864 goto out;
589 } 865 }
866 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
867 hlist_for_each_entry(pol, entry, table + i, bydst) {
868 if (pol->type != type)
869 continue;
870 error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
871 if (error)
872 goto out;
873 }
874 }
590 } 875 }
591 876 error = 0;
592out: 877out:
593 read_unlock_bh(&xfrm_policy_lock); 878 read_unlock_bh(&xfrm_policy_lock);
594 return error; 879 return error;
@@ -597,29 +882,79 @@ EXPORT_SYMBOL(xfrm_policy_walk);
597 882
598/* Find policy to apply to this flow. */ 883/* Find policy to apply to this flow. */
599 884
600static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, 885static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
601 void **objp, atomic_t **obj_refp) 886 u8 type, u16 family, int dir)
602{ 887{
603 struct xfrm_policy *pol; 888 struct xfrm_selector *sel = &pol->selector;
889 int match;
604 890
605 read_lock_bh(&xfrm_policy_lock); 891 if (pol->family != family ||
606 for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { 892 pol->type != type)
607 struct xfrm_selector *sel = &pol->selector; 893 return 0;
608 int match;
609 894
610 if (pol->family != family) 895 match = xfrm_selector_match(sel, fl, family);
611 continue; 896 if (match) {
897 if (!security_xfrm_policy_lookup(pol, fl->secid, dir))
898 return 1;
899 }
612 900
613 match = xfrm_selector_match(sel, fl, family); 901 return 0;
902}
614 903
615 if (match) { 904static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
616 if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { 905 u16 family, u8 dir)
617 xfrm_pol_hold(pol); 906{
618 break; 907 struct xfrm_policy *pol, *ret;
619 } 908 xfrm_address_t *daddr, *saddr;
909 struct hlist_node *entry;
910 struct hlist_head *chain;
911 u32 priority = ~0U;
912
913 daddr = xfrm_flowi_daddr(fl, family);
914 saddr = xfrm_flowi_saddr(fl, family);
915 if (unlikely(!daddr || !saddr))
916 return NULL;
917
918 read_lock_bh(&xfrm_policy_lock);
919 chain = policy_hash_direct(daddr, saddr, family, dir);
920 ret = NULL;
921 hlist_for_each_entry(pol, entry, chain, bydst) {
922 if (xfrm_policy_match(pol, fl, type, family, dir)) {
923 ret = pol;
924 priority = ret->priority;
925 break;
926 }
927 }
928 chain = &xfrm_policy_inexact[dir];
929 hlist_for_each_entry(pol, entry, chain, bydst) {
930 if (xfrm_policy_match(pol, fl, type, family, dir) &&
931 pol->priority < priority) {
932 ret = pol;
933 break;
620 } 934 }
621 } 935 }
936 if (ret)
937 xfrm_pol_hold(ret);
622 read_unlock_bh(&xfrm_policy_lock); 938 read_unlock_bh(&xfrm_policy_lock);
939
940 return ret;
941}
942
943static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
944 void **objp, atomic_t **obj_refp)
945{
946 struct xfrm_policy *pol;
947
948#ifdef CONFIG_XFRM_SUB_POLICY
949 pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
950 if (pol)
951 goto end;
952#endif
953 pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
954
955#ifdef CONFIG_XFRM_SUB_POLICY
956end:
957#endif
623 if ((*objp = (void *) pol) != NULL) 958 if ((*objp = (void *) pol) != NULL)
624 *obj_refp = &pol->refcnt; 959 *obj_refp = &pol->refcnt;
625} 960}
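
Lookup order after this hunk: with CONFIG_XFRM_SUB_POLICY the resolver tries the sub-policy table first and falls back to main. Within a type, the exact (hashed) chain is scanned before the inexact one, and an inexact match only wins with a strictly lower priority value; both scans may break at the first match because xfrm_policy_insert() keeps each chain sorted by priority. A condensed restatement (ours) of the two-level dispatch:

static struct xfrm_policy *lookup(struct flowi *fl, u16 family, u8 dir)
{
        struct xfrm_policy *pol = NULL;

#ifdef CONFIG_XFRM_SUB_POLICY
        /* A sub policy, when present, shadows the main one. */
        pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB,
                                        fl, family, dir);
#endif
        if (!pol)
                pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
                                                fl, family, dir);
        return pol;
}
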
@@ -641,7 +976,7 @@ static inline int policy_to_flow_dir(int dir)
641 }; 976 };
642} 977}
643 978
644static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) 979static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
645{ 980{
646 struct xfrm_policy *pol; 981 struct xfrm_policy *pol;
647 982
@@ -652,7 +987,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
652 int err = 0; 987 int err = 0;
653 988
654 if (match) 989 if (match)
655 err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); 990 err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir));
656 991
657 if (match && !err) 992 if (match && !err)
658 xfrm_pol_hold(pol); 993 xfrm_pol_hold(pol);
@@ -665,24 +1000,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
 
 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 {
-	pol->next = xfrm_policy_list[dir];
-	xfrm_policy_list[dir] = pol;
+	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
+						     pol->family, dir);
+
+	hlist_add_head(&pol->bydst, chain);
+	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
+	xfrm_policy_count[dir]++;
 	xfrm_pol_hold(pol);
+
+	if (xfrm_bydst_should_resize(dir, NULL))
+		schedule_work(&xfrm_hash_work);
 }
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir)
 {
-	struct xfrm_policy **polp;
-
-	for (polp = &xfrm_policy_list[dir];
-	     *polp != NULL; polp = &(*polp)->next) {
-		if (*polp == pol) {
-			*polp = pol->next;
-			return pol;
-		}
-	}
-	return NULL;
+	if (hlist_unhashed(&pol->bydst))
+		return NULL;
+
+	hlist_del(&pol->bydst);
+	hlist_del(&pol->byidx);
+	xfrm_policy_count[dir]--;
+
+	return pol;
 }
 
 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
@@ -704,12 +1044,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 {
 	struct xfrm_policy *old_pol;
 
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
+		return -EINVAL;
+#endif
+
 	write_lock_bh(&xfrm_policy_lock);
 	old_pol = sk->sk_policy[dir];
 	sk->sk_policy[dir] = pol;
 	if (pol) {
 		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
-		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
 	if (old_pol)
@@ -738,6 +1083,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
 	newp->flags = old->flags;
 	newp->xfrm_nr = old->xfrm_nr;
 	newp->index = old->index;
+	newp->type = old->type;
 	memcpy(newp->xfrm_vec, old->xfrm_vec,
 	       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 	write_lock_bh(&xfrm_policy_lock);
@@ -761,17 +1107,32 @@ int __xfrm_sk_clone_policy(struct sock *sk)
 	return 0;
 }
 
+static int
+xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
+	       unsigned short family)
+{
+	int err;
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	err = afinfo->get_saddr(local, remote);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+
 /* Resolve list of templates for the flow, given policy. */
 
 static int
-xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
-		  struct xfrm_state **xfrm,
-		  unsigned short family)
+xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
+		      struct xfrm_state **xfrm,
+		      unsigned short family)
 {
 	int nx;
 	int i, error;
 	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
 	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+	xfrm_address_t tmp;
 
 	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
 		struct xfrm_state *x;
@@ -779,9 +1140,15 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
 		xfrm_address_t *local  = saddr;
 		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
 
-		if (tmpl->mode) {
+		if (tmpl->mode == XFRM_MODE_TUNNEL) {
 			remote = &tmpl->id.daddr;
 			local = &tmpl->saddr;
+			if (xfrm_addr_any(local, family)) {
+				error = xfrm_get_saddr(&tmp, remote, family);
+				if (error)
+					goto fail;
+				local = &tmp;
+			}
 		}
 
 		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
@@ -809,6 +1176,45 @@ fail:
 	return error;
 }
 
1179static int
1180xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1181 struct xfrm_state **xfrm,
1182 unsigned short family)
1183{
1184 struct xfrm_state *tp[XFRM_MAX_DEPTH];
1185 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1186 int cnx = 0;
1187 int error;
1188 int ret;
1189 int i;
1190
1191 for (i = 0; i < npols; i++) {
1192 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1193 error = -ENOBUFS;
1194 goto fail;
1195 }
1196
1197 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1198 if (ret < 0) {
1199 error = ret;
1200 goto fail;
1201 } else
1202 cnx += ret;
1203 }
1204
1205 /* found states are sorted for outbound processing */
1206 if (npols > 1)
1207 xfrm_state_sort(xfrm, tpp, cnx, family);
1208
1209 return cnx;
1210
1211 fail:
1212 for (cnx--; cnx>=0; cnx--)
1213 xfrm_state_put(tpp[cnx]);
1214 return error;
1215
1216}
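xfrm_tmpl_resolve() above is now a thin aggregator: it resolves each policy's templates into one shared state array, rejects combinations that would exceed XFRM_MAX_DEPTH, and on any failure releases every state it already acquired. A toy model of that accumulate-or-unwind control flow; resolve_one() and the integer "handles" are stand-ins, not the real kernel interfaces:

#include <stdio.h>

#define MAX_DEPTH 6

static int resolve_one(int nr, int *out)
{
	for (int i = 0; i < nr; i++)
		out[i] = nr * 100 + i;		/* pretend resolution succeeds */
	return nr;
}

static int resolve_all(const int *pol_nr, int npols, int *states)
{
	int cnt = 0;

	for (int i = 0; i < npols; i++) {
		if (cnt + pol_nr[i] >= MAX_DEPTH)
			goto fail;		/* cf. the -ENOBUFS check */
		cnt += resolve_one(pol_nr[i], states + cnt);
	}
	return cnt;
fail:
	while (--cnt >= 0)
		states[cnt] = 0;		/* cf. the xfrm_state_put() unwind */
	return -1;
}

int main(void)
{
	int pol_nr[2] = { 2, 3 }, states[MAX_DEPTH];

	printf("resolved %d states\n", resolve_all(pol_nr, 2, states));
	return 0;
}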
1217
 /* Check that the bundle accepts the flow and its components are
  * still valid.
  */
@@ -855,6 +1261,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 		  struct sock *sk, int flags)
 {
 	struct xfrm_policy *policy;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	int npols;
+	int pol_dead;
+	int xfrm_nr;
+	int pi;
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
 	struct dst_entry *dst, *dst_orig = *dst_p;
 	int nx = 0;
@@ -862,19 +1273,26 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 	u32 genid;
 	u16 family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
-	u32 sk_sid = security_sk_sid(sk, fl, dir);
+
 restart:
 	genid = atomic_read(&flow_cache_genid);
 	policy = NULL;
+	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
+		pols[pi] = NULL;
+	npols = 0;
+	pol_dead = 0;
+	xfrm_nr = 0;
+
 	if (sk && sk->sk_policy[1])
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
 
 	if (!policy) {
 		/* To accelerate a bit... */
-		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+		if ((dst_orig->flags & DST_NOXFRM) ||
+		    !xfrm_policy_count[XFRM_POLICY_OUT])
 			return 0;
 
-		policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
+		policy = flow_cache_lookup(fl, dst_orig->ops->family,
 					   dir, xfrm_policy_lookup);
 	}
880 1298
@@ -883,6 +1301,9 @@ restart:
 
 	family = dst_orig->ops->family;
 	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+	pols[0] = policy;
+	npols ++;
+	xfrm_nr += pols[0]->xfrm_nr;
 
 	switch (policy->action) {
 	case XFRM_POLICY_BLOCK:
@@ -891,11 +1312,13 @@ restart:
 		goto error;
 
 	case XFRM_POLICY_ALLOW:
+#ifndef CONFIG_XFRM_SUB_POLICY
 		if (policy->xfrm_nr == 0) {
 			/* Flow passes not transformed. */
 			xfrm_pol_put(policy);
 			return 0;
 		}
+#endif
 
 		/* Try to find matching bundle.
 		 *
@@ -911,7 +1334,36 @@ restart:
 		if (dst)
 			break;
 
-		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+#ifdef CONFIG_XFRM_SUB_POLICY
+		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+							    fl, family,
+							    XFRM_POLICY_OUT);
+			if (pols[1]) {
+				if (pols[1]->action == XFRM_POLICY_BLOCK) {
+					err = -EPERM;
+					goto error;
+				}
+				npols ++;
+				xfrm_nr += pols[1]->xfrm_nr;
+			}
+		}
+
+		/*
+		 * Neither flowi nor bundle information knows the
+		 * transformation template size, so when more than one
+		 * policy is in use we only know whether all of them are
+		 * bypass after they have all been searched. Note that the
+		 * not-transformed bypass above is likewise guarded by the
+		 * non-sub-policy configuration.
+		 */
+		if (xfrm_nr == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pols_put(pols, npols);
+			return 0;
+		}
+
+#endif
+		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
 		if (unlikely(nx<0)) {
 			err = nx;
@@ -924,7 +1376,7 @@ restart:
 			set_current_state(TASK_RUNNING);
 			remove_wait_queue(&km_waitq, &wait);
 
-			nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+			nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
 			if (nx == -EAGAIN && signal_pending(current)) {
 				err = -ERESTART;
@@ -932,7 +1384,7 @@ restart:
 			}
 			if (nx == -EAGAIN ||
 			    genid != atomic_read(&flow_cache_genid)) {
-				xfrm_pol_put(policy);
+				xfrm_pols_put(pols, npols);
 				goto restart;
 			}
 			err = nx;
@@ -942,7 +1394,7 @@ restart:
 		}
 		if (nx == 0) {
 			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
+			xfrm_pols_put(pols, npols);
 			return 0;
 		}
 
@@ -956,8 +1408,14 @@ restart:
 			goto error;
 		}
 
+		for (pi = 0; pi < npols; pi++) {
+			read_lock_bh(&pols[pi]->lock);
+			pol_dead |= pols[pi]->dead;
+			read_unlock_bh(&pols[pi]->lock);
+		}
+
 		write_lock_bh(&policy->lock);
-		if (unlikely(policy->dead || stale_bundle(dst))) {
+		if (unlikely(pol_dead || stale_bundle(dst))) {
 			/* Wow! While we worked on resolving, this
 			 * policy has gone. Retry. It is not paranoia,
 			 * we just cannot enlist new bundle to dead object.
@@ -977,17 +1435,34 @@ restart:
 	}
 	*dst_p = dst;
 	dst_release(dst_orig);
-	xfrm_pol_put(policy);
+	xfrm_pols_put(pols, npols);
 	return 0;
 
 error:
 	dst_release(dst_orig);
-	xfrm_pol_put(policy);
+	xfrm_pols_put(pols, npols);
 	*dst_p = NULL;
 	return err;
 }
 EXPORT_SYMBOL(xfrm_lookup);
 
1449static inline int
1450xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1451{
1452 struct xfrm_state *x;
1453 int err;
1454
1455 if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1456 return 0;
1457 x = skb->sp->xvec[idx];
1458 if (!x->type->reject)
1459 return 0;
1460 xfrm_state_hold(x);
1461 err = x->type->reject(x, skb, fl);
1462 xfrm_state_put(x);
1463 return err;
1464}
1465
 /* When skb is transformed back to its "native" form, we have to
  * check policy restrictions. At the moment we make this in maximally
  * stupid way. Shame on me. :-) Of course, connected sockets must
@@ -1004,10 +1479,19 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
 	    (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
 	    (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
 	    x->props.mode == tmpl->mode &&
-	    (tmpl->aalgos & (1<<x->props.aalgo)) &&
-	    !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
+	    ((tmpl->aalgos & (1<<x->props.aalgo)) ||
+	     !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
+	    !(x->props.mode != XFRM_MODE_TRANSPORT &&
+	      xfrm_state_addr_cmp(tmpl, x, family));
 }
 
+/*
+ * Returns 0 or a positive value when validation succeeds (either a bypass
+ * because of an optional transport-mode template, or the next index after
+ * the secpath state matched against the template).
+ * Returns -1 when no matching template is found.
+ * Otherwise returns "-2 - errored_index".
+ */
 static inline int
 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 	       unsigned short family)
@@ -1015,15 +1499,18 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 	int idx = start;
 
 	if (tmpl->optional) {
-		if (!tmpl->mode)
+		if (tmpl->mode == XFRM_MODE_TRANSPORT)
 			return start;
 	} else
 		start = -1;
 	for (; idx < sp->len; idx++) {
 		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
 			return ++idx;
-		if (sp->xvec[idx]->props.mode)
+		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+			if (start == -1)
+				start = -2-idx;
 			break;
+		}
 	}
 	return start;
 }
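The "-2 - errored_index" convention in the comment above packs three outcomes into one int: non-negative for success, -1 for no match, and anything below -1 as an encoded secpath index. The arithmetic round-trips cleanly, as this standalone check shows (only the encoding itself is taken from the code above):

#include <assert.h>
#include <stdio.h>

static int encode_err(int idx)  { return -2 - idx; }
static int decode_err(int code) { return -(2 + code); }	/* cf. xerr_idx = -(2+k) */

int main(void)
{
	for (int idx = 0; idx < 4; idx++) {
		int code = encode_err(idx);

		assert(code < -1);		/* never collides with "no match" */
		assert(decode_err(code) == idx);
		printf("idx %d <-> code %d\n", idx, code);
	}
	return 0;
}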
@@ -1032,21 +1519,25 @@ int
 xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	int err;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	afinfo->decode_session(skb, fl);
+	err = security_xfrm_decode_session(skb, &fl->secid);
 	xfrm_policy_put_afinfo(afinfo);
-	return 0;
+	return err;
 }
 EXPORT_SYMBOL(xfrm_decode_session);
 
-static inline int secpath_has_tunnel(struct sec_path *sp, int k)
+static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
 {
 	for (; k < sp->len; k++) {
-		if (sp->xvec[k]->props.mode)
+		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
+			*idxp = k;
 			return 1;
+		}
 	}
 
 	return 0;
@@ -1056,16 +1547,18 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
 	struct xfrm_policy *pol;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	int npols = 0;
+	int xfrm_nr;
+	int pi;
 	struct flowi fl;
 	u8 fl_dir = policy_to_flow_dir(dir);
-	u32 sk_sid;
+	int xerr_idx = -1;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0)
 		return 0;
 	nf_nat_decode_session(skb, &fl, family);
 
-	sk_sid = security_sk_sid(sk, &fl, fl_dir);
-
 	/* First, check used SA against their selectors. */
 	if (skb->sp) {
 		int i;
@@ -1079,46 +1572,90 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 	pol = NULL;
 	if (sk && sk->sk_policy[dir])
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
 
 	if (!pol)
-		pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
+		pol = flow_cache_lookup(&fl, family, fl_dir,
 					xfrm_policy_lookup);
 
-	if (!pol)
-		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
+	if (!pol) {
+		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
+			xfrm_secpath_reject(xerr_idx, skb, &fl);
+			return 0;
+		}
+		return 1;
+	}
 
 	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
 
1591 pols[0] = pol;
1592 npols ++;
1593#ifdef CONFIG_XFRM_SUB_POLICY
1594 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1595 pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1596 &fl, family,
1597 XFRM_POLICY_IN);
1598 if (pols[1]) {
1599 pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
1600 npols ++;
1601 }
1602 }
1603#endif
1604
 	if (pol->action == XFRM_POLICY_ALLOW) {
 		struct sec_path *sp;
 		static struct sec_path dummy;
+		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
+		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
+		struct xfrm_tmpl **tpp = tp;
+		int ti = 0;
 		int i, k;
 
 		if ((sp = skb->sp) == NULL)
 			sp = &dummy;
 
1617 for (pi = 0; pi < npols; pi++) {
1618 if (pols[pi] != pol &&
1619 pols[pi]->action != XFRM_POLICY_ALLOW)
1620 goto reject;
1621 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
1622 goto reject_error;
1623 for (i = 0; i < pols[pi]->xfrm_nr; i++)
1624 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1625 }
1626 xfrm_nr = ti;
1627 if (npols > 1) {
1628 xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
1629 tpp = stp;
1630 }
1631
 		/* For each tunnel xfrm, find the first matching tmpl.
 		 * For each tmpl before that, find corresponding xfrm.
 		 * Order is _important_. Later we will implement
 		 * some barriers, but at the moment barriers
 		 * are implied between each two transformations.
 		 */
-		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
-			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
-			if (k < 0)
+		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
+			k = xfrm_policy_ok(tpp[i], sp, k, family);
+			if (k < 0) {
+				if (k < -1)
+					/* "-2 - errored_index" returned */
+					xerr_idx = -(2+k);
 				goto reject;
+			}
 		}
 
-		if (secpath_has_tunnel(sp, k))
+		if (secpath_has_nontransport(sp, k, &xerr_idx))
 			goto reject;
 
-		xfrm_pol_put(pol);
+		xfrm_pols_put(pols, npols);
 		return 1;
 	}
 
 reject:
-	xfrm_pol_put(pol);
+	xfrm_secpath_reject(xerr_idx, skb, &fl);
+reject_error:
+	xfrm_pols_put(pols, npols);
 	return 0;
 }
 EXPORT_SYMBOL(__xfrm_policy_check);
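The accept path above walks the (sorted) templates while a single cursor k moves forward through the secpath, so ordering is enforced for free. A deliberately simplified sketch of that shape: it drops the optional-template and transport-mode handling that xfrm_policy_ok() performs, and match() plus the integer arrays are illustrative only:

#include <stdio.h>

static int match(int tmpl, int state) { return tmpl == state; }

static int policy_ok(const int *tmpls, int nt, const int *sp, int ns)
{
	int k = 0;

	for (int i = nt - 1; i >= 0; i--) {	/* templates, last to first */
		while (k < ns && !match(tmpls[i], sp[k]))
			k++;			/* cursor only moves forward */
		if (k == ns)
			return 0;		/* unsatisfied template: reject */
		k++;				/* consume the matched state */
	}
	return 1;
}

int main(void)
{
	int tmpls[] = { 2, 1 };
	int sp[] = { 1, 2 };

	printf("accept=%d\n", policy_ok(tmpls, 2, sp, 2));
	return 0;
}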
@@ -1166,7 +1703,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 
 static int stale_bundle(struct dst_entry *dst)
 {
-	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
+	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
 }
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
@@ -1196,33 +1733,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
1736static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
1737{
1738 struct dst_entry *dst, **dstp;
1739
1740 write_lock(&pol->lock);
1741 dstp = &pol->bundles;
1742 while ((dst=*dstp) != NULL) {
1743 if (func(dst)) {
1744 *dstp = dst->next;
1745 dst->next = *gc_list_p;
1746 *gc_list_p = dst;
1747 } else {
1748 dstp = &dst->next;
1749 }
1750 }
1751 write_unlock(&pol->lock);
1752}
1753
 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
 {
-	int i;
-	struct xfrm_policy *pol;
-	struct dst_entry *dst, **dstp, *gc_list = NULL;
+	struct dst_entry *gc_list = NULL;
+	int dir;
 
 	read_lock_bh(&xfrm_policy_lock);
-	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
-		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
-			write_lock(&pol->lock);
-			dstp = &pol->bundles;
-			while ((dst=*dstp) != NULL) {
-				if (func(dst)) {
-					*dstp = dst->next;
-					dst->next = gc_list;
-					gc_list = dst;
-				} else {
-					dstp = &dst->next;
-				}
-			}
-			write_unlock(&pol->lock);
-		}
-	}
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		struct xfrm_policy *pol;
+		struct hlist_node *entry;
+		struct hlist_head *table;
+		int i;
+
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst)
+			prune_one_bundle(pol, func, &gc_list);
+
+		table = xfrm_policy_bydst[dir].table;
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+			hlist_for_each_entry(pol, entry, table + i, bydst)
+				prune_one_bundle(pol, func, &gc_list);
+		}
+	}
 	read_unlock_bh(&xfrm_policy_lock);
 
 	while (gc_list) {
-		dst = gc_list;
+		struct dst_entry *dst = gc_list;
 		gc_list = dst->next;
 		dst_free(dst);
 	}
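prune_one_bundle() and the loop above use a standard kernel idiom: unlink matching entries while holding the lock, thread them onto a private gc list, and do the expensive freeing only after the lock is dropped. The same pointer-threading pattern in plain C, with the locking omitted:

#include <stdio.h>

struct bundle {
	int stale;
	struct bundle *next;
};

static void prune(struct bundle **head, struct bundle **gc)
{
	struct bundle **bp = head, *b;

	while ((b = *bp) != NULL) {
		if (b->stale) {
			*bp = b->next;		/* unlink from the live list */
			b->next = *gc;		/* push onto the gc list */
			*gc = b;
		} else {
			bp = &b->next;		/* keep entry and advance */
		}
	}
}

int main(void)
{
	struct bundle b2 = { .stale = 1 };
	struct bundle b1 = { .stale = 0, .next = &b2 };
	struct bundle *head = &b1, *gc = NULL;

	prune(&head, &gc);
	printf("kept stale=%d, gc stale=%d\n", head->stale, gc->stale);
	return 0;
}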
@@ -1238,22 +1792,12 @@ static void __xfrm_garbage_collect(void)
 	xfrm_prune_bundles(unused_bundle);
 }
 
-int xfrm_flush_bundles(void)
+static int xfrm_flush_bundles(void)
 {
 	xfrm_prune_bundles(stale_bundle);
 	return 0;
 }
 
-static int always_true(struct dst_entry *dst)
-{
-	return 1;
-}
-
-void xfrm_flush_all_bundles(void)
-{
-	xfrm_prune_bundles(always_true);
-}
-
 void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -1281,7 +1825,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu);
  * still valid.
  */
 
-int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
+int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict)
 {
 	struct dst_entry *dst = &first->u.dst;
 	struct xfrm_dst *last;
@@ -1298,8 +1842,16 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
 
 	if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
 		return 0;
+	if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm))
+		return 0;
 	if (dst->xfrm->km.state != XFRM_STATE_VALID)
 		return 0;
+	if (xdst->genid != dst->xfrm->genid)
+		return 0;
+
+	if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&
+	    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
+		return 0;
 
 	mtu = dst_mtu(dst->child);
 	if (xdst->child_mtu_cached != mtu) {
@@ -1448,12 +2000,33 @@ static struct notifier_block xfrm_dev_notifier = {
 
 static void __init xfrm_policy_init(void)
 {
+	unsigned int hmask, sz;
+	int dir;
+
 	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
 					   sizeof(struct xfrm_dst),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL, NULL);
-	if (!xfrm_dst_cache)
-		panic("XFRM: failed to allocate xfrm_dst_cache\n");
+
+	hmask = 8 - 1;
+	sz = (hmask+1) * sizeof(struct hlist_head);
+
+	xfrm_policy_byidx = xfrm_hash_alloc(sz);
+	xfrm_idx_hmask = hmask;
+	if (!xfrm_policy_byidx)
+		panic("XFRM: failed to allocate byidx hash\n");
+
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		struct xfrm_policy_hash *htab;
+
+		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
+
+		htab = &xfrm_policy_bydst[dir];
+		htab->table = xfrm_hash_alloc(sz);
+		htab->hmask = hmask;
+		if (!htab->table)
+			panic("XFRM: failed to allocate bydst hash\n");
+	}
 
 	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
 	register_netdevice_notifier(&xfrm_dev_notifier);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0021aad5db43..9f63edd39346 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -18,8 +18,11 @@
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
 #include <linux/module.h>
+#include <linux/cache.h>
 #include <asm/uaccess.h>
 
+#include "xfrm_hash.h"
+
 struct sock *xfrm_nl;
 EXPORT_SYMBOL(xfrm_nl);
 
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
-   2. Hash table by daddr to find what SAs exist for given
+   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
       destination/tunnel endpoint. (output)
  */
 
@@ -44,8 +47,123 @@ static DEFINE_SPINLOCK(xfrm_state_lock);
  * Main use is finding SA after policy selected tunnel or transport mode.
  * Also, it can be used by ah/esp icmp error handler to find offending SA.
  */
-static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
-static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
+static struct hlist_head *xfrm_state_bydst __read_mostly;
+static struct hlist_head *xfrm_state_bysrc __read_mostly;
+static struct hlist_head *xfrm_state_byspi __read_mostly;
+static unsigned int xfrm_state_hmask __read_mostly;
+static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static unsigned int xfrm_state_num;
+static unsigned int xfrm_state_genid;
57
58static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
59 xfrm_address_t *saddr,
60 u32 reqid,
61 unsigned short family)
62{
63 return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
64}
65
66static inline unsigned int xfrm_src_hash(xfrm_address_t *addr,
67 unsigned short family)
68{
69 return __xfrm_src_hash(addr, family, xfrm_state_hmask);
70}
71
72static inline unsigned int
73xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
74{
75 return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
76}
77
78static void xfrm_hash_transfer(struct hlist_head *list,
79 struct hlist_head *ndsttable,
80 struct hlist_head *nsrctable,
81 struct hlist_head *nspitable,
82 unsigned int nhashmask)
83{
84 struct hlist_node *entry, *tmp;
85 struct xfrm_state *x;
86
87 hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
88 unsigned int h;
89
90 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
91 x->props.reqid, x->props.family,
92 nhashmask);
93 hlist_add_head(&x->bydst, ndsttable+h);
94
95 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
96 nhashmask);
97 hlist_add_head(&x->bysrc, nsrctable+h);
98
99 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
100 x->props.family, nhashmask);
101 hlist_add_head(&x->byspi, nspitable+h);
102 }
103}
104
105static unsigned long xfrm_hash_new_size(void)
106{
107 return ((xfrm_state_hmask + 1) << 1) *
108 sizeof(struct hlist_head);
109}
110
111static DEFINE_MUTEX(hash_resize_mutex);
112
113static void xfrm_hash_resize(void *__unused)
114{
115 struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
116 unsigned long nsize, osize;
117 unsigned int nhashmask, ohashmask;
118 int i;
119
120 mutex_lock(&hash_resize_mutex);
121
122 nsize = xfrm_hash_new_size();
123 ndst = xfrm_hash_alloc(nsize);
124 if (!ndst)
125 goto out_unlock;
126 nsrc = xfrm_hash_alloc(nsize);
127 if (!nsrc) {
128 xfrm_hash_free(ndst, nsize);
129 goto out_unlock;
130 }
131 nspi = xfrm_hash_alloc(nsize);
132 if (!nspi) {
133 xfrm_hash_free(ndst, nsize);
134 xfrm_hash_free(nsrc, nsize);
135 goto out_unlock;
136 }
137
138 spin_lock_bh(&xfrm_state_lock);
139
140 nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
141 for (i = xfrm_state_hmask; i >= 0; i--)
142 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
143 nhashmask);
144
145 odst = xfrm_state_bydst;
146 osrc = xfrm_state_bysrc;
147 ospi = xfrm_state_byspi;
148 ohashmask = xfrm_state_hmask;
149
150 xfrm_state_bydst = ndst;
151 xfrm_state_bysrc = nsrc;
152 xfrm_state_byspi = nspi;
153 xfrm_state_hmask = nhashmask;
154
155 spin_unlock_bh(&xfrm_state_lock);
156
157 osize = (ohashmask + 1) * sizeof(struct hlist_head);
158 xfrm_hash_free(odst, osize);
159 xfrm_hash_free(osrc, osize);
160 xfrm_hash_free(ospi, osize);
161
162out_unlock:
163 mutex_unlock(&hash_resize_mutex);
164}
165
166static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
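xfrm_hash_resize() above doubles the table, recomputes every entry's bucket under the new mask, and only then swaps the table pointers, so readers under the lock always see a consistent table. A userspace miniature of that grow-and-rehash step; hash() is an arbitrary multiplicative hash standing in for __xfrm_dst_hash(), and singly linked nodes stand in for hlists:

#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned int key;
	struct node *next;
};

static unsigned int hash(unsigned int key, unsigned int mask)
{
	return (key * 2654435761u) & mask;
}

static struct node **grow(struct node **tbl, unsigned int *maskp)
{
	unsigned int omask = *maskp, nmask = (omask << 1) | 1;
	struct node **ntbl = calloc(nmask + 1, sizeof(*ntbl));

	if (!ntbl)
		return tbl;			/* on failure keep the old table */
	for (unsigned int i = 0; i <= omask; i++) {
		struct node *n = tbl[i], *next;

		for (; n; n = next) {
			unsigned int h = hash(n->key, nmask);

			next = n->next;
			n->next = ntbl[h];	/* re-bucket under the new mask */
			ntbl[h] = n;
		}
	}
	free(tbl);				/* frees only the bucket array */
	*maskp = nmask;
	return ntbl;
}

int main(void)
{
	unsigned int mask = 7;			/* cf. hmask = 8 - 1 */
	struct node **tbl = calloc(mask + 1, sizeof(*tbl));
	struct node n = { .key = 42 };

	tbl[hash(n.key, mask)] = &n;
	tbl = grow(tbl, &mask);
	printf("new mask %u, key 42 in bucket %u\n", mask, hash(42, mask));
	free(tbl);
	return 0;
}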
 
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
 EXPORT_SYMBOL(km_waitq);
@@ -54,11 +172,9 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
-static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
-static int xfrm_state_gc_flush_bundles;
-
 int __xfrm_state_delete(struct xfrm_state *x);
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
@@ -69,14 +185,13 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
 
 static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
-	if (del_timer(&x->timer))
-		BUG();
-	if (del_timer(&x->rtimer))
-		BUG();
+	del_timer_sync(&x->timer);
+	del_timer_sync(&x->rtimer);
 	kfree(x->aalg);
 	kfree(x->ealg);
 	kfree(x->calg);
 	kfree(x->encap);
+	kfree(x->coaddr);
 	if (x->mode)
 		xfrm_put_mode(x->mode);
 	if (x->type) {
@@ -90,22 +205,17 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 static void xfrm_state_gc_task(void *data)
 {
 	struct xfrm_state *x;
-	struct list_head *entry, *tmp;
-	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
-
-	if (xfrm_state_gc_flush_bundles) {
-		xfrm_state_gc_flush_bundles = 0;
-		xfrm_flush_bundles();
-	}
+	struct hlist_node *entry, *tmp;
+	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_splice_init(&xfrm_state_gc_list, &gc_list);
+	gc_list.first = xfrm_state_gc_list.first;
+	INIT_HLIST_HEAD(&xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
-	list_for_each_safe(entry, tmp, &gc_list) {
-		x = list_entry(entry, struct xfrm_state, bydst);
+	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
 		xfrm_state_gc_destroy(x);
-	}
+
 	wake_up(&km_waitq);
 }
 
@@ -168,9 +278,9 @@ static void xfrm_timer_handler(unsigned long data)
 	if (warn)
 		km_state_expired(x, 0, 0);
 resched:
-	if (next != LONG_MAX &&
-	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
-		xfrm_state_hold(x);
+	if (next != LONG_MAX)
+		mod_timer(&x->timer, jiffies + make_jiffies(next));
+
 	goto out;
 
 expired:
@@ -185,7 +295,6 @@ expired:
 
 out:
 	spin_unlock(&x->lock);
-	xfrm_state_put(x);
 }
 
 static void xfrm_replay_timer_handler(unsigned long data);
@@ -199,8 +308,9 @@ struct xfrm_state *xfrm_state_alloc(void)
199 if (x) { 308 if (x) {
200 atomic_set(&x->refcnt, 1); 309 atomic_set(&x->refcnt, 1);
201 atomic_set(&x->tunnel_users, 0); 310 atomic_set(&x->tunnel_users, 0);
202 INIT_LIST_HEAD(&x->bydst); 311 INIT_HLIST_NODE(&x->bydst);
203 INIT_LIST_HEAD(&x->byspi); 312 INIT_HLIST_NODE(&x->bysrc);
313 INIT_HLIST_NODE(&x->byspi);
204 init_timer(&x->timer); 314 init_timer(&x->timer);
205 x->timer.function = xfrm_timer_handler; 315 x->timer.function = xfrm_timer_handler;
206 x->timer.data = (unsigned long)x; 316 x->timer.data = (unsigned long)x;
@@ -225,7 +335,7 @@ void __xfrm_state_destroy(struct xfrm_state *x)
 	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_add(&x->bydst, &xfrm_state_gc_list);
+	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 	schedule_work(&xfrm_state_gc_work);
 }
@@ -238,27 +348,12 @@ int __xfrm_state_delete(struct xfrm_state *x)
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
-		list_del(&x->bydst);
-		__xfrm_state_put(x);
-		if (x->id.spi) {
-			list_del(&x->byspi);
-			__xfrm_state_put(x);
-		}
+		hlist_del(&x->bydst);
+		hlist_del(&x->bysrc);
+		if (x->id.spi)
+			hlist_del(&x->byspi);
+		xfrm_state_num--;
 		spin_unlock(&xfrm_state_lock);
-		if (del_timer(&x->timer))
-			__xfrm_state_put(x);
-		if (del_timer(&x->rtimer))
-			__xfrm_state_put(x);
-
-		/* The number two in this test is the reference
-		 * mentioned in the comment below plus the reference
-		 * our caller holds. A larger value means that
-		 * there are DSTs attached to this xfrm_state.
-		 */
-		if (atomic_read(&x->refcnt) > 2) {
-			xfrm_state_gc_flush_bundles = 1;
-			schedule_work(&xfrm_state_gc_work);
-		}
 
 		/* All xfrm_state objects are created by xfrm_state_alloc.
 		 * The xfrm_state_alloc call gives a reference, and that
@@ -287,14 +382,15 @@ EXPORT_SYMBOL(xfrm_state_delete);
 void xfrm_state_flush(u8 proto)
 {
 	int i;
-	struct xfrm_state *x;
 
 	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+	for (i = 0; i <= xfrm_state_hmask; i++) {
+		struct hlist_node *entry;
+		struct xfrm_state *x;
 restart:
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
 			if (!xfrm_state_kern(x) &&
-			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
+			    xfrm_id_proto_match(x->id.proto, proto)) {
 				xfrm_state_hold(x);
 				spin_unlock_bh(&xfrm_state_lock);
 
@@ -325,29 +421,103 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	return 0;
 }
 
424static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
425{
426 unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
427 struct xfrm_state *x;
428 struct hlist_node *entry;
429
430 hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
431 if (x->props.family != family ||
432 x->id.spi != spi ||
433 x->id.proto != proto)
434 continue;
435
436 switch (family) {
437 case AF_INET:
438 if (x->id.daddr.a4 != daddr->a4)
439 continue;
440 break;
441 case AF_INET6:
442 if (!ipv6_addr_equal((struct in6_addr *)daddr,
443 (struct in6_addr *)
444 x->id.daddr.a6))
445 continue;
446 break;
447 };
448
449 xfrm_state_hold(x);
450 return x;
451 }
452
453 return NULL;
454}
455
456static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
457{
458 unsigned int h = xfrm_src_hash(saddr, family);
459 struct xfrm_state *x;
460 struct hlist_node *entry;
461
462 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
463 if (x->props.family != family ||
464 x->id.proto != proto)
465 continue;
466
467 switch (family) {
468 case AF_INET:
469 if (x->id.daddr.a4 != daddr->a4 ||
470 x->props.saddr.a4 != saddr->a4)
471 continue;
472 break;
473 case AF_INET6:
474 if (!ipv6_addr_equal((struct in6_addr *)daddr,
475 (struct in6_addr *)
476 x->id.daddr.a6) ||
477 !ipv6_addr_equal((struct in6_addr *)saddr,
478 (struct in6_addr *)
479 x->props.saddr.a6))
480 continue;
481 break;
482 };
483
484 xfrm_state_hold(x);
485 return x;
486 }
487
488 return NULL;
489}
490
491static inline struct xfrm_state *
492__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
493{
494 if (use_spi)
495 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
496 x->id.proto, family);
497 else
498 return __xfrm_state_lookup_byaddr(&x->id.daddr,
499 &x->props.saddr,
500 x->id.proto, family);
501}
502
 struct xfrm_state *
 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		struct flowi *fl, struct xfrm_tmpl *tmpl,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
-	unsigned h = xfrm_dst_hash(daddr, family);
+	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
+	struct hlist_node *entry;
 	struct xfrm_state *x, *x0;
 	int acquire_in_progress = 0;
 	int error = 0;
 	struct xfrm_state *best = NULL;
-	struct xfrm_state_afinfo *afinfo;
-
-	afinfo = xfrm_state_get_afinfo(family);
-	if (afinfo == NULL) {
-		*err = -EAFNOSUPPORT;
-		return NULL;
-	}
 
 	spin_lock_bh(&xfrm_state_lock);
-	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == tmpl->reqid &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
@@ -367,7 +537,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		 */
 		if (x->km.state == XFRM_STATE_VALID) {
 			if (!xfrm_selector_match(&x->sel, fl, family) ||
-			    !xfrm_sec_ctx_match(pol->security, x->security))
+			    !security_xfrm_state_pol_flow_match(x, pol, fl))
 				continue;
 			if (!best ||
 			    best->km.dying > x->km.dying ||
@@ -379,7 +549,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		} else if (x->km.state == XFRM_STATE_ERROR ||
 			   x->km.state == XFRM_STATE_EXPIRED) {
 			if (xfrm_selector_match(&x->sel, fl, family) &&
-			    xfrm_sec_ctx_match(pol->security, x->security))
+			    security_xfrm_state_pol_flow_match(x, pol, fl))
 				error = -ESRCH;
 		}
 	}
@@ -388,8 +558,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	x = best;
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
-		    (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
-					       tmpl->id.proto)) != NULL) {
+		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
+					      tmpl->id.proto, family)) != NULL) {
 			xfrm_state_put(x0);
 			error = -EEXIST;
 			goto out;
@@ -403,17 +573,24 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		 * to current session. */
 		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
 
+		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
+		if (error) {
+			x->km.state = XFRM_STATE_DEAD;
+			xfrm_state_put(x);
+			x = NULL;
+			goto out;
+		}
+
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
-			list_add_tail(&x->bydst, xfrm_state_bydst+h);
-			xfrm_state_hold(x);
+			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+			h = xfrm_src_hash(saddr, family);
+			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
-				list_add(&x->byspi, xfrm_state_byspi+h);
-				xfrm_state_hold(x);
+				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
 			}
 			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-			xfrm_state_hold(x);
 			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
 			add_timer(&x->timer);
 		} else {
@@ -429,59 +606,167 @@ out:
 	else
 		*err = acquire_in_progress ? -EAGAIN : error;
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 	return x;
 }
 
 static void __xfrm_state_insert(struct xfrm_state *x)
 {
-	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
-
-	list_add(&x->bydst, xfrm_state_bydst+h);
-	xfrm_state_hold(x);
-
-	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-
-	list_add(&x->byspi, xfrm_state_byspi+h);
-	xfrm_state_hold(x);
-
-	if (!mod_timer(&x->timer, jiffies + HZ))
-		xfrm_state_hold(x);
-
-	if (x->replay_maxage &&
-	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
-		xfrm_state_hold(x);
+	unsigned int h;
+
+	x->genid = ++xfrm_state_genid;
+
+	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+			  x->props.reqid, x->props.family);
+	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+
+	h = xfrm_src_hash(&x->props.saddr, x->props.family);
+	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+
+	if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
+		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
+				  x->props.family);
+
+		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+	}
+
+	mod_timer(&x->timer, jiffies + HZ);
+	if (x->replay_maxage)
+		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
 
 	wake_up(&km_waitq);
+
+	xfrm_state_num++;
+
+	if (x->bydst.next != NULL &&
+	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
+	    xfrm_state_num > xfrm_state_hmask)
+		schedule_work(&xfrm_hash_work);
+}
+
+/* xfrm_state_lock is held */
+static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
+{
+	unsigned short family = xnew->props.family;
+	u32 reqid = xnew->props.reqid;
+	struct xfrm_state *x;
+	struct hlist_node *entry;
+	unsigned int h;
+
+	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
+	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+		if (x->props.family == family &&
+		    x->props.reqid == reqid &&
+		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
+		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
+			x->genid = xfrm_state_genid;
+	}
 }
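__xfrm_state_bump_genids() invalidates cached bundles without visiting them: every bundle records the genid of the state it was built from, and inserting a colliding state just advances the counter, so the xdst->genid comparison added to xfrm_bundle_ok() fails on the next use. The idea reduced to two integers (the names here are illustrative):

#include <stdio.h>

static unsigned int state_genid;

struct cached {
	unsigned int genid;		/* genid recorded at build time */
};

static int still_valid(const struct cached *c)
{
	return c->genid == state_genid;	/* cf. xdst->genid != dst->xfrm->genid */
}

int main(void)
{
	struct cached c = { .genid = ++state_genid };

	printf("fresh: %d\n", still_valid(&c));
	++state_genid;			/* a colliding state was inserted */
	printf("after bump: %d\n", still_valid(&c));
	return 0;
}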
 
 void xfrm_state_insert(struct xfrm_state *x)
 {
 	spin_lock_bh(&xfrm_state_lock);
+	__xfrm_state_bump_genids(x);
 	__xfrm_state_insert(x);
 	spin_unlock_bh(&xfrm_state_lock);
-
-	xfrm_flush_all_bundles();
 }
 EXPORT_SYMBOL(xfrm_state_insert);
 
674/* xfrm_state_lock is held */
675static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
676{
677 unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
678 struct hlist_node *entry;
679 struct xfrm_state *x;
680
681 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
682 if (x->props.reqid != reqid ||
683 x->props.mode != mode ||
684 x->props.family != family ||
685 x->km.state != XFRM_STATE_ACQ ||
686 x->id.spi != 0)
687 continue;
688
689 switch (family) {
690 case AF_INET:
691 if (x->id.daddr.a4 != daddr->a4 ||
692 x->props.saddr.a4 != saddr->a4)
693 continue;
694 break;
695 case AF_INET6:
696 if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
697 (struct in6_addr *)daddr) ||
698 !ipv6_addr_equal((struct in6_addr *)
699 x->props.saddr.a6,
700 (struct in6_addr *)saddr))
701 continue;
702 break;
703 };
704
705 xfrm_state_hold(x);
706 return x;
707 }
708
709 if (!create)
710 return NULL;
711
712 x = xfrm_state_alloc();
713 if (likely(x)) {
714 switch (family) {
715 case AF_INET:
716 x->sel.daddr.a4 = daddr->a4;
717 x->sel.saddr.a4 = saddr->a4;
718 x->sel.prefixlen_d = 32;
719 x->sel.prefixlen_s = 32;
720 x->props.saddr.a4 = saddr->a4;
721 x->id.daddr.a4 = daddr->a4;
722 break;
723
724 case AF_INET6:
725 ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
726 (struct in6_addr *)daddr);
727 ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
728 (struct in6_addr *)saddr);
729 x->sel.prefixlen_d = 128;
730 x->sel.prefixlen_s = 128;
731 ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
732 (struct in6_addr *)saddr);
733 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
734 (struct in6_addr *)daddr);
735 break;
736 };
737
738 x->km.state = XFRM_STATE_ACQ;
739 x->id.proto = proto;
740 x->props.family = family;
741 x->props.mode = mode;
742 x->props.reqid = reqid;
743 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
744 xfrm_state_hold(x);
745 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
746 add_timer(&x->timer);
747 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
748 h = xfrm_src_hash(saddr, family);
749 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
750 wake_up(&km_waitq);
751 }
752
753 return x;
754}
755
 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
 
 int xfrm_state_add(struct xfrm_state *x)
 {
-	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_state *x1;
 	int family;
 	int err;
+	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
 	family = x->props.family;
-	afinfo = xfrm_state_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
 
 	spin_lock_bh(&xfrm_state_lock);
 
-	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	x1 = __xfrm_state_locate(x, use_spi, family);
 	if (x1) {
 		xfrm_state_put(x1);
 		x1 = NULL;
@@ -489,7 +774,7 @@ int xfrm_state_add(struct xfrm_state *x)
 		goto out;
 	}
 
-	if (x->km.seq) {
+	if (use_spi && x->km.seq) {
 		x1 = __xfrm_find_acq_byseq(x->km.seq);
 		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
 			xfrm_state_put(x1);
@@ -497,20 +782,17 @@ int xfrm_state_add(struct xfrm_state *x)
 		}
 	}
 
-	if (!x1)
-		x1 = afinfo->find_acq(
-			x->props.mode, x->props.reqid, x->id.proto,
-			&x->id.daddr, &x->props.saddr, 0);
+	if (use_spi && !x1)
+		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
+				     x->id.proto,
+				     &x->id.daddr, &x->props.saddr, 0);
 
+	__xfrm_state_bump_genids(x);
 	__xfrm_state_insert(x);
 	err = 0;
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
-
-	if (!err)
-		xfrm_flush_all_bundles();
 
 	if (x1) {
 		xfrm_state_delete(x1);
@@ -523,16 +805,12 @@ EXPORT_SYMBOL(xfrm_state_add);
 
 int xfrm_state_update(struct xfrm_state *x)
 {
-	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_state *x1;
 	int err;
-
-	afinfo = xfrm_state_get_afinfo(x->props.family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
+	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
 	spin_lock_bh(&xfrm_state_lock);
-	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
 
 	err = -ESRCH;
 	if (!x1)
538 if (!x1) 816 if (!x1)
@@ -552,7 +830,6 @@ int xfrm_state_update(struct xfrm_state *x)
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 
 	if (err)
 		return err;
@@ -568,11 +845,15 @@ out:
 	if (likely(x1->km.state == XFRM_STATE_VALID)) {
 		if (x->encap && x1->encap)
 			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+		if (x->coaddr && x1->coaddr) {
+			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
+		}
+		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
+			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
 		x1->km.dying = 0;
 
-		if (!mod_timer(&x1->timer, jiffies + HZ))
-			xfrm_state_hold(x1);
+		mod_timer(&x1->timer, jiffies + HZ);
 		if (x1->curlft.use_time)
 			xfrm_state_check_expire(x1);
 
578 859
@@ -597,8 +878,7 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
 		x->km.state = XFRM_STATE_EXPIRED;
-		if (!mod_timer(&x->timer, jiffies))
-			xfrm_state_hold(x);
+		mod_timer(&x->timer, jiffies);
 		return -EINVAL;
 	}
 
@@ -640,46 +920,93 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
 		  unsigned short family)
 {
 	struct xfrm_state *x;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->state_lookup(daddr, spi, proto);
+	x = __xfrm_state_lookup(daddr, spi, proto, family);
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 	return x;
 }
 EXPORT_SYMBOL(xfrm_state_lookup);
 
 struct xfrm_state *
+xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
+			 u8 proto, unsigned short family)
+{
+	struct xfrm_state *x;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
+	spin_unlock_bh(&xfrm_state_lock);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
+
+struct xfrm_state *
 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
 	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
 	      int create, unsigned short family)
 {
 	struct xfrm_state *x;
+
+	spin_lock_bh(&xfrm_state_lock);
+	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
+	spin_unlock_bh(&xfrm_state_lock);
+
+	return x;
+}
+EXPORT_SYMBOL(xfrm_find_acq);
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+int
+xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
+	       unsigned short family)
+{
+	int err = 0;
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 	if (!afinfo)
-		return NULL;
+		return -EAFNOSUPPORT;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
+	if (afinfo->tmpl_sort)
+		err = afinfo->tmpl_sort(dst, src, n);
 	spin_unlock_bh(&xfrm_state_lock);
 	xfrm_state_put_afinfo(afinfo);
-	return x;
+	return err;
 }
-EXPORT_SYMBOL(xfrm_find_acq);
+EXPORT_SYMBOL(xfrm_tmpl_sort);
+
+int
+xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
+		unsigned short family)
+{
+	int err = 0;
+	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+	if (!afinfo)
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+	if (afinfo->state_sort)
+		err = afinfo->state_sort(dst, src, n);
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(afinfo);
+	return err;
+}
+EXPORT_SYMBOL(xfrm_state_sort);
+#endif
 
 /* Silly enough, but I'm lazy to build resolution list */
 
675static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) 999static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
676{ 1000{
677 int i; 1001 int i;
678 struct xfrm_state *x;
679 1002
680 for (i = 0; i < XFRM_DST_HSIZE; i++) { 1003 for (i = 0; i <= xfrm_state_hmask; i++) {
681 list_for_each_entry(x, xfrm_state_bydst+i, bydst) { 1004 struct hlist_node *entry;
682 if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { 1005 struct xfrm_state *x;
1006
1007 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1008 if (x->km.seq == seq &&
1009 x->km.state == XFRM_STATE_ACQ) {
683 xfrm_state_hold(x); 1010 xfrm_state_hold(x);
684 return x; 1011 return x;
685 } 1012 }
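The conversion above settles on a two-level locking pattern: a double-underscore core (__xfrm_state_lookup(), __find_acq_core()) that assumes xfrm_state_lock is already held, plus a thin exported wrapper that takes the lock and delegates. A minimal portable-C sketch of that layering, using pthreads and invented names:

/* sketch only: locked wrapper around a caller-must-lock "__" core */
#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

struct entry { int key; struct entry *next; };
static struct entry *table_head;

/* core lookup: caller holds table_lock */
static struct entry *__lookup(int key)
{
	struct entry *e;

	for (e = table_head; e; e = e->next)
		if (e->key == key)
			return e;
	return NULL;
}

/* public wrapper: mirrors xfrm_state_lookup()/__xfrm_state_lookup() */
struct entry *lookup(int key)
{
	struct entry *e;

	pthread_mutex_lock(&table_lock);
	e = __lookup(key);
	pthread_mutex_unlock(&table_lock);
	return e;
}

int main(void)
{
	struct entry e1 = { .key = 42, .next = NULL };

	table_head = &e1;
	return lookup(42) == &e1 ? 0 : 1;
}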
@@ -715,7 +1042,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq);
 void
 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
 {
-	u32 h;
+	unsigned int h;
 	struct xfrm_state *x0;
 
 	if (x->id.spi)
@@ -745,8 +1072,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
 	if (x->id.spi) {
 		spin_lock_bh(&xfrm_state_lock);
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-		list_add(&x->byspi, xfrm_state_byspi+h);
-		xfrm_state_hold(x);
+		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
 		spin_unlock_bh(&xfrm_state_lock);
 		wake_up(&km_waitq);
 	}
@@ -758,13 +1084,14 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
 {
 	int i;
 	struct xfrm_state *x;
+	struct hlist_node *entry;
 	int count = 0;
 	int err = 0;
 
 	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
+	for (i = 0; i <= xfrm_state_hmask; i++) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+			if (xfrm_id_proto_match(x->id.proto, proto))
 				count++;
 		}
 	}
@@ -773,9 +1100,9 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
 		goto out;
 	}
 
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
+	for (i = 0; i <= xfrm_state_hmask; i++) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+			if (!xfrm_id_proto_match(x->id.proto, proto))
 				continue;
 			err = func(x, --count, data);
 			if (err)
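Both walk loops now defer to xfrm_id_proto_match(), which lets a proto argument of 0 match every state while IPSEC_PROTO_ANY keeps its old meaning of "any IPsec protocol" (note the dump path below now passes 0 instead of IPSEC_PROTO_ANY). A plausible standalone model of that predicate; the real helper lives in include/net/xfrm.h and its exact body here is an assumption:

/* sketch only: assumed semantics of xfrm_id_proto_match() */
#include <stdio.h>

#define IPPROTO_ESP	50
#define IPPROTO_AH	51
#define IPPROTO_COMP	108
#define IPSEC_PROTO_ANY	255

static int id_proto_match(unsigned char proto, unsigned char userproto)
{
	return userproto == 0 ||			/* 0: walk everything */
	       proto == userproto ||			/* exact match */
	       (userproto == IPSEC_PROTO_ANY &&		/* any IPsec proto */
		(proto == IPPROTO_ESP || proto == IPPROTO_AH ||
		 proto == IPPROTO_COMP));
}

int main(void)
{
	printf("%d %d %d\n",
	       id_proto_match(IPPROTO_AH, 0),			/* 1 */
	       id_proto_match(IPPROTO_ESP, IPSEC_PROTO_ANY),	/* 1 */
	       id_proto_match(IPPROTO_AH, IPPROTO_ESP));	/* 0 */
	return 0;
}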
@@ -832,10 +1159,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event)
 	km_state_notify(x, &c);
 
 	if (x->replay_maxage &&
-	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
-		xfrm_state_hold(x);
+	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
 		x->xflags &= ~XFRM_TIME_DEFER;
-	}
 }
 EXPORT_SYMBOL(xfrm_replay_notify);
 
@@ -853,7 +1178,6 @@ static void xfrm_replay_timer_handler(unsigned long data)
 	}
 
 	spin_unlock(&x->lock);
-	xfrm_state_put(x);
 }
 
 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
@@ -997,6 +1321,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
 }
 EXPORT_SYMBOL(km_policy_expired);
 
+int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
+{
+	int err = -EINVAL;
+	int ret;
+	struct xfrm_mgr *km;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(km, &xfrm_km_list, list) {
+		if (km->report) {
+			ret = km->report(proto, sel, addr);
+			if (!ret)
+				err = ret;
+		}
+	}
+	read_unlock(&xfrm_km_lock);
+	return err;
+}
+EXPORT_SYMBOL(km_report);
+
 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
 	int err;
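km_report() fans the event out to every registered key manager that implements a report hook, and the call counts as successful if at least one hook returns 0. A compact standalone model of that accumulate-the-best-result loop, with invented manager functions:

/* sketch only: models the km_report() fan-out above */
#include <stdio.h>
#include <errno.h>

typedef int (*report_fn)(unsigned char proto);

static int mgr_ok(unsigned char proto)  { (void)proto; return 0; }
static int mgr_err(unsigned char proto) { (void)proto; return -ENOMEM; }

int main(void)
{
	report_fn mgrs[] = { mgr_err, mgr_ok, NULL /* manager without hook */ };
	int err = -EINVAL;	/* default when nobody takes the report */
	size_t i;

	for (i = 0; i < sizeof(mgrs) / sizeof(mgrs[0]); i++) {
		if (mgrs[i] && mgrs[i](50) == 0)
			err = 0;	/* one success is enough */
	}
	printf("err=%d\n", err);	/* prints err=0 */
	return 0;
}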
@@ -1018,7 +1361,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
 	err = -EINVAL;
 	read_lock(&xfrm_km_lock);
 	list_for_each_entry(km, &xfrm_km_list, list) {
-		pol = km->compile_policy(sk->sk_family, optname, data,
+		pol = km->compile_policy(sk, optname, data,
 					 optlen, &err);
 		if (err >= 0)
 			break;
@@ -1065,11 +1408,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 	write_lock_bh(&xfrm_state_afinfo_lock);
 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
 		err = -ENOBUFS;
-	else {
-		afinfo->state_bydst = xfrm_state_bydst;
-		afinfo->state_byspi = xfrm_state_byspi;
+	else
 		xfrm_state_afinfo[afinfo->family] = afinfo;
-	}
 	write_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
 }
@@ -1086,11 +1426,8 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
 		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
 			err = -EINVAL;
-		else {
+		else
 			xfrm_state_afinfo[afinfo->family] = NULL;
-			afinfo->state_byspi = NULL;
-			afinfo->state_bydst = NULL;
-		}
 	}
 	write_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
@@ -1206,12 +1543,17 @@ EXPORT_SYMBOL(xfrm_init_state);
 
 void __init xfrm_state_init(void)
 {
-	int i;
+	unsigned int sz;
+
+	sz = sizeof(struct hlist_head) * 8;
+
+	xfrm_state_bydst = xfrm_hash_alloc(sz);
+	xfrm_state_bysrc = xfrm_hash_alloc(sz);
+	xfrm_state_byspi = xfrm_hash_alloc(sz);
+	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
+		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
+	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
-	for (i=0; i<XFRM_DST_HSIZE; i++) {
-		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
-		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
-	}
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
 }
 
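With the tables now allocated as a power-of-two number of hlist heads, xfrm_state_hmask doubles as both the walk bound ("i <= hmask") and a cheap bucket mask, since "hash & (n - 1)" equals "hash % n" whenever n is a power of two. A small standalone sketch of that arithmetic; the value 8 mirrors the initial "sizeof(struct hlist_head) * 8" above:

/* sketch only: power-of-two hash sizing and masking */
#include <stdio.h>

int main(void)
{
	unsigned int nheads = 8;		/* sz / sizeof(head) */
	unsigned int hmask  = nheads - 1;	/* 0x7 */
	unsigned int hash   = 0xdeadbeef;

	printf("bucket=%u (same as hash %% %u = %u)\n",
	       hash & hmask, nheads, hash % nheads);
	return 0;
}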
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fa79ddc4239e..c59a78d2923a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -28,6 +28,9 @@
 #include <net/xfrm.h>
 #include <net/netlink.h>
 #include <asm/uaccess.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <linux/in6.h>
+#endif
 
 static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
 {
@@ -87,6 +90,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma)
 	return 0;
 }
 
+static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type,
+			   xfrm_address_t **addrp)
+{
+	struct rtattr *rt = xfrma[type - 1];
+
+	if (!rt)
+		return 0;
+
+	if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp))
+		return -EINVAL;
+
+	if (addrp)
+		*addrp = RTA_DATA(rt);
+
+	return 0;
+}
 
 static inline int verify_sec_ctx_len(struct rtattr **xfrma)
 {
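verify_one_addr() is the usual netlink defensive check: an absent optional attribute is fine, but a present one must carry at least a full xfrm_address_t before anyone dereferences its payload. A standalone model of the check with simplified types; the unsigned-underflow guard is written out explicitly here, which the kernel version leaves implicit:

/* sketch only: validate an optional attribute's payload length */
#include <stdio.h>

struct rtattr_model { unsigned short rta_len; };	/* header + payload */
struct addr_model   { unsigned int a4; };		/* stand-in for xfrm_address_t */

static int verify_addr(const struct rtattr_model *rta)
{
	size_t payload;

	if (!rta)
		return 0;		/* attribute absent: nothing to verify */

	payload = rta->rta_len < sizeof(*rta)
		? 0 : rta->rta_len - sizeof(*rta);
	if (payload < sizeof(struct addr_model))
		return -22;		/* -EINVAL: truncated payload */
	return 0;
}

int main(void)
{
	struct rtattr_model short_attr = { sizeof(short_attr) + 2 };
	struct rtattr_model good_attr  = { sizeof(good_attr) +
					   sizeof(struct addr_model) };

	printf("%d %d\n", verify_addr(&short_attr), verify_addr(&good_attr));
	return 0;
}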
@@ -157,6 +176,19 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 			goto out;
 		break;
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case IPPROTO_DSTOPTS:
+	case IPPROTO_ROUTING:
+		if (xfrma[XFRMA_ALG_COMP-1] ||
+		    xfrma[XFRMA_ALG_AUTH-1] ||
+		    xfrma[XFRMA_ALG_CRYPT-1] ||
+		    xfrma[XFRMA_ENCAP-1] ||
+		    xfrma[XFRMA_SEC_CTX-1] ||
+		    !xfrma[XFRMA_COADDR-1])
+			goto out;
+		break;
+#endif
+
 	default:
 		goto out;
 	};
@@ -171,11 +203,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		goto out;
 	if ((err = verify_sec_ctx_len(xfrma)))
 		goto out;
+	if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL)))
+		goto out;
 
 	err = -EINVAL;
 	switch (p->mode) {
-	case 0:
-	case 1:
+	case XFRM_MODE_TRANSPORT:
+	case XFRM_MODE_TUNNEL:
+	case XFRM_MODE_ROUTEOPTIMIZATION:
 		break;
 
 	default:
@@ -260,6 +295,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg)
 	return security_xfrm_state_alloc(x, uctx);
 }
 
+static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg)
+{
+	struct rtattr *rta = u_arg;
+	xfrm_address_t *p, *uaddrp;
+
+	if (!rta)
+		return 0;
+
+	uaddrp = RTA_DATA(rta);
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	memcpy(p, uaddrp, sizeof(*p));
+	*addrpp = p;
+	return 0;
+}
+
 static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
 {
 	memcpy(&x->id, &p->id, sizeof(x->id));
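attach_one_addr() copies the attribute payload into its own allocation so the state owns the care-of address, rather than pointing into the netlink message buffer that is gone once the request has been processed. The same idiom in plain C, with illustrative names:

/* sketch only: duplicate an optional attribute so the object owns it */
#include <stdlib.h>
#include <string.h>

struct addr_model { unsigned int a4; };

static int attach_addr(struct addr_model **slot, const struct addr_model *src)
{
	struct addr_model *p;

	if (!src)
		return 0;		/* optional attribute not present */

	p = malloc(sizeof(*p));
	if (!p)
		return -12;		/* -ENOMEM */

	memcpy(p, src, sizeof(*p));
	*slot = p;			/* caller frees together with the state */
	return 0;
}

int main(void)
{
	struct addr_model src = { 0x0100007f }, *dst = NULL;
	int err = attach_addr(&dst, &src);

	free(dst);
	return err;
}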
@@ -349,7 +402,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
 		goto error;
 	if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
 		goto error;
-
+	if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1])))
+		goto error;
 	err = xfrm_init_state(x);
 	if (err)
 		goto error;
@@ -418,16 +472,48 @@ out:
 	return err;
 }
 
+static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p,
+						 struct rtattr **xfrma,
+						 int *errp)
+{
+	struct xfrm_state *x = NULL;
+	int err;
+
+	if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) {
+		err = -ESRCH;
+		x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	} else {
+		xfrm_address_t *saddr = NULL;
+
+		err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr);
+		if (err)
+			goto out;
+
+		if (!saddr) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto,
+					     p->family);
+	}
+
+ out:
+	if (!x && errp)
+		*errp = err;
+	return x;
+}
+
 static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 {
 	struct xfrm_state *x;
-	int err;
+	int err = -ESRCH;
 	struct km_event c;
 	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
 
-	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
 	if (x == NULL)
-		return -ESRCH;
+		return err;
 
 	if ((err = security_xfrm_state_delete(x)) != 0)
 		goto out;
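xfrm_user_state_lookup() picks the lookup key by protocol: the classic IPsec protocols are uniquely identified by (daddr, spi, proto), while the new SPI-less users (the IPv6 routing and destination-options headers added above) need (daddr, saddr, proto) and therefore a mandatory XFRMA_SRCADDR attribute. A small sketch of that dispatch; which protocols count as "has an SPI" is written out here as an assumption:

/* sketch only: choose the SA lookup key by protocol */
#include <stdio.h>

#define IPPROTO_ESP	50
#define IPPROTO_AH	51
#define IPPROTO_COMP	108

static int has_spi(unsigned char proto)
{
	return proto == IPPROTO_ESP || proto == IPPROTO_AH ||
	       proto == IPPROTO_COMP;
}

static const char *lookup_strategy(unsigned char proto)
{
	return has_spi(proto) ? "lookup by (daddr, spi, proto)"
			      : "lookup by (daddr, saddr, proto)";
}

int main(void)
{
	printf("%s\n%s\n", lookup_strategy(IPPROTO_ESP),
	       lookup_strategy(43 /* IPv6 routing header */));
	return 0;
}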
@@ -521,6 +607,13 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 		uctx->ctx_len = x->security->ctx_len;
 		memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len);
 	}
+
+	if (x->coaddr)
+		RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
+
+	if (x->lastused)
+		RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
+
 	nlh->nlmsg_len = skb->tail - b;
 out:
 	sp->this_idx++;
@@ -542,7 +635,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 	info.nlmsg_flags = NLM_F_MULTI;
 	info.this_idx = 0;
 	info.start_idx = cb->args[0];
-	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
+	(void) xfrm_state_walk(0, dump_one_state, &info);
 	cb->args[0] = info.this_idx;
 
 	return skb->len;
@@ -578,10 +671,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
 	struct xfrm_state *x;
 	struct sk_buff *resp_skb;
-	int err;
+	int err = -ESRCH;
 
-	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
-	err = -ESRCH;
+	x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
 	if (x == NULL)
 		goto out_noput;
 
@@ -694,6 +786,22 @@ static int verify_policy_dir(__u8 dir)
 	return 0;
 }
 
+static int verify_policy_type(__u8 type)
+{
+	switch (type) {
+	case XFRM_POLICY_TYPE_MAIN:
+#ifdef CONFIG_XFRM_SUB_POLICY
+	case XFRM_POLICY_TYPE_SUB:
+#endif
+		break;
+
+	default:
+		return -EINVAL;
+	};
+
+	return 0;
+}
+
 static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 {
 	switch (p->share) {
@@ -787,6 +895,29 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
 	return 0;
 }
 
+static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1];
+	struct xfrm_userpolicy_type *upt;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
+	int err;
+
+	if (rt) {
+		if (rt->rta_len < sizeof(*upt))
+			return -EINVAL;
+
+		upt = RTA_DATA(rt);
+		type = upt->type;
+	}
+
+	err = verify_policy_type(type);
+	if (err)
+		return err;
+
+	*tp = type;
+	return 0;
+}
+
 static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
 {
 	xp->priority = p->priority;
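copy_from_user_policy_type() treats XFRMA_POLICY_TYPE as optional-with-default: an absent attribute silently means XFRM_POLICY_TYPE_MAIN, while a present one is validated before use, so old userland binaries that never send the attribute keep working unchanged. A standalone model of that parse:

/* sketch only: optional netlink attribute with a default value */
#include <stdio.h>

#define POLICY_TYPE_MAIN 0
#define POLICY_TYPE_SUB  1

static int parse_type(const unsigned char *attr, unsigned char *out)
{
	unsigned char type = POLICY_TYPE_MAIN;	/* default when absent */

	if (attr)
		type = *attr;
	if (type != POLICY_TYPE_MAIN && type != POLICY_TYPE_SUB)
		return -22;			/* -EINVAL */
	*out = type;
	return 0;
}

int main(void)
{
	unsigned char t, sub = POLICY_TYPE_SUB, bad = 7;

	printf("%d:%u ", parse_type(NULL, &t), t);	/* 0:0 (default) */
	printf("%d:%u ", parse_type(&sub, &t), t);	/* 0:1 */
	printf("%d\n", parse_type(&bad, &t));		/* -22 */
	return 0;
}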
@@ -825,16 +956,20 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
 
 	copy_from_user_policy(xp, p);
 
+	err = copy_from_user_policy_type(&xp->type, xfrma);
+	if (err)
+		goto error;
+
 	if (!(err = copy_from_user_tmpl(xp, xfrma)))
 		err = copy_from_user_sec_ctx(xp, xfrma);
-
-	if (err) {
-		*errp = err;
-		kfree(xp);
-		xp = NULL;
-	}
+	if (err)
+		goto error;
 
 	return xp;
+ error:
+	*errp = err;
+	kfree(xp);
+	return NULL;
 }
 
 static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
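The xfrm_policy_construct() rewrite above replaces the trailing "if (err) { ... }" cleanup with the kernel's single-exit goto pattern: every later failure funnels through one label that frees xp in exactly one place, so adding a new step cannot leak the allocation. A minimal sketch of the shape:

/* sketch only: centralized error cleanup via a single exit label */
#include <stdlib.h>

struct policy { int type; };

static struct policy *construct(int fail_step, int *errp)
{
	struct policy *xp = malloc(sizeof(*xp));
	int err = -12;				/* -ENOMEM */

	if (!xp)
		goto error_noalloc;

	err = (fail_step == 1) ? -22 : 0;	/* e.g. bad policy type */
	if (err)
		goto error;
	err = (fail_step == 2) ? -22 : 0;	/* e.g. bad templates */
	if (err)
		goto error;

	return xp;

 error:
	free(xp);				/* one place releases xp */
 error_noalloc:
	*errp = err;
	return NULL;
}

int main(void)
{
	int err = 0;
	struct policy *p = construct(2, &err);

	free(p);
	return p ? 0 : -err;
}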
@@ -911,27 +1046,63 @@ rtattr_failure:
 	return -1;
 }
 
-static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
 {
-	if (xp->security) {
-		int ctx_size = sizeof(struct xfrm_sec_ctx) +
-				xp->security->ctx_len;
-		struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
-		struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+	int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len;
+	struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
+	struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+	uctx->exttype = XFRMA_SEC_CTX;
+	uctx->len = ctx_size;
+	uctx->ctx_doi = s->ctx_doi;
+	uctx->ctx_alg = s->ctx_alg;
+	uctx->ctx_len = s->ctx_len;
+	memcpy(uctx + 1, s->ctx_str, s->ctx_len);
+	return 0;
 
-		uctx->exttype = XFRMA_SEC_CTX;
-		uctx->len = ctx_size;
-		uctx->ctx_doi = xp->security->ctx_doi;
-		uctx->ctx_alg = xp->security->ctx_alg;
-		uctx->ctx_len = xp->security->ctx_len;
-		memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len);
+ rtattr_failure:
+	return -1;
+}
+
+static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb)
+{
+	if (x->security) {
+		return copy_sec_ctx(x->security, skb);
 	}
 	return 0;
+}
 
- rtattr_failure:
+static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	if (xp->security) {
+		return copy_sec_ctx(xp->security, skb);
+	}
+	return 0;
+}
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	struct xfrm_userpolicy_type upt;
+
+	memset(&upt, 0, sizeof(upt));
+	upt.type = xp->type;
+
+	RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
+
+	return 0;
+
+rtattr_failure:
 	return -1;
 }
 
+#else
+static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	return 0;
+}
+#endif
+
 static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
 {
 	struct xfrm_dump_info *sp = ptr;
@@ -955,6 +1126,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 		goto nlmsg_failure;
 	if (copy_to_user_sec_ctx(xp, skb))
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 out:
@@ -976,7 +1149,10 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
 	info.nlmsg_flags = NLM_F_MULTI;
 	info.this_idx = 0;
 	info.start_idx = cb->args[0];
-	(void) xfrm_policy_walk(dump_one_policy, &info);
+	(void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info);
+#ifdef CONFIG_XFRM_SUB_POLICY
+	(void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info);
+#endif
 	cb->args[0] = info.this_idx;
 
 	return skb->len;
@@ -1012,6 +1188,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 {
 	struct xfrm_policy *xp;
 	struct xfrm_userpolicy_id *p;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err;
 	struct km_event c;
 	int delete;
@@ -1019,12 +1196,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 	p = NLMSG_DATA(nlh);
 	delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
 
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
+
 	err = verify_policy_dir(p->dir);
 	if (err)
 		return err;
 
 	if (p->index)
-		xp = xfrm_policy_byid(p->dir, p->index, delete);
+		xp = xfrm_policy_byid(type, p->dir, p->index, delete);
 	else {
 		struct rtattr **rtattrs = (struct rtattr **)xfrma;
 		struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
@@ -1041,7 +1222,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 			if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete);
+		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete);
 		security_xfrm_policy_free(&tmp);
 	}
 	if (xp == NULL)
@@ -1224,9 +1405,16 @@ out:
 
 static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 {
-struct km_event c;
+	struct km_event c;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
+	int err;
+
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
 
-	xfrm_policy_flush();
+	xfrm_policy_flush(type);
+	c.data.type = type;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
 	c.pid = nlh->nlmsg_pid;
@@ -1239,10 +1427,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
 	struct xfrm_policy *xp;
 	struct xfrm_user_polexpire *up = NLMSG_DATA(nlh);
 	struct xfrm_userpolicy_info *p = &up->pol;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err = -ENOENT;
 
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
+
 	if (p->index)
-		xp = xfrm_policy_byid(p->dir, p->index, 0);
+		xp = xfrm_policy_byid(type, p->dir, p->index, 0);
 	else {
 		struct rtattr **rtattrs = (struct rtattr **)xfrma;
 		struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
@@ -1259,7 +1452,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
 			if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0);
+		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0);
 		security_xfrm_policy_free(&tmp);
 	}
 
@@ -1386,6 +1579,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
 	[XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0),
 	[XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
 	[XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
+	[XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
 };
 
 #undef XMSGSIZE
@@ -1710,7 +1904,9 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
-	if (copy_to_user_sec_ctx(xp, skb))
+	if (copy_to_user_state_sec_ctx(x, skb))
+		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
 		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -1744,7 +1940,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 /* User gives us xfrm_user_policy_info followed by an array of 0
  * or more templates.
  */
-static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 					       u8 *data, int len, int *dir)
 {
 	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
@@ -1752,7 +1948,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
 	struct xfrm_policy *xp;
 	int nr;
 
-	switch (family) {
+	switch (sk->sk_family) {
 	case AF_INET:
 		if (opt != IP_XFRM_POLICY) {
 			*dir = -EOPNOTSUPP;
@@ -1792,8 +1988,18 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
 	}
 
 	copy_from_user_policy(xp, p);
+	xp->type = XFRM_POLICY_TYPE_MAIN;
 	copy_templates(xp, ut, nr);
 
+	if (!xp->security) {
+		int err = security_xfrm_sock_policy_alloc(xp, sk);
+		if (err) {
+			kfree(xp);
+			*dir = err;
+			return NULL;
+		}
+	}
+
 	*dir = p->dir;
 
 	return xp;
@@ -1816,6 +2022,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 		goto nlmsg_failure;
 	if (copy_to_user_sec_ctx(xp, skb))
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 	upe->hard = !!hard;
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -1887,6 +2095,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
 	copy_to_user_policy(xp, p, dir);
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 
@@ -1904,6 +2114,9 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
 	unsigned char *b;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_userpolicy_type upt;
+#endif
 	int len = NLMSG_LENGTH(0);
 
 	skb = alloc_skb(len, GFP_ATOMIC);
@@ -1913,6 +2126,13 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 
 
 	nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
+	nlh->nlmsg_flags = 0;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+	memset(&upt, 0, sizeof(upt));
+	upt.type = c->data.type;
+	RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
+#endif
 
 	nlh->nlmsg_len = skb->tail - b;
 
@@ -1920,6 +2140,9 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
 
 nlmsg_failure:
+#ifdef CONFIG_XFRM_SUB_POLICY
+rtattr_failure:
+#endif
 	kfree_skb(skb);
 	return -1;
 }
@@ -1944,19 +2167,64 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev
 
 }
 
+static int build_report(struct sk_buff *skb, u8 proto,
+			struct xfrm_selector *sel, xfrm_address_t *addr)
+{
+	struct xfrm_user_report *ur;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
+	ur = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	ur->proto = proto;
+	memcpy(&ur->sel, sel, sizeof(ur->sel));
+
+	if (addr)
+		RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static int xfrm_send_report(u8 proto, struct xfrm_selector *sel,
+			    xfrm_address_t *addr)
+{
+	struct sk_buff *skb;
+	size_t len;
+
+	len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report)));
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_report(skb, proto, sel, addr) < 0)
+		BUG();
+
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+}
+
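xfrm_send_report() sizes the skb with NLMSG_ALIGN(NLMSG_LENGTH(...)) before building the message: a netlink message is the header plus the payload, rounded up to 4-byte alignment. A standalone model of that length arithmetic; the header length is hard-coded to 16 here as an assumption (the aligned size of struct nlmsghdr), and the report layout is a stand-in:

/* sketch only: netlink message length arithmetic */
#include <stdio.h>

#define NLMSG_ALIGNTO	 4u
#define NLMSG_ALIGN(len) (((len) + NLMSG_ALIGNTO - 1) & ~(NLMSG_ALIGNTO - 1))
#define NLMSG_HDRLEN	 16u		/* assumed aligned sizeof(struct nlmsghdr) */
#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)

struct report_model {			/* stand-in for xfrm_user_report */
	unsigned char proto;
	unsigned char sel[56];		/* assumed selector size */
};

int main(void)
{
	unsigned long len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct report_model)));

	printf("payload=%lu skb=%lu\n",
	       (unsigned long)sizeof(struct report_model), len);
	return 0;
}

Note that this budget covers only the fixed payload; any optional attribute appended afterwards (such as XFRMA_COADDR above) must fit in whatever tailroom the allocation happens to leave.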
 static struct xfrm_mgr netlink_mgr = {
 	.id		= "netlink",
 	.notify		= xfrm_send_state_notify,
 	.acquire	= xfrm_send_acquire,
 	.compile_policy	= xfrm_compile_policy,
 	.notify_policy	= xfrm_send_policy_notify,
+	.report		= xfrm_send_report,
 };
 
 static int __init xfrm_user_init(void)
 {
 	struct sock *nlsk;
 
-	printk(KERN_INFO "Initializing IPsec netlink socket\n");
+	printk(KERN_INFO "Initializing XFRM netlink socket\n");
 
 	nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
 				     xfrm_netlink_rcv, THIS_MODULE);