aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2011-01-20 03:06:15 -0500
committerDavid S. Miller <davem@davemloft.net>2011-01-20 03:06:15 -0500
commita07aa004c8d814a975b1a68afdb7baaa8f1b91d5 (patch)
tree652edc2dce9732a64780b9e332034b6567631a8b
parentcc7ec456f82da7f89a5b376e613b3ac4311b3e9a (diff)
parent5d8449286456659cdd0998e62d80df2d9e77e9e3 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
-rw-r--r--include/linux/audit.h2
-rw-r--r--include/linux/ip_vs.h8
-rw-r--r--include/linux/netfilter.h27
-rw-r--r--include/linux/netfilter/Kbuild2
-rw-r--r--include/linux/netfilter/nf_conntrack_snmp.h9
-rw-r--r--include/linux/netfilter/nfnetlink_conntrack.h9
-rw-r--r--include/linux/netfilter/x_tables.h3
-rw-r--r--include/linux/netfilter/xt_AUDIT.h30
-rw-r--r--include/linux/netfilter/xt_CT.h10
-rw-r--r--include/linux/netfilter/xt_NFQUEUE.h6
-rw-r--r--include/linux/netfilter/xt_TCPOPTSTRIP.h2
-rw-r--r--include/linux/netfilter/xt_TPROXY.h8
-rw-r--r--include/linux/netfilter/xt_cluster.h8
-rw-r--r--include/linux/netfilter/xt_comment.h2
-rw-r--r--include/linux/netfilter/xt_conntrack.h15
-rw-r--r--include/linux/netfilter/xt_quota.h6
-rw-r--r--include/linux/netfilter/xt_time.h14
-rw-r--r--include/linux/netfilter/xt_u32.h16
-rw-r--r--include/linux/netfilter_bridge/ebt_802_3.h24
-rw-r--r--include/linux/netfilter_bridge/ebt_among.h2
-rw-r--r--include/linux/netfilter_bridge/ebt_arp.h4
-rw-r--r--include/linux/netfilter_bridge/ebt_ip.h12
-rw-r--r--include/linux/netfilter_bridge/ebt_ip6.h23
-rw-r--r--include/linux/netfilter_bridge/ebt_limit.h8
-rw-r--r--include/linux/netfilter_bridge/ebt_log.h6
-rw-r--r--include/linux/netfilter_bridge/ebt_mark_m.h4
-rw-r--r--include/linux/netfilter_bridge/ebt_nflog.h10
-rw-r--r--include/linux/netfilter_bridge/ebt_pkttype.h4
-rw-r--r--include/linux/netfilter_bridge/ebt_stp.h24
-rw-r--r--include/linux/netfilter_bridge/ebt_ulog.h2
-rw-r--r--include/linux/netfilter_bridge/ebt_vlan.h8
-rw-r--r--include/linux/netfilter_ipv4/ipt_CLUSTERIP.h14
-rw-r--r--include/linux/netfilter_ipv4/ipt_ECN.h6
-rw-r--r--include/linux/netfilter_ipv4/ipt_SAME.h6
-rw-r--r--include/linux/netfilter_ipv4/ipt_TTL.h4
-rw-r--r--include/linux/netfilter_ipv4/ipt_addrtype.h14
-rw-r--r--include/linux/netfilter_ipv4/ipt_ah.h4
-rw-r--r--include/linux/netfilter_ipv4/ipt_ecn.h8
-rw-r--r--include/linux/netfilter_ipv4/ipt_ttl.h4
-rw-r--r--include/linux/netfilter_ipv6/ip6t_HL.h4
-rw-r--r--include/linux/netfilter_ipv6/ip6t_REJECT.h2
-rw-r--r--include/linux/netfilter_ipv6/ip6t_ah.h8
-rw-r--r--include/linux/netfilter_ipv6/ip6t_frag.h8
-rw-r--r--include/linux/netfilter_ipv6/ip6t_hl.h4
-rw-r--r--include/linux/netfilter_ipv6/ip6t_ipv6header.h6
-rw-r--r--include/linux/netfilter_ipv6/ip6t_mh.h4
-rw-r--r--include/linux/netfilter_ipv6/ip6t_opts.h10
-rw-r--r--include/linux/netfilter_ipv6/ip6t_rt.h12
-rw-r--r--include/net/dst.h2
-rw-r--r--include/net/ip_fib.h6
-rw-r--r--include/net/ip_vs.h295
-rw-r--r--include/net/net_namespace.h2
-rw-r--r--include/net/netfilter/nf_conntrack.h23
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h12
-rw-r--r--include/net/netfilter/nf_conntrack_extend.h10
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h6
-rw-r--r--include/net/netfilter/nf_conntrack_l3proto.h2
-rw-r--r--include/net/netfilter/nf_conntrack_timestamp.h53
-rw-r--r--include/net/netfilter/nf_nat.h6
-rw-r--r--include/net/netfilter/nf_nat_core.h4
-rw-r--r--include/net/netns/conntrack.h4
-rw-r--r--include/net/netns/ip_vs.h143
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--kernel/audit.c2
-rw-r--r--net/bridge/netfilter/ebt_ip6.c46
-rw-r--r--net/bridge/netfilter/ebtables.c1
-rw-r--r--net/ipv4/Kconfig4
-rw-r--r--net/ipv4/fib_rules.c10
-rw-r--r--net/ipv4/fib_semantics.c14
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/netfilter/Kconfig3
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c7
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c3
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c17
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c15
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c9
-rw-r--r--net/ipv4/route.c26
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c3
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/netfilter/Kconfig43
-rw-r--r--net/netfilter/Makefile4
-rw-r--r--net/netfilter/core.c20
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c98
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c195
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c378
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c887
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c134
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c61
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c67
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c72
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_pe.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c125
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c45
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c153
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c142
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c110
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c1235
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c26
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c82
-rw-r--r--net/netfilter/nf_conntrack_core.c57
-rw-r--r--net/netfilter/nf_conntrack_expect.c34
-rw-r--r--net/netfilter/nf_conntrack_extend.c11
-rw-r--r--net/netfilter/nf_conntrack_helper.c20
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c74
-rw-r--r--net/netfilter/nf_conntrack_netlink.c47
-rw-r--r--net/netfilter/nf_conntrack_proto.c24
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c14
-rw-r--r--net/netfilter/nf_conntrack_snmp.c77
-rw-r--r--net/netfilter/nf_conntrack_standalone.c45
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c120
-rw-r--r--net/netfilter/nf_log.c6
-rw-r--r--net/netfilter/nf_queue.c82
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c22
-rw-r--r--net/netfilter/x_tables.c98
-rw-r--r--net/netfilter/xt_AUDIT.c204
-rw-r--r--net/netfilter/xt_CLASSIFY.c36
-rw-r--r--net/netfilter/xt_IDLETIMER.c2
-rw-r--r--net/netfilter/xt_LED.c2
-rw-r--r--net/netfilter/xt_NFQUEUE.c34
-rw-r--r--net/netfilter/xt_connlimit.c6
-rw-r--r--net/netfilter/xt_conntrack.c75
-rw-r--r--net/netfilter/xt_cpu.c2
-rw-r--r--net/netfilter/xt_ipvs.c2
-rw-r--r--net/sched/Kconfig5
-rw-r--r--net/sched/cls_flow.c2
-rw-r--r--net/sched/em_meta.c2
135 files changed, 4477 insertions, 1712 deletions
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 359df0487690..9d339eb27881 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -103,6 +103,8 @@
103#define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ 103#define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */
104#define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ 104#define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */
105#define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ 105#define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */
106#define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */
107#define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */
106 108
107#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ 109#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
108#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ 110#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index 5f43a3b2e3ad..4deb3834d62c 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -89,6 +89,14 @@
89#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ 89#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */
90#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ 90#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */
91 91
92#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
93 IP_VS_CONN_F_NOOUTPUT | \
94 IP_VS_CONN_F_INACTIVE | \
95 IP_VS_CONN_F_SEQ_MASK | \
96 IP_VS_CONN_F_NO_CPORT | \
97 IP_VS_CONN_F_TEMPLATE \
98 )
99
92/* Flags that are not sent to backup server start from bit 16 */ 100/* Flags that are not sent to backup server start from bit 16 */
93#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ 101#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */
94 102
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 1893837b3966..eeec00abb664 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -24,16 +24,20 @@
24#define NF_MAX_VERDICT NF_STOP 24#define NF_MAX_VERDICT NF_STOP
25 25
26/* we overload the higher bits for encoding auxiliary data such as the queue 26/* we overload the higher bits for encoding auxiliary data such as the queue
27 * number. Not nice, but better than additional function arguments. */ 27 * number or errno values. Not nice, but better than additional function
28#define NF_VERDICT_MASK 0x0000ffff 28 * arguments. */
29#define NF_VERDICT_BITS 16 29#define NF_VERDICT_MASK 0x000000ff
30
31/* extra verdict flags have mask 0x0000ff00 */
32#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000
30 33
34/* queue number (NF_QUEUE) or errno (NF_DROP) */
31#define NF_VERDICT_QMASK 0xffff0000 35#define NF_VERDICT_QMASK 0xffff0000
32#define NF_VERDICT_QBITS 16 36#define NF_VERDICT_QBITS 16
33 37
34#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) 38#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
35 39
36#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP) 40#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
37 41
38/* only for userspace compatibility */ 42/* only for userspace compatibility */
39#ifndef __KERNEL__ 43#ifndef __KERNEL__
@@ -41,6 +45,9 @@
41 <= 0x2000 is used for protocol-flags. */ 45 <= 0x2000 is used for protocol-flags. */
42#define NFC_UNKNOWN 0x4000 46#define NFC_UNKNOWN 0x4000
43#define NFC_ALTERED 0x8000 47#define NFC_ALTERED 0x8000
48
49/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
50#define NF_VERDICT_BITS 16
44#endif 51#endif
45 52
46enum nf_inet_hooks { 53enum nf_inet_hooks {
@@ -72,6 +79,10 @@ union nf_inet_addr {
72 79
73#ifdef __KERNEL__ 80#ifdef __KERNEL__
74#ifdef CONFIG_NETFILTER 81#ifdef CONFIG_NETFILTER
82static inline int NF_DROP_GETERR(int verdict)
83{
84 return -(verdict >> NF_VERDICT_QBITS);
85}
75 86
76static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, 87static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
77 const union nf_inet_addr *a2) 88 const union nf_inet_addr *a2)
@@ -267,7 +278,7 @@ struct nf_afinfo {
267 int route_key_size; 278 int route_key_size;
268}; 279};
269 280
270extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO]; 281extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
271static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) 282static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
272{ 283{
273 return rcu_dereference(nf_afinfo[family]); 284 return rcu_dereference(nf_afinfo[family]);
@@ -357,9 +368,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
357#endif /*CONFIG_NETFILTER*/ 368#endif /*CONFIG_NETFILTER*/
358 369
359#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 370#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
360extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); 371extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
361extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); 372extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
362extern void (*nf_ct_destroy)(struct nf_conntrack *); 373extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
363#else 374#else
364static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} 375static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
365#endif 376#endif
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9d40effe7ca7..89c0d1e20d72 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -9,6 +9,7 @@ header-y += nfnetlink_conntrack.h
9header-y += nfnetlink_log.h 9header-y += nfnetlink_log.h
10header-y += nfnetlink_queue.h 10header-y += nfnetlink_queue.h
11header-y += x_tables.h 11header-y += x_tables.h
12header-y += xt_AUDIT.h
12header-y += xt_CHECKSUM.h 13header-y += xt_CHECKSUM.h
13header-y += xt_CLASSIFY.h 14header-y += xt_CLASSIFY.h
14header-y += xt_CONNMARK.h 15header-y += xt_CONNMARK.h
@@ -55,6 +56,7 @@ header-y += xt_rateest.h
55header-y += xt_realm.h 56header-y += xt_realm.h
56header-y += xt_recent.h 57header-y += xt_recent.h
57header-y += xt_sctp.h 58header-y += xt_sctp.h
59header-y += xt_socket.h
58header-y += xt_state.h 60header-y += xt_state.h
59header-y += xt_statistic.h 61header-y += xt_statistic.h
60header-y += xt_string.h 62header-y += xt_string.h
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h
new file mode 100644
index 000000000000..064bc63a5346
--- /dev/null
+++ b/include/linux/netfilter/nf_conntrack_snmp.h
@@ -0,0 +1,9 @@
1#ifndef _NF_CONNTRACK_SNMP_H
2#define _NF_CONNTRACK_SNMP_H
3
4extern int (*nf_nat_snmp_hook)(struct sk_buff *skb,
5 unsigned int protoff,
6 struct nf_conn *ct,
7 enum ip_conntrack_info ctinfo);
8
9#endif /* _NF_CONNTRACK_SNMP_H */
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 19711e3ffd42..debf1aefd753 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -42,6 +42,7 @@ enum ctattr_type {
42 CTA_SECMARK, /* obsolete */ 42 CTA_SECMARK, /* obsolete */
43 CTA_ZONE, 43 CTA_ZONE,
44 CTA_SECCTX, 44 CTA_SECCTX,
45 CTA_TIMESTAMP,
45 __CTA_MAX 46 __CTA_MAX
46}; 47};
47#define CTA_MAX (__CTA_MAX - 1) 48#define CTA_MAX (__CTA_MAX - 1)
@@ -127,6 +128,14 @@ enum ctattr_counters {
127}; 128};
128#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) 129#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
129 130
131enum ctattr_tstamp {
132 CTA_TIMESTAMP_UNSPEC,
133 CTA_TIMESTAMP_START,
134 CTA_TIMESTAMP_STOP,
135 __CTA_TIMESTAMP_MAX
136};
137#define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1)
138
130enum ctattr_nat { 139enum ctattr_nat {
131 CTA_NAT_UNSPEC, 140 CTA_NAT_UNSPEC,
132 CTA_NAT_MINIP, 141 CTA_NAT_MINIP,
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 6712e713b299..37219525ff6f 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -611,8 +611,9 @@ struct _compat_xt_align {
611extern void xt_compat_lock(u_int8_t af); 611extern void xt_compat_lock(u_int8_t af);
612extern void xt_compat_unlock(u_int8_t af); 612extern void xt_compat_unlock(u_int8_t af);
613 613
614extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); 614extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
615extern void xt_compat_flush_offsets(u_int8_t af); 615extern void xt_compat_flush_offsets(u_int8_t af);
616extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
616extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); 617extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
617 618
618extern int xt_compat_match_offset(const struct xt_match *match); 619extern int xt_compat_match_offset(const struct xt_match *match);
diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/linux/netfilter/xt_AUDIT.h
new file mode 100644
index 000000000000..38751d2ea52b
--- /dev/null
+++ b/include/linux/netfilter/xt_AUDIT.h
@@ -0,0 +1,30 @@
1/*
2 * Header file for iptables xt_AUDIT target
3 *
4 * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
5 * (C) 2010-2011 Red Hat, Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#ifndef _XT_AUDIT_TARGET_H
13#define _XT_AUDIT_TARGET_H
14
15#include <linux/types.h>
16
17enum {
18 XT_AUDIT_TYPE_ACCEPT = 0,
19 XT_AUDIT_TYPE_DROP,
20 XT_AUDIT_TYPE_REJECT,
21 __XT_AUDIT_TYPE_MAX,
22};
23
24#define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1)
25
26struct xt_audit_info {
27 __u8 type; /* XT_AUDIT_TYPE_* */
28};
29
30#endif /* _XT_AUDIT_TARGET_H */
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
index 1b564106891d..fbf4c5658554 100644
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -4,11 +4,11 @@
4#define XT_CT_NOTRACK 0x1 4#define XT_CT_NOTRACK 0x1
5 5
6struct xt_ct_target_info { 6struct xt_ct_target_info {
7 u_int16_t flags; 7 __u16 flags;
8 u_int16_t zone; 8 __u16 zone;
9 u_int32_t ct_events; 9 __u32 ct_events;
10 u_int32_t exp_events; 10 __u32 exp_events;
11 char helper[16]; 11 char helper[16];
12 12
13 /* Used internally by the kernel */ 13 /* Used internally by the kernel */
14 struct nf_conn *ct __attribute__((aligned(8))); 14 struct nf_conn *ct __attribute__((aligned(8)));
diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h
index 2584f4a777de..9eafdbbb401c 100644
--- a/include/linux/netfilter/xt_NFQUEUE.h
+++ b/include/linux/netfilter/xt_NFQUEUE.h
@@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 {
20 __u16 queues_total; 20 __u16 queues_total;
21}; 21};
22 22
23struct xt_NFQ_info_v2 {
24 __u16 queuenum;
25 __u16 queues_total;
26 __u16 bypass;
27};
28
23#endif /* _XT_NFQ_TARGET_H */ 29#endif /* _XT_NFQ_TARGET_H */
diff --git a/include/linux/netfilter/xt_TCPOPTSTRIP.h b/include/linux/netfilter/xt_TCPOPTSTRIP.h
index 2db543214ff5..342ef14b1761 100644
--- a/include/linux/netfilter/xt_TCPOPTSTRIP.h
+++ b/include/linux/netfilter/xt_TCPOPTSTRIP.h
@@ -7,7 +7,7 @@
7 (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0) 7 (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0)
8 8
9struct xt_tcpoptstrip_target_info { 9struct xt_tcpoptstrip_target_info {
10 u_int32_t strip_bmap[8]; 10 __u32 strip_bmap[8];
11}; 11};
12 12
13#endif /* _XT_TCPOPTSTRIP_H */ 13#endif /* _XT_TCPOPTSTRIP_H */
diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h
index 3f3d69361289..8097e0b4c15e 100644
--- a/include/linux/netfilter/xt_TPROXY.h
+++ b/include/linux/netfilter/xt_TPROXY.h
@@ -5,15 +5,15 @@
5 * redirection. We can get rid of that whenever we get support for 5 * redirection. We can get rid of that whenever we get support for
6 * mutliple targets in the same rule. */ 6 * mutliple targets in the same rule. */
7struct xt_tproxy_target_info { 7struct xt_tproxy_target_info {
8 u_int32_t mark_mask; 8 __u32 mark_mask;
9 u_int32_t mark_value; 9 __u32 mark_value;
10 __be32 laddr; 10 __be32 laddr;
11 __be16 lport; 11 __be16 lport;
12}; 12};
13 13
14struct xt_tproxy_target_info_v1 { 14struct xt_tproxy_target_info_v1 {
15 u_int32_t mark_mask; 15 __u32 mark_mask;
16 u_int32_t mark_value; 16 __u32 mark_value;
17 union nf_inet_addr laddr; 17 union nf_inet_addr laddr;
18 __be16 lport; 18 __be16 lport;
19}; 19};
diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h
index 886682656f09..66cfa3c782ac 100644
--- a/include/linux/netfilter/xt_cluster.h
+++ b/include/linux/netfilter/xt_cluster.h
@@ -6,10 +6,10 @@ enum xt_cluster_flags {
6}; 6};
7 7
8struct xt_cluster_match_info { 8struct xt_cluster_match_info {
9 u_int32_t total_nodes; 9 __u32 total_nodes;
10 u_int32_t node_mask; 10 __u32 node_mask;
11 u_int32_t hash_seed; 11 __u32 hash_seed;
12 u_int32_t flags; 12 __u32 flags;
13}; 13};
14 14
15#define XT_CLUSTER_NODES_MAX 32 15#define XT_CLUSTER_NODES_MAX 32
diff --git a/include/linux/netfilter/xt_comment.h b/include/linux/netfilter/xt_comment.h
index eacfedc6b5d0..0ea5e79f5bd7 100644
--- a/include/linux/netfilter/xt_comment.h
+++ b/include/linux/netfilter/xt_comment.h
@@ -4,7 +4,7 @@
4#define XT_MAX_COMMENT_LEN 256 4#define XT_MAX_COMMENT_LEN 256
5 5
6struct xt_comment_info { 6struct xt_comment_info {
7 unsigned char comment[XT_MAX_COMMENT_LEN]; 7 char comment[XT_MAX_COMMENT_LEN];
8}; 8};
9 9
10#endif /* XT_COMMENT_H */ 10#endif /* XT_COMMENT_H */
diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h
index 54f47a2f6152..74b904d8f99c 100644
--- a/include/linux/netfilter/xt_conntrack.h
+++ b/include/linux/netfilter/xt_conntrack.h
@@ -58,4 +58,19 @@ struct xt_conntrack_mtinfo2 {
58 __u16 state_mask, status_mask; 58 __u16 state_mask, status_mask;
59}; 59};
60 60
61struct xt_conntrack_mtinfo3 {
62 union nf_inet_addr origsrc_addr, origsrc_mask;
63 union nf_inet_addr origdst_addr, origdst_mask;
64 union nf_inet_addr replsrc_addr, replsrc_mask;
65 union nf_inet_addr repldst_addr, repldst_mask;
66 __u32 expires_min, expires_max;
67 __u16 l4proto;
68 __u16 origsrc_port, origdst_port;
69 __u16 replsrc_port, repldst_port;
70 __u16 match_flags, invert_flags;
71 __u16 state_mask, status_mask;
72 __u16 origsrc_port_high, origdst_port_high;
73 __u16 replsrc_port_high, repldst_port_high;
74};
75
61#endif /*_XT_CONNTRACK_H*/ 76#endif /*_XT_CONNTRACK_H*/
diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h
index b0d28c659ab7..8bda65f0bc92 100644
--- a/include/linux/netfilter/xt_quota.h
+++ b/include/linux/netfilter/xt_quota.h
@@ -9,9 +9,9 @@ enum xt_quota_flags {
9struct xt_quota_priv; 9struct xt_quota_priv;
10 10
11struct xt_quota_info { 11struct xt_quota_info {
12 u_int32_t flags; 12 __u32 flags;
13 u_int32_t pad; 13 __u32 pad;
14 aligned_u64 quota; 14 aligned_u64 quota;
15 15
16 /* Used internally by the kernel */ 16 /* Used internally by the kernel */
17 struct xt_quota_priv *master; 17 struct xt_quota_priv *master;
diff --git a/include/linux/netfilter/xt_time.h b/include/linux/netfilter/xt_time.h
index 14b6df412c9f..b8bd4568efdb 100644
--- a/include/linux/netfilter/xt_time.h
+++ b/include/linux/netfilter/xt_time.h
@@ -2,13 +2,13 @@
2#define _XT_TIME_H 1 2#define _XT_TIME_H 1
3 3
4struct xt_time_info { 4struct xt_time_info {
5 u_int32_t date_start; 5 __u32 date_start;
6 u_int32_t date_stop; 6 __u32 date_stop;
7 u_int32_t daytime_start; 7 __u32 daytime_start;
8 u_int32_t daytime_stop; 8 __u32 daytime_stop;
9 u_int32_t monthdays_match; 9 __u32 monthdays_match;
10 u_int8_t weekdays_match; 10 __u8 weekdays_match;
11 u_int8_t flags; 11 __u8 flags;
12}; 12};
13 13
14enum { 14enum {
diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h
index 9947f56cdbdd..e8c3d8722bae 100644
--- a/include/linux/netfilter/xt_u32.h
+++ b/include/linux/netfilter/xt_u32.h
@@ -9,13 +9,13 @@ enum xt_u32_ops {
9}; 9};
10 10
11struct xt_u32_location_element { 11struct xt_u32_location_element {
12 u_int32_t number; 12 __u32 number;
13 u_int8_t nextop; 13 __u8 nextop;
14}; 14};
15 15
16struct xt_u32_value_element { 16struct xt_u32_value_element {
17 u_int32_t min; 17 __u32 min;
18 u_int32_t max; 18 __u32 max;
19}; 19};
20 20
21/* 21/*
@@ -27,14 +27,14 @@ struct xt_u32_value_element {
27struct xt_u32_test { 27struct xt_u32_test {
28 struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; 28 struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
29 struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; 29 struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
30 u_int8_t nnums; 30 __u8 nnums;
31 u_int8_t nvalues; 31 __u8 nvalues;
32}; 32};
33 33
34struct xt_u32 { 34struct xt_u32 {
35 struct xt_u32_test tests[XT_U32_MAXSIZE+1]; 35 struct xt_u32_test tests[XT_U32_MAXSIZE+1];
36 u_int8_t ntests; 36 __u8 ntests;
37 u_int8_t invert; 37 __u8 invert;
38}; 38};
39 39
40#endif /* _XT_U32_H */ 40#endif /* _XT_U32_H */
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
index c73ef0b18bdc..c427764f4444 100644
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -24,24 +24,24 @@
24 24
25/* ui has one byte ctrl, ni has two */ 25/* ui has one byte ctrl, ni has two */
26struct hdr_ui { 26struct hdr_ui {
27 uint8_t dsap; 27 __u8 dsap;
28 uint8_t ssap; 28 __u8 ssap;
29 uint8_t ctrl; 29 __u8 ctrl;
30 uint8_t orig[3]; 30 __u8 orig[3];
31 __be16 type; 31 __be16 type;
32}; 32};
33 33
34struct hdr_ni { 34struct hdr_ni {
35 uint8_t dsap; 35 __u8 dsap;
36 uint8_t ssap; 36 __u8 ssap;
37 __be16 ctrl; 37 __be16 ctrl;
38 uint8_t orig[3]; 38 __u8 orig[3];
39 __be16 type; 39 __be16 type;
40}; 40};
41 41
42struct ebt_802_3_hdr { 42struct ebt_802_3_hdr {
43 uint8_t daddr[6]; 43 __u8 daddr[6];
44 uint8_t saddr[6]; 44 __u8 saddr[6];
45 __be16 len; 45 __be16 len;
46 union { 46 union {
47 struct hdr_ui ui; 47 struct hdr_ui ui;
@@ -59,10 +59,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
59#endif 59#endif
60 60
61struct ebt_802_3_info { 61struct ebt_802_3_info {
62 uint8_t sap; 62 __u8 sap;
63 __be16 type; 63 __be16 type;
64 uint8_t bitmask; 64 __u8 bitmask;
65 uint8_t invflags; 65 __u8 invflags;
66}; 66};
67 67
68#endif 68#endif
diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h
index 0009558609a7..686c9619dbc0 100644
--- a/include/linux/netfilter_bridge/ebt_among.h
+++ b/include/linux/netfilter_bridge/ebt_among.h
@@ -30,7 +30,7 @@
30 */ 30 */
31 31
32struct ebt_mac_wormhash_tuple { 32struct ebt_mac_wormhash_tuple {
33 uint32_t cmp[2]; 33 __u32 cmp[2];
34 __be32 ip; 34 __be32 ip;
35}; 35};
36 36
diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h
index cbf4843b6b0f..e62b5af95869 100644
--- a/include/linux/netfilter_bridge/ebt_arp.h
+++ b/include/linux/netfilter_bridge/ebt_arp.h
@@ -27,8 +27,8 @@ struct ebt_arp_info
27 unsigned char smmsk[ETH_ALEN]; 27 unsigned char smmsk[ETH_ALEN];
28 unsigned char dmaddr[ETH_ALEN]; 28 unsigned char dmaddr[ETH_ALEN];
29 unsigned char dmmsk[ETH_ALEN]; 29 unsigned char dmmsk[ETH_ALEN];
30 uint8_t bitmask; 30 __u8 bitmask;
31 uint8_t invflags; 31 __u8 invflags;
32}; 32};
33 33
34#endif 34#endif
diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h
index 6a708fb92241..d99de58da2c7 100644
--- a/include/linux/netfilter_bridge/ebt_ip.h
+++ b/include/linux/netfilter_bridge/ebt_ip.h
@@ -31,12 +31,12 @@ struct ebt_ip_info {
31 __be32 daddr; 31 __be32 daddr;
32 __be32 smsk; 32 __be32 smsk;
33 __be32 dmsk; 33 __be32 dmsk;
34 uint8_t tos; 34 __u8 tos;
35 uint8_t protocol; 35 __u8 protocol;
36 uint8_t bitmask; 36 __u8 bitmask;
37 uint8_t invflags; 37 __u8 invflags;
38 uint16_t sport[2]; 38 __u16 sport[2];
39 uint16_t dport[2]; 39 __u16 dport[2];
40}; 40};
41 41
42#endif 42#endif
diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h
index e5de98701519..998e9d5a6b60 100644
--- a/include/linux/netfilter_bridge/ebt_ip6.h
+++ b/include/linux/netfilter_bridge/ebt_ip6.h
@@ -18,8 +18,11 @@
18#define EBT_IP6_PROTO 0x08 18#define EBT_IP6_PROTO 0x08
19#define EBT_IP6_SPORT 0x10 19#define EBT_IP6_SPORT 0x10
20#define EBT_IP6_DPORT 0x20 20#define EBT_IP6_DPORT 0x20
21#define EBT_IP6_ICMP6 0x40
22
21#define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\ 23#define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\
22 EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT) 24 EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \
25 EBT_IP6_ICMP6)
23#define EBT_IP6_MATCH "ip6" 26#define EBT_IP6_MATCH "ip6"
24 27
25/* the same values are used for the invflags */ 28/* the same values are used for the invflags */
@@ -28,12 +31,18 @@ struct ebt_ip6_info {
28 struct in6_addr daddr; 31 struct in6_addr daddr;
29 struct in6_addr smsk; 32 struct in6_addr smsk;
30 struct in6_addr dmsk; 33 struct in6_addr dmsk;
31 uint8_t tclass; 34 __u8 tclass;
32 uint8_t protocol; 35 __u8 protocol;
33 uint8_t bitmask; 36 __u8 bitmask;
34 uint8_t invflags; 37 __u8 invflags;
35 uint16_t sport[2]; 38 union {
36 uint16_t dport[2]; 39 __u16 sport[2];
40 __u8 icmpv6_type[2];
41 };
42 union {
43 __u16 dport[2];
44 __u8 icmpv6_code[2];
45 };
37}; 46};
38 47
39#endif 48#endif
diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h
index 4bf76b751676..721d51ffa513 100644
--- a/include/linux/netfilter_bridge/ebt_limit.h
+++ b/include/linux/netfilter_bridge/ebt_limit.h
@@ -10,13 +10,13 @@
10 seconds, or one every 59 hours. */ 10 seconds, or one every 59 hours. */
11 11
12struct ebt_limit_info { 12struct ebt_limit_info {
13 u_int32_t avg; /* Average secs between packets * scale */ 13 __u32 avg; /* Average secs between packets * scale */
14 u_int32_t burst; /* Period multiplier for upper limit. */ 14 __u32 burst; /* Period multiplier for upper limit. */
15 15
16 /* Used internally by the kernel */ 16 /* Used internally by the kernel */
17 unsigned long prev; 17 unsigned long prev;
18 u_int32_t credit; 18 __u32 credit;
19 u_int32_t credit_cap, cost; 19 __u32 credit_cap, cost;
20}; 20};
21 21
22#endif 22#endif
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h
index cc2cdfb764bc..564beb4946ea 100644
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -10,9 +10,9 @@
10#define EBT_LOG_WATCHER "log" 10#define EBT_LOG_WATCHER "log"
11 11
12struct ebt_log_info { 12struct ebt_log_info {
13 uint8_t loglevel; 13 __u8 loglevel;
14 uint8_t prefix[EBT_LOG_PREFIX_SIZE]; 14 __u8 prefix[EBT_LOG_PREFIX_SIZE];
15 uint32_t bitmask; 15 __u32 bitmask;
16}; 16};
17 17
18#endif 18#endif
diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h
index 9ceb10ec0ed6..97b96c4b8db4 100644
--- a/include/linux/netfilter_bridge/ebt_mark_m.h
+++ b/include/linux/netfilter_bridge/ebt_mark_m.h
@@ -6,8 +6,8 @@
6#define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) 6#define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
7struct ebt_mark_m_info { 7struct ebt_mark_m_info {
8 unsigned long mark, mask; 8 unsigned long mark, mask;
9 uint8_t invert; 9 __u8 invert;
10 uint8_t bitmask; 10 __u8 bitmask;
11}; 11};
12#define EBT_MARK_MATCH "mark_m" 12#define EBT_MARK_MATCH "mark_m"
13 13
diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h
index 052817849b83..477315bc3537 100644
--- a/include/linux/netfilter_bridge/ebt_nflog.h
+++ b/include/linux/netfilter_bridge/ebt_nflog.h
@@ -10,11 +10,11 @@
10#define EBT_NFLOG_DEFAULT_THRESHOLD 1 10#define EBT_NFLOG_DEFAULT_THRESHOLD 1
11 11
12struct ebt_nflog_info { 12struct ebt_nflog_info {
13 u_int32_t len; 13 __u32 len;
14 u_int16_t group; 14 __u16 group;
15 u_int16_t threshold; 15 __u16 threshold;
16 u_int16_t flags; 16 __u16 flags;
17 u_int16_t pad; 17 __u16 pad;
18 char prefix[EBT_NFLOG_PREFIX_SIZE]; 18 char prefix[EBT_NFLOG_PREFIX_SIZE];
19}; 19};
20 20
diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h
index 51a799840931..7c0fb0fdcf14 100644
--- a/include/linux/netfilter_bridge/ebt_pkttype.h
+++ b/include/linux/netfilter_bridge/ebt_pkttype.h
@@ -2,8 +2,8 @@
2#define __LINUX_BRIDGE_EBT_PKTTYPE_H 2#define __LINUX_BRIDGE_EBT_PKTTYPE_H
3 3
4struct ebt_pkttype_info { 4struct ebt_pkttype_info {
5 uint8_t pkt_type; 5 __u8 pkt_type;
6 uint8_t invert; 6 __u8 invert;
7}; 7};
8#define EBT_PKTTYPE_MATCH "pkttype" 8#define EBT_PKTTYPE_MATCH "pkttype"
9 9
diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h
index e503a0aa2728..13a0bd49a92a 100644
--- a/include/linux/netfilter_bridge/ebt_stp.h
+++ b/include/linux/netfilter_bridge/ebt_stp.h
@@ -21,24 +21,24 @@
21#define EBT_STP_MATCH "stp" 21#define EBT_STP_MATCH "stp"
22 22
23struct ebt_stp_config_info { 23struct ebt_stp_config_info {
24 uint8_t flags; 24 __u8 flags;
25 uint16_t root_priol, root_priou; 25 __u16 root_priol, root_priou;
26 char root_addr[6], root_addrmsk[6]; 26 char root_addr[6], root_addrmsk[6];
27 uint32_t root_costl, root_costu; 27 __u32 root_costl, root_costu;
28 uint16_t sender_priol, sender_priou; 28 __u16 sender_priol, sender_priou;
29 char sender_addr[6], sender_addrmsk[6]; 29 char sender_addr[6], sender_addrmsk[6];
30 uint16_t portl, portu; 30 __u16 portl, portu;
31 uint16_t msg_agel, msg_ageu; 31 __u16 msg_agel, msg_ageu;
32 uint16_t max_agel, max_ageu; 32 __u16 max_agel, max_ageu;
33 uint16_t hello_timel, hello_timeu; 33 __u16 hello_timel, hello_timeu;
34 uint16_t forward_delayl, forward_delayu; 34 __u16 forward_delayl, forward_delayu;
35}; 35};
36 36
37struct ebt_stp_info { 37struct ebt_stp_info {
38 uint8_t type; 38 __u8 type;
39 struct ebt_stp_config_info config; 39 struct ebt_stp_config_info config;
40 uint16_t bitmask; 40 __u16 bitmask;
41 uint16_t invflags; 41 __u16 invflags;
42}; 42};
43 43
44#endif 44#endif
diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h
index b677e2671541..de35a51a7e46 100644
--- a/include/linux/netfilter_bridge/ebt_ulog.h
+++ b/include/linux/netfilter_bridge/ebt_ulog.h
@@ -10,7 +10,7 @@
10#define EBT_ULOG_VERSION 1 10#define EBT_ULOG_VERSION 1
11 11
12struct ebt_ulog_info { 12struct ebt_ulog_info {
13 uint32_t nlgroup; 13 __u32 nlgroup;
14 unsigned int cprange; 14 unsigned int cprange;
15 unsigned int qthreshold; 15 unsigned int qthreshold;
16 char prefix[EBT_ULOG_PREFIX_LEN]; 16 char prefix[EBT_ULOG_PREFIX_LEN];
diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h
index 1d98be4031e7..48dffc1dad36 100644
--- a/include/linux/netfilter_bridge/ebt_vlan.h
+++ b/include/linux/netfilter_bridge/ebt_vlan.h
@@ -8,12 +8,12 @@
8#define EBT_VLAN_MATCH "vlan" 8#define EBT_VLAN_MATCH "vlan"
9 9
10struct ebt_vlan_info { 10struct ebt_vlan_info {
11 uint16_t id; /* VLAN ID {1-4095} */ 11 __u16 id; /* VLAN ID {1-4095} */
12 uint8_t prio; /* VLAN User Priority {0-7} */ 12 __u8 prio; /* VLAN User Priority {0-7} */
13 __be16 encap; /* VLAN Encapsulated frame code {0-65535} */ 13 __be16 encap; /* VLAN Encapsulated frame code {0-65535} */
14 uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg, 14 __u8 bitmask; /* Args bitmask bit 1=1 - ID arg,
15 bit 2=1 User-Priority arg, bit 3=1 encap*/ 15 bit 2=1 User-Priority arg, bit 3=1 encap*/
16 uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, 16 __u8 invflags; /* Inverse bitmask bit 1=1 - inversed ID arg,
17 bit 2=1 - inversed Pirority arg */ 17 bit 2=1 - inversed Pirority arg */
18}; 18};
19 19
diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
index e5a3687c8a72..3114f06939ef 100644
--- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
+++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
@@ -17,15 +17,15 @@ struct clusterip_config;
17 17
18struct ipt_clusterip_tgt_info { 18struct ipt_clusterip_tgt_info {
19 19
20 u_int32_t flags; 20 __u32 flags;
21 21
22 /* only relevant for new ones */ 22 /* only relevant for new ones */
23 u_int8_t clustermac[6]; 23 __u8 clustermac[6];
24 u_int16_t num_total_nodes; 24 __u16 num_total_nodes;
25 u_int16_t num_local_nodes; 25 __u16 num_local_nodes;
26 u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; 26 __u16 local_nodes[CLUSTERIP_MAX_NODES];
27 u_int32_t hash_mode; 27 __u32 hash_mode;
28 u_int32_t hash_initval; 28 __u32 hash_initval;
29 29
30 /* Used internally by the kernel */ 30 /* Used internally by the kernel */
31 struct clusterip_config *config; 31 struct clusterip_config *config;
diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h
index 7ca45918ab8e..c6e3e01b75e0 100644
--- a/include/linux/netfilter_ipv4/ipt_ECN.h
+++ b/include/linux/netfilter_ipv4/ipt_ECN.h
@@ -19,11 +19,11 @@
19#define IPT_ECN_OP_MASK 0xce 19#define IPT_ECN_OP_MASK 0xce
20 20
21struct ipt_ECN_info { 21struct ipt_ECN_info {
22 u_int8_t operation; /* bitset of operations */ 22 __u8 operation; /* bitset of operations */
23 u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ 23 __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
24 union { 24 union {
25 struct { 25 struct {
26 u_int8_t ece:1, cwr:1; /* TCP ECT bits */ 26 __u8 ece:1, cwr:1; /* TCP ECT bits */
27 } tcp; 27 } tcp;
28 } proto; 28 } proto;
29}; 29};
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h
index 2529660c5b38..fa0ebeca5d95 100644
--- a/include/linux/netfilter_ipv4/ipt_SAME.h
+++ b/include/linux/netfilter_ipv4/ipt_SAME.h
@@ -7,9 +7,9 @@
7 7
8struct ipt_same_info { 8struct ipt_same_info {
9 unsigned char info; 9 unsigned char info;
10 u_int32_t rangesize; 10 __u32 rangesize;
11 u_int32_t ipnum; 11 __u32 ipnum;
12 u_int32_t *iparray; 12 __u32 *iparray;
13 13
14 /* hangs off end. */ 14 /* hangs off end. */
15 struct nf_nat_range range[IPT_SAME_MAX_RANGE]; 15 struct nf_nat_range range[IPT_SAME_MAX_RANGE];
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h
index ee6611edc112..f6250e422d5e 100644
--- a/include/linux/netfilter_ipv4/ipt_TTL.h
+++ b/include/linux/netfilter_ipv4/ipt_TTL.h
@@ -13,8 +13,8 @@ enum {
13#define IPT_TTL_MAXMODE IPT_TTL_DEC 13#define IPT_TTL_MAXMODE IPT_TTL_DEC
14 14
15struct ipt_TTL_info { 15struct ipt_TTL_info {
16 u_int8_t mode; 16 __u8 mode;
17 u_int8_t ttl; 17 __u8 ttl;
18}; 18};
19 19
20 20
diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h
index 446de6aef983..f29c3cfcc240 100644
--- a/include/linux/netfilter_ipv4/ipt_addrtype.h
+++ b/include/linux/netfilter_ipv4/ipt_addrtype.h
@@ -9,17 +9,17 @@ enum {
9}; 9};
10 10
11struct ipt_addrtype_info_v1 { 11struct ipt_addrtype_info_v1 {
12 u_int16_t source; /* source-type mask */ 12 __u16 source; /* source-type mask */
13 u_int16_t dest; /* dest-type mask */ 13 __u16 dest; /* dest-type mask */
14 u_int32_t flags; 14 __u32 flags;
15}; 15};
16 16
17/* revision 0 */ 17/* revision 0 */
18struct ipt_addrtype_info { 18struct ipt_addrtype_info {
19 u_int16_t source; /* source-type mask */ 19 __u16 source; /* source-type mask */
20 u_int16_t dest; /* dest-type mask */ 20 __u16 dest; /* dest-type mask */
21 u_int32_t invert_source; 21 __u32 invert_source;
22 u_int32_t invert_dest; 22 __u32 invert_dest;
23}; 23};
24 24
25#endif 25#endif
diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h
index 2e555b4d05e3..8fea283ee62a 100644
--- a/include/linux/netfilter_ipv4/ipt_ah.h
+++ b/include/linux/netfilter_ipv4/ipt_ah.h
@@ -2,8 +2,8 @@
2#define _IPT_AH_H 2#define _IPT_AH_H
3 3
4struct ipt_ah { 4struct ipt_ah {
5 u_int32_t spis[2]; /* Security Parameter Index */ 5 __u32 spis[2]; /* Security Parameter Index */
6 u_int8_t invflags; /* Inverse flags */ 6 __u8 invflags; /* Inverse flags */
7}; 7};
8 8
9 9
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h
index 9945baa4ccd7..78b98aa8784d 100644
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -20,12 +20,12 @@
20 20
21/* match info */ 21/* match info */
22struct ipt_ecn_info { 22struct ipt_ecn_info {
23 u_int8_t operation; 23 __u8 operation;
24 u_int8_t invert; 24 __u8 invert;
25 u_int8_t ip_ect; 25 __u8 ip_ect;
26 union { 26 union {
27 struct { 27 struct {
28 u_int8_t ect; 28 __u8 ect;
29 } tcp; 29 } tcp;
30 } proto; 30 } proto;
31}; 31};
diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h
index ee24fd86a3aa..93d9a06689a3 100644
--- a/include/linux/netfilter_ipv4/ipt_ttl.h
+++ b/include/linux/netfilter_ipv4/ipt_ttl.h
@@ -13,8 +13,8 @@ enum {
13 13
14 14
15struct ipt_ttl_info { 15struct ipt_ttl_info {
16 u_int8_t mode; 16 __u8 mode;
17 u_int8_t ttl; 17 __u8 ttl;
18}; 18};
19 19
20 20
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h
index afb7813d45ab..81cdaf0480e3 100644
--- a/include/linux/netfilter_ipv6/ip6t_HL.h
+++ b/include/linux/netfilter_ipv6/ip6t_HL.h
@@ -14,8 +14,8 @@ enum {
14#define IP6T_HL_MAXMODE IP6T_HL_DEC 14#define IP6T_HL_MAXMODE IP6T_HL_DEC
15 15
16struct ip6t_HL_info { 16struct ip6t_HL_info {
17 u_int8_t mode; 17 __u8 mode;
18 u_int8_t hop_limit; 18 __u8 hop_limit;
19}; 19};
20 20
21 21
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h
index 6be6504162bb..b999aa4e5969 100644
--- a/include/linux/netfilter_ipv6/ip6t_REJECT.h
+++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h
@@ -12,7 +12,7 @@ enum ip6t_reject_with {
12}; 12};
13 13
14struct ip6t_reject_info { 14struct ip6t_reject_info {
15 u_int32_t with; /* reject type */ 15 __u32 with; /* reject type */
16}; 16};
17 17
18#endif /*_IP6T_REJECT_H*/ 18#endif /*_IP6T_REJECT_H*/
diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h
index 17a745cfb2c7..a602c165edd1 100644
--- a/include/linux/netfilter_ipv6/ip6t_ah.h
+++ b/include/linux/netfilter_ipv6/ip6t_ah.h
@@ -2,10 +2,10 @@
2#define _IP6T_AH_H 2#define _IP6T_AH_H
3 3
4struct ip6t_ah { 4struct ip6t_ah {
5 u_int32_t spis[2]; /* Security Parameter Index */ 5 __u32 spis[2]; /* Security Parameter Index */
6 u_int32_t hdrlen; /* Header Length */ 6 __u32 hdrlen; /* Header Length */
7 u_int8_t hdrres; /* Test of the Reserved Filed */ 7 __u8 hdrres; /* Test of the Reserved Filed */
8 u_int8_t invflags; /* Inverse flags */ 8 __u8 invflags; /* Inverse flags */
9}; 9};
10 10
11#define IP6T_AH_SPI 0x01 11#define IP6T_AH_SPI 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h
index 3724d0850920..538b31ef5e3d 100644
--- a/include/linux/netfilter_ipv6/ip6t_frag.h
+++ b/include/linux/netfilter_ipv6/ip6t_frag.h
@@ -2,10 +2,10 @@
2#define _IP6T_FRAG_H 2#define _IP6T_FRAG_H
3 3
4struct ip6t_frag { 4struct ip6t_frag {
5 u_int32_t ids[2]; /* Security Parameter Index */ 5 __u32 ids[2]; /* Security Parameter Index */
6 u_int32_t hdrlen; /* Header Length */ 6 __u32 hdrlen; /* Header Length */
7 u_int8_t flags; /* */ 7 __u8 flags; /* */
8 u_int8_t invflags; /* Inverse flags */ 8 __u8 invflags; /* Inverse flags */
9}; 9};
10 10
11#define IP6T_FRAG_IDS 0x01 11#define IP6T_FRAG_IDS 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h
index 5ef91b8319a8..c6fddcb971da 100644
--- a/include/linux/netfilter_ipv6/ip6t_hl.h
+++ b/include/linux/netfilter_ipv6/ip6t_hl.h
@@ -14,8 +14,8 @@ enum {
14 14
15 15
16struct ip6t_hl_info { 16struct ip6t_hl_info {
17 u_int8_t mode; 17 __u8 mode;
18 u_int8_t hop_limit; 18 __u8 hop_limit;
19}; 19};
20 20
21 21
diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
index 01dfd445596a..73d53bd3ff62 100644
--- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h
+++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
@@ -9,9 +9,9 @@ on whether they contain certain headers */
9#define __IPV6HEADER_H 9#define __IPV6HEADER_H
10 10
11struct ip6t_ipv6header_info { 11struct ip6t_ipv6header_info {
12 u_int8_t matchflags; 12 __u8 matchflags;
13 u_int8_t invflags; 13 __u8 invflags;
14 u_int8_t modeflag; 14 __u8 modeflag;
15}; 15};
16 16
17#define MASK_HOPOPTS 128 17#define MASK_HOPOPTS 128
diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h
index 18549bca2d1f..98c8cf685eea 100644
--- a/include/linux/netfilter_ipv6/ip6t_mh.h
+++ b/include/linux/netfilter_ipv6/ip6t_mh.h
@@ -3,8 +3,8 @@
3 3
4/* MH matching stuff */ 4/* MH matching stuff */
5struct ip6t_mh { 5struct ip6t_mh {
6 u_int8_t types[2]; /* MH type range */ 6 __u8 types[2]; /* MH type range */
7 u_int8_t invflags; /* Inverse flags */ 7 __u8 invflags; /* Inverse flags */
8}; 8};
9 9
10/* Values for "invflags" field in struct ip6t_mh. */ 10/* Values for "invflags" field in struct ip6t_mh. */
diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h
index 62d89bcd9f9c..405d309cd741 100644
--- a/include/linux/netfilter_ipv6/ip6t_opts.h
+++ b/include/linux/netfilter_ipv6/ip6t_opts.h
@@ -4,11 +4,11 @@
4#define IP6T_OPTS_OPTSNR 16 4#define IP6T_OPTS_OPTSNR 16
5 5
6struct ip6t_opts { 6struct ip6t_opts {
7 u_int32_t hdrlen; /* Header Length */ 7 __u32 hdrlen; /* Header Length */
8 u_int8_t flags; /* */ 8 __u8 flags; /* */
9 u_int8_t invflags; /* Inverse flags */ 9 __u8 invflags; /* Inverse flags */
10 u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */ 10 __u16 opts[IP6T_OPTS_OPTSNR]; /* opts */
11 u_int8_t optsnr; /* Nr of OPts */ 11 __u8 optsnr; /* Nr of OPts */
12}; 12};
13 13
14#define IP6T_OPTS_LEN 0x01 14#define IP6T_OPTS_LEN 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h
index ab91bfd2cd00..e8dad20acd37 100644
--- a/include/linux/netfilter_ipv6/ip6t_rt.h
+++ b/include/linux/netfilter_ipv6/ip6t_rt.h
@@ -6,13 +6,13 @@
6#define IP6T_RT_HOPS 16 6#define IP6T_RT_HOPS 16
7 7
8struct ip6t_rt { 8struct ip6t_rt {
9 u_int32_t rt_type; /* Routing Type */ 9 __u32 rt_type; /* Routing Type */
10 u_int32_t segsleft[2]; /* Segments Left */ 10 __u32 segsleft[2]; /* Segments Left */
11 u_int32_t hdrlen; /* Header Length */ 11 __u32 hdrlen; /* Header Length */
12 u_int8_t flags; /* */ 12 __u8 flags; /* */
13 u_int8_t invflags; /* Inverse flags */ 13 __u8 invflags; /* Inverse flags */
14 struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */ 14 struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */
15 u_int8_t addrnr; /* Nr of Addresses */ 15 __u8 addrnr; /* Nr of Addresses */
16}; 16};
17 17
18#define IP6T_RT_TYP 0x01 18#define IP6T_RT_TYP 0x01
diff --git a/include/net/dst.h b/include/net/dst.h
index 93b0310317be..be5a0d4c491d 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -72,7 +72,7 @@ struct dst_entry {
72 72
73 u32 _metrics[RTAX_MAX]; 73 u32 _metrics[RTAX_MAX];
74 74
75#ifdef CONFIG_NET_CLS_ROUTE 75#ifdef CONFIG_IP_ROUTE_CLASSID
76 __u32 tclassid; 76 __u32 tclassid;
77#else 77#else
78 __u32 __pad2; 78 __u32 __pad2;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 07bdb5e9e8ac..65d1fcdbc63b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -55,7 +55,7 @@ struct fib_nh {
55 int nh_weight; 55 int nh_weight;
56 int nh_power; 56 int nh_power;
57#endif 57#endif
58#ifdef CONFIG_NET_CLS_ROUTE 58#ifdef CONFIG_IP_ROUTE_CLASSID
59 __u32 nh_tclassid; 59 __u32 nh_tclassid;
60#endif 60#endif
61 int nh_oif; 61 int nh_oif;
@@ -201,7 +201,7 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp,
201extern int __net_init fib4_rules_init(struct net *net); 201extern int __net_init fib4_rules_init(struct net *net);
202extern void __net_exit fib4_rules_exit(struct net *net); 202extern void __net_exit fib4_rules_exit(struct net *net);
203 203
204#ifdef CONFIG_NET_CLS_ROUTE 204#ifdef CONFIG_IP_ROUTE_CLASSID
205extern u32 fib_rules_tclass(struct fib_result *res); 205extern u32 fib_rules_tclass(struct fib_result *res);
206#endif 206#endif
207 207
@@ -235,7 +235,7 @@ extern struct fib_table *fib_hash_table(u32 id);
235 235
236static inline void fib_combine_itag(u32 *itag, struct fib_result *res) 236static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
237{ 237{
238#ifdef CONFIG_NET_CLS_ROUTE 238#ifdef CONFIG_IP_ROUTE_CLASSID
239#ifdef CONFIG_IP_MULTIPLE_TABLES 239#ifdef CONFIG_IP_MULTIPLE_TABLES
240 u32 rtag; 240 u32 rtag;
241#endif 241#endif
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index b7bbd6c28cfa..b23bea62f708 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -28,6 +28,80 @@
28#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 28#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
29#include <net/netfilter/nf_conntrack.h> 29#include <net/netfilter/nf_conntrack.h>
30#endif 30#endif
31#include <net/net_namespace.h> /* Netw namespace */
32
33/*
34 * Generic access of ipvs struct
35 */
36static inline struct netns_ipvs *net_ipvs(struct net* net)
37{
38 return net->ipvs;
39}
40/*
41 * Get net ptr from skb in traffic cases
42 * use skb_sknet when call is from userland (ioctl or netlink)
43 */
44static inline struct net *skb_net(const struct sk_buff *skb)
45{
46#ifdef CONFIG_NET_NS
47#ifdef CONFIG_IP_VS_DEBUG
48 /*
49 * This is used for debug only.
50 * Start with the most likely hit
51 * End with BUG
52 */
53 if (likely(skb->dev && skb->dev->nd_net))
54 return dev_net(skb->dev);
55 if (skb_dst(skb)->dev)
56 return dev_net(skb_dst(skb)->dev);
57 WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
58 __func__, __LINE__);
59 if (likely(skb->sk && skb->sk->sk_net))
60 return sock_net(skb->sk);
61 pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
62 __func__, __LINE__);
63 BUG();
64#else
65 return dev_net(skb->dev ? : skb_dst(skb)->dev);
66#endif
67#else
68 return &init_net;
69#endif
70}
71
72static inline struct net *skb_sknet(const struct sk_buff *skb)
73{
74#ifdef CONFIG_NET_NS
75#ifdef CONFIG_IP_VS_DEBUG
76 /* Start with the most likely hit */
77 if (likely(skb->sk && skb->sk->sk_net))
78 return sock_net(skb->sk);
79 WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
80 __func__, __LINE__);
81 if (likely(skb->dev && skb->dev->nd_net))
82 return dev_net(skb->dev);
83 pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
84 __func__, __LINE__);
85 BUG();
86#else
87 return sock_net(skb->sk);
88#endif
89#else
90 return &init_net;
91#endif
92}
93/*
94 * This one needed for single_open_net since net is stored directly in
95 * private not as a struct i.e. seq_file_net cant be used.
96 */
97static inline struct net *seq_file_single_net(struct seq_file *seq)
98{
99#ifdef CONFIG_NET_NS
100 return (struct net *)seq->private;
101#else
102 return &init_net;
103#endif
104}
31 105
32/* Connections' size value needed by ip_vs_ctl.c */ 106/* Connections' size value needed by ip_vs_ctl.c */
33extern int ip_vs_conn_tab_size; 107extern int ip_vs_conn_tab_size;
@@ -258,6 +332,23 @@ struct ip_vs_seq {
258 before last resized pkt */ 332 before last resized pkt */
259}; 333};
260 334
335/*
336 * counters per cpu
337 */
338struct ip_vs_counters {
339 __u32 conns; /* connections scheduled */
340 __u32 inpkts; /* incoming packets */
341 __u32 outpkts; /* outgoing packets */
342 __u64 inbytes; /* incoming bytes */
343 __u64 outbytes; /* outgoing bytes */
344};
345/*
346 * Stats per cpu
347 */
348struct ip_vs_cpu_stats {
349 struct ip_vs_counters ustats;
350 struct u64_stats_sync syncp;
351};
261 352
262/* 353/*
263 * IPVS statistics objects 354 * IPVS statistics objects
@@ -279,17 +370,34 @@ struct ip_vs_estimator {
279}; 370};
280 371
281struct ip_vs_stats { 372struct ip_vs_stats {
282 struct ip_vs_stats_user ustats; /* statistics */ 373 struct ip_vs_stats_user ustats; /* statistics */
283 struct ip_vs_estimator est; /* estimator */ 374 struct ip_vs_estimator est; /* estimator */
284 375 struct ip_vs_cpu_stats *cpustats; /* per cpu counters */
285 spinlock_t lock; /* spin lock */ 376 spinlock_t lock; /* spin lock */
286}; 377};
287 378
379/*
380 * Helper Macros for per cpu
381 * ipvs->tot_stats->ustats.count
382 */
383#define IPVS_STAT_INC(ipvs, count) \
384 __this_cpu_inc((ipvs)->ustats->count)
385
386#define IPVS_STAT_ADD(ipvs, count, value) \
387 do {\
388 write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
389 raw_smp_processor_id())); \
390 __this_cpu_add((ipvs)->ustats->count, value); \
391 write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
392 raw_smp_processor_id())); \
393 } while (0)
394
288struct dst_entry; 395struct dst_entry;
289struct iphdr; 396struct iphdr;
290struct ip_vs_conn; 397struct ip_vs_conn;
291struct ip_vs_app; 398struct ip_vs_app;
292struct sk_buff; 399struct sk_buff;
400struct ip_vs_proto_data;
293 401
294struct ip_vs_protocol { 402struct ip_vs_protocol {
295 struct ip_vs_protocol *next; 403 struct ip_vs_protocol *next;
@@ -297,21 +405,22 @@ struct ip_vs_protocol {
297 u16 protocol; 405 u16 protocol;
298 u16 num_states; 406 u16 num_states;
299 int dont_defrag; 407 int dont_defrag;
300 atomic_t appcnt; /* counter of proto app incs */
301 int *timeout_table; /* protocol timeout table */
302 408
303 void (*init)(struct ip_vs_protocol *pp); 409 void (*init)(struct ip_vs_protocol *pp);
304 410
305 void (*exit)(struct ip_vs_protocol *pp); 411 void (*exit)(struct ip_vs_protocol *pp);
306 412
413 void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
414
415 void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
416
307 int (*conn_schedule)(int af, struct sk_buff *skb, 417 int (*conn_schedule)(int af, struct sk_buff *skb,
308 struct ip_vs_protocol *pp, 418 struct ip_vs_proto_data *pd,
309 int *verdict, struct ip_vs_conn **cpp); 419 int *verdict, struct ip_vs_conn **cpp);
310 420
311 struct ip_vs_conn * 421 struct ip_vs_conn *
312 (*conn_in_get)(int af, 422 (*conn_in_get)(int af,
313 const struct sk_buff *skb, 423 const struct sk_buff *skb,
314 struct ip_vs_protocol *pp,
315 const struct ip_vs_iphdr *iph, 424 const struct ip_vs_iphdr *iph,
316 unsigned int proto_off, 425 unsigned int proto_off,
317 int inverse); 426 int inverse);
@@ -319,7 +428,6 @@ struct ip_vs_protocol {
319 struct ip_vs_conn * 428 struct ip_vs_conn *
320 (*conn_out_get)(int af, 429 (*conn_out_get)(int af,
321 const struct sk_buff *skb, 430 const struct sk_buff *skb,
322 struct ip_vs_protocol *pp,
323 const struct ip_vs_iphdr *iph, 431 const struct ip_vs_iphdr *iph,
324 unsigned int proto_off, 432 unsigned int proto_off,
325 int inverse); 433 int inverse);
@@ -337,11 +445,11 @@ struct ip_vs_protocol {
337 445
338 int (*state_transition)(struct ip_vs_conn *cp, int direction, 446 int (*state_transition)(struct ip_vs_conn *cp, int direction,
339 const struct sk_buff *skb, 447 const struct sk_buff *skb,
340 struct ip_vs_protocol *pp); 448 struct ip_vs_proto_data *pd);
341 449
342 int (*register_app)(struct ip_vs_app *inc); 450 int (*register_app)(struct net *net, struct ip_vs_app *inc);
343 451
344 void (*unregister_app)(struct ip_vs_app *inc); 452 void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
345 453
346 int (*app_conn_bind)(struct ip_vs_conn *cp); 454 int (*app_conn_bind)(struct ip_vs_conn *cp);
347 455
@@ -350,14 +458,26 @@ struct ip_vs_protocol {
350 int offset, 458 int offset,
351 const char *msg); 459 const char *msg);
352 460
353 void (*timeout_change)(struct ip_vs_protocol *pp, int flags); 461 void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
462};
354 463
355 int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to); 464/*
465 * protocol data per netns
466 */
467struct ip_vs_proto_data {
468 struct ip_vs_proto_data *next;
469 struct ip_vs_protocol *pp;
470 int *timeout_table; /* protocol timeout table */
471 atomic_t appcnt; /* counter of proto app incs. */
472 struct tcp_states_t *tcp_state_table;
356}; 473};
357 474
358extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); 475extern struct ip_vs_protocol *ip_vs_proto_get(unsigned short proto);
476extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
477 unsigned short proto);
359 478
360struct ip_vs_conn_param { 479struct ip_vs_conn_param {
480 struct net *net;
361 const union nf_inet_addr *caddr; 481 const union nf_inet_addr *caddr;
362 const union nf_inet_addr *vaddr; 482 const union nf_inet_addr *vaddr;
363 __be16 cport; 483 __be16 cport;
@@ -375,16 +495,19 @@ struct ip_vs_conn_param {
375 */ 495 */
376struct ip_vs_conn { 496struct ip_vs_conn {
377 struct list_head c_list; /* hashed list heads */ 497 struct list_head c_list; /* hashed list heads */
378 498#ifdef CONFIG_NET_NS
499 struct net *net; /* Name space */
500#endif
379 /* Protocol, addresses and port numbers */ 501 /* Protocol, addresses and port numbers */
380 u16 af; /* address family */ 502 u16 af; /* address family */
381 union nf_inet_addr caddr; /* client address */ 503 __be16 cport;
382 union nf_inet_addr vaddr; /* virtual address */ 504 __be16 vport;
383 union nf_inet_addr daddr; /* destination address */ 505 __be16 dport;
384 volatile __u32 flags; /* status flags */ 506 __u32 fwmark; /* Fire wall mark from skb */
385 __be16 cport; 507 union nf_inet_addr caddr; /* client address */
386 __be16 vport; 508 union nf_inet_addr vaddr; /* virtual address */
387 __be16 dport; 509 union nf_inet_addr daddr; /* destination address */
510 volatile __u32 flags; /* status flags */
388 __u16 protocol; /* Which protocol (TCP/UDP) */ 511 __u16 protocol; /* Which protocol (TCP/UDP) */
389 512
390 /* counter and timer */ 513 /* counter and timer */
@@ -422,10 +545,38 @@ struct ip_vs_conn {
422 struct ip_vs_seq in_seq; /* incoming seq. struct */ 545 struct ip_vs_seq in_seq; /* incoming seq. struct */
423 struct ip_vs_seq out_seq; /* outgoing seq. struct */ 546 struct ip_vs_seq out_seq; /* outgoing seq. struct */
424 547
548 const struct ip_vs_pe *pe;
425 char *pe_data; 549 char *pe_data;
426 __u8 pe_data_len; 550 __u8 pe_data_len;
427}; 551};
428 552
553/*
554 * To save some memory in conn table when name space is disabled.
555 */
556static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
557{
558#ifdef CONFIG_NET_NS
559 return cp->net;
560#else
561 return &init_net;
562#endif
563}
564static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
565{
566#ifdef CONFIG_NET_NS
567 cp->net = net;
568#endif
569}
570
571static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
572 struct net *net)
573{
574#ifdef CONFIG_NET_NS
575 return cp->net == net;
576#else
577 return 1;
578#endif
579}
429 580
430/* 581/*
431 * Extended internal versions of struct ip_vs_service_user and 582 * Extended internal versions of struct ip_vs_service_user and
@@ -485,6 +636,7 @@ struct ip_vs_service {
485 unsigned flags; /* service status flags */ 636 unsigned flags; /* service status flags */
486 unsigned timeout; /* persistent timeout in ticks */ 637 unsigned timeout; /* persistent timeout in ticks */
487 __be32 netmask; /* grouping granularity */ 638 __be32 netmask; /* grouping granularity */
639 struct net *net;
488 640
489 struct list_head destinations; /* real server d-linked list */ 641 struct list_head destinations; /* real server d-linked list */
490 __u32 num_dests; /* number of servers */ 642 __u32 num_dests; /* number of servers */
@@ -510,8 +662,8 @@ struct ip_vs_dest {
510 struct list_head d_list; /* for table with all the dests */ 662 struct list_head d_list; /* for table with all the dests */
511 663
512 u16 af; /* address family */ 664 u16 af; /* address family */
513 union nf_inet_addr addr; /* IP address of the server */
514 __be16 port; /* port number of the server */ 665 __be16 port; /* port number of the server */
666 union nf_inet_addr addr; /* IP address of the server */
515 volatile unsigned flags; /* dest status flags */ 667 volatile unsigned flags; /* dest status flags */
516 atomic_t conn_flags; /* flags to copy to conn */ 668 atomic_t conn_flags; /* flags to copy to conn */
517 atomic_t weight; /* server weight */ 669 atomic_t weight; /* server weight */
@@ -538,8 +690,8 @@ struct ip_vs_dest {
538 /* for virtual service */ 690 /* for virtual service */
539 struct ip_vs_service *svc; /* service it belongs to */ 691 struct ip_vs_service *svc; /* service it belongs to */
540 __u16 protocol; /* which protocol (TCP/UDP) */ 692 __u16 protocol; /* which protocol (TCP/UDP) */
541 union nf_inet_addr vaddr; /* virtual IP address */
542 __be16 vport; /* virtual port number */ 693 __be16 vport; /* virtual port number */
694 union nf_inet_addr vaddr; /* virtual IP address */
543 __u32 vfwmark; /* firewall mark of service */ 695 __u32 vfwmark; /* firewall mark of service */
544}; 696};
545 697
@@ -674,13 +826,14 @@ enum {
674 IP_VS_DIR_LAST, 826 IP_VS_DIR_LAST,
675}; 827};
676 828
677static inline void ip_vs_conn_fill_param(int af, int protocol, 829static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
678 const union nf_inet_addr *caddr, 830 const union nf_inet_addr *caddr,
679 __be16 cport, 831 __be16 cport,
680 const union nf_inet_addr *vaddr, 832 const union nf_inet_addr *vaddr,
681 __be16 vport, 833 __be16 vport,
682 struct ip_vs_conn_param *p) 834 struct ip_vs_conn_param *p)
683{ 835{
836 p->net = net;
684 p->af = af; 837 p->af = af;
685 p->protocol = protocol; 838 p->protocol = protocol;
686 p->caddr = caddr; 839 p->caddr = caddr;
@@ -695,7 +848,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
695struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); 848struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
696 849
697struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 850struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
698 struct ip_vs_protocol *pp,
699 const struct ip_vs_iphdr *iph, 851 const struct ip_vs_iphdr *iph,
700 unsigned int proto_off, 852 unsigned int proto_off,
701 int inverse); 853 int inverse);
@@ -703,7 +855,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
703struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); 855struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
704 856
705struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 857struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
706 struct ip_vs_protocol *pp,
707 const struct ip_vs_iphdr *iph, 858 const struct ip_vs_iphdr *iph,
708 unsigned int proto_off, 859 unsigned int proto_off,
709 int inverse); 860 int inverse);
@@ -719,14 +870,14 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
719struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, 870struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
720 const union nf_inet_addr *daddr, 871 const union nf_inet_addr *daddr,
721 __be16 dport, unsigned flags, 872 __be16 dport, unsigned flags,
722 struct ip_vs_dest *dest); 873 struct ip_vs_dest *dest, __u32 fwmark);
723extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); 874extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
724 875
725extern const char * ip_vs_state_name(__u16 proto, int state); 876extern const char * ip_vs_state_name(__u16 proto, int state);
726 877
727extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); 878extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
728extern int ip_vs_check_template(struct ip_vs_conn *ct); 879extern int ip_vs_check_template(struct ip_vs_conn *ct);
729extern void ip_vs_random_dropentry(void); 880extern void ip_vs_random_dropentry(struct net *net);
730extern int ip_vs_conn_init(void); 881extern int ip_vs_conn_init(void);
731extern void ip_vs_conn_cleanup(void); 882extern void ip_vs_conn_cleanup(void);
732 883
@@ -796,12 +947,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
796 * (from ip_vs_app.c) 947 * (from ip_vs_app.c)
797 */ 948 */
798#define IP_VS_APP_MAX_PORTS 8 949#define IP_VS_APP_MAX_PORTS 8
799extern int register_ip_vs_app(struct ip_vs_app *app); 950extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
800extern void unregister_ip_vs_app(struct ip_vs_app *app); 951extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
801extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 952extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
802extern void ip_vs_unbind_app(struct ip_vs_conn *cp); 953extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
803extern int 954extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
804register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); 955 __u16 proto, __u16 port);
805extern int ip_vs_app_inc_get(struct ip_vs_app *inc); 956extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
806extern void ip_vs_app_inc_put(struct ip_vs_app *inc); 957extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
807 958
@@ -814,15 +965,27 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
814void ip_vs_unbind_pe(struct ip_vs_service *svc); 965void ip_vs_unbind_pe(struct ip_vs_service *svc);
815int register_ip_vs_pe(struct ip_vs_pe *pe); 966int register_ip_vs_pe(struct ip_vs_pe *pe);
816int unregister_ip_vs_pe(struct ip_vs_pe *pe); 967int unregister_ip_vs_pe(struct ip_vs_pe *pe);
817extern struct ip_vs_pe *ip_vs_pe_get(const char *name); 968struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
818extern void ip_vs_pe_put(struct ip_vs_pe *pe); 969struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
970
971static inline void ip_vs_pe_get(const struct ip_vs_pe *pe)
972{
973 if (pe && pe->module)
974 __module_get(pe->module);
975}
976
977static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
978{
979 if (pe && pe->module)
980 module_put(pe->module);
981}
819 982
820/* 983/*
821 * IPVS protocol functions (from ip_vs_proto.c) 984 * IPVS protocol functions (from ip_vs_proto.c)
822 */ 985 */
823extern int ip_vs_protocol_init(void); 986extern int ip_vs_protocol_init(void);
824extern void ip_vs_protocol_cleanup(void); 987extern void ip_vs_protocol_cleanup(void);
825extern void ip_vs_protocol_timeout_change(int flags); 988extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
826extern int *ip_vs_create_timeout_table(int *table, int size); 989extern int *ip_vs_create_timeout_table(int *table, int size);
827extern int 990extern int
828ip_vs_set_state_timeout(int *table, int num, const char *const *names, 991ip_vs_set_state_timeout(int *table, int num, const char *const *names,
@@ -852,26 +1015,21 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
852extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); 1015extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
853extern struct ip_vs_conn * 1016extern struct ip_vs_conn *
854ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 1017ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
855 struct ip_vs_protocol *pp, int *ignored); 1018 struct ip_vs_proto_data *pd, int *ignored);
856extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 1019extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
857 struct ip_vs_protocol *pp); 1020 struct ip_vs_proto_data *pd);
858 1021
859 1022
860/* 1023/*
861 * IPVS control data and functions (from ip_vs_ctl.c) 1024 * IPVS control data and functions (from ip_vs_ctl.c)
862 */ 1025 */
863extern int sysctl_ip_vs_cache_bypass;
864extern int sysctl_ip_vs_expire_nodest_conn;
865extern int sysctl_ip_vs_expire_quiescent_template;
866extern int sysctl_ip_vs_sync_threshold[2];
867extern int sysctl_ip_vs_nat_icmp_send;
868extern int sysctl_ip_vs_conntrack;
869extern int sysctl_ip_vs_snat_reroute;
870extern struct ip_vs_stats ip_vs_stats; 1026extern struct ip_vs_stats ip_vs_stats;
871extern const struct ctl_path net_vs_ctl_path[]; 1027extern const struct ctl_path net_vs_ctl_path[];
1028extern int sysctl_ip_vs_sync_ver;
872 1029
1030extern void ip_vs_sync_switch_mode(struct net *net, int mode);
873extern struct ip_vs_service * 1031extern struct ip_vs_service *
874ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, 1032ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
875 const union nf_inet_addr *vaddr, __be16 vport); 1033 const union nf_inet_addr *vaddr, __be16 vport);
876 1034
877static inline void ip_vs_service_put(struct ip_vs_service *svc) 1035static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -880,7 +1038,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
880} 1038}
881 1039
882extern struct ip_vs_dest * 1040extern struct ip_vs_dest *
883ip_vs_lookup_real_service(int af, __u16 protocol, 1041ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
884 const union nf_inet_addr *daddr, __be16 dport); 1042 const union nf_inet_addr *daddr, __be16 dport);
885 1043
886extern int ip_vs_use_count_inc(void); 1044extern int ip_vs_use_count_inc(void);
@@ -888,8 +1046,9 @@ extern void ip_vs_use_count_dec(void);
888extern int ip_vs_control_init(void); 1046extern int ip_vs_control_init(void);
889extern void ip_vs_control_cleanup(void); 1047extern void ip_vs_control_cleanup(void);
890extern struct ip_vs_dest * 1048extern struct ip_vs_dest *
891ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, 1049ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
892 const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol); 1050 __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
1051 __u16 protocol, __u32 fwmark);
893extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); 1052extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
894 1053
895 1054
@@ -897,14 +1056,12 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
897 * IPVS sync daemon data and function prototypes 1056 * IPVS sync daemon data and function prototypes
898 * (from ip_vs_sync.c) 1057 * (from ip_vs_sync.c)
899 */ 1058 */
900extern volatile int ip_vs_sync_state; 1059extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
901extern volatile int ip_vs_master_syncid; 1060 __u8 syncid);
902extern volatile int ip_vs_backup_syncid; 1061extern int stop_sync_thread(struct net *net, int state);
903extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 1062extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
904extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 1063extern int ip_vs_sync_init(void);
905extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); 1064extern void ip_vs_sync_cleanup(void);
906extern int stop_sync_thread(int state);
907extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
908 1065
909 1066
910/* 1067/*
@@ -912,8 +1069,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
912 */ 1069 */
913extern int ip_vs_estimator_init(void); 1070extern int ip_vs_estimator_init(void);
914extern void ip_vs_estimator_cleanup(void); 1071extern void ip_vs_estimator_cleanup(void);
915extern void ip_vs_new_estimator(struct ip_vs_stats *stats); 1072extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
916extern void ip_vs_kill_estimator(struct ip_vs_stats *stats); 1073extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
917extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); 1074extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
918 1075
919/* 1076/*
@@ -955,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6
955extern int ip_vs_drop_rate; 1112extern int ip_vs_drop_rate;
956extern int ip_vs_drop_counter; 1113extern int ip_vs_drop_counter;
957 1114
958static __inline__ int ip_vs_todrop(void) 1115static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
959{ 1116{
960 if (!ip_vs_drop_rate) return 0; 1117 if (!ipvs->drop_rate)
961 if (--ip_vs_drop_counter > 0) return 0; 1118 return 0;
962 ip_vs_drop_counter = ip_vs_drop_rate; 1119 if (--ipvs->drop_counter > 0)
1120 return 0;
1121 ipvs->drop_counter = ipvs->drop_rate;
963 return 1; 1122 return 1;
964} 1123}
965 1124
@@ -1047,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
1047 * Netfilter connection tracking 1206 * Netfilter connection tracking
1048 * (from ip_vs_nfct.c) 1207 * (from ip_vs_nfct.c)
1049 */ 1208 */
1050static inline int ip_vs_conntrack_enabled(void) 1209static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
1051{ 1210{
1052 return sysctl_ip_vs_conntrack; 1211 return ipvs->sysctl_conntrack;
1053} 1212}
1054 1213
1055extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, 1214extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1062,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
1062 1221
1063#else 1222#else
1064 1223
1065static inline int ip_vs_conntrack_enabled(void) 1224static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
1066{ 1225{
1067 return 0; 1226 return 0;
1068} 1227}
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1bf812b21fb7..b3b4a34cb2cc 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -20,6 +20,7 @@
20#include <net/netns/conntrack.h> 20#include <net/netns/conntrack.h>
21#endif 21#endif
22#include <net/netns/xfrm.h> 22#include <net/netns/xfrm.h>
23#include <net/netns/ip_vs.h>
23 24
24struct proc_dir_entry; 25struct proc_dir_entry;
25struct net_device; 26struct net_device;
@@ -94,6 +95,7 @@ struct net {
94#ifdef CONFIG_XFRM 95#ifdef CONFIG_XFRM
95 struct netns_xfrm xfrm; 96 struct netns_xfrm xfrm;
96#endif 97#endif
98 struct netns_ipvs *ipvs;
97}; 99};
98 100
99 101
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d85cff10e169..d0d13378991e 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -50,11 +50,24 @@ union nf_conntrack_expect_proto {
50/* per conntrack: application helper private data */ 50/* per conntrack: application helper private data */
51union nf_conntrack_help { 51union nf_conntrack_help {
52 /* insert conntrack helper private data (master) here */ 52 /* insert conntrack helper private data (master) here */
53#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE)
53 struct nf_ct_ftp_master ct_ftp_info; 54 struct nf_ct_ftp_master ct_ftp_info;
55#endif
56#if defined(CONFIG_NF_CONNTRACK_PPTP) || \
57 defined(CONFIG_NF_CONNTRACK_PPTP_MODULE)
54 struct nf_ct_pptp_master ct_pptp_info; 58 struct nf_ct_pptp_master ct_pptp_info;
59#endif
60#if defined(CONFIG_NF_CONNTRACK_H323) || \
61 defined(CONFIG_NF_CONNTRACK_H323_MODULE)
55 struct nf_ct_h323_master ct_h323_info; 62 struct nf_ct_h323_master ct_h323_info;
63#endif
64#if defined(CONFIG_NF_CONNTRACK_SANE) || \
65 defined(CONFIG_NF_CONNTRACK_SANE_MODULE)
56 struct nf_ct_sane_master ct_sane_info; 66 struct nf_ct_sane_master ct_sane_info;
67#endif
68#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE)
57 struct nf_ct_sip_master ct_sip_info; 69 struct nf_ct_sip_master ct_sip_info;
70#endif
58}; 71};
59 72
60#include <linux/types.h> 73#include <linux/types.h>
@@ -116,14 +129,14 @@ struct nf_conn {
116 u_int32_t secmark; 129 u_int32_t secmark;
117#endif 130#endif
118 131
119 /* Storage reserved for other modules: */
120 union nf_conntrack_proto proto;
121
122 /* Extensions */ 132 /* Extensions */
123 struct nf_ct_ext *ext; 133 struct nf_ct_ext *ext;
124#ifdef CONFIG_NET_NS 134#ifdef CONFIG_NET_NS
125 struct net *ct_net; 135 struct net *ct_net;
126#endif 136#endif
137
138 /* Storage reserved for other modules, must be the last member */
139 union nf_conntrack_proto proto;
127}; 140};
128 141
129static inline struct nf_conn * 142static inline struct nf_conn *
@@ -189,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto);
189 * Allocate a hashtable of hlist_head (if nulls == 0), 202 * Allocate a hashtable of hlist_head (if nulls == 0),
190 * or hlist_nulls_head (if nulls == 1) 203 * or hlist_nulls_head (if nulls == 1)
191 */ 204 */
192extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls); 205extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
193 206
194extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size); 207extern void nf_ct_free_hashtable(void *hash, unsigned int size);
195 208
196extern struct nf_conntrack_tuple_hash * 209extern struct nf_conntrack_tuple_hash *
197__nf_conntrack_find(struct net *net, u16 zone, 210__nf_conntrack_find(struct net *net, u16 zone,
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 96ba5f7dcab6..8fdb04b8cce0 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -23,12 +23,17 @@ struct nf_conntrack_ecache {
23static inline struct nf_conntrack_ecache * 23static inline struct nf_conntrack_ecache *
24nf_ct_ecache_find(const struct nf_conn *ct) 24nf_ct_ecache_find(const struct nf_conn *ct)
25{ 25{
26#ifdef CONFIG_NF_CONNTRACK_EVENTS
26 return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); 27 return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
28#else
29 return NULL;
30#endif
27} 31}
28 32
29static inline struct nf_conntrack_ecache * 33static inline struct nf_conntrack_ecache *
30nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) 34nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
31{ 35{
36#ifdef CONFIG_NF_CONNTRACK_EVENTS
32 struct net *net = nf_ct_net(ct); 37 struct net *net = nf_ct_net(ct);
33 struct nf_conntrack_ecache *e; 38 struct nf_conntrack_ecache *e;
34 39
@@ -45,6 +50,9 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
45 e->expmask = expmask; 50 e->expmask = expmask;
46 } 51 }
47 return e; 52 return e;
53#else
54 return NULL;
55#endif
48}; 56};
49 57
50#ifdef CONFIG_NF_CONNTRACK_EVENTS 58#ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -59,7 +67,7 @@ struct nf_ct_event_notifier {
59 int (*fcn)(unsigned int events, struct nf_ct_event *item); 67 int (*fcn)(unsigned int events, struct nf_ct_event *item);
60}; 68};
61 69
62extern struct nf_ct_event_notifier *nf_conntrack_event_cb; 70extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
63extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb); 71extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
64extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb); 72extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
65 73
@@ -159,7 +167,7 @@ struct nf_exp_event_notifier {
159 int (*fcn)(unsigned int events, struct nf_exp_event *item); 167 int (*fcn)(unsigned int events, struct nf_exp_event *item);
160}; 168};
161 169
162extern struct nf_exp_event_notifier *nf_expect_event_cb; 170extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
163extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb); 171extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
164extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb); 172extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
165 173
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 0772d296dfdb..2dcf31703acb 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -7,10 +7,19 @@
7 7
8enum nf_ct_ext_id { 8enum nf_ct_ext_id {
9 NF_CT_EXT_HELPER, 9 NF_CT_EXT_HELPER,
10#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
10 NF_CT_EXT_NAT, 11 NF_CT_EXT_NAT,
12#endif
11 NF_CT_EXT_ACCT, 13 NF_CT_EXT_ACCT,
14#ifdef CONFIG_NF_CONNTRACK_EVENTS
12 NF_CT_EXT_ECACHE, 15 NF_CT_EXT_ECACHE,
16#endif
17#ifdef CONFIG_NF_CONNTRACK_ZONES
13 NF_CT_EXT_ZONE, 18 NF_CT_EXT_ZONE,
19#endif
20#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
21 NF_CT_EXT_TSTAMP,
22#endif
14 NF_CT_EXT_NUM, 23 NF_CT_EXT_NUM,
15}; 24};
16 25
@@ -19,6 +28,7 @@ enum nf_ct_ext_id {
19#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter 28#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
20#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache 29#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
21#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone 30#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
31#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
22 32
23/* Extensions: optional stuff which isn't permanently in struct. */ 33/* Extensions: optional stuff which isn't permanently in struct. */
24struct nf_ct_ext { 34struct nf_ct_ext {
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 32c305dbdab6..f1c1311adc2c 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -63,4 +63,10 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
63extern int nf_conntrack_helper_init(void); 63extern int nf_conntrack_helper_init(void);
64extern void nf_conntrack_helper_fini(void); 64extern void nf_conntrack_helper_fini(void);
65 65
66extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
67 unsigned int protoff,
68 struct nf_conn *ct,
69 enum ip_conntrack_info ctinfo,
70 unsigned int timeout);
71
66#endif /*_NF_CONNTRACK_HELPER_H*/ 72#endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index a7547611e8f1..e8010f445ae1 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -73,7 +73,7 @@ struct nf_conntrack_l3proto {
73 struct module *me; 73 struct module *me;
74}; 74};
75 75
76extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX]; 76extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
77 77
78/* Protocol registration. */ 78/* Protocol registration. */
79extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); 79extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
new file mode 100644
index 000000000000..f17dcb664e29
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -0,0 +1,53 @@
1#ifndef _NF_CONNTRACK_TSTAMP_H
2#define _NF_CONNTRACK_TSTAMP_H
3
4#include <net/net_namespace.h>
5#include <linux/netfilter/nf_conntrack_common.h>
6#include <linux/netfilter/nf_conntrack_tuple_common.h>
7#include <net/netfilter/nf_conntrack.h>
8#include <net/netfilter/nf_conntrack_extend.h>
9
10struct nf_conn_tstamp {
11 u_int64_t start;
12 u_int64_t stop;
13};
14
15static inline
16struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct)
17{
18#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
19 return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP);
20#else
21 return NULL;
22#endif
23}
24
25static inline
26struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
27{
28#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
29 struct net *net = nf_ct_net(ct);
30
31 if (!net->ct.sysctl_tstamp)
32 return NULL;
33
34 return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
35#else
36 return NULL;
37#endif
38};
39
40static inline bool nf_ct_tstamp_enabled(struct net *net)
41{
42 return net->ct.sysctl_tstamp != 0;
43}
44
45static inline void nf_ct_set_tstamp(struct net *net, bool enable)
46{
47 net->ct.sysctl_tstamp = enable;
48}
49
50extern int nf_conntrack_tstamp_init(struct net *net);
51extern void nf_conntrack_tstamp_fini(struct net *net);
52
53#endif /* _NF_CONNTRACK_TSTAMP_H */
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index f5f09f032a90..aff80b190c12 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -56,7 +56,9 @@ struct nf_nat_multi_range_compat {
56/* per conntrack: nat application helper private data */ 56/* per conntrack: nat application helper private data */
57union nf_conntrack_nat_help { 57union nf_conntrack_nat_help {
58 /* insert nat helper private data here */ 58 /* insert nat helper private data here */
59#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE)
59 struct nf_nat_pptp nat_pptp_info; 60 struct nf_nat_pptp nat_pptp_info;
61#endif
60}; 62};
61 63
62struct nf_conn; 64struct nf_conn;
@@ -84,7 +86,11 @@ extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
84 86
85static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) 87static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
86{ 88{
89#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
87 return nf_ct_ext_find(ct, NF_CT_EXT_NAT); 90 return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
91#else
92 return NULL;
93#endif
88} 94}
89 95
90#else /* !__KERNEL__: iptables wants this to compile. */ 96#else /* !__KERNEL__: iptables wants this to compile. */
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index 33602ab66190..3dc7b98effeb 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -21,9 +21,9 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
21 enum nf_nat_manip_type manip) 21 enum nf_nat_manip_type manip)
22{ 22{
23 if (manip == IP_NAT_MANIP_SRC) 23 if (manip == IP_NAT_MANIP_SRC)
24 return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 24 return ct->status & IPS_SRC_NAT_DONE;
25 else 25 else
26 return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status); 26 return ct->status & IPS_DST_NAT_DONE;
27} 27}
28 28
29struct nlattr; 29struct nlattr;
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index d4958d4c6574..341eb089349e 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -21,15 +21,15 @@ struct netns_ct {
21 int sysctl_events; 21 int sysctl_events;
22 unsigned int sysctl_events_retry_timeout; 22 unsigned int sysctl_events_retry_timeout;
23 int sysctl_acct; 23 int sysctl_acct;
24 int sysctl_tstamp;
24 int sysctl_checksum; 25 int sysctl_checksum;
25 unsigned int sysctl_log_invalid; /* Log invalid packets */ 26 unsigned int sysctl_log_invalid; /* Log invalid packets */
26#ifdef CONFIG_SYSCTL 27#ifdef CONFIG_SYSCTL
27 struct ctl_table_header *sysctl_header; 28 struct ctl_table_header *sysctl_header;
28 struct ctl_table_header *acct_sysctl_header; 29 struct ctl_table_header *acct_sysctl_header;
30 struct ctl_table_header *tstamp_sysctl_header;
29 struct ctl_table_header *event_sysctl_header; 31 struct ctl_table_header *event_sysctl_header;
30#endif 32#endif
31 int hash_vmalloc;
32 int expect_vmalloc;
33 char *slabname; 33 char *slabname;
34}; 34};
35#endif 35#endif
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
new file mode 100644
index 000000000000..259ebac904bf
--- /dev/null
+++ b/include/net/netns/ip_vs.h
@@ -0,0 +1,143 @@
1/*
2 * IP Virtual Server
3 * Data structure for network namespace
4 *
5 */
6
7#ifndef IP_VS_H_
8#define IP_VS_H_
9
10#include <linux/list.h>
11#include <linux/mutex.h>
12#include <linux/list_nulls.h>
13#include <linux/ip_vs.h>
14#include <asm/atomic.h>
15#include <linux/in.h>
16
17struct ip_vs_stats;
18struct ip_vs_sync_buff;
19struct ctl_table_header;
20
21struct netns_ipvs {
22 int gen; /* Generation */
23 /*
24 * Hash table: for real service lookups
25 */
26 #define IP_VS_RTAB_BITS 4
27 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
28 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
29
30 struct list_head rs_table[IP_VS_RTAB_SIZE];
31 /* ip_vs_app */
32 struct list_head app_list;
33 struct mutex app_mutex;
34 struct lock_class_key app_key; /* mutex debugging */
35
36 /* ip_vs_proto */
37 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
38 struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
39 /* ip_vs_proto_tcp */
40#ifdef CONFIG_IP_VS_PROTO_TCP
41 #define TCP_APP_TAB_BITS 4
42 #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
43 #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
44 struct list_head tcp_apps[TCP_APP_TAB_SIZE];
45 spinlock_t tcp_app_lock;
46#endif
47 /* ip_vs_proto_udp */
48#ifdef CONFIG_IP_VS_PROTO_UDP
49 #define UDP_APP_TAB_BITS 4
50 #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
51 #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
52 struct list_head udp_apps[UDP_APP_TAB_SIZE];
53 spinlock_t udp_app_lock;
54#endif
55 /* ip_vs_proto_sctp */
56#ifdef CONFIG_IP_VS_PROTO_SCTP
57 #define SCTP_APP_TAB_BITS 4
58 #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
59 #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
60 /* Hash table for SCTP application incarnations */
61 struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
62 spinlock_t sctp_app_lock;
63#endif
64 /* ip_vs_conn */
65 atomic_t conn_count; /* connection counter */
66
67 /* ip_vs_ctl */
68 struct ip_vs_stats *tot_stats; /* Statistics & est. */
69 struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
70 seqcount_t *ustats_seq; /* u64 read retry */
71
72 int num_services; /* no of virtual services */
73 /* 1/rate drop and drop-entry variables */
74 struct delayed_work defense_work; /* Work handler */
75 int drop_rate;
76 int drop_counter;
77 atomic_t dropentry;
78 /* locks in ctl.c */
79 spinlock_t dropentry_lock; /* drop entry handling */
80 spinlock_t droppacket_lock; /* drop packet handling */
81 spinlock_t securetcp_lock; /* state and timeout tables */
82 rwlock_t rs_lock; /* real services table */
83 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
84 struct lock_class_key ctl_key; /* ctl_mutex debugging */
85 /* Trash for destinations */
86 struct list_head dest_trash;
87 /* Service counters */
88 atomic_t ftpsvc_counter;
89 atomic_t nullsvc_counter;
90
91 /* sys-ctl struct */
92 struct ctl_table_header *sysctl_hdr;
93 struct ctl_table *sysctl_tbl;
94 /* sysctl variables */
95 int sysctl_amemthresh;
96 int sysctl_am_droprate;
97 int sysctl_drop_entry;
98 int sysctl_drop_packet;
99 int sysctl_secure_tcp;
100#ifdef CONFIG_IP_VS_NFCT
101 int sysctl_conntrack;
102#endif
103 int sysctl_snat_reroute;
104 int sysctl_sync_ver;
105 int sysctl_cache_bypass;
106 int sysctl_expire_nodest_conn;
107 int sysctl_expire_quiescent_template;
108 int sysctl_sync_threshold[2];
109 int sysctl_nat_icmp_send;
110
111 /* ip_vs_lblc */
112 int sysctl_lblc_expiration;
113 struct ctl_table_header *lblc_ctl_header;
114 struct ctl_table *lblc_ctl_table;
115 /* ip_vs_lblcr */
116 int sysctl_lblcr_expiration;
117 struct ctl_table_header *lblcr_ctl_header;
118 struct ctl_table *lblcr_ctl_table;
119 /* ip_vs_est */
120 struct list_head est_list; /* estimator list */
121 spinlock_t est_lock;
122 struct timer_list est_timer; /* Estimation timer */
123 /* ip_vs_sync */
124 struct list_head sync_queue;
125 spinlock_t sync_lock;
126 struct ip_vs_sync_buff *sync_buff;
127 spinlock_t sync_buff_lock;
128 struct sockaddr_in sync_mcast_addr;
129 struct task_struct *master_thread;
130 struct task_struct *backup_thread;
131 int send_mesg_maxlen;
132 int recv_mesg_maxlen;
133 volatile int sync_state;
134 volatile int master_syncid;
135 volatile int backup_syncid;
136 /* multicast interface name */
137 char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
138 char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
139 /* net name space ptr */
140 struct net *net; /* Needed by timer routines */
141};
142
143#endif /* IP_VS_H_ */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d68c3f121774..e2e2ef57eca2 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -43,7 +43,6 @@ struct netns_ipv4 {
43 struct xt_table *nat_table; 43 struct xt_table *nat_table;
44 struct hlist_head *nat_bysource; 44 struct hlist_head *nat_bysource;
45 unsigned int nat_htable_size; 45 unsigned int nat_htable_size;
46 int nat_vmalloced;
47#endif 46#endif
48 47
49 int sysctl_icmp_echo_ignore_all; 48 int sysctl_icmp_echo_ignore_all;
diff --git a/kernel/audit.c b/kernel/audit.c
index e4956244ae50..162e88e33bc9 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -74,6 +74,8 @@ static int audit_initialized;
74int audit_enabled; 74int audit_enabled;
75int audit_ever_enabled; 75int audit_ever_enabled;
76 76
77EXPORT_SYMBOL_GPL(audit_enabled);
78
77/* Default state when kernel boots without any parameters. */ 79/* Default state when kernel boots without any parameters. */
78static int audit_default; 80static int audit_default;
79 81
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 50a46afc2bcc..2ed0056a39a8 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -22,9 +22,15 @@
22#include <linux/netfilter_bridge/ebtables.h> 22#include <linux/netfilter_bridge/ebtables.h>
23#include <linux/netfilter_bridge/ebt_ip6.h> 23#include <linux/netfilter_bridge/ebt_ip6.h>
24 24
25struct tcpudphdr { 25union pkthdr {
26 __be16 src; 26 struct {
27 __be16 dst; 27 __be16 src;
28 __be16 dst;
29 } tcpudphdr;
30 struct {
31 u8 type;
32 u8 code;
33 } icmphdr;
28}; 34};
29 35
30static bool 36static bool
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
33 const struct ebt_ip6_info *info = par->matchinfo; 39 const struct ebt_ip6_info *info = par->matchinfo;
34 const struct ipv6hdr *ih6; 40 const struct ipv6hdr *ih6;
35 struct ipv6hdr _ip6h; 41 struct ipv6hdr _ip6h;
36 const struct tcpudphdr *pptr; 42 const union pkthdr *pptr;
37 struct tcpudphdr _ports; 43 union pkthdr _pkthdr;
38 44
39 ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); 45 ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
40 if (ih6 == NULL) 46 if (ih6 == NULL)
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
56 return false; 62 return false;
57 if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) 63 if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
58 return false; 64 return false;
59 if (!(info->bitmask & EBT_IP6_DPORT) && 65 if (!(info->bitmask & ( EBT_IP6_DPORT |
60 !(info->bitmask & EBT_IP6_SPORT)) 66 EBT_IP6_SPORT | EBT_IP6_ICMP6)))
61 return true; 67 return true;
62 pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), 68
63 &_ports); 69 /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
70 pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
71 &_pkthdr);
64 if (pptr == NULL) 72 if (pptr == NULL)
65 return false; 73 return false;
66 if (info->bitmask & EBT_IP6_DPORT) { 74 if (info->bitmask & EBT_IP6_DPORT) {
67 u32 dst = ntohs(pptr->dst); 75 u16 dst = ntohs(pptr->tcpudphdr.dst);
68 if (FWINV(dst < info->dport[0] || 76 if (FWINV(dst < info->dport[0] ||
69 dst > info->dport[1], EBT_IP6_DPORT)) 77 dst > info->dport[1], EBT_IP6_DPORT))
70 return false; 78 return false;
71 } 79 }
72 if (info->bitmask & EBT_IP6_SPORT) { 80 if (info->bitmask & EBT_IP6_SPORT) {
73 u32 src = ntohs(pptr->src); 81 u16 src = ntohs(pptr->tcpudphdr.src);
74 if (FWINV(src < info->sport[0] || 82 if (FWINV(src < info->sport[0] ||
75 src > info->sport[1], EBT_IP6_SPORT)) 83 src > info->sport[1], EBT_IP6_SPORT))
76 return false; 84 return false;
77 } 85 }
78 return true; 86 if ((info->bitmask & EBT_IP6_ICMP6) &&
87 FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
88 pptr->icmphdr.type > info->icmpv6_type[1] ||
89 pptr->icmphdr.code < info->icmpv6_code[0] ||
90 pptr->icmphdr.code > info->icmpv6_code[1],
91 EBT_IP6_ICMP6))
92 return false;
79 } 93 }
80 return true; 94 return true;
81} 95}
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
103 return -EINVAL; 117 return -EINVAL;
104 if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) 118 if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
105 return -EINVAL; 119 return -EINVAL;
120 if (info->bitmask & EBT_IP6_ICMP6) {
121 if ((info->invflags & EBT_IP6_PROTO) ||
122 info->protocol != IPPROTO_ICMPV6)
123 return -EINVAL;
124 if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
125 info->icmpv6_code[0] > info->icmpv6_code[1])
126 return -EINVAL;
127 }
106 return 0; 128 return 0;
107} 129}
108 130
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 16df0532d4b9..5f1825df9dca 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info,
1764 1764
1765 newinfo->entries_size = size; 1765 newinfo->entries_size = size;
1766 1766
1767 xt_compat_init_offsets(AF_INET, info->nentries);
1767 return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, 1768 return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
1768 entries, newinfo); 1769 entries, newinfo);
1769} 1770}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050595d1..8949a05ac307 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE
140 handled by the klogd daemon which is responsible for kernel messages 140 handled by the klogd daemon which is responsible for kernel messages
141 ("man klogd"). 141 ("man klogd").
142 142
143config IP_ROUTE_CLASSID
144 bool
145
143config IP_PNP 146config IP_PNP
144 bool "IP: kernel level autoconfiguration" 147 bool "IP: kernel level autoconfiguration"
145 help 148 help
@@ -657,4 +660,3 @@ config TCP_MD5SIG
657 on the Internet. 660 on the Internet.
658 661
659 If unsure, say N. 662 If unsure, say N.
660
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 7981a24f5c7b..9cefe72029cf 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -41,12 +41,12 @@ struct fib4_rule {
41 __be32 srcmask; 41 __be32 srcmask;
42 __be32 dst; 42 __be32 dst;
43 __be32 dstmask; 43 __be32 dstmask;
44#ifdef CONFIG_NET_CLS_ROUTE 44#ifdef CONFIG_IP_ROUTE_CLASSID
45 u32 tclassid; 45 u32 tclassid;
46#endif 46#endif
47}; 47};
48 48
49#ifdef CONFIG_NET_CLS_ROUTE 49#ifdef CONFIG_IP_ROUTE_CLASSID
50u32 fib_rules_tclass(struct fib_result *res) 50u32 fib_rules_tclass(struct fib_result *res)
51{ 51{
52 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; 52 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
165 if (frh->dst_len) 165 if (frh->dst_len)
166 rule4->dst = nla_get_be32(tb[FRA_DST]); 166 rule4->dst = nla_get_be32(tb[FRA_DST]);
167 167
168#ifdef CONFIG_NET_CLS_ROUTE 168#ifdef CONFIG_IP_ROUTE_CLASSID
169 if (tb[FRA_FLOW]) 169 if (tb[FRA_FLOW])
170 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); 170 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
171#endif 171#endif
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
195 if (frh->tos && (rule4->tos != frh->tos)) 195 if (frh->tos && (rule4->tos != frh->tos))
196 return 0; 196 return 0;
197 197
198#ifdef CONFIG_NET_CLS_ROUTE 198#ifdef CONFIG_IP_ROUTE_CLASSID
199 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) 199 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
200 return 0; 200 return 0;
201#endif 201#endif
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
224 if (rule4->src_len) 224 if (rule4->src_len)
225 NLA_PUT_BE32(skb, FRA_SRC, rule4->src); 225 NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
226 226
227#ifdef CONFIG_NET_CLS_ROUTE 227#ifdef CONFIG_IP_ROUTE_CLASSID
228 if (rule4->tclassid) 228 if (rule4->tclassid)
229 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); 229 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
230#endif 230#endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 12d3dc3df1b7..9aff11d7278f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -200,7 +200,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
200#ifdef CONFIG_IP_ROUTE_MULTIPATH 200#ifdef CONFIG_IP_ROUTE_MULTIPATH
201 nh->nh_weight != onh->nh_weight || 201 nh->nh_weight != onh->nh_weight ||
202#endif 202#endif
203#ifdef CONFIG_NET_CLS_ROUTE 203#ifdef CONFIG_IP_ROUTE_CLASSID
204 nh->nh_tclassid != onh->nh_tclassid || 204 nh->nh_tclassid != onh->nh_tclassid ||
205#endif 205#endif
206 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) 206 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -422,7 +422,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
422 422
423 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 423 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
424 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; 424 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
425#ifdef CONFIG_NET_CLS_ROUTE 425#ifdef CONFIG_IP_ROUTE_CLASSID
426 nla = nla_find(attrs, attrlen, RTA_FLOW); 426 nla = nla_find(attrs, attrlen, RTA_FLOW);
427 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 427 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
428#endif 428#endif
@@ -476,7 +476,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
476 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 476 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
477 if (nla && nla_get_be32(nla) != nh->nh_gw) 477 if (nla && nla_get_be32(nla) != nh->nh_gw)
478 return 1; 478 return 1;
479#ifdef CONFIG_NET_CLS_ROUTE 479#ifdef CONFIG_IP_ROUTE_CLASSID
480 nla = nla_find(attrs, attrlen, RTA_FLOW); 480 nla = nla_find(attrs, attrlen, RTA_FLOW);
481 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 481 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
482 return 1; 482 return 1;
@@ -779,7 +779,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
779 goto err_inval; 779 goto err_inval;
780 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 780 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
781 goto err_inval; 781 goto err_inval;
782#ifdef CONFIG_NET_CLS_ROUTE 782#ifdef CONFIG_IP_ROUTE_CLASSID
783 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 783 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
784 goto err_inval; 784 goto err_inval;
785#endif 785#endif
@@ -792,7 +792,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
792 nh->nh_oif = cfg->fc_oif; 792 nh->nh_oif = cfg->fc_oif;
793 nh->nh_gw = cfg->fc_gw; 793 nh->nh_gw = cfg->fc_gw;
794 nh->nh_flags = cfg->fc_flags; 794 nh->nh_flags = cfg->fc_flags;
795#ifdef CONFIG_NET_CLS_ROUTE 795#ifdef CONFIG_IP_ROUTE_CLASSID
796 nh->nh_tclassid = cfg->fc_flow; 796 nh->nh_tclassid = cfg->fc_flow;
797#endif 797#endif
798#ifdef CONFIG_IP_ROUTE_MULTIPATH 798#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1002,7 +1002,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
1002 1002
1003 if (fi->fib_nh->nh_oif) 1003 if (fi->fib_nh->nh_oif)
1004 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 1004 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
1005#ifdef CONFIG_NET_CLS_ROUTE 1005#ifdef CONFIG_IP_ROUTE_CLASSID
1006 if (fi->fib_nh[0].nh_tclassid) 1006 if (fi->fib_nh[0].nh_tclassid)
1007 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 1007 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
1008#endif 1008#endif
@@ -1027,7 +1027,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
1027 1027
1028 if (nh->nh_gw) 1028 if (nh->nh_gw)
1029 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 1029 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1030#ifdef CONFIG_NET_CLS_ROUTE 1030#ifdef CONFIG_IP_ROUTE_CLASSID
1031 if (nh->nh_tclassid) 1031 if (nh->nh_tclassid)
1032 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 1032 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1033#endif 1033#endif
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d859bcc26cb7..d7b2b0987a3b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
340 } 340 }
341 } 341 }
342 342
343#ifdef CONFIG_NET_CLS_ROUTE 343#ifdef CONFIG_IP_ROUTE_CLASSID
344 if (unlikely(skb_dst(skb)->tclassid)) { 344 if (unlikely(skb_dst(skb)->tclassid)) {
345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); 345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
346 u32 idx = skb_dst(skb)->tclassid; 346 u32 idx = skb_dst(skb)->tclassid;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index babd1a2bae5f..f926a310075d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT
206 206
207config NF_NAT_SNMP_BASIC 207config NF_NAT_SNMP_BASIC
208 tristate "Basic SNMP-ALG support" 208 tristate "Basic SNMP-ALG support"
209 depends on NF_NAT 209 depends on NF_CONNTRACK_SNMP && NF_NAT
210 depends on NETFILTER_ADVANCED 210 depends on NETFILTER_ADVANCED
211 default NF_NAT && NF_CONNTRACK_SNMP
211 ---help--- 212 ---help---
212 213
213 This module implements an Application Layer Gateway (ALG) for 214 This module implements an Application Layer Gateway (ALG) for
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e855fffaed95..e95054c690c6 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info,
866 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 866 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
867 newinfo->initial_entries = 0; 867 newinfo->initial_entries = 0;
868 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 868 loc_cpu_entry = info->entries[raw_smp_processor_id()];
869 xt_compat_init_offsets(NFPROTO_ARP, info->number);
869 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 870 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
870 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 871 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
871 if (ret != 0) 872 if (ret != 0)
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name,
1333 duprintf("translate_compat_table: size %u\n", info->size); 1334 duprintf("translate_compat_table: size %u\n", info->size);
1334 j = 0; 1335 j = 0;
1335 xt_compat_lock(NFPROTO_ARP); 1336 xt_compat_lock(NFPROTO_ARP);
1337 xt_compat_init_offsets(NFPROTO_ARP, number);
1336 /* Walk through entries, checking offsets. */ 1338 /* Walk through entries, checking offsets. */
1337 xt_entry_foreach(iter0, entry0, total_size) { 1339 xt_entry_foreach(iter0, entry0, total_size) {
1338 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1340 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 652efea013dc..ef7d7b9680ea 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info,
1063 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1063 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1064 newinfo->initial_entries = 0; 1064 newinfo->initial_entries = 0;
1065 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1065 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1066 xt_compat_init_offsets(AF_INET, info->number);
1066 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1067 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1067 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1068 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1068 if (ret != 0) 1069 if (ret != 0)
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net,
1664 duprintf("translate_compat_table: size %u\n", info->size); 1665 duprintf("translate_compat_table: size %u\n", info->size);
1665 j = 0; 1666 j = 0;
1666 xt_compat_lock(AF_INET); 1667 xt_compat_lock(AF_INET);
1668 xt_compat_init_offsets(AF_INET, number);
1667 /* Walk through entries, checking offsets. */ 1669 /* Walk through entries, checking offsets. */
1668 xt_entry_foreach(iter0, entry0, total_size) { 1670 xt_entry_foreach(iter0, entry0, total_size) {
1669 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1671 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1e26a4897655..403ca57f6011 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
300 * that the ->target() function isn't called after ->destroy() */ 300 * that the ->target() function isn't called after ->destroy() */
301 301
302 ct = nf_ct_get(skb, &ctinfo); 302 ct = nf_ct_get(skb, &ctinfo);
303 if (ct == NULL) { 303 if (ct == NULL)
304 pr_info("no conntrack!\n");
305 /* FIXME: need to drop invalid ones, since replies
306 * to outgoing connections of other nodes will be
307 * marked as INVALID */
308 return NF_DROP; 304 return NF_DROP;
309 }
310 305
311 /* special case: ICMP error handling. conntrack distinguishes between 306 /* special case: ICMP error handling. conntrack distinguishes between
312 * error messages (RELATED) and information requests (see below) */ 307 * error messages (RELATED) and information requests (see below) */
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 72ffc8fda2e9..d76d6c9ed946 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
442 } 442 }
443#endif 443#endif
444 444
445 /* MAC logging for input path only. */ 445 if (in != NULL)
446 if (in && !out)
447 dump_mac_header(m, loginfo, skb); 446 dump_mac_header(m, loginfo, skb);
448 447
449 dump_packet(m, loginfo, skb, 0); 448 dump_packet(m, loginfo, skb, 0);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 63f60fc5d26a..5585980fce2e 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -20,6 +20,7 @@
20#include <net/netfilter/nf_conntrack_l4proto.h> 20#include <net/netfilter/nf_conntrack_l4proto.h>
21#include <net/netfilter/nf_conntrack_expect.h> 21#include <net/netfilter/nf_conntrack_expect.h>
22#include <net/netfilter/nf_conntrack_acct.h> 22#include <net/netfilter/nf_conntrack_acct.h>
23#include <linux/rculist_nulls.h>
23 24
24struct ct_iter_state { 25struct ct_iter_state {
25 struct seq_net_private p; 26 struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
35 for (st->bucket = 0; 36 for (st->bucket = 0;
36 st->bucket < net->ct.htable_size; 37 st->bucket < net->ct.htable_size;
37 st->bucket++) { 38 st->bucket++) {
38 n = rcu_dereference(net->ct.hash[st->bucket].first); 39 n = rcu_dereference(
40 hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
39 if (!is_a_nulls(n)) 41 if (!is_a_nulls(n))
40 return n; 42 return n;
41 } 43 }
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
48 struct net *net = seq_file_net(seq); 50 struct net *net = seq_file_net(seq);
49 struct ct_iter_state *st = seq->private; 51 struct ct_iter_state *st = seq->private;
50 52
51 head = rcu_dereference(head->next); 53 head = rcu_dereference(hlist_nulls_next_rcu(head));
52 while (is_a_nulls(head)) { 54 while (is_a_nulls(head)) {
53 if (likely(get_nulls_value(head) == st->bucket)) { 55 if (likely(get_nulls_value(head) == st->bucket)) {
54 if (++st->bucket >= net->ct.htable_size) 56 if (++st->bucket >= net->ct.htable_size)
55 return NULL; 57 return NULL;
56 } 58 }
57 head = rcu_dereference(net->ct.hash[st->bucket].first); 59 head = rcu_dereference(
60 hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
58 } 61 }
59 return head; 62 return head;
60} 63}
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
217 struct hlist_node *n; 220 struct hlist_node *n;
218 221
219 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 222 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
220 n = rcu_dereference(net->ct.expect_hash[st->bucket].first); 223 n = rcu_dereference(
224 hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
221 if (n) 225 if (n)
222 return n; 226 return n;
223 } 227 }
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
230 struct net *net = seq_file_net(seq); 234 struct net *net = seq_file_net(seq);
231 struct ct_expect_iter_state *st = seq->private; 235 struct ct_expect_iter_state *st = seq->private;
232 236
233 head = rcu_dereference(head->next); 237 head = rcu_dereference(hlist_next_rcu(head));
234 while (head == NULL) { 238 while (head == NULL) {
235 if (++st->bucket >= nf_ct_expect_hsize) 239 if (++st->bucket >= nf_ct_expect_hsize)
236 return NULL; 240 return NULL;
237 head = rcu_dereference(net->ct.expect_hash[st->bucket].first); 241 head = rcu_dereference(
242 hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
238 } 243 }
239 return head; 244 return head;
240} 245}
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 0f23b3f06df0..703f366fd235 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
44 44
45 /* Try to get same port: if not, try to change it. */ 45 /* Try to get same port: if not, try to change it. */
46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { 46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
47 int ret; 47 int res;
48 48
49 exp->tuple.dst.u.tcp.port = htons(port); 49 exp->tuple.dst.u.tcp.port = htons(port);
50 ret = nf_ct_expect_related(exp); 50 res = nf_ct_expect_related(exp);
51 if (ret == 0) 51 if (res == 0)
52 break; 52 break;
53 else if (ret != -EBUSY) { 53 else if (res != -EBUSY) {
54 port = 0; 54 port = 0;
55 break; 55 break;
56 } 56 }
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index c04787ce1a71..3002c0492fb0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -323,9 +323,9 @@ nf_nat_setup_info(struct nf_conn *ct,
323 323
324 /* It's done. */ 324 /* It's done. */
325 if (maniptype == IP_NAT_MANIP_DST) 325 if (maniptype == IP_NAT_MANIP_DST)
326 set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); 326 ct->status |= IPS_DST_NAT_DONE;
327 else 327 else
328 set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 328 ct->status |= IPS_SRC_NAT_DONE;
329 329
330 return NF_ACCEPT; 330 return NF_ACCEPT;
331} 331}
@@ -502,7 +502,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
502 int ret = 0; 502 int ret = 0;
503 503
504 spin_lock_bh(&nf_nat_lock); 504 spin_lock_bh(&nf_nat_lock);
505 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { 505 if (rcu_dereference_protected(
506 nf_nat_protos[proto->protonum],
507 lockdep_is_held(&nf_nat_lock)
508 ) != &nf_nat_unknown_protocol) {
506 ret = -EBUSY; 509 ret = -EBUSY;
507 goto out; 510 goto out;
508 } 511 }
@@ -679,8 +682,7 @@ static int __net_init nf_nat_net_init(struct net *net)
679{ 682{
680 /* Leave them the same for the moment. */ 683 /* Leave them the same for the moment. */
681 net->ipv4.nat_htable_size = net->ct.htable_size; 684 net->ipv4.nat_htable_size = net->ct.htable_size;
682 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 685 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
683 &net->ipv4.nat_vmalloced, 0);
684 if (!net->ipv4.nat_bysource) 686 if (!net->ipv4.nat_bysource)
685 return -ENOMEM; 687 return -ENOMEM;
686 return 0; 688 return 0;
@@ -702,8 +704,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
702{ 704{
703 nf_ct_iterate_cleanup(net, &clean_nat, NULL); 705 nf_ct_iterate_cleanup(net, &clean_nat, NULL);
704 synchronize_rcu(); 706 synchronize_rcu();
705 nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, 707 nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
706 net->ipv4.nat_htable_size);
707} 708}
708 709
709static struct pernet_operations nf_nat_net_ops = { 710static struct pernet_operations nf_nat_net_ops = {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ee5f419d0a56..8812a02078ab 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -54,6 +54,7 @@
54#include <net/netfilter/nf_conntrack_expect.h> 54#include <net/netfilter/nf_conntrack_expect.h>
55#include <net/netfilter/nf_conntrack_helper.h> 55#include <net/netfilter/nf_conntrack_helper.h>
56#include <net/netfilter/nf_nat_helper.h> 56#include <net/netfilter/nf_nat_helper.h>
57#include <linux/netfilter/nf_conntrack_snmp.h>
57 58
58MODULE_LICENSE("GPL"); 59MODULE_LICENSE("GPL");
59MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); 60MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
1310{ 1311{
1311 int ret = 0; 1312 int ret = 0;
1312 1313
1313 ret = nf_conntrack_helper_register(&snmp_helper); 1314 BUG_ON(nf_nat_snmp_hook != NULL);
1314 if (ret < 0) 1315 rcu_assign_pointer(nf_nat_snmp_hook, help);
1315 return ret; 1316
1316 ret = nf_conntrack_helper_register(&snmp_trap_helper); 1317 ret = nf_conntrack_helper_register(&snmp_trap_helper);
1317 if (ret < 0) { 1318 if (ret < 0) {
1318 nf_conntrack_helper_unregister(&snmp_helper); 1319 nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void)
1323 1324
1324static void __exit nf_nat_snmp_basic_fini(void) 1325static void __exit nf_nat_snmp_basic_fini(void)
1325{ 1326{
1326 nf_conntrack_helper_unregister(&snmp_helper); 1327 rcu_assign_pointer(nf_nat_snmp_hook, NULL);
1327 nf_conntrack_helper_unregister(&snmp_trap_helper); 1328 nf_conntrack_helper_unregister(&snmp_trap_helper);
1328} 1329}
1329 1330
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 351dc4e85242..3e5b7cc2db4f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -514,7 +514,7 @@ static const struct file_operations rt_cpu_seq_fops = {
514 .release = seq_release, 514 .release = seq_release,
515}; 515};
516 516
517#ifdef CONFIG_NET_CLS_ROUTE 517#ifdef CONFIG_IP_ROUTE_CLASSID
518static int rt_acct_proc_show(struct seq_file *m, void *v) 518static int rt_acct_proc_show(struct seq_file *m, void *v)
519{ 519{
520 struct ip_rt_acct *dst, *src; 520 struct ip_rt_acct *dst, *src;
@@ -567,14 +567,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
567 if (!pde) 567 if (!pde)
568 goto err2; 568 goto err2;
569 569
570#ifdef CONFIG_NET_CLS_ROUTE 570#ifdef CONFIG_IP_ROUTE_CLASSID
571 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); 571 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
572 if (!pde) 572 if (!pde)
573 goto err3; 573 goto err3;
574#endif 574#endif
575 return 0; 575 return 0;
576 576
577#ifdef CONFIG_NET_CLS_ROUTE 577#ifdef CONFIG_IP_ROUTE_CLASSID
578err3: 578err3:
579 remove_proc_entry("rt_cache", net->proc_net_stat); 579 remove_proc_entry("rt_cache", net->proc_net_stat);
580#endif 580#endif
@@ -588,7 +588,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
588{ 588{
589 remove_proc_entry("rt_cache", net->proc_net_stat); 589 remove_proc_entry("rt_cache", net->proc_net_stat);
590 remove_proc_entry("rt_cache", net->proc_net); 590 remove_proc_entry("rt_cache", net->proc_net);
591#ifdef CONFIG_NET_CLS_ROUTE 591#ifdef CONFIG_IP_ROUTE_CLASSID
592 remove_proc_entry("rt_acct", net->proc_net); 592 remove_proc_entry("rt_acct", net->proc_net);
593#endif 593#endif
594} 594}
@@ -1775,7 +1775,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1775 memcpy(addr, &src, 4); 1775 memcpy(addr, &src, 4);
1776} 1776}
1777 1777
1778#ifdef CONFIG_NET_CLS_ROUTE 1778#ifdef CONFIG_IP_ROUTE_CLASSID
1779static void set_class_tag(struct rtable *rt, u32 tag) 1779static void set_class_tag(struct rtable *rt, u32 tag)
1780{ 1780{
1781 if (!(rt->dst.tclassid & 0xFFFF)) 1781 if (!(rt->dst.tclassid & 0xFFFF))
@@ -1825,7 +1825,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1825 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1825 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1826 rt->rt_gateway = FIB_RES_GW(*res); 1826 rt->rt_gateway = FIB_RES_GW(*res);
1827 dst_import_metrics(dst, fi->fib_metrics); 1827 dst_import_metrics(dst, fi->fib_metrics);
1828#ifdef CONFIG_NET_CLS_ROUTE 1828#ifdef CONFIG_IP_ROUTE_CLASSID
1829 dst->tclassid = FIB_RES_NH(*res).nh_tclassid; 1829 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
1830#endif 1830#endif
1831 } 1831 }
@@ -1835,7 +1835,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1835 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) 1835 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
1836 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); 1836 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
1837 1837
1838#ifdef CONFIG_NET_CLS_ROUTE 1838#ifdef CONFIG_IP_ROUTE_CLASSID
1839#ifdef CONFIG_IP_MULTIPLE_TABLES 1839#ifdef CONFIG_IP_MULTIPLE_TABLES
1840 set_class_tag(rt, fib_rules_tclass(res)); 1840 set_class_tag(rt, fib_rules_tclass(res));
1841#endif 1841#endif
@@ -1891,7 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1891 rth->fl.mark = skb->mark; 1891 rth->fl.mark = skb->mark;
1892 rth->fl.fl4_src = saddr; 1892 rth->fl.fl4_src = saddr;
1893 rth->rt_src = saddr; 1893 rth->rt_src = saddr;
1894#ifdef CONFIG_NET_CLS_ROUTE 1894#ifdef CONFIG_IP_ROUTE_CLASSID
1895 rth->dst.tclassid = itag; 1895 rth->dst.tclassid = itag;
1896#endif 1896#endif
1897 rth->rt_iif = 1897 rth->rt_iif =
@@ -2208,7 +2208,7 @@ local_input:
2208 rth->fl.mark = skb->mark; 2208 rth->fl.mark = skb->mark;
2209 rth->fl.fl4_src = saddr; 2209 rth->fl.fl4_src = saddr;
2210 rth->rt_src = saddr; 2210 rth->rt_src = saddr;
2211#ifdef CONFIG_NET_CLS_ROUTE 2211#ifdef CONFIG_IP_ROUTE_CLASSID
2212 rth->dst.tclassid = itag; 2212 rth->dst.tclassid = itag;
2213#endif 2213#endif
2214 rth->rt_iif = 2214 rth->rt_iif =
@@ -2828,7 +2828,7 @@ static int rt_fill_info(struct net *net,
2828 } 2828 }
2829 if (rt->dst.dev) 2829 if (rt->dst.dev)
2830 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); 2830 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2831#ifdef CONFIG_NET_CLS_ROUTE 2831#ifdef CONFIG_IP_ROUTE_CLASSID
2832 if (rt->dst.tclassid) 2832 if (rt->dst.tclassid)
2833 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); 2833 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2834#endif 2834#endif
@@ -3249,9 +3249,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
3249}; 3249};
3250 3250
3251 3251
3252#ifdef CONFIG_NET_CLS_ROUTE 3252#ifdef CONFIG_IP_ROUTE_CLASSID
3253struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 3253struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
3254#endif /* CONFIG_NET_CLS_ROUTE */ 3254#endif /* CONFIG_IP_ROUTE_CLASSID */
3255 3255
3256static __initdata unsigned long rhash_entries; 3256static __initdata unsigned long rhash_entries;
3257static int __init set_rhash_entries(char *str) 3257static int __init set_rhash_entries(char *str)
@@ -3267,7 +3267,7 @@ int __init ip_rt_init(void)
3267{ 3267{
3268 int rc = 0; 3268 int rc = 0;
3269 3269
3270#ifdef CONFIG_NET_CLS_ROUTE 3270#ifdef CONFIG_IP_ROUTE_CLASSID
3271 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 3271 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
3272 if (!ip_rt_acct) 3272 if (!ip_rt_acct)
3273 panic("IP: failed to allocate ip_rt_acct\n"); 3273 panic("IP: failed to allocate ip_rt_acct\n");
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d227c644f72..47b7b8df7fac 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info,
1076 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1076 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1077 newinfo->initial_entries = 0; 1077 newinfo->initial_entries = 0;
1078 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1078 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1079 xt_compat_init_offsets(AF_INET6, info->number);
1079 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1080 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1080 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1081 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1081 if (ret != 0) 1082 if (ret != 0)
@@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net,
1679 duprintf("translate_compat_table: size %u\n", info->size); 1680 duprintf("translate_compat_table: size %u\n", info->size);
1680 j = 0; 1681 j = 0;
1681 xt_compat_lock(AF_INET6); 1682 xt_compat_lock(AF_INET6);
1683 xt_compat_init_offsets(AF_INET6, number);
1682 /* Walk through entries, checking offsets. */ 1684 /* Walk through entries, checking offsets. */
1683 xt_entry_foreach(iter0, entry0, total_size) { 1685 xt_entry_foreach(iter0, entry0, total_size) {
1684 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1686 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 09c88891a753..05027b753721 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
452 in ? in->name : "", 452 in ? in->name : "",
453 out ? out->name : ""); 453 out ? out->name : "");
454 454
455 /* MAC logging for input path only. */ 455 if (in != NULL)
456 if (in && !out)
457 dump_mac_header(m, loginfo, skb); 456 dump_mac_header(m, loginfo, skb);
458 457
459 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); 458 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 79d43aa8fa8d..66e003e1fcd5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -73,7 +73,7 @@ static struct inet_frags nf_frags;
73static struct netns_frags nf_init_frags; 73static struct netns_frags nf_init_frags;
74 74
75#ifdef CONFIG_SYSCTL 75#ifdef CONFIG_SYSCTL
76struct ctl_table nf_ct_frag6_sysctl_table[] = { 76static struct ctl_table nf_ct_frag6_sysctl_table[] = {
77 { 77 {
78 .procname = "nf_conntrack_frag6_timeout", 78 .procname = "nf_conntrack_frag6_timeout",
79 .data = &nf_init_frags.timeout, 79 .data = &nf_init_frags.timeout,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1534f2b44caf..faf7412ea453 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
85 85
86 If unsure, say `N'. 86 If unsure, say `N'.
87 87
88config NF_CONNTRACK_TIMESTAMP
89 bool 'Connection tracking timestamping'
90 depends on NETFILTER_ADVANCED
91 help
92 This option enables support for connection tracking timestamping.
93 This allows you to store the flow start-time and to obtain
94 the flow-stop time (once it has been destroyed) via Connection
95 tracking events.
96
97 If unsure, say `N'.
98
88config NF_CT_PROTO_DCCP 99config NF_CT_PROTO_DCCP
89 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' 100 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
90 depends on EXPERIMENTAL 101 depends on EXPERIMENTAL
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
185 196
186 To compile it as a module, choose M here. If unsure, say N. 197 To compile it as a module, choose M here. If unsure, say N.
187 198
199config NF_CONNTRACK_BROADCAST
200 tristate
201
188config NF_CONNTRACK_NETBIOS_NS 202config NF_CONNTRACK_NETBIOS_NS
189 tristate "NetBIOS name service protocol support" 203 tristate "NetBIOS name service protocol support"
190 depends on NETFILTER_ADVANCED 204 depends on NETFILTER_ADVANCED
205 select NF_CONNTRACK_BROADCAST
191 help 206 help
192 NetBIOS name service requests are sent as broadcast messages from an 207 NetBIOS name service requests are sent as broadcast messages from an
193 unprivileged port and responded to with unicast messages to the 208 unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
204 219
205 To compile it as a module, choose M here. If unsure, say N. 220 To compile it as a module, choose M here. If unsure, say N.
206 221
222config NF_CONNTRACK_SNMP
223 tristate "SNMP service protocol support"
224 depends on NETFILTER_ADVANCED
225 select NF_CONNTRACK_BROADCAST
226 help
227 SNMP service requests are sent as broadcast messages from an
228 unprivileged port and responded to with unicast messages to the
229 same port. This make them hard to firewall properly because connection
230 tracking doesn't deal with broadcasts. This helper tracks locally
231 originating SNMP service requests and the corresponding
232 responses. It relies on correct IP address configuration, specifically
233 netmask and broadcast address.
234
235 To compile it as a module, choose M here. If unsure, say N.
236
207config NF_CONNTRACK_PPTP 237config NF_CONNTRACK_PPTP
208 tristate "PPtP protocol support" 238 tristate "PPtP protocol support"
209 depends on NETFILTER_ADVANCED 239 depends on NETFILTER_ADVANCED
@@ -326,6 +356,16 @@ config NETFILTER_XT_CONNMARK
326 356
327comment "Xtables targets" 357comment "Xtables targets"
328 358
359config NETFILTER_XT_TARGET_AUDIT
360 tristate "AUDIT target support"
361 depends on AUDIT
362 depends on NETFILTER_ADVANCED
363 ---help---
364 This option adds an 'AUDIT' target, which can be used to create
365 audit records for packets dropped/accepted.
366
367 To compile it as a module, choose M here. If unsure, say N.
368
329config NETFILTER_XT_TARGET_CHECKSUM 369config NETFILTER_XT_TARGET_CHECKSUM
330 tristate "CHECKSUM target support" 370 tristate "CHECKSUM target support"
331 depends on IP_NF_MANGLE || IP6_NF_MANGLE 371 depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +517,7 @@ config NETFILTER_XT_TARGET_NFLOG
477config NETFILTER_XT_TARGET_NFQUEUE 517config NETFILTER_XT_TARGET_NFQUEUE
478 tristate '"NFQUEUE" target Support' 518 tristate '"NFQUEUE" target Support'
479 depends on NETFILTER_ADVANCED 519 depends on NETFILTER_ADVANCED
520 select NETFILTER_NETLINK_QUEUE
480 help 521 help
481 This target replaced the old obsolete QUEUE target. 522 This target replaced the old obsolete QUEUE target.
482 523
@@ -886,7 +927,7 @@ config NETFILTER_XT_MATCH_RATEEST
886config NETFILTER_XT_MATCH_REALM 927config NETFILTER_XT_MATCH_REALM
887 tristate '"realm" match support' 928 tristate '"realm" match support'
888 depends on NETFILTER_ADVANCED 929 depends on NETFILTER_ADVANCED
889 select NET_CLS_ROUTE 930 select IP_ROUTE_CLASSID
890 help 931 help
891 This option adds a `realm' match, which allows you to use the realm 932 This option adds a `realm' match, which allows you to use the realm
892 key from the routing subsystem inside iptables. 933 key from the routing subsystem inside iptables.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 441050f31111..9ae6878a85b1 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,7 @@
1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o 1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
2 2
3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o 3nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
4nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
4nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 5nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
5 6
6obj-$(CONFIG_NETFILTER) = netfilter.o 7obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
28obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o 29obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
29obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o 30obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
30obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o 31obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
32obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
31obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o 33obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
34obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
32obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o 35obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
33obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o 36obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
34obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o 37obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -45,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
45obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o 48obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
46 49
47# targets 50# targets
51obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
48obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o 52obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
49obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o 53obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
50obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o 54obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 32fcbe290c04..1e00bf7d27c5 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -175,13 +175,21 @@ next_hook:
175 ret = 1; 175 ret = 1;
176 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { 176 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
177 kfree_skb(skb); 177 kfree_skb(skb);
178 ret = -(verdict >> NF_VERDICT_BITS); 178 ret = NF_DROP_GETERR(verdict);
179 if (ret == 0) 179 if (ret == 0)
180 ret = -EPERM; 180 ret = -EPERM;
181 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { 181 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
182 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 182 ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
183 verdict >> NF_VERDICT_BITS)) 183 verdict >> NF_VERDICT_QBITS);
184 goto next_hook; 184 if (ret < 0) {
185 if (ret == -ECANCELED)
186 goto next_hook;
187 if (ret == -ESRCH &&
188 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
189 goto next_hook;
190 kfree_skb(skb);
191 }
192 ret = 0;
185 } 193 }
186 rcu_read_unlock(); 194 rcu_read_unlock();
187 return ret; 195 return ret;
@@ -214,7 +222,7 @@ EXPORT_SYMBOL(skb_make_writable);
214/* This does not belong here, but locally generated errors need it if connection 222/* This does not belong here, but locally generated errors need it if connection
215 tracking in use: without this, connection may not be in hash table, and hence 223 tracking in use: without this, connection may not be in hash table, and hence
216 manufactured ICMP or RST packets will not be associated with it. */ 224 manufactured ICMP or RST packets will not be associated with it. */
217void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); 225void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
218EXPORT_SYMBOL(ip_ct_attach); 226EXPORT_SYMBOL(ip_ct_attach);
219 227
220void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) 228void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -231,7 +239,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
231} 239}
232EXPORT_SYMBOL(nf_ct_attach); 240EXPORT_SYMBOL(nf_ct_attach);
233 241
234void (*nf_ct_destroy)(struct nf_conntrack *); 242void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
235EXPORT_SYMBOL(nf_ct_destroy); 243EXPORT_SYMBOL(nf_ct_destroy);
236 244
237void nf_conntrack_destroy(struct nf_conntrack *nfct) 245void nf_conntrack_destroy(struct nf_conntrack *nfct)
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index a475edee0912..5c48ffb60c28 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
43EXPORT_SYMBOL(unregister_ip_vs_app); 43EXPORT_SYMBOL(unregister_ip_vs_app);
44EXPORT_SYMBOL(register_ip_vs_app_inc); 44EXPORT_SYMBOL(register_ip_vs_app_inc);
45 45
46/* ipvs application list head */
47static LIST_HEAD(ip_vs_app_list);
48static DEFINE_MUTEX(__ip_vs_app_mutex);
49
50
51/* 46/*
52 * Get an ip_vs_app object 47 * Get an ip_vs_app object
53 */ 48 */
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
67 * Allocate/initialize app incarnation and register it in proto apps. 62 * Allocate/initialize app incarnation and register it in proto apps.
68 */ 63 */
69static int 64static int
70ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) 65ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
66 __u16 port)
71{ 67{
72 struct ip_vs_protocol *pp; 68 struct ip_vs_protocol *pp;
73 struct ip_vs_app *inc; 69 struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
98 } 94 }
99 } 95 }
100 96
101 ret = pp->register_app(inc); 97 ret = pp->register_app(net, inc);
102 if (ret) 98 if (ret)
103 goto out; 99 goto out;
104 100
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
119 * Release app incarnation 115 * Release app incarnation
120 */ 116 */
121static void 117static void
122ip_vs_app_inc_release(struct ip_vs_app *inc) 118ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
123{ 119{
124 struct ip_vs_protocol *pp; 120 struct ip_vs_protocol *pp;
125 121
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
127 return; 123 return;
128 124
129 if (pp->unregister_app) 125 if (pp->unregister_app)
130 pp->unregister_app(inc); 126 pp->unregister_app(net, inc);
131 127
132 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 128 IP_VS_DBG(9, "%s App %s:%u unregistered\n",
133 pp->name, inc->name, ntohs(inc->port)); 129 pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
168 * Register an application incarnation in protocol applications 164 * Register an application incarnation in protocol applications
169 */ 165 */
170int 166int
171register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) 167register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
168 __u16 port)
172{ 169{
170 struct netns_ipvs *ipvs = net_ipvs(net);
173 int result; 171 int result;
174 172
175 mutex_lock(&__ip_vs_app_mutex); 173 mutex_lock(&ipvs->app_mutex);
176 174
177 result = ip_vs_app_inc_new(app, proto, port); 175 result = ip_vs_app_inc_new(net, app, proto, port);
178 176
179 mutex_unlock(&__ip_vs_app_mutex); 177 mutex_unlock(&ipvs->app_mutex);
180 178
181 return result; 179 return result;
182} 180}
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
185/* 183/*
186 * ip_vs_app registration routine 184 * ip_vs_app registration routine
187 */ 185 */
188int register_ip_vs_app(struct ip_vs_app *app) 186int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
189{ 187{
188 struct netns_ipvs *ipvs = net_ipvs(net);
190 /* increase the module use count */ 189 /* increase the module use count */
191 ip_vs_use_count_inc(); 190 ip_vs_use_count_inc();
192 191
193 mutex_lock(&__ip_vs_app_mutex); 192 mutex_lock(&ipvs->app_mutex);
194 193
195 list_add(&app->a_list, &ip_vs_app_list); 194 list_add(&app->a_list, &ipvs->app_list);
196 195
197 mutex_unlock(&__ip_vs_app_mutex); 196 mutex_unlock(&ipvs->app_mutex);
198 197
199 return 0; 198 return 0;
200} 199}
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
204 * ip_vs_app unregistration routine 203 * ip_vs_app unregistration routine
205 * We are sure there are no app incarnations attached to services 204 * We are sure there are no app incarnations attached to services
206 */ 205 */
207void unregister_ip_vs_app(struct ip_vs_app *app) 206void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
208{ 207{
208 struct netns_ipvs *ipvs = net_ipvs(net);
209 struct ip_vs_app *inc, *nxt; 209 struct ip_vs_app *inc, *nxt;
210 210
211 mutex_lock(&__ip_vs_app_mutex); 211 mutex_lock(&ipvs->app_mutex);
212 212
213 list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { 213 list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
214 ip_vs_app_inc_release(inc); 214 ip_vs_app_inc_release(net, inc);
215 } 215 }
216 216
217 list_del(&app->a_list); 217 list_del(&app->a_list);
218 218
219 mutex_unlock(&__ip_vs_app_mutex); 219 mutex_unlock(&ipvs->app_mutex);
220 220
221 /* decrease the module use count */ 221 /* decrease the module use count */
222 ip_vs_use_count_dec(); 222 ip_vs_use_count_dec();
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
226/* 226/*
227 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) 227 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
228 */ 228 */
229int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) 229int ip_vs_bind_app(struct ip_vs_conn *cp,
230 struct ip_vs_protocol *pp)
230{ 231{
231 return pp->app_conn_bind(cp); 232 return pp->app_conn_bind(cp);
232} 233}
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
481 * /proc/net/ip_vs_app entry function 482 * /proc/net/ip_vs_app entry function
482 */ 483 */
483 484
484static struct ip_vs_app *ip_vs_app_idx(loff_t pos) 485static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
485{ 486{
486 struct ip_vs_app *app, *inc; 487 struct ip_vs_app *app, *inc;
487 488
488 list_for_each_entry(app, &ip_vs_app_list, a_list) { 489 list_for_each_entry(app, &ipvs->app_list, a_list) {
489 list_for_each_entry(inc, &app->incs_list, a_list) { 490 list_for_each_entry(inc, &app->incs_list, a_list) {
490 if (pos-- == 0) 491 if (pos-- == 0)
491 return inc; 492 return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
497 498
498static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) 499static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
499{ 500{
500 mutex_lock(&__ip_vs_app_mutex); 501 struct net *net = seq_file_net(seq);
502 struct netns_ipvs *ipvs = net_ipvs(net);
501 503
502 return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; 504 mutex_lock(&ipvs->app_mutex);
505
506 return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
503} 507}
504 508
505static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) 509static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
506{ 510{
507 struct ip_vs_app *inc, *app; 511 struct ip_vs_app *inc, *app;
508 struct list_head *e; 512 struct list_head *e;
513 struct net *net = seq_file_net(seq);
514 struct netns_ipvs *ipvs = net_ipvs(net);
509 515
510 ++*pos; 516 ++*pos;
511 if (v == SEQ_START_TOKEN) 517 if (v == SEQ_START_TOKEN)
512 return ip_vs_app_idx(0); 518 return ip_vs_app_idx(ipvs, 0);
513 519
514 inc = v; 520 inc = v;
515 app = inc->app; 521 app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
518 return list_entry(e, struct ip_vs_app, a_list); 524 return list_entry(e, struct ip_vs_app, a_list);
519 525
520 /* go on to next application */ 526 /* go on to next application */
521 for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { 527 for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
522 app = list_entry(e, struct ip_vs_app, a_list); 528 app = list_entry(e, struct ip_vs_app, a_list);
523 list_for_each_entry(inc, &app->incs_list, a_list) { 529 list_for_each_entry(inc, &app->incs_list, a_list) {
524 return inc; 530 return inc;
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
529 535
530static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) 536static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
531{ 537{
532 mutex_unlock(&__ip_vs_app_mutex); 538 struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
539
540 mutex_unlock(&ipvs->app_mutex);
533} 541}
534 542
535static int ip_vs_app_seq_show(struct seq_file *seq, void *v) 543static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
557 565
558static int ip_vs_app_open(struct inode *inode, struct file *file) 566static int ip_vs_app_open(struct inode *inode, struct file *file)
559{ 567{
560 return seq_open(file, &ip_vs_app_seq_ops); 568 return seq_open_net(inode, file, &ip_vs_app_seq_ops,
569 sizeof(struct seq_net_private));
561} 570}
562 571
563static const struct file_operations ip_vs_app_fops = { 572static const struct file_operations ip_vs_app_fops = {
@@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = {
569}; 578};
570#endif 579#endif
571 580
572int __init ip_vs_app_init(void) 581static int __net_init __ip_vs_app_init(struct net *net)
573{ 582{
574 /* we will replace it with proc_net_ipvs_create() soon */ 583 struct netns_ipvs *ipvs = net_ipvs(net);
575 proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); 584
585 INIT_LIST_HEAD(&ipvs->app_list);
586 __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
587 proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
576 return 0; 588 return 0;
577} 589}
578 590
591static void __net_exit __ip_vs_app_cleanup(struct net *net)
592{
593 proc_net_remove(net, "ip_vs_app");
594}
595
596static struct pernet_operations ip_vs_app_ops = {
597 .init = __ip_vs_app_init,
598 .exit = __ip_vs_app_cleanup,
599};
600
601int __init ip_vs_app_init(void)
602{
603 int rv;
604
605 rv = register_pernet_subsys(&ip_vs_app_ops);
606 return rv;
607}
608
579 609
580void ip_vs_app_cleanup(void) 610void ip_vs_app_cleanup(void)
581{ 611{
582 proc_net_remove(&init_net, "ip_vs_app"); 612 unregister_pernet_subsys(&ip_vs_app_ops);
583} 613}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e9adecdc8ca4..83233fe24a08 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -48,35 +48,32 @@
48/* 48/*
49 * Connection hash size. Default is what was selected at compile time. 49 * Connection hash size. Default is what was selected at compile time.
50*/ 50*/
51int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; 51static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
52module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); 52module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
53MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); 53MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
54 54
55/* size and mask values */ 55/* size and mask values */
56int ip_vs_conn_tab_size; 56int ip_vs_conn_tab_size __read_mostly;
57int ip_vs_conn_tab_mask; 57static int ip_vs_conn_tab_mask __read_mostly;
58 58
59/* 59/*
60 * Connection hash table: for input and output packets lookups of IPVS 60 * Connection hash table: for input and output packets lookups of IPVS
61 */ 61 */
62static struct list_head *ip_vs_conn_tab; 62static struct list_head *ip_vs_conn_tab __read_mostly;
63 63
64/* SLAB cache for IPVS connections */ 64/* SLAB cache for IPVS connections */
65static struct kmem_cache *ip_vs_conn_cachep __read_mostly; 65static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
66 66
67/* counter for current IPVS connections */
68static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
69
70/* counter for no client port connections */ 67/* counter for no client port connections */
71static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); 68static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
72 69
73/* random value for IPVS connection hash */ 70/* random value for IPVS connection hash */
74static unsigned int ip_vs_conn_rnd; 71static unsigned int ip_vs_conn_rnd __read_mostly;
75 72
76/* 73/*
77 * Fine locking granularity for big connection hash table 74 * Fine locking granularity for big connection hash table
78 */ 75 */
79#define CT_LOCKARRAY_BITS 4 76#define CT_LOCKARRAY_BITS 5
80#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) 77#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
81#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) 78#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
82 79
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
133/* 130/*
134 * Returns hash value for IPVS connection entry 131 * Returns hash value for IPVS connection entry
135 */ 132 */
136static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, 133static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
137 const union nf_inet_addr *addr, 134 const union nf_inet_addr *addr,
138 __be16 port) 135 __be16 port)
139{ 136{
140#ifdef CONFIG_IP_VS_IPV6 137#ifdef CONFIG_IP_VS_IPV6
141 if (af == AF_INET6) 138 if (af == AF_INET6)
142 return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), 139 return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
143 (__force u32)port, proto, ip_vs_conn_rnd) 140 (__force u32)port, proto, ip_vs_conn_rnd) ^
144 & ip_vs_conn_tab_mask; 141 ((size_t)net>>8)) & ip_vs_conn_tab_mask;
145#endif 142#endif
146 return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, 143 return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
147 ip_vs_conn_rnd) 144 ip_vs_conn_rnd) ^
148 & ip_vs_conn_tab_mask; 145 ((size_t)net>>8)) & ip_vs_conn_tab_mask;
149} 146}
150 147
151static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, 148static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
166 port = p->vport; 163 port = p->vport;
167 } 164 }
168 165
169 return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); 166 return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
170} 167}
171 168
172static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) 169static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
173{ 170{
174 struct ip_vs_conn_param p; 171 struct ip_vs_conn_param p;
175 172
176 ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, 173 ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
177 NULL, 0, &p); 174 &cp->caddr, cp->cport, NULL, 0, &p);
178 175
179 if (cp->dest && cp->dest->svc->pe) { 176 if (cp->pe) {
180 p.pe = cp->dest->svc->pe; 177 p.pe = cp->pe;
181 p.pe_data = cp->pe_data; 178 p.pe_data = cp->pe_data;
182 p.pe_data_len = cp->pe_data_len; 179 p.pe_data_len = cp->pe_data_len;
183 } 180 }
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
186} 183}
187 184
188/* 185/*
189 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. 186 * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
190 * returns bool success. 187 * returns bool success.
191 */ 188 */
192static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) 189static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
269 266
270 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 267 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
271 if (cp->af == p->af && 268 if (cp->af == p->af &&
269 p->cport == cp->cport && p->vport == cp->vport &&
272 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && 270 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
273 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && 271 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
274 p->cport == cp->cport && p->vport == cp->vport &&
275 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 272 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
276 p->protocol == cp->protocol) { 273 p->protocol == cp->protocol &&
274 ip_vs_conn_net_eq(cp, p->net)) {
277 /* HIT */ 275 /* HIT */
278 atomic_inc(&cp->refcnt); 276 atomic_inc(&cp->refcnt);
279 ct_read_unlock(hash); 277 ct_read_unlock(hash);
@@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
313 struct ip_vs_conn_param *p) 311 struct ip_vs_conn_param *p)
314{ 312{
315 __be16 _ports[2], *pptr; 313 __be16 _ports[2], *pptr;
314 struct net *net = skb_net(skb);
316 315
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 316 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
318 if (pptr == NULL) 317 if (pptr == NULL)
319 return 1; 318 return 1;
320 319
321 if (likely(!inverse)) 320 if (likely(!inverse))
322 ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], 321 ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
323 &iph->daddr, pptr[1], p); 322 pptr[0], &iph->daddr, pptr[1], p);
324 else 323 else
325 ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], 324 ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
326 &iph->saddr, pptr[0], p); 325 pptr[1], &iph->saddr, pptr[0], p);
327 return 0; 326 return 0;
328} 327}
329 328
330struct ip_vs_conn * 329struct ip_vs_conn *
331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 330ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
332 struct ip_vs_protocol *pp,
333 const struct ip_vs_iphdr *iph, 331 const struct ip_vs_iphdr *iph,
334 unsigned int proto_off, int inverse) 332 unsigned int proto_off, int inverse)
335{ 333{
@@ -353,8 +351,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
353 ct_read_lock(hash); 351 ct_read_lock(hash);
354 352
355 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 353 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
354 if (!ip_vs_conn_net_eq(cp, p->net))
355 continue;
356 if (p->pe_data && p->pe->ct_match) { 356 if (p->pe_data && p->pe->ct_match) {
357 if (p->pe->ct_match(p, cp)) 357 if (p->pe == cp->pe && p->pe->ct_match(p, cp))
358 goto out; 358 goto out;
359 continue; 359 continue;
360 } 360 }
@@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
404 404
405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
406 if (cp->af == p->af && 406 if (cp->af == p->af &&
407 p->vport == cp->cport && p->cport == cp->dport &&
407 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && 408 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
408 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && 409 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
409 p->vport == cp->cport && p->cport == cp->dport && 410 p->protocol == cp->protocol &&
410 p->protocol == cp->protocol) { 411 ip_vs_conn_net_eq(cp, p->net)) {
411 /* HIT */ 412 /* HIT */
412 atomic_inc(&cp->refcnt); 413 atomic_inc(&cp->refcnt);
413 ret = cp; 414 ret = cp;
@@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
428 429
429struct ip_vs_conn * 430struct ip_vs_conn *
430ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 431ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
431 struct ip_vs_protocol *pp,
432 const struct ip_vs_iphdr *iph, 432 const struct ip_vs_iphdr *iph,
433 unsigned int proto_off, int inverse) 433 unsigned int proto_off, int inverse)
434{ 434{
@@ -611,9 +611,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
611 struct ip_vs_dest *dest; 611 struct ip_vs_dest *dest;
612 612
613 if ((cp) && (!cp->dest)) { 613 if ((cp) && (!cp->dest)) {
614 dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, 614 dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
615 &cp->vaddr, cp->vport, 615 cp->dport, &cp->vaddr, cp->vport,
616 cp->protocol); 616 cp->protocol, cp->fwmark);
617 ip_vs_bind_dest(cp, dest); 617 ip_vs_bind_dest(cp, dest);
618 return dest; 618 return dest;
619 } else 619 } else
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
686int ip_vs_check_template(struct ip_vs_conn *ct) 686int ip_vs_check_template(struct ip_vs_conn *ct)
687{ 687{
688 struct ip_vs_dest *dest = ct->dest; 688 struct ip_vs_dest *dest = ct->dest;
689 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
689 690
690 /* 691 /*
691 * Checking the dest server status. 692 * Checking the dest server status.
692 */ 693 */
693 if ((dest == NULL) || 694 if ((dest == NULL) ||
694 !(dest->flags & IP_VS_DEST_F_AVAILABLE) || 695 !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
695 (sysctl_ip_vs_expire_quiescent_template && 696 (ipvs->sysctl_expire_quiescent_template &&
696 (atomic_read(&dest->weight) == 0))) { 697 (atomic_read(&dest->weight) == 0))) {
697 IP_VS_DBG_BUF(9, "check_template: dest not available for " 698 IP_VS_DBG_BUF(9, "check_template: dest not available for "
698 "protocol %s s:%s:%d v:%s:%d " 699 "protocol %s s:%s:%d v:%s:%d "
@@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
730static void ip_vs_conn_expire(unsigned long data) 731static void ip_vs_conn_expire(unsigned long data)
731{ 732{
732 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 733 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
734 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
733 735
734 cp->timeout = 60*HZ; 736 cp->timeout = 60*HZ;
735 737
@@ -765,13 +767,14 @@ static void ip_vs_conn_expire(unsigned long data)
765 if (cp->flags & IP_VS_CONN_F_NFCT) 767 if (cp->flags & IP_VS_CONN_F_NFCT)
766 ip_vs_conn_drop_conntrack(cp); 768 ip_vs_conn_drop_conntrack(cp);
767 769
770 ip_vs_pe_put(cp->pe);
768 kfree(cp->pe_data); 771 kfree(cp->pe_data);
769 if (unlikely(cp->app != NULL)) 772 if (unlikely(cp->app != NULL))
770 ip_vs_unbind_app(cp); 773 ip_vs_unbind_app(cp);
771 ip_vs_unbind_dest(cp); 774 ip_vs_unbind_dest(cp);
772 if (cp->flags & IP_VS_CONN_F_NO_CPORT) 775 if (cp->flags & IP_VS_CONN_F_NO_CPORT)
773 atomic_dec(&ip_vs_conn_no_cport_cnt); 776 atomic_dec(&ip_vs_conn_no_cport_cnt);
774 atomic_dec(&ip_vs_conn_count); 777 atomic_dec(&ipvs->conn_count);
775 778
776 kmem_cache_free(ip_vs_conn_cachep, cp); 779 kmem_cache_free(ip_vs_conn_cachep, cp);
777 return; 780 return;
@@ -802,10 +805,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
802struct ip_vs_conn * 805struct ip_vs_conn *
803ip_vs_conn_new(const struct ip_vs_conn_param *p, 806ip_vs_conn_new(const struct ip_vs_conn_param *p,
804 const union nf_inet_addr *daddr, __be16 dport, unsigned flags, 807 const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
805 struct ip_vs_dest *dest) 808 struct ip_vs_dest *dest, __u32 fwmark)
806{ 809{
807 struct ip_vs_conn *cp; 810 struct ip_vs_conn *cp;
808 struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); 811 struct netns_ipvs *ipvs = net_ipvs(p->net);
812 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
813 p->protocol);
809 814
810 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); 815 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
811 if (cp == NULL) { 816 if (cp == NULL) {
@@ -815,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
815 820
816 INIT_LIST_HEAD(&cp->c_list); 821 INIT_LIST_HEAD(&cp->c_list);
817 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); 822 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
823 ip_vs_conn_net_set(cp, p->net);
818 cp->af = p->af; 824 cp->af = p->af;
819 cp->protocol = p->protocol; 825 cp->protocol = p->protocol;
820 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); 826 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +832,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
826 &cp->daddr, daddr); 832 &cp->daddr, daddr);
827 cp->dport = dport; 833 cp->dport = dport;
828 cp->flags = flags; 834 cp->flags = flags;
829 if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) { 835 cp->fwmark = fwmark;
836 if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
837 ip_vs_pe_get(p->pe);
838 cp->pe = p->pe;
830 cp->pe_data = p->pe_data; 839 cp->pe_data = p->pe_data;
831 cp->pe_data_len = p->pe_data_len; 840 cp->pe_data_len = p->pe_data_len;
832 } 841 }
@@ -842,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
842 atomic_set(&cp->n_control, 0); 851 atomic_set(&cp->n_control, 0);
843 atomic_set(&cp->in_pkts, 0); 852 atomic_set(&cp->in_pkts, 0);
844 853
845 atomic_inc(&ip_vs_conn_count); 854 atomic_inc(&ipvs->conn_count);
846 if (flags & IP_VS_CONN_F_NO_CPORT) 855 if (flags & IP_VS_CONN_F_NO_CPORT)
847 atomic_inc(&ip_vs_conn_no_cport_cnt); 856 atomic_inc(&ip_vs_conn_no_cport_cnt);
848 857
@@ -861,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
861#endif 870#endif
862 ip_vs_bind_xmit(cp); 871 ip_vs_bind_xmit(cp);
863 872
864 if (unlikely(pp && atomic_read(&pp->appcnt))) 873 if (unlikely(pd && atomic_read(&pd->appcnt)))
865 ip_vs_bind_app(cp, pp); 874 ip_vs_bind_app(cp, pd->pp);
866 875
867 /* 876 /*
868 * Allow conntrack to be preserved. By default, conntrack 877 * Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
871 * IP_VS_CONN_F_ONE_PACKET too. 880 * IP_VS_CONN_F_ONE_PACKET too.
872 */ 881 */
873 882
874 if (ip_vs_conntrack_enabled()) 883 if (ip_vs_conntrack_enabled(ipvs))
875 cp->flags |= IP_VS_CONN_F_NFCT; 884 cp->flags |= IP_VS_CONN_F_NFCT;
876 885
877 /* Hash it in the ip_vs_conn_tab finally */ 886 /* Hash it in the ip_vs_conn_tab finally */
@@ -884,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
884 * /proc/net/ip_vs_conn entries 893 * /proc/net/ip_vs_conn entries
885 */ 894 */
886#ifdef CONFIG_PROC_FS 895#ifdef CONFIG_PROC_FS
896struct ip_vs_iter_state {
897 struct seq_net_private p;
898 struct list_head *l;
899};
887 900
888static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) 901static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
889{ 902{
890 int idx; 903 int idx;
891 struct ip_vs_conn *cp; 904 struct ip_vs_conn *cp;
905 struct ip_vs_iter_state *iter = seq->private;
892 906
893 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 907 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
894 ct_read_lock_bh(idx); 908 ct_read_lock_bh(idx);
895 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 909 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
896 if (pos-- == 0) { 910 if (pos-- == 0) {
897 seq->private = &ip_vs_conn_tab[idx]; 911 iter->l = &ip_vs_conn_tab[idx];
898 return cp; 912 return cp;
899 } 913 }
900 } 914 }
@@ -906,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
906 920
907static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) 921static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
908{ 922{
909 seq->private = NULL; 923 struct ip_vs_iter_state *iter = seq->private;
924
925 iter->l = NULL;
910 return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; 926 return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
911} 927}
912 928
913static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) 929static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
914{ 930{
915 struct ip_vs_conn *cp = v; 931 struct ip_vs_conn *cp = v;
916 struct list_head *e, *l = seq->private; 932 struct ip_vs_iter_state *iter = seq->private;
933 struct list_head *e, *l = iter->l;
917 int idx; 934 int idx;
918 935
919 ++*pos; 936 ++*pos;
@@ -930,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
930 while (++idx < ip_vs_conn_tab_size) { 947 while (++idx < ip_vs_conn_tab_size) {
931 ct_read_lock_bh(idx); 948 ct_read_lock_bh(idx);
932 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 949 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
933 seq->private = &ip_vs_conn_tab[idx]; 950 iter->l = &ip_vs_conn_tab[idx];
934 return cp; 951 return cp;
935 } 952 }
936 ct_read_unlock_bh(idx); 953 ct_read_unlock_bh(idx);
937 } 954 }
938 seq->private = NULL; 955 iter->l = NULL;
939 return NULL; 956 return NULL;
940} 957}
941 958
942static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) 959static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
943{ 960{
944 struct list_head *l = seq->private; 961 struct ip_vs_iter_state *iter = seq->private;
962 struct list_head *l = iter->l;
945 963
946 if (l) 964 if (l)
947 ct_read_unlock_bh(l - ip_vs_conn_tab); 965 ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +973,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
955 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); 973 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
956 else { 974 else {
957 const struct ip_vs_conn *cp = v; 975 const struct ip_vs_conn *cp = v;
976 struct net *net = seq_file_net(seq);
958 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; 977 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
959 size_t len = 0; 978 size_t len = 0;
960 979
961 if (cp->dest && cp->pe_data && 980 if (!ip_vs_conn_net_eq(cp, net))
962 cp->dest->svc->pe->show_pe_data) { 981 return 0;
982 if (cp->pe_data) {
963 pe_data[0] = ' '; 983 pe_data[0] = ' ';
964 len = strlen(cp->dest->svc->pe->name); 984 len = strlen(cp->pe->name);
965 memcpy(pe_data + 1, cp->dest->svc->pe->name, len); 985 memcpy(pe_data + 1, cp->pe->name, len);
966 pe_data[len + 1] = ' '; 986 pe_data[len + 1] = ' ';
967 len += 2; 987 len += 2;
968 len += cp->dest->svc->pe->show_pe_data(cp, 988 len += cp->pe->show_pe_data(cp, pe_data + len);
969 pe_data + len);
970 } 989 }
971 pe_data[len] = '\0'; 990 pe_data[len] = '\0';
972 991
@@ -1004,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
1004 1023
1005static int ip_vs_conn_open(struct inode *inode, struct file *file) 1024static int ip_vs_conn_open(struct inode *inode, struct file *file)
1006{ 1025{
1007 return seq_open(file, &ip_vs_conn_seq_ops); 1026 return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
1027 sizeof(struct ip_vs_iter_state));
1008} 1028}
1009 1029
1010static const struct file_operations ip_vs_conn_fops = { 1030static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
1031 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); 1051 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
1032 else { 1052 else {
1033 const struct ip_vs_conn *cp = v; 1053 const struct ip_vs_conn *cp = v;
1054 struct net *net = seq_file_net(seq);
1055
1056 if (!ip_vs_conn_net_eq(cp, net))
1057 return 0;
1034 1058
1035#ifdef CONFIG_IP_VS_IPV6 1059#ifdef CONFIG_IP_VS_IPV6
1036 if (cp->af == AF_INET6) 1060 if (cp->af == AF_INET6)
@@ -1067,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
1067 1091
1068static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) 1092static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
1069{ 1093{
1070 return seq_open(file, &ip_vs_conn_sync_seq_ops); 1094 return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
1095 sizeof(struct ip_vs_iter_state));
1071} 1096}
1072 1097
1073static const struct file_operations ip_vs_conn_sync_fops = { 1098static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1113,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
1113} 1138}
1114 1139
1115/* Called from keventd and must protect itself from softirqs */ 1140/* Called from keventd and must protect itself from softirqs */
1116void ip_vs_random_dropentry(void) 1141void ip_vs_random_dropentry(struct net *net)
1117{ 1142{
1118 int idx; 1143 int idx;
1119 struct ip_vs_conn *cp; 1144 struct ip_vs_conn *cp;
@@ -1133,7 +1158,8 @@ void ip_vs_random_dropentry(void)
1133 if (cp->flags & IP_VS_CONN_F_TEMPLATE) 1158 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
1134 /* connection template */ 1159 /* connection template */
1135 continue; 1160 continue;
1136 1161 if (!ip_vs_conn_net_eq(cp, net))
1162 continue;
1137 if (cp->protocol == IPPROTO_TCP) { 1163 if (cp->protocol == IPPROTO_TCP) {
1138 switch(cp->state) { 1164 switch(cp->state) {
1139 case IP_VS_TCP_S_SYN_RECV: 1165 case IP_VS_TCP_S_SYN_RECV:
@@ -1168,12 +1194,13 @@ void ip_vs_random_dropentry(void)
1168/* 1194/*
1169 * Flush all the connection entries in the ip_vs_conn_tab 1195 * Flush all the connection entries in the ip_vs_conn_tab
1170 */ 1196 */
1171static void ip_vs_conn_flush(void) 1197static void ip_vs_conn_flush(struct net *net)
1172{ 1198{
1173 int idx; 1199 int idx;
1174 struct ip_vs_conn *cp; 1200 struct ip_vs_conn *cp;
1201 struct netns_ipvs *ipvs = net_ipvs(net);
1175 1202
1176 flush_again: 1203flush_again:
1177 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 1204 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
1178 /* 1205 /*
1179 * Lock is actually needed in this loop. 1206 * Lock is actually needed in this loop.
@@ -1181,7 +1208,8 @@ static void ip_vs_conn_flush(void)
1181 ct_write_lock_bh(idx); 1208 ct_write_lock_bh(idx);
1182 1209
1183 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 1210 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
1184 1211 if (!ip_vs_conn_net_eq(cp, net))
1212 continue;
1185 IP_VS_DBG(4, "del connection\n"); 1213 IP_VS_DBG(4, "del connection\n");
1186 ip_vs_conn_expire_now(cp); 1214 ip_vs_conn_expire_now(cp);
1187 if (cp->control) { 1215 if (cp->control) {
@@ -1194,16 +1222,41 @@ static void ip_vs_conn_flush(void)
1194 1222
1195 /* the counter may be not NULL, because maybe some conn entries 1223 /* the counter may be not NULL, because maybe some conn entries
1196 are run by slow timer handler or unhashed but still referred */ 1224 are run by slow timer handler or unhashed but still referred */
1197 if (atomic_read(&ip_vs_conn_count) != 0) { 1225 if (atomic_read(&ipvs->conn_count) != 0) {
1198 schedule(); 1226 schedule();
1199 goto flush_again; 1227 goto flush_again;
1200 } 1228 }
1201} 1229}
1230/*
1231 * per netns init and exit
1232 */
1233int __net_init __ip_vs_conn_init(struct net *net)
1234{
1235 struct netns_ipvs *ipvs = net_ipvs(net);
1236
1237 atomic_set(&ipvs->conn_count, 0);
1238
1239 proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
1240 proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
1241 return 0;
1242}
1202 1243
1244static void __net_exit __ip_vs_conn_cleanup(struct net *net)
1245{
1246 /* flush all the connection entries first */
1247 ip_vs_conn_flush(net);
1248 proc_net_remove(net, "ip_vs_conn");
1249 proc_net_remove(net, "ip_vs_conn_sync");
1250}
1251static struct pernet_operations ipvs_conn_ops = {
1252 .init = __ip_vs_conn_init,
1253 .exit = __ip_vs_conn_cleanup,
1254};
1203 1255
1204int __init ip_vs_conn_init(void) 1256int __init ip_vs_conn_init(void)
1205{ 1257{
1206 int idx; 1258 int idx;
1259 int retc;
1207 1260
1208 /* Compute size and mask */ 1261 /* Compute size and mask */
1209 ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; 1262 ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1241,24 +1294,18 @@ int __init ip_vs_conn_init(void)
1241 rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); 1294 rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
1242 } 1295 }
1243 1296
1244 proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); 1297 retc = register_pernet_subsys(&ipvs_conn_ops);
1245 proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
1246 1298
1247 /* calculate the random value for connection hash */ 1299 /* calculate the random value for connection hash */
1248 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); 1300 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
1249 1301
1250 return 0; 1302 return retc;
1251} 1303}
1252 1304
1253
1254void ip_vs_conn_cleanup(void) 1305void ip_vs_conn_cleanup(void)
1255{ 1306{
1256 /* flush all the connection entries first */ 1307 unregister_pernet_subsys(&ipvs_conn_ops);
1257 ip_vs_conn_flush();
1258
1259 /* Release the empty cache */ 1308 /* Release the empty cache */
1260 kmem_cache_destroy(ip_vs_conn_cachep); 1309 kmem_cache_destroy(ip_vs_conn_cachep);
1261 proc_net_remove(&init_net, "ip_vs_conn");
1262 proc_net_remove(&init_net, "ip_vs_conn_sync");
1263 vfree(ip_vs_conn_tab); 1310 vfree(ip_vs_conn_tab);
1264} 1311}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9c5a04..f36a84f33efb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -41,6 +41,7 @@
41#include <net/icmp.h> /* for icmp_send */ 41#include <net/icmp.h> /* for icmp_send */
42#include <net/route.h> 42#include <net/route.h>
43#include <net/ip6_checksum.h> 43#include <net/ip6_checksum.h>
44#include <net/netns/generic.h> /* net_generic() */
44 45
45#include <linux/netfilter.h> 46#include <linux/netfilter.h>
46#include <linux/netfilter_ipv4.h> 47#include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
68EXPORT_SYMBOL(ip_vs_get_debug_level); 69EXPORT_SYMBOL(ip_vs_get_debug_level);
69#endif 70#endif
70 71
72int ip_vs_net_id __read_mostly;
73#ifdef IP_VS_GENERIC_NETNS
74EXPORT_SYMBOL(ip_vs_net_id);
75#endif
76/* netns cnt used for uniqueness */
77static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
71 78
72/* ID used in ICMP lookups */ 79/* ID used in ICMP lookups */
73#define icmp_id(icmph) (((icmph)->un).echo.id) 80#define icmp_id(icmph) (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
108ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) 115ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
109{ 116{
110 struct ip_vs_dest *dest = cp->dest; 117 struct ip_vs_dest *dest = cp->dest;
118 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
119
111 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 120 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
112 spin_lock(&dest->stats.lock); 121 struct ip_vs_cpu_stats *s;
113 dest->stats.ustats.inpkts++; 122
114 dest->stats.ustats.inbytes += skb->len; 123 s = this_cpu_ptr(dest->stats.cpustats);
115 spin_unlock(&dest->stats.lock); 124 s->ustats.inpkts++;
116 125 u64_stats_update_begin(&s->syncp);
117 spin_lock(&dest->svc->stats.lock); 126 s->ustats.inbytes += skb->len;
118 dest->svc->stats.ustats.inpkts++; 127 u64_stats_update_end(&s->syncp);
119 dest->svc->stats.ustats.inbytes += skb->len; 128
120 spin_unlock(&dest->svc->stats.lock); 129 s = this_cpu_ptr(dest->svc->stats.cpustats);
121 130 s->ustats.inpkts++;
122 spin_lock(&ip_vs_stats.lock); 131 u64_stats_update_begin(&s->syncp);
123 ip_vs_stats.ustats.inpkts++; 132 s->ustats.inbytes += skb->len;
124 ip_vs_stats.ustats.inbytes += skb->len; 133 u64_stats_update_end(&s->syncp);
125 spin_unlock(&ip_vs_stats.lock); 134
135 s = this_cpu_ptr(ipvs->cpustats);
136 s->ustats.inpkts++;
137 u64_stats_update_begin(&s->syncp);
138 s->ustats.inbytes += skb->len;
139 u64_stats_update_end(&s->syncp);
126 } 140 }
127} 141}
128 142
@@ -131,21 +145,28 @@ static inline void
131ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) 145ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
132{ 146{
133 struct ip_vs_dest *dest = cp->dest; 147 struct ip_vs_dest *dest = cp->dest;
148 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
149
134 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 150 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
135 spin_lock(&dest->stats.lock); 151 struct ip_vs_cpu_stats *s;
136 dest->stats.ustats.outpkts++; 152
137 dest->stats.ustats.outbytes += skb->len; 153 s = this_cpu_ptr(dest->stats.cpustats);
138 spin_unlock(&dest->stats.lock); 154 s->ustats.outpkts++;
139 155 u64_stats_update_begin(&s->syncp);
140 spin_lock(&dest->svc->stats.lock); 156 s->ustats.outbytes += skb->len;
141 dest->svc->stats.ustats.outpkts++; 157 u64_stats_update_end(&s->syncp);
142 dest->svc->stats.ustats.outbytes += skb->len; 158
143 spin_unlock(&dest->svc->stats.lock); 159 s = this_cpu_ptr(dest->svc->stats.cpustats);
144 160 s->ustats.outpkts++;
145 spin_lock(&ip_vs_stats.lock); 161 u64_stats_update_begin(&s->syncp);
146 ip_vs_stats.ustats.outpkts++; 162 s->ustats.outbytes += skb->len;
147 ip_vs_stats.ustats.outbytes += skb->len; 163 u64_stats_update_end(&s->syncp);
148 spin_unlock(&ip_vs_stats.lock); 164
165 s = this_cpu_ptr(ipvs->cpustats);
166 s->ustats.outpkts++;
167 u64_stats_update_begin(&s->syncp);
168 s->ustats.outbytes += skb->len;
169 u64_stats_update_end(&s->syncp);
149 } 170 }
150} 171}
151 172
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
153static inline void 174static inline void
154ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) 175ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
155{ 176{
156 spin_lock(&cp->dest->stats.lock); 177 struct netns_ipvs *ipvs = net_ipvs(svc->net);
157 cp->dest->stats.ustats.conns++; 178 struct ip_vs_cpu_stats *s;
158 spin_unlock(&cp->dest->stats.lock); 179
180 s = this_cpu_ptr(cp->dest->stats.cpustats);
181 s->ustats.conns++;
159 182
160 spin_lock(&svc->stats.lock); 183 s = this_cpu_ptr(svc->stats.cpustats);
161 svc->stats.ustats.conns++; 184 s->ustats.conns++;
162 spin_unlock(&svc->stats.lock);
163 185
164 spin_lock(&ip_vs_stats.lock); 186 s = this_cpu_ptr(ipvs->cpustats);
165 ip_vs_stats.ustats.conns++; 187 s->ustats.conns++;
166 spin_unlock(&ip_vs_stats.lock);
167} 188}
168 189
169 190
170static inline int 191static inline int
171ip_vs_set_state(struct ip_vs_conn *cp, int direction, 192ip_vs_set_state(struct ip_vs_conn *cp, int direction,
172 const struct sk_buff *skb, 193 const struct sk_buff *skb,
173 struct ip_vs_protocol *pp) 194 struct ip_vs_proto_data *pd)
174{ 195{
175 if (unlikely(!pp->state_transition)) 196 if (unlikely(!pd->pp->state_transition))
176 return 0; 197 return 0;
177 return pp->state_transition(cp, direction, skb, pp); 198 return pd->pp->state_transition(cp, direction, skb, pd);
178} 199}
179 200
180static inline void 201static inline int
181ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, 202ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
182 struct sk_buff *skb, int protocol, 203 struct sk_buff *skb, int protocol,
183 const union nf_inet_addr *caddr, __be16 cport, 204 const union nf_inet_addr *caddr, __be16 cport,
184 const union nf_inet_addr *vaddr, __be16 vport, 205 const union nf_inet_addr *vaddr, __be16 vport,
185 struct ip_vs_conn_param *p) 206 struct ip_vs_conn_param *p)
186{ 207{
187 ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); 208 ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
209 vport, p);
188 p->pe = svc->pe; 210 p->pe = svc->pe;
189 if (p->pe && p->pe->fill_param) 211 if (p->pe && p->pe->fill_param)
190 p->pe->fill_param(p, skb); 212 return p->pe->fill_param(p, skb);
213
214 return 0;
191} 215}
192 216
193/* 217/*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
200static struct ip_vs_conn * 224static struct ip_vs_conn *
201ip_vs_sched_persist(struct ip_vs_service *svc, 225ip_vs_sched_persist(struct ip_vs_service *svc,
202 struct sk_buff *skb, 226 struct sk_buff *skb,
203 __be16 ports[2]) 227 __be16 src_port, __be16 dst_port, int *ignored)
204{ 228{
205 struct ip_vs_conn *cp = NULL; 229 struct ip_vs_conn *cp = NULL;
206 struct ip_vs_iphdr iph; 230 struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
224 248
225 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " 249 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
226 "mnet %s\n", 250 "mnet %s\n",
227 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), 251 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
228 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), 252 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
229 IP_VS_DBG_ADDR(svc->af, &snet)); 253 IP_VS_DBG_ADDR(svc->af, &snet));
230 254
231 /* 255 /*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
247 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; 271 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
248 __be16 vport = 0; 272 __be16 vport = 0;
249 273
250 if (ports[1] == svc->port) { 274 if (dst_port == svc->port) {
251 /* non-FTP template: 275 /* non-FTP template:
252 * <protocol, caddr, 0, vaddr, vport, daddr, dport> 276 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
253 * FTP template: 277 * FTP template:
254 * <protocol, caddr, 0, vaddr, 0, daddr, 0> 278 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
255 */ 279 */
256 if (svc->port != FTPPORT) 280 if (svc->port != FTPPORT)
257 vport = ports[1]; 281 vport = dst_port;
258 } else { 282 } else {
259 /* Note: persistent fwmark-based services and 283 /* Note: persistent fwmark-based services and
260 * persistent port zero service are handled here. 284 * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
268 vaddr = &fwmark; 292 vaddr = &fwmark;
269 } 293 }
270 } 294 }
271 ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, 295 /* return *ignored = -1 so NF_DROP can be used */
272 vaddr, vport, &param); 296 if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
297 vaddr, vport, &param) < 0) {
298 *ignored = -1;
299 return NULL;
300 }
273 } 301 }
274 302
275 /* Check if a template already exists */ 303 /* Check if a template already exists */
276 ct = ip_vs_ct_in_get(&param); 304 ct = ip_vs_ct_in_get(&param);
277 if (!ct || !ip_vs_check_template(ct)) { 305 if (!ct || !ip_vs_check_template(ct)) {
278 /* No template found or the dest of the connection 306 /*
307 * No template found or the dest of the connection
279 * template is not available. 308 * template is not available.
309 * return *ignored=0 i.e. ICMP and NF_DROP
280 */ 310 */
281 dest = svc->scheduler->schedule(svc, skb); 311 dest = svc->scheduler->schedule(svc, skb);
282 if (!dest) { 312 if (!dest) {
283 IP_VS_DBG(1, "p-schedule: no dest found.\n"); 313 IP_VS_DBG(1, "p-schedule: no dest found.\n");
284 kfree(param.pe_data); 314 kfree(param.pe_data);
315 *ignored = 0;
285 return NULL; 316 return NULL;
286 } 317 }
287 318
288 if (ports[1] == svc->port && svc->port != FTPPORT) 319 if (dst_port == svc->port && svc->port != FTPPORT)
289 dport = dest->port; 320 dport = dest->port;
290 321
291 /* Create a template 322 /* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
293 * and thus param.pe_data will be destroyed 324 * and thus param.pe_data will be destroyed
294 * when the template expires */ 325 * when the template expires */
295 ct = ip_vs_conn_new(&param, &dest->addr, dport, 326 ct = ip_vs_conn_new(&param, &dest->addr, dport,
296 IP_VS_CONN_F_TEMPLATE, dest); 327 IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
297 if (ct == NULL) { 328 if (ct == NULL) {
298 kfree(param.pe_data); 329 kfree(param.pe_data);
330 *ignored = -1;
299 return NULL; 331 return NULL;
300 } 332 }
301 333
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
306 kfree(param.pe_data); 338 kfree(param.pe_data);
307 } 339 }
308 340
309 dport = ports[1]; 341 dport = dst_port;
310 if (dport == svc->port && dest->port) 342 if (dport == svc->port && dest->port)
311 dport = dest->port; 343 dport = dest->port;
312 344
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
317 /* 349 /*
318 * Create a new connection according to the template 350 * Create a new connection according to the template
319 */ 351 */
320 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], 352 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
321 &iph.daddr, ports[1], &param); 353 src_port, &iph.daddr, dst_port, &param);
322 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest); 354
355 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
323 if (cp == NULL) { 356 if (cp == NULL) {
324 ip_vs_conn_put(ct); 357 ip_vs_conn_put(ct);
358 *ignored = -1;
325 return NULL; 359 return NULL;
326 } 360 }
327 361
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
341 * It selects a server according to the virtual service, and 375 * It selects a server according to the virtual service, and
342 * creates a connection entry. 376 * creates a connection entry.
343 * Protocols supported: TCP, UDP 377 * Protocols supported: TCP, UDP
378 *
379 * Usage of *ignored
380 *
381 * 1 : protocol tried to schedule (eg. on SYN), found svc but the
382 * svc/scheduler decides that this packet should be accepted with
383 * NF_ACCEPT because it must not be scheduled.
384 *
385 * 0 : scheduler can not find destination, so try bypass or
386 * return ICMP and then NF_DROP (ip_vs_leave).
387 *
388 * -1 : scheduler tried to schedule but fatal error occurred, eg.
389 * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
390 * failure such as missing Call-ID, ENOMEM on skb_linearize
391 * or pe_data. In this case we should return NF_DROP without
392 * any attempts to send ICMP with ip_vs_leave.
344 */ 393 */
345struct ip_vs_conn * 394struct ip_vs_conn *
346ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 395ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
347 struct ip_vs_protocol *pp, int *ignored) 396 struct ip_vs_proto_data *pd, int *ignored)
348{ 397{
398 struct ip_vs_protocol *pp = pd->pp;
349 struct ip_vs_conn *cp = NULL; 399 struct ip_vs_conn *cp = NULL;
350 struct ip_vs_iphdr iph; 400 struct ip_vs_iphdr iph;
351 struct ip_vs_dest *dest; 401 struct ip_vs_dest *dest;
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
371 } 421 }
372 422
373 /* 423 /*
374 * Do not schedule replies from local real server. It is risky 424 * Do not schedule replies from local real server.
375 * for fwmark services but mostly for persistent services.
376 */ 425 */
377 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 426 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
378 (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && 427 (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
379 (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
380 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, 428 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
381 "Not scheduling reply for existing connection"); 429 "Not scheduling reply for existing connection");
382 __ip_vs_conn_put(cp); 430 __ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
386 /* 434 /*
387 * Persistent service 435 * Persistent service
388 */ 436 */
389 if (svc->flags & IP_VS_SVC_F_PERSISTENT) { 437 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
390 *ignored = 0; 438 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
391 return ip_vs_sched_persist(svc, skb, pptr); 439
392 } 440 *ignored = 0;
393 441
394 /* 442 /*
395 * Non-persistent service 443 * Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
402 return NULL; 450 return NULL;
403 } 451 }
404 452
405 *ignored = 0;
406
407 dest = svc->scheduler->schedule(svc, skb); 453 dest = svc->scheduler->schedule(svc, skb);
408 if (dest == NULL) { 454 if (dest == NULL) {
409 IP_VS_DBG(1, "Schedule: no dest found.\n"); 455 IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
419 */ 465 */
420 { 466 {
421 struct ip_vs_conn_param p; 467 struct ip_vs_conn_param p;
422 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, 468
423 pptr[0], &iph.daddr, pptr[1], &p); 469 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
470 &iph.saddr, pptr[0], &iph.daddr, pptr[1],
471 &p);
424 cp = ip_vs_conn_new(&p, &dest->addr, 472 cp = ip_vs_conn_new(&p, &dest->addr,
425 dest->port ? dest->port : pptr[1], 473 dest->port ? dest->port : pptr[1],
426 flags, dest); 474 flags, dest, skb->mark);
427 if (!cp) 475 if (!cp) {
476 *ignored = -1;
428 return NULL; 477 return NULL;
478 }
429 } 479 }
430 480
431 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " 481 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
447 * no destination is available for a new connection. 497 * no destination is available for a new connection.
448 */ 498 */
449int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 499int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
450 struct ip_vs_protocol *pp) 500 struct ip_vs_proto_data *pd)
451{ 501{
502 struct net *net;
503 struct netns_ipvs *ipvs;
452 __be16 _ports[2], *pptr; 504 __be16 _ports[2], *pptr;
453 struct ip_vs_iphdr iph; 505 struct ip_vs_iphdr iph;
454 int unicast; 506 int unicast;
507
455 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 508 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
456 509
457 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 510 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
459 ip_vs_service_put(svc); 512 ip_vs_service_put(svc);
460 return NF_DROP; 513 return NF_DROP;
461 } 514 }
515 net = skb_net(skb);
462 516
463#ifdef CONFIG_IP_VS_IPV6 517#ifdef CONFIG_IP_VS_IPV6
464 if (svc->af == AF_INET6) 518 if (svc->af == AF_INET6)
465 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; 519 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
466 else 520 else
467#endif 521#endif
468 unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); 522 unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
469 523
470 /* if it is fwmark-based service, the cache_bypass sysctl is up 524 /* if it is fwmark-based service, the cache_bypass sysctl is up
471 and the destination is a non-local unicast, then create 525 and the destination is a non-local unicast, then create
472 a cache_bypass connection entry */ 526 a cache_bypass connection entry */
473 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { 527 ipvs = net_ipvs(net);
528 if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
474 int ret, cs; 529 int ret, cs;
475 struct ip_vs_conn *cp; 530 struct ip_vs_conn *cp;
476 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 531 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 539 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
485 { 540 {
486 struct ip_vs_conn_param p; 541 struct ip_vs_conn_param p;
487 ip_vs_conn_fill_param(svc->af, iph.protocol, 542 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
488 &iph.saddr, pptr[0], 543 &iph.saddr, pptr[0],
489 &iph.daddr, pptr[1], &p); 544 &iph.daddr, pptr[1], &p);
490 cp = ip_vs_conn_new(&p, &daddr, 0, 545 cp = ip_vs_conn_new(&p, &daddr, 0,
491 IP_VS_CONN_F_BYPASS | flags, 546 IP_VS_CONN_F_BYPASS | flags,
492 NULL); 547 NULL, skb->mark);
493 if (!cp) 548 if (!cp)
494 return NF_DROP; 549 return NF_DROP;
495 } 550 }
@@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
498 ip_vs_in_stats(cp, skb); 553 ip_vs_in_stats(cp, skb);
499 554
500 /* set state */ 555 /* set state */
501 cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); 556 cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
502 557
503 /* transmit the first SYN packet */ 558 /* transmit the first SYN packet */
504 ret = cp->packet_xmit(skb, cp, pp); 559 ret = cp->packet_xmit(skb, cp, pd->pp);
505 /* do not touch skb anymore */ 560 /* do not touch skb anymore */
506 561
507 atomic_inc(&cp->in_pkts); 562 atomic_inc(&cp->in_pkts);
@@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
682 struct ip_vs_protocol *pp, 737 struct ip_vs_protocol *pp,
683 unsigned int offset, unsigned int ihl) 738 unsigned int offset, unsigned int ihl)
684{ 739{
740 struct netns_ipvs *ipvs;
685 unsigned int verdict = NF_DROP; 741 unsigned int verdict = NF_DROP;
686 742
687 if (IP_VS_FWD_METHOD(cp) != 0) { 743 if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
703 if (!skb_make_writable(skb, offset)) 759 if (!skb_make_writable(skb, offset))
704 goto out; 760 goto out;
705 761
762 ipvs = net_ipvs(skb_net(skb));
763
706#ifdef CONFIG_IP_VS_IPV6 764#ifdef CONFIG_IP_VS_IPV6
707 if (af == AF_INET6) 765 if (af == AF_INET6)
708 ip_vs_nat_icmp_v6(skb, pp, cp, 1); 766 ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
712 770
713#ifdef CONFIG_IP_VS_IPV6 771#ifdef CONFIG_IP_VS_IPV6
714 if (af == AF_INET6) { 772 if (af == AF_INET6) {
715 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) 773 if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
716 goto out; 774 goto out;
717 } else 775 } else
718#endif 776#endif
719 if ((sysctl_ip_vs_snat_reroute || 777 if ((ipvs->sysctl_snat_reroute ||
720 skb_rtable(skb)->rt_flags & RTCF_LOCAL) && 778 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
721 ip_route_me_harder(skb, RTN_LOCAL) != 0) 779 ip_route_me_harder(skb, RTN_LOCAL) != 0)
722 goto out; 780 goto out;
@@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
808 866
809 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 867 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
810 /* The embedded headers contain source and dest in reverse order */ 868 /* The embedded headers contain source and dest in reverse order */
811 cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); 869 cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
812 if (!cp) 870 if (!cp)
813 return NF_ACCEPT; 871 return NF_ACCEPT;
814 872
@@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
885 943
886 ip_vs_fill_iphdr(AF_INET6, cih, &ciph); 944 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
887 /* The embedded headers contain source and dest in reverse order */ 945 /* The embedded headers contain source and dest in reverse order */
888 cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); 946 cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
889 if (!cp) 947 if (!cp)
890 return NF_ACCEPT; 948 return NF_ACCEPT;
891 949
@@ -924,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
924 * Used for NAT and local client. 982 * Used for NAT and local client.
925 */ 983 */
926static unsigned int 984static unsigned int
927handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 985handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
928 struct ip_vs_conn *cp, int ihl) 986 struct ip_vs_conn *cp, int ihl)
929{ 987{
988 struct ip_vs_protocol *pp = pd->pp;
989 struct netns_ipvs *ipvs;
990
930 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 991 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
931 992
932 if (!skb_make_writable(skb, ihl)) 993 if (!skb_make_writable(skb, ihl))
@@ -961,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
961 * if it came from this machine itself. So re-compute 1022 * if it came from this machine itself. So re-compute
962 * the routing information. 1023 * the routing information.
963 */ 1024 */
1025 ipvs = net_ipvs(skb_net(skb));
1026
964#ifdef CONFIG_IP_VS_IPV6 1027#ifdef CONFIG_IP_VS_IPV6
965 if (af == AF_INET6) { 1028 if (af == AF_INET6) {
966 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) 1029 if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
967 goto drop; 1030 goto drop;
968 } else 1031 } else
969#endif 1032#endif
970 if ((sysctl_ip_vs_snat_reroute || 1033 if ((ipvs->sysctl_snat_reroute ||
971 skb_rtable(skb)->rt_flags & RTCF_LOCAL) && 1034 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
972 ip_route_me_harder(skb, RTN_LOCAL) != 0) 1035 ip_route_me_harder(skb, RTN_LOCAL) != 0)
973 goto drop; 1036 goto drop;
@@ -975,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
975 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); 1038 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
976 1039
977 ip_vs_out_stats(cp, skb); 1040 ip_vs_out_stats(cp, skb);
978 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 1041 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
979 skb->ipvs_property = 1; 1042 skb->ipvs_property = 1;
980 if (!(cp->flags & IP_VS_CONN_F_NFCT)) 1043 if (!(cp->flags & IP_VS_CONN_F_NFCT))
981 ip_vs_notrack(skb); 1044 ip_vs_notrack(skb);
@@ -999,9 +1062,12 @@ drop:
999static unsigned int 1062static unsigned int
1000ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) 1063ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1001{ 1064{
1065 struct net *net = NULL;
1002 struct ip_vs_iphdr iph; 1066 struct ip_vs_iphdr iph;
1003 struct ip_vs_protocol *pp; 1067 struct ip_vs_protocol *pp;
1068 struct ip_vs_proto_data *pd;
1004 struct ip_vs_conn *cp; 1069 struct ip_vs_conn *cp;
1070 struct netns_ipvs *ipvs;
1005 1071
1006 EnterFunction(11); 1072 EnterFunction(11);
1007 1073
@@ -1022,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1022 if (unlikely(!skb_dst(skb))) 1088 if (unlikely(!skb_dst(skb)))
1023 return NF_ACCEPT; 1089 return NF_ACCEPT;
1024 1090
1091 net = skb_net(skb);
1025 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1092 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1026#ifdef CONFIG_IP_VS_IPV6 1093#ifdef CONFIG_IP_VS_IPV6
1027 if (af == AF_INET6) { 1094 if (af == AF_INET6) {
@@ -1045,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1045 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1112 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1046 } 1113 }
1047 1114
1048 pp = ip_vs_proto_get(iph.protocol); 1115 pd = ip_vs_proto_data_get(net, iph.protocol);
1049 if (unlikely(!pp)) 1116 if (unlikely(!pd))
1050 return NF_ACCEPT; 1117 return NF_ACCEPT;
1118 pp = pd->pp;
1051 1119
1052 /* reassemble IP fragments */ 1120 /* reassemble IP fragments */
1053#ifdef CONFIG_IP_VS_IPV6 1121#ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1073 /* 1141 /*
1074 * Check if the packet belongs to an existing entry 1142 * Check if the packet belongs to an existing entry
1075 */ 1143 */
1076 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); 1144 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
1145 ipvs = net_ipvs(net);
1077 1146
1078 if (likely(cp)) 1147 if (likely(cp))
1079 return handle_response(af, skb, pp, cp, iph.len); 1148 return handle_response(af, skb, pd, cp, iph.len);
1080 if (sysctl_ip_vs_nat_icmp_send && 1149 if (ipvs->sysctl_nat_icmp_send &&
1081 (pp->protocol == IPPROTO_TCP || 1150 (pp->protocol == IPPROTO_TCP ||
1082 pp->protocol == IPPROTO_UDP || 1151 pp->protocol == IPPROTO_UDP ||
1083 pp->protocol == IPPROTO_SCTP)) { 1152 pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1087 sizeof(_ports), _ports); 1156 sizeof(_ports), _ports);
1088 if (pptr == NULL) 1157 if (pptr == NULL)
1089 return NF_ACCEPT; /* Not for me */ 1158 return NF_ACCEPT; /* Not for me */
1090 if (ip_vs_lookup_real_service(af, iph.protocol, 1159 if (ip_vs_lookup_real_service(net, af, iph.protocol,
1091 &iph.saddr, 1160 &iph.saddr,
1092 pptr[0])) { 1161 pptr[0])) {
1093 /* 1162 /*
@@ -1202,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1202static int 1271static int
1203ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) 1272ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1204{ 1273{
1274 struct net *net = NULL;
1205 struct iphdr *iph; 1275 struct iphdr *iph;
1206 struct icmphdr _icmph, *ic; 1276 struct icmphdr _icmph, *ic;
1207 struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ 1277 struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
1208 struct ip_vs_iphdr ciph; 1278 struct ip_vs_iphdr ciph;
1209 struct ip_vs_conn *cp; 1279 struct ip_vs_conn *cp;
1210 struct ip_vs_protocol *pp; 1280 struct ip_vs_protocol *pp;
1281 struct ip_vs_proto_data *pd;
1211 unsigned int offset, ihl, verdict; 1282 unsigned int offset, ihl, verdict;
1212 union nf_inet_addr snet; 1283 union nf_inet_addr snet;
1213 1284
@@ -1249,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1249 if (cih == NULL) 1320 if (cih == NULL)
1250 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1321 return NF_ACCEPT; /* The packet looks wrong, ignore */
1251 1322
1252 pp = ip_vs_proto_get(cih->protocol); 1323 net = skb_net(skb);
1253 if (!pp) 1324 pd = ip_vs_proto_data_get(net, cih->protocol);
1325 if (!pd)
1254 return NF_ACCEPT; 1326 return NF_ACCEPT;
1327 pp = pd->pp;
1255 1328
1256 /* Is the embedded protocol header present? */ 1329 /* Is the embedded protocol header present? */
1257 if (unlikely(cih->frag_off & htons(IP_OFFSET) && 1330 if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1265 1338
1266 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1339 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
1267 /* The embedded headers contain source and dest in reverse order */ 1340 /* The embedded headers contain source and dest in reverse order */
1268 cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); 1341 cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
1269 if (!cp) { 1342 if (!cp) {
1270 /* The packet could also belong to a local client */ 1343 /* The packet could also belong to a local client */
1271 cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); 1344 cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
1272 if (cp) { 1345 if (cp) {
1273 snet.ip = iph->saddr; 1346 snet.ip = iph->saddr;
1274 return handle_response_icmp(AF_INET, skb, &snet, 1347 return handle_response_icmp(AF_INET, skb, &snet,
@@ -1312,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1312static int 1385static int
1313ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) 1386ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1314{ 1387{
1388 struct net *net = NULL;
1315 struct ipv6hdr *iph; 1389 struct ipv6hdr *iph;
1316 struct icmp6hdr _icmph, *ic; 1390 struct icmp6hdr _icmph, *ic;
1317 struct ipv6hdr _ciph, *cih; /* The ip header contained 1391 struct ipv6hdr _ciph, *cih; /* The ip header contained
@@ -1319,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1319 struct ip_vs_iphdr ciph; 1393 struct ip_vs_iphdr ciph;
1320 struct ip_vs_conn *cp; 1394 struct ip_vs_conn *cp;
1321 struct ip_vs_protocol *pp; 1395 struct ip_vs_protocol *pp;
1396 struct ip_vs_proto_data *pd;
1322 unsigned int offset, verdict; 1397 unsigned int offset, verdict;
1323 union nf_inet_addr snet; 1398 union nf_inet_addr snet;
1324 struct rt6_info *rt; 1399 struct rt6_info *rt;
@@ -1361,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1361 if (cih == NULL) 1436 if (cih == NULL)
1362 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1437 return NF_ACCEPT; /* The packet looks wrong, ignore */
1363 1438
1364 pp = ip_vs_proto_get(cih->nexthdr); 1439 net = skb_net(skb);
1365 if (!pp) 1440 pd = ip_vs_proto_data_get(net, cih->nexthdr);
1441 if (!pd)
1366 return NF_ACCEPT; 1442 return NF_ACCEPT;
1443 pp = pd->pp;
1367 1444
1368 /* Is the embedded protocol header present? */ 1445 /* Is the embedded protocol header present? */
1369 /* TODO: we don't support fragmentation at the moment anyways */ 1446 /* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1377 1454
1378 ip_vs_fill_iphdr(AF_INET6, cih, &ciph); 1455 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
1379 /* The embedded headers contain source and dest in reverse order */ 1456 /* The embedded headers contain source and dest in reverse order */
1380 cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); 1457 cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
1381 if (!cp) { 1458 if (!cp) {
1382 /* The packet could also belong to a local client */ 1459 /* The packet could also belong to a local client */
1383 cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); 1460 cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
1384 if (cp) { 1461 if (cp) {
1385 ipv6_addr_copy(&snet.in6, &iph->saddr); 1462 ipv6_addr_copy(&snet.in6, &iph->saddr);
1386 return handle_response_icmp(AF_INET6, skb, &snet, 1463 return handle_response_icmp(AF_INET6, skb, &snet,
@@ -1423,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1423static unsigned int 1500static unsigned int
1424ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) 1501ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1425{ 1502{
1503 struct net *net;
1426 struct ip_vs_iphdr iph; 1504 struct ip_vs_iphdr iph;
1427 struct ip_vs_protocol *pp; 1505 struct ip_vs_protocol *pp;
1506 struct ip_vs_proto_data *pd;
1428 struct ip_vs_conn *cp; 1507 struct ip_vs_conn *cp;
1429 int ret, restart, pkts; 1508 int ret, restart, pkts;
1509 struct netns_ipvs *ipvs;
1430 1510
1431 /* Already marked as IPVS request or reply? */ 1511 /* Already marked as IPVS request or reply? */
1432 if (skb->ipvs_property) 1512 if (skb->ipvs_property)
@@ -1480,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1480 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1560 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1481 } 1561 }
1482 1562
1563 net = skb_net(skb);
1483 /* Protocol supported? */ 1564 /* Protocol supported? */
1484 pp = ip_vs_proto_get(iph.protocol); 1565 pd = ip_vs_proto_data_get(net, iph.protocol);
1485 if (unlikely(!pp)) 1566 if (unlikely(!pd))
1486 return NF_ACCEPT; 1567 return NF_ACCEPT;
1487 1568 pp = pd->pp;
1488 /* 1569 /*
1489 * Check if the packet belongs to an existing connection entry 1570 * Check if the packet belongs to an existing connection entry
1490 */ 1571 */
1491 cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); 1572 cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
1492 1573
1493 if (unlikely(!cp)) { 1574 if (unlikely(!cp)) {
1494 int v; 1575 int v;
1495 1576
1496 if (!pp->conn_schedule(af, skb, pp, &v, &cp)) 1577 if (!pp->conn_schedule(af, skb, pd, &v, &cp))
1497 return v; 1578 return v;
1498 } 1579 }
1499 1580
@@ -1505,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1505 } 1586 }
1506 1587
1507 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); 1588 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
1508 1589 net = skb_net(skb);
1590 ipvs = net_ipvs(net);
1509 /* Check the server status */ 1591 /* Check the server status */
1510 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1592 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
1511 /* the destination server is not available */ 1593 /* the destination server is not available */
1512 1594
1513 if (sysctl_ip_vs_expire_nodest_conn) { 1595 if (ipvs->sysctl_expire_nodest_conn) {
1514 /* try to expire the connection immediately */ 1596 /* try to expire the connection immediately */
1515 ip_vs_conn_expire_now(cp); 1597 ip_vs_conn_expire_now(cp);
1516 } 1598 }
@@ -1521,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1521 } 1603 }
1522 1604
1523 ip_vs_in_stats(cp, skb); 1605 ip_vs_in_stats(cp, skb);
1524 restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); 1606 restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
1525 if (cp->packet_xmit) 1607 if (cp->packet_xmit)
1526 ret = cp->packet_xmit(skb, cp, pp); 1608 ret = cp->packet_xmit(skb, cp, pp);
1527 /* do not touch skb anymore */ 1609 /* do not touch skb anymore */
@@ -1535,35 +1617,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1535 * 1617 *
1536 * Sync connection if it is about to close to 1618 * Sync connection if it is about to close to
1537 * encorage the standby servers to update the connections timeout 1619 * encorage the standby servers to update the connections timeout
1620 *
1621 * For ONE_PKT let ip_vs_sync_conn() do the filter work.
1538 */ 1622 */
1539 pkts = atomic_add_return(1, &cp->in_pkts); 1623
1540 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1624 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
1625 pkts = ipvs->sysctl_sync_threshold[0];
1626 else
1627 pkts = atomic_add_return(1, &cp->in_pkts);
1628
1629 if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1541 cp->protocol == IPPROTO_SCTP) { 1630 cp->protocol == IPPROTO_SCTP) {
1542 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1631 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1543 (pkts % sysctl_ip_vs_sync_threshold[1] 1632 (pkts % ipvs->sysctl_sync_threshold[1]
1544 == sysctl_ip_vs_sync_threshold[0])) || 1633 == ipvs->sysctl_sync_threshold[0])) ||
1545 (cp->old_state != cp->state && 1634 (cp->old_state != cp->state &&
1546 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1635 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
1547 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || 1636 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
1548 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { 1637 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
1549 ip_vs_sync_conn(cp); 1638 ip_vs_sync_conn(net, cp);
1550 goto out; 1639 goto out;
1551 } 1640 }
1552 } 1641 }
1553 1642
1554 /* Keep this block last: TCP and others with pp->num_states <= 1 */ 1643 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1555 else if (af == AF_INET && 1644 else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1556 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1557 (((cp->protocol != IPPROTO_TCP || 1645 (((cp->protocol != IPPROTO_TCP ||
1558 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1646 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
1559 (pkts % sysctl_ip_vs_sync_threshold[1] 1647 (pkts % ipvs->sysctl_sync_threshold[1]
1560 == sysctl_ip_vs_sync_threshold[0])) || 1648 == ipvs->sysctl_sync_threshold[0])) ||
1561 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && 1649 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
1562 ((cp->state == IP_VS_TCP_S_FIN_WAIT) || 1650 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
1563 (cp->state == IP_VS_TCP_S_CLOSE) || 1651 (cp->state == IP_VS_TCP_S_CLOSE) ||
1564 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || 1652 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
1565 (cp->state == IP_VS_TCP_S_TIME_WAIT))))) 1653 (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
1566 ip_vs_sync_conn(cp); 1654 ip_vs_sync_conn(net, cp);
1567out: 1655out:
1568 cp->old_state = cp->state; 1656 cp->old_state = cp->state;
1569 1657
@@ -1782,7 +1870,41 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1782 }, 1870 },
1783#endif 1871#endif
1784}; 1872};
1873/*
1874 * Initialize IP Virtual Server netns mem.
1875 */
1876static int __net_init __ip_vs_init(struct net *net)
1877{
1878 struct netns_ipvs *ipvs;
1879
1880 ipvs = net_generic(net, ip_vs_net_id);
1881 if (ipvs == NULL) {
1882 pr_err("%s(): no memory.\n", __func__);
1883 return -ENOMEM;
1884 }
1885 ipvs->net = net;
1886 /* Counters used for creating unique names */
1887 ipvs->gen = atomic_read(&ipvs_netns_cnt);
1888 atomic_inc(&ipvs_netns_cnt);
1889 net->ipvs = ipvs;
1890 printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n",
1891 sizeof(struct netns_ipvs), ipvs->gen);
1892 return 0;
1893}
1894
1895static void __net_exit __ip_vs_cleanup(struct net *net)
1896{
1897 struct netns_ipvs *ipvs = net_ipvs(net);
1785 1898
1899 IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen);
1900}
1901
1902static struct pernet_operations ipvs_core_ops = {
1903 .init = __ip_vs_init,
1904 .exit = __ip_vs_cleanup,
1905 .id = &ip_vs_net_id,
1906 .size = sizeof(struct netns_ipvs),
1907};
1786 1908
1787/* 1909/*
1788 * Initialize IP Virtual Server 1910 * Initialize IP Virtual Server
@@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void)
1791{ 1913{
1792 int ret; 1914 int ret;
1793 1915
1794 ip_vs_estimator_init(); 1916 ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */
1917 if (ret < 0)
1918 return ret;
1795 1919
1920 ip_vs_estimator_init();
1796 ret = ip_vs_control_init(); 1921 ret = ip_vs_control_init();
1797 if (ret < 0) { 1922 if (ret < 0) {
1798 pr_err("can't setup control.\n"); 1923 pr_err("can't setup control.\n");
@@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void)
1813 goto cleanup_app; 1938 goto cleanup_app;
1814 } 1939 }
1815 1940
1941 ret = ip_vs_sync_init();
1942 if (ret < 0) {
1943 pr_err("can't setup sync data.\n");
1944 goto cleanup_conn;
1945 }
1946
1816 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 1947 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1817 if (ret < 0) { 1948 if (ret < 0) {
1818 pr_err("can't register hooks.\n"); 1949 pr_err("can't register hooks.\n");
1819 goto cleanup_conn; 1950 goto cleanup_sync;
1820 } 1951 }
1821 1952
1822 pr_info("ipvs loaded.\n"); 1953 pr_info("ipvs loaded.\n");
1823 return ret; 1954 return ret;
1824 1955
1956cleanup_sync:
1957 ip_vs_sync_cleanup();
1825 cleanup_conn: 1958 cleanup_conn:
1826 ip_vs_conn_cleanup(); 1959 ip_vs_conn_cleanup();
1827 cleanup_app: 1960 cleanup_app:
@@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void)
1831 ip_vs_control_cleanup(); 1964 ip_vs_control_cleanup();
1832 cleanup_estimator: 1965 cleanup_estimator:
1833 ip_vs_estimator_cleanup(); 1966 ip_vs_estimator_cleanup();
1967 unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
1834 return ret; 1968 return ret;
1835} 1969}
1836 1970
1837static void __exit ip_vs_cleanup(void) 1971static void __exit ip_vs_cleanup(void)
1838{ 1972{
1839 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 1973 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1974 ip_vs_sync_cleanup();
1840 ip_vs_conn_cleanup(); 1975 ip_vs_conn_cleanup();
1841 ip_vs_app_cleanup(); 1976 ip_vs_app_cleanup();
1842 ip_vs_protocol_cleanup(); 1977 ip_vs_protocol_cleanup();
1843 ip_vs_control_cleanup(); 1978 ip_vs_control_cleanup();
1844 ip_vs_estimator_cleanup(); 1979 ip_vs_estimator_cleanup();
1980 unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
1845 pr_info("ipvs unloaded.\n"); 1981 pr_info("ipvs unloaded.\n");
1846} 1982}
1847 1983
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5101ab..09ca2ce2f2b7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39 39
40#include <net/net_namespace.h> 40#include <net/net_namespace.h>
41#include <linux/nsproxy.h>
41#include <net/ip.h> 42#include <net/ip.h>
42#ifdef CONFIG_IP_VS_IPV6 43#ifdef CONFIG_IP_VS_IPV6
43#include <net/ipv6.h> 44#include <net/ipv6.h>
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
57/* lock for service table */ 58/* lock for service table */
58static DEFINE_RWLOCK(__ip_vs_svc_lock); 59static DEFINE_RWLOCK(__ip_vs_svc_lock);
59 60
60/* lock for table with the real services */
61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63/* lock for state and timeout tables */
64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69/* lock for drop packet handling */
70static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72/* 1/rate drop and drop-entry variables */
73int ip_vs_drop_rate = 0;
74int ip_vs_drop_counter = 0;
75static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77/* number of virtual services */
78static int ip_vs_num_services = 0;
79
80/* sysctl variables */ 61/* sysctl variables */
81static int sysctl_ip_vs_drop_entry = 0;
82static int sysctl_ip_vs_drop_packet = 0;
83static int sysctl_ip_vs_secure_tcp = 0;
84static int sysctl_ip_vs_amemthresh = 1024;
85static int sysctl_ip_vs_am_droprate = 10;
86int sysctl_ip_vs_cache_bypass = 0;
87int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0;
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
94int sysctl_ip_vs_snat_reroute = 1;
95
96 62
97#ifdef CONFIG_IP_VS_DEBUG 63#ifdef CONFIG_IP_VS_DEBUG
98static int sysctl_ip_vs_debug_level = 0; 64static int sysctl_ip_vs_debug_level = 0;
@@ -105,7 +71,8 @@ int ip_vs_get_debug_level(void)
105 71
106#ifdef CONFIG_IP_VS_IPV6 72#ifdef CONFIG_IP_VS_IPV6
107/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ 73/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
108static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) 74static int __ip_vs_addr_is_local_v6(struct net *net,
75 const struct in6_addr *addr)
109{ 76{
110 struct rt6_info *rt; 77 struct rt6_info *rt;
111 struct flowi fl = { 78 struct flowi fl = {
@@ -114,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
114 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, 81 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
115 }; 82 };
116 83
117 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 84 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
118 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) 85 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
119 return 1; 86 return 1;
120 87
@@ -125,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
125 * update_defense_level is called from keventd and from sysctl, 92 * update_defense_level is called from keventd and from sysctl,
126 * so it needs to protect itself from softirqs 93 * so it needs to protect itself from softirqs
127 */ 94 */
128static void update_defense_level(void) 95static void update_defense_level(struct netns_ipvs *ipvs)
129{ 96{
130 struct sysinfo i; 97 struct sysinfo i;
131 static int old_secure_tcp = 0; 98 static int old_secure_tcp = 0;
@@ -141,73 +108,73 @@ static void update_defense_level(void)
141 /* si_swapinfo(&i); */ 108 /* si_swapinfo(&i); */
142 /* availmem = availmem - (i.totalswap - i.freeswap); */ 109 /* availmem = availmem - (i.totalswap - i.freeswap); */
143 110
144 nomem = (availmem < sysctl_ip_vs_amemthresh); 111 nomem = (availmem < ipvs->sysctl_amemthresh);
145 112
146 local_bh_disable(); 113 local_bh_disable();
147 114
148 /* drop_entry */ 115 /* drop_entry */
149 spin_lock(&__ip_vs_dropentry_lock); 116 spin_lock(&ipvs->dropentry_lock);
150 switch (sysctl_ip_vs_drop_entry) { 117 switch (ipvs->sysctl_drop_entry) {
151 case 0: 118 case 0:
152 atomic_set(&ip_vs_dropentry, 0); 119 atomic_set(&ipvs->dropentry, 0);
153 break; 120 break;
154 case 1: 121 case 1:
155 if (nomem) { 122 if (nomem) {
156 atomic_set(&ip_vs_dropentry, 1); 123 atomic_set(&ipvs->dropentry, 1);
157 sysctl_ip_vs_drop_entry = 2; 124 ipvs->sysctl_drop_entry = 2;
158 } else { 125 } else {
159 atomic_set(&ip_vs_dropentry, 0); 126 atomic_set(&ipvs->dropentry, 0);
160 } 127 }
161 break; 128 break;
162 case 2: 129 case 2:
163 if (nomem) { 130 if (nomem) {
164 atomic_set(&ip_vs_dropentry, 1); 131 atomic_set(&ipvs->dropentry, 1);
165 } else { 132 } else {
166 atomic_set(&ip_vs_dropentry, 0); 133 atomic_set(&ipvs->dropentry, 0);
167 sysctl_ip_vs_drop_entry = 1; 134 ipvs->sysctl_drop_entry = 1;
168 }; 135 };
169 break; 136 break;
170 case 3: 137 case 3:
171 atomic_set(&ip_vs_dropentry, 1); 138 atomic_set(&ipvs->dropentry, 1);
172 break; 139 break;
173 } 140 }
174 spin_unlock(&__ip_vs_dropentry_lock); 141 spin_unlock(&ipvs->dropentry_lock);
175 142
176 /* drop_packet */ 143 /* drop_packet */
177 spin_lock(&__ip_vs_droppacket_lock); 144 spin_lock(&ipvs->droppacket_lock);
178 switch (sysctl_ip_vs_drop_packet) { 145 switch (ipvs->sysctl_drop_packet) {
179 case 0: 146 case 0:
180 ip_vs_drop_rate = 0; 147 ipvs->drop_rate = 0;
181 break; 148 break;
182 case 1: 149 case 1:
183 if (nomem) { 150 if (nomem) {
184 ip_vs_drop_rate = ip_vs_drop_counter 151 ipvs->drop_rate = ipvs->drop_counter
185 = sysctl_ip_vs_amemthresh / 152 = ipvs->sysctl_amemthresh /
186 (sysctl_ip_vs_amemthresh-availmem); 153 (ipvs->sysctl_amemthresh-availmem);
187 sysctl_ip_vs_drop_packet = 2; 154 ipvs->sysctl_drop_packet = 2;
188 } else { 155 } else {
189 ip_vs_drop_rate = 0; 156 ipvs->drop_rate = 0;
190 } 157 }
191 break; 158 break;
192 case 2: 159 case 2:
193 if (nomem) { 160 if (nomem) {
194 ip_vs_drop_rate = ip_vs_drop_counter 161 ipvs->drop_rate = ipvs->drop_counter
195 = sysctl_ip_vs_amemthresh / 162 = ipvs->sysctl_amemthresh /
196 (sysctl_ip_vs_amemthresh-availmem); 163 (ipvs->sysctl_amemthresh-availmem);
197 } else { 164 } else {
198 ip_vs_drop_rate = 0; 165 ipvs->drop_rate = 0;
199 sysctl_ip_vs_drop_packet = 1; 166 ipvs->sysctl_drop_packet = 1;
200 } 167 }
201 break; 168 break;
202 case 3: 169 case 3:
203 ip_vs_drop_rate = sysctl_ip_vs_am_droprate; 170 ipvs->drop_rate = ipvs->sysctl_am_droprate;
204 break; 171 break;
205 } 172 }
206 spin_unlock(&__ip_vs_droppacket_lock); 173 spin_unlock(&ipvs->droppacket_lock);
207 174
208 /* secure_tcp */ 175 /* secure_tcp */
209 spin_lock(&ip_vs_securetcp_lock); 176 spin_lock(&ipvs->securetcp_lock);
210 switch (sysctl_ip_vs_secure_tcp) { 177 switch (ipvs->sysctl_secure_tcp) {
211 case 0: 178 case 0:
212 if (old_secure_tcp >= 2) 179 if (old_secure_tcp >= 2)
213 to_change = 0; 180 to_change = 0;
@@ -216,7 +183,7 @@ static void update_defense_level(void)
216 if (nomem) { 183 if (nomem) {
217 if (old_secure_tcp < 2) 184 if (old_secure_tcp < 2)
218 to_change = 1; 185 to_change = 1;
219 sysctl_ip_vs_secure_tcp = 2; 186 ipvs->sysctl_secure_tcp = 2;
220 } else { 187 } else {
221 if (old_secure_tcp >= 2) 188 if (old_secure_tcp >= 2)
222 to_change = 0; 189 to_change = 0;
@@ -229,7 +196,7 @@ static void update_defense_level(void)
229 } else { 196 } else {
230 if (old_secure_tcp >= 2) 197 if (old_secure_tcp >= 2)
231 to_change = 0; 198 to_change = 0;
232 sysctl_ip_vs_secure_tcp = 1; 199 ipvs->sysctl_secure_tcp = 1;
233 } 200 }
234 break; 201 break;
235 case 3: 202 case 3:
@@ -237,10 +204,11 @@ static void update_defense_level(void)
237 to_change = 1; 204 to_change = 1;
238 break; 205 break;
239 } 206 }
240 old_secure_tcp = sysctl_ip_vs_secure_tcp; 207 old_secure_tcp = ipvs->sysctl_secure_tcp;
241 if (to_change >= 0) 208 if (to_change >= 0)
242 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 209 ip_vs_protocol_timeout_change(ipvs,
243 spin_unlock(&ip_vs_securetcp_lock); 210 ipvs->sysctl_secure_tcp > 1);
211 spin_unlock(&ipvs->securetcp_lock);
244 212
245 local_bh_enable(); 213 local_bh_enable();
246} 214}
@@ -250,16 +218,16 @@ static void update_defense_level(void)
250 * Timer for checking the defense 218 * Timer for checking the defense
251 */ 219 */
252#define DEFENSE_TIMER_PERIOD 1*HZ 220#define DEFENSE_TIMER_PERIOD 1*HZ
253static void defense_work_handler(struct work_struct *work);
254static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
255 221
256static void defense_work_handler(struct work_struct *work) 222static void defense_work_handler(struct work_struct *work)
257{ 223{
258 update_defense_level(); 224 struct netns_ipvs *ipvs =
259 if (atomic_read(&ip_vs_dropentry)) 225 container_of(work, struct netns_ipvs, defense_work.work);
260 ip_vs_random_dropentry();
261 226
262 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 227 update_defense_level(ipvs);
228 if (atomic_read(&ipvs->dropentry))
229 ip_vs_random_dropentry(ipvs->net);
230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
263} 231}
264 232
265int 233int
@@ -287,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
287/* the service table hashed by fwmark */ 255/* the service table hashed by fwmark */
288static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 256static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
289 257
290/*
291 * Hash table: for real service lookups
292 */
293#define IP_VS_RTAB_BITS 4
294#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
295#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
296
297static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
298
299/*
300 * Trash for destinations
301 */
302static LIST_HEAD(ip_vs_dest_trash);
303
304/*
305 * FTP & NULL virtual service counters
306 */
307static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
308static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
309
310 258
311/* 259/*
312 * Returns hash value for virtual service 260 * Returns hash value for virtual service
313 */ 261 */
314static __inline__ unsigned 262static inline unsigned
315ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, 263ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
316 __be16 port) 264 const union nf_inet_addr *addr, __be16 port)
317{ 265{
318 register unsigned porth = ntohs(port); 266 register unsigned porth = ntohs(port);
319 __be32 addr_fold = addr->ip; 267 __be32 addr_fold = addr->ip;
@@ -323,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
323 addr_fold = addr->ip6[0]^addr->ip6[1]^ 271 addr_fold = addr->ip6[0]^addr->ip6[1]^
324 addr->ip6[2]^addr->ip6[3]; 272 addr->ip6[2]^addr->ip6[3];
325#endif 273#endif
274 addr_fold ^= ((size_t)net>>8);
326 275
327 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) 276 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
328 & IP_VS_SVC_TAB_MASK; 277 & IP_VS_SVC_TAB_MASK;
@@ -331,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
331/* 280/*
332 * Returns hash value of fwmark for virtual service lookup 281 * Returns hash value of fwmark for virtual service lookup
333 */ 282 */
334static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) 283static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
335{ 284{
336 return fwmark & IP_VS_SVC_TAB_MASK; 285 return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
337} 286}
338 287
339/* 288/*
340 * Hashes a service in the ip_vs_svc_table by <proto,addr,port> 289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
341 * or in the ip_vs_svc_fwm_table by fwmark. 290 * or in the ip_vs_svc_fwm_table by fwmark.
342 * Should be called with locked tables. 291 * Should be called with locked tables.
343 */ 292 */
@@ -353,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
353 302
354 if (svc->fwmark == 0) { 303 if (svc->fwmark == 0) {
355 /* 304 /*
356 * Hash it by <protocol,addr,port> in ip_vs_svc_table 305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
357 */ 306 */
358 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, 307 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
359 svc->port); 308 &svc->addr, svc->port);
360 list_add(&svc->s_list, &ip_vs_svc_table[hash]); 309 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
361 } else { 310 } else {
362 /* 311 /*
363 * Hash it by fwmark in ip_vs_svc_fwm_table 312 * Hash it by fwmark in svc_fwm_table
364 */ 313 */
365 hash = ip_vs_svc_fwm_hashkey(svc->fwmark); 314 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
366 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 315 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
367 } 316 }
368 317
@@ -374,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
374 323
375 324
376/* 325/*
377 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. 326 * Unhashes a service from svc_table / svc_fwm_table.
378 * Should be called with locked tables. 327 * Should be called with locked tables.
379 */ 328 */
380static int ip_vs_svc_unhash(struct ip_vs_service *svc) 329static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
386 } 335 }
387 336
388 if (svc->fwmark == 0) { 337 if (svc->fwmark == 0) {
389 /* Remove it from the ip_vs_svc_table table */ 338 /* Remove it from the svc_table table */
390 list_del(&svc->s_list); 339 list_del(&svc->s_list);
391 } else { 340 } else {
392 /* Remove it from the ip_vs_svc_fwm_table table */ 341 /* Remove it from the svc_fwm_table table */
393 list_del(&svc->f_list); 342 list_del(&svc->f_list);
394 } 343 }
395 344
@@ -400,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
400 349
401 350
402/* 351/*
403 * Get service by {proto,addr,port} in the service table. 352 * Get service by {netns, proto,addr,port} in the service table.
404 */ 353 */
405static inline struct ip_vs_service * 354static inline struct ip_vs_service *
406__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, 355__ip_vs_service_find(struct net *net, int af, __u16 protocol,
407 __be16 vport) 356 const union nf_inet_addr *vaddr, __be16 vport)
408{ 357{
409 unsigned hash; 358 unsigned hash;
410 struct ip_vs_service *svc; 359 struct ip_vs_service *svc;
411 360
412 /* Check for "full" addressed entries */ 361 /* Check for "full" addressed entries */
413 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); 362 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
414 363
415 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ 364 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
416 if ((svc->af == af) 365 if ((svc->af == af)
417 && ip_vs_addr_equal(af, &svc->addr, vaddr) 366 && ip_vs_addr_equal(af, &svc->addr, vaddr)
418 && (svc->port == vport) 367 && (svc->port == vport)
419 && (svc->protocol == protocol)) { 368 && (svc->protocol == protocol)
369 && net_eq(svc->net, net)) {
420 /* HIT */ 370 /* HIT */
421 return svc; 371 return svc;
422 } 372 }
@@ -430,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
430 * Get service by {fwmark} in the service table. 380 * Get service by {fwmark} in the service table.
431 */ 381 */
432static inline struct ip_vs_service * 382static inline struct ip_vs_service *
433__ip_vs_svc_fwm_find(int af, __u32 fwmark) 383__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
434{ 384{
435 unsigned hash; 385 unsigned hash;
436 struct ip_vs_service *svc; 386 struct ip_vs_service *svc;
437 387
438 /* Check for fwmark addressed entries */ 388 /* Check for fwmark addressed entries */
439 hash = ip_vs_svc_fwm_hashkey(fwmark); 389 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
440 390
441 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 391 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
442 if (svc->fwmark == fwmark && svc->af == af) { 392 if (svc->fwmark == fwmark && svc->af == af
393 && net_eq(svc->net, net)) {
443 /* HIT */ 394 /* HIT */
444 return svc; 395 return svc;
445 } 396 }
@@ -449,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
449} 400}
450 401
451struct ip_vs_service * 402struct ip_vs_service *
452ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, 403ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
453 const union nf_inet_addr *vaddr, __be16 vport) 404 const union nf_inet_addr *vaddr, __be16 vport)
454{ 405{
455 struct ip_vs_service *svc; 406 struct ip_vs_service *svc;
407 struct netns_ipvs *ipvs = net_ipvs(net);
456 408
457 read_lock(&__ip_vs_svc_lock); 409 read_lock(&__ip_vs_svc_lock);
458 410
459 /* 411 /*
460 * Check the table hashed by fwmark first 412 * Check the table hashed by fwmark first
461 */ 413 */
462 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) 414 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
415 if (fwmark && svc)
463 goto out; 416 goto out;
464 417
465 /* 418 /*
466 * Check the table hashed by <protocol,addr,port> 419 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries 420 * for "full" addressed entries
468 */ 421 */
469 svc = __ip_vs_service_find(af, protocol, vaddr, vport); 422 svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
470 423
471 if (svc == NULL 424 if (svc == NULL
472 && protocol == IPPROTO_TCP 425 && protocol == IPPROTO_TCP
473 && atomic_read(&ip_vs_ftpsvc_counter) 426 && atomic_read(&ipvs->ftpsvc_counter)
474 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 427 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
475 /* 428 /*
476 * Check if ftp service entry exists, the packet 429 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections. 430 * might belong to FTP data connections.
478 */ 431 */
479 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); 432 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
480 } 433 }
481 434
482 if (svc == NULL 435 if (svc == NULL
483 && atomic_read(&ip_vs_nullsvc_counter)) { 436 && atomic_read(&ipvs->nullsvc_counter)) {
484 /* 437 /*
485 * Check if the catch-all port (port zero) exists 438 * Check if the catch-all port (port zero) exists
486 */ 439 */
487 svc = __ip_vs_service_find(af, protocol, vaddr, 0); 440 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
488 } 441 }
489 442
490 out: 443 out:
@@ -519,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
519 svc->fwmark, 472 svc->fwmark,
520 IP_VS_DBG_ADDR(svc->af, &svc->addr), 473 IP_VS_DBG_ADDR(svc->af, &svc->addr),
521 ntohs(svc->port), atomic_read(&svc->usecnt)); 474 ntohs(svc->port), atomic_read(&svc->usecnt));
475 free_percpu(svc->stats.cpustats);
522 kfree(svc); 476 kfree(svc);
523 } 477 }
524} 478}
@@ -545,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
545} 499}
546 500
547/* 501/*
548 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. 502 * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
549 * should be called with locked tables. 503 * should be called with locked tables.
550 */ 504 */
551static int ip_vs_rs_hash(struct ip_vs_dest *dest) 505static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
552{ 506{
553 unsigned hash; 507 unsigned hash;
554 508
@@ -562,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
562 */ 516 */
563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 517 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564 518
565 list_add(&dest->d_list, &ip_vs_rtable[hash]); 519 list_add(&dest->d_list, &ipvs->rs_table[hash]);
566 520
567 return 1; 521 return 1;
568} 522}
569 523
570/* 524/*
571 * UNhashes ip_vs_dest from ip_vs_rtable. 525 * UNhashes ip_vs_dest from rs_table.
572 * should be called with locked tables. 526 * should be called with locked tables.
573 */ 527 */
574static int ip_vs_rs_unhash(struct ip_vs_dest *dest) 528static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575{ 529{
576 /* 530 /*
577 * Remove it from the ip_vs_rtable table. 531 * Remove it from the rs_table table.
578 */ 532 */
579 if (!list_empty(&dest->d_list)) { 533 if (!list_empty(&dest->d_list)) {
580 list_del(&dest->d_list); 534 list_del(&dest->d_list);
@@ -588,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
588 * Lookup real service by <proto,addr,port> in the real service table. 542 * Lookup real service by <proto,addr,port> in the real service table.
589 */ 543 */
590struct ip_vs_dest * 544struct ip_vs_dest *
591ip_vs_lookup_real_service(int af, __u16 protocol, 545ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
592 const union nf_inet_addr *daddr, 546 const union nf_inet_addr *daddr,
593 __be16 dport) 547 __be16 dport)
594{ 548{
549 struct netns_ipvs *ipvs = net_ipvs(net);
595 unsigned hash; 550 unsigned hash;
596 struct ip_vs_dest *dest; 551 struct ip_vs_dest *dest;
597 552
@@ -601,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
601 */ 556 */
602 hash = ip_vs_rs_hashkey(af, daddr, dport); 557 hash = ip_vs_rs_hashkey(af, daddr, dport);
603 558
604 read_lock(&__ip_vs_rs_lock); 559 read_lock(&ipvs->rs_lock);
605 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { 560 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
606 if ((dest->af == af) 561 if ((dest->af == af)
607 && ip_vs_addr_equal(af, &dest->addr, daddr) 562 && ip_vs_addr_equal(af, &dest->addr, daddr)
608 && (dest->port == dport) 563 && (dest->port == dport)
609 && ((dest->protocol == protocol) || 564 && ((dest->protocol == protocol) ||
610 dest->vfwmark)) { 565 dest->vfwmark)) {
611 /* HIT */ 566 /* HIT */
612 read_unlock(&__ip_vs_rs_lock); 567 read_unlock(&ipvs->rs_lock);
613 return dest; 568 return dest;
614 } 569 }
615 } 570 }
616 read_unlock(&__ip_vs_rs_lock); 571 read_unlock(&ipvs->rs_lock);
617 572
618 return NULL; 573 return NULL;
619} 574}
@@ -652,15 +607,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
652 * ip_vs_lookup_real_service() looked promissing, but 607 * ip_vs_lookup_real_service() looked promissing, but
653 * seems not working as expected. 608 * seems not working as expected.
654 */ 609 */
655struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, 610struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
611 const union nf_inet_addr *daddr,
656 __be16 dport, 612 __be16 dport,
657 const union nf_inet_addr *vaddr, 613 const union nf_inet_addr *vaddr,
658 __be16 vport, __u16 protocol) 614 __be16 vport, __u16 protocol, __u32 fwmark)
659{ 615{
660 struct ip_vs_dest *dest; 616 struct ip_vs_dest *dest;
661 struct ip_vs_service *svc; 617 struct ip_vs_service *svc;
662 618
663 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); 619 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
664 if (!svc) 620 if (!svc)
665 return NULL; 621 return NULL;
666 dest = ip_vs_lookup_dest(svc, daddr, dport); 622 dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
685 __be16 dport) 641 __be16 dport)
686{ 642{
687 struct ip_vs_dest *dest, *nxt; 643 struct ip_vs_dest *dest, *nxt;
644 struct netns_ipvs *ipvs = net_ipvs(svc->net);
688 645
689 /* 646 /*
690 * Find the destination in trash 647 * Find the destination in trash
691 */ 648 */
692 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 649 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
693 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 650 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
694 "dest->refcnt=%d\n", 651 "dest->refcnt=%d\n",
695 dest->vfwmark, 652 dest->vfwmark,
@@ -720,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
720 list_del(&dest->n_list); 677 list_del(&dest->n_list);
721 ip_vs_dst_reset(dest); 678 ip_vs_dst_reset(dest);
722 __ip_vs_unbind_svc(dest); 679 __ip_vs_unbind_svc(dest);
680 free_percpu(dest->stats.cpustats);
723 kfree(dest); 681 kfree(dest);
724 } 682 }
725 } 683 }
@@ -737,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
737 * are expired, and the refcnt of each destination in the trash must 695 * are expired, and the refcnt of each destination in the trash must
738 * be 1, so we simply release them here. 696 * be 1, so we simply release them here.
739 */ 697 */
740static void ip_vs_trash_cleanup(void) 698static void ip_vs_trash_cleanup(struct net *net)
741{ 699{
742 struct ip_vs_dest *dest, *nxt; 700 struct ip_vs_dest *dest, *nxt;
701 struct netns_ipvs *ipvs = net_ipvs(net);
743 702
744 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 703 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
745 list_del(&dest->n_list); 704 list_del(&dest->n_list);
746 ip_vs_dst_reset(dest); 705 ip_vs_dst_reset(dest);
747 __ip_vs_unbind_svc(dest); 706 __ip_vs_unbind_svc(dest);
707 free_percpu(dest->stats.cpustats);
748 kfree(dest); 708 kfree(dest);
749 } 709 }
750} 710}
@@ -768,6 +728,7 @@ static void
768__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 728__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769 struct ip_vs_dest_user_kern *udest, int add) 729 struct ip_vs_dest_user_kern *udest, int add)
770{ 730{
731 struct netns_ipvs *ipvs = net_ipvs(svc->net);
771 int conn_flags; 732 int conn_flags;
772 733
773 /* set the weight and the flags */ 734 /* set the weight and the flags */
@@ -780,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
780 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 741 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
781 } else { 742 } else {
782 /* 743 /*
783 * Put the real service in ip_vs_rtable if not present. 744 * Put the real service in rs_table if not present.
784 * For now only for NAT! 745 * For now only for NAT!
785 */ 746 */
786 write_lock_bh(&__ip_vs_rs_lock); 747 write_lock_bh(&ipvs->rs_lock);
787 ip_vs_rs_hash(dest); 748 ip_vs_rs_hash(ipvs, dest);
788 write_unlock_bh(&__ip_vs_rs_lock); 749 write_unlock_bh(&ipvs->rs_lock);
789 } 750 }
790 atomic_set(&dest->conn_flags, conn_flags); 751 atomic_set(&dest->conn_flags, conn_flags);
791 752
@@ -813,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
813 spin_unlock(&dest->dst_lock); 774 spin_unlock(&dest->dst_lock);
814 775
815 if (add) 776 if (add)
816 ip_vs_new_estimator(&dest->stats); 777 ip_vs_new_estimator(svc->net, &dest->stats);
817 778
818 write_lock_bh(&__ip_vs_svc_lock); 779 write_lock_bh(&__ip_vs_svc_lock);
819 780
@@ -850,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
850 atype = ipv6_addr_type(&udest->addr.in6); 811 atype = ipv6_addr_type(&udest->addr.in6);
851 if ((!(atype & IPV6_ADDR_UNICAST) || 812 if ((!(atype & IPV6_ADDR_UNICAST) ||
852 atype & IPV6_ADDR_LINKLOCAL) && 813 atype & IPV6_ADDR_LINKLOCAL) &&
853 !__ip_vs_addr_is_local_v6(&udest->addr.in6)) 814 !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
854 return -EINVAL; 815 return -EINVAL;
855 } else 816 } else
856#endif 817#endif
857 { 818 {
858 atype = inet_addr_type(&init_net, udest->addr.ip); 819 atype = inet_addr_type(svc->net, udest->addr.ip);
859 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 820 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
860 return -EINVAL; 821 return -EINVAL;
861 } 822 }
@@ -865,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
865 pr_err("%s(): no memory.\n", __func__); 826 pr_err("%s(): no memory.\n", __func__);
866 return -ENOMEM; 827 return -ENOMEM;
867 } 828 }
829 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
830 if (!dest->stats.cpustats) {
831 pr_err("%s() alloc_percpu failed\n", __func__);
832 goto err_alloc;
833 }
868 834
869 dest->af = svc->af; 835 dest->af = svc->af;
870 dest->protocol = svc->protocol; 836 dest->protocol = svc->protocol;
@@ -888,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
888 854
889 LeaveFunction(2); 855 LeaveFunction(2);
890 return 0; 856 return 0;
857
858err_alloc:
859 kfree(dest);
860 return -ENOMEM;
891} 861}
892 862
893 863
@@ -1006,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1006/* 976/*
1007 * Delete a destination (must be already unlinked from the service) 977 * Delete a destination (must be already unlinked from the service)
1008 */ 978 */
1009static void __ip_vs_del_dest(struct ip_vs_dest *dest) 979static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1010{ 980{
1011 ip_vs_kill_estimator(&dest->stats); 981 struct netns_ipvs *ipvs = net_ipvs(net);
982
983 ip_vs_kill_estimator(net, &dest->stats);
1012 984
1013 /* 985 /*
1014 * Remove it from the d-linked list with the real services. 986 * Remove it from the d-linked list with the real services.
1015 */ 987 */
1016 write_lock_bh(&__ip_vs_rs_lock); 988 write_lock_bh(&ipvs->rs_lock);
1017 ip_vs_rs_unhash(dest); 989 ip_vs_rs_unhash(dest);
1018 write_unlock_bh(&__ip_vs_rs_lock); 990 write_unlock_bh(&ipvs->rs_lock);
1019 991
1020 /* 992 /*
1021 * Decrease the refcnt of the dest, and free the dest 993 * Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1034 and only one user context can update virtual service at a 1006 and only one user context can update virtual service at a
1035 time, so the operation here is OK */ 1007 time, so the operation here is OK */
1036 atomic_dec(&dest->svc->refcnt); 1008 atomic_dec(&dest->svc->refcnt);
1009 free_percpu(dest->stats.cpustats);
1037 kfree(dest); 1010 kfree(dest);
1038 } else { 1011 } else {
1039 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " 1012 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1041 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1014 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1042 ntohs(dest->port), 1015 ntohs(dest->port),
1043 atomic_read(&dest->refcnt)); 1016 atomic_read(&dest->refcnt));
1044 list_add(&dest->n_list, &ip_vs_dest_trash); 1017 list_add(&dest->n_list, &ipvs->dest_trash);
1045 atomic_inc(&dest->refcnt); 1018 atomic_inc(&dest->refcnt);
1046 } 1019 }
1047} 1020}
@@ -1105,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1105 /* 1078 /*
1106 * Delete the destination 1079 * Delete the destination
1107 */ 1080 */
1108 __ip_vs_del_dest(dest); 1081 __ip_vs_del_dest(svc->net, dest);
1109 1082
1110 LeaveFunction(2); 1083 LeaveFunction(2);
1111 1084
@@ -1117,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1117 * Add a service into the service hash table 1090 * Add a service into the service hash table
1118 */ 1091 */
1119static int 1092static int
1120ip_vs_add_service(struct ip_vs_service_user_kern *u, 1093ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1121 struct ip_vs_service **svc_p) 1094 struct ip_vs_service **svc_p)
1122{ 1095{
1123 int ret = 0; 1096 int ret = 0;
1124 struct ip_vs_scheduler *sched = NULL; 1097 struct ip_vs_scheduler *sched = NULL;
1125 struct ip_vs_pe *pe = NULL; 1098 struct ip_vs_pe *pe = NULL;
1126 struct ip_vs_service *svc = NULL; 1099 struct ip_vs_service *svc = NULL;
1100 struct netns_ipvs *ipvs = net_ipvs(net);
1127 1101
1128 /* increase the module use count */ 1102 /* increase the module use count */
1129 ip_vs_use_count_inc(); 1103 ip_vs_use_count_inc();
@@ -1137,7 +1111,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1137 } 1111 }
1138 1112
1139 if (u->pe_name && *u->pe_name) { 1113 if (u->pe_name && *u->pe_name) {
1140 pe = ip_vs_pe_get(u->pe_name); 1114 pe = ip_vs_pe_getbyname(u->pe_name);
1141 if (pe == NULL) { 1115 if (pe == NULL) {
1142 pr_info("persistence engine module ip_vs_pe_%s " 1116 pr_info("persistence engine module ip_vs_pe_%s "
1143 "not found\n", u->pe_name); 1117 "not found\n", u->pe_name);
@@ -1159,6 +1133,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1159 ret = -ENOMEM; 1133 ret = -ENOMEM;
1160 goto out_err; 1134 goto out_err;
1161 } 1135 }
1136 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1137 if (!svc->stats.cpustats) {
1138 pr_err("%s() alloc_percpu failed\n", __func__);
1139 goto out_err;
1140 }
1162 1141
1163 /* I'm the first user of the service */ 1142 /* I'm the first user of the service */
1164 atomic_set(&svc->usecnt, 0); 1143 atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1172 svc->flags = u->flags; 1151 svc->flags = u->flags;
1173 svc->timeout = u->timeout * HZ; 1152 svc->timeout = u->timeout * HZ;
1174 svc->netmask = u->netmask; 1153 svc->netmask = u->netmask;
1154 svc->net = net;
1175 1155
1176 INIT_LIST_HEAD(&svc->destinations); 1156 INIT_LIST_HEAD(&svc->destinations);
1177 rwlock_init(&svc->sched_lock); 1157 rwlock_init(&svc->sched_lock);
@@ -1189,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1189 1169
1190 /* Update the virtual service counters */ 1170 /* Update the virtual service counters */
1191 if (svc->port == FTPPORT) 1171 if (svc->port == FTPPORT)
1192 atomic_inc(&ip_vs_ftpsvc_counter); 1172 atomic_inc(&ipvs->ftpsvc_counter);
1193 else if (svc->port == 0) 1173 else if (svc->port == 0)
1194 atomic_inc(&ip_vs_nullsvc_counter); 1174 atomic_inc(&ipvs->nullsvc_counter);
1195 1175
1196 ip_vs_new_estimator(&svc->stats); 1176 ip_vs_new_estimator(net, &svc->stats);
1197 1177
1198 /* Count only IPv4 services for old get/setsockopt interface */ 1178 /* Count only IPv4 services for old get/setsockopt interface */
1199 if (svc->af == AF_INET) 1179 if (svc->af == AF_INET)
1200 ip_vs_num_services++; 1180 ipvs->num_services++;
1201 1181
1202 /* Hash the service into the service table */ 1182 /* Hash the service into the service table */
1203 write_lock_bh(&__ip_vs_svc_lock); 1183 write_lock_bh(&__ip_vs_svc_lock);
@@ -1207,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1207 *svc_p = svc; 1187 *svc_p = svc;
1208 return 0; 1188 return 0;
1209 1189
1190
1210 out_err: 1191 out_err:
1211 if (svc != NULL) { 1192 if (svc != NULL) {
1212 ip_vs_unbind_scheduler(svc); 1193 ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1215 ip_vs_app_inc_put(svc->inc); 1196 ip_vs_app_inc_put(svc->inc);
1216 local_bh_enable(); 1197 local_bh_enable();
1217 } 1198 }
1199 if (svc->stats.cpustats)
1200 free_percpu(svc->stats.cpustats);
1218 kfree(svc); 1201 kfree(svc);
1219 } 1202 }
1220 ip_vs_scheduler_put(sched); 1203 ip_vs_scheduler_put(sched);
@@ -1248,7 +1231,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1248 old_sched = sched; 1231 old_sched = sched;
1249 1232
1250 if (u->pe_name && *u->pe_name) { 1233 if (u->pe_name && *u->pe_name) {
1251 pe = ip_vs_pe_get(u->pe_name); 1234 pe = ip_vs_pe_getbyname(u->pe_name);
1252 if (pe == NULL) { 1235 if (pe == NULL) {
1253 pr_info("persistence engine module ip_vs_pe_%s " 1236 pr_info("persistence engine module ip_vs_pe_%s "
1254 "not found\n", u->pe_name); 1237 "not found\n", u->pe_name);
@@ -1334,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1334 struct ip_vs_dest *dest, *nxt; 1317 struct ip_vs_dest *dest, *nxt;
1335 struct ip_vs_scheduler *old_sched; 1318 struct ip_vs_scheduler *old_sched;
1336 struct ip_vs_pe *old_pe; 1319 struct ip_vs_pe *old_pe;
1320 struct netns_ipvs *ipvs = net_ipvs(svc->net);
1337 1321
1338 pr_info("%s: enter\n", __func__); 1322 pr_info("%s: enter\n", __func__);
1339 1323
1340 /* Count only IPv4 services for old get/setsockopt interface */ 1324 /* Count only IPv4 services for old get/setsockopt interface */
1341 if (svc->af == AF_INET) 1325 if (svc->af == AF_INET)
1342 ip_vs_num_services--; 1326 ipvs->num_services--;
1343 1327
1344 ip_vs_kill_estimator(&svc->stats); 1328 ip_vs_kill_estimator(svc->net, &svc->stats);
1345 1329
1346 /* Unbind scheduler */ 1330 /* Unbind scheduler */
1347 old_sched = svc->scheduler; 1331 old_sched = svc->scheduler;
@@ -1364,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1364 */ 1348 */
1365 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1349 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1366 __ip_vs_unlink_dest(svc, dest, 0); 1350 __ip_vs_unlink_dest(svc, dest, 0);
1367 __ip_vs_del_dest(dest); 1351 __ip_vs_del_dest(svc->net, dest);
1368 } 1352 }
1369 1353
1370 /* 1354 /*
1371 * Update the virtual service counters 1355 * Update the virtual service counters
1372 */ 1356 */
1373 if (svc->port == FTPPORT) 1357 if (svc->port == FTPPORT)
1374 atomic_dec(&ip_vs_ftpsvc_counter); 1358 atomic_dec(&ipvs->ftpsvc_counter);
1375 else if (svc->port == 0) 1359 else if (svc->port == 0)
1376 atomic_dec(&ip_vs_nullsvc_counter); 1360 atomic_dec(&ipvs->nullsvc_counter);
1377 1361
1378 /* 1362 /*
1379 * Free the service if nobody refers to it 1363 * Free the service if nobody refers to it
@@ -1383,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1383 svc->fwmark, 1367 svc->fwmark,
1384 IP_VS_DBG_ADDR(svc->af, &svc->addr), 1368 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1385 ntohs(svc->port), atomic_read(&svc->usecnt)); 1369 ntohs(svc->port), atomic_read(&svc->usecnt));
1370 free_percpu(svc->stats.cpustats);
1386 kfree(svc); 1371 kfree(svc);
1387 } 1372 }
1388 1373
@@ -1428,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1428/* 1413/*
1429 * Flush all the virtual services 1414 * Flush all the virtual services
1430 */ 1415 */
1431static int ip_vs_flush(void) 1416static int ip_vs_flush(struct net *net)
1432{ 1417{
1433 int idx; 1418 int idx;
1434 struct ip_vs_service *svc, *nxt; 1419 struct ip_vs_service *svc, *nxt;
1435 1420
1436 /* 1421 /*
1437 * Flush the service table hashed by <protocol,addr,port> 1422 * Flush the service table hashed by <netns,protocol,addr,port>
1438 */ 1423 */
1439 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1424 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1440 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { 1425 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1441 ip_vs_unlink_service(svc); 1426 s_list) {
1427 if (net_eq(svc->net, net))
1428 ip_vs_unlink_service(svc);
1442 } 1429 }
1443 } 1430 }
1444 1431
@@ -1448,7 +1435,8 @@ static int ip_vs_flush(void)
1448 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1435 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1449 list_for_each_entry_safe(svc, nxt, 1436 list_for_each_entry_safe(svc, nxt,
1450 &ip_vs_svc_fwm_table[idx], f_list) { 1437 &ip_vs_svc_fwm_table[idx], f_list) {
1451 ip_vs_unlink_service(svc); 1438 if (net_eq(svc->net, net))
1439 ip_vs_unlink_service(svc);
1452 } 1440 }
1453 } 1441 }
1454 1442
@@ -1472,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
1472 return 0; 1460 return 0;
1473} 1461}
1474 1462
1475static int ip_vs_zero_all(void) 1463static int ip_vs_zero_all(struct net *net)
1476{ 1464{
1477 int idx; 1465 int idx;
1478 struct ip_vs_service *svc; 1466 struct ip_vs_service *svc;
1479 1467
1480 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1468 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1481 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1469 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1482 ip_vs_zero_service(svc); 1470 if (net_eq(svc->net, net))
1471 ip_vs_zero_service(svc);
1483 } 1472 }
1484 } 1473 }
1485 1474
1486 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1475 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1476 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1488 ip_vs_zero_service(svc); 1477 if (net_eq(svc->net, net))
1478 ip_vs_zero_service(svc);
1489 } 1479 }
1490 } 1480 }
1491 1481
1492 ip_vs_zero_stats(&ip_vs_stats); 1482 ip_vs_zero_stats(net_ipvs(net)->tot_stats);
1493 return 0; 1483 return 0;
1494} 1484}
1495 1485
@@ -1498,6 +1488,7 @@ static int
1498proc_do_defense_mode(ctl_table *table, int write, 1488proc_do_defense_mode(ctl_table *table, int write,
1499 void __user *buffer, size_t *lenp, loff_t *ppos) 1489 void __user *buffer, size_t *lenp, loff_t *ppos)
1500{ 1490{
1491 struct net *net = current->nsproxy->net_ns;
1501 int *valp = table->data; 1492 int *valp = table->data;
1502 int val = *valp; 1493 int val = *valp;
1503 int rc; 1494 int rc;
@@ -1508,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write,
1508 /* Restore the correct value */ 1499 /* Restore the correct value */
1509 *valp = val; 1500 *valp = val;
1510 } else { 1501 } else {
1511 update_defense_level(); 1502 update_defense_level(net_ipvs(net));
1512 } 1503 }
1513 } 1504 }
1514 return rc; 1505 return rc;
@@ -1534,45 +1525,54 @@ proc_do_sync_threshold(ctl_table *table, int write,
1534 return rc; 1525 return rc;
1535} 1526}
1536 1527
1528static int
1529proc_do_sync_mode(ctl_table *table, int write,
1530 void __user *buffer, size_t *lenp, loff_t *ppos)
1531{
1532 int *valp = table->data;
1533 int val = *valp;
1534 int rc;
1535
1536 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1537 if (write && (*valp != val)) {
1538 if ((*valp < 0) || (*valp > 1)) {
1539 /* Restore the correct value */
1540 *valp = val;
1541 } else {
1542 struct net *net = current->nsproxy->net_ns;
1543 ip_vs_sync_switch_mode(net, val);
1544 }
1545 }
1546 return rc;
1547}
1537 1548
1538/* 1549/*
1539 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1550 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1551 * Do not change order or insert new entries without
1552 * align with netns init in __ip_vs_control_init()
1540 */ 1553 */
1541 1554
1542static struct ctl_table vs_vars[] = { 1555static struct ctl_table vs_vars[] = {
1543 { 1556 {
1544 .procname = "amemthresh", 1557 .procname = "amemthresh",
1545 .data = &sysctl_ip_vs_amemthresh,
1546 .maxlen = sizeof(int), 1558 .maxlen = sizeof(int),
1547 .mode = 0644, 1559 .mode = 0644,
1548 .proc_handler = proc_dointvec, 1560 .proc_handler = proc_dointvec,
1549 }, 1561 },
1550#ifdef CONFIG_IP_VS_DEBUG
1551 {
1552 .procname = "debug_level",
1553 .data = &sysctl_ip_vs_debug_level,
1554 .maxlen = sizeof(int),
1555 .mode = 0644,
1556 .proc_handler = proc_dointvec,
1557 },
1558#endif
1559 { 1562 {
1560 .procname = "am_droprate", 1563 .procname = "am_droprate",
1561 .data = &sysctl_ip_vs_am_droprate,
1562 .maxlen = sizeof(int), 1564 .maxlen = sizeof(int),
1563 .mode = 0644, 1565 .mode = 0644,
1564 .proc_handler = proc_dointvec, 1566 .proc_handler = proc_dointvec,
1565 }, 1567 },
1566 { 1568 {
1567 .procname = "drop_entry", 1569 .procname = "drop_entry",
1568 .data = &sysctl_ip_vs_drop_entry,
1569 .maxlen = sizeof(int), 1570 .maxlen = sizeof(int),
1570 .mode = 0644, 1571 .mode = 0644,
1571 .proc_handler = proc_do_defense_mode, 1572 .proc_handler = proc_do_defense_mode,
1572 }, 1573 },
1573 { 1574 {
1574 .procname = "drop_packet", 1575 .procname = "drop_packet",
1575 .data = &sysctl_ip_vs_drop_packet,
1576 .maxlen = sizeof(int), 1576 .maxlen = sizeof(int),
1577 .mode = 0644, 1577 .mode = 0644,
1578 .proc_handler = proc_do_defense_mode, 1578 .proc_handler = proc_do_defense_mode,
@@ -1580,7 +1580,6 @@ static struct ctl_table vs_vars[] = {
1580#ifdef CONFIG_IP_VS_NFCT 1580#ifdef CONFIG_IP_VS_NFCT
1581 { 1581 {
1582 .procname = "conntrack", 1582 .procname = "conntrack",
1583 .data = &sysctl_ip_vs_conntrack,
1584 .maxlen = sizeof(int), 1583 .maxlen = sizeof(int),
1585 .mode = 0644, 1584 .mode = 0644,
1586 .proc_handler = &proc_dointvec, 1585 .proc_handler = &proc_dointvec,
@@ -1588,18 +1587,62 @@ static struct ctl_table vs_vars[] = {
1588#endif 1587#endif
1589 { 1588 {
1590 .procname = "secure_tcp", 1589 .procname = "secure_tcp",
1591 .data = &sysctl_ip_vs_secure_tcp,
1592 .maxlen = sizeof(int), 1590 .maxlen = sizeof(int),
1593 .mode = 0644, 1591 .mode = 0644,
1594 .proc_handler = proc_do_defense_mode, 1592 .proc_handler = proc_do_defense_mode,
1595 }, 1593 },
1596 { 1594 {
1597 .procname = "snat_reroute", 1595 .procname = "snat_reroute",
1598 .data = &sysctl_ip_vs_snat_reroute,
1599 .maxlen = sizeof(int), 1596 .maxlen = sizeof(int),
1600 .mode = 0644, 1597 .mode = 0644,
1601 .proc_handler = &proc_dointvec, 1598 .proc_handler = &proc_dointvec,
1602 }, 1599 },
1600 {
1601 .procname = "sync_version",
1602 .maxlen = sizeof(int),
1603 .mode = 0644,
1604 .proc_handler = &proc_do_sync_mode,
1605 },
1606 {
1607 .procname = "cache_bypass",
1608 .maxlen = sizeof(int),
1609 .mode = 0644,
1610 .proc_handler = proc_dointvec,
1611 },
1612 {
1613 .procname = "expire_nodest_conn",
1614 .maxlen = sizeof(int),
1615 .mode = 0644,
1616 .proc_handler = proc_dointvec,
1617 },
1618 {
1619 .procname = "expire_quiescent_template",
1620 .maxlen = sizeof(int),
1621 .mode = 0644,
1622 .proc_handler = proc_dointvec,
1623 },
1624 {
1625 .procname = "sync_threshold",
1626 .maxlen =
1627 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1628 .mode = 0644,
1629 .proc_handler = proc_do_sync_threshold,
1630 },
1631 {
1632 .procname = "nat_icmp_send",
1633 .maxlen = sizeof(int),
1634 .mode = 0644,
1635 .proc_handler = proc_dointvec,
1636 },
1637#ifdef CONFIG_IP_VS_DEBUG
1638 {
1639 .procname = "debug_level",
1640 .data = &sysctl_ip_vs_debug_level,
1641 .maxlen = sizeof(int),
1642 .mode = 0644,
1643 .proc_handler = proc_dointvec,
1644 },
1645#endif
1603#if 0 1646#if 0
1604 { 1647 {
1605 .procname = "timeout_established", 1648 .procname = "timeout_established",
@@ -1686,41 +1729,6 @@ static struct ctl_table vs_vars[] = {
1686 .proc_handler = proc_dointvec_jiffies, 1729 .proc_handler = proc_dointvec_jiffies,
1687 }, 1730 },
1688#endif 1731#endif
1689 {
1690 .procname = "cache_bypass",
1691 .data = &sysctl_ip_vs_cache_bypass,
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
1694 .proc_handler = proc_dointvec,
1695 },
1696 {
1697 .procname = "expire_nodest_conn",
1698 .data = &sysctl_ip_vs_expire_nodest_conn,
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
1701 .proc_handler = proc_dointvec,
1702 },
1703 {
1704 .procname = "expire_quiescent_template",
1705 .data = &sysctl_ip_vs_expire_quiescent_template,
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
1708 .proc_handler = proc_dointvec,
1709 },
1710 {
1711 .procname = "sync_threshold",
1712 .data = &sysctl_ip_vs_sync_threshold,
1713 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1714 .mode = 0644,
1715 .proc_handler = proc_do_sync_threshold,
1716 },
1717 {
1718 .procname = "nat_icmp_send",
1719 .data = &sysctl_ip_vs_nat_icmp_send,
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
1722 .proc_handler = proc_dointvec,
1723 },
1724 { } 1732 { }
1725}; 1733};
1726 1734
@@ -1732,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = {
1732}; 1740};
1733EXPORT_SYMBOL_GPL(net_vs_ctl_path); 1741EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1734 1742
1735static struct ctl_table_header * sysctl_header;
1736
1737#ifdef CONFIG_PROC_FS 1743#ifdef CONFIG_PROC_FS
1738 1744
1739struct ip_vs_iter { 1745struct ip_vs_iter {
1746 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1740 struct list_head *table; 1747 struct list_head *table;
1741 int bucket; 1748 int bucket;
1742}; 1749};
@@ -1763,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
1763/* Get the Nth entry in the two lists */ 1770/* Get the Nth entry in the two lists */
1764static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1771static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1765{ 1772{
1773 struct net *net = seq_file_net(seq);
1766 struct ip_vs_iter *iter = seq->private; 1774 struct ip_vs_iter *iter = seq->private;
1767 int idx; 1775 int idx;
1768 struct ip_vs_service *svc; 1776 struct ip_vs_service *svc;
@@ -1770,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1770 /* look in hash by protocol */ 1778 /* look in hash by protocol */
1771 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1779 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1772 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1780 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1773 if (pos-- == 0){ 1781 if (net_eq(svc->net, net) && pos-- == 0) {
1774 iter->table = ip_vs_svc_table; 1782 iter->table = ip_vs_svc_table;
1775 iter->bucket = idx; 1783 iter->bucket = idx;
1776 return svc; 1784 return svc;
@@ -1781,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1781 /* keep looking in fwmark */ 1789 /* keep looking in fwmark */
1782 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1790 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1783 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1791 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1784 if (pos-- == 0) { 1792 if (net_eq(svc->net, net) && pos-- == 0) {
1785 iter->table = ip_vs_svc_fwm_table; 1793 iter->table = ip_vs_svc_fwm_table;
1786 iter->bucket = idx; 1794 iter->bucket = idx;
1787 return svc; 1795 return svc;
@@ -1935,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
1935 1943
1936static int ip_vs_info_open(struct inode *inode, struct file *file) 1944static int ip_vs_info_open(struct inode *inode, struct file *file)
1937{ 1945{
1938 return seq_open_private(file, &ip_vs_info_seq_ops, 1946 return seq_open_net(inode, file, &ip_vs_info_seq_ops,
1939 sizeof(struct ip_vs_iter)); 1947 sizeof(struct ip_vs_iter));
1940} 1948}
1941 1949
@@ -1949,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = {
1949 1957
1950#endif 1958#endif
1951 1959
1952struct ip_vs_stats ip_vs_stats = {
1953 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1954};
1955
1956#ifdef CONFIG_PROC_FS 1960#ifdef CONFIG_PROC_FS
1957static int ip_vs_stats_show(struct seq_file *seq, void *v) 1961static int ip_vs_stats_show(struct seq_file *seq, void *v)
1958{ 1962{
1963 struct net *net = seq_file_single_net(seq);
1964 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
1959 1965
1960/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1966/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1961 seq_puts(seq, 1967 seq_puts(seq,
@@ -1963,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
1963 seq_printf(seq, 1969 seq_printf(seq,
1964 " Conns Packets Packets Bytes Bytes\n"); 1970 " Conns Packets Packets Bytes Bytes\n");
1965 1971
1966 spin_lock_bh(&ip_vs_stats.lock); 1972 spin_lock_bh(&tot_stats->lock);
1967 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, 1973 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
1968 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, 1974 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
1969 (unsigned long long) ip_vs_stats.ustats.inbytes, 1975 (unsigned long long) tot_stats->ustats.inbytes,
1970 (unsigned long long) ip_vs_stats.ustats.outbytes); 1976 (unsigned long long) tot_stats->ustats.outbytes);
1971 1977
1972/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1978/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1973 seq_puts(seq, 1979 seq_puts(seq,
1974 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 1980 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1975 seq_printf(seq,"%8X %8X %8X %16X %16X\n", 1981 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1976 ip_vs_stats.ustats.cps, 1982 tot_stats->ustats.cps,
1977 ip_vs_stats.ustats.inpps, 1983 tot_stats->ustats.inpps,
1978 ip_vs_stats.ustats.outpps, 1984 tot_stats->ustats.outpps,
1979 ip_vs_stats.ustats.inbps, 1985 tot_stats->ustats.inbps,
1980 ip_vs_stats.ustats.outbps); 1986 tot_stats->ustats.outbps);
1981 spin_unlock_bh(&ip_vs_stats.lock); 1987 spin_unlock_bh(&tot_stats->lock);
1982 1988
1983 return 0; 1989 return 0;
1984} 1990}
1985 1991
1986static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) 1992static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1987{ 1993{
1988 return single_open(file, ip_vs_stats_show, NULL); 1994 return single_open_net(inode, file, ip_vs_stats_show);
1989} 1995}
1990 1996
1991static const struct file_operations ip_vs_stats_fops = { 1997static const struct file_operations ip_vs_stats_fops = {
@@ -1996,13 +2002,68 @@ static const struct file_operations ip_vs_stats_fops = {
1996 .release = single_release, 2002 .release = single_release,
1997}; 2003};
1998 2004
2005static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2006{
2007 struct net *net = seq_file_single_net(seq);
2008 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
2009 int i;
2010
2011/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2012 seq_puts(seq,
2013 " Total Incoming Outgoing Incoming Outgoing\n");
2014 seq_printf(seq,
2015 "CPU Conns Packets Packets Bytes Bytes\n");
2016
2017 for_each_possible_cpu(i) {
2018 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
2019 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2020 i, u->ustats.conns, u->ustats.inpkts,
2021 u->ustats.outpkts, (__u64)u->ustats.inbytes,
2022 (__u64)u->ustats.outbytes);
2023 }
2024
2025 spin_lock_bh(&tot_stats->lock);
2026 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2027 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2028 tot_stats->ustats.outpkts,
2029 (unsigned long long) tot_stats->ustats.inbytes,
2030 (unsigned long long) tot_stats->ustats.outbytes);
2031
2032/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2033 seq_puts(seq,
2034 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2035 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2036 tot_stats->ustats.cps,
2037 tot_stats->ustats.inpps,
2038 tot_stats->ustats.outpps,
2039 tot_stats->ustats.inbps,
2040 tot_stats->ustats.outbps);
2041 spin_unlock_bh(&tot_stats->lock);
2042
2043 return 0;
2044}
2045
2046static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2047{
2048 return single_open_net(inode, file, ip_vs_stats_percpu_show);
2049}
2050
2051static const struct file_operations ip_vs_stats_percpu_fops = {
2052 .owner = THIS_MODULE,
2053 .open = ip_vs_stats_percpu_seq_open,
2054 .read = seq_read,
2055 .llseek = seq_lseek,
2056 .release = single_release,
2057};
1999#endif 2058#endif
2000 2059
2001/* 2060/*
2002 * Set timeout values for tcp tcpfin udp in the timeout_table. 2061 * Set timeout values for tcp tcpfin udp in the timeout_table.
2003 */ 2062 */
2004static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) 2063static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2005{ 2064{
2065 struct ip_vs_proto_data *pd;
2066
2006 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2067 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2007 u->tcp_timeout, 2068 u->tcp_timeout,
2008 u->tcp_fin_timeout, 2069 u->tcp_fin_timeout,
@@ -2010,19 +2071,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2010 2071
2011#ifdef CONFIG_IP_VS_PROTO_TCP 2072#ifdef CONFIG_IP_VS_PROTO_TCP
2012 if (u->tcp_timeout) { 2073 if (u->tcp_timeout) {
2013 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] 2074 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2075 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2014 = u->tcp_timeout * HZ; 2076 = u->tcp_timeout * HZ;
2015 } 2077 }
2016 2078
2017 if (u->tcp_fin_timeout) { 2079 if (u->tcp_fin_timeout) {
2018 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] 2080 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2081 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2019 = u->tcp_fin_timeout * HZ; 2082 = u->tcp_fin_timeout * HZ;
2020 } 2083 }
2021#endif 2084#endif
2022 2085
2023#ifdef CONFIG_IP_VS_PROTO_UDP 2086#ifdef CONFIG_IP_VS_PROTO_UDP
2024 if (u->udp_timeout) { 2087 if (u->udp_timeout) {
2025 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] 2088 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2089 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2026 = u->udp_timeout * HZ; 2090 = u->udp_timeout * HZ;
2027 } 2091 }
2028#endif 2092#endif
@@ -2087,6 +2151,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2087static int 2151static int
2088do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 2152do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2089{ 2153{
2154 struct net *net = sock_net(sk);
2090 int ret; 2155 int ret;
2091 unsigned char arg[MAX_ARG_LEN]; 2156 unsigned char arg[MAX_ARG_LEN];
2092 struct ip_vs_service_user *usvc_compat; 2157 struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2186,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2121 2186
2122 if (cmd == IP_VS_SO_SET_FLUSH) { 2187 if (cmd == IP_VS_SO_SET_FLUSH) {
2123 /* Flush the virtual service */ 2188 /* Flush the virtual service */
2124 ret = ip_vs_flush(); 2189 ret = ip_vs_flush(net);
2125 goto out_unlock; 2190 goto out_unlock;
2126 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2191 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2127 /* Set timeout values for (tcp tcpfin udp) */ 2192 /* Set timeout values for (tcp tcpfin udp) */
2128 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); 2193 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2129 goto out_unlock; 2194 goto out_unlock;
2130 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { 2195 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2131 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2196 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2132 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); 2197 ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2198 dm->syncid);
2133 goto out_unlock; 2199 goto out_unlock;
2134 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { 2200 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2135 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2201 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2136 ret = stop_sync_thread(dm->state); 2202 ret = stop_sync_thread(net, dm->state);
2137 goto out_unlock; 2203 goto out_unlock;
2138 } 2204 }
2139 2205
@@ -2148,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2148 if (cmd == IP_VS_SO_SET_ZERO) { 2214 if (cmd == IP_VS_SO_SET_ZERO) {
2149 /* if no service address is set, zero counters in all */ 2215 /* if no service address is set, zero counters in all */
2150 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2216 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2151 ret = ip_vs_zero_all(); 2217 ret = ip_vs_zero_all(net);
2152 goto out_unlock; 2218 goto out_unlock;
2153 } 2219 }
2154 } 2220 }
@@ -2165,10 +2231,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2165 2231
2166 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2232 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2167 if (usvc.fwmark == 0) 2233 if (usvc.fwmark == 0)
2168 svc = __ip_vs_service_find(usvc.af, usvc.protocol, 2234 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2169 &usvc.addr, usvc.port); 2235 &usvc.addr, usvc.port);
2170 else 2236 else
2171 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); 2237 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2172 2238
2173 if (cmd != IP_VS_SO_SET_ADD 2239 if (cmd != IP_VS_SO_SET_ADD
2174 && (svc == NULL || svc->protocol != usvc.protocol)) { 2240 && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2247,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2181 if (svc != NULL) 2247 if (svc != NULL)
2182 ret = -EEXIST; 2248 ret = -EEXIST;
2183 else 2249 else
2184 ret = ip_vs_add_service(&usvc, &svc); 2250 ret = ip_vs_add_service(net, &usvc, &svc);
2185 break; 2251 break;
2186 case IP_VS_SO_SET_EDIT: 2252 case IP_VS_SO_SET_EDIT:
2187 ret = ip_vs_edit_service(svc, &usvc); 2253 ret = ip_vs_edit_service(svc, &usvc);
@@ -2241,7 +2307,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2241} 2307}
2242 2308
2243static inline int 2309static inline int
2244__ip_vs_get_service_entries(const struct ip_vs_get_services *get, 2310__ip_vs_get_service_entries(struct net *net,
2311 const struct ip_vs_get_services *get,
2245 struct ip_vs_get_services __user *uptr) 2312 struct ip_vs_get_services __user *uptr)
2246{ 2313{
2247 int idx, count=0; 2314 int idx, count=0;
@@ -2252,7 +2319,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2252 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2319 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2253 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2320 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2254 /* Only expose IPv4 entries to old interface */ 2321 /* Only expose IPv4 entries to old interface */
2255 if (svc->af != AF_INET) 2322 if (svc->af != AF_INET || !net_eq(svc->net, net))
2256 continue; 2323 continue;
2257 2324
2258 if (count >= get->num_services) 2325 if (count >= get->num_services)
@@ -2271,7 +2338,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2271 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2338 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2272 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2339 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2273 /* Only expose IPv4 entries to old interface */ 2340 /* Only expose IPv4 entries to old interface */
2274 if (svc->af != AF_INET) 2341 if (svc->af != AF_INET || !net_eq(svc->net, net))
2275 continue; 2342 continue;
2276 2343
2277 if (count >= get->num_services) 2344 if (count >= get->num_services)
@@ -2291,7 +2358,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2291} 2358}
2292 2359
2293static inline int 2360static inline int
2294__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, 2361__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2295 struct ip_vs_get_dests __user *uptr) 2362 struct ip_vs_get_dests __user *uptr)
2296{ 2363{
2297 struct ip_vs_service *svc; 2364 struct ip_vs_service *svc;
@@ -2299,9 +2366,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2299 int ret = 0; 2366 int ret = 0;
2300 2367
2301 if (get->fwmark) 2368 if (get->fwmark)
2302 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); 2369 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2303 else 2370 else
2304 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, 2371 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2305 get->port); 2372 get->port);
2306 2373
2307 if (svc) { 2374 if (svc) {
@@ -2336,17 +2403,19 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2336} 2403}
2337 2404
2338static inline void 2405static inline void
2339__ip_vs_get_timeouts(struct ip_vs_timeout_user *u) 2406__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2340{ 2407{
2408 struct ip_vs_proto_data *pd;
2409
2341#ifdef CONFIG_IP_VS_PROTO_TCP 2410#ifdef CONFIG_IP_VS_PROTO_TCP
2342 u->tcp_timeout = 2411 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2343 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2412 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2344 u->tcp_fin_timeout = 2413 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2345 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2346#endif 2414#endif
2347#ifdef CONFIG_IP_VS_PROTO_UDP 2415#ifdef CONFIG_IP_VS_PROTO_UDP
2416 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2348 u->udp_timeout = 2417 u->udp_timeout =
2349 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2418 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2350#endif 2419#endif
2351} 2420}
2352 2421
@@ -2375,7 +2444,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2375 unsigned char arg[128]; 2444 unsigned char arg[128];
2376 int ret = 0; 2445 int ret = 0;
2377 unsigned int copylen; 2446 unsigned int copylen;
2447 struct net *net = sock_net(sk);
2448 struct netns_ipvs *ipvs = net_ipvs(net);
2378 2449
2450 BUG_ON(!net);
2379 if (!capable(CAP_NET_ADMIN)) 2451 if (!capable(CAP_NET_ADMIN))
2380 return -EPERM; 2452 return -EPERM;
2381 2453
@@ -2418,7 +2490,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2418 struct ip_vs_getinfo info; 2490 struct ip_vs_getinfo info;
2419 info.version = IP_VS_VERSION_CODE; 2491 info.version = IP_VS_VERSION_CODE;
2420 info.size = ip_vs_conn_tab_size; 2492 info.size = ip_vs_conn_tab_size;
2421 info.num_services = ip_vs_num_services; 2493 info.num_services = ipvs->num_services;
2422 if (copy_to_user(user, &info, sizeof(info)) != 0) 2494 if (copy_to_user(user, &info, sizeof(info)) != 0)
2423 ret = -EFAULT; 2495 ret = -EFAULT;
2424 } 2496 }
@@ -2437,7 +2509,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2437 ret = -EINVAL; 2509 ret = -EINVAL;
2438 goto out; 2510 goto out;
2439 } 2511 }
2440 ret = __ip_vs_get_service_entries(get, user); 2512 ret = __ip_vs_get_service_entries(net, get, user);
2441 } 2513 }
2442 break; 2514 break;
2443 2515
@@ -2450,10 +2522,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2450 entry = (struct ip_vs_service_entry *)arg; 2522 entry = (struct ip_vs_service_entry *)arg;
2451 addr.ip = entry->addr; 2523 addr.ip = entry->addr;
2452 if (entry->fwmark) 2524 if (entry->fwmark)
2453 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); 2525 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2454 else 2526 else
2455 svc = __ip_vs_service_find(AF_INET, entry->protocol, 2527 svc = __ip_vs_service_find(net, AF_INET,
2456 &addr, entry->port); 2528 entry->protocol, &addr,
2529 entry->port);
2457 if (svc) { 2530 if (svc) {
2458 ip_vs_copy_service(entry, svc); 2531 ip_vs_copy_service(entry, svc);
2459 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2532 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2476,7 +2549,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2476 ret = -EINVAL; 2549 ret = -EINVAL;
2477 goto out; 2550 goto out;
2478 } 2551 }
2479 ret = __ip_vs_get_dest_entries(get, user); 2552 ret = __ip_vs_get_dest_entries(net, get, user);
2480 } 2553 }
2481 break; 2554 break;
2482 2555
@@ -2484,7 +2557,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2484 { 2557 {
2485 struct ip_vs_timeout_user t; 2558 struct ip_vs_timeout_user t;
2486 2559
2487 __ip_vs_get_timeouts(&t); 2560 __ip_vs_get_timeouts(net, &t);
2488 if (copy_to_user(user, &t, sizeof(t)) != 0) 2561 if (copy_to_user(user, &t, sizeof(t)) != 0)
2489 ret = -EFAULT; 2562 ret = -EFAULT;
2490 } 2563 }
@@ -2495,15 +2568,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2495 struct ip_vs_daemon_user d[2]; 2568 struct ip_vs_daemon_user d[2];
2496 2569
2497 memset(&d, 0, sizeof(d)); 2570 memset(&d, 0, sizeof(d));
2498 if (ip_vs_sync_state & IP_VS_STATE_MASTER) { 2571 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2499 d[0].state = IP_VS_STATE_MASTER; 2572 d[0].state = IP_VS_STATE_MASTER;
2500 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); 2573 strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2501 d[0].syncid = ip_vs_master_syncid; 2574 sizeof(d[0].mcast_ifn));
2575 d[0].syncid = ipvs->master_syncid;
2502 } 2576 }
2503 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { 2577 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2504 d[1].state = IP_VS_STATE_BACKUP; 2578 d[1].state = IP_VS_STATE_BACKUP;
2505 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); 2579 strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2506 d[1].syncid = ip_vs_backup_syncid; 2580 sizeof(d[1].mcast_ifn));
2581 d[1].syncid = ipvs->backup_syncid;
2507 } 2582 }
2508 if (copy_to_user(user, &d, sizeof(d)) != 0) 2583 if (copy_to_user(user, &d, sizeof(d)) != 0)
2509 ret = -EFAULT; 2584 ret = -EFAULT;
@@ -2542,6 +2617,7 @@ static struct genl_family ip_vs_genl_family = {
2542 .name = IPVS_GENL_NAME, 2617 .name = IPVS_GENL_NAME,
2543 .version = IPVS_GENL_VERSION, 2618 .version = IPVS_GENL_VERSION,
2544 .maxattr = IPVS_CMD_MAX, 2619 .maxattr = IPVS_CMD_MAX,
2620 .netnsok = true, /* Make ipvsadm to work on netns */
2545}; 2621};
2546 2622
2547/* Policy used for first-level command attributes */ 2623/* Policy used for first-level command attributes */
@@ -2696,11 +2772,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
2696 int idx = 0, i; 2772 int idx = 0, i;
2697 int start = cb->args[0]; 2773 int start = cb->args[0];
2698 struct ip_vs_service *svc; 2774 struct ip_vs_service *svc;
2775 struct net *net = skb_sknet(skb);
2699 2776
2700 mutex_lock(&__ip_vs_mutex); 2777 mutex_lock(&__ip_vs_mutex);
2701 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2778 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2702 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 2779 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2703 if (++idx <= start) 2780 if (++idx <= start || !net_eq(svc->net, net))
2704 continue; 2781 continue;
2705 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2782 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2706 idx--; 2783 idx--;
@@ -2711,7 +2788,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
2711 2788
2712 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2789 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2713 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 2790 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2714 if (++idx <= start) 2791 if (++idx <= start || !net_eq(svc->net, net))
2715 continue; 2792 continue;
2716 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2793 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2717 idx--; 2794 idx--;
@@ -2727,7 +2804,8 @@ nla_put_failure:
2727 return skb->len; 2804 return skb->len;
2728} 2805}
2729 2806
2730static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, 2807static int ip_vs_genl_parse_service(struct net *net,
2808 struct ip_vs_service_user_kern *usvc,
2731 struct nlattr *nla, int full_entry, 2809 struct nlattr *nla, int full_entry,
2732 struct ip_vs_service **ret_svc) 2810 struct ip_vs_service **ret_svc)
2733{ 2811{
@@ -2770,9 +2848,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2770 } 2848 }
2771 2849
2772 if (usvc->fwmark) 2850 if (usvc->fwmark)
2773 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); 2851 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
2774 else 2852 else
2775 svc = __ip_vs_service_find(usvc->af, usvc->protocol, 2853 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
2776 &usvc->addr, usvc->port); 2854 &usvc->addr, usvc->port);
2777 *ret_svc = svc; 2855 *ret_svc = svc;
2778 2856
@@ -2809,13 +2887,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2809 return 0; 2887 return 0;
2810} 2888}
2811 2889
2812static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) 2890static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
2891 struct nlattr *nla)
2813{ 2892{
2814 struct ip_vs_service_user_kern usvc; 2893 struct ip_vs_service_user_kern usvc;
2815 struct ip_vs_service *svc; 2894 struct ip_vs_service *svc;
2816 int ret; 2895 int ret;
2817 2896
2818 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); 2897 ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
2819 return ret ? ERR_PTR(ret) : svc; 2898 return ret ? ERR_PTR(ret) : svc;
2820} 2899}
2821 2900
@@ -2883,6 +2962,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2883 struct ip_vs_service *svc; 2962 struct ip_vs_service *svc;
2884 struct ip_vs_dest *dest; 2963 struct ip_vs_dest *dest;
2885 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 2964 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2965 struct net *net = skb_sknet(skb);
2886 2966
2887 mutex_lock(&__ip_vs_mutex); 2967 mutex_lock(&__ip_vs_mutex);
2888 2968
@@ -2891,7 +2971,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2891 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) 2971 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2892 goto out_err; 2972 goto out_err;
2893 2973
2894 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); 2974
2975 svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
2895 if (IS_ERR(svc) || svc == NULL) 2976 if (IS_ERR(svc) || svc == NULL)
2896 goto out_err; 2977 goto out_err;
2897 2978
@@ -3005,20 +3086,23 @@ nla_put_failure:
3005static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3086static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3006 struct netlink_callback *cb) 3087 struct netlink_callback *cb)
3007{ 3088{
3089 struct net *net = skb_net(skb);
3090 struct netns_ipvs *ipvs = net_ipvs(net);
3091
3008 mutex_lock(&__ip_vs_mutex); 3092 mutex_lock(&__ip_vs_mutex);
3009 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3093 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3010 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3094 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3011 ip_vs_master_mcast_ifn, 3095 ipvs->master_mcast_ifn,
3012 ip_vs_master_syncid, cb) < 0) 3096 ipvs->master_syncid, cb) < 0)
3013 goto nla_put_failure; 3097 goto nla_put_failure;
3014 3098
3015 cb->args[0] = 1; 3099 cb->args[0] = 1;
3016 } 3100 }
3017 3101
3018 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3102 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3019 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3103 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3020 ip_vs_backup_mcast_ifn, 3104 ipvs->backup_mcast_ifn,
3021 ip_vs_backup_syncid, cb) < 0) 3105 ipvs->backup_syncid, cb) < 0)
3022 goto nla_put_failure; 3106 goto nla_put_failure;
3023 3107
3024 cb->args[1] = 1; 3108 cb->args[1] = 1;
@@ -3030,31 +3114,33 @@ nla_put_failure:
3030 return skb->len; 3114 return skb->len;
3031} 3115}
3032 3116
3033static int ip_vs_genl_new_daemon(struct nlattr **attrs) 3117static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3034{ 3118{
3035 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3119 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3036 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3120 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3037 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3121 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3038 return -EINVAL; 3122 return -EINVAL;
3039 3123
3040 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), 3124 return start_sync_thread(net,
3125 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3041 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3126 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3042 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); 3127 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3043} 3128}
3044 3129
3045static int ip_vs_genl_del_daemon(struct nlattr **attrs) 3130static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3046{ 3131{
3047 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3132 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3048 return -EINVAL; 3133 return -EINVAL;
3049 3134
3050 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3135 return stop_sync_thread(net,
3136 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3051} 3137}
3052 3138
3053static int ip_vs_genl_set_config(struct nlattr **attrs) 3139static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3054{ 3140{
3055 struct ip_vs_timeout_user t; 3141 struct ip_vs_timeout_user t;
3056 3142
3057 __ip_vs_get_timeouts(&t); 3143 __ip_vs_get_timeouts(net, &t);
3058 3144
3059 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3145 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3060 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 3146 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3066,7 +3152,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
3066 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3152 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3067 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3153 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3068 3154
3069 return ip_vs_set_timeout(&t); 3155 return ip_vs_set_timeout(net, &t);
3070} 3156}
3071 3157
3072static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 3158static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3076,16 +3162,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3076 struct ip_vs_dest_user_kern udest; 3162 struct ip_vs_dest_user_kern udest;
3077 int ret = 0, cmd; 3163 int ret = 0, cmd;
3078 int need_full_svc = 0, need_full_dest = 0; 3164 int need_full_svc = 0, need_full_dest = 0;
3165 struct net *net;
3166 struct netns_ipvs *ipvs;
3079 3167
3168 net = skb_sknet(skb);
3169 ipvs = net_ipvs(net);
3080 cmd = info->genlhdr->cmd; 3170 cmd = info->genlhdr->cmd;
3081 3171
3082 mutex_lock(&__ip_vs_mutex); 3172 mutex_lock(&__ip_vs_mutex);
3083 3173
3084 if (cmd == IPVS_CMD_FLUSH) { 3174 if (cmd == IPVS_CMD_FLUSH) {
3085 ret = ip_vs_flush(); 3175 ret = ip_vs_flush(net);
3086 goto out; 3176 goto out;
3087 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3177 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3088 ret = ip_vs_genl_set_config(info->attrs); 3178 ret = ip_vs_genl_set_config(net, info->attrs);
3089 goto out; 3179 goto out;
3090 } else if (cmd == IPVS_CMD_NEW_DAEMON || 3180 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3091 cmd == IPVS_CMD_DEL_DAEMON) { 3181 cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3101,13 +3191,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3101 } 3191 }
3102 3192
3103 if (cmd == IPVS_CMD_NEW_DAEMON) 3193 if (cmd == IPVS_CMD_NEW_DAEMON)
3104 ret = ip_vs_genl_new_daemon(daemon_attrs); 3194 ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3105 else 3195 else
3106 ret = ip_vs_genl_del_daemon(daemon_attrs); 3196 ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3107 goto out; 3197 goto out;
3108 } else if (cmd == IPVS_CMD_ZERO && 3198 } else if (cmd == IPVS_CMD_ZERO &&
3109 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3199 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3110 ret = ip_vs_zero_all(); 3200 ret = ip_vs_zero_all(net);
3111 goto out; 3201 goto out;
3112 } 3202 }
3113 3203
@@ -3117,7 +3207,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3117 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3207 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3118 need_full_svc = 1; 3208 need_full_svc = 1;
3119 3209
3120 ret = ip_vs_genl_parse_service(&usvc, 3210 ret = ip_vs_genl_parse_service(net, &usvc,
3121 info->attrs[IPVS_CMD_ATTR_SERVICE], 3211 info->attrs[IPVS_CMD_ATTR_SERVICE],
3122 need_full_svc, &svc); 3212 need_full_svc, &svc);
3123 if (ret) 3213 if (ret)
@@ -3147,7 +3237,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3147 switch (cmd) { 3237 switch (cmd) {
3148 case IPVS_CMD_NEW_SERVICE: 3238 case IPVS_CMD_NEW_SERVICE:
3149 if (svc == NULL) 3239 if (svc == NULL)
3150 ret = ip_vs_add_service(&usvc, &svc); 3240 ret = ip_vs_add_service(net, &usvc, &svc);
3151 else 3241 else
3152 ret = -EEXIST; 3242 ret = -EEXIST;
3153 break; 3243 break;
@@ -3185,7 +3275,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3185 struct sk_buff *msg; 3275 struct sk_buff *msg;
3186 void *reply; 3276 void *reply;
3187 int ret, cmd, reply_cmd; 3277 int ret, cmd, reply_cmd;
3278 struct net *net;
3279 struct netns_ipvs *ipvs;
3188 3280
3281 net = skb_sknet(skb);
3282 ipvs = net_ipvs(net);
3189 cmd = info->genlhdr->cmd; 3283 cmd = info->genlhdr->cmd;
3190 3284
3191 if (cmd == IPVS_CMD_GET_SERVICE) 3285 if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3214,7 +3308,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3214 { 3308 {
3215 struct ip_vs_service *svc; 3309 struct ip_vs_service *svc;
3216 3310
3217 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); 3311 svc = ip_vs_genl_find_service(net,
3312 info->attrs[IPVS_CMD_ATTR_SERVICE]);
3218 if (IS_ERR(svc)) { 3313 if (IS_ERR(svc)) {
3219 ret = PTR_ERR(svc); 3314 ret = PTR_ERR(svc);
3220 goto out_err; 3315 goto out_err;
@@ -3234,7 +3329,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3234 { 3329 {
3235 struct ip_vs_timeout_user t; 3330 struct ip_vs_timeout_user t;
3236 3331
3237 __ip_vs_get_timeouts(&t); 3332 __ip_vs_get_timeouts(net, &t);
3238#ifdef CONFIG_IP_VS_PROTO_TCP 3333#ifdef CONFIG_IP_VS_PROTO_TCP
3239 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); 3334 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3240 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, 3335 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3380,62 +3475,172 @@ static void ip_vs_genl_unregister(void)
3380 3475
3381/* End of Generic Netlink interface definitions */ 3476/* End of Generic Netlink interface definitions */
3382 3477
3478/*
3479 * per netns init/exit func.
3480 */
3481int __net_init __ip_vs_control_init(struct net *net)
3482{
3483 int idx;
3484 struct netns_ipvs *ipvs = net_ipvs(net);
3485 struct ctl_table *tbl;
3486
3487 atomic_set(&ipvs->dropentry, 0);
3488 spin_lock_init(&ipvs->dropentry_lock);
3489 spin_lock_init(&ipvs->droppacket_lock);
3490 spin_lock_init(&ipvs->securetcp_lock);
3491 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3492
3493 /* Initialize rs_table */
3494 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3495 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3496
3497 INIT_LIST_HEAD(&ipvs->dest_trash);
3498 atomic_set(&ipvs->ftpsvc_counter, 0);
3499 atomic_set(&ipvs->nullsvc_counter, 0);
3500
3501 /* procfs stats */
3502 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3503 if (ipvs->tot_stats == NULL) {
3504 pr_err("%s(): no memory.\n", __func__);
3505 return -ENOMEM;
3506 }
3507 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3508 if (!ipvs->cpustats) {
3509 pr_err("%s() alloc_percpu failed\n", __func__);
3510 goto err_alloc;
3511 }
3512 spin_lock_init(&ipvs->tot_stats->lock);
3513
3514 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3515 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3516
3517 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3518 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3519 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3520 &ip_vs_stats_percpu_fops);
3521
3522 if (!net_eq(net, &init_net)) {
3523 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3524 if (tbl == NULL)
3525 goto err_dup;
3526 } else
3527 tbl = vs_vars;
3528 /* Initialize sysctl defaults */
3529 idx = 0;
3530 ipvs->sysctl_amemthresh = 1024;
3531 tbl[idx++].data = &ipvs->sysctl_amemthresh;
3532 ipvs->sysctl_am_droprate = 10;
3533 tbl[idx++].data = &ipvs->sysctl_am_droprate;
3534 tbl[idx++].data = &ipvs->sysctl_drop_entry;
3535 tbl[idx++].data = &ipvs->sysctl_drop_packet;
3536#ifdef CONFIG_IP_VS_NFCT
3537 tbl[idx++].data = &ipvs->sysctl_conntrack;
3538#endif
3539 tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3540 ipvs->sysctl_snat_reroute = 1;
3541 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3542 ipvs->sysctl_sync_ver = 1;
3543 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3544 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3545 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3546 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3547 ipvs->sysctl_sync_threshold[0] = 3;
3548 ipvs->sysctl_sync_threshold[1] = 50;
3549 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3550 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3551 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3552
3553
3554 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3555 vs_vars);
3556 if (ipvs->sysctl_hdr == NULL)
3557 goto err_reg;
3558 ip_vs_new_estimator(net, ipvs->tot_stats);
3559 ipvs->sysctl_tbl = tbl;
3560 /* Schedule defense work */
3561 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3562 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3563 return 0;
3564
3565err_reg:
3566 if (!net_eq(net, &init_net))
3567 kfree(tbl);
3568err_dup:
3569 free_percpu(ipvs->cpustats);
3570err_alloc:
3571 kfree(ipvs->tot_stats);
3572 return -ENOMEM;
3573}
3574
3575static void __net_exit __ip_vs_control_cleanup(struct net *net)
3576{
3577 struct netns_ipvs *ipvs = net_ipvs(net);
3578
3579 ip_vs_trash_cleanup(net);
3580 ip_vs_kill_estimator(net, ipvs->tot_stats);
3581 cancel_delayed_work_sync(&ipvs->defense_work);
3582 cancel_work_sync(&ipvs->defense_work.work);
3583 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3584 proc_net_remove(net, "ip_vs_stats_percpu");
3585 proc_net_remove(net, "ip_vs_stats");
3586 proc_net_remove(net, "ip_vs");
3587 free_percpu(ipvs->cpustats);
3588 kfree(ipvs->tot_stats);
3589}
3590
3591static struct pernet_operations ipvs_control_ops = {
3592 .init = __ip_vs_control_init,
3593 .exit = __ip_vs_control_cleanup,
3594};
3383 3595
3384int __init ip_vs_control_init(void) 3596int __init ip_vs_control_init(void)
3385{ 3597{
3386 int ret;
3387 int idx; 3598 int idx;
3599 int ret;
3388 3600
3389 EnterFunction(2); 3601 EnterFunction(2);
3390 3602
3391 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ 3603 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3392 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 3604 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3393 INIT_LIST_HEAD(&ip_vs_svc_table[idx]); 3605 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3394 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); 3606 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3395 } 3607 }
3396 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { 3608
3397 INIT_LIST_HEAD(&ip_vs_rtable[idx]); 3609 ret = register_pernet_subsys(&ipvs_control_ops);
3610 if (ret) {
3611 pr_err("cannot register namespace.\n");
3612 goto err;
3398 } 3613 }
3399 smp_wmb(); 3614
3615 smp_wmb(); /* Do we really need it now ? */
3400 3616
3401 ret = nf_register_sockopt(&ip_vs_sockopts); 3617 ret = nf_register_sockopt(&ip_vs_sockopts);
3402 if (ret) { 3618 if (ret) {
3403 pr_err("cannot register sockopt.\n"); 3619 pr_err("cannot register sockopt.\n");
3404 return ret; 3620 goto err_net;
3405 } 3621 }
3406 3622
3407 ret = ip_vs_genl_register(); 3623 ret = ip_vs_genl_register();
3408 if (ret) { 3624 if (ret) {
3409 pr_err("cannot register Generic Netlink interface.\n"); 3625 pr_err("cannot register Generic Netlink interface.\n");
3410 nf_unregister_sockopt(&ip_vs_sockopts); 3626 nf_unregister_sockopt(&ip_vs_sockopts);
3411 return ret; 3627 goto err_net;
3412 } 3628 }
3413 3629
3414 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3415 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3416
3417 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3418
3419 ip_vs_new_estimator(&ip_vs_stats);
3420
3421 /* Hook the defense timer */
3422 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3423
3424 LeaveFunction(2); 3630 LeaveFunction(2);
3425 return 0; 3631 return 0;
3632
3633err_net:
3634 unregister_pernet_subsys(&ipvs_control_ops);
3635err:
3636 return ret;
3426} 3637}
3427 3638
3428 3639
3429void ip_vs_control_cleanup(void) 3640void ip_vs_control_cleanup(void)
3430{ 3641{
3431 EnterFunction(2); 3642 EnterFunction(2);
3432 ip_vs_trash_cleanup(); 3643 unregister_pernet_subsys(&ipvs_control_ops);
3433 cancel_delayed_work_sync(&defense_work);
3434 cancel_work_sync(&defense_work.work);
3435 ip_vs_kill_estimator(&ip_vs_stats);
3436 unregister_sysctl_table(sysctl_header);
3437 proc_net_remove(&init_net, "ip_vs_stats");
3438 proc_net_remove(&init_net, "ip_vs");
3439 ip_vs_genl_unregister(); 3644 ip_vs_genl_unregister();
3440 nf_unregister_sockopt(&ip_vs_sockopts); 3645 nf_unregister_sockopt(&ip_vs_sockopts);
3441 LeaveFunction(2); 3646 LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ff28801962e0..f560a05c965a 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,12 @@
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 * 10 *
11 * Changes: 11 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
12 * 12 * Network name space (netns) aware.
13 * Global data moved to netns, i.e. struct netns_ipvs
14 * Affected data: est_list and est_lock.
15 * estimation_timer() runs with timer per netns.
16 * get_stats() does the per cpu summing.
13 */ 17 */
14 18
15#define KMSG_COMPONENT "IPVS" 19#define KMSG_COMPONENT "IPVS"
@@ -48,11 +52,42 @@
48 */ 52 */
49 53
50 54
51static void estimation_timer(unsigned long arg); 55/*
56 * Make a summary from each cpu
57 */
58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats *stats)
60{
61 int i;
62
63 for_each_possible_cpu(i) {
64 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
65 unsigned int start;
66 __u64 inbytes, outbytes;
67 if (i) {
68 sum->conns += s->ustats.conns;
69 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts;
71 do {
72 start = u64_stats_fetch_begin_bh(&s->syncp);
73 inbytes = s->ustats.inbytes;
74 outbytes = s->ustats.outbytes;
75 } while (u64_stats_fetch_retry_bh(&s->syncp, start));
76 sum->inbytes += inbytes;
77 sum->outbytes += outbytes;
78 } else {
79 sum->conns = s->ustats.conns;
80 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts;
82 do {
83 start = u64_stats_fetch_begin_bh(&s->syncp);
84 sum->inbytes = s->ustats.inbytes;
85 sum->outbytes = s->ustats.outbytes;
86 } while (u64_stats_fetch_retry_bh(&s->syncp, start));
87 }
88 }
89}
52 90
53static LIST_HEAD(est_list);
54static DEFINE_SPINLOCK(est_lock);
55static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
56 91
57static void estimation_timer(unsigned long arg) 92static void estimation_timer(unsigned long arg)
58{ 93{
@@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg)
62 u32 n_inpkts, n_outpkts; 97 u32 n_inpkts, n_outpkts;
63 u64 n_inbytes, n_outbytes; 98 u64 n_inbytes, n_outbytes;
64 u32 rate; 99 u32 rate;
100 struct net *net = (struct net *)arg;
101 struct netns_ipvs *ipvs;
65 102
66 spin_lock(&est_lock); 103 ipvs = net_ipvs(net);
67 list_for_each_entry(e, &est_list, list) { 104 ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
105 spin_lock(&ipvs->est_lock);
106 list_for_each_entry(e, &ipvs->est_list, list) {
68 s = container_of(e, struct ip_vs_stats, est); 107 s = container_of(e, struct ip_vs_stats, est);
69 108
109 ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
70 spin_lock(&s->lock); 110 spin_lock(&s->lock);
71 n_conns = s->ustats.conns; 111 n_conns = s->ustats.conns;
72 n_inpkts = s->ustats.inpkts; 112 n_inpkts = s->ustats.inpkts;
@@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg)
75 n_outbytes = s->ustats.outbytes; 115 n_outbytes = s->ustats.outbytes;
76 116
77 /* scaled by 2^10, but divided 2 seconds */ 117 /* scaled by 2^10, but divided 2 seconds */
78 rate = (n_conns - e->last_conns)<<9; 118 rate = (n_conns - e->last_conns) << 9;
79 e->last_conns = n_conns; 119 e->last_conns = n_conns;
80 e->cps += ((long)rate - (long)e->cps)>>2; 120 e->cps += ((long)rate - (long)e->cps) >> 2;
81 s->ustats.cps = (e->cps+0x1FF)>>10; 121 s->ustats.cps = (e->cps + 0x1FF) >> 10;
82 122
83 rate = (n_inpkts - e->last_inpkts)<<9; 123 rate = (n_inpkts - e->last_inpkts) << 9;
84 e->last_inpkts = n_inpkts; 124 e->last_inpkts = n_inpkts;
85 e->inpps += ((long)rate - (long)e->inpps)>>2; 125 e->inpps += ((long)rate - (long)e->inpps) >> 2;
86 s->ustats.inpps = (e->inpps+0x1FF)>>10; 126 s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
87 127
88 rate = (n_outpkts - e->last_outpkts)<<9; 128 rate = (n_outpkts - e->last_outpkts) << 9;
89 e->last_outpkts = n_outpkts; 129 e->last_outpkts = n_outpkts;
90 e->outpps += ((long)rate - (long)e->outpps)>>2; 130 e->outpps += ((long)rate - (long)e->outpps) >> 2;
91 s->ustats.outpps = (e->outpps+0x1FF)>>10; 131 s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
92 132
93 rate = (n_inbytes - e->last_inbytes)<<4; 133 rate = (n_inbytes - e->last_inbytes) << 4;
94 e->last_inbytes = n_inbytes; 134 e->last_inbytes = n_inbytes;
95 e->inbps += ((long)rate - (long)e->inbps)>>2; 135 e->inbps += ((long)rate - (long)e->inbps) >> 2;
96 s->ustats.inbps = (e->inbps+0xF)>>5; 136 s->ustats.inbps = (e->inbps + 0xF) >> 5;
97 137
98 rate = (n_outbytes - e->last_outbytes)<<4; 138 rate = (n_outbytes - e->last_outbytes) << 4;
99 e->last_outbytes = n_outbytes; 139 e->last_outbytes = n_outbytes;
100 e->outbps += ((long)rate - (long)e->outbps)>>2; 140 e->outbps += ((long)rate - (long)e->outbps) >> 2;
101 s->ustats.outbps = (e->outbps+0xF)>>5; 141 s->ustats.outbps = (e->outbps + 0xF) >> 5;
102 spin_unlock(&s->lock); 142 spin_unlock(&s->lock);
103 } 143 }
104 spin_unlock(&est_lock); 144 spin_unlock(&ipvs->est_lock);
105 mod_timer(&est_timer, jiffies + 2*HZ); 145 mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
106} 146}
107 147
108void ip_vs_new_estimator(struct ip_vs_stats *stats) 148void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
109{ 149{
150 struct netns_ipvs *ipvs = net_ipvs(net);
110 struct ip_vs_estimator *est = &stats->est; 151 struct ip_vs_estimator *est = &stats->est;
111 152
112 INIT_LIST_HEAD(&est->list); 153 INIT_LIST_HEAD(&est->list);
@@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
126 est->last_outbytes = stats->ustats.outbytes; 167 est->last_outbytes = stats->ustats.outbytes;
127 est->outbps = stats->ustats.outbps<<5; 168 est->outbps = stats->ustats.outbps<<5;
128 169
129 spin_lock_bh(&est_lock); 170 spin_lock_bh(&ipvs->est_lock);
130 list_add(&est->list, &est_list); 171 list_add(&est->list, &ipvs->est_list);
131 spin_unlock_bh(&est_lock); 172 spin_unlock_bh(&ipvs->est_lock);
132} 173}
133 174
134void ip_vs_kill_estimator(struct ip_vs_stats *stats) 175void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
135{ 176{
177 struct netns_ipvs *ipvs = net_ipvs(net);
136 struct ip_vs_estimator *est = &stats->est; 178 struct ip_vs_estimator *est = &stats->est;
137 179
138 spin_lock_bh(&est_lock); 180 spin_lock_bh(&ipvs->est_lock);
139 list_del(&est->list); 181 list_del(&est->list);
140 spin_unlock_bh(&est_lock); 182 spin_unlock_bh(&ipvs->est_lock);
141} 183}
142 184
143void ip_vs_zero_estimator(struct ip_vs_stats *stats) 185void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
157 est->outbps = 0; 199 est->outbps = 0;
158} 200}
159 201
160int __init ip_vs_estimator_init(void) 202static int __net_init __ip_vs_estimator_init(struct net *net)
161{ 203{
162 mod_timer(&est_timer, jiffies + 2 * HZ); 204 struct netns_ipvs *ipvs = net_ipvs(net);
205
206 INIT_LIST_HEAD(&ipvs->est_list);
207 spin_lock_init(&ipvs->est_lock);
208 setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
209 mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
163 return 0; 210 return 0;
164} 211}
165 212
213static void __net_exit __ip_vs_estimator_exit(struct net *net)
214{
215 del_timer_sync(&net_ipvs(net)->est_timer);
216}
217static struct pernet_operations ip_vs_app_ops = {
218 .init = __ip_vs_estimator_init,
219 .exit = __ip_vs_estimator_exit,
220};
221
222int __init ip_vs_estimator_init(void)
223{
224 int rv;
225
226 rv = register_pernet_subsys(&ip_vs_app_ops);
227 return rv;
228}
229
166void ip_vs_estimator_cleanup(void) 230void ip_vs_estimator_cleanup(void)
167{ 231{
168 del_timer_sync(&est_timer); 232 unregister_pernet_subsys(&ip_vs_app_ops);
169} 233}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 75455000ad1c..6b5dd6ddaae9 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
157 int ret = 0; 157 int ret = 0;
158 enum ip_conntrack_info ctinfo; 158 enum ip_conntrack_info ctinfo;
159 struct nf_conn *ct; 159 struct nf_conn *ct;
160 struct net *net;
160 161
161#ifdef CONFIG_IP_VS_IPV6 162#ifdef CONFIG_IP_VS_IPV6
162 /* This application helper doesn't work with IPv6 yet, 163 /* This application helper doesn't work with IPv6 yet,
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
197 */ 198 */
198 { 199 {
199 struct ip_vs_conn_param p; 200 struct ip_vs_conn_param p;
200 ip_vs_conn_fill_param(AF_INET, iph->protocol, 201 ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
201 &from, port, &cp->caddr, 0, &p); 202 iph->protocol, &from, port,
203 &cp->caddr, 0, &p);
202 n_cp = ip_vs_conn_out_get(&p); 204 n_cp = ip_vs_conn_out_get(&p);
203 } 205 }
204 if (!n_cp) { 206 if (!n_cp) {
205 struct ip_vs_conn_param p; 207 struct ip_vs_conn_param p;
206 ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, 208 ip_vs_conn_fill_param(ip_vs_conn_net(cp),
209 AF_INET, IPPROTO_TCP, &cp->caddr,
207 0, &cp->vaddr, port, &p); 210 0, &cp->vaddr, port, &p);
208 n_cp = ip_vs_conn_new(&p, &from, port, 211 n_cp = ip_vs_conn_new(&p, &from, port,
209 IP_VS_CONN_F_NO_CPORT | 212 IP_VS_CONN_F_NO_CPORT |
210 IP_VS_CONN_F_NFCT, 213 IP_VS_CONN_F_NFCT,
211 cp->dest); 214 cp->dest, skb->mark);
212 if (!n_cp) 215 if (!n_cp)
213 return 0; 216 return 0;
214 217
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
257 * would be adjusted twice. 260 * would be adjusted twice.
258 */ 261 */
259 262
263 net = skb_net(skb);
260 cp->app_data = NULL; 264 cp->app_data = NULL;
261 ip_vs_tcp_conn_listen(n_cp); 265 ip_vs_tcp_conn_listen(net, n_cp);
262 ip_vs_conn_put(n_cp); 266 ip_vs_conn_put(n_cp);
263 return ret; 267 return ret;
264 } 268 }
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
287 union nf_inet_addr to; 291 union nf_inet_addr to;
288 __be16 port; 292 __be16 port;
289 struct ip_vs_conn *n_cp; 293 struct ip_vs_conn *n_cp;
294 struct net *net;
290 295
291#ifdef CONFIG_IP_VS_IPV6 296#ifdef CONFIG_IP_VS_IPV6
292 /* This application helper doesn't work with IPv6 yet, 297 /* This application helper doesn't work with IPv6 yet,
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
358 363
359 { 364 {
360 struct ip_vs_conn_param p; 365 struct ip_vs_conn_param p;
361 ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, 366 ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
362 &cp->vaddr, htons(ntohs(cp->vport)-1), 367 iph->protocol, &to, port, &cp->vaddr,
363 &p); 368 htons(ntohs(cp->vport)-1), &p);
364 n_cp = ip_vs_conn_in_get(&p); 369 n_cp = ip_vs_conn_in_get(&p);
365 if (!n_cp) { 370 if (!n_cp) {
366 n_cp = ip_vs_conn_new(&p, &cp->daddr, 371 n_cp = ip_vs_conn_new(&p, &cp->daddr,
367 htons(ntohs(cp->dport)-1), 372 htons(ntohs(cp->dport)-1),
368 IP_VS_CONN_F_NFCT, cp->dest); 373 IP_VS_CONN_F_NFCT, cp->dest,
374 skb->mark);
369 if (!n_cp) 375 if (!n_cp)
370 return 0; 376 return 0;
371 377
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
377 /* 383 /*
378 * Move tunnel to listen state 384 * Move tunnel to listen state
379 */ 385 */
380 ip_vs_tcp_conn_listen(n_cp); 386 net = skb_net(skb);
387 ip_vs_tcp_conn_listen(net, n_cp);
381 ip_vs_conn_put(n_cp); 388 ip_vs_conn_put(n_cp);
382 389
383 return 1; 390 return 1;
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = {
398 .pkt_in = ip_vs_ftp_in, 405 .pkt_in = ip_vs_ftp_in,
399}; 406};
400 407
401
402/* 408/*
403 * ip_vs_ftp initialization 409 * per netns ip_vs_ftp initialization
404 */ 410 */
405static int __init ip_vs_ftp_init(void) 411static int __net_init __ip_vs_ftp_init(struct net *net)
406{ 412{
407 int i, ret; 413 int i, ret;
408 struct ip_vs_app *app = &ip_vs_ftp; 414 struct ip_vs_app *app = &ip_vs_ftp;
409 415
410 ret = register_ip_vs_app(app); 416 ret = register_ip_vs_app(net, app);
411 if (ret) 417 if (ret)
412 return ret; 418 return ret;
413 419
414 for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { 420 for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
415 if (!ports[i]) 421 if (!ports[i])
416 continue; 422 continue;
417 ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); 423 ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
418 if (ret) 424 if (ret)
419 break; 425 break;
420 pr_info("%s: loaded support on port[%d] = %d\n", 426 pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void)
422 } 428 }
423 429
424 if (ret) 430 if (ret)
425 unregister_ip_vs_app(app); 431 unregister_ip_vs_app(net, app);
426 432
427 return ret; 433 return ret;
428} 434}
435/*
436 * netns exit
437 */
438static void __ip_vs_ftp_exit(struct net *net)
439{
440 struct ip_vs_app *app = &ip_vs_ftp;
441
442 unregister_ip_vs_app(net, app);
443}
444
445static struct pernet_operations ip_vs_ftp_ops = {
446 .init = __ip_vs_ftp_init,
447 .exit = __ip_vs_ftp_exit,
448};
429 449
450int __init ip_vs_ftp_init(void)
451{
452 int rv;
453
454 rv = register_pernet_subsys(&ip_vs_ftp_ops);
455 return rv;
456}
430 457
431/* 458/*
432 * ip_vs_ftp finish. 459 * ip_vs_ftp finish.
433 */ 460 */
434static void __exit ip_vs_ftp_exit(void) 461static void __exit ip_vs_ftp_exit(void)
435{ 462{
436 unregister_ip_vs_app(&ip_vs_ftp); 463 unregister_pernet_subsys(&ip_vs_ftp_ops);
437} 464}
438 465
439 466
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 9323f8944199..d5bec3371871 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -70,7 +70,6 @@
70 * entries that haven't been touched for a day. 70 * entries that haven't been touched for a day.
71 */ 71 */
72#define COUNT_FOR_FULL_EXPIRATION 30 72#define COUNT_FOR_FULL_EXPIRATION 30
73static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
74 73
75 74
76/* 75/*
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table {
117static ctl_table vs_vars_table[] = { 116static ctl_table vs_vars_table[] = {
118 { 117 {
119 .procname = "lblc_expiration", 118 .procname = "lblc_expiration",
120 .data = &sysctl_ip_vs_lblc_expiration, 119 .data = NULL,
121 .maxlen = sizeof(int), 120 .maxlen = sizeof(int),
122 .mode = 0644, 121 .mode = 0644,
123 .proc_handler = proc_dointvec_jiffies, 122 .proc_handler = proc_dointvec_jiffies,
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = {
125 { } 124 { }
126}; 125};
127 126
128static struct ctl_table_header * sysctl_header;
129
130static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 127static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
131{ 128{
132 list_del(&en->list); 129 list_del(&en->list);
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
248 struct ip_vs_lblc_entry *en, *nxt; 245 struct ip_vs_lblc_entry *en, *nxt;
249 unsigned long now = jiffies; 246 unsigned long now = jiffies;
250 int i, j; 247 int i, j;
248 struct netns_ipvs *ipvs = net_ipvs(svc->net);
251 249
252 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { 250 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
253 j = (j + 1) & IP_VS_LBLC_TAB_MASK; 251 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
255 write_lock(&svc->sched_lock); 253 write_lock(&svc->sched_lock);
256 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 254 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
257 if (time_before(now, 255 if (time_before(now,
258 en->lastuse + sysctl_ip_vs_lblc_expiration)) 256 en->lastuse +
257 ipvs->sysctl_lblc_expiration))
259 continue; 258 continue;
260 259
261 ip_vs_lblc_free(en); 260 ip_vs_lblc_free(en);
@@ -543,23 +542,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
543 .schedule = ip_vs_lblc_schedule, 542 .schedule = ip_vs_lblc_schedule,
544}; 543};
545 544
545/*
546 * per netns init.
547 */
548static int __net_init __ip_vs_lblc_init(struct net *net)
549{
550 struct netns_ipvs *ipvs = net_ipvs(net);
551
552 if (!net_eq(net, &init_net)) {
553 ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
554 sizeof(vs_vars_table),
555 GFP_KERNEL);
556 if (ipvs->lblc_ctl_table == NULL)
557 goto err_dup;
558 } else
559 ipvs->lblc_ctl_table = vs_vars_table;
560 ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
561 ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
562
563 ipvs->lblc_ctl_header =
564 register_net_sysctl_table(net, net_vs_ctl_path,
565 ipvs->lblc_ctl_table);
566 if (!ipvs->lblc_ctl_header)
567 goto err_reg;
568
569 return 0;
570
571err_reg:
572 if (!net_eq(net, &init_net))
573 kfree(ipvs->lblc_ctl_table);
574
575err_dup:
576 return -ENOMEM;
577}
578
579static void __net_exit __ip_vs_lblc_exit(struct net *net)
580{
581 struct netns_ipvs *ipvs = net_ipvs(net);
582
583 unregister_net_sysctl_table(ipvs->lblc_ctl_header);
584
585 if (!net_eq(net, &init_net))
586 kfree(ipvs->lblc_ctl_table);
587}
588
589static struct pernet_operations ip_vs_lblc_ops = {
590 .init = __ip_vs_lblc_init,
591 .exit = __ip_vs_lblc_exit,
592};
546 593
547static int __init ip_vs_lblc_init(void) 594static int __init ip_vs_lblc_init(void)
548{ 595{
549 int ret; 596 int ret;
550 597
551 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); 598 ret = register_pernet_subsys(&ip_vs_lblc_ops);
599 if (ret)
600 return ret;
601
552 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); 602 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
553 if (ret) 603 if (ret)
554 unregister_sysctl_table(sysctl_header); 604 unregister_pernet_subsys(&ip_vs_lblc_ops);
555 return ret; 605 return ret;
556} 606}
557 607
558
559static void __exit ip_vs_lblc_cleanup(void) 608static void __exit ip_vs_lblc_cleanup(void)
560{ 609{
561 unregister_sysctl_table(sysctl_header);
562 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); 610 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
611 unregister_pernet_subsys(&ip_vs_lblc_ops);
563} 612}
564 613
565 614
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index dbeed8ea421a..61ae8cfcf0b4 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -70,8 +70,6 @@
70 * entries that haven't been touched for a day. 70 * entries that haven't been touched for a day.
71 */ 71 */
72#define COUNT_FOR_FULL_EXPIRATION 30 72#define COUNT_FOR_FULL_EXPIRATION 30
73static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
74
75 73
76/* 74/*
77 * for IPVS lblcr entry hash table 75 * for IPVS lblcr entry hash table
@@ -296,7 +294,7 @@ struct ip_vs_lblcr_table {
296static ctl_table vs_vars_table[] = { 294static ctl_table vs_vars_table[] = {
297 { 295 {
298 .procname = "lblcr_expiration", 296 .procname = "lblcr_expiration",
299 .data = &sysctl_ip_vs_lblcr_expiration, 297 .data = NULL,
300 .maxlen = sizeof(int), 298 .maxlen = sizeof(int),
301 .mode = 0644, 299 .mode = 0644,
302 .proc_handler = proc_dointvec_jiffies, 300 .proc_handler = proc_dointvec_jiffies,
@@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = {
304 { } 302 { }
305}; 303};
306 304
307static struct ctl_table_header * sysctl_header;
308
309static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) 305static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
310{ 306{
311 list_del(&en->list); 307 list_del(&en->list);
@@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
425 unsigned long now = jiffies; 421 unsigned long now = jiffies;
426 int i, j; 422 int i, j;
427 struct ip_vs_lblcr_entry *en, *nxt; 423 struct ip_vs_lblcr_entry *en, *nxt;
424 struct netns_ipvs *ipvs = net_ipvs(svc->net);
428 425
429 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 426 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
430 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 427 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
431 428
432 write_lock(&svc->sched_lock); 429 write_lock(&svc->sched_lock);
433 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 430 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
434 if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, 431 if (time_after(en->lastuse
435 now)) 432 + ipvs->sysctl_lblcr_expiration, now))
436 continue; 433 continue;
437 434
438 ip_vs_lblcr_free(en); 435 ip_vs_lblcr_free(en);
@@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
664 read_lock(&svc->sched_lock); 661 read_lock(&svc->sched_lock);
665 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); 662 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
666 if (en) { 663 if (en) {
664 struct netns_ipvs *ipvs = net_ipvs(svc->net);
667 /* We only hold a read lock, but this is atomic */ 665 /* We only hold a read lock, but this is atomic */
668 en->lastuse = jiffies; 666 en->lastuse = jiffies;
669 667
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
675 /* More than one destination + enough time passed by, cleanup */ 673 /* More than one destination + enough time passed by, cleanup */
676 if (atomic_read(&en->set.size) > 1 && 674 if (atomic_read(&en->set.size) > 1 &&
677 time_after(jiffies, en->set.lastmod + 675 time_after(jiffies, en->set.lastmod +
678 sysctl_ip_vs_lblcr_expiration)) { 676 ipvs->sysctl_lblcr_expiration)) {
679 struct ip_vs_dest *m; 677 struct ip_vs_dest *m;
680 678
681 write_lock(&en->set.lock); 679 write_lock(&en->set.lock);
@@ -744,23 +742,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
744 .schedule = ip_vs_lblcr_schedule, 742 .schedule = ip_vs_lblcr_schedule,
745}; 743};
746 744
745/*
746 * per netns init.
747 */
748static int __net_init __ip_vs_lblcr_init(struct net *net)
749{
750 struct netns_ipvs *ipvs = net_ipvs(net);
751
752 if (!net_eq(net, &init_net)) {
753 ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
754 sizeof(vs_vars_table),
755 GFP_KERNEL);
756 if (ipvs->lblcr_ctl_table == NULL)
757 goto err_dup;
758 } else
759 ipvs->lblcr_ctl_table = vs_vars_table;
760 ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
761 ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
762
763 ipvs->lblcr_ctl_header =
764 register_net_sysctl_table(net, net_vs_ctl_path,
765 ipvs->lblcr_ctl_table);
766 if (!ipvs->lblcr_ctl_header)
767 goto err_reg;
768
769 return 0;
770
771err_reg:
772 if (!net_eq(net, &init_net))
773 kfree(ipvs->lblcr_ctl_table);
774
775err_dup:
776 return -ENOMEM;
777}
778
779static void __net_exit __ip_vs_lblcr_exit(struct net *net)
780{
781 struct netns_ipvs *ipvs = net_ipvs(net);
782
783 unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
784
785 if (!net_eq(net, &init_net))
786 kfree(ipvs->lblcr_ctl_table);
787}
788
789static struct pernet_operations ip_vs_lblcr_ops = {
790 .init = __ip_vs_lblcr_init,
791 .exit = __ip_vs_lblcr_exit,
792};
747 793
748static int __init ip_vs_lblcr_init(void) 794static int __init ip_vs_lblcr_init(void)
749{ 795{
750 int ret; 796 int ret;
751 797
752 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); 798 ret = register_pernet_subsys(&ip_vs_lblcr_ops);
799 if (ret)
800 return ret;
801
753 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 802 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
754 if (ret) 803 if (ret)
755 unregister_sysctl_table(sysctl_header); 804 unregister_pernet_subsys(&ip_vs_lblcr_ops);
756 return ret; 805 return ret;
757} 806}
758 807
759
760static void __exit ip_vs_lblcr_cleanup(void) 808static void __exit ip_vs_lblcr_cleanup(void)
761{ 809{
762 unregister_sysctl_table(sysctl_header);
763 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 810 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
811 unregister_pernet_subsys(&ip_vs_lblcr_ops);
764} 812}
765 813
766 814
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 4680647cd450..f454c80df0a7 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
141 struct nf_conntrack_tuple *orig, new_reply; 141 struct nf_conntrack_tuple *orig, new_reply;
142 struct ip_vs_conn *cp; 142 struct ip_vs_conn *cp;
143 struct ip_vs_conn_param p; 143 struct ip_vs_conn_param p;
144 struct net *net = nf_ct_net(ct);
144 145
145 if (exp->tuple.src.l3num != PF_INET) 146 if (exp->tuple.src.l3num != PF_INET)
146 return; 147 return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
155 156
156 /* RS->CLIENT */ 157 /* RS->CLIENT */
157 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 158 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
158 ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, 159 ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
159 &orig->src.u3, orig->src.u.tcp.port, 160 &orig->src.u3, orig->src.u.tcp.port,
160 &orig->dst.u3, orig->dst.u.tcp.port, &p); 161 &orig->dst.u3, orig->dst.u.tcp.port, &p);
161 cp = ip_vs_conn_out_get(&p); 162 cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
268 " for conn " FMT_CONN "\n", 269 " for conn " FMT_CONN "\n",
269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); 270 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
270 271
271 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); 272 h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
273 &tuple);
272 if (h) { 274 if (h) {
273 ct = nf_ct_tuplehash_to_ctrack(h); 275 ct = nf_ct_tuplehash_to_ctrack(h);
274 /* Show what happens instead of calling nf_ct_kill() */ 276 /* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 3414af70ee12..5cf859ccb31b 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
29} 29}
30 30
31/* Get pe in the pe list by name */ 31/* Get pe in the pe list by name */
32static struct ip_vs_pe * 32struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
33ip_vs_pe_getbyname(const char *pe_name)
34{ 33{
35 struct ip_vs_pe *pe; 34 struct ip_vs_pe *pe;
36 35
37 IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__, 36 IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
38 pe_name); 37 pe_name);
39 38
40 spin_lock_bh(&ip_vs_pe_lock); 39 spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
60} 59}
61 60
62/* Lookup pe and try to load it if it doesn't exist */ 61/* Lookup pe and try to load it if it doesn't exist */
63struct ip_vs_pe *ip_vs_pe_get(const char *name) 62struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
64{ 63{
65 struct ip_vs_pe *pe; 64 struct ip_vs_pe *pe;
66 65
67 /* Search for the pe by name */ 66 /* Search for the pe by name */
68 pe = ip_vs_pe_getbyname(name); 67 pe = __ip_vs_pe_getbyname(name);
69 68
70 /* If pe not found, load the module and search again */ 69 /* If pe not found, load the module and search again */
71 if (!pe) { 70 if (!pe) {
72 request_module("ip_vs_pe_%s", name); 71 request_module("ip_vs_pe_%s", name);
73 pe = ip_vs_pe_getbyname(name); 72 pe = __ip_vs_pe_getbyname(name);
74 } 73 }
75 74
76 return pe; 75 return pe;
77} 76}
78 77
79void ip_vs_pe_put(struct ip_vs_pe *pe)
80{
81 if (pe && pe->module)
82 module_put(pe->module);
83}
84
85/* Register a pe in the pe list */ 78/* Register a pe in the pe list */
86int register_ip_vs_pe(struct ip_vs_pe *pe) 79int register_ip_vs_pe(struct ip_vs_pe *pe)
87{ 80{
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e9620f3e..0d83bc01fed4 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
71 struct ip_vs_iphdr iph; 71 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen; 72 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr; 73 const char *dptr;
74 int retc;
74 75
75 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); 76 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
76 77
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
83 if (dataoff >= skb->len) 84 if (dataoff >= skb->len)
84 return -EINVAL; 85 return -EINVAL;
85 86
87 if ((retc=skb_linearize(skb)) < 0)
88 return retc;
86 dptr = skb->data + dataoff; 89 dptr = skb->data + dataoff;
87 datalen = skb->len - dataoff; 90 datalen = skb->len - dataoff;
88 91
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index c53998390877..6ac986cdcff3 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
60 return 0; 60 return 0;
61} 61}
62 62
63/*
64 * register an ipvs protocol's netns related data
65 */
66static int
67register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
68{
69 struct netns_ipvs *ipvs = net_ipvs(net);
70 unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
71 struct ip_vs_proto_data *pd =
72 kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
73
74 if (!pd) {
75 pr_err("%s(): no memory.\n", __func__);
76 return -ENOMEM;
77 }
78 pd->pp = pp; /* For speed issues */
79 pd->next = ipvs->proto_data_table[hash];
80 ipvs->proto_data_table[hash] = pd;
81 atomic_set(&pd->appcnt, 0); /* Init app counter */
82
83 if (pp->init_netns != NULL)
84 pp->init_netns(net, pd);
85
86 return 0;
87}
63 88
64/* 89/*
65 * unregister an ipvs protocol 90 * unregister an ipvs protocol
@@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
82 return -ESRCH; 107 return -ESRCH;
83} 108}
84 109
110/*
111 * unregister an ipvs protocol's netns data
112 */
113static int
114unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
115{
116 struct netns_ipvs *ipvs = net_ipvs(net);
117 struct ip_vs_proto_data **pd_p;
118 unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
119
120 pd_p = &ipvs->proto_data_table[hash];
121 for (; *pd_p; pd_p = &(*pd_p)->next) {
122 if (*pd_p == pd) {
123 *pd_p = pd->next;
124 if (pd->pp->exit_netns != NULL)
125 pd->pp->exit_netns(net, pd);
126 kfree(pd);
127 return 0;
128 }
129 }
130
131 return -ESRCH;
132}
85 133
86/* 134/*
87 * get ip_vs_protocol object by its proto. 135 * get ip_vs_protocol object by its proto.
@@ -100,19 +148,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
100} 148}
101EXPORT_SYMBOL(ip_vs_proto_get); 149EXPORT_SYMBOL(ip_vs_proto_get);
102 150
151/*
152 * get ip_vs_protocol object data by netns and proto
153 */
154struct ip_vs_proto_data *
155__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
156{
157 struct ip_vs_proto_data *pd;
158 unsigned hash = IP_VS_PROTO_HASH(proto);
159
160 for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
161 if (pd->pp->protocol == proto)
162 return pd;
163 }
164
165 return NULL;
166}
167
168struct ip_vs_proto_data *
169ip_vs_proto_data_get(struct net *net, unsigned short proto)
170{
171 struct netns_ipvs *ipvs = net_ipvs(net);
172
173 return __ipvs_proto_data_get(ipvs, proto);
174}
175EXPORT_SYMBOL(ip_vs_proto_data_get);
103 176
104/* 177/*
105 * Propagate event for state change to all protocols 178 * Propagate event for state change to all protocols
106 */ 179 */
107void ip_vs_protocol_timeout_change(int flags) 180void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
108{ 181{
109 struct ip_vs_protocol *pp; 182 struct ip_vs_proto_data *pd;
110 int i; 183 int i;
111 184
112 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 185 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
113 for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { 186 for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
114 if (pp->timeout_change) 187 if (pd->pp->timeout_change)
115 pp->timeout_change(pp, flags); 188 pd->pp->timeout_change(pd, flags);
116 } 189 }
117 } 190 }
118} 191}
@@ -236,6 +309,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
236 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); 309 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
237} 310}
238 311
312/*
313 * per network name-space init
314 */
315static int __net_init __ip_vs_protocol_init(struct net *net)
316{
317#ifdef CONFIG_IP_VS_PROTO_TCP
318 register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
319#endif
320#ifdef CONFIG_IP_VS_PROTO_UDP
321 register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
322#endif
323#ifdef CONFIG_IP_VS_PROTO_SCTP
324 register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
325#endif
326#ifdef CONFIG_IP_VS_PROTO_AH
327 register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
328#endif
329#ifdef CONFIG_IP_VS_PROTO_ESP
330 register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
331#endif
332 return 0;
333}
334
335static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
336{
337 struct netns_ipvs *ipvs = net_ipvs(net);
338 struct ip_vs_proto_data *pd;
339 int i;
340
341 /* unregister all the ipvs proto data for this netns */
342 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
343 while ((pd = ipvs->proto_data_table[i]) != NULL)
344 unregister_ip_vs_proto_netns(net, pd);
345 }
346}
347
348static struct pernet_operations ipvs_proto_ops = {
349 .init = __ip_vs_protocol_init,
350 .exit = __ip_vs_protocol_cleanup,
351};
239 352
240int __init ip_vs_protocol_init(void) 353int __init ip_vs_protocol_init(void)
241{ 354{
@@ -265,6 +378,7 @@ int __init ip_vs_protocol_init(void)
265 REGISTER_PROTOCOL(&ip_vs_protocol_esp); 378 REGISTER_PROTOCOL(&ip_vs_protocol_esp);
266#endif 379#endif
267 pr_info("Registered protocols (%s)\n", &protocols[2]); 380 pr_info("Registered protocols (%s)\n", &protocols[2]);
381 return register_pernet_subsys(&ipvs_proto_ops);
268 382
269 return 0; 383 return 0;
270} 384}
@@ -275,6 +389,7 @@ void ip_vs_protocol_cleanup(void)
275 struct ip_vs_protocol *pp; 389 struct ip_vs_protocol *pp;
276 int i; 390 int i;
277 391
392 unregister_pernet_subsys(&ipvs_proto_ops);
278 /* unregister all the ipvs protocols */ 393 /* unregister all the ipvs protocols */
279 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 394 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
280 while ((pp = ip_vs_proto_table[i]) != NULL) 395 while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 3a0461117d3f..5b8eb8b12c3e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,28 +41,30 @@ struct isakmp_hdr {
41#define PORT_ISAKMP 500 41#define PORT_ISAKMP 500
42 42
43static void 43static void
44ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, 44ah_esp_conn_fill_param_proto(struct net *net, int af,
45 int inverse, struct ip_vs_conn_param *p) 45 const struct ip_vs_iphdr *iph, int inverse,
46 struct ip_vs_conn_param *p)
46{ 47{
47 if (likely(!inverse)) 48 if (likely(!inverse))
48 ip_vs_conn_fill_param(af, IPPROTO_UDP, 49 ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
49 &iph->saddr, htons(PORT_ISAKMP), 50 &iph->saddr, htons(PORT_ISAKMP),
50 &iph->daddr, htons(PORT_ISAKMP), p); 51 &iph->daddr, htons(PORT_ISAKMP), p);
51 else 52 else
52 ip_vs_conn_fill_param(af, IPPROTO_UDP, 53 ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
53 &iph->daddr, htons(PORT_ISAKMP), 54 &iph->daddr, htons(PORT_ISAKMP),
54 &iph->saddr, htons(PORT_ISAKMP), p); 55 &iph->saddr, htons(PORT_ISAKMP), p);
55} 56}
56 57
57static struct ip_vs_conn * 58static struct ip_vs_conn *
58ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 59ah_esp_conn_in_get(int af, const struct sk_buff *skb,
59 const struct ip_vs_iphdr *iph, unsigned int proto_off, 60 const struct ip_vs_iphdr *iph, unsigned int proto_off,
60 int inverse) 61 int inverse)
61{ 62{
62 struct ip_vs_conn *cp; 63 struct ip_vs_conn *cp;
63 struct ip_vs_conn_param p; 64 struct ip_vs_conn_param p;
65 struct net *net = skb_net(skb);
64 66
65 ah_esp_conn_fill_param_proto(af, iph, inverse, &p); 67 ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
66 cp = ip_vs_conn_in_get(&p); 68 cp = ip_vs_conn_in_get(&p);
67 if (!cp) { 69 if (!cp) {
68 /* 70 /*
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
72 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " 74 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
73 "%s%s %s->%s\n", 75 "%s%s %s->%s\n",
74 inverse ? "ICMP+" : "", 76 inverse ? "ICMP+" : "",
75 pp->name, 77 ip_vs_proto_get(iph->protocol)->name,
76 IP_VS_DBG_ADDR(af, &iph->saddr), 78 IP_VS_DBG_ADDR(af, &iph->saddr),
77 IP_VS_DBG_ADDR(af, &iph->daddr)); 79 IP_VS_DBG_ADDR(af, &iph->daddr));
78 } 80 }
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
83 85
84static struct ip_vs_conn * 86static struct ip_vs_conn *
85ah_esp_conn_out_get(int af, const struct sk_buff *skb, 87ah_esp_conn_out_get(int af, const struct sk_buff *skb,
86 struct ip_vs_protocol *pp,
87 const struct ip_vs_iphdr *iph, 88 const struct ip_vs_iphdr *iph,
88 unsigned int proto_off, 89 unsigned int proto_off,
89 int inverse) 90 int inverse)
90{ 91{
91 struct ip_vs_conn *cp; 92 struct ip_vs_conn *cp;
92 struct ip_vs_conn_param p; 93 struct ip_vs_conn_param p;
94 struct net *net = skb_net(skb);
93 95
94 ah_esp_conn_fill_param_proto(af, iph, inverse, &p); 96 ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
95 cp = ip_vs_conn_out_get(&p); 97 cp = ip_vs_conn_out_get(&p);
96 if (!cp) { 98 if (!cp) {
97 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " 99 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
98 "%s%s %s->%s\n", 100 "%s%s %s->%s\n",
99 inverse ? "ICMP+" : "", 101 inverse ? "ICMP+" : "",
100 pp->name, 102 ip_vs_proto_get(iph->protocol)->name,
101 IP_VS_DBG_ADDR(af, &iph->saddr), 103 IP_VS_DBG_ADDR(af, &iph->saddr),
102 IP_VS_DBG_ADDR(af, &iph->daddr)); 104 IP_VS_DBG_ADDR(af, &iph->daddr));
103 } 105 }
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
107 109
108 110
109static int 111static int
110ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 112ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
111 int *verdict, struct ip_vs_conn **cpp) 113 int *verdict, struct ip_vs_conn **cpp)
112{ 114{
113 /* 115 /*
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
117 return 0; 119 return 0;
118} 120}
119 121
120static void ah_esp_init(struct ip_vs_protocol *pp)
121{
122 /* nothing to do now */
123}
124
125
126static void ah_esp_exit(struct ip_vs_protocol *pp)
127{
128 /* nothing to do now */
129}
130
131
132#ifdef CONFIG_IP_VS_PROTO_AH 122#ifdef CONFIG_IP_VS_PROTO_AH
133struct ip_vs_protocol ip_vs_protocol_ah = { 123struct ip_vs_protocol ip_vs_protocol_ah = {
134 .name = "AH", 124 .name = "AH",
135 .protocol = IPPROTO_AH, 125 .protocol = IPPROTO_AH,
136 .num_states = 1, 126 .num_states = 1,
137 .dont_defrag = 1, 127 .dont_defrag = 1,
138 .init = ah_esp_init, 128 .init = NULL,
139 .exit = ah_esp_exit, 129 .exit = NULL,
140 .conn_schedule = ah_esp_conn_schedule, 130 .conn_schedule = ah_esp_conn_schedule,
141 .conn_in_get = ah_esp_conn_in_get, 131 .conn_in_get = ah_esp_conn_in_get,
142 .conn_out_get = ah_esp_conn_out_get, 132 .conn_out_get = ah_esp_conn_out_get,
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
149 .app_conn_bind = NULL, 139 .app_conn_bind = NULL,
150 .debug_packet = ip_vs_tcpudp_debug_packet, 140 .debug_packet = ip_vs_tcpudp_debug_packet,
151 .timeout_change = NULL, /* ISAKMP */ 141 .timeout_change = NULL, /* ISAKMP */
152 .set_state_timeout = NULL,
153}; 142};
154#endif 143#endif
155 144
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
159 .protocol = IPPROTO_ESP, 148 .protocol = IPPROTO_ESP,
160 .num_states = 1, 149 .num_states = 1,
161 .dont_defrag = 1, 150 .dont_defrag = 1,
162 .init = ah_esp_init, 151 .init = NULL,
163 .exit = ah_esp_exit, 152 .exit = NULL,
164 .conn_schedule = ah_esp_conn_schedule, 153 .conn_schedule = ah_esp_conn_schedule,
165 .conn_in_get = ah_esp_conn_in_get, 154 .conn_in_get = ah_esp_conn_in_get,
166 .conn_out_get = ah_esp_conn_out_get, 155 .conn_out_get = ah_esp_conn_out_get,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1ea96bcd342b..fb2d04ac5d4e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,9 +9,10 @@
9#include <net/ip_vs.h> 9#include <net/ip_vs.h>
10 10
11static int 11static int
12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
13 int *verdict, struct ip_vs_conn **cpp) 13 int *verdict, struct ip_vs_conn **cpp)
14{ 14{
15 struct net *net;
15 struct ip_vs_service *svc; 16 struct ip_vs_service *svc;
16 sctp_chunkhdr_t _schunkh, *sch; 17 sctp_chunkhdr_t _schunkh, *sch;
17 sctp_sctphdr_t *sh, _sctph; 18 sctp_sctphdr_t *sh, _sctph;
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
27 sizeof(_schunkh), &_schunkh); 28 sizeof(_schunkh), &_schunkh);
28 if (sch == NULL) 29 if (sch == NULL)
29 return 0; 30 return 0;
30 31 net = skb_net(skb);
31 if ((sch->type == SCTP_CID_INIT) && 32 if ((sch->type == SCTP_CID_INIT) &&
32 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, 33 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
33 &iph.daddr, sh->dest))) { 34 &iph.daddr, sh->dest))) {
34 int ignored; 35 int ignored;
35 36
36 if (ip_vs_todrop()) { 37 if (ip_vs_todrop(net_ipvs(net))) {
37 /* 38 /*
38 * It seems that we are very loaded. 39 * It seems that we are very loaded.
39 * We have to drop this packet :( 40 * We have to drop this packet :(
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
46 * Let the virtual server select a real server for the 47 * Let the virtual server select a real server for the
47 * incoming connection, and create a connection entry. 48 * incoming connection, and create a connection entry.
48 */ 49 */
49 *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 50 *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
50 if (!*cpp && !ignored) { 51 if (!*cpp && ignored <= 0) {
51 *verdict = ip_vs_leave(svc, skb, pp); 52 if (!ignored)
53 *verdict = ip_vs_leave(svc, skb, pd);
54 else {
55 ip_vs_service_put(svc);
56 *verdict = NF_DROP;
57 }
52 return 0; 58 return 0;
53 } 59 }
54 ip_vs_service_put(svc); 60 ip_vs_service_put(svc);
55 } 61 }
56 62 /* NF_ACCEPT */
57 return 1; 63 return 1;
58} 64}
59 65
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate
856/* 862/*
857 * Timeout table[state] 863 * Timeout table[state]
858 */ 864 */
859static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { 865static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
860 [IP_VS_SCTP_S_NONE] = 2 * HZ, 866 [IP_VS_SCTP_S_NONE] = 2 * HZ,
861 [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, 867 [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ,
862 [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, 868 [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ,
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state)
900 return "?"; 906 return "?";
901} 907}
902 908
903static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
904{
905}
906
907static int
908sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
909{
910
911return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
912 sctp_state_name_table, sname, to);
913}
914
915static inline int 909static inline int
916set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, 910set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
917 int direction, const struct sk_buff *skb) 911 int direction, const struct sk_buff *skb)
918{ 912{
919 sctp_chunkhdr_t _sctpch, *sch; 913 sctp_chunkhdr_t _sctpch, *sch;
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
971 965
972 IP_VS_DBG_BUF(8, "%s %s %s:%d->" 966 IP_VS_DBG_BUF(8, "%s %s %s:%d->"
973 "%s:%d state: %s->%s conn->refcnt:%d\n", 967 "%s:%d state: %s->%s conn->refcnt:%d\n",
974 pp->name, 968 pd->pp->name,
975 ((direction == IP_VS_DIR_OUTPUT) ? 969 ((direction == IP_VS_DIR_OUTPUT) ?
976 "output " : "input "), 970 "output " : "input "),
977 IP_VS_DBG_ADDR(cp->af, &cp->daddr), 971 IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
995 } 989 }
996 } 990 }
997 } 991 }
992 if (likely(pd))
993 cp->timeout = pd->timeout_table[cp->state = next_state];
994 else /* What to do ? */
995 cp->timeout = sctp_timeouts[cp->state = next_state];
998 996
999 cp->timeout = pp->timeout_table[cp->state = next_state]; 997 return 1;
1000
1001 return 1;
1002} 998}
1003 999
1004static int 1000static int
1005sctp_state_transition(struct ip_vs_conn *cp, int direction, 1001sctp_state_transition(struct ip_vs_conn *cp, int direction,
1006 const struct sk_buff *skb, struct ip_vs_protocol *pp) 1002 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
1007{ 1003{
1008 int ret = 0; 1004 int ret = 0;
1009 1005
1010 spin_lock(&cp->lock); 1006 spin_lock(&cp->lock);
1011 ret = set_sctp_state(pp, cp, direction, skb); 1007 ret = set_sctp_state(pd, cp, direction, skb);
1012 spin_unlock(&cp->lock); 1008 spin_unlock(&cp->lock);
1013 1009
1014 return ret; 1010 return ret;
1015} 1011}
1016 1012
1017/*
1018 * Hash table for SCTP application incarnations
1019 */
1020#define SCTP_APP_TAB_BITS 4
1021#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
1022#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
1023
1024static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
1025static DEFINE_SPINLOCK(sctp_app_lock);
1026
1027static inline __u16 sctp_app_hashkey(__be16 port) 1013static inline __u16 sctp_app_hashkey(__be16 port)
1028{ 1014{
1029 return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) 1015 return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
1030 & SCTP_APP_TAB_MASK; 1016 & SCTP_APP_TAB_MASK;
1031} 1017}
1032 1018
1033static int sctp_register_app(struct ip_vs_app *inc) 1019static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
1034{ 1020{
1035 struct ip_vs_app *i; 1021 struct ip_vs_app *i;
1036 __u16 hash; 1022 __u16 hash;
1037 __be16 port = inc->port; 1023 __be16 port = inc->port;
1038 int ret = 0; 1024 int ret = 0;
1025 struct netns_ipvs *ipvs = net_ipvs(net);
1026 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
1039 1027
1040 hash = sctp_app_hashkey(port); 1028 hash = sctp_app_hashkey(port);
1041 1029
1042 spin_lock_bh(&sctp_app_lock); 1030 spin_lock_bh(&ipvs->sctp_app_lock);
1043 list_for_each_entry(i, &sctp_apps[hash], p_list) { 1031 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
1044 if (i->port == port) { 1032 if (i->port == port) {
1045 ret = -EEXIST; 1033 ret = -EEXIST;
1046 goto out; 1034 goto out;
1047 } 1035 }
1048 } 1036 }
1049 list_add(&inc->p_list, &sctp_apps[hash]); 1037 list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
1050 atomic_inc(&ip_vs_protocol_sctp.appcnt); 1038 atomic_inc(&pd->appcnt);
1051out: 1039out:
1052 spin_unlock_bh(&sctp_app_lock); 1040 spin_unlock_bh(&ipvs->sctp_app_lock);
1053 1041
1054 return ret; 1042 return ret;
1055} 1043}
1056 1044
1057static void sctp_unregister_app(struct ip_vs_app *inc) 1045static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
1058{ 1046{
1059 spin_lock_bh(&sctp_app_lock); 1047 struct netns_ipvs *ipvs = net_ipvs(net);
1060 atomic_dec(&ip_vs_protocol_sctp.appcnt); 1048 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
1049
1050 spin_lock_bh(&ipvs->sctp_app_lock);
1051 atomic_dec(&pd->appcnt);
1061 list_del(&inc->p_list); 1052 list_del(&inc->p_list);
1062 spin_unlock_bh(&sctp_app_lock); 1053 spin_unlock_bh(&ipvs->sctp_app_lock);
1063} 1054}
1064 1055
1065static int sctp_app_conn_bind(struct ip_vs_conn *cp) 1056static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1066{ 1057{
1058 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
1067 int hash; 1059 int hash;
1068 struct ip_vs_app *inc; 1060 struct ip_vs_app *inc;
1069 int result = 0; 1061 int result = 0;
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1074 /* Lookup application incarnations and bind the right one */ 1066 /* Lookup application incarnations and bind the right one */
1075 hash = sctp_app_hashkey(cp->vport); 1067 hash = sctp_app_hashkey(cp->vport);
1076 1068
1077 spin_lock(&sctp_app_lock); 1069 spin_lock(&ipvs->sctp_app_lock);
1078 list_for_each_entry(inc, &sctp_apps[hash], p_list) { 1070 list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
1079 if (inc->port == cp->vport) { 1071 if (inc->port == cp->vport) {
1080 if (unlikely(!ip_vs_app_inc_get(inc))) 1072 if (unlikely(!ip_vs_app_inc_get(inc)))
1081 break; 1073 break;
1082 spin_unlock(&sctp_app_lock); 1074 spin_unlock(&ipvs->sctp_app_lock);
1083 1075
1084 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" 1076 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
1085 "%s:%u to app %s on port %u\n", 1077 "%s:%u to app %s on port %u\n",
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1095 goto out; 1087 goto out;
1096 } 1088 }
1097 } 1089 }
1098 spin_unlock(&sctp_app_lock); 1090 spin_unlock(&ipvs->sctp_app_lock);
1099out: 1091out:
1100 return result; 1092 return result;
1101} 1093}
1102 1094
1103static void ip_vs_sctp_init(struct ip_vs_protocol *pp) 1095/* ---------------------------------------------
1096 * timeouts are netns related now.
1097 * ---------------------------------------------
1098 */
1099static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
1104{ 1100{
1105 IP_VS_INIT_HASH_TABLE(sctp_apps); 1101 struct netns_ipvs *ipvs = net_ipvs(net);
1106 pp->timeout_table = sctp_timeouts;
1107}
1108 1102
1103 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
1104 spin_lock_init(&ipvs->tcp_app_lock);
1105 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
1106 sizeof(sctp_timeouts));
1107}
1109 1108
1110static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) 1109static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
1111{ 1110{
1112 1111 kfree(pd->timeout_table);
1113} 1112}
1114 1113
1115struct ip_vs_protocol ip_vs_protocol_sctp = { 1114struct ip_vs_protocol ip_vs_protocol_sctp = {
1116 .name = "SCTP", 1115 .name = "SCTP",
1117 .protocol = IPPROTO_SCTP, 1116 .protocol = IPPROTO_SCTP,
1118 .num_states = IP_VS_SCTP_S_LAST, 1117 .num_states = IP_VS_SCTP_S_LAST,
1119 .dont_defrag = 0, 1118 .dont_defrag = 0,
1120 .appcnt = ATOMIC_INIT(0), 1119 .init = NULL,
1121 .init = ip_vs_sctp_init, 1120 .exit = NULL,
1122 .exit = ip_vs_sctp_exit, 1121 .init_netns = __ip_vs_sctp_init,
1123 .register_app = sctp_register_app, 1122 .exit_netns = __ip_vs_sctp_exit,
1123 .register_app = sctp_register_app,
1124 .unregister_app = sctp_unregister_app, 1124 .unregister_app = sctp_unregister_app,
1125 .conn_schedule = sctp_conn_schedule, 1125 .conn_schedule = sctp_conn_schedule,
1126 .conn_in_get = ip_vs_conn_in_get_proto, 1126 .conn_in_get = ip_vs_conn_in_get_proto,
1127 .conn_out_get = ip_vs_conn_out_get_proto, 1127 .conn_out_get = ip_vs_conn_out_get_proto,
1128 .snat_handler = sctp_snat_handler, 1128 .snat_handler = sctp_snat_handler,
1129 .dnat_handler = sctp_dnat_handler, 1129 .dnat_handler = sctp_dnat_handler,
1130 .csum_check = sctp_csum_check, 1130 .csum_check = sctp_csum_check,
1131 .state_name = sctp_state_name, 1131 .state_name = sctp_state_name,
1132 .state_transition = sctp_state_transition, 1132 .state_transition = sctp_state_transition,
1133 .app_conn_bind = sctp_app_conn_bind, 1133 .app_conn_bind = sctp_app_conn_bind,
1134 .debug_packet = ip_vs_tcpudp_debug_packet, 1134 .debug_packet = ip_vs_tcpudp_debug_packet,
1135 .timeout_change = sctp_timeout_change, 1135 .timeout_change = NULL,
1136 .set_state_timeout = sctp_set_state_timeout,
1137}; 1136};
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index f6c5200e2146..c0cc341b840d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Changes: 12 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
13 * 13 *
14 * Network name space (netns) aware.
15 * Global data moved to netns i.e struct netns_ipvs
16 * tcp_timeouts table has copy per netns in a hash table per
17 * protocol ip_vs_proto_data and is handled by netns
14 */ 18 */
15 19
16#define KMSG_COMPONENT "IPVS" 20#define KMSG_COMPONENT "IPVS"
@@ -28,9 +32,10 @@
28#include <net/ip_vs.h> 32#include <net/ip_vs.h>
29 33
30static int 34static int
31tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
32 int *verdict, struct ip_vs_conn **cpp) 36 int *verdict, struct ip_vs_conn **cpp)
33{ 37{
38 struct net *net;
34 struct ip_vs_service *svc; 39 struct ip_vs_service *svc;
35 struct tcphdr _tcph, *th; 40 struct tcphdr _tcph, *th;
36 struct ip_vs_iphdr iph; 41 struct ip_vs_iphdr iph;
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
42 *verdict = NF_DROP; 47 *verdict = NF_DROP;
43 return 0; 48 return 0;
44 } 49 }
45 50 net = skb_net(skb);
46 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 51 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
47 if (th->syn && 52 if (th->syn &&
48 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, 53 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
49 th->dest))) { 54 &iph.daddr, th->dest))) {
50 int ignored; 55 int ignored;
51 56
52 if (ip_vs_todrop()) { 57 if (ip_vs_todrop(net_ipvs(net))) {
53 /* 58 /*
54 * It seems that we are very loaded. 59 * It seems that we are very loaded.
55 * We have to drop this packet :( 60 * We have to drop this packet :(
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
63 * Let the virtual server select a real server for the 68 * Let the virtual server select a real server for the
64 * incoming connection, and create a connection entry. 69 * incoming connection, and create a connection entry.
65 */ 70 */
66 *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 71 *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
67 if (!*cpp && !ignored) { 72 if (!*cpp && ignored <= 0) {
68 *verdict = ip_vs_leave(svc, skb, pp); 73 if (!ignored)
74 *verdict = ip_vs_leave(svc, skb, pd);
75 else {
76 ip_vs_service_put(svc);
77 *verdict = NF_DROP;
78 }
69 return 0; 79 return 0;
70 } 80 }
71 ip_vs_service_put(svc); 81 ip_vs_service_put(svc);
72 } 82 }
83 /* NF_ACCEPT */
73 return 1; 84 return 1;
74} 85}
75 86
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
338/* 349/*
339 * Timeout table[state] 350 * Timeout table[state]
340 */ 351 */
341static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { 352static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
342 [IP_VS_TCP_S_NONE] = 2*HZ, 353 [IP_VS_TCP_S_NONE] = 2*HZ,
343 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, 354 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
344 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, 355 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
437/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 448/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
438}; 449};
439 450
440static struct tcp_states_t *tcp_state_table = tcp_states; 451static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
441
442
443static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
444{ 452{
445 int on = (flags & 1); /* secure_tcp */ 453 int on = (flags & 1); /* secure_tcp */
446 454
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
450 ** for most if not for all of the applications. Something 458 ** for most if not for all of the applications. Something
451 ** like "capabilities" (flags) for each object. 459 ** like "capabilities" (flags) for each object.
452 */ 460 */
453 tcp_state_table = (on? tcp_states_dos : tcp_states); 461 pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
454}
455
456static int
457tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
458{
459 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
460 tcp_state_name_table, sname, to);
461} 462}
462 463
463static inline int tcp_state_idx(struct tcphdr *th) 464static inline int tcp_state_idx(struct tcphdr *th)
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th)
474} 475}
475 476
476static inline void 477static inline void
477set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, 478set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
478 int direction, struct tcphdr *th) 479 int direction, struct tcphdr *th)
479{ 480{
480 int state_idx; 481 int state_idx;
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
497 goto tcp_state_out; 498 goto tcp_state_out;
498 } 499 }
499 500
500 new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; 501 new_state =
502 pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
501 503
502 tcp_state_out: 504 tcp_state_out:
503 if (new_state != cp->state) { 505 if (new_state != cp->state) {
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
505 507
506 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" 508 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
507 "%s:%d state: %s->%s conn->refcnt:%d\n", 509 "%s:%d state: %s->%s conn->refcnt:%d\n",
508 pp->name, 510 pd->pp->name,
509 ((state_off == TCP_DIR_OUTPUT) ? 511 ((state_off == TCP_DIR_OUTPUT) ?
510 "output " : "input "), 512 "output " : "input "),
511 th->syn ? 'S' : '.', 513 th->syn ? 'S' : '.',
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
535 } 537 }
536 } 538 }
537 539
538 cp->timeout = pp->timeout_table[cp->state = new_state]; 540 if (likely(pd))
541 cp->timeout = pd->timeout_table[cp->state = new_state];
542 else /* What to do ? */
543 cp->timeout = tcp_timeouts[cp->state = new_state];
539} 544}
540 545
541
542/* 546/*
543 * Handle state transitions 547 * Handle state transitions
544 */ 548 */
545static int 549static int
546tcp_state_transition(struct ip_vs_conn *cp, int direction, 550tcp_state_transition(struct ip_vs_conn *cp, int direction,
547 const struct sk_buff *skb, 551 const struct sk_buff *skb,
548 struct ip_vs_protocol *pp) 552 struct ip_vs_proto_data *pd)
549{ 553{
550 struct tcphdr _tcph, *th; 554 struct tcphdr _tcph, *th;
551 555
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
560 return 0; 564 return 0;
561 565
562 spin_lock(&cp->lock); 566 spin_lock(&cp->lock);
563 set_tcp_state(pp, cp, direction, th); 567 set_tcp_state(pd, cp, direction, th);
564 spin_unlock(&cp->lock); 568 spin_unlock(&cp->lock);
565 569
566 return 1; 570 return 1;
567} 571}
568 572
569
570/*
571 * Hash table for TCP application incarnations
572 */
573#define TCP_APP_TAB_BITS 4
574#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
575#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
576
577static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
578static DEFINE_SPINLOCK(tcp_app_lock);
579
580static inline __u16 tcp_app_hashkey(__be16 port) 573static inline __u16 tcp_app_hashkey(__be16 port)
581{ 574{
582 return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) 575 return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port)
584} 577}
585 578
586 579
587static int tcp_register_app(struct ip_vs_app *inc) 580static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
588{ 581{
589 struct ip_vs_app *i; 582 struct ip_vs_app *i;
590 __u16 hash; 583 __u16 hash;
591 __be16 port = inc->port; 584 __be16 port = inc->port;
592 int ret = 0; 585 int ret = 0;
586 struct netns_ipvs *ipvs = net_ipvs(net);
587 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
593 588
594 hash = tcp_app_hashkey(port); 589 hash = tcp_app_hashkey(port);
595 590
596 spin_lock_bh(&tcp_app_lock); 591 spin_lock_bh(&ipvs->tcp_app_lock);
597 list_for_each_entry(i, &tcp_apps[hash], p_list) { 592 list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
598 if (i->port == port) { 593 if (i->port == port) {
599 ret = -EEXIST; 594 ret = -EEXIST;
600 goto out; 595 goto out;
601 } 596 }
602 } 597 }
603 list_add(&inc->p_list, &tcp_apps[hash]); 598 list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
604 atomic_inc(&ip_vs_protocol_tcp.appcnt); 599 atomic_inc(&pd->appcnt);
605 600
606 out: 601 out:
607 spin_unlock_bh(&tcp_app_lock); 602 spin_unlock_bh(&ipvs->tcp_app_lock);
608 return ret; 603 return ret;
609} 604}
610 605
611 606
612static void 607static void
613tcp_unregister_app(struct ip_vs_app *inc) 608tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
614{ 609{
615 spin_lock_bh(&tcp_app_lock); 610 struct netns_ipvs *ipvs = net_ipvs(net);
616 atomic_dec(&ip_vs_protocol_tcp.appcnt); 611 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
612
613 spin_lock_bh(&ipvs->tcp_app_lock);
614 atomic_dec(&pd->appcnt);
617 list_del(&inc->p_list); 615 list_del(&inc->p_list);
618 spin_unlock_bh(&tcp_app_lock); 616 spin_unlock_bh(&ipvs->tcp_app_lock);
619} 617}
620 618
621 619
622static int 620static int
623tcp_app_conn_bind(struct ip_vs_conn *cp) 621tcp_app_conn_bind(struct ip_vs_conn *cp)
624{ 622{
623 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
625 int hash; 624 int hash;
626 struct ip_vs_app *inc; 625 struct ip_vs_app *inc;
627 int result = 0; 626 int result = 0;
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
633 /* Lookup application incarnations and bind the right one */ 632 /* Lookup application incarnations and bind the right one */
634 hash = tcp_app_hashkey(cp->vport); 633 hash = tcp_app_hashkey(cp->vport);
635 634
636 spin_lock(&tcp_app_lock); 635 spin_lock(&ipvs->tcp_app_lock);
637 list_for_each_entry(inc, &tcp_apps[hash], p_list) { 636 list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
638 if (inc->port == cp->vport) { 637 if (inc->port == cp->vport) {
639 if (unlikely(!ip_vs_app_inc_get(inc))) 638 if (unlikely(!ip_vs_app_inc_get(inc)))
640 break; 639 break;
641 spin_unlock(&tcp_app_lock); 640 spin_unlock(&ipvs->tcp_app_lock);
642 641
643 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 642 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
644 "%s:%u to app %s on port %u\n", 643 "%s:%u to app %s on port %u\n",
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
655 goto out; 654 goto out;
656 } 655 }
657 } 656 }
658 spin_unlock(&tcp_app_lock); 657 spin_unlock(&ipvs->tcp_app_lock);
659 658
660 out: 659 out:
661 return result; 660 return result;
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
665/* 664/*
666 * Set LISTEN timeout. (ip_vs_conn_put will setup timer) 665 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
667 */ 666 */
668void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) 667void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
669{ 668{
669 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
670
670 spin_lock(&cp->lock); 671 spin_lock(&cp->lock);
671 cp->state = IP_VS_TCP_S_LISTEN; 672 cp->state = IP_VS_TCP_S_LISTEN;
672 cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; 673 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
674 : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
673 spin_unlock(&cp->lock); 675 spin_unlock(&cp->lock);
674} 676}
675 677
676 678/* ---------------------------------------------
677static void ip_vs_tcp_init(struct ip_vs_protocol *pp) 679 * timeouts is netns related now.
680 * ---------------------------------------------
681 */
682static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
678{ 683{
679 IP_VS_INIT_HASH_TABLE(tcp_apps); 684 struct netns_ipvs *ipvs = net_ipvs(net);
680 pp->timeout_table = tcp_timeouts;
681}
682 685
686 ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
687 spin_lock_init(&ipvs->tcp_app_lock);
688 pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
689 sizeof(tcp_timeouts));
690 pd->tcp_state_table = tcp_states;
691}
683 692
684static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) 693static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
685{ 694{
695 kfree(pd->timeout_table);
686} 696}
687 697
688 698
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
691 .protocol = IPPROTO_TCP, 701 .protocol = IPPROTO_TCP,
692 .num_states = IP_VS_TCP_S_LAST, 702 .num_states = IP_VS_TCP_S_LAST,
693 .dont_defrag = 0, 703 .dont_defrag = 0,
694 .appcnt = ATOMIC_INIT(0), 704 .init = NULL,
695 .init = ip_vs_tcp_init, 705 .exit = NULL,
696 .exit = ip_vs_tcp_exit, 706 .init_netns = __ip_vs_tcp_init,
707 .exit_netns = __ip_vs_tcp_exit,
697 .register_app = tcp_register_app, 708 .register_app = tcp_register_app,
698 .unregister_app = tcp_unregister_app, 709 .unregister_app = tcp_unregister_app,
699 .conn_schedule = tcp_conn_schedule, 710 .conn_schedule = tcp_conn_schedule,
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
707 .app_conn_bind = tcp_app_conn_bind, 718 .app_conn_bind = tcp_app_conn_bind,
708 .debug_packet = ip_vs_tcpudp_debug_packet, 719 .debug_packet = ip_vs_tcpudp_debug_packet,
709 .timeout_change = tcp_timeout_change, 720 .timeout_change = tcp_timeout_change,
710 .set_state_timeout = tcp_set_state_timeout,
711}; 721};
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 9d106a06bb0a..f1282cbe6fe3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Changes: 12 * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
13 * Network name space (netns) aware.
13 * 14 *
14 */ 15 */
15 16
@@ -28,9 +29,10 @@
28#include <net/ip6_checksum.h> 29#include <net/ip6_checksum.h>
29 30
30static int 31static int
31udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
32 int *verdict, struct ip_vs_conn **cpp) 33 int *verdict, struct ip_vs_conn **cpp)
33{ 34{
35 struct net *net;
34 struct ip_vs_service *svc; 36 struct ip_vs_service *svc;
35 struct udphdr _udph, *uh; 37 struct udphdr _udph, *uh;
36 struct ip_vs_iphdr iph; 38 struct ip_vs_iphdr iph;
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
42 *verdict = NF_DROP; 44 *verdict = NF_DROP;
43 return 0; 45 return 0;
44 } 46 }
45 47 net = skb_net(skb);
46 svc = ip_vs_service_get(af, skb->mark, iph.protocol, 48 svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
47 &iph.daddr, uh->dest); 49 &iph.daddr, uh->dest);
48 if (svc) { 50 if (svc) {
49 int ignored; 51 int ignored;
50 52
51 if (ip_vs_todrop()) { 53 if (ip_vs_todrop(net_ipvs(net))) {
52 /* 54 /*
53 * It seems that we are very loaded. 55 * It seems that we are very loaded.
54 * We have to drop this packet :( 56 * We have to drop this packet :(
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
62 * Let the virtual server select a real server for the 64 * Let the virtual server select a real server for the
63 * incoming connection, and create a connection entry. 65 * incoming connection, and create a connection entry.
64 */ 66 */
65 *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 67 *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
66 if (!*cpp && !ignored) { 68 if (!*cpp && ignored <= 0) {
67 *verdict = ip_vs_leave(svc, skb, pp); 69 if (!ignored)
70 *verdict = ip_vs_leave(svc, skb, pd);
71 else {
72 ip_vs_service_put(svc);
73 *verdict = NF_DROP;
74 }
68 return 0; 75 return 0;
69 } 76 }
70 ip_vs_service_put(svc); 77 ip_vs_service_put(svc);
71 } 78 }
79 /* NF_ACCEPT */
72 return 1; 80 return 1;
73} 81}
74 82
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
338 return 1; 346 return 1;
339} 347}
340 348
341
342/*
343 * Note: the caller guarantees that only one of register_app,
344 * unregister_app or app_conn_bind is called each time.
345 */
346
347#define UDP_APP_TAB_BITS 4
348#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
349#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
350
351static struct list_head udp_apps[UDP_APP_TAB_SIZE];
352static DEFINE_SPINLOCK(udp_app_lock);
353
354static inline __u16 udp_app_hashkey(__be16 port) 349static inline __u16 udp_app_hashkey(__be16 port)
355{ 350{
356 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) 351 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port)
358} 353}
359 354
360 355
361static int udp_register_app(struct ip_vs_app *inc) 356static int udp_register_app(struct net *net, struct ip_vs_app *inc)
362{ 357{
363 struct ip_vs_app *i; 358 struct ip_vs_app *i;
364 __u16 hash; 359 __u16 hash;
365 __be16 port = inc->port; 360 __be16 port = inc->port;
366 int ret = 0; 361 int ret = 0;
362 struct netns_ipvs *ipvs = net_ipvs(net);
363 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
367 364
368 hash = udp_app_hashkey(port); 365 hash = udp_app_hashkey(port);
369 366
370 367
371 spin_lock_bh(&udp_app_lock); 368 spin_lock_bh(&ipvs->udp_app_lock);
372 list_for_each_entry(i, &udp_apps[hash], p_list) { 369 list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
373 if (i->port == port) { 370 if (i->port == port) {
374 ret = -EEXIST; 371 ret = -EEXIST;
375 goto out; 372 goto out;
376 } 373 }
377 } 374 }
378 list_add(&inc->p_list, &udp_apps[hash]); 375 list_add(&inc->p_list, &ipvs->udp_apps[hash]);
379 atomic_inc(&ip_vs_protocol_udp.appcnt); 376 atomic_inc(&pd->appcnt);
380 377
381 out: 378 out:
382 spin_unlock_bh(&udp_app_lock); 379 spin_unlock_bh(&ipvs->udp_app_lock);
383 return ret; 380 return ret;
384} 381}
385 382
386 383
387static void 384static void
388udp_unregister_app(struct ip_vs_app *inc) 385udp_unregister_app(struct net *net, struct ip_vs_app *inc)
389{ 386{
390 spin_lock_bh(&udp_app_lock); 387 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
391 atomic_dec(&ip_vs_protocol_udp.appcnt); 388 struct netns_ipvs *ipvs = net_ipvs(net);
389
390 spin_lock_bh(&ipvs->udp_app_lock);
391 atomic_dec(&pd->appcnt);
392 list_del(&inc->p_list); 392 list_del(&inc->p_list);
393 spin_unlock_bh(&udp_app_lock); 393 spin_unlock_bh(&ipvs->udp_app_lock);
394} 394}
395 395
396 396
397static int udp_app_conn_bind(struct ip_vs_conn *cp) 397static int udp_app_conn_bind(struct ip_vs_conn *cp)
398{ 398{
399 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
399 int hash; 400 int hash;
400 struct ip_vs_app *inc; 401 struct ip_vs_app *inc;
401 int result = 0; 402 int result = 0;
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
407 /* Lookup application incarnations and bind the right one */ 408 /* Lookup application incarnations and bind the right one */
408 hash = udp_app_hashkey(cp->vport); 409 hash = udp_app_hashkey(cp->vport);
409 410
410 spin_lock(&udp_app_lock); 411 spin_lock(&ipvs->udp_app_lock);
411 list_for_each_entry(inc, &udp_apps[hash], p_list) { 412 list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
412 if (inc->port == cp->vport) { 413 if (inc->port == cp->vport) {
413 if (unlikely(!ip_vs_app_inc_get(inc))) 414 if (unlikely(!ip_vs_app_inc_get(inc)))
414 break; 415 break;
415 spin_unlock(&udp_app_lock); 416 spin_unlock(&ipvs->udp_app_lock);
416 417
417 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 418 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
418 "%s:%u to app %s on port %u\n", 419 "%s:%u to app %s on port %u\n",
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
429 goto out; 430 goto out;
430 } 431 }
431 } 432 }
432 spin_unlock(&udp_app_lock); 433 spin_unlock(&ipvs->udp_app_lock);
433 434
434 out: 435 out:
435 return result; 436 return result;
436} 437}
437 438
438 439
439static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { 440static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
440 [IP_VS_UDP_S_NORMAL] = 5*60*HZ, 441 [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
441 [IP_VS_UDP_S_LAST] = 2*HZ, 442 [IP_VS_UDP_S_LAST] = 2*HZ,
442}; 443};
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
446 [IP_VS_UDP_S_LAST] = "BUG!", 447 [IP_VS_UDP_S_LAST] = "BUG!",
447}; 448};
448 449
449
450static int
451udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
452{
453 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
454 udp_state_name_table, sname, to);
455}
456
457static const char * udp_state_name(int state) 450static const char * udp_state_name(int state)
458{ 451{
459 if (state >= IP_VS_UDP_S_LAST) 452 if (state >= IP_VS_UDP_S_LAST)
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state)
464static int 457static int
465udp_state_transition(struct ip_vs_conn *cp, int direction, 458udp_state_transition(struct ip_vs_conn *cp, int direction,
466 const struct sk_buff *skb, 459 const struct sk_buff *skb,
467 struct ip_vs_protocol *pp) 460 struct ip_vs_proto_data *pd)
468{ 461{
469 cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; 462 if (unlikely(!pd)) {
463 pr_err("UDP no ns data\n");
464 return 0;
465 }
466
467 cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
470 return 1; 468 return 1;
471} 469}
472 470
473static void udp_init(struct ip_vs_protocol *pp) 471static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
474{ 472{
475 IP_VS_INIT_HASH_TABLE(udp_apps); 473 struct netns_ipvs *ipvs = net_ipvs(net);
476 pp->timeout_table = udp_timeouts; 474
475 ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
476 spin_lock_init(&ipvs->udp_app_lock);
477 pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
478 sizeof(udp_timeouts));
477} 479}
478 480
479static void udp_exit(struct ip_vs_protocol *pp) 481static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
480{ 482{
483 kfree(pd->timeout_table);
481} 484}
482 485
483 486
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
486 .protocol = IPPROTO_UDP, 489 .protocol = IPPROTO_UDP,
487 .num_states = IP_VS_UDP_S_LAST, 490 .num_states = IP_VS_UDP_S_LAST,
488 .dont_defrag = 0, 491 .dont_defrag = 0,
489 .init = udp_init, 492 .init = NULL,
490 .exit = udp_exit, 493 .exit = NULL,
494 .init_netns = __udp_init,
495 .exit_netns = __udp_exit,
491 .conn_schedule = udp_conn_schedule, 496 .conn_schedule = udp_conn_schedule,
492 .conn_in_get = ip_vs_conn_in_get_proto, 497 .conn_in_get = ip_vs_conn_in_get_proto,
493 .conn_out_get = ip_vs_conn_out_get_proto, 498 .conn_out_get = ip_vs_conn_out_get_proto,
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
501 .app_conn_bind = udp_app_conn_bind, 506 .app_conn_bind = udp_app_conn_bind,
502 .debug_packet = ip_vs_tcpudp_debug_packet, 507 .debug_packet = ip_vs_tcpudp_debug_packet,
503 .timeout_change = NULL, 508 .timeout_change = NULL,
504 .set_state_timeout = udp_set_state_timeout,
505}; 509};
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index ab85aedea17e..d1adf988eb08 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
8 * Version 1, is capable of handling both version 0 and 1 messages.
9 * Version 0 is the plain old format.
10 * Note Version 0 receivers will just drop Ver 1 messages.
11 * Version 1 is capable of handle IPv6, Persistence data,
12 * time-outs, and firewall marks.
13 * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order.
14 * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
15 *
16 * Definitions Message: is a complete datagram
17 * Sync_conn: is a part of a Message
18 * Param Data is an option to a Sync_conn.
19 *
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 20 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * 21 *
10 * ip_vs_sync: sync connection info from master load balancer to backups 22 * ip_vs_sync: sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
15 * Alexandre Cassen : Added SyncID support for incoming sync 27 * Alexandre Cassen : Added SyncID support for incoming sync
16 * messages filtering. 28 * messages filtering.
17 * Justin Ossevoort : Fix endian problem on sync message size. 29 * Justin Ossevoort : Fix endian problem on sync message size.
30 * Hans Schillstrom : Added Version 1: i.e. IPv6,
31 * Persistence support, fwmark and time-out.
18 */ 32 */
19 33
20#define KMSG_COMPONENT "IPVS" 34#define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
35#include <linux/wait.h> 49#include <linux/wait.h>
36#include <linux/kernel.h> 50#include <linux/kernel.h>
37 51
52#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
53
38#include <net/ip.h> 54#include <net/ip.h>
39#include <net/sock.h> 55#include <net/sock.h>
40 56
@@ -43,11 +59,13 @@
43#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ 59#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
44#define IP_VS_SYNC_PORT 8848 /* multicast port */ 60#define IP_VS_SYNC_PORT 8848 /* multicast port */
45 61
62#define SYNC_PROTO_VER 1 /* Protocol version in header */
46 63
47/* 64/*
48 * IPVS sync connection entry 65 * IPVS sync connection entry
66 * Version 0, i.e. original version.
49 */ 67 */
50struct ip_vs_sync_conn { 68struct ip_vs_sync_conn_v0 {
51 __u8 reserved; 69 __u8 reserved;
52 70
53 /* Protocol, addresses and port numbers */ 71 /* Protocol, addresses and port numbers */
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options {
71 struct ip_vs_seq out_seq; /* outgoing seq. struct */ 89 struct ip_vs_seq out_seq; /* outgoing seq. struct */
72}; 90};
73 91
92/*
93 Sync Connection format (sync_conn)
94
95 0 1 2 3
96 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
97 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
98 | Type | Protocol | Ver. | Size |
99 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 | Flags |
101 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
102 | State | cport |
103 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
104 | vport | dport |
105 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
106 | fwmark |
107 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
108 | timeout (in sec.) |
109 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
110 | ... |
111 | IP-Addresses (v4 or v6) |
112 | ... |
113 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
114 Optional Parameters.
115 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
116 | Param. Type | Param. Length | Param. data |
117 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
118 | ... |
119 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
120 | | Param Type | Param. Length |
121 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
122 | Param data |
123 | Last Param data should be padded for 32 bit alignment |
124 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
125*/
126
127/*
128 * Type 0, IPv4 sync connection format
129 */
130struct ip_vs_sync_v4 {
131 __u8 type;
132 __u8 protocol; /* Which protocol (TCP/UDP) */
133 __be16 ver_size; /* Version msb 4 bits */
134 /* Flags and state transition */
135 __be32 flags; /* status flags */
136 __be16 state; /* state info */
137 /* Protocol, addresses and port numbers */
138 __be16 cport;
139 __be16 vport;
140 __be16 dport;
141 __be32 fwmark; /* Firewall mark from skb */
142 __be32 timeout; /* cp timeout */
143 __be32 caddr; /* client address */
144 __be32 vaddr; /* virtual address */
145 __be32 daddr; /* destination address */
146 /* The sequence options start here */
147 /* PE data padded to 32bit alignment after seq. options */
148};
149/*
150 * Type 2 messages IPv6
151 */
152struct ip_vs_sync_v6 {
153 __u8 type;
154 __u8 protocol; /* Which protocol (TCP/UDP) */
155 __be16 ver_size; /* Version msb 4 bits */
156 /* Flags and state transition */
157 __be32 flags; /* status flags */
158 __be16 state; /* state info */
159 /* Protocol, addresses and port numbers */
160 __be16 cport;
161 __be16 vport;
162 __be16 dport;
163 __be32 fwmark; /* Firewall mark from skb */
164 __be32 timeout; /* cp timeout */
165 struct in6_addr caddr; /* client address */
166 struct in6_addr vaddr; /* virtual address */
167 struct in6_addr daddr; /* destination address */
168 /* The sequence options start here */
169 /* PE data padded to 32bit alignment after seq. options */
170};
171
172union ip_vs_sync_conn {
173 struct ip_vs_sync_v4 v4;
174 struct ip_vs_sync_v6 v6;
175};
176
177/* Bits in Type field in above */
178#define STYPE_INET6 0
179#define STYPE_F_INET6 (1 << STYPE_INET6)
180
181#define SVER_SHIFT 12 /* Shift to get version */
182#define SVER_MASK 0x0fff /* Mask to strip version */
183
184#define IPVS_OPT_SEQ_DATA 1
185#define IPVS_OPT_PE_DATA 2
186#define IPVS_OPT_PE_NAME 3
187#define IPVS_OPT_PARAM 7
188
189#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
190#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
191#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
192#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
193
74struct ip_vs_sync_thread_data { 194struct ip_vs_sync_thread_data {
195 struct net *net;
75 struct socket *sock; 196 struct socket *sock;
76 char *buf; 197 char *buf;
77}; 198};
78 199
79#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) 200/* Version 0 definition of packet sizes */
201#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
80#define FULL_CONN_SIZE \ 202#define FULL_CONN_SIZE \
81(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) 203(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
82 204
83 205
84/* 206/*
85 The master mulitcasts messages to the backup load balancers in the 207 The master mulitcasts messages (Datagrams) to the backup load balancers
86 following format. 208 in the following format.
209
210 Version 1:
211 Note, first byte should be Zero, so ver 0 receivers will drop the packet.
87 212
88 0 1 2 3 213 0 1 2 3
89 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 214 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
90 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 215 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
91 | Count Conns | SyncID | Size | 216 | 0 | SyncID | Size |
217 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
218 | Count Conns | Version | Reserved, set to Zero |
92 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 219 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
93 | | 220 | |
94 | IPVS Sync Connection (1) | 221 | IPVS Sync Connection (1) |
95 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 222 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
96 | . | 223 | . |
97 | . | 224 ~ . ~
98 | . | 225 | . |
99 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 226 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 | | 227 | |
101 | IPVS Sync Connection (n) | 228 | IPVS Sync Connection (n) |
102 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 229 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
230
231 Version 0 Header
232 0 1 2 3
233 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
234 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
235 | Count Conns | SyncID | Size |
236 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
237 | IPVS Sync Connection (1) |
103*/ 238*/
104 239
105#define SYNC_MESG_HEADER_LEN 4 240#define SYNC_MESG_HEADER_LEN 4
106#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ 241#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
107 242
108struct ip_vs_sync_mesg { 243/* Version 0 header */
244struct ip_vs_sync_mesg_v0 {
109 __u8 nr_conns; 245 __u8 nr_conns;
110 __u8 syncid; 246 __u8 syncid;
111 __u16 size; 247 __u16 size;
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg {
113 /* ip_vs_sync_conn entries start here */ 249 /* ip_vs_sync_conn entries start here */
114}; 250};
115 251
116/* the maximum length of sync (sending/receiving) message */ 252/* Version 1 header */
117static int sync_send_mesg_maxlen; 253struct ip_vs_sync_mesg {
118static int sync_recv_mesg_maxlen; 254 __u8 reserved; /* must be zero */
255 __u8 syncid;
256 __u16 size;
257 __u8 nr_conns;
258 __s8 version; /* SYNC_PROTO_VER */
259 __u16 spare;
260 /* ip_vs_sync_conn entries start here */
261};
119 262
120struct ip_vs_sync_buff { 263struct ip_vs_sync_buff {
121 struct list_head list; 264 struct list_head list;
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff {
127 unsigned char *end; 270 unsigned char *end;
128}; 271};
129 272
130
131/* the sync_buff list head and the lock */
132static LIST_HEAD(ip_vs_sync_queue);
133static DEFINE_SPINLOCK(ip_vs_sync_lock);
134
135/* current sync_buff for accepting new conn entries */
136static struct ip_vs_sync_buff *curr_sb = NULL;
137static DEFINE_SPINLOCK(curr_sb_lock);
138
139/* ipvs sync daemon state */
140volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
141volatile int ip_vs_master_syncid = 0;
142volatile int ip_vs_backup_syncid = 0;
143
144/* multicast interface name */
145char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
146char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
147
148/* sync daemon tasks */
149static struct task_struct *sync_master_thread;
150static struct task_struct *sync_backup_thread;
151
152/* multicast addr */ 273/* multicast addr */
153static struct sockaddr_in mcast_addr = { 274static struct sockaddr_in mcast_addr = {
154 .sin_family = AF_INET, 275 .sin_family = AF_INET,
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = {
156 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), 277 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
157}; 278};
158 279
280/*
281 * Copy of struct ip_vs_seq
282 * From unaligned network order to aligned host order
283 */
284static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
285{
286 ho->init_seq = get_unaligned_be32(&no->init_seq);
287 ho->delta = get_unaligned_be32(&no->delta);
288 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
289}
290
291/*
292 * Copy of struct ip_vs_seq
293 * From Aligned host order to unaligned network order
294 */
295static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
296{
297 put_unaligned_be32(ho->init_seq, &no->init_seq);
298 put_unaligned_be32(ho->delta, &no->delta);
299 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
300}
159 301
160static inline struct ip_vs_sync_buff *sb_dequeue(void) 302static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
161{ 303{
162 struct ip_vs_sync_buff *sb; 304 struct ip_vs_sync_buff *sb;
163 305
164 spin_lock_bh(&ip_vs_sync_lock); 306 spin_lock_bh(&ipvs->sync_lock);
165 if (list_empty(&ip_vs_sync_queue)) { 307 if (list_empty(&ipvs->sync_queue)) {
166 sb = NULL; 308 sb = NULL;
167 } else { 309 } else {
168 sb = list_entry(ip_vs_sync_queue.next, 310 sb = list_entry(ipvs->sync_queue.next,
169 struct ip_vs_sync_buff, 311 struct ip_vs_sync_buff,
170 list); 312 list);
171 list_del(&sb->list); 313 list_del(&sb->list);
172 } 314 }
173 spin_unlock_bh(&ip_vs_sync_lock); 315 spin_unlock_bh(&ipvs->sync_lock);
174 316
175 return sb; 317 return sb;
176} 318}
177 319
178static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) 320/*
321 * Create a new sync buffer for Version 1 proto.
322 */
323static inline struct ip_vs_sync_buff *
324ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
179{ 325{
180 struct ip_vs_sync_buff *sb; 326 struct ip_vs_sync_buff *sb;
181 327
182 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) 328 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
183 return NULL; 329 return NULL;
184 330
185 if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { 331 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
332 if (!sb->mesg) {
186 kfree(sb); 333 kfree(sb);
187 return NULL; 334 return NULL;
188 } 335 }
336 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */
337 sb->mesg->version = SYNC_PROTO_VER;
338 sb->mesg->syncid = ipvs->master_syncid;
339 sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
189 sb->mesg->nr_conns = 0; 340 sb->mesg->nr_conns = 0;
190 sb->mesg->syncid = ip_vs_master_syncid; 341 sb->mesg->spare = 0;
191 sb->mesg->size = 4; 342 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
192 sb->head = (unsigned char *)sb->mesg + 4; 343 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
193 sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; 344
194 sb->firstuse = jiffies; 345 sb->firstuse = jiffies;
195 return sb; 346 return sb;
196} 347}
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
201 kfree(sb); 352 kfree(sb);
202} 353}
203 354
204static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) 355static inline void sb_queue_tail(struct netns_ipvs *ipvs)
205{ 356{
206 spin_lock(&ip_vs_sync_lock); 357 struct ip_vs_sync_buff *sb = ipvs->sync_buff;
207 if (ip_vs_sync_state & IP_VS_STATE_MASTER) 358
208 list_add_tail(&sb->list, &ip_vs_sync_queue); 359 spin_lock(&ipvs->sync_lock);
360 if (ipvs->sync_state & IP_VS_STATE_MASTER)
361 list_add_tail(&sb->list, &ipvs->sync_queue);
209 else 362 else
210 ip_vs_sync_buff_release(sb); 363 ip_vs_sync_buff_release(sb);
211 spin_unlock(&ip_vs_sync_lock); 364 spin_unlock(&ipvs->sync_lock);
212} 365}
213 366
214/* 367/*
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
216 * than the specified time or the specified time is zero. 369 * than the specified time or the specified time is zero.
217 */ 370 */
218static inline struct ip_vs_sync_buff * 371static inline struct ip_vs_sync_buff *
219get_curr_sync_buff(unsigned long time) 372get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
220{ 373{
221 struct ip_vs_sync_buff *sb; 374 struct ip_vs_sync_buff *sb;
222 375
223 spin_lock_bh(&curr_sb_lock); 376 spin_lock_bh(&ipvs->sync_buff_lock);
224 if (curr_sb && (time == 0 || 377 if (ipvs->sync_buff && (time == 0 ||
225 time_before(jiffies - curr_sb->firstuse, time))) { 378 time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
226 sb = curr_sb; 379 sb = ipvs->sync_buff;
227 curr_sb = NULL; 380 ipvs->sync_buff = NULL;
228 } else 381 } else
229 sb = NULL; 382 sb = NULL;
230 spin_unlock_bh(&curr_sb_lock); 383 spin_unlock_bh(&ipvs->sync_buff_lock);
231 return sb; 384 return sb;
232} 385}
233 386
387/*
388 * Switch mode from sending version 0 or 1
389 * - must handle sync_buf
390 */
391void ip_vs_sync_switch_mode(struct net *net, int mode)
392{
393 struct netns_ipvs *ipvs = net_ipvs(net);
394
395 if (!ipvs->sync_state & IP_VS_STATE_MASTER)
396 return;
397 if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
398 return;
399
400 spin_lock_bh(&ipvs->sync_buff_lock);
401 /* Buffer empty ? then let buf_create do the job */
402 if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
403 kfree(ipvs->sync_buff);
404 ipvs->sync_buff = NULL;
405 } else {
406 spin_lock_bh(&ipvs->sync_lock);
407 if (ipvs->sync_state & IP_VS_STATE_MASTER)
408 list_add_tail(&ipvs->sync_buff->list,
409 &ipvs->sync_queue);
410 else
411 ip_vs_sync_buff_release(ipvs->sync_buff);
412 spin_unlock_bh(&ipvs->sync_lock);
413 }
414 spin_unlock_bh(&ipvs->sync_buff_lock);
415}
234 416
235/* 417/*
418 * Create a new sync buffer for Version 0 proto.
419 */
420static inline struct ip_vs_sync_buff *
421ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
422{
423 struct ip_vs_sync_buff *sb;
424 struct ip_vs_sync_mesg_v0 *mesg;
425
426 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
427 return NULL;
428
429 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
430 if (!sb->mesg) {
431 kfree(sb);
432 return NULL;
433 }
434 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
435 mesg->nr_conns = 0;
436 mesg->syncid = ipvs->master_syncid;
437 mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
438 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
439 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
440 sb->firstuse = jiffies;
441 return sb;
442}
443
444/*
445 * Version 0 , could be switched in by sys_ctl.
236 * Add an ip_vs_conn information into the current sync_buff. 446 * Add an ip_vs_conn information into the current sync_buff.
237 * Called by ip_vs_in.
238 */ 447 */
239void ip_vs_sync_conn(struct ip_vs_conn *cp) 448void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
240{ 449{
241 struct ip_vs_sync_mesg *m; 450 struct netns_ipvs *ipvs = net_ipvs(net);
242 struct ip_vs_sync_conn *s; 451 struct ip_vs_sync_mesg_v0 *m;
452 struct ip_vs_sync_conn_v0 *s;
243 int len; 453 int len;
244 454
245 spin_lock(&curr_sb_lock); 455 if (unlikely(cp->af != AF_INET))
246 if (!curr_sb) { 456 return;
247 if (!(curr_sb=ip_vs_sync_buff_create())) { 457 /* Do not sync ONE PACKET */
248 spin_unlock(&curr_sb_lock); 458 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
459 return;
460
461 spin_lock(&ipvs->sync_buff_lock);
462 if (!ipvs->sync_buff) {
463 ipvs->sync_buff =
464 ip_vs_sync_buff_create_v0(ipvs);
465 if (!ipvs->sync_buff) {
466 spin_unlock(&ipvs->sync_buff_lock);
249 pr_err("ip_vs_sync_buff_create failed.\n"); 467 pr_err("ip_vs_sync_buff_create failed.\n");
250 return; 468 return;
251 } 469 }
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
253 471
254 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : 472 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
255 SIMPLE_CONN_SIZE; 473 SIMPLE_CONN_SIZE;
256 m = curr_sb->mesg; 474 m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
257 s = (struct ip_vs_sync_conn *)curr_sb->head; 475 s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
258 476
259 /* copy members */ 477 /* copy members */
478 s->reserved = 0;
260 s->protocol = cp->protocol; 479 s->protocol = cp->protocol;
261 s->cport = cp->cport; 480 s->cport = cp->cport;
262 s->vport = cp->vport; 481 s->vport = cp->vport;
@@ -274,83 +493,366 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
274 493
275 m->nr_conns++; 494 m->nr_conns++;
276 m->size += len; 495 m->size += len;
277 curr_sb->head += len; 496 ipvs->sync_buff->head += len;
278 497
279 /* check if there is a space for next one */ 498 /* check if there is a space for next one */
280 if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { 499 if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
281 sb_queue_tail(curr_sb); 500 sb_queue_tail(ipvs);
282 curr_sb = NULL; 501 ipvs->sync_buff = NULL;
283 } 502 }
284 spin_unlock(&curr_sb_lock); 503 spin_unlock(&ipvs->sync_buff_lock);
285 504
286 /* synchronize its controller if it has */ 505 /* synchronize its controller if it has */
287 if (cp->control) 506 if (cp->control)
288 ip_vs_sync_conn(cp->control); 507 ip_vs_sync_conn(net, cp->control);
508}
509
510/*
511 * Add an ip_vs_conn information into the current sync_buff.
512 * Called by ip_vs_in.
513 * Sending Version 1 messages
514 */
515void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
516{
517 struct netns_ipvs *ipvs = net_ipvs(net);
518 struct ip_vs_sync_mesg *m;
519 union ip_vs_sync_conn *s;
520 __u8 *p;
521 unsigned int len, pe_name_len, pad;
522
523 /* Handle old version of the protocol */
524 if (ipvs->sysctl_sync_ver == 0) {
525 ip_vs_sync_conn_v0(net, cp);
526 return;
527 }
528 /* Do not sync ONE PACKET */
529 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
530 goto control;
531sloop:
532 /* Sanity checks */
533 pe_name_len = 0;
534 if (cp->pe_data_len) {
535 if (!cp->pe_data || !cp->dest) {
536 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
537 return;
538 }
539 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
540 }
541
542 spin_lock(&ipvs->sync_buff_lock);
543
544#ifdef CONFIG_IP_VS_IPV6
545 if (cp->af == AF_INET6)
546 len = sizeof(struct ip_vs_sync_v6);
547 else
548#endif
549 len = sizeof(struct ip_vs_sync_v4);
550
551 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
552 len += sizeof(struct ip_vs_sync_conn_options) + 2;
553
554 if (cp->pe_data_len)
555 len += cp->pe_data_len + 2; /* + Param hdr field */
556 if (pe_name_len)
557 len += pe_name_len + 2;
558
559 /* check if there is a space for this one */
560 pad = 0;
561 if (ipvs->sync_buff) {
562 pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
563 if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
564 sb_queue_tail(ipvs);
565 ipvs->sync_buff = NULL;
566 pad = 0;
567 }
568 }
569
570 if (!ipvs->sync_buff) {
571 ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
572 if (!ipvs->sync_buff) {
573 spin_unlock(&ipvs->sync_buff_lock);
574 pr_err("ip_vs_sync_buff_create failed.\n");
575 return;
576 }
577 }
578
579 m = ipvs->sync_buff->mesg;
580 p = ipvs->sync_buff->head;
581 ipvs->sync_buff->head += pad + len;
582 m->size += pad + len;
583 /* Add ev. padding from prev. sync_conn */
584 while (pad--)
585 *(p++) = 0;
586
587 s = (union ip_vs_sync_conn *)p;
588
589 /* Set message type & copy members */
590 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
591 s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */
592 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
593 s->v4.state = htons(cp->state);
594 s->v4.protocol = cp->protocol;
595 s->v4.cport = cp->cport;
596 s->v4.vport = cp->vport;
597 s->v4.dport = cp->dport;
598 s->v4.fwmark = htonl(cp->fwmark);
599 s->v4.timeout = htonl(cp->timeout / HZ);
600 m->nr_conns++;
601
602#ifdef CONFIG_IP_VS_IPV6
603 if (cp->af == AF_INET6) {
604 p += sizeof(struct ip_vs_sync_v6);
605 ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
606 ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
607 ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
608 } else
609#endif
610 {
611 p += sizeof(struct ip_vs_sync_v4); /* options ptr */
612 s->v4.caddr = cp->caddr.ip;
613 s->v4.vaddr = cp->vaddr.ip;
614 s->v4.daddr = cp->daddr.ip;
615 }
616 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
617 *(p++) = IPVS_OPT_SEQ_DATA;
618 *(p++) = sizeof(struct ip_vs_sync_conn_options);
619 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
620 p += sizeof(struct ip_vs_seq);
621 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
622 p += sizeof(struct ip_vs_seq);
623 }
624 /* Handle pe data */
625 if (cp->pe_data_len && cp->pe_data) {
626 *(p++) = IPVS_OPT_PE_DATA;
627 *(p++) = cp->pe_data_len;
628 memcpy(p, cp->pe_data, cp->pe_data_len);
629 p += cp->pe_data_len;
630 if (pe_name_len) {
631 /* Add PE_NAME */
632 *(p++) = IPVS_OPT_PE_NAME;
633 *(p++) = pe_name_len;
634 memcpy(p, cp->pe->name, pe_name_len);
635 p += pe_name_len;
636 }
637 }
638
639 spin_unlock(&ipvs->sync_buff_lock);
640
641control:
642 /* synchronize its controller if it has */
643 cp = cp->control;
644 if (!cp)
645 return;
646 /*
647 * Reduce sync rate for templates
648 * i.e only increment in_pkts for Templates.
649 */
650 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
651 int pkts = atomic_add_return(1, &cp->in_pkts);
652
653 if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
654 return;
655 }
656 goto sloop;
289} 657}
290 658
659/*
660 * fill_param used by version 1
661 */
291static inline int 662static inline int
292ip_vs_conn_fill_param_sync(int af, int protocol, 663ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
293 const union nf_inet_addr *caddr, __be16 cport, 664 struct ip_vs_conn_param *p,
294 const union nf_inet_addr *vaddr, __be16 vport, 665 __u8 *pe_data, unsigned int pe_data_len,
295 struct ip_vs_conn_param *p) 666 __u8 *pe_name, unsigned int pe_name_len)
296{ 667{
297 /* XXX: Need to take into account persistence engine */ 668#ifdef CONFIG_IP_VS_IPV6
298 ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); 669 if (af == AF_INET6)
670 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
671 (const union nf_inet_addr *)&sc->v6.caddr,
672 sc->v6.cport,
673 (const union nf_inet_addr *)&sc->v6.vaddr,
674 sc->v6.vport, p);
675 else
676#endif
677 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
678 (const union nf_inet_addr *)&sc->v4.caddr,
679 sc->v4.cport,
680 (const union nf_inet_addr *)&sc->v4.vaddr,
681 sc->v4.vport, p);
682 /* Handle pe data */
683 if (pe_data_len) {
684 if (pe_name_len) {
685 char buff[IP_VS_PENAME_MAXLEN+1];
686
687 memcpy(buff, pe_name, pe_name_len);
688 buff[pe_name_len]=0;
689 p->pe = __ip_vs_pe_getbyname(buff);
690 if (!p->pe) {
691 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
692 buff);
693 return 1;
694 }
695 } else {
696 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
697 return 1;
698 }
699
700 p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
701 if (!p->pe_data) {
702 if (p->pe->module)
703 module_put(p->pe->module);
704 return -ENOMEM;
705 }
706 memcpy(p->pe_data, pe_data, pe_data_len);
707 p->pe_data_len = pe_data_len;
708 }
299 return 0; 709 return 0;
300} 710}
301 711
302/* 712/*
303 * Process received multicast message and create the corresponding 713 * Connection Add / Update.
304 * ip_vs_conn entries. 714 * Common for version 0 and 1 reception of backup sync_conns.
715 * Param: ...
716 * timeout is in sec.
305 */ 717 */
306static void ip_vs_process_message(const char *buffer, const size_t buflen) 718static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
719 unsigned int flags, unsigned int state,
720 unsigned int protocol, unsigned int type,
721 const union nf_inet_addr *daddr, __be16 dport,
722 unsigned long timeout, __u32 fwmark,
723 struct ip_vs_sync_conn_options *opt)
307{ 724{
308 struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
309 struct ip_vs_sync_conn *s;
310 struct ip_vs_sync_conn_options *opt;
311 struct ip_vs_conn *cp;
312 struct ip_vs_protocol *pp;
313 struct ip_vs_dest *dest; 725 struct ip_vs_dest *dest;
314 struct ip_vs_conn_param param; 726 struct ip_vs_conn *cp;
315 char *p; 727 struct netns_ipvs *ipvs = net_ipvs(net);
316 int i;
317 728
318 if (buflen < sizeof(struct ip_vs_sync_mesg)) { 729 if (!(flags & IP_VS_CONN_F_TEMPLATE))
319 IP_VS_ERR_RL("sync message header too short\n"); 730 cp = ip_vs_conn_in_get(param);
320 return; 731 else
321 } 732 cp = ip_vs_ct_in_get(param);
322 733
323 /* Convert size back to host byte order */ 734 if (cp && param->pe_data) /* Free pe_data */
324 m->size = ntohs(m->size); 735 kfree(param->pe_data);
736 if (!cp) {
737 /*
738 * Find the appropriate destination for the connection.
739 * If it is not found the connection will remain unbound
740 * but still handled.
741 */
742 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
743 param->vport, protocol, fwmark);
325 744
326 if (buflen != m->size) { 745 /* Set the approprite ativity flag */
327 IP_VS_ERR_RL("bogus sync message size\n"); 746 if (protocol == IPPROTO_TCP) {
328 return; 747 if (state != IP_VS_TCP_S_ESTABLISHED)
748 flags |= IP_VS_CONN_F_INACTIVE;
749 else
750 flags &= ~IP_VS_CONN_F_INACTIVE;
751 } else if (protocol == IPPROTO_SCTP) {
752 if (state != IP_VS_SCTP_S_ESTABLISHED)
753 flags |= IP_VS_CONN_F_INACTIVE;
754 else
755 flags &= ~IP_VS_CONN_F_INACTIVE;
756 }
757 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
758 if (dest)
759 atomic_dec(&dest->refcnt);
760 if (!cp) {
761 if (param->pe_data)
762 kfree(param->pe_data);
763 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
764 return;
765 }
766 } else if (!cp->dest) {
767 dest = ip_vs_try_bind_dest(cp);
768 if (dest)
769 atomic_dec(&dest->refcnt);
770 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
771 (cp->state != state)) {
772 /* update active/inactive flag for the connection */
773 dest = cp->dest;
774 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
775 (state != IP_VS_TCP_S_ESTABLISHED)) {
776 atomic_dec(&dest->activeconns);
777 atomic_inc(&dest->inactconns);
778 cp->flags |= IP_VS_CONN_F_INACTIVE;
779 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
780 (state == IP_VS_TCP_S_ESTABLISHED)) {
781 atomic_inc(&dest->activeconns);
782 atomic_dec(&dest->inactconns);
783 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
784 }
785 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
786 (cp->state != state)) {
787 dest = cp->dest;
788 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
789 (state != IP_VS_SCTP_S_ESTABLISHED)) {
790 atomic_dec(&dest->activeconns);
791 atomic_inc(&dest->inactconns);
792 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
793 }
329 } 794 }
330 795
331 /* SyncID sanity check */ 796 if (opt)
332 if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { 797 memcpy(&cp->in_seq, opt, sizeof(*opt));
333 IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", 798 atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
334 m->syncid); 799 cp->state = state;
335 return; 800 cp->old_state = cp->state;
801 /*
802 * For Ver 0 messages style
803 * - Not possible to recover the right timeout for templates
804 * - can not find the right fwmark
805 * virtual service. If needed, we can do it for
806 * non-fwmark persistent services.
807 * Ver 1 messages style.
808 * - No problem.
809 */
810 if (timeout) {
811 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
812 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
813 cp->timeout = timeout*HZ;
814 } else {
815 struct ip_vs_proto_data *pd;
816
817 pd = ip_vs_proto_data_get(net, protocol);
818 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
819 cp->timeout = pd->timeout_table[state];
820 else
821 cp->timeout = (3*60*HZ);
336 } 822 }
823 ip_vs_conn_put(cp);
824}
337 825
338 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 826/*
827 * Process received multicast message for Version 0
828 */
829static void ip_vs_process_message_v0(struct net *net, const char *buffer,
830 const size_t buflen)
831{
832 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
833 struct ip_vs_sync_conn_v0 *s;
834 struct ip_vs_sync_conn_options *opt;
835 struct ip_vs_protocol *pp;
836 struct ip_vs_conn_param param;
837 char *p;
838 int i;
839
840 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
339 for (i=0; i<m->nr_conns; i++) { 841 for (i=0; i<m->nr_conns; i++) {
340 unsigned flags, state; 842 unsigned flags, state;
341 843
342 if (p + SIMPLE_CONN_SIZE > buffer+buflen) { 844 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
343 IP_VS_ERR_RL("bogus conn in sync message\n"); 845 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
344 return; 846 return;
345 } 847 }
346 s = (struct ip_vs_sync_conn *) p; 848 s = (struct ip_vs_sync_conn_v0 *) p;
347 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; 849 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
348 flags &= ~IP_VS_CONN_F_HASHED; 850 flags &= ~IP_VS_CONN_F_HASHED;
349 if (flags & IP_VS_CONN_F_SEQ_MASK) { 851 if (flags & IP_VS_CONN_F_SEQ_MASK) {
350 opt = (struct ip_vs_sync_conn_options *)&s[1]; 852 opt = (struct ip_vs_sync_conn_options *)&s[1];
351 p += FULL_CONN_SIZE; 853 p += FULL_CONN_SIZE;
352 if (p > buffer+buflen) { 854 if (p > buffer+buflen) {
353 IP_VS_ERR_RL("bogus conn options in sync message\n"); 855 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
354 return; 856 return;
355 } 857 }
356 } else { 858 } else {
@@ -362,118 +864,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
362 if (!(flags & IP_VS_CONN_F_TEMPLATE)) { 864 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
363 pp = ip_vs_proto_get(s->protocol); 865 pp = ip_vs_proto_get(s->protocol);
364 if (!pp) { 866 if (!pp) {
365 IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", 867 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
366 s->protocol); 868 s->protocol);
367 continue; 869 continue;
368 } 870 }
369 if (state >= pp->num_states) { 871 if (state >= pp->num_states) {
370 IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", 872 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
371 pp->name, state); 873 pp->name, state);
372 continue; 874 continue;
373 } 875 }
374 } else { 876 } else {
375 /* protocol in templates is not used for state/timeout */ 877 /* protocol in templates is not used for state/timeout */
376 pp = NULL;
377 if (state > 0) { 878 if (state > 0) {
378 IP_VS_DBG(2, "Invalid template state %u in sync msg\n", 879 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
379 state); 880 state);
380 state = 0; 881 state = 0;
381 } 882 }
382 } 883 }
383 884
384 { 885 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
385 if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, 886 (const union nf_inet_addr *)&s->caddr,
386 (union nf_inet_addr *)&s->caddr, 887 s->cport,
387 s->cport, 888 (const union nf_inet_addr *)&s->vaddr,
388 (union nf_inet_addr *)&s->vaddr, 889 s->vport, &param);
389 s->vport, &param)) { 890
390 pr_err("ip_vs_conn_fill_param_sync failed"); 891 /* Send timeout as Zero */
391 return; 892 ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
893 (union nf_inet_addr *)&s->daddr, s->dport,
894 0, 0, opt);
895 }
896}
897
898/*
899 * Handle options
900 */
901static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
902 __u32 *opt_flags,
903 struct ip_vs_sync_conn_options *opt)
904{
905 struct ip_vs_sync_conn_options *topt;
906
907 topt = (struct ip_vs_sync_conn_options *)p;
908
909 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
910 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
911 return -EINVAL;
912 }
913 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
914 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
915 return -EINVAL;
916 }
917 ntoh_seq(&topt->in_seq, &opt->in_seq);
918 ntoh_seq(&topt->out_seq, &opt->out_seq);
919 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
920 return 0;
921}
922
923static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
924 __u8 **data, unsigned int maxlen,
925 __u32 *opt_flags, __u32 flag)
926{
927 if (plen > maxlen) {
928 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
929 return -EINVAL;
930 }
931 if (*opt_flags & flag) {
932 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
933 return -EINVAL;
934 }
935 *data_len = plen;
936 *data = p;
937 *opt_flags |= flag;
938 return 0;
939}
940/*
941 * Process a Version 1 sync. connection
942 */
943static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
944{
945 struct ip_vs_sync_conn_options opt;
946 union ip_vs_sync_conn *s;
947 struct ip_vs_protocol *pp;
948 struct ip_vs_conn_param param;
949 __u32 flags;
950 unsigned int af, state, pe_data_len=0, pe_name_len=0;
951 __u8 *pe_data=NULL, *pe_name=NULL;
952 __u32 opt_flags=0;
953 int retc=0;
954
955 s = (union ip_vs_sync_conn *) p;
956
957 if (s->v6.type & STYPE_F_INET6) {
958#ifdef CONFIG_IP_VS_IPV6
959 af = AF_INET6;
960 p += sizeof(struct ip_vs_sync_v6);
961#else
962 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
963 retc = 10;
964 goto out;
965#endif
966 } else if (!s->v4.type) {
967 af = AF_INET;
968 p += sizeof(struct ip_vs_sync_v4);
969 } else {
970 return -10;
971 }
972 if (p > msg_end)
973 return -20;
974
975 /* Process optional params check Type & Len. */
976 while (p < msg_end) {
977 int ptype;
978 int plen;
979
980 if (p+2 > msg_end)
981 return -30;
982 ptype = *(p++);
983 plen = *(p++);
984
985 if (!plen || ((p + plen) > msg_end))
986 return -40;
987 /* Handle seq option p = param data */
988 switch (ptype & ~IPVS_OPT_F_PARAM) {
989 case IPVS_OPT_SEQ_DATA:
990 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
991 return -50;
992 break;
993
994 case IPVS_OPT_PE_DATA:
995 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
996 IP_VS_PEDATA_MAXLEN, &opt_flags,
997 IPVS_OPT_F_PE_DATA))
998 return -60;
999 break;
1000
1001 case IPVS_OPT_PE_NAME:
1002 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1003 IP_VS_PENAME_MAXLEN, &opt_flags,
1004 IPVS_OPT_F_PE_NAME))
1005 return -70;
1006 break;
1007
1008 default:
1009 /* Param data mandatory ? */
1010 if (!(ptype & IPVS_OPT_F_PARAM)) {
1011 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1012 ptype & ~IPVS_OPT_F_PARAM);
1013 retc = 20;
1014 goto out;
392 } 1015 }
393 if (!(flags & IP_VS_CONN_F_TEMPLATE))
394 cp = ip_vs_conn_in_get(&param);
395 else
396 cp = ip_vs_ct_in_get(&param);
397 } 1016 }
398 if (!cp) { 1017 p += plen; /* Next option */
399 /* 1018 }
400 * Find the appropriate destination for the connection. 1019
401 * If it is not found the connection will remain unbound 1020 /* Get flags and Mask off unsupported */
402 * but still handled. 1021 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
403 */ 1022 flags |= IP_VS_CONN_F_SYNC;
404 dest = ip_vs_find_dest(AF_INET, 1023 state = ntohs(s->v4.state);
405 (union nf_inet_addr *)&s->daddr, 1024
406 s->dport, 1025 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
407 (union nf_inet_addr *)&s->vaddr, 1026 pp = ip_vs_proto_get(s->v4.protocol);
408 s->vport, 1027 if (!pp) {
409 s->protocol); 1028 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
410 /* Set the approprite ativity flag */ 1029 s->v4.protocol);
411 if (s->protocol == IPPROTO_TCP) { 1030 retc = 30;
412 if (state != IP_VS_TCP_S_ESTABLISHED) 1031 goto out;
413 flags |= IP_VS_CONN_F_INACTIVE; 1032 }
414 else 1033 if (state >= pp->num_states) {
415 flags &= ~IP_VS_CONN_F_INACTIVE; 1034 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
416 } else if (s->protocol == IPPROTO_SCTP) { 1035 pp->name, state);
417 if (state != IP_VS_SCTP_S_ESTABLISHED) 1036 retc = 40;
418 flags |= IP_VS_CONN_F_INACTIVE; 1037 goto out;
419 else 1038 }
420 flags &= ~IP_VS_CONN_F_INACTIVE; 1039 } else {
1040 /* protocol in templates is not used for state/timeout */
1041 if (state > 0) {
1042 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1043 state);
1044 state = 0;
1045 }
1046 }
1047 if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
1048 pe_data_len, pe_name, pe_name_len)) {
1049 retc = 50;
1050 goto out;
1051 }
1052 /* If only IPv4, just silent skip IPv6 */
1053 if (af == AF_INET)
1054 ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
1055 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1056 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1057 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1058 );
1059#ifdef CONFIG_IP_VS_IPV6
1060 else
1061 ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
1062 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1063 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1064 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1065 );
1066#endif
1067 return 0;
1068 /* Error exit */
1069out:
1070 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1071 return retc;
1072
1073}
1074/*
1075 * Process received multicast message and create the corresponding
1076 * ip_vs_conn entries.
1077 * Handles Version 0 & 1
1078 */
1079static void ip_vs_process_message(struct net *net, __u8 *buffer,
1080 const size_t buflen)
1081{
1082 struct netns_ipvs *ipvs = net_ipvs(net);
1083 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1084 __u8 *p, *msg_end;
1085 int i, nr_conns;
1086
1087 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1088 IP_VS_DBG(2, "BACKUP, message header too short\n");
1089 return;
1090 }
1091 /* Convert size back to host byte order */
1092 m2->size = ntohs(m2->size);
1093
1094 if (buflen != m2->size) {
1095 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1096 return;
1097 }
1098 /* SyncID sanity check */
1099 if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
1100 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1101 return;
1102 }
1103 /* Handle version 1 message */
1104 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1105 && (m2->spare == 0)) {
1106
1107 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1108 nr_conns = m2->nr_conns;
1109
1110 for (i=0; i<nr_conns; i++) {
1111 union ip_vs_sync_conn *s;
1112 unsigned size;
1113 int retc;
1114
1115 p = msg_end;
1116 if (p + sizeof(s->v4) > buffer+buflen) {
1117 IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
1118 return;
421 } 1119 }
422 cp = ip_vs_conn_new(&param, 1120 s = (union ip_vs_sync_conn *)p;
423 (union nf_inet_addr *)&s->daddr, 1121 size = ntohs(s->v4.ver_size) & SVER_MASK;
424 s->dport, flags, dest); 1122 msg_end = p + size;
425 if (dest) 1123 /* Basic sanity checks */
426 atomic_dec(&dest->refcnt); 1124 if (msg_end > buffer+buflen) {
427 if (!cp) { 1125 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
428 pr_err("ip_vs_conn_new failed\n");
429 return; 1126 return;
430 } 1127 }
431 } else if (!cp->dest) { 1128 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
432 dest = ip_vs_try_bind_dest(cp); 1129 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
433 if (dest) 1130 ntohs(s->v4.ver_size) >> SVER_SHIFT);
434 atomic_dec(&dest->refcnt); 1131 return;
435 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
436 (cp->state != state)) {
437 /* update active/inactive flag for the connection */
438 dest = cp->dest;
439 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
440 (state != IP_VS_TCP_S_ESTABLISHED)) {
441 atomic_dec(&dest->activeconns);
442 atomic_inc(&dest->inactconns);
443 cp->flags |= IP_VS_CONN_F_INACTIVE;
444 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
445 (state == IP_VS_TCP_S_ESTABLISHED)) {
446 atomic_inc(&dest->activeconns);
447 atomic_dec(&dest->inactconns);
448 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
449 } 1132 }
450 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && 1133 /* Process a single sync_conn */
451 (cp->state != state)) { 1134 retc = ip_vs_proc_sync_conn(net, p, msg_end);
452 dest = cp->dest; 1135 if (retc < 0) {
453 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && 1136 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
454 (state != IP_VS_SCTP_S_ESTABLISHED)) { 1137 retc);
455 atomic_dec(&dest->activeconns); 1138 return;
456 atomic_inc(&dest->inactconns);
457 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
458 } 1139 }
1140 /* Make sure we have 32 bit alignment */
1141 msg_end = p + ((size + 3) & ~3);
459 } 1142 }
460 1143 } else {
461 if (opt) 1144 /* Old type of message */
462 memcpy(&cp->in_seq, opt, sizeof(*opt)); 1145 ip_vs_process_message_v0(net, buffer, buflen);
463 atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); 1146 return;
464 cp->state = state;
465 cp->old_state = cp->state;
466 /*
467 * We can not recover the right timeout for templates
468 * in all cases, we can not find the right fwmark
469 * virtual service. If needed, we can do it for
470 * non-fwmark persistent services.
471 */
472 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
473 cp->timeout = pp->timeout_table[state];
474 else
475 cp->timeout = (3*60*HZ);
476 ip_vs_conn_put(cp);
477 } 1147 }
478} 1148}
479 1149
@@ -511,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
511{ 1181{
512 struct net_device *dev; 1182 struct net_device *dev;
513 struct inet_sock *inet = inet_sk(sk); 1183 struct inet_sock *inet = inet_sk(sk);
1184 struct net *net = sock_net(sk);
514 1185
515 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1186 dev = __dev_get_by_name(net, ifname);
1187 if (!dev)
516 return -ENODEV; 1188 return -ENODEV;
517 1189
518 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1190 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
531 * Set the maximum length of sync message according to the 1203 * Set the maximum length of sync message according to the
532 * specified interface's MTU. 1204 * specified interface's MTU.
533 */ 1205 */
534static int set_sync_mesg_maxlen(int sync_state) 1206static int set_sync_mesg_maxlen(struct net *net, int sync_state)
535{ 1207{
1208 struct netns_ipvs *ipvs = net_ipvs(net);
536 struct net_device *dev; 1209 struct net_device *dev;
537 int num; 1210 int num;
538 1211
539 if (sync_state == IP_VS_STATE_MASTER) { 1212 if (sync_state == IP_VS_STATE_MASTER) {
540 if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) 1213 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1214 if (!dev)
541 return -ENODEV; 1215 return -ENODEV;
542 1216
543 num = (dev->mtu - sizeof(struct iphdr) - 1217 num = (dev->mtu - sizeof(struct iphdr) -
544 sizeof(struct udphdr) - 1218 sizeof(struct udphdr) -
545 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; 1219 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
546 sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + 1220 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
547 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); 1221 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
548 IP_VS_DBG(7, "setting the maximum length of sync sending " 1222 IP_VS_DBG(7, "setting the maximum length of sync sending "
549 "message %d.\n", sync_send_mesg_maxlen); 1223 "message %d.\n", ipvs->send_mesg_maxlen);
550 } else if (sync_state == IP_VS_STATE_BACKUP) { 1224 } else if (sync_state == IP_VS_STATE_BACKUP) {
551 if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) 1225 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1226 if (!dev)
552 return -ENODEV; 1227 return -ENODEV;
553 1228
554 sync_recv_mesg_maxlen = dev->mtu - 1229 ipvs->recv_mesg_maxlen = dev->mtu -
555 sizeof(struct iphdr) - sizeof(struct udphdr); 1230 sizeof(struct iphdr) - sizeof(struct udphdr);
556 IP_VS_DBG(7, "setting the maximum length of sync receiving " 1231 IP_VS_DBG(7, "setting the maximum length of sync receiving "
557 "message %d.\n", sync_recv_mesg_maxlen); 1232 "message %d.\n", ipvs->recv_mesg_maxlen);
558 } 1233 }
559 1234
560 return 0; 1235 return 0;
@@ -569,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state)
569static int 1244static int
570join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) 1245join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
571{ 1246{
1247 struct net *net = sock_net(sk);
572 struct ip_mreqn mreq; 1248 struct ip_mreqn mreq;
573 struct net_device *dev; 1249 struct net_device *dev;
574 int ret; 1250 int ret;
@@ -576,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
576 memset(&mreq, 0, sizeof(mreq)); 1252 memset(&mreq, 0, sizeof(mreq));
577 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); 1253 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
578 1254
579 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1255 dev = __dev_get_by_name(net, ifname);
1256 if (!dev)
580 return -ENODEV; 1257 return -ENODEV;
581 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1258 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
582 return -EINVAL; 1259 return -EINVAL;
@@ -593,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
593 1270
594static int bind_mcastif_addr(struct socket *sock, char *ifname) 1271static int bind_mcastif_addr(struct socket *sock, char *ifname)
595{ 1272{
1273 struct net *net = sock_net(sock->sk);
596 struct net_device *dev; 1274 struct net_device *dev;
597 __be32 addr; 1275 __be32 addr;
598 struct sockaddr_in sin; 1276 struct sockaddr_in sin;
599 1277
600 if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 1278 dev = __dev_get_by_name(net, ifname);
1279 if (!dev)
601 return -ENODEV; 1280 return -ENODEV;
602 1281
603 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 1282 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
619/* 1298/*
620 * Set up sending multicast socket over UDP 1299 * Set up sending multicast socket over UDP
621 */ 1300 */
622static struct socket * make_send_sock(void) 1301static struct socket *make_send_sock(struct net *net)
623{ 1302{
1303 struct netns_ipvs *ipvs = net_ipvs(net);
624 struct socket *sock; 1304 struct socket *sock;
625 int result; 1305 int result;
626 1306
@@ -631,7 +1311,7 @@ static struct socket * make_send_sock(void)
631 return ERR_PTR(result); 1311 return ERR_PTR(result);
632 } 1312 }
633 1313
634 result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); 1314 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
635 if (result < 0) { 1315 if (result < 0) {
636 pr_err("Error setting outbound mcast interface\n"); 1316 pr_err("Error setting outbound mcast interface\n");
637 goto error; 1317 goto error;
@@ -640,7 +1320,7 @@ static struct socket * make_send_sock(void)
640 set_mcast_loop(sock->sk, 0); 1320 set_mcast_loop(sock->sk, 0);
641 set_mcast_ttl(sock->sk, 1); 1321 set_mcast_ttl(sock->sk, 1);
642 1322
643 result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); 1323 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
644 if (result < 0) { 1324 if (result < 0) {
645 pr_err("Error binding address of the mcast interface\n"); 1325 pr_err("Error binding address of the mcast interface\n");
646 goto error; 1326 goto error;
@@ -664,8 +1344,9 @@ static struct socket * make_send_sock(void)
664/* 1344/*
665 * Set up receiving multicast socket over UDP 1345 * Set up receiving multicast socket over UDP
666 */ 1346 */
667static struct socket * make_receive_sock(void) 1347static struct socket *make_receive_sock(struct net *net)
668{ 1348{
1349 struct netns_ipvs *ipvs = net_ipvs(net);
669 struct socket *sock; 1350 struct socket *sock;
670 int result; 1351 int result;
671 1352
@@ -689,7 +1370,7 @@ static struct socket * make_receive_sock(void)
689 /* join the multicast group */ 1370 /* join the multicast group */
690 result = join_mcast_group(sock->sk, 1371 result = join_mcast_group(sock->sk,
691 (struct in_addr *) &mcast_addr.sin_addr, 1372 (struct in_addr *) &mcast_addr.sin_addr,
692 ip_vs_backup_mcast_ifn); 1373 ipvs->backup_mcast_ifn);
693 if (result < 0) { 1374 if (result < 0) {
694 pr_err("Error joining to the multicast group\n"); 1375 pr_err("Error joining to the multicast group\n");
695 goto error; 1376 goto error;
@@ -760,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
760static int sync_thread_master(void *data) 1441static int sync_thread_master(void *data)
761{ 1442{
762 struct ip_vs_sync_thread_data *tinfo = data; 1443 struct ip_vs_sync_thread_data *tinfo = data;
1444 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
763 struct ip_vs_sync_buff *sb; 1445 struct ip_vs_sync_buff *sb;
764 1446
765 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " 1447 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
766 "syncid = %d\n", 1448 "syncid = %d\n",
767 ip_vs_master_mcast_ifn, ip_vs_master_syncid); 1449 ipvs->master_mcast_ifn, ipvs->master_syncid);
768 1450
769 while (!kthread_should_stop()) { 1451 while (!kthread_should_stop()) {
770 while ((sb = sb_dequeue())) { 1452 while ((sb = sb_dequeue(ipvs))) {
771 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1453 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
772 ip_vs_sync_buff_release(sb); 1454 ip_vs_sync_buff_release(sb);
773 } 1455 }
774 1456
775 /* check if entries stay in curr_sb for 2 seconds */ 1457 /* check if entries stay in ipvs->sync_buff for 2 seconds */
776 sb = get_curr_sync_buff(2 * HZ); 1458 sb = get_curr_sync_buff(ipvs, 2 * HZ);
777 if (sb) { 1459 if (sb) {
778 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1460 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
779 ip_vs_sync_buff_release(sb); 1461 ip_vs_sync_buff_release(sb);
@@ -783,14 +1465,13 @@ static int sync_thread_master(void *data)
783 } 1465 }
784 1466
785 /* clean up the sync_buff queue */ 1467 /* clean up the sync_buff queue */
786 while ((sb=sb_dequeue())) { 1468 while ((sb = sb_dequeue(ipvs)))
787 ip_vs_sync_buff_release(sb); 1469 ip_vs_sync_buff_release(sb);
788 }
789 1470
790 /* clean up the current sync_buff */ 1471 /* clean up the current sync_buff */
791 if ((sb = get_curr_sync_buff(0))) { 1472 sb = get_curr_sync_buff(ipvs, 0);
1473 if (sb)
792 ip_vs_sync_buff_release(sb); 1474 ip_vs_sync_buff_release(sb);
793 }
794 1475
795 /* release the sending multicast socket */ 1476 /* release the sending multicast socket */
796 sock_release(tinfo->sock); 1477 sock_release(tinfo->sock);
@@ -803,11 +1484,12 @@ static int sync_thread_master(void *data)
803static int sync_thread_backup(void *data) 1484static int sync_thread_backup(void *data)
804{ 1485{
805 struct ip_vs_sync_thread_data *tinfo = data; 1486 struct ip_vs_sync_thread_data *tinfo = data;
1487 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
806 int len; 1488 int len;
807 1489
808 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " 1490 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
809 "syncid = %d\n", 1491 "syncid = %d\n",
810 ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); 1492 ipvs->backup_mcast_ifn, ipvs->backup_syncid);
811 1493
812 while (!kthread_should_stop()) { 1494 while (!kthread_should_stop()) {
813 wait_event_interruptible(*sk_sleep(tinfo->sock->sk), 1495 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,7 +1499,7 @@ static int sync_thread_backup(void *data)
817 /* do we have data now? */ 1499 /* do we have data now? */
818 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { 1500 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
819 len = ip_vs_receive(tinfo->sock, tinfo->buf, 1501 len = ip_vs_receive(tinfo->sock, tinfo->buf,
820 sync_recv_mesg_maxlen); 1502 ipvs->recv_mesg_maxlen);
821 if (len <= 0) { 1503 if (len <= 0) {
822 pr_err("receiving message error\n"); 1504 pr_err("receiving message error\n");
823 break; 1505 break;
@@ -826,7 +1508,7 @@ static int sync_thread_backup(void *data)
826 /* disable bottom half, because it accesses the data 1508 /* disable bottom half, because it accesses the data
827 shared by softirq while getting/creating conns */ 1509 shared by softirq while getting/creating conns */
828 local_bh_disable(); 1510 local_bh_disable();
829 ip_vs_process_message(tinfo->buf, len); 1511 ip_vs_process_message(tinfo->net, tinfo->buf, len);
830 local_bh_enable(); 1512 local_bh_enable();
831 } 1513 }
832 } 1514 }
@@ -840,41 +1522,42 @@ static int sync_thread_backup(void *data)
840} 1522}
841 1523
842 1524
843int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) 1525int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
844{ 1526{
845 struct ip_vs_sync_thread_data *tinfo; 1527 struct ip_vs_sync_thread_data *tinfo;
846 struct task_struct **realtask, *task; 1528 struct task_struct **realtask, *task;
847 struct socket *sock; 1529 struct socket *sock;
1530 struct netns_ipvs *ipvs = net_ipvs(net);
848 char *name, *buf = NULL; 1531 char *name, *buf = NULL;
849 int (*threadfn)(void *data); 1532 int (*threadfn)(void *data);
850 int result = -ENOMEM; 1533 int result = -ENOMEM;
851 1534
852 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1535 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
853 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", 1536 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
854 sizeof(struct ip_vs_sync_conn)); 1537 sizeof(struct ip_vs_sync_conn_v0));
855 1538
856 if (state == IP_VS_STATE_MASTER) { 1539 if (state == IP_VS_STATE_MASTER) {
857 if (sync_master_thread) 1540 if (ipvs->master_thread)
858 return -EEXIST; 1541 return -EEXIST;
859 1542
860 strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, 1543 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
861 sizeof(ip_vs_master_mcast_ifn)); 1544 sizeof(ipvs->master_mcast_ifn));
862 ip_vs_master_syncid = syncid; 1545 ipvs->master_syncid = syncid;
863 realtask = &sync_master_thread; 1546 realtask = &ipvs->master_thread;
864 name = "ipvs_syncmaster"; 1547 name = "ipvs_master:%d";
865 threadfn = sync_thread_master; 1548 threadfn = sync_thread_master;
866 sock = make_send_sock(); 1549 sock = make_send_sock(net);
867 } else if (state == IP_VS_STATE_BACKUP) { 1550 } else if (state == IP_VS_STATE_BACKUP) {
868 if (sync_backup_thread) 1551 if (ipvs->backup_thread)
869 return -EEXIST; 1552 return -EEXIST;
870 1553
871 strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, 1554 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
872 sizeof(ip_vs_backup_mcast_ifn)); 1555 sizeof(ipvs->backup_mcast_ifn));
873 ip_vs_backup_syncid = syncid; 1556 ipvs->backup_syncid = syncid;
874 realtask = &sync_backup_thread; 1557 realtask = &ipvs->backup_thread;
875 name = "ipvs_syncbackup"; 1558 name = "ipvs_backup:%d";
876 threadfn = sync_thread_backup; 1559 threadfn = sync_thread_backup;
877 sock = make_receive_sock(); 1560 sock = make_receive_sock(net);
878 } else { 1561 } else {
879 return -EINVAL; 1562 return -EINVAL;
880 } 1563 }
@@ -884,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
884 goto out; 1567 goto out;
885 } 1568 }
886 1569
887 set_sync_mesg_maxlen(state); 1570 set_sync_mesg_maxlen(net, state);
888 if (state == IP_VS_STATE_BACKUP) { 1571 if (state == IP_VS_STATE_BACKUP) {
889 buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); 1572 buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
890 if (!buf) 1573 if (!buf)
891 goto outsocket; 1574 goto outsocket;
892 } 1575 }
@@ -895,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
895 if (!tinfo) 1578 if (!tinfo)
896 goto outbuf; 1579 goto outbuf;
897 1580
1581 tinfo->net = net;
898 tinfo->sock = sock; 1582 tinfo->sock = sock;
899 tinfo->buf = buf; 1583 tinfo->buf = buf;
900 1584
901 task = kthread_run(threadfn, tinfo, name); 1585 task = kthread_run(threadfn, tinfo, name, ipvs->gen);
902 if (IS_ERR(task)) { 1586 if (IS_ERR(task)) {
903 result = PTR_ERR(task); 1587 result = PTR_ERR(task);
904 goto outtinfo; 1588 goto outtinfo;
@@ -906,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
906 1590
907 /* mark as active */ 1591 /* mark as active */
908 *realtask = task; 1592 *realtask = task;
909 ip_vs_sync_state |= state; 1593 ipvs->sync_state |= state;
910 1594
911 /* increase the module use count */ 1595 /* increase the module use count */
912 ip_vs_use_count_inc(); 1596 ip_vs_use_count_inc();
@@ -924,16 +1608,18 @@ out:
924} 1608}
925 1609
926 1610
927int stop_sync_thread(int state) 1611int stop_sync_thread(struct net *net, int state)
928{ 1612{
1613 struct netns_ipvs *ipvs = net_ipvs(net);
1614
929 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1615 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
930 1616
931 if (state == IP_VS_STATE_MASTER) { 1617 if (state == IP_VS_STATE_MASTER) {
932 if (!sync_master_thread) 1618 if (!ipvs->master_thread)
933 return -ESRCH; 1619 return -ESRCH;
934 1620
935 pr_info("stopping master sync thread %d ...\n", 1621 pr_info("stopping master sync thread %d ...\n",
936 task_pid_nr(sync_master_thread)); 1622 task_pid_nr(ipvs->master_thread));
937 1623
938 /* 1624 /*
939 * The lock synchronizes with sb_queue_tail(), so that we don't 1625 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,21 +1627,21 @@ int stop_sync_thread(int state)
941 * progress of stopping the master sync daemon. 1627 * progress of stopping the master sync daemon.
942 */ 1628 */
943 1629
944 spin_lock_bh(&ip_vs_sync_lock); 1630 spin_lock_bh(&ipvs->sync_lock);
945 ip_vs_sync_state &= ~IP_VS_STATE_MASTER; 1631 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
946 spin_unlock_bh(&ip_vs_sync_lock); 1632 spin_unlock_bh(&ipvs->sync_lock);
947 kthread_stop(sync_master_thread); 1633 kthread_stop(ipvs->master_thread);
948 sync_master_thread = NULL; 1634 ipvs->master_thread = NULL;
949 } else if (state == IP_VS_STATE_BACKUP) { 1635 } else if (state == IP_VS_STATE_BACKUP) {
950 if (!sync_backup_thread) 1636 if (!ipvs->backup_thread)
951 return -ESRCH; 1637 return -ESRCH;
952 1638
953 pr_info("stopping backup sync thread %d ...\n", 1639 pr_info("stopping backup sync thread %d ...\n",
954 task_pid_nr(sync_backup_thread)); 1640 task_pid_nr(ipvs->backup_thread));
955 1641
956 ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; 1642 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
957 kthread_stop(sync_backup_thread); 1643 kthread_stop(ipvs->backup_thread);
958 sync_backup_thread = NULL; 1644 ipvs->backup_thread = NULL;
959 } else { 1645 } else {
960 return -EINVAL; 1646 return -EINVAL;
961 } 1647 }
@@ -965,3 +1651,42 @@ int stop_sync_thread(int state)
965 1651
966 return 0; 1652 return 0;
967} 1653}
1654
1655/*
1656 * Initialize data struct for each netns
1657 */
1658static int __net_init __ip_vs_sync_init(struct net *net)
1659{
1660 struct netns_ipvs *ipvs = net_ipvs(net);
1661
1662 INIT_LIST_HEAD(&ipvs->sync_queue);
1663 spin_lock_init(&ipvs->sync_lock);
1664 spin_lock_init(&ipvs->sync_buff_lock);
1665
1666 ipvs->sync_mcast_addr.sin_family = AF_INET;
1667 ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
1668 ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
1669 return 0;
1670}
1671
1672static void __ip_vs_sync_cleanup(struct net *net)
1673{
1674 stop_sync_thread(net, IP_VS_STATE_MASTER);
1675 stop_sync_thread(net, IP_VS_STATE_BACKUP);
1676}
1677
1678static struct pernet_operations ipvs_sync_ops = {
1679 .init = __ip_vs_sync_init,
1680 .exit = __ip_vs_sync_cleanup,
1681};
1682
1683
1684int __init ip_vs_sync_init(void)
1685{
1686 return register_pernet_subsys(&ipvs_sync_ops);
1687}
1688
1689void __exit ip_vs_sync_cleanup(void)
1690{
1691 unregister_pernet_subsys(&ipvs_sync_ops);
1692}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 5325a3fbe4ac..1f2a4e35fb11 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -175,7 +175,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
175 .fl4_tos = RT_TOS(iph->tos), 175 .fl4_tos = RT_TOS(iph->tos),
176 .mark = skb->mark, 176 .mark = skb->mark,
177 }; 177 };
178 struct rtable *rt;
179 178
180 if (ip_route_output_key(net, &rt, &fl)) 179 if (ip_route_output_key(net, &rt, &fl))
181 return 0; 180 return 0;
@@ -390,7 +389,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
390 389
391 /* MTU checking */ 390 /* MTU checking */
392 mtu = dst_mtu(&rt->dst); 391 mtu = dst_mtu(&rt->dst);
393 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 392 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
393 !skb_is_gso(skb)) {
394 ip_rt_put(rt); 394 ip_rt_put(rt);
395 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 395 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
396 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 396 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +443,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
443 443
444 /* MTU checking */ 444 /* MTU checking */
445 mtu = dst_mtu(&rt->dst); 445 mtu = dst_mtu(&rt->dst);
446 if (skb->len > mtu) { 446 if (skb->len > mtu && !skb_is_gso(skb)) {
447 if (!skb->dev) { 447 if (!skb->dev) {
448 struct net *net = dev_net(skb_dst(skb)->dev); 448 struct net *net = dev_net(skb_dst(skb)->dev);
449 449
@@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
543 543
544 /* MTU checking */ 544 /* MTU checking */
545 mtu = dst_mtu(&rt->dst); 545 mtu = dst_mtu(&rt->dst);
546 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 546 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
547 !skb_is_gso(skb)) {
547 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 548 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
548 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, 549 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
549 "ip_vs_nat_xmit(): frag needed for"); 550 "ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +659,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
658 659
659 /* MTU checking */ 660 /* MTU checking */
660 mtu = dst_mtu(&rt->dst); 661 mtu = dst_mtu(&rt->dst);
661 if (skb->len > mtu) { 662 if (skb->len > mtu && !skb_is_gso(skb)) {
662 if (!skb->dev) { 663 if (!skb->dev) {
663 struct net *net = dev_net(skb_dst(skb)->dev); 664 struct net *net = dev_net(skb_dst(skb)->dev);
664 665
@@ -773,8 +774,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
773 774
774 df |= (old_iph->frag_off & htons(IP_DF)); 775 df |= (old_iph->frag_off & htons(IP_DF));
775 776
776 if ((old_iph->frag_off & htons(IP_DF)) 777 if ((old_iph->frag_off & htons(IP_DF) &&
777 && mtu < ntohs(old_iph->tot_len)) { 778 mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
778 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 779 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
779 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 780 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
780 goto tx_error_put; 781 goto tx_error_put;
@@ -886,7 +887,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
886 if (skb_dst(skb)) 887 if (skb_dst(skb))
887 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 888 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
888 889
889 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { 890 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
891 !skb_is_gso(skb)) {
890 if (!skb->dev) { 892 if (!skb->dev) {
891 struct net *net = dev_net(skb_dst(skb)->dev); 893 struct net *net = dev_net(skb_dst(skb)->dev);
892 894
@@ -991,7 +993,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
991 993
992 /* MTU checking */ 994 /* MTU checking */
993 mtu = dst_mtu(&rt->dst); 995 mtu = dst_mtu(&rt->dst);
994 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { 996 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
997 !skb_is_gso(skb)) {
995 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 998 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
996 ip_rt_put(rt); 999 ip_rt_put(rt);
997 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1000 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1158,7 +1161,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1158 1161
1159 /* MTU checking */ 1162 /* MTU checking */
1160 mtu = dst_mtu(&rt->dst); 1163 mtu = dst_mtu(&rt->dst);
1161 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 1164 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1165 !skb_is_gso(skb)) {
1162 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1166 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1163 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1167 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1164 goto tx_error_put; 1168 goto tx_error_put;
@@ -1272,7 +1276,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1272 1276
1273 /* MTU checking */ 1277 /* MTU checking */
1274 mtu = dst_mtu(&rt->dst); 1278 mtu = dst_mtu(&rt->dst);
1275 if (skb->len > mtu) { 1279 if (skb->len > mtu && !skb_is_gso(skb)) {
1276 if (!skb->dev) { 1280 if (!skb->dev) {
1277 struct net *net = dev_net(skb_dst(skb)->dev); 1281 struct net *net = dev_net(skb_dst(skb)->dev);
1278 1282
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
new file mode 100644
index 000000000000..4e99cca61612
--- /dev/null
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -0,0 +1,82 @@
1/*
2 * broadcast connection tracking helper
3 *
4 * (c) 2005 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/ip.h>
14#include <net/route.h>
15#include <linux/inetdevice.h>
16#include <linux/skbuff.h>
17
18#include <net/netfilter/nf_conntrack.h>
19#include <net/netfilter/nf_conntrack_helper.h>
20#include <net/netfilter/nf_conntrack_expect.h>
21
22int nf_conntrack_broadcast_help(struct sk_buff *skb,
23 unsigned int protoff,
24 struct nf_conn *ct,
25 enum ip_conntrack_info ctinfo,
26 unsigned int timeout)
27{
28 struct nf_conntrack_expect *exp;
29 struct iphdr *iph = ip_hdr(skb);
30 struct rtable *rt = skb_rtable(skb);
31 struct in_device *in_dev;
32 struct nf_conn_help *help = nfct_help(ct);
33 __be32 mask = 0;
34
35 /* we're only interested in locally generated packets */
36 if (skb->sk == NULL)
37 goto out;
38 if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
39 goto out;
40 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
41 goto out;
42
43 rcu_read_lock();
44 in_dev = __in_dev_get_rcu(rt->dst.dev);
45 if (in_dev != NULL) {
46 for_primary_ifa(in_dev) {
47 if (ifa->ifa_broadcast == iph->daddr) {
48 mask = ifa->ifa_mask;
49 break;
50 }
51 } endfor_ifa(in_dev);
52 }
53 rcu_read_unlock();
54
55 if (mask == 0)
56 goto out;
57
58 exp = nf_ct_expect_alloc(ct);
59 if (exp == NULL)
60 goto out;
61
62 exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
63 exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;
64
65 exp->mask.src.u3.ip = mask;
66 exp->mask.src.u.udp.port = htons(0xFFFF);
67
68 exp->expectfn = NULL;
69 exp->flags = NF_CT_EXPECT_PERMANENT;
70 exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
71 exp->helper = NULL;
72
73 nf_ct_expect_related(exp);
74 nf_ct_expect_put(exp);
75
76 nf_ct_refresh(ct, skb, timeout * HZ);
77out:
78 return NF_ACCEPT;
79}
80EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
81
82MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e61511929c66..1909311c392a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -43,6 +43,7 @@
43#include <net/netfilter/nf_conntrack_acct.h> 43#include <net/netfilter/nf_conntrack_acct.h>
44#include <net/netfilter/nf_conntrack_ecache.h> 44#include <net/netfilter/nf_conntrack_ecache.h>
45#include <net/netfilter/nf_conntrack_zones.h> 45#include <net/netfilter/nf_conntrack_zones.h>
46#include <net/netfilter/nf_conntrack_timestamp.h>
46#include <net/netfilter/nf_nat.h> 47#include <net/netfilter/nf_nat.h>
47#include <net/netfilter/nf_nat_core.h> 48#include <net/netfilter/nf_nat_core.h>
48 49
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
282static void death_by_timeout(unsigned long ul_conntrack) 283static void death_by_timeout(unsigned long ul_conntrack)
283{ 284{
284 struct nf_conn *ct = (void *)ul_conntrack; 285 struct nf_conn *ct = (void *)ul_conntrack;
286 struct nf_conn_tstamp *tstamp;
287
288 tstamp = nf_conn_tstamp_find(ct);
289 if (tstamp && tstamp->stop == 0)
290 tstamp->stop = ktime_to_ns(ktime_get_real());
285 291
286 if (!test_bit(IPS_DYING_BIT, &ct->status) && 292 if (!test_bit(IPS_DYING_BIT, &ct->status) &&
287 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { 293 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
419 struct nf_conntrack_tuple_hash *h; 425 struct nf_conntrack_tuple_hash *h;
420 struct nf_conn *ct; 426 struct nf_conn *ct;
421 struct nf_conn_help *help; 427 struct nf_conn_help *help;
428 struct nf_conn_tstamp *tstamp;
422 struct hlist_nulls_node *n; 429 struct hlist_nulls_node *n;
423 enum ip_conntrack_info ctinfo; 430 enum ip_conntrack_info ctinfo;
424 struct net *net; 431 struct net *net;
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
486 ct->timeout.expires += jiffies; 493 ct->timeout.expires += jiffies;
487 add_timer(&ct->timeout); 494 add_timer(&ct->timeout);
488 atomic_inc(&ct->ct_general.use); 495 atomic_inc(&ct->ct_general.use);
489 set_bit(IPS_CONFIRMED_BIT, &ct->status); 496 ct->status |= IPS_CONFIRMED;
497
498 /* set conntrack timestamp, if enabled. */
499 tstamp = nf_conn_tstamp_find(ct);
500 if (tstamp) {
501 if (skb->tstamp.tv64 == 0)
502 __net_timestamp((struct sk_buff *)skb);
490 503
504 tstamp->start = ktime_to_ns(skb->tstamp);
505 }
491 /* Since the lookup is lockless, hash insertion must be done after 506 /* Since the lookup is lockless, hash insertion must be done after
492 * starting the timer and setting the CONFIRMED bit. The RCU barriers 507 * starting the timer and setting the CONFIRMED bit. The RCU barriers
493 * guarantee that no other CPU can find the conntrack before the above 508 * guarantee that no other CPU can find the conntrack before the above
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
655 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. 670 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
656 */ 671 */
657 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, 672 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
658 sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); 673 offsetof(struct nf_conn, proto) -
674 offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
659 spin_lock_init(&ct->lock); 675 spin_lock_init(&ct->lock);
660 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 676 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
661 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 677 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
745 } 761 }
746 762
747 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 763 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
764 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
748 765
749 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; 766 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
750 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, 767 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1185,6 +1202,11 @@ struct __nf_ct_flush_report {
1185static int kill_report(struct nf_conn *i, void *data) 1202static int kill_report(struct nf_conn *i, void *data)
1186{ 1203{
1187 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; 1204 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
1205 struct nf_conn_tstamp *tstamp;
1206
1207 tstamp = nf_conn_tstamp_find(i);
1208 if (tstamp && tstamp->stop == 0)
1209 tstamp->stop = ktime_to_ns(ktime_get_real());
1188 1210
1189 /* If we fail to deliver the event, death_by_timeout() will retry */ 1211 /* If we fail to deliver the event, death_by_timeout() will retry */
1190 if (nf_conntrack_event_report(IPCT_DESTROY, i, 1212 if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1201,9 +1223,9 @@ static int kill_all(struct nf_conn *i, void *data)
1201 return 1; 1223 return 1;
1202} 1224}
1203 1225
1204void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) 1226void nf_ct_free_hashtable(void *hash, unsigned int size)
1205{ 1227{
1206 if (vmalloced) 1228 if (is_vmalloc_addr(hash))
1207 vfree(hash); 1229 vfree(hash);
1208 else 1230 else
1209 free_pages((unsigned long)hash, 1231 free_pages((unsigned long)hash,
@@ -1270,8 +1292,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
1270 goto i_see_dead_people; 1292 goto i_see_dead_people;
1271 } 1293 }
1272 1294
1273 nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, 1295 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1274 net->ct.htable_size);
1275 nf_conntrack_ecache_fini(net); 1296 nf_conntrack_ecache_fini(net);
1276 nf_conntrack_acct_fini(net); 1297 nf_conntrack_acct_fini(net);
1277 nf_conntrack_expect_fini(net); 1298 nf_conntrack_expect_fini(net);
@@ -1300,21 +1321,18 @@ void nf_conntrack_cleanup(struct net *net)
1300 } 1321 }
1301} 1322}
1302 1323
1303void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) 1324void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
1304{ 1325{
1305 struct hlist_nulls_head *hash; 1326 struct hlist_nulls_head *hash;
1306 unsigned int nr_slots, i; 1327 unsigned int nr_slots, i;
1307 size_t sz; 1328 size_t sz;
1308 1329
1309 *vmalloced = 0;
1310
1311 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); 1330 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1312 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); 1331 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
1313 sz = nr_slots * sizeof(struct hlist_nulls_head); 1332 sz = nr_slots * sizeof(struct hlist_nulls_head);
1314 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1333 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1315 get_order(sz)); 1334 get_order(sz));
1316 if (!hash) { 1335 if (!hash) {
1317 *vmalloced = 1;
1318 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1336 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1319 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, 1337 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1320 PAGE_KERNEL); 1338 PAGE_KERNEL);
@@ -1330,7 +1348,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
1330 1348
1331int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) 1349int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1332{ 1350{
1333 int i, bucket, vmalloced, old_vmalloced; 1351 int i, bucket;
1334 unsigned int hashsize, old_size; 1352 unsigned int hashsize, old_size;
1335 struct hlist_nulls_head *hash, *old_hash; 1353 struct hlist_nulls_head *hash, *old_hash;
1336 struct nf_conntrack_tuple_hash *h; 1354 struct nf_conntrack_tuple_hash *h;
@@ -1347,7 +1365,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1347 if (!hashsize) 1365 if (!hashsize)
1348 return -EINVAL; 1366 return -EINVAL;
1349 1367
1350 hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1); 1368 hash = nf_ct_alloc_hashtable(&hashsize, 1);
1351 if (!hash) 1369 if (!hash)
1352 return -ENOMEM; 1370 return -ENOMEM;
1353 1371
@@ -1369,15 +1387,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1369 } 1387 }
1370 } 1388 }
1371 old_size = init_net.ct.htable_size; 1389 old_size = init_net.ct.htable_size;
1372 old_vmalloced = init_net.ct.hash_vmalloc;
1373 old_hash = init_net.ct.hash; 1390 old_hash = init_net.ct.hash;
1374 1391
1375 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; 1392 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
1376 init_net.ct.hash_vmalloc = vmalloced;
1377 init_net.ct.hash = hash; 1393 init_net.ct.hash = hash;
1378 spin_unlock_bh(&nf_conntrack_lock); 1394 spin_unlock_bh(&nf_conntrack_lock);
1379 1395
1380 nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); 1396 nf_ct_free_hashtable(old_hash, old_size);
1381 return 0; 1397 return 0;
1382} 1398}
1383EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); 1399EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1490,8 +1506,7 @@ static int nf_conntrack_init_net(struct net *net)
1490 } 1506 }
1491 1507
1492 net->ct.htable_size = nf_conntrack_htable_size; 1508 net->ct.htable_size = nf_conntrack_htable_size;
1493 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1509 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
1494 &net->ct.hash_vmalloc, 1);
1495 if (!net->ct.hash) { 1510 if (!net->ct.hash) {
1496 ret = -ENOMEM; 1511 ret = -ENOMEM;
1497 printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); 1512 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1503,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net)
1503 ret = nf_conntrack_acct_init(net); 1518 ret = nf_conntrack_acct_init(net);
1504 if (ret < 0) 1519 if (ret < 0)
1505 goto err_acct; 1520 goto err_acct;
1521 ret = nf_conntrack_tstamp_init(net);
1522 if (ret < 0)
1523 goto err_tstamp;
1506 ret = nf_conntrack_ecache_init(net); 1524 ret = nf_conntrack_ecache_init(net);
1507 if (ret < 0) 1525 if (ret < 0)
1508 goto err_ecache; 1526 goto err_ecache;
@@ -1510,12 +1528,13 @@ static int nf_conntrack_init_net(struct net *net)
1510 return 0; 1528 return 0;
1511 1529
1512err_ecache: 1530err_ecache:
1531 nf_conntrack_tstamp_fini(net);
1532err_tstamp:
1513 nf_conntrack_acct_fini(net); 1533 nf_conntrack_acct_fini(net);
1514err_acct: 1534err_acct:
1515 nf_conntrack_expect_fini(net); 1535 nf_conntrack_expect_fini(net);
1516err_expect: 1536err_expect:
1517 nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, 1537 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1518 net->ct.htable_size);
1519err_hash: 1538err_hash:
1520 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1539 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1521err_cache: 1540err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index a20fb0bd1efe..cd1e8e0970f2 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
319 const struct nf_conntrack_expect_policy *p; 319 const struct nf_conntrack_expect_policy *p;
320 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); 320 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
321 321
322 atomic_inc(&exp->use); 322 /* two references : one for hash insert, one for the timer */
323 atomic_add(2, &exp->use);
323 324
324 if (master_help) { 325 if (master_help) {
325 hlist_add_head(&exp->lnode, &master_help->expectations); 326 hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
333 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 334 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
334 (unsigned long)exp); 335 (unsigned long)exp);
335 if (master_help) { 336 if (master_help) {
336 p = &master_help->helper->expect_policy[exp->class]; 337 p = &rcu_dereference_protected(
338 master_help->helper,
339 lockdep_is_held(&nf_conntrack_lock)
340 )->expect_policy[exp->class];
337 exp->timeout.expires = jiffies + p->timeout * HZ; 341 exp->timeout.expires = jiffies + p->timeout * HZ;
338 } 342 }
339 add_timer(&exp->timeout); 343 add_timer(&exp->timeout);
340 344
341 atomic_inc(&exp->use);
342 NF_CT_STAT_INC(net, expect_create); 345 NF_CT_STAT_INC(net, expect_create);
343} 346}
344 347
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
369 if (!del_timer(&i->timeout)) 372 if (!del_timer(&i->timeout))
370 return 0; 373 return 0;
371 374
372 p = &master_help->helper->expect_policy[i->class]; 375 p = &rcu_dereference_protected(
376 master_help->helper,
377 lockdep_is_held(&nf_conntrack_lock)
378 )->expect_policy[i->class];
373 i->timeout.expires = jiffies + p->timeout * HZ; 379 i->timeout.expires = jiffies + p->timeout * HZ;
374 add_timer(&i->timeout); 380 add_timer(&i->timeout);
375 return 1; 381 return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
407 } 413 }
408 /* Will be over limit? */ 414 /* Will be over limit? */
409 if (master_help) { 415 if (master_help) {
410 p = &master_help->helper->expect_policy[expect->class]; 416 p = &rcu_dereference_protected(
417 master_help->helper,
418 lockdep_is_held(&nf_conntrack_lock)
419 )->expect_policy[expect->class];
411 if (p->max_expected && 420 if (p->max_expected &&
412 master_help->expecting[expect->class] >= p->max_expected) { 421 master_help->expecting[expect->class] >= p->max_expected) {
413 evict_oldest_expect(master, expect); 422 evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
478 struct hlist_node *n; 487 struct hlist_node *n;
479 488
480 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 489 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
481 n = rcu_dereference(net->ct.expect_hash[st->bucket].first); 490 n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
482 if (n) 491 if (n)
483 return n; 492 return n;
484 } 493 }
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
491 struct net *net = seq_file_net(seq); 500 struct net *net = seq_file_net(seq);
492 struct ct_expect_iter_state *st = seq->private; 501 struct ct_expect_iter_state *st = seq->private;
493 502
494 head = rcu_dereference(head->next); 503 head = rcu_dereference(hlist_next_rcu(head));
495 while (head == NULL) { 504 while (head == NULL) {
496 if (++st->bucket >= nf_ct_expect_hsize) 505 if (++st->bucket >= nf_ct_expect_hsize)
497 return NULL; 506 return NULL;
498 head = rcu_dereference(net->ct.expect_hash[st->bucket].first); 507 head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
499 } 508 }
500 return head; 509 return head;
501} 510}
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
630 } 639 }
631 640
632 net->ct.expect_count = 0; 641 net->ct.expect_count = 0;
633 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 642 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
634 &net->ct.expect_vmalloc, 0);
635 if (net->ct.expect_hash == NULL) 643 if (net->ct.expect_hash == NULL)
636 goto err1; 644 goto err1;
637 645
@@ -653,8 +661,7 @@ err3:
653 if (net_eq(net, &init_net)) 661 if (net_eq(net, &init_net))
654 kmem_cache_destroy(nf_ct_expect_cachep); 662 kmem_cache_destroy(nf_ct_expect_cachep);
655err2: 663err2:
656 nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, 664 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
657 nf_ct_expect_hsize);
658err1: 665err1:
659 return err; 666 return err;
660} 667}
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
666 rcu_barrier(); /* Wait for call_rcu() before destroy */ 673 rcu_barrier(); /* Wait for call_rcu() before destroy */
667 kmem_cache_destroy(nf_ct_expect_cachep); 674 kmem_cache_destroy(nf_ct_expect_cachep);
668 } 675 }
669 nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, 676 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
670 nf_ct_expect_hsize);
671} 677}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bd82450c193f..80a23ed62bb0 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
140 /* This assumes that extended areas in conntrack for the types 140 /* This assumes that extended areas in conntrack for the types
141 whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ 141 whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
142 for (i = min; i <= max; i++) { 142 for (i = min; i <= max; i++) {
143 t1 = nf_ct_ext_types[i]; 143 t1 = rcu_dereference_protected(nf_ct_ext_types[i],
144 lockdep_is_held(&nf_ct_ext_type_mutex));
144 if (!t1) 145 if (!t1)
145 continue; 146 continue;
146 147
147 t1->alloc_size = sizeof(struct nf_ct_ext) 148 t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
148 + ALIGN(sizeof(struct nf_ct_ext), t1->align) 149 t1->len;
149 + t1->len;
150 for (j = 0; j < NF_CT_EXT_NUM; j++) { 150 for (j = 0; j < NF_CT_EXT_NUM; j++) {
151 t2 = nf_ct_ext_types[j]; 151 t2 = rcu_dereference_protected(nf_ct_ext_types[j],
152 lockdep_is_held(&nf_ct_ext_type_mutex));
152 if (t2 == NULL || t2 == t1 || 153 if (t2 == NULL || t2 == t1 ||
153 (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) 154 (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
154 continue; 155 continue;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 59e1a4cd4e8b..1bdfea357955 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
33static struct hlist_head *nf_ct_helper_hash __read_mostly; 33static struct hlist_head *nf_ct_helper_hash __read_mostly;
34static unsigned int nf_ct_helper_hsize __read_mostly; 34static unsigned int nf_ct_helper_hsize __read_mostly;
35static unsigned int nf_ct_helper_count __read_mostly; 35static unsigned int nf_ct_helper_count __read_mostly;
36static int nf_ct_helper_vmalloc;
37 36
38 37
39/* Stupid hash, but collision free for the default registrations of the 38/* Stupid hash, but collision free for the default registrations of the
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
158 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); 157 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
159 struct nf_conn_help *help = nfct_help(ct); 158 struct nf_conn_help *help = nfct_help(ct);
160 159
161 if (help && help->helper == me) { 160 if (help && rcu_dereference_protected(
161 help->helper,
162 lockdep_is_held(&nf_conntrack_lock)
163 ) == me) {
162 nf_conntrack_event(IPCT_HELPER, ct); 164 nf_conntrack_event(IPCT_HELPER, ct);
163 rcu_assign_pointer(help->helper, NULL); 165 rcu_assign_pointer(help->helper, NULL);
164 } 166 }
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
210 hlist_for_each_entry_safe(exp, n, next, 212 hlist_for_each_entry_safe(exp, n, next,
211 &net->ct.expect_hash[i], hnode) { 213 &net->ct.expect_hash[i], hnode) {
212 struct nf_conn_help *help = nfct_help(exp->master); 214 struct nf_conn_help *help = nfct_help(exp->master);
213 if ((help->helper == me || exp->helper == me) && 215 if ((rcu_dereference_protected(
216 help->helper,
217 lockdep_is_held(&nf_conntrack_lock)
218 ) == me || exp->helper == me) &&
214 del_timer(&exp->timeout)) { 219 del_timer(&exp->timeout)) {
215 nf_ct_unlink_expect(exp); 220 nf_ct_unlink_expect(exp);
216 nf_ct_expect_put(exp); 221 nf_ct_expect_put(exp);
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void)
261 int err; 266 int err;
262 267
263 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ 268 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
264 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 269 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
265 &nf_ct_helper_vmalloc, 0);
266 if (!nf_ct_helper_hash) 270 if (!nf_ct_helper_hash)
267 return -ENOMEM; 271 return -ENOMEM;
268 272
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void)
273 return 0; 277 return 0;
274 278
275err1: 279err1:
276 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, 280 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
277 nf_ct_helper_hsize);
278 return err; 281 return err;
279} 282}
280 283
281void nf_conntrack_helper_fini(void) 284void nf_conntrack_helper_fini(void)
282{ 285{
283 nf_ct_extend_unregister(&helper_extend); 286 nf_ct_extend_unregister(&helper_extend);
284 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, 287 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
285 nf_ct_helper_hsize);
286} 288}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index aadde018a072..4c8f30a3d6d2 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -18,14 +18,7 @@
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/inetdevice.h>
24#include <linux/if_addr.h>
25#include <linux/in.h> 21#include <linux/in.h>
26#include <linux/ip.h>
27#include <linux/netfilter.h>
28#include <net/route.h>
29 22
30#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
31#include <net/netfilter/nf_conntrack_helper.h> 24#include <net/netfilter/nf_conntrack_helper.h>
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
40MODULE_ALIAS_NFCT_HELPER("netbios_ns"); 33MODULE_ALIAS_NFCT_HELPER("netbios_ns");
41 34
42static unsigned int timeout __read_mostly = 3; 35static unsigned int timeout __read_mostly = 3;
43module_param(timeout, uint, 0400); 36module_param(timeout, uint, S_IRUSR);
44MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 37MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
45 38
46static int help(struct sk_buff *skb, unsigned int protoff,
47 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
48{
49 struct nf_conntrack_expect *exp;
50 struct iphdr *iph = ip_hdr(skb);
51 struct rtable *rt = skb_rtable(skb);
52 struct in_device *in_dev;
53 __be32 mask = 0;
54
55 /* we're only interested in locally generated packets */
56 if (skb->sk == NULL)
57 goto out;
58 if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
59 goto out;
60 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
61 goto out;
62
63 rcu_read_lock();
64 in_dev = __in_dev_get_rcu(rt->dst.dev);
65 if (in_dev != NULL) {
66 for_primary_ifa(in_dev) {
67 if (ifa->ifa_broadcast == iph->daddr) {
68 mask = ifa->ifa_mask;
69 break;
70 }
71 } endfor_ifa(in_dev);
72 }
73 rcu_read_unlock();
74
75 if (mask == 0)
76 goto out;
77
78 exp = nf_ct_expect_alloc(ct);
79 if (exp == NULL)
80 goto out;
81
82 exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
83 exp->tuple.src.u.udp.port = htons(NMBD_PORT);
84
85 exp->mask.src.u3.ip = mask;
86 exp->mask.src.u.udp.port = htons(0xFFFF);
87
88 exp->expectfn = NULL;
89 exp->flags = NF_CT_EXPECT_PERMANENT;
90 exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
91 exp->helper = NULL;
92
93 nf_ct_expect_related(exp);
94 nf_ct_expect_put(exp);
95
96 nf_ct_refresh(ct, skb, timeout * HZ);
97out:
98 return NF_ACCEPT;
99}
100
101static struct nf_conntrack_expect_policy exp_policy = { 39static struct nf_conntrack_expect_policy exp_policy = {
102 .max_expected = 1, 40 .max_expected = 1,
103}; 41};
104 42
43static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
44 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
45{
46 return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
47}
48
105static struct nf_conntrack_helper helper __read_mostly = { 49static struct nf_conntrack_helper helper __read_mostly = {
106 .name = "netbios-ns", 50 .name = "netbios-ns",
107 .tuple.src.l3num = AF_INET, 51 .tuple.src.l3num = NFPROTO_IPV4,
108 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), 52 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
109 .tuple.dst.protonum = IPPROTO_UDP, 53 .tuple.dst.protonum = IPPROTO_UDP,
110 .me = THIS_MODULE, 54 .me = THIS_MODULE,
111 .help = help, 55 .help = netbios_ns_help,
112 .expect_policy = &exp_policy, 56 .expect_policy = &exp_policy,
113}; 57};
114 58
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2b7eef37875c..61c73945bb94 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -42,6 +42,7 @@
42#include <net/netfilter/nf_conntrack_tuple.h> 42#include <net/netfilter/nf_conntrack_tuple.h>
43#include <net/netfilter/nf_conntrack_acct.h> 43#include <net/netfilter/nf_conntrack_acct.h>
44#include <net/netfilter/nf_conntrack_zones.h> 44#include <net/netfilter/nf_conntrack_zones.h>
45#include <net/netfilter/nf_conntrack_timestamp.h>
45#ifdef CONFIG_NF_NAT_NEEDED 46#ifdef CONFIG_NF_NAT_NEEDED
46#include <net/netfilter/nf_nat_core.h> 47#include <net/netfilter/nf_nat_core.h>
47#include <net/netfilter/nf_nat_protocol.h> 48#include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@ nla_put_failure:
230 return -1; 231 return -1;
231} 232}
232 233
234static int
235ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
236{
237 struct nlattr *nest_count;
238 const struct nf_conn_tstamp *tstamp;
239
240 tstamp = nf_conn_tstamp_find(ct);
241 if (!tstamp)
242 return 0;
243
244 nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
245 if (!nest_count)
246 goto nla_put_failure;
247
248 NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start));
249 if (tstamp->stop != 0) {
250 NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP,
251 cpu_to_be64(tstamp->stop));
252 }
253 nla_nest_end(skb, nest_count);
254
255 return 0;
256
257nla_put_failure:
258 return -1;
259}
260
233#ifdef CONFIG_NF_CONNTRACK_MARK 261#ifdef CONFIG_NF_CONNTRACK_MARK
234static inline int 262static inline int
235ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) 263ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
404 ctnetlink_dump_timeout(skb, ct) < 0 || 432 ctnetlink_dump_timeout(skb, ct) < 0 ||
405 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 433 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
406 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || 434 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
435 ctnetlink_dump_timestamp(skb, ct) < 0 ||
407 ctnetlink_dump_protoinfo(skb, ct) < 0 || 436 ctnetlink_dump_protoinfo(skb, ct) < 0 ||
408 ctnetlink_dump_helpinfo(skb, ct) < 0 || 437 ctnetlink_dump_helpinfo(skb, ct) < 0 ||
409 ctnetlink_dump_mark(skb, ct) < 0 || 438 ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct)
471} 500}
472 501
473static inline size_t 502static inline size_t
503ctnetlink_timestamp_size(const struct nf_conn *ct)
504{
505#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
506 if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
507 return 0;
508 return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
509#else
510 return 0;
511#endif
512}
513
514static inline size_t
474ctnetlink_nlmsg_size(const struct nf_conn *ct) 515ctnetlink_nlmsg_size(const struct nf_conn *ct)
475{ 516{
476 return NLMSG_ALIGN(sizeof(struct nfgenmsg)) 517 return NLMSG_ALIGN(sizeof(struct nfgenmsg))
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
481 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ 522 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
482 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ 523 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
483 + ctnetlink_counters_size(ct) 524 + ctnetlink_counters_size(ct)
525 + ctnetlink_timestamp_size(ct)
484 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ 526 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
485 + nla_total_size(0) /* CTA_PROTOINFO */ 527 + nla_total_size(0) /* CTA_PROTOINFO */
486 + nla_total_size(0) /* CTA_HELP */ 528 + nla_total_size(0) /* CTA_HELP */
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
571 613
572 if (events & (1 << IPCT_DESTROY)) { 614 if (events & (1 << IPCT_DESTROY)) {
573 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 615 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
574 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) 616 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
617 ctnetlink_dump_timestamp(skb, ct) < 0)
575 goto nla_put_failure; 618 goto nla_put_failure;
576 } else { 619 } else {
577 if (ctnetlink_dump_timeout(skb, ct) < 0) 620 if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -1357,6 +1400,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1357 } 1400 }
1358 1401
1359 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1402 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
1403 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
1360 nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); 1404 nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
1361 /* we must add conntrack extensions before confirmation. */ 1405 /* we must add conntrack extensions before confirmation. */
1362 ct->status |= IPS_CONFIRMED; 1406 ct->status |= IPS_CONFIRMED;
@@ -1375,6 +1419,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1375 } 1419 }
1376#endif 1420#endif
1377 1421
1422 memset(&ct->proto, 0, sizeof(ct->proto));
1378 if (cda[CTA_PROTOINFO]) { 1423 if (cda[CTA_PROTOINFO]) {
1379 err = ctnetlink_change_protoinfo(ct, cda); 1424 err = ctnetlink_change_protoinfo(ct, cda);
1380 if (err < 0) 1425 if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index dc7bb74110df..5701c8dd783c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
166int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) 166int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
167{ 167{
168 int ret = 0; 168 int ret = 0;
169 struct nf_conntrack_l3proto *old;
169 170
170 if (proto->l3proto >= AF_MAX) 171 if (proto->l3proto >= AF_MAX)
171 return -EBUSY; 172 return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
174 return -EINVAL; 175 return -EINVAL;
175 176
176 mutex_lock(&nf_ct_proto_mutex); 177 mutex_lock(&nf_ct_proto_mutex);
177 if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { 178 old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
179 lockdep_is_held(&nf_ct_proto_mutex));
180 if (old != &nf_conntrack_l3proto_generic) {
178 ret = -EBUSY; 181 ret = -EBUSY;
179 goto out_unlock; 182 goto out_unlock;
180 } 183 }
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
201 BUG_ON(proto->l3proto >= AF_MAX); 204 BUG_ON(proto->l3proto >= AF_MAX);
202 205
203 mutex_lock(&nf_ct_proto_mutex); 206 mutex_lock(&nf_ct_proto_mutex);
204 BUG_ON(nf_ct_l3protos[proto->l3proto] != proto); 207 BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
208 lockdep_is_held(&nf_ct_proto_mutex)
209 ) != proto);
205 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], 210 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
206 &nf_conntrack_l3proto_generic); 211 &nf_conntrack_l3proto_generic);
207 nf_ct_l3proto_unregister_sysctl(proto); 212 nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
279 mutex_lock(&nf_ct_proto_mutex); 284 mutex_lock(&nf_ct_proto_mutex);
280 if (!nf_ct_protos[l4proto->l3proto]) { 285 if (!nf_ct_protos[l4proto->l3proto]) {
281 /* l3proto may be loaded latter. */ 286 /* l3proto may be loaded latter. */
282 struct nf_conntrack_l4proto **proto_array; 287 struct nf_conntrack_l4proto __rcu **proto_array;
283 int i; 288 int i;
284 289
285 proto_array = kmalloc(MAX_NF_CT_PROTO * 290 proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
291 } 296 }
292 297
293 for (i = 0; i < MAX_NF_CT_PROTO; i++) 298 for (i = 0; i < MAX_NF_CT_PROTO; i++)
294 proto_array[i] = &nf_conntrack_l4proto_generic; 299 RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
295 300
296 /* Before making proto_array visible to lockless readers, 301 /* Before making proto_array visible to lockless readers,
297 * we must make sure its content is committed to memory. 302 * we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
299 smp_wmb(); 304 smp_wmb();
300 305
301 nf_ct_protos[l4proto->l3proto] = proto_array; 306 nf_ct_protos[l4proto->l3proto] = proto_array;
302 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != 307 } else if (rcu_dereference_protected(
303 &nf_conntrack_l4proto_generic) { 308 nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
309 lockdep_is_held(&nf_ct_proto_mutex)
310 ) != &nf_conntrack_l4proto_generic) {
304 ret = -EBUSY; 311 ret = -EBUSY;
305 goto out_unlock; 312 goto out_unlock;
306 } 313 }
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
331 BUG_ON(l4proto->l3proto >= PF_MAX); 338 BUG_ON(l4proto->l3proto >= PF_MAX);
332 339
333 mutex_lock(&nf_ct_proto_mutex); 340 mutex_lock(&nf_ct_proto_mutex);
334 BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto); 341 BUG_ON(rcu_dereference_protected(
342 nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
343 lockdep_is_held(&nf_ct_proto_mutex)
344 ) != l4proto);
335 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 345 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
336 &nf_conntrack_l4proto_generic); 346 &nf_conntrack_l4proto_generic);
337 nf_ct_l4proto_unregister_sysctl(l4proto); 347 nf_ct_l4proto_unregister_sysctl(l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5292560d6d4a..9ae57c57c50e 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
452 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; 452 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
453 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; 453 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
454 ct->proto.dccp.state = CT_DCCP_NONE; 454 ct->proto.dccp.state = CT_DCCP_NONE;
455 ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
456 ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
457 ct->proto.dccp.handshake_seq = 0;
455 return true; 458 return true;
456 459
457out_invalid: 460out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6049c2d5ea8..6f4ee70f460b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
413 test_bit(SCTP_CID_COOKIE_ACK, map)) 413 test_bit(SCTP_CID_COOKIE_ACK, map))
414 return false; 414 return false;
415 415
416 memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
416 new_state = SCTP_CONNTRACK_MAX; 417 new_state = SCTP_CONNTRACK_MAX;
417 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 418 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
418 /* Don't need lock here: this conntrack not in circulation yet */ 419 /* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3fb2b73b24dc..6f38d0e2ea4a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1066 BUG_ON(th == NULL); 1066 BUG_ON(th == NULL);
1067 1067
1068 /* Don't need lock here: this conntrack not in circulation yet */ 1068 /* Don't need lock here: this conntrack not in circulation yet */
1069 new_state 1069 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1070 = tcp_conntracks[0][get_conntrack_index(th)]
1071 [TCP_CONNTRACK_NONE];
1072 1070
1073 /* Invalid: delete conntrack */ 1071 /* Invalid: delete conntrack */
1074 if (new_state >= TCP_CONNTRACK_MAX) { 1072 if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1077 } 1075 }
1078 1076
1079 if (new_state == TCP_CONNTRACK_SYN_SENT) { 1077 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1078 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1080 /* SYN packet */ 1079 /* SYN packet */
1081 ct->proto.tcp.seen[0].td_end = 1080 ct->proto.tcp.seen[0].td_end =
1082 segment_seq_plus_len(ntohl(th->seq), skb->len, 1081 segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1088 ct->proto.tcp.seen[0].td_end; 1087 ct->proto.tcp.seen[0].td_end;
1089 1088
1090 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); 1089 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1091 ct->proto.tcp.seen[1].flags = 0;
1092 } else if (nf_ct_tcp_loose == 0) { 1090 } else if (nf_ct_tcp_loose == 0) {
1093 /* Don't try to pick up connections. */ 1091 /* Don't try to pick up connections. */
1094 return false; 1092 return false;
1095 } else { 1093 } else {
1094 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1096 /* 1095 /*
1097 * We are in the middle of a connection, 1096 * We are in the middle of a connection,
1098 * its history is lost for us. 1097 * its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1107 ct->proto.tcp.seen[0].td_maxend = 1106 ct->proto.tcp.seen[0].td_maxend =
1108 ct->proto.tcp.seen[0].td_end + 1107 ct->proto.tcp.seen[0].td_end +
1109 ct->proto.tcp.seen[0].td_maxwin; 1108 ct->proto.tcp.seen[0].td_maxwin;
1110 ct->proto.tcp.seen[0].td_scale = 0;
1111 1109
1112 /* We assume SACK and liberal window checking to handle 1110 /* We assume SACK and liberal window checking to handle
1113 * window scaling */ 1111 * window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1116 IP_CT_TCP_FLAG_BE_LIBERAL; 1114 IP_CT_TCP_FLAG_BE_LIBERAL;
1117 } 1115 }
1118 1116
1119 ct->proto.tcp.seen[1].td_end = 0;
1120 ct->proto.tcp.seen[1].td_maxend = 0;
1121 ct->proto.tcp.seen[1].td_maxwin = 0;
1122 ct->proto.tcp.seen[1].td_scale = 0;
1123
1124 /* tcp_packet will set them */ 1117 /* tcp_packet will set them */
1125 ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1126 ct->proto.tcp.last_index = TCP_NONE_SET; 1118 ct->proto.tcp.last_index = TCP_NONE_SET;
1127 1119
1128 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " 1120 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
new file mode 100644
index 000000000000..6e545e26289e
--- /dev/null
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -0,0 +1,77 @@
1/*
2 * SNMP service broadcast connection tracking helper
3 *
4 * (c) 2011 Jiri Olsa <jolsa@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/in.h>
15
16#include <net/netfilter/nf_conntrack.h>
17#include <net/netfilter/nf_conntrack_helper.h>
18#include <net/netfilter/nf_conntrack_expect.h>
19
20#define SNMP_PORT 161
21
22MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
23MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
24MODULE_LICENSE("GPL");
25MODULE_ALIAS_NFCT_HELPER("snmp");
26
27static unsigned int timeout __read_mostly = 30;
28module_param(timeout, uint, S_IRUSR);
29MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
30
31int (*nf_nat_snmp_hook)(struct sk_buff *skb,
32 unsigned int protoff,
33 struct nf_conn *ct,
34 enum ip_conntrack_info ctinfo);
35EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
36
37static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
38 struct nf_conn *ct, enum ip_conntrack_info ctinfo)
39{
40 typeof(nf_nat_snmp_hook) nf_nat_snmp;
41
42 nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
43
44 nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
45 if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
46 return nf_nat_snmp(skb, protoff, ct, ctinfo);
47
48 return NF_ACCEPT;
49}
50
51static struct nf_conntrack_expect_policy exp_policy = {
52 .max_expected = 1,
53};
54
55static struct nf_conntrack_helper helper __read_mostly = {
56 .name = "snmp",
57 .tuple.src.l3num = NFPROTO_IPV4,
58 .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
59 .tuple.dst.protonum = IPPROTO_UDP,
60 .me = THIS_MODULE,
61 .help = snmp_conntrack_help,
62 .expect_policy = &exp_policy,
63};
64
65static int __init nf_conntrack_snmp_init(void)
66{
67 exp_policy.timeout = timeout;
68 return nf_conntrack_helper_register(&helper);
69}
70
71static void __exit nf_conntrack_snmp_fini(void)
72{
73 nf_conntrack_helper_unregister(&helper);
74}
75
76module_init(nf_conntrack_snmp_init);
77module_exit(nf_conntrack_snmp_fini);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b4d7f0f24b27..0ae142825881 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,8 @@
29#include <net/netfilter/nf_conntrack_helper.h> 29#include <net/netfilter/nf_conntrack_helper.h>
30#include <net/netfilter/nf_conntrack_acct.h> 30#include <net/netfilter/nf_conntrack_acct.h>
31#include <net/netfilter/nf_conntrack_zones.h> 31#include <net/netfilter/nf_conntrack_zones.h>
32#include <net/netfilter/nf_conntrack_timestamp.h>
33#include <linux/rculist_nulls.h>
32 34
33MODULE_LICENSE("GPL"); 35MODULE_LICENSE("GPL");
34 36
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple);
45struct ct_iter_state { 47struct ct_iter_state {
46 struct seq_net_private p; 48 struct seq_net_private p;
47 unsigned int bucket; 49 unsigned int bucket;
50 u_int64_t time_now;
48}; 51};
49 52
50static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 53static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
56 for (st->bucket = 0; 59 for (st->bucket = 0;
57 st->bucket < net->ct.htable_size; 60 st->bucket < net->ct.htable_size;
58 st->bucket++) { 61 st->bucket++) {
59 n = rcu_dereference(net->ct.hash[st->bucket].first); 62 n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
60 if (!is_a_nulls(n)) 63 if (!is_a_nulls(n))
61 return n; 64 return n;
62 } 65 }
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
69 struct net *net = seq_file_net(seq); 72 struct net *net = seq_file_net(seq);
70 struct ct_iter_state *st = seq->private; 73 struct ct_iter_state *st = seq->private;
71 74
72 head = rcu_dereference(head->next); 75 head = rcu_dereference(hlist_nulls_next_rcu(head));
73 while (is_a_nulls(head)) { 76 while (is_a_nulls(head)) {
74 if (likely(get_nulls_value(head) == st->bucket)) { 77 if (likely(get_nulls_value(head) == st->bucket)) {
75 if (++st->bucket >= net->ct.htable_size) 78 if (++st->bucket >= net->ct.htable_size)
76 return NULL; 79 return NULL;
77 } 80 }
78 head = rcu_dereference(net->ct.hash[st->bucket].first); 81 head = rcu_dereference(
82 hlist_nulls_first_rcu(
83 &net->ct.hash[st->bucket]));
79 } 84 }
80 return head; 85 return head;
81} 86}
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
93static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 98static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
94 __acquires(RCU) 99 __acquires(RCU)
95{ 100{
101 struct ct_iter_state *st = seq->private;
102
103 st->time_now = ktime_to_ns(ktime_get_real());
96 rcu_read_lock(); 104 rcu_read_lock();
97 return ct_get_idx(seq, *pos); 105 return ct_get_idx(seq, *pos);
98} 106}
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
132} 140}
133#endif 141#endif
134 142
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
/*
 * Print "delta-time=<seconds> " for @ct when it carries a timestamp
 * extension.  The value is the time between the extension's start stamp and
 * the time sampled in ct_seq_start(), in whole seconds, clamped to 0 when the
 * difference is not positive.
 *
 * Returns seq_printf()'s result, or 0 when there is no timestamp extension.
 */
static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
	struct ct_iter_state *st = s->private;
	struct nf_conn_tstamp *tstamp = nf_conn_tstamp_find(ct);
	s64 elapsed_ns;
	s64 elapsed_sec = 0;

	if (!tstamp)
		return 0;

	elapsed_ns = st->time_now - tstamp->start;
	if (elapsed_ns > 0)
		elapsed_sec = div_s64(elapsed_ns, NSEC_PER_SEC);

	return seq_printf(s, "delta-time=%llu ",
			  (unsigned long long)elapsed_sec);
}
#else
/* Timestamp support compiled out: print nothing, report success. */
static inline int
ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
	return 0;
}
#endif
170
135/* return 0 on success, 1 in case of error */ 171/* return 0 on success, 1 in case of error */
136static int ct_seq_show(struct seq_file *s, void *v) 172static int ct_seq_show(struct seq_file *s, void *v)
137{ 173{
@@ -200,6 +236,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
200 goto release; 236 goto release;
201#endif 237#endif
202 238
239 if (ct_show_delta_time(s, ct))
240 goto release;
241
203 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 242 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
204 goto release; 243 goto release;
205 244
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644
index 000000000000..af7dd31af0a1
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,120 @@
1/*
2 * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation (or any later at your option).
7 */
8
9#include <linux/netfilter.h>
10#include <linux/slab.h>
11#include <linux/kernel.h>
12#include <linux/moduleparam.h>
13
14#include <net/netfilter/nf_conntrack.h>
15#include <net/netfilter/nf_conntrack_extend.h>
16#include <net/netfilter/nf_conntrack_timestamp.h>
17
18static int nf_ct_tstamp __read_mostly;
19
20module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
21MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
22
#ifdef CONFIG_SYSCTL
/*
 * Template sysctl table.  nf_conntrack_tstamp_init_sysctl() duplicates it for
 * each namespace and repoints entry 0's .data at that namespace's
 * sysctl_tstamp before registering the copy.
 */
static struct ctl_table tstamp_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_timestamp",
		.data		= &init_net.ct.sysctl_tstamp,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* empty trailing entry */
};
#endif /* CONFIG_SYSCTL */
35
36static struct nf_ct_ext_type tstamp_extend __read_mostly = {
37 .len = sizeof(struct nf_conn_tstamp),
38 .align = __alignof__(struct nf_conn_tstamp),
39 .id = NF_CT_EXT_TSTAMP,
40};
41
42#ifdef CONFIG_SYSCTL
43static int nf_conntrack_tstamp_init_sysctl(struct net *net)
44{
45 struct ctl_table *table;
46
47 table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
48 GFP_KERNEL);
49 if (!table)
50 goto out;
51
52 table[0].data = &net->ct.sysctl_tstamp;
53
54 net->ct.tstamp_sysctl_header = register_net_sysctl_table(net,
55 nf_net_netfilter_sysctl_path, table);
56 if (!net->ct.tstamp_sysctl_header) {
57 printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
58 goto out_register;
59 }
60 return 0;
61
62out_register:
63 kfree(table);
64out:
65 return -ENOMEM;
66}
67
68static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
69{
70 struct ctl_table *table;
71
72 table = net->ct.tstamp_sysctl_header->ctl_table_arg;
73 unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
74 kfree(table);
75}
76#else
/* Stub used when CONFIG_SYSCTL is off: nothing to register, always succeeds. */
static int nf_conntrack_tstamp_init_sysctl(struct net *net)
{
	return 0;
}

/* Stub used when CONFIG_SYSCTL is off: nothing to tear down. */
static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
{
}
85#endif
86
87int nf_conntrack_tstamp_init(struct net *net)
88{
89 int ret;
90
91 net->ct.sysctl_tstamp = nf_ct_tstamp;
92
93 if (net_eq(net, &init_net)) {
94 ret = nf_ct_extend_register(&tstamp_extend);
95 if (ret < 0) {
96 printk(KERN_ERR "nf_ct_tstamp: Unable to register "
97 "extension\n");
98 goto out_extend_register;
99 }
100 }
101
102 ret = nf_conntrack_tstamp_init_sysctl(net);
103 if (ret < 0)
104 goto out_sysctl;
105
106 return 0;
107
108out_sysctl:
109 if (net_eq(net, &init_net))
110 nf_ct_extend_unregister(&tstamp_extend);
111out_extend_register:
112 return ret;
113}
114
115void nf_conntrack_tstamp_fini(struct net *net)
116{
117 nf_conntrack_tstamp_fini_sysctl(net);
118 if (net_eq(net, &init_net))
119 nf_ct_extend_unregister(&tstamp_extend);
120}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393eab88e..20c775cff2a8 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v)
161 struct nf_logger *t; 161 struct nf_logger *t;
162 int ret; 162 int ret;
163 163
164 logger = nf_loggers[*pos]; 164 logger = rcu_dereference_protected(nf_loggers[*pos],
165 lockdep_is_held(&nf_log_mutex));
165 166
166 if (!logger) 167 if (!logger)
167 ret = seq_printf(s, "%2lld NONE (", *pos); 168 ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
249 mutex_unlock(&nf_log_mutex); 250 mutex_unlock(&nf_log_mutex);
250 } else { 251 } else {
251 mutex_lock(&nf_log_mutex); 252 mutex_lock(&nf_log_mutex);
252 logger = nf_loggers[tindex]; 253 logger = rcu_dereference_protected(nf_loggers[tindex],
254 lockdep_is_held(&nf_log_mutex));
253 if (!logger) 255 if (!logger)
254 table->data = "NONE"; 256 table->data = "NONE";
255 else 257 else
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 74aebed5bd28..5ab22e2bbd7d 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
27int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 27int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
28{ 28{
29 int ret; 29 int ret;
30 const struct nf_queue_handler *old;
30 31
31 if (pf >= ARRAY_SIZE(queue_handler)) 32 if (pf >= ARRAY_SIZE(queue_handler))
32 return -EINVAL; 33 return -EINVAL;
33 34
34 mutex_lock(&queue_handler_mutex); 35 mutex_lock(&queue_handler_mutex);
35 if (queue_handler[pf] == qh) 36 old = rcu_dereference_protected(queue_handler[pf],
37 lockdep_is_held(&queue_handler_mutex));
38 if (old == qh)
36 ret = -EEXIST; 39 ret = -EEXIST;
37 else if (queue_handler[pf]) 40 else if (old)
38 ret = -EBUSY; 41 ret = -EBUSY;
39 else { 42 else {
40 rcu_assign_pointer(queue_handler[pf], qh); 43 rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
49/* The caller must flush their queue before this */ 52/* The caller must flush their queue before this */
50int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 53int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
51{ 54{
55 const struct nf_queue_handler *old;
56
52 if (pf >= ARRAY_SIZE(queue_handler)) 57 if (pf >= ARRAY_SIZE(queue_handler))
53 return -EINVAL; 58 return -EINVAL;
54 59
55 mutex_lock(&queue_handler_mutex); 60 mutex_lock(&queue_handler_mutex);
56 if (queue_handler[pf] && queue_handler[pf] != qh) { 61 old = rcu_dereference_protected(queue_handler[pf],
62 lockdep_is_held(&queue_handler_mutex));
63 if (old && old != qh) {
57 mutex_unlock(&queue_handler_mutex); 64 mutex_unlock(&queue_handler_mutex);
58 return -EINVAL; 65 return -EINVAL;
59 } 66 }
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
73 80
74 mutex_lock(&queue_handler_mutex); 81 mutex_lock(&queue_handler_mutex);
75 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { 82 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
76 if (queue_handler[pf] == qh) 83 if (rcu_dereference_protected(
84 queue_handler[pf],
85 lockdep_is_held(&queue_handler_mutex)
86 ) == qh)
77 rcu_assign_pointer(queue_handler[pf], NULL); 87 rcu_assign_pointer(queue_handler[pf], NULL);
78 } 88 }
79 mutex_unlock(&queue_handler_mutex); 89 mutex_unlock(&queue_handler_mutex);
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb,
115 int (*okfn)(struct sk_buff *), 125 int (*okfn)(struct sk_buff *),
116 unsigned int queuenum) 126 unsigned int queuenum)
117{ 127{
118 int status; 128 int status = -ENOENT;
119 struct nf_queue_entry *entry = NULL; 129 struct nf_queue_entry *entry = NULL;
120#ifdef CONFIG_BRIDGE_NETFILTER 130#ifdef CONFIG_BRIDGE_NETFILTER
121 struct net_device *physindev; 131 struct net_device *physindev;
@@ -128,16 +138,20 @@ static int __nf_queue(struct sk_buff *skb,
128 rcu_read_lock(); 138 rcu_read_lock();
129 139
130 qh = rcu_dereference(queue_handler[pf]); 140 qh = rcu_dereference(queue_handler[pf]);
131 if (!qh) 141 if (!qh) {
142 status = -ESRCH;
132 goto err_unlock; 143 goto err_unlock;
144 }
133 145
134 afinfo = nf_get_afinfo(pf); 146 afinfo = nf_get_afinfo(pf);
135 if (!afinfo) 147 if (!afinfo)
136 goto err_unlock; 148 goto err_unlock;
137 149
138 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); 150 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
139 if (!entry) 151 if (!entry) {
152 status = -ENOMEM;
140 goto err_unlock; 153 goto err_unlock;
154 }
141 155
142 *entry = (struct nf_queue_entry) { 156 *entry = (struct nf_queue_entry) {
143 .skb = skb, 157 .skb = skb,
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb,
151 165
152 /* If it's going away, ignore hook. */ 166 /* If it's going away, ignore hook. */
153 if (!try_module_get(entry->elem->owner)) { 167 if (!try_module_get(entry->elem->owner)) {
154 rcu_read_unlock(); 168 status = -ECANCELED;
155 kfree(entry); 169 goto err_unlock;
156 return 0;
157 } 170 }
158
159 /* Bump dev refs so they don't vanish while packet is out */ 171 /* Bump dev refs so they don't vanish while packet is out */
160 if (indev) 172 if (indev)
161 dev_hold(indev); 173 dev_hold(indev);
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb,
182 goto err; 194 goto err;
183 } 195 }
184 196
185 return 1; 197 return 0;
186 198
187err_unlock: 199err_unlock:
188 rcu_read_unlock(); 200 rcu_read_unlock();
189err: 201err:
190 kfree_skb(skb);
191 kfree(entry); 202 kfree(entry);
192 return 1; 203 return status;
193} 204}
194 205
195int nf_queue(struct sk_buff *skb, 206int nf_queue(struct sk_buff *skb,
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb,
201 unsigned int queuenum) 212 unsigned int queuenum)
202{ 213{
203 struct sk_buff *segs; 214 struct sk_buff *segs;
215 int err;
216 unsigned int queued;
204 217
205 if (!skb_is_gso(skb)) 218 if (!skb_is_gso(skb))
206 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 219 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb,
216 } 229 }
217 230
218 segs = skb_gso_segment(skb, 0); 231 segs = skb_gso_segment(skb, 0);
219 kfree_skb(skb); 232 /* Does not use PTR_ERR to limit the number of error codes that can be
233 * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
234 * 'ignore this hook'.
235 */
220 if (IS_ERR(segs)) 236 if (IS_ERR(segs))
221 return 1; 237 return -EINVAL;
222 238
239 queued = 0;
240 err = 0;
223 do { 241 do {
224 struct sk_buff *nskb = segs->next; 242 struct sk_buff *nskb = segs->next;
225 243
226 segs->next = NULL; 244 segs->next = NULL;
227 if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, 245 if (err == 0)
228 queuenum)) 246 err = __nf_queue(segs, elem, pf, hook, indev,
247 outdev, okfn, queuenum);
248 if (err == 0)
249 queued++;
250 else
229 kfree_skb(segs); 251 kfree_skb(segs);
230 segs = nskb; 252 segs = nskb;
231 } while (segs); 253 } while (segs);
232 return 1; 254
255 /* also free orig skb if only some segments were queued */
256 if (unlikely(err && queued))
257 err = 0;
258 if (err == 0)
259 kfree_skb(skb);
260 return err;
233} 261}
234 262
235void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) 263void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
237 struct sk_buff *skb = entry->skb; 265 struct sk_buff *skb = entry->skb;
238 struct list_head *elem = &entry->elem->list; 266 struct list_head *elem = &entry->elem->list;
239 const struct nf_afinfo *afinfo; 267 const struct nf_afinfo *afinfo;
268 int err;
240 269
241 rcu_read_lock(); 270 rcu_read_lock();
242 271
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
270 local_bh_enable(); 299 local_bh_enable();
271 break; 300 break;
272 case NF_QUEUE: 301 case NF_QUEUE:
273 if (!__nf_queue(skb, elem, entry->pf, entry->hook, 302 err = __nf_queue(skb, elem, entry->pf, entry->hook,
274 entry->indev, entry->outdev, entry->okfn, 303 entry->indev, entry->outdev, entry->okfn,
275 verdict >> NF_VERDICT_BITS)) 304 verdict >> NF_VERDICT_QBITS);
276 goto next_hook; 305 if (err < 0) {
306 if (err == -ECANCELED)
307 goto next_hook;
308 if (err == -ESRCH &&
309 (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
310 goto next_hook;
311 kfree_skb(skb);
312 }
277 break; 313 break;
278 case NF_STOLEN: 314 case NF_STOLEN:
279 default: 315 default:
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6a1572b0ab41..91592da504b9 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st)
874 874
875 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 875 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
876 if (!hlist_empty(&instance_table[st->bucket])) 876 if (!hlist_empty(&instance_table[st->bucket]))
877 return rcu_dereference_bh(instance_table[st->bucket].first); 877 return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
878 } 878 }
879 return NULL; 879 return NULL;
880} 880}
881 881
882static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) 882static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
883{ 883{
884 h = rcu_dereference_bh(h->next); 884 h = rcu_dereference_bh(hlist_next_rcu(h));
885 while (!h) { 885 while (!h) {
886 if (++st->bucket >= INSTANCE_BUCKETS) 886 if (++st->bucket >= INSTANCE_BUCKETS)
887 return NULL; 887 return NULL;
888 888
889 h = rcu_dereference_bh(instance_table[st->bucket].first); 889 h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
890 } 890 }
891 return h; 891 return h;
892} 892}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 68e67d19724d..b83123f12b42 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
387{ 387{
388 struct sk_buff *nskb; 388 struct sk_buff *nskb;
389 struct nfqnl_instance *queue; 389 struct nfqnl_instance *queue;
390 int err; 390 int err = -ENOBUFS;
391 391
392 /* rcu_read_lock()ed by nf_hook_slow() */ 392 /* rcu_read_lock()ed by nf_hook_slow() */
393 queue = instance_lookup(queuenum); 393 queue = instance_lookup(queuenum);
394 if (!queue) 394 if (!queue) {
395 err = -ESRCH;
395 goto err_out; 396 goto err_out;
397 }
396 398
397 if (queue->copy_mode == NFQNL_COPY_NONE) 399 if (queue->copy_mode == NFQNL_COPY_NONE) {
400 err = -EINVAL;
398 goto err_out; 401 goto err_out;
402 }
399 403
400 nskb = nfqnl_build_packet_message(queue, entry); 404 nskb = nfqnl_build_packet_message(queue, entry);
401 if (nskb == NULL) 405 if (nskb == NULL) {
406 err = -ENOMEM;
402 goto err_out; 407 goto err_out;
403 408 }
404 spin_lock_bh(&queue->lock); 409 spin_lock_bh(&queue->lock);
405 410
406 if (!queue->peer_pid) 411 if (!queue->peer_pid) {
412 err = -EINVAL;
407 goto err_out_free_nskb; 413 goto err_out_free_nskb;
408 414 }
409 if (queue->queue_total >= queue->queue_maxlen) { 415 if (queue->queue_total >= queue->queue_maxlen) {
410 queue->queue_dropped++; 416 queue->queue_dropped++;
411 if (net_ratelimit()) 417 if (net_ratelimit())
@@ -432,7 +438,7 @@ err_out_free_nskb:
432err_out_unlock: 438err_out_unlock:
433 spin_unlock_bh(&queue->lock); 439 spin_unlock_bh(&queue->lock);
434err_out: 440err_out:
435 return -1; 441 return err;
436} 442}
437 443
438static int 444static int
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c94237631077..0a77d2ff2154 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -23,6 +23,7 @@
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/audit.h>
26#include <net/net_namespace.h> 27#include <net/net_namespace.h>
27 28
28#include <linux/netfilter/x_tables.h> 29#include <linux/netfilter/x_tables.h>
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
38#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 39#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
39 40
/*
 * One slot of the per-family compat offset table.
 * xt_compat_add_offset() stores @delta cumulatively: each slot holds the sum
 * of its own delta and all earlier slots' deltas.
 */
struct compat_delta {
	unsigned int offset;	/* offset in kernel */
	int delta;		/* delta in 32bit user land */
};
45 45
46struct xt_af { 46struct xt_af {
@@ -49,7 +49,9 @@ struct xt_af {
49 struct list_head target; 49 struct list_head target;
50#ifdef CONFIG_COMPAT 50#ifdef CONFIG_COMPAT
51 struct mutex compat_mutex; 51 struct mutex compat_mutex;
52 struct compat_delta *compat_offsets; 52 struct compat_delta *compat_tab;
53 unsigned int number; /* number of slots in compat_tab[] */
54 unsigned int cur; /* number of used slots in compat_tab[] */
53#endif 55#endif
54}; 56};
55 57
@@ -414,54 +416,67 @@ int xt_check_match(struct xt_mtchk_param *par,
414EXPORT_SYMBOL_GPL(xt_check_match); 416EXPORT_SYMBOL_GPL(xt_check_match);
415 417
416#ifdef CONFIG_COMPAT 418#ifdef CONFIG_COMPAT
417int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) 419int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
418{ 420{
419 struct compat_delta *tmp; 421 struct xt_af *xp = &xt[af];
420 422
421 tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); 423 if (!xp->compat_tab) {
422 if (!tmp) 424 if (!xp->number)
423 return -ENOMEM; 425 return -EINVAL;
426 xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
427 if (!xp->compat_tab)
428 return -ENOMEM;
429 xp->cur = 0;
430 }
424 431
425 tmp->offset = offset; 432 if (xp->cur >= xp->number)
426 tmp->delta = delta; 433 return -EINVAL;
427 434
428 if (xt[af].compat_offsets) { 435 if (xp->cur)
429 tmp->next = xt[af].compat_offsets->next; 436 delta += xp->compat_tab[xp->cur - 1].delta;
430 xt[af].compat_offsets->next = tmp; 437 xp->compat_tab[xp->cur].offset = offset;
431 } else { 438 xp->compat_tab[xp->cur].delta = delta;
432 xt[af].compat_offsets = tmp; 439 xp->cur++;
433 tmp->next = NULL;
434 }
435 return 0; 440 return 0;
436} 441}
437EXPORT_SYMBOL_GPL(xt_compat_add_offset); 442EXPORT_SYMBOL_GPL(xt_compat_add_offset);
438 443
439void xt_compat_flush_offsets(u_int8_t af) 444void xt_compat_flush_offsets(u_int8_t af)
440{ 445{
441 struct compat_delta *tmp, *next; 446 if (xt[af].compat_tab) {
442 447 vfree(xt[af].compat_tab);
443 if (xt[af].compat_offsets) { 448 xt[af].compat_tab = NULL;
444 for (tmp = xt[af].compat_offsets; tmp; tmp = next) { 449 xt[af].number = 0;
445 next = tmp->next;
446 kfree(tmp);
447 }
448 xt[af].compat_offsets = NULL;
449 } 450 }
450} 451}
451EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); 452EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
452 453
453int xt_compat_calc_jump(u_int8_t af, unsigned int offset) 454int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
454{ 455{
455 struct compat_delta *tmp; 456 struct compat_delta *tmp = xt[af].compat_tab;
456 int delta; 457 int mid, left = 0, right = xt[af].cur - 1;
457 458
458 for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) 459 while (left <= right) {
459 if (tmp->offset < offset) 460 mid = (left + right) >> 1;
460 delta += tmp->delta; 461 if (offset > tmp[mid].offset)
461 return delta; 462 left = mid + 1;
463 else if (offset < tmp[mid].offset)
464 right = mid - 1;
465 else
466 return mid ? tmp[mid - 1].delta : 0;
467 }
468 WARN_ON_ONCE(1);
469 return 0;
462} 470}
463EXPORT_SYMBOL_GPL(xt_compat_calc_jump); 471EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
464 472
473void xt_compat_init_offsets(u_int8_t af, unsigned int number)
474{
475 xt[af].number = number;
476 xt[af].cur = 0;
477}
478EXPORT_SYMBOL(xt_compat_init_offsets);
479
465int xt_compat_match_offset(const struct xt_match *match) 480int xt_compat_match_offset(const struct xt_match *match)
466{ 481{
467 u_int16_t csize = match->compatsize ? : match->matchsize; 482 u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -820,6 +835,21 @@ xt_replace_table(struct xt_table *table,
820 */ 835 */
821 local_bh_enable(); 836 local_bh_enable();
822 837
838#ifdef CONFIG_AUDIT
839 if (audit_enabled) {
840 struct audit_buffer *ab;
841
842 ab = audit_log_start(current->audit_context, GFP_KERNEL,
843 AUDIT_NETFILTER_CFG);
844 if (ab) {
845 audit_log_format(ab, "table=%s family=%u entries=%u",
846 table->name, table->af,
847 private->number);
848 audit_log_end(ab);
849 }
850 }
851#endif
852
823 return private; 853 return private;
824} 854}
825EXPORT_SYMBOL_GPL(xt_replace_table); 855EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1338,7 +1368,7 @@ static int __init xt_init(void)
1338 mutex_init(&xt[i].mutex); 1368 mutex_init(&xt[i].mutex);
1339#ifdef CONFIG_COMPAT 1369#ifdef CONFIG_COMPAT
1340 mutex_init(&xt[i].compat_mutex); 1370 mutex_init(&xt[i].compat_mutex);
1341 xt[i].compat_offsets = NULL; 1371 xt[i].compat_tab = NULL;
1342#endif 1372#endif
1343 INIT_LIST_HEAD(&xt[i].target); 1373 INIT_LIST_HEAD(&xt[i].target);
1344 INIT_LIST_HEAD(&xt[i].match); 1374 INIT_LIST_HEAD(&xt[i].match);
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
new file mode 100644
index 000000000000..81802d27346e
--- /dev/null
+++ b/net/netfilter/xt_AUDIT.c
@@ -0,0 +1,204 @@
1/*
2 * Creates audit record for dropped/accepted packets
3 *
4 * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
5 * (C) 2010-2011 Red Hat, Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10*/
11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/audit.h>
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/tcp.h>
18#include <linux/udp.h>
19#include <linux/if_arp.h>
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_AUDIT.h>
22#include <net/ipv6.h>
23#include <net/ip.h>
24
25MODULE_LICENSE("GPL");
26MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
27MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
28MODULE_ALIAS("ipt_AUDIT");
29MODULE_ALIAS("ip6t_AUDIT");
30MODULE_ALIAS("ebt_AUDIT");
31MODULE_ALIAS("arpt_AUDIT");
32
33static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
34 unsigned int proto, unsigned int offset)
35{
36 switch (proto) {
37 case IPPROTO_TCP:
38 case IPPROTO_UDP:
39 case IPPROTO_UDPLITE: {
40 const __be16 *pptr;
41 __be16 _ports[2];
42
43 pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports);
44 if (pptr == NULL) {
45 audit_log_format(ab, " truncated=1");
46 return;
47 }
48
49 audit_log_format(ab, " sport=%hu dport=%hu",
50 ntohs(pptr[0]), ntohs(pptr[1]));
51 }
52 break;
53
54 case IPPROTO_ICMP:
55 case IPPROTO_ICMPV6: {
56 const u8 *iptr;
57 u8 _ih[2];
58
59 iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih);
60 if (iptr == NULL) {
61 audit_log_format(ab, " truncated=1");
62 return;
63 }
64
65 audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
66 iptr[0], iptr[1]);
67
68 }
69 break;
70 }
71}
72
73static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
74{
75 struct iphdr _iph;
76 const struct iphdr *ih;
77
78 ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
79 if (!ih) {
80 audit_log_format(ab, " truncated=1");
81 return;
82 }
83
84 audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
85 &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol);
86
87 if (ntohs(ih->frag_off) & IP_OFFSET) {
88 audit_log_format(ab, " frag=1");
89 return;
90 }
91
92 audit_proto(ab, skb, ih->protocol, ih->ihl * 4);
93}
94
95static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
96{
97 struct ipv6hdr _ip6h;
98 const struct ipv6hdr *ih;
99 u8 nexthdr;
100 int offset;
101
102 ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
103 if (!ih) {
104 audit_log_format(ab, " truncated=1");
105 return;
106 }
107
108 nexthdr = ih->nexthdr;
109 offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
110 &nexthdr);
111
112 audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
113 &ih->saddr, &ih->daddr, nexthdr);
114
115 if (offset)
116 audit_proto(ab, skb, nexthdr, offset);
117}
118
119static unsigned int
120audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
121{
122 const struct xt_audit_info *info = par->targinfo;
123 struct audit_buffer *ab;
124
125 ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
126 if (ab == NULL)
127 goto errout;
128
129 audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
130 info->type, par->hooknum, skb->len,
131 par->in ? par->in->name : "?",
132 par->out ? par->out->name : "?");
133
134 if (skb->mark)
135 audit_log_format(ab, " mark=%#x", skb->mark);
136
137 if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
138 audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
139 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
140 ntohs(eth_hdr(skb)->h_proto));
141
142 if (par->family == NFPROTO_BRIDGE) {
143 switch (eth_hdr(skb)->h_proto) {
144 case __constant_htons(ETH_P_IP):
145 audit_ip4(ab, skb);
146 break;
147
148 case __constant_htons(ETH_P_IPV6):
149 audit_ip6(ab, skb);
150 break;
151 }
152 }
153 }
154
155 switch (par->family) {
156 case NFPROTO_IPV4:
157 audit_ip4(ab, skb);
158 break;
159
160 case NFPROTO_IPV6:
161 audit_ip6(ab, skb);
162 break;
163 }
164
165 audit_log_end(ab);
166
167errout:
168 return XT_CONTINUE;
169}
170
171static int audit_tg_check(const struct xt_tgchk_param *par)
172{
173 const struct xt_audit_info *info = par->targinfo;
174
175 if (info->type > XT_AUDIT_TYPE_MAX) {
176 pr_info("Audit type out of range (valid range: 0..%hhu)\n",
177 XT_AUDIT_TYPE_MAX);
178 return -ERANGE;
179 }
180
181 return 0;
182}
183
184static struct xt_target audit_tg_reg __read_mostly = {
185 .name = "AUDIT",
186 .family = NFPROTO_UNSPEC,
187 .target = audit_tg,
188 .targetsize = sizeof(struct xt_audit_info),
189 .checkentry = audit_tg_check,
190 .me = THIS_MODULE,
191};
192
193static int __init audit_tg_init(void)
194{
195 return xt_register_target(&audit_tg_reg);
196}
197
198static void __exit audit_tg_exit(void)
199{
200 xt_unregister_target(&audit_tg_reg);
201}
202
203module_init(audit_tg_init);
204module_exit(audit_tg_exit);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index c2c0e4abeb99..af9c4dadf816 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -19,12 +19,14 @@
19#include <linux/netfilter_ipv6.h> 19#include <linux/netfilter_ipv6.h>
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_CLASSIFY.h> 21#include <linux/netfilter/xt_CLASSIFY.h>
22#include <linux/netfilter_arp.h>
22 23
23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 24MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
24MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
25MODULE_DESCRIPTION("Xtables: Qdisc classification"); 26MODULE_DESCRIPTION("Xtables: Qdisc classification");
26MODULE_ALIAS("ipt_CLASSIFY"); 27MODULE_ALIAS("ipt_CLASSIFY");
27MODULE_ALIAS("ip6t_CLASSIFY"); 28MODULE_ALIAS("ip6t_CLASSIFY");
29MODULE_ALIAS("arpt_CLASSIFY");
28 30
29static unsigned int 31static unsigned int
30classify_tg(struct sk_buff *skb, const struct xt_action_param *par) 32classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
35 return XT_CONTINUE; 37 return XT_CONTINUE;
36} 38}
37 39
38static struct xt_target classify_tg_reg __read_mostly = { 40static struct xt_target classify_tg_reg[] __read_mostly = {
39 .name = "CLASSIFY", 41 {
40 .revision = 0, 42 .name = "CLASSIFY",
41 .family = NFPROTO_UNSPEC, 43 .revision = 0,
42 .table = "mangle", 44 .family = NFPROTO_UNSPEC,
43 .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | 45 .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
44 (1 << NF_INET_POST_ROUTING), 46 (1 << NF_INET_POST_ROUTING),
45 .target = classify_tg, 47 .target = classify_tg,
46 .targetsize = sizeof(struct xt_classify_target_info), 48 .targetsize = sizeof(struct xt_classify_target_info),
47 .me = THIS_MODULE, 49 .me = THIS_MODULE,
50 },
51 {
52 .name = "CLASSIFY",
53 .revision = 0,
54 .family = NFPROTO_ARP,
55 .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
56 .target = classify_tg,
57 .targetsize = sizeof(struct xt_classify_target_info),
58 .me = THIS_MODULE,
59 },
48}; 60};
49 61
50static int __init classify_tg_init(void) 62static int __init classify_tg_init(void)
51{ 63{
52 return xt_register_target(&classify_tg_reg); 64 return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
53} 65}
54 66
55static void __exit classify_tg_exit(void) 67static void __exit classify_tg_exit(void)
56{ 68{
57 xt_unregister_target(&classify_tg_reg); 69 xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
58} 70}
59 71
60module_init(classify_tg_init); 72module_init(classify_tg_init);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index be1f22e13545..3bdd443aaf15 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
313MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); 313MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
314MODULE_DESCRIPTION("Xtables: idle time monitor"); 314MODULE_DESCRIPTION("Xtables: idle time monitor");
315MODULE_LICENSE("GPL v2"); 315MODULE_LICENSE("GPL v2");
316MODULE_ALIAS("ipt_IDLETIMER");
317MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index a4140509eea1..993de2ba89d3 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
31MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
32MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); 32MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
33MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); 33MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
34MODULE_ALIAS("ipt_LED");
35MODULE_ALIAS("ip6t_LED");
34 36
35static LIST_HEAD(xt_led_triggers); 37static LIST_HEAD(xt_led_triggers);
36static DEFINE_MUTEX(xt_led_mutex); 38static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 039cce1bde3d..d4f4b5d66b20 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
72 72
73 if (info->queues_total > 1) { 73 if (info->queues_total > 1) {
74 if (par->family == NFPROTO_IPV4) 74 if (par->family == NFPROTO_IPV4)
75 queue = hash_v4(skb) % info->queues_total + queue; 75 queue = (((u64) hash_v4(skb) * info->queues_total) >>
76 32) + queue;
76#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 77#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
77 else if (par->family == NFPROTO_IPV6) 78 else if (par->family == NFPROTO_IPV6)
78 queue = hash_v6(skb) % info->queues_total + queue; 79 queue = (((u64) hash_v6(skb) * info->queues_total) >>
80 32) + queue;
79#endif 81#endif
80 } 82 }
81 return NF_QUEUE_NR(queue); 83 return NF_QUEUE_NR(queue);
82} 84}
83 85
84static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) 86static unsigned int
87nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
85{ 88{
86 const struct xt_NFQ_info_v1 *info = par->targinfo; 89 const struct xt_NFQ_info_v2 *info = par->targinfo;
90 unsigned int ret = nfqueue_tg_v1(skb, par);
91
92 if (info->bypass)
93 ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
94 return ret;
95}
96
97static int nfqueue_tg_check(const struct xt_tgchk_param *par)
98{
99 const struct xt_NFQ_info_v2 *info = par->targinfo;
87 u32 maxid; 100 u32 maxid;
88 101
89 if (unlikely(!rnd_inited)) { 102 if (unlikely(!rnd_inited)) {
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
100 info->queues_total, maxid); 113 info->queues_total, maxid);
101 return -ERANGE; 114 return -ERANGE;
102 } 115 }
116 if (par->target->revision == 2 && info->bypass > 1)
117 return -EINVAL;
103 return 0; 118 return 0;
104} 119}
105 120
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
115 .name = "NFQUEUE", 130 .name = "NFQUEUE",
116 .revision = 1, 131 .revision = 1,
117 .family = NFPROTO_UNSPEC, 132 .family = NFPROTO_UNSPEC,
118 .checkentry = nfqueue_tg_v1_check, 133 .checkentry = nfqueue_tg_check,
119 .target = nfqueue_tg_v1, 134 .target = nfqueue_tg_v1,
120 .targetsize = sizeof(struct xt_NFQ_info_v1), 135 .targetsize = sizeof(struct xt_NFQ_info_v1),
121 .me = THIS_MODULE, 136 .me = THIS_MODULE,
122 }, 137 },
138 {
139 .name = "NFQUEUE",
140 .revision = 2,
141 .family = NFPROTO_UNSPEC,
142 .checkentry = nfqueue_tg_check,
143 .target = nfqueue_tg_v2,
144 .targetsize = sizeof(struct xt_NFQ_info_v2),
145 .me = THIS_MODULE,
146 },
123}; 147};
124 148
125static int __init nfqueue_tg_init(void) 149static int __init nfqueue_tg_init(void)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5c5b6b921b84..452bc16af56c 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -204,11 +204,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
204 &info->mask, par->family); 204 &info->mask, par->family);
205 spin_unlock_bh(&info->data->lock); 205 spin_unlock_bh(&info->data->lock);
206 206
207 if (connections < 0) { 207 if (connections < 0)
208 /* kmalloc failed, drop it entirely */ 208 /* kmalloc failed, drop it entirely */
209 par->hotdrop = true; 209 goto hotdrop;
210 return false;
211 }
212 210
213 return (connections > info->limit) ^ info->inverse; 211 return (connections > info->limit) ^ info->inverse;
214 212
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index e536710ad916..4ef1b63ad73f 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
112 return true; 112 return true;
113} 113}
114 114
115static inline bool
116port_match(u16 min, u16 max, u16 port, bool invert)
117{
118 return (port >= min && port <= max) ^ invert;
119}
120
121static inline bool
122ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
123 const struct nf_conn *ct)
124{
125 const struct nf_conntrack_tuple *tuple;
126
127 tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
128 if ((info->match_flags & XT_CONNTRACK_PROTO) &&
129 (nf_ct_protonum(ct) == info->l4proto) ^
130 !(info->invert_flags & XT_CONNTRACK_PROTO))
131 return false;
132
133 /* Shortcut to match all recognized protocols by using ->src.all. */
134 if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
135 !port_match(info->origsrc_port, info->origsrc_port_high,
136 ntohs(tuple->src.u.all),
137 info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
138 return false;
139
140 if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
141 !port_match(info->origdst_port, info->origdst_port_high,
142 ntohs(tuple->dst.u.all),
143 info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
144 return false;
145
146 tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
147
148 if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
149 !port_match(info->replsrc_port, info->replsrc_port_high,
150 ntohs(tuple->src.u.all),
151 info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
152 return false;
153
154 if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
155 !port_match(info->repldst_port, info->repldst_port_high,
156 ntohs(tuple->dst.u.all),
157 info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
158 return false;
159
160 return true;
161}
162
115static bool 163static bool
116conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, 164conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
117 u16 state_mask, u16 status_mask) 165 u16 state_mask, u16 status_mask)
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
170 !(info->invert_flags & XT_CONNTRACK_REPLDST)) 218 !(info->invert_flags & XT_CONNTRACK_REPLDST))
171 return false; 219 return false;
172 220
173 if (!ct_proto_port_check(info, ct)) 221 if (par->match->revision != 3) {
174 return false; 222 if (!ct_proto_port_check(info, ct))
223 return false;
224 } else {
225 if (!ct_proto_port_check_v3(par->matchinfo, ct))
226 return false;
227 }
175 228
176 if ((info->match_flags & XT_CONNTRACK_STATUS) && 229 if ((info->match_flags & XT_CONNTRACK_STATUS) &&
177 (!!(status_mask & ct->status) ^ 230 (!!(status_mask & ct->status) ^
@@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
207 return conntrack_mt(skb, par, info->state_mask, info->status_mask); 260 return conntrack_mt(skb, par, info->state_mask, info->status_mask);
208} 261}
209 262
263static bool
264conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
265{
266 const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
267
268 return conntrack_mt(skb, par, info->state_mask, info->status_mask);
269}
270
210static int conntrack_mt_check(const struct xt_mtchk_param *par) 271static int conntrack_mt_check(const struct xt_mtchk_param *par)
211{ 272{
212 int ret; 273 int ret;
@@ -244,6 +305,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
244 .destroy = conntrack_mt_destroy, 305 .destroy = conntrack_mt_destroy,
245 .me = THIS_MODULE, 306 .me = THIS_MODULE,
246 }, 307 },
308 {
309 .name = "conntrack",
310 .revision = 3,
311 .family = NFPROTO_UNSPEC,
312 .matchsize = sizeof(struct xt_conntrack_mtinfo3),
313 .match = conntrack_mt_v3,
314 .checkentry = conntrack_mt_check,
315 .destroy = conntrack_mt_destroy,
316 .me = THIS_MODULE,
317 },
247}; 318};
248 319
249static int __init conntrack_mt_init(void) 320static int __init conntrack_mt_init(void)
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index b39db8a5cbae..c7a2e5466bc4 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
22MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); 23MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
24MODULE_DESCRIPTION("Xtables: CPU match"); 24MODULE_DESCRIPTION("Xtables: CPU match");
25MODULE_ALIAS("ipt_cpu");
26MODULE_ALIAS("ip6t_cpu");
25 27
26static int cpu_mt_check(const struct xt_mtchk_param *par) 28static int cpu_mt_check(const struct xt_mtchk_param *par)
27{ 29{
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 9127a3d8aa35..bb10b0717f1b 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
85 /* 85 /*
86 * Check if the packet belongs to an existing entry 86 * Check if the packet belongs to an existing entry
87 */ 87 */
88 cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); 88 cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
89 if (unlikely(cp == NULL)) { 89 if (unlikely(cp == NULL)) {
90 match = false; 90 match = false;
91 goto out; 91 goto out;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 73431d4aa6ef..e318f458713e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -255,7 +255,7 @@ config NET_CLS_TCINDEX
255 255
256config NET_CLS_ROUTE4 256config NET_CLS_ROUTE4
257 tristate "Routing decision (ROUTE)" 257 tristate "Routing decision (ROUTE)"
258 select NET_CLS_ROUTE 258 select IP_ROUTE_CLASSID
259 select NET_CLS 259 select NET_CLS
260 ---help--- 260 ---help---
261 If you say Y here, you will be able to classify packets 261 If you say Y here, you will be able to classify packets
@@ -264,9 +264,6 @@ config NET_CLS_ROUTE4
264 To compile this code as a module, choose M here: the 264 To compile this code as a module, choose M here: the
265 module will be called cls_route. 265 module will be called cls_route.
266 266
267config NET_CLS_ROUTE
268 bool
269
270config NET_CLS_FW 267config NET_CLS_FW
271 tristate "Netfilter mark (FW)" 268 tristate "Netfilter mark (FW)"
272 select NET_CLS 269 select NET_CLS
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5eec16e516b9..8ec01391d988 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -276,7 +276,7 @@ fallback:
276 276
277static u32 flow_get_rtclassid(const struct sk_buff *skb) 277static u32 flow_get_rtclassid(const struct sk_buff *skb)
278{ 278{
279#ifdef CONFIG_NET_CLS_ROUTE 279#ifdef CONFIG_IP_ROUTE_CLASSID
280 if (skb_dst(skb)) 280 if (skb_dst(skb))
281 return skb_dst(skb)->tclassid; 281 return skb_dst(skb)->tclassid;
282#endif 282#endif
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 7af1f65fe678..a889d099320f 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -252,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
252 if (unlikely(skb_dst(skb) == NULL)) 252 if (unlikely(skb_dst(skb) == NULL))
253 *err = -1; 253 *err = -1;
254 else 254 else
255#ifdef CONFIG_NET_CLS_ROUTE 255#ifdef CONFIG_IP_ROUTE_CLASSID
256 dst->value = skb_dst(skb)->tclassid; 256 dst->value = skb_dst(skb)->tclassid;
257#else 257#else
258 dst->value = 0; 258 dst->value = 0;