diff options
author | David S. Miller <davem@davemloft.net> | 2011-01-20 03:06:15 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-01-20 03:06:15 -0500 |
commit | a07aa004c8d814a975b1a68afdb7baaa8f1b91d5 (patch) | |
tree | 652edc2dce9732a64780b9e332034b6567631a8b | |
parent | cc7ec456f82da7f89a5b376e613b3ac4311b3e9a (diff) | |
parent | 5d8449286456659cdd0998e62d80df2d9e77e9e3 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
135 files changed, 4477 insertions, 1712 deletions
diff --git a/include/linux/audit.h b/include/linux/audit.h index 359df0487690..9d339eb27881 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h | |||
@@ -103,6 +103,8 @@ | |||
103 | #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ | 103 | #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ |
104 | #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ | 104 | #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ |
105 | #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ | 105 | #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ |
106 | #define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ | ||
107 | #define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */ | ||
106 | 108 | ||
107 | #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ | 109 | #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ |
108 | #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ | 110 | #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ |
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 5f43a3b2e3ad..4deb3834d62c 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h | |||
@@ -89,6 +89,14 @@ | |||
89 | #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ | 89 | #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ |
90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ | 90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ |
91 | 91 | ||
92 | #define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \ | ||
93 | IP_VS_CONN_F_NOOUTPUT | \ | ||
94 | IP_VS_CONN_F_INACTIVE | \ | ||
95 | IP_VS_CONN_F_SEQ_MASK | \ | ||
96 | IP_VS_CONN_F_NO_CPORT | \ | ||
97 | IP_VS_CONN_F_TEMPLATE \ | ||
98 | ) | ||
99 | |||
92 | /* Flags that are not sent to backup server start from bit 16 */ | 100 | /* Flags that are not sent to backup server start from bit 16 */ |
93 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ | 101 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ |
94 | 102 | ||
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 1893837b3966..eeec00abb664 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h | |||
@@ -24,16 +24,20 @@ | |||
24 | #define NF_MAX_VERDICT NF_STOP | 24 | #define NF_MAX_VERDICT NF_STOP |
25 | 25 | ||
26 | /* we overload the higher bits for encoding auxiliary data such as the queue | 26 | /* we overload the higher bits for encoding auxiliary data such as the queue |
27 | * number. Not nice, but better than additional function arguments. */ | 27 | * number or errno values. Not nice, but better than additional function |
28 | #define NF_VERDICT_MASK 0x0000ffff | 28 | * arguments. */ |
29 | #define NF_VERDICT_BITS 16 | 29 | #define NF_VERDICT_MASK 0x000000ff |
30 | |||
31 | /* extra verdict flags have mask 0x0000ff00 */ | ||
32 | #define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 | ||
30 | 33 | ||
34 | /* queue number (NF_QUEUE) or errno (NF_DROP) */ | ||
31 | #define NF_VERDICT_QMASK 0xffff0000 | 35 | #define NF_VERDICT_QMASK 0xffff0000 |
32 | #define NF_VERDICT_QBITS 16 | 36 | #define NF_VERDICT_QBITS 16 |
33 | 37 | ||
34 | #define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) | 38 | #define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) |
35 | 39 | ||
36 | #define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP) | 40 | #define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) |
37 | 41 | ||
38 | /* only for userspace compatibility */ | 42 | /* only for userspace compatibility */ |
39 | #ifndef __KERNEL__ | 43 | #ifndef __KERNEL__ |
@@ -41,6 +45,9 @@ | |||
41 | <= 0x2000 is used for protocol-flags. */ | 45 | <= 0x2000 is used for protocol-flags. */ |
42 | #define NFC_UNKNOWN 0x4000 | 46 | #define NFC_UNKNOWN 0x4000 |
43 | #define NFC_ALTERED 0x8000 | 47 | #define NFC_ALTERED 0x8000 |
48 | |||
49 | /* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ | ||
50 | #define NF_VERDICT_BITS 16 | ||
44 | #endif | 51 | #endif |
45 | 52 | ||
46 | enum nf_inet_hooks { | 53 | enum nf_inet_hooks { |
@@ -72,6 +79,10 @@ union nf_inet_addr { | |||
72 | 79 | ||
73 | #ifdef __KERNEL__ | 80 | #ifdef __KERNEL__ |
74 | #ifdef CONFIG_NETFILTER | 81 | #ifdef CONFIG_NETFILTER |
82 | static inline int NF_DROP_GETERR(int verdict) | ||
83 | { | ||
84 | return -(verdict >> NF_VERDICT_QBITS); | ||
85 | } | ||
75 | 86 | ||
76 | static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, | 87 | static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, |
77 | const union nf_inet_addr *a2) | 88 | const union nf_inet_addr *a2) |
@@ -267,7 +278,7 @@ struct nf_afinfo { | |||
267 | int route_key_size; | 278 | int route_key_size; |
268 | }; | 279 | }; |
269 | 280 | ||
270 | extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO]; | 281 | extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO]; |
271 | static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) | 282 | static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) |
272 | { | 283 | { |
273 | return rcu_dereference(nf_afinfo[family]); | 284 | return rcu_dereference(nf_afinfo[family]); |
@@ -357,9 +368,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) | |||
357 | #endif /*CONFIG_NETFILTER*/ | 368 | #endif /*CONFIG_NETFILTER*/ |
358 | 369 | ||
359 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 370 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
360 | extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); | 371 | extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu; |
361 | extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); | 372 | extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); |
362 | extern void (*nf_ct_destroy)(struct nf_conntrack *); | 373 | extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; |
363 | #else | 374 | #else |
364 | static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} | 375 | static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} |
365 | #endif | 376 | #endif |
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 9d40effe7ca7..89c0d1e20d72 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild | |||
@@ -9,6 +9,7 @@ header-y += nfnetlink_conntrack.h | |||
9 | header-y += nfnetlink_log.h | 9 | header-y += nfnetlink_log.h |
10 | header-y += nfnetlink_queue.h | 10 | header-y += nfnetlink_queue.h |
11 | header-y += x_tables.h | 11 | header-y += x_tables.h |
12 | header-y += xt_AUDIT.h | ||
12 | header-y += xt_CHECKSUM.h | 13 | header-y += xt_CHECKSUM.h |
13 | header-y += xt_CLASSIFY.h | 14 | header-y += xt_CLASSIFY.h |
14 | header-y += xt_CONNMARK.h | 15 | header-y += xt_CONNMARK.h |
@@ -55,6 +56,7 @@ header-y += xt_rateest.h | |||
55 | header-y += xt_realm.h | 56 | header-y += xt_realm.h |
56 | header-y += xt_recent.h | 57 | header-y += xt_recent.h |
57 | header-y += xt_sctp.h | 58 | header-y += xt_sctp.h |
59 | header-y += xt_socket.h | ||
58 | header-y += xt_state.h | 60 | header-y += xt_state.h |
59 | header-y += xt_statistic.h | 61 | header-y += xt_statistic.h |
60 | header-y += xt_string.h | 62 | header-y += xt_string.h |
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h new file mode 100644 index 000000000000..064bc63a5346 --- /dev/null +++ b/include/linux/netfilter/nf_conntrack_snmp.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef _NF_CONNTRACK_SNMP_H | ||
2 | #define _NF_CONNTRACK_SNMP_H | ||
3 | |||
4 | extern int (*nf_nat_snmp_hook)(struct sk_buff *skb, | ||
5 | unsigned int protoff, | ||
6 | struct nf_conn *ct, | ||
7 | enum ip_conntrack_info ctinfo); | ||
8 | |||
9 | #endif /* _NF_CONNTRACK_SNMP_H */ | ||
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 19711e3ffd42..debf1aefd753 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h | |||
@@ -42,6 +42,7 @@ enum ctattr_type { | |||
42 | CTA_SECMARK, /* obsolete */ | 42 | CTA_SECMARK, /* obsolete */ |
43 | CTA_ZONE, | 43 | CTA_ZONE, |
44 | CTA_SECCTX, | 44 | CTA_SECCTX, |
45 | CTA_TIMESTAMP, | ||
45 | __CTA_MAX | 46 | __CTA_MAX |
46 | }; | 47 | }; |
47 | #define CTA_MAX (__CTA_MAX - 1) | 48 | #define CTA_MAX (__CTA_MAX - 1) |
@@ -127,6 +128,14 @@ enum ctattr_counters { | |||
127 | }; | 128 | }; |
128 | #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) | 129 | #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) |
129 | 130 | ||
131 | enum ctattr_tstamp { | ||
132 | CTA_TIMESTAMP_UNSPEC, | ||
133 | CTA_TIMESTAMP_START, | ||
134 | CTA_TIMESTAMP_STOP, | ||
135 | __CTA_TIMESTAMP_MAX | ||
136 | }; | ||
137 | #define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1) | ||
138 | |||
130 | enum ctattr_nat { | 139 | enum ctattr_nat { |
131 | CTA_NAT_UNSPEC, | 140 | CTA_NAT_UNSPEC, |
132 | CTA_NAT_MINIP, | 141 | CTA_NAT_MINIP, |
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6712e713b299..37219525ff6f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h | |||
@@ -611,8 +611,9 @@ struct _compat_xt_align { | |||
611 | extern void xt_compat_lock(u_int8_t af); | 611 | extern void xt_compat_lock(u_int8_t af); |
612 | extern void xt_compat_unlock(u_int8_t af); | 612 | extern void xt_compat_unlock(u_int8_t af); |
613 | 613 | ||
614 | extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); | 614 | extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); |
615 | extern void xt_compat_flush_offsets(u_int8_t af); | 615 | extern void xt_compat_flush_offsets(u_int8_t af); |
616 | extern void xt_compat_init_offsets(u_int8_t af, unsigned int number); | ||
616 | extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); | 617 | extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); |
617 | 618 | ||
618 | extern int xt_compat_match_offset(const struct xt_match *match); | 619 | extern int xt_compat_match_offset(const struct xt_match *match); |
diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/linux/netfilter/xt_AUDIT.h new file mode 100644 index 000000000000..38751d2ea52b --- /dev/null +++ b/include/linux/netfilter/xt_AUDIT.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Header file for iptables xt_AUDIT target | ||
3 | * | ||
4 | * (C) 2010-2011 Thomas Graf <tgraf@redhat.com> | ||
5 | * (C) 2010-2011 Red Hat, Inc. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #ifndef _XT_AUDIT_TARGET_H | ||
13 | #define _XT_AUDIT_TARGET_H | ||
14 | |||
15 | #include <linux/types.h> | ||
16 | |||
17 | enum { | ||
18 | XT_AUDIT_TYPE_ACCEPT = 0, | ||
19 | XT_AUDIT_TYPE_DROP, | ||
20 | XT_AUDIT_TYPE_REJECT, | ||
21 | __XT_AUDIT_TYPE_MAX, | ||
22 | }; | ||
23 | |||
24 | #define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1) | ||
25 | |||
26 | struct xt_audit_info { | ||
27 | __u8 type; /* XT_AUDIT_TYPE_* */ | ||
28 | }; | ||
29 | |||
30 | #endif /* _XT_AUDIT_TARGET_H */ | ||
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h index 1b564106891d..fbf4c5658554 100644 --- a/include/linux/netfilter/xt_CT.h +++ b/include/linux/netfilter/xt_CT.h | |||
@@ -4,11 +4,11 @@ | |||
4 | #define XT_CT_NOTRACK 0x1 | 4 | #define XT_CT_NOTRACK 0x1 |
5 | 5 | ||
6 | struct xt_ct_target_info { | 6 | struct xt_ct_target_info { |
7 | u_int16_t flags; | 7 | __u16 flags; |
8 | u_int16_t zone; | 8 | __u16 zone; |
9 | u_int32_t ct_events; | 9 | __u32 ct_events; |
10 | u_int32_t exp_events; | 10 | __u32 exp_events; |
11 | char helper[16]; | 11 | char helper[16]; |
12 | 12 | ||
13 | /* Used internally by the kernel */ | 13 | /* Used internally by the kernel */ |
14 | struct nf_conn *ct __attribute__((aligned(8))); | 14 | struct nf_conn *ct __attribute__((aligned(8))); |
diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h index 2584f4a777de..9eafdbbb401c 100644 --- a/include/linux/netfilter/xt_NFQUEUE.h +++ b/include/linux/netfilter/xt_NFQUEUE.h | |||
@@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 { | |||
20 | __u16 queues_total; | 20 | __u16 queues_total; |
21 | }; | 21 | }; |
22 | 22 | ||
23 | struct xt_NFQ_info_v2 { | ||
24 | __u16 queuenum; | ||
25 | __u16 queues_total; | ||
26 | __u16 bypass; | ||
27 | }; | ||
28 | |||
23 | #endif /* _XT_NFQ_TARGET_H */ | 29 | #endif /* _XT_NFQ_TARGET_H */ |
diff --git a/include/linux/netfilter/xt_TCPOPTSTRIP.h b/include/linux/netfilter/xt_TCPOPTSTRIP.h index 2db543214ff5..342ef14b1761 100644 --- a/include/linux/netfilter/xt_TCPOPTSTRIP.h +++ b/include/linux/netfilter/xt_TCPOPTSTRIP.h | |||
@@ -7,7 +7,7 @@ | |||
7 | (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0) | 7 | (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0) |
8 | 8 | ||
9 | struct xt_tcpoptstrip_target_info { | 9 | struct xt_tcpoptstrip_target_info { |
10 | u_int32_t strip_bmap[8]; | 10 | __u32 strip_bmap[8]; |
11 | }; | 11 | }; |
12 | 12 | ||
13 | #endif /* _XT_TCPOPTSTRIP_H */ | 13 | #endif /* _XT_TCPOPTSTRIP_H */ |
diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h index 3f3d69361289..8097e0b4c15e 100644 --- a/include/linux/netfilter/xt_TPROXY.h +++ b/include/linux/netfilter/xt_TPROXY.h | |||
@@ -5,15 +5,15 @@ | |||
5 | * redirection. We can get rid of that whenever we get support for | 5 | * redirection. We can get rid of that whenever we get support for |
6 | * multiple targets in the same rule. */ | 6 | * multiple targets in the same rule. */ |
7 | struct xt_tproxy_target_info { | 7 | struct xt_tproxy_target_info { |
8 | u_int32_t mark_mask; | 8 | __u32 mark_mask; |
9 | u_int32_t mark_value; | 9 | __u32 mark_value; |
10 | __be32 laddr; | 10 | __be32 laddr; |
11 | __be16 lport; | 11 | __be16 lport; |
12 | }; | 12 | }; |
13 | 13 | ||
14 | struct xt_tproxy_target_info_v1 { | 14 | struct xt_tproxy_target_info_v1 { |
15 | u_int32_t mark_mask; | 15 | __u32 mark_mask; |
16 | u_int32_t mark_value; | 16 | __u32 mark_value; |
17 | union nf_inet_addr laddr; | 17 | union nf_inet_addr laddr; |
18 | __be16 lport; | 18 | __be16 lport; |
19 | }; | 19 | }; |
diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h index 886682656f09..66cfa3c782ac 100644 --- a/include/linux/netfilter/xt_cluster.h +++ b/include/linux/netfilter/xt_cluster.h | |||
@@ -6,10 +6,10 @@ enum xt_cluster_flags { | |||
6 | }; | 6 | }; |
7 | 7 | ||
8 | struct xt_cluster_match_info { | 8 | struct xt_cluster_match_info { |
9 | u_int32_t total_nodes; | 9 | __u32 total_nodes; |
10 | u_int32_t node_mask; | 10 | __u32 node_mask; |
11 | u_int32_t hash_seed; | 11 | __u32 hash_seed; |
12 | u_int32_t flags; | 12 | __u32 flags; |
13 | }; | 13 | }; |
14 | 14 | ||
15 | #define XT_CLUSTER_NODES_MAX 32 | 15 | #define XT_CLUSTER_NODES_MAX 32 |
diff --git a/include/linux/netfilter/xt_comment.h b/include/linux/netfilter/xt_comment.h index eacfedc6b5d0..0ea5e79f5bd7 100644 --- a/include/linux/netfilter/xt_comment.h +++ b/include/linux/netfilter/xt_comment.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #define XT_MAX_COMMENT_LEN 256 | 4 | #define XT_MAX_COMMENT_LEN 256 |
5 | 5 | ||
6 | struct xt_comment_info { | 6 | struct xt_comment_info { |
7 | unsigned char comment[XT_MAX_COMMENT_LEN]; | 7 | char comment[XT_MAX_COMMENT_LEN]; |
8 | }; | 8 | }; |
9 | 9 | ||
10 | #endif /* XT_COMMENT_H */ | 10 | #endif /* XT_COMMENT_H */ |
diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 54f47a2f6152..74b904d8f99c 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h | |||
@@ -58,4 +58,19 @@ struct xt_conntrack_mtinfo2 { | |||
58 | __u16 state_mask, status_mask; | 58 | __u16 state_mask, status_mask; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | struct xt_conntrack_mtinfo3 { | ||
62 | union nf_inet_addr origsrc_addr, origsrc_mask; | ||
63 | union nf_inet_addr origdst_addr, origdst_mask; | ||
64 | union nf_inet_addr replsrc_addr, replsrc_mask; | ||
65 | union nf_inet_addr repldst_addr, repldst_mask; | ||
66 | __u32 expires_min, expires_max; | ||
67 | __u16 l4proto; | ||
68 | __u16 origsrc_port, origdst_port; | ||
69 | __u16 replsrc_port, repldst_port; | ||
70 | __u16 match_flags, invert_flags; | ||
71 | __u16 state_mask, status_mask; | ||
72 | __u16 origsrc_port_high, origdst_port_high; | ||
73 | __u16 replsrc_port_high, repldst_port_high; | ||
74 | }; | ||
75 | |||
61 | #endif /*_XT_CONNTRACK_H*/ | 76 | #endif /*_XT_CONNTRACK_H*/ |
diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h index b0d28c659ab7..8bda65f0bc92 100644 --- a/include/linux/netfilter/xt_quota.h +++ b/include/linux/netfilter/xt_quota.h | |||
@@ -9,9 +9,9 @@ enum xt_quota_flags { | |||
9 | struct xt_quota_priv; | 9 | struct xt_quota_priv; |
10 | 10 | ||
11 | struct xt_quota_info { | 11 | struct xt_quota_info { |
12 | u_int32_t flags; | 12 | __u32 flags; |
13 | u_int32_t pad; | 13 | __u32 pad; |
14 | aligned_u64 quota; | 14 | aligned_u64 quota; |
15 | 15 | ||
16 | /* Used internally by the kernel */ | 16 | /* Used internally by the kernel */ |
17 | struct xt_quota_priv *master; | 17 | struct xt_quota_priv *master; |
diff --git a/include/linux/netfilter/xt_time.h b/include/linux/netfilter/xt_time.h index 14b6df412c9f..b8bd4568efdb 100644 --- a/include/linux/netfilter/xt_time.h +++ b/include/linux/netfilter/xt_time.h | |||
@@ -2,13 +2,13 @@ | |||
2 | #define _XT_TIME_H 1 | 2 | #define _XT_TIME_H 1 |
3 | 3 | ||
4 | struct xt_time_info { | 4 | struct xt_time_info { |
5 | u_int32_t date_start; | 5 | __u32 date_start; |
6 | u_int32_t date_stop; | 6 | __u32 date_stop; |
7 | u_int32_t daytime_start; | 7 | __u32 daytime_start; |
8 | u_int32_t daytime_stop; | 8 | __u32 daytime_stop; |
9 | u_int32_t monthdays_match; | 9 | __u32 monthdays_match; |
10 | u_int8_t weekdays_match; | 10 | __u8 weekdays_match; |
11 | u_int8_t flags; | 11 | __u8 flags; |
12 | }; | 12 | }; |
13 | 13 | ||
14 | enum { | 14 | enum { |
diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h index 9947f56cdbdd..e8c3d8722bae 100644 --- a/include/linux/netfilter/xt_u32.h +++ b/include/linux/netfilter/xt_u32.h | |||
@@ -9,13 +9,13 @@ enum xt_u32_ops { | |||
9 | }; | 9 | }; |
10 | 10 | ||
11 | struct xt_u32_location_element { | 11 | struct xt_u32_location_element { |
12 | u_int32_t number; | 12 | __u32 number; |
13 | u_int8_t nextop; | 13 | __u8 nextop; |
14 | }; | 14 | }; |
15 | 15 | ||
16 | struct xt_u32_value_element { | 16 | struct xt_u32_value_element { |
17 | u_int32_t min; | 17 | __u32 min; |
18 | u_int32_t max; | 18 | __u32 max; |
19 | }; | 19 | }; |
20 | 20 | ||
21 | /* | 21 | /* |
@@ -27,14 +27,14 @@ struct xt_u32_value_element { | |||
27 | struct xt_u32_test { | 27 | struct xt_u32_test { |
28 | struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; | 28 | struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; |
29 | struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; | 29 | struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; |
30 | u_int8_t nnums; | 30 | __u8 nnums; |
31 | u_int8_t nvalues; | 31 | __u8 nvalues; |
32 | }; | 32 | }; |
33 | 33 | ||
34 | struct xt_u32 { | 34 | struct xt_u32 { |
35 | struct xt_u32_test tests[XT_U32_MAXSIZE+1]; | 35 | struct xt_u32_test tests[XT_U32_MAXSIZE+1]; |
36 | u_int8_t ntests; | 36 | __u8 ntests; |
37 | u_int8_t invert; | 37 | __u8 invert; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | #endif /* _XT_U32_H */ | 40 | #endif /* _XT_U32_H */ |
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h index c73ef0b18bdc..c427764f4444 100644 --- a/include/linux/netfilter_bridge/ebt_802_3.h +++ b/include/linux/netfilter_bridge/ebt_802_3.h | |||
@@ -24,24 +24,24 @@ | |||
24 | 24 | ||
25 | /* ui has one byte ctrl, ni has two */ | 25 | /* ui has one byte ctrl, ni has two */ |
26 | struct hdr_ui { | 26 | struct hdr_ui { |
27 | uint8_t dsap; | 27 | __u8 dsap; |
28 | uint8_t ssap; | 28 | __u8 ssap; |
29 | uint8_t ctrl; | 29 | __u8 ctrl; |
30 | uint8_t orig[3]; | 30 | __u8 orig[3]; |
31 | __be16 type; | 31 | __be16 type; |
32 | }; | 32 | }; |
33 | 33 | ||
34 | struct hdr_ni { | 34 | struct hdr_ni { |
35 | uint8_t dsap; | 35 | __u8 dsap; |
36 | uint8_t ssap; | 36 | __u8 ssap; |
37 | __be16 ctrl; | 37 | __be16 ctrl; |
38 | uint8_t orig[3]; | 38 | __u8 orig[3]; |
39 | __be16 type; | 39 | __be16 type; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | struct ebt_802_3_hdr { | 42 | struct ebt_802_3_hdr { |
43 | uint8_t daddr[6]; | 43 | __u8 daddr[6]; |
44 | uint8_t saddr[6]; | 44 | __u8 saddr[6]; |
45 | __be16 len; | 45 | __be16 len; |
46 | union { | 46 | union { |
47 | struct hdr_ui ui; | 47 | struct hdr_ui ui; |
@@ -59,10 +59,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb) | |||
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | struct ebt_802_3_info { | 61 | struct ebt_802_3_info { |
62 | uint8_t sap; | 62 | __u8 sap; |
63 | __be16 type; | 63 | __be16 type; |
64 | uint8_t bitmask; | 64 | __u8 bitmask; |
65 | uint8_t invflags; | 65 | __u8 invflags; |
66 | }; | 66 | }; |
67 | 67 | ||
68 | #endif | 68 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h index 0009558609a7..686c9619dbc0 100644 --- a/include/linux/netfilter_bridge/ebt_among.h +++ b/include/linux/netfilter_bridge/ebt_among.h | |||
@@ -30,7 +30,7 @@ | |||
30 | */ | 30 | */ |
31 | 31 | ||
32 | struct ebt_mac_wormhash_tuple { | 32 | struct ebt_mac_wormhash_tuple { |
33 | uint32_t cmp[2]; | 33 | __u32 cmp[2]; |
34 | __be32 ip; | 34 | __be32 ip; |
35 | }; | 35 | }; |
36 | 36 | ||
diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h index cbf4843b6b0f..e62b5af95869 100644 --- a/include/linux/netfilter_bridge/ebt_arp.h +++ b/include/linux/netfilter_bridge/ebt_arp.h | |||
@@ -27,8 +27,8 @@ struct ebt_arp_info | |||
27 | unsigned char smmsk[ETH_ALEN]; | 27 | unsigned char smmsk[ETH_ALEN]; |
28 | unsigned char dmaddr[ETH_ALEN]; | 28 | unsigned char dmaddr[ETH_ALEN]; |
29 | unsigned char dmmsk[ETH_ALEN]; | 29 | unsigned char dmmsk[ETH_ALEN]; |
30 | uint8_t bitmask; | 30 | __u8 bitmask; |
31 | uint8_t invflags; | 31 | __u8 invflags; |
32 | }; | 32 | }; |
33 | 33 | ||
34 | #endif | 34 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h index 6a708fb92241..d99de58da2c7 100644 --- a/include/linux/netfilter_bridge/ebt_ip.h +++ b/include/linux/netfilter_bridge/ebt_ip.h | |||
@@ -31,12 +31,12 @@ struct ebt_ip_info { | |||
31 | __be32 daddr; | 31 | __be32 daddr; |
32 | __be32 smsk; | 32 | __be32 smsk; |
33 | __be32 dmsk; | 33 | __be32 dmsk; |
34 | uint8_t tos; | 34 | __u8 tos; |
35 | uint8_t protocol; | 35 | __u8 protocol; |
36 | uint8_t bitmask; | 36 | __u8 bitmask; |
37 | uint8_t invflags; | 37 | __u8 invflags; |
38 | uint16_t sport[2]; | 38 | __u16 sport[2]; |
39 | uint16_t dport[2]; | 39 | __u16 dport[2]; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | #endif | 42 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h index e5de98701519..998e9d5a6b60 100644 --- a/include/linux/netfilter_bridge/ebt_ip6.h +++ b/include/linux/netfilter_bridge/ebt_ip6.h | |||
@@ -18,8 +18,11 @@ | |||
18 | #define EBT_IP6_PROTO 0x08 | 18 | #define EBT_IP6_PROTO 0x08 |
19 | #define EBT_IP6_SPORT 0x10 | 19 | #define EBT_IP6_SPORT 0x10 |
20 | #define EBT_IP6_DPORT 0x20 | 20 | #define EBT_IP6_DPORT 0x20 |
21 | #define EBT_IP6_ICMP6 0x40 | ||
22 | |||
21 | #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\ | 23 | #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\ |
22 | EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT) | 24 | EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \ |
25 | EBT_IP6_ICMP6) | ||
23 | #define EBT_IP6_MATCH "ip6" | 26 | #define EBT_IP6_MATCH "ip6" |
24 | 27 | ||
25 | /* the same values are used for the invflags */ | 28 | /* the same values are used for the invflags */ |
@@ -28,12 +31,18 @@ struct ebt_ip6_info { | |||
28 | struct in6_addr daddr; | 31 | struct in6_addr daddr; |
29 | struct in6_addr smsk; | 32 | struct in6_addr smsk; |
30 | struct in6_addr dmsk; | 33 | struct in6_addr dmsk; |
31 | uint8_t tclass; | 34 | __u8 tclass; |
32 | uint8_t protocol; | 35 | __u8 protocol; |
33 | uint8_t bitmask; | 36 | __u8 bitmask; |
34 | uint8_t invflags; | 37 | __u8 invflags; |
35 | uint16_t sport[2]; | 38 | union { |
36 | uint16_t dport[2]; | 39 | __u16 sport[2]; |
40 | __u8 icmpv6_type[2]; | ||
41 | }; | ||
42 | union { | ||
43 | __u16 dport[2]; | ||
44 | __u8 icmpv6_code[2]; | ||
45 | }; | ||
37 | }; | 46 | }; |
38 | 47 | ||
39 | #endif | 48 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h index 4bf76b751676..721d51ffa513 100644 --- a/include/linux/netfilter_bridge/ebt_limit.h +++ b/include/linux/netfilter_bridge/ebt_limit.h | |||
@@ -10,13 +10,13 @@ | |||
10 | seconds, or one every 59 hours. */ | 10 | seconds, or one every 59 hours. */ |
11 | 11 | ||
12 | struct ebt_limit_info { | 12 | struct ebt_limit_info { |
13 | u_int32_t avg; /* Average secs between packets * scale */ | 13 | __u32 avg; /* Average secs between packets * scale */ |
14 | u_int32_t burst; /* Period multiplier for upper limit. */ | 14 | __u32 burst; /* Period multiplier for upper limit. */ |
15 | 15 | ||
16 | /* Used internally by the kernel */ | 16 | /* Used internally by the kernel */ |
17 | unsigned long prev; | 17 | unsigned long prev; |
18 | u_int32_t credit; | 18 | __u32 credit; |
19 | u_int32_t credit_cap, cost; | 19 | __u32 credit_cap, cost; |
20 | }; | 20 | }; |
21 | 21 | ||
22 | #endif | 22 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h index cc2cdfb764bc..564beb4946ea 100644 --- a/include/linux/netfilter_bridge/ebt_log.h +++ b/include/linux/netfilter_bridge/ebt_log.h | |||
@@ -10,9 +10,9 @@ | |||
10 | #define EBT_LOG_WATCHER "log" | 10 | #define EBT_LOG_WATCHER "log" |
11 | 11 | ||
12 | struct ebt_log_info { | 12 | struct ebt_log_info { |
13 | uint8_t loglevel; | 13 | __u8 loglevel; |
14 | uint8_t prefix[EBT_LOG_PREFIX_SIZE]; | 14 | __u8 prefix[EBT_LOG_PREFIX_SIZE]; |
15 | uint32_t bitmask; | 15 | __u32 bitmask; |
16 | }; | 16 | }; |
17 | 17 | ||
18 | #endif | 18 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h index 9ceb10ec0ed6..97b96c4b8db4 100644 --- a/include/linux/netfilter_bridge/ebt_mark_m.h +++ b/include/linux/netfilter_bridge/ebt_mark_m.h | |||
@@ -6,8 +6,8 @@ | |||
6 | #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) | 6 | #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) |
7 | struct ebt_mark_m_info { | 7 | struct ebt_mark_m_info { |
8 | unsigned long mark, mask; | 8 | unsigned long mark, mask; |
9 | uint8_t invert; | 9 | __u8 invert; |
10 | uint8_t bitmask; | 10 | __u8 bitmask; |
11 | }; | 11 | }; |
12 | #define EBT_MARK_MATCH "mark_m" | 12 | #define EBT_MARK_MATCH "mark_m" |
13 | 13 | ||
diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h index 052817849b83..477315bc3537 100644 --- a/include/linux/netfilter_bridge/ebt_nflog.h +++ b/include/linux/netfilter_bridge/ebt_nflog.h | |||
@@ -10,11 +10,11 @@ | |||
10 | #define EBT_NFLOG_DEFAULT_THRESHOLD 1 | 10 | #define EBT_NFLOG_DEFAULT_THRESHOLD 1 |
11 | 11 | ||
12 | struct ebt_nflog_info { | 12 | struct ebt_nflog_info { |
13 | u_int32_t len; | 13 | __u32 len; |
14 | u_int16_t group; | 14 | __u16 group; |
15 | u_int16_t threshold; | 15 | __u16 threshold; |
16 | u_int16_t flags; | 16 | __u16 flags; |
17 | u_int16_t pad; | 17 | __u16 pad; |
18 | char prefix[EBT_NFLOG_PREFIX_SIZE]; | 18 | char prefix[EBT_NFLOG_PREFIX_SIZE]; |
19 | }; | 19 | }; |
20 | 20 | ||
diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h index 51a799840931..7c0fb0fdcf14 100644 --- a/include/linux/netfilter_bridge/ebt_pkttype.h +++ b/include/linux/netfilter_bridge/ebt_pkttype.h | |||
@@ -2,8 +2,8 @@ | |||
2 | #define __LINUX_BRIDGE_EBT_PKTTYPE_H | 2 | #define __LINUX_BRIDGE_EBT_PKTTYPE_H |
3 | 3 | ||
4 | struct ebt_pkttype_info { | 4 | struct ebt_pkttype_info { |
5 | uint8_t pkt_type; | 5 | __u8 pkt_type; |
6 | uint8_t invert; | 6 | __u8 invert; |
7 | }; | 7 | }; |
8 | #define EBT_PKTTYPE_MATCH "pkttype" | 8 | #define EBT_PKTTYPE_MATCH "pkttype" |
9 | 9 | ||
diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h index e503a0aa2728..13a0bd49a92a 100644 --- a/include/linux/netfilter_bridge/ebt_stp.h +++ b/include/linux/netfilter_bridge/ebt_stp.h | |||
@@ -21,24 +21,24 @@ | |||
21 | #define EBT_STP_MATCH "stp" | 21 | #define EBT_STP_MATCH "stp" |
22 | 22 | ||
23 | struct ebt_stp_config_info { | 23 | struct ebt_stp_config_info { |
24 | uint8_t flags; | 24 | __u8 flags; |
25 | uint16_t root_priol, root_priou; | 25 | __u16 root_priol, root_priou; |
26 | char root_addr[6], root_addrmsk[6]; | 26 | char root_addr[6], root_addrmsk[6]; |
27 | uint32_t root_costl, root_costu; | 27 | __u32 root_costl, root_costu; |
28 | uint16_t sender_priol, sender_priou; | 28 | __u16 sender_priol, sender_priou; |
29 | char sender_addr[6], sender_addrmsk[6]; | 29 | char sender_addr[6], sender_addrmsk[6]; |
30 | uint16_t portl, portu; | 30 | __u16 portl, portu; |
31 | uint16_t msg_agel, msg_ageu; | 31 | __u16 msg_agel, msg_ageu; |
32 | uint16_t max_agel, max_ageu; | 32 | __u16 max_agel, max_ageu; |
33 | uint16_t hello_timel, hello_timeu; | 33 | __u16 hello_timel, hello_timeu; |
34 | uint16_t forward_delayl, forward_delayu; | 34 | __u16 forward_delayl, forward_delayu; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | struct ebt_stp_info { | 37 | struct ebt_stp_info { |
38 | uint8_t type; | 38 | __u8 type; |
39 | struct ebt_stp_config_info config; | 39 | struct ebt_stp_config_info config; |
40 | uint16_t bitmask; | 40 | __u16 bitmask; |
41 | uint16_t invflags; | 41 | __u16 invflags; |
42 | }; | 42 | }; |
43 | 43 | ||
44 | #endif | 44 | #endif |
diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h index b677e2671541..de35a51a7e46 100644 --- a/include/linux/netfilter_bridge/ebt_ulog.h +++ b/include/linux/netfilter_bridge/ebt_ulog.h | |||
@@ -10,7 +10,7 @@ | |||
10 | #define EBT_ULOG_VERSION 1 | 10 | #define EBT_ULOG_VERSION 1 |
11 | 11 | ||
12 | struct ebt_ulog_info { | 12 | struct ebt_ulog_info { |
13 | uint32_t nlgroup; | 13 | __u32 nlgroup; |
14 | unsigned int cprange; | 14 | unsigned int cprange; |
15 | unsigned int qthreshold; | 15 | unsigned int qthreshold; |
16 | char prefix[EBT_ULOG_PREFIX_LEN]; | 16 | char prefix[EBT_ULOG_PREFIX_LEN]; |
diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h index 1d98be4031e7..48dffc1dad36 100644 --- a/include/linux/netfilter_bridge/ebt_vlan.h +++ b/include/linux/netfilter_bridge/ebt_vlan.h | |||
@@ -8,12 +8,12 @@ | |||
8 | #define EBT_VLAN_MATCH "vlan" | 8 | #define EBT_VLAN_MATCH "vlan" |
9 | 9 | ||
10 | struct ebt_vlan_info { | 10 | struct ebt_vlan_info { |
11 | uint16_t id; /* VLAN ID {1-4095} */ | 11 | __u16 id; /* VLAN ID {1-4095} */ |
12 | uint8_t prio; /* VLAN User Priority {0-7} */ | 12 | __u8 prio; /* VLAN User Priority {0-7} */ |
13 | __be16 encap; /* VLAN Encapsulated frame code {0-65535} */ | 13 | __be16 encap; /* VLAN Encapsulated frame code {0-65535} */ |
14 | uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg, | 14 | __u8 bitmask; /* Args bitmask bit 1=1 - ID arg, |
15 | bit 2=1 User-Priority arg, bit 3=1 encap*/ | 15 | bit 2=1 User-Priority arg, bit 3=1 encap*/ |
16 | uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, | 16 | __u8 invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, |
17 | bit 2=1 - inversed Pirority arg */ | 17 | bit 2=1 - inversed Pirority arg */ |
18 | }; | 18 | }; |
19 | 19 | ||
diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h index e5a3687c8a72..3114f06939ef 100644 --- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h +++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | |||
@@ -17,15 +17,15 @@ struct clusterip_config; | |||
17 | 17 | ||
18 | struct ipt_clusterip_tgt_info { | 18 | struct ipt_clusterip_tgt_info { |
19 | 19 | ||
20 | u_int32_t flags; | 20 | __u32 flags; |
21 | 21 | ||
22 | /* only relevant for new ones */ | 22 | /* only relevant for new ones */ |
23 | u_int8_t clustermac[6]; | 23 | __u8 clustermac[6]; |
24 | u_int16_t num_total_nodes; | 24 | __u16 num_total_nodes; |
25 | u_int16_t num_local_nodes; | 25 | __u16 num_local_nodes; |
26 | u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; | 26 | __u16 local_nodes[CLUSTERIP_MAX_NODES]; |
27 | u_int32_t hash_mode; | 27 | __u32 hash_mode; |
28 | u_int32_t hash_initval; | 28 | __u32 hash_initval; |
29 | 29 | ||
30 | /* Used internally by the kernel */ | 30 | /* Used internally by the kernel */ |
31 | struct clusterip_config *config; | 31 | struct clusterip_config *config; |
diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h index 7ca45918ab8e..c6e3e01b75e0 100644 --- a/include/linux/netfilter_ipv4/ipt_ECN.h +++ b/include/linux/netfilter_ipv4/ipt_ECN.h | |||
@@ -19,11 +19,11 @@ | |||
19 | #define IPT_ECN_OP_MASK 0xce | 19 | #define IPT_ECN_OP_MASK 0xce |
20 | 20 | ||
21 | struct ipt_ECN_info { | 21 | struct ipt_ECN_info { |
22 | u_int8_t operation; /* bitset of operations */ | 22 | __u8 operation; /* bitset of operations */ |
23 | u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ | 23 | __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ |
24 | union { | 24 | union { |
25 | struct { | 25 | struct { |
26 | u_int8_t ece:1, cwr:1; /* TCP ECT bits */ | 26 | __u8 ece:1, cwr:1; /* TCP ECT bits */ |
27 | } tcp; | 27 | } tcp; |
28 | } proto; | 28 | } proto; |
29 | }; | 29 | }; |
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h index 2529660c5b38..fa0ebeca5d95 100644 --- a/include/linux/netfilter_ipv4/ipt_SAME.h +++ b/include/linux/netfilter_ipv4/ipt_SAME.h | |||
@@ -7,9 +7,9 @@ | |||
7 | 7 | ||
8 | struct ipt_same_info { | 8 | struct ipt_same_info { |
9 | unsigned char info; | 9 | unsigned char info; |
10 | u_int32_t rangesize; | 10 | __u32 rangesize; |
11 | u_int32_t ipnum; | 11 | __u32 ipnum; |
12 | u_int32_t *iparray; | 12 | __u32 *iparray; |
13 | 13 | ||
14 | /* hangs off end. */ | 14 | /* hangs off end. */ |
15 | struct nf_nat_range range[IPT_SAME_MAX_RANGE]; | 15 | struct nf_nat_range range[IPT_SAME_MAX_RANGE]; |
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h index ee6611edc112..f6250e422d5e 100644 --- a/include/linux/netfilter_ipv4/ipt_TTL.h +++ b/include/linux/netfilter_ipv4/ipt_TTL.h | |||
@@ -13,8 +13,8 @@ enum { | |||
13 | #define IPT_TTL_MAXMODE IPT_TTL_DEC | 13 | #define IPT_TTL_MAXMODE IPT_TTL_DEC |
14 | 14 | ||
15 | struct ipt_TTL_info { | 15 | struct ipt_TTL_info { |
16 | u_int8_t mode; | 16 | __u8 mode; |
17 | u_int8_t ttl; | 17 | __u8 ttl; |
18 | }; | 18 | }; |
19 | 19 | ||
20 | 20 | ||
diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h index 446de6aef983..f29c3cfcc240 100644 --- a/include/linux/netfilter_ipv4/ipt_addrtype.h +++ b/include/linux/netfilter_ipv4/ipt_addrtype.h | |||
@@ -9,17 +9,17 @@ enum { | |||
9 | }; | 9 | }; |
10 | 10 | ||
11 | struct ipt_addrtype_info_v1 { | 11 | struct ipt_addrtype_info_v1 { |
12 | u_int16_t source; /* source-type mask */ | 12 | __u16 source; /* source-type mask */ |
13 | u_int16_t dest; /* dest-type mask */ | 13 | __u16 dest; /* dest-type mask */ |
14 | u_int32_t flags; | 14 | __u32 flags; |
15 | }; | 15 | }; |
16 | 16 | ||
17 | /* revision 0 */ | 17 | /* revision 0 */ |
18 | struct ipt_addrtype_info { | 18 | struct ipt_addrtype_info { |
19 | u_int16_t source; /* source-type mask */ | 19 | __u16 source; /* source-type mask */ |
20 | u_int16_t dest; /* dest-type mask */ | 20 | __u16 dest; /* dest-type mask */ |
21 | u_int32_t invert_source; | 21 | __u32 invert_source; |
22 | u_int32_t invert_dest; | 22 | __u32 invert_dest; |
23 | }; | 23 | }; |
24 | 24 | ||
25 | #endif | 25 | #endif |
diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h index 2e555b4d05e3..8fea283ee62a 100644 --- a/include/linux/netfilter_ipv4/ipt_ah.h +++ b/include/linux/netfilter_ipv4/ipt_ah.h | |||
@@ -2,8 +2,8 @@ | |||
2 | #define _IPT_AH_H | 2 | #define _IPT_AH_H |
3 | 3 | ||
4 | struct ipt_ah { | 4 | struct ipt_ah { |
5 | u_int32_t spis[2]; /* Security Parameter Index */ | 5 | __u32 spis[2]; /* Security Parameter Index */ |
6 | u_int8_t invflags; /* Inverse flags */ | 6 | __u8 invflags; /* Inverse flags */ |
7 | }; | 7 | }; |
8 | 8 | ||
9 | 9 | ||
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h index 9945baa4ccd7..78b98aa8784d 100644 --- a/include/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/linux/netfilter_ipv4/ipt_ecn.h | |||
@@ -20,12 +20,12 @@ | |||
20 | 20 | ||
21 | /* match info */ | 21 | /* match info */ |
22 | struct ipt_ecn_info { | 22 | struct ipt_ecn_info { |
23 | u_int8_t operation; | 23 | __u8 operation; |
24 | u_int8_t invert; | 24 | __u8 invert; |
25 | u_int8_t ip_ect; | 25 | __u8 ip_ect; |
26 | union { | 26 | union { |
27 | struct { | 27 | struct { |
28 | u_int8_t ect; | 28 | __u8 ect; |
29 | } tcp; | 29 | } tcp; |
30 | } proto; | 30 | } proto; |
31 | }; | 31 | }; |
diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h index ee24fd86a3aa..93d9a06689a3 100644 --- a/include/linux/netfilter_ipv4/ipt_ttl.h +++ b/include/linux/netfilter_ipv4/ipt_ttl.h | |||
@@ -13,8 +13,8 @@ enum { | |||
13 | 13 | ||
14 | 14 | ||
15 | struct ipt_ttl_info { | 15 | struct ipt_ttl_info { |
16 | u_int8_t mode; | 16 | __u8 mode; |
17 | u_int8_t ttl; | 17 | __u8 ttl; |
18 | }; | 18 | }; |
19 | 19 | ||
20 | 20 | ||
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h index afb7813d45ab..81cdaf0480e3 100644 --- a/include/linux/netfilter_ipv6/ip6t_HL.h +++ b/include/linux/netfilter_ipv6/ip6t_HL.h | |||
@@ -14,8 +14,8 @@ enum { | |||
14 | #define IP6T_HL_MAXMODE IP6T_HL_DEC | 14 | #define IP6T_HL_MAXMODE IP6T_HL_DEC |
15 | 15 | ||
16 | struct ip6t_HL_info { | 16 | struct ip6t_HL_info { |
17 | u_int8_t mode; | 17 | __u8 mode; |
18 | u_int8_t hop_limit; | 18 | __u8 hop_limit; |
19 | }; | 19 | }; |
20 | 20 | ||
21 | 21 | ||
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h index 6be6504162bb..b999aa4e5969 100644 --- a/include/linux/netfilter_ipv6/ip6t_REJECT.h +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h | |||
@@ -12,7 +12,7 @@ enum ip6t_reject_with { | |||
12 | }; | 12 | }; |
13 | 13 | ||
14 | struct ip6t_reject_info { | 14 | struct ip6t_reject_info { |
15 | u_int32_t with; /* reject type */ | 15 | __u32 with; /* reject type */ |
16 | }; | 16 | }; |
17 | 17 | ||
18 | #endif /*_IP6T_REJECT_H*/ | 18 | #endif /*_IP6T_REJECT_H*/ |
diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h index 17a745cfb2c7..a602c165edd1 100644 --- a/include/linux/netfilter_ipv6/ip6t_ah.h +++ b/include/linux/netfilter_ipv6/ip6t_ah.h | |||
@@ -2,10 +2,10 @@ | |||
2 | #define _IP6T_AH_H | 2 | #define _IP6T_AH_H |
3 | 3 | ||
4 | struct ip6t_ah { | 4 | struct ip6t_ah { |
5 | u_int32_t spis[2]; /* Security Parameter Index */ | 5 | __u32 spis[2]; /* Security Parameter Index */ |
6 | u_int32_t hdrlen; /* Header Length */ | 6 | __u32 hdrlen; /* Header Length */ |
7 | u_int8_t hdrres; /* Test of the Reserved Filed */ | 7 | __u8 hdrres; /* Test of the Reserved Filed */ |
8 | u_int8_t invflags; /* Inverse flags */ | 8 | __u8 invflags; /* Inverse flags */ |
9 | }; | 9 | }; |
10 | 10 | ||
11 | #define IP6T_AH_SPI 0x01 | 11 | #define IP6T_AH_SPI 0x01 |
diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h index 3724d0850920..538b31ef5e3d 100644 --- a/include/linux/netfilter_ipv6/ip6t_frag.h +++ b/include/linux/netfilter_ipv6/ip6t_frag.h | |||
@@ -2,10 +2,10 @@ | |||
2 | #define _IP6T_FRAG_H | 2 | #define _IP6T_FRAG_H |
3 | 3 | ||
4 | struct ip6t_frag { | 4 | struct ip6t_frag { |
5 | u_int32_t ids[2]; /* Security Parameter Index */ | 5 | __u32 ids[2]; /* Security Parameter Index */ |
6 | u_int32_t hdrlen; /* Header Length */ | 6 | __u32 hdrlen; /* Header Length */ |
7 | u_int8_t flags; /* */ | 7 | __u8 flags; /* */ |
8 | u_int8_t invflags; /* Inverse flags */ | 8 | __u8 invflags; /* Inverse flags */ |
9 | }; | 9 | }; |
10 | 10 | ||
11 | #define IP6T_FRAG_IDS 0x01 | 11 | #define IP6T_FRAG_IDS 0x01 |
diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h index 5ef91b8319a8..c6fddcb971da 100644 --- a/include/linux/netfilter_ipv6/ip6t_hl.h +++ b/include/linux/netfilter_ipv6/ip6t_hl.h | |||
@@ -14,8 +14,8 @@ enum { | |||
14 | 14 | ||
15 | 15 | ||
16 | struct ip6t_hl_info { | 16 | struct ip6t_hl_info { |
17 | u_int8_t mode; | 17 | __u8 mode; |
18 | u_int8_t hop_limit; | 18 | __u8 hop_limit; |
19 | }; | 19 | }; |
20 | 20 | ||
21 | 21 | ||
diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h index 01dfd445596a..73d53bd3ff62 100644 --- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h +++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h | |||
@@ -9,9 +9,9 @@ on whether they contain certain headers */ | |||
9 | #define __IPV6HEADER_H | 9 | #define __IPV6HEADER_H |
10 | 10 | ||
11 | struct ip6t_ipv6header_info { | 11 | struct ip6t_ipv6header_info { |
12 | u_int8_t matchflags; | 12 | __u8 matchflags; |
13 | u_int8_t invflags; | 13 | __u8 invflags; |
14 | u_int8_t modeflag; | 14 | __u8 modeflag; |
15 | }; | 15 | }; |
16 | 16 | ||
17 | #define MASK_HOPOPTS 128 | 17 | #define MASK_HOPOPTS 128 |
diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h index 18549bca2d1f..98c8cf685eea 100644 --- a/include/linux/netfilter_ipv6/ip6t_mh.h +++ b/include/linux/netfilter_ipv6/ip6t_mh.h | |||
@@ -3,8 +3,8 @@ | |||
3 | 3 | ||
4 | /* MH matching stuff */ | 4 | /* MH matching stuff */ |
5 | struct ip6t_mh { | 5 | struct ip6t_mh { |
6 | u_int8_t types[2]; /* MH type range */ | 6 | __u8 types[2]; /* MH type range */ |
7 | u_int8_t invflags; /* Inverse flags */ | 7 | __u8 invflags; /* Inverse flags */ |
8 | }; | 8 | }; |
9 | 9 | ||
10 | /* Values for "invflags" field in struct ip6t_mh. */ | 10 | /* Values for "invflags" field in struct ip6t_mh. */ |
diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h index 62d89bcd9f9c..405d309cd741 100644 --- a/include/linux/netfilter_ipv6/ip6t_opts.h +++ b/include/linux/netfilter_ipv6/ip6t_opts.h | |||
@@ -4,11 +4,11 @@ | |||
4 | #define IP6T_OPTS_OPTSNR 16 | 4 | #define IP6T_OPTS_OPTSNR 16 |
5 | 5 | ||
6 | struct ip6t_opts { | 6 | struct ip6t_opts { |
7 | u_int32_t hdrlen; /* Header Length */ | 7 | __u32 hdrlen; /* Header Length */ |
8 | u_int8_t flags; /* */ | 8 | __u8 flags; /* */ |
9 | u_int8_t invflags; /* Inverse flags */ | 9 | __u8 invflags; /* Inverse flags */ |
10 | u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */ | 10 | __u16 opts[IP6T_OPTS_OPTSNR]; /* opts */ |
11 | u_int8_t optsnr; /* Nr of OPts */ | 11 | __u8 optsnr; /* Nr of OPts */ |
12 | }; | 12 | }; |
13 | 13 | ||
14 | #define IP6T_OPTS_LEN 0x01 | 14 | #define IP6T_OPTS_LEN 0x01 |
diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h index ab91bfd2cd00..e8dad20acd37 100644 --- a/include/linux/netfilter_ipv6/ip6t_rt.h +++ b/include/linux/netfilter_ipv6/ip6t_rt.h | |||
@@ -6,13 +6,13 @@ | |||
6 | #define IP6T_RT_HOPS 16 | 6 | #define IP6T_RT_HOPS 16 |
7 | 7 | ||
8 | struct ip6t_rt { | 8 | struct ip6t_rt { |
9 | u_int32_t rt_type; /* Routing Type */ | 9 | __u32 rt_type; /* Routing Type */ |
10 | u_int32_t segsleft[2]; /* Segments Left */ | 10 | __u32 segsleft[2]; /* Segments Left */ |
11 | u_int32_t hdrlen; /* Header Length */ | 11 | __u32 hdrlen; /* Header Length */ |
12 | u_int8_t flags; /* */ | 12 | __u8 flags; /* */ |
13 | u_int8_t invflags; /* Inverse flags */ | 13 | __u8 invflags; /* Inverse flags */ |
14 | struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */ | 14 | struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */ |
15 | u_int8_t addrnr; /* Nr of Addresses */ | 15 | __u8 addrnr; /* Nr of Addresses */ |
16 | }; | 16 | }; |
17 | 17 | ||
18 | #define IP6T_RT_TYP 0x01 | 18 | #define IP6T_RT_TYP 0x01 |
diff --git a/include/net/dst.h b/include/net/dst.h index 93b0310317be..be5a0d4c491d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h | |||
@@ -72,7 +72,7 @@ struct dst_entry { | |||
72 | 72 | ||
73 | u32 _metrics[RTAX_MAX]; | 73 | u32 _metrics[RTAX_MAX]; |
74 | 74 | ||
75 | #ifdef CONFIG_NET_CLS_ROUTE | 75 | #ifdef CONFIG_IP_ROUTE_CLASSID |
76 | __u32 tclassid; | 76 | __u32 tclassid; |
77 | #else | 77 | #else |
78 | __u32 __pad2; | 78 | __u32 __pad2; |
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 07bdb5e9e8ac..65d1fcdbc63b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h | |||
@@ -55,7 +55,7 @@ struct fib_nh { | |||
55 | int nh_weight; | 55 | int nh_weight; |
56 | int nh_power; | 56 | int nh_power; |
57 | #endif | 57 | #endif |
58 | #ifdef CONFIG_NET_CLS_ROUTE | 58 | #ifdef CONFIG_IP_ROUTE_CLASSID |
59 | __u32 nh_tclassid; | 59 | __u32 nh_tclassid; |
60 | #endif | 60 | #endif |
61 | int nh_oif; | 61 | int nh_oif; |
@@ -201,7 +201,7 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, | |||
201 | extern int __net_init fib4_rules_init(struct net *net); | 201 | extern int __net_init fib4_rules_init(struct net *net); |
202 | extern void __net_exit fib4_rules_exit(struct net *net); | 202 | extern void __net_exit fib4_rules_exit(struct net *net); |
203 | 203 | ||
204 | #ifdef CONFIG_NET_CLS_ROUTE | 204 | #ifdef CONFIG_IP_ROUTE_CLASSID |
205 | extern u32 fib_rules_tclass(struct fib_result *res); | 205 | extern u32 fib_rules_tclass(struct fib_result *res); |
206 | #endif | 206 | #endif |
207 | 207 | ||
@@ -235,7 +235,7 @@ extern struct fib_table *fib_hash_table(u32 id); | |||
235 | 235 | ||
236 | static inline void fib_combine_itag(u32 *itag, struct fib_result *res) | 236 | static inline void fib_combine_itag(u32 *itag, struct fib_result *res) |
237 | { | 237 | { |
238 | #ifdef CONFIG_NET_CLS_ROUTE | 238 | #ifdef CONFIG_IP_ROUTE_CLASSID |
239 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 239 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
240 | u32 rtag; | 240 | u32 rtag; |
241 | #endif | 241 | #endif |
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b7bbd6c28cfa..b23bea62f708 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -28,6 +28,80 @@ | |||
28 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 28 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
29 | #include <net/netfilter/nf_conntrack.h> | 29 | #include <net/netfilter/nf_conntrack.h> |
30 | #endif | 30 | #endif |
31 | #include <net/net_namespace.h> /* Netw namespace */ | ||
32 | |||
33 | /* | ||
34 | * Generic access of ipvs struct | ||
35 | */ | ||
36 | static inline struct netns_ipvs *net_ipvs(struct net* net) | ||
37 | { | ||
38 | return net->ipvs; | ||
39 | } | ||
40 | /* | ||
41 | * Get net ptr from skb in traffic cases | ||
42 | * use skb_sknet when call is from userland (ioctl or netlink) | ||
43 | */ | ||
44 | static inline struct net *skb_net(const struct sk_buff *skb) | ||
45 | { | ||
46 | #ifdef CONFIG_NET_NS | ||
47 | #ifdef CONFIG_IP_VS_DEBUG | ||
48 | /* | ||
49 | * This is used for debug only. | ||
50 | * Start with the most likely hit | ||
51 | * End with BUG | ||
52 | */ | ||
53 | if (likely(skb->dev && skb->dev->nd_net)) | ||
54 | return dev_net(skb->dev); | ||
55 | if (skb_dst(skb)->dev) | ||
56 | return dev_net(skb_dst(skb)->dev); | ||
57 | WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", | ||
58 | __func__, __LINE__); | ||
59 | if (likely(skb->sk && skb->sk->sk_net)) | ||
60 | return sock_net(skb->sk); | ||
61 | pr_err("There is no net ptr to find in the skb in %s() line:%d\n", | ||
62 | __func__, __LINE__); | ||
63 | BUG(); | ||
64 | #else | ||
65 | return dev_net(skb->dev ? : skb_dst(skb)->dev); | ||
66 | #endif | ||
67 | #else | ||
68 | return &init_net; | ||
69 | #endif | ||
70 | } | ||
71 | |||
72 | static inline struct net *skb_sknet(const struct sk_buff *skb) | ||
73 | { | ||
74 | #ifdef CONFIG_NET_NS | ||
75 | #ifdef CONFIG_IP_VS_DEBUG | ||
76 | /* Start with the most likely hit */ | ||
77 | if (likely(skb->sk && skb->sk->sk_net)) | ||
78 | return sock_net(skb->sk); | ||
79 | WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n", | ||
80 | __func__, __LINE__); | ||
81 | if (likely(skb->dev && skb->dev->nd_net)) | ||
82 | return dev_net(skb->dev); | ||
83 | pr_err("There is no net ptr to find in the skb in %s() line:%d\n", | ||
84 | __func__, __LINE__); | ||
85 | BUG(); | ||
86 | #else | ||
87 | return sock_net(skb->sk); | ||
88 | #endif | ||
89 | #else | ||
90 | return &init_net; | ||
91 | #endif | ||
92 | } | ||
93 | /* | ||
94 | * This one needed for single_open_net since net is stored directly in | ||
95 | * private not as a struct i.e. seq_file_net cant be used. | ||
96 | */ | ||
97 | static inline struct net *seq_file_single_net(struct seq_file *seq) | ||
98 | { | ||
99 | #ifdef CONFIG_NET_NS | ||
100 | return (struct net *)seq->private; | ||
101 | #else | ||
102 | return &init_net; | ||
103 | #endif | ||
104 | } | ||
31 | 105 | ||
32 | /* Connections' size value needed by ip_vs_ctl.c */ | 106 | /* Connections' size value needed by ip_vs_ctl.c */ |
33 | extern int ip_vs_conn_tab_size; | 107 | extern int ip_vs_conn_tab_size; |
@@ -258,6 +332,23 @@ struct ip_vs_seq { | |||
258 | before last resized pkt */ | 332 | before last resized pkt */ |
259 | }; | 333 | }; |
260 | 334 | ||
335 | /* | ||
336 | * counters per cpu | ||
337 | */ | ||
338 | struct ip_vs_counters { | ||
339 | __u32 conns; /* connections scheduled */ | ||
340 | __u32 inpkts; /* incoming packets */ | ||
341 | __u32 outpkts; /* outgoing packets */ | ||
342 | __u64 inbytes; /* incoming bytes */ | ||
343 | __u64 outbytes; /* outgoing bytes */ | ||
344 | }; | ||
345 | /* | ||
346 | * Stats per cpu | ||
347 | */ | ||
348 | struct ip_vs_cpu_stats { | ||
349 | struct ip_vs_counters ustats; | ||
350 | struct u64_stats_sync syncp; | ||
351 | }; | ||
261 | 352 | ||
262 | /* | 353 | /* |
263 | * IPVS statistics objects | 354 | * IPVS statistics objects |
@@ -279,17 +370,34 @@ struct ip_vs_estimator { | |||
279 | }; | 370 | }; |
280 | 371 | ||
281 | struct ip_vs_stats { | 372 | struct ip_vs_stats { |
282 | struct ip_vs_stats_user ustats; /* statistics */ | 373 | struct ip_vs_stats_user ustats; /* statistics */ |
283 | struct ip_vs_estimator est; /* estimator */ | 374 | struct ip_vs_estimator est; /* estimator */ |
284 | 375 | struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ | |
285 | spinlock_t lock; /* spin lock */ | 376 | spinlock_t lock; /* spin lock */ |
286 | }; | 377 | }; |
287 | 378 | ||
379 | /* | ||
380 | * Helper Macros for per cpu | ||
381 | * ipvs->tot_stats->ustats.count | ||
382 | */ | ||
383 | #define IPVS_STAT_INC(ipvs, count) \ | ||
384 | __this_cpu_inc((ipvs)->ustats->count) | ||
385 | |||
386 | #define IPVS_STAT_ADD(ipvs, count, value) \ | ||
387 | do {\ | ||
388 | write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \ | ||
389 | raw_smp_processor_id())); \ | ||
390 | __this_cpu_add((ipvs)->ustats->count, value); \ | ||
391 | write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \ | ||
392 | raw_smp_processor_id())); \ | ||
393 | } while (0) | ||
394 | |||
288 | struct dst_entry; | 395 | struct dst_entry; |
289 | struct iphdr; | 396 | struct iphdr; |
290 | struct ip_vs_conn; | 397 | struct ip_vs_conn; |
291 | struct ip_vs_app; | 398 | struct ip_vs_app; |
292 | struct sk_buff; | 399 | struct sk_buff; |
400 | struct ip_vs_proto_data; | ||
293 | 401 | ||
294 | struct ip_vs_protocol { | 402 | struct ip_vs_protocol { |
295 | struct ip_vs_protocol *next; | 403 | struct ip_vs_protocol *next; |
@@ -297,21 +405,22 @@ struct ip_vs_protocol { | |||
297 | u16 protocol; | 405 | u16 protocol; |
298 | u16 num_states; | 406 | u16 num_states; |
299 | int dont_defrag; | 407 | int dont_defrag; |
300 | atomic_t appcnt; /* counter of proto app incs */ | ||
301 | int *timeout_table; /* protocol timeout table */ | ||
302 | 408 | ||
303 | void (*init)(struct ip_vs_protocol *pp); | 409 | void (*init)(struct ip_vs_protocol *pp); |
304 | 410 | ||
305 | void (*exit)(struct ip_vs_protocol *pp); | 411 | void (*exit)(struct ip_vs_protocol *pp); |
306 | 412 | ||
413 | void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); | ||
414 | |||
415 | void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); | ||
416 | |||
307 | int (*conn_schedule)(int af, struct sk_buff *skb, | 417 | int (*conn_schedule)(int af, struct sk_buff *skb, |
308 | struct ip_vs_protocol *pp, | 418 | struct ip_vs_proto_data *pd, |
309 | int *verdict, struct ip_vs_conn **cpp); | 419 | int *verdict, struct ip_vs_conn **cpp); |
310 | 420 | ||
311 | struct ip_vs_conn * | 421 | struct ip_vs_conn * |
312 | (*conn_in_get)(int af, | 422 | (*conn_in_get)(int af, |
313 | const struct sk_buff *skb, | 423 | const struct sk_buff *skb, |
314 | struct ip_vs_protocol *pp, | ||
315 | const struct ip_vs_iphdr *iph, | 424 | const struct ip_vs_iphdr *iph, |
316 | unsigned int proto_off, | 425 | unsigned int proto_off, |
317 | int inverse); | 426 | int inverse); |
@@ -319,7 +428,6 @@ struct ip_vs_protocol { | |||
319 | struct ip_vs_conn * | 428 | struct ip_vs_conn * |
320 | (*conn_out_get)(int af, | 429 | (*conn_out_get)(int af, |
321 | const struct sk_buff *skb, | 430 | const struct sk_buff *skb, |
322 | struct ip_vs_protocol *pp, | ||
323 | const struct ip_vs_iphdr *iph, | 431 | const struct ip_vs_iphdr *iph, |
324 | unsigned int proto_off, | 432 | unsigned int proto_off, |
325 | int inverse); | 433 | int inverse); |
@@ -337,11 +445,11 @@ struct ip_vs_protocol { | |||
337 | 445 | ||
338 | int (*state_transition)(struct ip_vs_conn *cp, int direction, | 446 | int (*state_transition)(struct ip_vs_conn *cp, int direction, |
339 | const struct sk_buff *skb, | 447 | const struct sk_buff *skb, |
340 | struct ip_vs_protocol *pp); | 448 | struct ip_vs_proto_data *pd); |
341 | 449 | ||
342 | int (*register_app)(struct ip_vs_app *inc); | 450 | int (*register_app)(struct net *net, struct ip_vs_app *inc); |
343 | 451 | ||
344 | void (*unregister_app)(struct ip_vs_app *inc); | 452 | void (*unregister_app)(struct net *net, struct ip_vs_app *inc); |
345 | 453 | ||
346 | int (*app_conn_bind)(struct ip_vs_conn *cp); | 454 | int (*app_conn_bind)(struct ip_vs_conn *cp); |
347 | 455 | ||
@@ -350,14 +458,26 @@ struct ip_vs_protocol { | |||
350 | int offset, | 458 | int offset, |
351 | const char *msg); | 459 | const char *msg); |
352 | 460 | ||
353 | void (*timeout_change)(struct ip_vs_protocol *pp, int flags); | 461 | void (*timeout_change)(struct ip_vs_proto_data *pd, int flags); |
462 | }; | ||
354 | 463 | ||
355 | int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to); | 464 | /* |
465 | * protocol data per netns | ||
466 | */ | ||
467 | struct ip_vs_proto_data { | ||
468 | struct ip_vs_proto_data *next; | ||
469 | struct ip_vs_protocol *pp; | ||
470 | int *timeout_table; /* protocol timeout table */ | ||
471 | atomic_t appcnt; /* counter of proto app incs. */ | ||
472 | struct tcp_states_t *tcp_state_table; | ||
356 | }; | 473 | }; |
357 | 474 | ||
358 | extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); | 475 | extern struct ip_vs_protocol *ip_vs_proto_get(unsigned short proto); |
476 | extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net, | ||
477 | unsigned short proto); | ||
359 | 478 | ||
360 | struct ip_vs_conn_param { | 479 | struct ip_vs_conn_param { |
480 | struct net *net; | ||
361 | const union nf_inet_addr *caddr; | 481 | const union nf_inet_addr *caddr; |
362 | const union nf_inet_addr *vaddr; | 482 | const union nf_inet_addr *vaddr; |
363 | __be16 cport; | 483 | __be16 cport; |
@@ -375,16 +495,19 @@ struct ip_vs_conn_param { | |||
375 | */ | 495 | */ |
376 | struct ip_vs_conn { | 496 | struct ip_vs_conn { |
377 | struct list_head c_list; /* hashed list heads */ | 497 | struct list_head c_list; /* hashed list heads */ |
378 | 498 | #ifdef CONFIG_NET_NS | |
499 | struct net *net; /* Name space */ | ||
500 | #endif | ||
379 | /* Protocol, addresses and port numbers */ | 501 | /* Protocol, addresses and port numbers */ |
380 | u16 af; /* address family */ | 502 | u16 af; /* address family */ |
381 | union nf_inet_addr caddr; /* client address */ | 503 | __be16 cport; |
382 | union nf_inet_addr vaddr; /* virtual address */ | 504 | __be16 vport; |
383 | union nf_inet_addr daddr; /* destination address */ | 505 | __be16 dport; |
384 | volatile __u32 flags; /* status flags */ | 506 | __u32 fwmark; /* Fire wall mark from skb */ |
385 | __be16 cport; | 507 | union nf_inet_addr caddr; /* client address */ |
386 | __be16 vport; | 508 | union nf_inet_addr vaddr; /* virtual address */ |
387 | __be16 dport; | 509 | union nf_inet_addr daddr; /* destination address */ |
510 | volatile __u32 flags; /* status flags */ | ||
388 | __u16 protocol; /* Which protocol (TCP/UDP) */ | 511 | __u16 protocol; /* Which protocol (TCP/UDP) */ |
389 | 512 | ||
390 | /* counter and timer */ | 513 | /* counter and timer */ |
@@ -422,10 +545,38 @@ struct ip_vs_conn { | |||
422 | struct ip_vs_seq in_seq; /* incoming seq. struct */ | 545 | struct ip_vs_seq in_seq; /* incoming seq. struct */ |
423 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ | 546 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ |
424 | 547 | ||
548 | const struct ip_vs_pe *pe; | ||
425 | char *pe_data; | 549 | char *pe_data; |
426 | __u8 pe_data_len; | 550 | __u8 pe_data_len; |
427 | }; | 551 | }; |
428 | 552 | ||
553 | /* | ||
554 | * To save some memory in conn table when name space is disabled. | ||
555 | */ | ||
556 | static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp) | ||
557 | { | ||
558 | #ifdef CONFIG_NET_NS | ||
559 | return cp->net; | ||
560 | #else | ||
561 | return &init_net; | ||
562 | #endif | ||
563 | } | ||
564 | static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net) | ||
565 | { | ||
566 | #ifdef CONFIG_NET_NS | ||
567 | cp->net = net; | ||
568 | #endif | ||
569 | } | ||
570 | |||
571 | static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp, | ||
572 | struct net *net) | ||
573 | { | ||
574 | #ifdef CONFIG_NET_NS | ||
575 | return cp->net == net; | ||
576 | #else | ||
577 | return 1; | ||
578 | #endif | ||
579 | } | ||
429 | 580 | ||
430 | /* | 581 | /* |
431 | * Extended internal versions of struct ip_vs_service_user and | 582 | * Extended internal versions of struct ip_vs_service_user and |
@@ -485,6 +636,7 @@ struct ip_vs_service { | |||
485 | unsigned flags; /* service status flags */ | 636 | unsigned flags; /* service status flags */ |
486 | unsigned timeout; /* persistent timeout in ticks */ | 637 | unsigned timeout; /* persistent timeout in ticks */ |
487 | __be32 netmask; /* grouping granularity */ | 638 | __be32 netmask; /* grouping granularity */ |
639 | struct net *net; | ||
488 | 640 | ||
489 | struct list_head destinations; /* real server d-linked list */ | 641 | struct list_head destinations; /* real server d-linked list */ |
490 | __u32 num_dests; /* number of servers */ | 642 | __u32 num_dests; /* number of servers */ |
@@ -510,8 +662,8 @@ struct ip_vs_dest { | |||
510 | struct list_head d_list; /* for table with all the dests */ | 662 | struct list_head d_list; /* for table with all the dests */ |
511 | 663 | ||
512 | u16 af; /* address family */ | 664 | u16 af; /* address family */ |
513 | union nf_inet_addr addr; /* IP address of the server */ | ||
514 | __be16 port; /* port number of the server */ | 665 | __be16 port; /* port number of the server */ |
666 | union nf_inet_addr addr; /* IP address of the server */ | ||
515 | volatile unsigned flags; /* dest status flags */ | 667 | volatile unsigned flags; /* dest status flags */ |
516 | atomic_t conn_flags; /* flags to copy to conn */ | 668 | atomic_t conn_flags; /* flags to copy to conn */ |
517 | atomic_t weight; /* server weight */ | 669 | atomic_t weight; /* server weight */ |
@@ -538,8 +690,8 @@ struct ip_vs_dest { | |||
538 | /* for virtual service */ | 690 | /* for virtual service */ |
539 | struct ip_vs_service *svc; /* service it belongs to */ | 691 | struct ip_vs_service *svc; /* service it belongs to */ |
540 | __u16 protocol; /* which protocol (TCP/UDP) */ | 692 | __u16 protocol; /* which protocol (TCP/UDP) */ |
541 | union nf_inet_addr vaddr; /* virtual IP address */ | ||
542 | __be16 vport; /* virtual port number */ | 693 | __be16 vport; /* virtual port number */ |
694 | union nf_inet_addr vaddr; /* virtual IP address */ | ||
543 | __u32 vfwmark; /* firewall mark of service */ | 695 | __u32 vfwmark; /* firewall mark of service */ |
544 | }; | 696 | }; |
545 | 697 | ||
@@ -674,13 +826,14 @@ enum { | |||
674 | IP_VS_DIR_LAST, | 826 | IP_VS_DIR_LAST, |
675 | }; | 827 | }; |
676 | 828 | ||
677 | static inline void ip_vs_conn_fill_param(int af, int protocol, | 829 | static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol, |
678 | const union nf_inet_addr *caddr, | 830 | const union nf_inet_addr *caddr, |
679 | __be16 cport, | 831 | __be16 cport, |
680 | const union nf_inet_addr *vaddr, | 832 | const union nf_inet_addr *vaddr, |
681 | __be16 vport, | 833 | __be16 vport, |
682 | struct ip_vs_conn_param *p) | 834 | struct ip_vs_conn_param *p) |
683 | { | 835 | { |
836 | p->net = net; | ||
684 | p->af = af; | 837 | p->af = af; |
685 | p->protocol = protocol; | 838 | p->protocol = protocol; |
686 | p->caddr = caddr; | 839 | p->caddr = caddr; |
@@ -695,7 +848,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); | |||
695 | struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); | 848 | struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); |
696 | 849 | ||
697 | struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, | 850 | struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, |
698 | struct ip_vs_protocol *pp, | ||
699 | const struct ip_vs_iphdr *iph, | 851 | const struct ip_vs_iphdr *iph, |
700 | unsigned int proto_off, | 852 | unsigned int proto_off, |
701 | int inverse); | 853 | int inverse); |
@@ -703,7 +855,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, | |||
703 | struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); | 855 | struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); |
704 | 856 | ||
705 | struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, | 857 | struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, |
706 | struct ip_vs_protocol *pp, | ||
707 | const struct ip_vs_iphdr *iph, | 858 | const struct ip_vs_iphdr *iph, |
708 | unsigned int proto_off, | 859 | unsigned int proto_off, |
709 | int inverse); | 860 | int inverse); |
@@ -719,14 +870,14 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); | |||
719 | struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, | 870 | struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, |
720 | const union nf_inet_addr *daddr, | 871 | const union nf_inet_addr *daddr, |
721 | __be16 dport, unsigned flags, | 872 | __be16 dport, unsigned flags, |
722 | struct ip_vs_dest *dest); | 873 | struct ip_vs_dest *dest, __u32 fwmark); |
723 | extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); | 874 | extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); |
724 | 875 | ||
725 | extern const char * ip_vs_state_name(__u16 proto, int state); | 876 | extern const char * ip_vs_state_name(__u16 proto, int state); |
726 | 877 | ||
727 | extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); | 878 | extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp); |
728 | extern int ip_vs_check_template(struct ip_vs_conn *ct); | 879 | extern int ip_vs_check_template(struct ip_vs_conn *ct); |
729 | extern void ip_vs_random_dropentry(void); | 880 | extern void ip_vs_random_dropentry(struct net *net); |
730 | extern int ip_vs_conn_init(void); | 881 | extern int ip_vs_conn_init(void); |
731 | extern void ip_vs_conn_cleanup(void); | 882 | extern void ip_vs_conn_cleanup(void); |
732 | 883 | ||
@@ -796,12 +947,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) | |||
796 | * (from ip_vs_app.c) | 947 | * (from ip_vs_app.c) |
797 | */ | 948 | */ |
798 | #define IP_VS_APP_MAX_PORTS 8 | 949 | #define IP_VS_APP_MAX_PORTS 8 |
799 | extern int register_ip_vs_app(struct ip_vs_app *app); | 950 | extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app); |
800 | extern void unregister_ip_vs_app(struct ip_vs_app *app); | 951 | extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app); |
801 | extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); | 952 | extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); |
802 | extern void ip_vs_unbind_app(struct ip_vs_conn *cp); | 953 | extern void ip_vs_unbind_app(struct ip_vs_conn *cp); |
803 | extern int | 954 | extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, |
804 | register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); | 955 | __u16 proto, __u16 port); |
805 | extern int ip_vs_app_inc_get(struct ip_vs_app *inc); | 956 | extern int ip_vs_app_inc_get(struct ip_vs_app *inc); |
806 | extern void ip_vs_app_inc_put(struct ip_vs_app *inc); | 957 | extern void ip_vs_app_inc_put(struct ip_vs_app *inc); |
807 | 958 | ||
@@ -814,15 +965,27 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); | |||
814 | void ip_vs_unbind_pe(struct ip_vs_service *svc); | 965 | void ip_vs_unbind_pe(struct ip_vs_service *svc); |
815 | int register_ip_vs_pe(struct ip_vs_pe *pe); | 966 | int register_ip_vs_pe(struct ip_vs_pe *pe); |
816 | int unregister_ip_vs_pe(struct ip_vs_pe *pe); | 967 | int unregister_ip_vs_pe(struct ip_vs_pe *pe); |
817 | extern struct ip_vs_pe *ip_vs_pe_get(const char *name); | 968 | struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); |
818 | extern void ip_vs_pe_put(struct ip_vs_pe *pe); | 969 | struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name); |
970 | |||
971 | static inline void ip_vs_pe_get(const struct ip_vs_pe *pe) | ||
972 | { | ||
973 | if (pe && pe->module) | ||
974 | __module_get(pe->module); | ||
975 | } | ||
976 | |||
977 | static inline void ip_vs_pe_put(const struct ip_vs_pe *pe) | ||
978 | { | ||
979 | if (pe && pe->module) | ||
980 | module_put(pe->module); | ||
981 | } | ||
819 | 982 | ||
820 | /* | 983 | /* |
821 | * IPVS protocol functions (from ip_vs_proto.c) | 984 | * IPVS protocol functions (from ip_vs_proto.c) |
822 | */ | 985 | */ |
823 | extern int ip_vs_protocol_init(void); | 986 | extern int ip_vs_protocol_init(void); |
824 | extern void ip_vs_protocol_cleanup(void); | 987 | extern void ip_vs_protocol_cleanup(void); |
825 | extern void ip_vs_protocol_timeout_change(int flags); | 988 | extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); |
826 | extern int *ip_vs_create_timeout_table(int *table, int size); | 989 | extern int *ip_vs_create_timeout_table(int *table, int size); |
827 | extern int | 990 | extern int |
828 | ip_vs_set_state_timeout(int *table, int num, const char *const *names, | 991 | ip_vs_set_state_timeout(int *table, int num, const char *const *names, |
@@ -852,26 +1015,21 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); | |||
852 | extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); | 1015 | extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); |
853 | extern struct ip_vs_conn * | 1016 | extern struct ip_vs_conn * |
854 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | 1017 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, |
855 | struct ip_vs_protocol *pp, int *ignored); | 1018 | struct ip_vs_proto_data *pd, int *ignored); |
856 | extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | 1019 | extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, |
857 | struct ip_vs_protocol *pp); | 1020 | struct ip_vs_proto_data *pd); |
858 | 1021 | ||
859 | 1022 | ||
860 | /* | 1023 | /* |
861 | * IPVS control data and functions (from ip_vs_ctl.c) | 1024 | * IPVS control data and functions (from ip_vs_ctl.c) |
862 | */ | 1025 | */ |
863 | extern int sysctl_ip_vs_cache_bypass; | ||
864 | extern int sysctl_ip_vs_expire_nodest_conn; | ||
865 | extern int sysctl_ip_vs_expire_quiescent_template; | ||
866 | extern int sysctl_ip_vs_sync_threshold[2]; | ||
867 | extern int sysctl_ip_vs_nat_icmp_send; | ||
868 | extern int sysctl_ip_vs_conntrack; | ||
869 | extern int sysctl_ip_vs_snat_reroute; | ||
870 | extern struct ip_vs_stats ip_vs_stats; | 1026 | extern struct ip_vs_stats ip_vs_stats; |
871 | extern const struct ctl_path net_vs_ctl_path[]; | 1027 | extern const struct ctl_path net_vs_ctl_path[]; |
1028 | extern int sysctl_ip_vs_sync_ver; | ||
872 | 1029 | ||
1030 | extern void ip_vs_sync_switch_mode(struct net *net, int mode); | ||
873 | extern struct ip_vs_service * | 1031 | extern struct ip_vs_service * |
874 | ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, | 1032 | ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, |
875 | const union nf_inet_addr *vaddr, __be16 vport); | 1033 | const union nf_inet_addr *vaddr, __be16 vport); |
876 | 1034 | ||
877 | static inline void ip_vs_service_put(struct ip_vs_service *svc) | 1035 | static inline void ip_vs_service_put(struct ip_vs_service *svc) |
@@ -880,7 +1038,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) | |||
880 | } | 1038 | } |
881 | 1039 | ||
882 | extern struct ip_vs_dest * | 1040 | extern struct ip_vs_dest * |
883 | ip_vs_lookup_real_service(int af, __u16 protocol, | 1041 | ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, |
884 | const union nf_inet_addr *daddr, __be16 dport); | 1042 | const union nf_inet_addr *daddr, __be16 dport); |
885 | 1043 | ||
886 | extern int ip_vs_use_count_inc(void); | 1044 | extern int ip_vs_use_count_inc(void); |
@@ -888,8 +1046,9 @@ extern void ip_vs_use_count_dec(void); | |||
888 | extern int ip_vs_control_init(void); | 1046 | extern int ip_vs_control_init(void); |
889 | extern void ip_vs_control_cleanup(void); | 1047 | extern void ip_vs_control_cleanup(void); |
890 | extern struct ip_vs_dest * | 1048 | extern struct ip_vs_dest * |
891 | ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, | 1049 | ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, |
892 | const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol); | 1050 | __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, |
1051 | __u16 protocol, __u32 fwmark); | ||
893 | extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); | 1052 | extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); |
894 | 1053 | ||
895 | 1054 | ||
@@ -897,14 +1056,12 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); | |||
897 | * IPVS sync daemon data and function prototypes | 1056 | * IPVS sync daemon data and function prototypes |
898 | * (from ip_vs_sync.c) | 1057 | * (from ip_vs_sync.c) |
899 | */ | 1058 | */ |
900 | extern volatile int ip_vs_sync_state; | 1059 | extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, |
901 | extern volatile int ip_vs_master_syncid; | 1060 | __u8 syncid); |
902 | extern volatile int ip_vs_backup_syncid; | 1061 | extern int stop_sync_thread(struct net *net, int state); |
903 | extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | 1062 | extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp); |
904 | extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | 1063 | extern int ip_vs_sync_init(void); |
905 | extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); | 1064 | extern void ip_vs_sync_cleanup(void); |
906 | extern int stop_sync_thread(int state); | ||
907 | extern void ip_vs_sync_conn(struct ip_vs_conn *cp); | ||
908 | 1065 | ||
909 | 1066 | ||
910 | /* | 1067 | /* |
@@ -912,8 +1069,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp); | |||
912 | */ | 1069 | */ |
913 | extern int ip_vs_estimator_init(void); | 1070 | extern int ip_vs_estimator_init(void); |
914 | extern void ip_vs_estimator_cleanup(void); | 1071 | extern void ip_vs_estimator_cleanup(void); |
915 | extern void ip_vs_new_estimator(struct ip_vs_stats *stats); | 1072 | extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats); |
916 | extern void ip_vs_kill_estimator(struct ip_vs_stats *stats); | 1073 | extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats); |
917 | extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); | 1074 | extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); |
918 | 1075 | ||
919 | /* | 1076 | /* |
@@ -955,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6 | |||
955 | extern int ip_vs_drop_rate; | 1112 | extern int ip_vs_drop_rate; |
956 | extern int ip_vs_drop_counter; | 1113 | extern int ip_vs_drop_counter; |
957 | 1114 | ||
958 | static __inline__ int ip_vs_todrop(void) | 1115 | static inline int ip_vs_todrop(struct netns_ipvs *ipvs) |
959 | { | 1116 | { |
960 | if (!ip_vs_drop_rate) return 0; | 1117 | if (!ipvs->drop_rate) |
961 | if (--ip_vs_drop_counter > 0) return 0; | 1118 | return 0; |
962 | ip_vs_drop_counter = ip_vs_drop_rate; | 1119 | if (--ipvs->drop_counter > 0) |
1120 | return 0; | ||
1121 | ipvs->drop_counter = ipvs->drop_rate; | ||
963 | return 1; | 1122 | return 1; |
964 | } | 1123 | } |
965 | 1124 | ||
@@ -1047,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb) | |||
1047 | * Netfilter connection tracking | 1206 | * Netfilter connection tracking |
1048 | * (from ip_vs_nfct.c) | 1207 | * (from ip_vs_nfct.c) |
1049 | */ | 1208 | */ |
1050 | static inline int ip_vs_conntrack_enabled(void) | 1209 | static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) |
1051 | { | 1210 | { |
1052 | return sysctl_ip_vs_conntrack; | 1211 | return ipvs->sysctl_conntrack; |
1053 | } | 1212 | } |
1054 | 1213 | ||
1055 | extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, | 1214 | extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, |
@@ -1062,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); | |||
1062 | 1221 | ||
1063 | #else | 1222 | #else |
1064 | 1223 | ||
1065 | static inline int ip_vs_conntrack_enabled(void) | 1224 | static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) |
1066 | { | 1225 | { |
1067 | return 0; | 1226 | return 0; |
1068 | } | 1227 | } |
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1bf812b21fb7..b3b4a34cb2cc 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <net/netns/conntrack.h> | 20 | #include <net/netns/conntrack.h> |
21 | #endif | 21 | #endif |
22 | #include <net/netns/xfrm.h> | 22 | #include <net/netns/xfrm.h> |
23 | #include <net/netns/ip_vs.h> | ||
23 | 24 | ||
24 | struct proc_dir_entry; | 25 | struct proc_dir_entry; |
25 | struct net_device; | 26 | struct net_device; |
@@ -94,6 +95,7 @@ struct net { | |||
94 | #ifdef CONFIG_XFRM | 95 | #ifdef CONFIG_XFRM |
95 | struct netns_xfrm xfrm; | 96 | struct netns_xfrm xfrm; |
96 | #endif | 97 | #endif |
98 | struct netns_ipvs *ipvs; | ||
97 | }; | 99 | }; |
98 | 100 | ||
99 | 101 | ||
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d85cff10e169..d0d13378991e 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h | |||
@@ -50,11 +50,24 @@ union nf_conntrack_expect_proto { | |||
50 | /* per conntrack: application helper private data */ | 50 | /* per conntrack: application helper private data */ |
51 | union nf_conntrack_help { | 51 | union nf_conntrack_help { |
52 | /* insert conntrack helper private data (master) here */ | 52 | /* insert conntrack helper private data (master) here */ |
53 | #if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE) | ||
53 | struct nf_ct_ftp_master ct_ftp_info; | 54 | struct nf_ct_ftp_master ct_ftp_info; |
55 | #endif | ||
56 | #if defined(CONFIG_NF_CONNTRACK_PPTP) || \ | ||
57 | defined(CONFIG_NF_CONNTRACK_PPTP_MODULE) | ||
54 | struct nf_ct_pptp_master ct_pptp_info; | 58 | struct nf_ct_pptp_master ct_pptp_info; |
59 | #endif | ||
60 | #if defined(CONFIG_NF_CONNTRACK_H323) || \ | ||
61 | defined(CONFIG_NF_CONNTRACK_H323_MODULE) | ||
55 | struct nf_ct_h323_master ct_h323_info; | 62 | struct nf_ct_h323_master ct_h323_info; |
63 | #endif | ||
64 | #if defined(CONFIG_NF_CONNTRACK_SANE) || \ | ||
65 | defined(CONFIG_NF_CONNTRACK_SANE_MODULE) | ||
56 | struct nf_ct_sane_master ct_sane_info; | 66 | struct nf_ct_sane_master ct_sane_info; |
67 | #endif | ||
68 | #if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE) | ||
57 | struct nf_ct_sip_master ct_sip_info; | 69 | struct nf_ct_sip_master ct_sip_info; |
70 | #endif | ||
58 | }; | 71 | }; |
59 | 72 | ||
60 | #include <linux/types.h> | 73 | #include <linux/types.h> |
@@ -116,14 +129,14 @@ struct nf_conn { | |||
116 | u_int32_t secmark; | 129 | u_int32_t secmark; |
117 | #endif | 130 | #endif |
118 | 131 | ||
119 | /* Storage reserved for other modules: */ | ||
120 | union nf_conntrack_proto proto; | ||
121 | |||
122 | /* Extensions */ | 132 | /* Extensions */ |
123 | struct nf_ct_ext *ext; | 133 | struct nf_ct_ext *ext; |
124 | #ifdef CONFIG_NET_NS | 134 | #ifdef CONFIG_NET_NS |
125 | struct net *ct_net; | 135 | struct net *ct_net; |
126 | #endif | 136 | #endif |
137 | |||
138 | /* Storage reserved for other modules, must be the last member */ | ||
139 | union nf_conntrack_proto proto; | ||
127 | }; | 140 | }; |
128 | 141 | ||
129 | static inline struct nf_conn * | 142 | static inline struct nf_conn * |
@@ -189,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto); | |||
189 | * Allocate a hashtable of hlist_head (if nulls == 0), | 202 | * Allocate a hashtable of hlist_head (if nulls == 0), |
190 | * or hlist_nulls_head (if nulls == 1) | 203 | * or hlist_nulls_head (if nulls == 1) |
191 | */ | 204 | */ |
192 | extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls); | 205 | extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls); |
193 | 206 | ||
194 | extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size); | 207 | extern void nf_ct_free_hashtable(void *hash, unsigned int size); |
195 | 208 | ||
196 | extern struct nf_conntrack_tuple_hash * | 209 | extern struct nf_conntrack_tuple_hash * |
197 | __nf_conntrack_find(struct net *net, u16 zone, | 210 | __nf_conntrack_find(struct net *net, u16 zone, |
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 96ba5f7dcab6..8fdb04b8cce0 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h | |||
@@ -23,12 +23,17 @@ struct nf_conntrack_ecache { | |||
23 | static inline struct nf_conntrack_ecache * | 23 | static inline struct nf_conntrack_ecache * |
24 | nf_ct_ecache_find(const struct nf_conn *ct) | 24 | nf_ct_ecache_find(const struct nf_conn *ct) |
25 | { | 25 | { |
26 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
26 | return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); | 27 | return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); |
28 | #else | ||
29 | return NULL; | ||
30 | #endif | ||
27 | } | 31 | } |
28 | 32 | ||
29 | static inline struct nf_conntrack_ecache * | 33 | static inline struct nf_conntrack_ecache * |
30 | nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) | 34 | nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) |
31 | { | 35 | { |
36 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
32 | struct net *net = nf_ct_net(ct); | 37 | struct net *net = nf_ct_net(ct); |
33 | struct nf_conntrack_ecache *e; | 38 | struct nf_conntrack_ecache *e; |
34 | 39 | ||
@@ -45,6 +50,9 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) | |||
45 | e->expmask = expmask; | 50 | e->expmask = expmask; |
46 | } | 51 | } |
47 | return e; | 52 | return e; |
53 | #else | ||
54 | return NULL; | ||
55 | #endif | ||
48 | }; | 56 | }; |
49 | 57 | ||
50 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | 58 | #ifdef CONFIG_NF_CONNTRACK_EVENTS |
@@ -59,7 +67,7 @@ struct nf_ct_event_notifier { | |||
59 | int (*fcn)(unsigned int events, struct nf_ct_event *item); | 67 | int (*fcn)(unsigned int events, struct nf_ct_event *item); |
60 | }; | 68 | }; |
61 | 69 | ||
62 | extern struct nf_ct_event_notifier *nf_conntrack_event_cb; | 70 | extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; |
63 | extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb); | 71 | extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb); |
64 | extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb); | 72 | extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb); |
65 | 73 | ||
@@ -159,7 +167,7 @@ struct nf_exp_event_notifier { | |||
159 | int (*fcn)(unsigned int events, struct nf_exp_event *item); | 167 | int (*fcn)(unsigned int events, struct nf_exp_event *item); |
160 | }; | 168 | }; |
161 | 169 | ||
162 | extern struct nf_exp_event_notifier *nf_expect_event_cb; | 170 | extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb; |
163 | extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb); | 171 | extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb); |
164 | extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb); | 172 | extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb); |
165 | 173 | ||
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 0772d296dfdb..2dcf31703acb 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h | |||
@@ -7,10 +7,19 @@ | |||
7 | 7 | ||
8 | enum nf_ct_ext_id { | 8 | enum nf_ct_ext_id { |
9 | NF_CT_EXT_HELPER, | 9 | NF_CT_EXT_HELPER, |
10 | #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) | ||
10 | NF_CT_EXT_NAT, | 11 | NF_CT_EXT_NAT, |
12 | #endif | ||
11 | NF_CT_EXT_ACCT, | 13 | NF_CT_EXT_ACCT, |
14 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
12 | NF_CT_EXT_ECACHE, | 15 | NF_CT_EXT_ECACHE, |
16 | #endif | ||
17 | #ifdef CONFIG_NF_CONNTRACK_ZONES | ||
13 | NF_CT_EXT_ZONE, | 18 | NF_CT_EXT_ZONE, |
19 | #endif | ||
20 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
21 | NF_CT_EXT_TSTAMP, | ||
22 | #endif | ||
14 | NF_CT_EXT_NUM, | 23 | NF_CT_EXT_NUM, |
15 | }; | 24 | }; |
16 | 25 | ||
@@ -19,6 +28,7 @@ enum nf_ct_ext_id { | |||
19 | #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter | 28 | #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter |
20 | #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache | 29 | #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache |
21 | #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone | 30 | #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone |
31 | #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp | ||
22 | 32 | ||
23 | /* Extensions: optional stuff which isn't permanently in struct. */ | 33 | /* Extensions: optional stuff which isn't permanently in struct. */ |
24 | struct nf_ct_ext { | 34 | struct nf_ct_ext { |
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 32c305dbdab6..f1c1311adc2c 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h | |||
@@ -63,4 +63,10 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) | |||
63 | extern int nf_conntrack_helper_init(void); | 63 | extern int nf_conntrack_helper_init(void); |
64 | extern void nf_conntrack_helper_fini(void); | 64 | extern void nf_conntrack_helper_fini(void); |
65 | 65 | ||
66 | extern int nf_conntrack_broadcast_help(struct sk_buff *skb, | ||
67 | unsigned int protoff, | ||
68 | struct nf_conn *ct, | ||
69 | enum ip_conntrack_info ctinfo, | ||
70 | unsigned int timeout); | ||
71 | |||
66 | #endif /*_NF_CONNTRACK_HELPER_H*/ | 72 | #endif /*_NF_CONNTRACK_HELPER_H*/ |
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index a7547611e8f1..e8010f445ae1 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h | |||
@@ -73,7 +73,7 @@ struct nf_conntrack_l3proto { | |||
73 | struct module *me; | 73 | struct module *me; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX]; | 76 | extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; |
77 | 77 | ||
78 | /* Protocol registration. */ | 78 | /* Protocol registration. */ |
79 | extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); | 79 | extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); |
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h new file mode 100644 index 000000000000..f17dcb664e29 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_timestamp.h | |||
@@ -0,0 +1,53 @@ | |||
1 | #ifndef _NF_CONNTRACK_TSTAMP_H | ||
2 | #define _NF_CONNTRACK_TSTAMP_H | ||
3 | |||
4 | #include <net/net_namespace.h> | ||
5 | #include <linux/netfilter/nf_conntrack_common.h> | ||
6 | #include <linux/netfilter/nf_conntrack_tuple_common.h> | ||
7 | #include <net/netfilter/nf_conntrack.h> | ||
8 | #include <net/netfilter/nf_conntrack_extend.h> | ||
9 | |||
10 | struct nf_conn_tstamp { | ||
11 | u_int64_t start; | ||
12 | u_int64_t stop; | ||
13 | }; | ||
14 | |||
15 | static inline | ||
16 | struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct) | ||
17 | { | ||
18 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
19 | return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP); | ||
20 | #else | ||
21 | return NULL; | ||
22 | #endif | ||
23 | } | ||
24 | |||
25 | static inline | ||
26 | struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) | ||
27 | { | ||
28 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
29 | struct net *net = nf_ct_net(ct); | ||
30 | |||
31 | if (!net->ct.sysctl_tstamp) | ||
32 | return NULL; | ||
33 | |||
34 | return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp); | ||
35 | #else | ||
36 | return NULL; | ||
37 | #endif | ||
38 | }; | ||
39 | |||
40 | static inline bool nf_ct_tstamp_enabled(struct net *net) | ||
41 | { | ||
42 | return net->ct.sysctl_tstamp != 0; | ||
43 | } | ||
44 | |||
45 | static inline void nf_ct_set_tstamp(struct net *net, bool enable) | ||
46 | { | ||
47 | net->ct.sysctl_tstamp = enable; | ||
48 | } | ||
49 | |||
50 | extern int nf_conntrack_tstamp_init(struct net *net); | ||
51 | extern void nf_conntrack_tstamp_fini(struct net *net); | ||
52 | |||
53 | #endif /* _NF_CONNTRACK_TSTAMP_H */ | ||
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index f5f09f032a90..aff80b190c12 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h | |||
@@ -56,7 +56,9 @@ struct nf_nat_multi_range_compat { | |||
56 | /* per conntrack: nat application helper private data */ | 56 | /* per conntrack: nat application helper private data */ |
57 | union nf_conntrack_nat_help { | 57 | union nf_conntrack_nat_help { |
58 | /* insert nat helper private data here */ | 58 | /* insert nat helper private data here */ |
59 | #if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE) | ||
59 | struct nf_nat_pptp nat_pptp_info; | 60 | struct nf_nat_pptp nat_pptp_info; |
61 | #endif | ||
60 | }; | 62 | }; |
61 | 63 | ||
62 | struct nf_conn; | 64 | struct nf_conn; |
@@ -84,7 +86,11 @@ extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, | |||
84 | 86 | ||
85 | static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) | 87 | static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) |
86 | { | 88 | { |
89 | #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) | ||
87 | return nf_ct_ext_find(ct, NF_CT_EXT_NAT); | 90 | return nf_ct_ext_find(ct, NF_CT_EXT_NAT); |
91 | #else | ||
92 | return NULL; | ||
93 | #endif | ||
88 | } | 94 | } |
89 | 95 | ||
90 | #else /* !__KERNEL__: iptables wants this to compile. */ | 96 | #else /* !__KERNEL__: iptables wants this to compile. */ |
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index 33602ab66190..3dc7b98effeb 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h | |||
@@ -21,9 +21,9 @@ static inline int nf_nat_initialized(struct nf_conn *ct, | |||
21 | enum nf_nat_manip_type manip) | 21 | enum nf_nat_manip_type manip) |
22 | { | 22 | { |
23 | if (manip == IP_NAT_MANIP_SRC) | 23 | if (manip == IP_NAT_MANIP_SRC) |
24 | return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); | 24 | return ct->status & IPS_SRC_NAT_DONE; |
25 | else | 25 | else |
26 | return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status); | 26 | return ct->status & IPS_DST_NAT_DONE; |
27 | } | 27 | } |
28 | 28 | ||
29 | struct nlattr; | 29 | struct nlattr; |
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index d4958d4c6574..341eb089349e 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h | |||
@@ -21,15 +21,15 @@ struct netns_ct { | |||
21 | int sysctl_events; | 21 | int sysctl_events; |
22 | unsigned int sysctl_events_retry_timeout; | 22 | unsigned int sysctl_events_retry_timeout; |
23 | int sysctl_acct; | 23 | int sysctl_acct; |
24 | int sysctl_tstamp; | ||
24 | int sysctl_checksum; | 25 | int sysctl_checksum; |
25 | unsigned int sysctl_log_invalid; /* Log invalid packets */ | 26 | unsigned int sysctl_log_invalid; /* Log invalid packets */ |
26 | #ifdef CONFIG_SYSCTL | 27 | #ifdef CONFIG_SYSCTL |
27 | struct ctl_table_header *sysctl_header; | 28 | struct ctl_table_header *sysctl_header; |
28 | struct ctl_table_header *acct_sysctl_header; | 29 | struct ctl_table_header *acct_sysctl_header; |
30 | struct ctl_table_header *tstamp_sysctl_header; | ||
29 | struct ctl_table_header *event_sysctl_header; | 31 | struct ctl_table_header *event_sysctl_header; |
30 | #endif | 32 | #endif |
31 | int hash_vmalloc; | ||
32 | int expect_vmalloc; | ||
33 | char *slabname; | 33 | char *slabname; |
34 | }; | 34 | }; |
35 | #endif | 35 | #endif |
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h new file mode 100644 index 000000000000..259ebac904bf --- /dev/null +++ b/include/net/netns/ip_vs.h | |||
@@ -0,0 +1,143 @@ | |||
1 | /* | ||
2 | * IP Virtual Server | ||
3 | * Data structure for network namespace | ||
4 | * | ||
5 | */ | ||
6 | |||
7 | #ifndef IP_VS_H_ | ||
8 | #define IP_VS_H_ | ||
9 | |||
10 | #include <linux/list.h> | ||
11 | #include <linux/mutex.h> | ||
12 | #include <linux/list_nulls.h> | ||
13 | #include <linux/ip_vs.h> | ||
14 | #include <asm/atomic.h> | ||
15 | #include <linux/in.h> | ||
16 | |||
17 | struct ip_vs_stats; | ||
18 | struct ip_vs_sync_buff; | ||
19 | struct ctl_table_header; | ||
20 | |||
21 | struct netns_ipvs { | ||
22 | int gen; /* Generation */ | ||
23 | /* | ||
24 | * Hash table: for real service lookups | ||
25 | */ | ||
26 | #define IP_VS_RTAB_BITS 4 | ||
27 | #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) | ||
28 | #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) | ||
29 | |||
30 | struct list_head rs_table[IP_VS_RTAB_SIZE]; | ||
31 | /* ip_vs_app */ | ||
32 | struct list_head app_list; | ||
33 | struct mutex app_mutex; | ||
34 | struct lock_class_key app_key; /* mutex debugging */ | ||
35 | |||
36 | /* ip_vs_proto */ | ||
37 | #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ | ||
38 | struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; | ||
39 | /* ip_vs_proto_tcp */ | ||
40 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
41 | #define TCP_APP_TAB_BITS 4 | ||
42 | #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) | ||
43 | #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) | ||
44 | struct list_head tcp_apps[TCP_APP_TAB_SIZE]; | ||
45 | spinlock_t tcp_app_lock; | ||
46 | #endif | ||
47 | /* ip_vs_proto_udp */ | ||
48 | #ifdef CONFIG_IP_VS_PROTO_UDP | ||
49 | #define UDP_APP_TAB_BITS 4 | ||
50 | #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) | ||
51 | #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) | ||
52 | struct list_head udp_apps[UDP_APP_TAB_SIZE]; | ||
53 | spinlock_t udp_app_lock; | ||
54 | #endif | ||
55 | /* ip_vs_proto_sctp */ | ||
56 | #ifdef CONFIG_IP_VS_PROTO_SCTP | ||
57 | #define SCTP_APP_TAB_BITS 4 | ||
58 | #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) | ||
59 | #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) | ||
60 | /* Hash table for SCTP application incarnations */ | ||
61 | struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; | ||
62 | spinlock_t sctp_app_lock; | ||
63 | #endif | ||
64 | /* ip_vs_conn */ | ||
65 | atomic_t conn_count; /* connection counter */ | ||
66 | |||
67 | /* ip_vs_ctl */ | ||
68 | struct ip_vs_stats *tot_stats; /* Statistics & est. */ | ||
69 | struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ | ||
70 | seqcount_t *ustats_seq; /* u64 read retry */ | ||
71 | |||
72 | int num_services; /* no of virtual services */ | ||
73 | /* 1/rate drop and drop-entry variables */ | ||
74 | struct delayed_work defense_work; /* Work handler */ | ||
75 | int drop_rate; | ||
76 | int drop_counter; | ||
77 | atomic_t dropentry; | ||
78 | /* locks in ctl.c */ | ||
79 | spinlock_t dropentry_lock; /* drop entry handling */ | ||
80 | spinlock_t droppacket_lock; /* drop packet handling */ | ||
81 | spinlock_t securetcp_lock; /* state and timeout tables */ | ||
82 | rwlock_t rs_lock; /* real services table */ | ||
83 | /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ | ||
84 | struct lock_class_key ctl_key; /* ctl_mutex debugging */ | ||
85 | /* Trash for destinations */ | ||
86 | struct list_head dest_trash; | ||
87 | /* Service counters */ | ||
88 | atomic_t ftpsvc_counter; | ||
89 | atomic_t nullsvc_counter; | ||
90 | |||
91 | /* sys-ctl struct */ | ||
92 | struct ctl_table_header *sysctl_hdr; | ||
93 | struct ctl_table *sysctl_tbl; | ||
94 | /* sysctl variables */ | ||
95 | int sysctl_amemthresh; | ||
96 | int sysctl_am_droprate; | ||
97 | int sysctl_drop_entry; | ||
98 | int sysctl_drop_packet; | ||
99 | int sysctl_secure_tcp; | ||
100 | #ifdef CONFIG_IP_VS_NFCT | ||
101 | int sysctl_conntrack; | ||
102 | #endif | ||
103 | int sysctl_snat_reroute; | ||
104 | int sysctl_sync_ver; | ||
105 | int sysctl_cache_bypass; | ||
106 | int sysctl_expire_nodest_conn; | ||
107 | int sysctl_expire_quiescent_template; | ||
108 | int sysctl_sync_threshold[2]; | ||
109 | int sysctl_nat_icmp_send; | ||
110 | |||
111 | /* ip_vs_lblc */ | ||
112 | int sysctl_lblc_expiration; | ||
113 | struct ctl_table_header *lblc_ctl_header; | ||
114 | struct ctl_table *lblc_ctl_table; | ||
115 | /* ip_vs_lblcr */ | ||
116 | int sysctl_lblcr_expiration; | ||
117 | struct ctl_table_header *lblcr_ctl_header; | ||
118 | struct ctl_table *lblcr_ctl_table; | ||
119 | /* ip_vs_est */ | ||
120 | struct list_head est_list; /* estimator list */ | ||
121 | spinlock_t est_lock; | ||
122 | struct timer_list est_timer; /* Estimation timer */ | ||
123 | /* ip_vs_sync */ | ||
124 | struct list_head sync_queue; | ||
125 | spinlock_t sync_lock; | ||
126 | struct ip_vs_sync_buff *sync_buff; | ||
127 | spinlock_t sync_buff_lock; | ||
128 | struct sockaddr_in sync_mcast_addr; | ||
129 | struct task_struct *master_thread; | ||
130 | struct task_struct *backup_thread; | ||
131 | int send_mesg_maxlen; | ||
132 | int recv_mesg_maxlen; | ||
133 | volatile int sync_state; | ||
134 | volatile int master_syncid; | ||
135 | volatile int backup_syncid; | ||
136 | /* multicast interface name */ | ||
137 | char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
138 | char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
139 | /* net name space ptr */ | ||
140 | struct net *net; /* Needed by timer routines */ | ||
141 | }; | ||
142 | |||
143 | #endif /* IP_VS_H_ */ | ||
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d68c3f121774..e2e2ef57eca2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h | |||
@@ -43,7 +43,6 @@ struct netns_ipv4 { | |||
43 | struct xt_table *nat_table; | 43 | struct xt_table *nat_table; |
44 | struct hlist_head *nat_bysource; | 44 | struct hlist_head *nat_bysource; |
45 | unsigned int nat_htable_size; | 45 | unsigned int nat_htable_size; |
46 | int nat_vmalloced; | ||
47 | #endif | 46 | #endif |
48 | 47 | ||
49 | int sysctl_icmp_echo_ignore_all; | 48 | int sysctl_icmp_echo_ignore_all; |
diff --git a/kernel/audit.c b/kernel/audit.c index e4956244ae50..162e88e33bc9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -74,6 +74,8 @@ static int audit_initialized; | |||
74 | int audit_enabled; | 74 | int audit_enabled; |
75 | int audit_ever_enabled; | 75 | int audit_ever_enabled; |
76 | 76 | ||
77 | EXPORT_SYMBOL_GPL(audit_enabled); | ||
78 | |||
77 | /* Default state when kernel boots without any parameters. */ | 79 | /* Default state when kernel boots without any parameters. */ |
78 | static int audit_default; | 80 | static int audit_default; |
79 | 81 | ||
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 50a46afc2bcc..2ed0056a39a8 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c | |||
@@ -22,9 +22,15 @@ | |||
22 | #include <linux/netfilter_bridge/ebtables.h> | 22 | #include <linux/netfilter_bridge/ebtables.h> |
23 | #include <linux/netfilter_bridge/ebt_ip6.h> | 23 | #include <linux/netfilter_bridge/ebt_ip6.h> |
24 | 24 | ||
25 | struct tcpudphdr { | 25 | union pkthdr { |
26 | __be16 src; | 26 | struct { |
27 | __be16 dst; | 27 | __be16 src; |
28 | __be16 dst; | ||
29 | } tcpudphdr; | ||
30 | struct { | ||
31 | u8 type; | ||
32 | u8 code; | ||
33 | } icmphdr; | ||
28 | }; | 34 | }; |
29 | 35 | ||
30 | static bool | 36 | static bool |
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
33 | const struct ebt_ip6_info *info = par->matchinfo; | 39 | const struct ebt_ip6_info *info = par->matchinfo; |
34 | const struct ipv6hdr *ih6; | 40 | const struct ipv6hdr *ih6; |
35 | struct ipv6hdr _ip6h; | 41 | struct ipv6hdr _ip6h; |
36 | const struct tcpudphdr *pptr; | 42 | const union pkthdr *pptr; |
37 | struct tcpudphdr _ports; | 43 | union pkthdr _pkthdr; |
38 | 44 | ||
39 | ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); | 45 | ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); |
40 | if (ih6 == NULL) | 46 | if (ih6 == NULL) |
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
56 | return false; | 62 | return false; |
57 | if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) | 63 | if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) |
58 | return false; | 64 | return false; |
59 | if (!(info->bitmask & EBT_IP6_DPORT) && | 65 | if (!(info->bitmask & ( EBT_IP6_DPORT | |
60 | !(info->bitmask & EBT_IP6_SPORT)) | 66 | EBT_IP6_SPORT | EBT_IP6_ICMP6))) |
61 | return true; | 67 | return true; |
62 | pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), | 68 | |
63 | &_ports); | 69 | /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ |
70 | pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), | ||
71 | &_pkthdr); | ||
64 | if (pptr == NULL) | 72 | if (pptr == NULL) |
65 | return false; | 73 | return false; |
66 | if (info->bitmask & EBT_IP6_DPORT) { | 74 | if (info->bitmask & EBT_IP6_DPORT) { |
67 | u32 dst = ntohs(pptr->dst); | 75 | u16 dst = ntohs(pptr->tcpudphdr.dst); |
68 | if (FWINV(dst < info->dport[0] || | 76 | if (FWINV(dst < info->dport[0] || |
69 | dst > info->dport[1], EBT_IP6_DPORT)) | 77 | dst > info->dport[1], EBT_IP6_DPORT)) |
70 | return false; | 78 | return false; |
71 | } | 79 | } |
72 | if (info->bitmask & EBT_IP6_SPORT) { | 80 | if (info->bitmask & EBT_IP6_SPORT) { |
73 | u32 src = ntohs(pptr->src); | 81 | u16 src = ntohs(pptr->tcpudphdr.src); |
74 | if (FWINV(src < info->sport[0] || | 82 | if (FWINV(src < info->sport[0] || |
75 | src > info->sport[1], EBT_IP6_SPORT)) | 83 | src > info->sport[1], EBT_IP6_SPORT)) |
76 | return false; | 84 | return false; |
77 | } | 85 | } |
78 | return true; | 86 | if ((info->bitmask & EBT_IP6_ICMP6) && |
87 | FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || | ||
88 | pptr->icmphdr.type > info->icmpv6_type[1] || | ||
89 | pptr->icmphdr.code < info->icmpv6_code[0] || | ||
90 | pptr->icmphdr.code > info->icmpv6_code[1], | ||
91 | EBT_IP6_ICMP6)) | ||
92 | return false; | ||
79 | } | 93 | } |
80 | return true; | 94 | return true; |
81 | } | 95 | } |
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) | |||
103 | return -EINVAL; | 117 | return -EINVAL; |
104 | if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) | 118 | if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) |
105 | return -EINVAL; | 119 | return -EINVAL; |
120 | if (info->bitmask & EBT_IP6_ICMP6) { | ||
121 | if ((info->invflags & EBT_IP6_PROTO) || | ||
122 | info->protocol != IPPROTO_ICMPV6) | ||
123 | return -EINVAL; | ||
124 | if (info->icmpv6_type[0] > info->icmpv6_type[1] || | ||
125 | info->icmpv6_code[0] > info->icmpv6_code[1]) | ||
126 | return -EINVAL; | ||
127 | } | ||
106 | return 0; | 128 | return 0; |
107 | } | 129 | } |
108 | 130 | ||
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 16df0532d4b9..5f1825df9dca 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c | |||
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info, | |||
1764 | 1764 | ||
1765 | newinfo->entries_size = size; | 1765 | newinfo->entries_size = size; |
1766 | 1766 | ||
1767 | xt_compat_init_offsets(AF_INET, info->nentries); | ||
1767 | return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, | 1768 | return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, |
1768 | entries, newinfo); | 1769 | entries, newinfo); |
1769 | } | 1770 | } |
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index a5a1050595d1..8949a05ac307 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE | |||
140 | handled by the klogd daemon which is responsible for kernel messages | 140 | handled by the klogd daemon which is responsible for kernel messages |
141 | ("man klogd"). | 141 | ("man klogd"). |
142 | 142 | ||
143 | config IP_ROUTE_CLASSID | ||
144 | bool | ||
145 | |||
143 | config IP_PNP | 146 | config IP_PNP |
144 | bool "IP: kernel level autoconfiguration" | 147 | bool "IP: kernel level autoconfiguration" |
145 | help | 148 | help |
@@ -657,4 +660,3 @@ config TCP_MD5SIG | |||
657 | on the Internet. | 660 | on the Internet. |
658 | 661 | ||
659 | If unsure, say N. | 662 | If unsure, say N. |
660 | |||
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 7981a24f5c7b..9cefe72029cf 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -41,12 +41,12 @@ struct fib4_rule { | |||
41 | __be32 srcmask; | 41 | __be32 srcmask; |
42 | __be32 dst; | 42 | __be32 dst; |
43 | __be32 dstmask; | 43 | __be32 dstmask; |
44 | #ifdef CONFIG_NET_CLS_ROUTE | 44 | #ifdef CONFIG_IP_ROUTE_CLASSID |
45 | u32 tclassid; | 45 | u32 tclassid; |
46 | #endif | 46 | #endif |
47 | }; | 47 | }; |
48 | 48 | ||
49 | #ifdef CONFIG_NET_CLS_ROUTE | 49 | #ifdef CONFIG_IP_ROUTE_CLASSID |
50 | u32 fib_rules_tclass(struct fib_result *res) | 50 | u32 fib_rules_tclass(struct fib_result *res) |
51 | { | 51 | { |
52 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; | 52 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; |
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | |||
165 | if (frh->dst_len) | 165 | if (frh->dst_len) |
166 | rule4->dst = nla_get_be32(tb[FRA_DST]); | 166 | rule4->dst = nla_get_be32(tb[FRA_DST]); |
167 | 167 | ||
168 | #ifdef CONFIG_NET_CLS_ROUTE | 168 | #ifdef CONFIG_IP_ROUTE_CLASSID |
169 | if (tb[FRA_FLOW]) | 169 | if (tb[FRA_FLOW]) |
170 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); | 170 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); |
171 | #endif | 171 | #endif |
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, | |||
195 | if (frh->tos && (rule4->tos != frh->tos)) | 195 | if (frh->tos && (rule4->tos != frh->tos)) |
196 | return 0; | 196 | return 0; |
197 | 197 | ||
198 | #ifdef CONFIG_NET_CLS_ROUTE | 198 | #ifdef CONFIG_IP_ROUTE_CLASSID |
199 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) | 199 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) |
200 | return 0; | 200 | return 0; |
201 | #endif | 201 | #endif |
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | |||
224 | if (rule4->src_len) | 224 | if (rule4->src_len) |
225 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); | 225 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); |
226 | 226 | ||
227 | #ifdef CONFIG_NET_CLS_ROUTE | 227 | #ifdef CONFIG_IP_ROUTE_CLASSID |
228 | if (rule4->tclassid) | 228 | if (rule4->tclassid) |
229 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); | 229 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); |
230 | #endif | 230 | #endif |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 12d3dc3df1b7..9aff11d7278f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -200,7 +200,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) | |||
200 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 200 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
201 | nh->nh_weight != onh->nh_weight || | 201 | nh->nh_weight != onh->nh_weight || |
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_NET_CLS_ROUTE | 203 | #ifdef CONFIG_IP_ROUTE_CLASSID |
204 | nh->nh_tclassid != onh->nh_tclassid || | 204 | nh->nh_tclassid != onh->nh_tclassid || |
205 | #endif | 205 | #endif |
206 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) | 206 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) |
@@ -422,7 +422,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, | |||
422 | 422 | ||
423 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 423 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
424 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; | 424 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; |
425 | #ifdef CONFIG_NET_CLS_ROUTE | 425 | #ifdef CONFIG_IP_ROUTE_CLASSID |
426 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 426 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
427 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; | 427 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; |
428 | #endif | 428 | #endif |
@@ -476,7 +476,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) | |||
476 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 476 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
477 | if (nla && nla_get_be32(nla) != nh->nh_gw) | 477 | if (nla && nla_get_be32(nla) != nh->nh_gw) |
478 | return 1; | 478 | return 1; |
479 | #ifdef CONFIG_NET_CLS_ROUTE | 479 | #ifdef CONFIG_IP_ROUTE_CLASSID |
480 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 480 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
481 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) | 481 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) |
482 | return 1; | 482 | return 1; |
@@ -779,7 +779,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
779 | goto err_inval; | 779 | goto err_inval; |
780 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) | 780 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) |
781 | goto err_inval; | 781 | goto err_inval; |
782 | #ifdef CONFIG_NET_CLS_ROUTE | 782 | #ifdef CONFIG_IP_ROUTE_CLASSID |
783 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) | 783 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) |
784 | goto err_inval; | 784 | goto err_inval; |
785 | #endif | 785 | #endif |
@@ -792,7 +792,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
792 | nh->nh_oif = cfg->fc_oif; | 792 | nh->nh_oif = cfg->fc_oif; |
793 | nh->nh_gw = cfg->fc_gw; | 793 | nh->nh_gw = cfg->fc_gw; |
794 | nh->nh_flags = cfg->fc_flags; | 794 | nh->nh_flags = cfg->fc_flags; |
795 | #ifdef CONFIG_NET_CLS_ROUTE | 795 | #ifdef CONFIG_IP_ROUTE_CLASSID |
796 | nh->nh_tclassid = cfg->fc_flow; | 796 | nh->nh_tclassid = cfg->fc_flow; |
797 | #endif | 797 | #endif |
798 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 798 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
@@ -1002,7 +1002,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
1002 | 1002 | ||
1003 | if (fi->fib_nh->nh_oif) | 1003 | if (fi->fib_nh->nh_oif) |
1004 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); | 1004 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); |
1005 | #ifdef CONFIG_NET_CLS_ROUTE | 1005 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1006 | if (fi->fib_nh[0].nh_tclassid) | 1006 | if (fi->fib_nh[0].nh_tclassid) |
1007 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); | 1007 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); |
1008 | #endif | 1008 | #endif |
@@ -1027,7 +1027,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
1027 | 1027 | ||
1028 | if (nh->nh_gw) | 1028 | if (nh->nh_gw) |
1029 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); | 1029 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); |
1030 | #ifdef CONFIG_NET_CLS_ROUTE | 1030 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1031 | if (nh->nh_tclassid) | 1031 | if (nh->nh_tclassid) |
1032 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); | 1032 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); |
1033 | #endif | 1033 | #endif |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d859bcc26cb7..d7b2b0987a3b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
340 | } | 340 | } |
341 | } | 341 | } |
342 | 342 | ||
343 | #ifdef CONFIG_NET_CLS_ROUTE | 343 | #ifdef CONFIG_IP_ROUTE_CLASSID |
344 | if (unlikely(skb_dst(skb)->tclassid)) { | 344 | if (unlikely(skb_dst(skb)->tclassid)) { |
345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); | 345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); |
346 | u32 idx = skb_dst(skb)->tclassid; | 346 | u32 idx = skb_dst(skb)->tclassid; |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index babd1a2bae5f..f926a310075d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT | |||
206 | 206 | ||
207 | config NF_NAT_SNMP_BASIC | 207 | config NF_NAT_SNMP_BASIC |
208 | tristate "Basic SNMP-ALG support" | 208 | tristate "Basic SNMP-ALG support" |
209 | depends on NF_NAT | 209 | depends on NF_CONNTRACK_SNMP && NF_NAT |
210 | depends on NETFILTER_ADVANCED | 210 | depends on NETFILTER_ADVANCED |
211 | default NF_NAT && NF_CONNTRACK_SNMP | ||
211 | ---help--- | 212 | ---help--- |
212 | 213 | ||
213 | This module implements an Application Layer Gateway (ALG) for | 214 | This module implements an Application Layer Gateway (ALG) for |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e855fffaed95..e95054c690c6 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
867 | newinfo->initial_entries = 0; | 867 | newinfo->initial_entries = 0; |
868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
869 | xt_compat_init_offsets(NFPROTO_ARP, info->number); | ||
869 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 870 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
870 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 871 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
871 | if (ret != 0) | 872 | if (ret != 0) |
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name, | |||
1333 | duprintf("translate_compat_table: size %u\n", info->size); | 1334 | duprintf("translate_compat_table: size %u\n", info->size); |
1334 | j = 0; | 1335 | j = 0; |
1335 | xt_compat_lock(NFPROTO_ARP); | 1336 | xt_compat_lock(NFPROTO_ARP); |
1337 | xt_compat_init_offsets(NFPROTO_ARP, number); | ||
1336 | /* Walk through entries, checking offsets. */ | 1338 | /* Walk through entries, checking offsets. */ |
1337 | xt_entry_foreach(iter0, entry0, total_size) { | 1339 | xt_entry_foreach(iter0, entry0, total_size) { |
1338 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1340 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 652efea013dc..ef7d7b9680ea 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1064 | newinfo->initial_entries = 0; | 1064 | newinfo->initial_entries = 0; |
1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1066 | xt_compat_init_offsets(AF_INET, info->number); | ||
1066 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1067 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1067 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1068 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1068 | if (ret != 0) | 1069 | if (ret != 0) |
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net, | |||
1664 | duprintf("translate_compat_table: size %u\n", info->size); | 1665 | duprintf("translate_compat_table: size %u\n", info->size); |
1665 | j = 0; | 1666 | j = 0; |
1666 | xt_compat_lock(AF_INET); | 1667 | xt_compat_lock(AF_INET); |
1668 | xt_compat_init_offsets(AF_INET, number); | ||
1667 | /* Walk through entries, checking offsets. */ | 1669 | /* Walk through entries, checking offsets. */ |
1668 | xt_entry_foreach(iter0, entry0, total_size) { | 1670 | xt_entry_foreach(iter0, entry0, total_size) { |
1669 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1671 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1e26a4897655..403ca57f6011 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
300 | * that the ->target() function isn't called after ->destroy() */ | 300 | * that the ->target() function isn't called after ->destroy() */ |
301 | 301 | ||
302 | ct = nf_ct_get(skb, &ctinfo); | 302 | ct = nf_ct_get(skb, &ctinfo); |
303 | if (ct == NULL) { | 303 | if (ct == NULL) |
304 | pr_info("no conntrack!\n"); | ||
305 | /* FIXME: need to drop invalid ones, since replies | ||
306 | * to outgoing connections of other nodes will be | ||
307 | * marked as INVALID */ | ||
308 | return NF_DROP; | 304 | return NF_DROP; |
309 | } | ||
310 | 305 | ||
311 | /* special case: ICMP error handling. conntrack distinguishes between | 306 | /* special case: ICMP error handling. conntrack distinguishes between |
312 | * error messages (RELATED) and information requests (see below) */ | 307 | * error messages (RELATED) and information requests (see below) */ |
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 72ffc8fda2e9..d76d6c9ed946 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf, | |||
442 | } | 442 | } |
443 | #endif | 443 | #endif |
444 | 444 | ||
445 | /* MAC logging for input path only. */ | 445 | if (in != NULL) |
446 | if (in && !out) | ||
447 | dump_mac_header(m, loginfo, skb); | 446 | dump_mac_header(m, loginfo, skb); |
448 | 447 | ||
449 | dump_packet(m, loginfo, skb, 0); | 448 | dump_packet(m, loginfo, skb, 0); |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 63f60fc5d26a..5585980fce2e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <net/netfilter/nf_conntrack_l4proto.h> | 20 | #include <net/netfilter/nf_conntrack_l4proto.h> |
21 | #include <net/netfilter/nf_conntrack_expect.h> | 21 | #include <net/netfilter/nf_conntrack_expect.h> |
22 | #include <net/netfilter/nf_conntrack_acct.h> | 22 | #include <net/netfilter/nf_conntrack_acct.h> |
23 | #include <linux/rculist_nulls.h> | ||
23 | 24 | ||
24 | struct ct_iter_state { | 25 | struct ct_iter_state { |
25 | struct seq_net_private p; | 26 | struct seq_net_private p; |
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
35 | for (st->bucket = 0; | 36 | for (st->bucket = 0; |
36 | st->bucket < net->ct.htable_size; | 37 | st->bucket < net->ct.htable_size; |
37 | st->bucket++) { | 38 | st->bucket++) { |
38 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 39 | n = rcu_dereference( |
40 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
39 | if (!is_a_nulls(n)) | 41 | if (!is_a_nulls(n)) |
40 | return n; | 42 | return n; |
41 | } | 43 | } |
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
48 | struct net *net = seq_file_net(seq); | 50 | struct net *net = seq_file_net(seq); |
49 | struct ct_iter_state *st = seq->private; | 51 | struct ct_iter_state *st = seq->private; |
50 | 52 | ||
51 | head = rcu_dereference(head->next); | 53 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
52 | while (is_a_nulls(head)) { | 54 | while (is_a_nulls(head)) { |
53 | if (likely(get_nulls_value(head) == st->bucket)) { | 55 | if (likely(get_nulls_value(head) == st->bucket)) { |
54 | if (++st->bucket >= net->ct.htable_size) | 56 | if (++st->bucket >= net->ct.htable_size) |
55 | return NULL; | 57 | return NULL; |
56 | } | 58 | } |
57 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 59 | head = rcu_dereference( |
60 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
58 | } | 61 | } |
59 | return head; | 62 | return head; |
60 | } | 63 | } |
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | |||
217 | struct hlist_node *n; | 220 | struct hlist_node *n; |
218 | 221 | ||
219 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 222 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
220 | n = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 223 | n = rcu_dereference( |
224 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
221 | if (n) | 225 | if (n) |
222 | return n; | 226 | return n; |
223 | } | 227 | } |
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | |||
230 | struct net *net = seq_file_net(seq); | 234 | struct net *net = seq_file_net(seq); |
231 | struct ct_expect_iter_state *st = seq->private; | 235 | struct ct_expect_iter_state *st = seq->private; |
232 | 236 | ||
233 | head = rcu_dereference(head->next); | 237 | head = rcu_dereference(hlist_next_rcu(head)); |
234 | while (head == NULL) { | 238 | while (head == NULL) { |
235 | if (++st->bucket >= nf_ct_expect_hsize) | 239 | if (++st->bucket >= nf_ct_expect_hsize) |
236 | return NULL; | 240 | return NULL; |
237 | head = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 241 | head = rcu_dereference( |
242 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
238 | } | 243 | } |
239 | return head; | 244 | return head; |
240 | } | 245 | } |
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 0f23b3f06df0..703f366fd235 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c | |||
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb, | |||
44 | 44 | ||
45 | /* Try to get same port: if not, try to change it. */ | 45 | /* Try to get same port: if not, try to change it. */ |
46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | 46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { |
47 | int ret; | 47 | int res; |
48 | 48 | ||
49 | exp->tuple.dst.u.tcp.port = htons(port); | 49 | exp->tuple.dst.u.tcp.port = htons(port); |
50 | ret = nf_ct_expect_related(exp); | 50 | res = nf_ct_expect_related(exp); |
51 | if (ret == 0) | 51 | if (res == 0) |
52 | break; | 52 | break; |
53 | else if (ret != -EBUSY) { | 53 | else if (res != -EBUSY) { |
54 | port = 0; | 54 | port = 0; |
55 | break; | 55 | break; |
56 | } | 56 | } |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index c04787ce1a71..3002c0492fb0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -323,9 +323,9 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
323 | 323 | ||
324 | /* It's done. */ | 324 | /* It's done. */ |
325 | if (maniptype == IP_NAT_MANIP_DST) | 325 | if (maniptype == IP_NAT_MANIP_DST) |
326 | set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); | 326 | ct->status |= IPS_DST_NAT_DONE; |
327 | else | 327 | else |
328 | set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); | 328 | ct->status |= IPS_SRC_NAT_DONE; |
329 | 329 | ||
330 | return NF_ACCEPT; | 330 | return NF_ACCEPT; |
331 | } | 331 | } |
@@ -502,7 +502,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) | |||
502 | int ret = 0; | 502 | int ret = 0; |
503 | 503 | ||
504 | spin_lock_bh(&nf_nat_lock); | 504 | spin_lock_bh(&nf_nat_lock); |
505 | if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { | 505 | if (rcu_dereference_protected( |
506 | nf_nat_protos[proto->protonum], | ||
507 | lockdep_is_held(&nf_nat_lock) | ||
508 | ) != &nf_nat_unknown_protocol) { | ||
506 | ret = -EBUSY; | 509 | ret = -EBUSY; |
507 | goto out; | 510 | goto out; |
508 | } | 511 | } |
@@ -679,8 +682,7 @@ static int __net_init nf_nat_net_init(struct net *net) | |||
679 | { | 682 | { |
680 | /* Leave them the same for the moment. */ | 683 | /* Leave them the same for the moment. */ |
681 | net->ipv4.nat_htable_size = net->ct.htable_size; | 684 | net->ipv4.nat_htable_size = net->ct.htable_size; |
682 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, | 685 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); |
683 | &net->ipv4.nat_vmalloced, 0); | ||
684 | if (!net->ipv4.nat_bysource) | 686 | if (!net->ipv4.nat_bysource) |
685 | return -ENOMEM; | 687 | return -ENOMEM; |
686 | return 0; | 688 | return 0; |
@@ -702,8 +704,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) | |||
702 | { | 704 | { |
703 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | 705 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); |
704 | synchronize_rcu(); | 706 | synchronize_rcu(); |
705 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, | 707 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); |
706 | net->ipv4.nat_htable_size); | ||
707 | } | 708 | } |
708 | 709 | ||
709 | static struct pernet_operations nf_nat_net_ops = { | 710 | static struct pernet_operations nf_nat_net_ops = { |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ee5f419d0a56..8812a02078ab 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <net/netfilter/nf_conntrack_expect.h> | 54 | #include <net/netfilter/nf_conntrack_expect.h> |
55 | #include <net/netfilter/nf_conntrack_helper.h> | 55 | #include <net/netfilter/nf_conntrack_helper.h> |
56 | #include <net/netfilter/nf_nat_helper.h> | 56 | #include <net/netfilter/nf_nat_helper.h> |
57 | #include <linux/netfilter/nf_conntrack_snmp.h> | ||
57 | 58 | ||
58 | MODULE_LICENSE("GPL"); | 59 | MODULE_LICENSE("GPL"); |
59 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | 60 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); |
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void) | |||
1310 | { | 1311 | { |
1311 | int ret = 0; | 1312 | int ret = 0; |
1312 | 1313 | ||
1313 | ret = nf_conntrack_helper_register(&snmp_helper); | 1314 | BUG_ON(nf_nat_snmp_hook != NULL); |
1314 | if (ret < 0) | 1315 | rcu_assign_pointer(nf_nat_snmp_hook, help); |
1315 | return ret; | 1316 | |
1316 | ret = nf_conntrack_helper_register(&snmp_trap_helper); | 1317 | ret = nf_conntrack_helper_register(&snmp_trap_helper); |
1317 | if (ret < 0) { | 1318 | if (ret < 0) { |
1318 | nf_conntrack_helper_unregister(&snmp_helper); | 1319 | nf_conntrack_helper_unregister(&snmp_helper); |
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void) | |||
1323 | 1324 | ||
1324 | static void __exit nf_nat_snmp_basic_fini(void) | 1325 | static void __exit nf_nat_snmp_basic_fini(void) |
1325 | { | 1326 | { |
1326 | nf_conntrack_helper_unregister(&snmp_helper); | 1327 | rcu_assign_pointer(nf_nat_snmp_hook, NULL); |
1327 | nf_conntrack_helper_unregister(&snmp_trap_helper); | 1328 | nf_conntrack_helper_unregister(&snmp_trap_helper); |
1328 | } | 1329 | } |
1329 | 1330 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 351dc4e85242..3e5b7cc2db4f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -514,7 +514,7 @@ static const struct file_operations rt_cpu_seq_fops = { | |||
514 | .release = seq_release, | 514 | .release = seq_release, |
515 | }; | 515 | }; |
516 | 516 | ||
517 | #ifdef CONFIG_NET_CLS_ROUTE | 517 | #ifdef CONFIG_IP_ROUTE_CLASSID |
518 | static int rt_acct_proc_show(struct seq_file *m, void *v) | 518 | static int rt_acct_proc_show(struct seq_file *m, void *v) |
519 | { | 519 | { |
520 | struct ip_rt_acct *dst, *src; | 520 | struct ip_rt_acct *dst, *src; |
@@ -567,14 +567,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net) | |||
567 | if (!pde) | 567 | if (!pde) |
568 | goto err2; | 568 | goto err2; |
569 | 569 | ||
570 | #ifdef CONFIG_NET_CLS_ROUTE | 570 | #ifdef CONFIG_IP_ROUTE_CLASSID |
571 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); | 571 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); |
572 | if (!pde) | 572 | if (!pde) |
573 | goto err3; | 573 | goto err3; |
574 | #endif | 574 | #endif |
575 | return 0; | 575 | return 0; |
576 | 576 | ||
577 | #ifdef CONFIG_NET_CLS_ROUTE | 577 | #ifdef CONFIG_IP_ROUTE_CLASSID |
578 | err3: | 578 | err3: |
579 | remove_proc_entry("rt_cache", net->proc_net_stat); | 579 | remove_proc_entry("rt_cache", net->proc_net_stat); |
580 | #endif | 580 | #endif |
@@ -588,7 +588,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) | |||
588 | { | 588 | { |
589 | remove_proc_entry("rt_cache", net->proc_net_stat); | 589 | remove_proc_entry("rt_cache", net->proc_net_stat); |
590 | remove_proc_entry("rt_cache", net->proc_net); | 590 | remove_proc_entry("rt_cache", net->proc_net); |
591 | #ifdef CONFIG_NET_CLS_ROUTE | 591 | #ifdef CONFIG_IP_ROUTE_CLASSID |
592 | remove_proc_entry("rt_acct", net->proc_net); | 592 | remove_proc_entry("rt_acct", net->proc_net); |
593 | #endif | 593 | #endif |
594 | } | 594 | } |
@@ -1775,7 +1775,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1775 | memcpy(addr, &src, 4); | 1775 | memcpy(addr, &src, 4); |
1776 | } | 1776 | } |
1777 | 1777 | ||
1778 | #ifdef CONFIG_NET_CLS_ROUTE | 1778 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1779 | static void set_class_tag(struct rtable *rt, u32 tag) | 1779 | static void set_class_tag(struct rtable *rt, u32 tag) |
1780 | { | 1780 | { |
1781 | if (!(rt->dst.tclassid & 0xFFFF)) | 1781 | if (!(rt->dst.tclassid & 0xFFFF)) |
@@ -1825,7 +1825,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1826 | rt->rt_gateway = FIB_RES_GW(*res); | 1826 | rt->rt_gateway = FIB_RES_GW(*res); |
1827 | dst_import_metrics(dst, fi->fib_metrics); | 1827 | dst_import_metrics(dst, fi->fib_metrics); |
1828 | #ifdef CONFIG_NET_CLS_ROUTE | 1828 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; | 1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
1830 | #endif | 1830 | #endif |
1831 | } | 1831 | } |
@@ -1835,7 +1835,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) | 1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) |
1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); | 1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); |
1837 | 1837 | ||
1838 | #ifdef CONFIG_NET_CLS_ROUTE | 1838 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1840 | set_class_tag(rt, fib_rules_tclass(res)); | 1840 | set_class_tag(rt, fib_rules_tclass(res)); |
1841 | #endif | 1841 | #endif |
@@ -1891,7 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1891 | rth->fl.mark = skb->mark; | 1891 | rth->fl.mark = skb->mark; |
1892 | rth->fl.fl4_src = saddr; | 1892 | rth->fl.fl4_src = saddr; |
1893 | rth->rt_src = saddr; | 1893 | rth->rt_src = saddr; |
1894 | #ifdef CONFIG_NET_CLS_ROUTE | 1894 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1895 | rth->dst.tclassid = itag; | 1895 | rth->dst.tclassid = itag; |
1896 | #endif | 1896 | #endif |
1897 | rth->rt_iif = | 1897 | rth->rt_iif = |
@@ -2208,7 +2208,7 @@ local_input: | |||
2208 | rth->fl.mark = skb->mark; | 2208 | rth->fl.mark = skb->mark; |
2209 | rth->fl.fl4_src = saddr; | 2209 | rth->fl.fl4_src = saddr; |
2210 | rth->rt_src = saddr; | 2210 | rth->rt_src = saddr; |
2211 | #ifdef CONFIG_NET_CLS_ROUTE | 2211 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2212 | rth->dst.tclassid = itag; | 2212 | rth->dst.tclassid = itag; |
2213 | #endif | 2213 | #endif |
2214 | rth->rt_iif = | 2214 | rth->rt_iif = |
@@ -2828,7 +2828,7 @@ static int rt_fill_info(struct net *net, | |||
2828 | } | 2828 | } |
2829 | if (rt->dst.dev) | 2829 | if (rt->dst.dev) |
2830 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2830 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
2831 | #ifdef CONFIG_NET_CLS_ROUTE | 2831 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2832 | if (rt->dst.tclassid) | 2832 | if (rt->dst.tclassid) |
2833 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2833 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
2834 | #endif | 2834 | #endif |
@@ -3249,9 +3249,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = { | |||
3249 | }; | 3249 | }; |
3250 | 3250 | ||
3251 | 3251 | ||
3252 | #ifdef CONFIG_NET_CLS_ROUTE | 3252 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3253 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 3253 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
3254 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3254 | #endif /* CONFIG_IP_ROUTE_CLASSID */ |
3255 | 3255 | ||
3256 | static __initdata unsigned long rhash_entries; | 3256 | static __initdata unsigned long rhash_entries; |
3257 | static int __init set_rhash_entries(char *str) | 3257 | static int __init set_rhash_entries(char *str) |
@@ -3267,7 +3267,7 @@ int __init ip_rt_init(void) | |||
3267 | { | 3267 | { |
3268 | int rc = 0; | 3268 | int rc = 0; |
3269 | 3269 | ||
3270 | #ifdef CONFIG_NET_CLS_ROUTE | 3270 | #ifdef CONFIG_IP_ROUTE_CLASSID |
3271 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 3271 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
3272 | if (!ip_rt_acct) | 3272 | if (!ip_rt_acct) |
3273 | panic("IP: failed to allocate ip_rt_acct\n"); | 3273 | panic("IP: failed to allocate ip_rt_acct\n"); |
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 7d227c644f72..47b7b8df7fac 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1076 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1076 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1077 | newinfo->initial_entries = 0; | 1077 | newinfo->initial_entries = 0; |
1078 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1078 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1079 | xt_compat_init_offsets(AF_INET6, info->number); | ||
1079 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1080 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1080 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1081 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1081 | if (ret != 0) | 1082 | if (ret != 0) |
@@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net, | |||
1679 | duprintf("translate_compat_table: size %u\n", info->size); | 1680 | duprintf("translate_compat_table: size %u\n", info->size); |
1680 | j = 0; | 1681 | j = 0; |
1681 | xt_compat_lock(AF_INET6); | 1682 | xt_compat_lock(AF_INET6); |
1683 | xt_compat_init_offsets(AF_INET6, number); | ||
1682 | /* Walk through entries, checking offsets. */ | 1684 | /* Walk through entries, checking offsets. */ |
1683 | xt_entry_foreach(iter0, entry0, total_size) { | 1685 | xt_entry_foreach(iter0, entry0, total_size) { |
1684 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1686 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 09c88891a753..05027b753721 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c | |||
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf, | |||
452 | in ? in->name : "", | 452 | in ? in->name : "", |
453 | out ? out->name : ""); | 453 | out ? out->name : ""); |
454 | 454 | ||
455 | /* MAC logging for input path only. */ | 455 | if (in != NULL) |
456 | if (in && !out) | ||
457 | dump_mac_header(m, loginfo, skb); | 456 | dump_mac_header(m, loginfo, skb); |
458 | 457 | ||
459 | dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); | 458 | dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); |
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 79d43aa8fa8d..66e003e1fcd5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | |||
@@ -73,7 +73,7 @@ static struct inet_frags nf_frags; | |||
73 | static struct netns_frags nf_init_frags; | 73 | static struct netns_frags nf_init_frags; |
74 | 74 | ||
75 | #ifdef CONFIG_SYSCTL | 75 | #ifdef CONFIG_SYSCTL |
76 | struct ctl_table nf_ct_frag6_sysctl_table[] = { | 76 | static struct ctl_table nf_ct_frag6_sysctl_table[] = { |
77 | { | 77 | { |
78 | .procname = "nf_conntrack_frag6_timeout", | 78 | .procname = "nf_conntrack_frag6_timeout", |
79 | .data = &nf_init_frags.timeout, | 79 | .data = &nf_init_frags.timeout, |
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 1534f2b44caf..faf7412ea453 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig | |||
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS | |||
85 | 85 | ||
86 | If unsure, say `N'. | 86 | If unsure, say `N'. |
87 | 87 | ||
88 | config NF_CONNTRACK_TIMESTAMP | ||
89 | bool 'Connection tracking timestamping' | ||
90 | depends on NETFILTER_ADVANCED | ||
91 | help | ||
92 | This option enables support for connection tracking timestamping. | ||
93 | This allows you to store the flow start-time and to obtain | ||
94 | the flow-stop time (once it has been destroyed) via Connection | ||
95 | tracking events. | ||
96 | |||
97 | If unsure, say `N'. | ||
98 | |||
88 | config NF_CT_PROTO_DCCP | 99 | config NF_CT_PROTO_DCCP |
89 | tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' | 100 | tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' |
90 | depends on EXPERIMENTAL | 101 | depends on EXPERIMENTAL |
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC | |||
185 | 196 | ||
186 | To compile it as a module, choose M here. If unsure, say N. | 197 | To compile it as a module, choose M here. If unsure, say N. |
187 | 198 | ||
199 | config NF_CONNTRACK_BROADCAST | ||
200 | tristate | ||
201 | |||
188 | config NF_CONNTRACK_NETBIOS_NS | 202 | config NF_CONNTRACK_NETBIOS_NS |
189 | tristate "NetBIOS name service protocol support" | 203 | tristate "NetBIOS name service protocol support" |
190 | depends on NETFILTER_ADVANCED | 204 | depends on NETFILTER_ADVANCED |
205 | select NF_CONNTRACK_BROADCAST | ||
191 | help | 206 | help |
192 | NetBIOS name service requests are sent as broadcast messages from an | 207 | NetBIOS name service requests are sent as broadcast messages from an |
193 | unprivileged port and responded to with unicast messages to the | 208 | unprivileged port and responded to with unicast messages to the |
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS | |||
204 | 219 | ||
205 | To compile it as a module, choose M here. If unsure, say N. | 220 | To compile it as a module, choose M here. If unsure, say N. |
206 | 221 | ||
222 | config NF_CONNTRACK_SNMP | ||
223 | tristate "SNMP service protocol support" | ||
224 | depends on NETFILTER_ADVANCED | ||
225 | select NF_CONNTRACK_BROADCAST | ||
226 | help | ||
227 | SNMP service requests are sent as broadcast messages from an | ||
228 | unprivileged port and responded to with unicast messages to the | ||
229 | same port. This makes them hard to firewall properly because connection | ||
230 | tracking doesn't deal with broadcasts. This helper tracks locally | ||
231 | originating SNMP service requests and the corresponding | ||
232 | responses. It relies on correct IP address configuration, specifically | ||
233 | netmask and broadcast address. | ||
234 | |||
235 | To compile it as a module, choose M here. If unsure, say N. | ||
236 | |||
207 | config NF_CONNTRACK_PPTP | 237 | config NF_CONNTRACK_PPTP |
208 | tristate "PPtP protocol support" | 238 | tristate "PPtP protocol support" |
209 | depends on NETFILTER_ADVANCED | 239 | depends on NETFILTER_ADVANCED |
@@ -326,6 +356,16 @@ config NETFILTER_XT_CONNMARK | |||
326 | 356 | ||
327 | comment "Xtables targets" | 357 | comment "Xtables targets" |
328 | 358 | ||
359 | config NETFILTER_XT_TARGET_AUDIT | ||
360 | tristate "AUDIT target support" | ||
361 | depends on AUDIT | ||
362 | depends on NETFILTER_ADVANCED | ||
363 | ---help--- | ||
364 | This option adds an 'AUDIT' target, which can be used to create | ||
365 | audit records for packets dropped/accepted. | ||
366 | |||
367 | To compile it as a module, choose M here. If unsure, say N. | ||
368 | |||
329 | config NETFILTER_XT_TARGET_CHECKSUM | 369 | config NETFILTER_XT_TARGET_CHECKSUM |
330 | tristate "CHECKSUM target support" | 370 | tristate "CHECKSUM target support" |
331 | depends on IP_NF_MANGLE || IP6_NF_MANGLE | 371 | depends on IP_NF_MANGLE || IP6_NF_MANGLE |
@@ -477,6 +517,7 @@ config NETFILTER_XT_TARGET_NFLOG | |||
477 | config NETFILTER_XT_TARGET_NFQUEUE | 517 | config NETFILTER_XT_TARGET_NFQUEUE |
478 | tristate '"NFQUEUE" target Support' | 518 | tristate '"NFQUEUE" target Support' |
479 | depends on NETFILTER_ADVANCED | 519 | depends on NETFILTER_ADVANCED |
520 | select NETFILTER_NETLINK_QUEUE | ||
480 | help | 521 | help |
481 | This target replaced the old obsolete QUEUE target. | 522 | This target replaced the old obsolete QUEUE target. |
482 | 523 | ||
@@ -886,7 +927,7 @@ config NETFILTER_XT_MATCH_RATEEST | |||
886 | config NETFILTER_XT_MATCH_REALM | 927 | config NETFILTER_XT_MATCH_REALM |
887 | tristate '"realm" match support' | 928 | tristate '"realm" match support' |
888 | depends on NETFILTER_ADVANCED | 929 | depends on NETFILTER_ADVANCED |
889 | select NET_CLS_ROUTE | 930 | select IP_ROUTE_CLASSID |
890 | help | 931 | help |
891 | This option adds a `realm' match, which allows you to use the realm | 932 | This option adds a `realm' match, which allows you to use the realm |
892 | key from the routing subsystem inside iptables. | 933 | key from the routing subsystem inside iptables. |
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 441050f31111..9ae6878a85b1 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile | |||
@@ -1,6 +1,7 @@ | |||
1 | netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o | 1 | netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o |
2 | 2 | ||
3 | nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o | 3 | nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o |
4 | nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o | ||
4 | nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o | 5 | nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o |
5 | 6 | ||
6 | obj-$(CONFIG_NETFILTER) = netfilter.o | 7 | obj-$(CONFIG_NETFILTER) = netfilter.o |
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o | |||
28 | obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o | 29 | obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o |
29 | obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o | 30 | obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o |
30 | obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o | 31 | obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o |
32 | obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o | ||
31 | obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o | 33 | obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o |
34 | obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o | ||
32 | obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o | 35 | obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o |
33 | obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o | 36 | obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o |
34 | obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o | 37 | obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o |
@@ -45,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o | |||
45 | obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o | 48 | obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o |
46 | 49 | ||
47 | # targets | 50 | # targets |
51 | obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o | ||
48 | obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o | 52 | obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o |
49 | obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o | 53 | obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o |
50 | obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o | 54 | obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o |
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 32fcbe290c04..1e00bf7d27c5 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c | |||
@@ -175,13 +175,21 @@ next_hook: | |||
175 | ret = 1; | 175 | ret = 1; |
176 | } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { | 176 | } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { |
177 | kfree_skb(skb); | 177 | kfree_skb(skb); |
178 | ret = -(verdict >> NF_VERDICT_BITS); | 178 | ret = NF_DROP_GETERR(verdict); |
179 | if (ret == 0) | 179 | if (ret == 0) |
180 | ret = -EPERM; | 180 | ret = -EPERM; |
181 | } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { | 181 | } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { |
182 | if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, | 182 | ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, |
183 | verdict >> NF_VERDICT_BITS)) | 183 | verdict >> NF_VERDICT_QBITS); |
184 | goto next_hook; | 184 | if (ret < 0) { |
185 | if (ret == -ECANCELED) | ||
186 | goto next_hook; | ||
187 | if (ret == -ESRCH && | ||
188 | (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) | ||
189 | goto next_hook; | ||
190 | kfree_skb(skb); | ||
191 | } | ||
192 | ret = 0; | ||
185 | } | 193 | } |
186 | rcu_read_unlock(); | 194 | rcu_read_unlock(); |
187 | return ret; | 195 | return ret; |
@@ -214,7 +222,7 @@ EXPORT_SYMBOL(skb_make_writable); | |||
214 | /* This does not belong here, but locally generated errors need it if connection | 222 | /* This does not belong here, but locally generated errors need it if connection |
215 | tracking in use: without this, connection may not be in hash table, and hence | 223 | tracking in use: without this, connection may not be in hash table, and hence |
216 | manufactured ICMP or RST packets will not be associated with it. */ | 224 | manufactured ICMP or RST packets will not be associated with it. */ |
217 | void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); | 225 | void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly; |
218 | EXPORT_SYMBOL(ip_ct_attach); | 226 | EXPORT_SYMBOL(ip_ct_attach); |
219 | 227 | ||
220 | void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) | 228 | void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) |
@@ -231,7 +239,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) | |||
231 | } | 239 | } |
232 | EXPORT_SYMBOL(nf_ct_attach); | 240 | EXPORT_SYMBOL(nf_ct_attach); |
233 | 241 | ||
234 | void (*nf_ct_destroy)(struct nf_conntrack *); | 242 | void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly; |
235 | EXPORT_SYMBOL(nf_ct_destroy); | 243 | EXPORT_SYMBOL(nf_ct_destroy); |
236 | 244 | ||
237 | void nf_conntrack_destroy(struct nf_conntrack *nfct) | 245 | void nf_conntrack_destroy(struct nf_conntrack *nfct) |
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index a475edee0912..5c48ffb60c28 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c | |||
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app); | |||
43 | EXPORT_SYMBOL(unregister_ip_vs_app); | 43 | EXPORT_SYMBOL(unregister_ip_vs_app); |
44 | EXPORT_SYMBOL(register_ip_vs_app_inc); | 44 | EXPORT_SYMBOL(register_ip_vs_app_inc); |
45 | 45 | ||
46 | /* ipvs application list head */ | ||
47 | static LIST_HEAD(ip_vs_app_list); | ||
48 | static DEFINE_MUTEX(__ip_vs_app_mutex); | ||
49 | |||
50 | |||
51 | /* | 46 | /* |
52 | * Get an ip_vs_app object | 47 | * Get an ip_vs_app object |
53 | */ | 48 | */ |
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) | |||
67 | * Allocate/initialize app incarnation and register it in proto apps. | 62 | * Allocate/initialize app incarnation and register it in proto apps. |
68 | */ | 63 | */ |
69 | static int | 64 | static int |
70 | ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) | 65 | ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, |
66 | __u16 port) | ||
71 | { | 67 | { |
72 | struct ip_vs_protocol *pp; | 68 | struct ip_vs_protocol *pp; |
73 | struct ip_vs_app *inc; | 69 | struct ip_vs_app *inc; |
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) | |||
98 | } | 94 | } |
99 | } | 95 | } |
100 | 96 | ||
101 | ret = pp->register_app(inc); | 97 | ret = pp->register_app(net, inc); |
102 | if (ret) | 98 | if (ret) |
103 | goto out; | 99 | goto out; |
104 | 100 | ||
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) | |||
119 | * Release app incarnation | 115 | * Release app incarnation |
120 | */ | 116 | */ |
121 | static void | 117 | static void |
122 | ip_vs_app_inc_release(struct ip_vs_app *inc) | 118 | ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) |
123 | { | 119 | { |
124 | struct ip_vs_protocol *pp; | 120 | struct ip_vs_protocol *pp; |
125 | 121 | ||
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc) | |||
127 | return; | 123 | return; |
128 | 124 | ||
129 | if (pp->unregister_app) | 125 | if (pp->unregister_app) |
130 | pp->unregister_app(inc); | 126 | pp->unregister_app(net, inc); |
131 | 127 | ||
132 | IP_VS_DBG(9, "%s App %s:%u unregistered\n", | 128 | IP_VS_DBG(9, "%s App %s:%u unregistered\n", |
133 | pp->name, inc->name, ntohs(inc->port)); | 129 | pp->name, inc->name, ntohs(inc->port)); |
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) | |||
168 | * Register an application incarnation in protocol applications | 164 | * Register an application incarnation in protocol applications |
169 | */ | 165 | */ |
170 | int | 166 | int |
171 | register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) | 167 | register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, |
168 | __u16 port) | ||
172 | { | 169 | { |
170 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
173 | int result; | 171 | int result; |
174 | 172 | ||
175 | mutex_lock(&__ip_vs_app_mutex); | 173 | mutex_lock(&ipvs->app_mutex); |
176 | 174 | ||
177 | result = ip_vs_app_inc_new(app, proto, port); | 175 | result = ip_vs_app_inc_new(net, app, proto, port); |
178 | 176 | ||
179 | mutex_unlock(&__ip_vs_app_mutex); | 177 | mutex_unlock(&ipvs->app_mutex); |
180 | 178 | ||
181 | return result; | 179 | return result; |
182 | } | 180 | } |
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) | |||
185 | /* | 183 | /* |
186 | * ip_vs_app registration routine | 184 | * ip_vs_app registration routine |
187 | */ | 185 | */ |
188 | int register_ip_vs_app(struct ip_vs_app *app) | 186 | int register_ip_vs_app(struct net *net, struct ip_vs_app *app) |
189 | { | 187 | { |
188 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
190 | /* increase the module use count */ | 189 | /* increase the module use count */ |
191 | ip_vs_use_count_inc(); | 190 | ip_vs_use_count_inc(); |
192 | 191 | ||
193 | mutex_lock(&__ip_vs_app_mutex); | 192 | mutex_lock(&ipvs->app_mutex); |
194 | 193 | ||
195 | list_add(&app->a_list, &ip_vs_app_list); | 194 | list_add(&app->a_list, &ipvs->app_list); |
196 | 195 | ||
197 | mutex_unlock(&__ip_vs_app_mutex); | 196 | mutex_unlock(&ipvs->app_mutex); |
198 | 197 | ||
199 | return 0; | 198 | return 0; |
200 | } | 199 | } |
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app) | |||
204 | * ip_vs_app unregistration routine | 203 | * ip_vs_app unregistration routine |
205 | * We are sure there are no app incarnations attached to services | 204 | * We are sure there are no app incarnations attached to services |
206 | */ | 205 | */ |
207 | void unregister_ip_vs_app(struct ip_vs_app *app) | 206 | void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) |
208 | { | 207 | { |
208 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
209 | struct ip_vs_app *inc, *nxt; | 209 | struct ip_vs_app *inc, *nxt; |
210 | 210 | ||
211 | mutex_lock(&__ip_vs_app_mutex); | 211 | mutex_lock(&ipvs->app_mutex); |
212 | 212 | ||
213 | list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { | 213 | list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { |
214 | ip_vs_app_inc_release(inc); | 214 | ip_vs_app_inc_release(net, inc); |
215 | } | 215 | } |
216 | 216 | ||
217 | list_del(&app->a_list); | 217 | list_del(&app->a_list); |
218 | 218 | ||
219 | mutex_unlock(&__ip_vs_app_mutex); | 219 | mutex_unlock(&ipvs->app_mutex); |
220 | 220 | ||
221 | /* decrease the module use count */ | 221 | /* decrease the module use count */ |
222 | ip_vs_use_count_dec(); | 222 | ip_vs_use_count_dec(); |
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app) | |||
226 | /* | 226 | /* |
227 | * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) | 227 | * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) |
228 | */ | 228 | */ |
229 | int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) | 229 | int ip_vs_bind_app(struct ip_vs_conn *cp, |
230 | struct ip_vs_protocol *pp) | ||
230 | { | 231 | { |
231 | return pp->app_conn_bind(cp); | 232 | return pp->app_conn_bind(cp); |
232 | } | 233 | } |
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
481 | * /proc/net/ip_vs_app entry function | 482 | * /proc/net/ip_vs_app entry function |
482 | */ | 483 | */ |
483 | 484 | ||
484 | static struct ip_vs_app *ip_vs_app_idx(loff_t pos) | 485 | static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) |
485 | { | 486 | { |
486 | struct ip_vs_app *app, *inc; | 487 | struct ip_vs_app *app, *inc; |
487 | 488 | ||
488 | list_for_each_entry(app, &ip_vs_app_list, a_list) { | 489 | list_for_each_entry(app, &ipvs->app_list, a_list) { |
489 | list_for_each_entry(inc, &app->incs_list, a_list) { | 490 | list_for_each_entry(inc, &app->incs_list, a_list) { |
490 | if (pos-- == 0) | 491 | if (pos-- == 0) |
491 | return inc; | 492 | return inc; |
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos) | |||
497 | 498 | ||
498 | static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) | 499 | static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) |
499 | { | 500 | { |
500 | mutex_lock(&__ip_vs_app_mutex); | 501 | struct net *net = seq_file_net(seq); |
502 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
501 | 503 | ||
502 | return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; | 504 | mutex_lock(&ipvs->app_mutex); |
505 | |||
506 | return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; | ||
503 | } | 507 | } |
504 | 508 | ||
505 | static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 509 | static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
506 | { | 510 | { |
507 | struct ip_vs_app *inc, *app; | 511 | struct ip_vs_app *inc, *app; |
508 | struct list_head *e; | 512 | struct list_head *e; |
513 | struct net *net = seq_file_net(seq); | ||
514 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
509 | 515 | ||
510 | ++*pos; | 516 | ++*pos; |
511 | if (v == SEQ_START_TOKEN) | 517 | if (v == SEQ_START_TOKEN) |
512 | return ip_vs_app_idx(0); | 518 | return ip_vs_app_idx(ipvs, 0); |
513 | 519 | ||
514 | inc = v; | 520 | inc = v; |
515 | app = inc->app; | 521 | app = inc->app; |
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
518 | return list_entry(e, struct ip_vs_app, a_list); | 524 | return list_entry(e, struct ip_vs_app, a_list); |
519 | 525 | ||
520 | /* go on to next application */ | 526 | /* go on to next application */ |
521 | for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { | 527 | for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { |
522 | app = list_entry(e, struct ip_vs_app, a_list); | 528 | app = list_entry(e, struct ip_vs_app, a_list); |
523 | list_for_each_entry(inc, &app->incs_list, a_list) { | 529 | list_for_each_entry(inc, &app->incs_list, a_list) { |
524 | return inc; | 530 | return inc; |
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
529 | 535 | ||
530 | static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) | 536 | static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) |
531 | { | 537 | { |
532 | mutex_unlock(&__ip_vs_app_mutex); | 538 | struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); |
539 | |||
540 | mutex_unlock(&ipvs->app_mutex); | ||
533 | } | 541 | } |
534 | 542 | ||
535 | static int ip_vs_app_seq_show(struct seq_file *seq, void *v) | 543 | static int ip_vs_app_seq_show(struct seq_file *seq, void *v) |
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = { | |||
557 | 565 | ||
558 | static int ip_vs_app_open(struct inode *inode, struct file *file) | 566 | static int ip_vs_app_open(struct inode *inode, struct file *file) |
559 | { | 567 | { |
560 | return seq_open(file, &ip_vs_app_seq_ops); | 568 | return seq_open_net(inode, file, &ip_vs_app_seq_ops, |
569 | sizeof(struct seq_net_private)); | ||
561 | } | 570 | } |
562 | 571 | ||
563 | static const struct file_operations ip_vs_app_fops = { | 572 | static const struct file_operations ip_vs_app_fops = { |
@@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = { | |||
569 | }; | 578 | }; |
570 | #endif | 579 | #endif |
571 | 580 | ||
572 | int __init ip_vs_app_init(void) | 581 | static int __net_init __ip_vs_app_init(struct net *net) |
573 | { | 582 | { |
574 | /* we will replace it with proc_net_ipvs_create() soon */ | 583 | struct netns_ipvs *ipvs = net_ipvs(net); |
575 | proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); | 584 | |
585 | INIT_LIST_HEAD(&ipvs->app_list); | ||
586 | __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); | ||
587 | proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); | ||
576 | return 0; | 588 | return 0; |
577 | } | 589 | } |
578 | 590 | ||
591 | static void __net_exit __ip_vs_app_cleanup(struct net *net) | ||
592 | { | ||
593 | proc_net_remove(net, "ip_vs_app"); | ||
594 | } | ||
595 | |||
596 | static struct pernet_operations ip_vs_app_ops = { | ||
597 | .init = __ip_vs_app_init, | ||
598 | .exit = __ip_vs_app_cleanup, | ||
599 | }; | ||
600 | |||
601 | int __init ip_vs_app_init(void) | ||
602 | { | ||
603 | int rv; | ||
604 | |||
605 | rv = register_pernet_subsys(&ip_vs_app_ops); | ||
606 | return rv; | ||
607 | } | ||
608 | |||
579 | 609 | ||
580 | void ip_vs_app_cleanup(void) | 610 | void ip_vs_app_cleanup(void) |
581 | { | 611 | { |
582 | proc_net_remove(&init_net, "ip_vs_app"); | 612 | unregister_pernet_subsys(&ip_vs_app_ops); |
583 | } | 613 | } |
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index e9adecdc8ca4..83233fe24a08 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c | |||
@@ -48,35 +48,32 @@ | |||
48 | /* | 48 | /* |
49 | * Connection hash size. Default is what was selected at compile time. | 49 | * Connection hash size. Default is what was selected at compile time. |
50 | */ | 50 | */ |
51 | int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; | 51 | static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; |
52 | module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); | 52 | module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); |
53 | MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); | 53 | MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); |
54 | 54 | ||
55 | /* size and mask values */ | 55 | /* size and mask values */ |
56 | int ip_vs_conn_tab_size; | 56 | int ip_vs_conn_tab_size __read_mostly; |
57 | int ip_vs_conn_tab_mask; | 57 | static int ip_vs_conn_tab_mask __read_mostly; |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Connection hash table: for input and output packets lookups of IPVS | 60 | * Connection hash table: for input and output packets lookups of IPVS |
61 | */ | 61 | */ |
62 | static struct list_head *ip_vs_conn_tab; | 62 | static struct list_head *ip_vs_conn_tab __read_mostly; |
63 | 63 | ||
64 | /* SLAB cache for IPVS connections */ | 64 | /* SLAB cache for IPVS connections */ |
65 | static struct kmem_cache *ip_vs_conn_cachep __read_mostly; | 65 | static struct kmem_cache *ip_vs_conn_cachep __read_mostly; |
66 | 66 | ||
67 | /* counter for current IPVS connections */ | ||
68 | static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); | ||
69 | |||
70 | /* counter for no client port connections */ | 67 | /* counter for no client port connections */ |
71 | static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); | 68 | static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); |
72 | 69 | ||
73 | /* random value for IPVS connection hash */ | 70 | /* random value for IPVS connection hash */ |
74 | static unsigned int ip_vs_conn_rnd; | 71 | static unsigned int ip_vs_conn_rnd __read_mostly; |
75 | 72 | ||
76 | /* | 73 | /* |
77 | * Fine locking granularity for big connection hash table | 74 | * Fine locking granularity for big connection hash table |
78 | */ | 75 | */ |
79 | #define CT_LOCKARRAY_BITS 4 | 76 | #define CT_LOCKARRAY_BITS 5 |
80 | #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) | 77 | #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) |
81 | #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) | 78 | #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) |
82 | 79 | ||
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key) | |||
133 | /* | 130 | /* |
134 | * Returns hash value for IPVS connection entry | 131 | * Returns hash value for IPVS connection entry |
135 | */ | 132 | */ |
136 | static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, | 133 | static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto, |
137 | const union nf_inet_addr *addr, | 134 | const union nf_inet_addr *addr, |
138 | __be16 port) | 135 | __be16 port) |
139 | { | 136 | { |
140 | #ifdef CONFIG_IP_VS_IPV6 | 137 | #ifdef CONFIG_IP_VS_IPV6 |
141 | if (af == AF_INET6) | 138 | if (af == AF_INET6) |
142 | return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), | 139 | return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), |
143 | (__force u32)port, proto, ip_vs_conn_rnd) | 140 | (__force u32)port, proto, ip_vs_conn_rnd) ^ |
144 | & ip_vs_conn_tab_mask; | 141 | ((size_t)net>>8)) & ip_vs_conn_tab_mask; |
145 | #endif | 142 | #endif |
146 | return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, | 143 | return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, |
147 | ip_vs_conn_rnd) | 144 | ip_vs_conn_rnd) ^ |
148 | & ip_vs_conn_tab_mask; | 145 | ((size_t)net>>8)) & ip_vs_conn_tab_mask; |
149 | } | 146 | } |
150 | 147 | ||
151 | static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, | 148 | static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, |
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, | |||
166 | port = p->vport; | 163 | port = p->vport; |
167 | } | 164 | } |
168 | 165 | ||
169 | return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); | 166 | return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); |
170 | } | 167 | } |
171 | 168 | ||
172 | static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) | 169 | static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) |
173 | { | 170 | { |
174 | struct ip_vs_conn_param p; | 171 | struct ip_vs_conn_param p; |
175 | 172 | ||
176 | ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, | 173 | ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, |
177 | NULL, 0, &p); | 174 | &cp->caddr, cp->cport, NULL, 0, &p); |
178 | 175 | ||
179 | if (cp->dest && cp->dest->svc->pe) { | 176 | if (cp->pe) { |
180 | p.pe = cp->dest->svc->pe; | 177 | p.pe = cp->pe; |
181 | p.pe_data = cp->pe_data; | 178 | p.pe_data = cp->pe_data; |
182 | p.pe_data_len = cp->pe_data_len; | 179 | p.pe_data_len = cp->pe_data_len; |
183 | } | 180 | } |
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) | |||
186 | } | 183 | } |
187 | 184 | ||
188 | /* | 185 | /* |
189 | * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. | 186 | * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. |
190 | * returns bool success. | 187 | * returns bool success. |
191 | */ | 188 | */ |
192 | static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) | 189 | static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) |
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) | |||
269 | 266 | ||
270 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 267 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
271 | if (cp->af == p->af && | 268 | if (cp->af == p->af && |
269 | p->cport == cp->cport && p->vport == cp->vport && | ||
272 | ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && | 270 | ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && |
273 | ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && | 271 | ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && |
274 | p->cport == cp->cport && p->vport == cp->vport && | ||
275 | ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && | 272 | ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && |
276 | p->protocol == cp->protocol) { | 273 | p->protocol == cp->protocol && |
274 | ip_vs_conn_net_eq(cp, p->net)) { | ||
277 | /* HIT */ | 275 | /* HIT */ |
278 | atomic_inc(&cp->refcnt); | 276 | atomic_inc(&cp->refcnt); |
279 | ct_read_unlock(hash); | 277 | ct_read_unlock(hash); |
@@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, | |||
313 | struct ip_vs_conn_param *p) | 311 | struct ip_vs_conn_param *p) |
314 | { | 312 | { |
315 | __be16 _ports[2], *pptr; | 313 | __be16 _ports[2], *pptr; |
314 | struct net *net = skb_net(skb); | ||
316 | 315 | ||
317 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); | 316 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); |
318 | if (pptr == NULL) | 317 | if (pptr == NULL) |
319 | return 1; | 318 | return 1; |
320 | 319 | ||
321 | if (likely(!inverse)) | 320 | if (likely(!inverse)) |
322 | ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], | 321 | ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, |
323 | &iph->daddr, pptr[1], p); | 322 | pptr[0], &iph->daddr, pptr[1], p); |
324 | else | 323 | else |
325 | ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], | 324 | ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, |
326 | &iph->saddr, pptr[0], p); | 325 | pptr[1], &iph->saddr, pptr[0], p); |
327 | return 0; | 326 | return 0; |
328 | } | 327 | } |
329 | 328 | ||
330 | struct ip_vs_conn * | 329 | struct ip_vs_conn * |
331 | ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, | 330 | ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, |
332 | struct ip_vs_protocol *pp, | ||
333 | const struct ip_vs_iphdr *iph, | 331 | const struct ip_vs_iphdr *iph, |
334 | unsigned int proto_off, int inverse) | 332 | unsigned int proto_off, int inverse) |
335 | { | 333 | { |
@@ -353,8 +351,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) | |||
353 | ct_read_lock(hash); | 351 | ct_read_lock(hash); |
354 | 352 | ||
355 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 353 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
354 | if (!ip_vs_conn_net_eq(cp, p->net)) | ||
355 | continue; | ||
356 | if (p->pe_data && p->pe->ct_match) { | 356 | if (p->pe_data && p->pe->ct_match) { |
357 | if (p->pe->ct_match(p, cp)) | 357 | if (p->pe == cp->pe && p->pe->ct_match(p, cp)) |
358 | goto out; | 358 | goto out; |
359 | continue; | 359 | continue; |
360 | } | 360 | } |
@@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) | |||
404 | 404 | ||
405 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | 405 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { |
406 | if (cp->af == p->af && | 406 | if (cp->af == p->af && |
407 | p->vport == cp->cport && p->cport == cp->dport && | ||
407 | ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && | 408 | ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && |
408 | ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && | 409 | ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && |
409 | p->vport == cp->cport && p->cport == cp->dport && | 410 | p->protocol == cp->protocol && |
410 | p->protocol == cp->protocol) { | 411 | ip_vs_conn_net_eq(cp, p->net)) { |
411 | /* HIT */ | 412 | /* HIT */ |
412 | atomic_inc(&cp->refcnt); | 413 | atomic_inc(&cp->refcnt); |
413 | ret = cp; | 414 | ret = cp; |
@@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) | |||
428 | 429 | ||
429 | struct ip_vs_conn * | 430 | struct ip_vs_conn * |
430 | ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, | 431 | ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, |
431 | struct ip_vs_protocol *pp, | ||
432 | const struct ip_vs_iphdr *iph, | 432 | const struct ip_vs_iphdr *iph, |
433 | unsigned int proto_off, int inverse) | 433 | unsigned int proto_off, int inverse) |
434 | { | 434 | { |
@@ -611,9 +611,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) | |||
611 | struct ip_vs_dest *dest; | 611 | struct ip_vs_dest *dest; |
612 | 612 | ||
613 | if ((cp) && (!cp->dest)) { | 613 | if ((cp) && (!cp->dest)) { |
614 | dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, | 614 | dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, |
615 | &cp->vaddr, cp->vport, | 615 | cp->dport, &cp->vaddr, cp->vport, |
616 | cp->protocol); | 616 | cp->protocol, cp->fwmark); |
617 | ip_vs_bind_dest(cp, dest); | 617 | ip_vs_bind_dest(cp, dest); |
618 | return dest; | 618 | return dest; |
619 | } else | 619 | } else |
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) | |||
686 | int ip_vs_check_template(struct ip_vs_conn *ct) | 686 | int ip_vs_check_template(struct ip_vs_conn *ct) |
687 | { | 687 | { |
688 | struct ip_vs_dest *dest = ct->dest; | 688 | struct ip_vs_dest *dest = ct->dest; |
689 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); | ||
689 | 690 | ||
690 | /* | 691 | /* |
691 | * Checking the dest server status. | 692 | * Checking the dest server status. |
692 | */ | 693 | */ |
693 | if ((dest == NULL) || | 694 | if ((dest == NULL) || |
694 | !(dest->flags & IP_VS_DEST_F_AVAILABLE) || | 695 | !(dest->flags & IP_VS_DEST_F_AVAILABLE) || |
695 | (sysctl_ip_vs_expire_quiescent_template && | 696 | (ipvs->sysctl_expire_quiescent_template && |
696 | (atomic_read(&dest->weight) == 0))) { | 697 | (atomic_read(&dest->weight) == 0))) { |
697 | IP_VS_DBG_BUF(9, "check_template: dest not available for " | 698 | IP_VS_DBG_BUF(9, "check_template: dest not available for " |
698 | "protocol %s s:%s:%d v:%s:%d " | 699 | "protocol %s s:%s:%d v:%s:%d " |
@@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) | |||
730 | static void ip_vs_conn_expire(unsigned long data) | 731 | static void ip_vs_conn_expire(unsigned long data) |
731 | { | 732 | { |
732 | struct ip_vs_conn *cp = (struct ip_vs_conn *)data; | 733 | struct ip_vs_conn *cp = (struct ip_vs_conn *)data; |
734 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | ||
733 | 735 | ||
734 | cp->timeout = 60*HZ; | 736 | cp->timeout = 60*HZ; |
735 | 737 | ||
@@ -765,13 +767,14 @@ static void ip_vs_conn_expire(unsigned long data) | |||
765 | if (cp->flags & IP_VS_CONN_F_NFCT) | 767 | if (cp->flags & IP_VS_CONN_F_NFCT) |
766 | ip_vs_conn_drop_conntrack(cp); | 768 | ip_vs_conn_drop_conntrack(cp); |
767 | 769 | ||
770 | ip_vs_pe_put(cp->pe); | ||
768 | kfree(cp->pe_data); | 771 | kfree(cp->pe_data); |
769 | if (unlikely(cp->app != NULL)) | 772 | if (unlikely(cp->app != NULL)) |
770 | ip_vs_unbind_app(cp); | 773 | ip_vs_unbind_app(cp); |
771 | ip_vs_unbind_dest(cp); | 774 | ip_vs_unbind_dest(cp); |
772 | if (cp->flags & IP_VS_CONN_F_NO_CPORT) | 775 | if (cp->flags & IP_VS_CONN_F_NO_CPORT) |
773 | atomic_dec(&ip_vs_conn_no_cport_cnt); | 776 | atomic_dec(&ip_vs_conn_no_cport_cnt); |
774 | atomic_dec(&ip_vs_conn_count); | 777 | atomic_dec(&ipvs->conn_count); |
775 | 778 | ||
776 | kmem_cache_free(ip_vs_conn_cachep, cp); | 779 | kmem_cache_free(ip_vs_conn_cachep, cp); |
777 | return; | 780 | return; |
@@ -802,10 +805,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) | |||
802 | struct ip_vs_conn * | 805 | struct ip_vs_conn * |
803 | ip_vs_conn_new(const struct ip_vs_conn_param *p, | 806 | ip_vs_conn_new(const struct ip_vs_conn_param *p, |
804 | const union nf_inet_addr *daddr, __be16 dport, unsigned flags, | 807 | const union nf_inet_addr *daddr, __be16 dport, unsigned flags, |
805 | struct ip_vs_dest *dest) | 808 | struct ip_vs_dest *dest, __u32 fwmark) |
806 | { | 809 | { |
807 | struct ip_vs_conn *cp; | 810 | struct ip_vs_conn *cp; |
808 | struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); | 811 | struct netns_ipvs *ipvs = net_ipvs(p->net); |
812 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, | ||
813 | p->protocol); | ||
809 | 814 | ||
810 | cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); | 815 | cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); |
811 | if (cp == NULL) { | 816 | if (cp == NULL) { |
@@ -815,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
815 | 820 | ||
816 | INIT_LIST_HEAD(&cp->c_list); | 821 | INIT_LIST_HEAD(&cp->c_list); |
817 | setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); | 822 | setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); |
823 | ip_vs_conn_net_set(cp, p->net); | ||
818 | cp->af = p->af; | 824 | cp->af = p->af; |
819 | cp->protocol = p->protocol; | 825 | cp->protocol = p->protocol; |
820 | ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); | 826 | ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); |
@@ -826,7 +832,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
826 | &cp->daddr, daddr); | 832 | &cp->daddr, daddr); |
827 | cp->dport = dport; | 833 | cp->dport = dport; |
828 | cp->flags = flags; | 834 | cp->flags = flags; |
829 | if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) { | 835 | cp->fwmark = fwmark; |
836 | if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) { | ||
837 | ip_vs_pe_get(p->pe); | ||
838 | cp->pe = p->pe; | ||
830 | cp->pe_data = p->pe_data; | 839 | cp->pe_data = p->pe_data; |
831 | cp->pe_data_len = p->pe_data_len; | 840 | cp->pe_data_len = p->pe_data_len; |
832 | } | 841 | } |
@@ -842,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
842 | atomic_set(&cp->n_control, 0); | 851 | atomic_set(&cp->n_control, 0); |
843 | atomic_set(&cp->in_pkts, 0); | 852 | atomic_set(&cp->in_pkts, 0); |
844 | 853 | ||
845 | atomic_inc(&ip_vs_conn_count); | 854 | atomic_inc(&ipvs->conn_count); |
846 | if (flags & IP_VS_CONN_F_NO_CPORT) | 855 | if (flags & IP_VS_CONN_F_NO_CPORT) |
847 | atomic_inc(&ip_vs_conn_no_cport_cnt); | 856 | atomic_inc(&ip_vs_conn_no_cport_cnt); |
848 | 857 | ||
@@ -861,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
861 | #endif | 870 | #endif |
862 | ip_vs_bind_xmit(cp); | 871 | ip_vs_bind_xmit(cp); |
863 | 872 | ||
864 | if (unlikely(pp && atomic_read(&pp->appcnt))) | 873 | if (unlikely(pd && atomic_read(&pd->appcnt))) |
865 | ip_vs_bind_app(cp, pp); | 874 | ip_vs_bind_app(cp, pd->pp); |
866 | 875 | ||
867 | /* | 876 | /* |
868 | * Allow conntrack to be preserved. By default, conntrack | 877 | * Allow conntrack to be preserved. By default, conntrack |
@@ -871,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
871 | * IP_VS_CONN_F_ONE_PACKET too. | 880 | * IP_VS_CONN_F_ONE_PACKET too. |
872 | */ | 881 | */ |
873 | 882 | ||
874 | if (ip_vs_conntrack_enabled()) | 883 | if (ip_vs_conntrack_enabled(ipvs)) |
875 | cp->flags |= IP_VS_CONN_F_NFCT; | 884 | cp->flags |= IP_VS_CONN_F_NFCT; |
876 | 885 | ||
877 | /* Hash it in the ip_vs_conn_tab finally */ | 886 | /* Hash it in the ip_vs_conn_tab finally */ |
@@ -884,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
884 | * /proc/net/ip_vs_conn entries | 893 | * /proc/net/ip_vs_conn entries |
885 | */ | 894 | */ |
886 | #ifdef CONFIG_PROC_FS | 895 | #ifdef CONFIG_PROC_FS |
896 | struct ip_vs_iter_state { | ||
897 | struct seq_net_private p; | ||
898 | struct list_head *l; | ||
899 | }; | ||
887 | 900 | ||
888 | static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) | 901 | static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) |
889 | { | 902 | { |
890 | int idx; | 903 | int idx; |
891 | struct ip_vs_conn *cp; | 904 | struct ip_vs_conn *cp; |
905 | struct ip_vs_iter_state *iter = seq->private; | ||
892 | 906 | ||
893 | for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { | 907 | for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { |
894 | ct_read_lock_bh(idx); | 908 | ct_read_lock_bh(idx); |
895 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | 909 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { |
896 | if (pos-- == 0) { | 910 | if (pos-- == 0) { |
897 | seq->private = &ip_vs_conn_tab[idx]; | 911 | iter->l = &ip_vs_conn_tab[idx]; |
898 | return cp; | 912 | return cp; |
899 | } | 913 | } |
900 | } | 914 | } |
@@ -906,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) | |||
906 | 920 | ||
907 | static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) | 921 | static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) |
908 | { | 922 | { |
909 | seq->private = NULL; | 923 | struct ip_vs_iter_state *iter = seq->private; |
924 | |||
925 | iter->l = NULL; | ||
910 | return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; | 926 | return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; |
911 | } | 927 | } |
912 | 928 | ||
913 | static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 929 | static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
914 | { | 930 | { |
915 | struct ip_vs_conn *cp = v; | 931 | struct ip_vs_conn *cp = v; |
916 | struct list_head *e, *l = seq->private; | 932 | struct ip_vs_iter_state *iter = seq->private; |
933 | struct list_head *e, *l = iter->l; | ||
917 | int idx; | 934 | int idx; |
918 | 935 | ||
919 | ++*pos; | 936 | ++*pos; |
@@ -930,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
930 | while (++idx < ip_vs_conn_tab_size) { | 947 | while (++idx < ip_vs_conn_tab_size) { |
931 | ct_read_lock_bh(idx); | 948 | ct_read_lock_bh(idx); |
932 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | 949 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { |
933 | seq->private = &ip_vs_conn_tab[idx]; | 950 | iter->l = &ip_vs_conn_tab[idx]; |
934 | return cp; | 951 | return cp; |
935 | } | 952 | } |
936 | ct_read_unlock_bh(idx); | 953 | ct_read_unlock_bh(idx); |
937 | } | 954 | } |
938 | seq->private = NULL; | 955 | iter->l = NULL; |
939 | return NULL; | 956 | return NULL; |
940 | } | 957 | } |
941 | 958 | ||
942 | static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) | 959 | static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) |
943 | { | 960 | { |
944 | struct list_head *l = seq->private; | 961 | struct ip_vs_iter_state *iter = seq->private; |
962 | struct list_head *l = iter->l; | ||
945 | 963 | ||
946 | if (l) | 964 | if (l) |
947 | ct_read_unlock_bh(l - ip_vs_conn_tab); | 965 | ct_read_unlock_bh(l - ip_vs_conn_tab); |
@@ -955,18 +973,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) | |||
955 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); | 973 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); |
956 | else { | 974 | else { |
957 | const struct ip_vs_conn *cp = v; | 975 | const struct ip_vs_conn *cp = v; |
976 | struct net *net = seq_file_net(seq); | ||
958 | char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; | 977 | char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; |
959 | size_t len = 0; | 978 | size_t len = 0; |
960 | 979 | ||
961 | if (cp->dest && cp->pe_data && | 980 | if (!ip_vs_conn_net_eq(cp, net)) |
962 | cp->dest->svc->pe->show_pe_data) { | 981 | return 0; |
982 | if (cp->pe_data) { | ||
963 | pe_data[0] = ' '; | 983 | pe_data[0] = ' '; |
964 | len = strlen(cp->dest->svc->pe->name); | 984 | len = strlen(cp->pe->name); |
965 | memcpy(pe_data + 1, cp->dest->svc->pe->name, len); | 985 | memcpy(pe_data + 1, cp->pe->name, len); |
966 | pe_data[len + 1] = ' '; | 986 | pe_data[len + 1] = ' '; |
967 | len += 2; | 987 | len += 2; |
968 | len += cp->dest->svc->pe->show_pe_data(cp, | 988 | len += cp->pe->show_pe_data(cp, pe_data + len); |
969 | pe_data + len); | ||
970 | } | 989 | } |
971 | pe_data[len] = '\0'; | 990 | pe_data[len] = '\0'; |
972 | 991 | ||
@@ -1004,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = { | |||
1004 | 1023 | ||
1005 | static int ip_vs_conn_open(struct inode *inode, struct file *file) | 1024 | static int ip_vs_conn_open(struct inode *inode, struct file *file) |
1006 | { | 1025 | { |
1007 | return seq_open(file, &ip_vs_conn_seq_ops); | 1026 | return seq_open_net(inode, file, &ip_vs_conn_seq_ops, |
1027 | sizeof(struct ip_vs_iter_state)); | ||
1008 | } | 1028 | } |
1009 | 1029 | ||
1010 | static const struct file_operations ip_vs_conn_fops = { | 1030 | static const struct file_operations ip_vs_conn_fops = { |
@@ -1031,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) | |||
1031 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); | 1051 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); |
1032 | else { | 1052 | else { |
1033 | const struct ip_vs_conn *cp = v; | 1053 | const struct ip_vs_conn *cp = v; |
1054 | struct net *net = seq_file_net(seq); | ||
1055 | |||
1056 | if (!ip_vs_conn_net_eq(cp, net)) | ||
1057 | return 0; | ||
1034 | 1058 | ||
1035 | #ifdef CONFIG_IP_VS_IPV6 | 1059 | #ifdef CONFIG_IP_VS_IPV6 |
1036 | if (cp->af == AF_INET6) | 1060 | if (cp->af == AF_INET6) |
@@ -1067,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { | |||
1067 | 1091 | ||
1068 | static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) | 1092 | static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) |
1069 | { | 1093 | { |
1070 | return seq_open(file, &ip_vs_conn_sync_seq_ops); | 1094 | return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops, |
1095 | sizeof(struct ip_vs_iter_state)); | ||
1071 | } | 1096 | } |
1072 | 1097 | ||
1073 | static const struct file_operations ip_vs_conn_sync_fops = { | 1098 | static const struct file_operations ip_vs_conn_sync_fops = { |
@@ -1113,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) | |||
1113 | } | 1138 | } |
1114 | 1139 | ||
1115 | /* Called from keventd and must protect itself from softirqs */ | 1140 | /* Called from keventd and must protect itself from softirqs */ |
1116 | void ip_vs_random_dropentry(void) | 1141 | void ip_vs_random_dropentry(struct net *net) |
1117 | { | 1142 | { |
1118 | int idx; | 1143 | int idx; |
1119 | struct ip_vs_conn *cp; | 1144 | struct ip_vs_conn *cp; |
@@ -1133,7 +1158,8 @@ void ip_vs_random_dropentry(void) | |||
1133 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) | 1158 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) |
1134 | /* connection template */ | 1159 | /* connection template */ |
1135 | continue; | 1160 | continue; |
1136 | 1161 | if (!ip_vs_conn_net_eq(cp, net)) | |
1162 | continue; | ||
1137 | if (cp->protocol == IPPROTO_TCP) { | 1163 | if (cp->protocol == IPPROTO_TCP) { |
1138 | switch(cp->state) { | 1164 | switch(cp->state) { |
1139 | case IP_VS_TCP_S_SYN_RECV: | 1165 | case IP_VS_TCP_S_SYN_RECV: |
@@ -1168,12 +1194,13 @@ void ip_vs_random_dropentry(void) | |||
1168 | /* | 1194 | /* |
1169 | * Flush all the connection entries in the ip_vs_conn_tab | 1195 | * Flush all the connection entries in the ip_vs_conn_tab |
1170 | */ | 1196 | */ |
1171 | static void ip_vs_conn_flush(void) | 1197 | static void ip_vs_conn_flush(struct net *net) |
1172 | { | 1198 | { |
1173 | int idx; | 1199 | int idx; |
1174 | struct ip_vs_conn *cp; | 1200 | struct ip_vs_conn *cp; |
1201 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1175 | 1202 | ||
1176 | flush_again: | 1203 | flush_again: |
1177 | for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { | 1204 | for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { |
1178 | /* | 1205 | /* |
1179 | * Lock is actually needed in this loop. | 1206 | * Lock is actually needed in this loop. |
@@ -1181,7 +1208,8 @@ static void ip_vs_conn_flush(void) | |||
1181 | ct_write_lock_bh(idx); | 1208 | ct_write_lock_bh(idx); |
1182 | 1209 | ||
1183 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | 1210 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { |
1184 | 1211 | if (!ip_vs_conn_net_eq(cp, net)) | |
1212 | continue; | ||
1185 | IP_VS_DBG(4, "del connection\n"); | 1213 | IP_VS_DBG(4, "del connection\n"); |
1186 | ip_vs_conn_expire_now(cp); | 1214 | ip_vs_conn_expire_now(cp); |
1187 | if (cp->control) { | 1215 | if (cp->control) { |
@@ -1194,16 +1222,41 @@ static void ip_vs_conn_flush(void) | |||
1194 | 1222 | ||
1195 | /* the counter may be not NULL, because maybe some conn entries | 1223 | /* the counter may be not NULL, because maybe some conn entries |
1196 | are run by slow timer handler or unhashed but still referred */ | 1224 | are run by slow timer handler or unhashed but still referred */ |
1197 | if (atomic_read(&ip_vs_conn_count) != 0) { | 1225 | if (atomic_read(&ipvs->conn_count) != 0) { |
1198 | schedule(); | 1226 | schedule(); |
1199 | goto flush_again; | 1227 | goto flush_again; |
1200 | } | 1228 | } |
1201 | } | 1229 | } |
1230 | /* | ||
1231 | * per netns init and exit | ||
1232 | */ | ||
1233 | int __net_init __ip_vs_conn_init(struct net *net) | ||
1234 | { | ||
1235 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1236 | |||
1237 | atomic_set(&ipvs->conn_count, 0); | ||
1238 | |||
1239 | proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); | ||
1240 | proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); | ||
1241 | return 0; | ||
1242 | } | ||
1202 | 1243 | ||
1244 | static void __net_exit __ip_vs_conn_cleanup(struct net *net) | ||
1245 | { | ||
1246 | /* flush all the connection entries first */ | ||
1247 | ip_vs_conn_flush(net); | ||
1248 | proc_net_remove(net, "ip_vs_conn"); | ||
1249 | proc_net_remove(net, "ip_vs_conn_sync"); | ||
1250 | } | ||
1251 | static struct pernet_operations ipvs_conn_ops = { | ||
1252 | .init = __ip_vs_conn_init, | ||
1253 | .exit = __ip_vs_conn_cleanup, | ||
1254 | }; | ||
1203 | 1255 | ||
1204 | int __init ip_vs_conn_init(void) | 1256 | int __init ip_vs_conn_init(void) |
1205 | { | 1257 | { |
1206 | int idx; | 1258 | int idx; |
1259 | int retc; | ||
1207 | 1260 | ||
1208 | /* Compute size and mask */ | 1261 | /* Compute size and mask */ |
1209 | ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; | 1262 | ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; |
@@ -1241,24 +1294,18 @@ int __init ip_vs_conn_init(void) | |||
1241 | rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); | 1294 | rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); |
1242 | } | 1295 | } |
1243 | 1296 | ||
1244 | proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); | 1297 | retc = register_pernet_subsys(&ipvs_conn_ops); |
1245 | proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); | ||
1246 | 1298 | ||
1247 | /* calculate the random value for connection hash */ | 1299 | /* calculate the random value for connection hash */ |
1248 | get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); | 1300 | get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); |
1249 | 1301 | ||
1250 | return 0; | 1302 | return retc; |
1251 | } | 1303 | } |
1252 | 1304 | ||
1253 | |||
1254 | void ip_vs_conn_cleanup(void) | 1305 | void ip_vs_conn_cleanup(void) |
1255 | { | 1306 | { |
1256 | /* flush all the connection entries first */ | 1307 | unregister_pernet_subsys(&ipvs_conn_ops); |
1257 | ip_vs_conn_flush(); | ||
1258 | |||
1259 | /* Release the empty cache */ | 1308 | /* Release the empty cache */ |
1260 | kmem_cache_destroy(ip_vs_conn_cachep); | 1309 | kmem_cache_destroy(ip_vs_conn_cachep); |
1261 | proc_net_remove(&init_net, "ip_vs_conn"); | ||
1262 | proc_net_remove(&init_net, "ip_vs_conn_sync"); | ||
1263 | vfree(ip_vs_conn_tab); | 1310 | vfree(ip_vs_conn_tab); |
1264 | } | 1311 | } |
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b4e51e9c5a04..f36a84f33efb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <net/icmp.h> /* for icmp_send */ | 41 | #include <net/icmp.h> /* for icmp_send */ |
42 | #include <net/route.h> | 42 | #include <net/route.h> |
43 | #include <net/ip6_checksum.h> | 43 | #include <net/ip6_checksum.h> |
44 | #include <net/netns/generic.h> /* net_generic() */ | ||
44 | 45 | ||
45 | #include <linux/netfilter.h> | 46 | #include <linux/netfilter.h> |
46 | #include <linux/netfilter_ipv4.h> | 47 | #include <linux/netfilter_ipv4.h> |
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put); | |||
68 | EXPORT_SYMBOL(ip_vs_get_debug_level); | 69 | EXPORT_SYMBOL(ip_vs_get_debug_level); |
69 | #endif | 70 | #endif |
70 | 71 | ||
72 | int ip_vs_net_id __read_mostly; | ||
73 | #ifdef IP_VS_GENERIC_NETNS | ||
74 | EXPORT_SYMBOL(ip_vs_net_id); | ||
75 | #endif | ||
76 | /* netns cnt used for uniqueness */ | ||
77 | static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); | ||
71 | 78 | ||
72 | /* ID used in ICMP lookups */ | 79 | /* ID used in ICMP lookups */ |
73 | #define icmp_id(icmph) (((icmph)->un).echo.id) | 80 | #define icmp_id(icmph) (((icmph)->un).echo.id) |
@@ -108,21 +115,28 @@ static inline void | |||
108 | ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | 115 | ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) |
109 | { | 116 | { |
110 | struct ip_vs_dest *dest = cp->dest; | 117 | struct ip_vs_dest *dest = cp->dest; |
118 | struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); | ||
119 | |||
111 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 120 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
112 | spin_lock(&dest->stats.lock); | 121 | struct ip_vs_cpu_stats *s; |
113 | dest->stats.ustats.inpkts++; | 122 | |
114 | dest->stats.ustats.inbytes += skb->len; | 123 | s = this_cpu_ptr(dest->stats.cpustats); |
115 | spin_unlock(&dest->stats.lock); | 124 | s->ustats.inpkts++; |
116 | 125 | u64_stats_update_begin(&s->syncp); | |
117 | spin_lock(&dest->svc->stats.lock); | 126 | s->ustats.inbytes += skb->len; |
118 | dest->svc->stats.ustats.inpkts++; | 127 | u64_stats_update_end(&s->syncp); |
119 | dest->svc->stats.ustats.inbytes += skb->len; | 128 | |
120 | spin_unlock(&dest->svc->stats.lock); | 129 | s = this_cpu_ptr(dest->svc->stats.cpustats); |
121 | 130 | s->ustats.inpkts++; | |
122 | spin_lock(&ip_vs_stats.lock); | 131 | u64_stats_update_begin(&s->syncp); |
123 | ip_vs_stats.ustats.inpkts++; | 132 | s->ustats.inbytes += skb->len; |
124 | ip_vs_stats.ustats.inbytes += skb->len; | 133 | u64_stats_update_end(&s->syncp); |
125 | spin_unlock(&ip_vs_stats.lock); | 134 | |
135 | s = this_cpu_ptr(ipvs->cpustats); | ||
136 | s->ustats.inpkts++; | ||
137 | u64_stats_update_begin(&s->syncp); | ||
138 | s->ustats.inbytes += skb->len; | ||
139 | u64_stats_update_end(&s->syncp); | ||
126 | } | 140 | } |
127 | } | 141 | } |
128 | 142 | ||
@@ -131,21 +145,28 @@ static inline void | |||
131 | ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | 145 | ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) |
132 | { | 146 | { |
133 | struct ip_vs_dest *dest = cp->dest; | 147 | struct ip_vs_dest *dest = cp->dest; |
148 | struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); | ||
149 | |||
134 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 150 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
135 | spin_lock(&dest->stats.lock); | 151 | struct ip_vs_cpu_stats *s; |
136 | dest->stats.ustats.outpkts++; | 152 | |
137 | dest->stats.ustats.outbytes += skb->len; | 153 | s = this_cpu_ptr(dest->stats.cpustats); |
138 | spin_unlock(&dest->stats.lock); | 154 | s->ustats.outpkts++; |
139 | 155 | u64_stats_update_begin(&s->syncp); | |
140 | spin_lock(&dest->svc->stats.lock); | 156 | s->ustats.outbytes += skb->len; |
141 | dest->svc->stats.ustats.outpkts++; | 157 | u64_stats_update_end(&s->syncp); |
142 | dest->svc->stats.ustats.outbytes += skb->len; | 158 | |
143 | spin_unlock(&dest->svc->stats.lock); | 159 | s = this_cpu_ptr(dest->svc->stats.cpustats); |
144 | 160 | s->ustats.outpkts++; | |
145 | spin_lock(&ip_vs_stats.lock); | 161 | u64_stats_update_begin(&s->syncp); |
146 | ip_vs_stats.ustats.outpkts++; | 162 | s->ustats.outbytes += skb->len; |
147 | ip_vs_stats.ustats.outbytes += skb->len; | 163 | u64_stats_update_end(&s->syncp); |
148 | spin_unlock(&ip_vs_stats.lock); | 164 | |
165 | s = this_cpu_ptr(ipvs->cpustats); | ||
166 | s->ustats.outpkts++; | ||
167 | u64_stats_update_begin(&s->syncp); | ||
168 | s->ustats.outbytes += skb->len; | ||
169 | u64_stats_update_end(&s->syncp); | ||
149 | } | 170 | } |
150 | } | 171 | } |
151 | 172 | ||
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
153 | static inline void | 174 | static inline void |
154 | ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) | 175 | ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) |
155 | { | 176 | { |
156 | spin_lock(&cp->dest->stats.lock); | 177 | struct netns_ipvs *ipvs = net_ipvs(svc->net); |
157 | cp->dest->stats.ustats.conns++; | 178 | struct ip_vs_cpu_stats *s; |
158 | spin_unlock(&cp->dest->stats.lock); | 179 | |
180 | s = this_cpu_ptr(cp->dest->stats.cpustats); | ||
181 | s->ustats.conns++; | ||
159 | 182 | ||
160 | spin_lock(&svc->stats.lock); | 183 | s = this_cpu_ptr(svc->stats.cpustats); |
161 | svc->stats.ustats.conns++; | 184 | s->ustats.conns++; |
162 | spin_unlock(&svc->stats.lock); | ||
163 | 185 | ||
164 | spin_lock(&ip_vs_stats.lock); | 186 | s = this_cpu_ptr(ipvs->cpustats); |
165 | ip_vs_stats.ustats.conns++; | 187 | s->ustats.conns++; |
166 | spin_unlock(&ip_vs_stats.lock); | ||
167 | } | 188 | } |
168 | 189 | ||
169 | 190 | ||
170 | static inline int | 191 | static inline int |
171 | ip_vs_set_state(struct ip_vs_conn *cp, int direction, | 192 | ip_vs_set_state(struct ip_vs_conn *cp, int direction, |
172 | const struct sk_buff *skb, | 193 | const struct sk_buff *skb, |
173 | struct ip_vs_protocol *pp) | 194 | struct ip_vs_proto_data *pd) |
174 | { | 195 | { |
175 | if (unlikely(!pp->state_transition)) | 196 | if (unlikely(!pd->pp->state_transition)) |
176 | return 0; | 197 | return 0; |
177 | return pp->state_transition(cp, direction, skb, pp); | 198 | return pd->pp->state_transition(cp, direction, skb, pd); |
178 | } | 199 | } |
179 | 200 | ||
180 | static inline void | 201 | static inline int |
181 | ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, | 202 | ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, |
182 | struct sk_buff *skb, int protocol, | 203 | struct sk_buff *skb, int protocol, |
183 | const union nf_inet_addr *caddr, __be16 cport, | 204 | const union nf_inet_addr *caddr, __be16 cport, |
184 | const union nf_inet_addr *vaddr, __be16 vport, | 205 | const union nf_inet_addr *vaddr, __be16 vport, |
185 | struct ip_vs_conn_param *p) | 206 | struct ip_vs_conn_param *p) |
186 | { | 207 | { |
187 | ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); | 208 | ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, |
209 | vport, p); | ||
188 | p->pe = svc->pe; | 210 | p->pe = svc->pe; |
189 | if (p->pe && p->pe->fill_param) | 211 | if (p->pe && p->pe->fill_param) |
190 | p->pe->fill_param(p, skb); | 212 | return p->pe->fill_param(p, skb); |
213 | |||
214 | return 0; | ||
191 | } | 215 | } |
192 | 216 | ||
193 | /* | 217 | /* |
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, | |||
200 | static struct ip_vs_conn * | 224 | static struct ip_vs_conn * |
201 | ip_vs_sched_persist(struct ip_vs_service *svc, | 225 | ip_vs_sched_persist(struct ip_vs_service *svc, |
202 | struct sk_buff *skb, | 226 | struct sk_buff *skb, |
203 | __be16 ports[2]) | 227 | __be16 src_port, __be16 dst_port, int *ignored) |
204 | { | 228 | { |
205 | struct ip_vs_conn *cp = NULL; | 229 | struct ip_vs_conn *cp = NULL; |
206 | struct ip_vs_iphdr iph; | 230 | struct ip_vs_iphdr iph; |
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
224 | 248 | ||
225 | IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " | 249 | IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " |
226 | "mnet %s\n", | 250 | "mnet %s\n", |
227 | IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), | 251 | IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), |
228 | IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), | 252 | IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), |
229 | IP_VS_DBG_ADDR(svc->af, &snet)); | 253 | IP_VS_DBG_ADDR(svc->af, &snet)); |
230 | 254 | ||
231 | /* | 255 | /* |
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
247 | const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; | 271 | const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; |
248 | __be16 vport = 0; | 272 | __be16 vport = 0; |
249 | 273 | ||
250 | if (ports[1] == svc->port) { | 274 | if (dst_port == svc->port) { |
251 | /* non-FTP template: | 275 | /* non-FTP template: |
252 | * <protocol, caddr, 0, vaddr, vport, daddr, dport> | 276 | * <protocol, caddr, 0, vaddr, vport, daddr, dport> |
253 | * FTP template: | 277 | * FTP template: |
254 | * <protocol, caddr, 0, vaddr, 0, daddr, 0> | 278 | * <protocol, caddr, 0, vaddr, 0, daddr, 0> |
255 | */ | 279 | */ |
256 | if (svc->port != FTPPORT) | 280 | if (svc->port != FTPPORT) |
257 | vport = ports[1]; | 281 | vport = dst_port; |
258 | } else { | 282 | } else { |
259 | /* Note: persistent fwmark-based services and | 283 | /* Note: persistent fwmark-based services and |
260 | * persistent port zero service are handled here. | 284 | * persistent port zero service are handled here. |
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
268 | vaddr = &fwmark; | 292 | vaddr = &fwmark; |
269 | } | 293 | } |
270 | } | 294 | } |
271 | ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, | 295 | /* return *ignored = -1 so NF_DROP can be used */ |
272 | vaddr, vport, &param); | 296 | if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, |
297 | vaddr, vport, &param) < 0) { | ||
298 | *ignored = -1; | ||
299 | return NULL; | ||
300 | } | ||
273 | } | 301 | } |
274 | 302 | ||
275 | /* Check if a template already exists */ | 303 | /* Check if a template already exists */ |
276 | ct = ip_vs_ct_in_get(&param); | 304 | ct = ip_vs_ct_in_get(&param); |
277 | if (!ct || !ip_vs_check_template(ct)) { | 305 | if (!ct || !ip_vs_check_template(ct)) { |
278 | /* No template found or the dest of the connection | 306 | /* |
307 | * No template found or the dest of the connection | ||
279 | * template is not available. | 308 | * template is not available. |
309 | * return *ignored=0 i.e. ICMP and NF_DROP | ||
280 | */ | 310 | */ |
281 | dest = svc->scheduler->schedule(svc, skb); | 311 | dest = svc->scheduler->schedule(svc, skb); |
282 | if (!dest) { | 312 | if (!dest) { |
283 | IP_VS_DBG(1, "p-schedule: no dest found.\n"); | 313 | IP_VS_DBG(1, "p-schedule: no dest found.\n"); |
284 | kfree(param.pe_data); | 314 | kfree(param.pe_data); |
315 | *ignored = 0; | ||
285 | return NULL; | 316 | return NULL; |
286 | } | 317 | } |
287 | 318 | ||
288 | if (ports[1] == svc->port && svc->port != FTPPORT) | 319 | if (dst_port == svc->port && svc->port != FTPPORT) |
289 | dport = dest->port; | 320 | dport = dest->port; |
290 | 321 | ||
291 | /* Create a template | 322 | /* Create a template |
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
293 | * and thus param.pe_data will be destroyed | 324 | * and thus param.pe_data will be destroyed |
294 | * when the template expires */ | 325 | * when the template expires */ |
295 | ct = ip_vs_conn_new(&param, &dest->addr, dport, | 326 | ct = ip_vs_conn_new(&param, &dest->addr, dport, |
296 | IP_VS_CONN_F_TEMPLATE, dest); | 327 | IP_VS_CONN_F_TEMPLATE, dest, skb->mark); |
297 | if (ct == NULL) { | 328 | if (ct == NULL) { |
298 | kfree(param.pe_data); | 329 | kfree(param.pe_data); |
330 | *ignored = -1; | ||
299 | return NULL; | 331 | return NULL; |
300 | } | 332 | } |
301 | 333 | ||
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
306 | kfree(param.pe_data); | 338 | kfree(param.pe_data); |
307 | } | 339 | } |
308 | 340 | ||
309 | dport = ports[1]; | 341 | dport = dst_port; |
310 | if (dport == svc->port && dest->port) | 342 | if (dport == svc->port && dest->port) |
311 | dport = dest->port; | 343 | dport = dest->port; |
312 | 344 | ||
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
317 | /* | 349 | /* |
318 | * Create a new connection according to the template | 350 | * Create a new connection according to the template |
319 | */ | 351 | */ |
320 | ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], | 352 | ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, |
321 | &iph.daddr, ports[1], &param); | 353 | src_port, &iph.daddr, dst_port, &param); |
322 | cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest); | 354 | |
355 | cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); | ||
323 | if (cp == NULL) { | 356 | if (cp == NULL) { |
324 | ip_vs_conn_put(ct); | 357 | ip_vs_conn_put(ct); |
358 | *ignored = -1; | ||
325 | return NULL; | 359 | return NULL; |
326 | } | 360 | } |
327 | 361 | ||
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc, | |||
341 | * It selects a server according to the virtual service, and | 375 | * It selects a server according to the virtual service, and |
342 | * creates a connection entry. | 376 | * creates a connection entry. |
343 | * Protocols supported: TCP, UDP | 377 | * Protocols supported: TCP, UDP |
378 | * | ||
379 | * Usage of *ignored | ||
380 | * | ||
381 | * 1 : protocol tried to schedule (eg. on SYN), found svc but the | ||
382 | * svc/scheduler decides that this packet should be accepted with | ||
383 | * NF_ACCEPT because it must not be scheduled. | ||
384 | * | ||
385 | * 0 : scheduler can not find destination, so try bypass or | ||
386 | * return ICMP and then NF_DROP (ip_vs_leave). | ||
387 | * | ||
388 | * -1 : scheduler tried to schedule but fatal error occurred, eg. | ||
389 | * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param | ||
390 | * failure such as missing Call-ID, ENOMEM on skb_linearize | ||
391 | * or pe_data. In this case we should return NF_DROP without | ||
392 | * any attempts to send ICMP with ip_vs_leave. | ||
344 | */ | 393 | */ |
345 | struct ip_vs_conn * | 394 | struct ip_vs_conn * |
346 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | 395 | ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, |
347 | struct ip_vs_protocol *pp, int *ignored) | 396 | struct ip_vs_proto_data *pd, int *ignored) |
348 | { | 397 | { |
398 | struct ip_vs_protocol *pp = pd->pp; | ||
349 | struct ip_vs_conn *cp = NULL; | 399 | struct ip_vs_conn *cp = NULL; |
350 | struct ip_vs_iphdr iph; | 400 | struct ip_vs_iphdr iph; |
351 | struct ip_vs_dest *dest; | 401 | struct ip_vs_dest *dest; |
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
371 | } | 421 | } |
372 | 422 | ||
373 | /* | 423 | /* |
374 | * Do not schedule replies from local real server. It is risky | 424 | * Do not schedule replies from local real server. |
375 | * for fwmark services but mostly for persistent services. | ||
376 | */ | 425 | */ |
377 | if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && | 426 | if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && |
378 | (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && | 427 | (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { |
379 | (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { | ||
380 | IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, | 428 | IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, |
381 | "Not scheduling reply for existing connection"); | 429 | "Not scheduling reply for existing connection"); |
382 | __ip_vs_conn_put(cp); | 430 | __ip_vs_conn_put(cp); |
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
386 | /* | 434 | /* |
387 | * Persistent service | 435 | * Persistent service |
388 | */ | 436 | */ |
389 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) { | 437 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) |
390 | *ignored = 0; | 438 | return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); |
391 | return ip_vs_sched_persist(svc, skb, pptr); | 439 | |
392 | } | 440 | *ignored = 0; |
393 | 441 | ||
394 | /* | 442 | /* |
395 | * Non-persistent service | 443 | * Non-persistent service |
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
402 | return NULL; | 450 | return NULL; |
403 | } | 451 | } |
404 | 452 | ||
405 | *ignored = 0; | ||
406 | |||
407 | dest = svc->scheduler->schedule(svc, skb); | 453 | dest = svc->scheduler->schedule(svc, skb); |
408 | if (dest == NULL) { | 454 | if (dest == NULL) { |
409 | IP_VS_DBG(1, "Schedule: no dest found.\n"); | 455 | IP_VS_DBG(1, "Schedule: no dest found.\n"); |
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
419 | */ | 465 | */ |
420 | { | 466 | { |
421 | struct ip_vs_conn_param p; | 467 | struct ip_vs_conn_param p; |
422 | ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, | 468 | |
423 | pptr[0], &iph.daddr, pptr[1], &p); | 469 | ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, |
470 | &iph.saddr, pptr[0], &iph.daddr, pptr[1], | ||
471 | &p); | ||
424 | cp = ip_vs_conn_new(&p, &dest->addr, | 472 | cp = ip_vs_conn_new(&p, &dest->addr, |
425 | dest->port ? dest->port : pptr[1], | 473 | dest->port ? dest->port : pptr[1], |
426 | flags, dest); | 474 | flags, dest, skb->mark); |
427 | if (!cp) | 475 | if (!cp) { |
476 | *ignored = -1; | ||
428 | return NULL; | 477 | return NULL; |
478 | } | ||
429 | } | 479 | } |
430 | 480 | ||
431 | IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " | 481 | IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " |
@@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, | |||
447 | * no destination is available for a new connection. | 497 | * no destination is available for a new connection. |
448 | */ | 498 | */ |
449 | int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | 499 | int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, |
450 | struct ip_vs_protocol *pp) | 500 | struct ip_vs_proto_data *pd) |
451 | { | 501 | { |
502 | struct net *net; | ||
503 | struct netns_ipvs *ipvs; | ||
452 | __be16 _ports[2], *pptr; | 504 | __be16 _ports[2], *pptr; |
453 | struct ip_vs_iphdr iph; | 505 | struct ip_vs_iphdr iph; |
454 | int unicast; | 506 | int unicast; |
507 | |||
455 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); | 508 | ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); |
456 | 509 | ||
457 | pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); | 510 | pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); |
@@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
459 | ip_vs_service_put(svc); | 512 | ip_vs_service_put(svc); |
460 | return NF_DROP; | 513 | return NF_DROP; |
461 | } | 514 | } |
515 | net = skb_net(skb); | ||
462 | 516 | ||
463 | #ifdef CONFIG_IP_VS_IPV6 | 517 | #ifdef CONFIG_IP_VS_IPV6 |
464 | if (svc->af == AF_INET6) | 518 | if (svc->af == AF_INET6) |
465 | unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; | 519 | unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; |
466 | else | 520 | else |
467 | #endif | 521 | #endif |
468 | unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); | 522 | unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); |
469 | 523 | ||
470 | /* if it is fwmark-based service, the cache_bypass sysctl is up | 524 | /* if it is fwmark-based service, the cache_bypass sysctl is up |
471 | and the destination is a non-local unicast, then create | 525 | and the destination is a non-local unicast, then create |
472 | a cache_bypass connection entry */ | 526 | a cache_bypass connection entry */ |
473 | if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { | 527 | ipvs = net_ipvs(net); |
528 | if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { | ||
474 | int ret, cs; | 529 | int ret, cs; |
475 | struct ip_vs_conn *cp; | 530 | struct ip_vs_conn *cp; |
476 | unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && | 531 | unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && |
@@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
484 | IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); | 539 | IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); |
485 | { | 540 | { |
486 | struct ip_vs_conn_param p; | 541 | struct ip_vs_conn_param p; |
487 | ip_vs_conn_fill_param(svc->af, iph.protocol, | 542 | ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, |
488 | &iph.saddr, pptr[0], | 543 | &iph.saddr, pptr[0], |
489 | &iph.daddr, pptr[1], &p); | 544 | &iph.daddr, pptr[1], &p); |
490 | cp = ip_vs_conn_new(&p, &daddr, 0, | 545 | cp = ip_vs_conn_new(&p, &daddr, 0, |
491 | IP_VS_CONN_F_BYPASS | flags, | 546 | IP_VS_CONN_F_BYPASS | flags, |
492 | NULL); | 547 | NULL, skb->mark); |
493 | if (!cp) | 548 | if (!cp) |
494 | return NF_DROP; | 549 | return NF_DROP; |
495 | } | 550 | } |
@@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | |||
498 | ip_vs_in_stats(cp, skb); | 553 | ip_vs_in_stats(cp, skb); |
499 | 554 | ||
500 | /* set state */ | 555 | /* set state */ |
501 | cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); | 556 | cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); |
502 | 557 | ||
503 | /* transmit the first SYN packet */ | 558 | /* transmit the first SYN packet */ |
504 | ret = cp->packet_xmit(skb, cp, pp); | 559 | ret = cp->packet_xmit(skb, cp, pd->pp); |
505 | /* do not touch skb anymore */ | 560 | /* do not touch skb anymore */ |
506 | 561 | ||
507 | atomic_inc(&cp->in_pkts); | 562 | atomic_inc(&cp->in_pkts); |
@@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, | |||
682 | struct ip_vs_protocol *pp, | 737 | struct ip_vs_protocol *pp, |
683 | unsigned int offset, unsigned int ihl) | 738 | unsigned int offset, unsigned int ihl) |
684 | { | 739 | { |
740 | struct netns_ipvs *ipvs; | ||
685 | unsigned int verdict = NF_DROP; | 741 | unsigned int verdict = NF_DROP; |
686 | 742 | ||
687 | if (IP_VS_FWD_METHOD(cp) != 0) { | 743 | if (IP_VS_FWD_METHOD(cp) != 0) { |
@@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, | |||
703 | if (!skb_make_writable(skb, offset)) | 759 | if (!skb_make_writable(skb, offset)) |
704 | goto out; | 760 | goto out; |
705 | 761 | ||
762 | ipvs = net_ipvs(skb_net(skb)); | ||
763 | |||
706 | #ifdef CONFIG_IP_VS_IPV6 | 764 | #ifdef CONFIG_IP_VS_IPV6 |
707 | if (af == AF_INET6) | 765 | if (af == AF_INET6) |
708 | ip_vs_nat_icmp_v6(skb, pp, cp, 1); | 766 | ip_vs_nat_icmp_v6(skb, pp, cp, 1); |
@@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb, | |||
712 | 770 | ||
713 | #ifdef CONFIG_IP_VS_IPV6 | 771 | #ifdef CONFIG_IP_VS_IPV6 |
714 | if (af == AF_INET6) { | 772 | if (af == AF_INET6) { |
715 | if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) | 773 | if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) |
716 | goto out; | 774 | goto out; |
717 | } else | 775 | } else |
718 | #endif | 776 | #endif |
719 | if ((sysctl_ip_vs_snat_reroute || | 777 | if ((ipvs->sysctl_snat_reroute || |
720 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && | 778 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && |
721 | ip_route_me_harder(skb, RTN_LOCAL) != 0) | 779 | ip_route_me_harder(skb, RTN_LOCAL) != 0) |
722 | goto out; | 780 | goto out; |
@@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, | |||
808 | 866 | ||
809 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); | 867 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); |
810 | /* The embedded headers contain source and dest in reverse order */ | 868 | /* The embedded headers contain source and dest in reverse order */ |
811 | cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); | 869 | cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); |
812 | if (!cp) | 870 | if (!cp) |
813 | return NF_ACCEPT; | 871 | return NF_ACCEPT; |
814 | 872 | ||
@@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, | |||
885 | 943 | ||
886 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); | 944 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); |
887 | /* The embedded headers contain source and dest in reverse order */ | 945 | /* The embedded headers contain source and dest in reverse order */ |
888 | cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); | 946 | cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); |
889 | if (!cp) | 947 | if (!cp) |
890 | return NF_ACCEPT; | 948 | return NF_ACCEPT; |
891 | 949 | ||
@@ -924,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) | |||
924 | * Used for NAT and local client. | 982 | * Used for NAT and local client. |
925 | */ | 983 | */ |
926 | static unsigned int | 984 | static unsigned int |
927 | handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 985 | handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
928 | struct ip_vs_conn *cp, int ihl) | 986 | struct ip_vs_conn *cp, int ihl) |
929 | { | 987 | { |
988 | struct ip_vs_protocol *pp = pd->pp; | ||
989 | struct netns_ipvs *ipvs; | ||
990 | |||
930 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); | 991 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); |
931 | 992 | ||
932 | if (!skb_make_writable(skb, ihl)) | 993 | if (!skb_make_writable(skb, ihl)) |
@@ -961,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
961 | * if it came from this machine itself. So re-compute | 1022 | * if it came from this machine itself. So re-compute |
962 | * the routing information. | 1023 | * the routing information. |
963 | */ | 1024 | */ |
1025 | ipvs = net_ipvs(skb_net(skb)); | ||
1026 | |||
964 | #ifdef CONFIG_IP_VS_IPV6 | 1027 | #ifdef CONFIG_IP_VS_IPV6 |
965 | if (af == AF_INET6) { | 1028 | if (af == AF_INET6) { |
966 | if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) | 1029 | if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) |
967 | goto drop; | 1030 | goto drop; |
968 | } else | 1031 | } else |
969 | #endif | 1032 | #endif |
970 | if ((sysctl_ip_vs_snat_reroute || | 1033 | if ((ipvs->sysctl_snat_reroute || |
971 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && | 1034 | skb_rtable(skb)->rt_flags & RTCF_LOCAL) && |
972 | ip_route_me_harder(skb, RTN_LOCAL) != 0) | 1035 | ip_route_me_harder(skb, RTN_LOCAL) != 0) |
973 | goto drop; | 1036 | goto drop; |
@@ -975,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
975 | IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); | 1038 | IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); |
976 | 1039 | ||
977 | ip_vs_out_stats(cp, skb); | 1040 | ip_vs_out_stats(cp, skb); |
978 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | 1041 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); |
979 | skb->ipvs_property = 1; | 1042 | skb->ipvs_property = 1; |
980 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) | 1043 | if (!(cp->flags & IP_VS_CONN_F_NFCT)) |
981 | ip_vs_notrack(skb); | 1044 | ip_vs_notrack(skb); |
@@ -999,9 +1062,12 @@ drop: | |||
999 | static unsigned int | 1062 | static unsigned int |
1000 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | 1063 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) |
1001 | { | 1064 | { |
1065 | struct net *net = NULL; | ||
1002 | struct ip_vs_iphdr iph; | 1066 | struct ip_vs_iphdr iph; |
1003 | struct ip_vs_protocol *pp; | 1067 | struct ip_vs_protocol *pp; |
1068 | struct ip_vs_proto_data *pd; | ||
1004 | struct ip_vs_conn *cp; | 1069 | struct ip_vs_conn *cp; |
1070 | struct netns_ipvs *ipvs; | ||
1005 | 1071 | ||
1006 | EnterFunction(11); | 1072 | EnterFunction(11); |
1007 | 1073 | ||
@@ -1022,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1022 | if (unlikely(!skb_dst(skb))) | 1088 | if (unlikely(!skb_dst(skb))) |
1023 | return NF_ACCEPT; | 1089 | return NF_ACCEPT; |
1024 | 1090 | ||
1091 | net = skb_net(skb); | ||
1025 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1092 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1026 | #ifdef CONFIG_IP_VS_IPV6 | 1093 | #ifdef CONFIG_IP_VS_IPV6 |
1027 | if (af == AF_INET6) { | 1094 | if (af == AF_INET6) { |
@@ -1045,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1045 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1112 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1046 | } | 1113 | } |
1047 | 1114 | ||
1048 | pp = ip_vs_proto_get(iph.protocol); | 1115 | pd = ip_vs_proto_data_get(net, iph.protocol); |
1049 | if (unlikely(!pp)) | 1116 | if (unlikely(!pd)) |
1050 | return NF_ACCEPT; | 1117 | return NF_ACCEPT; |
1118 | pp = pd->pp; | ||
1051 | 1119 | ||
1052 | /* reassemble IP fragments */ | 1120 | /* reassemble IP fragments */ |
1053 | #ifdef CONFIG_IP_VS_IPV6 | 1121 | #ifdef CONFIG_IP_VS_IPV6 |
@@ -1073,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1073 | /* | 1141 | /* |
1074 | * Check if the packet belongs to an existing entry | 1142 | * Check if the packet belongs to an existing entry |
1075 | */ | 1143 | */ |
1076 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); | 1144 | cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); |
1145 | ipvs = net_ipvs(net); | ||
1077 | 1146 | ||
1078 | if (likely(cp)) | 1147 | if (likely(cp)) |
1079 | return handle_response(af, skb, pp, cp, iph.len); | 1148 | return handle_response(af, skb, pd, cp, iph.len); |
1080 | if (sysctl_ip_vs_nat_icmp_send && | 1149 | if (ipvs->sysctl_nat_icmp_send && |
1081 | (pp->protocol == IPPROTO_TCP || | 1150 | (pp->protocol == IPPROTO_TCP || |
1082 | pp->protocol == IPPROTO_UDP || | 1151 | pp->protocol == IPPROTO_UDP || |
1083 | pp->protocol == IPPROTO_SCTP)) { | 1152 | pp->protocol == IPPROTO_SCTP)) { |
@@ -1087,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1087 | sizeof(_ports), _ports); | 1156 | sizeof(_ports), _ports); |
1088 | if (pptr == NULL) | 1157 | if (pptr == NULL) |
1089 | return NF_ACCEPT; /* Not for me */ | 1158 | return NF_ACCEPT; /* Not for me */ |
1090 | if (ip_vs_lookup_real_service(af, iph.protocol, | 1159 | if (ip_vs_lookup_real_service(net, af, iph.protocol, |
1091 | &iph.saddr, | 1160 | &iph.saddr, |
1092 | pptr[0])) { | 1161 | pptr[0])) { |
1093 | /* | 1162 | /* |
@@ -1202,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, | |||
1202 | static int | 1271 | static int |
1203 | ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | 1272 | ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) |
1204 | { | 1273 | { |
1274 | struct net *net = NULL; | ||
1205 | struct iphdr *iph; | 1275 | struct iphdr *iph; |
1206 | struct icmphdr _icmph, *ic; | 1276 | struct icmphdr _icmph, *ic; |
1207 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ | 1277 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ |
1208 | struct ip_vs_iphdr ciph; | 1278 | struct ip_vs_iphdr ciph; |
1209 | struct ip_vs_conn *cp; | 1279 | struct ip_vs_conn *cp; |
1210 | struct ip_vs_protocol *pp; | 1280 | struct ip_vs_protocol *pp; |
1281 | struct ip_vs_proto_data *pd; | ||
1211 | unsigned int offset, ihl, verdict; | 1282 | unsigned int offset, ihl, verdict; |
1212 | union nf_inet_addr snet; | 1283 | union nf_inet_addr snet; |
1213 | 1284 | ||
@@ -1249,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1249 | if (cih == NULL) | 1320 | if (cih == NULL) |
1250 | return NF_ACCEPT; /* The packet looks wrong, ignore */ | 1321 | return NF_ACCEPT; /* The packet looks wrong, ignore */ |
1251 | 1322 | ||
1252 | pp = ip_vs_proto_get(cih->protocol); | 1323 | net = skb_net(skb); |
1253 | if (!pp) | 1324 | pd = ip_vs_proto_data_get(net, cih->protocol); |
1325 | if (!pd) | ||
1254 | return NF_ACCEPT; | 1326 | return NF_ACCEPT; |
1327 | pp = pd->pp; | ||
1255 | 1328 | ||
1256 | /* Is the embedded protocol header present? */ | 1329 | /* Is the embedded protocol header present? */ |
1257 | if (unlikely(cih->frag_off & htons(IP_OFFSET) && | 1330 | if (unlikely(cih->frag_off & htons(IP_OFFSET) && |
@@ -1265,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1265 | 1338 | ||
1266 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); | 1339 | ip_vs_fill_iphdr(AF_INET, cih, &ciph); |
1267 | /* The embedded headers contain source and dest in reverse order */ | 1340 | /* The embedded headers contain source and dest in reverse order */ |
1268 | cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); | 1341 | cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); |
1269 | if (!cp) { | 1342 | if (!cp) { |
1270 | /* The packet could also belong to a local client */ | 1343 | /* The packet could also belong to a local client */ |
1271 | cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); | 1344 | cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); |
1272 | if (cp) { | 1345 | if (cp) { |
1273 | snet.ip = iph->saddr; | 1346 | snet.ip = iph->saddr; |
1274 | return handle_response_icmp(AF_INET, skb, &snet, | 1347 | return handle_response_icmp(AF_INET, skb, &snet, |
@@ -1312,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1312 | static int | 1385 | static int |
1313 | ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | 1386 | ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) |
1314 | { | 1387 | { |
1388 | struct net *net = NULL; | ||
1315 | struct ipv6hdr *iph; | 1389 | struct ipv6hdr *iph; |
1316 | struct icmp6hdr _icmph, *ic; | 1390 | struct icmp6hdr _icmph, *ic; |
1317 | struct ipv6hdr _ciph, *cih; /* The ip header contained | 1391 | struct ipv6hdr _ciph, *cih; /* The ip header contained |
@@ -1319,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1319 | struct ip_vs_iphdr ciph; | 1393 | struct ip_vs_iphdr ciph; |
1320 | struct ip_vs_conn *cp; | 1394 | struct ip_vs_conn *cp; |
1321 | struct ip_vs_protocol *pp; | 1395 | struct ip_vs_protocol *pp; |
1396 | struct ip_vs_proto_data *pd; | ||
1322 | unsigned int offset, verdict; | 1397 | unsigned int offset, verdict; |
1323 | union nf_inet_addr snet; | 1398 | union nf_inet_addr snet; |
1324 | struct rt6_info *rt; | 1399 | struct rt6_info *rt; |
@@ -1361,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1361 | if (cih == NULL) | 1436 | if (cih == NULL) |
1362 | return NF_ACCEPT; /* The packet looks wrong, ignore */ | 1437 | return NF_ACCEPT; /* The packet looks wrong, ignore */ |
1363 | 1438 | ||
1364 | pp = ip_vs_proto_get(cih->nexthdr); | 1439 | net = skb_net(skb); |
1365 | if (!pp) | 1440 | pd = ip_vs_proto_data_get(net, cih->nexthdr); |
1441 | if (!pd) | ||
1366 | return NF_ACCEPT; | 1442 | return NF_ACCEPT; |
1443 | pp = pd->pp; | ||
1367 | 1444 | ||
1368 | /* Is the embedded protocol header present? */ | 1445 | /* Is the embedded protocol header present? */ |
1369 | /* TODO: we don't support fragmentation at the moment anyways */ | 1446 | /* TODO: we don't support fragmentation at the moment anyways */ |
@@ -1377,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1377 | 1454 | ||
1378 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); | 1455 | ip_vs_fill_iphdr(AF_INET6, cih, &ciph); |
1379 | /* The embedded headers contain source and dest in reverse order */ | 1456 | /* The embedded headers contain source and dest in reverse order */ |
1380 | cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); | 1457 | cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); |
1381 | if (!cp) { | 1458 | if (!cp) { |
1382 | /* The packet could also belong to a local client */ | 1459 | /* The packet could also belong to a local client */ |
1383 | cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); | 1460 | cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); |
1384 | if (cp) { | 1461 | if (cp) { |
1385 | ipv6_addr_copy(&snet.in6, &iph->saddr); | 1462 | ipv6_addr_copy(&snet.in6, &iph->saddr); |
1386 | return handle_response_icmp(AF_INET6, skb, &snet, | 1463 | return handle_response_icmp(AF_INET6, skb, &snet, |
@@ -1423,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) | |||
1423 | static unsigned int | 1500 | static unsigned int |
1424 | ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | 1501 | ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) |
1425 | { | 1502 | { |
1503 | struct net *net; | ||
1426 | struct ip_vs_iphdr iph; | 1504 | struct ip_vs_iphdr iph; |
1427 | struct ip_vs_protocol *pp; | 1505 | struct ip_vs_protocol *pp; |
1506 | struct ip_vs_proto_data *pd; | ||
1428 | struct ip_vs_conn *cp; | 1507 | struct ip_vs_conn *cp; |
1429 | int ret, restart, pkts; | 1508 | int ret, restart, pkts; |
1509 | struct netns_ipvs *ipvs; | ||
1430 | 1510 | ||
1431 | /* Already marked as IPVS request or reply? */ | 1511 | /* Already marked as IPVS request or reply? */ |
1432 | if (skb->ipvs_property) | 1512 | if (skb->ipvs_property) |
@@ -1480,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1480 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | 1560 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); |
1481 | } | 1561 | } |
1482 | 1562 | ||
1563 | net = skb_net(skb); | ||
1483 | /* Protocol supported? */ | 1564 | /* Protocol supported? */ |
1484 | pp = ip_vs_proto_get(iph.protocol); | 1565 | pd = ip_vs_proto_data_get(net, iph.protocol); |
1485 | if (unlikely(!pp)) | 1566 | if (unlikely(!pd)) |
1486 | return NF_ACCEPT; | 1567 | return NF_ACCEPT; |
1487 | 1568 | pp = pd->pp; | |
1488 | /* | 1569 | /* |
1489 | * Check if the packet belongs to an existing connection entry | 1570 | * Check if the packet belongs to an existing connection entry |
1490 | */ | 1571 | */ |
1491 | cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); | 1572 | cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); |
1492 | 1573 | ||
1493 | if (unlikely(!cp)) { | 1574 | if (unlikely(!cp)) { |
1494 | int v; | 1575 | int v; |
1495 | 1576 | ||
1496 | if (!pp->conn_schedule(af, skb, pp, &v, &cp)) | 1577 | if (!pp->conn_schedule(af, skb, pd, &v, &cp)) |
1497 | return v; | 1578 | return v; |
1498 | } | 1579 | } |
1499 | 1580 | ||
@@ -1505,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1505 | } | 1586 | } |
1506 | 1587 | ||
1507 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); | 1588 | IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); |
1508 | 1589 | net = skb_net(skb); | |
1590 | ipvs = net_ipvs(net); | ||
1509 | /* Check the server status */ | 1591 | /* Check the server status */ |
1510 | if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 1592 | if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
1511 | /* the destination server is not available */ | 1593 | /* the destination server is not available */ |
1512 | 1594 | ||
1513 | if (sysctl_ip_vs_expire_nodest_conn) { | 1595 | if (ipvs->sysctl_expire_nodest_conn) { |
1514 | /* try to expire the connection immediately */ | 1596 | /* try to expire the connection immediately */ |
1515 | ip_vs_conn_expire_now(cp); | 1597 | ip_vs_conn_expire_now(cp); |
1516 | } | 1598 | } |
@@ -1521,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1521 | } | 1603 | } |
1522 | 1604 | ||
1523 | ip_vs_in_stats(cp, skb); | 1605 | ip_vs_in_stats(cp, skb); |
1524 | restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); | 1606 | restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); |
1525 | if (cp->packet_xmit) | 1607 | if (cp->packet_xmit) |
1526 | ret = cp->packet_xmit(skb, cp, pp); | 1608 | ret = cp->packet_xmit(skb, cp, pp); |
1527 | /* do not touch skb anymore */ | 1609 | /* do not touch skb anymore */ |
@@ -1535,35 +1617,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1535 | * | 1617 | * |
1536 | * Sync connection if it is about to close to | 1618 | * Sync connection if it is about to close to |
1537 | * encorage the standby servers to update the connections timeout | 1619 | * encorage the standby servers to update the connections timeout |
1620 | * | ||
1621 | * For ONE_PKT let ip_vs_sync_conn() do the filter work. | ||
1538 | */ | 1622 | */ |
1539 | pkts = atomic_add_return(1, &cp->in_pkts); | 1623 | |
1540 | if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && | 1624 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
1625 | pkts = ipvs->sysctl_sync_threshold[0]; | ||
1626 | else | ||
1627 | pkts = atomic_add_return(1, &cp->in_pkts); | ||
1628 | |||
1629 | if ((ipvs->sync_state & IP_VS_STATE_MASTER) && | ||
1541 | cp->protocol == IPPROTO_SCTP) { | 1630 | cp->protocol == IPPROTO_SCTP) { |
1542 | if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && | 1631 | if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && |
1543 | (pkts % sysctl_ip_vs_sync_threshold[1] | 1632 | (pkts % ipvs->sysctl_sync_threshold[1] |
1544 | == sysctl_ip_vs_sync_threshold[0])) || | 1633 | == ipvs->sysctl_sync_threshold[0])) || |
1545 | (cp->old_state != cp->state && | 1634 | (cp->old_state != cp->state && |
1546 | ((cp->state == IP_VS_SCTP_S_CLOSED) || | 1635 | ((cp->state == IP_VS_SCTP_S_CLOSED) || |
1547 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || | 1636 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || |
1548 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { | 1637 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { |
1549 | ip_vs_sync_conn(cp); | 1638 | ip_vs_sync_conn(net, cp); |
1550 | goto out; | 1639 | goto out; |
1551 | } | 1640 | } |
1552 | } | 1641 | } |
1553 | 1642 | ||
1554 | /* Keep this block last: TCP and others with pp->num_states <= 1 */ | 1643 | /* Keep this block last: TCP and others with pp->num_states <= 1 */ |
1555 | else if (af == AF_INET && | 1644 | else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && |
1556 | (ip_vs_sync_state & IP_VS_STATE_MASTER) && | ||
1557 | (((cp->protocol != IPPROTO_TCP || | 1645 | (((cp->protocol != IPPROTO_TCP || |
1558 | cp->state == IP_VS_TCP_S_ESTABLISHED) && | 1646 | cp->state == IP_VS_TCP_S_ESTABLISHED) && |
1559 | (pkts % sysctl_ip_vs_sync_threshold[1] | 1647 | (pkts % ipvs->sysctl_sync_threshold[1] |
1560 | == sysctl_ip_vs_sync_threshold[0])) || | 1648 | == ipvs->sysctl_sync_threshold[0])) || |
1561 | ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && | 1649 | ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && |
1562 | ((cp->state == IP_VS_TCP_S_FIN_WAIT) || | 1650 | ((cp->state == IP_VS_TCP_S_FIN_WAIT) || |
1563 | (cp->state == IP_VS_TCP_S_CLOSE) || | 1651 | (cp->state == IP_VS_TCP_S_CLOSE) || |
1564 | (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || | 1652 | (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || |
1565 | (cp->state == IP_VS_TCP_S_TIME_WAIT))))) | 1653 | (cp->state == IP_VS_TCP_S_TIME_WAIT))))) |
1566 | ip_vs_sync_conn(cp); | 1654 | ip_vs_sync_conn(net, cp); |
1567 | out: | 1655 | out: |
1568 | cp->old_state = cp->state; | 1656 | cp->old_state = cp->state; |
1569 | 1657 | ||
@@ -1782,7 +1870,41 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |||
1782 | }, | 1870 | }, |
1783 | #endif | 1871 | #endif |
1784 | }; | 1872 | }; |
1873 | /* | ||
1874 | * Initialize IP Virtual Server netns mem. | ||
1875 | */ | ||
1876 | static int __net_init __ip_vs_init(struct net *net) | ||
1877 | { | ||
1878 | struct netns_ipvs *ipvs; | ||
1879 | |||
1880 | ipvs = net_generic(net, ip_vs_net_id); | ||
1881 | if (ipvs == NULL) { | ||
1882 | pr_err("%s(): no memory.\n", __func__); | ||
1883 | return -ENOMEM; | ||
1884 | } | ||
1885 | ipvs->net = net; | ||
1886 | /* Counters used for creating unique names */ | ||
1887 | ipvs->gen = atomic_read(&ipvs_netns_cnt); | ||
1888 | atomic_inc(&ipvs_netns_cnt); | ||
1889 | net->ipvs = ipvs; | ||
1890 | printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n", | ||
1891 | sizeof(struct netns_ipvs), ipvs->gen); | ||
1892 | return 0; | ||
1893 | } | ||
1894 | |||
1895 | static void __net_exit __ip_vs_cleanup(struct net *net) | ||
1896 | { | ||
1897 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1785 | 1898 | ||
1899 | IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen); | ||
1900 | } | ||
1901 | |||
1902 | static struct pernet_operations ipvs_core_ops = { | ||
1903 | .init = __ip_vs_init, | ||
1904 | .exit = __ip_vs_cleanup, | ||
1905 | .id = &ip_vs_net_id, | ||
1906 | .size = sizeof(struct netns_ipvs), | ||
1907 | }; | ||
1786 | 1908 | ||
1787 | /* | 1909 | /* |
1788 | * Initialize IP Virtual Server | 1910 | * Initialize IP Virtual Server |
@@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void) | |||
1791 | { | 1913 | { |
1792 | int ret; | 1914 | int ret; |
1793 | 1915 | ||
1794 | ip_vs_estimator_init(); | 1916 | ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ |
1917 | if (ret < 0) | ||
1918 | return ret; | ||
1795 | 1919 | ||
1920 | ip_vs_estimator_init(); | ||
1796 | ret = ip_vs_control_init(); | 1921 | ret = ip_vs_control_init(); |
1797 | if (ret < 0) { | 1922 | if (ret < 0) { |
1798 | pr_err("can't setup control.\n"); | 1923 | pr_err("can't setup control.\n"); |
@@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void) | |||
1813 | goto cleanup_app; | 1938 | goto cleanup_app; |
1814 | } | 1939 | } |
1815 | 1940 | ||
1941 | ret = ip_vs_sync_init(); | ||
1942 | if (ret < 0) { | ||
1943 | pr_err("can't setup sync data.\n"); | ||
1944 | goto cleanup_conn; | ||
1945 | } | ||
1946 | |||
1816 | ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | 1947 | ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); |
1817 | if (ret < 0) { | 1948 | if (ret < 0) { |
1818 | pr_err("can't register hooks.\n"); | 1949 | pr_err("can't register hooks.\n"); |
1819 | goto cleanup_conn; | 1950 | goto cleanup_sync; |
1820 | } | 1951 | } |
1821 | 1952 | ||
1822 | pr_info("ipvs loaded.\n"); | 1953 | pr_info("ipvs loaded.\n"); |
1823 | return ret; | 1954 | return ret; |
1824 | 1955 | ||
1956 | cleanup_sync: | ||
1957 | ip_vs_sync_cleanup(); | ||
1825 | cleanup_conn: | 1958 | cleanup_conn: |
1826 | ip_vs_conn_cleanup(); | 1959 | ip_vs_conn_cleanup(); |
1827 | cleanup_app: | 1960 | cleanup_app: |
@@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void) | |||
1831 | ip_vs_control_cleanup(); | 1964 | ip_vs_control_cleanup(); |
1832 | cleanup_estimator: | 1965 | cleanup_estimator: |
1833 | ip_vs_estimator_cleanup(); | 1966 | ip_vs_estimator_cleanup(); |
1967 | unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ | ||
1834 | return ret; | 1968 | return ret; |
1835 | } | 1969 | } |
1836 | 1970 | ||
1837 | static void __exit ip_vs_cleanup(void) | 1971 | static void __exit ip_vs_cleanup(void) |
1838 | { | 1972 | { |
1839 | nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | 1973 | nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); |
1974 | ip_vs_sync_cleanup(); | ||
1840 | ip_vs_conn_cleanup(); | 1975 | ip_vs_conn_cleanup(); |
1841 | ip_vs_app_cleanup(); | 1976 | ip_vs_app_cleanup(); |
1842 | ip_vs_protocol_cleanup(); | 1977 | ip_vs_protocol_cleanup(); |
1843 | ip_vs_control_cleanup(); | 1978 | ip_vs_control_cleanup(); |
1844 | ip_vs_estimator_cleanup(); | 1979 | ip_vs_estimator_cleanup(); |
1980 | unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ | ||
1845 | pr_info("ipvs unloaded.\n"); | 1981 | pr_info("ipvs unloaded.\n"); |
1846 | } | 1982 | } |
1847 | 1983 | ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 22f7ad5101ab..09ca2ce2f2b7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/mutex.h> | 38 | #include <linux/mutex.h> |
39 | 39 | ||
40 | #include <net/net_namespace.h> | 40 | #include <net/net_namespace.h> |
41 | #include <linux/nsproxy.h> | ||
41 | #include <net/ip.h> | 42 | #include <net/ip.h> |
42 | #ifdef CONFIG_IP_VS_IPV6 | 43 | #ifdef CONFIG_IP_VS_IPV6 |
43 | #include <net/ipv6.h> | 44 | #include <net/ipv6.h> |
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex); | |||
57 | /* lock for service table */ | 58 | /* lock for service table */ |
58 | static DEFINE_RWLOCK(__ip_vs_svc_lock); | 59 | static DEFINE_RWLOCK(__ip_vs_svc_lock); |
59 | 60 | ||
60 | /* lock for table with the real services */ | ||
61 | static DEFINE_RWLOCK(__ip_vs_rs_lock); | ||
62 | |||
63 | /* lock for state and timeout tables */ | ||
64 | static DEFINE_SPINLOCK(ip_vs_securetcp_lock); | ||
65 | |||
66 | /* lock for drop entry handling */ | ||
67 | static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); | ||
68 | |||
69 | /* lock for drop packet handling */ | ||
70 | static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); | ||
71 | |||
72 | /* 1/rate drop and drop-entry variables */ | ||
73 | int ip_vs_drop_rate = 0; | ||
74 | int ip_vs_drop_counter = 0; | ||
75 | static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); | ||
76 | |||
77 | /* number of virtual services */ | ||
78 | static int ip_vs_num_services = 0; | ||
79 | |||
80 | /* sysctl variables */ | 61 | /* sysctl variables */ |
81 | static int sysctl_ip_vs_drop_entry = 0; | ||
82 | static int sysctl_ip_vs_drop_packet = 0; | ||
83 | static int sysctl_ip_vs_secure_tcp = 0; | ||
84 | static int sysctl_ip_vs_amemthresh = 1024; | ||
85 | static int sysctl_ip_vs_am_droprate = 10; | ||
86 | int sysctl_ip_vs_cache_bypass = 0; | ||
87 | int sysctl_ip_vs_expire_nodest_conn = 0; | ||
88 | int sysctl_ip_vs_expire_quiescent_template = 0; | ||
89 | int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; | ||
90 | int sysctl_ip_vs_nat_icmp_send = 0; | ||
91 | #ifdef CONFIG_IP_VS_NFCT | ||
92 | int sysctl_ip_vs_conntrack; | ||
93 | #endif | ||
94 | int sysctl_ip_vs_snat_reroute = 1; | ||
95 | |||
96 | 62 | ||
97 | #ifdef CONFIG_IP_VS_DEBUG | 63 | #ifdef CONFIG_IP_VS_DEBUG |
98 | static int sysctl_ip_vs_debug_level = 0; | 64 | static int sysctl_ip_vs_debug_level = 0; |
@@ -105,7 +71,8 @@ int ip_vs_get_debug_level(void) | |||
105 | 71 | ||
106 | #ifdef CONFIG_IP_VS_IPV6 | 72 | #ifdef CONFIG_IP_VS_IPV6 |
107 | /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ | 73 | /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ |
108 | static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) | 74 | static int __ip_vs_addr_is_local_v6(struct net *net, |
75 | const struct in6_addr *addr) | ||
109 | { | 76 | { |
110 | struct rt6_info *rt; | 77 | struct rt6_info *rt; |
111 | struct flowi fl = { | 78 | struct flowi fl = { |
@@ -114,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) | |||
114 | .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, | 81 | .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, |
115 | }; | 82 | }; |
116 | 83 | ||
117 | rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); | 84 | rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl); |
118 | if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) | 85 | if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) |
119 | return 1; | 86 | return 1; |
120 | 87 | ||
@@ -125,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) | |||
125 | * update_defense_level is called from keventd and from sysctl, | 92 | * update_defense_level is called from keventd and from sysctl, |
126 | * so it needs to protect itself from softirqs | 93 | * so it needs to protect itself from softirqs |
127 | */ | 94 | */ |
128 | static void update_defense_level(void) | 95 | static void update_defense_level(struct netns_ipvs *ipvs) |
129 | { | 96 | { |
130 | struct sysinfo i; | 97 | struct sysinfo i; |
131 | static int old_secure_tcp = 0; | 98 | static int old_secure_tcp = 0; |
@@ -141,73 +108,73 @@ static void update_defense_level(void) | |||
141 | /* si_swapinfo(&i); */ | 108 | /* si_swapinfo(&i); */ |
142 | /* availmem = availmem - (i.totalswap - i.freeswap); */ | 109 | /* availmem = availmem - (i.totalswap - i.freeswap); */ |
143 | 110 | ||
144 | nomem = (availmem < sysctl_ip_vs_amemthresh); | 111 | nomem = (availmem < ipvs->sysctl_amemthresh); |
145 | 112 | ||
146 | local_bh_disable(); | 113 | local_bh_disable(); |
147 | 114 | ||
148 | /* drop_entry */ | 115 | /* drop_entry */ |
149 | spin_lock(&__ip_vs_dropentry_lock); | 116 | spin_lock(&ipvs->dropentry_lock); |
150 | switch (sysctl_ip_vs_drop_entry) { | 117 | switch (ipvs->sysctl_drop_entry) { |
151 | case 0: | 118 | case 0: |
152 | atomic_set(&ip_vs_dropentry, 0); | 119 | atomic_set(&ipvs->dropentry, 0); |
153 | break; | 120 | break; |
154 | case 1: | 121 | case 1: |
155 | if (nomem) { | 122 | if (nomem) { |
156 | atomic_set(&ip_vs_dropentry, 1); | 123 | atomic_set(&ipvs->dropentry, 1); |
157 | sysctl_ip_vs_drop_entry = 2; | 124 | ipvs->sysctl_drop_entry = 2; |
158 | } else { | 125 | } else { |
159 | atomic_set(&ip_vs_dropentry, 0); | 126 | atomic_set(&ipvs->dropentry, 0); |
160 | } | 127 | } |
161 | break; | 128 | break; |
162 | case 2: | 129 | case 2: |
163 | if (nomem) { | 130 | if (nomem) { |
164 | atomic_set(&ip_vs_dropentry, 1); | 131 | atomic_set(&ipvs->dropentry, 1); |
165 | } else { | 132 | } else { |
166 | atomic_set(&ip_vs_dropentry, 0); | 133 | atomic_set(&ipvs->dropentry, 0); |
167 | sysctl_ip_vs_drop_entry = 1; | 134 | ipvs->sysctl_drop_entry = 1; |
168 | }; | 135 | }; |
169 | break; | 136 | break; |
170 | case 3: | 137 | case 3: |
171 | atomic_set(&ip_vs_dropentry, 1); | 138 | atomic_set(&ipvs->dropentry, 1); |
172 | break; | 139 | break; |
173 | } | 140 | } |
174 | spin_unlock(&__ip_vs_dropentry_lock); | 141 | spin_unlock(&ipvs->dropentry_lock); |
175 | 142 | ||
176 | /* drop_packet */ | 143 | /* drop_packet */ |
177 | spin_lock(&__ip_vs_droppacket_lock); | 144 | spin_lock(&ipvs->droppacket_lock); |
178 | switch (sysctl_ip_vs_drop_packet) { | 145 | switch (ipvs->sysctl_drop_packet) { |
179 | case 0: | 146 | case 0: |
180 | ip_vs_drop_rate = 0; | 147 | ipvs->drop_rate = 0; |
181 | break; | 148 | break; |
182 | case 1: | 149 | case 1: |
183 | if (nomem) { | 150 | if (nomem) { |
184 | ip_vs_drop_rate = ip_vs_drop_counter | 151 | ipvs->drop_rate = ipvs->drop_counter |
185 | = sysctl_ip_vs_amemthresh / | 152 | = ipvs->sysctl_amemthresh / |
186 | (sysctl_ip_vs_amemthresh-availmem); | 153 | (ipvs->sysctl_amemthresh-availmem); |
187 | sysctl_ip_vs_drop_packet = 2; | 154 | ipvs->sysctl_drop_packet = 2; |
188 | } else { | 155 | } else { |
189 | ip_vs_drop_rate = 0; | 156 | ipvs->drop_rate = 0; |
190 | } | 157 | } |
191 | break; | 158 | break; |
192 | case 2: | 159 | case 2: |
193 | if (nomem) { | 160 | if (nomem) { |
194 | ip_vs_drop_rate = ip_vs_drop_counter | 161 | ipvs->drop_rate = ipvs->drop_counter |
195 | = sysctl_ip_vs_amemthresh / | 162 | = ipvs->sysctl_amemthresh / |
196 | (sysctl_ip_vs_amemthresh-availmem); | 163 | (ipvs->sysctl_amemthresh-availmem); |
197 | } else { | 164 | } else { |
198 | ip_vs_drop_rate = 0; | 165 | ipvs->drop_rate = 0; |
199 | sysctl_ip_vs_drop_packet = 1; | 166 | ipvs->sysctl_drop_packet = 1; |
200 | } | 167 | } |
201 | break; | 168 | break; |
202 | case 3: | 169 | case 3: |
203 | ip_vs_drop_rate = sysctl_ip_vs_am_droprate; | 170 | ipvs->drop_rate = ipvs->sysctl_am_droprate; |
204 | break; | 171 | break; |
205 | } | 172 | } |
206 | spin_unlock(&__ip_vs_droppacket_lock); | 173 | spin_unlock(&ipvs->droppacket_lock); |
207 | 174 | ||
208 | /* secure_tcp */ | 175 | /* secure_tcp */ |
209 | spin_lock(&ip_vs_securetcp_lock); | 176 | spin_lock(&ipvs->securetcp_lock); |
210 | switch (sysctl_ip_vs_secure_tcp) { | 177 | switch (ipvs->sysctl_secure_tcp) { |
211 | case 0: | 178 | case 0: |
212 | if (old_secure_tcp >= 2) | 179 | if (old_secure_tcp >= 2) |
213 | to_change = 0; | 180 | to_change = 0; |
@@ -216,7 +183,7 @@ static void update_defense_level(void) | |||
216 | if (nomem) { | 183 | if (nomem) { |
217 | if (old_secure_tcp < 2) | 184 | if (old_secure_tcp < 2) |
218 | to_change = 1; | 185 | to_change = 1; |
219 | sysctl_ip_vs_secure_tcp = 2; | 186 | ipvs->sysctl_secure_tcp = 2; |
220 | } else { | 187 | } else { |
221 | if (old_secure_tcp >= 2) | 188 | if (old_secure_tcp >= 2) |
222 | to_change = 0; | 189 | to_change = 0; |
@@ -229,7 +196,7 @@ static void update_defense_level(void) | |||
229 | } else { | 196 | } else { |
230 | if (old_secure_tcp >= 2) | 197 | if (old_secure_tcp >= 2) |
231 | to_change = 0; | 198 | to_change = 0; |
232 | sysctl_ip_vs_secure_tcp = 1; | 199 | ipvs->sysctl_secure_tcp = 1; |
233 | } | 200 | } |
234 | break; | 201 | break; |
235 | case 3: | 202 | case 3: |
@@ -237,10 +204,11 @@ static void update_defense_level(void) | |||
237 | to_change = 1; | 204 | to_change = 1; |
238 | break; | 205 | break; |
239 | } | 206 | } |
240 | old_secure_tcp = sysctl_ip_vs_secure_tcp; | 207 | old_secure_tcp = ipvs->sysctl_secure_tcp; |
241 | if (to_change >= 0) | 208 | if (to_change >= 0) |
242 | ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); | 209 | ip_vs_protocol_timeout_change(ipvs, |
243 | spin_unlock(&ip_vs_securetcp_lock); | 210 | ipvs->sysctl_secure_tcp > 1); |
211 | spin_unlock(&ipvs->securetcp_lock); | ||
244 | 212 | ||
245 | local_bh_enable(); | 213 | local_bh_enable(); |
246 | } | 214 | } |
@@ -250,16 +218,16 @@ static void update_defense_level(void) | |||
250 | * Timer for checking the defense | 218 | * Timer for checking the defense |
251 | */ | 219 | */ |
252 | #define DEFENSE_TIMER_PERIOD 1*HZ | 220 | #define DEFENSE_TIMER_PERIOD 1*HZ |
253 | static void defense_work_handler(struct work_struct *work); | ||
254 | static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); | ||
255 | 221 | ||
256 | static void defense_work_handler(struct work_struct *work) | 222 | static void defense_work_handler(struct work_struct *work) |
257 | { | 223 | { |
258 | update_defense_level(); | 224 | struct netns_ipvs *ipvs = |
259 | if (atomic_read(&ip_vs_dropentry)) | 225 | container_of(work, struct netns_ipvs, defense_work.work); |
260 | ip_vs_random_dropentry(); | ||
261 | 226 | ||
262 | schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); | 227 | update_defense_level(ipvs); |
228 | if (atomic_read(&ipvs->dropentry)) | ||
229 | ip_vs_random_dropentry(ipvs->net); | ||
230 | schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); | ||
263 | } | 231 | } |
264 | 232 | ||
265 | int | 233 | int |
@@ -287,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; | |||
287 | /* the service table hashed by fwmark */ | 255 | /* the service table hashed by fwmark */ |
288 | static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; | 256 | static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; |
289 | 257 | ||
290 | /* | ||
291 | * Hash table: for real service lookups | ||
292 | */ | ||
293 | #define IP_VS_RTAB_BITS 4 | ||
294 | #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) | ||
295 | #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) | ||
296 | |||
297 | static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; | ||
298 | |||
299 | /* | ||
300 | * Trash for destinations | ||
301 | */ | ||
302 | static LIST_HEAD(ip_vs_dest_trash); | ||
303 | |||
304 | /* | ||
305 | * FTP & NULL virtual service counters | ||
306 | */ | ||
307 | static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); | ||
308 | static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); | ||
309 | |||
310 | 258 | ||
311 | /* | 259 | /* |
312 | * Returns hash value for virtual service | 260 | * Returns hash value for virtual service |
313 | */ | 261 | */ |
314 | static __inline__ unsigned | 262 | static inline unsigned |
315 | ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, | 263 | ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, |
316 | __be16 port) | 264 | const union nf_inet_addr *addr, __be16 port) |
317 | { | 265 | { |
318 | register unsigned porth = ntohs(port); | 266 | register unsigned porth = ntohs(port); |
319 | __be32 addr_fold = addr->ip; | 267 | __be32 addr_fold = addr->ip; |
@@ -323,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, | |||
323 | addr_fold = addr->ip6[0]^addr->ip6[1]^ | 271 | addr_fold = addr->ip6[0]^addr->ip6[1]^ |
324 | addr->ip6[2]^addr->ip6[3]; | 272 | addr->ip6[2]^addr->ip6[3]; |
325 | #endif | 273 | #endif |
274 | addr_fold ^= ((size_t)net>>8); | ||
326 | 275 | ||
327 | return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) | 276 | return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) |
328 | & IP_VS_SVC_TAB_MASK; | 277 | & IP_VS_SVC_TAB_MASK; |
@@ -331,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, | |||
331 | /* | 280 | /* |
332 | * Returns hash value of fwmark for virtual service lookup | 281 | * Returns hash value of fwmark for virtual service lookup |
333 | */ | 282 | */ |
334 | static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) | 283 | static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) |
335 | { | 284 | { |
336 | return fwmark & IP_VS_SVC_TAB_MASK; | 285 | return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; |
337 | } | 286 | } |
338 | 287 | ||
339 | /* | 288 | /* |
340 | * Hashes a service in the ip_vs_svc_table by <proto,addr,port> | 289 | * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> |
341 | * or in the ip_vs_svc_fwm_table by fwmark. | 290 | * or in the ip_vs_svc_fwm_table by fwmark. |
342 | * Should be called with locked tables. | 291 | * Should be called with locked tables. |
343 | */ | 292 | */ |
@@ -353,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) | |||
353 | 302 | ||
354 | if (svc->fwmark == 0) { | 303 | if (svc->fwmark == 0) { |
355 | /* | 304 | /* |
356 | * Hash it by <protocol,addr,port> in ip_vs_svc_table | 305 | * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table |
357 | */ | 306 | */ |
358 | hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, | 307 | hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, |
359 | svc->port); | 308 | &svc->addr, svc->port); |
360 | list_add(&svc->s_list, &ip_vs_svc_table[hash]); | 309 | list_add(&svc->s_list, &ip_vs_svc_table[hash]); |
361 | } else { | 310 | } else { |
362 | /* | 311 | /* |
363 | * Hash it by fwmark in ip_vs_svc_fwm_table | 312 | * Hash it by fwmark in svc_fwm_table |
364 | */ | 313 | */ |
365 | hash = ip_vs_svc_fwm_hashkey(svc->fwmark); | 314 | hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); |
366 | list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); | 315 | list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); |
367 | } | 316 | } |
368 | 317 | ||
@@ -374,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) | |||
374 | 323 | ||
375 | 324 | ||
376 | /* | 325 | /* |
377 | * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. | 326 | * Unhashes a service from svc_table / svc_fwm_table. |
378 | * Should be called with locked tables. | 327 | * Should be called with locked tables. |
379 | */ | 328 | */ |
380 | static int ip_vs_svc_unhash(struct ip_vs_service *svc) | 329 | static int ip_vs_svc_unhash(struct ip_vs_service *svc) |
@@ -386,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) | |||
386 | } | 335 | } |
387 | 336 | ||
388 | if (svc->fwmark == 0) { | 337 | if (svc->fwmark == 0) { |
389 | /* Remove it from the ip_vs_svc_table table */ | 338 | /* Remove it from the svc_table table */ |
390 | list_del(&svc->s_list); | 339 | list_del(&svc->s_list); |
391 | } else { | 340 | } else { |
392 | /* Remove it from the ip_vs_svc_fwm_table table */ | 341 | /* Remove it from the svc_fwm_table table */ |
393 | list_del(&svc->f_list); | 342 | list_del(&svc->f_list); |
394 | } | 343 | } |
395 | 344 | ||
@@ -400,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) | |||
400 | 349 | ||
401 | 350 | ||
402 | /* | 351 | /* |
403 | * Get service by {proto,addr,port} in the service table. | 352 | * Get service by {netns, proto,addr,port} in the service table. |
404 | */ | 353 | */ |
405 | static inline struct ip_vs_service * | 354 | static inline struct ip_vs_service * |
406 | __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, | 355 | __ip_vs_service_find(struct net *net, int af, __u16 protocol, |
407 | __be16 vport) | 356 | const union nf_inet_addr *vaddr, __be16 vport) |
408 | { | 357 | { |
409 | unsigned hash; | 358 | unsigned hash; |
410 | struct ip_vs_service *svc; | 359 | struct ip_vs_service *svc; |
411 | 360 | ||
412 | /* Check for "full" addressed entries */ | 361 | /* Check for "full" addressed entries */ |
413 | hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); | 362 | hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); |
414 | 363 | ||
415 | list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ | 364 | list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ |
416 | if ((svc->af == af) | 365 | if ((svc->af == af) |
417 | && ip_vs_addr_equal(af, &svc->addr, vaddr) | 366 | && ip_vs_addr_equal(af, &svc->addr, vaddr) |
418 | && (svc->port == vport) | 367 | && (svc->port == vport) |
419 | && (svc->protocol == protocol)) { | 368 | && (svc->protocol == protocol) |
369 | && net_eq(svc->net, net)) { | ||
420 | /* HIT */ | 370 | /* HIT */ |
421 | return svc; | 371 | return svc; |
422 | } | 372 | } |
@@ -430,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, | |||
430 | * Get service by {fwmark} in the service table. | 380 | * Get service by {fwmark} in the service table. |
431 | */ | 381 | */ |
432 | static inline struct ip_vs_service * | 382 | static inline struct ip_vs_service * |
433 | __ip_vs_svc_fwm_find(int af, __u32 fwmark) | 383 | __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) |
434 | { | 384 | { |
435 | unsigned hash; | 385 | unsigned hash; |
436 | struct ip_vs_service *svc; | 386 | struct ip_vs_service *svc; |
437 | 387 | ||
438 | /* Check for fwmark addressed entries */ | 388 | /* Check for fwmark addressed entries */ |
439 | hash = ip_vs_svc_fwm_hashkey(fwmark); | 389 | hash = ip_vs_svc_fwm_hashkey(net, fwmark); |
440 | 390 | ||
441 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { | 391 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { |
442 | if (svc->fwmark == fwmark && svc->af == af) { | 392 | if (svc->fwmark == fwmark && svc->af == af |
393 | && net_eq(svc->net, net)) { | ||
443 | /* HIT */ | 394 | /* HIT */ |
444 | return svc; | 395 | return svc; |
445 | } | 396 | } |
@@ -449,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark) | |||
449 | } | 400 | } |
450 | 401 | ||
451 | struct ip_vs_service * | 402 | struct ip_vs_service * |
452 | ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, | 403 | ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, |
453 | const union nf_inet_addr *vaddr, __be16 vport) | 404 | const union nf_inet_addr *vaddr, __be16 vport) |
454 | { | 405 | { |
455 | struct ip_vs_service *svc; | 406 | struct ip_vs_service *svc; |
407 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
456 | 408 | ||
457 | read_lock(&__ip_vs_svc_lock); | 409 | read_lock(&__ip_vs_svc_lock); |
458 | 410 | ||
459 | /* | 411 | /* |
460 | * Check the table hashed by fwmark first | 412 | * Check the table hashed by fwmark first |
461 | */ | 413 | */ |
462 | if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) | 414 | svc = __ip_vs_svc_fwm_find(net, af, fwmark); |
415 | if (fwmark && svc) | ||
463 | goto out; | 416 | goto out; |
464 | 417 | ||
465 | /* | 418 | /* |
466 | * Check the table hashed by <protocol,addr,port> | 419 | * Check the table hashed by <protocol,addr,port> |
467 | * for "full" addressed entries | 420 | * for "full" addressed entries |
468 | */ | 421 | */ |
469 | svc = __ip_vs_service_find(af, protocol, vaddr, vport); | 422 | svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); |
470 | 423 | ||
471 | if (svc == NULL | 424 | if (svc == NULL |
472 | && protocol == IPPROTO_TCP | 425 | && protocol == IPPROTO_TCP |
473 | && atomic_read(&ip_vs_ftpsvc_counter) | 426 | && atomic_read(&ipvs->ftpsvc_counter) |
474 | && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { | 427 | && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { |
475 | /* | 428 | /* |
476 | * Check if ftp service entry exists, the packet | 429 | * Check if ftp service entry exists, the packet |
477 | * might belong to FTP data connections. | 430 | * might belong to FTP data connections. |
478 | */ | 431 | */ |
479 | svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); | 432 | svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); |
480 | } | 433 | } |
481 | 434 | ||
482 | if (svc == NULL | 435 | if (svc == NULL |
483 | && atomic_read(&ip_vs_nullsvc_counter)) { | 436 | && atomic_read(&ipvs->nullsvc_counter)) { |
484 | /* | 437 | /* |
485 | * Check if the catch-all port (port zero) exists | 438 | * Check if the catch-all port (port zero) exists |
486 | */ | 439 | */ |
487 | svc = __ip_vs_service_find(af, protocol, vaddr, 0); | 440 | svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); |
488 | } | 441 | } |
489 | 442 | ||
490 | out: | 443 | out: |
@@ -519,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) | |||
519 | svc->fwmark, | 472 | svc->fwmark, |
520 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | 473 | IP_VS_DBG_ADDR(svc->af, &svc->addr), |
521 | ntohs(svc->port), atomic_read(&svc->usecnt)); | 474 | ntohs(svc->port), atomic_read(&svc->usecnt)); |
475 | free_percpu(svc->stats.cpustats); | ||
522 | kfree(svc); | 476 | kfree(svc); |
523 | } | 477 | } |
524 | } | 478 | } |
@@ -545,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af, | |||
545 | } | 499 | } |
546 | 500 | ||
547 | /* | 501 | /* |
548 | * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. | 502 | * Hashes ip_vs_dest in rs_table by <proto,addr,port>. |
549 | * should be called with locked tables. | 503 | * should be called with locked tables. |
550 | */ | 504 | */ |
551 | static int ip_vs_rs_hash(struct ip_vs_dest *dest) | 505 | static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) |
552 | { | 506 | { |
553 | unsigned hash; | 507 | unsigned hash; |
554 | 508 | ||
@@ -562,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) | |||
562 | */ | 516 | */ |
563 | hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); | 517 | hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); |
564 | 518 | ||
565 | list_add(&dest->d_list, &ip_vs_rtable[hash]); | 519 | list_add(&dest->d_list, &ipvs->rs_table[hash]); |
566 | 520 | ||
567 | return 1; | 521 | return 1; |
568 | } | 522 | } |
569 | 523 | ||
570 | /* | 524 | /* |
571 | * UNhashes ip_vs_dest from ip_vs_rtable. | 525 | * UNhashes ip_vs_dest from rs_table. |
572 | * should be called with locked tables. | 526 | * should be called with locked tables. |
573 | */ | 527 | */ |
574 | static int ip_vs_rs_unhash(struct ip_vs_dest *dest) | 528 | static int ip_vs_rs_unhash(struct ip_vs_dest *dest) |
575 | { | 529 | { |
576 | /* | 530 | /* |
577 | * Remove it from the ip_vs_rtable table. | 531 | * Remove it from the rs_table table. |
578 | */ | 532 | */ |
579 | if (!list_empty(&dest->d_list)) { | 533 | if (!list_empty(&dest->d_list)) { |
580 | list_del(&dest->d_list); | 534 | list_del(&dest->d_list); |
@@ -588,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) | |||
588 | * Lookup real service by <proto,addr,port> in the real service table. | 542 | * Lookup real service by <proto,addr,port> in the real service table. |
589 | */ | 543 | */ |
590 | struct ip_vs_dest * | 544 | struct ip_vs_dest * |
591 | ip_vs_lookup_real_service(int af, __u16 protocol, | 545 | ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, |
592 | const union nf_inet_addr *daddr, | 546 | const union nf_inet_addr *daddr, |
593 | __be16 dport) | 547 | __be16 dport) |
594 | { | 548 | { |
549 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
595 | unsigned hash; | 550 | unsigned hash; |
596 | struct ip_vs_dest *dest; | 551 | struct ip_vs_dest *dest; |
597 | 552 | ||
@@ -601,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol, | |||
601 | */ | 556 | */ |
602 | hash = ip_vs_rs_hashkey(af, daddr, dport); | 557 | hash = ip_vs_rs_hashkey(af, daddr, dport); |
603 | 558 | ||
604 | read_lock(&__ip_vs_rs_lock); | 559 | read_lock(&ipvs->rs_lock); |
605 | list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { | 560 | list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { |
606 | if ((dest->af == af) | 561 | if ((dest->af == af) |
607 | && ip_vs_addr_equal(af, &dest->addr, daddr) | 562 | && ip_vs_addr_equal(af, &dest->addr, daddr) |
608 | && (dest->port == dport) | 563 | && (dest->port == dport) |
609 | && ((dest->protocol == protocol) || | 564 | && ((dest->protocol == protocol) || |
610 | dest->vfwmark)) { | 565 | dest->vfwmark)) { |
611 | /* HIT */ | 566 | /* HIT */ |
612 | read_unlock(&__ip_vs_rs_lock); | 567 | read_unlock(&ipvs->rs_lock); |
613 | return dest; | 568 | return dest; |
614 | } | 569 | } |
615 | } | 570 | } |
616 | read_unlock(&__ip_vs_rs_lock); | 571 | read_unlock(&ipvs->rs_lock); |
617 | 572 | ||
618 | return NULL; | 573 | return NULL; |
619 | } | 574 | } |
@@ -652,15 +607,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
652 | * ip_vs_lookup_real_service() looked promissing, but | 607 | * ip_vs_lookup_real_service() looked promissing, but |
653 | * seems not working as expected. | 608 | * seems not working as expected. |
654 | */ | 609 | */ |
655 | struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, | 610 | struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, |
611 | const union nf_inet_addr *daddr, | ||
656 | __be16 dport, | 612 | __be16 dport, |
657 | const union nf_inet_addr *vaddr, | 613 | const union nf_inet_addr *vaddr, |
658 | __be16 vport, __u16 protocol) | 614 | __be16 vport, __u16 protocol, __u32 fwmark) |
659 | { | 615 | { |
660 | struct ip_vs_dest *dest; | 616 | struct ip_vs_dest *dest; |
661 | struct ip_vs_service *svc; | 617 | struct ip_vs_service *svc; |
662 | 618 | ||
663 | svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); | 619 | svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); |
664 | if (!svc) | 620 | if (!svc) |
665 | return NULL; | 621 | return NULL; |
666 | dest = ip_vs_lookup_dest(svc, daddr, dport); | 622 | dest = ip_vs_lookup_dest(svc, daddr, dport); |
@@ -685,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
685 | __be16 dport) | 641 | __be16 dport) |
686 | { | 642 | { |
687 | struct ip_vs_dest *dest, *nxt; | 643 | struct ip_vs_dest *dest, *nxt; |
644 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
688 | 645 | ||
689 | /* | 646 | /* |
690 | * Find the destination in trash | 647 | * Find the destination in trash |
691 | */ | 648 | */ |
692 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { | 649 | list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { |
693 | IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " | 650 | IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " |
694 | "dest->refcnt=%d\n", | 651 | "dest->refcnt=%d\n", |
695 | dest->vfwmark, | 652 | dest->vfwmark, |
@@ -720,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
720 | list_del(&dest->n_list); | 677 | list_del(&dest->n_list); |
721 | ip_vs_dst_reset(dest); | 678 | ip_vs_dst_reset(dest); |
722 | __ip_vs_unbind_svc(dest); | 679 | __ip_vs_unbind_svc(dest); |
680 | free_percpu(dest->stats.cpustats); | ||
723 | kfree(dest); | 681 | kfree(dest); |
724 | } | 682 | } |
725 | } | 683 | } |
@@ -737,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
737 | * are expired, and the refcnt of each destination in the trash must | 695 | * are expired, and the refcnt of each destination in the trash must |
738 | * be 1, so we simply release them here. | 696 | * be 1, so we simply release them here. |
739 | */ | 697 | */ |
740 | static void ip_vs_trash_cleanup(void) | 698 | static void ip_vs_trash_cleanup(struct net *net) |
741 | { | 699 | { |
742 | struct ip_vs_dest *dest, *nxt; | 700 | struct ip_vs_dest *dest, *nxt; |
701 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
743 | 702 | ||
744 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { | 703 | list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { |
745 | list_del(&dest->n_list); | 704 | list_del(&dest->n_list); |
746 | ip_vs_dst_reset(dest); | 705 | ip_vs_dst_reset(dest); |
747 | __ip_vs_unbind_svc(dest); | 706 | __ip_vs_unbind_svc(dest); |
707 | free_percpu(dest->stats.cpustats); | ||
748 | kfree(dest); | 708 | kfree(dest); |
749 | } | 709 | } |
750 | } | 710 | } |
@@ -768,6 +728,7 @@ static void | |||
768 | __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | 728 | __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, |
769 | struct ip_vs_dest_user_kern *udest, int add) | 729 | struct ip_vs_dest_user_kern *udest, int add) |
770 | { | 730 | { |
731 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
771 | int conn_flags; | 732 | int conn_flags; |
772 | 733 | ||
773 | /* set the weight and the flags */ | 734 | /* set the weight and the flags */ |
@@ -780,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
780 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; | 741 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; |
781 | } else { | 742 | } else { |
782 | /* | 743 | /* |
783 | * Put the real service in ip_vs_rtable if not present. | 744 | * Put the real service in rs_table if not present. |
784 | * For now only for NAT! | 745 | * For now only for NAT! |
785 | */ | 746 | */ |
786 | write_lock_bh(&__ip_vs_rs_lock); | 747 | write_lock_bh(&ipvs->rs_lock); |
787 | ip_vs_rs_hash(dest); | 748 | ip_vs_rs_hash(ipvs, dest); |
788 | write_unlock_bh(&__ip_vs_rs_lock); | 749 | write_unlock_bh(&ipvs->rs_lock); |
789 | } | 750 | } |
790 | atomic_set(&dest->conn_flags, conn_flags); | 751 | atomic_set(&dest->conn_flags, conn_flags); |
791 | 752 | ||
@@ -813,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
813 | spin_unlock(&dest->dst_lock); | 774 | spin_unlock(&dest->dst_lock); |
814 | 775 | ||
815 | if (add) | 776 | if (add) |
816 | ip_vs_new_estimator(&dest->stats); | 777 | ip_vs_new_estimator(svc->net, &dest->stats); |
817 | 778 | ||
818 | write_lock_bh(&__ip_vs_svc_lock); | 779 | write_lock_bh(&__ip_vs_svc_lock); |
819 | 780 | ||
@@ -850,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, | |||
850 | atype = ipv6_addr_type(&udest->addr.in6); | 811 | atype = ipv6_addr_type(&udest->addr.in6); |
851 | if ((!(atype & IPV6_ADDR_UNICAST) || | 812 | if ((!(atype & IPV6_ADDR_UNICAST) || |
852 | atype & IPV6_ADDR_LINKLOCAL) && | 813 | atype & IPV6_ADDR_LINKLOCAL) && |
853 | !__ip_vs_addr_is_local_v6(&udest->addr.in6)) | 814 | !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) |
854 | return -EINVAL; | 815 | return -EINVAL; |
855 | } else | 816 | } else |
856 | #endif | 817 | #endif |
857 | { | 818 | { |
858 | atype = inet_addr_type(&init_net, udest->addr.ip); | 819 | atype = inet_addr_type(svc->net, udest->addr.ip); |
859 | if (atype != RTN_LOCAL && atype != RTN_UNICAST) | 820 | if (atype != RTN_LOCAL && atype != RTN_UNICAST) |
860 | return -EINVAL; | 821 | return -EINVAL; |
861 | } | 822 | } |
@@ -865,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, | |||
865 | pr_err("%s(): no memory.\n", __func__); | 826 | pr_err("%s(): no memory.\n", __func__); |
866 | return -ENOMEM; | 827 | return -ENOMEM; |
867 | } | 828 | } |
829 | dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); | ||
830 | if (!dest->stats.cpustats) { | ||
831 | pr_err("%s() alloc_percpu failed\n", __func__); | ||
832 | goto err_alloc; | ||
833 | } | ||
868 | 834 | ||
869 | dest->af = svc->af; | 835 | dest->af = svc->af; |
870 | dest->protocol = svc->protocol; | 836 | dest->protocol = svc->protocol; |
@@ -888,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, | |||
888 | 854 | ||
889 | LeaveFunction(2); | 855 | LeaveFunction(2); |
890 | return 0; | 856 | return 0; |
857 | |||
858 | err_alloc: | ||
859 | kfree(dest); | ||
860 | return -ENOMEM; | ||
891 | } | 861 | } |
892 | 862 | ||
893 | 863 | ||
@@ -1006,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) | |||
1006 | /* | 976 | /* |
1007 | * Delete a destination (must be already unlinked from the service) | 977 | * Delete a destination (must be already unlinked from the service) |
1008 | */ | 978 | */ |
1009 | static void __ip_vs_del_dest(struct ip_vs_dest *dest) | 979 | static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) |
1010 | { | 980 | { |
1011 | ip_vs_kill_estimator(&dest->stats); | 981 | struct netns_ipvs *ipvs = net_ipvs(net); |
982 | |||
983 | ip_vs_kill_estimator(net, &dest->stats); | ||
1012 | 984 | ||
1013 | /* | 985 | /* |
1014 | * Remove it from the d-linked list with the real services. | 986 | * Remove it from the d-linked list with the real services. |
1015 | */ | 987 | */ |
1016 | write_lock_bh(&__ip_vs_rs_lock); | 988 | write_lock_bh(&ipvs->rs_lock); |
1017 | ip_vs_rs_unhash(dest); | 989 | ip_vs_rs_unhash(dest); |
1018 | write_unlock_bh(&__ip_vs_rs_lock); | 990 | write_unlock_bh(&ipvs->rs_lock); |
1019 | 991 | ||
1020 | /* | 992 | /* |
1021 | * Decrease the refcnt of the dest, and free the dest | 993 | * Decrease the refcnt of the dest, and free the dest |
@@ -1034,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) | |||
1034 | and only one user context can update virtual service at a | 1006 | and only one user context can update virtual service at a |
1035 | time, so the operation here is OK */ | 1007 | time, so the operation here is OK */ |
1036 | atomic_dec(&dest->svc->refcnt); | 1008 | atomic_dec(&dest->svc->refcnt); |
1009 | free_percpu(dest->stats.cpustats); | ||
1037 | kfree(dest); | 1010 | kfree(dest); |
1038 | } else { | 1011 | } else { |
1039 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " | 1012 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " |
@@ -1041,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) | |||
1041 | IP_VS_DBG_ADDR(dest->af, &dest->addr), | 1014 | IP_VS_DBG_ADDR(dest->af, &dest->addr), |
1042 | ntohs(dest->port), | 1015 | ntohs(dest->port), |
1043 | atomic_read(&dest->refcnt)); | 1016 | atomic_read(&dest->refcnt)); |
1044 | list_add(&dest->n_list, &ip_vs_dest_trash); | 1017 | list_add(&dest->n_list, &ipvs->dest_trash); |
1045 | atomic_inc(&dest->refcnt); | 1018 | atomic_inc(&dest->refcnt); |
1046 | } | 1019 | } |
1047 | } | 1020 | } |
@@ -1105,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) | |||
1105 | /* | 1078 | /* |
1106 | * Delete the destination | 1079 | * Delete the destination |
1107 | */ | 1080 | */ |
1108 | __ip_vs_del_dest(dest); | 1081 | __ip_vs_del_dest(svc->net, dest); |
1109 | 1082 | ||
1110 | LeaveFunction(2); | 1083 | LeaveFunction(2); |
1111 | 1084 | ||
@@ -1117,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) | |||
1117 | * Add a service into the service hash table | 1090 | * Add a service into the service hash table |
1118 | */ | 1091 | */ |
1119 | static int | 1092 | static int |
1120 | ip_vs_add_service(struct ip_vs_service_user_kern *u, | 1093 | ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, |
1121 | struct ip_vs_service **svc_p) | 1094 | struct ip_vs_service **svc_p) |
1122 | { | 1095 | { |
1123 | int ret = 0; | 1096 | int ret = 0; |
1124 | struct ip_vs_scheduler *sched = NULL; | 1097 | struct ip_vs_scheduler *sched = NULL; |
1125 | struct ip_vs_pe *pe = NULL; | 1098 | struct ip_vs_pe *pe = NULL; |
1126 | struct ip_vs_service *svc = NULL; | 1099 | struct ip_vs_service *svc = NULL; |
1100 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1127 | 1101 | ||
1128 | /* increase the module use count */ | 1102 | /* increase the module use count */ |
1129 | ip_vs_use_count_inc(); | 1103 | ip_vs_use_count_inc(); |
@@ -1137,7 +1111,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1137 | } | 1111 | } |
1138 | 1112 | ||
1139 | if (u->pe_name && *u->pe_name) { | 1113 | if (u->pe_name && *u->pe_name) { |
1140 | pe = ip_vs_pe_get(u->pe_name); | 1114 | pe = ip_vs_pe_getbyname(u->pe_name); |
1141 | if (pe == NULL) { | 1115 | if (pe == NULL) { |
1142 | pr_info("persistence engine module ip_vs_pe_%s " | 1116 | pr_info("persistence engine module ip_vs_pe_%s " |
1143 | "not found\n", u->pe_name); | 1117 | "not found\n", u->pe_name); |
@@ -1159,6 +1133,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1159 | ret = -ENOMEM; | 1133 | ret = -ENOMEM; |
1160 | goto out_err; | 1134 | goto out_err; |
1161 | } | 1135 | } |
1136 | svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); | ||
1137 | if (!svc->stats.cpustats) { | ||
1138 | pr_err("%s() alloc_percpu failed\n", __func__); | ||
1139 | goto out_err; | ||
1140 | } | ||
1162 | 1141 | ||
1163 | /* I'm the first user of the service */ | 1142 | /* I'm the first user of the service */ |
1164 | atomic_set(&svc->usecnt, 0); | 1143 | atomic_set(&svc->usecnt, 0); |
@@ -1172,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1172 | svc->flags = u->flags; | 1151 | svc->flags = u->flags; |
1173 | svc->timeout = u->timeout * HZ; | 1152 | svc->timeout = u->timeout * HZ; |
1174 | svc->netmask = u->netmask; | 1153 | svc->netmask = u->netmask; |
1154 | svc->net = net; | ||
1175 | 1155 | ||
1176 | INIT_LIST_HEAD(&svc->destinations); | 1156 | INIT_LIST_HEAD(&svc->destinations); |
1177 | rwlock_init(&svc->sched_lock); | 1157 | rwlock_init(&svc->sched_lock); |
@@ -1189,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1189 | 1169 | ||
1190 | /* Update the virtual service counters */ | 1170 | /* Update the virtual service counters */ |
1191 | if (svc->port == FTPPORT) | 1171 | if (svc->port == FTPPORT) |
1192 | atomic_inc(&ip_vs_ftpsvc_counter); | 1172 | atomic_inc(&ipvs->ftpsvc_counter); |
1193 | else if (svc->port == 0) | 1173 | else if (svc->port == 0) |
1194 | atomic_inc(&ip_vs_nullsvc_counter); | 1174 | atomic_inc(&ipvs->nullsvc_counter); |
1195 | 1175 | ||
1196 | ip_vs_new_estimator(&svc->stats); | 1176 | ip_vs_new_estimator(net, &svc->stats); |
1197 | 1177 | ||
1198 | /* Count only IPv4 services for old get/setsockopt interface */ | 1178 | /* Count only IPv4 services for old get/setsockopt interface */ |
1199 | if (svc->af == AF_INET) | 1179 | if (svc->af == AF_INET) |
1200 | ip_vs_num_services++; | 1180 | ipvs->num_services++; |
1201 | 1181 | ||
1202 | /* Hash the service into the service table */ | 1182 | /* Hash the service into the service table */ |
1203 | write_lock_bh(&__ip_vs_svc_lock); | 1183 | write_lock_bh(&__ip_vs_svc_lock); |
@@ -1207,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1207 | *svc_p = svc; | 1187 | *svc_p = svc; |
1208 | return 0; | 1188 | return 0; |
1209 | 1189 | ||
1190 | |||
1210 | out_err: | 1191 | out_err: |
1211 | if (svc != NULL) { | 1192 | if (svc != NULL) { |
1212 | ip_vs_unbind_scheduler(svc); | 1193 | ip_vs_unbind_scheduler(svc); |
@@ -1215,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, | |||
1215 | ip_vs_app_inc_put(svc->inc); | 1196 | ip_vs_app_inc_put(svc->inc); |
1216 | local_bh_enable(); | 1197 | local_bh_enable(); |
1217 | } | 1198 | } |
1199 | if (svc->stats.cpustats) | ||
1200 | free_percpu(svc->stats.cpustats); | ||
1218 | kfree(svc); | 1201 | kfree(svc); |
1219 | } | 1202 | } |
1220 | ip_vs_scheduler_put(sched); | 1203 | ip_vs_scheduler_put(sched); |
@@ -1248,7 +1231,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) | |||
1248 | old_sched = sched; | 1231 | old_sched = sched; |
1249 | 1232 | ||
1250 | if (u->pe_name && *u->pe_name) { | 1233 | if (u->pe_name && *u->pe_name) { |
1251 | pe = ip_vs_pe_get(u->pe_name); | 1234 | pe = ip_vs_pe_getbyname(u->pe_name); |
1252 | if (pe == NULL) { | 1235 | if (pe == NULL) { |
1253 | pr_info("persistence engine module ip_vs_pe_%s " | 1236 | pr_info("persistence engine module ip_vs_pe_%s " |
1254 | "not found\n", u->pe_name); | 1237 | "not found\n", u->pe_name); |
@@ -1334,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) | |||
1334 | struct ip_vs_dest *dest, *nxt; | 1317 | struct ip_vs_dest *dest, *nxt; |
1335 | struct ip_vs_scheduler *old_sched; | 1318 | struct ip_vs_scheduler *old_sched; |
1336 | struct ip_vs_pe *old_pe; | 1319 | struct ip_vs_pe *old_pe; |
1320 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
1337 | 1321 | ||
1338 | pr_info("%s: enter\n", __func__); | 1322 | pr_info("%s: enter\n", __func__); |
1339 | 1323 | ||
1340 | /* Count only IPv4 services for old get/setsockopt interface */ | 1324 | /* Count only IPv4 services for old get/setsockopt interface */ |
1341 | if (svc->af == AF_INET) | 1325 | if (svc->af == AF_INET) |
1342 | ip_vs_num_services--; | 1326 | ipvs->num_services--; |
1343 | 1327 | ||
1344 | ip_vs_kill_estimator(&svc->stats); | 1328 | ip_vs_kill_estimator(svc->net, &svc->stats); |
1345 | 1329 | ||
1346 | /* Unbind scheduler */ | 1330 | /* Unbind scheduler */ |
1347 | old_sched = svc->scheduler; | 1331 | old_sched = svc->scheduler; |
@@ -1364,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) | |||
1364 | */ | 1348 | */ |
1365 | list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { | 1349 | list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { |
1366 | __ip_vs_unlink_dest(svc, dest, 0); | 1350 | __ip_vs_unlink_dest(svc, dest, 0); |
1367 | __ip_vs_del_dest(dest); | 1351 | __ip_vs_del_dest(svc->net, dest); |
1368 | } | 1352 | } |
1369 | 1353 | ||
1370 | /* | 1354 | /* |
1371 | * Update the virtual service counters | 1355 | * Update the virtual service counters |
1372 | */ | 1356 | */ |
1373 | if (svc->port == FTPPORT) | 1357 | if (svc->port == FTPPORT) |
1374 | atomic_dec(&ip_vs_ftpsvc_counter); | 1358 | atomic_dec(&ipvs->ftpsvc_counter); |
1375 | else if (svc->port == 0) | 1359 | else if (svc->port == 0) |
1376 | atomic_dec(&ip_vs_nullsvc_counter); | 1360 | atomic_dec(&ipvs->nullsvc_counter); |
1377 | 1361 | ||
1378 | /* | 1362 | /* |
1379 | * Free the service if nobody refers to it | 1363 | * Free the service if nobody refers to it |
@@ -1383,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) | |||
1383 | svc->fwmark, | 1367 | svc->fwmark, |
1384 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | 1368 | IP_VS_DBG_ADDR(svc->af, &svc->addr), |
1385 | ntohs(svc->port), atomic_read(&svc->usecnt)); | 1369 | ntohs(svc->port), atomic_read(&svc->usecnt)); |
1370 | free_percpu(svc->stats.cpustats); | ||
1386 | kfree(svc); | 1371 | kfree(svc); |
1387 | } | 1372 | } |
1388 | 1373 | ||
@@ -1428,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc) | |||
1428 | /* | 1413 | /* |
1429 | * Flush all the virtual services | 1414 | * Flush all the virtual services |
1430 | */ | 1415 | */ |
1431 | static int ip_vs_flush(void) | 1416 | static int ip_vs_flush(struct net *net) |
1432 | { | 1417 | { |
1433 | int idx; | 1418 | int idx; |
1434 | struct ip_vs_service *svc, *nxt; | 1419 | struct ip_vs_service *svc, *nxt; |
1435 | 1420 | ||
1436 | /* | 1421 | /* |
1437 | * Flush the service table hashed by <protocol,addr,port> | 1422 | * Flush the service table hashed by <netns,protocol,addr,port> |
1438 | */ | 1423 | */ |
1439 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1424 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1440 | list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { | 1425 | list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], |
1441 | ip_vs_unlink_service(svc); | 1426 | s_list) { |
1427 | if (net_eq(svc->net, net)) | ||
1428 | ip_vs_unlink_service(svc); | ||
1442 | } | 1429 | } |
1443 | } | 1430 | } |
1444 | 1431 | ||
@@ -1448,7 +1435,8 @@ static int ip_vs_flush(void) | |||
1448 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1435 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1449 | list_for_each_entry_safe(svc, nxt, | 1436 | list_for_each_entry_safe(svc, nxt, |
1450 | &ip_vs_svc_fwm_table[idx], f_list) { | 1437 | &ip_vs_svc_fwm_table[idx], f_list) { |
1451 | ip_vs_unlink_service(svc); | 1438 | if (net_eq(svc->net, net)) |
1439 | ip_vs_unlink_service(svc); | ||
1452 | } | 1440 | } |
1453 | } | 1441 | } |
1454 | 1442 | ||
@@ -1472,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) | |||
1472 | return 0; | 1460 | return 0; |
1473 | } | 1461 | } |
1474 | 1462 | ||
1475 | static int ip_vs_zero_all(void) | 1463 | static int ip_vs_zero_all(struct net *net) |
1476 | { | 1464 | { |
1477 | int idx; | 1465 | int idx; |
1478 | struct ip_vs_service *svc; | 1466 | struct ip_vs_service *svc; |
1479 | 1467 | ||
1480 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1468 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1481 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | 1469 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { |
1482 | ip_vs_zero_service(svc); | 1470 | if (net_eq(svc->net, net)) |
1471 | ip_vs_zero_service(svc); | ||
1483 | } | 1472 | } |
1484 | } | 1473 | } |
1485 | 1474 | ||
1486 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1475 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1487 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | 1476 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { |
1488 | ip_vs_zero_service(svc); | 1477 | if (net_eq(svc->net, net)) |
1478 | ip_vs_zero_service(svc); | ||
1489 | } | 1479 | } |
1490 | } | 1480 | } |
1491 | 1481 | ||
1492 | ip_vs_zero_stats(&ip_vs_stats); | 1482 | ip_vs_zero_stats(net_ipvs(net)->tot_stats); |
1493 | return 0; | 1483 | return 0; |
1494 | } | 1484 | } |
1495 | 1485 | ||
@@ -1498,6 +1488,7 @@ static int | |||
1498 | proc_do_defense_mode(ctl_table *table, int write, | 1488 | proc_do_defense_mode(ctl_table *table, int write, |
1499 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1489 | void __user *buffer, size_t *lenp, loff_t *ppos) |
1500 | { | 1490 | { |
1491 | struct net *net = current->nsproxy->net_ns; | ||
1501 | int *valp = table->data; | 1492 | int *valp = table->data; |
1502 | int val = *valp; | 1493 | int val = *valp; |
1503 | int rc; | 1494 | int rc; |
@@ -1508,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write, | |||
1508 | /* Restore the correct value */ | 1499 | /* Restore the correct value */ |
1509 | *valp = val; | 1500 | *valp = val; |
1510 | } else { | 1501 | } else { |
1511 | update_defense_level(); | 1502 | update_defense_level(net_ipvs(net)); |
1512 | } | 1503 | } |
1513 | } | 1504 | } |
1514 | return rc; | 1505 | return rc; |
@@ -1534,45 +1525,54 @@ proc_do_sync_threshold(ctl_table *table, int write, | |||
1534 | return rc; | 1525 | return rc; |
1535 | } | 1526 | } |
1536 | 1527 | ||
1528 | static int | ||
1529 | proc_do_sync_mode(ctl_table *table, int write, | ||
1530 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1531 | { | ||
1532 | int *valp = table->data; | ||
1533 | int val = *valp; | ||
1534 | int rc; | ||
1535 | |||
1536 | rc = proc_dointvec(table, write, buffer, lenp, ppos); | ||
1537 | if (write && (*valp != val)) { | ||
1538 | if ((*valp < 0) || (*valp > 1)) { | ||
1539 | /* Restore the correct value */ | ||
1540 | *valp = val; | ||
1541 | } else { | ||
1542 | struct net *net = current->nsproxy->net_ns; | ||
1543 | ip_vs_sync_switch_mode(net, val); | ||
1544 | } | ||
1545 | } | ||
1546 | return rc; | ||
1547 | } | ||
1537 | 1548 | ||
1538 | /* | 1549 | /* |
1539 | * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) | 1550 | * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) |
1551 | * Do not change order or insert new entries without | ||
1552 | * align with netns init in __ip_vs_control_init() | ||
1540 | */ | 1553 | */ |
1541 | 1554 | ||
1542 | static struct ctl_table vs_vars[] = { | 1555 | static struct ctl_table vs_vars[] = { |
1543 | { | 1556 | { |
1544 | .procname = "amemthresh", | 1557 | .procname = "amemthresh", |
1545 | .data = &sysctl_ip_vs_amemthresh, | ||
1546 | .maxlen = sizeof(int), | 1558 | .maxlen = sizeof(int), |
1547 | .mode = 0644, | 1559 | .mode = 0644, |
1548 | .proc_handler = proc_dointvec, | 1560 | .proc_handler = proc_dointvec, |
1549 | }, | 1561 | }, |
1550 | #ifdef CONFIG_IP_VS_DEBUG | ||
1551 | { | ||
1552 | .procname = "debug_level", | ||
1553 | .data = &sysctl_ip_vs_debug_level, | ||
1554 | .maxlen = sizeof(int), | ||
1555 | .mode = 0644, | ||
1556 | .proc_handler = proc_dointvec, | ||
1557 | }, | ||
1558 | #endif | ||
1559 | { | 1562 | { |
1560 | .procname = "am_droprate", | 1563 | .procname = "am_droprate", |
1561 | .data = &sysctl_ip_vs_am_droprate, | ||
1562 | .maxlen = sizeof(int), | 1564 | .maxlen = sizeof(int), |
1563 | .mode = 0644, | 1565 | .mode = 0644, |
1564 | .proc_handler = proc_dointvec, | 1566 | .proc_handler = proc_dointvec, |
1565 | }, | 1567 | }, |
1566 | { | 1568 | { |
1567 | .procname = "drop_entry", | 1569 | .procname = "drop_entry", |
1568 | .data = &sysctl_ip_vs_drop_entry, | ||
1569 | .maxlen = sizeof(int), | 1570 | .maxlen = sizeof(int), |
1570 | .mode = 0644, | 1571 | .mode = 0644, |
1571 | .proc_handler = proc_do_defense_mode, | 1572 | .proc_handler = proc_do_defense_mode, |
1572 | }, | 1573 | }, |
1573 | { | 1574 | { |
1574 | .procname = "drop_packet", | 1575 | .procname = "drop_packet", |
1575 | .data = &sysctl_ip_vs_drop_packet, | ||
1576 | .maxlen = sizeof(int), | 1576 | .maxlen = sizeof(int), |
1577 | .mode = 0644, | 1577 | .mode = 0644, |
1578 | .proc_handler = proc_do_defense_mode, | 1578 | .proc_handler = proc_do_defense_mode, |
@@ -1580,7 +1580,6 @@ static struct ctl_table vs_vars[] = { | |||
1580 | #ifdef CONFIG_IP_VS_NFCT | 1580 | #ifdef CONFIG_IP_VS_NFCT |
1581 | { | 1581 | { |
1582 | .procname = "conntrack", | 1582 | .procname = "conntrack", |
1583 | .data = &sysctl_ip_vs_conntrack, | ||
1584 | .maxlen = sizeof(int), | 1583 | .maxlen = sizeof(int), |
1585 | .mode = 0644, | 1584 | .mode = 0644, |
1586 | .proc_handler = &proc_dointvec, | 1585 | .proc_handler = &proc_dointvec, |
@@ -1588,18 +1587,62 @@ static struct ctl_table vs_vars[] = { | |||
1588 | #endif | 1587 | #endif |
1589 | { | 1588 | { |
1590 | .procname = "secure_tcp", | 1589 | .procname = "secure_tcp", |
1591 | .data = &sysctl_ip_vs_secure_tcp, | ||
1592 | .maxlen = sizeof(int), | 1590 | .maxlen = sizeof(int), |
1593 | .mode = 0644, | 1591 | .mode = 0644, |
1594 | .proc_handler = proc_do_defense_mode, | 1592 | .proc_handler = proc_do_defense_mode, |
1595 | }, | 1593 | }, |
1596 | { | 1594 | { |
1597 | .procname = "snat_reroute", | 1595 | .procname = "snat_reroute", |
1598 | .data = &sysctl_ip_vs_snat_reroute, | ||
1599 | .maxlen = sizeof(int), | 1596 | .maxlen = sizeof(int), |
1600 | .mode = 0644, | 1597 | .mode = 0644, |
1601 | .proc_handler = &proc_dointvec, | 1598 | .proc_handler = &proc_dointvec, |
1602 | }, | 1599 | }, |
1600 | { | ||
1601 | .procname = "sync_version", | ||
1602 | .maxlen = sizeof(int), | ||
1603 | .mode = 0644, | ||
1604 | .proc_handler = &proc_do_sync_mode, | ||
1605 | }, | ||
1606 | { | ||
1607 | .procname = "cache_bypass", | ||
1608 | .maxlen = sizeof(int), | ||
1609 | .mode = 0644, | ||
1610 | .proc_handler = proc_dointvec, | ||
1611 | }, | ||
1612 | { | ||
1613 | .procname = "expire_nodest_conn", | ||
1614 | .maxlen = sizeof(int), | ||
1615 | .mode = 0644, | ||
1616 | .proc_handler = proc_dointvec, | ||
1617 | }, | ||
1618 | { | ||
1619 | .procname = "expire_quiescent_template", | ||
1620 | .maxlen = sizeof(int), | ||
1621 | .mode = 0644, | ||
1622 | .proc_handler = proc_dointvec, | ||
1623 | }, | ||
1624 | { | ||
1625 | .procname = "sync_threshold", | ||
1626 | .maxlen = | ||
1627 | sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), | ||
1628 | .mode = 0644, | ||
1629 | .proc_handler = proc_do_sync_threshold, | ||
1630 | }, | ||
1631 | { | ||
1632 | .procname = "nat_icmp_send", | ||
1633 | .maxlen = sizeof(int), | ||
1634 | .mode = 0644, | ||
1635 | .proc_handler = proc_dointvec, | ||
1636 | }, | ||
1637 | #ifdef CONFIG_IP_VS_DEBUG | ||
1638 | { | ||
1639 | .procname = "debug_level", | ||
1640 | .data = &sysctl_ip_vs_debug_level, | ||
1641 | .maxlen = sizeof(int), | ||
1642 | .mode = 0644, | ||
1643 | .proc_handler = proc_dointvec, | ||
1644 | }, | ||
1645 | #endif | ||
1603 | #if 0 | 1646 | #if 0 |
1604 | { | 1647 | { |
1605 | .procname = "timeout_established", | 1648 | .procname = "timeout_established", |
@@ -1686,41 +1729,6 @@ static struct ctl_table vs_vars[] = { | |||
1686 | .proc_handler = proc_dointvec_jiffies, | 1729 | .proc_handler = proc_dointvec_jiffies, |
1687 | }, | 1730 | }, |
1688 | #endif | 1731 | #endif |
1689 | { | ||
1690 | .procname = "cache_bypass", | ||
1691 | .data = &sysctl_ip_vs_cache_bypass, | ||
1692 | .maxlen = sizeof(int), | ||
1693 | .mode = 0644, | ||
1694 | .proc_handler = proc_dointvec, | ||
1695 | }, | ||
1696 | { | ||
1697 | .procname = "expire_nodest_conn", | ||
1698 | .data = &sysctl_ip_vs_expire_nodest_conn, | ||
1699 | .maxlen = sizeof(int), | ||
1700 | .mode = 0644, | ||
1701 | .proc_handler = proc_dointvec, | ||
1702 | }, | ||
1703 | { | ||
1704 | .procname = "expire_quiescent_template", | ||
1705 | .data = &sysctl_ip_vs_expire_quiescent_template, | ||
1706 | .maxlen = sizeof(int), | ||
1707 | .mode = 0644, | ||
1708 | .proc_handler = proc_dointvec, | ||
1709 | }, | ||
1710 | { | ||
1711 | .procname = "sync_threshold", | ||
1712 | .data = &sysctl_ip_vs_sync_threshold, | ||
1713 | .maxlen = sizeof(sysctl_ip_vs_sync_threshold), | ||
1714 | .mode = 0644, | ||
1715 | .proc_handler = proc_do_sync_threshold, | ||
1716 | }, | ||
1717 | { | ||
1718 | .procname = "nat_icmp_send", | ||
1719 | .data = &sysctl_ip_vs_nat_icmp_send, | ||
1720 | .maxlen = sizeof(int), | ||
1721 | .mode = 0644, | ||
1722 | .proc_handler = proc_dointvec, | ||
1723 | }, | ||
1724 | { } | 1732 | { } |
1725 | }; | 1733 | }; |
1726 | 1734 | ||
@@ -1732,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = { | |||
1732 | }; | 1740 | }; |
1733 | EXPORT_SYMBOL_GPL(net_vs_ctl_path); | 1741 | EXPORT_SYMBOL_GPL(net_vs_ctl_path); |
1734 | 1742 | ||
1735 | static struct ctl_table_header * sysctl_header; | ||
1736 | |||
1737 | #ifdef CONFIG_PROC_FS | 1743 | #ifdef CONFIG_PROC_FS |
1738 | 1744 | ||
1739 | struct ip_vs_iter { | 1745 | struct ip_vs_iter { |
1746 | struct seq_net_private p; /* Do not move this, netns depends upon it*/ | ||
1740 | struct list_head *table; | 1747 | struct list_head *table; |
1741 | int bucket; | 1748 | int bucket; |
1742 | }; | 1749 | }; |
@@ -1763,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags) | |||
1763 | /* Get the Nth entry in the two lists */ | 1770 | /* Get the Nth entry in the two lists */ |
1764 | static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) | 1771 | static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) |
1765 | { | 1772 | { |
1773 | struct net *net = seq_file_net(seq); | ||
1766 | struct ip_vs_iter *iter = seq->private; | 1774 | struct ip_vs_iter *iter = seq->private; |
1767 | int idx; | 1775 | int idx; |
1768 | struct ip_vs_service *svc; | 1776 | struct ip_vs_service *svc; |
@@ -1770,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) | |||
1770 | /* look in hash by protocol */ | 1778 | /* look in hash by protocol */ |
1771 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1779 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1772 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | 1780 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { |
1773 | if (pos-- == 0){ | 1781 | if (net_eq(svc->net, net) && pos-- == 0) { |
1774 | iter->table = ip_vs_svc_table; | 1782 | iter->table = ip_vs_svc_table; |
1775 | iter->bucket = idx; | 1783 | iter->bucket = idx; |
1776 | return svc; | 1784 | return svc; |
@@ -1781,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) | |||
1781 | /* keep looking in fwmark */ | 1789 | /* keep looking in fwmark */ |
1782 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 1790 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
1783 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | 1791 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { |
1784 | if (pos-- == 0) { | 1792 | if (net_eq(svc->net, net) && pos-- == 0) { |
1785 | iter->table = ip_vs_svc_fwm_table; | 1793 | iter->table = ip_vs_svc_fwm_table; |
1786 | iter->bucket = idx; | 1794 | iter->bucket = idx; |
1787 | return svc; | 1795 | return svc; |
@@ -1935,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = { | |||
1935 | 1943 | ||
1936 | static int ip_vs_info_open(struct inode *inode, struct file *file) | 1944 | static int ip_vs_info_open(struct inode *inode, struct file *file) |
1937 | { | 1945 | { |
1938 | return seq_open_private(file, &ip_vs_info_seq_ops, | 1946 | return seq_open_net(inode, file, &ip_vs_info_seq_ops, |
1939 | sizeof(struct ip_vs_iter)); | 1947 | sizeof(struct ip_vs_iter)); |
1940 | } | 1948 | } |
1941 | 1949 | ||
@@ -1949,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = { | |||
1949 | 1957 | ||
1950 | #endif | 1958 | #endif |
1951 | 1959 | ||
1952 | struct ip_vs_stats ip_vs_stats = { | ||
1953 | .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), | ||
1954 | }; | ||
1955 | |||
1956 | #ifdef CONFIG_PROC_FS | 1960 | #ifdef CONFIG_PROC_FS |
1957 | static int ip_vs_stats_show(struct seq_file *seq, void *v) | 1961 | static int ip_vs_stats_show(struct seq_file *seq, void *v) |
1958 | { | 1962 | { |
1963 | struct net *net = seq_file_single_net(seq); | ||
1964 | struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; | ||
1959 | 1965 | ||
1960 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | 1966 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ |
1961 | seq_puts(seq, | 1967 | seq_puts(seq, |
@@ -1963,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) | |||
1963 | seq_printf(seq, | 1969 | seq_printf(seq, |
1964 | " Conns Packets Packets Bytes Bytes\n"); | 1970 | " Conns Packets Packets Bytes Bytes\n"); |
1965 | 1971 | ||
1966 | spin_lock_bh(&ip_vs_stats.lock); | 1972 | spin_lock_bh(&tot_stats->lock); |
1967 | seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, | 1973 | seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, |
1968 | ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, | 1974 | tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, |
1969 | (unsigned long long) ip_vs_stats.ustats.inbytes, | 1975 | (unsigned long long) tot_stats->ustats.inbytes, |
1970 | (unsigned long long) ip_vs_stats.ustats.outbytes); | 1976 | (unsigned long long) tot_stats->ustats.outbytes); |
1971 | 1977 | ||
1972 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | 1978 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ |
1973 | seq_puts(seq, | 1979 | seq_puts(seq, |
1974 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); | 1980 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); |
1975 | seq_printf(seq,"%8X %8X %8X %16X %16X\n", | 1981 | seq_printf(seq,"%8X %8X %8X %16X %16X\n", |
1976 | ip_vs_stats.ustats.cps, | 1982 | tot_stats->ustats.cps, |
1977 | ip_vs_stats.ustats.inpps, | 1983 | tot_stats->ustats.inpps, |
1978 | ip_vs_stats.ustats.outpps, | 1984 | tot_stats->ustats.outpps, |
1979 | ip_vs_stats.ustats.inbps, | 1985 | tot_stats->ustats.inbps, |
1980 | ip_vs_stats.ustats.outbps); | 1986 | tot_stats->ustats.outbps); |
1981 | spin_unlock_bh(&ip_vs_stats.lock); | 1987 | spin_unlock_bh(&tot_stats->lock); |
1982 | 1988 | ||
1983 | return 0; | 1989 | return 0; |
1984 | } | 1990 | } |
1985 | 1991 | ||
1986 | static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) | 1992 | static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) |
1987 | { | 1993 | { |
1988 | return single_open(file, ip_vs_stats_show, NULL); | 1994 | return single_open_net(inode, file, ip_vs_stats_show); |
1989 | } | 1995 | } |
1990 | 1996 | ||
1991 | static const struct file_operations ip_vs_stats_fops = { | 1997 | static const struct file_operations ip_vs_stats_fops = { |
@@ -1996,13 +2002,68 @@ static const struct file_operations ip_vs_stats_fops = { | |||
1996 | .release = single_release, | 2002 | .release = single_release, |
1997 | }; | 2003 | }; |
1998 | 2004 | ||
2005 | static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) | ||
2006 | { | ||
2007 | struct net *net = seq_file_single_net(seq); | ||
2008 | struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; | ||
2009 | int i; | ||
2010 | |||
2011 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | ||
2012 | seq_puts(seq, | ||
2013 | " Total Incoming Outgoing Incoming Outgoing\n"); | ||
2014 | seq_printf(seq, | ||
2015 | "CPU Conns Packets Packets Bytes Bytes\n"); | ||
2016 | |||
2017 | for_each_possible_cpu(i) { | ||
2018 | struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); | ||
2019 | seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", | ||
2020 | i, u->ustats.conns, u->ustats.inpkts, | ||
2021 | u->ustats.outpkts, (__u64)u->ustats.inbytes, | ||
2022 | (__u64)u->ustats.outbytes); | ||
2023 | } | ||
2024 | |||
2025 | spin_lock_bh(&tot_stats->lock); | ||
2026 | seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", | ||
2027 | tot_stats->ustats.conns, tot_stats->ustats.inpkts, | ||
2028 | tot_stats->ustats.outpkts, | ||
2029 | (unsigned long long) tot_stats->ustats.inbytes, | ||
2030 | (unsigned long long) tot_stats->ustats.outbytes); | ||
2031 | |||
2032 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | ||
2033 | seq_puts(seq, | ||
2034 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); | ||
2035 | seq_printf(seq, " %8X %8X %8X %16X %16X\n", | ||
2036 | tot_stats->ustats.cps, | ||
2037 | tot_stats->ustats.inpps, | ||
2038 | tot_stats->ustats.outpps, | ||
2039 | tot_stats->ustats.inbps, | ||
2040 | tot_stats->ustats.outbps); | ||
2041 | spin_unlock_bh(&tot_stats->lock); | ||
2042 | |||
2043 | return 0; | ||
2044 | } | ||
2045 | |||
2046 | static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) | ||
2047 | { | ||
2048 | return single_open_net(inode, file, ip_vs_stats_percpu_show); | ||
2049 | } | ||
2050 | |||
2051 | static const struct file_operations ip_vs_stats_percpu_fops = { | ||
2052 | .owner = THIS_MODULE, | ||
2053 | .open = ip_vs_stats_percpu_seq_open, | ||
2054 | .read = seq_read, | ||
2055 | .llseek = seq_lseek, | ||
2056 | .release = single_release, | ||
2057 | }; | ||
1999 | #endif | 2058 | #endif |
2000 | 2059 | ||
2001 | /* | 2060 | /* |
2002 | * Set timeout values for tcp tcpfin udp in the timeout_table. | 2061 | * Set timeout values for tcp tcpfin udp in the timeout_table. |
2003 | */ | 2062 | */ |
2004 | static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) | 2063 | static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) |
2005 | { | 2064 | { |
2065 | struct ip_vs_proto_data *pd; | ||
2066 | |||
2006 | IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", | 2067 | IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", |
2007 | u->tcp_timeout, | 2068 | u->tcp_timeout, |
2008 | u->tcp_fin_timeout, | 2069 | u->tcp_fin_timeout, |
@@ -2010,19 +2071,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) | |||
2010 | 2071 | ||
2011 | #ifdef CONFIG_IP_VS_PROTO_TCP | 2072 | #ifdef CONFIG_IP_VS_PROTO_TCP |
2012 | if (u->tcp_timeout) { | 2073 | if (u->tcp_timeout) { |
2013 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] | 2074 | pd = ip_vs_proto_data_get(net, IPPROTO_TCP); |
2075 | pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] | ||
2014 | = u->tcp_timeout * HZ; | 2076 | = u->tcp_timeout * HZ; |
2015 | } | 2077 | } |
2016 | 2078 | ||
2017 | if (u->tcp_fin_timeout) { | 2079 | if (u->tcp_fin_timeout) { |
2018 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] | 2080 | pd = ip_vs_proto_data_get(net, IPPROTO_TCP); |
2081 | pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] | ||
2019 | = u->tcp_fin_timeout * HZ; | 2082 | = u->tcp_fin_timeout * HZ; |
2020 | } | 2083 | } |
2021 | #endif | 2084 | #endif |
2022 | 2085 | ||
2023 | #ifdef CONFIG_IP_VS_PROTO_UDP | 2086 | #ifdef CONFIG_IP_VS_PROTO_UDP |
2024 | if (u->udp_timeout) { | 2087 | if (u->udp_timeout) { |
2025 | ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] | 2088 | pd = ip_vs_proto_data_get(net, IPPROTO_UDP); |
2089 | pd->timeout_table[IP_VS_UDP_S_NORMAL] | ||
2026 | = u->udp_timeout * HZ; | 2090 | = u->udp_timeout * HZ; |
2027 | } | 2091 | } |
2028 | #endif | 2092 | #endif |
@@ -2087,6 +2151,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, | |||
2087 | static int | 2151 | static int |
2088 | do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | 2152 | do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) |
2089 | { | 2153 | { |
2154 | struct net *net = sock_net(sk); | ||
2090 | int ret; | 2155 | int ret; |
2091 | unsigned char arg[MAX_ARG_LEN]; | 2156 | unsigned char arg[MAX_ARG_LEN]; |
2092 | struct ip_vs_service_user *usvc_compat; | 2157 | struct ip_vs_service_user *usvc_compat; |
@@ -2121,19 +2186,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2121 | 2186 | ||
2122 | if (cmd == IP_VS_SO_SET_FLUSH) { | 2187 | if (cmd == IP_VS_SO_SET_FLUSH) { |
2123 | /* Flush the virtual service */ | 2188 | /* Flush the virtual service */ |
2124 | ret = ip_vs_flush(); | 2189 | ret = ip_vs_flush(net); |
2125 | goto out_unlock; | 2190 | goto out_unlock; |
2126 | } else if (cmd == IP_VS_SO_SET_TIMEOUT) { | 2191 | } else if (cmd == IP_VS_SO_SET_TIMEOUT) { |
2127 | /* Set timeout values for (tcp tcpfin udp) */ | 2192 | /* Set timeout values for (tcp tcpfin udp) */ |
2128 | ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); | 2193 | ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); |
2129 | goto out_unlock; | 2194 | goto out_unlock; |
2130 | } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { | 2195 | } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { |
2131 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | 2196 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; |
2132 | ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); | 2197 | ret = start_sync_thread(net, dm->state, dm->mcast_ifn, |
2198 | dm->syncid); | ||
2133 | goto out_unlock; | 2199 | goto out_unlock; |
2134 | } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { | 2200 | } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { |
2135 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | 2201 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; |
2136 | ret = stop_sync_thread(dm->state); | 2202 | ret = stop_sync_thread(net, dm->state); |
2137 | goto out_unlock; | 2203 | goto out_unlock; |
2138 | } | 2204 | } |
2139 | 2205 | ||
@@ -2148,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2148 | if (cmd == IP_VS_SO_SET_ZERO) { | 2214 | if (cmd == IP_VS_SO_SET_ZERO) { |
2149 | /* if no service address is set, zero counters in all */ | 2215 | /* if no service address is set, zero counters in all */ |
2150 | if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { | 2216 | if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { |
2151 | ret = ip_vs_zero_all(); | 2217 | ret = ip_vs_zero_all(net); |
2152 | goto out_unlock; | 2218 | goto out_unlock; |
2153 | } | 2219 | } |
2154 | } | 2220 | } |
@@ -2165,10 +2231,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2165 | 2231 | ||
2166 | /* Lookup the exact service by <protocol, addr, port> or fwmark */ | 2232 | /* Lookup the exact service by <protocol, addr, port> or fwmark */ |
2167 | if (usvc.fwmark == 0) | 2233 | if (usvc.fwmark == 0) |
2168 | svc = __ip_vs_service_find(usvc.af, usvc.protocol, | 2234 | svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, |
2169 | &usvc.addr, usvc.port); | 2235 | &usvc.addr, usvc.port); |
2170 | else | 2236 | else |
2171 | svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); | 2237 | svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); |
2172 | 2238 | ||
2173 | if (cmd != IP_VS_SO_SET_ADD | 2239 | if (cmd != IP_VS_SO_SET_ADD |
2174 | && (svc == NULL || svc->protocol != usvc.protocol)) { | 2240 | && (svc == NULL || svc->protocol != usvc.protocol)) { |
@@ -2181,7 +2247,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2181 | if (svc != NULL) | 2247 | if (svc != NULL) |
2182 | ret = -EEXIST; | 2248 | ret = -EEXIST; |
2183 | else | 2249 | else |
2184 | ret = ip_vs_add_service(&usvc, &svc); | 2250 | ret = ip_vs_add_service(net, &usvc, &svc); |
2185 | break; | 2251 | break; |
2186 | case IP_VS_SO_SET_EDIT: | 2252 | case IP_VS_SO_SET_EDIT: |
2187 | ret = ip_vs_edit_service(svc, &usvc); | 2253 | ret = ip_vs_edit_service(svc, &usvc); |
@@ -2241,7 +2307,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) | |||
2241 | } | 2307 | } |
2242 | 2308 | ||
2243 | static inline int | 2309 | static inline int |
2244 | __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | 2310 | __ip_vs_get_service_entries(struct net *net, |
2311 | const struct ip_vs_get_services *get, | ||
2245 | struct ip_vs_get_services __user *uptr) | 2312 | struct ip_vs_get_services __user *uptr) |
2246 | { | 2313 | { |
2247 | int idx, count=0; | 2314 | int idx, count=0; |
@@ -2252,7 +2319,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | |||
2252 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 2319 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
2253 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | 2320 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { |
2254 | /* Only expose IPv4 entries to old interface */ | 2321 | /* Only expose IPv4 entries to old interface */ |
2255 | if (svc->af != AF_INET) | 2322 | if (svc->af != AF_INET || !net_eq(svc->net, net)) |
2256 | continue; | 2323 | continue; |
2257 | 2324 | ||
2258 | if (count >= get->num_services) | 2325 | if (count >= get->num_services) |
@@ -2271,7 +2338,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | |||
2271 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 2338 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
2272 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | 2339 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { |
2273 | /* Only expose IPv4 entries to old interface */ | 2340 | /* Only expose IPv4 entries to old interface */ |
2274 | if (svc->af != AF_INET) | 2341 | if (svc->af != AF_INET || !net_eq(svc->net, net)) |
2275 | continue; | 2342 | continue; |
2276 | 2343 | ||
2277 | if (count >= get->num_services) | 2344 | if (count >= get->num_services) |
@@ -2291,7 +2358,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | |||
2291 | } | 2358 | } |
2292 | 2359 | ||
2293 | static inline int | 2360 | static inline int |
2294 | __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | 2361 | __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, |
2295 | struct ip_vs_get_dests __user *uptr) | 2362 | struct ip_vs_get_dests __user *uptr) |
2296 | { | 2363 | { |
2297 | struct ip_vs_service *svc; | 2364 | struct ip_vs_service *svc; |
@@ -2299,9 +2366,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | |||
2299 | int ret = 0; | 2366 | int ret = 0; |
2300 | 2367 | ||
2301 | if (get->fwmark) | 2368 | if (get->fwmark) |
2302 | svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); | 2369 | svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); |
2303 | else | 2370 | else |
2304 | svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, | 2371 | svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, |
2305 | get->port); | 2372 | get->port); |
2306 | 2373 | ||
2307 | if (svc) { | 2374 | if (svc) { |
@@ -2336,17 +2403,19 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | |||
2336 | } | 2403 | } |
2337 | 2404 | ||
2338 | static inline void | 2405 | static inline void |
2339 | __ip_vs_get_timeouts(struct ip_vs_timeout_user *u) | 2406 | __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) |
2340 | { | 2407 | { |
2408 | struct ip_vs_proto_data *pd; | ||
2409 | |||
2341 | #ifdef CONFIG_IP_VS_PROTO_TCP | 2410 | #ifdef CONFIG_IP_VS_PROTO_TCP |
2342 | u->tcp_timeout = | 2411 | pd = ip_vs_proto_data_get(net, IPPROTO_TCP); |
2343 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; | 2412 | u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; |
2344 | u->tcp_fin_timeout = | 2413 | u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; |
2345 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; | ||
2346 | #endif | 2414 | #endif |
2347 | #ifdef CONFIG_IP_VS_PROTO_UDP | 2415 | #ifdef CONFIG_IP_VS_PROTO_UDP |
2416 | pd = ip_vs_proto_data_get(net, IPPROTO_UDP); | ||
2348 | u->udp_timeout = | 2417 | u->udp_timeout = |
2349 | ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; | 2418 | pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; |
2350 | #endif | 2419 | #endif |
2351 | } | 2420 | } |
2352 | 2421 | ||
@@ -2375,7 +2444,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2375 | unsigned char arg[128]; | 2444 | unsigned char arg[128]; |
2376 | int ret = 0; | 2445 | int ret = 0; |
2377 | unsigned int copylen; | 2446 | unsigned int copylen; |
2447 | struct net *net = sock_net(sk); | ||
2448 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
2378 | 2449 | ||
2450 | BUG_ON(!net); | ||
2379 | if (!capable(CAP_NET_ADMIN)) | 2451 | if (!capable(CAP_NET_ADMIN)) |
2380 | return -EPERM; | 2452 | return -EPERM; |
2381 | 2453 | ||
@@ -2418,7 +2490,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2418 | struct ip_vs_getinfo info; | 2490 | struct ip_vs_getinfo info; |
2419 | info.version = IP_VS_VERSION_CODE; | 2491 | info.version = IP_VS_VERSION_CODE; |
2420 | info.size = ip_vs_conn_tab_size; | 2492 | info.size = ip_vs_conn_tab_size; |
2421 | info.num_services = ip_vs_num_services; | 2493 | info.num_services = ipvs->num_services; |
2422 | if (copy_to_user(user, &info, sizeof(info)) != 0) | 2494 | if (copy_to_user(user, &info, sizeof(info)) != 0) |
2423 | ret = -EFAULT; | 2495 | ret = -EFAULT; |
2424 | } | 2496 | } |
@@ -2437,7 +2509,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2437 | ret = -EINVAL; | 2509 | ret = -EINVAL; |
2438 | goto out; | 2510 | goto out; |
2439 | } | 2511 | } |
2440 | ret = __ip_vs_get_service_entries(get, user); | 2512 | ret = __ip_vs_get_service_entries(net, get, user); |
2441 | } | 2513 | } |
2442 | break; | 2514 | break; |
2443 | 2515 | ||
@@ -2450,10 +2522,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2450 | entry = (struct ip_vs_service_entry *)arg; | 2522 | entry = (struct ip_vs_service_entry *)arg; |
2451 | addr.ip = entry->addr; | 2523 | addr.ip = entry->addr; |
2452 | if (entry->fwmark) | 2524 | if (entry->fwmark) |
2453 | svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); | 2525 | svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); |
2454 | else | 2526 | else |
2455 | svc = __ip_vs_service_find(AF_INET, entry->protocol, | 2527 | svc = __ip_vs_service_find(net, AF_INET, |
2456 | &addr, entry->port); | 2528 | entry->protocol, &addr, |
2529 | entry->port); | ||
2457 | if (svc) { | 2530 | if (svc) { |
2458 | ip_vs_copy_service(entry, svc); | 2531 | ip_vs_copy_service(entry, svc); |
2459 | if (copy_to_user(user, entry, sizeof(*entry)) != 0) | 2532 | if (copy_to_user(user, entry, sizeof(*entry)) != 0) |
@@ -2476,7 +2549,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2476 | ret = -EINVAL; | 2549 | ret = -EINVAL; |
2477 | goto out; | 2550 | goto out; |
2478 | } | 2551 | } |
2479 | ret = __ip_vs_get_dest_entries(get, user); | 2552 | ret = __ip_vs_get_dest_entries(net, get, user); |
2480 | } | 2553 | } |
2481 | break; | 2554 | break; |
2482 | 2555 | ||
@@ -2484,7 +2557,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2484 | { | 2557 | { |
2485 | struct ip_vs_timeout_user t; | 2558 | struct ip_vs_timeout_user t; |
2486 | 2559 | ||
2487 | __ip_vs_get_timeouts(&t); | 2560 | __ip_vs_get_timeouts(net, &t); |
2488 | if (copy_to_user(user, &t, sizeof(t)) != 0) | 2561 | if (copy_to_user(user, &t, sizeof(t)) != 0) |
2489 | ret = -EFAULT; | 2562 | ret = -EFAULT; |
2490 | } | 2563 | } |
@@ -2495,15 +2568,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2495 | struct ip_vs_daemon_user d[2]; | 2568 | struct ip_vs_daemon_user d[2]; |
2496 | 2569 | ||
2497 | memset(&d, 0, sizeof(d)); | 2570 | memset(&d, 0, sizeof(d)); |
2498 | if (ip_vs_sync_state & IP_VS_STATE_MASTER) { | 2571 | if (ipvs->sync_state & IP_VS_STATE_MASTER) { |
2499 | d[0].state = IP_VS_STATE_MASTER; | 2572 | d[0].state = IP_VS_STATE_MASTER; |
2500 | strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); | 2573 | strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, |
2501 | d[0].syncid = ip_vs_master_syncid; | 2574 | sizeof(d[0].mcast_ifn)); |
2575 | d[0].syncid = ipvs->master_syncid; | ||
2502 | } | 2576 | } |
2503 | if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { | 2577 | if (ipvs->sync_state & IP_VS_STATE_BACKUP) { |
2504 | d[1].state = IP_VS_STATE_BACKUP; | 2578 | d[1].state = IP_VS_STATE_BACKUP; |
2505 | strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); | 2579 | strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, |
2506 | d[1].syncid = ip_vs_backup_syncid; | 2580 | sizeof(d[1].mcast_ifn)); |
2581 | d[1].syncid = ipvs->backup_syncid; | ||
2507 | } | 2582 | } |
2508 | if (copy_to_user(user, &d, sizeof(d)) != 0) | 2583 | if (copy_to_user(user, &d, sizeof(d)) != 0) |
2509 | ret = -EFAULT; | 2584 | ret = -EFAULT; |
@@ -2542,6 +2617,7 @@ static struct genl_family ip_vs_genl_family = { | |||
2542 | .name = IPVS_GENL_NAME, | 2617 | .name = IPVS_GENL_NAME, |
2543 | .version = IPVS_GENL_VERSION, | 2618 | .version = IPVS_GENL_VERSION, |
2544 | .maxattr = IPVS_CMD_MAX, | 2619 | .maxattr = IPVS_CMD_MAX, |
2620 | .netnsok = true, /* Make ipvsadm to work on netns */ | ||
2545 | }; | 2621 | }; |
2546 | 2622 | ||
2547 | /* Policy used for first-level command attributes */ | 2623 | /* Policy used for first-level command attributes */ |
@@ -2696,11 +2772,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, | |||
2696 | int idx = 0, i; | 2772 | int idx = 0, i; |
2697 | int start = cb->args[0]; | 2773 | int start = cb->args[0]; |
2698 | struct ip_vs_service *svc; | 2774 | struct ip_vs_service *svc; |
2775 | struct net *net = skb_sknet(skb); | ||
2699 | 2776 | ||
2700 | mutex_lock(&__ip_vs_mutex); | 2777 | mutex_lock(&__ip_vs_mutex); |
2701 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { | 2778 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { |
2702 | list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { | 2779 | list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { |
2703 | if (++idx <= start) | 2780 | if (++idx <= start || !net_eq(svc->net, net)) |
2704 | continue; | 2781 | continue; |
2705 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { | 2782 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { |
2706 | idx--; | 2783 | idx--; |
@@ -2711,7 +2788,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, | |||
2711 | 2788 | ||
2712 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { | 2789 | for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { |
2713 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { | 2790 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { |
2714 | if (++idx <= start) | 2791 | if (++idx <= start || !net_eq(svc->net, net)) |
2715 | continue; | 2792 | continue; |
2716 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { | 2793 | if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { |
2717 | idx--; | 2794 | idx--; |
@@ -2727,7 +2804,8 @@ nla_put_failure: | |||
2727 | return skb->len; | 2804 | return skb->len; |
2728 | } | 2805 | } |
2729 | 2806 | ||
2730 | static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, | 2807 | static int ip_vs_genl_parse_service(struct net *net, |
2808 | struct ip_vs_service_user_kern *usvc, | ||
2731 | struct nlattr *nla, int full_entry, | 2809 | struct nlattr *nla, int full_entry, |
2732 | struct ip_vs_service **ret_svc) | 2810 | struct ip_vs_service **ret_svc) |
2733 | { | 2811 | { |
@@ -2770,9 +2848,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, | |||
2770 | } | 2848 | } |
2771 | 2849 | ||
2772 | if (usvc->fwmark) | 2850 | if (usvc->fwmark) |
2773 | svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); | 2851 | svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); |
2774 | else | 2852 | else |
2775 | svc = __ip_vs_service_find(usvc->af, usvc->protocol, | 2853 | svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, |
2776 | &usvc->addr, usvc->port); | 2854 | &usvc->addr, usvc->port); |
2777 | *ret_svc = svc; | 2855 | *ret_svc = svc; |
2778 | 2856 | ||
@@ -2809,13 +2887,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, | |||
2809 | return 0; | 2887 | return 0; |
2810 | } | 2888 | } |
2811 | 2889 | ||
2812 | static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) | 2890 | static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, |
2891 | struct nlattr *nla) | ||
2813 | { | 2892 | { |
2814 | struct ip_vs_service_user_kern usvc; | 2893 | struct ip_vs_service_user_kern usvc; |
2815 | struct ip_vs_service *svc; | 2894 | struct ip_vs_service *svc; |
2816 | int ret; | 2895 | int ret; |
2817 | 2896 | ||
2818 | ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); | 2897 | ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); |
2819 | return ret ? ERR_PTR(ret) : svc; | 2898 | return ret ? ERR_PTR(ret) : svc; |
2820 | } | 2899 | } |
2821 | 2900 | ||
@@ -2883,6 +2962,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, | |||
2883 | struct ip_vs_service *svc; | 2962 | struct ip_vs_service *svc; |
2884 | struct ip_vs_dest *dest; | 2963 | struct ip_vs_dest *dest; |
2885 | struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; | 2964 | struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; |
2965 | struct net *net = skb_sknet(skb); | ||
2886 | 2966 | ||
2887 | mutex_lock(&__ip_vs_mutex); | 2967 | mutex_lock(&__ip_vs_mutex); |
2888 | 2968 | ||
@@ -2891,7 +2971,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, | |||
2891 | IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) | 2971 | IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) |
2892 | goto out_err; | 2972 | goto out_err; |
2893 | 2973 | ||
2894 | svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); | 2974 | |
2975 | svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); | ||
2895 | if (IS_ERR(svc) || svc == NULL) | 2976 | if (IS_ERR(svc) || svc == NULL) |
2896 | goto out_err; | 2977 | goto out_err; |
2897 | 2978 | ||
@@ -3005,20 +3086,23 @@ nla_put_failure: | |||
3005 | static int ip_vs_genl_dump_daemons(struct sk_buff *skb, | 3086 | static int ip_vs_genl_dump_daemons(struct sk_buff *skb, |
3006 | struct netlink_callback *cb) | 3087 | struct netlink_callback *cb) |
3007 | { | 3088 | { |
3089 | struct net *net = skb_net(skb); | ||
3090 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
3091 | |||
3008 | mutex_lock(&__ip_vs_mutex); | 3092 | mutex_lock(&__ip_vs_mutex); |
3009 | if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { | 3093 | if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { |
3010 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, | 3094 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, |
3011 | ip_vs_master_mcast_ifn, | 3095 | ipvs->master_mcast_ifn, |
3012 | ip_vs_master_syncid, cb) < 0) | 3096 | ipvs->master_syncid, cb) < 0) |
3013 | goto nla_put_failure; | 3097 | goto nla_put_failure; |
3014 | 3098 | ||
3015 | cb->args[0] = 1; | 3099 | cb->args[0] = 1; |
3016 | } | 3100 | } |
3017 | 3101 | ||
3018 | if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { | 3102 | if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { |
3019 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, | 3103 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, |
3020 | ip_vs_backup_mcast_ifn, | 3104 | ipvs->backup_mcast_ifn, |
3021 | ip_vs_backup_syncid, cb) < 0) | 3105 | ipvs->backup_syncid, cb) < 0) |
3022 | goto nla_put_failure; | 3106 | goto nla_put_failure; |
3023 | 3107 | ||
3024 | cb->args[1] = 1; | 3108 | cb->args[1] = 1; |
@@ -3030,31 +3114,33 @@ nla_put_failure: | |||
3030 | return skb->len; | 3114 | return skb->len; |
3031 | } | 3115 | } |
3032 | 3116 | ||
3033 | static int ip_vs_genl_new_daemon(struct nlattr **attrs) | 3117 | static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) |
3034 | { | 3118 | { |
3035 | if (!(attrs[IPVS_DAEMON_ATTR_STATE] && | 3119 | if (!(attrs[IPVS_DAEMON_ATTR_STATE] && |
3036 | attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && | 3120 | attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && |
3037 | attrs[IPVS_DAEMON_ATTR_SYNC_ID])) | 3121 | attrs[IPVS_DAEMON_ATTR_SYNC_ID])) |
3038 | return -EINVAL; | 3122 | return -EINVAL; |
3039 | 3123 | ||
3040 | return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), | 3124 | return start_sync_thread(net, |
3125 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), | ||
3041 | nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), | 3126 | nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), |
3042 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); | 3127 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); |
3043 | } | 3128 | } |
3044 | 3129 | ||
3045 | static int ip_vs_genl_del_daemon(struct nlattr **attrs) | 3130 | static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) |
3046 | { | 3131 | { |
3047 | if (!attrs[IPVS_DAEMON_ATTR_STATE]) | 3132 | if (!attrs[IPVS_DAEMON_ATTR_STATE]) |
3048 | return -EINVAL; | 3133 | return -EINVAL; |
3049 | 3134 | ||
3050 | return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); | 3135 | return stop_sync_thread(net, |
3136 | nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); | ||
3051 | } | 3137 | } |
3052 | 3138 | ||
3053 | static int ip_vs_genl_set_config(struct nlattr **attrs) | 3139 | static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) |
3054 | { | 3140 | { |
3055 | struct ip_vs_timeout_user t; | 3141 | struct ip_vs_timeout_user t; |
3056 | 3142 | ||
3057 | __ip_vs_get_timeouts(&t); | 3143 | __ip_vs_get_timeouts(net, &t); |
3058 | 3144 | ||
3059 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) | 3145 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) |
3060 | t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); | 3146 | t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); |
@@ -3066,7 +3152,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs) | |||
3066 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) | 3152 | if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) |
3067 | t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); | 3153 | t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); |
3068 | 3154 | ||
3069 | return ip_vs_set_timeout(&t); | 3155 | return ip_vs_set_timeout(net, &t); |
3070 | } | 3156 | } |
3071 | 3157 | ||
3072 | static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | 3158 | static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) |
@@ -3076,16 +3162,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3076 | struct ip_vs_dest_user_kern udest; | 3162 | struct ip_vs_dest_user_kern udest; |
3077 | int ret = 0, cmd; | 3163 | int ret = 0, cmd; |
3078 | int need_full_svc = 0, need_full_dest = 0; | 3164 | int need_full_svc = 0, need_full_dest = 0; |
3165 | struct net *net; | ||
3166 | struct netns_ipvs *ipvs; | ||
3079 | 3167 | ||
3168 | net = skb_sknet(skb); | ||
3169 | ipvs = net_ipvs(net); | ||
3080 | cmd = info->genlhdr->cmd; | 3170 | cmd = info->genlhdr->cmd; |
3081 | 3171 | ||
3082 | mutex_lock(&__ip_vs_mutex); | 3172 | mutex_lock(&__ip_vs_mutex); |
3083 | 3173 | ||
3084 | if (cmd == IPVS_CMD_FLUSH) { | 3174 | if (cmd == IPVS_CMD_FLUSH) { |
3085 | ret = ip_vs_flush(); | 3175 | ret = ip_vs_flush(net); |
3086 | goto out; | 3176 | goto out; |
3087 | } else if (cmd == IPVS_CMD_SET_CONFIG) { | 3177 | } else if (cmd == IPVS_CMD_SET_CONFIG) { |
3088 | ret = ip_vs_genl_set_config(info->attrs); | 3178 | ret = ip_vs_genl_set_config(net, info->attrs); |
3089 | goto out; | 3179 | goto out; |
3090 | } else if (cmd == IPVS_CMD_NEW_DAEMON || | 3180 | } else if (cmd == IPVS_CMD_NEW_DAEMON || |
3091 | cmd == IPVS_CMD_DEL_DAEMON) { | 3181 | cmd == IPVS_CMD_DEL_DAEMON) { |
@@ -3101,13 +3191,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3101 | } | 3191 | } |
3102 | 3192 | ||
3103 | if (cmd == IPVS_CMD_NEW_DAEMON) | 3193 | if (cmd == IPVS_CMD_NEW_DAEMON) |
3104 | ret = ip_vs_genl_new_daemon(daemon_attrs); | 3194 | ret = ip_vs_genl_new_daemon(net, daemon_attrs); |
3105 | else | 3195 | else |
3106 | ret = ip_vs_genl_del_daemon(daemon_attrs); | 3196 | ret = ip_vs_genl_del_daemon(net, daemon_attrs); |
3107 | goto out; | 3197 | goto out; |
3108 | } else if (cmd == IPVS_CMD_ZERO && | 3198 | } else if (cmd == IPVS_CMD_ZERO && |
3109 | !info->attrs[IPVS_CMD_ATTR_SERVICE]) { | 3199 | !info->attrs[IPVS_CMD_ATTR_SERVICE]) { |
3110 | ret = ip_vs_zero_all(); | 3200 | ret = ip_vs_zero_all(net); |
3111 | goto out; | 3201 | goto out; |
3112 | } | 3202 | } |
3113 | 3203 | ||
@@ -3117,7 +3207,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3117 | if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) | 3207 | if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) |
3118 | need_full_svc = 1; | 3208 | need_full_svc = 1; |
3119 | 3209 | ||
3120 | ret = ip_vs_genl_parse_service(&usvc, | 3210 | ret = ip_vs_genl_parse_service(net, &usvc, |
3121 | info->attrs[IPVS_CMD_ATTR_SERVICE], | 3211 | info->attrs[IPVS_CMD_ATTR_SERVICE], |
3122 | need_full_svc, &svc); | 3212 | need_full_svc, &svc); |
3123 | if (ret) | 3213 | if (ret) |
@@ -3147,7 +3237,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3147 | switch (cmd) { | 3237 | switch (cmd) { |
3148 | case IPVS_CMD_NEW_SERVICE: | 3238 | case IPVS_CMD_NEW_SERVICE: |
3149 | if (svc == NULL) | 3239 | if (svc == NULL) |
3150 | ret = ip_vs_add_service(&usvc, &svc); | 3240 | ret = ip_vs_add_service(net, &usvc, &svc); |
3151 | else | 3241 | else |
3152 | ret = -EEXIST; | 3242 | ret = -EEXIST; |
3153 | break; | 3243 | break; |
@@ -3185,7 +3275,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3185 | struct sk_buff *msg; | 3275 | struct sk_buff *msg; |
3186 | void *reply; | 3276 | void *reply; |
3187 | int ret, cmd, reply_cmd; | 3277 | int ret, cmd, reply_cmd; |
3278 | struct net *net; | ||
3279 | struct netns_ipvs *ipvs; | ||
3188 | 3280 | ||
3281 | net = skb_sknet(skb); | ||
3282 | ipvs = net_ipvs(net); | ||
3189 | cmd = info->genlhdr->cmd; | 3283 | cmd = info->genlhdr->cmd; |
3190 | 3284 | ||
3191 | if (cmd == IPVS_CMD_GET_SERVICE) | 3285 | if (cmd == IPVS_CMD_GET_SERVICE) |
@@ -3214,7 +3308,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3214 | { | 3308 | { |
3215 | struct ip_vs_service *svc; | 3309 | struct ip_vs_service *svc; |
3216 | 3310 | ||
3217 | svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); | 3311 | svc = ip_vs_genl_find_service(net, |
3312 | info->attrs[IPVS_CMD_ATTR_SERVICE]); | ||
3218 | if (IS_ERR(svc)) { | 3313 | if (IS_ERR(svc)) { |
3219 | ret = PTR_ERR(svc); | 3314 | ret = PTR_ERR(svc); |
3220 | goto out_err; | 3315 | goto out_err; |
@@ -3234,7 +3329,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3234 | { | 3329 | { |
3235 | struct ip_vs_timeout_user t; | 3330 | struct ip_vs_timeout_user t; |
3236 | 3331 | ||
3237 | __ip_vs_get_timeouts(&t); | 3332 | __ip_vs_get_timeouts(net, &t); |
3238 | #ifdef CONFIG_IP_VS_PROTO_TCP | 3333 | #ifdef CONFIG_IP_VS_PROTO_TCP |
3239 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); | 3334 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); |
3240 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, | 3335 | NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, |
@@ -3380,62 +3475,172 @@ static void ip_vs_genl_unregister(void) | |||
3380 | 3475 | ||
3381 | /* End of Generic Netlink interface definitions */ | 3476 | /* End of Generic Netlink interface definitions */ |
3382 | 3477 | ||
3478 | /* | ||
3479 | * per netns init/exit func. | ||
3480 | */ | ||
3481 | int __net_init __ip_vs_control_init(struct net *net) | ||
3482 | { | ||
3483 | int idx; | ||
3484 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
3485 | struct ctl_table *tbl; | ||
3486 | |||
3487 | atomic_set(&ipvs->dropentry, 0); | ||
3488 | spin_lock_init(&ipvs->dropentry_lock); | ||
3489 | spin_lock_init(&ipvs->droppacket_lock); | ||
3490 | spin_lock_init(&ipvs->securetcp_lock); | ||
3491 | ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); | ||
3492 | |||
3493 | /* Initialize rs_table */ | ||
3494 | for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) | ||
3495 | INIT_LIST_HEAD(&ipvs->rs_table[idx]); | ||
3496 | |||
3497 | INIT_LIST_HEAD(&ipvs->dest_trash); | ||
3498 | atomic_set(&ipvs->ftpsvc_counter, 0); | ||
3499 | atomic_set(&ipvs->nullsvc_counter, 0); | ||
3500 | |||
3501 | /* procfs stats */ | ||
3502 | ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); | ||
3503 | if (ipvs->tot_stats == NULL) { | ||
3504 | pr_err("%s(): no memory.\n", __func__); | ||
3505 | return -ENOMEM; | ||
3506 | } | ||
3507 | ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); | ||
3508 | if (!ipvs->cpustats) { | ||
3509 | pr_err("%s() alloc_percpu failed\n", __func__); | ||
3510 | goto err_alloc; | ||
3511 | } | ||
3512 | spin_lock_init(&ipvs->tot_stats->lock); | ||
3513 | |||
3514 | for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) | ||
3515 | INIT_LIST_HEAD(&ipvs->rs_table[idx]); | ||
3516 | |||
3517 | proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); | ||
3518 | proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); | ||
3519 | proc_net_fops_create(net, "ip_vs_stats_percpu", 0, | ||
3520 | &ip_vs_stats_percpu_fops); | ||
3521 | |||
3522 | if (!net_eq(net, &init_net)) { | ||
3523 | tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); | ||
3524 | if (tbl == NULL) | ||
3525 | goto err_dup; | ||
3526 | } else | ||
3527 | tbl = vs_vars; | ||
3528 | /* Initialize sysctl defaults */ | ||
3529 | idx = 0; | ||
3530 | ipvs->sysctl_amemthresh = 1024; | ||
3531 | tbl[idx++].data = &ipvs->sysctl_amemthresh; | ||
3532 | ipvs->sysctl_am_droprate = 10; | ||
3533 | tbl[idx++].data = &ipvs->sysctl_am_droprate; | ||
3534 | tbl[idx++].data = &ipvs->sysctl_drop_entry; | ||
3535 | tbl[idx++].data = &ipvs->sysctl_drop_packet; | ||
3536 | #ifdef CONFIG_IP_VS_NFCT | ||
3537 | tbl[idx++].data = &ipvs->sysctl_conntrack; | ||
3538 | #endif | ||
3539 | tbl[idx++].data = &ipvs->sysctl_secure_tcp; | ||
3540 | ipvs->sysctl_snat_reroute = 1; | ||
3541 | tbl[idx++].data = &ipvs->sysctl_snat_reroute; | ||
3542 | ipvs->sysctl_sync_ver = 1; | ||
3543 | tbl[idx++].data = &ipvs->sysctl_sync_ver; | ||
3544 | tbl[idx++].data = &ipvs->sysctl_cache_bypass; | ||
3545 | tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; | ||
3546 | tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; | ||
3547 | ipvs->sysctl_sync_threshold[0] = 3; | ||
3548 | ipvs->sysctl_sync_threshold[1] = 50; | ||
3549 | tbl[idx].data = &ipvs->sysctl_sync_threshold; | ||
3550 | tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); | ||
3551 | tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; | ||
3552 | |||
3553 | |||
3554 | ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, | ||
3555 | vs_vars); | ||
3556 | if (ipvs->sysctl_hdr == NULL) | ||
3557 | goto err_reg; | ||
3558 | ip_vs_new_estimator(net, ipvs->tot_stats); | ||
3559 | ipvs->sysctl_tbl = tbl; | ||
3560 | /* Schedule defense work */ | ||
3561 | INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); | ||
3562 | schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); | ||
3563 | return 0; | ||
3564 | |||
3565 | err_reg: | ||
3566 | if (!net_eq(net, &init_net)) | ||
3567 | kfree(tbl); | ||
3568 | err_dup: | ||
3569 | free_percpu(ipvs->cpustats); | ||
3570 | err_alloc: | ||
3571 | kfree(ipvs->tot_stats); | ||
3572 | return -ENOMEM; | ||
3573 | } | ||
3574 | |||
3575 | static void __net_exit __ip_vs_control_cleanup(struct net *net) | ||
3576 | { | ||
3577 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
3578 | |||
3579 | ip_vs_trash_cleanup(net); | ||
3580 | ip_vs_kill_estimator(net, ipvs->tot_stats); | ||
3581 | cancel_delayed_work_sync(&ipvs->defense_work); | ||
3582 | cancel_work_sync(&ipvs->defense_work.work); | ||
3583 | unregister_net_sysctl_table(ipvs->sysctl_hdr); | ||
3584 | proc_net_remove(net, "ip_vs_stats_percpu"); | ||
3585 | proc_net_remove(net, "ip_vs_stats"); | ||
3586 | proc_net_remove(net, "ip_vs"); | ||
3587 | free_percpu(ipvs->cpustats); | ||
3588 | kfree(ipvs->tot_stats); | ||
3589 | } | ||
3590 | |||
3591 | static struct pernet_operations ipvs_control_ops = { | ||
3592 | .init = __ip_vs_control_init, | ||
3593 | .exit = __ip_vs_control_cleanup, | ||
3594 | }; | ||
3383 | 3595 | ||
3384 | int __init ip_vs_control_init(void) | 3596 | int __init ip_vs_control_init(void) |
3385 | { | 3597 | { |
3386 | int ret; | ||
3387 | int idx; | 3598 | int idx; |
3599 | int ret; | ||
3388 | 3600 | ||
3389 | EnterFunction(2); | 3601 | EnterFunction(2); |
3390 | 3602 | ||
3391 | /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ | 3603 | /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ |
3392 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | 3604 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { |
3393 | INIT_LIST_HEAD(&ip_vs_svc_table[idx]); | 3605 | INIT_LIST_HEAD(&ip_vs_svc_table[idx]); |
3394 | INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); | 3606 | INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); |
3395 | } | 3607 | } |
3396 | for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { | 3608 | |
3397 | INIT_LIST_HEAD(&ip_vs_rtable[idx]); | 3609 | ret = register_pernet_subsys(&ipvs_control_ops); |
3610 | if (ret) { | ||
3611 | pr_err("cannot register namespace.\n"); | ||
3612 | goto err; | ||
3398 | } | 3613 | } |
3399 | smp_wmb(); | 3614 | |
3615 | smp_wmb(); /* Do we really need it now ? */ | ||
3400 | 3616 | ||
3401 | ret = nf_register_sockopt(&ip_vs_sockopts); | 3617 | ret = nf_register_sockopt(&ip_vs_sockopts); |
3402 | if (ret) { | 3618 | if (ret) { |
3403 | pr_err("cannot register sockopt.\n"); | 3619 | pr_err("cannot register sockopt.\n"); |
3404 | return ret; | 3620 | goto err_net; |
3405 | } | 3621 | } |
3406 | 3622 | ||
3407 | ret = ip_vs_genl_register(); | 3623 | ret = ip_vs_genl_register(); |
3408 | if (ret) { | 3624 | if (ret) { |
3409 | pr_err("cannot register Generic Netlink interface.\n"); | 3625 | pr_err("cannot register Generic Netlink interface.\n"); |
3410 | nf_unregister_sockopt(&ip_vs_sockopts); | 3626 | nf_unregister_sockopt(&ip_vs_sockopts); |
3411 | return ret; | 3627 | goto err_net; |
3412 | } | 3628 | } |
3413 | 3629 | ||
3414 | proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); | ||
3415 | proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); | ||
3416 | |||
3417 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); | ||
3418 | |||
3419 | ip_vs_new_estimator(&ip_vs_stats); | ||
3420 | |||
3421 | /* Hook the defense timer */ | ||
3422 | schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); | ||
3423 | |||
3424 | LeaveFunction(2); | 3630 | LeaveFunction(2); |
3425 | return 0; | 3631 | return 0; |
3632 | |||
3633 | err_net: | ||
3634 | unregister_pernet_subsys(&ipvs_control_ops); | ||
3635 | err: | ||
3636 | return ret; | ||
3426 | } | 3637 | } |
3427 | 3638 | ||
3428 | 3639 | ||
3429 | void ip_vs_control_cleanup(void) | 3640 | void ip_vs_control_cleanup(void) |
3430 | { | 3641 | { |
3431 | EnterFunction(2); | 3642 | EnterFunction(2); |
3432 | ip_vs_trash_cleanup(); | 3643 | unregister_pernet_subsys(&ipvs_control_ops); |
3433 | cancel_delayed_work_sync(&defense_work); | ||
3434 | cancel_work_sync(&defense_work.work); | ||
3435 | ip_vs_kill_estimator(&ip_vs_stats); | ||
3436 | unregister_sysctl_table(sysctl_header); | ||
3437 | proc_net_remove(&init_net, "ip_vs_stats"); | ||
3438 | proc_net_remove(&init_net, "ip_vs"); | ||
3439 | ip_vs_genl_unregister(); | 3644 | ip_vs_genl_unregister(); |
3440 | nf_unregister_sockopt(&ip_vs_sockopts); | 3645 | nf_unregister_sockopt(&ip_vs_sockopts); |
3441 | LeaveFunction(2); | 3646 | LeaveFunction(2); |
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index ff28801962e0..f560a05c965a 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c | |||
@@ -8,8 +8,12 @@ | |||
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | * | 10 | * |
11 | * Changes: | 11 | * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> |
12 | * | 12 | * Network name space (netns) aware. |
13 | * Global data moved to netns, i.e. struct netns_ipvs | ||
14 | * Affected data: est_list and est_lock. | ||
15 | * estimation_timer() runs with timer per netns. | ||
16 | * get_stats() does the per-cpu summing. | ||
13 | */ | 17 | */ |
14 | 18 | ||
15 | #define KMSG_COMPONENT "IPVS" | 19 | #define KMSG_COMPONENT "IPVS" |
@@ -48,11 +52,42 @@ | |||
48 | */ | 52 | */ |
49 | 53 | ||
50 | 54 | ||
51 | static void estimation_timer(unsigned long arg); | 55 | /* |
56 | * Make a summary from each cpu | ||
57 | */ | ||
58 | static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, | ||
59 | struct ip_vs_cpu_stats *stats) | ||
60 | { | ||
61 | int i; | ||
62 | |||
63 | for_each_possible_cpu(i) { | ||
64 | struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); | ||
65 | unsigned int start; | ||
66 | __u64 inbytes, outbytes; | ||
67 | if (i) { | ||
68 | sum->conns += s->ustats.conns; | ||
69 | sum->inpkts += s->ustats.inpkts; | ||
70 | sum->outpkts += s->ustats.outpkts; | ||
71 | do { | ||
72 | start = u64_stats_fetch_begin_bh(&s->syncp); | ||
73 | inbytes = s->ustats.inbytes; | ||
74 | outbytes = s->ustats.outbytes; | ||
75 | } while (u64_stats_fetch_retry_bh(&s->syncp, start)); | ||
76 | sum->inbytes += inbytes; | ||
77 | sum->outbytes += outbytes; | ||
78 | } else { | ||
79 | sum->conns = s->ustats.conns; | ||
80 | sum->inpkts = s->ustats.inpkts; | ||
81 | sum->outpkts = s->ustats.outpkts; | ||
82 | do { | ||
83 | start = u64_stats_fetch_begin_bh(&s->syncp); | ||
84 | sum->inbytes = s->ustats.inbytes; | ||
85 | sum->outbytes = s->ustats.outbytes; | ||
86 | } while (u64_stats_fetch_retry_bh(&s->syncp, start)); | ||
87 | } | ||
88 | } | ||
89 | } | ||
52 | 90 | ||
53 | static LIST_HEAD(est_list); | ||
54 | static DEFINE_SPINLOCK(est_lock); | ||
55 | static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); | ||
56 | 91 | ||
57 | static void estimation_timer(unsigned long arg) | 92 | static void estimation_timer(unsigned long arg) |
58 | { | 93 | { |
@@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg) | |||
62 | u32 n_inpkts, n_outpkts; | 97 | u32 n_inpkts, n_outpkts; |
63 | u64 n_inbytes, n_outbytes; | 98 | u64 n_inbytes, n_outbytes; |
64 | u32 rate; | 99 | u32 rate; |
100 | struct net *net = (struct net *)arg; | ||
101 | struct netns_ipvs *ipvs; | ||
65 | 102 | ||
66 | spin_lock(&est_lock); | 103 | ipvs = net_ipvs(net); |
67 | list_for_each_entry(e, &est_list, list) { | 104 | ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); |
105 | spin_lock(&ipvs->est_lock); | ||
106 | list_for_each_entry(e, &ipvs->est_list, list) { | ||
68 | s = container_of(e, struct ip_vs_stats, est); | 107 | s = container_of(e, struct ip_vs_stats, est); |
69 | 108 | ||
109 | ip_vs_read_cpu_stats(&s->ustats, s->cpustats); | ||
70 | spin_lock(&s->lock); | 110 | spin_lock(&s->lock); |
71 | n_conns = s->ustats.conns; | 111 | n_conns = s->ustats.conns; |
72 | n_inpkts = s->ustats.inpkts; | 112 | n_inpkts = s->ustats.inpkts; |
@@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg) | |||
75 | n_outbytes = s->ustats.outbytes; | 115 | n_outbytes = s->ustats.outbytes; |
76 | 116 | ||
77 | /* scaled by 2^10, but divided 2 seconds */ | 117 | /* scaled by 2^10, but divided 2 seconds */ |
78 | rate = (n_conns - e->last_conns)<<9; | 118 | rate = (n_conns - e->last_conns) << 9; |
79 | e->last_conns = n_conns; | 119 | e->last_conns = n_conns; |
80 | e->cps += ((long)rate - (long)e->cps)>>2; | 120 | e->cps += ((long)rate - (long)e->cps) >> 2; |
81 | s->ustats.cps = (e->cps+0x1FF)>>10; | 121 | s->ustats.cps = (e->cps + 0x1FF) >> 10; |
82 | 122 | ||
83 | rate = (n_inpkts - e->last_inpkts)<<9; | 123 | rate = (n_inpkts - e->last_inpkts) << 9; |
84 | e->last_inpkts = n_inpkts; | 124 | e->last_inpkts = n_inpkts; |
85 | e->inpps += ((long)rate - (long)e->inpps)>>2; | 125 | e->inpps += ((long)rate - (long)e->inpps) >> 2; |
86 | s->ustats.inpps = (e->inpps+0x1FF)>>10; | 126 | s->ustats.inpps = (e->inpps + 0x1FF) >> 10; |
87 | 127 | ||
88 | rate = (n_outpkts - e->last_outpkts)<<9; | 128 | rate = (n_outpkts - e->last_outpkts) << 9; |
89 | e->last_outpkts = n_outpkts; | 129 | e->last_outpkts = n_outpkts; |
90 | e->outpps += ((long)rate - (long)e->outpps)>>2; | 130 | e->outpps += ((long)rate - (long)e->outpps) >> 2; |
91 | s->ustats.outpps = (e->outpps+0x1FF)>>10; | 131 | s->ustats.outpps = (e->outpps + 0x1FF) >> 10; |
92 | 132 | ||
93 | rate = (n_inbytes - e->last_inbytes)<<4; | 133 | rate = (n_inbytes - e->last_inbytes) << 4; |
94 | e->last_inbytes = n_inbytes; | 134 | e->last_inbytes = n_inbytes; |
95 | e->inbps += ((long)rate - (long)e->inbps)>>2; | 135 | e->inbps += ((long)rate - (long)e->inbps) >> 2; |
96 | s->ustats.inbps = (e->inbps+0xF)>>5; | 136 | s->ustats.inbps = (e->inbps + 0xF) >> 5; |
97 | 137 | ||
98 | rate = (n_outbytes - e->last_outbytes)<<4; | 138 | rate = (n_outbytes - e->last_outbytes) << 4; |
99 | e->last_outbytes = n_outbytes; | 139 | e->last_outbytes = n_outbytes; |
100 | e->outbps += ((long)rate - (long)e->outbps)>>2; | 140 | e->outbps += ((long)rate - (long)e->outbps) >> 2; |
101 | s->ustats.outbps = (e->outbps+0xF)>>5; | 141 | s->ustats.outbps = (e->outbps + 0xF) >> 5; |
102 | spin_unlock(&s->lock); | 142 | spin_unlock(&s->lock); |
103 | } | 143 | } |
104 | spin_unlock(&est_lock); | 144 | spin_unlock(&ipvs->est_lock); |
105 | mod_timer(&est_timer, jiffies + 2*HZ); | 145 | mod_timer(&ipvs->est_timer, jiffies + 2*HZ); |
106 | } | 146 | } |
107 | 147 | ||
108 | void ip_vs_new_estimator(struct ip_vs_stats *stats) | 148 | void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) |
109 | { | 149 | { |
150 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
110 | struct ip_vs_estimator *est = &stats->est; | 151 | struct ip_vs_estimator *est = &stats->est; |
111 | 152 | ||
112 | INIT_LIST_HEAD(&est->list); | 153 | INIT_LIST_HEAD(&est->list); |
@@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) | |||
126 | est->last_outbytes = stats->ustats.outbytes; | 167 | est->last_outbytes = stats->ustats.outbytes; |
127 | est->outbps = stats->ustats.outbps<<5; | 168 | est->outbps = stats->ustats.outbps<<5; |
128 | 169 | ||
129 | spin_lock_bh(&est_lock); | 170 | spin_lock_bh(&ipvs->est_lock); |
130 | list_add(&est->list, &est_list); | 171 | list_add(&est->list, &ipvs->est_list); |
131 | spin_unlock_bh(&est_lock); | 172 | spin_unlock_bh(&ipvs->est_lock); |
132 | } | 173 | } |
133 | 174 | ||
134 | void ip_vs_kill_estimator(struct ip_vs_stats *stats) | 175 | void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) |
135 | { | 176 | { |
177 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
136 | struct ip_vs_estimator *est = &stats->est; | 178 | struct ip_vs_estimator *est = &stats->est; |
137 | 179 | ||
138 | spin_lock_bh(&est_lock); | 180 | spin_lock_bh(&ipvs->est_lock); |
139 | list_del(&est->list); | 181 | list_del(&est->list); |
140 | spin_unlock_bh(&est_lock); | 182 | spin_unlock_bh(&ipvs->est_lock); |
141 | } | 183 | } |
142 | 184 | ||
143 | void ip_vs_zero_estimator(struct ip_vs_stats *stats) | 185 | void ip_vs_zero_estimator(struct ip_vs_stats *stats) |
@@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) | |||
157 | est->outbps = 0; | 199 | est->outbps = 0; |
158 | } | 200 | } |
159 | 201 | ||
160 | int __init ip_vs_estimator_init(void) | 202 | static int __net_init __ip_vs_estimator_init(struct net *net) |
161 | { | 203 | { |
162 | mod_timer(&est_timer, jiffies + 2 * HZ); | 204 | struct netns_ipvs *ipvs = net_ipvs(net); |
205 | |||
206 | INIT_LIST_HEAD(&ipvs->est_list); | ||
207 | spin_lock_init(&ipvs->est_lock); | ||
208 | setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); | ||
209 | mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); | ||
163 | return 0; | 210 | return 0; |
164 | } | 211 | } |
165 | 212 | ||
213 | static void __net_exit __ip_vs_estimator_exit(struct net *net) | ||
214 | { | ||
215 | del_timer_sync(&net_ipvs(net)->est_timer); | ||
216 | } | ||
217 | static struct pernet_operations ip_vs_app_ops = { | ||
218 | .init = __ip_vs_estimator_init, | ||
219 | .exit = __ip_vs_estimator_exit, | ||
220 | }; | ||
221 | |||
222 | int __init ip_vs_estimator_init(void) | ||
223 | { | ||
224 | int rv; | ||
225 | |||
226 | rv = register_pernet_subsys(&ip_vs_app_ops); | ||
227 | return rv; | ||
228 | } | ||
229 | |||
166 | void ip_vs_estimator_cleanup(void) | 230 | void ip_vs_estimator_cleanup(void) |
167 | { | 231 | { |
168 | del_timer_sync(&est_timer); | 232 | unregister_pernet_subsys(&ip_vs_app_ops); |
169 | } | 233 | } |
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 75455000ad1c..6b5dd6ddaae9 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c | |||
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
157 | int ret = 0; | 157 | int ret = 0; |
158 | enum ip_conntrack_info ctinfo; | 158 | enum ip_conntrack_info ctinfo; |
159 | struct nf_conn *ct; | 159 | struct nf_conn *ct; |
160 | struct net *net; | ||
160 | 161 | ||
161 | #ifdef CONFIG_IP_VS_IPV6 | 162 | #ifdef CONFIG_IP_VS_IPV6 |
162 | /* This application helper doesn't work with IPv6 yet, | 163 | /* This application helper doesn't work with IPv6 yet, |
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
197 | */ | 198 | */ |
198 | { | 199 | { |
199 | struct ip_vs_conn_param p; | 200 | struct ip_vs_conn_param p; |
200 | ip_vs_conn_fill_param(AF_INET, iph->protocol, | 201 | ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, |
201 | &from, port, &cp->caddr, 0, &p); | 202 | iph->protocol, &from, port, |
203 | &cp->caddr, 0, &p); | ||
202 | n_cp = ip_vs_conn_out_get(&p); | 204 | n_cp = ip_vs_conn_out_get(&p); |
203 | } | 205 | } |
204 | if (!n_cp) { | 206 | if (!n_cp) { |
205 | struct ip_vs_conn_param p; | 207 | struct ip_vs_conn_param p; |
206 | ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, | 208 | ip_vs_conn_fill_param(ip_vs_conn_net(cp), |
209 | AF_INET, IPPROTO_TCP, &cp->caddr, | ||
207 | 0, &cp->vaddr, port, &p); | 210 | 0, &cp->vaddr, port, &p); |
208 | n_cp = ip_vs_conn_new(&p, &from, port, | 211 | n_cp = ip_vs_conn_new(&p, &from, port, |
209 | IP_VS_CONN_F_NO_CPORT | | 212 | IP_VS_CONN_F_NO_CPORT | |
210 | IP_VS_CONN_F_NFCT, | 213 | IP_VS_CONN_F_NFCT, |
211 | cp->dest); | 214 | cp->dest, skb->mark); |
212 | if (!n_cp) | 215 | if (!n_cp) |
213 | return 0; | 216 | return 0; |
214 | 217 | ||
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
257 | * would be adjusted twice. | 260 | * would be adjusted twice. |
258 | */ | 261 | */ |
259 | 262 | ||
263 | net = skb_net(skb); | ||
260 | cp->app_data = NULL; | 264 | cp->app_data = NULL; |
261 | ip_vs_tcp_conn_listen(n_cp); | 265 | ip_vs_tcp_conn_listen(net, n_cp); |
262 | ip_vs_conn_put(n_cp); | 266 | ip_vs_conn_put(n_cp); |
263 | return ret; | 267 | return ret; |
264 | } | 268 | } |
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
287 | union nf_inet_addr to; | 291 | union nf_inet_addr to; |
288 | __be16 port; | 292 | __be16 port; |
289 | struct ip_vs_conn *n_cp; | 293 | struct ip_vs_conn *n_cp; |
294 | struct net *net; | ||
290 | 295 | ||
291 | #ifdef CONFIG_IP_VS_IPV6 | 296 | #ifdef CONFIG_IP_VS_IPV6 |
292 | /* This application helper doesn't work with IPv6 yet, | 297 | /* This application helper doesn't work with IPv6 yet, |
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
358 | 363 | ||
359 | { | 364 | { |
360 | struct ip_vs_conn_param p; | 365 | struct ip_vs_conn_param p; |
361 | ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, | 366 | ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, |
362 | &cp->vaddr, htons(ntohs(cp->vport)-1), | 367 | iph->protocol, &to, port, &cp->vaddr, |
363 | &p); | 368 | htons(ntohs(cp->vport)-1), &p); |
364 | n_cp = ip_vs_conn_in_get(&p); | 369 | n_cp = ip_vs_conn_in_get(&p); |
365 | if (!n_cp) { | 370 | if (!n_cp) { |
366 | n_cp = ip_vs_conn_new(&p, &cp->daddr, | 371 | n_cp = ip_vs_conn_new(&p, &cp->daddr, |
367 | htons(ntohs(cp->dport)-1), | 372 | htons(ntohs(cp->dport)-1), |
368 | IP_VS_CONN_F_NFCT, cp->dest); | 373 | IP_VS_CONN_F_NFCT, cp->dest, |
374 | skb->mark); | ||
369 | if (!n_cp) | 375 | if (!n_cp) |
370 | return 0; | 376 | return 0; |
371 | 377 | ||
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |||
377 | /* | 383 | /* |
378 | * Move tunnel to listen state | 384 | * Move tunnel to listen state |
379 | */ | 385 | */ |
380 | ip_vs_tcp_conn_listen(n_cp); | 386 | net = skb_net(skb); |
387 | ip_vs_tcp_conn_listen(net, n_cp); | ||
381 | ip_vs_conn_put(n_cp); | 388 | ip_vs_conn_put(n_cp); |
382 | 389 | ||
383 | return 1; | 390 | return 1; |
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = { | |||
398 | .pkt_in = ip_vs_ftp_in, | 405 | .pkt_in = ip_vs_ftp_in, |
399 | }; | 406 | }; |
400 | 407 | ||
401 | |||
402 | /* | 408 | /* |
403 | * ip_vs_ftp initialization | 409 | * per netns ip_vs_ftp initialization |
404 | */ | 410 | */ |
405 | static int __init ip_vs_ftp_init(void) | 411 | static int __net_init __ip_vs_ftp_init(struct net *net) |
406 | { | 412 | { |
407 | int i, ret; | 413 | int i, ret; |
408 | struct ip_vs_app *app = &ip_vs_ftp; | 414 | struct ip_vs_app *app = &ip_vs_ftp; |
409 | 415 | ||
410 | ret = register_ip_vs_app(app); | 416 | ret = register_ip_vs_app(net, app); |
411 | if (ret) | 417 | if (ret) |
412 | return ret; | 418 | return ret; |
413 | 419 | ||
414 | for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { | 420 | for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { |
415 | if (!ports[i]) | 421 | if (!ports[i]) |
416 | continue; | 422 | continue; |
417 | ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); | 423 | ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); |
418 | if (ret) | 424 | if (ret) |
419 | break; | 425 | break; |
420 | pr_info("%s: loaded support on port[%d] = %d\n", | 426 | pr_info("%s: loaded support on port[%d] = %d\n", |
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void) | |||
422 | } | 428 | } |
423 | 429 | ||
424 | if (ret) | 430 | if (ret) |
425 | unregister_ip_vs_app(app); | 431 | unregister_ip_vs_app(net, app); |
426 | 432 | ||
427 | return ret; | 433 | return ret; |
428 | } | 434 | } |
435 | /* | ||
436 | * netns exit | ||
437 | */ | ||
438 | static void __ip_vs_ftp_exit(struct net *net) | ||
439 | { | ||
440 | struct ip_vs_app *app = &ip_vs_ftp; | ||
441 | |||
442 | unregister_ip_vs_app(net, app); | ||
443 | } | ||
444 | |||
445 | static struct pernet_operations ip_vs_ftp_ops = { | ||
446 | .init = __ip_vs_ftp_init, | ||
447 | .exit = __ip_vs_ftp_exit, | ||
448 | }; | ||
429 | 449 | ||
450 | int __init ip_vs_ftp_init(void) | ||
451 | { | ||
452 | int rv; | ||
453 | |||
454 | rv = register_pernet_subsys(&ip_vs_ftp_ops); | ||
455 | return rv; | ||
456 | } | ||
430 | 457 | ||
431 | /* | 458 | /* |
432 | * ip_vs_ftp finish. | 459 | * ip_vs_ftp finish. |
433 | */ | 460 | */ |
434 | static void __exit ip_vs_ftp_exit(void) | 461 | static void __exit ip_vs_ftp_exit(void) |
435 | { | 462 | { |
436 | unregister_ip_vs_app(&ip_vs_ftp); | 463 | unregister_pernet_subsys(&ip_vs_ftp_ops); |
437 | } | 464 | } |
438 | 465 | ||
439 | 466 | ||
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 9323f8944199..d5bec3371871 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c | |||
@@ -70,7 +70,6 @@ | |||
70 | * entries that haven't been touched for a day. | 70 | * entries that haven't been touched for a day. |
71 | */ | 71 | */ |
72 | #define COUNT_FOR_FULL_EXPIRATION 30 | 72 | #define COUNT_FOR_FULL_EXPIRATION 30 |
73 | static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; | ||
74 | 73 | ||
75 | 74 | ||
76 | /* | 75 | /* |
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table { | |||
117 | static ctl_table vs_vars_table[] = { | 116 | static ctl_table vs_vars_table[] = { |
118 | { | 117 | { |
119 | .procname = "lblc_expiration", | 118 | .procname = "lblc_expiration", |
120 | .data = &sysctl_ip_vs_lblc_expiration, | 119 | .data = NULL, |
121 | .maxlen = sizeof(int), | 120 | .maxlen = sizeof(int), |
122 | .mode = 0644, | 121 | .mode = 0644, |
123 | .proc_handler = proc_dointvec_jiffies, | 122 | .proc_handler = proc_dointvec_jiffies, |
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = { | |||
125 | { } | 124 | { } |
126 | }; | 125 | }; |
127 | 126 | ||
128 | static struct ctl_table_header * sysctl_header; | ||
129 | |||
130 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) | 127 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) |
131 | { | 128 | { |
132 | list_del(&en->list); | 129 | list_del(&en->list); |
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) | |||
248 | struct ip_vs_lblc_entry *en, *nxt; | 245 | struct ip_vs_lblc_entry *en, *nxt; |
249 | unsigned long now = jiffies; | 246 | unsigned long now = jiffies; |
250 | int i, j; | 247 | int i, j; |
248 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
251 | 249 | ||
252 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { | 250 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { |
253 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; | 251 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; |
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) | |||
255 | write_lock(&svc->sched_lock); | 253 | write_lock(&svc->sched_lock); |
256 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 254 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
257 | if (time_before(now, | 255 | if (time_before(now, |
258 | en->lastuse + sysctl_ip_vs_lblc_expiration)) | 256 | en->lastuse + |
257 | ipvs->sysctl_lblc_expiration)) | ||
259 | continue; | 258 | continue; |
260 | 259 | ||
261 | ip_vs_lblc_free(en); | 260 | ip_vs_lblc_free(en); |
@@ -543,23 +542,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = | |||
543 | .schedule = ip_vs_lblc_schedule, | 542 | .schedule = ip_vs_lblc_schedule, |
544 | }; | 543 | }; |
545 | 544 | ||
545 | /* | ||
546 | * per netns init. | ||
547 | */ | ||
548 | static int __net_init __ip_vs_lblc_init(struct net *net) | ||
549 | { | ||
550 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
551 | |||
552 | if (!net_eq(net, &init_net)) { | ||
553 | ipvs->lblc_ctl_table = kmemdup(vs_vars_table, | ||
554 | sizeof(vs_vars_table), | ||
555 | GFP_KERNEL); | ||
556 | if (ipvs->lblc_ctl_table == NULL) | ||
557 | goto err_dup; | ||
558 | } else | ||
559 | ipvs->lblc_ctl_table = vs_vars_table; | ||
560 | ipvs->sysctl_lblc_expiration = 24*60*60*HZ; | ||
561 | ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; | ||
562 | |||
563 | ipvs->lblc_ctl_header = | ||
564 | register_net_sysctl_table(net, net_vs_ctl_path, | ||
565 | ipvs->lblc_ctl_table); | ||
566 | if (!ipvs->lblc_ctl_header) | ||
567 | goto err_reg; | ||
568 | |||
569 | return 0; | ||
570 | |||
571 | err_reg: | ||
572 | if (!net_eq(net, &init_net)) | ||
573 | kfree(ipvs->lblc_ctl_table); | ||
574 | |||
575 | err_dup: | ||
576 | return -ENOMEM; | ||
577 | } | ||
578 | |||
579 | static void __net_exit __ip_vs_lblc_exit(struct net *net) | ||
580 | { | ||
581 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
582 | |||
583 | unregister_net_sysctl_table(ipvs->lblc_ctl_header); | ||
584 | |||
585 | if (!net_eq(net, &init_net)) | ||
586 | kfree(ipvs->lblc_ctl_table); | ||
587 | } | ||
588 | |||
589 | static struct pernet_operations ip_vs_lblc_ops = { | ||
590 | .init = __ip_vs_lblc_init, | ||
591 | .exit = __ip_vs_lblc_exit, | ||
592 | }; | ||
546 | 593 | ||
547 | static int __init ip_vs_lblc_init(void) | 594 | static int __init ip_vs_lblc_init(void) |
548 | { | 595 | { |
549 | int ret; | 596 | int ret; |
550 | 597 | ||
551 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); | 598 | ret = register_pernet_subsys(&ip_vs_lblc_ops); |
599 | if (ret) | ||
600 | return ret; | ||
601 | |||
552 | ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); | 602 | ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); |
553 | if (ret) | 603 | if (ret) |
554 | unregister_sysctl_table(sysctl_header); | 604 | unregister_pernet_subsys(&ip_vs_lblc_ops); |
555 | return ret; | 605 | return ret; |
556 | } | 606 | } |
557 | 607 | ||
558 | |||
559 | static void __exit ip_vs_lblc_cleanup(void) | 608 | static void __exit ip_vs_lblc_cleanup(void) |
560 | { | 609 | { |
561 | unregister_sysctl_table(sysctl_header); | ||
562 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); | 610 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); |
611 | unregister_pernet_subsys(&ip_vs_lblc_ops); | ||
563 | } | 612 | } |
564 | 613 | ||
565 | 614 | ||
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index dbeed8ea421a..61ae8cfcf0b4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c | |||
@@ -70,8 +70,6 @@ | |||
70 | * entries that haven't been touched for a day. | 70 | * entries that haven't been touched for a day. |
71 | */ | 71 | */ |
72 | #define COUNT_FOR_FULL_EXPIRATION 30 | 72 | #define COUNT_FOR_FULL_EXPIRATION 30 |
73 | static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; | ||
74 | |||
75 | 73 | ||
76 | /* | 74 | /* |
77 | * for IPVS lblcr entry hash table | 75 | * for IPVS lblcr entry hash table |
@@ -296,7 +294,7 @@ struct ip_vs_lblcr_table { | |||
296 | static ctl_table vs_vars_table[] = { | 294 | static ctl_table vs_vars_table[] = { |
297 | { | 295 | { |
298 | .procname = "lblcr_expiration", | 296 | .procname = "lblcr_expiration", |
299 | .data = &sysctl_ip_vs_lblcr_expiration, | 297 | .data = NULL, |
300 | .maxlen = sizeof(int), | 298 | .maxlen = sizeof(int), |
301 | .mode = 0644, | 299 | .mode = 0644, |
302 | .proc_handler = proc_dointvec_jiffies, | 300 | .proc_handler = proc_dointvec_jiffies, |
@@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = { | |||
304 | { } | 302 | { } |
305 | }; | 303 | }; |
306 | 304 | ||
307 | static struct ctl_table_header * sysctl_header; | ||
308 | |||
309 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) | 305 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) |
310 | { | 306 | { |
311 | list_del(&en->list); | 307 | list_del(&en->list); |
@@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) | |||
425 | unsigned long now = jiffies; | 421 | unsigned long now = jiffies; |
426 | int i, j; | 422 | int i, j; |
427 | struct ip_vs_lblcr_entry *en, *nxt; | 423 | struct ip_vs_lblcr_entry *en, *nxt; |
424 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
428 | 425 | ||
429 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 426 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
430 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; | 427 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; |
431 | 428 | ||
432 | write_lock(&svc->sched_lock); | 429 | write_lock(&svc->sched_lock); |
433 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 430 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
434 | if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, | 431 | if (time_after(en->lastuse |
435 | now)) | 432 | + ipvs->sysctl_lblcr_expiration, now)) |
436 | continue; | 433 | continue; |
437 | 434 | ||
438 | ip_vs_lblcr_free(en); | 435 | ip_vs_lblcr_free(en); |
@@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
664 | read_lock(&svc->sched_lock); | 661 | read_lock(&svc->sched_lock); |
665 | en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); | 662 | en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); |
666 | if (en) { | 663 | if (en) { |
664 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | ||
667 | /* We only hold a read lock, but this is atomic */ | 665 | /* We only hold a read lock, but this is atomic */ |
668 | en->lastuse = jiffies; | 666 | en->lastuse = jiffies; |
669 | 667 | ||
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
675 | /* More than one destination + enough time passed by, cleanup */ | 673 | /* More than one destination + enough time passed by, cleanup */ |
676 | if (atomic_read(&en->set.size) > 1 && | 674 | if (atomic_read(&en->set.size) > 1 && |
677 | time_after(jiffies, en->set.lastmod + | 675 | time_after(jiffies, en->set.lastmod + |
678 | sysctl_ip_vs_lblcr_expiration)) { | 676 | ipvs->sysctl_lblcr_expiration)) { |
679 | struct ip_vs_dest *m; | 677 | struct ip_vs_dest *m; |
680 | 678 | ||
681 | write_lock(&en->set.lock); | 679 | write_lock(&en->set.lock); |
@@ -744,23 +742,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = | |||
744 | .schedule = ip_vs_lblcr_schedule, | 742 | .schedule = ip_vs_lblcr_schedule, |
745 | }; | 743 | }; |
746 | 744 | ||
745 | /* | ||
746 | * per netns init. | ||
747 | */ | ||
748 | static int __net_init __ip_vs_lblcr_init(struct net *net) | ||
749 | { | ||
750 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
751 | |||
752 | if (!net_eq(net, &init_net)) { | ||
753 | ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, | ||
754 | sizeof(vs_vars_table), | ||
755 | GFP_KERNEL); | ||
756 | if (ipvs->lblcr_ctl_table == NULL) | ||
757 | goto err_dup; | ||
758 | } else | ||
759 | ipvs->lblcr_ctl_table = vs_vars_table; | ||
760 | ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; | ||
761 | ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; | ||
762 | |||
763 | ipvs->lblcr_ctl_header = | ||
764 | register_net_sysctl_table(net, net_vs_ctl_path, | ||
765 | ipvs->lblcr_ctl_table); | ||
766 | if (!ipvs->lblcr_ctl_header) | ||
767 | goto err_reg; | ||
768 | |||
769 | return 0; | ||
770 | |||
771 | err_reg: | ||
772 | if (!net_eq(net, &init_net)) | ||
773 | kfree(ipvs->lblcr_ctl_table); | ||
774 | |||
775 | err_dup: | ||
776 | return -ENOMEM; | ||
777 | } | ||
778 | |||
779 | static void __net_exit __ip_vs_lblcr_exit(struct net *net) | ||
780 | { | ||
781 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
782 | |||
783 | unregister_net_sysctl_table(ipvs->lblcr_ctl_header); | ||
784 | |||
785 | if (!net_eq(net, &init_net)) | ||
786 | kfree(ipvs->lblcr_ctl_table); | ||
787 | } | ||
788 | |||
789 | static struct pernet_operations ip_vs_lblcr_ops = { | ||
790 | .init = __ip_vs_lblcr_init, | ||
791 | .exit = __ip_vs_lblcr_exit, | ||
792 | }; | ||
747 | 793 | ||
748 | static int __init ip_vs_lblcr_init(void) | 794 | static int __init ip_vs_lblcr_init(void) |
749 | { | 795 | { |
750 | int ret; | 796 | int ret; |
751 | 797 | ||
752 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); | 798 | ret = register_pernet_subsys(&ip_vs_lblcr_ops); |
799 | if (ret) | ||
800 | return ret; | ||
801 | |||
753 | ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | 802 | ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); |
754 | if (ret) | 803 | if (ret) |
755 | unregister_sysctl_table(sysctl_header); | 804 | unregister_pernet_subsys(&ip_vs_lblcr_ops); |
756 | return ret; | 805 | return ret; |
757 | } | 806 | } |
758 | 807 | ||
759 | |||
760 | static void __exit ip_vs_lblcr_cleanup(void) | 808 | static void __exit ip_vs_lblcr_cleanup(void) |
761 | { | 809 | { |
762 | unregister_sysctl_table(sysctl_header); | ||
763 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | 810 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); |
811 | unregister_pernet_subsys(&ip_vs_lblcr_ops); | ||
764 | } | 812 | } |
765 | 813 | ||
766 | 814 | ||
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 4680647cd450..f454c80df0a7 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c | |||
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | |||
141 | struct nf_conntrack_tuple *orig, new_reply; | 141 | struct nf_conntrack_tuple *orig, new_reply; |
142 | struct ip_vs_conn *cp; | 142 | struct ip_vs_conn *cp; |
143 | struct ip_vs_conn_param p; | 143 | struct ip_vs_conn_param p; |
144 | struct net *net = nf_ct_net(ct); | ||
144 | 145 | ||
145 | if (exp->tuple.src.l3num != PF_INET) | 146 | if (exp->tuple.src.l3num != PF_INET) |
146 | return; | 147 | return; |
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | |||
155 | 156 | ||
156 | /* RS->CLIENT */ | 157 | /* RS->CLIENT */ |
157 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | 158 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; |
158 | ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, | 159 | ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, |
159 | &orig->src.u3, orig->src.u.tcp.port, | 160 | &orig->src.u3, orig->src.u.tcp.port, |
160 | &orig->dst.u3, orig->dst.u.tcp.port, &p); | 161 | &orig->dst.u3, orig->dst.u.tcp.port, &p); |
161 | cp = ip_vs_conn_out_get(&p); | 162 | cp = ip_vs_conn_out_get(&p); |
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | |||
268 | " for conn " FMT_CONN "\n", | 269 | " for conn " FMT_CONN "\n", |
269 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); | 270 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); |
270 | 271 | ||
271 | h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); | 272 | h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, |
273 | &tuple); | ||
272 | if (h) { | 274 | if (h) { |
273 | ct = nf_ct_tuplehash_to_ctrack(h); | 275 | ct = nf_ct_tuplehash_to_ctrack(h); |
274 | /* Show what happens instead of calling nf_ct_kill() */ | 276 | /* Show what happens instead of calling nf_ct_kill() */ |
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 3414af70ee12..5cf859ccb31b 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c | |||
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc) | |||
29 | } | 29 | } |
30 | 30 | ||
31 | /* Get pe in the pe list by name */ | 31 | /* Get pe in the pe list by name */ |
32 | static struct ip_vs_pe * | 32 | struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) |
33 | ip_vs_pe_getbyname(const char *pe_name) | ||
34 | { | 33 | { |
35 | struct ip_vs_pe *pe; | 34 | struct ip_vs_pe *pe; |
36 | 35 | ||
37 | IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__, | 36 | IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, |
38 | pe_name); | 37 | pe_name); |
39 | 38 | ||
40 | spin_lock_bh(&ip_vs_pe_lock); | 39 | spin_lock_bh(&ip_vs_pe_lock); |
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name) | |||
60 | } | 59 | } |
61 | 60 | ||
62 | /* Lookup pe and try to load it if it doesn't exist */ | 61 | /* Lookup pe and try to load it if it doesn't exist */ |
63 | struct ip_vs_pe *ip_vs_pe_get(const char *name) | 62 | struct ip_vs_pe *ip_vs_pe_getbyname(const char *name) |
64 | { | 63 | { |
65 | struct ip_vs_pe *pe; | 64 | struct ip_vs_pe *pe; |
66 | 65 | ||
67 | /* Search for the pe by name */ | 66 | /* Search for the pe by name */ |
68 | pe = ip_vs_pe_getbyname(name); | 67 | pe = __ip_vs_pe_getbyname(name); |
69 | 68 | ||
70 | /* If pe not found, load the module and search again */ | 69 | /* If pe not found, load the module and search again */ |
71 | if (!pe) { | 70 | if (!pe) { |
72 | request_module("ip_vs_pe_%s", name); | 71 | request_module("ip_vs_pe_%s", name); |
73 | pe = ip_vs_pe_getbyname(name); | 72 | pe = __ip_vs_pe_getbyname(name); |
74 | } | 73 | } |
75 | 74 | ||
76 | return pe; | 75 | return pe; |
77 | } | 76 | } |
78 | 77 | ||
79 | void ip_vs_pe_put(struct ip_vs_pe *pe) | ||
80 | { | ||
81 | if (pe && pe->module) | ||
82 | module_put(pe->module); | ||
83 | } | ||
84 | |||
85 | /* Register a pe in the pe list */ | 78 | /* Register a pe in the pe list */ |
86 | int register_ip_vs_pe(struct ip_vs_pe *pe) | 79 | int register_ip_vs_pe(struct ip_vs_pe *pe) |
87 | { | 80 | { |
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index b8b4e9620f3e..0d83bc01fed4 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c | |||
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) | |||
71 | struct ip_vs_iphdr iph; | 71 | struct ip_vs_iphdr iph; |
72 | unsigned int dataoff, datalen, matchoff, matchlen; | 72 | unsigned int dataoff, datalen, matchoff, matchlen; |
73 | const char *dptr; | 73 | const char *dptr; |
74 | int retc; | ||
74 | 75 | ||
75 | ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); | 76 | ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); |
76 | 77 | ||
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) | |||
83 | if (dataoff >= skb->len) | 84 | if (dataoff >= skb->len) |
84 | return -EINVAL; | 85 | return -EINVAL; |
85 | 86 | ||
87 | if ((retc=skb_linearize(skb)) < 0) | ||
88 | return retc; | ||
86 | dptr = skb->data + dataoff; | 89 | dptr = skb->data + dataoff; |
87 | datalen = skb->len - dataoff; | 90 | datalen = skb->len - dataoff; |
88 | 91 | ||
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index c53998390877..6ac986cdcff3 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c | |||
@@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) | |||
60 | return 0; | 60 | return 0; |
61 | } | 61 | } |
62 | 62 | ||
63 | /* | ||
64 | * register an ipvs protocols netns related data | ||
65 | */ | ||
66 | static int | ||
67 | register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) | ||
68 | { | ||
69 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
70 | unsigned hash = IP_VS_PROTO_HASH(pp->protocol); | ||
71 | struct ip_vs_proto_data *pd = | ||
72 | kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); | ||
73 | |||
74 | if (!pd) { | ||
75 | pr_err("%s(): no memory.\n", __func__); | ||
76 | return -ENOMEM; | ||
77 | } | ||
78 | pd->pp = pp; /* For speed issues */ | ||
79 | pd->next = ipvs->proto_data_table[hash]; | ||
80 | ipvs->proto_data_table[hash] = pd; | ||
81 | atomic_set(&pd->appcnt, 0); /* Init app counter */ | ||
82 | |||
83 | if (pp->init_netns != NULL) | ||
84 | pp->init_netns(net, pd); | ||
85 | |||
86 | return 0; | ||
87 | } | ||
63 | 88 | ||
64 | /* | 89 | /* |
65 | * unregister an ipvs protocol | 90 | * unregister an ipvs protocol |
@@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) | |||
82 | return -ESRCH; | 107 | return -ESRCH; |
83 | } | 108 | } |
84 | 109 | ||
110 | /* | ||
111 | * unregister an ipvs protocols netns data | ||
112 | */ | ||
113 | static int | ||
114 | unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) | ||
115 | { | ||
116 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
117 | struct ip_vs_proto_data **pd_p; | ||
118 | unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol); | ||
119 | |||
120 | pd_p = &ipvs->proto_data_table[hash]; | ||
121 | for (; *pd_p; pd_p = &(*pd_p)->next) { | ||
122 | if (*pd_p == pd) { | ||
123 | *pd_p = pd->next; | ||
124 | if (pd->pp->exit_netns != NULL) | ||
125 | pd->pp->exit_netns(net, pd); | ||
126 | kfree(pd); | ||
127 | return 0; | ||
128 | } | ||
129 | } | ||
130 | |||
131 | return -ESRCH; | ||
132 | } | ||
85 | 133 | ||
86 | /* | 134 | /* |
87 | * get ip_vs_protocol object by its proto. | 135 | * get ip_vs_protocol object by its proto. |
@@ -100,19 +148,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) | |||
100 | } | 148 | } |
101 | EXPORT_SYMBOL(ip_vs_proto_get); | 149 | EXPORT_SYMBOL(ip_vs_proto_get); |
102 | 150 | ||
151 | /* | ||
152 | * get ip_vs_protocol object data by netns and proto | ||
153 | */ | ||
154 | struct ip_vs_proto_data * | ||
155 | __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) | ||
156 | { | ||
157 | struct ip_vs_proto_data *pd; | ||
158 | unsigned hash = IP_VS_PROTO_HASH(proto); | ||
159 | |||
160 | for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { | ||
161 | if (pd->pp->protocol == proto) | ||
162 | return pd; | ||
163 | } | ||
164 | |||
165 | return NULL; | ||
166 | } | ||
167 | |||
168 | struct ip_vs_proto_data * | ||
169 | ip_vs_proto_data_get(struct net *net, unsigned short proto) | ||
170 | { | ||
171 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
172 | |||
173 | return __ipvs_proto_data_get(ipvs, proto); | ||
174 | } | ||
175 | EXPORT_SYMBOL(ip_vs_proto_data_get); | ||
103 | 176 | ||
104 | /* | 177 | /* |
105 | * Propagate event for state change to all protocols | 178 | * Propagate event for state change to all protocols |
106 | */ | 179 | */ |
107 | void ip_vs_protocol_timeout_change(int flags) | 180 | void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) |
108 | { | 181 | { |
109 | struct ip_vs_protocol *pp; | 182 | struct ip_vs_proto_data *pd; |
110 | int i; | 183 | int i; |
111 | 184 | ||
112 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | 185 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { |
113 | for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { | 186 | for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { |
114 | if (pp->timeout_change) | 187 | if (pd->pp->timeout_change) |
115 | pp->timeout_change(pp, flags); | 188 | pd->pp->timeout_change(pd, flags); |
116 | } | 189 | } |
117 | } | 190 | } |
118 | } | 191 | } |
@@ -236,6 +309,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, | |||
236 | ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); | 309 | ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); |
237 | } | 310 | } |
238 | 311 | ||
312 | /* | ||
313 | * per network name-space init | ||
314 | */ | ||
315 | static int __net_init __ip_vs_protocol_init(struct net *net) | ||
316 | { | ||
317 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
318 | register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); | ||
319 | #endif | ||
320 | #ifdef CONFIG_IP_VS_PROTO_UDP | ||
321 | register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); | ||
322 | #endif | ||
323 | #ifdef CONFIG_IP_VS_PROTO_SCTP | ||
324 | register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); | ||
325 | #endif | ||
326 | #ifdef CONFIG_IP_VS_PROTO_AH | ||
327 | register_ip_vs_proto_netns(net, &ip_vs_protocol_ah); | ||
328 | #endif | ||
329 | #ifdef CONFIG_IP_VS_PROTO_ESP | ||
330 | register_ip_vs_proto_netns(net, &ip_vs_protocol_esp); | ||
331 | #endif | ||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | static void __net_exit __ip_vs_protocol_cleanup(struct net *net) | ||
336 | { | ||
337 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
338 | struct ip_vs_proto_data *pd; | ||
339 | int i; | ||
340 | |||
341 | /* unregister all the ipvs proto data for this netns */ | ||
342 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | ||
343 | while ((pd = ipvs->proto_data_table[i]) != NULL) | ||
344 | unregister_ip_vs_proto_netns(net, pd); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | static struct pernet_operations ipvs_proto_ops = { | ||
349 | .init = __ip_vs_protocol_init, | ||
350 | .exit = __ip_vs_protocol_cleanup, | ||
351 | }; | ||
239 | 352 | ||
240 | int __init ip_vs_protocol_init(void) | 353 | int __init ip_vs_protocol_init(void) |
241 | { | 354 | { |
@@ -265,6 +378,7 @@ int __init ip_vs_protocol_init(void) | |||
265 | REGISTER_PROTOCOL(&ip_vs_protocol_esp); | 378 | REGISTER_PROTOCOL(&ip_vs_protocol_esp); |
266 | #endif | 379 | #endif |
267 | pr_info("Registered protocols (%s)\n", &protocols[2]); | 380 | pr_info("Registered protocols (%s)\n", &protocols[2]); |
381 | return register_pernet_subsys(&ipvs_proto_ops); | ||
268 | 382 | ||
269 | return 0; | 383 | return 0; |
270 | } | 384 | } |
@@ -275,6 +389,7 @@ void ip_vs_protocol_cleanup(void) | |||
275 | struct ip_vs_protocol *pp; | 389 | struct ip_vs_protocol *pp; |
276 | int i; | 390 | int i; |
277 | 391 | ||
392 | unregister_pernet_subsys(&ipvs_proto_ops); | ||
278 | /* unregister all the ipvs protocols */ | 393 | /* unregister all the ipvs protocols */ |
279 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | 394 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { |
280 | while ((pp = ip_vs_proto_table[i]) != NULL) | 395 | while ((pp = ip_vs_proto_table[i]) != NULL) |
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 3a0461117d3f..5b8eb8b12c3e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c | |||
@@ -41,28 +41,30 @@ struct isakmp_hdr { | |||
41 | #define PORT_ISAKMP 500 | 41 | #define PORT_ISAKMP 500 |
42 | 42 | ||
43 | static void | 43 | static void |
44 | ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, | 44 | ah_esp_conn_fill_param_proto(struct net *net, int af, |
45 | int inverse, struct ip_vs_conn_param *p) | 45 | const struct ip_vs_iphdr *iph, int inverse, |
46 | struct ip_vs_conn_param *p) | ||
46 | { | 47 | { |
47 | if (likely(!inverse)) | 48 | if (likely(!inverse)) |
48 | ip_vs_conn_fill_param(af, IPPROTO_UDP, | 49 | ip_vs_conn_fill_param(net, af, IPPROTO_UDP, |
49 | &iph->saddr, htons(PORT_ISAKMP), | 50 | &iph->saddr, htons(PORT_ISAKMP), |
50 | &iph->daddr, htons(PORT_ISAKMP), p); | 51 | &iph->daddr, htons(PORT_ISAKMP), p); |
51 | else | 52 | else |
52 | ip_vs_conn_fill_param(af, IPPROTO_UDP, | 53 | ip_vs_conn_fill_param(net, af, IPPROTO_UDP, |
53 | &iph->daddr, htons(PORT_ISAKMP), | 54 | &iph->daddr, htons(PORT_ISAKMP), |
54 | &iph->saddr, htons(PORT_ISAKMP), p); | 55 | &iph->saddr, htons(PORT_ISAKMP), p); |
55 | } | 56 | } |
56 | 57 | ||
57 | static struct ip_vs_conn * | 58 | static struct ip_vs_conn * |
58 | ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, | 59 | ah_esp_conn_in_get(int af, const struct sk_buff *skb, |
59 | const struct ip_vs_iphdr *iph, unsigned int proto_off, | 60 | const struct ip_vs_iphdr *iph, unsigned int proto_off, |
60 | int inverse) | 61 | int inverse) |
61 | { | 62 | { |
62 | struct ip_vs_conn *cp; | 63 | struct ip_vs_conn *cp; |
63 | struct ip_vs_conn_param p; | 64 | struct ip_vs_conn_param p; |
65 | struct net *net = skb_net(skb); | ||
64 | 66 | ||
65 | ah_esp_conn_fill_param_proto(af, iph, inverse, &p); | 67 | ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); |
66 | cp = ip_vs_conn_in_get(&p); | 68 | cp = ip_vs_conn_in_get(&p); |
67 | if (!cp) { | 69 | if (!cp) { |
68 | /* | 70 | /* |
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
72 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " | 74 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " |
73 | "%s%s %s->%s\n", | 75 | "%s%s %s->%s\n", |
74 | inverse ? "ICMP+" : "", | 76 | inverse ? "ICMP+" : "", |
75 | pp->name, | 77 | ip_vs_proto_get(iph->protocol)->name, |
76 | IP_VS_DBG_ADDR(af, &iph->saddr), | 78 | IP_VS_DBG_ADDR(af, &iph->saddr), |
77 | IP_VS_DBG_ADDR(af, &iph->daddr)); | 79 | IP_VS_DBG_ADDR(af, &iph->daddr)); |
78 | } | 80 | } |
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
83 | 85 | ||
84 | static struct ip_vs_conn * | 86 | static struct ip_vs_conn * |
85 | ah_esp_conn_out_get(int af, const struct sk_buff *skb, | 87 | ah_esp_conn_out_get(int af, const struct sk_buff *skb, |
86 | struct ip_vs_protocol *pp, | ||
87 | const struct ip_vs_iphdr *iph, | 88 | const struct ip_vs_iphdr *iph, |
88 | unsigned int proto_off, | 89 | unsigned int proto_off, |
89 | int inverse) | 90 | int inverse) |
90 | { | 91 | { |
91 | struct ip_vs_conn *cp; | 92 | struct ip_vs_conn *cp; |
92 | struct ip_vs_conn_param p; | 93 | struct ip_vs_conn_param p; |
94 | struct net *net = skb_net(skb); | ||
93 | 95 | ||
94 | ah_esp_conn_fill_param_proto(af, iph, inverse, &p); | 96 | ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); |
95 | cp = ip_vs_conn_out_get(&p); | 97 | cp = ip_vs_conn_out_get(&p); |
96 | if (!cp) { | 98 | if (!cp) { |
97 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " | 99 | IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " |
98 | "%s%s %s->%s\n", | 100 | "%s%s %s->%s\n", |
99 | inverse ? "ICMP+" : "", | 101 | inverse ? "ICMP+" : "", |
100 | pp->name, | 102 | ip_vs_proto_get(iph->protocol)->name, |
101 | IP_VS_DBG_ADDR(af, &iph->saddr), | 103 | IP_VS_DBG_ADDR(af, &iph->saddr), |
102 | IP_VS_DBG_ADDR(af, &iph->daddr)); | 104 | IP_VS_DBG_ADDR(af, &iph->daddr)); |
103 | } | 105 | } |
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, | |||
107 | 109 | ||
108 | 110 | ||
109 | static int | 111 | static int |
110 | ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 112 | ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
111 | int *verdict, struct ip_vs_conn **cpp) | 113 | int *verdict, struct ip_vs_conn **cpp) |
112 | { | 114 | { |
113 | /* | 115 | /* |
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
117 | return 0; | 119 | return 0; |
118 | } | 120 | } |
119 | 121 | ||
120 | static void ah_esp_init(struct ip_vs_protocol *pp) | ||
121 | { | ||
122 | /* nothing to do now */ | ||
123 | } | ||
124 | |||
125 | |||
126 | static void ah_esp_exit(struct ip_vs_protocol *pp) | ||
127 | { | ||
128 | /* nothing to do now */ | ||
129 | } | ||
130 | |||
131 | |||
132 | #ifdef CONFIG_IP_VS_PROTO_AH | 122 | #ifdef CONFIG_IP_VS_PROTO_AH |
133 | struct ip_vs_protocol ip_vs_protocol_ah = { | 123 | struct ip_vs_protocol ip_vs_protocol_ah = { |
134 | .name = "AH", | 124 | .name = "AH", |
135 | .protocol = IPPROTO_AH, | 125 | .protocol = IPPROTO_AH, |
136 | .num_states = 1, | 126 | .num_states = 1, |
137 | .dont_defrag = 1, | 127 | .dont_defrag = 1, |
138 | .init = ah_esp_init, | 128 | .init = NULL, |
139 | .exit = ah_esp_exit, | 129 | .exit = NULL, |
140 | .conn_schedule = ah_esp_conn_schedule, | 130 | .conn_schedule = ah_esp_conn_schedule, |
141 | .conn_in_get = ah_esp_conn_in_get, | 131 | .conn_in_get = ah_esp_conn_in_get, |
142 | .conn_out_get = ah_esp_conn_out_get, | 132 | .conn_out_get = ah_esp_conn_out_get, |
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = { | |||
149 | .app_conn_bind = NULL, | 139 | .app_conn_bind = NULL, |
150 | .debug_packet = ip_vs_tcpudp_debug_packet, | 140 | .debug_packet = ip_vs_tcpudp_debug_packet, |
151 | .timeout_change = NULL, /* ISAKMP */ | 141 | .timeout_change = NULL, /* ISAKMP */ |
152 | .set_state_timeout = NULL, | ||
153 | }; | 142 | }; |
154 | #endif | 143 | #endif |
155 | 144 | ||
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = { | |||
159 | .protocol = IPPROTO_ESP, | 148 | .protocol = IPPROTO_ESP, |
160 | .num_states = 1, | 149 | .num_states = 1, |
161 | .dont_defrag = 1, | 150 | .dont_defrag = 1, |
162 | .init = ah_esp_init, | 151 | .init = NULL, |
163 | .exit = ah_esp_exit, | 152 | .exit = NULL, |
164 | .conn_schedule = ah_esp_conn_schedule, | 153 | .conn_schedule = ah_esp_conn_schedule, |
165 | .conn_in_get = ah_esp_conn_in_get, | 154 | .conn_in_get = ah_esp_conn_in_get, |
166 | .conn_out_get = ah_esp_conn_out_get, | 155 | .conn_out_get = ah_esp_conn_out_get, |
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1ea96bcd342b..fb2d04ac5d4e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c | |||
@@ -9,9 +9,10 @@ | |||
9 | #include <net/ip_vs.h> | 9 | #include <net/ip_vs.h> |
10 | 10 | ||
11 | static int | 11 | static int |
12 | sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 12 | sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
13 | int *verdict, struct ip_vs_conn **cpp) | 13 | int *verdict, struct ip_vs_conn **cpp) |
14 | { | 14 | { |
15 | struct net *net; | ||
15 | struct ip_vs_service *svc; | 16 | struct ip_vs_service *svc; |
16 | sctp_chunkhdr_t _schunkh, *sch; | 17 | sctp_chunkhdr_t _schunkh, *sch; |
17 | sctp_sctphdr_t *sh, _sctph; | 18 | sctp_sctphdr_t *sh, _sctph; |
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
27 | sizeof(_schunkh), &_schunkh); | 28 | sizeof(_schunkh), &_schunkh); |
28 | if (sch == NULL) | 29 | if (sch == NULL) |
29 | return 0; | 30 | return 0; |
30 | 31 | net = skb_net(skb); | |
31 | if ((sch->type == SCTP_CID_INIT) && | 32 | if ((sch->type == SCTP_CID_INIT) && |
32 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, | 33 | (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, |
33 | &iph.daddr, sh->dest))) { | 34 | &iph.daddr, sh->dest))) { |
34 | int ignored; | 35 | int ignored; |
35 | 36 | ||
36 | if (ip_vs_todrop()) { | 37 | if (ip_vs_todrop(net_ipvs(net))) { |
37 | /* | 38 | /* |
38 | * It seems that we are very loaded. | 39 | * It seems that we are very loaded. |
39 | * We have to drop this packet :( | 40 | * We have to drop this packet :( |
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
46 | * Let the virtual server select a real server for the | 47 | * Let the virtual server select a real server for the |
47 | * incoming connection, and create a connection entry. | 48 | * incoming connection, and create a connection entry. |
48 | */ | 49 | */ |
49 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); | 50 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored); |
50 | if (!*cpp && !ignored) { | 51 | if (!*cpp && ignored <= 0) { |
51 | *verdict = ip_vs_leave(svc, skb, pp); | 52 | if (!ignored) |
53 | *verdict = ip_vs_leave(svc, skb, pd); | ||
54 | else { | ||
55 | ip_vs_service_put(svc); | ||
56 | *verdict = NF_DROP; | ||
57 | } | ||
52 | return 0; | 58 | return 0; |
53 | } | 59 | } |
54 | ip_vs_service_put(svc); | 60 | ip_vs_service_put(svc); |
55 | } | 61 | } |
56 | 62 | /* NF_ACCEPT */ | |
57 | return 1; | 63 | return 1; |
58 | } | 64 | } |
59 | 65 | ||
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate | |||
856 | /* | 862 | /* |
857 | * Timeout table[state] | 863 | * Timeout table[state] |
858 | */ | 864 | */ |
859 | static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { | 865 | static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { |
860 | [IP_VS_SCTP_S_NONE] = 2 * HZ, | 866 | [IP_VS_SCTP_S_NONE] = 2 * HZ, |
861 | [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, | 867 | [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, |
862 | [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, | 868 | [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, |
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state) | |||
900 | return "?"; | 906 | return "?"; |
901 | } | 907 | } |
902 | 908 | ||
903 | static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) | ||
904 | { | ||
905 | } | ||
906 | |||
907 | static int | ||
908 | sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
909 | { | ||
910 | |||
911 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, | ||
912 | sctp_state_name_table, sname, to); | ||
913 | } | ||
914 | |||
915 | static inline int | 909 | static inline int |
916 | set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | 910 | set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, |
917 | int direction, const struct sk_buff *skb) | 911 | int direction, const struct sk_buff *skb) |
918 | { | 912 | { |
919 | sctp_chunkhdr_t _sctpch, *sch; | 913 | sctp_chunkhdr_t _sctpch, *sch; |
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
971 | 965 | ||
972 | IP_VS_DBG_BUF(8, "%s %s %s:%d->" | 966 | IP_VS_DBG_BUF(8, "%s %s %s:%d->" |
973 | "%s:%d state: %s->%s conn->refcnt:%d\n", | 967 | "%s:%d state: %s->%s conn->refcnt:%d\n", |
974 | pp->name, | 968 | pd->pp->name, |
975 | ((direction == IP_VS_DIR_OUTPUT) ? | 969 | ((direction == IP_VS_DIR_OUTPUT) ? |
976 | "output " : "input "), | 970 | "output " : "input "), |
977 | IP_VS_DBG_ADDR(cp->af, &cp->daddr), | 971 | IP_VS_DBG_ADDR(cp->af, &cp->daddr), |
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
995 | } | 989 | } |
996 | } | 990 | } |
997 | } | 991 | } |
992 | if (likely(pd)) | ||
993 | cp->timeout = pd->timeout_table[cp->state = next_state]; | ||
994 | else /* What to do ? */ | ||
995 | cp->timeout = sctp_timeouts[cp->state = next_state]; | ||
998 | 996 | ||
999 | cp->timeout = pp->timeout_table[cp->state = next_state]; | 997 | return 1; |
1000 | |||
1001 | return 1; | ||
1002 | } | 998 | } |
1003 | 999 | ||
1004 | static int | 1000 | static int |
1005 | sctp_state_transition(struct ip_vs_conn *cp, int direction, | 1001 | sctp_state_transition(struct ip_vs_conn *cp, int direction, |
1006 | const struct sk_buff *skb, struct ip_vs_protocol *pp) | 1002 | const struct sk_buff *skb, struct ip_vs_proto_data *pd) |
1007 | { | 1003 | { |
1008 | int ret = 0; | 1004 | int ret = 0; |
1009 | 1005 | ||
1010 | spin_lock(&cp->lock); | 1006 | spin_lock(&cp->lock); |
1011 | ret = set_sctp_state(pp, cp, direction, skb); | 1007 | ret = set_sctp_state(pd, cp, direction, skb); |
1012 | spin_unlock(&cp->lock); | 1008 | spin_unlock(&cp->lock); |
1013 | 1009 | ||
1014 | return ret; | 1010 | return ret; |
1015 | } | 1011 | } |
1016 | 1012 | ||
1017 | /* | ||
1018 | * Hash table for SCTP application incarnations | ||
1019 | */ | ||
1020 | #define SCTP_APP_TAB_BITS 4 | ||
1021 | #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) | ||
1022 | #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) | ||
1023 | |||
1024 | static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; | ||
1025 | static DEFINE_SPINLOCK(sctp_app_lock); | ||
1026 | |||
1027 | static inline __u16 sctp_app_hashkey(__be16 port) | 1013 | static inline __u16 sctp_app_hashkey(__be16 port) |
1028 | { | 1014 | { |
1029 | return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) | 1015 | return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) |
1030 | & SCTP_APP_TAB_MASK; | 1016 | & SCTP_APP_TAB_MASK; |
1031 | } | 1017 | } |
1032 | 1018 | ||
1033 | static int sctp_register_app(struct ip_vs_app *inc) | 1019 | static int sctp_register_app(struct net *net, struct ip_vs_app *inc) |
1034 | { | 1020 | { |
1035 | struct ip_vs_app *i; | 1021 | struct ip_vs_app *i; |
1036 | __u16 hash; | 1022 | __u16 hash; |
1037 | __be16 port = inc->port; | 1023 | __be16 port = inc->port; |
1038 | int ret = 0; | 1024 | int ret = 0; |
1025 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1026 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); | ||
1039 | 1027 | ||
1040 | hash = sctp_app_hashkey(port); | 1028 | hash = sctp_app_hashkey(port); |
1041 | 1029 | ||
1042 | spin_lock_bh(&sctp_app_lock); | 1030 | spin_lock_bh(&ipvs->sctp_app_lock); |
1043 | list_for_each_entry(i, &sctp_apps[hash], p_list) { | 1031 | list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { |
1044 | if (i->port == port) { | 1032 | if (i->port == port) { |
1045 | ret = -EEXIST; | 1033 | ret = -EEXIST; |
1046 | goto out; | 1034 | goto out; |
1047 | } | 1035 | } |
1048 | } | 1036 | } |
1049 | list_add(&inc->p_list, &sctp_apps[hash]); | 1037 | list_add(&inc->p_list, &ipvs->sctp_apps[hash]); |
1050 | atomic_inc(&ip_vs_protocol_sctp.appcnt); | 1038 | atomic_inc(&pd->appcnt); |
1051 | out: | 1039 | out: |
1052 | spin_unlock_bh(&sctp_app_lock); | 1040 | spin_unlock_bh(&ipvs->sctp_app_lock); |
1053 | 1041 | ||
1054 | return ret; | 1042 | return ret; |
1055 | } | 1043 | } |
1056 | 1044 | ||
1057 | static void sctp_unregister_app(struct ip_vs_app *inc) | 1045 | static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) |
1058 | { | 1046 | { |
1059 | spin_lock_bh(&sctp_app_lock); | 1047 | struct netns_ipvs *ipvs = net_ipvs(net); |
1060 | atomic_dec(&ip_vs_protocol_sctp.appcnt); | 1048 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); |
1049 | |||
1050 | spin_lock_bh(&ipvs->sctp_app_lock); | ||
1051 | atomic_dec(&pd->appcnt); | ||
1061 | list_del(&inc->p_list); | 1052 | list_del(&inc->p_list); |
1062 | spin_unlock_bh(&sctp_app_lock); | 1053 | spin_unlock_bh(&ipvs->sctp_app_lock); |
1063 | } | 1054 | } |
1064 | 1055 | ||
1065 | static int sctp_app_conn_bind(struct ip_vs_conn *cp) | 1056 | static int sctp_app_conn_bind(struct ip_vs_conn *cp) |
1066 | { | 1057 | { |
1058 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | ||
1067 | int hash; | 1059 | int hash; |
1068 | struct ip_vs_app *inc; | 1060 | struct ip_vs_app *inc; |
1069 | int result = 0; | 1061 | int result = 0; |
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) | |||
1074 | /* Lookup application incarnations and bind the right one */ | 1066 | /* Lookup application incarnations and bind the right one */ |
1075 | hash = sctp_app_hashkey(cp->vport); | 1067 | hash = sctp_app_hashkey(cp->vport); |
1076 | 1068 | ||
1077 | spin_lock(&sctp_app_lock); | 1069 | spin_lock(&ipvs->sctp_app_lock); |
1078 | list_for_each_entry(inc, &sctp_apps[hash], p_list) { | 1070 | list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { |
1079 | if (inc->port == cp->vport) { | 1071 | if (inc->port == cp->vport) { |
1080 | if (unlikely(!ip_vs_app_inc_get(inc))) | 1072 | if (unlikely(!ip_vs_app_inc_get(inc))) |
1081 | break; | 1073 | break; |
1082 | spin_unlock(&sctp_app_lock); | 1074 | spin_unlock(&ipvs->sctp_app_lock); |
1083 | 1075 | ||
1084 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" | 1076 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" |
1085 | "%s:%u to app %s on port %u\n", | 1077 | "%s:%u to app %s on port %u\n", |
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) | |||
1095 | goto out; | 1087 | goto out; |
1096 | } | 1088 | } |
1097 | } | 1089 | } |
1098 | spin_unlock(&sctp_app_lock); | 1090 | spin_unlock(&ipvs->sctp_app_lock); |
1099 | out: | 1091 | out: |
1100 | return result; | 1092 | return result; |
1101 | } | 1093 | } |
1102 | 1094 | ||
1103 | static void ip_vs_sctp_init(struct ip_vs_protocol *pp) | 1095 | /* --------------------------------------------- |
1096 | * timeouts is netns related now. | ||
1097 | * --------------------------------------------- | ||
1098 | */ | ||
1099 | static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) | ||
1104 | { | 1100 | { |
1105 | IP_VS_INIT_HASH_TABLE(sctp_apps); | 1101 | struct netns_ipvs *ipvs = net_ipvs(net); |
1106 | pp->timeout_table = sctp_timeouts; | ||
1107 | } | ||
1108 | 1102 | ||
1103 | ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); | ||
1104 | spin_lock_init(&ipvs->tcp_app_lock); | ||
1105 | pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, | ||
1106 | sizeof(sctp_timeouts)); | ||
1107 | } | ||
1109 | 1108 | ||
1110 | static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) | 1109 | static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) |
1111 | { | 1110 | { |
1112 | 1111 | kfree(pd->timeout_table); | |
1113 | } | 1112 | } |
1114 | 1113 | ||
1115 | struct ip_vs_protocol ip_vs_protocol_sctp = { | 1114 | struct ip_vs_protocol ip_vs_protocol_sctp = { |
1116 | .name = "SCTP", | 1115 | .name = "SCTP", |
1117 | .protocol = IPPROTO_SCTP, | 1116 | .protocol = IPPROTO_SCTP, |
1118 | .num_states = IP_VS_SCTP_S_LAST, | 1117 | .num_states = IP_VS_SCTP_S_LAST, |
1119 | .dont_defrag = 0, | 1118 | .dont_defrag = 0, |
1120 | .appcnt = ATOMIC_INIT(0), | 1119 | .init = NULL, |
1121 | .init = ip_vs_sctp_init, | 1120 | .exit = NULL, |
1122 | .exit = ip_vs_sctp_exit, | 1121 | .init_netns = __ip_vs_sctp_init, |
1123 | .register_app = sctp_register_app, | 1122 | .exit_netns = __ip_vs_sctp_exit, |
1123 | .register_app = sctp_register_app, | ||
1124 | .unregister_app = sctp_unregister_app, | 1124 | .unregister_app = sctp_unregister_app, |
1125 | .conn_schedule = sctp_conn_schedule, | 1125 | .conn_schedule = sctp_conn_schedule, |
1126 | .conn_in_get = ip_vs_conn_in_get_proto, | 1126 | .conn_in_get = ip_vs_conn_in_get_proto, |
1127 | .conn_out_get = ip_vs_conn_out_get_proto, | 1127 | .conn_out_get = ip_vs_conn_out_get_proto, |
1128 | .snat_handler = sctp_snat_handler, | 1128 | .snat_handler = sctp_snat_handler, |
1129 | .dnat_handler = sctp_dnat_handler, | 1129 | .dnat_handler = sctp_dnat_handler, |
1130 | .csum_check = sctp_csum_check, | 1130 | .csum_check = sctp_csum_check, |
1131 | .state_name = sctp_state_name, | 1131 | .state_name = sctp_state_name, |
1132 | .state_transition = sctp_state_transition, | 1132 | .state_transition = sctp_state_transition, |
1133 | .app_conn_bind = sctp_app_conn_bind, | 1133 | .app_conn_bind = sctp_app_conn_bind, |
1134 | .debug_packet = ip_vs_tcpudp_debug_packet, | 1134 | .debug_packet = ip_vs_tcpudp_debug_packet, |
1135 | .timeout_change = sctp_timeout_change, | 1135 | .timeout_change = NULL, |
1136 | .set_state_timeout = sctp_set_state_timeout, | ||
1137 | }; | 1136 | }; |
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index f6c5200e2146..c0cc341b840d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c | |||
@@ -9,8 +9,12 @@ | |||
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * Changes: | 12 | * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> |
13 | * | 13 | * |
14 | * Network name space (netns) aware. | ||
15 | * Global data moved to netns i.e struct netns_ipvs | ||
16 | * tcp_timeouts table has copy per netns in a hash table per | ||
17 | * protocol ip_vs_proto_data and is handled by netns | ||
14 | */ | 18 | */ |
15 | 19 | ||
16 | #define KMSG_COMPONENT "IPVS" | 20 | #define KMSG_COMPONENT "IPVS" |
@@ -28,9 +32,10 @@ | |||
28 | #include <net/ip_vs.h> | 32 | #include <net/ip_vs.h> |
29 | 33 | ||
30 | static int | 34 | static int |
31 | tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 35 | tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
32 | int *verdict, struct ip_vs_conn **cpp) | 36 | int *verdict, struct ip_vs_conn **cpp) |
33 | { | 37 | { |
38 | struct net *net; | ||
34 | struct ip_vs_service *svc; | 39 | struct ip_vs_service *svc; |
35 | struct tcphdr _tcph, *th; | 40 | struct tcphdr _tcph, *th; |
36 | struct ip_vs_iphdr iph; | 41 | struct ip_vs_iphdr iph; |
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
42 | *verdict = NF_DROP; | 47 | *verdict = NF_DROP; |
43 | return 0; | 48 | return 0; |
44 | } | 49 | } |
45 | 50 | net = skb_net(skb); | |
46 | /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ | 51 | /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ |
47 | if (th->syn && | 52 | if (th->syn && |
48 | (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, | 53 | (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, |
49 | th->dest))) { | 54 | &iph.daddr, th->dest))) { |
50 | int ignored; | 55 | int ignored; |
51 | 56 | ||
52 | if (ip_vs_todrop()) { | 57 | if (ip_vs_todrop(net_ipvs(net))) { |
53 | /* | 58 | /* |
54 | * It seems that we are very loaded. | 59 | * It seems that we are very loaded. |
55 | * We have to drop this packet :( | 60 | * We have to drop this packet :( |
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
63 | * Let the virtual server select a real server for the | 68 | * Let the virtual server select a real server for the |
64 | * incoming connection, and create a connection entry. | 69 | * incoming connection, and create a connection entry. |
65 | */ | 70 | */ |
66 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); | 71 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored); |
67 | if (!*cpp && !ignored) { | 72 | if (!*cpp && ignored <= 0) { |
68 | *verdict = ip_vs_leave(svc, skb, pp); | 73 | if (!ignored) |
74 | *verdict = ip_vs_leave(svc, skb, pd); | ||
75 | else { | ||
76 | ip_vs_service_put(svc); | ||
77 | *verdict = NF_DROP; | ||
78 | } | ||
69 | return 0; | 79 | return 0; |
70 | } | 80 | } |
71 | ip_vs_service_put(svc); | 81 | ip_vs_service_put(svc); |
72 | } | 82 | } |
83 | /* NF_ACCEPT */ | ||
73 | return 1; | 84 | return 1; |
74 | } | 85 | } |
75 | 86 | ||
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = { | |||
338 | /* | 349 | /* |
339 | * Timeout table[state] | 350 | * Timeout table[state] |
340 | */ | 351 | */ |
341 | static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { | 352 | static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { |
342 | [IP_VS_TCP_S_NONE] = 2*HZ, | 353 | [IP_VS_TCP_S_NONE] = 2*HZ, |
343 | [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, | 354 | [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, |
344 | [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, | 355 | [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, |
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = { | |||
437 | /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, | 448 | /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, |
438 | }; | 449 | }; |
439 | 450 | ||
440 | static struct tcp_states_t *tcp_state_table = tcp_states; | 451 | static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags) |
441 | |||
442 | |||
443 | static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) | ||
444 | { | 452 | { |
445 | int on = (flags & 1); /* secure_tcp */ | 453 | int on = (flags & 1); /* secure_tcp */ |
446 | 454 | ||
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) | |||
450 | ** for most if not for all of the applications. Something | 458 | ** for most if not for all of the applications. Something |
451 | ** like "capabilities" (flags) for each object. | 459 | ** like "capabilities" (flags) for each object. |
452 | */ | 460 | */ |
453 | tcp_state_table = (on? tcp_states_dos : tcp_states); | 461 | pd->tcp_state_table = (on ? tcp_states_dos : tcp_states); |
454 | } | ||
455 | |||
456 | static int | ||
457 | tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
458 | { | ||
459 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, | ||
460 | tcp_state_name_table, sname, to); | ||
461 | } | 462 | } |
462 | 463 | ||
463 | static inline int tcp_state_idx(struct tcphdr *th) | 464 | static inline int tcp_state_idx(struct tcphdr *th) |
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th) | |||
474 | } | 475 | } |
475 | 476 | ||
476 | static inline void | 477 | static inline void |
477 | set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | 478 | set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, |
478 | int direction, struct tcphdr *th) | 479 | int direction, struct tcphdr *th) |
479 | { | 480 | { |
480 | int state_idx; | 481 | int state_idx; |
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
497 | goto tcp_state_out; | 498 | goto tcp_state_out; |
498 | } | 499 | } |
499 | 500 | ||
500 | new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; | 501 | new_state = |
502 | pd->tcp_state_table[state_off+state_idx].next_state[cp->state]; | ||
501 | 503 | ||
502 | tcp_state_out: | 504 | tcp_state_out: |
503 | if (new_state != cp->state) { | 505 | if (new_state != cp->state) { |
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
505 | 507 | ||
506 | IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" | 508 | IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" |
507 | "%s:%d state: %s->%s conn->refcnt:%d\n", | 509 | "%s:%d state: %s->%s conn->refcnt:%d\n", |
508 | pp->name, | 510 | pd->pp->name, |
509 | ((state_off == TCP_DIR_OUTPUT) ? | 511 | ((state_off == TCP_DIR_OUTPUT) ? |
510 | "output " : "input "), | 512 | "output " : "input "), |
511 | th->syn ? 'S' : '.', | 513 | th->syn ? 'S' : '.', |
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
535 | } | 537 | } |
536 | } | 538 | } |
537 | 539 | ||
538 | cp->timeout = pp->timeout_table[cp->state = new_state]; | 540 | if (likely(pd)) |
541 | cp->timeout = pd->timeout_table[cp->state = new_state]; | ||
542 | else /* What to do ? */ | ||
543 | cp->timeout = tcp_timeouts[cp->state = new_state]; | ||
539 | } | 544 | } |
540 | 545 | ||
541 | |||
542 | /* | 546 | /* |
543 | * Handle state transitions | 547 | * Handle state transitions |
544 | */ | 548 | */ |
545 | static int | 549 | static int |
546 | tcp_state_transition(struct ip_vs_conn *cp, int direction, | 550 | tcp_state_transition(struct ip_vs_conn *cp, int direction, |
547 | const struct sk_buff *skb, | 551 | const struct sk_buff *skb, |
548 | struct ip_vs_protocol *pp) | 552 | struct ip_vs_proto_data *pd) |
549 | { | 553 | { |
550 | struct tcphdr _tcph, *th; | 554 | struct tcphdr _tcph, *th; |
551 | 555 | ||
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, | |||
560 | return 0; | 564 | return 0; |
561 | 565 | ||
562 | spin_lock(&cp->lock); | 566 | spin_lock(&cp->lock); |
563 | set_tcp_state(pp, cp, direction, th); | 567 | set_tcp_state(pd, cp, direction, th); |
564 | spin_unlock(&cp->lock); | 568 | spin_unlock(&cp->lock); |
565 | 569 | ||
566 | return 1; | 570 | return 1; |
567 | } | 571 | } |
568 | 572 | ||
569 | |||
570 | /* | ||
571 | * Hash table for TCP application incarnations | ||
572 | */ | ||
573 | #define TCP_APP_TAB_BITS 4 | ||
574 | #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) | ||
575 | #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) | ||
576 | |||
577 | static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; | ||
578 | static DEFINE_SPINLOCK(tcp_app_lock); | ||
579 | |||
580 | static inline __u16 tcp_app_hashkey(__be16 port) | 573 | static inline __u16 tcp_app_hashkey(__be16 port) |
581 | { | 574 | { |
582 | return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) | 575 | return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) |
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port) | |||
584 | } | 577 | } |
585 | 578 | ||
586 | 579 | ||
587 | static int tcp_register_app(struct ip_vs_app *inc) | 580 | static int tcp_register_app(struct net *net, struct ip_vs_app *inc) |
588 | { | 581 | { |
589 | struct ip_vs_app *i; | 582 | struct ip_vs_app *i; |
590 | __u16 hash; | 583 | __u16 hash; |
591 | __be16 port = inc->port; | 584 | __be16 port = inc->port; |
592 | int ret = 0; | 585 | int ret = 0; |
586 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
587 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); | ||
593 | 588 | ||
594 | hash = tcp_app_hashkey(port); | 589 | hash = tcp_app_hashkey(port); |
595 | 590 | ||
596 | spin_lock_bh(&tcp_app_lock); | 591 | spin_lock_bh(&ipvs->tcp_app_lock); |
597 | list_for_each_entry(i, &tcp_apps[hash], p_list) { | 592 | list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { |
598 | if (i->port == port) { | 593 | if (i->port == port) { |
599 | ret = -EEXIST; | 594 | ret = -EEXIST; |
600 | goto out; | 595 | goto out; |
601 | } | 596 | } |
602 | } | 597 | } |
603 | list_add(&inc->p_list, &tcp_apps[hash]); | 598 | list_add(&inc->p_list, &ipvs->tcp_apps[hash]); |
604 | atomic_inc(&ip_vs_protocol_tcp.appcnt); | 599 | atomic_inc(&pd->appcnt); |
605 | 600 | ||
606 | out: | 601 | out: |
607 | spin_unlock_bh(&tcp_app_lock); | 602 | spin_unlock_bh(&ipvs->tcp_app_lock); |
608 | return ret; | 603 | return ret; |
609 | } | 604 | } |
610 | 605 | ||
611 | 606 | ||
612 | static void | 607 | static void |
613 | tcp_unregister_app(struct ip_vs_app *inc) | 608 | tcp_unregister_app(struct net *net, struct ip_vs_app *inc) |
614 | { | 609 | { |
615 | spin_lock_bh(&tcp_app_lock); | 610 | struct netns_ipvs *ipvs = net_ipvs(net); |
616 | atomic_dec(&ip_vs_protocol_tcp.appcnt); | 611 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); |
612 | |||
613 | spin_lock_bh(&ipvs->tcp_app_lock); | ||
614 | atomic_dec(&pd->appcnt); | ||
617 | list_del(&inc->p_list); | 615 | list_del(&inc->p_list); |
618 | spin_unlock_bh(&tcp_app_lock); | 616 | spin_unlock_bh(&ipvs->tcp_app_lock); |
619 | } | 617 | } |
620 | 618 | ||
621 | 619 | ||
622 | static int | 620 | static int |
623 | tcp_app_conn_bind(struct ip_vs_conn *cp) | 621 | tcp_app_conn_bind(struct ip_vs_conn *cp) |
624 | { | 622 | { |
623 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | ||
625 | int hash; | 624 | int hash; |
626 | struct ip_vs_app *inc; | 625 | struct ip_vs_app *inc; |
627 | int result = 0; | 626 | int result = 0; |
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
633 | /* Lookup application incarnations and bind the right one */ | 632 | /* Lookup application incarnations and bind the right one */ |
634 | hash = tcp_app_hashkey(cp->vport); | 633 | hash = tcp_app_hashkey(cp->vport); |
635 | 634 | ||
636 | spin_lock(&tcp_app_lock); | 635 | spin_lock(&ipvs->tcp_app_lock); |
637 | list_for_each_entry(inc, &tcp_apps[hash], p_list) { | 636 | list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { |
638 | if (inc->port == cp->vport) { | 637 | if (inc->port == cp->vport) { |
639 | if (unlikely(!ip_vs_app_inc_get(inc))) | 638 | if (unlikely(!ip_vs_app_inc_get(inc))) |
640 | break; | 639 | break; |
641 | spin_unlock(&tcp_app_lock); | 640 | spin_unlock(&ipvs->tcp_app_lock); |
642 | 641 | ||
643 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" | 642 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" |
644 | "%s:%u to app %s on port %u\n", | 643 | "%s:%u to app %s on port %u\n", |
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
655 | goto out; | 654 | goto out; |
656 | } | 655 | } |
657 | } | 656 | } |
658 | spin_unlock(&tcp_app_lock); | 657 | spin_unlock(&ipvs->tcp_app_lock); |
659 | 658 | ||
660 | out: | 659 | out: |
661 | return result; | 660 | return result; |
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
665 | /* | 664 | /* |
666 | * Set LISTEN timeout. (ip_vs_conn_put will setup timer) | 665 | * Set LISTEN timeout. (ip_vs_conn_put will setup timer) |
667 | */ | 666 | */ |
668 | void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) | 667 | void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) |
669 | { | 668 | { |
669 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); | ||
670 | |||
670 | spin_lock(&cp->lock); | 671 | spin_lock(&cp->lock); |
671 | cp->state = IP_VS_TCP_S_LISTEN; | 672 | cp->state = IP_VS_TCP_S_LISTEN; |
672 | cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; | 673 | cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] |
674 | : tcp_timeouts[IP_VS_TCP_S_LISTEN]); | ||
673 | spin_unlock(&cp->lock); | 675 | spin_unlock(&cp->lock); |
674 | } | 676 | } |
675 | 677 | ||
676 | 678 | /* --------------------------------------------- | |
677 | static void ip_vs_tcp_init(struct ip_vs_protocol *pp) | 679 | * timeouts is netns related now. |
680 | * --------------------------------------------- | ||
681 | */ | ||
682 | static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) | ||
678 | { | 683 | { |
679 | IP_VS_INIT_HASH_TABLE(tcp_apps); | 684 | struct netns_ipvs *ipvs = net_ipvs(net); |
680 | pp->timeout_table = tcp_timeouts; | ||
681 | } | ||
682 | 685 | ||
686 | ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); | ||
687 | spin_lock_init(&ipvs->tcp_app_lock); | ||
688 | pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, | ||
689 | sizeof(tcp_timeouts)); | ||
690 | pd->tcp_state_table = tcp_states; | ||
691 | } | ||
683 | 692 | ||
684 | static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) | 693 | static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) |
685 | { | 694 | { |
695 | kfree(pd->timeout_table); | ||
686 | } | 696 | } |
687 | 697 | ||
688 | 698 | ||
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { | |||
691 | .protocol = IPPROTO_TCP, | 701 | .protocol = IPPROTO_TCP, |
692 | .num_states = IP_VS_TCP_S_LAST, | 702 | .num_states = IP_VS_TCP_S_LAST, |
693 | .dont_defrag = 0, | 703 | .dont_defrag = 0, |
694 | .appcnt = ATOMIC_INIT(0), | 704 | .init = NULL, |
695 | .init = ip_vs_tcp_init, | 705 | .exit = NULL, |
696 | .exit = ip_vs_tcp_exit, | 706 | .init_netns = __ip_vs_tcp_init, |
707 | .exit_netns = __ip_vs_tcp_exit, | ||
697 | .register_app = tcp_register_app, | 708 | .register_app = tcp_register_app, |
698 | .unregister_app = tcp_unregister_app, | 709 | .unregister_app = tcp_unregister_app, |
699 | .conn_schedule = tcp_conn_schedule, | 710 | .conn_schedule = tcp_conn_schedule, |
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { | |||
707 | .app_conn_bind = tcp_app_conn_bind, | 718 | .app_conn_bind = tcp_app_conn_bind, |
708 | .debug_packet = ip_vs_tcpudp_debug_packet, | 719 | .debug_packet = ip_vs_tcpudp_debug_packet, |
709 | .timeout_change = tcp_timeout_change, | 720 | .timeout_change = tcp_timeout_change, |
710 | .set_state_timeout = tcp_set_state_timeout, | ||
711 | }; | 721 | }; |
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 9d106a06bb0a..f1282cbe6fe3 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c | |||
@@ -9,7 +9,8 @@ | |||
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * Changes: | 12 | * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> |
13 | * Network name space (netns) aware. | ||
13 | * | 14 | * |
14 | */ | 15 | */ |
15 | 16 | ||
@@ -28,9 +29,10 @@ | |||
28 | #include <net/ip6_checksum.h> | 29 | #include <net/ip6_checksum.h> |
29 | 30 | ||
30 | static int | 31 | static int |
31 | udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | 32 | udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
32 | int *verdict, struct ip_vs_conn **cpp) | 33 | int *verdict, struct ip_vs_conn **cpp) |
33 | { | 34 | { |
35 | struct net *net; | ||
34 | struct ip_vs_service *svc; | 36 | struct ip_vs_service *svc; |
35 | struct udphdr _udph, *uh; | 37 | struct udphdr _udph, *uh; |
36 | struct ip_vs_iphdr iph; | 38 | struct ip_vs_iphdr iph; |
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
42 | *verdict = NF_DROP; | 44 | *verdict = NF_DROP; |
43 | return 0; | 45 | return 0; |
44 | } | 46 | } |
45 | 47 | net = skb_net(skb); | |
46 | svc = ip_vs_service_get(af, skb->mark, iph.protocol, | 48 | svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, |
47 | &iph.daddr, uh->dest); | 49 | &iph.daddr, uh->dest); |
48 | if (svc) { | 50 | if (svc) { |
49 | int ignored; | 51 | int ignored; |
50 | 52 | ||
51 | if (ip_vs_todrop()) { | 53 | if (ip_vs_todrop(net_ipvs(net))) { |
52 | /* | 54 | /* |
53 | * It seems that we are very loaded. | 55 | * It seems that we are very loaded. |
54 | * We have to drop this packet :( | 56 | * We have to drop this packet :( |
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
62 | * Let the virtual server select a real server for the | 64 | * Let the virtual server select a real server for the |
63 | * incoming connection, and create a connection entry. | 65 | * incoming connection, and create a connection entry. |
64 | */ | 66 | */ |
65 | *cpp = ip_vs_schedule(svc, skb, pp, &ignored); | 67 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored); |
66 | if (!*cpp && !ignored) { | 68 | if (!*cpp && ignored <= 0) { |
67 | *verdict = ip_vs_leave(svc, skb, pp); | 69 | if (!ignored) |
70 | *verdict = ip_vs_leave(svc, skb, pd); | ||
71 | else { | ||
72 | ip_vs_service_put(svc); | ||
73 | *verdict = NF_DROP; | ||
74 | } | ||
68 | return 0; | 75 | return 0; |
69 | } | 76 | } |
70 | ip_vs_service_put(svc); | 77 | ip_vs_service_put(svc); |
71 | } | 78 | } |
79 | /* NF_ACCEPT */ | ||
72 | return 1; | 80 | return 1; |
73 | } | 81 | } |
74 | 82 | ||
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
338 | return 1; | 346 | return 1; |
339 | } | 347 | } |
340 | 348 | ||
341 | |||
342 | /* | ||
343 | * Note: the caller guarantees that only one of register_app, | ||
344 | * unregister_app or app_conn_bind is called each time. | ||
345 | */ | ||
346 | |||
347 | #define UDP_APP_TAB_BITS 4 | ||
348 | #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) | ||
349 | #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) | ||
350 | |||
351 | static struct list_head udp_apps[UDP_APP_TAB_SIZE]; | ||
352 | static DEFINE_SPINLOCK(udp_app_lock); | ||
353 | |||
354 | static inline __u16 udp_app_hashkey(__be16 port) | 349 | static inline __u16 udp_app_hashkey(__be16 port) |
355 | { | 350 | { |
356 | return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) | 351 | return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) |
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port) | |||
358 | } | 353 | } |
359 | 354 | ||
360 | 355 | ||
361 | static int udp_register_app(struct ip_vs_app *inc) | 356 | static int udp_register_app(struct net *net, struct ip_vs_app *inc) |
362 | { | 357 | { |
363 | struct ip_vs_app *i; | 358 | struct ip_vs_app *i; |
364 | __u16 hash; | 359 | __u16 hash; |
365 | __be16 port = inc->port; | 360 | __be16 port = inc->port; |
366 | int ret = 0; | 361 | int ret = 0; |
362 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
363 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); | ||
367 | 364 | ||
368 | hash = udp_app_hashkey(port); | 365 | hash = udp_app_hashkey(port); |
369 | 366 | ||
370 | 367 | ||
371 | spin_lock_bh(&udp_app_lock); | 368 | spin_lock_bh(&ipvs->udp_app_lock); |
372 | list_for_each_entry(i, &udp_apps[hash], p_list) { | 369 | list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { |
373 | if (i->port == port) { | 370 | if (i->port == port) { |
374 | ret = -EEXIST; | 371 | ret = -EEXIST; |
375 | goto out; | 372 | goto out; |
376 | } | 373 | } |
377 | } | 374 | } |
378 | list_add(&inc->p_list, &udp_apps[hash]); | 375 | list_add(&inc->p_list, &ipvs->udp_apps[hash]); |
379 | atomic_inc(&ip_vs_protocol_udp.appcnt); | 376 | atomic_inc(&pd->appcnt); |
380 | 377 | ||
381 | out: | 378 | out: |
382 | spin_unlock_bh(&udp_app_lock); | 379 | spin_unlock_bh(&ipvs->udp_app_lock); |
383 | return ret; | 380 | return ret; |
384 | } | 381 | } |
385 | 382 | ||
386 | 383 | ||
387 | static void | 384 | static void |
388 | udp_unregister_app(struct ip_vs_app *inc) | 385 | udp_unregister_app(struct net *net, struct ip_vs_app *inc) |
389 | { | 386 | { |
390 | spin_lock_bh(&udp_app_lock); | 387 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); |
391 | atomic_dec(&ip_vs_protocol_udp.appcnt); | 388 | struct netns_ipvs *ipvs = net_ipvs(net); |
389 | |||
390 | spin_lock_bh(&ipvs->udp_app_lock); | ||
391 | atomic_dec(&pd->appcnt); | ||
392 | list_del(&inc->p_list); | 392 | list_del(&inc->p_list); |
393 | spin_unlock_bh(&udp_app_lock); | 393 | spin_unlock_bh(&ipvs->udp_app_lock); |
394 | } | 394 | } |
395 | 395 | ||
396 | 396 | ||
397 | static int udp_app_conn_bind(struct ip_vs_conn *cp) | 397 | static int udp_app_conn_bind(struct ip_vs_conn *cp) |
398 | { | 398 | { |
399 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | ||
399 | int hash; | 400 | int hash; |
400 | struct ip_vs_app *inc; | 401 | struct ip_vs_app *inc; |
401 | int result = 0; | 402 | int result = 0; |
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) | |||
407 | /* Lookup application incarnations and bind the right one */ | 408 | /* Lookup application incarnations and bind the right one */ |
408 | hash = udp_app_hashkey(cp->vport); | 409 | hash = udp_app_hashkey(cp->vport); |
409 | 410 | ||
410 | spin_lock(&udp_app_lock); | 411 | spin_lock(&ipvs->udp_app_lock); |
411 | list_for_each_entry(inc, &udp_apps[hash], p_list) { | 412 | list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { |
412 | if (inc->port == cp->vport) { | 413 | if (inc->port == cp->vport) { |
413 | if (unlikely(!ip_vs_app_inc_get(inc))) | 414 | if (unlikely(!ip_vs_app_inc_get(inc))) |
414 | break; | 415 | break; |
415 | spin_unlock(&udp_app_lock); | 416 | spin_unlock(&ipvs->udp_app_lock); |
416 | 417 | ||
417 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" | 418 | IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" |
418 | "%s:%u to app %s on port %u\n", | 419 | "%s:%u to app %s on port %u\n", |
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) | |||
429 | goto out; | 430 | goto out; |
430 | } | 431 | } |
431 | } | 432 | } |
432 | spin_unlock(&udp_app_lock); | 433 | spin_unlock(&ipvs->udp_app_lock); |
433 | 434 | ||
434 | out: | 435 | out: |
435 | return result; | 436 | return result; |
436 | } | 437 | } |
437 | 438 | ||
438 | 439 | ||
439 | static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { | 440 | static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { |
440 | [IP_VS_UDP_S_NORMAL] = 5*60*HZ, | 441 | [IP_VS_UDP_S_NORMAL] = 5*60*HZ, |
441 | [IP_VS_UDP_S_LAST] = 2*HZ, | 442 | [IP_VS_UDP_S_LAST] = 2*HZ, |
442 | }; | 443 | }; |
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { | |||
446 | [IP_VS_UDP_S_LAST] = "BUG!", | 447 | [IP_VS_UDP_S_LAST] = "BUG!", |
447 | }; | 448 | }; |
448 | 449 | ||
449 | |||
450 | static int | ||
451 | udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
452 | { | ||
453 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, | ||
454 | udp_state_name_table, sname, to); | ||
455 | } | ||
456 | |||
457 | static const char * udp_state_name(int state) | 450 | static const char * udp_state_name(int state) |
458 | { | 451 | { |
459 | if (state >= IP_VS_UDP_S_LAST) | 452 | if (state >= IP_VS_UDP_S_LAST) |
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state) | |||
464 | static int | 457 | static int |
465 | udp_state_transition(struct ip_vs_conn *cp, int direction, | 458 | udp_state_transition(struct ip_vs_conn *cp, int direction, |
466 | const struct sk_buff *skb, | 459 | const struct sk_buff *skb, |
467 | struct ip_vs_protocol *pp) | 460 | struct ip_vs_proto_data *pd) |
468 | { | 461 | { |
469 | cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; | 462 | if (unlikely(!pd)) { |
463 | pr_err("UDP no ns data\n"); | ||
464 | return 0; | ||
465 | } | ||
466 | |||
467 | cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; | ||
470 | return 1; | 468 | return 1; |
471 | } | 469 | } |
472 | 470 | ||
473 | static void udp_init(struct ip_vs_protocol *pp) | 471 | static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) |
474 | { | 472 | { |
475 | IP_VS_INIT_HASH_TABLE(udp_apps); | 473 | struct netns_ipvs *ipvs = net_ipvs(net); |
476 | pp->timeout_table = udp_timeouts; | 474 | |
475 | ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); | ||
476 | spin_lock_init(&ipvs->udp_app_lock); | ||
477 | pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, | ||
478 | sizeof(udp_timeouts)); | ||
477 | } | 479 | } |
478 | 480 | ||
479 | static void udp_exit(struct ip_vs_protocol *pp) | 481 | static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) |
480 | { | 482 | { |
483 | kfree(pd->timeout_table); | ||
481 | } | 484 | } |
482 | 485 | ||
483 | 486 | ||
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = { | |||
486 | .protocol = IPPROTO_UDP, | 489 | .protocol = IPPROTO_UDP, |
487 | .num_states = IP_VS_UDP_S_LAST, | 490 | .num_states = IP_VS_UDP_S_LAST, |
488 | .dont_defrag = 0, | 491 | .dont_defrag = 0, |
489 | .init = udp_init, | 492 | .init = NULL, |
490 | .exit = udp_exit, | 493 | .exit = NULL, |
494 | .init_netns = __udp_init, | ||
495 | .exit_netns = __udp_exit, | ||
491 | .conn_schedule = udp_conn_schedule, | 496 | .conn_schedule = udp_conn_schedule, |
492 | .conn_in_get = ip_vs_conn_in_get_proto, | 497 | .conn_in_get = ip_vs_conn_in_get_proto, |
493 | .conn_out_get = ip_vs_conn_out_get_proto, | 498 | .conn_out_get = ip_vs_conn_out_get_proto, |
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = { | |||
501 | .app_conn_bind = udp_app_conn_bind, | 506 | .app_conn_bind = udp_app_conn_bind, |
502 | .debug_packet = ip_vs_tcpudp_debug_packet, | 507 | .debug_packet = ip_vs_tcpudp_debug_packet, |
503 | .timeout_change = NULL, | 508 | .timeout_change = NULL, |
504 | .set_state_timeout = udp_set_state_timeout, | ||
505 | }; | 509 | }; |
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index ab85aedea17e..d1adf988eb08 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -5,6 +5,18 @@ | |||
5 | * high-performance and highly available server based on a | 5 | * high-performance and highly available server based on a |
6 | * cluster of servers. | 6 | * cluster of servers. |
7 | * | 7 | * |
8 | * Version 1, is capable of handling both version 0 and 1 messages. | ||
9 | * Version 0 is the plain old format. | ||
10 | * Note Version 0 receivers will just drop Ver 1 messages. | ||
11 | * Version 1 is capable of handle IPv6, Persistence data, | ||
12 | * time-outs, and firewall marks. | ||
13 | * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. | ||
14 | * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 | ||
15 | * | ||
16 | * Definitions Message: is a complete datagram | ||
17 | * Sync_conn: is a part of a Message | ||
18 | * Param Data is an option to a Sync_conn. | ||
19 | * | ||
8 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | 20 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> |
9 | * | 21 | * |
10 | * ip_vs_sync: sync connection info from master load balancer to backups | 22 | * ip_vs_sync: sync connection info from master load balancer to backups |
@@ -15,6 +27,8 @@ | |||
15 | * Alexandre Cassen : Added SyncID support for incoming sync | 27 | * Alexandre Cassen : Added SyncID support for incoming sync |
16 | * messages filtering. | 28 | * messages filtering. |
17 | * Justin Ossevoort : Fix endian problem on sync message size. | 29 | * Justin Ossevoort : Fix endian problem on sync message size. |
30 | * Hans Schillstrom : Added Version 1: i.e. IPv6, | ||
31 | * Persistence support, fwmark and time-out. | ||
18 | */ | 32 | */ |
19 | 33 | ||
20 | #define KMSG_COMPONENT "IPVS" | 34 | #define KMSG_COMPONENT "IPVS" |
@@ -35,6 +49,8 @@ | |||
35 | #include <linux/wait.h> | 49 | #include <linux/wait.h> |
36 | #include <linux/kernel.h> | 50 | #include <linux/kernel.h> |
37 | 51 | ||
52 | #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ | ||
53 | |||
38 | #include <net/ip.h> | 54 | #include <net/ip.h> |
39 | #include <net/sock.h> | 55 | #include <net/sock.h> |
40 | 56 | ||
@@ -43,11 +59,13 @@ | |||
43 | #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ | 59 | #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ |
44 | #define IP_VS_SYNC_PORT 8848 /* multicast port */ | 60 | #define IP_VS_SYNC_PORT 8848 /* multicast port */ |
45 | 61 | ||
62 | #define SYNC_PROTO_VER 1 /* Protocol version in header */ | ||
46 | 63 | ||
47 | /* | 64 | /* |
48 | * IPVS sync connection entry | 65 | * IPVS sync connection entry |
66 | * Version 0, i.e. original version. | ||
49 | */ | 67 | */ |
50 | struct ip_vs_sync_conn { | 68 | struct ip_vs_sync_conn_v0 { |
51 | __u8 reserved; | 69 | __u8 reserved; |
52 | 70 | ||
53 | /* Protocol, addresses and port numbers */ | 71 | /* Protocol, addresses and port numbers */ |
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options { | |||
71 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ | 89 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ |
72 | }; | 90 | }; |
73 | 91 | ||
92 | /* | ||
93 | Sync Connection format (sync_conn) | ||
94 | |||
95 | 0 1 2 3 | ||
96 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
97 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
98 | | Type | Protocol | Ver. | Size | | ||
99 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
100 | | Flags | | ||
101 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
102 | | State | cport | | ||
103 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
104 | | vport | dport | | ||
105 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
106 | | fwmark | | ||
107 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
108 | | timeout (in sec.) | | ||
109 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
110 | | ... | | ||
111 | | IP-Addresses (v4 or v6) | | ||
112 | | ... | | ||
113 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
114 | Optional Parameters. | ||
115 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
116 | | Param. Type | Param. Length | Param. data | | ||
117 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | ||
118 | | ... | | ||
119 | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
120 | | | Param Type | Param. Length | | ||
121 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
122 | | Param data | | ||
123 | | Last Param data should be padded for 32 bit alignment | | ||
124 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
125 | */ | ||
126 | |||
127 | /* | ||
128 | * Type 0, IPv4 sync connection format | ||
129 | */ | ||
130 | struct ip_vs_sync_v4 { | ||
131 | __u8 type; | ||
132 | __u8 protocol; /* Which protocol (TCP/UDP) */ | ||
133 | __be16 ver_size; /* Version msb 4 bits */ | ||
134 | /* Flags and state transition */ | ||
135 | __be32 flags; /* status flags */ | ||
136 | __be16 state; /* state info */ | ||
137 | /* Protocol, addresses and port numbers */ | ||
138 | __be16 cport; | ||
139 | __be16 vport; | ||
140 | __be16 dport; | ||
141 | __be32 fwmark; /* Firewall mark from skb */ | ||
142 | __be32 timeout; /* cp timeout */ | ||
143 | __be32 caddr; /* client address */ | ||
144 | __be32 vaddr; /* virtual address */ | ||
145 | __be32 daddr; /* destination address */ | ||
146 | /* The sequence options start here */ | ||
147 | /* PE data padded to 32bit alignment after seq. options */ | ||
148 | }; | ||
149 | /* | ||
150 | * Type 2 messages IPv6 | ||
151 | */ | ||
152 | struct ip_vs_sync_v6 { | ||
153 | __u8 type; | ||
154 | __u8 protocol; /* Which protocol (TCP/UDP) */ | ||
155 | __be16 ver_size; /* Version msb 4 bits */ | ||
156 | /* Flags and state transition */ | ||
157 | __be32 flags; /* status flags */ | ||
158 | __be16 state; /* state info */ | ||
159 | /* Protocol, addresses and port numbers */ | ||
160 | __be16 cport; | ||
161 | __be16 vport; | ||
162 | __be16 dport; | ||
163 | __be32 fwmark; /* Firewall mark from skb */ | ||
164 | __be32 timeout; /* cp timeout */ | ||
165 | struct in6_addr caddr; /* client address */ | ||
166 | struct in6_addr vaddr; /* virtual address */ | ||
167 | struct in6_addr daddr; /* destination address */ | ||
168 | /* The sequence options start here */ | ||
169 | /* PE data padded to 32bit alignment after seq. options */ | ||
170 | }; | ||
171 | |||
172 | union ip_vs_sync_conn { | ||
173 | struct ip_vs_sync_v4 v4; | ||
174 | struct ip_vs_sync_v6 v6; | ||
175 | }; | ||
176 | |||
177 | /* Bits in Type field in above */ | ||
178 | #define STYPE_INET6 0 | ||
179 | #define STYPE_F_INET6 (1 << STYPE_INET6) | ||
180 | |||
181 | #define SVER_SHIFT 12 /* Shift to get version */ | ||
182 | #define SVER_MASK 0x0fff /* Mask to strip version */ | ||
183 | |||
184 | #define IPVS_OPT_SEQ_DATA 1 | ||
185 | #define IPVS_OPT_PE_DATA 2 | ||
186 | #define IPVS_OPT_PE_NAME 3 | ||
187 | #define IPVS_OPT_PARAM 7 | ||
188 | |||
189 | #define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1)) | ||
190 | #define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1)) | ||
191 | #define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1)) | ||
192 | #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) | ||
193 | |||
74 | struct ip_vs_sync_thread_data { | 194 | struct ip_vs_sync_thread_data { |
195 | struct net *net; | ||
75 | struct socket *sock; | 196 | struct socket *sock; |
76 | char *buf; | 197 | char *buf; |
77 | }; | 198 | }; |
78 | 199 | ||
79 | #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) | 200 | /* Version 0 definition of packet sizes */ |
201 | #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0)) | ||
80 | #define FULL_CONN_SIZE \ | 202 | #define FULL_CONN_SIZE \ |
81 | (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) | 203 | (sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options)) |
82 | 204 | ||
83 | 205 | ||
84 | /* | 206 | /* |
85 | The master mulitcasts messages to the backup load balancers in the | 207 | The master mulitcasts messages (Datagrams) to the backup load balancers |
86 | following format. | 208 | in the following format. |
209 | |||
210 | Version 1: | ||
211 | Note, first byte should be Zero, so ver 0 receivers will drop the packet. | ||
87 | 212 | ||
88 | 0 1 2 3 | 213 | 0 1 2 3 |
89 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 214 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 |
90 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 215 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
91 | | Count Conns | SyncID | Size | | 216 | | 0 | SyncID | Size | |
217 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
218 | | Count Conns | Version | Reserved, set to Zero | | ||
92 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 219 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
93 | | | | 220 | | | |
94 | | IPVS Sync Connection (1) | | 221 | | IPVS Sync Connection (1) | |
95 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 222 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
96 | | . | | 223 | | . | |
97 | | . | | 224 | ~ . ~ |
98 | | . | | 225 | | . | |
99 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 226 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
100 | | | | 227 | | | |
101 | | IPVS Sync Connection (n) | | 228 | | IPVS Sync Connection (n) | |
102 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 229 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
230 | |||
231 | Version 0 Header | ||
232 | 0 1 2 3 | ||
233 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
234 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
235 | | Count Conns | SyncID | Size | | ||
236 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
237 | | IPVS Sync Connection (1) | | ||
103 | */ | 238 | */ |
104 | 239 | ||
105 | #define SYNC_MESG_HEADER_LEN 4 | 240 | #define SYNC_MESG_HEADER_LEN 4 |
106 | #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ | 241 | #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ |
107 | 242 | ||
108 | struct ip_vs_sync_mesg { | 243 | /* Version 0 header */ |
244 | struct ip_vs_sync_mesg_v0 { | ||
109 | __u8 nr_conns; | 245 | __u8 nr_conns; |
110 | __u8 syncid; | 246 | __u8 syncid; |
111 | __u16 size; | 247 | __u16 size; |
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg { | |||
113 | /* ip_vs_sync_conn entries start here */ | 249 | /* ip_vs_sync_conn entries start here */ |
114 | }; | 250 | }; |
115 | 251 | ||
116 | /* the maximum length of sync (sending/receiving) message */ | 252 | /* Version 1 header */ |
117 | static int sync_send_mesg_maxlen; | 253 | struct ip_vs_sync_mesg { |
118 | static int sync_recv_mesg_maxlen; | 254 | __u8 reserved; /* must be zero */ |
255 | __u8 syncid; | ||
256 | __u16 size; | ||
257 | __u8 nr_conns; | ||
258 | __s8 version; /* SYNC_PROTO_VER */ | ||
259 | __u16 spare; | ||
260 | /* ip_vs_sync_conn entries start here */ | ||
261 | }; | ||
119 | 262 | ||
120 | struct ip_vs_sync_buff { | 263 | struct ip_vs_sync_buff { |
121 | struct list_head list; | 264 | struct list_head list; |
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff { | |||
127 | unsigned char *end; | 270 | unsigned char *end; |
128 | }; | 271 | }; |
129 | 272 | ||
130 | |||
131 | /* the sync_buff list head and the lock */ | ||
132 | static LIST_HEAD(ip_vs_sync_queue); | ||
133 | static DEFINE_SPINLOCK(ip_vs_sync_lock); | ||
134 | |||
135 | /* current sync_buff for accepting new conn entries */ | ||
136 | static struct ip_vs_sync_buff *curr_sb = NULL; | ||
137 | static DEFINE_SPINLOCK(curr_sb_lock); | ||
138 | |||
139 | /* ipvs sync daemon state */ | ||
140 | volatile int ip_vs_sync_state = IP_VS_STATE_NONE; | ||
141 | volatile int ip_vs_master_syncid = 0; | ||
142 | volatile int ip_vs_backup_syncid = 0; | ||
143 | |||
144 | /* multicast interface name */ | ||
145 | char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
146 | char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
147 | |||
148 | /* sync daemon tasks */ | ||
149 | static struct task_struct *sync_master_thread; | ||
150 | static struct task_struct *sync_backup_thread; | ||
151 | |||
152 | /* multicast addr */ | 273 | /* multicast addr */ |
153 | static struct sockaddr_in mcast_addr = { | 274 | static struct sockaddr_in mcast_addr = { |
154 | .sin_family = AF_INET, | 275 | .sin_family = AF_INET, |
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = { | |||
156 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), | 277 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), |
157 | }; | 278 | }; |
158 | 279 | ||
280 | /* | ||
281 | * Copy of struct ip_vs_seq | ||
282 | * From unaligned network order to aligned host order | ||
283 | */ | ||
284 | static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) | ||
285 | { | ||
286 | ho->init_seq = get_unaligned_be32(&no->init_seq); | ||
287 | ho->delta = get_unaligned_be32(&no->delta); | ||
288 | ho->previous_delta = get_unaligned_be32(&no->previous_delta); | ||
289 | } | ||
290 | |||
291 | /* | ||
292 | * Copy of struct ip_vs_seq | ||
293 | * From Aligned host order to unaligned network order | ||
294 | */ | ||
295 | static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) | ||
296 | { | ||
297 | put_unaligned_be32(ho->init_seq, &no->init_seq); | ||
298 | put_unaligned_be32(ho->delta, &no->delta); | ||
299 | put_unaligned_be32(ho->previous_delta, &no->previous_delta); | ||
300 | } | ||
159 | 301 | ||
160 | static inline struct ip_vs_sync_buff *sb_dequeue(void) | 302 | static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) |
161 | { | 303 | { |
162 | struct ip_vs_sync_buff *sb; | 304 | struct ip_vs_sync_buff *sb; |
163 | 305 | ||
164 | spin_lock_bh(&ip_vs_sync_lock); | 306 | spin_lock_bh(&ipvs->sync_lock); |
165 | if (list_empty(&ip_vs_sync_queue)) { | 307 | if (list_empty(&ipvs->sync_queue)) { |
166 | sb = NULL; | 308 | sb = NULL; |
167 | } else { | 309 | } else { |
168 | sb = list_entry(ip_vs_sync_queue.next, | 310 | sb = list_entry(ipvs->sync_queue.next, |
169 | struct ip_vs_sync_buff, | 311 | struct ip_vs_sync_buff, |
170 | list); | 312 | list); |
171 | list_del(&sb->list); | 313 | list_del(&sb->list); |
172 | } | 314 | } |
173 | spin_unlock_bh(&ip_vs_sync_lock); | 315 | spin_unlock_bh(&ipvs->sync_lock); |
174 | 316 | ||
175 | return sb; | 317 | return sb; |
176 | } | 318 | } |
177 | 319 | ||
178 | static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) | 320 | /* |
321 | * Create a new sync buffer for Version 1 proto. | ||
322 | */ | ||
323 | static inline struct ip_vs_sync_buff * | ||
324 | ip_vs_sync_buff_create(struct netns_ipvs *ipvs) | ||
179 | { | 325 | { |
180 | struct ip_vs_sync_buff *sb; | 326 | struct ip_vs_sync_buff *sb; |
181 | 327 | ||
182 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) | 328 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) |
183 | return NULL; | 329 | return NULL; |
184 | 330 | ||
185 | if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { | 331 | sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); |
332 | if (!sb->mesg) { | ||
186 | kfree(sb); | 333 | kfree(sb); |
187 | return NULL; | 334 | return NULL; |
188 | } | 335 | } |
336 | sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ | ||
337 | sb->mesg->version = SYNC_PROTO_VER; | ||
338 | sb->mesg->syncid = ipvs->master_syncid; | ||
339 | sb->mesg->size = sizeof(struct ip_vs_sync_mesg); | ||
189 | sb->mesg->nr_conns = 0; | 340 | sb->mesg->nr_conns = 0; |
190 | sb->mesg->syncid = ip_vs_master_syncid; | 341 | sb->mesg->spare = 0; |
191 | sb->mesg->size = 4; | 342 | sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); |
192 | sb->head = (unsigned char *)sb->mesg + 4; | 343 | sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; |
193 | sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; | 344 | |
194 | sb->firstuse = jiffies; | 345 | sb->firstuse = jiffies; |
195 | return sb; | 346 | return sb; |
196 | } | 347 | } |
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) | |||
201 | kfree(sb); | 352 | kfree(sb); |
202 | } | 353 | } |
203 | 354 | ||
204 | static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | 355 | static inline void sb_queue_tail(struct netns_ipvs *ipvs) |
205 | { | 356 | { |
206 | spin_lock(&ip_vs_sync_lock); | 357 | struct ip_vs_sync_buff *sb = ipvs->sync_buff; |
207 | if (ip_vs_sync_state & IP_VS_STATE_MASTER) | 358 | |
208 | list_add_tail(&sb->list, &ip_vs_sync_queue); | 359 | spin_lock(&ipvs->sync_lock); |
360 | if (ipvs->sync_state & IP_VS_STATE_MASTER) | ||
361 | list_add_tail(&sb->list, &ipvs->sync_queue); | ||
209 | else | 362 | else |
210 | ip_vs_sync_buff_release(sb); | 363 | ip_vs_sync_buff_release(sb); |
211 | spin_unlock(&ip_vs_sync_lock); | 364 | spin_unlock(&ipvs->sync_lock); |
212 | } | 365 | } |
213 | 366 | ||
214 | /* | 367 | /* |
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | |||
216 | * than the specified time or the specified time is zero. | 369 | * than the specified time or the specified time is zero. |
217 | */ | 370 | */ |
218 | static inline struct ip_vs_sync_buff * | 371 | static inline struct ip_vs_sync_buff * |
219 | get_curr_sync_buff(unsigned long time) | 372 | get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) |
220 | { | 373 | { |
221 | struct ip_vs_sync_buff *sb; | 374 | struct ip_vs_sync_buff *sb; |
222 | 375 | ||
223 | spin_lock_bh(&curr_sb_lock); | 376 | spin_lock_bh(&ipvs->sync_buff_lock); |
224 | if (curr_sb && (time == 0 || | 377 | if (ipvs->sync_buff && (time == 0 || |
225 | time_before(jiffies - curr_sb->firstuse, time))) { | 378 | time_before(jiffies - ipvs->sync_buff->firstuse, time))) { |
226 | sb = curr_sb; | 379 | sb = ipvs->sync_buff; |
227 | curr_sb = NULL; | 380 | ipvs->sync_buff = NULL; |
228 | } else | 381 | } else |
229 | sb = NULL; | 382 | sb = NULL; |
230 | spin_unlock_bh(&curr_sb_lock); | 383 | spin_unlock_bh(&ipvs->sync_buff_lock); |
231 | return sb; | 384 | return sb; |
232 | } | 385 | } |
233 | 386 | ||
387 | /* | ||
388 | * Switch mode from sending version 0 or 1 | ||
389 | * - must handle sync_buf | ||
390 | */ | ||
391 | void ip_vs_sync_switch_mode(struct net *net, int mode) | ||
392 | { | ||
393 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
394 | |||
395 | if (!ipvs->sync_state & IP_VS_STATE_MASTER) | ||
396 | return; | ||
397 | if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) | ||
398 | return; | ||
399 | |||
400 | spin_lock_bh(&ipvs->sync_buff_lock); | ||
401 | /* Buffer empty ? then let buf_create do the job */ | ||
402 | if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { | ||
403 | kfree(ipvs->sync_buff); | ||
404 | ipvs->sync_buff = NULL; | ||
405 | } else { | ||
406 | spin_lock_bh(&ipvs->sync_lock); | ||
407 | if (ipvs->sync_state & IP_VS_STATE_MASTER) | ||
408 | list_add_tail(&ipvs->sync_buff->list, | ||
409 | &ipvs->sync_queue); | ||
410 | else | ||
411 | ip_vs_sync_buff_release(ipvs->sync_buff); | ||
412 | spin_unlock_bh(&ipvs->sync_lock); | ||
413 | } | ||
414 | spin_unlock_bh(&ipvs->sync_buff_lock); | ||
415 | } | ||
234 | 416 | ||
235 | /* | 417 | /* |
418 | * Create a new sync buffer for Version 0 proto. | ||
419 | */ | ||
420 | static inline struct ip_vs_sync_buff * | ||
421 | ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) | ||
422 | { | ||
423 | struct ip_vs_sync_buff *sb; | ||
424 | struct ip_vs_sync_mesg_v0 *mesg; | ||
425 | |||
426 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) | ||
427 | return NULL; | ||
428 | |||
429 | sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); | ||
430 | if (!sb->mesg) { | ||
431 | kfree(sb); | ||
432 | return NULL; | ||
433 | } | ||
434 | mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; | ||
435 | mesg->nr_conns = 0; | ||
436 | mesg->syncid = ipvs->master_syncid; | ||
437 | mesg->size = sizeof(struct ip_vs_sync_mesg_v0); | ||
438 | sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); | ||
439 | sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; | ||
440 | sb->firstuse = jiffies; | ||
441 | return sb; | ||
442 | } | ||
443 | |||
444 | /* | ||
445 | * Version 0 , could be switched in by sys_ctl. | ||
236 | * Add an ip_vs_conn information into the current sync_buff. | 446 | * Add an ip_vs_conn information into the current sync_buff. |
237 | * Called by ip_vs_in. | ||
238 | */ | 447 | */ |
239 | void ip_vs_sync_conn(struct ip_vs_conn *cp) | 448 | void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) |
240 | { | 449 | { |
241 | struct ip_vs_sync_mesg *m; | 450 | struct netns_ipvs *ipvs = net_ipvs(net); |
242 | struct ip_vs_sync_conn *s; | 451 | struct ip_vs_sync_mesg_v0 *m; |
452 | struct ip_vs_sync_conn_v0 *s; | ||
243 | int len; | 453 | int len; |
244 | 454 | ||
245 | spin_lock(&curr_sb_lock); | 455 | if (unlikely(cp->af != AF_INET)) |
246 | if (!curr_sb) { | 456 | return; |
247 | if (!(curr_sb=ip_vs_sync_buff_create())) { | 457 | /* Do not sync ONE PACKET */ |
248 | spin_unlock(&curr_sb_lock); | 458 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
459 | return; | ||
460 | |||
461 | spin_lock(&ipvs->sync_buff_lock); | ||
462 | if (!ipvs->sync_buff) { | ||
463 | ipvs->sync_buff = | ||
464 | ip_vs_sync_buff_create_v0(ipvs); | ||
465 | if (!ipvs->sync_buff) { | ||
466 | spin_unlock(&ipvs->sync_buff_lock); | ||
249 | pr_err("ip_vs_sync_buff_create failed.\n"); | 467 | pr_err("ip_vs_sync_buff_create failed.\n"); |
250 | return; | 468 | return; |
251 | } | 469 | } |
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) | |||
253 | 471 | ||
254 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : | 472 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : |
255 | SIMPLE_CONN_SIZE; | 473 | SIMPLE_CONN_SIZE; |
256 | m = curr_sb->mesg; | 474 | m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; |
257 | s = (struct ip_vs_sync_conn *)curr_sb->head; | 475 | s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; |
258 | 476 | ||
259 | /* copy members */ | 477 | /* copy members */ |
478 | s->reserved = 0; | ||
260 | s->protocol = cp->protocol; | 479 | s->protocol = cp->protocol; |
261 | s->cport = cp->cport; | 480 | s->cport = cp->cport; |
262 | s->vport = cp->vport; | 481 | s->vport = cp->vport; |
@@ -274,83 +493,366 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) | |||
274 | 493 | ||
275 | m->nr_conns++; | 494 | m->nr_conns++; |
276 | m->size += len; | 495 | m->size += len; |
277 | curr_sb->head += len; | 496 | ipvs->sync_buff->head += len; |
278 | 497 | ||
279 | /* check if there is a space for next one */ | 498 | /* check if there is a space for next one */ |
280 | if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { | 499 | if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { |
281 | sb_queue_tail(curr_sb); | 500 | sb_queue_tail(ipvs); |
282 | curr_sb = NULL; | 501 | ipvs->sync_buff = NULL; |
283 | } | 502 | } |
284 | spin_unlock(&curr_sb_lock); | 503 | spin_unlock(&ipvs->sync_buff_lock); |
285 | 504 | ||
286 | /* synchronize its controller if it has */ | 505 | /* synchronize its controller if it has */ |
287 | if (cp->control) | 506 | if (cp->control) |
288 | ip_vs_sync_conn(cp->control); | 507 | ip_vs_sync_conn(net, cp->control); |
508 | } | ||
509 | |||
510 | /* | ||
511 | * Add an ip_vs_conn information into the current sync_buff. | ||
512 | * Called by ip_vs_in. | ||
513 | * Sending Version 1 messages | ||
514 | */ | ||
515 | void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) | ||
516 | { | ||
517 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
518 | struct ip_vs_sync_mesg *m; | ||
519 | union ip_vs_sync_conn *s; | ||
520 | __u8 *p; | ||
521 | unsigned int len, pe_name_len, pad; | ||
522 | |||
523 | /* Handle old version of the protocol */ | ||
524 | if (ipvs->sysctl_sync_ver == 0) { | ||
525 | ip_vs_sync_conn_v0(net, cp); | ||
526 | return; | ||
527 | } | ||
528 | /* Do not sync ONE PACKET */ | ||
529 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | ||
530 | goto control; | ||
531 | sloop: | ||
532 | /* Sanity checks */ | ||
533 | pe_name_len = 0; | ||
534 | if (cp->pe_data_len) { | ||
535 | if (!cp->pe_data || !cp->dest) { | ||
536 | IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); | ||
537 | return; | ||
538 | } | ||
539 | pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); | ||
540 | } | ||
541 | |||
542 | spin_lock(&ipvs->sync_buff_lock); | ||
543 | |||
544 | #ifdef CONFIG_IP_VS_IPV6 | ||
545 | if (cp->af == AF_INET6) | ||
546 | len = sizeof(struct ip_vs_sync_v6); | ||
547 | else | ||
548 | #endif | ||
549 | len = sizeof(struct ip_vs_sync_v4); | ||
550 | |||
551 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) | ||
552 | len += sizeof(struct ip_vs_sync_conn_options) + 2; | ||
553 | |||
554 | if (cp->pe_data_len) | ||
555 | len += cp->pe_data_len + 2; /* + Param hdr field */ | ||
556 | if (pe_name_len) | ||
557 | len += pe_name_len + 2; | ||
558 | |||
559 | /* check if there is a space for this one */ | ||
560 | pad = 0; | ||
561 | if (ipvs->sync_buff) { | ||
562 | pad = (4 - (size_t)ipvs->sync_buff->head) & 3; | ||
563 | if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { | ||
564 | sb_queue_tail(ipvs); | ||
565 | ipvs->sync_buff = NULL; | ||
566 | pad = 0; | ||
567 | } | ||
568 | } | ||
569 | |||
570 | if (!ipvs->sync_buff) { | ||
571 | ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); | ||
572 | if (!ipvs->sync_buff) { | ||
573 | spin_unlock(&ipvs->sync_buff_lock); | ||
574 | pr_err("ip_vs_sync_buff_create failed.\n"); | ||
575 | return; | ||
576 | } | ||
577 | } | ||
578 | |||
579 | m = ipvs->sync_buff->mesg; | ||
580 | p = ipvs->sync_buff->head; | ||
581 | ipvs->sync_buff->head += pad + len; | ||
582 | m->size += pad + len; | ||
583 | /* Add ev. padding from prev. sync_conn */ | ||
584 | while (pad--) | ||
585 | *(p++) = 0; | ||
586 | |||
587 | s = (union ip_vs_sync_conn *)p; | ||
588 | |||
589 | /* Set message type & copy members */ | ||
590 | s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); | ||
591 | s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ | ||
592 | s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); | ||
593 | s->v4.state = htons(cp->state); | ||
594 | s->v4.protocol = cp->protocol; | ||
595 | s->v4.cport = cp->cport; | ||
596 | s->v4.vport = cp->vport; | ||
597 | s->v4.dport = cp->dport; | ||
598 | s->v4.fwmark = htonl(cp->fwmark); | ||
599 | s->v4.timeout = htonl(cp->timeout / HZ); | ||
600 | m->nr_conns++; | ||
601 | |||
602 | #ifdef CONFIG_IP_VS_IPV6 | ||
603 | if (cp->af == AF_INET6) { | ||
604 | p += sizeof(struct ip_vs_sync_v6); | ||
605 | ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6); | ||
606 | ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6); | ||
607 | ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6); | ||
608 | } else | ||
609 | #endif | ||
610 | { | ||
611 | p += sizeof(struct ip_vs_sync_v4); /* options ptr */ | ||
612 | s->v4.caddr = cp->caddr.ip; | ||
613 | s->v4.vaddr = cp->vaddr.ip; | ||
614 | s->v4.daddr = cp->daddr.ip; | ||
615 | } | ||
616 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | ||
617 | *(p++) = IPVS_OPT_SEQ_DATA; | ||
618 | *(p++) = sizeof(struct ip_vs_sync_conn_options); | ||
619 | hton_seq((struct ip_vs_seq *)p, &cp->in_seq); | ||
620 | p += sizeof(struct ip_vs_seq); | ||
621 | hton_seq((struct ip_vs_seq *)p, &cp->out_seq); | ||
622 | p += sizeof(struct ip_vs_seq); | ||
623 | } | ||
624 | /* Handle pe data */ | ||
625 | if (cp->pe_data_len && cp->pe_data) { | ||
626 | *(p++) = IPVS_OPT_PE_DATA; | ||
627 | *(p++) = cp->pe_data_len; | ||
628 | memcpy(p, cp->pe_data, cp->pe_data_len); | ||
629 | p += cp->pe_data_len; | ||
630 | if (pe_name_len) { | ||
631 | /* Add PE_NAME */ | ||
632 | *(p++) = IPVS_OPT_PE_NAME; | ||
633 | *(p++) = pe_name_len; | ||
634 | memcpy(p, cp->pe->name, pe_name_len); | ||
635 | p += pe_name_len; | ||
636 | } | ||
637 | } | ||
638 | |||
639 | spin_unlock(&ipvs->sync_buff_lock); | ||
640 | |||
641 | control: | ||
642 | /* synchronize its controller if it has */ | ||
643 | cp = cp->control; | ||
644 | if (!cp) | ||
645 | return; | ||
646 | /* | ||
647 | * Reduce sync rate for templates | ||
648 | * i.e only increment in_pkts for Templates. | ||
649 | */ | ||
650 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) { | ||
651 | int pkts = atomic_add_return(1, &cp->in_pkts); | ||
652 | |||
653 | if (pkts % ipvs->sysctl_sync_threshold[1] != 1) | ||
654 | return; | ||
655 | } | ||
656 | goto sloop; | ||
289 | } | 657 | } |
290 | 658 | ||
659 | /* | ||
660 | * fill_param used by version 1 | ||
661 | */ | ||
291 | static inline int | 662 | static inline int |
292 | ip_vs_conn_fill_param_sync(int af, int protocol, | 663 | ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, |
293 | const union nf_inet_addr *caddr, __be16 cport, | 664 | struct ip_vs_conn_param *p, |
294 | const union nf_inet_addr *vaddr, __be16 vport, | 665 | __u8 *pe_data, unsigned int pe_data_len, |
295 | struct ip_vs_conn_param *p) | 666 | __u8 *pe_name, unsigned int pe_name_len) |
296 | { | 667 | { |
297 | /* XXX: Need to take into account persistence engine */ | 668 | #ifdef CONFIG_IP_VS_IPV6 |
298 | ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); | 669 | if (af == AF_INET6) |
670 | ip_vs_conn_fill_param(net, af, sc->v6.protocol, | ||
671 | (const union nf_inet_addr *)&sc->v6.caddr, | ||
672 | sc->v6.cport, | ||
673 | (const union nf_inet_addr *)&sc->v6.vaddr, | ||
674 | sc->v6.vport, p); | ||
675 | else | ||
676 | #endif | ||
677 | ip_vs_conn_fill_param(net, af, sc->v4.protocol, | ||
678 | (const union nf_inet_addr *)&sc->v4.caddr, | ||
679 | sc->v4.cport, | ||
680 | (const union nf_inet_addr *)&sc->v4.vaddr, | ||
681 | sc->v4.vport, p); | ||
682 | /* Handle pe data */ | ||
683 | if (pe_data_len) { | ||
684 | if (pe_name_len) { | ||
685 | char buff[IP_VS_PENAME_MAXLEN+1]; | ||
686 | |||
687 | memcpy(buff, pe_name, pe_name_len); | ||
688 | buff[pe_name_len]=0; | ||
689 | p->pe = __ip_vs_pe_getbyname(buff); | ||
690 | if (!p->pe) { | ||
691 | IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", | ||
692 | buff); | ||
693 | return 1; | ||
694 | } | ||
695 | } else { | ||
696 | IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); | ||
697 | return 1; | ||
698 | } | ||
699 | |||
700 | p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); | ||
701 | if (!p->pe_data) { | ||
702 | if (p->pe->module) | ||
703 | module_put(p->pe->module); | ||
704 | return -ENOMEM; | ||
705 | } | ||
706 | memcpy(p->pe_data, pe_data, pe_data_len); | ||
707 | p->pe_data_len = pe_data_len; | ||
708 | } | ||
299 | return 0; | 709 | return 0; |
300 | } | 710 | } |
301 | 711 | ||
302 | /* | 712 | /* |
303 | * Process received multicast message and create the corresponding | 713 | * Connection Add / Update. |
304 | * ip_vs_conn entries. | 714 | * Common for version 0 and 1 reception of backup sync_conns. |
715 | * Param: ... | ||
716 | * timeout is in sec. | ||
305 | */ | 717 | */ |
306 | static void ip_vs_process_message(const char *buffer, const size_t buflen) | 718 | static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, |
719 | unsigned int flags, unsigned int state, | ||
720 | unsigned int protocol, unsigned int type, | ||
721 | const union nf_inet_addr *daddr, __be16 dport, | ||
722 | unsigned long timeout, __u32 fwmark, | ||
723 | struct ip_vs_sync_conn_options *opt) | ||
307 | { | 724 | { |
308 | struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; | ||
309 | struct ip_vs_sync_conn *s; | ||
310 | struct ip_vs_sync_conn_options *opt; | ||
311 | struct ip_vs_conn *cp; | ||
312 | struct ip_vs_protocol *pp; | ||
313 | struct ip_vs_dest *dest; | 725 | struct ip_vs_dest *dest; |
314 | struct ip_vs_conn_param param; | 726 | struct ip_vs_conn *cp; |
315 | char *p; | 727 | struct netns_ipvs *ipvs = net_ipvs(net); |
316 | int i; | ||
317 | 728 | ||
318 | if (buflen < sizeof(struct ip_vs_sync_mesg)) { | 729 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) |
319 | IP_VS_ERR_RL("sync message header too short\n"); | 730 | cp = ip_vs_conn_in_get(param); |
320 | return; | 731 | else |
321 | } | 732 | cp = ip_vs_ct_in_get(param); |
322 | 733 | ||
323 | /* Convert size back to host byte order */ | 734 | if (cp && param->pe_data) /* Free pe_data */ |
324 | m->size = ntohs(m->size); | 735 | kfree(param->pe_data); |
736 | if (!cp) { | ||
737 | /* | ||
738 | * Find the appropriate destination for the connection. | ||
739 | * If it is not found the connection will remain unbound | ||
740 | * but still handled. | ||
741 | */ | ||
742 | dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, | ||
743 | param->vport, protocol, fwmark); | ||
325 | 744 | ||
326 | if (buflen != m->size) { | 745 | /* Set the approprite ativity flag */ |
327 | IP_VS_ERR_RL("bogus sync message size\n"); | 746 | if (protocol == IPPROTO_TCP) { |
328 | return; | 747 | if (state != IP_VS_TCP_S_ESTABLISHED) |
748 | flags |= IP_VS_CONN_F_INACTIVE; | ||
749 | else | ||
750 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
751 | } else if (protocol == IPPROTO_SCTP) { | ||
752 | if (state != IP_VS_SCTP_S_ESTABLISHED) | ||
753 | flags |= IP_VS_CONN_F_INACTIVE; | ||
754 | else | ||
755 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
756 | } | ||
757 | cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); | ||
758 | if (dest) | ||
759 | atomic_dec(&dest->refcnt); | ||
760 | if (!cp) { | ||
761 | if (param->pe_data) | ||
762 | kfree(param->pe_data); | ||
763 | IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); | ||
764 | return; | ||
765 | } | ||
766 | } else if (!cp->dest) { | ||
767 | dest = ip_vs_try_bind_dest(cp); | ||
768 | if (dest) | ||
769 | atomic_dec(&dest->refcnt); | ||
770 | } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | ||
771 | (cp->state != state)) { | ||
772 | /* update active/inactive flag for the connection */ | ||
773 | dest = cp->dest; | ||
774 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
775 | (state != IP_VS_TCP_S_ESTABLISHED)) { | ||
776 | atomic_dec(&dest->activeconns); | ||
777 | atomic_inc(&dest->inactconns); | ||
778 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
779 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
780 | (state == IP_VS_TCP_S_ESTABLISHED)) { | ||
781 | atomic_inc(&dest->activeconns); | ||
782 | atomic_dec(&dest->inactconns); | ||
783 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
784 | } | ||
785 | } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && | ||
786 | (cp->state != state)) { | ||
787 | dest = cp->dest; | ||
788 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
789 | (state != IP_VS_SCTP_S_ESTABLISHED)) { | ||
790 | atomic_dec(&dest->activeconns); | ||
791 | atomic_inc(&dest->inactconns); | ||
792 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
793 | } | ||
329 | } | 794 | } |
330 | 795 | ||
331 | /* SyncID sanity check */ | 796 | if (opt) |
332 | if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { | 797 | memcpy(&cp->in_seq, opt, sizeof(*opt)); |
333 | IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", | 798 | atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); |
334 | m->syncid); | 799 | cp->state = state; |
335 | return; | 800 | cp->old_state = cp->state; |
801 | /* | ||
802 | * For Ver 0 messages style | ||
803 | * - Not possible to recover the right timeout for templates | ||
804 | * - can not find the right fwmark | ||
805 | * virtual service. If needed, we can do it for | ||
806 | * non-fwmark persistent services. | ||
807 | * Ver 1 messages style. | ||
808 | * - No problem. | ||
809 | */ | ||
810 | if (timeout) { | ||
811 | if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) | ||
812 | timeout = MAX_SCHEDULE_TIMEOUT / HZ; | ||
813 | cp->timeout = timeout*HZ; | ||
814 | } else { | ||
815 | struct ip_vs_proto_data *pd; | ||
816 | |||
817 | pd = ip_vs_proto_data_get(net, protocol); | ||
818 | if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) | ||
819 | cp->timeout = pd->timeout_table[state]; | ||
820 | else | ||
821 | cp->timeout = (3*60*HZ); | ||
336 | } | 822 | } |
823 | ip_vs_conn_put(cp); | ||
824 | } | ||
337 | 825 | ||
338 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); | 826 | /* |
827 | * Process received multicast message for Version 0 | ||
828 | */ | ||
829 | static void ip_vs_process_message_v0(struct net *net, const char *buffer, | ||
830 | const size_t buflen) | ||
831 | { | ||
832 | struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; | ||
833 | struct ip_vs_sync_conn_v0 *s; | ||
834 | struct ip_vs_sync_conn_options *opt; | ||
835 | struct ip_vs_protocol *pp; | ||
836 | struct ip_vs_conn_param param; | ||
837 | char *p; | ||
838 | int i; | ||
839 | |||
840 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); | ||
339 | for (i=0; i<m->nr_conns; i++) { | 841 | for (i=0; i<m->nr_conns; i++) { |
340 | unsigned flags, state; | 842 | unsigned flags, state; |
341 | 843 | ||
342 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { | 844 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { |
343 | IP_VS_ERR_RL("bogus conn in sync message\n"); | 845 | IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); |
344 | return; | 846 | return; |
345 | } | 847 | } |
346 | s = (struct ip_vs_sync_conn *) p; | 848 | s = (struct ip_vs_sync_conn_v0 *) p; |
347 | flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; | 849 | flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; |
348 | flags &= ~IP_VS_CONN_F_HASHED; | 850 | flags &= ~IP_VS_CONN_F_HASHED; |
349 | if (flags & IP_VS_CONN_F_SEQ_MASK) { | 851 | if (flags & IP_VS_CONN_F_SEQ_MASK) { |
350 | opt = (struct ip_vs_sync_conn_options *)&s[1]; | 852 | opt = (struct ip_vs_sync_conn_options *)&s[1]; |
351 | p += FULL_CONN_SIZE; | 853 | p += FULL_CONN_SIZE; |
352 | if (p > buffer+buflen) { | 854 | if (p > buffer+buflen) { |
353 | IP_VS_ERR_RL("bogus conn options in sync message\n"); | 855 | IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); |
354 | return; | 856 | return; |
355 | } | 857 | } |
356 | } else { | 858 | } else { |
@@ -362,118 +864,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) | |||
362 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { | 864 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
363 | pp = ip_vs_proto_get(s->protocol); | 865 | pp = ip_vs_proto_get(s->protocol); |
364 | if (!pp) { | 866 | if (!pp) { |
365 | IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", | 867 | IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", |
366 | s->protocol); | 868 | s->protocol); |
367 | continue; | 869 | continue; |
368 | } | 870 | } |
369 | if (state >= pp->num_states) { | 871 | if (state >= pp->num_states) { |
370 | IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", | 872 | IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", |
371 | pp->name, state); | 873 | pp->name, state); |
372 | continue; | 874 | continue; |
373 | } | 875 | } |
374 | } else { | 876 | } else { |
375 | /* protocol in templates is not used for state/timeout */ | 877 | /* protocol in templates is not used for state/timeout */ |
376 | pp = NULL; | ||
377 | if (state > 0) { | 878 | if (state > 0) { |
378 | IP_VS_DBG(2, "Invalid template state %u in sync msg\n", | 879 | IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", |
379 | state); | 880 | state); |
380 | state = 0; | 881 | state = 0; |
381 | } | 882 | } |
382 | } | 883 | } |
383 | 884 | ||
384 | { | 885 | ip_vs_conn_fill_param(net, AF_INET, s->protocol, |
385 | if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, | 886 | (const union nf_inet_addr *)&s->caddr, |
386 | (union nf_inet_addr *)&s->caddr, | 887 | s->cport, |
387 | s->cport, | 888 | (const union nf_inet_addr *)&s->vaddr, |
388 | (union nf_inet_addr *)&s->vaddr, | 889 | s->vport, ¶m); |
389 | s->vport, ¶m)) { | 890 | |
390 | pr_err("ip_vs_conn_fill_param_sync failed"); | 891 | /* Send timeout as Zero */ |
391 | return; | 892 | ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET, |
893 | (union nf_inet_addr *)&s->daddr, s->dport, | ||
894 | 0, 0, opt); | ||
895 | } | ||
896 | } | ||
897 | |||
898 | /* | ||
899 | * Handle options | ||
900 | */ | ||
901 | static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, | ||
902 | __u32 *opt_flags, | ||
903 | struct ip_vs_sync_conn_options *opt) | ||
904 | { | ||
905 | struct ip_vs_sync_conn_options *topt; | ||
906 | |||
907 | topt = (struct ip_vs_sync_conn_options *)p; | ||
908 | |||
909 | if (plen != sizeof(struct ip_vs_sync_conn_options)) { | ||
910 | IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); | ||
911 | return -EINVAL; | ||
912 | } | ||
913 | if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { | ||
914 | IP_VS_DBG(2, "BACKUP, conn options found twice\n"); | ||
915 | return -EINVAL; | ||
916 | } | ||
917 | ntoh_seq(&topt->in_seq, &opt->in_seq); | ||
918 | ntoh_seq(&topt->out_seq, &opt->out_seq); | ||
919 | *opt_flags |= IPVS_OPT_F_SEQ_DATA; | ||
920 | return 0; | ||
921 | } | ||
922 | |||
923 | static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, | ||
924 | __u8 **data, unsigned int maxlen, | ||
925 | __u32 *opt_flags, __u32 flag) | ||
926 | { | ||
927 | if (plen > maxlen) { | ||
928 | IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); | ||
929 | return -EINVAL; | ||
930 | } | ||
931 | if (*opt_flags & flag) { | ||
932 | IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); | ||
933 | return -EINVAL; | ||
934 | } | ||
935 | *data_len = plen; | ||
936 | *data = p; | ||
937 | *opt_flags |= flag; | ||
938 | return 0; | ||
939 | } | ||
940 | /* | ||
941 | * Process a Version 1 sync. connection | ||
942 | */ | ||
943 | static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) | ||
944 | { | ||
945 | struct ip_vs_sync_conn_options opt; | ||
946 | union ip_vs_sync_conn *s; | ||
947 | struct ip_vs_protocol *pp; | ||
948 | struct ip_vs_conn_param param; | ||
949 | __u32 flags; | ||
950 | unsigned int af, state, pe_data_len=0, pe_name_len=0; | ||
951 | __u8 *pe_data=NULL, *pe_name=NULL; | ||
952 | __u32 opt_flags=0; | ||
953 | int retc=0; | ||
954 | |||
955 | s = (union ip_vs_sync_conn *) p; | ||
956 | |||
957 | if (s->v6.type & STYPE_F_INET6) { | ||
958 | #ifdef CONFIG_IP_VS_IPV6 | ||
959 | af = AF_INET6; | ||
960 | p += sizeof(struct ip_vs_sync_v6); | ||
961 | #else | ||
962 | IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); | ||
963 | retc = 10; | ||
964 | goto out; | ||
965 | #endif | ||
966 | } else if (!s->v4.type) { | ||
967 | af = AF_INET; | ||
968 | p += sizeof(struct ip_vs_sync_v4); | ||
969 | } else { | ||
970 | return -10; | ||
971 | } | ||
972 | if (p > msg_end) | ||
973 | return -20; | ||
974 | |||
975 | /* Process optional params check Type & Len. */ | ||
976 | while (p < msg_end) { | ||
977 | int ptype; | ||
978 | int plen; | ||
979 | |||
980 | if (p+2 > msg_end) | ||
981 | return -30; | ||
982 | ptype = *(p++); | ||
983 | plen = *(p++); | ||
984 | |||
985 | if (!plen || ((p + plen) > msg_end)) | ||
986 | return -40; | ||
987 | /* Handle seq option p = param data */ | ||
988 | switch (ptype & ~IPVS_OPT_F_PARAM) { | ||
989 | case IPVS_OPT_SEQ_DATA: | ||
990 | if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) | ||
991 | return -50; | ||
992 | break; | ||
993 | |||
994 | case IPVS_OPT_PE_DATA: | ||
995 | if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, | ||
996 | IP_VS_PEDATA_MAXLEN, &opt_flags, | ||
997 | IPVS_OPT_F_PE_DATA)) | ||
998 | return -60; | ||
999 | break; | ||
1000 | |||
1001 | case IPVS_OPT_PE_NAME: | ||
1002 | if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, | ||
1003 | IP_VS_PENAME_MAXLEN, &opt_flags, | ||
1004 | IPVS_OPT_F_PE_NAME)) | ||
1005 | return -70; | ||
1006 | break; | ||
1007 | |||
1008 | default: | ||
1009 | /* Param data mandatory ? */ | ||
1010 | if (!(ptype & IPVS_OPT_F_PARAM)) { | ||
1011 | IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", | ||
1012 | ptype & ~IPVS_OPT_F_PARAM); | ||
1013 | retc = 20; | ||
1014 | goto out; | ||
392 | } | 1015 | } |
393 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) | ||
394 | cp = ip_vs_conn_in_get(¶m); | ||
395 | else | ||
396 | cp = ip_vs_ct_in_get(¶m); | ||
397 | } | 1016 | } |
398 | if (!cp) { | 1017 | p += plen; /* Next option */ |
399 | /* | 1018 | } |
400 | * Find the appropriate destination for the connection. | 1019 | |
401 | * If it is not found the connection will remain unbound | 1020 | /* Get flags and Mask off unsupported */ |
402 | * but still handled. | 1021 | flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; |
403 | */ | 1022 | flags |= IP_VS_CONN_F_SYNC; |
404 | dest = ip_vs_find_dest(AF_INET, | 1023 | state = ntohs(s->v4.state); |
405 | (union nf_inet_addr *)&s->daddr, | 1024 | |
406 | s->dport, | 1025 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
407 | (union nf_inet_addr *)&s->vaddr, | 1026 | pp = ip_vs_proto_get(s->v4.protocol); |
408 | s->vport, | 1027 | if (!pp) { |
409 | s->protocol); | 1028 | IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", |
410 | /* Set the approprite ativity flag */ | 1029 | s->v4.protocol); |
411 | if (s->protocol == IPPROTO_TCP) { | 1030 | retc = 30; |
412 | if (state != IP_VS_TCP_S_ESTABLISHED) | 1031 | goto out; |
413 | flags |= IP_VS_CONN_F_INACTIVE; | 1032 | } |
414 | else | 1033 | if (state >= pp->num_states) { |
415 | flags &= ~IP_VS_CONN_F_INACTIVE; | 1034 | IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", |
416 | } else if (s->protocol == IPPROTO_SCTP) { | 1035 | pp->name, state); |
417 | if (state != IP_VS_SCTP_S_ESTABLISHED) | 1036 | retc = 40; |
418 | flags |= IP_VS_CONN_F_INACTIVE; | 1037 | goto out; |
419 | else | 1038 | } |
420 | flags &= ~IP_VS_CONN_F_INACTIVE; | 1039 | } else { |
1040 | /* protocol in templates is not used for state/timeout */ | ||
1041 | if (state > 0) { | ||
1042 | IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", | ||
1043 | state); | ||
1044 | state = 0; | ||
1045 | } | ||
1046 | } | ||
1047 | if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, | ||
1048 | pe_data_len, pe_name, pe_name_len)) { | ||
1049 | retc = 50; | ||
1050 | goto out; | ||
1051 | } | ||
1052 | /* If only IPv4, just silent skip IPv6 */ | ||
1053 | if (af == AF_INET) | ||
1054 | ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af, | ||
1055 | (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, | ||
1056 | ntohl(s->v4.timeout), ntohl(s->v4.fwmark), | ||
1057 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) | ||
1058 | ); | ||
1059 | #ifdef CONFIG_IP_VS_IPV6 | ||
1060 | else | ||
1061 | ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af, | ||
1062 | (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, | ||
1063 | ntohl(s->v6.timeout), ntohl(s->v6.fwmark), | ||
1064 | (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) | ||
1065 | ); | ||
1066 | #endif | ||
1067 | return 0; | ||
1068 | /* Error exit */ | ||
1069 | out: | ||
1070 | IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); | ||
1071 | return retc; | ||
1072 | |||
1073 | } | ||
1074 | /* | ||
1075 | * Process received multicast message and create the corresponding | ||
1076 | * ip_vs_conn entries. | ||
1077 | * Handles Version 0 & 1 | ||
1078 | */ | ||
1079 | static void ip_vs_process_message(struct net *net, __u8 *buffer, | ||
1080 | const size_t buflen) | ||
1081 | { | ||
1082 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1083 | struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; | ||
1084 | __u8 *p, *msg_end; | ||
1085 | int i, nr_conns; | ||
1086 | |||
1087 | if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { | ||
1088 | IP_VS_DBG(2, "BACKUP, message header too short\n"); | ||
1089 | return; | ||
1090 | } | ||
1091 | /* Convert size back to host byte order */ | ||
1092 | m2->size = ntohs(m2->size); | ||
1093 | |||
1094 | if (buflen != m2->size) { | ||
1095 | IP_VS_DBG(2, "BACKUP, bogus message size\n"); | ||
1096 | return; | ||
1097 | } | ||
1098 | /* SyncID sanity check */ | ||
1099 | if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { | ||
1100 | IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); | ||
1101 | return; | ||
1102 | } | ||
1103 | /* Handle version 1 message */ | ||
1104 | if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) | ||
1105 | && (m2->spare == 0)) { | ||
1106 | |||
1107 | msg_end = buffer + sizeof(struct ip_vs_sync_mesg); | ||
1108 | nr_conns = m2->nr_conns; | ||
1109 | |||
1110 | for (i=0; i<nr_conns; i++) { | ||
1111 | union ip_vs_sync_conn *s; | ||
1112 | unsigned size; | ||
1113 | int retc; | ||
1114 | |||
1115 | p = msg_end; | ||
1116 | if (p + sizeof(s->v4) > buffer+buflen) { | ||
1117 | IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n"); | ||
1118 | return; | ||
421 | } | 1119 | } |
422 | cp = ip_vs_conn_new(¶m, | 1120 | s = (union ip_vs_sync_conn *)p; |
423 | (union nf_inet_addr *)&s->daddr, | 1121 | size = ntohs(s->v4.ver_size) & SVER_MASK; |
424 | s->dport, flags, dest); | 1122 | msg_end = p + size; |
425 | if (dest) | 1123 | /* Basic sanity checks */ |
426 | atomic_dec(&dest->refcnt); | 1124 | if (msg_end > buffer+buflen) { |
427 | if (!cp) { | 1125 | IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); |
428 | pr_err("ip_vs_conn_new failed\n"); | ||
429 | return; | 1126 | return; |
430 | } | 1127 | } |
431 | } else if (!cp->dest) { | 1128 | if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { |
432 | dest = ip_vs_try_bind_dest(cp); | 1129 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", |
433 | if (dest) | 1130 | ntohs(s->v4.ver_size) >> SVER_SHIFT); |
434 | atomic_dec(&dest->refcnt); | 1131 | return; |
435 | } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | ||
436 | (cp->state != state)) { | ||
437 | /* update active/inactive flag for the connection */ | ||
438 | dest = cp->dest; | ||
439 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
440 | (state != IP_VS_TCP_S_ESTABLISHED)) { | ||
441 | atomic_dec(&dest->activeconns); | ||
442 | atomic_inc(&dest->inactconns); | ||
443 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
444 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
445 | (state == IP_VS_TCP_S_ESTABLISHED)) { | ||
446 | atomic_inc(&dest->activeconns); | ||
447 | atomic_dec(&dest->inactconns); | ||
448 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
449 | } | 1132 | } |
450 | } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && | 1133 | /* Process a single sync_conn */ |
451 | (cp->state != state)) { | 1134 | retc = ip_vs_proc_sync_conn(net, p, msg_end); |
452 | dest = cp->dest; | 1135 | if (retc < 0) { |
453 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | 1136 | IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", |
454 | (state != IP_VS_SCTP_S_ESTABLISHED)) { | 1137 | retc); |
455 | atomic_dec(&dest->activeconns); | 1138 | return; |
456 | atomic_inc(&dest->inactconns); | ||
457 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
458 | } | 1139 | } |
1140 | /* Make sure we have 32 bit alignment */ | ||
1141 | msg_end = p + ((size + 3) & ~3); | ||
459 | } | 1142 | } |
460 | 1143 | } else { | |
461 | if (opt) | 1144 | /* Old type of message */ |
462 | memcpy(&cp->in_seq, opt, sizeof(*opt)); | 1145 | ip_vs_process_message_v0(net, buffer, buflen); |
463 | atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); | 1146 | return; |
464 | cp->state = state; | ||
465 | cp->old_state = cp->state; | ||
466 | /* | ||
467 | * We can not recover the right timeout for templates | ||
468 | * in all cases, we can not find the right fwmark | ||
469 | * virtual service. If needed, we can do it for | ||
470 | * non-fwmark persistent services. | ||
471 | */ | ||
472 | if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) | ||
473 | cp->timeout = pp->timeout_table[state]; | ||
474 | else | ||
475 | cp->timeout = (3*60*HZ); | ||
476 | ip_vs_conn_put(cp); | ||
477 | } | 1147 | } |
478 | } | 1148 | } |
479 | 1149 | ||
@@ -511,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname) | |||
511 | { | 1181 | { |
512 | struct net_device *dev; | 1182 | struct net_device *dev; |
513 | struct inet_sock *inet = inet_sk(sk); | 1183 | struct inet_sock *inet = inet_sk(sk); |
1184 | struct net *net = sock_net(sk); | ||
514 | 1185 | ||
515 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | 1186 | dev = __dev_get_by_name(net, ifname); |
1187 | if (!dev) | ||
516 | return -ENODEV; | 1188 | return -ENODEV; |
517 | 1189 | ||
518 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | 1190 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) |
@@ -531,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname) | |||
531 | * Set the maximum length of sync message according to the | 1203 | * Set the maximum length of sync message according to the |
532 | * specified interface's MTU. | 1204 | * specified interface's MTU. |
533 | */ | 1205 | */ |
534 | static int set_sync_mesg_maxlen(int sync_state) | 1206 | static int set_sync_mesg_maxlen(struct net *net, int sync_state) |
535 | { | 1207 | { |
1208 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
536 | struct net_device *dev; | 1209 | struct net_device *dev; |
537 | int num; | 1210 | int num; |
538 | 1211 | ||
539 | if (sync_state == IP_VS_STATE_MASTER) { | 1212 | if (sync_state == IP_VS_STATE_MASTER) { |
540 | if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) | 1213 | dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); |
1214 | if (!dev) | ||
541 | return -ENODEV; | 1215 | return -ENODEV; |
542 | 1216 | ||
543 | num = (dev->mtu - sizeof(struct iphdr) - | 1217 | num = (dev->mtu - sizeof(struct iphdr) - |
544 | sizeof(struct udphdr) - | 1218 | sizeof(struct udphdr) - |
545 | SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; | 1219 | SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; |
546 | sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + | 1220 | ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + |
547 | SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); | 1221 | SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); |
548 | IP_VS_DBG(7, "setting the maximum length of sync sending " | 1222 | IP_VS_DBG(7, "setting the maximum length of sync sending " |
549 | "message %d.\n", sync_send_mesg_maxlen); | 1223 | "message %d.\n", ipvs->send_mesg_maxlen); |
550 | } else if (sync_state == IP_VS_STATE_BACKUP) { | 1224 | } else if (sync_state == IP_VS_STATE_BACKUP) { |
551 | if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) | 1225 | dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); |
1226 | if (!dev) | ||
552 | return -ENODEV; | 1227 | return -ENODEV; |
553 | 1228 | ||
554 | sync_recv_mesg_maxlen = dev->mtu - | 1229 | ipvs->recv_mesg_maxlen = dev->mtu - |
555 | sizeof(struct iphdr) - sizeof(struct udphdr); | 1230 | sizeof(struct iphdr) - sizeof(struct udphdr); |
556 | IP_VS_DBG(7, "setting the maximum length of sync receiving " | 1231 | IP_VS_DBG(7, "setting the maximum length of sync receiving " |
557 | "message %d.\n", sync_recv_mesg_maxlen); | 1232 | "message %d.\n", ipvs->recv_mesg_maxlen); |
558 | } | 1233 | } |
559 | 1234 | ||
560 | return 0; | 1235 | return 0; |
@@ -569,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state) | |||
569 | static int | 1244 | static int |
570 | join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) | 1245 | join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) |
571 | { | 1246 | { |
1247 | struct net *net = sock_net(sk); | ||
572 | struct ip_mreqn mreq; | 1248 | struct ip_mreqn mreq; |
573 | struct net_device *dev; | 1249 | struct net_device *dev; |
574 | int ret; | 1250 | int ret; |
@@ -576,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) | |||
576 | memset(&mreq, 0, sizeof(mreq)); | 1252 | memset(&mreq, 0, sizeof(mreq)); |
577 | memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); | 1253 | memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); |
578 | 1254 | ||
579 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | 1255 | dev = __dev_get_by_name(net, ifname); |
1256 | if (!dev) | ||
580 | return -ENODEV; | 1257 | return -ENODEV; |
581 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | 1258 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) |
582 | return -EINVAL; | 1259 | return -EINVAL; |
@@ -593,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) | |||
593 | 1270 | ||
594 | static int bind_mcastif_addr(struct socket *sock, char *ifname) | 1271 | static int bind_mcastif_addr(struct socket *sock, char *ifname) |
595 | { | 1272 | { |
1273 | struct net *net = sock_net(sock->sk); | ||
596 | struct net_device *dev; | 1274 | struct net_device *dev; |
597 | __be32 addr; | 1275 | __be32 addr; |
598 | struct sockaddr_in sin; | 1276 | struct sockaddr_in sin; |
599 | 1277 | ||
600 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | 1278 | dev = __dev_get_by_name(net, ifname); |
1279 | if (!dev) | ||
601 | return -ENODEV; | 1280 | return -ENODEV; |
602 | 1281 | ||
603 | addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | 1282 | addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); |
@@ -619,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) | |||
619 | /* | 1298 | /* |
620 | * Set up sending multicast socket over UDP | 1299 | * Set up sending multicast socket over UDP |
621 | */ | 1300 | */ |
622 | static struct socket * make_send_sock(void) | 1301 | static struct socket *make_send_sock(struct net *net) |
623 | { | 1302 | { |
1303 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
624 | struct socket *sock; | 1304 | struct socket *sock; |
625 | int result; | 1305 | int result; |
626 | 1306 | ||
@@ -631,7 +1311,7 @@ static struct socket * make_send_sock(void) | |||
631 | return ERR_PTR(result); | 1311 | return ERR_PTR(result); |
632 | } | 1312 | } |
633 | 1313 | ||
634 | result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); | 1314 | result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); |
635 | if (result < 0) { | 1315 | if (result < 0) { |
636 | pr_err("Error setting outbound mcast interface\n"); | 1316 | pr_err("Error setting outbound mcast interface\n"); |
637 | goto error; | 1317 | goto error; |
@@ -640,7 +1320,7 @@ static struct socket * make_send_sock(void) | |||
640 | set_mcast_loop(sock->sk, 0); | 1320 | set_mcast_loop(sock->sk, 0); |
641 | set_mcast_ttl(sock->sk, 1); | 1321 | set_mcast_ttl(sock->sk, 1); |
642 | 1322 | ||
643 | result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); | 1323 | result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); |
644 | if (result < 0) { | 1324 | if (result < 0) { |
645 | pr_err("Error binding address of the mcast interface\n"); | 1325 | pr_err("Error binding address of the mcast interface\n"); |
646 | goto error; | 1326 | goto error; |
@@ -664,8 +1344,9 @@ static struct socket * make_send_sock(void) | |||
664 | /* | 1344 | /* |
665 | * Set up receiving multicast socket over UDP | 1345 | * Set up receiving multicast socket over UDP |
666 | */ | 1346 | */ |
667 | static struct socket * make_receive_sock(void) | 1347 | static struct socket *make_receive_sock(struct net *net) |
668 | { | 1348 | { |
1349 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
669 | struct socket *sock; | 1350 | struct socket *sock; |
670 | int result; | 1351 | int result; |
671 | 1352 | ||
@@ -689,7 +1370,7 @@ static struct socket * make_receive_sock(void) | |||
689 | /* join the multicast group */ | 1370 | /* join the multicast group */ |
690 | result = join_mcast_group(sock->sk, | 1371 | result = join_mcast_group(sock->sk, |
691 | (struct in_addr *) &mcast_addr.sin_addr, | 1372 | (struct in_addr *) &mcast_addr.sin_addr, |
692 | ip_vs_backup_mcast_ifn); | 1373 | ipvs->backup_mcast_ifn); |
693 | if (result < 0) { | 1374 | if (result < 0) { |
694 | pr_err("Error joining to the multicast group\n"); | 1375 | pr_err("Error joining to the multicast group\n"); |
695 | goto error; | 1376 | goto error; |
@@ -760,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) | |||
760 | static int sync_thread_master(void *data) | 1441 | static int sync_thread_master(void *data) |
761 | { | 1442 | { |
762 | struct ip_vs_sync_thread_data *tinfo = data; | 1443 | struct ip_vs_sync_thread_data *tinfo = data; |
1444 | struct netns_ipvs *ipvs = net_ipvs(tinfo->net); | ||
763 | struct ip_vs_sync_buff *sb; | 1445 | struct ip_vs_sync_buff *sb; |
764 | 1446 | ||
765 | pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " | 1447 | pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " |
766 | "syncid = %d\n", | 1448 | "syncid = %d\n", |
767 | ip_vs_master_mcast_ifn, ip_vs_master_syncid); | 1449 | ipvs->master_mcast_ifn, ipvs->master_syncid); |
768 | 1450 | ||
769 | while (!kthread_should_stop()) { | 1451 | while (!kthread_should_stop()) { |
770 | while ((sb = sb_dequeue())) { | 1452 | while ((sb = sb_dequeue(ipvs))) { |
771 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | 1453 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); |
772 | ip_vs_sync_buff_release(sb); | 1454 | ip_vs_sync_buff_release(sb); |
773 | } | 1455 | } |
774 | 1456 | ||
775 | /* check if entries stay in curr_sb for 2 seconds */ | 1457 | /* check if entries stay in ipvs->sync_buff for 2 seconds */ |
776 | sb = get_curr_sync_buff(2 * HZ); | 1458 | sb = get_curr_sync_buff(ipvs, 2 * HZ); |
777 | if (sb) { | 1459 | if (sb) { |
778 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | 1460 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); |
779 | ip_vs_sync_buff_release(sb); | 1461 | ip_vs_sync_buff_release(sb); |
@@ -783,14 +1465,13 @@ static int sync_thread_master(void *data) | |||
783 | } | 1465 | } |
784 | 1466 | ||
785 | /* clean up the sync_buff queue */ | 1467 | /* clean up the sync_buff queue */ |
786 | while ((sb=sb_dequeue())) { | 1468 | while ((sb = sb_dequeue(ipvs))) |
787 | ip_vs_sync_buff_release(sb); | 1469 | ip_vs_sync_buff_release(sb); |
788 | } | ||
789 | 1470 | ||
790 | /* clean up the current sync_buff */ | 1471 | /* clean up the current sync_buff */ |
791 | if ((sb = get_curr_sync_buff(0))) { | 1472 | sb = get_curr_sync_buff(ipvs, 0); |
1473 | if (sb) | ||
792 | ip_vs_sync_buff_release(sb); | 1474 | ip_vs_sync_buff_release(sb); |
793 | } | ||
794 | 1475 | ||
795 | /* release the sending multicast socket */ | 1476 | /* release the sending multicast socket */ |
796 | sock_release(tinfo->sock); | 1477 | sock_release(tinfo->sock); |
@@ -803,11 +1484,12 @@ static int sync_thread_master(void *data) | |||
803 | static int sync_thread_backup(void *data) | 1484 | static int sync_thread_backup(void *data) |
804 | { | 1485 | { |
805 | struct ip_vs_sync_thread_data *tinfo = data; | 1486 | struct ip_vs_sync_thread_data *tinfo = data; |
1487 | struct netns_ipvs *ipvs = net_ipvs(tinfo->net); | ||
806 | int len; | 1488 | int len; |
807 | 1489 | ||
808 | pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " | 1490 | pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " |
809 | "syncid = %d\n", | 1491 | "syncid = %d\n", |
810 | ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); | 1492 | ipvs->backup_mcast_ifn, ipvs->backup_syncid); |
811 | 1493 | ||
812 | while (!kthread_should_stop()) { | 1494 | while (!kthread_should_stop()) { |
813 | wait_event_interruptible(*sk_sleep(tinfo->sock->sk), | 1495 | wait_event_interruptible(*sk_sleep(tinfo->sock->sk), |
@@ -817,7 +1499,7 @@ static int sync_thread_backup(void *data) | |||
817 | /* do we have data now? */ | 1499 | /* do we have data now? */ |
818 | while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { | 1500 | while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { |
819 | len = ip_vs_receive(tinfo->sock, tinfo->buf, | 1501 | len = ip_vs_receive(tinfo->sock, tinfo->buf, |
820 | sync_recv_mesg_maxlen); | 1502 | ipvs->recv_mesg_maxlen); |
821 | if (len <= 0) { | 1503 | if (len <= 0) { |
822 | pr_err("receiving message error\n"); | 1504 | pr_err("receiving message error\n"); |
823 | break; | 1505 | break; |
@@ -826,7 +1508,7 @@ static int sync_thread_backup(void *data) | |||
826 | /* disable bottom half, because it accesses the data | 1508 | /* disable bottom half, because it accesses the data |
827 | shared by softirq while getting/creating conns */ | 1509 | shared by softirq while getting/creating conns */ |
828 | local_bh_disable(); | 1510 | local_bh_disable(); |
829 | ip_vs_process_message(tinfo->buf, len); | 1511 | ip_vs_process_message(tinfo->net, tinfo->buf, len); |
830 | local_bh_enable(); | 1512 | local_bh_enable(); |
831 | } | 1513 | } |
832 | } | 1514 | } |
@@ -840,41 +1522,42 @@ static int sync_thread_backup(void *data) | |||
840 | } | 1522 | } |
841 | 1523 | ||
842 | 1524 | ||
843 | int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | 1525 | int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) |
844 | { | 1526 | { |
845 | struct ip_vs_sync_thread_data *tinfo; | 1527 | struct ip_vs_sync_thread_data *tinfo; |
846 | struct task_struct **realtask, *task; | 1528 | struct task_struct **realtask, *task; |
847 | struct socket *sock; | 1529 | struct socket *sock; |
1530 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
848 | char *name, *buf = NULL; | 1531 | char *name, *buf = NULL; |
849 | int (*threadfn)(void *data); | 1532 | int (*threadfn)(void *data); |
850 | int result = -ENOMEM; | 1533 | int result = -ENOMEM; |
851 | 1534 | ||
852 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1535 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
853 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", | 1536 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", |
854 | sizeof(struct ip_vs_sync_conn)); | 1537 | sizeof(struct ip_vs_sync_conn_v0)); |
855 | 1538 | ||
856 | if (state == IP_VS_STATE_MASTER) { | 1539 | if (state == IP_VS_STATE_MASTER) { |
857 | if (sync_master_thread) | 1540 | if (ipvs->master_thread) |
858 | return -EEXIST; | 1541 | return -EEXIST; |
859 | 1542 | ||
860 | strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, | 1543 | strlcpy(ipvs->master_mcast_ifn, mcast_ifn, |
861 | sizeof(ip_vs_master_mcast_ifn)); | 1544 | sizeof(ipvs->master_mcast_ifn)); |
862 | ip_vs_master_syncid = syncid; | 1545 | ipvs->master_syncid = syncid; |
863 | realtask = &sync_master_thread; | 1546 | realtask = &ipvs->master_thread; |
864 | name = "ipvs_syncmaster"; | 1547 | name = "ipvs_master:%d"; |
865 | threadfn = sync_thread_master; | 1548 | threadfn = sync_thread_master; |
866 | sock = make_send_sock(); | 1549 | sock = make_send_sock(net); |
867 | } else if (state == IP_VS_STATE_BACKUP) { | 1550 | } else if (state == IP_VS_STATE_BACKUP) { |
868 | if (sync_backup_thread) | 1551 | if (ipvs->backup_thread) |
869 | return -EEXIST; | 1552 | return -EEXIST; |
870 | 1553 | ||
871 | strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, | 1554 | strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, |
872 | sizeof(ip_vs_backup_mcast_ifn)); | 1555 | sizeof(ipvs->backup_mcast_ifn)); |
873 | ip_vs_backup_syncid = syncid; | 1556 | ipvs->backup_syncid = syncid; |
874 | realtask = &sync_backup_thread; | 1557 | realtask = &ipvs->backup_thread; |
875 | name = "ipvs_syncbackup"; | 1558 | name = "ipvs_backup:%d"; |
876 | threadfn = sync_thread_backup; | 1559 | threadfn = sync_thread_backup; |
877 | sock = make_receive_sock(); | 1560 | sock = make_receive_sock(net); |
878 | } else { | 1561 | } else { |
879 | return -EINVAL; | 1562 | return -EINVAL; |
880 | } | 1563 | } |
@@ -884,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |||
884 | goto out; | 1567 | goto out; |
885 | } | 1568 | } |
886 | 1569 | ||
887 | set_sync_mesg_maxlen(state); | 1570 | set_sync_mesg_maxlen(net, state); |
888 | if (state == IP_VS_STATE_BACKUP) { | 1571 | if (state == IP_VS_STATE_BACKUP) { |
889 | buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); | 1572 | buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); |
890 | if (!buf) | 1573 | if (!buf) |
891 | goto outsocket; | 1574 | goto outsocket; |
892 | } | 1575 | } |
@@ -895,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |||
895 | if (!tinfo) | 1578 | if (!tinfo) |
896 | goto outbuf; | 1579 | goto outbuf; |
897 | 1580 | ||
1581 | tinfo->net = net; | ||
898 | tinfo->sock = sock; | 1582 | tinfo->sock = sock; |
899 | tinfo->buf = buf; | 1583 | tinfo->buf = buf; |
900 | 1584 | ||
901 | task = kthread_run(threadfn, tinfo, name); | 1585 | task = kthread_run(threadfn, tinfo, name, ipvs->gen); |
902 | if (IS_ERR(task)) { | 1586 | if (IS_ERR(task)) { |
903 | result = PTR_ERR(task); | 1587 | result = PTR_ERR(task); |
904 | goto outtinfo; | 1588 | goto outtinfo; |
@@ -906,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |||
906 | 1590 | ||
907 | /* mark as active */ | 1591 | /* mark as active */ |
908 | *realtask = task; | 1592 | *realtask = task; |
909 | ip_vs_sync_state |= state; | 1593 | ipvs->sync_state |= state; |
910 | 1594 | ||
911 | /* increase the module use count */ | 1595 | /* increase the module use count */ |
912 | ip_vs_use_count_inc(); | 1596 | ip_vs_use_count_inc(); |
@@ -924,16 +1608,18 @@ out: | |||
924 | } | 1608 | } |
925 | 1609 | ||
926 | 1610 | ||
927 | int stop_sync_thread(int state) | 1611 | int stop_sync_thread(struct net *net, int state) |
928 | { | 1612 | { |
1613 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1614 | |||
929 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1615 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
930 | 1616 | ||
931 | if (state == IP_VS_STATE_MASTER) { | 1617 | if (state == IP_VS_STATE_MASTER) { |
932 | if (!sync_master_thread) | 1618 | if (!ipvs->master_thread) |
933 | return -ESRCH; | 1619 | return -ESRCH; |
934 | 1620 | ||
935 | pr_info("stopping master sync thread %d ...\n", | 1621 | pr_info("stopping master sync thread %d ...\n", |
936 | task_pid_nr(sync_master_thread)); | 1622 | task_pid_nr(ipvs->master_thread)); |
937 | 1623 | ||
938 | /* | 1624 | /* |
939 | * The lock synchronizes with sb_queue_tail(), so that we don't | 1625 | * The lock synchronizes with sb_queue_tail(), so that we don't |
@@ -941,21 +1627,21 @@ int stop_sync_thread(int state) | |||
941 | * progress of stopping the master sync daemon. | 1627 | * progress of stopping the master sync daemon. |
942 | */ | 1628 | */ |
943 | 1629 | ||
944 | spin_lock_bh(&ip_vs_sync_lock); | 1630 | spin_lock_bh(&ipvs->sync_lock); |
945 | ip_vs_sync_state &= ~IP_VS_STATE_MASTER; | 1631 | ipvs->sync_state &= ~IP_VS_STATE_MASTER; |
946 | spin_unlock_bh(&ip_vs_sync_lock); | 1632 | spin_unlock_bh(&ipvs->sync_lock); |
947 | kthread_stop(sync_master_thread); | 1633 | kthread_stop(ipvs->master_thread); |
948 | sync_master_thread = NULL; | 1634 | ipvs->master_thread = NULL; |
949 | } else if (state == IP_VS_STATE_BACKUP) { | 1635 | } else if (state == IP_VS_STATE_BACKUP) { |
950 | if (!sync_backup_thread) | 1636 | if (!ipvs->backup_thread) |
951 | return -ESRCH; | 1637 | return -ESRCH; |
952 | 1638 | ||
953 | pr_info("stopping backup sync thread %d ...\n", | 1639 | pr_info("stopping backup sync thread %d ...\n", |
954 | task_pid_nr(sync_backup_thread)); | 1640 | task_pid_nr(ipvs->backup_thread)); |
955 | 1641 | ||
956 | ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; | 1642 | ipvs->sync_state &= ~IP_VS_STATE_BACKUP; |
957 | kthread_stop(sync_backup_thread); | 1643 | kthread_stop(ipvs->backup_thread); |
958 | sync_backup_thread = NULL; | 1644 | ipvs->backup_thread = NULL; |
959 | } else { | 1645 | } else { |
960 | return -EINVAL; | 1646 | return -EINVAL; |
961 | } | 1647 | } |
@@ -965,3 +1651,42 @@ int stop_sync_thread(int state) | |||
965 | 1651 | ||
966 | return 0; | 1652 | return 0; |
967 | } | 1653 | } |
1654 | |||
1655 | /* | ||
1656 | * Initialize data struct for each netns | ||
1657 | */ | ||
1658 | static int __net_init __ip_vs_sync_init(struct net *net) | ||
1659 | { | ||
1660 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1661 | |||
1662 | INIT_LIST_HEAD(&ipvs->sync_queue); | ||
1663 | spin_lock_init(&ipvs->sync_lock); | ||
1664 | spin_lock_init(&ipvs->sync_buff_lock); | ||
1665 | |||
1666 | ipvs->sync_mcast_addr.sin_family = AF_INET; | ||
1667 | ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT); | ||
1668 | ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); | ||
1669 | return 0; | ||
1670 | } | ||
1671 | |||
1672 | static void __ip_vs_sync_cleanup(struct net *net) | ||
1673 | { | ||
1674 | stop_sync_thread(net, IP_VS_STATE_MASTER); | ||
1675 | stop_sync_thread(net, IP_VS_STATE_BACKUP); | ||
1676 | } | ||
1677 | |||
1678 | static struct pernet_operations ipvs_sync_ops = { | ||
1679 | .init = __ip_vs_sync_init, | ||
1680 | .exit = __ip_vs_sync_cleanup, | ||
1681 | }; | ||
1682 | |||
1683 | |||
1684 | int __init ip_vs_sync_init(void) | ||
1685 | { | ||
1686 | return register_pernet_subsys(&ipvs_sync_ops); | ||
1687 | } | ||
1688 | |||
1689 | void __exit ip_vs_sync_cleanup(void) | ||
1690 | { | ||
1691 | unregister_pernet_subsys(&ipvs_sync_ops); | ||
1692 | } | ||
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 5325a3fbe4ac..1f2a4e35fb11 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c | |||
@@ -175,7 +175,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb) | |||
175 | .fl4_tos = RT_TOS(iph->tos), | 175 | .fl4_tos = RT_TOS(iph->tos), |
176 | .mark = skb->mark, | 176 | .mark = skb->mark, |
177 | }; | 177 | }; |
178 | struct rtable *rt; | ||
179 | 178 | ||
180 | if (ip_route_output_key(net, &rt, &fl)) | 179 | if (ip_route_output_key(net, &rt, &fl)) |
181 | return 0; | 180 | return 0; |
@@ -390,7 +389,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
390 | 389 | ||
391 | /* MTU checking */ | 390 | /* MTU checking */ |
392 | mtu = dst_mtu(&rt->dst); | 391 | mtu = dst_mtu(&rt->dst); |
393 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | 392 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && |
393 | !skb_is_gso(skb)) { | ||
394 | ip_rt_put(rt); | 394 | ip_rt_put(rt); |
395 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 395 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
396 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 396 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
@@ -443,7 +443,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
443 | 443 | ||
444 | /* MTU checking */ | 444 | /* MTU checking */ |
445 | mtu = dst_mtu(&rt->dst); | 445 | mtu = dst_mtu(&rt->dst); |
446 | if (skb->len > mtu) { | 446 | if (skb->len > mtu && !skb_is_gso(skb)) { |
447 | if (!skb->dev) { | 447 | if (!skb->dev) { |
448 | struct net *net = dev_net(skb_dst(skb)->dev); | 448 | struct net *net = dev_net(skb_dst(skb)->dev); |
449 | 449 | ||
@@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
543 | 543 | ||
544 | /* MTU checking */ | 544 | /* MTU checking */ |
545 | mtu = dst_mtu(&rt->dst); | 545 | mtu = dst_mtu(&rt->dst); |
546 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | 546 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && |
547 | !skb_is_gso(skb)) { | ||
547 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 548 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
548 | IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, | 549 | IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, |
549 | "ip_vs_nat_xmit(): frag needed for"); | 550 | "ip_vs_nat_xmit(): frag needed for"); |
@@ -658,7 +659,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
658 | 659 | ||
659 | /* MTU checking */ | 660 | /* MTU checking */ |
660 | mtu = dst_mtu(&rt->dst); | 661 | mtu = dst_mtu(&rt->dst); |
661 | if (skb->len > mtu) { | 662 | if (skb->len > mtu && !skb_is_gso(skb)) { |
662 | if (!skb->dev) { | 663 | if (!skb->dev) { |
663 | struct net *net = dev_net(skb_dst(skb)->dev); | 664 | struct net *net = dev_net(skb_dst(skb)->dev); |
664 | 665 | ||
@@ -773,8 +774,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
773 | 774 | ||
774 | df |= (old_iph->frag_off & htons(IP_DF)); | 775 | df |= (old_iph->frag_off & htons(IP_DF)); |
775 | 776 | ||
776 | if ((old_iph->frag_off & htons(IP_DF)) | 777 | if ((old_iph->frag_off & htons(IP_DF) && |
777 | && mtu < ntohs(old_iph->tot_len)) { | 778 | mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { |
778 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 779 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
779 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 780 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
780 | goto tx_error_put; | 781 | goto tx_error_put; |
@@ -886,7 +887,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
886 | if (skb_dst(skb)) | 887 | if (skb_dst(skb)) |
887 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); | 888 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); |
888 | 889 | ||
889 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { | 890 | if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && |
891 | !skb_is_gso(skb)) { | ||
890 | if (!skb->dev) { | 892 | if (!skb->dev) { |
891 | struct net *net = dev_net(skb_dst(skb)->dev); | 893 | struct net *net = dev_net(skb_dst(skb)->dev); |
892 | 894 | ||
@@ -991,7 +993,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
991 | 993 | ||
992 | /* MTU checking */ | 994 | /* MTU checking */ |
993 | mtu = dst_mtu(&rt->dst); | 995 | mtu = dst_mtu(&rt->dst); |
994 | if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { | 996 | if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && |
997 | !skb_is_gso(skb)) { | ||
995 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 998 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
996 | ip_rt_put(rt); | 999 | ip_rt_put(rt); |
997 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1000 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
@@ -1158,7 +1161,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1158 | 1161 | ||
1159 | /* MTU checking */ | 1162 | /* MTU checking */ |
1160 | mtu = dst_mtu(&rt->dst); | 1163 | mtu = dst_mtu(&rt->dst); |
1161 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { | 1164 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && |
1165 | !skb_is_gso(skb)) { | ||
1162 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 1166 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
1163 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1167 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
1164 | goto tx_error_put; | 1168 | goto tx_error_put; |
@@ -1272,7 +1276,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
1272 | 1276 | ||
1273 | /* MTU checking */ | 1277 | /* MTU checking */ |
1274 | mtu = dst_mtu(&rt->dst); | 1278 | mtu = dst_mtu(&rt->dst); |
1275 | if (skb->len > mtu) { | 1279 | if (skb->len > mtu && !skb_is_gso(skb)) { |
1276 | if (!skb->dev) { | 1280 | if (!skb->dev) { |
1277 | struct net *net = dev_net(skb_dst(skb)->dev); | 1281 | struct net *net = dev_net(skb_dst(skb)->dev); |
1278 | 1282 | ||
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c new file mode 100644 index 000000000000..4e99cca61612 --- /dev/null +++ b/net/netfilter/nf_conntrack_broadcast.c | |||
@@ -0,0 +1,82 @@ | |||
1 | /* | ||
2 | * broadcast connection tracking helper | ||
3 | * | ||
4 | * (c) 2005 Patrick McHardy <kaber@trash.net> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/ip.h> | ||
14 | #include <net/route.h> | ||
15 | #include <linux/inetdevice.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | |||
18 | #include <net/netfilter/nf_conntrack.h> | ||
19 | #include <net/netfilter/nf_conntrack_helper.h> | ||
20 | #include <net/netfilter/nf_conntrack_expect.h> | ||
21 | |||
22 | int nf_conntrack_broadcast_help(struct sk_buff *skb, | ||
23 | unsigned int protoff, | ||
24 | struct nf_conn *ct, | ||
25 | enum ip_conntrack_info ctinfo, | ||
26 | unsigned int timeout) | ||
27 | { | ||
28 | struct nf_conntrack_expect *exp; | ||
29 | struct iphdr *iph = ip_hdr(skb); | ||
30 | struct rtable *rt = skb_rtable(skb); | ||
31 | struct in_device *in_dev; | ||
32 | struct nf_conn_help *help = nfct_help(ct); | ||
33 | __be32 mask = 0; | ||
34 | |||
35 | /* we're only interested in locally generated packets */ | ||
36 | if (skb->sk == NULL) | ||
37 | goto out; | ||
38 | if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) | ||
39 | goto out; | ||
40 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | ||
41 | goto out; | ||
42 | |||
43 | rcu_read_lock(); | ||
44 | in_dev = __in_dev_get_rcu(rt->dst.dev); | ||
45 | if (in_dev != NULL) { | ||
46 | for_primary_ifa(in_dev) { | ||
47 | if (ifa->ifa_broadcast == iph->daddr) { | ||
48 | mask = ifa->ifa_mask; | ||
49 | break; | ||
50 | } | ||
51 | } endfor_ifa(in_dev); | ||
52 | } | ||
53 | rcu_read_unlock(); | ||
54 | |||
55 | if (mask == 0) | ||
56 | goto out; | ||
57 | |||
58 | exp = nf_ct_expect_alloc(ct); | ||
59 | if (exp == NULL) | ||
60 | goto out; | ||
61 | |||
62 | exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
63 | exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port; | ||
64 | |||
65 | exp->mask.src.u3.ip = mask; | ||
66 | exp->mask.src.u.udp.port = htons(0xFFFF); | ||
67 | |||
68 | exp->expectfn = NULL; | ||
69 | exp->flags = NF_CT_EXPECT_PERMANENT; | ||
70 | exp->class = NF_CT_EXPECT_CLASS_DEFAULT; | ||
71 | exp->helper = NULL; | ||
72 | |||
73 | nf_ct_expect_related(exp); | ||
74 | nf_ct_expect_put(exp); | ||
75 | |||
76 | nf_ct_refresh(ct, skb, timeout * HZ); | ||
77 | out: | ||
78 | return NF_ACCEPT; | ||
79 | } | ||
80 | EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help); | ||
81 | |||
82 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e61511929c66..1909311c392a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <net/netfilter/nf_conntrack_acct.h> | 43 | #include <net/netfilter/nf_conntrack_acct.h> |
44 | #include <net/netfilter/nf_conntrack_ecache.h> | 44 | #include <net/netfilter/nf_conntrack_ecache.h> |
45 | #include <net/netfilter/nf_conntrack_zones.h> | 45 | #include <net/netfilter/nf_conntrack_zones.h> |
46 | #include <net/netfilter/nf_conntrack_timestamp.h> | ||
46 | #include <net/netfilter/nf_nat.h> | 47 | #include <net/netfilter/nf_nat.h> |
47 | #include <net/netfilter/nf_nat_core.h> | 48 | #include <net/netfilter/nf_nat_core.h> |
48 | 49 | ||
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); | |||
282 | static void death_by_timeout(unsigned long ul_conntrack) | 283 | static void death_by_timeout(unsigned long ul_conntrack) |
283 | { | 284 | { |
284 | struct nf_conn *ct = (void *)ul_conntrack; | 285 | struct nf_conn *ct = (void *)ul_conntrack; |
286 | struct nf_conn_tstamp *tstamp; | ||
287 | |||
288 | tstamp = nf_conn_tstamp_find(ct); | ||
289 | if (tstamp && tstamp->stop == 0) | ||
290 | tstamp->stop = ktime_to_ns(ktime_get_real()); | ||
285 | 291 | ||
286 | if (!test_bit(IPS_DYING_BIT, &ct->status) && | 292 | if (!test_bit(IPS_DYING_BIT, &ct->status) && |
287 | unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { | 293 | unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { |
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
419 | struct nf_conntrack_tuple_hash *h; | 425 | struct nf_conntrack_tuple_hash *h; |
420 | struct nf_conn *ct; | 426 | struct nf_conn *ct; |
421 | struct nf_conn_help *help; | 427 | struct nf_conn_help *help; |
428 | struct nf_conn_tstamp *tstamp; | ||
422 | struct hlist_nulls_node *n; | 429 | struct hlist_nulls_node *n; |
423 | enum ip_conntrack_info ctinfo; | 430 | enum ip_conntrack_info ctinfo; |
424 | struct net *net; | 431 | struct net *net; |
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |||
486 | ct->timeout.expires += jiffies; | 493 | ct->timeout.expires += jiffies; |
487 | add_timer(&ct->timeout); | 494 | add_timer(&ct->timeout); |
488 | atomic_inc(&ct->ct_general.use); | 495 | atomic_inc(&ct->ct_general.use); |
489 | set_bit(IPS_CONFIRMED_BIT, &ct->status); | 496 | ct->status |= IPS_CONFIRMED; |
497 | |||
498 | /* set conntrack timestamp, if enabled. */ | ||
499 | tstamp = nf_conn_tstamp_find(ct); | ||
500 | if (tstamp) { | ||
501 | if (skb->tstamp.tv64 == 0) | ||
502 | __net_timestamp((struct sk_buff *)skb); | ||
490 | 503 | ||
504 | tstamp->start = ktime_to_ns(skb->tstamp); | ||
505 | } | ||
491 | /* Since the lookup is lockless, hash insertion must be done after | 506 | /* Since the lookup is lockless, hash insertion must be done after |
492 | * starting the timer and setting the CONFIRMED bit. The RCU barriers | 507 | * starting the timer and setting the CONFIRMED bit. The RCU barriers |
493 | * guarantee that no other CPU can find the conntrack before the above | 508 | * guarantee that no other CPU can find the conntrack before the above |
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone, | |||
655 | * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. | 670 | * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. |
656 | */ | 671 | */ |
657 | memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, | 672 | memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, |
658 | sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); | 673 | offsetof(struct nf_conn, proto) - |
674 | offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); | ||
659 | spin_lock_init(&ct->lock); | 675 | spin_lock_init(&ct->lock); |
660 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; | 676 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; |
661 | ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; | 677 | ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; |
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, | |||
745 | } | 761 | } |
746 | 762 | ||
747 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); | 763 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); |
764 | nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); | ||
748 | 765 | ||
749 | ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; | 766 | ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; |
750 | nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, | 767 | nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, |
@@ -1185,6 +1202,11 @@ struct __nf_ct_flush_report { | |||
1185 | static int kill_report(struct nf_conn *i, void *data) | 1202 | static int kill_report(struct nf_conn *i, void *data) |
1186 | { | 1203 | { |
1187 | struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; | 1204 | struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; |
1205 | struct nf_conn_tstamp *tstamp; | ||
1206 | |||
1207 | tstamp = nf_conn_tstamp_find(i); | ||
1208 | if (tstamp && tstamp->stop == 0) | ||
1209 | tstamp->stop = ktime_to_ns(ktime_get_real()); | ||
1188 | 1210 | ||
1189 | /* If we fail to deliver the event, death_by_timeout() will retry */ | 1211 | /* If we fail to deliver the event, death_by_timeout() will retry */ |
1190 | if (nf_conntrack_event_report(IPCT_DESTROY, i, | 1212 | if (nf_conntrack_event_report(IPCT_DESTROY, i, |
@@ -1201,9 +1223,9 @@ static int kill_all(struct nf_conn *i, void *data) | |||
1201 | return 1; | 1223 | return 1; |
1202 | } | 1224 | } |
1203 | 1225 | ||
1204 | void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) | 1226 | void nf_ct_free_hashtable(void *hash, unsigned int size) |
1205 | { | 1227 | { |
1206 | if (vmalloced) | 1228 | if (is_vmalloc_addr(hash)) |
1207 | vfree(hash); | 1229 | vfree(hash); |
1208 | else | 1230 | else |
1209 | free_pages((unsigned long)hash, | 1231 | free_pages((unsigned long)hash, |
@@ -1270,8 +1292,7 @@ static void nf_conntrack_cleanup_net(struct net *net) | |||
1270 | goto i_see_dead_people; | 1292 | goto i_see_dead_people; |
1271 | } | 1293 | } |
1272 | 1294 | ||
1273 | nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, | 1295 | nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); |
1274 | net->ct.htable_size); | ||
1275 | nf_conntrack_ecache_fini(net); | 1296 | nf_conntrack_ecache_fini(net); |
1276 | nf_conntrack_acct_fini(net); | 1297 | nf_conntrack_acct_fini(net); |
1277 | nf_conntrack_expect_fini(net); | 1298 | nf_conntrack_expect_fini(net); |
@@ -1300,21 +1321,18 @@ void nf_conntrack_cleanup(struct net *net) | |||
1300 | } | 1321 | } |
1301 | } | 1322 | } |
1302 | 1323 | ||
1303 | void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) | 1324 | void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) |
1304 | { | 1325 | { |
1305 | struct hlist_nulls_head *hash; | 1326 | struct hlist_nulls_head *hash; |
1306 | unsigned int nr_slots, i; | 1327 | unsigned int nr_slots, i; |
1307 | size_t sz; | 1328 | size_t sz; |
1308 | 1329 | ||
1309 | *vmalloced = 0; | ||
1310 | |||
1311 | BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); | 1330 | BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); |
1312 | nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); | 1331 | nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); |
1313 | sz = nr_slots * sizeof(struct hlist_nulls_head); | 1332 | sz = nr_slots * sizeof(struct hlist_nulls_head); |
1314 | hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, | 1333 | hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, |
1315 | get_order(sz)); | 1334 | get_order(sz)); |
1316 | if (!hash) { | 1335 | if (!hash) { |
1317 | *vmalloced = 1; | ||
1318 | printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); | 1336 | printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); |
1319 | hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, | 1337 | hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, |
1320 | PAGE_KERNEL); | 1338 | PAGE_KERNEL); |
@@ -1330,7 +1348,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); | |||
1330 | 1348 | ||
1331 | int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) | 1349 | int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) |
1332 | { | 1350 | { |
1333 | int i, bucket, vmalloced, old_vmalloced; | 1351 | int i, bucket; |
1334 | unsigned int hashsize, old_size; | 1352 | unsigned int hashsize, old_size; |
1335 | struct hlist_nulls_head *hash, *old_hash; | 1353 | struct hlist_nulls_head *hash, *old_hash; |
1336 | struct nf_conntrack_tuple_hash *h; | 1354 | struct nf_conntrack_tuple_hash *h; |
@@ -1347,7 +1365,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) | |||
1347 | if (!hashsize) | 1365 | if (!hashsize) |
1348 | return -EINVAL; | 1366 | return -EINVAL; |
1349 | 1367 | ||
1350 | hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1); | 1368 | hash = nf_ct_alloc_hashtable(&hashsize, 1); |
1351 | if (!hash) | 1369 | if (!hash) |
1352 | return -ENOMEM; | 1370 | return -ENOMEM; |
1353 | 1371 | ||
@@ -1369,15 +1387,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) | |||
1369 | } | 1387 | } |
1370 | } | 1388 | } |
1371 | old_size = init_net.ct.htable_size; | 1389 | old_size = init_net.ct.htable_size; |
1372 | old_vmalloced = init_net.ct.hash_vmalloc; | ||
1373 | old_hash = init_net.ct.hash; | 1390 | old_hash = init_net.ct.hash; |
1374 | 1391 | ||
1375 | init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; | 1392 | init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; |
1376 | init_net.ct.hash_vmalloc = vmalloced; | ||
1377 | init_net.ct.hash = hash; | 1393 | init_net.ct.hash = hash; |
1378 | spin_unlock_bh(&nf_conntrack_lock); | 1394 | spin_unlock_bh(&nf_conntrack_lock); |
1379 | 1395 | ||
1380 | nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); | 1396 | nf_ct_free_hashtable(old_hash, old_size); |
1381 | return 0; | 1397 | return 0; |
1382 | } | 1398 | } |
1383 | EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); | 1399 | EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); |
@@ -1490,8 +1506,7 @@ static int nf_conntrack_init_net(struct net *net) | |||
1490 | } | 1506 | } |
1491 | 1507 | ||
1492 | net->ct.htable_size = nf_conntrack_htable_size; | 1508 | net->ct.htable_size = nf_conntrack_htable_size; |
1493 | net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, | 1509 | net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); |
1494 | &net->ct.hash_vmalloc, 1); | ||
1495 | if (!net->ct.hash) { | 1510 | if (!net->ct.hash) { |
1496 | ret = -ENOMEM; | 1511 | ret = -ENOMEM; |
1497 | printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); | 1512 | printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); |
@@ -1503,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net) | |||
1503 | ret = nf_conntrack_acct_init(net); | 1518 | ret = nf_conntrack_acct_init(net); |
1504 | if (ret < 0) | 1519 | if (ret < 0) |
1505 | goto err_acct; | 1520 | goto err_acct; |
1521 | ret = nf_conntrack_tstamp_init(net); | ||
1522 | if (ret < 0) | ||
1523 | goto err_tstamp; | ||
1506 | ret = nf_conntrack_ecache_init(net); | 1524 | ret = nf_conntrack_ecache_init(net); |
1507 | if (ret < 0) | 1525 | if (ret < 0) |
1508 | goto err_ecache; | 1526 | goto err_ecache; |
@@ -1510,12 +1528,13 @@ static int nf_conntrack_init_net(struct net *net) | |||
1510 | return 0; | 1528 | return 0; |
1511 | 1529 | ||
1512 | err_ecache: | 1530 | err_ecache: |
1531 | nf_conntrack_tstamp_fini(net); | ||
1532 | err_tstamp: | ||
1513 | nf_conntrack_acct_fini(net); | 1533 | nf_conntrack_acct_fini(net); |
1514 | err_acct: | 1534 | err_acct: |
1515 | nf_conntrack_expect_fini(net); | 1535 | nf_conntrack_expect_fini(net); |
1516 | err_expect: | 1536 | err_expect: |
1517 | nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, | 1537 | nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); |
1518 | net->ct.htable_size); | ||
1519 | err_hash: | 1538 | err_hash: |
1520 | kmem_cache_destroy(net->ct.nf_conntrack_cachep); | 1539 | kmem_cache_destroy(net->ct.nf_conntrack_cachep); |
1521 | err_cache: | 1540 | err_cache: |
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index a20fb0bd1efe..cd1e8e0970f2 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c | |||
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) | |||
319 | const struct nf_conntrack_expect_policy *p; | 319 | const struct nf_conntrack_expect_policy *p; |
320 | unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); | 320 | unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); |
321 | 321 | ||
322 | atomic_inc(&exp->use); | 322 | /* two references : one for hash insert, one for the timer */ |
323 | atomic_add(2, &exp->use); | ||
323 | 324 | ||
324 | if (master_help) { | 325 | if (master_help) { |
325 | hlist_add_head(&exp->lnode, &master_help->expectations); | 326 | hlist_add_head(&exp->lnode, &master_help->expectations); |
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) | |||
333 | setup_timer(&exp->timeout, nf_ct_expectation_timed_out, | 334 | setup_timer(&exp->timeout, nf_ct_expectation_timed_out, |
334 | (unsigned long)exp); | 335 | (unsigned long)exp); |
335 | if (master_help) { | 336 | if (master_help) { |
336 | p = &master_help->helper->expect_policy[exp->class]; | 337 | p = &rcu_dereference_protected( |
338 | master_help->helper, | ||
339 | lockdep_is_held(&nf_conntrack_lock) | ||
340 | )->expect_policy[exp->class]; | ||
337 | exp->timeout.expires = jiffies + p->timeout * HZ; | 341 | exp->timeout.expires = jiffies + p->timeout * HZ; |
338 | } | 342 | } |
339 | add_timer(&exp->timeout); | 343 | add_timer(&exp->timeout); |
340 | 344 | ||
341 | atomic_inc(&exp->use); | ||
342 | NF_CT_STAT_INC(net, expect_create); | 345 | NF_CT_STAT_INC(net, expect_create); |
343 | } | 346 | } |
344 | 347 | ||
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) | |||
369 | if (!del_timer(&i->timeout)) | 372 | if (!del_timer(&i->timeout)) |
370 | return 0; | 373 | return 0; |
371 | 374 | ||
372 | p = &master_help->helper->expect_policy[i->class]; | 375 | p = &rcu_dereference_protected( |
376 | master_help->helper, | ||
377 | lockdep_is_held(&nf_conntrack_lock) | ||
378 | )->expect_policy[i->class]; | ||
373 | i->timeout.expires = jiffies + p->timeout * HZ; | 379 | i->timeout.expires = jiffies + p->timeout * HZ; |
374 | add_timer(&i->timeout); | 380 | add_timer(&i->timeout); |
375 | return 1; | 381 | return 1; |
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) | |||
407 | } | 413 | } |
408 | /* Will be over limit? */ | 414 | /* Will be over limit? */ |
409 | if (master_help) { | 415 | if (master_help) { |
410 | p = &master_help->helper->expect_policy[expect->class]; | 416 | p = &rcu_dereference_protected( |
417 | master_help->helper, | ||
418 | lockdep_is_held(&nf_conntrack_lock) | ||
419 | )->expect_policy[expect->class]; | ||
411 | if (p->max_expected && | 420 | if (p->max_expected && |
412 | master_help->expecting[expect->class] >= p->max_expected) { | 421 | master_help->expecting[expect->class] >= p->max_expected) { |
413 | evict_oldest_expect(master, expect); | 422 | evict_oldest_expect(master, expect); |
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | |||
478 | struct hlist_node *n; | 487 | struct hlist_node *n; |
479 | 488 | ||
480 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 489 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
481 | n = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 490 | n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); |
482 | if (n) | 491 | if (n) |
483 | return n; | 492 | return n; |
484 | } | 493 | } |
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | |||
491 | struct net *net = seq_file_net(seq); | 500 | struct net *net = seq_file_net(seq); |
492 | struct ct_expect_iter_state *st = seq->private; | 501 | struct ct_expect_iter_state *st = seq->private; |
493 | 502 | ||
494 | head = rcu_dereference(head->next); | 503 | head = rcu_dereference(hlist_next_rcu(head)); |
495 | while (head == NULL) { | 504 | while (head == NULL) { |
496 | if (++st->bucket >= nf_ct_expect_hsize) | 505 | if (++st->bucket >= nf_ct_expect_hsize) |
497 | return NULL; | 506 | return NULL; |
498 | head = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 507 | head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); |
499 | } | 508 | } |
500 | return head; | 509 | return head; |
501 | } | 510 | } |
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net) | |||
630 | } | 639 | } |
631 | 640 | ||
632 | net->ct.expect_count = 0; | 641 | net->ct.expect_count = 0; |
633 | net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, | 642 | net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); |
634 | &net->ct.expect_vmalloc, 0); | ||
635 | if (net->ct.expect_hash == NULL) | 643 | if (net->ct.expect_hash == NULL) |
636 | goto err1; | 644 | goto err1; |
637 | 645 | ||
@@ -653,8 +661,7 @@ err3: | |||
653 | if (net_eq(net, &init_net)) | 661 | if (net_eq(net, &init_net)) |
654 | kmem_cache_destroy(nf_ct_expect_cachep); | 662 | kmem_cache_destroy(nf_ct_expect_cachep); |
655 | err2: | 663 | err2: |
656 | nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, | 664 | nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); |
657 | nf_ct_expect_hsize); | ||
658 | err1: | 665 | err1: |
659 | return err; | 666 | return err; |
660 | } | 667 | } |
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net) | |||
666 | rcu_barrier(); /* Wait for call_rcu() before destroy */ | 673 | rcu_barrier(); /* Wait for call_rcu() before destroy */ |
667 | kmem_cache_destroy(nf_ct_expect_cachep); | 674 | kmem_cache_destroy(nf_ct_expect_cachep); |
668 | } | 675 | } |
669 | nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, | 676 | nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); |
670 | nf_ct_expect_hsize); | ||
671 | } | 677 | } |
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index bd82450c193f..80a23ed62bb0 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c | |||
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type) | |||
140 | /* This assumes that extended areas in conntrack for the types | 140 | /* This assumes that extended areas in conntrack for the types |
141 | whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ | 141 | whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ |
142 | for (i = min; i <= max; i++) { | 142 | for (i = min; i <= max; i++) { |
143 | t1 = nf_ct_ext_types[i]; | 143 | t1 = rcu_dereference_protected(nf_ct_ext_types[i], |
144 | lockdep_is_held(&nf_ct_ext_type_mutex)); | ||
144 | if (!t1) | 145 | if (!t1) |
145 | continue; | 146 | continue; |
146 | 147 | ||
147 | t1->alloc_size = sizeof(struct nf_ct_ext) | 148 | t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) + |
148 | + ALIGN(sizeof(struct nf_ct_ext), t1->align) | 149 | t1->len; |
149 | + t1->len; | ||
150 | for (j = 0; j < NF_CT_EXT_NUM; j++) { | 150 | for (j = 0; j < NF_CT_EXT_NUM; j++) { |
151 | t2 = nf_ct_ext_types[j]; | 151 | t2 = rcu_dereference_protected(nf_ct_ext_types[j], |
152 | lockdep_is_held(&nf_ct_ext_type_mutex)); | ||
152 | if (t2 == NULL || t2 == t1 || | 153 | if (t2 == NULL || t2 == t1 || |
153 | (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) | 154 | (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) |
154 | continue; | 155 | continue; |
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 59e1a4cd4e8b..1bdfea357955 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c | |||
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex); | |||
33 | static struct hlist_head *nf_ct_helper_hash __read_mostly; | 33 | static struct hlist_head *nf_ct_helper_hash __read_mostly; |
34 | static unsigned int nf_ct_helper_hsize __read_mostly; | 34 | static unsigned int nf_ct_helper_hsize __read_mostly; |
35 | static unsigned int nf_ct_helper_count __read_mostly; | 35 | static unsigned int nf_ct_helper_count __read_mostly; |
36 | static int nf_ct_helper_vmalloc; | ||
37 | 36 | ||
38 | 37 | ||
39 | /* Stupid hash, but collision free for the default registrations of the | 38 | /* Stupid hash, but collision free for the default registrations of the |
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, | |||
158 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); | 157 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); |
159 | struct nf_conn_help *help = nfct_help(ct); | 158 | struct nf_conn_help *help = nfct_help(ct); |
160 | 159 | ||
161 | if (help && help->helper == me) { | 160 | if (help && rcu_dereference_protected( |
161 | help->helper, | ||
162 | lockdep_is_held(&nf_conntrack_lock) | ||
163 | ) == me) { | ||
162 | nf_conntrack_event(IPCT_HELPER, ct); | 164 | nf_conntrack_event(IPCT_HELPER, ct); |
163 | rcu_assign_pointer(help->helper, NULL); | 165 | rcu_assign_pointer(help->helper, NULL); |
164 | } | 166 | } |
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, | |||
210 | hlist_for_each_entry_safe(exp, n, next, | 212 | hlist_for_each_entry_safe(exp, n, next, |
211 | &net->ct.expect_hash[i], hnode) { | 213 | &net->ct.expect_hash[i], hnode) { |
212 | struct nf_conn_help *help = nfct_help(exp->master); | 214 | struct nf_conn_help *help = nfct_help(exp->master); |
213 | if ((help->helper == me || exp->helper == me) && | 215 | if ((rcu_dereference_protected( |
216 | help->helper, | ||
217 | lockdep_is_held(&nf_conntrack_lock) | ||
218 | ) == me || exp->helper == me) && | ||
214 | del_timer(&exp->timeout)) { | 219 | del_timer(&exp->timeout)) { |
215 | nf_ct_unlink_expect(exp); | 220 | nf_ct_unlink_expect(exp); |
216 | nf_ct_expect_put(exp); | 221 | nf_ct_expect_put(exp); |
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void) | |||
261 | int err; | 266 | int err; |
262 | 267 | ||
263 | nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ | 268 | nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ |
264 | nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, | 269 | nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); |
265 | &nf_ct_helper_vmalloc, 0); | ||
266 | if (!nf_ct_helper_hash) | 270 | if (!nf_ct_helper_hash) |
267 | return -ENOMEM; | 271 | return -ENOMEM; |
268 | 272 | ||
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void) | |||
273 | return 0; | 277 | return 0; |
274 | 278 | ||
275 | err1: | 279 | err1: |
276 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, | 280 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); |
277 | nf_ct_helper_hsize); | ||
278 | return err; | 281 | return err; |
279 | } | 282 | } |
280 | 283 | ||
281 | void nf_conntrack_helper_fini(void) | 284 | void nf_conntrack_helper_fini(void) |
282 | { | 285 | { |
283 | nf_ct_extend_unregister(&helper_extend); | 286 | nf_ct_extend_unregister(&helper_extend); |
284 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, | 287 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); |
285 | nf_ct_helper_hsize); | ||
286 | } | 288 | } |
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index aadde018a072..4c8f30a3d6d2 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c | |||
@@ -18,14 +18,7 @@ | |||
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/skbuff.h> | ||
22 | #include <linux/netdevice.h> | ||
23 | #include <linux/inetdevice.h> | ||
24 | #include <linux/if_addr.h> | ||
25 | #include <linux/in.h> | 21 | #include <linux/in.h> |
26 | #include <linux/ip.h> | ||
27 | #include <linux/netfilter.h> | ||
28 | #include <net/route.h> | ||
29 | 22 | ||
30 | #include <net/netfilter/nf_conntrack.h> | 23 | #include <net/netfilter/nf_conntrack.h> |
31 | #include <net/netfilter/nf_conntrack_helper.h> | 24 | #include <net/netfilter/nf_conntrack_helper.h> |
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns"); | |||
40 | MODULE_ALIAS_NFCT_HELPER("netbios_ns"); | 33 | MODULE_ALIAS_NFCT_HELPER("netbios_ns"); |
41 | 34 | ||
42 | static unsigned int timeout __read_mostly = 3; | 35 | static unsigned int timeout __read_mostly = 3; |
43 | module_param(timeout, uint, 0400); | 36 | module_param(timeout, uint, S_IRUSR); |
44 | MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); | 37 | MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); |
45 | 38 | ||
46 | static int help(struct sk_buff *skb, unsigned int protoff, | ||
47 | struct nf_conn *ct, enum ip_conntrack_info ctinfo) | ||
48 | { | ||
49 | struct nf_conntrack_expect *exp; | ||
50 | struct iphdr *iph = ip_hdr(skb); | ||
51 | struct rtable *rt = skb_rtable(skb); | ||
52 | struct in_device *in_dev; | ||
53 | __be32 mask = 0; | ||
54 | |||
55 | /* we're only interested in locally generated packets */ | ||
56 | if (skb->sk == NULL) | ||
57 | goto out; | ||
58 | if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) | ||
59 | goto out; | ||
60 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | ||
61 | goto out; | ||
62 | |||
63 | rcu_read_lock(); | ||
64 | in_dev = __in_dev_get_rcu(rt->dst.dev); | ||
65 | if (in_dev != NULL) { | ||
66 | for_primary_ifa(in_dev) { | ||
67 | if (ifa->ifa_broadcast == iph->daddr) { | ||
68 | mask = ifa->ifa_mask; | ||
69 | break; | ||
70 | } | ||
71 | } endfor_ifa(in_dev); | ||
72 | } | ||
73 | rcu_read_unlock(); | ||
74 | |||
75 | if (mask == 0) | ||
76 | goto out; | ||
77 | |||
78 | exp = nf_ct_expect_alloc(ct); | ||
79 | if (exp == NULL) | ||
80 | goto out; | ||
81 | |||
82 | exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
83 | exp->tuple.src.u.udp.port = htons(NMBD_PORT); | ||
84 | |||
85 | exp->mask.src.u3.ip = mask; | ||
86 | exp->mask.src.u.udp.port = htons(0xFFFF); | ||
87 | |||
88 | exp->expectfn = NULL; | ||
89 | exp->flags = NF_CT_EXPECT_PERMANENT; | ||
90 | exp->class = NF_CT_EXPECT_CLASS_DEFAULT; | ||
91 | exp->helper = NULL; | ||
92 | |||
93 | nf_ct_expect_related(exp); | ||
94 | nf_ct_expect_put(exp); | ||
95 | |||
96 | nf_ct_refresh(ct, skb, timeout * HZ); | ||
97 | out: | ||
98 | return NF_ACCEPT; | ||
99 | } | ||
100 | |||
101 | static struct nf_conntrack_expect_policy exp_policy = { | 39 | static struct nf_conntrack_expect_policy exp_policy = { |
102 | .max_expected = 1, | 40 | .max_expected = 1, |
103 | }; | 41 | }; |
104 | 42 | ||
43 | static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, | ||
44 | struct nf_conn *ct, enum ip_conntrack_info ctinfo) | ||
45 | { | ||
46 | return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); | ||
47 | } | ||
48 | |||
105 | static struct nf_conntrack_helper helper __read_mostly = { | 49 | static struct nf_conntrack_helper helper __read_mostly = { |
106 | .name = "netbios-ns", | 50 | .name = "netbios-ns", |
107 | .tuple.src.l3num = AF_INET, | 51 | .tuple.src.l3num = NFPROTO_IPV4, |
108 | .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), | 52 | .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), |
109 | .tuple.dst.protonum = IPPROTO_UDP, | 53 | .tuple.dst.protonum = IPPROTO_UDP, |
110 | .me = THIS_MODULE, | 54 | .me = THIS_MODULE, |
111 | .help = help, | 55 | .help = netbios_ns_help, |
112 | .expect_policy = &exp_policy, | 56 | .expect_policy = &exp_policy, |
113 | }; | 57 | }; |
114 | 58 | ||
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2b7eef37875c..61c73945bb94 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <net/netfilter/nf_conntrack_tuple.h> | 42 | #include <net/netfilter/nf_conntrack_tuple.h> |
43 | #include <net/netfilter/nf_conntrack_acct.h> | 43 | #include <net/netfilter/nf_conntrack_acct.h> |
44 | #include <net/netfilter/nf_conntrack_zones.h> | 44 | #include <net/netfilter/nf_conntrack_zones.h> |
45 | #include <net/netfilter/nf_conntrack_timestamp.h> | ||
45 | #ifdef CONFIG_NF_NAT_NEEDED | 46 | #ifdef CONFIG_NF_NAT_NEEDED |
46 | #include <net/netfilter/nf_nat_core.h> | 47 | #include <net/netfilter/nf_nat_core.h> |
47 | #include <net/netfilter/nf_nat_protocol.h> | 48 | #include <net/netfilter/nf_nat_protocol.h> |
@@ -230,6 +231,33 @@ nla_put_failure: | |||
230 | return -1; | 231 | return -1; |
231 | } | 232 | } |
232 | 233 | ||
234 | static int | ||
235 | ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) | ||
236 | { | ||
237 | struct nlattr *nest_count; | ||
238 | const struct nf_conn_tstamp *tstamp; | ||
239 | |||
240 | tstamp = nf_conn_tstamp_find(ct); | ||
241 | if (!tstamp) | ||
242 | return 0; | ||
243 | |||
244 | nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED); | ||
245 | if (!nest_count) | ||
246 | goto nla_put_failure; | ||
247 | |||
248 | NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)); | ||
249 | if (tstamp->stop != 0) { | ||
250 | NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP, | ||
251 | cpu_to_be64(tstamp->stop)); | ||
252 | } | ||
253 | nla_nest_end(skb, nest_count); | ||
254 | |||
255 | return 0; | ||
256 | |||
257 | nla_put_failure: | ||
258 | return -1; | ||
259 | } | ||
260 | |||
233 | #ifdef CONFIG_NF_CONNTRACK_MARK | 261 | #ifdef CONFIG_NF_CONNTRACK_MARK |
234 | static inline int | 262 | static inline int |
235 | ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) | 263 | ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) |
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, | |||
404 | ctnetlink_dump_timeout(skb, ct) < 0 || | 432 | ctnetlink_dump_timeout(skb, ct) < 0 || |
405 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || | 433 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || |
406 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || | 434 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || |
435 | ctnetlink_dump_timestamp(skb, ct) < 0 || | ||
407 | ctnetlink_dump_protoinfo(skb, ct) < 0 || | 436 | ctnetlink_dump_protoinfo(skb, ct) < 0 || |
408 | ctnetlink_dump_helpinfo(skb, ct) < 0 || | 437 | ctnetlink_dump_helpinfo(skb, ct) < 0 || |
409 | ctnetlink_dump_mark(skb, ct) < 0 || | 438 | ctnetlink_dump_mark(skb, ct) < 0 || |
@@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct) | |||
471 | } | 500 | } |
472 | 501 | ||
473 | static inline size_t | 502 | static inline size_t |
503 | ctnetlink_timestamp_size(const struct nf_conn *ct) | ||
504 | { | ||
505 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
506 | if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) | ||
507 | return 0; | ||
508 | return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t)); | ||
509 | #else | ||
510 | return 0; | ||
511 | #endif | ||
512 | } | ||
513 | |||
514 | static inline size_t | ||
474 | ctnetlink_nlmsg_size(const struct nf_conn *ct) | 515 | ctnetlink_nlmsg_size(const struct nf_conn *ct) |
475 | { | 516 | { |
476 | return NLMSG_ALIGN(sizeof(struct nfgenmsg)) | 517 | return NLMSG_ALIGN(sizeof(struct nfgenmsg)) |
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) | |||
481 | + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ | 522 | + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ |
482 | + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ | 523 | + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ |
483 | + ctnetlink_counters_size(ct) | 524 | + ctnetlink_counters_size(ct) |
525 | + ctnetlink_timestamp_size(ct) | ||
484 | + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ | 526 | + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ |
485 | + nla_total_size(0) /* CTA_PROTOINFO */ | 527 | + nla_total_size(0) /* CTA_PROTOINFO */ |
486 | + nla_total_size(0) /* CTA_HELP */ | 528 | + nla_total_size(0) /* CTA_HELP */ |
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |||
571 | 613 | ||
572 | if (events & (1 << IPCT_DESTROY)) { | 614 | if (events & (1 << IPCT_DESTROY)) { |
573 | if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || | 615 | if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || |
574 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) | 616 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || |
617 | ctnetlink_dump_timestamp(skb, ct) < 0) | ||
575 | goto nla_put_failure; | 618 | goto nla_put_failure; |
576 | } else { | 619 | } else { |
577 | if (ctnetlink_dump_timeout(skb, ct) < 0) | 620 | if (ctnetlink_dump_timeout(skb, ct) < 0) |
@@ -1357,6 +1400,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, | |||
1357 | } | 1400 | } |
1358 | 1401 | ||
1359 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); | 1402 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); |
1403 | nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); | ||
1360 | nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); | 1404 | nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); |
1361 | /* we must add conntrack extensions before confirmation. */ | 1405 | /* we must add conntrack extensions before confirmation. */ |
1362 | ct->status |= IPS_CONFIRMED; | 1406 | ct->status |= IPS_CONFIRMED; |
@@ -1375,6 +1419,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, | |||
1375 | } | 1419 | } |
1376 | #endif | 1420 | #endif |
1377 | 1421 | ||
1422 | memset(&ct->proto, 0, sizeof(ct->proto)); | ||
1378 | if (cda[CTA_PROTOINFO]) { | 1423 | if (cda[CTA_PROTOINFO]) { |
1379 | err = ctnetlink_change_protoinfo(ct, cda); | 1424 | err = ctnetlink_change_protoinfo(ct, cda); |
1380 | if (err < 0) | 1425 | if (err < 0) |
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index dc7bb74110df..5701c8dd783c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c | |||
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto | |||
166 | int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) | 166 | int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) |
167 | { | 167 | { |
168 | int ret = 0; | 168 | int ret = 0; |
169 | struct nf_conntrack_l3proto *old; | ||
169 | 170 | ||
170 | if (proto->l3proto >= AF_MAX) | 171 | if (proto->l3proto >= AF_MAX) |
171 | return -EBUSY; | 172 | return -EBUSY; |
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) | |||
174 | return -EINVAL; | 175 | return -EINVAL; |
175 | 176 | ||
176 | mutex_lock(&nf_ct_proto_mutex); | 177 | mutex_lock(&nf_ct_proto_mutex); |
177 | if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { | 178 | old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], |
179 | lockdep_is_held(&nf_ct_proto_mutex)); | ||
180 | if (old != &nf_conntrack_l3proto_generic) { | ||
178 | ret = -EBUSY; | 181 | ret = -EBUSY; |
179 | goto out_unlock; | 182 | goto out_unlock; |
180 | } | 183 | } |
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) | |||
201 | BUG_ON(proto->l3proto >= AF_MAX); | 204 | BUG_ON(proto->l3proto >= AF_MAX); |
202 | 205 | ||
203 | mutex_lock(&nf_ct_proto_mutex); | 206 | mutex_lock(&nf_ct_proto_mutex); |
204 | BUG_ON(nf_ct_l3protos[proto->l3proto] != proto); | 207 | BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], |
208 | lockdep_is_held(&nf_ct_proto_mutex) | ||
209 | ) != proto); | ||
205 | rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], | 210 | rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], |
206 | &nf_conntrack_l3proto_generic); | 211 | &nf_conntrack_l3proto_generic); |
207 | nf_ct_l3proto_unregister_sysctl(proto); | 212 | nf_ct_l3proto_unregister_sysctl(proto); |
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) | |||
279 | mutex_lock(&nf_ct_proto_mutex); | 284 | mutex_lock(&nf_ct_proto_mutex); |
280 | if (!nf_ct_protos[l4proto->l3proto]) { | 285 | if (!nf_ct_protos[l4proto->l3proto]) { |
281 | /* l3proto may be loaded latter. */ | 286 | /* l3proto may be loaded latter. */ |
282 | struct nf_conntrack_l4proto **proto_array; | 287 | struct nf_conntrack_l4proto __rcu **proto_array; |
283 | int i; | 288 | int i; |
284 | 289 | ||
285 | proto_array = kmalloc(MAX_NF_CT_PROTO * | 290 | proto_array = kmalloc(MAX_NF_CT_PROTO * |
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) | |||
291 | } | 296 | } |
292 | 297 | ||
293 | for (i = 0; i < MAX_NF_CT_PROTO; i++) | 298 | for (i = 0; i < MAX_NF_CT_PROTO; i++) |
294 | proto_array[i] = &nf_conntrack_l4proto_generic; | 299 | RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic); |
295 | 300 | ||
296 | /* Before making proto_array visible to lockless readers, | 301 | /* Before making proto_array visible to lockless readers, |
297 | * we must make sure its content is committed to memory. | 302 | * we must make sure its content is committed to memory. |
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) | |||
299 | smp_wmb(); | 304 | smp_wmb(); |
300 | 305 | ||
301 | nf_ct_protos[l4proto->l3proto] = proto_array; | 306 | nf_ct_protos[l4proto->l3proto] = proto_array; |
302 | } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != | 307 | } else if (rcu_dereference_protected( |
303 | &nf_conntrack_l4proto_generic) { | 308 | nf_ct_protos[l4proto->l3proto][l4proto->l4proto], |
309 | lockdep_is_held(&nf_ct_proto_mutex) | ||
310 | ) != &nf_conntrack_l4proto_generic) { | ||
304 | ret = -EBUSY; | 311 | ret = -EBUSY; |
305 | goto out_unlock; | 312 | goto out_unlock; |
306 | } | 313 | } |
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) | |||
331 | BUG_ON(l4proto->l3proto >= PF_MAX); | 338 | BUG_ON(l4proto->l3proto >= PF_MAX); |
332 | 339 | ||
333 | mutex_lock(&nf_ct_proto_mutex); | 340 | mutex_lock(&nf_ct_proto_mutex); |
334 | BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto); | 341 | BUG_ON(rcu_dereference_protected( |
342 | nf_ct_protos[l4proto->l3proto][l4proto->l4proto], | ||
343 | lockdep_is_held(&nf_ct_proto_mutex) | ||
344 | ) != l4proto); | ||
335 | rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], | 345 | rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], |
336 | &nf_conntrack_l4proto_generic); | 346 | &nf_conntrack_l4proto_generic); |
337 | nf_ct_l4proto_unregister_sysctl(l4proto); | 347 | nf_ct_l4proto_unregister_sysctl(l4proto); |
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 5292560d6d4a..9ae57c57c50e 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c | |||
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
452 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; | 452 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; |
453 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; | 453 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; |
454 | ct->proto.dccp.state = CT_DCCP_NONE; | 454 | ct->proto.dccp.state = CT_DCCP_NONE; |
455 | ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; | ||
456 | ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; | ||
457 | ct->proto.dccp.handshake_seq = 0; | ||
455 | return true; | 458 | return true; |
456 | 459 | ||
457 | out_invalid: | 460 | out_invalid: |
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c6049c2d5ea8..6f4ee70f460b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c | |||
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
413 | test_bit(SCTP_CID_COOKIE_ACK, map)) | 413 | test_bit(SCTP_CID_COOKIE_ACK, map)) |
414 | return false; | 414 | return false; |
415 | 415 | ||
416 | memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp)); | ||
416 | new_state = SCTP_CONNTRACK_MAX; | 417 | new_state = SCTP_CONNTRACK_MAX; |
417 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | 418 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { |
418 | /* Don't need lock here: this conntrack not in circulation yet */ | 419 | /* Don't need lock here: this conntrack not in circulation yet */ |
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3fb2b73b24dc..6f38d0e2ea4a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c | |||
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1066 | BUG_ON(th == NULL); | 1066 | BUG_ON(th == NULL); |
1067 | 1067 | ||
1068 | /* Don't need lock here: this conntrack not in circulation yet */ | 1068 | /* Don't need lock here: this conntrack not in circulation yet */ |
1069 | new_state | 1069 | new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; |
1070 | = tcp_conntracks[0][get_conntrack_index(th)] | ||
1071 | [TCP_CONNTRACK_NONE]; | ||
1072 | 1070 | ||
1073 | /* Invalid: delete conntrack */ | 1071 | /* Invalid: delete conntrack */ |
1074 | if (new_state >= TCP_CONNTRACK_MAX) { | 1072 | if (new_state >= TCP_CONNTRACK_MAX) { |
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1077 | } | 1075 | } |
1078 | 1076 | ||
1079 | if (new_state == TCP_CONNTRACK_SYN_SENT) { | 1077 | if (new_state == TCP_CONNTRACK_SYN_SENT) { |
1078 | memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); | ||
1080 | /* SYN packet */ | 1079 | /* SYN packet */ |
1081 | ct->proto.tcp.seen[0].td_end = | 1080 | ct->proto.tcp.seen[0].td_end = |
1082 | segment_seq_plus_len(ntohl(th->seq), skb->len, | 1081 | segment_seq_plus_len(ntohl(th->seq), skb->len, |
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1088 | ct->proto.tcp.seen[0].td_end; | 1087 | ct->proto.tcp.seen[0].td_end; |
1089 | 1088 | ||
1090 | tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); | 1089 | tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); |
1091 | ct->proto.tcp.seen[1].flags = 0; | ||
1092 | } else if (nf_ct_tcp_loose == 0) { | 1090 | } else if (nf_ct_tcp_loose == 0) { |
1093 | /* Don't try to pick up connections. */ | 1091 | /* Don't try to pick up connections. */ |
1094 | return false; | 1092 | return false; |
1095 | } else { | 1093 | } else { |
1094 | memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); | ||
1096 | /* | 1095 | /* |
1097 | * We are in the middle of a connection, | 1096 | * We are in the middle of a connection, |
1098 | * its history is lost for us. | 1097 | * its history is lost for us. |
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1107 | ct->proto.tcp.seen[0].td_maxend = | 1106 | ct->proto.tcp.seen[0].td_maxend = |
1108 | ct->proto.tcp.seen[0].td_end + | 1107 | ct->proto.tcp.seen[0].td_end + |
1109 | ct->proto.tcp.seen[0].td_maxwin; | 1108 | ct->proto.tcp.seen[0].td_maxwin; |
1110 | ct->proto.tcp.seen[0].td_scale = 0; | ||
1111 | 1109 | ||
1112 | /* We assume SACK and liberal window checking to handle | 1110 | /* We assume SACK and liberal window checking to handle |
1113 | * window scaling */ | 1111 | * window scaling */ |
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
1116 | IP_CT_TCP_FLAG_BE_LIBERAL; | 1114 | IP_CT_TCP_FLAG_BE_LIBERAL; |
1117 | } | 1115 | } |
1118 | 1116 | ||
1119 | ct->proto.tcp.seen[1].td_end = 0; | ||
1120 | ct->proto.tcp.seen[1].td_maxend = 0; | ||
1121 | ct->proto.tcp.seen[1].td_maxwin = 0; | ||
1122 | ct->proto.tcp.seen[1].td_scale = 0; | ||
1123 | |||
1124 | /* tcp_packet will set them */ | 1117 | /* tcp_packet will set them */ |
1125 | ct->proto.tcp.state = TCP_CONNTRACK_NONE; | ||
1126 | ct->proto.tcp.last_index = TCP_NONE_SET; | 1118 | ct->proto.tcp.last_index = TCP_NONE_SET; |
1127 | 1119 | ||
1128 | pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " | 1120 | pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " |
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c new file mode 100644 index 000000000000..6e545e26289e --- /dev/null +++ b/net/netfilter/nf_conntrack_snmp.c | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * SNMP service broadcast connection tracking helper | ||
3 | * | ||
4 | * (c) 2011 Jiri Olsa <jolsa@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/in.h> | ||
15 | |||
16 | #include <net/netfilter/nf_conntrack.h> | ||
17 | #include <net/netfilter/nf_conntrack_helper.h> | ||
18 | #include <net/netfilter/nf_conntrack_expect.h> | ||
19 | |||
20 | #define SNMP_PORT 161 | ||
21 | |||
22 | MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>"); | ||
23 | MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper"); | ||
24 | MODULE_LICENSE("GPL"); | ||
25 | MODULE_ALIAS_NFCT_HELPER("snmp"); | ||
26 | |||
27 | static unsigned int timeout __read_mostly = 30; | ||
28 | module_param(timeout, uint, S_IRUSR); | ||
29 | MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); | ||
30 | |||
31 | int (*nf_nat_snmp_hook)(struct sk_buff *skb, | ||
32 | unsigned int protoff, | ||
33 | struct nf_conn *ct, | ||
34 | enum ip_conntrack_info ctinfo); | ||
35 | EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); | ||
36 | |||
37 | static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff, | ||
38 | struct nf_conn *ct, enum ip_conntrack_info ctinfo) | ||
39 | { | ||
40 | typeof(nf_nat_snmp_hook) nf_nat_snmp; | ||
41 | |||
42 | nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); | ||
43 | |||
44 | nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook); | ||
45 | if (nf_nat_snmp && ct->status & IPS_NAT_MASK) | ||
46 | return nf_nat_snmp(skb, protoff, ct, ctinfo); | ||
47 | |||
48 | return NF_ACCEPT; | ||
49 | } | ||
50 | |||
51 | static struct nf_conntrack_expect_policy exp_policy = { | ||
52 | .max_expected = 1, | ||
53 | }; | ||
54 | |||
55 | static struct nf_conntrack_helper helper __read_mostly = { | ||
56 | .name = "snmp", | ||
57 | .tuple.src.l3num = NFPROTO_IPV4, | ||
58 | .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), | ||
59 | .tuple.dst.protonum = IPPROTO_UDP, | ||
60 | .me = THIS_MODULE, | ||
61 | .help = snmp_conntrack_help, | ||
62 | .expect_policy = &exp_policy, | ||
63 | }; | ||
64 | |||
65 | static int __init nf_conntrack_snmp_init(void) | ||
66 | { | ||
67 | exp_policy.timeout = timeout; | ||
68 | return nf_conntrack_helper_register(&helper); | ||
69 | } | ||
70 | |||
71 | static void __exit nf_conntrack_snmp_fini(void) | ||
72 | { | ||
73 | nf_conntrack_helper_unregister(&helper); | ||
74 | } | ||
75 | |||
76 | module_init(nf_conntrack_snmp_init); | ||
77 | module_exit(nf_conntrack_snmp_fini); | ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index b4d7f0f24b27..0ae142825881 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #include <net/netfilter/nf_conntrack_helper.h> | 29 | #include <net/netfilter/nf_conntrack_helper.h> |
30 | #include <net/netfilter/nf_conntrack_acct.h> | 30 | #include <net/netfilter/nf_conntrack_acct.h> |
31 | #include <net/netfilter/nf_conntrack_zones.h> | 31 | #include <net/netfilter/nf_conntrack_zones.h> |
32 | #include <net/netfilter/nf_conntrack_timestamp.h> | ||
33 | #include <linux/rculist_nulls.h> | ||
32 | 34 | ||
33 | MODULE_LICENSE("GPL"); | 35 | MODULE_LICENSE("GPL"); |
34 | 36 | ||
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple); | |||
45 | struct ct_iter_state { | 47 | struct ct_iter_state { |
46 | struct seq_net_private p; | 48 | struct seq_net_private p; |
47 | unsigned int bucket; | 49 | unsigned int bucket; |
50 | u_int64_t time_now; | ||
48 | }; | 51 | }; |
49 | 52 | ||
50 | static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | 53 | static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) |
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
56 | for (st->bucket = 0; | 59 | for (st->bucket = 0; |
57 | st->bucket < net->ct.htable_size; | 60 | st->bucket < net->ct.htable_size; |
58 | st->bucket++) { | 61 | st->bucket++) { |
59 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 62 | n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); |
60 | if (!is_a_nulls(n)) | 63 | if (!is_a_nulls(n)) |
61 | return n; | 64 | return n; |
62 | } | 65 | } |
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
69 | struct net *net = seq_file_net(seq); | 72 | struct net *net = seq_file_net(seq); |
70 | struct ct_iter_state *st = seq->private; | 73 | struct ct_iter_state *st = seq->private; |
71 | 74 | ||
72 | head = rcu_dereference(head->next); | 75 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
73 | while (is_a_nulls(head)) { | 76 | while (is_a_nulls(head)) { |
74 | if (likely(get_nulls_value(head) == st->bucket)) { | 77 | if (likely(get_nulls_value(head) == st->bucket)) { |
75 | if (++st->bucket >= net->ct.htable_size) | 78 | if (++st->bucket >= net->ct.htable_size) |
76 | return NULL; | 79 | return NULL; |
77 | } | 80 | } |
78 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 81 | head = rcu_dereference( |
82 | hlist_nulls_first_rcu( | ||
83 | &net->ct.hash[st->bucket])); | ||
79 | } | 84 | } |
80 | return head; | 85 | return head; |
81 | } | 86 | } |
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) | |||
93 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) | 98 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) |
94 | __acquires(RCU) | 99 | __acquires(RCU) |
95 | { | 100 | { |
101 | struct ct_iter_state *st = seq->private; | ||
102 | |||
103 | st->time_now = ktime_to_ns(ktime_get_real()); | ||
96 | rcu_read_lock(); | 104 | rcu_read_lock(); |
97 | return ct_get_idx(seq, *pos); | 105 | return ct_get_idx(seq, *pos); |
98 | } | 106 | } |
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | |||
132 | } | 140 | } |
133 | #endif | 141 | #endif |
134 | 142 | ||
143 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | ||
144 | static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) | ||
145 | { | ||
146 | struct ct_iter_state *st = s->private; | ||
147 | struct nf_conn_tstamp *tstamp; | ||
148 | s64 delta_time; | ||
149 | |||
150 | tstamp = nf_conn_tstamp_find(ct); | ||
151 | if (tstamp) { | ||
152 | delta_time = st->time_now - tstamp->start; | ||
153 | if (delta_time > 0) | ||
154 | delta_time = div_s64(delta_time, NSEC_PER_SEC); | ||
155 | else | ||
156 | delta_time = 0; | ||
157 | |||
158 | return seq_printf(s, "delta-time=%llu ", | ||
159 | (unsigned long long)delta_time); | ||
160 | } | ||
161 | return 0; | ||
162 | } | ||
163 | #else | ||
164 | static inline int | ||
165 | ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) | ||
166 | { | ||
167 | return 0; | ||
168 | } | ||
169 | #endif | ||
170 | |||
135 | /* return 0 on success, 1 in case of error */ | 171 | /* return 0 on success, 1 in case of error */ |
136 | static int ct_seq_show(struct seq_file *s, void *v) | 172 | static int ct_seq_show(struct seq_file *s, void *v) |
137 | { | 173 | { |
@@ -200,6 +236,9 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
200 | goto release; | 236 | goto release; |
201 | #endif | 237 | #endif |
202 | 238 | ||
239 | if (ct_show_delta_time(s, ct)) | ||
240 | goto release; | ||
241 | |||
203 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) | 242 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) |
204 | goto release; | 243 | goto release; |
205 | 244 | ||
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c new file mode 100644 index 000000000000..af7dd31af0a1 --- /dev/null +++ b/net/netfilter/nf_conntrack_timestamp.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation (or any later at your option). | ||
7 | */ | ||
8 | |||
9 | #include <linux/netfilter.h> | ||
10 | #include <linux/slab.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/moduleparam.h> | ||
13 | |||
14 | #include <net/netfilter/nf_conntrack.h> | ||
15 | #include <net/netfilter/nf_conntrack_extend.h> | ||
16 | #include <net/netfilter/nf_conntrack_timestamp.h> | ||
17 | |||
18 | static int nf_ct_tstamp __read_mostly; | ||
19 | |||
20 | module_param_named(tstamp, nf_ct_tstamp, bool, 0644); | ||
21 | MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); | ||
22 | |||
23 | #ifdef CONFIG_SYSCTL | ||
24 | static struct ctl_table tstamp_sysctl_table[] = { | ||
25 | { | ||
26 | .procname = "nf_conntrack_timestamp", | ||
27 | .data = &init_net.ct.sysctl_tstamp, | ||
28 | .maxlen = sizeof(unsigned int), | ||
29 | .mode = 0644, | ||
30 | .proc_handler = proc_dointvec, | ||
31 | }, | ||
32 | {} | ||
33 | }; | ||
34 | #endif /* CONFIG_SYSCTL */ | ||
35 | |||
36 | static struct nf_ct_ext_type tstamp_extend __read_mostly = { | ||
37 | .len = sizeof(struct nf_conn_tstamp), | ||
38 | .align = __alignof__(struct nf_conn_tstamp), | ||
39 | .id = NF_CT_EXT_TSTAMP, | ||
40 | }; | ||
41 | |||
42 | #ifdef CONFIG_SYSCTL | ||
43 | static int nf_conntrack_tstamp_init_sysctl(struct net *net) | ||
44 | { | ||
45 | struct ctl_table *table; | ||
46 | |||
47 | table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table), | ||
48 | GFP_KERNEL); | ||
49 | if (!table) | ||
50 | goto out; | ||
51 | |||
52 | table[0].data = &net->ct.sysctl_tstamp; | ||
53 | |||
54 | net->ct.tstamp_sysctl_header = register_net_sysctl_table(net, | ||
55 | nf_net_netfilter_sysctl_path, table); | ||
56 | if (!net->ct.tstamp_sysctl_header) { | ||
57 | printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n"); | ||
58 | goto out_register; | ||
59 | } | ||
60 | return 0; | ||
61 | |||
62 | out_register: | ||
63 | kfree(table); | ||
64 | out: | ||
65 | return -ENOMEM; | ||
66 | } | ||
67 | |||
68 | static void nf_conntrack_tstamp_fini_sysctl(struct net *net) | ||
69 | { | ||
70 | struct ctl_table *table; | ||
71 | |||
72 | table = net->ct.tstamp_sysctl_header->ctl_table_arg; | ||
73 | unregister_net_sysctl_table(net->ct.tstamp_sysctl_header); | ||
74 | kfree(table); | ||
75 | } | ||
76 | #else | ||
77 | static int nf_conntrack_tstamp_init_sysctl(struct net *net) | ||
78 | { | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | static void nf_conntrack_tstamp_fini_sysctl(struct net *net) | ||
83 | { | ||
84 | } | ||
85 | #endif | ||
86 | |||
87 | int nf_conntrack_tstamp_init(struct net *net) | ||
88 | { | ||
89 | int ret; | ||
90 | |||
91 | net->ct.sysctl_tstamp = nf_ct_tstamp; | ||
92 | |||
93 | if (net_eq(net, &init_net)) { | ||
94 | ret = nf_ct_extend_register(&tstamp_extend); | ||
95 | if (ret < 0) { | ||
96 | printk(KERN_ERR "nf_ct_tstamp: Unable to register " | ||
97 | "extension\n"); | ||
98 | goto out_extend_register; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | ret = nf_conntrack_tstamp_init_sysctl(net); | ||
103 | if (ret < 0) | ||
104 | goto out_sysctl; | ||
105 | |||
106 | return 0; | ||
107 | |||
108 | out_sysctl: | ||
109 | if (net_eq(net, &init_net)) | ||
110 | nf_ct_extend_unregister(&tstamp_extend); | ||
111 | out_extend_register: | ||
112 | return ret; | ||
113 | } | ||
114 | |||
115 | void nf_conntrack_tstamp_fini(struct net *net) | ||
116 | { | ||
117 | nf_conntrack_tstamp_fini_sysctl(net); | ||
118 | if (net_eq(net, &init_net)) | ||
119 | nf_ct_extend_unregister(&tstamp_extend); | ||
120 | } | ||
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index b07393eab88e..20c775cff2a8 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c | |||
@@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v) | |||
161 | struct nf_logger *t; | 161 | struct nf_logger *t; |
162 | int ret; | 162 | int ret; |
163 | 163 | ||
164 | logger = nf_loggers[*pos]; | 164 | logger = rcu_dereference_protected(nf_loggers[*pos], |
165 | lockdep_is_held(&nf_log_mutex)); | ||
165 | 166 | ||
166 | if (!logger) | 167 | if (!logger) |
167 | ret = seq_printf(s, "%2lld NONE (", *pos); | 168 | ret = seq_printf(s, "%2lld NONE (", *pos); |
@@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write, | |||
249 | mutex_unlock(&nf_log_mutex); | 250 | mutex_unlock(&nf_log_mutex); |
250 | } else { | 251 | } else { |
251 | mutex_lock(&nf_log_mutex); | 252 | mutex_lock(&nf_log_mutex); |
252 | logger = nf_loggers[tindex]; | 253 | logger = rcu_dereference_protected(nf_loggers[tindex], |
254 | lockdep_is_held(&nf_log_mutex)); | ||
253 | if (!logger) | 255 | if (!logger) |
254 | table->data = "NONE"; | 256 | table->data = "NONE"; |
255 | else | 257 | else |
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 74aebed5bd28..5ab22e2bbd7d 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c | |||
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex); | |||
27 | int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) | 27 | int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) |
28 | { | 28 | { |
29 | int ret; | 29 | int ret; |
30 | const struct nf_queue_handler *old; | ||
30 | 31 | ||
31 | if (pf >= ARRAY_SIZE(queue_handler)) | 32 | if (pf >= ARRAY_SIZE(queue_handler)) |
32 | return -EINVAL; | 33 | return -EINVAL; |
33 | 34 | ||
34 | mutex_lock(&queue_handler_mutex); | 35 | mutex_lock(&queue_handler_mutex); |
35 | if (queue_handler[pf] == qh) | 36 | old = rcu_dereference_protected(queue_handler[pf], |
37 | lockdep_is_held(&queue_handler_mutex)); | ||
38 | if (old == qh) | ||
36 | ret = -EEXIST; | 39 | ret = -EEXIST; |
37 | else if (queue_handler[pf]) | 40 | else if (old) |
38 | ret = -EBUSY; | 41 | ret = -EBUSY; |
39 | else { | 42 | else { |
40 | rcu_assign_pointer(queue_handler[pf], qh); | 43 | rcu_assign_pointer(queue_handler[pf], qh); |
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler); | |||
49 | /* The caller must flush their queue before this */ | 52 | /* The caller must flush their queue before this */ |
50 | int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) | 53 | int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) |
51 | { | 54 | { |
55 | const struct nf_queue_handler *old; | ||
56 | |||
52 | if (pf >= ARRAY_SIZE(queue_handler)) | 57 | if (pf >= ARRAY_SIZE(queue_handler)) |
53 | return -EINVAL; | 58 | return -EINVAL; |
54 | 59 | ||
55 | mutex_lock(&queue_handler_mutex); | 60 | mutex_lock(&queue_handler_mutex); |
56 | if (queue_handler[pf] && queue_handler[pf] != qh) { | 61 | old = rcu_dereference_protected(queue_handler[pf], |
62 | lockdep_is_held(&queue_handler_mutex)); | ||
63 | if (old && old != qh) { | ||
57 | mutex_unlock(&queue_handler_mutex); | 64 | mutex_unlock(&queue_handler_mutex); |
58 | return -EINVAL; | 65 | return -EINVAL; |
59 | } | 66 | } |
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) | |||
73 | 80 | ||
74 | mutex_lock(&queue_handler_mutex); | 81 | mutex_lock(&queue_handler_mutex); |
75 | for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { | 82 | for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { |
76 | if (queue_handler[pf] == qh) | 83 | if (rcu_dereference_protected( |
84 | queue_handler[pf], | ||
85 | lockdep_is_held(&queue_handler_mutex) | ||
86 | ) == qh) | ||
77 | rcu_assign_pointer(queue_handler[pf], NULL); | 87 | rcu_assign_pointer(queue_handler[pf], NULL); |
78 | } | 88 | } |
79 | mutex_unlock(&queue_handler_mutex); | 89 | mutex_unlock(&queue_handler_mutex); |
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb, | |||
115 | int (*okfn)(struct sk_buff *), | 125 | int (*okfn)(struct sk_buff *), |
116 | unsigned int queuenum) | 126 | unsigned int queuenum) |
117 | { | 127 | { |
118 | int status; | 128 | int status = -ENOENT; |
119 | struct nf_queue_entry *entry = NULL; | 129 | struct nf_queue_entry *entry = NULL; |
120 | #ifdef CONFIG_BRIDGE_NETFILTER | 130 | #ifdef CONFIG_BRIDGE_NETFILTER |
121 | struct net_device *physindev; | 131 | struct net_device *physindev; |
@@ -128,16 +138,20 @@ static int __nf_queue(struct sk_buff *skb, | |||
128 | rcu_read_lock(); | 138 | rcu_read_lock(); |
129 | 139 | ||
130 | qh = rcu_dereference(queue_handler[pf]); | 140 | qh = rcu_dereference(queue_handler[pf]); |
131 | if (!qh) | 141 | if (!qh) { |
142 | status = -ESRCH; | ||
132 | goto err_unlock; | 143 | goto err_unlock; |
144 | } | ||
133 | 145 | ||
134 | afinfo = nf_get_afinfo(pf); | 146 | afinfo = nf_get_afinfo(pf); |
135 | if (!afinfo) | 147 | if (!afinfo) |
136 | goto err_unlock; | 148 | goto err_unlock; |
137 | 149 | ||
138 | entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); | 150 | entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); |
139 | if (!entry) | 151 | if (!entry) { |
152 | status = -ENOMEM; | ||
140 | goto err_unlock; | 153 | goto err_unlock; |
154 | } | ||
141 | 155 | ||
142 | *entry = (struct nf_queue_entry) { | 156 | *entry = (struct nf_queue_entry) { |
143 | .skb = skb, | 157 | .skb = skb, |
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb, | |||
151 | 165 | ||
152 | /* If it's going away, ignore hook. */ | 166 | /* If it's going away, ignore hook. */ |
153 | if (!try_module_get(entry->elem->owner)) { | 167 | if (!try_module_get(entry->elem->owner)) { |
154 | rcu_read_unlock(); | 168 | status = -ECANCELED; |
155 | kfree(entry); | 169 | goto err_unlock; |
156 | return 0; | ||
157 | } | 170 | } |
158 | |||
159 | /* Bump dev refs so they don't vanish while packet is out */ | 171 | /* Bump dev refs so they don't vanish while packet is out */ |
160 | if (indev) | 172 | if (indev) |
161 | dev_hold(indev); | 173 | dev_hold(indev); |
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb, | |||
182 | goto err; | 194 | goto err; |
183 | } | 195 | } |
184 | 196 | ||
185 | return 1; | 197 | return 0; |
186 | 198 | ||
187 | err_unlock: | 199 | err_unlock: |
188 | rcu_read_unlock(); | 200 | rcu_read_unlock(); |
189 | err: | 201 | err: |
190 | kfree_skb(skb); | ||
191 | kfree(entry); | 202 | kfree(entry); |
192 | return 1; | 203 | return status; |
193 | } | 204 | } |
194 | 205 | ||
195 | int nf_queue(struct sk_buff *skb, | 206 | int nf_queue(struct sk_buff *skb, |
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb, | |||
201 | unsigned int queuenum) | 212 | unsigned int queuenum) |
202 | { | 213 | { |
203 | struct sk_buff *segs; | 214 | struct sk_buff *segs; |
215 | int err; | ||
216 | unsigned int queued; | ||
204 | 217 | ||
205 | if (!skb_is_gso(skb)) | 218 | if (!skb_is_gso(skb)) |
206 | return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, | 219 | return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, |
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb, | |||
216 | } | 229 | } |
217 | 230 | ||
218 | segs = skb_gso_segment(skb, 0); | 231 | segs = skb_gso_segment(skb, 0); |
219 | kfree_skb(skb); | 232 | /* Does not use PTR_ERR to limit the number of error codes that can be |
233 | * returned by nf_queue. For instance, callers rely on -ECANCELED to mean | ||
234 | * 'ignore this hook'. | ||
235 | */ | ||
220 | if (IS_ERR(segs)) | 236 | if (IS_ERR(segs)) |
221 | return 1; | 237 | return -EINVAL; |
222 | 238 | ||
239 | queued = 0; | ||
240 | err = 0; | ||
223 | do { | 241 | do { |
224 | struct sk_buff *nskb = segs->next; | 242 | struct sk_buff *nskb = segs->next; |
225 | 243 | ||
226 | segs->next = NULL; | 244 | segs->next = NULL; |
227 | if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, | 245 | if (err == 0) |
228 | queuenum)) | 246 | err = __nf_queue(segs, elem, pf, hook, indev, |
247 | outdev, okfn, queuenum); | ||
248 | if (err == 0) | ||
249 | queued++; | ||
250 | else | ||
229 | kfree_skb(segs); | 251 | kfree_skb(segs); |
230 | segs = nskb; | 252 | segs = nskb; |
231 | } while (segs); | 253 | } while (segs); |
232 | return 1; | 254 | |
255 | /* also free orig skb if only some segments were queued */ | ||
256 | if (unlikely(err && queued)) | ||
257 | err = 0; | ||
258 | if (err == 0) | ||
259 | kfree_skb(skb); | ||
260 | return err; | ||
233 | } | 261 | } |
234 | 262 | ||
235 | void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) | 263 | void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) |
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) | |||
237 | struct sk_buff *skb = entry->skb; | 265 | struct sk_buff *skb = entry->skb; |
238 | struct list_head *elem = &entry->elem->list; | 266 | struct list_head *elem = &entry->elem->list; |
239 | const struct nf_afinfo *afinfo; | 267 | const struct nf_afinfo *afinfo; |
268 | int err; | ||
240 | 269 | ||
241 | rcu_read_lock(); | 270 | rcu_read_lock(); |
242 | 271 | ||
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) | |||
270 | local_bh_enable(); | 299 | local_bh_enable(); |
271 | break; | 300 | break; |
272 | case NF_QUEUE: | 301 | case NF_QUEUE: |
273 | if (!__nf_queue(skb, elem, entry->pf, entry->hook, | 302 | err = __nf_queue(skb, elem, entry->pf, entry->hook, |
274 | entry->indev, entry->outdev, entry->okfn, | 303 | entry->indev, entry->outdev, entry->okfn, |
275 | verdict >> NF_VERDICT_BITS)) | 304 | verdict >> NF_VERDICT_QBITS); |
276 | goto next_hook; | 305 | if (err < 0) { |
306 | if (err == -ECANCELED) | ||
307 | goto next_hook; | ||
308 | if (err == -ESRCH && | ||
309 | (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) | ||
310 | goto next_hook; | ||
311 | kfree_skb(skb); | ||
312 | } | ||
277 | break; | 313 | break; |
278 | case NF_STOLEN: | 314 | case NF_STOLEN: |
279 | default: | 315 | default: |
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6a1572b0ab41..91592da504b9 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c | |||
@@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st) | |||
874 | 874 | ||
875 | for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { | 875 | for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { |
876 | if (!hlist_empty(&instance_table[st->bucket])) | 876 | if (!hlist_empty(&instance_table[st->bucket])) |
877 | return rcu_dereference_bh(instance_table[st->bucket].first); | 877 | return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); |
878 | } | 878 | } |
879 | return NULL; | 879 | return NULL; |
880 | } | 880 | } |
881 | 881 | ||
882 | static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) | 882 | static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) |
883 | { | 883 | { |
884 | h = rcu_dereference_bh(h->next); | 884 | h = rcu_dereference_bh(hlist_next_rcu(h)); |
885 | while (!h) { | 885 | while (!h) { |
886 | if (++st->bucket >= INSTANCE_BUCKETS) | 886 | if (++st->bucket >= INSTANCE_BUCKETS) |
887 | return NULL; | 887 | return NULL; |
888 | 888 | ||
889 | h = rcu_dereference_bh(instance_table[st->bucket].first); | 889 | h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); |
890 | } | 890 | } |
891 | return h; | 891 | return h; |
892 | } | 892 | } |
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 68e67d19724d..b83123f12b42 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c | |||
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | |||
387 | { | 387 | { |
388 | struct sk_buff *nskb; | 388 | struct sk_buff *nskb; |
389 | struct nfqnl_instance *queue; | 389 | struct nfqnl_instance *queue; |
390 | int err; | 390 | int err = -ENOBUFS; |
391 | 391 | ||
392 | /* rcu_read_lock()ed by nf_hook_slow() */ | 392 | /* rcu_read_lock()ed by nf_hook_slow() */ |
393 | queue = instance_lookup(queuenum); | 393 | queue = instance_lookup(queuenum); |
394 | if (!queue) | 394 | if (!queue) { |
395 | err = -ESRCH; | ||
395 | goto err_out; | 396 | goto err_out; |
397 | } | ||
396 | 398 | ||
397 | if (queue->copy_mode == NFQNL_COPY_NONE) | 399 | if (queue->copy_mode == NFQNL_COPY_NONE) { |
400 | err = -EINVAL; | ||
398 | goto err_out; | 401 | goto err_out; |
402 | } | ||
399 | 403 | ||
400 | nskb = nfqnl_build_packet_message(queue, entry); | 404 | nskb = nfqnl_build_packet_message(queue, entry); |
401 | if (nskb == NULL) | 405 | if (nskb == NULL) { |
406 | err = -ENOMEM; | ||
402 | goto err_out; | 407 | goto err_out; |
403 | 408 | } | |
404 | spin_lock_bh(&queue->lock); | 409 | spin_lock_bh(&queue->lock); |
405 | 410 | ||
406 | if (!queue->peer_pid) | 411 | if (!queue->peer_pid) { |
412 | err = -EINVAL; | ||
407 | goto err_out_free_nskb; | 413 | goto err_out_free_nskb; |
408 | 414 | } | |
409 | if (queue->queue_total >= queue->queue_maxlen) { | 415 | if (queue->queue_total >= queue->queue_maxlen) { |
410 | queue->queue_dropped++; | 416 | queue->queue_dropped++; |
411 | if (net_ratelimit()) | 417 | if (net_ratelimit()) |
@@ -432,7 +438,7 @@ err_out_free_nskb: | |||
432 | err_out_unlock: | 438 | err_out_unlock: |
433 | spin_unlock_bh(&queue->lock); | 439 | spin_unlock_bh(&queue->lock); |
434 | err_out: | 440 | err_out: |
435 | return -1; | 441 | return err; |
436 | } | 442 | } |
437 | 443 | ||
438 | static int | 444 | static int |
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c94237631077..0a77d2ff2154 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/audit.h> | ||
26 | #include <net/net_namespace.h> | 27 | #include <net/net_namespace.h> |
27 | 28 | ||
28 | #include <linux/netfilter/x_tables.h> | 29 | #include <linux/netfilter/x_tables.h> |
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); | |||
38 | #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) | 39 | #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) |
39 | 40 | ||
40 | struct compat_delta { | 41 | struct compat_delta { |
41 | struct compat_delta *next; | 42 | unsigned int offset; /* offset in kernel */ |
42 | unsigned int offset; | 43 | int delta; /* delta in 32bit user land */ |
43 | int delta; | ||
44 | }; | 44 | }; |
45 | 45 | ||
46 | struct xt_af { | 46 | struct xt_af { |
@@ -49,7 +49,9 @@ struct xt_af { | |||
49 | struct list_head target; | 49 | struct list_head target; |
50 | #ifdef CONFIG_COMPAT | 50 | #ifdef CONFIG_COMPAT |
51 | struct mutex compat_mutex; | 51 | struct mutex compat_mutex; |
52 | struct compat_delta *compat_offsets; | 52 | struct compat_delta *compat_tab; |
53 | unsigned int number; /* number of slots in compat_tab[] */ | ||
54 | unsigned int cur; /* number of used slots in compat_tab[] */ | ||
53 | #endif | 55 | #endif |
54 | }; | 56 | }; |
55 | 57 | ||
@@ -414,54 +416,67 @@ int xt_check_match(struct xt_mtchk_param *par, | |||
414 | EXPORT_SYMBOL_GPL(xt_check_match); | 416 | EXPORT_SYMBOL_GPL(xt_check_match); |
415 | 417 | ||
416 | #ifdef CONFIG_COMPAT | 418 | #ifdef CONFIG_COMPAT |
417 | int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) | 419 | int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) |
418 | { | 420 | { |
419 | struct compat_delta *tmp; | 421 | struct xt_af *xp = &xt[af]; |
420 | 422 | ||
421 | tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); | 423 | if (!xp->compat_tab) { |
422 | if (!tmp) | 424 | if (!xp->number) |
423 | return -ENOMEM; | 425 | return -EINVAL; |
426 | xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number); | ||
427 | if (!xp->compat_tab) | ||
428 | return -ENOMEM; | ||
429 | xp->cur = 0; | ||
430 | } | ||
424 | 431 | ||
425 | tmp->offset = offset; | 432 | if (xp->cur >= xp->number) |
426 | tmp->delta = delta; | 433 | return -EINVAL; |
427 | 434 | ||
428 | if (xt[af].compat_offsets) { | 435 | if (xp->cur) |
429 | tmp->next = xt[af].compat_offsets->next; | 436 | delta += xp->compat_tab[xp->cur - 1].delta; |
430 | xt[af].compat_offsets->next = tmp; | 437 | xp->compat_tab[xp->cur].offset = offset; |
431 | } else { | 438 | xp->compat_tab[xp->cur].delta = delta; |
432 | xt[af].compat_offsets = tmp; | 439 | xp->cur++; |
433 | tmp->next = NULL; | ||
434 | } | ||
435 | return 0; | 440 | return 0; |
436 | } | 441 | } |
437 | EXPORT_SYMBOL_GPL(xt_compat_add_offset); | 442 | EXPORT_SYMBOL_GPL(xt_compat_add_offset); |
438 | 443 | ||
439 | void xt_compat_flush_offsets(u_int8_t af) | 444 | void xt_compat_flush_offsets(u_int8_t af) |
440 | { | 445 | { |
441 | struct compat_delta *tmp, *next; | 446 | if (xt[af].compat_tab) { |
442 | 447 | vfree(xt[af].compat_tab); | |
443 | if (xt[af].compat_offsets) { | 448 | xt[af].compat_tab = NULL; |
444 | for (tmp = xt[af].compat_offsets; tmp; tmp = next) { | 449 | xt[af].number = 0; |
445 | next = tmp->next; | ||
446 | kfree(tmp); | ||
447 | } | ||
448 | xt[af].compat_offsets = NULL; | ||
449 | } | 450 | } |
450 | } | 451 | } |
451 | EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); | 452 | EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); |
452 | 453 | ||
453 | int xt_compat_calc_jump(u_int8_t af, unsigned int offset) | 454 | int xt_compat_calc_jump(u_int8_t af, unsigned int offset) |
454 | { | 455 | { |
455 | struct compat_delta *tmp; | 456 | struct compat_delta *tmp = xt[af].compat_tab; |
456 | int delta; | 457 | int mid, left = 0, right = xt[af].cur - 1; |
457 | 458 | ||
458 | for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) | 459 | while (left <= right) { |
459 | if (tmp->offset < offset) | 460 | mid = (left + right) >> 1; |
460 | delta += tmp->delta; | 461 | if (offset > tmp[mid].offset) |
461 | return delta; | 462 | left = mid + 1; |
463 | else if (offset < tmp[mid].offset) | ||
464 | right = mid - 1; | ||
465 | else | ||
466 | return mid ? tmp[mid - 1].delta : 0; | ||
467 | } | ||
468 | WARN_ON_ONCE(1); | ||
469 | return 0; | ||
462 | } | 470 | } |
463 | EXPORT_SYMBOL_GPL(xt_compat_calc_jump); | 471 | EXPORT_SYMBOL_GPL(xt_compat_calc_jump); |
464 | 472 | ||
473 | void xt_compat_init_offsets(u_int8_t af, unsigned int number) | ||
474 | { | ||
475 | xt[af].number = number; | ||
476 | xt[af].cur = 0; | ||
477 | } | ||
478 | EXPORT_SYMBOL(xt_compat_init_offsets); | ||
479 | |||
465 | int xt_compat_match_offset(const struct xt_match *match) | 480 | int xt_compat_match_offset(const struct xt_match *match) |
466 | { | 481 | { |
467 | u_int16_t csize = match->compatsize ? : match->matchsize; | 482 | u_int16_t csize = match->compatsize ? : match->matchsize; |
@@ -820,6 +835,21 @@ xt_replace_table(struct xt_table *table, | |||
820 | */ | 835 | */ |
821 | local_bh_enable(); | 836 | local_bh_enable(); |
822 | 837 | ||
838 | #ifdef CONFIG_AUDIT | ||
839 | if (audit_enabled) { | ||
840 | struct audit_buffer *ab; | ||
841 | |||
842 | ab = audit_log_start(current->audit_context, GFP_KERNEL, | ||
843 | AUDIT_NETFILTER_CFG); | ||
844 | if (ab) { | ||
845 | audit_log_format(ab, "table=%s family=%u entries=%u", | ||
846 | table->name, table->af, | ||
847 | private->number); | ||
848 | audit_log_end(ab); | ||
849 | } | ||
850 | } | ||
851 | #endif | ||
852 | |||
823 | return private; | 853 | return private; |
824 | } | 854 | } |
825 | EXPORT_SYMBOL_GPL(xt_replace_table); | 855 | EXPORT_SYMBOL_GPL(xt_replace_table); |
@@ -1338,7 +1368,7 @@ static int __init xt_init(void) | |||
1338 | mutex_init(&xt[i].mutex); | 1368 | mutex_init(&xt[i].mutex); |
1339 | #ifdef CONFIG_COMPAT | 1369 | #ifdef CONFIG_COMPAT |
1340 | mutex_init(&xt[i].compat_mutex); | 1370 | mutex_init(&xt[i].compat_mutex); |
1341 | xt[i].compat_offsets = NULL; | 1371 | xt[i].compat_tab = NULL; |
1342 | #endif | 1372 | #endif |
1343 | INIT_LIST_HEAD(&xt[i].target); | 1373 | INIT_LIST_HEAD(&xt[i].target); |
1344 | INIT_LIST_HEAD(&xt[i].match); | 1374 | INIT_LIST_HEAD(&xt[i].match); |
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c new file mode 100644 index 000000000000..81802d27346e --- /dev/null +++ b/net/netfilter/xt_AUDIT.c | |||
@@ -0,0 +1,204 @@ | |||
1 | /* | ||
2 | * Creates audit record for dropped/accepted packets | ||
3 | * | ||
4 | * (C) 2010-2011 Thomas Graf <tgraf@redhat.com> | ||
5 | * (C) 2010-2011 Red Hat, Inc. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | |||
14 | #include <linux/audit.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/tcp.h> | ||
18 | #include <linux/udp.h> | ||
19 | #include <linux/if_arp.h> | ||
20 | #include <linux/netfilter/x_tables.h> | ||
21 | #include <linux/netfilter/xt_AUDIT.h> | ||
22 | #include <net/ipv6.h> | ||
23 | #include <net/ip.h> | ||
24 | |||
25 | MODULE_LICENSE("GPL"); | ||
26 | MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>"); | ||
27 | MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets"); | ||
28 | MODULE_ALIAS("ipt_AUDIT"); | ||
29 | MODULE_ALIAS("ip6t_AUDIT"); | ||
30 | MODULE_ALIAS("ebt_AUDIT"); | ||
31 | MODULE_ALIAS("arpt_AUDIT"); | ||
32 | |||
33 | static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb, | ||
34 | unsigned int proto, unsigned int offset) | ||
35 | { | ||
36 | switch (proto) { | ||
37 | case IPPROTO_TCP: | ||
38 | case IPPROTO_UDP: | ||
39 | case IPPROTO_UDPLITE: { | ||
40 | const __be16 *pptr; | ||
41 | __be16 _ports[2]; | ||
42 | |||
43 | pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports); | ||
44 | if (pptr == NULL) { | ||
45 | audit_log_format(ab, " truncated=1"); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | audit_log_format(ab, " sport=%hu dport=%hu", | ||
50 | ntohs(pptr[0]), ntohs(pptr[1])); | ||
51 | } | ||
52 | break; | ||
53 | |||
54 | case IPPROTO_ICMP: | ||
55 | case IPPROTO_ICMPV6: { | ||
56 | const u8 *iptr; | ||
57 | u8 _ih[2]; | ||
58 | |||
59 | iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih); | ||
60 | if (iptr == NULL) { | ||
61 | audit_log_format(ab, " truncated=1"); | ||
62 | return; | ||
63 | } | ||
64 | |||
65 | audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu", | ||
66 | iptr[0], iptr[1]); | ||
67 | |||
68 | } | ||
69 | break; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) | ||
74 | { | ||
75 | struct iphdr _iph; | ||
76 | const struct iphdr *ih; | ||
77 | |||
78 | ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); | ||
79 | if (!ih) { | ||
80 | audit_log_format(ab, " truncated=1"); | ||
81 | return; | ||
82 | } | ||
83 | |||
84 | audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu", | ||
85 | &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol); | ||
86 | |||
87 | if (ntohs(ih->frag_off) & IP_OFFSET) { | ||
88 | audit_log_format(ab, " frag=1"); | ||
89 | return; | ||
90 | } | ||
91 | |||
92 | audit_proto(ab, skb, ih->protocol, ih->ihl * 4); | ||
93 | } | ||
94 | |||
95 | static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) | ||
96 | { | ||
97 | struct ipv6hdr _ip6h; | ||
98 | const struct ipv6hdr *ih; | ||
99 | u8 nexthdr; | ||
100 | int offset; | ||
101 | |||
102 | ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); | ||
103 | if (!ih) { | ||
104 | audit_log_format(ab, " truncated=1"); | ||
105 | return; | ||
106 | } | ||
107 | |||
108 | nexthdr = ih->nexthdr; | ||
109 | offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), | ||
110 | &nexthdr); | ||
111 | |||
112 | audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", | ||
113 | &ih->saddr, &ih->daddr, nexthdr); | ||
114 | |||
115 | if (offset) | ||
116 | audit_proto(ab, skb, nexthdr, offset); | ||
117 | } | ||
118 | |||
119 | static unsigned int | ||
120 | audit_tg(struct sk_buff *skb, const struct xt_action_param *par) | ||
121 | { | ||
122 | const struct xt_audit_info *info = par->targinfo; | ||
123 | struct audit_buffer *ab; | ||
124 | |||
125 | ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); | ||
126 | if (ab == NULL) | ||
127 | goto errout; | ||
128 | |||
129 | audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", | ||
130 | info->type, par->hooknum, skb->len, | ||
131 | par->in ? par->in->name : "?", | ||
132 | par->out ? par->out->name : "?"); | ||
133 | |||
134 | if (skb->mark) | ||
135 | audit_log_format(ab, " mark=%#x", skb->mark); | ||
136 | |||
137 | if (skb->dev && skb->dev->type == ARPHRD_ETHER) { | ||
138 | audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x", | ||
139 | eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, | ||
140 | ntohs(eth_hdr(skb)->h_proto)); | ||
141 | |||
142 | if (par->family == NFPROTO_BRIDGE) { | ||
143 | switch (eth_hdr(skb)->h_proto) { | ||
144 | case __constant_htons(ETH_P_IP): | ||
145 | audit_ip4(ab, skb); | ||
146 | break; | ||
147 | |||
148 | case __constant_htons(ETH_P_IPV6): | ||
149 | audit_ip6(ab, skb); | ||
150 | break; | ||
151 | } | ||
152 | } | ||
153 | } | ||
154 | |||
155 | switch (par->family) { | ||
156 | case NFPROTO_IPV4: | ||
157 | audit_ip4(ab, skb); | ||
158 | break; | ||
159 | |||
160 | case NFPROTO_IPV6: | ||
161 | audit_ip6(ab, skb); | ||
162 | break; | ||
163 | } | ||
164 | |||
165 | audit_log_end(ab); | ||
166 | |||
167 | errout: | ||
168 | return XT_CONTINUE; | ||
169 | } | ||
170 | |||
171 | static int audit_tg_check(const struct xt_tgchk_param *par) | ||
172 | { | ||
173 | const struct xt_audit_info *info = par->targinfo; | ||
174 | |||
175 | if (info->type > XT_AUDIT_TYPE_MAX) { | ||
176 | pr_info("Audit type out of range (valid range: 0..%hhu)\n", | ||
177 | XT_AUDIT_TYPE_MAX); | ||
178 | return -ERANGE; | ||
179 | } | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static struct xt_target audit_tg_reg __read_mostly = { | ||
185 | .name = "AUDIT", | ||
186 | .family = NFPROTO_UNSPEC, | ||
187 | .target = audit_tg, | ||
188 | .targetsize = sizeof(struct xt_audit_info), | ||
189 | .checkentry = audit_tg_check, | ||
190 | .me = THIS_MODULE, | ||
191 | }; | ||
192 | |||
193 | static int __init audit_tg_init(void) | ||
194 | { | ||
195 | return xt_register_target(&audit_tg_reg); | ||
196 | } | ||
197 | |||
198 | static void __exit audit_tg_exit(void) | ||
199 | { | ||
200 | xt_unregister_target(&audit_tg_reg); | ||
201 | } | ||
202 | |||
203 | module_init(audit_tg_init); | ||
204 | module_exit(audit_tg_exit); | ||
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index c2c0e4abeb99..af9c4dadf816 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c | |||
@@ -19,12 +19,14 @@ | |||
19 | #include <linux/netfilter_ipv6.h> | 19 | #include <linux/netfilter_ipv6.h> |
20 | #include <linux/netfilter/x_tables.h> | 20 | #include <linux/netfilter/x_tables.h> |
21 | #include <linux/netfilter/xt_CLASSIFY.h> | 21 | #include <linux/netfilter/xt_CLASSIFY.h> |
22 | #include <linux/netfilter_arp.h> | ||
22 | 23 | ||
23 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | 24 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); |
24 | MODULE_LICENSE("GPL"); | 25 | MODULE_LICENSE("GPL"); |
25 | MODULE_DESCRIPTION("Xtables: Qdisc classification"); | 26 | MODULE_DESCRIPTION("Xtables: Qdisc classification"); |
26 | MODULE_ALIAS("ipt_CLASSIFY"); | 27 | MODULE_ALIAS("ipt_CLASSIFY"); |
27 | MODULE_ALIAS("ip6t_CLASSIFY"); | 28 | MODULE_ALIAS("ip6t_CLASSIFY"); |
29 | MODULE_ALIAS("arpt_CLASSIFY"); | ||
28 | 30 | ||
29 | static unsigned int | 31 | static unsigned int |
30 | classify_tg(struct sk_buff *skb, const struct xt_action_param *par) | 32 | classify_tg(struct sk_buff *skb, const struct xt_action_param *par) |
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
35 | return XT_CONTINUE; | 37 | return XT_CONTINUE; |
36 | } | 38 | } |
37 | 39 | ||
38 | static struct xt_target classify_tg_reg __read_mostly = { | 40 | static struct xt_target classify_tg_reg[] __read_mostly = { |
39 | .name = "CLASSIFY", | 41 | { |
40 | .revision = 0, | 42 | .name = "CLASSIFY", |
41 | .family = NFPROTO_UNSPEC, | 43 | .revision = 0, |
42 | .table = "mangle", | 44 | .family = NFPROTO_UNSPEC, |
43 | .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | | 45 | .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | |
44 | (1 << NF_INET_POST_ROUTING), | 46 | (1 << NF_INET_POST_ROUTING), |
45 | .target = classify_tg, | 47 | .target = classify_tg, |
46 | .targetsize = sizeof(struct xt_classify_target_info), | 48 | .targetsize = sizeof(struct xt_classify_target_info), |
47 | .me = THIS_MODULE, | 49 | .me = THIS_MODULE, |
50 | }, | ||
51 | { | ||
52 | .name = "CLASSIFY", | ||
53 | .revision = 0, | ||
54 | .family = NFPROTO_ARP, | ||
55 | .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD), | ||
56 | .target = classify_tg, | ||
57 | .targetsize = sizeof(struct xt_classify_target_info), | ||
58 | .me = THIS_MODULE, | ||
59 | }, | ||
48 | }; | 60 | }; |
49 | 61 | ||
50 | static int __init classify_tg_init(void) | 62 | static int __init classify_tg_init(void) |
51 | { | 63 | { |
52 | return xt_register_target(&classify_tg_reg); | 64 | return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); |
53 | } | 65 | } |
54 | 66 | ||
55 | static void __exit classify_tg_exit(void) | 67 | static void __exit classify_tg_exit(void) |
56 | { | 68 | { |
57 | xt_unregister_target(&classify_tg_reg); | 69 | xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); |
58 | } | 70 | } |
59 | 71 | ||
60 | module_init(classify_tg_init); | 72 | module_init(classify_tg_init); |
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index be1f22e13545..3bdd443aaf15 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c | |||
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>"); | |||
313 | MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); | 313 | MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); |
314 | MODULE_DESCRIPTION("Xtables: idle time monitor"); | 314 | MODULE_DESCRIPTION("Xtables: idle time monitor"); |
315 | MODULE_LICENSE("GPL v2"); | 315 | MODULE_LICENSE("GPL v2"); |
316 | MODULE_ALIAS("ipt_IDLETIMER"); | ||
317 | MODULE_ALIAS("ip6t_IDLETIMER"); | ||
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index a4140509eea1..993de2ba89d3 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c | |||
@@ -31,6 +31,8 @@ | |||
31 | MODULE_LICENSE("GPL"); | 31 | MODULE_LICENSE("GPL"); |
32 | MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); | 32 | MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); |
33 | MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); | 33 | MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); |
34 | MODULE_ALIAS("ipt_LED"); | ||
35 | MODULE_ALIAS("ip6t_LED"); | ||
34 | 36 | ||
35 | static LIST_HEAD(xt_led_triggers); | 37 | static LIST_HEAD(xt_led_triggers); |
36 | static DEFINE_MUTEX(xt_led_mutex); | 38 | static DEFINE_MUTEX(xt_led_mutex); |
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 039cce1bde3d..d4f4b5d66b20 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c | |||
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) | |||
72 | 72 | ||
73 | if (info->queues_total > 1) { | 73 | if (info->queues_total > 1) { |
74 | if (par->family == NFPROTO_IPV4) | 74 | if (par->family == NFPROTO_IPV4) |
75 | queue = hash_v4(skb) % info->queues_total + queue; | 75 | queue = (((u64) hash_v4(skb) * info->queues_total) >> |
76 | 32) + queue; | ||
76 | #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) | 77 | #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) |
77 | else if (par->family == NFPROTO_IPV6) | 78 | else if (par->family == NFPROTO_IPV6) |
78 | queue = hash_v6(skb) % info->queues_total + queue; | 79 | queue = (((u64) hash_v6(skb) * info->queues_total) >> |
80 | 32) + queue; | ||
79 | #endif | 81 | #endif |
80 | } | 82 | } |
81 | return NF_QUEUE_NR(queue); | 83 | return NF_QUEUE_NR(queue); |
82 | } | 84 | } |
83 | 85 | ||
84 | static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) | 86 | static unsigned int |
87 | nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) | ||
85 | { | 88 | { |
86 | const struct xt_NFQ_info_v1 *info = par->targinfo; | 89 | const struct xt_NFQ_info_v2 *info = par->targinfo; |
90 | unsigned int ret = nfqueue_tg_v1(skb, par); | ||
91 | |||
92 | if (info->bypass) | ||
93 | ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; | ||
94 | return ret; | ||
95 | } | ||
96 | |||
97 | static int nfqueue_tg_check(const struct xt_tgchk_param *par) | ||
98 | { | ||
99 | const struct xt_NFQ_info_v2 *info = par->targinfo; | ||
87 | u32 maxid; | 100 | u32 maxid; |
88 | 101 | ||
89 | if (unlikely(!rnd_inited)) { | 102 | if (unlikely(!rnd_inited)) { |
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) | |||
100 | info->queues_total, maxid); | 113 | info->queues_total, maxid); |
101 | return -ERANGE; | 114 | return -ERANGE; |
102 | } | 115 | } |
116 | if (par->target->revision == 2 && info->bypass > 1) | ||
117 | return -EINVAL; | ||
103 | return 0; | 118 | return 0; |
104 | } | 119 | } |
105 | 120 | ||
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { | |||
115 | .name = "NFQUEUE", | 130 | .name = "NFQUEUE", |
116 | .revision = 1, | 131 | .revision = 1, |
117 | .family = NFPROTO_UNSPEC, | 132 | .family = NFPROTO_UNSPEC, |
118 | .checkentry = nfqueue_tg_v1_check, | 133 | .checkentry = nfqueue_tg_check, |
119 | .target = nfqueue_tg_v1, | 134 | .target = nfqueue_tg_v1, |
120 | .targetsize = sizeof(struct xt_NFQ_info_v1), | 135 | .targetsize = sizeof(struct xt_NFQ_info_v1), |
121 | .me = THIS_MODULE, | 136 | .me = THIS_MODULE, |
122 | }, | 137 | }, |
138 | { | ||
139 | .name = "NFQUEUE", | ||
140 | .revision = 2, | ||
141 | .family = NFPROTO_UNSPEC, | ||
142 | .checkentry = nfqueue_tg_check, | ||
143 | .target = nfqueue_tg_v2, | ||
144 | .targetsize = sizeof(struct xt_NFQ_info_v2), | ||
145 | .me = THIS_MODULE, | ||
146 | }, | ||
123 | }; | 147 | }; |
124 | 148 | ||
125 | static int __init nfqueue_tg_init(void) | 149 | static int __init nfqueue_tg_init(void) |
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 5c5b6b921b84..452bc16af56c 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c | |||
@@ -204,11 +204,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
204 | &info->mask, par->family); | 204 | &info->mask, par->family); |
205 | spin_unlock_bh(&info->data->lock); | 205 | spin_unlock_bh(&info->data->lock); |
206 | 206 | ||
207 | if (connections < 0) { | 207 | if (connections < 0) |
208 | /* kmalloc failed, drop it entirely */ | 208 | /* kmalloc failed, drop it entirely */ |
209 | par->hotdrop = true; | 209 | goto hotdrop; |
210 | return false; | ||
211 | } | ||
212 | 210 | ||
213 | return (connections > info->limit) ^ info->inverse; | 211 | return (connections > info->limit) ^ info->inverse; |
214 | 212 | ||
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index e536710ad916..4ef1b63ad73f 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c | |||
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, | |||
112 | return true; | 112 | return true; |
113 | } | 113 | } |
114 | 114 | ||
/*
 * port_match - test a port against an inclusive range, optionally inverted
 * @min:    lowest port of the range
 * @max:    highest port of the range
 * @port:   port to test
 * @invert: when true, the result of the range test is negated
 *
 * Returns true when @port lies in [@min, @max] and @invert is false, or
 * when it lies outside the range and @invert is true.
 */
static inline bool
port_match(u16 min, u16 max, u16 port, bool invert)
{
	bool in_range = min <= port && port <= max;

	return in_range != invert;
}
120 | |||
121 | static inline bool | ||
122 | ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info, | ||
123 | const struct nf_conn *ct) | ||
124 | { | ||
125 | const struct nf_conntrack_tuple *tuple; | ||
126 | |||
127 | tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | ||
128 | if ((info->match_flags & XT_CONNTRACK_PROTO) && | ||
129 | (nf_ct_protonum(ct) == info->l4proto) ^ | ||
130 | !(info->invert_flags & XT_CONNTRACK_PROTO)) | ||
131 | return false; | ||
132 | |||
133 | /* Shortcut to match all recognized protocols by using ->src.all. */ | ||
134 | if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) && | ||
135 | !port_match(info->origsrc_port, info->origsrc_port_high, | ||
136 | ntohs(tuple->src.u.all), | ||
137 | info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT)) | ||
138 | return false; | ||
139 | |||
140 | if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) && | ||
141 | !port_match(info->origdst_port, info->origdst_port_high, | ||
142 | ntohs(tuple->dst.u.all), | ||
143 | info->invert_flags & XT_CONNTRACK_ORIGDST_PORT)) | ||
144 | return false; | ||
145 | |||
146 | tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
147 | |||
148 | if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) && | ||
149 | !port_match(info->replsrc_port, info->replsrc_port_high, | ||
150 | ntohs(tuple->src.u.all), | ||
151 | info->invert_flags & XT_CONNTRACK_REPLSRC_PORT)) | ||
152 | return false; | ||
153 | |||
154 | if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) && | ||
155 | !port_match(info->repldst_port, info->repldst_port_high, | ||
156 | ntohs(tuple->dst.u.all), | ||
157 | info->invert_flags & XT_CONNTRACK_REPLDST_PORT)) | ||
158 | return false; | ||
159 | |||
160 | return true; | ||
161 | } | ||
162 | |||
115 | static bool | 163 | static bool |
116 | conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, | 164 | conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, |
117 | u16 state_mask, u16 status_mask) | 165 | u16 state_mask, u16 status_mask) |
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, | |||
170 | !(info->invert_flags & XT_CONNTRACK_REPLDST)) | 218 | !(info->invert_flags & XT_CONNTRACK_REPLDST)) |
171 | return false; | 219 | return false; |
172 | 220 | ||
173 | if (!ct_proto_port_check(info, ct)) | 221 | if (par->match->revision != 3) { |
174 | return false; | 222 | if (!ct_proto_port_check(info, ct)) |
223 | return false; | ||
224 | } else { | ||
225 | if (!ct_proto_port_check_v3(par->matchinfo, ct)) | ||
226 | return false; | ||
227 | } | ||
175 | 228 | ||
176 | if ((info->match_flags & XT_CONNTRACK_STATUS) && | 229 | if ((info->match_flags & XT_CONNTRACK_STATUS) && |
177 | (!!(status_mask & ct->status) ^ | 230 | (!!(status_mask & ct->status) ^ |
@@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) | |||
207 | return conntrack_mt(skb, par, info->state_mask, info->status_mask); | 260 | return conntrack_mt(skb, par, info->state_mask, info->status_mask); |
208 | } | 261 | } |
209 | 262 | ||
263 | static bool | ||
264 | conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par) | ||
265 | { | ||
266 | const struct xt_conntrack_mtinfo3 *info = par->matchinfo; | ||
267 | |||
268 | return conntrack_mt(skb, par, info->state_mask, info->status_mask); | ||
269 | } | ||
270 | |||
210 | static int conntrack_mt_check(const struct xt_mtchk_param *par) | 271 | static int conntrack_mt_check(const struct xt_mtchk_param *par) |
211 | { | 272 | { |
212 | int ret; | 273 | int ret; |
@@ -244,6 +305,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { | |||
244 | .destroy = conntrack_mt_destroy, | 305 | .destroy = conntrack_mt_destroy, |
245 | .me = THIS_MODULE, | 306 | .me = THIS_MODULE, |
246 | }, | 307 | }, |
308 | { | ||
309 | .name = "conntrack", | ||
310 | .revision = 3, | ||
311 | .family = NFPROTO_UNSPEC, | ||
312 | .matchsize = sizeof(struct xt_conntrack_mtinfo3), | ||
313 | .match = conntrack_mt_v3, | ||
314 | .checkentry = conntrack_mt_check, | ||
315 | .destroy = conntrack_mt_destroy, | ||
316 | .me = THIS_MODULE, | ||
317 | }, | ||
247 | }; | 318 | }; |
248 | 319 | ||
249 | static int __init conntrack_mt_init(void) | 320 | static int __init conntrack_mt_init(void) |
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c index b39db8a5cbae..c7a2e5466bc4 100644 --- a/net/netfilter/xt_cpu.c +++ b/net/netfilter/xt_cpu.c | |||
@@ -22,6 +22,8 @@ | |||
22 | MODULE_LICENSE("GPL"); | 22 | MODULE_LICENSE("GPL"); |
23 | MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); | 23 | MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); |
24 | MODULE_DESCRIPTION("Xtables: CPU match"); | 24 | MODULE_DESCRIPTION("Xtables: CPU match"); |
25 | MODULE_ALIAS("ipt_cpu"); | ||
26 | MODULE_ALIAS("ip6t_cpu"); | ||
25 | 27 | ||
26 | static int cpu_mt_check(const struct xt_mtchk_param *par) | 28 | static int cpu_mt_check(const struct xt_mtchk_param *par) |
27 | { | 29 | { |
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 9127a3d8aa35..bb10b0717f1b 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c | |||
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||
85 | /* | 85 | /* |
86 | * Check if the packet belongs to an existing entry | 86 | * Check if the packet belongs to an existing entry |
87 | */ | 87 | */ |
88 | cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); | 88 | cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); |
89 | if (unlikely(cp == NULL)) { | 89 | if (unlikely(cp == NULL)) { |
90 | match = false; | 90 | match = false; |
91 | goto out; | 91 | goto out; |
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 73431d4aa6ef..e318f458713e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig | |||
@@ -255,7 +255,7 @@ config NET_CLS_TCINDEX | |||
255 | 255 | ||
256 | config NET_CLS_ROUTE4 | 256 | config NET_CLS_ROUTE4 |
257 | tristate "Routing decision (ROUTE)" | 257 | tristate "Routing decision (ROUTE)" |
258 | select NET_CLS_ROUTE | 258 | select IP_ROUTE_CLASSID |
259 | select NET_CLS | 259 | select NET_CLS |
260 | ---help--- | 260 | ---help--- |
261 | If you say Y here, you will be able to classify packets | 261 | If you say Y here, you will be able to classify packets |
@@ -264,9 +264,6 @@ config NET_CLS_ROUTE4 | |||
264 | To compile this code as a module, choose M here: the | 264 | To compile this code as a module, choose M here: the |
265 | module will be called cls_route. | 265 | module will be called cls_route. |
266 | 266 | ||
267 | config NET_CLS_ROUTE | ||
268 | bool | ||
269 | |||
270 | config NET_CLS_FW | 267 | config NET_CLS_FW |
271 | tristate "Netfilter mark (FW)" | 268 | tristate "Netfilter mark (FW)" |
272 | select NET_CLS | 269 | select NET_CLS |
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 5eec16e516b9..8ec01391d988 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c | |||
@@ -276,7 +276,7 @@ fallback: | |||
276 | 276 | ||
277 | static u32 flow_get_rtclassid(const struct sk_buff *skb) | 277 | static u32 flow_get_rtclassid(const struct sk_buff *skb) |
278 | { | 278 | { |
279 | #ifdef CONFIG_NET_CLS_ROUTE | 279 | #ifdef CONFIG_IP_ROUTE_CLASSID |
280 | if (skb_dst(skb)) | 280 | if (skb_dst(skb)) |
281 | return skb_dst(skb)->tclassid; | 281 | return skb_dst(skb)->tclassid; |
282 | #endif | 282 | #endif |
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 7af1f65fe678..a889d099320f 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c | |||
@@ -252,7 +252,7 @@ META_COLLECTOR(int_rtclassid) | |||
252 | if (unlikely(skb_dst(skb) == NULL)) | 252 | if (unlikely(skb_dst(skb) == NULL)) |
253 | *err = -1; | 253 | *err = -1; |
254 | else | 254 | else |
255 | #ifdef CONFIG_NET_CLS_ROUTE | 255 | #ifdef CONFIG_IP_ROUTE_CLASSID |
256 | dst->value = skb_dst(skb)->tclassid; | 256 | dst->value = skb_dst(skb)->tclassid; |
257 | #else | 257 | #else |
258 | dst->value = 0; | 258 | dst->value = 0; |