about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2011-03-15 16:03:27 -0400
committer David S. Miller <davem@davemloft.net> 2011-03-15 16:03:27 -0400
commit 31111c26d976ca0f298312f08e44cdb078005b03 (patch)
tree ca08ef55b2ea91f9e69f8a78bb8b4363a1759b54
parent 0c0217b016ba8a970a6f6ab62ad0d858f39881ca (diff)
parent 2f5dc63123905a89d4260ab8ee08d19ec104db04 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
Conflicts: Documentation/feature-removal-schedule.txt
-rw-r--r-- Documentation/feature-removal-schedule.txt 9
-rw-r--r-- include/linux/netfilter/Kbuild 1
-rw-r--r-- include/linux/netfilter/xt_addrtype.h 44
-rw-r--r-- include/net/ip_vs.h 198
-rw-r--r-- include/net/net_namespace.h 2
-rw-r--r-- include/net/netns/ip_vs.h 143
-rw-r--r-- net/ipv4/netfilter/Kconfig 10
-rw-r--r-- net/ipv4/netfilter/Makefile 1
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 3
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 3
-rw-r--r-- net/ipv4/netfilter/ipt_addrtype.c 134
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 3
-rw-r--r-- net/netfilter/Kconfig 11
-rw-r--r-- net/netfilter/Makefile 1
-rw-r--r-- net/netfilter/ipset/ip_set_core.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c 13
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 104
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 256
-rw-r--r-- net/netfilter/ipvs/ip_vs_est.c 63
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c 31
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c 35
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe_sip.c 9
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 11
-rw-r--r-- net/netfilter/nf_conntrack_core.c 1
-rw-r--r-- net/netfilter/x_tables.c 26
-rw-r--r-- net/netfilter/xt_addrtype.c 229
-rw-r--r-- net/netfilter/xt_connlimit.c 59
27 files changed, 845 insertions, 557 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index d6f5255ca547..a9c4245e2fd8 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -637,3 +637,12 @@ Why: The original implementation of memsw feature enabled by
637Who: Michal Hocko <mhocko@suse.cz> 637Who: Michal Hocko <mhocko@suse.cz>
638 638
639---------------------------- 639----------------------------
640
641What: ipt_addrtype match include file
642When: 2012
643Why: superseded by xt_addrtype
644Who: Florian Westphal <fw@strlen.de>
645Files: include/linux/netfilter_ipv4/ipt_addrtype.h
646>>>>>>> 2f5dc63123905a89d4260ab8ee08d19ec104db04
647
648----------------------------
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 15e83bf3dd58..a1b410c76fc3 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -29,6 +29,7 @@ header-y += xt_TCPMSS.h
29header-y += xt_TCPOPTSTRIP.h 29header-y += xt_TCPOPTSTRIP.h
30header-y += xt_TEE.h 30header-y += xt_TEE.h
31header-y += xt_TPROXY.h 31header-y += xt_TPROXY.h
32header-y += xt_addrtype.h
32header-y += xt_cluster.h 33header-y += xt_cluster.h
33header-y += xt_comment.h 34header-y += xt_comment.h
34header-y += xt_connbytes.h 35header-y += xt_connbytes.h
diff --git a/include/linux/netfilter/xt_addrtype.h b/include/linux/netfilter/xt_addrtype.h
new file mode 100644
index 000000000000..b156baa9d55e
--- /dev/null
+++ b/include/linux/netfilter/xt_addrtype.h
@@ -0,0 +1,44 @@
1#ifndef _XT_ADDRTYPE_H
2#define _XT_ADDRTYPE_H
3
4#include <linux/types.h>
5
6enum {
7 XT_ADDRTYPE_INVERT_SOURCE = 0x0001,
8 XT_ADDRTYPE_INVERT_DEST = 0x0002,
9 XT_ADDRTYPE_LIMIT_IFACE_IN = 0x0004,
10 XT_ADDRTYPE_LIMIT_IFACE_OUT = 0x0008,
11};
12
13
14/* rtn_type enum values from rtnetlink.h, but shifted */
15enum {
16 XT_ADDRTYPE_UNSPEC = 1 << 0,
17 XT_ADDRTYPE_UNICAST = 1 << 1, /* 1 << RTN_UNICAST */
18 XT_ADDRTYPE_LOCAL = 1 << 2, /* 1 << RTN_LOCAL, etc */
19 XT_ADDRTYPE_BROADCAST = 1 << 3,
20 XT_ADDRTYPE_ANYCAST = 1 << 4,
21 XT_ADDRTYPE_MULTICAST = 1 << 5,
22 XT_ADDRTYPE_BLACKHOLE = 1 << 6,
23 XT_ADDRTYPE_UNREACHABLE = 1 << 7,
24 XT_ADDRTYPE_PROHIBIT = 1 << 8,
25 XT_ADDRTYPE_THROW = 1 << 9,
26 XT_ADDRTYPE_NAT = 1 << 10,
27 XT_ADDRTYPE_XRESOLVE = 1 << 11,
28};
29
30struct xt_addrtype_info_v1 {
31 __u16 source; /* source-type mask */
32 __u16 dest; /* dest-type mask */
33 __u32 flags;
34};
35
36/* revision 0 */
37struct xt_addrtype_info {
38 __u16 source; /* source-type mask */
39 __u16 dest; /* dest-type mask */
40 __u32 invert_source;
41 __u32 invert_dest;
42};
43
44#endif
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index e74da41ebd1b..272f59336b73 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -374,24 +374,9 @@ struct ip_vs_stats {
374 struct ip_vs_estimator est; /* estimator */ 374 struct ip_vs_estimator est; /* estimator */
375 struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ 375 struct ip_vs_cpu_stats *cpustats; /* per cpu counters */
376 spinlock_t lock; /* spin lock */ 376 spinlock_t lock; /* spin lock */
377 struct ip_vs_stats_user ustats0; /* reset values */
377}; 378};
378 379
379/*
380 * Helper Macros for per cpu
381 * ipvs->tot_stats->ustats.count
382 */
383#define IPVS_STAT_INC(ipvs, count) \
384 __this_cpu_inc((ipvs)->ustats->count)
385
386#define IPVS_STAT_ADD(ipvs, count, value) \
387 do {\
388 write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
389 raw_smp_processor_id())); \
390 __this_cpu_add((ipvs)->ustats->count, value); \
391 write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
392 raw_smp_processor_id())); \
393 } while (0)
394
395struct dst_entry; 380struct dst_entry;
396struct iphdr; 381struct iphdr;
397struct ip_vs_conn; 382struct ip_vs_conn;
@@ -803,6 +788,171 @@ struct ip_vs_app {
803 void (*timeout_change)(struct ip_vs_app *app, int flags); 788 void (*timeout_change)(struct ip_vs_app *app, int flags);
804}; 789};
805 790
791/* IPVS in network namespace */
792struct netns_ipvs {
793 int gen; /* Generation */
794 /*
795 * Hash table: for real service lookups
796 */
797 #define IP_VS_RTAB_BITS 4
798 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
799 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
800
801 struct list_head rs_table[IP_VS_RTAB_SIZE];
802 /* ip_vs_app */
803 struct list_head app_list;
804 struct mutex app_mutex;
805 struct lock_class_key app_key; /* mutex debuging */
806
807 /* ip_vs_proto */
808 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
809 struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
810 /* ip_vs_proto_tcp */
811#ifdef CONFIG_IP_VS_PROTO_TCP
812 #define TCP_APP_TAB_BITS 4
813 #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
814 #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
815 struct list_head tcp_apps[TCP_APP_TAB_SIZE];
816 spinlock_t tcp_app_lock;
817#endif
818 /* ip_vs_proto_udp */
819#ifdef CONFIG_IP_VS_PROTO_UDP
820 #define UDP_APP_TAB_BITS 4
821 #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
822 #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
823 struct list_head udp_apps[UDP_APP_TAB_SIZE];
824 spinlock_t udp_app_lock;
825#endif
826 /* ip_vs_proto_sctp */
827#ifdef CONFIG_IP_VS_PROTO_SCTP
828 #define SCTP_APP_TAB_BITS 4
829 #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
830 #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
831 /* Hash table for SCTP application incarnations */
832 struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
833 spinlock_t sctp_app_lock;
834#endif
835 /* ip_vs_conn */
836 atomic_t conn_count; /* connection counter */
837
838 /* ip_vs_ctl */
839 struct ip_vs_stats tot_stats; /* Statistics & est. */
840
841 int num_services; /* no of virtual services */
842
843 rwlock_t rs_lock; /* real services table */
844 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
845 struct lock_class_key ctl_key; /* ctl_mutex debuging */
846 /* Trash for destinations */
847 struct list_head dest_trash;
848 /* Service counters */
849 atomic_t ftpsvc_counter;
850 atomic_t nullsvc_counter;
851
852#ifdef CONFIG_SYSCTL
853 /* 1/rate drop and drop-entry variables */
854 struct delayed_work defense_work; /* Work handler */
855 int drop_rate;
856 int drop_counter;
857 atomic_t dropentry;
858 /* locks in ctl.c */
859 spinlock_t dropentry_lock; /* drop entry handling */
860 spinlock_t droppacket_lock; /* drop packet handling */
861 spinlock_t securetcp_lock; /* state and timeout tables */
862
863 /* sys-ctl struct */
864 struct ctl_table_header *sysctl_hdr;
865 struct ctl_table *sysctl_tbl;
866#endif
867
868 /* sysctl variables */
869 int sysctl_amemthresh;
870 int sysctl_am_droprate;
871 int sysctl_drop_entry;
872 int sysctl_drop_packet;
873 int sysctl_secure_tcp;
874#ifdef CONFIG_IP_VS_NFCT
875 int sysctl_conntrack;
876#endif
877 int sysctl_snat_reroute;
878 int sysctl_sync_ver;
879 int sysctl_cache_bypass;
880 int sysctl_expire_nodest_conn;
881 int sysctl_expire_quiescent_template;
882 int sysctl_sync_threshold[2];
883 int sysctl_nat_icmp_send;
884
885 /* ip_vs_lblc */
886 int sysctl_lblc_expiration;
887 struct ctl_table_header *lblc_ctl_header;
888 struct ctl_table *lblc_ctl_table;
889 /* ip_vs_lblcr */
890 int sysctl_lblcr_expiration;
891 struct ctl_table_header *lblcr_ctl_header;
892 struct ctl_table *lblcr_ctl_table;
893 /* ip_vs_est */
894 struct list_head est_list; /* estimator list */
895 spinlock_t est_lock;
896 struct timer_list est_timer; /* Estimation timer */
897 /* ip_vs_sync */
898 struct list_head sync_queue;
899 spinlock_t sync_lock;
900 struct ip_vs_sync_buff *sync_buff;
901 spinlock_t sync_buff_lock;
902 struct sockaddr_in sync_mcast_addr;
903 struct task_struct *master_thread;
904 struct task_struct *backup_thread;
905 int send_mesg_maxlen;
906 int recv_mesg_maxlen;
907 volatile int sync_state;
908 volatile int master_syncid;
909 volatile int backup_syncid;
910 /* multicast interface name */
911 char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
912 char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
913 /* net name space ptr */
914 struct net *net; /* Needed by timer routines */
915};
916
917#define DEFAULT_SYNC_THRESHOLD 3
918#define DEFAULT_SYNC_PERIOD 50
919#define DEFAULT_SYNC_VER 1
920
921#ifdef CONFIG_SYSCTL
922
923static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
924{
925 return ipvs->sysctl_sync_threshold[0];
926}
927
928static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
929{
930 return ipvs->sysctl_sync_threshold[1];
931}
932
933static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
934{
935 return ipvs->sysctl_sync_ver;
936}
937
938#else
939
940static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
941{
942 return DEFAULT_SYNC_THRESHOLD;
943}
944
945static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
946{
947 return DEFAULT_SYNC_PERIOD;
948}
949
950static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
951{
952 return DEFAULT_SYNC_VER;
953}
954
955#endif
806 956
807/* 957/*
808 * IPVS core functions 958 * IPVS core functions
@@ -1071,9 +1221,11 @@ extern void ip_vs_sync_cleanup(void);
1071 */ 1221 */
1072extern int ip_vs_estimator_init(void); 1222extern int ip_vs_estimator_init(void);
1073extern void ip_vs_estimator_cleanup(void); 1223extern void ip_vs_estimator_cleanup(void);
1074extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats); 1224extern void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
1075extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats); 1225extern void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
1076extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); 1226extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
1227extern void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
1228 struct ip_vs_stats *stats);
1077 1229
1078/* 1230/*
1079 * Various IPVS packet transmitters (from ip_vs_xmit.c) 1231 * Various IPVS packet transmitters (from ip_vs_xmit.c)
@@ -1106,6 +1258,7 @@ extern int ip_vs_icmp_xmit_v6
1106 int offset); 1258 int offset);
1107#endif 1259#endif
1108 1260
1261#ifdef CONFIG_SYSCTL
1109/* 1262/*
1110 * This is a simple mechanism to ignore packets when 1263 * This is a simple mechanism to ignore packets when
1111 * we are loaded. Just set ip_vs_drop_rate to 'n' and 1264 * we are loaded. Just set ip_vs_drop_rate to 'n' and
@@ -1121,6 +1274,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
1121 ipvs->drop_counter = ipvs->drop_rate; 1274 ipvs->drop_counter = ipvs->drop_rate;
1122 return 1; 1275 return 1;
1123} 1276}
1277#else
1278static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
1279#endif
1124 1280
1125/* 1281/*
1126 * ip_vs_fwd_tag returns the forwarding tag of the connection 1282 * ip_vs_fwd_tag returns the forwarding tag of the connection
@@ -1190,7 +1346,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
1190{ 1346{
1191#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 1347#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1192 enum ip_conntrack_info ctinfo; 1348 enum ip_conntrack_info ctinfo;
1193 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); 1349 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1194 1350
1195 if (!ct || !nf_ct_is_untracked(ct)) { 1351 if (!ct || !nf_ct_is_untracked(ct)) {
1196 nf_reset(skb); 1352 nf_reset(skb);
@@ -1208,7 +1364,11 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
1208 */ 1364 */
1209static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) 1365static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
1210{ 1366{
1367#ifdef CONFIG_SYSCTL
1211 return ipvs->sysctl_conntrack; 1368 return ipvs->sysctl_conntrack;
1369#else
1370 return 0;
1371#endif
1212} 1372}
1213 1373
1214extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, 1374extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index b3b4a34cb2cc..3ae491932bc8 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -20,7 +20,6 @@
20#include <net/netns/conntrack.h> 20#include <net/netns/conntrack.h>
21#endif 21#endif
22#include <net/netns/xfrm.h> 22#include <net/netns/xfrm.h>
23#include <net/netns/ip_vs.h>
24 23
25struct proc_dir_entry; 24struct proc_dir_entry;
26struct net_device; 25struct net_device;
@@ -28,6 +27,7 @@ struct sock;
28struct ctl_table_header; 27struct ctl_table_header;
29struct net_generic; 28struct net_generic;
30struct sock; 29struct sock;
30struct netns_ipvs;
31 31
32 32
33#define NETDEV_HASHBITS 8 33#define NETDEV_HASHBITS 8
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
deleted file mode 100644
index 259ebac904bf..000000000000
--- a/include/net/netns/ip_vs.h
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * IP Virtual Server
3 * Data structure for network namspace
4 *
5 */
6
7#ifndef IP_VS_H_
8#define IP_VS_H_
9
10#include <linux/list.h>
11#include <linux/mutex.h>
12#include <linux/list_nulls.h>
13#include <linux/ip_vs.h>
14#include <asm/atomic.h>
15#include <linux/in.h>
16
17struct ip_vs_stats;
18struct ip_vs_sync_buff;
19struct ctl_table_header;
20
21struct netns_ipvs {
22 int gen; /* Generation */
23 /*
24 * Hash table: for real service lookups
25 */
26 #define IP_VS_RTAB_BITS 4
27 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
28 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
29
30 struct list_head rs_table[IP_VS_RTAB_SIZE];
31 /* ip_vs_app */
32 struct list_head app_list;
33 struct mutex app_mutex;
34 struct lock_class_key app_key; /* mutex debuging */
35
36 /* ip_vs_proto */
37 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
38 struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
39 /* ip_vs_proto_tcp */
40#ifdef CONFIG_IP_VS_PROTO_TCP
41 #define TCP_APP_TAB_BITS 4
42 #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
43 #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
44 struct list_head tcp_apps[TCP_APP_TAB_SIZE];
45 spinlock_t tcp_app_lock;
46#endif
47 /* ip_vs_proto_udp */
48#ifdef CONFIG_IP_VS_PROTO_UDP
49 #define UDP_APP_TAB_BITS 4
50 #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
51 #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
52 struct list_head udp_apps[UDP_APP_TAB_SIZE];
53 spinlock_t udp_app_lock;
54#endif
55 /* ip_vs_proto_sctp */
56#ifdef CONFIG_IP_VS_PROTO_SCTP
57 #define SCTP_APP_TAB_BITS 4
58 #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
59 #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
60 /* Hash table for SCTP application incarnations */
61 struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
62 spinlock_t sctp_app_lock;
63#endif
64 /* ip_vs_conn */
65 atomic_t conn_count; /* connection counter */
66
67 /* ip_vs_ctl */
68 struct ip_vs_stats *tot_stats; /* Statistics & est. */
69 struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
70 seqcount_t *ustats_seq; /* u64 read retry */
71
72 int num_services; /* no of virtual services */
73 /* 1/rate drop and drop-entry variables */
74 struct delayed_work defense_work; /* Work handler */
75 int drop_rate;
76 int drop_counter;
77 atomic_t dropentry;
78 /* locks in ctl.c */
79 spinlock_t dropentry_lock; /* drop entry handling */
80 spinlock_t droppacket_lock; /* drop packet handling */
81 spinlock_t securetcp_lock; /* state and timeout tables */
82 rwlock_t rs_lock; /* real services table */
83 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
84 struct lock_class_key ctl_key; /* ctl_mutex debuging */
85 /* Trash for destinations */
86 struct list_head dest_trash;
87 /* Service counters */
88 atomic_t ftpsvc_counter;
89 atomic_t nullsvc_counter;
90
91 /* sys-ctl struct */
92 struct ctl_table_header *sysctl_hdr;
93 struct ctl_table *sysctl_tbl;
94 /* sysctl variables */
95 int sysctl_amemthresh;
96 int sysctl_am_droprate;
97 int sysctl_drop_entry;
98 int sysctl_drop_packet;
99 int sysctl_secure_tcp;
100#ifdef CONFIG_IP_VS_NFCT
101 int sysctl_conntrack;
102#endif
103 int sysctl_snat_reroute;
104 int sysctl_sync_ver;
105 int sysctl_cache_bypass;
106 int sysctl_expire_nodest_conn;
107 int sysctl_expire_quiescent_template;
108 int sysctl_sync_threshold[2];
109 int sysctl_nat_icmp_send;
110
111 /* ip_vs_lblc */
112 int sysctl_lblc_expiration;
113 struct ctl_table_header *lblc_ctl_header;
114 struct ctl_table *lblc_ctl_table;
115 /* ip_vs_lblcr */
116 int sysctl_lblcr_expiration;
117 struct ctl_table_header *lblcr_ctl_header;
118 struct ctl_table *lblcr_ctl_table;
119 /* ip_vs_est */
120 struct list_head est_list; /* estimator list */
121 spinlock_t est_lock;
122 struct timer_list est_timer; /* Estimation timer */
123 /* ip_vs_sync */
124 struct list_head sync_queue;
125 spinlock_t sync_lock;
126 struct ip_vs_sync_buff *sync_buff;
127 spinlock_t sync_buff_lock;
128 struct sockaddr_in sync_mcast_addr;
129 struct task_struct *master_thread;
130 struct task_struct *backup_thread;
131 int send_mesg_maxlen;
132 int recv_mesg_maxlen;
133 volatile int sync_state;
134 volatile int master_syncid;
135 volatile int backup_syncid;
136 /* multicast interface name */
137 char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
138 char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
139 /* net name space ptr */
140 struct net *net; /* Needed by timer routines */
141};
142
143#endif /* IP_VS_H_ */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f926a310075d..1dfc18a03fd4 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -64,16 +64,6 @@ config IP_NF_IPTABLES
64if IP_NF_IPTABLES 64if IP_NF_IPTABLES
65 65
66# The matches. 66# The matches.
67config IP_NF_MATCH_ADDRTYPE
68 tristate '"addrtype" address type match support'
69 depends on NETFILTER_ADVANCED
70 help
71 This option allows you to match what routing thinks of an address,
72 eg. UNICAST, LOCAL, BROADCAST, ...
73
74 If you want to compile it as a module, say M here and read
75 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
76
77config IP_NF_MATCH_AH 67config IP_NF_MATCH_AH
78 tristate '"ah" match support' 68 tristate '"ah" match support'
79 depends on NETFILTER_ADVANCED 69 depends on NETFILTER_ADVANCED
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 19eb59d01037..dca2082ec683 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
48obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o 48obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
49 49
50# matches 50# matches
51obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
52obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o 51obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
53obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o 52obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
54 53
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e95054c690c6..4b5d457c2d76 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1066,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user,
1066 /* overflow check */ 1066 /* overflow check */
1067 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1067 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1068 return -ENOMEM; 1068 return -ENOMEM;
1069 tmp.name[sizeof(tmp.name)-1] = 0;
1069 1070
1070 newinfo = xt_alloc_table_info(tmp.size); 1071 newinfo = xt_alloc_table_info(tmp.size);
1071 if (!newinfo) 1072 if (!newinfo)
@@ -1488,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user,
1488 return -ENOMEM; 1489 return -ENOMEM;
1489 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1490 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1490 return -ENOMEM; 1491 return -ENOMEM;
1492 tmp.name[sizeof(tmp.name)-1] = 0;
1491 1493
1492 newinfo = xt_alloc_table_info(tmp.size); 1494 newinfo = xt_alloc_table_info(tmp.size);
1493 if (!newinfo) 1495 if (!newinfo)
@@ -1740,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1740 ret = -EFAULT; 1742 ret = -EFAULT;
1741 break; 1743 break;
1742 } 1744 }
1745 rev.name[sizeof(rev.name)-1] = 0;
1743 1746
1744 try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, 1747 try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
1745 rev.revision, 1, &ret), 1748 rev.revision, 1, &ret),
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ef7d7b9680ea..b09ed0d080f9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1262,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1262 /* overflow check */ 1262 /* overflow check */
1263 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1263 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1264 return -ENOMEM; 1264 return -ENOMEM;
1265 tmp.name[sizeof(tmp.name)-1] = 0;
1265 1266
1266 newinfo = xt_alloc_table_info(tmp.size); 1267 newinfo = xt_alloc_table_info(tmp.size);
1267 if (!newinfo) 1268 if (!newinfo)
@@ -1807,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1807 return -ENOMEM; 1808 return -ENOMEM;
1808 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1809 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1809 return -ENOMEM; 1810 return -ENOMEM;
1811 tmp.name[sizeof(tmp.name)-1] = 0;
1810 1812
1811 newinfo = xt_alloc_table_info(tmp.size); 1813 newinfo = xt_alloc_table_info(tmp.size);
1812 if (!newinfo) 1814 if (!newinfo)
@@ -2036,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2036 ret = -EFAULT; 2038 ret = -EFAULT;
2037 break; 2039 break;
2038 } 2040 }
2041 rev.name[sizeof(rev.name)-1] = 0;
2039 2042
2040 if (cmd == IPT_SO_GET_REVISION_TARGET) 2043 if (cmd == IPT_SO_GET_REVISION_TARGET)
2041 target = 1; 2044 target = 1;
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
deleted file mode 100644
index db8bff0fb86d..000000000000
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * iptables module to match inet_addr_type() of an ip.
3 *
4 * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
5 * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/ip.h>
17#include <net/route.h>
18
19#include <linux/netfilter_ipv4/ipt_addrtype.h>
20#include <linux/netfilter/x_tables.h>
21
22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
24MODULE_DESCRIPTION("Xtables: address type match for IPv4");
25
26static inline bool match_type(struct net *net, const struct net_device *dev,
27 __be32 addr, u_int16_t mask)
28{
29 return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
30}
31
32static bool
33addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
34{
35 struct net *net = dev_net(par->in ? par->in : par->out);
36 const struct ipt_addrtype_info *info = par->matchinfo;
37 const struct iphdr *iph = ip_hdr(skb);
38 bool ret = true;
39
40 if (info->source)
41 ret &= match_type(net, NULL, iph->saddr, info->source) ^
42 info->invert_source;
43 if (info->dest)
44 ret &= match_type(net, NULL, iph->daddr, info->dest) ^
45 info->invert_dest;
46
47 return ret;
48}
49
50static bool
51addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
52{
53 struct net *net = dev_net(par->in ? par->in : par->out);
54 const struct ipt_addrtype_info_v1 *info = par->matchinfo;
55 const struct iphdr *iph = ip_hdr(skb);
56 const struct net_device *dev = NULL;
57 bool ret = true;
58
59 if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
60 dev = par->in;
61 else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
62 dev = par->out;
63
64 if (info->source)
65 ret &= match_type(net, dev, iph->saddr, info->source) ^
66 (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
67 if (ret && info->dest)
68 ret &= match_type(net, dev, iph->daddr, info->dest) ^
69 !!(info->flags & IPT_ADDRTYPE_INVERT_DEST);
70 return ret;
71}
72
73static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
74{
75 struct ipt_addrtype_info_v1 *info = par->matchinfo;
76
77 if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
78 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
79 pr_info("both incoming and outgoing "
80 "interface limitation cannot be selected\n");
81 return -EINVAL;
82 }
83
84 if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
85 (1 << NF_INET_LOCAL_IN)) &&
86 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
87 pr_info("output interface limitation "
88 "not valid in PREROUTING and INPUT\n");
89 return -EINVAL;
90 }
91
92 if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
93 (1 << NF_INET_LOCAL_OUT)) &&
94 info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
95 pr_info("input interface limitation "
96 "not valid in POSTROUTING and OUTPUT\n");
97 return -EINVAL;
98 }
99
100 return 0;
101}
102
103static struct xt_match addrtype_mt_reg[] __read_mostly = {
104 {
105 .name = "addrtype",
106 .family = NFPROTO_IPV4,
107 .match = addrtype_mt_v0,
108 .matchsize = sizeof(struct ipt_addrtype_info),
109 .me = THIS_MODULE
110 },
111 {
112 .name = "addrtype",
113 .family = NFPROTO_IPV4,
114 .revision = 1,
115 .match = addrtype_mt_v1,
116 .checkentry = addrtype_mt_checkentry_v1,
117 .matchsize = sizeof(struct ipt_addrtype_info_v1),
118 .me = THIS_MODULE
119 }
120};
121
122static int __init addrtype_mt_init(void)
123{
124 return xt_register_matches(addrtype_mt_reg,
125 ARRAY_SIZE(addrtype_mt_reg));
126}
127
128static void __exit addrtype_mt_exit(void)
129{
130 xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
131}
132
133module_init(addrtype_mt_init);
134module_exit(addrtype_mt_exit);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 47b7b8df7fac..c9598a9067d7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1275,6 +1275,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1275 /* overflow check */ 1275 /* overflow check */
1276 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1276 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1277 return -ENOMEM; 1277 return -ENOMEM;
1278 tmp.name[sizeof(tmp.name)-1] = 0;
1278 1279
1279 newinfo = xt_alloc_table_info(tmp.size); 1280 newinfo = xt_alloc_table_info(tmp.size);
1280 if (!newinfo) 1281 if (!newinfo)
@@ -1822,6 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1822 return -ENOMEM; 1823 return -ENOMEM;
1823 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) 1824 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1824 return -ENOMEM; 1825 return -ENOMEM;
1826 tmp.name[sizeof(tmp.name)-1] = 0;
1825 1827
1826 newinfo = xt_alloc_table_info(tmp.size); 1828 newinfo = xt_alloc_table_info(tmp.size);
1827 if (!newinfo) 1829 if (!newinfo)
@@ -2051,6 +2053,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2051 ret = -EFAULT; 2053 ret = -EFAULT;
2052 break; 2054 break;
2053 } 2055 }
2056 rev.name[sizeof(rev.name)-1] = 0;
2054 2057
2055 if (cmd == IP6T_SO_GET_REVISION_TARGET) 2058 if (cmd == IP6T_SO_GET_REVISION_TARGET)
2056 target = 1; 2059 target = 1;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 82a6e0d80f05..c3f988aa1152 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -649,6 +649,17 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
649 649
650comment "Xtables matches" 650comment "Xtables matches"
651 651
652config NETFILTER_XT_MATCH_ADDRTYPE
653 tristate '"addrtype" address type match support'
654 depends on NETFILTER_ADVANCED
655 depends on (IPV6 || IPV6=n)
656 ---help---
657 This option allows you to match what routing thinks of an address,
658 eg. UNICAST, LOCAL, BROADCAST, ...
659
660 If you want to compile it as a module, say M here and read
661 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
662
652config NETFILTER_XT_MATCH_CLUSTER 663config NETFILTER_XT_MATCH_CLUSTER
653 tristate '"cluster" match support' 664 tristate '"cluster" match support'
654 depends on NF_CONNTRACK 665 depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index d57a890eaee5..1a02853df863 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
70obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o 70obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
71 71
72# matches 72# matches
73obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
73obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o 74obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
74obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o 75obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
75obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o 76obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 8b1a54c1e400..618a615acc9d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -612,7 +612,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
612 const struct nlmsghdr *nlh, 612 const struct nlmsghdr *nlh,
613 const struct nlattr * const attr[]) 613 const struct nlattr * const attr[])
614{ 614{
615 struct ip_set *set, *clash; 615 struct ip_set *set, *clash = NULL;
616 ip_set_id_t index = IPSET_INVALID_ID; 616 ip_set_id_t index = IPSET_INVALID_ID;
617 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; 617 struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
618 const char *name, *typename; 618 const char *name, *typename;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9c2a517b69c8..f289306cbf12 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -680,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
680 atomic_dec(&dest->refcnt); 680 atomic_dec(&dest->refcnt);
681} 681}
682 682
/*
 * Tell whether a connection template bound to @dest should be treated as
 * expired because the destination is quiescent (its weight is zero) and
 * the expire_quiescent_template sysctl is enabled for this netns.
 *
 * Without CONFIG_SYSCTL the knob does not exist, so the answer is always 0.
 */
static int expire_quiescent_template(struct netns_ipvs *ipvs,
				     struct ip_vs_dest *dest)
{
#ifdef CONFIG_SYSCTL
	/* Knob off: never expire a template just because dest is quiescent. */
	if (!ipvs->sysctl_expire_quiescent_template)
		return 0;

	return atomic_read(&dest->weight) == 0;
#else
	return 0;
#endif
}
683 693
684/* 694/*
685 * Checking if the destination of a connection template is available. 695 * Checking if the destination of a connection template is available.
@@ -696,8 +706,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
696 */ 706 */
697 if ((dest == NULL) || 707 if ((dest == NULL) ||
698 !(dest->flags & IP_VS_DEST_F_AVAILABLE) || 708 !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
699 (ipvs->sysctl_expire_quiescent_template && 709 expire_quiescent_template(ipvs, dest)) {
700 (atomic_read(&dest->weight) == 0))) {
701 IP_VS_DBG_BUF(9, "check_template: dest not available for " 710 IP_VS_DBG_BUF(9, "check_template: dest not available for "
702 "protocol %s s:%s:%d v:%s:%d " 711 "protocol %s s:%s:%d v:%s:%d "
703 "-> d:%s:%d\n", 712 "-> d:%s:%d\n",
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 2d1f932add46..07accf6b2401 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -132,7 +132,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
132 s->ustats.inbytes += skb->len; 132 s->ustats.inbytes += skb->len;
133 u64_stats_update_end(&s->syncp); 133 u64_stats_update_end(&s->syncp);
134 134
135 s = this_cpu_ptr(ipvs->cpustats); 135 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
136 s->ustats.inpkts++; 136 s->ustats.inpkts++;
137 u64_stats_update_begin(&s->syncp); 137 u64_stats_update_begin(&s->syncp);
138 s->ustats.inbytes += skb->len; 138 s->ustats.inbytes += skb->len;
@@ -162,7 +162,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
162 s->ustats.outbytes += skb->len; 162 s->ustats.outbytes += skb->len;
163 u64_stats_update_end(&s->syncp); 163 u64_stats_update_end(&s->syncp);
164 164
165 s = this_cpu_ptr(ipvs->cpustats); 165 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
166 s->ustats.outpkts++; 166 s->ustats.outpkts++;
167 u64_stats_update_begin(&s->syncp); 167 u64_stats_update_begin(&s->syncp);
168 s->ustats.outbytes += skb->len; 168 s->ustats.outbytes += skb->len;
@@ -183,7 +183,7 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
183 s = this_cpu_ptr(svc->stats.cpustats); 183 s = this_cpu_ptr(svc->stats.cpustats);
184 s->ustats.conns++; 184 s->ustats.conns++;
185 185
186 s = this_cpu_ptr(ipvs->cpustats); 186 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
187 s->ustats.conns++; 187 s->ustats.conns++;
188} 188}
189 189
@@ -499,11 +499,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
499int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 499int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
500 struct ip_vs_proto_data *pd) 500 struct ip_vs_proto_data *pd)
501{ 501{
502 struct net *net;
503 struct netns_ipvs *ipvs;
504 __be16 _ports[2], *pptr; 502 __be16 _ports[2], *pptr;
505 struct ip_vs_iphdr iph; 503 struct ip_vs_iphdr iph;
504#ifdef CONFIG_SYSCTL
505 struct net *net;
506 struct netns_ipvs *ipvs;
506 int unicast; 507 int unicast;
508#endif
507 509
508 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 510 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
509 511
@@ -512,6 +514,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
512 ip_vs_service_put(svc); 514 ip_vs_service_put(svc);
513 return NF_DROP; 515 return NF_DROP;
514 } 516 }
517
518#ifdef CONFIG_SYSCTL
515 net = skb_net(skb); 519 net = skb_net(skb);
516 520
517#ifdef CONFIG_IP_VS_IPV6 521#ifdef CONFIG_IP_VS_IPV6
@@ -563,6 +567,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
563 ip_vs_conn_put(cp); 567 ip_vs_conn_put(cp);
564 return ret; 568 return ret;
565 } 569 }
570#endif
566 571
567 /* 572 /*
568 * When the virtual ftp service is presented, packets destined 573 * When the virtual ftp service is presented, packets destined
@@ -599,6 +604,33 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
599 return NF_DROP; 604 return NF_DROP;
600} 605}
601 606
/*
 * Accessors for the per-netns sysctl knobs used in this file.  With
 * CONFIG_SYSCTL they read the value stored in struct netns_ipvs; without
 * it every knob reads as 0 (disabled).
 */
#ifdef CONFIG_SYSCTL

/* snat_reroute knob of the netns that @skb belongs to. */
static int sysctl_snat_reroute(struct sk_buff *skb)
{
	return net_ipvs(skb_net(skb))->sysctl_snat_reroute;
}

/* nat_icmp_send knob of @net. */
static int sysctl_nat_icmp_send(struct net *net)
{
	return net_ipvs(net)->sysctl_nat_icmp_send;
}

/* expire_nodest_conn knob of @ipvs. */
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
{
	return ipvs->sysctl_expire_nodest_conn;
}

#else

static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
static int sysctl_nat_icmp_send(struct net *net) { return 0; }
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }

#endif
633
602__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) 634__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
603{ 635{
604 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); 636 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -631,6 +663,22 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
631} 663}
632#endif 664#endif
633 665
666static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
667{
668#ifdef CONFIG_IP_VS_IPV6
669 if (af == AF_INET6) {
670 if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0)
671 return 1;
672 } else
673#endif
674 if ((sysctl_snat_reroute(skb) ||
675 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
676 ip_route_me_harder(skb, RTN_LOCAL) != 0)
677 return 1;
678
679 return 0;
680}
681
634/* 682/*
635 * Packet has been made sufficiently writable in caller 683 * Packet has been made sufficiently writable in caller
636 * - inout: 1=in->out, 0=out->in 684 * - inout: 1=in->out, 0=out->in
@@ -737,7 +785,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
737 struct ip_vs_protocol *pp, 785 struct ip_vs_protocol *pp,
738 unsigned int offset, unsigned int ihl) 786 unsigned int offset, unsigned int ihl)
739{ 787{
740 struct netns_ipvs *ipvs;
741 unsigned int verdict = NF_DROP; 788 unsigned int verdict = NF_DROP;
742 789
743 if (IP_VS_FWD_METHOD(cp) != 0) { 790 if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -759,8 +806,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
759 if (!skb_make_writable(skb, offset)) 806 if (!skb_make_writable(skb, offset))
760 goto out; 807 goto out;
761 808
762 ipvs = net_ipvs(skb_net(skb));
763
764#ifdef CONFIG_IP_VS_IPV6 809#ifdef CONFIG_IP_VS_IPV6
765 if (af == AF_INET6) 810 if (af == AF_INET6)
766 ip_vs_nat_icmp_v6(skb, pp, cp, 1); 811 ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -768,16 +813,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
768#endif 813#endif
769 ip_vs_nat_icmp(skb, pp, cp, 1); 814 ip_vs_nat_icmp(skb, pp, cp, 1);
770 815
771#ifdef CONFIG_IP_VS_IPV6 816 if (ip_vs_route_me_harder(af, skb))
772 if (af == AF_INET6) { 817 goto out;
773 if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
774 goto out;
775 } else
776#endif
777 if ((ipvs->sysctl_snat_reroute ||
778 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
779 ip_route_me_harder(skb, RTN_LOCAL) != 0)
780 goto out;
781 818
782 /* do the statistics and put it back */ 819 /* do the statistics and put it back */
783 ip_vs_out_stats(cp, skb); 820 ip_vs_out_stats(cp, skb);
@@ -985,7 +1022,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
985 struct ip_vs_conn *cp, int ihl) 1022 struct ip_vs_conn *cp, int ihl)
986{ 1023{
987 struct ip_vs_protocol *pp = pd->pp; 1024 struct ip_vs_protocol *pp = pd->pp;
988 struct netns_ipvs *ipvs;
989 1025
990 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 1026 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
991 1027
@@ -1021,18 +1057,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
1021 * if it came from this machine itself. So re-compute 1057 * if it came from this machine itself. So re-compute
1022 * the routing information. 1058 * the routing information.
1023 */ 1059 */
1024 ipvs = net_ipvs(skb_net(skb)); 1060 if (ip_vs_route_me_harder(af, skb))
1025 1061 goto drop;
1026#ifdef CONFIG_IP_VS_IPV6
1027 if (af == AF_INET6) {
1028 if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
1029 goto drop;
1030 } else
1031#endif
1032 if ((ipvs->sysctl_snat_reroute ||
1033 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
1034 ip_route_me_harder(skb, RTN_LOCAL) != 0)
1035 goto drop;
1036 1062
1037 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); 1063 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
1038 1064
@@ -1066,7 +1092,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1066 struct ip_vs_protocol *pp; 1092 struct ip_vs_protocol *pp;
1067 struct ip_vs_proto_data *pd; 1093 struct ip_vs_proto_data *pd;
1068 struct ip_vs_conn *cp; 1094 struct ip_vs_conn *cp;
1069 struct netns_ipvs *ipvs;
1070 1095
1071 EnterFunction(11); 1096 EnterFunction(11);
1072 1097
@@ -1141,11 +1166,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1141 * Check if the packet belongs to an existing entry 1166 * Check if the packet belongs to an existing entry
1142 */ 1167 */
1143 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); 1168 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
1144 ipvs = net_ipvs(net);
1145 1169
1146 if (likely(cp)) 1170 if (likely(cp))
1147 return handle_response(af, skb, pd, cp, iph.len); 1171 return handle_response(af, skb, pd, cp, iph.len);
1148 if (ipvs->sysctl_nat_icmp_send && 1172 if (sysctl_nat_icmp_send(net) &&
1149 (pp->protocol == IPPROTO_TCP || 1173 (pp->protocol == IPPROTO_TCP ||
1150 pp->protocol == IPPROTO_UDP || 1174 pp->protocol == IPPROTO_UDP ||
1151 pp->protocol == IPPROTO_SCTP)) { 1175 pp->protocol == IPPROTO_SCTP)) {
@@ -1570,7 +1594,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1570 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1594 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
1571 /* the destination server is not available */ 1595 /* the destination server is not available */
1572 1596
1573 if (ipvs->sysctl_expire_nodest_conn) { 1597 if (sysctl_expire_nodest_conn(ipvs)) {
1574 /* try to expire the connection immediately */ 1598 /* try to expire the connection immediately */
1575 ip_vs_conn_expire_now(cp); 1599 ip_vs_conn_expire_now(cp);
1576 } 1600 }
@@ -1600,15 +1624,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1600 */ 1624 */
1601 1625
1602 if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 1626 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
1603 pkts = ipvs->sysctl_sync_threshold[0]; 1627 pkts = sysctl_sync_threshold(ipvs);
1604 else 1628 else
1605 pkts = atomic_add_return(1, &cp->in_pkts); 1629 pkts = atomic_add_return(1, &cp->in_pkts);
1606 1630
1607 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && 1631 if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1608 cp->protocol == IPPROTO_SCTP) { 1632 cp->protocol == IPPROTO_SCTP) {
1609 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1633 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1610 (pkts % ipvs->sysctl_sync_threshold[1] 1634 (pkts % sysctl_sync_period(ipvs)
1611 == ipvs->sysctl_sync_threshold[0])) || 1635 == sysctl_sync_threshold(ipvs))) ||
1612 (cp->old_state != cp->state && 1636 (cp->old_state != cp->state &&
1613 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1637 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
1614 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || 1638 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
@@ -1622,8 +1646,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1622 else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && 1646 else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1623 (((cp->protocol != IPPROTO_TCP || 1647 (((cp->protocol != IPPROTO_TCP ||
1624 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1648 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
1625 (pkts % ipvs->sysctl_sync_threshold[1] 1649 (pkts % sysctl_sync_period(ipvs)
1626 == ipvs->sysctl_sync_threshold[0])) || 1650 == sysctl_sync_threshold(ipvs))) ||
1627 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && 1651 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
1628 ((cp->state == IP_VS_TCP_S_FIN_WAIT) || 1652 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
1629 (cp->state == IP_VS_TCP_S_CLOSE) || 1653 (cp->state == IP_VS_TCP_S_CLOSE) ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a60b20fa142e..b799cea31f95 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -86,6 +86,8 @@ static int __ip_vs_addr_is_local_v6(struct net *net,
86 return 0; 86 return 0;
87} 87}
88#endif 88#endif
89
90#ifdef CONFIG_SYSCTL
89/* 91/*
90 * update_defense_level is called from keventd and from sysctl, 92 * update_defense_level is called from keventd and from sysctl,
91 * so it needs to protect itself from softirqs 93 * so it needs to protect itself from softirqs
@@ -227,6 +229,7 @@ static void defense_work_handler(struct work_struct *work)
227 ip_vs_random_dropentry(ipvs->net); 229 ip_vs_random_dropentry(ipvs->net);
228 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 230 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
229} 231}
232#endif
230 233
231int 234int
232ip_vs_use_count_inc(void) 235ip_vs_use_count_inc(void)
@@ -409,9 +412,11 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
409 /* 412 /*
410 * Check the table hashed by fwmark first 413 * Check the table hashed by fwmark first
411 */ 414 */
412 svc = __ip_vs_svc_fwm_find(net, af, fwmark); 415 if (fwmark) {
413 if (fwmark && svc) 416 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
414 goto out; 417 if (svc)
418 goto out;
419 }
415 420
416 /* 421 /*
417 * Check the table hashed by <protocol,addr,port> 422 * Check the table hashed by <protocol,addr,port>
@@ -707,13 +712,39 @@ static void ip_vs_trash_cleanup(struct net *net)
707 } 712 }
708} 713}
709 714
715static void
716ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
717{
718#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
719
720 spin_lock_bh(&src->lock);
721
722 IP_VS_SHOW_STATS_COUNTER(conns);
723 IP_VS_SHOW_STATS_COUNTER(inpkts);
724 IP_VS_SHOW_STATS_COUNTER(outpkts);
725 IP_VS_SHOW_STATS_COUNTER(inbytes);
726 IP_VS_SHOW_STATS_COUNTER(outbytes);
727
728 ip_vs_read_estimator(dst, src);
729
730 spin_unlock_bh(&src->lock);
731}
710 732
711static void 733static void
712ip_vs_zero_stats(struct ip_vs_stats *stats) 734ip_vs_zero_stats(struct ip_vs_stats *stats)
713{ 735{
714 spin_lock_bh(&stats->lock); 736 spin_lock_bh(&stats->lock);
715 737
716 memset(&stats->ustats, 0, sizeof(stats->ustats)); 738 /* get current counters as zero point, rates are zeroed */
739
740#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
741
742 IP_VS_ZERO_STATS_COUNTER(conns);
743 IP_VS_ZERO_STATS_COUNTER(inpkts);
744 IP_VS_ZERO_STATS_COUNTER(outpkts);
745 IP_VS_ZERO_STATS_COUNTER(inbytes);
746 IP_VS_ZERO_STATS_COUNTER(outbytes);
747
717 ip_vs_zero_estimator(stats); 748 ip_vs_zero_estimator(stats);
718 749
719 spin_unlock_bh(&stats->lock); 750 spin_unlock_bh(&stats->lock);
@@ -772,7 +803,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
772 spin_unlock_bh(&dest->dst_lock); 803 spin_unlock_bh(&dest->dst_lock);
773 804
774 if (add) 805 if (add)
775 ip_vs_new_estimator(svc->net, &dest->stats); 806 ip_vs_start_estimator(svc->net, &dest->stats);
776 807
777 write_lock_bh(&__ip_vs_svc_lock); 808 write_lock_bh(&__ip_vs_svc_lock);
778 809
@@ -978,7 +1009,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
978{ 1009{
979 struct netns_ipvs *ipvs = net_ipvs(net); 1010 struct netns_ipvs *ipvs = net_ipvs(net);
980 1011
981 ip_vs_kill_estimator(net, &dest->stats); 1012 ip_vs_stop_estimator(net, &dest->stats);
982 1013
983 /* 1014 /*
984 * Remove it from the d-linked list with the real services. 1015 * Remove it from the d-linked list with the real services.
@@ -1171,7 +1202,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1171 else if (svc->port == 0) 1202 else if (svc->port == 0)
1172 atomic_inc(&ipvs->nullsvc_counter); 1203 atomic_inc(&ipvs->nullsvc_counter);
1173 1204
1174 ip_vs_new_estimator(net, &svc->stats); 1205 ip_vs_start_estimator(net, &svc->stats);
1175 1206
1176 /* Count only IPv4 services for old get/setsockopt interface */ 1207 /* Count only IPv4 services for old get/setsockopt interface */
1177 if (svc->af == AF_INET) 1208 if (svc->af == AF_INET)
@@ -1323,7 +1354,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1323 if (svc->af == AF_INET) 1354 if (svc->af == AF_INET)
1324 ipvs->num_services--; 1355 ipvs->num_services--;
1325 1356
1326 ip_vs_kill_estimator(svc->net, &svc->stats); 1357 ip_vs_stop_estimator(svc->net, &svc->stats);
1327 1358
1328 /* Unbind scheduler */ 1359 /* Unbind scheduler */
1329 old_sched = svc->scheduler; 1360 old_sched = svc->scheduler;
@@ -1477,11 +1508,11 @@ static int ip_vs_zero_all(struct net *net)
1477 } 1508 }
1478 } 1509 }
1479 1510
1480 ip_vs_zero_stats(net_ipvs(net)->tot_stats); 1511 ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1481 return 0; 1512 return 0;
1482} 1513}
1483 1514
1484 1515#ifdef CONFIG_SYSCTL
1485static int 1516static int
1486proc_do_defense_mode(ctl_table *table, int write, 1517proc_do_defense_mode(ctl_table *table, int write,
1487 void __user *buffer, size_t *lenp, loff_t *ppos) 1518 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1503,7 +1534,6 @@ proc_do_defense_mode(ctl_table *table, int write,
1503 return rc; 1534 return rc;
1504} 1535}
1505 1536
1506
1507static int 1537static int
1508proc_do_sync_threshold(ctl_table *table, int write, 1538proc_do_sync_threshold(ctl_table *table, int write,
1509 void __user *buffer, size_t *lenp, loff_t *ppos) 1539 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1737,6 +1767,7 @@ const struct ctl_path net_vs_ctl_path[] = {
1737 { } 1767 { }
1738}; 1768};
1739EXPORT_SYMBOL_GPL(net_vs_ctl_path); 1769EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1770#endif
1740 1771
1741#ifdef CONFIG_PROC_FS 1772#ifdef CONFIG_PROC_FS
1742 1773
@@ -1959,7 +1990,7 @@ static const struct file_operations ip_vs_info_fops = {
1959static int ip_vs_stats_show(struct seq_file *seq, void *v) 1990static int ip_vs_stats_show(struct seq_file *seq, void *v)
1960{ 1991{
1961 struct net *net = seq_file_single_net(seq); 1992 struct net *net = seq_file_single_net(seq);
1962 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; 1993 struct ip_vs_stats_user show;
1963 1994
1964/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1995/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1965 seq_puts(seq, 1996 seq_puts(seq,
@@ -1967,22 +1998,18 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
1967 seq_printf(seq, 1998 seq_printf(seq,
1968 " Conns Packets Packets Bytes Bytes\n"); 1999 " Conns Packets Packets Bytes Bytes\n");
1969 2000
1970 spin_lock_bh(&tot_stats->lock); 2001 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
1971 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, 2002 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
1972 tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, 2003 show.inpkts, show.outpkts,
1973 (unsigned long long) tot_stats->ustats.inbytes, 2004 (unsigned long long) show.inbytes,
1974 (unsigned long long) tot_stats->ustats.outbytes); 2005 (unsigned long long) show.outbytes);
1975 2006
1976/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2007/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1977 seq_puts(seq, 2008 seq_puts(seq,
1978 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2009 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1979 seq_printf(seq,"%8X %8X %8X %16X %16X\n", 2010 seq_printf(seq, "%8X %8X %8X %16X %16X\n",
1980 tot_stats->ustats.cps, 2011 show.cps, show.inpps, show.outpps,
1981 tot_stats->ustats.inpps, 2012 show.inbps, show.outbps);
1982 tot_stats->ustats.outpps,
1983 tot_stats->ustats.inbps,
1984 tot_stats->ustats.outbps);
1985 spin_unlock_bh(&tot_stats->lock);
1986 2013
1987 return 0; 2014 return 0;
1988} 2015}
@@ -2003,7 +2030,9 @@ static const struct file_operations ip_vs_stats_fops = {
2003static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) 2030static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2004{ 2031{
2005 struct net *net = seq_file_single_net(seq); 2032 struct net *net = seq_file_single_net(seq);
2006 struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; 2033 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2034 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2035 struct ip_vs_stats_user rates;
2007 int i; 2036 int i;
2008 2037
2009/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2038/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2013,30 +2042,43 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2013 "CPU Conns Packets Packets Bytes Bytes\n"); 2042 "CPU Conns Packets Packets Bytes Bytes\n");
2014 2043
2015 for_each_possible_cpu(i) { 2044 for_each_possible_cpu(i) {
2016 struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); 2045 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2046 unsigned int start;
2047 __u64 inbytes, outbytes;
2048
2049 do {
2050 start = u64_stats_fetch_begin_bh(&u->syncp);
2051 inbytes = u->ustats.inbytes;
2052 outbytes = u->ustats.outbytes;
2053 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2054
2017 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", 2055 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2018 i, u->ustats.conns, u->ustats.inpkts, 2056 i, u->ustats.conns, u->ustats.inpkts,
2019 u->ustats.outpkts, (__u64)u->ustats.inbytes, 2057 u->ustats.outpkts, (__u64)inbytes,
2020 (__u64)u->ustats.outbytes); 2058 (__u64)outbytes);
2021 } 2059 }
2022 2060
2023 spin_lock_bh(&tot_stats->lock); 2061 spin_lock_bh(&tot_stats->lock);
2062
2024 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", 2063 seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2025 tot_stats->ustats.conns, tot_stats->ustats.inpkts, 2064 tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2026 tot_stats->ustats.outpkts, 2065 tot_stats->ustats.outpkts,
2027 (unsigned long long) tot_stats->ustats.inbytes, 2066 (unsigned long long) tot_stats->ustats.inbytes,
2028 (unsigned long long) tot_stats->ustats.outbytes); 2067 (unsigned long long) tot_stats->ustats.outbytes);
2029 2068
2069 ip_vs_read_estimator(&rates, tot_stats);
2070
2071 spin_unlock_bh(&tot_stats->lock);
2072
2030/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2073/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2031 seq_puts(seq, 2074 seq_puts(seq,
2032 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2075 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2033 seq_printf(seq, " %8X %8X %8X %16X %16X\n", 2076 seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2034 tot_stats->ustats.cps, 2077 rates.cps,
2035 tot_stats->ustats.inpps, 2078 rates.inpps,
2036 tot_stats->ustats.outpps, 2079 rates.outpps,
2037 tot_stats->ustats.inbps, 2080 rates.inbps,
2038 tot_stats->ustats.outbps); 2081 rates.outbps);
2039 spin_unlock_bh(&tot_stats->lock);
2040 2082
2041 return 0; 2083 return 0;
2042} 2084}
@@ -2284,14 +2326,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2284 2326
2285 2327
2286static void 2328static void
2287ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2288{
2289 spin_lock_bh(&src->lock);
2290 memcpy(dst, &src->ustats, sizeof(*dst));
2291 spin_unlock_bh(&src->lock);
2292}
2293
2294static void
2295ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2329ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2296{ 2330{
2297 dst->protocol = src->protocol; 2331 dst->protocol = src->protocol;
@@ -2677,31 +2711,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2677static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 2711static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2678 struct ip_vs_stats *stats) 2712 struct ip_vs_stats *stats)
2679{ 2713{
2714 struct ip_vs_stats_user ustats;
2680 struct nlattr *nl_stats = nla_nest_start(skb, container_type); 2715 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2681 if (!nl_stats) 2716 if (!nl_stats)
2682 return -EMSGSIZE; 2717 return -EMSGSIZE;
2683 2718
2684 spin_lock_bh(&stats->lock); 2719 ip_vs_copy_stats(&ustats, stats);
2685
2686 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2687 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2688 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2689 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2690 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2691 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2692 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2693 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2694 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2695 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2696 2720
2697 spin_unlock_bh(&stats->lock); 2721 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
2722 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
2723 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
2724 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
2725 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
2726 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
2727 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
2728 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
2729 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
2730 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
2698 2731
2699 nla_nest_end(skb, nl_stats); 2732 nla_nest_end(skb, nl_stats);
2700 2733
2701 return 0; 2734 return 0;
2702 2735
2703nla_put_failure: 2736nla_put_failure:
2704 spin_unlock_bh(&stats->lock);
2705 nla_nest_cancel(skb, nl_stats); 2737 nla_nest_cancel(skb, nl_stats);
2706 return -EMSGSIZE; 2738 return -EMSGSIZE;
2707} 2739}
@@ -3480,7 +3512,8 @@ static void ip_vs_genl_unregister(void)
3480/* 3512/*
3481 * per netns intit/exit func. 3513 * per netns intit/exit func.
3482 */ 3514 */
3483int __net_init __ip_vs_control_init(struct net *net) 3515#ifdef CONFIG_SYSCTL
3516int __net_init __ip_vs_control_init_sysctl(struct net *net)
3484{ 3517{
3485 int idx; 3518 int idx;
3486 struct netns_ipvs *ipvs = net_ipvs(net); 3519 struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3490,38 +3523,11 @@ int __net_init __ip_vs_control_init(struct net *net)
3490 spin_lock_init(&ipvs->dropentry_lock); 3523 spin_lock_init(&ipvs->dropentry_lock);
3491 spin_lock_init(&ipvs->droppacket_lock); 3524 spin_lock_init(&ipvs->droppacket_lock);
3492 spin_lock_init(&ipvs->securetcp_lock); 3525 spin_lock_init(&ipvs->securetcp_lock);
3493 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3494
3495 /* Initialize rs_table */
3496 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3497 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3498
3499 INIT_LIST_HEAD(&ipvs->dest_trash);
3500 atomic_set(&ipvs->ftpsvc_counter, 0);
3501 atomic_set(&ipvs->nullsvc_counter, 0);
3502
3503 /* procfs stats */
3504 ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
3505 if (ipvs->tot_stats == NULL) {
3506 pr_err("%s(): no memory.\n", __func__);
3507 return -ENOMEM;
3508 }
3509 ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3510 if (!ipvs->cpustats) {
3511 pr_err("%s() alloc_percpu failed\n", __func__);
3512 goto err_alloc;
3513 }
3514 spin_lock_init(&ipvs->tot_stats->lock);
3515
3516 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3517 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3518 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3519 &ip_vs_stats_percpu_fops);
3520 3526
3521 if (!net_eq(net, &init_net)) { 3527 if (!net_eq(net, &init_net)) {
3522 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); 3528 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3523 if (tbl == NULL) 3529 if (tbl == NULL)
3524 goto err_dup; 3530 return -ENOMEM;
3525 } else 3531 } else
3526 tbl = vs_vars; 3532 tbl = vs_vars;
3527 /* Initialize sysctl defaults */ 3533 /* Initialize sysctl defaults */
@@ -3543,52 +3549,94 @@ int __net_init __ip_vs_control_init(struct net *net)
3543 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 3549 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3544 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 3550 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3545 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 3551 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3546 ipvs->sysctl_sync_threshold[0] = 3; 3552 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3547 ipvs->sysctl_sync_threshold[1] = 50; 3553 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3548 tbl[idx].data = &ipvs->sysctl_sync_threshold; 3554 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3549 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 3555 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3550 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 3556 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3551 3557
3552 3558
3553#ifdef CONFIG_SYSCTL
3554 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, 3559 ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
3555 tbl); 3560 tbl);
3556 if (ipvs->sysctl_hdr == NULL) { 3561 if (ipvs->sysctl_hdr == NULL) {
3557 if (!net_eq(net, &init_net)) 3562 if (!net_eq(net, &init_net))
3558 kfree(tbl); 3563 kfree(tbl);
3559 goto err_dup; 3564 return -ENOMEM;
3560 } 3565 }
3561#endif 3566 ip_vs_start_estimator(net, &ipvs->tot_stats);
3562 ip_vs_new_estimator(net, ipvs->tot_stats);
3563 ipvs->sysctl_tbl = tbl; 3567 ipvs->sysctl_tbl = tbl;
3564 /* Schedule defense work */ 3568 /* Schedule defense work */
3565 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 3569 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3566 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 3570 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3567 return 0;
3568 3571
3569err_dup: 3572 return 0;
3570 free_percpu(ipvs->cpustats);
3571err_alloc:
3572 kfree(ipvs->tot_stats);
3573 return -ENOMEM;
3574} 3573}
3575 3574
3576static void __net_exit __ip_vs_control_cleanup(struct net *net) 3575void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
3577{ 3576{
3578 struct netns_ipvs *ipvs = net_ipvs(net); 3577 struct netns_ipvs *ipvs = net_ipvs(net);
3579 3578
3580 ip_vs_trash_cleanup(net);
3581 ip_vs_kill_estimator(net, ipvs->tot_stats);
3582 cancel_delayed_work_sync(&ipvs->defense_work); 3579 cancel_delayed_work_sync(&ipvs->defense_work);
3583 cancel_work_sync(&ipvs->defense_work.work); 3580 cancel_work_sync(&ipvs->defense_work.work);
3584#ifdef CONFIG_SYSCTL
3585 unregister_net_sysctl_table(ipvs->sysctl_hdr); 3581 unregister_net_sysctl_table(ipvs->sysctl_hdr);
3582}
3583
3584#else
3585
3586int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
3587void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
3588
3586#endif 3589#endif
3590
3591int __net_init __ip_vs_control_init(struct net *net)
3592{
3593 int idx;
3594 struct netns_ipvs *ipvs = net_ipvs(net);
3595
3596 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
3597
3598 /* Initialize rs_table */
3599 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3600 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3601
3602 INIT_LIST_HEAD(&ipvs->dest_trash);
3603 atomic_set(&ipvs->ftpsvc_counter, 0);
3604 atomic_set(&ipvs->nullsvc_counter, 0);
3605
3606 /* procfs stats */
3607 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3608 if (ipvs->tot_stats.cpustats) {
3609 pr_err("%s(): alloc_percpu.\n", __func__);
3610 return -ENOMEM;
3611 }
3612 spin_lock_init(&ipvs->tot_stats.lock);
3613
3614 proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3615 proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3616 proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3617 &ip_vs_stats_percpu_fops);
3618
3619 if (__ip_vs_control_init_sysctl(net))
3620 goto err;
3621
3622 return 0;
3623
3624err:
3625 free_percpu(ipvs->tot_stats.cpustats);
3626 return -ENOMEM;
3627}
3628
3629static void __net_exit __ip_vs_control_cleanup(struct net *net)
3630{
3631 struct netns_ipvs *ipvs = net_ipvs(net);
3632
3633 ip_vs_trash_cleanup(net);
3634 ip_vs_stop_estimator(net, &ipvs->tot_stats);
3635 __ip_vs_control_cleanup_sysctl(net);
3587 proc_net_remove(net, "ip_vs_stats_percpu"); 3636 proc_net_remove(net, "ip_vs_stats_percpu");
3588 proc_net_remove(net, "ip_vs_stats"); 3637 proc_net_remove(net, "ip_vs_stats");
3589 proc_net_remove(net, "ip_vs"); 3638 proc_net_remove(net, "ip_vs");
3590 free_percpu(ipvs->cpustats); 3639 free_percpu(ipvs->tot_stats.cpustats);
3591 kfree(ipvs->tot_stats);
3592} 3640}
3593 3641
3594static struct pernet_operations ipvs_control_ops = { 3642static struct pernet_operations ipvs_control_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index f560a05c965a..8c8766ca56ad 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -69,10 +69,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
69 sum->inpkts += s->ustats.inpkts; 69 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts; 70 sum->outpkts += s->ustats.outpkts;
71 do { 71 do {
72 start = u64_stats_fetch_begin_bh(&s->syncp); 72 start = u64_stats_fetch_begin(&s->syncp);
73 inbytes = s->ustats.inbytes; 73 inbytes = s->ustats.inbytes;
74 outbytes = s->ustats.outbytes; 74 outbytes = s->ustats.outbytes;
75 } while (u64_stats_fetch_retry_bh(&s->syncp, start)); 75 } while (u64_stats_fetch_retry(&s->syncp, start));
76 sum->inbytes += inbytes; 76 sum->inbytes += inbytes;
77 sum->outbytes += outbytes; 77 sum->outbytes += outbytes;
78 } else { 78 } else {
@@ -80,10 +80,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
80 sum->inpkts = s->ustats.inpkts; 80 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts; 81 sum->outpkts = s->ustats.outpkts;
82 do { 82 do {
83 start = u64_stats_fetch_begin_bh(&s->syncp); 83 start = u64_stats_fetch_begin(&s->syncp);
84 sum->inbytes = s->ustats.inbytes; 84 sum->inbytes = s->ustats.inbytes;
85 sum->outbytes = s->ustats.outbytes; 85 sum->outbytes = s->ustats.outbytes;
86 } while (u64_stats_fetch_retry_bh(&s->syncp, start)); 86 } while (u64_stats_fetch_retry(&s->syncp, start));
87 } 87 }
88 } 88 }
89} 89}
@@ -101,13 +101,12 @@ static void estimation_timer(unsigned long arg)
101 struct netns_ipvs *ipvs; 101 struct netns_ipvs *ipvs;
102 102
103 ipvs = net_ipvs(net); 103 ipvs = net_ipvs(net);
104 ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
105 spin_lock(&ipvs->est_lock); 104 spin_lock(&ipvs->est_lock);
106 list_for_each_entry(e, &ipvs->est_list, list) { 105 list_for_each_entry(e, &ipvs->est_list, list) {
107 s = container_of(e, struct ip_vs_stats, est); 106 s = container_of(e, struct ip_vs_stats, est);
108 107
109 ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
110 spin_lock(&s->lock); 108 spin_lock(&s->lock);
109 ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
111 n_conns = s->ustats.conns; 110 n_conns = s->ustats.conns;
112 n_inpkts = s->ustats.inpkts; 111 n_inpkts = s->ustats.inpkts;
113 n_outpkts = s->ustats.outpkts; 112 n_outpkts = s->ustats.outpkts;
@@ -118,61 +117,41 @@ static void estimation_timer(unsigned long arg)
118 rate = (n_conns - e->last_conns) << 9; 117 rate = (n_conns - e->last_conns) << 9;
119 e->last_conns = n_conns; 118 e->last_conns = n_conns;
120 e->cps += ((long)rate - (long)e->cps) >> 2; 119 e->cps += ((long)rate - (long)e->cps) >> 2;
121 s->ustats.cps = (e->cps + 0x1FF) >> 10;
122 120
123 rate = (n_inpkts - e->last_inpkts) << 9; 121 rate = (n_inpkts - e->last_inpkts) << 9;
124 e->last_inpkts = n_inpkts; 122 e->last_inpkts = n_inpkts;
125 e->inpps += ((long)rate - (long)e->inpps) >> 2; 123 e->inpps += ((long)rate - (long)e->inpps) >> 2;
126 s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
127 124
128 rate = (n_outpkts - e->last_outpkts) << 9; 125 rate = (n_outpkts - e->last_outpkts) << 9;
129 e->last_outpkts = n_outpkts; 126 e->last_outpkts = n_outpkts;
130 e->outpps += ((long)rate - (long)e->outpps) >> 2; 127 e->outpps += ((long)rate - (long)e->outpps) >> 2;
131 s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
132 128
133 rate = (n_inbytes - e->last_inbytes) << 4; 129 rate = (n_inbytes - e->last_inbytes) << 4;
134 e->last_inbytes = n_inbytes; 130 e->last_inbytes = n_inbytes;
135 e->inbps += ((long)rate - (long)e->inbps) >> 2; 131 e->inbps += ((long)rate - (long)e->inbps) >> 2;
136 s->ustats.inbps = (e->inbps + 0xF) >> 5;
137 132
138 rate = (n_outbytes - e->last_outbytes) << 4; 133 rate = (n_outbytes - e->last_outbytes) << 4;
139 e->last_outbytes = n_outbytes; 134 e->last_outbytes = n_outbytes;
140 e->outbps += ((long)rate - (long)e->outbps) >> 2; 135 e->outbps += ((long)rate - (long)e->outbps) >> 2;
141 s->ustats.outbps = (e->outbps + 0xF) >> 5;
142 spin_unlock(&s->lock); 136 spin_unlock(&s->lock);
143 } 137 }
144 spin_unlock(&ipvs->est_lock); 138 spin_unlock(&ipvs->est_lock);
145 mod_timer(&ipvs->est_timer, jiffies + 2*HZ); 139 mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
146} 140}
147 141
148void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) 142void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
149{ 143{
150 struct netns_ipvs *ipvs = net_ipvs(net); 144 struct netns_ipvs *ipvs = net_ipvs(net);
151 struct ip_vs_estimator *est = &stats->est; 145 struct ip_vs_estimator *est = &stats->est;
152 146
153 INIT_LIST_HEAD(&est->list); 147 INIT_LIST_HEAD(&est->list);
154 148
155 est->last_conns = stats->ustats.conns;
156 est->cps = stats->ustats.cps<<10;
157
158 est->last_inpkts = stats->ustats.inpkts;
159 est->inpps = stats->ustats.inpps<<10;
160
161 est->last_outpkts = stats->ustats.outpkts;
162 est->outpps = stats->ustats.outpps<<10;
163
164 est->last_inbytes = stats->ustats.inbytes;
165 est->inbps = stats->ustats.inbps<<5;
166
167 est->last_outbytes = stats->ustats.outbytes;
168 est->outbps = stats->ustats.outbps<<5;
169
170 spin_lock_bh(&ipvs->est_lock); 149 spin_lock_bh(&ipvs->est_lock);
171 list_add(&est->list, &ipvs->est_list); 150 list_add(&est->list, &ipvs->est_list);
172 spin_unlock_bh(&ipvs->est_lock); 151 spin_unlock_bh(&ipvs->est_lock);
173} 152}
174 153
175void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) 154void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
176{ 155{
177 struct netns_ipvs *ipvs = net_ipvs(net); 156 struct netns_ipvs *ipvs = net_ipvs(net);
178 struct ip_vs_estimator *est = &stats->est; 157 struct ip_vs_estimator *est = &stats->est;
@@ -185,13 +164,14 @@ void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
185void ip_vs_zero_estimator(struct ip_vs_stats *stats) 164void ip_vs_zero_estimator(struct ip_vs_stats *stats)
186{ 165{
187 struct ip_vs_estimator *est = &stats->est; 166 struct ip_vs_estimator *est = &stats->est;
188 167 struct ip_vs_stats_user *u = &stats->ustats;
189 /* set counters zero, caller must hold the stats->lock lock */ 168
190 est->last_inbytes = 0; 169 /* reset counters, caller must hold the stats->lock lock */
191 est->last_outbytes = 0; 170 est->last_inbytes = u->inbytes;
192 est->last_conns = 0; 171 est->last_outbytes = u->outbytes;
193 est->last_inpkts = 0; 172 est->last_conns = u->conns;
194 est->last_outpkts = 0; 173 est->last_inpkts = u->inpkts;
174 est->last_outpkts = u->outpkts;
195 est->cps = 0; 175 est->cps = 0;
196 est->inpps = 0; 176 est->inpps = 0;
197 est->outpps = 0; 177 est->outpps = 0;
@@ -199,6 +179,19 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
199 est->outbps = 0; 179 est->outbps = 0;
200} 180}
201 181
182/* Get decoded rates */
183void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
184 struct ip_vs_stats *stats)
185{
186 struct ip_vs_estimator *e = &stats->est;
187
188 dst->cps = (e->cps + 0x1FF) >> 10;
189 dst->inpps = (e->inpps + 0x1FF) >> 10;
190 dst->outpps = (e->outpps + 0x1FF) >> 10;
191 dst->inbps = (e->inbps + 0xF) >> 5;
192 dst->outbps = (e->outbps + 0xF) >> 5;
193}
194
202static int __net_init __ip_vs_estimator_init(struct net *net) 195static int __net_init __ip_vs_estimator_init(struct net *net)
203{ 196{
204 struct netns_ipvs *ipvs = net_ipvs(net); 197 struct netns_ipvs *ipvs = net_ipvs(net);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 6bf7a807649c..f276df9896b3 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -63,6 +63,8 @@
63#define CHECK_EXPIRE_INTERVAL (60*HZ) 63#define CHECK_EXPIRE_INTERVAL (60*HZ)
64#define ENTRY_TIMEOUT (6*60*HZ) 64#define ENTRY_TIMEOUT (6*60*HZ)
65 65
66#define DEFAULT_EXPIRATION (24*60*60*HZ)
67
66/* 68/*
67 * It is for full expiration check. 69 * It is for full expiration check.
68 * When there is no partial expiration check (garbage collection) 70 * When there is no partial expiration check (garbage collection)
@@ -112,7 +114,7 @@ struct ip_vs_lblc_table {
112/* 114/*
113 * IPVS LBLC sysctl table 115 * IPVS LBLC sysctl table
114 */ 116 */
115 117#ifdef CONFIG_SYSCTL
116static ctl_table vs_vars_table[] = { 118static ctl_table vs_vars_table[] = {
117 { 119 {
118 .procname = "lblc_expiration", 120 .procname = "lblc_expiration",
@@ -123,6 +125,7 @@ static ctl_table vs_vars_table[] = {
123 }, 125 },
124 { } 126 { }
125}; 127};
128#endif
126 129
127static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 130static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
128{ 131{
@@ -238,6 +241,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
238 } 241 }
239} 242}
240 243
244static int sysctl_lblc_expiration(struct ip_vs_service *svc)
245{
246#ifdef CONFIG_SYSCTL
247 struct netns_ipvs *ipvs = net_ipvs(svc->net);
248 return ipvs->sysctl_lblc_expiration;
249#else
250 return DEFAULT_EXPIRATION;
251#endif
252}
241 253
242static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) 254static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
243{ 255{
@@ -245,7 +257,6 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
245 struct ip_vs_lblc_entry *en, *nxt; 257 struct ip_vs_lblc_entry *en, *nxt;
246 unsigned long now = jiffies; 258 unsigned long now = jiffies;
247 int i, j; 259 int i, j;
248 struct netns_ipvs *ipvs = net_ipvs(svc->net);
249 260
250 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { 261 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
251 j = (j + 1) & IP_VS_LBLC_TAB_MASK; 262 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -254,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
254 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 265 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
255 if (time_before(now, 266 if (time_before(now,
256 en->lastuse + 267 en->lastuse +
257 ipvs->sysctl_lblc_expiration)) 268 sysctl_lblc_expiration(svc)))
258 continue; 269 continue;
259 270
260 ip_vs_lblc_free(en); 271 ip_vs_lblc_free(en);
@@ -538,6 +549,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
538/* 549/*
539 * per netns init. 550 * per netns init.
540 */ 551 */
552#ifdef CONFIG_SYSCTL
541static int __net_init __ip_vs_lblc_init(struct net *net) 553static int __net_init __ip_vs_lblc_init(struct net *net)
542{ 554{
543 struct netns_ipvs *ipvs = net_ipvs(net); 555 struct netns_ipvs *ipvs = net_ipvs(net);
@@ -550,10 +562,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
550 return -ENOMEM; 562 return -ENOMEM;
551 } else 563 } else
552 ipvs->lblc_ctl_table = vs_vars_table; 564 ipvs->lblc_ctl_table = vs_vars_table;
553 ipvs->sysctl_lblc_expiration = 24*60*60*HZ; 565 ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
554 ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; 566 ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
555 567
556#ifdef CONFIG_SYSCTL
557 ipvs->lblc_ctl_header = 568 ipvs->lblc_ctl_header =
558 register_net_sysctl_table(net, net_vs_ctl_path, 569 register_net_sysctl_table(net, net_vs_ctl_path,
559 ipvs->lblc_ctl_table); 570 ipvs->lblc_ctl_table);
@@ -562,7 +573,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
562 kfree(ipvs->lblc_ctl_table); 573 kfree(ipvs->lblc_ctl_table);
563 return -ENOMEM; 574 return -ENOMEM;
564 } 575 }
565#endif
566 576
567 return 0; 577 return 0;
568} 578}
@@ -571,14 +581,19 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net)
571{ 581{
572 struct netns_ipvs *ipvs = net_ipvs(net); 582 struct netns_ipvs *ipvs = net_ipvs(net);
573 583
574#ifdef CONFIG_SYSCTL
575 unregister_net_sysctl_table(ipvs->lblc_ctl_header); 584 unregister_net_sysctl_table(ipvs->lblc_ctl_header);
576#endif
577 585
578 if (!net_eq(net, &init_net)) 586 if (!net_eq(net, &init_net))
579 kfree(ipvs->lblc_ctl_table); 587 kfree(ipvs->lblc_ctl_table);
580} 588}
581 589
590#else
591
592static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; }
593static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
594
595#endif
596
582static struct pernet_operations ip_vs_lblc_ops = { 597static struct pernet_operations ip_vs_lblc_ops = {
583 .init = __ip_vs_lblc_init, 598 .init = __ip_vs_lblc_init,
584 .exit = __ip_vs_lblc_exit, 599 .exit = __ip_vs_lblc_exit,
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 00631765b92a..cb1c9913d38b 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -63,6 +63,8 @@
63#define CHECK_EXPIRE_INTERVAL (60*HZ) 63#define CHECK_EXPIRE_INTERVAL (60*HZ)
64#define ENTRY_TIMEOUT (6*60*HZ) 64#define ENTRY_TIMEOUT (6*60*HZ)
65 65
66#define DEFAULT_EXPIRATION (24*60*60*HZ)
67
66/* 68/*
67 * It is for full expiration check. 69 * It is for full expiration check.
68 * When there is no partial expiration check (garbage collection) 70 * When there is no partial expiration check (garbage collection)
@@ -283,6 +285,7 @@ struct ip_vs_lblcr_table {
283}; 285};
284 286
285 287
288#ifdef CONFIG_SYSCTL
286/* 289/*
287 * IPVS LBLCR sysctl table 290 * IPVS LBLCR sysctl table
288 */ 291 */
@@ -297,6 +300,7 @@ static ctl_table vs_vars_table[] = {
297 }, 300 },
298 { } 301 { }
299}; 302};
303#endif
300 304
301static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) 305static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
302{ 306{
@@ -410,6 +414,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
410 } 414 }
411} 415}
412 416
417static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
418{
419#ifdef CONFIG_SYSCTL
420 struct netns_ipvs *ipvs = net_ipvs(svc->net);
421 return ipvs->sysctl_lblcr_expiration;
422#else
423 return DEFAULT_EXPIRATION;
424#endif
425}
413 426
414static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) 427static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
415{ 428{
@@ -417,15 +430,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
417 unsigned long now = jiffies; 430 unsigned long now = jiffies;
418 int i, j; 431 int i, j;
419 struct ip_vs_lblcr_entry *en, *nxt; 432 struct ip_vs_lblcr_entry *en, *nxt;
420 struct netns_ipvs *ipvs = net_ipvs(svc->net);
421 433
422 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 434 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
423 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 435 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
424 436
425 write_lock(&svc->sched_lock); 437 write_lock(&svc->sched_lock);
426 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 438 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
427 if (time_after(en->lastuse 439 if (time_after(en->lastuse +
428 + ipvs->sysctl_lblcr_expiration, now)) 440 sysctl_lblcr_expiration(svc), now))
429 continue; 441 continue;
430 442
431 ip_vs_lblcr_free(en); 443 ip_vs_lblcr_free(en);
@@ -650,7 +662,6 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
650 read_lock(&svc->sched_lock); 662 read_lock(&svc->sched_lock);
651 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); 663 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
652 if (en) { 664 if (en) {
653 struct netns_ipvs *ipvs = net_ipvs(svc->net);
654 /* We only hold a read lock, but this is atomic */ 665 /* We only hold a read lock, but this is atomic */
655 en->lastuse = jiffies; 666 en->lastuse = jiffies;
656 667
@@ -662,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
662 /* More than one destination + enough time passed by, cleanup */ 673 /* More than one destination + enough time passed by, cleanup */
663 if (atomic_read(&en->set.size) > 1 && 674 if (atomic_read(&en->set.size) > 1 &&
664 time_after(jiffies, en->set.lastmod + 675 time_after(jiffies, en->set.lastmod +
665 ipvs->sysctl_lblcr_expiration)) { 676 sysctl_lblcr_expiration(svc))) {
666 struct ip_vs_dest *m; 677 struct ip_vs_dest *m;
667 678
668 write_lock(&en->set.lock); 679 write_lock(&en->set.lock);
@@ -734,6 +745,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
734/* 745/*
735 * per netns init. 746 * per netns init.
736 */ 747 */
748#ifdef CONFIG_SYSCTL
737static int __net_init __ip_vs_lblcr_init(struct net *net) 749static int __net_init __ip_vs_lblcr_init(struct net *net)
738{ 750{
739 struct netns_ipvs *ipvs = net_ipvs(net); 751 struct netns_ipvs *ipvs = net_ipvs(net);
@@ -746,10 +758,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
746 return -ENOMEM; 758 return -ENOMEM;
747 } else 759 } else
748 ipvs->lblcr_ctl_table = vs_vars_table; 760 ipvs->lblcr_ctl_table = vs_vars_table;
749 ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; 761 ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
750 ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; 762 ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
751 763
752#ifdef CONFIG_SYSCTL
753 ipvs->lblcr_ctl_header = 764 ipvs->lblcr_ctl_header =
754 register_net_sysctl_table(net, net_vs_ctl_path, 765 register_net_sysctl_table(net, net_vs_ctl_path,
755 ipvs->lblcr_ctl_table); 766 ipvs->lblcr_ctl_table);
@@ -758,7 +769,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
758 kfree(ipvs->lblcr_ctl_table); 769 kfree(ipvs->lblcr_ctl_table);
759 return -ENOMEM; 770 return -ENOMEM;
760 } 771 }
761#endif
762 772
763 return 0; 773 return 0;
764} 774}
@@ -767,14 +777,19 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net)
767{ 777{
768 struct netns_ipvs *ipvs = net_ipvs(net); 778 struct netns_ipvs *ipvs = net_ipvs(net);
769 779
770#ifdef CONFIG_SYSCTL
771 unregister_net_sysctl_table(ipvs->lblcr_ctl_header); 780 unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
772#endif
773 781
774 if (!net_eq(net, &init_net)) 782 if (!net_eq(net, &init_net))
775 kfree(ipvs->lblcr_ctl_table); 783 kfree(ipvs->lblcr_ctl_table);
776} 784}
777 785
786#else
787
788static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
789static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
790
791#endif
792
778static struct pernet_operations ip_vs_lblcr_ops = { 793static struct pernet_operations ip_vs_lblcr_ops = {
779 .init = __ip_vs_lblcr_init, 794 .init = __ip_vs_lblcr_init,
780 .exit = __ip_vs_lblcr_exit, 795 .exit = __ip_vs_lblcr_exit,
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 0d83bc01fed4..13d607ae9c52 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -92,14 +92,13 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
92 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) 92 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
93 return -EINVAL; 93 return -EINVAL;
94 94
95 p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
96 if (!p->pe_data)
97 return -ENOMEM;
98
99 /* N.B: pe_data is only set on success, 95 /* N.B: pe_data is only set on success,
100 * this allows fallback to the default persistence logic on failure 96 * this allows fallback to the default persistence logic on failure
101 */ 97 */
102 memcpy(p->pe_data, dptr + matchoff, matchlen); 98 p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC);
99 if (!p->pe_data)
100 return -ENOMEM;
101
103 p->pe_data_len = matchlen; 102 p->pe_data_len = matchlen;
104 103
105 return 0; 104 return 0;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index fecf24de4af3..3e7961e85e9c 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode)
394 394
395 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) 395 if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
396 return; 396 return;
397 if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) 397 if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
398 return; 398 return;
399 399
400 spin_lock_bh(&ipvs->sync_buff_lock); 400 spin_lock_bh(&ipvs->sync_buff_lock);
@@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
521 unsigned int len, pe_name_len, pad; 521 unsigned int len, pe_name_len, pad;
522 522
523 /* Handle old version of the protocol */ 523 /* Handle old version of the protocol */
524 if (ipvs->sysctl_sync_ver == 0) { 524 if (sysctl_sync_ver(ipvs) == 0) {
525 ip_vs_sync_conn_v0(net, cp); 525 ip_vs_sync_conn_v0(net, cp);
526 return; 526 return;
527 } 527 }
@@ -650,7 +650,7 @@ control:
650 if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 650 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
651 int pkts = atomic_add_return(1, &cp->in_pkts); 651 int pkts = atomic_add_return(1, &cp->in_pkts);
652 652
653 if (pkts % ipvs->sysctl_sync_threshold[1] != 1) 653 if (pkts % sysctl_sync_period(ipvs) != 1)
654 return; 654 return;
655 } 655 }
656 goto sloop; 656 goto sloop;
@@ -697,13 +697,12 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
697 return 1; 697 return 1;
698 } 698 }
699 699
700 p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); 700 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
701 if (!p->pe_data) { 701 if (!p->pe_data) {
702 if (p->pe->module) 702 if (p->pe->module)
703 module_put(p->pe->module); 703 module_put(p->pe->module);
704 return -ENOMEM; 704 return -ENOMEM;
705 } 705 }
706 memcpy(p->pe_data, pe_data, pe_data_len);
707 p->pe_data_len = pe_data_len; 706 p->pe_data_len = pe_data_len;
708 } 707 }
709 return 0; 708 return 0;
@@ -795,7 +794,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
795 794
796 if (opt) 795 if (opt)
797 memcpy(&cp->in_seq, opt, sizeof(*opt)); 796 memcpy(&cp->in_seq, opt, sizeof(*opt));
798 atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); 797 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
799 cp->state = state; 798 cp->state = state;
800 cp->old_state = cp->state; 799 cp->old_state = cp->state;
801 /* 800 /*
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2f454efa1a8b..941286ca911d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1301,6 +1301,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
1301 1301
1302 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1302 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1303 nf_conntrack_ecache_fini(net); 1303 nf_conntrack_ecache_fini(net);
1304 nf_conntrack_tstamp_fini(net);
1304 nf_conntrack_acct_fini(net); 1305 nf_conntrack_acct_fini(net);
1305 nf_conntrack_expect_fini(net); 1306 nf_conntrack_expect_fini(net);
1306 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1307 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 0a77d2ff2154..a9adf4c6b299 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -183,14 +183,14 @@ EXPORT_SYMBOL(xt_unregister_matches);
183/* 183/*
184 * These are weird, but module loading must not be done with mutex 184 * These are weird, but module loading must not be done with mutex
185 * held (since they will register), and we have to have a single 185 * held (since they will register), and we have to have a single
186 * function to use try_then_request_module(). 186 * function to use.
187 */ 187 */
188 188
189/* Find match, grabs ref. Returns ERR_PTR() on error. */ 189/* Find match, grabs ref. Returns ERR_PTR() on error. */
190struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) 190struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
191{ 191{
192 struct xt_match *m; 192 struct xt_match *m;
193 int err = 0; 193 int err = -ENOENT;
194 194
195 if (mutex_lock_interruptible(&xt[af].mutex) != 0) 195 if (mutex_lock_interruptible(&xt[af].mutex) != 0)
196 return ERR_PTR(-EINTR); 196 return ERR_PTR(-EINTR);
@@ -221,9 +221,13 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
221{ 221{
222 struct xt_match *match; 222 struct xt_match *match;
223 223
224 match = try_then_request_module(xt_find_match(nfproto, name, revision), 224 match = xt_find_match(nfproto, name, revision);
225 "%st_%s", xt_prefix[nfproto], name); 225 if (IS_ERR(match)) {
226 return (match != NULL) ? match : ERR_PTR(-ENOENT); 226 request_module("%st_%s", xt_prefix[nfproto], name);
227 match = xt_find_match(nfproto, name, revision);
228 }
229
230 return match;
227} 231}
228EXPORT_SYMBOL_GPL(xt_request_find_match); 232EXPORT_SYMBOL_GPL(xt_request_find_match);
229 233
@@ -231,7 +235,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_match);
231struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) 235struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
232{ 236{
233 struct xt_target *t; 237 struct xt_target *t;
234 int err = 0; 238 int err = -ENOENT;
235 239
236 if (mutex_lock_interruptible(&xt[af].mutex) != 0) 240 if (mutex_lock_interruptible(&xt[af].mutex) != 0)
237 return ERR_PTR(-EINTR); 241 return ERR_PTR(-EINTR);
@@ -261,9 +265,13 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
261{ 265{
262 struct xt_target *target; 266 struct xt_target *target;
263 267
264 target = try_then_request_module(xt_find_target(af, name, revision), 268 target = xt_find_target(af, name, revision);
265 "%st_%s", xt_prefix[af], name); 269 if (IS_ERR(target)) {
266 return (target != NULL) ? target : ERR_PTR(-ENOENT); 270 request_module("%st_%s", xt_prefix[af], name);
271 target = xt_find_target(af, name, revision);
272 }
273
274 return target;
267} 275}
268EXPORT_SYMBOL_GPL(xt_request_find_target); 276EXPORT_SYMBOL_GPL(xt_request_find_target);
269 277
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
new file mode 100644
index 000000000000..2220b85e9519
--- /dev/null
+++ b/net/netfilter/xt_addrtype.c
@@ -0,0 +1,229 @@
1/*
2 * iptables module to match inet_addr_type() of an ip.
3 *
4 * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
5 * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/ip.h>
17#include <net/route.h>
18
19#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
20#include <net/ipv6.h>
21#include <net/ip6_route.h>
22#include <net/ip6_fib.h>
23#endif
24
25#include <linux/netfilter/xt_addrtype.h>
26#include <linux/netfilter/x_tables.h>
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
30MODULE_DESCRIPTION("Xtables: address type match");
31MODULE_ALIAS("ipt_addrtype");
32MODULE_ALIAS("ip6t_addrtype");
33
34#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
35static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt)
36{
37 u32 ret;
38
39 if (!rt)
40 return XT_ADDRTYPE_UNREACHABLE;
41
42 if (rt->rt6i_flags & RTF_REJECT)
43 ret = XT_ADDRTYPE_UNREACHABLE;
44 else
45 ret = 0;
46
47 if (rt->rt6i_flags & RTF_LOCAL)
48 ret |= XT_ADDRTYPE_LOCAL;
49 if (rt->rt6i_flags & RTF_ANYCAST)
50 ret |= XT_ADDRTYPE_ANYCAST;
51 return ret;
52}
53
54static bool match_type6(struct net *net, const struct net_device *dev,
55 const struct in6_addr *addr, u16 mask)
56{
57 int addr_type = ipv6_addr_type(addr);
58
59 if ((mask & XT_ADDRTYPE_MULTICAST) &&
60 !(addr_type & IPV6_ADDR_MULTICAST))
61 return false;
62 if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST))
63 return false;
64 if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY)
65 return false;
66
67 if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
68 XT_ADDRTYPE_UNREACHABLE) & mask) {
69 struct rt6_info *rt;
70 u32 type;
71 int ifindex = dev ? dev->ifindex : 0;
72
73 rt = rt6_lookup(net, addr, NULL, ifindex, !!dev);
74
75 type = xt_addrtype_rt6_to_type(rt);
76
77 dst_release(&rt->dst);
78 return !!(mask & type);
79 }
80 return true;
81}
82
83static bool
84addrtype_mt6(struct net *net, const struct net_device *dev,
85 const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info)
86{
87 const struct ipv6hdr *iph = ipv6_hdr(skb);
88 bool ret = true;
89
90 if (info->source)
91 ret &= match_type6(net, dev, &iph->saddr, info->source) ^
92 (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
93 if (ret && info->dest)
94 ret &= match_type6(net, dev, &iph->daddr, info->dest) ^
95 !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
96 return ret;
97}
98#endif
99
100static inline bool match_type(struct net *net, const struct net_device *dev,
101 __be32 addr, u_int16_t mask)
102{
103 return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
104}
105
106static bool
107addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
108{
109 struct net *net = dev_net(par->in ? par->in : par->out);
110 const struct xt_addrtype_info *info = par->matchinfo;
111 const struct iphdr *iph = ip_hdr(skb);
112 bool ret = true;
113
114 if (info->source)
115 ret &= match_type(net, NULL, iph->saddr, info->source) ^
116 info->invert_source;
117 if (info->dest)
118 ret &= match_type(net, NULL, iph->daddr, info->dest) ^
119 info->invert_dest;
120
121 return ret;
122}
123
124static bool
125addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
126{
127 struct net *net = dev_net(par->in ? par->in : par->out);
128 const struct xt_addrtype_info_v1 *info = par->matchinfo;
129 const struct iphdr *iph;
130 const struct net_device *dev = NULL;
131 bool ret = true;
132
133 if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
134 dev = par->in;
135 else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
136 dev = par->out;
137
138#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
139 if (par->family == NFPROTO_IPV6)
140 return addrtype_mt6(net, dev, skb, info);
141#endif
142 iph = ip_hdr(skb);
143 if (info->source)
144 ret &= match_type(net, dev, iph->saddr, info->source) ^
145 (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
146 if (ret && info->dest)
147 ret &= match_type(net, dev, iph->daddr, info->dest) ^
148 !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
149 return ret;
150}
151
152static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
153{
154 struct xt_addrtype_info_v1 *info = par->matchinfo;
155
156 if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
157 info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
158 pr_info("both incoming and outgoing "
159 "interface limitation cannot be selected\n");
160 return -EINVAL;
161 }
162
163 if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
164 (1 << NF_INET_LOCAL_IN)) &&
165 info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
166 pr_info("output interface limitation "
167 "not valid in PREROUTING and INPUT\n");
168 return -EINVAL;
169 }
170
171 if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
172 (1 << NF_INET_LOCAL_OUT)) &&
173 info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
174 pr_info("input interface limitation "
175 "not valid in POSTROUTING and OUTPUT\n");
176 return -EINVAL;
177 }
178
179#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
180 if (par->family == NFPROTO_IPV6) {
181 if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
182 pr_err("ipv6 BLACKHOLE matching not supported\n");
183 return -EINVAL;
184 }
185 if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
186 pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
187 return -EINVAL;
188 }
189 if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
190 pr_err("ipv6 does not support BROADCAST matching\n");
191 return -EINVAL;
192 }
193 }
194#endif
195 return 0;
196}
197
198static struct xt_match addrtype_mt_reg[] __read_mostly = {
199 {
200 .name = "addrtype",
201 .family = NFPROTO_IPV4,
202 .match = addrtype_mt_v0,
203 .matchsize = sizeof(struct xt_addrtype_info),
204 .me = THIS_MODULE
205 },
206 {
207 .name = "addrtype",
208 .family = NFPROTO_UNSPEC,
209 .revision = 1,
210 .match = addrtype_mt_v1,
211 .checkentry = addrtype_mt_checkentry_v1,
212 .matchsize = sizeof(struct xt_addrtype_info_v1),
213 .me = THIS_MODULE
214 }
215};
216
217static int __init addrtype_mt_init(void)
218{
219 return xt_register_matches(addrtype_mt_reg,
220 ARRAY_SIZE(addrtype_mt_reg));
221}
222
223static void __exit addrtype_mt_exit(void)
224{
225 xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
226}
227
228module_init(addrtype_mt_init);
229module_exit(addrtype_mt_exit);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index e029c4807404..c6d5a83450c9 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -33,17 +33,17 @@
33 33
34/* we will save the tuples of all connections we care about */ 34/* we will save the tuples of all connections we care about */
35struct xt_connlimit_conn { 35struct xt_connlimit_conn {
36 struct list_head list; 36 struct hlist_node node;
37 struct nf_conntrack_tuple tuple; 37 struct nf_conntrack_tuple tuple;
38 union nf_inet_addr addr;
38}; 39};
39 40
40struct xt_connlimit_data { 41struct xt_connlimit_data {
41 struct list_head iphash[256]; 42 struct hlist_head iphash[256];
42 spinlock_t lock; 43 spinlock_t lock;
43}; 44};
44 45
45static u_int32_t connlimit_rnd __read_mostly; 46static u_int32_t connlimit_rnd __read_mostly;
46static bool connlimit_rnd_inited __read_mostly;
47 47
48static inline unsigned int connlimit_iphash(__be32 addr) 48static inline unsigned int connlimit_iphash(__be32 addr)
49{ 49{
@@ -101,9 +101,9 @@ static int count_them(struct net *net,
101{ 101{
102 const struct nf_conntrack_tuple_hash *found; 102 const struct nf_conntrack_tuple_hash *found;
103 struct xt_connlimit_conn *conn; 103 struct xt_connlimit_conn *conn;
104 struct xt_connlimit_conn *tmp; 104 struct hlist_node *pos, *n;
105 struct nf_conn *found_ct; 105 struct nf_conn *found_ct;
106 struct list_head *hash; 106 struct hlist_head *hash;
107 bool addit = true; 107 bool addit = true;
108 int matches = 0; 108 int matches = 0;
109 109
@@ -115,7 +115,7 @@ static int count_them(struct net *net,
115 rcu_read_lock(); 115 rcu_read_lock();
116 116
117 /* check the saved connections */ 117 /* check the saved connections */
118 list_for_each_entry_safe(conn, tmp, hash, list) { 118 hlist_for_each_entry_safe(conn, pos, n, hash, node) {
119 found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, 119 found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
120 &conn->tuple); 120 &conn->tuple);
121 found_ct = NULL; 121 found_ct = NULL;
@@ -135,7 +135,7 @@ static int count_them(struct net *net,
135 135
136 if (found == NULL) { 136 if (found == NULL) {
137 /* this one is gone */ 137 /* this one is gone */
138 list_del(&conn->list); 138 hlist_del(&conn->node);
139 kfree(conn); 139 kfree(conn);
140 continue; 140 continue;
141 } 141 }
@@ -146,12 +146,12 @@ static int count_them(struct net *net,
146 * closed already -> ditch it 146 * closed already -> ditch it
147 */ 147 */
148 nf_ct_put(found_ct); 148 nf_ct_put(found_ct);
149 list_del(&conn->list); 149 hlist_del(&conn->node);
150 kfree(conn); 150 kfree(conn);
151 continue; 151 continue;
152 } 152 }
153 153
154 if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) 154 if (same_source_net(addr, mask, &conn->addr, family))
155 /* same source network -> be counted! */ 155 /* same source network -> be counted! */
156 ++matches; 156 ++matches;
157 nf_ct_put(found_ct); 157 nf_ct_put(found_ct);
@@ -161,11 +161,12 @@ static int count_them(struct net *net,
161 161
162 if (addit) { 162 if (addit) {
163 /* save the new connection in our list */ 163 /* save the new connection in our list */
164 conn = kzalloc(sizeof(*conn), GFP_ATOMIC); 164 conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
165 if (conn == NULL) 165 if (conn == NULL)
166 return -ENOMEM; 166 return -ENOMEM;
167 conn->tuple = *tuple; 167 conn->tuple = *tuple;
168 list_add(&conn->list, hash); 168 conn->addr = *addr;
169 hlist_add_head(&conn->node, hash);
169 ++matches; 170 ++matches;
170 } 171 }
171 172
@@ -185,15 +186,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
185 int connections; 186 int connections;
186 187
187 ct = nf_ct_get(skb, &ctinfo); 188 ct = nf_ct_get(skb, &ctinfo);
188 if (ct != NULL) { 189 if (ct != NULL)
189 if (info->flags & XT_CONNLIMIT_DADDR) 190 tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
190 tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; 191 else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
191 else 192 par->family, &tuple))
192 tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
193 } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
194 par->family, &tuple)) {
195 goto hotdrop; 193 goto hotdrop;
196 }
197 194
198 if (par->family == NFPROTO_IPV6) { 195 if (par->family == NFPROTO_IPV6) {
199 const struct ipv6hdr *iph = ipv6_hdr(skb); 196 const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -228,9 +225,13 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
228 unsigned int i; 225 unsigned int i;
229 int ret; 226 int ret;
230 227
231 if (unlikely(!connlimit_rnd_inited)) { 228 if (unlikely(!connlimit_rnd)) {
232 get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); 229 u_int32_t rand;
233 connlimit_rnd_inited = true; 230
231 do {
232 get_random_bytes(&rand, sizeof(rand));
233 } while (!rand);
234 cmpxchg(&connlimit_rnd, 0, rand);
234 } 235 }
235 ret = nf_ct_l3proto_try_module_get(par->family); 236 ret = nf_ct_l3proto_try_module_get(par->family);
236 if (ret < 0) { 237 if (ret < 0) {
@@ -248,7 +249,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
248 249
249 spin_lock_init(&info->data->lock); 250 spin_lock_init(&info->data->lock);
250 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) 251 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
251 INIT_LIST_HEAD(&info->data->iphash[i]); 252 INIT_HLIST_HEAD(&info->data->iphash[i]);
252 253
253 return 0; 254 return 0;
254} 255}
@@ -257,15 +258,15 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
257{ 258{
258 const struct xt_connlimit_info *info = par->matchinfo; 259 const struct xt_connlimit_info *info = par->matchinfo;
259 struct xt_connlimit_conn *conn; 260 struct xt_connlimit_conn *conn;
260 struct xt_connlimit_conn *tmp; 261 struct hlist_node *pos, *n;
261 struct list_head *hash = info->data->iphash; 262 struct hlist_head *hash = info->data->iphash;
262 unsigned int i; 263 unsigned int i;
263 264
264 nf_ct_l3proto_module_put(par->family); 265 nf_ct_l3proto_module_put(par->family);
265 266
266 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { 267 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
267 list_for_each_entry_safe(conn, tmp, &hash[i], list) { 268 hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) {
268 list_del(&conn->list); 269 hlist_del(&conn->node);
269 kfree(conn); 270 kfree(conn);
270 } 271 }
271 } 272 }