author     David S. Miller <davem@davemloft.net>  2012-05-08 14:40:21 -0400
committer  David S. Miller <davem@davemloft.net>  2012-05-08 14:40:21 -0400
commit     9bb862beb6e5839e92f709d33fda07678f062f20 (patch)
tree       a2c396712c5a2cda380034173fd07a67bfa0489f
parent     b44907e64cc1987153f6577306108379be1523b7 (diff)
parent     d16cf20e2f2f13411eece7f7fb72c17d141c4a84 (diff)
Merge branch 'master' of git://1984.lsi.us.es/net-next
-rw-r--r--  Documentation/ABI/removed/ip_queue             |    9
-rw-r--r--  Documentation/networking/ip-sysctl.txt         |   13
-rw-r--r--  include/linux/ip_vs.h                          |    5
-rw-r--r--  include/linux/netfilter/nf_conntrack_common.h  |    4
-rw-r--r--  include/linux/netfilter_ipv4/Kbuild            |    1
-rw-r--r--  include/linux/netfilter_ipv4/ip_queue.h        |   72
-rw-r--r--  include/linux/netlink.h                        |    2
-rw-r--r--  include/net/ip_vs.h                            |   87
-rw-r--r--  include/net/netfilter/nf_conntrack.h           |   10
-rw-r--r--  include/net/netfilter/nf_conntrack_helper.h    |    4
-rw-r--r--  include/net/netns/conntrack.h                  |    3
-rw-r--r--  net/bridge/br_netfilter.c                      |   26
-rw-r--r--  net/core/sock.c                                |    2
-rw-r--r--  net/ipv4/netfilter/Makefile                    |    3
-rw-r--r--  net/ipv4/netfilter/ip_queue.c                  |  639
-rw-r--r--  net/ipv6/netfilter/Kconfig                     |   22
-rw-r--r--  net/ipv6/netfilter/Makefile                    |    1
-rw-r--r--  net/ipv6/netfilter/ip6_queue.c                 |  641
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c                |   70
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c                |   30
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c                 |   70
-rw-r--r--  net/netfilter/ipvs/ip_vs_dh.c                  |    2
-rw-r--r--  net/netfilter/ipvs/ip_vs_ftp.c                 |    2
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblc.c                |    2
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblcr.c               |    2
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto.c               |    6
-rw-r--r--  net/netfilter/ipvs/ip_vs_sh.c                  |    2
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c                |  662
-rw-r--r--  net/netfilter/ipvs/ip_vs_wrr.c                 |    2
-rw-r--r--  net/netfilter/nf_conntrack_core.c              |   15
-rw-r--r--  net/netfilter/nf_conntrack_ecache.c            |   10
-rw-r--r--  net/netfilter/nf_conntrack_helper.c            |  122
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c           |   10
-rw-r--r--  security/selinux/nlmsgtab.c                    |   13
34 files changed, 856 insertions(+), 1708 deletions(-)
diff --git a/Documentation/ABI/removed/ip_queue b/Documentation/ABI/removed/ip_queue
new file mode 100644
index 000000000000..3243613bc2d2
--- /dev/null
+++ b/Documentation/ABI/removed/ip_queue
@@ -0,0 +1,9 @@
+What:		ip_queue
+Date:		finally removed in kernel v3.5.0
+Contact:	Pablo Neira Ayuso <pablo@netfilter.org>
+Description:
+	ip_queue has been replaced by nfnetlink_queue which provides
+	more advanced queueing mechanism to user-space. The ip_queue
+	module was already announced to become obsolete years ago.
+
+Users:
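
Editor's note: the migration path for former ip_queue users is
nfnetlink_queue.  Below is a minimal userspace sketch using
libnetfilter_queue; it assumes an "iptables ... -j NFQUEUE --queue-num 0"
rule is in place and applies an accept-everything policy.  This is an
illustration only, not part of this commit.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netfilter.h>		/* NF_ACCEPT */
#include <libnetfilter_queue/libnetfilter_queue.h>

/* verdict callback: invoked once per queued packet */
static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
	uint32_t id = ph ? ntohl(ph->packet_id) : 0;

	/* accept everything; a real filter would inspect the payload here */
	return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
}

int main(void)
{
	struct nfq_handle *h = nfq_open();
	struct nfq_q_handle *qh;
	char buf[4096];
	int fd, rv;

	if (!h)
		exit(1);
	qh = nfq_create_queue(h, 0, &cb, NULL);	/* queue number 0 */
	if (!qh)
		exit(1);
	/* the equivalent of the old IPQ_COPY_PACKET mode */
	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);
	fd = nfq_fd(h);
	while ((rv = recv(fd, buf, sizeof(buf), 0)) >= 0)
		nfq_handle_packet(h, buf, rv);
	nfq_destroy_queue(qh);
	nfq_close(h);
	return 0;
}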
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 90b0c4fd275b..6f896b94abdc 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1301,13 +1301,22 @@ bridge-nf-call-ip6tables - BOOLEAN
 bridge-nf-filter-vlan-tagged - BOOLEAN
 	1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
 	0 : disable this.
-	Default: 1
+	Default: 0
 
 bridge-nf-filter-pppoe-tagged - BOOLEAN
 	1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
 	0 : disable this.
-	Default: 1
+	Default: 0
 
+bridge-nf-pass-vlan-input-dev - BOOLEAN
+	1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan
+	interface on the bridge and set the netfilter input device to the
+	vlan.  This allows use of e.g. "iptables -i br0.1" and makes the
+	REDIRECT target work with vlan-on-top-of-bridge interfaces.  When no
+	matching vlan interface is found, or this switch is off, the input
+	device is set to the bridge interface.
+	0: disable bridge netfilter vlan interface lookup.
+	Default: 0
 
 proc/sys/net/sctp/* Variables:
 
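
Editor's note: a hypothetical snippet showing how the new switch would be
flipped at runtime, equivalent to
"sysctl -w net.bridge.bridge-nf-pass-vlan-input-dev=1"; the proc path
assumes bridge netfilter is loaded.  Not part of this commit.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/bridge/bridge-nf-pass-vlan-input-dev", "w");

	if (!f)
		return 1;	/* bridge netfilter not available */
	fputs("1", f);		/* enable vlan input-device lookup */
	return fclose(f) ? 1 : 0;
}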
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index be0ef3df4acb..8a2d438dc499 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -89,6 +89,7 @@
 #define IP_VS_CONN_F_TEMPLATE	0x1000		/* template, not connection */
 #define IP_VS_CONN_F_ONE_PACKET	0x2000		/* forward only one packet */
 
+/* Initial bits allowed in backup server */
 #define IP_VS_CONN_F_BACKUP_MASK	(IP_VS_CONN_F_FWD_MASK | \
 					 IP_VS_CONN_F_NOOUTPUT | \
 					 IP_VS_CONN_F_INACTIVE | \
@@ -97,6 +98,10 @@
 					 IP_VS_CONN_F_TEMPLATE \
 					)
 
+/* Bits allowed to update in backup server */
+#define IP_VS_CONN_F_BACKUP_UPD_MASK	(IP_VS_CONN_F_INACTIVE | \
+					 IP_VS_CONN_F_SEQ_MASK)
+
 /* Flags that are not sent to backup server start from bit 16 */
 #define IP_VS_CONN_F_NFCT	(1 << 16)	/* use netfilter conntrack */
 
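
Editor's illustration (not part of this commit): the two masks split the
flag word for the sync protocol -- BACKUP_MASK filters what a backup
server accepts when a synced connection is first created, while
BACKUP_UPD_MASK limits which bits a later sync update may change.  A
sketch of the merge a backup server would perform on update:

static inline __u32 ip_vs_backup_merge_flags(__u32 cur, __u32 msg_flags)
{
	/* take only the updatable bits from the sync message,
	 * keep everything else from the existing connection */
	return (msg_flags & IP_VS_CONN_F_BACKUP_UPD_MASK) |
	       (cur & ~IP_VS_CONN_F_BACKUP_UPD_MASK);
}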
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 0d3dd66322ec..d146872a0b91 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -83,6 +83,10 @@ enum ip_conntrack_status {
 	/* Conntrack is a fake untracked entry */
 	IPS_UNTRACKED_BIT = 12,
 	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+
+	/* Conntrack got a helper explicitly attached via CT target. */
+	IPS_HELPER_BIT = 13,
+	IPS_HELPER = (1 << IPS_HELPER_BIT),
 };
 
 /* Connection tracking event types */
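
Editor's illustration (hypothetical helper, not from this commit): status
bits are tested with the usual bitops, so the automatic helper-assignment
path can skip conntracks whose helper was pinned explicitly by the CT
target:

static bool ct_helper_was_explicit(const struct nf_conn *ct)
{
	return test_bit(IPS_HELPER_BIT, &ct->status);
}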
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
index 31f8bec95650..c61b8fb1a9ef 100644
--- a/include/linux/netfilter_ipv4/Kbuild
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -1,4 +1,3 @@
-header-y += ip_queue.h
 header-y += ip_tables.h
 header-y += ipt_CLUSTERIP.h
 header-y += ipt_ECN.h
diff --git a/include/linux/netfilter_ipv4/ip_queue.h b/include/linux/netfilter_ipv4/ip_queue.h
deleted file mode 100644
index a03507f465f8..000000000000
--- a/include/linux/netfilter_ipv4/ip_queue.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * This is a module which is used for queueing IPv4 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2000 James Morris, this code is GPL.
- */
-#ifndef _IP_QUEUE_H
-#define _IP_QUEUE_H
-
-#ifdef __KERNEL__
-#ifdef DEBUG_IPQ
-#define QDEBUG(x...) printk(KERN_DEBUG ## x)
-#else
-#define QDEBUG(x...)
-#endif	/* DEBUG_IPQ */
-#else
-#include <net/if.h>
-#endif	/* ! __KERNEL__ */
-
-/* Messages sent from kernel */
-typedef struct ipq_packet_msg {
-	unsigned long packet_id;	/* ID of queued packet */
-	unsigned long mark;		/* Netfilter mark value */
-	long timestamp_sec;		/* Packet arrival time (seconds) */
-	long timestamp_usec;		/* Packet arrvial time (+useconds) */
-	unsigned int hook;		/* Netfilter hook we rode in on */
-	char indev_name[IFNAMSIZ];	/* Name of incoming interface */
-	char outdev_name[IFNAMSIZ];	/* Name of outgoing interface */
-	__be16 hw_protocol;		/* Hardware protocol (network order) */
-	unsigned short hw_type;		/* Hardware type */
-	unsigned char hw_addrlen;	/* Hardware address length */
-	unsigned char hw_addr[8];	/* Hardware address */
-	size_t data_len;		/* Length of packet data */
-	unsigned char payload[0];	/* Optional packet data */
-} ipq_packet_msg_t;
-
-/* Messages sent from userspace */
-typedef struct ipq_mode_msg {
-	unsigned char value;		/* Requested mode */
-	size_t range;			/* Optional range of packet requested */
-} ipq_mode_msg_t;
-
-typedef struct ipq_verdict_msg {
-	unsigned int value;		/* Verdict to hand to netfilter */
-	unsigned long id;		/* Packet ID for this verdict */
-	size_t data_len;		/* Length of replacement data */
-	unsigned char payload[0];	/* Optional replacement packet */
-} ipq_verdict_msg_t;
-
-typedef struct ipq_peer_msg {
-	union {
-		ipq_verdict_msg_t verdict;
-		ipq_mode_msg_t mode;
-	} msg;
-} ipq_peer_msg_t;
-
-/* Packet delivery modes */
-enum {
-	IPQ_COPY_NONE,		/* Initial mode, packets are dropped */
-	IPQ_COPY_META,		/* Copy metadata */
-	IPQ_COPY_PACKET		/* Copy metadata + packet (range) */
-};
-#define IPQ_COPY_MAX	IPQ_COPY_PACKET
-
-/* Types of messages */
-#define IPQM_BASE	0x10	/* standard netlink messages below this */
-#define IPQM_MODE	(IPQM_BASE + 1)		/* Mode request from peer */
-#define IPQM_VERDICT	(IPQM_BASE + 2)		/* Verdict from peer */
-#define IPQM_PACKET	(IPQM_BASE + 3)		/* Packet from kernel */
-#define IPQM_MAX	(IPQM_BASE + 4)
-
-#endif /*_IP_QUEUE_H*/
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index a2092f582a78..0f628ffa420c 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -7,7 +7,7 @@
 #define NETLINK_ROUTE		0	/* Routing/device hook */
 #define NETLINK_UNUSED		1	/* Unused number */
 #define NETLINK_USERSOCK	2	/* Reserved for user mode socket protocols */
-#define NETLINK_FIREWALL	3	/* Firewalling hook */
+#define NETLINK_FIREWALL	3	/* Unused number, formerly ip_queue */
 #define NETLINK_SOCK_DIAG	4	/* socket monitoring */
 #define NETLINK_NFLOG		5	/* netfilter/iptables ULOG */
 #define NETLINK_XFRM		6	/* ipsec */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 93b81aa73429..d6146b4811c2 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -504,6 +504,7 @@ struct ip_vs_conn {
 					 * state transition triggerd
 					 * synchronization
 					 */
+	unsigned long		sync_endtime;	/* jiffies + sent_retries */
 
 	/* Control members */
 	struct ip_vs_conn	*control;	/* Master control connection */
@@ -783,6 +784,16 @@ struct ip_vs_app {
 	void (*timeout_change)(struct ip_vs_app *app, int flags);
 };
 
+struct ipvs_master_sync_state {
+	struct list_head	sync_queue;
+	struct ip_vs_sync_buff	*sync_buff;
+	int			sync_queue_len;
+	unsigned int		sync_queue_delay;
+	struct task_struct	*master_thread;
+	struct delayed_work	master_wakeup_work;
+	struct netns_ipvs	*ipvs;
+};
+
 /* IPVS in network namespace */
 struct netns_ipvs {
 	int			gen;		/* Generation */
@@ -869,10 +880,15 @@ struct netns_ipvs {
 #endif
 	int			sysctl_snat_reroute;
 	int			sysctl_sync_ver;
+	int			sysctl_sync_ports;
+	int			sysctl_sync_qlen_max;
+	int			sysctl_sync_sock_size;
 	int			sysctl_cache_bypass;
 	int			sysctl_expire_nodest_conn;
 	int			sysctl_expire_quiescent_template;
 	int			sysctl_sync_threshold[2];
+	unsigned int		sysctl_sync_refresh_period;
+	int			sysctl_sync_retries;
 	int			sysctl_nat_icmp_send;
 
 	/* ip_vs_lblc */
@@ -888,13 +904,11 @@ struct netns_ipvs {
 	spinlock_t		est_lock;
 	struct timer_list	est_timer;	/* Estimation timer */
 	/* ip_vs_sync */
-	struct list_head	sync_queue;
 	spinlock_t		sync_lock;
-	struct ip_vs_sync_buff	*sync_buff;
+	struct ipvs_master_sync_state *ms;
 	spinlock_t		sync_buff_lock;
-	struct sockaddr_in	sync_mcast_addr;
-	struct task_struct	*master_thread;
-	struct task_struct	*backup_thread;
+	struct task_struct	**backup_threads;
+	int			threads_mask;
 	int			send_mesg_maxlen;
 	int			recv_mesg_maxlen;
 	volatile int		sync_state;
@@ -911,6 +925,14 @@ struct netns_ipvs {
 #define DEFAULT_SYNC_THRESHOLD	3
 #define DEFAULT_SYNC_PERIOD	50
 #define DEFAULT_SYNC_VER	1
+#define DEFAULT_SYNC_REFRESH_PERIOD	(0U * HZ)
+#define DEFAULT_SYNC_RETRIES		0
+#define IPVS_SYNC_WAKEUP_RATE	8
+#define IPVS_SYNC_QLEN_MAX	(IPVS_SYNC_WAKEUP_RATE * 4)
+#define IPVS_SYNC_SEND_DELAY	(HZ / 50)
+#define IPVS_SYNC_CHECK_PERIOD	HZ
+#define IPVS_SYNC_FLUSH_TIME	(HZ * 2)
+#define IPVS_SYNC_PORTS_MAX	(1 << 6)
 
 #ifdef CONFIG_SYSCTL
 
@@ -921,7 +943,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
 
 static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
 {
-	return ipvs->sysctl_sync_threshold[1];
+	return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
+}
+
+static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
+{
+	return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
+}
+
+static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_retries;
 }
 
 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
@@ -929,6 +961,21 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
 	return ipvs->sysctl_sync_ver;
 }
 
+static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
+{
+	return ACCESS_ONCE(ipvs->sysctl_sync_ports);
+}
+
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_qlen_max;
+}
+
+static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_sock_size;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -941,11 +988,36 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
 	return DEFAULT_SYNC_PERIOD;
 }
 
+static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
+{
+	return DEFAULT_SYNC_REFRESH_PERIOD;
+}
+
+static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
+{
+	return DEFAULT_SYNC_RETRIES & 3;
+}
+
 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
 {
 	return DEFAULT_SYNC_VER;
 }
 
+static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
+{
+	return 1;
+}
+
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+	return IPVS_SYNC_QLEN_MAX;
+}
+
+static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
+{
+	return 0;
+}
+
 #endif
 
 /*
@@ -1185,7 +1257,6 @@ extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
 extern struct ip_vs_stats ip_vs_stats;
 extern int sysctl_ip_vs_sync_ver;
 
-extern void ip_vs_sync_switch_mode(struct net *net, int mode);
 extern struct ip_vs_service *
 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		  const union nf_inet_addr *vaddr, __be16 vport);
@@ -1219,7 +1290,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
 			     __u8 syncid);
 extern int stop_sync_thread(struct net *net, int state);
-extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
 
 
 /*
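
Editor's note: several of the new getters wrap their loads in
ACCESS_ONCE() because the values can be rewritten through sysctl while
the sync threads read them concurrently; ACCESS_ONCE() forces a single,
non-refetched load per call.  A hypothetical caller is sketched below
(queue_len() and send_one_sync_message() are made up for the sketch):

static void sync_send_sketch(struct netns_ipvs *ipvs)
{
	int qmax = sysctl_sync_qlen_max(ipvs);	/* sampled once */

	while (queue_len(ipvs) > qmax)		/* hypothetical */
		send_one_sync_message(ipvs);	/* hypothetical */
}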
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index ab86036bbf0c..cce7f6a798bf 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -321,14 +321,8 @@ extern unsigned int nf_conntrack_max;
 extern unsigned int nf_conntrack_hash_rnd;
 void init_nf_conntrack_hash_rnd(void);
 
-#define NF_CT_STAT_INC(net, count)	\
-	__this_cpu_inc((net)->ct.stat->count)
-#define NF_CT_STAT_INC_ATOMIC(net, count)		\
-do {							\
-	local_bh_disable();				\
-	__this_cpu_inc((net)->ct.stat->count);		\
-	local_bh_enable();				\
-} while (0)
+#define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
+#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
 
 #define MODULE_ALIAS_NFCT_HELPER(helper) \
 	MODULE_ALIAS("nfct-helper-" helper)
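
Editor's note: the simplification works because this_cpu_inc() is safe
against preemption and interrupts on its own (a single per-cpu
instruction on x86, an irq-saving fallback on other architectures), so
the open-coded local_bh_disable()/local_bh_enable() pair around
__this_cpu_inc() is no longer needed.  Illustrative use, not from this
commit:

	NF_CT_STAT_INC(net, found);		/* caller already in softirq */
	NF_CT_STAT_INC_ATOMIC(net, ignore);	/* safe from any context */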
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 5767dc242dee..1d1889409b9e 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -60,8 +60,8 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
 	return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
 }
 
-extern int nf_conntrack_helper_init(void);
-extern void nf_conntrack_helper_fini(void);
+extern int nf_conntrack_helper_init(struct net *net);
+extern void nf_conntrack_helper_fini(struct net *net);
 
 extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
 				       unsigned int protoff,
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 7a911eca0f18..a053a19870cf 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -26,11 +26,14 @@ struct netns_ct {
 	int			sysctl_tstamp;
 	int			sysctl_checksum;
 	unsigned int		sysctl_log_invalid; /* Log invalid packets */
+	int			sysctl_auto_assign_helper;
+	bool			auto_assign_helper_warned;
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
 	struct ctl_table_header	*acct_sysctl_header;
 	struct ctl_table_header	*tstamp_sysctl_header;
 	struct ctl_table_header	*event_sysctl_header;
+	struct ctl_table_header	*helper_sysctl_header;
 #endif
 	char			*slabname;
 };
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 53f083686ae4..dce55d4ee83b 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -54,12 +54,14 @@ static int brnf_call_ip6tables __read_mostly = 1;
 static int brnf_call_arptables __read_mostly = 1;
 static int brnf_filter_vlan_tagged __read_mostly = 0;
 static int brnf_filter_pppoe_tagged __read_mostly = 0;
+static int brnf_pass_vlan_indev __read_mostly = 0;
 #else
 #define brnf_call_iptables 1
 #define brnf_call_ip6tables 1
 #define brnf_call_arptables 1
 #define brnf_filter_vlan_tagged 0
 #define brnf_filter_pppoe_tagged 0
+#define brnf_pass_vlan_indev 0
 #endif
 
 #define IS_IP(skb) \
@@ -503,6 +505,19 @@ bridged_dnat:
 	return 0;
 }
 
+static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct net_device *vlan, *br;
+
+	br = bridge_parent(dev);
+	if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
+		return br;
+
+	vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK);
+
+	return vlan ? vlan : br;
+}
+
 /* Some common code for IPv4/IPv6 */
 static struct net_device *setup_pre_routing(struct sk_buff *skb)
 {
@@ -515,7 +530,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
 
 	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
 	nf_bridge->physindev = skb->dev;
-	skb->dev = bridge_parent(skb->dev);
+	skb->dev = brnf_get_logical_dev(skb, skb->dev);
 	if (skb->protocol == htons(ETH_P_8021Q))
 		nf_bridge->mask |= BRNF_8021Q;
 	else if (skb->protocol == htons(ETH_P_PPP_SES))
@@ -774,7 +789,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	else
 		skb->protocol = htons(ETH_P_IPV6);
 
-	NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
+	NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent,
 		br_nf_forward_finish);
 
 	return NF_STOLEN;
@@ -1002,6 +1017,13 @@ static ctl_table brnf_table[] = {
 		.mode		= 0644,
 		.proc_handler	= brnf_sysctl_call_tables,
 	},
+	{
+		.procname	= "bridge-nf-pass-vlan-input-dev",
+		.data		= &brnf_pass_vlan_indev,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= brnf_sysctl_call_tables,
+	},
 	{ }
 };
 #endif
diff --git a/net/core/sock.c b/net/core/sock.c
index b8c818e69c23..26ed27fb2bfb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -259,7 +259,9 @@ static struct lock_class_key af_callback_keys[AF_MAX];
 
 /* Run time adjustable parameters. */
 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+EXPORT_SYMBOL(sysctl_wmem_max);
 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+EXPORT_SYMBOL(sysctl_rmem_max);
 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
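
Editor's note: exporting sysctl_wmem_max/sysctl_rmem_max lets modules
clamp requested socket buffer sizes the same way SO_SNDBUF handling
does; presumably this serves the modular IPVS sync code and its new
sync_sock_size tunable.  A sketch of such a clamp (hypothetical
function, not from this commit):

static void sketch_set_sndbuf(struct sock *sk, int val)
{
	val = min_t(u32, val, sysctl_wmem_max);	/* honour the global cap */
	sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
}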
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 240b68469a7a..c20674dc9452 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
 
 # just filtering instance of ARP tables for now
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
-
-obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
-
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
deleted file mode 100644
index 09775a1e1348..000000000000
--- a/net/ipv4/netfilter/ip_queue.c
+++ /dev/null
@@ -1,639 +0,0 @@
-/*
- * This is a module which is used for queueing IPv4 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
- * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/notifier.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_queue.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/security.h>
-#include <linux/net.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <net/netfilter/nf_queue.h>
-#include <net/ip.h>
-
-#define IPQ_QMAX_DEFAULT 1024
-#define IPQ_PROC_FS_NAME "ip_queue"
-#define NET_IPQ_QMAX 2088
-#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
-
-typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
-
-static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
-static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_SPINLOCK(queue_lock);
-static int peer_pid __read_mostly;
-static unsigned int copy_range __read_mostly;
-static unsigned int queue_total;
-static unsigned int queue_dropped = 0;
-static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl __read_mostly;
-static LIST_HEAD(queue_list);
-static DEFINE_MUTEX(ipqnl_mutex);
-
-static inline void
-__ipq_enqueue_entry(struct nf_queue_entry *entry)
-{
-	list_add_tail(&entry->list, &queue_list);
-	queue_total++;
-}
-
-static inline int
-__ipq_set_mode(unsigned char mode, unsigned int range)
-{
-	int status = 0;
-
-	switch(mode) {
-	case IPQ_COPY_NONE:
-	case IPQ_COPY_META:
-		copy_mode = mode;
-		copy_range = 0;
-		break;
-
-	case IPQ_COPY_PACKET:
-		if (range > 0xFFFF)
-			range = 0xFFFF;
-		copy_range = range;
-		copy_mode = mode;
-		break;
-
-	default:
-		status = -EINVAL;
-
-	}
-	return status;
-}
-
-static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
-
-static inline void
-__ipq_reset(void)
-{
-	peer_pid = 0;
-	net_disable_timestamp();
-	__ipq_set_mode(IPQ_COPY_NONE, 0);
-	__ipq_flush(NULL, 0);
-}
-
-static struct nf_queue_entry *
-ipq_find_dequeue_entry(unsigned long id)
-{
-	struct nf_queue_entry *entry = NULL, *i;
-
-	spin_lock_bh(&queue_lock);
-
-	list_for_each_entry(i, &queue_list, list) {
-		if ((unsigned long)i == id) {
-			entry = i;
-			break;
-		}
-	}
-
-	if (entry) {
-		list_del(&entry->list);
-		queue_total--;
-	}
-
-	spin_unlock_bh(&queue_lock);
-	return entry;
-}
-
-static void
-__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct nf_queue_entry *entry, *next;
-
-	list_for_each_entry_safe(entry, next, &queue_list, list) {
-		if (!cmpfn || cmpfn(entry, data)) {
-			list_del(&entry->list);
-			queue_total--;
-			nf_reinject(entry, NF_DROP);
-		}
-	}
-}
-
-static void
-ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
-	spin_lock_bh(&queue_lock);
-	__ipq_flush(cmpfn, data);
-	spin_unlock_bh(&queue_lock);
-}
-
-static struct sk_buff *
-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
-{
-	sk_buff_data_t old_tail;
-	size_t size = 0;
-	size_t data_len = 0;
-	struct sk_buff *skb;
-	struct ipq_packet_msg *pmsg;
-	struct nlmsghdr *nlh;
-	struct timeval tv;
-
-	switch (ACCESS_ONCE(copy_mode)) {
-	case IPQ_COPY_META:
-	case IPQ_COPY_NONE:
-		size = NLMSG_SPACE(sizeof(*pmsg));
-		break;
-
-	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
-		    (*errp = skb_checksum_help(entry->skb)))
-			return NULL;
-
-		data_len = ACCESS_ONCE(copy_range);
-		if (data_len == 0 || data_len > entry->skb->len)
-			data_len = entry->skb->len;
-
-		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
-		break;
-
-	default:
-		*errp = -EINVAL;
-		return NULL;
-	}
-
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb)
-		goto nlmsg_failure;
-
-	old_tail = skb->tail;
-	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
-	pmsg = NLMSG_DATA(nlh);
-	memset(pmsg, 0, sizeof(*pmsg));
-
-	pmsg->packet_id = (unsigned long )entry;
-	pmsg->data_len = data_len;
-	tv = ktime_to_timeval(entry->skb->tstamp);
-	pmsg->timestamp_sec = tv.tv_sec;
-	pmsg->timestamp_usec = tv.tv_usec;
-	pmsg->mark = entry->skb->mark;
-	pmsg->hook = entry->hook;
-	pmsg->hw_protocol = entry->skb->protocol;
-
-	if (entry->indev)
-		strcpy(pmsg->indev_name, entry->indev->name);
-	else
-		pmsg->indev_name[0] = '\0';
-
-	if (entry->outdev)
-		strcpy(pmsg->outdev_name, entry->outdev->name);
-	else
-		pmsg->outdev_name[0] = '\0';
-
-	if (entry->indev && entry->skb->dev &&
-	    entry->skb->mac_header != entry->skb->network_header) {
-		pmsg->hw_type = entry->skb->dev->type;
-		pmsg->hw_addrlen = dev_parse_header(entry->skb,
-						    pmsg->hw_addr);
-	}
-
-	if (data_len)
-		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
-			BUG();
-
-	nlh->nlmsg_len = skb->tail - old_tail;
-	return skb;
-
-nlmsg_failure:
-	kfree_skb(skb);
-	*errp = -EINVAL;
-	printk(KERN_ERR "ip_queue: error creating packet message\n");
-	return NULL;
-}
-
-static int
-ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
-{
-	int status = -EINVAL;
-	struct sk_buff *nskb;
-
-	if (copy_mode == IPQ_COPY_NONE)
-		return -EAGAIN;
-
-	nskb = ipq_build_packet_message(entry, &status);
-	if (nskb == NULL)
-		return status;
-
-	spin_lock_bh(&queue_lock);
-
-	if (!peer_pid)
-		goto err_out_free_nskb;
-
-	if (queue_total >= queue_maxlen) {
-		queue_dropped++;
-		status = -ENOSPC;
-		if (net_ratelimit())
-			printk (KERN_WARNING "ip_queue: full at %d entries, "
-				"dropping packets(s). Dropped: %d\n", queue_total,
-				queue_dropped);
-		goto err_out_free_nskb;
-	}
-
-	/* netlink_unicast will either free the nskb or attach it to a socket */
-	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
-	if (status < 0) {
-		queue_user_dropped++;
-		goto err_out_unlock;
-	}
-
-	__ipq_enqueue_entry(entry);
-
-	spin_unlock_bh(&queue_lock);
-	return status;
-
-err_out_free_nskb:
-	kfree_skb(nskb);
-
-err_out_unlock:
-	spin_unlock_bh(&queue_lock);
-	return status;
-}
-
-static int
-ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
-{
-	int diff;
-	struct iphdr *user_iph = (struct iphdr *)v->payload;
-	struct sk_buff *nskb;
-
-	if (v->data_len < sizeof(*user_iph))
-		return 0;
-	diff = v->data_len - e->skb->len;
-	if (diff < 0) {
-		if (pskb_trim(e->skb, v->data_len))
-			return -ENOMEM;
-	} else if (diff > 0) {
-		if (v->data_len > 0xFFFF)
-			return -EINVAL;
-		if (diff > skb_tailroom(e->skb)) {
-			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
-					       diff, GFP_ATOMIC);
-			if (!nskb) {
-				printk(KERN_WARNING "ip_queue: error "
-				       "in mangle, dropping packet\n");
-				return -ENOMEM;
-			}
-			kfree_skb(e->skb);
-			e->skb = nskb;
-		}
-		skb_put(e->skb, diff);
-	}
-	if (!skb_make_writable(e->skb, v->data_len))
-		return -ENOMEM;
-	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
-	e->skb->ip_summed = CHECKSUM_NONE;
-
-	return 0;
-}
-
-static int
-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
-{
-	struct nf_queue_entry *entry;
-
-	if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
-		return -EINVAL;
-
-	entry = ipq_find_dequeue_entry(vmsg->id);
-	if (entry == NULL)
-		return -ENOENT;
-	else {
-		int verdict = vmsg->value;
-
-		if (vmsg->data_len && vmsg->data_len == len)
-			if (ipq_mangle_ipv4(vmsg, entry) < 0)
-				verdict = NF_DROP;
-
-		nf_reinject(entry, verdict);
-		return 0;
-	}
-}
-
-static int
-ipq_set_mode(unsigned char mode, unsigned int range)
-{
-	int status;
-
-	spin_lock_bh(&queue_lock);
-	status = __ipq_set_mode(mode, range);
-	spin_unlock_bh(&queue_lock);
-	return status;
-}
-
-static int
-ipq_receive_peer(struct ipq_peer_msg *pmsg,
-		 unsigned char type, unsigned int len)
-{
-	int status = 0;
-
-	if (len < sizeof(*pmsg))
-		return -EINVAL;
-
-	switch (type) {
-	case IPQM_MODE:
-		status = ipq_set_mode(pmsg->msg.mode.value,
-				      pmsg->msg.mode.range);
-		break;
-
-	case IPQM_VERDICT:
-		status = ipq_set_verdict(&pmsg->msg.verdict,
-					 len - sizeof(*pmsg));
-		break;
-	default:
-		status = -EINVAL;
-	}
-	return status;
-}
-
-static int
-dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
-{
-	if (entry->indev)
-		if (entry->indev->ifindex == ifindex)
-			return 1;
-	if (entry->outdev)
-		if (entry->outdev->ifindex == ifindex)
-			return 1;
-#ifdef CONFIG_BRIDGE_NETFILTER
-	if (entry->skb->nf_bridge) {
-		if (entry->skb->nf_bridge->physindev &&
-		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
-			return 1;
-		if (entry->skb->nf_bridge->physoutdev &&
-		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
-			return 1;
-	}
-#endif
-	return 0;
-}
-
-static void
-ipq_dev_drop(int ifindex)
-{
-	ipq_flush(dev_cmp, ifindex);
-}
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
-static inline void
-__ipq_rcv_skb(struct sk_buff *skb)
-{
-	int status, type, pid, flags;
-	unsigned int nlmsglen, skblen;
-	struct nlmsghdr *nlh;
-	bool enable_timestamp = false;
-
-	skblen = skb->len;
-	if (skblen < sizeof(*nlh))
-		return;
-
-	nlh = nlmsg_hdr(skb);
-	nlmsglen = nlh->nlmsg_len;
-	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
-		return;
-
-	pid = nlh->nlmsg_pid;
-	flags = nlh->nlmsg_flags;
-
-	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
-		RCV_SKB_FAIL(-EINVAL);
-
-	if (flags & MSG_TRUNC)
-		RCV_SKB_FAIL(-ECOMM);
-
-	type = nlh->nlmsg_type;
-	if (type < NLMSG_NOOP || type >= IPQM_MAX)
-		RCV_SKB_FAIL(-EINVAL);
-
-	if (type <= IPQM_BASE)
-		return;
-
-	if (!capable(CAP_NET_ADMIN))
-		RCV_SKB_FAIL(-EPERM);
-
-	spin_lock_bh(&queue_lock);
-
-	if (peer_pid) {
-		if (peer_pid != pid) {
-			spin_unlock_bh(&queue_lock);
-			RCV_SKB_FAIL(-EBUSY);
-		}
-	} else {
-		enable_timestamp = true;
-		peer_pid = pid;
-	}
-
-	spin_unlock_bh(&queue_lock);
-	if (enable_timestamp)
-		net_enable_timestamp();
-	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
-				  nlmsglen - NLMSG_LENGTH(0));
-	if (status < 0)
-		RCV_SKB_FAIL(status);
-
-	if (flags & NLM_F_ACK)
-		netlink_ack(skb, nlh, 0);
-}
-
-static void
-ipq_rcv_skb(struct sk_buff *skb)
-{
-	mutex_lock(&ipqnl_mutex);
-	__ipq_rcv_skb(skb);
-	mutex_unlock(&ipqnl_mutex);
-}
-
-static int
-ipq_rcv_dev_event(struct notifier_block *this,
-		  unsigned long event, void *ptr)
-{
-	struct net_device *dev = ptr;
-
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
-	/* Drop any packets associated with the downed device */
-	if (event == NETDEV_DOWN)
-		ipq_dev_drop(dev->ifindex);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_dev_notifier = {
-	.notifier_call	= ipq_rcv_dev_event,
-};
-
-static int
-ipq_rcv_nl_event(struct notifier_block *this,
-		 unsigned long event, void *ptr)
-{
-	struct netlink_notify *n = ptr;
-
-	if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
-		spin_lock_bh(&queue_lock);
-		if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
-			__ipq_reset();
-		spin_unlock_bh(&queue_lock);
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_nl_notifier = {
-	.notifier_call	= ipq_rcv_nl_event,
-};
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *ipq_sysctl_header;
-
-static ctl_table ipq_table[] = {
-	{
-		.procname	= NET_IPQ_QMAX_NAME,
-		.data		= &queue_maxlen,
-		.maxlen		= sizeof(queue_maxlen),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{ }
-};
-#endif
-
-#ifdef CONFIG_PROC_FS
-static int ip_queue_show(struct seq_file *m, void *v)
-{
-	spin_lock_bh(&queue_lock);
-
-	seq_printf(m,
-		   "Peer PID          : %d\n"
-		   "Copy mode         : %hu\n"
-		   "Copy range        : %u\n"
-		   "Queue length      : %u\n"
-		   "Queue max. length : %u\n"
-		   "Queue dropped     : %u\n"
-		   "Netlink dropped   : %u\n",
-		   peer_pid,
-		   copy_mode,
-		   copy_range,
-		   queue_total,
-		   queue_maxlen,
-		   queue_dropped,
-		   queue_user_dropped);
-
-	spin_unlock_bh(&queue_lock);
-	return 0;
-}
-
-static int ip_queue_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ip_queue_show, NULL);
-}
-
-static const struct file_operations ip_queue_proc_fops = {
-	.open		= ip_queue_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.owner		= THIS_MODULE,
-};
-#endif
-
-static const struct nf_queue_handler nfqh = {
-	.name	= "ip_queue",
-	.outfn	= &ipq_enqueue_packet,
-};
-
-static int __init ip_queue_init(void)
-{
-	int status = -ENOMEM;
-	struct proc_dir_entry *proc __maybe_unused;
-
-	netlink_register_notifier(&ipq_nl_notifier);
-	ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
-				      ipq_rcv_skb, NULL, THIS_MODULE);
-	if (ipqnl == NULL) {
-		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
-		goto cleanup_netlink_notifier;
-	}
-
-#ifdef CONFIG_PROC_FS
-	proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
-			   &ip_queue_proc_fops);
-	if (!proc) {
-		printk(KERN_ERR "ip_queue: failed to create proc entry\n");
-		goto cleanup_ipqnl;
-	}
-#endif
-	register_netdevice_notifier(&ipq_dev_notifier);
-#ifdef CONFIG_SYSCTL
-	ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv4", ipq_table);
-#endif
-	status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
-	if (status < 0) {
-		printk(KERN_ERR "ip_queue: failed to register queue handler\n");
-		goto cleanup_sysctl;
-	}
-	return status;
-
-cleanup_sysctl:
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(ipq_sysctl_header);
-#endif
-	unregister_netdevice_notifier(&ipq_dev_notifier);
-	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-cleanup_ipqnl: __maybe_unused
-	netlink_kernel_release(ipqnl);
-	mutex_lock(&ipqnl_mutex);
-	mutex_unlock(&ipqnl_mutex);
-
-cleanup_netlink_notifier:
-	netlink_unregister_notifier(&ipq_nl_notifier);
-	return status;
-}
-
-static void __exit ip_queue_fini(void)
-{
-	nf_unregister_queue_handlers(&nfqh);
-
-	ipq_flush(NULL, 0);
-
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(ipq_sysctl_header);
-#endif
-	unregister_netdevice_notifier(&ipq_dev_notifier);
-	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
-	netlink_kernel_release(ipqnl);
-	mutex_lock(&ipqnl_mutex);
-	mutex_unlock(&ipqnl_mutex);
-
-	netlink_unregister_notifier(&ipq_nl_notifier);
-}
-
-MODULE_DESCRIPTION("IPv4 packet queue handler");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
-
-module_init(ip_queue_init);
-module_exit(ip_queue_fini);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index d33cddd16fbb..10135342799e 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,28 +25,6 @@ config NF_CONNTRACK_IPV6
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_QUEUE
-	tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
-	depends on INET && IPV6 && NETFILTER
-	depends on NETFILTER_ADVANCED
-	---help---
-
-	  This option adds a queue handler to the kernel for IPv6
-	  packets which enables users to receive the filtered packets
-	  with QUEUE target using libipq.
-
-	  This option enables the old IPv6-only "ip6_queue" implementation
-	  which has been obsoleted by the new "nfnetlink_queue" code (see
-	  CONFIG_NETFILTER_NETLINK_QUEUE).
-
-	  (C) Fernando Anton 2001
-	  IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
-	  Universidad Carlos III de Madrid
-	  Universidad Politecnica de Alcala de Henares
-	  email: <fanton@it.uc3m.es>.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d4dfd0a21097..534d3f216f7b 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -6,7 +6,6 @@
 obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
 obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
 obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
-obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
deleted file mode 100644
index 3ca9303b3a19..000000000000
--- a/net/ipv6/netfilter/ip6_queue.c
+++ /dev/null
@@ -1,641 +0,0 @@
1/*
2 * This is a module which is used for queueing IPv6 packets and
3 * communicating with userspace via netlink.
4 *
5 * (C) 2001 Fernando Anton, this code is GPL.
6 * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
7 * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
8 * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
9 * email: fanton@it.uc3m.es
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/init.h>
18#include <linux/ipv6.h>
19#include <linux/notifier.h>
20#include <linux/netdevice.h>
21#include <linux/netfilter.h>
22#include <linux/netlink.h>
23#include <linux/spinlock.h>
24#include <linux/sysctl.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/mutex.h>
28#include <linux/slab.h>
29#include <net/net_namespace.h>
30#include <net/sock.h>
31#include <net/ipv6.h>
32#include <net/ip6_route.h>
33#include <net/netfilter/nf_queue.h>
34#include <linux/netfilter_ipv4/ip_queue.h>
35#include <linux/netfilter_ipv4/ip_tables.h>
36#include <linux/netfilter_ipv6/ip6_tables.h>
37
38#define IPQ_QMAX_DEFAULT 1024
39#define IPQ_PROC_FS_NAME "ip6_queue"
40#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
41
42typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
43
44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
46static DEFINE_SPINLOCK(queue_lock);
47static int peer_pid __read_mostly;
48static unsigned int copy_range __read_mostly;
49static unsigned int queue_total;
50static unsigned int queue_dropped = 0;
51static unsigned int queue_user_dropped = 0;
52static struct sock *ipqnl __read_mostly;
53static LIST_HEAD(queue_list);
54static DEFINE_MUTEX(ipqnl_mutex);
55
56static inline void
57__ipq_enqueue_entry(struct nf_queue_entry *entry)
58{
59 list_add_tail(&entry->list, &queue_list);
60 queue_total++;
61}
62
63static inline int
64__ipq_set_mode(unsigned char mode, unsigned int range)
65{
66 int status = 0;
67
68 switch(mode) {
69 case IPQ_COPY_NONE:
70 case IPQ_COPY_META:
71 copy_mode = mode;
72 copy_range = 0;
73 break;
74
75 case IPQ_COPY_PACKET:
76 if (range > 0xFFFF)
77 range = 0xFFFF;
78 copy_range = range;
79 copy_mode = mode;
80 break;
81
82 default:
83 status = -EINVAL;
84
85 }
86 return status;
87}
88
89static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
90
91static inline void
92__ipq_reset(void)
93{
94 peer_pid = 0;
95 net_disable_timestamp();
96 __ipq_set_mode(IPQ_COPY_NONE, 0);
97 __ipq_flush(NULL, 0);
98}
99
100static struct nf_queue_entry *
101ipq_find_dequeue_entry(unsigned long id)
102{
103 struct nf_queue_entry *entry = NULL, *i;
104
105 spin_lock_bh(&queue_lock);
106
107 list_for_each_entry(i, &queue_list, list) {
108 if ((unsigned long)i == id) {
109 entry = i;
110 break;
111 }
112 }
113
114 if (entry) {
115 list_del(&entry->list);
116 queue_total--;
117 }
118
119 spin_unlock_bh(&queue_lock);
120 return entry;
121}
122
123static void
124__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
125{
126 struct nf_queue_entry *entry, *next;
127
128 list_for_each_entry_safe(entry, next, &queue_list, list) {
129 if (!cmpfn || cmpfn(entry, data)) {
130 list_del(&entry->list);
131 queue_total--;
132 nf_reinject(entry, NF_DROP);
133 }
134 }
135}
136
137static void
138ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
139{
140 spin_lock_bh(&queue_lock);
141 __ipq_flush(cmpfn, data);
142 spin_unlock_bh(&queue_lock);
143}
144
145static struct sk_buff *
146ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
147{
148 sk_buff_data_t old_tail;
149 size_t size = 0;
150 size_t data_len = 0;
151 struct sk_buff *skb;
152 struct ipq_packet_msg *pmsg;
153 struct nlmsghdr *nlh;
154 struct timeval tv;
155
156 switch (ACCESS_ONCE(copy_mode)) {
157 case IPQ_COPY_META:
158 case IPQ_COPY_NONE:
159 size = NLMSG_SPACE(sizeof(*pmsg));
160 break;
161
162 case IPQ_COPY_PACKET:
163 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
164 (*errp = skb_checksum_help(entry->skb)))
165 return NULL;
166
167 data_len = ACCESS_ONCE(copy_range);
168 if (data_len == 0 || data_len > entry->skb->len)
169 data_len = entry->skb->len;
170
171 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
172 break;
173
174 default:
175 *errp = -EINVAL;
176 return NULL;
177 }
178
179 skb = alloc_skb(size, GFP_ATOMIC);
180 if (!skb)
181 goto nlmsg_failure;
182
183 old_tail = skb->tail;
184 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
185 pmsg = NLMSG_DATA(nlh);
186 memset(pmsg, 0, sizeof(*pmsg));
187
188 pmsg->packet_id = (unsigned long )entry;
189 pmsg->data_len = data_len;
190 tv = ktime_to_timeval(entry->skb->tstamp);
191 pmsg->timestamp_sec = tv.tv_sec;
192 pmsg->timestamp_usec = tv.tv_usec;
193 pmsg->mark = entry->skb->mark;
194 pmsg->hook = entry->hook;
195 pmsg->hw_protocol = entry->skb->protocol;
196
197 if (entry->indev)
198 strcpy(pmsg->indev_name, entry->indev->name);
199 else
200 pmsg->indev_name[0] = '\0';
201
202 if (entry->outdev)
203 strcpy(pmsg->outdev_name, entry->outdev->name);
204 else
205 pmsg->outdev_name[0] = '\0';
206
207 if (entry->indev && entry->skb->dev &&
208 entry->skb->mac_header != entry->skb->network_header) {
209 pmsg->hw_type = entry->skb->dev->type;
210 pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
211 }
212
213 if (data_len)
214 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
215 BUG();
216
217 nlh->nlmsg_len = skb->tail - old_tail;
218 return skb;
219
220nlmsg_failure:
221 kfree_skb(skb);
222 *errp = -EINVAL;
223 printk(KERN_ERR "ip6_queue: error creating packet message\n");
224 return NULL;
225}
226
227static int
228ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
229{
230 int status = -EINVAL;
231 struct sk_buff *nskb;
232
233 if (copy_mode == IPQ_COPY_NONE)
234 return -EAGAIN;
235
236 nskb = ipq_build_packet_message(entry, &status);
237 if (nskb == NULL)
238 return status;
239
240 spin_lock_bh(&queue_lock);
241
242 if (!peer_pid)
243 goto err_out_free_nskb;
244
245 if (queue_total >= queue_maxlen) {
246 queue_dropped++;
247 status = -ENOSPC;
248 if (net_ratelimit())
249 printk (KERN_WARNING "ip6_queue: fill at %d entries, "
250 "dropping packet(s). Dropped: %d\n", queue_total,
251 queue_dropped);
252 goto err_out_free_nskb;
253 }
254
255 /* netlink_unicast will either free the nskb or attach it to a socket */
256 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
257 if (status < 0) {
258 queue_user_dropped++;
259 goto err_out_unlock;
260 }
261
262 __ipq_enqueue_entry(entry);
263
264 spin_unlock_bh(&queue_lock);
265 return status;
266
267err_out_free_nskb:
268 kfree_skb(nskb);
269
270err_out_unlock:
271 spin_unlock_bh(&queue_lock);
272 return status;
273}
274
275static int
276ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
277{
278 int diff;
279 struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
280 struct sk_buff *nskb;
281
282 if (v->data_len < sizeof(*user_iph))
283 return 0;
284 diff = v->data_len - e->skb->len;
285 if (diff < 0) {
286 if (pskb_trim(e->skb, v->data_len))
287 return -ENOMEM;
288 } else if (diff > 0) {
289 if (v->data_len > 0xFFFF)
290 return -EINVAL;
291 if (diff > skb_tailroom(e->skb)) {
292 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
293 diff, GFP_ATOMIC);
294 if (!nskb) {
295 printk(KERN_WARNING "ip6_queue: OOM "
296 "in mangle, dropping packet\n");
297 return -ENOMEM;
298 }
299 kfree_skb(e->skb);
300 e->skb = nskb;
301 }
302 skb_put(e->skb, diff);
303 }
304 if (!skb_make_writable(e->skb, v->data_len))
305 return -ENOMEM;
306 skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
307 e->skb->ip_summed = CHECKSUM_NONE;
308
309 return 0;
310}
311
312static int
313ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
314{
315 struct nf_queue_entry *entry;
316
317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
318 return -EINVAL;
319
320 entry = ipq_find_dequeue_entry(vmsg->id);
321 if (entry == NULL)
322 return -ENOENT;
323 else {
324 int verdict = vmsg->value;
325
326 if (vmsg->data_len && vmsg->data_len == len)
327 if (ipq_mangle_ipv6(vmsg, entry) < 0)
328 verdict = NF_DROP;
329
330 nf_reinject(entry, verdict);
331 return 0;
332 }
333}
334
335static int
336ipq_set_mode(unsigned char mode, unsigned int range)
337{
338 int status;
339
340 spin_lock_bh(&queue_lock);
341 status = __ipq_set_mode(mode, range);
342 spin_unlock_bh(&queue_lock);
343 return status;
344}
345
346static int
347ipq_receive_peer(struct ipq_peer_msg *pmsg,
348 unsigned char type, unsigned int len)
349{
350 int status = 0;
351
352 if (len < sizeof(*pmsg))
353 return -EINVAL;
354
355 switch (type) {
356 case IPQM_MODE:
357 status = ipq_set_mode(pmsg->msg.mode.value,
358 pmsg->msg.mode.range);
359 break;
360
361 case IPQM_VERDICT:
362 status = ipq_set_verdict(&pmsg->msg.verdict,
363 len - sizeof(*pmsg));
364 break;
365 default:
366 status = -EINVAL;
367 }
368 return status;
369}
370
371static int
372dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
373{
374 if (entry->indev)
375 if (entry->indev->ifindex == ifindex)
376 return 1;
377
378 if (entry->outdev)
379 if (entry->outdev->ifindex == ifindex)
380 return 1;
381#ifdef CONFIG_BRIDGE_NETFILTER
382 if (entry->skb->nf_bridge) {
383 if (entry->skb->nf_bridge->physindev &&
384 entry->skb->nf_bridge->physindev->ifindex == ifindex)
385 return 1;
386 if (entry->skb->nf_bridge->physoutdev &&
387 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
388 return 1;
389 }
390#endif
391 return 0;
392}
393
394static void
395ipq_dev_drop(int ifindex)
396{
397 ipq_flush(dev_cmp, ifindex);
398}
399
400#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
401
402static inline void
403__ipq_rcv_skb(struct sk_buff *skb)
404{
405 int status, type, pid, flags;
406 unsigned int nlmsglen, skblen;
407 struct nlmsghdr *nlh;
408 bool enable_timestamp = false;
409
410 skblen = skb->len;
411 if (skblen < sizeof(*nlh))
412 return;
413
414 nlh = nlmsg_hdr(skb);
415 nlmsglen = nlh->nlmsg_len;
416 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
417 return;
418
419 pid = nlh->nlmsg_pid;
420 flags = nlh->nlmsg_flags;
421
422 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
423 RCV_SKB_FAIL(-EINVAL);
424
425 if (flags & MSG_TRUNC)
426 RCV_SKB_FAIL(-ECOMM);
427
428 type = nlh->nlmsg_type;
429 if (type < NLMSG_NOOP || type >= IPQM_MAX)
430 RCV_SKB_FAIL(-EINVAL);
431
432 if (type <= IPQM_BASE)
433 return;
434
435 if (!capable(CAP_NET_ADMIN))
436 RCV_SKB_FAIL(-EPERM);
437
438 spin_lock_bh(&queue_lock);
439
440 if (peer_pid) {
441 if (peer_pid != pid) {
442 spin_unlock_bh(&queue_lock);
443 RCV_SKB_FAIL(-EBUSY);
444 }
445 } else {
446 enable_timestamp = true;
447 peer_pid = pid;
448 }
449
450 spin_unlock_bh(&queue_lock);
451 if (enable_timestamp)
452 net_enable_timestamp();
453
454 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
455 nlmsglen - NLMSG_LENGTH(0));
456 if (status < 0)
457 RCV_SKB_FAIL(status);
458
459 if (flags & NLM_F_ACK)
460 netlink_ack(skb, nlh, 0);
461}
462
463static void
464ipq_rcv_skb(struct sk_buff *skb)
465{
466 mutex_lock(&ipqnl_mutex);
467 __ipq_rcv_skb(skb);
468 mutex_unlock(&ipqnl_mutex);
469}
470
471static int
472ipq_rcv_dev_event(struct notifier_block *this,
473 unsigned long event, void *ptr)
474{
475 struct net_device *dev = ptr;
476
477 if (!net_eq(dev_net(dev), &init_net))
478 return NOTIFY_DONE;
479
480 /* Drop any packets associated with the downed device */
481 if (event == NETDEV_DOWN)
482 ipq_dev_drop(dev->ifindex);
483 return NOTIFY_DONE;
484}
485
486static struct notifier_block ipq_dev_notifier = {
487 .notifier_call = ipq_rcv_dev_event,
488};
489
490static int
491ipq_rcv_nl_event(struct notifier_block *this,
492 unsigned long event, void *ptr)
493{
494 struct netlink_notify *n = ptr;
495
496 if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
497 spin_lock_bh(&queue_lock);
498 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
499 __ipq_reset();
500 spin_unlock_bh(&queue_lock);
501 }
502 return NOTIFY_DONE;
503}
504
505static struct notifier_block ipq_nl_notifier = {
506 .notifier_call = ipq_rcv_nl_event,
507};
508
509#ifdef CONFIG_SYSCTL
510static struct ctl_table_header *ipq_sysctl_header;
511
512static ctl_table ipq_table[] = {
513 {
514 .procname = NET_IPQ_QMAX_NAME,
515 .data = &queue_maxlen,
516 .maxlen = sizeof(queue_maxlen),
517 .mode = 0644,
518 .proc_handler = proc_dointvec
519 },
520 { }
521};
522#endif
523
524#ifdef CONFIG_PROC_FS
525static int ip6_queue_show(struct seq_file *m, void *v)
526{
527 spin_lock_bh(&queue_lock);
528
529 seq_printf(m,
530 "Peer PID : %d\n"
531 "Copy mode : %hu\n"
532 "Copy range : %u\n"
533 "Queue length : %u\n"
534 "Queue max. length : %u\n"
535 "Queue dropped : %u\n"
536 "Netfilter dropped : %u\n",
537 peer_pid,
538 copy_mode,
539 copy_range,
540 queue_total,
541 queue_maxlen,
542 queue_dropped,
543 queue_user_dropped);
544
545 spin_unlock_bh(&queue_lock);
546 return 0;
547}
548
549static int ip6_queue_open(struct inode *inode, struct file *file)
550{
551 return single_open(file, ip6_queue_show, NULL);
552}
553
554static const struct file_operations ip6_queue_proc_fops = {
555 .open = ip6_queue_open,
556 .read = seq_read,
557 .llseek = seq_lseek,
558 .release = single_release,
559 .owner = THIS_MODULE,
560};
561#endif
562
563static const struct nf_queue_handler nfqh = {
564 .name = "ip6_queue",
565 .outfn = &ipq_enqueue_packet,
566};
567
568static int __init ip6_queue_init(void)
569{
570 int status = -ENOMEM;
571 struct proc_dir_entry *proc __maybe_unused;
572
573 netlink_register_notifier(&ipq_nl_notifier);
574 ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
575 ipq_rcv_skb, NULL, THIS_MODULE);
576 if (ipqnl == NULL) {
577 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
578 goto cleanup_netlink_notifier;
579 }
580
581#ifdef CONFIG_PROC_FS
582 proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
583 &ip6_queue_proc_fops);
584 if (!proc) {
585 printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
586 goto cleanup_ipqnl;
587 }
588#endif
589 register_netdevice_notifier(&ipq_dev_notifier);
590#ifdef CONFIG_SYSCTL
591 ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv6", ipq_table);
592#endif
593 status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
594 if (status < 0) {
595 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
596 goto cleanup_sysctl;
597 }
598 return status;
599
600cleanup_sysctl:
601#ifdef CONFIG_SYSCTL
602 unregister_net_sysctl_table(ipq_sysctl_header);
603#endif
604 unregister_netdevice_notifier(&ipq_dev_notifier);
605 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
606
607cleanup_ipqnl: __maybe_unused
608 netlink_kernel_release(ipqnl);
609 mutex_lock(&ipqnl_mutex);
610 mutex_unlock(&ipqnl_mutex);
611
612cleanup_netlink_notifier:
613 netlink_unregister_notifier(&ipq_nl_notifier);
614 return status;
615}
616
617static void __exit ip6_queue_fini(void)
618{
619 nf_unregister_queue_handlers(&nfqh);
620
621 ipq_flush(NULL, 0);
622
623#ifdef CONFIG_SYSCTL
624 unregister_net_sysctl_table(ipq_sysctl_header);
625#endif
626 unregister_netdevice_notifier(&ipq_dev_notifier);
627 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
628
629 netlink_kernel_release(ipqnl);
630 mutex_lock(&ipqnl_mutex);
631 mutex_unlock(&ipqnl_mutex);
632
633 netlink_unregister_notifier(&ipq_nl_notifier);
634}
635
636MODULE_DESCRIPTION("IPv6 packet queue handler");
637MODULE_LICENSE("GPL");
638MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
639
640module_init(ip6_queue_init);
641module_exit(ip6_queue_fini);
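
Note: the file above is shown because this merge removes it. Its only userspace interface was a netlink socket on NETLINK_IP6_FW, with a single registered peer (peer_pid) validated in __ipq_rcv_skb(). A minimal sketch of how a peer would have attached, assuming the historical protocol number 13 and omitting the IPQM_* payloads; on v3.5+ kernels the socket() call simply fails, which is the point of the removal:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#ifndef NETLINK_IP6_FW
#define NETLINK_IP6_FW 13	/* historical protocol number (assumption) */
#endif

int main(void)
{
	struct sockaddr_nl local = { .nl_family = AF_NETLINK };
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_IP6_FW);

	if (fd < 0) {		/* expected on >= v3.5: protocol is gone */
		perror("socket");
		return 1;
	}
	/* nl_pid = 0 lets the kernel assign a unique port id; the module
	 * recorded it as peer_pid on the first valid request. */
	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
		perror("bind");
		return 1;
	}
	close(fd);
	return 0;
}

Replacement functionality lives in nfnetlink_queue, reached through libnetfilter_queue rather than a raw NETLINK_IP6_FW socket.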
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 4a09b7873003..1548df9a7524 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -548,6 +548,7 @@ static inline void
548ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) 548ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
549{ 549{
550 unsigned int conn_flags; 550 unsigned int conn_flags;
551 __u32 flags;
551 552
552 /* if dest is NULL, then return directly */ 553 /* if dest is NULL, then return directly */
553 if (!dest) 554 if (!dest)
@@ -559,17 +560,19 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
559 conn_flags = atomic_read(&dest->conn_flags); 560 conn_flags = atomic_read(&dest->conn_flags);
560 if (cp->protocol != IPPROTO_UDP) 561 if (cp->protocol != IPPROTO_UDP)
561 conn_flags &= ~IP_VS_CONN_F_ONE_PACKET; 562 conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;
563 flags = cp->flags;
562 /* Bind with the destination and its corresponding transmitter */ 564 /* Bind with the destination and its corresponding transmitter */
563 if (cp->flags & IP_VS_CONN_F_SYNC) { 565 if (flags & IP_VS_CONN_F_SYNC) {
564 /* if the connection is not template and is created 566 /* if the connection is not template and is created
565 * by sync, preserve the activity flag. 567 * by sync, preserve the activity flag.
566 */ 568 */
567 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) 569 if (!(flags & IP_VS_CONN_F_TEMPLATE))
568 conn_flags &= ~IP_VS_CONN_F_INACTIVE; 570 conn_flags &= ~IP_VS_CONN_F_INACTIVE;
569 /* connections inherit forwarding method from dest */ 571 /* connections inherit forwarding method from dest */
570 cp->flags &= ~IP_VS_CONN_F_FWD_MASK; 572 flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT);
571 } 573 }
572 cp->flags |= conn_flags; 574 flags |= conn_flags;
575 cp->flags = flags;
573 cp->dest = dest; 576 cp->dest = dest;
574 577
575 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " 578 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
@@ -584,12 +587,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
584 atomic_read(&dest->refcnt)); 587 atomic_read(&dest->refcnt));
585 588
586 /* Update the connection counters */ 589 /* Update the connection counters */
587 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { 590 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
588 /* It is a normal connection, so increase the inactive 591 /* It is a normal connection, so modify the counters
589 connection counter because it is in TCP SYNRECV 592 * according to the flags, later the protocol can
590 state (inactive) or other protocol inacive state */ 593 * update them on state change
591 if ((cp->flags & IP_VS_CONN_F_SYNC) && 594 */
592 (!(cp->flags & IP_VS_CONN_F_INACTIVE))) 595 if (!(flags & IP_VS_CONN_F_INACTIVE))
593 atomic_inc(&dest->activeconns); 596 atomic_inc(&dest->activeconns);
594 else 597 else
595 atomic_inc(&dest->inactconns); 598 atomic_inc(&dest->inactconns);
@@ -613,14 +616,40 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
613{ 616{
614 struct ip_vs_dest *dest; 617 struct ip_vs_dest *dest;
615 618
616 if ((cp) && (!cp->dest)) { 619 dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
617 dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, 620 cp->dport, &cp->vaddr, cp->vport,
618 cp->dport, &cp->vaddr, cp->vport, 621 cp->protocol, cp->fwmark, cp->flags);
619 cp->protocol, cp->fwmark, cp->flags); 622 if (dest) {
623 struct ip_vs_proto_data *pd;
624
625 spin_lock(&cp->lock);
626 if (cp->dest) {
627 spin_unlock(&cp->lock);
628 return dest;
629 }
630
631 /* Applications work depending on the forwarding method
632 * but better to reassign them always when binding dest */
633 if (cp->app)
634 ip_vs_unbind_app(cp);
635
620 ip_vs_bind_dest(cp, dest); 636 ip_vs_bind_dest(cp, dest);
621 return dest; 637 spin_unlock(&cp->lock);
622 } else 638
623 return NULL; 639 /* Update its packet transmitter */
640 cp->packet_xmit = NULL;
641#ifdef CONFIG_IP_VS_IPV6
642 if (cp->af == AF_INET6)
643 ip_vs_bind_xmit_v6(cp);
644 else
645#endif
646 ip_vs_bind_xmit(cp);
647
648 pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);
649 if (pd && atomic_read(&pd->appcnt))
650 ip_vs_bind_app(cp, pd->pp);
651 }
652 return dest;
624} 653}
625 654
626 655
@@ -743,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
743static void ip_vs_conn_expire(unsigned long data) 772static void ip_vs_conn_expire(unsigned long data)
744{ 773{
745 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 774 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
746 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 775 struct net *net = ip_vs_conn_net(cp);
776 struct netns_ipvs *ipvs = net_ipvs(net);
747 777
748 cp->timeout = 60*HZ; 778 cp->timeout = 60*HZ;
749 779
@@ -808,6 +838,9 @@ static void ip_vs_conn_expire(unsigned long data)
808 atomic_read(&cp->refcnt)-1, 838 atomic_read(&cp->refcnt)-1,
809 atomic_read(&cp->n_control)); 839 atomic_read(&cp->n_control));
810 840
841 if (ipvs->sync_state & IP_VS_STATE_MASTER)
842 ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
843
811 ip_vs_conn_put(cp); 844 ip_vs_conn_put(cp);
812} 845}
813 846
@@ -881,6 +914,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
881 /* Set its state and timeout */ 914 /* Set its state and timeout */
882 cp->state = 0; 915 cp->state = 0;
883 cp->timeout = 3*HZ; 916 cp->timeout = 3*HZ;
917 cp->sync_endtime = jiffies & ~3UL;
884 918
885 /* Bind its packet transmitter */ 919 /* Bind its packet transmitter */
886#ifdef CONFIG_IP_VS_IPV6 920#ifdef CONFIG_IP_VS_IPV6
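
The hunks above change ip_vs_try_bind_dest() to re-check cp->dest under cp->lock before binding, and ip_vs_bind_dest() to compute the new flag word in a local variable so readers never observe a half-updated cp->flags. A userspace pthread analogue of that "recheck under lock, publish in one store" pattern; the names are illustrative, not IPVS symbols:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define F_SYNC      0x01
#define F_INACTIVE  0x02
#define F_FWD_MASK  0xf0

struct conn {
	pthread_mutex_t lock;
	uint32_t flags;
	void *dest;
};

static void try_bind(struct conn *cp, void *dest, uint32_t dest_flags)
{
	uint32_t flags;

	pthread_mutex_lock(&cp->lock);
	if (cp->dest) {			/* lost the race: already bound */
		pthread_mutex_unlock(&cp->lock);
		return;
	}
	flags = cp->flags;		/* work on a snapshot ... */
	if (flags & F_SYNC)
		flags &= ~F_FWD_MASK;	/* inherit forwarding from dest */
	flags |= dest_flags;
	cp->flags = flags;		/* ... publish with a single store */
	cp->dest = dest;
	pthread_mutex_unlock(&cp->lock);
}

int main(void)
{
	struct conn c = { PTHREAD_MUTEX_INITIALIZER, F_SYNC | 0x40, NULL };
	int backend;

	try_bind(&c, &backend, F_INACTIVE);
	printf("flags=%#x dest=%p\n", (unsigned)c.flags, c.dest);
	return 0;
}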
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c8f36b96f44f..a54b018c6eea 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1613 else 1613 else
1614 pkts = atomic_add_return(1, &cp->in_pkts); 1614 pkts = atomic_add_return(1, &cp->in_pkts);
1615 1615
1616 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && 1616 if (ipvs->sync_state & IP_VS_STATE_MASTER)
1617 cp->protocol == IPPROTO_SCTP) { 1617 ip_vs_sync_conn(net, cp, pkts);
1618 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1619 (pkts % sysctl_sync_period(ipvs)
1620 == sysctl_sync_threshold(ipvs))) ||
1621 (cp->old_state != cp->state &&
1622 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
1623 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
1624 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
1625 ip_vs_sync_conn(net, cp);
1626 goto out;
1627 }
1628 }
1629
1630 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1631 else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1632 (((cp->protocol != IPPROTO_TCP ||
1633 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
1634 (pkts % sysctl_sync_period(ipvs)
1635 == sysctl_sync_threshold(ipvs))) ||
1636 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
1637 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
1638 (cp->state == IP_VS_TCP_S_CLOSE) ||
1639 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
1640 (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
1641 ip_vs_sync_conn(net, cp);
1642out:
1643 cp->old_state = cp->state;
1644 1618
1645 ip_vs_conn_put(cp); 1619 ip_vs_conn_put(cp);
1646 return ret; 1620 return ret;
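
This hunk collapses the long per-protocol sync condition into a single ip_vs_sync_conn(net, cp, pkts) call; the rate decision now lives in ip_vs_sync_conn_needed() (see the ip_vs_sync.c diff below). The core rule being preserved, in isolation: with sync_period P and sync_threshold T (T < P), a connection is synced on packets T, T+P, T+2P, and so on. A standalone sketch of that rule, not the kernel helper:

#include <stdio.h>

static int sync_due(int pkts, int period, int threshold)
{
	return period > 0 && pkts % period == threshold;
}

int main(void)
{
	for (int pkts = 1; pkts <= 160; pkts++)
		if (sync_due(pkts, 50, 3))
			printf("sync at packet %d\n", pkts); /* 3, 53, 103, 153 */
	return 0;
}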
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 37b91996bfba..dd811b8dd97c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net)
1599} 1599}
1600 1600
1601#ifdef CONFIG_SYSCTL 1601#ifdef CONFIG_SYSCTL
1602
1603static int zero;
1604static int three = 3;
1605
1602static int 1606static int
1603proc_do_defense_mode(ctl_table *table, int write, 1607proc_do_defense_mode(ctl_table *table, int write,
1604 void __user *buffer, size_t *lenp, loff_t *ppos) 1608 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write,
1632 memcpy(val, valp, sizeof(val)); 1636 memcpy(val, valp, sizeof(val));
1633 1637
1634 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1638 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1635 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { 1639 if (write && (valp[0] < 0 || valp[1] < 0 ||
1640 (valp[0] >= valp[1] && valp[1]))) {
1636 /* Restore the correct value */ 1641 /* Restore the correct value */
1637 memcpy(valp, val, sizeof(val)); 1642 memcpy(valp, val, sizeof(val));
1638 } 1643 }
@@ -1652,9 +1657,24 @@ proc_do_sync_mode(ctl_table *table, int write,
1652 if ((*valp < 0) || (*valp > 1)) { 1657 if ((*valp < 0) || (*valp > 1)) {
1653 /* Restore the correct value */ 1658 /* Restore the correct value */
1654 *valp = val; 1659 *valp = val;
1655 } else { 1660 }
1656 struct net *net = current->nsproxy->net_ns; 1661 }
1657 ip_vs_sync_switch_mode(net, val); 1662 return rc;
1663}
1664
1665static int
1666proc_do_sync_ports(ctl_table *table, int write,
1667 void __user *buffer, size_t *lenp, loff_t *ppos)
1668{
1669 int *valp = table->data;
1670 int val = *valp;
1671 int rc;
1672
1673 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1674 if (write && (*valp != val)) {
1675 if (*valp < 1 || !is_power_of_2(*valp)) {
1676 /* Restore the correct value */
1677 *valp = val;
1658 } 1678 }
1659 } 1679 }
1660 return rc; 1680 return rc;
@@ -1718,6 +1738,24 @@ static struct ctl_table vs_vars[] = {
1718 .proc_handler = &proc_do_sync_mode, 1738 .proc_handler = &proc_do_sync_mode,
1719 }, 1739 },
1720 { 1740 {
1741 .procname = "sync_ports",
1742 .maxlen = sizeof(int),
1743 .mode = 0644,
1744 .proc_handler = &proc_do_sync_ports,
1745 },
1746 {
1747 .procname = "sync_qlen_max",
1748 .maxlen = sizeof(int),
1749 .mode = 0644,
1750 .proc_handler = proc_dointvec,
1751 },
1752 {
1753 .procname = "sync_sock_size",
1754 .maxlen = sizeof(int),
1755 .mode = 0644,
1756 .proc_handler = proc_dointvec,
1757 },
1758 {
1721 .procname = "cache_bypass", 1759 .procname = "cache_bypass",
1722 .maxlen = sizeof(int), 1760 .maxlen = sizeof(int),
1723 .mode = 0644, 1761 .mode = 0644,
@@ -1743,6 +1781,20 @@ static struct ctl_table vs_vars[] = {
1743 .proc_handler = proc_do_sync_threshold, 1781 .proc_handler = proc_do_sync_threshold,
1744 }, 1782 },
1745 { 1783 {
1784 .procname = "sync_refresh_period",
1785 .maxlen = sizeof(int),
1786 .mode = 0644,
1787 .proc_handler = proc_dointvec_jiffies,
1788 },
1789 {
1790 .procname = "sync_retries",
1791 .maxlen = sizeof(int),
1792 .mode = 0644,
1793 .proc_handler = proc_dointvec_minmax,
1794 .extra1 = &zero,
1795 .extra2 = &three,
1796 },
1797 {
1746 .procname = "nat_icmp_send", 1798 .procname = "nat_icmp_send",
1747 .maxlen = sizeof(int), 1799 .maxlen = sizeof(int),
1748 .mode = 0644, 1800 .mode = 0644,
@@ -3655,6 +3707,12 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3655 tbl[idx++].data = &ipvs->sysctl_snat_reroute; 3707 tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3656 ipvs->sysctl_sync_ver = 1; 3708 ipvs->sysctl_sync_ver = 1;
3657 tbl[idx++].data = &ipvs->sysctl_sync_ver; 3709 tbl[idx++].data = &ipvs->sysctl_sync_ver;
3710 ipvs->sysctl_sync_ports = 1;
3711 tbl[idx++].data = &ipvs->sysctl_sync_ports;
3712 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3713 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3714 ipvs->sysctl_sync_sock_size = 0;
3715 tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3658 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 3716 tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3659 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 3717 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3660 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 3718 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
@@ -3662,6 +3720,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3662 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 3720 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3663 tbl[idx].data = &ipvs->sysctl_sync_threshold; 3721 tbl[idx].data = &ipvs->sysctl_sync_threshold;
3664 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 3722 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3723 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3724 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3725 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3726 tbl[idx++].data = &ipvs->sysctl_sync_retries;
3665 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 3727 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3666 3728
3667 3729
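
Among the new sysctls above, proc_do_sync_ports() accepts only 1, 2, 4, ... so that a sync port index can be derived with a plain mask (threads_mask = count - 1). The kernel uses is_power_of_2(); a freestanding equivalent of the same check:

#include <assert.h>

static int is_pow2(unsigned int v)
{
	return v >= 1 && (v & (v - 1)) == 0;
}

int main(void)
{
	assert(is_pow2(1) && is_pow2(8));
	assert(!is_pow2(0) && !is_pow2(6));
	/* the mask trick this constraint enables: id = hash & (8 - 1) */
	return 0;
}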
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 1a53a7a2fff0..8b7dca9ea422 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -149,7 +149,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
149 149
150 /* allocate the DH table for this service */ 150 /* allocate the DH table for this service */
151 tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, 151 tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
152 GFP_ATOMIC); 152 GFP_KERNEL);
153 if (tbl == NULL) 153 if (tbl == NULL)
154 return -ENOMEM; 154 return -ENOMEM;
155 155
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 510f2b5a5855..b20b29c903ef 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -485,7 +485,7 @@ static struct pernet_operations ip_vs_ftp_ops = {
485 .exit = __ip_vs_ftp_exit, 485 .exit = __ip_vs_ftp_exit,
486}; 486};
487 487
488int __init ip_vs_ftp_init(void) 488static int __init ip_vs_ftp_init(void)
489{ 489{
490 int rv; 490 int rv;
491 491
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 9b0de9a0e08e..df646ccf08a7 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
342 /* 342 /*
343 * Allocate the ip_vs_lblc_table for this service 343 * Allocate the ip_vs_lblc_table for this service
344 */ 344 */
345 tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); 345 tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
346 if (tbl == NULL) 346 if (tbl == NULL)
347 return -ENOMEM; 347 return -ENOMEM;
348 348
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 9dcd39a48897..570e31ea427a 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
511 /* 511 /*
512 * Allocate the ip_vs_lblcr_table for this service 512 * Allocate the ip_vs_lblcr_table for this service
513 */ 513 */
514 tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); 514 tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
515 if (tbl == NULL) 515 if (tbl == NULL)
516 return -ENOMEM; 516 return -ENOMEM;
517 517
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index fdc82ad9cc0e..50d82186da87 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -68,7 +68,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
68 struct netns_ipvs *ipvs = net_ipvs(net); 68 struct netns_ipvs *ipvs = net_ipvs(net);
69 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 69 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
70 struct ip_vs_proto_data *pd = 70 struct ip_vs_proto_data *pd =
71 kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); 71 kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
72 72
73 if (!pd) 73 if (!pd)
74 return -ENOMEM; 74 return -ENOMEM;
@@ -156,7 +156,7 @@ EXPORT_SYMBOL(ip_vs_proto_get);
156/* 156/*
157 * get ip_vs_protocol object data by netns and proto 157 * get ip_vs_protocol object data by netns and proto
158 */ 158 */
159struct ip_vs_proto_data * 159static struct ip_vs_proto_data *
160__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) 160__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
161{ 161{
162 struct ip_vs_proto_data *pd; 162 struct ip_vs_proto_data *pd;
@@ -199,7 +199,7 @@ void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
199int * 199int *
200ip_vs_create_timeout_table(int *table, int size) 200ip_vs_create_timeout_table(int *table, int size)
201{ 201{
202 return kmemdup(table, size, GFP_ATOMIC); 202 return kmemdup(table, size, GFP_KERNEL);
203} 203}
204 204
205 205
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 91e97ee049be..05126521743e 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -162,7 +162,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
162 162
163 /* allocate the SH table for this service */ 163 /* allocate the SH table for this service */
164 tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, 164 tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
165 GFP_ATOMIC); 165 GFP_KERNEL);
166 if (tbl == NULL) 166 if (tbl == NULL)
167 return -ENOMEM; 167 return -ENOMEM;
168 168
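
The GFP_ATOMIC to GFP_KERNEL conversions in ip_vs_dh.c, ip_vs_lblc.c, ip_vs_lblcr.c, ip_vs_proto.c, ip_vs_sh.c (and ip_vs_wrr.c further below) share one rationale: these init paths run in process context where sleeping is allowed, so they should not drain the atomic reserves. A minimal, hypothetical module illustrating the same rule; only the allocation context matters here, none of this is IPVS logic:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>

static int *table;

static int __init demo_init(void)
{
	/* process context: a sleeping allocation is fine here.
	 * GFP_ATOMIC is only needed from softirq/irq context,
	 * e.g. inside a packet hook or under a spinlock. */
	table = kcalloc(256, sizeof(*table), GFP_KERNEL);
	return table ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	kfree(table);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");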
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index bf5e538af67b..effa10c9e4e3 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -196,6 +196,7 @@ struct ip_vs_sync_thread_data {
196 struct net *net; 196 struct net *net;
197 struct socket *sock; 197 struct socket *sock;
198 char *buf; 198 char *buf;
199 int id;
199}; 200};
200 201
201/* Version 0 definition of packet sizes */ 202/* Version 0 definition of packet sizes */
@@ -271,13 +272,6 @@ struct ip_vs_sync_buff {
271 unsigned char *end; 272 unsigned char *end;
272}; 273};
273 274
274/* multicast addr */
275static struct sockaddr_in mcast_addr = {
276 .sin_family = AF_INET,
277 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT),
278 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
279};
280
281/* 275/*
282 * Copy of struct ip_vs_seq 276 * Copy of struct ip_vs_seq
283 * From unaligned network order to aligned host order 277 * From unaligned network order to aligned host order
@@ -300,18 +294,22 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
300 put_unaligned_be32(ho->previous_delta, &no->previous_delta); 294 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
301} 295}
302 296
303static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) 297static inline struct ip_vs_sync_buff *
298sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
304{ 299{
305 struct ip_vs_sync_buff *sb; 300 struct ip_vs_sync_buff *sb;
306 301
307 spin_lock_bh(&ipvs->sync_lock); 302 spin_lock_bh(&ipvs->sync_lock);
308 if (list_empty(&ipvs->sync_queue)) { 303 if (list_empty(&ms->sync_queue)) {
309 sb = NULL; 304 sb = NULL;
305 __set_current_state(TASK_INTERRUPTIBLE);
310 } else { 306 } else {
311 sb = list_entry(ipvs->sync_queue.next, 307 sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
312 struct ip_vs_sync_buff,
313 list); 308 list);
314 list_del(&sb->list); 309 list_del(&sb->list);
310 ms->sync_queue_len--;
311 if (!ms->sync_queue_len)
312 ms->sync_queue_delay = 0;
315 } 313 }
316 spin_unlock_bh(&ipvs->sync_lock); 314 spin_unlock_bh(&ipvs->sync_lock);
317 315
@@ -334,7 +332,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
334 kfree(sb); 332 kfree(sb);
335 return NULL; 333 return NULL;
336 } 334 }
337 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ 335 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
338 sb->mesg->version = SYNC_PROTO_VER; 336 sb->mesg->version = SYNC_PROTO_VER;
339 sb->mesg->syncid = ipvs->master_syncid; 337 sb->mesg->syncid = ipvs->master_syncid;
340 sb->mesg->size = sizeof(struct ip_vs_sync_mesg); 338 sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
@@ -353,14 +351,22 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
353 kfree(sb); 351 kfree(sb);
354} 352}
355 353
356static inline void sb_queue_tail(struct netns_ipvs *ipvs) 354static inline void sb_queue_tail(struct netns_ipvs *ipvs,
355 struct ipvs_master_sync_state *ms)
357{ 356{
358 struct ip_vs_sync_buff *sb = ipvs->sync_buff; 357 struct ip_vs_sync_buff *sb = ms->sync_buff;
359 358
360 spin_lock(&ipvs->sync_lock); 359 spin_lock(&ipvs->sync_lock);
361 if (ipvs->sync_state & IP_VS_STATE_MASTER) 360 if (ipvs->sync_state & IP_VS_STATE_MASTER &&
362 list_add_tail(&sb->list, &ipvs->sync_queue); 361 ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
363 else 362 if (!ms->sync_queue_len)
363 schedule_delayed_work(&ms->master_wakeup_work,
364 max(IPVS_SYNC_SEND_DELAY, 1));
365 ms->sync_queue_len++;
366 list_add_tail(&sb->list, &ms->sync_queue);
367 if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
368 wake_up_process(ms->master_thread);
369 } else
364 ip_vs_sync_buff_release(sb); 370 ip_vs_sync_buff_release(sb);
365 spin_unlock(&ipvs->sync_lock); 371 spin_unlock(&ipvs->sync_lock);
366} 372}
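
sb_queue_tail() now also batches producer-to-consumer wakeups: the master thread is woken immediately only when IPVS_SYNC_WAKEUP_RATE buffers have queued, while a delayed work item (armed when the queue goes non-empty) flushes a sparse trickle. A loose condition-variable model of the immediate-wakeup half, under the assumption of a single consumer; the kernel's timer path is only noted in a comment:

#include <pthread.h>
#include <stdio.h>

#define WAKEUP_RATE 8

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t kick = PTHREAD_COND_INITIALIZER;
static int queue_len, queue_delay;

static void queue_tail(void)
{
	pthread_mutex_lock(&lock);
	queue_len++;
	/* (the kernel also arms a delayed timer here for the sparse case) */
	if (++queue_delay == WAKEUP_RATE) {
		pthread_cond_signal(&kick);	/* batch full: wake sender */
		printf("wakeup after %d buffers\n", queue_delay);
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	for (int i = 0; i < 20; i++)
		queue_tail();	/* signals once, at the 8th buffer; later
				 * buffers would rely on the delayed timer */
	return 0;
}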
@@ -370,49 +376,26 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs)
370 * than the specified time or the specified time is zero. 376 * than the specified time or the specified time is zero.
371 */ 377 */
372static inline struct ip_vs_sync_buff * 378static inline struct ip_vs_sync_buff *
373get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) 379get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
380 unsigned long time)
374{ 381{
375 struct ip_vs_sync_buff *sb; 382 struct ip_vs_sync_buff *sb;
376 383
377 spin_lock_bh(&ipvs->sync_buff_lock); 384 spin_lock_bh(&ipvs->sync_buff_lock);
378 if (ipvs->sync_buff && 385 sb = ms->sync_buff;
379 time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) { 386 if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
380 sb = ipvs->sync_buff; 387 ms->sync_buff = NULL;
381 ipvs->sync_buff = NULL; 388 __set_current_state(TASK_RUNNING);
382 } else 389 } else
383 sb = NULL; 390 sb = NULL;
384 spin_unlock_bh(&ipvs->sync_buff_lock); 391 spin_unlock_bh(&ipvs->sync_buff_lock);
385 return sb; 392 return sb;
386} 393}
387 394
388/* 395static inline int
389 * Switch mode from sending version 0 or 1 396select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
390 * - must handle sync_buf
391 */
392void ip_vs_sync_switch_mode(struct net *net, int mode)
393{ 397{
394 struct netns_ipvs *ipvs = net_ipvs(net); 398 return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
395
396 if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
397 return;
398 if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
399 return;
400
401 spin_lock_bh(&ipvs->sync_buff_lock);
402 /* Buffer empty ? then let buf_create do the job */
403 if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
404 kfree(ipvs->sync_buff);
405 ipvs->sync_buff = NULL;
406 } else {
407 spin_lock_bh(&ipvs->sync_lock);
408 if (ipvs->sync_state & IP_VS_STATE_MASTER)
409 list_add_tail(&ipvs->sync_buff->list,
410 &ipvs->sync_queue);
411 else
412 ip_vs_sync_buff_release(ipvs->sync_buff);
413 spin_unlock_bh(&ipvs->sync_lock);
414 }
415 spin_unlock_bh(&ipvs->sync_buff_lock);
416} 399}
417 400
418/* 401/*
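
select_master_thread_id() above distributes connections over the master threads by hashing the connection pointer itself: the low bits that are constant due to object size and alignment are shifted away, then the result is masked with threads_mask (count - 1, count a power of two, per the sync_ports sysctl). A standalone model of that distribution, with a stand-in struct size:

#include <stdio.h>
#include <stdint.h>

struct conn_model { char pad[320]; };	/* stand-in for struct ip_vs_conn */

static unsigned int thread_id(const void *cp, unsigned int threads_mask)
{
	/* 1 + ilog2(sizeof(*cp)), as in the kernel helper */
	unsigned int shift = 1 + (31 - __builtin_clz((unsigned)sizeof(struct conn_model)));

	return (unsigned int)(((uintptr_t)cp >> shift) & threads_mask);
}

int main(void)
{
	struct conn_model conns[8];

	/* prints the id (0..3 for mask 3) each connection would hash to */
	for (int i = 0; i < 8; i++)
		printf("conn %d -> master thread %u\n", i,
		       thread_id(&conns[i], 3));
	return 0;
}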
@@ -442,15 +425,101 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
442 return sb; 425 return sb;
443} 426}
444 427
428/* Check if conn should be synced.
429 * pkts: conn packets, use sysctl_sync_threshold to avoid packet check
430 * - (1) sync_refresh_period: reduce sync rate. Additionally, retry
431 * sync_retries times with period of sync_refresh_period/8
432 * - (2) if both sync_refresh_period and sync_period are 0 send sync only
433 * for state changes or only once when pkts matches sync_threshold
434 * - (3) templates: rate can be reduced only with sync_refresh_period or
435 * with (2)
436 */
437static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
438 struct ip_vs_conn *cp, int pkts)
439{
440 unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
441 unsigned long now = jiffies;
442 unsigned long n = (now + cp->timeout) & ~3UL;
443 unsigned int sync_refresh_period;
444 int sync_period;
445 int force;
446
447 /* Check if we sync in current state */
448 if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
449 force = 0;
450 else if (likely(cp->protocol == IPPROTO_TCP)) {
451 if (!((1 << cp->state) &
452 ((1 << IP_VS_TCP_S_ESTABLISHED) |
453 (1 << IP_VS_TCP_S_FIN_WAIT) |
454 (1 << IP_VS_TCP_S_CLOSE) |
455 (1 << IP_VS_TCP_S_CLOSE_WAIT) |
456 (1 << IP_VS_TCP_S_TIME_WAIT))))
457 return 0;
458 force = cp->state != cp->old_state;
459 if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
460 goto set;
461 } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
462 if (!((1 << cp->state) &
463 ((1 << IP_VS_SCTP_S_ESTABLISHED) |
464 (1 << IP_VS_SCTP_S_CLOSED) |
465 (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
466 (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
467 return 0;
468 force = cp->state != cp->old_state;
469 if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
470 goto set;
471 } else {
472 /* UDP or another protocol with single state */
473 force = 0;
474 }
475
476 sync_refresh_period = sysctl_sync_refresh_period(ipvs);
477 if (sync_refresh_period > 0) {
478 long diff = n - orig;
479 long min_diff = max(cp->timeout >> 1, 10UL * HZ);
480
481 /* Avoid sync if difference is below sync_refresh_period
482 * and below the half timeout.
483 */
484 if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
485 int retries = orig & 3;
486
487 if (retries >= sysctl_sync_retries(ipvs))
488 return 0;
489 if (time_before(now, orig - cp->timeout +
490 (sync_refresh_period >> 3)))
491 return 0;
492 n |= retries + 1;
493 }
494 }
495 sync_period = sysctl_sync_period(ipvs);
496 if (sync_period > 0) {
497 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
498 pkts % sync_period != sysctl_sync_threshold(ipvs))
499 return 0;
500 } else if (sync_refresh_period <= 0 &&
501 pkts != sysctl_sync_threshold(ipvs))
502 return 0;
503
504set:
505 cp->old_state = cp->state;
506 n = cmpxchg(&cp->sync_endtime, orig, n);
507 return n == orig || force;
508}
509
445/* 510/*
446 * Version 0 , could be switched in by sys_ctl. 511 * Version 0 , could be switched in by sys_ctl.
447 * Add an ip_vs_conn information into the current sync_buff. 512 * Add an ip_vs_conn information into the current sync_buff.
448 */ 513 */
449void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) 514static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
515 int pkts)
450{ 516{
451 struct netns_ipvs *ipvs = net_ipvs(net); 517 struct netns_ipvs *ipvs = net_ipvs(net);
452 struct ip_vs_sync_mesg_v0 *m; 518 struct ip_vs_sync_mesg_v0 *m;
453 struct ip_vs_sync_conn_v0 *s; 519 struct ip_vs_sync_conn_v0 *s;
520 struct ip_vs_sync_buff *buff;
521 struct ipvs_master_sync_state *ms;
522 int id;
454 int len; 523 int len;
455 524
456 if (unlikely(cp->af != AF_INET)) 525 if (unlikely(cp->af != AF_INET))
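
The sync_endtime trick in ip_vs_sync_conn_needed() above: the deadline is aligned with "& ~3UL" so its two low bits are free to carry the 0..3 retry counter, and cmpxchg() publishes deadline plus retries as one atomic word, so a racing CPU that loses the exchange skips the sync. A C11 model of the same packing idea; names are illustrative and the retry policy is simplified relative to the kernel code:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long sync_endtime;

/* Returns 1 if this caller won the update (i.e. should send a sync msg). */
static int refresh_deadline(unsigned long now, unsigned long timeout)
{
	unsigned long orig = atomic_load(&sync_endtime);
	unsigned long n = (now + timeout) & ~3UL;	/* low bits cleared */
	int retries = (int)(orig & 3);			/* unpack old count */

	if (retries < 3)
		n |= (unsigned long)retries + 1;	/* pack bumped count */
	return atomic_compare_exchange_strong(&sync_endtime, &orig, n);
}

int main(void)
{
	atomic_store(&sync_endtime, 1000UL & ~3UL);
	printf("won=%d endtime=%lu retries=%lu\n",
	       refresh_deadline(2000, 60), atomic_load(&sync_endtime),
	       atomic_load(&sync_endtime) & 3);
	return 0;
}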
@@ -459,21 +528,41 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
459 if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 528 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
460 return; 529 return;
461 530
531 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
532 return;
533
462 spin_lock(&ipvs->sync_buff_lock); 534 spin_lock(&ipvs->sync_buff_lock);
463 if (!ipvs->sync_buff) { 535 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
464 ipvs->sync_buff = 536 spin_unlock(&ipvs->sync_buff_lock);
465 ip_vs_sync_buff_create_v0(ipvs); 537 return;
466 if (!ipvs->sync_buff) { 538 }
539
540 id = select_master_thread_id(ipvs, cp);
541 ms = &ipvs->ms[id];
542 buff = ms->sync_buff;
543 if (buff) {
544 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
545 /* Send buffer if it is for v1 */
546 if (!m->nr_conns) {
547 sb_queue_tail(ipvs, ms);
548 ms->sync_buff = NULL;
549 buff = NULL;
550 }
551 }
552 if (!buff) {
553 buff = ip_vs_sync_buff_create_v0(ipvs);
554 if (!buff) {
467 spin_unlock(&ipvs->sync_buff_lock); 555 spin_unlock(&ipvs->sync_buff_lock);
468 pr_err("ip_vs_sync_buff_create failed.\n"); 556 pr_err("ip_vs_sync_buff_create failed.\n");
469 return; 557 return;
470 } 558 }
559 ms->sync_buff = buff;
471 } 560 }
472 561
473 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : 562 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
474 SIMPLE_CONN_SIZE; 563 SIMPLE_CONN_SIZE;
475 m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; 564 m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
476 s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; 565 s = (struct ip_vs_sync_conn_v0 *) buff->head;
477 566
478 /* copy members */ 567 /* copy members */
479 s->reserved = 0; 568 s->reserved = 0;
@@ -494,18 +583,24 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
494 583
495 m->nr_conns++; 584 m->nr_conns++;
496 m->size += len; 585 m->size += len;
497 ipvs->sync_buff->head += len; 586 buff->head += len;
498 587
499 /* check if there is a space for next one */ 588 /* check if there is a space for next one */
500 if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { 589 if (buff->head + FULL_CONN_SIZE > buff->end) {
501 sb_queue_tail(ipvs); 590 sb_queue_tail(ipvs, ms);
502 ipvs->sync_buff = NULL; 591 ms->sync_buff = NULL;
503 } 592 }
504 spin_unlock(&ipvs->sync_buff_lock); 593 spin_unlock(&ipvs->sync_buff_lock);
505 594
506 /* synchronize its controller if it has */ 595 /* synchronize its controller if it has */
507 if (cp->control) 596 cp = cp->control;
508 ip_vs_sync_conn(net, cp->control); 597 if (cp) {
598 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
599 pkts = atomic_add_return(1, &cp->in_pkts);
600 else
601 pkts = sysctl_sync_threshold(ipvs);
602 ip_vs_sync_conn(net, cp->control, pkts);
603 }
509} 604}
510 605
511/* 606/*
@@ -513,23 +608,29 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
513 * Called by ip_vs_in. 608 * Called by ip_vs_in.
514 * Sending Version 1 messages 609 * Sending Version 1 messages
515 */ 610 */
516void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) 611void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
517{ 612{
518 struct netns_ipvs *ipvs = net_ipvs(net); 613 struct netns_ipvs *ipvs = net_ipvs(net);
519 struct ip_vs_sync_mesg *m; 614 struct ip_vs_sync_mesg *m;
520 union ip_vs_sync_conn *s; 615 union ip_vs_sync_conn *s;
616 struct ip_vs_sync_buff *buff;
617 struct ipvs_master_sync_state *ms;
618 int id;
521 __u8 *p; 619 __u8 *p;
522 unsigned int len, pe_name_len, pad; 620 unsigned int len, pe_name_len, pad;
523 621
524 /* Handle old version of the protocol */ 622 /* Handle old version of the protocol */
525 if (sysctl_sync_ver(ipvs) == 0) { 623 if (sysctl_sync_ver(ipvs) == 0) {
526 ip_vs_sync_conn_v0(net, cp); 624 ip_vs_sync_conn_v0(net, cp, pkts);
527 return; 625 return;
528 } 626 }
529 /* Do not sync ONE PACKET */ 627 /* Do not sync ONE PACKET */
530 if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 628 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
531 goto control; 629 goto control;
532sloop: 630sloop:
631 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
632 goto control;
633
533 /* Sanity checks */ 634 /* Sanity checks */
534 pe_name_len = 0; 635 pe_name_len = 0;
535 if (cp->pe_data_len) { 636 if (cp->pe_data_len) {
@@ -541,6 +642,13 @@ sloop:
541 } 642 }
542 643
543 spin_lock(&ipvs->sync_buff_lock); 644 spin_lock(&ipvs->sync_buff_lock);
645 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
646 spin_unlock(&ipvs->sync_buff_lock);
647 return;
648 }
649
650 id = select_master_thread_id(ipvs, cp);
651 ms = &ipvs->ms[id];
544 652
545#ifdef CONFIG_IP_VS_IPV6 653#ifdef CONFIG_IP_VS_IPV6
546 if (cp->af == AF_INET6) 654 if (cp->af == AF_INET6)
@@ -559,27 +667,32 @@ sloop:
559 667
560 /* check if there is a space for this one */ 668 /* check if there is a space for this one */
561 pad = 0; 669 pad = 0;
562 if (ipvs->sync_buff) { 670 buff = ms->sync_buff;
563 pad = (4 - (size_t)ipvs->sync_buff->head) & 3; 671 if (buff) {
564 if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { 672 m = buff->mesg;
565 sb_queue_tail(ipvs); 673 pad = (4 - (size_t) buff->head) & 3;
566 ipvs->sync_buff = NULL; 674 /* Send buffer if it is for v0 */
675 if (buff->head + len + pad > buff->end || m->reserved) {
676 sb_queue_tail(ipvs, ms);
677 ms->sync_buff = NULL;
678 buff = NULL;
567 pad = 0; 679 pad = 0;
568 } 680 }
569 } 681 }
570 682
571 if (!ipvs->sync_buff) { 683 if (!buff) {
572 ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); 684 buff = ip_vs_sync_buff_create(ipvs);
573 if (!ipvs->sync_buff) { 685 if (!buff) {
574 spin_unlock(&ipvs->sync_buff_lock); 686 spin_unlock(&ipvs->sync_buff_lock);
575 pr_err("ip_vs_sync_buff_create failed.\n"); 687 pr_err("ip_vs_sync_buff_create failed.\n");
576 return; 688 return;
577 } 689 }
690 ms->sync_buff = buff;
691 m = buff->mesg;
578 } 692 }
579 693
580 m = ipvs->sync_buff->mesg; 694 p = buff->head;
581 p = ipvs->sync_buff->head; 695 buff->head += pad + len;
582 ipvs->sync_buff->head += pad + len;
583 m->size += pad + len; 696 m->size += pad + len;
584 /* Add ev. padding from prev. sync_conn */ 697 /* Add ev. padding from prev. sync_conn */
585 while (pad--) 698 while (pad--)
@@ -644,16 +757,10 @@ control:
644 cp = cp->control; 757 cp = cp->control;
645 if (!cp) 758 if (!cp)
646 return; 759 return;
647 /* 760 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
648 * Reduce sync rate for templates 761 pkts = atomic_add_return(1, &cp->in_pkts);
649 * i.e only increment in_pkts for Templates. 762 else
650 */ 763 pkts = sysctl_sync_threshold(ipvs);
651 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
652 int pkts = atomic_add_return(1, &cp->in_pkts);
653
654 if (pkts % sysctl_sync_period(ipvs) != 1)
655 return;
656 }
657 goto sloop; 764 goto sloop;
658} 765}
659 766
@@ -731,9 +838,32 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
731 else 838 else
732 cp = ip_vs_ct_in_get(param); 839 cp = ip_vs_ct_in_get(param);
733 840
734 if (cp && param->pe_data) /* Free pe_data */ 841 if (cp) {
842 /* Free pe_data */
735 kfree(param->pe_data); 843 kfree(param->pe_data);
736 if (!cp) { 844
845 dest = cp->dest;
846 spin_lock(&cp->lock);
847 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
848 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
849 if (flags & IP_VS_CONN_F_INACTIVE) {
850 atomic_dec(&dest->activeconns);
851 atomic_inc(&dest->inactconns);
852 } else {
853 atomic_inc(&dest->activeconns);
854 atomic_dec(&dest->inactconns);
855 }
856 }
857 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
858 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
859 cp->flags = flags;
860 spin_unlock(&cp->lock);
861 if (!dest) {
862 dest = ip_vs_try_bind_dest(cp);
863 if (dest)
864 atomic_dec(&dest->refcnt);
865 }
866 } else {
737 /* 867 /*
738 * Find the appropriate destination for the connection. 868 * Find the appropriate destination for the connection.
739 * If it is not found the connection will remain unbound 869 * If it is not found the connection will remain unbound
@@ -742,18 +872,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
742 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, 872 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
743 param->vport, protocol, fwmark, flags); 873 param->vport, protocol, fwmark, flags);
744 874
745 /* Set the approprite ativity flag */
746 if (protocol == IPPROTO_TCP) {
747 if (state != IP_VS_TCP_S_ESTABLISHED)
748 flags |= IP_VS_CONN_F_INACTIVE;
749 else
750 flags &= ~IP_VS_CONN_F_INACTIVE;
751 } else if (protocol == IPPROTO_SCTP) {
752 if (state != IP_VS_SCTP_S_ESTABLISHED)
753 flags |= IP_VS_CONN_F_INACTIVE;
754 else
755 flags &= ~IP_VS_CONN_F_INACTIVE;
756 }
757 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); 875 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
758 if (dest) 876 if (dest)
759 atomic_dec(&dest->refcnt); 877 atomic_dec(&dest->refcnt);
@@ -763,34 +881,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
763 IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); 881 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
764 return; 882 return;
765 } 883 }
766 } else if (!cp->dest) {
767 dest = ip_vs_try_bind_dest(cp);
768 if (dest)
769 atomic_dec(&dest->refcnt);
770 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
771 (cp->state != state)) {
772 /* update active/inactive flag for the connection */
773 dest = cp->dest;
774 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
775 (state != IP_VS_TCP_S_ESTABLISHED)) {
776 atomic_dec(&dest->activeconns);
777 atomic_inc(&dest->inactconns);
778 cp->flags |= IP_VS_CONN_F_INACTIVE;
779 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
780 (state == IP_VS_TCP_S_ESTABLISHED)) {
781 atomic_inc(&dest->activeconns);
782 atomic_dec(&dest->inactconns);
783 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
784 }
785 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
786 (cp->state != state)) {
787 dest = cp->dest;
788 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
789 (state != IP_VS_SCTP_S_ESTABLISHED)) {
790 atomic_dec(&dest->activeconns);
791 atomic_inc(&dest->inactconns);
792 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
793 }
794 } 884 }
795 885
796 if (opt) 886 if (opt)
@@ -1149,6 +1239,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
1149 1239
1150 1240
1151/* 1241/*
1242 * Setup sndbuf (mode=1) or rcvbuf (mode=0)
1243 */
1244static void set_sock_size(struct sock *sk, int mode, int val)
1245{
1246 /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
1247 /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
1248 lock_sock(sk);
1249 if (mode) {
1250 val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
1251 sysctl_wmem_max);
1252 sk->sk_sndbuf = val * 2;
1253 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1254 } else {
1255 val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
1256 sysctl_rmem_max);
1257 sk->sk_rcvbuf = val * 2;
1258 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1259 }
1260 release_sock(sk);
1261}
1262
1263/*
1152 * Setup loopback of outgoing multicasts on a sending socket 1264 * Setup loopback of outgoing multicasts on a sending socket
1153 */ 1265 */
1154static void set_mcast_loop(struct sock *sk, u_char loop) 1266static void set_mcast_loop(struct sock *sk, u_char loop)
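
set_sock_size() mirrors what setsockopt(SO_SNDBUF/SO_RCVBUF) does for userspace, as the commented-out calls hint: the kernel clamps the request and stores twice the value to cover bookkeeping overhead. That doubling is observable from any userspace program:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int val = 65536, got = 0;
	socklen_t len = sizeof(got);

	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &got, &len);
	printf("asked %d, kernel stored %d (doubled)\n", val, got);
	close(fd);
	return 0;
}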
@@ -1298,9 +1410,15 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
1298/* 1410/*
1299 * Set up sending multicast socket over UDP 1411 * Set up sending multicast socket over UDP
1300 */ 1412 */
1301static struct socket *make_send_sock(struct net *net) 1413static struct socket *make_send_sock(struct net *net, int id)
1302{ 1414{
1303 struct netns_ipvs *ipvs = net_ipvs(net); 1415 struct netns_ipvs *ipvs = net_ipvs(net);
1416 /* multicast addr */
1417 struct sockaddr_in mcast_addr = {
1418 .sin_family = AF_INET,
1419 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1420 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1421 };
1304 struct socket *sock; 1422 struct socket *sock;
1305 int result; 1423 int result;
1306 1424
@@ -1324,6 +1442,9 @@ static struct socket *make_send_sock(struct net *net)
1324 1442
1325 set_mcast_loop(sock->sk, 0); 1443 set_mcast_loop(sock->sk, 0);
1326 set_mcast_ttl(sock->sk, 1); 1444 set_mcast_ttl(sock->sk, 1);
1445 result = sysctl_sync_sock_size(ipvs);
1446 if (result > 0)
1447 set_sock_size(sock->sk, 1, result);
1327 1448
1328 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); 1449 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
1329 if (result < 0) { 1450 if (result < 0) {
@@ -1349,9 +1470,15 @@ error:
1349/* 1470/*
1350 * Set up receiving multicast socket over UDP 1471 * Set up receiving multicast socket over UDP
1351 */ 1472 */
1352static struct socket *make_receive_sock(struct net *net) 1473static struct socket *make_receive_sock(struct net *net, int id)
1353{ 1474{
1354 struct netns_ipvs *ipvs = net_ipvs(net); 1475 struct netns_ipvs *ipvs = net_ipvs(net);
1476 /* multicast addr */
1477 struct sockaddr_in mcast_addr = {
1478 .sin_family = AF_INET,
1479 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
1480 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
1481 };
1355 struct socket *sock; 1482 struct socket *sock;
1356 int result; 1483 int result;
1357 1484
@@ -1369,6 +1496,9 @@ static struct socket *make_receive_sock(struct net *net)
1369 sk_change_net(sock->sk, net); 1496 sk_change_net(sock->sk, net);
1370 /* it is equivalent to the REUSEADDR option in user-space */ 1497 /* it is equivalent to the REUSEADDR option in user-space */
1371 sock->sk->sk_reuse = SK_CAN_REUSE; 1498 sock->sk->sk_reuse = SK_CAN_REUSE;
1499 result = sysctl_sync_sock_size(ipvs);
1500 if (result > 0)
1501 set_sock_size(sock->sk, 0, result);
1372 1502
1373 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, 1503 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
1374 sizeof(struct sockaddr)); 1504 sizeof(struct sockaddr));
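
make_receive_sock() is, in substance, a kernel-side version of a standard multicast receiver: reuse the address, bind the per-thread port (IP_VS_SYNC_PORT + id, i.e. 8848 + id), and join the IPVS sync group 224.0.0.81 (IP_VS_SYNC_GROUP). The userspace equivalent, with the mcast_ifn interface selection omitted:

#include <stdio.h>
#include <arpa/inet.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0), on = 1, id = 0;
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8848 + id),	/* IP_VS_SYNC_PORT + id */
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	struct ip_mreq mreq = { .imr_interface.s_addr = htonl(INADDR_ANY) };

	inet_pton(AF_INET, "224.0.0.81", &mreq.imr_multiaddr);
	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
		       &mreq, sizeof(mreq)) < 0) {
		perror("setup");
		return 1;
	}
	puts("joined 224.0.0.81, waiting for sync messages");
	return 0;
}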
@@ -1411,18 +1541,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1411 return len; 1541 return len;
1412} 1542}
1413 1543
1414static void 1544static int
1415ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) 1545ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1416{ 1546{
1417 int msize; 1547 int msize;
1548 int ret;
1418 1549
1419 msize = msg->size; 1550 msize = msg->size;
1420 1551
1421 /* Put size in network byte order */ 1552 /* Put size in network byte order */
1422 msg->size = htons(msg->size); 1553 msg->size = htons(msg->size);
1423 1554
1424 if (ip_vs_send_async(sock, (char *)msg, msize) != msize) 1555 ret = ip_vs_send_async(sock, (char *)msg, msize);
1425 pr_err("ip_vs_send_async error\n"); 1556 if (ret >= 0 || ret == -EAGAIN)
1557 return ret;
1558 pr_err("ip_vs_send_async error %d\n", ret);
1559 return 0;
1426} 1560}
1427 1561
1428static int 1562static int
@@ -1438,48 +1572,90 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1438 iov.iov_base = buffer; 1572 iov.iov_base = buffer;
1439 iov.iov_len = (size_t)buflen; 1573 iov.iov_len = (size_t)buflen;
1440 1574
1441 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0); 1575 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
1442 1576
1443 if (len < 0) 1577 if (len < 0)
1444 return -1; 1578 return len;
1445 1579
1446 LeaveFunction(7); 1580 LeaveFunction(7);
1447 return len; 1581 return len;
1448} 1582}
1449 1583
1584/* Wakeup the master thread for sending */
1585static void master_wakeup_work_handler(struct work_struct *work)
1586{
1587 struct ipvs_master_sync_state *ms =
1588 container_of(work, struct ipvs_master_sync_state,
1589 master_wakeup_work.work);
1590 struct netns_ipvs *ipvs = ms->ipvs;
1591
1592 spin_lock_bh(&ipvs->sync_lock);
1593 if (ms->sync_queue_len &&
1594 ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
1595 ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
1596 wake_up_process(ms->master_thread);
1597 }
1598 spin_unlock_bh(&ipvs->sync_lock);
1599}
1600
1601/* Get next buffer to send */
1602static inline struct ip_vs_sync_buff *
1603next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
1604{
1605 struct ip_vs_sync_buff *sb;
1606
1607 sb = sb_dequeue(ipvs, ms);
1608 if (sb)
1609 return sb;
1610 /* Do not delay entries in buffer for more than 2 seconds */
1611 return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
1612}
1450 1613
1451static int sync_thread_master(void *data) 1614static int sync_thread_master(void *data)
1452{ 1615{
1453 struct ip_vs_sync_thread_data *tinfo = data; 1616 struct ip_vs_sync_thread_data *tinfo = data;
1454 struct netns_ipvs *ipvs = net_ipvs(tinfo->net); 1617 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1618 struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
1619 struct sock *sk = tinfo->sock->sk;
1455 struct ip_vs_sync_buff *sb; 1620 struct ip_vs_sync_buff *sb;
1456 1621
1457 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " 1622 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
1458 "syncid = %d\n", 1623 "syncid = %d, id = %d\n",
1459 ipvs->master_mcast_ifn, ipvs->master_syncid); 1624 ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
1460 1625
1461 while (!kthread_should_stop()) { 1626 for (;;) {
1462 while ((sb = sb_dequeue(ipvs))) { 1627 sb = next_sync_buff(ipvs, ms);
1463 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1628 if (unlikely(kthread_should_stop()))
1464 ip_vs_sync_buff_release(sb); 1629 break;
1630 if (!sb) {
1631 schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
1632 continue;
1465 } 1633 }
1466 1634 while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
1467 /* check if entries stay in ipvs->sync_buff for 2 seconds */ 1635 int ret = 0;
1468 sb = get_curr_sync_buff(ipvs, 2 * HZ); 1636
1469 if (sb) { 1637 __wait_event_interruptible(*sk_sleep(sk),
1470 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1638 sock_writeable(sk) ||
1471 ip_vs_sync_buff_release(sb); 1639 kthread_should_stop(),
1640 ret);
1641 if (unlikely(kthread_should_stop()))
1642 goto done;
1472 } 1643 }
1473 1644 ip_vs_sync_buff_release(sb);
1474 schedule_timeout_interruptible(HZ);
1475 } 1645 }
1476 1646
1647done:
1648 __set_current_state(TASK_RUNNING);
1649 if (sb)
1650 ip_vs_sync_buff_release(sb);
1651
1477 /* clean up the sync_buff queue */ 1652 /* clean up the sync_buff queue */
1478 while ((sb = sb_dequeue(ipvs))) 1653 while ((sb = sb_dequeue(ipvs, ms)))
1479 ip_vs_sync_buff_release(sb); 1654 ip_vs_sync_buff_release(sb);
1655 __set_current_state(TASK_RUNNING);
1480 1656
1481 /* clean up the current sync_buff */ 1657 /* clean up the current sync_buff */
1482 sb = get_curr_sync_buff(ipvs, 0); 1658 sb = get_curr_sync_buff(ipvs, ms, 0);
1483 if (sb) 1659 if (sb)
1484 ip_vs_sync_buff_release(sb); 1660 ip_vs_sync_buff_release(sb);
1485 1661
@@ -1498,8 +1674,8 @@ static int sync_thread_backup(void *data)
1498 int len; 1674 int len;
1499 1675
1500 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " 1676 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
1501 "syncid = %d\n", 1677 "syncid = %d, id = %d\n",
1502 ipvs->backup_mcast_ifn, ipvs->backup_syncid); 1678 ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
1503 1679
1504 while (!kthread_should_stop()) { 1680 while (!kthread_should_stop()) {
1505 wait_event_interruptible(*sk_sleep(tinfo->sock->sk), 1681 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -1511,7 +1687,8 @@ static int sync_thread_backup(void *data)
1511 len = ip_vs_receive(tinfo->sock, tinfo->buf, 1687 len = ip_vs_receive(tinfo->sock, tinfo->buf,
1512 ipvs->recv_mesg_maxlen); 1688 ipvs->recv_mesg_maxlen);
1513 if (len <= 0) { 1689 if (len <= 0) {
1514 pr_err("receiving message error\n"); 1690 if (len != -EAGAIN)
1691 pr_err("receiving message error\n");
1515 break; 1692 break;
1516 } 1693 }
1517 1694
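
With MSG_DONTWAIT added to ip_vs_receive(), the backup thread sees -EAGAIN instead of blocking inside recvmsg(), which lets it poll kthread_should_stop(); accordingly, the hunk above stops logging EAGAIN as an error. The same pattern in a self-contained userspace form:

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>

static int drain(int fd, char *buf, size_t len)
{
	for (;;) {
		ssize_t n = recv(fd, buf, len, MSG_DONTWAIT);

		if (n > 0)
			continue;		/* process message ... */
		if (n < 0 && errno == EAGAIN)
			return 0;		/* queue empty: not an error */
		perror("recv");			/* real failure (or n == 0) */
		return -1;
	}
}

int main(void)
{
	int sv[2];
	char buf[256];

	socketpair(AF_UNIX, SOCK_DGRAM, 0, sv);
	send(sv[1], "hello", 5, 0);
	return drain(sv[0], buf, sizeof(buf));
}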
@@ -1535,86 +1712,140 @@ static int sync_thread_backup(void *data)
1535int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) 1712int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
1536{ 1713{
1537 struct ip_vs_sync_thread_data *tinfo; 1714 struct ip_vs_sync_thread_data *tinfo;
1538 struct task_struct **realtask, *task; 1715 struct task_struct **array = NULL, *task;
1539 struct socket *sock; 1716 struct socket *sock;
1540 struct netns_ipvs *ipvs = net_ipvs(net); 1717 struct netns_ipvs *ipvs = net_ipvs(net);
1541 char *name, *buf = NULL; 1718 char *name;
1542 int (*threadfn)(void *data); 1719 int (*threadfn)(void *data);
1720 int id, count;
1543 int result = -ENOMEM; 1721 int result = -ENOMEM;
1544 1722
1545 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1723 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1546 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", 1724 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
1547 sizeof(struct ip_vs_sync_conn_v0)); 1725 sizeof(struct ip_vs_sync_conn_v0));
1548 1726
1727 if (!ipvs->sync_state) {
1728 count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
1729 ipvs->threads_mask = count - 1;
1730 } else
1731 count = ipvs->threads_mask + 1;
1549 1732
1550 if (state == IP_VS_STATE_MASTER) { 1733 if (state == IP_VS_STATE_MASTER) {
1551 if (ipvs->master_thread) 1734 if (ipvs->ms)
1552 return -EEXIST; 1735 return -EEXIST;
1553 1736
1554 strlcpy(ipvs->master_mcast_ifn, mcast_ifn, 1737 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
1555 sizeof(ipvs->master_mcast_ifn)); 1738 sizeof(ipvs->master_mcast_ifn));
1556 ipvs->master_syncid = syncid; 1739 ipvs->master_syncid = syncid;
1557 realtask = &ipvs->master_thread; 1740 name = "ipvs-m:%d:%d";
1558 name = "ipvs_master:%d";
1559 threadfn = sync_thread_master; 1741 threadfn = sync_thread_master;
1560 sock = make_send_sock(net);
1561 } else if (state == IP_VS_STATE_BACKUP) { 1742 } else if (state == IP_VS_STATE_BACKUP) {
1562 if (ipvs->backup_thread) 1743 if (ipvs->backup_threads)
1563 return -EEXIST; 1744 return -EEXIST;
1564 1745
1565 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, 1746 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
1566 sizeof(ipvs->backup_mcast_ifn)); 1747 sizeof(ipvs->backup_mcast_ifn));
1567 ipvs->backup_syncid = syncid; 1748 ipvs->backup_syncid = syncid;
1568 realtask = &ipvs->backup_thread; 1749 name = "ipvs-b:%d:%d";
1569 name = "ipvs_backup:%d";
1570 threadfn = sync_thread_backup; 1750 threadfn = sync_thread_backup;
1571 sock = make_receive_sock(net);
1572 } else { 1751 } else {
1573 return -EINVAL; 1752 return -EINVAL;
1574 } 1753 }
1575 1754
1576 if (IS_ERR(sock)) { 1755 if (state == IP_VS_STATE_MASTER) {
1577 result = PTR_ERR(sock); 1756 struct ipvs_master_sync_state *ms;
1578 goto out;
1579 }
1580 1757
1581 set_sync_mesg_maxlen(net, state); 1758 ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
1582 if (state == IP_VS_STATE_BACKUP) { 1759 if (!ipvs->ms)
1583 buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); 1760 goto out;
1584 if (!buf) 1761 ms = ipvs->ms;
1585 goto outsocket; 1762 for (id = 0; id < count; id++, ms++) {
1763 INIT_LIST_HEAD(&ms->sync_queue);
1764 ms->sync_queue_len = 0;
1765 ms->sync_queue_delay = 0;
1766 INIT_DELAYED_WORK(&ms->master_wakeup_work,
1767 master_wakeup_work_handler);
1768 ms->ipvs = ipvs;
1769 }
1770 } else {
1771 array = kzalloc(count * sizeof(struct task_struct *),
1772 GFP_KERNEL);
1773 if (!array)
1774 goto out;
1586 } 1775 }
1776 set_sync_mesg_maxlen(net, state);
1587 1777
1588 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); 1778 tinfo = NULL;
1589 if (!tinfo) 1779 for (id = 0; id < count; id++) {
1590 goto outbuf; 1780 if (state == IP_VS_STATE_MASTER)
1591 1781 sock = make_send_sock(net, id);
1592 tinfo->net = net; 1782 else
1593 tinfo->sock = sock; 1783 sock = make_receive_sock(net, id);
1594 tinfo->buf = buf; 1784 if (IS_ERR(sock)) {
1785 result = PTR_ERR(sock);
1786 goto outtinfo;
1787 }
1788 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
1789 if (!tinfo)
1790 goto outsocket;
1791 tinfo->net = net;
1792 tinfo->sock = sock;
1793 if (state == IP_VS_STATE_BACKUP) {
1794 tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
1795 GFP_KERNEL);
1796 if (!tinfo->buf)
1797 goto outtinfo;
1798 }
1799 tinfo->id = id;
1595 1800
1596 task = kthread_run(threadfn, tinfo, name, ipvs->gen); 1801 task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
1597 if (IS_ERR(task)) { 1802 if (IS_ERR(task)) {
1598 result = PTR_ERR(task); 1803 result = PTR_ERR(task);
1599 goto outtinfo; 1804 goto outtinfo;
1805 }
1806 tinfo = NULL;
1807 if (state == IP_VS_STATE_MASTER)
1808 ipvs->ms[id].master_thread = task;
1809 else
1810 array[id] = task;
1600 } 1811 }
1601 1812
1602 /* mark as active */ 1813 /* mark as active */
1603 *realtask = task; 1814
1815 if (state == IP_VS_STATE_BACKUP)
1816 ipvs->backup_threads = array;
1817 spin_lock_bh(&ipvs->sync_buff_lock);
1604 ipvs->sync_state |= state; 1818 ipvs->sync_state |= state;
1819 spin_unlock_bh(&ipvs->sync_buff_lock);
1605 1820
1606 /* increase the module use count */ 1821 /* increase the module use count */
1607 ip_vs_use_count_inc(); 1822 ip_vs_use_count_inc();
1608 1823
1609 return 0; 1824 return 0;
1610 1825
1611outtinfo:
1612 kfree(tinfo);
1613outbuf:
1614 kfree(buf);
1615outsocket: 1826outsocket:
1616 sk_release_kernel(sock->sk); 1827 sk_release_kernel(sock->sk);
1828
1829outtinfo:
1830 if (tinfo) {
1831 sk_release_kernel(tinfo->sock->sk);
1832 kfree(tinfo->buf);
1833 kfree(tinfo);
1834 }
1835 count = id;
1836 while (count-- > 0) {
1837 if (state == IP_VS_STATE_MASTER)
1838 kthread_stop(ipvs->ms[count].master_thread);
1839 else
1840 kthread_stop(array[count]);
1841 }
1842 kfree(array);
1843
1617out: 1844out:
1845 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
1846 kfree(ipvs->ms);
1847 ipvs->ms = NULL;
1848 }
1618 return result; 1849 return result;
1619} 1850}
1620 1851
@@ -1622,38 +1853,60 @@ out:
1622int stop_sync_thread(struct net *net, int state) 1853int stop_sync_thread(struct net *net, int state)
1623{ 1854{
1624 struct netns_ipvs *ipvs = net_ipvs(net); 1855 struct netns_ipvs *ipvs = net_ipvs(net);
1856 struct task_struct **array;
1857 int id;
1625 int retc = -EINVAL; 1858 int retc = -EINVAL;
1626 1859
1627 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1860 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1628 1861
1629 if (state == IP_VS_STATE_MASTER) { 1862 if (state == IP_VS_STATE_MASTER) {
1630 if (!ipvs->master_thread) 1863 if (!ipvs->ms)
1631 return -ESRCH; 1864 return -ESRCH;
1632 1865
1633 pr_info("stopping master sync thread %d ...\n",
1634 task_pid_nr(ipvs->master_thread));
1635
1636 /* 1866 /*
1637 * The lock synchronizes with sb_queue_tail(), so that we don't 1867 * The lock synchronizes with sb_queue_tail(), so that we don't
1638 * add sync buffers to the queue, when we are already in 1868 * add sync buffers to the queue, when we are already in
1639 * progress of stopping the master sync daemon. 1869 * progress of stopping the master sync daemon.
1640 */ 1870 */
1641 1871
1642 spin_lock_bh(&ipvs->sync_lock); 1872 spin_lock_bh(&ipvs->sync_buff_lock);
1873 spin_lock(&ipvs->sync_lock);
1643 ipvs->sync_state &= ~IP_VS_STATE_MASTER; 1874 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1644 spin_unlock_bh(&ipvs->sync_lock); 1875 spin_unlock(&ipvs->sync_lock);
1645 retc = kthread_stop(ipvs->master_thread); 1876 spin_unlock_bh(&ipvs->sync_buff_lock);
1646 ipvs->master_thread = NULL; 1877
1878 retc = 0;
1879 for (id = ipvs->threads_mask; id >= 0; id--) {
1880 struct ipvs_master_sync_state *ms = &ipvs->ms[id];
1881 int ret;
1882
1883 pr_info("stopping master sync thread %d ...\n",
1884 task_pid_nr(ms->master_thread));
1885 cancel_delayed_work_sync(&ms->master_wakeup_work);
1886 ret = kthread_stop(ms->master_thread);
1887 if (retc >= 0)
1888 retc = ret;
1889 }
1890 kfree(ipvs->ms);
1891 ipvs->ms = NULL;
1647 } else if (state == IP_VS_STATE_BACKUP) { 1892 } else if (state == IP_VS_STATE_BACKUP) {
1648 if (!ipvs->backup_thread) 1893 if (!ipvs->backup_threads)
1649 return -ESRCH; 1894 return -ESRCH;
1650 1895
1651 pr_info("stopping backup sync thread %d ...\n",
1652 task_pid_nr(ipvs->backup_thread));
1653
1654 ipvs->sync_state &= ~IP_VS_STATE_BACKUP; 1896 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1655 retc = kthread_stop(ipvs->backup_thread); 1897 array = ipvs->backup_threads;
1656 ipvs->backup_thread = NULL; 1898 retc = 0;
1899 for (id = ipvs->threads_mask; id >= 0; id--) {
1900 int ret;
1901
1902 pr_info("stopping backup sync thread %d ...\n",
1903 task_pid_nr(array[id]));
1904 ret = kthread_stop(array[id]);
1905 if (retc >= 0)
1906 retc = ret;
1907 }
1908 kfree(array);
1909 ipvs->backup_threads = NULL;
1657 } 1910 }
1658 1911
1659 /* decrease the module use count */ 1912 /* decrease the module use count */
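Two details of the shutdown sequence above are worth noting: the state bit is cleared with sync_lock nested inside sync_buff_lock, so no new buffers can be queued once stopping begins, and `retc` keeps the first kthread_stop() failure while the loop still stops every remaining thread. A minimal sketch of that first-error-wins aggregation (stop_one() is a hypothetical stand-in for kthread_stop()):

#include <stdio.h>

/* Illustrative only: stop_one() stands in for kthread_stop(). */
static int stop_one(int id)
{
	return id == 1 ? -3 /* simulated failure */ : 0;
}

int main(void)
{
	int retc = 0;

	for (int id = 3; id >= 0; id--) {
		int ret = stop_one(id);

		/* keep the first error, but keep stopping the rest */
		if (retc >= 0)
			retc = ret;
	}
	printf("retc = %d\n", retc);	/* -3: first failure preserved */
	return 0;
}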
@@ -1670,13 +1923,8 @@ int __net_init ip_vs_sync_net_init(struct net *net)
1670 struct netns_ipvs *ipvs = net_ipvs(net); 1923 struct netns_ipvs *ipvs = net_ipvs(net);
1671 1924
1672 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); 1925 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
1673 INIT_LIST_HEAD(&ipvs->sync_queue);
1674 spin_lock_init(&ipvs->sync_lock); 1926 spin_lock_init(&ipvs->sync_lock);
1675 spin_lock_init(&ipvs->sync_buff_lock); 1927 spin_lock_init(&ipvs->sync_buff_lock);
1676
1677 ipvs->sync_mcast_addr.sin_family = AF_INET;
1678 ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
1679 ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
1680 return 0; 1928 return 0;
1681} 1929}
1682 1930
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index fd0d4e09876a..231be7dd547a 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -84,7 +84,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
84 /* 84 /*
85 * Allocate the mark variable for WRR scheduling 85 * Allocate the mark variable for WRR scheduling
86 */ 86 */
87 mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC); 87 mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL);
88 if (mark == NULL) 88 if (mark == NULL)
89 return -ENOMEM; 89 return -ENOMEM;
90 90
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index cf0747c5741f..32c59093146e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1336,7 +1336,6 @@ static void nf_conntrack_cleanup_init_net(void)
1336 while (untrack_refs() > 0) 1336 while (untrack_refs() > 0)
1337 schedule(); 1337 schedule();
1338 1338
1339 nf_conntrack_helper_fini();
1340 nf_conntrack_proto_fini(); 1339 nf_conntrack_proto_fini();
1341#ifdef CONFIG_NF_CONNTRACK_ZONES 1340#ifdef CONFIG_NF_CONNTRACK_ZONES
1342 nf_ct_extend_unregister(&nf_ct_zone_extend); 1341 nf_ct_extend_unregister(&nf_ct_zone_extend);
@@ -1354,6 +1353,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
1354 } 1353 }
1355 1354
1356 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1355 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1356 nf_conntrack_helper_fini(net);
1357 nf_conntrack_timeout_fini(net); 1357 nf_conntrack_timeout_fini(net);
1358 nf_conntrack_ecache_fini(net); 1358 nf_conntrack_ecache_fini(net);
1359 nf_conntrack_tstamp_fini(net); 1359 nf_conntrack_tstamp_fini(net);
@@ -1504,10 +1504,6 @@ static int nf_conntrack_init_init_net(void)
1504 if (ret < 0) 1504 if (ret < 0)
1505 goto err_proto; 1505 goto err_proto;
1506 1506
1507 ret = nf_conntrack_helper_init();
1508 if (ret < 0)
1509 goto err_helper;
1510
1511#ifdef CONFIG_NF_CONNTRACK_ZONES 1507#ifdef CONFIG_NF_CONNTRACK_ZONES
1512 ret = nf_ct_extend_register(&nf_ct_zone_extend); 1508 ret = nf_ct_extend_register(&nf_ct_zone_extend);
1513 if (ret < 0) 1509 if (ret < 0)
@@ -1525,10 +1521,8 @@ static int nf_conntrack_init_init_net(void)
1525 1521
1526#ifdef CONFIG_NF_CONNTRACK_ZONES 1522#ifdef CONFIG_NF_CONNTRACK_ZONES
1527err_extend: 1523err_extend:
1528 nf_conntrack_helper_fini();
1529#endif
1530err_helper:
1531 nf_conntrack_proto_fini(); 1524 nf_conntrack_proto_fini();
1525#endif
1532err_proto: 1526err_proto:
1533 return ret; 1527 return ret;
1534} 1528}
@@ -1589,9 +1583,14 @@ static int nf_conntrack_init_net(struct net *net)
1589 ret = nf_conntrack_timeout_init(net); 1583 ret = nf_conntrack_timeout_init(net);
1590 if (ret < 0) 1584 if (ret < 0)
1591 goto err_timeout; 1585 goto err_timeout;
1586 ret = nf_conntrack_helper_init(net);
1587 if (ret < 0)
1588 goto err_helper;
1592 1589
1593 return 0; 1590 return 0;
1594 1591
1592err_helper:
1593 nf_conntrack_timeout_fini(net);
1595err_timeout: 1594err_timeout:
1596 nf_conntrack_ecache_fini(net); 1595 nf_conntrack_ecache_fini(net);
1597err_ecache: 1596err_ecache:
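The conntrack init/cleanup hunks keep the usual kernel unwinding discipline: nf_conntrack_helper_init(net) becomes the last per-net init step, and the new err_helper label undoes it by falling through the existing ladder, so teardown always runs in reverse order of initialization. A self-contained sketch of that goto-ladder idiom, with hypothetical init_a/init_b/init_c standing in for the ecache/timeout/helper steps:

#include <stdio.h>

static int init_a(void) { return 0; }
static int init_b(void) { return 0; }
static int init_c(void) { return -1; }	/* simulate a failure */
static void fini_a(void) { puts("fini_a"); }
static void fini_b(void) { puts("fini_b"); }

static int init_all(void)
{
	int ret;

	ret = init_a();
	if (ret < 0)
		goto err_a;
	ret = init_b();
	if (ret < 0)
		goto err_b;
	ret = init_c();
	if (ret < 0)
		goto err_c;
	return 0;

err_c:			/* unwind in reverse order of init */
	fini_b();
err_b:
	fini_a();
err_a:
	return ret;
}

int main(void)
{
	printf("init_all() = %d\n", init_all());	/* fini_b, fini_a, -1 */
	return 0;
}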
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index b924f3a49a8e..e7be79e640de 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
84int nf_conntrack_register_notifier(struct net *net, 84int nf_conntrack_register_notifier(struct net *net,
85 struct nf_ct_event_notifier *new) 85 struct nf_ct_event_notifier *new)
86{ 86{
87 int ret = 0; 87 int ret;
88 struct nf_ct_event_notifier *notify; 88 struct nf_ct_event_notifier *notify;
89 89
90 mutex_lock(&nf_ct_ecache_mutex); 90 mutex_lock(&nf_ct_ecache_mutex);
@@ -95,8 +95,7 @@ int nf_conntrack_register_notifier(struct net *net,
95 goto out_unlock; 95 goto out_unlock;
96 } 96 }
97 rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); 97 rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
98 mutex_unlock(&nf_ct_ecache_mutex); 98 ret = 0;
99 return ret;
100 99
101out_unlock: 100out_unlock:
102 mutex_unlock(&nf_ct_ecache_mutex); 101 mutex_unlock(&nf_ct_ecache_mutex);
@@ -121,7 +120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
121int nf_ct_expect_register_notifier(struct net *net, 120int nf_ct_expect_register_notifier(struct net *net,
122 struct nf_exp_event_notifier *new) 121 struct nf_exp_event_notifier *new)
123{ 122{
124 int ret = 0; 123 int ret;
125 struct nf_exp_event_notifier *notify; 124 struct nf_exp_event_notifier *notify;
126 125
127 mutex_lock(&nf_ct_ecache_mutex); 126 mutex_lock(&nf_ct_ecache_mutex);
@@ -132,8 +131,7 @@ int nf_ct_expect_register_notifier(struct net *net,
132 goto out_unlock; 131 goto out_unlock;
133 } 132 }
134 rcu_assign_pointer(net->ct.nf_expect_event_cb, new); 133 rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
135 mutex_unlock(&nf_ct_ecache_mutex); 134 ret = 0;
136 return ret;
137 135
138out_unlock: 136out_unlock:
139 mutex_unlock(&nf_ct_ecache_mutex); 137 mutex_unlock(&nf_ct_ecache_mutex);
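Both notifier hunks are the same simplification: instead of unlocking and returning inline on success, the function now sets ret = 0 and falls through to the shared out_unlock label, leaving exactly one unlock site for every path. A user-space sketch of that single-exit locking idiom (register_cb() and the -EBUSY-style value are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reg_mutex = PTHREAD_MUTEX_INITIALIZER;
static void *registered_cb;	/* stand-in for the notifier slot */

/* Register 'cb' unless a callback is already present; note the single
 * unlock site shared by the success and busy paths. */
static int register_cb(void *cb)
{
	int ret;

	pthread_mutex_lock(&reg_mutex);
	if (registered_cb) {
		ret = -16;	/* EBUSY-like */
		goto out_unlock;
	}
	registered_cb = cb;
	ret = 0;

out_unlock:
	pthread_mutex_unlock(&reg_mutex);
	return ret;
}

int main(void)
{
	int dummy;

	printf("first:  %d\n", register_cb(&dummy));	/* 0 */
	printf("second: %d\n", register_cb(&dummy));	/* -16 */
	return 0;
}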
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 436b7cb79ba4..4fa2ff961f5a 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -34,6 +34,67 @@ static struct hlist_head *nf_ct_helper_hash __read_mostly;
34static unsigned int nf_ct_helper_hsize __read_mostly; 34static unsigned int nf_ct_helper_hsize __read_mostly;
35static unsigned int nf_ct_helper_count __read_mostly; 35static unsigned int nf_ct_helper_count __read_mostly;
36 36
37static bool nf_ct_auto_assign_helper __read_mostly = true;
38module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
39MODULE_PARM_DESC(nf_conntrack_helper,
40 "Enable automatic conntrack helper assignment (default 1)");
41
42#ifdef CONFIG_SYSCTL
43static struct ctl_table helper_sysctl_table[] = {
44 {
45 .procname = "nf_conntrack_helper",
46 .data = &init_net.ct.sysctl_auto_assign_helper,
47 .maxlen = sizeof(unsigned int),
48 .mode = 0644,
49 .proc_handler = proc_dointvec,
50 },
51 {}
52};
53
54static int nf_conntrack_helper_init_sysctl(struct net *net)
55{
56 struct ctl_table *table;
57
58 table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table),
59 GFP_KERNEL);
60 if (!table)
61 goto out;
62
63 table[0].data = &net->ct.sysctl_auto_assign_helper;
64
65 net->ct.helper_sysctl_header =
66 register_net_sysctl(net, "net/netfilter", table);
67
68 if (!net->ct.helper_sysctl_header) {
69 pr_err("nf_conntrack_helper: can't register to sysctl.\n");
70 goto out_register;
71 }
72 return 0;
73
74out_register:
75 kfree(table);
76out:
77 return -ENOMEM;
78}
79
80static void nf_conntrack_helper_fini_sysctl(struct net *net)
81{
82 struct ctl_table *table;
83
84 table = net->ct.helper_sysctl_header->ctl_table_arg;
85 unregister_net_sysctl_table(net->ct.helper_sysctl_header);
86 kfree(table);
87}
88#else
89static int nf_conntrack_helper_init_sysctl(struct net *net)
90{
91 return 0;
92}
93
94static void nf_conntrack_helper_fini_sysctl(struct net *net)
95{
96}
97#endif /* CONFIG_SYSCTL */
37 98
 38/* Stupid hash, but collision free for the default registrations of the 99/* Stupid hash, but collision free for the default registrations of the
 39 * helpers currently in the kernel. */ 100 * helpers currently in the kernel. */
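The per-net sysctl registration above follows a common pattern: the table template is kmemdup()ed for each namespace and table[0].data is repointed at that namespace's sysctl_auto_assign_helper, so every net gets an independent knob backed by its own storage. A rough user-space analogue of duplicating a template and retargeting its data pointer (the struct names here are invented for illustration):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct knob {
	const char *name;
	int *data;		/* points into the owning instance */
};

struct ns_sim {
	int auto_assign;
	struct knob *tbl;
};

static const struct knob knob_template[] = {
	{ .name = "auto_assign_helper", .data = NULL },
};

/* Duplicate the template and point it at this instance's field, as the
 * patch does with kmemdup() + table[0].data = &net->ct....; */
static int knobs_init(struct ns_sim *n)
{
	n->tbl = malloc(sizeof(knob_template));
	if (!n->tbl)
		return -12;	/* ENOMEM-like */
	memcpy(n->tbl, knob_template, sizeof(knob_template));
	n->tbl[0].data = &n->auto_assign;
	return 0;
}

int main(void)
{
	struct ns_sim a = { .auto_assign = 0 }, b = { .auto_assign = 0 };

	if (knobs_init(&a) || knobs_init(&b))
		return 1;
	*b.tbl[0].data = 1;	/* sets b only; a is untouched */
	printf("a=%d b=%d\n", a.auto_assign, b.auto_assign);
	free(a.tbl);
	free(b.tbl);
	return 0;
}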
@@ -118,17 +179,38 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
118{ 179{
119 struct nf_conntrack_helper *helper = NULL; 180 struct nf_conntrack_helper *helper = NULL;
120 struct nf_conn_help *help; 181 struct nf_conn_help *help;
182 struct net *net = nf_ct_net(ct);
121 int ret = 0; 183 int ret = 0;
122 184
185 /* We already got a helper explicitly attached. The function
186 * nf_conntrack_alter_reply - in case NAT is in use - asks for looking
187 * the helper up again. Since now the user is in full control of
188 * making consistent helper configurations, skip this automatic
189 * re-lookup, otherwise we'll lose the helper.
190 */
191 if (test_bit(IPS_HELPER_BIT, &ct->status))
192 return 0;
193
123 if (tmpl != NULL) { 194 if (tmpl != NULL) {
124 help = nfct_help(tmpl); 195 help = nfct_help(tmpl);
125 if (help != NULL) 196 if (help != NULL) {
126 helper = help->helper; 197 helper = help->helper;
198 set_bit(IPS_HELPER_BIT, &ct->status);
199 }
127 } 200 }
128 201
129 help = nfct_help(ct); 202 help = nfct_help(ct);
130 if (helper == NULL) 203 if (net->ct.sysctl_auto_assign_helper && helper == NULL) {
131 helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); 204 helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
205 if (unlikely(!net->ct.auto_assign_helper_warned && helper)) {
206 pr_info("nf_conntrack: automatic helper "
207 "assignment is deprecated and it will "
208 "be removed soon. Use the iptables CT target "
209 "to attach helpers instead.\n");
210 net->ct.auto_assign_helper_warned = true;
211 }
212 }
213
132 if (helper == NULL) { 214 if (helper == NULL) {
133 if (help) 215 if (help)
134 RCU_INIT_POINTER(help->helper, NULL); 216 RCU_INIT_POINTER(help->helper, NULL);
@@ -315,28 +397,44 @@ static struct nf_ct_ext_type helper_extend __read_mostly = {
315 .id = NF_CT_EXT_HELPER, 397 .id = NF_CT_EXT_HELPER,
316}; 398};
317 399
318int nf_conntrack_helper_init(void) 400int nf_conntrack_helper_init(struct net *net)
319{ 401{
320 int err; 402 int err;
321 403
322 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ 404 net->ct.auto_assign_helper_warned = false;
323 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); 405 net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
324 if (!nf_ct_helper_hash) 406
325 return -ENOMEM; 407 if (net_eq(net, &init_net)) {
408 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
409 nf_ct_helper_hash =
410 nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
411 if (!nf_ct_helper_hash)
412 return -ENOMEM;
326 413
327 err = nf_ct_extend_register(&helper_extend); 414 err = nf_ct_extend_register(&helper_extend);
415 if (err < 0)
416 goto err1;
417 }
418
419 err = nf_conntrack_helper_init_sysctl(net);
328 if (err < 0) 420 if (err < 0)
329 goto err1; 421 goto out_sysctl;
330 422
331 return 0; 423 return 0;
332 424
425out_sysctl:
426 if (net_eq(net, &init_net))
427 nf_ct_extend_unregister(&helper_extend);
333err1: 428err1:
334 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); 429 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
335 return err; 430 return err;
336} 431}
337 432
338void nf_conntrack_helper_fini(void) 433void nf_conntrack_helper_fini(struct net *net)
339{ 434{
340 nf_ct_extend_unregister(&helper_extend); 435 nf_conntrack_helper_fini_sysctl(net);
341 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); 436 if (net_eq(net, &init_net)) {
437 nf_ct_extend_unregister(&helper_extend);
438 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
439 }
342} 440}
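nf_conntrack_helper_init()/fini() now take a struct net, and the net_eq(net, &init_net) checks ensure the shared helper hash and extension are set up and torn down only once, by the initial namespace, while the sysctl part runs for every namespace. A small sketch of that "the initial namespace owns the globals" split (all names hypothetical):

#include <stdio.h>
#include <stdbool.h>

static bool globals_ready;	/* stands in for the shared helper hash */

struct ns_sim { bool is_init_ns; };

/* Per-namespace init: only the initial namespace sets up state shared
 * by all namespaces, as with net_eq(net, &init_net) in the patch. */
static int ns_init(struct ns_sim *ns)
{
	if (ns->is_init_ns)
		globals_ready = true;	/* allocate shared tables here */
	/* per-namespace bits (sysctl etc.) would follow for every ns */
	return 0;
}

static void ns_fini(struct ns_sim *ns)
{
	if (ns->is_init_ns)
		globals_ready = false;	/* free shared tables here */
}

int main(void)
{
	struct ns_sim init_ns = { .is_init_ns = true };
	struct ns_sim other = { .is_init_ns = false };

	ns_init(&init_ns);
	ns_init(&other);
	printf("globals_ready = %d\n", globals_ready);
	ns_fini(&other);
	ns_fini(&init_ns);
	return 0;
}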
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 462ec2dbe561..6f4b00a8fc73 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2080,7 +2080,15 @@ static int
2080ctnetlink_change_expect(struct nf_conntrack_expect *x, 2080ctnetlink_change_expect(struct nf_conntrack_expect *x,
2081 const struct nlattr * const cda[]) 2081 const struct nlattr * const cda[])
2082{ 2082{
2083 return -EOPNOTSUPP; 2083 if (cda[CTA_EXPECT_TIMEOUT]) {
2084 if (!del_timer(&x->timeout))
2085 return -ETIME;
2086
2087 x->timeout.expires = jiffies +
2088 ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
2089 add_timer(&x->timeout);
2090 }
2091 return 0;
2084} 2092}
2085 2093
2086static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { 2094static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = {
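ctnetlink_change_expect() now honours CTA_EXPECT_TIMEOUT, and the del_timer() return value is what makes it safe: if the timer is no longer pending the expectation is already dying, so the function returns -ETIME instead of re-arming it. A user-space analogue of "extend only while still pending", using a deadline field in place of the kernel timer (struct and names invented for illustration):

#include <stdio.h>
#include <time.h>

struct expect_sim {
	time_t deadline;	/* 0 means already expired/disarmed */
};

/* Extend the timeout only if it is still pending, mirroring the
 * del_timer()/add_timer() dance: a dead timer must not be re-armed. */
static int change_timeout(struct expect_sim *x, int secs)
{
	if (!x->deadline)
		return -62;	/* ETIME-like: too late to extend */
	x->deadline = time(NULL) + secs;
	return 0;
}

int main(void)
{
	struct expect_sim live = { .deadline = time(NULL) + 30 };
	struct expect_sim dead = { .deadline = 0 };

	printf("live: %d\n", change_timeout(&live, 120));	/* 0 */
	printf("dead: %d\n", change_timeout(&dead, 120));	/* -62 */
	return 0;
}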
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 0920ea3bf599..d309e7f472d8 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -14,7 +14,6 @@
14#include <linux/netlink.h> 14#include <linux/netlink.h>
15#include <linux/rtnetlink.h> 15#include <linux/rtnetlink.h>
16#include <linux/if.h> 16#include <linux/if.h>
17#include <linux/netfilter_ipv4/ip_queue.h>
18#include <linux/inet_diag.h> 17#include <linux/inet_diag.h>
19#include <linux/xfrm.h> 18#include <linux/xfrm.h>
20#include <linux/audit.h> 19#include <linux/audit.h>
@@ -70,12 +69,6 @@ static struct nlmsg_perm nlmsg_route_perms[] =
70 { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, 69 { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
71}; 70};
72 71
73static struct nlmsg_perm nlmsg_firewall_perms[] =
74{
75 { IPQM_MODE, NETLINK_FIREWALL_SOCKET__NLMSG_WRITE },
76 { IPQM_VERDICT, NETLINK_FIREWALL_SOCKET__NLMSG_WRITE },
77};
78
79static struct nlmsg_perm nlmsg_tcpdiag_perms[] = 72static struct nlmsg_perm nlmsg_tcpdiag_perms[] =
80{ 73{
81 { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, 74 { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
@@ -145,12 +138,6 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
145 sizeof(nlmsg_route_perms)); 138 sizeof(nlmsg_route_perms));
146 break; 139 break;
147 140
148 case SECCLASS_NETLINK_FIREWALL_SOCKET:
149 case SECCLASS_NETLINK_IP6FW_SOCKET:
150 err = nlmsg_perm(nlmsg_type, perm, nlmsg_firewall_perms,
151 sizeof(nlmsg_firewall_perms));
152 break;
153
154 case SECCLASS_NETLINK_TCPDIAG_SOCKET: 141 case SECCLASS_NETLINK_TCPDIAG_SOCKET:
155 err = nlmsg_perm(nlmsg_type, perm, nlmsg_tcpdiag_perms, 142 err = nlmsg_perm(nlmsg_type, perm, nlmsg_tcpdiag_perms,
156 sizeof(nlmsg_tcpdiag_perms)); 143 sizeof(nlmsg_tcpdiag_perms));