diff options
author | David S. Miller <davem@davemloft.net> | 2012-05-08 14:40:21 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-08 14:40:21 -0400 |
commit | 9bb862beb6e5839e92f709d33fda07678f062f20 (patch) | |
tree | a2c396712c5a2cda380034173fd07a67bfa0489f | |
parent | b44907e64cc1987153f6577306108379be1523b7 (diff) | |
parent | d16cf20e2f2f13411eece7f7fb72c17d141c4a84 (diff) |
Merge branch 'master' of git://1984.lsi.us.es/net-next
34 files changed, 856 insertions, 1708 deletions
diff --git a/Documentation/ABI/removed/ip_queue b/Documentation/ABI/removed/ip_queue new file mode 100644 index 000000000000..3243613bc2d2 --- /dev/null +++ b/Documentation/ABI/removed/ip_queue | |||
@@ -0,0 +1,9 @@ | |||
1 | What: ip_queue | ||
2 | Date: finally removed in kernel v3.5.0 | ||
3 | Contact: Pablo Neira Ayuso <pablo@netfilter.org> | ||
4 | Description: | ||
5 | ip_queue has been replaced by nfnetlink_queue which provides | ||
6 | a more advanced queueing mechanism to user-space. The ip_queue | ||
7 | module was already announced to become obsolete years ago. | ||
8 | |||
9 | Users: | ||
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 90b0c4fd275b..6f896b94abdc 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -1301,13 +1301,22 @@ bridge-nf-call-ip6tables - BOOLEAN | |||
1301 | bridge-nf-filter-vlan-tagged - BOOLEAN | 1301 | bridge-nf-filter-vlan-tagged - BOOLEAN |
1302 | 1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables. | 1302 | 1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables. |
1303 | 0 : disable this. | 1303 | 0 : disable this. |
1304 | Default: 1 | 1304 | Default: 0 |
1305 | 1305 | ||
1306 | bridge-nf-filter-pppoe-tagged - BOOLEAN | 1306 | bridge-nf-filter-pppoe-tagged - BOOLEAN |
1307 | 1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables. | 1307 | 1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables. |
1308 | 0 : disable this. | 1308 | 0 : disable this. |
1309 | Default: 1 | 1309 | Default: 0 |
1310 | 1310 | ||
1311 | bridge-nf-pass-vlan-input-dev - BOOLEAN | ||
1312 | 1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan | ||
1313 | interface on the bridge and set the netfilter input device to the vlan. | ||
1314 | This allows use of e.g. "iptables -i br0.1" and makes the REDIRECT | ||
1315 | target work with vlan-on-top-of-bridge interfaces. When no matching | ||
1316 | vlan interface is found, or this switch is off, the input device is | ||
1317 | set to the bridge interface. | ||
1318 | 0: disable bridge netfilter vlan interface lookup. | ||
1319 | Default: 0 | ||
1311 | 1320 | ||
1312 | proc/sys/net/sctp/* Variables: | 1321 | proc/sys/net/sctp/* Variables: |
1313 | 1322 | ||
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index be0ef3df4acb..8a2d438dc499 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h | |||
@@ -89,6 +89,7 @@ | |||
89 | #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ | 89 | #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ |
90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ | 90 | #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ |
91 | 91 | ||
92 | /* Initial bits allowed in backup server */ | ||
92 | #define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \ | 93 | #define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \ |
93 | IP_VS_CONN_F_NOOUTPUT | \ | 94 | IP_VS_CONN_F_NOOUTPUT | \ |
94 | IP_VS_CONN_F_INACTIVE | \ | 95 | IP_VS_CONN_F_INACTIVE | \ |
@@ -97,6 +98,10 @@ | |||
97 | IP_VS_CONN_F_TEMPLATE \ | 98 | IP_VS_CONN_F_TEMPLATE \ |
98 | ) | 99 | ) |
99 | 100 | ||
101 | /* Bits allowed to update in backup server */ | ||
102 | #define IP_VS_CONN_F_BACKUP_UPD_MASK (IP_VS_CONN_F_INACTIVE | \ | ||
103 | IP_VS_CONN_F_SEQ_MASK) | ||
104 | |||
100 | /* Flags that are not sent to backup server start from bit 16 */ | 105 | /* Flags that are not sent to backup server start from bit 16 */ |
101 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ | 106 | #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ |
102 | 107 | ||
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 0d3dd66322ec..d146872a0b91 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h | |||
@@ -83,6 +83,10 @@ enum ip_conntrack_status { | |||
83 | /* Conntrack is a fake untracked entry */ | 83 | /* Conntrack is a fake untracked entry */ |
84 | IPS_UNTRACKED_BIT = 12, | 84 | IPS_UNTRACKED_BIT = 12, |
85 | IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), | 85 | IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), |
86 | |||
87 | /* Conntrack got a helper explicitly attached via CT target. */ | ||
88 | IPS_HELPER_BIT = 13, | ||
89 | IPS_HELPER = (1 << IPS_HELPER_BIT), | ||
86 | }; | 90 | }; |
87 | 91 | ||
88 | /* Connection tracking event types */ | 92 | /* Connection tracking event types */ |
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 31f8bec95650..c61b8fb1a9ef 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild | |||
@@ -1,4 +1,3 @@ | |||
1 | header-y += ip_queue.h | ||
2 | header-y += ip_tables.h | 1 | header-y += ip_tables.h |
3 | header-y += ipt_CLUSTERIP.h | 2 | header-y += ipt_CLUSTERIP.h |
4 | header-y += ipt_ECN.h | 3 | header-y += ipt_ECN.h |
diff --git a/include/linux/netfilter_ipv4/ip_queue.h b/include/linux/netfilter_ipv4/ip_queue.h deleted file mode 100644 index a03507f465f8..000000000000 --- a/include/linux/netfilter_ipv4/ip_queue.h +++ /dev/null | |||
@@ -1,72 +0,0 @@ | |||
1 | /* | ||
2 | * This is a module which is used for queueing IPv4 packets and | ||
3 | * communicating with userspace via netlink. | ||
4 | * | ||
5 | * (C) 2000 James Morris, this code is GPL. | ||
6 | */ | ||
7 | #ifndef _IP_QUEUE_H | ||
8 | #define _IP_QUEUE_H | ||
9 | |||
10 | #ifdef __KERNEL__ | ||
11 | #ifdef DEBUG_IPQ | ||
12 | #define QDEBUG(x...) printk(KERN_DEBUG ## x) | ||
13 | #else | ||
14 | #define QDEBUG(x...) | ||
15 | #endif /* DEBUG_IPQ */ | ||
16 | #else | ||
17 | #include <net/if.h> | ||
18 | #endif /* ! __KERNEL__ */ | ||
19 | |||
20 | /* Messages sent from kernel */ | ||
21 | typedef struct ipq_packet_msg { | ||
22 | unsigned long packet_id; /* ID of queued packet */ | ||
23 | unsigned long mark; /* Netfilter mark value */ | ||
24 | long timestamp_sec; /* Packet arrival time (seconds) */ | ||
25 | long timestamp_usec; /* Packet arrival time (+useconds) */ | ||
26 | unsigned int hook; /* Netfilter hook we rode in on */ | ||
27 | char indev_name[IFNAMSIZ]; /* Name of incoming interface */ | ||
28 | char outdev_name[IFNAMSIZ]; /* Name of outgoing interface */ | ||
29 | __be16 hw_protocol; /* Hardware protocol (network order) */ | ||
30 | unsigned short hw_type; /* Hardware type */ | ||
31 | unsigned char hw_addrlen; /* Hardware address length */ | ||
32 | unsigned char hw_addr[8]; /* Hardware address */ | ||
33 | size_t data_len; /* Length of packet data */ | ||
34 | unsigned char payload[0]; /* Optional packet data */ | ||
35 | } ipq_packet_msg_t; | ||
36 | |||
37 | /* Messages sent from userspace */ | ||
38 | typedef struct ipq_mode_msg { | ||
39 | unsigned char value; /* Requested mode */ | ||
40 | size_t range; /* Optional range of packet requested */ | ||
41 | } ipq_mode_msg_t; | ||
42 | |||
43 | typedef struct ipq_verdict_msg { | ||
44 | unsigned int value; /* Verdict to hand to netfilter */ | ||
45 | unsigned long id; /* Packet ID for this verdict */ | ||
46 | size_t data_len; /* Length of replacement data */ | ||
47 | unsigned char payload[0]; /* Optional replacement packet */ | ||
48 | } ipq_verdict_msg_t; | ||
49 | |||
50 | typedef struct ipq_peer_msg { | ||
51 | union { | ||
52 | ipq_verdict_msg_t verdict; | ||
53 | ipq_mode_msg_t mode; | ||
54 | } msg; | ||
55 | } ipq_peer_msg_t; | ||
56 | |||
57 | /* Packet delivery modes */ | ||
58 | enum { | ||
59 | IPQ_COPY_NONE, /* Initial mode, packets are dropped */ | ||
60 | IPQ_COPY_META, /* Copy metadata */ | ||
61 | IPQ_COPY_PACKET /* Copy metadata + packet (range) */ | ||
62 | }; | ||
63 | #define IPQ_COPY_MAX IPQ_COPY_PACKET | ||
64 | |||
65 | /* Types of messages */ | ||
66 | #define IPQM_BASE 0x10 /* standard netlink messages below this */ | ||
67 | #define IPQM_MODE (IPQM_BASE + 1) /* Mode request from peer */ | ||
68 | #define IPQM_VERDICT (IPQM_BASE + 2) /* Verdict from peer */ | ||
69 | #define IPQM_PACKET (IPQM_BASE + 3) /* Packet from kernel */ | ||
70 | #define IPQM_MAX (IPQM_BASE + 4) | ||
71 | |||
72 | #endif /*_IP_QUEUE_H*/ | ||
diff --git a/include/linux/netlink.h b/include/linux/netlink.h index a2092f582a78..0f628ffa420c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h | |||
@@ -7,7 +7,7 @@ | |||
7 | #define NETLINK_ROUTE 0 /* Routing/device hook */ | 7 | #define NETLINK_ROUTE 0 /* Routing/device hook */ |
8 | #define NETLINK_UNUSED 1 /* Unused number */ | 8 | #define NETLINK_UNUSED 1 /* Unused number */ |
9 | #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ | 9 | #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ |
10 | #define NETLINK_FIREWALL 3 /* Firewalling hook */ | 10 | #define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ |
11 | #define NETLINK_SOCK_DIAG 4 /* socket monitoring */ | 11 | #define NETLINK_SOCK_DIAG 4 /* socket monitoring */ |
12 | #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ | 12 | #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ |
13 | #define NETLINK_XFRM 6 /* ipsec */ | 13 | #define NETLINK_XFRM 6 /* ipsec */ |
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 93b81aa73429..d6146b4811c2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -504,6 +504,7 @@ struct ip_vs_conn { | |||
504 | * state transition triggered | 504 | * state transition triggered |
505 | * synchronization | 505 | * synchronization |
506 | */ | 506 | */ |
507 | unsigned long sync_endtime; /* jiffies + sent_retries */ | ||
507 | 508 | ||
508 | /* Control members */ | 509 | /* Control members */ |
509 | struct ip_vs_conn *control; /* Master control connection */ | 510 | struct ip_vs_conn *control; /* Master control connection */ |
@@ -783,6 +784,16 @@ struct ip_vs_app { | |||
783 | void (*timeout_change)(struct ip_vs_app *app, int flags); | 784 | void (*timeout_change)(struct ip_vs_app *app, int flags); |
784 | }; | 785 | }; |
785 | 786 | ||
787 | struct ipvs_master_sync_state { | ||
788 | struct list_head sync_queue; | ||
789 | struct ip_vs_sync_buff *sync_buff; | ||
790 | int sync_queue_len; | ||
791 | unsigned int sync_queue_delay; | ||
792 | struct task_struct *master_thread; | ||
793 | struct delayed_work master_wakeup_work; | ||
794 | struct netns_ipvs *ipvs; | ||
795 | }; | ||
796 | |||
786 | /* IPVS in network namespace */ | 797 | /* IPVS in network namespace */ |
787 | struct netns_ipvs { | 798 | struct netns_ipvs { |
788 | int gen; /* Generation */ | 799 | int gen; /* Generation */ |
@@ -869,10 +880,15 @@ struct netns_ipvs { | |||
869 | #endif | 880 | #endif |
870 | int sysctl_snat_reroute; | 881 | int sysctl_snat_reroute; |
871 | int sysctl_sync_ver; | 882 | int sysctl_sync_ver; |
883 | int sysctl_sync_ports; | ||
884 | int sysctl_sync_qlen_max; | ||
885 | int sysctl_sync_sock_size; | ||
872 | int sysctl_cache_bypass; | 886 | int sysctl_cache_bypass; |
873 | int sysctl_expire_nodest_conn; | 887 | int sysctl_expire_nodest_conn; |
874 | int sysctl_expire_quiescent_template; | 888 | int sysctl_expire_quiescent_template; |
875 | int sysctl_sync_threshold[2]; | 889 | int sysctl_sync_threshold[2]; |
890 | unsigned int sysctl_sync_refresh_period; | ||
891 | int sysctl_sync_retries; | ||
876 | int sysctl_nat_icmp_send; | 892 | int sysctl_nat_icmp_send; |
877 | 893 | ||
878 | /* ip_vs_lblc */ | 894 | /* ip_vs_lblc */ |
@@ -888,13 +904,11 @@ struct netns_ipvs { | |||
888 | spinlock_t est_lock; | 904 | spinlock_t est_lock; |
889 | struct timer_list est_timer; /* Estimation timer */ | 905 | struct timer_list est_timer; /* Estimation timer */ |
890 | /* ip_vs_sync */ | 906 | /* ip_vs_sync */ |
891 | struct list_head sync_queue; | ||
892 | spinlock_t sync_lock; | 907 | spinlock_t sync_lock; |
893 | struct ip_vs_sync_buff *sync_buff; | 908 | struct ipvs_master_sync_state *ms; |
894 | spinlock_t sync_buff_lock; | 909 | spinlock_t sync_buff_lock; |
895 | struct sockaddr_in sync_mcast_addr; | 910 | struct task_struct **backup_threads; |
896 | struct task_struct *master_thread; | 911 | int threads_mask; |
897 | struct task_struct *backup_thread; | ||
898 | int send_mesg_maxlen; | 912 | int send_mesg_maxlen; |
899 | int recv_mesg_maxlen; | 913 | int recv_mesg_maxlen; |
900 | volatile int sync_state; | 914 | volatile int sync_state; |
@@ -911,6 +925,14 @@ struct netns_ipvs { | |||
911 | #define DEFAULT_SYNC_THRESHOLD 3 | 925 | #define DEFAULT_SYNC_THRESHOLD 3 |
912 | #define DEFAULT_SYNC_PERIOD 50 | 926 | #define DEFAULT_SYNC_PERIOD 50 |
913 | #define DEFAULT_SYNC_VER 1 | 927 | #define DEFAULT_SYNC_VER 1 |
928 | #define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ) | ||
929 | #define DEFAULT_SYNC_RETRIES 0 | ||
930 | #define IPVS_SYNC_WAKEUP_RATE 8 | ||
931 | #define IPVS_SYNC_QLEN_MAX (IPVS_SYNC_WAKEUP_RATE * 4) | ||
932 | #define IPVS_SYNC_SEND_DELAY (HZ / 50) | ||
933 | #define IPVS_SYNC_CHECK_PERIOD HZ | ||
934 | #define IPVS_SYNC_FLUSH_TIME (HZ * 2) | ||
935 | #define IPVS_SYNC_PORTS_MAX (1 << 6) | ||
914 | 936 | ||
915 | #ifdef CONFIG_SYSCTL | 937 | #ifdef CONFIG_SYSCTL |
916 | 938 | ||
@@ -921,7 +943,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) | |||
921 | 943 | ||
922 | static inline int sysctl_sync_period(struct netns_ipvs *ipvs) | 944 | static inline int sysctl_sync_period(struct netns_ipvs *ipvs) |
923 | { | 945 | { |
924 | return ipvs->sysctl_sync_threshold[1]; | 946 | return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]); |
947 | } | ||
948 | |||
949 | static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs) | ||
950 | { | ||
951 | return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period); | ||
952 | } | ||
953 | |||
954 | static inline int sysctl_sync_retries(struct netns_ipvs *ipvs) | ||
955 | { | ||
956 | return ipvs->sysctl_sync_retries; | ||
925 | } | 957 | } |
926 | 958 | ||
927 | static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) | 959 | static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) |
@@ -929,6 +961,21 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) | |||
929 | return ipvs->sysctl_sync_ver; | 961 | return ipvs->sysctl_sync_ver; |
930 | } | 962 | } |
931 | 963 | ||
964 | static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) | ||
965 | { | ||
966 | return ACCESS_ONCE(ipvs->sysctl_sync_ports); | ||
967 | } | ||
968 | |||
969 | static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs) | ||
970 | { | ||
971 | return ipvs->sysctl_sync_qlen_max; | ||
972 | } | ||
973 | |||
974 | static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs) | ||
975 | { | ||
976 | return ipvs->sysctl_sync_sock_size; | ||
977 | } | ||
978 | |||
932 | #else | 979 | #else |
933 | 980 | ||
934 | static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) | 981 | static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) |
@@ -941,11 +988,36 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs) | |||
941 | return DEFAULT_SYNC_PERIOD; | 988 | return DEFAULT_SYNC_PERIOD; |
942 | } | 989 | } |
943 | 990 | ||
991 | static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs) | ||
992 | { | ||
993 | return DEFAULT_SYNC_REFRESH_PERIOD; | ||
994 | } | ||
995 | |||
996 | static inline int sysctl_sync_retries(struct netns_ipvs *ipvs) | ||
997 | { | ||
998 | return DEFAULT_SYNC_RETRIES & 3; | ||
999 | } | ||
1000 | |||
944 | static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) | 1001 | static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) |
945 | { | 1002 | { |
946 | return DEFAULT_SYNC_VER; | 1003 | return DEFAULT_SYNC_VER; |
947 | } | 1004 | } |
948 | 1005 | ||
1006 | static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) | ||
1007 | { | ||
1008 | return 1; | ||
1009 | } | ||
1010 | |||
1011 | static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs) | ||
1012 | { | ||
1013 | return IPVS_SYNC_QLEN_MAX; | ||
1014 | } | ||
1015 | |||
1016 | static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs) | ||
1017 | { | ||
1018 | return 0; | ||
1019 | } | ||
1020 | |||
949 | #endif | 1021 | #endif |
950 | 1022 | ||
951 | /* | 1023 | /* |
@@ -1185,7 +1257,6 @@ extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg); | |||
1185 | extern struct ip_vs_stats ip_vs_stats; | 1257 | extern struct ip_vs_stats ip_vs_stats; |
1186 | extern int sysctl_ip_vs_sync_ver; | 1258 | extern int sysctl_ip_vs_sync_ver; |
1187 | 1259 | ||
1188 | extern void ip_vs_sync_switch_mode(struct net *net, int mode); | ||
1189 | extern struct ip_vs_service * | 1260 | extern struct ip_vs_service * |
1190 | ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, | 1261 | ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, |
1191 | const union nf_inet_addr *vaddr, __be16 vport); | 1262 | const union nf_inet_addr *vaddr, __be16 vport); |
@@ -1219,7 +1290,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); | |||
1219 | extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, | 1290 | extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, |
1220 | __u8 syncid); | 1291 | __u8 syncid); |
1221 | extern int stop_sync_thread(struct net *net, int state); | 1292 | extern int stop_sync_thread(struct net *net, int state); |
1222 | extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp); | 1293 | extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts); |
1223 | 1294 | ||
1224 | 1295 | ||
1225 | /* | 1296 | /* |
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index ab86036bbf0c..cce7f6a798bf 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h | |||
@@ -321,14 +321,8 @@ extern unsigned int nf_conntrack_max; | |||
321 | extern unsigned int nf_conntrack_hash_rnd; | 321 | extern unsigned int nf_conntrack_hash_rnd; |
322 | void init_nf_conntrack_hash_rnd(void); | 322 | void init_nf_conntrack_hash_rnd(void); |
323 | 323 | ||
324 | #define NF_CT_STAT_INC(net, count) \ | 324 | #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) |
325 | __this_cpu_inc((net)->ct.stat->count) | 325 | #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) |
326 | #define NF_CT_STAT_INC_ATOMIC(net, count) \ | ||
327 | do { \ | ||
328 | local_bh_disable(); \ | ||
329 | __this_cpu_inc((net)->ct.stat->count); \ | ||
330 | local_bh_enable(); \ | ||
331 | } while (0) | ||
332 | 326 | ||
333 | #define MODULE_ALIAS_NFCT_HELPER(helper) \ | 327 | #define MODULE_ALIAS_NFCT_HELPER(helper) \ |
334 | MODULE_ALIAS("nfct-helper-" helper) | 328 | MODULE_ALIAS("nfct-helper-" helper) |
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 5767dc242dee..1d1889409b9e 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h | |||
@@ -60,8 +60,8 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) | |||
60 | return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); | 60 | return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); |
61 | } | 61 | } |
62 | 62 | ||
63 | extern int nf_conntrack_helper_init(void); | 63 | extern int nf_conntrack_helper_init(struct net *net); |
64 | extern void nf_conntrack_helper_fini(void); | 64 | extern void nf_conntrack_helper_fini(struct net *net); |
65 | 65 | ||
66 | extern int nf_conntrack_broadcast_help(struct sk_buff *skb, | 66 | extern int nf_conntrack_broadcast_help(struct sk_buff *skb, |
67 | unsigned int protoff, | 67 | unsigned int protoff, |
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 7a911eca0f18..a053a19870cf 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h | |||
@@ -26,11 +26,14 @@ struct netns_ct { | |||
26 | int sysctl_tstamp; | 26 | int sysctl_tstamp; |
27 | int sysctl_checksum; | 27 | int sysctl_checksum; |
28 | unsigned int sysctl_log_invalid; /* Log invalid packets */ | 28 | unsigned int sysctl_log_invalid; /* Log invalid packets */ |
29 | int sysctl_auto_assign_helper; | ||
30 | bool auto_assign_helper_warned; | ||
29 | #ifdef CONFIG_SYSCTL | 31 | #ifdef CONFIG_SYSCTL |
30 | struct ctl_table_header *sysctl_header; | 32 | struct ctl_table_header *sysctl_header; |
31 | struct ctl_table_header *acct_sysctl_header; | 33 | struct ctl_table_header *acct_sysctl_header; |
32 | struct ctl_table_header *tstamp_sysctl_header; | 34 | struct ctl_table_header *tstamp_sysctl_header; |
33 | struct ctl_table_header *event_sysctl_header; | 35 | struct ctl_table_header *event_sysctl_header; |
36 | struct ctl_table_header *helper_sysctl_header; | ||
34 | #endif | 37 | #endif |
35 | char *slabname; | 38 | char *slabname; |
36 | }; | 39 | }; |
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 53f083686ae4..dce55d4ee83b 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c | |||
@@ -54,12 +54,14 @@ static int brnf_call_ip6tables __read_mostly = 1; | |||
54 | static int brnf_call_arptables __read_mostly = 1; | 54 | static int brnf_call_arptables __read_mostly = 1; |
55 | static int brnf_filter_vlan_tagged __read_mostly = 0; | 55 | static int brnf_filter_vlan_tagged __read_mostly = 0; |
56 | static int brnf_filter_pppoe_tagged __read_mostly = 0; | 56 | static int brnf_filter_pppoe_tagged __read_mostly = 0; |
57 | static int brnf_pass_vlan_indev __read_mostly = 0; | ||
57 | #else | 58 | #else |
58 | #define brnf_call_iptables 1 | 59 | #define brnf_call_iptables 1 |
59 | #define brnf_call_ip6tables 1 | 60 | #define brnf_call_ip6tables 1 |
60 | #define brnf_call_arptables 1 | 61 | #define brnf_call_arptables 1 |
61 | #define brnf_filter_vlan_tagged 0 | 62 | #define brnf_filter_vlan_tagged 0 |
62 | #define brnf_filter_pppoe_tagged 0 | 63 | #define brnf_filter_pppoe_tagged 0 |
64 | #define brnf_pass_vlan_indev 0 | ||
63 | #endif | 65 | #endif |
64 | 66 | ||
65 | #define IS_IP(skb) \ | 67 | #define IS_IP(skb) \ |
@@ -503,6 +505,19 @@ bridged_dnat: | |||
503 | return 0; | 505 | return 0; |
504 | } | 506 | } |
505 | 507 | ||
508 | static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) | ||
509 | { | ||
510 | struct net_device *vlan, *br; | ||
511 | |||
512 | br = bridge_parent(dev); | ||
513 | if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) | ||
514 | return br; | ||
515 | |||
516 | vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK); | ||
517 | |||
518 | return vlan ? vlan : br; | ||
519 | } | ||
520 | |||
506 | /* Some common code for IPv4/IPv6 */ | 521 | /* Some common code for IPv4/IPv6 */ |
507 | static struct net_device *setup_pre_routing(struct sk_buff *skb) | 522 | static struct net_device *setup_pre_routing(struct sk_buff *skb) |
508 | { | 523 | { |
@@ -515,7 +530,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) | |||
515 | 530 | ||
516 | nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; | 531 | nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; |
517 | nf_bridge->physindev = skb->dev; | 532 | nf_bridge->physindev = skb->dev; |
518 | skb->dev = bridge_parent(skb->dev); | 533 | skb->dev = brnf_get_logical_dev(skb, skb->dev); |
519 | if (skb->protocol == htons(ETH_P_8021Q)) | 534 | if (skb->protocol == htons(ETH_P_8021Q)) |
520 | nf_bridge->mask |= BRNF_8021Q; | 535 | nf_bridge->mask |= BRNF_8021Q; |
521 | else if (skb->protocol == htons(ETH_P_PPP_SES)) | 536 | else if (skb->protocol == htons(ETH_P_PPP_SES)) |
@@ -774,7 +789,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, | |||
774 | else | 789 | else |
775 | skb->protocol = htons(ETH_P_IPV6); | 790 | skb->protocol = htons(ETH_P_IPV6); |
776 | 791 | ||
777 | NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent, | 792 | NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent, |
778 | br_nf_forward_finish); | 793 | br_nf_forward_finish); |
779 | 794 | ||
780 | return NF_STOLEN; | 795 | return NF_STOLEN; |
@@ -1002,6 +1017,13 @@ static ctl_table brnf_table[] = { | |||
1002 | .mode = 0644, | 1017 | .mode = 0644, |
1003 | .proc_handler = brnf_sysctl_call_tables, | 1018 | .proc_handler = brnf_sysctl_call_tables, |
1004 | }, | 1019 | }, |
1020 | { | ||
1021 | .procname = "bridge-nf-pass-vlan-input-dev", | ||
1022 | .data = &brnf_pass_vlan_indev, | ||
1023 | .maxlen = sizeof(int), | ||
1024 | .mode = 0644, | ||
1025 | .proc_handler = brnf_sysctl_call_tables, | ||
1026 | }, | ||
1005 | { } | 1027 | { } |
1006 | }; | 1028 | }; |
1007 | #endif | 1029 | #endif |
diff --git a/net/core/sock.c b/net/core/sock.c index b8c818e69c23..26ed27fb2bfb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -259,7 +259,9 @@ static struct lock_class_key af_callback_keys[AF_MAX]; | |||
259 | 259 | ||
260 | /* Run time adjustable parameters. */ | 260 | /* Run time adjustable parameters. */ |
261 | __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; | 261 | __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; |
262 | EXPORT_SYMBOL(sysctl_wmem_max); | ||
262 | __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; | 263 | __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; |
264 | EXPORT_SYMBOL(sysctl_rmem_max); | ||
263 | __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; | 265 | __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; |
264 | __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; | 266 | __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; |
265 | 267 | ||
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 240b68469a7a..c20674dc9452 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o | |||
66 | 66 | ||
67 | # just filtering instance of ARP tables for now | 67 | # just filtering instance of ARP tables for now |
68 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o | 68 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o |
69 | |||
70 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o | ||
71 | |||
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c deleted file mode 100644 index 09775a1e1348..000000000000 --- a/net/ipv4/netfilter/ip_queue.c +++ /dev/null | |||
@@ -1,639 +0,0 @@ | |||
1 | /* | ||
2 | * This is a module which is used for queueing IPv4 packets and | ||
3 | * communicating with userspace via netlink. | ||
4 | * | ||
5 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> | ||
6 | * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <linux/notifier.h> | ||
17 | #include <linux/netdevice.h> | ||
18 | #include <linux/netfilter.h> | ||
19 | #include <linux/netfilter_ipv4/ip_queue.h> | ||
20 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
21 | #include <linux/netlink.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/proc_fs.h> | ||
25 | #include <linux/seq_file.h> | ||
26 | #include <linux/security.h> | ||
27 | #include <linux/net.h> | ||
28 | #include <linux/mutex.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <net/net_namespace.h> | ||
31 | #include <net/sock.h> | ||
32 | #include <net/route.h> | ||
33 | #include <net/netfilter/nf_queue.h> | ||
34 | #include <net/ip.h> | ||
35 | |||
36 | #define IPQ_QMAX_DEFAULT 1024 | ||
37 | #define IPQ_PROC_FS_NAME "ip_queue" | ||
38 | #define NET_IPQ_QMAX 2088 | ||
39 | #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" | ||
40 | |||
41 | typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); | ||
42 | |||
43 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; | ||
44 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; | ||
45 | static DEFINE_SPINLOCK(queue_lock); | ||
46 | static int peer_pid __read_mostly; | ||
47 | static unsigned int copy_range __read_mostly; | ||
48 | static unsigned int queue_total; | ||
49 | static unsigned int queue_dropped = 0; | ||
50 | static unsigned int queue_user_dropped = 0; | ||
51 | static struct sock *ipqnl __read_mostly; | ||
52 | static LIST_HEAD(queue_list); | ||
53 | static DEFINE_MUTEX(ipqnl_mutex); | ||
54 | |||
55 | static inline void | ||
56 | __ipq_enqueue_entry(struct nf_queue_entry *entry) | ||
57 | { | ||
58 | list_add_tail(&entry->list, &queue_list); | ||
59 | queue_total++; | ||
60 | } | ||
61 | |||
62 | static inline int | ||
63 | __ipq_set_mode(unsigned char mode, unsigned int range) | ||
64 | { | ||
65 | int status = 0; | ||
66 | |||
67 | switch(mode) { | ||
68 | case IPQ_COPY_NONE: | ||
69 | case IPQ_COPY_META: | ||
70 | copy_mode = mode; | ||
71 | copy_range = 0; | ||
72 | break; | ||
73 | |||
74 | case IPQ_COPY_PACKET: | ||
75 | if (range > 0xFFFF) | ||
76 | range = 0xFFFF; | ||
77 | copy_range = range; | ||
78 | copy_mode = mode; | ||
79 | break; | ||
80 | |||
81 | default: | ||
82 | status = -EINVAL; | ||
83 | |||
84 | } | ||
85 | return status; | ||
86 | } | ||
87 | |||
88 | static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); | ||
89 | |||
90 | static inline void | ||
91 | __ipq_reset(void) | ||
92 | { | ||
93 | peer_pid = 0; | ||
94 | net_disable_timestamp(); | ||
95 | __ipq_set_mode(IPQ_COPY_NONE, 0); | ||
96 | __ipq_flush(NULL, 0); | ||
97 | } | ||
98 | |||
99 | static struct nf_queue_entry * | ||
100 | ipq_find_dequeue_entry(unsigned long id) | ||
101 | { | ||
102 | struct nf_queue_entry *entry = NULL, *i; | ||
103 | |||
104 | spin_lock_bh(&queue_lock); | ||
105 | |||
106 | list_for_each_entry(i, &queue_list, list) { | ||
107 | if ((unsigned long)i == id) { | ||
108 | entry = i; | ||
109 | break; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | if (entry) { | ||
114 | list_del(&entry->list); | ||
115 | queue_total--; | ||
116 | } | ||
117 | |||
118 | spin_unlock_bh(&queue_lock); | ||
119 | return entry; | ||
120 | } | ||
121 | |||
122 | static void | ||
123 | __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
124 | { | ||
125 | struct nf_queue_entry *entry, *next; | ||
126 | |||
127 | list_for_each_entry_safe(entry, next, &queue_list, list) { | ||
128 | if (!cmpfn || cmpfn(entry, data)) { | ||
129 | list_del(&entry->list); | ||
130 | queue_total--; | ||
131 | nf_reinject(entry, NF_DROP); | ||
132 | } | ||
133 | } | ||
134 | } | ||
135 | |||
136 | static void | ||
137 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
138 | { | ||
139 | spin_lock_bh(&queue_lock); | ||
140 | __ipq_flush(cmpfn, data); | ||
141 | spin_unlock_bh(&queue_lock); | ||
142 | } | ||
143 | |||
144 | static struct sk_buff * | ||
145 | ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | ||
146 | { | ||
147 | sk_buff_data_t old_tail; | ||
148 | size_t size = 0; | ||
149 | size_t data_len = 0; | ||
150 | struct sk_buff *skb; | ||
151 | struct ipq_packet_msg *pmsg; | ||
152 | struct nlmsghdr *nlh; | ||
153 | struct timeval tv; | ||
154 | |||
155 | switch (ACCESS_ONCE(copy_mode)) { | ||
156 | case IPQ_COPY_META: | ||
157 | case IPQ_COPY_NONE: | ||
158 | size = NLMSG_SPACE(sizeof(*pmsg)); | ||
159 | break; | ||
160 | |||
161 | case IPQ_COPY_PACKET: | ||
162 | if (entry->skb->ip_summed == CHECKSUM_PARTIAL && | ||
163 | (*errp = skb_checksum_help(entry->skb))) | ||
164 | return NULL; | ||
165 | |||
166 | data_len = ACCESS_ONCE(copy_range); | ||
167 | if (data_len == 0 || data_len > entry->skb->len) | ||
168 | data_len = entry->skb->len; | ||
169 | |||
170 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); | ||
171 | break; | ||
172 | |||
173 | default: | ||
174 | *errp = -EINVAL; | ||
175 | return NULL; | ||
176 | } | ||
177 | |||
178 | skb = alloc_skb(size, GFP_ATOMIC); | ||
179 | if (!skb) | ||
180 | goto nlmsg_failure; | ||
181 | |||
182 | old_tail = skb->tail; | ||
183 | nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); | ||
184 | pmsg = NLMSG_DATA(nlh); | ||
185 | memset(pmsg, 0, sizeof(*pmsg)); | ||
186 | |||
187 | pmsg->packet_id = (unsigned long )entry; | ||
188 | pmsg->data_len = data_len; | ||
189 | tv = ktime_to_timeval(entry->skb->tstamp); | ||
190 | pmsg->timestamp_sec = tv.tv_sec; | ||
191 | pmsg->timestamp_usec = tv.tv_usec; | ||
192 | pmsg->mark = entry->skb->mark; | ||
193 | pmsg->hook = entry->hook; | ||
194 | pmsg->hw_protocol = entry->skb->protocol; | ||
195 | |||
196 | if (entry->indev) | ||
197 | strcpy(pmsg->indev_name, entry->indev->name); | ||
198 | else | ||
199 | pmsg->indev_name[0] = '\0'; | ||
200 | |||
201 | if (entry->outdev) | ||
202 | strcpy(pmsg->outdev_name, entry->outdev->name); | ||
203 | else | ||
204 | pmsg->outdev_name[0] = '\0'; | ||
205 | |||
206 | if (entry->indev && entry->skb->dev && | ||
207 | entry->skb->mac_header != entry->skb->network_header) { | ||
208 | pmsg->hw_type = entry->skb->dev->type; | ||
209 | pmsg->hw_addrlen = dev_parse_header(entry->skb, | ||
210 | pmsg->hw_addr); | ||
211 | } | ||
212 | |||
213 | if (data_len) | ||
214 | if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) | ||
215 | BUG(); | ||
216 | |||
217 | nlh->nlmsg_len = skb->tail - old_tail; | ||
218 | return skb; | ||
219 | |||
220 | nlmsg_failure: | ||
221 | kfree_skb(skb); | ||
222 | *errp = -EINVAL; | ||
223 | printk(KERN_ERR "ip_queue: error creating packet message\n"); | ||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | static int | ||
228 | ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | ||
229 | { | ||
230 | int status = -EINVAL; | ||
231 | struct sk_buff *nskb; | ||
232 | |||
233 | if (copy_mode == IPQ_COPY_NONE) | ||
234 | return -EAGAIN; | ||
235 | |||
236 | nskb = ipq_build_packet_message(entry, &status); | ||
237 | if (nskb == NULL) | ||
238 | return status; | ||
239 | |||
240 | spin_lock_bh(&queue_lock); | ||
241 | |||
242 | if (!peer_pid) | ||
243 | goto err_out_free_nskb; | ||
244 | |||
245 | if (queue_total >= queue_maxlen) { | ||
246 | queue_dropped++; | ||
247 | status = -ENOSPC; | ||
248 | if (net_ratelimit()) | ||
249 | printk (KERN_WARNING "ip_queue: full at %d entries, " | ||
250 | "dropping packets(s). Dropped: %d\n", queue_total, | ||
251 | queue_dropped); | ||
252 | goto err_out_free_nskb; | ||
253 | } | ||
254 | |||
255 | /* netlink_unicast will either free the nskb or attach it to a socket */ | ||
256 | status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); | ||
257 | if (status < 0) { | ||
258 | queue_user_dropped++; | ||
259 | goto err_out_unlock; | ||
260 | } | ||
261 | |||
262 | __ipq_enqueue_entry(entry); | ||
263 | |||
264 | spin_unlock_bh(&queue_lock); | ||
265 | return status; | ||
266 | |||
267 | err_out_free_nskb: | ||
268 | kfree_skb(nskb); | ||
269 | |||
270 | err_out_unlock: | ||
271 | spin_unlock_bh(&queue_lock); | ||
272 | return status; | ||
273 | } | ||
274 | |||
275 | static int | ||
276 | ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e) | ||
277 | { | ||
278 | int diff; | ||
279 | struct iphdr *user_iph = (struct iphdr *)v->payload; | ||
280 | struct sk_buff *nskb; | ||
281 | |||
282 | if (v->data_len < sizeof(*user_iph)) | ||
283 | return 0; | ||
284 | diff = v->data_len - e->skb->len; | ||
285 | if (diff < 0) { | ||
286 | if (pskb_trim(e->skb, v->data_len)) | ||
287 | return -ENOMEM; | ||
288 | } else if (diff > 0) { | ||
289 | if (v->data_len > 0xFFFF) | ||
290 | return -EINVAL; | ||
291 | if (diff > skb_tailroom(e->skb)) { | ||
292 | nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), | ||
293 | diff, GFP_ATOMIC); | ||
294 | if (!nskb) { | ||
295 | printk(KERN_WARNING "ip_queue: error " | ||
296 | "in mangle, dropping packet\n"); | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | kfree_skb(e->skb); | ||
300 | e->skb = nskb; | ||
301 | } | ||
302 | skb_put(e->skb, diff); | ||
303 | } | ||
304 | if (!skb_make_writable(e->skb, v->data_len)) | ||
305 | return -ENOMEM; | ||
306 | skb_copy_to_linear_data(e->skb, v->payload, v->data_len); | ||
307 | e->skb->ip_summed = CHECKSUM_NONE; | ||
308 | |||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | static int | ||
313 | ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) | ||
314 | { | ||
315 | struct nf_queue_entry *entry; | ||
316 | |||
317 | if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) | ||
318 | return -EINVAL; | ||
319 | |||
320 | entry = ipq_find_dequeue_entry(vmsg->id); | ||
321 | if (entry == NULL) | ||
322 | return -ENOENT; | ||
323 | else { | ||
324 | int verdict = vmsg->value; | ||
325 | |||
326 | if (vmsg->data_len && vmsg->data_len == len) | ||
327 | if (ipq_mangle_ipv4(vmsg, entry) < 0) | ||
328 | verdict = NF_DROP; | ||
329 | |||
330 | nf_reinject(entry, verdict); | ||
331 | return 0; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static int | ||
336 | ipq_set_mode(unsigned char mode, unsigned int range) | ||
337 | { | ||
338 | int status; | ||
339 | |||
340 | spin_lock_bh(&queue_lock); | ||
341 | status = __ipq_set_mode(mode, range); | ||
342 | spin_unlock_bh(&queue_lock); | ||
343 | return status; | ||
344 | } | ||
345 | |||
346 | static int | ||
347 | ipq_receive_peer(struct ipq_peer_msg *pmsg, | ||
348 | unsigned char type, unsigned int len) | ||
349 | { | ||
350 | int status = 0; | ||
351 | |||
352 | if (len < sizeof(*pmsg)) | ||
353 | return -EINVAL; | ||
354 | |||
355 | switch (type) { | ||
356 | case IPQM_MODE: | ||
357 | status = ipq_set_mode(pmsg->msg.mode.value, | ||
358 | pmsg->msg.mode.range); | ||
359 | break; | ||
360 | |||
361 | case IPQM_VERDICT: | ||
362 | status = ipq_set_verdict(&pmsg->msg.verdict, | ||
363 | len - sizeof(*pmsg)); | ||
364 | break; | ||
365 | default: | ||
366 | status = -EINVAL; | ||
367 | } | ||
368 | return status; | ||
369 | } | ||
370 | |||
371 | static int | ||
372 | dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) | ||
373 | { | ||
374 | if (entry->indev) | ||
375 | if (entry->indev->ifindex == ifindex) | ||
376 | return 1; | ||
377 | if (entry->outdev) | ||
378 | if (entry->outdev->ifindex == ifindex) | ||
379 | return 1; | ||
380 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
381 | if (entry->skb->nf_bridge) { | ||
382 | if (entry->skb->nf_bridge->physindev && | ||
383 | entry->skb->nf_bridge->physindev->ifindex == ifindex) | ||
384 | return 1; | ||
385 | if (entry->skb->nf_bridge->physoutdev && | ||
386 | entry->skb->nf_bridge->physoutdev->ifindex == ifindex) | ||
387 | return 1; | ||
388 | } | ||
389 | #endif | ||
390 | return 0; | ||
391 | } | ||
392 | |||
393 | static void | ||
394 | ipq_dev_drop(int ifindex) | ||
395 | { | ||
396 | ipq_flush(dev_cmp, ifindex); | ||
397 | } | ||
398 | |||
399 | #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) | ||
400 | |||
401 | static inline void | ||
402 | __ipq_rcv_skb(struct sk_buff *skb) | ||
403 | { | ||
404 | int status, type, pid, flags; | ||
405 | unsigned int nlmsglen, skblen; | ||
406 | struct nlmsghdr *nlh; | ||
407 | bool enable_timestamp = false; | ||
408 | |||
409 | skblen = skb->len; | ||
410 | if (skblen < sizeof(*nlh)) | ||
411 | return; | ||
412 | |||
413 | nlh = nlmsg_hdr(skb); | ||
414 | nlmsglen = nlh->nlmsg_len; | ||
415 | if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) | ||
416 | return; | ||
417 | |||
418 | pid = nlh->nlmsg_pid; | ||
419 | flags = nlh->nlmsg_flags; | ||
420 | |||
421 | if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) | ||
422 | RCV_SKB_FAIL(-EINVAL); | ||
423 | |||
424 | if (flags & MSG_TRUNC) | ||
425 | RCV_SKB_FAIL(-ECOMM); | ||
426 | |||
427 | type = nlh->nlmsg_type; | ||
428 | if (type < NLMSG_NOOP || type >= IPQM_MAX) | ||
429 | RCV_SKB_FAIL(-EINVAL); | ||
430 | |||
431 | if (type <= IPQM_BASE) | ||
432 | return; | ||
433 | |||
434 | if (!capable(CAP_NET_ADMIN)) | ||
435 | RCV_SKB_FAIL(-EPERM); | ||
436 | |||
437 | spin_lock_bh(&queue_lock); | ||
438 | |||
439 | if (peer_pid) { | ||
440 | if (peer_pid != pid) { | ||
441 | spin_unlock_bh(&queue_lock); | ||
442 | RCV_SKB_FAIL(-EBUSY); | ||
443 | } | ||
444 | } else { | ||
445 | enable_timestamp = true; | ||
446 | peer_pid = pid; | ||
447 | } | ||
448 | |||
449 | spin_unlock_bh(&queue_lock); | ||
450 | if (enable_timestamp) | ||
451 | net_enable_timestamp(); | ||
452 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, | ||
453 | nlmsglen - NLMSG_LENGTH(0)); | ||
454 | if (status < 0) | ||
455 | RCV_SKB_FAIL(status); | ||
456 | |||
457 | if (flags & NLM_F_ACK) | ||
458 | netlink_ack(skb, nlh, 0); | ||
459 | } | ||
460 | |||
461 | static void | ||
462 | ipq_rcv_skb(struct sk_buff *skb) | ||
463 | { | ||
464 | mutex_lock(&ipqnl_mutex); | ||
465 | __ipq_rcv_skb(skb); | ||
466 | mutex_unlock(&ipqnl_mutex); | ||
467 | } | ||
468 | |||
469 | static int | ||
470 | ipq_rcv_dev_event(struct notifier_block *this, | ||
471 | unsigned long event, void *ptr) | ||
472 | { | ||
473 | struct net_device *dev = ptr; | ||
474 | |||
475 | if (!net_eq(dev_net(dev), &init_net)) | ||
476 | return NOTIFY_DONE; | ||
477 | |||
478 | /* Drop any packets associated with the downed device */ | ||
479 | if (event == NETDEV_DOWN) | ||
480 | ipq_dev_drop(dev->ifindex); | ||
481 | return NOTIFY_DONE; | ||
482 | } | ||
483 | |||
484 | static struct notifier_block ipq_dev_notifier = { | ||
485 | .notifier_call = ipq_rcv_dev_event, | ||
486 | }; | ||
487 | |||
488 | static int | ||
489 | ipq_rcv_nl_event(struct notifier_block *this, | ||
490 | unsigned long event, void *ptr) | ||
491 | { | ||
492 | struct netlink_notify *n = ptr; | ||
493 | |||
494 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { | ||
495 | spin_lock_bh(&queue_lock); | ||
496 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) | ||
497 | __ipq_reset(); | ||
498 | spin_unlock_bh(&queue_lock); | ||
499 | } | ||
500 | return NOTIFY_DONE; | ||
501 | } | ||
502 | |||
503 | static struct notifier_block ipq_nl_notifier = { | ||
504 | .notifier_call = ipq_rcv_nl_event, | ||
505 | }; | ||
506 | |||
507 | #ifdef CONFIG_SYSCTL | ||
508 | static struct ctl_table_header *ipq_sysctl_header; | ||
509 | |||
510 | static ctl_table ipq_table[] = { | ||
511 | { | ||
512 | .procname = NET_IPQ_QMAX_NAME, | ||
513 | .data = &queue_maxlen, | ||
514 | .maxlen = sizeof(queue_maxlen), | ||
515 | .mode = 0644, | ||
516 | .proc_handler = proc_dointvec | ||
517 | }, | ||
518 | { } | ||
519 | }; | ||
520 | #endif | ||
521 | |||
522 | #ifdef CONFIG_PROC_FS | ||
523 | static int ip_queue_show(struct seq_file *m, void *v) | ||
524 | { | ||
525 | spin_lock_bh(&queue_lock); | ||
526 | |||
527 | seq_printf(m, | ||
528 | "Peer PID : %d\n" | ||
529 | "Copy mode : %hu\n" | ||
530 | "Copy range : %u\n" | ||
531 | "Queue length : %u\n" | ||
532 | "Queue max. length : %u\n" | ||
533 | "Queue dropped : %u\n" | ||
534 | "Netlink dropped : %u\n", | ||
535 | peer_pid, | ||
536 | copy_mode, | ||
537 | copy_range, | ||
538 | queue_total, | ||
539 | queue_maxlen, | ||
540 | queue_dropped, | ||
541 | queue_user_dropped); | ||
542 | |||
543 | spin_unlock_bh(&queue_lock); | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | static int ip_queue_open(struct inode *inode, struct file *file) | ||
548 | { | ||
549 | return single_open(file, ip_queue_show, NULL); | ||
550 | } | ||
551 | |||
552 | static const struct file_operations ip_queue_proc_fops = { | ||
553 | .open = ip_queue_open, | ||
554 | .read = seq_read, | ||
555 | .llseek = seq_lseek, | ||
556 | .release = single_release, | ||
557 | .owner = THIS_MODULE, | ||
558 | }; | ||
559 | #endif | ||
560 | |||
561 | static const struct nf_queue_handler nfqh = { | ||
562 | .name = "ip_queue", | ||
563 | .outfn = &ipq_enqueue_packet, | ||
564 | }; | ||
565 | |||
566 | static int __init ip_queue_init(void) | ||
567 | { | ||
568 | int status = -ENOMEM; | ||
569 | struct proc_dir_entry *proc __maybe_unused; | ||
570 | |||
571 | netlink_register_notifier(&ipq_nl_notifier); | ||
572 | ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, | ||
573 | ipq_rcv_skb, NULL, THIS_MODULE); | ||
574 | if (ipqnl == NULL) { | ||
575 | printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); | ||
576 | goto cleanup_netlink_notifier; | ||
577 | } | ||
578 | |||
579 | #ifdef CONFIG_PROC_FS | ||
580 | proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, | ||
581 | &ip_queue_proc_fops); | ||
582 | if (!proc) { | ||
583 | printk(KERN_ERR "ip_queue: failed to create proc entry\n"); | ||
584 | goto cleanup_ipqnl; | ||
585 | } | ||
586 | #endif | ||
587 | register_netdevice_notifier(&ipq_dev_notifier); | ||
588 | #ifdef CONFIG_SYSCTL | ||
589 | ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv4", ipq_table); | ||
590 | #endif | ||
591 | status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh); | ||
592 | if (status < 0) { | ||
593 | printk(KERN_ERR "ip_queue: failed to register queue handler\n"); | ||
594 | goto cleanup_sysctl; | ||
595 | } | ||
596 | return status; | ||
597 | |||
598 | cleanup_sysctl: | ||
599 | #ifdef CONFIG_SYSCTL | ||
600 | unregister_net_sysctl_table(ipq_sysctl_header); | ||
601 | #endif | ||
602 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
603 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
604 | cleanup_ipqnl: __maybe_unused | ||
605 | netlink_kernel_release(ipqnl); | ||
606 | mutex_lock(&ipqnl_mutex); | ||
607 | mutex_unlock(&ipqnl_mutex); | ||
608 | |||
609 | cleanup_netlink_notifier: | ||
610 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
611 | return status; | ||
612 | } | ||
613 | |||
614 | static void __exit ip_queue_fini(void) | ||
615 | { | ||
616 | nf_unregister_queue_handlers(&nfqh); | ||
617 | |||
618 | ipq_flush(NULL, 0); | ||
619 | |||
620 | #ifdef CONFIG_SYSCTL | ||
621 | unregister_net_sysctl_table(ipq_sysctl_header); | ||
622 | #endif | ||
623 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
624 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
625 | |||
626 | netlink_kernel_release(ipqnl); | ||
627 | mutex_lock(&ipqnl_mutex); | ||
628 | mutex_unlock(&ipqnl_mutex); | ||
629 | |||
630 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
631 | } | ||
632 | |||
633 | MODULE_DESCRIPTION("IPv4 packet queue handler"); | ||
634 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | ||
635 | MODULE_LICENSE("GPL"); | ||
636 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); | ||
637 | |||
638 | module_init(ip_queue_init); | ||
639 | module_exit(ip_queue_fini); | ||
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index d33cddd16fbb..10135342799e 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig | |||
@@ -25,28 +25,6 @@ config NF_CONNTRACK_IPV6 | |||
25 | 25 | ||
26 | To compile it as a module, choose M here. If unsure, say N. | 26 | To compile it as a module, choose M here. If unsure, say N. |
27 | 27 | ||
28 | config IP6_NF_QUEUE | ||
29 | tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" | ||
30 | depends on INET && IPV6 && NETFILTER | ||
31 | depends on NETFILTER_ADVANCED | ||
32 | ---help--- | ||
33 | |||
34 | This option adds a queue handler to the kernel for IPv6 | ||
35 | packets which enables users to receive the filtered packets | ||
36 | with QUEUE target using libipq. | ||
37 | |||
38 | This option enables the old IPv6-only "ip6_queue" implementation | ||
39 | which has been obsoleted by the new "nfnetlink_queue" code (see | ||
40 | CONFIG_NETFILTER_NETLINK_QUEUE). | ||
41 | |||
42 | (C) Fernando Anton 2001 | ||
43 | IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. | ||
44 | Universidad Carlos III de Madrid | ||
45 | Universidad Politecnica de Alcala de Henares | ||
46 | email: <fanton@it.uc3m.es>. | ||
47 | |||
48 | To compile it as a module, choose M here. If unsure, say N. | ||
49 | |||
50 | config IP6_NF_IPTABLES | 28 | config IP6_NF_IPTABLES |
51 | tristate "IP6 tables support (required for filtering)" | 29 | tristate "IP6 tables support (required for filtering)" |
52 | depends on INET && IPV6 | 30 | depends on INET && IPV6 |
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index d4dfd0a21097..534d3f216f7b 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile | |||
@@ -6,7 +6,6 @@ | |||
6 | obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o | 6 | obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o |
7 | obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o | 7 | obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o |
8 | obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o | 8 | obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o |
9 | obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o | ||
10 | obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o | 9 | obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o |
11 | obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o | 10 | obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o |
12 | 11 | ||
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c deleted file mode 100644 index 3ca9303b3a19..000000000000 --- a/net/ipv6/netfilter/ip6_queue.c +++ /dev/null | |||
@@ -1,641 +0,0 @@ | |||
1 | /* | ||
2 | * This is a module which is used for queueing IPv6 packets and | ||
3 | * communicating with userspace via netlink. | ||
4 | * | ||
5 | * (C) 2001 Fernando Anton, this code is GPL. | ||
6 | * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. | ||
7 | * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain | ||
8 | * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain | ||
9 | * email: fanton@it.uc3m.es | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License version 2 as | ||
13 | * published by the Free Software Foundation. | ||
14 | */ | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/ipv6.h> | ||
19 | #include <linux/notifier.h> | ||
20 | #include <linux/netdevice.h> | ||
21 | #include <linux/netfilter.h> | ||
22 | #include <linux/netlink.h> | ||
23 | #include <linux/spinlock.h> | ||
24 | #include <linux/sysctl.h> | ||
25 | #include <linux/proc_fs.h> | ||
26 | #include <linux/seq_file.h> | ||
27 | #include <linux/mutex.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <net/net_namespace.h> | ||
30 | #include <net/sock.h> | ||
31 | #include <net/ipv6.h> | ||
32 | #include <net/ip6_route.h> | ||
33 | #include <net/netfilter/nf_queue.h> | ||
34 | #include <linux/netfilter_ipv4/ip_queue.h> | ||
35 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
36 | #include <linux/netfilter_ipv6/ip6_tables.h> | ||
37 | |||
38 | #define IPQ_QMAX_DEFAULT 1024 | ||
39 | #define IPQ_PROC_FS_NAME "ip6_queue" | ||
40 | #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" | ||
41 | |||
42 | typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); | ||
43 | |||
44 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; | ||
45 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; | ||
46 | static DEFINE_SPINLOCK(queue_lock); | ||
47 | static int peer_pid __read_mostly; | ||
48 | static unsigned int copy_range __read_mostly; | ||
49 | static unsigned int queue_total; | ||
50 | static unsigned int queue_dropped = 0; | ||
51 | static unsigned int queue_user_dropped = 0; | ||
52 | static struct sock *ipqnl __read_mostly; | ||
53 | static LIST_HEAD(queue_list); | ||
54 | static DEFINE_MUTEX(ipqnl_mutex); | ||
55 | |||
56 | static inline void | ||
57 | __ipq_enqueue_entry(struct nf_queue_entry *entry) | ||
58 | { | ||
59 | list_add_tail(&entry->list, &queue_list); | ||
60 | queue_total++; | ||
61 | } | ||
62 | |||
63 | static inline int | ||
64 | __ipq_set_mode(unsigned char mode, unsigned int range) | ||
65 | { | ||
66 | int status = 0; | ||
67 | |||
68 | switch(mode) { | ||
69 | case IPQ_COPY_NONE: | ||
70 | case IPQ_COPY_META: | ||
71 | copy_mode = mode; | ||
72 | copy_range = 0; | ||
73 | break; | ||
74 | |||
75 | case IPQ_COPY_PACKET: | ||
76 | if (range > 0xFFFF) | ||
77 | range = 0xFFFF; | ||
78 | copy_range = range; | ||
79 | copy_mode = mode; | ||
80 | break; | ||
81 | |||
82 | default: | ||
83 | status = -EINVAL; | ||
84 | |||
85 | } | ||
86 | return status; | ||
87 | } | ||
88 | |||
89 | static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); | ||
90 | |||
91 | static inline void | ||
92 | __ipq_reset(void) | ||
93 | { | ||
94 | peer_pid = 0; | ||
95 | net_disable_timestamp(); | ||
96 | __ipq_set_mode(IPQ_COPY_NONE, 0); | ||
97 | __ipq_flush(NULL, 0); | ||
98 | } | ||
99 | |||
100 | static struct nf_queue_entry * | ||
101 | ipq_find_dequeue_entry(unsigned long id) | ||
102 | { | ||
103 | struct nf_queue_entry *entry = NULL, *i; | ||
104 | |||
105 | spin_lock_bh(&queue_lock); | ||
106 | |||
107 | list_for_each_entry(i, &queue_list, list) { | ||
108 | if ((unsigned long)i == id) { | ||
109 | entry = i; | ||
110 | break; | ||
111 | } | ||
112 | } | ||
113 | |||
114 | if (entry) { | ||
115 | list_del(&entry->list); | ||
116 | queue_total--; | ||
117 | } | ||
118 | |||
119 | spin_unlock_bh(&queue_lock); | ||
120 | return entry; | ||
121 | } | ||
122 | |||
123 | static void | ||
124 | __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
125 | { | ||
126 | struct nf_queue_entry *entry, *next; | ||
127 | |||
128 | list_for_each_entry_safe(entry, next, &queue_list, list) { | ||
129 | if (!cmpfn || cmpfn(entry, data)) { | ||
130 | list_del(&entry->list); | ||
131 | queue_total--; | ||
132 | nf_reinject(entry, NF_DROP); | ||
133 | } | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static void | ||
138 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
139 | { | ||
140 | spin_lock_bh(&queue_lock); | ||
141 | __ipq_flush(cmpfn, data); | ||
142 | spin_unlock_bh(&queue_lock); | ||
143 | } | ||
144 | |||
145 | static struct sk_buff * | ||
146 | ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | ||
147 | { | ||
148 | sk_buff_data_t old_tail; | ||
149 | size_t size = 0; | ||
150 | size_t data_len = 0; | ||
151 | struct sk_buff *skb; | ||
152 | struct ipq_packet_msg *pmsg; | ||
153 | struct nlmsghdr *nlh; | ||
154 | struct timeval tv; | ||
155 | |||
156 | switch (ACCESS_ONCE(copy_mode)) { | ||
157 | case IPQ_COPY_META: | ||
158 | case IPQ_COPY_NONE: | ||
159 | size = NLMSG_SPACE(sizeof(*pmsg)); | ||
160 | break; | ||
161 | |||
162 | case IPQ_COPY_PACKET: | ||
163 | if (entry->skb->ip_summed == CHECKSUM_PARTIAL && | ||
164 | (*errp = skb_checksum_help(entry->skb))) | ||
165 | return NULL; | ||
166 | |||
167 | data_len = ACCESS_ONCE(copy_range); | ||
168 | if (data_len == 0 || data_len > entry->skb->len) | ||
169 | data_len = entry->skb->len; | ||
170 | |||
171 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); | ||
172 | break; | ||
173 | |||
174 | default: | ||
175 | *errp = -EINVAL; | ||
176 | return NULL; | ||
177 | } | ||
178 | |||
179 | skb = alloc_skb(size, GFP_ATOMIC); | ||
180 | if (!skb) | ||
181 | goto nlmsg_failure; | ||
182 | |||
183 | old_tail = skb->tail; | ||
184 | nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); | ||
185 | pmsg = NLMSG_DATA(nlh); | ||
186 | memset(pmsg, 0, sizeof(*pmsg)); | ||
187 | |||
188 | pmsg->packet_id = (unsigned long )entry; | ||
189 | pmsg->data_len = data_len; | ||
190 | tv = ktime_to_timeval(entry->skb->tstamp); | ||
191 | pmsg->timestamp_sec = tv.tv_sec; | ||
192 | pmsg->timestamp_usec = tv.tv_usec; | ||
193 | pmsg->mark = entry->skb->mark; | ||
194 | pmsg->hook = entry->hook; | ||
195 | pmsg->hw_protocol = entry->skb->protocol; | ||
196 | |||
197 | if (entry->indev) | ||
198 | strcpy(pmsg->indev_name, entry->indev->name); | ||
199 | else | ||
200 | pmsg->indev_name[0] = '\0'; | ||
201 | |||
202 | if (entry->outdev) | ||
203 | strcpy(pmsg->outdev_name, entry->outdev->name); | ||
204 | else | ||
205 | pmsg->outdev_name[0] = '\0'; | ||
206 | |||
207 | if (entry->indev && entry->skb->dev && | ||
208 | entry->skb->mac_header != entry->skb->network_header) { | ||
209 | pmsg->hw_type = entry->skb->dev->type; | ||
210 | pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); | ||
211 | } | ||
212 | |||
213 | if (data_len) | ||
214 | if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) | ||
215 | BUG(); | ||
216 | |||
217 | nlh->nlmsg_len = skb->tail - old_tail; | ||
218 | return skb; | ||
219 | |||
220 | nlmsg_failure: | ||
221 | kfree_skb(skb); | ||
222 | *errp = -EINVAL; | ||
223 | printk(KERN_ERR "ip6_queue: error creating packet message\n"); | ||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | static int | ||
228 | ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | ||
229 | { | ||
230 | int status = -EINVAL; | ||
231 | struct sk_buff *nskb; | ||
232 | |||
233 | if (copy_mode == IPQ_COPY_NONE) | ||
234 | return -EAGAIN; | ||
235 | |||
236 | nskb = ipq_build_packet_message(entry, &status); | ||
237 | if (nskb == NULL) | ||
238 | return status; | ||
239 | |||
240 | spin_lock_bh(&queue_lock); | ||
241 | |||
242 | if (!peer_pid) | ||
243 | goto err_out_free_nskb; | ||
244 | |||
245 | if (queue_total >= queue_maxlen) { | ||
246 | queue_dropped++; | ||
247 | status = -ENOSPC; | ||
248 | if (net_ratelimit()) | ||
249 | printk (KERN_WARNING "ip6_queue: fill at %d entries, " | ||
250 | "dropping packet(s). Dropped: %d\n", queue_total, | ||
251 | queue_dropped); | ||
252 | goto err_out_free_nskb; | ||
253 | } | ||
254 | |||
255 | /* netlink_unicast will either free the nskb or attach it to a socket */ | ||
256 | status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); | ||
257 | if (status < 0) { | ||
258 | queue_user_dropped++; | ||
259 | goto err_out_unlock; | ||
260 | } | ||
261 | |||
262 | __ipq_enqueue_entry(entry); | ||
263 | |||
264 | spin_unlock_bh(&queue_lock); | ||
265 | return status; | ||
266 | |||
267 | err_out_free_nskb: | ||
268 | kfree_skb(nskb); | ||
269 | |||
270 | err_out_unlock: | ||
271 | spin_unlock_bh(&queue_lock); | ||
272 | return status; | ||
273 | } | ||
274 | |||
275 | static int | ||
276 | ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e) | ||
277 | { | ||
278 | int diff; | ||
279 | struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; | ||
280 | struct sk_buff *nskb; | ||
281 | |||
282 | if (v->data_len < sizeof(*user_iph)) | ||
283 | return 0; | ||
284 | diff = v->data_len - e->skb->len; | ||
285 | if (diff < 0) { | ||
286 | if (pskb_trim(e->skb, v->data_len)) | ||
287 | return -ENOMEM; | ||
288 | } else if (diff > 0) { | ||
289 | if (v->data_len > 0xFFFF) | ||
290 | return -EINVAL; | ||
291 | if (diff > skb_tailroom(e->skb)) { | ||
292 | nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), | ||
293 | diff, GFP_ATOMIC); | ||
294 | if (!nskb) { | ||
295 | printk(KERN_WARNING "ip6_queue: OOM " | ||
296 | "in mangle, dropping packet\n"); | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | kfree_skb(e->skb); | ||
300 | e->skb = nskb; | ||
301 | } | ||
302 | skb_put(e->skb, diff); | ||
303 | } | ||
304 | if (!skb_make_writable(e->skb, v->data_len)) | ||
305 | return -ENOMEM; | ||
306 | skb_copy_to_linear_data(e->skb, v->payload, v->data_len); | ||
307 | e->skb->ip_summed = CHECKSUM_NONE; | ||
308 | |||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | static int | ||
313 | ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) | ||
314 | { | ||
315 | struct nf_queue_entry *entry; | ||
316 | |||
317 | if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) | ||
318 | return -EINVAL; | ||
319 | |||
320 | entry = ipq_find_dequeue_entry(vmsg->id); | ||
321 | if (entry == NULL) | ||
322 | return -ENOENT; | ||
323 | else { | ||
324 | int verdict = vmsg->value; | ||
325 | |||
326 | if (vmsg->data_len && vmsg->data_len == len) | ||
327 | if (ipq_mangle_ipv6(vmsg, entry) < 0) | ||
328 | verdict = NF_DROP; | ||
329 | |||
330 | nf_reinject(entry, verdict); | ||
331 | return 0; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static int | ||
336 | ipq_set_mode(unsigned char mode, unsigned int range) | ||
337 | { | ||
338 | int status; | ||
339 | |||
340 | spin_lock_bh(&queue_lock); | ||
341 | status = __ipq_set_mode(mode, range); | ||
342 | spin_unlock_bh(&queue_lock); | ||
343 | return status; | ||
344 | } | ||
345 | |||
346 | static int | ||
347 | ipq_receive_peer(struct ipq_peer_msg *pmsg, | ||
348 | unsigned char type, unsigned int len) | ||
349 | { | ||
350 | int status = 0; | ||
351 | |||
352 | if (len < sizeof(*pmsg)) | ||
353 | return -EINVAL; | ||
354 | |||
355 | switch (type) { | ||
356 | case IPQM_MODE: | ||
357 | status = ipq_set_mode(pmsg->msg.mode.value, | ||
358 | pmsg->msg.mode.range); | ||
359 | break; | ||
360 | |||
361 | case IPQM_VERDICT: | ||
362 | status = ipq_set_verdict(&pmsg->msg.verdict, | ||
363 | len - sizeof(*pmsg)); | ||
364 | break; | ||
365 | default: | ||
366 | status = -EINVAL; | ||
367 | } | ||
368 | return status; | ||
369 | } | ||
370 | |||
371 | static int | ||
372 | dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) | ||
373 | { | ||
374 | if (entry->indev) | ||
375 | if (entry->indev->ifindex == ifindex) | ||
376 | return 1; | ||
377 | |||
378 | if (entry->outdev) | ||
379 | if (entry->outdev->ifindex == ifindex) | ||
380 | return 1; | ||
381 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
382 | if (entry->skb->nf_bridge) { | ||
383 | if (entry->skb->nf_bridge->physindev && | ||
384 | entry->skb->nf_bridge->physindev->ifindex == ifindex) | ||
385 | return 1; | ||
386 | if (entry->skb->nf_bridge->physoutdev && | ||
387 | entry->skb->nf_bridge->physoutdev->ifindex == ifindex) | ||
388 | return 1; | ||
389 | } | ||
390 | #endif | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | static void | ||
395 | ipq_dev_drop(int ifindex) | ||
396 | { | ||
397 | ipq_flush(dev_cmp, ifindex); | ||
398 | } | ||
399 | |||
400 | #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) | ||
401 | |||
402 | static inline void | ||
403 | __ipq_rcv_skb(struct sk_buff *skb) | ||
404 | { | ||
405 | int status, type, pid, flags; | ||
406 | unsigned int nlmsglen, skblen; | ||
407 | struct nlmsghdr *nlh; | ||
408 | bool enable_timestamp = false; | ||
409 | |||
410 | skblen = skb->len; | ||
411 | if (skblen < sizeof(*nlh)) | ||
412 | return; | ||
413 | |||
414 | nlh = nlmsg_hdr(skb); | ||
415 | nlmsglen = nlh->nlmsg_len; | ||
416 | if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) | ||
417 | return; | ||
418 | |||
419 | pid = nlh->nlmsg_pid; | ||
420 | flags = nlh->nlmsg_flags; | ||
421 | |||
422 | if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) | ||
423 | RCV_SKB_FAIL(-EINVAL); | ||
424 | |||
425 | if (flags & MSG_TRUNC) | ||
426 | RCV_SKB_FAIL(-ECOMM); | ||
427 | |||
428 | type = nlh->nlmsg_type; | ||
429 | if (type < NLMSG_NOOP || type >= IPQM_MAX) | ||
430 | RCV_SKB_FAIL(-EINVAL); | ||
431 | |||
432 | if (type <= IPQM_BASE) | ||
433 | return; | ||
434 | |||
435 | if (!capable(CAP_NET_ADMIN)) | ||
436 | RCV_SKB_FAIL(-EPERM); | ||
437 | |||
438 | spin_lock_bh(&queue_lock); | ||
439 | |||
440 | if (peer_pid) { | ||
441 | if (peer_pid != pid) { | ||
442 | spin_unlock_bh(&queue_lock); | ||
443 | RCV_SKB_FAIL(-EBUSY); | ||
444 | } | ||
445 | } else { | ||
446 | enable_timestamp = true; | ||
447 | peer_pid = pid; | ||
448 | } | ||
449 | |||
450 | spin_unlock_bh(&queue_lock); | ||
451 | if (enable_timestamp) | ||
452 | net_enable_timestamp(); | ||
453 | |||
454 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, | ||
455 | nlmsglen - NLMSG_LENGTH(0)); | ||
456 | if (status < 0) | ||
457 | RCV_SKB_FAIL(status); | ||
458 | |||
459 | if (flags & NLM_F_ACK) | ||
460 | netlink_ack(skb, nlh, 0); | ||
461 | } | ||
462 | |||
463 | static void | ||
464 | ipq_rcv_skb(struct sk_buff *skb) | ||
465 | { | ||
466 | mutex_lock(&ipqnl_mutex); | ||
467 | __ipq_rcv_skb(skb); | ||
468 | mutex_unlock(&ipqnl_mutex); | ||
469 | } | ||
470 | |||
471 | static int | ||
472 | ipq_rcv_dev_event(struct notifier_block *this, | ||
473 | unsigned long event, void *ptr) | ||
474 | { | ||
475 | struct net_device *dev = ptr; | ||
476 | |||
477 | if (!net_eq(dev_net(dev), &init_net)) | ||
478 | return NOTIFY_DONE; | ||
479 | |||
480 | /* Drop any packets associated with the downed device */ | ||
481 | if (event == NETDEV_DOWN) | ||
482 | ipq_dev_drop(dev->ifindex); | ||
483 | return NOTIFY_DONE; | ||
484 | } | ||
485 | |||
486 | static struct notifier_block ipq_dev_notifier = { | ||
487 | .notifier_call = ipq_rcv_dev_event, | ||
488 | }; | ||
489 | |||
490 | static int | ||
491 | ipq_rcv_nl_event(struct notifier_block *this, | ||
492 | unsigned long event, void *ptr) | ||
493 | { | ||
494 | struct netlink_notify *n = ptr; | ||
495 | |||
496 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { | ||
497 | spin_lock_bh(&queue_lock); | ||
498 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) | ||
499 | __ipq_reset(); | ||
500 | spin_unlock_bh(&queue_lock); | ||
501 | } | ||
502 | return NOTIFY_DONE; | ||
503 | } | ||
504 | |||
505 | static struct notifier_block ipq_nl_notifier = { | ||
506 | .notifier_call = ipq_rcv_nl_event, | ||
507 | }; | ||
508 | |||
509 | #ifdef CONFIG_SYSCTL | ||
510 | static struct ctl_table_header *ipq_sysctl_header; | ||
511 | |||
512 | static ctl_table ipq_table[] = { | ||
513 | { | ||
514 | .procname = NET_IPQ_QMAX_NAME, | ||
515 | .data = &queue_maxlen, | ||
516 | .maxlen = sizeof(queue_maxlen), | ||
517 | .mode = 0644, | ||
518 | .proc_handler = proc_dointvec | ||
519 | }, | ||
520 | { } | ||
521 | }; | ||
522 | #endif | ||
523 | |||
524 | #ifdef CONFIG_PROC_FS | ||
525 | static int ip6_queue_show(struct seq_file *m, void *v) | ||
526 | { | ||
527 | spin_lock_bh(&queue_lock); | ||
528 | |||
529 | seq_printf(m, | ||
530 | "Peer PID : %d\n" | ||
531 | "Copy mode : %hu\n" | ||
532 | "Copy range : %u\n" | ||
533 | "Queue length : %u\n" | ||
534 | "Queue max. length : %u\n" | ||
535 | "Queue dropped : %u\n" | ||
536 | "Netfilter dropped : %u\n", | ||
537 | peer_pid, | ||
538 | copy_mode, | ||
539 | copy_range, | ||
540 | queue_total, | ||
541 | queue_maxlen, | ||
542 | queue_dropped, | ||
543 | queue_user_dropped); | ||
544 | |||
545 | spin_unlock_bh(&queue_lock); | ||
546 | return 0; | ||
547 | } | ||
548 | |||
549 | static int ip6_queue_open(struct inode *inode, struct file *file) | ||
550 | { | ||
551 | return single_open(file, ip6_queue_show, NULL); | ||
552 | } | ||
553 | |||
554 | static const struct file_operations ip6_queue_proc_fops = { | ||
555 | .open = ip6_queue_open, | ||
556 | .read = seq_read, | ||
557 | .llseek = seq_lseek, | ||
558 | .release = single_release, | ||
559 | .owner = THIS_MODULE, | ||
560 | }; | ||
561 | #endif | ||
562 | |||
563 | static const struct nf_queue_handler nfqh = { | ||
564 | .name = "ip6_queue", | ||
565 | .outfn = &ipq_enqueue_packet, | ||
566 | }; | ||
567 | |||
568 | static int __init ip6_queue_init(void) | ||
569 | { | ||
570 | int status = -ENOMEM; | ||
571 | struct proc_dir_entry *proc __maybe_unused; | ||
572 | |||
573 | netlink_register_notifier(&ipq_nl_notifier); | ||
574 | ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, | ||
575 | ipq_rcv_skb, NULL, THIS_MODULE); | ||
576 | if (ipqnl == NULL) { | ||
577 | printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); | ||
578 | goto cleanup_netlink_notifier; | ||
579 | } | ||
580 | |||
581 | #ifdef CONFIG_PROC_FS | ||
582 | proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, | ||
583 | &ip6_queue_proc_fops); | ||
584 | if (!proc) { | ||
585 | printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); | ||
586 | goto cleanup_ipqnl; | ||
587 | } | ||
588 | #endif | ||
589 | register_netdevice_notifier(&ipq_dev_notifier); | ||
590 | #ifdef CONFIG_SYSCTL | ||
591 | ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv6", ipq_table); | ||
592 | #endif | ||
593 | status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh); | ||
594 | if (status < 0) { | ||
595 | printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); | ||
596 | goto cleanup_sysctl; | ||
597 | } | ||
598 | return status; | ||
599 | |||
600 | cleanup_sysctl: | ||
601 | #ifdef CONFIG_SYSCTL | ||
602 | unregister_net_sysctl_table(ipq_sysctl_header); | ||
603 | #endif | ||
604 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
605 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
606 | |||
607 | cleanup_ipqnl: __maybe_unused | ||
608 | netlink_kernel_release(ipqnl); | ||
609 | mutex_lock(&ipqnl_mutex); | ||
610 | mutex_unlock(&ipqnl_mutex); | ||
611 | |||
612 | cleanup_netlink_notifier: | ||
613 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
614 | return status; | ||
615 | } | ||
616 | |||
617 | static void __exit ip6_queue_fini(void) | ||
618 | { | ||
619 | nf_unregister_queue_handlers(&nfqh); | ||
620 | |||
621 | ipq_flush(NULL, 0); | ||
622 | |||
623 | #ifdef CONFIG_SYSCTL | ||
624 | unregister_net_sysctl_table(ipq_sysctl_header); | ||
625 | #endif | ||
626 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
627 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
628 | |||
629 | netlink_kernel_release(ipqnl); | ||
630 | mutex_lock(&ipqnl_mutex); | ||
631 | mutex_unlock(&ipqnl_mutex); | ||
632 | |||
633 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
634 | } | ||
635 | |||
636 | MODULE_DESCRIPTION("IPv6 packet queue handler"); | ||
637 | MODULE_LICENSE("GPL"); | ||
638 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW); | ||
639 | |||
640 | module_init(ip6_queue_init); | ||
641 | module_exit(ip6_queue_fini); | ||
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 4a09b7873003..1548df9a7524 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c | |||
@@ -548,6 +548,7 @@ static inline void | |||
548 | ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) | 548 | ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) |
549 | { | 549 | { |
550 | unsigned int conn_flags; | 550 | unsigned int conn_flags; |
551 | __u32 flags; | ||
551 | 552 | ||
552 | /* if dest is NULL, then return directly */ | 553 | /* if dest is NULL, then return directly */ |
553 | if (!dest) | 554 | if (!dest) |
@@ -559,17 +560,19 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) | |||
559 | conn_flags = atomic_read(&dest->conn_flags); | 560 | conn_flags = atomic_read(&dest->conn_flags); |
560 | if (cp->protocol != IPPROTO_UDP) | 561 | if (cp->protocol != IPPROTO_UDP) |
561 | conn_flags &= ~IP_VS_CONN_F_ONE_PACKET; | 562 | conn_flags &= ~IP_VS_CONN_F_ONE_PACKET; |
563 | flags = cp->flags; | ||
562 | /* Bind with the destination and its corresponding transmitter */ | 564 | /* Bind with the destination and its corresponding transmitter */ |
563 | if (cp->flags & IP_VS_CONN_F_SYNC) { | 565 | if (flags & IP_VS_CONN_F_SYNC) { |
564 | /* if the connection is not template and is created | 566 | /* if the connection is not template and is created |
565 | * by sync, preserve the activity flag. | 567 | * by sync, preserve the activity flag. |
566 | */ | 568 | */ |
567 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) | 569 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) |
568 | conn_flags &= ~IP_VS_CONN_F_INACTIVE; | 570 | conn_flags &= ~IP_VS_CONN_F_INACTIVE; |
569 | /* connections inherit forwarding method from dest */ | 571 | /* connections inherit forwarding method from dest */ |
570 | cp->flags &= ~IP_VS_CONN_F_FWD_MASK; | 572 | flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT); |
571 | } | 573 | } |
572 | cp->flags |= conn_flags; | 574 | flags |= conn_flags; |
575 | cp->flags = flags; | ||
573 | cp->dest = dest; | 576 | cp->dest = dest; |
574 | 577 | ||
575 | IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " | 578 | IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " |
@@ -584,12 +587,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) | |||
584 | atomic_read(&dest->refcnt)); | 587 | atomic_read(&dest->refcnt)); |
585 | 588 | ||
586 | /* Update the connection counters */ | 589 | /* Update the connection counters */ |
587 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { | 590 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { |
588 | /* It is a normal connection, so increase the inactive | 591 | /* It is a normal connection, so modify the counters |
589 | connection counter because it is in TCP SYNRECV | 592 | * according to the flags, later the protocol can |
590 | state (inactive) or other protocol inacive state */ | 593 | * update them on state change |
591 | if ((cp->flags & IP_VS_CONN_F_SYNC) && | 594 | */ |
592 | (!(cp->flags & IP_VS_CONN_F_INACTIVE))) | 595 | if (!(flags & IP_VS_CONN_F_INACTIVE)) |
593 | atomic_inc(&dest->activeconns); | 596 | atomic_inc(&dest->activeconns); |
594 | else | 597 | else |
595 | atomic_inc(&dest->inactconns); | 598 | atomic_inc(&dest->inactconns); |
@@ -613,14 +616,40 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) | |||
613 | { | 616 | { |
614 | struct ip_vs_dest *dest; | 617 | struct ip_vs_dest *dest; |
615 | 618 | ||
616 | if ((cp) && (!cp->dest)) { | 619 | dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, |
617 | dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, | 620 | cp->dport, &cp->vaddr, cp->vport, |
618 | cp->dport, &cp->vaddr, cp->vport, | 621 | cp->protocol, cp->fwmark, cp->flags); |
619 | cp->protocol, cp->fwmark, cp->flags); | 622 | if (dest) { |
623 | struct ip_vs_proto_data *pd; | ||
624 | |||
625 | spin_lock(&cp->lock); | ||
626 | if (cp->dest) { | ||
627 | spin_unlock(&cp->lock); | ||
628 | return dest; | ||
629 | } | ||
630 | |||
631 | /* Applications work depending on the forwarding method | ||
632 | * but better to reassign them always when binding dest */ | ||
633 | if (cp->app) | ||
634 | ip_vs_unbind_app(cp); | ||
635 | |||
620 | ip_vs_bind_dest(cp, dest); | 636 | ip_vs_bind_dest(cp, dest); |
621 | return dest; | 637 | spin_unlock(&cp->lock); |
622 | } else | 638 | |
623 | return NULL; | 639 | /* Update its packet transmitter */ |
640 | cp->packet_xmit = NULL; | ||
641 | #ifdef CONFIG_IP_VS_IPV6 | ||
642 | if (cp->af == AF_INET6) | ||
643 | ip_vs_bind_xmit_v6(cp); | ||
644 | else | ||
645 | #endif | ||
646 | ip_vs_bind_xmit(cp); | ||
647 | |||
648 | pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol); | ||
649 | if (pd && atomic_read(&pd->appcnt)) | ||
650 | ip_vs_bind_app(cp, pd->pp); | ||
651 | } | ||
652 | return dest; | ||
624 | } | 653 | } |
625 | 654 | ||
626 | 655 | ||
@@ -743,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct) | |||
743 | static void ip_vs_conn_expire(unsigned long data) | 772 | static void ip_vs_conn_expire(unsigned long data) |
744 | { | 773 | { |
745 | struct ip_vs_conn *cp = (struct ip_vs_conn *)data; | 774 | struct ip_vs_conn *cp = (struct ip_vs_conn *)data; |
746 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); | 775 | struct net *net = ip_vs_conn_net(cp); |
776 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
747 | 777 | ||
748 | cp->timeout = 60*HZ; | 778 | cp->timeout = 60*HZ; |
749 | 779 | ||
@@ -808,6 +838,9 @@ static void ip_vs_conn_expire(unsigned long data) | |||
808 | atomic_read(&cp->refcnt)-1, | 838 | atomic_read(&cp->refcnt)-1, |
809 | atomic_read(&cp->n_control)); | 839 | atomic_read(&cp->n_control)); |
810 | 840 | ||
841 | if (ipvs->sync_state & IP_VS_STATE_MASTER) | ||
842 | ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); | ||
843 | |||
811 | ip_vs_conn_put(cp); | 844 | ip_vs_conn_put(cp); |
812 | } | 845 | } |
813 | 846 | ||
@@ -881,6 +914,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, | |||
881 | /* Set its state and timeout */ | 914 | /* Set its state and timeout */ |
882 | cp->state = 0; | 915 | cp->state = 0; |
883 | cp->timeout = 3*HZ; | 916 | cp->timeout = 3*HZ; |
917 | cp->sync_endtime = jiffies & ~3UL; | ||
884 | 918 | ||
885 | /* Bind its packet transmitter */ | 919 | /* Bind its packet transmitter */ |
886 | #ifdef CONFIG_IP_VS_IPV6 | 920 | #ifdef CONFIG_IP_VS_IPV6 |
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index c8f36b96f44f..a54b018c6eea 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) | |||
1613 | else | 1613 | else |
1614 | pkts = atomic_add_return(1, &cp->in_pkts); | 1614 | pkts = atomic_add_return(1, &cp->in_pkts); |
1615 | 1615 | ||
1616 | if ((ipvs->sync_state & IP_VS_STATE_MASTER) && | 1616 | if (ipvs->sync_state & IP_VS_STATE_MASTER) |
1617 | cp->protocol == IPPROTO_SCTP) { | 1617 | ip_vs_sync_conn(net, cp, pkts); |
1618 | if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && | ||
1619 | (pkts % sysctl_sync_period(ipvs) | ||
1620 | == sysctl_sync_threshold(ipvs))) || | ||
1621 | (cp->old_state != cp->state && | ||
1622 | ((cp->state == IP_VS_SCTP_S_CLOSED) || | ||
1623 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || | ||
1624 | (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { | ||
1625 | ip_vs_sync_conn(net, cp); | ||
1626 | goto out; | ||
1627 | } | ||
1628 | } | ||
1629 | |||
1630 | /* Keep this block last: TCP and others with pp->num_states <= 1 */ | ||
1631 | else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && | ||
1632 | (((cp->protocol != IPPROTO_TCP || | ||
1633 | cp->state == IP_VS_TCP_S_ESTABLISHED) && | ||
1634 | (pkts % sysctl_sync_period(ipvs) | ||
1635 | == sysctl_sync_threshold(ipvs))) || | ||
1636 | ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && | ||
1637 | ((cp->state == IP_VS_TCP_S_FIN_WAIT) || | ||
1638 | (cp->state == IP_VS_TCP_S_CLOSE) || | ||
1639 | (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || | ||
1640 | (cp->state == IP_VS_TCP_S_TIME_WAIT))))) | ||
1641 | ip_vs_sync_conn(net, cp); | ||
1642 | out: | ||
1643 | cp->old_state = cp->state; | ||
1644 | 1618 | ||
1645 | ip_vs_conn_put(cp); | 1619 | ip_vs_conn_put(cp); |
1646 | return ret; | 1620 | return ret; |
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 37b91996bfba..dd811b8dd97c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net) | |||
1599 | } | 1599 | } |
1600 | 1600 | ||
1601 | #ifdef CONFIG_SYSCTL | 1601 | #ifdef CONFIG_SYSCTL |
1602 | |||
1603 | static int zero; | ||
1604 | static int three = 3; | ||
1605 | |||
1602 | static int | 1606 | static int |
1603 | proc_do_defense_mode(ctl_table *table, int write, | 1607 | proc_do_defense_mode(ctl_table *table, int write, |
1604 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1608 | void __user *buffer, size_t *lenp, loff_t *ppos) |
@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write, | |||
1632 | memcpy(val, valp, sizeof(val)); | 1636 | memcpy(val, valp, sizeof(val)); |
1633 | 1637 | ||
1634 | rc = proc_dointvec(table, write, buffer, lenp, ppos); | 1638 | rc = proc_dointvec(table, write, buffer, lenp, ppos); |
1635 | if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { | 1639 | if (write && (valp[0] < 0 || valp[1] < 0 || |
1640 | (valp[0] >= valp[1] && valp[1]))) { | ||
1636 | /* Restore the correct value */ | 1641 | /* Restore the correct value */ |
1637 | memcpy(valp, val, sizeof(val)); | 1642 | memcpy(valp, val, sizeof(val)); |
1638 | } | 1643 | } |
@@ -1652,9 +1657,24 @@ proc_do_sync_mode(ctl_table *table, int write, | |||
1652 | if ((*valp < 0) || (*valp > 1)) { | 1657 | if ((*valp < 0) || (*valp > 1)) { |
1653 | /* Restore the correct value */ | 1658 | /* Restore the correct value */ |
1654 | *valp = val; | 1659 | *valp = val; |
1655 | } else { | 1660 | } |
1656 | struct net *net = current->nsproxy->net_ns; | 1661 | } |
1657 | ip_vs_sync_switch_mode(net, val); | 1662 | return rc; |
1663 | } | ||
1664 | |||
1665 | static int | ||
1666 | proc_do_sync_ports(ctl_table *table, int write, | ||
1667 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1668 | { | ||
1669 | int *valp = table->data; | ||
1670 | int val = *valp; | ||
1671 | int rc; | ||
1672 | |||
1673 | rc = proc_dointvec(table, write, buffer, lenp, ppos); | ||
1674 | if (write && (*valp != val)) { | ||
1675 | if (*valp < 1 || !is_power_of_2(*valp)) { | ||
1676 | /* Restore the correct value */ | ||
1677 | *valp = val; | ||
1658 | } | 1678 | } |
1659 | } | 1679 | } |
1660 | return rc; | 1680 | return rc; |
@@ -1718,6 +1738,24 @@ static struct ctl_table vs_vars[] = { | |||
1718 | .proc_handler = &proc_do_sync_mode, | 1738 | .proc_handler = &proc_do_sync_mode, |
1719 | }, | 1739 | }, |
1720 | { | 1740 | { |
1741 | .procname = "sync_ports", | ||
1742 | .maxlen = sizeof(int), | ||
1743 | .mode = 0644, | ||
1744 | .proc_handler = &proc_do_sync_ports, | ||
1745 | }, | ||
1746 | { | ||
1747 | .procname = "sync_qlen_max", | ||
1748 | .maxlen = sizeof(int), | ||
1749 | .mode = 0644, | ||
1750 | .proc_handler = proc_dointvec, | ||
1751 | }, | ||
1752 | { | ||
1753 | .procname = "sync_sock_size", | ||
1754 | .maxlen = sizeof(int), | ||
1755 | .mode = 0644, | ||
1756 | .proc_handler = proc_dointvec, | ||
1757 | }, | ||
1758 | { | ||
1721 | .procname = "cache_bypass", | 1759 | .procname = "cache_bypass", |
1722 | .maxlen = sizeof(int), | 1760 | .maxlen = sizeof(int), |
1723 | .mode = 0644, | 1761 | .mode = 0644, |
@@ -1743,6 +1781,20 @@ static struct ctl_table vs_vars[] = { | |||
1743 | .proc_handler = proc_do_sync_threshold, | 1781 | .proc_handler = proc_do_sync_threshold, |
1744 | }, | 1782 | }, |
1745 | { | 1783 | { |
1784 | .procname = "sync_refresh_period", | ||
1785 | .maxlen = sizeof(int), | ||
1786 | .mode = 0644, | ||
1787 | .proc_handler = proc_dointvec_jiffies, | ||
1788 | }, | ||
1789 | { | ||
1790 | .procname = "sync_retries", | ||
1791 | .maxlen = sizeof(int), | ||
1792 | .mode = 0644, | ||
1793 | .proc_handler = proc_dointvec_minmax, | ||
1794 | .extra1 = &zero, | ||
1795 | .extra2 = &three, | ||
1796 | }, | ||
1797 | { | ||
1746 | .procname = "nat_icmp_send", | 1798 | .procname = "nat_icmp_send", |
1747 | .maxlen = sizeof(int), | 1799 | .maxlen = sizeof(int), |
1748 | .mode = 0644, | 1800 | .mode = 0644, |
@@ -3655,6 +3707,12 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) | |||
3655 | tbl[idx++].data = &ipvs->sysctl_snat_reroute; | 3707 | tbl[idx++].data = &ipvs->sysctl_snat_reroute; |
3656 | ipvs->sysctl_sync_ver = 1; | 3708 | ipvs->sysctl_sync_ver = 1; |
3657 | tbl[idx++].data = &ipvs->sysctl_sync_ver; | 3709 | tbl[idx++].data = &ipvs->sysctl_sync_ver; |
3710 | ipvs->sysctl_sync_ports = 1; | ||
3711 | tbl[idx++].data = &ipvs->sysctl_sync_ports; | ||
3712 | ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; | ||
3713 | tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; | ||
3714 | ipvs->sysctl_sync_sock_size = 0; | ||
3715 | tbl[idx++].data = &ipvs->sysctl_sync_sock_size; | ||
3658 | tbl[idx++].data = &ipvs->sysctl_cache_bypass; | 3716 | tbl[idx++].data = &ipvs->sysctl_cache_bypass; |
3659 | tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; | 3717 | tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; |
3660 | tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; | 3718 | tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; |
@@ -3662,6 +3720,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net) | |||
3662 | ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; | 3720 | ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; |
3663 | tbl[idx].data = &ipvs->sysctl_sync_threshold; | 3721 | tbl[idx].data = &ipvs->sysctl_sync_threshold; |
3664 | tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); | 3722 | tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); |
3723 | ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; | ||
3724 | tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; | ||
3725 | ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); | ||
3726 | tbl[idx++].data = &ipvs->sysctl_sync_retries; | ||
3665 | tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; | 3727 | tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; |
3666 | 3728 | ||
3667 | 3729 | ||
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 1a53a7a2fff0..8b7dca9ea422 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c | |||
@@ -149,7 +149,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc) | |||
149 | 149 | ||
150 | /* allocate the DH table for this service */ | 150 | /* allocate the DH table for this service */ |
151 | tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, | 151 | tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, |
152 | GFP_ATOMIC); | 152 | GFP_KERNEL); |
153 | if (tbl == NULL) | 153 | if (tbl == NULL) |
154 | return -ENOMEM; | 154 | return -ENOMEM; |
155 | 155 | ||
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 510f2b5a5855..b20b29c903ef 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c | |||
@@ -485,7 +485,7 @@ static struct pernet_operations ip_vs_ftp_ops = { | |||
485 | .exit = __ip_vs_ftp_exit, | 485 | .exit = __ip_vs_ftp_exit, |
486 | }; | 486 | }; |
487 | 487 | ||
488 | int __init ip_vs_ftp_init(void) | 488 | static int __init ip_vs_ftp_init(void) |
489 | { | 489 | { |
490 | int rv; | 490 | int rv; |
491 | 491 | ||
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 9b0de9a0e08e..df646ccf08a7 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c | |||
@@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) | |||
342 | /* | 342 | /* |
343 | * Allocate the ip_vs_lblc_table for this service | 343 | * Allocate the ip_vs_lblc_table for this service |
344 | */ | 344 | */ |
345 | tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); | 345 | tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); |
346 | if (tbl == NULL) | 346 | if (tbl == NULL) |
347 | return -ENOMEM; | 347 | return -ENOMEM; |
348 | 348 | ||
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 9dcd39a48897..570e31ea427a 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c | |||
@@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | |||
511 | /* | 511 | /* |
512 | * Allocate the ip_vs_lblcr_table for this service | 512 | * Allocate the ip_vs_lblcr_table for this service |
513 | */ | 513 | */ |
514 | tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); | 514 | tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); |
515 | if (tbl == NULL) | 515 | if (tbl == NULL) |
516 | return -ENOMEM; | 516 | return -ENOMEM; |
517 | 517 | ||
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index fdc82ad9cc0e..50d82186da87 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c | |||
@@ -68,7 +68,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) | |||
68 | struct netns_ipvs *ipvs = net_ipvs(net); | 68 | struct netns_ipvs *ipvs = net_ipvs(net); |
69 | unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); | 69 | unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); |
70 | struct ip_vs_proto_data *pd = | 70 | struct ip_vs_proto_data *pd = |
71 | kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); | 71 | kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); |
72 | 72 | ||
73 | if (!pd) | 73 | if (!pd) |
74 | return -ENOMEM; | 74 | return -ENOMEM; |
@@ -156,7 +156,7 @@ EXPORT_SYMBOL(ip_vs_proto_get); | |||
156 | /* | 156 | /* |
157 | * get ip_vs_protocol object data by netns and proto | 157 | * get ip_vs_protocol object data by netns and proto |
158 | */ | 158 | */ |
159 | struct ip_vs_proto_data * | 159 | static struct ip_vs_proto_data * |
160 | __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) | 160 | __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) |
161 | { | 161 | { |
162 | struct ip_vs_proto_data *pd; | 162 | struct ip_vs_proto_data *pd; |
@@ -199,7 +199,7 @@ void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) | |||
199 | int * | 199 | int * |
200 | ip_vs_create_timeout_table(int *table, int size) | 200 | ip_vs_create_timeout_table(int *table, int size) |
201 | { | 201 | { |
202 | return kmemdup(table, size, GFP_ATOMIC); | 202 | return kmemdup(table, size, GFP_KERNEL); |
203 | } | 203 | } |
204 | 204 | ||
205 | 205 | ||
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 91e97ee049be..05126521743e 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c | |||
@@ -162,7 +162,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc) | |||
162 | 162 | ||
163 | /* allocate the SH table for this service */ | 163 | /* allocate the SH table for this service */ |
164 | tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, | 164 | tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, |
165 | GFP_ATOMIC); | 165 | GFP_KERNEL); |
166 | if (tbl == NULL) | 166 | if (tbl == NULL) |
167 | return -ENOMEM; | 167 | return -ENOMEM; |
168 | 168 | ||
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index bf5e538af67b..effa10c9e4e3 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -196,6 +196,7 @@ struct ip_vs_sync_thread_data { | |||
196 | struct net *net; | 196 | struct net *net; |
197 | struct socket *sock; | 197 | struct socket *sock; |
198 | char *buf; | 198 | char *buf; |
199 | int id; | ||
199 | }; | 200 | }; |
200 | 201 | ||
201 | /* Version 0 definition of packet sizes */ | 202 | /* Version 0 definition of packet sizes */ |
@@ -271,13 +272,6 @@ struct ip_vs_sync_buff { | |||
271 | unsigned char *end; | 272 | unsigned char *end; |
272 | }; | 273 | }; |
273 | 274 | ||
274 | /* multicast addr */ | ||
275 | static struct sockaddr_in mcast_addr = { | ||
276 | .sin_family = AF_INET, | ||
277 | .sin_port = cpu_to_be16(IP_VS_SYNC_PORT), | ||
278 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), | ||
279 | }; | ||
280 | |||
281 | /* | 275 | /* |
282 | * Copy of struct ip_vs_seq | 276 | * Copy of struct ip_vs_seq |
283 | * From unaligned network order to aligned host order | 277 | * From unaligned network order to aligned host order |
@@ -300,18 +294,22 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) | |||
300 | put_unaligned_be32(ho->previous_delta, &no->previous_delta); | 294 | put_unaligned_be32(ho->previous_delta, &no->previous_delta); |
301 | } | 295 | } |
302 | 296 | ||
303 | static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) | 297 | static inline struct ip_vs_sync_buff * |
298 | sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) | ||
304 | { | 299 | { |
305 | struct ip_vs_sync_buff *sb; | 300 | struct ip_vs_sync_buff *sb; |
306 | 301 | ||
307 | spin_lock_bh(&ipvs->sync_lock); | 302 | spin_lock_bh(&ipvs->sync_lock); |
308 | if (list_empty(&ipvs->sync_queue)) { | 303 | if (list_empty(&ms->sync_queue)) { |
309 | sb = NULL; | 304 | sb = NULL; |
305 | __set_current_state(TASK_INTERRUPTIBLE); | ||
310 | } else { | 306 | } else { |
311 | sb = list_entry(ipvs->sync_queue.next, | 307 | sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff, |
312 | struct ip_vs_sync_buff, | ||
313 | list); | 308 | list); |
314 | list_del(&sb->list); | 309 | list_del(&sb->list); |
310 | ms->sync_queue_len--; | ||
311 | if (!ms->sync_queue_len) | ||
312 | ms->sync_queue_delay = 0; | ||
315 | } | 313 | } |
316 | spin_unlock_bh(&ipvs->sync_lock); | 314 | spin_unlock_bh(&ipvs->sync_lock); |
317 | 315 | ||
@@ -334,7 +332,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs) | |||
334 | kfree(sb); | 332 | kfree(sb); |
335 | return NULL; | 333 | return NULL; |
336 | } | 334 | } |
337 | sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ | 335 | sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ |
338 | sb->mesg->version = SYNC_PROTO_VER; | 336 | sb->mesg->version = SYNC_PROTO_VER; |
339 | sb->mesg->syncid = ipvs->master_syncid; | 337 | sb->mesg->syncid = ipvs->master_syncid; |
340 | sb->mesg->size = sizeof(struct ip_vs_sync_mesg); | 338 | sb->mesg->size = sizeof(struct ip_vs_sync_mesg); |
@@ -353,14 +351,22 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) | |||
353 | kfree(sb); | 351 | kfree(sb); |
354 | } | 352 | } |
355 | 353 | ||
356 | static inline void sb_queue_tail(struct netns_ipvs *ipvs) | 354 | static inline void sb_queue_tail(struct netns_ipvs *ipvs, |
355 | struct ipvs_master_sync_state *ms) | ||
357 | { | 356 | { |
358 | struct ip_vs_sync_buff *sb = ipvs->sync_buff; | 357 | struct ip_vs_sync_buff *sb = ms->sync_buff; |
359 | 358 | ||
360 | spin_lock(&ipvs->sync_lock); | 359 | spin_lock(&ipvs->sync_lock); |
361 | if (ipvs->sync_state & IP_VS_STATE_MASTER) | 360 | if (ipvs->sync_state & IP_VS_STATE_MASTER && |
362 | list_add_tail(&sb->list, &ipvs->sync_queue); | 361 | ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) { |
363 | else | 362 | if (!ms->sync_queue_len) |
363 | schedule_delayed_work(&ms->master_wakeup_work, | ||
364 | max(IPVS_SYNC_SEND_DELAY, 1)); | ||
365 | ms->sync_queue_len++; | ||
366 | list_add_tail(&sb->list, &ms->sync_queue); | ||
367 | if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) | ||
368 | wake_up_process(ms->master_thread); | ||
369 | } else | ||
364 | ip_vs_sync_buff_release(sb); | 370 | ip_vs_sync_buff_release(sb); |
365 | spin_unlock(&ipvs->sync_lock); | 371 | spin_unlock(&ipvs->sync_lock); |
366 | } | 372 | } |
@@ -370,49 +376,26 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs) | |||
370 | * than the specified time or the specified time is zero. | 376 | * than the specified time or the specified time is zero. |
371 | */ | 377 | */ |
372 | static inline struct ip_vs_sync_buff * | 378 | static inline struct ip_vs_sync_buff * |
373 | get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) | 379 | get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms, |
380 | unsigned long time) | ||
374 | { | 381 | { |
375 | struct ip_vs_sync_buff *sb; | 382 | struct ip_vs_sync_buff *sb; |
376 | 383 | ||
377 | spin_lock_bh(&ipvs->sync_buff_lock); | 384 | spin_lock_bh(&ipvs->sync_buff_lock); |
378 | if (ipvs->sync_buff && | 385 | sb = ms->sync_buff; |
379 | time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) { | 386 | if (sb && time_after_eq(jiffies - sb->firstuse, time)) { |
380 | sb = ipvs->sync_buff; | 387 | ms->sync_buff = NULL; |
381 | ipvs->sync_buff = NULL; | 388 | __set_current_state(TASK_RUNNING); |
382 | } else | 389 | } else |
383 | sb = NULL; | 390 | sb = NULL; |
384 | spin_unlock_bh(&ipvs->sync_buff_lock); | 391 | spin_unlock_bh(&ipvs->sync_buff_lock); |
385 | return sb; | 392 | return sb; |
386 | } | 393 | } |
387 | 394 | ||
388 | /* | 395 | static inline int |
389 | * Switch mode from sending version 0 or 1 | 396 | select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) |
390 | * - must handle sync_buf | ||
391 | */ | ||
392 | void ip_vs_sync_switch_mode(struct net *net, int mode) | ||
393 | { | 397 | { |
394 | struct netns_ipvs *ipvs = net_ipvs(net); | 398 | return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask; |
395 | |||
396 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) | ||
397 | return; | ||
398 | if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff) | ||
399 | return; | ||
400 | |||
401 | spin_lock_bh(&ipvs->sync_buff_lock); | ||
402 | /* Buffer empty ? then let buf_create do the job */ | ||
403 | if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { | ||
404 | kfree(ipvs->sync_buff); | ||
405 | ipvs->sync_buff = NULL; | ||
406 | } else { | ||
407 | spin_lock_bh(&ipvs->sync_lock); | ||
408 | if (ipvs->sync_state & IP_VS_STATE_MASTER) | ||
409 | list_add_tail(&ipvs->sync_buff->list, | ||
410 | &ipvs->sync_queue); | ||
411 | else | ||
412 | ip_vs_sync_buff_release(ipvs->sync_buff); | ||
413 | spin_unlock_bh(&ipvs->sync_lock); | ||
414 | } | ||
415 | spin_unlock_bh(&ipvs->sync_buff_lock); | ||
416 | } | 399 | } |
417 | 400 | ||
418 | /* | 401 | /* |
@@ -442,15 +425,101 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) | |||
442 | return sb; | 425 | return sb; |
443 | } | 426 | } |
444 | 427 | ||
428 | /* Check if conn should be synced. | ||
429 | * pkts: conn packets, use sysctl_sync_threshold to avoid packet check | ||
430 | * - (1) sync_refresh_period: reduce sync rate. Additionally, retry | ||
431 | * sync_retries times with period of sync_refresh_period/8 | ||
432 | * - (2) if both sync_refresh_period and sync_period are 0 send sync only | ||
433 | * for state changes or only once when pkts matches sync_threshold | ||
434 | * - (3) templates: rate can be reduced only with sync_refresh_period or | ||
435 | * with (2) | ||
436 | */ | ||
437 | static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, | ||
438 | struct ip_vs_conn *cp, int pkts) | ||
439 | { | ||
440 | unsigned long orig = ACCESS_ONCE(cp->sync_endtime); | ||
441 | unsigned long now = jiffies; | ||
442 | unsigned long n = (now + cp->timeout) & ~3UL; | ||
443 | unsigned int sync_refresh_period; | ||
444 | int sync_period; | ||
445 | int force; | ||
446 | |||
447 | /* Check if we sync in current state */ | ||
448 | if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) | ||
449 | force = 0; | ||
450 | else if (likely(cp->protocol == IPPROTO_TCP)) { | ||
451 | if (!((1 << cp->state) & | ||
452 | ((1 << IP_VS_TCP_S_ESTABLISHED) | | ||
453 | (1 << IP_VS_TCP_S_FIN_WAIT) | | ||
454 | (1 << IP_VS_TCP_S_CLOSE) | | ||
455 | (1 << IP_VS_TCP_S_CLOSE_WAIT) | | ||
456 | (1 << IP_VS_TCP_S_TIME_WAIT)))) | ||
457 | return 0; | ||
458 | force = cp->state != cp->old_state; | ||
459 | if (force && cp->state != IP_VS_TCP_S_ESTABLISHED) | ||
460 | goto set; | ||
461 | } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { | ||
462 | if (!((1 << cp->state) & | ||
463 | ((1 << IP_VS_SCTP_S_ESTABLISHED) | | ||
464 | (1 << IP_VS_SCTP_S_CLOSED) | | ||
465 | (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) | | ||
466 | (1 << IP_VS_SCTP_S_SHUT_ACK_SER)))) | ||
467 | return 0; | ||
468 | force = cp->state != cp->old_state; | ||
469 | if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) | ||
470 | goto set; | ||
471 | } else { | ||
472 | /* UDP or another protocol with single state */ | ||
473 | force = 0; | ||
474 | } | ||
475 | |||
476 | sync_refresh_period = sysctl_sync_refresh_period(ipvs); | ||
477 | if (sync_refresh_period > 0) { | ||
478 | long diff = n - orig; | ||
479 | long min_diff = max(cp->timeout >> 1, 10UL * HZ); | ||
480 | |||
481 | /* Avoid sync if difference is below sync_refresh_period | ||
482 | * and below the half timeout. | ||
483 | */ | ||
484 | if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) { | ||
485 | int retries = orig & 3; | ||
486 | |||
487 | if (retries >= sysctl_sync_retries(ipvs)) | ||
488 | return 0; | ||
489 | if (time_before(now, orig - cp->timeout + | ||
490 | (sync_refresh_period >> 3))) | ||
491 | return 0; | ||
492 | n |= retries + 1; | ||
493 | } | ||
494 | } | ||
495 | sync_period = sysctl_sync_period(ipvs); | ||
496 | if (sync_period > 0) { | ||
497 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && | ||
498 | pkts % sync_period != sysctl_sync_threshold(ipvs)) | ||
499 | return 0; | ||
500 | } else if (sync_refresh_period <= 0 && | ||
501 | pkts != sysctl_sync_threshold(ipvs)) | ||
502 | return 0; | ||
503 | |||
504 | set: | ||
505 | cp->old_state = cp->state; | ||
506 | n = cmpxchg(&cp->sync_endtime, orig, n); | ||
507 | return n == orig || force; | ||
508 | } | ||
509 | |||
445 | /* | 510 | /* |
446 | * Version 0 , could be switched in by sys_ctl. | 511 | * Version 0 , could be switched in by sys_ctl. |
447 | * Add an ip_vs_conn information into the current sync_buff. | 512 | * Add an ip_vs_conn information into the current sync_buff. |
448 | */ | 513 | */ |
449 | void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) | 514 | static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, |
515 | int pkts) | ||
450 | { | 516 | { |
451 | struct netns_ipvs *ipvs = net_ipvs(net); | 517 | struct netns_ipvs *ipvs = net_ipvs(net); |
452 | struct ip_vs_sync_mesg_v0 *m; | 518 | struct ip_vs_sync_mesg_v0 *m; |
453 | struct ip_vs_sync_conn_v0 *s; | 519 | struct ip_vs_sync_conn_v0 *s; |
520 | struct ip_vs_sync_buff *buff; | ||
521 | struct ipvs_master_sync_state *ms; | ||
522 | int id; | ||
454 | int len; | 523 | int len; |
455 | 524 | ||
456 | if (unlikely(cp->af != AF_INET)) | 525 | if (unlikely(cp->af != AF_INET)) |
@@ -459,21 +528,41 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) | |||
459 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | 528 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
460 | return; | 529 | return; |
461 | 530 | ||
531 | if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) | ||
532 | return; | ||
533 | |||
462 | spin_lock(&ipvs->sync_buff_lock); | 534 | spin_lock(&ipvs->sync_buff_lock); |
463 | if (!ipvs->sync_buff) { | 535 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { |
464 | ipvs->sync_buff = | 536 | spin_unlock(&ipvs->sync_buff_lock); |
465 | ip_vs_sync_buff_create_v0(ipvs); | 537 | return; |
466 | if (!ipvs->sync_buff) { | 538 | } |
539 | |||
540 | id = select_master_thread_id(ipvs, cp); | ||
541 | ms = &ipvs->ms[id]; | ||
542 | buff = ms->sync_buff; | ||
543 | if (buff) { | ||
544 | m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; | ||
545 | /* Send buffer if it is for v1 */ | ||
546 | if (!m->nr_conns) { | ||
547 | sb_queue_tail(ipvs, ms); | ||
548 | ms->sync_buff = NULL; | ||
549 | buff = NULL; | ||
550 | } | ||
551 | } | ||
552 | if (!buff) { | ||
553 | buff = ip_vs_sync_buff_create_v0(ipvs); | ||
554 | if (!buff) { | ||
467 | spin_unlock(&ipvs->sync_buff_lock); | 555 | spin_unlock(&ipvs->sync_buff_lock); |
468 | pr_err("ip_vs_sync_buff_create failed.\n"); | 556 | pr_err("ip_vs_sync_buff_create failed.\n"); |
469 | return; | 557 | return; |
470 | } | 558 | } |
559 | ms->sync_buff = buff; | ||
471 | } | 560 | } |
472 | 561 | ||
473 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : | 562 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : |
474 | SIMPLE_CONN_SIZE; | 563 | SIMPLE_CONN_SIZE; |
475 | m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; | 564 | m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; |
476 | s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; | 565 | s = (struct ip_vs_sync_conn_v0 *) buff->head; |
477 | 566 | ||
478 | /* copy members */ | 567 | /* copy members */ |
479 | s->reserved = 0; | 568 | s->reserved = 0; |
@@ -494,18 +583,24 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) | |||
494 | 583 | ||
495 | m->nr_conns++; | 584 | m->nr_conns++; |
496 | m->size += len; | 585 | m->size += len; |
497 | ipvs->sync_buff->head += len; | 586 | buff->head += len; |
498 | 587 | ||
499 | /* check if there is a space for next one */ | 588 | /* check if there is a space for next one */ |
500 | if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { | 589 | if (buff->head + FULL_CONN_SIZE > buff->end) { |
501 | sb_queue_tail(ipvs); | 590 | sb_queue_tail(ipvs, ms); |
502 | ipvs->sync_buff = NULL; | 591 | ms->sync_buff = NULL; |
503 | } | 592 | } |
504 | spin_unlock(&ipvs->sync_buff_lock); | 593 | spin_unlock(&ipvs->sync_buff_lock); |
505 | 594 | ||
506 | /* synchronize its controller if it has */ | 595 | /* synchronize its controller if it has */ |
507 | if (cp->control) | 596 | cp = cp->control; |
508 | ip_vs_sync_conn(net, cp->control); | 597 | if (cp) { |
598 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) | ||
599 | pkts = atomic_add_return(1, &cp->in_pkts); | ||
600 | else | ||
601 | pkts = sysctl_sync_threshold(ipvs); | ||
602 | ip_vs_sync_conn(net, cp->control, pkts); | ||
603 | } | ||
509 | } | 604 | } |
510 | 605 | ||
511 | /* | 606 | /* |
@@ -513,23 +608,29 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) | |||
513 | * Called by ip_vs_in. | 608 | * Called by ip_vs_in. |
514 | * Sending Version 1 messages | 609 | * Sending Version 1 messages |
515 | */ | 610 | */ |
516 | void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) | 611 | void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) |
517 | { | 612 | { |
518 | struct netns_ipvs *ipvs = net_ipvs(net); | 613 | struct netns_ipvs *ipvs = net_ipvs(net); |
519 | struct ip_vs_sync_mesg *m; | 614 | struct ip_vs_sync_mesg *m; |
520 | union ip_vs_sync_conn *s; | 615 | union ip_vs_sync_conn *s; |
616 | struct ip_vs_sync_buff *buff; | ||
617 | struct ipvs_master_sync_state *ms; | ||
618 | int id; | ||
521 | __u8 *p; | 619 | __u8 *p; |
522 | unsigned int len, pe_name_len, pad; | 620 | unsigned int len, pe_name_len, pad; |
523 | 621 | ||
524 | /* Handle old version of the protocol */ | 622 | /* Handle old version of the protocol */ |
525 | if (sysctl_sync_ver(ipvs) == 0) { | 623 | if (sysctl_sync_ver(ipvs) == 0) { |
526 | ip_vs_sync_conn_v0(net, cp); | 624 | ip_vs_sync_conn_v0(net, cp, pkts); |
527 | return; | 625 | return; |
528 | } | 626 | } |
529 | /* Do not sync ONE PACKET */ | 627 | /* Do not sync ONE PACKET */ |
530 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) | 628 | if (cp->flags & IP_VS_CONN_F_ONE_PACKET) |
531 | goto control; | 629 | goto control; |
532 | sloop: | 630 | sloop: |
631 | if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) | ||
632 | goto control; | ||
633 | |||
533 | /* Sanity checks */ | 634 | /* Sanity checks */ |
534 | pe_name_len = 0; | 635 | pe_name_len = 0; |
535 | if (cp->pe_data_len) { | 636 | if (cp->pe_data_len) { |
@@ -541,6 +642,13 @@ sloop: | |||
541 | } | 642 | } |
542 | 643 | ||
543 | spin_lock(&ipvs->sync_buff_lock); | 644 | spin_lock(&ipvs->sync_buff_lock); |
645 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { | ||
646 | spin_unlock(&ipvs->sync_buff_lock); | ||
647 | return; | ||
648 | } | ||
649 | |||
650 | id = select_master_thread_id(ipvs, cp); | ||
651 | ms = &ipvs->ms[id]; | ||
544 | 652 | ||
545 | #ifdef CONFIG_IP_VS_IPV6 | 653 | #ifdef CONFIG_IP_VS_IPV6 |
546 | if (cp->af == AF_INET6) | 654 | if (cp->af == AF_INET6) |
@@ -559,27 +667,32 @@ sloop: | |||
559 | 667 | ||
560 | /* check if there is a space for this one */ | 668 | /* check if there is a space for this one */ |
561 | pad = 0; | 669 | pad = 0; |
562 | if (ipvs->sync_buff) { | 670 | buff = ms->sync_buff; |
563 | pad = (4 - (size_t)ipvs->sync_buff->head) & 3; | 671 | if (buff) { |
564 | if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { | 672 | m = buff->mesg; |
565 | sb_queue_tail(ipvs); | 673 | pad = (4 - (size_t) buff->head) & 3; |
566 | ipvs->sync_buff = NULL; | 674 | /* Send buffer if it is for v0 */ |
675 | if (buff->head + len + pad > buff->end || m->reserved) { | ||
676 | sb_queue_tail(ipvs, ms); | ||
677 | ms->sync_buff = NULL; | ||
678 | buff = NULL; | ||
567 | pad = 0; | 679 | pad = 0; |
568 | } | 680 | } |
569 | } | 681 | } |
570 | 682 | ||
571 | if (!ipvs->sync_buff) { | 683 | if (!buff) { |
572 | ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); | 684 | buff = ip_vs_sync_buff_create(ipvs); |
573 | if (!ipvs->sync_buff) { | 685 | if (!buff) { |
574 | spin_unlock(&ipvs->sync_buff_lock); | 686 | spin_unlock(&ipvs->sync_buff_lock); |
575 | pr_err("ip_vs_sync_buff_create failed.\n"); | 687 | pr_err("ip_vs_sync_buff_create failed.\n"); |
576 | return; | 688 | return; |
577 | } | 689 | } |
690 | ms->sync_buff = buff; | ||
691 | m = buff->mesg; | ||
578 | } | 692 | } |
579 | 693 | ||
580 | m = ipvs->sync_buff->mesg; | 694 | p = buff->head; |
581 | p = ipvs->sync_buff->head; | 695 | buff->head += pad + len; |
582 | ipvs->sync_buff->head += pad + len; | ||
583 | m->size += pad + len; | 696 | m->size += pad + len; |
584 | /* Add ev. padding from prev. sync_conn */ | 697 | /* Add ev. padding from prev. sync_conn */ |
585 | while (pad--) | 698 | while (pad--) |
@@ -644,16 +757,10 @@ control: | |||
644 | cp = cp->control; | 757 | cp = cp->control; |
645 | if (!cp) | 758 | if (!cp) |
646 | return; | 759 | return; |
647 | /* | 760 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) |
648 | * Reduce sync rate for templates | 761 | pkts = atomic_add_return(1, &cp->in_pkts); |
649 | * i.e only increment in_pkts for Templates. | 762 | else |
650 | */ | 763 | pkts = sysctl_sync_threshold(ipvs); |
651 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) { | ||
652 | int pkts = atomic_add_return(1, &cp->in_pkts); | ||
653 | |||
654 | if (pkts % sysctl_sync_period(ipvs) != 1) | ||
655 | return; | ||
656 | } | ||
657 | goto sloop; | 764 | goto sloop; |
658 | } | 765 | } |
659 | 766 | ||
@@ -731,9 +838,32 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, | |||
731 | else | 838 | else |
732 | cp = ip_vs_ct_in_get(param); | 839 | cp = ip_vs_ct_in_get(param); |
733 | 840 | ||
734 | if (cp && param->pe_data) /* Free pe_data */ | 841 | if (cp) { |
842 | /* Free pe_data */ | ||
735 | kfree(param->pe_data); | 843 | kfree(param->pe_data); |
736 | if (!cp) { | 844 | |
845 | dest = cp->dest; | ||
846 | spin_lock(&cp->lock); | ||
847 | if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && | ||
848 | !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { | ||
849 | if (flags & IP_VS_CONN_F_INACTIVE) { | ||
850 | atomic_dec(&dest->activeconns); | ||
851 | atomic_inc(&dest->inactconns); | ||
852 | } else { | ||
853 | atomic_inc(&dest->activeconns); | ||
854 | atomic_dec(&dest->inactconns); | ||
855 | } | ||
856 | } | ||
857 | flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; | ||
858 | flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; | ||
859 | cp->flags = flags; | ||
860 | spin_unlock(&cp->lock); | ||
861 | if (!dest) { | ||
862 | dest = ip_vs_try_bind_dest(cp); | ||
863 | if (dest) | ||
864 | atomic_dec(&dest->refcnt); | ||
865 | } | ||
866 | } else { | ||
737 | /* | 867 | /* |
738 | * Find the appropriate destination for the connection. | 868 | * Find the appropriate destination for the connection. |
739 | * If it is not found the connection will remain unbound | 869 | * If it is not found the connection will remain unbound |
@@ -742,18 +872,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, | |||
742 | dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, | 872 | dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, |
743 | param->vport, protocol, fwmark, flags); | 873 | param->vport, protocol, fwmark, flags); |
744 | 874 | ||
745 | /* Set the approprite ativity flag */ | ||
746 | if (protocol == IPPROTO_TCP) { | ||
747 | if (state != IP_VS_TCP_S_ESTABLISHED) | ||
748 | flags |= IP_VS_CONN_F_INACTIVE; | ||
749 | else | ||
750 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
751 | } else if (protocol == IPPROTO_SCTP) { | ||
752 | if (state != IP_VS_SCTP_S_ESTABLISHED) | ||
753 | flags |= IP_VS_CONN_F_INACTIVE; | ||
754 | else | ||
755 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
756 | } | ||
757 | cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); | 875 | cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); |
758 | if (dest) | 876 | if (dest) |
759 | atomic_dec(&dest->refcnt); | 877 | atomic_dec(&dest->refcnt); |
@@ -763,34 +881,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, | |||
763 | IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); | 881 | IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); |
764 | return; | 882 | return; |
765 | } | 883 | } |
766 | } else if (!cp->dest) { | ||
767 | dest = ip_vs_try_bind_dest(cp); | ||
768 | if (dest) | ||
769 | atomic_dec(&dest->refcnt); | ||
770 | } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | ||
771 | (cp->state != state)) { | ||
772 | /* update active/inactive flag for the connection */ | ||
773 | dest = cp->dest; | ||
774 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
775 | (state != IP_VS_TCP_S_ESTABLISHED)) { | ||
776 | atomic_dec(&dest->activeconns); | ||
777 | atomic_inc(&dest->inactconns); | ||
778 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
779 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
780 | (state == IP_VS_TCP_S_ESTABLISHED)) { | ||
781 | atomic_inc(&dest->activeconns); | ||
782 | atomic_dec(&dest->inactconns); | ||
783 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
784 | } | ||
785 | } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && | ||
786 | (cp->state != state)) { | ||
787 | dest = cp->dest; | ||
788 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
789 | (state != IP_VS_SCTP_S_ESTABLISHED)) { | ||
790 | atomic_dec(&dest->activeconns); | ||
791 | atomic_inc(&dest->inactconns); | ||
792 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
793 | } | ||
794 | } | 884 | } |
795 | 885 | ||
796 | if (opt) | 886 | if (opt) |
@@ -1149,6 +1239,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, | |||
1149 | 1239 | ||
1150 | 1240 | ||
1151 | /* | 1241 | /* |
1242 | * Setup sndbuf (mode=1) or rcvbuf (mode=0) | ||
1243 | */ | ||
1244 | static void set_sock_size(struct sock *sk, int mode, int val) | ||
1245 | { | ||
1246 | /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */ | ||
1247 | /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */ | ||
1248 | lock_sock(sk); | ||
1249 | if (mode) { | ||
1250 | val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2, | ||
1251 | sysctl_wmem_max); | ||
1252 | sk->sk_sndbuf = val * 2; | ||
1253 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
1254 | } else { | ||
1255 | val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2, | ||
1256 | sysctl_rmem_max); | ||
1257 | sk->sk_rcvbuf = val * 2; | ||
1258 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
1259 | } | ||
1260 | release_sock(sk); | ||
1261 | } | ||
1262 | |||
1263 | /* | ||
1152 | * Setup loopback of outgoing multicasts on a sending socket | 1264 | * Setup loopback of outgoing multicasts on a sending socket |
1153 | */ | 1265 | */ |
1154 | static void set_mcast_loop(struct sock *sk, u_char loop) | 1266 | static void set_mcast_loop(struct sock *sk, u_char loop) |
@@ -1298,9 +1410,15 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) | |||
1298 | /* | 1410 | /* |
1299 | * Set up sending multicast socket over UDP | 1411 | * Set up sending multicast socket over UDP |
1300 | */ | 1412 | */ |
1301 | static struct socket *make_send_sock(struct net *net) | 1413 | static struct socket *make_send_sock(struct net *net, int id) |
1302 | { | 1414 | { |
1303 | struct netns_ipvs *ipvs = net_ipvs(net); | 1415 | struct netns_ipvs *ipvs = net_ipvs(net); |
1416 | /* multicast addr */ | ||
1417 | struct sockaddr_in mcast_addr = { | ||
1418 | .sin_family = AF_INET, | ||
1419 | .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), | ||
1420 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), | ||
1421 | }; | ||
1304 | struct socket *sock; | 1422 | struct socket *sock; |
1305 | int result; | 1423 | int result; |
1306 | 1424 | ||
@@ -1324,6 +1442,9 @@ static struct socket *make_send_sock(struct net *net) | |||
1324 | 1442 | ||
1325 | set_mcast_loop(sock->sk, 0); | 1443 | set_mcast_loop(sock->sk, 0); |
1326 | set_mcast_ttl(sock->sk, 1); | 1444 | set_mcast_ttl(sock->sk, 1); |
1445 | result = sysctl_sync_sock_size(ipvs); | ||
1446 | if (result > 0) | ||
1447 | set_sock_size(sock->sk, 1, result); | ||
1327 | 1448 | ||
1328 | result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); | 1449 | result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); |
1329 | if (result < 0) { | 1450 | if (result < 0) { |
@@ -1349,9 +1470,15 @@ error: | |||
1349 | /* | 1470 | /* |
1350 | * Set up receiving multicast socket over UDP | 1471 | * Set up receiving multicast socket over UDP |
1351 | */ | 1472 | */ |
1352 | static struct socket *make_receive_sock(struct net *net) | 1473 | static struct socket *make_receive_sock(struct net *net, int id) |
1353 | { | 1474 | { |
1354 | struct netns_ipvs *ipvs = net_ipvs(net); | 1475 | struct netns_ipvs *ipvs = net_ipvs(net); |
1476 | /* multicast addr */ | ||
1477 | struct sockaddr_in mcast_addr = { | ||
1478 | .sin_family = AF_INET, | ||
1479 | .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), | ||
1480 | .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), | ||
1481 | }; | ||
1355 | struct socket *sock; | 1482 | struct socket *sock; |
1356 | int result; | 1483 | int result; |
1357 | 1484 | ||
@@ -1369,6 +1496,9 @@ static struct socket *make_receive_sock(struct net *net) | |||
1369 | sk_change_net(sock->sk, net); | 1496 | sk_change_net(sock->sk, net); |
1370 | /* it is equivalent to the REUSEADDR option in user-space */ | 1497 | /* it is equivalent to the REUSEADDR option in user-space */ |
1371 | sock->sk->sk_reuse = SK_CAN_REUSE; | 1498 | sock->sk->sk_reuse = SK_CAN_REUSE; |
1499 | result = sysctl_sync_sock_size(ipvs); | ||
1500 | if (result > 0) | ||
1501 | set_sock_size(sock->sk, 0, result); | ||
1372 | 1502 | ||
1373 | result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, | 1503 | result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, |
1374 | sizeof(struct sockaddr)); | 1504 | sizeof(struct sockaddr)); |
@@ -1411,18 +1541,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) | |||
1411 | return len; | 1541 | return len; |
1412 | } | 1542 | } |
1413 | 1543 | ||
1414 | static void | 1544 | static int |
1415 | ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) | 1545 | ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) |
1416 | { | 1546 | { |
1417 | int msize; | 1547 | int msize; |
1548 | int ret; | ||
1418 | 1549 | ||
1419 | msize = msg->size; | 1550 | msize = msg->size; |
1420 | 1551 | ||
1421 | /* Put size in network byte order */ | 1552 | /* Put size in network byte order */ |
1422 | msg->size = htons(msg->size); | 1553 | msg->size = htons(msg->size); |
1423 | 1554 | ||
1424 | if (ip_vs_send_async(sock, (char *)msg, msize) != msize) | 1555 | ret = ip_vs_send_async(sock, (char *)msg, msize); |
1425 | pr_err("ip_vs_send_async error\n"); | 1556 | if (ret >= 0 || ret == -EAGAIN) |
1557 | return ret; | ||
1558 | pr_err("ip_vs_send_async error %d\n", ret); | ||
1559 | return 0; | ||
1426 | } | 1560 | } |
1427 | 1561 | ||
1428 | static int | 1562 | static int |
@@ -1438,48 +1572,90 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) | |||
1438 | iov.iov_base = buffer; | 1572 | iov.iov_base = buffer; |
1439 | iov.iov_len = (size_t)buflen; | 1573 | iov.iov_len = (size_t)buflen; |
1440 | 1574 | ||
1441 | len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0); | 1575 | len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT); |
1442 | 1576 | ||
1443 | if (len < 0) | 1577 | if (len < 0) |
1444 | return -1; | 1578 | return len; |
1445 | 1579 | ||
1446 | LeaveFunction(7); | 1580 | LeaveFunction(7); |
1447 | return len; | 1581 | return len; |
1448 | } | 1582 | } |
1449 | 1583 | ||
1584 | /* Wakeup the master thread for sending */ | ||
1585 | static void master_wakeup_work_handler(struct work_struct *work) | ||
1586 | { | ||
1587 | struct ipvs_master_sync_state *ms = | ||
1588 | container_of(work, struct ipvs_master_sync_state, | ||
1589 | master_wakeup_work.work); | ||
1590 | struct netns_ipvs *ipvs = ms->ipvs; | ||
1591 | |||
1592 | spin_lock_bh(&ipvs->sync_lock); | ||
1593 | if (ms->sync_queue_len && | ||
1594 | ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) { | ||
1595 | ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE; | ||
1596 | wake_up_process(ms->master_thread); | ||
1597 | } | ||
1598 | spin_unlock_bh(&ipvs->sync_lock); | ||
1599 | } | ||
1600 | |||
1601 | /* Get next buffer to send */ | ||
1602 | static inline struct ip_vs_sync_buff * | ||
1603 | next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) | ||
1604 | { | ||
1605 | struct ip_vs_sync_buff *sb; | ||
1606 | |||
1607 | sb = sb_dequeue(ipvs, ms); | ||
1608 | if (sb) | ||
1609 | return sb; | ||
1610 | /* Do not delay entries in buffer for more than 2 seconds */ | ||
1611 | return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME); | ||
1612 | } | ||
1450 | 1613 | ||
1451 | static int sync_thread_master(void *data) | 1614 | static int sync_thread_master(void *data) |
1452 | { | 1615 | { |
1453 | struct ip_vs_sync_thread_data *tinfo = data; | 1616 | struct ip_vs_sync_thread_data *tinfo = data; |
1454 | struct netns_ipvs *ipvs = net_ipvs(tinfo->net); | 1617 | struct netns_ipvs *ipvs = net_ipvs(tinfo->net); |
1618 | struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id]; | ||
1619 | struct sock *sk = tinfo->sock->sk; | ||
1455 | struct ip_vs_sync_buff *sb; | 1620 | struct ip_vs_sync_buff *sb; |
1456 | 1621 | ||
1457 | pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " | 1622 | pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " |
1458 | "syncid = %d\n", | 1623 | "syncid = %d, id = %d\n", |
1459 | ipvs->master_mcast_ifn, ipvs->master_syncid); | 1624 | ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id); |
1460 | 1625 | ||
1461 | while (!kthread_should_stop()) { | 1626 | for (;;) { |
1462 | while ((sb = sb_dequeue(ipvs))) { | 1627 | sb = next_sync_buff(ipvs, ms); |
1463 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | 1628 | if (unlikely(kthread_should_stop())) |
1464 | ip_vs_sync_buff_release(sb); | 1629 | break; |
1630 | if (!sb) { | ||
1631 | schedule_timeout(IPVS_SYNC_CHECK_PERIOD); | ||
1632 | continue; | ||
1465 | } | 1633 | } |
1466 | 1634 | while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { | |
1467 | /* check if entries stay in ipvs->sync_buff for 2 seconds */ | 1635 | int ret = 0; |
1468 | sb = get_curr_sync_buff(ipvs, 2 * HZ); | 1636 | |
1469 | if (sb) { | 1637 | __wait_event_interruptible(*sk_sleep(sk), |
1470 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | 1638 | sock_writeable(sk) || |
1471 | ip_vs_sync_buff_release(sb); | 1639 | kthread_should_stop(), |
1640 | ret); | ||
1641 | if (unlikely(kthread_should_stop())) | ||
1642 | goto done; | ||
1472 | } | 1643 | } |
1473 | 1644 | ip_vs_sync_buff_release(sb); | |
1474 | schedule_timeout_interruptible(HZ); | ||
1475 | } | 1645 | } |
1476 | 1646 | ||
1647 | done: | ||
1648 | __set_current_state(TASK_RUNNING); | ||
1649 | if (sb) | ||
1650 | ip_vs_sync_buff_release(sb); | ||
1651 | |||
1477 | /* clean up the sync_buff queue */ | 1652 | /* clean up the sync_buff queue */ |
1478 | while ((sb = sb_dequeue(ipvs))) | 1653 | while ((sb = sb_dequeue(ipvs, ms))) |
1479 | ip_vs_sync_buff_release(sb); | 1654 | ip_vs_sync_buff_release(sb); |
1655 | __set_current_state(TASK_RUNNING); | ||
1480 | 1656 | ||
1481 | /* clean up the current sync_buff */ | 1657 | /* clean up the current sync_buff */ |
1482 | sb = get_curr_sync_buff(ipvs, 0); | 1658 | sb = get_curr_sync_buff(ipvs, ms, 0); |
1483 | if (sb) | 1659 | if (sb) |
1484 | ip_vs_sync_buff_release(sb); | 1660 | ip_vs_sync_buff_release(sb); |
1485 | 1661 | ||
@@ -1498,8 +1674,8 @@ static int sync_thread_backup(void *data) | |||
1498 | int len; | 1674 | int len; |
1499 | 1675 | ||
1500 | pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " | 1676 | pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " |
1501 | "syncid = %d\n", | 1677 | "syncid = %d, id = %d\n", |
1502 | ipvs->backup_mcast_ifn, ipvs->backup_syncid); | 1678 | ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id); |
1503 | 1679 | ||
1504 | while (!kthread_should_stop()) { | 1680 | while (!kthread_should_stop()) { |
1505 | wait_event_interruptible(*sk_sleep(tinfo->sock->sk), | 1681 | wait_event_interruptible(*sk_sleep(tinfo->sock->sk), |
@@ -1511,7 +1687,8 @@ static int sync_thread_backup(void *data) | |||
1511 | len = ip_vs_receive(tinfo->sock, tinfo->buf, | 1687 | len = ip_vs_receive(tinfo->sock, tinfo->buf, |
1512 | ipvs->recv_mesg_maxlen); | 1688 | ipvs->recv_mesg_maxlen); |
1513 | if (len <= 0) { | 1689 | if (len <= 0) { |
1514 | pr_err("receiving message error\n"); | 1690 | if (len != -EAGAIN) |
1691 | pr_err("receiving message error\n"); | ||
1515 | break; | 1692 | break; |
1516 | } | 1693 | } |
1517 | 1694 | ||
@@ -1535,86 +1712,140 @@ static int sync_thread_backup(void *data) | |||
1535 | int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) | 1712 | int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) |
1536 | { | 1713 | { |
1537 | struct ip_vs_sync_thread_data *tinfo; | 1714 | struct ip_vs_sync_thread_data *tinfo; |
1538 | struct task_struct **realtask, *task; | 1715 | struct task_struct **array = NULL, *task; |
1539 | struct socket *sock; | 1716 | struct socket *sock; |
1540 | struct netns_ipvs *ipvs = net_ipvs(net); | 1717 | struct netns_ipvs *ipvs = net_ipvs(net); |
1541 | char *name, *buf = NULL; | 1718 | char *name; |
1542 | int (*threadfn)(void *data); | 1719 | int (*threadfn)(void *data); |
1720 | int id, count; | ||
1543 | int result = -ENOMEM; | 1721 | int result = -ENOMEM; |
1544 | 1722 | ||
1545 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1723 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
1546 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", | 1724 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", |
1547 | sizeof(struct ip_vs_sync_conn_v0)); | 1725 | sizeof(struct ip_vs_sync_conn_v0)); |
1548 | 1726 | ||
1727 | if (!ipvs->sync_state) { | ||
1728 | count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); | ||
1729 | ipvs->threads_mask = count - 1; | ||
1730 | } else | ||
1731 | count = ipvs->threads_mask + 1; | ||
1549 | 1732 | ||
1550 | if (state == IP_VS_STATE_MASTER) { | 1733 | if (state == IP_VS_STATE_MASTER) { |
1551 | if (ipvs->master_thread) | 1734 | if (ipvs->ms) |
1552 | return -EEXIST; | 1735 | return -EEXIST; |
1553 | 1736 | ||
1554 | strlcpy(ipvs->master_mcast_ifn, mcast_ifn, | 1737 | strlcpy(ipvs->master_mcast_ifn, mcast_ifn, |
1555 | sizeof(ipvs->master_mcast_ifn)); | 1738 | sizeof(ipvs->master_mcast_ifn)); |
1556 | ipvs->master_syncid = syncid; | 1739 | ipvs->master_syncid = syncid; |
1557 | realtask = &ipvs->master_thread; | 1740 | name = "ipvs-m:%d:%d"; |
1558 | name = "ipvs_master:%d"; | ||
1559 | threadfn = sync_thread_master; | 1741 | threadfn = sync_thread_master; |
1560 | sock = make_send_sock(net); | ||
1561 | } else if (state == IP_VS_STATE_BACKUP) { | 1742 | } else if (state == IP_VS_STATE_BACKUP) { |
1562 | if (ipvs->backup_thread) | 1743 | if (ipvs->backup_threads) |
1563 | return -EEXIST; | 1744 | return -EEXIST; |
1564 | 1745 | ||
1565 | strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, | 1746 | strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, |
1566 | sizeof(ipvs->backup_mcast_ifn)); | 1747 | sizeof(ipvs->backup_mcast_ifn)); |
1567 | ipvs->backup_syncid = syncid; | 1748 | ipvs->backup_syncid = syncid; |
1568 | realtask = &ipvs->backup_thread; | 1749 | name = "ipvs-b:%d:%d"; |
1569 | name = "ipvs_backup:%d"; | ||
1570 | threadfn = sync_thread_backup; | 1750 | threadfn = sync_thread_backup; |
1571 | sock = make_receive_sock(net); | ||
1572 | } else { | 1751 | } else { |
1573 | return -EINVAL; | 1752 | return -EINVAL; |
1574 | } | 1753 | } |
1575 | 1754 | ||
1576 | if (IS_ERR(sock)) { | 1755 | if (state == IP_VS_STATE_MASTER) { |
1577 | result = PTR_ERR(sock); | 1756 | struct ipvs_master_sync_state *ms; |
1578 | goto out; | ||
1579 | } | ||
1580 | 1757 | ||
1581 | set_sync_mesg_maxlen(net, state); | 1758 | ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL); |
1582 | if (state == IP_VS_STATE_BACKUP) { | 1759 | if (!ipvs->ms) |
1583 | buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); | 1760 | goto out; |
1584 | if (!buf) | 1761 | ms = ipvs->ms; |
1585 | goto outsocket; | 1762 | for (id = 0; id < count; id++, ms++) { |
1763 | INIT_LIST_HEAD(&ms->sync_queue); | ||
1764 | ms->sync_queue_len = 0; | ||
1765 | ms->sync_queue_delay = 0; | ||
1766 | INIT_DELAYED_WORK(&ms->master_wakeup_work, | ||
1767 | master_wakeup_work_handler); | ||
1768 | ms->ipvs = ipvs; | ||
1769 | } | ||
1770 | } else { | ||
1771 | array = kzalloc(count * sizeof(struct task_struct *), | ||
1772 | GFP_KERNEL); | ||
1773 | if (!array) | ||
1774 | goto out; | ||
1586 | } | 1775 | } |
1776 | set_sync_mesg_maxlen(net, state); | ||
1587 | 1777 | ||
1588 | tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); | 1778 | tinfo = NULL; |
1589 | if (!tinfo) | 1779 | for (id = 0; id < count; id++) { |
1590 | goto outbuf; | 1780 | if (state == IP_VS_STATE_MASTER) |
1591 | 1781 | sock = make_send_sock(net, id); | |
1592 | tinfo->net = net; | 1782 | else |
1593 | tinfo->sock = sock; | 1783 | sock = make_receive_sock(net, id); |
1594 | tinfo->buf = buf; | 1784 | if (IS_ERR(sock)) { |
1785 | result = PTR_ERR(sock); | ||
1786 | goto outtinfo; | ||
1787 | } | ||
1788 | tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); | ||
1789 | if (!tinfo) | ||
1790 | goto outsocket; | ||
1791 | tinfo->net = net; | ||
1792 | tinfo->sock = sock; | ||
1793 | if (state == IP_VS_STATE_BACKUP) { | ||
1794 | tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen, | ||
1795 | GFP_KERNEL); | ||
1796 | if (!tinfo->buf) | ||
1797 | goto outtinfo; | ||
1798 | } | ||
1799 | tinfo->id = id; | ||
1595 | 1800 | ||
1596 | task = kthread_run(threadfn, tinfo, name, ipvs->gen); | 1801 | task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); |
1597 | if (IS_ERR(task)) { | 1802 | if (IS_ERR(task)) { |
1598 | result = PTR_ERR(task); | 1803 | result = PTR_ERR(task); |
1599 | goto outtinfo; | 1804 | goto outtinfo; |
1805 | } | ||
1806 | tinfo = NULL; | ||
1807 | if (state == IP_VS_STATE_MASTER) | ||
1808 | ipvs->ms[id].master_thread = task; | ||
1809 | else | ||
1810 | array[id] = task; | ||
1600 | } | 1811 | } |
1601 | 1812 | ||
1602 | /* mark as active */ | 1813 | /* mark as active */ |
1603 | *realtask = task; | 1814 | |
1815 | if (state == IP_VS_STATE_BACKUP) | ||
1816 | ipvs->backup_threads = array; | ||
1817 | spin_lock_bh(&ipvs->sync_buff_lock); | ||
1604 | ipvs->sync_state |= state; | 1818 | ipvs->sync_state |= state; |
1819 | spin_unlock_bh(&ipvs->sync_buff_lock); | ||
1605 | 1820 | ||
1606 | /* increase the module use count */ | 1821 | /* increase the module use count */ |
1607 | ip_vs_use_count_inc(); | 1822 | ip_vs_use_count_inc(); |
1608 | 1823 | ||
1609 | return 0; | 1824 | return 0; |
1610 | 1825 | ||
1611 | outtinfo: | ||
1612 | kfree(tinfo); | ||
1613 | outbuf: | ||
1614 | kfree(buf); | ||
1615 | outsocket: | 1826 | outsocket: |
1616 | sk_release_kernel(sock->sk); | 1827 | sk_release_kernel(sock->sk); |
1828 | |||
1829 | outtinfo: | ||
1830 | if (tinfo) { | ||
1831 | sk_release_kernel(tinfo->sock->sk); | ||
1832 | kfree(tinfo->buf); | ||
1833 | kfree(tinfo); | ||
1834 | } | ||
1835 | count = id; | ||
1836 | while (count-- > 0) { | ||
1837 | if (state == IP_VS_STATE_MASTER) | ||
1838 | kthread_stop(ipvs->ms[count].master_thread); | ||
1839 | else | ||
1840 | kthread_stop(array[count]); | ||
1841 | } | ||
1842 | kfree(array); | ||
1843 | |||
1617 | out: | 1844 | out: |
1845 | if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { | ||
1846 | kfree(ipvs->ms); | ||
1847 | ipvs->ms = NULL; | ||
1848 | } | ||
1618 | return result; | 1849 | return result; |
1619 | } | 1850 | } |
1620 | 1851 | ||
@@ -1622,38 +1853,60 @@ out: | |||
1622 | int stop_sync_thread(struct net *net, int state) | 1853 | int stop_sync_thread(struct net *net, int state) |
1623 | { | 1854 | { |
1624 | struct netns_ipvs *ipvs = net_ipvs(net); | 1855 | struct netns_ipvs *ipvs = net_ipvs(net); |
1856 | struct task_struct **array; | ||
1857 | int id; | ||
1625 | int retc = -EINVAL; | 1858 | int retc = -EINVAL; |
1626 | 1859 | ||
1627 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1860 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
1628 | 1861 | ||
1629 | if (state == IP_VS_STATE_MASTER) { | 1862 | if (state == IP_VS_STATE_MASTER) { |
1630 | if (!ipvs->master_thread) | 1863 | if (!ipvs->ms) |
1631 | return -ESRCH; | 1864 | return -ESRCH; |
1632 | 1865 | ||
1633 | pr_info("stopping master sync thread %d ...\n", | ||
1634 | task_pid_nr(ipvs->master_thread)); | ||
1635 | |||
1636 | /* | 1866 | /* |
1637 | * The lock synchronizes with sb_queue_tail(), so that we don't | 1867 | * The lock synchronizes with sb_queue_tail(), so that we don't |
1638 | * add sync buffers to the queue, when we are already in | 1868 | * add sync buffers to the queue, when we are already in |
1639 | * progress of stopping the master sync daemon. | 1869 | * progress of stopping the master sync daemon. |
1640 | */ | 1870 | */ |
1641 | 1871 | ||
1642 | spin_lock_bh(&ipvs->sync_lock); | 1872 | spin_lock_bh(&ipvs->sync_buff_lock); |
1873 | spin_lock(&ipvs->sync_lock); | ||
1643 | ipvs->sync_state &= ~IP_VS_STATE_MASTER; | 1874 | ipvs->sync_state &= ~IP_VS_STATE_MASTER; |
1644 | spin_unlock_bh(&ipvs->sync_lock); | 1875 | spin_unlock(&ipvs->sync_lock); |
1645 | retc = kthread_stop(ipvs->master_thread); | 1876 | spin_unlock_bh(&ipvs->sync_buff_lock); |
1646 | ipvs->master_thread = NULL; | 1877 | |
1878 | retc = 0; | ||
1879 | for (id = ipvs->threads_mask; id >= 0; id--) { | ||
1880 | struct ipvs_master_sync_state *ms = &ipvs->ms[id]; | ||
1881 | int ret; | ||
1882 | |||
1883 | pr_info("stopping master sync thread %d ...\n", | ||
1884 | task_pid_nr(ms->master_thread)); | ||
1885 | cancel_delayed_work_sync(&ms->master_wakeup_work); | ||
1886 | ret = kthread_stop(ms->master_thread); | ||
1887 | if (retc >= 0) | ||
1888 | retc = ret; | ||
1889 | } | ||
1890 | kfree(ipvs->ms); | ||
1891 | ipvs->ms = NULL; | ||
1647 | } else if (state == IP_VS_STATE_BACKUP) { | 1892 | } else if (state == IP_VS_STATE_BACKUP) { |
1648 | if (!ipvs->backup_thread) | 1893 | if (!ipvs->backup_threads) |
1649 | return -ESRCH; | 1894 | return -ESRCH; |
1650 | 1895 | ||
1651 | pr_info("stopping backup sync thread %d ...\n", | ||
1652 | task_pid_nr(ipvs->backup_thread)); | ||
1653 | |||
1654 | ipvs->sync_state &= ~IP_VS_STATE_BACKUP; | 1896 | ipvs->sync_state &= ~IP_VS_STATE_BACKUP; |
1655 | retc = kthread_stop(ipvs->backup_thread); | 1897 | array = ipvs->backup_threads; |
1656 | ipvs->backup_thread = NULL; | 1898 | retc = 0; |
1899 | for (id = ipvs->threads_mask; id >= 0; id--) { | ||
1900 | int ret; | ||
1901 | |||
1902 | pr_info("stopping backup sync thread %d ...\n", | ||
1903 | task_pid_nr(array[id])); | ||
1904 | ret = kthread_stop(array[id]); | ||
1905 | if (retc >= 0) | ||
1906 | retc = ret; | ||
1907 | } | ||
1908 | kfree(array); | ||
1909 | ipvs->backup_threads = NULL; | ||
1657 | } | 1910 | } |
1658 | 1911 | ||
1659 | /* decrease the module use count */ | 1912 | /* decrease the module use count */ |
@@ -1670,13 +1923,8 @@ int __net_init ip_vs_sync_net_init(struct net *net) | |||
1670 | struct netns_ipvs *ipvs = net_ipvs(net); | 1923 | struct netns_ipvs *ipvs = net_ipvs(net); |
1671 | 1924 | ||
1672 | __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); | 1925 | __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); |
1673 | INIT_LIST_HEAD(&ipvs->sync_queue); | ||
1674 | spin_lock_init(&ipvs->sync_lock); | 1926 | spin_lock_init(&ipvs->sync_lock); |
1675 | spin_lock_init(&ipvs->sync_buff_lock); | 1927 | spin_lock_init(&ipvs->sync_buff_lock); |
1676 | |||
1677 | ipvs->sync_mcast_addr.sin_family = AF_INET; | ||
1678 | ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT); | ||
1679 | ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); | ||
1680 | return 0; | 1928 | return 0; |
1681 | } | 1929 | } |
1682 | 1930 | ||
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index fd0d4e09876a..231be7dd547a 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c | |||
@@ -84,7 +84,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) | |||
84 | /* | 84 | /* |
85 | * Allocate the mark variable for WRR scheduling | 85 | * Allocate the mark variable for WRR scheduling |
86 | */ | 86 | */ |
87 | mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC); | 87 | mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL); |
88 | if (mark == NULL) | 88 | if (mark == NULL) |
89 | return -ENOMEM; | 89 | return -ENOMEM; |
90 | 90 | ||
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cf0747c5741f..32c59093146e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -1336,7 +1336,6 @@ static void nf_conntrack_cleanup_init_net(void) | |||
1336 | while (untrack_refs() > 0) | 1336 | while (untrack_refs() > 0) |
1337 | schedule(); | 1337 | schedule(); |
1338 | 1338 | ||
1339 | nf_conntrack_helper_fini(); | ||
1340 | nf_conntrack_proto_fini(); | 1339 | nf_conntrack_proto_fini(); |
1341 | #ifdef CONFIG_NF_CONNTRACK_ZONES | 1340 | #ifdef CONFIG_NF_CONNTRACK_ZONES |
1342 | nf_ct_extend_unregister(&nf_ct_zone_extend); | 1341 | nf_ct_extend_unregister(&nf_ct_zone_extend); |
@@ -1354,6 +1353,7 @@ static void nf_conntrack_cleanup_net(struct net *net) | |||
1354 | } | 1353 | } |
1355 | 1354 | ||
1356 | nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); | 1355 | nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); |
1356 | nf_conntrack_helper_fini(net); | ||
1357 | nf_conntrack_timeout_fini(net); | 1357 | nf_conntrack_timeout_fini(net); |
1358 | nf_conntrack_ecache_fini(net); | 1358 | nf_conntrack_ecache_fini(net); |
1359 | nf_conntrack_tstamp_fini(net); | 1359 | nf_conntrack_tstamp_fini(net); |
@@ -1504,10 +1504,6 @@ static int nf_conntrack_init_init_net(void) | |||
1504 | if (ret < 0) | 1504 | if (ret < 0) |
1505 | goto err_proto; | 1505 | goto err_proto; |
1506 | 1506 | ||
1507 | ret = nf_conntrack_helper_init(); | ||
1508 | if (ret < 0) | ||
1509 | goto err_helper; | ||
1510 | |||
1511 | #ifdef CONFIG_NF_CONNTRACK_ZONES | 1507 | #ifdef CONFIG_NF_CONNTRACK_ZONES |
1512 | ret = nf_ct_extend_register(&nf_ct_zone_extend); | 1508 | ret = nf_ct_extend_register(&nf_ct_zone_extend); |
1513 | if (ret < 0) | 1509 | if (ret < 0) |
@@ -1525,10 +1521,8 @@ static int nf_conntrack_init_init_net(void) | |||
1525 | 1521 | ||
1526 | #ifdef CONFIG_NF_CONNTRACK_ZONES | 1522 | #ifdef CONFIG_NF_CONNTRACK_ZONES |
1527 | err_extend: | 1523 | err_extend: |
1528 | nf_conntrack_helper_fini(); | ||
1529 | #endif | ||
1530 | err_helper: | ||
1531 | nf_conntrack_proto_fini(); | 1524 | nf_conntrack_proto_fini(); |
1525 | #endif | ||
1532 | err_proto: | 1526 | err_proto: |
1533 | return ret; | 1527 | return ret; |
1534 | } | 1528 | } |
@@ -1589,9 +1583,14 @@ static int nf_conntrack_init_net(struct net *net) | |||
1589 | ret = nf_conntrack_timeout_init(net); | 1583 | ret = nf_conntrack_timeout_init(net); |
1590 | if (ret < 0) | 1584 | if (ret < 0) |
1591 | goto err_timeout; | 1585 | goto err_timeout; |
1586 | ret = nf_conntrack_helper_init(net); | ||
1587 | if (ret < 0) | ||
1588 | goto err_helper; | ||
1592 | 1589 | ||
1593 | return 0; | 1590 | return 0; |
1594 | 1591 | ||
1592 | err_helper: | ||
1593 | nf_conntrack_timeout_fini(net); | ||
1595 | err_timeout: | 1594 | err_timeout: |
1596 | nf_conntrack_ecache_fini(net); | 1595 | nf_conntrack_ecache_fini(net); |
1597 | err_ecache: | 1596 | err_ecache: |
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index b924f3a49a8e..e7be79e640de 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c | |||
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); | |||
84 | int nf_conntrack_register_notifier(struct net *net, | 84 | int nf_conntrack_register_notifier(struct net *net, |
85 | struct nf_ct_event_notifier *new) | 85 | struct nf_ct_event_notifier *new) |
86 | { | 86 | { |
87 | int ret = 0; | 87 | int ret; |
88 | struct nf_ct_event_notifier *notify; | 88 | struct nf_ct_event_notifier *notify; |
89 | 89 | ||
90 | mutex_lock(&nf_ct_ecache_mutex); | 90 | mutex_lock(&nf_ct_ecache_mutex); |
@@ -95,8 +95,7 @@ int nf_conntrack_register_notifier(struct net *net, | |||
95 | goto out_unlock; | 95 | goto out_unlock; |
96 | } | 96 | } |
97 | rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); | 97 | rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); |
98 | mutex_unlock(&nf_ct_ecache_mutex); | 98 | ret = 0; |
99 | return ret; | ||
100 | 99 | ||
101 | out_unlock: | 100 | out_unlock: |
102 | mutex_unlock(&nf_ct_ecache_mutex); | 101 | mutex_unlock(&nf_ct_ecache_mutex); |
@@ -121,7 +120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); | |||
121 | int nf_ct_expect_register_notifier(struct net *net, | 120 | int nf_ct_expect_register_notifier(struct net *net, |
122 | struct nf_exp_event_notifier *new) | 121 | struct nf_exp_event_notifier *new) |
123 | { | 122 | { |
124 | int ret = 0; | 123 | int ret; |
125 | struct nf_exp_event_notifier *notify; | 124 | struct nf_exp_event_notifier *notify; |
126 | 125 | ||
127 | mutex_lock(&nf_ct_ecache_mutex); | 126 | mutex_lock(&nf_ct_ecache_mutex); |
@@ -132,8 +131,7 @@ int nf_ct_expect_register_notifier(struct net *net, | |||
132 | goto out_unlock; | 131 | goto out_unlock; |
133 | } | 132 | } |
134 | rcu_assign_pointer(net->ct.nf_expect_event_cb, new); | 133 | rcu_assign_pointer(net->ct.nf_expect_event_cb, new); |
135 | mutex_unlock(&nf_ct_ecache_mutex); | 134 | ret = 0; |
136 | return ret; | ||
137 | 135 | ||
138 | out_unlock: | 136 | out_unlock: |
139 | mutex_unlock(&nf_ct_ecache_mutex); | 137 | mutex_unlock(&nf_ct_ecache_mutex); |
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 436b7cb79ba4..4fa2ff961f5a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c | |||
@@ -34,6 +34,67 @@ static struct hlist_head *nf_ct_helper_hash __read_mostly; | |||
34 | static unsigned int nf_ct_helper_hsize __read_mostly; | 34 | static unsigned int nf_ct_helper_hsize __read_mostly; |
35 | static unsigned int nf_ct_helper_count __read_mostly; | 35 | static unsigned int nf_ct_helper_count __read_mostly; |
36 | 36 | ||
37 | static bool nf_ct_auto_assign_helper __read_mostly = true; | ||
38 | module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); | ||
39 | MODULE_PARM_DESC(nf_conntrack_helper, | ||
40 | "Enable automatic conntrack helper assignment (default 1)"); | ||
41 | |||
42 | #ifdef CONFIG_SYSCTL | ||
43 | static struct ctl_table helper_sysctl_table[] = { | ||
44 | { | ||
45 | .procname = "nf_conntrack_helper", | ||
46 | .data = &init_net.ct.sysctl_auto_assign_helper, | ||
47 | .maxlen = sizeof(unsigned int), | ||
48 | .mode = 0644, | ||
49 | .proc_handler = proc_dointvec, | ||
50 | }, | ||
51 | {} | ||
52 | }; | ||
53 | |||
54 | static int nf_conntrack_helper_init_sysctl(struct net *net) | ||
55 | { | ||
56 | struct ctl_table *table; | ||
57 | |||
58 | table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table), | ||
59 | GFP_KERNEL); | ||
60 | if (!table) | ||
61 | goto out; | ||
62 | |||
63 | table[0].data = &net->ct.sysctl_auto_assign_helper; | ||
64 | |||
65 | net->ct.helper_sysctl_header = | ||
66 | register_net_sysctl(net, "net/netfilter", table); | ||
67 | |||
68 | if (!net->ct.helper_sysctl_header) { | ||
69 | pr_err("nf_conntrack_helper: can't register to sysctl.\n"); | ||
70 | goto out_register; | ||
71 | } | ||
72 | return 0; | ||
73 | |||
74 | out_register: | ||
75 | kfree(table); | ||
76 | out: | ||
77 | return -ENOMEM; | ||
78 | } | ||
79 | |||
80 | static void nf_conntrack_helper_fini_sysctl(struct net *net) | ||
81 | { | ||
82 | struct ctl_table *table; | ||
83 | |||
84 | table = net->ct.helper_sysctl_header->ctl_table_arg; | ||
85 | unregister_net_sysctl_table(net->ct.helper_sysctl_header); | ||
86 | kfree(table); | ||
87 | } | ||
88 | #else | ||
89 | static int nf_conntrack_helper_init_sysctl(struct net *net) | ||
90 | { | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static void nf_conntrack_helper_fini_sysctl(struct net *net) | ||
95 | { | ||
96 | } | ||
97 | #endif /* CONFIG_SYSCTL */ | ||
37 | 98 | ||
38 | /* Stupid hash, but collision free for the default registrations of the | 99 | /* Stupid hash, but collision free for the default registrations of the |
39 | * helpers currently in the kernel. */ | 100 | * helpers currently in the kernel. */ |
@@ -118,17 +179,38 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, | |||
118 | { | 179 | { |
119 | struct nf_conntrack_helper *helper = NULL; | 180 | struct nf_conntrack_helper *helper = NULL; |
120 | struct nf_conn_help *help; | 181 | struct nf_conn_help *help; |
182 | struct net *net = nf_ct_net(ct); | ||
121 | int ret = 0; | 183 | int ret = 0; |
122 | 184 | ||
185 | /* We already got a helper explicitly attached. The function | ||
186 | * nf_conntrack_alter_reply - in case NAT is in use - asks for looking | ||
187 | * the helper up again. Since now the user is in full control of | ||
188 | * making consistent helper configurations, skip this automatic | ||
189 | * re-lookup, otherwise we'll lose the helper. | ||
190 | */ | ||
191 | if (test_bit(IPS_HELPER_BIT, &ct->status)) | ||
192 | return 0; | ||
193 | |||
123 | if (tmpl != NULL) { | 194 | if (tmpl != NULL) { |
124 | help = nfct_help(tmpl); | 195 | help = nfct_help(tmpl); |
125 | if (help != NULL) | 196 | if (help != NULL) { |
126 | helper = help->helper; | 197 | helper = help->helper; |
198 | set_bit(IPS_HELPER_BIT, &ct->status); | ||
199 | } | ||
127 | } | 200 | } |
128 | 201 | ||
129 | help = nfct_help(ct); | 202 | help = nfct_help(ct); |
130 | if (helper == NULL) | 203 | if (net->ct.sysctl_auto_assign_helper && helper == NULL) { |
131 | helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | 204 | helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); |
205 | if (unlikely(!net->ct.auto_assign_helper_warned && helper)) { | ||
206 | pr_info("nf_conntrack: automatic helper " | ||
207 | "assignment is deprecated and it will " | ||
208 | "be removed soon. Use the iptables CT target " | ||
209 | "to attach helpers instead.\n"); | ||
210 | net->ct.auto_assign_helper_warned = true; | ||
211 | } | ||
212 | } | ||
213 | |||
132 | if (helper == NULL) { | 214 | if (helper == NULL) { |
133 | if (help) | 215 | if (help) |
134 | RCU_INIT_POINTER(help->helper, NULL); | 216 | RCU_INIT_POINTER(help->helper, NULL); |
@@ -315,28 +397,44 @@ static struct nf_ct_ext_type helper_extend __read_mostly = { | |||
315 | .id = NF_CT_EXT_HELPER, | 397 | .id = NF_CT_EXT_HELPER, |
316 | }; | 398 | }; |
317 | 399 | ||
318 | int nf_conntrack_helper_init(void) | 400 | int nf_conntrack_helper_init(struct net *net) |
319 | { | 401 | { |
320 | int err; | 402 | int err; |
321 | 403 | ||
322 | nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ | 404 | net->ct.auto_assign_helper_warned = false; |
323 | nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); | 405 | net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper; |
324 | if (!nf_ct_helper_hash) | 406 | |
325 | return -ENOMEM; | 407 | if (net_eq(net, &init_net)) { |
408 | nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ | ||
409 | nf_ct_helper_hash = | ||
410 | nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); | ||
411 | if (!nf_ct_helper_hash) | ||
412 | return -ENOMEM; | ||
326 | 413 | ||
327 | err = nf_ct_extend_register(&helper_extend); | 414 | err = nf_ct_extend_register(&helper_extend); |
415 | if (err < 0) | ||
416 | goto err1; | ||
417 | } | ||
418 | |||
419 | err = nf_conntrack_helper_init_sysctl(net); | ||
328 | if (err < 0) | 420 | if (err < 0) |
329 | goto err1; | 421 | goto out_sysctl; |
330 | 422 | ||
331 | return 0; | 423 | return 0; |
332 | 424 | ||
425 | out_sysctl: | ||
426 | if (net_eq(net, &init_net)) | ||
427 | nf_ct_extend_unregister(&helper_extend); | ||
333 | err1: | 428 | err1: |
334 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); | 429 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); |
335 | return err; | 430 | return err; |
336 | } | 431 | } |
337 | 432 | ||
338 | void nf_conntrack_helper_fini(void) | 433 | void nf_conntrack_helper_fini(struct net *net) |
339 | { | 434 | { |
340 | nf_ct_extend_unregister(&helper_extend); | 435 | nf_conntrack_helper_fini_sysctl(net); |
341 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); | 436 | if (net_eq(net, &init_net)) { |
437 | nf_ct_extend_unregister(&helper_extend); | ||
438 | nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); | ||
439 | } | ||
342 | } | 440 | } |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 462ec2dbe561..6f4b00a8fc73 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -2080,7 +2080,15 @@ static int | |||
2080 | ctnetlink_change_expect(struct nf_conntrack_expect *x, | 2080 | ctnetlink_change_expect(struct nf_conntrack_expect *x, |
2081 | const struct nlattr * const cda[]) | 2081 | const struct nlattr * const cda[]) |
2082 | { | 2082 | { |
2083 | return -EOPNOTSUPP; | 2083 | if (cda[CTA_EXPECT_TIMEOUT]) { |
2084 | if (!del_timer(&x->timeout)) | ||
2085 | return -ETIME; | ||
2086 | |||
2087 | x->timeout.expires = jiffies + | ||
2088 | ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; | ||
2089 | add_timer(&x->timeout); | ||
2090 | } | ||
2091 | return 0; | ||
2084 | } | 2092 | } |
2085 | 2093 | ||
2086 | static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { | 2094 | static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { |
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 0920ea3bf599..d309e7f472d8 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/netlink.h> | 14 | #include <linux/netlink.h> |
15 | #include <linux/rtnetlink.h> | 15 | #include <linux/rtnetlink.h> |
16 | #include <linux/if.h> | 16 | #include <linux/if.h> |
17 | #include <linux/netfilter_ipv4/ip_queue.h> | ||
18 | #include <linux/inet_diag.h> | 17 | #include <linux/inet_diag.h> |
19 | #include <linux/xfrm.h> | 18 | #include <linux/xfrm.h> |
20 | #include <linux/audit.h> | 19 | #include <linux/audit.h> |
@@ -70,12 +69,6 @@ static struct nlmsg_perm nlmsg_route_perms[] = | |||
70 | { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, | 69 | { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, |
71 | }; | 70 | }; |
72 | 71 | ||
73 | static struct nlmsg_perm nlmsg_firewall_perms[] = | ||
74 | { | ||
75 | { IPQM_MODE, NETLINK_FIREWALL_SOCKET__NLMSG_WRITE }, | ||
76 | { IPQM_VERDICT, NETLINK_FIREWALL_SOCKET__NLMSG_WRITE }, | ||
77 | }; | ||
78 | |||
79 | static struct nlmsg_perm nlmsg_tcpdiag_perms[] = | 72 | static struct nlmsg_perm nlmsg_tcpdiag_perms[] = |
80 | { | 73 | { |
81 | { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, | 74 | { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, |
@@ -145,12 +138,6 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) | |||
145 | sizeof(nlmsg_route_perms)); | 138 | sizeof(nlmsg_route_perms)); |
146 | break; | 139 | break; |
147 | 140 | ||
148 | case SECCLASS_NETLINK_FIREWALL_SOCKET: | ||
149 | case SECCLASS_NETLINK_IP6FW_SOCKET: | ||
150 | err = nlmsg_perm(nlmsg_type, perm, nlmsg_firewall_perms, | ||
151 | sizeof(nlmsg_firewall_perms)); | ||
152 | break; | ||
153 | |||
154 | case SECCLASS_NETLINK_TCPDIAG_SOCKET: | 141 | case SECCLASS_NETLINK_TCPDIAG_SOCKET: |
155 | err = nlmsg_perm(nlmsg_type, perm, nlmsg_tcpdiag_perms, | 142 | err = nlmsg_perm(nlmsg_type, perm, nlmsg_tcpdiag_perms, |
156 | sizeof(nlmsg_tcpdiag_perms)); | 143 | sizeof(nlmsg_tcpdiag_perms)); |