aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-07-25 03:18:10 -0400
committerDavid S. Miller <davem@davemloft.net>2015-07-25 03:18:10 -0400
commit485164381c1d663d3be1c9d2c600be0fb80a68ad (patch)
treeed96f528d8964c04dc398c46a0310a964fdc09d2
parentcc9f4daa638e660f7a910b8094122561470ac331 (diff)
parent4b31814d20cbe5cd4ccf18089751e77a04afe4f2 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says: ==================== Netfilter/IPVS fixes for net The following patchset contains ten Netfilter/IPVS fixes, they are: 1) Address refcount leak when creating an expectation from the ctnetlink interface. 2) Fix bug splat in the IDLETIMER target related to sysfs, from Dmitry Torokhov. 3) Resolve panic for unreachable route in IPVS with locally generated traffic in the output path, from Alex Gartrell. 4) Fix wrong source address in rare cases for tunneled traffic in IPVS, from Julian Anastasov. 5) Fix crash if scheduler is changed via ipvsadm -E, again from Julian. 6) Make sure skb->sk is unset for forwarded traffic through IPVS, again from Alex Gartrell. 7) Fix crash with IPVS sync protocol v0 and FTP, from Julian. 8) Reset sender cpu for forwarded traffic in IPVS, also from Julian. 9) Allocate template conntracks through kmalloc() to resolve netns dependency problems with the conntrack kmem_cache. 10) Fix zones with expectations that clash using the same tuple, from Joe Stringer. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/netfilter/nf_conntrack.h2
-rw-r--r--include/net/netns/conntrack.h1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c78
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c41
-rw-r--r--net/netfilter/nf_conntrack_core.c67
-rw-r--r--net/netfilter/nf_conntrack_expect.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c5
-rw-r--r--net/netfilter/nf_synproxy_core.c7
-rw-r--r--net/netfilter/xt_CT.c8
-rw-r--r--net/netfilter/xt_IDLETIMER.c1
13 files changed, 164 insertions, 79 deletions
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 095433b8a8b0..37cd3911d5c5 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -291,7 +291,7 @@ extern unsigned int nf_conntrack_max;
291extern unsigned int nf_conntrack_hash_rnd; 291extern unsigned int nf_conntrack_hash_rnd;
292void init_nf_conntrack_hash_rnd(void); 292void init_nf_conntrack_hash_rnd(void);
293 293
294void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl); 294struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
295 295
296#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) 296#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
297#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) 297#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 29d6a94db54d..723b61c82b3f 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -68,7 +68,6 @@ struct ct_pcpu {
68 spinlock_t lock; 68 spinlock_t lock;
69 struct hlist_nulls_head unconfirmed; 69 struct hlist_nulls_head unconfirmed;
70 struct hlist_nulls_head dying; 70 struct hlist_nulls_head dying;
71 struct hlist_nulls_head tmpl;
72}; 71};
73 72
74struct netns_ct { 73struct netns_ct {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5d2b806a862e..38fbc194b9cb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
319 * return *ignored=0 i.e. ICMP and NF_DROP 319 * return *ignored=0 i.e. ICMP and NF_DROP
320 */ 320 */
321 sched = rcu_dereference(svc->scheduler); 321 sched = rcu_dereference(svc->scheduler);
322 dest = sched->schedule(svc, skb, iph); 322 if (sched) {
323 /* read svc->sched_data after svc->scheduler */
324 smp_rmb();
325 dest = sched->schedule(svc, skb, iph);
326 } else {
327 dest = NULL;
328 }
323 if (!dest) { 329 if (!dest) {
324 IP_VS_DBG(1, "p-schedule: no dest found.\n"); 330 IP_VS_DBG(1, "p-schedule: no dest found.\n");
325 kfree(param.pe_data); 331 kfree(param.pe_data);
@@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
467 } 473 }
468 474
469 sched = rcu_dereference(svc->scheduler); 475 sched = rcu_dereference(svc->scheduler);
470 dest = sched->schedule(svc, skb, iph); 476 if (sched) {
477 /* read svc->sched_data after svc->scheduler */
478 smp_rmb();
479 dest = sched->schedule(svc, skb, iph);
480 } else {
481 dest = NULL;
482 }
471 if (dest == NULL) { 483 if (dest == NULL) {
472 IP_VS_DBG(1, "Schedule: no dest found.\n"); 484 IP_VS_DBG(1, "Schedule: no dest found.\n");
473 return NULL; 485 return NULL;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 285eae3a1454..24c554201a76 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
842 __ip_vs_dst_cache_reset(dest); 842 __ip_vs_dst_cache_reset(dest);
843 spin_unlock_bh(&dest->dst_lock); 843 spin_unlock_bh(&dest->dst_lock);
844 844
845 sched = rcu_dereference_protected(svc->scheduler, 1);
846 if (add) { 845 if (add) {
847 ip_vs_start_estimator(svc->net, &dest->stats); 846 ip_vs_start_estimator(svc->net, &dest->stats);
848 list_add_rcu(&dest->n_list, &svc->destinations); 847 list_add_rcu(&dest->n_list, &svc->destinations);
849 svc->num_dests++; 848 svc->num_dests++;
850 if (sched->add_dest) 849 sched = rcu_dereference_protected(svc->scheduler, 1);
850 if (sched && sched->add_dest)
851 sched->add_dest(svc, dest); 851 sched->add_dest(svc, dest);
852 } else { 852 } else {
853 if (sched->upd_dest) 853 sched = rcu_dereference_protected(svc->scheduler, 1);
854 if (sched && sched->upd_dest)
854 sched->upd_dest(svc, dest); 855 sched->upd_dest(svc, dest);
855 } 856 }
856} 857}
@@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1084 struct ip_vs_scheduler *sched; 1085 struct ip_vs_scheduler *sched;
1085 1086
1086 sched = rcu_dereference_protected(svc->scheduler, 1); 1087 sched = rcu_dereference_protected(svc->scheduler, 1);
1087 if (sched->del_dest) 1088 if (sched && sched->del_dest)
1088 sched->del_dest(svc, dest); 1089 sched->del_dest(svc, dest);
1089 } 1090 }
1090} 1091}
@@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1175 ip_vs_use_count_inc(); 1176 ip_vs_use_count_inc();
1176 1177
1177 /* Lookup the scheduler by 'u->sched_name' */ 1178 /* Lookup the scheduler by 'u->sched_name' */
1178 sched = ip_vs_scheduler_get(u->sched_name); 1179 if (strcmp(u->sched_name, "none")) {
1179 if (sched == NULL) { 1180 sched = ip_vs_scheduler_get(u->sched_name);
1180 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1181 if (!sched) {
1181 ret = -ENOENT; 1182 pr_info("Scheduler module ip_vs_%s not found\n",
1182 goto out_err; 1183 u->sched_name);
1184 ret = -ENOENT;
1185 goto out_err;
1186 }
1183 } 1187 }
1184 1188
1185 if (u->pe_name && *u->pe_name) { 1189 if (u->pe_name && *u->pe_name) {
@@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1240 spin_lock_init(&svc->stats.lock); 1244 spin_lock_init(&svc->stats.lock);
1241 1245
1242 /* Bind the scheduler */ 1246 /* Bind the scheduler */
1243 ret = ip_vs_bind_scheduler(svc, sched); 1247 if (sched) {
1244 if (ret) 1248 ret = ip_vs_bind_scheduler(svc, sched);
1245 goto out_err; 1249 if (ret)
1246 sched = NULL; 1250 goto out_err;
1251 sched = NULL;
1252 }
1247 1253
1248 /* Bind the ct retriever */ 1254 /* Bind the ct retriever */
1249 RCU_INIT_POINTER(svc->pe, pe); 1255 RCU_INIT_POINTER(svc->pe, pe);
@@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1291static int 1297static int
1292ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1298ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1293{ 1299{
1294 struct ip_vs_scheduler *sched, *old_sched; 1300 struct ip_vs_scheduler *sched = NULL, *old_sched;
1295 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1301 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1296 int ret = 0; 1302 int ret = 0;
1297 1303
1298 /* 1304 /*
1299 * Lookup the scheduler, by 'u->sched_name' 1305 * Lookup the scheduler, by 'u->sched_name'
1300 */ 1306 */
1301 sched = ip_vs_scheduler_get(u->sched_name); 1307 if (strcmp(u->sched_name, "none")) {
1302 if (sched == NULL) { 1308 sched = ip_vs_scheduler_get(u->sched_name);
1303 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1309 if (!sched) {
1304 return -ENOENT; 1310 pr_info("Scheduler module ip_vs_%s not found\n",
1311 u->sched_name);
1312 return -ENOENT;
1313 }
1305 } 1314 }
1306 old_sched = sched; 1315 old_sched = sched;
1307 1316
@@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1329 1338
1330 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1339 old_sched = rcu_dereference_protected(svc->scheduler, 1);
1331 if (sched != old_sched) { 1340 if (sched != old_sched) {
1341 if (old_sched) {
1342 ip_vs_unbind_scheduler(svc, old_sched);
1343 RCU_INIT_POINTER(svc->scheduler, NULL);
1344 /* Wait all svc->sched_data users */
1345 synchronize_rcu();
1346 }
1332 /* Bind the new scheduler */ 1347 /* Bind the new scheduler */
1333 ret = ip_vs_bind_scheduler(svc, sched); 1348 if (sched) {
1334 if (ret) { 1349 ret = ip_vs_bind_scheduler(svc, sched);
1335 old_sched = sched; 1350 if (ret) {
1336 goto out; 1351 ip_vs_scheduler_put(sched);
1352 goto out;
1353 }
1337 } 1354 }
1338 /* Unbind the old scheduler on success */
1339 ip_vs_unbind_scheduler(svc, old_sched);
1340 } 1355 }
1341 1356
1342 /* 1357 /*
@@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1982 const struct ip_vs_iter *iter = seq->private; 1997 const struct ip_vs_iter *iter = seq->private;
1983 const struct ip_vs_dest *dest; 1998 const struct ip_vs_dest *dest;
1984 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 1999 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2000 char *sched_name = sched ? sched->name : "none";
1985 2001
1986 if (iter->table == ip_vs_svc_table) { 2002 if (iter->table == ip_vs_svc_table) {
1987#ifdef CONFIG_IP_VS_IPV6 2003#ifdef CONFIG_IP_VS_IPV6
@@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1990 ip_vs_proto_name(svc->protocol), 2006 ip_vs_proto_name(svc->protocol),
1991 &svc->addr.in6, 2007 &svc->addr.in6,
1992 ntohs(svc->port), 2008 ntohs(svc->port),
1993 sched->name); 2009 sched_name);
1994 else 2010 else
1995#endif 2011#endif
1996 seq_printf(seq, "%s %08X:%04X %s %s ", 2012 seq_printf(seq, "%s %08X:%04X %s %s ",
1997 ip_vs_proto_name(svc->protocol), 2013 ip_vs_proto_name(svc->protocol),
1998 ntohl(svc->addr.ip), 2014 ntohl(svc->addr.ip),
1999 ntohs(svc->port), 2015 ntohs(svc->port),
2000 sched->name, 2016 sched_name,
2001 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2017 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2002 } else { 2018 } else {
2003 seq_printf(seq, "FWM %08X %s %s", 2019 seq_printf(seq, "FWM %08X %s %s",
2004 svc->fwmark, sched->name, 2020 svc->fwmark, sched_name,
2005 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2021 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2006 } 2022 }
2007 2023
@@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2427{ 2443{
2428 struct ip_vs_scheduler *sched; 2444 struct ip_vs_scheduler *sched;
2429 struct ip_vs_kstats kstats; 2445 struct ip_vs_kstats kstats;
2446 char *sched_name;
2430 2447
2431 sched = rcu_dereference_protected(src->scheduler, 1); 2448 sched = rcu_dereference_protected(src->scheduler, 1);
2449 sched_name = sched ? sched->name : "none";
2432 dst->protocol = src->protocol; 2450 dst->protocol = src->protocol;
2433 dst->addr = src->addr.ip; 2451 dst->addr = src->addr.ip;
2434 dst->port = src->port; 2452 dst->port = src->port;
2435 dst->fwmark = src->fwmark; 2453 dst->fwmark = src->fwmark;
2436 strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); 2454 strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
2437 dst->flags = src->flags; 2455 dst->flags = src->flags;
2438 dst->timeout = src->timeout / HZ; 2456 dst->timeout = src->timeout / HZ;
2439 dst->netmask = src->netmask; 2457 dst->netmask = src->netmask;
@@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
2892 struct ip_vs_flags flags = { .flags = svc->flags, 2910 struct ip_vs_flags flags = { .flags = svc->flags,
2893 .mask = ~0 }; 2911 .mask = ~0 };
2894 struct ip_vs_kstats kstats; 2912 struct ip_vs_kstats kstats;
2913 char *sched_name;
2895 2914
2896 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); 2915 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2897 if (!nl_service) 2916 if (!nl_service)
@@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
2910 } 2929 }
2911 2930
2912 sched = rcu_dereference_protected(svc->scheduler, 1); 2931 sched = rcu_dereference_protected(svc->scheduler, 1);
2932 sched_name = sched ? sched->name : "none";
2913 pe = rcu_dereference_protected(svc->pe, 1); 2933 pe = rcu_dereference_protected(svc->pe, 1);
2914 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || 2934 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
2915 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || 2935 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
2916 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 2936 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2917 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || 2937 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 199760c71f39..7e8141647943 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
74 74
75 if (sched->done_service) 75 if (sched->done_service)
76 sched->done_service(svc); 76 sched->done_service(svc);
77 /* svc->scheduler can not be set to NULL */ 77 /* svc->scheduler can be set to NULL only by caller */
78} 78}
79 79
80 80
@@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
147 147
148void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) 148void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
149{ 149{
150 struct ip_vs_scheduler *sched; 150 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
151 char *sched_name = sched ? sched->name : "none";
151 152
152 sched = rcu_dereference(svc->scheduler);
153 if (svc->fwmark) { 153 if (svc->fwmark) {
154 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", 154 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
155 sched->name, svc->fwmark, svc->fwmark, msg); 155 sched_name, svc->fwmark, svc->fwmark, msg);
156#ifdef CONFIG_IP_VS_IPV6 156#ifdef CONFIG_IP_VS_IPV6
157 } else if (svc->af == AF_INET6) { 157 } else if (svc->af == AF_INET6) {
158 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", 158 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
159 sched->name, ip_vs_proto_name(svc->protocol), 159 sched_name, ip_vs_proto_name(svc->protocol),
160 &svc->addr.in6, ntohs(svc->port), msg); 160 &svc->addr.in6, ntohs(svc->port), msg);
161#endif 161#endif
162 } else { 162 } else {
163 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", 163 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
164 sched->name, ip_vs_proto_name(svc->protocol), 164 sched_name, ip_vs_proto_name(svc->protocol),
165 &svc->addr.ip, ntohs(svc->port), msg); 165 &svc->addr.ip, ntohs(svc->port), msg);
166 } 166 }
167} 167}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index b08ba9538d12..d99ad93eb855 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
612 pkts = atomic_add_return(1, &cp->in_pkts); 612 pkts = atomic_add_return(1, &cp->in_pkts);
613 else 613 else
614 pkts = sysctl_sync_threshold(ipvs); 614 pkts = sysctl_sync_threshold(ipvs);
615 ip_vs_sync_conn(net, cp->control, pkts); 615 ip_vs_sync_conn(net, cp, pkts);
616 } 616 }
617} 617}
618 618
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index bf66a8657a5f..258a0b0e82a2 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
130 130
131 memset(&fl4, 0, sizeof(fl4)); 131 memset(&fl4, 0, sizeof(fl4));
132 fl4.daddr = daddr; 132 fl4.daddr = daddr;
133 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
134 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 133 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
135 FLOWI_FLAG_KNOWN_NH : 0; 134 FLOWI_FLAG_KNOWN_NH : 0;
136 135
@@ -505,6 +504,13 @@ err_put:
505 return -1; 504 return -1;
506 505
507err_unreach: 506err_unreach:
507 /* The ip6_link_failure function requires the dev field to be set
508 * in order to get the net (further for the sake of fwmark
509 * reflection).
510 */
511 if (!skb->dev)
512 skb->dev = skb_dst(skb)->dev;
513
508 dst_link_failure(skb); 514 dst_link_failure(skb);
509 return -1; 515 return -1;
510} 516}
@@ -523,10 +529,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
523 if (ret == NF_ACCEPT) { 529 if (ret == NF_ACCEPT) {
524 nf_reset(skb); 530 nf_reset(skb);
525 skb_forward_csum(skb); 531 skb_forward_csum(skb);
532 if (!skb->sk)
533 skb_sender_cpu_clear(skb);
526 } 534 }
527 return ret; 535 return ret;
528} 536}
529 537
538/* In the event of a remote destination, it's possible that we would have
539 * matches against an old socket (particularly a TIME-WAIT socket). This
540 * causes havoc down the line (ip_local_out et. al. expect regular sockets
541 * and invalid memory accesses will happen) so simply drop the association
542 * in this case.
543*/
544static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb)
545{
546 /* If dev is set, the packet came from the LOCAL_IN callback and
547 * not from a local TCP socket.
548 */
549 if (skb->dev)
550 skb_orphan(skb);
551}
552
530/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 553/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
531static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, 554static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
532 struct ip_vs_conn *cp, int local) 555 struct ip_vs_conn *cp, int local)
@@ -538,12 +561,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
538 ip_vs_notrack(skb); 561 ip_vs_notrack(skb);
539 else 562 else
540 ip_vs_update_conntrack(skb, cp, 1); 563 ip_vs_update_conntrack(skb, cp, 1);
564
565 /* Remove the early_demux association unless it's bound for the
566 * exact same port and address on this host after translation.
567 */
568 if (!local || cp->vport != cp->dport ||
569 !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr))
570 ip_vs_drop_early_demux_sk(skb);
571
541 if (!local) { 572 if (!local) {
542 skb_forward_csum(skb); 573 skb_forward_csum(skb);
574 if (!skb->sk)
575 skb_sender_cpu_clear(skb);
543 NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, 576 NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
544 NULL, skb_dst(skb)->dev, dst_output_sk); 577 NULL, skb_dst(skb)->dev, dst_output_sk);
545 } else 578 } else
546 ret = NF_ACCEPT; 579 ret = NF_ACCEPT;
580
547 return ret; 581 return ret;
548} 582}
549 583
@@ -557,7 +591,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
557 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 591 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
558 ip_vs_notrack(skb); 592 ip_vs_notrack(skb);
559 if (!local) { 593 if (!local) {
594 ip_vs_drop_early_demux_sk(skb);
560 skb_forward_csum(skb); 595 skb_forward_csum(skb);
596 if (!skb->sk)
597 skb_sender_cpu_clear(skb);
561 NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, 598 NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
562 NULL, skb_dst(skb)->dev, dst_output_sk); 599 NULL, skb_dst(skb)->dev, dst_output_sk);
563 } else 600 } else
@@ -845,6 +882,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
845 struct ipv6hdr *old_ipv6h = NULL; 882 struct ipv6hdr *old_ipv6h = NULL;
846#endif 883#endif
847 884
885 ip_vs_drop_early_demux_sk(skb);
886
848 if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { 887 if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
849 new_skb = skb_realloc_headroom(skb, max_headroom); 888 new_skb = skb_realloc_headroom(skb, max_headroom);
850 if (!new_skb) 889 if (!new_skb)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 13fad8668f83..651039ad1681 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
287 spin_unlock(&pcpu->lock); 287 spin_unlock(&pcpu->lock);
288} 288}
289 289
290/* Released via destroy_conntrack() */
291struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
292{
293 struct nf_conn *tmpl;
294
295 tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL);
296 if (tmpl == NULL)
297 return NULL;
298
299 tmpl->status = IPS_TEMPLATE;
300 write_pnet(&tmpl->ct_net, net);
301
302#ifdef CONFIG_NF_CONNTRACK_ZONES
303 if (zone) {
304 struct nf_conntrack_zone *nf_ct_zone;
305
306 nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC);
307 if (!nf_ct_zone)
308 goto out_free;
309 nf_ct_zone->id = zone;
310 }
311#endif
312 atomic_set(&tmpl->ct_general.use, 0);
313
314 return tmpl;
315#ifdef CONFIG_NF_CONNTRACK_ZONES
316out_free:
317 kfree(tmpl);
318 return NULL;
319#endif
320}
321EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
322
323static void nf_ct_tmpl_free(struct nf_conn *tmpl)
324{
325 nf_ct_ext_destroy(tmpl);
326 nf_ct_ext_free(tmpl);
327 kfree(tmpl);
328}
329
290static void 330static void
291destroy_conntrack(struct nf_conntrack *nfct) 331destroy_conntrack(struct nf_conntrack *nfct)
292{ 332{
@@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
298 NF_CT_ASSERT(atomic_read(&nfct->use) == 0); 338 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
299 NF_CT_ASSERT(!timer_pending(&ct->timeout)); 339 NF_CT_ASSERT(!timer_pending(&ct->timeout));
300 340
341 if (unlikely(nf_ct_is_template(ct))) {
342 nf_ct_tmpl_free(ct);
343 return;
344 }
301 rcu_read_lock(); 345 rcu_read_lock();
302 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 346 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
303 if (l4proto && l4proto->destroy) 347 if (l4proto && l4proto->destroy)
@@ -540,28 +584,6 @@ out:
540} 584}
541EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); 585EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
542 586
543/* deletion from this larval template list happens via nf_ct_put() */
544void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
545{
546 struct ct_pcpu *pcpu;
547
548 __set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
549 __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
550 nf_conntrack_get(&tmpl->ct_general);
551
552 /* add this conntrack to the (per cpu) tmpl list */
553 local_bh_disable();
554 tmpl->cpu = smp_processor_id();
555 pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
556
557 spin_lock(&pcpu->lock);
558 /* Overload tuple linked list to put us in template list. */
559 hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
560 &pcpu->tmpl);
561 spin_unlock_bh(&pcpu->lock);
562}
563EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
564
565/* Confirm a connection given skb; places it in hash table */ 587/* Confirm a connection given skb; places it in hash table */
566int 588int
567__nf_conntrack_confirm(struct sk_buff *skb) 589__nf_conntrack_confirm(struct sk_buff *skb)
@@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net)
1751 spin_lock_init(&pcpu->lock); 1773 spin_lock_init(&pcpu->lock);
1752 INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); 1774 INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
1753 INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); 1775 INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
1754 INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
1755 } 1776 }
1756 1777
1757 net->ct.stat = alloc_percpu(struct ip_conntrack_stat); 1778 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 7a17070c5dab..b45a4223cb05 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
219 a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; 219 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
220 } 220 }
221 221
222 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); 222 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
223 nf_ct_zone(a->master) == nf_ct_zone(b->master);
223} 224}
224 225
225static inline int expect_matches(const struct nf_conntrack_expect *a, 226static inline int expect_matches(const struct nf_conntrack_expect *a,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d1c23940a86a..6b8b0abbfab4 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone,
2995 } 2995 }
2996 2996
2997 err = nf_ct_expect_related_report(exp, portid, report); 2997 err = nf_ct_expect_related_report(exp, portid, report);
2998 if (err < 0)
2999 goto err_exp;
3000
3001 return 0;
3002err_exp:
3003 nf_ct_expect_put(exp); 2998 nf_ct_expect_put(exp);
3004err_ct: 2999err_ct:
3005 nf_ct_put(ct); 3000 nf_ct_put(ct);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 789feeae6c44..71f1e9fdfa18 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net)
349static int __net_init synproxy_net_init(struct net *net) 349static int __net_init synproxy_net_init(struct net *net)
350{ 350{
351 struct synproxy_net *snet = synproxy_pernet(net); 351 struct synproxy_net *snet = synproxy_pernet(net);
352 struct nf_conntrack_tuple t;
353 struct nf_conn *ct; 352 struct nf_conn *ct;
354 int err = -ENOMEM; 353 int err = -ENOMEM;
355 354
356 memset(&t, 0, sizeof(t)); 355 ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
357 ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
358 if (IS_ERR(ct)) { 356 if (IS_ERR(ct)) {
359 err = PTR_ERR(ct); 357 err = PTR_ERR(ct);
360 goto err1; 358 goto err1;
@@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
365 if (!nfct_synproxy_ext_add(ct)) 363 if (!nfct_synproxy_ext_add(ct))
366 goto err2; 364 goto err2;
367 365
368 nf_conntrack_tmpl_insert(net, ct); 366 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
367 nf_conntrack_get(&ct->ct_general);
369 snet->tmpl = ct; 368 snet->tmpl = ct;
370 369
371 snet->stats = alloc_percpu(struct synproxy_stats); 370 snet->stats = alloc_percpu(struct synproxy_stats);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 75747aecdebe..c6630030c912 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -184,7 +184,6 @@ out:
184static int xt_ct_tg_check(const struct xt_tgchk_param *par, 184static int xt_ct_tg_check(const struct xt_tgchk_param *par,
185 struct xt_ct_target_info_v1 *info) 185 struct xt_ct_target_info_v1 *info)
186{ 186{
187 struct nf_conntrack_tuple t;
188 struct nf_conn *ct; 187 struct nf_conn *ct;
189 int ret = -EOPNOTSUPP; 188 int ret = -EOPNOTSUPP;
190 189
@@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
202 if (ret < 0) 201 if (ret < 0)
203 goto err1; 202 goto err1;
204 203
205 memset(&t, 0, sizeof(t)); 204 ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
206 ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
207 ret = PTR_ERR(ct); 205 ret = PTR_ERR(ct);
208 if (IS_ERR(ct)) 206 if (IS_ERR(ct))
209 goto err2; 207 goto err2;
@@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
227 if (ret < 0) 225 if (ret < 0)
228 goto err3; 226 goto err3;
229 } 227 }
230 228 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
231 nf_conntrack_tmpl_insert(par->net, ct); 229 nf_conntrack_get(&ct->ct_general);
232out: 230out:
233 info->ct = ct; 231 info->ct = ct;
234 return 0; 232 return 0;
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index f407ebc13481..29d2c31f406c 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -126,6 +126,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
126 goto out; 126 goto out;
127 } 127 }
128 128
129 sysfs_attr_init(&info->timer->attr.attr);
129 info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); 130 info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
130 if (!info->timer->attr.attr.name) { 131 if (!info->timer->attr.attr.name) {
131 ret = -ENOMEM; 132 ret = -ENOMEM;