aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/ipvs/ip_vs_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/ipvs/ip_vs_core.c')
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c456
1 files changed, 296 insertions, 160 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9c5a0..07accf6b240 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -41,6 +41,7 @@
41#include <net/icmp.h> /* for icmp_send */ 41#include <net/icmp.h> /* for icmp_send */
42#include <net/route.h> 42#include <net/route.h>
43#include <net/ip6_checksum.h> 43#include <net/ip6_checksum.h>
44#include <net/netns/generic.h> /* net_generic() */
44 45
45#include <linux/netfilter.h> 46#include <linux/netfilter.h>
46#include <linux/netfilter_ipv4.h> 47#include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
68EXPORT_SYMBOL(ip_vs_get_debug_level); 69EXPORT_SYMBOL(ip_vs_get_debug_level);
69#endif 70#endif
70 71
72int ip_vs_net_id __read_mostly;
73#ifdef IP_VS_GENERIC_NETNS
74EXPORT_SYMBOL(ip_vs_net_id);
75#endif
76/* netns cnt used for uniqueness */
77static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
71 78
72/* ID used in ICMP lookups */ 79/* ID used in ICMP lookups */
73#define icmp_id(icmph) (((icmph)->un).echo.id) 80#define icmp_id(icmph) (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
108ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) 115ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
109{ 116{
110 struct ip_vs_dest *dest = cp->dest; 117 struct ip_vs_dest *dest = cp->dest;
118 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
119
111 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 120 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
112 spin_lock(&dest->stats.lock); 121 struct ip_vs_cpu_stats *s;
113 dest->stats.ustats.inpkts++; 122
114 dest->stats.ustats.inbytes += skb->len; 123 s = this_cpu_ptr(dest->stats.cpustats);
115 spin_unlock(&dest->stats.lock); 124 s->ustats.inpkts++;
116 125 u64_stats_update_begin(&s->syncp);
117 spin_lock(&dest->svc->stats.lock); 126 s->ustats.inbytes += skb->len;
118 dest->svc->stats.ustats.inpkts++; 127 u64_stats_update_end(&s->syncp);
119 dest->svc->stats.ustats.inbytes += skb->len; 128
120 spin_unlock(&dest->svc->stats.lock); 129 s = this_cpu_ptr(dest->svc->stats.cpustats);
121 130 s->ustats.inpkts++;
122 spin_lock(&ip_vs_stats.lock); 131 u64_stats_update_begin(&s->syncp);
123 ip_vs_stats.ustats.inpkts++; 132 s->ustats.inbytes += skb->len;
124 ip_vs_stats.ustats.inbytes += skb->len; 133 u64_stats_update_end(&s->syncp);
125 spin_unlock(&ip_vs_stats.lock); 134
135 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
136 s->ustats.inpkts++;
137 u64_stats_update_begin(&s->syncp);
138 s->ustats.inbytes += skb->len;
139 u64_stats_update_end(&s->syncp);
126 } 140 }
127} 141}
128 142
@@ -131,21 +145,28 @@ static inline void
131ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) 145ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
132{ 146{
133 struct ip_vs_dest *dest = cp->dest; 147 struct ip_vs_dest *dest = cp->dest;
148 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
149
134 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 150 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
135 spin_lock(&dest->stats.lock); 151 struct ip_vs_cpu_stats *s;
136 dest->stats.ustats.outpkts++; 152
137 dest->stats.ustats.outbytes += skb->len; 153 s = this_cpu_ptr(dest->stats.cpustats);
138 spin_unlock(&dest->stats.lock); 154 s->ustats.outpkts++;
139 155 u64_stats_update_begin(&s->syncp);
140 spin_lock(&dest->svc->stats.lock); 156 s->ustats.outbytes += skb->len;
141 dest->svc->stats.ustats.outpkts++; 157 u64_stats_update_end(&s->syncp);
142 dest->svc->stats.ustats.outbytes += skb->len; 158
143 spin_unlock(&dest->svc->stats.lock); 159 s = this_cpu_ptr(dest->svc->stats.cpustats);
144 160 s->ustats.outpkts++;
145 spin_lock(&ip_vs_stats.lock); 161 u64_stats_update_begin(&s->syncp);
146 ip_vs_stats.ustats.outpkts++; 162 s->ustats.outbytes += skb->len;
147 ip_vs_stats.ustats.outbytes += skb->len; 163 u64_stats_update_end(&s->syncp);
148 spin_unlock(&ip_vs_stats.lock); 164
165 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
166 s->ustats.outpkts++;
167 u64_stats_update_begin(&s->syncp);
168 s->ustats.outbytes += skb->len;
169 u64_stats_update_end(&s->syncp);
149 } 170 }
150} 171}
151 172
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
153static inline void 174static inline void
154ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) 175ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
155{ 176{
156 spin_lock(&cp->dest->stats.lock); 177 struct netns_ipvs *ipvs = net_ipvs(svc->net);
157 cp->dest->stats.ustats.conns++; 178 struct ip_vs_cpu_stats *s;
158 spin_unlock(&cp->dest->stats.lock); 179
180 s = this_cpu_ptr(cp->dest->stats.cpustats);
181 s->ustats.conns++;
159 182
160 spin_lock(&svc->stats.lock); 183 s = this_cpu_ptr(svc->stats.cpustats);
161 svc->stats.ustats.conns++; 184 s->ustats.conns++;
162 spin_unlock(&svc->stats.lock);
163 185
164 spin_lock(&ip_vs_stats.lock); 186 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
165 ip_vs_stats.ustats.conns++; 187 s->ustats.conns++;
166 spin_unlock(&ip_vs_stats.lock);
167} 188}
168 189
169 190
170static inline int 191static inline int
171ip_vs_set_state(struct ip_vs_conn *cp, int direction, 192ip_vs_set_state(struct ip_vs_conn *cp, int direction,
172 const struct sk_buff *skb, 193 const struct sk_buff *skb,
173 struct ip_vs_protocol *pp) 194 struct ip_vs_proto_data *pd)
174{ 195{
175 if (unlikely(!pp->state_transition)) 196 if (unlikely(!pd->pp->state_transition))
176 return 0; 197 return 0;
177 return pp->state_transition(cp, direction, skb, pp); 198 return pd->pp->state_transition(cp, direction, skb, pd);
178} 199}
179 200
180static inline void 201static inline int
181ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, 202ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
182 struct sk_buff *skb, int protocol, 203 struct sk_buff *skb, int protocol,
183 const union nf_inet_addr *caddr, __be16 cport, 204 const union nf_inet_addr *caddr, __be16 cport,
184 const union nf_inet_addr *vaddr, __be16 vport, 205 const union nf_inet_addr *vaddr, __be16 vport,
185 struct ip_vs_conn_param *p) 206 struct ip_vs_conn_param *p)
186{ 207{
187 ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); 208 ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
209 vport, p);
188 p->pe = svc->pe; 210 p->pe = svc->pe;
189 if (p->pe && p->pe->fill_param) 211 if (p->pe && p->pe->fill_param)
190 p->pe->fill_param(p, skb); 212 return p->pe->fill_param(p, skb);
213
214 return 0;
191} 215}
192 216
193/* 217/*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
200static struct ip_vs_conn * 224static struct ip_vs_conn *
201ip_vs_sched_persist(struct ip_vs_service *svc, 225ip_vs_sched_persist(struct ip_vs_service *svc,
202 struct sk_buff *skb, 226 struct sk_buff *skb,
203 __be16 ports[2]) 227 __be16 src_port, __be16 dst_port, int *ignored)
204{ 228{
205 struct ip_vs_conn *cp = NULL; 229 struct ip_vs_conn *cp = NULL;
206 struct ip_vs_iphdr iph; 230 struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
224 248
225 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " 249 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
226 "mnet %s\n", 250 "mnet %s\n",
227 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), 251 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
228 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), 252 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
229 IP_VS_DBG_ADDR(svc->af, &snet)); 253 IP_VS_DBG_ADDR(svc->af, &snet));
230 254
231 /* 255 /*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
247 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; 271 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
248 __be16 vport = 0; 272 __be16 vport = 0;
249 273
250 if (ports[1] == svc->port) { 274 if (dst_port == svc->port) {
251 /* non-FTP template: 275 /* non-FTP template:
252 * <protocol, caddr, 0, vaddr, vport, daddr, dport> 276 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
253 * FTP template: 277 * FTP template:
254 * <protocol, caddr, 0, vaddr, 0, daddr, 0> 278 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
255 */ 279 */
256 if (svc->port != FTPPORT) 280 if (svc->port != FTPPORT)
257 vport = ports[1]; 281 vport = dst_port;
258 } else { 282 } else {
259 /* Note: persistent fwmark-based services and 283 /* Note: persistent fwmark-based services and
260 * persistent port zero service are handled here. 284 * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
268 vaddr = &fwmark; 292 vaddr = &fwmark;
269 } 293 }
270 } 294 }
271 ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, 295 /* return *ignored = -1 so NF_DROP can be used */
272 vaddr, vport, &param); 296 if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
297 vaddr, vport, &param) < 0) {
298 *ignored = -1;
299 return NULL;
300 }
273 } 301 }
274 302
275 /* Check if a template already exists */ 303 /* Check if a template already exists */
276 ct = ip_vs_ct_in_get(&param); 304 ct = ip_vs_ct_in_get(&param);
277 if (!ct || !ip_vs_check_template(ct)) { 305 if (!ct || !ip_vs_check_template(ct)) {
278 /* No template found or the dest of the connection 306 /*
307 * No template found or the dest of the connection
279 * template is not available. 308 * template is not available.
309 * return *ignored=0 i.e. ICMP and NF_DROP
280 */ 310 */
281 dest = svc->scheduler->schedule(svc, skb); 311 dest = svc->scheduler->schedule(svc, skb);
282 if (!dest) { 312 if (!dest) {
283 IP_VS_DBG(1, "p-schedule: no dest found.\n"); 313 IP_VS_DBG(1, "p-schedule: no dest found.\n");
284 kfree(param.pe_data); 314 kfree(param.pe_data);
315 *ignored = 0;
285 return NULL; 316 return NULL;
286 } 317 }
287 318
288 if (ports[1] == svc->port && svc->port != FTPPORT) 319 if (dst_port == svc->port && svc->port != FTPPORT)
289 dport = dest->port; 320 dport = dest->port;
290 321
291 /* Create a template 322 /* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
293 * and thus param.pe_data will be destroyed 324 * and thus param.pe_data will be destroyed
294 * when the template expires */ 325 * when the template expires */
295 ct = ip_vs_conn_new(&param, &dest->addr, dport, 326 ct = ip_vs_conn_new(&param, &dest->addr, dport,
296 IP_VS_CONN_F_TEMPLATE, dest); 327 IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
297 if (ct == NULL) { 328 if (ct == NULL) {
298 kfree(param.pe_data); 329 kfree(param.pe_data);
330 *ignored = -1;
299 return NULL; 331 return NULL;
300 } 332 }
301 333
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
306 kfree(param.pe_data); 338 kfree(param.pe_data);
307 } 339 }
308 340
309 dport = ports[1]; 341 dport = dst_port;
310 if (dport == svc->port && dest->port) 342 if (dport == svc->port && dest->port)
311 dport = dest->port; 343 dport = dest->port;
312 344
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
317 /* 349 /*
318 * Create a new connection according to the template 350 * Create a new connection according to the template
319 */ 351 */
320 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], 352 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
321 &iph.daddr, ports[1], &param); 353 src_port, &iph.daddr, dst_port, &param);
322 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest); 354
355 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
323 if (cp == NULL) { 356 if (cp == NULL) {
324 ip_vs_conn_put(ct); 357 ip_vs_conn_put(ct);
358 *ignored = -1;
325 return NULL; 359 return NULL;
326 } 360 }
327 361
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
341 * It selects a server according to the virtual service, and 375 * It selects a server according to the virtual service, and
342 * creates a connection entry. 376 * creates a connection entry.
343 * Protocols supported: TCP, UDP 377 * Protocols supported: TCP, UDP
378 *
379 * Usage of *ignored
380 *
381 * 1 : protocol tried to schedule (eg. on SYN), found svc but the
382 * svc/scheduler decides that this packet should be accepted with
383 * NF_ACCEPT because it must not be scheduled.
384 *
385 * 0 : scheduler can not find destination, so try bypass or
386 * return ICMP and then NF_DROP (ip_vs_leave).
387 *
388 * -1 : scheduler tried to schedule but fatal error occurred, eg.
389 * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
390 * failure such as missing Call-ID, ENOMEM on skb_linearize
391 * or pe_data. In this case we should return NF_DROP without
392 * any attempts to send ICMP with ip_vs_leave.
344 */ 393 */
345struct ip_vs_conn * 394struct ip_vs_conn *
346ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 395ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
347 struct ip_vs_protocol *pp, int *ignored) 396 struct ip_vs_proto_data *pd, int *ignored)
348{ 397{
398 struct ip_vs_protocol *pp = pd->pp;
349 struct ip_vs_conn *cp = NULL; 399 struct ip_vs_conn *cp = NULL;
350 struct ip_vs_iphdr iph; 400 struct ip_vs_iphdr iph;
351 struct ip_vs_dest *dest; 401 struct ip_vs_dest *dest;
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
371 } 421 }
372 422
373 /* 423 /*
374 * Do not schedule replies from local real server. It is risky 424 * Do not schedule replies from local real server.
375 * for fwmark services but mostly for persistent services.
376 */ 425 */
377 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 426 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
378 (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && 427 (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
379 (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
380 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, 428 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
381 "Not scheduling reply for existing connection"); 429 "Not scheduling reply for existing connection");
382 __ip_vs_conn_put(cp); 430 __ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
386 /* 434 /*
387 * Persistent service 435 * Persistent service
388 */ 436 */
389 if (svc->flags & IP_VS_SVC_F_PERSISTENT) { 437 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
390 *ignored = 0; 438 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
391 return ip_vs_sched_persist(svc, skb, pptr); 439
392 } 440 *ignored = 0;
393 441
394 /* 442 /*
395 * Non-persistent service 443 * Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
402 return NULL; 450 return NULL;
403 } 451 }
404 452
405 *ignored = 0;
406
407 dest = svc->scheduler->schedule(svc, skb); 453 dest = svc->scheduler->schedule(svc, skb);
408 if (dest == NULL) { 454 if (dest == NULL) {
409 IP_VS_DBG(1, "Schedule: no dest found.\n"); 455 IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
419 */ 465 */
420 { 466 {
421 struct ip_vs_conn_param p; 467 struct ip_vs_conn_param p;
422 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, 468
423 pptr[0], &iph.daddr, pptr[1], &p); 469 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
470 &iph.saddr, pptr[0], &iph.daddr, pptr[1],
471 &p);
424 cp = ip_vs_conn_new(&p, &dest->addr, 472 cp = ip_vs_conn_new(&p, &dest->addr,
425 dest->port ? dest->port : pptr[1], 473 dest->port ? dest->port : pptr[1],
426 flags, dest); 474 flags, dest, skb->mark);
427 if (!cp) 475 if (!cp) {
476 *ignored = -1;
428 return NULL; 477 return NULL;
478 }
429 } 479 }
430 480
431 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " 481 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
447 * no destination is available for a new connection. 497 * no destination is available for a new connection.
448 */ 498 */
449int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 499int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
450 struct ip_vs_protocol *pp) 500 struct ip_vs_proto_data *pd)
451{ 501{
452 __be16 _ports[2], *pptr; 502 __be16 _ports[2], *pptr;
453 struct ip_vs_iphdr iph; 503 struct ip_vs_iphdr iph;
504#ifdef CONFIG_SYSCTL
505 struct net *net;
506 struct netns_ipvs *ipvs;
454 int unicast; 507 int unicast;
508#endif
509
455 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 510 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
456 511
457 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 512 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -460,17 +515,21 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
460 return NF_DROP; 515 return NF_DROP;
461 } 516 }
462 517
518#ifdef CONFIG_SYSCTL
519 net = skb_net(skb);
520
463#ifdef CONFIG_IP_VS_IPV6 521#ifdef CONFIG_IP_VS_IPV6
464 if (svc->af == AF_INET6) 522 if (svc->af == AF_INET6)
465 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; 523 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
466 else 524 else
467#endif 525#endif
468 unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); 526 unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
469 527
470 /* if it is fwmark-based service, the cache_bypass sysctl is up 528 /* if it is fwmark-based service, the cache_bypass sysctl is up
471 and the destination is a non-local unicast, then create 529 and the destination is a non-local unicast, then create
472 a cache_bypass connection entry */ 530 a cache_bypass connection entry */
473 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { 531 ipvs = net_ipvs(net);
532 if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
474 int ret, cs; 533 int ret, cs;
475 struct ip_vs_conn *cp; 534 struct ip_vs_conn *cp;
476 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 535 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +543,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 543 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
485 { 544 {
486 struct ip_vs_conn_param p; 545 struct ip_vs_conn_param p;
487 ip_vs_conn_fill_param(svc->af, iph.protocol, 546 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
488 &iph.saddr, pptr[0], 547 &iph.saddr, pptr[0],
489 &iph.daddr, pptr[1], &p); 548 &iph.daddr, pptr[1], &p);
490 cp = ip_vs_conn_new(&p, &daddr, 0, 549 cp = ip_vs_conn_new(&p, &daddr, 0,
491 IP_VS_CONN_F_BYPASS | flags, 550 IP_VS_CONN_F_BYPASS | flags,
492 NULL); 551 NULL, skb->mark);
493 if (!cp) 552 if (!cp)
494 return NF_DROP; 553 return NF_DROP;
495 } 554 }
@@ -498,16 +557,17 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
498 ip_vs_in_stats(cp, skb); 557 ip_vs_in_stats(cp, skb);
499 558
500 /* set state */ 559 /* set state */
501 cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); 560 cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
502 561
503 /* transmit the first SYN packet */ 562 /* transmit the first SYN packet */
504 ret = cp->packet_xmit(skb, cp, pp); 563 ret = cp->packet_xmit(skb, cp, pd->pp);
505 /* do not touch skb anymore */ 564 /* do not touch skb anymore */
506 565
507 atomic_inc(&cp->in_pkts); 566 atomic_inc(&cp->in_pkts);
508 ip_vs_conn_put(cp); 567 ip_vs_conn_put(cp);
509 return ret; 568 return ret;
510 } 569 }
570#endif
511 571
512 /* 572 /*
513 * When the virtual ftp service is presented, packets destined 573 * When the virtual ftp service is presented, packets destined
@@ -544,6 +604,33 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
544 return NF_DROP; 604 return NF_DROP;
545} 605}
546 606
607#ifdef CONFIG_SYSCTL
608
609static int sysctl_snat_reroute(struct sk_buff *skb)
610{
611 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
612 return ipvs->sysctl_snat_reroute;
613}
614
615static int sysctl_nat_icmp_send(struct net *net)
616{
617 struct netns_ipvs *ipvs = net_ipvs(net);
618 return ipvs->sysctl_nat_icmp_send;
619}
620
621static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
622{
623 return ipvs->sysctl_expire_nodest_conn;
624}
625
626#else
627
628static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
629static int sysctl_nat_icmp_send(struct net *net) { return 0; }
630static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
631
632#endif
633
547__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) 634__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
548{ 635{
549 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); 636 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -576,6 +663,22 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
576} 663}
577#endif 664#endif
578 665
666static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
667{
668#ifdef CONFIG_IP_VS_IPV6
669 if (af == AF_INET6) {
670 if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0)
671 return 1;
672 } else
673#endif
674 if ((sysctl_snat_reroute(skb) ||
675 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
676 ip_route_me_harder(skb, RTN_LOCAL) != 0)
677 return 1;
678
679 return 0;
680}
681
579/* 682/*
580 * Packet has been made sufficiently writable in caller 683 * Packet has been made sufficiently writable in caller
581 * - inout: 1=in->out, 0=out->in 684 * - inout: 1=in->out, 0=out->in
@@ -674,7 +777,7 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
674#endif 777#endif
675 778
676/* Handle relevant response ICMP messages - forward to the right 779/* Handle relevant response ICMP messages - forward to the right
677 * destination host. Used for NAT and local client. 780 * destination host.
678 */ 781 */
679static int handle_response_icmp(int af, struct sk_buff *skb, 782static int handle_response_icmp(int af, struct sk_buff *skb,
680 union nf_inet_addr *snet, 783 union nf_inet_addr *snet,
@@ -710,16 +813,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
710#endif 813#endif
711 ip_vs_nat_icmp(skb, pp, cp, 1); 814 ip_vs_nat_icmp(skb, pp, cp, 1);
712 815
713#ifdef CONFIG_IP_VS_IPV6 816 if (ip_vs_route_me_harder(af, skb))
714 if (af == AF_INET6) { 817 goto out;
715 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
716 goto out;
717 } else
718#endif
719 if ((sysctl_ip_vs_snat_reroute ||
720 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
721 ip_route_me_harder(skb, RTN_LOCAL) != 0)
722 goto out;
723 818
724 /* do the statistics and put it back */ 819 /* do the statistics and put it back */
725 ip_vs_out_stats(cp, skb); 820 ip_vs_out_stats(cp, skb);
@@ -808,7 +903,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
808 903
809 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 904 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
810 /* The embedded headers contain source and dest in reverse order */ 905 /* The embedded headers contain source and dest in reverse order */
811 cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); 906 cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
812 if (!cp) 907 if (!cp)
813 return NF_ACCEPT; 908 return NF_ACCEPT;
814 909
@@ -885,7 +980,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
885 980
886 ip_vs_fill_iphdr(AF_INET6, cih, &ciph); 981 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
887 /* The embedded headers contain source and dest in reverse order */ 982 /* The embedded headers contain source and dest in reverse order */
888 cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); 983 cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
889 if (!cp) 984 if (!cp)
890 return NF_ACCEPT; 985 return NF_ACCEPT;
891 986
@@ -921,12 +1016,13 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
921} 1016}
922 1017
923/* Handle response packets: rewrite addresses and send away... 1018/* Handle response packets: rewrite addresses and send away...
924 * Used for NAT and local client.
925 */ 1019 */
926static unsigned int 1020static unsigned int
927handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 1021handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
928 struct ip_vs_conn *cp, int ihl) 1022 struct ip_vs_conn *cp, int ihl)
929{ 1023{
1024 struct ip_vs_protocol *pp = pd->pp;
1025
930 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 1026 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
931 1027
932 if (!skb_make_writable(skb, ihl)) 1028 if (!skb_make_writable(skb, ihl))
@@ -961,21 +1057,13 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
961 * if it came from this machine itself. So re-compute 1057 * if it came from this machine itself. So re-compute
962 * the routing information. 1058 * the routing information.
963 */ 1059 */
964#ifdef CONFIG_IP_VS_IPV6 1060 if (ip_vs_route_me_harder(af, skb))
965 if (af == AF_INET6) { 1061 goto drop;
966 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
967 goto drop;
968 } else
969#endif
970 if ((sysctl_ip_vs_snat_reroute ||
971 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
972 ip_route_me_harder(skb, RTN_LOCAL) != 0)
973 goto drop;
974 1062
975 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); 1063 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
976 1064
977 ip_vs_out_stats(cp, skb); 1065 ip_vs_out_stats(cp, skb);
978 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 1066 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
979 skb->ipvs_property = 1; 1067 skb->ipvs_property = 1;
980 if (!(cp->flags & IP_VS_CONN_F_NFCT)) 1068 if (!(cp->flags & IP_VS_CONN_F_NFCT))
981 ip_vs_notrack(skb); 1069 ip_vs_notrack(skb);
@@ -999,8 +1087,10 @@ drop:
999static unsigned int 1087static unsigned int
1000ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) 1088ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1001{ 1089{
1090 struct net *net = NULL;
1002 struct ip_vs_iphdr iph; 1091 struct ip_vs_iphdr iph;
1003 struct ip_vs_protocol *pp; 1092 struct ip_vs_protocol *pp;
1093 struct ip_vs_proto_data *pd;
1004 struct ip_vs_conn *cp; 1094 struct ip_vs_conn *cp;
1005 1095
1006 EnterFunction(11); 1096 EnterFunction(11);
@@ -1022,6 +1112,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1022 if (unlikely(!skb_dst(skb))) 1112 if (unlikely(!skb_dst(skb)))
1023 return NF_ACCEPT; 1113 return NF_ACCEPT;
1024 1114
1115 net = skb_net(skb);
1025 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1116 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1026#ifdef CONFIG_IP_VS_IPV6 1117#ifdef CONFIG_IP_VS_IPV6
1027 if (af == AF_INET6) { 1118 if (af == AF_INET6) {
@@ -1045,9 +1136,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1045 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1136 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1046 } 1137 }
1047 1138
1048 pp = ip_vs_proto_get(iph.protocol); 1139 pd = ip_vs_proto_data_get(net, iph.protocol);
1049 if (unlikely(!pp)) 1140 if (unlikely(!pd))
1050 return NF_ACCEPT; 1141 return NF_ACCEPT;
1142 pp = pd->pp;
1051 1143
1052 /* reassemble IP fragments */ 1144 /* reassemble IP fragments */
1053#ifdef CONFIG_IP_VS_IPV6 1145#ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1165,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1073 /* 1165 /*
1074 * Check if the packet belongs to an existing entry 1166 * Check if the packet belongs to an existing entry
1075 */ 1167 */
1076 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); 1168 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
1077 1169
1078 if (likely(cp)) 1170 if (likely(cp))
1079 return handle_response(af, skb, pp, cp, iph.len); 1171 return handle_response(af, skb, pd, cp, iph.len);
1080 if (sysctl_ip_vs_nat_icmp_send && 1172 if (sysctl_nat_icmp_send(net) &&
1081 (pp->protocol == IPPROTO_TCP || 1173 (pp->protocol == IPPROTO_TCP ||
1082 pp->protocol == IPPROTO_UDP || 1174 pp->protocol == IPPROTO_UDP ||
1083 pp->protocol == IPPROTO_SCTP)) { 1175 pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1179,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1087 sizeof(_ports), _ports); 1179 sizeof(_ports), _ports);
1088 if (pptr == NULL) 1180 if (pptr == NULL)
1089 return NF_ACCEPT; /* Not for me */ 1181 return NF_ACCEPT; /* Not for me */
1090 if (ip_vs_lookup_real_service(af, iph.protocol, 1182 if (ip_vs_lookup_real_service(net, af, iph.protocol,
1091 &iph.saddr, 1183 &iph.saddr,
1092 pptr[0])) { 1184 pptr[0])) {
1093 /* 1185 /*
@@ -1202,14 +1294,15 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1202static int 1294static int
1203ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) 1295ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1204{ 1296{
1297 struct net *net = NULL;
1205 struct iphdr *iph; 1298 struct iphdr *iph;
1206 struct icmphdr _icmph, *ic; 1299 struct icmphdr _icmph, *ic;
1207 struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ 1300 struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
1208 struct ip_vs_iphdr ciph; 1301 struct ip_vs_iphdr ciph;
1209 struct ip_vs_conn *cp; 1302 struct ip_vs_conn *cp;
1210 struct ip_vs_protocol *pp; 1303 struct ip_vs_protocol *pp;
1304 struct ip_vs_proto_data *pd;
1211 unsigned int offset, ihl, verdict; 1305 unsigned int offset, ihl, verdict;
1212 union nf_inet_addr snet;
1213 1306
1214 *related = 1; 1307 *related = 1;
1215 1308
@@ -1249,9 +1342,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1249 if (cih == NULL) 1342 if (cih == NULL)
1250 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1343 return NF_ACCEPT; /* The packet looks wrong, ignore */
1251 1344
1252 pp = ip_vs_proto_get(cih->protocol); 1345 net = skb_net(skb);
1253 if (!pp) 1346 pd = ip_vs_proto_data_get(net, cih->protocol);
1347 if (!pd)
1254 return NF_ACCEPT; 1348 return NF_ACCEPT;
1349 pp = pd->pp;
1255 1350
1256 /* Is the embedded protocol header present? */ 1351 /* Is the embedded protocol header present? */
1257 if (unlikely(cih->frag_off & htons(IP_OFFSET) && 1352 if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,18 +1360,9 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1265 1360
1266 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1361 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
1267 /* The embedded headers contain source and dest in reverse order */ 1362 /* The embedded headers contain source and dest in reverse order */
1268 cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); 1363 cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
1269 if (!cp) { 1364 if (!cp)
1270 /* The packet could also belong to a local client */
1271 cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
1272 if (cp) {
1273 snet.ip = iph->saddr;
1274 return handle_response_icmp(AF_INET, skb, &snet,
1275 cih->protocol, cp, pp,
1276 offset, ihl);
1277 }
1278 return NF_ACCEPT; 1365 return NF_ACCEPT;
1279 }
1280 1366
1281 verdict = NF_DROP; 1367 verdict = NF_DROP;
1282 1368
@@ -1312,6 +1398,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1312static int 1398static int
1313ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) 1399ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1314{ 1400{
1401 struct net *net = NULL;
1315 struct ipv6hdr *iph; 1402 struct ipv6hdr *iph;
1316 struct icmp6hdr _icmph, *ic; 1403 struct icmp6hdr _icmph, *ic;
1317 struct ipv6hdr _ciph, *cih; /* The ip header contained 1404 struct ipv6hdr _ciph, *cih; /* The ip header contained
@@ -1319,8 +1406,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1319 struct ip_vs_iphdr ciph; 1406 struct ip_vs_iphdr ciph;
1320 struct ip_vs_conn *cp; 1407 struct ip_vs_conn *cp;
1321 struct ip_vs_protocol *pp; 1408 struct ip_vs_protocol *pp;
1409 struct ip_vs_proto_data *pd;
1322 unsigned int offset, verdict; 1410 unsigned int offset, verdict;
1323 union nf_inet_addr snet;
1324 struct rt6_info *rt; 1411 struct rt6_info *rt;
1325 1412
1326 *related = 1; 1413 *related = 1;
@@ -1361,9 +1448,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1361 if (cih == NULL) 1448 if (cih == NULL)
1362 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1449 return NF_ACCEPT; /* The packet looks wrong, ignore */
1363 1450
1364 pp = ip_vs_proto_get(cih->nexthdr); 1451 net = skb_net(skb);
1365 if (!pp) 1452 pd = ip_vs_proto_data_get(net, cih->nexthdr);
1453 if (!pd)
1366 return NF_ACCEPT; 1454 return NF_ACCEPT;
1455 pp = pd->pp;
1367 1456
1368 /* Is the embedded protocol header present? */ 1457 /* Is the embedded protocol header present? */
1369 /* TODO: we don't support fragmentation at the moment anyways */ 1458 /* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,19 +1466,9 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1377 1466
1378 ip_vs_fill_iphdr(AF_INET6, cih, &ciph); 1467 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
1379 /* The embedded headers contain source and dest in reverse order */ 1468 /* The embedded headers contain source and dest in reverse order */
1380 cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); 1469 cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
1381 if (!cp) { 1470 if (!cp)
1382 /* The packet could also belong to a local client */
1383 cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
1384 if (cp) {
1385 ipv6_addr_copy(&snet.in6, &iph->saddr);
1386 return handle_response_icmp(AF_INET6, skb, &snet,
1387 cih->nexthdr,
1388 cp, pp, offset,
1389 sizeof(struct ipv6hdr));
1390 }
1391 return NF_ACCEPT; 1471 return NF_ACCEPT;
1392 }
1393 1472
1394 verdict = NF_DROP; 1473 verdict = NF_DROP;
1395 1474
@@ -1423,10 +1502,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1423static unsigned int 1502static unsigned int
1424ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) 1503ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1425{ 1504{
1505 struct net *net;
1426 struct ip_vs_iphdr iph; 1506 struct ip_vs_iphdr iph;
1427 struct ip_vs_protocol *pp; 1507 struct ip_vs_protocol *pp;
1508 struct ip_vs_proto_data *pd;
1428 struct ip_vs_conn *cp; 1509 struct ip_vs_conn *cp;
1429 int ret, restart, pkts; 1510 int ret, restart, pkts;
1511 struct netns_ipvs *ipvs;
1430 1512
1431 /* Already marked as IPVS request or reply? */ 1513 /* Already marked as IPVS request or reply? */
1432 if (skb->ipvs_property) 1514 if (skb->ipvs_property)
@@ -1480,20 +1562,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1480 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1562 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1481 } 1563 }
1482 1564
1565 net = skb_net(skb);
1483 /* Protocol supported? */ 1566 /* Protocol supported? */
1484 pp = ip_vs_proto_get(iph.protocol); 1567 pd = ip_vs_proto_data_get(net, iph.protocol);
1485 if (unlikely(!pp)) 1568 if (unlikely(!pd))
1486 return NF_ACCEPT; 1569 return NF_ACCEPT;
1487 1570 pp = pd->pp;
1488 /* 1571 /*
1489 * Check if the packet belongs to an existing connection entry 1572 * Check if the packet belongs to an existing connection entry
1490 */ 1573 */
1491 cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); 1574 cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
1492 1575
1493 if (unlikely(!cp)) { 1576 if (unlikely(!cp)) {
1494 int v; 1577 int v;
1495 1578
1496 if (!pp->conn_schedule(af, skb, pp, &v, &cp)) 1579 if (!pp->conn_schedule(af, skb, pd, &v, &cp))
1497 return v; 1580 return v;
1498 } 1581 }
1499 1582
@@ -1505,12 +1588,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1505 } 1588 }
1506 1589
1507 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); 1590 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
1508 1591 net = skb_net(skb);
1592 ipvs = net_ipvs(net);
1509 /* Check the server status */ 1593 /* Check the server status */
1510 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1594 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
1511 /* the destination server is not available */ 1595 /* the destination server is not available */
1512 1596
1513 if (sysctl_ip_vs_expire_nodest_conn) { 1597 if (sysctl_expire_nodest_conn(ipvs)) {
1514 /* try to expire the connection immediately */ 1598 /* try to expire the connection immediately */
1515 ip_vs_conn_expire_now(cp); 1599 ip_vs_conn_expire_now(cp);
1516 } 1600 }
@@ -1521,7 +1605,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1521 } 1605 }
1522 1606
1523 ip_vs_in_stats(cp, skb); 1607 ip_vs_in_stats(cp, skb);
1524 restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); 1608 restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
1525 if (cp->packet_xmit) 1609 if (cp->packet_xmit)
1526 ret = cp->packet_xmit(skb, cp, pp); 1610 ret = cp->packet_xmit(skb, cp, pp);
1527 /* do not touch skb anymore */ 1611 /* do not touch skb anymore */
@@ -1535,35 +1619,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1535 * 1619 *
1536 * Sync connection if it is about to close to 1620 * Sync connection if it is about to close to
1537 * encorage the standby servers to update the connections timeout 1621 * encorage the standby servers to update the connections timeout
1622 *
1623 * For ONE_PKT let ip_vs_sync_conn() do the filter work.
1538 */ 1624 */
1539 pkts = atomic_add_return(1, &cp->in_pkts); 1625
1540 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1626 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
1627 pkts = sysctl_sync_threshold(ipvs);
1628 else
1629 pkts = atomic_add_return(1, &cp->in_pkts);
1630
1631 if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1541 cp->protocol == IPPROTO_SCTP) { 1632 cp->protocol == IPPROTO_SCTP) {
1542 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1633 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1543 (pkts % sysctl_ip_vs_sync_threshold[1] 1634 (pkts % sysctl_sync_period(ipvs)
1544 == sysctl_ip_vs_sync_threshold[0])) || 1635 == sysctl_sync_threshold(ipvs))) ||
1545 (cp->old_state != cp->state && 1636 (cp->old_state != cp->state &&
1546 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1637 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
1547 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || 1638 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
1548 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { 1639 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
1549 ip_vs_sync_conn(cp); 1640 ip_vs_sync_conn(net, cp);
1550 goto out; 1641 goto out;
1551 } 1642 }
1552 } 1643 }
1553 1644
1554 /* Keep this block last: TCP and others with pp->num_states <= 1 */ 1645 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1555 else if (af == AF_INET && 1646 else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
1556 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1557 (((cp->protocol != IPPROTO_TCP || 1647 (((cp->protocol != IPPROTO_TCP ||
1558 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1648 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
1559 (pkts % sysctl_ip_vs_sync_threshold[1] 1649 (pkts % sysctl_sync_period(ipvs)
1560 == sysctl_ip_vs_sync_threshold[0])) || 1650 == sysctl_sync_threshold(ipvs))) ||
1561 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && 1651 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
1562 ((cp->state == IP_VS_TCP_S_FIN_WAIT) || 1652 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
1563 (cp->state == IP_VS_TCP_S_CLOSE) || 1653 (cp->state == IP_VS_TCP_S_CLOSE) ||
1564 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || 1654 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
1565 (cp->state == IP_VS_TCP_S_TIME_WAIT))))) 1655 (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
1566 ip_vs_sync_conn(cp); 1656 ip_vs_sync_conn(net, cp);
1567out: 1657out:
1568 cp->old_state = cp->state; 1658 cp->old_state = cp->state;
1569 1659
@@ -1782,7 +1872,39 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1782 }, 1872 },
1783#endif 1873#endif
1784}; 1874};
1875/*
1876 * Initialize IP Virtual Server netns mem.
1877 */
1878static int __net_init __ip_vs_init(struct net *net)
1879{
1880 struct netns_ipvs *ipvs;
1881
1882 ipvs = net_generic(net, ip_vs_net_id);
1883 if (ipvs == NULL) {
1884 pr_err("%s(): no memory.\n", __func__);
1885 return -ENOMEM;
1886 }
1887 ipvs->net = net;
1888 /* Counters used for creating unique names */
1889 ipvs->gen = atomic_read(&ipvs_netns_cnt);
1890 atomic_inc(&ipvs_netns_cnt);
1891 net->ipvs = ipvs;
1892 printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
1893 sizeof(struct netns_ipvs), ipvs->gen);
1894 return 0;
1895}
1785 1896
1897static void __net_exit __ip_vs_cleanup(struct net *net)
1898{
1899 IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
1900}
1901
1902static struct pernet_operations ipvs_core_ops = {
1903 .init = __ip_vs_init,
1904 .exit = __ip_vs_cleanup,
1905 .id = &ip_vs_net_id,
1906 .size = sizeof(struct netns_ipvs),
1907};
1786 1908
1787/* 1909/*
1788 * Initialize IP Virtual Server 1910 * Initialize IP Virtual Server
@@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void)
1791{ 1913{
1792 int ret; 1914 int ret;
1793 1915
1794 ip_vs_estimator_init(); 1916 ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */
1917 if (ret < 0)
1918 return ret;
1795 1919
1920 ip_vs_estimator_init();
1796 ret = ip_vs_control_init(); 1921 ret = ip_vs_control_init();
1797 if (ret < 0) { 1922 if (ret < 0) {
1798 pr_err("can't setup control.\n"); 1923 pr_err("can't setup control.\n");
@@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void)
1813 goto cleanup_app; 1938 goto cleanup_app;
1814 } 1939 }
1815 1940
1941 ret = ip_vs_sync_init();
1942 if (ret < 0) {
1943 pr_err("can't setup sync data.\n");
1944 goto cleanup_conn;
1945 }
1946
1816 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 1947 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1817 if (ret < 0) { 1948 if (ret < 0) {
1818 pr_err("can't register hooks.\n"); 1949 pr_err("can't register hooks.\n");
1819 goto cleanup_conn; 1950 goto cleanup_sync;
1820 } 1951 }
1821 1952
1822 pr_info("ipvs loaded.\n"); 1953 pr_info("ipvs loaded.\n");
1823 return ret; 1954 return ret;
1824 1955
1956cleanup_sync:
1957 ip_vs_sync_cleanup();
1825 cleanup_conn: 1958 cleanup_conn:
1826 ip_vs_conn_cleanup(); 1959 ip_vs_conn_cleanup();
1827 cleanup_app: 1960 cleanup_app:
@@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void)
1831 ip_vs_control_cleanup(); 1964 ip_vs_control_cleanup();
1832 cleanup_estimator: 1965 cleanup_estimator:
1833 ip_vs_estimator_cleanup(); 1966 ip_vs_estimator_cleanup();
1967 unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
1834 return ret; 1968 return ret;
1835} 1969}
1836 1970
/*
 *	Module exit handler.
 *
 *	Unregisters the netfilter hooks first, then calls the sync, conn,
 *	app, protocol, control and estimator cleanup routines in that order,
 *	and finally unregisters the per-netns operations (ipvs_core_ops)
 *	before logging that ipvs was unloaded.
 *	NOTE(review): the order looks like the reverse of the setup steps in
 *	ip_vs_init() - confirm against the full function before reordering.
 */
1837static void __exit ip_vs_cleanup(void) 1971static void __exit ip_vs_cleanup(void)
1838{ 1972{
1839 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 1973 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
1974 ip_vs_sync_cleanup();
1840 ip_vs_conn_cleanup(); 1975 ip_vs_conn_cleanup();
1841 ip_vs_app_cleanup(); 1976 ip_vs_app_cleanup();
1842 ip_vs_protocol_cleanup(); 1977 ip_vs_protocol_cleanup();
1843 ip_vs_control_cleanup(); 1978 ip_vs_control_cleanup();
1844 ip_vs_estimator_cleanup(); 1979 ip_vs_estimator_cleanup();
1980 unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
1845 pr_info("ipvs unloaded.\n"); 1981 pr_info("ipvs unloaded.\n");
1846} 1982}
1847 1983