author    Jan Engelhardt <jengelh@medozas.de>       2010-04-19 10:05:10 -0400
committer Patrick McHardy <kaber@trash.net>         2010-04-19 10:05:10 -0400
commit    f3c5c1bfd430858d3a05436f82c51e53104feb6b (patch)
tree      ada5b570b66e141e79fdb256f69e2541a3d30c04 /net
parent    e281b19897dc21c1071802808d461627d747a877 (diff)
netfilter: xtables: make ip_tables reentrant
Currently, the table traverser stores return addresses in the ruleset
itself (struct ip6t_entry->comefrom). This has a well-known drawback:
the jumpstack is overwritten on reentry, making it necessary for
targets to return absolute verdicts. Also, the ruleset (which might be
heavy memory-wise) needs to be replicated for each CPU that can
possibly invoke ip6t_do_table.

This patch decouples the jumpstack from struct ip6t_entry and instead
puts it into xt_table_info. Not being restricted by 'comefrom'
anymore, we can set up a stack as needed. By default, there is room
allocated for two entries into the traverser.

arp_tables is not touched though, because there is just one/two
modules and further patches seek to collapse the table traverser
anyhow.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
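In short, the old traverser threaded return addresses through the rules
themselves, while the new one keeps an explicit per-CPU stack of saved
positions. The following condensed sketch shows only that push/pop
discipline; struct rule, STACKSIZE, push_call and pop_call are
hypothetical stand-ins (the real types and logic are struct ipt_entry,
xt_table_info->stacksize and the hunks in ipt_do_table()/ip6t_do_table()
below):

	/* Illustration only -- not part of the patch. */
	#define STACKSIZE 4		/* real value: ERROR-target count, doubled */

	struct rule;			/* stands in for struct ipt_entry */

	struct cpu_stack {
		const struct rule *jumpstack[STACKSIZE];
		unsigned int sp;	/* one instance per CPU in the real code */
	};

	/* Jump to a user-defined chain: remember the calling rule. */
	static int push_call(struct cpu_stack *s, const struct rule *e)
	{
		if (s->sp >= STACKSIZE)
			return -1;	/* overflow; the traverser drops the packet */
		s->jumpstack[s->sp++] = e;
		return 0;
	}

	/* RETURN verdict: pop the caller, or fall back to the hook's underflow. */
	static const struct rule *pop_call(struct cpu_stack *s,
					   const struct rule *underflow)
	{
		if (s->sp == 0)
			return underflow;	/* returning from a builtin chain */
		return s->jumpstack[--s->sp];
	}

Reentrancy then falls out of the save/restore around each traversal:
ipt_do_table() records the stack pointer on entry (origptr) and restores
it on exit, so a nested invocation on the same CPU pushes above the outer
one's frames instead of overwriting 'comefrom' fields in the shared
ruleset.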
Diffstat (limited to 'net')
-rw-r--r--  net/ipv4/netfilter/arp_tables.c |  6
-rw-r--r--  net/ipv4/netfilter/ip_tables.c  | 65
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 56
-rw-r--r--  net/netfilter/x_tables.c        | 77
4 files changed, 138 insertions, 66 deletions
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8e363d90365..07a699059390 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -649,6 +649,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			break;
 		++i;
+		if (strcmp(arpt_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
@@ -1774,8 +1777,7 @@ struct xt_table *arpt_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 18c5b1573f3e..70900ecf88e2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -321,8 +321,6 @@ ipt_do_table(struct sk_buff *skb,
 	     const struct net_device *out,
 	     struct xt_table *table)
 {
-#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
-
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct iphdr *ip;
 	bool hotdrop = false;
@@ -330,7 +328,8 @@ ipt_do_table(struct sk_buff *skb,
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
 	const void *table_base;
-	struct ipt_entry *e, *back;
+	struct ipt_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
@@ -356,19 +355,23 @@ ipt_do_table(struct sk_buff *skb,
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
+	stackptr   = &private->stackptr[cpu];
+	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, private->underflow[hook]);
+	pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
+		 table->name, hook, origptr,
+		 get_entry(table_base, private->underflow[hook]));
 
 	do {
 		const struct ipt_entry_target *t;
 		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
 		if (!ip_packet_match(ip, indev, outdev,
 		    &e->ip, mtpar.fragoff)) {
 no_match:
@@ -403,17 +406,28 @@ ipt_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (*stackptr == 0) {
+					e = get_entry(table_base,
+					    private->underflow[hook]);
+					pr_devel("Underflow (this is normal) "
+						 "to %p\n", e);
+				} else {
+					e = jumpstack[--*stackptr];
+					pr_devel("Pulled %p out from pos %u\n",
+						 e, *stackptr);
+					e = ipt_next_entry(e);
+				}
 				continue;
 			}
 			if (table_base + v != ipt_next_entry(e) &&
 			    !(e->ip.flags & IPT_F_GOTO)) {
-				/* Save old back ptr in next entry */
-				struct ipt_entry *next = ipt_next_entry(e);
-				next->comefrom = (void *)back - table_base;
-				/* set back pointer to next entry */
-				back = next;
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[(*stackptr)++] = e;
+				pr_devel("Pushed %p into pos %u\n",
+					 e, *stackptr - 1);
 			}
 
 			e = get_entry(table_base, v);
@@ -426,18 +440,7 @@ ipt_do_table(struct sk_buff *skb,
 			tgpar.targinfo = t->data;
 
 
-#ifdef CONFIG_NETFILTER_DEBUG
-			tb_comefrom = 0xeeeeeeec;
-#endif
 			verdict = t->u.kernel.target->target(skb, &tgpar);
-#ifdef CONFIG_NETFILTER_DEBUG
-			if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
-				printk("Target %s reentered!\n",
-				       t->u.kernel.target->name);
-				verdict = NF_DROP;
-			}
-			tb_comefrom = 0x57acc001;
-#endif
 			/* Target might have changed stuff. */
 			ip = ip_hdr(skb);
 			if (verdict == IPT_CONTINUE)
@@ -447,7 +450,9 @@ ipt_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 	xt_info_rdunlock_bh();
-
+	pr_devel("Exiting %s; resetting sp from %u to %u\n",
+		 __func__, *stackptr, origptr);
+	*stackptr = origptr;
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
@@ -455,8 +460,6 @@ ipt_do_table(struct sk_buff *skb,
 		return NF_DROP;
 	else return verdict;
 #endif
-
-#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -838,6 +841,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
+		if (strcmp(ipt_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -2086,8 +2092,7 @@ struct xt_table *ipt_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f2b815e72329..2a2770bcd640 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -351,15 +351,14 @@ ip6t_do_table(struct sk_buff *skb,
 	      const struct net_device *out,
 	      struct xt_table *table)
 {
-#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
-
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
 	const void *table_base;
-	struct ip6t_entry *e, *back;
+	struct ip6t_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
@@ -383,19 +382,19 @@ ip6t_do_table(struct sk_buff *skb,
 
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
+	stackptr   = &private->stackptr[cpu];
+	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, private->underflow[hook]);
-
 	do {
 		const struct ip6t_entry_target *t;
 		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
 		    &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
 no_match:
@@ -432,17 +431,20 @@ ip6t_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (*stackptr == 0)
+					e = get_entry(table_base,
+					    private->underflow[hook]);
+				else
+					e = ip6t_next_entry(jumpstack[--*stackptr]);
 				continue;
 			}
 			if (table_base + v != ip6t_next_entry(e) &&
 			    !(e->ipv6.flags & IP6T_F_GOTO)) {
-				/* Save old back ptr in next entry */
-				struct ip6t_entry *next = ip6t_next_entry(e);
-				next->comefrom = (void *)back - table_base;
-				/* set back pointer to next entry */
-				back = next;
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[(*stackptr)++] = e;
 			}
 
 			e = get_entry(table_base, v);
@@ -454,19 +456,7 @@ ip6t_do_table(struct sk_buff *skb,
 			tgpar.target = t->u.kernel.target;
 			tgpar.targinfo = t->data;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-			tb_comefrom = 0xeeeeeeec;
-#endif
 			verdict = t->u.kernel.target->target(skb, &tgpar);
-
-#ifdef CONFIG_NETFILTER_DEBUG
-			if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
-				printk("Target %s reentered!\n",
-				       t->u.kernel.target->name);
-				verdict = NF_DROP;
-			}
-			tb_comefrom = 0x57acc001;
-#endif
 			if (verdict == IP6T_CONTINUE)
 				e = ip6t_next_entry(e);
 			else
@@ -474,10 +464,8 @@ ip6t_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	tb_comefrom = NETFILTER_LINK_POISON;
-#endif
 	xt_info_rdunlock_bh();
+	*stackptr = origptr;
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -486,8 +474,6 @@ ip6t_do_table(struct sk_buff *skb,
 		return NF_DROP;
 	else return verdict;
 #endif
-
-#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -869,6 +855,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
+		if (strcmp(ip6t_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -2120,8 +2109,7 @@ struct xt_table *ip6t_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8e23d8f68459..edde5c602890 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -62,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
 	[NFPROTO_IPV6]  = "ip6",
 };
 
+/* Allow this many total (re)entries. */
+static const unsigned int xt_jumpstack_multiplier = 2;
+
 /* Registration hooks for targets. */
 int
 xt_register_target(struct xt_target *target)
@@ -680,6 +683,26 @@ void xt_free_table_info(struct xt_table_info *info)
 		else
 			vfree(info->entries[cpu]);
 	}
+
+	if (info->jumpstack != NULL) {
+		if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
+			for_each_possible_cpu(cpu)
+				vfree(info->jumpstack[cpu]);
+		} else {
+			for_each_possible_cpu(cpu)
+				kfree(info->jumpstack[cpu]);
+		}
+	}
+
+	if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
+		vfree(info->jumpstack);
+	else
+		kfree(info->jumpstack);
+	if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
+		vfree(info->stackptr);
+	else
+		kfree(info->stackptr);
+
 	kfree(info);
 }
 EXPORT_SYMBOL(xt_free_table_info);
@@ -724,6 +747,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
 DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
 EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
 
+static int xt_jumpstack_alloc(struct xt_table_info *i)
+{
+	unsigned int size;
+	int cpu;
+
+	size = sizeof(unsigned int) * nr_cpu_ids;
+	if (size > PAGE_SIZE)
+		i->stackptr = vmalloc(size);
+	else
+		i->stackptr = kmalloc(size, GFP_KERNEL);
+	if (i->stackptr == NULL)
+		return -ENOMEM;
+	memset(i->stackptr, 0, size);
+
+	size = sizeof(void **) * nr_cpu_ids;
+	if (size > PAGE_SIZE)
+		i->jumpstack = vmalloc(size);
+	else
+		i->jumpstack = kmalloc(size, GFP_KERNEL);
+	if (i->jumpstack == NULL)
+		return -ENOMEM;
+	memset(i->jumpstack, 0, size);
+
+	i->stacksize *= xt_jumpstack_multiplier;
+	size = sizeof(void *) * i->stacksize;
+	for_each_possible_cpu(cpu) {
+		if (size > PAGE_SIZE)
+			i->jumpstack[cpu] = vmalloc_node(size,
+				cpu_to_node(cpu));
+		else
+			i->jumpstack[cpu] = kmalloc_node(size,
+				GFP_KERNEL, cpu_to_node(cpu));
+		if (i->jumpstack[cpu] == NULL)
+			/*
+			 * Freeing will be done later on by the callers. The
+			 * chain is: xt_replace_table -> __do_replace ->
+			 * do_replace -> xt_free_table_info.
+			 */
+			return -ENOMEM;
+	}
+
+	return 0;
+}
 
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
@@ -732,6 +798,7 @@ xt_replace_table(struct xt_table *table,
 		 int *error)
 {
 	struct xt_table_info *private;
+	int ret;
 
 	/* Do the substitution. */
 	local_bh_disable();
@@ -746,6 +813,12 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0) {
+		*error = ret;
+		return NULL;
+	}
+
 	table->private = newinfo;
 	newinfo->initial_entries = private->initial_entries;
 
@@ -770,6 +843,10 @@ struct xt_table *xt_register_table(struct net *net,
 	struct xt_table_info *private;
 	struct xt_table *t, *table;
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
 	/* Don't add one object to multiple lists. */
 	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
 	if (!table) {