aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-12-18 12:35:15 -0500
committerPablo Neira Ayuso <pablo@netfilter.org>2011-01-13 06:05:12 -0500
commit255d0dc34068a976550ce555e153c0bfcfec7cc6 (patch)
treee936c3d55eaf144cbc4edf8f9332d8089719d0d4
parentb017900aac4a158b9bf7ffdcb8a369a91115b3e4 (diff)
netfilter: x_table: speedup compat operations
One iptables invocation with 135000 rules takes 35 seconds of cpu time on a recent server, using a 32bit distro and a 64bit kernel. We eventually trigger NMI/RCU watchdog. INFO: rcu_sched_state detected stall on CPU 3 (t=6000 jiffies) COMPAT mode has quadratic behavior and consume 16 bytes of memory per rule. Switch the xt_compat algos to use an array instead of list, and use a binary search to locate an offset in the sorted array. This halves memory need (8 bytes per rule), and removes quadratic behavior [ O(N*N) -> O(N*log2(N)) ] Time of iptables goes from 35 s to 150 ms. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
-rw-r--r--include/linux/netfilter/x_tables.h3
-rw-r--r--net/bridge/netfilter/ebtables.c1
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/netfilter/x_tables.c82
6 files changed, 57 insertions, 35 deletions
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 742bec051440..0f04d985b41c 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -611,8 +611,9 @@ struct _compat_xt_align {
611extern void xt_compat_lock(u_int8_t af); 611extern void xt_compat_lock(u_int8_t af);
612extern void xt_compat_unlock(u_int8_t af); 612extern void xt_compat_unlock(u_int8_t af);
613 613
614extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); 614extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
615extern void xt_compat_flush_offsets(u_int8_t af); 615extern void xt_compat_flush_offsets(u_int8_t af);
616extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
616extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); 617extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
617 618
618extern int xt_compat_match_offset(const struct xt_match *match); 619extern int xt_compat_match_offset(const struct xt_match *match);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 16df0532d4b9..5f1825df9dca 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info,
1764 1764
1765 newinfo->entries_size = size; 1765 newinfo->entries_size = size;
1766 1766
1767 xt_compat_init_offsets(AF_INET, info->nentries);
1767 return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, 1768 return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
1768 entries, newinfo); 1769 entries, newinfo);
1769} 1770}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3fac340a28d5..47e5178b998b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -883,6 +883,7 @@ static int compat_table_info(const struct xt_table_info *info,
883 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 883 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
884 newinfo->initial_entries = 0; 884 newinfo->initial_entries = 0;
885 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 885 loc_cpu_entry = info->entries[raw_smp_processor_id()];
886 xt_compat_init_offsets(NFPROTO_ARP, info->number);
886 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 887 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
887 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 888 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
888 if (ret != 0) 889 if (ret != 0)
@@ -1350,6 +1351,7 @@ static int translate_compat_table(const char *name,
1350 duprintf("translate_compat_table: size %u\n", info->size); 1351 duprintf("translate_compat_table: size %u\n", info->size);
1351 j = 0; 1352 j = 0;
1352 xt_compat_lock(NFPROTO_ARP); 1353 xt_compat_lock(NFPROTO_ARP);
1354 xt_compat_init_offsets(NFPROTO_ARP, number);
1353 /* Walk through entries, checking offsets. */ 1355 /* Walk through entries, checking offsets. */
1354 xt_entry_foreach(iter0, entry0, total_size) { 1356 xt_entry_foreach(iter0, entry0, total_size) {
1355 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1357 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d633b3b6..c5a75d70970f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1080,6 +1080,7 @@ static int compat_table_info(const struct xt_table_info *info,
1080 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1080 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1081 newinfo->initial_entries = 0; 1081 newinfo->initial_entries = 0;
1082 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1082 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1083 xt_compat_init_offsets(AF_INET, info->number);
1083 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1084 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1084 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1085 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1085 if (ret != 0) 1086 if (ret != 0)
@@ -1681,6 +1682,7 @@ translate_compat_table(struct net *net,
1681 duprintf("translate_compat_table: size %u\n", info->size); 1682 duprintf("translate_compat_table: size %u\n", info->size);
1682 j = 0; 1683 j = 0;
1683 xt_compat_lock(AF_INET); 1684 xt_compat_lock(AF_INET);
1685 xt_compat_init_offsets(AF_INET, number);
1684 /* Walk through entries, checking offsets. */ 1686 /* Walk through entries, checking offsets. */
1685 xt_entry_foreach(iter0, entry0, total_size) { 1687 xt_entry_foreach(iter0, entry0, total_size) {
1686 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1688 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 455582384ece..0c9973a657fb 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1093,6 +1093,7 @@ static int compat_table_info(const struct xt_table_info *info,
1093 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1093 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1094 newinfo->initial_entries = 0; 1094 newinfo->initial_entries = 0;
1095 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1095 loc_cpu_entry = info->entries[raw_smp_processor_id()];
1096 xt_compat_init_offsets(AF_INET6, info->number);
1096 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1097 xt_entry_foreach(iter, loc_cpu_entry, info->size) {
1097 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1098 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
1098 if (ret != 0) 1099 if (ret != 0)
@@ -1696,6 +1697,7 @@ translate_compat_table(struct net *net,
1696 duprintf("translate_compat_table: size %u\n", info->size); 1697 duprintf("translate_compat_table: size %u\n", info->size);
1697 j = 0; 1698 j = 0;
1698 xt_compat_lock(AF_INET6); 1699 xt_compat_lock(AF_INET6);
1700 xt_compat_init_offsets(AF_INET6, number);
1699 /* Walk through entries, checking offsets. */ 1701 /* Walk through entries, checking offsets. */
1700 xt_entry_foreach(iter0, entry0, total_size) { 1702 xt_entry_foreach(iter0, entry0, total_size) {
1701 ret = check_compat_entry_size_and_hooks(iter0, info, &size, 1703 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 80463507420e..ee5de3af510a 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -38,9 +38,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
38#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 38#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
39 39
40struct compat_delta { 40struct compat_delta {
41 struct compat_delta *next; 41 unsigned int offset; /* offset in kernel */
42 unsigned int offset; 42 int delta; /* delta in 32bit user land */
43 int delta;
44}; 43};
45 44
46struct xt_af { 45struct xt_af {
@@ -49,7 +48,9 @@ struct xt_af {
49 struct list_head target; 48 struct list_head target;
50#ifdef CONFIG_COMPAT 49#ifdef CONFIG_COMPAT
51 struct mutex compat_mutex; 50 struct mutex compat_mutex;
52 struct compat_delta *compat_offsets; 51 struct compat_delta *compat_tab;
52 unsigned int number; /* number of slots in compat_tab[] */
53 unsigned int cur; /* number of used slots in compat_tab[] */
53#endif 54#endif
54}; 55};
55 56
@@ -414,54 +415,67 @@ int xt_check_match(struct xt_mtchk_param *par,
414EXPORT_SYMBOL_GPL(xt_check_match); 415EXPORT_SYMBOL_GPL(xt_check_match);
415 416
416#ifdef CONFIG_COMPAT 417#ifdef CONFIG_COMPAT
417int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) 418int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
418{ 419{
419 struct compat_delta *tmp; 420 struct xt_af *xp = &xt[af];
420 421
421 tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); 422 if (!xp->compat_tab) {
422 if (!tmp) 423 if (!xp->number)
423 return -ENOMEM; 424 return -EINVAL;
425 xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
426 if (!xp->compat_tab)
427 return -ENOMEM;
428 xp->cur = 0;
429 }
424 430
425 tmp->offset = offset; 431 if (xp->cur >= xp->number)
426 tmp->delta = delta; 432 return -EINVAL;
427 433
428 if (xt[af].compat_offsets) { 434 if (xp->cur)
429 tmp->next = xt[af].compat_offsets->next; 435 delta += xp->compat_tab[xp->cur - 1].delta;
430 xt[af].compat_offsets->next = tmp; 436 xp->compat_tab[xp->cur].offset = offset;
431 } else { 437 xp->compat_tab[xp->cur].delta = delta;
432 xt[af].compat_offsets = tmp; 438 xp->cur++;
433 tmp->next = NULL;
434 }
435 return 0; 439 return 0;
436} 440}
437EXPORT_SYMBOL_GPL(xt_compat_add_offset); 441EXPORT_SYMBOL_GPL(xt_compat_add_offset);
438 442
439void xt_compat_flush_offsets(u_int8_t af) 443void xt_compat_flush_offsets(u_int8_t af)
440{ 444{
441 struct compat_delta *tmp, *next; 445 if (xt[af].compat_tab) {
442 446 vfree(xt[af].compat_tab);
443 if (xt[af].compat_offsets) { 447 xt[af].compat_tab = NULL;
444 for (tmp = xt[af].compat_offsets; tmp; tmp = next) { 448 xt[af].number = 0;
445 next = tmp->next;
446 kfree(tmp);
447 }
448 xt[af].compat_offsets = NULL;
449 } 449 }
450} 450}
451EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); 451EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
452 452
453int xt_compat_calc_jump(u_int8_t af, unsigned int offset) 453int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
454{ 454{
455 struct compat_delta *tmp; 455 struct compat_delta *tmp = xt[af].compat_tab;
456 int delta; 456 int mid, left = 0, right = xt[af].cur - 1;
457 457
458 for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) 458 while (left <= right) {
459 if (tmp->offset < offset) 459 mid = (left + right) >> 1;
460 delta += tmp->delta; 460 if (offset > tmp[mid].offset)
461 return delta; 461 left = mid + 1;
462 else if (offset < tmp[mid].offset)
463 right = mid - 1;
464 else
465 return mid ? tmp[mid - 1].delta : 0;
466 }
467 WARN_ON_ONCE(1);
468 return 0;
462} 469}
463EXPORT_SYMBOL_GPL(xt_compat_calc_jump); 470EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
464 471
472void xt_compat_init_offsets(u_int8_t af, unsigned int number)
473{
474 xt[af].number = number;
475 xt[af].cur = 0;
476}
477EXPORT_SYMBOL(xt_compat_init_offsets);
478
465int xt_compat_match_offset(const struct xt_match *match) 479int xt_compat_match_offset(const struct xt_match *match)
466{ 480{
467 u_int16_t csize = match->compatsize ? : match->matchsize; 481 u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -1337,7 +1351,7 @@ static int __init xt_init(void)
1337 mutex_init(&xt[i].mutex); 1351 mutex_init(&xt[i].mutex);
1338#ifdef CONFIG_COMPAT 1352#ifdef CONFIG_COMPAT
1339 mutex_init(&xt[i].compat_mutex); 1353 mutex_init(&xt[i].compat_mutex);
1340 xt[i].compat_offsets = NULL; 1354 xt[i].compat_tab = NULL;
1341#endif 1355#endif
1342 INIT_LIST_HEAD(&xt[i].target); 1356 INIT_LIST_HEAD(&xt[i].target);
1343 INIT_LIST_HEAD(&xt[i].match); 1357 INIT_LIST_HEAD(&xt[i].match);