diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-12-18 12:35:15 -0500 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2011-01-13 06:05:12 -0500 |
commit | 255d0dc34068a976550ce555e153c0bfcfec7cc6 (patch) | |
tree | e936c3d55eaf144cbc4edf8f9332d8089719d0d4 | |
parent | b017900aac4a158b9bf7ffdcb8a369a91115b3e4 (diff) |
netfilter: x_table: speedup compat operations
One iptables invocation with 135000 rules takes 35 seconds of cpu time
on a recent server, using a 32bit distro and a 64bit kernel.
We eventually trigger NMI/RCU watchdog.
INFO: rcu_sched_state detected stall on CPU 3 (t=6000 jiffies)
COMPAT mode has quadratic behavior and consume 16 bytes of memory per
rule.
Switch the xt_compat algos to use an array instead of list, and use a
binary search to locate an offset in the sorted array.
This halves memory need (8 bytes per rule), and removes quadratic
behavior [ O(N*N) -> O(N*log2(N)) ]
Time of iptables goes from 35 s to 150 ms.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
-rw-r--r-- | include/linux/netfilter/x_tables.h | 3 | ||||
-rw-r--r-- | net/bridge/netfilter/ebtables.c | 1 | ||||
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 2 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 2 | ||||
-rw-r--r-- | net/netfilter/x_tables.c | 82 |
6 files changed, 57 insertions, 35 deletions
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 742bec051440..0f04d985b41c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h | |||
@@ -611,8 +611,9 @@ struct _compat_xt_align { | |||
611 | extern void xt_compat_lock(u_int8_t af); | 611 | extern void xt_compat_lock(u_int8_t af); |
612 | extern void xt_compat_unlock(u_int8_t af); | 612 | extern void xt_compat_unlock(u_int8_t af); |
613 | 613 | ||
614 | extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); | 614 | extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); |
615 | extern void xt_compat_flush_offsets(u_int8_t af); | 615 | extern void xt_compat_flush_offsets(u_int8_t af); |
616 | extern void xt_compat_init_offsets(u_int8_t af, unsigned int number); | ||
616 | extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); | 617 | extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); |
617 | 618 | ||
618 | extern int xt_compat_match_offset(const struct xt_match *match); | 619 | extern int xt_compat_match_offset(const struct xt_match *match); |
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 16df0532d4b9..5f1825df9dca 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c | |||
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info, | |||
1764 | 1764 | ||
1765 | newinfo->entries_size = size; | 1765 | newinfo->entries_size = size; |
1766 | 1766 | ||
1767 | xt_compat_init_offsets(AF_INET, info->nentries); | ||
1767 | return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, | 1768 | return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, |
1768 | entries, newinfo); | 1769 | entries, newinfo); |
1769 | } | 1770 | } |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3fac340a28d5..47e5178b998b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -883,6 +883,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
883 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 883 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
884 | newinfo->initial_entries = 0; | 884 | newinfo->initial_entries = 0; |
885 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 885 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
886 | xt_compat_init_offsets(NFPROTO_ARP, info->number); | ||
886 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 887 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
887 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 888 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
888 | if (ret != 0) | 889 | if (ret != 0) |
@@ -1350,6 +1351,7 @@ static int translate_compat_table(const char *name, | |||
1350 | duprintf("translate_compat_table: size %u\n", info->size); | 1351 | duprintf("translate_compat_table: size %u\n", info->size); |
1351 | j = 0; | 1352 | j = 0; |
1352 | xt_compat_lock(NFPROTO_ARP); | 1353 | xt_compat_lock(NFPROTO_ARP); |
1354 | xt_compat_init_offsets(NFPROTO_ARP, number); | ||
1353 | /* Walk through entries, checking offsets. */ | 1355 | /* Walk through entries, checking offsets. */ |
1354 | xt_entry_foreach(iter0, entry0, total_size) { | 1356 | xt_entry_foreach(iter0, entry0, total_size) { |
1355 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1357 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a846d633b3b6..c5a75d70970f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -1080,6 +1080,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1080 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1080 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1081 | newinfo->initial_entries = 0; | 1081 | newinfo->initial_entries = 0; |
1082 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1082 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1083 | xt_compat_init_offsets(AF_INET, info->number); | ||
1083 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1084 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1084 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1085 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1085 | if (ret != 0) | 1086 | if (ret != 0) |
@@ -1681,6 +1682,7 @@ translate_compat_table(struct net *net, | |||
1681 | duprintf("translate_compat_table: size %u\n", info->size); | 1682 | duprintf("translate_compat_table: size %u\n", info->size); |
1682 | j = 0; | 1683 | j = 0; |
1683 | xt_compat_lock(AF_INET); | 1684 | xt_compat_lock(AF_INET); |
1685 | xt_compat_init_offsets(AF_INET, number); | ||
1684 | /* Walk through entries, checking offsets. */ | 1686 | /* Walk through entries, checking offsets. */ |
1685 | xt_entry_foreach(iter0, entry0, total_size) { | 1687 | xt_entry_foreach(iter0, entry0, total_size) { |
1686 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1688 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 455582384ece..0c9973a657fb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -1093,6 +1093,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1093 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1093 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1094 | newinfo->initial_entries = 0; | 1094 | newinfo->initial_entries = 0; |
1095 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1095 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1096 | xt_compat_init_offsets(AF_INET6, info->number); | ||
1096 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1097 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1097 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1098 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1098 | if (ret != 0) | 1099 | if (ret != 0) |
@@ -1696,6 +1697,7 @@ translate_compat_table(struct net *net, | |||
1696 | duprintf("translate_compat_table: size %u\n", info->size); | 1697 | duprintf("translate_compat_table: size %u\n", info->size); |
1697 | j = 0; | 1698 | j = 0; |
1698 | xt_compat_lock(AF_INET6); | 1699 | xt_compat_lock(AF_INET6); |
1700 | xt_compat_init_offsets(AF_INET6, number); | ||
1699 | /* Walk through entries, checking offsets. */ | 1701 | /* Walk through entries, checking offsets. */ |
1700 | xt_entry_foreach(iter0, entry0, total_size) { | 1702 | xt_entry_foreach(iter0, entry0, total_size) { |
1701 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1703 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 80463507420e..ee5de3af510a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c | |||
@@ -38,9 +38,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); | |||
38 | #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) | 38 | #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) |
39 | 39 | ||
40 | struct compat_delta { | 40 | struct compat_delta { |
41 | struct compat_delta *next; | 41 | unsigned int offset; /* offset in kernel */ |
42 | unsigned int offset; | 42 | int delta; /* delta in 32bit user land */ |
43 | int delta; | ||
44 | }; | 43 | }; |
45 | 44 | ||
46 | struct xt_af { | 45 | struct xt_af { |
@@ -49,7 +48,9 @@ struct xt_af { | |||
49 | struct list_head target; | 48 | struct list_head target; |
50 | #ifdef CONFIG_COMPAT | 49 | #ifdef CONFIG_COMPAT |
51 | struct mutex compat_mutex; | 50 | struct mutex compat_mutex; |
52 | struct compat_delta *compat_offsets; | 51 | struct compat_delta *compat_tab; |
52 | unsigned int number; /* number of slots in compat_tab[] */ | ||
53 | unsigned int cur; /* number of used slots in compat_tab[] */ | ||
53 | #endif | 54 | #endif |
54 | }; | 55 | }; |
55 | 56 | ||
@@ -414,54 +415,67 @@ int xt_check_match(struct xt_mtchk_param *par, | |||
414 | EXPORT_SYMBOL_GPL(xt_check_match); | 415 | EXPORT_SYMBOL_GPL(xt_check_match); |
415 | 416 | ||
416 | #ifdef CONFIG_COMPAT | 417 | #ifdef CONFIG_COMPAT |
417 | int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) | 418 | int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) |
418 | { | 419 | { |
419 | struct compat_delta *tmp; | 420 | struct xt_af *xp = &xt[af]; |
420 | 421 | ||
421 | tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); | 422 | if (!xp->compat_tab) { |
422 | if (!tmp) | 423 | if (!xp->number) |
423 | return -ENOMEM; | 424 | return -EINVAL; |
425 | xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number); | ||
426 | if (!xp->compat_tab) | ||
427 | return -ENOMEM; | ||
428 | xp->cur = 0; | ||
429 | } | ||
424 | 430 | ||
425 | tmp->offset = offset; | 431 | if (xp->cur >= xp->number) |
426 | tmp->delta = delta; | 432 | return -EINVAL; |
427 | 433 | ||
428 | if (xt[af].compat_offsets) { | 434 | if (xp->cur) |
429 | tmp->next = xt[af].compat_offsets->next; | 435 | delta += xp->compat_tab[xp->cur - 1].delta; |
430 | xt[af].compat_offsets->next = tmp; | 436 | xp->compat_tab[xp->cur].offset = offset; |
431 | } else { | 437 | xp->compat_tab[xp->cur].delta = delta; |
432 | xt[af].compat_offsets = tmp; | 438 | xp->cur++; |
433 | tmp->next = NULL; | ||
434 | } | ||
435 | return 0; | 439 | return 0; |
436 | } | 440 | } |
437 | EXPORT_SYMBOL_GPL(xt_compat_add_offset); | 441 | EXPORT_SYMBOL_GPL(xt_compat_add_offset); |
438 | 442 | ||
439 | void xt_compat_flush_offsets(u_int8_t af) | 443 | void xt_compat_flush_offsets(u_int8_t af) |
440 | { | 444 | { |
441 | struct compat_delta *tmp, *next; | 445 | if (xt[af].compat_tab) { |
442 | 446 | vfree(xt[af].compat_tab); | |
443 | if (xt[af].compat_offsets) { | 447 | xt[af].compat_tab = NULL; |
444 | for (tmp = xt[af].compat_offsets; tmp; tmp = next) { | 448 | xt[af].number = 0; |
445 | next = tmp->next; | ||
446 | kfree(tmp); | ||
447 | } | ||
448 | xt[af].compat_offsets = NULL; | ||
449 | } | 449 | } |
450 | } | 450 | } |
451 | EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); | 451 | EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); |
452 | 452 | ||
453 | int xt_compat_calc_jump(u_int8_t af, unsigned int offset) | 453 | int xt_compat_calc_jump(u_int8_t af, unsigned int offset) |
454 | { | 454 | { |
455 | struct compat_delta *tmp; | 455 | struct compat_delta *tmp = xt[af].compat_tab; |
456 | int delta; | 456 | int mid, left = 0, right = xt[af].cur - 1; |
457 | 457 | ||
458 | for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) | 458 | while (left <= right) { |
459 | if (tmp->offset < offset) | 459 | mid = (left + right) >> 1; |
460 | delta += tmp->delta; | 460 | if (offset > tmp[mid].offset) |
461 | return delta; | 461 | left = mid + 1; |
462 | else if (offset < tmp[mid].offset) | ||
463 | right = mid - 1; | ||
464 | else | ||
465 | return mid ? tmp[mid - 1].delta : 0; | ||
466 | } | ||
467 | WARN_ON_ONCE(1); | ||
468 | return 0; | ||
462 | } | 469 | } |
463 | EXPORT_SYMBOL_GPL(xt_compat_calc_jump); | 470 | EXPORT_SYMBOL_GPL(xt_compat_calc_jump); |
464 | 471 | ||
472 | void xt_compat_init_offsets(u_int8_t af, unsigned int number) | ||
473 | { | ||
474 | xt[af].number = number; | ||
475 | xt[af].cur = 0; | ||
476 | } | ||
477 | EXPORT_SYMBOL(xt_compat_init_offsets); | ||
478 | |||
465 | int xt_compat_match_offset(const struct xt_match *match) | 479 | int xt_compat_match_offset(const struct xt_match *match) |
466 | { | 480 | { |
467 | u_int16_t csize = match->compatsize ? : match->matchsize; | 481 | u_int16_t csize = match->compatsize ? : match->matchsize; |
@@ -1337,7 +1351,7 @@ static int __init xt_init(void) | |||
1337 | mutex_init(&xt[i].mutex); | 1351 | mutex_init(&xt[i].mutex); |
1338 | #ifdef CONFIG_COMPAT | 1352 | #ifdef CONFIG_COMPAT |
1339 | mutex_init(&xt[i].compat_mutex); | 1353 | mutex_init(&xt[i].compat_mutex); |
1340 | xt[i].compat_offsets = NULL; | 1354 | xt[i].compat_tab = NULL; |
1341 | #endif | 1355 | #endif |
1342 | INIT_LIST_HEAD(&xt[i].target); | 1356 | INIT_LIST_HEAD(&xt[i].target); |
1343 | INIT_LIST_HEAD(&xt[i].match); | 1357 | INIT_LIST_HEAD(&xt[i].match); |