diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-01 08:35:56 -0400 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2010-04-01 08:35:56 -0400 |
commit | 02e4eb75912a5c8babccc1acdc9cc913989be04e (patch) | |
tree | 482d37284f058ae8439c47bc57416440caa04f61 /net/netfilter | |
parent | 902a3dd5e6b19048604ec533203d7d38a39505a2 (diff) |
netfilter: xt_hashlimit: RCU conversion
xt_hashlimit uses a central lock per hash table and suffers from
contention on some workloads (multiqueue NICs, or when RPS is enabled).
After the RCU conversion, the central lock is only taken when a writer
wants to add or delete an entry.
'Readers' that update an existing entry take an individual lock
per entry instead.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/xt_hashlimit.c | 70 |
1 file changed, 47 insertions, 23 deletions
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 5470bb097c4..453178d25cb 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c | |||
@@ -81,12 +81,14 @@ struct dsthash_ent { | |||
81 | struct dsthash_dst dst; | 81 | struct dsthash_dst dst; |
82 | 82 | ||
83 | /* modified structure members in the end */ | 83 | /* modified structure members in the end */ |
84 | spinlock_t lock; | ||
84 | unsigned long expires; /* precalculated expiry time */ | 85 | unsigned long expires; /* precalculated expiry time */ |
85 | struct { | 86 | struct { |
86 | unsigned long prev; /* last modification */ | 87 | unsigned long prev; /* last modification */ |
87 | u_int32_t credit; | 88 | u_int32_t credit; |
88 | u_int32_t credit_cap, cost; | 89 | u_int32_t credit_cap, cost; |
89 | } rateinfo; | 90 | } rateinfo; |
91 | struct rcu_head rcu; | ||
90 | }; | 92 | }; |
91 | 93 | ||
92 | struct xt_hashlimit_htable { | 94 | struct xt_hashlimit_htable { |
@@ -143,9 +145,11 @@ dsthash_find(const struct xt_hashlimit_htable *ht, | |||
143 | u_int32_t hash = hash_dst(ht, dst); | 145 | u_int32_t hash = hash_dst(ht, dst); |
144 | 146 | ||
145 | if (!hlist_empty(&ht->hash[hash])) { | 147 | if (!hlist_empty(&ht->hash[hash])) { |
146 | hlist_for_each_entry(ent, pos, &ht->hash[hash], node) | 148 | hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node) |
147 | if (dst_cmp(ent, dst)) | 149 | if (dst_cmp(ent, dst)) { |
150 | spin_lock(&ent->lock); | ||
148 | return ent; | 151 | return ent; |
152 | } | ||
149 | } | 153 | } |
150 | return NULL; | 154 | return NULL; |
151 | } | 155 | } |
@@ -157,9 +161,10 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, | |||
157 | { | 161 | { |
158 | struct dsthash_ent *ent; | 162 | struct dsthash_ent *ent; |
159 | 163 | ||
164 | spin_lock(&ht->lock); | ||
160 | /* initialize hash with random val at the time we allocate | 165 | /* initialize hash with random val at the time we allocate |
161 | * the first hashtable entry */ | 166 | * the first hashtable entry */ |
162 | if (!ht->rnd_initialized) { | 167 | if (unlikely(!ht->rnd_initialized)) { |
163 | get_random_bytes(&ht->rnd, sizeof(ht->rnd)); | 168 | get_random_bytes(&ht->rnd, sizeof(ht->rnd)); |
164 | ht->rnd_initialized = true; | 169 | ht->rnd_initialized = true; |
165 | } | 170 | } |
@@ -168,27 +173,36 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, | |||
168 | /* FIXME: do something. question is what.. */ | 173 | /* FIXME: do something. question is what.. */ |
169 | if (net_ratelimit()) | 174 | if (net_ratelimit()) |
170 | pr_err("max count of %u reached\n", ht->cfg.max); | 175 | pr_err("max count of %u reached\n", ht->cfg.max); |
171 | return NULL; | 176 | ent = NULL; |
172 | } | 177 | } else |
173 | 178 | ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); | |
174 | ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); | ||
175 | if (!ent) { | 179 | if (!ent) { |
176 | if (net_ratelimit()) | 180 | if (net_ratelimit()) |
177 | pr_err("cannot allocate dsthash_ent\n"); | 181 | pr_err("cannot allocate dsthash_ent\n"); |
178 | return NULL; | 182 | } else { |
179 | } | 183 | memcpy(&ent->dst, dst, sizeof(ent->dst)); |
180 | memcpy(&ent->dst, dst, sizeof(ent->dst)); | 184 | spin_lock_init(&ent->lock); |
181 | 185 | ||
182 | hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]); | 186 | spin_lock(&ent->lock); |
183 | ht->count++; | 187 | hlist_add_head_rcu(&ent->node, &ht->hash[hash_dst(ht, dst)]); |
188 | ht->count++; | ||
189 | } | ||
190 | spin_unlock(&ht->lock); | ||
184 | return ent; | 191 | return ent; |
185 | } | 192 | } |
186 | 193 | ||
194 | static void dsthash_free_rcu(struct rcu_head *head) | ||
195 | { | ||
196 | struct dsthash_ent *ent = container_of(head, struct dsthash_ent, rcu); | ||
197 | |||
198 | kmem_cache_free(hashlimit_cachep, ent); | ||
199 | } | ||
200 | |||
187 | static inline void | 201 | static inline void |
188 | dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) | 202 | dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) |
189 | { | 203 | { |
190 | hlist_del(&ent->node); | 204 | hlist_del_rcu(&ent->node); |
191 | kmem_cache_free(hashlimit_cachep, ent); | 205 | call_rcu_bh(&ent->rcu, dsthash_free_rcu); |
192 | ht->count--; | 206 | ht->count--; |
193 | } | 207 | } |
194 | static void htable_gc(unsigned long htlong); | 208 | static void htable_gc(unsigned long htlong); |
@@ -512,15 +526,14 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) | |||
512 | if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) | 526 | if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) |
513 | goto hotdrop; | 527 | goto hotdrop; |
514 | 528 | ||
515 | spin_lock_bh(&hinfo->lock); | 529 | rcu_read_lock_bh(); |
516 | dh = dsthash_find(hinfo, &dst); | 530 | dh = dsthash_find(hinfo, &dst); |
517 | if (dh == NULL) { | 531 | if (dh == NULL) { |
518 | dh = dsthash_alloc_init(hinfo, &dst); | 532 | dh = dsthash_alloc_init(hinfo, &dst); |
519 | if (dh == NULL) { | 533 | if (dh == NULL) { |
520 | spin_unlock_bh(&hinfo->lock); | 534 | rcu_read_unlock_bh(); |
521 | goto hotdrop; | 535 | goto hotdrop; |
522 | } | 536 | } |
523 | |||
524 | dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); | 537 | dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); |
525 | dh->rateinfo.prev = jiffies; | 538 | dh->rateinfo.prev = jiffies; |
526 | dh->rateinfo.credit = user2credits(hinfo->cfg.avg * | 539 | dh->rateinfo.credit = user2credits(hinfo->cfg.avg * |
@@ -537,11 +550,13 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) | |||
537 | if (dh->rateinfo.credit >= dh->rateinfo.cost) { | 550 | if (dh->rateinfo.credit >= dh->rateinfo.cost) { |
538 | /* below the limit */ | 551 | /* below the limit */ |
539 | dh->rateinfo.credit -= dh->rateinfo.cost; | 552 | dh->rateinfo.credit -= dh->rateinfo.cost; |
540 | spin_unlock_bh(&hinfo->lock); | 553 | spin_unlock(&dh->lock); |
554 | rcu_read_unlock_bh(); | ||
541 | return !(info->cfg.mode & XT_HASHLIMIT_INVERT); | 555 | return !(info->cfg.mode & XT_HASHLIMIT_INVERT); |
542 | } | 556 | } |
543 | 557 | ||
544 | spin_unlock_bh(&hinfo->lock); | 558 | spin_unlock(&dh->lock); |
559 | rcu_read_unlock_bh(); | ||
545 | /* default match is underlimit - so over the limit, we need to invert */ | 560 | /* default match is underlimit - so over the limit, we need to invert */ |
546 | return info->cfg.mode & XT_HASHLIMIT_INVERT; | 561 | return info->cfg.mode & XT_HASHLIMIT_INVERT; |
547 | 562 | ||
@@ -666,12 +681,15 @@ static void dl_seq_stop(struct seq_file *s, void *v) | |||
666 | static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, | 681 | static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, |
667 | struct seq_file *s) | 682 | struct seq_file *s) |
668 | { | 683 | { |
684 | int res; | ||
685 | |||
686 | spin_lock(&ent->lock); | ||
669 | /* recalculate to show accurate numbers */ | 687 | /* recalculate to show accurate numbers */ |
670 | rateinfo_recalc(ent, jiffies); | 688 | rateinfo_recalc(ent, jiffies); |
671 | 689 | ||
672 | switch (family) { | 690 | switch (family) { |
673 | case NFPROTO_IPV4: | 691 | case NFPROTO_IPV4: |
674 | return seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", | 692 | res = seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", |
675 | (long)(ent->expires - jiffies)/HZ, | 693 | (long)(ent->expires - jiffies)/HZ, |
676 | &ent->dst.ip.src, | 694 | &ent->dst.ip.src, |
677 | ntohs(ent->dst.src_port), | 695 | ntohs(ent->dst.src_port), |
@@ -679,9 +697,10 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, | |||
679 | ntohs(ent->dst.dst_port), | 697 | ntohs(ent->dst.dst_port), |
680 | ent->rateinfo.credit, ent->rateinfo.credit_cap, | 698 | ent->rateinfo.credit, ent->rateinfo.credit_cap, |
681 | ent->rateinfo.cost); | 699 | ent->rateinfo.cost); |
700 | break; | ||
682 | #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) | 701 | #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) |
683 | case NFPROTO_IPV6: | 702 | case NFPROTO_IPV6: |
684 | return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", | 703 | res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", |
685 | (long)(ent->expires - jiffies)/HZ, | 704 | (long)(ent->expires - jiffies)/HZ, |
686 | &ent->dst.ip6.src, | 705 | &ent->dst.ip6.src, |
687 | ntohs(ent->dst.src_port), | 706 | ntohs(ent->dst.src_port), |
@@ -689,11 +708,14 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, | |||
689 | ntohs(ent->dst.dst_port), | 708 | ntohs(ent->dst.dst_port), |
690 | ent->rateinfo.credit, ent->rateinfo.credit_cap, | 709 | ent->rateinfo.credit, ent->rateinfo.credit_cap, |
691 | ent->rateinfo.cost); | 710 | ent->rateinfo.cost); |
711 | break; | ||
692 | #endif | 712 | #endif |
693 | default: | 713 | default: |
694 | BUG(); | 714 | BUG(); |
695 | return 0; | 715 | res = 0; |
696 | } | 716 | } |
717 | spin_unlock(&ent->lock); | ||
718 | return res; | ||
697 | } | 719 | } |
698 | 720 | ||
699 | static int dl_seq_show(struct seq_file *s, void *v) | 721 | static int dl_seq_show(struct seq_file *s, void *v) |
@@ -817,9 +839,11 @@ err1: | |||
817 | 839 | ||
818 | static void __exit hashlimit_mt_exit(void) | 840 | static void __exit hashlimit_mt_exit(void) |
819 | { | 841 | { |
820 | kmem_cache_destroy(hashlimit_cachep); | ||
821 | xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); | 842 | xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); |
822 | unregister_pernet_subsys(&hashlimit_net_ops); | 843 | unregister_pernet_subsys(&hashlimit_net_ops); |
844 | |||
845 | rcu_barrier_bh(); | ||
846 | kmem_cache_destroy(hashlimit_cachep); | ||
823 | } | 847 | } |
824 | 848 | ||
825 | module_init(hashlimit_mt_init); | 849 | module_init(hashlimit_mt_init); |