aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
author: Eric Dumazet <eric.dumazet@gmail.com> 2010-04-01 08:35:56 -0400
committer: Patrick McHardy <kaber@trash.net> 2010-04-01 08:35:56 -0400
commit: 02e4eb75912a5c8babccc1acdc9cc913989be04e (patch)
tree: 482d37284f058ae8439c47bc57416440caa04f61 /net/netfilter
parent: 902a3dd5e6b19048604ec533203d7d38a39505a2 (diff)
netfilter: xt_hashlimit: RCU conversion
xt_hashlimit uses a central lock per hash table and suffers from contention on some workloads (multiqueue NIC, or when RPS is enabled).

After RCU conversion, the central lock is only taken when a writer wants to add or delete an entry. 'Readers', which update an existing entry, use an individual lock per entry instead.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/xt_hashlimit.c 70
1 files changed, 47 insertions, 23 deletions
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 5470bb097c4..453178d25cb 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -81,12 +81,14 @@ struct dsthash_ent {
81 struct dsthash_dst dst; 81 struct dsthash_dst dst;
82 82
83 /* modified structure members in the end */ 83 /* modified structure members in the end */
84 spinlock_t lock;
84 unsigned long expires; /* precalculated expiry time */ 85 unsigned long expires; /* precalculated expiry time */
85 struct { 86 struct {
86 unsigned long prev; /* last modification */ 87 unsigned long prev; /* last modification */
87 u_int32_t credit; 88 u_int32_t credit;
88 u_int32_t credit_cap, cost; 89 u_int32_t credit_cap, cost;
89 } rateinfo; 90 } rateinfo;
91 struct rcu_head rcu;
90}; 92};
91 93
92struct xt_hashlimit_htable { 94struct xt_hashlimit_htable {
@@ -143,9 +145,11 @@ dsthash_find(const struct xt_hashlimit_htable *ht,
143 u_int32_t hash = hash_dst(ht, dst); 145 u_int32_t hash = hash_dst(ht, dst);
144 146
145 if (!hlist_empty(&ht->hash[hash])) { 147 if (!hlist_empty(&ht->hash[hash])) {
146 hlist_for_each_entry(ent, pos, &ht->hash[hash], node) 148 hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node)
147 if (dst_cmp(ent, dst)) 149 if (dst_cmp(ent, dst)) {
150 spin_lock(&ent->lock);
148 return ent; 151 return ent;
152 }
149 } 153 }
150 return NULL; 154 return NULL;
151} 155}
@@ -157,9 +161,10 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
157{ 161{
158 struct dsthash_ent *ent; 162 struct dsthash_ent *ent;
159 163
164 spin_lock(&ht->lock);
160 /* initialize hash with random val at the time we allocate 165 /* initialize hash with random val at the time we allocate
161 * the first hashtable entry */ 166 * the first hashtable entry */
162 if (!ht->rnd_initialized) { 167 if (unlikely(!ht->rnd_initialized)) {
163 get_random_bytes(&ht->rnd, sizeof(ht->rnd)); 168 get_random_bytes(&ht->rnd, sizeof(ht->rnd));
164 ht->rnd_initialized = true; 169 ht->rnd_initialized = true;
165 } 170 }
@@ -168,27 +173,36 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
168 /* FIXME: do something. question is what.. */ 173 /* FIXME: do something. question is what.. */
169 if (net_ratelimit()) 174 if (net_ratelimit())
170 pr_err("max count of %u reached\n", ht->cfg.max); 175 pr_err("max count of %u reached\n", ht->cfg.max);
171 return NULL; 176 ent = NULL;
172 } 177 } else
173 178 ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
174 ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
175 if (!ent) { 179 if (!ent) {
176 if (net_ratelimit()) 180 if (net_ratelimit())
177 pr_err("cannot allocate dsthash_ent\n"); 181 pr_err("cannot allocate dsthash_ent\n");
178 return NULL; 182 } else {
179 } 183 memcpy(&ent->dst, dst, sizeof(ent->dst));
180 memcpy(&ent->dst, dst, sizeof(ent->dst)); 184 spin_lock_init(&ent->lock);
181 185
182 hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]); 186 spin_lock(&ent->lock);
183 ht->count++; 187 hlist_add_head_rcu(&ent->node, &ht->hash[hash_dst(ht, dst)]);
188 ht->count++;
189 }
190 spin_unlock(&ht->lock);
184 return ent; 191 return ent;
185} 192}
186 193
194static void dsthash_free_rcu(struct rcu_head *head)
195{
196 struct dsthash_ent *ent = container_of(head, struct dsthash_ent, rcu);
197
198 kmem_cache_free(hashlimit_cachep, ent);
199}
200
187static inline void 201static inline void
188dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) 202dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
189{ 203{
190 hlist_del(&ent->node); 204 hlist_del_rcu(&ent->node);
191 kmem_cache_free(hashlimit_cachep, ent); 205 call_rcu_bh(&ent->rcu, dsthash_free_rcu);
192 ht->count--; 206 ht->count--;
193} 207}
194static void htable_gc(unsigned long htlong); 208static void htable_gc(unsigned long htlong);
@@ -512,15 +526,14 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
512 if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) 526 if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
513 goto hotdrop; 527 goto hotdrop;
514 528
515 spin_lock_bh(&hinfo->lock); 529 rcu_read_lock_bh();
516 dh = dsthash_find(hinfo, &dst); 530 dh = dsthash_find(hinfo, &dst);
517 if (dh == NULL) { 531 if (dh == NULL) {
518 dh = dsthash_alloc_init(hinfo, &dst); 532 dh = dsthash_alloc_init(hinfo, &dst);
519 if (dh == NULL) { 533 if (dh == NULL) {
520 spin_unlock_bh(&hinfo->lock); 534 rcu_read_unlock_bh();
521 goto hotdrop; 535 goto hotdrop;
522 } 536 }
523
524 dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); 537 dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
525 dh->rateinfo.prev = jiffies; 538 dh->rateinfo.prev = jiffies;
526 dh->rateinfo.credit = user2credits(hinfo->cfg.avg * 539 dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
@@ -537,11 +550,13 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
537 if (dh->rateinfo.credit >= dh->rateinfo.cost) { 550 if (dh->rateinfo.credit >= dh->rateinfo.cost) {
538 /* below the limit */ 551 /* below the limit */
539 dh->rateinfo.credit -= dh->rateinfo.cost; 552 dh->rateinfo.credit -= dh->rateinfo.cost;
540 spin_unlock_bh(&hinfo->lock); 553 spin_unlock(&dh->lock);
554 rcu_read_unlock_bh();
541 return !(info->cfg.mode & XT_HASHLIMIT_INVERT); 555 return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
542 } 556 }
543 557
544 spin_unlock_bh(&hinfo->lock); 558 spin_unlock(&dh->lock);
559 rcu_read_unlock_bh();
545 /* default match is underlimit - so over the limit, we need to invert */ 560 /* default match is underlimit - so over the limit, we need to invert */
546 return info->cfg.mode & XT_HASHLIMIT_INVERT; 561 return info->cfg.mode & XT_HASHLIMIT_INVERT;
547 562
@@ -666,12 +681,15 @@ static void dl_seq_stop(struct seq_file *s, void *v)
666static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, 681static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
667 struct seq_file *s) 682 struct seq_file *s)
668{ 683{
684 int res;
685
686 spin_lock(&ent->lock);
669 /* recalculate to show accurate numbers */ 687 /* recalculate to show accurate numbers */
670 rateinfo_recalc(ent, jiffies); 688 rateinfo_recalc(ent, jiffies);
671 689
672 switch (family) { 690 switch (family) {
673 case NFPROTO_IPV4: 691 case NFPROTO_IPV4:
674 return seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", 692 res = seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
675 (long)(ent->expires - jiffies)/HZ, 693 (long)(ent->expires - jiffies)/HZ,
676 &ent->dst.ip.src, 694 &ent->dst.ip.src,
677 ntohs(ent->dst.src_port), 695 ntohs(ent->dst.src_port),
@@ -679,9 +697,10 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
679 ntohs(ent->dst.dst_port), 697 ntohs(ent->dst.dst_port),
680 ent->rateinfo.credit, ent->rateinfo.credit_cap, 698 ent->rateinfo.credit, ent->rateinfo.credit_cap,
681 ent->rateinfo.cost); 699 ent->rateinfo.cost);
700 break;
682#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 701#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
683 case NFPROTO_IPV6: 702 case NFPROTO_IPV6:
684 return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", 703 res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
685 (long)(ent->expires - jiffies)/HZ, 704 (long)(ent->expires - jiffies)/HZ,
686 &ent->dst.ip6.src, 705 &ent->dst.ip6.src,
687 ntohs(ent->dst.src_port), 706 ntohs(ent->dst.src_port),
@@ -689,11 +708,14 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
689 ntohs(ent->dst.dst_port), 708 ntohs(ent->dst.dst_port),
690 ent->rateinfo.credit, ent->rateinfo.credit_cap, 709 ent->rateinfo.credit, ent->rateinfo.credit_cap,
691 ent->rateinfo.cost); 710 ent->rateinfo.cost);
711 break;
692#endif 712#endif
693 default: 713 default:
694 BUG(); 714 BUG();
695 return 0; 715 res = 0;
696 } 716 }
717 spin_unlock(&ent->lock);
718 return res;
697} 719}
698 720
699static int dl_seq_show(struct seq_file *s, void *v) 721static int dl_seq_show(struct seq_file *s, void *v)
@@ -817,9 +839,11 @@ err1:
817 839
818static void __exit hashlimit_mt_exit(void) 840static void __exit hashlimit_mt_exit(void)
819{ 841{
820 kmem_cache_destroy(hashlimit_cachep);
821 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); 842 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
822 unregister_pernet_subsys(&hashlimit_net_ops); 843 unregister_pernet_subsys(&hashlimit_net_ops);
844
845 rcu_barrier_bh();
846 kmem_cache_destroy(hashlimit_cachep);
823} 847}
824 848
825module_init(hashlimit_mt_init); 849module_init(hashlimit_mt_init);