diff options
author | KOVACS Krisztian <hidden@balabit.hu> | 2005-09-16 20:00:04 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-09-16 20:00:04 -0400 |
commit | 136e92bbec0a6d4c2dd1e5b5ac869ab5470547a4 (patch) | |
tree | 514a8ae63cc351bafdd47d605afca005753f1e90 | |
parent | 4451362445b2d83886003f1d739b94e4f000eeeb (diff) |
[NETFILTER] CLUSTERIP: use a bitmap to store node responsibility data
Instead of maintaining an array containing a list of nodes this instance
is responsible for, let's use a simple bitmap. This provides the
following features:
* clusterip_responsible() and the add_node()/delete_node() operations
become very simple and don't need locking
* the config structure is much smaller
In spite of the completely different internal data representation, the
user-space interface remains almost unchanged; the only difference is
that the proc file no longer lists nodes in the order they were added.
(The target info structure remains the same.)
Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/netfilter/ipt_CLUSTERIP.c | 143 |
1 files changed, 61 insertions, 82 deletions
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index adbf4d752d0f..9bcb398fbc1f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/proc_fs.h> | 14 | #include <linux/proc_fs.h> |
15 | #include <linux/jhash.h> | 15 | #include <linux/jhash.h> |
16 | #include <linux/bitops.h> | ||
16 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
17 | #include <linux/ip.h> | 18 | #include <linux/ip.h> |
18 | #include <linux/tcp.h> | 19 | #include <linux/tcp.h> |
@@ -30,7 +31,7 @@ | |||
30 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | 31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> |
31 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 32 | #include <linux/netfilter_ipv4/ip_conntrack.h> |
32 | 33 | ||
33 | #define CLUSTERIP_VERSION "0.7" | 34 | #define CLUSTERIP_VERSION "0.8" |
34 | 35 | ||
35 | #define DEBUG_CLUSTERIP | 36 | #define DEBUG_CLUSTERIP |
36 | 37 | ||
@@ -56,8 +57,7 @@ struct clusterip_config { | |||
56 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ | 57 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ |
57 | struct net_device *dev; /* device */ | 58 | struct net_device *dev; /* device */ |
58 | u_int16_t num_total_nodes; /* total number of nodes */ | 59 | u_int16_t num_total_nodes; /* total number of nodes */ |
59 | u_int16_t num_local_nodes; /* number of local nodes */ | 60 | unsigned long local_nodes; /* node number array */ |
60 | u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ | ||
61 | 61 | ||
62 | #ifdef CONFIG_PROC_FS | 62 | #ifdef CONFIG_PROC_FS |
63 | struct proc_dir_entry *pde; /* proc dir entry */ | 63 | struct proc_dir_entry *pde; /* proc dir entry */ |
@@ -68,8 +68,7 @@ struct clusterip_config { | |||
68 | 68 | ||
69 | static LIST_HEAD(clusterip_configs); | 69 | static LIST_HEAD(clusterip_configs); |
70 | 70 | ||
71 | /* clusterip_lock protects the clusterip_configs list _AND_ the configurable | 71 | /* clusterip_lock protects the clusterip_configs list */ |
72 | * data within all structurses (num_local_nodes, local_nodes[]) */ | ||
73 | static DEFINE_RWLOCK(clusterip_lock); | 72 | static DEFINE_RWLOCK(clusterip_lock); |
74 | 73 | ||
75 | #ifdef CONFIG_PROC_FS | 74 | #ifdef CONFIG_PROC_FS |
@@ -156,6 +155,17 @@ clusterip_config_find_get(u_int32_t clusterip, int entry) | |||
156 | return c; | 155 | return c; |
157 | } | 156 | } |
158 | 157 | ||
158 | static void | ||
159 | clusterip_config_init_nodelist(struct clusterip_config *c, | ||
160 | const struct ipt_clusterip_tgt_info *i) | ||
161 | { | ||
162 | int n; | ||
163 | |||
164 | for (n = 0; n < i->num_local_nodes; n++) { | ||
165 | set_bit(i->local_nodes[n] - 1, &c->local_nodes); | ||
166 | } | ||
167 | } | ||
168 | |||
159 | static struct clusterip_config * | 169 | static struct clusterip_config * |
160 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | 170 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, |
161 | struct net_device *dev) | 171 | struct net_device *dev) |
@@ -172,8 +182,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | |||
172 | c->clusterip = ip; | 182 | c->clusterip = ip; |
173 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | 183 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); |
174 | c->num_total_nodes = i->num_total_nodes; | 184 | c->num_total_nodes = i->num_total_nodes; |
175 | c->num_local_nodes = i->num_local_nodes; | 185 | clusterip_config_init_nodelist(c, i); |
176 | memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes)); | ||
177 | c->hash_mode = i->hash_mode; | 186 | c->hash_mode = i->hash_mode; |
178 | c->hash_initval = i->hash_initval; | 187 | c->hash_initval = i->hash_initval; |
179 | atomic_set(&c->refcount, 1); | 188 | atomic_set(&c->refcount, 1); |
@@ -201,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | |||
201 | static int | 210 | static int |
202 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | 211 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) |
203 | { | 212 | { |
204 | int i; | ||
205 | |||
206 | write_lock_bh(&clusterip_lock); | ||
207 | 213 | ||
208 | if (c->num_local_nodes >= CLUSTERIP_MAX_NODES | 214 | if (nodenum == 0 || |
209 | || nodenum > CLUSTERIP_MAX_NODES) { | 215 | nodenum > c->num_total_nodes) |
210 | write_unlock_bh(&clusterip_lock); | ||
211 | return 1; | 216 | return 1; |
212 | } | ||
213 | |||
214 | /* check if we alrady have this number in our array */ | ||
215 | for (i = 0; i < c->num_local_nodes; i++) { | ||
216 | if (c->local_nodes[i] == nodenum) { | ||
217 | write_unlock_bh(&clusterip_lock); | ||
218 | return 1; | ||
219 | } | ||
220 | } | ||
221 | 217 | ||
222 | c->local_nodes[c->num_local_nodes++] = nodenum; | 218 | /* check if we already have this number in our bitfield */ |
219 | if (test_and_set_bit(nodenum - 1, &c->local_nodes)) | ||
220 | return 1; | ||
223 | 221 | ||
224 | write_unlock_bh(&clusterip_lock); | ||
225 | return 0; | 222 | return 0; |
226 | } | 223 | } |
227 | 224 | ||
228 | static int | 225 | static int |
229 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) | 226 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) |
230 | { | 227 | { |
231 | int i; | 228 | if (nodenum == 0 || |
232 | 229 | nodenum > c->num_total_nodes) | |
233 | write_lock_bh(&clusterip_lock); | ||
234 | |||
235 | if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { | ||
236 | write_unlock_bh(&clusterip_lock); | ||
237 | return 1; | 230 | return 1; |
238 | } | ||
239 | 231 | ||
240 | for (i = 0; i < c->num_local_nodes; i++) { | 232 | if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
241 | if (c->local_nodes[i] == nodenum) { | 233 | return 0; |
242 | int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); | ||
243 | memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); | ||
244 | c->num_local_nodes--; | ||
245 | write_unlock_bh(&clusterip_lock); | ||
246 | return 0; | ||
247 | } | ||
248 | } | ||
249 | 234 | ||
250 | write_unlock_bh(&clusterip_lock); | ||
251 | return 1; | 235 | return 1; |
252 | } | 236 | } |
253 | 237 | ||
@@ -315,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) | |||
315 | static inline int | 299 | static inline int |
316 | clusterip_responsible(struct clusterip_config *config, u_int32_t hash) | 300 | clusterip_responsible(struct clusterip_config *config, u_int32_t hash) |
317 | { | 301 | { |
318 | int i; | 302 | return test_bit(hash - 1, &config->local_nodes); |
319 | |||
320 | read_lock_bh(&clusterip_lock); | ||
321 | |||
322 | if (config->num_local_nodes == 0) { | ||
323 | read_unlock_bh(&clusterip_lock); | ||
324 | return 0; | ||
325 | } | ||
326 | |||
327 | for (i = 0; i < config->num_local_nodes; i++) { | ||
328 | if (config->local_nodes[i] == hash) { | ||
329 | read_unlock_bh(&clusterip_lock); | ||
330 | return 1; | ||
331 | } | ||
332 | } | ||
333 | |||
334 | read_unlock_bh(&clusterip_lock); | ||
335 | |||
336 | return 0; | ||
337 | } | 303 | } |
338 | 304 | ||
339 | /*********************************************************************** | 305 | /*********************************************************************** |
@@ -618,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = { | |||
618 | 584 | ||
619 | #ifdef CONFIG_PROC_FS | 585 | #ifdef CONFIG_PROC_FS |
620 | 586 | ||
587 | struct clusterip_seq_position { | ||
588 | unsigned int pos; /* position */ | ||
589 | unsigned int weight; /* number of bits set == size */ | ||
590 | unsigned int bit; /* current bit */ | ||
591 | unsigned long val; /* current value */ | ||
592 | }; | ||
593 | |||
621 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) | 594 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
622 | { | 595 | { |
623 | struct proc_dir_entry *pde = s->private; | 596 | struct proc_dir_entry *pde = s->private; |
624 | struct clusterip_config *c = pde->data; | 597 | struct clusterip_config *c = pde->data; |
625 | unsigned int *nodeidx; | 598 | unsigned int weight; |
626 | 599 | u_int32_t local_nodes; | |
627 | read_lock_bh(&clusterip_lock); | 600 | struct clusterip_seq_position *idx; |
628 | if (*pos >= c->num_local_nodes) | 601 | |
602 | /* FIXME: possible race */ | ||
603 | local_nodes = c->local_nodes; | ||
604 | weight = hweight32(local_nodes); | ||
605 | if (*pos >= weight) | ||
629 | return NULL; | 606 | return NULL; |
630 | 607 | ||
631 | nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); | 608 | idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); |
632 | if (!nodeidx) | 609 | if (!idx) |
633 | return ERR_PTR(-ENOMEM); | 610 | return ERR_PTR(-ENOMEM); |
634 | 611 | ||
635 | *nodeidx = *pos; | 612 | idx->pos = *pos; |
636 | return nodeidx; | 613 | idx->weight = weight; |
614 | idx->bit = ffs(local_nodes); | ||
615 | idx->val = local_nodes; | ||
616 | clear_bit(idx->bit - 1, &idx->val); | ||
617 | |||
618 | return idx; | ||
637 | } | 619 | } |
638 | 620 | ||
639 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | 621 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) |
640 | { | 622 | { |
641 | struct proc_dir_entry *pde = s->private; | 623 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
642 | struct clusterip_config *c = pde->data; | ||
643 | unsigned int *nodeidx = (unsigned int *)v; | ||
644 | 624 | ||
645 | *pos = ++(*nodeidx); | 625 | *pos = ++idx->pos; |
646 | if (*pos >= c->num_local_nodes) { | 626 | if (*pos >= idx->weight) { |
647 | kfree(v); | 627 | kfree(v); |
648 | return NULL; | 628 | return NULL; |
649 | } | 629 | } |
650 | return nodeidx; | 630 | idx->bit = ffs(idx->val); |
631 | clear_bit(idx->bit - 1, &idx->val); | ||
632 | return idx; | ||
651 | } | 633 | } |
652 | 634 | ||
653 | static void clusterip_seq_stop(struct seq_file *s, void *v) | 635 | static void clusterip_seq_stop(struct seq_file *s, void *v) |
654 | { | 636 | { |
655 | kfree(v); | 637 | kfree(v); |
656 | |||
657 | read_unlock_bh(&clusterip_lock); | ||
658 | } | 638 | } |
659 | 639 | ||
660 | static int clusterip_seq_show(struct seq_file *s, void *v) | 640 | static int clusterip_seq_show(struct seq_file *s, void *v) |
661 | { | 641 | { |
662 | struct proc_dir_entry *pde = s->private; | 642 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
663 | struct clusterip_config *c = pde->data; | ||
664 | unsigned int *nodeidx = (unsigned int *)v; | ||
665 | 643 | ||
666 | if (*nodeidx != 0) | 644 | if (idx->pos != 0) |
667 | seq_putc(s, ','); | 645 | seq_putc(s, ','); |
668 | seq_printf(s, "%u", c->local_nodes[*nodeidx]); | ||
669 | 646 | ||
670 | if (*nodeidx == c->num_local_nodes-1) | 647 | seq_printf(s, "%u", idx->bit); |
648 | |||
649 | if (idx->pos == idx->weight - 1) | ||
671 | seq_putc(s, '\n'); | 650 | seq_putc(s, '\n'); |
672 | 651 | ||
673 | return 0; | 652 | return 0; |