aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorAnton Altaparmakov <aia21@cantab.net>2005-09-19 04:47:49 -0400
committerAnton Altaparmakov <aia21@cantab.net>2005-09-19 04:47:49 -0400
commit044a500e46742d39d22f1781cfb64ba93b463e39 (patch)
treeb0313211ea7ba26b90c1083ade0e4c9f486b87db /net/ipv4
parentf6098cf449b81c14a51e48dd22ae47d03126a1de (diff)
parent6c0741fbdee5bd0f8ed13ac287c4ab18e8ba7d83 (diff)
Merge branch 'master' of /home/src/linux-2.6/
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/netfilter/Kconfig14
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c5
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c223
3 files changed, 135 insertions, 107 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 30aa8e2ee214..e2162d270073 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -51,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS
51 51
52 IF unsure, say `N'. 52 IF unsure, say `N'.
53 53
54config IP_NF_CONNTRACK_NETLINK
55 tristate 'Connection tracking netlink interface'
56 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
57 depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
58 help
59 This option enables support for a netlink-based userspace interface
60
61
54config IP_NF_CT_PROTO_SCTP 62config IP_NF_CT_PROTO_SCTP
55 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' 63 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
56 depends on IP_NF_CONNTRACK && EXPERIMENTAL 64 depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -774,11 +782,5 @@ config IP_NF_ARP_MANGLE
774 Allows altering the ARP packet payload: source and destination 782 Allows altering the ARP packet payload: source and destination
775 hardware and network addresses. 783 hardware and network addresses.
776 784
777config IP_NF_CONNTRACK_NETLINK
778 tristate 'Connection tracking netlink interface'
779 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
780 help
781 This option enables support for a netlink-based userspace interface
782
783endmenu 785endmenu
784 786
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 19cba16e6e1e..f8cd8e42961e 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
1143 if (del_timer(&ct->timeout)) { 1143 if (del_timer(&ct->timeout)) {
1144 ct->timeout.expires = jiffies + extra_jiffies; 1144 ct->timeout.expires = jiffies + extra_jiffies;
1145 add_timer(&ct->timeout); 1145 add_timer(&ct->timeout);
1146 ip_conntrack_event_cache(IPCT_REFRESH, skb); 1146 /* FIXME: We loose some REFRESH events if this function
1147 * is called without an skb. I'll fix this later -HW */
1148 if (skb)
1149 ip_conntrack_event_cache(IPCT_REFRESH, skb);
1147 } 1150 }
1148 ct_add_counters(ct, ctinfo, skb); 1151 ct_add_counters(ct, ctinfo, skb);
1149 write_unlock_bh(&ip_conntrack_lock); 1152 write_unlock_bh(&ip_conntrack_lock);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d38913754b1..9bcb398fbc1f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -13,6 +13,7 @@
13#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/jhash.h> 15#include <linux/jhash.h>
16#include <linux/bitops.h>
16#include <linux/skbuff.h> 17#include <linux/skbuff.h>
17#include <linux/ip.h> 18#include <linux/ip.h>
18#include <linux/tcp.h> 19#include <linux/tcp.h>
@@ -30,7 +31,7 @@
30#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> 31#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
31#include <linux/netfilter_ipv4/ip_conntrack.h> 32#include <linux/netfilter_ipv4/ip_conntrack.h>
32 33
33#define CLUSTERIP_VERSION "0.7" 34#define CLUSTERIP_VERSION "0.8"
34 35
35#define DEBUG_CLUSTERIP 36#define DEBUG_CLUSTERIP
36 37
@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
49struct clusterip_config { 50struct clusterip_config {
50 struct list_head list; /* list of all configs */ 51 struct list_head list; /* list of all configs */
51 atomic_t refcount; /* reference count */ 52 atomic_t refcount; /* reference count */
53 atomic_t entries; /* number of entries/rules
54 * referencing us */
52 55
53 u_int32_t clusterip; /* the IP address */ 56 u_int32_t clusterip; /* the IP address */
54 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ 57 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
55 struct net_device *dev; /* device */ 58 struct net_device *dev; /* device */
56 u_int16_t num_total_nodes; /* total number of nodes */ 59 u_int16_t num_total_nodes; /* total number of nodes */
57 u_int16_t num_local_nodes; /* number of local nodes */ 60 unsigned long local_nodes; /* node number array */
58 u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */
59 61
60#ifdef CONFIG_PROC_FS 62#ifdef CONFIG_PROC_FS
61 struct proc_dir_entry *pde; /* proc dir entry */ 63 struct proc_dir_entry *pde; /* proc dir entry */
@@ -66,8 +68,7 @@ struct clusterip_config {
66 68
67static LIST_HEAD(clusterip_configs); 69static LIST_HEAD(clusterip_configs);
68 70
69/* clusterip_lock protects the clusterip_configs list _AND_ the configurable 71/* clusterip_lock protects the clusterip_configs list */
70 * data within all structurses (num_local_nodes, local_nodes[]) */
71static DEFINE_RWLOCK(clusterip_lock); 72static DEFINE_RWLOCK(clusterip_lock);
72 73
73#ifdef CONFIG_PROC_FS 74#ifdef CONFIG_PROC_FS
@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
76#endif 77#endif
77 78
78static inline void 79static inline void
79clusterip_config_get(struct clusterip_config *c) { 80clusterip_config_get(struct clusterip_config *c)
81{
80 atomic_inc(&c->refcount); 82 atomic_inc(&c->refcount);
81} 83}
82 84
83static inline void 85static inline void
84clusterip_config_put(struct clusterip_config *c) { 86clusterip_config_put(struct clusterip_config *c)
85 if (atomic_dec_and_test(&c->refcount)) { 87{
88 if (atomic_dec_and_test(&c->refcount))
89 kfree(c);
90}
91
92/* increase the count of entries(rules) using/referencing this config */
93static inline void
94clusterip_config_entry_get(struct clusterip_config *c)
95{
96 atomic_inc(&c->entries);
97}
98
99/* decrease the count of entries using/referencing this config. If last
100 * entry(rule) is removed, remove the config from lists, but don't free it
101 * yet, since proc-files could still be holding references */
102static inline void
103clusterip_config_entry_put(struct clusterip_config *c)
104{
105 if (atomic_dec_and_test(&c->entries)) {
86 write_lock_bh(&clusterip_lock); 106 write_lock_bh(&clusterip_lock);
87 list_del(&c->list); 107 list_del(&c->list);
88 write_unlock_bh(&clusterip_lock); 108 write_unlock_bh(&clusterip_lock);
109
89 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 110 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
90 dev_put(c->dev); 111 dev_put(c->dev);
91 kfree(c); 112
113 /* In case anyone still accesses the file, the open/close
114 * functions are also incrementing the refcount on their own,
115 * so it's safe to remove the entry even if it's in use. */
116#ifdef CONFIG_PROC_FS
117 remove_proc_entry(c->pde->name, c->pde->parent);
118#endif
92 } 119 }
93} 120}
94 121
95
96static struct clusterip_config * 122static struct clusterip_config *
97__clusterip_config_find(u_int32_t clusterip) 123__clusterip_config_find(u_int32_t clusterip)
98{ 124{
@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
111} 137}
112 138
113static inline struct clusterip_config * 139static inline struct clusterip_config *
114clusterip_config_find_get(u_int32_t clusterip) 140clusterip_config_find_get(u_int32_t clusterip, int entry)
115{ 141{
116 struct clusterip_config *c; 142 struct clusterip_config *c;
117 143
@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
122 return NULL; 148 return NULL;
123 } 149 }
124 atomic_inc(&c->refcount); 150 atomic_inc(&c->refcount);
151 if (entry)
152 atomic_inc(&c->entries);
125 read_unlock_bh(&clusterip_lock); 153 read_unlock_bh(&clusterip_lock);
126 154
127 return c; 155 return c;
128} 156}
129 157
158static void
159clusterip_config_init_nodelist(struct clusterip_config *c,
160 const struct ipt_clusterip_tgt_info *i)
161{
162 int n;
163
164 for (n = 0; n < i->num_local_nodes; n++) {
165 set_bit(i->local_nodes[n] - 1, &c->local_nodes);
166 }
167}
168
130static struct clusterip_config * 169static struct clusterip_config *
131clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, 170clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
132 struct net_device *dev) 171 struct net_device *dev)
@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
143 c->clusterip = ip; 182 c->clusterip = ip;
144 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); 183 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
145 c->num_total_nodes = i->num_total_nodes; 184 c->num_total_nodes = i->num_total_nodes;
146 c->num_local_nodes = i->num_local_nodes; 185 clusterip_config_init_nodelist(c, i);
147 memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
148 c->hash_mode = i->hash_mode; 186 c->hash_mode = i->hash_mode;
149 c->hash_initval = i->hash_initval; 187 c->hash_initval = i->hash_initval;
150 atomic_set(&c->refcount, 1); 188 atomic_set(&c->refcount, 1);
189 atomic_set(&c->entries, 1);
151 190
152#ifdef CONFIG_PROC_FS 191#ifdef CONFIG_PROC_FS
153 /* create proc dir entry */ 192 /* create proc dir entry */
@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
171static int 210static int
172clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) 211clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
173{ 212{
174 int i;
175
176 write_lock_bh(&clusterip_lock);
177 213
178 if (c->num_local_nodes >= CLUSTERIP_MAX_NODES 214 if (nodenum == 0 ||
179 || nodenum > CLUSTERIP_MAX_NODES) { 215 nodenum > c->num_total_nodes)
180 write_unlock_bh(&clusterip_lock);
181 return 1; 216 return 1;
182 }
183
184 /* check if we alrady have this number in our array */
185 for (i = 0; i < c->num_local_nodes; i++) {
186 if (c->local_nodes[i] == nodenum) {
187 write_unlock_bh(&clusterip_lock);
188 return 1;
189 }
190 }
191 217
192 c->local_nodes[c->num_local_nodes++] = nodenum; 218 /* check if we already have this number in our bitfield */
219 if (test_and_set_bit(nodenum - 1, &c->local_nodes))
220 return 1;
193 221
194 write_unlock_bh(&clusterip_lock);
195 return 0; 222 return 0;
196} 223}
197 224
198static int 225static int
199clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) 226clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
200{ 227{
201 int i; 228 if (nodenum == 0 ||
202 229 nodenum > c->num_total_nodes)
203 write_lock_bh(&clusterip_lock);
204
205 if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
206 write_unlock_bh(&clusterip_lock);
207 return 1; 230 return 1;
208 }
209 231
210 for (i = 0; i < c->num_local_nodes; i++) { 232 if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
211 if (c->local_nodes[i] == nodenum) { 233 return 0;
212 int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
213 memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
214 c->num_local_nodes--;
215 write_unlock_bh(&clusterip_lock);
216 return 0;
217 }
218 }
219 234
220 write_unlock_bh(&clusterip_lock);
221 return 1; 235 return 1;
222} 236}
223 237
@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
285static inline int 299static inline int
286clusterip_responsible(struct clusterip_config *config, u_int32_t hash) 300clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
287{ 301{
288 int i; 302 return test_bit(hash - 1, &config->local_nodes);
289
290 read_lock_bh(&clusterip_lock);
291
292 if (config->num_local_nodes == 0) {
293 read_unlock_bh(&clusterip_lock);
294 return 0;
295 }
296
297 for (i = 0; i < config->num_local_nodes; i++) {
298 if (config->local_nodes[i] == hash) {
299 read_unlock_bh(&clusterip_lock);
300 return 1;
301 }
302 }
303
304 read_unlock_bh(&clusterip_lock);
305
306 return 0;
307} 303}
308 304
309/*********************************************************************** 305/***********************************************************************
@@ -415,8 +411,26 @@ checkentry(const char *tablename,
415 411
416 /* FIXME: further sanity checks */ 412 /* FIXME: further sanity checks */
417 413
418 config = clusterip_config_find_get(e->ip.dst.s_addr); 414 config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
419 if (!config) { 415 if (config) {
416 if (cipinfo->config != NULL) {
417 /* Case A: This is an entry that gets reloaded, since
418 * it still has a cipinfo->config pointer. Simply
419 * increase the entry refcount and return */
420 if (cipinfo->config != config) {
421 printk(KERN_ERR "CLUSTERIP: Reloaded entry "
422 "has invalid config pointer!\n");
423 return 0;
424 }
425 clusterip_config_entry_get(cipinfo->config);
426 } else {
427 /* Case B: This is a new rule referring to an existing
428 * clusterip config. */
429 cipinfo->config = config;
430 clusterip_config_entry_get(cipinfo->config);
431 }
432 } else {
433 /* Case C: This is a completely new clusterip config */
420 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { 434 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
421 printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); 435 printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
422 return 0; 436 return 0;
@@ -443,10 +457,9 @@ checkentry(const char *tablename,
443 } 457 }
444 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); 458 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
445 } 459 }
460 cipinfo->config = config;
446 } 461 }
447 462
448 cipinfo->config = config;
449
450 return 1; 463 return 1;
451} 464}
452 465
@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
455{ 468{
456 struct ipt_clusterip_tgt_info *cipinfo = matchinfo; 469 struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
457 470
458 /* we first remove the proc entry and then drop the reference 471 /* if no more entries are referencing the config, remove it
459 * count. In case anyone still accesses the file, the open/close 472 * from the list and destroy the proc entry */
460 * functions are also incrementing the refcount on their own */ 473 clusterip_config_entry_put(cipinfo->config);
461#ifdef CONFIG_PROC_FS 474
462 remove_proc_entry(cipinfo->config->pde->name,
463 cipinfo->config->pde->parent);
464#endif
465 clusterip_config_put(cipinfo->config); 475 clusterip_config_put(cipinfo->config);
466} 476}
467 477
@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
533 543
534 /* if there is no clusterip configuration for the arp reply's 544 /* if there is no clusterip configuration for the arp reply's
535 * source ip, we don't want to mangle it */ 545 * source ip, we don't want to mangle it */
536 c = clusterip_config_find_get(payload->src_ip); 546 c = clusterip_config_find_get(payload->src_ip, 0);
537 if (!c) 547 if (!c)
538 return NF_ACCEPT; 548 return NF_ACCEPT;
539 549
@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
574 584
575#ifdef CONFIG_PROC_FS 585#ifdef CONFIG_PROC_FS
576 586
587struct clusterip_seq_position {
588 unsigned int pos; /* position */
589 unsigned int weight; /* number of bits set == size */
590 unsigned int bit; /* current bit */
591 unsigned long val; /* current value */
592};
593
577static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) 594static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
578{ 595{
579 struct proc_dir_entry *pde = s->private; 596 struct proc_dir_entry *pde = s->private;
580 struct clusterip_config *c = pde->data; 597 struct clusterip_config *c = pde->data;
581 unsigned int *nodeidx; 598 unsigned int weight;
582 599 u_int32_t local_nodes;
583 read_lock_bh(&clusterip_lock); 600 struct clusterip_seq_position *idx;
584 if (*pos >= c->num_local_nodes) 601
602 /* FIXME: possible race */
603 local_nodes = c->local_nodes;
604 weight = hweight32(local_nodes);
605 if (*pos >= weight)
585 return NULL; 606 return NULL;
586 607
587 nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); 608 idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
588 if (!nodeidx) 609 if (!idx)
589 return ERR_PTR(-ENOMEM); 610 return ERR_PTR(-ENOMEM);
590 611
591 *nodeidx = *pos; 612 idx->pos = *pos;
592 return nodeidx; 613 idx->weight = weight;
614 idx->bit = ffs(local_nodes);
615 idx->val = local_nodes;
616 clear_bit(idx->bit - 1, &idx->val);
617
618 return idx;
593} 619}
594 620
595static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) 621static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
596{ 622{
597 struct proc_dir_entry *pde = s->private; 623 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
598 struct clusterip_config *c = pde->data;
599 unsigned int *nodeidx = (unsigned int *)v;
600 624
601 *pos = ++(*nodeidx); 625 *pos = ++idx->pos;
602 if (*pos >= c->num_local_nodes) { 626 if (*pos >= idx->weight) {
603 kfree(v); 627 kfree(v);
604 return NULL; 628 return NULL;
605 } 629 }
606 return nodeidx; 630 idx->bit = ffs(idx->val);
631 clear_bit(idx->bit - 1, &idx->val);
632 return idx;
607} 633}
608 634
609static void clusterip_seq_stop(struct seq_file *s, void *v) 635static void clusterip_seq_stop(struct seq_file *s, void *v)
610{ 636{
611 kfree(v); 637 kfree(v);
612
613 read_unlock_bh(&clusterip_lock);
614} 638}
615 639
616static int clusterip_seq_show(struct seq_file *s, void *v) 640static int clusterip_seq_show(struct seq_file *s, void *v)
617{ 641{
618 struct proc_dir_entry *pde = s->private; 642 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
619 struct clusterip_config *c = pde->data;
620 unsigned int *nodeidx = (unsigned int *)v;
621 643
622 if (*nodeidx != 0) 644 if (idx->pos != 0)
623 seq_putc(s, ','); 645 seq_putc(s, ',');
624 seq_printf(s, "%u", c->local_nodes[*nodeidx]);
625 646
626 if (*nodeidx == c->num_local_nodes-1) 647 seq_printf(s, "%u", idx->bit);
648
649 if (idx->pos == idx->weight - 1)
627 seq_putc(s, '\n'); 650 seq_putc(s, '\n');
628 651
629 return 0; 652 return 0;