Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/fib_trie.c                             49
-rw-r--r--  net/ipv4/igmp.c                                  2
-rw-r--r--  net/ipv4/ipvs/ip_vs_conn.c                      43
-rw-r--r--  net/ipv4/ipvs/ip_vs_core.c                      16
-rw-r--r--  net/ipv4/ipvs/ip_vs_sync.c                      20
-rw-r--r--  net/ipv4/netfilter/Kconfig                      36
-rw-r--r--  net/ipv4/netfilter/Makefile                      5
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c           7
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_helper_pptp.c  805
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_netlink.c        4
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_gre.c    327
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_standalone.c     4
-rw-r--r--  net/ipv4/netfilter/ip_nat_core.c                 2
-rw-r--r--  net/ipv4/netfilter/ip_nat_helper_pptp.c        401
-rw-r--r--  net/ipv4/netfilter/ip_nat_proto_gre.c          214
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c             223
-rw-r--r--  net/ipv4/raw.c                                   2
-rw-r--r--  net/ipv4/tcp_input.c                            16
-rw-r--r--  net/ipv4/tcp_minisocks.c                         2
-rw-r--r--  net/ipv4/tcp_output.c                           12
20 files changed, 2018 insertions, 172 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1b63b4824164..50c0519cd70d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 */ 44 */
45 45
46#define VERSION "0.403" 46#define VERSION "0.404"
47 47
48#include <linux/config.h> 48#include <linux/config.h>
49#include <asm/uaccess.h> 49#include <asm/uaccess.h>
@@ -224,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b)
224 Consider a node 'n' and its parent 'tp'. 224 Consider a node 'n' and its parent 'tp'.
225 225
226 If n is a leaf, every bit in its key is significant. Its presence is 226 If n is a leaf, every bit in its key is significant. Its presence is
227 necessitaded by path compression, since during a tree traversal (when 227 necessitated by path compression, since during a tree traversal (when
228 searching for a leaf - unless we are doing an insertion) we will completely 228 searching for a leaf - unless we are doing an insertion) we will completely
229 ignore all skipped bits we encounter. Thus we need to verify, at the end of 229 ignore all skipped bits we encounter. Thus we need to verify, at the end of
230 a potentially successful search, that we have indeed been walking the 230 a potentially successful search, that we have indeed been walking the
@@ -836,11 +836,12 @@ static void trie_init(struct trie *t)
836#endif 836#endif
837} 837}
838 838
839/* readside most use rcu_read_lock currently dump routines 839/* readside must use rcu_read_lock currently dump routines
840 via get_fa_head and dump */ 840 via get_fa_head and dump */
841 841
842static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) 842static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
843{ 843{
844 struct hlist_head *head = &l->list;
844 struct hlist_node *node; 845 struct hlist_node *node;
845 struct leaf_info *li; 846 struct leaf_info *li;
846 847
@@ -853,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
853 854
854static inline struct list_head * get_fa_head(struct leaf *l, int plen) 855static inline struct list_head * get_fa_head(struct leaf *l, int plen)
855{ 856{
856 struct leaf_info *li = find_leaf_info(&l->list, plen); 857 struct leaf_info *li = find_leaf_info(l, plen);
857 858
858 if (!li) 859 if (!li)
859 return NULL; 860 return NULL;
@@ -1085,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1085 } 1086 }
1086 1087
1087 if (tp && tp->pos + tp->bits > 32) 1088 if (tp && tp->pos + tp->bits > 32)
1088 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 1089 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1089 tp, tp->pos, tp->bits, key, plen); 1090 tp, tp->pos, tp->bits, key, plen);
1090 1091
1091 /* Rebalance the trie */ 1092 /* Rebalance the trie */
@@ -1248,7 +1249,7 @@ err:
1248} 1249}
1249 1250
1250 1251
1251/* should be clalled with rcu_read_lock */ 1252/* should be called with rcu_read_lock */
1252static inline int check_leaf(struct trie *t, struct leaf *l, 1253static inline int check_leaf(struct trie *t, struct leaf *l,
1253 t_key key, int *plen, const struct flowi *flp, 1254 t_key key, int *plen, const struct flowi *flp,
1254 struct fib_result *res) 1255 struct fib_result *res)
@@ -1590,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1590 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); 1591 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1591 1592
1592 l = fib_find_node(t, key); 1593 l = fib_find_node(t, key);
1593 li = find_leaf_info(&l->list, plen); 1594 li = find_leaf_info(l, plen);
1594 1595
1595 list_del_rcu(&fa->fa_list); 1596 list_del_rcu(&fa->fa_list);
1596 1597
@@ -1714,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb)
1714 1715
1715 t->revision++; 1716 t->revision++;
1716 1717
1717 rcu_read_lock();
1718 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 1718 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
1719 found += trie_flush_leaf(t, l); 1719 found += trie_flush_leaf(t, l);
1720 1720
@@ -1722,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb)
1722 trie_leaf_remove(t, ll->key); 1722 trie_leaf_remove(t, ll->key);
1723 ll = l; 1723 ll = l;
1724 } 1724 }
1725 rcu_read_unlock();
1726 1725
1727 if (ll && hlist_empty(&ll->list)) 1726 if (ll && hlist_empty(&ll->list))
1728 trie_leaf_remove(t, ll->key); 1727 trie_leaf_remove(t, ll->key);
@@ -1833,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1833 i++; 1832 i++;
1834 continue; 1833 continue;
1835 } 1834 }
1836 if (fa->fa_info->fib_nh == NULL) { 1835 BUG_ON(!fa->fa_info);
1837 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1838 i++;
1839 continue;
1840 }
1841 if (fa->fa_info == NULL) {
1842 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1843 i++;
1844 continue;
1845 }
1846 1836
1847 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 1837 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1848 cb->nlh->nlmsg_seq, 1838 cb->nlh->nlmsg_seq,
@@ -1965,7 +1955,7 @@ struct fib_table * __init fib_hash_init(int id)
1965 trie_main = t; 1955 trie_main = t;
1966 1956
1967 if (id == RT_TABLE_LOCAL) 1957 if (id == RT_TABLE_LOCAL)
1968 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); 1958 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
1969 1959
1970 return tb; 1960 return tb;
1971} 1961}
@@ -2029,7 +2019,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2029 iter->tnode = (struct tnode *) n; 2019 iter->tnode = (struct tnode *) n;
2030 iter->trie = t; 2020 iter->trie = t;
2031 iter->index = 0; 2021 iter->index = 0;
2032 iter->depth = 0; 2022 iter->depth = 1;
2033 return n; 2023 return n;
2034 } 2024 }
2035 return NULL; 2025 return NULL;
@@ -2274,11 +2264,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2274 seq_puts(seq, "<local>:\n"); 2264 seq_puts(seq, "<local>:\n");
2275 else 2265 else
2276 seq_puts(seq, "<main>:\n"); 2266 seq_puts(seq, "<main>:\n");
2277 } else { 2267 }
2278 seq_indent(seq, iter->depth-1); 2268 seq_indent(seq, iter->depth-1);
2279 seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", 2269 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
2280 NIPQUAD(prf), tn->pos); 2270 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
2281 } 2271 tn->empty_children);
2272
2282 } else { 2273 } else {
2283 struct leaf *l = (struct leaf *) n; 2274 struct leaf *l = (struct leaf *) n;
2284 int i; 2275 int i;
@@ -2287,7 +2278,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2287 seq_indent(seq, iter->depth); 2278 seq_indent(seq, iter->depth);
2288 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); 2279 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2289 for (i = 32; i >= 0; i--) { 2280 for (i = 32; i >= 0; i--) {
2290 struct leaf_info *li = find_leaf_info(&l->list, i); 2281 struct leaf_info *li = find_leaf_info(l, i);
2291 if (li) { 2282 if (li) {
2292 struct fib_alias *fa; 2283 struct fib_alias *fa;
2293 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2284 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -2383,7 +2374,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2383 return 0; 2374 return 0;
2384 2375
2385 for (i=32; i>=0; i--) { 2376 for (i=32; i>=0; i--) {
2386 struct leaf_info *li = find_leaf_info(&l->list, i); 2377 struct leaf_info *li = find_leaf_info(l, i);
2387 struct fib_alias *fa; 2378 struct fib_alias *fa;
2388 u32 mask, prefix; 2379 u32 mask, prefix;
2389 2380
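The hunks above change find_leaf_info() to take the leaf itself rather than its embedded hlist head, so every caller shown in this diff (get_fa_head, fn_trie_delete, fib_trie_seq_show, fib_route_seq_show) now passes "l" directly. A minimal sketch of the helper as it reads after this change, reconstructed from the hunks above; the hlist walk and the "hlist"/"plen" member names are assumptions from context, not shown in the diff:

/* sketch only: find_leaf_info() after this patch (member names assumed) */
static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
{
	struct hlist_head *head = &l->list;
	struct hlist_node *node;
	struct leaf_info *li;

	/* leaf_info entries hang off the leaf; match on prefix length */
	hlist_for_each_entry_rcu(li, node, head, hlist)
		if (li->plen == plen)
			return li;

	return NULL;
}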
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 44607f4767b8..70c44e4c3ceb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1603,7 +1603,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
1603 } 1603 }
1604 pmc->sources = NULL; 1604 pmc->sources = NULL;
1605 pmc->sfmode = MCAST_EXCLUDE; 1605 pmc->sfmode = MCAST_EXCLUDE;
1606 pmc->sfcount[MCAST_EXCLUDE] = 0; 1606 pmc->sfcount[MCAST_INCLUDE] = 0;
1607 pmc->sfcount[MCAST_EXCLUDE] = 1; 1607 pmc->sfcount[MCAST_EXCLUDE] = 1;
1608} 1608}
1609 1609
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index e11952ea17af..f828fa2eb7de 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
196 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 196 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
197 if (s_addr==cp->caddr && s_port==cp->cport && 197 if (s_addr==cp->caddr && s_port==cp->cport &&
198 d_port==cp->vport && d_addr==cp->vaddr && 198 d_port==cp->vport && d_addr==cp->vaddr &&
199 ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
199 protocol==cp->protocol) { 200 protocol==cp->protocol) {
200 /* HIT */ 201 /* HIT */
201 atomic_inc(&cp->refcnt); 202 atomic_inc(&cp->refcnt);
@@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get
227 return cp; 228 return cp;
228} 229}
229 230
231/* Get reference to connection template */
232struct ip_vs_conn *ip_vs_ct_in_get
233(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
234{
235 unsigned hash;
236 struct ip_vs_conn *cp;
237
238 hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
239
240 ct_read_lock(hash);
241
242 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
243 if (s_addr==cp->caddr && s_port==cp->cport &&
244 d_port==cp->vport && d_addr==cp->vaddr &&
245 cp->flags & IP_VS_CONN_F_TEMPLATE &&
246 protocol==cp->protocol) {
247 /* HIT */
248 atomic_inc(&cp->refcnt);
249 goto out;
250 }
251 }
252 cp = NULL;
253
254 out:
255 ct_read_unlock(hash);
256
257 IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
258 ip_vs_proto_name(protocol),
259 NIPQUAD(s_addr), ntohs(s_port),
260 NIPQUAD(d_addr), ntohs(d_port),
261 cp?"hit":"not hit");
262
263 return cp;
264}
230 265
231/* 266/*
232 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 267 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
@@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
367 atomic_read(&dest->refcnt)); 402 atomic_read(&dest->refcnt));
368 403
369 /* Update the connection counters */ 404 /* Update the connection counters */
370 if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { 405 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
371 /* It is a normal connection, so increase the inactive 406 /* It is a normal connection, so increase the inactive
372 connection counter because it is in TCP SYNRECV 407 connection counter because it is in TCP SYNRECV
 373 state (inactive) or other protocol inactive state */ 408 state (inactive) or other protocol inactive state */
@@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
406 atomic_read(&dest->refcnt)); 441 atomic_read(&dest->refcnt));
407 442
408 /* Update the connection counters */ 443 /* Update the connection counters */
409 if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { 444 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
410 /* It is a normal connection, so decrease the inactconns 445 /* It is a normal connection, so decrease the inactconns
411 or activeconns counter */ 446 or activeconns counter */
412 if (cp->flags & IP_VS_CONN_F_INACTIVE) { 447 if (cp->flags & IP_VS_CONN_F_INACTIVE) {
@@ -467,7 +502,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
467 /* 502 /*
468 * Invalidate the connection template 503 * Invalidate the connection template
469 */ 504 */
470 if (ct->cport) { 505 if (ct->vport != 65535) {
471 if (ip_vs_conn_unhash(ct)) { 506 if (ip_vs_conn_unhash(ct)) {
472 ct->dport = 65535; 507 ct->dport = 65535;
473 ct->vport = 65535; 508 ct->vport = 65535;
@@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void)
776 ct_write_lock_bh(hash); 811 ct_write_lock_bh(hash);
777 812
778 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 813 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
779 if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) 814 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
780 /* connection template */ 815 /* connection template */
781 continue; 816 continue;
782 817
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 3ac7eeca04ac..981cc3244ef2 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
243 if (ports[1] == svc->port) { 243 if (ports[1] == svc->port) {
244 /* Check if a template already exists */ 244 /* Check if a template already exists */
245 if (svc->port != FTPPORT) 245 if (svc->port != FTPPORT)
246 ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 246 ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
247 iph->daddr, ports[1]); 247 iph->daddr, ports[1]);
248 else 248 else
249 ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 249 ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
250 iph->daddr, 0); 250 iph->daddr, 0);
251 251
252 if (!ct || !ip_vs_check_template(ct)) { 252 if (!ct || !ip_vs_check_template(ct)) {
@@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
272 iph->daddr, 272 iph->daddr,
273 ports[1], 273 ports[1],
274 dest->addr, dest->port, 274 dest->addr, dest->port,
275 0, 275 IP_VS_CONN_F_TEMPLATE,
276 dest); 276 dest);
277 else 277 else
278 ct = ip_vs_conn_new(iph->protocol, 278 ct = ip_vs_conn_new(iph->protocol,
279 snet, 0, 279 snet, 0,
280 iph->daddr, 0, 280 iph->daddr, 0,
281 dest->addr, 0, 281 dest->addr, 0,
282 0, 282 IP_VS_CONN_F_TEMPLATE,
283 dest); 283 dest);
284 if (ct == NULL) 284 if (ct == NULL)
285 return NULL; 285 return NULL;
@@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
298 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> 298 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
299 */ 299 */
300 if (svc->fwmark) 300 if (svc->fwmark)
301 ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0, 301 ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
302 htonl(svc->fwmark), 0); 302 htonl(svc->fwmark), 0);
303 else 303 else
304 ct = ip_vs_conn_in_get(iph->protocol, snet, 0, 304 ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
305 iph->daddr, 0); 305 iph->daddr, 0);
306 306
307 if (!ct || !ip_vs_check_template(ct)) { 307 if (!ct || !ip_vs_check_template(ct)) {
@@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
326 snet, 0, 326 snet, 0,
327 htonl(svc->fwmark), 0, 327 htonl(svc->fwmark), 0,
328 dest->addr, 0, 328 dest->addr, 0,
329 0, 329 IP_VS_CONN_F_TEMPLATE,
330 dest); 330 dest);
331 else 331 else
332 ct = ip_vs_conn_new(iph->protocol, 332 ct = ip_vs_conn_new(iph->protocol,
333 snet, 0, 333 snet, 0,
334 iph->daddr, 0, 334 iph->daddr, 0,
335 dest->addr, 0, 335 dest->addr, 0,
336 0, 336 IP_VS_CONN_F_TEMPLATE,
337 dest); 337 dest);
338 if (ct == NULL) 338 if (ct == NULL)
339 return NULL; 339 return NULL;
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 574d1f509b46..2e5ced3d8062 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
297 297
298 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 298 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
299 for (i=0; i<m->nr_conns; i++) { 299 for (i=0; i<m->nr_conns; i++) {
300 unsigned flags;
301
300 s = (struct ip_vs_sync_conn *)p; 302 s = (struct ip_vs_sync_conn *)p;
301 cp = ip_vs_conn_in_get(s->protocol, 303 flags = ntohs(s->flags);
302 s->caddr, s->cport, 304 if (!(flags & IP_VS_CONN_F_TEMPLATE))
303 s->vaddr, s->vport); 305 cp = ip_vs_conn_in_get(s->protocol,
306 s->caddr, s->cport,
307 s->vaddr, s->vport);
308 else
309 cp = ip_vs_ct_in_get(s->protocol,
310 s->caddr, s->cport,
311 s->vaddr, s->vport);
304 if (!cp) { 312 if (!cp) {
305 cp = ip_vs_conn_new(s->protocol, 313 cp = ip_vs_conn_new(s->protocol,
306 s->caddr, s->cport, 314 s->caddr, s->cport,
307 s->vaddr, s->vport, 315 s->vaddr, s->vport,
308 s->daddr, s->dport, 316 s->daddr, s->dport,
309 ntohs(s->flags), NULL); 317 flags, NULL);
310 if (!cp) { 318 if (!cp) {
311 IP_VS_ERR("ip_vs_conn_new failed\n"); 319 IP_VS_ERR("ip_vs_conn_new failed\n");
312 return; 320 return;
@@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
315 } else if (!cp->dest) { 323 } else if (!cp->dest) {
316 /* it is an entry created by the synchronization */ 324 /* it is an entry created by the synchronization */
317 cp->state = ntohs(s->state); 325 cp->state = ntohs(s->state);
318 cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED; 326 cp->flags = flags | IP_VS_CONN_F_HASHED;
319 } /* Note that we don't touch its state and flags 327 } /* Note that we don't touch its state and flags
320 if it is a normal entry. */ 328 if it is a normal entry. */
321 329
322 if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { 330 if (flags & IP_VS_CONN_F_SEQ_MASK) {
323 opt = (struct ip_vs_sync_conn_options *)&s[1]; 331 opt = (struct ip_vs_sync_conn_options *)&s[1];
324 memcpy(&cp->in_seq, opt, sizeof(*opt)); 332 memcpy(&cp->in_seq, opt, sizeof(*opt));
325 p += FULL_CONN_SIZE; 333 p += FULL_CONN_SIZE;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 30aa8e2ee214..3cf9b451675c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -51,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS
51 51
 52 If unsure, say `N'. 52 If unsure, say `N'.
53 53
54config IP_NF_CONNTRACK_NETLINK
55 tristate 'Connection tracking netlink interface'
56 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
57 depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
58 help
59 This option enables support for a netlink-based userspace interface
60
61
54config IP_NF_CT_PROTO_SCTP 62config IP_NF_CT_PROTO_SCTP
55 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' 63 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
56 depends on IP_NF_CONNTRACK && EXPERIMENTAL 64 depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -129,6 +137,22 @@ config IP_NF_AMANDA
129 137
130 To compile it as a module, choose M here. If unsure, say Y. 138 To compile it as a module, choose M here. If unsure, say Y.
131 139
140config IP_NF_PPTP
141 tristate 'PPTP protocol support'
142 help
143 This module adds support for PPTP (Point to Point Tunnelling
 143 Protocol, RFC2637) connection tracking and NAT.
145
146 If you are running PPTP sessions over a stateful firewall or NAT
147 box, you may want to enable this feature.
148
149 Please note that not all PPTP modes of operation are supported yet.
150 For more info, read top of the file
151 net/ipv4/netfilter/ip_conntrack_pptp.c
152
153 If you want to compile it as a module, say M here and read
154 Documentation/modules.txt. If unsure, say `N'.
155
132config IP_NF_QUEUE 156config IP_NF_QUEUE
133 tristate "IP Userspace queueing via NETLINK (OBSOLETE)" 157 tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
134 help 158 help
@@ -613,6 +637,12 @@ config IP_NF_NAT_AMANDA
613 default IP_NF_NAT if IP_NF_AMANDA=y 637 default IP_NF_NAT if IP_NF_AMANDA=y
614 default m if IP_NF_AMANDA=m 638 default m if IP_NF_AMANDA=m
615 639
640config IP_NF_NAT_PPTP
641 tristate
642 depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
643 default IP_NF_NAT if IP_NF_PPTP=y
644 default m if IP_NF_PPTP=m
645
616# mangle + specific targets 646# mangle + specific targets
617config IP_NF_MANGLE 647config IP_NF_MANGLE
618 tristate "Packet mangling" 648 tristate "Packet mangling"
@@ -774,11 +804,5 @@ config IP_NF_ARP_MANGLE
774 Allows altering the ARP packet payload: source and destination 804 Allows altering the ARP packet payload: source and destination
775 hardware and network addresses. 805 hardware and network addresses.
776 806
777config IP_NF_CONNTRACK_NETLINK
778 tristate 'Connection tracking netlink interface'
779 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
780 help
781 This option enables support for a netlink-based userspace interface
782
783endmenu 807endmenu
784 808
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 1ba0db746817..3d45d3c0283c 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -6,6 +6,9 @@
6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o 6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
7iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o 7iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
8 8
9ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
10ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
11
9# connection tracking 12# connection tracking
10obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o 13obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
11 14
@@ -17,6 +20,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
17obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o 20obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
18 21
19# connection tracking helpers 22# connection tracking helpers
23obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
20obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o 24obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
21obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o 25obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
22obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o 26obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
@@ -24,6 +28,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
24obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o 28obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
25 29
26# NAT helpers 30# NAT helpers
31obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
27obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o 32obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
28obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o 33obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
29obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o 34obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 19cba16e6e1e..c1f82e0c81cf 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
233 233
234/* Just find a expectation corresponding to a tuple. */ 234/* Just find a expectation corresponding to a tuple. */
235struct ip_conntrack_expect * 235struct ip_conntrack_expect *
236ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) 236ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
237{ 237{
238 struct ip_conntrack_expect *i; 238 struct ip_conntrack_expect *i;
239 239
@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
1143 if (del_timer(&ct->timeout)) { 1143 if (del_timer(&ct->timeout)) {
1144 ct->timeout.expires = jiffies + extra_jiffies; 1144 ct->timeout.expires = jiffies + extra_jiffies;
1145 add_timer(&ct->timeout); 1145 add_timer(&ct->timeout);
1146 ip_conntrack_event_cache(IPCT_REFRESH, skb); 1146 /* FIXME: We loose some REFRESH events if this function
1147 * is called without an skb. I'll fix this later -HW */
1148 if (skb)
1149 ip_conntrack_event_cache(IPCT_REFRESH, skb);
1147 } 1150 }
1148 ct_add_counters(ct, ctinfo, skb); 1151 ct_add_counters(ct, ctinfo, skb);
1149 write_unlock_bh(&ip_conntrack_lock); 1152 write_unlock_bh(&ip_conntrack_lock);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
new file mode 100644
index 000000000000..79db5b70d5f6
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -0,0 +1,805 @@
1/*
2 * ip_conntrack_pptp.c - Version 3.0
3 *
4 * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
 5 * PPTP is a protocol for creating virtual private networks.
6 * It is a specification defined by Microsoft and some vendors
7 * working with Microsoft. PPTP is built on top of a modified
8 * version of the Internet Generic Routing Encapsulation Protocol.
9 * GRE is defined in RFC 1701 and RFC 1702. Documentation of
10 * PPTP can be found in RFC 2637
11 *
12 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
13 *
14 * Development of this code funded by Astaro AG (http://www.astaro.com/)
15 *
16 * Limitations:
17 * - We blindly assume that control connections are always
18 * established in PNS->PAC direction. This is a violation
 19 * of RFC 2637
20 * - We can only support one single call within each session
21 *
22 * TODO:
23 * - testing of incoming PPTP calls
24 *
25 * Changes:
26 * 2002-02-05 - Version 1.3
27 * - Call ip_conntrack_unexpect_related() from
28 * pptp_destroy_siblings() to destroy expectations in case
29 * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
30 * (Philip Craig <philipc@snapgear.com>)
31 * - Add Version information at module loadtime
32 * 2002-02-10 - Version 1.6
33 * - move to C99 style initializers
34 * - remove second expectation if first arrives
35 * 2004-10-22 - Version 2.0
36 * - merge Mandrake's 2.6.x port with recent 2.6.x API changes
37 * - fix lots of linear skb assumptions from Mandrake's port
38 * 2005-06-10 - Version 2.1
39 * - use ip_conntrack_expect_free() instead of kfree() on the
40 * expect's (which are from the slab for quite some time)
41 * 2005-06-10 - Version 3.0
42 * - port helper to post-2.6.11 API changes,
43 * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
44 * 2005-07-30 - Version 3.1
45 * - port helper to 2.6.13 API changes
46 *
47 */
48
49#include <linux/config.h>
50#include <linux/module.h>
51#include <linux/netfilter.h>
52#include <linux/ip.h>
53#include <net/checksum.h>
54#include <net/tcp.h>
55
56#include <linux/netfilter_ipv4/ip_conntrack.h>
57#include <linux/netfilter_ipv4/ip_conntrack_core.h>
58#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
59#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
60#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
61
62#define IP_CT_PPTP_VERSION "3.1"
63
64MODULE_LICENSE("GPL");
65MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
66MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
67
68static DEFINE_SPINLOCK(ip_pptp_lock);
69
70int
71(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
72 struct ip_conntrack *ct,
73 enum ip_conntrack_info ctinfo,
74 struct PptpControlHeader *ctlh,
75 union pptp_ctrl_union *pptpReq);
76
77int
78(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
79 struct ip_conntrack *ct,
80 enum ip_conntrack_info ctinfo,
81 struct PptpControlHeader *ctlh,
82 union pptp_ctrl_union *pptpReq);
83
84int
85(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
86 struct ip_conntrack_expect *expect_reply);
87
88void
89(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
90 struct ip_conntrack_expect *exp);
91
92#if 0
93/* PptpControlMessageType names */
94const char *pptp_msg_name[] = {
95 "UNKNOWN_MESSAGE",
96 "START_SESSION_REQUEST",
97 "START_SESSION_REPLY",
98 "STOP_SESSION_REQUEST",
99 "STOP_SESSION_REPLY",
100 "ECHO_REQUEST",
101 "ECHO_REPLY",
102 "OUT_CALL_REQUEST",
103 "OUT_CALL_REPLY",
104 "IN_CALL_REQUEST",
105 "IN_CALL_REPLY",
106 "IN_CALL_CONNECT",
107 "CALL_CLEAR_REQUEST",
108 "CALL_DISCONNECT_NOTIFY",
109 "WAN_ERROR_NOTIFY",
110 "SET_LINK_INFO"
111};
112EXPORT_SYMBOL(pptp_msg_name);
113#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
114#else
115#define DEBUGP(format, args...)
116#endif
117
118#define SECS *HZ
119#define MINS * 60 SECS
120#define HOURS * 60 MINS
121
122#define PPTP_GRE_TIMEOUT (10 MINS)
123#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
124
125static void pptp_expectfn(struct ip_conntrack *ct,
126 struct ip_conntrack_expect *exp)
127{
128 DEBUGP("increasing timeouts\n");
129
130 /* increase timeout of GRE data channel conntrack entry */
131 ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
132 ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
133
134 /* Can you see how rusty this code is, compared with the pre-2.6.11
135 * one? That's what happened to my shiny newnat of 2002 ;( -HW */
136
137 if (!ip_nat_pptp_hook_expectfn) {
138 struct ip_conntrack_tuple inv_t;
139 struct ip_conntrack_expect *exp_other;
140
141 /* obviously this tuple inversion only works until you do NAT */
142 invert_tuplepr(&inv_t, &exp->tuple);
143 DEBUGP("trying to unexpect other dir: ");
144 DUMP_TUPLE(&inv_t);
145
146 exp_other = ip_conntrack_expect_find(&inv_t);
147 if (exp_other) {
148 /* delete other expectation. */
149 DEBUGP("found\n");
150 ip_conntrack_unexpect_related(exp_other);
151 ip_conntrack_expect_put(exp_other);
152 } else {
153 DEBUGP("not found\n");
154 }
155 } else {
156 /* we need more than simple inversion */
157 ip_nat_pptp_hook_expectfn(ct, exp);
158 }
159}
160
161static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
162{
163 struct ip_conntrack_tuple_hash *h;
164 struct ip_conntrack_expect *exp;
165
166 DEBUGP("trying to timeout ct or exp for tuple ");
167 DUMP_TUPLE(t);
168
169 h = ip_conntrack_find_get(t, NULL);
170 if (h) {
171 struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
172 DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
173 sibling->proto.gre.timeout = 0;
174 sibling->proto.gre.stream_timeout = 0;
175 /* refresh_acct will not modify counters if skb == NULL */
176 if (del_timer(&sibling->timeout))
177 sibling->timeout.function((unsigned long)sibling);
178 ip_conntrack_put(sibling);
179 return 1;
180 } else {
181 exp = ip_conntrack_expect_find(t);
182 if (exp) {
183 DEBUGP("unexpect_related of expect %p\n", exp);
184 ip_conntrack_unexpect_related(exp);
185 ip_conntrack_expect_put(exp);
186 return 1;
187 }
188 }
189
190 return 0;
191}
192
193
194/* timeout GRE data connections */
195static void pptp_destroy_siblings(struct ip_conntrack *ct)
196{
197 struct ip_conntrack_tuple t;
198
199 /* Since ct->sibling_list has literally rusted away in 2.6.11,
200 * we now need another way to find out about our sibling
 201 * conntrack and expects... -HW */
202
203 /* try original (pns->pac) tuple */
204 memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
205 t.dst.protonum = IPPROTO_GRE;
206 t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
207 t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
208
209 if (!destroy_sibling_or_exp(&t))
210 DEBUGP("failed to timeout original pns->pac ct/exp\n");
211
212 /* try reply (pac->pns) tuple */
213 memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
214 t.dst.protonum = IPPROTO_GRE;
215 t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
216 t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
217
218 if (!destroy_sibling_or_exp(&t))
219 DEBUGP("failed to timeout reply pac->pns ct/exp\n");
220}
221
222/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
223static inline int
224exp_gre(struct ip_conntrack *master,
225 u_int32_t seq,
226 u_int16_t callid,
227 u_int16_t peer_callid)
228{
229 struct ip_conntrack_tuple inv_tuple;
230 struct ip_conntrack_tuple exp_tuples[] = {
231 /* tuple in original direction, PNS->PAC */
232 { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
233 .u = { .gre = { .key = peer_callid } }
234 },
235 .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
236 .u = { .gre = { .key = callid } },
237 .protonum = IPPROTO_GRE
238 },
239 },
240 /* tuple in reply direction, PAC->PNS */
241 { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
242 .u = { .gre = { .key = callid } }
243 },
244 .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
245 .u = { .gre = { .key = peer_callid } },
246 .protonum = IPPROTO_GRE
247 },
248 }
249 };
250 struct ip_conntrack_expect *exp_orig, *exp_reply;
251 int ret = 1;
252
253 exp_orig = ip_conntrack_expect_alloc(master);
254 if (exp_orig == NULL)
255 goto out;
256
257 exp_reply = ip_conntrack_expect_alloc(master);
258 if (exp_reply == NULL)
259 goto out_put_orig;
260
261 memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
262
263 exp_orig->mask.src.ip = 0xffffffff;
264 exp_orig->mask.src.u.all = 0;
265 exp_orig->mask.dst.u.all = 0;
266 exp_orig->mask.dst.u.gre.key = 0xffff;
267 exp_orig->mask.dst.ip = 0xffffffff;
268 exp_orig->mask.dst.protonum = 0xff;
269
270 exp_orig->master = master;
271 exp_orig->expectfn = pptp_expectfn;
272 exp_orig->flags = 0;
273
274 exp_orig->dir = IP_CT_DIR_ORIGINAL;
275
276 /* both expectations are identical apart from tuple */
277 memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
278 memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
279
280 exp_reply->dir = !exp_orig->dir;
281
282 if (ip_nat_pptp_hook_exp_gre)
283 ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
284 else {
285
286 DEBUGP("calling expect_related PNS->PAC");
287 DUMP_TUPLE(&exp_orig->tuple);
288
289 if (ip_conntrack_expect_related(exp_orig) != 0) {
290 DEBUGP("cannot expect_related()\n");
291 goto out_put_both;
292 }
293
294 DEBUGP("calling expect_related PAC->PNS");
295 DUMP_TUPLE(&exp_reply->tuple);
296
297 if (ip_conntrack_expect_related(exp_reply) != 0) {
298 DEBUGP("cannot expect_related()\n");
299 goto out_unexpect_orig;
300 }
301
302 /* Add GRE keymap entries */
303 if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
304 DEBUGP("cannot keymap_add() exp\n");
305 goto out_unexpect_both;
306 }
307
308 invert_tuplepr(&inv_tuple, &exp_reply->tuple);
309 if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
310 ip_ct_gre_keymap_destroy(master);
311 DEBUGP("cannot keymap_add() exp_inv\n");
312 goto out_unexpect_both;
313 }
314 ret = 0;
315 }
316
317out_put_both:
318 ip_conntrack_expect_put(exp_reply);
319out_put_orig:
320 ip_conntrack_expect_put(exp_orig);
321out:
322 return ret;
323
324out_unexpect_both:
325 ip_conntrack_unexpect_related(exp_reply);
326out_unexpect_orig:
327 ip_conntrack_unexpect_related(exp_orig);
328 goto out_put_both;
329}
330
331static inline int
332pptp_inbound_pkt(struct sk_buff **pskb,
333 struct tcphdr *tcph,
334 unsigned int nexthdr_off,
335 unsigned int datalen,
336 struct ip_conntrack *ct,
337 enum ip_conntrack_info ctinfo)
338{
339 struct PptpControlHeader _ctlh, *ctlh;
340 unsigned int reqlen;
341 union pptp_ctrl_union _pptpReq, *pptpReq;
342 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
343 u_int16_t msg, *cid, *pcid;
344 u_int32_t seq;
345
346 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
347 if (!ctlh) {
348 DEBUGP("error during skb_header_pointer\n");
349 return NF_ACCEPT;
350 }
351 nexthdr_off += sizeof(_ctlh);
352 datalen -= sizeof(_ctlh);
353
354 reqlen = datalen;
355 if (reqlen > sizeof(*pptpReq))
356 reqlen = sizeof(*pptpReq);
357 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
358 if (!pptpReq) {
359 DEBUGP("error during skb_header_pointer\n");
360 return NF_ACCEPT;
361 }
362
363 msg = ntohs(ctlh->messageType);
364 DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
365
366 switch (msg) {
367 case PPTP_START_SESSION_REPLY:
368 if (reqlen < sizeof(_pptpReq.srep)) {
369 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
370 break;
371 }
372
373 /* server confirms new control session */
374 if (info->sstate < PPTP_SESSION_REQUESTED) {
375 DEBUGP("%s without START_SESS_REQUEST\n",
376 pptp_msg_name[msg]);
377 break;
378 }
379 if (pptpReq->srep.resultCode == PPTP_START_OK)
380 info->sstate = PPTP_SESSION_CONFIRMED;
381 else
382 info->sstate = PPTP_SESSION_ERROR;
383 break;
384
385 case PPTP_STOP_SESSION_REPLY:
386 if (reqlen < sizeof(_pptpReq.strep)) {
387 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
388 break;
389 }
390
391 /* server confirms end of control session */
392 if (info->sstate > PPTP_SESSION_STOPREQ) {
393 DEBUGP("%s without STOP_SESS_REQUEST\n",
394 pptp_msg_name[msg]);
395 break;
396 }
397 if (pptpReq->strep.resultCode == PPTP_STOP_OK)
398 info->sstate = PPTP_SESSION_NONE;
399 else
400 info->sstate = PPTP_SESSION_ERROR;
401 break;
402
403 case PPTP_OUT_CALL_REPLY:
404 if (reqlen < sizeof(_pptpReq.ocack)) {
405 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
406 break;
407 }
408
409 /* server accepted call, we now expect GRE frames */
410 if (info->sstate != PPTP_SESSION_CONFIRMED) {
411 DEBUGP("%s but no session\n", pptp_msg_name[msg]);
412 break;
413 }
414 if (info->cstate != PPTP_CALL_OUT_REQ &&
415 info->cstate != PPTP_CALL_OUT_CONF) {
416 DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
417 break;
418 }
419 if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
420 info->cstate = PPTP_CALL_NONE;
421 break;
422 }
423
424 cid = &pptpReq->ocack.callID;
425 pcid = &pptpReq->ocack.peersCallID;
426
427 info->pac_call_id = ntohs(*cid);
428
429 if (htons(info->pns_call_id) != *pcid) {
430 DEBUGP("%s for unknown callid %u\n",
431 pptp_msg_name[msg], ntohs(*pcid));
432 break;
433 }
434
435 DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
436 ntohs(*cid), ntohs(*pcid));
437
438 info->cstate = PPTP_CALL_OUT_CONF;
439
440 seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
441 + sizeof(struct PptpControlHeader)
442 + ((void *)pcid - (void *)pptpReq);
443
444 if (exp_gre(ct, seq, *cid, *pcid) != 0)
445 printk("ip_conntrack_pptp: error during exp_gre\n");
446 break;
447
448 case PPTP_IN_CALL_REQUEST:
449 if (reqlen < sizeof(_pptpReq.icack)) {
450 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
451 break;
452 }
453
454 /* server tells us about incoming call request */
455 if (info->sstate != PPTP_SESSION_CONFIRMED) {
456 DEBUGP("%s but no session\n", pptp_msg_name[msg]);
457 break;
458 }
459 pcid = &pptpReq->icack.peersCallID;
460 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
461 info->cstate = PPTP_CALL_IN_REQ;
462 info->pac_call_id = ntohs(*pcid);
463 break;
464
465 case PPTP_IN_CALL_CONNECT:
466 if (reqlen < sizeof(_pptpReq.iccon)) {
467 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
468 break;
469 }
470
471 /* server tells us about incoming call established */
472 if (info->sstate != PPTP_SESSION_CONFIRMED) {
473 DEBUGP("%s but no session\n", pptp_msg_name[msg]);
474 break;
475 }
476 if (info->sstate != PPTP_CALL_IN_REP
477 && info->sstate != PPTP_CALL_IN_CONF) {
478 DEBUGP("%s but never sent IN_CALL_REPLY\n",
479 pptp_msg_name[msg]);
480 break;
481 }
482
483 pcid = &pptpReq->iccon.peersCallID;
484 cid = &info->pac_call_id;
485
486 if (info->pns_call_id != ntohs(*pcid)) {
487 DEBUGP("%s for unknown CallID %u\n",
488 pptp_msg_name[msg], ntohs(*cid));
489 break;
490 }
491
492 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
493 info->cstate = PPTP_CALL_IN_CONF;
494
495 /* we expect a GRE connection from PAC to PNS */
496 seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
497 + sizeof(struct PptpControlHeader)
498 + ((void *)pcid - (void *)pptpReq);
499
500 if (exp_gre(ct, seq, *cid, *pcid) != 0)
501 printk("ip_conntrack_pptp: error during exp_gre\n");
502
503 break;
504
505 case PPTP_CALL_DISCONNECT_NOTIFY:
506 if (reqlen < sizeof(_pptpReq.disc)) {
507 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
508 break;
509 }
510
511 /* server confirms disconnect */
512 cid = &pptpReq->disc.callID;
513 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
514 info->cstate = PPTP_CALL_NONE;
515
516 /* untrack this call id, unexpect GRE packets */
517 pptp_destroy_siblings(ct);
518 break;
519
520 case PPTP_WAN_ERROR_NOTIFY:
521 break;
522
523 case PPTP_ECHO_REQUEST:
524 case PPTP_ECHO_REPLY:
525 /* I don't have to explain these ;) */
526 break;
527 default:
528 DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
529 ? pptp_msg_name[msg]:pptp_msg_name[0], msg);
530 break;
531 }
532
533
534 if (ip_nat_pptp_hook_inbound)
535 return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
536 pptpReq);
537
538 return NF_ACCEPT;
539
540}
541
542static inline int
543pptp_outbound_pkt(struct sk_buff **pskb,
544 struct tcphdr *tcph,
545 unsigned int nexthdr_off,
546 unsigned int datalen,
547 struct ip_conntrack *ct,
548 enum ip_conntrack_info ctinfo)
549{
550 struct PptpControlHeader _ctlh, *ctlh;
551 unsigned int reqlen;
552 union pptp_ctrl_union _pptpReq, *pptpReq;
553 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
554 u_int16_t msg, *cid, *pcid;
555
556 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
557 if (!ctlh)
558 return NF_ACCEPT;
559 nexthdr_off += sizeof(_ctlh);
560 datalen -= sizeof(_ctlh);
561
562 reqlen = datalen;
563 if (reqlen > sizeof(*pptpReq))
564 reqlen = sizeof(*pptpReq);
565 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
566 if (!pptpReq)
567 return NF_ACCEPT;
568
569 msg = ntohs(ctlh->messageType);
570 DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
571
572 switch (msg) {
573 case PPTP_START_SESSION_REQUEST:
574 /* client requests for new control session */
575 if (info->sstate != PPTP_SESSION_NONE) {
576 DEBUGP("%s but we already have one",
577 pptp_msg_name[msg]);
578 }
579 info->sstate = PPTP_SESSION_REQUESTED;
580 break;
581 case PPTP_STOP_SESSION_REQUEST:
582 /* client requests end of control session */
583 info->sstate = PPTP_SESSION_STOPREQ;
584 break;
585
586 case PPTP_OUT_CALL_REQUEST:
587 if (reqlen < sizeof(_pptpReq.ocreq)) {
588 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
589 /* FIXME: break; */
590 }
591
592 /* client initiating connection to server */
593 if (info->sstate != PPTP_SESSION_CONFIRMED) {
594 DEBUGP("%s but no session\n",
595 pptp_msg_name[msg]);
596 break;
597 }
598 info->cstate = PPTP_CALL_OUT_REQ;
599 /* track PNS call id */
600 cid = &pptpReq->ocreq.callID;
601 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
602 info->pns_call_id = ntohs(*cid);
603 break;
604 case PPTP_IN_CALL_REPLY:
605 if (reqlen < sizeof(_pptpReq.icack)) {
606 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
607 break;
608 }
609
610 /* client answers incoming call */
611 if (info->cstate != PPTP_CALL_IN_REQ
612 && info->cstate != PPTP_CALL_IN_REP) {
613 DEBUGP("%s without incall_req\n",
614 pptp_msg_name[msg]);
615 break;
616 }
617 if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
618 info->cstate = PPTP_CALL_NONE;
619 break;
620 }
621 pcid = &pptpReq->icack.peersCallID;
622 if (info->pac_call_id != ntohs(*pcid)) {
623 DEBUGP("%s for unknown call %u\n",
624 pptp_msg_name[msg], ntohs(*pcid));
625 break;
626 }
627 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
628 /* part two of the three-way handshake */
629 info->cstate = PPTP_CALL_IN_REP;
630 info->pns_call_id = ntohs(pptpReq->icack.callID);
631 break;
632
633 case PPTP_CALL_CLEAR_REQUEST:
634 /* client requests hangup of call */
635 if (info->sstate != PPTP_SESSION_CONFIRMED) {
636 DEBUGP("CLEAR_CALL but no session\n");
637 break;
638 }
639 /* FUTURE: iterate over all calls and check if
640 * call ID is valid. We don't do this without newnat,
641 * because we only know about last call */
642 info->cstate = PPTP_CALL_CLEAR_REQ;
643 break;
644 case PPTP_SET_LINK_INFO:
645 break;
646 case PPTP_ECHO_REQUEST:
647 case PPTP_ECHO_REPLY:
648 /* I don't have to explain these ;) */
649 break;
650 default:
651 DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
652 pptp_msg_name[msg]:pptp_msg_name[0], msg);
653 /* unknown: no need to create GRE masq table entry */
654 break;
655 }
656
657 if (ip_nat_pptp_hook_outbound)
658 return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
659 pptpReq);
660
661 return NF_ACCEPT;
662}
663
664
665/* track caller id inside control connection, call expect_related */
666static int
667conntrack_pptp_help(struct sk_buff **pskb,
668 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
669
670{
671 struct pptp_pkt_hdr _pptph, *pptph;
672 struct tcphdr _tcph, *tcph;
673 u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
674 u_int32_t datalen;
675 int dir = CTINFO2DIR(ctinfo);
676 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
677 unsigned int nexthdr_off;
678
679 int oldsstate, oldcstate;
680 int ret;
681
682 /* don't do any tracking before tcp handshake complete */
683 if (ctinfo != IP_CT_ESTABLISHED
684 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
685 DEBUGP("ctinfo = %u, skipping\n", ctinfo);
686 return NF_ACCEPT;
687 }
688
689 nexthdr_off = (*pskb)->nh.iph->ihl*4;
690 tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
691 BUG_ON(!tcph);
692 nexthdr_off += tcph->doff * 4;
693 datalen = tcplen - tcph->doff * 4;
694
695 if (tcph->fin || tcph->rst) {
696 DEBUGP("RST/FIN received, timeouting GRE\n");
697 /* can't do this after real newnat */
698 info->cstate = PPTP_CALL_NONE;
699
700 /* untrack this call id, unexpect GRE packets */
701 pptp_destroy_siblings(ct);
702 }
703
704 pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
705 if (!pptph) {
706 DEBUGP("no full PPTP header, can't track\n");
707 return NF_ACCEPT;
708 }
709 nexthdr_off += sizeof(_pptph);
710 datalen -= sizeof(_pptph);
711
712 /* if it's not a control message we can't do anything with it */
713 if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
714 ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
715 DEBUGP("not a control packet\n");
716 return NF_ACCEPT;
717 }
718
719 oldsstate = info->sstate;
720 oldcstate = info->cstate;
721
722 spin_lock_bh(&ip_pptp_lock);
723
724 /* FIXME: We just blindly assume that the control connection is always
725 * established from PNS->PAC. However, RFC makes no guarantee */
726 if (dir == IP_CT_DIR_ORIGINAL)
727 /* client -> server (PNS -> PAC) */
728 ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
729 ctinfo);
730 else
731 /* server -> client (PAC -> PNS) */
732 ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
733 ctinfo);
734 DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
735 oldsstate, info->sstate, oldcstate, info->cstate);
736 spin_unlock_bh(&ip_pptp_lock);
737
738 return ret;
739}
740
741/* control protocol helper */
742static struct ip_conntrack_helper pptp = {
743 .list = { NULL, NULL },
744 .name = "pptp",
745 .me = THIS_MODULE,
746 .max_expected = 2,
747 .timeout = 5 * 60,
748 .tuple = { .src = { .ip = 0,
749 .u = { .tcp = { .port =
750 __constant_htons(PPTP_CONTROL_PORT) } }
751 },
752 .dst = { .ip = 0,
753 .u = { .all = 0 },
754 .protonum = IPPROTO_TCP
755 }
756 },
757 .mask = { .src = { .ip = 0,
758 .u = { .tcp = { .port = 0xffff } }
759 },
760 .dst = { .ip = 0,
761 .u = { .all = 0 },
762 .protonum = 0xff
763 }
764 },
765 .help = conntrack_pptp_help
766};
767
768extern void __exit ip_ct_proto_gre_fini(void);
769extern int __init ip_ct_proto_gre_init(void);
770
771/* ip_conntrack_pptp initialization */
772static int __init init(void)
773{
774 int retcode;
775
776 retcode = ip_ct_proto_gre_init();
777 if (retcode < 0)
778 return retcode;
779
780 DEBUGP(" registering helper\n");
781 if ((retcode = ip_conntrack_helper_register(&pptp))) {
782 printk(KERN_ERR "Unable to register conntrack application "
783 "helper for pptp: %d\n", retcode);
784 ip_ct_proto_gre_fini();
785 return retcode;
786 }
787
788 printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
789 return 0;
790}
791
792static void __exit fini(void)
793{
794 ip_conntrack_helper_unregister(&pptp);
795 ip_ct_proto_gre_fini();
796 printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
797}
798
799module_init(init);
800module_exit(fini);
801
802EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
803EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
804EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
805EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
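The four ip_nat_pptp_hook_* pointers exported above are the seam between connection tracking and NAT: the companion ip_nat_helper_pptp.c (listed in the diffstat but outside this section) assigns them at load time. A hedged sketch of how a NAT-side module attaches to the outbound hook; my_nat_pptp_outbound, my_nat_init and my_nat_fini are made-up names, not from the patch, and the inbound, exp_gre and expectfn hooks follow the same pattern:

/* hypothetical NAT-side module attaching to the exported hook */
static int my_nat_pptp_outbound(struct sk_buff **pskb,
				struct ip_conntrack *ct,
				enum ip_conntrack_info ctinfo,
				struct PptpControlHeader *ctlh,
				union pptp_ctrl_union *pptpReq)
{
	/* rewrite call IDs in the outbound control message here */
	return NF_ACCEPT;
}

static int __init my_nat_init(void)
{
	ip_nat_pptp_hook_outbound = my_nat_pptp_outbound;
	return 0;
}

static void __exit my_nat_fini(void)
{
	ip_nat_pptp_hook_outbound = NULL;
}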
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 15aef3564742..b08a432efcf8 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1270 if (err < 0) 1270 if (err < 0)
1271 return err; 1271 return err;
1272 1272
1273 exp = ip_conntrack_expect_find_get(&tuple); 1273 exp = ip_conntrack_expect_find(&tuple);
1274 if (!exp) 1274 if (!exp)
1275 return -ENOENT; 1275 return -ENOENT;
1276 1276
@@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1318 return err; 1318 return err;
1319 1319
1320 /* bump usage count to 2 */ 1320 /* bump usage count to 2 */
1321 exp = ip_conntrack_expect_find_get(&tuple); 1321 exp = ip_conntrack_expect_find(&tuple);
1322 if (!exp) 1322 if (!exp)
1323 return -ENOENT; 1323 return -ENOENT;
1324 1324
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
new file mode 100644
index 000000000000..de3cb9db6f85
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -0,0 +1,327 @@
1/*
2 * ip_conntrack_proto_gre.c - Version 3.0
3 *
4 * Connection tracking protocol helper module for GRE.
5 *
6 * GRE is a generic encapsulation protocol, which is generally not very
 7 * suited for NAT, as it has no protocol-specific field such as port numbers.
8 *
 9 * It has an optional key field, which may help us distinguish two
10 * connections between the same two hosts.
11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves us for the same purpose as the key
16 * field in plain GRE.
17 *
18 * Documentation about PPTP can be found in RFC 2637
19 *
20 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/module.h>
28#include <linux/types.h>
29#include <linux/timer.h>
30#include <linux/netfilter.h>
31#include <linux/ip.h>
32#include <linux/in.h>
33#include <linux/list.h>
34
35static DEFINE_RWLOCK(ip_ct_gre_lock);
36#define ASSERT_READ_LOCK(x)
37#define ASSERT_WRITE_LOCK(x)
38
39#include <linux/netfilter_ipv4/listhelp.h>
40#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
41#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
42#include <linux/netfilter_ipv4/ip_conntrack_core.h>
43
44#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
45#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
46
47MODULE_LICENSE("GPL");
48MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
49MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
50
51/* shamelessly stolen from ip_conntrack_proto_udp.c */
52#define GRE_TIMEOUT (30*HZ)
53#define GRE_STREAM_TIMEOUT (180*HZ)
54
55#if 0
56#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
57#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
58 NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
59 NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
60#else
61#define DEBUGP(x, args...)
62#define DUMP_TUPLE_GRE(x)
63#endif
64
65/* GRE KEYMAP HANDLING FUNCTIONS */
66static LIST_HEAD(gre_keymap_list);
67
68static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
69 const struct ip_conntrack_tuple *t)
70{
71 return ((km->tuple.src.ip == t->src.ip) &&
72 (km->tuple.dst.ip == t->dst.ip) &&
73 (km->tuple.dst.protonum == t->dst.protonum) &&
74 (km->tuple.dst.u.all == t->dst.u.all));
75}
76
77/* look up the source key for a given tuple */
78static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t)
79{
80 struct ip_ct_gre_keymap *km;
81 u_int32_t key = 0;
82
83 read_lock_bh(&ip_ct_gre_lock);
84 km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
85 struct ip_ct_gre_keymap *, t);
86 if (km)
87 key = km->tuple.src.u.gre.key;
88 read_unlock_bh(&ip_ct_gre_lock);
89
90 DEBUGP("lookup src key 0x%x up key for ", key);
91 DUMP_TUPLE_GRE(t);
92
93 return key;
94}
95
96/* add a single keymap entry, associate with specified master ct */
97int
98ip_ct_gre_keymap_add(struct ip_conntrack *ct,
99 struct ip_conntrack_tuple *t, int reply)
100{
101 struct ip_ct_gre_keymap **exist_km, *km, *old;
102
103 if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
104 DEBUGP("refusing to add GRE keymap to non-pptp session\n");
105 return -1;
106 }
107
108 if (!reply)
109 exist_km = &ct->help.ct_pptp_info.keymap_orig;
110 else
111 exist_km = &ct->help.ct_pptp_info.keymap_reply;
112
113 if (*exist_km) {
114 /* check whether it's a retransmission */
115 old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
116 struct ip_ct_gre_keymap *, t);
117 if (old == *exist_km) {
118 DEBUGP("retransmission\n");
119 return 0;
120 }
121
122 DEBUGP("trying to override keymap_%s for ct %p\n",
123 reply? "reply":"orig", ct);
124 return -EEXIST;
125 }
126
127 km = kmalloc(sizeof(*km), GFP_ATOMIC);
128 if (!km)
129 return -ENOMEM;
130
131 memcpy(&km->tuple, t, sizeof(*t));
132 *exist_km = km;
133
134 DEBUGP("adding new entry %p: ", km);
135 DUMP_TUPLE_GRE(&km->tuple);
136
137 write_lock_bh(&ip_ct_gre_lock);
138 list_append(&gre_keymap_list, km);
139 write_unlock_bh(&ip_ct_gre_lock);
140
141 return 0;
142}
143
144/* destroy the keymap entries associated with specified master ct */
145void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
146{
147 DEBUGP("entering for ct %p\n", ct);
148
149 if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
150 DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
151 return;
152 }
153
154 write_lock_bh(&ip_ct_gre_lock);
155 if (ct->help.ct_pptp_info.keymap_orig) {
156 DEBUGP("removing %p from list\n",
157 ct->help.ct_pptp_info.keymap_orig);
158 list_del(&ct->help.ct_pptp_info.keymap_orig->list);
159 kfree(ct->help.ct_pptp_info.keymap_orig);
160 ct->help.ct_pptp_info.keymap_orig = NULL;
161 }
162 if (ct->help.ct_pptp_info.keymap_reply) {
163 DEBUGP("removing %p from list\n",
164 ct->help.ct_pptp_info.keymap_reply);
165 list_del(&ct->help.ct_pptp_info.keymap_reply->list);
166 kfree(ct->help.ct_pptp_info.keymap_reply);
167 ct->help.ct_pptp_info.keymap_reply = NULL;
168 }
169 write_unlock_bh(&ip_ct_gre_lock);
170}
171
172
173/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
174
175/* invert gre part of tuple */
176static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
177 const struct ip_conntrack_tuple *orig)
178{
179 tuple->dst.u.gre.key = orig->src.u.gre.key;
180 tuple->src.u.gre.key = orig->dst.u.gre.key;
181
182 return 1;
183}
184
185/* gre hdr info to tuple */
186static int gre_pkt_to_tuple(const struct sk_buff *skb,
187 unsigned int dataoff,
188 struct ip_conntrack_tuple *tuple)
189{
190 struct gre_hdr_pptp _pgrehdr, *pgrehdr;
191 u_int32_t srckey;
192 struct gre_hdr _grehdr, *grehdr;
193
194 /* first only delinearize old RFC1701 GRE header */
195 grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
196 if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
197 /* try to behave like "ip_conntrack_proto_generic" */
198 tuple->src.u.all = 0;
199 tuple->dst.u.all = 0;
200 return 1;
201 }
202
203 /* PPTP header is variable length, only need up to the call_id field */
204 pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
205 if (!pgrehdr)
206 return 1;
207
208 if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
209 DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
210 return 0;
211 }
212
213 tuple->dst.u.gre.key = pgrehdr->call_id;
214 srckey = gre_keymap_lookup(tuple);
215 tuple->src.u.gre.key = srckey;
216
217 return 1;
218}
219
220/* print gre part of tuple */
221static int gre_print_tuple(struct seq_file *s,
222 const struct ip_conntrack_tuple *tuple)
223{
224 return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
225 ntohs(tuple->src.u.gre.key),
226 ntohs(tuple->dst.u.gre.key));
227}
228
229/* print private data for conntrack */
230static int gre_print_conntrack(struct seq_file *s,
231 const struct ip_conntrack *ct)
232{
233 return seq_printf(s, "timeout=%u, stream_timeout=%u ",
234 (ct->proto.gre.timeout / HZ),
235 (ct->proto.gre.stream_timeout / HZ));
236}
237
238/* Returns verdict for packet, and may modify conntrack */
239static int gre_packet(struct ip_conntrack *ct,
240 const struct sk_buff *skb,
241 enum ip_conntrack_info conntrackinfo)
242{
243 /* If we've seen traffic both ways, this is a GRE connection.
244 * Extend timeout. */
245 if (ct->status & IPS_SEEN_REPLY) {
246 ip_ct_refresh_acct(ct, conntrackinfo, skb,
247 ct->proto.gre.stream_timeout);
248 /* Also, more likely to be important, and not a probe. */
249 set_bit(IPS_ASSURED_BIT, &ct->status);
250 } else
251 ip_ct_refresh_acct(ct, conntrackinfo, skb,
252 ct->proto.gre.timeout);
253
254 return NF_ACCEPT;
255}
256
257/* Called when a new connection for this protocol found. */
258static int gre_new(struct ip_conntrack *ct,
259 const struct sk_buff *skb)
260{
261 DEBUGP(": ");
262 DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
263
264	/* initialize to sane values. Ideally a conntrack helper
265	 * (e.g. in the case of pptp) will increase them */
266 ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
267 ct->proto.gre.timeout = GRE_TIMEOUT;
268
269 return 1;
270}
271
272/* Called when a conntrack entry has already been removed from the hashes
273 * and is about to be deleted from memory */
274static void gre_destroy(struct ip_conntrack *ct)
275{
276 struct ip_conntrack *master = ct->master;
277 DEBUGP(" entering\n");
278
279 if (!master)
280 DEBUGP("no master !?!\n");
281 else
282 ip_ct_gre_keymap_destroy(master);
283}
284
285/* protocol helper struct */
286static struct ip_conntrack_protocol gre = {
287 .proto = IPPROTO_GRE,
288 .name = "gre",
289 .pkt_to_tuple = gre_pkt_to_tuple,
290 .invert_tuple = gre_invert_tuple,
291 .print_tuple = gre_print_tuple,
292 .print_conntrack = gre_print_conntrack,
293 .packet = gre_packet,
294 .new = gre_new,
295 .destroy = gre_destroy,
296 .me = THIS_MODULE,
297#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
298 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
299 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
300 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
301#endif
302};
303
304/* ip_conntrack_proto_gre initialization */
305int __init ip_ct_proto_gre_init(void)
306{
307 return ip_conntrack_protocol_register(&gre);
308}
309
310void __exit ip_ct_proto_gre_fini(void)
311{
312 struct list_head *pos, *n;
313
314 /* delete all keymap entries */
315 write_lock_bh(&ip_ct_gre_lock);
316 list_for_each_safe(pos, n, &gre_keymap_list) {
317 DEBUGP("deleting keymap %p at module unload time\n", pos);
318 list_del(pos);
319 kfree(pos);
320 }
321 write_unlock_bh(&ip_ct_gre_lock);
322
323 ip_conntrack_protocol_unregister(&gre);
324}
325
326EXPORT_SYMBOL(ip_ct_gre_keymap_add);
327EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
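
A minimal sketch of how a conntrack helper is meant to drive the two exported keymap symbols above, mirroring the call pattern of pptp_exp_gre() in ip_nat_helper_pptp.c further down; the function and its tuple arguments are hypothetical, only ip_ct_gre_keymap_add() and ip_ct_gre_keymap_destroy() are real:

    #include <linux/netfilter_ipv4/ip_conntrack.h>
    #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>

    /* 'master' is the PPTP control connection; 'orig' and 'reply' are the
     * expected GRE tuples for the two directions of the data channel. */
    static int example_setup_gre_keymaps(struct ip_conntrack *master,
                                         struct ip_conntrack_tuple *orig,
                                         struct ip_conntrack_tuple *reply)
    {
            if (ip_ct_gre_keymap_add(master, orig, 0) < 0)
                    return -1;                         /* original direction */

            if (ip_ct_gre_keymap_add(master, reply, 1) < 0) {
                    ip_ct_gre_keymap_destroy(master);  /* undo the first one */
                    return -1;
            }
            return 0;        /* both entries are freed again on ct teardown */
    }
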
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index ae3e3e655db5..d3c7808010ec 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -993,11 +993,11 @@ EXPORT_SYMBOL(ip_ct_refresh_acct);
993 993
994EXPORT_SYMBOL(ip_conntrack_expect_alloc); 994EXPORT_SYMBOL(ip_conntrack_expect_alloc);
995EXPORT_SYMBOL(ip_conntrack_expect_put); 995EXPORT_SYMBOL(ip_conntrack_expect_put);
996EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); 996EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
997EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
997EXPORT_SYMBOL(ip_conntrack_expect_related); 998EXPORT_SYMBOL(ip_conntrack_expect_related);
998EXPORT_SYMBOL(ip_conntrack_unexpect_related); 999EXPORT_SYMBOL(ip_conntrack_unexpect_related);
999EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); 1000EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
1000EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
1001EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); 1001EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
1002 1002
1003EXPORT_SYMBOL(ip_conntrack_tuple_taken); 1003EXPORT_SYMBOL(ip_conntrack_tuple_taken);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1adedb743f60..c3ea891d38e7 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
578 578
579 return ret; 579 return ret;
580} 580}
581EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
582EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
581#endif 583#endif
582 584
583int __init ip_nat_init(void) 585int __init ip_nat_init(void)
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
new file mode 100644
index 000000000000..3cdd0684d30d
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -0,0 +1,401 @@
1/*
2 * ip_nat_pptp.c - Version 3.0
3 *
4 * NAT support for PPTP (Point to Point Tunneling Protocol).
5 * PPTP is a protocol for creating virtual private networks.
6 * It is a specification defined by Microsoft and some vendors
7 * working with Microsoft. PPTP is built on top of a modified
8 * version of the Internet Generic Routing Encapsulation Protocol.
9 * GRE is defined in RFC 1701 and RFC 1702. Documentation of
10 * PPTP can be found in RFC 2637
11 *
12 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
13 *
14 * Development of this code funded by Astaro AG (http://www.astaro.com/)
15 *
16 * TODO: - NAT to a unique tuple, not to TCP source port
17 * (needs netfilter tuple reservation)
18 *
19 * Changes:
20 * 2002-02-10 - Version 1.3
21 * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
22 * in local connections (Philip Craig <philipc@snapgear.com>)
23 * - add checks for magicCookie and pptp version
24 * - make argument list of pptp_{out,in}bound_packet() shorter
25 * - move to C99 style initializers
26 * - print version number at module loadtime
27 * 2003-09-22 - Version 1.5
28 * - use SNATed tcp sourceport as callid, since we get called before
29 * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
30 * 2004-10-22 - Version 2.0
31 * - kernel 2.6.x version
32 * 2005-06-10 - Version 3.0
33 * - kernel >= 2.6.11 version,
34 * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
35 *
36 */
37
38#include <linux/config.h>
39#include <linux/module.h>
40#include <linux/ip.h>
41#include <linux/tcp.h>
42#include <net/tcp.h>
43
44#include <linux/netfilter_ipv4/ip_nat.h>
45#include <linux/netfilter_ipv4/ip_nat_rule.h>
46#include <linux/netfilter_ipv4/ip_nat_helper.h>
47#include <linux/netfilter_ipv4/ip_nat_pptp.h>
48#include <linux/netfilter_ipv4/ip_conntrack_core.h>
49#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
50#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
51#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
52
53#define IP_NAT_PPTP_VERSION "3.0"
54
55MODULE_LICENSE("GPL");
56MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
57MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
58
59
60#if 0
61extern const char *pptp_msg_name[];
62#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
63 __FUNCTION__, ## args)
64#else
65#define DEBUGP(format, args...)
66#endif
67
68static void pptp_nat_expected(struct ip_conntrack *ct,
69 struct ip_conntrack_expect *exp)
70{
71 struct ip_conntrack *master = ct->master;
72 struct ip_conntrack_expect *other_exp;
73 struct ip_conntrack_tuple t;
74 struct ip_ct_pptp_master *ct_pptp_info;
75 struct ip_nat_pptp *nat_pptp_info;
76
77 ct_pptp_info = &master->help.ct_pptp_info;
78 nat_pptp_info = &master->nat.help.nat_pptp_info;
79
80 /* And here goes the grand finale of corrosion... */
81
82 if (exp->dir == IP_CT_DIR_ORIGINAL) {
83 DEBUGP("we are PNS->PAC\n");
84 /* therefore, build tuple for PAC->PNS */
85 t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
86 t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
87 t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
88 t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
89 t.dst.protonum = IPPROTO_GRE;
90 } else {
91 DEBUGP("we are PAC->PNS\n");
92 /* build tuple for PNS->PAC */
93 t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
94 t.src.u.gre.key =
95 htons(master->nat.help.nat_pptp_info.pns_call_id);
96 t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
97 t.dst.u.gre.key =
98 htons(master->nat.help.nat_pptp_info.pac_call_id);
99 t.dst.protonum = IPPROTO_GRE;
100 }
101
102 DEBUGP("trying to unexpect other dir: ");
103 DUMP_TUPLE(&t);
104 other_exp = ip_conntrack_expect_find(&t);
105 if (other_exp) {
106 ip_conntrack_unexpect_related(other_exp);
107 ip_conntrack_expect_put(other_exp);
108 DEBUGP("success\n");
109 } else {
110 DEBUGP("not found!\n");
111 }
112
113 ip_nat_follow_master(ct, exp);
114}
115
116/* outbound packets == from PNS to PAC */
117static int
118pptp_outbound_pkt(struct sk_buff **pskb,
119 struct ip_conntrack *ct,
120 enum ip_conntrack_info ctinfo,
121 struct PptpControlHeader *ctlh,
122 union pptp_ctrl_union *pptpReq)
123
124{
125 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
126 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
127
128 u_int16_t msg, *cid = NULL, new_callid;
129
130 new_callid = htons(ct_pptp_info->pns_call_id);
131
132 switch (msg = ntohs(ctlh->messageType)) {
133 case PPTP_OUT_CALL_REQUEST:
134 cid = &pptpReq->ocreq.callID;
135 /* FIXME: ideally we would want to reserve a call ID
136 * here. current netfilter NAT core is not able to do
137 * this :( For now we use TCP source port. This breaks
138 * multiple calls within one control session */
139
140 /* save original call ID in nat_info */
141 nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
142
143 /* don't use tcph->source since we are at a DSTmanip
144 * hook (e.g. PREROUTING) and pkt is not mangled yet */
145 new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
146
147 /* save new call ID in ct info */
148 ct_pptp_info->pns_call_id = ntohs(new_callid);
149 break;
150 case PPTP_IN_CALL_REPLY:
151 cid = &pptpReq->icreq.callID;
152 break;
153 case PPTP_CALL_CLEAR_REQUEST:
154 cid = &pptpReq->clrreq.callID;
155 break;
156 default:
157 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
158 (msg <= PPTP_MSG_MAX)?
159 pptp_msg_name[msg]:pptp_msg_name[0]);
160 /* fall through */
161
162 case PPTP_SET_LINK_INFO:
163 /* only need to NAT in case PAC is behind NAT box */
164 case PPTP_START_SESSION_REQUEST:
165 case PPTP_START_SESSION_REPLY:
166 case PPTP_STOP_SESSION_REQUEST:
167 case PPTP_STOP_SESSION_REPLY:
168 case PPTP_ECHO_REQUEST:
169 case PPTP_ECHO_REPLY:
170 /* no need to alter packet */
171 return NF_ACCEPT;
172 }
173
174 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
175 * down to here */
176
177 IP_NF_ASSERT(cid);
178
179 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
180 ntohs(*cid), ntohs(new_callid));
181
182 /* mangle packet */
183 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
184 (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
185 sizeof(new_callid),
186 (char *)&new_callid,
187 sizeof(new_callid)) == 0)
188 return NF_DROP;
189
190 return NF_ACCEPT;
191}
192
193static int
194pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
195 struct ip_conntrack_expect *expect_reply)
196{
197 struct ip_ct_pptp_master *ct_pptp_info =
198 &expect_orig->master->help.ct_pptp_info;
199 struct ip_nat_pptp *nat_pptp_info =
200 &expect_orig->master->nat.help.nat_pptp_info;
201
202 struct ip_conntrack *ct = expect_orig->master;
203
204 struct ip_conntrack_tuple inv_t;
205 struct ip_conntrack_tuple *orig_t, *reply_t;
206
207 /* save original PAC call ID in nat_info */
208 nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
209
210 /* alter expectation */
211 orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
212 reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
213
214 /* alter expectation for PNS->PAC direction */
215 invert_tuplepr(&inv_t, &expect_orig->tuple);
216 expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
217 expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
218 expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
219 inv_t.src.ip = reply_t->src.ip;
220 inv_t.dst.ip = reply_t->dst.ip;
221 inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
222 inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
223
224 if (!ip_conntrack_expect_related(expect_orig)) {
225 DEBUGP("successfully registered expect\n");
226 } else {
227 DEBUGP("can't expect_related(expect_orig)\n");
228 return 1;
229 }
230
231 /* alter expectation for PAC->PNS direction */
232 invert_tuplepr(&inv_t, &expect_reply->tuple);
233 expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
234 expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
235 expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
236 inv_t.src.ip = orig_t->src.ip;
237 inv_t.dst.ip = orig_t->dst.ip;
238 inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
239 inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
240
241 if (!ip_conntrack_expect_related(expect_reply)) {
242 DEBUGP("successfully registered expect\n");
243 } else {
244 DEBUGP("can't expect_related(expect_reply)\n");
245 ip_conntrack_unexpect_related(expect_orig);
246 return 1;
247 }
248
249 if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
250 DEBUGP("can't register original keymap\n");
251 ip_conntrack_unexpect_related(expect_orig);
252 ip_conntrack_unexpect_related(expect_reply);
253 return 1;
254 }
255
256 if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
257 DEBUGP("can't register reply keymap\n");
258 ip_conntrack_unexpect_related(expect_orig);
259 ip_conntrack_unexpect_related(expect_reply);
260 ip_ct_gre_keymap_destroy(ct);
261 return 1;
262 }
263
264 return 0;
265}
266
267/* inbound packets == from PAC to PNS */
268static int
269pptp_inbound_pkt(struct sk_buff **pskb,
270 struct ip_conntrack *ct,
271 enum ip_conntrack_info ctinfo,
272 struct PptpControlHeader *ctlh,
273 union pptp_ctrl_union *pptpReq)
274{
275 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
276 u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
277
278 int ret = NF_ACCEPT, rv;
279
280 new_pcid = htons(nat_pptp_info->pns_call_id);
281
282 switch (msg = ntohs(ctlh->messageType)) {
283 case PPTP_OUT_CALL_REPLY:
284 pcid = &pptpReq->ocack.peersCallID;
285 cid = &pptpReq->ocack.callID;
286 break;
287 case PPTP_IN_CALL_CONNECT:
288 pcid = &pptpReq->iccon.peersCallID;
289 break;
290 case PPTP_IN_CALL_REQUEST:
291 /* only need to nat in case PAC is behind NAT box */
292 break;
293 case PPTP_WAN_ERROR_NOTIFY:
294 pcid = &pptpReq->wanerr.peersCallID;
295 break;
296 case PPTP_CALL_DISCONNECT_NOTIFY:
297 pcid = &pptpReq->disc.callID;
298 break;
299 case PPTP_SET_LINK_INFO:
300 pcid = &pptpReq->setlink.peersCallID;
301 break;
302
303 default:
304 DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
305 pptp_msg_name[msg]:pptp_msg_name[0]);
306 /* fall through */
307
308 case PPTP_START_SESSION_REQUEST:
309 case PPTP_START_SESSION_REPLY:
310 case PPTP_STOP_SESSION_REQUEST:
311 case PPTP_STOP_SESSION_REPLY:
312 case PPTP_ECHO_REQUEST:
313 case PPTP_ECHO_REPLY:
314 /* no need to alter packet */
315 return NF_ACCEPT;
316 }
317
318 /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
319 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
320
321 /* mangle packet */
322 IP_NF_ASSERT(pcid);
323 DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
324 ntohs(*pcid), ntohs(new_pcid));
325
326 rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
327 (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
328 sizeof(new_pcid), (char *)&new_pcid,
329 sizeof(new_pcid));
330 if (rv != NF_ACCEPT)
331 return rv;
332
333 if (new_cid) {
334 IP_NF_ASSERT(cid);
335 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
336 ntohs(*cid), ntohs(new_cid));
337 rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
338 (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
339 sizeof(new_cid),
340 (char *)&new_cid,
341 sizeof(new_cid));
342 if (rv != NF_ACCEPT)
343 return rv;
344 }
345
346 /* check for earlier return value of 'switch' above */
347 if (ret != NF_ACCEPT)
348 return ret;
349
350 /* great, at least we don't need to resize packets */
351 return NF_ACCEPT;
352}
353
354
355extern int __init ip_nat_proto_gre_init(void);
356extern void __exit ip_nat_proto_gre_fini(void);
357
358static int __init init(void)
359{
360 int ret;
361
362 DEBUGP("%s: registering NAT helper\n", __FILE__);
363
364 ret = ip_nat_proto_gre_init();
365 if (ret < 0)
366 return ret;
367
368 BUG_ON(ip_nat_pptp_hook_outbound);
369 ip_nat_pptp_hook_outbound = &pptp_outbound_pkt;
370
371 BUG_ON(ip_nat_pptp_hook_inbound);
372 ip_nat_pptp_hook_inbound = &pptp_inbound_pkt;
373
374 BUG_ON(ip_nat_pptp_hook_exp_gre);
375 ip_nat_pptp_hook_exp_gre = &pptp_exp_gre;
376
377 BUG_ON(ip_nat_pptp_hook_expectfn);
378 ip_nat_pptp_hook_expectfn = &pptp_nat_expected;
379
380 printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
381 return 0;
382}
383
384static void __exit fini(void)
385{
386	DEBUGP("cleanup_module\n");
387
388 ip_nat_pptp_hook_expectfn = NULL;
389 ip_nat_pptp_hook_exp_gre = NULL;
390 ip_nat_pptp_hook_inbound = NULL;
391 ip_nat_pptp_hook_outbound = NULL;
392
393 ip_nat_proto_gre_fini();
394	/* Make sure no one calls it in the meantime */
395 synchronize_net();
396
397 printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
398}
399
400module_init(init);
401module_exit(fini);
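
The match_offset handed to ip_nat_mangle_tcp_packet() in pptp_outbound_pkt() and pptp_inbound_pkt() above comes from fairly dense pointer arithmetic. Here is a small hypothetical helper spelling out the same computation, under the assumption (implicit in the helper) that the PptpControlHeader sits directly behind the pptp_pkt_hdr at the start of the TCP payload:

    #include <linux/netfilter_ipv4/ip_conntrack_pptp.h>

    /* Byte offset of 'field' (e.g. &pptpReq->ocreq.callID) from the start of
     * the PPTP control message, i.e. from the start of the TCP payload; this
     * is the quantity ip_nat_mangle_tcp_packet() expects as match_offset. */
    static inline unsigned int pptp_field_offset(struct PptpControlHeader *ctlh,
                                                 void *field)
    {
            void *payload_start = (void *)ctlh - sizeof(struct pptp_pkt_hdr);

            return field - payload_start;
    }
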
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
new file mode 100644
index 000000000000..7c1285401672
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -0,0 +1,214 @@
1/*
2 * ip_nat_proto_gre.c - Version 2.0
3 *
4 * NAT protocol helper module for GRE.
5 *
6 * GRE is a generic encapsulation protocol, which is generally not well
7 * suited for NAT, as it has no protocol-specific part such as port numbers.
8 *
9 * It has an optional key field, which may help us distinguish two
10 * connections between the same two hosts.
11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves us for the same purpose as the key
16 * field in plain GRE.
17 *
18 * Documentation about PPTP can be found in RFC 2637
19 *
20 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 *
24 */
25
26#include <linux/config.h>
27#include <linux/module.h>
28#include <linux/ip.h>
29#include <linux/netfilter_ipv4/ip_nat.h>
30#include <linux/netfilter_ipv4/ip_nat_rule.h>
31#include <linux/netfilter_ipv4/ip_nat_protocol.h>
32#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
33
34MODULE_LICENSE("GPL");
35MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
36MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
37
38#if 0
39#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
40 __FUNCTION__, ## args)
41#else
42#define DEBUGP(x, args...)
43#endif
44
45/* is key in given range between min and max */
46static int
47gre_in_range(const struct ip_conntrack_tuple *tuple,
48 enum ip_nat_manip_type maniptype,
49 const union ip_conntrack_manip_proto *min,
50 const union ip_conntrack_manip_proto *max)
51{
52 u_int32_t key;
53
54 if (maniptype == IP_NAT_MANIP_SRC)
55 key = tuple->src.u.gre.key;
56 else
57 key = tuple->dst.u.gre.key;
58
59 return ntohl(key) >= ntohl(min->gre.key)
60 && ntohl(key) <= ntohl(max->gre.key);
61}
62
63/* generate unique tuple ... */
64static int
65gre_unique_tuple(struct ip_conntrack_tuple *tuple,
66 const struct ip_nat_range *range,
67 enum ip_nat_manip_type maniptype,
68 const struct ip_conntrack *conntrack)
69{
70 static u_int16_t key;
71 u_int16_t *keyptr;
72 unsigned int min, i, range_size;
73
74 if (maniptype == IP_NAT_MANIP_SRC)
75 keyptr = &tuple->src.u.gre.key;
76 else
77 keyptr = &tuple->dst.u.gre.key;
78
79 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
80 DEBUGP("%p: NATing GRE PPTP\n", conntrack);
81 min = 1;
82 range_size = 0xffff;
83 } else {
84 min = ntohl(range->min.gre.key);
85 range_size = ntohl(range->max.gre.key) - min + 1;
86 }
87
88 DEBUGP("min = %u, range_size = %u\n", min, range_size);
89
90 for (i = 0; i < range_size; i++, key++) {
91 *keyptr = htonl(min + key % range_size);
92 if (!ip_nat_used_tuple(tuple, conntrack))
93 return 1;
94 }
95
96 DEBUGP("%p: no NAT mapping\n", conntrack);
97
98 return 0;
99}
100
101/* manipulate a GRE packet according to maniptype */
102static int
103gre_manip_pkt(struct sk_buff **pskb,
104 unsigned int iphdroff,
105 const struct ip_conntrack_tuple *tuple,
106 enum ip_nat_manip_type maniptype)
107{
108 struct gre_hdr *greh;
109 struct gre_hdr_pptp *pgreh;
110 struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
111 unsigned int hdroff = iphdroff + iph->ihl*4;
112
113 /* pgreh includes two optional 32bit fields which are not required
114 * to be there. That's where the magic '8' comes from */
115 if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
116 return 0;
117
118 greh = (void *)(*pskb)->data + hdroff;
119 pgreh = (struct gre_hdr_pptp *) greh;
120
121	/* we only do destination manipulation of a packet, since the 'source key'
122 * is not present in the packet itself */
123 if (maniptype == IP_NAT_MANIP_DST) {
124 /* key manipulation is always dest */
125 switch (greh->version) {
126 case 0:
127 if (!greh->key) {
128 DEBUGP("can't nat GRE w/o key\n");
129 break;
130 }
131 if (greh->csum) {
132 /* FIXME: Never tested this code... */
133 *(gre_csum(greh)) =
134 ip_nat_cheat_check(~*(gre_key(greh)),
135 tuple->dst.u.gre.key,
136 *(gre_csum(greh)));
137 }
138 *(gre_key(greh)) = tuple->dst.u.gre.key;
139 break;
140 case GRE_VERSION_PPTP:
141 DEBUGP("call_id -> 0x%04x\n",
142 ntohl(tuple->dst.u.gre.key));
143 pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
144 break;
145 default:
146 DEBUGP("can't nat unknown GRE version\n");
147 return 0;
148 break;
149 }
150 }
151 return 1;
152}
153
154/* print out a nat tuple */
155static unsigned int
156gre_print(char *buffer,
157 const struct ip_conntrack_tuple *match,
158 const struct ip_conntrack_tuple *mask)
159{
160 unsigned int len = 0;
161
162 if (mask->src.u.gre.key)
163 len += sprintf(buffer + len, "srckey=0x%x ",
164 ntohl(match->src.u.gre.key));
165
166 if (mask->dst.u.gre.key)
167 len += sprintf(buffer + len, "dstkey=0x%x ",
168			       ntohl(match->dst.u.gre.key));
169
170 return len;
171}
172
173/* print a range of keys */
174static unsigned int
175gre_print_range(char *buffer, const struct ip_nat_range *range)
176{
177 if (range->min.gre.key != 0
178 || range->max.gre.key != 0xFFFF) {
179 if (range->min.gre.key == range->max.gre.key)
180 return sprintf(buffer, "key 0x%x ",
181 ntohl(range->min.gre.key));
182 else
183			return sprintf(buffer, "keys 0x%x-0x%x ",
184 ntohl(range->min.gre.key),
185 ntohl(range->max.gre.key));
186 } else
187 return 0;
188}
189
190/* nat helper struct */
191static struct ip_nat_protocol gre = {
192 .name = "GRE",
193 .protonum = IPPROTO_GRE,
194 .manip_pkt = gre_manip_pkt,
195 .in_range = gre_in_range,
196 .unique_tuple = gre_unique_tuple,
197 .print = gre_print,
198 .print_range = gre_print_range,
199#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
200 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
201 .range_to_nfattr = ip_nat_port_range_to_nfattr,
202 .nfattr_to_range = ip_nat_port_nfattr_to_range,
203#endif
204};
205
206int __init ip_nat_proto_gre_init(void)
207{
208 return ip_nat_protocol_register(&gre);
209}
210
211void __exit ip_nat_proto_gre_fini(void)
212{
213 ip_nat_protocol_unregister(&gre);
214}
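
A standalone userspace sketch of the round-robin selection done by gre_unique_tuple() above: starting from a rolling counter, at most range_size candidates are tried and the first one that a hypothetical in_use() predicate reports as free is taken, in_use() standing in for ip_nat_used_tuple():

    #include <stdint.h>

    static uint16_t rolling;    /* plays the role of the static 'key' above */

    static int pick_free_key(uint16_t min, uint16_t range_size,
                             int (*in_use)(uint16_t), uint16_t *out)
    {
            uint16_t i;

            for (i = 0; i < range_size; i++, rolling++) {
                    uint16_t candidate = min + rolling % range_size;

                    if (!in_use(candidate)) {
                            *out = candidate;
                            return 1;       /* found a free key/call ID */
                    }
            }
            return 0;                       /* whole range is in use */
    }
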
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d38913754b1..9bcb398fbc1f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -13,6 +13,7 @@
13#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/jhash.h> 15#include <linux/jhash.h>
16#include <linux/bitops.h>
16#include <linux/skbuff.h> 17#include <linux/skbuff.h>
17#include <linux/ip.h> 18#include <linux/ip.h>
18#include <linux/tcp.h> 19#include <linux/tcp.h>
@@ -30,7 +31,7 @@
30#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> 31#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
31#include <linux/netfilter_ipv4/ip_conntrack.h> 32#include <linux/netfilter_ipv4/ip_conntrack.h>
32 33
33#define CLUSTERIP_VERSION "0.7" 34#define CLUSTERIP_VERSION "0.8"
34 35
35#define DEBUG_CLUSTERIP 36#define DEBUG_CLUSTERIP
36 37
@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
49struct clusterip_config { 50struct clusterip_config {
50 struct list_head list; /* list of all configs */ 51 struct list_head list; /* list of all configs */
51 atomic_t refcount; /* reference count */ 52 atomic_t refcount; /* reference count */
53 atomic_t entries; /* number of entries/rules
54 * referencing us */
52 55
53 u_int32_t clusterip; /* the IP address */ 56 u_int32_t clusterip; /* the IP address */
54 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ 57 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
55 struct net_device *dev; /* device */ 58 struct net_device *dev; /* device */
56 u_int16_t num_total_nodes; /* total number of nodes */ 59 u_int16_t num_total_nodes; /* total number of nodes */
57 u_int16_t num_local_nodes; /* number of local nodes */ 60 unsigned long local_nodes; /* node number array */
58 u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */
59 61
60#ifdef CONFIG_PROC_FS 62#ifdef CONFIG_PROC_FS
61 struct proc_dir_entry *pde; /* proc dir entry */ 63 struct proc_dir_entry *pde; /* proc dir entry */
@@ -66,8 +68,7 @@ struct clusterip_config {
66 68
67static LIST_HEAD(clusterip_configs); 69static LIST_HEAD(clusterip_configs);
68 70
69/* clusterip_lock protects the clusterip_configs list _AND_ the configurable 71/* clusterip_lock protects the clusterip_configs list */
70 * data within all structures (num_local_nodes, local_nodes[]) */
71static DEFINE_RWLOCK(clusterip_lock); 72static DEFINE_RWLOCK(clusterip_lock);
72 73
73#ifdef CONFIG_PROC_FS 74#ifdef CONFIG_PROC_FS
@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
76#endif 77#endif
77 78
78static inline void 79static inline void
79clusterip_config_get(struct clusterip_config *c) { 80clusterip_config_get(struct clusterip_config *c)
81{
80 atomic_inc(&c->refcount); 82 atomic_inc(&c->refcount);
81} 83}
82 84
83static inline void 85static inline void
84clusterip_config_put(struct clusterip_config *c) { 86clusterip_config_put(struct clusterip_config *c)
85 if (atomic_dec_and_test(&c->refcount)) { 87{
88 if (atomic_dec_and_test(&c->refcount))
89 kfree(c);
90}
91
92/* increase the count of entries(rules) using/referencing this config */
93static inline void
94clusterip_config_entry_get(struct clusterip_config *c)
95{
96 atomic_inc(&c->entries);
97}
98
99/* decrease the count of entries using/referencing this config. If last
100 * entry(rule) is removed, remove the config from lists, but don't free it
101 * yet, since proc-files could still be holding references */
102static inline void
103clusterip_config_entry_put(struct clusterip_config *c)
104{
105 if (atomic_dec_and_test(&c->entries)) {
86 write_lock_bh(&clusterip_lock); 106 write_lock_bh(&clusterip_lock);
87 list_del(&c->list); 107 list_del(&c->list);
88 write_unlock_bh(&clusterip_lock); 108 write_unlock_bh(&clusterip_lock);
109
89 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 110 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
90 dev_put(c->dev); 111 dev_put(c->dev);
91 kfree(c); 112
113 /* In case anyone still accesses the file, the open/close
114 * functions are also incrementing the refcount on their own,
115 * so it's safe to remove the entry even if it's in use. */
116#ifdef CONFIG_PROC_FS
117 remove_proc_entry(c->pde->name, c->pde->parent);
118#endif
92 } 119 }
93} 120}
94 121
95
96static struct clusterip_config * 122static struct clusterip_config *
97__clusterip_config_find(u_int32_t clusterip) 123__clusterip_config_find(u_int32_t clusterip)
98{ 124{
@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
111} 137}
112 138
113static inline struct clusterip_config * 139static inline struct clusterip_config *
114clusterip_config_find_get(u_int32_t clusterip) 140clusterip_config_find_get(u_int32_t clusterip, int entry)
115{ 141{
116 struct clusterip_config *c; 142 struct clusterip_config *c;
117 143
@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
122 return NULL; 148 return NULL;
123 } 149 }
124 atomic_inc(&c->refcount); 150 atomic_inc(&c->refcount);
151 if (entry)
152 atomic_inc(&c->entries);
125 read_unlock_bh(&clusterip_lock); 153 read_unlock_bh(&clusterip_lock);
126 154
127 return c; 155 return c;
128} 156}
129 157
158static void
159clusterip_config_init_nodelist(struct clusterip_config *c,
160 const struct ipt_clusterip_tgt_info *i)
161{
162 int n;
163
164 for (n = 0; n < i->num_local_nodes; n++) {
165 set_bit(i->local_nodes[n] - 1, &c->local_nodes);
166 }
167}
168
130static struct clusterip_config * 169static struct clusterip_config *
131clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, 170clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
132 struct net_device *dev) 171 struct net_device *dev)
@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
143 c->clusterip = ip; 182 c->clusterip = ip;
144 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); 183 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
145 c->num_total_nodes = i->num_total_nodes; 184 c->num_total_nodes = i->num_total_nodes;
146 c->num_local_nodes = i->num_local_nodes; 185 clusterip_config_init_nodelist(c, i);
147 memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
148 c->hash_mode = i->hash_mode; 186 c->hash_mode = i->hash_mode;
149 c->hash_initval = i->hash_initval; 187 c->hash_initval = i->hash_initval;
150 atomic_set(&c->refcount, 1); 188 atomic_set(&c->refcount, 1);
189 atomic_set(&c->entries, 1);
151 190
152#ifdef CONFIG_PROC_FS 191#ifdef CONFIG_PROC_FS
153 /* create proc dir entry */ 192 /* create proc dir entry */
@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
171static int 210static int
172clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) 211clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
173{ 212{
174 int i;
175
176 write_lock_bh(&clusterip_lock);
177 213
178 if (c->num_local_nodes >= CLUSTERIP_MAX_NODES 214 if (nodenum == 0 ||
179 || nodenum > CLUSTERIP_MAX_NODES) { 215 nodenum > c->num_total_nodes)
180 write_unlock_bh(&clusterip_lock);
181 return 1; 216 return 1;
182 }
183
184	/* check if we already have this number in our array */
185 for (i = 0; i < c->num_local_nodes; i++) {
186 if (c->local_nodes[i] == nodenum) {
187 write_unlock_bh(&clusterip_lock);
188 return 1;
189 }
190 }
191 217
192 c->local_nodes[c->num_local_nodes++] = nodenum; 218 /* check if we already have this number in our bitfield */
219 if (test_and_set_bit(nodenum - 1, &c->local_nodes))
220 return 1;
193 221
194 write_unlock_bh(&clusterip_lock);
195 return 0; 222 return 0;
196} 223}
197 224
198static int 225static int
199clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) 226clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
200{ 227{
201 int i; 228 if (nodenum == 0 ||
202 229 nodenum > c->num_total_nodes)
203 write_lock_bh(&clusterip_lock);
204
205 if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
206 write_unlock_bh(&clusterip_lock);
207 return 1; 230 return 1;
208 }
209 231
210 for (i = 0; i < c->num_local_nodes; i++) { 232 if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
211 if (c->local_nodes[i] == nodenum) { 233 return 0;
212 int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
213 memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
214 c->num_local_nodes--;
215 write_unlock_bh(&clusterip_lock);
216 return 0;
217 }
218 }
219 234
220 write_unlock_bh(&clusterip_lock);
221 return 1; 235 return 1;
222} 236}
223 237
@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
285static inline int 299static inline int
286clusterip_responsible(struct clusterip_config *config, u_int32_t hash) 300clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
287{ 301{
288 int i; 302 return test_bit(hash - 1, &config->local_nodes);
289
290 read_lock_bh(&clusterip_lock);
291
292 if (config->num_local_nodes == 0) {
293 read_unlock_bh(&clusterip_lock);
294 return 0;
295 }
296
297 for (i = 0; i < config->num_local_nodes; i++) {
298 if (config->local_nodes[i] == hash) {
299 read_unlock_bh(&clusterip_lock);
300 return 1;
301 }
302 }
303
304 read_unlock_bh(&clusterip_lock);
305
306 return 0;
307} 303}
308 304
309/*********************************************************************** 305/***********************************************************************
@@ -415,8 +411,26 @@ checkentry(const char *tablename,
415 411
416 /* FIXME: further sanity checks */ 412 /* FIXME: further sanity checks */
417 413
418 config = clusterip_config_find_get(e->ip.dst.s_addr); 414 config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
419 if (!config) { 415 if (config) {
416 if (cipinfo->config != NULL) {
417 /* Case A: This is an entry that gets reloaded, since
418 * it still has a cipinfo->config pointer. Simply
419 * increase the entry refcount and return */
420 if (cipinfo->config != config) {
421 printk(KERN_ERR "CLUSTERIP: Reloaded entry "
422 "has invalid config pointer!\n");
423 return 0;
424 }
425 clusterip_config_entry_get(cipinfo->config);
426 } else {
427 /* Case B: This is a new rule referring to an existing
428 * clusterip config. */
429 cipinfo->config = config;
430 clusterip_config_entry_get(cipinfo->config);
431 }
432 } else {
433 /* Case C: This is a completely new clusterip config */
420 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { 434 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
421 printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); 435 printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
422 return 0; 436 return 0;
@@ -443,10 +457,9 @@ checkentry(const char *tablename,
443 } 457 }
444 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); 458 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
445 } 459 }
460 cipinfo->config = config;
446 } 461 }
447 462
448 cipinfo->config = config;
449
450 return 1; 463 return 1;
451} 464}
452 465
@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
455{ 468{
456 struct ipt_clusterip_tgt_info *cipinfo = matchinfo; 469 struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
457 470
458 /* we first remove the proc entry and then drop the reference 471 /* if no more entries are referencing the config, remove it
459 * count. In case anyone still accesses the file, the open/close 472 * from the list and destroy the proc entry */
460 * functions are also incrementing the refcount on their own */ 473 clusterip_config_entry_put(cipinfo->config);
461#ifdef CONFIG_PROC_FS 474
462 remove_proc_entry(cipinfo->config->pde->name,
463 cipinfo->config->pde->parent);
464#endif
465 clusterip_config_put(cipinfo->config); 475 clusterip_config_put(cipinfo->config);
466} 476}
467 477
@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
533 543
534 /* if there is no clusterip configuration for the arp reply's 544 /* if there is no clusterip configuration for the arp reply's
535 * source ip, we don't want to mangle it */ 545 * source ip, we don't want to mangle it */
536 c = clusterip_config_find_get(payload->src_ip); 546 c = clusterip_config_find_get(payload->src_ip, 0);
537 if (!c) 547 if (!c)
538 return NF_ACCEPT; 548 return NF_ACCEPT;
539 549
@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
574 584
575#ifdef CONFIG_PROC_FS 585#ifdef CONFIG_PROC_FS
576 586
587struct clusterip_seq_position {
588 unsigned int pos; /* position */
589 unsigned int weight; /* number of bits set == size */
590 unsigned int bit; /* current bit */
591 unsigned long val; /* current value */
592};
593
577static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) 594static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
578{ 595{
579 struct proc_dir_entry *pde = s->private; 596 struct proc_dir_entry *pde = s->private;
580 struct clusterip_config *c = pde->data; 597 struct clusterip_config *c = pde->data;
581 unsigned int *nodeidx; 598 unsigned int weight;
582 599 u_int32_t local_nodes;
583 read_lock_bh(&clusterip_lock); 600 struct clusterip_seq_position *idx;
584 if (*pos >= c->num_local_nodes) 601
602 /* FIXME: possible race */
603 local_nodes = c->local_nodes;
604 weight = hweight32(local_nodes);
605 if (*pos >= weight)
585 return NULL; 606 return NULL;
586 607
587 nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); 608 idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
588 if (!nodeidx) 609 if (!idx)
589 return ERR_PTR(-ENOMEM); 610 return ERR_PTR(-ENOMEM);
590 611
591 *nodeidx = *pos; 612 idx->pos = *pos;
592 return nodeidx; 613 idx->weight = weight;
614 idx->bit = ffs(local_nodes);
615 idx->val = local_nodes;
616 clear_bit(idx->bit - 1, &idx->val);
617
618 return idx;
593} 619}
594 620
595static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) 621static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
596{ 622{
597 struct proc_dir_entry *pde = s->private; 623 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
598 struct clusterip_config *c = pde->data;
599 unsigned int *nodeidx = (unsigned int *)v;
600 624
601 *pos = ++(*nodeidx); 625 *pos = ++idx->pos;
602 if (*pos >= c->num_local_nodes) { 626 if (*pos >= idx->weight) {
603 kfree(v); 627 kfree(v);
604 return NULL; 628 return NULL;
605 } 629 }
606 return nodeidx; 630 idx->bit = ffs(idx->val);
631 clear_bit(idx->bit - 1, &idx->val);
632 return idx;
607} 633}
608 634
609static void clusterip_seq_stop(struct seq_file *s, void *v) 635static void clusterip_seq_stop(struct seq_file *s, void *v)
610{ 636{
611 kfree(v); 637 kfree(v);
612
613 read_unlock_bh(&clusterip_lock);
614} 638}
615 639
616static int clusterip_seq_show(struct seq_file *s, void *v) 640static int clusterip_seq_show(struct seq_file *s, void *v)
617{ 641{
618 struct proc_dir_entry *pde = s->private; 642 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
619 struct clusterip_config *c = pde->data;
620 unsigned int *nodeidx = (unsigned int *)v;
621 643
622 if (*nodeidx != 0) 644 if (idx->pos != 0)
623 seq_putc(s, ','); 645 seq_putc(s, ',');
624 seq_printf(s, "%u", c->local_nodes[*nodeidx]);
625 646
626 if (*nodeidx == c->num_local_nodes-1) 647 seq_printf(s, "%u", idx->bit);
648
649 if (idx->pos == idx->weight - 1)
627 seq_putc(s, '\n'); 650 seq_putc(s, '\n');
628 651
629 return 0; 652 return 0;
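
The proc seq_file code above now walks a bitmap snapshot instead of an array; a userspace sketch of the same ffs()-and-clear iteration over local_nodes (node numbers are 1-based bit positions, as in set_bit(nodenum - 1, ...)):

    #include <stdio.h>
    #include <strings.h>                    /* ffs() */

    static void print_local_nodes(unsigned int local_nodes)
    {
            unsigned int val = local_nodes; /* snapshot, like idx->val above */
            int first = 1;

            while (val) {
                    int bit = ffs(val);     /* lowest set bit, 1-based */

                    val &= ~(1U << (bit - 1));
                    printf("%s%d", first ? "" : ",", bit);
                    first = 0;
            }
            putchar('\n');
    }

For example, print_local_nodes(0x15) prints "1,3,5", the same comma-separated node list the /proc file emits.
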
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 304bb0a1d4f0..4b0d7e4d6269 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
361 361
362 if (type && code) { 362 if (type && code) {
363 get_user(fl->fl_icmp_type, type); 363 get_user(fl->fl_icmp_type, type);
364 __get_user(fl->fl_icmp_code, code); 364 get_user(fl->fl_icmp_code, code);
365 probed = 1; 365 probed = 1;
366 } 366 }
367 break; 367 break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 29222b964951..a7537c7bbd06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -979,14 +979,19 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
979 if (!before(TCP_SKB_CB(skb)->seq, end_seq)) 979 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
980 break; 980 break;
981 981
982 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
983 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
984
982 pcount = tcp_skb_pcount(skb); 985 pcount = tcp_skb_pcount(skb);
983 986
984 if (pcount > 1 && 987 if (pcount > 1 && !in_sack &&
985 (after(start_seq, TCP_SKB_CB(skb)->seq) || 988 after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
986 before(end_seq, TCP_SKB_CB(skb)->end_seq))) {
987 unsigned int pkt_len; 989 unsigned int pkt_len;
988 990
989 if (after(start_seq, TCP_SKB_CB(skb)->seq)) 991 in_sack = !after(start_seq,
992 TCP_SKB_CB(skb)->seq);
993
994 if (!in_sack)
990 pkt_len = (start_seq - 995 pkt_len = (start_seq -
991 TCP_SKB_CB(skb)->seq); 996 TCP_SKB_CB(skb)->seq);
992 else 997 else
@@ -999,9 +1004,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
999 1004
1000 fack_count += pcount; 1005 fack_count += pcount;
1001 1006
1002 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
1003 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
1004
1005 sacked = TCP_SKB_CB(skb)->sacked; 1007 sacked = TCP_SKB_CB(skb)->sacked;
1006 1008
1007 /* Account D-SACK for retransmitted packet. */ 1009 /* Account D-SACK for retransmitted packet. */
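
The in_sack condition added above uses the usual wrapped 32-bit sequence comparisons; a small sketch with illustrative names (the kernel's own before()/after() live in include/net/tcp.h):

    #include <stdint.h>

    static inline int seq_before(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) < 0;    /* a precedes b, modulo 2^32 */
    }

    static inline int seq_after(uint32_t a, uint32_t b)
    {
            return seq_before(b, a);
    }

    /* A segment [seq, end_seq) lies entirely inside the SACK block
     * [start_seq, block_end): the 'in_sack' test computed above. */
    static inline int segment_in_sack(uint32_t seq, uint32_t end_seq,
                                      uint32_t start_seq, uint32_t block_end)
    {
            return !seq_after(start_seq, seq) &&
                   !seq_before(block_end, end_seq);
    }
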
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a88db28b0af7..b1a63b2c6b4a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
384 newtp->frto_counter = 0; 384 newtp->frto_counter = 0;
385 newtp->frto_highmark = 0; 385 newtp->frto_highmark = 0;
386 386
387 newicsk->icsk_ca_ops = &tcp_reno; 387 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
388 388
389 tcp_set_ca_state(newsk, TCP_CA_Open); 389 tcp_set_ca_state(newsk, TCP_CA_Open);
390 tcp_init_xmit_timers(newsk); 390 tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c10e4435e3b1..5dd6dd7d091e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -435,6 +435,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
435 int nsize, old_factor; 435 int nsize, old_factor;
436 u16 flags; 436 u16 flags;
437 437
438 BUG_ON(len >= skb->len);
439
438 nsize = skb_headlen(skb) - len; 440 nsize = skb_headlen(skb) - len;
439 if (nsize < 0) 441 if (nsize < 0)
440 nsize = 0; 442 nsize = 0;
@@ -459,9 +461,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
459 flags = TCP_SKB_CB(skb)->flags; 461 flags = TCP_SKB_CB(skb)->flags;
460 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 462 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
461 TCP_SKB_CB(buff)->flags = flags; 463 TCP_SKB_CB(buff)->flags = flags;
462 TCP_SKB_CB(buff)->sacked = 464 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
463 (TCP_SKB_CB(skb)->sacked &
464 (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
465 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; 465 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
466 466
467 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { 467 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
@@ -499,6 +499,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
499 tcp_skb_pcount(buff); 499 tcp_skb_pcount(buff);
500 500
501 tp->packets_out -= diff; 501 tp->packets_out -= diff;
502
503 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
504 tp->sacked_out -= diff;
505 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
506 tp->retrans_out -= diff;
507
502 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { 508 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
503 tp->lost_out -= diff; 509 tp->lost_out -= diff;
504 tp->left_out -= diff; 510 tp->left_out -= diff;