aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/rtnetlink.c83
-rw-r--r--net/core/skbuff.c15
-rw-r--r--net/ipv4/inet_diag.c9
-rw-r--r--net/ipv4/netfilter/Kconfig41
-rw-r--r--net/ipv4/netfilter/Makefile6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c12
-rw-r--r--net/ipv4/netfilter/ipt_CONNMARK.c22
-rw-r--r--net/ipv4/netfilter/ipt_NOTRACK.c4
-rw-r--r--net/ipv4/netfilter/ipt_connbytes.c39
-rw-r--r--net/ipv4/netfilter/ipt_connmark.c10
-rw-r--r--net/ipv4/netfilter/ipt_conntrack.c96
-rw-r--r--net/ipv4/netfilter/ipt_helper.c54
-rw-r--r--net/ipv4/netfilter/ipt_state.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c571
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c301
-rw-r--r--net/ipv6/ip6_input.c5
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/netfilter/Kconfig14
-rw-r--r--net/ipv6/netfilter/Makefile6
-rw-r--r--net/ipv6/netfilter/ip6t_MARK.c6
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c556
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c272
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c885
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/netfilter/Kconfig74
-rw-r--r--net/netfilter/Makefile8
-rw-r--r--net/netfilter/nf_conntrack_core.c1538
-rw-r--r--net/netfilter/nf_conntrack_ftp.c698
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c98
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c85
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c670
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c1162
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c216
-rw-r--r--net/netfilter/nf_conntrack_standalone.c869
-rw-r--r--net/netlink/Makefile2
-rw-r--r--net/netlink/af_netlink.c97
-rw-r--r--net/netlink/attr.c328
-rw-r--r--net/netlink/genetlink.c579
-rw-r--r--net/xfrm/xfrm_user.c69
41 files changed, 9306 insertions, 214 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9bed7569ce3f..8700379685e0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -49,6 +49,7 @@
49#include <net/udp.h> 49#include <net/udp.h>
50#include <net/sock.h> 50#include <net/sock.h>
51#include <net/pkt_sched.h> 51#include <net/pkt_sched.h>
52#include <net/netlink.h>
52 53
53DECLARE_MUTEX(rtnl_sem); 54DECLARE_MUTEX(rtnl_sem);
54 55
@@ -462,11 +463,6 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
462 netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); 463 netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
463} 464}
464 465
465static int rtnetlink_done(struct netlink_callback *cb)
466{
467 return 0;
468}
469
470/* Protected by RTNL semaphore. */ 466/* Protected by RTNL semaphore. */
471static struct rtattr **rta_buf; 467static struct rtattr **rta_buf;
472static int rtattr_max; 468static int rtattr_max;
@@ -524,8 +520,6 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
524 } 520 }
525 521
526 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { 522 if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
527 u32 rlen;
528
529 if (link->dumpit == NULL) 523 if (link->dumpit == NULL)
530 link = &(rtnetlink_links[PF_UNSPEC][type]); 524 link = &(rtnetlink_links[PF_UNSPEC][type]);
531 525
@@ -533,14 +527,11 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
533 goto err_inval; 527 goto err_inval;
534 528
535 if ((*errp = netlink_dump_start(rtnl, skb, nlh, 529 if ((*errp = netlink_dump_start(rtnl, skb, nlh,
536 link->dumpit, 530 link->dumpit, NULL)) != 0) {
537 rtnetlink_done)) != 0) {
538 return -1; 531 return -1;
539 } 532 }
540 rlen = NLMSG_ALIGN(nlh->nlmsg_len); 533
541 if (rlen > skb->len) 534 netlink_queue_skip(nlh, skb);
542 rlen = skb->len;
543 skb_pull(skb, rlen);
544 return -1; 535 return -1;
545 } 536 }
546 537
@@ -579,75 +570,13 @@ err_inval:
579 return -1; 570 return -1;
580} 571}
581 572
582/*
583 * Process one packet of messages.
584 * Malformed skbs with wrong lengths of messages are discarded silently.
585 */
586
587static inline int rtnetlink_rcv_skb(struct sk_buff *skb)
588{
589 int err;
590 struct nlmsghdr * nlh;
591
592 while (skb->len >= NLMSG_SPACE(0)) {
593 u32 rlen;
594
595 nlh = (struct nlmsghdr *)skb->data;
596 if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
597 return 0;
598 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
599 if (rlen > skb->len)
600 rlen = skb->len;
601 if (rtnetlink_rcv_msg(skb, nlh, &err)) {
602 /* Not error, but we must interrupt processing here:
603 * Note, that in this case we do not pull message
604 * from skb, it will be processed later.
605 */
606 if (err == 0)
607 return -1;
608 netlink_ack(skb, nlh, err);
609 } else if (nlh->nlmsg_flags&NLM_F_ACK)
610 netlink_ack(skb, nlh, 0);
611 skb_pull(skb, rlen);
612 }
613
614 return 0;
615}
616
617/*
618 * rtnetlink input queue processing routine:
619 * - process as much as there was in the queue upon entry.
620 * - feed skbs to rtnetlink_rcv_skb, until it refuse a message,
621 * that will occur, when a dump started.
622 */
623
624static void rtnetlink_rcv(struct sock *sk, int len) 573static void rtnetlink_rcv(struct sock *sk, int len)
625{ 574{
626 unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); 575 unsigned int qlen = 0;
627 576
628 do { 577 do {
629 struct sk_buff *skb;
630
631 rtnl_lock(); 578 rtnl_lock();
632 579 netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
633 if (qlen > skb_queue_len(&sk->sk_receive_queue))
634 qlen = skb_queue_len(&sk->sk_receive_queue);
635
636 for (; qlen; qlen--) {
637 skb = skb_dequeue(&sk->sk_receive_queue);
638 if (rtnetlink_rcv_skb(skb)) {
639 if (skb->len)
640 skb_queue_head(&sk->sk_receive_queue,
641 skb);
642 else {
643 kfree_skb(skb);
644 qlen--;
645 }
646 break;
647 }
648 kfree_skb(skb);
649 }
650
651 up(&rtnl_sem); 580 up(&rtnl_sem);
652 581
653 netdev_run_todo(); 582 netdev_run_todo();
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 95501e40100e..b7d13a4fff48 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -336,6 +336,9 @@ void __kfree_skb(struct sk_buff *skb)
336 } 336 }
337#ifdef CONFIG_NETFILTER 337#ifdef CONFIG_NETFILTER
338 nf_conntrack_put(skb->nfct); 338 nf_conntrack_put(skb->nfct);
339#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
340 nf_conntrack_put_reasm(skb->nfct_reasm);
341#endif
339#ifdef CONFIG_BRIDGE_NETFILTER 342#ifdef CONFIG_BRIDGE_NETFILTER
340 nf_bridge_put(skb->nf_bridge); 343 nf_bridge_put(skb->nf_bridge);
341#endif 344#endif
@@ -414,9 +417,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
414 C(nfct); 417 C(nfct);
415 nf_conntrack_get(skb->nfct); 418 nf_conntrack_get(skb->nfct);
416 C(nfctinfo); 419 C(nfctinfo);
420#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
421 C(nfct_reasm);
422 nf_conntrack_get_reasm(skb->nfct_reasm);
423#endif
417#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 424#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
418 C(ipvs_property); 425 C(ipvs_property);
419#endif 426#endif
427#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
428 C(nfct_reasm);
429 nf_conntrack_get_reasm(skb->nfct_reasm);
430#endif
420#ifdef CONFIG_BRIDGE_NETFILTER 431#ifdef CONFIG_BRIDGE_NETFILTER
421 C(nf_bridge); 432 C(nf_bridge);
422 nf_bridge_get(skb->nf_bridge); 433 nf_bridge_get(skb->nf_bridge);
@@ -474,6 +485,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
474 new->nfct = old->nfct; 485 new->nfct = old->nfct;
475 nf_conntrack_get(old->nfct); 486 nf_conntrack_get(old->nfct);
476 new->nfctinfo = old->nfctinfo; 487 new->nfctinfo = old->nfctinfo;
488#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
489 new->nfct_reasm = old->nfct_reasm;
490 nf_conntrack_get_reasm(old->nfct_reasm);
491#endif
477#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 492#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
478 new->ipvs_property = old->ipvs_property; 493 new->ipvs_property = old->ipvs_property;
479#endif 494#endif
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 71f3c7350c6e..39061ed53cfd 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -724,12 +724,6 @@ done:
724 return skb->len; 724 return skb->len;
725} 725}
726 726
727static int inet_diag_dump_done(struct netlink_callback *cb)
728{
729 return 0;
730}
731
732
733static __inline__ int 727static __inline__ int
734inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 728inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
735{ 729{
@@ -760,8 +754,7 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
760 goto err_inval; 754 goto err_inval;
761 } 755 }
762 return netlink_dump_start(idiagnl, skb, nlh, 756 return netlink_dump_start(idiagnl, skb, nlh,
763 inet_diag_dump, 757 inet_diag_dump, NULL);
764 inet_diag_dump_done);
765 } else { 758 } else {
766 return inet_diag_get_exact(skb, nlh); 759 return inet_diag_get_exact(skb, nlh);
767 } 760 }
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 7d917e4ce1d9..9d3c8b5f327e 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -5,6 +5,20 @@
5menu "IP: Netfilter Configuration" 5menu "IP: Netfilter Configuration"
6 depends on INET && NETFILTER 6 depends on INET && NETFILTER
7 7
8config NF_CONNTRACK_IPV4
9 tristate "IPv4 support for new connection tracking (EXPERIMENTAL)"
10 depends on EXPERIMENTAL && NF_CONNTRACK
11 ---help---
12 Connection tracking keeps a record of what packets have passed
13 through your machine, in order to figure out how they are related
14 into connections.
15
16 This is IPv4 support for Layer 3 independent connection tracking.
17 Layer 3 independent connection tracking is an experimental scheme
18 which generalizes ip_conntrack to support other layer 3 protocols.
19
20 To compile it as a module, choose M here. If unsure, say N.
21
8# connection tracking, helpers and protocols 22# connection tracking, helpers and protocols
9config IP_NF_CONNTRACK 23config IP_NF_CONNTRACK
10 tristate "Connection tracking (required for masq/NAT)" 24 tristate "Connection tracking (required for masq/NAT)"
@@ -209,8 +223,8 @@ config IP_NF_MATCH_PKTTYPE
209 tristate "Packet type match support" 223 tristate "Packet type match support"
210 depends on IP_NF_IPTABLES 224 depends on IP_NF_IPTABLES
211 help 225 help
212 Packet type matching allows you to match a packet by 226 Packet type matching allows you to match a packet by
213 its "class", eg. BROADCAST, MULTICAST, ... 227 its "class", eg. BROADCAST, MULTICAST, ...
214 228
215 Typical usage: 229 Typical usage:
216 iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG 230 iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
@@ -317,7 +331,8 @@ config IP_NF_MATCH_TCPMSS
317 331
318config IP_NF_MATCH_HELPER 332config IP_NF_MATCH_HELPER
319 tristate "Helper match support" 333 tristate "Helper match support"
320 depends on IP_NF_CONNTRACK && IP_NF_IPTABLES 334 depends on IP_NF_IPTABLES
335 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
321 help 336 help
322 Helper matching allows you to match packets in dynamic connections 337 Helper matching allows you to match packets in dynamic connections
323 tracked by a conntrack-helper, ie. ip_conntrack_ftp 338 tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -326,7 +341,8 @@ config IP_NF_MATCH_HELPER
326 341
327config IP_NF_MATCH_STATE 342config IP_NF_MATCH_STATE
328 tristate "Connection state match support" 343 tristate "Connection state match support"
329 depends on IP_NF_CONNTRACK && IP_NF_IPTABLES 344 depends on IP_NF_IPTABLES
345 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
330 help 346 help
331 Connection state matching allows you to match packets based on their 347 Connection state matching allows you to match packets based on their
332 relationship to a tracked connection (ie. previous packets). This 348 relationship to a tracked connection (ie. previous packets). This
@@ -336,7 +352,8 @@ config IP_NF_MATCH_STATE
336 352
337config IP_NF_MATCH_CONNTRACK 353config IP_NF_MATCH_CONNTRACK
338 tristate "Connection tracking match support" 354 tristate "Connection tracking match support"
339 depends on IP_NF_CONNTRACK && IP_NF_IPTABLES 355 depends on IP_NF_IPTABLES
356 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
340 help 357 help
341 This is a general conntrack match module, a superset of the state match. 358 This is a general conntrack match module, a superset of the state match.
342 359
@@ -422,7 +439,8 @@ config IP_NF_MATCH_COMMENT
422 439
423config IP_NF_MATCH_CONNMARK 440config IP_NF_MATCH_CONNMARK
424 tristate 'Connection mark match support' 441 tristate 'Connection mark match support'
425 depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES 442 depends on IP_NF_IPTABLES
443 depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
426 help 444 help
427 This option adds a `connmark' match, which allows you to match the 445 This option adds a `connmark' match, which allows you to match the
428 connection mark value previously set for the session by `CONNMARK'. 446 connection mark value previously set for the session by `CONNMARK'.
@@ -433,7 +451,8 @@ config IP_NF_MATCH_CONNMARK
433 451
434config IP_NF_MATCH_CONNBYTES 452config IP_NF_MATCH_CONNBYTES
435 tristate 'Connection byte/packet counter match support' 453 tristate 'Connection byte/packet counter match support'
436 depends on IP_NF_CT_ACCT && IP_NF_IPTABLES 454 depends on IP_NF_IPTABLES
455 depends on IP_NF_CT_ACCT || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
437 help 456 help
438 This option adds a `connbytes' match, which allows you to match the 457 This option adds a `connbytes' match, which allows you to match the
439 number of bytes and/or packets for each direction within a connection. 458 number of bytes and/or packets for each direction within a connection.
@@ -747,7 +766,8 @@ config IP_NF_TARGET_TTL
747 766
748config IP_NF_TARGET_CONNMARK 767config IP_NF_TARGET_CONNMARK
749 tristate 'CONNMARK target support' 768 tristate 'CONNMARK target support'
750 depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE 769 depends on IP_NF_MANGLE
770 depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
751 help 771 help
752 This option adds a `CONNMARK' target, which allows one to manipulate 772 This option adds a `CONNMARK' target, which allows one to manipulate
753 the connection mark value. Similar to the MARK target, but 773 the connection mark value. Similar to the MARK target, but
@@ -759,7 +779,8 @@ config IP_NF_TARGET_CONNMARK
759 779
760config IP_NF_TARGET_CLUSTERIP 780config IP_NF_TARGET_CLUSTERIP
761 tristate "CLUSTERIP target support (EXPERIMENTAL)" 781 tristate "CLUSTERIP target support (EXPERIMENTAL)"
762 depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL 782 depends on IP_NF_IPTABLES && EXPERIMENTAL
783 depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
763 help 784 help
764 The CLUSTERIP target allows you to build load-balancing clusters of 785 The CLUSTERIP target allows you to build load-balancing clusters of
765 network servers without having a dedicated load-balancing 786 network servers without having a dedicated load-balancing
@@ -782,7 +803,7 @@ config IP_NF_RAW
782config IP_NF_TARGET_NOTRACK 803config IP_NF_TARGET_NOTRACK
783 tristate 'NOTRACK target support' 804 tristate 'NOTRACK target support'
784 depends on IP_NF_RAW 805 depends on IP_NF_RAW
785 depends on IP_NF_CONNTRACK 806 depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
786 help 807 help
787 The NOTRACK target allows a select rule to specify 808 The NOTRACK target allows a select rule to specify
788 which packets *not* to enter the conntrack/NAT 809 which packets *not* to enter the conntrack/NAT
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index dab4b58dd31e..058c48e258fc 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -103,3 +103,9 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
103obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o 103obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
104 104
105obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o 105obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
106
107# objects for l3 independent conntrack
108nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
109
110# l3 independent conntrack
111obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 5c1c0a3d1c4b..d2a4fec22862 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -1376,7 +1376,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1376 ip_conntrack_expect_put(exp); 1376 ip_conntrack_expect_put(exp);
1377 } 1377 }
1378 } 1378 }
1379 write_unlock(&ip_conntrack_lock); 1379 write_unlock_bh(&ip_conntrack_lock);
1380 } else { 1380 } else {
1381 /* This basically means we have to flush everything*/ 1381 /* This basically means we have to flush everything*/
1382 write_lock_bh(&ip_conntrack_lock); 1382 write_lock_bh(&ip_conntrack_lock);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 9bcb398fbc1f..45c52d8f4d99 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,7 +29,7 @@
29 29
30#include <linux/netfilter_ipv4/ip_tables.h> 30#include <linux/netfilter_ipv4/ip_tables.h>
31#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> 31#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
32#include <linux/netfilter_ipv4/ip_conntrack.h> 32#include <net/netfilter/nf_conntrack_compat.h>
33 33
34#define CLUSTERIP_VERSION "0.8" 34#define CLUSTERIP_VERSION "0.8"
35 35
@@ -316,14 +316,14 @@ target(struct sk_buff **pskb,
316{ 316{
317 const struct ipt_clusterip_tgt_info *cipinfo = targinfo; 317 const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
318 enum ip_conntrack_info ctinfo; 318 enum ip_conntrack_info ctinfo;
319 struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); 319 u_int32_t *mark, hash;
320 u_int32_t hash;
321 320
322 /* don't need to clusterip_config_get() here, since refcount 321 /* don't need to clusterip_config_get() here, since refcount
323 * is only decremented by destroy() - and ip_tables guarantees 322 * is only decremented by destroy() - and ip_tables guarantees
324 * that the ->target() function isn't called after ->destroy() */ 323 * that the ->target() function isn't called after ->destroy() */
325 324
326 if (!ct) { 325 mark = nf_ct_get_mark((*pskb), &ctinfo);
326 if (mark == NULL) {
327 printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); 327 printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
328 /* FIXME: need to drop invalid ones, since replies 328 /* FIXME: need to drop invalid ones, since replies
329 * to outgoing connections of other nodes will be 329 * to outgoing connections of other nodes will be
@@ -346,7 +346,7 @@ target(struct sk_buff **pskb,
346 346
347 switch (ctinfo) { 347 switch (ctinfo) {
348 case IP_CT_NEW: 348 case IP_CT_NEW:
349 ct->mark = hash; 349 *mark = hash;
350 break; 350 break;
351 case IP_CT_RELATED: 351 case IP_CT_RELATED:
352 case IP_CT_RELATED+IP_CT_IS_REPLY: 352 case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -363,7 +363,7 @@ target(struct sk_buff **pskb,
363#ifdef DEBUG_CLUSTERP 363#ifdef DEBUG_CLUSTERP
364 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 364 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
365#endif 365#endif
366 DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); 366 DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
367 if (!clusterip_responsible(cipinfo->config, hash)) { 367 if (!clusterip_responsible(cipinfo->config, hash)) {
368 DEBUGP("not responsible\n"); 368 DEBUGP("not responsible\n");
369 return NF_DROP; 369 return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 05d66ab59424..8acac5a40a92 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -29,7 +29,7 @@ MODULE_LICENSE("GPL");
29 29
30#include <linux/netfilter_ipv4/ip_tables.h> 30#include <linux/netfilter_ipv4/ip_tables.h>
31#include <linux/netfilter_ipv4/ipt_CONNMARK.h> 31#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
32#include <linux/netfilter_ipv4/ip_conntrack.h> 32#include <net/netfilter/nf_conntrack_compat.h>
33 33
34static unsigned int 34static unsigned int
35target(struct sk_buff **pskb, 35target(struct sk_buff **pskb,
@@ -43,24 +43,24 @@ target(struct sk_buff **pskb,
43 u_int32_t diff; 43 u_int32_t diff;
44 u_int32_t nfmark; 44 u_int32_t nfmark;
45 u_int32_t newmark; 45 u_int32_t newmark;
46 u_int32_t ctinfo;
47 u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
46 48
47 enum ip_conntrack_info ctinfo; 49 if (ctmark) {
48 struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
49 if (ct) {
50 switch(markinfo->mode) { 50 switch(markinfo->mode) {
51 case IPT_CONNMARK_SET: 51 case IPT_CONNMARK_SET:
52 newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; 52 newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
53 if (newmark != ct->mark) 53 if (newmark != *ctmark)
54 ct->mark = newmark; 54 *ctmark = newmark;
55 break; 55 break;
56 case IPT_CONNMARK_SAVE: 56 case IPT_CONNMARK_SAVE:
57 newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); 57 newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
58 if (ct->mark != newmark) 58 if (*ctmark != newmark)
59 ct->mark = newmark; 59 *ctmark = newmark;
60 break; 60 break;
61 case IPT_CONNMARK_RESTORE: 61 case IPT_CONNMARK_RESTORE:
62 nfmark = (*pskb)->nfmark; 62 nfmark = (*pskb)->nfmark;
63 diff = (ct->mark ^ nfmark) & markinfo->mask; 63 diff = (*ctmark ^ nfmark) & markinfo->mask;
64 if (diff != 0) 64 if (diff != 0)
65 (*pskb)->nfmark = nfmark ^ diff; 65 (*pskb)->nfmark = nfmark ^ diff;
66 break; 66 break;
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c
index a4bb9b3bc292..e3c69d072c6e 100644
--- a/net/ipv4/netfilter/ipt_NOTRACK.c
+++ b/net/ipv4/netfilter/ipt_NOTRACK.c
@@ -5,7 +5,7 @@
5#include <linux/skbuff.h> 5#include <linux/skbuff.h>
6 6
7#include <linux/netfilter_ipv4/ip_tables.h> 7#include <linux/netfilter_ipv4/ip_tables.h>
8#include <linux/netfilter_ipv4/ip_conntrack.h> 8#include <net/netfilter/nf_conntrack_compat.h>
9 9
10static unsigned int 10static unsigned int
11target(struct sk_buff **pskb, 11target(struct sk_buff **pskb,
@@ -23,7 +23,7 @@ target(struct sk_buff **pskb,
23 If there is a real ct entry corresponding to this packet, 23 If there is a real ct entry corresponding to this packet,
24 it'll hang around till timing out. We don't deal with it 24 it'll hang around till timing out. We don't deal with it
25 for performance reasons. JK */ 25 for performance reasons. JK */
26 (*pskb)->nfct = &ip_conntrack_untracked.ct_general; 26 nf_ct_untrack(*pskb);
27 (*pskb)->nfctinfo = IP_CT_NEW; 27 (*pskb)->nfctinfo = IP_CT_NEW;
28 nf_conntrack_get((*pskb)->nfct); 28 nf_conntrack_get((*pskb)->nfct);
29 29
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c
index df4a42c6da22..d68a048b7176 100644
--- a/net/ipv4/netfilter/ipt_connbytes.c
+++ b/net/ipv4/netfilter/ipt_connbytes.c
@@ -10,7 +10,7 @@
10 */ 10 */
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/netfilter_ipv4/ip_conntrack.h> 13#include <net/netfilter/nf_conntrack_compat.h>
14#include <linux/netfilter_ipv4/ip_tables.h> 14#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_ipv4/ipt_connbytes.h> 15#include <linux/netfilter_ipv4/ipt_connbytes.h>
16 16
@@ -46,60 +46,59 @@ match(const struct sk_buff *skb,
46 int *hotdrop) 46 int *hotdrop)
47{ 47{
48 const struct ipt_connbytes_info *sinfo = matchinfo; 48 const struct ipt_connbytes_info *sinfo = matchinfo;
49 enum ip_conntrack_info ctinfo;
50 struct ip_conntrack *ct;
51 u_int64_t what = 0; /* initialize to make gcc happy */ 49 u_int64_t what = 0; /* initialize to make gcc happy */
50 const struct ip_conntrack_counter *counters;
52 51
53 if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo))) 52 if (!(counters = nf_ct_get_counters(skb)))
54 return 0; /* no match */ 53 return 0; /* no match */
55 54
56 switch (sinfo->what) { 55 switch (sinfo->what) {
57 case IPT_CONNBYTES_PKTS: 56 case IPT_CONNBYTES_PKTS:
58 switch (sinfo->direction) { 57 switch (sinfo->direction) {
59 case IPT_CONNBYTES_DIR_ORIGINAL: 58 case IPT_CONNBYTES_DIR_ORIGINAL:
60 what = ct->counters[IP_CT_DIR_ORIGINAL].packets; 59 what = counters[IP_CT_DIR_ORIGINAL].packets;
61 break; 60 break;
62 case IPT_CONNBYTES_DIR_REPLY: 61 case IPT_CONNBYTES_DIR_REPLY:
63 what = ct->counters[IP_CT_DIR_REPLY].packets; 62 what = counters[IP_CT_DIR_REPLY].packets;
64 break; 63 break;
65 case IPT_CONNBYTES_DIR_BOTH: 64 case IPT_CONNBYTES_DIR_BOTH:
66 what = ct->counters[IP_CT_DIR_ORIGINAL].packets; 65 what = counters[IP_CT_DIR_ORIGINAL].packets;
67 what += ct->counters[IP_CT_DIR_REPLY].packets; 66 what += counters[IP_CT_DIR_REPLY].packets;
68 break; 67 break;
69 } 68 }
70 break; 69 break;
71 case IPT_CONNBYTES_BYTES: 70 case IPT_CONNBYTES_BYTES:
72 switch (sinfo->direction) { 71 switch (sinfo->direction) {
73 case IPT_CONNBYTES_DIR_ORIGINAL: 72 case IPT_CONNBYTES_DIR_ORIGINAL:
74 what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; 73 what = counters[IP_CT_DIR_ORIGINAL].bytes;
75 break; 74 break;
76 case IPT_CONNBYTES_DIR_REPLY: 75 case IPT_CONNBYTES_DIR_REPLY:
77 what = ct->counters[IP_CT_DIR_REPLY].bytes; 76 what = counters[IP_CT_DIR_REPLY].bytes;
78 break; 77 break;
79 case IPT_CONNBYTES_DIR_BOTH: 78 case IPT_CONNBYTES_DIR_BOTH:
80 what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; 79 what = counters[IP_CT_DIR_ORIGINAL].bytes;
81 what += ct->counters[IP_CT_DIR_REPLY].bytes; 80 what += counters[IP_CT_DIR_REPLY].bytes;
82 break; 81 break;
83 } 82 }
84 break; 83 break;
85 case IPT_CONNBYTES_AVGPKT: 84 case IPT_CONNBYTES_AVGPKT:
86 switch (sinfo->direction) { 85 switch (sinfo->direction) {
87 case IPT_CONNBYTES_DIR_ORIGINAL: 86 case IPT_CONNBYTES_DIR_ORIGINAL:
88 what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes, 87 what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
89 ct->counters[IP_CT_DIR_ORIGINAL].packets); 88 counters[IP_CT_DIR_ORIGINAL].packets);
90 break; 89 break;
91 case IPT_CONNBYTES_DIR_REPLY: 90 case IPT_CONNBYTES_DIR_REPLY:
92 what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes, 91 what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
93 ct->counters[IP_CT_DIR_REPLY].packets); 92 counters[IP_CT_DIR_REPLY].packets);
94 break; 93 break;
95 case IPT_CONNBYTES_DIR_BOTH: 94 case IPT_CONNBYTES_DIR_BOTH:
96 { 95 {
97 u_int64_t bytes; 96 u_int64_t bytes;
98 u_int64_t pkts; 97 u_int64_t pkts;
99 bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes + 98 bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
100 ct->counters[IP_CT_DIR_REPLY].bytes; 99 counters[IP_CT_DIR_REPLY].bytes;
101 pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+ 100 pkts = counters[IP_CT_DIR_ORIGINAL].packets+
102 ct->counters[IP_CT_DIR_REPLY].packets; 101 counters[IP_CT_DIR_REPLY].packets;
103 102
104 /* FIXME_THEORETICAL: what to do if sum 103 /* FIXME_THEORETICAL: what to do if sum
105 * overflows ? */ 104 * overflows ? */
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
index bf8de47ce004..5306ef293b92 100644
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ b/net/ipv4/netfilter/ipt_connmark.c
@@ -28,7 +28,7 @@ MODULE_LICENSE("GPL");
28 28
29#include <linux/netfilter_ipv4/ip_tables.h> 29#include <linux/netfilter_ipv4/ip_tables.h>
30#include <linux/netfilter_ipv4/ipt_connmark.h> 30#include <linux/netfilter_ipv4/ipt_connmark.h>
31#include <linux/netfilter_ipv4/ip_conntrack.h> 31#include <net/netfilter/nf_conntrack_compat.h>
32 32
33static int 33static int
34match(const struct sk_buff *skb, 34match(const struct sk_buff *skb,
@@ -39,12 +39,12 @@ match(const struct sk_buff *skb,
39 int *hotdrop) 39 int *hotdrop)
40{ 40{
41 const struct ipt_connmark_info *info = matchinfo; 41 const struct ipt_connmark_info *info = matchinfo;
42 enum ip_conntrack_info ctinfo; 42 u_int32_t ctinfo;
43 struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); 43 const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
44 if (!ct) 44 if (!ctmark)
45 return 0; 45 return 0;
46 46
47 return ((ct->mark & info->mask) == info->mark) ^ info->invert; 47 return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
48} 48}
49 49
50static int 50static int
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c
index c1d22801b7cf..c8d18705469b 100644
--- a/net/ipv4/netfilter/ipt_conntrack.c
+++ b/net/ipv4/netfilter/ipt_conntrack.c
@@ -10,7 +10,14 @@
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13
14#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
13#include <linux/netfilter_ipv4/ip_conntrack.h> 15#include <linux/netfilter_ipv4/ip_conntrack.h>
16#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
17#else
18#include <net/netfilter/nf_conntrack.h>
19#endif
20
14#include <linux/netfilter_ipv4/ip_tables.h> 21#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_ipv4/ipt_conntrack.h> 22#include <linux/netfilter_ipv4/ipt_conntrack.h>
16 23
@@ -18,6 +25,8 @@ MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 25MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
19MODULE_DESCRIPTION("iptables connection tracking match module"); 26MODULE_DESCRIPTION("iptables connection tracking match module");
20 27
28#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
29
21static int 30static int
22match(const struct sk_buff *skb, 31match(const struct sk_buff *skb,
23 const struct net_device *in, 32 const struct net_device *in,
@@ -102,6 +111,93 @@ match(const struct sk_buff *skb,
102 return 1; 111 return 1;
103} 112}
104 113
114#else /* CONFIG_IP_NF_CONNTRACK */
115static int
116match(const struct sk_buff *skb,
117 const struct net_device *in,
118 const struct net_device *out,
119 const void *matchinfo,
120 int offset,
121 int *hotdrop)
122{
123 const struct ipt_conntrack_info *sinfo = matchinfo;
124 struct nf_conn *ct;
125 enum ip_conntrack_info ctinfo;
126 unsigned int statebit;
127
128 ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
129
130#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
131
132 if (ct == &nf_conntrack_untracked)
133 statebit = IPT_CONNTRACK_STATE_UNTRACKED;
134 else if (ct)
135 statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
136 else
137 statebit = IPT_CONNTRACK_STATE_INVALID;
138
139 if(sinfo->flags & IPT_CONNTRACK_STATE) {
140 if (ct) {
141 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
142 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
143 statebit |= IPT_CONNTRACK_STATE_SNAT;
144
145 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
146 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
147 statebit |= IPT_CONNTRACK_STATE_DNAT;
148 }
149
150 if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
151 return 0;
152 }
153
154 if(sinfo->flags & IPT_CONNTRACK_PROTO) {
155 if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
156 return 0;
157 }
158
159 if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
160 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
161 return 0;
162 }
163
164 if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
165 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
166 return 0;
167 }
168
169 if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
170 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
171 return 0;
172 }
173
174 if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
175 if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
176 return 0;
177 }
178
179 if(sinfo->flags & IPT_CONNTRACK_STATUS) {
180 if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
181 return 0;
182 }
183
184 if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
185 unsigned long expires;
186
187 if(!ct)
188 return 0;
189
190 expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
191
192 if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
193 return 0;
194 }
195
196 return 1;
197}
198
199#endif /* CONFIG_IP_NF_CONNTRACK */
200
105static int check(const char *tablename, 201static int check(const char *tablename,
106 const struct ipt_ip *ip, 202 const struct ipt_ip *ip,
107 void *matchinfo, 203 void *matchinfo,
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
index 3e7dd014de43..bf14e1c7798a 100644
--- a/net/ipv4/netfilter/ipt_helper.c
+++ b/net/ipv4/netfilter/ipt_helper.c
@@ -13,9 +13,15 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter.h> 15#include <linux/netfilter.h>
16#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
16#include <linux/netfilter_ipv4/ip_conntrack.h> 17#include <linux/netfilter_ipv4/ip_conntrack.h>
17#include <linux/netfilter_ipv4/ip_conntrack_core.h> 18#include <linux/netfilter_ipv4/ip_conntrack_core.h>
18#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 19#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
20#else
21#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_conntrack_core.h>
23#include <net/netfilter/nf_conntrack_helper.h>
24#endif
19#include <linux/netfilter_ipv4/ip_tables.h> 25#include <linux/netfilter_ipv4/ip_tables.h>
20#include <linux/netfilter_ipv4/ipt_helper.h> 26#include <linux/netfilter_ipv4/ipt_helper.h>
21 27
@@ -29,6 +35,7 @@ MODULE_DESCRIPTION("iptables helper match module");
29#define DEBUGP(format, args...) 35#define DEBUGP(format, args...)
30#endif 36#endif
31 37
38#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
32static int 39static int
33match(const struct sk_buff *skb, 40match(const struct sk_buff *skb,
34 const struct net_device *in, 41 const struct net_device *in,
@@ -73,6 +80,53 @@ out_unlock:
73 return ret; 80 return ret;
74} 81}
75 82
83#else /* CONFIG_IP_NF_CONNTRACK */
84
85static int
86match(const struct sk_buff *skb,
87 const struct net_device *in,
88 const struct net_device *out,
89 const void *matchinfo,
90 int offset,
91 int *hotdrop)
92{
93 const struct ipt_helper_info *info = matchinfo;
94 struct nf_conn *ct;
95 enum ip_conntrack_info ctinfo;
96 int ret = info->invert;
97
98 ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
99 if (!ct) {
100 DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
101 return ret;
102 }
103
104 if (!ct->master) {
105 DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
106 return ret;
107 }
108
109 read_lock_bh(&nf_conntrack_lock);
110 if (!ct->master->helper) {
111 DEBUGP("ipt_helper: master ct %p has no helper\n",
112 exp->expectant);
113 goto out_unlock;
114 }
115
116 DEBUGP("master's name = %s , info->name = %s\n",
117 ct->master->helper->name, info->name);
118
119 if (info->name[0] == '\0')
120 ret ^= 1;
121 else
122 ret ^= !strncmp(ct->master->helper->name, info->name,
123 strlen(ct->master->helper->name));
124out_unlock:
125 read_unlock_bh(&nf_conntrack_lock);
126 return ret;
127}
128#endif
129
76static int check(const char *tablename, 130static int check(const char *tablename,
77 const struct ipt_ip *ip, 131 const struct ipt_ip *ip,
78 void *matchinfo, 132 void *matchinfo,
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
index b1511b97ea5f..4d7f16b70cec 100644
--- a/net/ipv4/netfilter/ipt_state.c
+++ b/net/ipv4/netfilter/ipt_state.c
@@ -10,7 +10,7 @@
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/netfilter_ipv4/ip_conntrack.h> 13#include <net/netfilter/nf_conntrack_compat.h>
14#include <linux/netfilter_ipv4/ip_tables.h> 14#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_ipv4/ipt_state.h> 15#include <linux/netfilter_ipv4/ipt_state.h>
16 16
@@ -30,9 +30,9 @@ match(const struct sk_buff *skb,
30 enum ip_conntrack_info ctinfo; 30 enum ip_conntrack_info ctinfo;
31 unsigned int statebit; 31 unsigned int statebit;
32 32
33 if (skb->nfct == &ip_conntrack_untracked.ct_general) 33 if (nf_ct_is_untracked(skb))
34 statebit = IPT_STATE_UNTRACKED; 34 statebit = IPT_STATE_UNTRACKED;
35 else if (!ip_conntrack_get(skb, &ctinfo)) 35 else if (!nf_ct_get_ctinfo(skb, &ctinfo))
36 statebit = IPT_STATE_INVALID; 36 statebit = IPT_STATE_INVALID;
37 else 37 else
38 statebit = IPT_STATE_BIT(ctinfo); 38 statebit = IPT_STATE_BIT(ctinfo);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
new file mode 100644
index 000000000000..8202c1c0afad
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -0,0 +1,571 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
9 * - move L3 protocol dependent part to this file.
10 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
11 * - add get_features() to support various size of conntrack
12 * structures.
13 *
14 * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
15 */
16
17#include <linux/config.h>
18#include <linux/types.h>
19#include <linux/ip.h>
20#include <linux/netfilter.h>
21#include <linux/module.h>
22#include <linux/skbuff.h>
23#include <linux/icmp.h>
24#include <linux/sysctl.h>
25#include <net/ip.h>
26
27#include <linux/netfilter_ipv4.h>
28#include <net/netfilter/nf_conntrack.h>
29#include <net/netfilter/nf_conntrack_helper.h>
30#include <net/netfilter/nf_conntrack_protocol.h>
31#include <net/netfilter/nf_conntrack_l3proto.h>
32#include <net/netfilter/nf_conntrack_core.h>
33#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34
35#if 0
36#define DEBUGP printk
37#else
38#define DEBUGP(format, args...)
39#endif
40
41DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
42
43static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
44 struct nf_conntrack_tuple *tuple)
45{
46 u_int32_t _addrs[2], *ap;
47 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
48 sizeof(u_int32_t) * 2, _addrs);
49 if (ap == NULL)
50 return 0;
51
52 tuple->src.u3.ip = ap[0];
53 tuple->dst.u3.ip = ap[1];
54
55 return 1;
56}
57
58static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
59 const struct nf_conntrack_tuple *orig)
60{
61 tuple->src.u3.ip = orig->dst.u3.ip;
62 tuple->dst.u3.ip = orig->src.u3.ip;
63
64 return 1;
65}
66
67static int ipv4_print_tuple(struct seq_file *s,
68 const struct nf_conntrack_tuple *tuple)
69{
70 return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
71 NIPQUAD(tuple->src.u3.ip),
72 NIPQUAD(tuple->dst.u3.ip));
73}
74
/*
 * Print the layer-3 private part of a conntrack entry.  This implementation
 * writes nothing to @s and always returns 0.
 */
static int
ipv4_print_conntrack(struct seq_file *s, const struct nf_conn *conntrack)
{
	return 0;
}
80
81/* Returns new sk_buff, or NULL */
82static struct sk_buff *
83nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
84{
85 skb_orphan(skb);
86
87 local_bh_disable();
88 skb = ip_defrag(skb, user);
89 local_bh_enable();
90
91 if (skb)
92 ip_send_check(skb->nh.iph);
93
94 return skb;
95}
96
97static int
98ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
99 u_int8_t *protonum)
100{
101 /* Never happen */
102 if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
103 if (net_ratelimit()) {
104 printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
105 (*pskb)->nh.iph->protocol, hooknum);
106 }
107 return -NF_DROP;
108 }
109
110 *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
111 *protonum = (*pskb)->nh.iph->protocol;
112
113 return NF_ACCEPT;
114}
115
116int nat_module_is_loaded = 0;
117static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
118{
119 if (nat_module_is_loaded)
120 return NF_CT_F_NAT;
121
122 return NF_CT_F_BASIC;
123}
124
/*
 * Netfilter hook: the packet has made it to the other side of the stack,
 * so confirm its conntrack entry.  Returns the verdict produced by
 * nf_conntrack_confirm(); the remaining hook arguments are unused.
 */
static unsigned int ipv4_confirm(unsigned int hooknum,
				 struct sk_buff **pskb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	unsigned int verdict = nf_conntrack_confirm(pskb);

	return verdict;
}
134
135static unsigned int ipv4_conntrack_help(unsigned int hooknum,
136 struct sk_buff **pskb,
137 const struct net_device *in,
138 const struct net_device *out,
139 int (*okfn)(struct sk_buff *))
140{
141 struct nf_conn *ct;
142 enum ip_conntrack_info ctinfo;
143
144 /* This is where we call the helper: as the packet goes out. */
145 ct = nf_ct_get(*pskb, &ctinfo);
146 if (ct && ct->helper) {
147 unsigned int ret;
148 ret = ct->helper->help(pskb,
149 (*pskb)->nh.raw - (*pskb)->data
150 + (*pskb)->nh.iph->ihl*4,
151 ct, ctinfo);
152 if (ret != NF_ACCEPT)
153 return ret;
154 }
155 return NF_ACCEPT;
156}
157
158static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
159 struct sk_buff **pskb,
160 const struct net_device *in,
161 const struct net_device *out,
162 int (*okfn)(struct sk_buff *))
163{
164#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
165 /* Previously seen (loopback)? Ignore. Do this before
166 fragment check. */
167 if ((*pskb)->nfct)
168 return NF_ACCEPT;
169#endif
170
171 /* Gather fragments. */
172 if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
173 *pskb = nf_ct_ipv4_gather_frags(*pskb,
174 hooknum == NF_IP_PRE_ROUTING ?
175 IP_DEFRAG_CONNTRACK_IN :
176 IP_DEFRAG_CONNTRACK_OUT);
177 if (!*pskb)
178 return NF_STOLEN;
179 }
180 return NF_ACCEPT;
181}
182
183static unsigned int ipv4_refrag(unsigned int hooknum,
184 struct sk_buff **pskb,
185 const struct net_device *in,
186 const struct net_device *out,
187 int (*okfn)(struct sk_buff *))
188{
189 struct rtable *rt = (struct rtable *)(*pskb)->dst;
190
191 /* We've seen it coming out the other side: confirm */
192 if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
193 return NF_DROP;
194
195 /* Local packets are never produced too large for their
196 interface. We degfragment them at LOCAL_OUT, however,
197 so we have to refragment them here. */
198 if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
199 !skb_shinfo(*pskb)->tso_size) {
200 /* No hook can be after us, so this should be OK. */
201 ip_fragment(*pskb, okfn);
202 return NF_STOLEN;
203 }
204 return NF_ACCEPT;
205}
206
207static unsigned int ipv4_conntrack_in(unsigned int hooknum,
208 struct sk_buff **pskb,
209 const struct net_device *in,
210 const struct net_device *out,
211 int (*okfn)(struct sk_buff *))
212{
213 return nf_conntrack_in(PF_INET, hooknum, pskb);
214}
215
216static unsigned int ipv4_conntrack_local(unsigned int hooknum,
217 struct sk_buff **pskb,
218 const struct net_device *in,
219 const struct net_device *out,
220 int (*okfn)(struct sk_buff *))
221{
222 /* root is playing with raw sockets. */
223 if ((*pskb)->len < sizeof(struct iphdr)
224 || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
225 if (net_ratelimit())
226 printk("ipt_hook: happy cracking.\n");
227 return NF_ACCEPT;
228 }
229 return nf_conntrack_in(PF_INET, hooknum, pskb);
230}
231
232/* Connection tracking may drop packets, but never alters them, so
233 make it the first hook. */
234static struct nf_hook_ops ipv4_conntrack_defrag_ops = {
235 .hook = ipv4_conntrack_defrag,
236 .owner = THIS_MODULE,
237 .pf = PF_INET,
238 .hooknum = NF_IP_PRE_ROUTING,
239 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
240};
241
242static struct nf_hook_ops ipv4_conntrack_in_ops = {
243 .hook = ipv4_conntrack_in,
244 .owner = THIS_MODULE,
245 .pf = PF_INET,
246 .hooknum = NF_IP_PRE_ROUTING,
247 .priority = NF_IP_PRI_CONNTRACK,
248};
249
250static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = {
251 .hook = ipv4_conntrack_defrag,
252 .owner = THIS_MODULE,
253 .pf = PF_INET,
254 .hooknum = NF_IP_LOCAL_OUT,
255 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
256};
257
258static struct nf_hook_ops ipv4_conntrack_local_out_ops = {
259 .hook = ipv4_conntrack_local,
260 .owner = THIS_MODULE,
261 .pf = PF_INET,
262 .hooknum = NF_IP_LOCAL_OUT,
263 .priority = NF_IP_PRI_CONNTRACK,
264};
265
266/* helpers */
267static struct nf_hook_ops ipv4_conntrack_helper_out_ops = {
268 .hook = ipv4_conntrack_help,
269 .owner = THIS_MODULE,
270 .pf = PF_INET,
271 .hooknum = NF_IP_POST_ROUTING,
272 .priority = NF_IP_PRI_CONNTRACK_HELPER,
273};
274
275static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
276 .hook = ipv4_conntrack_help,
277 .owner = THIS_MODULE,
278 .pf = PF_INET,
279 .hooknum = NF_IP_LOCAL_IN,
280 .priority = NF_IP_PRI_CONNTRACK_HELPER,
281};
282
283
284/* Refragmenter; last chance. */
285static struct nf_hook_ops ipv4_conntrack_out_ops = {
286 .hook = ipv4_refrag,
287 .owner = THIS_MODULE,
288 .pf = PF_INET,
289 .hooknum = NF_IP_POST_ROUTING,
290 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
291};
292
293static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
294 .hook = ipv4_confirm,
295 .owner = THIS_MODULE,
296 .pf = PF_INET,
297 .hooknum = NF_IP_LOCAL_IN,
298 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
299};
300
#ifdef CONFIG_SYSCTL
/* From nf_conntrack_proto_icmp.c */
extern unsigned long nf_ct_icmp_timeout;

/* Handle returned by register_sysctl_table(), kept for unregistration. */
static struct ctl_table_header *nf_ct_ipv4_sysctl_header;

/*
 * Leaf entry: nf_conntrack_icmp_timeout.
 *
 * NOTE(review): the variable is declared unsigned long above, while maxlen
 * and the handler treat it as an int; confirm this is safe on 64-bit
 * targets.
 */
static ctl_table nf_ct_sysctl_table[] = {
	{
		.ctl_name	= NET_NF_CONNTRACK_ICMP_TIMEOUT,
		.procname	= "nf_conntrack_icmp_timeout",
		.data		= &nf_ct_icmp_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{ .ctl_name = 0 }
};

/* "netfilter" directory holding the leaf table. */
static ctl_table nf_ct_netfilter_table[] = {
	{
		.ctl_name	= NET_NETFILTER,
		.procname	= "netfilter",
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

/* Root "net" directory; this is the table that gets registered. */
static ctl_table nf_ct_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};
#endif
338
339/* Fast function for those who don't want to parse /proc (and I don't
340 blame them). */
341/* Reversing the socket's dst/src point of view gives us the reply
342 mapping. */
343static int
344getorigdst(struct sock *sk, int optval, void __user *user, int *len)
345{
346 struct inet_sock *inet = inet_sk(sk);
347 struct nf_conntrack_tuple_hash *h;
348 struct nf_conntrack_tuple tuple;
349
350 NF_CT_TUPLE_U_BLANK(&tuple);
351 tuple.src.u3.ip = inet->rcv_saddr;
352 tuple.src.u.tcp.port = inet->sport;
353 tuple.dst.u3.ip = inet->daddr;
354 tuple.dst.u.tcp.port = inet->dport;
355 tuple.src.l3num = PF_INET;
356 tuple.dst.protonum = IPPROTO_TCP;
357
358 /* We only do TCP at the moment: is there a better way? */
359 if (strcmp(sk->sk_prot->name, "TCP")) {
360 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
361 return -ENOPROTOOPT;
362 }
363
364 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
365 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
366 *len, sizeof(struct sockaddr_in));
367 return -EINVAL;
368 }
369
370 h = nf_conntrack_find_get(&tuple, NULL);
371 if (h) {
372 struct sockaddr_in sin;
373 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
374
375 sin.sin_family = AF_INET;
376 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
377 .tuple.dst.u.tcp.port;
378 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
379 .tuple.dst.u3.ip;
380
381 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
382 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
383 nf_ct_put(ct);
384 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
385 return -EFAULT;
386 else
387 return 0;
388 }
389 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
390 NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
391 NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
392 return -ENOENT;
393}
394
395static struct nf_sockopt_ops so_getorigdst = {
396 .pf = PF_INET,
397 .get_optmin = SO_ORIGINAL_DST,
398 .get_optmax = SO_ORIGINAL_DST+1,
399 .get = &getorigdst,
400};
401
402struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
403 .l3proto = PF_INET,
404 .name = "ipv4",
405 .pkt_to_tuple = ipv4_pkt_to_tuple,
406 .invert_tuple = ipv4_invert_tuple,
407 .print_tuple = ipv4_print_tuple,
408 .print_conntrack = ipv4_print_conntrack,
409 .prepare = ipv4_prepare,
410 .get_features = ipv4_get_features,
411 .me = THIS_MODULE,
412};
413
414extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
415extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
416extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;
417static int init_or_cleanup(int init)
418{
419 int ret = 0;
420
421 if (!init) goto cleanup;
422
423 ret = nf_register_sockopt(&so_getorigdst);
424 if (ret < 0) {
425 printk(KERN_ERR "Unable to register netfilter socket option\n");
426 goto cleanup_nothing;
427 }
428
429 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
430 if (ret < 0) {
431 printk("nf_conntrack_ipv4: can't register tcp.\n");
432 goto cleanup_sockopt;
433 }
434
435 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
436 if (ret < 0) {
437 printk("nf_conntrack_ipv4: can't register udp.\n");
438 goto cleanup_tcp;
439 }
440
441 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
442 if (ret < 0) {
443 printk("nf_conntrack_ipv4: can't register icmp.\n");
444 goto cleanup_udp;
445 }
446
447 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
448 if (ret < 0) {
449 printk("nf_conntrack_ipv4: can't register ipv4\n");
450 goto cleanup_icmp;
451 }
452
453 ret = nf_register_hook(&ipv4_conntrack_defrag_ops);
454 if (ret < 0) {
455 printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n");
456 goto cleanup_ipv4;
457 }
458 ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops);
459 if (ret < 0) {
460 printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n");
461 goto cleanup_defragops;
462 }
463
464 ret = nf_register_hook(&ipv4_conntrack_in_ops);
465 if (ret < 0) {
466 printk("nf_conntrack_ipv4: can't register pre-routing hook.\n");
467 goto cleanup_defraglocalops;
468 }
469
470 ret = nf_register_hook(&ipv4_conntrack_local_out_ops);
471 if (ret < 0) {
472 printk("nf_conntrack_ipv4: can't register local out hook.\n");
473 goto cleanup_inops;
474 }
475
476 ret = nf_register_hook(&ipv4_conntrack_helper_in_ops);
477 if (ret < 0) {
478 printk("nf_conntrack_ipv4: can't register local helper hook.\n");
479 goto cleanup_inandlocalops;
480 }
481
482 ret = nf_register_hook(&ipv4_conntrack_helper_out_ops);
483 if (ret < 0) {
484 printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n");
485 goto cleanup_helperinops;
486 }
487
488 ret = nf_register_hook(&ipv4_conntrack_out_ops);
489 if (ret < 0) {
490 printk("nf_conntrack_ipv4: can't register post-routing hook.\n");
491 goto cleanup_helperoutops;
492 }
493
494 ret = nf_register_hook(&ipv4_conntrack_local_in_ops);
495 if (ret < 0) {
496 printk("nf_conntrack_ipv4: can't register local in hook.\n");
497 goto cleanup_inoutandlocalops;
498 }
499
500#ifdef CONFIG_SYSCTL
501 nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
502 if (nf_ct_ipv4_sysctl_header == NULL) {
503 printk("nf_conntrack: can't register to sysctl.\n");
504 ret = -ENOMEM;
505 goto cleanup_localinops;
506 }
507#endif
508
509 /* For use by REJECT target */
510 ip_ct_attach = __nf_conntrack_attach;
511
512 return ret;
513
514 cleanup:
515 synchronize_net();
516 ip_ct_attach = NULL;
517#ifdef CONFIG_SYSCTL
518 unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
519 cleanup_localinops:
520#endif
521 nf_unregister_hook(&ipv4_conntrack_local_in_ops);
522 cleanup_inoutandlocalops:
523 nf_unregister_hook(&ipv4_conntrack_out_ops);
524 cleanup_helperoutops:
525 nf_unregister_hook(&ipv4_conntrack_helper_out_ops);
526 cleanup_helperinops:
527 nf_unregister_hook(&ipv4_conntrack_helper_in_ops);
528 cleanup_inandlocalops:
529 nf_unregister_hook(&ipv4_conntrack_local_out_ops);
530 cleanup_inops:
531 nf_unregister_hook(&ipv4_conntrack_in_ops);
532 cleanup_defraglocalops:
533 nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops);
534 cleanup_defragops:
535 nf_unregister_hook(&ipv4_conntrack_defrag_ops);
536 cleanup_ipv4:
537 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
538 cleanup_icmp:
539 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
540 cleanup_udp:
541 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
542 cleanup_tcp:
543 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
544 cleanup_sockopt:
545 nf_unregister_sockopt(&so_getorigdst);
546 cleanup_nothing:
547 return ret;
548}
549
550MODULE_LICENSE("GPL");
551
552static int __init init(void)
553{
554 need_nf_conntrack();
555 return init_or_cleanup(1);
556}
557
558static void __exit fini(void)
559{
560 init_or_cleanup(0);
561}
562
563module_init(init);
564module_exit(fini);
565
566void need_ip_conntrack(void)
567{
568}
569
570EXPORT_SYMBOL(need_ip_conntrack);
571EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 000000000000..7ddb5c08f7b8
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,301 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
9 * - enable working with Layer 3 protocol independent connection tracking.
10 *
11 * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
12 */
13
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/timer.h>
17#include <linux/netfilter.h>
18#include <linux/in.h>
19#include <linux/icmp.h>
20#include <linux/seq_file.h>
21#include <net/ip.h>
22#include <net/checksum.h>
23#include <linux/netfilter_ipv4.h>
24#include <net/netfilter/nf_conntrack_tuple.h>
25#include <net/netfilter/nf_conntrack_protocol.h>
26#include <net/netfilter/nf_conntrack_core.h>
27
28unsigned long nf_ct_icmp_timeout = 30*HZ;
29
30#if 0
31#define DEBUGP printk
32#else
33#define DEBUGP(format, args...)
34#endif
35
36static int icmp_pkt_to_tuple(const struct sk_buff *skb,
37 unsigned int dataoff,
38 struct nf_conntrack_tuple *tuple)
39{
40 struct icmphdr _hdr, *hp;
41
42 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
43 if (hp == NULL)
44 return 0;
45
46 tuple->dst.u.icmp.type = hp->type;
47 tuple->src.u.icmp.id = hp->un.echo.id;
48 tuple->dst.u.icmp.code = hp->code;
49
50 return 1;
51}
52
53static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
54 const struct nf_conntrack_tuple *orig)
55{
56 /* Add 1; spaces filled with 0. */
57 static u_int8_t invmap[]
58 = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
59 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
60 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
61 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
62 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
63 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
64 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
65 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
66
67 if (orig->dst.u.icmp.type >= sizeof(invmap)
68 || !invmap[orig->dst.u.icmp.type])
69 return 0;
70
71 tuple->src.u.icmp.id = orig->src.u.icmp.id;
72 tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
73 tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
74 return 1;
75}
76
77/* Print out the per-protocol part of the tuple. */
78static int icmp_print_tuple(struct seq_file *s,
79 const struct nf_conntrack_tuple *tuple)
80{
81 return seq_printf(s, "type=%u code=%u id=%u ",
82 tuple->dst.u.icmp.type,
83 tuple->dst.u.icmp.code,
84 ntohs(tuple->src.u.icmp.id));
85}
86
/*
 * Print the ICMP private part of a conntrack entry.  This implementation
 * writes nothing to @s and always returns 0.
 */
static int
icmp_print_conntrack(struct seq_file *s, const struct nf_conn *conntrack)
{
	return 0;
}
93
94/* Returns verdict for packet, or -1 for invalid. */
95static int icmp_packet(struct nf_conn *ct,
96 const struct sk_buff *skb,
97 unsigned int dataoff,
98 enum ip_conntrack_info ctinfo,
99 int pf,
100 unsigned int hooknum)
101{
102 /* Try to delete connection immediately after all replies:
103 won't actually vanish as we still have skb, and del_timer
104 means this will only run once even if count hits zero twice
105 (theoretically possible with SMP) */
106 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
107 if (atomic_dec_and_test(&ct->proto.icmp.count)
108 && del_timer(&ct->timeout))
109 ct->timeout.function((unsigned long)ct);
110 } else {
111 atomic_inc(&ct->proto.icmp.count);
112 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
113 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
114 }
115
116 return NF_ACCEPT;
117}
118
119/* Called when a new connection for this protocol found. */
120static int icmp_new(struct nf_conn *conntrack,
121 const struct sk_buff *skb, unsigned int dataoff)
122{
123 static u_int8_t valid_new[]
124 = { [ICMP_ECHO] = 1,
125 [ICMP_TIMESTAMP] = 1,
126 [ICMP_INFO_REQUEST] = 1,
127 [ICMP_ADDRESS] = 1 };
128
129 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
130 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
131 /* Can't create a new ICMP `conn' with this. */
132 DEBUGP("icmp: can't create new conn with type %u\n",
133 conntrack->tuplehash[0].tuple.dst.u.icmp.type);
134 NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
135 return 0;
136 }
137 atomic_set(&conntrack->proto.icmp.count, 0);
138 return 1;
139}
140
141extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
142/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
143static int
144icmp_error_message(struct sk_buff *skb,
145 enum ip_conntrack_info *ctinfo,
146 unsigned int hooknum)
147{
148 struct nf_conntrack_tuple innertuple, origtuple;
149 struct {
150 struct icmphdr icmp;
151 struct iphdr ip;
152 } _in, *inside;
153 struct nf_conntrack_protocol *innerproto;
154 struct nf_conntrack_tuple_hash *h;
155 int dataoff;
156
157 NF_CT_ASSERT(skb->nfct == NULL);
158
159 /* Not enough header? */
160 inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
161 if (inside == NULL)
162 return -NF_ACCEPT;
163
164 /* Ignore ICMP's containing fragments (shouldn't happen) */
165 if (inside->ip.frag_off & htons(IP_OFFSET)) {
166 DEBUGP("icmp_error_message: fragment of proto %u\n",
167 inside->ip.protocol);
168 return -NF_ACCEPT;
169 }
170
171 innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
172 dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
173 /* Are they talking about one of our connections? */
174 if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
175 inside->ip.protocol, &origtuple,
176 &nf_conntrack_l3proto_ipv4, innerproto)) {
177 DEBUGP("icmp_error_message: ! get_tuple p=%u",
178 inside->ip.protocol);
179 return -NF_ACCEPT;
180 }
181
182 /* Ordinarily, we'd expect the inverted tupleproto, but it's
183 been preserved inside the ICMP. */
184 if (!nf_ct_invert_tuple(&innertuple, &origtuple,
185 &nf_conntrack_l3proto_ipv4, innerproto)) {
186 DEBUGP("icmp_error_message: no match\n");
187 return -NF_ACCEPT;
188 }
189
190 *ctinfo = IP_CT_RELATED;
191
192 h = nf_conntrack_find_get(&innertuple, NULL);
193 if (!h) {
194 /* Locally generated ICMPs will match inverted if they
195 haven't been SNAT'ed yet */
196 /* FIXME: NAT code has to handle half-done double NAT --RR */
197 if (hooknum == NF_IP_LOCAL_OUT)
198 h = nf_conntrack_find_get(&origtuple, NULL);
199
200 if (!h) {
201 DEBUGP("icmp_error_message: no match\n");
202 return -NF_ACCEPT;
203 }
204
205 /* Reverse direction from that found */
206 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
207 *ctinfo += IP_CT_IS_REPLY;
208 } else {
209 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
210 *ctinfo += IP_CT_IS_REPLY;
211 }
212
213 /* Update skb to refer to this connection */
214 skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
215 skb->nfctinfo = *ctinfo;
216 return -NF_ACCEPT;
217}
218
219/* Small and modified version of icmp_rcv */
220static int
221icmp_error(struct sk_buff *skb, unsigned int dataoff,
222 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
223{
224 struct icmphdr _ih, *icmph;
225
226 /* Not enough header? */
227 icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
228 if (icmph == NULL) {
229 if (LOG_INVALID(IPPROTO_ICMP))
230 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
231 "nf_ct_icmp: short packet ");
232 return -NF_ACCEPT;
233 }
234
235 /* See ip_conntrack_proto_tcp.c */
236 if (hooknum != NF_IP_PRE_ROUTING)
237 goto checksum_skipped;
238
239 switch (skb->ip_summed) {
240 case CHECKSUM_HW:
241 if (!(u16)csum_fold(skb->csum))
242 break;
243 if (LOG_INVALID(IPPROTO_ICMP))
244 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
245 "nf_ct_icmp: bad HW ICMP checksum ");
246 return -NF_ACCEPT;
247 case CHECKSUM_NONE:
248 if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
249 if (LOG_INVALID(IPPROTO_ICMP))
250 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
251 NULL,
252 "nf_ct_icmp: bad ICMP checksum ");
253 return -NF_ACCEPT;
254 }
255 default:
256 break;
257 }
258
259checksum_skipped:
260 /*
261 * 18 is the highest 'known' ICMP type. Anything else is a mystery
262 *
263 * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
264 * discarded.
265 */
266 if (icmph->type > NR_ICMP_TYPES) {
267 if (LOG_INVALID(IPPROTO_ICMP))
268 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
269 "nf_ct_icmp: invalid ICMP type ");
270 return -NF_ACCEPT;
271 }
272
273 /* Need to track icmp error message? */
274 if (icmph->type != ICMP_DEST_UNREACH
275 && icmph->type != ICMP_SOURCE_QUENCH
276 && icmph->type != ICMP_TIME_EXCEEDED
277 && icmph->type != ICMP_PARAMETERPROB
278 && icmph->type != ICMP_REDIRECT)
279 return NF_ACCEPT;
280
281 return icmp_error_message(skb, ctinfo, hooknum);
282}
283
284struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
285{
286 .list = { NULL, NULL },
287 .l3proto = PF_INET,
288 .proto = IPPROTO_ICMP,
289 .name = "icmp",
290 .pkt_to_tuple = icmp_pkt_to_tuple,
291 .invert_tuple = icmp_invert_tuple,
292 .print_tuple = icmp_print_tuple,
293 .print_conntrack = icmp_print_conntrack,
294 .packet = icmp_packet,
295 .new = icmp_new,
296 .error = icmp_error,
297 .destroy = NULL,
298 .me = NULL
299};
300
301EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6e3480426939..a6026d2787d2 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -176,6 +176,11 @@ resubmit:
176 if (ipprot->flags & INET6_PROTO_FINAL) { 176 if (ipprot->flags & INET6_PROTO_FINAL) {
177 struct ipv6hdr *hdr; 177 struct ipv6hdr *hdr;
178 178
179 /* Free reference early: we don't need it any more,
180 and it may hold ip_conntrack module loaded
181 indefinitely. */
182 nf_reset(skb);
183
179 skb_postpull_rcsum(skb, skb->nh.raw, 184 skb_postpull_rcsum(skb, skb->nh.raw,
180 skb->h.raw - skb->nh.raw); 185 skb->h.raw - skb->nh.raw);
181 hdr = skb->nh.ipv6h; 186 hdr = skb->nh.ipv6h;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index dbd9767b32e4..c1fa693511a1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -441,9 +441,15 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
441#ifdef CONFIG_NETFILTER 441#ifdef CONFIG_NETFILTER
442 to->nfmark = from->nfmark; 442 to->nfmark = from->nfmark;
443 /* Connection association is same as pre-frag packet */ 443 /* Connection association is same as pre-frag packet */
444 nf_conntrack_put(to->nfct);
444 to->nfct = from->nfct; 445 to->nfct = from->nfct;
445 nf_conntrack_get(to->nfct); 446 nf_conntrack_get(to->nfct);
446 to->nfctinfo = from->nfctinfo; 447 to->nfctinfo = from->nfctinfo;
448#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
449 nf_conntrack_put_reasm(to->nfct_reasm);
450 to->nfct_reasm = from->nfct_reasm;
451 nf_conntrack_get_reasm(to->nfct_reasm);
452#endif
447#ifdef CONFIG_BRIDGE_NETFILTER 453#ifdef CONFIG_BRIDGE_NETFILTER
448 nf_bridge_put(to->nf_bridge); 454 nf_bridge_put(to->nf_bridge);
449 to->nf_bridge = from->nf_bridge; 455 to->nf_bridge = from->nf_bridge;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index bb7ccfe33f23..971ba60bf6e9 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -278,5 +278,19 @@ config IP6_NF_RAW
278 If you want to compile it as a module, say M here and read 278 If you want to compile it as a module, say M here and read
279 <file:Documentation/modules.txt>. If unsure, say `N'. 279 <file:Documentation/modules.txt>. If unsure, say `N'.
280 280
281config NF_CONNTRACK_IPV6
282 tristate "IPv6 support for new connection tracking (EXPERIMENTAL)"
283 depends on EXPERIMENTAL && NF_CONNTRACK
284 ---help---
285 Connection tracking keeps a record of what packets have passed
286 through your machine, in order to figure out how they are related
287 into connections.
288
289 This is IPv6 support on Layer 3 independent connection tracking.
290 Layer 3 independent connection tracking is an experimental scheme
291 which generalizes ip_conntrack to support other layer 3 protocols.
292
293 To compile it as a module, choose M here. If unsure, say N.
294
281endmenu 295endmenu
282 296
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b2c370e8b1c..9ab5b2ca1f59 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,3 +27,9 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
27obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o 27obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
28obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o 28obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
29obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o 29obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
30
31# objects for l3 independent conntrack
32nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
33
34# l3 independent conntrack
35obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
index 0c7584f92172..eab8fb864ee0 100644
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -56,9 +56,9 @@ checkentry(const char *tablename,
56 return 1; 56 return 1;
57} 57}
58 58
59static struct ip6t_target ip6t_mark_reg = { 59static struct ip6t_target ip6t_mark_reg = {
60 .name = "MARK", 60 .name = "MARK",
61 .target = target, 61 .target = target,
62 .checkentry = checkentry, 62 .checkentry = checkentry,
63 .me = THIS_MODULE 63 .me = THIS_MODULE
64}; 64};
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
new file mode 100644
index 000000000000..e2c90b3a8074
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -0,0 +1,556 @@
1/*
2 * Copyright (C)2004 USAGI/WIDE Project
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Author:
9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
10 *
11 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - support Layer 3 protocol independent connection tracking.
13 * Based on the original ip_conntrack code which had the following
14 * copyright information:
15 * (C) 1999-2001 Paul `Rusty' Russell
16 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
17 *
18 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
19 * - add get_features() to support various size of conntrack
20 * structures.
21 */
22
23#include <linux/config.h>
24#include <linux/types.h>
25#include <linux/ipv6.h>
26#include <linux/in6.h>
27#include <linux/netfilter.h>
28#include <linux/module.h>
29#include <linux/skbuff.h>
30#include <linux/icmp.h>
31#include <linux/sysctl.h>
32#include <net/ipv6.h>
33
34#include <linux/netfilter_ipv6.h>
35#include <net/netfilter/nf_conntrack.h>
36#include <net/netfilter/nf_conntrack_helper.h>
37#include <net/netfilter/nf_conntrack_protocol.h>
38#include <net/netfilter/nf_conntrack_l3proto.h>
39#include <net/netfilter/nf_conntrack_core.h>
40
41#if 0
42#define DEBUGP printk
43#else
44#define DEBUGP(format, args...)
45#endif
46
47DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
48
49static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
50 struct nf_conntrack_tuple *tuple)
51{
52 u_int32_t _addrs[8], *ap;
53
54 ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
55 sizeof(_addrs), _addrs);
56 if (ap == NULL)
57 return 0;
58
59 memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
60 memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
61
62 return 1;
63}
64
65static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
66 const struct nf_conntrack_tuple *orig)
67{
68 memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
69 memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
70
71 return 1;
72}
73
74static int ipv6_print_tuple(struct seq_file *s,
75 const struct nf_conntrack_tuple *tuple)
76{
77 return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ",
78 NIP6(*((struct in6_addr *)tuple->src.u3.ip6)),
79 NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
80}
81
/*
 * Print the layer 3 private part of a conntrack entry. Nothing is
 * printed here; the function only returns 0.
 */
static int ipv6_print_conntrack(struct seq_file *s,
				const struct nf_conn *conntrack)
{
	return 0;
}
87
88/*
89 * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
90 *
91 * This function parses (probably truncated) exthdr set "hdr"
92 * of length "len". "nexthdrp" initially points to some place,
93 * where type of the first header can be found.
94 *
95 * It skips all well-known exthdrs, and returns pointer to the start
96 * of unparsable area i.e. the first header with unknown type.
97 * if success, *nexthdr is updated by type/protocol of this header.
98 *
99 * NOTES: - it may return pointer pointing beyond end of packet,
100 * if the last recognized header is truncated in the middle.
101 * - if packet is truncated, so that all parsed headers are skipped,
102 * it returns -1.
103 * - if packet is fragmented, return pointer of the fragment header.
104 * - ESP is unparsable for now and considered like
105 * normal payload protocol.
106 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
107 */
108
109int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp,
110 int len)
111{
112 u8 nexthdr = *nexthdrp;
113
114 while (ipv6_ext_hdr(nexthdr)) {
115 struct ipv6_opt_hdr hdr;
116 int hdrlen;
117
118 if (len < (int)sizeof(struct ipv6_opt_hdr))
119 return -1;
120 if (nexthdr == NEXTHDR_NONE)
121 break;
122 if (nexthdr == NEXTHDR_FRAGMENT)
123 break;
124 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
125 BUG();
126 if (nexthdr == NEXTHDR_AUTH)
127 hdrlen = (hdr.hdrlen+2)<<2;
128 else
129 hdrlen = ipv6_optlen(&hdr);
130
131 nexthdr = hdr.nexthdr;
132 len -= hdrlen;
133 start += hdrlen;
134 }
135
136 *nexthdrp = nexthdr;
137 return start;
138}
139
140static int
141ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
142 u_int8_t *protonum)
143{
144 unsigned int extoff;
145 unsigned char pnum;
146 int protoff;
147
148 extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
149 pnum = (*pskb)->nh.ipv6h->nexthdr;
150
151 protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
152 (*pskb)->len - extoff);
153
154 /*
155 * (protoff == (*pskb)->len) mean that the packet doesn't have no data
156 * except of IPv6 & ext headers. but it's tracked anyway. - YK
157 */
158 if ((protoff < 0) || (protoff > (*pskb)->len)) {
159 DEBUGP("ip6_conntrack_core: can't find proto in pkt\n");
160 NF_CT_STAT_INC(error);
161 NF_CT_STAT_INC(invalid);
162 return -NF_ACCEPT;
163 }
164
165 *dataoff = protoff;
166 *protonum = pnum;
167 return NF_ACCEPT;
168}
169
170static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple)
171{
172 return NF_CT_F_BASIC;
173}
174
175static unsigned int ipv6_confirm(unsigned int hooknum,
176 struct sk_buff **pskb,
177 const struct net_device *in,
178 const struct net_device *out,
179 int (*okfn)(struct sk_buff *))
180{
181 struct nf_conn *ct;
182 enum ip_conntrack_info ctinfo;
183
184 /* This is where we call the helper: as the packet goes out. */
185 ct = nf_ct_get(*pskb, &ctinfo);
186 if (ct && ct->helper) {
187 unsigned int ret, protoff;
188 unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
189 - (*pskb)->data;
190 unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
191
192 protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
193 (*pskb)->len - extoff);
194 if (protoff < 0 || protoff > (*pskb)->len ||
195 pnum == NEXTHDR_FRAGMENT) {
196 DEBUGP("proto header not found\n");
197 return NF_ACCEPT;
198 }
199
200 ret = ct->helper->help(pskb, protoff, ct, ctinfo);
201 if (ret != NF_ACCEPT)
202 return ret;
203 }
204
205 /* We've seen it coming out the other side: confirm it */
206
207 return nf_conntrack_confirm(pskb);
208}
209
210extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb);
211extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
212 struct net_device *in,
213 struct net_device *out,
214 int (*okfn)(struct sk_buff *));
215static unsigned int ipv6_defrag(unsigned int hooknum,
216 struct sk_buff **pskb,
217 const struct net_device *in,
218 const struct net_device *out,
219 int (*okfn)(struct sk_buff *))
220{
221 struct sk_buff *reasm;
222
223 /* Previously seen (loopback)? */
224 if ((*pskb)->nfct)
225 return NF_ACCEPT;
226
227 reasm = nf_ct_frag6_gather(*pskb);
228
229 /* queued */
230 if (reasm == NULL)
231 return NF_STOLEN;
232
233 /* error occured or not fragmented */
234 if (reasm == *pskb)
235 return NF_ACCEPT;
236
237 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
238 (struct net_device *)out, okfn);
239
240 return NF_STOLEN;
241}
242
243static unsigned int ipv6_conntrack_in(unsigned int hooknum,
244 struct sk_buff **pskb,
245 const struct net_device *in,
246 const struct net_device *out,
247 int (*okfn)(struct sk_buff *))
248{
249 struct sk_buff *reasm = (*pskb)->nfct_reasm;
250
251 /* This packet is fragmented and has reassembled packet. */
252 if (reasm) {
253 /* Reassembled packet isn't parsed yet ? */
254 if (!reasm->nfct) {
255 unsigned int ret;
256
257 ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
258 if (ret != NF_ACCEPT)
259 return ret;
260 }
261 nf_conntrack_get(reasm->nfct);
262 (*pskb)->nfct = reasm->nfct;
263 return NF_ACCEPT;
264 }
265
266 return nf_conntrack_in(PF_INET6, hooknum, pskb);
267}
268
269static unsigned int ipv6_conntrack_local(unsigned int hooknum,
270 struct sk_buff **pskb,
271 const struct net_device *in,
272 const struct net_device *out,
273 int (*okfn)(struct sk_buff *))
274{
275 /* root is playing with raw sockets. */
276 if ((*pskb)->len < sizeof(struct ipv6hdr)) {
277 if (net_ratelimit())
278 printk("ipv6_conntrack_local: packet too short\n");
279 return NF_ACCEPT;
280 }
281 return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
282}
283
284/* Connection tracking may drop packets, but never alters them, so
285 make it the first hook. */
286static struct nf_hook_ops ipv6_conntrack_defrag_ops = {
287 .hook = ipv6_defrag,
288 .owner = THIS_MODULE,
289 .pf = PF_INET6,
290 .hooknum = NF_IP6_PRE_ROUTING,
291 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
292};
293
294static struct nf_hook_ops ipv6_conntrack_in_ops = {
295 .hook = ipv6_conntrack_in,
296 .owner = THIS_MODULE,
297 .pf = PF_INET6,
298 .hooknum = NF_IP6_PRE_ROUTING,
299 .priority = NF_IP6_PRI_CONNTRACK,
300};
301
302static struct nf_hook_ops ipv6_conntrack_local_out_ops = {
303 .hook = ipv6_conntrack_local,
304 .owner = THIS_MODULE,
305 .pf = PF_INET6,
306 .hooknum = NF_IP6_LOCAL_OUT,
307 .priority = NF_IP6_PRI_CONNTRACK,
308};
309
310static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = {
311 .hook = ipv6_defrag,
312 .owner = THIS_MODULE,
313 .pf = PF_INET6,
314 .hooknum = NF_IP6_LOCAL_OUT,
315 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
316};
317
318/* Refragmenter; last chance. */
319static struct nf_hook_ops ipv6_conntrack_out_ops = {
320 .hook = ipv6_confirm,
321 .owner = THIS_MODULE,
322 .pf = PF_INET6,
323 .hooknum = NF_IP6_POST_ROUTING,
324 .priority = NF_IP6_PRI_LAST,
325};
326
327static struct nf_hook_ops ipv6_conntrack_local_in_ops = {
328 .hook = ipv6_confirm,
329 .owner = THIS_MODULE,
330 .pf = PF_INET6,
331 .hooknum = NF_IP6_LOCAL_IN,
332 .priority = NF_IP6_PRI_LAST-1,
333};
334
#ifdef CONFIG_SYSCTL

/* From nf_conntrack_proto_icmpv6.c */
extern unsigned long nf_ct_icmpv6_timeout;

/* From nf_conntrack_reasm.c; declared with the same type as the
   definitions there. */
extern int nf_ct_frag6_timeout;
extern int nf_ct_frag6_low_thresh;
extern int nf_ct_frag6_high_thresh;

static struct ctl_table_header *nf_ct_ipv6_sysctl_header;

/*
 * Tunables exported under net/netfilter/.
 *
 * The two timeouts are kept in jiffies and therefore go through
 * proc_dointvec_jiffies. The fragment thresholds are plain integer
 * limits, not time intervals, so they must use proc_dointvec: the
 * jiffies handler would scale every written value by HZ.
 */
static ctl_table nf_ct_sysctl_table[] = {
	{
		/* NOTE(review): the variable is unsigned long but is
		   exposed with an int-sized maxlen - confirm this is
		   intended on 64-bit. */
		.ctl_name	= NET_NF_CONNTRACK_ICMPV6_TIMEOUT,
		.procname	= "nf_conntrack_icmpv6_timeout",
		.data		= &nf_ct_icmpv6_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_TIMEOUT,
		.procname	= "nf_conntrack_frag6_timeout",
		.data		= &nf_ct_frag6_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
		.procname	= "nf_conntrack_frag6_low_thresh",
		.data		= &nf_ct_frag6_low_thresh,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
		.procname	= "nf_conntrack_frag6_high_thresh",
		.data		= &nf_ct_frag6_high_thresh,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }
};

/* "netfilter" directory holding the table above. */
static ctl_table nf_ct_netfilter_table[] = {
	{
		.ctl_name	= NET_NETFILTER,
		.procname	= "netfilter",
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

/* Top-level "net" directory; this is the table that gets registered. */
static ctl_table nf_ct_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};
#endif
403
404struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
405 .l3proto = PF_INET6,
406 .name = "ipv6",
407 .pkt_to_tuple = ipv6_pkt_to_tuple,
408 .invert_tuple = ipv6_invert_tuple,
409 .print_tuple = ipv6_print_tuple,
410 .print_conntrack = ipv6_print_conntrack,
411 .prepare = ipv6_prepare,
412 .get_features = ipv6_get_features,
413 .me = THIS_MODULE,
414};
415
416extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
417extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
418extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6;
419extern int nf_ct_frag6_init(void);
420extern void nf_ct_frag6_cleanup(void);
421static int init_or_cleanup(int init)
422{
423 int ret = 0;
424
425 if (!init) goto cleanup;
426
427 ret = nf_ct_frag6_init();
428 if (ret < 0) {
429 printk("nf_conntrack_ipv6: can't initialize frag6.\n");
430 goto cleanup_nothing;
431 }
432 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6);
433 if (ret < 0) {
434 printk("nf_conntrack_ipv6: can't register tcp.\n");
435 goto cleanup_frag6;
436 }
437
438 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6);
439 if (ret < 0) {
440 printk("nf_conntrack_ipv6: can't register udp.\n");
441 goto cleanup_tcp;
442 }
443
444 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6);
445 if (ret < 0) {
446 printk("nf_conntrack_ipv6: can't register icmpv6.\n");
447 goto cleanup_udp;
448 }
449
450 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
451 if (ret < 0) {
452 printk("nf_conntrack_ipv6: can't register ipv6\n");
453 goto cleanup_icmpv6;
454 }
455
456 ret = nf_register_hook(&ipv6_conntrack_defrag_ops);
457 if (ret < 0) {
458 printk("nf_conntrack_ipv6: can't register pre-routing defrag "
459 "hook.\n");
460 goto cleanup_ipv6;
461 }
462
463 ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops);
464 if (ret < 0) {
465 printk("nf_conntrack_ipv6: can't register local_out defrag "
466 "hook.\n");
467 goto cleanup_defragops;
468 }
469
470 ret = nf_register_hook(&ipv6_conntrack_in_ops);
471 if (ret < 0) {
472 printk("nf_conntrack_ipv6: can't register pre-routing hook.\n");
473 goto cleanup_defraglocalops;
474 }
475
476 ret = nf_register_hook(&ipv6_conntrack_local_out_ops);
477 if (ret < 0) {
478 printk("nf_conntrack_ipv6: can't register local out hook.\n");
479 goto cleanup_inops;
480 }
481
482 ret = nf_register_hook(&ipv6_conntrack_out_ops);
483 if (ret < 0) {
484 printk("nf_conntrack_ipv6: can't register post-routing hook.\n");
485 goto cleanup_inandlocalops;
486 }
487
488 ret = nf_register_hook(&ipv6_conntrack_local_in_ops);
489 if (ret < 0) {
490 printk("nf_conntrack_ipv6: can't register local in hook.\n");
491 goto cleanup_inoutandlocalops;
492 }
493
494#ifdef CONFIG_SYSCTL
495 nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
496 if (nf_ct_ipv6_sysctl_header == NULL) {
497 printk("nf_conntrack: can't register to sysctl.\n");
498 ret = -ENOMEM;
499 goto cleanup_localinops;
500 }
501#endif
502 return ret;
503
504 cleanup:
505 synchronize_net();
506#ifdef CONFIG_SYSCTL
507 unregister_sysctl_table(nf_ct_ipv6_sysctl_header);
508 cleanup_localinops:
509#endif
510 nf_unregister_hook(&ipv6_conntrack_local_in_ops);
511 cleanup_inoutandlocalops:
512 nf_unregister_hook(&ipv6_conntrack_out_ops);
513 cleanup_inandlocalops:
514 nf_unregister_hook(&ipv6_conntrack_local_out_ops);
515 cleanup_inops:
516 nf_unregister_hook(&ipv6_conntrack_in_ops);
517 cleanup_defraglocalops:
518 nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops);
519 cleanup_defragops:
520 nf_unregister_hook(&ipv6_conntrack_defrag_ops);
521 cleanup_ipv6:
522 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
523 cleanup_icmpv6:
524 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6);
525 cleanup_udp:
526 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6);
527 cleanup_tcp:
528 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6);
529 cleanup_frag6:
530 nf_ct_frag6_cleanup();
531 cleanup_nothing:
532 return ret;
533}
534
535MODULE_LICENSE("GPL");
536MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
537
538static int __init init(void)
539{
540 need_nf_conntrack();
541 return init_or_cleanup(1);
542}
543
544static void __exit fini(void)
545{
546 init_or_cleanup(0);
547}
548
549module_init(init);
550module_exit(fini);
551
/*
 * Intentionally empty exported function.
 * NOTE(review): presumably other modules call this only to create a
 * symbol dependency on this module - confirm against callers.
 */
void need_ip6_conntrack(void)
{
	/* nothing to do */
}
555
556EXPORT_SYMBOL(need_ip6_conntrack);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 000000000000..c0f1da5497a9
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,272 @@
1/*
2 * Copyright (C)2003,2004 USAGI/WIDE Project
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Author:
9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
10 *
11 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - ICMPv6 tracking support. Derived from the original ip_conntrack code
13 * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
14 * copyright information:
15 * (C) 1999-2001 Paul `Rusty' Russell
16 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
17 */
18
19#include <linux/types.h>
20#include <linux/sched.h>
21#include <linux/timer.h>
22#include <linux/module.h>
23#include <linux/netfilter.h>
24#include <linux/in6.h>
25#include <linux/icmpv6.h>
26#include <linux/ipv6.h>
27#include <net/ipv6.h>
28#include <net/ip6_checksum.h>
29#include <linux/seq_file.h>
30#include <linux/netfilter_ipv6.h>
31#include <net/netfilter/nf_conntrack_tuple.h>
32#include <net/netfilter/nf_conntrack_protocol.h>
33#include <net/netfilter/nf_conntrack_core.h>
34#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
35
36unsigned long nf_ct_icmpv6_timeout = 30*HZ;
37
38#if 0
39#define DEBUGP printk
40#else
41#define DEBUGP(format, args...)
42#endif
43
44static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
45 unsigned int dataoff,
46 struct nf_conntrack_tuple *tuple)
47{
48 struct icmp6hdr _hdr, *hp;
49
50 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
51 if (hp == NULL)
52 return 0;
53 tuple->dst.u.icmp.type = hp->icmp6_type;
54 tuple->src.u.icmp.id = hp->icmp6_identifier;
55 tuple->dst.u.icmp.code = hp->icmp6_code;
56
57 return 1;
58}
59
60static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
61 const struct nf_conntrack_tuple *orig)
62{
63 /* Add 1; spaces filled with 0. */
64 static u_int8_t invmap[] = {
65 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
66 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
67 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1,
68 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
69 };
70
71 __u8 type = orig->dst.u.icmp.type - 128;
72 if (type >= sizeof(invmap) || !invmap[type])
73 return 0;
74
75 tuple->src.u.icmp.id = orig->src.u.icmp.id;
76 tuple->dst.u.icmp.type = invmap[type] - 1;
77 tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
78 return 1;
79}
80
81/* Print out the per-protocol part of the tuple. */
82static int icmpv6_print_tuple(struct seq_file *s,
83 const struct nf_conntrack_tuple *tuple)
84{
85 return seq_printf(s, "type=%u code=%u id=%u ",
86 tuple->dst.u.icmp.type,
87 tuple->dst.u.icmp.code,
88 ntohs(tuple->src.u.icmp.id));
89}
90
/* Print the ICMPv6 private part of a conntrack entry. Nothing is
   printed here; the function only returns 0. */
static int icmpv6_print_conntrack(struct seq_file *s,
				  const struct nf_conn *conntrack)
{
	return 0;
}
97
98/* Returns verdict for packet, or -1 for invalid. */
99static int icmpv6_packet(struct nf_conn *ct,
100 const struct sk_buff *skb,
101 unsigned int dataoff,
102 enum ip_conntrack_info ctinfo,
103 int pf,
104 unsigned int hooknum)
105{
106 /* Try to delete connection immediately after all replies:
107 won't actually vanish as we still have skb, and del_timer
108 means this will only run once even if count hits zero twice
109 (theoretically possible with SMP) */
110 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
111 if (atomic_dec_and_test(&ct->proto.icmp.count)
112 && del_timer(&ct->timeout))
113 ct->timeout.function((unsigned long)ct);
114 } else {
115 atomic_inc(&ct->proto.icmp.count);
116 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
117 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
118 }
119
120 return NF_ACCEPT;
121}
122
123/* Called when a new connection for this protocol found. */
124static int icmpv6_new(struct nf_conn *conntrack,
125 const struct sk_buff *skb,
126 unsigned int dataoff)
127{
128 static u_int8_t valid_new[] = {
129 [ICMPV6_ECHO_REQUEST - 128] = 1,
130 [ICMPV6_NI_QUERY - 128] = 1
131 };
132
133 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new)
134 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) {
135 /* Can't create a new ICMPv6 `conn' with this. */
136 DEBUGP("icmp: can't create new conn with type %u\n",
137 conntrack->tuplehash[0].tuple.dst.u.icmp.type);
138 NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
139 return 0;
140 }
141 atomic_set(&conntrack->proto.icmp.count, 0);
142 return 1;
143}
144
145extern int
146nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len);
147extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
148static int
149icmpv6_error_message(struct sk_buff *skb,
150 unsigned int icmp6off,
151 enum ip_conntrack_info *ctinfo,
152 unsigned int hooknum)
153{
154 struct nf_conntrack_tuple intuple, origtuple;
155 struct nf_conntrack_tuple_hash *h;
156 struct icmp6hdr _hdr, *hp;
157 unsigned int inip6off;
158 struct nf_conntrack_protocol *inproto;
159 u_int8_t inprotonum;
160 unsigned int inprotoff;
161
162 NF_CT_ASSERT(skb->nfct == NULL);
163
164 hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr);
165 if (hp == NULL) {
166 DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n");
167 return -NF_ACCEPT;
168 }
169
170 inip6off = icmp6off + sizeof(_hdr);
171 if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr),
172 &inprotonum, sizeof(inprotonum)) != 0) {
173 DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n");
174 return -NF_ACCEPT;
175 }
176 inprotoff = nf_ct_ipv6_skip_exthdr(skb,
177 inip6off + sizeof(struct ipv6hdr),
178 &inprotonum,
179 skb->len - inip6off
180 - sizeof(struct ipv6hdr));
181
182 if ((inprotoff < 0) || (inprotoff > skb->len) ||
183 (inprotonum == NEXTHDR_FRAGMENT)) {
184 DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n");
185 return -NF_ACCEPT;
186 }
187
188 inproto = nf_ct_find_proto(PF_INET6, inprotonum);
189
190 /* Are they talking about one of our connections? */
191 if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
192 &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) {
193 DEBUGP("icmpv6_error: Can't get tuple\n");
194 return -NF_ACCEPT;
195 }
196
197 /* Ordinarily, we'd expect the inverted tupleproto, but it's
198 been preserved inside the ICMP. */
199 if (!nf_ct_invert_tuple(&intuple, &origtuple,
200 &nf_conntrack_l3proto_ipv6, inproto)) {
201 DEBUGP("icmpv6_error: Can't invert tuple\n");
202 return -NF_ACCEPT;
203 }
204
205 *ctinfo = IP_CT_RELATED;
206
207 h = nf_conntrack_find_get(&intuple, NULL);
208 if (!h) {
209 DEBUGP("icmpv6_error: no match\n");
210 return -NF_ACCEPT;
211 } else {
212 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
213 *ctinfo += IP_CT_IS_REPLY;
214 }
215
216 /* Update skb to refer to this connection */
217 skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
218 skb->nfctinfo = *ctinfo;
219 return -NF_ACCEPT;
220}
221
222static int
223icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
224 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
225{
226 struct icmp6hdr _ih, *icmp6h;
227
228 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
229 if (icmp6h == NULL) {
230 if (LOG_INVALID(IPPROTO_ICMPV6))
231 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
232 "nf_ct_icmpv6: short packet ");
233 return -NF_ACCEPT;
234 }
235
236 if (hooknum != NF_IP6_PRE_ROUTING)
237 goto skipped;
238
239 /* Ignore it if the checksum's bogus. */
240 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
241 skb->len - dataoff, IPPROTO_ICMPV6,
242 skb_checksum(skb, dataoff,
243 skb->len - dataoff, 0))) {
244 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
245 "nf_ct_icmpv6: ICMPv6 checksum failed\n");
246 return -NF_ACCEPT;
247 }
248
249skipped:
250
251 /* is not error message ? */
252 if (icmp6h->icmp6_type >= 128)
253 return NF_ACCEPT;
254
255 return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
256}
257
258struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
259{
260 .l3proto = PF_INET6,
261 .proto = IPPROTO_ICMPV6,
262 .name = "icmpv6",
263 .pkt_to_tuple = icmpv6_pkt_to_tuple,
264 .invert_tuple = icmpv6_invert_tuple,
265 .print_tuple = icmpv6_print_tuple,
266 .print_conntrack = icmpv6_print_conntrack,
267 .packet = icmpv6_packet,
268 .new = icmpv6_new,
269 .error = icmpv6_error,
270};
271
272EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
new file mode 100644
index 000000000000..7640b9bb7694
--- /dev/null
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -0,0 +1,885 @@
1/*
2 * IPv6 fragment reassembly for connection tracking
3 *
4 * Copyright (C)2004 USAGI/WIDE Project
5 *
6 * Author:
7 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
8 *
9 * Based on: net/ipv6/reassembly.c
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17#include <linux/config.h>
18#include <linux/errno.h>
19#include <linux/types.h>
20#include <linux/string.h>
21#include <linux/socket.h>
22#include <linux/sockios.h>
23#include <linux/jiffies.h>
24#include <linux/net.h>
25#include <linux/list.h>
26#include <linux/netdevice.h>
27#include <linux/in6.h>
28#include <linux/ipv6.h>
29#include <linux/icmpv6.h>
30#include <linux/random.h>
31#include <linux/jhash.h>
32
33#include <net/sock.h>
34#include <net/snmp.h>
35
36#include <net/ipv6.h>
37#include <net/protocol.h>
38#include <net/transp_v6.h>
39#include <net/rawv6.h>
40#include <net/ndisc.h>
41#include <net/addrconf.h>
42#include <linux/sysctl.h>
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45#include <linux/kernel.h>
46#include <linux/module.h>
47
48#if 0
49#define DEBUGP printk
50#else
51#define DEBUGP(format, args...)
52#endif
53
54#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
55#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */
56#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
57
58int nf_ct_frag6_high_thresh = 256*1024;
59int nf_ct_frag6_low_thresh = 192*1024;
60int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
61
/*
 * Per-skb private data kept in skb->cb for every fragment clone that is
 * held in a reassembly queue.
 */
struct nf_ct_frag6_skb_cb
{
	struct inet6_skb_parm	h;	/* first member of the control block */
	int			offset;	/* offset of this fragment's data in the
					 * reassembled payload (set when queued) */
	struct sk_buff		*orig;	/* original skb this clone was made from;
					 * NULL for helper skbs created during
					 * reassembly */
};

/* View skb->cb as a struct nf_ct_frag6_skb_cb. */
#define NFCT_FRAG6_CB(skb)	((struct nf_ct_frag6_skb_cb*)((skb)->cb))
70
/*
 * One reassembly queue: all fragments seen so far for a single
 * (fragment id, source address, destination address) triple.
 */
struct nf_ct_frag6_queue
{
	struct nf_ct_frag6_queue	*next;	/* next queue in the same hash chain */
	struct list_head lru_list;	/* lru list member */

	__u32			id;		/* fragment id		*/
	struct in6_addr		saddr;		/* lookup key: source address */
	struct in6_addr		daddr;		/* lookup key: destination address */

	spinlock_t		lock;		/* taken around queueing, reassembly
						 * and killing of this queue */
	atomic_t		refcnt;		/* freed when this drops to zero */
	struct timer_list	timer;		/* expire timer		*/
	struct sk_buff		*fragments;	/* fragment list, kept sorted by offset */
	int			len;		/* largest fragment end seen; the total
						 * length once the last fragment arrived */
	int			meat;		/* payload bytes currently queued */
	struct timeval		stamp;		/* timestamp of the most recently
						 * queued fragment */
	unsigned int		csum;		/* not referenced in this part of the
						 * file — NOTE(review): confirm use */
	__u8			last_in;	/* has first/last segment arrived? */
#define COMPLETE		4	/* queue was killed / already reassembled */
#define FIRST_IN		2	/* fragment with offset 0 has arrived */
#define LAST_IN			1	/* fragment without "more fragments" arrived */
	__u16			nhoffset;	/* offset of the next-header field that
						 * precedes the fragment header, taken
						 * from the first fragment */
	struct nf_ct_frag6_queue	**pprev;	/* address of the pointer that points
							 * at this queue in its hash chain */
};
95
/* Hash table. */

/* Number of buckets; ip6qhashfn() masks with (FRAG6Q_HASHSZ - 1), so this
 * has to stay a power of two. */
#define FRAG6Q_HASHSZ	64

/* Buckets of singly linked reassembly queues (linked via next/pprev). */
static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
/* Taken for writing around changes to the hash chains and the LRU list,
 * and for reading around lookups. */
static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
/* Random value mixed into the hash; replaced periodically by the secret timer. */
static u32 nf_ct_frag6_hash_rnd;
/* All queues; new and recently used ones are moved to the tail. */
static LIST_HEAD(nf_ct_frag6_lru_list);
/* Number of queues currently linked into the hash table. */
int nf_ct_frag6_nqueues = 0;
105
106static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
107{
108 if (fq->next)
109 fq->next->pprev = fq->pprev;
110 *fq->pprev = fq->next;
111 list_del(&fq->lru_list);
112 nf_ct_frag6_nqueues--;
113}
114
115static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
116{
117 write_lock(&nf_ct_frag6_lock);
118 __fq_unlink(fq);
119 write_unlock(&nf_ct_frag6_lock);
120}
121
122static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
123 struct in6_addr *daddr)
124{
125 u32 a, b, c;
126
127 a = saddr->s6_addr32[0];
128 b = saddr->s6_addr32[1];
129 c = saddr->s6_addr32[2];
130
131 a += JHASH_GOLDEN_RATIO;
132 b += JHASH_GOLDEN_RATIO;
133 c += nf_ct_frag6_hash_rnd;
134 __jhash_mix(a, b, c);
135
136 a += saddr->s6_addr32[3];
137 b += daddr->s6_addr32[0];
138 c += daddr->s6_addr32[1];
139 __jhash_mix(a, b, c);
140
141 a += daddr->s6_addr32[2];
142 b += daddr->s6_addr32[3];
143 c += id;
144 __jhash_mix(a, b, c);
145
146 return c & (FRAG6Q_HASHSZ - 1);
147}
148
149static struct timer_list nf_ct_frag6_secret_timer;
150int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
151
152static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
153{
154 unsigned long now = jiffies;
155 int i;
156
157 write_lock(&nf_ct_frag6_lock);
158 get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
159 for (i = 0; i < FRAG6Q_HASHSZ; i++) {
160 struct nf_ct_frag6_queue *q;
161
162 q = nf_ct_frag6_hash[i];
163 while (q) {
164 struct nf_ct_frag6_queue *next = q->next;
165 unsigned int hval = ip6qhashfn(q->id,
166 &q->saddr,
167 &q->daddr);
168
169 if (hval != i) {
170 /* Unlink. */
171 if (q->next)
172 q->next->pprev = q->pprev;
173 *q->pprev = q->next;
174
175 /* Relink to new hash chain. */
176 if ((q->next = nf_ct_frag6_hash[hval]) != NULL)
177 q->next->pprev = &q->next;
178 nf_ct_frag6_hash[hval] = q;
179 q->pprev = &nf_ct_frag6_hash[hval];
180 }
181
182 q = next;
183 }
184 }
185 write_unlock(&nf_ct_frag6_lock);
186
187 mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
188}
189
190atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
191
192/* Memory Tracking Functions. */
193static inline void frag_kfree_skb(struct sk_buff *skb)
194{
195 atomic_sub(skb->truesize, &nf_ct_frag6_mem);
196 if (NFCT_FRAG6_CB(skb)->orig)
197 kfree_skb(NFCT_FRAG6_CB(skb)->orig);
198
199 kfree_skb(skb);
200}
201
202static inline void frag_free_queue(struct nf_ct_frag6_queue *fq)
203{
204 atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
205 kfree(fq);
206}
207
208static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
209{
210 struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
211
212 if (!fq)
213 return NULL;
214 atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
215 return fq;
216}
217
218/* Destruction primitives. */
219
220/* Complete destruction of fq. */
221static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq)
222{
223 struct sk_buff *fp;
224
225 BUG_TRAP(fq->last_in&COMPLETE);
226 BUG_TRAP(del_timer(&fq->timer) == 0);
227
228 /* Release all fragment data. */
229 fp = fq->fragments;
230 while (fp) {
231 struct sk_buff *xp = fp->next;
232
233 frag_kfree_skb(fp);
234 fp = xp;
235 }
236
237 frag_free_queue(fq);
238}
239
240static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
241{
242 if (atomic_dec_and_test(&fq->refcnt))
243 nf_ct_frag6_destroy(fq);
244}
245
246/* Kill fq entry. It is not destroyed immediately,
247 * because caller (and someone more) holds reference count.
248 */
249static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
250{
251 if (del_timer(&fq->timer))
252 atomic_dec(&fq->refcnt);
253
254 if (!(fq->last_in & COMPLETE)) {
255 fq_unlink(fq);
256 atomic_dec(&fq->refcnt);
257 fq->last_in |= COMPLETE;
258 }
259}
260
261static void nf_ct_frag6_evictor(void)
262{
263 struct nf_ct_frag6_queue *fq;
264 struct list_head *tmp;
265
266 for (;;) {
267 if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh)
268 return;
269 read_lock(&nf_ct_frag6_lock);
270 if (list_empty(&nf_ct_frag6_lru_list)) {
271 read_unlock(&nf_ct_frag6_lock);
272 return;
273 }
274 tmp = nf_ct_frag6_lru_list.next;
275 fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
276 atomic_inc(&fq->refcnt);
277 read_unlock(&nf_ct_frag6_lock);
278
279 spin_lock(&fq->lock);
280 if (!(fq->last_in&COMPLETE))
281 fq_kill(fq);
282 spin_unlock(&fq->lock);
283
284 fq_put(fq);
285 }
286}
287
288static void nf_ct_frag6_expire(unsigned long data)
289{
290 struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
291
292 spin_lock(&fq->lock);
293
294 if (fq->last_in & COMPLETE)
295 goto out;
296
297 fq_kill(fq);
298
299out:
300 spin_unlock(&fq->lock);
301 fq_put(fq);
302}
303
304/* Creation primitives. */
305
306
307static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
308 struct nf_ct_frag6_queue *fq_in)
309{
310 struct nf_ct_frag6_queue *fq;
311
312 write_lock(&nf_ct_frag6_lock);
313#ifdef CONFIG_SMP
314 for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
315 if (fq->id == fq_in->id &&
316 !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
317 !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
318 atomic_inc(&fq->refcnt);
319 write_unlock(&nf_ct_frag6_lock);
320 fq_in->last_in |= COMPLETE;
321 fq_put(fq_in);
322 return fq;
323 }
324 }
325#endif
326 fq = fq_in;
327
328 if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
329 atomic_inc(&fq->refcnt);
330
331 atomic_inc(&fq->refcnt);
332 if ((fq->next = nf_ct_frag6_hash[hash]) != NULL)
333 fq->next->pprev = &fq->next;
334 nf_ct_frag6_hash[hash] = fq;
335 fq->pprev = &nf_ct_frag6_hash[hash];
336 INIT_LIST_HEAD(&fq->lru_list);
337 list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
338 nf_ct_frag6_nqueues++;
339 write_unlock(&nf_ct_frag6_lock);
340 return fq;
341}
342
343
344static struct nf_ct_frag6_queue *
345nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst)
346{
347 struct nf_ct_frag6_queue *fq;
348
349 if ((fq = frag_alloc_queue()) == NULL) {
350 DEBUGP("Can't alloc new queue\n");
351 goto oom;
352 }
353
354 memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
355
356 fq->id = id;
357 ipv6_addr_copy(&fq->saddr, src);
358 ipv6_addr_copy(&fq->daddr, dst);
359
360 init_timer(&fq->timer);
361 fq->timer.function = nf_ct_frag6_expire;
362 fq->timer.data = (long) fq;
363 fq->lock = SPIN_LOCK_UNLOCKED;
364 atomic_set(&fq->refcnt, 1);
365
366 return nf_ct_frag6_intern(hash, fq);
367
368oom:
369 return NULL;
370}
371
372static __inline__ struct nf_ct_frag6_queue *
373fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
374{
375 struct nf_ct_frag6_queue *fq;
376 unsigned int hash = ip6qhashfn(id, src, dst);
377
378 read_lock(&nf_ct_frag6_lock);
379 for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
380 if (fq->id == id &&
381 !ipv6_addr_cmp(src, &fq->saddr) &&
382 !ipv6_addr_cmp(dst, &fq->daddr)) {
383 atomic_inc(&fq->refcnt);
384 read_unlock(&nf_ct_frag6_lock);
385 return fq;
386 }
387 }
388 read_unlock(&nf_ct_frag6_lock);
389
390 return nf_ct_frag6_create(hash, id, src, dst);
391}
392
393
/*
 * Insert fragment @skb into reassembly queue @fq.
 *
 * @fq:    queue the fragment belongs to; nf_ct_frag6_gather() calls this
 *         with fq->lock held
 * @skb:   the fragment; it is pulled/trimmed so that only fragment payload
 *         remains and is then linked into fq->fragments
 * @fhdr:  fragment header inside @skb
 * @nhoff: offset of the next-header field preceding the fragment header;
 *         recorded in the queue when this is the first fragment
 *
 * Overlaps with already queued fragments are resolved by trimming the new
 * fragment at the front and trimming or dropping queued fragments behind it.
 *
 * Returns 0 when the fragment was queued and -1 when it was rejected (the
 * caller keeps ownership of @skb in that case).
 */
static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
			     struct frag_hdr *fhdr, int nhoff)
{
	struct sk_buff *prev, *next;
	int offset, end;

	if (fq->last_in & COMPLETE) {
		DEBUGP("Allready completed\n");
		goto err;
	}

	/* Fragment offset in bytes (low three bits of the field are flags),
	 * and end offset: offset plus the payload that follows the fragment
	 * header. */
	offset = ntohs(fhdr->frag_off) & ~0x7;
	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));

	if ((unsigned int)end > IPV6_MAXPLEN) {
		DEBUGP("offset is too large.\n");
		return -1;
	}

	/* Remove the headers in front of the payload from a hardware checksum. */
	if (skb->ip_summed == CHECKSUM_HW)
		skb->csum = csum_sub(skb->csum,
				     csum_partial(skb->nh.raw,
						  (u8*)(fhdr + 1) - skb->nh.raw,
						  0));

	/* Is this the final fragment? */
	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < fq->len ||
		    ((fq->last_in & LAST_IN) && end != fq->len)) {
			DEBUGP("already received last fragment\n");
			goto err;
		}
		fq->last_in |= LAST_IN;
		fq->len = end;
	} else {
		/* Check if the fragment is rounded to 8 bytes.
		 * Required by the RFC.
		 */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in
			 * this case. -DaveM
			 */
			DEBUGP("the end of this fragment is not rounded to 8 bytes.\n");
			return -1;
		}
		if (end > fq->len) {
			/* Some bits beyond end -> corruption. */
			if (fq->last_in & LAST_IN) {
				DEBUGP("last packet already reached.\n");
				goto err;
			}
			fq->len = end;
		}
	}

	/* A fragment without any payload is rejected. */
	if (end == offset)
		goto err;

	/* Point into the IP datagram 'data' part. */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
		DEBUGP("queue: message is too short.\n");
		goto err;
	}
	/* Drop anything in the skb beyond the fragment payload. */
	if (end-offset < skb->len) {
		if (pskb_trim(skb, end - offset)) {
			DEBUGP("Can't trim\n");
			goto err;
		}
		if (skb->ip_summed != CHECKSUM_UNNECESSARY)
			skb->ip_summed = CHECKSUM_NONE;
	}

	/* Find out which fragments are in front and at the back of us
	 * in the chain of fragments so far.  We must know where to put
	 * this fragment, right?
	 */
	prev = NULL;
	for (next = fq->fragments; next != NULL; next = next->next) {
		if (NFCT_FRAG6_CB(next)->offset >= offset)
			break;	/* bingo! */
		prev = next;
	}

	/* We found where to put this one.  Check for overlap with
	 * preceding fragment, and, if needed, align things so that
	 * any overlaps are eliminated.
	 */
	if (prev) {
		int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset;

		if (i > 0) {
			offset += i;
			if (end <= offset) {
				DEBUGP("overlap\n");
				goto err;
			}
			if (!pskb_pull(skb, i)) {
				DEBUGP("Can't pull\n");
				goto err;
			}
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
	}

	/* Look for overlap with succeeding segments.
	 * If we can merge fragments, do it.
	 */
	while (next && NFCT_FRAG6_CB(next)->offset < end) {
		/* overlap is 'i' bytes */
		int i = end - NFCT_FRAG6_CB(next)->offset;

		if (i < next->len) {
			/* Eat head of the next overlapped fragment
			 * and leave the loop. The next ones cannot overlap.
			 */
			DEBUGP("Eat head of the overlapped parts.: %d", i);
			if (!pskb_pull(next, i))
				goto err;

			/* next fragment */
			NFCT_FRAG6_CB(next)->offset += i;
			fq->meat -= i;
			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
			break;
		} else {
			struct sk_buff *free_it = next;

			/* The old fragment is completely overridden by
			 * the new one: drop it.
			 */
			next = next->next;

			if (prev)
				prev->next = next;
			else
				fq->fragments = next;

			fq->meat -= free_it->len;
			frag_kfree_skb(free_it);
		}
	}

	NFCT_FRAG6_CB(skb)->offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
		fq->fragments = skb;

	/* Account for the newly queued fragment. */
	skb->dev = NULL;
	skb_get_timestamp(skb, &fq->stamp);
	fq->meat += skb->len;
	atomic_add(skb->truesize, &nf_ct_frag6_mem);

	/* The first fragment.
	 * nhoffset is obtained from the first fragment, of course.
	 */
	if (offset == 0) {
		fq->nhoffset = nhoff;
		fq->last_in |= FIRST_IN;
	}
	/* The queue was just used: move it to the tail of the LRU list. */
	write_lock(&nf_ct_frag6_lock);
	list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
	write_unlock(&nf_ct_frag6_lock);
	return 0;

err:
	return -1;
}
571
/*
 *	Build the reassembled packet from the fragments queued in fq.
 *
 *	Returns the head skb of the reassembled packet (the remaining
 *	fragments hang off its frag_list and the original skbs are chained
 *	through NFCT_FRAG6_CB(head)->orig), or NULL on failure by any reason.
 *	The queue is killed in either case.
 *
 *	It is called with locked fq, and caller must check that
 *	queue is eligible for reassembly i.e. it is not COMPLETE,
 *	the last and the first frames arrived and all the bits are here.
 *
 *	@dev is stored in the returned skb's dev field.
 */
static struct sk_buff *
nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
{
	struct sk_buff *fp, *op, *head = fq->fragments;
	int    payload_len;

	/* Unhash the queue and stop its timer; it is COMPLETE from here on. */
	fq_kill(fq);

	BUG_TRAP(head != NULL);
	BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);

	/* Unfragmented part is taken from the first segment. */
	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
	if (payload_len > IPV6_MAXPLEN) {
		DEBUGP("payload len is too large.\n");
		goto out_oversize;
	}

	/* Head of list must not be cloned. */
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
		DEBUGP("skb is cloned but can't expand head");
		goto out_oom;
	}

	/* If the first fragment is fragmented itself, we split
	 * it to two chunks: the first with data and paged part
	 * and the second, holding only fragments. */
	if (skb_shinfo(head)->frag_list) {
		struct sk_buff *clone;
		int i, plen = 0;

		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
			DEBUGP("Can't alloc skb\n");
			goto out_oom;
		}
		clone->next = head->next;
		head->next = clone;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_shinfo(head)->frag_list = NULL;
		/* plen: bytes held in head's page fragments; whatever else
		 * was in data_len belonged to the frag_list moved above. */
		for (i=0; i<skb_shinfo(head)->nr_frags; i++)
			plen += skb_shinfo(head)->frags[i].size;
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;

		/* No original skb belongs to this helper skb. */
		NFCT_FRAG6_CB(clone)->orig = NULL;
		atomic_add(clone->truesize, &nf_ct_frag6_mem);
	}

	/* We have to remove fragment header from datagram and to relocate
	 * header in order to calculate ICV correctly. */
	head->nh.raw[fq->nhoffset] = head->h.raw[0];
	memmove(head->head + sizeof(struct frag_hdr), head->head,
		(head->data - head->head) - sizeof(struct frag_hdr));
	head->mac.raw += sizeof(struct frag_hdr);
	head->nh.raw += sizeof(struct frag_hdr);

	/* The remaining fragments become head's frag_list. */
	skb_shinfo(head)->frag_list = head->next;
	head->h.raw = head->data;
	skb_push(head, head->data - head->nh.raw);
	atomic_sub(head->truesize, &nf_ct_frag6_mem);

	/* Fold the lengths, checksum state and truesize of every remaining
	 * fragment into head and stop accounting them to the queue. */
	for (fp=head->next; fp; fp = fp->next) {
		head->data_len += fp->len;
		head->len += fp->len;
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_HW)
			head->csum = csum_add(head->csum, fp->csum);
		head->truesize += fp->truesize;
		atomic_sub(fp->truesize, &nf_ct_frag6_mem);
	}

	head->next = NULL;
	head->dev = dev;
	skb_set_timestamp(head, &fq->stamp);
	head->nh.ipv6h->payload_len = htons(payload_len);

	/* Yes, and fold redundant checksum back. 8) */
	if (head->ip_summed == CHECKSUM_HW)
		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);

	/* The fragments now belong to head, not to the queue. */
	fq->fragments = NULL;

	/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
	fp = skb_shinfo(head)->frag_list;
	if (NFCT_FRAG6_CB(fp)->orig == NULL)
		/* at above code, head skb is divided into two skbs. */
		fp = fp->next;

	/* Chain the originals through their ->next pointers, starting at
	 * head's original, and detach them from the fragment clones. */
	op = NFCT_FRAG6_CB(head)->orig;
	for (; fp; fp = fp->next) {
		struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;

		op->next = orig;
		op = orig;
		NFCT_FRAG6_CB(fp)->orig = NULL;
	}

	return head;

out_oversize:
	if (net_ratelimit())
		printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
	goto out_fail;
out_oom:
	if (net_ratelimit())
		printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
out_fail:
	return NULL;
}
694
695/*
696 * find the header just before Fragment Header.
697 *
698 * if success return 0 and set ...
699 * (*prevhdrp): the value of "Next Header Field" in the header
700 * just before Fragment Header.
701 * (*prevhoff): the offset of "Next Header Field" in the header
702 * just before Fragment Header.
703 * (*fhoff) : the offset of Fragment Header.
704 *
705 * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c
706 *
707 */
708static int
709find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
710{
711 u8 nexthdr = skb->nh.ipv6h->nexthdr;
712 u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
713 int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
714 int len = skb->len - start;
715 u8 prevhdr = NEXTHDR_IPV6;
716
717 while (nexthdr != NEXTHDR_FRAGMENT) {
718 struct ipv6_opt_hdr hdr;
719 int hdrlen;
720
721 if (!ipv6_ext_hdr(nexthdr)) {
722 return -1;
723 }
724 if (len < (int)sizeof(struct ipv6_opt_hdr)) {
725 DEBUGP("too short\n");
726 return -1;
727 }
728 if (nexthdr == NEXTHDR_NONE) {
729 DEBUGP("next header is none\n");
730 return -1;
731 }
732 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
733 BUG();
734 if (nexthdr == NEXTHDR_AUTH)
735 hdrlen = (hdr.hdrlen+2)<<2;
736 else
737 hdrlen = ipv6_optlen(&hdr);
738
739 prevhdr = nexthdr;
740 prev_nhoff = start;
741
742 nexthdr = hdr.nexthdr;
743 len -= hdrlen;
744 start += hdrlen;
745 }
746
747 if (len < 0)
748 return -1;
749
750 *prevhdrp = prevhdr;
751 *prevhoff = prev_nhoff;
752 *fhoff = start;
753
754 return 0;
755}
756
757struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
758{
759 struct sk_buff *clone;
760 struct net_device *dev = skb->dev;
761 struct frag_hdr *fhdr;
762 struct nf_ct_frag6_queue *fq;
763 struct ipv6hdr *hdr;
764 int fhoff, nhoff;
765 u8 prevhdr;
766 struct sk_buff *ret_skb = NULL;
767
768 /* Jumbo payload inhibits frag. header */
769 if (skb->nh.ipv6h->payload_len == 0) {
770 DEBUGP("payload len = 0\n");
771 return skb;
772 }
773
774 if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
775 return skb;
776
777 clone = skb_clone(skb, GFP_ATOMIC);
778 if (clone == NULL) {
779 DEBUGP("Can't clone skb\n");
780 return skb;
781 }
782
783 NFCT_FRAG6_CB(clone)->orig = skb;
784
785 if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
786 DEBUGP("message is too short.\n");
787 goto ret_orig;
788 }
789
790 clone->h.raw = clone->data + fhoff;
791 hdr = clone->nh.ipv6h;
792 fhdr = (struct frag_hdr *)clone->h.raw;
793
794 if (!(fhdr->frag_off & htons(0xFFF9))) {
795 DEBUGP("Invalid fragment offset\n");
796 /* It is not a fragmented frame */
797 goto ret_orig;
798 }
799
800 if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
801 nf_ct_frag6_evictor();
802
803 fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
804 if (fq == NULL) {
805 DEBUGP("Can't find and can't create new queue\n");
806 goto ret_orig;
807 }
808
809 spin_lock(&fq->lock);
810
811 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
812 spin_unlock(&fq->lock);
813 DEBUGP("Can't insert skb to queue\n");
814 fq_put(fq);
815 goto ret_orig;
816 }
817
818 if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
819 ret_skb = nf_ct_frag6_reasm(fq, dev);
820 if (ret_skb == NULL)
821 DEBUGP("Can't reassemble fragmented packets\n");
822 }
823 spin_unlock(&fq->lock);
824
825 fq_put(fq);
826 return ret_skb;
827
828ret_orig:
829 kfree_skb(clone);
830 return skb;
831}
832
833void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
834 struct net_device *in, struct net_device *out,
835 int (*okfn)(struct sk_buff *))
836{
837 struct sk_buff *s, *s2;
838
839 for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
840 nf_conntrack_put_reasm(s->nfct_reasm);
841 nf_conntrack_get_reasm(skb);
842 s->nfct_reasm = skb;
843
844 s2 = s->next;
845 NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn,
846 NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
847 s = s2;
848 }
849 nf_conntrack_put_reasm(skb);
850}
851
852int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
853{
854 struct sk_buff *s, *s2;
855
856 for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) {
857
858 s2 = s->next;
859 kfree_skb(s);
860 }
861
862 kfree_skb(skb);
863
864 return 0;
865}
866
867int nf_ct_frag6_init(void)
868{
869 nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
870 (jiffies ^ (jiffies >> 6)));
871
872 init_timer(&nf_ct_frag6_secret_timer);
873 nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
874 nf_ct_frag6_secret_timer.expires = jiffies
875 + nf_ct_frag6_secret_interval;
876 add_timer(&nf_ct_frag6_secret_timer);
877
878 return 0;
879}
880
881void nf_ct_frag6_cleanup(void)
882{
883 del_timer(&nf_ct_frag6_secret_timer);
884 nf_ct_frag6_evictor();
885}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a1265a320b11..651c79b41eeb 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -174,8 +174,10 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
174 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); 174 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
175 175
176 /* Not releasing hash table! */ 176 /* Not releasing hash table! */
177 if (clone) 177 if (clone) {
178 nf_reset(clone);
178 rawv6_rcv(sk, clone); 179 rawv6_rcv(sk, clone);
180 }
179 } 181 }
180 sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, 182 sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
181 IP6CB(skb)->iif); 183 IP6CB(skb)->iif);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 227e99ed510c..f7f42c3e96cb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1710,7 +1710,7 @@ static void fib6_dump_end(struct netlink_callback *cb)
1710static int fib6_dump_done(struct netlink_callback *cb) 1710static int fib6_dump_done(struct netlink_callback *cb)
1711{ 1711{
1712 fib6_dump_end(cb); 1712 fib6_dump_end(cb);
1713 return cb->done(cb); 1713 return cb->done ? cb->done(cb) : 0;
1714} 1714}
1715 1715
1716int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 1716int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8296b38bf270..a84f9221e5f0 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1,3 +1,6 @@
1menu "Core Netfilter Configuration"
2 depends on NET && NETFILTER
3
1config NETFILTER_NETLINK 4config NETFILTER_NETLINK
2 tristate "Netfilter netlink interface" 5 tristate "Netfilter netlink interface"
3 help 6 help
@@ -22,3 +25,74 @@ config NETFILTER_NETLINK_LOG
22 and is also scheduled to replace the old syslog-based ipt_LOG 25 and is also scheduled to replace the old syslog-based ipt_LOG
23 and ip6t_LOG modules. 26 and ip6t_LOG modules.
24 27
28config NF_CONNTRACK
29 tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)"
30 depends on EXPERIMENTAL && IP_NF_CONNTRACK=n
31 default n
32 ---help---
33 Connection tracking keeps a record of what packets have passed
34 through your machine, in order to figure out how they are related
35 into connections.
36
37 Layer 3 independent connection tracking is an experimental scheme
38 which generalizes ip_conntrack to support other layer 3 protocols.
39
40 To compile it as a module, choose M here. If unsure, say N.
41
42config NF_CT_ACCT
43 bool "Connection tracking flow accounting"
44 depends on NF_CONNTRACK
45 help
46 If this option is enabled, the connection tracking code will
47 keep per-flow packet and byte counters.
48
49 Those counters can be used for flow-based accounting or the
50 `connbytes' match.
51
52 If unsure, say `N'.
53
54config NF_CONNTRACK_MARK
55 bool 'Connection mark tracking support'
56 depends on NF_CONNTRACK
57 help
58 This option enables support for connection marks, used by the
59 `CONNMARK' target and `connmark' match. Similar to the mark value
60 of packets, but this mark value is kept in the conntrack session
61 instead of the individual packets.
62
63config NF_CONNTRACK_EVENTS
64 bool "Connection tracking events"
65 depends on NF_CONNTRACK
66 help
67 If this option is enabled, the connection tracking code will
68 provide a notifier chain that can be used by other kernel code
69 to get notified about changes in the connection tracking state.
70
71 If unsure, say `N'.
72
73config NF_CT_PROTO_SCTP
74 tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)'
75 depends on EXPERIMENTAL && NF_CONNTRACK
76 default n
77 help
78 With this option enabled, the layer 3 independent connection
79 tracking code will be able to do state tracking on SCTP connections.
80
81 If you want to compile it as a module, say M here and read
82 Documentation/modules.txt. If unsure, say `N'.
83
84config NF_CONNTRACK_FTP
85 tristate "FTP support on new connection tracking (EXPERIMENTAL)"
86 depends on EXPERIMENTAL && NF_CONNTRACK
87 help
88 Tracking FTP connections is problematic: special helpers are
89 required for tracking them, and doing masquerading and other forms
90 of Network Address Translation on them.
91
92 This is FTP support on Layer 3 independent connection tracking.
93 Layer 3 independent connection tracking is an experimental scheme
94 which generalizes ip_conntrack to support other layer 3 protocols.
95
96 To compile it as a module, choose M here. If unsure, say N.
97
98endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index b3b44f8b415a..55f019ad2c08 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -5,3 +5,11 @@ obj-$(CONFIG_NETFILTER) = netfilter.o
5obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o 5obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
6obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o 6obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
7obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o 7obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
8
9nf_conntrack-objs := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o
10
11obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
12obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
13
14# SCTP protocol connection tracking
15obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
new file mode 100644
index 000000000000..9a67c796b385
--- /dev/null
+++ b/net/netfilter/nf_conntrack_core.c
@@ -0,0 +1,1538 @@
1/* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
14 * - new API and handling of conntrack/nat helpers
15 * - now capable of multiple expectations for one master
16 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
17 * - add usage/reference counts to ip_conntrack_expect
18 * - export ip_conntrack[_expect]_{find_get,put} functions
19 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * - generalize L3 protocol dependent part.
21 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
22 * - add support for various sizes of conntrack structures.
23 *
24 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
25 */
26
27#include <linux/config.h>
28#include <linux/types.h>
29#include <linux/netfilter.h>
30#include <linux/module.h>
31#include <linux/skbuff.h>
32#include <linux/proc_fs.h>
33#include <linux/vmalloc.h>
34#include <linux/stddef.h>
35#include <linux/slab.h>
36#include <linux/random.h>
37#include <linux/jhash.h>
38#include <linux/err.h>
39#include <linux/percpu.h>
40#include <linux/moduleparam.h>
41#include <linux/notifier.h>
42#include <linux/kernel.h>
43#include <linux/netdevice.h>
44#include <linux/socket.h>
45
46/* This rwlock protects the main hash table, protocol/helper/expected
47 registrations, conntrack timers*/
48#define ASSERT_READ_LOCK(x)
49#define ASSERT_WRITE_LOCK(x)
50
51#include <net/netfilter/nf_conntrack.h>
52#include <net/netfilter/nf_conntrack_l3proto.h>
53#include <net/netfilter/nf_conntrack_protocol.h>
54#include <net/netfilter/nf_conntrack_helper.h>
55#include <net/netfilter/nf_conntrack_core.h>
56#include <linux/netfilter_ipv4/listhelp.h>
57
58#define NF_CONNTRACK_VERSION "0.4.1"
59
60#if 0
61#define DEBUGP printk
62#else
63#define DEBUGP(format, args...)
64#endif
65
66DEFINE_RWLOCK(nf_conntrack_lock);
67
68/* nf_conntrack_standalone needs this */
69atomic_t nf_conntrack_count = ATOMIC_INIT(0);
70
71void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
72LIST_HEAD(nf_conntrack_expect_list);
73struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
74struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
75static LIST_HEAD(helpers);
76unsigned int nf_conntrack_htable_size = 0;
77int nf_conntrack_max;
78struct list_head *nf_conntrack_hash;
79static kmem_cache_t *nf_conntrack_expect_cachep;
80struct nf_conn nf_conntrack_untracked;
81unsigned int nf_ct_log_invalid;
82static LIST_HEAD(unconfirmed);
83static int nf_conntrack_vmalloc;
84
85#ifdef CONFIG_NF_CONNTRACK_EVENTS
86struct notifier_block *nf_conntrack_chain;
87struct notifier_block *nf_conntrack_expect_chain;
88
89DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
90
91/* deliver cached events and clear cache entry - must be called with locally
92 * disabled softirqs */
93static inline void
94__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
95{
96 DEBUGP("ecache: delivering events for %p\n", ecache->ct);
97 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
98 && ecache->events)
99 notifier_call_chain(&nf_conntrack_chain, ecache->events,
100 ecache->ct);
101
102 ecache->events = 0;
103 nf_ct_put(ecache->ct);
104 ecache->ct = NULL;
105}
106
107/* Deliver all cached events for a particular conntrack. This is called
108 * by code prior to async packet handling for freeing the skb */
109void nf_ct_deliver_cached_events(const struct nf_conn *ct)
110{
111 struct nf_conntrack_ecache *ecache;
112
113 local_bh_disable();
114 ecache = &__get_cpu_var(nf_conntrack_ecache);
115 if (ecache->ct == ct)
116 __nf_ct_deliver_cached_events(ecache);
117 local_bh_enable();
118}
119
120/* Deliver cached events for old pending events, if current conntrack != old */
121void __nf_ct_event_cache_init(struct nf_conn *ct)
122{
123 struct nf_conntrack_ecache *ecache;
124
125 /* take care of delivering potentially old events */
126 ecache = &__get_cpu_var(nf_conntrack_ecache);
127 BUG_ON(ecache->ct == ct);
128 if (ecache->ct)
129 __nf_ct_deliver_cached_events(ecache);
130 /* initialize for this conntrack/packet */
131 ecache->ct = ct;
132 nf_conntrack_get(&ct->ct_general);
133}
134
135/* flush the event cache - touches other CPU's data and must not be called
136 * while packets are still passing through the code */
137static void nf_ct_event_cache_flush(void)
138{
139 struct nf_conntrack_ecache *ecache;
140 int cpu;
141
142 for_each_cpu(cpu) {
143 ecache = &per_cpu(nf_conntrack_ecache, cpu);
144 if (ecache->ct)
145 nf_ct_put(ecache->ct);
146 }
147}
148#else
149static inline void nf_ct_event_cache_flush(void) {}
150#endif /* CONFIG_NF_CONNTRACK_EVENTS */
151
152DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
153EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
154
155/*
156 * This scheme offers various size of "struct nf_conn" dependent on
157 * features(helper, nat, ...)
158 */
159
160#define NF_CT_FEATURES_NAMELEN 256
161static struct {
162 /* name of slab cache. printed in /proc/slabinfo */
163 char *name;
164
165 /* size of slab cache */
166 size_t size;
167
168 /* slab cache pointer */
169 kmem_cache_t *cachep;
170
171 /* allocated slab cache + modules which uses this slab cache */
172 int use;
173
174 /* Initialization */
175 int (*init_conntrack)(struct nf_conn *, u_int32_t);
176
177} nf_ct_cache[NF_CT_F_NUM];
178
179/* protect members of nf_ct_cache except of "use" */
180DEFINE_RWLOCK(nf_ct_cache_lock);
181
182/* This avoids calling kmem_cache_create() with same name simultaneously */
183DECLARE_MUTEX(nf_ct_cache_mutex);
184
185extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
186struct nf_conntrack_protocol *
187nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol)
188{
189 if (unlikely(nf_ct_protos[l3proto] == NULL))
190 return &nf_conntrack_generic_protocol;
191
192 return nf_ct_protos[l3proto][protocol];
193}
194
195static int nf_conntrack_hash_rnd_initted;
196static unsigned int nf_conntrack_hash_rnd;
197
198static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
199 unsigned int size, unsigned int rnd)
200{
201 unsigned int a, b;
202 a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
203 ((tuple->src.l3num) << 16) | tuple->dst.protonum);
204 b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
205 (tuple->src.u.all << 16) | tuple->dst.u.all);
206
207 return jhash_2words(a, b, rnd) % size;
208}
209
210static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
211{
212 return __hash_conntrack(tuple, nf_conntrack_htable_size,
213 nf_conntrack_hash_rnd);
214}
215
216/* Initialize "struct nf_conn" which has spaces for helper */
217static int
218init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features)
219{
220
221 conntrack->help = (union nf_conntrack_help *)
222 (((unsigned long)conntrack->data
223 + (__alignof__(union nf_conntrack_help) - 1))
224 & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1))));
225 return 0;
226}
227
228int nf_conntrack_register_cache(u_int32_t features, const char *name,
229 size_t size,
230 int (*init)(struct nf_conn *, u_int32_t))
231{
232 int ret = 0;
233 char *cache_name;
234 kmem_cache_t *cachep;
235
236 DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
237 features, name, size);
238
239 if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
240 DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
241 features);
242 return -EINVAL;
243 }
244
245 down(&nf_ct_cache_mutex);
246
247 write_lock_bh(&nf_ct_cache_lock);
248 /* e.g: multiple helpers are loaded */
249 if (nf_ct_cache[features].use > 0) {
250 DEBUGP("nf_conntrack_register_cache: already resisterd.\n");
251 if ((!strncmp(nf_ct_cache[features].name, name,
252 NF_CT_FEATURES_NAMELEN))
253 && nf_ct_cache[features].size == size
254 && nf_ct_cache[features].init_conntrack == init) {
255 DEBUGP("nf_conntrack_register_cache: reusing.\n");
256 nf_ct_cache[features].use++;
257 ret = 0;
258 } else
259 ret = -EBUSY;
260
261 write_unlock_bh(&nf_ct_cache_lock);
262 up(&nf_ct_cache_mutex);
263 return ret;
264 }
265 write_unlock_bh(&nf_ct_cache_lock);
266
267 /*
268 * The memory space for name of slab cache must be alive until
269 * cache is destroyed.
270 */
271 cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
272 if (cache_name == NULL) {
273 DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
274 ret = -ENOMEM;
275 goto out_up_mutex;
276 }
277
278 if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
279 >= NF_CT_FEATURES_NAMELEN) {
280 printk("nf_conntrack_register_cache: name too long\n");
281 ret = -EINVAL;
282 goto out_free_name;
283 }
284
285 cachep = kmem_cache_create(cache_name, size, 0, 0,
286 NULL, NULL);
287 if (!cachep) {
288 printk("nf_conntrack_register_cache: Can't create slab cache "
289 "for the features = 0x%x\n", features);
290 ret = -ENOMEM;
291 goto out_free_name;
292 }
293
294 write_lock_bh(&nf_ct_cache_lock);
295 nf_ct_cache[features].use = 1;
296 nf_ct_cache[features].size = size;
297 nf_ct_cache[features].init_conntrack = init;
298 nf_ct_cache[features].cachep = cachep;
299 nf_ct_cache[features].name = cache_name;
300 write_unlock_bh(&nf_ct_cache_lock);
301
302 goto out_up_mutex;
303
304out_free_name:
305 kfree(cache_name);
306out_up_mutex:
307 up(&nf_ct_cache_mutex);
308 return ret;
309}
310
311/* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */
312void nf_conntrack_unregister_cache(u_int32_t features)
313{
314 kmem_cache_t *cachep;
315 char *name;
316
317 /*
318 * This assures that kmem_cache_create() isn't called before destroying
319 * slab cache.
320 */
321 DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
322 down(&nf_ct_cache_mutex);
323
324 write_lock_bh(&nf_ct_cache_lock);
325 if (--nf_ct_cache[features].use > 0) {
326 write_unlock_bh(&nf_ct_cache_lock);
327 up(&nf_ct_cache_mutex);
328 return;
329 }
330 cachep = nf_ct_cache[features].cachep;
331 name = nf_ct_cache[features].name;
332 nf_ct_cache[features].cachep = NULL;
333 nf_ct_cache[features].name = NULL;
334 nf_ct_cache[features].init_conntrack = NULL;
335 nf_ct_cache[features].size = 0;
336 write_unlock_bh(&nf_ct_cache_lock);
337
338 synchronize_net();
339
340 kmem_cache_destroy(cachep);
341 kfree(name);
342
343 up(&nf_ct_cache_mutex);
344}
345
346int
347nf_ct_get_tuple(const struct sk_buff *skb,
348 unsigned int nhoff,
349 unsigned int dataoff,
350 u_int16_t l3num,
351 u_int8_t protonum,
352 struct nf_conntrack_tuple *tuple,
353 const struct nf_conntrack_l3proto *l3proto,
354 const struct nf_conntrack_protocol *protocol)
355{
356 NF_CT_TUPLE_U_BLANK(tuple);
357
358 tuple->src.l3num = l3num;
359 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
360 return 0;
361
362 tuple->dst.protonum = protonum;
363 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
364
365 return protocol->pkt_to_tuple(skb, dataoff, tuple);
366}
367
368int
369nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
370 const struct nf_conntrack_tuple *orig,
371 const struct nf_conntrack_l3proto *l3proto,
372 const struct nf_conntrack_protocol *protocol)
373{
374 NF_CT_TUPLE_U_BLANK(inverse);
375
376 inverse->src.l3num = orig->src.l3num;
377 if (l3proto->invert_tuple(inverse, orig) == 0)
378 return 0;
379
380 inverse->dst.dir = !orig->dst.dir;
381
382 inverse->dst.protonum = orig->dst.protonum;
383 return protocol->invert_tuple(inverse, orig);
384}
385
386/* nf_conntrack_expect helper functions */
387static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
388{
389 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
390 NF_CT_ASSERT(!timer_pending(&exp_timeout));
391 list_del(&exp->list);
392 NF_CT_STAT_INC(expect_delete);
393 exp->master->expecting--;
394 nf_conntrack_expect_put(exp);
395}
396
397static void expectation_timed_out(unsigned long ul_expect)
398{
399 struct nf_conntrack_expect *exp = (void *)ul_expect;
400
401 write_lock_bh(&nf_conntrack_lock);
402 nf_ct_unlink_expect(exp);
403 write_unlock_bh(&nf_conntrack_lock);
404 nf_conntrack_expect_put(exp);
405}
406
407/* If an expectation for this connection is found, it gets delete from
408 * global list then returned. */
409static struct nf_conntrack_expect *
410find_expectation(const struct nf_conntrack_tuple *tuple)
411{
412 struct nf_conntrack_expect *i;
413
414 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
415 /* If master is not in hash table yet (ie. packet hasn't left
416 this machine yet), how can other end know about expected?
417 Hence these are not the droids you are looking for (if
418 master ct never got confirmed, we'd hold a reference to it
419 and weird things would happen to future packets). */
420 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
421 && nf_ct_is_confirmed(i->master)) {
422 if (i->flags & NF_CT_EXPECT_PERMANENT) {
423 atomic_inc(&i->use);
424 return i;
425 } else if (del_timer(&i->timeout)) {
426 nf_ct_unlink_expect(i);
427 return i;
428 }
429 }
430 }
431 return NULL;
432}
433
434/* delete all expectations for this conntrack */
435static void remove_expectations(struct nf_conn *ct)
436{
437 struct nf_conntrack_expect *i, *tmp;
438
439 /* Optimization: most connection never expect any others. */
440 if (ct->expecting == 0)
441 return;
442
443 list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
444 if (i->master == ct && del_timer(&i->timeout)) {
445 nf_ct_unlink_expect(i);
446 nf_conntrack_expect_put(i);
447 }
448 }
449}
450
451static void
452clean_from_lists(struct nf_conn *ct)
453{
454 unsigned int ho, hr;
455
456 DEBUGP("clean_from_lists(%p)\n", ct);
457 ASSERT_WRITE_LOCK(&nf_conntrack_lock);
458
459 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
460 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
461 LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
462 LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
463
464 /* Destroy all pending expectations */
465 remove_expectations(ct);
466}
467
468static void
469destroy_conntrack(struct nf_conntrack *nfct)
470{
471 struct nf_conn *ct = (struct nf_conn *)nfct;
472 struct nf_conntrack_l3proto *l3proto;
473 struct nf_conntrack_protocol *proto;
474
475 DEBUGP("destroy_conntrack(%p)\n", ct);
476 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
477 NF_CT_ASSERT(!timer_pending(&ct->timeout));
478
479 nf_conntrack_event(IPCT_DESTROY, ct);
480 set_bit(IPS_DYING_BIT, &ct->status);
481
482 /* To make sure we don't get any weird locking issues here:
483 * destroy_conntrack() MUST NOT be called with a write lock
484 * to nf_conntrack_lock!!! -HW */
485 l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
486 if (l3proto && l3proto->destroy)
487 l3proto->destroy(ct);
488
489 proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
490 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
491 if (proto && proto->destroy)
492 proto->destroy(ct);
493
494 if (nf_conntrack_destroyed)
495 nf_conntrack_destroyed(ct);
496
497 write_lock_bh(&nf_conntrack_lock);
498 /* Expectations will have been removed in clean_from_lists,
499 * except TFTP can create an expectation on the first packet,
500 * before connection is in the list, so we need to clean here,
501 * too. */
502 remove_expectations(ct);
503
504 /* We overload first tuple to link into unconfirmed list. */
505 if (!nf_ct_is_confirmed(ct)) {
506 BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
507 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
508 }
509
510 NF_CT_STAT_INC(delete);
511 write_unlock_bh(&nf_conntrack_lock);
512
513 if (ct->master)
514 nf_ct_put(ct->master);
515
516 DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
517 nf_conntrack_free(ct);
518}
519
520static void death_by_timeout(unsigned long ul_conntrack)
521{
522 struct nf_conn *ct = (void *)ul_conntrack;
523
524 write_lock_bh(&nf_conntrack_lock);
525 /* Inside lock so preempt is disabled on module removal path.
526 * Otherwise we can get spurious warnings. */
527 NF_CT_STAT_INC(delete_list);
528 clean_from_lists(ct);
529 write_unlock_bh(&nf_conntrack_lock);
530 nf_ct_put(ct);
531}
532
533static inline int
534conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
535 const struct nf_conntrack_tuple *tuple,
536 const struct nf_conn *ignored_conntrack)
537{
538 ASSERT_READ_LOCK(&nf_conntrack_lock);
539 return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
540 && nf_ct_tuple_equal(tuple, &i->tuple);
541}
542
543static struct nf_conntrack_tuple_hash *
544__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
545 const struct nf_conn *ignored_conntrack)
546{
547 struct nf_conntrack_tuple_hash *h;
548 unsigned int hash = hash_conntrack(tuple);
549
550 ASSERT_READ_LOCK(&nf_conntrack_lock);
551 list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
552 if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
553 NF_CT_STAT_INC(found);
554 return h;
555 }
556 NF_CT_STAT_INC(searched);
557 }
558
559 return NULL;
560}
561
562/* Find a connection corresponding to a tuple. */
563struct nf_conntrack_tuple_hash *
564nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
565 const struct nf_conn *ignored_conntrack)
566{
567 struct nf_conntrack_tuple_hash *h;
568
569 read_lock_bh(&nf_conntrack_lock);
570 h = __nf_conntrack_find(tuple, ignored_conntrack);
571 if (h)
572 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
573 read_unlock_bh(&nf_conntrack_lock);
574
575 return h;
576}
577
578/* Confirm a connection given skb; places it in hash table */
579int
580__nf_conntrack_confirm(struct sk_buff **pskb)
581{
582 unsigned int hash, repl_hash;
583 struct nf_conn *ct;
584 enum ip_conntrack_info ctinfo;
585
586 ct = nf_ct_get(*pskb, &ctinfo);
587
588 /* ipt_REJECT uses nf_conntrack_attach to attach related
589 ICMP/TCP RST packets in other direction. Actual packet
590 which created connection will be IP_CT_NEW or for an
591 expected connection, IP_CT_RELATED. */
592 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
593 return NF_ACCEPT;
594
595 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
596 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
597
598 /* We're not in hash table, and we refuse to set up related
599 connections for unconfirmed conns. But packet copies and
600 REJECT will give spurious warnings here. */
601 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
602
603 /* No external references means noone else could have
604 confirmed us. */
605 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
606 DEBUGP("Confirming conntrack %p\n", ct);
607
608 write_lock_bh(&nf_conntrack_lock);
609
610 /* See if there's one in the list already, including reverse:
611 NAT could have grabbed it without realizing, since we're
612 not in the hash. If there is, we lost race. */
613 if (!LIST_FIND(&nf_conntrack_hash[hash],
614 conntrack_tuple_cmp,
615 struct nf_conntrack_tuple_hash *,
616 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
617 && !LIST_FIND(&nf_conntrack_hash[repl_hash],
618 conntrack_tuple_cmp,
619 struct nf_conntrack_tuple_hash *,
620 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
621 /* Remove from unconfirmed list */
622 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
623
624 list_prepend(&nf_conntrack_hash[hash],
625 &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
626 list_prepend(&nf_conntrack_hash[repl_hash],
627 &ct->tuplehash[IP_CT_DIR_REPLY]);
628 /* Timer relative to confirmation time, not original
629 setting time, otherwise we'd get timer wrap in
630 weird delay cases. */
631 ct->timeout.expires += jiffies;
632 add_timer(&ct->timeout);
633 atomic_inc(&ct->ct_general.use);
634 set_bit(IPS_CONFIRMED_BIT, &ct->status);
635 NF_CT_STAT_INC(insert);
636 write_unlock_bh(&nf_conntrack_lock);
637 if (ct->helper)
638 nf_conntrack_event_cache(IPCT_HELPER, *pskb);
639#ifdef CONFIG_NF_NAT_NEEDED
640 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
641 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
642 nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
643#endif
644 nf_conntrack_event_cache(master_ct(ct) ?
645 IPCT_RELATED : IPCT_NEW, *pskb);
646 return NF_ACCEPT;
647 }
648
649 NF_CT_STAT_INC(insert_failed);
650 write_unlock_bh(&nf_conntrack_lock);
651 return NF_DROP;
652}
653
654/* Returns true if a connection correspondings to the tuple (required
655 for NAT). */
656int
657nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
658 const struct nf_conn *ignored_conntrack)
659{
660 struct nf_conntrack_tuple_hash *h;
661
662 read_lock_bh(&nf_conntrack_lock);
663 h = __nf_conntrack_find(tuple, ignored_conntrack);
664 read_unlock_bh(&nf_conntrack_lock);
665
666 return h != NULL;
667}
668
669/* There's a small race here where we may free a just-assured
670 connection. Too bad: we're in trouble anyway. */
671static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
672{
673 return !(test_bit(IPS_ASSURED_BIT,
674 &nf_ct_tuplehash_to_ctrack(i)->status));
675}
676
677static int early_drop(struct list_head *chain)
678{
679 /* Traverse backwards: gives us oldest, which is roughly LRU */
680 struct nf_conntrack_tuple_hash *h;
681 struct nf_conn *ct = NULL;
682 int dropped = 0;
683
684 read_lock_bh(&nf_conntrack_lock);
685 h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
686 if (h) {
687 ct = nf_ct_tuplehash_to_ctrack(h);
688 atomic_inc(&ct->ct_general.use);
689 }
690 read_unlock_bh(&nf_conntrack_lock);
691
692 if (!ct)
693 return dropped;
694
695 if (del_timer(&ct->timeout)) {
696 death_by_timeout((unsigned long)ct);
697 dropped = 1;
698 NF_CT_STAT_INC(early_drop);
699 }
700 nf_ct_put(ct);
701 return dropped;
702}
703
704static inline int helper_cmp(const struct nf_conntrack_helper *i,
705 const struct nf_conntrack_tuple *rtuple)
706{
707 return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
708}
709
710static struct nf_conntrack_helper *
711nf_ct_find_helper(const struct nf_conntrack_tuple *tuple)
712{
713 return LIST_FIND(&helpers, helper_cmp,
714 struct nf_conntrack_helper *,
715 tuple);
716}
717
718static struct nf_conn *
719__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
720 const struct nf_conntrack_tuple *repl,
721 const struct nf_conntrack_l3proto *l3proto)
722{
723 struct nf_conn *conntrack = NULL;
724 u_int32_t features = 0;
725
726 if (!nf_conntrack_hash_rnd_initted) {
727 get_random_bytes(&nf_conntrack_hash_rnd, 4);
728 nf_conntrack_hash_rnd_initted = 1;
729 }
730
731 if (nf_conntrack_max
732 && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
733 unsigned int hash = hash_conntrack(orig);
734 /* Try dropping from this hash chain. */
735 if (!early_drop(&nf_conntrack_hash[hash])) {
736 if (net_ratelimit())
737 printk(KERN_WARNING
738 "nf_conntrack: table full, dropping"
739 " packet.\n");
740 return ERR_PTR(-ENOMEM);
741 }
742 }
743
744 /* find features needed by this conntrack. */
745 features = l3proto->get_features(orig);
746 read_lock_bh(&nf_conntrack_lock);
747 if (nf_ct_find_helper(repl) != NULL)
748 features |= NF_CT_F_HELP;
749 read_unlock_bh(&nf_conntrack_lock);
750
751 DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);
752
753 read_lock_bh(&nf_ct_cache_lock);
754
755 if (!nf_ct_cache[features].use) {
756 DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
757 features);
758 goto out;
759 }
760
761 conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
762 if (conntrack == NULL) {
763 DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
764 goto out;
765 }
766
767 memset(conntrack, 0, nf_ct_cache[features].size);
768 conntrack->features = features;
769 if (nf_ct_cache[features].init_conntrack &&
770 nf_ct_cache[features].init_conntrack(conntrack, features) < 0) {
771 DEBUGP("nf_conntrack_alloc: failed to init\n");
772 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
773 conntrack = NULL;
774 goto out;
775 }
776
777 atomic_set(&conntrack->ct_general.use, 1);
778 conntrack->ct_general.destroy = destroy_conntrack;
779 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
780 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
781 /* Don't set timer yet: wait for confirmation */
782 init_timer(&conntrack->timeout);
783 conntrack->timeout.data = (unsigned long)conntrack;
784 conntrack->timeout.function = death_by_timeout;
785
786 atomic_inc(&nf_conntrack_count);
787out:
788 read_unlock_bh(&nf_ct_cache_lock);
789 return conntrack;
790}
791
792struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
793 const struct nf_conntrack_tuple *repl)
794{
795 struct nf_conntrack_l3proto *l3proto;
796
797 l3proto = nf_ct_find_l3proto(orig->src.l3num);
798 return __nf_conntrack_alloc(orig, repl, l3proto);
799}
800
801void nf_conntrack_free(struct nf_conn *conntrack)
802{
803 u_int32_t features = conntrack->features;
804 NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
805 DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
806 conntrack);
807 kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
808 atomic_dec(&nf_conntrack_count);
809}
810
811/* Allocate a new conntrack: we return -ENOMEM if classification
812 failed due to stress. Otherwise it really is unclassifiable. */
813static struct nf_conntrack_tuple_hash *
814init_conntrack(const struct nf_conntrack_tuple *tuple,
815 struct nf_conntrack_l3proto *l3proto,
816 struct nf_conntrack_protocol *protocol,
817 struct sk_buff *skb,
818 unsigned int dataoff)
819{
820 struct nf_conn *conntrack;
821 struct nf_conntrack_tuple repl_tuple;
822 struct nf_conntrack_expect *exp;
823
824 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
825 DEBUGP("Can't invert tuple.\n");
826 return NULL;
827 }
828
829 conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
830 if (conntrack == NULL || IS_ERR(conntrack)) {
831 DEBUGP("Can't allocate conntrack.\n");
832 return (struct nf_conntrack_tuple_hash *)conntrack;
833 }
834
835 if (!protocol->new(conntrack, skb, dataoff)) {
836 nf_conntrack_free(conntrack);
837 DEBUGP("init conntrack: can't track with proto module\n");
838 return NULL;
839 }
840
841 write_lock_bh(&nf_conntrack_lock);
842 exp = find_expectation(tuple);
843
844 if (exp) {
845 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
846 conntrack, exp);
847 /* Welcome, Mr. Bond. We've been expecting you... */
848 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
849 conntrack->master = exp->master;
850#ifdef CONFIG_NF_CONNTRACK_MARK
851 conntrack->mark = exp->master->mark;
852#endif
853 nf_conntrack_get(&conntrack->master->ct_general);
854 NF_CT_STAT_INC(expect_new);
855 } else {
856 conntrack->helper = nf_ct_find_helper(&repl_tuple);
857
858 NF_CT_STAT_INC(new);
859 }
860
861 /* Overload tuple linked list to put us in unconfirmed list. */
862 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
863
864 write_unlock_bh(&nf_conntrack_lock);
865
866 if (exp) {
867 if (exp->expectfn)
868 exp->expectfn(conntrack, exp);
869 nf_conntrack_expect_put(exp);
870 }
871
872 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
873}
874
875/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
876static inline struct nf_conn *
877resolve_normal_ct(struct sk_buff *skb,
878 unsigned int dataoff,
879 u_int16_t l3num,
880 u_int8_t protonum,
881 struct nf_conntrack_l3proto *l3proto,
882 struct nf_conntrack_protocol *proto,
883 int *set_reply,
884 enum ip_conntrack_info *ctinfo)
885{
886 struct nf_conntrack_tuple tuple;
887 struct nf_conntrack_tuple_hash *h;
888 struct nf_conn *ct;
889
890 if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
891 dataoff, l3num, protonum, &tuple, l3proto,
892 proto)) {
893 DEBUGP("resolve_normal_ct: Can't get tuple\n");
894 return NULL;
895 }
896
897 /* look for tuple match */
898 h = nf_conntrack_find_get(&tuple, NULL);
899 if (!h) {
900 h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
901 if (!h)
902 return NULL;
903 if (IS_ERR(h))
904 return (void *)h;
905 }
906 ct = nf_ct_tuplehash_to_ctrack(h);
907
908 /* It exists; we have (non-exclusive) reference. */
909 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
910 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
911 /* Please set reply bit if this packet OK */
912 *set_reply = 1;
913 } else {
914 /* Once we've had two way comms, always ESTABLISHED. */
915 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
916 DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
917 *ctinfo = IP_CT_ESTABLISHED;
918 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
919 DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
920 *ctinfo = IP_CT_RELATED;
921 } else {
922 DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
923 *ctinfo = IP_CT_NEW;
924 }
925 *set_reply = 0;
926 }
927 skb->nfct = &ct->ct_general;
928 skb->nfctinfo = *ctinfo;
929 return ct;
930}
931
932unsigned int
933nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
934{
935 struct nf_conn *ct;
936 enum ip_conntrack_info ctinfo;
937 struct nf_conntrack_l3proto *l3proto;
938 struct nf_conntrack_protocol *proto;
939 unsigned int dataoff;
940 u_int8_t protonum;
941 int set_reply = 0;
942 int ret;
943
944 /* Previously seen (loopback or untracked)? Ignore. */
945 if ((*pskb)->nfct) {
946 NF_CT_STAT_INC(ignore);
947 return NF_ACCEPT;
948 }
949
950 l3proto = nf_ct_find_l3proto((u_int16_t)pf);
951 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
952 DEBUGP("not prepared to track yet or error occured\n");
953 return -ret;
954 }
955
956 proto = nf_ct_find_proto((u_int16_t)pf, protonum);
957
958 /* It may be an special packet, error, unclean...
959 * inverse of the return code tells to the netfilter
960 * core what to do with the packet. */
961 if (proto->error != NULL &&
962 (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
963 NF_CT_STAT_INC(error);
964 NF_CT_STAT_INC(invalid);
965 return -ret;
966 }
967
968 ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
969 &set_reply, &ctinfo);
970 if (!ct) {
971 /* Not valid part of a connection */
972 NF_CT_STAT_INC(invalid);
973 return NF_ACCEPT;
974 }
975
976 if (IS_ERR(ct)) {
977 /* Too stressed to deal. */
978 NF_CT_STAT_INC(drop);
979 return NF_DROP;
980 }
981
982 NF_CT_ASSERT((*pskb)->nfct);
983
984 ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
985 if (ret < 0) {
986 /* Invalid: inverse of the return code tells
987 * the netfilter core what to do */
988 DEBUGP("nf_conntrack_in: Can't track with proto module\n");
989 nf_conntrack_put((*pskb)->nfct);
990 (*pskb)->nfct = NULL;
991 NF_CT_STAT_INC(invalid);
992 return -ret;
993 }
994
995 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
996 nf_conntrack_event_cache(IPCT_STATUS, *pskb);
997
998 return ret;
999}
1000
1001int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1002 const struct nf_conntrack_tuple *orig)
1003{
1004 return nf_ct_invert_tuple(inverse, orig,
1005 nf_ct_find_l3proto(orig->src.l3num),
1006 nf_ct_find_proto(orig->src.l3num,
1007 orig->dst.protonum));
1008}
1009
1010/* Would two expected things clash? */
1011static inline int expect_clash(const struct nf_conntrack_expect *a,
1012 const struct nf_conntrack_expect *b)
1013{
1014 /* Part covered by intersection of masks must be unequal,
1015 otherwise they clash */
1016 struct nf_conntrack_tuple intersect_mask;
1017 int count;
1018
1019 intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
1020 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
1021 intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
1022 intersect_mask.dst.protonum = a->mask.dst.protonum
1023 & b->mask.dst.protonum;
1024
1025 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1026 intersect_mask.src.u3.all[count] =
1027 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
1028 }
1029
1030 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
1031 intersect_mask.dst.u3.all[count] =
1032 a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
1033 }
1034
1035 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
1036}
1037
1038static inline int expect_matches(const struct nf_conntrack_expect *a,
1039 const struct nf_conntrack_expect *b)
1040{
1041 return a->master == b->master
1042 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
1043 && nf_ct_tuple_equal(&a->mask, &b->mask);
1044}
1045
1046/* Generally a bad idea to call this: could have matched already. */
1047void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
1048{
1049 struct nf_conntrack_expect *i;
1050
1051 write_lock_bh(&nf_conntrack_lock);
1052 /* choose the the oldest expectation to evict */
1053 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1054 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
1055 nf_ct_unlink_expect(i);
1056 write_unlock_bh(&nf_conntrack_lock);
1057 nf_conntrack_expect_put(i);
1058 return;
1059 }
1060 }
1061 write_unlock_bh(&nf_conntrack_lock);
1062}
1063
1064/* We don't increase the master conntrack refcount for non-fulfilled
1065 * conntracks. During the conntrack destruction, the expectations are
1066 * always killed before the conntrack itself */
1067struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
1068{
1069 struct nf_conntrack_expect *new;
1070
1071 new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
1072 if (!new) {
1073 DEBUGP("expect_related: OOM allocating expect\n");
1074 return NULL;
1075 }
1076 new->master = me;
1077 atomic_set(&new->use, 1);
1078 return new;
1079}
1080
1081void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
1082{
1083 if (atomic_dec_and_test(&exp->use))
1084 kmem_cache_free(nf_conntrack_expect_cachep, exp);
1085}
1086
1087static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
1088{
1089 atomic_inc(&exp->use);
1090 exp->master->expecting++;
1091 list_add(&exp->list, &nf_conntrack_expect_list);
1092
1093 init_timer(&exp->timeout);
1094 exp->timeout.data = (unsigned long)exp;
1095 exp->timeout.function = expectation_timed_out;
1096 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
1097 add_timer(&exp->timeout);
1098
1099 atomic_inc(&exp->use);
1100 NF_CT_STAT_INC(expect_create);
1101}
1102
1103/* Race with expectations being used means we could have none to find; OK. */
1104static void evict_oldest_expect(struct nf_conn *master)
1105{
1106 struct nf_conntrack_expect *i;
1107
1108 list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
1109 if (i->master == master) {
1110 if (del_timer(&i->timeout)) {
1111 nf_ct_unlink_expect(i);
1112 nf_conntrack_expect_put(i);
1113 }
1114 break;
1115 }
1116 }
1117}
1118
1119static inline int refresh_timer(struct nf_conntrack_expect *i)
1120{
1121 if (!del_timer(&i->timeout))
1122 return 0;
1123
1124 i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
1125 add_timer(&i->timeout);
1126 return 1;
1127}
1128
1129int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
1130{
1131 struct nf_conntrack_expect *i;
1132 int ret;
1133
1134 DEBUGP("nf_conntrack_expect_related %p\n", related_to);
1135 DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
1136 DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask);
1137
1138 write_lock_bh(&nf_conntrack_lock);
1139 list_for_each_entry(i, &nf_conntrack_expect_list, list) {
1140 if (expect_matches(i, expect)) {
1141 /* Refresh timer: if it's dying, ignore.. */
1142 if (refresh_timer(i)) {
1143 ret = 0;
1144 goto out;
1145 }
1146 } else if (expect_clash(i, expect)) {
1147 ret = -EBUSY;
1148 goto out;
1149 }
1150 }
1151 /* Will be over limit? */
1152 if (expect->master->helper->max_expected &&
1153 expect->master->expecting >= expect->master->helper->max_expected)
1154 evict_oldest_expect(expect->master);
1155
1156 nf_conntrack_expect_insert(expect);
1157 nf_conntrack_expect_event(IPEXP_NEW, expect);
1158 ret = 0;
1159out:
1160 write_unlock_bh(&nf_conntrack_lock);
1161 return ret;
1162}
1163
1164/* Alter reply tuple (maybe alter helper). This is for NAT, and is
1165 implicitly racy: see __nf_conntrack_confirm */
1166void nf_conntrack_alter_reply(struct nf_conn *conntrack,
1167 const struct nf_conntrack_tuple *newreply)
1168{
1169 write_lock_bh(&nf_conntrack_lock);
1170 /* Should be unconfirmed, so not in hash table yet */
1171 NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack));
1172
1173 DEBUGP("Altering reply tuple of %p to ", conntrack);
1174 NF_CT_DUMP_TUPLE(newreply);
1175
1176 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1177 if (!conntrack->master && conntrack->expecting == 0)
1178 conntrack->helper = nf_ct_find_helper(newreply);
1179 write_unlock_bh(&nf_conntrack_lock);
1180}
1181
1182int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
1183{
1184 int ret;
1185 BUG_ON(me->timeout == 0);
1186
1187 ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
1188 sizeof(struct nf_conn)
1189 + sizeof(union nf_conntrack_help)
1190 + __alignof__(union nf_conntrack_help),
1191 init_conntrack_for_helper);
1192 if (ret < 0) {
1193 printk(KERN_ERR "nf_conntrack_helper_reigster: Unable to create slab cache for conntracks\n");
1194 return ret;
1195 }
1196 write_lock_bh(&nf_conntrack_lock);
1197 list_prepend(&helpers, me);
1198 write_unlock_bh(&nf_conntrack_lock);
1199
1200 return 0;
1201}
1202
1203static inline int unhelp(struct nf_conntrack_tuple_hash *i,
1204 const struct nf_conntrack_helper *me)
1205{
1206 if (nf_ct_tuplehash_to_ctrack(i)->helper == me) {
1207 nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i));
1208 nf_ct_tuplehash_to_ctrack(i)->helper = NULL;
1209 }
1210 return 0;
1211}
1212
1213void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
1214{
1215 unsigned int i;
1216 struct nf_conntrack_expect *exp, *tmp;
1217
1218 /* Need write lock here, to delete helper. */
1219 write_lock_bh(&nf_conntrack_lock);
1220 LIST_DELETE(&helpers, me);
1221
1222 /* Get rid of expectations */
1223 list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
1224 if (exp->master->helper == me && del_timer(&exp->timeout)) {
1225 nf_ct_unlink_expect(exp);
1226 nf_conntrack_expect_put(exp);
1227 }
1228 }
1229
1230 /* Get rid of expecteds, set helpers to NULL. */
1231 LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
1232 for (i = 0; i < nf_conntrack_htable_size; i++)
1233 LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
1234 struct nf_conntrack_tuple_hash *, me);
1235 write_unlock_bh(&nf_conntrack_lock);
1236
1237 /* Someone could be still looking at the helper in a bh. */
1238 synchronize_net();
1239}
1240
1241/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1242void __nf_ct_refresh_acct(struct nf_conn *ct,
1243 enum ip_conntrack_info ctinfo,
1244 const struct sk_buff *skb,
1245 unsigned long extra_jiffies,
1246 int do_acct)
1247{
1248 int event = 0;
1249
1250 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1251 NF_CT_ASSERT(skb);
1252
1253 write_lock_bh(&nf_conntrack_lock);
1254
1255 /* If not in hash table, timer will not be active yet */
1256 if (!nf_ct_is_confirmed(ct)) {
1257 ct->timeout.expires = extra_jiffies;
1258 event = IPCT_REFRESH;
1259 } else {
1260 /* Need del_timer for race avoidance (may already be dying). */
1261 if (del_timer(&ct->timeout)) {
1262 ct->timeout.expires = jiffies + extra_jiffies;
1263 add_timer(&ct->timeout);
1264 event = IPCT_REFRESH;
1265 }
1266 }
1267
1268#ifdef CONFIG_NF_CT_ACCT
1269 if (do_acct) {
1270 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1271 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1272 skb->len - (unsigned int)(skb->nh.raw - skb->data);
1273 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1274 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1275 event |= IPCT_COUNTER_FILLING;
1276 }
1277#endif
1278
1279 write_unlock_bh(&nf_conntrack_lock);
1280
1281 /* must be unlocked when calling event cache */
1282 if (event)
1283 nf_conntrack_event_cache(event, skb);
1284}
1285
1286/* Used by ipt_REJECT and ip6t_REJECT. */
1287void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1288{
1289 struct nf_conn *ct;
1290 enum ip_conntrack_info ctinfo;
1291
1292 /* This ICMP is in reverse direction to the packet which caused it */
1293 ct = nf_ct_get(skb, &ctinfo);
1294 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1295 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1296 else
1297 ctinfo = IP_CT_RELATED;
1298
1299 /* Attach to new skbuff, and increment count */
1300 nskb->nfct = &ct->ct_general;
1301 nskb->nfctinfo = ctinfo;
1302 nf_conntrack_get(nskb->nfct);
1303}
1304
/*
 * List-walk adapter: call @iter on the conntrack that owns tuple hash @i,
 * forwarding @data, and return its verdict.
 */
static inline int
do_iter(const struct nf_conntrack_tuple_hash *i,
	int (*iter)(struct nf_conn *i, void *data),
	void *data)
{
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);

	return iter(ct, data);
}
1312
1313/* Bring out ya dead! */
1314static struct nf_conntrack_tuple_hash *
1315get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
1316 void *data, unsigned int *bucket)
1317{
1318 struct nf_conntrack_tuple_hash *h = NULL;
1319
1320 write_lock_bh(&nf_conntrack_lock);
1321 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1322 h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
1323 struct nf_conntrack_tuple_hash *, iter, data);
1324 if (h)
1325 break;
1326 }
1327 if (!h)
1328 h = LIST_FIND_W(&unconfirmed, do_iter,
1329 struct nf_conntrack_tuple_hash *, iter, data);
1330 if (h)
1331 atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
1332 write_unlock_bh(&nf_conntrack_lock);
1333
1334 return h;
1335}
1336
1337void
1338nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
1339{
1340 struct nf_conntrack_tuple_hash *h;
1341 unsigned int bucket = 0;
1342
1343 while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
1344 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
1345 /* Time to push up daises... */
1346 if (del_timer(&ct->timeout))
1347 death_by_timeout((unsigned long)ct);
1348 /* ... else the timer will get him soon. */
1349
1350 nf_ct_put(ct);
1351 }
1352}
1353
/* Iterator for nf_ct_iterate_cleanup() that selects every conntrack. */
static int kill_all(struct nf_conn *i, void *data)
{
	const int selected = 1;

	return selected;
}
1358
1359static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
1360{
1361 if (vmalloced)
1362 vfree(hash);
1363 else
1364 free_pages((unsigned long)hash,
1365 get_order(sizeof(struct list_head) * size));
1366}
1367
1368/* Mishearing the voices in his head, our hero wonders how he's
1369 supposed to kill the mall. */
1370void nf_conntrack_cleanup(void)
1371{
1372 int i;
1373
1374 /* This makes sure all current packets have passed through
1375 netfilter framework. Roll on, two-stage module
1376 delete... */
1377 synchronize_net();
1378
1379 nf_ct_event_cache_flush();
1380 i_see_dead_people:
1381 nf_ct_iterate_cleanup(kill_all, NULL);
1382 if (atomic_read(&nf_conntrack_count) != 0) {
1383 schedule();
1384 goto i_see_dead_people;
1385 }
1386
1387 for (i = 0; i < NF_CT_F_NUM; i++) {
1388 if (nf_ct_cache[i].use == 0)
1389 continue;
1390
1391 NF_CT_ASSERT(nf_ct_cache[i].use == 1);
1392 nf_ct_cache[i].use = 1;
1393 nf_conntrack_unregister_cache(i);
1394 }
1395 kmem_cache_destroy(nf_conntrack_expect_cachep);
1396 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1397 nf_conntrack_htable_size);
1398}
1399
1400static struct list_head *alloc_hashtable(int size, int *vmalloced)
1401{
1402 struct list_head *hash;
1403 unsigned int i;
1404
1405 *vmalloced = 0;
1406 hash = (void*)__get_free_pages(GFP_KERNEL,
1407 get_order(sizeof(struct list_head)
1408 * size));
1409 if (!hash) {
1410 *vmalloced = 1;
1411 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1412 hash = vmalloc(sizeof(struct list_head) * size);
1413 }
1414
1415 if (hash)
1416 for (i = 0; i < size; i++)
1417 INIT_LIST_HEAD(&hash[i]);
1418
1419 return hash;
1420}
1421
1422int set_hashsize(const char *val, struct kernel_param *kp)
1423{
1424 int i, bucket, hashsize, vmalloced;
1425 int old_vmalloced, old_size;
1426 int rnd;
1427 struct list_head *hash, *old_hash;
1428 struct nf_conntrack_tuple_hash *h;
1429
1430 /* On boot, we can set this without any fancy locking. */
1431 if (!nf_conntrack_htable_size)
1432 return param_set_uint(val, kp);
1433
1434 hashsize = simple_strtol(val, NULL, 0);
1435 if (!hashsize)
1436 return -EINVAL;
1437
1438 hash = alloc_hashtable(hashsize, &vmalloced);
1439 if (!hash)
1440 return -ENOMEM;
1441
1442 /* We have to rehahs for the new table anyway, so we also can
1443 * use a newrandom seed */
1444 get_random_bytes(&rnd, 4);
1445
1446 write_lock_bh(&nf_conntrack_lock);
1447 for (i = 0; i < nf_conntrack_htable_size; i++) {
1448 while (!list_empty(&nf_conntrack_hash[i])) {
1449 h = list_entry(nf_conntrack_hash[i].next,
1450 struct nf_conntrack_tuple_hash, list);
1451 list_del(&h->list);
1452 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1453 list_add_tail(&h->list, &hash[bucket]);
1454 }
1455 }
1456 old_size = nf_conntrack_htable_size;
1457 old_vmalloced = nf_conntrack_vmalloc;
1458 old_hash = nf_conntrack_hash;
1459
1460 nf_conntrack_htable_size = hashsize;
1461 nf_conntrack_vmalloc = vmalloced;
1462 nf_conntrack_hash = hash;
1463 nf_conntrack_hash_rnd = rnd;
1464 write_unlock_bh(&nf_conntrack_lock);
1465
1466 free_conntrack_hash(old_hash, old_vmalloced, old_size);
1467 return 0;
1468}
1469
1470module_param_call(hashsize, set_hashsize, param_get_uint,
1471 &nf_conntrack_htable_size, 0600);
1472
1473int __init nf_conntrack_init(void)
1474{
1475 unsigned int i;
1476 int ret;
1477
1478 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1479 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1480 if (!nf_conntrack_htable_size) {
1481 nf_conntrack_htable_size
1482 = (((num_physpages << PAGE_SHIFT) / 16384)
1483 / sizeof(struct list_head));
1484 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1485 nf_conntrack_htable_size = 8192;
1486 if (nf_conntrack_htable_size < 16)
1487 nf_conntrack_htable_size = 16;
1488 }
1489 nf_conntrack_max = 8 * nf_conntrack_htable_size;
1490
1491 printk("nf_conntrack version %s (%u buckets, %d max)\n",
1492 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1493 nf_conntrack_max);
1494
1495 nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
1496 &nf_conntrack_vmalloc);
1497 if (!nf_conntrack_hash) {
1498 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
1499 goto err_out;
1500 }
1501
1502 ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
1503 sizeof(struct nf_conn), NULL);
1504 if (ret < 0) {
1505 printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1506 goto err_free_hash;
1507 }
1508
1509 nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
1510 sizeof(struct nf_conntrack_expect),
1511 0, 0, NULL, NULL);
1512 if (!nf_conntrack_expect_cachep) {
1513 printk(KERN_ERR "Unable to create nf_expect slab cache\n");
1514 goto err_free_conntrack_slab;
1515 }
1516
1517 /* Don't NEED lock here, but good form anyway. */
1518 write_lock_bh(&nf_conntrack_lock);
1519 for (i = 0; i < PF_MAX; i++)
1520 nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
1521 write_unlock_bh(&nf_conntrack_lock);
1522
1523 /* Set up fake conntrack:
1524 - to never be deleted, not in any hashes */
1525 atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
1526 /* - and look it like as a confirmed connection */
1527 set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
1528
1529 return ret;
1530
1531err_free_conntrack_slab:
1532 nf_conntrack_unregister_cache(NF_CT_F_BASIC);
1533err_free_hash:
1534 free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
1535 nf_conntrack_htable_size);
1536err_out:
1537 return -ENOMEM;
1538}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
new file mode 100644
index 000000000000..65080e269f27
--- /dev/null
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -0,0 +1,698 @@
1/* FTP extension for connection tracking. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - enable working with Layer 3 protocol independent connection tracking.
13 * - track EPRT and EPSV commands with IPv6 address.
14 *
15 * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
16 */
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <linux/moduleparam.h>
21#include <linux/netfilter.h>
22#include <linux/ip.h>
23#include <linux/ipv6.h>
24#include <linux/ctype.h>
25#include <net/checksum.h>
26#include <net/tcp.h>
27
28#include <net/netfilter/nf_conntrack.h>
29#include <net/netfilter/nf_conntrack_helper.h>
30#include <linux/netfilter/nf_conntrack_ftp.h>
31
32MODULE_LICENSE("GPL");
33MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
34MODULE_DESCRIPTION("ftp connection tracking helper");
35
36/* This is slow, but it's simple. --RR */
37static char *ftp_buffer;
38
39static DEFINE_SPINLOCK(nf_ftp_lock);
40
41#define MAX_PORTS 8
42static u_int16_t ports[MAX_PORTS];
43static unsigned int ports_c;
44module_param_array(ports, ushort, &ports_c, 0400);
45
46static int loose;
47module_param(loose, int, 0600);
48
49unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
50 enum ip_conntrack_info ctinfo,
51 enum ip_ct_ftp_type type,
52 unsigned int matchoff,
53 unsigned int matchlen,
54 struct nf_conntrack_expect *exp,
55 u32 *seq);
56EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
57
58#if 0
59#define DEBUGP printk
60#else
61#define DEBUGP(format, args...)
62#endif
63
64static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char);
65static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char);
66static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
67 char);
68
69static struct ftp_search {
70 enum ip_conntrack_dir dir;
71 const char *pattern;
72 size_t plen;
73 char skip;
74 char term;
75 enum ip_ct_ftp_type ftptype;
76 int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char);
77} search[] = {
78 {
79 IP_CT_DIR_ORIGINAL,
80 "PORT", sizeof("PORT") - 1, ' ', '\r',
81 IP_CT_FTP_PORT,
82 try_rfc959,
83 },
84 {
85 IP_CT_DIR_REPLY,
86 "227 ", sizeof("227 ") - 1, '(', ')',
87 IP_CT_FTP_PASV,
88 try_rfc959,
89 },
90 {
91 IP_CT_DIR_ORIGINAL,
92 "EPRT", sizeof("EPRT") - 1, ' ', '\r',
93 IP_CT_FTP_EPRT,
94 try_eprt,
95 },
96 {
97 IP_CT_DIR_REPLY,
98 "229 ", sizeof("229 ") - 1, '(', ')',
99 IP_CT_FTP_EPSV,
100 try_epsv_response,
101 },
102};
103
104/* This code is based on inet_pton() in glibc-2.2.4 */
105static int
106get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
107{
108 static const char xdigits[] = "0123456789abcdef";
109 u_int8_t tmp[16], *tp, *endp, *colonp;
110 int ch, saw_xdigit;
111 u_int32_t val;
112 size_t clen = 0;
113
114 tp = memset(tmp, '\0', sizeof(tmp));
115 endp = tp + sizeof(tmp);
116 colonp = NULL;
117
118 /* Leading :: requires some special handling. */
119 if (*src == ':'){
120 if (*++src != ':') {
121 DEBUGP("invalid \":\" at the head of addr\n");
122 return 0;
123 }
124 clen++;
125 }
126
127 saw_xdigit = 0;
128 val = 0;
129 while ((clen < dlen) && (*src != term)) {
130 const char *pch;
131
132 ch = tolower(*src++);
133 clen++;
134
135 pch = strchr(xdigits, ch);
136 if (pch != NULL) {
137 val <<= 4;
138 val |= (pch - xdigits);
139 if (val > 0xffff)
140 return 0;
141
142 saw_xdigit = 1;
143 continue;
144 }
145 if (ch != ':') {
146 DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
147 return 0;
148 }
149
150 if (!saw_xdigit) {
151 if (colonp) {
152 DEBUGP("invalid location of \"::\".\n");
153 return 0;
154 }
155 colonp = tp;
156 continue;
157 } else if (*src == term) {
158 DEBUGP("trancated IPv6 addr\n");
159 return 0;
160 }
161
162 if (tp + 2 > endp)
163 return 0;
164 *tp++ = (u_int8_t) (val >> 8) & 0xff;
165 *tp++ = (u_int8_t) val & 0xff;
166
167 saw_xdigit = 0;
168 val = 0;
169 continue;
170 }
171 if (saw_xdigit) {
172 if (tp + 2 > endp)
173 return 0;
174 *tp++ = (u_int8_t) (val >> 8) & 0xff;
175 *tp++ = (u_int8_t) val & 0xff;
176 }
177 if (colonp != NULL) {
178 /*
179 * Since some memmove()'s erroneously fail to handle
180 * overlapping regions, we'll do the shift by hand.
181 */
182 const int n = tp - colonp;
183 int i;
184
185 if (tp == endp)
186 return 0;
187
188 for (i = 1; i <= n; i++) {
189 endp[- i] = colonp[n - i];
190 colonp[n - i] = 0;
191 }
192 tp = endp;
193 }
194 if (tp != endp || (*src != term))
195 return 0;
196
197 memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
198 return clen;
199}
200
201static int try_number(const char *data, size_t dlen, u_int32_t array[],
202 int array_size, char sep, char term)
203{
204 u_int32_t i, len;
205
206 memset(array, 0, sizeof(array[0])*array_size);
207
208 /* Keep data pointing at next char. */
209 for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
210 if (*data >= '0' && *data <= '9') {
211 array[i] = array[i]*10 + *data - '0';
212 }
213 else if (*data == sep)
214 i++;
215 else {
216 /* Unexpected character; true if it's the
217 terminator and we're finished. */
218 if (*data == term && i == array_size - 1)
219 return len;
220
221 DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
222 len, i, *data);
223 return 0;
224 }
225 }
226 DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
227
228 return 0;
229}
230
231/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
232static int try_rfc959(const char *data, size_t dlen,
233 struct nf_conntrack_man *cmd, char term)
234{
235 int length;
236 u_int32_t array[6];
237
238 length = try_number(data, dlen, array, 6, ',', term);
239 if (length == 0)
240 return 0;
241
242 cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) |
243 (array[2] << 8) | array[3]);
244 cmd->u.tcp.port = htons((array[4] << 8) | array[5]);
245 return length;
246}
247
248/* Grab port: number up to delimiter */
249static int get_port(const char *data, int start, size_t dlen, char delim,
250 u_int16_t *port)
251{
252 u_int16_t tmp_port = 0;
253 int i;
254
255 for (i = start; i < dlen; i++) {
256 /* Finished? */
257 if (data[i] == delim) {
258 if (tmp_port == 0)
259 break;
260 *port = htons(tmp_port);
261 DEBUGP("get_port: return %d\n", tmp_port);
262 return i + 1;
263 }
264 else if (data[i] >= '0' && data[i] <= '9')
265 tmp_port = tmp_port*10 + data[i] - '0';
266 else { /* Some other crap */
267 DEBUGP("get_port: invalid char.\n");
268 break;
269 }
270 }
271 return 0;
272}
273
274/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */
275static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
276 char term)
277{
278 char delim;
279 int length;
280
281 /* First character is delimiter, then "1" for IPv4 or "2" for IPv6,
282 then delimiter again. */
283 if (dlen <= 3) {
284 DEBUGP("EPRT: too short\n");
285 return 0;
286 }
287 delim = data[0];
288 if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
289 DEBUGP("try_eprt: invalid delimitter.\n");
290 return 0;
291 }
292
293 if ((cmd->l3num == PF_INET && data[1] != '1') ||
294 (cmd->l3num == PF_INET6 && data[1] != '2')) {
295 DEBUGP("EPRT: invalid protocol number.\n");
296 return 0;
297 }
298
299 DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim);
300
301 if (data[1] == '1') {
302 u_int32_t array[4];
303
304 /* Now we have IP address. */
305 length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
306 if (length != 0)
307 cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16)
308 | (array[2] << 8) | array[3]);
309 } else {
310 /* Now we have IPv6 address. */
311 length = get_ipv6_addr(data + 3, dlen - 3,
312 (struct in6_addr *)cmd->u3.ip6, delim);
313 }
314
315 if (length == 0)
316 return 0;
317 DEBUGP("EPRT: Got IP address!\n");
318 /* Start offset includes initial "|1|", and trailing delimiter */
319 return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port);
320}
321
322/* Returns 0, or length of numbers: |||6446| */
323static int try_epsv_response(const char *data, size_t dlen,
324 struct nf_conntrack_man *cmd, char term)
325{
326 char delim;
327
328 /* Three delimiters. */
329 if (dlen <= 3) return 0;
330 delim = data[0];
331 if (isdigit(delim) || delim < 33 || delim > 126
332 || data[1] != delim || data[2] != delim)
333 return 0;
334
335 return get_port(data, 3, dlen, delim, &cmd->u.tcp.port);
336}
337
338/* Return 1 for match, 0 for accept, -1 for partial. */
339static int find_pattern(const char *data, size_t dlen,
340 const char *pattern, size_t plen,
341 char skip, char term,
342 unsigned int *numoff,
343 unsigned int *numlen,
344 struct nf_conntrack_man *cmd,
345 int (*getnum)(const char *, size_t,
346 struct nf_conntrack_man *, char))
347{
348 size_t i;
349
350 DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
351 if (dlen == 0)
352 return 0;
353
354 if (dlen <= plen) {
355 /* Short packet: try for partial? */
356 if (strnicmp(data, pattern, dlen) == 0)
357 return -1;
358 else return 0;
359 }
360
361 if (strnicmp(data, pattern, plen) != 0) {
362#if 0
363 size_t i;
364
365 DEBUGP("ftp: string mismatch\n");
366 for (i = 0; i < plen; i++) {
367 DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
368 i, data[i], data[i],
369 pattern[i], pattern[i]);
370 }
371#endif
372 return 0;
373 }
374
375 DEBUGP("Pattern matches!\n");
376 /* Now we've found the constant string, try to skip
377 to the 'skip' character */
378 for (i = plen; data[i] != skip; i++)
379 if (i == dlen - 1) return -1;
380
381 /* Skip over the last character */
382 i++;
383
384 DEBUGP("Skipped up to `%c'!\n", skip);
385
386 *numoff = i;
387 *numlen = getnum(data + i, dlen - i, cmd, term);
388 if (!*numlen)
389 return -1;
390
391 DEBUGP("Match succeeded!\n");
392 return 1;
393}
394
395/* Look up to see if we're just after a \n. */
396static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
397{
398 unsigned int i;
399
400 for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
401 if (info->seq_aft_nl[dir][i] == seq)
402 return 1;
403 return 0;
404}
405
406/* We don't update if it's older than what we have. */
407static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
408 struct sk_buff *skb)
409{
410 unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
411
412 /* Look for oldest: if we find exact match, we're done. */
413 for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
414 if (info->seq_aft_nl[dir][i] == nl_seq)
415 return;
416
417 if (oldest == info->seq_aft_nl_num[dir]
418 || before(info->seq_aft_nl[dir][i], oldest))
419 oldest = i;
420 }
421
422 if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
423 info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
424 nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
425 } else if (oldest != NUM_SEQ_TO_REMEMBER) {
426 info->seq_aft_nl[dir][oldest] = nl_seq;
427 nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
428 }
429}
430
431static int help(struct sk_buff **pskb,
432 unsigned int protoff,
433 struct nf_conn *ct,
434 enum ip_conntrack_info ctinfo)
435{
436 unsigned int dataoff, datalen;
437 struct tcphdr _tcph, *th;
438 char *fb_ptr;
439 int ret;
440 u32 seq;
441 int dir = CTINFO2DIR(ctinfo);
442 unsigned int matchlen, matchoff;
443 struct ip_ct_ftp_master *ct_ftp_info = &ct->help->ct_ftp_info;
444 struct nf_conntrack_expect *exp;
445 struct nf_conntrack_man cmd = {};
446
447 unsigned int i;
448 int found = 0, ends_in_nl;
449
450 /* Until there's been traffic both ways, don't look in packets. */
451 if (ctinfo != IP_CT_ESTABLISHED
452 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
453 DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
454 return NF_ACCEPT;
455 }
456
457 th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
458 if (th == NULL)
459 return NF_ACCEPT;
460
461 dataoff = protoff + th->doff * 4;
462 /* No data? */
463 if (dataoff >= (*pskb)->len) {
464 DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
465 (*pskb)->len);
466 return NF_ACCEPT;
467 }
468 datalen = (*pskb)->len - dataoff;
469
470 spin_lock_bh(&nf_ftp_lock);
471 fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer);
472 BUG_ON(fb_ptr == NULL);
473
474 ends_in_nl = (fb_ptr[datalen - 1] == '\n');
475 seq = ntohl(th->seq) + datalen;
476
477 /* Look up to see if we're just after a \n. */
478 if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
479 /* Now if this ends in \n, update ftp info. */
480 DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
481 ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
482 ct_ftp_info->seq_aft_nl[dir][0],
483 ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
484 ct_ftp_info->seq_aft_nl[dir][1]);
485 ret = NF_ACCEPT;
486 goto out_update_nl;
487 }
488
489 /* Initialize IP/IPv6 addr to expected address (it's not mentioned
490 in EPSV responses) */
491 cmd.l3num = ct->tuplehash[dir].tuple.src.l3num;
492 memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
493 sizeof(cmd.u3.all));
494
495 for (i = 0; i < ARRAY_SIZE(search); i++) {
496 if (search[i].dir != dir) continue;
497
498 found = find_pattern(fb_ptr, datalen,
499 search[i].pattern,
500 search[i].plen,
501 search[i].skip,
502 search[i].term,
503 &matchoff, &matchlen,
504 &cmd,
505 search[i].getnum);
506 if (found) break;
507 }
508 if (found == -1) {
509 /* We don't usually drop packets. After all, this is
510 connection tracking, not packet filtering.
511 However, it is necessary for accurate tracking in
512 this case. */
513 if (net_ratelimit())
514 printk("conntrack_ftp: partial %s %u+%u\n",
515 search[i].pattern,
516 ntohl(th->seq), datalen);
517 ret = NF_DROP;
518 goto out;
519 } else if (found == 0) { /* No match */
520 ret = NF_ACCEPT;
521 goto out_update_nl;
522 }
523
524 DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
525 (int)matchlen, fb_ptr + matchoff,
526 matchlen, ntohl(th->seq) + matchoff);
527
528 exp = nf_conntrack_expect_alloc(ct);
529 if (exp == NULL) {
530 ret = NF_DROP;
531 goto out;
532 }
533
534 /* We refer to the reverse direction ("!dir") tuples here,
535 * because we're expecting something in the other direction.
536 * Doesn't matter unless NAT is happening. */
537 exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3;
538
539 /* Update the ftp info */
540 if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) &&
541 memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
542 sizeof(cmd.u3.all))) {
543 /* Enrico Scholz's passive FTP to partially RNAT'd ftp
544 server: it really wants us to connect to a
545 different IP address. Simply don't record it for
546 NAT. */
547 if (cmd.l3num == PF_INET) {
548 DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
549 NIPQUAD(cmd.u3.ip),
550 NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip));
551 } else {
552 DEBUGP("conntrack_ftp: NOT RECORDING: %x:%x:%x:%x:%x:%x:%x:%x != %x:%x:%x:%x:%x:%x:%x:%x\n",
553 NIP6(*((struct in6_addr *)cmd.u3.ip6)),
554 NIP6(*((struct in6_addr *)ct->tuplehash[dir]
555 .tuple.src.u3.ip6)));
556 }
557
558 /* Thanks to Cristiano Lincoln Mattos
559 <lincoln@cesar.org.br> for reporting this potential
560 problem (DMZ machines opening holes to internal
561 networks, or the packet filter itself). */
562 if (!loose) {
563 ret = NF_ACCEPT;
564 goto out_put_expect;
565 }
566 memcpy(&exp->tuple.dst.u3, &cmd.u3.all,
567 sizeof(exp->tuple.dst.u3));
568 }
569
570 exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3;
571 exp->tuple.src.l3num = cmd.l3num;
572 exp->tuple.src.u.tcp.port = 0;
573 exp->tuple.dst.u.tcp.port = cmd.u.tcp.port;
574 exp->tuple.dst.protonum = IPPROTO_TCP;
575
576 exp->mask = (struct nf_conntrack_tuple)
577 { .src = { .l3num = 0xFFFF,
578 .u = { .tcp = { 0 }},
579 },
580 .dst = { .protonum = 0xFF,
581 .u = { .tcp = { 0xFFFF }},
582 },
583 };
584 if (cmd.l3num == PF_INET) {
585 exp->mask.src.u3.ip = 0xFFFFFFFF;
586 exp->mask.dst.u3.ip = 0xFFFFFFFF;
587 } else {
588 memset(exp->mask.src.u3.ip6, 0xFF,
589 sizeof(exp->mask.src.u3.ip6));
590 memset(exp->mask.dst.u3.ip6, 0xFF,
591 sizeof(exp->mask.src.u3.ip6));
592 }
593
594 exp->expectfn = NULL;
595 exp->flags = 0;
596
597 /* Now, NAT might want to mangle the packet, and register the
598 * (possibly changed) expectation itself. */
599 if (nf_nat_ftp_hook)
600 ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
601 matchoff, matchlen, exp, &seq);
602 else {
603 /* Can't expect this? Best to drop packet now. */
604 if (nf_conntrack_expect_related(exp) != 0)
605 ret = NF_DROP;
606 else
607 ret = NF_ACCEPT;
608 }
609
610out_put_expect:
611 nf_conntrack_expect_put(exp);
612
613out_update_nl:
614 /* Now if this ends in \n, update ftp info. Seq may have been
615 * adjusted by NAT code. */
616 if (ends_in_nl)
617 update_nl_seq(seq, ct_ftp_info, dir, *pskb);
618 out:
619 spin_unlock_bh(&nf_ftp_lock);
620 return ret;
621}
622
623static struct nf_conntrack_helper ftp[MAX_PORTS][2];
624static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")];
625
626/* don't make this __exit, since it's called from __init ! */
627static void fini(void)
628{
629 int i, j;
630 for (i = 0; i < ports_c; i++) {
631 for (j = 0; j < 2; j++) {
632 if (ftp[i][j].me == NULL)
633 continue;
634
635 DEBUGP("nf_ct_ftp: unregistering helper for pf: %d "
636 "port: %d\n",
637 ftp[i][j].tuple.src.l3num, ports[i]);
638 nf_conntrack_helper_unregister(&ftp[i][j]);
639 }
640 }
641
642 kfree(ftp_buffer);
643}
644
645static int __init init(void)
646{
647 int i, j = -1, ret = 0;
648 char *tmpname;
649
650 ftp_buffer = kmalloc(65536, GFP_KERNEL);
651 if (!ftp_buffer)
652 return -ENOMEM;
653
654 if (ports_c == 0)
655 ports[ports_c++] = FTP_PORT;
656
657 /* FIXME should be configurable whether IPv4 and IPv6 FTP connections
658 are tracked or not - YK */
659 for (i = 0; i < ports_c; i++) {
660 memset(&ftp[i], 0, sizeof(struct nf_conntrack_helper));
661
662 ftp[i][0].tuple.src.l3num = PF_INET;
663 ftp[i][1].tuple.src.l3num = PF_INET6;
664 for (j = 0; j < 2; j++) {
665 ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
666 ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
667 ftp[i][j].mask.src.u.tcp.port = 0xFFFF;
668 ftp[i][j].mask.dst.protonum = 0xFF;
669 ftp[i][j].max_expected = 1;
670 ftp[i][j].timeout = 5 * 60; /* 5 Minutes */
671 ftp[i][j].me = THIS_MODULE;
672 ftp[i][j].help = help;
673 tmpname = &ftp_names[i][j][0];
674 if (ports[i] == FTP_PORT)
675 sprintf(tmpname, "ftp");
676 else
677 sprintf(tmpname, "ftp-%d", ports[i]);
678 ftp[i][j].name = tmpname;
679
680 DEBUGP("nf_ct_ftp: registering helper for pf: %d "
681 "port: %d\n",
682 ftp[i][j].tuple.src.l3num, ports[i]);
683 ret = nf_conntrack_helper_register(&ftp[i][j]);
684 if (ret) {
685 printk("nf_ct_ftp: failed to register helper "
686 " for pf: %d port: %d\n",
687 ftp[i][j].tuple.src.l3num, ports[i]);
688 fini();
689 return ret;
690 }
691 }
692 }
693
694 return 0;
695}
696
697module_init(init);
698module_exit(fini);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
new file mode 100644
index 000000000000..7de4f06c63c5
--- /dev/null
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -0,0 +1,98 @@
1/*
2 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
3 *
4 * Based largely upon the original ip_conntrack code which
5 * had the following copyright information:
6 *
7 * (C) 1999-2001 Paul `Rusty' Russell
8 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * Author:
15 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
16 */
17
18#include <linux/config.h>
19#include <linux/types.h>
20#include <linux/ip.h>
21#include <linux/netfilter.h>
22#include <linux/module.h>
23#include <linux/skbuff.h>
24#include <linux/icmp.h>
25#include <linux/sysctl.h>
26#include <net/ip.h>
27
28#include <linux/netfilter_ipv4.h>
29#include <net/netfilter/nf_conntrack.h>
30#include <net/netfilter/nf_conntrack_protocol.h>
31#include <net/netfilter/nf_conntrack_l3proto.h>
32#include <net/netfilter/nf_conntrack_core.h>
33#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34
35#if 0
36#define DEBUGP printk
37#else
38#define DEBUGP(format, args...)
39#endif
40
41DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
42
43static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
44 struct nf_conntrack_tuple *tuple)
45{
46 memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
47 memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
48
49 return 1;
50}
51
52static int generic_invert_tuple(struct nf_conntrack_tuple *tuple,
53 const struct nf_conntrack_tuple *orig)
54{
55 memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
56 memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
57
58 return 1;
59}
60
/* Nothing layer-3 specific to print for a generic tuple; writes nothing
   to the seq_file and returns 0. */
static int generic_print_tuple(struct seq_file *s,
			       const struct nf_conntrack_tuple *tuple)
{
	return 0;
}
66
/* The generic layer-3 handler prints no per-conntrack data; writes
   nothing to the seq_file and returns 0. */
static int generic_print_conntrack(struct seq_file *s,
				   const struct nf_conn *conntrack)
{
	return 0;
}
72
73static int
74generic_prepare(struct sk_buff **pskb, unsigned int hooknum,
75 unsigned int *dataoff, u_int8_t *protonum)
76{
77 /* Never track !!! */
78 return -NF_ACCEPT;
79}
80
81
82static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple)
83
84{
85 return NF_CT_F_BASIC;
86}
87
88struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = {
89 .l3proto = PF_UNSPEC,
90 .name = "unknown",
91 .pkt_to_tuple = generic_pkt_to_tuple,
92 .invert_tuple = generic_invert_tuple,
93 .print_tuple = generic_print_tuple,
94 .print_conntrack = generic_print_conntrack,
95 .prepare = generic_prepare,
96 .get_features = generic_get_features,
97 .me = THIS_MODULE,
98};
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
new file mode 100644
index 000000000000..36425f6c833f
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -0,0 +1,85 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
9 * - enable working with L3 protocol independent connection tracking.
10 *
11 * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c
12 */
13
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/timer.h>
17#include <linux/netfilter.h>
18#include <net/netfilter/nf_conntrack_protocol.h>
19
20unsigned long nf_ct_generic_timeout = 600*HZ;
21
22static int generic_pkt_to_tuple(const struct sk_buff *skb,
23 unsigned int dataoff,
24 struct nf_conntrack_tuple *tuple)
25{
26 tuple->src.u.all = 0;
27 tuple->dst.u.all = 0;
28
29 return 1;
30}
31
32static int generic_invert_tuple(struct nf_conntrack_tuple *tuple,
33 const struct nf_conntrack_tuple *orig)
34{
35 tuple->src.u.all = 0;
36 tuple->dst.u.all = 0;
37
38 return 1;
39}
40
/* Print out the per-protocol part of the tuple: the generic protocol has
   none, so nothing is written and 0 is returned. */
static int generic_print_tuple(struct seq_file *s,
			       const struct nf_conntrack_tuple *tuple)
{
	return 0;
}
47
/* Print out the private part of the conntrack: the generic protocol keeps
   none, so nothing is written and 0 is returned. */
static int generic_print_conntrack(struct seq_file *s,
				   const struct nf_conn *state)
{
	return 0;
}
54
55/* Returns verdict for packet, or -1 for invalid. */
56static int packet(struct nf_conn *conntrack,
57 const struct sk_buff *skb,
58 unsigned int dataoff,
59 enum ip_conntrack_info ctinfo,
60 int pf,
61 unsigned int hooknum)
62{
63 nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_generic_timeout);
64 return NF_ACCEPT;
65}
66
/* Called when a new connection for this protocol found.  The generic
   handler has no state to set up and always returns 1. */
static int new(struct nf_conn *conntrack, const struct sk_buff *skb,
	       unsigned int dataoff)
{
	return 1;
}
73
74struct nf_conntrack_protocol nf_conntrack_generic_protocol =
75{
76 .l3proto = PF_UNSPEC,
77 .proto = 0,
78 .name = "unknown",
79 .pkt_to_tuple = generic_pkt_to_tuple,
80 .invert_tuple = generic_invert_tuple,
81 .print_tuple = generic_print_tuple,
82 .print_conntrack = generic_print_conntrack,
83 .packet = packet,
84 .new = new,
85};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
new file mode 100644
index 000000000000..3a600f77b4e0
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -0,0 +1,670 @@
1/*
2 * Connection tracking protocol helper module for SCTP.
3 *
4 * SCTP is defined in RFC 2960. References to various sections in this code
5 * are to this RFC.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
12 * - enable working with L3 protocol independent connection tracking.
13 *
14 * Derived from net/ipv4/ip_conntrack_sctp.c
15 */
16
17/*
18 * Added support for proc manipulation of timeouts.
19 */
20
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/sctp.h>
#include <linux/string.h>
#include <linux/seq_file.h>
31
32#include <net/netfilter/nf_conntrack.h>
33#include <net/netfilter/nf_conntrack_protocol.h>
34
35#if 0
36#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
37#else
38#define DEBUGP(format, args...)
39#endif
40
41/* Protects conntrack->proto.sctp */
42static DEFINE_RWLOCK(sctp_lock);
43
44/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
45 closely. They're more complex. --RR
46
47 And so for me for SCTP :D -Kiran */
48
/* Printable state names; the table is indexed with a conntrack state
   value and is never modified, so the pointers are const as well. */
static const char * const sctp_conntrack_names[] = {
	"NONE",
	"CLOSED",
	"COOKIE_WAIT",
	"COOKIE_ECHOED",
	"ESTABLISHED",
	"SHUTDOWN_SENT",
	"SHUTDOWN_RECD",
	"SHUTDOWN_ACK_SENT",
};
59
60#define SECS * HZ
61#define MINS * 60 SECS
62#define HOURS * 60 MINS
63#define DAYS * 24 HOURS
64
65static unsigned long nf_ct_sctp_timeout_closed = 10 SECS;
66static unsigned long nf_ct_sctp_timeout_cookie_wait = 3 SECS;
67static unsigned long nf_ct_sctp_timeout_cookie_echoed = 3 SECS;
68static unsigned long nf_ct_sctp_timeout_established = 5 DAYS;
69static unsigned long nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
70static unsigned long nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
71static unsigned long nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
72
73static unsigned long * sctp_timeouts[]
74= { NULL, /* SCTP_CONNTRACK_NONE */
75 &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
76 &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
77 &nf_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
78 &nf_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
79 &nf_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
80 &nf_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
81 &nf_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
82 };
83
84#define sNO SCTP_CONNTRACK_NONE
85#define sCL SCTP_CONNTRACK_CLOSED
86#define sCW SCTP_CONNTRACK_COOKIE_WAIT
87#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
88#define sES SCTP_CONNTRACK_ESTABLISHED
89#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
90#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
91#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
92#define sIV SCTP_CONNTRACK_MAX
93
94/*
95 These are the descriptions of the states:
96
97NOTE: These state names are tantalizingly similar to the states of an
98SCTP endpoint. But the interpretation of the states is a little different,
99considering that these are the states of the connection and not of an end
100point. Please note the subtleties. -Kiran
101
102NONE - Nothing so far.
103COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
104 an INIT_ACK chunk in the reply direction.
105COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
106ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
107SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
108SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction.
109SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
110 to that of the SHUTDOWN chunk.
111CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
112 the SHUTDOWN chunk. Connection is closed.
113*/
114
115/* TODO
116 - I have assumed that the first INIT is in the original direction.
117 This messes things when an INIT comes in the reply direction in CLOSED
118 state.
119 - Check the error type in the reply dir before transitioning from
120cookie echoed to closed.
121 - Sec 5.2.4 of RFC 2960
122 - Multi Homing support.
123*/
124
125/* SCTP conntrack state transitions */
126static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
127 {
128/* ORIGINAL */
129/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
130/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
131/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
132/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
133/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
134/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
135/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
136/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
137/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
138/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
139 },
140 {
141/* REPLY */
142/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
143/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
144/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
145/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
146/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
147/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
148/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
149/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
150/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
151/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
152 }
153};
154
155static int sctp_pkt_to_tuple(const struct sk_buff *skb,
156 unsigned int dataoff,
157 struct nf_conntrack_tuple *tuple)
158{
159 sctp_sctphdr_t _hdr, *hp;
160
161 DEBUGP(__FUNCTION__);
162 DEBUGP("\n");
163
164 /* Actually only need first 8 bytes. */
165 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
166 if (hp == NULL)
167 return 0;
168
169 tuple->src.u.sctp.port = hp->source;
170 tuple->dst.u.sctp.port = hp->dest;
171 return 1;
172}
173
174static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
175 const struct nf_conntrack_tuple *orig)
176{
177 DEBUGP(__FUNCTION__);
178 DEBUGP("\n");
179
180 tuple->src.u.sctp.port = orig->dst.u.sctp.port;
181 tuple->dst.u.sctp.port = orig->src.u.sctp.port;
182 return 1;
183}
184
185/* Print out the per-protocol part of the tuple. */
186static int sctp_print_tuple(struct seq_file *s,
187 const struct nf_conntrack_tuple *tuple)
188{
189 DEBUGP(__FUNCTION__);
190 DEBUGP("\n");
191
192 return seq_printf(s, "sport=%hu dport=%hu ",
193 ntohs(tuple->src.u.sctp.port),
194 ntohs(tuple->dst.u.sctp.port));
195}
196
197/* Print out the private part of the conntrack. */
198static int sctp_print_conntrack(struct seq_file *s,
199 const struct nf_conn *conntrack)
200{
201 enum sctp_conntrack state;
202
203 DEBUGP(__FUNCTION__);
204 DEBUGP("\n");
205
206 read_lock_bh(&sctp_lock);
207 state = conntrack->proto.sctp.state;
208 read_unlock_bh(&sctp_lock);
209
210 return seq_printf(s, "%s ", sctp_conntrack_names[state]);
211}
212
/*
 * Iterate over the chunks of an SCTP packet.
 *
 * offset starts right behind the common header at dataoff and count at 0.
 * On every iteration sch points at the current chunk header (_sch is the
 * scratch buffer handed to skb_header_pointer()).  The walk stops at the
 * end of the skb or when a chunk header cannot be fetched.  The step is
 * the chunk length, converted from network byte order and rounded up to a
 * multiple of 4.
 *
 * NOTE: a chunk whose length field is 0 does not advance offset, so the
 * loop body must reject such chunks itself.
 */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;	\
	(offset) < (skb)->len &&					\
	((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch)));	\
	(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
218
219/* Some validity checks to make sure the chunks are fine */
220static int do_basic_checks(struct nf_conn *conntrack,
221 const struct sk_buff *skb,
222 unsigned int dataoff,
223 char *map)
224{
225 u_int32_t offset, count;
226 sctp_chunkhdr_t _sch, *sch;
227 int flag;
228
229 DEBUGP(__FUNCTION__);
230 DEBUGP("\n");
231
232 flag = 0;
233
234 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
235 DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type);
236
237 if (sch->type == SCTP_CID_INIT
238 || sch->type == SCTP_CID_INIT_ACK
239 || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
240 flag = 1;
241 }
242
243 /* Cookie Ack/Echo chunks not the first OR
244 Init / Init Ack / Shutdown compl chunks not the only chunks */
245 if ((sch->type == SCTP_CID_COOKIE_ACK
246 || sch->type == SCTP_CID_COOKIE_ECHO
247 || flag)
248 && count !=0 ) {
249 DEBUGP("Basic checks failed\n");
250 return 1;
251 }
252
253 if (map) {
254 set_bit(sch->type, (void *)map);
255 }
256 }
257
258 DEBUGP("Basic checks passed\n");
259 return 0;
260}
261
262static int new_state(enum ip_conntrack_dir dir,
263 enum sctp_conntrack cur_state,
264 int chunk_type)
265{
266 int i;
267
268 DEBUGP(__FUNCTION__);
269 DEBUGP("\n");
270
271 DEBUGP("Chunk type: %d\n", chunk_type);
272
273 switch (chunk_type) {
274 case SCTP_CID_INIT:
275 DEBUGP("SCTP_CID_INIT\n");
276 i = 0; break;
277 case SCTP_CID_INIT_ACK:
278 DEBUGP("SCTP_CID_INIT_ACK\n");
279 i = 1; break;
280 case SCTP_CID_ABORT:
281 DEBUGP("SCTP_CID_ABORT\n");
282 i = 2; break;
283 case SCTP_CID_SHUTDOWN:
284 DEBUGP("SCTP_CID_SHUTDOWN\n");
285 i = 3; break;
286 case SCTP_CID_SHUTDOWN_ACK:
287 DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
288 i = 4; break;
289 case SCTP_CID_ERROR:
290 DEBUGP("SCTP_CID_ERROR\n");
291 i = 5; break;
292 case SCTP_CID_COOKIE_ECHO:
293 DEBUGP("SCTP_CID_COOKIE_ECHO\n");
294 i = 6; break;
295 case SCTP_CID_COOKIE_ACK:
296 DEBUGP("SCTP_CID_COOKIE_ACK\n");
297 i = 7; break;
298 case SCTP_CID_SHUTDOWN_COMPLETE:
299 DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
300 i = 8; break;
301 default:
302 /* Other chunks like DATA, SACK, HEARTBEAT and
303 its ACK do not cause a change in state */
304 DEBUGP("Unknown chunk type, Will stay in %s\n",
305 sctp_conntrack_names[cur_state]);
306 return cur_state;
307 }
308
309 DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
310 dir, sctp_conntrack_names[cur_state], chunk_type,
311 sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
312
313 return sctp_conntracks[dir][i][cur_state];
314}
315
316/* Returns verdict for packet, or -1 for invalid. */
317static int sctp_packet(struct nf_conn *conntrack,
318 const struct sk_buff *skb,
319 unsigned int dataoff,
320 enum ip_conntrack_info ctinfo,
321 int pf,
322 unsigned int hooknum)
323{
324 enum sctp_conntrack newconntrack, oldsctpstate;
325 sctp_sctphdr_t _sctph, *sh;
326 sctp_chunkhdr_t _sch, *sch;
327 u_int32_t offset, count;
328 char map[256 / sizeof (char)] = {0};
329
330 DEBUGP(__FUNCTION__);
331 DEBUGP("\n");
332
333 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
334 if (sh == NULL)
335 return -1;
336
337 if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
338 return -1;
339
340 /* Check the verification tag (Sec 8.5) */
341 if (!test_bit(SCTP_CID_INIT, (void *)map)
342 && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
343 && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
344 && !test_bit(SCTP_CID_ABORT, (void *)map)
345 && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
346 && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
347 DEBUGP("Verification tag check failed\n");
348 return -1;
349 }
350
351 oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
352 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
353 write_lock_bh(&sctp_lock);
354
355 /* Special cases of Verification tag check (Sec 8.5.1) */
356 if (sch->type == SCTP_CID_INIT) {
357 /* Sec 8.5.1 (A) */
358 if (sh->vtag != 0) {
359 write_unlock_bh(&sctp_lock);
360 return -1;
361 }
362 } else if (sch->type == SCTP_CID_ABORT) {
363 /* Sec 8.5.1 (B) */
364 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
365 && !(sh->vtag == conntrack->proto.sctp.vtag
366 [1 - CTINFO2DIR(ctinfo)])) {
367 write_unlock_bh(&sctp_lock);
368 return -1;
369 }
370 } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
371 /* Sec 8.5.1 (C) */
372 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
373 && !(sh->vtag == conntrack->proto.sctp.vtag
374 [1 - CTINFO2DIR(ctinfo)]
375 && (sch->flags & 1))) {
376 write_unlock_bh(&sctp_lock);
377 return -1;
378 }
379 } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
380 /* Sec 8.5.1 (D) */
381 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
382 write_unlock_bh(&sctp_lock);
383 return -1;
384 }
385 }
386
387 oldsctpstate = conntrack->proto.sctp.state;
388 newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
389
390 /* Invalid */
391 if (newconntrack == SCTP_CONNTRACK_MAX) {
392 DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
393 CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
394 write_unlock_bh(&sctp_lock);
395 return -1;
396 }
397
398 /* If it is an INIT or an INIT ACK note down the vtag */
399 if (sch->type == SCTP_CID_INIT
400 || sch->type == SCTP_CID_INIT_ACK) {
401 sctp_inithdr_t _inithdr, *ih;
402
403 ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
404 sizeof(_inithdr), &_inithdr);
405 if (ih == NULL) {
406 write_unlock_bh(&sctp_lock);
407 return -1;
408 }
409 DEBUGP("Setting vtag %x for dir %d\n",
410 ih->init_tag, !CTINFO2DIR(ctinfo));
411 conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
412 }
413
414 conntrack->proto.sctp.state = newconntrack;
415 if (oldsctpstate != newconntrack)
416 nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
417 write_unlock_bh(&sctp_lock);
418 }
419
420 nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
421
422 if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
423 && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
424 && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
425 DEBUGP("Setting assured bit\n");
426 set_bit(IPS_ASSURED_BIT, &conntrack->status);
427 nf_conntrack_event_cache(IPCT_STATUS, skb);
428 }
429
430 return NF_ACCEPT;
431}
432
433/* Called when a new connection for this protocol found. */
434static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
435 unsigned int dataoff)
436{
437 enum sctp_conntrack newconntrack;
438 sctp_sctphdr_t _sctph, *sh;
439 sctp_chunkhdr_t _sch, *sch;
440 u_int32_t offset, count;
441 char map[256 / sizeof (char)] = {0};
442
443 DEBUGP(__FUNCTION__);
444 DEBUGP("\n");
445
446 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
447 if (sh == NULL)
448 return 0;
449
450 if (do_basic_checks(conntrack, skb, dataoff, map) != 0)
451 return 0;
452
453 /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
454 if ((test_bit (SCTP_CID_ABORT, (void *)map))
455 || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
456 || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
457 return 0;
458 }
459
460 newconntrack = SCTP_CONNTRACK_MAX;
461 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
462 /* Don't need lock here: this conntrack not in circulation yet */
463 newconntrack = new_state(IP_CT_DIR_ORIGINAL,
464 SCTP_CONNTRACK_NONE, sch->type);
465
466 /* Invalid: delete conntrack */
467 if (newconntrack == SCTP_CONNTRACK_MAX) {
468 DEBUGP("nf_conntrack_sctp: invalid new deleting.\n");
469 return 0;
470 }
471
472 /* Copy the vtag into the state info */
473 if (sch->type == SCTP_CID_INIT) {
474 if (sh->vtag == 0) {
475 sctp_inithdr_t _inithdr, *ih;
476
477 ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
478 sizeof(_inithdr), &_inithdr);
479 if (ih == NULL)
480 return 0;
481
482 DEBUGP("Setting vtag %x for new conn\n",
483 ih->init_tag);
484
485 conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
486 ih->init_tag;
487 } else {
488 /* Sec 8.5.1 (A) */
489 return 0;
490 }
491 }
492 /* If it is a shutdown ack OOTB packet, we expect a return
493 shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
494 else {
495 DEBUGP("Setting vtag %x for new conn OOTB\n",
496 sh->vtag);
497 conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
498 }
499
500 conntrack->proto.sctp.state = newconntrack;
501 }
502
503 return 1;
504}
505
506struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = {
507 .l3proto = PF_INET,
508 .proto = IPPROTO_SCTP,
509 .name = "sctp",
510 .pkt_to_tuple = sctp_pkt_to_tuple,
511 .invert_tuple = sctp_invert_tuple,
512 .print_tuple = sctp_print_tuple,
513 .print_conntrack = sctp_print_conntrack,
514 .packet = sctp_packet,
515 .new = sctp_new,
516 .destroy = NULL,
517 .me = THIS_MODULE
518};
519
520struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = {
521 .l3proto = PF_INET6,
522 .proto = IPPROTO_SCTP,
523 .name = "sctp",
524 .pkt_to_tuple = sctp_pkt_to_tuple,
525 .invert_tuple = sctp_invert_tuple,
526 .print_tuple = sctp_print_tuple,
527 .print_conntrack = sctp_print_conntrack,
528 .packet = sctp_packet,
529 .new = sctp_new,
530 .destroy = NULL,
531 .me = THIS_MODULE
532};
533
#ifdef CONFIG_SYSCTL
/*
 * Every SCTP timeout is exported the same way: mode 0644, int-sized,
 * handled by proc_dointvec_jiffies.  Only the binary id, the proc name
 * and the backing variable differ between entries.
 */
#define SCTP_TIMEOUT_CTL(_id, _name, _var)			\
	{							\
		.ctl_name	= (_id),			\
		.procname	= (_name),			\
		.data		= &(_var),			\
		.maxlen		= sizeof(unsigned int),		\
		.mode		= 0644,				\
		.proc_handler	= &proc_dointvec_jiffies,	\
	}

/* The timeout entries themselves: net/netfilter/nf_conntrack_sctp_timeout_* */
static ctl_table nf_ct_sysctl_table[] = {
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
			 "nf_conntrack_sctp_timeout_closed",
			 nf_ct_sctp_timeout_closed),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
			 "nf_conntrack_sctp_timeout_cookie_wait",
			 nf_ct_sctp_timeout_cookie_wait),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
			 "nf_conntrack_sctp_timeout_cookie_echoed",
			 nf_ct_sctp_timeout_cookie_echoed),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
			 "nf_conntrack_sctp_timeout_established",
			 nf_ct_sctp_timeout_established),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
			 "nf_conntrack_sctp_timeout_shutdown_sent",
			 nf_ct_sctp_timeout_shutdown_sent),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
			 "nf_conntrack_sctp_timeout_shutdown_recd",
			 nf_ct_sctp_timeout_shutdown_recd),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
			 "nf_conntrack_sctp_timeout_shutdown_ack_sent",
			 nf_ct_sctp_timeout_shutdown_ack_sent),
	{ .ctl_name = 0 }
};

#undef SCTP_TIMEOUT_CTL

/* "netfilter" directory holding the timeout entries. */
static ctl_table nf_ct_netfilter_table[] = {
	{
		.ctl_name	= NET_NETFILTER,
		.procname	= "netfilter",
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

/* Top-level "net" directory; this is the table handed to
   register_sysctl_table(). */
static ctl_table nf_ct_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};

/* Handle returned by register_sysctl_table(); needed to unregister. */
static struct ctl_table_header *nf_ct_sysctl_header;
#endif
617
618int __init init(void)
619{
620 int ret;
621
622 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4);
623 if (ret) {
624 printk("nf_conntrack_proto_sctp4: protocol register failed\n");
625 goto out;
626 }
627 ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6);
628 if (ret) {
629 printk("nf_conntrack_proto_sctp6: protocol register failed\n");
630 goto cleanup_sctp4;
631 }
632
633#ifdef CONFIG_SYSCTL
634 nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
635 if (nf_ct_sysctl_header == NULL) {
636 printk("nf_conntrack_proto_sctp: can't register to sysctl.\n");
637 goto cleanup;
638 }
639#endif
640
641 return ret;
642
643#ifdef CONFIG_SYSCTL
644 cleanup:
645 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
646#endif
647 cleanup_sctp4:
648 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
649 out:
650 DEBUGP("SCTP conntrack module loading %s\n",
651 ret ? "failed": "succeeded");
652 return ret;
653}
654
655void __exit fini(void)
656{
657 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
658 nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
659#ifdef CONFIG_SYSCTL
660 unregister_sysctl_table(nf_ct_sysctl_header);
661#endif
662 DEBUGP("SCTP conntrack module unloaded\n");
663}
664
665module_init(init);
666module_exit(fini);
667
668MODULE_LICENSE("GPL");
669MODULE_AUTHOR("Kiran Kumar Immidi");
670MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
new file mode 100644
index 000000000000..83d90dd624f0
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -0,0 +1,1162 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
9 * - Real stateful connection tracking
10 * - Modified state transitions table
11 * - Window scaling support added
12 * - SACK support added
13 *
14 * Willy Tarreau:
15 * - State table bugfixes
16 * - More robust state changes
17 * - Tuning timer parameters
18 *
19 * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
20 * - generalized Layer 3 protocol part.
21 *
22 * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c
23 *
24 * version 2.2
25 */
26
27#include <linux/config.h>
28#include <linux/types.h>
29#include <linux/sched.h>
30#include <linux/timer.h>
31#include <linux/netfilter.h>
32#include <linux/module.h>
33#include <linux/in.h>
34#include <linux/tcp.h>
35#include <linux/spinlock.h>
36#include <linux/skbuff.h>
37#include <linux/ipv6.h>
38#include <net/ip6_checksum.h>
39
40#include <net/tcp.h>
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv4.h>
44#include <linux/netfilter_ipv6.h>
45#include <net/netfilter/nf_conntrack.h>
46#include <net/netfilter/nf_conntrack_protocol.h>
47
/*
 * Debug tracing switch.  With the condition below set to 0, DEBUGP()
 * expands to nothing.  Changing it to 1 maps DEBUGP() to printk and
 * defines DEBUGP_VARS, which enables the locals that exist only for the
 * debug output.
 */
#if 0
#define DEBUGP printk
#define DEBUGP_VARS
#else
#define DEBUGP(format, args...)
#endif
54
/* Protects conntrack->proto.tcp */
static DEFINE_RWLOCK(tcp_lock);

/*
 * "Be conservative in what you do,
 *  be liberal in what you accept from others."
 *
 * If it's non-zero, we mark only out of window RST segments as INVALID.
 */
int nf_ct_tcp_be_liberal = 0;

/*
 * When connection is picked up from the middle, how many packets are
 * required to pass in each direction when we assume we are in sync - if
 * any side uses window scaling, we lost the game.
 *
 * If it is set to zero, we disable picking up already established
 * connections.
 */
int nf_ct_tcp_loose = 3;

/*
 * Max number of the retransmitted packets without receiving an
 * (acceptable) ACK from the destination.  If this number is reached, a
 * shorter timer will be started.
 */
int nf_ct_tcp_max_retrans = 3;
74
75 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
76 closely. They're more complex. --RR */
77
/*
 * Printable names of the TCP conntrack states; tcp_print_conntrack()
 * indexes this array with conntrack->proto.tcp.state.
 */
static const char *tcp_conntrack_names[] = {
	"NONE",		/* 0 */
	"SYN_SENT",	/* 1 */
	"SYN_RECV",	/* 2 */
	"ESTABLISHED",	/* 3 */
	"FIN_WAIT",	/* 4 */
	"CLOSE_WAIT",	/* 5 */
	"LAST_ACK",	/* 6 */
	"TIME_WAIT",	/* 7 */
	"CLOSE",	/* 8 */
	"LISTEN"	/* 9 */
};
90
91#define SECS * HZ
92#define MINS * 60 SECS
93#define HOURS * 60 MINS
94#define DAYS * 24 HOURS
95
96unsigned long nf_ct_tcp_timeout_syn_sent = 2 MINS;
97unsigned long nf_ct_tcp_timeout_syn_recv = 60 SECS;
98unsigned long nf_ct_tcp_timeout_established = 5 DAYS;
99unsigned long nf_ct_tcp_timeout_fin_wait = 2 MINS;
100unsigned long nf_ct_tcp_timeout_close_wait = 60 SECS;
101unsigned long nf_ct_tcp_timeout_last_ack = 30 SECS;
102unsigned long nf_ct_tcp_timeout_time_wait = 2 MINS;
103unsigned long nf_ct_tcp_timeout_close = 10 SECS;
104
105/* RFC1122 says the R2 limit should be at least 100 seconds.
106 Linux uses 15 packets as limit, which corresponds
107 to ~13-30min depending on RTO. */
108unsigned long nf_ct_tcp_timeout_max_retrans = 5 MINS;
109
110static unsigned long * tcp_timeouts[]
111= { NULL, /* TCP_CONNTRACK_NONE */
112 &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
113 &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
114 &nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
115 &nf_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
116 &nf_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
117 &nf_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
118 &nf_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
119 &nf_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
120 NULL, /* TCP_CONNTRACK_LISTEN */
121 };
122
/* Two-letter state abbreviations used by the transition table below. */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sLI TCP_CONNTRACK_LISTEN
/* Pseudo states: "invalid" and "ignore" verdicts stored in the table. */
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
135
/*
 * What TCP flags are set from RST/SYN/FIN/ACK.
 *
 * The values are used as the middle index of the state transition
 * table, so their order must match the row order there.
 */
enum tcp_bit_set {
	TCP_SYN_SET,		/* SYN without ACK */
	TCP_SYNACK_SET,		/* SYN and ACK */
	TCP_FIN_SET,		/* FIN */
	TCP_ACK_SET,		/* ACK only */
	TCP_RST_SET,		/* RST */
	TCP_NONE_SET,		/* none of the above */
};
145
146/*
147 * The TCP state transition table needs a few words...
148 *
149 * We are the man in the middle. All the packets go through us
150 * but might get lost in transit to the destination.
151 * It is assumed that the destinations can't receive segments
152 * we haven't seen.
153 *
154 * The checked segment is in window, but our windows are *not*
155 * equivalent with the ones of the sender/receiver. We always
156 * try to guess the state of the current sender.
157 *
158 * The meaning of the states are:
159 *
160 * NONE: initial state
161 * SYN_SENT: SYN-only packet seen
162 * SYN_RECV: SYN-ACK packet seen
163 * ESTABLISHED: ACK packet seen
164 * FIN_WAIT: FIN packet seen
165 * CLOSE_WAIT: ACK seen (after FIN)
166 * LAST_ACK: FIN seen (after FIN)
167 * TIME_WAIT: last ACK seen
168 * CLOSE: closed connection
169 *
170 * LISTEN state is not used.
171 *
172 * Packets marked as IGNORED (sIG):
173 * if they may be either invalid or valid
174 * and the receiver may send back a connection
175 * closing RST or a SYN/ACK.
176 *
177 * Packets marked as INVALID (sIV):
178 * if they are invalid
179 * or we do not support the request (simultaneous open)
180 */
181static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
182 {
183/* ORIGINAL */
184/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
185/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
186/*
187 * sNO -> sSS Initialize a new connection
188 * sSS -> sSS Retransmitted SYN
189 * sSR -> sIG Late retransmitted SYN?
190 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
191 * are errors. Receiver will reply with RST
192 * and close the connection.
193 * Or we are not in sync and hold a dead connection.
194 * sFW -> sIG
195 * sCW -> sIG
196 * sLA -> sIG
197 * sTW -> sSS Reopened connection (RFC 1122).
198 * sCL -> sSS
199 */
200/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
201/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
202/*
203 * A SYN/ACK from the client is always invalid:
204 * - either it tries to set up a simultaneous open, which is
205 * not supported;
206 * - or the firewall has just been inserted between the two hosts
207 * during the session set-up. The SYN will be retransmitted
208 * by the true client (or it'll time out).
209 */
210/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
211/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
212/*
213 * sNO -> sIV Too late and no reason to do anything...
214 * sSS -> sIV Client migth not send FIN in this state:
215 * we enforce waiting for a SYN/ACK reply first.
216 * sSR -> sFW Close started.
217 * sES -> sFW
218 * sFW -> sLA FIN seen in both directions, waiting for
219 * the last ACK.
220 * Migth be a retransmitted FIN as well...
221 * sCW -> sLA
222 * sLA -> sLA Retransmitted FIN. Remain in the same state.
223 * sTW -> sTW
224 * sCL -> sCL
225 */
226/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
227/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
228/*
229 * sNO -> sES Assumed.
230 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
231 * sSR -> sES Established state is reached.
232 * sES -> sES :-)
233 * sFW -> sCW Normal close request answered by ACK.
234 * sCW -> sCW
235 * sLA -> sTW Last ACK detected.
236 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
237 * sCL -> sCL
238 */
239/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
240/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
241/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
242 },
243 {
244/* REPLY */
245/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
246/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
247/*
248 * sNO -> sIV Never reached.
249 * sSS -> sIV Simultaneous open, not supported
250 * sSR -> sIV Simultaneous open, not supported.
251 * sES -> sIV Server may not initiate a connection.
252 * sFW -> sIV
253 * sCW -> sIV
254 * sLA -> sIV
255 * sTW -> sIV Reopened connection, but server may not do it.
256 * sCL -> sIV
257 */
258/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
259/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
260/*
261 * sSS -> sSR Standard open.
262 * sSR -> sSR Retransmitted SYN/ACK.
263 * sES -> sIG Late retransmitted SYN/ACK?
264 * sFW -> sIG Might be SYN/ACK answering ignored SYN
265 * sCW -> sIG
266 * sLA -> sIG
267 * sTW -> sIG
268 * sCL -> sIG
269 */
270/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
271/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
272/*
273 * sSS -> sIV Server might not send FIN in this state.
274 * sSR -> sFW Close started.
275 * sES -> sFW
276 * sFW -> sLA FIN seen in both directions.
277 * sCW -> sLA
278 * sLA -> sLA Retransmitted FIN.
279 * sTW -> sTW
280 * sCL -> sCL
281 */
282/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
283/*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
284/*
285 * sSS -> sIV Might be a half-open connection.
286 * sSR -> sSR Might answer late resent SYN.
287 * sES -> sES :-)
288 * sFW -> sCW Normal close request answered by ACK.
289 * sCW -> sCW
290 * sLA -> sTW Last ACK detected.
291 * sTW -> sTW Retransmitted last ACK.
292 * sCL -> sCL
293 */
294/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
295/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
296/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
297 }
298};
299
300static int tcp_pkt_to_tuple(const struct sk_buff *skb,
301 unsigned int dataoff,
302 struct nf_conntrack_tuple *tuple)
303{
304 struct tcphdr _hdr, *hp;
305
306 /* Actually only need first 8 bytes. */
307 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
308 if (hp == NULL)
309 return 0;
310
311 tuple->src.u.tcp.port = hp->source;
312 tuple->dst.u.tcp.port = hp->dest;
313
314 return 1;
315}
316
317static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
318 const struct nf_conntrack_tuple *orig)
319{
320 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
321 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
322 return 1;
323}
324
325/* Print out the per-protocol part of the tuple. */
326static int tcp_print_tuple(struct seq_file *s,
327 const struct nf_conntrack_tuple *tuple)
328{
329 return seq_printf(s, "sport=%hu dport=%hu ",
330 ntohs(tuple->src.u.tcp.port),
331 ntohs(tuple->dst.u.tcp.port));
332}
333
334/* Print out the private part of the conntrack. */
335static int tcp_print_conntrack(struct seq_file *s,
336 const struct nf_conn *conntrack)
337{
338 enum tcp_conntrack state;
339
340 read_lock_bh(&tcp_lock);
341 state = conntrack->proto.tcp.state;
342 read_unlock_bh(&tcp_lock);
343
344 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
345}
346
347static unsigned int get_conntrack_index(const struct tcphdr *tcph)
348{
349 if (tcph->rst) return TCP_RST_SET;
350 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
351 else if (tcph->fin) return TCP_FIN_SET;
352 else if (tcph->ack) return TCP_ACK_SET;
353 else return TCP_NONE_SET;
354}
355
356/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
357 in IP Filter' by Guido van Rooij.
358
359 http://www.nluug.nl/events/sane2000/papers.html
360 http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
361
362 The boundaries and the conditions are changed according to RFC793:
363 the packet must intersect the window (i.e. segments may be
364 after the right or before the left edge) and thus receivers may ACK
365 segments after the right edge of the window.
366
367 td_maxend = max(sack + max(win,1)) seen in reply packets
368 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
369 td_maxwin += seq + len - sender.td_maxend
370 if seq + len > sender.td_maxend
371 td_end = max(seq + len) seen in sent packets
372
373 I. Upper bound for valid data: seq <= sender.td_maxend
374 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
375 III. Upper bound for valid ack: sack <= receiver.td_end
376 IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
377
378 where sack is the highest right edge of sack block found in the packet.
379
380 The upper bound limit for a valid ack is not ignored -
381 we don't have to deal with fragments.
382*/
383
/*
 * Sequence number just past the end of a segment.
 *
 * @seq:     sequence number of the segment (host byte order)
 * @len:     total packet length
 * @dataoff: offset of the TCP header inside the packet
 * @tcph:    the TCP header
 *
 * The payload length is @len minus @dataoff minus the TCP header length
 * (doff * 4); SYN and FIN each count as one additional sequence number.
 * The result wraps modulo 2^32.
 */
static inline __u32 segment_seq_plus_len(__u32 seq,
					 size_t len,
					 unsigned int dataoff,
					 struct tcphdr *tcph)
{
	/* XXX Should I use payload length field in IP/IPv6 header ?
	 * - YK */
	__u32 end = seq + len - dataoff - tcph->doff*4;

	if (tcph->syn)
		end++;
	if (tcph->fin)
		end++;

	return end;
}
394
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/*
 * Width of the window used for the lower bound of a valid ACK: the
 * sender's td_maxwin, but never less than MAXACKWINCONST.
 * Note that the argument is evaluated more than once.
 */
#define MAXACKWINDOW(sender)						\
	(MAXACKWINCONST < (sender)->td_maxwin ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
400
401/*
402 * Simplified tcp_parse_options routine from tcp_input.c
403 */
404static void tcp_options(const struct sk_buff *skb,
405 unsigned int dataoff,
406 struct tcphdr *tcph,
407 struct ip_ct_tcp_state *state)
408{
409 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
410 unsigned char *ptr;
411 int length = (tcph->doff*4) - sizeof(struct tcphdr);
412
413 if (!length)
414 return;
415
416 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
417 length, buff);
418 BUG_ON(ptr == NULL);
419
420 state->td_scale =
421 state->flags = 0;
422
423 while (length > 0) {
424 int opcode=*ptr++;
425 int opsize;
426
427 switch (opcode) {
428 case TCPOPT_EOL:
429 return;
430 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
431 length--;
432 continue;
433 default:
434 opsize=*ptr++;
435 if (opsize < 2) /* "silly options" */
436 return;
437 if (opsize > length)
438 break; /* don't parse partial options */
439
440 if (opcode == TCPOPT_SACK_PERM
441 && opsize == TCPOLEN_SACK_PERM)
442 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
443 else if (opcode == TCPOPT_WINDOW
444 && opsize == TCPOLEN_WINDOW) {
445 state->td_scale = *(u_int8_t *)ptr;
446
447 if (state->td_scale > 14) {
448 /* See RFC1323 */
449 state->td_scale = 14;
450 }
451 state->flags |=
452 IP_CT_TCP_FLAG_WINDOW_SCALE;
453 }
454 ptr += opsize - 2;
455 length -= opsize;
456 }
457 }
458}
459
460static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
461 struct tcphdr *tcph, __u32 *sack)
462{
463 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
464 unsigned char *ptr;
465 int length = (tcph->doff*4) - sizeof(struct tcphdr);
466 __u32 tmp;
467
468 if (!length)
469 return;
470
471 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
472 length, buff);
473 BUG_ON(ptr == NULL);
474
475 /* Fast path for timestamp-only option */
476 if (length == TCPOLEN_TSTAMP_ALIGNED*4
477 && *(__u32 *)ptr ==
478 __constant_ntohl((TCPOPT_NOP << 24)
479 | (TCPOPT_NOP << 16)
480 | (TCPOPT_TIMESTAMP << 8)
481 | TCPOLEN_TIMESTAMP))
482 return;
483
484 while (length > 0) {
485 int opcode = *ptr++;
486 int opsize, i;
487
488 switch (opcode) {
489 case TCPOPT_EOL:
490 return;
491 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
492 length--;
493 continue;
494 default:
495 opsize = *ptr++;
496 if (opsize < 2) /* "silly options" */
497 return;
498 if (opsize > length)
499 break; /* don't parse partial options */
500
501 if (opcode == TCPOPT_SACK
502 && opsize >= (TCPOLEN_SACK_BASE
503 + TCPOLEN_SACK_PERBLOCK)
504 && !((opsize - TCPOLEN_SACK_BASE)
505 % TCPOLEN_SACK_PERBLOCK)) {
506 for (i = 0;
507 i < (opsize - TCPOLEN_SACK_BASE);
508 i += TCPOLEN_SACK_PERBLOCK) {
509 memcpy(&tmp, (__u32 *)(ptr + i) + 1,
510 sizeof(__u32));
511 tmp = ntohl(tmp);
512
513 if (after(tmp, *sack))
514 *sack = tmp;
515 }
516 return;
517 }
518 ptr += opsize - 2;
519 length -= opsize;
520 }
521 }
522}
523
524static int tcp_in_window(struct ip_ct_tcp *state,
525 enum ip_conntrack_dir dir,
526 unsigned int index,
527 const struct sk_buff *skb,
528 unsigned int dataoff,
529 struct tcphdr *tcph,
530 int pf)
531{
532 struct ip_ct_tcp_state *sender = &state->seen[dir];
533 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
534 __u32 seq, ack, sack, end, win, swin;
535 int res;
536
537 /*
538 * Get the required data from the packet.
539 */
540 seq = ntohl(tcph->seq);
541 ack = sack = ntohl(tcph->ack_seq);
542 win = ntohs(tcph->window);
543 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
544
545 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
546 tcp_sack(skb, dataoff, tcph, &sack);
547
548 DEBUGP("tcp_in_window: START\n");
549 DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
550 "seq=%u ack=%u sack=%u win=%u end=%u\n",
551 NIPQUAD(iph->saddr), ntohs(tcph->source),
552 NIPQUAD(iph->daddr), ntohs(tcph->dest),
553 seq, ack, sack, win, end);
554 DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
555 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
556 sender->td_end, sender->td_maxend, sender->td_maxwin,
557 sender->td_scale,
558 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
559 receiver->td_scale);
560
561 if (sender->td_end == 0) {
562 /*
563 * Initialize sender data.
564 */
565 if (tcph->syn && tcph->ack) {
566 /*
567 * Outgoing SYN-ACK in reply to a SYN.
568 */
569 sender->td_end =
570 sender->td_maxend = end;
571 sender->td_maxwin = (win == 0 ? 1 : win);
572
573 tcp_options(skb, dataoff, tcph, sender);
574 /*
575 * RFC 1323:
576 * Both sides must send the Window Scale option
577 * to enable window scaling in either direction.
578 */
579 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
580 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
581 sender->td_scale =
582 receiver->td_scale = 0;
583 } else {
584 /*
585 * We are in the middle of a connection,
586 * its history is lost for us.
587 * Let's try to use the data from the packet.
588 */
589 sender->td_end = end;
590 sender->td_maxwin = (win == 0 ? 1 : win);
591 sender->td_maxend = end + sender->td_maxwin;
592 }
593 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
594 && dir == IP_CT_DIR_ORIGINAL)
595 || (state->state == TCP_CONNTRACK_SYN_RECV
596 && dir == IP_CT_DIR_REPLY))
597 && after(end, sender->td_end)) {
598 /*
599 * RFC 793: "if a TCP is reinitialized ... then it need
600 * not wait at all; it must only be sure to use sequence
601 * numbers larger than those recently used."
602 */
603 sender->td_end =
604 sender->td_maxend = end;
605 sender->td_maxwin = (win == 0 ? 1 : win);
606
607 tcp_options(skb, dataoff, tcph, sender);
608 }
609
610 if (!(tcph->ack)) {
611 /*
612 * If there is no ACK, just pretend it was set and OK.
613 */
614 ack = sack = receiver->td_end;
615 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
616 (TCP_FLAG_ACK|TCP_FLAG_RST))
617 && (ack == 0)) {
618 /*
619 * Broken TCP stacks, that set ACK in RST packets as well
620 * with zero ack value.
621 */
622 ack = sack = receiver->td_end;
623 }
624
625 if (seq == end
626 && (!tcph->rst
627 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
628 /*
629 * Packets contains no data: we assume it is valid
630 * and check the ack value only.
631 * However RST segments are always validated by their
632 * SEQ number, except when seq == 0 (reset sent answering
633 * SYN.
634 */
635 seq = end = sender->td_end;
636
637 DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
638 "seq=%u ack=%u sack =%u win=%u end=%u\n",
639 NIPQUAD(iph->saddr), ntohs(tcph->source),
640 NIPQUAD(iph->daddr), ntohs(tcph->dest),
641 seq, ack, sack, win, end);
642 DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
643 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
644 sender->td_end, sender->td_maxend, sender->td_maxwin,
645 sender->td_scale,
646 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
647 receiver->td_scale);
648
649 DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
650 before(seq, sender->td_maxend + 1),
651 after(end, sender->td_end - receiver->td_maxwin - 1),
652 before(sack, receiver->td_end + 1),
653 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
654
655 if (sender->loose || receiver->loose ||
656 (before(seq, sender->td_maxend + 1) &&
657 after(end, sender->td_end - receiver->td_maxwin - 1) &&
658 before(sack, receiver->td_end + 1) &&
659 after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
660 /*
661 * Take into account window scaling (RFC 1323).
662 */
663 if (!tcph->syn)
664 win <<= sender->td_scale;
665
666 /*
667 * Update sender data.
668 */
669 swin = win + (sack - ack);
670 if (sender->td_maxwin < swin)
671 sender->td_maxwin = swin;
672 if (after(end, sender->td_end))
673 sender->td_end = end;
674 /*
675 * Update receiver data.
676 */
677 if (after(end, sender->td_maxend))
678 receiver->td_maxwin += end - sender->td_maxend;
679 if (after(sack + win, receiver->td_maxend - 1)) {
680 receiver->td_maxend = sack + win;
681 if (win == 0)
682 receiver->td_maxend++;
683 }
684
685 /*
686 * Check retransmissions.
687 */
688 if (index == TCP_ACK_SET) {
689 if (state->last_dir == dir
690 && state->last_seq == seq
691 && state->last_ack == ack
692 && state->last_end == end)
693 state->retrans++;
694 else {
695 state->last_dir = dir;
696 state->last_seq = seq;
697 state->last_ack = ack;
698 state->last_end = end;
699 state->retrans = 0;
700 }
701 }
702 /*
703 * Close the window of disabled window tracking :-)
704 */
705 if (sender->loose)
706 sender->loose--;
707
708 res = 1;
709 } else {
710 if (LOG_INVALID(IPPROTO_TCP))
711 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
712 "nf_ct_tcp: %s ",
713 before(seq, sender->td_maxend + 1) ?
714 after(end, sender->td_end - receiver->td_maxwin - 1) ?
715 before(sack, receiver->td_end + 1) ?
716 after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
717 : "ACK is under the lower bound (possible overly delayed ACK)"
718 : "ACK is over the upper bound (ACKed data not seen yet)"
719 : "SEQ is under the lower bound (already ACKed data retransmitted)"
720 : "SEQ is over the upper bound (over the window of the receiver)");
721
722 res = nf_ct_tcp_be_liberal;
723 }
724
725 DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
726 "receiver end=%u maxend=%u maxwin=%u\n",
727 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
728 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
729
730 return res;
731}
732
733#ifdef CONFIG_IP_NF_NAT_NEEDED
734/* Update sender->td_end after NAT successfully mangled the packet */
735/* Caller must linearize skb at tcp header. */
736void nf_conntrack_tcp_update(struct sk_buff *skb,
737 unsigned int dataoff,
738 struct nf_conn *conntrack,
739 int dir)
740{
741 struct tcphdr *tcph = (void *)skb->data + dataoff;
742 __u32 end;
743#ifdef DEBUGP_VARS
744 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
745 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
746#endif
747
748 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
749
750 write_lock_bh(&tcp_lock);
751 /*
752 * We have to worry for the ack in the reply packet only...
753 */
754 if (after(end, conntrack->proto.tcp.seen[dir].td_end))
755 conntrack->proto.tcp.seen[dir].td_end = end;
756 conntrack->proto.tcp.last_end = end;
757 write_unlock_bh(&tcp_lock);
758 DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
759 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
760 sender->td_end, sender->td_maxend, sender->td_maxwin,
761 sender->td_scale,
762 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
763 receiver->td_scale);
764}
765
766#endif
767
/* Bit masks for the TCP flags byte (byte 13 of the header, as read by
 * tcp_error()). */
#define TH_FIN	0x01
#define TH_SYN	0x02
#define TH_RST	0x04
#define TH_PUSH	0x08
#define TH_ACK	0x10
#define TH_URG	0x20
#define TH_ECE	0x40
#define TH_CWR	0x80
776
777/* table of valid flag combinations - ECE and CWR are always valid */
778static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
779{
780 [TH_SYN] = 1,
781 [TH_SYN|TH_ACK] = 1,
782 [TH_SYN|TH_ACK|TH_PUSH] = 1,
783 [TH_RST] = 1,
784 [TH_RST|TH_ACK] = 1,
785 [TH_RST|TH_ACK|TH_PUSH] = 1,
786 [TH_FIN|TH_ACK] = 1,
787 [TH_ACK] = 1,
788 [TH_ACK|TH_PUSH] = 1,
789 [TH_ACK|TH_URG] = 1,
790 [TH_ACK|TH_URG|TH_PUSH] = 1,
791 [TH_FIN|TH_ACK|TH_PUSH] = 1,
792 [TH_FIN|TH_ACK|TH_URG] = 1,
793 [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
794};
795
796/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
797static int tcp_error(struct sk_buff *skb,
798 unsigned int dataoff,
799 enum ip_conntrack_info *ctinfo,
800 int pf,
801 unsigned int hooknum,
802 int(*csum)(const struct sk_buff *,unsigned int))
803{
804 struct tcphdr _tcph, *th;
805 unsigned int tcplen = skb->len - dataoff;
806 u_int8_t tcpflags;
807
808 /* Smaller that minimal TCP header? */
809 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
810 if (th == NULL) {
811 if (LOG_INVALID(IPPROTO_TCP))
812 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
813 "nf_ct_tcp: short packet ");
814 return -NF_ACCEPT;
815 }
816
817 /* Not whole TCP header or malformed packet */
818 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
819 if (LOG_INVALID(IPPROTO_TCP))
820 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
821 "nf_ct_tcp: truncated/malformed packet ");
822 return -NF_ACCEPT;
823 }
824
825 /* Checksum invalid? Ignore.
826 * We skip checking packets on the outgoing path
827 * because the semantic of CHECKSUM_HW is different there
828 * and moreover root might send raw packets.
829 */
830 /* FIXME: Source route IP option packets --RR */
831 if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
832 (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING))
833 && skb->ip_summed != CHECKSUM_UNNECESSARY
834 && csum(skb, dataoff)) {
835 if (LOG_INVALID(IPPROTO_TCP))
836 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
837 "nf_ct_tcp: bad TCP checksum ");
838 return -NF_ACCEPT;
839 }
840
841 /* Check TCP flags. */
842 tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
843 if (!tcp_valid_flags[tcpflags]) {
844 if (LOG_INVALID(IPPROTO_TCP))
845 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
846 "nf_ct_tcp: invalid TCP flag combination ");
847 return -NF_ACCEPT;
848 }
849
850 return NF_ACCEPT;
851}
852
853static int csum4(const struct sk_buff *skb, unsigned int dataoff)
854{
855 return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
856 skb->len - dataoff, IPPROTO_TCP,
857 skb->ip_summed == CHECKSUM_HW ? skb->csum
858 : skb_checksum(skb, dataoff,
859 skb->len - dataoff, 0));
860}
861
862static int csum6(const struct sk_buff *skb, unsigned int dataoff)
863{
864 return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
865 skb->len - dataoff, IPPROTO_TCP,
866 skb->ip_summed == CHECKSUM_HW ? skb->csum
867 : skb_checksum(skb, dataoff, skb->len - dataoff,
868 0));
869}
870
871static int tcp_error4(struct sk_buff *skb,
872 unsigned int dataoff,
873 enum ip_conntrack_info *ctinfo,
874 int pf,
875 unsigned int hooknum)
876{
877 return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum4);
878}
879
880static int tcp_error6(struct sk_buff *skb,
881 unsigned int dataoff,
882 enum ip_conntrack_info *ctinfo,
883 int pf,
884 unsigned int hooknum)
885{
886 return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum6);
887}
888
889/* Returns verdict for packet, or -1 for invalid. */
890static int tcp_packet(struct nf_conn *conntrack,
891 const struct sk_buff *skb,
892 unsigned int dataoff,
893 enum ip_conntrack_info ctinfo,
894 int pf,
895 unsigned int hooknum)
896{
897 enum tcp_conntrack new_state, old_state;
898 enum ip_conntrack_dir dir;
899 struct tcphdr *th, _tcph;
900 unsigned long timeout;
901 unsigned int index;
902
903 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
904 BUG_ON(th == NULL);
905
906 write_lock_bh(&tcp_lock);
907 old_state = conntrack->proto.tcp.state;
908 dir = CTINFO2DIR(ctinfo);
909 index = get_conntrack_index(th);
910 new_state = tcp_conntracks[dir][index][old_state];
911
912 switch (new_state) {
913 case TCP_CONNTRACK_IGNORE:
914 /* Either SYN in ORIGINAL
915 * or SYN/ACK in REPLY. */
916 if (index == TCP_SYNACK_SET
917 && conntrack->proto.tcp.last_index == TCP_SYN_SET
918 && conntrack->proto.tcp.last_dir != dir
919 && ntohl(th->ack_seq) ==
920 conntrack->proto.tcp.last_end) {
921 /* This SYN/ACK acknowledges a SYN that we earlier
922 * ignored as invalid. This means that the client and
923 * the server are both in sync, while the firewall is
924 * not. We kill this session and block the SYN/ACK so
925 * that the client cannot but retransmit its SYN and
926 * thus initiate a clean new session.
927 */
928 write_unlock_bh(&tcp_lock);
929 if (LOG_INVALID(IPPROTO_TCP))
930 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
931 "nf_ct_tcp: killing out of sync session ");
932 if (del_timer(&conntrack->timeout))
933 conntrack->timeout.function((unsigned long)
934 conntrack);
935 return -NF_DROP;
936 }
937 conntrack->proto.tcp.last_index = index;
938 conntrack->proto.tcp.last_dir = dir;
939 conntrack->proto.tcp.last_seq = ntohl(th->seq);
940 conntrack->proto.tcp.last_end =
941 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
942
943 write_unlock_bh(&tcp_lock);
944 if (LOG_INVALID(IPPROTO_TCP))
945 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
946 "nf_ct_tcp: invalid packed ignored ");
947 return NF_ACCEPT;
948 case TCP_CONNTRACK_MAX:
949 /* Invalid packet */
950 DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
951 dir, get_conntrack_index(th),
952 old_state);
953 write_unlock_bh(&tcp_lock);
954 if (LOG_INVALID(IPPROTO_TCP))
955 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
956 "nf_ct_tcp: invalid state ");
957 return -NF_ACCEPT;
958 case TCP_CONNTRACK_SYN_SENT:
959 if (old_state < TCP_CONNTRACK_TIME_WAIT)
960 break;
961 if ((conntrack->proto.tcp.seen[dir].flags &
962 IP_CT_TCP_FLAG_CLOSE_INIT)
963 || after(ntohl(th->seq),
964 conntrack->proto.tcp.seen[dir].td_end)) {
965 /* Attempt to reopen a closed connection.
966 * Delete this connection and look up again. */
967 write_unlock_bh(&tcp_lock);
968 if (del_timer(&conntrack->timeout))
969 conntrack->timeout.function((unsigned long)
970 conntrack);
971 return -NF_REPEAT;
972 }
973 case TCP_CONNTRACK_CLOSE:
974 if (index == TCP_RST_SET
975 && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
976 && conntrack->proto.tcp.last_index == TCP_SYN_SET
977 && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
978 /* RST sent to invalid SYN we had let trough
979 * SYN was in window then, tear down connection.
980 * We skip window checking, because packet might ACK
981 * segments we ignored in the SYN. */
982 goto in_window;
983 }
984 /* Just fall trough */
985 default:
986 /* Keep compilers happy. */
987 break;
988 }
989
990 if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
991 skb, dataoff, th, pf)) {
992 write_unlock_bh(&tcp_lock);
993 return -NF_ACCEPT;
994 }
995 in_window:
996 /* From now on we have got in-window packets */
997 conntrack->proto.tcp.last_index = index;
998
999 DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
1000 "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1001 NIPQUAD(iph->saddr), ntohs(th->source),
1002 NIPQUAD(iph->daddr), ntohs(th->dest),
1003 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1004 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1005 old_state, new_state);
1006
1007 conntrack->proto.tcp.state = new_state;
1008 if (old_state != new_state
1009 && (new_state == TCP_CONNTRACK_FIN_WAIT
1010 || new_state == TCP_CONNTRACK_CLOSE))
1011 conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1012 timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
1013 && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
1014 ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
1015 write_unlock_bh(&tcp_lock);
1016
1017 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
1018 if (new_state != old_state)
1019 nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
1020
1021 if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
1022 /* If only reply is a RST, we can consider ourselves not to
1023 have an established connection: this is a fairly common
1024 problem case, so we can delete the conntrack
1025 immediately. --RR */
1026 if (th->rst) {
1027 if (del_timer(&conntrack->timeout))
1028 conntrack->timeout.function((unsigned long)
1029 conntrack);
1030 return NF_ACCEPT;
1031 }
1032 } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
1033 && (old_state == TCP_CONNTRACK_SYN_RECV
1034 || old_state == TCP_CONNTRACK_ESTABLISHED)
1035 && new_state == TCP_CONNTRACK_ESTABLISHED) {
1036 /* Set ASSURED if we see see valid ack in ESTABLISHED
1037 after SYN_RECV or a valid answer for a picked up
1038 connection. */
1039 set_bit(IPS_ASSURED_BIT, &conntrack->status);
1040 nf_conntrack_event_cache(IPCT_STATUS, skb);
1041 }
1042 nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
1043
1044 return NF_ACCEPT;
1045}
1046
1047/* Called when a new connection for this protocol found. */
1048static int tcp_new(struct nf_conn *conntrack,
1049 const struct sk_buff *skb,
1050 unsigned int dataoff)
1051{
1052 enum tcp_conntrack new_state;
1053 struct tcphdr *th, _tcph;
1054#ifdef DEBUGP_VARS
1055 struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
1056 struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
1057#endif
1058
1059 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1060 BUG_ON(th == NULL);
1061
1062 /* Don't need lock here: this conntrack not in circulation yet */
1063 new_state
1064 = tcp_conntracks[0][get_conntrack_index(th)]
1065 [TCP_CONNTRACK_NONE];
1066
1067 /* Invalid: delete conntrack */
1068 if (new_state >= TCP_CONNTRACK_MAX) {
1069 DEBUGP("nf_ct_tcp: invalid new deleting.\n");
1070 return 0;
1071 }
1072
1073 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1074 /* SYN packet */
1075 conntrack->proto.tcp.seen[0].td_end =
1076 segment_seq_plus_len(ntohl(th->seq), skb->len,
1077 dataoff, th);
1078 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1079 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1080 conntrack->proto.tcp.seen[0].td_maxwin = 1;
1081 conntrack->proto.tcp.seen[0].td_maxend =
1082 conntrack->proto.tcp.seen[0].td_end;
1083
1084 tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]);
1085 conntrack->proto.tcp.seen[1].flags = 0;
1086 conntrack->proto.tcp.seen[0].loose =
1087 conntrack->proto.tcp.seen[1].loose = 0;
1088 } else if (nf_ct_tcp_loose == 0) {
1089 /* Don't try to pick up connections. */
1090 return 0;
1091 } else {
1092 /*
1093 * We are in the middle of a connection,
1094 * its history is lost for us.
1095 * Let's try to use the data from the packet.
1096 */
1097 conntrack->proto.tcp.seen[0].td_end =
1098 segment_seq_plus_len(ntohl(th->seq), skb->len,
1099 dataoff, th);
1100 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1101 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1102 conntrack->proto.tcp.seen[0].td_maxwin = 1;
1103 conntrack->proto.tcp.seen[0].td_maxend =
1104 conntrack->proto.tcp.seen[0].td_end +
1105 conntrack->proto.tcp.seen[0].td_maxwin;
1106 conntrack->proto.tcp.seen[0].td_scale = 0;
1107
1108 /* We assume SACK. Should we assume window scaling too? */
1109 conntrack->proto.tcp.seen[0].flags =
1110 conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
1111 conntrack->proto.tcp.seen[0].loose =
1112 conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose;
1113 }
1114
1115 conntrack->proto.tcp.seen[1].td_end = 0;
1116 conntrack->proto.tcp.seen[1].td_maxend = 0;
1117 conntrack->proto.tcp.seen[1].td_maxwin = 1;
1118 conntrack->proto.tcp.seen[1].td_scale = 0;
1119
1120 /* tcp_packet will set them */
1121 conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
1122 conntrack->proto.tcp.last_index = TCP_NONE_SET;
1123
1124 DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1125 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1126 sender->td_end, sender->td_maxend, sender->td_maxwin,
1127 sender->td_scale,
1128 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1129 receiver->td_scale);
1130 return 1;
1131}
1132
1133struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
1134{
1135 .l3proto = PF_INET,
1136 .proto = IPPROTO_TCP,
1137 .name = "tcp",
1138 .pkt_to_tuple = tcp_pkt_to_tuple,
1139 .invert_tuple = tcp_invert_tuple,
1140 .print_tuple = tcp_print_tuple,
1141 .print_conntrack = tcp_print_conntrack,
1142 .packet = tcp_packet,
1143 .new = tcp_new,
1144 .error = tcp_error4,
1145};
1146
1147struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
1148{
1149 .l3proto = PF_INET6,
1150 .proto = IPPROTO_TCP,
1151 .name = "tcp",
1152 .pkt_to_tuple = tcp_pkt_to_tuple,
1153 .invert_tuple = tcp_invert_tuple,
1154 .print_tuple = tcp_print_tuple,
1155 .print_conntrack = tcp_print_conntrack,
1156 .packet = tcp_packet,
1157 .new = tcp_new,
1158 .error = tcp_error6,
1159};
1160
1161EXPORT_SYMBOL(nf_conntrack_protocol_tcp4);
1162EXPORT_SYMBOL(nf_conntrack_protocol_tcp6);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
new file mode 100644
index 000000000000..3cae7ce420dd
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -0,0 +1,216 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
9 * - enable working with Layer 3 protocol independent connection tracking.
10 *
11 * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c
12 */
13
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/timer.h>
17#include <linux/module.h>
18#include <linux/netfilter.h>
19#include <linux/udp.h>
20#include <linux/seq_file.h>
21#include <linux/skbuff.h>
22#include <linux/ipv6.h>
23#include <net/ip6_checksum.h>
24#include <net/checksum.h>
25#include <linux/netfilter.h>
26#include <linux/netfilter_ipv4.h>
27#include <linux/netfilter_ipv6.h>
28#include <net/netfilter/nf_conntrack_protocol.h>
29
30unsigned long nf_ct_udp_timeout = 30*HZ;
31unsigned long nf_ct_udp_timeout_stream = 180*HZ;
32
33static int udp_pkt_to_tuple(const struct sk_buff *skb,
34 unsigned int dataoff,
35 struct nf_conntrack_tuple *tuple)
36{
37 struct udphdr _hdr, *hp;
38
39 /* Actually only need first 8 bytes. */
40 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
41 if (hp == NULL)
42 return 0;
43
44 tuple->src.u.udp.port = hp->source;
45 tuple->dst.u.udp.port = hp->dest;
46
47 return 1;
48}
49
50static int udp_invert_tuple(struct nf_conntrack_tuple *tuple,
51 const struct nf_conntrack_tuple *orig)
52{
53 tuple->src.u.udp.port = orig->dst.u.udp.port;
54 tuple->dst.u.udp.port = orig->src.u.udp.port;
55 return 1;
56}
57
58/* Print out the per-protocol part of the tuple. */
59static int udp_print_tuple(struct seq_file *s,
60 const struct nf_conntrack_tuple *tuple)
61{
62 return seq_printf(s, "sport=%hu dport=%hu ",
63 ntohs(tuple->src.u.udp.port),
64 ntohs(tuple->dst.u.udp.port));
65}
66
/*
 * Print the protocol-private part of @conntrack.  Nothing is printed
 * for UDP; the function always returns 0.
 */
static int udp_print_conntrack(struct seq_file *s,
			       const struct nf_conn *conntrack)
{
	return 0;
}
73
74/* Returns verdict for packet, and may modify conntracktype */
75static int udp_packet(struct nf_conn *conntrack,
76 const struct sk_buff *skb,
77 unsigned int dataoff,
78 enum ip_conntrack_info ctinfo,
79 int pf,
80 unsigned int hooknum)
81{
82 /* If we've seen traffic both ways, this is some kind of UDP
83 stream. Extend timeout. */
84 if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
85 nf_ct_refresh_acct(conntrack, ctinfo, skb,
86 nf_ct_udp_timeout_stream);
87 /* Also, more likely to be important, and not a probe */
88 if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
89 nf_conntrack_event_cache(IPCT_STATUS, skb);
90 } else
91 nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout);
92
93 return NF_ACCEPT;
94}
95
/*
 * Invoked for the first packet of a new UDP flow.  No per-connection
 * state is initialised here; the function always returns 1.
 */
static int udp_new(struct nf_conn *conntrack, const struct sk_buff *skb,
		   unsigned int dataoff)
{
	return 1;
}
102
103static int udp_error(struct sk_buff *skb, unsigned int dataoff,
104 enum ip_conntrack_info *ctinfo,
105 int pf,
106 unsigned int hooknum,
107 int (*csum)(const struct sk_buff *, unsigned int))
108{
109 unsigned int udplen = skb->len - dataoff;
110 struct udphdr _hdr, *hdr;
111
112 /* Header is too small? */
113 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
114 if (hdr == NULL) {
115 if (LOG_INVALID(IPPROTO_UDP))
116 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
117 "nf_ct_udp: short packet ");
118 return -NF_ACCEPT;
119 }
120
121 /* Truncated/malformed packets */
122 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
123 if (LOG_INVALID(IPPROTO_UDP))
124 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
125 "nf_ct_udp: truncated/malformed packet ");
126 return -NF_ACCEPT;
127 }
128
129 /* Packet with no checksum */
130 if (!hdr->check)
131 return NF_ACCEPT;
132
133 /* Checksum invalid? Ignore.
134 * We skip checking packets on the outgoing path
135 * because the semantic of CHECKSUM_HW is different there
136 * and moreover root might send raw packets.
137 * FIXME: Source route IP option packets --RR */
138 if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
139 (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING))
140 && skb->ip_summed != CHECKSUM_UNNECESSARY
141 && csum(skb, dataoff)) {
142 if (LOG_INVALID(IPPROTO_UDP))
143 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
144 "nf_ct_udp: bad UDP checksum ");
145 return -NF_ACCEPT;
146 }
147
148 return NF_ACCEPT;
149}
150
151static int csum4(const struct sk_buff *skb, unsigned int dataoff)
152{
153 return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
154 skb->len - dataoff, IPPROTO_UDP,
155 skb->ip_summed == CHECKSUM_HW ? skb->csum
156 : skb_checksum(skb, dataoff,
157 skb->len - dataoff, 0));
158}
159
160static int csum6(const struct sk_buff *skb, unsigned int dataoff)
161{
162 return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
163 skb->len - dataoff, IPPROTO_UDP,
164 skb->ip_summed == CHECKSUM_HW ? skb->csum
165 : skb_checksum(skb, dataoff, skb->len - dataoff,
166 0));
167}
168
169static int udp_error4(struct sk_buff *skb,
170 unsigned int dataoff,
171 enum ip_conntrack_info *ctinfo,
172 int pf,
173 unsigned int hooknum)
174{
175 return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum4);
176}
177
178static int udp_error6(struct sk_buff *skb,
179 unsigned int dataoff,
180 enum ip_conntrack_info *ctinfo,
181 int pf,
182 unsigned int hooknum)
183{
184 return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum6);
185}
186
187struct nf_conntrack_protocol nf_conntrack_protocol_udp4 =
188{
189 .l3proto = PF_INET,
190 .proto = IPPROTO_UDP,
191 .name = "udp",
192 .pkt_to_tuple = udp_pkt_to_tuple,
193 .invert_tuple = udp_invert_tuple,
194 .print_tuple = udp_print_tuple,
195 .print_conntrack = udp_print_conntrack,
196 .packet = udp_packet,
197 .new = udp_new,
198 .error = udp_error4,
199};
200
201struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
202{
203 .l3proto = PF_INET6,
204 .proto = IPPROTO_UDP,
205 .name = "udp",
206 .pkt_to_tuple = udp_pkt_to_tuple,
207 .invert_tuple = udp_invert_tuple,
208 .print_tuple = udp_print_tuple,
209 .print_conntrack = udp_print_conntrack,
210 .packet = udp_packet,
211 .new = udp_new,
212 .error = udp_error6,
213};
214
215EXPORT_SYMBOL(nf_conntrack_protocol_udp4);
216EXPORT_SYMBOL(nf_conntrack_protocol_udp6);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
new file mode 100644
index 000000000000..45224db4fe2f
--- /dev/null
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -0,0 +1,869 @@
1/* This file contains all the functions required for the standalone
2 nf_conntrack module.
3
4 These are not required by the compatibility layer.
5*/
6
7/* (C) 1999-2001 Paul `Rusty' Russell
8 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
15 * - generalize L3 protocol dependent part.
16 *
17 * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
18 */
19
20#include <linux/config.h>
21#include <linux/types.h>
22#include <linux/netfilter.h>
23#include <linux/module.h>
24#include <linux/skbuff.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/percpu.h>
28#include <linux/netdevice.h>
29#ifdef CONFIG_SYSCTL
30#include <linux/sysctl.h>
31#endif
32
33#define ASSERT_READ_LOCK(x)
34#define ASSERT_WRITE_LOCK(x)
35
36#include <net/netfilter/nf_conntrack.h>
37#include <net/netfilter/nf_conntrack_l3proto.h>
38#include <net/netfilter/nf_conntrack_protocol.h>
39#include <net/netfilter/nf_conntrack_core.h>
40#include <net/netfilter/nf_conntrack_helper.h>
41#include <linux/netfilter_ipv4/listhelp.h>
42
43#if 0
44#define DEBUGP printk
45#else
46#define DEBUGP(format, args...)
47#endif
48
49MODULE_LICENSE("GPL");
50
51extern atomic_t nf_conntrack_count;
52DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
53
54static int kill_l3proto(struct nf_conn *i, void *data)
55{
56 return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num ==
57 ((struct nf_conntrack_l3proto *)data)->l3proto);
58}
59
60static int kill_proto(struct nf_conn *i, void *data)
61{
62 struct nf_conntrack_protocol *proto;
63 proto = (struct nf_conntrack_protocol *)data;
64 return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
65 proto->proto) &&
66 (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num ==
67 proto->l3proto);
68}
69
70#ifdef CONFIG_PROC_FS
71static int
72print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
73 struct nf_conntrack_l3proto *l3proto,
74 struct nf_conntrack_protocol *proto)
75{
76 return l3proto->print_tuple(s, tuple) || proto->print_tuple(s, tuple);
77}
78
#ifdef CONFIG_NF_CT_ACCT
/*
 * Print the packet and byte counters of one direction to @s.
 * Returns the result of seq_printf().
 */
static unsigned int
seq_print_counters(struct seq_file *s,
		   const struct ip_conntrack_counter *counter)
{
	unsigned long long pkts = (unsigned long long)counter->packets;
	unsigned long long octets = (unsigned long long)counter->bytes;

	return seq_printf(s, "packets=%llu bytes=%llu ", pkts, octets);
}
#else
/* Accounting not configured: print nothing and evaluate to 0. */
#define seq_print_counters(x, y)	0
#endif
91
/* Per-open iteration state of the conntrack /proc listing. */
struct ct_iter_state {
	/* index of the hash bucket currently being walked */
	unsigned int bucket;
};
95
96static struct list_head *ct_get_first(struct seq_file *seq)
97{
98 struct ct_iter_state *st = seq->private;
99
100 for (st->bucket = 0;
101 st->bucket < nf_conntrack_htable_size;
102 st->bucket++) {
103 if (!list_empty(&nf_conntrack_hash[st->bucket]))
104 return nf_conntrack_hash[st->bucket].next;
105 }
106 return NULL;
107}
108
109static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
110{
111 struct ct_iter_state *st = seq->private;
112
113 head = head->next;
114 while (head == &nf_conntrack_hash[st->bucket]) {
115 if (++st->bucket >= nf_conntrack_htable_size)
116 return NULL;
117 head = nf_conntrack_hash[st->bucket].next;
118 }
119 return head;
120}
121
122static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
123{
124 struct list_head *head = ct_get_first(seq);
125
126 if (head)
127 while (pos && (head = ct_get_next(seq, head)))
128 pos--;
129 return pos ? NULL : head;
130}
131
132static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
133{
134 read_lock_bh(&nf_conntrack_lock);
135 return ct_get_idx(seq, *pos);
136}
137
138static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
139{
140 (*pos)++;
141 return ct_get_next(s, v);
142}
143
144static void ct_seq_stop(struct seq_file *s, void *v)
145{
146 read_unlock_bh(&nf_conntrack_lock);
147}
148
149/* return 0 on success, 1 in case of error */
150static int ct_seq_show(struct seq_file *s, void *v)
151{
152 const struct nf_conntrack_tuple_hash *hash = v;
153 const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash);
154 struct nf_conntrack_l3proto *l3proto;
155 struct nf_conntrack_protocol *proto;
156
157 ASSERT_READ_LOCK(&nf_conntrack_lock);
158 NF_CT_ASSERT(conntrack);
159
160 /* we only want to print DIR_ORIGINAL */
161 if (NF_CT_DIRECTION(hash))
162 return 0;
163
164 l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
165 .tuple.src.l3num);
166
167 NF_CT_ASSERT(l3proto);
168 proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
169 .tuple.src.l3num,
170 conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
171 .tuple.dst.protonum);
172 NF_CT_ASSERT(proto);
173
174 if (seq_printf(s, "%-8s %u %-8s %u %ld ",
175 l3proto->name,
176 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num,
177 proto->name,
178 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
179 timer_pending(&conntrack->timeout)
180 ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0)
181 return -ENOSPC;
182
183 if (l3proto->print_conntrack(s, conntrack))
184 return -ENOSPC;
185
186 if (proto->print_conntrack(s, conntrack))
187 return -ENOSPC;
188
189 if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
190 l3proto, proto))
191 return -ENOSPC;
192
193 if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
194 return -ENOSPC;
195
196 if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
197 if (seq_printf(s, "[UNREPLIED] "))
198 return -ENOSPC;
199
200 if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
201 l3proto, proto))
202 return -ENOSPC;
203
204 if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
205 return -ENOSPC;
206
207 if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
208 if (seq_printf(s, "[ASSURED] "))
209 return -ENOSPC;
210
211#if defined(CONFIG_NF_CONNTRACK_MARK)
212 if (seq_printf(s, "mark=%u ", conntrack->mark))
213 return -ENOSPC;
214#endif
215
216 if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
217 return -ENOSPC;
218
219 return 0;
220}
221
222static struct seq_operations ct_seq_ops = {
223 .start = ct_seq_start,
224 .next = ct_seq_next,
225 .stop = ct_seq_stop,
226 .show = ct_seq_show
227};
228
229static int ct_open(struct inode *inode, struct file *file)
230{
231 struct seq_file *seq;
232 struct ct_iter_state *st;
233 int ret;
234
235 st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
236 if (st == NULL)
237 return -ENOMEM;
238 ret = seq_open(file, &ct_seq_ops);
239 if (ret)
240 goto out_free;
241 seq = file->private_data;
242 seq->private = st;
243 memset(st, 0, sizeof(struct ct_iter_state));
244 return ret;
245out_free:
246 kfree(st);
247 return ret;
248}
249
250static struct file_operations ct_file_ops = {
251 .owner = THIS_MODULE,
252 .open = ct_open,
253 .read = seq_read,
254 .llseek = seq_lseek,
255 .release = seq_release_private,
256};
257
258/* expects */
259static void *exp_seq_start(struct seq_file *s, loff_t *pos)
260{
261 struct list_head *e = &nf_conntrack_expect_list;
262 loff_t i;
263
264 /* strange seq_file api calls stop even if we fail,
265 * thus we need to grab lock since stop unlocks */
266 read_lock_bh(&nf_conntrack_lock);
267
268 if (list_empty(e))
269 return NULL;
270
271 for (i = 0; i <= *pos; i++) {
272 e = e->next;
273 if (e == &nf_conntrack_expect_list)
274 return NULL;
275 }
276 return e;
277}
278
279static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
280{
281 struct list_head *e = v;
282
283 ++*pos;
284 e = e->next;
285
286 if (e == &nf_conntrack_expect_list)
287 return NULL;
288
289 return e;
290}
291
292static void exp_seq_stop(struct seq_file *s, void *v)
293{
294 read_unlock_bh(&nf_conntrack_lock);
295}
296
297static int exp_seq_show(struct seq_file *s, void *v)
298{
299 struct nf_conntrack_expect *expect = v;
300
301 if (expect->timeout.function)
302 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
303 ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
304 else
305 seq_printf(s, "- ");
306 seq_printf(s, "l3proto = %u proto=%u ",
307 expect->tuple.src.l3num,
308 expect->tuple.dst.protonum);
309 print_tuple(s, &expect->tuple,
310 nf_ct_find_l3proto(expect->tuple.src.l3num),
311 nf_ct_find_proto(expect->tuple.src.l3num,
312 expect->tuple.dst.protonum));
313 return seq_putc(s, '\n');
314}
315
316static struct seq_operations exp_seq_ops = {
317 .start = exp_seq_start,
318 .next = exp_seq_next,
319 .stop = exp_seq_stop,
320 .show = exp_seq_show
321};
322
323static int exp_open(struct inode *inode, struct file *file)
324{
325 return seq_open(file, &exp_seq_ops);
326}
327
328static struct file_operations exp_file_ops = {
329 .owner = THIS_MODULE,
330 .open = exp_open,
331 .read = seq_read,
332 .llseek = seq_lseek,
333 .release = seq_release
334};
335
336static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
337{
338 int cpu;
339
340 if (*pos == 0)
341 return SEQ_START_TOKEN;
342
343 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
344 if (!cpu_possible(cpu))
345 continue;
346 *pos = cpu + 1;
347 return &per_cpu(nf_conntrack_stat, cpu);
348 }
349
350 return NULL;
351}
352
353static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
354{
355 int cpu;
356
357 for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
358 if (!cpu_possible(cpu))
359 continue;
360 *pos = cpu + 1;
361 return &per_cpu(nf_conntrack_stat, cpu);
362 }
363
364 return NULL;
365}
366
/* seq_file stop of the statistics listing: nothing to release. */
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
370
371static int ct_cpu_seq_show(struct seq_file *seq, void *v)
372{
373 unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
374 struct ip_conntrack_stat *st = v;
375
376 if (v == SEQ_START_TOKEN) {
377 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
378 return 0;
379 }
380
381 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
382 "%08x %08x %08x %08x %08x %08x %08x %08x \n",
383 nr_conntracks,
384 st->searched,
385 st->found,
386 st->new,
387 st->invalid,
388 st->ignore,
389 st->delete,
390 st->delete_list,
391 st->insert,
392 st->insert_failed,
393 st->drop,
394 st->early_drop,
395 st->error,
396
397 st->expect_new,
398 st->expect_create,
399 st->expect_delete
400 );
401 return 0;
402}
403
404static struct seq_operations ct_cpu_seq_ops = {
405 .start = ct_cpu_seq_start,
406 .next = ct_cpu_seq_next,
407 .stop = ct_cpu_seq_stop,
408 .show = ct_cpu_seq_show,
409};
410
411static int ct_cpu_seq_open(struct inode *inode, struct file *file)
412{
413 return seq_open(file, &ct_cpu_seq_ops);
414}
415
416static struct file_operations ct_cpu_seq_fops = {
417 .owner = THIS_MODULE,
418 .open = ct_cpu_seq_open,
419 .read = seq_read,
420 .llseek = seq_lseek,
421 .release = seq_release_private,
422};
423#endif /* CONFIG_PROC_FS */
424
425/* Sysctl support */
426
427#ifdef CONFIG_SYSCTL
428
429/* From nf_conntrack_core.c */
430extern int nf_conntrack_max;
431extern unsigned int nf_conntrack_htable_size;
432
433/* From nf_conntrack_proto_tcp.c */
434extern unsigned long nf_ct_tcp_timeout_syn_sent;
435extern unsigned long nf_ct_tcp_timeout_syn_recv;
436extern unsigned long nf_ct_tcp_timeout_established;
437extern unsigned long nf_ct_tcp_timeout_fin_wait;
438extern unsigned long nf_ct_tcp_timeout_close_wait;
439extern unsigned long nf_ct_tcp_timeout_last_ack;
440extern unsigned long nf_ct_tcp_timeout_time_wait;
441extern unsigned long nf_ct_tcp_timeout_close;
442extern unsigned long nf_ct_tcp_timeout_max_retrans;
443extern int nf_ct_tcp_loose;
444extern int nf_ct_tcp_be_liberal;
445extern int nf_ct_tcp_max_retrans;
446
447/* From nf_conntrack_proto_udp.c */
448extern unsigned long nf_ct_udp_timeout;
449extern unsigned long nf_ct_udp_timeout_stream;
450
451/* From nf_conntrack_proto_generic.c */
452extern unsigned long nf_ct_generic_timeout;
453
/*
 * Bounds for the nf_conntrack_log_invalid sysctl, which selects the
 * protocol whose invalid packets are logged; passed to
 * proc_dointvec_minmax via extra1/extra2.
 */
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
457
458static struct ctl_table_header *nf_ct_sysctl_header;
459
460static ctl_table nf_ct_sysctl_table[] = {
461 {
462 .ctl_name = NET_NF_CONNTRACK_MAX,
463 .procname = "nf_conntrack_max",
464 .data = &nf_conntrack_max,
465 .maxlen = sizeof(int),
466 .mode = 0644,
467 .proc_handler = &proc_dointvec,
468 },
469 {
470 .ctl_name = NET_NF_CONNTRACK_COUNT,
471 .procname = "nf_conntrack_count",
472 .data = &nf_conntrack_count,
473 .maxlen = sizeof(int),
474 .mode = 0444,
475 .proc_handler = &proc_dointvec,
476 },
477 {
478 .ctl_name = NET_NF_CONNTRACK_BUCKETS,
479 .procname = "nf_conntrack_buckets",
480 .data = &nf_conntrack_htable_size,
481 .maxlen = sizeof(unsigned int),
482 .mode = 0444,
483 .proc_handler = &proc_dointvec,
484 },
485 {
486 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
487 .procname = "nf_conntrack_tcp_timeout_syn_sent",
488 .data = &nf_ct_tcp_timeout_syn_sent,
489 .maxlen = sizeof(unsigned int),
490 .mode = 0644,
491 .proc_handler = &proc_dointvec_jiffies,
492 },
493 {
494 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
495 .procname = "nf_conntrack_tcp_timeout_syn_recv",
496 .data = &nf_ct_tcp_timeout_syn_recv,
497 .maxlen = sizeof(unsigned int),
498 .mode = 0644,
499 .proc_handler = &proc_dointvec_jiffies,
500 },
501 {
502 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
503 .procname = "nf_conntrack_tcp_timeout_established",
504 .data = &nf_ct_tcp_timeout_established,
505 .maxlen = sizeof(unsigned int),
506 .mode = 0644,
507 .proc_handler = &proc_dointvec_jiffies,
508 },
509 {
510 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
511 .procname = "nf_conntrack_tcp_timeout_fin_wait",
512 .data = &nf_ct_tcp_timeout_fin_wait,
513 .maxlen = sizeof(unsigned int),
514 .mode = 0644,
515 .proc_handler = &proc_dointvec_jiffies,
516 },
517 {
518 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
519 .procname = "nf_conntrack_tcp_timeout_close_wait",
520 .data = &nf_ct_tcp_timeout_close_wait,
521 .maxlen = sizeof(unsigned int),
522 .mode = 0644,
523 .proc_handler = &proc_dointvec_jiffies,
524 },
525 {
526 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
527 .procname = "nf_conntrack_tcp_timeout_last_ack",
528 .data = &nf_ct_tcp_timeout_last_ack,
529 .maxlen = sizeof(unsigned int),
530 .mode = 0644,
531 .proc_handler = &proc_dointvec_jiffies,
532 },
533 {
534 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
535 .procname = "nf_conntrack_tcp_timeout_time_wait",
536 .data = &nf_ct_tcp_timeout_time_wait,
537 .maxlen = sizeof(unsigned int),
538 .mode = 0644,
539 .proc_handler = &proc_dointvec_jiffies,
540 },
541 {
542 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
543 .procname = "nf_conntrack_tcp_timeout_close",
544 .data = &nf_ct_tcp_timeout_close,
545 .maxlen = sizeof(unsigned int),
546 .mode = 0644,
547 .proc_handler = &proc_dointvec_jiffies,
548 },
549 {
550 .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT,
551 .procname = "nf_conntrack_udp_timeout",
552 .data = &nf_ct_udp_timeout,
553 .maxlen = sizeof(unsigned int),
554 .mode = 0644,
555 .proc_handler = &proc_dointvec_jiffies,
556 },
557 {
558 .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
559 .procname = "nf_conntrack_udp_timeout_stream",
560 .data = &nf_ct_udp_timeout_stream,
561 .maxlen = sizeof(unsigned int),
562 .mode = 0644,
563 .proc_handler = &proc_dointvec_jiffies,
564 },
565 {
566 .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT,
567 .procname = "nf_conntrack_generic_timeout",
568 .data = &nf_ct_generic_timeout,
569 .maxlen = sizeof(unsigned int),
570 .mode = 0644,
571 .proc_handler = &proc_dointvec_jiffies,
572 },
573 {
574 .ctl_name = NET_NF_CONNTRACK_LOG_INVALID,
575 .procname = "nf_conntrack_log_invalid",
576 .data = &nf_ct_log_invalid,
577 .maxlen = sizeof(unsigned int),
578 .mode = 0644,
579 .proc_handler = &proc_dointvec_minmax,
580 .strategy = &sysctl_intvec,
581 .extra1 = &log_invalid_proto_min,
582 .extra2 = &log_invalid_proto_max,
583 },
584 {
585 .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
586 .procname = "nf_conntrack_tcp_timeout_max_retrans",
587 .data = &nf_ct_tcp_timeout_max_retrans,
588 .maxlen = sizeof(unsigned int),
589 .mode = 0644,
590 .proc_handler = &proc_dointvec_jiffies,
591 },
592 {
593 .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE,
594 .procname = "nf_conntrack_tcp_loose",
595 .data = &nf_ct_tcp_loose,
596 .maxlen = sizeof(unsigned int),
597 .mode = 0644,
598 .proc_handler = &proc_dointvec,
599 },
600 {
601 .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
602 .procname = "nf_conntrack_tcp_be_liberal",
603 .data = &nf_ct_tcp_be_liberal,
604 .maxlen = sizeof(unsigned int),
605 .mode = 0644,
606 .proc_handler = &proc_dointvec,
607 },
608 {
609 .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
610 .procname = "nf_conntrack_tcp_max_retrans",
611 .data = &nf_ct_tcp_max_retrans,
612 .maxlen = sizeof(unsigned int),
613 .mode = 0644,
614 .proc_handler = &proc_dointvec,
615 },
616
617 { .ctl_name = 0 }
618};
619
620#define NET_NF_CONNTRACK_MAX 2089
621
622static ctl_table nf_ct_netfilter_table[] = {
623 {
624 .ctl_name = NET_NETFILTER,
625 .procname = "netfilter",
626 .mode = 0555,
627 .child = nf_ct_sysctl_table,
628 },
629 {
630 .ctl_name = NET_NF_CONNTRACK_MAX,
631 .procname = "nf_conntrack_max",
632 .data = &nf_conntrack_max,
633 .maxlen = sizeof(int),
634 .mode = 0644,
635 .proc_handler = &proc_dointvec,
636 },
637 { .ctl_name = 0 }
638};
639
640static ctl_table nf_ct_net_table[] = {
641 {
642 .ctl_name = CTL_NET,
643 .procname = "net",
644 .mode = 0555,
645 .child = nf_ct_netfilter_table,
646 },
647 { .ctl_name = 0 }
648};
649EXPORT_SYMBOL(nf_ct_log_invalid);
650#endif /* CONFIG_SYSCTL */
651
/*
 * Bring nf_conntrack up (@init != 0) or tear it down (@init == 0).
 *
 * Initialisation runs nf_conntrack_init(), creates the /proc entries
 * (conntrack listing, expectation listing and the per-CPU statistics
 * entry below proc_net_stat) and registers the sysctl tree.  When a
 * step fails, everything set up so far is undone by falling through the
 * cleanup labels in reverse order and a negative errno is returned.
 *
 * Teardown enters the same label chain at the top and undoes all steps.
 *
 * Returns the (non-negative) result of nf_conntrack_init() on
 * successful initialisation, 0 after teardown, and a negative errno on
 * failure.
 */
static int init_or_cleanup(int init)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
#endif
	int ret = 0;

	if (!init)
		goto cleanup;

	ret = nf_conntrack_init();
	if (ret < 0)
		goto cleanup_nothing;

#ifdef CONFIG_PROC_FS
	/*
	 * Each failure below must set an error code: ret still holds the
	 * success value of nf_conntrack_init() and would otherwise make
	 * the caller believe initialisation worked.
	 */
	proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops);
	if (!proc) {
		ret = -ENOMEM;
		goto cleanup_init;
	}

	proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440,
					&exp_file_ops);
	if (!proc_exp) {
		ret = -ENOMEM;
		goto cleanup_proc;
	}

	proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat);
	if (!proc_stat) {
		ret = -ENOMEM;
		goto cleanup_proc_exp;
	}

	proc_stat->proc_fops = &ct_cpu_seq_fops;
	proc_stat->owner = THIS_MODULE;
#endif
#ifdef CONFIG_SYSCTL
	nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
	if (nf_ct_sysctl_header == NULL) {
		printk("nf_conntrack: can't register to sysctl.\n");
		ret = -ENOMEM;
		goto cleanup_proc_stat;
	}
#endif

	return ret;

 cleanup:
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(nf_ct_sysctl_header);
 cleanup_proc_stat:
#endif
#ifdef CONFIG_PROC_FS
	/* The statistics entry was created as "nf_conntrack" below
	 * proc_net_stat, so it has to be removed from there. */
	remove_proc_entry("nf_conntrack", proc_net_stat);
 cleanup_proc_exp:
	proc_net_remove("nf_conntrack_expect");
 cleanup_proc:
	proc_net_remove("nf_conntrack");
 cleanup_init:
#endif /* CONFIG_PROC_FS */
	nf_conntrack_cleanup();
 cleanup_nothing:
	return ret;
}
708
709int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
710{
711 int ret = 0;
712
713 write_lock_bh(&nf_conntrack_lock);
714 if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_generic_l3proto) {
715 ret = -EBUSY;
716 goto out;
717 }
718 nf_ct_l3protos[proto->l3proto] = proto;
719out:
720 write_unlock_bh(&nf_conntrack_lock);
721
722 return ret;
723}
724
725void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
726{
727 write_lock_bh(&nf_conntrack_lock);
728 nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto;
729 write_unlock_bh(&nf_conntrack_lock);
730
731 /* Somebody could be still looking at the proto in bh. */
732 synchronize_net();
733
734 /* Remove all contrack entries for this protocol */
735 nf_ct_iterate_cleanup(kill_l3proto, proto);
736}
737
738/* FIXME: Allow NULL functions and sub in pointers to generic for
739 them. --RR */
740int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto)
741{
742 int ret = 0;
743
744retry:
745 write_lock_bh(&nf_conntrack_lock);
746 if (nf_ct_protos[proto->l3proto]) {
747 if (nf_ct_protos[proto->l3proto][proto->proto]
748 != &nf_conntrack_generic_protocol) {
749 ret = -EBUSY;
750 goto out_unlock;
751 }
752 } else {
753 /* l3proto may be loaded latter. */
754 struct nf_conntrack_protocol **proto_array;
755 int i;
756
757 write_unlock_bh(&nf_conntrack_lock);
758
759 proto_array = (struct nf_conntrack_protocol **)
760 kmalloc(MAX_NF_CT_PROTO *
761 sizeof(struct nf_conntrack_protocol *),
762 GFP_KERNEL);
763 if (proto_array == NULL) {
764 ret = -ENOMEM;
765 goto out;
766 }
767 for (i = 0; i < MAX_NF_CT_PROTO; i++)
768 proto_array[i] = &nf_conntrack_generic_protocol;
769
770 write_lock_bh(&nf_conntrack_lock);
771 if (nf_ct_protos[proto->l3proto]) {
772 /* bad timing, but no problem */
773 write_unlock_bh(&nf_conntrack_lock);
774 kfree(proto_array);
775 } else {
776 nf_ct_protos[proto->l3proto] = proto_array;
777 write_unlock_bh(&nf_conntrack_lock);
778 }
779
780 /*
781 * Just once because array is never freed until unloading
782 * nf_conntrack.ko
783 */
784 goto retry;
785 }
786
787 nf_ct_protos[proto->l3proto][proto->proto] = proto;
788
789out_unlock:
790 write_unlock_bh(&nf_conntrack_lock);
791out:
792 return ret;
793}
794
795void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto)
796{
797 write_lock_bh(&nf_conntrack_lock);
798 nf_ct_protos[proto->l3proto][proto->proto]
799 = &nf_conntrack_generic_protocol;
800 write_unlock_bh(&nf_conntrack_lock);
801
802 /* Somebody could be still looking at the proto in bh. */
803 synchronize_net();
804
805 /* Remove all contrack entries for this protocol */
806 nf_ct_iterate_cleanup(kill_proto, proto);
807}
808
/* Module entry point: run init_or_cleanup() in setup mode and return its result. */
809static int __init init(void)
810{
811 return init_or_cleanup(1);
812}
813
/* Module exit point: run init_or_cleanup() in teardown mode; its result is ignored. */
814static void __exit fini(void)
815{
816 init_or_cleanup(0);
817}
818
819module_init(init);
820module_exit(fini);
821
822/* Some modules need us, but don't depend directly on any symbol.
823 They should call this. The body is intentionally empty: referencing
   this exported symbol is what creates the dependency. */
824void need_nf_conntrack(void)
825{
826}
827
828#ifdef CONFIG_NF_CONNTRACK_EVENTS
829EXPORT_SYMBOL_GPL(nf_conntrack_chain);
830EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain);
831EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
832EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
833EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
834EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
835EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
836#endif
837EXPORT_SYMBOL(nf_conntrack_l3proto_register);
838EXPORT_SYMBOL(nf_conntrack_l3proto_unregister);
839EXPORT_SYMBOL(nf_conntrack_protocol_register);
840EXPORT_SYMBOL(nf_conntrack_protocol_unregister);
841EXPORT_SYMBOL(nf_ct_invert_tuplepr);
842EXPORT_SYMBOL(nf_conntrack_alter_reply);
843EXPORT_SYMBOL(nf_conntrack_destroyed);
844EXPORT_SYMBOL(need_nf_conntrack);
845EXPORT_SYMBOL(nf_conntrack_helper_register);
846EXPORT_SYMBOL(nf_conntrack_helper_unregister);
847EXPORT_SYMBOL(nf_ct_iterate_cleanup);
848EXPORT_SYMBOL(__nf_ct_refresh_acct);
849EXPORT_SYMBOL(nf_ct_protos);
850EXPORT_SYMBOL(nf_ct_find_proto);
851EXPORT_SYMBOL(nf_ct_l3protos);
852EXPORT_SYMBOL(nf_conntrack_expect_alloc);
853EXPORT_SYMBOL(nf_conntrack_expect_put);
854EXPORT_SYMBOL(nf_conntrack_expect_related);
855EXPORT_SYMBOL(nf_conntrack_unexpect_related);
856EXPORT_SYMBOL(nf_conntrack_tuple_taken);
857EXPORT_SYMBOL(nf_conntrack_htable_size);
858EXPORT_SYMBOL(nf_conntrack_lock);
859EXPORT_SYMBOL(nf_conntrack_hash);
860EXPORT_SYMBOL(nf_conntrack_untracked);
861EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
862#ifdef CONFIG_IP_NF_NAT_NEEDED
863EXPORT_SYMBOL(nf_conntrack_tcp_update);
864#endif
865EXPORT_SYMBOL(__nf_conntrack_confirm);
866EXPORT_SYMBOL(nf_ct_get_tuple);
867EXPORT_SYMBOL(nf_ct_invert_tuple);
868EXPORT_SYMBOL(nf_conntrack_in);
869EXPORT_SYMBOL(__nf_conntrack_attach);
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index 39d9c2dcd03c..e3589c2de49e 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -2,4 +2,4 @@
2# Makefile for the netlink driver. 2# Makefile for the netlink driver.
3# 3#
4 4
5obj-y := af_netlink.o 5obj-y := af_netlink.o attr.o genetlink.o
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5ca283537bc6..8c38ee6d255e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -58,6 +58,7 @@
58 58
59#include <net/sock.h> 59#include <net/sock.h>
60#include <net/scm.h> 60#include <net/scm.h>
61#include <net/netlink.h>
61 62
62#define Nprintk(a...) 63#define Nprintk(a...)
63#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) 64#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
@@ -427,7 +428,8 @@ static int netlink_release(struct socket *sock)
427 428
428 spin_lock(&nlk->cb_lock); 429 spin_lock(&nlk->cb_lock);
429 if (nlk->cb) { 430 if (nlk->cb) {
430 nlk->cb->done(nlk->cb); 431 if (nlk->cb->done)
432 nlk->cb->done(nlk->cb);
431 netlink_destroy_callback(nlk->cb); 433 netlink_destroy_callback(nlk->cb);
432 nlk->cb = NULL; 434 nlk->cb = NULL;
433 } 435 }
@@ -1322,7 +1324,8 @@ static int netlink_dump(struct sock *sk)
1322 skb_queue_tail(&sk->sk_receive_queue, skb); 1324 skb_queue_tail(&sk->sk_receive_queue, skb);
1323 sk->sk_data_ready(sk, skb->len); 1325 sk->sk_data_ready(sk, skb->len);
1324 1326
1325 cb->done(cb); 1327 if (cb->done)
1328 cb->done(cb);
1326 nlk->cb = NULL; 1329 nlk->cb = NULL;
1327 spin_unlock(&nlk->cb_lock); 1330 spin_unlock(&nlk->cb_lock);
1328 1331
@@ -1409,6 +1412,94 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1409 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1412 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1410} 1413}
1411 1414
1415static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1416 struct nlmsghdr *, int *))
1417{
1418 unsigned int total_len;
1419 struct nlmsghdr *nlh;
1420 int err;
1421
1422 while (skb->len >= nlmsg_total_size(0)) {
1423 nlh = (struct nlmsghdr *) skb->data;
1424
1425 if (skb->len < nlh->nlmsg_len)
1426 return 0;
1427
1428 total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
1429
1430 if (cb(skb, nlh, &err) < 0) {
1431 /* Not an error, but we have to interrupt processing
1432 * here. Note: that in this case we do not pull
1433 * message from skb, it will be processed later.
1434 */
1435 if (err == 0)
1436 return -1;
1437 netlink_ack(skb, nlh, err);
1438 } else if (nlh->nlmsg_flags & NLM_F_ACK)
1439 netlink_ack(skb, nlh, 0);
1440
1441 skb_pull(skb, total_len);
1442 }
1443
1444 return 0;
1445}
1446
1447/**
1448 * nelink_run_queue - Process netlink receive queue.
1449 * @sk: Netlink socket containing the queue
1450 * @qlen: Place to store queue length upon entry
1451 * @cb: Callback function invoked for each netlink message found
1452 *
1453 * Processes as much as there was in the queue upon entry and invokes
1454 * a callback function for each netlink message found. The callback
1455 * function may refuse a message by returning a negative error code
1456 * but setting the error pointer to 0 in which case this function
1457 * returns with a qlen != 0.
1458 *
1459 * qlen must be initialized to 0 before the initial entry, afterwards
1460 * the function may be called repeatedly until qlen reaches 0.
1461 */
1462void netlink_run_queue(struct sock *sk, unsigned int *qlen,
1463 int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
1464{
1465 struct sk_buff *skb;
1466
1467 if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue))
1468 *qlen = skb_queue_len(&sk->sk_receive_queue);
1469
1470 for (; *qlen; (*qlen)--) {
1471 skb = skb_dequeue(&sk->sk_receive_queue);
1472 if (netlink_rcv_skb(skb, cb)) {
1473 if (skb->len)
1474 skb_queue_head(&sk->sk_receive_queue, skb);
1475 else {
1476 kfree_skb(skb);
1477 (*qlen)--;
1478 }
1479 break;
1480 }
1481
1482 kfree_skb(skb);
1483 }
1484}
1485
1486/**
1487 * netlink_queue_skip - Skip netlink message while processing queue.
1488 * @nlh: Netlink message to be skipped
1489 * @skb: Socket buffer containing the netlink messages.
1490 *
1491 * Pulls the given netlink message off the socket buffer so the next
1492 * call to netlink_queue_run() will not reconsider the message.
1493 */
1494void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
1495{
1496 int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1497
1498 if (msglen > skb->len)
1499 msglen = skb->len;
1500
1501 skb_pull(skb, msglen);
1502}
1412 1503
1413#ifdef CONFIG_PROC_FS 1504#ifdef CONFIG_PROC_FS
1414struct nl_seq_iter { 1505struct nl_seq_iter {
@@ -1657,6 +1748,8 @@ out:
1657core_initcall(netlink_proto_init); 1748core_initcall(netlink_proto_init);
1658 1749
1659EXPORT_SYMBOL(netlink_ack); 1750EXPORT_SYMBOL(netlink_ack);
1751EXPORT_SYMBOL(netlink_run_queue);
1752EXPORT_SYMBOL(netlink_queue_skip);
1660EXPORT_SYMBOL(netlink_broadcast); 1753EXPORT_SYMBOL(netlink_broadcast);
1661EXPORT_SYMBOL(netlink_dump_start); 1754EXPORT_SYMBOL(netlink_dump_start);
1662EXPORT_SYMBOL(netlink_kernel_create); 1755EXPORT_SYMBOL(netlink_kernel_create);
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
new file mode 100644
index 000000000000..fffef4ab276f
--- /dev/null
+++ b/net/netlink/attr.c
@@ -0,0 +1,328 @@
1/*
2 * NETLINK Netlink attributes
3 *
4 * Authors: Thomas Graf <tgraf@suug.ch>
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 */
7
8#include <linux/config.h>
9#include <linux/module.h>
10#include <linux/kernel.h>
11#include <linux/errno.h>
12#include <linux/jiffies.h>
13#include <linux/netdevice.h>
14#include <linux/skbuff.h>
15#include <linux/string.h>
16#include <linux/types.h>
17#include <net/netlink.h>
18
/*
 * Default minimum payload length per attribute type, indexed by NLA_* type.
 * validate_nla() consults it when a policy entry gives no minlen of its own;
 * types not listed here default to 0.
 */
19static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
20 [NLA_U8] = sizeof(u8),
21 [NLA_U16] = sizeof(u16),
22 [NLA_U32] = sizeof(u32),
23 [NLA_U64] = sizeof(u64),
24 [NLA_STRING] = 1,
25 [NLA_NESTED] = NLA_HDRLEN,
26};
27
28static int validate_nla(struct nlattr *nla, int maxtype,
29 struct nla_policy *policy)
30{
31 struct nla_policy *pt;
32 int minlen = 0;
33
34 if (nla->nla_type <= 0 || nla->nla_type > maxtype)
35 return 0;
36
37 pt = &policy[nla->nla_type];
38
39 BUG_ON(pt->type > NLA_TYPE_MAX);
40
41 if (pt->minlen)
42 minlen = pt->minlen;
43 else if (pt->type != NLA_UNSPEC)
44 minlen = nla_attr_minlen[pt->type];
45
46 if (pt->type == NLA_FLAG && nla_len(nla) > 0)
47 return -ERANGE;
48
49 if (nla_len(nla) < minlen)
50 return -ERANGE;
51
52 return 0;
53}
54
55/**
56 * nla_validate - Validate a stream of attributes
57 * @head: head of attribute stream
58 * @len: length of attribute stream
59 * @maxtype: maximum attribute type to be expected
60 * @policy: validation policy
61 *
62 * Validates all attributes in the specified attribute stream against the
63 * specified policy. Attributes with a type exceeding maxtype will be
64 * ignored. See documenation of struct nla_policy for more details.
65 *
66 * Returns 0 on success or a negative error code.
67 */
68int nla_validate(struct nlattr *head, int len, int maxtype,
69 struct nla_policy *policy)
70{
71 struct nlattr *nla;
72 int rem, err;
73
74 nla_for_each_attr(nla, head, len, rem) {
75 err = validate_nla(nla, maxtype, policy);
76 if (err < 0)
77 goto errout;
78 }
79
80 err = 0;
81errout:
82 return err;
83}
84
85/**
86 * nla_parse - Parse a stream of attributes into a tb buffer
87 * @tb: destination array with maxtype+1 elements
88 * @maxtype: maximum attribute type to be expected
89 * @head: head of attribute stream
90 * @len: length of attribute stream
91 *
92 * Parses a stream of attributes and stores a pointer to each attribute in
93 * the tb array accessable via the attribute type. Attributes with a type
94 * exceeding maxtype will be silently ignored for backwards compatibility
95 * reasons. policy may be set to NULL if no validation is required.
96 *
97 * Returns 0 on success or a negative error code.
98 */
99int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
100 struct nla_policy *policy)
101{
102 struct nlattr *nla;
103 int rem, err;
104
105 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
106
107 nla_for_each_attr(nla, head, len, rem) {
108 u16 type = nla->nla_type;
109
110 if (type > 0 && type <= maxtype) {
111 if (policy) {
112 err = validate_nla(nla, maxtype, policy);
113 if (err < 0)
114 goto errout;
115 }
116
117 tb[type] = nla;
118 }
119 }
120
121 if (unlikely(rem > 0))
122 printk(KERN_WARNING "netlink: %d bytes leftover after parsing "
123 "attributes.\n", rem);
124
125 err = 0;
126errout:
127 return err;
128}
129
130/**
131 * nla_find - Find a specific attribute in a stream of attributes
132 * @head: head of attribute stream
133 * @len: length of attribute stream
134 * @attrtype: type of attribute to look for
135 *
136 * Returns the first attribute in the stream matching the specified type.
137 */
138struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
139{
140 struct nlattr *nla;
141 int rem;
142
143 nla_for_each_attr(nla, head, len, rem)
144 if (nla->nla_type == attrtype)
145 return nla;
146
147 return NULL;
148}
149
150/**
151 * nla_strlcpy - Copy string attribute payload into a sized buffer
152 * @dst: where to copy the string to
153 * @src: attribute to copy the string from
154 * @dstsize: size of destination buffer
155 *
156 * Copies at most dstsize - 1 bytes into the destination buffer.
157 * The result is always a valid NUL-terminated string. Unlike
158 * strlcpy the destination buffer is always padded out.
159 *
160 * Returns the length of the source buffer.
161 */
162size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
163{
164 size_t srclen = nla_len(nla);
165 char *src = nla_data(nla);
166
167 if (srclen > 0 && src[srclen - 1] == '\0')
168 srclen--;
169
170 if (dstsize > 0) {
171 size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen;
172
173 memset(dst, 0, dstsize);
174 memcpy(dst, src, len);
175 }
176
177 return srclen;
178}
179
180/**
181 * nla_memcpy - Copy a netlink attribute into another memory area
182 * @dest: where to copy to memcpy
183 * @src: netlink attribute to copy from
184 * @count: size of the destination area
185 *
186 * Note: The number of bytes copied is limited by the length of
187 * attribute's payload. memcpy
188 *
189 * Returns the number of bytes copied.
190 */
191int nla_memcpy(void *dest, struct nlattr *src, int count)
192{
193 int minlen = min_t(int, count, nla_len(src));
194
195 memcpy(dest, nla_data(src), minlen);
196
197 return minlen;
198}
199
200/**
201 * nla_memcmp - Compare an attribute with sized memory area
202 * @nla: netlink attribute
203 * @data: memory area
204 * @size: size of memory area
205 */
206int nla_memcmp(const struct nlattr *nla, const void *data,
207 size_t size)
208{
209 int d = nla_len(nla) - size;
210
211 if (d == 0)
212 d = memcmp(nla_data(nla), data, size);
213
214 return d;
215}
216
/**
 * nla_strcmp - Compare a string attribute against a string
 * @nla: netlink string attribute
 * @str: another string
 *
 * The comparison covers @str including its terminating NUL, so the
 * attribute payload only matches if it has exactly that length.
 *
 * Returns the difference in length if the lengths differ, otherwise the
 * memcmp() result.
 */
int nla_strcmp(const struct nlattr *nla, const char *str)
{
	int want = strlen(str) + 1;
	int diff = nla_len(nla) - want;

	if (diff != 0)
		return diff;

	return memcmp(nla_data(nla), str, want);
}
232
233/**
234 * __nla_reserve - reserve room for attribute on the skb
235 * @skb: socket buffer to reserve room on
236 * @attrtype: attribute type
237 * @attrlen: length of attribute payload
238 *
239 * Adds a netlink attribute header to a socket buffer and reserves
240 * room for the payload but does not copy it.
241 *
242 * The caller is responsible to ensure that the skb provides enough
243 * tailroom for the attribute header and payload.
244 */
245struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
246{
247 struct nlattr *nla;
248
249 nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen));
250 nla->nla_type = attrtype;
251 nla->nla_len = nla_attr_size(attrlen);
252
253 memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen));
254
255 return nla;
256}
257
258/**
259 * nla_reserve - reserve room for attribute on the skb
260 * @skb: socket buffer to reserve room on
261 * @attrtype: attribute type
262 * @attrlen: length of attribute payload
263 *
264 * Adds a netlink attribute header to a socket buffer and reserves
265 * room for the payload but does not copy it.
266 *
267 * Returns NULL if the tailroom of the skb is insufficient to store
268 * the attribute header and payload.
269 */
270struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
271{
272 if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
273 return NULL;
274
275 return __nla_reserve(skb, attrtype, attrlen);
276}
277
/**
 * __nla_put - Add a netlink attribute to a socket buffer
 * @skb: socket buffer to add attribute to
 * @attrtype: attribute type
 * @attrlen: length of attribute payload
 * @data: head of attribute payload
 *
 * The caller is responsible to ensure that the skb provides enough
 * tailroom for the attribute header and payload.
 */
void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
	       const void *data)
{
	struct nlattr *attr = __nla_reserve(skb, attrtype, attrlen);

	memcpy(nla_data(attr), data, attrlen);
}
296
297
/**
 * nla_put - Add a netlink attribute to a socket buffer
 * @skb: socket buffer to add attribute to
 * @attrtype: attribute type
 * @attrlen: length of attribute payload
 * @data: head of attribute payload
 *
 * Returns 0 on success, or -1 if the tailroom of the skb is insufficient
 * to store the attribute header and payload.
 */
int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
{
	if (likely(skb_tailroom(skb) >= nla_total_size(attrlen))) {
		__nla_put(skb, attrtype, attrlen, data);
		return 0;
	}

	return -1;
}
316
317
318EXPORT_SYMBOL(nla_validate);
319EXPORT_SYMBOL(nla_parse);
320EXPORT_SYMBOL(nla_find);
321EXPORT_SYMBOL(nla_strlcpy);
322EXPORT_SYMBOL(__nla_reserve);
323EXPORT_SYMBOL(nla_reserve);
324EXPORT_SYMBOL(__nla_put);
325EXPORT_SYMBOL(nla_put);
326EXPORT_SYMBOL(nla_memcpy);
327EXPORT_SYMBOL(nla_memcmp);
328EXPORT_SYMBOL(nla_strcmp);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
new file mode 100644
index 000000000000..287cfcc56951
--- /dev/null
+++ b/net/netlink/genetlink.c
@@ -0,0 +1,579 @@
1/*
2 * NETLINK Generic Netlink Family
3 *
4 * Authors: Jamal Hadi Salim
5 * Thomas Graf <tgraf@suug.ch>
6 */
7
8#include <linux/config.h>
9#include <linux/module.h>
10#include <linux/kernel.h>
11#include <linux/errno.h>
12#include <linux/types.h>
13#include <linux/socket.h>
14#include <linux/string.h>
15#include <linux/skbuff.h>
16#include <net/sock.h>
17#include <net/genetlink.h>
18
19struct sock *genl_sock = NULL;
20
21static DECLARE_MUTEX(genl_sem); /* serialization of message processing */
22
/* Acquire genl_sem, the semaphore serializing generic netlink message processing. */
23static void genl_lock(void)
24{
25 down(&genl_sem);
26}
27
/*
 * Try to acquire genl_sem without sleeping.  Returns the result of
 * down_trylock(); genl_rcv() treats a non-zero result as "not acquired".
 */
28static int genl_trylock(void)
29{
30 return down_trylock(&genl_sem);
31}
32
33static void genl_unlock(void)
34{
35 up(&genl_sem);
36
37 if (genl_sock && genl_sock->sk_receive_queue.qlen)
38 genl_sock->sk_data_ready(genl_sock, 0);
39}
40
41#define GENL_FAM_TAB_SIZE 16
42#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
43
44static struct list_head family_ht[GENL_FAM_TAB_SIZE];
45
46static int genl_ctrl_event(int event, void *data);
47
/* Map a family id to its hash table bucket index by masking with GENL_FAM_TAB_MASK. */
48static inline unsigned int genl_family_hash(unsigned int id)
49{
50 return id & GENL_FAM_TAB_MASK;
51}
52
/* Return the head of the family_ht chain that family id @id hashes to. */
53static inline struct list_head *genl_family_chain(unsigned int id)
54{
55 return &family_ht[genl_family_hash(id)];
56}
57
58static struct genl_family *genl_family_find_byid(unsigned int id)
59{
60 struct genl_family *f;
61
62 list_for_each_entry(f, genl_family_chain(id), family_list)
63 if (f->id == id)
64 return f;
65
66 return NULL;
67}
68
69static struct genl_family *genl_family_find_byname(char *name)
70{
71 struct genl_family *f;
72 int i;
73
74 for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
75 list_for_each_entry(f, genl_family_chain(i), family_list)
76 if (strcmp(f->name, name) == 0)
77 return f;
78
79 return NULL;
80}
81
82static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
83{
84 struct genl_ops *ops;
85
86 list_for_each_entry(ops, &family->ops_list, ops_list)
87 if (ops->cmd == cmd)
88 return ops;
89
90 return NULL;
91}
92
93/* Of course we are going to have problems once we hit
94 * 2^16 alive types, but that can only happen by year 2K
95*/
96static inline u16 genl_generate_id(void)
97{
98 static u16 id_gen_idx;
99 int overflowed = 0;
100
101 do {
102 if (id_gen_idx == 0)
103 id_gen_idx = GENL_MIN_ID;
104
105 if (++id_gen_idx > GENL_MAX_ID) {
106 if (!overflowed) {
107 overflowed = 1;
108 id_gen_idx = 0;
109 continue;
110 } else
111 return 0;
112 }
113
114 } while (genl_family_find_byid(id_gen_idx));
115
116 return id_gen_idx;
117}
118
119/**
120 * genl_register_ops - register generic netlink operations
121 * @family: generic netlink family
122 * @ops: operations to be registered
123 *
124 * Registers the specified operations and assigns them to the specified
125 * family. Either a doit or dumpit callback must be specified or the
126 * operation will fail. Only one operation structure per command
127 * identifier may be registered.
128 *
129 * See include/net/genetlink.h for more documenation on the operations
130 * structure.
131 *
132 * Returns 0 on success or a negative error code.
133 */
134int genl_register_ops(struct genl_family *family, struct genl_ops *ops)
135{
136 int err = -EINVAL;
137
138 if (ops->dumpit == NULL && ops->doit == NULL)
139 goto errout;
140
141 if (genl_get_cmd(ops->cmd, family)) {
142 err = -EEXIST;
143 goto errout;
144 }
145
146 genl_lock();
147 list_add_tail(&ops->ops_list, &family->ops_list);
148 genl_unlock();
149
150 genl_ctrl_event(CTRL_CMD_NEWOPS, ops);
151 err = 0;
152errout:
153 return err;
154}
155
156/**
157 * genl_unregister_ops - unregister generic netlink operations
158 * @family: generic netlink family
159 * @ops: operations to be unregistered
160 *
161 * Unregisters the specified operations and unassigns them from the
162 * specified family. The operation blocks until the current message
163 * processing has finished and doesn't start again until the
164 * unregister process has finished.
165 *
166 * Note: It is not necessary to unregister all operations before
167 * unregistering the family, unregistering the family will cause
168 * all assigned operations to be unregistered automatically.
169 *
170 * Returns 0 on success or a negative error code.
171 */
172int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)
173{
174 struct genl_ops *rc;
175
176 genl_lock();
177 list_for_each_entry(rc, &family->ops_list, ops_list) {
178 if (rc == ops) {
179 list_del(&ops->ops_list);
180 genl_unlock();
181 genl_ctrl_event(CTRL_CMD_DELOPS, ops);
182 return 0;
183 }
184 }
185 genl_unlock();
186
187 return -ENOENT;
188}
189
190/**
191 * genl_register_family - register a generic netlink family
192 * @family: generic netlink family
193 *
194 * Registers the specified family after validating it first. Only one
195 * family may be registered with the same family name or identifier.
196 * The family id may equal GENL_ID_GENERATE causing an unique id to
197 * be automatically generated and assigned.
198 *
199 * Return 0 on success or a negative error code.
200 */
201int genl_register_family(struct genl_family *family)
202{
203 int err = -EINVAL;
204
205 if (family->id && family->id < GENL_MIN_ID)
206 goto errout;
207
208 if (family->id > GENL_MAX_ID)
209 goto errout;
210
211 INIT_LIST_HEAD(&family->ops_list);
212
213 genl_lock();
214
215 if (genl_family_find_byname(family->name)) {
216 err = -EEXIST;
217 goto errout_locked;
218 }
219
220 if (genl_family_find_byid(family->id)) {
221 err = -EEXIST;
222 goto errout_locked;
223 }
224
225 if (!try_module_get(family->owner)) {
226 err = -EBUSY;
227 goto errout_locked;
228 }
229
230 if (family->id == GENL_ID_GENERATE) {
231 u16 newid = genl_generate_id();
232
233 if (!newid) {
234 err = -ENOMEM;
235 goto errout_locked;
236 }
237
238 family->id = newid;
239 }
240
241 if (family->maxattr) {
242 family->attrbuf = kmalloc((family->maxattr+1) *
243 sizeof(struct nlattr *), GFP_KERNEL);
244 if (family->attrbuf == NULL) {
245 err = -ENOMEM;
246 goto errout;
247 }
248 } else
249 family->attrbuf = NULL;
250
251 list_add_tail(&family->family_list, genl_family_chain(family->id));
252 genl_unlock();
253
254 genl_ctrl_event(CTRL_CMD_NEWFAMILY, family);
255
256 return 0;
257
258errout_locked:
259 genl_unlock();
260errout:
261 return err;
262}
263
264/**
265 * genl_unregister_family - unregister generic netlink family
266 * @family: generic netlink family
267 *
268 * Unregisters the specified family.
269 *
270 * Returns 0 on success or a negative error code.
271 */
272int genl_unregister_family(struct genl_family *family)
273{
274 struct genl_family *rc;
275
276 genl_lock();
277
278 list_for_each_entry(rc, genl_family_chain(family->id), family_list) {
279 if (family->id != rc->id || strcmp(rc->name, family->name))
280 continue;
281
282 list_del(&rc->family_list);
283 INIT_LIST_HEAD(&family->ops_list);
284 genl_unlock();
285
286 module_put(family->owner);
287 kfree(family->attrbuf);
288 genl_ctrl_event(CTRL_CMD_DELFAMILY, family);
289 return 0;
290 }
291
292 genl_unlock();
293
294 return -ENOENT;
295}
296
/*
 * Dispatch one generic netlink message to the matching family operation.
 *
 * Messages without NLM_F_REQUEST or with a type below NLMSG_MIN_TYPE are
 * ignored (return 0).  On a lookup, length, permission or parse failure the
 * error is stored in *errp and -1 is returned.  Dump requests are handed to
 * netlink_dump_start(); all other requests go to the doit callback, whose
 * result is stored in *errp and returned.
 */
297static inline int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
298 int *errp)
299{
300 struct genl_ops *ops;
301 struct genl_family *family;
302 struct genl_info info;
303 struct genlmsghdr *hdr = nlmsg_data(nlh);
304 int hdrlen, err = -EINVAL;
305
306 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
307 goto ignore;
308
309 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
310 goto ignore;
311
/* The netlink message type doubles as the family id. */
312 family = genl_family_find_byid(nlh->nlmsg_type);
313 if (family == NULL) {
314 err = -ENOENT;
315 goto errout;
316 }
317
/* The message must hold the generic header plus the family's own header;
 * hdr is not dereferenced before this check (err is still -EINVAL here). */
318 hdrlen = GENL_HDRLEN + family->hdrsize;
319 if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
320 goto errout;
321
322 ops = genl_get_cmd(hdr->cmd, family);
323 if (ops == NULL) {
324 err = -EOPNOTSUPP;
325 goto errout;
326 }
327
328 if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb)) {
329 err = -EPERM;
330 goto errout;
331 }
332
333 if (nlh->nlmsg_flags & NLM_F_DUMP) {
334 if (ops->dumpit == NULL) {
335 err = -EOPNOTSUPP;
336 goto errout;
337 }
338
/* On a successful dump start the request is pulled off the skb here; the
 * -1 return (with *errp possibly 0) makes the caller stop processing. */
339 *errp = err = netlink_dump_start(genl_sock, skb, nlh,
340 ops->dumpit, NULL);
341 if (err == 0)
342 skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
343 skb->len));
344 return -1;
345 }
346
347 if (ops->doit == NULL) {
348 err = -EOPNOTSUPP;
349 goto errout;
350 }
351
/* Attributes are parsed into the family's attrbuf, if it has one. */
352 if (family->attrbuf) {
353 err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
354 ops->policy);
355 if (err < 0)
356 goto errout;
357 }
358
359 info.snd_seq = nlh->nlmsg_seq;
360 info.snd_pid = NETLINK_CB(skb).pid;
361 info.nlhdr = nlh;
362 info.genlhdr = nlmsg_data(nlh);
363 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
364 info.attrs = family->attrbuf;
365
366 *errp = err = ops->doit(skb, &info);
367 return err;
368
369ignore:
370 return 0;
371
372errout:
373 *errp = err;
374 return -1;
375}
376
/*
 * Receive handler passed to netlink_kernel_create() for the generic netlink
 * socket.  Runs the receive queue through genl_rcv_msg() under the genl
 * lock.  If the lock cannot be taken the function just returns; note that
 * genl_unlock() calls sk_data_ready when messages are still queued.
 * @len is unused.
 */
377static void genl_rcv(struct sock *sk, int len)
378{
379 unsigned int qlen = 0;
380
381 do {
382 if (genl_trylock())
383 return;
384 netlink_run_queue(sk, &qlen, &genl_rcv_msg);
385 genl_unlock();
/* Go again while netlink_run_queue() left work and the queue is non-empty. */
386 } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen);
387}
388
389/**************************************************************************
390 * Controller
391 **************************************************************************/
392
/*
 * Append a controller message describing @family (its name and id
 * attributes) to @skb, using command @cmd and the family's version.
 *
 * Returns the result of genlmsg_end() on success, -1 if the message header
 * could not be added, or the result of genlmsg_cancel() if an attribute
 * could not be added.
 */
393static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
394 u32 flags, struct sk_buff *skb, u8 cmd)
395{
396 void *hdr;
397
398 hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
399 family->version);
400 if (hdr == NULL)
401 return -1;
402
/* NOTE(review): the NLA_PUT_* macros presumably jump to nla_put_failure
 * when the skb has no room - confirm against include/net/netlink.h. */
403 NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name);
404 NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id);
405
406 return genlmsg_end(skb, hdr);
407
408nla_put_failure:
409 return genlmsg_cancel(skb, hdr);
410}
411
412static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
413{
414
415 int i, n = 0;
416 struct genl_family *rt;
417 int chains_to_skip = cb->args[0];
418 int fams_to_skip = cb->args[1];
419
420 for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
421 if (i < chains_to_skip)
422 continue;
423 n = 0;
424 list_for_each_entry(rt, genl_family_chain(i), family_list) {
425 if (++n < fams_to_skip)
426 continue;
427 if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid,
428 cb->nlh->nlmsg_seq, NLM_F_MULTI,
429 skb, CTRL_CMD_NEWFAMILY) < 0)
430 goto errout;
431 }
432
433 fams_to_skip = 0;
434 }
435
436errout:
437 cb->args[0] = i;
438 cb->args[1] = n;
439
440 return skb->len;
441}
442
443static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
444 int seq, int cmd)
445{
446 struct sk_buff *skb;
447 int err;
448
449 skb = nlmsg_new(NLMSG_GOODSIZE);
450 if (skb == NULL)
451 return ERR_PTR(-ENOBUFS);
452
453 err = ctrl_fill_info(family, pid, seq, 0, skb, cmd);
454 if (err < 0) {
455 nlmsg_free(skb);
456 return ERR_PTR(err);
457 }
458
459 return skb;
460}
461
/* Attribute policy for controller requests: family id is a u16, family name a string. */
462static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = {
463 [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 },
464 [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING },
465};
466
467static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
468{
469 struct sk_buff *msg;
470 struct genl_family *res = NULL;
471 int err = -EINVAL;
472
473 if (info->attrs[CTRL_ATTR_FAMILY_ID]) {
474 u16 id = nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]);
475 res = genl_family_find_byid(id);
476 }
477
478 if (info->attrs[CTRL_ATTR_FAMILY_NAME]) {
479 char name[GENL_NAMSIZ];
480
481 if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME],
482 GENL_NAMSIZ) >= GENL_NAMSIZ)
483 goto errout;
484
485 res = genl_family_find_byname(name);
486 }
487
488 if (res == NULL) {
489 err = -ENOENT;
490 goto errout;
491 }
492
493 msg = ctrl_build_msg(res, info->snd_pid, info->snd_seq,
494 CTRL_CMD_NEWFAMILY);
495 if (IS_ERR(msg)) {
496 err = PTR_ERR(msg);
497 goto errout;
498 }
499
500 err = genlmsg_unicast(msg, info->snd_pid);
501errout:
502 return err;
503}
504
505static int genl_ctrl_event(int event, void *data)
506{
507 struct sk_buff *msg;
508
509 if (genl_sock == NULL)
510 return 0;
511
512 switch (event) {
513 case CTRL_CMD_NEWFAMILY:
514 case CTRL_CMD_DELFAMILY:
515 msg = ctrl_build_msg(data, 0, 0, event);
516 if (IS_ERR(msg))
517 return PTR_ERR(msg);
518
519 genlmsg_multicast(msg, 0, GENL_ID_CTRL);
520 break;
521 }
522
523 return 0;
524}
525
/* Controller operation: CTRL_CMD_GETFAMILY, answering single requests and dumps. */
526static struct genl_ops genl_ctrl_ops = {
527 .cmd = CTRL_CMD_GETFAMILY,
528 .doit = ctrl_getfamily,
529 .dumpit = ctrl_dumpfamily,
530 .policy = ctrl_policy,
531};
532
/* The controller family itself, registered under the fixed id GENL_ID_CTRL as "nlctrl". */
533static struct genl_family genl_ctrl = {
534 .id = GENL_ID_CTRL,
535 .name = "nlctrl",
536 .version = 0x1,
537 .maxattr = CTRL_ATTR_MAX,
538 .owner = THIS_MODULE,
539};
540
/*
 * Initialize generic netlink: set up the family hash table, register the
 * controller family and its operation, then create the NETLINK_GENERIC
 * kernel socket.  Any failure ends in panic(); the return statements that
 * follow the panic() calls are not expected to be reached.
 */
541static int __init genl_init(void)
542{
543 int i, err;
544
545 for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
546 INIT_LIST_HEAD(&family_ht[i]);
547
/* genl_sock is still NULL here, so genl_ctrl_event() sends no notifications
 * for these two registrations. */
548 err = genl_register_family(&genl_ctrl);
549 if (err < 0)
550 goto errout;
551
552 err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops);
553 if (err < 0)
554 goto errout_register;
555
556 netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
557 genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
558 genl_rcv, THIS_MODULE);
559 if (genl_sock == NULL) {
560 panic("GENL: Cannot initialize generic netlink\n");
561 return -ENOMEM;
562 }
563
564 return 0;
565
566errout_register:
567 genl_unregister_family(&genl_ctrl);
568errout:
569 panic("GENL: Cannot register controller: %d\n", err);
570 return err;
571}
572
573subsys_initcall(genl_init);
574
575EXPORT_SYMBOL(genl_sock);
576EXPORT_SYMBOL(genl_register_ops);
577EXPORT_SYMBOL(genl_unregister_ops);
578EXPORT_SYMBOL(genl_register_family);
579EXPORT_SYMBOL(genl_unregister_family);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c35336a0f71b..0cdd9a07e043 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -18,7 +18,6 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/net.h> 19#include <linux/net.h>
20#include <linux/skbuff.h> 20#include <linux/skbuff.h>
21#include <linux/netlink.h>
22#include <linux/rtnetlink.h> 21#include <linux/rtnetlink.h>
23#include <linux/pfkeyv2.h> 22#include <linux/pfkeyv2.h>
24#include <linux/ipsec.h> 23#include <linux/ipsec.h>
@@ -26,6 +25,7 @@
26#include <linux/security.h> 25#include <linux/security.h>
27#include <net/sock.h> 26#include <net/sock.h>
28#include <net/xfrm.h> 27#include <net/xfrm.h>
28#include <net/netlink.h>
29#include <asm/uaccess.h> 29#include <asm/uaccess.h>
30 30
31static struct sock *xfrm_nl; 31static struct sock *xfrm_nl;
@@ -948,11 +948,6 @@ static struct xfrm_link {
948 [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, 948 [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy },
949}; 949};
950 950
951static int xfrm_done(struct netlink_callback *cb)
952{
953 return 0;
954}
955
956static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) 951static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
957{ 952{
958 struct rtattr *xfrma[XFRMA_MAX]; 953 struct rtattr *xfrma[XFRMA_MAX];
@@ -984,20 +979,15 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
984 if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || 979 if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
985 type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && 980 type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
986 (nlh->nlmsg_flags & NLM_F_DUMP)) { 981 (nlh->nlmsg_flags & NLM_F_DUMP)) {
987 u32 rlen;
988
989 if (link->dump == NULL) 982 if (link->dump == NULL)
990 goto err_einval; 983 goto err_einval;
991 984
992 if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, 985 if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
993 link->dump, 986 link->dump, NULL)) != 0) {
994 xfrm_done)) != 0) {
995 return -1; 987 return -1;
996 } 988 }
997 rlen = NLMSG_ALIGN(nlh->nlmsg_len); 989
998 if (rlen > skb->len) 990 netlink_queue_skip(nlh, skb);
999 rlen = skb->len;
1000 skb_pull(skb, rlen);
1001 return -1; 991 return -1;
1002 } 992 }
1003 993
@@ -1032,60 +1022,13 @@ err_einval:
1032 return -1; 1022 return -1;
1033} 1023}
1034 1024
1035static int xfrm_user_rcv_skb(struct sk_buff *skb)
1036{
1037 int err;
1038 struct nlmsghdr *nlh;
1039
1040 while (skb->len >= NLMSG_SPACE(0)) {
1041 u32 rlen;
1042
1043 nlh = (struct nlmsghdr *) skb->data;
1044 if (nlh->nlmsg_len < sizeof(*nlh) ||
1045 skb->len < nlh->nlmsg_len)
1046 return 0;
1047 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
1048 if (rlen > skb->len)
1049 rlen = skb->len;
1050 if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) {
1051 if (err == 0)
1052 return -1;
1053 netlink_ack(skb, nlh, err);
1054 } else if (nlh->nlmsg_flags & NLM_F_ACK)
1055 netlink_ack(skb, nlh, 0);
1056 skb_pull(skb, rlen);
1057 }
1058
1059 return 0;
1060}
1061
1062static void xfrm_netlink_rcv(struct sock *sk, int len) 1025static void xfrm_netlink_rcv(struct sock *sk, int len)
1063{ 1026{
1064 unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); 1027 unsigned int qlen = 0;
1065 1028
1066 do { 1029 do {
1067 struct sk_buff *skb;
1068
1069 down(&xfrm_cfg_sem); 1030 down(&xfrm_cfg_sem);
1070 1031 netlink_run_queue(sk, &qlen, &xfrm_user_rcv_msg);
1071 if (qlen > skb_queue_len(&sk->sk_receive_queue))
1072 qlen = skb_queue_len(&sk->sk_receive_queue);
1073
1074 for (; qlen; qlen--) {
1075 skb = skb_dequeue(&sk->sk_receive_queue);
1076 if (xfrm_user_rcv_skb(skb)) {
1077 if (skb->len)
1078 skb_queue_head(&sk->sk_receive_queue,
1079 skb);
1080 else {
1081 kfree_skb(skb);
1082 qlen--;
1083 }
1084 break;
1085 }
1086 kfree_skb(skb);
1087 }
1088
1089 up(&xfrm_cfg_sem); 1032 up(&xfrm_cfg_sem);
1090 1033
1091 } while (qlen); 1034 } while (qlen);