diff options
Diffstat (limited to 'net')
41 files changed, 9306 insertions, 214 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9bed7569ce3f..8700379685e0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <net/udp.h> | 49 | #include <net/udp.h> |
50 | #include <net/sock.h> | 50 | #include <net/sock.h> |
51 | #include <net/pkt_sched.h> | 51 | #include <net/pkt_sched.h> |
52 | #include <net/netlink.h> | ||
52 | 53 | ||
53 | DECLARE_MUTEX(rtnl_sem); | 54 | DECLARE_MUTEX(rtnl_sem); |
54 | 55 | ||
@@ -462,11 +463,6 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) | |||
462 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); | 463 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); |
463 | } | 464 | } |
464 | 465 | ||
465 | static int rtnetlink_done(struct netlink_callback *cb) | ||
466 | { | ||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | /* Protected by RTNL semaphore. */ | 466 | /* Protected by RTNL semaphore. */ |
471 | static struct rtattr **rta_buf; | 467 | static struct rtattr **rta_buf; |
472 | static int rtattr_max; | 468 | static int rtattr_max; |
@@ -524,8 +520,6 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) | |||
524 | } | 520 | } |
525 | 521 | ||
526 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { | 522 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { |
527 | u32 rlen; | ||
528 | |||
529 | if (link->dumpit == NULL) | 523 | if (link->dumpit == NULL) |
530 | link = &(rtnetlink_links[PF_UNSPEC][type]); | 524 | link = &(rtnetlink_links[PF_UNSPEC][type]); |
531 | 525 | ||
@@ -533,14 +527,11 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) | |||
533 | goto err_inval; | 527 | goto err_inval; |
534 | 528 | ||
535 | if ((*errp = netlink_dump_start(rtnl, skb, nlh, | 529 | if ((*errp = netlink_dump_start(rtnl, skb, nlh, |
536 | link->dumpit, | 530 | link->dumpit, NULL)) != 0) { |
537 | rtnetlink_done)) != 0) { | ||
538 | return -1; | 531 | return -1; |
539 | } | 532 | } |
540 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | 533 | |
541 | if (rlen > skb->len) | 534 | netlink_queue_skip(nlh, skb); |
542 | rlen = skb->len; | ||
543 | skb_pull(skb, rlen); | ||
544 | return -1; | 535 | return -1; |
545 | } | 536 | } |
546 | 537 | ||
@@ -579,75 +570,13 @@ err_inval: | |||
579 | return -1; | 570 | return -1; |
580 | } | 571 | } |
581 | 572 | ||
582 | /* | ||
583 | * Process one packet of messages. | ||
584 | * Malformed skbs with wrong lengths of messages are discarded silently. | ||
585 | */ | ||
586 | |||
587 | static inline int rtnetlink_rcv_skb(struct sk_buff *skb) | ||
588 | { | ||
589 | int err; | ||
590 | struct nlmsghdr * nlh; | ||
591 | |||
592 | while (skb->len >= NLMSG_SPACE(0)) { | ||
593 | u32 rlen; | ||
594 | |||
595 | nlh = (struct nlmsghdr *)skb->data; | ||
596 | if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | ||
597 | return 0; | ||
598 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
599 | if (rlen > skb->len) | ||
600 | rlen = skb->len; | ||
601 | if (rtnetlink_rcv_msg(skb, nlh, &err)) { | ||
602 | /* Not error, but we must interrupt processing here: | ||
603 | * Note, that in this case we do not pull message | ||
604 | * from skb, it will be processed later. | ||
605 | */ | ||
606 | if (err == 0) | ||
607 | return -1; | ||
608 | netlink_ack(skb, nlh, err); | ||
609 | } else if (nlh->nlmsg_flags&NLM_F_ACK) | ||
610 | netlink_ack(skb, nlh, 0); | ||
611 | skb_pull(skb, rlen); | ||
612 | } | ||
613 | |||
614 | return 0; | ||
615 | } | ||
616 | |||
617 | /* | ||
618 | * rtnetlink input queue processing routine: | ||
619 | * - process as much as there was in the queue upon entry. | ||
620 | * - feed skbs to rtnetlink_rcv_skb, until it refuse a message, | ||
621 | * that will occur, when a dump started. | ||
622 | */ | ||
623 | |||
624 | static void rtnetlink_rcv(struct sock *sk, int len) | 573 | static void rtnetlink_rcv(struct sock *sk, int len) |
625 | { | 574 | { |
626 | unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); | 575 | unsigned int qlen = 0; |
627 | 576 | ||
628 | do { | 577 | do { |
629 | struct sk_buff *skb; | ||
630 | |||
631 | rtnl_lock(); | 578 | rtnl_lock(); |
632 | 579 | netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg); | |
633 | if (qlen > skb_queue_len(&sk->sk_receive_queue)) | ||
634 | qlen = skb_queue_len(&sk->sk_receive_queue); | ||
635 | |||
636 | for (; qlen; qlen--) { | ||
637 | skb = skb_dequeue(&sk->sk_receive_queue); | ||
638 | if (rtnetlink_rcv_skb(skb)) { | ||
639 | if (skb->len) | ||
640 | skb_queue_head(&sk->sk_receive_queue, | ||
641 | skb); | ||
642 | else { | ||
643 | kfree_skb(skb); | ||
644 | qlen--; | ||
645 | } | ||
646 | break; | ||
647 | } | ||
648 | kfree_skb(skb); | ||
649 | } | ||
650 | |||
651 | up(&rtnl_sem); | 580 | up(&rtnl_sem); |
652 | 581 | ||
653 | netdev_run_todo(); | 582 | netdev_run_todo(); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 95501e40100e..b7d13a4fff48 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -336,6 +336,9 @@ void __kfree_skb(struct sk_buff *skb) | |||
336 | } | 336 | } |
337 | #ifdef CONFIG_NETFILTER | 337 | #ifdef CONFIG_NETFILTER |
338 | nf_conntrack_put(skb->nfct); | 338 | nf_conntrack_put(skb->nfct); |
339 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
340 | nf_conntrack_put_reasm(skb->nfct_reasm); | ||
341 | #endif | ||
339 | #ifdef CONFIG_BRIDGE_NETFILTER | 342 | #ifdef CONFIG_BRIDGE_NETFILTER |
340 | nf_bridge_put(skb->nf_bridge); | 343 | nf_bridge_put(skb->nf_bridge); |
341 | #endif | 344 | #endif |
@@ -414,9 +417,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) | |||
414 | C(nfct); | 417 | C(nfct); |
415 | nf_conntrack_get(skb->nfct); | 418 | nf_conntrack_get(skb->nfct); |
416 | C(nfctinfo); | 419 | C(nfctinfo); |
420 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
421 | C(nfct_reasm); | ||
422 | nf_conntrack_get_reasm(skb->nfct_reasm); | ||
423 | #endif | ||
417 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 424 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) |
418 | C(ipvs_property); | 425 | C(ipvs_property); |
419 | #endif | 426 | #endif |
427 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
428 | C(nfct_reasm); | ||
429 | nf_conntrack_get_reasm(skb->nfct_reasm); | ||
430 | #endif | ||
420 | #ifdef CONFIG_BRIDGE_NETFILTER | 431 | #ifdef CONFIG_BRIDGE_NETFILTER |
421 | C(nf_bridge); | 432 | C(nf_bridge); |
422 | nf_bridge_get(skb->nf_bridge); | 433 | nf_bridge_get(skb->nf_bridge); |
@@ -474,6 +485,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
474 | new->nfct = old->nfct; | 485 | new->nfct = old->nfct; |
475 | nf_conntrack_get(old->nfct); | 486 | nf_conntrack_get(old->nfct); |
476 | new->nfctinfo = old->nfctinfo; | 487 | new->nfctinfo = old->nfctinfo; |
488 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
489 | new->nfct_reasm = old->nfct_reasm; | ||
490 | nf_conntrack_get_reasm(old->nfct_reasm); | ||
491 | #endif | ||
477 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 492 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) |
478 | new->ipvs_property = old->ipvs_property; | 493 | new->ipvs_property = old->ipvs_property; |
479 | #endif | 494 | #endif |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 71f3c7350c6e..39061ed53cfd 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -724,12 +724,6 @@ done: | |||
724 | return skb->len; | 724 | return skb->len; |
725 | } | 725 | } |
726 | 726 | ||
727 | static int inet_diag_dump_done(struct netlink_callback *cb) | ||
728 | { | ||
729 | return 0; | ||
730 | } | ||
731 | |||
732 | |||
733 | static __inline__ int | 727 | static __inline__ int |
734 | inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | 728 | inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) |
735 | { | 729 | { |
@@ -760,8 +754,7 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
760 | goto err_inval; | 754 | goto err_inval; |
761 | } | 755 | } |
762 | return netlink_dump_start(idiagnl, skb, nlh, | 756 | return netlink_dump_start(idiagnl, skb, nlh, |
763 | inet_diag_dump, | 757 | inet_diag_dump, NULL); |
764 | inet_diag_dump_done); | ||
765 | } else { | 758 | } else { |
766 | return inet_diag_get_exact(skb, nlh); | 759 | return inet_diag_get_exact(skb, nlh); |
767 | } | 760 | } |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 7d917e4ce1d9..9d3c8b5f327e 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -5,6 +5,20 @@ | |||
5 | menu "IP: Netfilter Configuration" | 5 | menu "IP: Netfilter Configuration" |
6 | depends on INET && NETFILTER | 6 | depends on INET && NETFILTER |
7 | 7 | ||
8 | config NF_CONNTRACK_IPV4 | ||
9 | tristate "IPv4 support for new connection tracking (EXPERIMENTAL)" | ||
10 | depends on EXPERIMENTAL && NF_CONNTRACK | ||
11 | ---help--- | ||
12 | Connection tracking keeps a record of what packets have passed | ||
13 | through your machine, in order to figure out how they are related | ||
14 | into connections. | ||
15 | |||
16 | This is IPv4 support on Layer 3 independent connection tracking. | ||
17 | Layer 3 independent connection tracking is an experimental scheme | ||
18 | which generalizes ip_conntrack to support other layer 3 protocols. | ||
19 | |||
20 | To compile it as a module, choose M here. If unsure, say N. | ||
21 | |||
8 | # connection tracking, helpers and protocols | 22 | # connection tracking, helpers and protocols |
9 | config IP_NF_CONNTRACK | 23 | config IP_NF_CONNTRACK |
10 | tristate "Connection tracking (required for masq/NAT)" | 24 | tristate "Connection tracking (required for masq/NAT)" |
@@ -209,8 +223,8 @@ config IP_NF_MATCH_PKTTYPE | |||
209 | tristate "Packet type match support" | 223 | tristate "Packet type match support" |
210 | depends on IP_NF_IPTABLES | 224 | depends on IP_NF_IPTABLES |
211 | help | 225 | help |
212 | Packet type matching allows you to match a packet by | 226 | Packet type matching allows you to match a packet by |
213 | its "class", eg. BROADCAST, MULTICAST, ... | 227 | its "class", eg. BROADCAST, MULTICAST, ... |
214 | 228 | ||
215 | Typical usage: | 229 | Typical usage: |
216 | iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG | 230 | iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG |
@@ -317,7 +331,8 @@ config IP_NF_MATCH_TCPMSS | |||
317 | 331 | ||
318 | config IP_NF_MATCH_HELPER | 332 | config IP_NF_MATCH_HELPER |
319 | tristate "Helper match support" | 333 | tristate "Helper match support" |
320 | depends on IP_NF_CONNTRACK && IP_NF_IPTABLES | 334 | depends on IP_NF_IPTABLES |
335 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 | ||
321 | help | 336 | help |
322 | Helper matching allows you to match packets in dynamic connections | 337 | Helper matching allows you to match packets in dynamic connections |
323 | tracked by a conntrack-helper, ie. ip_conntrack_ftp | 338 | tracked by a conntrack-helper, ie. ip_conntrack_ftp |
@@ -326,7 +341,8 @@ config IP_NF_MATCH_HELPER | |||
326 | 341 | ||
327 | config IP_NF_MATCH_STATE | 342 | config IP_NF_MATCH_STATE |
328 | tristate "Connection state match support" | 343 | tristate "Connection state match support" |
329 | depends on IP_NF_CONNTRACK && IP_NF_IPTABLES | 344 | depends on IP_NF_IPTABLES |
345 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 | ||
330 | help | 346 | help |
331 | Connection state matching allows you to match packets based on their | 347 | Connection state matching allows you to match packets based on their |
332 | relationship to a tracked connection (ie. previous packets). This | 348 | relationship to a tracked connection (ie. previous packets). This |
@@ -336,7 +352,8 @@ config IP_NF_MATCH_STATE | |||
336 | 352 | ||
337 | config IP_NF_MATCH_CONNTRACK | 353 | config IP_NF_MATCH_CONNTRACK |
338 | tristate "Connection tracking match support" | 354 | tristate "Connection tracking match support" |
339 | depends on IP_NF_CONNTRACK && IP_NF_IPTABLES | 355 | depends on IP_NF_IPTABLES |
356 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 | ||
340 | help | 357 | help |
341 | This is a general conntrack match module, a superset of the state match. | 358 | This is a general conntrack match module, a superset of the state match. |
342 | 359 | ||
@@ -422,7 +439,8 @@ config IP_NF_MATCH_COMMENT | |||
422 | 439 | ||
423 | config IP_NF_MATCH_CONNMARK | 440 | config IP_NF_MATCH_CONNMARK |
424 | tristate 'Connection mark match support' | 441 | tristate 'Connection mark match support' |
425 | depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES | 442 | depends on IP_NF_IPTABLES |
443 | depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) | ||
426 | help | 444 | help |
427 | This option adds a `connmark' match, which allows you to match the | 445 | This option adds a `connmark' match, which allows you to match the |
428 | connection mark value previously set for the session by `CONNMARK'. | 446 | connection mark value previously set for the session by `CONNMARK'. |
@@ -433,7 +451,8 @@ config IP_NF_MATCH_CONNMARK | |||
433 | 451 | ||
434 | config IP_NF_MATCH_CONNBYTES | 452 | config IP_NF_MATCH_CONNBYTES |
435 | tristate 'Connection byte/packet counter match support' | 453 | tristate 'Connection byte/packet counter match support' |
436 | depends on IP_NF_CT_ACCT && IP_NF_IPTABLES | 454 | depends on IP_NF_IPTABLES |
455 | depends on IP_NF_CT_ACCT || (NF_CT_ACCT && NF_CONNTRACK_IPV4) | ||
437 | help | 456 | help |
438 | This option adds a `connbytes' match, which allows you to match the | 457 | This option adds a `connbytes' match, which allows you to match the |
439 | number of bytes and/or packets for each direction within a connection. | 458 | number of bytes and/or packets for each direction within a connection. |
@@ -747,7 +766,8 @@ config IP_NF_TARGET_TTL | |||
747 | 766 | ||
748 | config IP_NF_TARGET_CONNMARK | 767 | config IP_NF_TARGET_CONNMARK |
749 | tristate 'CONNMARK target support' | 768 | tristate 'CONNMARK target support' |
750 | depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE | 769 | depends on IP_NF_MANGLE |
770 | depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) | ||
751 | help | 771 | help |
752 | This option adds a `CONNMARK' target, which allows one to manipulate | 772 | This option adds a `CONNMARK' target, which allows one to manipulate |
753 | the connection mark value. Similar to the MARK target, but | 773 | the connection mark value. Similar to the MARK target, but |
@@ -759,7 +779,8 @@ config IP_NF_TARGET_CONNMARK | |||
759 | 779 | ||
760 | config IP_NF_TARGET_CLUSTERIP | 780 | config IP_NF_TARGET_CLUSTERIP |
761 | tristate "CLUSTERIP target support (EXPERIMENTAL)" | 781 | tristate "CLUSTERIP target support (EXPERIMENTAL)" |
762 | depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL | 782 | depends on IP_NF_IPTABLES && EXPERIMENTAL |
783 | depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) | ||
763 | help | 784 | help |
764 | The CLUSTERIP target allows you to build load-balancing clusters of | 785 | The CLUSTERIP target allows you to build load-balancing clusters of |
765 | network servers without having a dedicated load-balancing | 786 | network servers without having a dedicated load-balancing |
@@ -782,7 +803,7 @@ config IP_NF_RAW | |||
782 | config IP_NF_TARGET_NOTRACK | 803 | config IP_NF_TARGET_NOTRACK |
783 | tristate 'NOTRACK target support' | 804 | tristate 'NOTRACK target support' |
784 | depends on IP_NF_RAW | 805 | depends on IP_NF_RAW |
785 | depends on IP_NF_CONNTRACK | 806 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 |
786 | help | 807 | help |
787 | The NOTRACK target allows a select rule to specify | 808 | The NOTRACK target allows a select rule to specify |
788 | which packets *not* to enter the conntrack/NAT | 809 | which packets *not* to enter the conntrack/NAT |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index dab4b58dd31e..058c48e258fc 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -103,3 +103,9 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o | |||
103 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o | 103 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o |
104 | 104 | ||
105 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o | 105 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o |
106 | |||
107 | # objects for l3 independent conntrack | ||
108 | nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o | ||
109 | |||
110 | # l3 independent conntrack | ||
111 | obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 5c1c0a3d1c4b..d2a4fec22862 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c | |||
@@ -1376,7 +1376,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, | |||
1376 | ip_conntrack_expect_put(exp); | 1376 | ip_conntrack_expect_put(exp); |
1377 | } | 1377 | } |
1378 | } | 1378 | } |
1379 | write_unlock(&ip_conntrack_lock); | 1379 | write_unlock_bh(&ip_conntrack_lock); |
1380 | } else { | 1380 | } else { |
1381 | /* This basically means we have to flush everything*/ | 1381 | /* This basically means we have to flush everything*/ |
1382 | write_lock_bh(&ip_conntrack_lock); | 1382 | write_lock_bh(&ip_conntrack_lock); |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 9bcb398fbc1f..45c52d8f4d99 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -29,7 +29,7 @@ | |||
29 | 29 | ||
30 | #include <linux/netfilter_ipv4/ip_tables.h> | 30 | #include <linux/netfilter_ipv4/ip_tables.h> |
31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | 31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> |
32 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 32 | #include <net/netfilter/nf_conntrack_compat.h> |
33 | 33 | ||
34 | #define CLUSTERIP_VERSION "0.8" | 34 | #define CLUSTERIP_VERSION "0.8" |
35 | 35 | ||
@@ -316,14 +316,14 @@ target(struct sk_buff **pskb, | |||
316 | { | 316 | { |
317 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 317 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; |
318 | enum ip_conntrack_info ctinfo; | 318 | enum ip_conntrack_info ctinfo; |
319 | struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); | 319 | u_int32_t *mark, hash; |
320 | u_int32_t hash; | ||
321 | 320 | ||
322 | /* don't need to clusterip_config_get() here, since refcount | 321 | /* don't need to clusterip_config_get() here, since refcount |
323 | * is only decremented by destroy() - and ip_tables guarantees | 322 | * is only decremented by destroy() - and ip_tables guarantees |
324 | * that the ->target() function isn't called after ->destroy() */ | 323 | * that the ->target() function isn't called after ->destroy() */ |
325 | 324 | ||
326 | if (!ct) { | 325 | mark = nf_ct_get_mark((*pskb), &ctinfo); |
326 | if (mark == NULL) { | ||
327 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); | 327 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); |
328 | /* FIXME: need to drop invalid ones, since replies | 328 | /* FIXME: need to drop invalid ones, since replies |
329 | * to outgoing connections of other nodes will be | 329 | * to outgoing connections of other nodes will be |
@@ -346,7 +346,7 @@ target(struct sk_buff **pskb, | |||
346 | 346 | ||
347 | switch (ctinfo) { | 347 | switch (ctinfo) { |
348 | case IP_CT_NEW: | 348 | case IP_CT_NEW: |
349 | ct->mark = hash; | 349 | *mark = hash; |
350 | break; | 350 | break; |
351 | case IP_CT_RELATED: | 351 | case IP_CT_RELATED: |
352 | case IP_CT_RELATED+IP_CT_IS_REPLY: | 352 | case IP_CT_RELATED+IP_CT_IS_REPLY: |
@@ -363,7 +363,7 @@ target(struct sk_buff **pskb, | |||
363 | #ifdef DEBUG_CLUSTERP | 363 | #ifdef DEBUG_CLUSTERP |
364 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 364 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
365 | #endif | 365 | #endif |
366 | DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); | 366 | DEBUGP("hash=%u ct_hash=%u ", hash, *mark); |
367 | if (!clusterip_responsible(cipinfo->config, hash)) { | 367 | if (!clusterip_responsible(cipinfo->config, hash)) { |
368 | DEBUGP("not responsible\n"); | 368 | DEBUGP("not responsible\n"); |
369 | return NF_DROP; | 369 | return NF_DROP; |
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c index 05d66ab59424..8acac5a40a92 100644 --- a/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/net/ipv4/netfilter/ipt_CONNMARK.c | |||
@@ -29,7 +29,7 @@ MODULE_LICENSE("GPL"); | |||
29 | 29 | ||
30 | #include <linux/netfilter_ipv4/ip_tables.h> | 30 | #include <linux/netfilter_ipv4/ip_tables.h> |
31 | #include <linux/netfilter_ipv4/ipt_CONNMARK.h> | 31 | #include <linux/netfilter_ipv4/ipt_CONNMARK.h> |
32 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 32 | #include <net/netfilter/nf_conntrack_compat.h> |
33 | 33 | ||
34 | static unsigned int | 34 | static unsigned int |
35 | target(struct sk_buff **pskb, | 35 | target(struct sk_buff **pskb, |
@@ -43,24 +43,24 @@ target(struct sk_buff **pskb, | |||
43 | u_int32_t diff; | 43 | u_int32_t diff; |
44 | u_int32_t nfmark; | 44 | u_int32_t nfmark; |
45 | u_int32_t newmark; | 45 | u_int32_t newmark; |
46 | u_int32_t ctinfo; | ||
47 | u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); | ||
46 | 48 | ||
47 | enum ip_conntrack_info ctinfo; | 49 | if (ctmark) { |
48 | struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); | ||
49 | if (ct) { | ||
50 | switch(markinfo->mode) { | 50 | switch(markinfo->mode) { |
51 | case IPT_CONNMARK_SET: | 51 | case IPT_CONNMARK_SET: |
52 | newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; | 52 | newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; |
53 | if (newmark != ct->mark) | 53 | if (newmark != *ctmark) |
54 | ct->mark = newmark; | 54 | *ctmark = newmark; |
55 | break; | 55 | break; |
56 | case IPT_CONNMARK_SAVE: | 56 | case IPT_CONNMARK_SAVE: |
57 | newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); | 57 | newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); |
58 | if (ct->mark != newmark) | 58 | if (*ctmark != newmark) |
59 | ct->mark = newmark; | 59 | *ctmark = newmark; |
60 | break; | 60 | break; |
61 | case IPT_CONNMARK_RESTORE: | 61 | case IPT_CONNMARK_RESTORE: |
62 | nfmark = (*pskb)->nfmark; | 62 | nfmark = (*pskb)->nfmark; |
63 | diff = (ct->mark ^ nfmark) & markinfo->mask; | 63 | diff = (*ctmark ^ nfmark) & markinfo->mask; |
64 | if (diff != 0) | 64 | if (diff != 0) |
65 | (*pskb)->nfmark = nfmark ^ diff; | 65 | (*pskb)->nfmark = nfmark ^ diff; |
66 | break; | 66 | break; |
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c index a4bb9b3bc292..e3c69d072c6e 100644 --- a/net/ipv4/netfilter/ipt_NOTRACK.c +++ b/net/ipv4/netfilter/ipt_NOTRACK.c | |||
@@ -5,7 +5,7 @@ | |||
5 | #include <linux/skbuff.h> | 5 | #include <linux/skbuff.h> |
6 | 6 | ||
7 | #include <linux/netfilter_ipv4/ip_tables.h> | 7 | #include <linux/netfilter_ipv4/ip_tables.h> |
8 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 8 | #include <net/netfilter/nf_conntrack_compat.h> |
9 | 9 | ||
10 | static unsigned int | 10 | static unsigned int |
11 | target(struct sk_buff **pskb, | 11 | target(struct sk_buff **pskb, |
@@ -23,7 +23,7 @@ target(struct sk_buff **pskb, | |||
23 | If there is a real ct entry corresponding to this packet, | 23 | If there is a real ct entry corresponding to this packet, |
24 | it'll hang around till timing out. We don't deal with it | 24 | it'll hang around till timing out. We don't deal with it |
25 | for performance reasons. JK */ | 25 | for performance reasons. JK */ |
26 | (*pskb)->nfct = &ip_conntrack_untracked.ct_general; | 26 | nf_ct_untrack(*pskb); |
27 | (*pskb)->nfctinfo = IP_CT_NEW; | 27 | (*pskb)->nfctinfo = IP_CT_NEW; |
28 | nf_conntrack_get((*pskb)->nfct); | 28 | nf_conntrack_get((*pskb)->nfct); |
29 | 29 | ||
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c index df4a42c6da22..d68a048b7176 100644 --- a/net/ipv4/netfilter/ipt_connbytes.c +++ b/net/ipv4/netfilter/ipt_connbytes.c | |||
@@ -10,7 +10,7 @@ | |||
10 | */ | 10 | */ |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> |
13 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 13 | #include <net/netfilter/nf_conntrack_compat.h> |
14 | #include <linux/netfilter_ipv4/ip_tables.h> | 14 | #include <linux/netfilter_ipv4/ip_tables.h> |
15 | #include <linux/netfilter_ipv4/ipt_connbytes.h> | 15 | #include <linux/netfilter_ipv4/ipt_connbytes.h> |
16 | 16 | ||
@@ -46,60 +46,59 @@ match(const struct sk_buff *skb, | |||
46 | int *hotdrop) | 46 | int *hotdrop) |
47 | { | 47 | { |
48 | const struct ipt_connbytes_info *sinfo = matchinfo; | 48 | const struct ipt_connbytes_info *sinfo = matchinfo; |
49 | enum ip_conntrack_info ctinfo; | ||
50 | struct ip_conntrack *ct; | ||
51 | u_int64_t what = 0; /* initialize to make gcc happy */ | 49 | u_int64_t what = 0; /* initialize to make gcc happy */ |
50 | const struct ip_conntrack_counter *counters; | ||
52 | 51 | ||
53 | if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo))) | 52 | if (!(counters = nf_ct_get_counters(skb))) |
54 | return 0; /* no match */ | 53 | return 0; /* no match */ |
55 | 54 | ||
56 | switch (sinfo->what) { | 55 | switch (sinfo->what) { |
57 | case IPT_CONNBYTES_PKTS: | 56 | case IPT_CONNBYTES_PKTS: |
58 | switch (sinfo->direction) { | 57 | switch (sinfo->direction) { |
59 | case IPT_CONNBYTES_DIR_ORIGINAL: | 58 | case IPT_CONNBYTES_DIR_ORIGINAL: |
60 | what = ct->counters[IP_CT_DIR_ORIGINAL].packets; | 59 | what = counters[IP_CT_DIR_ORIGINAL].packets; |
61 | break; | 60 | break; |
62 | case IPT_CONNBYTES_DIR_REPLY: | 61 | case IPT_CONNBYTES_DIR_REPLY: |
63 | what = ct->counters[IP_CT_DIR_REPLY].packets; | 62 | what = counters[IP_CT_DIR_REPLY].packets; |
64 | break; | 63 | break; |
65 | case IPT_CONNBYTES_DIR_BOTH: | 64 | case IPT_CONNBYTES_DIR_BOTH: |
66 | what = ct->counters[IP_CT_DIR_ORIGINAL].packets; | 65 | what = counters[IP_CT_DIR_ORIGINAL].packets; |
67 | what += ct->counters[IP_CT_DIR_REPLY].packets; | 66 | what += counters[IP_CT_DIR_REPLY].packets; |
68 | break; | 67 | break; |
69 | } | 68 | } |
70 | break; | 69 | break; |
71 | case IPT_CONNBYTES_BYTES: | 70 | case IPT_CONNBYTES_BYTES: |
72 | switch (sinfo->direction) { | 71 | switch (sinfo->direction) { |
73 | case IPT_CONNBYTES_DIR_ORIGINAL: | 72 | case IPT_CONNBYTES_DIR_ORIGINAL: |
74 | what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; | 73 | what = counters[IP_CT_DIR_ORIGINAL].bytes; |
75 | break; | 74 | break; |
76 | case IPT_CONNBYTES_DIR_REPLY: | 75 | case IPT_CONNBYTES_DIR_REPLY: |
77 | what = ct->counters[IP_CT_DIR_REPLY].bytes; | 76 | what = counters[IP_CT_DIR_REPLY].bytes; |
78 | break; | 77 | break; |
79 | case IPT_CONNBYTES_DIR_BOTH: | 78 | case IPT_CONNBYTES_DIR_BOTH: |
80 | what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; | 79 | what = counters[IP_CT_DIR_ORIGINAL].bytes; |
81 | what += ct->counters[IP_CT_DIR_REPLY].bytes; | 80 | what += counters[IP_CT_DIR_REPLY].bytes; |
82 | break; | 81 | break; |
83 | } | 82 | } |
84 | break; | 83 | break; |
85 | case IPT_CONNBYTES_AVGPKT: | 84 | case IPT_CONNBYTES_AVGPKT: |
86 | switch (sinfo->direction) { | 85 | switch (sinfo->direction) { |
87 | case IPT_CONNBYTES_DIR_ORIGINAL: | 86 | case IPT_CONNBYTES_DIR_ORIGINAL: |
88 | what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes, | 87 | what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes, |
89 | ct->counters[IP_CT_DIR_ORIGINAL].packets); | 88 | counters[IP_CT_DIR_ORIGINAL].packets); |
90 | break; | 89 | break; |
91 | case IPT_CONNBYTES_DIR_REPLY: | 90 | case IPT_CONNBYTES_DIR_REPLY: |
92 | what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes, | 91 | what = div64_64(counters[IP_CT_DIR_REPLY].bytes, |
93 | ct->counters[IP_CT_DIR_REPLY].packets); | 92 | counters[IP_CT_DIR_REPLY].packets); |
94 | break; | 93 | break; |
95 | case IPT_CONNBYTES_DIR_BOTH: | 94 | case IPT_CONNBYTES_DIR_BOTH: |
96 | { | 95 | { |
97 | u_int64_t bytes; | 96 | u_int64_t bytes; |
98 | u_int64_t pkts; | 97 | u_int64_t pkts; |
99 | bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes + | 98 | bytes = counters[IP_CT_DIR_ORIGINAL].bytes + |
100 | ct->counters[IP_CT_DIR_REPLY].bytes; | 99 | counters[IP_CT_DIR_REPLY].bytes; |
101 | pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+ | 100 | pkts = counters[IP_CT_DIR_ORIGINAL].packets+ |
102 | ct->counters[IP_CT_DIR_REPLY].packets; | 101 | counters[IP_CT_DIR_REPLY].packets; |
103 | 102 | ||
104 | /* FIXME_THEORETICAL: what to do if sum | 103 | /* FIXME_THEORETICAL: what to do if sum |
105 | * overflows ? */ | 104 | * overflows ? */ |
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c index bf8de47ce004..5306ef293b92 100644 --- a/net/ipv4/netfilter/ipt_connmark.c +++ b/net/ipv4/netfilter/ipt_connmark.c | |||
@@ -28,7 +28,7 @@ MODULE_LICENSE("GPL"); | |||
28 | 28 | ||
29 | #include <linux/netfilter_ipv4/ip_tables.h> | 29 | #include <linux/netfilter_ipv4/ip_tables.h> |
30 | #include <linux/netfilter_ipv4/ipt_connmark.h> | 30 | #include <linux/netfilter_ipv4/ipt_connmark.h> |
31 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 31 | #include <net/netfilter/nf_conntrack_compat.h> |
32 | 32 | ||
33 | static int | 33 | static int |
34 | match(const struct sk_buff *skb, | 34 | match(const struct sk_buff *skb, |
@@ -39,12 +39,12 @@ match(const struct sk_buff *skb, | |||
39 | int *hotdrop) | 39 | int *hotdrop) |
40 | { | 40 | { |
41 | const struct ipt_connmark_info *info = matchinfo; | 41 | const struct ipt_connmark_info *info = matchinfo; |
42 | enum ip_conntrack_info ctinfo; | 42 | u_int32_t ctinfo; |
43 | struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); | 43 | const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo); |
44 | if (!ct) | 44 | if (!ctmark) |
45 | return 0; | 45 | return 0; |
46 | 46 | ||
47 | return ((ct->mark & info->mask) == info->mark) ^ info->invert; | 47 | return (((*ctmark) & info->mask) == info->mark) ^ info->invert; |
48 | } | 48 | } |
49 | 49 | ||
50 | static int | 50 | static int |
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c index c1d22801b7cf..c8d18705469b 100644 --- a/net/ipv4/netfilter/ipt_conntrack.c +++ b/net/ipv4/netfilter/ipt_conntrack.c | |||
@@ -10,7 +10,14 @@ | |||
10 | 10 | ||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> |
13 | |||
14 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) | ||
13 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 15 | #include <linux/netfilter_ipv4/ip_conntrack.h> |
16 | #include <linux/netfilter_ipv4/ip_conntrack_tuple.h> | ||
17 | #else | ||
18 | #include <net/netfilter/nf_conntrack.h> | ||
19 | #endif | ||
20 | |||
14 | #include <linux/netfilter_ipv4/ip_tables.h> | 21 | #include <linux/netfilter_ipv4/ip_tables.h> |
15 | #include <linux/netfilter_ipv4/ipt_conntrack.h> | 22 | #include <linux/netfilter_ipv4/ipt_conntrack.h> |
16 | 23 | ||
@@ -18,6 +25,8 @@ MODULE_LICENSE("GPL"); | |||
18 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); | 25 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); |
19 | MODULE_DESCRIPTION("iptables connection tracking match module"); | 26 | MODULE_DESCRIPTION("iptables connection tracking match module"); |
20 | 27 | ||
28 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) | ||
29 | |||
21 | static int | 30 | static int |
22 | match(const struct sk_buff *skb, | 31 | match(const struct sk_buff *skb, |
23 | const struct net_device *in, | 32 | const struct net_device *in, |
@@ -102,6 +111,93 @@ match(const struct sk_buff *skb, | |||
102 | return 1; | 111 | return 1; |
103 | } | 112 | } |
104 | 113 | ||
114 | #else /* CONFIG_IP_NF_CONNTRACK */ | ||
115 | static int | ||
116 | match(const struct sk_buff *skb, | ||
117 | const struct net_device *in, | ||
118 | const struct net_device *out, | ||
119 | const void *matchinfo, | ||
120 | int offset, | ||
121 | int *hotdrop) | ||
122 | { | ||
123 | const struct ipt_conntrack_info *sinfo = matchinfo; | ||
124 | struct nf_conn *ct; | ||
125 | enum ip_conntrack_info ctinfo; | ||
126 | unsigned int statebit; | ||
127 | |||
128 | ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); | ||
129 | |||
130 | #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) | ||
131 | |||
132 | if (ct == &nf_conntrack_untracked) | ||
133 | statebit = IPT_CONNTRACK_STATE_UNTRACKED; | ||
134 | else if (ct) | ||
135 | statebit = IPT_CONNTRACK_STATE_BIT(ctinfo); | ||
136 | else | ||
137 | statebit = IPT_CONNTRACK_STATE_INVALID; | ||
138 | |||
139 | if(sinfo->flags & IPT_CONNTRACK_STATE) { | ||
140 | if (ct) { | ||
141 | if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip != | ||
142 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip) | ||
143 | statebit |= IPT_CONNTRACK_STATE_SNAT; | ||
144 | |||
145 | if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip != | ||
146 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip) | ||
147 | statebit |= IPT_CONNTRACK_STATE_DNAT; | ||
148 | } | ||
149 | |||
150 | if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE)) | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | if(sinfo->flags & IPT_CONNTRACK_PROTO) { | ||
155 | if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO)) | ||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) { | ||
160 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC)) | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | if(sinfo->flags & IPT_CONNTRACK_ORIGDST) { | ||
165 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST)) | ||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | if(sinfo->flags & IPT_CONNTRACK_REPLSRC) { | ||
170 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC)) | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | if(sinfo->flags & IPT_CONNTRACK_REPLDST) { | ||
175 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST)) | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | if(sinfo->flags & IPT_CONNTRACK_STATUS) { | ||
180 | if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS)) | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | if(sinfo->flags & IPT_CONNTRACK_EXPIRES) { | ||
185 | unsigned long expires; | ||
186 | |||
187 | if(!ct) | ||
188 | return 0; | ||
189 | |||
190 | expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; | ||
191 | |||
192 | if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES)) | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | return 1; | ||
197 | } | ||
198 | |||
199 | #endif /* CONFIG_IP_NF_CONNTRACK */ | ||
200 | |||
105 | static int check(const char *tablename, | 201 | static int check(const char *tablename, |
106 | const struct ipt_ip *ip, | 202 | const struct ipt_ip *ip, |
107 | void *matchinfo, | 203 | void *matchinfo, |
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c index 3e7dd014de43..bf14e1c7798a 100644 --- a/net/ipv4/netfilter/ipt_helper.c +++ b/net/ipv4/netfilter/ipt_helper.c | |||
@@ -13,9 +13,15 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
15 | #include <linux/netfilter.h> | 15 | #include <linux/netfilter.h> |
16 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) | ||
16 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 17 | #include <linux/netfilter_ipv4/ip_conntrack.h> |
17 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 18 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> |
18 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 19 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> |
20 | #else | ||
21 | #include <net/netfilter/nf_conntrack.h> | ||
22 | #include <net/netfilter/nf_conntrack_core.h> | ||
23 | #include <net/netfilter/nf_conntrack_helper.h> | ||
24 | #endif | ||
19 | #include <linux/netfilter_ipv4/ip_tables.h> | 25 | #include <linux/netfilter_ipv4/ip_tables.h> |
20 | #include <linux/netfilter_ipv4/ipt_helper.h> | 26 | #include <linux/netfilter_ipv4/ipt_helper.h> |
21 | 27 | ||
@@ -29,6 +35,7 @@ MODULE_DESCRIPTION("iptables helper match module"); | |||
29 | #define DEBUGP(format, args...) | 35 | #define DEBUGP(format, args...) |
30 | #endif | 36 | #endif |
31 | 37 | ||
38 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) | ||
32 | static int | 39 | static int |
33 | match(const struct sk_buff *skb, | 40 | match(const struct sk_buff *skb, |
34 | const struct net_device *in, | 41 | const struct net_device *in, |
@@ -73,6 +80,53 @@ out_unlock: | |||
73 | return ret; | 80 | return ret; |
74 | } | 81 | } |
75 | 82 | ||
83 | #else /* CONFIG_IP_NF_CONNTRACK */ | ||
84 | |||
85 | static int | ||
86 | match(const struct sk_buff *skb, | ||
87 | const struct net_device *in, | ||
88 | const struct net_device *out, | ||
89 | const void *matchinfo, | ||
90 | int offset, | ||
91 | int *hotdrop) | ||
92 | { | ||
93 | const struct ipt_helper_info *info = matchinfo; | ||
94 | struct nf_conn *ct; | ||
95 | enum ip_conntrack_info ctinfo; | ||
96 | int ret = info->invert; | ||
97 | |||
98 | ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); | ||
99 | if (!ct) { | ||
100 | DEBUGP("ipt_helper: Eek! invalid conntrack?\n"); | ||
101 | return ret; | ||
102 | } | ||
103 | |||
104 | if (!ct->master) { | ||
105 | DEBUGP("ipt_helper: conntrack %p has no master\n", ct); | ||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | read_lock_bh(&nf_conntrack_lock); | ||
110 | if (!ct->master->helper) { | ||
111 | DEBUGP("ipt_helper: master ct %p has no helper\n", | ||
112 | exp->expectant); | ||
113 | goto out_unlock; | ||
114 | } | ||
115 | |||
116 | DEBUGP("master's name = %s , info->name = %s\n", | ||
117 | ct->master->helper->name, info->name); | ||
118 | |||
119 | if (info->name[0] == '\0') | ||
120 | ret ^= 1; | ||
121 | else | ||
122 | ret ^= !strncmp(ct->master->helper->name, info->name, | ||
123 | strlen(ct->master->helper->name)); | ||
124 | out_unlock: | ||
125 | read_unlock_bh(&nf_conntrack_lock); | ||
126 | return ret; | ||
127 | } | ||
128 | #endif | ||
129 | |||
76 | static int check(const char *tablename, | 130 | static int check(const char *tablename, |
77 | const struct ipt_ip *ip, | 131 | const struct ipt_ip *ip, |
78 | void *matchinfo, | 132 | void *matchinfo, |
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c index b1511b97ea5f..4d7f16b70cec 100644 --- a/net/ipv4/netfilter/ipt_state.c +++ b/net/ipv4/netfilter/ipt_state.c | |||
@@ -10,7 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> |
13 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 13 | #include <net/netfilter/nf_conntrack_compat.h> |
14 | #include <linux/netfilter_ipv4/ip_tables.h> | 14 | #include <linux/netfilter_ipv4/ip_tables.h> |
15 | #include <linux/netfilter_ipv4/ipt_state.h> | 15 | #include <linux/netfilter_ipv4/ipt_state.h> |
16 | 16 | ||
@@ -30,9 +30,9 @@ match(const struct sk_buff *skb, | |||
30 | enum ip_conntrack_info ctinfo; | 30 | enum ip_conntrack_info ctinfo; |
31 | unsigned int statebit; | 31 | unsigned int statebit; |
32 | 32 | ||
33 | if (skb->nfct == &ip_conntrack_untracked.ct_general) | 33 | if (nf_ct_is_untracked(skb)) |
34 | statebit = IPT_STATE_UNTRACKED; | 34 | statebit = IPT_STATE_UNTRACKED; |
35 | else if (!ip_conntrack_get(skb, &ctinfo)) | 35 | else if (!nf_ct_get_ctinfo(skb, &ctinfo)) |
36 | statebit = IPT_STATE_INVALID; | 36 | statebit = IPT_STATE_INVALID; |
37 | else | 37 | else |
38 | statebit = IPT_STATE_BIT(ctinfo); | 38 | statebit = IPT_STATE_BIT(ctinfo); |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c new file mode 100644 index 000000000000..8202c1c0afad --- /dev/null +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -0,0 +1,571 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
9 | * - move L3 protocol dependent part to this file. | ||
10 | * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
11 | * - add get_features() to support various size of conntrack | ||
12 | * structures. | ||
13 | * | ||
14 | * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c | ||
15 | */ | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <linux/ip.h> | ||
20 | #include <linux/netfilter.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/skbuff.h> | ||
23 | #include <linux/icmp.h> | ||
24 | #include <linux/sysctl.h> | ||
25 | #include <net/ip.h> | ||
26 | |||
27 | #include <linux/netfilter_ipv4.h> | ||
28 | #include <net/netfilter/nf_conntrack.h> | ||
29 | #include <net/netfilter/nf_conntrack_helper.h> | ||
30 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
31 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
32 | #include <net/netfilter/nf_conntrack_core.h> | ||
33 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | ||
34 | |||
35 | #if 0 | ||
36 | #define DEBUGP printk | ||
37 | #else | ||
38 | #define DEBUGP(format, args...) | ||
39 | #endif | ||
40 | |||
41 | DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); | ||
42 | |||
43 | static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | ||
44 | struct nf_conntrack_tuple *tuple) | ||
45 | { | ||
46 | u_int32_t _addrs[2], *ap; | ||
47 | ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), | ||
48 | sizeof(u_int32_t) * 2, _addrs); | ||
49 | if (ap == NULL) | ||
50 | return 0; | ||
51 | |||
52 | tuple->src.u3.ip = ap[0]; | ||
53 | tuple->dst.u3.ip = ap[1]; | ||
54 | |||
55 | return 1; | ||
56 | } | ||
57 | |||
58 | static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
59 | const struct nf_conntrack_tuple *orig) | ||
60 | { | ||
61 | tuple->src.u3.ip = orig->dst.u3.ip; | ||
62 | tuple->dst.u3.ip = orig->src.u3.ip; | ||
63 | |||
64 | return 1; | ||
65 | } | ||
66 | |||
67 | static int ipv4_print_tuple(struct seq_file *s, | ||
68 | const struct nf_conntrack_tuple *tuple) | ||
69 | { | ||
70 | return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", | ||
71 | NIPQUAD(tuple->src.u3.ip), | ||
72 | NIPQUAD(tuple->dst.u3.ip)); | ||
73 | } | ||
74 | |||
/* IPv4 prints nothing per conntrack entry; always returns 0. */
static int ipv4_print_conntrack(struct seq_file *s,
				const struct nf_conn *conntrack)
{
	(void)s;
	(void)conntrack;

	return 0;
}
80 | |||
81 | /* Returns new sk_buff, or NULL */ | ||
82 | static struct sk_buff * | ||
83 | nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | ||
84 | { | ||
85 | skb_orphan(skb); | ||
86 | |||
87 | local_bh_disable(); | ||
88 | skb = ip_defrag(skb, user); | ||
89 | local_bh_enable(); | ||
90 | |||
91 | if (skb) | ||
92 | ip_send_check(skb->nh.iph); | ||
93 | |||
94 | return skb; | ||
95 | } | ||
96 | |||
97 | static int | ||
98 | ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, | ||
99 | u_int8_t *protonum) | ||
100 | { | ||
101 | /* Never happen */ | ||
102 | if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) { | ||
103 | if (net_ratelimit()) { | ||
104 | printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", | ||
105 | (*pskb)->nh.iph->protocol, hooknum); | ||
106 | } | ||
107 | return -NF_DROP; | ||
108 | } | ||
109 | |||
110 | *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4; | ||
111 | *protonum = (*pskb)->nh.iph->protocol; | ||
112 | |||
113 | return NF_ACCEPT; | ||
114 | } | ||
115 | |||
116 | int nat_module_is_loaded = 0; | ||
117 | static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) | ||
118 | { | ||
119 | if (nat_module_is_loaded) | ||
120 | return NF_CT_F_NAT; | ||
121 | |||
122 | return NF_CT_F_BASIC; | ||
123 | } | ||
124 | |||
/*
 * Netfilter hook: the packet made it through to the other side, so the
 * connection it belongs to is confirmed.  Returns the verdict of
 * nf_conntrack_confirm(); the other hook arguments are unused.
 */
static unsigned int ipv4_confirm(unsigned int hooknum,
				 struct sk_buff **pskb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	unsigned int verdict = nf_conntrack_confirm(pskb);

	return verdict;
}
134 | |||
135 | static unsigned int ipv4_conntrack_help(unsigned int hooknum, | ||
136 | struct sk_buff **pskb, | ||
137 | const struct net_device *in, | ||
138 | const struct net_device *out, | ||
139 | int (*okfn)(struct sk_buff *)) | ||
140 | { | ||
141 | struct nf_conn *ct; | ||
142 | enum ip_conntrack_info ctinfo; | ||
143 | |||
144 | /* This is where we call the helper: as the packet goes out. */ | ||
145 | ct = nf_ct_get(*pskb, &ctinfo); | ||
146 | if (ct && ct->helper) { | ||
147 | unsigned int ret; | ||
148 | ret = ct->helper->help(pskb, | ||
149 | (*pskb)->nh.raw - (*pskb)->data | ||
150 | + (*pskb)->nh.iph->ihl*4, | ||
151 | ct, ctinfo); | ||
152 | if (ret != NF_ACCEPT) | ||
153 | return ret; | ||
154 | } | ||
155 | return NF_ACCEPT; | ||
156 | } | ||
157 | |||
158 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | ||
159 | struct sk_buff **pskb, | ||
160 | const struct net_device *in, | ||
161 | const struct net_device *out, | ||
162 | int (*okfn)(struct sk_buff *)) | ||
163 | { | ||
164 | #if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE) | ||
165 | /* Previously seen (loopback)? Ignore. Do this before | ||
166 | fragment check. */ | ||
167 | if ((*pskb)->nfct) | ||
168 | return NF_ACCEPT; | ||
169 | #endif | ||
170 | |||
171 | /* Gather fragments. */ | ||
172 | if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { | ||
173 | *pskb = nf_ct_ipv4_gather_frags(*pskb, | ||
174 | hooknum == NF_IP_PRE_ROUTING ? | ||
175 | IP_DEFRAG_CONNTRACK_IN : | ||
176 | IP_DEFRAG_CONNTRACK_OUT); | ||
177 | if (!*pskb) | ||
178 | return NF_STOLEN; | ||
179 | } | ||
180 | return NF_ACCEPT; | ||
181 | } | ||
182 | |||
183 | static unsigned int ipv4_refrag(unsigned int hooknum, | ||
184 | struct sk_buff **pskb, | ||
185 | const struct net_device *in, | ||
186 | const struct net_device *out, | ||
187 | int (*okfn)(struct sk_buff *)) | ||
188 | { | ||
189 | struct rtable *rt = (struct rtable *)(*pskb)->dst; | ||
190 | |||
191 | /* We've seen it coming out the other side: confirm */ | ||
192 | if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT) | ||
193 | return NF_DROP; | ||
194 | |||
195 | /* Local packets are never produced too large for their | ||
196 | interface. We degfragment them at LOCAL_OUT, however, | ||
197 | so we have to refragment them here. */ | ||
198 | if ((*pskb)->len > dst_mtu(&rt->u.dst) && | ||
199 | !skb_shinfo(*pskb)->tso_size) { | ||
200 | /* No hook can be after us, so this should be OK. */ | ||
201 | ip_fragment(*pskb, okfn); | ||
202 | return NF_STOLEN; | ||
203 | } | ||
204 | return NF_ACCEPT; | ||
205 | } | ||
206 | |||
207 | static unsigned int ipv4_conntrack_in(unsigned int hooknum, | ||
208 | struct sk_buff **pskb, | ||
209 | const struct net_device *in, | ||
210 | const struct net_device *out, | ||
211 | int (*okfn)(struct sk_buff *)) | ||
212 | { | ||
213 | return nf_conntrack_in(PF_INET, hooknum, pskb); | ||
214 | } | ||
215 | |||
216 | static unsigned int ipv4_conntrack_local(unsigned int hooknum, | ||
217 | struct sk_buff **pskb, | ||
218 | const struct net_device *in, | ||
219 | const struct net_device *out, | ||
220 | int (*okfn)(struct sk_buff *)) | ||
221 | { | ||
222 | /* root is playing with raw sockets. */ | ||
223 | if ((*pskb)->len < sizeof(struct iphdr) | ||
224 | || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { | ||
225 | if (net_ratelimit()) | ||
226 | printk("ipt_hook: happy cracking.\n"); | ||
227 | return NF_ACCEPT; | ||
228 | } | ||
229 | return nf_conntrack_in(PF_INET, hooknum, pskb); | ||
230 | } | ||
231 | |||
232 | /* Connection tracking may drop packets, but never alters them, so | ||
233 | make it the first hook. */ | ||
234 | static struct nf_hook_ops ipv4_conntrack_defrag_ops = { | ||
235 | .hook = ipv4_conntrack_defrag, | ||
236 | .owner = THIS_MODULE, | ||
237 | .pf = PF_INET, | ||
238 | .hooknum = NF_IP_PRE_ROUTING, | ||
239 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | ||
240 | }; | ||
241 | |||
242 | static struct nf_hook_ops ipv4_conntrack_in_ops = { | ||
243 | .hook = ipv4_conntrack_in, | ||
244 | .owner = THIS_MODULE, | ||
245 | .pf = PF_INET, | ||
246 | .hooknum = NF_IP_PRE_ROUTING, | ||
247 | .priority = NF_IP_PRI_CONNTRACK, | ||
248 | }; | ||
249 | |||
250 | static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = { | ||
251 | .hook = ipv4_conntrack_defrag, | ||
252 | .owner = THIS_MODULE, | ||
253 | .pf = PF_INET, | ||
254 | .hooknum = NF_IP_LOCAL_OUT, | ||
255 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | ||
256 | }; | ||
257 | |||
258 | static struct nf_hook_ops ipv4_conntrack_local_out_ops = { | ||
259 | .hook = ipv4_conntrack_local, | ||
260 | .owner = THIS_MODULE, | ||
261 | .pf = PF_INET, | ||
262 | .hooknum = NF_IP_LOCAL_OUT, | ||
263 | .priority = NF_IP_PRI_CONNTRACK, | ||
264 | }; | ||
265 | |||
266 | /* helpers */ | ||
267 | static struct nf_hook_ops ipv4_conntrack_helper_out_ops = { | ||
268 | .hook = ipv4_conntrack_help, | ||
269 | .owner = THIS_MODULE, | ||
270 | .pf = PF_INET, | ||
271 | .hooknum = NF_IP_POST_ROUTING, | ||
272 | .priority = NF_IP_PRI_CONNTRACK_HELPER, | ||
273 | }; | ||
274 | |||
275 | static struct nf_hook_ops ipv4_conntrack_helper_in_ops = { | ||
276 | .hook = ipv4_conntrack_help, | ||
277 | .owner = THIS_MODULE, | ||
278 | .pf = PF_INET, | ||
279 | .hooknum = NF_IP_LOCAL_IN, | ||
280 | .priority = NF_IP_PRI_CONNTRACK_HELPER, | ||
281 | }; | ||
282 | |||
283 | |||
284 | /* Refragmenter; last chance. */ | ||
285 | static struct nf_hook_ops ipv4_conntrack_out_ops = { | ||
286 | .hook = ipv4_refrag, | ||
287 | .owner = THIS_MODULE, | ||
288 | .pf = PF_INET, | ||
289 | .hooknum = NF_IP_POST_ROUTING, | ||
290 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | ||
291 | }; | ||
292 | |||
293 | static struct nf_hook_ops ipv4_conntrack_local_in_ops = { | ||
294 | .hook = ipv4_confirm, | ||
295 | .owner = THIS_MODULE, | ||
296 | .pf = PF_INET, | ||
297 | .hooknum = NF_IP_LOCAL_IN, | ||
298 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | ||
299 | }; | ||
300 | |||
#ifdef CONFIG_SYSCTL
/*
 * sysctl tree net.netfilter.nf_conntrack_icmp_timeout.
 *
 * Three nested tables: "net" -> "netfilter" -> the ICMP timeout entry.
 * Each table ends with an entry whose ctl_name is 0.
 */

/* From nf_conntrack_proto_icmp.c */
extern unsigned long nf_ct_icmp_timeout;
static struct ctl_table_header *nf_ct_ipv4_sysctl_header;

/*
 * NOTE(review): the variable is an unsigned long while maxlen and the
 * handler describe an int-sized value; looks like a size mismatch on
 * 64-bit targets -- confirm before relying on it.
 */
static ctl_table nf_ct_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_icmp_timeout",
		.ctl_name	= NET_NF_CONNTRACK_ICMP_TIMEOUT,
		.mode		= 0644,
		.data		= &nf_ct_icmp_timeout,
		.maxlen		= sizeof(unsigned int),
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{ .ctl_name = 0 }
};

static ctl_table nf_ct_netfilter_table[] = {
	{
		.procname	= "netfilter",
		.ctl_name	= NET_NETFILTER,
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

static ctl_table nf_ct_net_table[] = {
	{
		.procname	= "net",
		.ctl_name	= CTL_NET,
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};
#endif
338 | |||
339 | /* Fast function for those who don't want to parse /proc (and I don't | ||
340 | blame them). */ | ||
341 | /* Reversing the socket's dst/src point of view gives us the reply | ||
342 | mapping. */ | ||
343 | static int | ||
344 | getorigdst(struct sock *sk, int optval, void __user *user, int *len) | ||
345 | { | ||
346 | struct inet_sock *inet = inet_sk(sk); | ||
347 | struct nf_conntrack_tuple_hash *h; | ||
348 | struct nf_conntrack_tuple tuple; | ||
349 | |||
350 | NF_CT_TUPLE_U_BLANK(&tuple); | ||
351 | tuple.src.u3.ip = inet->rcv_saddr; | ||
352 | tuple.src.u.tcp.port = inet->sport; | ||
353 | tuple.dst.u3.ip = inet->daddr; | ||
354 | tuple.dst.u.tcp.port = inet->dport; | ||
355 | tuple.src.l3num = PF_INET; | ||
356 | tuple.dst.protonum = IPPROTO_TCP; | ||
357 | |||
358 | /* We only do TCP at the moment: is there a better way? */ | ||
359 | if (strcmp(sk->sk_prot->name, "TCP")) { | ||
360 | DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); | ||
361 | return -ENOPROTOOPT; | ||
362 | } | ||
363 | |||
364 | if ((unsigned int) *len < sizeof(struct sockaddr_in)) { | ||
365 | DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", | ||
366 | *len, sizeof(struct sockaddr_in)); | ||
367 | return -EINVAL; | ||
368 | } | ||
369 | |||
370 | h = nf_conntrack_find_get(&tuple, NULL); | ||
371 | if (h) { | ||
372 | struct sockaddr_in sin; | ||
373 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | ||
374 | |||
375 | sin.sin_family = AF_INET; | ||
376 | sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] | ||
377 | .tuple.dst.u.tcp.port; | ||
378 | sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] | ||
379 | .tuple.dst.u3.ip; | ||
380 | |||
381 | DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", | ||
382 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); | ||
383 | nf_ct_put(ct); | ||
384 | if (copy_to_user(user, &sin, sizeof(sin)) != 0) | ||
385 | return -EFAULT; | ||
386 | else | ||
387 | return 0; | ||
388 | } | ||
389 | DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", | ||
390 | NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), | ||
391 | NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); | ||
392 | return -ENOENT; | ||
393 | } | ||
394 | |||
395 | static struct nf_sockopt_ops so_getorigdst = { | ||
396 | .pf = PF_INET, | ||
397 | .get_optmin = SO_ORIGINAL_DST, | ||
398 | .get_optmax = SO_ORIGINAL_DST+1, | ||
399 | .get = &getorigdst, | ||
400 | }; | ||
401 | |||
402 | struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { | ||
403 | .l3proto = PF_INET, | ||
404 | .name = "ipv4", | ||
405 | .pkt_to_tuple = ipv4_pkt_to_tuple, | ||
406 | .invert_tuple = ipv4_invert_tuple, | ||
407 | .print_tuple = ipv4_print_tuple, | ||
408 | .print_conntrack = ipv4_print_conntrack, | ||
409 | .prepare = ipv4_prepare, | ||
410 | .get_features = ipv4_get_features, | ||
411 | .me = THIS_MODULE, | ||
412 | }; | ||
413 | |||
414 | extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4; | ||
415 | extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4; | ||
416 | extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp; | ||
417 | static int init_or_cleanup(int init) | ||
418 | { | ||
419 | int ret = 0; | ||
420 | |||
421 | if (!init) goto cleanup; | ||
422 | |||
423 | ret = nf_register_sockopt(&so_getorigdst); | ||
424 | if (ret < 0) { | ||
425 | printk(KERN_ERR "Unable to register netfilter socket option\n"); | ||
426 | goto cleanup_nothing; | ||
427 | } | ||
428 | |||
429 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4); | ||
430 | if (ret < 0) { | ||
431 | printk("nf_conntrack_ipv4: can't register tcp.\n"); | ||
432 | goto cleanup_sockopt; | ||
433 | } | ||
434 | |||
435 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4); | ||
436 | if (ret < 0) { | ||
437 | printk("nf_conntrack_ipv4: can't register udp.\n"); | ||
438 | goto cleanup_tcp; | ||
439 | } | ||
440 | |||
441 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp); | ||
442 | if (ret < 0) { | ||
443 | printk("nf_conntrack_ipv4: can't register icmp.\n"); | ||
444 | goto cleanup_udp; | ||
445 | } | ||
446 | |||
447 | ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); | ||
448 | if (ret < 0) { | ||
449 | printk("nf_conntrack_ipv4: can't register ipv4\n"); | ||
450 | goto cleanup_icmp; | ||
451 | } | ||
452 | |||
453 | ret = nf_register_hook(&ipv4_conntrack_defrag_ops); | ||
454 | if (ret < 0) { | ||
455 | printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n"); | ||
456 | goto cleanup_ipv4; | ||
457 | } | ||
458 | ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops); | ||
459 | if (ret < 0) { | ||
460 | printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n"); | ||
461 | goto cleanup_defragops; | ||
462 | } | ||
463 | |||
464 | ret = nf_register_hook(&ipv4_conntrack_in_ops); | ||
465 | if (ret < 0) { | ||
466 | printk("nf_conntrack_ipv4: can't register pre-routing hook.\n"); | ||
467 | goto cleanup_defraglocalops; | ||
468 | } | ||
469 | |||
470 | ret = nf_register_hook(&ipv4_conntrack_local_out_ops); | ||
471 | if (ret < 0) { | ||
472 | printk("nf_conntrack_ipv4: can't register local out hook.\n"); | ||
473 | goto cleanup_inops; | ||
474 | } | ||
475 | |||
476 | ret = nf_register_hook(&ipv4_conntrack_helper_in_ops); | ||
477 | if (ret < 0) { | ||
478 | printk("nf_conntrack_ipv4: can't register local helper hook.\n"); | ||
479 | goto cleanup_inandlocalops; | ||
480 | } | ||
481 | |||
482 | ret = nf_register_hook(&ipv4_conntrack_helper_out_ops); | ||
483 | if (ret < 0) { | ||
484 | printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n"); | ||
485 | goto cleanup_helperinops; | ||
486 | } | ||
487 | |||
488 | ret = nf_register_hook(&ipv4_conntrack_out_ops); | ||
489 | if (ret < 0) { | ||
490 | printk("nf_conntrack_ipv4: can't register post-routing hook.\n"); | ||
491 | goto cleanup_helperoutops; | ||
492 | } | ||
493 | |||
494 | ret = nf_register_hook(&ipv4_conntrack_local_in_ops); | ||
495 | if (ret < 0) { | ||
496 | printk("nf_conntrack_ipv4: can't register local in hook.\n"); | ||
497 | goto cleanup_inoutandlocalops; | ||
498 | } | ||
499 | |||
500 | #ifdef CONFIG_SYSCTL | ||
501 | nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); | ||
502 | if (nf_ct_ipv4_sysctl_header == NULL) { | ||
503 | printk("nf_conntrack: can't register to sysctl.\n"); | ||
504 | ret = -ENOMEM; | ||
505 | goto cleanup_localinops; | ||
506 | } | ||
507 | #endif | ||
508 | |||
509 | /* For use by REJECT target */ | ||
510 | ip_ct_attach = __nf_conntrack_attach; | ||
511 | |||
512 | return ret; | ||
513 | |||
514 | cleanup: | ||
515 | synchronize_net(); | ||
516 | ip_ct_attach = NULL; | ||
517 | #ifdef CONFIG_SYSCTL | ||
518 | unregister_sysctl_table(nf_ct_ipv4_sysctl_header); | ||
519 | cleanup_localinops: | ||
520 | #endif | ||
521 | nf_unregister_hook(&ipv4_conntrack_local_in_ops); | ||
522 | cleanup_inoutandlocalops: | ||
523 | nf_unregister_hook(&ipv4_conntrack_out_ops); | ||
524 | cleanup_helperoutops: | ||
525 | nf_unregister_hook(&ipv4_conntrack_helper_out_ops); | ||
526 | cleanup_helperinops: | ||
527 | nf_unregister_hook(&ipv4_conntrack_helper_in_ops); | ||
528 | cleanup_inandlocalops: | ||
529 | nf_unregister_hook(&ipv4_conntrack_local_out_ops); | ||
530 | cleanup_inops: | ||
531 | nf_unregister_hook(&ipv4_conntrack_in_ops); | ||
532 | cleanup_defraglocalops: | ||
533 | nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops); | ||
534 | cleanup_defragops: | ||
535 | nf_unregister_hook(&ipv4_conntrack_defrag_ops); | ||
536 | cleanup_ipv4: | ||
537 | nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); | ||
538 | cleanup_icmp: | ||
539 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp); | ||
540 | cleanup_udp: | ||
541 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4); | ||
542 | cleanup_tcp: | ||
543 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4); | ||
544 | cleanup_sockopt: | ||
545 | nf_unregister_sockopt(&so_getorigdst); | ||
546 | cleanup_nothing: | ||
547 | return ret; | ||
548 | } | ||
549 | |||
550 | MODULE_LICENSE("GPL"); | ||
551 | |||
552 | static int __init init(void) | ||
553 | { | ||
554 | need_nf_conntrack(); | ||
555 | return init_or_cleanup(1); | ||
556 | } | ||
557 | |||
558 | static void __exit fini(void) | ||
559 | { | ||
560 | init_or_cleanup(0); | ||
561 | } | ||
562 | |||
563 | module_init(init); | ||
564 | module_exit(fini); | ||
565 | |||
/* Intentionally empty function, exported below. */
void need_ip_conntrack(void)
{
	return;
}

EXPORT_SYMBOL(need_ip_conntrack);
EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c new file mode 100644 index 000000000000..7ddb5c08f7b8 --- /dev/null +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -0,0 +1,301 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
9 | * - enable working with Layer 3 protocol independent connection tracking. | ||
10 | * | ||
11 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c | ||
12 | */ | ||
13 | |||
14 | #include <linux/types.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/timer.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <linux/in.h> | ||
19 | #include <linux/icmp.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include <net/ip.h> | ||
22 | #include <net/checksum.h> | ||
23 | #include <linux/netfilter_ipv4.h> | ||
24 | #include <net/netfilter/nf_conntrack_tuple.h> | ||
25 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
26 | #include <net/netfilter/nf_conntrack_core.h> | ||
27 | |||
28 | unsigned long nf_ct_icmp_timeout = 30*HZ; | ||
29 | |||
30 | #if 0 | ||
31 | #define DEBUGP printk | ||
32 | #else | ||
33 | #define DEBUGP(format, args...) | ||
34 | #endif | ||
35 | |||
36 | static int icmp_pkt_to_tuple(const struct sk_buff *skb, | ||
37 | unsigned int dataoff, | ||
38 | struct nf_conntrack_tuple *tuple) | ||
39 | { | ||
40 | struct icmphdr _hdr, *hp; | ||
41 | |||
42 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
43 | if (hp == NULL) | ||
44 | return 0; | ||
45 | |||
46 | tuple->dst.u.icmp.type = hp->type; | ||
47 | tuple->src.u.icmp.id = hp->un.echo.id; | ||
48 | tuple->dst.u.icmp.code = hp->code; | ||
49 | |||
50 | return 1; | ||
51 | } | ||
52 | |||
53 | static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
54 | const struct nf_conntrack_tuple *orig) | ||
55 | { | ||
56 | /* Add 1; spaces filled with 0. */ | ||
57 | static u_int8_t invmap[] | ||
58 | = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1, | ||
59 | [ICMP_ECHOREPLY] = ICMP_ECHO + 1, | ||
60 | [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, | ||
61 | [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, | ||
62 | [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, | ||
63 | [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, | ||
64 | [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, | ||
65 | [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1}; | ||
66 | |||
67 | if (orig->dst.u.icmp.type >= sizeof(invmap) | ||
68 | || !invmap[orig->dst.u.icmp.type]) | ||
69 | return 0; | ||
70 | |||
71 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | ||
72 | tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; | ||
73 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; | ||
74 | return 1; | ||
75 | } | ||
76 | |||
77 | /* Print out the per-protocol part of the tuple. */ | ||
78 | static int icmp_print_tuple(struct seq_file *s, | ||
79 | const struct nf_conntrack_tuple *tuple) | ||
80 | { | ||
81 | return seq_printf(s, "type=%u code=%u id=%u ", | ||
82 | tuple->dst.u.icmp.type, | ||
83 | tuple->dst.u.icmp.code, | ||
84 | ntohs(tuple->src.u.icmp.id)); | ||
85 | } | ||
86 | |||
/*
 * Print the protocol-private part of the conntrack.  Nothing is printed
 * for ICMP, so this just returns 0.
 */
static int icmp_print_conntrack(struct seq_file *s,
				const struct nf_conn *conntrack)
{
	const int nothing_printed = 0;

	return nothing_printed;
}
93 | |||
94 | /* Returns verdict for packet, or -1 for invalid. */ | ||
95 | static int icmp_packet(struct nf_conn *ct, | ||
96 | const struct sk_buff *skb, | ||
97 | unsigned int dataoff, | ||
98 | enum ip_conntrack_info ctinfo, | ||
99 | int pf, | ||
100 | unsigned int hooknum) | ||
101 | { | ||
102 | /* Try to delete connection immediately after all replies: | ||
103 | won't actually vanish as we still have skb, and del_timer | ||
104 | means this will only run once even if count hits zero twice | ||
105 | (theoretically possible with SMP) */ | ||
106 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { | ||
107 | if (atomic_dec_and_test(&ct->proto.icmp.count) | ||
108 | && del_timer(&ct->timeout)) | ||
109 | ct->timeout.function((unsigned long)ct); | ||
110 | } else { | ||
111 | atomic_inc(&ct->proto.icmp.count); | ||
112 | nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
113 | nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); | ||
114 | } | ||
115 | |||
116 | return NF_ACCEPT; | ||
117 | } | ||
118 | |||
119 | /* Called when a new connection for this protocol found. */ | ||
120 | static int icmp_new(struct nf_conn *conntrack, | ||
121 | const struct sk_buff *skb, unsigned int dataoff) | ||
122 | { | ||
123 | static u_int8_t valid_new[] | ||
124 | = { [ICMP_ECHO] = 1, | ||
125 | [ICMP_TIMESTAMP] = 1, | ||
126 | [ICMP_INFO_REQUEST] = 1, | ||
127 | [ICMP_ADDRESS] = 1 }; | ||
128 | |||
129 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) | ||
130 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { | ||
131 | /* Can't create a new ICMP `conn' with this. */ | ||
132 | DEBUGP("icmp: can't create new conn with type %u\n", | ||
133 | conntrack->tuplehash[0].tuple.dst.u.icmp.type); | ||
134 | NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); | ||
135 | return 0; | ||
136 | } | ||
137 | atomic_set(&conntrack->proto.icmp.count, 0); | ||
138 | return 1; | ||
139 | } | ||
140 | |||
141 | extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; | ||
142 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ | ||
143 | static int | ||
144 | icmp_error_message(struct sk_buff *skb, | ||
145 | enum ip_conntrack_info *ctinfo, | ||
146 | unsigned int hooknum) | ||
147 | { | ||
148 | struct nf_conntrack_tuple innertuple, origtuple; | ||
149 | struct { | ||
150 | struct icmphdr icmp; | ||
151 | struct iphdr ip; | ||
152 | } _in, *inside; | ||
153 | struct nf_conntrack_protocol *innerproto; | ||
154 | struct nf_conntrack_tuple_hash *h; | ||
155 | int dataoff; | ||
156 | |||
157 | NF_CT_ASSERT(skb->nfct == NULL); | ||
158 | |||
159 | /* Not enough header? */ | ||
160 | inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); | ||
161 | if (inside == NULL) | ||
162 | return -NF_ACCEPT; | ||
163 | |||
164 | /* Ignore ICMP's containing fragments (shouldn't happen) */ | ||
165 | if (inside->ip.frag_off & htons(IP_OFFSET)) { | ||
166 | DEBUGP("icmp_error_message: fragment of proto %u\n", | ||
167 | inside->ip.protocol); | ||
168 | return -NF_ACCEPT; | ||
169 | } | ||
170 | |||
171 | innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol); | ||
172 | dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); | ||
173 | /* Are they talking about one of our connections? */ | ||
174 | if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, | ||
175 | inside->ip.protocol, &origtuple, | ||
176 | &nf_conntrack_l3proto_ipv4, innerproto)) { | ||
177 | DEBUGP("icmp_error_message: ! get_tuple p=%u", | ||
178 | inside->ip.protocol); | ||
179 | return -NF_ACCEPT; | ||
180 | } | ||
181 | |||
182 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | ||
183 | been preserved inside the ICMP. */ | ||
184 | if (!nf_ct_invert_tuple(&innertuple, &origtuple, | ||
185 | &nf_conntrack_l3proto_ipv4, innerproto)) { | ||
186 | DEBUGP("icmp_error_message: no match\n"); | ||
187 | return -NF_ACCEPT; | ||
188 | } | ||
189 | |||
190 | *ctinfo = IP_CT_RELATED; | ||
191 | |||
192 | h = nf_conntrack_find_get(&innertuple, NULL); | ||
193 | if (!h) { | ||
194 | /* Locally generated ICMPs will match inverted if they | ||
195 | haven't been SNAT'ed yet */ | ||
196 | /* FIXME: NAT code has to handle half-done double NAT --RR */ | ||
197 | if (hooknum == NF_IP_LOCAL_OUT) | ||
198 | h = nf_conntrack_find_get(&origtuple, NULL); | ||
199 | |||
200 | if (!h) { | ||
201 | DEBUGP("icmp_error_message: no match\n"); | ||
202 | return -NF_ACCEPT; | ||
203 | } | ||
204 | |||
205 | /* Reverse direction from that found */ | ||
206 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | ||
207 | *ctinfo += IP_CT_IS_REPLY; | ||
208 | } else { | ||
209 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | ||
210 | *ctinfo += IP_CT_IS_REPLY; | ||
211 | } | ||
212 | |||
213 | /* Update skb to refer to this connection */ | ||
214 | skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; | ||
215 | skb->nfctinfo = *ctinfo; | ||
216 | return -NF_ACCEPT; | ||
217 | } | ||
218 | |||
219 | /* Small and modified version of icmp_rcv */ | ||
220 | static int | ||
221 | icmp_error(struct sk_buff *skb, unsigned int dataoff, | ||
222 | enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) | ||
223 | { | ||
224 | struct icmphdr _ih, *icmph; | ||
225 | |||
226 | /* Not enough header? */ | ||
227 | icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); | ||
228 | if (icmph == NULL) { | ||
229 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
230 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
231 | "nf_ct_icmp: short packet "); | ||
232 | return -NF_ACCEPT; | ||
233 | } | ||
234 | |||
235 | /* See ip_conntrack_proto_tcp.c */ | ||
236 | if (hooknum != NF_IP_PRE_ROUTING) | ||
237 | goto checksum_skipped; | ||
238 | |||
239 | switch (skb->ip_summed) { | ||
240 | case CHECKSUM_HW: | ||
241 | if (!(u16)csum_fold(skb->csum)) | ||
242 | break; | ||
243 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
244 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
245 | "nf_ct_icmp: bad HW ICMP checksum "); | ||
246 | return -NF_ACCEPT; | ||
247 | case CHECKSUM_NONE: | ||
248 | if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { | ||
249 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
250 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | ||
251 | NULL, | ||
252 | "nf_ct_icmp: bad ICMP checksum "); | ||
253 | return -NF_ACCEPT; | ||
254 | } | ||
255 | default: | ||
256 | break; | ||
257 | } | ||
258 | |||
259 | checksum_skipped: | ||
260 | /* | ||
261 | * 18 is the highest 'known' ICMP type. Anything else is a mystery | ||
262 | * | ||
263 | * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently | ||
264 | * discarded. | ||
265 | */ | ||
266 | if (icmph->type > NR_ICMP_TYPES) { | ||
267 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
268 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
269 | "nf_ct_icmp: invalid ICMP type "); | ||
270 | return -NF_ACCEPT; | ||
271 | } | ||
272 | |||
273 | /* Need to track icmp error message? */ | ||
274 | if (icmph->type != ICMP_DEST_UNREACH | ||
275 | && icmph->type != ICMP_SOURCE_QUENCH | ||
276 | && icmph->type != ICMP_TIME_EXCEEDED | ||
277 | && icmph->type != ICMP_PARAMETERPROB | ||
278 | && icmph->type != ICMP_REDIRECT) | ||
279 | return NF_ACCEPT; | ||
280 | |||
281 | return icmp_error_message(skb, ctinfo, hooknum); | ||
282 | } | ||
283 | |||
284 | struct nf_conntrack_protocol nf_conntrack_protocol_icmp = | ||
285 | { | ||
286 | .list = { NULL, NULL }, | ||
287 | .l3proto = PF_INET, | ||
288 | .proto = IPPROTO_ICMP, | ||
289 | .name = "icmp", | ||
290 | .pkt_to_tuple = icmp_pkt_to_tuple, | ||
291 | .invert_tuple = icmp_invert_tuple, | ||
292 | .print_tuple = icmp_print_tuple, | ||
293 | .print_conntrack = icmp_print_conntrack, | ||
294 | .packet = icmp_packet, | ||
295 | .new = icmp_new, | ||
296 | .error = icmp_error, | ||
297 | .destroy = NULL, | ||
298 | .me = NULL | ||
299 | }; | ||
300 | |||
301 | EXPORT_SYMBOL(nf_conntrack_protocol_icmp); | ||
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 6e3480426939..a6026d2787d2 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c | |||
@@ -176,6 +176,11 @@ resubmit: | |||
176 | if (ipprot->flags & INET6_PROTO_FINAL) { | 176 | if (ipprot->flags & INET6_PROTO_FINAL) { |
177 | struct ipv6hdr *hdr; | 177 | struct ipv6hdr *hdr; |
178 | 178 | ||
179 | /* Free reference early: we don't need it any more, | ||
180 | and it may hold ip_conntrack module loaded | ||
181 | indefinitely. */ | ||
182 | nf_reset(skb); | ||
183 | |||
179 | skb_postpull_rcsum(skb, skb->nh.raw, | 184 | skb_postpull_rcsum(skb, skb->nh.raw, |
180 | skb->h.raw - skb->nh.raw); | 185 | skb->h.raw - skb->nh.raw); |
181 | hdr = skb->nh.ipv6h; | 186 | hdr = skb->nh.ipv6h; |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index dbd9767b32e4..c1fa693511a1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -441,9 +441,15 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
441 | #ifdef CONFIG_NETFILTER | 441 | #ifdef CONFIG_NETFILTER |
442 | to->nfmark = from->nfmark; | 442 | to->nfmark = from->nfmark; |
443 | /* Connection association is same as pre-frag packet */ | 443 | /* Connection association is same as pre-frag packet */ |
444 | nf_conntrack_put(to->nfct); | ||
444 | to->nfct = from->nfct; | 445 | to->nfct = from->nfct; |
445 | nf_conntrack_get(to->nfct); | 446 | nf_conntrack_get(to->nfct); |
446 | to->nfctinfo = from->nfctinfo; | 447 | to->nfctinfo = from->nfctinfo; |
448 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
449 | nf_conntrack_put_reasm(to->nfct_reasm); | ||
450 | to->nfct_reasm = from->nfct_reasm; | ||
451 | nf_conntrack_get_reasm(to->nfct_reasm); | ||
452 | #endif | ||
447 | #ifdef CONFIG_BRIDGE_NETFILTER | 453 | #ifdef CONFIG_BRIDGE_NETFILTER |
448 | nf_bridge_put(to->nf_bridge); | 454 | nf_bridge_put(to->nf_bridge); |
449 | to->nf_bridge = from->nf_bridge; | 455 | to->nf_bridge = from->nf_bridge; |
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index bb7ccfe33f23..971ba60bf6e9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig | |||
@@ -278,5 +278,19 @@ config IP6_NF_RAW | |||
278 | If you want to compile it as a module, say M here and read | 278 | If you want to compile it as a module, say M here and read |
279 | <file:Documentation/modules.txt>. If unsure, say `N'. | 279 | <file:Documentation/modules.txt>. If unsure, say `N'. |
280 | 280 | ||
281 | config NF_CONNTRACK_IPV6 | ||
282 | tristate "IPv6 support for new connection tracking (EXPERIMENTAL)" | ||
283 | depends on EXPERIMENTAL && NF_CONNTRACK | ||
284 | ---help--- | ||
285 | Connection tracking keeps a record of what packets have passed | ||
286 | through your machine, in order to figure out how they are related | ||
287 | into connections. | ||
288 | |||
289 | This is IPv6 support on Layer 3 independent connection tracking. | ||
290 | Layer 3 independent connection tracking is an experimental scheme | ||
291 | which generalizes ip_conntrack to support other layer 3 protocols. | ||
292 | |||
293 | To compile it as a module, choose M here. If unsure, say N. | ||
294 | |||
281 | endmenu | 295 | endmenu |
282 | 296 | ||
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b2c370e8b1c..9ab5b2ca1f59 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile | |||
@@ -27,3 +27,9 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o | |||
27 | obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o | 27 | obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o |
28 | obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o | 28 | obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o |
29 | obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o | 29 | obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o |
30 | |||
31 | # objects for l3 independent conntrack | ||
32 | nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o | ||
33 | |||
34 | # l3 independent conntrack | ||
35 | obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o | ||
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c index 0c7584f92172..eab8fb864ee0 100644 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c | |||
@@ -56,9 +56,9 @@ checkentry(const char *tablename, | |||
56 | return 1; | 56 | return 1; |
57 | } | 57 | } |
58 | 58 | ||
59 | static struct ip6t_target ip6t_mark_reg = { | 59 | static struct ip6t_target ip6t_mark_reg = { |
60 | .name = "MARK", | 60 | .name = "MARK", |
61 | .target = target, | 61 | .target = target, |
62 | .checkentry = checkentry, | 62 | .checkentry = checkentry, |
63 | .me = THIS_MODULE | 63 | .me = THIS_MODULE |
64 | }; | 64 | }; |
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c new file mode 100644 index 000000000000..e2c90b3a8074 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | |||
@@ -0,0 +1,556 @@ | |||
1 | /* | ||
2 | * Copyright (C)2004 USAGI/WIDE Project | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * Author: | ||
9 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
10 | * | ||
11 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
12 | * - support Layer 3 protocol independent connection tracking. | ||
13 | * Based on the original ip_conntrack code which had the following | ||
14 | * copyright information: | ||
15 | * (C) 1999-2001 Paul `Rusty' Russell | ||
16 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
17 | * | ||
18 | * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
19 | * - add get_features() to support various size of conntrack | ||
20 | * structures. | ||
21 | */ | ||
22 | |||
23 | #include <linux/config.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/ipv6.h> | ||
26 | #include <linux/in6.h> | ||
27 | #include <linux/netfilter.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/skbuff.h> | ||
30 | #include <linux/icmp.h> | ||
31 | #include <linux/sysctl.h> | ||
32 | #include <net/ipv6.h> | ||
33 | |||
34 | #include <linux/netfilter_ipv6.h> | ||
35 | #include <net/netfilter/nf_conntrack.h> | ||
36 | #include <net/netfilter/nf_conntrack_helper.h> | ||
37 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
38 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
39 | #include <net/netfilter/nf_conntrack_core.h> | ||
40 | |||
41 | #if 0 | ||
42 | #define DEBUGP printk | ||
43 | #else | ||
44 | #define DEBUGP(format, args...) | ||
45 | #endif | ||
46 | |||
47 | DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); | ||
48 | |||
49 | static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | ||
50 | struct nf_conntrack_tuple *tuple) | ||
51 | { | ||
52 | u_int32_t _addrs[8], *ap; | ||
53 | |||
54 | ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), | ||
55 | sizeof(_addrs), _addrs); | ||
56 | if (ap == NULL) | ||
57 | return 0; | ||
58 | |||
59 | memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); | ||
60 | memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); | ||
61 | |||
62 | return 1; | ||
63 | } | ||
64 | |||
65 | static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
66 | const struct nf_conntrack_tuple *orig) | ||
67 | { | ||
68 | memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); | ||
69 | memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); | ||
70 | |||
71 | return 1; | ||
72 | } | ||
73 | |||
74 | static int ipv6_print_tuple(struct seq_file *s, | ||
75 | const struct nf_conntrack_tuple *tuple) | ||
76 | { | ||
77 | return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ", | ||
78 | NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), | ||
79 | NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); | ||
80 | } | ||
81 | |||
/*
 * Print the layer 3 private part of the conntrack.  Nothing is printed
 * for IPv6, so this just returns 0.
 */
static int ipv6_print_conntrack(struct seq_file *s,
				const struct nf_conn *conntrack)
{
	const int nothing_printed = 0;

	return nothing_printed;
}
87 | |||
88 | /* | ||
89 | * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c | ||
90 | * | ||
91 | * This function parses (probably truncated) exthdr set "hdr" | ||
92 | * of length "len". "nexthdrp" initially points to some place, | ||
93 | * where type of the first header can be found. | ||
94 | * | ||
95 | * It skips all well-known exthdrs, and returns pointer to the start | ||
96 | * of unparsable area i.e. the first header with unknown type. | ||
97 | * if success, *nexthdr is updated by type/protocol of this header. | ||
98 | * | ||
99 | * NOTES: - it may return pointer pointing beyond end of packet, | ||
100 | * if the last recognized header is truncated in the middle. | ||
101 | * - if packet is truncated, so that all parsed headers are skipped, | ||
102 | * it returns -1. | ||
103 | * - if packet is fragmented, return pointer of the fragment header. | ||
104 | * - ESP is unparsable for now and considered like | ||
105 | * normal payload protocol. | ||
106 | * - Note also special handling of AUTH header. Thanks to IPsec wizards. | ||
107 | */ | ||
108 | |||
109 | int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, | ||
110 | int len) | ||
111 | { | ||
112 | u8 nexthdr = *nexthdrp; | ||
113 | |||
114 | while (ipv6_ext_hdr(nexthdr)) { | ||
115 | struct ipv6_opt_hdr hdr; | ||
116 | int hdrlen; | ||
117 | |||
118 | if (len < (int)sizeof(struct ipv6_opt_hdr)) | ||
119 | return -1; | ||
120 | if (nexthdr == NEXTHDR_NONE) | ||
121 | break; | ||
122 | if (nexthdr == NEXTHDR_FRAGMENT) | ||
123 | break; | ||
124 | if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) | ||
125 | BUG(); | ||
126 | if (nexthdr == NEXTHDR_AUTH) | ||
127 | hdrlen = (hdr.hdrlen+2)<<2; | ||
128 | else | ||
129 | hdrlen = ipv6_optlen(&hdr); | ||
130 | |||
131 | nexthdr = hdr.nexthdr; | ||
132 | len -= hdrlen; | ||
133 | start += hdrlen; | ||
134 | } | ||
135 | |||
136 | *nexthdrp = nexthdr; | ||
137 | return start; | ||
138 | } | ||
139 | |||
140 | static int | ||
141 | ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, | ||
142 | u_int8_t *protonum) | ||
143 | { | ||
144 | unsigned int extoff; | ||
145 | unsigned char pnum; | ||
146 | int protoff; | ||
147 | |||
148 | extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data; | ||
149 | pnum = (*pskb)->nh.ipv6h->nexthdr; | ||
150 | |||
151 | protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, | ||
152 | (*pskb)->len - extoff); | ||
153 | |||
154 | /* | ||
155 | * (protoff == (*pskb)->len) mean that the packet doesn't have no data | ||
156 | * except of IPv6 & ext headers. but it's tracked anyway. - YK | ||
157 | */ | ||
158 | if ((protoff < 0) || (protoff > (*pskb)->len)) { | ||
159 | DEBUGP("ip6_conntrack_core: can't find proto in pkt\n"); | ||
160 | NF_CT_STAT_INC(error); | ||
161 | NF_CT_STAT_INC(invalid); | ||
162 | return -NF_ACCEPT; | ||
163 | } | ||
164 | |||
165 | *dataoff = protoff; | ||
166 | *protonum = pnum; | ||
167 | return NF_ACCEPT; | ||
168 | } | ||
169 | |||
170 | static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple) | ||
171 | { | ||
172 | return NF_CT_F_BASIC; | ||
173 | } | ||
174 | |||
175 | static unsigned int ipv6_confirm(unsigned int hooknum, | ||
176 | struct sk_buff **pskb, | ||
177 | const struct net_device *in, | ||
178 | const struct net_device *out, | ||
179 | int (*okfn)(struct sk_buff *)) | ||
180 | { | ||
181 | struct nf_conn *ct; | ||
182 | enum ip_conntrack_info ctinfo; | ||
183 | |||
184 | /* This is where we call the helper: as the packet goes out. */ | ||
185 | ct = nf_ct_get(*pskb, &ctinfo); | ||
186 | if (ct && ct->helper) { | ||
187 | unsigned int ret, protoff; | ||
188 | unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1) | ||
189 | - (*pskb)->data; | ||
190 | unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr; | ||
191 | |||
192 | protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, | ||
193 | (*pskb)->len - extoff); | ||
194 | if (protoff < 0 || protoff > (*pskb)->len || | ||
195 | pnum == NEXTHDR_FRAGMENT) { | ||
196 | DEBUGP("proto header not found\n"); | ||
197 | return NF_ACCEPT; | ||
198 | } | ||
199 | |||
200 | ret = ct->helper->help(pskb, protoff, ct, ctinfo); | ||
201 | if (ret != NF_ACCEPT) | ||
202 | return ret; | ||
203 | } | ||
204 | |||
205 | /* We've seen it coming out the other side: confirm it */ | ||
206 | |||
207 | return nf_conntrack_confirm(pskb); | ||
208 | } | ||
209 | |||
210 | extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb); | ||
211 | extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, | ||
212 | struct net_device *in, | ||
213 | struct net_device *out, | ||
214 | int (*okfn)(struct sk_buff *)); | ||
215 | static unsigned int ipv6_defrag(unsigned int hooknum, | ||
216 | struct sk_buff **pskb, | ||
217 | const struct net_device *in, | ||
218 | const struct net_device *out, | ||
219 | int (*okfn)(struct sk_buff *)) | ||
220 | { | ||
221 | struct sk_buff *reasm; | ||
222 | |||
223 | /* Previously seen (loopback)? */ | ||
224 | if ((*pskb)->nfct) | ||
225 | return NF_ACCEPT; | ||
226 | |||
227 | reasm = nf_ct_frag6_gather(*pskb); | ||
228 | |||
229 | /* queued */ | ||
230 | if (reasm == NULL) | ||
231 | return NF_STOLEN; | ||
232 | |||
233 | /* error occured or not fragmented */ | ||
234 | if (reasm == *pskb) | ||
235 | return NF_ACCEPT; | ||
236 | |||
237 | nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, | ||
238 | (struct net_device *)out, okfn); | ||
239 | |||
240 | return NF_STOLEN; | ||
241 | } | ||
242 | |||
243 | static unsigned int ipv6_conntrack_in(unsigned int hooknum, | ||
244 | struct sk_buff **pskb, | ||
245 | const struct net_device *in, | ||
246 | const struct net_device *out, | ||
247 | int (*okfn)(struct sk_buff *)) | ||
248 | { | ||
249 | struct sk_buff *reasm = (*pskb)->nfct_reasm; | ||
250 | |||
251 | /* This packet is fragmented and has reassembled packet. */ | ||
252 | if (reasm) { | ||
253 | /* Reassembled packet isn't parsed yet ? */ | ||
254 | if (!reasm->nfct) { | ||
255 | unsigned int ret; | ||
256 | |||
257 | ret = nf_conntrack_in(PF_INET6, hooknum, &reasm); | ||
258 | if (ret != NF_ACCEPT) | ||
259 | return ret; | ||
260 | } | ||
261 | nf_conntrack_get(reasm->nfct); | ||
262 | (*pskb)->nfct = reasm->nfct; | ||
263 | return NF_ACCEPT; | ||
264 | } | ||
265 | |||
266 | return nf_conntrack_in(PF_INET6, hooknum, pskb); | ||
267 | } | ||
268 | |||
269 | static unsigned int ipv6_conntrack_local(unsigned int hooknum, | ||
270 | struct sk_buff **pskb, | ||
271 | const struct net_device *in, | ||
272 | const struct net_device *out, | ||
273 | int (*okfn)(struct sk_buff *)) | ||
274 | { | ||
275 | /* root is playing with raw sockets. */ | ||
276 | if ((*pskb)->len < sizeof(struct ipv6hdr)) { | ||
277 | if (net_ratelimit()) | ||
278 | printk("ipv6_conntrack_local: packet too short\n"); | ||
279 | return NF_ACCEPT; | ||
280 | } | ||
281 | return ipv6_conntrack_in(hooknum, pskb, in, out, okfn); | ||
282 | } | ||
283 | |||
284 | /* Connection tracking may drop packets, but never alters them, so | ||
285 | make it the first hook. */ | ||
286 | static struct nf_hook_ops ipv6_conntrack_defrag_ops = { | ||
287 | .hook = ipv6_defrag, | ||
288 | .owner = THIS_MODULE, | ||
289 | .pf = PF_INET6, | ||
290 | .hooknum = NF_IP6_PRE_ROUTING, | ||
291 | .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, | ||
292 | }; | ||
293 | |||
294 | static struct nf_hook_ops ipv6_conntrack_in_ops = { | ||
295 | .hook = ipv6_conntrack_in, | ||
296 | .owner = THIS_MODULE, | ||
297 | .pf = PF_INET6, | ||
298 | .hooknum = NF_IP6_PRE_ROUTING, | ||
299 | .priority = NF_IP6_PRI_CONNTRACK, | ||
300 | }; | ||
301 | |||
302 | static struct nf_hook_ops ipv6_conntrack_local_out_ops = { | ||
303 | .hook = ipv6_conntrack_local, | ||
304 | .owner = THIS_MODULE, | ||
305 | .pf = PF_INET6, | ||
306 | .hooknum = NF_IP6_LOCAL_OUT, | ||
307 | .priority = NF_IP6_PRI_CONNTRACK, | ||
308 | }; | ||
309 | |||
310 | static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = { | ||
311 | .hook = ipv6_defrag, | ||
312 | .owner = THIS_MODULE, | ||
313 | .pf = PF_INET6, | ||
314 | .hooknum = NF_IP6_LOCAL_OUT, | ||
315 | .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, | ||
316 | }; | ||
317 | |||
318 | /* Refragmenter; last chance. */ | ||
319 | static struct nf_hook_ops ipv6_conntrack_out_ops = { | ||
320 | .hook = ipv6_confirm, | ||
321 | .owner = THIS_MODULE, | ||
322 | .pf = PF_INET6, | ||
323 | .hooknum = NF_IP6_POST_ROUTING, | ||
324 | .priority = NF_IP6_PRI_LAST, | ||
325 | }; | ||
326 | |||
327 | static struct nf_hook_ops ipv6_conntrack_local_in_ops = { | ||
328 | .hook = ipv6_confirm, | ||
329 | .owner = THIS_MODULE, | ||
330 | .pf = PF_INET6, | ||
331 | .hooknum = NF_IP6_LOCAL_IN, | ||
332 | .priority = NF_IP6_PRI_LAST-1, | ||
333 | }; | ||
334 | |||
335 | #ifdef CONFIG_SYSCTL | ||
336 | |||
337 | /* From nf_conntrack_proto_icmpv6.c */ | ||
338 | extern unsigned long nf_ct_icmpv6_timeout; | ||
339 | |||
340 | /* From nf_conntrack_frag6.c */ | ||
341 | extern unsigned long nf_ct_frag6_timeout; | ||
342 | extern unsigned long nf_ct_frag6_low_thresh; | ||
343 | extern unsigned long nf_ct_frag6_high_thresh; | ||
344 | |||
345 | static struct ctl_table_header *nf_ct_ipv6_sysctl_header; | ||
346 | |||
347 | static ctl_table nf_ct_sysctl_table[] = { | ||
348 | { | ||
349 | .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT, | ||
350 | .procname = "nf_conntrack_icmpv6_timeout", | ||
351 | .data = &nf_ct_icmpv6_timeout, | ||
352 | .maxlen = sizeof(unsigned int), | ||
353 | .mode = 0644, | ||
354 | .proc_handler = &proc_dointvec_jiffies, | ||
355 | }, | ||
356 | { | ||
357 | .ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT, | ||
358 | .procname = "nf_conntrack_frag6_timeout", | ||
359 | .data = &nf_ct_frag6_timeout, | ||
360 | .maxlen = sizeof(unsigned int), | ||
361 | .mode = 0644, | ||
362 | .proc_handler = &proc_dointvec_jiffies, | ||
363 | }, | ||
364 | { | ||
365 | .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, | ||
366 | .procname = "nf_conntrack_frag6_low_thresh", | ||
367 | .data = &nf_ct_frag6_low_thresh, | ||
368 | .maxlen = sizeof(unsigned int), | ||
369 | .mode = 0644, | ||
370 | .proc_handler = &proc_dointvec_jiffies, | ||
371 | }, | ||
372 | { | ||
373 | .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, | ||
374 | .procname = "nf_conntrack_frag6_high_thresh", | ||
375 | .data = &nf_ct_frag6_high_thresh, | ||
376 | .maxlen = sizeof(unsigned int), | ||
377 | .mode = 0644, | ||
378 | .proc_handler = &proc_dointvec_jiffies, | ||
379 | }, | ||
380 | { .ctl_name = 0 } | ||
381 | }; | ||
382 | |||
383 | static ctl_table nf_ct_netfilter_table[] = { | ||
384 | { | ||
385 | .ctl_name = NET_NETFILTER, | ||
386 | .procname = "netfilter", | ||
387 | .mode = 0555, | ||
388 | .child = nf_ct_sysctl_table, | ||
389 | }, | ||
390 | { .ctl_name = 0 } | ||
391 | }; | ||
392 | |||
393 | static ctl_table nf_ct_net_table[] = { | ||
394 | { | ||
395 | .ctl_name = CTL_NET, | ||
396 | .procname = "net", | ||
397 | .mode = 0555, | ||
398 | .child = nf_ct_netfilter_table, | ||
399 | }, | ||
400 | { .ctl_name = 0 } | ||
401 | }; | ||
402 | #endif | ||
403 | |||
404 | struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { | ||
405 | .l3proto = PF_INET6, | ||
406 | .name = "ipv6", | ||
407 | .pkt_to_tuple = ipv6_pkt_to_tuple, | ||
408 | .invert_tuple = ipv6_invert_tuple, | ||
409 | .print_tuple = ipv6_print_tuple, | ||
410 | .print_conntrack = ipv6_print_conntrack, | ||
411 | .prepare = ipv6_prepare, | ||
412 | .get_features = ipv6_get_features, | ||
413 | .me = THIS_MODULE, | ||
414 | }; | ||
415 | |||
416 | extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6; | ||
417 | extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6; | ||
418 | extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6; | ||
419 | extern int nf_ct_frag6_init(void); | ||
420 | extern void nf_ct_frag6_cleanup(void); | ||
421 | static int init_or_cleanup(int init) | ||
422 | { | ||
423 | int ret = 0; | ||
424 | |||
425 | if (!init) goto cleanup; | ||
426 | |||
427 | ret = nf_ct_frag6_init(); | ||
428 | if (ret < 0) { | ||
429 | printk("nf_conntrack_ipv6: can't initialize frag6.\n"); | ||
430 | goto cleanup_nothing; | ||
431 | } | ||
432 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6); | ||
433 | if (ret < 0) { | ||
434 | printk("nf_conntrack_ipv6: can't register tcp.\n"); | ||
435 | goto cleanup_frag6; | ||
436 | } | ||
437 | |||
438 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6); | ||
439 | if (ret < 0) { | ||
440 | printk("nf_conntrack_ipv6: can't register udp.\n"); | ||
441 | goto cleanup_tcp; | ||
442 | } | ||
443 | |||
444 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6); | ||
445 | if (ret < 0) { | ||
446 | printk("nf_conntrack_ipv6: can't register icmpv6.\n"); | ||
447 | goto cleanup_udp; | ||
448 | } | ||
449 | |||
450 | ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6); | ||
451 | if (ret < 0) { | ||
452 | printk("nf_conntrack_ipv6: can't register ipv6\n"); | ||
453 | goto cleanup_icmpv6; | ||
454 | } | ||
455 | |||
456 | ret = nf_register_hook(&ipv6_conntrack_defrag_ops); | ||
457 | if (ret < 0) { | ||
458 | printk("nf_conntrack_ipv6: can't register pre-routing defrag " | ||
459 | "hook.\n"); | ||
460 | goto cleanup_ipv6; | ||
461 | } | ||
462 | |||
463 | ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops); | ||
464 | if (ret < 0) { | ||
465 | printk("nf_conntrack_ipv6: can't register local_out defrag " | ||
466 | "hook.\n"); | ||
467 | goto cleanup_defragops; | ||
468 | } | ||
469 | |||
470 | ret = nf_register_hook(&ipv6_conntrack_in_ops); | ||
471 | if (ret < 0) { | ||
472 | printk("nf_conntrack_ipv6: can't register pre-routing hook.\n"); | ||
473 | goto cleanup_defraglocalops; | ||
474 | } | ||
475 | |||
476 | ret = nf_register_hook(&ipv6_conntrack_local_out_ops); | ||
477 | if (ret < 0) { | ||
478 | printk("nf_conntrack_ipv6: can't register local out hook.\n"); | ||
479 | goto cleanup_inops; | ||
480 | } | ||
481 | |||
482 | ret = nf_register_hook(&ipv6_conntrack_out_ops); | ||
483 | if (ret < 0) { | ||
484 | printk("nf_conntrack_ipv6: can't register post-routing hook.\n"); | ||
485 | goto cleanup_inandlocalops; | ||
486 | } | ||
487 | |||
488 | ret = nf_register_hook(&ipv6_conntrack_local_in_ops); | ||
489 | if (ret < 0) { | ||
490 | printk("nf_conntrack_ipv6: can't register local in hook.\n"); | ||
491 | goto cleanup_inoutandlocalops; | ||
492 | } | ||
493 | |||
494 | #ifdef CONFIG_SYSCTL | ||
495 | nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); | ||
496 | if (nf_ct_ipv6_sysctl_header == NULL) { | ||
497 | printk("nf_conntrack: can't register to sysctl.\n"); | ||
498 | ret = -ENOMEM; | ||
499 | goto cleanup_localinops; | ||
500 | } | ||
501 | #endif | ||
502 | return ret; | ||
503 | |||
504 | cleanup: | ||
505 | synchronize_net(); | ||
506 | #ifdef CONFIG_SYSCTL | ||
507 | unregister_sysctl_table(nf_ct_ipv6_sysctl_header); | ||
508 | cleanup_localinops: | ||
509 | #endif | ||
510 | nf_unregister_hook(&ipv6_conntrack_local_in_ops); | ||
511 | cleanup_inoutandlocalops: | ||
512 | nf_unregister_hook(&ipv6_conntrack_out_ops); | ||
513 | cleanup_inandlocalops: | ||
514 | nf_unregister_hook(&ipv6_conntrack_local_out_ops); | ||
515 | cleanup_inops: | ||
516 | nf_unregister_hook(&ipv6_conntrack_in_ops); | ||
517 | cleanup_defraglocalops: | ||
518 | nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops); | ||
519 | cleanup_defragops: | ||
520 | nf_unregister_hook(&ipv6_conntrack_defrag_ops); | ||
521 | cleanup_ipv6: | ||
522 | nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); | ||
523 | cleanup_icmpv6: | ||
524 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6); | ||
525 | cleanup_udp: | ||
526 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6); | ||
527 | cleanup_tcp: | ||
528 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6); | ||
529 | cleanup_frag6: | ||
530 | nf_ct_frag6_cleanup(); | ||
531 | cleanup_nothing: | ||
532 | return ret; | ||
533 | } | ||
534 | |||
535 | MODULE_LICENSE("GPL"); | ||
536 | MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); | ||
537 | |||
538 | static int __init init(void) | ||
539 | { | ||
540 | need_nf_conntrack(); | ||
541 | return init_or_cleanup(1); | ||
542 | } | ||
543 | |||
544 | static void __exit fini(void) | ||
545 | { | ||
546 | init_or_cleanup(0); | ||
547 | } | ||
548 | |||
549 | module_init(init); | ||
550 | module_exit(fini); | ||
551 | |||
/* Intentionally empty; the symbol is exported for other modules to reference. */
void need_ip6_conntrack(void)
{
	return;
}
555 | |||
556 | EXPORT_SYMBOL(need_ip6_conntrack); | ||
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c new file mode 100644 index 000000000000..c0f1da5497a9 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | |||
@@ -0,0 +1,272 @@ | |||
1 | /* | ||
2 | * Copyright (C)2003,2004 USAGI/WIDE Project | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * Author: | ||
9 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
10 | * | ||
11 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
12 | * - ICMPv6 tracking support. Derived from the original ip_conntrack code | ||
13 | * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following | ||
14 | * copyright information: | ||
15 | * (C) 1999-2001 Paul `Rusty' Russell | ||
16 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
17 | */ | ||
18 | |||
19 | #include <linux/types.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/timer.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/netfilter.h> | ||
24 | #include <linux/in6.h> | ||
25 | #include <linux/icmpv6.h> | ||
26 | #include <linux/ipv6.h> | ||
27 | #include <net/ipv6.h> | ||
28 | #include <net/ip6_checksum.h> | ||
29 | #include <linux/seq_file.h> | ||
30 | #include <linux/netfilter_ipv6.h> | ||
31 | #include <net/netfilter/nf_conntrack_tuple.h> | ||
32 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
33 | #include <net/netfilter/nf_conntrack_core.h> | ||
34 | #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> | ||
35 | |||
36 | unsigned long nf_ct_icmpv6_timeout = 30*HZ; | ||
37 | |||
38 | #if 0 | ||
39 | #define DEBUGP printk | ||
40 | #else | ||
41 | #define DEBUGP(format, args...) | ||
42 | #endif | ||
43 | |||
44 | static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, | ||
45 | unsigned int dataoff, | ||
46 | struct nf_conntrack_tuple *tuple) | ||
47 | { | ||
48 | struct icmp6hdr _hdr, *hp; | ||
49 | |||
50 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
51 | if (hp == NULL) | ||
52 | return 0; | ||
53 | tuple->dst.u.icmp.type = hp->icmp6_type; | ||
54 | tuple->src.u.icmp.id = hp->icmp6_identifier; | ||
55 | tuple->dst.u.icmp.code = hp->icmp6_code; | ||
56 | |||
57 | return 1; | ||
58 | } | ||
59 | |||
60 | static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
61 | const struct nf_conntrack_tuple *orig) | ||
62 | { | ||
63 | /* Add 1; spaces filled with 0. */ | ||
64 | static u_int8_t invmap[] = { | ||
65 | [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, | ||
66 | [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, | ||
67 | [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, | ||
68 | [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 | ||
69 | }; | ||
70 | |||
71 | __u8 type = orig->dst.u.icmp.type - 128; | ||
72 | if (type >= sizeof(invmap) || !invmap[type]) | ||
73 | return 0; | ||
74 | |||
75 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | ||
76 | tuple->dst.u.icmp.type = invmap[type] - 1; | ||
77 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; | ||
78 | return 1; | ||
79 | } | ||
80 | |||
81 | /* Print out the per-protocol part of the tuple. */ | ||
82 | static int icmpv6_print_tuple(struct seq_file *s, | ||
83 | const struct nf_conntrack_tuple *tuple) | ||
84 | { | ||
85 | return seq_printf(s, "type=%u code=%u id=%u ", | ||
86 | tuple->dst.u.icmp.type, | ||
87 | tuple->dst.u.icmp.code, | ||
88 | ntohs(tuple->src.u.icmp.id)); | ||
89 | } | ||
90 | |||
/*
 * Print the protocol-private part of the conntrack.
 * Nothing is printed for ICMPv6; always returns 0.
 */
static int icmpv6_print_conntrack(struct seq_file *s,
				  const struct nf_conn *conntrack)
{
	/* nothing to print */
	return 0;
}
97 | |||
98 | /* Returns verdict for packet, or -1 for invalid. */ | ||
99 | static int icmpv6_packet(struct nf_conn *ct, | ||
100 | const struct sk_buff *skb, | ||
101 | unsigned int dataoff, | ||
102 | enum ip_conntrack_info ctinfo, | ||
103 | int pf, | ||
104 | unsigned int hooknum) | ||
105 | { | ||
106 | /* Try to delete connection immediately after all replies: | ||
107 | won't actually vanish as we still have skb, and del_timer | ||
108 | means this will only run once even if count hits zero twice | ||
109 | (theoretically possible with SMP) */ | ||
110 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { | ||
111 | if (atomic_dec_and_test(&ct->proto.icmp.count) | ||
112 | && del_timer(&ct->timeout)) | ||
113 | ct->timeout.function((unsigned long)ct); | ||
114 | } else { | ||
115 | atomic_inc(&ct->proto.icmp.count); | ||
116 | nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
117 | nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); | ||
118 | } | ||
119 | |||
120 | return NF_ACCEPT; | ||
121 | } | ||
122 | |||
123 | /* Called when a new connection for this protocol found. */ | ||
124 | static int icmpv6_new(struct nf_conn *conntrack, | ||
125 | const struct sk_buff *skb, | ||
126 | unsigned int dataoff) | ||
127 | { | ||
128 | static u_int8_t valid_new[] = { | ||
129 | [ICMPV6_ECHO_REQUEST - 128] = 1, | ||
130 | [ICMPV6_NI_QUERY - 128] = 1 | ||
131 | }; | ||
132 | |||
133 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new) | ||
134 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) { | ||
135 | /* Can't create a new ICMPv6 `conn' with this. */ | ||
136 | DEBUGP("icmp: can't create new conn with type %u\n", | ||
137 | conntrack->tuplehash[0].tuple.dst.u.icmp.type); | ||
138 | NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); | ||
139 | return 0; | ||
140 | } | ||
141 | atomic_set(&conntrack->proto.icmp.count, 0); | ||
142 | return 1; | ||
143 | } | ||
144 | |||
145 | extern int | ||
146 | nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len); | ||
147 | extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; | ||
148 | static int | ||
149 | icmpv6_error_message(struct sk_buff *skb, | ||
150 | unsigned int icmp6off, | ||
151 | enum ip_conntrack_info *ctinfo, | ||
152 | unsigned int hooknum) | ||
153 | { | ||
154 | struct nf_conntrack_tuple intuple, origtuple; | ||
155 | struct nf_conntrack_tuple_hash *h; | ||
156 | struct icmp6hdr _hdr, *hp; | ||
157 | unsigned int inip6off; | ||
158 | struct nf_conntrack_protocol *inproto; | ||
159 | u_int8_t inprotonum; | ||
160 | unsigned int inprotoff; | ||
161 | |||
162 | NF_CT_ASSERT(skb->nfct == NULL); | ||
163 | |||
164 | hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr); | ||
165 | if (hp == NULL) { | ||
166 | DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n"); | ||
167 | return -NF_ACCEPT; | ||
168 | } | ||
169 | |||
170 | inip6off = icmp6off + sizeof(_hdr); | ||
171 | if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr), | ||
172 | &inprotonum, sizeof(inprotonum)) != 0) { | ||
173 | DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n"); | ||
174 | return -NF_ACCEPT; | ||
175 | } | ||
176 | inprotoff = nf_ct_ipv6_skip_exthdr(skb, | ||
177 | inip6off + sizeof(struct ipv6hdr), | ||
178 | &inprotonum, | ||
179 | skb->len - inip6off | ||
180 | - sizeof(struct ipv6hdr)); | ||
181 | |||
182 | if ((inprotoff < 0) || (inprotoff > skb->len) || | ||
183 | (inprotonum == NEXTHDR_FRAGMENT)) { | ||
184 | DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n"); | ||
185 | return -NF_ACCEPT; | ||
186 | } | ||
187 | |||
188 | inproto = nf_ct_find_proto(PF_INET6, inprotonum); | ||
189 | |||
190 | /* Are they talking about one of our connections? */ | ||
191 | if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, | ||
192 | &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { | ||
193 | DEBUGP("icmpv6_error: Can't get tuple\n"); | ||
194 | return -NF_ACCEPT; | ||
195 | } | ||
196 | |||
197 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | ||
198 | been preserved inside the ICMP. */ | ||
199 | if (!nf_ct_invert_tuple(&intuple, &origtuple, | ||
200 | &nf_conntrack_l3proto_ipv6, inproto)) { | ||
201 | DEBUGP("icmpv6_error: Can't invert tuple\n"); | ||
202 | return -NF_ACCEPT; | ||
203 | } | ||
204 | |||
205 | *ctinfo = IP_CT_RELATED; | ||
206 | |||
207 | h = nf_conntrack_find_get(&intuple, NULL); | ||
208 | if (!h) { | ||
209 | DEBUGP("icmpv6_error: no match\n"); | ||
210 | return -NF_ACCEPT; | ||
211 | } else { | ||
212 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | ||
213 | *ctinfo += IP_CT_IS_REPLY; | ||
214 | } | ||
215 | |||
216 | /* Update skb to refer to this connection */ | ||
217 | skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; | ||
218 | skb->nfctinfo = *ctinfo; | ||
219 | return -NF_ACCEPT; | ||
220 | } | ||
221 | |||
222 | static int | ||
223 | icmpv6_error(struct sk_buff *skb, unsigned int dataoff, | ||
224 | enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) | ||
225 | { | ||
226 | struct icmp6hdr _ih, *icmp6h; | ||
227 | |||
228 | icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); | ||
229 | if (icmp6h == NULL) { | ||
230 | if (LOG_INVALID(IPPROTO_ICMPV6)) | ||
231 | nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, | ||
232 | "nf_ct_icmpv6: short packet "); | ||
233 | return -NF_ACCEPT; | ||
234 | } | ||
235 | |||
236 | if (hooknum != NF_IP6_PRE_ROUTING) | ||
237 | goto skipped; | ||
238 | |||
239 | /* Ignore it if the checksum's bogus. */ | ||
240 | if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, | ||
241 | skb->len - dataoff, IPPROTO_ICMPV6, | ||
242 | skb_checksum(skb, dataoff, | ||
243 | skb->len - dataoff, 0))) { | ||
244 | nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, | ||
245 | "nf_ct_icmpv6: ICMPv6 checksum failed\n"); | ||
246 | return -NF_ACCEPT; | ||
247 | } | ||
248 | |||
249 | skipped: | ||
250 | |||
251 | /* is not error message ? */ | ||
252 | if (icmp6h->icmp6_type >= 128) | ||
253 | return NF_ACCEPT; | ||
254 | |||
255 | return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); | ||
256 | } | ||
257 | |||
258 | struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = | ||
259 | { | ||
260 | .l3proto = PF_INET6, | ||
261 | .proto = IPPROTO_ICMPV6, | ||
262 | .name = "icmpv6", | ||
263 | .pkt_to_tuple = icmpv6_pkt_to_tuple, | ||
264 | .invert_tuple = icmpv6_invert_tuple, | ||
265 | .print_tuple = icmpv6_print_tuple, | ||
266 | .print_conntrack = icmpv6_print_conntrack, | ||
267 | .packet = icmpv6_packet, | ||
268 | .new = icmpv6_new, | ||
269 | .error = icmpv6_error, | ||
270 | }; | ||
271 | |||
272 | EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); | ||
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c new file mode 100644 index 000000000000..7640b9bb7694 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | |||
@@ -0,0 +1,885 @@ | |||
1 | /* | ||
2 | * IPv6 fragment reassembly for connection tracking | ||
3 | * | ||
4 | * Copyright (C)2004 USAGI/WIDE Project | ||
5 | * | ||
6 | * Author: | ||
7 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
8 | * | ||
9 | * Based on: net/ipv6/reassembly.c | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License | ||
13 | * as published by the Free Software Foundation; either version | ||
14 | * 2 of the License, or (at your option) any later version. | ||
15 | */ | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/string.h> | ||
21 | #include <linux/socket.h> | ||
22 | #include <linux/sockios.h> | ||
23 | #include <linux/jiffies.h> | ||
24 | #include <linux/net.h> | ||
25 | #include <linux/list.h> | ||
26 | #include <linux/netdevice.h> | ||
27 | #include <linux/in6.h> | ||
28 | #include <linux/ipv6.h> | ||
29 | #include <linux/icmpv6.h> | ||
30 | #include <linux/random.h> | ||
31 | #include <linux/jhash.h> | ||
32 | |||
33 | #include <net/sock.h> | ||
34 | #include <net/snmp.h> | ||
35 | |||
36 | #include <net/ipv6.h> | ||
37 | #include <net/protocol.h> | ||
38 | #include <net/transp_v6.h> | ||
39 | #include <net/rawv6.h> | ||
40 | #include <net/ndisc.h> | ||
41 | #include <net/addrconf.h> | ||
42 | #include <linux/sysctl.h> | ||
43 | #include <linux/netfilter.h> | ||
44 | #include <linux/netfilter_ipv6.h> | ||
45 | #include <linux/kernel.h> | ||
46 | #include <linux/module.h> | ||
47 | |||
48 | #if 0 | ||
49 | #define DEBUGP printk | ||
50 | #else | ||
51 | #define DEBUGP(format, args...) | ||
52 | #endif | ||
53 | |||
54 | #define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */ | ||
55 | #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ | ||
56 | #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT | ||
57 | |||
58 | int nf_ct_frag6_high_thresh = 256*1024; | ||
59 | int nf_ct_frag6_low_thresh = 192*1024; | ||
60 | int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; | ||
61 | |||
62 | struct nf_ct_frag6_skb_cb | ||
63 | { | ||
64 | struct inet6_skb_parm h; | ||
65 | int offset; | ||
66 | struct sk_buff *orig; | ||
67 | }; | ||
68 | |||
69 | #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) | ||
70 | |||
71 | struct nf_ct_frag6_queue | ||
72 | { | ||
73 | struct nf_ct_frag6_queue *next; | ||
74 | struct list_head lru_list; /* lru list member */ | ||
75 | |||
76 | __u32 id; /* fragment id */ | ||
77 | struct in6_addr saddr; | ||
78 | struct in6_addr daddr; | ||
79 | |||
80 | spinlock_t lock; | ||
81 | atomic_t refcnt; | ||
82 | struct timer_list timer; /* expire timer */ | ||
83 | struct sk_buff *fragments; | ||
84 | int len; | ||
85 | int meat; | ||
86 | struct timeval stamp; | ||
87 | unsigned int csum; | ||
88 | __u8 last_in; /* has first/last segment arrived? */ | ||
89 | #define COMPLETE 4 | ||
90 | #define FIRST_IN 2 | ||
91 | #define LAST_IN 1 | ||
92 | __u16 nhoffset; | ||
93 | struct nf_ct_frag6_queue **pprev; | ||
94 | }; | ||
95 | |||
96 | /* Hash table. */ | ||
97 | |||
98 | #define FRAG6Q_HASHSZ 64 | ||
99 | |||
100 | static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; | ||
101 | static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; | ||
102 | static u32 nf_ct_frag6_hash_rnd; | ||
103 | static LIST_HEAD(nf_ct_frag6_lru_list); | ||
104 | int nf_ct_frag6_nqueues = 0; | ||
105 | |||
106 | static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) | ||
107 | { | ||
108 | if (fq->next) | ||
109 | fq->next->pprev = fq->pprev; | ||
110 | *fq->pprev = fq->next; | ||
111 | list_del(&fq->lru_list); | ||
112 | nf_ct_frag6_nqueues--; | ||
113 | } | ||
114 | |||
115 | static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) | ||
116 | { | ||
117 | write_lock(&nf_ct_frag6_lock); | ||
118 | __fq_unlink(fq); | ||
119 | write_unlock(&nf_ct_frag6_lock); | ||
120 | } | ||
121 | |||
122 | static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, | ||
123 | struct in6_addr *daddr) | ||
124 | { | ||
125 | u32 a, b, c; | ||
126 | |||
127 | a = saddr->s6_addr32[0]; | ||
128 | b = saddr->s6_addr32[1]; | ||
129 | c = saddr->s6_addr32[2]; | ||
130 | |||
131 | a += JHASH_GOLDEN_RATIO; | ||
132 | b += JHASH_GOLDEN_RATIO; | ||
133 | c += nf_ct_frag6_hash_rnd; | ||
134 | __jhash_mix(a, b, c); | ||
135 | |||
136 | a += saddr->s6_addr32[3]; | ||
137 | b += daddr->s6_addr32[0]; | ||
138 | c += daddr->s6_addr32[1]; | ||
139 | __jhash_mix(a, b, c); | ||
140 | |||
141 | a += daddr->s6_addr32[2]; | ||
142 | b += daddr->s6_addr32[3]; | ||
143 | c += id; | ||
144 | __jhash_mix(a, b, c); | ||
145 | |||
146 | return c & (FRAG6Q_HASHSZ - 1); | ||
147 | } | ||
148 | |||
149 | static struct timer_list nf_ct_frag6_secret_timer; | ||
150 | int nf_ct_frag6_secret_interval = 10 * 60 * HZ; | ||
151 | |||
152 | static void nf_ct_frag6_secret_rebuild(unsigned long dummy) | ||
153 | { | ||
154 | unsigned long now = jiffies; | ||
155 | int i; | ||
156 | |||
157 | write_lock(&nf_ct_frag6_lock); | ||
158 | get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); | ||
159 | for (i = 0; i < FRAG6Q_HASHSZ; i++) { | ||
160 | struct nf_ct_frag6_queue *q; | ||
161 | |||
162 | q = nf_ct_frag6_hash[i]; | ||
163 | while (q) { | ||
164 | struct nf_ct_frag6_queue *next = q->next; | ||
165 | unsigned int hval = ip6qhashfn(q->id, | ||
166 | &q->saddr, | ||
167 | &q->daddr); | ||
168 | |||
169 | if (hval != i) { | ||
170 | /* Unlink. */ | ||
171 | if (q->next) | ||
172 | q->next->pprev = q->pprev; | ||
173 | *q->pprev = q->next; | ||
174 | |||
175 | /* Relink to new hash chain. */ | ||
176 | if ((q->next = nf_ct_frag6_hash[hval]) != NULL) | ||
177 | q->next->pprev = &q->next; | ||
178 | nf_ct_frag6_hash[hval] = q; | ||
179 | q->pprev = &nf_ct_frag6_hash[hval]; | ||
180 | } | ||
181 | |||
182 | q = next; | ||
183 | } | ||
184 | } | ||
185 | write_unlock(&nf_ct_frag6_lock); | ||
186 | |||
187 | mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval); | ||
188 | } | ||
189 | |||
190 | atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); | ||
191 | |||
192 | /* Memory Tracking Functions. */ | ||
193 | static inline void frag_kfree_skb(struct sk_buff *skb) | ||
194 | { | ||
195 | atomic_sub(skb->truesize, &nf_ct_frag6_mem); | ||
196 | if (NFCT_FRAG6_CB(skb)->orig) | ||
197 | kfree_skb(NFCT_FRAG6_CB(skb)->orig); | ||
198 | |||
199 | kfree_skb(skb); | ||
200 | } | ||
201 | |||
202 | static inline void frag_free_queue(struct nf_ct_frag6_queue *fq) | ||
203 | { | ||
204 | atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); | ||
205 | kfree(fq); | ||
206 | } | ||
207 | |||
208 | static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) | ||
209 | { | ||
210 | struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); | ||
211 | |||
212 | if (!fq) | ||
213 | return NULL; | ||
214 | atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); | ||
215 | return fq; | ||
216 | } | ||
217 | |||
218 | /* Destruction primitives. */ | ||
219 | |||
220 | /* Complete destruction of fq. */ | ||
221 | static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq) | ||
222 | { | ||
223 | struct sk_buff *fp; | ||
224 | |||
225 | BUG_TRAP(fq->last_in&COMPLETE); | ||
226 | BUG_TRAP(del_timer(&fq->timer) == 0); | ||
227 | |||
228 | /* Release all fragment data. */ | ||
229 | fp = fq->fragments; | ||
230 | while (fp) { | ||
231 | struct sk_buff *xp = fp->next; | ||
232 | |||
233 | frag_kfree_skb(fp); | ||
234 | fp = xp; | ||
235 | } | ||
236 | |||
237 | frag_free_queue(fq); | ||
238 | } | ||
239 | |||
240 | static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) | ||
241 | { | ||
242 | if (atomic_dec_and_test(&fq->refcnt)) | ||
243 | nf_ct_frag6_destroy(fq); | ||
244 | } | ||
245 | |||
246 | /* Kill fq entry. It is not destroyed immediately, | ||
247 | * because caller (and someone more) holds reference count. | ||
248 | */ | ||
249 | static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) | ||
250 | { | ||
251 | if (del_timer(&fq->timer)) | ||
252 | atomic_dec(&fq->refcnt); | ||
253 | |||
254 | if (!(fq->last_in & COMPLETE)) { | ||
255 | fq_unlink(fq); | ||
256 | atomic_dec(&fq->refcnt); | ||
257 | fq->last_in |= COMPLETE; | ||
258 | } | ||
259 | } | ||
260 | |||
261 | static void nf_ct_frag6_evictor(void) | ||
262 | { | ||
263 | struct nf_ct_frag6_queue *fq; | ||
264 | struct list_head *tmp; | ||
265 | |||
266 | for (;;) { | ||
267 | if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh) | ||
268 | return; | ||
269 | read_lock(&nf_ct_frag6_lock); | ||
270 | if (list_empty(&nf_ct_frag6_lru_list)) { | ||
271 | read_unlock(&nf_ct_frag6_lock); | ||
272 | return; | ||
273 | } | ||
274 | tmp = nf_ct_frag6_lru_list.next; | ||
275 | fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); | ||
276 | atomic_inc(&fq->refcnt); | ||
277 | read_unlock(&nf_ct_frag6_lock); | ||
278 | |||
279 | spin_lock(&fq->lock); | ||
280 | if (!(fq->last_in&COMPLETE)) | ||
281 | fq_kill(fq); | ||
282 | spin_unlock(&fq->lock); | ||
283 | |||
284 | fq_put(fq); | ||
285 | } | ||
286 | } | ||
287 | |||
288 | static void nf_ct_frag6_expire(unsigned long data) | ||
289 | { | ||
290 | struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; | ||
291 | |||
292 | spin_lock(&fq->lock); | ||
293 | |||
294 | if (fq->last_in & COMPLETE) | ||
295 | goto out; | ||
296 | |||
297 | fq_kill(fq); | ||
298 | |||
299 | out: | ||
300 | spin_unlock(&fq->lock); | ||
301 | fq_put(fq); | ||
302 | } | ||
303 | |||
304 | /* Creation primitives. */ | ||
305 | |||
306 | |||
307 | static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, | ||
308 | struct nf_ct_frag6_queue *fq_in) | ||
309 | { | ||
310 | struct nf_ct_frag6_queue *fq; | ||
311 | |||
312 | write_lock(&nf_ct_frag6_lock); | ||
313 | #ifdef CONFIG_SMP | ||
314 | for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { | ||
315 | if (fq->id == fq_in->id && | ||
316 | !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) && | ||
317 | !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) { | ||
318 | atomic_inc(&fq->refcnt); | ||
319 | write_unlock(&nf_ct_frag6_lock); | ||
320 | fq_in->last_in |= COMPLETE; | ||
321 | fq_put(fq_in); | ||
322 | return fq; | ||
323 | } | ||
324 | } | ||
325 | #endif | ||
326 | fq = fq_in; | ||
327 | |||
328 | if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout)) | ||
329 | atomic_inc(&fq->refcnt); | ||
330 | |||
331 | atomic_inc(&fq->refcnt); | ||
332 | if ((fq->next = nf_ct_frag6_hash[hash]) != NULL) | ||
333 | fq->next->pprev = &fq->next; | ||
334 | nf_ct_frag6_hash[hash] = fq; | ||
335 | fq->pprev = &nf_ct_frag6_hash[hash]; | ||
336 | INIT_LIST_HEAD(&fq->lru_list); | ||
337 | list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); | ||
338 | nf_ct_frag6_nqueues++; | ||
339 | write_unlock(&nf_ct_frag6_lock); | ||
340 | return fq; | ||
341 | } | ||
342 | |||
343 | |||
344 | static struct nf_ct_frag6_queue * | ||
345 | nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) | ||
346 | { | ||
347 | struct nf_ct_frag6_queue *fq; | ||
348 | |||
349 | if ((fq = frag_alloc_queue()) == NULL) { | ||
350 | DEBUGP("Can't alloc new queue\n"); | ||
351 | goto oom; | ||
352 | } | ||
353 | |||
354 | memset(fq, 0, sizeof(struct nf_ct_frag6_queue)); | ||
355 | |||
356 | fq->id = id; | ||
357 | ipv6_addr_copy(&fq->saddr, src); | ||
358 | ipv6_addr_copy(&fq->daddr, dst); | ||
359 | |||
360 | init_timer(&fq->timer); | ||
361 | fq->timer.function = nf_ct_frag6_expire; | ||
362 | fq->timer.data = (long) fq; | ||
363 | fq->lock = SPIN_LOCK_UNLOCKED; | ||
364 | atomic_set(&fq->refcnt, 1); | ||
365 | |||
366 | return nf_ct_frag6_intern(hash, fq); | ||
367 | |||
368 | oom: | ||
369 | return NULL; | ||
370 | } | ||
371 | |||
372 | static __inline__ struct nf_ct_frag6_queue * | ||
373 | fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) | ||
374 | { | ||
375 | struct nf_ct_frag6_queue *fq; | ||
376 | unsigned int hash = ip6qhashfn(id, src, dst); | ||
377 | |||
378 | read_lock(&nf_ct_frag6_lock); | ||
379 | for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { | ||
380 | if (fq->id == id && | ||
381 | !ipv6_addr_cmp(src, &fq->saddr) && | ||
382 | !ipv6_addr_cmp(dst, &fq->daddr)) { | ||
383 | atomic_inc(&fq->refcnt); | ||
384 | read_unlock(&nf_ct_frag6_lock); | ||
385 | return fq; | ||
386 | } | ||
387 | } | ||
388 | read_unlock(&nf_ct_frag6_lock); | ||
389 | |||
390 | return nf_ct_frag6_create(hash, id, src, dst); | ||
391 | } | ||
392 | |||
393 | |||
394 | static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, | ||
395 | struct frag_hdr *fhdr, int nhoff) | ||
396 | { | ||
397 | struct sk_buff *prev, *next; | ||
398 | int offset, end; | ||
399 | |||
400 | if (fq->last_in & COMPLETE) { | ||
401 | DEBUGP("Allready completed\n"); | ||
402 | goto err; | ||
403 | } | ||
404 | |||
405 | offset = ntohs(fhdr->frag_off) & ~0x7; | ||
406 | end = offset + (ntohs(skb->nh.ipv6h->payload_len) - | ||
407 | ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); | ||
408 | |||
409 | if ((unsigned int)end > IPV6_MAXPLEN) { | ||
410 | DEBUGP("offset is too large.\n"); | ||
411 | return -1; | ||
412 | } | ||
413 | |||
414 | if (skb->ip_summed == CHECKSUM_HW) | ||
415 | skb->csum = csum_sub(skb->csum, | ||
416 | csum_partial(skb->nh.raw, | ||
417 | (u8*)(fhdr + 1) - skb->nh.raw, | ||
418 | 0)); | ||
419 | |||
420 | /* Is this the final fragment? */ | ||
421 | if (!(fhdr->frag_off & htons(IP6_MF))) { | ||
422 | /* If we already have some bits beyond end | ||
423 | * or have different end, the segment is corrupted. | ||
424 | */ | ||
425 | if (end < fq->len || | ||
426 | ((fq->last_in & LAST_IN) && end != fq->len)) { | ||
427 | DEBUGP("already received last fragment\n"); | ||
428 | goto err; | ||
429 | } | ||
430 | fq->last_in |= LAST_IN; | ||
431 | fq->len = end; | ||
432 | } else { | ||
433 | /* Check if the fragment is rounded to 8 bytes. | ||
434 | * Required by the RFC. | ||
435 | */ | ||
436 | if (end & 0x7) { | ||
437 | /* RFC2460 says always send parameter problem in | ||
438 | * this case. -DaveM | ||
439 | */ | ||
440 | DEBUGP("the end of this fragment is not rounded to 8 bytes.\n"); | ||
441 | return -1; | ||
442 | } | ||
443 | if (end > fq->len) { | ||
444 | /* Some bits beyond end -> corruption. */ | ||
445 | if (fq->last_in & LAST_IN) { | ||
446 | DEBUGP("last packet already reached.\n"); | ||
447 | goto err; | ||
448 | } | ||
449 | fq->len = end; | ||
450 | } | ||
451 | } | ||
452 | |||
453 | if (end == offset) | ||
454 | goto err; | ||
455 | |||
456 | /* Point into the IP datagram 'data' part. */ | ||
457 | if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { | ||
458 | DEBUGP("queue: message is too short.\n"); | ||
459 | goto err; | ||
460 | } | ||
461 | if (end-offset < skb->len) { | ||
462 | if (pskb_trim(skb, end - offset)) { | ||
463 | DEBUGP("Can't trim\n"); | ||
464 | goto err; | ||
465 | } | ||
466 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | ||
467 | skb->ip_summed = CHECKSUM_NONE; | ||
468 | } | ||
469 | |||
470 | /* Find out which fragments are in front and at the back of us | ||
471 | * in the chain of fragments so far. We must know where to put | ||
472 | * this fragment, right? | ||
473 | */ | ||
474 | prev = NULL; | ||
475 | for (next = fq->fragments; next != NULL; next = next->next) { | ||
476 | if (NFCT_FRAG6_CB(next)->offset >= offset) | ||
477 | break; /* bingo! */ | ||
478 | prev = next; | ||
479 | } | ||
480 | |||
481 | /* We found where to put this one. Check for overlap with | ||
482 | * preceding fragment, and, if needed, align things so that | ||
483 | * any overlaps are eliminated. | ||
484 | */ | ||
485 | if (prev) { | ||
486 | int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset; | ||
487 | |||
488 | if (i > 0) { | ||
489 | offset += i; | ||
490 | if (end <= offset) { | ||
491 | DEBUGP("overlap\n"); | ||
492 | goto err; | ||
493 | } | ||
494 | if (!pskb_pull(skb, i)) { | ||
495 | DEBUGP("Can't pull\n"); | ||
496 | goto err; | ||
497 | } | ||
498 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | ||
499 | skb->ip_summed = CHECKSUM_NONE; | ||
500 | } | ||
501 | } | ||
502 | |||
503 | /* Look for overlap with succeeding segments. | ||
504 | * If we can merge fragments, do it. | ||
505 | */ | ||
506 | while (next && NFCT_FRAG6_CB(next)->offset < end) { | ||
507 | /* overlap is 'i' bytes */ | ||
508 | int i = end - NFCT_FRAG6_CB(next)->offset; | ||
509 | |||
510 | if (i < next->len) { | ||
511 | /* Eat head of the next overlapped fragment | ||
512 | * and leave the loop. The next ones cannot overlap. | ||
513 | */ | ||
514 | DEBUGP("Eat head of the overlapped parts.: %d", i); | ||
515 | if (!pskb_pull(next, i)) | ||
516 | goto err; | ||
517 | |||
518 | /* next fragment */ | ||
519 | NFCT_FRAG6_CB(next)->offset += i; | ||
520 | fq->meat -= i; | ||
521 | if (next->ip_summed != CHECKSUM_UNNECESSARY) | ||
522 | next->ip_summed = CHECKSUM_NONE; | ||
523 | break; | ||
524 | } else { | ||
525 | struct sk_buff *free_it = next; | ||
526 | |||
527 | /* Old fragment is completely overridden by the | ||
528 | * new one; drop it. | ||
529 | */ | ||
530 | next = next->next; | ||
531 | |||
532 | if (prev) | ||
533 | prev->next = next; | ||
534 | else | ||
535 | fq->fragments = next; | ||
536 | |||
537 | fq->meat -= free_it->len; | ||
538 | frag_kfree_skb(free_it); | ||
539 | } | ||
540 | } | ||
541 | |||
542 | NFCT_FRAG6_CB(skb)->offset = offset; | ||
543 | |||
544 | /* Insert this fragment in the chain of fragments. */ | ||
545 | skb->next = next; | ||
546 | if (prev) | ||
547 | prev->next = skb; | ||
548 | else | ||
549 | fq->fragments = skb; | ||
550 | |||
551 | skb->dev = NULL; | ||
552 | skb_get_timestamp(skb, &fq->stamp); | ||
553 | fq->meat += skb->len; | ||
554 | atomic_add(skb->truesize, &nf_ct_frag6_mem); | ||
555 | |||
556 | /* The first fragment. | ||
557 | * nhoffset is obtained from the first fragment, of course. | ||
558 | */ | ||
559 | if (offset == 0) { | ||
560 | fq->nhoffset = nhoff; | ||
561 | fq->last_in |= FIRST_IN; | ||
562 | } | ||
563 | write_lock(&nf_ct_frag6_lock); | ||
564 | list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list); | ||
565 | write_unlock(&nf_ct_frag6_lock); | ||
566 | return 0; | ||
567 | |||
568 | err: | ||
569 | return -1; | ||
570 | } | ||
571 | |||
572 | /* | ||
573 | * Check if this packet is complete. | ||
574 | * Returns NULL on failure for any reason, and a pointer | ||
575 | * to the head skb of the reassembled frame on success. | ||
576 | * | ||
577 | * It is called with locked fq, and caller must check that | ||
578 | * queue is eligible for reassembly i.e. it is not COMPLETE, | ||
579 | * the last and the first frames arrived and all the bits are here. | ||
580 | */ | ||
581 | static struct sk_buff * | ||
582 | nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) | ||
583 | { | ||
584 | struct sk_buff *fp, *op, *head = fq->fragments; | ||
585 | int payload_len; | ||
586 | |||
587 | fq_kill(fq); | ||
588 | |||
589 | BUG_TRAP(head != NULL); | ||
590 | BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0); | ||
591 | |||
592 | /* Unfragmented part is taken from the first segment. */ | ||
593 | payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr); | ||
594 | if (payload_len > IPV6_MAXPLEN) { | ||
595 | DEBUGP("payload len is too large.\n"); | ||
596 | goto out_oversize; | ||
597 | } | ||
598 | |||
599 | /* Head of list must not be cloned. */ | ||
600 | if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { | ||
601 | DEBUGP("skb is cloned but can't expand head"); | ||
602 | goto out_oom; | ||
603 | } | ||
604 | |||
605 | /* If the first fragment is fragmented itself, we split | ||
606 | * it to two chunks: the first with data and paged part | ||
607 | * and the second, holding only fragments. */ | ||
608 | if (skb_shinfo(head)->frag_list) { | ||
609 | struct sk_buff *clone; | ||
610 | int i, plen = 0; | ||
611 | |||
612 | if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) { | ||
613 | DEBUGP("Can't alloc skb\n"); | ||
614 | goto out_oom; | ||
615 | } | ||
616 | clone->next = head->next; | ||
617 | head->next = clone; | ||
618 | skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; | ||
619 | skb_shinfo(head)->frag_list = NULL; | ||
620 | for (i=0; i<skb_shinfo(head)->nr_frags; i++) | ||
621 | plen += skb_shinfo(head)->frags[i].size; | ||
622 | clone->len = clone->data_len = head->data_len - plen; | ||
623 | head->data_len -= clone->len; | ||
624 | head->len -= clone->len; | ||
625 | clone->csum = 0; | ||
626 | clone->ip_summed = head->ip_summed; | ||
627 | |||
628 | NFCT_FRAG6_CB(clone)->orig = NULL; | ||
629 | atomic_add(clone->truesize, &nf_ct_frag6_mem); | ||
630 | } | ||
631 | |||
632 | /* We have to remove fragment header from datagram and to relocate | ||
633 | * header in order to calculate ICV correctly. */ | ||
634 | head->nh.raw[fq->nhoffset] = head->h.raw[0]; | ||
635 | memmove(head->head + sizeof(struct frag_hdr), head->head, | ||
636 | (head->data - head->head) - sizeof(struct frag_hdr)); | ||
637 | head->mac.raw += sizeof(struct frag_hdr); | ||
638 | head->nh.raw += sizeof(struct frag_hdr); | ||
639 | |||
640 | skb_shinfo(head)->frag_list = head->next; | ||
641 | head->h.raw = head->data; | ||
642 | skb_push(head, head->data - head->nh.raw); | ||
643 | atomic_sub(head->truesize, &nf_ct_frag6_mem); | ||
644 | |||
645 | for (fp=head->next; fp; fp = fp->next) { | ||
646 | head->data_len += fp->len; | ||
647 | head->len += fp->len; | ||
648 | if (head->ip_summed != fp->ip_summed) | ||
649 | head->ip_summed = CHECKSUM_NONE; | ||
650 | else if (head->ip_summed == CHECKSUM_HW) | ||
651 | head->csum = csum_add(head->csum, fp->csum); | ||
652 | head->truesize += fp->truesize; | ||
653 | atomic_sub(fp->truesize, &nf_ct_frag6_mem); | ||
654 | } | ||
655 | |||
656 | head->next = NULL; | ||
657 | head->dev = dev; | ||
658 | skb_set_timestamp(head, &fq->stamp); | ||
659 | head->nh.ipv6h->payload_len = htons(payload_len); | ||
660 | |||
661 | /* Yes, and fold redundant checksum back. 8) */ | ||
662 | if (head->ip_summed == CHECKSUM_HW) | ||
663 | head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); | ||
664 | |||
665 | fq->fragments = NULL; | ||
666 | |||
667 | /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ | ||
668 | fp = skb_shinfo(head)->frag_list; | ||
669 | if (NFCT_FRAG6_CB(fp)->orig == NULL) | ||
670 | /* at above code, head skb is divided into two skbs. */ | ||
671 | fp = fp->next; | ||
672 | |||
673 | op = NFCT_FRAG6_CB(head)->orig; | ||
674 | for (; fp; fp = fp->next) { | ||
675 | struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; | ||
676 | |||
677 | op->next = orig; | ||
678 | op = orig; | ||
679 | NFCT_FRAG6_CB(fp)->orig = NULL; | ||
680 | } | ||
681 | |||
682 | return head; | ||
683 | |||
684 | out_oversize: | ||
685 | if (net_ratelimit()) | ||
686 | printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len); | ||
687 | goto out_fail; | ||
688 | out_oom: | ||
689 | if (net_ratelimit()) | ||
690 | printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n"); | ||
691 | out_fail: | ||
692 | return NULL; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * Find the header just before the Fragment Header. | ||
697 | * | ||
698 | * On success return 0 and set ... | ||
699 | * (*prevhdrp): the value of the "Next Header" field in the header | ||
700 | * just before the Fragment Header. | ||
701 | * (*prevhoff): the offset of the "Next Header" field in the header | ||
702 | * just before the Fragment Header. | ||
703 | * (*fhoff) : the offset of the Fragment Header. | ||
704 | * Return -1 if no Fragment Header is found or the packet is too short. | ||
705 | * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c | ||
706 | * | ||
707 | */ | ||
708 | static int | ||
709 | find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) | ||
710 | { | ||
711 | u8 nexthdr = skb->nh.ipv6h->nexthdr; | ||
712 | int prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data; /* int, not u8: it is assigned from 'start', which can exceed 255 */ | ||
713 | int start = (u8 *)(skb->nh.ipv6h+1) - skb->data; | ||
714 | int len = skb->len - start; | ||
715 | u8 prevhdr = NEXTHDR_IPV6; | ||
716 | |||
717 | while (nexthdr != NEXTHDR_FRAGMENT) { | ||
718 | struct ipv6_opt_hdr hdr; | ||
719 | int hdrlen; | ||
720 | |||
721 | if (!ipv6_ext_hdr(nexthdr)) { | ||
722 | return -1; | ||
723 | } | ||
724 | if (len < (int)sizeof(struct ipv6_opt_hdr)) { | ||
725 | DEBUGP("too short\n"); | ||
726 | return -1; | ||
727 | } | ||
728 | if (nexthdr == NEXTHDR_NONE) { | ||
729 | DEBUGP("next header is none\n"); | ||
730 | return -1; | ||
731 | } | ||
732 | if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) | ||
733 | BUG(); | ||
734 | if (nexthdr == NEXTHDR_AUTH) | ||
735 | hdrlen = (hdr.hdrlen+2)<<2; | ||
736 | else | ||
737 | hdrlen = ipv6_optlen(&hdr); | ||
738 | |||
739 | prevhdr = nexthdr; | ||
740 | prev_nhoff = start; | ||
741 | |||
742 | nexthdr = hdr.nexthdr; | ||
743 | len -= hdrlen; | ||
744 | start += hdrlen; | ||
745 | } | ||
746 | |||
747 | if (len < 0) | ||
748 | return -1; | ||
749 | |||
750 | *prevhdrp = prevhdr; | ||
751 | *prevhoff = prev_nhoff; | ||
752 | *fhoff = start; | ||
753 | |||
754 | return 0; | ||
755 | } | ||
756 | |||
757 | struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) | ||
758 | { | ||
759 | struct sk_buff *clone; | ||
760 | struct net_device *dev = skb->dev; | ||
761 | struct frag_hdr *fhdr; | ||
762 | struct nf_ct_frag6_queue *fq; | ||
763 | struct ipv6hdr *hdr; | ||
764 | int fhoff, nhoff; | ||
765 | u8 prevhdr; | ||
766 | struct sk_buff *ret_skb = NULL; | ||
767 | |||
768 | /* Jumbo payload inhibits frag. header */ | ||
769 | if (skb->nh.ipv6h->payload_len == 0) { | ||
770 | DEBUGP("payload len = 0\n"); | ||
771 | return skb; | ||
772 | } | ||
773 | |||
774 | if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) | ||
775 | return skb; | ||
776 | |||
777 | clone = skb_clone(skb, GFP_ATOMIC); | ||
778 | if (clone == NULL) { | ||
779 | DEBUGP("Can't clone skb\n"); | ||
780 | return skb; | ||
781 | } | ||
782 | |||
783 | NFCT_FRAG6_CB(clone)->orig = skb; | ||
784 | |||
785 | if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { | ||
786 | DEBUGP("message is too short.\n"); | ||
787 | goto ret_orig; | ||
788 | } | ||
789 | |||
790 | clone->h.raw = clone->data + fhoff; | ||
791 | hdr = clone->nh.ipv6h; | ||
792 | fhdr = (struct frag_hdr *)clone->h.raw; | ||
793 | |||
794 | if (!(fhdr->frag_off & htons(0xFFF9))) { | ||
795 | DEBUGP("Invalid fragment offset\n"); | ||
796 | /* It is not a fragmented frame */ | ||
797 | goto ret_orig; | ||
798 | } | ||
799 | |||
800 | if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh) | ||
801 | nf_ct_frag6_evictor(); | ||
802 | |||
803 | fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); | ||
804 | if (fq == NULL) { | ||
805 | DEBUGP("Can't find and can't create new queue\n"); | ||
806 | goto ret_orig; | ||
807 | } | ||
808 | |||
809 | spin_lock(&fq->lock); | ||
810 | |||
811 | if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { | ||
812 | spin_unlock(&fq->lock); | ||
813 | DEBUGP("Can't insert skb to queue\n"); | ||
814 | fq_put(fq); | ||
815 | goto ret_orig; | ||
816 | } | ||
817 | |||
818 | if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { | ||
819 | ret_skb = nf_ct_frag6_reasm(fq, dev); | ||
820 | if (ret_skb == NULL) | ||
821 | DEBUGP("Can't reassemble fragmented packets\n"); | ||
822 | } | ||
823 | spin_unlock(&fq->lock); | ||
824 | |||
825 | fq_put(fq); | ||
826 | return ret_skb; | ||
827 | |||
828 | ret_orig: | ||
829 | kfree_skb(clone); | ||
830 | return skb; | ||
831 | } | ||
832 | |||
833 | void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, | ||
834 | struct net_device *in, struct net_device *out, | ||
835 | int (*okfn)(struct sk_buff *)) | ||
836 | { | ||
837 | struct sk_buff *s, *s2; | ||
838 | |||
839 | for (s = NFCT_FRAG6_CB(skb)->orig; s;) { | ||
840 | nf_conntrack_put_reasm(s->nfct_reasm); | ||
841 | nf_conntrack_get_reasm(skb); | ||
842 | s->nfct_reasm = skb; | ||
843 | |||
844 | s2 = s->next; | ||
845 | NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn, | ||
846 | NF_IP6_PRI_CONNTRACK_DEFRAG + 1); | ||
847 | s = s2; | ||
848 | } | ||
849 | nf_conntrack_put_reasm(skb); | ||
850 | } | ||
851 | |||
852 | int nf_ct_frag6_kfree_frags(struct sk_buff *skb) | ||
853 | { | ||
854 | struct sk_buff *s, *s2; | ||
855 | |||
856 | for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) { | ||
857 | |||
858 | s2 = s->next; | ||
859 | kfree_skb(s); | ||
860 | } | ||
861 | |||
862 | kfree_skb(skb); | ||
863 | |||
864 | return 0; | ||
865 | } | ||
866 | |||
867 | int nf_ct_frag6_init(void) | ||
868 | { | ||
869 | nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ | ||
870 | (jiffies ^ (jiffies >> 6))); | ||
871 | |||
872 | init_timer(&nf_ct_frag6_secret_timer); | ||
873 | nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild; | ||
874 | nf_ct_frag6_secret_timer.expires = jiffies | ||
875 | + nf_ct_frag6_secret_interval; | ||
876 | add_timer(&nf_ct_frag6_secret_timer); | ||
877 | |||
878 | return 0; | ||
879 | } | ||
880 | |||
881 | void nf_ct_frag6_cleanup(void) | ||
882 | { | ||
883 | del_timer(&nf_ct_frag6_secret_timer); | ||
884 | nf_ct_frag6_evictor(); | ||
885 | } | ||
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a1265a320b11..651c79b41eeb 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c | |||
@@ -174,8 +174,10 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) | |||
174 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); | 174 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); |
175 | 175 | ||
176 | /* Not releasing hash table! */ | 176 | /* Not releasing hash table! */ |
177 | if (clone) | 177 | if (clone) { |
178 | nf_reset(clone); | ||
178 | rawv6_rcv(sk, clone); | 179 | rawv6_rcv(sk, clone); |
180 | } | ||
179 | } | 181 | } |
180 | sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, | 182 | sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, |
181 | IP6CB(skb)->iif); | 183 | IP6CB(skb)->iif); |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 227e99ed510c..f7f42c3e96cb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -1710,7 +1710,7 @@ static void fib6_dump_end(struct netlink_callback *cb) | |||
1710 | static int fib6_dump_done(struct netlink_callback *cb) | 1710 | static int fib6_dump_done(struct netlink_callback *cb) |
1711 | { | 1711 | { |
1712 | fib6_dump_end(cb); | 1712 | fib6_dump_end(cb); |
1713 | return cb->done(cb); | 1713 | return cb->done ? cb->done(cb) : 0; |
1714 | } | 1714 | } |
1715 | 1715 | ||
1716 | int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | 1716 | int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) |
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8296b38bf270..a84f9221e5f0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig | |||
@@ -1,3 +1,6 @@ | |||
1 | menu "Core Netfilter Configuration" | ||
2 | depends on NET && NETFILTER | ||
3 | |||
1 | config NETFILTER_NETLINK | 4 | config NETFILTER_NETLINK |
2 | tristate "Netfilter netlink interface" | 5 | tristate "Netfilter netlink interface" |
3 | help | 6 | help |
@@ -22,3 +25,74 @@ config NETFILTER_NETLINK_LOG | |||
22 | and is also scheduled to replace the old syslog-based ipt_LOG | 25 | and is also scheduled to replace the old syslog-based ipt_LOG |
23 | and ip6t_LOG modules. | 26 | and ip6t_LOG modules. |
24 | 27 | ||
28 | config NF_CONNTRACK | ||
29 | tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)" | ||
30 | depends on EXPERIMENTAL && IP_NF_CONNTRACK=n | ||
31 | default n | ||
32 | ---help--- | ||
33 | Connection tracking keeps a record of what packets have passed | ||
34 | through your machine, in order to figure out how they are related | ||
35 | into connections. | ||
36 | |||
37 | Layer 3 independent connection tracking is an experimental scheme | ||
38 | which generalizes ip_conntrack to support other layer 3 protocols. | ||
39 | |||
40 | To compile it as a module, choose M here. If unsure, say N. | ||
41 | |||
42 | config NF_CT_ACCT | ||
43 | bool "Connection tracking flow accounting" | ||
44 | depends on NF_CONNTRACK | ||
45 | help | ||
46 | If this option is enabled, the connection tracking code will | ||
47 | keep per-flow packet and byte counters. | ||
48 | |||
49 | Those counters can be used for flow-based accounting or the | ||
50 | `connbytes' match. | ||
51 | |||
52 | If unsure, say `N'. | ||
53 | |||
54 | config NF_CONNTRACK_MARK | ||
55 | bool 'Connection mark tracking support' | ||
56 | depends on NF_CONNTRACK | ||
57 | help | ||
58 | This option enables support for connection marks, used by the | ||
59 | `CONNMARK' target and `connmark' match. Similar to the mark value | ||
60 | of packets, but this mark value is kept in the conntrack session | ||
61 | instead of the individual packets. | ||
62 | |||
63 | config NF_CONNTRACK_EVENTS | ||
64 | bool "Connection tracking events" | ||
65 | depends on NF_CONNTRACK | ||
66 | help | ||
67 | If this option is enabled, the connection tracking code will | ||
68 | provide a notifier chain that can be used by other kernel code | ||
69 | to get notified about changes in the connection tracking state. | ||
70 | |||
71 | If unsure, say `N'. | ||
72 | |||
73 | config NF_CT_PROTO_SCTP | ||
74 | tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)' | ||
75 | depends on EXPERIMENTAL && NF_CONNTRACK | ||
76 | default n | ||
77 | help | ||
78 | With this option enabled, the layer 3 independent connection | ||
79 | tracking code will be able to do state tracking on SCTP connections. | ||
80 | |||
81 | If you want to compile it as a module, say M here and read | ||
82 | Documentation/modules.txt. If unsure, say `N'. | ||
83 | |||
84 | config NF_CONNTRACK_FTP | ||
85 | tristate "FTP support on new connection tracking (EXPERIMENTAL)" | ||
86 | depends on EXPERIMENTAL && NF_CONNTRACK | ||
87 | help | ||
88 | Tracking FTP connections is problematic: special helpers are | ||
89 | required for tracking them, and doing masquerading and other forms | ||
90 | of Network Address Translation on them. | ||
91 | |||
92 | This is FTP support on Layer 3 independent connection tracking. | ||
93 | Layer 3 independent connection tracking is an experimental scheme | ||
94 | which generalizes ip_conntrack to support other layer 3 protocols. | ||
95 | |||
96 | To compile it as a module, choose M here. If unsure, say N. | ||
97 | |||
98 | endmenu | ||
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b3b44f8b415a..55f019ad2c08 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile | |||
@@ -5,3 +5,11 @@ obj-$(CONFIG_NETFILTER) = netfilter.o | |||
5 | obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o | 5 | obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o |
6 | obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o | 6 | obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o |
7 | obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o | 7 | obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o |
8 | |||
9 | nf_conntrack-objs := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o | ||
10 | |||
11 | obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o | ||
12 | obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o | ||
13 | |||
14 | # SCTP protocol connection tracking | ||
15 | obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o | ||
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c new file mode 100644 index 000000000000..9a67c796b385 --- /dev/null +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -0,0 +1,1538 @@ | |||
1 | /* Connection state tracking for netfilter. This is separated from, | ||
2 | but required by, the NAT layer; it can also be used by an iptables | ||
3 | extension. */ | ||
4 | |||
5 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
6 | * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | * | ||
13 | * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> | ||
14 | * - new API and handling of conntrack/nat helpers | ||
15 | * - now capable of multiple expectations for one master | ||
16 | * 16 Jul 2002: Harald Welte <laforge@gnumonks.org> | ||
17 | * - add usage/reference counts to ip_conntrack_expect | ||
18 | * - export ip_conntrack[_expect]_{find_get,put} functions | ||
19 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
20 | * - generalize L3 protocol dependent part. | ||
21 | * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
22 | * - add support for various sizes of conntrack structures. | ||
23 | * | ||
24 | * Derived from net/ipv4/netfilter/ip_conntrack_core.c | ||
25 | */ | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/netfilter.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/skbuff.h> | ||
32 | #include <linux/proc_fs.h> | ||
33 | #include <linux/vmalloc.h> | ||
34 | #include <linux/stddef.h> | ||
35 | #include <linux/slab.h> | ||
36 | #include <linux/random.h> | ||
37 | #include <linux/jhash.h> | ||
38 | #include <linux/err.h> | ||
39 | #include <linux/percpu.h> | ||
40 | #include <linux/moduleparam.h> | ||
41 | #include <linux/notifier.h> | ||
42 | #include <linux/kernel.h> | ||
43 | #include <linux/netdevice.h> | ||
44 | #include <linux/socket.h> | ||
45 | |||
46 | /* This rwlock protects the main hash table, protocol/helper/expected | ||
47 | registrations, conntrack timers*/ | ||
48 | #define ASSERT_READ_LOCK(x) | ||
49 | #define ASSERT_WRITE_LOCK(x) | ||
50 | |||
51 | #include <net/netfilter/nf_conntrack.h> | ||
52 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
53 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
54 | #include <net/netfilter/nf_conntrack_helper.h> | ||
55 | #include <net/netfilter/nf_conntrack_core.h> | ||
56 | #include <linux/netfilter_ipv4/listhelp.h> | ||
57 | |||
58 | #define NF_CONNTRACK_VERSION "0.4.1" | ||
59 | |||
60 | #if 0 | ||
61 | #define DEBUGP printk | ||
62 | #else | ||
63 | #define DEBUGP(format, args...) | ||
64 | #endif | ||
65 | |||
66 | DEFINE_RWLOCK(nf_conntrack_lock); | ||
67 | |||
68 | /* nf_conntrack_standalone needs this */ | ||
69 | atomic_t nf_conntrack_count = ATOMIC_INIT(0); | ||
70 | |||
71 | void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; | ||
72 | LIST_HEAD(nf_conntrack_expect_list); | ||
73 | struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; | ||
74 | struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; | ||
75 | static LIST_HEAD(helpers); | ||
76 | unsigned int nf_conntrack_htable_size = 0; | ||
77 | int nf_conntrack_max; | ||
78 | struct list_head *nf_conntrack_hash; | ||
79 | static kmem_cache_t *nf_conntrack_expect_cachep; | ||
80 | struct nf_conn nf_conntrack_untracked; | ||
81 | unsigned int nf_ct_log_invalid; | ||
82 | static LIST_HEAD(unconfirmed); | ||
83 | static int nf_conntrack_vmalloc; | ||
84 | |||
85 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
86 | struct notifier_block *nf_conntrack_chain; | ||
87 | struct notifier_block *nf_conntrack_expect_chain; | ||
88 | |||
89 | DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); | ||
90 | |||
91 | /* deliver cached events and clear cache entry - must be called with locally | ||
92 | * disabled softirqs */ | ||
93 | static inline void | ||
94 | __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) | ||
95 | { | ||
96 | DEBUGP("ecache: delivering events for %p\n", ecache->ct); | ||
97 | if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) | ||
98 | && ecache->events) | ||
99 | notifier_call_chain(&nf_conntrack_chain, ecache->events, | ||
100 | ecache->ct); | ||
101 | |||
102 | ecache->events = 0; | ||
103 | nf_ct_put(ecache->ct); | ||
104 | ecache->ct = NULL; | ||
105 | } | ||
106 | |||
107 | /* Deliver all cached events for a particular conntrack. This is called | ||
108 | * by code prior to async packet handling for freeing the skb */ | ||
109 | void nf_ct_deliver_cached_events(const struct nf_conn *ct) | ||
110 | { | ||
111 | struct nf_conntrack_ecache *ecache; | ||
112 | |||
113 | local_bh_disable(); | ||
114 | ecache = &__get_cpu_var(nf_conntrack_ecache); | ||
115 | if (ecache->ct == ct) | ||
116 | __nf_ct_deliver_cached_events(ecache); | ||
117 | local_bh_enable(); | ||
118 | } | ||
119 | |||
120 | /* Deliver cached events for old pending events, if current conntrack != old */ | ||
121 | void __nf_ct_event_cache_init(struct nf_conn *ct) | ||
122 | { | ||
123 | struct nf_conntrack_ecache *ecache; | ||
124 | |||
125 | /* take care of delivering potentially old events */ | ||
126 | ecache = &__get_cpu_var(nf_conntrack_ecache); | ||
127 | BUG_ON(ecache->ct == ct); | ||
128 | if (ecache->ct) | ||
129 | __nf_ct_deliver_cached_events(ecache); | ||
130 | /* initialize for this conntrack/packet */ | ||
131 | ecache->ct = ct; | ||
132 | nf_conntrack_get(&ct->ct_general); | ||
133 | } | ||
134 | |||
135 | /* flush the event cache - touches other CPU's data and must not be called | ||
136 | * while packets are still passing through the code */ | ||
137 | static void nf_ct_event_cache_flush(void) | ||
138 | { | ||
139 | struct nf_conntrack_ecache *ecache; | ||
140 | int cpu; | ||
141 | |||
142 | for_each_cpu(cpu) { | ||
143 | ecache = &per_cpu(nf_conntrack_ecache, cpu); | ||
144 | if (ecache->ct) | ||
145 | nf_ct_put(ecache->ct); | ||
146 | } | ||
147 | } | ||
148 | #else | ||
149 | static inline void nf_ct_event_cache_flush(void) {} | ||
150 | #endif /* CONFIG_NF_CONNTRACK_EVENTS */ | ||
151 | |||
152 | DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); | ||
153 | EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); | ||
154 | |||
155 | /* | ||
156 | * This scheme offers various sizes of "struct nf_conn" depending on | ||
157 | * features (helper, nat, ...) | ||
158 | */ | ||
159 | |||
160 | #define NF_CT_FEATURES_NAMELEN 256 | ||
161 | static struct { | ||
162 | /* name of slab cache. printed in /proc/slabinfo */ | ||
163 | char *name; | ||
164 | |||
165 | /* size of slab cache */ | ||
166 | size_t size; | ||
167 | |||
168 | /* slab cache pointer */ | ||
169 | kmem_cache_t *cachep; | ||
170 | |||
171 | /* allocated slab cache + modules which uses this slab cache */ | ||
172 | int use; | ||
173 | |||
174 | /* Initialization */ | ||
175 | int (*init_conntrack)(struct nf_conn *, u_int32_t); | ||
176 | |||
177 | } nf_ct_cache[NF_CT_F_NUM]; | ||
178 | |||
179 | /* protects members of nf_ct_cache except for "use" */ | ||
180 | DEFINE_RWLOCK(nf_ct_cache_lock); | ||
181 | |||
182 | /* This avoids calling kmem_cache_create() with same name simultaneously */ | ||
183 | DECLARE_MUTEX(nf_ct_cache_mutex); | ||
184 | |||
185 | extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; | ||
186 | struct nf_conntrack_protocol * | ||
187 | nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) | ||
188 | { | ||
189 | if (unlikely(nf_ct_protos[l3proto] == NULL)) | ||
190 | return &nf_conntrack_generic_protocol; | ||
191 | |||
192 | return nf_ct_protos[l3proto][protocol]; | ||
193 | } | ||
194 | |||
195 | static int nf_conntrack_hash_rnd_initted; | ||
196 | static unsigned int nf_conntrack_hash_rnd; | ||
197 | |||
198 | static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, | ||
199 | unsigned int size, unsigned int rnd) | ||
200 | { | ||
201 | unsigned int a, b; | ||
202 | a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all), | ||
203 | ((tuple->src.l3num) << 16) | tuple->dst.protonum); | ||
204 | b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all), | ||
205 | (tuple->src.u.all << 16) | tuple->dst.u.all); | ||
206 | |||
207 | return jhash_2words(a, b, rnd) % size; | ||
208 | } | ||
209 | |||
210 | static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) | ||
211 | { | ||
212 | return __hash_conntrack(tuple, nf_conntrack_htable_size, | ||
213 | nf_conntrack_hash_rnd); | ||
214 | } | ||
215 | |||
216 | /* Initialize "struct nf_conn" which has space for a helper */ | ||
217 | static int | ||
218 | init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features) | ||
219 | { | ||
220 | |||
221 | conntrack->help = (union nf_conntrack_help *) | ||
222 | (((unsigned long)conntrack->data | ||
223 | + (__alignof__(union nf_conntrack_help) - 1)) | ||
224 | & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1)))); | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | int nf_conntrack_register_cache(u_int32_t features, const char *name, | ||
229 | size_t size, | ||
230 | int (*init)(struct nf_conn *, u_int32_t)) | ||
231 | { | ||
232 | int ret = 0; | ||
233 | char *cache_name; | ||
234 | kmem_cache_t *cachep; | ||
235 | |||
236 | DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", | ||
237 | features, name, size); | ||
238 | |||
239 | if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) { | ||
240 | DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n", | ||
241 | features); | ||
242 | return -EINVAL; | ||
243 | } | ||
244 | |||
245 | down(&nf_ct_cache_mutex); | ||
246 | |||
247 | write_lock_bh(&nf_ct_cache_lock); | ||
248 | /* e.g: multiple helpers are loaded */ | ||
249 | if (nf_ct_cache[features].use > 0) { | ||
250 | DEBUGP("nf_conntrack_register_cache: already resisterd.\n"); | ||
251 | if ((!strncmp(nf_ct_cache[features].name, name, | ||
252 | NF_CT_FEATURES_NAMELEN)) | ||
253 | && nf_ct_cache[features].size == size | ||
254 | && nf_ct_cache[features].init_conntrack == init) { | ||
255 | DEBUGP("nf_conntrack_register_cache: reusing.\n"); | ||
256 | nf_ct_cache[features].use++; | ||
257 | ret = 0; | ||
258 | } else | ||
259 | ret = -EBUSY; | ||
260 | |||
261 | write_unlock_bh(&nf_ct_cache_lock); | ||
262 | up(&nf_ct_cache_mutex); | ||
263 | return ret; | ||
264 | } | ||
265 | write_unlock_bh(&nf_ct_cache_lock); | ||
266 | |||
267 | /* | ||
268 | * The memory space for name of slab cache must be alive until | ||
269 | * cache is destroyed. | ||
270 | */ | ||
271 | cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC); | ||
272 | if (cache_name == NULL) { | ||
273 | DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n"); | ||
274 | ret = -ENOMEM; | ||
275 | goto out_up_mutex; | ||
276 | } | ||
277 | |||
278 | if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN) | ||
279 | >= NF_CT_FEATURES_NAMELEN) { | ||
280 | printk("nf_conntrack_register_cache: name too long\n"); | ||
281 | ret = -EINVAL; | ||
282 | goto out_free_name; | ||
283 | } | ||
284 | |||
285 | cachep = kmem_cache_create(cache_name, size, 0, 0, | ||
286 | NULL, NULL); | ||
287 | if (!cachep) { | ||
288 | printk("nf_conntrack_register_cache: Can't create slab cache " | ||
289 | "for the features = 0x%x\n", features); | ||
290 | ret = -ENOMEM; | ||
291 | goto out_free_name; | ||
292 | } | ||
293 | |||
294 | write_lock_bh(&nf_ct_cache_lock); | ||
295 | nf_ct_cache[features].use = 1; | ||
296 | nf_ct_cache[features].size = size; | ||
297 | nf_ct_cache[features].init_conntrack = init; | ||
298 | nf_ct_cache[features].cachep = cachep; | ||
299 | nf_ct_cache[features].name = cache_name; | ||
300 | write_unlock_bh(&nf_ct_cache_lock); | ||
301 | |||
302 | goto out_up_mutex; | ||
303 | |||
304 | out_free_name: | ||
305 | kfree(cache_name); | ||
306 | out_up_mutex: | ||
307 | up(&nf_ct_cache_mutex); | ||
308 | return ret; | ||
309 | } | ||
310 | |||
311 | /* FIXME: Currently, only nf_conntrack_cleanup() can call this function. */ | ||
312 | void nf_conntrack_unregister_cache(u_int32_t features) | ||
313 | { | ||
314 | kmem_cache_t *cachep; | ||
315 | char *name; | ||
316 | |||
317 | /* | ||
318 | * This assures that kmem_cache_create() isn't called before destroying | ||
319 | * slab cache. | ||
320 | */ | ||
321 | DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features); | ||
322 | down(&nf_ct_cache_mutex); | ||
323 | |||
324 | write_lock_bh(&nf_ct_cache_lock); | ||
325 | if (--nf_ct_cache[features].use > 0) { | ||
326 | write_unlock_bh(&nf_ct_cache_lock); | ||
327 | up(&nf_ct_cache_mutex); | ||
328 | return; | ||
329 | } | ||
330 | cachep = nf_ct_cache[features].cachep; | ||
331 | name = nf_ct_cache[features].name; | ||
332 | nf_ct_cache[features].cachep = NULL; | ||
333 | nf_ct_cache[features].name = NULL; | ||
334 | nf_ct_cache[features].init_conntrack = NULL; | ||
335 | nf_ct_cache[features].size = 0; | ||
336 | write_unlock_bh(&nf_ct_cache_lock); | ||
337 | |||
338 | synchronize_net(); | ||
339 | |||
340 | kmem_cache_destroy(cachep); | ||
341 | kfree(name); | ||
342 | |||
343 | up(&nf_ct_cache_mutex); | ||
344 | } | ||
345 | |||
346 | int | ||
347 | nf_ct_get_tuple(const struct sk_buff *skb, | ||
348 | unsigned int nhoff, | ||
349 | unsigned int dataoff, | ||
350 | u_int16_t l3num, | ||
351 | u_int8_t protonum, | ||
352 | struct nf_conntrack_tuple *tuple, | ||
353 | const struct nf_conntrack_l3proto *l3proto, | ||
354 | const struct nf_conntrack_protocol *protocol) | ||
355 | { | ||
356 | NF_CT_TUPLE_U_BLANK(tuple); | ||
357 | |||
358 | tuple->src.l3num = l3num; | ||
359 | if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) | ||
360 | return 0; | ||
361 | |||
362 | tuple->dst.protonum = protonum; | ||
363 | tuple->dst.dir = IP_CT_DIR_ORIGINAL; | ||
364 | |||
365 | return protocol->pkt_to_tuple(skb, dataoff, tuple); | ||
366 | } | ||
367 | |||
368 | int | ||
369 | nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, | ||
370 | const struct nf_conntrack_tuple *orig, | ||
371 | const struct nf_conntrack_l3proto *l3proto, | ||
372 | const struct nf_conntrack_protocol *protocol) | ||
373 | { | ||
374 | NF_CT_TUPLE_U_BLANK(inverse); | ||
375 | |||
376 | inverse->src.l3num = orig->src.l3num; | ||
377 | if (l3proto->invert_tuple(inverse, orig) == 0) | ||
378 | return 0; | ||
379 | |||
380 | inverse->dst.dir = !orig->dst.dir; | ||
381 | |||
382 | inverse->dst.protonum = orig->dst.protonum; | ||
383 | return protocol->invert_tuple(inverse, orig); | ||
384 | } | ||
385 | |||
386 | /* nf_conntrack_expect helper functions */ | ||
387 | static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) | ||
388 | { | ||
389 | ASSERT_WRITE_LOCK(&nf_conntrack_lock); | ||
390 | NF_CT_ASSERT(!timer_pending(&exp_timeout)); | ||
391 | list_del(&exp->list); | ||
392 | NF_CT_STAT_INC(expect_delete); | ||
393 | exp->master->expecting--; | ||
394 | nf_conntrack_expect_put(exp); | ||
395 | } | ||
396 | |||
397 | static void expectation_timed_out(unsigned long ul_expect) | ||
398 | { | ||
399 | struct nf_conntrack_expect *exp = (void *)ul_expect; | ||
400 | |||
401 | write_lock_bh(&nf_conntrack_lock); | ||
402 | nf_ct_unlink_expect(exp); | ||
403 | write_unlock_bh(&nf_conntrack_lock); | ||
404 | nf_conntrack_expect_put(exp); | ||
405 | } | ||
406 | |||
407 | /* If an expectation for this connection is found, it gets delete from | ||
408 | * global list then returned. */ | ||
409 | static struct nf_conntrack_expect * | ||
410 | find_expectation(const struct nf_conntrack_tuple *tuple) | ||
411 | { | ||
412 | struct nf_conntrack_expect *i; | ||
413 | |||
414 | list_for_each_entry(i, &nf_conntrack_expect_list, list) { | ||
415 | /* If master is not in hash table yet (ie. packet hasn't left | ||
416 | this machine yet), how can other end know about expected? | ||
417 | Hence these are not the droids you are looking for (if | ||
418 | master ct never got confirmed, we'd hold a reference to it | ||
419 | and weird things would happen to future packets). */ | ||
420 | if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) | ||
421 | && nf_ct_is_confirmed(i->master)) { | ||
422 | if (i->flags & NF_CT_EXPECT_PERMANENT) { | ||
423 | atomic_inc(&i->use); | ||
424 | return i; | ||
425 | } else if (del_timer(&i->timeout)) { | ||
426 | nf_ct_unlink_expect(i); | ||
427 | return i; | ||
428 | } | ||
429 | } | ||
430 | } | ||
431 | return NULL; | ||
432 | } | ||
433 | |||
434 | /* delete all expectations for this conntrack */ | ||
435 | static void remove_expectations(struct nf_conn *ct) | ||
436 | { | ||
437 | struct nf_conntrack_expect *i, *tmp; | ||
438 | |||
439 | /* Optimization: most connection never expect any others. */ | ||
440 | if (ct->expecting == 0) | ||
441 | return; | ||
442 | |||
443 | list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { | ||
444 | if (i->master == ct && del_timer(&i->timeout)) { | ||
445 | nf_ct_unlink_expect(i); | ||
446 | nf_conntrack_expect_put(i); | ||
447 | } | ||
448 | } | ||
449 | } | ||
450 | |||
451 | static void | ||
452 | clean_from_lists(struct nf_conn *ct) | ||
453 | { | ||
454 | unsigned int ho, hr; | ||
455 | |||
456 | DEBUGP("clean_from_lists(%p)\n", ct); | ||
457 | ASSERT_WRITE_LOCK(&nf_conntrack_lock); | ||
458 | |||
459 | ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
460 | hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
461 | LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); | ||
462 | LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); | ||
463 | |||
464 | /* Destroy all pending expectations */ | ||
465 | remove_expectations(ct); | ||
466 | } | ||
467 | |||
468 | static void | ||
469 | destroy_conntrack(struct nf_conntrack *nfct) | ||
470 | { | ||
471 | struct nf_conn *ct = (struct nf_conn *)nfct; | ||
472 | struct nf_conntrack_l3proto *l3proto; | ||
473 | struct nf_conntrack_protocol *proto; | ||
474 | |||
475 | DEBUGP("destroy_conntrack(%p)\n", ct); | ||
476 | NF_CT_ASSERT(atomic_read(&nfct->use) == 0); | ||
477 | NF_CT_ASSERT(!timer_pending(&ct->timeout)); | ||
478 | |||
479 | nf_conntrack_event(IPCT_DESTROY, ct); | ||
480 | set_bit(IPS_DYING_BIT, &ct->status); | ||
481 | |||
482 | /* To make sure we don't get any weird locking issues here: | ||
483 | * destroy_conntrack() MUST NOT be called with a write lock | ||
484 | * to nf_conntrack_lock!!! -HW */ | ||
485 | l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); | ||
486 | if (l3proto && l3proto->destroy) | ||
487 | l3proto->destroy(ct); | ||
488 | |||
489 | proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, | ||
490 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); | ||
491 | if (proto && proto->destroy) | ||
492 | proto->destroy(ct); | ||
493 | |||
494 | if (nf_conntrack_destroyed) | ||
495 | nf_conntrack_destroyed(ct); | ||
496 | |||
497 | write_lock_bh(&nf_conntrack_lock); | ||
498 | /* Expectations will have been removed in clean_from_lists, | ||
499 | * except TFTP can create an expectation on the first packet, | ||
500 | * before connection is in the list, so we need to clean here, | ||
501 | * too. */ | ||
502 | remove_expectations(ct); | ||
503 | |||
504 | /* We overload first tuple to link into unconfirmed list. */ | ||
505 | if (!nf_ct_is_confirmed(ct)) { | ||
506 | BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list)); | ||
507 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | ||
508 | } | ||
509 | |||
510 | NF_CT_STAT_INC(delete); | ||
511 | write_unlock_bh(&nf_conntrack_lock); | ||
512 | |||
513 | if (ct->master) | ||
514 | nf_ct_put(ct->master); | ||
515 | |||
516 | DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); | ||
517 | nf_conntrack_free(ct); | ||
518 | } | ||
519 | |||
520 | static void death_by_timeout(unsigned long ul_conntrack) | ||
521 | { | ||
522 | struct nf_conn *ct = (void *)ul_conntrack; | ||
523 | |||
524 | write_lock_bh(&nf_conntrack_lock); | ||
525 | /* Inside lock so preempt is disabled on module removal path. | ||
526 | * Otherwise we can get spurious warnings. */ | ||
527 | NF_CT_STAT_INC(delete_list); | ||
528 | clean_from_lists(ct); | ||
529 | write_unlock_bh(&nf_conntrack_lock); | ||
530 | nf_ct_put(ct); | ||
531 | } | ||
532 | |||
533 | static inline int | ||
534 | conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, | ||
535 | const struct nf_conntrack_tuple *tuple, | ||
536 | const struct nf_conn *ignored_conntrack) | ||
537 | { | ||
538 | ASSERT_READ_LOCK(&nf_conntrack_lock); | ||
539 | return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack | ||
540 | && nf_ct_tuple_equal(tuple, &i->tuple); | ||
541 | } | ||
542 | |||
543 | static struct nf_conntrack_tuple_hash * | ||
544 | __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, | ||
545 | const struct nf_conn *ignored_conntrack) | ||
546 | { | ||
547 | struct nf_conntrack_tuple_hash *h; | ||
548 | unsigned int hash = hash_conntrack(tuple); | ||
549 | |||
550 | ASSERT_READ_LOCK(&nf_conntrack_lock); | ||
551 | list_for_each_entry(h, &nf_conntrack_hash[hash], list) { | ||
552 | if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { | ||
553 | NF_CT_STAT_INC(found); | ||
554 | return h; | ||
555 | } | ||
556 | NF_CT_STAT_INC(searched); | ||
557 | } | ||
558 | |||
559 | return NULL; | ||
560 | } | ||
561 | |||
562 | /* Find a connection corresponding to a tuple. */ | ||
563 | struct nf_conntrack_tuple_hash * | ||
564 | nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, | ||
565 | const struct nf_conn *ignored_conntrack) | ||
566 | { | ||
567 | struct nf_conntrack_tuple_hash *h; | ||
568 | |||
569 | read_lock_bh(&nf_conntrack_lock); | ||
570 | h = __nf_conntrack_find(tuple, ignored_conntrack); | ||
571 | if (h) | ||
572 | atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); | ||
573 | read_unlock_bh(&nf_conntrack_lock); | ||
574 | |||
575 | return h; | ||
576 | } | ||
577 | |||
578 | /* Confirm a connection given skb; places it in hash table */ | ||
579 | int | ||
580 | __nf_conntrack_confirm(struct sk_buff **pskb) | ||
581 | { | ||
582 | unsigned int hash, repl_hash; | ||
583 | struct nf_conn *ct; | ||
584 | enum ip_conntrack_info ctinfo; | ||
585 | |||
586 | ct = nf_ct_get(*pskb, &ctinfo); | ||
587 | |||
588 | /* ipt_REJECT uses nf_conntrack_attach to attach related | ||
589 | ICMP/TCP RST packets in other direction. Actual packet | ||
590 | which created connection will be IP_CT_NEW or for an | ||
591 | expected connection, IP_CT_RELATED. */ | ||
592 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | ||
593 | return NF_ACCEPT; | ||
594 | |||
595 | hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
596 | repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
597 | |||
598 | /* We're not in hash table, and we refuse to set up related | ||
599 | connections for unconfirmed conns. But packet copies and | ||
600 | REJECT will give spurious warnings here. */ | ||
601 | /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ | ||
602 | |||
603 | /* No external references means noone else could have | ||
604 | confirmed us. */ | ||
605 | NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); | ||
606 | DEBUGP("Confirming conntrack %p\n", ct); | ||
607 | |||
608 | write_lock_bh(&nf_conntrack_lock); | ||
609 | |||
610 | /* See if there's one in the list already, including reverse: | ||
611 | NAT could have grabbed it without realizing, since we're | ||
612 | not in the hash. If there is, we lost race. */ | ||
613 | if (!LIST_FIND(&nf_conntrack_hash[hash], | ||
614 | conntrack_tuple_cmp, | ||
615 | struct nf_conntrack_tuple_hash *, | ||
616 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) | ||
617 | && !LIST_FIND(&nf_conntrack_hash[repl_hash], | ||
618 | conntrack_tuple_cmp, | ||
619 | struct nf_conntrack_tuple_hash *, | ||
620 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { | ||
621 | /* Remove from unconfirmed list */ | ||
622 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | ||
623 | |||
624 | list_prepend(&nf_conntrack_hash[hash], | ||
625 | &ct->tuplehash[IP_CT_DIR_ORIGINAL]); | ||
626 | list_prepend(&nf_conntrack_hash[repl_hash], | ||
627 | &ct->tuplehash[IP_CT_DIR_REPLY]); | ||
628 | /* Timer relative to confirmation time, not original | ||
629 | setting time, otherwise we'd get timer wrap in | ||
630 | weird delay cases. */ | ||
631 | ct->timeout.expires += jiffies; | ||
632 | add_timer(&ct->timeout); | ||
633 | atomic_inc(&ct->ct_general.use); | ||
634 | set_bit(IPS_CONFIRMED_BIT, &ct->status); | ||
635 | NF_CT_STAT_INC(insert); | ||
636 | write_unlock_bh(&nf_conntrack_lock); | ||
637 | if (ct->helper) | ||
638 | nf_conntrack_event_cache(IPCT_HELPER, *pskb); | ||
639 | #ifdef CONFIG_NF_NAT_NEEDED | ||
640 | if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || | ||
641 | test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) | ||
642 | nf_conntrack_event_cache(IPCT_NATINFO, *pskb); | ||
643 | #endif | ||
644 | nf_conntrack_event_cache(master_ct(ct) ? | ||
645 | IPCT_RELATED : IPCT_NEW, *pskb); | ||
646 | return NF_ACCEPT; | ||
647 | } | ||
648 | |||
649 | NF_CT_STAT_INC(insert_failed); | ||
650 | write_unlock_bh(&nf_conntrack_lock); | ||
651 | return NF_DROP; | ||
652 | } | ||
653 | |||
654 | /* Returns true if a connection correspondings to the tuple (required | ||
655 | for NAT). */ | ||
656 | int | ||
657 | nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, | ||
658 | const struct nf_conn *ignored_conntrack) | ||
659 | { | ||
660 | struct nf_conntrack_tuple_hash *h; | ||
661 | |||
662 | read_lock_bh(&nf_conntrack_lock); | ||
663 | h = __nf_conntrack_find(tuple, ignored_conntrack); | ||
664 | read_unlock_bh(&nf_conntrack_lock); | ||
665 | |||
666 | return h != NULL; | ||
667 | } | ||
668 | |||
669 | /* There's a small race here where we may free a just-assured | ||
670 | connection. Too bad: we're in trouble anyway. */ | ||
671 | static inline int unreplied(const struct nf_conntrack_tuple_hash *i) | ||
672 | { | ||
673 | return !(test_bit(IPS_ASSURED_BIT, | ||
674 | &nf_ct_tuplehash_to_ctrack(i)->status)); | ||
675 | } | ||
676 | |||
677 | static int early_drop(struct list_head *chain) | ||
678 | { | ||
679 | /* Traverse backwards: gives us oldest, which is roughly LRU */ | ||
680 | struct nf_conntrack_tuple_hash *h; | ||
681 | struct nf_conn *ct = NULL; | ||
682 | int dropped = 0; | ||
683 | |||
684 | read_lock_bh(&nf_conntrack_lock); | ||
685 | h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); | ||
686 | if (h) { | ||
687 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
688 | atomic_inc(&ct->ct_general.use); | ||
689 | } | ||
690 | read_unlock_bh(&nf_conntrack_lock); | ||
691 | |||
692 | if (!ct) | ||
693 | return dropped; | ||
694 | |||
695 | if (del_timer(&ct->timeout)) { | ||
696 | death_by_timeout((unsigned long)ct); | ||
697 | dropped = 1; | ||
698 | NF_CT_STAT_INC(early_drop); | ||
699 | } | ||
700 | nf_ct_put(ct); | ||
701 | return dropped; | ||
702 | } | ||
703 | |||
704 | static inline int helper_cmp(const struct nf_conntrack_helper *i, | ||
705 | const struct nf_conntrack_tuple *rtuple) | ||
706 | { | ||
707 | return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); | ||
708 | } | ||
709 | |||
710 | static struct nf_conntrack_helper * | ||
711 | nf_ct_find_helper(const struct nf_conntrack_tuple *tuple) | ||
712 | { | ||
713 | return LIST_FIND(&helpers, helper_cmp, | ||
714 | struct nf_conntrack_helper *, | ||
715 | tuple); | ||
716 | } | ||
717 | |||
718 | static struct nf_conn * | ||
719 | __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, | ||
720 | const struct nf_conntrack_tuple *repl, | ||
721 | const struct nf_conntrack_l3proto *l3proto) | ||
722 | { | ||
723 | struct nf_conn *conntrack = NULL; | ||
724 | u_int32_t features = 0; | ||
725 | |||
726 | if (!nf_conntrack_hash_rnd_initted) { | ||
727 | get_random_bytes(&nf_conntrack_hash_rnd, 4); | ||
728 | nf_conntrack_hash_rnd_initted = 1; | ||
729 | } | ||
730 | |||
731 | if (nf_conntrack_max | ||
732 | && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) { | ||
733 | unsigned int hash = hash_conntrack(orig); | ||
734 | /* Try dropping from this hash chain. */ | ||
735 | if (!early_drop(&nf_conntrack_hash[hash])) { | ||
736 | if (net_ratelimit()) | ||
737 | printk(KERN_WARNING | ||
738 | "nf_conntrack: table full, dropping" | ||
739 | " packet.\n"); | ||
740 | return ERR_PTR(-ENOMEM); | ||
741 | } | ||
742 | } | ||
743 | |||
744 | /* find features needed by this conntrack. */ | ||
745 | features = l3proto->get_features(orig); | ||
746 | read_lock_bh(&nf_conntrack_lock); | ||
747 | if (nf_ct_find_helper(repl) != NULL) | ||
748 | features |= NF_CT_F_HELP; | ||
749 | read_unlock_bh(&nf_conntrack_lock); | ||
750 | |||
751 | DEBUGP("nf_conntrack_alloc: features=0x%x\n", features); | ||
752 | |||
753 | read_lock_bh(&nf_ct_cache_lock); | ||
754 | |||
755 | if (!nf_ct_cache[features].use) { | ||
756 | DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n", | ||
757 | features); | ||
758 | goto out; | ||
759 | } | ||
760 | |||
761 | conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC); | ||
762 | if (conntrack == NULL) { | ||
763 | DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n"); | ||
764 | goto out; | ||
765 | } | ||
766 | |||
767 | memset(conntrack, 0, nf_ct_cache[features].size); | ||
768 | conntrack->features = features; | ||
769 | if (nf_ct_cache[features].init_conntrack && | ||
770 | nf_ct_cache[features].init_conntrack(conntrack, features) < 0) { | ||
771 | DEBUGP("nf_conntrack_alloc: failed to init\n"); | ||
772 | kmem_cache_free(nf_ct_cache[features].cachep, conntrack); | ||
773 | conntrack = NULL; | ||
774 | goto out; | ||
775 | } | ||
776 | |||
777 | atomic_set(&conntrack->ct_general.use, 1); | ||
778 | conntrack->ct_general.destroy = destroy_conntrack; | ||
779 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; | ||
780 | conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; | ||
781 | /* Don't set timer yet: wait for confirmation */ | ||
782 | init_timer(&conntrack->timeout); | ||
783 | conntrack->timeout.data = (unsigned long)conntrack; | ||
784 | conntrack->timeout.function = death_by_timeout; | ||
785 | |||
786 | atomic_inc(&nf_conntrack_count); | ||
787 | out: | ||
788 | read_unlock_bh(&nf_ct_cache_lock); | ||
789 | return conntrack; | ||
790 | } | ||
791 | |||
792 | struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, | ||
793 | const struct nf_conntrack_tuple *repl) | ||
794 | { | ||
795 | struct nf_conntrack_l3proto *l3proto; | ||
796 | |||
797 | l3proto = nf_ct_find_l3proto(orig->src.l3num); | ||
798 | return __nf_conntrack_alloc(orig, repl, l3proto); | ||
799 | } | ||
800 | |||
801 | void nf_conntrack_free(struct nf_conn *conntrack) | ||
802 | { | ||
803 | u_int32_t features = conntrack->features; | ||
804 | NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM); | ||
805 | DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features, | ||
806 | conntrack); | ||
807 | kmem_cache_free(nf_ct_cache[features].cachep, conntrack); | ||
808 | atomic_dec(&nf_conntrack_count); | ||
809 | } | ||
810 | |||
811 | /* Allocate a new conntrack: we return -ENOMEM if classification | ||
812 | failed due to stress. Otherwise it really is unclassifiable. */ | ||
813 | static struct nf_conntrack_tuple_hash * | ||
814 | init_conntrack(const struct nf_conntrack_tuple *tuple, | ||
815 | struct nf_conntrack_l3proto *l3proto, | ||
816 | struct nf_conntrack_protocol *protocol, | ||
817 | struct sk_buff *skb, | ||
818 | unsigned int dataoff) | ||
819 | { | ||
820 | struct nf_conn *conntrack; | ||
821 | struct nf_conntrack_tuple repl_tuple; | ||
822 | struct nf_conntrack_expect *exp; | ||
823 | |||
824 | if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) { | ||
825 | DEBUGP("Can't invert tuple.\n"); | ||
826 | return NULL; | ||
827 | } | ||
828 | |||
829 | conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto); | ||
830 | if (conntrack == NULL || IS_ERR(conntrack)) { | ||
831 | DEBUGP("Can't allocate conntrack.\n"); | ||
832 | return (struct nf_conntrack_tuple_hash *)conntrack; | ||
833 | } | ||
834 | |||
835 | if (!protocol->new(conntrack, skb, dataoff)) { | ||
836 | nf_conntrack_free(conntrack); | ||
837 | DEBUGP("init conntrack: can't track with proto module\n"); | ||
838 | return NULL; | ||
839 | } | ||
840 | |||
841 | write_lock_bh(&nf_conntrack_lock); | ||
842 | exp = find_expectation(tuple); | ||
843 | |||
844 | if (exp) { | ||
845 | DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", | ||
846 | conntrack, exp); | ||
847 | /* Welcome, Mr. Bond. We've been expecting you... */ | ||
848 | __set_bit(IPS_EXPECTED_BIT, &conntrack->status); | ||
849 | conntrack->master = exp->master; | ||
850 | #ifdef CONFIG_NF_CONNTRACK_MARK | ||
851 | conntrack->mark = exp->master->mark; | ||
852 | #endif | ||
853 | nf_conntrack_get(&conntrack->master->ct_general); | ||
854 | NF_CT_STAT_INC(expect_new); | ||
855 | } else { | ||
856 | conntrack->helper = nf_ct_find_helper(&repl_tuple); | ||
857 | |||
858 | NF_CT_STAT_INC(new); | ||
859 | } | ||
860 | |||
861 | /* Overload tuple linked list to put us in unconfirmed list. */ | ||
862 | list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); | ||
863 | |||
864 | write_unlock_bh(&nf_conntrack_lock); | ||
865 | |||
866 | if (exp) { | ||
867 | if (exp->expectfn) | ||
868 | exp->expectfn(conntrack, exp); | ||
869 | nf_conntrack_expect_put(exp); | ||
870 | } | ||
871 | |||
872 | return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; | ||
873 | } | ||
874 | |||
875 | /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ | ||
876 | static inline struct nf_conn * | ||
877 | resolve_normal_ct(struct sk_buff *skb, | ||
878 | unsigned int dataoff, | ||
879 | u_int16_t l3num, | ||
880 | u_int8_t protonum, | ||
881 | struct nf_conntrack_l3proto *l3proto, | ||
882 | struct nf_conntrack_protocol *proto, | ||
883 | int *set_reply, | ||
884 | enum ip_conntrack_info *ctinfo) | ||
885 | { | ||
886 | struct nf_conntrack_tuple tuple; | ||
887 | struct nf_conntrack_tuple_hash *h; | ||
888 | struct nf_conn *ct; | ||
889 | |||
890 | if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data), | ||
891 | dataoff, l3num, protonum, &tuple, l3proto, | ||
892 | proto)) { | ||
893 | DEBUGP("resolve_normal_ct: Can't get tuple\n"); | ||
894 | return NULL; | ||
895 | } | ||
896 | |||
897 | /* look for tuple match */ | ||
898 | h = nf_conntrack_find_get(&tuple, NULL); | ||
899 | if (!h) { | ||
900 | h = init_conntrack(&tuple, l3proto, proto, skb, dataoff); | ||
901 | if (!h) | ||
902 | return NULL; | ||
903 | if (IS_ERR(h)) | ||
904 | return (void *)h; | ||
905 | } | ||
906 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
907 | |||
908 | /* It exists; we have (non-exclusive) reference. */ | ||
909 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { | ||
910 | *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; | ||
911 | /* Please set reply bit if this packet OK */ | ||
912 | *set_reply = 1; | ||
913 | } else { | ||
914 | /* Once we've had two way comms, always ESTABLISHED. */ | ||
915 | if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { | ||
916 | DEBUGP("nf_conntrack_in: normal packet for %p\n", ct); | ||
917 | *ctinfo = IP_CT_ESTABLISHED; | ||
918 | } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { | ||
919 | DEBUGP("nf_conntrack_in: related packet for %p\n", ct); | ||
920 | *ctinfo = IP_CT_RELATED; | ||
921 | } else { | ||
922 | DEBUGP("nf_conntrack_in: new packet for %p\n", ct); | ||
923 | *ctinfo = IP_CT_NEW; | ||
924 | } | ||
925 | *set_reply = 0; | ||
926 | } | ||
927 | skb->nfct = &ct->ct_general; | ||
928 | skb->nfctinfo = *ctinfo; | ||
929 | return ct; | ||
930 | } | ||
931 | |||
932 | unsigned int | ||
933 | nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) | ||
934 | { | ||
935 | struct nf_conn *ct; | ||
936 | enum ip_conntrack_info ctinfo; | ||
937 | struct nf_conntrack_l3proto *l3proto; | ||
938 | struct nf_conntrack_protocol *proto; | ||
939 | unsigned int dataoff; | ||
940 | u_int8_t protonum; | ||
941 | int set_reply = 0; | ||
942 | int ret; | ||
943 | |||
944 | /* Previously seen (loopback or untracked)? Ignore. */ | ||
945 | if ((*pskb)->nfct) { | ||
946 | NF_CT_STAT_INC(ignore); | ||
947 | return NF_ACCEPT; | ||
948 | } | ||
949 | |||
950 | l3proto = nf_ct_find_l3proto((u_int16_t)pf); | ||
951 | if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { | ||
952 | DEBUGP("not prepared to track yet or error occured\n"); | ||
953 | return -ret; | ||
954 | } | ||
955 | |||
956 | proto = nf_ct_find_proto((u_int16_t)pf, protonum); | ||
957 | |||
958 | /* It may be an special packet, error, unclean... | ||
959 | * inverse of the return code tells to the netfilter | ||
960 | * core what to do with the packet. */ | ||
961 | if (proto->error != NULL && | ||
962 | (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { | ||
963 | NF_CT_STAT_INC(error); | ||
964 | NF_CT_STAT_INC(invalid); | ||
965 | return -ret; | ||
966 | } | ||
967 | |||
968 | ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto, | ||
969 | &set_reply, &ctinfo); | ||
970 | if (!ct) { | ||
971 | /* Not valid part of a connection */ | ||
972 | NF_CT_STAT_INC(invalid); | ||
973 | return NF_ACCEPT; | ||
974 | } | ||
975 | |||
976 | if (IS_ERR(ct)) { | ||
977 | /* Too stressed to deal. */ | ||
978 | NF_CT_STAT_INC(drop); | ||
979 | return NF_DROP; | ||
980 | } | ||
981 | |||
982 | NF_CT_ASSERT((*pskb)->nfct); | ||
983 | |||
984 | ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); | ||
985 | if (ret < 0) { | ||
986 | /* Invalid: inverse of the return code tells | ||
987 | * the netfilter core what to do */ | ||
988 | DEBUGP("nf_conntrack_in: Can't track with proto module\n"); | ||
989 | nf_conntrack_put((*pskb)->nfct); | ||
990 | (*pskb)->nfct = NULL; | ||
991 | NF_CT_STAT_INC(invalid); | ||
992 | return -ret; | ||
993 | } | ||
994 | |||
995 | if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) | ||
996 | nf_conntrack_event_cache(IPCT_STATUS, *pskb); | ||
997 | |||
998 | return ret; | ||
999 | } | ||
1000 | |||
1001 | int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, | ||
1002 | const struct nf_conntrack_tuple *orig) | ||
1003 | { | ||
1004 | return nf_ct_invert_tuple(inverse, orig, | ||
1005 | nf_ct_find_l3proto(orig->src.l3num), | ||
1006 | nf_ct_find_proto(orig->src.l3num, | ||
1007 | orig->dst.protonum)); | ||
1008 | } | ||
1009 | |||
1010 | /* Would two expected things clash? */ | ||
1011 | static inline int expect_clash(const struct nf_conntrack_expect *a, | ||
1012 | const struct nf_conntrack_expect *b) | ||
1013 | { | ||
1014 | /* Part covered by intersection of masks must be unequal, | ||
1015 | otherwise they clash */ | ||
1016 | struct nf_conntrack_tuple intersect_mask; | ||
1017 | int count; | ||
1018 | |||
1019 | intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; | ||
1020 | intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; | ||
1021 | intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; | ||
1022 | intersect_mask.dst.protonum = a->mask.dst.protonum | ||
1023 | & b->mask.dst.protonum; | ||
1024 | |||
1025 | for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ | ||
1026 | intersect_mask.src.u3.all[count] = | ||
1027 | a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; | ||
1028 | } | ||
1029 | |||
1030 | for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ | ||
1031 | intersect_mask.dst.u3.all[count] = | ||
1032 | a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; | ||
1033 | } | ||
1034 | |||
1035 | return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); | ||
1036 | } | ||
1037 | |||
1038 | static inline int expect_matches(const struct nf_conntrack_expect *a, | ||
1039 | const struct nf_conntrack_expect *b) | ||
1040 | { | ||
1041 | return a->master == b->master | ||
1042 | && nf_ct_tuple_equal(&a->tuple, &b->tuple) | ||
1043 | && nf_ct_tuple_equal(&a->mask, &b->mask); | ||
1044 | } | ||
1045 | |||
1046 | /* Generally a bad idea to call this: could have matched already. */ | ||
1047 | void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) | ||
1048 | { | ||
1049 | struct nf_conntrack_expect *i; | ||
1050 | |||
1051 | write_lock_bh(&nf_conntrack_lock); | ||
1052 | /* choose the the oldest expectation to evict */ | ||
1053 | list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { | ||
1054 | if (expect_matches(i, exp) && del_timer(&i->timeout)) { | ||
1055 | nf_ct_unlink_expect(i); | ||
1056 | write_unlock_bh(&nf_conntrack_lock); | ||
1057 | nf_conntrack_expect_put(i); | ||
1058 | return; | ||
1059 | } | ||
1060 | } | ||
1061 | write_unlock_bh(&nf_conntrack_lock); | ||
1062 | } | ||
1063 | |||
1064 | /* We don't increase the master conntrack refcount for non-fulfilled | ||
1065 | * conntracks. During the conntrack destruction, the expectations are | ||
1066 | * always killed before the conntrack itself */ | ||
1067 | struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) | ||
1068 | { | ||
1069 | struct nf_conntrack_expect *new; | ||
1070 | |||
1071 | new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); | ||
1072 | if (!new) { | ||
1073 | DEBUGP("expect_related: OOM allocating expect\n"); | ||
1074 | return NULL; | ||
1075 | } | ||
1076 | new->master = me; | ||
1077 | atomic_set(&new->use, 1); | ||
1078 | return new; | ||
1079 | } | ||
1080 | |||
1081 | void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) | ||
1082 | { | ||
1083 | if (atomic_dec_and_test(&exp->use)) | ||
1084 | kmem_cache_free(nf_conntrack_expect_cachep, exp); | ||
1085 | } | ||
1086 | |||
1087 | static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) | ||
1088 | { | ||
1089 | atomic_inc(&exp->use); | ||
1090 | exp->master->expecting++; | ||
1091 | list_add(&exp->list, &nf_conntrack_expect_list); | ||
1092 | |||
1093 | init_timer(&exp->timeout); | ||
1094 | exp->timeout.data = (unsigned long)exp; | ||
1095 | exp->timeout.function = expectation_timed_out; | ||
1096 | exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; | ||
1097 | add_timer(&exp->timeout); | ||
1098 | |||
1099 | atomic_inc(&exp->use); | ||
1100 | NF_CT_STAT_INC(expect_create); | ||
1101 | } | ||
1102 | |||
1103 | /* Race with expectations being used means we could have none to find; OK. */ | ||
1104 | static void evict_oldest_expect(struct nf_conn *master) | ||
1105 | { | ||
1106 | struct nf_conntrack_expect *i; | ||
1107 | |||
1108 | list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { | ||
1109 | if (i->master == master) { | ||
1110 | if (del_timer(&i->timeout)) { | ||
1111 | nf_ct_unlink_expect(i); | ||
1112 | nf_conntrack_expect_put(i); | ||
1113 | } | ||
1114 | break; | ||
1115 | } | ||
1116 | } | ||
1117 | } | ||
1118 | |||
1119 | static inline int refresh_timer(struct nf_conntrack_expect *i) | ||
1120 | { | ||
1121 | if (!del_timer(&i->timeout)) | ||
1122 | return 0; | ||
1123 | |||
1124 | i->timeout.expires = jiffies + i->master->helper->timeout*HZ; | ||
1125 | add_timer(&i->timeout); | ||
1126 | return 1; | ||
1127 | } | ||
1128 | |||
1129 | int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) | ||
1130 | { | ||
1131 | struct nf_conntrack_expect *i; | ||
1132 | int ret; | ||
1133 | |||
1134 | DEBUGP("nf_conntrack_expect_related %p\n", related_to); | ||
1135 | DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple); | ||
1136 | DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask); | ||
1137 | |||
1138 | write_lock_bh(&nf_conntrack_lock); | ||
1139 | list_for_each_entry(i, &nf_conntrack_expect_list, list) { | ||
1140 | if (expect_matches(i, expect)) { | ||
1141 | /* Refresh timer: if it's dying, ignore.. */ | ||
1142 | if (refresh_timer(i)) { | ||
1143 | ret = 0; | ||
1144 | goto out; | ||
1145 | } | ||
1146 | } else if (expect_clash(i, expect)) { | ||
1147 | ret = -EBUSY; | ||
1148 | goto out; | ||
1149 | } | ||
1150 | } | ||
1151 | /* Will be over limit? */ | ||
1152 | if (expect->master->helper->max_expected && | ||
1153 | expect->master->expecting >= expect->master->helper->max_expected) | ||
1154 | evict_oldest_expect(expect->master); | ||
1155 | |||
1156 | nf_conntrack_expect_insert(expect); | ||
1157 | nf_conntrack_expect_event(IPEXP_NEW, expect); | ||
1158 | ret = 0; | ||
1159 | out: | ||
1160 | write_unlock_bh(&nf_conntrack_lock); | ||
1161 | return ret; | ||
1162 | } | ||
1163 | |||
1164 | /* Alter reply tuple (maybe alter helper). This is for NAT, and is | ||
1165 | implicitly racy: see __nf_conntrack_confirm */ | ||
1166 | void nf_conntrack_alter_reply(struct nf_conn *conntrack, | ||
1167 | const struct nf_conntrack_tuple *newreply) | ||
1168 | { | ||
1169 | write_lock_bh(&nf_conntrack_lock); | ||
1170 | /* Should be unconfirmed, so not in hash table yet */ | ||
1171 | NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack)); | ||
1172 | |||
1173 | DEBUGP("Altering reply tuple of %p to ", conntrack); | ||
1174 | NF_CT_DUMP_TUPLE(newreply); | ||
1175 | |||
1176 | conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; | ||
1177 | if (!conntrack->master && conntrack->expecting == 0) | ||
1178 | conntrack->helper = nf_ct_find_helper(newreply); | ||
1179 | write_unlock_bh(&nf_conntrack_lock); | ||
1180 | } | ||
1181 | |||
1182 | int nf_conntrack_helper_register(struct nf_conntrack_helper *me) | ||
1183 | { | ||
1184 | int ret; | ||
1185 | BUG_ON(me->timeout == 0); | ||
1186 | |||
1187 | ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", | ||
1188 | sizeof(struct nf_conn) | ||
1189 | + sizeof(union nf_conntrack_help) | ||
1190 | + __alignof__(union nf_conntrack_help), | ||
1191 | init_conntrack_for_helper); | ||
1192 | if (ret < 0) { | ||
1193 | printk(KERN_ERR "nf_conntrack_helper_reigster: Unable to create slab cache for conntracks\n"); | ||
1194 | return ret; | ||
1195 | } | ||
1196 | write_lock_bh(&nf_conntrack_lock); | ||
1197 | list_prepend(&helpers, me); | ||
1198 | write_unlock_bh(&nf_conntrack_lock); | ||
1199 | |||
1200 | return 0; | ||
1201 | } | ||
1202 | |||
1203 | static inline int unhelp(struct nf_conntrack_tuple_hash *i, | ||
1204 | const struct nf_conntrack_helper *me) | ||
1205 | { | ||
1206 | if (nf_ct_tuplehash_to_ctrack(i)->helper == me) { | ||
1207 | nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i)); | ||
1208 | nf_ct_tuplehash_to_ctrack(i)->helper = NULL; | ||
1209 | } | ||
1210 | return 0; | ||
1211 | } | ||
1212 | |||
1213 | void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) | ||
1214 | { | ||
1215 | unsigned int i; | ||
1216 | struct nf_conntrack_expect *exp, *tmp; | ||
1217 | |||
1218 | /* Need write lock here, to delete helper. */ | ||
1219 | write_lock_bh(&nf_conntrack_lock); | ||
1220 | LIST_DELETE(&helpers, me); | ||
1221 | |||
1222 | /* Get rid of expectations */ | ||
1223 | list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { | ||
1224 | if (exp->master->helper == me && del_timer(&exp->timeout)) { | ||
1225 | nf_ct_unlink_expect(exp); | ||
1226 | nf_conntrack_expect_put(exp); | ||
1227 | } | ||
1228 | } | ||
1229 | |||
1230 | /* Get rid of expecteds, set helpers to NULL. */ | ||
1231 | LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); | ||
1232 | for (i = 0; i < nf_conntrack_htable_size; i++) | ||
1233 | LIST_FIND_W(&nf_conntrack_hash[i], unhelp, | ||
1234 | struct nf_conntrack_tuple_hash *, me); | ||
1235 | write_unlock_bh(&nf_conntrack_lock); | ||
1236 | |||
1237 | /* Someone could be still looking at the helper in a bh. */ | ||
1238 | synchronize_net(); | ||
1239 | } | ||
1240 | |||
1241 | /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ | ||
1242 | void __nf_ct_refresh_acct(struct nf_conn *ct, | ||
1243 | enum ip_conntrack_info ctinfo, | ||
1244 | const struct sk_buff *skb, | ||
1245 | unsigned long extra_jiffies, | ||
1246 | int do_acct) | ||
1247 | { | ||
1248 | int event = 0; | ||
1249 | |||
1250 | NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); | ||
1251 | NF_CT_ASSERT(skb); | ||
1252 | |||
1253 | write_lock_bh(&nf_conntrack_lock); | ||
1254 | |||
1255 | /* If not in hash table, timer will not be active yet */ | ||
1256 | if (!nf_ct_is_confirmed(ct)) { | ||
1257 | ct->timeout.expires = extra_jiffies; | ||
1258 | event = IPCT_REFRESH; | ||
1259 | } else { | ||
1260 | /* Need del_timer for race avoidance (may already be dying). */ | ||
1261 | if (del_timer(&ct->timeout)) { | ||
1262 | ct->timeout.expires = jiffies + extra_jiffies; | ||
1263 | add_timer(&ct->timeout); | ||
1264 | event = IPCT_REFRESH; | ||
1265 | } | ||
1266 | } | ||
1267 | |||
1268 | #ifdef CONFIG_NF_CT_ACCT | ||
1269 | if (do_acct) { | ||
1270 | ct->counters[CTINFO2DIR(ctinfo)].packets++; | ||
1271 | ct->counters[CTINFO2DIR(ctinfo)].bytes += | ||
1272 | skb->len - (unsigned int)(skb->nh.raw - skb->data); | ||
1273 | if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) | ||
1274 | || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) | ||
1275 | event |= IPCT_COUNTER_FILLING; | ||
1276 | } | ||
1277 | #endif | ||
1278 | |||
1279 | write_unlock_bh(&nf_conntrack_lock); | ||
1280 | |||
1281 | /* must be unlocked when calling event cache */ | ||
1282 | if (event) | ||
1283 | nf_conntrack_event_cache(event, skb); | ||
1284 | } | ||
1285 | |||
1286 | /* Used by ipt_REJECT and ip6t_REJECT. */ | ||
1287 | void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) | ||
1288 | { | ||
1289 | struct nf_conn *ct; | ||
1290 | enum ip_conntrack_info ctinfo; | ||
1291 | |||
1292 | /* This ICMP is in reverse direction to the packet which caused it */ | ||
1293 | ct = nf_ct_get(skb, &ctinfo); | ||
1294 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) | ||
1295 | ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; | ||
1296 | else | ||
1297 | ctinfo = IP_CT_RELATED; | ||
1298 | |||
1299 | /* Attach to new skbuff, and increment count */ | ||
1300 | nskb->nfct = &ct->ct_general; | ||
1301 | nskb->nfctinfo = ctinfo; | ||
1302 | nf_conntrack_get(nskb->nfct); | ||
1303 | } | ||
1304 | |||
/*
 * List-walk adapter: translate hash entry @i into its conntrack and pass
 * it, together with @data, to the caller-supplied @iter predicate.
 */
static inline int
do_iter(const struct nf_conntrack_tuple_hash *i,
	int (*iter)(struct nf_conn *i, void *data),
	void *data)
{
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);

	return iter(ct, data);
}
1312 | |||
1313 | /* Bring out ya dead! */ | ||
1314 | static struct nf_conntrack_tuple_hash * | ||
1315 | get_next_corpse(int (*iter)(struct nf_conn *i, void *data), | ||
1316 | void *data, unsigned int *bucket) | ||
1317 | { | ||
1318 | struct nf_conntrack_tuple_hash *h = NULL; | ||
1319 | |||
1320 | write_lock_bh(&nf_conntrack_lock); | ||
1321 | for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { | ||
1322 | h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, | ||
1323 | struct nf_conntrack_tuple_hash *, iter, data); | ||
1324 | if (h) | ||
1325 | break; | ||
1326 | } | ||
1327 | if (!h) | ||
1328 | h = LIST_FIND_W(&unconfirmed, do_iter, | ||
1329 | struct nf_conntrack_tuple_hash *, iter, data); | ||
1330 | if (h) | ||
1331 | atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); | ||
1332 | write_unlock_bh(&nf_conntrack_lock); | ||
1333 | |||
1334 | return h; | ||
1335 | } | ||
1336 | |||
1337 | void | ||
1338 | nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) | ||
1339 | { | ||
1340 | struct nf_conntrack_tuple_hash *h; | ||
1341 | unsigned int bucket = 0; | ||
1342 | |||
1343 | while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { | ||
1344 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | ||
1345 | /* Time to push up daises... */ | ||
1346 | if (del_timer(&ct->timeout)) | ||
1347 | death_by_timeout((unsigned long)ct); | ||
1348 | /* ... else the timer will get him soon. */ | ||
1349 | |||
1350 | nf_ct_put(ct); | ||
1351 | } | ||
1352 | } | ||
1353 | |||
/* Iterator predicate that selects every conntrack: always returns 1. */
static int kill_all(struct nf_conn *i, void *data)
{
	return 1;
}
1358 | |||
1359 | static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) | ||
1360 | { | ||
1361 | if (vmalloced) | ||
1362 | vfree(hash); | ||
1363 | else | ||
1364 | free_pages((unsigned long)hash, | ||
1365 | get_order(sizeof(struct list_head) * size)); | ||
1366 | } | ||
1367 | |||
1368 | /* Mishearing the voices in his head, our hero wonders how he's | ||
1369 | supposed to kill the mall. */ | ||
1370 | void nf_conntrack_cleanup(void) | ||
1371 | { | ||
1372 | int i; | ||
1373 | |||
1374 | /* This makes sure all current packets have passed through | ||
1375 | netfilter framework. Roll on, two-stage module | ||
1376 | delete... */ | ||
1377 | synchronize_net(); | ||
1378 | |||
1379 | nf_ct_event_cache_flush(); | ||
1380 | i_see_dead_people: | ||
1381 | nf_ct_iterate_cleanup(kill_all, NULL); | ||
1382 | if (atomic_read(&nf_conntrack_count) != 0) { | ||
1383 | schedule(); | ||
1384 | goto i_see_dead_people; | ||
1385 | } | ||
1386 | |||
1387 | for (i = 0; i < NF_CT_F_NUM; i++) { | ||
1388 | if (nf_ct_cache[i].use == 0) | ||
1389 | continue; | ||
1390 | |||
1391 | NF_CT_ASSERT(nf_ct_cache[i].use == 1); | ||
1392 | nf_ct_cache[i].use = 1; | ||
1393 | nf_conntrack_unregister_cache(i); | ||
1394 | } | ||
1395 | kmem_cache_destroy(nf_conntrack_expect_cachep); | ||
1396 | free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, | ||
1397 | nf_conntrack_htable_size); | ||
1398 | } | ||
1399 | |||
1400 | static struct list_head *alloc_hashtable(int size, int *vmalloced) | ||
1401 | { | ||
1402 | struct list_head *hash; | ||
1403 | unsigned int i; | ||
1404 | |||
1405 | *vmalloced = 0; | ||
1406 | hash = (void*)__get_free_pages(GFP_KERNEL, | ||
1407 | get_order(sizeof(struct list_head) | ||
1408 | * size)); | ||
1409 | if (!hash) { | ||
1410 | *vmalloced = 1; | ||
1411 | printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); | ||
1412 | hash = vmalloc(sizeof(struct list_head) * size); | ||
1413 | } | ||
1414 | |||
1415 | if (hash) | ||
1416 | for (i = 0; i < size; i++) | ||
1417 | INIT_LIST_HEAD(&hash[i]); | ||
1418 | |||
1419 | return hash; | ||
1420 | } | ||
1421 | |||
1422 | int set_hashsize(const char *val, struct kernel_param *kp) | ||
1423 | { | ||
1424 | int i, bucket, hashsize, vmalloced; | ||
1425 | int old_vmalloced, old_size; | ||
1426 | int rnd; | ||
1427 | struct list_head *hash, *old_hash; | ||
1428 | struct nf_conntrack_tuple_hash *h; | ||
1429 | |||
1430 | /* On boot, we can set this without any fancy locking. */ | ||
1431 | if (!nf_conntrack_htable_size) | ||
1432 | return param_set_uint(val, kp); | ||
1433 | |||
1434 | hashsize = simple_strtol(val, NULL, 0); | ||
1435 | if (!hashsize) | ||
1436 | return -EINVAL; | ||
1437 | |||
1438 | hash = alloc_hashtable(hashsize, &vmalloced); | ||
1439 | if (!hash) | ||
1440 | return -ENOMEM; | ||
1441 | |||
1442 | /* We have to rehahs for the new table anyway, so we also can | ||
1443 | * use a newrandom seed */ | ||
1444 | get_random_bytes(&rnd, 4); | ||
1445 | |||
1446 | write_lock_bh(&nf_conntrack_lock); | ||
1447 | for (i = 0; i < nf_conntrack_htable_size; i++) { | ||
1448 | while (!list_empty(&nf_conntrack_hash[i])) { | ||
1449 | h = list_entry(nf_conntrack_hash[i].next, | ||
1450 | struct nf_conntrack_tuple_hash, list); | ||
1451 | list_del(&h->list); | ||
1452 | bucket = __hash_conntrack(&h->tuple, hashsize, rnd); | ||
1453 | list_add_tail(&h->list, &hash[bucket]); | ||
1454 | } | ||
1455 | } | ||
1456 | old_size = nf_conntrack_htable_size; | ||
1457 | old_vmalloced = nf_conntrack_vmalloc; | ||
1458 | old_hash = nf_conntrack_hash; | ||
1459 | |||
1460 | nf_conntrack_htable_size = hashsize; | ||
1461 | nf_conntrack_vmalloc = vmalloced; | ||
1462 | nf_conntrack_hash = hash; | ||
1463 | nf_conntrack_hash_rnd = rnd; | ||
1464 | write_unlock_bh(&nf_conntrack_lock); | ||
1465 | |||
1466 | free_conntrack_hash(old_hash, old_vmalloced, old_size); | ||
1467 | return 0; | ||
1468 | } | ||
1469 | |||
1470 | module_param_call(hashsize, set_hashsize, param_get_uint, | ||
1471 | &nf_conntrack_htable_size, 0600); | ||
1472 | |||
1473 | int __init nf_conntrack_init(void) | ||
1474 | { | ||
1475 | unsigned int i; | ||
1476 | int ret; | ||
1477 | |||
1478 | /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB | ||
1479 | * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ | ||
1480 | if (!nf_conntrack_htable_size) { | ||
1481 | nf_conntrack_htable_size | ||
1482 | = (((num_physpages << PAGE_SHIFT) / 16384) | ||
1483 | / sizeof(struct list_head)); | ||
1484 | if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) | ||
1485 | nf_conntrack_htable_size = 8192; | ||
1486 | if (nf_conntrack_htable_size < 16) | ||
1487 | nf_conntrack_htable_size = 16; | ||
1488 | } | ||
1489 | nf_conntrack_max = 8 * nf_conntrack_htable_size; | ||
1490 | |||
1491 | printk("nf_conntrack version %s (%u buckets, %d max)\n", | ||
1492 | NF_CONNTRACK_VERSION, nf_conntrack_htable_size, | ||
1493 | nf_conntrack_max); | ||
1494 | |||
1495 | nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size, | ||
1496 | &nf_conntrack_vmalloc); | ||
1497 | if (!nf_conntrack_hash) { | ||
1498 | printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); | ||
1499 | goto err_out; | ||
1500 | } | ||
1501 | |||
1502 | ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic", | ||
1503 | sizeof(struct nf_conn), NULL); | ||
1504 | if (ret < 0) { | ||
1505 | printk(KERN_ERR "Unable to create nf_conn slab cache\n"); | ||
1506 | goto err_free_hash; | ||
1507 | } | ||
1508 | |||
1509 | nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect", | ||
1510 | sizeof(struct nf_conntrack_expect), | ||
1511 | 0, 0, NULL, NULL); | ||
1512 | if (!nf_conntrack_expect_cachep) { | ||
1513 | printk(KERN_ERR "Unable to create nf_expect slab cache\n"); | ||
1514 | goto err_free_conntrack_slab; | ||
1515 | } | ||
1516 | |||
1517 | /* Don't NEED lock here, but good form anyway. */ | ||
1518 | write_lock_bh(&nf_conntrack_lock); | ||
1519 | for (i = 0; i < PF_MAX; i++) | ||
1520 | nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto; | ||
1521 | write_unlock_bh(&nf_conntrack_lock); | ||
1522 | |||
1523 | /* Set up fake conntrack: | ||
1524 | - to never be deleted, not in any hashes */ | ||
1525 | atomic_set(&nf_conntrack_untracked.ct_general.use, 1); | ||
1526 | /* - and look it like as a confirmed connection */ | ||
1527 | set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); | ||
1528 | |||
1529 | return ret; | ||
1530 | |||
1531 | err_free_conntrack_slab: | ||
1532 | nf_conntrack_unregister_cache(NF_CT_F_BASIC); | ||
1533 | err_free_hash: | ||
1534 | free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, | ||
1535 | nf_conntrack_htable_size); | ||
1536 | err_out: | ||
1537 | return -ENOMEM; | ||
1538 | } | ||
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c new file mode 100644 index 000000000000..65080e269f27 --- /dev/null +++ b/net/netfilter/nf_conntrack_ftp.c | |||
@@ -0,0 +1,698 @@ | |||
1 | /* FTP extension for connection tracking. */ | ||
2 | |||
3 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
4 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
5 | * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
12 | * - enable working with Layer 3 protocol independent connection tracking. | ||
13 | * - track EPRT and EPSV commands with IPv6 address. | ||
14 | * | ||
15 | * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c | ||
16 | */ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/moduleparam.h> | ||
21 | #include <linux/netfilter.h> | ||
22 | #include <linux/ip.h> | ||
23 | #include <linux/ipv6.h> | ||
24 | #include <linux/ctype.h> | ||
25 | #include <net/checksum.h> | ||
26 | #include <net/tcp.h> | ||
27 | |||
28 | #include <net/netfilter/nf_conntrack.h> | ||
29 | #include <net/netfilter/nf_conntrack_helper.h> | ||
30 | #include <linux/netfilter/nf_conntrack_ftp.h> | ||
31 | |||
32 | MODULE_LICENSE("GPL"); | ||
33 | MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); | ||
34 | MODULE_DESCRIPTION("ftp connection tracking helper"); | ||
35 | |||
/* This is slow, but it's simple. --RR
   Scratch buffer handed to skb_header_pointer() by help() as the copy
   target for the TCP payload. */
static char *ftp_buffer;

/* Taken by help() around its use of ftp_buffer. */
static DEFINE_SPINLOCK(nf_ftp_lock);

/* Ports given through the "ports" module parameter; ports_c receives
   the number of entries supplied. */
#define MAX_PORTS 8
static u_int16_t ports[MAX_PORTS];
static unsigned int ports_c;
module_param_array(ports, ushort, &ports_c, 0400);

/* When zero, help() refuses to record an expectation whose announced
   address differs from the connection's own source address. */
static int loose;
module_param(loose, int, 0600);

/* Hook exported to GPL modules.
   NOTE(review): presumably installed by the FTP NAT helper so it can
   mangle the announced address/port -- the caller is not visible here,
   confirm. */
unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
				enum ip_conntrack_info ctinfo,
				enum ip_ct_ftp_type type,
				unsigned int matchoff,
				unsigned int matchlen,
				struct nf_conntrack_expect *exp,
				u32 *seq);
EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);

/* Debug output is compiled out; flip the #if to route DEBUGP to printk. */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

/* Address/port parsers referenced from the search[] table below. */
static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char);
static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char);
static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
			     char);
68 | |||
69 | static struct ftp_search { | ||
70 | enum ip_conntrack_dir dir; | ||
71 | const char *pattern; | ||
72 | size_t plen; | ||
73 | char skip; | ||
74 | char term; | ||
75 | enum ip_ct_ftp_type ftptype; | ||
76 | int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); | ||
77 | } search[] = { | ||
78 | { | ||
79 | IP_CT_DIR_ORIGINAL, | ||
80 | "PORT", sizeof("PORT") - 1, ' ', '\r', | ||
81 | IP_CT_FTP_PORT, | ||
82 | try_rfc959, | ||
83 | }, | ||
84 | { | ||
85 | IP_CT_DIR_REPLY, | ||
86 | "227 ", sizeof("227 ") - 1, '(', ')', | ||
87 | IP_CT_FTP_PASV, | ||
88 | try_rfc959, | ||
89 | }, | ||
90 | { | ||
91 | IP_CT_DIR_ORIGINAL, | ||
92 | "EPRT", sizeof("EPRT") - 1, ' ', '\r', | ||
93 | IP_CT_FTP_EPRT, | ||
94 | try_eprt, | ||
95 | }, | ||
96 | { | ||
97 | IP_CT_DIR_REPLY, | ||
98 | "229 ", sizeof("229 ") - 1, '(', ')', | ||
99 | IP_CT_FTP_EPSV, | ||
100 | try_epsv_response, | ||
101 | }, | ||
102 | }; | ||
103 | |||
/*
 * Parse a textual IPv6 address terminated by @term.
 * This code is based on inet_pton() in glibc-2.2.4.
 *
 * @src:  start of the address text (not NUL terminated)
 * @dlen: number of bytes available at @src
 * @dst:  receives the 16 address bytes on success
 * @term: character that must follow the address
 *
 * Returns the number of characters consumed (the terminator excluded),
 * or 0 if the text is not a complete, valid address followed by @term
 * within @dlen bytes.
 */
static int
get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
{
	static const char xdigits[] = "0123456789abcdef";
	u_int8_t tmp[16], *tp, *endp, *colonp;
	int ch, saw_xdigit;
	u_int32_t val;
	size_t clen = 0;

	tp = memset(tmp, '\0', sizeof(tmp));
	endp = tp + sizeof(tmp);
	colonp = NULL;

	/* Nothing to look at: do not touch src[0]. */
	if (dlen == 0)
		return 0;

	/* Leading :: requires some special handling. */
	if (*src == ':'){
		/* The second ':' must lie inside the data as well. */
		if (dlen < 2 || *++src != ':') {
			DEBUGP("invalid \":\" at the head of addr\n");
			return 0;
		}
		clen++;
	}

	saw_xdigit = 0;
	val = 0;
	while ((clen < dlen) && (*src != term)) {
		const char *pch;

		ch = tolower(*src++);
		clen++;

		/* strchr() also matches the terminating NUL of xdigits, so
		   a NUL byte in the payload would otherwise be taken for a
		   hex digit of value 16. */
		pch = (ch != '\0') ? strchr(xdigits, ch) : NULL;
		if (pch != NULL) {
			val <<= 4;
			val |= (pch - xdigits);
			if (val > 0xffff)
				return 0;

			saw_xdigit = 1;
			continue;
		}
		if (ch != ':') {
			DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
			return 0;
		}

		if (!saw_xdigit) {
			/* Second ':' of a "::"; only one is allowed. */
			if (colonp) {
				DEBUGP("invalid location of \"::\".\n");
				return 0;
			}
			colonp = tp;
			continue;
		} else if (clen >= dlen || *src == term) {
			/* A group separator must be followed by more
			   address text inside the data. */
			DEBUGP("trancated IPv6 addr\n");
			return 0;
		}

		if (tp + 2 > endp)
			return 0;
		*tp++ = (u_int8_t) (val >> 8) & 0xff;
		*tp++ = (u_int8_t) val & 0xff;

		saw_xdigit = 0;
		val = 0;
		continue;
	}
	/* Store the final group, which has no trailing ':'. */
	if (saw_xdigit) {
		if (tp + 2 > endp)
			return 0;
		*tp++ = (u_int8_t) (val >> 8) & 0xff;
		*tp++ = (u_int8_t) val & 0xff;
	}
	if (colonp != NULL) {
		/*
		 * Since some memmove()'s erroneously fail to handle
		 * overlapping regions, we'll do the shift by hand.
		 * Everything written after the "::" is moved to the end
		 * of the buffer, leaving zeroes in the gap.
		 */
		const int n = tp - colonp;
		int i;

		if (tp == endp)
			return 0;

		for (i = 1; i <= n; i++) {
			endp[- i] = colonp[n - i];
			colonp[n - i] = 0;
		}
		tp = endp;
	}
	/* All 16 bytes must be filled and the terminator must really be
	   present inside the data, not just past its end. */
	if (tp != endp || clen >= dlen || (*src != term))
		return 0;

	memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
	return clen;
}
200 | |||
/*
 * Parse @array_size decimal numbers separated by @sep into @array.
 *
 * Parsing stops successfully when @term is met while the last number is
 * being read; the return value is then the offset of @term within @data.
 * Any other unexpected character, too many separators, or running out of
 * the @dlen available bytes yields 0.
 */
static int try_number(const char *data, size_t dlen, u_int32_t array[],
		      int array_size, char sep, char term)
{
	u_int32_t idx, pos;

	memset(array, 0, sizeof(array[0])*array_size);

	for (idx = 0, pos = 0; pos < dlen && idx < array_size; pos++) {
		const char c = data[pos];

		if (c >= '0' && c <= '9') {
			array[idx] = array[idx]*10 + c - '0';
			continue;
		}
		if (c == sep) {
			idx++;
			continue;
		}

		/* Unexpected character; true if it's the
		   terminator and we're finished. */
		if (c == term && idx == array_size - 1)
			return pos;

		DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
		       pos, idx, c);
		return 0;
	}
	DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);

	return 0;
}
230 | |||
231 | /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ | ||
232 | static int try_rfc959(const char *data, size_t dlen, | ||
233 | struct nf_conntrack_man *cmd, char term) | ||
234 | { | ||
235 | int length; | ||
236 | u_int32_t array[6]; | ||
237 | |||
238 | length = try_number(data, dlen, array, 6, ',', term); | ||
239 | if (length == 0) | ||
240 | return 0; | ||
241 | |||
242 | cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | | ||
243 | (array[2] << 8) | array[3]); | ||
244 | cmd->u.tcp.port = htons((array[4] << 8) | array[5]); | ||
245 | return length; | ||
246 | } | ||
247 | |||
/*
 * Grab port: decimal number starting at data[start], up to delimiter.
 *
 * On success *@port receives the port in network byte order and the
 * offset just past the delimiter is returned.  Returns 0 (and leaves
 * *@port untouched) if the number is zero, exceeds 65535, contains a
 * non-digit, or the delimiter is not found within @dlen bytes.
 */
static int get_port(const char *data, int start, size_t dlen, char delim,
		    u_int16_t *port)
{
	/* Wider than 16 bits so that an oversized value is detected
	   instead of silently wrapping (e.g. "65537" becoming port 1). */
	unsigned int tmp_port = 0;
	int i;

	for (i = start; i < dlen; i++) {
		/* Finished? */
		if (data[i] == delim) {
			if (tmp_port == 0)
				break;
			*port = htons(tmp_port);
			DEBUGP("get_port: return %u\n", tmp_port);
			return i + 1;
		}
		else if (data[i] >= '0' && data[i] <= '9') {
			tmp_port = tmp_port*10 + data[i] - '0';
			if (tmp_port > 65535) {
				DEBUGP("get_port: port out of range.\n");
				break;
			}
		}
		else { /* Some other crap */
			DEBUGP("get_port: invalid char.\n");
			break;
		}
	}
	return 0;
}
273 | |||
274 | /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ | ||
275 | static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, | ||
276 | char term) | ||
277 | { | ||
278 | char delim; | ||
279 | int length; | ||
280 | |||
281 | /* First character is delimiter, then "1" for IPv4 or "2" for IPv6, | ||
282 | then delimiter again. */ | ||
283 | if (dlen <= 3) { | ||
284 | DEBUGP("EPRT: too short\n"); | ||
285 | return 0; | ||
286 | } | ||
287 | delim = data[0]; | ||
288 | if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { | ||
289 | DEBUGP("try_eprt: invalid delimitter.\n"); | ||
290 | return 0; | ||
291 | } | ||
292 | |||
293 | if ((cmd->l3num == PF_INET && data[1] != '1') || | ||
294 | (cmd->l3num == PF_INET6 && data[1] != '2')) { | ||
295 | DEBUGP("EPRT: invalid protocol number.\n"); | ||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim); | ||
300 | |||
301 | if (data[1] == '1') { | ||
302 | u_int32_t array[4]; | ||
303 | |||
304 | /* Now we have IP address. */ | ||
305 | length = try_number(data + 3, dlen - 3, array, 4, '.', delim); | ||
306 | if (length != 0) | ||
307 | cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | ||
308 | | (array[2] << 8) | array[3]); | ||
309 | } else { | ||
310 | /* Now we have IPv6 address. */ | ||
311 | length = get_ipv6_addr(data + 3, dlen - 3, | ||
312 | (struct in6_addr *)cmd->u3.ip6, delim); | ||
313 | } | ||
314 | |||
315 | if (length == 0) | ||
316 | return 0; | ||
317 | DEBUGP("EPRT: Got IP address!\n"); | ||
318 | /* Start offset includes initial "|1|", and trailing delimiter */ | ||
319 | return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port); | ||
320 | } | ||
321 | |||
322 | /* Returns 0, or length of numbers: |||6446| */ | ||
323 | static int try_epsv_response(const char *data, size_t dlen, | ||
324 | struct nf_conntrack_man *cmd, char term) | ||
325 | { | ||
326 | char delim; | ||
327 | |||
328 | /* Three delimiters. */ | ||
329 | if (dlen <= 3) return 0; | ||
330 | delim = data[0]; | ||
331 | if (isdigit(delim) || delim < 33 || delim > 126 | ||
332 | || data[1] != delim || data[2] != delim) | ||
333 | return 0; | ||
334 | |||
335 | return get_port(data, 3, dlen, delim, &cmd->u.tcp.port); | ||
336 | } | ||
337 | |||
338 | /* Return 1 for match, 0 for accept, -1 for partial. */ | ||
339 | static int find_pattern(const char *data, size_t dlen, | ||
340 | const char *pattern, size_t plen, | ||
341 | char skip, char term, | ||
342 | unsigned int *numoff, | ||
343 | unsigned int *numlen, | ||
344 | struct nf_conntrack_man *cmd, | ||
345 | int (*getnum)(const char *, size_t, | ||
346 | struct nf_conntrack_man *, char)) | ||
347 | { | ||
348 | size_t i; | ||
349 | |||
350 | DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen); | ||
351 | if (dlen == 0) | ||
352 | return 0; | ||
353 | |||
354 | if (dlen <= plen) { | ||
355 | /* Short packet: try for partial? */ | ||
356 | if (strnicmp(data, pattern, dlen) == 0) | ||
357 | return -1; | ||
358 | else return 0; | ||
359 | } | ||
360 | |||
361 | if (strnicmp(data, pattern, plen) != 0) { | ||
362 | #if 0 | ||
363 | size_t i; | ||
364 | |||
365 | DEBUGP("ftp: string mismatch\n"); | ||
366 | for (i = 0; i < plen; i++) { | ||
367 | DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", | ||
368 | i, data[i], data[i], | ||
369 | pattern[i], pattern[i]); | ||
370 | } | ||
371 | #endif | ||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | DEBUGP("Pattern matches!\n"); | ||
376 | /* Now we've found the constant string, try to skip | ||
377 | to the 'skip' character */ | ||
378 | for (i = plen; data[i] != skip; i++) | ||
379 | if (i == dlen - 1) return -1; | ||
380 | |||
381 | /* Skip over the last character */ | ||
382 | i++; | ||
383 | |||
384 | DEBUGP("Skipped up to `%c'!\n", skip); | ||
385 | |||
386 | *numoff = i; | ||
387 | *numlen = getnum(data + i, dlen - i, cmd, term); | ||
388 | if (!*numlen) | ||
389 | return -1; | ||
390 | |||
391 | DEBUGP("Match succeeded!\n"); | ||
392 | return 1; | ||
393 | } | ||
394 | |||
395 | /* Look up to see if we're just after a \n. */ | ||
396 | static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) | ||
397 | { | ||
398 | unsigned int i; | ||
399 | |||
400 | for (i = 0; i < info->seq_aft_nl_num[dir]; i++) | ||
401 | if (info->seq_aft_nl[dir][i] == seq) | ||
402 | return 1; | ||
403 | return 0; | ||
404 | } | ||
405 | |||
406 | /* We don't update if it's older than what we have. */ | ||
407 | static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, | ||
408 | struct sk_buff *skb) | ||
409 | { | ||
410 | unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; | ||
411 | |||
412 | /* Look for oldest: if we find exact match, we're done. */ | ||
413 | for (i = 0; i < info->seq_aft_nl_num[dir]; i++) { | ||
414 | if (info->seq_aft_nl[dir][i] == nl_seq) | ||
415 | return; | ||
416 | |||
417 | if (oldest == info->seq_aft_nl_num[dir] | ||
418 | || before(info->seq_aft_nl[dir][i], oldest)) | ||
419 | oldest = i; | ||
420 | } | ||
421 | |||
422 | if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { | ||
423 | info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; | ||
424 | nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); | ||
425 | } else if (oldest != NUM_SEQ_TO_REMEMBER) { | ||
426 | info->seq_aft_nl[dir][oldest] = nl_seq; | ||
427 | nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); | ||
428 | } | ||
429 | } | ||
430 | |||
431 | static int help(struct sk_buff **pskb, | ||
432 | unsigned int protoff, | ||
433 | struct nf_conn *ct, | ||
434 | enum ip_conntrack_info ctinfo) | ||
435 | { | ||
436 | unsigned int dataoff, datalen; | ||
437 | struct tcphdr _tcph, *th; | ||
438 | char *fb_ptr; | ||
439 | int ret; | ||
440 | u32 seq; | ||
441 | int dir = CTINFO2DIR(ctinfo); | ||
442 | unsigned int matchlen, matchoff; | ||
443 | struct ip_ct_ftp_master *ct_ftp_info = &ct->help->ct_ftp_info; | ||
444 | struct nf_conntrack_expect *exp; | ||
445 | struct nf_conntrack_man cmd = {}; | ||
446 | |||
447 | unsigned int i; | ||
448 | int found = 0, ends_in_nl; | ||
449 | |||
450 | /* Until there's been traffic both ways, don't look in packets. */ | ||
451 | if (ctinfo != IP_CT_ESTABLISHED | ||
452 | && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { | ||
453 | DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); | ||
454 | return NF_ACCEPT; | ||
455 | } | ||
456 | |||
457 | th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); | ||
458 | if (th == NULL) | ||
459 | return NF_ACCEPT; | ||
460 | |||
461 | dataoff = protoff + th->doff * 4; | ||
462 | /* No data? */ | ||
463 | if (dataoff >= (*pskb)->len) { | ||
464 | DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, | ||
465 | (*pskb)->len); | ||
466 | return NF_ACCEPT; | ||
467 | } | ||
468 | datalen = (*pskb)->len - dataoff; | ||
469 | |||
470 | spin_lock_bh(&nf_ftp_lock); | ||
471 | fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer); | ||
472 | BUG_ON(fb_ptr == NULL); | ||
473 | |||
474 | ends_in_nl = (fb_ptr[datalen - 1] == '\n'); | ||
475 | seq = ntohl(th->seq) + datalen; | ||
476 | |||
477 | /* Look up to see if we're just after a \n. */ | ||
478 | if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { | ||
479 | /* Now if this ends in \n, update ftp info. */ | ||
480 | DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", | ||
481 | ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", | ||
482 | ct_ftp_info->seq_aft_nl[dir][0], | ||
483 | ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)", | ||
484 | ct_ftp_info->seq_aft_nl[dir][1]); | ||
485 | ret = NF_ACCEPT; | ||
486 | goto out_update_nl; | ||
487 | } | ||
488 | |||
489 | /* Initialize IP/IPv6 addr to expected address (it's not mentioned | ||
490 | in EPSV responses) */ | ||
491 | cmd.l3num = ct->tuplehash[dir].tuple.src.l3num; | ||
492 | memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, | ||
493 | sizeof(cmd.u3.all)); | ||
494 | |||
495 | for (i = 0; i < ARRAY_SIZE(search); i++) { | ||
496 | if (search[i].dir != dir) continue; | ||
497 | |||
498 | found = find_pattern(fb_ptr, datalen, | ||
499 | search[i].pattern, | ||
500 | search[i].plen, | ||
501 | search[i].skip, | ||
502 | search[i].term, | ||
503 | &matchoff, &matchlen, | ||
504 | &cmd, | ||
505 | search[i].getnum); | ||
506 | if (found) break; | ||
507 | } | ||
508 | if (found == -1) { | ||
509 | /* We don't usually drop packets. After all, this is | ||
510 | connection tracking, not packet filtering. | ||
511 | However, it is necessary for accurate tracking in | ||
512 | this case. */ | ||
513 | if (net_ratelimit()) | ||
514 | printk("conntrack_ftp: partial %s %u+%u\n", | ||
515 | search[i].pattern, | ||
516 | ntohl(th->seq), datalen); | ||
517 | ret = NF_DROP; | ||
518 | goto out; | ||
519 | } else if (found == 0) { /* No match */ | ||
520 | ret = NF_ACCEPT; | ||
521 | goto out_update_nl; | ||
522 | } | ||
523 | |||
524 | DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", | ||
525 | (int)matchlen, fb_ptr + matchoff, | ||
526 | matchlen, ntohl(th->seq) + matchoff); | ||
527 | |||
528 | exp = nf_conntrack_expect_alloc(ct); | ||
529 | if (exp == NULL) { | ||
530 | ret = NF_DROP; | ||
531 | goto out; | ||
532 | } | ||
533 | |||
534 | /* We refer to the reverse direction ("!dir") tuples here, | ||
535 | * because we're expecting something in the other direction. | ||
536 | * Doesn't matter unless NAT is happening. */ | ||
537 | exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3; | ||
538 | |||
539 | /* Update the ftp info */ | ||
540 | if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) && | ||
541 | memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, | ||
542 | sizeof(cmd.u3.all))) { | ||
543 | /* Enrico Scholz's passive FTP to partially RNAT'd ftp | ||
544 | server: it really wants us to connect to a | ||
545 | different IP address. Simply don't record it for | ||
546 | NAT. */ | ||
547 | if (cmd.l3num == PF_INET) { | ||
548 | DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n", | ||
549 | NIPQUAD(cmd.u3.ip), | ||
550 | NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); | ||
551 | } else { | ||
552 | DEBUGP("conntrack_ftp: NOT RECORDING: %x:%x:%x:%x:%x:%x:%x:%x != %x:%x:%x:%x:%x:%x:%x:%x\n", | ||
553 | NIP6(*((struct in6_addr *)cmd.u3.ip6)), | ||
554 | NIP6(*((struct in6_addr *)ct->tuplehash[dir] | ||
555 | .tuple.src.u3.ip6))); | ||
556 | } | ||
557 | |||
558 | /* Thanks to Cristiano Lincoln Mattos | ||
559 | <lincoln@cesar.org.br> for reporting this potential | ||
560 | problem (DMZ machines opening holes to internal | ||
561 | networks, or the packet filter itself). */ | ||
562 | if (!loose) { | ||
563 | ret = NF_ACCEPT; | ||
564 | goto out_put_expect; | ||
565 | } | ||
566 | memcpy(&exp->tuple.dst.u3, &cmd.u3.all, | ||
567 | sizeof(exp->tuple.dst.u3)); | ||
568 | } | ||
569 | |||
570 | exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3; | ||
571 | exp->tuple.src.l3num = cmd.l3num; | ||
572 | exp->tuple.src.u.tcp.port = 0; | ||
573 | exp->tuple.dst.u.tcp.port = cmd.u.tcp.port; | ||
574 | exp->tuple.dst.protonum = IPPROTO_TCP; | ||
575 | |||
576 | exp->mask = (struct nf_conntrack_tuple) | ||
577 | { .src = { .l3num = 0xFFFF, | ||
578 | .u = { .tcp = { 0 }}, | ||
579 | }, | ||
580 | .dst = { .protonum = 0xFF, | ||
581 | .u = { .tcp = { 0xFFFF }}, | ||
582 | }, | ||
583 | }; | ||
584 | if (cmd.l3num == PF_INET) { | ||
585 | exp->mask.src.u3.ip = 0xFFFFFFFF; | ||
586 | exp->mask.dst.u3.ip = 0xFFFFFFFF; | ||
587 | } else { | ||
588 | memset(exp->mask.src.u3.ip6, 0xFF, | ||
589 | sizeof(exp->mask.src.u3.ip6)); | ||
590 | memset(exp->mask.dst.u3.ip6, 0xFF, | ||
591 | sizeof(exp->mask.src.u3.ip6)); | ||
592 | } | ||
593 | |||
594 | exp->expectfn = NULL; | ||
595 | exp->flags = 0; | ||
596 | |||
597 | /* Now, NAT might want to mangle the packet, and register the | ||
598 | * (possibly changed) expectation itself. */ | ||
599 | if (nf_nat_ftp_hook) | ||
600 | ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, | ||
601 | matchoff, matchlen, exp, &seq); | ||
602 | else { | ||
603 | /* Can't expect this? Best to drop packet now. */ | ||
604 | if (nf_conntrack_expect_related(exp) != 0) | ||
605 | ret = NF_DROP; | ||
606 | else | ||
607 | ret = NF_ACCEPT; | ||
608 | } | ||
609 | |||
610 | out_put_expect: | ||
611 | nf_conntrack_expect_put(exp); | ||
612 | |||
613 | out_update_nl: | ||
614 | /* Now if this ends in \n, update ftp info. Seq may have been | ||
615 | * adjusted by NAT code. */ | ||
616 | if (ends_in_nl) | ||
617 | update_nl_seq(seq, ct_ftp_info, dir, *pskb); | ||
618 | out: | ||
619 | spin_unlock_bh(&nf_ftp_lock); | ||
620 | return ret; | ||
621 | } | ||
622 | |||
623 | static struct nf_conntrack_helper ftp[MAX_PORTS][2]; | ||
624 | static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")]; | ||
625 | |||
626 | /* don't make this __exit, since it's called from __init ! */ | ||
627 | static void fini(void) | ||
628 | { | ||
629 | int i, j; | ||
630 | for (i = 0; i < ports_c; i++) { | ||
631 | for (j = 0; j < 2; j++) { | ||
632 | if (ftp[i][j].me == NULL) | ||
633 | continue; | ||
634 | |||
635 | DEBUGP("nf_ct_ftp: unregistering helper for pf: %d " | ||
636 | "port: %d\n", | ||
637 | ftp[i][j].tuple.src.l3num, ports[i]); | ||
638 | nf_conntrack_helper_unregister(&ftp[i][j]); | ||
639 | } | ||
640 | } | ||
641 | |||
642 | kfree(ftp_buffer); | ||
643 | } | ||
644 | |||
645 | static int __init init(void) | ||
646 | { | ||
647 | int i, j = -1, ret = 0; | ||
648 | char *tmpname; | ||
649 | |||
650 | ftp_buffer = kmalloc(65536, GFP_KERNEL); | ||
651 | if (!ftp_buffer) | ||
652 | return -ENOMEM; | ||
653 | |||
654 | if (ports_c == 0) | ||
655 | ports[ports_c++] = FTP_PORT; | ||
656 | |||
657 | /* FIXME should be configurable whether IPv4 and IPv6 FTP connections | ||
658 | are tracked or not - YK */ | ||
659 | for (i = 0; i < ports_c; i++) { | ||
660 | memset(&ftp[i], 0, sizeof(struct nf_conntrack_helper)); | ||
661 | |||
662 | ftp[i][0].tuple.src.l3num = PF_INET; | ||
663 | ftp[i][1].tuple.src.l3num = PF_INET6; | ||
664 | for (j = 0; j < 2; j++) { | ||
665 | ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); | ||
666 | ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; | ||
667 | ftp[i][j].mask.src.u.tcp.port = 0xFFFF; | ||
668 | ftp[i][j].mask.dst.protonum = 0xFF; | ||
669 | ftp[i][j].max_expected = 1; | ||
670 | ftp[i][j].timeout = 5 * 60; /* 5 Minutes */ | ||
671 | ftp[i][j].me = THIS_MODULE; | ||
672 | ftp[i][j].help = help; | ||
673 | tmpname = &ftp_names[i][j][0]; | ||
674 | if (ports[i] == FTP_PORT) | ||
675 | sprintf(tmpname, "ftp"); | ||
676 | else | ||
677 | sprintf(tmpname, "ftp-%d", ports[i]); | ||
678 | ftp[i][j].name = tmpname; | ||
679 | |||
680 | DEBUGP("nf_ct_ftp: registering helper for pf: %d " | ||
681 | "port: %d\n", | ||
682 | ftp[i][j].tuple.src.l3num, ports[i]); | ||
683 | ret = nf_conntrack_helper_register(&ftp[i][j]); | ||
684 | if (ret) { | ||
685 | printk("nf_ct_ftp: failed to register helper " | ||
686 | " for pf: %d port: %d\n", | ||
687 | ftp[i][j].tuple.src.l3num, ports[i]); | ||
688 | fini(); | ||
689 | return ret; | ||
690 | } | ||
691 | } | ||
692 | } | ||
693 | |||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | module_init(init); | ||
698 | module_exit(fini); | ||
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c new file mode 100644 index 000000000000..7de4f06c63c5 --- /dev/null +++ b/net/netfilter/nf_conntrack_l3proto_generic.c | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> | ||
3 | * | ||
4 | * Based largely upon the original ip_conntrack code which | ||
5 | * had the following copyright information: | ||
6 | * | ||
7 | * (C) 1999-2001 Paul `Rusty' Russell | ||
8 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | * | ||
14 | * Author: | ||
15 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
16 | */ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/ip.h> | ||
21 | #include <linux/netfilter.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/skbuff.h> | ||
24 | #include <linux/icmp.h> | ||
25 | #include <linux/sysctl.h> | ||
26 | #include <net/ip.h> | ||
27 | |||
28 | #include <linux/netfilter_ipv4.h> | ||
29 | #include <net/netfilter/nf_conntrack.h> | ||
30 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
31 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
32 | #include <net/netfilter/nf_conntrack_core.h> | ||
33 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | ||
34 | |||
35 | #if 0 | ||
36 | #define DEBUGP printk | ||
37 | #else | ||
38 | #define DEBUGP(format, args...) | ||
39 | #endif | ||
40 | |||
41 | DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); | ||
42 | |||
43 | static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | ||
44 | struct nf_conntrack_tuple *tuple) | ||
45 | { | ||
46 | memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); | ||
47 | memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); | ||
48 | |||
49 | return 1; | ||
50 | } | ||
51 | |||
52 | static int generic_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
53 | const struct nf_conntrack_tuple *orig) | ||
54 | { | ||
55 | memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); | ||
56 | memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); | ||
57 | |||
58 | return 1; | ||
59 | } | ||
60 | |||
/* Layer-3 tuple printer for the generic family: writes nothing to @s and
 * returns 0. */
static int generic_print_tuple(struct seq_file *s,
			       const struct nf_conntrack_tuple *tuple)
{
	return 0;
}
66 | |||
/* Conntrack printer for the generic family: writes nothing to @s and
 * returns 0. */
static int generic_print_conntrack(struct seq_file *s,
				   const struct nf_conn *conntrack)
{
	return 0;
}
72 | |||
73 | static int | ||
74 | generic_prepare(struct sk_buff **pskb, unsigned int hooknum, | ||
75 | unsigned int *dataoff, u_int8_t *protonum) | ||
76 | { | ||
77 | /* Never track !!! */ | ||
78 | return -NF_ACCEPT; | ||
79 | } | ||
80 | |||
81 | |||
82 | static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple) | ||
83 | |||
84 | { | ||
85 | return NF_CT_F_BASIC; | ||
86 | } | ||
87 | |||
88 | struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = { | ||
89 | .l3proto = PF_UNSPEC, | ||
90 | .name = "unknown", | ||
91 | .pkt_to_tuple = generic_pkt_to_tuple, | ||
92 | .invert_tuple = generic_invert_tuple, | ||
93 | .print_tuple = generic_print_tuple, | ||
94 | .print_conntrack = generic_print_conntrack, | ||
95 | .prepare = generic_prepare, | ||
96 | .get_features = generic_get_features, | ||
97 | .me = THIS_MODULE, | ||
98 | }; | ||
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c new file mode 100644 index 000000000000..36425f6c833f --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_generic.c | |||
@@ -0,0 +1,85 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
9 | * - enable working with L3 protocol independent connection tracking. | ||
10 | * | ||
11 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c | ||
12 | */ | ||
13 | |||
14 | #include <linux/types.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/timer.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
19 | |||
20 | unsigned long nf_ct_generic_timeout = 600*HZ; | ||
21 | |||
22 | static int generic_pkt_to_tuple(const struct sk_buff *skb, | ||
23 | unsigned int dataoff, | ||
24 | struct nf_conntrack_tuple *tuple) | ||
25 | { | ||
26 | tuple->src.u.all = 0; | ||
27 | tuple->dst.u.all = 0; | ||
28 | |||
29 | return 1; | ||
30 | } | ||
31 | |||
32 | static int generic_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
33 | const struct nf_conntrack_tuple *orig) | ||
34 | { | ||
35 | tuple->src.u.all = 0; | ||
36 | tuple->dst.u.all = 0; | ||
37 | |||
38 | return 1; | ||
39 | } | ||
40 | |||
/* Per-protocol tuple printer: the generic protocol has nothing to show, so
 * nothing is written to @s and 0 is returned. */
static int generic_print_tuple(struct seq_file *s,
			       const struct nf_conntrack_tuple *tuple)
{
	return 0;
}
47 | |||
/* Private-state printer: the generic protocol has nothing to show, so
 * nothing is written to @s and 0 is returned. */
static int generic_print_conntrack(struct seq_file *s,
				   const struct nf_conn *state)
{
	return 0;
}
54 | |||
55 | /* Returns verdict for packet, or -1 for invalid. */ | ||
56 | static int packet(struct nf_conn *conntrack, | ||
57 | const struct sk_buff *skb, | ||
58 | unsigned int dataoff, | ||
59 | enum ip_conntrack_info ctinfo, | ||
60 | int pf, | ||
61 | unsigned int hooknum) | ||
62 | { | ||
63 | nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_generic_timeout); | ||
64 | return NF_ACCEPT; | ||
65 | } | ||
66 | |||
/* New-connection hook of the generic protocol: no checks are made and no
 * state is set up; always returns 1. */
static int new(struct nf_conn *conntrack, const struct sk_buff *skb,
	       unsigned int dataoff)
{
	return 1;
}
73 | |||
74 | struct nf_conntrack_protocol nf_conntrack_generic_protocol = | ||
75 | { | ||
76 | .l3proto = PF_UNSPEC, | ||
77 | .proto = 0, | ||
78 | .name = "unknown", | ||
79 | .pkt_to_tuple = generic_pkt_to_tuple, | ||
80 | .invert_tuple = generic_invert_tuple, | ||
81 | .print_tuple = generic_print_tuple, | ||
82 | .print_conntrack = generic_print_conntrack, | ||
83 | .packet = packet, | ||
84 | .new = new, | ||
85 | }; | ||
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c new file mode 100644 index 000000000000..3a600f77b4e0 --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_sctp.c | |||
@@ -0,0 +1,670 @@ | |||
1 | /* | ||
2 | * Connection tracking protocol helper module for SCTP. | ||
3 | * | ||
4 | * SCTP is defined in RFC 2960. References to various sections in this code | ||
5 | * are to this RFC. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
12 | * - enable working with L3 protocol independent connection tracking. | ||
13 | * | ||
14 | * Derived from net/ipv4/ip_conntrack_sctp.c | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * Added support for proc manipulation of timeouts. | ||
19 | */ | ||
20 | |||
21 | #include <linux/types.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/timer.h> | ||
24 | #include <linux/netfilter.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/in.h> | ||
27 | #include <linux/ip.h> | ||
28 | #include <linux/sctp.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/seq_file.h> | ||
31 | |||
32 | #include <net/netfilter/nf_conntrack.h> | ||
33 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
34 | |||
/* Debug tracing: switch the condition to 1 to forward DEBUGP() to printk;
 * otherwise DEBUGP() expands to nothing. */
#if 0
#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
#else
#define DEBUGP(format, args...)
#endif
40 | |||
/* Reader/writer lock guarding the conntrack->proto.sctp state. */
static DEFINE_RWLOCK(sctp_lock);
43 | |||
44 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | ||
45 | closely. They're more complex. --RR | ||
46 | |||
47 | And so for me for SCTP :D -Kiran */ | ||
48 | |||
/* Printable state names, indexed by the connection state value (see
 * sctp_print_conntrack() and the debug output of new_state()). */
static const char *sctp_conntrack_names[] = {
	"NONE",
	"CLOSED",
	"COOKIE_WAIT",
	"COOKIE_ECHOED",
	"ESTABLISHED",
	"SHUTDOWN_SENT",
	"SHUTDOWN_RECD",
	"SHUTDOWN_ACK_SENT",
};
59 | |||
60 | #define SECS * HZ | ||
61 | #define MINS * 60 SECS | ||
62 | #define HOURS * 60 MINS | ||
63 | #define DAYS * 24 HOURS | ||
64 | |||
65 | static unsigned long nf_ct_sctp_timeout_closed = 10 SECS; | ||
66 | static unsigned long nf_ct_sctp_timeout_cookie_wait = 3 SECS; | ||
67 | static unsigned long nf_ct_sctp_timeout_cookie_echoed = 3 SECS; | ||
68 | static unsigned long nf_ct_sctp_timeout_established = 5 DAYS; | ||
69 | static unsigned long nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; | ||
70 | static unsigned long nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; | ||
71 | static unsigned long nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; | ||
72 | |||
73 | static unsigned long * sctp_timeouts[] | ||
74 | = { NULL, /* SCTP_CONNTRACK_NONE */ | ||
75 | &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ | ||
76 | &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ | ||
77 | &nf_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */ | ||
78 | &nf_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */ | ||
79 | &nf_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */ | ||
80 | &nf_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */ | ||
81 | &nf_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */ | ||
82 | }; | ||
83 | |||
/* Short aliases for the connection states, used to keep the transition
 * table readable.  sIV (SCTP_CONNTRACK_MAX) marks an invalid transition. */
#define sNO	SCTP_CONNTRACK_NONE
#define sCL	SCTP_CONNTRACK_CLOSED
#define sCW	SCTP_CONNTRACK_COOKIE_WAIT
#define sCE	SCTP_CONNTRACK_COOKIE_ECHOED
#define sES	SCTP_CONNTRACK_ESTABLISHED
#define sSS	SCTP_CONNTRACK_SHUTDOWN_SENT
#define sSR	SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA	SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define sIV	SCTP_CONNTRACK_MAX
93 | |||
94 | /* | ||
95 | These are the descriptions of the states: | ||
96 | |||
97 | NOTE: These state names are tantalizingly similar to the states of an | ||
98 | SCTP endpoint. But the interpretation of the states is a little different, | ||
99 | considering that these are the states of the connection and not of an end | ||
100 | point. Please note the subtleties. -Kiran | ||
101 | |||
102 | NONE - Nothing so far. | ||
103 | COOKIE WAIT - We have seen an INIT chunk in the original direction, or also | ||
104 | an INIT_ACK chunk in the reply direction. | ||
105 | COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. | ||
106 | ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. | ||
107 | SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. | ||
108 | SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction. | ||
109 | SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite | ||
110 | to that of the SHUTDOWN chunk. | ||
111 | CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of | ||
112 | the SHUTDOWN chunk. Connection is closed. | ||
113 | */ | ||
114 | |||
115 | /* TODO | ||
116 | - I have assumed that the first INIT is in the original direction. | ||
117 | This messes things when an INIT comes in the reply direction in CLOSED | ||
118 | state. | ||
119 | - Check the error type in the reply dir before transitioning from | ||
120 | cookie echoed to closed. | ||
121 | - Sec 5.2.4 of RFC 2960 | ||
122 | - Multi Homing support. | ||
123 | */ | ||
124 | |||
125 | /* SCTP conntrack state transitions */ | ||
126 | static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { | ||
127 | { | ||
128 | /* ORIGINAL */ | ||
129 | /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ | ||
130 | /* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA}, | ||
131 | /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA}, | ||
132 | /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | ||
133 | /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, | ||
134 | /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, | ||
135 | /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ | ||
136 | /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ | ||
137 | /* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ | ||
138 | /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} | ||
139 | }, | ||
140 | { | ||
141 | /* REPLY */ | ||
142 | /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ | ||
143 | /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */ | ||
144 | /* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA}, | ||
145 | /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | ||
146 | /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, | ||
147 | /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, | ||
148 | /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, | ||
149 | /* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ | ||
150 | /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, | ||
151 | /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} | ||
152 | } | ||
153 | }; | ||
154 | |||
155 | static int sctp_pkt_to_tuple(const struct sk_buff *skb, | ||
156 | unsigned int dataoff, | ||
157 | struct nf_conntrack_tuple *tuple) | ||
158 | { | ||
159 | sctp_sctphdr_t _hdr, *hp; | ||
160 | |||
161 | DEBUGP(__FUNCTION__); | ||
162 | DEBUGP("\n"); | ||
163 | |||
164 | /* Actually only need first 8 bytes. */ | ||
165 | hp = skb_header_pointer(skb, dataoff, 8, &_hdr); | ||
166 | if (hp == NULL) | ||
167 | return 0; | ||
168 | |||
169 | tuple->src.u.sctp.port = hp->source; | ||
170 | tuple->dst.u.sctp.port = hp->dest; | ||
171 | return 1; | ||
172 | } | ||
173 | |||
174 | static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
175 | const struct nf_conntrack_tuple *orig) | ||
176 | { | ||
177 | DEBUGP(__FUNCTION__); | ||
178 | DEBUGP("\n"); | ||
179 | |||
180 | tuple->src.u.sctp.port = orig->dst.u.sctp.port; | ||
181 | tuple->dst.u.sctp.port = orig->src.u.sctp.port; | ||
182 | return 1; | ||
183 | } | ||
184 | |||
185 | /* Print out the per-protocol part of the tuple. */ | ||
186 | static int sctp_print_tuple(struct seq_file *s, | ||
187 | const struct nf_conntrack_tuple *tuple) | ||
188 | { | ||
189 | DEBUGP(__FUNCTION__); | ||
190 | DEBUGP("\n"); | ||
191 | |||
192 | return seq_printf(s, "sport=%hu dport=%hu ", | ||
193 | ntohs(tuple->src.u.sctp.port), | ||
194 | ntohs(tuple->dst.u.sctp.port)); | ||
195 | } | ||
196 | |||
197 | /* Print out the private part of the conntrack. */ | ||
198 | static int sctp_print_conntrack(struct seq_file *s, | ||
199 | const struct nf_conn *conntrack) | ||
200 | { | ||
201 | enum sctp_conntrack state; | ||
202 | |||
203 | DEBUGP(__FUNCTION__); | ||
204 | DEBUGP("\n"); | ||
205 | |||
206 | read_lock_bh(&sctp_lock); | ||
207 | state = conntrack->proto.sctp.state; | ||
208 | read_unlock_bh(&sctp_lock); | ||
209 | |||
210 | return seq_printf(s, "%s ", sctp_conntrack_names[state]); | ||
211 | } | ||
212 | |||
/*
 * Iterate over the chunks that follow the SCTP common header.
 *
 * @skb:     packet being walked
 * @sch:     chunk header pointer, set on every iteration
 * @_sch:    caller-provided chunk header buffer for skb_header_pointer()
 * @offset:  running offset into @skb, advanced by the chunk length rounded
 *           up to a multiple of 4
 * @dataoff: offset of the SCTP header in @skb
 * @count:   number of chunks visited so far
 *
 * The chunk length is converted from network byte order with ntohs().
 * NOTE: a chunk whose length field is 0 does not advance @offset; callers
 * must reject such chunks inside the loop body.
 */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;	\
	(offset) < (skb)->len &&					\
	((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
	(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
218 | |||
219 | /* Some validity checks to make sure the chunks are fine */ | ||
220 | static int do_basic_checks(struct nf_conn *conntrack, | ||
221 | const struct sk_buff *skb, | ||
222 | unsigned int dataoff, | ||
223 | char *map) | ||
224 | { | ||
225 | u_int32_t offset, count; | ||
226 | sctp_chunkhdr_t _sch, *sch; | ||
227 | int flag; | ||
228 | |||
229 | DEBUGP(__FUNCTION__); | ||
230 | DEBUGP("\n"); | ||
231 | |||
232 | flag = 0; | ||
233 | |||
234 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | ||
235 | DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type); | ||
236 | |||
237 | if (sch->type == SCTP_CID_INIT | ||
238 | || sch->type == SCTP_CID_INIT_ACK | ||
239 | || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { | ||
240 | flag = 1; | ||
241 | } | ||
242 | |||
243 | /* Cookie Ack/Echo chunks not the first OR | ||
244 | Init / Init Ack / Shutdown compl chunks not the only chunks */ | ||
245 | if ((sch->type == SCTP_CID_COOKIE_ACK | ||
246 | || sch->type == SCTP_CID_COOKIE_ECHO | ||
247 | || flag) | ||
248 | && count !=0 ) { | ||
249 | DEBUGP("Basic checks failed\n"); | ||
250 | return 1; | ||
251 | } | ||
252 | |||
253 | if (map) { | ||
254 | set_bit(sch->type, (void *)map); | ||
255 | } | ||
256 | } | ||
257 | |||
258 | DEBUGP("Basic checks passed\n"); | ||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | static int new_state(enum ip_conntrack_dir dir, | ||
263 | enum sctp_conntrack cur_state, | ||
264 | int chunk_type) | ||
265 | { | ||
266 | int i; | ||
267 | |||
268 | DEBUGP(__FUNCTION__); | ||
269 | DEBUGP("\n"); | ||
270 | |||
271 | DEBUGP("Chunk type: %d\n", chunk_type); | ||
272 | |||
273 | switch (chunk_type) { | ||
274 | case SCTP_CID_INIT: | ||
275 | DEBUGP("SCTP_CID_INIT\n"); | ||
276 | i = 0; break; | ||
277 | case SCTP_CID_INIT_ACK: | ||
278 | DEBUGP("SCTP_CID_INIT_ACK\n"); | ||
279 | i = 1; break; | ||
280 | case SCTP_CID_ABORT: | ||
281 | DEBUGP("SCTP_CID_ABORT\n"); | ||
282 | i = 2; break; | ||
283 | case SCTP_CID_SHUTDOWN: | ||
284 | DEBUGP("SCTP_CID_SHUTDOWN\n"); | ||
285 | i = 3; break; | ||
286 | case SCTP_CID_SHUTDOWN_ACK: | ||
287 | DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); | ||
288 | i = 4; break; | ||
289 | case SCTP_CID_ERROR: | ||
290 | DEBUGP("SCTP_CID_ERROR\n"); | ||
291 | i = 5; break; | ||
292 | case SCTP_CID_COOKIE_ECHO: | ||
293 | DEBUGP("SCTP_CID_COOKIE_ECHO\n"); | ||
294 | i = 6; break; | ||
295 | case SCTP_CID_COOKIE_ACK: | ||
296 | DEBUGP("SCTP_CID_COOKIE_ACK\n"); | ||
297 | i = 7; break; | ||
298 | case SCTP_CID_SHUTDOWN_COMPLETE: | ||
299 | DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); | ||
300 | i = 8; break; | ||
301 | default: | ||
302 | /* Other chunks like DATA, SACK, HEARTBEAT and | ||
303 | its ACK do not cause a change in state */ | ||
304 | DEBUGP("Unknown chunk type, Will stay in %s\n", | ||
305 | sctp_conntrack_names[cur_state]); | ||
306 | return cur_state; | ||
307 | } | ||
308 | |||
309 | DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", | ||
310 | dir, sctp_conntrack_names[cur_state], chunk_type, | ||
311 | sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); | ||
312 | |||
313 | return sctp_conntracks[dir][i][cur_state]; | ||
314 | } | ||
315 | |||
316 | /* Returns verdict for packet, or -1 for invalid. */ | ||
317 | static int sctp_packet(struct nf_conn *conntrack, | ||
318 | const struct sk_buff *skb, | ||
319 | unsigned int dataoff, | ||
320 | enum ip_conntrack_info ctinfo, | ||
321 | int pf, | ||
322 | unsigned int hooknum) | ||
323 | { | ||
324 | enum sctp_conntrack newconntrack, oldsctpstate; | ||
325 | sctp_sctphdr_t _sctph, *sh; | ||
326 | sctp_chunkhdr_t _sch, *sch; | ||
327 | u_int32_t offset, count; | ||
328 | char map[256 / sizeof (char)] = {0}; | ||
329 | |||
330 | DEBUGP(__FUNCTION__); | ||
331 | DEBUGP("\n"); | ||
332 | |||
333 | sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); | ||
334 | if (sh == NULL) | ||
335 | return -1; | ||
336 | |||
337 | if (do_basic_checks(conntrack, skb, dataoff, map) != 0) | ||
338 | return -1; | ||
339 | |||
340 | /* Check the verification tag (Sec 8.5) */ | ||
341 | if (!test_bit(SCTP_CID_INIT, (void *)map) | ||
342 | && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) | ||
343 | && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) | ||
344 | && !test_bit(SCTP_CID_ABORT, (void *)map) | ||
345 | && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) | ||
346 | && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { | ||
347 | DEBUGP("Verification tag check failed\n"); | ||
348 | return -1; | ||
349 | } | ||
350 | |||
351 | oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; | ||
352 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | ||
353 | write_lock_bh(&sctp_lock); | ||
354 | |||
355 | /* Special cases of Verification tag check (Sec 8.5.1) */ | ||
356 | if (sch->type == SCTP_CID_INIT) { | ||
357 | /* Sec 8.5.1 (A) */ | ||
358 | if (sh->vtag != 0) { | ||
359 | write_unlock_bh(&sctp_lock); | ||
360 | return -1; | ||
361 | } | ||
362 | } else if (sch->type == SCTP_CID_ABORT) { | ||
363 | /* Sec 8.5.1 (B) */ | ||
364 | if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) | ||
365 | && !(sh->vtag == conntrack->proto.sctp.vtag | ||
366 | [1 - CTINFO2DIR(ctinfo)])) { | ||
367 | write_unlock_bh(&sctp_lock); | ||
368 | return -1; | ||
369 | } | ||
370 | } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { | ||
371 | /* Sec 8.5.1 (C) */ | ||
372 | if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) | ||
373 | && !(sh->vtag == conntrack->proto.sctp.vtag | ||
374 | [1 - CTINFO2DIR(ctinfo)] | ||
375 | && (sch->flags & 1))) { | ||
376 | write_unlock_bh(&sctp_lock); | ||
377 | return -1; | ||
378 | } | ||
379 | } else if (sch->type == SCTP_CID_COOKIE_ECHO) { | ||
380 | /* Sec 8.5.1 (D) */ | ||
381 | if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { | ||
382 | write_unlock_bh(&sctp_lock); | ||
383 | return -1; | ||
384 | } | ||
385 | } | ||
386 | |||
387 | oldsctpstate = conntrack->proto.sctp.state; | ||
388 | newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type); | ||
389 | |||
390 | /* Invalid */ | ||
391 | if (newconntrack == SCTP_CONNTRACK_MAX) { | ||
392 | DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", | ||
393 | CTINFO2DIR(ctinfo), sch->type, oldsctpstate); | ||
394 | write_unlock_bh(&sctp_lock); | ||
395 | return -1; | ||
396 | } | ||
397 | |||
398 | /* If it is an INIT or an INIT ACK note down the vtag */ | ||
399 | if (sch->type == SCTP_CID_INIT | ||
400 | || sch->type == SCTP_CID_INIT_ACK) { | ||
401 | sctp_inithdr_t _inithdr, *ih; | ||
402 | |||
403 | ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), | ||
404 | sizeof(_inithdr), &_inithdr); | ||
405 | if (ih == NULL) { | ||
406 | write_unlock_bh(&sctp_lock); | ||
407 | return -1; | ||
408 | } | ||
409 | DEBUGP("Setting vtag %x for dir %d\n", | ||
410 | ih->init_tag, !CTINFO2DIR(ctinfo)); | ||
411 | conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag; | ||
412 | } | ||
413 | |||
414 | conntrack->proto.sctp.state = newconntrack; | ||
415 | if (oldsctpstate != newconntrack) | ||
416 | nf_conntrack_event_cache(IPCT_PROTOINFO, skb); | ||
417 | write_unlock_bh(&sctp_lock); | ||
418 | } | ||
419 | |||
420 | nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]); | ||
421 | |||
422 | if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED | ||
423 | && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY | ||
424 | && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { | ||
425 | DEBUGP("Setting assured bit\n"); | ||
426 | set_bit(IPS_ASSURED_BIT, &conntrack->status); | ||
427 | nf_conntrack_event_cache(IPCT_STATUS, skb); | ||
428 | } | ||
429 | |||
430 | return NF_ACCEPT; | ||
431 | } | ||
432 | |||
433 | /* Called when a new connection for this protocol found. */ | ||
434 | static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, | ||
435 | unsigned int dataoff) | ||
436 | { | ||
437 | enum sctp_conntrack newconntrack; | ||
438 | sctp_sctphdr_t _sctph, *sh; | ||
439 | sctp_chunkhdr_t _sch, *sch; | ||
440 | u_int32_t offset, count; | ||
441 | char map[256 / sizeof (char)] = {0}; | ||
442 | |||
443 | DEBUGP(__FUNCTION__); | ||
444 | DEBUGP("\n"); | ||
445 | |||
446 | sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); | ||
447 | if (sh == NULL) | ||
448 | return 0; | ||
449 | |||
450 | if (do_basic_checks(conntrack, skb, dataoff, map) != 0) | ||
451 | return 0; | ||
452 | |||
453 | /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ | ||
454 | if ((test_bit (SCTP_CID_ABORT, (void *)map)) | ||
455 | || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)) | ||
456 | || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) { | ||
457 | return 0; | ||
458 | } | ||
459 | |||
460 | newconntrack = SCTP_CONNTRACK_MAX; | ||
461 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | ||
462 | /* Don't need lock here: this conntrack not in circulation yet */ | ||
463 | newconntrack = new_state(IP_CT_DIR_ORIGINAL, | ||
464 | SCTP_CONNTRACK_NONE, sch->type); | ||
465 | |||
466 | /* Invalid: delete conntrack */ | ||
467 | if (newconntrack == SCTP_CONNTRACK_MAX) { | ||
468 | DEBUGP("nf_conntrack_sctp: invalid new deleting.\n"); | ||
469 | return 0; | ||
470 | } | ||
471 | |||
472 | /* Copy the vtag into the state info */ | ||
473 | if (sch->type == SCTP_CID_INIT) { | ||
474 | if (sh->vtag == 0) { | ||
475 | sctp_inithdr_t _inithdr, *ih; | ||
476 | |||
477 | ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), | ||
478 | sizeof(_inithdr), &_inithdr); | ||
479 | if (ih == NULL) | ||
480 | return 0; | ||
481 | |||
482 | DEBUGP("Setting vtag %x for new conn\n", | ||
483 | ih->init_tag); | ||
484 | |||
485 | conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = | ||
486 | ih->init_tag; | ||
487 | } else { | ||
488 | /* Sec 8.5.1 (A) */ | ||
489 | return 0; | ||
490 | } | ||
491 | } | ||
492 | /* If it is a shutdown ack OOTB packet, we expect a return | ||
493 | shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ | ||
494 | else { | ||
495 | DEBUGP("Setting vtag %x for new conn OOTB\n", | ||
496 | sh->vtag); | ||
497 | conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; | ||
498 | } | ||
499 | |||
500 | conntrack->proto.sctp.state = newconntrack; | ||
501 | } | ||
502 | |||
503 | return 1; | ||
504 | } | ||
505 | |||
506 | struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = { | ||
507 | .l3proto = PF_INET, | ||
508 | .proto = IPPROTO_SCTP, | ||
509 | .name = "sctp", | ||
510 | .pkt_to_tuple = sctp_pkt_to_tuple, | ||
511 | .invert_tuple = sctp_invert_tuple, | ||
512 | .print_tuple = sctp_print_tuple, | ||
513 | .print_conntrack = sctp_print_conntrack, | ||
514 | .packet = sctp_packet, | ||
515 | .new = sctp_new, | ||
516 | .destroy = NULL, | ||
517 | .me = THIS_MODULE | ||
518 | }; | ||
519 | |||
520 | struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = { | ||
521 | .l3proto = PF_INET6, | ||
522 | .proto = IPPROTO_SCTP, | ||
523 | .name = "sctp", | ||
524 | .pkt_to_tuple = sctp_pkt_to_tuple, | ||
525 | .invert_tuple = sctp_invert_tuple, | ||
526 | .print_tuple = sctp_print_tuple, | ||
527 | .print_conntrack = sctp_print_conntrack, | ||
528 | .packet = sctp_packet, | ||
529 | .new = sctp_new, | ||
530 | .destroy = NULL, | ||
531 | .me = THIS_MODULE | ||
532 | }; | ||
533 | |||
#ifdef CONFIG_SYSCTL
/*
 * Every SCTP timeout entry has the same shape and differs only in its
 * sysctl id, its procfs name and the variable it exposes.
 */
#define SCTP_TIMEOUT_CTL(id, name, var)					\
	{								\
		.ctl_name	= (id),					\
		.procname	= (name),				\
		.data		= &(var),				\
		.maxlen		= sizeof(unsigned int),			\
		.mode		= 0644,					\
		.proc_handler	= &proc_dointvec_jiffies,		\
	}

/* Leaf table: one entry per SCTP conntrack state timeout. */
static ctl_table nf_ct_sysctl_table[] = {
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
			 "nf_conntrack_sctp_timeout_closed",
			 nf_ct_sctp_timeout_closed),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
			 "nf_conntrack_sctp_timeout_cookie_wait",
			 nf_ct_sctp_timeout_cookie_wait),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
			 "nf_conntrack_sctp_timeout_cookie_echoed",
			 nf_ct_sctp_timeout_cookie_echoed),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
			 "nf_conntrack_sctp_timeout_established",
			 nf_ct_sctp_timeout_established),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
			 "nf_conntrack_sctp_timeout_shutdown_sent",
			 nf_ct_sctp_timeout_shutdown_sent),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
			 "nf_conntrack_sctp_timeout_shutdown_recd",
			 nf_ct_sctp_timeout_shutdown_recd),
	SCTP_TIMEOUT_CTL(NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
			 "nf_conntrack_sctp_timeout_shutdown_ack_sent",
			 nf_ct_sctp_timeout_shutdown_ack_sent),
	{ .ctl_name = 0 }
};

#undef SCTP_TIMEOUT_CTL

/* "netfilter" directory holding the leaf table above. */
static ctl_table nf_ct_netfilter_table[] = {
	{
		.ctl_name	= NET_NETFILTER,
		.procname	= "netfilter",
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

/* Top-level "net" directory; this is the table handed to registration. */
static ctl_table nf_ct_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};

/* Handle returned by register_sysctl_table(), needed to unregister. */
static struct ctl_table_header *nf_ct_sysctl_header;
#endif
617 | |||
618 | int __init init(void) | ||
619 | { | ||
620 | int ret; | ||
621 | |||
622 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4); | ||
623 | if (ret) { | ||
624 | printk("nf_conntrack_proto_sctp4: protocol register failed\n"); | ||
625 | goto out; | ||
626 | } | ||
627 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6); | ||
628 | if (ret) { | ||
629 | printk("nf_conntrack_proto_sctp6: protocol register failed\n"); | ||
630 | goto cleanup_sctp4; | ||
631 | } | ||
632 | |||
633 | #ifdef CONFIG_SYSCTL | ||
634 | nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); | ||
635 | if (nf_ct_sysctl_header == NULL) { | ||
636 | printk("nf_conntrack_proto_sctp: can't register to sysctl.\n"); | ||
637 | goto cleanup; | ||
638 | } | ||
639 | #endif | ||
640 | |||
641 | return ret; | ||
642 | |||
643 | #ifdef CONFIG_SYSCTL | ||
644 | cleanup: | ||
645 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); | ||
646 | #endif | ||
647 | cleanup_sctp4: | ||
648 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); | ||
649 | out: | ||
650 | DEBUGP("SCTP conntrack module loading %s\n", | ||
651 | ret ? "failed": "succeeded"); | ||
652 | return ret; | ||
653 | } | ||
654 | |||
/*
 * Module exit point: unregisters both SCTP trackers and, when
 * CONFIG_SYSCTL is set, the sysctl tree registered by init().
 */
void __exit fini(void)
{
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6);
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4);
#ifdef CONFIG_SYSCTL
	/* Header was stored by init() when the table was registered. */
	unregister_sysctl_table(nf_ct_sysctl_header);
#endif
	DEBUGP("SCTP conntrack module unloaded\n");
}
664 | |||
665 | module_init(init); | ||
666 | module_exit(fini); | ||
667 | |||
668 | MODULE_LICENSE("GPL"); | ||
669 | MODULE_AUTHOR("Kiran Kumar Immidi"); | ||
670 | MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); | ||
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c new file mode 100644 index 000000000000..83d90dd624f0 --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_tcp.c | |||
@@ -0,0 +1,1162 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>: | ||
9 | * - Real stateful connection tracking | ||
10 | * - Modified state transitions table | ||
11 | * - Window scaling support added | ||
12 | * - SACK support added | ||
13 | * | ||
14 | * Willy Tarreau: | ||
15 | * - State table bugfixes | ||
16 | * - More robust state changes | ||
17 | * - Tuning timer parameters | ||
18 | * | ||
19 | * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
20 | * - generalized Layer 3 protocol part. | |
21 | * | ||
22 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c | ||
23 | * | ||
24 | * version 2.2 | ||
25 | */ | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/sched.h> | ||
30 | #include <linux/timer.h> | ||
31 | #include <linux/netfilter.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/in.h> | ||
34 | #include <linux/tcp.h> | ||
35 | #include <linux/spinlock.h> | ||
36 | #include <linux/skbuff.h> | ||
37 | #include <linux/ipv6.h> | ||
38 | #include <net/ip6_checksum.h> | ||
39 | |||
40 | #include <net/tcp.h> | ||
41 | |||
42 | #include <linux/netfilter.h> | ||
43 | #include <linux/netfilter_ipv4.h> | ||
44 | #include <linux/netfilter_ipv6.h> | ||
45 | #include <net/netfilter/nf_conntrack.h> | ||
46 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
47 | |||
48 | #if 0 | ||
49 | #define DEBUGP printk | ||
50 | #define DEBUGP_VARS | ||
51 | #else | ||
52 | #define DEBUGP(format, args...) | ||
53 | #endif | ||
54 | |||
/* Protects conntrack->proto.tcp */
static DEFINE_RWLOCK(tcp_lock);

/* "Be conservative in what you do,
    be liberal in what you accept from others."
    If this is non-zero, only out-of-window RST segments are marked
    as INVALID. */
int nf_ct_tcp_be_liberal = 0;

/* When a connection is picked up from the middle, this is how many packets
   are required to pass in each direction before we assume we are in sync -
   if either side uses window scaling, we have lost the game.
   If it is set to zero, picking up already established connections
   is disabled. */
int nf_ct_tcp_loose = 3;

/* Max number of retransmitted packets without receiving an (acceptable)
   ACK from the destination. If this number is reached, a shorter timer
   will be started. */
int nf_ct_tcp_max_retrans = 3;
74 | |||
75 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | ||
76 | closely. They're more complex. --RR */ | ||
77 | |||
78 | static const char *tcp_conntrack_names[] = { | ||
79 | "NONE", | ||
80 | "SYN_SENT", | ||
81 | "SYN_RECV", | ||
82 | "ESTABLISHED", | ||
83 | "FIN_WAIT", | ||
84 | "CLOSE_WAIT", | ||
85 | "LAST_ACK", | ||
86 | "TIME_WAIT", | ||
87 | "CLOSE", | ||
88 | "LISTEN" | ||
89 | }; | ||
90 | |||
/*
 * Postfix unit macros: each one expands to a multiplication, so that
 * "2 MINS" becomes "2 * 60 * HZ".  They are only meaningful directly
 * after a number.
 */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS

/* Default timeout for each tracked TCP state. */
unsigned long nf_ct_tcp_timeout_syn_sent =      2 MINS;
unsigned long nf_ct_tcp_timeout_syn_recv =     60 SECS;
unsigned long nf_ct_tcp_timeout_established =   5 DAYS;
unsigned long nf_ct_tcp_timeout_fin_wait =      2 MINS;
unsigned long nf_ct_tcp_timeout_close_wait =   60 SECS;
unsigned long nf_ct_tcp_timeout_last_ack =     30 SECS;
unsigned long nf_ct_tcp_timeout_time_wait =     2 MINS;
unsigned long nf_ct_tcp_timeout_close =        10 SECS;

/* RFC1122 says the R2 limit should be at least 100 seconds.
   Linux uses 15 packets as limit, which corresponds
   to ~13-30min depending on RTO. */
unsigned long nf_ct_tcp_timeout_max_retrans =     5 MINS;
109 | |||
110 | static unsigned long * tcp_timeouts[] | ||
111 | = { NULL, /* TCP_CONNTRACK_NONE */ | ||
112 | &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ | ||
113 | &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ | ||
114 | &nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ | ||
115 | &nf_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */ | ||
116 | &nf_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */ | ||
117 | &nf_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */ | ||
118 | &nf_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ | ||
119 | &nf_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ | ||
120 | NULL, /* TCP_CONNTRACK_LISTEN */ | ||
121 | }; | ||
122 | |||
/*
 * Two-letter state abbreviations that keep the transition table below
 * readable.  sIV ("invalid") reuses TCP_CONNTRACK_MAX and sIG ("ignore")
 * maps to TCP_CONNTRACK_IGNORE; neither is a column of the table.
 */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sLI TCP_CONNTRACK_LISTEN
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE

/* What TCP flags are set from RST/SYN/FIN/ACK. */
/* The enumerator order matches the row order (syn, synack, fin, ack,
   rst, none) of the transition table below. */
enum tcp_bit_set {
	TCP_SYN_SET,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
145 | |||
/*
 * The TCP state transition table needs a few words...
 *
 * We are the man in the middle. All the packets go through us
 * but might get lost in transit to the destination.
 * It is assumed that the destinations can't receive segments
 * we haven't seen.
 *
 * The checked segment is in window, but our windows are *not*
 * equivalent with the ones of the sender/receiver. We always
 * try to guess the state of the current sender.
 *
 * The meanings of the states are:
 *
 * NONE:	initial state
 * SYN_SENT:	SYN-only packet seen
 * SYN_RECV:	SYN-ACK packet seen
 * ESTABLISHED:	ACK packet seen
 * FIN_WAIT:	FIN packet seen
 * CLOSE_WAIT:	ACK seen (after FIN)
 * LAST_ACK:	FIN seen (after FIN)
 * TIME_WAIT:	last ACK seen
 * CLOSE:	closed connection
 *
 * LISTEN state is not used.
 *
 * Packets marked as IGNORED (sIG):
 *	if they may be either invalid or valid
 *	and the receiver may send back a connection
 *	closing RST or a SYN/ACK.
 *
 * Packets marked as INVALID (sIV):
 *	if they are invalid
 *	or we do not support the request (simultaneous open)
 *
 * Layout: [direction][flag class of the packet][current state]; each
 * entry is the state that results from the transition.
 */
static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
	{
/* ORIGINAL */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
/*
 *	sNO -> sSS	Initialize a new connection
 *	sSS -> sSS	Retransmitted SYN
 *	sSR -> sIG	Late retransmitted SYN?
 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
 *			are errors. Receiver will reply with RST
 *			and close the connection.
 *			Or we are not in sync and hold a dead connection.
 *	sFW -> sIG
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sSS	Reopened connection (RFC 1122).
 *	sCL -> sSS
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*
 * A SYN/ACK from the client is always invalid:
 *	- either it tries to set up a simultaneous open, which is
 *	  not supported;
 *	- or the firewall has just been inserted between the two hosts
 *	  during the session set-up. The SYN will be retransmitted
 *	  by the true client (or it'll time out).
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sNO -> sIV	Too late and no reason to do anything...
 *	sSS -> sIV	Client might not send FIN in this state:
 *			we enforce waiting for a SYN/ACK reply first.
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions, waiting for
 *			the last ACK.
 *			Might be a retransmitted FIN as well...
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sNO -> sES	Assumed.
 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
 *	sSR -> sES	Established state is reached.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected.
 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	},
	{
/* REPLY */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*syn*/	   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*
 *	sNO -> sIV	Never reached.
 *	sSS -> sIV	Simultaneous open, not supported
 *	sSR -> sIV	Simultaneous open, not supported.
 *	sES -> sIV	Server may not initiate a connection.
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sIV	Reopened connection, but server may not do it.
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
/*
 *	sSS -> sSR	Standard open.
 *	sSR -> sSR	Retransmitted SYN/ACK.
 *	sES -> sIG	Late retransmitted SYN/ACK?
 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sIG
 *	sCL -> sIG
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sSS -> sIV	Server might not send FIN in this state.
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions.
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*ack*/	   { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sSS -> sIV	Might be a half-open connection.
 *	sSR -> sSR	Might answer late resent SYN.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected.
 *	sTW -> sTW	Retransmitted last ACK.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	}
};
299 | |||
300 | static int tcp_pkt_to_tuple(const struct sk_buff *skb, | ||
301 | unsigned int dataoff, | ||
302 | struct nf_conntrack_tuple *tuple) | ||
303 | { | ||
304 | struct tcphdr _hdr, *hp; | ||
305 | |||
306 | /* Actually only need first 8 bytes. */ | ||
307 | hp = skb_header_pointer(skb, dataoff, 8, &_hdr); | ||
308 | if (hp == NULL) | ||
309 | return 0; | ||
310 | |||
311 | tuple->src.u.tcp.port = hp->source; | ||
312 | tuple->dst.u.tcp.port = hp->dest; | ||
313 | |||
314 | return 1; | ||
315 | } | ||
316 | |||
/*
 * Build the reply-direction ports in @tuple by swapping the source and
 * destination ports of @orig.  Always returns 1.
 */
static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			    const struct nf_conntrack_tuple *orig)
{
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
	return 1;
}
324 | |||
325 | /* Print out the per-protocol part of the tuple. */ | ||
326 | static int tcp_print_tuple(struct seq_file *s, | ||
327 | const struct nf_conntrack_tuple *tuple) | ||
328 | { | ||
329 | return seq_printf(s, "sport=%hu dport=%hu ", | ||
330 | ntohs(tuple->src.u.tcp.port), | ||
331 | ntohs(tuple->dst.u.tcp.port)); | ||
332 | } | ||
333 | |||
334 | /* Print out the private part of the conntrack. */ | ||
335 | static int tcp_print_conntrack(struct seq_file *s, | ||
336 | const struct nf_conn *conntrack) | ||
337 | { | ||
338 | enum tcp_conntrack state; | ||
339 | |||
340 | read_lock_bh(&tcp_lock); | ||
341 | state = conntrack->proto.tcp.state; | ||
342 | read_unlock_bh(&tcp_lock); | ||
343 | |||
344 | return seq_printf(s, "%s ", tcp_conntrack_names[state]); | ||
345 | } | ||
346 | |||
347 | static unsigned int get_conntrack_index(const struct tcphdr *tcph) | ||
348 | { | ||
349 | if (tcph->rst) return TCP_RST_SET; | ||
350 | else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); | ||
351 | else if (tcph->fin) return TCP_FIN_SET; | ||
352 | else if (tcph->ack) return TCP_ACK_SET; | ||
353 | else return TCP_NONE_SET; | ||
354 | } | ||
355 | |||
356 | /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering | ||
357 | in IP Filter' by Guido van Rooij. | ||
358 | |||
359 | http://www.nluug.nl/events/sane2000/papers.html | ||
360 | http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz | ||
361 | |||
362 | The boundaries and the conditions are changed according to RFC793: | ||
363 | the packet must intersect the window (i.e. segments may be | ||
364 | after the right or before the left edge) and thus receivers may ACK | ||
365 | segments after the right edge of the window. | ||
366 | |||
367 | td_maxend = max(sack + max(win,1)) seen in reply packets | ||
368 | td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets | ||
369 | td_maxwin += seq + len - sender.td_maxend | ||
370 | if seq + len > sender.td_maxend | ||
371 | td_end = max(seq + len) seen in sent packets | ||
372 | |||
373 | I. Upper bound for valid data: seq <= sender.td_maxend | ||
374 | II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin | ||
375 | III. Upper bound for valid ack: sack <= receiver.td_end | ||
376 | IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW | ||
377 | |||
378 | where sack is the highest right edge of sack block found in the packet. | ||
379 | |||
380 | The upper bound limit for a valid ack is not ignored - | ||
381 | we don't have to deal with fragments. | |
382 | */ | ||
383 | |||
384 | static inline __u32 segment_seq_plus_len(__u32 seq, | ||
385 | size_t len, | ||
386 | unsigned int dataoff, | ||
387 | struct tcphdr *tcph) | ||
388 | { | ||
389 | /* XXX Should I use payload length field in IP/IPv6 header ? | ||
390 | * - YK */ | ||
391 | return (seq + len - dataoff - tcph->doff*4 | ||
392 | + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); | ||
393 | } | ||
394 | |||
/* Fixme: what about big packets? */
/*
 * MAXACKWINDOW(sender) is the larger of sender->td_maxwin and
 * MAXACKWINCONST.  The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
400 | |||
401 | /* | ||
402 | * Simplified tcp_parse_options routine from tcp_input.c | ||
403 | */ | ||
404 | static void tcp_options(const struct sk_buff *skb, | ||
405 | unsigned int dataoff, | ||
406 | struct tcphdr *tcph, | ||
407 | struct ip_ct_tcp_state *state) | ||
408 | { | ||
409 | unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; | ||
410 | unsigned char *ptr; | ||
411 | int length = (tcph->doff*4) - sizeof(struct tcphdr); | ||
412 | |||
413 | if (!length) | ||
414 | return; | ||
415 | |||
416 | ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), | ||
417 | length, buff); | ||
418 | BUG_ON(ptr == NULL); | ||
419 | |||
420 | state->td_scale = | ||
421 | state->flags = 0; | ||
422 | |||
423 | while (length > 0) { | ||
424 | int opcode=*ptr++; | ||
425 | int opsize; | ||
426 | |||
427 | switch (opcode) { | ||
428 | case TCPOPT_EOL: | ||
429 | return; | ||
430 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ | ||
431 | length--; | ||
432 | continue; | ||
433 | default: | ||
434 | opsize=*ptr++; | ||
435 | if (opsize < 2) /* "silly options" */ | ||
436 | return; | ||
437 | if (opsize > length) | ||
438 | break; /* don't parse partial options */ | ||
439 | |||
440 | if (opcode == TCPOPT_SACK_PERM | ||
441 | && opsize == TCPOLEN_SACK_PERM) | ||
442 | state->flags |= IP_CT_TCP_FLAG_SACK_PERM; | ||
443 | else if (opcode == TCPOPT_WINDOW | ||
444 | && opsize == TCPOLEN_WINDOW) { | ||
445 | state->td_scale = *(u_int8_t *)ptr; | ||
446 | |||
447 | if (state->td_scale > 14) { | ||
448 | /* See RFC1323 */ | ||
449 | state->td_scale = 14; | ||
450 | } | ||
451 | state->flags |= | ||
452 | IP_CT_TCP_FLAG_WINDOW_SCALE; | ||
453 | } | ||
454 | ptr += opsize - 2; | ||
455 | length -= opsize; | ||
456 | } | ||
457 | } | ||
458 | } | ||
459 | |||
460 | static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, | ||
461 | struct tcphdr *tcph, __u32 *sack) | ||
462 | { | ||
463 | unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; | ||
464 | unsigned char *ptr; | ||
465 | int length = (tcph->doff*4) - sizeof(struct tcphdr); | ||
466 | __u32 tmp; | ||
467 | |||
468 | if (!length) | ||
469 | return; | ||
470 | |||
471 | ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), | ||
472 | length, buff); | ||
473 | BUG_ON(ptr == NULL); | ||
474 | |||
475 | /* Fast path for timestamp-only option */ | ||
476 | if (length == TCPOLEN_TSTAMP_ALIGNED*4 | ||
477 | && *(__u32 *)ptr == | ||
478 | __constant_ntohl((TCPOPT_NOP << 24) | ||
479 | | (TCPOPT_NOP << 16) | ||
480 | | (TCPOPT_TIMESTAMP << 8) | ||
481 | | TCPOLEN_TIMESTAMP)) | ||
482 | return; | ||
483 | |||
484 | while (length > 0) { | ||
485 | int opcode = *ptr++; | ||
486 | int opsize, i; | ||
487 | |||
488 | switch (opcode) { | ||
489 | case TCPOPT_EOL: | ||
490 | return; | ||
491 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ | ||
492 | length--; | ||
493 | continue; | ||
494 | default: | ||
495 | opsize = *ptr++; | ||
496 | if (opsize < 2) /* "silly options" */ | ||
497 | return; | ||
498 | if (opsize > length) | ||
499 | break; /* don't parse partial options */ | ||
500 | |||
501 | if (opcode == TCPOPT_SACK | ||
502 | && opsize >= (TCPOLEN_SACK_BASE | ||
503 | + TCPOLEN_SACK_PERBLOCK) | ||
504 | && !((opsize - TCPOLEN_SACK_BASE) | ||
505 | % TCPOLEN_SACK_PERBLOCK)) { | ||
506 | for (i = 0; | ||
507 | i < (opsize - TCPOLEN_SACK_BASE); | ||
508 | i += TCPOLEN_SACK_PERBLOCK) { | ||
509 | memcpy(&tmp, (__u32 *)(ptr + i) + 1, | ||
510 | sizeof(__u32)); | ||
511 | tmp = ntohl(tmp); | ||
512 | |||
513 | if (after(tmp, *sack)) | ||
514 | *sack = tmp; | ||
515 | } | ||
516 | return; | ||
517 | } | ||
518 | ptr += opsize - 2; | ||
519 | length -= opsize; | ||
520 | } | ||
521 | } | ||
522 | } | ||
523 | |||
524 | static int tcp_in_window(struct ip_ct_tcp *state, | ||
525 | enum ip_conntrack_dir dir, | ||
526 | unsigned int index, | ||
527 | const struct sk_buff *skb, | ||
528 | unsigned int dataoff, | ||
529 | struct tcphdr *tcph, | ||
530 | int pf) | ||
531 | { | ||
532 | struct ip_ct_tcp_state *sender = &state->seen[dir]; | ||
533 | struct ip_ct_tcp_state *receiver = &state->seen[!dir]; | ||
534 | __u32 seq, ack, sack, end, win, swin; | ||
535 | int res; | ||
536 | |||
537 | /* | ||
538 | * Get the required data from the packet. | ||
539 | */ | ||
540 | seq = ntohl(tcph->seq); | ||
541 | ack = sack = ntohl(tcph->ack_seq); | ||
542 | win = ntohs(tcph->window); | ||
543 | end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); | ||
544 | |||
545 | if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) | ||
546 | tcp_sack(skb, dataoff, tcph, &sack); | ||
547 | |||
548 | DEBUGP("tcp_in_window: START\n"); | ||
549 | DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " | ||
550 | "seq=%u ack=%u sack=%u win=%u end=%u\n", | ||
551 | NIPQUAD(iph->saddr), ntohs(tcph->source), | ||
552 | NIPQUAD(iph->daddr), ntohs(tcph->dest), | ||
553 | seq, ack, sack, win, end); | ||
554 | DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " | ||
555 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | ||
556 | sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
557 | sender->td_scale, | ||
558 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | ||
559 | receiver->td_scale); | ||
560 | |||
561 | if (sender->td_end == 0) { | ||
562 | /* | ||
563 | * Initialize sender data. | ||
564 | */ | ||
565 | if (tcph->syn && tcph->ack) { | ||
566 | /* | ||
567 | * Outgoing SYN-ACK in reply to a SYN. | ||
568 | */ | ||
569 | sender->td_end = | ||
570 | sender->td_maxend = end; | ||
571 | sender->td_maxwin = (win == 0 ? 1 : win); | ||
572 | |||
573 | tcp_options(skb, dataoff, tcph, sender); | ||
574 | /* | ||
575 | * RFC 1323: | ||
576 | * Both sides must send the Window Scale option | ||
577 | * to enable window scaling in either direction. | ||
578 | */ | ||
579 | if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE | ||
580 | && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) | ||
581 | sender->td_scale = | ||
582 | receiver->td_scale = 0; | ||
583 | } else { | ||
584 | /* | ||
585 | * We are in the middle of a connection, | ||
586 | * its history is lost for us. | ||
587 | * Let's try to use the data from the packet. | ||
588 | */ | ||
589 | sender->td_end = end; | ||
590 | sender->td_maxwin = (win == 0 ? 1 : win); | ||
591 | sender->td_maxend = end + sender->td_maxwin; | ||
592 | } | ||
593 | } else if (((state->state == TCP_CONNTRACK_SYN_SENT | ||
594 | && dir == IP_CT_DIR_ORIGINAL) | ||
595 | || (state->state == TCP_CONNTRACK_SYN_RECV | ||
596 | && dir == IP_CT_DIR_REPLY)) | ||
597 | && after(end, sender->td_end)) { | ||
598 | /* | ||
599 | * RFC 793: "if a TCP is reinitialized ... then it need | ||
600 | * not wait at all; it must only be sure to use sequence | ||
601 | * numbers larger than those recently used." | ||
602 | */ | ||
603 | sender->td_end = | ||
604 | sender->td_maxend = end; | ||
605 | sender->td_maxwin = (win == 0 ? 1 : win); | ||
606 | |||
607 | tcp_options(skb, dataoff, tcph, sender); | ||
608 | } | ||
609 | |||
610 | if (!(tcph->ack)) { | ||
611 | /* | ||
612 | * If there is no ACK, just pretend it was set and OK. | ||
613 | */ | ||
614 | ack = sack = receiver->td_end; | ||
615 | } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == | ||
616 | (TCP_FLAG_ACK|TCP_FLAG_RST)) | ||
617 | && (ack == 0)) { | ||
618 | /* | ||
619 | * Broken TCP stacks, that set ACK in RST packets as well | ||
620 | * with zero ack value. | ||
621 | */ | ||
622 | ack = sack = receiver->td_end; | ||
623 | } | ||
624 | |||
625 | if (seq == end | ||
626 | && (!tcph->rst | ||
627 | || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) | ||
628 | /* | ||
629 | * Packets contains no data: we assume it is valid | ||
630 | * and check the ack value only. | ||
631 | * However RST segments are always validated by their | ||
632 | * SEQ number, except when seq == 0 (reset sent answering | ||
633 | * SYN. | ||
634 | */ | ||
635 | seq = end = sender->td_end; | ||
636 | |||
637 | DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " | ||
638 | "seq=%u ack=%u sack =%u win=%u end=%u\n", | ||
639 | NIPQUAD(iph->saddr), ntohs(tcph->source), | ||
640 | NIPQUAD(iph->daddr), ntohs(tcph->dest), | ||
641 | seq, ack, sack, win, end); | ||
642 | DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " | ||
643 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | ||
644 | sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
645 | sender->td_scale, | ||
646 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | ||
647 | receiver->td_scale); | ||
648 | |||
649 | DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", | ||
650 | before(seq, sender->td_maxend + 1), | ||
651 | after(end, sender->td_end - receiver->td_maxwin - 1), | ||
652 | before(sack, receiver->td_end + 1), | ||
653 | after(ack, receiver->td_end - MAXACKWINDOW(sender))); | ||
654 | |||
655 | if (sender->loose || receiver->loose || | ||
656 | (before(seq, sender->td_maxend + 1) && | ||
657 | after(end, sender->td_end - receiver->td_maxwin - 1) && | ||
658 | before(sack, receiver->td_end + 1) && | ||
659 | after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { | ||
660 | /* | ||
661 | * Take into account window scaling (RFC 1323). | ||
662 | */ | ||
663 | if (!tcph->syn) | ||
664 | win <<= sender->td_scale; | ||
665 | |||
666 | /* | ||
667 | * Update sender data. | ||
668 | */ | ||
669 | swin = win + (sack - ack); | ||
670 | if (sender->td_maxwin < swin) | ||
671 | sender->td_maxwin = swin; | ||
672 | if (after(end, sender->td_end)) | ||
673 | sender->td_end = end; | ||
674 | /* | ||
675 | * Update receiver data. | ||
676 | */ | ||
677 | if (after(end, sender->td_maxend)) | ||
678 | receiver->td_maxwin += end - sender->td_maxend; | ||
679 | if (after(sack + win, receiver->td_maxend - 1)) { | ||
680 | receiver->td_maxend = sack + win; | ||
681 | if (win == 0) | ||
682 | receiver->td_maxend++; | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * Check retransmissions. | ||
687 | */ | ||
688 | if (index == TCP_ACK_SET) { | ||
689 | if (state->last_dir == dir | ||
690 | && state->last_seq == seq | ||
691 | && state->last_ack == ack | ||
692 | && state->last_end == end) | ||
693 | state->retrans++; | ||
694 | else { | ||
695 | state->last_dir = dir; | ||
696 | state->last_seq = seq; | ||
697 | state->last_ack = ack; | ||
698 | state->last_end = end; | ||
699 | state->retrans = 0; | ||
700 | } | ||
701 | } | ||
702 | /* | ||
703 | * Close the window of disabled window tracking :-) | ||
704 | */ | ||
705 | if (sender->loose) | ||
706 | sender->loose--; | ||
707 | |||
708 | res = 1; | ||
709 | } else { | ||
710 | if (LOG_INVALID(IPPROTO_TCP)) | ||
711 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
712 | "nf_ct_tcp: %s ", | ||
713 | before(seq, sender->td_maxend + 1) ? | ||
714 | after(end, sender->td_end - receiver->td_maxwin - 1) ? | ||
715 | before(sack, receiver->td_end + 1) ? | ||
716 | after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" | ||
717 | : "ACK is under the lower bound (possible overly delayed ACK)" | ||
718 | : "ACK is over the upper bound (ACKed data not seen yet)" | ||
719 | : "SEQ is under the lower bound (already ACKed data retransmitted)" | ||
720 | : "SEQ is over the upper bound (over the window of the receiver)"); | ||
721 | |||
722 | res = nf_ct_tcp_be_liberal; | ||
723 | } | ||
724 | |||
725 | DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " | ||
726 | "receiver end=%u maxend=%u maxwin=%u\n", | ||
727 | res, sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
728 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin); | ||
729 | |||
730 | return res; | ||
731 | } | ||
732 | |||
733 | #ifdef CONFIG_IP_NF_NAT_NEEDED | ||
734 | /* Update sender->td_end after NAT successfully mangled the packet */ | ||
735 | /* Caller must linearize skb at tcp header. */ | ||
736 | void nf_conntrack_tcp_update(struct sk_buff *skb, | ||
737 | unsigned int dataoff, | ||
738 | struct nf_conn *conntrack, | ||
739 | int dir) | ||
740 | { | ||
741 | struct tcphdr *tcph = (void *)skb->data + dataoff; | ||
742 | __u32 end; | ||
743 | #ifdef DEBUGP_VARS | ||
744 | struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; | ||
745 | struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; | ||
746 | #endif | ||
747 | |||
748 | end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); | ||
749 | |||
750 | write_lock_bh(&tcp_lock); | ||
751 | /* | ||
752 | * We have to worry for the ack in the reply packet only... | ||
753 | */ | ||
754 | if (after(end, conntrack->proto.tcp.seen[dir].td_end)) | ||
755 | conntrack->proto.tcp.seen[dir].td_end = end; | ||
756 | conntrack->proto.tcp.last_end = end; | ||
757 | write_unlock_bh(&tcp_lock); | ||
758 | DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " | ||
759 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | ||
760 | sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
761 | sender->td_scale, | ||
762 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | ||
763 | receiver->td_scale); | ||
764 | } | ||
765 | |||
766 | #endif | ||
767 | |||
768 | #define TH_FIN 0x01 | ||
769 | #define TH_SYN 0x02 | ||
770 | #define TH_RST 0x04 | ||
771 | #define TH_PUSH 0x08 | ||
772 | #define TH_ACK 0x10 | ||
773 | #define TH_URG 0x20 | ||
774 | #define TH_ECE 0x40 | ||
775 | #define TH_CWR 0x80 | ||
776 | |||
777 | /* table of valid flag combinations - ECE and CWR are always valid */ | ||
778 | static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = | ||
779 | { | ||
780 | [TH_SYN] = 1, | ||
781 | [TH_SYN|TH_ACK] = 1, | ||
782 | [TH_SYN|TH_ACK|TH_PUSH] = 1, | ||
783 | [TH_RST] = 1, | ||
784 | [TH_RST|TH_ACK] = 1, | ||
785 | [TH_RST|TH_ACK|TH_PUSH] = 1, | ||
786 | [TH_FIN|TH_ACK] = 1, | ||
787 | [TH_ACK] = 1, | ||
788 | [TH_ACK|TH_PUSH] = 1, | ||
789 | [TH_ACK|TH_URG] = 1, | ||
790 | [TH_ACK|TH_URG|TH_PUSH] = 1, | ||
791 | [TH_FIN|TH_ACK|TH_PUSH] = 1, | ||
792 | [TH_FIN|TH_ACK|TH_URG] = 1, | ||
793 | [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1, | ||
794 | }; | ||
795 | |||
796 | /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ | ||
797 | static int tcp_error(struct sk_buff *skb, | ||
798 | unsigned int dataoff, | ||
799 | enum ip_conntrack_info *ctinfo, | ||
800 | int pf, | ||
801 | unsigned int hooknum, | ||
802 | int(*csum)(const struct sk_buff *,unsigned int)) | ||
803 | { | ||
804 | struct tcphdr _tcph, *th; | ||
805 | unsigned int tcplen = skb->len - dataoff; | ||
806 | u_int8_t tcpflags; | ||
807 | |||
808 | /* Smaller that minimal TCP header? */ | ||
809 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | ||
810 | if (th == NULL) { | ||
811 | if (LOG_INVALID(IPPROTO_TCP)) | ||
812 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
813 | "nf_ct_tcp: short packet "); | ||
814 | return -NF_ACCEPT; | ||
815 | } | ||
816 | |||
817 | /* Not whole TCP header or malformed packet */ | ||
818 | if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { | ||
819 | if (LOG_INVALID(IPPROTO_TCP)) | ||
820 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
821 | "nf_ct_tcp: truncated/malformed packet "); | ||
822 | return -NF_ACCEPT; | ||
823 | } | ||
824 | |||
825 | /* Checksum invalid? Ignore. | ||
826 | * We skip checking packets on the outgoing path | ||
827 | * because the semantic of CHECKSUM_HW is different there | ||
828 | * and moreover root might send raw packets. | ||
829 | */ | ||
830 | /* FIXME: Source route IP option packets --RR */ | ||
831 | if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || | ||
832 | (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) | ||
833 | && skb->ip_summed != CHECKSUM_UNNECESSARY | ||
834 | && csum(skb, dataoff)) { | ||
835 | if (LOG_INVALID(IPPROTO_TCP)) | ||
836 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
837 | "nf_ct_tcp: bad TCP checksum "); | ||
838 | return -NF_ACCEPT; | ||
839 | } | ||
840 | |||
841 | /* Check TCP flags. */ | ||
842 | tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); | ||
843 | if (!tcp_valid_flags[tcpflags]) { | ||
844 | if (LOG_INVALID(IPPROTO_TCP)) | ||
845 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
846 | "nf_ct_tcp: invalid TCP flag combination "); | ||
847 | return -NF_ACCEPT; | ||
848 | } | ||
849 | |||
850 | return NF_ACCEPT; | ||
851 | } | ||
852 | |||
853 | static int csum4(const struct sk_buff *skb, unsigned int dataoff) | ||
854 | { | ||
855 | return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, | ||
856 | skb->len - dataoff, IPPROTO_TCP, | ||
857 | skb->ip_summed == CHECKSUM_HW ? skb->csum | ||
858 | : skb_checksum(skb, dataoff, | ||
859 | skb->len - dataoff, 0)); | ||
860 | } | ||
861 | |||
862 | static int csum6(const struct sk_buff *skb, unsigned int dataoff) | ||
863 | { | ||
864 | return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, | ||
865 | skb->len - dataoff, IPPROTO_TCP, | ||
866 | skb->ip_summed == CHECKSUM_HW ? skb->csum | ||
867 | : skb_checksum(skb, dataoff, skb->len - dataoff, | ||
868 | 0)); | ||
869 | } | ||
870 | |||
871 | static int tcp_error4(struct sk_buff *skb, | ||
872 | unsigned int dataoff, | ||
873 | enum ip_conntrack_info *ctinfo, | ||
874 | int pf, | ||
875 | unsigned int hooknum) | ||
876 | { | ||
877 | return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum4); | ||
878 | } | ||
879 | |||
880 | static int tcp_error6(struct sk_buff *skb, | ||
881 | unsigned int dataoff, | ||
882 | enum ip_conntrack_info *ctinfo, | ||
883 | int pf, | ||
884 | unsigned int hooknum) | ||
885 | { | ||
886 | return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum6); | ||
887 | } | ||
888 | |||
889 | /* Returns verdict for packet, or -1 for invalid. */ | ||
890 | static int tcp_packet(struct nf_conn *conntrack, | ||
891 | const struct sk_buff *skb, | ||
892 | unsigned int dataoff, | ||
893 | enum ip_conntrack_info ctinfo, | ||
894 | int pf, | ||
895 | unsigned int hooknum) | ||
896 | { | ||
897 | enum tcp_conntrack new_state, old_state; | ||
898 | enum ip_conntrack_dir dir; | ||
899 | struct tcphdr *th, _tcph; | ||
900 | unsigned long timeout; | ||
901 | unsigned int index; | ||
902 | |||
903 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | ||
904 | BUG_ON(th == NULL); | ||
905 | |||
906 | write_lock_bh(&tcp_lock); | ||
907 | old_state = conntrack->proto.tcp.state; | ||
908 | dir = CTINFO2DIR(ctinfo); | ||
909 | index = get_conntrack_index(th); | ||
910 | new_state = tcp_conntracks[dir][index][old_state]; | ||
911 | |||
912 | switch (new_state) { | ||
913 | case TCP_CONNTRACK_IGNORE: | ||
914 | /* Either SYN in ORIGINAL | ||
915 | * or SYN/ACK in REPLY. */ | ||
916 | if (index == TCP_SYNACK_SET | ||
917 | && conntrack->proto.tcp.last_index == TCP_SYN_SET | ||
918 | && conntrack->proto.tcp.last_dir != dir | ||
919 | && ntohl(th->ack_seq) == | ||
920 | conntrack->proto.tcp.last_end) { | ||
921 | /* This SYN/ACK acknowledges a SYN that we earlier | ||
922 | * ignored as invalid. This means that the client and | ||
923 | * the server are both in sync, while the firewall is | ||
924 | * not. We kill this session and block the SYN/ACK so | ||
925 | * that the client cannot but retransmit its SYN and | ||
926 | * thus initiate a clean new session. | ||
927 | */ | ||
928 | write_unlock_bh(&tcp_lock); | ||
929 | if (LOG_INVALID(IPPROTO_TCP)) | ||
930 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
931 | "nf_ct_tcp: killing out of sync session "); | ||
932 | if (del_timer(&conntrack->timeout)) | ||
933 | conntrack->timeout.function((unsigned long) | ||
934 | conntrack); | ||
935 | return -NF_DROP; | ||
936 | } | ||
937 | conntrack->proto.tcp.last_index = index; | ||
938 | conntrack->proto.tcp.last_dir = dir; | ||
939 | conntrack->proto.tcp.last_seq = ntohl(th->seq); | ||
940 | conntrack->proto.tcp.last_end = | ||
941 | segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); | ||
942 | |||
943 | write_unlock_bh(&tcp_lock); | ||
944 | if (LOG_INVALID(IPPROTO_TCP)) | ||
945 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
946 | "nf_ct_tcp: invalid packed ignored "); | ||
947 | return NF_ACCEPT; | ||
948 | case TCP_CONNTRACK_MAX: | ||
949 | /* Invalid packet */ | ||
950 | DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", | ||
951 | dir, get_conntrack_index(th), | ||
952 | old_state); | ||
953 | write_unlock_bh(&tcp_lock); | ||
954 | if (LOG_INVALID(IPPROTO_TCP)) | ||
955 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
956 | "nf_ct_tcp: invalid state "); | ||
957 | return -NF_ACCEPT; | ||
958 | case TCP_CONNTRACK_SYN_SENT: | ||
959 | if (old_state < TCP_CONNTRACK_TIME_WAIT) | ||
960 | break; | ||
961 | if ((conntrack->proto.tcp.seen[dir].flags & | ||
962 | IP_CT_TCP_FLAG_CLOSE_INIT) | ||
963 | || after(ntohl(th->seq), | ||
964 | conntrack->proto.tcp.seen[dir].td_end)) { | ||
965 | /* Attempt to reopen a closed connection. | ||
966 | * Delete this connection and look up again. */ | ||
967 | write_unlock_bh(&tcp_lock); | ||
968 | if (del_timer(&conntrack->timeout)) | ||
969 | conntrack->timeout.function((unsigned long) | ||
970 | conntrack); | ||
971 | return -NF_REPEAT; | ||
972 | } | ||
973 | case TCP_CONNTRACK_CLOSE: | ||
974 | if (index == TCP_RST_SET | ||
975 | && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) | ||
976 | && conntrack->proto.tcp.last_index == TCP_SYN_SET | ||
977 | && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { | ||
978 | /* RST sent to invalid SYN we had let trough | ||
979 | * SYN was in window then, tear down connection. | ||
980 | * We skip window checking, because packet might ACK | ||
981 | * segments we ignored in the SYN. */ | ||
982 | goto in_window; | ||
983 | } | ||
984 | /* Just fall trough */ | ||
985 | default: | ||
986 | /* Keep compilers happy. */ | ||
987 | break; | ||
988 | } | ||
989 | |||
990 | if (!tcp_in_window(&conntrack->proto.tcp, dir, index, | ||
991 | skb, dataoff, th, pf)) { | ||
992 | write_unlock_bh(&tcp_lock); | ||
993 | return -NF_ACCEPT; | ||
994 | } | ||
995 | in_window: | ||
996 | /* From now on we have got in-window packets */ | ||
997 | conntrack->proto.tcp.last_index = index; | ||
998 | |||
999 | DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " | ||
1000 | "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", | ||
1001 | NIPQUAD(iph->saddr), ntohs(th->source), | ||
1002 | NIPQUAD(iph->daddr), ntohs(th->dest), | ||
1003 | (th->syn ? 1 : 0), (th->ack ? 1 : 0), | ||
1004 | (th->fin ? 1 : 0), (th->rst ? 1 : 0), | ||
1005 | old_state, new_state); | ||
1006 | |||
1007 | conntrack->proto.tcp.state = new_state; | ||
1008 | if (old_state != new_state | ||
1009 | && (new_state == TCP_CONNTRACK_FIN_WAIT | ||
1010 | || new_state == TCP_CONNTRACK_CLOSE)) | ||
1011 | conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; | ||
1012 | timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans | ||
1013 | && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans | ||
1014 | ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; | ||
1015 | write_unlock_bh(&tcp_lock); | ||
1016 | |||
1017 | nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
1018 | if (new_state != old_state) | ||
1019 | nf_conntrack_event_cache(IPCT_PROTOINFO, skb); | ||
1020 | |||
1021 | if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { | ||
1022 | /* If only reply is a RST, we can consider ourselves not to | ||
1023 | have an established connection: this is a fairly common | ||
1024 | problem case, so we can delete the conntrack | ||
1025 | immediately. --RR */ | ||
1026 | if (th->rst) { | ||
1027 | if (del_timer(&conntrack->timeout)) | ||
1028 | conntrack->timeout.function((unsigned long) | ||
1029 | conntrack); | ||
1030 | return NF_ACCEPT; | ||
1031 | } | ||
1032 | } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) | ||
1033 | && (old_state == TCP_CONNTRACK_SYN_RECV | ||
1034 | || old_state == TCP_CONNTRACK_ESTABLISHED) | ||
1035 | && new_state == TCP_CONNTRACK_ESTABLISHED) { | ||
1036 | /* Set ASSURED if we see see valid ack in ESTABLISHED | ||
1037 | after SYN_RECV or a valid answer for a picked up | ||
1038 | connection. */ | ||
1039 | set_bit(IPS_ASSURED_BIT, &conntrack->status); | ||
1040 | nf_conntrack_event_cache(IPCT_STATUS, skb); | ||
1041 | } | ||
1042 | nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout); | ||
1043 | |||
1044 | return NF_ACCEPT; | ||
1045 | } | ||
1046 | |||
1047 | /* Called when a new connection for this protocol found. */ | ||
1048 | static int tcp_new(struct nf_conn *conntrack, | ||
1049 | const struct sk_buff *skb, | ||
1050 | unsigned int dataoff) | ||
1051 | { | ||
1052 | enum tcp_conntrack new_state; | ||
1053 | struct tcphdr *th, _tcph; | ||
1054 | #ifdef DEBUGP_VARS | ||
1055 | struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; | ||
1056 | struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; | ||
1057 | #endif | ||
1058 | |||
1059 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | ||
1060 | BUG_ON(th == NULL); | ||
1061 | |||
1062 | /* Don't need lock here: this conntrack not in circulation yet */ | ||
1063 | new_state | ||
1064 | = tcp_conntracks[0][get_conntrack_index(th)] | ||
1065 | [TCP_CONNTRACK_NONE]; | ||
1066 | |||
1067 | /* Invalid: delete conntrack */ | ||
1068 | if (new_state >= TCP_CONNTRACK_MAX) { | ||
1069 | DEBUGP("nf_ct_tcp: invalid new deleting.\n"); | ||
1070 | return 0; | ||
1071 | } | ||
1072 | |||
1073 | if (new_state == TCP_CONNTRACK_SYN_SENT) { | ||
1074 | /* SYN packet */ | ||
1075 | conntrack->proto.tcp.seen[0].td_end = | ||
1076 | segment_seq_plus_len(ntohl(th->seq), skb->len, | ||
1077 | dataoff, th); | ||
1078 | conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); | ||
1079 | if (conntrack->proto.tcp.seen[0].td_maxwin == 0) | ||
1080 | conntrack->proto.tcp.seen[0].td_maxwin = 1; | ||
1081 | conntrack->proto.tcp.seen[0].td_maxend = | ||
1082 | conntrack->proto.tcp.seen[0].td_end; | ||
1083 | |||
1084 | tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); | ||
1085 | conntrack->proto.tcp.seen[1].flags = 0; | ||
1086 | conntrack->proto.tcp.seen[0].loose = | ||
1087 | conntrack->proto.tcp.seen[1].loose = 0; | ||
1088 | } else if (nf_ct_tcp_loose == 0) { | ||
1089 | /* Don't try to pick up connections. */ | ||
1090 | return 0; | ||
1091 | } else { | ||
1092 | /* | ||
1093 | * We are in the middle of a connection, | ||
1094 | * its history is lost for us. | ||
1095 | * Let's try to use the data from the packet. | ||
1096 | */ | ||
1097 | conntrack->proto.tcp.seen[0].td_end = | ||
1098 | segment_seq_plus_len(ntohl(th->seq), skb->len, | ||
1099 | dataoff, th); | ||
1100 | conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); | ||
1101 | if (conntrack->proto.tcp.seen[0].td_maxwin == 0) | ||
1102 | conntrack->proto.tcp.seen[0].td_maxwin = 1; | ||
1103 | conntrack->proto.tcp.seen[0].td_maxend = | ||
1104 | conntrack->proto.tcp.seen[0].td_end + | ||
1105 | conntrack->proto.tcp.seen[0].td_maxwin; | ||
1106 | conntrack->proto.tcp.seen[0].td_scale = 0; | ||
1107 | |||
1108 | /* We assume SACK. Should we assume window scaling too? */ | ||
1109 | conntrack->proto.tcp.seen[0].flags = | ||
1110 | conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; | ||
1111 | conntrack->proto.tcp.seen[0].loose = | ||
1112 | conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose; | ||
1113 | } | ||
1114 | |||
1115 | conntrack->proto.tcp.seen[1].td_end = 0; | ||
1116 | conntrack->proto.tcp.seen[1].td_maxend = 0; | ||
1117 | conntrack->proto.tcp.seen[1].td_maxwin = 1; | ||
1118 | conntrack->proto.tcp.seen[1].td_scale = 0; | ||
1119 | |||
1120 | /* tcp_packet will set them */ | ||
1121 | conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; | ||
1122 | conntrack->proto.tcp.last_index = TCP_NONE_SET; | ||
1123 | |||
1124 | DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " | ||
1125 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | ||
1126 | sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
1127 | sender->td_scale, | ||
1128 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | ||
1129 | receiver->td_scale); | ||
1130 | return 1; | ||
1131 | } | ||
1132 | |||
1133 | struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = | ||
1134 | { | ||
1135 | .l3proto = PF_INET, | ||
1136 | .proto = IPPROTO_TCP, | ||
1137 | .name = "tcp", | ||
1138 | .pkt_to_tuple = tcp_pkt_to_tuple, | ||
1139 | .invert_tuple = tcp_invert_tuple, | ||
1140 | .print_tuple = tcp_print_tuple, | ||
1141 | .print_conntrack = tcp_print_conntrack, | ||
1142 | .packet = tcp_packet, | ||
1143 | .new = tcp_new, | ||
1144 | .error = tcp_error4, | ||
1145 | }; | ||
1146 | |||
1147 | struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = | ||
1148 | { | ||
1149 | .l3proto = PF_INET6, | ||
1150 | .proto = IPPROTO_TCP, | ||
1151 | .name = "tcp", | ||
1152 | .pkt_to_tuple = tcp_pkt_to_tuple, | ||
1153 | .invert_tuple = tcp_invert_tuple, | ||
1154 | .print_tuple = tcp_print_tuple, | ||
1155 | .print_conntrack = tcp_print_conntrack, | ||
1156 | .packet = tcp_packet, | ||
1157 | .new = tcp_new, | ||
1158 | .error = tcp_error6, | ||
1159 | }; | ||
1160 | |||
1161 | EXPORT_SYMBOL(nf_conntrack_protocol_tcp4); | ||
1162 | EXPORT_SYMBOL(nf_conntrack_protocol_tcp6); | ||
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c new file mode 100644 index 000000000000..3cae7ce420dd --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_udp.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
9 | * - enable working with Layer 3 protocol independent connection tracking. | ||
10 | * | ||
11 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c | ||
12 | */ | ||
13 | |||
14 | #include <linux/types.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/timer.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/netfilter.h> | ||
19 | #include <linux/udp.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | #include <linux/skbuff.h> | ||
22 | #include <linux/ipv6.h> | ||
23 | #include <net/ip6_checksum.h> | ||
24 | #include <net/checksum.h> | ||
25 | #include <linux/netfilter.h> | ||
26 | #include <linux/netfilter_ipv4.h> | ||
27 | #include <linux/netfilter_ipv6.h> | ||
28 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
29 | |||
30 | unsigned long nf_ct_udp_timeout = 30*HZ; | ||
31 | unsigned long nf_ct_udp_timeout_stream = 180*HZ; | ||
32 | |||
33 | static int udp_pkt_to_tuple(const struct sk_buff *skb, | ||
34 | unsigned int dataoff, | ||
35 | struct nf_conntrack_tuple *tuple) | ||
36 | { | ||
37 | struct udphdr _hdr, *hp; | ||
38 | |||
39 | /* Actually only need first 8 bytes. */ | ||
40 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
41 | if (hp == NULL) | ||
42 | return 0; | ||
43 | |||
44 | tuple->src.u.udp.port = hp->source; | ||
45 | tuple->dst.u.udp.port = hp->dest; | ||
46 | |||
47 | return 1; | ||
48 | } | ||
49 | |||
50 | static int udp_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
51 | const struct nf_conntrack_tuple *orig) | ||
52 | { | ||
53 | tuple->src.u.udp.port = orig->dst.u.udp.port; | ||
54 | tuple->dst.u.udp.port = orig->src.u.udp.port; | ||
55 | return 1; | ||
56 | } | ||
57 | |||
58 | /* Print out the per-protocol part of the tuple. */ | ||
59 | static int udp_print_tuple(struct seq_file *s, | ||
60 | const struct nf_conntrack_tuple *tuple) | ||
61 | { | ||
62 | return seq_printf(s, "sport=%hu dport=%hu ", | ||
63 | ntohs(tuple->src.u.udp.port), | ||
64 | ntohs(tuple->dst.u.udp.port)); | ||
65 | } | ||
66 | |||
/*
 * Print out the private part of the conntrack.  This implementation
 * prints nothing for UDP and always returns 0.
 */
static int udp_print_conntrack(struct seq_file *s,
			       const struct nf_conn *conntrack)
{
	return 0;
}
73 | |||
74 | /* Returns verdict for packet, and may modify conntracktype */ | ||
75 | static int udp_packet(struct nf_conn *conntrack, | ||
76 | const struct sk_buff *skb, | ||
77 | unsigned int dataoff, | ||
78 | enum ip_conntrack_info ctinfo, | ||
79 | int pf, | ||
80 | unsigned int hooknum) | ||
81 | { | ||
82 | /* If we've seen traffic both ways, this is some kind of UDP | ||
83 | stream. Extend timeout. */ | ||
84 | if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { | ||
85 | nf_ct_refresh_acct(conntrack, ctinfo, skb, | ||
86 | nf_ct_udp_timeout_stream); | ||
87 | /* Also, more likely to be important, and not a probe */ | ||
88 | if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) | ||
89 | nf_conntrack_event_cache(IPCT_STATUS, skb); | ||
90 | } else | ||
91 | nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout); | ||
92 | |||
93 | return NF_ACCEPT; | ||
94 | } | ||
95 | |||
/*
 * Called when a new connection for this protocol is found.  Nothing is
 * initialised here; the function always returns 1.
 */
static int udp_new(struct nf_conn *conntrack,
		   const struct sk_buff *skb,
		   unsigned int dataoff)
{
	return 1;
}
102 | |||
103 | static int udp_error(struct sk_buff *skb, unsigned int dataoff, | ||
104 | enum ip_conntrack_info *ctinfo, | ||
105 | int pf, | ||
106 | unsigned int hooknum, | ||
107 | int (*csum)(const struct sk_buff *, unsigned int)) | ||
108 | { | ||
109 | unsigned int udplen = skb->len - dataoff; | ||
110 | struct udphdr _hdr, *hdr; | ||
111 | |||
112 | /* Header is too small? */ | ||
113 | hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
114 | if (hdr == NULL) { | ||
115 | if (LOG_INVALID(IPPROTO_UDP)) | ||
116 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
117 | "nf_ct_udp: short packet "); | ||
118 | return -NF_ACCEPT; | ||
119 | } | ||
120 | |||
121 | /* Truncated/malformed packets */ | ||
122 | if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { | ||
123 | if (LOG_INVALID(IPPROTO_UDP)) | ||
124 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
125 | "nf_ct_udp: truncated/malformed packet "); | ||
126 | return -NF_ACCEPT; | ||
127 | } | ||
128 | |||
129 | /* Packet with no checksum */ | ||
130 | if (!hdr->check) | ||
131 | return NF_ACCEPT; | ||
132 | |||
133 | /* Checksum invalid? Ignore. | ||
134 | * We skip checking packets on the outgoing path | ||
135 | * because the semantic of CHECKSUM_HW is different there | ||
136 | * and moreover root might send raw packets. | ||
137 | * FIXME: Source route IP option packets --RR */ | ||
138 | if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || | ||
139 | (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) | ||
140 | && skb->ip_summed != CHECKSUM_UNNECESSARY | ||
141 | && csum(skb, dataoff)) { | ||
142 | if (LOG_INVALID(IPPROTO_UDP)) | ||
143 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
144 | "nf_ct_udp: bad UDP checksum "); | ||
145 | return -NF_ACCEPT; | ||
146 | } | ||
147 | |||
148 | return NF_ACCEPT; | ||
149 | } | ||
150 | |||
151 | static int csum4(const struct sk_buff *skb, unsigned int dataoff) | ||
152 | { | ||
153 | return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, | ||
154 | skb->len - dataoff, IPPROTO_UDP, | ||
155 | skb->ip_summed == CHECKSUM_HW ? skb->csum | ||
156 | : skb_checksum(skb, dataoff, | ||
157 | skb->len - dataoff, 0)); | ||
158 | } | ||
159 | |||
160 | static int csum6(const struct sk_buff *skb, unsigned int dataoff) | ||
161 | { | ||
162 | return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, | ||
163 | skb->len - dataoff, IPPROTO_UDP, | ||
164 | skb->ip_summed == CHECKSUM_HW ? skb->csum | ||
165 | : skb_checksum(skb, dataoff, skb->len - dataoff, | ||
166 | 0)); | ||
167 | } | ||
168 | |||
169 | static int udp_error4(struct sk_buff *skb, | ||
170 | unsigned int dataoff, | ||
171 | enum ip_conntrack_info *ctinfo, | ||
172 | int pf, | ||
173 | unsigned int hooknum) | ||
174 | { | ||
175 | return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum4); | ||
176 | } | ||
177 | |||
178 | static int udp_error6(struct sk_buff *skb, | ||
179 | unsigned int dataoff, | ||
180 | enum ip_conntrack_info *ctinfo, | ||
181 | int pf, | ||
182 | unsigned int hooknum) | ||
183 | { | ||
184 | return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum6); | ||
185 | } | ||
186 | |||
187 | struct nf_conntrack_protocol nf_conntrack_protocol_udp4 = | ||
188 | { | ||
189 | .l3proto = PF_INET, | ||
190 | .proto = IPPROTO_UDP, | ||
191 | .name = "udp", | ||
192 | .pkt_to_tuple = udp_pkt_to_tuple, | ||
193 | .invert_tuple = udp_invert_tuple, | ||
194 | .print_tuple = udp_print_tuple, | ||
195 | .print_conntrack = udp_print_conntrack, | ||
196 | .packet = udp_packet, | ||
197 | .new = udp_new, | ||
198 | .error = udp_error4, | ||
199 | }; | ||
200 | |||
201 | struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = | ||
202 | { | ||
203 | .l3proto = PF_INET6, | ||
204 | .proto = IPPROTO_UDP, | ||
205 | .name = "udp", | ||
206 | .pkt_to_tuple = udp_pkt_to_tuple, | ||
207 | .invert_tuple = udp_invert_tuple, | ||
208 | .print_tuple = udp_print_tuple, | ||
209 | .print_conntrack = udp_print_conntrack, | ||
210 | .packet = udp_packet, | ||
211 | .new = udp_new, | ||
212 | .error = udp_error6, | ||
213 | }; | ||
214 | |||
215 | EXPORT_SYMBOL(nf_conntrack_protocol_udp4); | ||
216 | EXPORT_SYMBOL(nf_conntrack_protocol_udp6); | ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c new file mode 100644 index 000000000000..45224db4fe2f --- /dev/null +++ b/net/netfilter/nf_conntrack_standalone.c | |||
@@ -0,0 +1,869 @@ | |||
1 | /* This file contains all the functions required for the standalone | ||
2 | nf_conntrack module. | ||
3 | |||
4 | These are not required by the compatibility layer. | ||
5 | */ | ||
6 | |||
7 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
8 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | * | ||
14 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
15 | * - generalize L3 protocol dependent part. | ||
16 | * | ||
17 | * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c | ||
18 | */ | ||
19 | |||
20 | #include <linux/config.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <linux/netfilter.h> | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/skbuff.h> | ||
25 | #include <linux/proc_fs.h> | ||
26 | #include <linux/seq_file.h> | ||
27 | #include <linux/percpu.h> | ||
28 | #include <linux/netdevice.h> | ||
29 | #ifdef CONFIG_SYSCTL | ||
30 | #include <linux/sysctl.h> | ||
31 | #endif | ||
32 | |||
33 | #define ASSERT_READ_LOCK(x) | ||
34 | #define ASSERT_WRITE_LOCK(x) | ||
35 | |||
36 | #include <net/netfilter/nf_conntrack.h> | ||
37 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
38 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
39 | #include <net/netfilter/nf_conntrack_core.h> | ||
40 | #include <net/netfilter/nf_conntrack_helper.h> | ||
41 | #include <linux/netfilter_ipv4/listhelp.h> | ||
42 | |||
43 | #if 0 | ||
44 | #define DEBUGP printk | ||
45 | #else | ||
46 | #define DEBUGP(format, args...) | ||
47 | #endif | ||
48 | |||
49 | MODULE_LICENSE("GPL"); | ||
50 | |||
51 | extern atomic_t nf_conntrack_count; | ||
52 | DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); | ||
53 | |||
54 | static int kill_l3proto(struct nf_conn *i, void *data) | ||
55 | { | ||
56 | return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == | ||
57 | ((struct nf_conntrack_l3proto *)data)->l3proto); | ||
58 | } | ||
59 | |||
60 | static int kill_proto(struct nf_conn *i, void *data) | ||
61 | { | ||
62 | struct nf_conntrack_protocol *proto; | ||
63 | proto = (struct nf_conntrack_protocol *)data; | ||
64 | return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == | ||
65 | proto->proto) && | ||
66 | (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == | ||
67 | proto->l3proto); | ||
68 | } | ||
69 | |||
70 | #ifdef CONFIG_PROC_FS | ||
71 | static int | ||
72 | print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, | ||
73 | struct nf_conntrack_l3proto *l3proto, | ||
74 | struct nf_conntrack_protocol *proto) | ||
75 | { | ||
76 | return l3proto->print_tuple(s, tuple) || proto->print_tuple(s, tuple); | ||
77 | } | ||
78 | |||
#ifdef CONFIG_NF_CT_ACCT
/*
 * Emit the packet and byte counters of one direction; returns whatever
 * seq_printf() returns.
 */
static unsigned int
seq_print_counters(struct seq_file *s,
		   const struct ip_conntrack_counter *counter)
{
	unsigned long long pkts = counter->packets;
	unsigned long long octets = counter->bytes;

	return seq_printf(s, "packets=%llu bytes=%llu ", pkts, octets);
}
#else
/* Accounting compiled out: print nothing and report success. */
#define seq_print_counters(x, y)	0
#endif
91 | |||
/* Per-open iteration cursor for the conntrack seq_file. */
struct ct_iter_state {
	unsigned int bucket;	/* index into nf_conntrack_hash being walked */
};
95 | |||
96 | static struct list_head *ct_get_first(struct seq_file *seq) | ||
97 | { | ||
98 | struct ct_iter_state *st = seq->private; | ||
99 | |||
100 | for (st->bucket = 0; | ||
101 | st->bucket < nf_conntrack_htable_size; | ||
102 | st->bucket++) { | ||
103 | if (!list_empty(&nf_conntrack_hash[st->bucket])) | ||
104 | return nf_conntrack_hash[st->bucket].next; | ||
105 | } | ||
106 | return NULL; | ||
107 | } | ||
108 | |||
109 | static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) | ||
110 | { | ||
111 | struct ct_iter_state *st = seq->private; | ||
112 | |||
113 | head = head->next; | ||
114 | while (head == &nf_conntrack_hash[st->bucket]) { | ||
115 | if (++st->bucket >= nf_conntrack_htable_size) | ||
116 | return NULL; | ||
117 | head = nf_conntrack_hash[st->bucket].next; | ||
118 | } | ||
119 | return head; | ||
120 | } | ||
121 | |||
122 | static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) | ||
123 | { | ||
124 | struct list_head *head = ct_get_first(seq); | ||
125 | |||
126 | if (head) | ||
127 | while (pos && (head = ct_get_next(seq, head))) | ||
128 | pos--; | ||
129 | return pos ? NULL : head; | ||
130 | } | ||
131 | |||
132 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) | ||
133 | { | ||
134 | read_lock_bh(&nf_conntrack_lock); | ||
135 | return ct_get_idx(seq, *pos); | ||
136 | } | ||
137 | |||
138 | static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
139 | { | ||
140 | (*pos)++; | ||
141 | return ct_get_next(s, v); | ||
142 | } | ||
143 | |||
144 | static void ct_seq_stop(struct seq_file *s, void *v) | ||
145 | { | ||
146 | read_unlock_bh(&nf_conntrack_lock); | ||
147 | } | ||
148 | |||
149 | /* return 0 on success, 1 in case of error */ | ||
150 | static int ct_seq_show(struct seq_file *s, void *v) | ||
151 | { | ||
152 | const struct nf_conntrack_tuple_hash *hash = v; | ||
153 | const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash); | ||
154 | struct nf_conntrack_l3proto *l3proto; | ||
155 | struct nf_conntrack_protocol *proto; | ||
156 | |||
157 | ASSERT_READ_LOCK(&nf_conntrack_lock); | ||
158 | NF_CT_ASSERT(conntrack); | ||
159 | |||
160 | /* we only want to print DIR_ORIGINAL */ | ||
161 | if (NF_CT_DIRECTION(hash)) | ||
162 | return 0; | ||
163 | |||
164 | l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] | ||
165 | .tuple.src.l3num); | ||
166 | |||
167 | NF_CT_ASSERT(l3proto); | ||
168 | proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] | ||
169 | .tuple.src.l3num, | ||
170 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL] | ||
171 | .tuple.dst.protonum); | ||
172 | NF_CT_ASSERT(proto); | ||
173 | |||
174 | if (seq_printf(s, "%-8s %u %-8s %u %ld ", | ||
175 | l3proto->name, | ||
176 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, | ||
177 | proto->name, | ||
178 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, | ||
179 | timer_pending(&conntrack->timeout) | ||
180 | ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0) | ||
181 | return -ENOSPC; | ||
182 | |||
183 | if (l3proto->print_conntrack(s, conntrack)) | ||
184 | return -ENOSPC; | ||
185 | |||
186 | if (proto->print_conntrack(s, conntrack)) | ||
187 | return -ENOSPC; | ||
188 | |||
189 | if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, | ||
190 | l3proto, proto)) | ||
191 | return -ENOSPC; | ||
192 | |||
193 | if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) | ||
194 | return -ENOSPC; | ||
195 | |||
196 | if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) | ||
197 | if (seq_printf(s, "[UNREPLIED] ")) | ||
198 | return -ENOSPC; | ||
199 | |||
200 | if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, | ||
201 | l3proto, proto)) | ||
202 | return -ENOSPC; | ||
203 | |||
204 | if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) | ||
205 | return -ENOSPC; | ||
206 | |||
207 | if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) | ||
208 | if (seq_printf(s, "[ASSURED] ")) | ||
209 | return -ENOSPC; | ||
210 | |||
211 | #if defined(CONFIG_NF_CONNTRACK_MARK) | ||
212 | if (seq_printf(s, "mark=%u ", conntrack->mark)) | ||
213 | return -ENOSPC; | ||
214 | #endif | ||
215 | |||
216 | if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) | ||
217 | return -ENOSPC; | ||
218 | |||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | static struct seq_operations ct_seq_ops = { | ||
223 | .start = ct_seq_start, | ||
224 | .next = ct_seq_next, | ||
225 | .stop = ct_seq_stop, | ||
226 | .show = ct_seq_show | ||
227 | }; | ||
228 | |||
229 | static int ct_open(struct inode *inode, struct file *file) | ||
230 | { | ||
231 | struct seq_file *seq; | ||
232 | struct ct_iter_state *st; | ||
233 | int ret; | ||
234 | |||
235 | st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); | ||
236 | if (st == NULL) | ||
237 | return -ENOMEM; | ||
238 | ret = seq_open(file, &ct_seq_ops); | ||
239 | if (ret) | ||
240 | goto out_free; | ||
241 | seq = file->private_data; | ||
242 | seq->private = st; | ||
243 | memset(st, 0, sizeof(struct ct_iter_state)); | ||
244 | return ret; | ||
245 | out_free: | ||
246 | kfree(st); | ||
247 | return ret; | ||
248 | } | ||
249 | |||
250 | static struct file_operations ct_file_ops = { | ||
251 | .owner = THIS_MODULE, | ||
252 | .open = ct_open, | ||
253 | .read = seq_read, | ||
254 | .llseek = seq_lseek, | ||
255 | .release = seq_release_private, | ||
256 | }; | ||
257 | |||
258 | /* expects */ | ||
259 | static void *exp_seq_start(struct seq_file *s, loff_t *pos) | ||
260 | { | ||
261 | struct list_head *e = &nf_conntrack_expect_list; | ||
262 | loff_t i; | ||
263 | |||
264 | /* strange seq_file api calls stop even if we fail, | ||
265 | * thus we need to grab lock since stop unlocks */ | ||
266 | read_lock_bh(&nf_conntrack_lock); | ||
267 | |||
268 | if (list_empty(e)) | ||
269 | return NULL; | ||
270 | |||
271 | for (i = 0; i <= *pos; i++) { | ||
272 | e = e->next; | ||
273 | if (e == &nf_conntrack_expect_list) | ||
274 | return NULL; | ||
275 | } | ||
276 | return e; | ||
277 | } | ||
278 | |||
279 | static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
280 | { | ||
281 | struct list_head *e = v; | ||
282 | |||
283 | ++*pos; | ||
284 | e = e->next; | ||
285 | |||
286 | if (e == &nf_conntrack_expect_list) | ||
287 | return NULL; | ||
288 | |||
289 | return e; | ||
290 | } | ||
291 | |||
292 | static void exp_seq_stop(struct seq_file *s, void *v) | ||
293 | { | ||
294 | read_unlock_bh(&nf_conntrack_lock); | ||
295 | } | ||
296 | |||
297 | static int exp_seq_show(struct seq_file *s, void *v) | ||
298 | { | ||
299 | struct nf_conntrack_expect *expect = v; | ||
300 | |||
301 | if (expect->timeout.function) | ||
302 | seq_printf(s, "%ld ", timer_pending(&expect->timeout) | ||
303 | ? (long)(expect->timeout.expires - jiffies)/HZ : 0); | ||
304 | else | ||
305 | seq_printf(s, "- "); | ||
306 | seq_printf(s, "l3proto = %u proto=%u ", | ||
307 | expect->tuple.src.l3num, | ||
308 | expect->tuple.dst.protonum); | ||
309 | print_tuple(s, &expect->tuple, | ||
310 | nf_ct_find_l3proto(expect->tuple.src.l3num), | ||
311 | nf_ct_find_proto(expect->tuple.src.l3num, | ||
312 | expect->tuple.dst.protonum)); | ||
313 | return seq_putc(s, '\n'); | ||
314 | } | ||
315 | |||
316 | static struct seq_operations exp_seq_ops = { | ||
317 | .start = exp_seq_start, | ||
318 | .next = exp_seq_next, | ||
319 | .stop = exp_seq_stop, | ||
320 | .show = exp_seq_show | ||
321 | }; | ||
322 | |||
323 | static int exp_open(struct inode *inode, struct file *file) | ||
324 | { | ||
325 | return seq_open(file, &exp_seq_ops); | ||
326 | } | ||
327 | |||
328 | static struct file_operations exp_file_ops = { | ||
329 | .owner = THIS_MODULE, | ||
330 | .open = exp_open, | ||
331 | .read = seq_read, | ||
332 | .llseek = seq_lseek, | ||
333 | .release = seq_release | ||
334 | }; | ||
335 | |||
336 | static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) | ||
337 | { | ||
338 | int cpu; | ||
339 | |||
340 | if (*pos == 0) | ||
341 | return SEQ_START_TOKEN; | ||
342 | |||
343 | for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { | ||
344 | if (!cpu_possible(cpu)) | ||
345 | continue; | ||
346 | *pos = cpu + 1; | ||
347 | return &per_cpu(nf_conntrack_stat, cpu); | ||
348 | } | ||
349 | |||
350 | return NULL; | ||
351 | } | ||
352 | |||
353 | static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
354 | { | ||
355 | int cpu; | ||
356 | |||
357 | for (cpu = *pos; cpu < NR_CPUS; ++cpu) { | ||
358 | if (!cpu_possible(cpu)) | ||
359 | continue; | ||
360 | *pos = cpu + 1; | ||
361 | return &per_cpu(nf_conntrack_stat, cpu); | ||
362 | } | ||
363 | |||
364 | return NULL; | ||
365 | } | ||
366 | |||
/* seq_file .stop for the statistics file: ct_cpu_seq_start() takes no lock,
 * so there is nothing to undo. */
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
370 | |||
371 | static int ct_cpu_seq_show(struct seq_file *seq, void *v) | ||
372 | { | ||
373 | unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); | ||
374 | struct ip_conntrack_stat *st = v; | ||
375 | |||
376 | if (v == SEQ_START_TOKEN) { | ||
377 | seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); | ||
378 | return 0; | ||
379 | } | ||
380 | |||
381 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " | ||
382 | "%08x %08x %08x %08x %08x %08x %08x %08x \n", | ||
383 | nr_conntracks, | ||
384 | st->searched, | ||
385 | st->found, | ||
386 | st->new, | ||
387 | st->invalid, | ||
388 | st->ignore, | ||
389 | st->delete, | ||
390 | st->delete_list, | ||
391 | st->insert, | ||
392 | st->insert_failed, | ||
393 | st->drop, | ||
394 | st->early_drop, | ||
395 | st->error, | ||
396 | |||
397 | st->expect_new, | ||
398 | st->expect_create, | ||
399 | st->expect_delete | ||
400 | ); | ||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | static struct seq_operations ct_cpu_seq_ops = { | ||
405 | .start = ct_cpu_seq_start, | ||
406 | .next = ct_cpu_seq_next, | ||
407 | .stop = ct_cpu_seq_stop, | ||
408 | .show = ct_cpu_seq_show, | ||
409 | }; | ||
410 | |||
411 | static int ct_cpu_seq_open(struct inode *inode, struct file *file) | ||
412 | { | ||
413 | return seq_open(file, &ct_cpu_seq_ops); | ||
414 | } | ||
415 | |||
416 | static struct file_operations ct_cpu_seq_fops = { | ||
417 | .owner = THIS_MODULE, | ||
418 | .open = ct_cpu_seq_open, | ||
419 | .read = seq_read, | ||
420 | .llseek = seq_lseek, | ||
421 | .release = seq_release_private, | ||
422 | }; | ||
423 | #endif /* CONFIG_PROC_FS */ | ||
424 | |||
425 | /* Sysctl support */ | ||
426 | |||
427 | #ifdef CONFIG_SYSCTL | ||
428 | |||
429 | /* From nf_conntrack_core.c */ | ||
430 | extern int nf_conntrack_max; | ||
431 | extern unsigned int nf_conntrack_htable_size; | ||
432 | |||
433 | /* From nf_conntrack_proto_tcp.c */ | ||
434 | extern unsigned long nf_ct_tcp_timeout_syn_sent; | ||
435 | extern unsigned long nf_ct_tcp_timeout_syn_recv; | ||
436 | extern unsigned long nf_ct_tcp_timeout_established; | ||
437 | extern unsigned long nf_ct_tcp_timeout_fin_wait; | ||
438 | extern unsigned long nf_ct_tcp_timeout_close_wait; | ||
439 | extern unsigned long nf_ct_tcp_timeout_last_ack; | ||
440 | extern unsigned long nf_ct_tcp_timeout_time_wait; | ||
441 | extern unsigned long nf_ct_tcp_timeout_close; | ||
442 | extern unsigned long nf_ct_tcp_timeout_max_retrans; | ||
443 | extern int nf_ct_tcp_loose; | ||
444 | extern int nf_ct_tcp_be_liberal; | ||
445 | extern int nf_ct_tcp_max_retrans; | ||
446 | |||
447 | /* From nf_conntrack_proto_udp.c */ | ||
448 | extern unsigned long nf_ct_udp_timeout; | ||
449 | extern unsigned long nf_ct_udp_timeout_stream; | ||
450 | |||
451 | /* From nf_conntrack_proto_generic.c */ | ||
452 | extern unsigned long nf_ct_generic_timeout; | ||
453 | |||
454 | /* Log invalid packets of a given protocol */ | ||
455 | static int log_invalid_proto_min = 0; | ||
456 | static int log_invalid_proto_max = 255; | ||
457 | |||
458 | static struct ctl_table_header *nf_ct_sysctl_header; | ||
459 | |||
460 | static ctl_table nf_ct_sysctl_table[] = { | ||
461 | { | ||
462 | .ctl_name = NET_NF_CONNTRACK_MAX, | ||
463 | .procname = "nf_conntrack_max", | ||
464 | .data = &nf_conntrack_max, | ||
465 | .maxlen = sizeof(int), | ||
466 | .mode = 0644, | ||
467 | .proc_handler = &proc_dointvec, | ||
468 | }, | ||
469 | { | ||
470 | .ctl_name = NET_NF_CONNTRACK_COUNT, | ||
471 | .procname = "nf_conntrack_count", | ||
472 | .data = &nf_conntrack_count, | ||
473 | .maxlen = sizeof(int), | ||
474 | .mode = 0444, | ||
475 | .proc_handler = &proc_dointvec, | ||
476 | }, | ||
477 | { | ||
478 | .ctl_name = NET_NF_CONNTRACK_BUCKETS, | ||
479 | .procname = "nf_conntrack_buckets", | ||
480 | .data = &nf_conntrack_htable_size, | ||
481 | .maxlen = sizeof(unsigned int), | ||
482 | .mode = 0444, | ||
483 | .proc_handler = &proc_dointvec, | ||
484 | }, | ||
485 | { | ||
486 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, | ||
487 | .procname = "nf_conntrack_tcp_timeout_syn_sent", | ||
488 | .data = &nf_ct_tcp_timeout_syn_sent, | ||
489 | .maxlen = sizeof(unsigned int), | ||
490 | .mode = 0644, | ||
491 | .proc_handler = &proc_dointvec_jiffies, | ||
492 | }, | ||
493 | { | ||
494 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, | ||
495 | .procname = "nf_conntrack_tcp_timeout_syn_recv", | ||
496 | .data = &nf_ct_tcp_timeout_syn_recv, | ||
497 | .maxlen = sizeof(unsigned int), | ||
498 | .mode = 0644, | ||
499 | .proc_handler = &proc_dointvec_jiffies, | ||
500 | }, | ||
501 | { | ||
502 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, | ||
503 | .procname = "nf_conntrack_tcp_timeout_established", | ||
504 | .data = &nf_ct_tcp_timeout_established, | ||
505 | .maxlen = sizeof(unsigned int), | ||
506 | .mode = 0644, | ||
507 | .proc_handler = &proc_dointvec_jiffies, | ||
508 | }, | ||
509 | { | ||
510 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, | ||
511 | .procname = "nf_conntrack_tcp_timeout_fin_wait", | ||
512 | .data = &nf_ct_tcp_timeout_fin_wait, | ||
513 | .maxlen = sizeof(unsigned int), | ||
514 | .mode = 0644, | ||
515 | .proc_handler = &proc_dointvec_jiffies, | ||
516 | }, | ||
517 | { | ||
518 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, | ||
519 | .procname = "nf_conntrack_tcp_timeout_close_wait", | ||
520 | .data = &nf_ct_tcp_timeout_close_wait, | ||
521 | .maxlen = sizeof(unsigned int), | ||
522 | .mode = 0644, | ||
523 | .proc_handler = &proc_dointvec_jiffies, | ||
524 | }, | ||
525 | { | ||
526 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, | ||
527 | .procname = "nf_conntrack_tcp_timeout_last_ack", | ||
528 | .data = &nf_ct_tcp_timeout_last_ack, | ||
529 | .maxlen = sizeof(unsigned int), | ||
530 | .mode = 0644, | ||
531 | .proc_handler = &proc_dointvec_jiffies, | ||
532 | }, | ||
533 | { | ||
534 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, | ||
535 | .procname = "nf_conntrack_tcp_timeout_time_wait", | ||
536 | .data = &nf_ct_tcp_timeout_time_wait, | ||
537 | .maxlen = sizeof(unsigned int), | ||
538 | .mode = 0644, | ||
539 | .proc_handler = &proc_dointvec_jiffies, | ||
540 | }, | ||
541 | { | ||
542 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, | ||
543 | .procname = "nf_conntrack_tcp_timeout_close", | ||
544 | .data = &nf_ct_tcp_timeout_close, | ||
545 | .maxlen = sizeof(unsigned int), | ||
546 | .mode = 0644, | ||
547 | .proc_handler = &proc_dointvec_jiffies, | ||
548 | }, | ||
549 | { | ||
550 | .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT, | ||
551 | .procname = "nf_conntrack_udp_timeout", | ||
552 | .data = &nf_ct_udp_timeout, | ||
553 | .maxlen = sizeof(unsigned int), | ||
554 | .mode = 0644, | ||
555 | .proc_handler = &proc_dointvec_jiffies, | ||
556 | }, | ||
557 | { | ||
558 | .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, | ||
559 | .procname = "nf_conntrack_udp_timeout_stream", | ||
560 | .data = &nf_ct_udp_timeout_stream, | ||
561 | .maxlen = sizeof(unsigned int), | ||
562 | .mode = 0644, | ||
563 | .proc_handler = &proc_dointvec_jiffies, | ||
564 | }, | ||
565 | { | ||
566 | .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT, | ||
567 | .procname = "nf_conntrack_generic_timeout", | ||
568 | .data = &nf_ct_generic_timeout, | ||
569 | .maxlen = sizeof(unsigned int), | ||
570 | .mode = 0644, | ||
571 | .proc_handler = &proc_dointvec_jiffies, | ||
572 | }, | ||
573 | { | ||
574 | .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, | ||
575 | .procname = "nf_conntrack_log_invalid", | ||
576 | .data = &nf_ct_log_invalid, | ||
577 | .maxlen = sizeof(unsigned int), | ||
578 | .mode = 0644, | ||
579 | .proc_handler = &proc_dointvec_minmax, | ||
580 | .strategy = &sysctl_intvec, | ||
581 | .extra1 = &log_invalid_proto_min, | ||
582 | .extra2 = &log_invalid_proto_max, | ||
583 | }, | ||
584 | { | ||
585 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, | ||
586 | .procname = "nf_conntrack_tcp_timeout_max_retrans", | ||
587 | .data = &nf_ct_tcp_timeout_max_retrans, | ||
588 | .maxlen = sizeof(unsigned int), | ||
589 | .mode = 0644, | ||
590 | .proc_handler = &proc_dointvec_jiffies, | ||
591 | }, | ||
592 | { | ||
593 | .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, | ||
594 | .procname = "nf_conntrack_tcp_loose", | ||
595 | .data = &nf_ct_tcp_loose, | ||
596 | .maxlen = sizeof(unsigned int), | ||
597 | .mode = 0644, | ||
598 | .proc_handler = &proc_dointvec, | ||
599 | }, | ||
600 | { | ||
601 | .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, | ||
602 | .procname = "nf_conntrack_tcp_be_liberal", | ||
603 | .data = &nf_ct_tcp_be_liberal, | ||
604 | .maxlen = sizeof(unsigned int), | ||
605 | .mode = 0644, | ||
606 | .proc_handler = &proc_dointvec, | ||
607 | }, | ||
608 | { | ||
609 | .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, | ||
610 | .procname = "nf_conntrack_tcp_max_retrans", | ||
611 | .data = &nf_ct_tcp_max_retrans, | ||
612 | .maxlen = sizeof(unsigned int), | ||
613 | .mode = 0644, | ||
614 | .proc_handler = &proc_dointvec, | ||
615 | }, | ||
616 | |||
617 | { .ctl_name = 0 } | ||
618 | }; | ||
619 | |||
620 | #define NET_NF_CONNTRACK_MAX 2089 | ||
621 | |||
622 | static ctl_table nf_ct_netfilter_table[] = { | ||
623 | { | ||
624 | .ctl_name = NET_NETFILTER, | ||
625 | .procname = "netfilter", | ||
626 | .mode = 0555, | ||
627 | .child = nf_ct_sysctl_table, | ||
628 | }, | ||
629 | { | ||
630 | .ctl_name = NET_NF_CONNTRACK_MAX, | ||
631 | .procname = "nf_conntrack_max", | ||
632 | .data = &nf_conntrack_max, | ||
633 | .maxlen = sizeof(int), | ||
634 | .mode = 0644, | ||
635 | .proc_handler = &proc_dointvec, | ||
636 | }, | ||
637 | { .ctl_name = 0 } | ||
638 | }; | ||
639 | |||
640 | static ctl_table nf_ct_net_table[] = { | ||
641 | { | ||
642 | .ctl_name = CTL_NET, | ||
643 | .procname = "net", | ||
644 | .mode = 0555, | ||
645 | .child = nf_ct_netfilter_table, | ||
646 | }, | ||
647 | { .ctl_name = 0 } | ||
648 | }; | ||
649 | EXPORT_SYMBOL(nf_ct_log_invalid); | ||
650 | #endif /* CONFIG_SYSCTL */ | ||
651 | |||
/*
 * Combined module setup / teardown.
 *
 * @init: non-zero to initialise, zero to tear everything down.
 *
 * Initialisation order: nf_conntrack_init(), the "nf_conntrack" and
 * "nf_conntrack_expect" proc_net files, the "nf_conntrack" entry under
 * proc_net_stat, then the sysctl tree.  On failure, everything that was
 * already set up is undone in reverse order by falling through the cleanup
 * labels, and a negative errno is returned.  The teardown path (init == 0)
 * enters at the top of the same label chain and returns 0.
 */
static int init_or_cleanup(int init)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
#endif
	int ret = 0;

	if (!init)
		goto cleanup;

	ret = nf_conntrack_init();
	if (ret < 0)
		goto cleanup_nothing;

#ifdef CONFIG_PROC_FS
	/*
	 * The proc helpers only report failure as NULL; set an error code
	 * explicitly so a failed setup is not reported as success.
	 */
	proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops);
	if (!proc) {
		ret = -ENOMEM;
		goto cleanup_init;
	}

	proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440,
					&exp_file_ops);
	if (!proc_exp) {
		ret = -ENOMEM;
		goto cleanup_proc;
	}

	proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat);
	if (!proc_stat) {
		ret = -ENOMEM;
		goto cleanup_proc_exp;
	}

	proc_stat->proc_fops = &ct_cpu_seq_fops;
	proc_stat->owner = THIS_MODULE;
#endif
#ifdef CONFIG_SYSCTL
	nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
	if (nf_ct_sysctl_header == NULL) {
		printk("nf_conntrack: can't register to sysctl.\n");
		ret = -ENOMEM;
		goto cleanup_proc_stat;
	}
#endif

	return ret;

 cleanup:
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(nf_ct_sysctl_header);
 cleanup_proc_stat:
#endif
#ifdef CONFIG_PROC_FS
	/* Created as "nf_conntrack" under proc_net_stat, so remove it there. */
	remove_proc_entry("nf_conntrack", proc_net_stat);
 cleanup_proc_exp:
	proc_net_remove("nf_conntrack_expect");
 cleanup_proc:
	proc_net_remove("nf_conntrack");
 cleanup_init:
#endif /* CONFIG_PROC_FS */
	nf_conntrack_cleanup();
 cleanup_nothing:
	return ret;
}
708 | |||
709 | int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) | ||
710 | { | ||
711 | int ret = 0; | ||
712 | |||
713 | write_lock_bh(&nf_conntrack_lock); | ||
714 | if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_generic_l3proto) { | ||
715 | ret = -EBUSY; | ||
716 | goto out; | ||
717 | } | ||
718 | nf_ct_l3protos[proto->l3proto] = proto; | ||
719 | out: | ||
720 | write_unlock_bh(&nf_conntrack_lock); | ||
721 | |||
722 | return ret; | ||
723 | } | ||
724 | |||
725 | void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) | ||
726 | { | ||
727 | write_lock_bh(&nf_conntrack_lock); | ||
728 | nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto; | ||
729 | write_unlock_bh(&nf_conntrack_lock); | ||
730 | |||
731 | /* Somebody could be still looking at the proto in bh. */ | ||
732 | synchronize_net(); | ||
733 | |||
734 | /* Remove all contrack entries for this protocol */ | ||
735 | nf_ct_iterate_cleanup(kill_l3proto, proto); | ||
736 | } | ||
737 | |||
738 | /* FIXME: Allow NULL functions and sub in pointers to generic for | ||
739 | them. --RR */ | ||
740 | int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto) | ||
741 | { | ||
742 | int ret = 0; | ||
743 | |||
744 | retry: | ||
745 | write_lock_bh(&nf_conntrack_lock); | ||
746 | if (nf_ct_protos[proto->l3proto]) { | ||
747 | if (nf_ct_protos[proto->l3proto][proto->proto] | ||
748 | != &nf_conntrack_generic_protocol) { | ||
749 | ret = -EBUSY; | ||
750 | goto out_unlock; | ||
751 | } | ||
752 | } else { | ||
753 | /* l3proto may be loaded latter. */ | ||
754 | struct nf_conntrack_protocol **proto_array; | ||
755 | int i; | ||
756 | |||
757 | write_unlock_bh(&nf_conntrack_lock); | ||
758 | |||
759 | proto_array = (struct nf_conntrack_protocol **) | ||
760 | kmalloc(MAX_NF_CT_PROTO * | ||
761 | sizeof(struct nf_conntrack_protocol *), | ||
762 | GFP_KERNEL); | ||
763 | if (proto_array == NULL) { | ||
764 | ret = -ENOMEM; | ||
765 | goto out; | ||
766 | } | ||
767 | for (i = 0; i < MAX_NF_CT_PROTO; i++) | ||
768 | proto_array[i] = &nf_conntrack_generic_protocol; | ||
769 | |||
770 | write_lock_bh(&nf_conntrack_lock); | ||
771 | if (nf_ct_protos[proto->l3proto]) { | ||
772 | /* bad timing, but no problem */ | ||
773 | write_unlock_bh(&nf_conntrack_lock); | ||
774 | kfree(proto_array); | ||
775 | } else { | ||
776 | nf_ct_protos[proto->l3proto] = proto_array; | ||
777 | write_unlock_bh(&nf_conntrack_lock); | ||
778 | } | ||
779 | |||
780 | /* | ||
781 | * Just once because array is never freed until unloading | ||
782 | * nf_conntrack.ko | ||
783 | */ | ||
784 | goto retry; | ||
785 | } | ||
786 | |||
787 | nf_ct_protos[proto->l3proto][proto->proto] = proto; | ||
788 | |||
789 | out_unlock: | ||
790 | write_unlock_bh(&nf_conntrack_lock); | ||
791 | out: | ||
792 | return ret; | ||
793 | } | ||
794 | |||
795 | void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto) | ||
796 | { | ||
797 | write_lock_bh(&nf_conntrack_lock); | ||
798 | nf_ct_protos[proto->l3proto][proto->proto] | ||
799 | = &nf_conntrack_generic_protocol; | ||
800 | write_unlock_bh(&nf_conntrack_lock); | ||
801 | |||
802 | /* Somebody could be still looking at the proto in bh. */ | ||
803 | synchronize_net(); | ||
804 | |||
805 | /* Remove all contrack entries for this protocol */ | ||
806 | nf_ct_iterate_cleanup(kill_proto, proto); | ||
807 | } | ||
808 | |||
809 | static int __init init(void) | ||
810 | { | ||
811 | return init_or_cleanup(1); | ||
812 | } | ||
813 | |||
814 | static void __exit fini(void) | ||
815 | { | ||
816 | init_or_cleanup(0); | ||
817 | } | ||
818 | |||
819 | module_init(init); | ||
820 | module_exit(fini); | ||
821 | |||
/*
 * Deliberately empty.  Modules that need nf_conntrack loaded but do not
 * otherwise reference any of its symbols should call this, which gives
 * them a symbol dependency on this module.
 */
void need_nf_conntrack(void)
{
}
827 | |||
828 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
829 | EXPORT_SYMBOL_GPL(nf_conntrack_chain); | ||
830 | EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain); | ||
831 | EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); | ||
832 | EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); | ||
833 | EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); | ||
834 | EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); | ||
835 | EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); | ||
836 | #endif | ||
837 | EXPORT_SYMBOL(nf_conntrack_l3proto_register); | ||
838 | EXPORT_SYMBOL(nf_conntrack_l3proto_unregister); | ||
839 | EXPORT_SYMBOL(nf_conntrack_protocol_register); | ||
840 | EXPORT_SYMBOL(nf_conntrack_protocol_unregister); | ||
841 | EXPORT_SYMBOL(nf_ct_invert_tuplepr); | ||
842 | EXPORT_SYMBOL(nf_conntrack_alter_reply); | ||
843 | EXPORT_SYMBOL(nf_conntrack_destroyed); | ||
844 | EXPORT_SYMBOL(need_nf_conntrack); | ||
845 | EXPORT_SYMBOL(nf_conntrack_helper_register); | ||
846 | EXPORT_SYMBOL(nf_conntrack_helper_unregister); | ||
847 | EXPORT_SYMBOL(nf_ct_iterate_cleanup); | ||
848 | EXPORT_SYMBOL(__nf_ct_refresh_acct); | ||
849 | EXPORT_SYMBOL(nf_ct_protos); | ||
850 | EXPORT_SYMBOL(nf_ct_find_proto); | ||
851 | EXPORT_SYMBOL(nf_ct_l3protos); | ||
852 | EXPORT_SYMBOL(nf_conntrack_expect_alloc); | ||
853 | EXPORT_SYMBOL(nf_conntrack_expect_put); | ||
854 | EXPORT_SYMBOL(nf_conntrack_expect_related); | ||
855 | EXPORT_SYMBOL(nf_conntrack_unexpect_related); | ||
856 | EXPORT_SYMBOL(nf_conntrack_tuple_taken); | ||
857 | EXPORT_SYMBOL(nf_conntrack_htable_size); | ||
858 | EXPORT_SYMBOL(nf_conntrack_lock); | ||
859 | EXPORT_SYMBOL(nf_conntrack_hash); | ||
860 | EXPORT_SYMBOL(nf_conntrack_untracked); | ||
861 | EXPORT_SYMBOL_GPL(nf_conntrack_find_get); | ||
862 | #ifdef CONFIG_IP_NF_NAT_NEEDED | ||
863 | EXPORT_SYMBOL(nf_conntrack_tcp_update); | ||
864 | #endif | ||
865 | EXPORT_SYMBOL(__nf_conntrack_confirm); | ||
866 | EXPORT_SYMBOL(nf_ct_get_tuple); | ||
867 | EXPORT_SYMBOL(nf_ct_invert_tuple); | ||
868 | EXPORT_SYMBOL(nf_conntrack_in); | ||
869 | EXPORT_SYMBOL(__nf_conntrack_attach); | ||
diff --git a/net/netlink/Makefile b/net/netlink/Makefile index 39d9c2dcd03c..e3589c2de49e 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile | |||
@@ -2,4 +2,4 @@ | |||
2 | # Makefile for the netlink driver. | 2 | # Makefile for the netlink driver. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := af_netlink.o | 5 | obj-y := af_netlink.o attr.o genetlink.o |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5ca283537bc6..8c38ee6d255e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -58,6 +58,7 @@ | |||
58 | 58 | ||
59 | #include <net/sock.h> | 59 | #include <net/sock.h> |
60 | #include <net/scm.h> | 60 | #include <net/scm.h> |
61 | #include <net/netlink.h> | ||
61 | 62 | ||
62 | #define Nprintk(a...) | 63 | #define Nprintk(a...) |
63 | #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) | 64 | #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) |
@@ -427,7 +428,8 @@ static int netlink_release(struct socket *sock) | |||
427 | 428 | ||
428 | spin_lock(&nlk->cb_lock); | 429 | spin_lock(&nlk->cb_lock); |
429 | if (nlk->cb) { | 430 | if (nlk->cb) { |
430 | nlk->cb->done(nlk->cb); | 431 | if (nlk->cb->done) |
432 | nlk->cb->done(nlk->cb); | ||
431 | netlink_destroy_callback(nlk->cb); | 433 | netlink_destroy_callback(nlk->cb); |
432 | nlk->cb = NULL; | 434 | nlk->cb = NULL; |
433 | } | 435 | } |
@@ -1322,7 +1324,8 @@ static int netlink_dump(struct sock *sk) | |||
1322 | skb_queue_tail(&sk->sk_receive_queue, skb); | 1324 | skb_queue_tail(&sk->sk_receive_queue, skb); |
1323 | sk->sk_data_ready(sk, skb->len); | 1325 | sk->sk_data_ready(sk, skb->len); |
1324 | 1326 | ||
1325 | cb->done(cb); | 1327 | if (cb->done) |
1328 | cb->done(cb); | ||
1326 | nlk->cb = NULL; | 1329 | nlk->cb = NULL; |
1327 | spin_unlock(&nlk->cb_lock); | 1330 | spin_unlock(&nlk->cb_lock); |
1328 | 1331 | ||
@@ -1409,6 +1412,94 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) | |||
1409 | netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 1412 | netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); |
1410 | } | 1413 | } |
1411 | 1414 | ||
1415 | static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, | ||
1416 | struct nlmsghdr *, int *)) | ||
1417 | { | ||
1418 | unsigned int total_len; | ||
1419 | struct nlmsghdr *nlh; | ||
1420 | int err; | ||
1421 | /* Feed each complete netlink message found in skb to cb, pulling it off afterwards. */ | ||
1422 | while (skb->len >= nlmsg_total_size(0)) { | ||
1423 | nlh = (struct nlmsghdr *) skb->data; | ||
1424 | /* A length below the header size would pull 0 bytes and loop forever. */ | ||
1425 | if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | ||
1426 | return 0; | ||
1427 | |||
1428 | total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len); | ||
1429 | |||
1430 | if (cb(skb, nlh, &err) < 0) { | ||
1431 | /* Not an error, but we have to interrupt processing | ||
1432 | * here. Note: that in this case we do not pull | ||
1433 | * message from skb, it will be processed later. | ||
1434 | */ | ||
1435 | if (err == 0) | ||
1436 | return -1; | ||
1437 | netlink_ack(skb, nlh, err); | ||
1438 | } else if (nlh->nlmsg_flags & NLM_F_ACK) | ||
1439 | netlink_ack(skb, nlh, 0); | ||
1440 | |||
1441 | skb_pull(skb, total_len); | ||
1442 | } | ||
1443 | |||
1444 | return 0; | ||
1445 | } | ||
1446 | |||
1447 | /** | ||
1448 | * netlink_run_queue - Process netlink receive queue. | ||
1449 | * @sk: Netlink socket containing the queue | ||
1450 | * @qlen: Place to store queue length upon entry | ||
1451 | * @cb: Callback function invoked for each netlink message found | ||
1452 | * | ||
1453 | * Processes as much as there was in the queue upon entry and invokes | ||
1454 | * a callback function for each netlink message found. The callback | ||
1455 | * function may refuse a message by returning a negative error code | ||
1456 | * but setting the error pointer to 0 in which case this function | ||
1457 | * returns with a qlen != 0. | ||
1458 | * | ||
1459 | * qlen must be initialized to 0 before the initial entry, afterwards | ||
1460 | * the function may be called repeatedly until qlen reaches 0. | ||
1461 | */ | ||
1462 | void netlink_run_queue(struct sock *sk, unsigned int *qlen, | ||
1463 | int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)) | ||
1464 | { | ||
1465 | struct sk_buff *skb; | ||
1466 | |||
1467 | if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue)) | ||
1468 | *qlen = skb_queue_len(&sk->sk_receive_queue); | ||
1469 | |||
1470 | for (; *qlen; (*qlen)--) { | ||
1471 | skb = skb_dequeue(&sk->sk_receive_queue); | ||
1472 | if (netlink_rcv_skb(skb, cb)) { | ||
1473 | if (skb->len) | ||
1474 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
1475 | else { | ||
1476 | kfree_skb(skb); | ||
1477 | (*qlen)--; | ||
1478 | } | ||
1479 | break; | ||
1480 | } | ||
1481 | |||
1482 | kfree_skb(skb); | ||
1483 | } | ||
1484 | } | ||
1485 | |||
1486 | /** | ||
1487 | * netlink_queue_skip - Skip netlink message while processing queue. | ||
1488 | * @nlh: Netlink message to be skipped | ||
1489 | * @skb: Socket buffer containing the netlink messages. | ||
1490 | * | ||
1491 | * Pulls the given netlink message off the socket buffer so the next | ||
1492 | * call to netlink_run_queue() will not reconsider the message. | ||
1493 | */ | ||
1494 | void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) | ||
1495 | { | ||
1496 | int msglen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
1497 | |||
1498 | if (msglen > skb->len) | ||
1499 | msglen = skb->len; | ||
1500 | |||
1501 | skb_pull(skb, msglen); | ||
1502 | } | ||
1412 | 1503 | ||
1413 | #ifdef CONFIG_PROC_FS | 1504 | #ifdef CONFIG_PROC_FS |
1414 | struct nl_seq_iter { | 1505 | struct nl_seq_iter { |
@@ -1657,6 +1748,8 @@ out: | |||
1657 | core_initcall(netlink_proto_init); | 1748 | core_initcall(netlink_proto_init); |
1658 | 1749 | ||
1659 | EXPORT_SYMBOL(netlink_ack); | 1750 | EXPORT_SYMBOL(netlink_ack); |
1751 | EXPORT_SYMBOL(netlink_run_queue); | ||
1752 | EXPORT_SYMBOL(netlink_queue_skip); | ||
1660 | EXPORT_SYMBOL(netlink_broadcast); | 1753 | EXPORT_SYMBOL(netlink_broadcast); |
1661 | EXPORT_SYMBOL(netlink_dump_start); | 1754 | EXPORT_SYMBOL(netlink_dump_start); |
1662 | EXPORT_SYMBOL(netlink_kernel_create); | 1755 | EXPORT_SYMBOL(netlink_kernel_create); |
diff --git a/net/netlink/attr.c b/net/netlink/attr.c new file mode 100644 index 000000000000..fffef4ab276f --- /dev/null +++ b/net/netlink/attr.c | |||
@@ -0,0 +1,328 @@ | |||
1 | /* | ||
2 | * NETLINK Netlink attributes | ||
3 | * | ||
4 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
5 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | ||
6 | */ | ||
7 | |||
8 | #include <linux/config.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/errno.h> | ||
12 | #include <linux/jiffies.h> | ||
13 | #include <linux/netdevice.h> | ||
14 | #include <linux/skbuff.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/types.h> | ||
17 | #include <net/netlink.h> | ||
18 | |||
19 | static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { | ||
20 | [NLA_U8] = sizeof(u8), | ||
21 | [NLA_U16] = sizeof(u16), | ||
22 | [NLA_U32] = sizeof(u32), | ||
23 | [NLA_U64] = sizeof(u64), | ||
24 | [NLA_STRING] = 1, | ||
25 | [NLA_NESTED] = NLA_HDRLEN, | ||
26 | }; | ||
27 | |||
28 | static int validate_nla(struct nlattr *nla, int maxtype, | ||
29 | struct nla_policy *policy) | ||
30 | { | ||
31 | struct nla_policy *pt; | ||
32 | int minlen = 0; | ||
33 | |||
34 | if (nla->nla_type <= 0 || nla->nla_type > maxtype) | ||
35 | return 0; | ||
36 | |||
37 | pt = &policy[nla->nla_type]; | ||
38 | |||
39 | BUG_ON(pt->type > NLA_TYPE_MAX); | ||
40 | |||
41 | if (pt->minlen) | ||
42 | minlen = pt->minlen; | ||
43 | else if (pt->type != NLA_UNSPEC) | ||
44 | minlen = nla_attr_minlen[pt->type]; | ||
45 | |||
46 | if (pt->type == NLA_FLAG && nla_len(nla) > 0) | ||
47 | return -ERANGE; | ||
48 | |||
49 | if (nla_len(nla) < minlen) | ||
50 | return -ERANGE; | ||
51 | |||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | /** | ||
56 | * nla_validate - Validate a stream of attributes | ||
57 | * @head: head of attribute stream | ||
58 | * @len: length of attribute stream | ||
59 | * @maxtype: maximum attribute type to be expected | ||
60 | * @policy: validation policy | ||
61 | * | ||
62 | * Validates all attributes in the specified attribute stream against the | ||
63 | * specified policy. Attributes with a type exceeding maxtype will be | ||
64 | * ignored. See documentation of struct nla_policy for more details. | ||
65 | * | ||
66 | * Returns 0 on success or a negative error code. | ||
67 | */ | ||
68 | int nla_validate(struct nlattr *head, int len, int maxtype, | ||
69 | struct nla_policy *policy) | ||
70 | { | ||
71 | struct nlattr *nla; | ||
72 | int rem, err; | ||
73 | |||
74 | nla_for_each_attr(nla, head, len, rem) { | ||
75 | err = validate_nla(nla, maxtype, policy); | ||
76 | if (err < 0) | ||
77 | goto errout; | ||
78 | } | ||
79 | |||
80 | err = 0; | ||
81 | errout: | ||
82 | return err; | ||
83 | } | ||
84 | |||
85 | /** | ||
86 | * nla_parse - Parse a stream of attributes into a tb buffer | ||
87 | * @tb: destination array with maxtype+1 elements | ||
88 | * @maxtype: maximum attribute type to be expected | ||
89 | * @head: head of attribute stream | ||
90 | * @len: length of attribute stream | ||
91 | * | ||
92 | * Parses a stream of attributes and stores a pointer to each attribute in | ||
93 | * the tb array accessible via the attribute type. Attributes with a type | ||
94 | * exceeding maxtype will be silently ignored for backwards compatibility | ||
95 | * reasons. policy may be set to NULL if no validation is required. | ||
96 | * | ||
97 | * Returns 0 on success or a negative error code. | ||
98 | */ | ||
99 | int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, | ||
100 | struct nla_policy *policy) | ||
101 | { | ||
102 | struct nlattr *nla; | ||
103 | int rem, err; | ||
104 | |||
105 | memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); | ||
106 | |||
107 | nla_for_each_attr(nla, head, len, rem) { | ||
108 | u16 type = nla->nla_type; | ||
109 | |||
110 | if (type > 0 && type <= maxtype) { | ||
111 | if (policy) { | ||
112 | err = validate_nla(nla, maxtype, policy); | ||
113 | if (err < 0) | ||
114 | goto errout; | ||
115 | } | ||
116 | |||
117 | tb[type] = nla; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (unlikely(rem > 0)) | ||
122 | printk(KERN_WARNING "netlink: %d bytes leftover after parsing " | ||
123 | "attributes.\n", rem); | ||
124 | |||
125 | err = 0; | ||
126 | errout: | ||
127 | return err; | ||
128 | } | ||
129 | |||
130 | /** | ||
131 | * nla_find - Find a specific attribute in a stream of attributes | ||
132 | * @head: head of attribute stream | ||
133 | * @len: length of attribute stream | ||
134 | * @attrtype: type of attribute to look for | ||
135 | * | ||
136 | * Returns the first attribute in the stream matching the specified type. | ||
137 | */ | ||
138 | struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) | ||
139 | { | ||
140 | struct nlattr *nla; | ||
141 | int rem; | ||
142 | |||
143 | nla_for_each_attr(nla, head, len, rem) | ||
144 | if (nla->nla_type == attrtype) | ||
145 | return nla; | ||
146 | |||
147 | return NULL; | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * nla_strlcpy - Copy string attribute payload into a sized buffer | ||
152 | * @dst: where to copy the string to | ||
153 | * @nla: attribute to copy the string from | ||
154 | * @dstsize: size of destination buffer | ||
155 | * | ||
156 | * Copies at most dstsize - 1 bytes into the destination buffer. | ||
157 | * The result is always a valid NUL-terminated string. Unlike | ||
158 | * strlcpy the destination buffer is always padded out. | ||
159 | * | ||
160 | * Returns the length of the source buffer. | ||
161 | */ | ||
162 | size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) | ||
163 | { | ||
164 | size_t srclen = nla_len(nla); | ||
165 | char *src = nla_data(nla); | ||
166 | |||
167 | if (srclen > 0 && src[srclen - 1] == '\0') | ||
168 | srclen--; | ||
169 | |||
170 | if (dstsize > 0) { | ||
171 | size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; | ||
172 | |||
173 | memset(dst, 0, dstsize); | ||
174 | memcpy(dst, src, len); | ||
175 | } | ||
176 | |||
177 | return srclen; | ||
178 | } | ||
179 | |||
180 | /** | ||
181 | * nla_memcpy - Copy a netlink attribute into another memory area | ||
182 | * @dest: where to copy to | ||
183 | * @src: netlink attribute to copy from | ||
184 | * @count: size of the destination area | ||
185 | * | ||
186 | * Note: The number of bytes copied is limited by the length of | ||
187 | * attribute's payload. | ||
188 | * | ||
189 | * Returns the number of bytes copied. | ||
190 | */ | ||
191 | int nla_memcpy(void *dest, struct nlattr *src, int count) | ||
192 | { | ||
193 | int minlen = min_t(int, count, nla_len(src)); | ||
194 | |||
195 | memcpy(dest, nla_data(src), minlen); | ||
196 | |||
197 | return minlen; | ||
198 | } | ||
199 | |||
200 | /** | ||
201 | * nla_memcmp - Compare an attribute with sized memory area | ||
202 | * @nla: netlink attribute | ||
203 | * @data: memory area | ||
204 | * @size: size of memory area | ||
205 | */ | ||
206 | int nla_memcmp(const struct nlattr *nla, const void *data, | ||
207 | size_t size) | ||
208 | { | ||
209 | int d = nla_len(nla) - size; | ||
210 | |||
211 | if (d == 0) | ||
212 | d = memcmp(nla_data(nla), data, size); | ||
213 | |||
214 | return d; | ||
215 | } | ||
216 | |||
217 | /** | ||
218 | * nla_strcmp - Compare a string attribute against a string | ||
219 | * @nla: netlink string attribute | ||
220 | * @str: another string | ||
221 | */ | ||
222 | int nla_strcmp(const struct nlattr *nla, const char *str) | ||
223 | { | ||
224 | int len = strlen(str) + 1; | ||
225 | int d = nla_len(nla) - len; | ||
226 | |||
227 | if (d == 0) | ||
228 | d = memcmp(nla_data(nla), str, len); | ||
229 | |||
230 | return d; | ||
231 | } | ||
232 | |||
233 | /** | ||
234 | * __nla_reserve - reserve room for attribute on the skb | ||
235 | * @skb: socket buffer to reserve room on | ||
236 | * @attrtype: attribute type | ||
237 | * @attrlen: length of attribute payload | ||
238 | * | ||
239 | * Adds a netlink attribute header to a socket buffer and reserves | ||
240 | * room for the payload but does not copy it. | ||
241 | * | ||
242 | * The caller is responsible to ensure that the skb provides enough | ||
243 | * tailroom for the attribute header and payload. | ||
244 | */ | ||
245 | struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) | ||
246 | { | ||
247 | struct nlattr *nla; | ||
248 | |||
249 | nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); | ||
250 | nla->nla_type = attrtype; | ||
251 | nla->nla_len = nla_attr_size(attrlen); | ||
252 | |||
253 | memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); | ||
254 | |||
255 | return nla; | ||
256 | } | ||
257 | |||
258 | /** | ||
259 | * nla_reserve - reserve room for attribute on the skb | ||
260 | * @skb: socket buffer to reserve room on | ||
261 | * @attrtype: attribute type | ||
262 | * @attrlen: length of attribute payload | ||
263 | * | ||
264 | * Adds a netlink attribute header to a socket buffer and reserves | ||
265 | * room for the payload but does not copy it. | ||
266 | * | ||
267 | * Returns NULL if the tailroom of the skb is insufficient to store | ||
268 | * the attribute header and payload. | ||
269 | */ | ||
270 | struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) | ||
271 | { | ||
272 | if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) | ||
273 | return NULL; | ||
274 | |||
275 | return __nla_reserve(skb, attrtype, attrlen); | ||
276 | } | ||
277 | |||
278 | /** | ||
279 | * __nla_put - Add a netlink attribute to a socket buffer | ||
280 | * @skb: socket buffer to add attribute to | ||
281 | * @attrtype: attribute type | ||
282 | * @attrlen: length of attribute payload | ||
283 | * @data: head of attribute payload | ||
284 | * | ||
285 | * The caller is responsible to ensure that the skb provides enough | ||
286 | * tailroom for the attribute header and payload. | ||
287 | */ | ||
288 | void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, | ||
289 | const void *data) | ||
290 | { | ||
291 | struct nlattr *nla; | ||
292 | |||
293 | nla = __nla_reserve(skb, attrtype, attrlen); | ||
294 | memcpy(nla_data(nla), data, attrlen); | ||
295 | } | ||
296 | |||
297 | |||
298 | /** | ||
299 | * nla_put - Add a netlink attribute to a socket buffer | ||
300 | * @skb: socket buffer to add attribute to | ||
301 | * @attrtype: attribute type | ||
302 | * @attrlen: length of attribute payload | ||
303 | * @data: head of attribute payload | ||
304 | * | ||
305 | * Returns -1 if the tailroom of the skb is insufficient to store | ||
306 | * the attribute header and payload. | ||
307 | */ | ||
308 | int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) | ||
309 | { | ||
310 | if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) | ||
311 | return -1; | ||
312 | |||
313 | __nla_put(skb, attrtype, attrlen, data); | ||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | |||
318 | EXPORT_SYMBOL(nla_validate); | ||
319 | EXPORT_SYMBOL(nla_parse); | ||
320 | EXPORT_SYMBOL(nla_find); | ||
321 | EXPORT_SYMBOL(nla_strlcpy); | ||
322 | EXPORT_SYMBOL(__nla_reserve); | ||
323 | EXPORT_SYMBOL(nla_reserve); | ||
324 | EXPORT_SYMBOL(__nla_put); | ||
325 | EXPORT_SYMBOL(nla_put); | ||
326 | EXPORT_SYMBOL(nla_memcpy); | ||
327 | EXPORT_SYMBOL(nla_memcmp); | ||
328 | EXPORT_SYMBOL(nla_strcmp); | ||
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c new file mode 100644 index 000000000000..287cfcc56951 --- /dev/null +++ b/net/netlink/genetlink.c | |||
@@ -0,0 +1,579 @@ | |||
1 | /* | ||
2 | * NETLINK Generic Netlink Family | ||
3 | * | ||
4 | * Authors: Jamal Hadi Salim | ||
5 | * Thomas Graf <tgraf@suug.ch> | ||
6 | */ | ||
7 | |||
8 | #include <linux/config.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/errno.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <linux/socket.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | #include <net/sock.h> | ||
17 | #include <net/genetlink.h> | ||
18 | |||
19 | struct sock *genl_sock = NULL; | ||
20 | |||
21 | static DECLARE_MUTEX(genl_sem); /* serialization of message processing */ | ||
22 | |||
23 | static void genl_lock(void) | ||
24 | { | ||
25 | down(&genl_sem); | ||
26 | } | ||
27 | |||
28 | static int genl_trylock(void) | ||
29 | { | ||
30 | return down_trylock(&genl_sem); | ||
31 | } | ||
32 | |||
33 | static void genl_unlock(void) | ||
34 | { | ||
35 | up(&genl_sem); | ||
36 | |||
37 | if (genl_sock && genl_sock->sk_receive_queue.qlen) | ||
38 | genl_sock->sk_data_ready(genl_sock, 0); | ||
39 | } | ||
40 | |||
41 | #define GENL_FAM_TAB_SIZE 16 | ||
42 | #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) | ||
43 | |||
44 | static struct list_head family_ht[GENL_FAM_TAB_SIZE]; | ||
45 | |||
46 | static int genl_ctrl_event(int event, void *data); | ||
47 | |||
48 | static inline unsigned int genl_family_hash(unsigned int id) | ||
49 | { | ||
50 | return id & GENL_FAM_TAB_MASK; | ||
51 | } | ||
52 | |||
53 | static inline struct list_head *genl_family_chain(unsigned int id) | ||
54 | { | ||
55 | return &family_ht[genl_family_hash(id)]; | ||
56 | } | ||
57 | |||
58 | static struct genl_family *genl_family_find_byid(unsigned int id) | ||
59 | { | ||
60 | struct genl_family *f; | ||
61 | |||
62 | list_for_each_entry(f, genl_family_chain(id), family_list) | ||
63 | if (f->id == id) | ||
64 | return f; | ||
65 | |||
66 | return NULL; | ||
67 | } | ||
68 | |||
69 | static struct genl_family *genl_family_find_byname(char *name) | ||
70 | { | ||
71 | struct genl_family *f; | ||
72 | int i; | ||
73 | |||
74 | for (i = 0; i < GENL_FAM_TAB_SIZE; i++) | ||
75 | list_for_each_entry(f, genl_family_chain(i), family_list) | ||
76 | if (strcmp(f->name, name) == 0) | ||
77 | return f; | ||
78 | |||
79 | return NULL; | ||
80 | } | ||
81 | |||
82 | static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) | ||
83 | { | ||
84 | struct genl_ops *ops; | ||
85 | |||
86 | list_for_each_entry(ops, &family->ops_list, ops_list) | ||
87 | if (ops->cmd == cmd) | ||
88 | return ops; | ||
89 | |||
90 | return NULL; | ||
91 | } | ||
92 | |||
93 | /* Pick an unused family id from the range GENL_MIN_ID + 1 .. GENL_MAX_ID. | ||
94 | * The search resumes after the id handed out last and wraps around once; | ||
95 | * 0 is returned only when every id in that range is already registered. | ||
96 | */ | ||
97 | static inline u16 genl_generate_id(void) | ||
98 | { | ||
99 | static u16 id_gen_idx; | ||
100 | int tries; | ||
101 | |||
102 | for (tries = GENL_MAX_ID - GENL_MIN_ID; tries > 0; tries--) { | ||
103 | /* First call (index still 0) or end of the range: restart at | ||
104 | * GENL_MIN_ID so that the increment below yields | ||
105 | * GENL_MIN_ID + 1 as the next candidate. | ||
106 | */ | ||
107 | if (id_gen_idx < GENL_MIN_ID || id_gen_idx >= GENL_MAX_ID) | ||
108 | id_gen_idx = GENL_MIN_ID; | ||
109 | id_gen_idx++; | ||
110 | |||
111 | if (!genl_family_find_byid(id_gen_idx)) | ||
112 | return id_gen_idx; | ||
113 | } | ||
114 | |||
115 | /* Each candidate id was probed exactly once and all are taken. */ | ||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | /** | ||
120 | * genl_register_ops - register generic netlink operations | ||
121 | * @family: generic netlink family | ||
122 | * @ops: operations to be registered | ||
123 | * | ||
124 | * Registers the specified operations and assigns them to the specified | ||
125 | * family. Either a doit or dumpit callback must be specified or the | ||
126 | * operation will fail. Only one operation structure per command | ||
127 | * identifier may be registered. | ||
128 | * | ||
129 | * See include/net/genetlink.h for more documentation on the operations | ||
130 | * structure. | ||
131 | * | ||
132 | * Returns 0 on success or a negative error code. | ||
133 | */ | ||
134 | int genl_register_ops(struct genl_family *family, struct genl_ops *ops) | ||
135 | { | ||
136 | int err = -EINVAL; | ||
137 | |||
138 | if (ops->dumpit == NULL && ops->doit == NULL) | ||
139 | goto errout; | ||
140 | /* Check for a duplicate and insert under one lock hold so the two cannot race. */ | ||
141 | genl_lock(); | ||
142 | if (genl_get_cmd(ops->cmd, family)) { | ||
143 | genl_unlock(); | ||
144 | err = -EEXIST; | ||
145 | goto errout; | ||
146 | } | ||
147 | list_add_tail(&ops->ops_list, &family->ops_list); | ||
148 | genl_unlock(); | ||
149 | |||
150 | genl_ctrl_event(CTRL_CMD_NEWOPS, ops); | ||
151 | err = 0; | ||
152 | errout: | ||
153 | return err; | ||
154 | } | ||
155 | |||
156 | /** | ||
157 | * genl_unregister_ops - unregister generic netlink operations | ||
158 | * @family: generic netlink family | ||
159 | * @ops: operations to be unregistered | ||
160 | * | ||
161 | * Unregisters the specified operations and unassigns them from the | ||
162 | * specified family. The operation blocks until the current message | ||
163 | * processing has finished and doesn't start again until the | ||
164 | * unregister process has finished. | ||
165 | * | ||
166 | * Note: It is not necessary to unregister all operations before | ||
167 | * unregistering the family, unregistering the family will cause | ||
168 | * all assigned operations to be unregistered automatically. | ||
169 | * | ||
170 | * Returns 0 on success or a negative error code. | ||
171 | */ | ||
172 | int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) | ||
173 | { | ||
174 | struct genl_ops *rc; | ||
175 | |||
176 | genl_lock(); | ||
177 | list_for_each_entry(rc, &family->ops_list, ops_list) { | ||
178 | if (rc == ops) { | ||
179 | list_del(&ops->ops_list); | ||
180 | genl_unlock(); | ||
181 | genl_ctrl_event(CTRL_CMD_DELOPS, ops); | ||
182 | return 0; | ||
183 | } | ||
184 | } | ||
185 | genl_unlock(); | ||
186 | |||
187 | return -ENOENT; | ||
188 | } | ||
189 | |||
190 | /** | ||
191 | * genl_register_family - register a generic netlink family | ||
192 | * @family: generic netlink family | ||
193 | * | ||
194 | * Registers the specified family after validating it first. Only one | ||
195 | * family may be registered with the same family name or identifier. | ||
196 | * The family id may equal GENL_ID_GENERATE causing a unique id to be | ||
197 | * generated. On failure the lock and the module reference are released. | ||
198 | * | ||
199 | * Returns 0 on success or a negative error code. | ||
200 | */ | ||
201 | int genl_register_family(struct genl_family *family) | ||
202 | { | ||
203 | int err = -EINVAL; | ||
204 | |||
205 | if (family->id && family->id < GENL_MIN_ID) | ||
206 | goto errout; | ||
207 | |||
208 | if (family->id > GENL_MAX_ID) | ||
209 | goto errout; | ||
210 | |||
211 | INIT_LIST_HEAD(&family->ops_list); | ||
212 | |||
213 | genl_lock(); | ||
214 | |||
215 | if (genl_family_find_byname(family->name)) { | ||
216 | err = -EEXIST; | ||
217 | goto errout_locked; | ||
218 | } | ||
219 | |||
220 | if (genl_family_find_byid(family->id)) { | ||
221 | err = -EEXIST; | ||
222 | goto errout_locked; | ||
223 | } | ||
224 | |||
225 | if (!try_module_get(family->owner)) { | ||
226 | err = -EBUSY; | ||
227 | goto errout_locked; | ||
228 | } | ||
229 | /* From here on every failure must drop the module reference taken above. */ | ||
230 | if (family->id == GENL_ID_GENERATE) { | ||
231 | u16 newid = genl_generate_id(); | ||
232 | |||
233 | if (!newid) { | ||
234 | err = -ENOMEM; | ||
235 | goto errout_put; | ||
236 | } | ||
237 | |||
238 | family->id = newid; | ||
239 | } | ||
240 | |||
241 | if (family->maxattr) { | ||
242 | family->attrbuf = kmalloc((family->maxattr+1) * | ||
243 | sizeof(struct nlattr *), GFP_KERNEL); | ||
244 | if (family->attrbuf == NULL) { | ||
245 | err = -ENOMEM; | ||
246 | goto errout_put; | ||
247 | } | ||
248 | } else | ||
249 | family->attrbuf = NULL; | ||
250 | |||
251 | list_add_tail(&family->family_list, genl_family_chain(family->id)); | ||
252 | genl_unlock(); | ||
253 | genl_ctrl_event(CTRL_CMD_NEWFAMILY, family); | ||
254 | return 0; | ||
255 | |||
256 | errout_put: | ||
257 | module_put(family->owner); | ||
258 | errout_locked: | ||
259 | genl_unlock(); | ||
260 | errout: | ||
261 | return err; | ||
262 | } | ||
263 | |||
264 | /** | ||
265 | * genl_unregister_family - unregister generic netlink family | ||
266 | * @family: generic netlink family | ||
267 | * | ||
268 | * Unregisters the specified family. | ||
269 | * | ||
270 | * Returns 0 on success or a negative error code. | ||
271 | */ | ||
272 | int genl_unregister_family(struct genl_family *family) | ||
273 | { | ||
274 | struct genl_family *rc; | ||
275 | |||
276 | genl_lock(); | ||
277 | |||
278 | list_for_each_entry(rc, genl_family_chain(family->id), family_list) { | ||
279 | if (family->id != rc->id || strcmp(rc->name, family->name)) | ||
280 | continue; | ||
281 | |||
282 | list_del(&rc->family_list); | ||
283 | INIT_LIST_HEAD(&family->ops_list); | ||
284 | genl_unlock(); | ||
285 | |||
286 | module_put(family->owner); | ||
287 | kfree(family->attrbuf); | ||
288 | genl_ctrl_event(CTRL_CMD_DELFAMILY, family); | ||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | genl_unlock(); | ||
293 | |||
294 | return -ENOENT; | ||
295 | } | ||
296 | |||
297 | static inline int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, | ||
298 | int *errp) | ||
299 | { | ||
300 | struct genl_ops *ops; | ||
301 | struct genl_family *family; | ||
302 | struct genl_info info; | ||
303 | struct genlmsghdr *hdr = nlmsg_data(nlh); | ||
304 | int hdrlen, err = -EINVAL; | ||
305 | |||
306 | if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) | ||
307 | goto ignore; | ||
308 | |||
309 | if (nlh->nlmsg_type < NLMSG_MIN_TYPE) | ||
310 | goto ignore; | ||
311 | |||
312 | family = genl_family_find_byid(nlh->nlmsg_type); | ||
313 | if (family == NULL) { | ||
314 | err = -ENOENT; | ||
315 | goto errout; | ||
316 | } | ||
317 | |||
318 | hdrlen = GENL_HDRLEN + family->hdrsize; | ||
319 | if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) | ||
320 | goto errout; | ||
321 | |||
322 | ops = genl_get_cmd(hdr->cmd, family); | ||
323 | if (ops == NULL) { | ||
324 | err = -EOPNOTSUPP; | ||
325 | goto errout; | ||
326 | } | ||
327 | |||
328 | if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb)) { | ||
329 | err = -EPERM; | ||
330 | goto errout; | ||
331 | } | ||
332 | |||
333 | if (nlh->nlmsg_flags & NLM_F_DUMP) { | ||
334 | if (ops->dumpit == NULL) { | ||
335 | err = -EOPNOTSUPP; | ||
336 | goto errout; | ||
337 | } | ||
338 | |||
339 | *errp = err = netlink_dump_start(genl_sock, skb, nlh, | ||
340 | ops->dumpit, NULL); | ||
341 | if (err == 0) | ||
342 | skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len), | ||
343 | skb->len)); | ||
344 | return -1; | ||
345 | } | ||
346 | |||
347 | if (ops->doit == NULL) { | ||
348 | err = -EOPNOTSUPP; | ||
349 | goto errout; | ||
350 | } | ||
351 | |||
352 | if (family->attrbuf) { | ||
353 | err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, | ||
354 | ops->policy); | ||
355 | if (err < 0) | ||
356 | goto errout; | ||
357 | } | ||
358 | |||
359 | info.snd_seq = nlh->nlmsg_seq; | ||
360 | info.snd_pid = NETLINK_CB(skb).pid; | ||
361 | info.nlhdr = nlh; | ||
362 | info.genlhdr = nlmsg_data(nlh); | ||
363 | info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; | ||
364 | info.attrs = family->attrbuf; | ||
365 | |||
366 | *errp = err = ops->doit(skb, &info); | ||
367 | return err; | ||
368 | |||
369 | ignore: | ||
370 | return 0; | ||
371 | |||
372 | errout: | ||
373 | *errp = err; | ||
374 | return -1; | ||
375 | } | ||
376 | |||
377 | static void genl_rcv(struct sock *sk, int len) | ||
378 | { | ||
379 | unsigned int qlen = 0; | ||
380 | |||
381 | do { | ||
382 | if (genl_trylock()) | ||
383 | return; | ||
384 | netlink_run_queue(sk, &qlen, &genl_rcv_msg); | ||
385 | genl_unlock(); | ||
386 | } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen); | ||
387 | } | ||
388 | |||
389 | /************************************************************************** | ||
390 | * Controller | ||
391 | **************************************************************************/ | ||
392 | |||
393 | static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, | ||
394 | u32 flags, struct sk_buff *skb, u8 cmd) | ||
395 | { | ||
396 | void *hdr; | ||
397 | |||
398 | hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, | ||
399 | family->version); | ||
400 | if (hdr == NULL) | ||
401 | return -1; | ||
402 | |||
403 | NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); | ||
404 | NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); | ||
405 | |||
406 | return genlmsg_end(skb, hdr); | ||
407 | |||
408 | nla_put_failure: | ||
409 | return genlmsg_cancel(skb, hdr); | ||
410 | } | ||
411 | |||
412 | static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) | ||
413 | { | ||
414 | |||
415 | int i, n = 0; | ||
416 | struct genl_family *rt; | ||
417 | int chains_to_skip = cb->args[0]; | ||
418 | int fams_to_skip = cb->args[1]; | ||
419 | |||
420 | for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { | ||
421 | if (i < chains_to_skip) | ||
422 | continue; | ||
423 | n = 0; | ||
424 | list_for_each_entry(rt, genl_family_chain(i), family_list) { | ||
425 | if (++n < fams_to_skip) | ||
426 | continue; | ||
427 | if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, | ||
428 | cb->nlh->nlmsg_seq, NLM_F_MULTI, | ||
429 | skb, CTRL_CMD_NEWFAMILY) < 0) | ||
430 | goto errout; | ||
431 | } | ||
432 | |||
433 | fams_to_skip = 0; | ||
434 | } | ||
435 | |||
436 | errout: | ||
437 | cb->args[0] = i; | ||
438 | cb->args[1] = n; | ||
439 | |||
440 | return skb->len; | ||
441 | } | ||
442 | |||
443 | static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, | ||
444 | int seq, int cmd) | ||
445 | { | ||
446 | struct sk_buff *skb; | ||
447 | int err; | ||
448 | |||
449 | skb = nlmsg_new(NLMSG_GOODSIZE); | ||
450 | if (skb == NULL) | ||
451 | return ERR_PTR(-ENOBUFS); | ||
452 | |||
453 | err = ctrl_fill_info(family, pid, seq, 0, skb, cmd); | ||
454 | if (err < 0) { | ||
455 | nlmsg_free(skb); | ||
456 | return ERR_PTR(err); | ||
457 | } | ||
458 | |||
459 | return skb; | ||
460 | } | ||
461 | |||
462 | static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { | ||
463 | [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, | ||
464 | [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING }, | ||
465 | }; | ||
466 | |||
467 | static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) | ||
468 | { | ||
469 | struct sk_buff *msg; | ||
470 | struct genl_family *res = NULL; | ||
471 | int err = -EINVAL; | ||
472 | |||
473 | if (info->attrs[CTRL_ATTR_FAMILY_ID]) { | ||
474 | u16 id = nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]); | ||
475 | res = genl_family_find_byid(id); | ||
476 | } | ||
477 | |||
478 | if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { | ||
479 | char name[GENL_NAMSIZ]; | ||
480 | |||
481 | if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME], | ||
482 | GENL_NAMSIZ) >= GENL_NAMSIZ) | ||
483 | goto errout; | ||
484 | |||
485 | res = genl_family_find_byname(name); | ||
486 | } | ||
487 | |||
488 | if (res == NULL) { | ||
489 | err = -ENOENT; | ||
490 | goto errout; | ||
491 | } | ||
492 | |||
493 | msg = ctrl_build_msg(res, info->snd_pid, info->snd_seq, | ||
494 | CTRL_CMD_NEWFAMILY); | ||
495 | if (IS_ERR(msg)) { | ||
496 | err = PTR_ERR(msg); | ||
497 | goto errout; | ||
498 | } | ||
499 | |||
500 | err = genlmsg_unicast(msg, info->snd_pid); | ||
501 | errout: | ||
502 | return err; | ||
503 | } | ||
504 | |||
505 | static int genl_ctrl_event(int event, void *data) | ||
506 | { | ||
507 | struct sk_buff *msg; | ||
508 | |||
509 | if (genl_sock == NULL) | ||
510 | return 0; | ||
511 | |||
512 | switch (event) { | ||
513 | case CTRL_CMD_NEWFAMILY: | ||
514 | case CTRL_CMD_DELFAMILY: | ||
515 | msg = ctrl_build_msg(data, 0, 0, event); | ||
516 | if (IS_ERR(msg)) | ||
517 | return PTR_ERR(msg); | ||
518 | |||
519 | genlmsg_multicast(msg, 0, GENL_ID_CTRL); | ||
520 | break; | ||
521 | } | ||
522 | |||
523 | return 0; | ||
524 | } | ||
525 | |||
526 | static struct genl_ops genl_ctrl_ops = { | ||
527 | .cmd = CTRL_CMD_GETFAMILY, | ||
528 | .doit = ctrl_getfamily, | ||
529 | .dumpit = ctrl_dumpfamily, | ||
530 | .policy = ctrl_policy, | ||
531 | }; | ||
532 | |||
533 | static struct genl_family genl_ctrl = { | ||
534 | .id = GENL_ID_CTRL, | ||
535 | .name = "nlctrl", | ||
536 | .version = 0x1, | ||
537 | .maxattr = CTRL_ATTR_MAX, | ||
538 | .owner = THIS_MODULE, | ||
539 | }; | ||
540 | |||
541 | static int __init genl_init(void) | ||
542 | { | ||
543 | int i, err; | ||
544 | |||
545 | for (i = 0; i < GENL_FAM_TAB_SIZE; i++) | ||
546 | INIT_LIST_HEAD(&family_ht[i]); | ||
547 | |||
548 | err = genl_register_family(&genl_ctrl); | ||
549 | if (err < 0) | ||
550 | goto errout; | ||
551 | |||
552 | err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops); | ||
553 | if (err < 0) | ||
554 | goto errout_register; | ||
555 | |||
556 | netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); | ||
557 | genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, | ||
558 | genl_rcv, THIS_MODULE); | ||
559 | if (genl_sock == NULL) { | ||
560 | panic("GENL: Cannot initialize generic netlink\n"); | ||
561 | return -ENOMEM; | ||
562 | } | ||
563 | |||
564 | return 0; | ||
565 | |||
566 | errout_register: | ||
567 | genl_unregister_family(&genl_ctrl); | ||
568 | errout: | ||
569 | panic("GENL: Cannot register controller: %d\n", err); | ||
570 | return err; | ||
571 | } | ||
572 | |||
573 | subsys_initcall(genl_init); | ||
574 | |||
575 | EXPORT_SYMBOL(genl_sock); | ||
576 | EXPORT_SYMBOL(genl_register_ops); | ||
577 | EXPORT_SYMBOL(genl_unregister_ops); | ||
578 | EXPORT_SYMBOL(genl_register_family); | ||
579 | EXPORT_SYMBOL(genl_unregister_family); | ||
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c35336a0f71b..0cdd9a07e043 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/net.h> | 19 | #include <linux/net.h> |
20 | #include <linux/skbuff.h> | 20 | #include <linux/skbuff.h> |
21 | #include <linux/netlink.h> | ||
22 | #include <linux/rtnetlink.h> | 21 | #include <linux/rtnetlink.h> |
23 | #include <linux/pfkeyv2.h> | 22 | #include <linux/pfkeyv2.h> |
24 | #include <linux/ipsec.h> | 23 | #include <linux/ipsec.h> |
@@ -26,6 +25,7 @@ | |||
26 | #include <linux/security.h> | 25 | #include <linux/security.h> |
27 | #include <net/sock.h> | 26 | #include <net/sock.h> |
28 | #include <net/xfrm.h> | 27 | #include <net/xfrm.h> |
28 | #include <net/netlink.h> | ||
29 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
30 | 30 | ||
31 | static struct sock *xfrm_nl; | 31 | static struct sock *xfrm_nl; |
@@ -948,11 +948,6 @@ static struct xfrm_link { | |||
948 | [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, | 948 | [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, |
949 | }; | 949 | }; |
950 | 950 | ||
951 | static int xfrm_done(struct netlink_callback *cb) | ||
952 | { | ||
953 | return 0; | ||
954 | } | ||
955 | |||
956 | static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) | 951 | static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) |
957 | { | 952 | { |
958 | struct rtattr *xfrma[XFRMA_MAX]; | 953 | struct rtattr *xfrma[XFRMA_MAX]; |
@@ -984,20 +979,15 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err | |||
984 | if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || | 979 | if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || |
985 | type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && | 980 | type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && |
986 | (nlh->nlmsg_flags & NLM_F_DUMP)) { | 981 | (nlh->nlmsg_flags & NLM_F_DUMP)) { |
987 | u32 rlen; | ||
988 | |||
989 | if (link->dump == NULL) | 982 | if (link->dump == NULL) |
990 | goto err_einval; | 983 | goto err_einval; |
991 | 984 | ||
992 | if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, | 985 | if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, |
993 | link->dump, | 986 | link->dump, NULL)) != 0) { |
994 | xfrm_done)) != 0) { | ||
995 | return -1; | 987 | return -1; |
996 | } | 988 | } |
997 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | 989 | |
998 | if (rlen > skb->len) | 990 | netlink_queue_skip(nlh, skb); |
999 | rlen = skb->len; | ||
1000 | skb_pull(skb, rlen); | ||
1001 | return -1; | 991 | return -1; |
1002 | } | 992 | } |
1003 | 993 | ||
@@ -1032,60 +1022,13 @@ err_einval: | |||
1032 | return -1; | 1022 | return -1; |
1033 | } | 1023 | } |
1034 | 1024 | ||
1035 | static int xfrm_user_rcv_skb(struct sk_buff *skb) | ||
1036 | { | ||
1037 | int err; | ||
1038 | struct nlmsghdr *nlh; | ||
1039 | |||
1040 | while (skb->len >= NLMSG_SPACE(0)) { | ||
1041 | u32 rlen; | ||
1042 | |||
1043 | nlh = (struct nlmsghdr *) skb->data; | ||
1044 | if (nlh->nlmsg_len < sizeof(*nlh) || | ||
1045 | skb->len < nlh->nlmsg_len) | ||
1046 | return 0; | ||
1047 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
1048 | if (rlen > skb->len) | ||
1049 | rlen = skb->len; | ||
1050 | if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) { | ||
1051 | if (err == 0) | ||
1052 | return -1; | ||
1053 | netlink_ack(skb, nlh, err); | ||
1054 | } else if (nlh->nlmsg_flags & NLM_F_ACK) | ||
1055 | netlink_ack(skb, nlh, 0); | ||
1056 | skb_pull(skb, rlen); | ||
1057 | } | ||
1058 | |||
1059 | return 0; | ||
1060 | } | ||
1061 | |||
1062 | static void xfrm_netlink_rcv(struct sock *sk, int len) | 1025 | static void xfrm_netlink_rcv(struct sock *sk, int len) |
1063 | { | 1026 | { |
1064 | unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); | 1027 | unsigned int qlen = 0; |
1065 | 1028 | ||
1066 | do { | 1029 | do { |
1067 | struct sk_buff *skb; | ||
1068 | |||
1069 | down(&xfrm_cfg_sem); | 1030 | down(&xfrm_cfg_sem); |
1070 | 1031 | netlink_run_queue(sk, &qlen, &xfrm_user_rcv_msg); | |
1071 | if (qlen > skb_queue_len(&sk->sk_receive_queue)) | ||
1072 | qlen = skb_queue_len(&sk->sk_receive_queue); | ||
1073 | |||
1074 | for (; qlen; qlen--) { | ||
1075 | skb = skb_dequeue(&sk->sk_receive_queue); | ||
1076 | if (xfrm_user_rcv_skb(skb)) { | ||
1077 | if (skb->len) | ||
1078 | skb_queue_head(&sk->sk_receive_queue, | ||
1079 | skb); | ||
1080 | else { | ||
1081 | kfree_skb(skb); | ||
1082 | qlen--; | ||
1083 | } | ||
1084 | break; | ||
1085 | } | ||
1086 | kfree_skb(skb); | ||
1087 | } | ||
1088 | |||
1089 | up(&xfrm_cfg_sem); | 1032 | up(&xfrm_cfg_sem); |
1090 | 1033 | ||
1091 | } while (qlen); | 1034 | } while (qlen); |