diff options
author | Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | 2009-11-06 03:43:42 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-11-06 03:43:42 -0500 |
commit | f9dd09c7f7199685601d75882447a6598be8a3e0 (patch) | |
tree | 98ab4a75ec6c74cdb4aa807c491002ba33de56c5 /net | |
parent | f5209b4446d185cc95f46363f8043a743530c15a (diff) |
netfilter: nf_nat: fix NAT issue in 2.6.30.4+
Vitezslav Samel discovered that since 2.6.30.4+ active FTP can not work
over NAT. The "cause" of the problem was a fix of unacknowledged data
detection with NAT (commit a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272).
However, actually, that fix uncovered a long standing bug in TCP conntrack:
when NAT was enabled, we simply updated the max of the right edge of
the segments we have seen (td_end), by the offset NAT produced with
changing IP/port in the data. However, we did not update the other parameter
(td_maxend) which is affected by the NAT offset. Thus that could drift
away from the correct value and thus resulted breaking active FTP.
The patch below fixes the issue by *not* updating the conntrack parameters
from NAT, but instead taking into account the NAT offsets in conntrack in a
consistent way. (Updating from NAT would be more harder and expensive because
it'd need to re-calculate parameters we already calculated in conntrack.)
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/netfilter/nf_nat_core.c | 3 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_helper.c | 34 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 8 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_tcp.c | 64 |
4 files changed, 60 insertions, 49 deletions
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 68afc6ecd343..fe1a64479dd0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -750,6 +750,8 @@ static int __init nf_nat_init(void) | |||
750 | BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); | 750 | BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); |
751 | rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, | 751 | rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, |
752 | nfnetlink_parse_nat_setup); | 752 | nfnetlink_parse_nat_setup); |
753 | BUG_ON(nf_ct_nat_offset != NULL); | ||
754 | rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); | ||
753 | return 0; | 755 | return 0; |
754 | 756 | ||
755 | cleanup_extend: | 757 | cleanup_extend: |
@@ -764,6 +766,7 @@ static void __exit nf_nat_cleanup(void) | |||
764 | nf_ct_extend_unregister(&nat_extend); | 766 | nf_ct_extend_unregister(&nat_extend); |
765 | rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); | 767 | rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); |
766 | rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); | 768 | rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); |
769 | rcu_assign_pointer(nf_ct_nat_offset, NULL); | ||
767 | synchronize_net(); | 770 | synchronize_net(); |
768 | } | 771 | } |
769 | 772 | ||
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 09172a65d9b6..f9520fa3aba9 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
@@ -73,6 +73,28 @@ adjust_tcp_sequence(u32 seq, | |||
73 | DUMP_OFFSET(this_way); | 73 | DUMP_OFFSET(this_way); |
74 | } | 74 | } |
75 | 75 | ||
76 | /* Get the offset value, for conntrack */ | ||
77 | s16 nf_nat_get_offset(const struct nf_conn *ct, | ||
78 | enum ip_conntrack_dir dir, | ||
79 | u32 seq) | ||
80 | { | ||
81 | struct nf_conn_nat *nat = nfct_nat(ct); | ||
82 | struct nf_nat_seq *this_way; | ||
83 | s16 offset; | ||
84 | |||
85 | if (!nat) | ||
86 | return 0; | ||
87 | |||
88 | this_way = &nat->seq[dir]; | ||
89 | spin_lock_bh(&nf_nat_seqofs_lock); | ||
90 | offset = after(seq, this_way->correction_pos) | ||
91 | ? this_way->offset_after : this_way->offset_before; | ||
92 | spin_unlock_bh(&nf_nat_seqofs_lock); | ||
93 | |||
94 | return offset; | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(nf_nat_get_offset); | ||
97 | |||
76 | /* Frobs data inside this packet, which is linear. */ | 98 | /* Frobs data inside this packet, which is linear. */ |
77 | static void mangle_contents(struct sk_buff *skb, | 99 | static void mangle_contents(struct sk_buff *skb, |
78 | unsigned int dataoff, | 100 | unsigned int dataoff, |
@@ -189,11 +211,6 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, | |||
189 | adjust_tcp_sequence(ntohl(tcph->seq), | 211 | adjust_tcp_sequence(ntohl(tcph->seq), |
190 | (int)rep_len - (int)match_len, | 212 | (int)rep_len - (int)match_len, |
191 | ct, ctinfo); | 213 | ct, ctinfo); |
192 | /* Tell TCP window tracking about seq change */ | ||
193 | nf_conntrack_tcp_update(skb, ip_hdrlen(skb), | ||
194 | ct, CTINFO2DIR(ctinfo), | ||
195 | (int)rep_len - (int)match_len); | ||
196 | |||
197 | nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); | 214 | nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); |
198 | } | 215 | } |
199 | return 1; | 216 | return 1; |
@@ -415,12 +432,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, | |||
415 | tcph->seq = newseq; | 432 | tcph->seq = newseq; |
416 | tcph->ack_seq = newack; | 433 | tcph->ack_seq = newack; |
417 | 434 | ||
418 | if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) | 435 | return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); |
419 | return 0; | ||
420 | |||
421 | nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); | ||
422 | |||
423 | return 1; | ||
424 | } | 436 | } |
425 | 437 | ||
426 | /* Setup NAT on this expected conntrack so it follows master. */ | 438 | /* Setup NAT on this expected conntrack so it follows master. */ |
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7c9ec3dee96e..0cdfb388a191 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -1350,6 +1350,11 @@ err_stat: | |||
1350 | return ret; | 1350 | return ret; |
1351 | } | 1351 | } |
1352 | 1352 | ||
1353 | s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, | ||
1354 | enum ip_conntrack_dir dir, | ||
1355 | u32 seq); | ||
1356 | EXPORT_SYMBOL_GPL(nf_ct_nat_offset); | ||
1357 | |||
1353 | int nf_conntrack_init(struct net *net) | 1358 | int nf_conntrack_init(struct net *net) |
1354 | { | 1359 | { |
1355 | int ret; | 1360 | int ret; |
@@ -1367,6 +1372,9 @@ int nf_conntrack_init(struct net *net) | |||
1367 | /* For use by REJECT target */ | 1372 | /* For use by REJECT target */ |
1368 | rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); | 1373 | rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); |
1369 | rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); | 1374 | rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); |
1375 | |||
1376 | /* Howto get NAT offsets */ | ||
1377 | rcu_assign_pointer(nf_ct_nat_offset, NULL); | ||
1370 | } | 1378 | } |
1371 | return 0; | 1379 | return 0; |
1372 | 1380 | ||
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 97a82ba75376..ba2b76937283 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c | |||
@@ -492,6 +492,21 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, | |||
492 | } | 492 | } |
493 | } | 493 | } |
494 | 494 | ||
495 | #ifdef CONFIG_NF_NAT_NEEDED | ||
496 | static inline s16 nat_offset(const struct nf_conn *ct, | ||
497 | enum ip_conntrack_dir dir, | ||
498 | u32 seq) | ||
499 | { | ||
500 | typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset); | ||
501 | |||
502 | return get_offset != NULL ? get_offset(ct, dir, seq) : 0; | ||
503 | } | ||
504 | #define NAT_OFFSET(pf, ct, dir, seq) \ | ||
505 | (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0) | ||
506 | #else | ||
507 | #define NAT_OFFSET(pf, ct, dir, seq) 0 | ||
508 | #endif | ||
509 | |||
495 | static bool tcp_in_window(const struct nf_conn *ct, | 510 | static bool tcp_in_window(const struct nf_conn *ct, |
496 | struct ip_ct_tcp *state, | 511 | struct ip_ct_tcp *state, |
497 | enum ip_conntrack_dir dir, | 512 | enum ip_conntrack_dir dir, |
@@ -506,6 +521,7 @@ static bool tcp_in_window(const struct nf_conn *ct, | |||
506 | struct ip_ct_tcp_state *receiver = &state->seen[!dir]; | 521 | struct ip_ct_tcp_state *receiver = &state->seen[!dir]; |
507 | const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; | 522 | const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; |
508 | __u32 seq, ack, sack, end, win, swin; | 523 | __u32 seq, ack, sack, end, win, swin; |
524 | s16 receiver_offset; | ||
509 | bool res; | 525 | bool res; |
510 | 526 | ||
511 | /* | 527 | /* |
@@ -519,11 +535,16 @@ static bool tcp_in_window(const struct nf_conn *ct, | |||
519 | if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) | 535 | if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) |
520 | tcp_sack(skb, dataoff, tcph, &sack); | 536 | tcp_sack(skb, dataoff, tcph, &sack); |
521 | 537 | ||
538 | /* Take into account NAT sequence number mangling */ | ||
539 | receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); | ||
540 | ack -= receiver_offset; | ||
541 | sack -= receiver_offset; | ||
542 | |||
522 | pr_debug("tcp_in_window: START\n"); | 543 | pr_debug("tcp_in_window: START\n"); |
523 | pr_debug("tcp_in_window: "); | 544 | pr_debug("tcp_in_window: "); |
524 | nf_ct_dump_tuple(tuple); | 545 | nf_ct_dump_tuple(tuple); |
525 | pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n", | 546 | pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", |
526 | seq, ack, sack, win, end); | 547 | seq, ack, receiver_offset, sack, receiver_offset, win, end); |
527 | pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " | 548 | pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " |
528 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | 549 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", |
529 | sender->td_end, sender->td_maxend, sender->td_maxwin, | 550 | sender->td_end, sender->td_maxend, sender->td_maxwin, |
@@ -613,8 +634,8 @@ static bool tcp_in_window(const struct nf_conn *ct, | |||
613 | 634 | ||
614 | pr_debug("tcp_in_window: "); | 635 | pr_debug("tcp_in_window: "); |
615 | nf_ct_dump_tuple(tuple); | 636 | nf_ct_dump_tuple(tuple); |
616 | pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n", | 637 | pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", |
617 | seq, ack, sack, win, end); | 638 | seq, ack, receiver_offset, sack, receiver_offset, win, end); |
618 | pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " | 639 | pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " |
619 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | 640 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", |
620 | sender->td_end, sender->td_maxend, sender->td_maxwin, | 641 | sender->td_end, sender->td_maxend, sender->td_maxwin, |
@@ -700,7 +721,7 @@ static bool tcp_in_window(const struct nf_conn *ct, | |||
700 | before(seq, sender->td_maxend + 1) ? | 721 | before(seq, sender->td_maxend + 1) ? |
701 | after(end, sender->td_end - receiver->td_maxwin - 1) ? | 722 | after(end, sender->td_end - receiver->td_maxwin - 1) ? |
702 | before(sack, receiver->td_end + 1) ? | 723 | before(sack, receiver->td_end + 1) ? |
703 | after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" | 724 | after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" |
704 | : "ACK is under the lower bound (possible overly delayed ACK)" | 725 | : "ACK is under the lower bound (possible overly delayed ACK)" |
705 | : "ACK is over the upper bound (ACKed data not seen yet)" | 726 | : "ACK is over the upper bound (ACKed data not seen yet)" |
706 | : "SEQ is under the lower bound (already ACKed data retransmitted)" | 727 | : "SEQ is under the lower bound (already ACKed data retransmitted)" |
@@ -715,39 +736,6 @@ static bool tcp_in_window(const struct nf_conn *ct, | |||
715 | return res; | 736 | return res; |
716 | } | 737 | } |
717 | 738 | ||
718 | #ifdef CONFIG_NF_NAT_NEEDED | ||
719 | /* Update sender->td_end after NAT successfully mangled the packet */ | ||
720 | /* Caller must linearize skb at tcp header. */ | ||
721 | void nf_conntrack_tcp_update(const struct sk_buff *skb, | ||
722 | unsigned int dataoff, | ||
723 | struct nf_conn *ct, int dir, | ||
724 | s16 offset) | ||
725 | { | ||
726 | const struct tcphdr *tcph = (const void *)skb->data + dataoff; | ||
727 | const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; | ||
728 | const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir]; | ||
729 | __u32 end; | ||
730 | |||
731 | end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); | ||
732 | |||
733 | spin_lock_bh(&ct->lock); | ||
734 | /* | ||
735 | * We have to worry for the ack in the reply packet only... | ||
736 | */ | ||
737 | if (ct->proto.tcp.seen[dir].td_end + offset == end) | ||
738 | ct->proto.tcp.seen[dir].td_end = end; | ||
739 | ct->proto.tcp.last_end = end; | ||
740 | spin_unlock_bh(&ct->lock); | ||
741 | pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " | ||
742 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | ||
743 | sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
744 | sender->td_scale, | ||
745 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | ||
746 | receiver->td_scale); | ||
747 | } | ||
748 | EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); | ||
749 | #endif | ||
750 | |||
751 | #define TH_FIN 0x01 | 739 | #define TH_FIN 0x01 |
752 | #define TH_SYN 0x02 | 740 | #define TH_SYN 0x02 |
753 | #define TH_RST 0x04 | 741 | #define TH_RST 0x04 |