aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJozsef Kadlecsik <kadlec@blackhole.kfki.hu>2009-11-06 03:43:42 -0500
committerDavid S. Miller <davem@davemloft.net>2009-11-06 03:43:42 -0500
commitf9dd09c7f7199685601d75882447a6598be8a3e0 (patch)
tree98ab4a75ec6c74cdb4aa807c491002ba33de56c5 /net
parentf5209b4446d185cc95f46363f8043a743530c15a (diff)
netfilter: nf_nat: fix NAT issue in 2.6.30.4+
Vitezslav Samel discovered that since 2.6.30.4+ active FTP can not work over NAT. The "cause" of the problem was a fix of unacknowledged data detection with NAT (commit a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272). However, actually, that fix uncovered a long standing bug in TCP conntrack: when NAT was enabled, we simply updated the max of the right edge of the segments we have seen (td_end), by the offset NAT produced with changing IP/port in the data. However, we did not update the other parameter (td_maxend) which is affected by the NAT offset. Thus that could drift away from the correct value and thus resulted breaking active FTP. The patch below fixes the issue by *not* updating the conntrack parameters from NAT, but instead taking into account the NAT offsets in conntrack in a consistent way. (Updating from NAT would be more harder and expensive because it'd need to re-calculate parameters we already calculated in conntrack.) Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c34
-rw-r--r--net/netfilter/nf_conntrack_core.c8
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c64
4 files changed, 60 insertions, 49 deletions
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 68afc6ecd343..fe1a64479dd0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -750,6 +750,8 @@ static int __init nf_nat_init(void)
750 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); 750 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
751 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, 751 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
752 nfnetlink_parse_nat_setup); 752 nfnetlink_parse_nat_setup);
753 BUG_ON(nf_ct_nat_offset != NULL);
754 rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset);
753 return 0; 755 return 0;
754 756
755 cleanup_extend: 757 cleanup_extend:
@@ -764,6 +766,7 @@ static void __exit nf_nat_cleanup(void)
764 nf_ct_extend_unregister(&nat_extend); 766 nf_ct_extend_unregister(&nat_extend);
765 rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); 767 rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
766 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); 768 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
769 rcu_assign_pointer(nf_ct_nat_offset, NULL);
767 synchronize_net(); 770 synchronize_net();
768} 771}
769 772
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 09172a65d9b6..f9520fa3aba9 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -73,6 +73,28 @@ adjust_tcp_sequence(u32 seq,
73 DUMP_OFFSET(this_way); 73 DUMP_OFFSET(this_way);
74} 74}
75 75
76/* Get the offset value, for conntrack */
77s16 nf_nat_get_offset(const struct nf_conn *ct,
78 enum ip_conntrack_dir dir,
79 u32 seq)
80{
81 struct nf_conn_nat *nat = nfct_nat(ct);
82 struct nf_nat_seq *this_way;
83 s16 offset;
84
85 if (!nat)
86 return 0;
87
88 this_way = &nat->seq[dir];
89 spin_lock_bh(&nf_nat_seqofs_lock);
90 offset = after(seq, this_way->correction_pos)
91 ? this_way->offset_after : this_way->offset_before;
92 spin_unlock_bh(&nf_nat_seqofs_lock);
93
94 return offset;
95}
96EXPORT_SYMBOL_GPL(nf_nat_get_offset);
97
76/* Frobs data inside this packet, which is linear. */ 98/* Frobs data inside this packet, which is linear. */
77static void mangle_contents(struct sk_buff *skb, 99static void mangle_contents(struct sk_buff *skb,
78 unsigned int dataoff, 100 unsigned int dataoff,
@@ -189,11 +211,6 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
189 adjust_tcp_sequence(ntohl(tcph->seq), 211 adjust_tcp_sequence(ntohl(tcph->seq),
190 (int)rep_len - (int)match_len, 212 (int)rep_len - (int)match_len,
191 ct, ctinfo); 213 ct, ctinfo);
192 /* Tell TCP window tracking about seq change */
193 nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
194 ct, CTINFO2DIR(ctinfo),
195 (int)rep_len - (int)match_len);
196
197 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); 214 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
198 } 215 }
199 return 1; 216 return 1;
@@ -415,12 +432,7 @@ nf_nat_seq_adjust(struct sk_buff *skb,
415 tcph->seq = newseq; 432 tcph->seq = newseq;
416 tcph->ack_seq = newack; 433 tcph->ack_seq = newack;
417 434
418 if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) 435 return nf_nat_sack_adjust(skb, tcph, ct, ctinfo);
419 return 0;
420
421 nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff);
422
423 return 1;
424} 436}
425 437
426/* Setup NAT on this expected conntrack so it follows master. */ 438/* Setup NAT on this expected conntrack so it follows master. */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7c9ec3dee96e..0cdfb388a191 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1350,6 +1350,11 @@ err_stat:
1350 return ret; 1350 return ret;
1351} 1351}
1352 1352
1353s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
1354 enum ip_conntrack_dir dir,
1355 u32 seq);
1356EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
1357
1353int nf_conntrack_init(struct net *net) 1358int nf_conntrack_init(struct net *net)
1354{ 1359{
1355 int ret; 1360 int ret;
@@ -1367,6 +1372,9 @@ int nf_conntrack_init(struct net *net)
1367 /* For use by REJECT target */ 1372 /* For use by REJECT target */
1368 rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); 1373 rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
1369 rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); 1374 rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
1375
1376 /* Howto get NAT offsets */
1377 rcu_assign_pointer(nf_ct_nat_offset, NULL);
1370 } 1378 }
1371 return 0; 1379 return 0;
1372 1380
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 97a82ba75376..ba2b76937283 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -492,6 +492,21 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
492 } 492 }
493} 493}
494 494
495#ifdef CONFIG_NF_NAT_NEEDED
496static inline s16 nat_offset(const struct nf_conn *ct,
497 enum ip_conntrack_dir dir,
498 u32 seq)
499{
500 typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
501
502 return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
503}
504#define NAT_OFFSET(pf, ct, dir, seq) \
505 (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
506#else
507#define NAT_OFFSET(pf, ct, dir, seq) 0
508#endif
509
495static bool tcp_in_window(const struct nf_conn *ct, 510static bool tcp_in_window(const struct nf_conn *ct,
496 struct ip_ct_tcp *state, 511 struct ip_ct_tcp *state,
497 enum ip_conntrack_dir dir, 512 enum ip_conntrack_dir dir,
@@ -506,6 +521,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
506 struct ip_ct_tcp_state *receiver = &state->seen[!dir]; 521 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
507 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; 522 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
508 __u32 seq, ack, sack, end, win, swin; 523 __u32 seq, ack, sack, end, win, swin;
524 s16 receiver_offset;
509 bool res; 525 bool res;
510 526
511 /* 527 /*
@@ -519,11 +535,16 @@ static bool tcp_in_window(const struct nf_conn *ct,
519 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) 535 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
520 tcp_sack(skb, dataoff, tcph, &sack); 536 tcp_sack(skb, dataoff, tcph, &sack);
521 537
538 /* Take into account NAT sequence number mangling */
539 receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
540 ack -= receiver_offset;
541 sack -= receiver_offset;
542
522 pr_debug("tcp_in_window: START\n"); 543 pr_debug("tcp_in_window: START\n");
523 pr_debug("tcp_in_window: "); 544 pr_debug("tcp_in_window: ");
524 nf_ct_dump_tuple(tuple); 545 nf_ct_dump_tuple(tuple);
525 pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n", 546 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
526 seq, ack, sack, win, end); 547 seq, ack, receiver_offset, sack, receiver_offset, win, end);
527 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " 548 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
528 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 549 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
529 sender->td_end, sender->td_maxend, sender->td_maxwin, 550 sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -613,8 +634,8 @@ static bool tcp_in_window(const struct nf_conn *ct,
613 634
614 pr_debug("tcp_in_window: "); 635 pr_debug("tcp_in_window: ");
615 nf_ct_dump_tuple(tuple); 636 nf_ct_dump_tuple(tuple);
616 pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n", 637 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
617 seq, ack, sack, win, end); 638 seq, ack, receiver_offset, sack, receiver_offset, win, end);
618 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " 639 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
619 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 640 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
620 sender->td_end, sender->td_maxend, sender->td_maxwin, 641 sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -700,7 +721,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
700 before(seq, sender->td_maxend + 1) ? 721 before(seq, sender->td_maxend + 1) ?
701 after(end, sender->td_end - receiver->td_maxwin - 1) ? 722 after(end, sender->td_end - receiver->td_maxwin - 1) ?
702 before(sack, receiver->td_end + 1) ? 723 before(sack, receiver->td_end + 1) ?
703 after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" 724 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
704 : "ACK is under the lower bound (possible overly delayed ACK)" 725 : "ACK is under the lower bound (possible overly delayed ACK)"
705 : "ACK is over the upper bound (ACKed data not seen yet)" 726 : "ACK is over the upper bound (ACKed data not seen yet)"
706 : "SEQ is under the lower bound (already ACKed data retransmitted)" 727 : "SEQ is under the lower bound (already ACKed data retransmitted)"
@@ -715,39 +736,6 @@ static bool tcp_in_window(const struct nf_conn *ct,
715 return res; 736 return res;
716} 737}
717 738
718#ifdef CONFIG_NF_NAT_NEEDED
719/* Update sender->td_end after NAT successfully mangled the packet */
720/* Caller must linearize skb at tcp header. */
721void nf_conntrack_tcp_update(const struct sk_buff *skb,
722 unsigned int dataoff,
723 struct nf_conn *ct, int dir,
724 s16 offset)
725{
726 const struct tcphdr *tcph = (const void *)skb->data + dataoff;
727 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir];
728 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir];
729 __u32 end;
730
731 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
732
733 spin_lock_bh(&ct->lock);
734 /*
735 * We have to worry for the ack in the reply packet only...
736 */
737 if (ct->proto.tcp.seen[dir].td_end + offset == end)
738 ct->proto.tcp.seen[dir].td_end = end;
739 ct->proto.tcp.last_end = end;
740 spin_unlock_bh(&ct->lock);
741 pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
742 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
743 sender->td_end, sender->td_maxend, sender->td_maxwin,
744 sender->td_scale,
745 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
746 receiver->td_scale);
747}
748EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
749#endif
750
751#define TH_FIN 0x01 739#define TH_FIN 0x01
752#define TH_SYN 0x02 740#define TH_SYN 0x02
753#define TH_RST 0x04 741#define TH_RST 0x04