aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2012-08-26 13:13:58 -0400
committerPablo Neira Ayuso <pablo@netfilter.org>2012-08-29 21:00:10 -0400
commit4cdd34084d539c758d00c5dc7bf95db2e4f2bc70 (patch)
tree6a5892006d512dfab477dd7fc3d8dd0cdf75cedb
parent590e3f79a21edd2e9857ac3ced25ba6b2a491ef8 (diff)
netfilter: nf_conntrack_ipv6: improve fragmentation handling
The IPv6 conntrack fragmentation currently has a couple of shortcomings. Fragments are collected in PREROUTING/OUTPUT, are defragmented, the defragmented packet is then passed to conntrack, the resulting conntrack information is attached to each original fragment and the fragments then continue their way through the stack.

Helper invocation occurs in the POSTROUTING hook, at which point only the original fragments are available. The result of this is that fragmented packets are never passed to helpers.

This patch improves the situation in the following way:

- If a reassembled packet belongs to a connection that has a helper assigned, the reassembled packet is passed through the stack instead of the original fragments.

- During defragmentation, the largest received fragment size is stored. On output, the packet is refragmented if required. If the largest received fragment size exceeds the outgoing MTU, a "packet too big" message is generated, thus behaving as if the original fragments were passed through the stack from an outside point of view.

- The ipv6_helper() hook function can't receive fragments anymore for connections using a helper, so it is switched to use ipv6_skip_exthdr() instead of the netfilter specific nf_ct_ipv6_skip_exthdr() and the reassembled packets are passed to connection tracking helpers.

The result of this is that we can properly track fragmented packets, but still generate ICMPv6 Packet too big messages if we would have before.

This patch is also required as a precondition for IPv6 NAT, where NAT helpers might enlarge packets up to a point that they require fragmentation. In that case we can't generate Packet too big messages since the proper MTU can't be calculated in all cases (e.g. when changing textual representation of a variable amount of addresses), so the packet is transparently fragmented iff the original packet or fragments would have fit the outgoing MTU.

IPVS parts by Jesper Dangaard Brouer <brouer@redhat.com>.
Signed-off-by: Patrick McHardy <kaber@trash.net>
-rw-r--r--include/linux/ipv6.h1
-rw-r--r--net/ipv6/ip6_output.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c41
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c19
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c9
5 files changed, 62 insertions, 15 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 879db26ec40..0b94e91ed68 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -256,6 +256,7 @@ struct inet6_skb_parm {
256#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 256#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
257 __u16 dsthao; 257 __u16 dsthao;
258#endif 258#endif
259 __u16 frag_max_size;
259 260
260#define IP6SKB_XFRM_TRANSFORMED 1 261#define IP6SKB_XFRM_TRANSFORMED 1
261#define IP6SKB_FORWARDED 2 262#define IP6SKB_FORWARDED 2
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5b2d63ed793..a4f6263fddc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -493,7 +493,8 @@ int ip6_forward(struct sk_buff *skb)
493 if (mtu < IPV6_MIN_MTU) 493 if (mtu < IPV6_MIN_MTU)
494 mtu = IPV6_MIN_MTU; 494 mtu = IPV6_MIN_MTU;
495 495
496 if (skb->len > mtu && !skb_is_gso(skb)) { 496 if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
497 (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
497 /* Again, force OUTPUT device used as source address */ 498 /* Again, force OUTPUT device used as source address */
498 skb->dev = dst->dev; 499 skb->dev = dst->dev;
499 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 500 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -636,7 +637,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
636 /* We must not fragment if the socket is set to force MTU discovery 637 /* We must not fragment if the socket is set to force MTU discovery
637 * or if the skb it not generated by a local socket. 638 * or if the skb it not generated by a local socket.
638 */ 639 */
639 if (unlikely(!skb->local_df && skb->len > mtu)) { 640 if (unlikely(!skb->local_df && skb->len > mtu) ||
641 (IP6CB(skb)->frag_max_size &&
642 IP6CB(skb)->frag_max_size > mtu)) {
640 if (skb->sk && dst_allfrag(skb_dst(skb))) 643 if (skb->sk && dst_allfrag(skb_dst(skb)))
641 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 644 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
642 645
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4794f96cf2e..521ddca876f 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -153,10 +153,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
153 const struct nf_conn_help *help; 153 const struct nf_conn_help *help;
154 const struct nf_conntrack_helper *helper; 154 const struct nf_conntrack_helper *helper;
155 enum ip_conntrack_info ctinfo; 155 enum ip_conntrack_info ctinfo;
156 unsigned int ret, protoff; 156 unsigned int ret;
157 unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; 157 __be16 frag_off;
158 unsigned char pnum = ipv6_hdr(skb)->nexthdr; 158 int protoff;
159 159 u8 nexthdr;
160 160
161 /* This is where we call the helper: as the packet goes out. */ 161 /* This is where we call the helper: as the packet goes out. */
162 ct = nf_ct_get(skb, &ctinfo); 162 ct = nf_ct_get(skb, &ctinfo);
@@ -171,9 +171,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
171 if (!helper) 171 if (!helper)
172 return NF_ACCEPT; 172 return NF_ACCEPT;
173 173
174 protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, 174 nexthdr = ipv6_hdr(skb)->nexthdr;
175 skb->len - extoff); 175 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
176 if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { 176 &frag_off);
177 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
177 pr_debug("proto header not found\n"); 178 pr_debug("proto header not found\n");
178 return NF_ACCEPT; 179 return NF_ACCEPT;
179 } 180 }
@@ -199,9 +200,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
199static unsigned int __ipv6_conntrack_in(struct net *net, 200static unsigned int __ipv6_conntrack_in(struct net *net,
200 unsigned int hooknum, 201 unsigned int hooknum,
201 struct sk_buff *skb, 202 struct sk_buff *skb,
203 const struct net_device *in,
204 const struct net_device *out,
202 int (*okfn)(struct sk_buff *)) 205 int (*okfn)(struct sk_buff *))
203{ 206{
204 struct sk_buff *reasm = skb->nfct_reasm; 207 struct sk_buff *reasm = skb->nfct_reasm;
208 const struct nf_conn_help *help;
209 struct nf_conn *ct;
210 enum ip_conntrack_info ctinfo;
205 211
206 /* This packet is fragmented and has reassembled packet. */ 212 /* This packet is fragmented and has reassembled packet. */
207 if (reasm) { 213 if (reasm) {
@@ -213,6 +219,23 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
213 if (ret != NF_ACCEPT) 219 if (ret != NF_ACCEPT)
214 return ret; 220 return ret;
215 } 221 }
222
223 /* Conntrack helpers need the entire reassembled packet in the
224 * POST_ROUTING hook.
225 */
226 ct = nf_ct_get(reasm, &ctinfo);
227 if (ct != NULL && !nf_ct_is_untracked(ct)) {
228 help = nfct_help(ct);
229 if (help && help->helper) {
230 nf_conntrack_get_reasm(skb);
231 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
232 (struct net_device *)in,
233 (struct net_device *)out,
234 okfn, NF_IP6_PRI_CONNTRACK + 1);
235 return NF_DROP_ERR(-ECANCELED);
236 }
237 }
238
216 nf_conntrack_get(reasm->nfct); 239 nf_conntrack_get(reasm->nfct);
217 skb->nfct = reasm->nfct; 240 skb->nfct = reasm->nfct;
218 skb->nfctinfo = reasm->nfctinfo; 241 skb->nfctinfo = reasm->nfctinfo;
@@ -228,7 +251,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
228 const struct net_device *out, 251 const struct net_device *out,
229 int (*okfn)(struct sk_buff *)) 252 int (*okfn)(struct sk_buff *))
230{ 253{
231 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); 254 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
232} 255}
233 256
234static unsigned int ipv6_conntrack_local(unsigned int hooknum, 257static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -242,7 +265,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
242 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); 265 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
243 return NF_ACCEPT; 266 return NF_ACCEPT;
244 } 267 }
245 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); 268 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
246} 269}
247 270
248static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 271static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c9c78c2e666..f94fb3ac2a7 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -190,6 +190,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
190 const struct frag_hdr *fhdr, int nhoff) 190 const struct frag_hdr *fhdr, int nhoff)
191{ 191{
192 struct sk_buff *prev, *next; 192 struct sk_buff *prev, *next;
193 unsigned int payload_len;
193 int offset, end; 194 int offset, end;
194 195
195 if (fq->q.last_in & INET_FRAG_COMPLETE) { 196 if (fq->q.last_in & INET_FRAG_COMPLETE) {
@@ -197,8 +198,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
197 goto err; 198 goto err;
198 } 199 }
199 200
201 payload_len = ntohs(ipv6_hdr(skb)->payload_len);
202
200 offset = ntohs(fhdr->frag_off) & ~0x7; 203 offset = ntohs(fhdr->frag_off) & ~0x7;
201 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - 204 end = offset + (payload_len -
202 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); 205 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
203 206
204 if ((unsigned int)end > IPV6_MAXPLEN) { 207 if ((unsigned int)end > IPV6_MAXPLEN) {
@@ -307,6 +310,8 @@ found:
307 skb->dev = NULL; 310 skb->dev = NULL;
308 fq->q.stamp = skb->tstamp; 311 fq->q.stamp = skb->tstamp;
309 fq->q.meat += skb->len; 312 fq->q.meat += skb->len;
313 if (payload_len > fq->q.max_size)
314 fq->q.max_size = payload_len;
310 atomic_add(skb->truesize, &nf_init_frags.mem); 315 atomic_add(skb->truesize, &nf_init_frags.mem);
311 316
312 /* The first fragment. 317 /* The first fragment.
@@ -412,10 +417,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
412 } 417 }
413 atomic_sub(head->truesize, &nf_init_frags.mem); 418 atomic_sub(head->truesize, &nf_init_frags.mem);
414 419
420 head->local_df = 1;
415 head->next = NULL; 421 head->next = NULL;
416 head->dev = dev; 422 head->dev = dev;
417 head->tstamp = fq->q.stamp; 423 head->tstamp = fq->q.stamp;
418 ipv6_hdr(head)->payload_len = htons(payload_len); 424 ipv6_hdr(head)->payload_len = htons(payload_len);
425 IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
419 426
420 /* Yes, and fold redundant checksum back. 8) */ 427 /* Yes, and fold redundant checksum back. 8) */
421 if (head->ip_summed == CHECKSUM_COMPLETE) 428 if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -592,6 +599,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
592 int (*okfn)(struct sk_buff *)) 599 int (*okfn)(struct sk_buff *))
593{ 600{
594 struct sk_buff *s, *s2; 601 struct sk_buff *s, *s2;
602 unsigned int ret = 0;
595 603
596 for (s = NFCT_FRAG6_CB(skb)->orig; s;) { 604 for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
597 nf_conntrack_put_reasm(s->nfct_reasm); 605 nf_conntrack_put_reasm(s->nfct_reasm);
@@ -601,8 +609,13 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
601 s2 = s->next; 609 s2 = s->next;
602 s->next = NULL; 610 s->next = NULL;
603 611
604 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, 612 if (ret != -ECANCELED)
605 NF_IP6_PRI_CONNTRACK_DEFRAG + 1); 613 ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
614 in, out, okfn,
615 NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
616 else
617 kfree_skb(s);
618
606 s = s2; 619 s = s2;
607 } 620 }
608 nf_conntrack_put_reasm(skb); 621 nf_conntrack_put_reasm(skb);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 67a39786b0a..56f6d5d81a7 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -88,7 +88,14 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
88static inline bool 88static inline bool
89__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) 89__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
90{ 90{
91 if (skb->len > mtu && !skb_is_gso(skb)) { 91 if (IP6CB(skb)->frag_max_size) {
92 /* frag_max_size tell us that, this packet have been
93 * defragmented by netfilter IPv6 conntrack module.
94 */
95 if (IP6CB(skb)->frag_max_size > mtu)
96 return true; /* largest fragment violate MTU */
97 }
98 else if (skb->len > mtu && !skb_is_gso(skb)) {
92 return true; /* Packet size violate MTU size */ 99 return true; /* Packet size violate MTU size */
93 } 100 }
94 return false; 101 return false;