about summary refs log tree commit diff stats
path: root/net/ipv4/ip_fragment.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_fragment.c')
-rw-r--r-- net/ipv4/ip_fragment.c 87
1 files changed, 67 insertions, 20 deletions
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b7c41654dde5..0ad6035f6366 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -45,6 +45,7 @@
45#include <linux/udp.h> 45#include <linux/udp.h>
46#include <linux/inet.h> 46#include <linux/inet.h>
47#include <linux/netfilter_ipv4.h> 47#include <linux/netfilter_ipv4.h>
48#include <net/inet_ecn.h>
48 49
49/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 50/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
50 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c 51 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -70,11 +71,46 @@ struct ipq {
70 __be32 daddr; 71 __be32 daddr;
71 __be16 id; 72 __be16 id;
72 u8 protocol; 73 u8 protocol;
74 u8 ecn; /* RFC3168 support */
73 int iif; 75 int iif;
74 unsigned int rid; 76 unsigned int rid;
75 struct inet_peer *peer; 77 struct inet_peer *peer;
76}; 78};
77 79
80/* RFC 3168 support :
81 * We want to check ECN values of all fragments, to detect invalid combinations.
82 * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
83 */
84#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
85#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
86#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
87#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
88
89static inline u8 ip4_frag_ecn(u8 tos)
90{
91 return 1 << (tos & INET_ECN_MASK);
92}
93
94/* Given the OR of the ip4_frag_ecn() values of all fragments, apply RFC 3168 5.3 requirements.
95 * Value : 0xff if the frame should be dropped,
96 * otherwise 0 or INET_ECN_CE, to be ORed into the final iph->tos field.
97 */
98static const u8 ip4_frag_ecn_table[16] = {
99 /* at least one fragment had CE, and others ECT_0 or ECT_1 */
100 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
101 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
102 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
103
104 /* invalid combinations : drop frame */
105 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
106 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
107 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
108 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
109 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
110 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
111 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
112};
113
78static struct inet_frags ip4_frags; 114static struct inet_frags ip4_frags;
79 115
80int ip_frag_nqueues(struct net *net) 116int ip_frag_nqueues(struct net *net)
@@ -116,11 +152,11 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
116 struct ip4_create_arg *arg = a; 152 struct ip4_create_arg *arg = a;
117 153
118 qp = container_of(q, struct ipq, q); 154 qp = container_of(q, struct ipq, q);
119 return (qp->id == arg->iph->id && 155 return qp->id == arg->iph->id &&
120 qp->saddr == arg->iph->saddr && 156 qp->saddr == arg->iph->saddr &&
121 qp->daddr == arg->iph->daddr && 157 qp->daddr == arg->iph->daddr &&
122 qp->protocol == arg->iph->protocol && 158 qp->protocol == arg->iph->protocol &&
123 qp->user == arg->user); 159 qp->user == arg->user;
124} 160}
125 161
126/* Memory Tracking Functions. */ 162/* Memory Tracking Functions. */
@@ -137,11 +173,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
137 173
138 qp->protocol = arg->iph->protocol; 174 qp->protocol = arg->iph->protocol;
139 qp->id = arg->iph->id; 175 qp->id = arg->iph->id;
176 qp->ecn = ip4_frag_ecn(arg->iph->tos);
140 qp->saddr = arg->iph->saddr; 177 qp->saddr = arg->iph->saddr;
141 qp->daddr = arg->iph->daddr; 178 qp->daddr = arg->iph->daddr;
142 qp->user = arg->user; 179 qp->user = arg->user;
143 qp->peer = sysctl_ipfrag_max_dist ? 180 qp->peer = sysctl_ipfrag_max_dist ?
144 inet_getpeer(arg->iph->saddr, 1) : NULL; 181 inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
145} 182}
146 183
147static __inline__ void ip4_frag_free(struct inet_frag_queue *q) 184static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
@@ -204,31 +241,30 @@ static void ip_expire(unsigned long arg)
204 241
205 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { 242 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
206 struct sk_buff *head = qp->q.fragments; 243 struct sk_buff *head = qp->q.fragments;
244 const struct iphdr *iph;
245 int err;
207 246
208 rcu_read_lock(); 247 rcu_read_lock();
209 head->dev = dev_get_by_index_rcu(net, qp->iif); 248 head->dev = dev_get_by_index_rcu(net, qp->iif);
210 if (!head->dev) 249 if (!head->dev)
211 goto out_rcu_unlock; 250 goto out_rcu_unlock;
212 251
252 /* skb dst is stale, drop it, and perform route lookup again */
253 skb_dst_drop(head);
254 iph = ip_hdr(head);
255 err = ip_route_input_noref(head, iph->daddr, iph->saddr,
256 iph->tos, head->dev);
257 if (err)
258 goto out_rcu_unlock;
259
213 /* 260 /*
214 * Only search router table for the head fragment, 261 * Only an end host needs to send an ICMP
215 * when defraging timeout at PRE_ROUTING HOOK. 262 * "Fragment Reassembly Timeout" message, per RFC792.
216 */ 263 */
217 if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { 264 if (qp->user == IP_DEFRAG_CONNTRACK_IN &&
218 const struct iphdr *iph = ip_hdr(head); 265 skb_rtable(head)->rt_type != RTN_LOCAL)
219 int err = ip_route_input(head, iph->daddr, iph->saddr, 266 goto out_rcu_unlock;
220 iph->tos, head->dev);
221 if (unlikely(err))
222 goto out_rcu_unlock;
223
224 /*
225 * Only an end host needs to send an ICMP
226 * "Fragment Reassembly Timeout" message, per RFC792.
227 */
228 if (skb_rtable(head)->rt_type != RTN_LOCAL)
229 goto out_rcu_unlock;
230 267
231 }
232 268
233 /* Send an ICMP "Fragment Reassembly Timeout" message. */ 269 /* Send an ICMP "Fragment Reassembly Timeout" message. */
234 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); 270 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -316,6 +352,7 @@ static int ip_frag_reinit(struct ipq *qp)
316 qp->q.fragments = NULL; 352 qp->q.fragments = NULL;
317 qp->q.fragments_tail = NULL; 353 qp->q.fragments_tail = NULL;
318 qp->iif = 0; 354 qp->iif = 0;
355 qp->ecn = 0;
319 356
320 return 0; 357 return 0;
321} 358}
@@ -328,6 +365,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
328 int flags, offset; 365 int flags, offset;
329 int ihl, end; 366 int ihl, end;
330 int err = -ENOENT; 367 int err = -ENOENT;
368 u8 ecn;
331 369
332 if (qp->q.last_in & INET_FRAG_COMPLETE) 370 if (qp->q.last_in & INET_FRAG_COMPLETE)
333 goto err; 371 goto err;
@@ -339,6 +377,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
339 goto err; 377 goto err;
340 } 378 }
341 379
380 ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
342 offset = ntohs(ip_hdr(skb)->frag_off); 381 offset = ntohs(ip_hdr(skb)->frag_off);
343 flags = offset & ~IP_OFFSET; 382 flags = offset & ~IP_OFFSET;
344 offset &= IP_OFFSET; 383 offset &= IP_OFFSET;
@@ -472,6 +511,7 @@ found:
472 } 511 }
473 qp->q.stamp = skb->tstamp; 512 qp->q.stamp = skb->tstamp;
474 qp->q.meat += skb->len; 513 qp->q.meat += skb->len;
514 qp->ecn |= ecn;
475 atomic_add(skb->truesize, &qp->q.net->mem); 515 atomic_add(skb->truesize, &qp->q.net->mem);
476 if (offset == 0) 516 if (offset == 0)
477 qp->q.last_in |= INET_FRAG_FIRST_IN; 517 qp->q.last_in |= INET_FRAG_FIRST_IN;
@@ -502,9 +542,15 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
502 int len; 542 int len;
503 int ihlen; 543 int ihlen;
504 int err; 544 int err;
545 u8 ecn;
505 546
506 ipq_kill(qp); 547 ipq_kill(qp);
507 548
549 ecn = ip4_frag_ecn_table[qp->ecn];
550 if (unlikely(ecn == 0xff)) {
551 err = -EINVAL;
552 goto out_fail;
553 }
508 /* Make the one we just received the head. */ 554 /* Make the one we just received the head. */
509 if (prev) { 555 if (prev) {
510 head = prev->next; 556 head = prev->next;
@@ -542,7 +588,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
542 /* If the first fragment is fragmented itself, we split 588 /* If the first fragment is fragmented itself, we split
543 * it to two chunks: the first with data and paged part 589 * it to two chunks: the first with data and paged part
544 * and the second, holding only fragments. */ 590 * and the second, holding only fragments. */
545 if (skb_has_frags(head)) { 591 if (skb_has_frag_list(head)) {
546 struct sk_buff *clone; 592 struct sk_buff *clone;
547 int i, plen = 0; 593 int i, plen = 0;
548 594
@@ -583,6 +629,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
583 iph = ip_hdr(head); 629 iph = ip_hdr(head);
584 iph->frag_off = 0; 630 iph->frag_off = 0;
585 iph->tot_len = htons(len); 631 iph->tot_len = htons(len);
632 iph->tos |= ecn;
586 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 633 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
587 qp->q.fragments = NULL; 634 qp->q.fragments = NULL;
588 qp->q.fragments_tail = NULL; 635 qp->q.fragments_tail = NULL;