diff options
Diffstat (limited to 'net/ipv4/ip_fragment.c')
-rw-r--r-- | net/ipv4/ip_fragment.c | 87 |
1 files changed, 67 insertions, 20 deletions
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b7c41654dde5..0ad6035f6366 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/udp.h> | 45 | #include <linux/udp.h> |
46 | #include <linux/inet.h> | 46 | #include <linux/inet.h> |
47 | #include <linux/netfilter_ipv4.h> | 47 | #include <linux/netfilter_ipv4.h> |
48 | #include <net/inet_ecn.h> | ||
48 | 49 | ||
49 | /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 | 50 | /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 |
50 | * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c | 51 | * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c |
@@ -70,11 +71,46 @@ struct ipq { | |||
70 | __be32 daddr; | 71 | __be32 daddr; |
71 | __be16 id; | 72 | __be16 id; |
72 | u8 protocol; | 73 | u8 protocol; |
74 | u8 ecn; /* RFC3168 support */ | ||
73 | int iif; | 75 | int iif; |
74 | unsigned int rid; | 76 | unsigned int rid; |
75 | struct inet_peer *peer; | 77 | struct inet_peer *peer; |
76 | }; | 78 | }; |
77 | 79 | ||
80 | /* RFC 3168 support : | ||
81 | * We want to check ECN values of all fragments, to detect invalid combinations. | |
82 | * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. | ||
83 | */ | ||
84 | #define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ | ||
85 | #define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ | ||
86 | #define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ | ||
87 | #define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ | ||
88 | |||
89 | static inline u8 ip4_frag_ecn(u8 tos) | ||
90 | { | ||
91 | return 1 << (tos & INET_ECN_MASK); | ||
92 | } | ||
93 | |||
94 | /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements | ||
95 | * Value : 0xff if frame should be dropped. | ||
96 | * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field | ||
97 | */ | ||
98 | static const u8 ip4_frag_ecn_table[16] = { | ||
99 | /* at least one fragment had CE, and others ECT_0 or ECT_1 */ | ||
100 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, | ||
101 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, | ||
102 | [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, | ||
103 | |||
104 | /* invalid combinations : drop frame */ | ||
105 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, | ||
106 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, | ||
107 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, | ||
108 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, | ||
109 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, | ||
110 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, | ||
111 | [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, | ||
112 | }; | ||
113 | |||
78 | static struct inet_frags ip4_frags; | 114 | static struct inet_frags ip4_frags; |
79 | 115 | ||
80 | int ip_frag_nqueues(struct net *net) | 116 | int ip_frag_nqueues(struct net *net) |
@@ -116,11 +152,11 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a) | |||
116 | struct ip4_create_arg *arg = a; | 152 | struct ip4_create_arg *arg = a; |
117 | 153 | ||
118 | qp = container_of(q, struct ipq, q); | 154 | qp = container_of(q, struct ipq, q); |
119 | return (qp->id == arg->iph->id && | 155 | return qp->id == arg->iph->id && |
120 | qp->saddr == arg->iph->saddr && | 156 | qp->saddr == arg->iph->saddr && |
121 | qp->daddr == arg->iph->daddr && | 157 | qp->daddr == arg->iph->daddr && |
122 | qp->protocol == arg->iph->protocol && | 158 | qp->protocol == arg->iph->protocol && |
123 | qp->user == arg->user); | 159 | qp->user == arg->user; |
124 | } | 160 | } |
125 | 161 | ||
126 | /* Memory Tracking Functions. */ | 162 | /* Memory Tracking Functions. */ |
@@ -137,11 +173,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) | |||
137 | 173 | ||
138 | qp->protocol = arg->iph->protocol; | 174 | qp->protocol = arg->iph->protocol; |
139 | qp->id = arg->iph->id; | 175 | qp->id = arg->iph->id; |
176 | qp->ecn = ip4_frag_ecn(arg->iph->tos); | ||
140 | qp->saddr = arg->iph->saddr; | 177 | qp->saddr = arg->iph->saddr; |
141 | qp->daddr = arg->iph->daddr; | 178 | qp->daddr = arg->iph->daddr; |
142 | qp->user = arg->user; | 179 | qp->user = arg->user; |
143 | qp->peer = sysctl_ipfrag_max_dist ? | 180 | qp->peer = sysctl_ipfrag_max_dist ? |
144 | inet_getpeer(arg->iph->saddr, 1) : NULL; | 181 | inet_getpeer_v4(arg->iph->saddr, 1) : NULL; |
145 | } | 182 | } |
146 | 183 | ||
147 | static __inline__ void ip4_frag_free(struct inet_frag_queue *q) | 184 | static __inline__ void ip4_frag_free(struct inet_frag_queue *q) |
@@ -204,31 +241,30 @@ static void ip_expire(unsigned long arg) | |||
204 | 241 | ||
205 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { | 242 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { |
206 | struct sk_buff *head = qp->q.fragments; | 243 | struct sk_buff *head = qp->q.fragments; |
244 | const struct iphdr *iph; | ||
245 | int err; | ||
207 | 246 | ||
208 | rcu_read_lock(); | 247 | rcu_read_lock(); |
209 | head->dev = dev_get_by_index_rcu(net, qp->iif); | 248 | head->dev = dev_get_by_index_rcu(net, qp->iif); |
210 | if (!head->dev) | 249 | if (!head->dev) |
211 | goto out_rcu_unlock; | 250 | goto out_rcu_unlock; |
212 | 251 | ||
252 | /* skb dst is stale, drop it, and perform route lookup again */ | ||
253 | skb_dst_drop(head); | ||
254 | iph = ip_hdr(head); | ||
255 | err = ip_route_input_noref(head, iph->daddr, iph->saddr, | ||
256 | iph->tos, head->dev); | ||
257 | if (err) | ||
258 | goto out_rcu_unlock; | ||
259 | |||
213 | /* | 260 | /* |
214 | * Only search router table for the head fragment, | 261 | * Only an end host needs to send an ICMP |
215 | * when defraging timeout at PRE_ROUTING HOOK. | 262 | * "Fragment Reassembly Timeout" message, per RFC792. |
216 | */ | 263 | */ |
217 | if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { | 264 | if (qp->user == IP_DEFRAG_CONNTRACK_IN && |
218 | const struct iphdr *iph = ip_hdr(head); | 265 | skb_rtable(head)->rt_type != RTN_LOCAL) |
219 | int err = ip_route_input(head, iph->daddr, iph->saddr, | 266 | goto out_rcu_unlock; |
220 | iph->tos, head->dev); | ||
221 | if (unlikely(err)) | ||
222 | goto out_rcu_unlock; | ||
223 | |||
224 | /* | ||
225 | * Only an end host needs to send an ICMP | ||
226 | * "Fragment Reassembly Timeout" message, per RFC792. | ||
227 | */ | ||
228 | if (skb_rtable(head)->rt_type != RTN_LOCAL) | ||
229 | goto out_rcu_unlock; | ||
230 | 267 | ||
231 | } | ||
232 | 268 | ||
233 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ | 269 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ |
234 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); | 270 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); |
@@ -316,6 +352,7 @@ static int ip_frag_reinit(struct ipq *qp) | |||
316 | qp->q.fragments = NULL; | 352 | qp->q.fragments = NULL; |
317 | qp->q.fragments_tail = NULL; | 353 | qp->q.fragments_tail = NULL; |
318 | qp->iif = 0; | 354 | qp->iif = 0; |
355 | qp->ecn = 0; | ||
319 | 356 | ||
320 | return 0; | 357 | return 0; |
321 | } | 358 | } |
@@ -328,6 +365,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
328 | int flags, offset; | 365 | int flags, offset; |
329 | int ihl, end; | 366 | int ihl, end; |
330 | int err = -ENOENT; | 367 | int err = -ENOENT; |
368 | u8 ecn; | ||
331 | 369 | ||
332 | if (qp->q.last_in & INET_FRAG_COMPLETE) | 370 | if (qp->q.last_in & INET_FRAG_COMPLETE) |
333 | goto err; | 371 | goto err; |
@@ -339,6 +377,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
339 | goto err; | 377 | goto err; |
340 | } | 378 | } |
341 | 379 | ||
380 | ecn = ip4_frag_ecn(ip_hdr(skb)->tos); | ||
342 | offset = ntohs(ip_hdr(skb)->frag_off); | 381 | offset = ntohs(ip_hdr(skb)->frag_off); |
343 | flags = offset & ~IP_OFFSET; | 382 | flags = offset & ~IP_OFFSET; |
344 | offset &= IP_OFFSET; | 383 | offset &= IP_OFFSET; |
@@ -472,6 +511,7 @@ found: | |||
472 | } | 511 | } |
473 | qp->q.stamp = skb->tstamp; | 512 | qp->q.stamp = skb->tstamp; |
474 | qp->q.meat += skb->len; | 513 | qp->q.meat += skb->len; |
514 | qp->ecn |= ecn; | ||
475 | atomic_add(skb->truesize, &qp->q.net->mem); | 515 | atomic_add(skb->truesize, &qp->q.net->mem); |
476 | if (offset == 0) | 516 | if (offset == 0) |
477 | qp->q.last_in |= INET_FRAG_FIRST_IN; | 517 | qp->q.last_in |= INET_FRAG_FIRST_IN; |
@@ -502,9 +542,15 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
502 | int len; | 542 | int len; |
503 | int ihlen; | 543 | int ihlen; |
504 | int err; | 544 | int err; |
545 | u8 ecn; | ||
505 | 546 | ||
506 | ipq_kill(qp); | 547 | ipq_kill(qp); |
507 | 548 | ||
549 | ecn = ip4_frag_ecn_table[qp->ecn]; | ||
550 | if (unlikely(ecn == 0xff)) { | ||
551 | err = -EINVAL; | ||
552 | goto out_fail; | ||
553 | } | ||
508 | /* Make the one we just received the head. */ | 554 | /* Make the one we just received the head. */ |
509 | if (prev) { | 555 | if (prev) { |
510 | head = prev->next; | 556 | head = prev->next; |
@@ -542,7 +588,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
542 | /* If the first fragment is fragmented itself, we split | 588 | /* If the first fragment is fragmented itself, we split |
543 | * it to two chunks: the first with data and paged part | 589 | * it to two chunks: the first with data and paged part |
544 | * and the second, holding only fragments. */ | 590 | * and the second, holding only fragments. */ |
545 | if (skb_has_frags(head)) { | 591 | if (skb_has_frag_list(head)) { |
546 | struct sk_buff *clone; | 592 | struct sk_buff *clone; |
547 | int i, plen = 0; | 593 | int i, plen = 0; |
548 | 594 | ||
@@ -583,6 +629,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
583 | iph = ip_hdr(head); | 629 | iph = ip_hdr(head); |
584 | iph->frag_off = 0; | 630 | iph->frag_off = 0; |
585 | iph->tot_len = htons(len); | 631 | iph->tot_len = htons(len); |
632 | iph->tos |= ecn; | ||
586 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | 633 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
587 | qp->q.fragments = NULL; | 634 | qp->q.fragments = NULL; |
588 | qp->q.fragments_tail = NULL; | 635 | qp->q.fragments_tail = NULL; |