Diffstat (limited to 'net/ipv4/ip_fragment.c')
-rw-r--r--   net/ipv4/ip_fragment.c | 87
1 file changed, 38 insertions(+), 49 deletions(-)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index ed32313e307c..15f0e2bad7ad 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -55,6 +55,7 @@
  */
 
 static int sysctl_ipfrag_max_dist __read_mostly = 64;
+static const char ip_frag_cache_name[] = "ip4-frags";
 
 struct ipfrag_skb_cb
 {
@@ -86,11 +87,6 @@ static inline u8 ip4_frag_ecn(u8 tos)
 
 static struct inet_frags ip4_frags;
 
-int ip_frag_nqueues(struct net *net)
-{
-	return net->ipv4.frags.nqueues;
-}
-
 int ip_frag_mem(struct net *net)
 {
 	return sum_frag_mem_limit(&net->ipv4.frags);
@@ -109,21 +105,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 	net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
 	return jhash_3words((__force u32)id << 16 | prot,
 			    (__force u32)saddr, (__force u32)daddr,
-			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
+			    ip4_frags.rnd);
 }
 
-static unsigned int ip4_hashfn(struct inet_frag_queue *q)
+static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
 {
-	struct ipq *ipq;
+	const struct ipq *ipq;
 
 	ipq = container_of(q, struct ipq, q);
 	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
-static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
+static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
 {
-	struct ipq *qp;
-	struct ip4_create_arg *arg = a;
+	const struct ipq *qp;
+	const struct ip4_create_arg *arg = a;
 
 	qp = container_of(q, struct ipq, q);
 	return qp->id == arg->iph->id &&
@@ -133,14 +129,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
 		qp->user == arg->user;
 }
 
-static void ip4_frag_init(struct inet_frag_queue *q, void *a)
+static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
 	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
 					       frags);
 	struct net *net = container_of(ipv4, struct net, ipv4);
 
-	struct ip4_create_arg *arg = a;
+	const struct ip4_create_arg *arg = a;
 
 	qp->protocol = arg->iph->protocol;
 	qp->id = arg->iph->id;
@@ -177,18 +173,6 @@ static void ipq_kill(struct ipq *ipq)
 	inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
-/* Memory limiting on fragments. Evictor trashes the oldest
- * fragment queue until we are back under the threshold.
- */
-static void ip_evictor(struct net *net)
-{
-	int evicted;
-
-	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
-	if (evicted)
-		IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
-}
-
 /*
  * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
  */
@@ -202,19 +186,22 @@ static void ip_expire(unsigned long arg)
 
 	spin_lock(&qp->q.lock);
 
-	if (qp->q.last_in & INET_FRAG_COMPLETE)
+	if (qp->q.flags & INET_FRAG_COMPLETE)
 		goto out;
 
 	ipq_kill(qp);
-
-	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 
-	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
+	if (!(qp->q.flags & INET_FRAG_EVICTED)) {
 		struct sk_buff *head = qp->q.fragments;
 		const struct iphdr *iph;
 		int err;
 
+		IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+
+		if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
+			goto out;
+
 		rcu_read_lock();
 		head->dev = dev_get_by_index_rcu(net, qp->iif);
 		if (!head->dev)
@@ -227,8 +214,7 @@ static void ip_expire(unsigned long arg)
 		if (err)
 			goto out_rcu_unlock;
 
-		/*
-		 * Only an end host needs to send an ICMP
+		/* Only an end host needs to send an ICMP
 		 * "Fragment Reassembly Timeout" message, per RFC792.
 		 */
 		if (qp->user == IP_DEFRAG_AF_PACKET ||
@@ -237,7 +223,6 @@ static void ip_expire(unsigned long arg)
 		     (skb_rtable(head)->rt_type != RTN_LOCAL)))
 			goto out_rcu_unlock;
 
-
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
 		icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
 out_rcu_unlock:
@@ -260,7 +245,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 	arg.iph = iph;
 	arg.user = user;
 
-	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
 	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
@@ -319,7 +303,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	} while (fp);
 	sub_frag_mem_limit(&qp->q, sum_truesize);
 
-	qp->q.last_in = 0;
+	qp->q.flags = 0;
 	qp->q.len = 0;
 	qp->q.meat = 0;
 	qp->q.fragments = NULL;
@@ -340,7 +324,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	int err = -ENOENT;
 	u8 ecn;
 
-	if (qp->q.last_in & INET_FRAG_COMPLETE)
+	if (qp->q.flags & INET_FRAG_COMPLETE)
 		goto err;
 
 	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@@ -367,9 +351,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		 * or have different end, the segment is corrupted.
 		 */
 		if (end < qp->q.len ||
-		    ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
+		    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
 			goto err;
-		qp->q.last_in |= INET_FRAG_LAST_IN;
+		qp->q.flags |= INET_FRAG_LAST_IN;
 		qp->q.len = end;
 	} else {
 		if (end&7) {
@@ -379,7 +363,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		}
 		if (end > qp->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (qp->q.last_in & INET_FRAG_LAST_IN)
+			if (qp->q.flags & INET_FRAG_LAST_IN)
 				goto err;
 			qp->q.len = end;
 		}
@@ -488,13 +472,13 @@ found:
 	qp->ecn |= ecn;
 	add_frag_mem_limit(&qp->q, skb->truesize);
 	if (offset == 0)
-		qp->q.last_in |= INET_FRAG_FIRST_IN;
+		qp->q.flags |= INET_FRAG_FIRST_IN;
 
 	if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
 	    skb->len + ihl > qp->q.max_size)
 		qp->q.max_size = skb->len + ihl;
 
-	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+	if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 	    qp->q.meat == qp->q.len) {
 		unsigned long orefdst = skb->_skb_refdst;
 
@@ -505,7 +489,6 @@ found:
 	}
 
 	skb_dst_drop(skb);
-	inet_frag_lru_move(&qp->q);
 	return -EINPROGRESS;
 
 err:
@@ -655,9 +638,6 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
 
-	/* Start by cleaning up the memory. */
-	ip_evictor(net);
-
 	/* Lookup (or create) queue header */
 	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
 		int ret;
@@ -721,14 +701,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
 		.data = &init_net.ipv4.frags.high_thresh,
 		.maxlen = sizeof(int),
 		.mode = 0644,
-		.proc_handler = proc_dointvec
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &init_net.ipv4.frags.low_thresh
 	},
 	{
 		.procname = "ipfrag_low_thresh",
 		.data = &init_net.ipv4.frags.low_thresh,
 		.maxlen = sizeof(int),
 		.mode = 0644,
-		.proc_handler = proc_dointvec
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = &zero,
+		.extra2 = &init_net.ipv4.frags.high_thresh
 	},
 	{
 		.procname = "ipfrag_time",
@@ -740,10 +723,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
 	{ }
 };
 
+/* secret interval has been deprecated */
+static int ip4_frags_secret_interval_unused;
 static struct ctl_table ip4_frags_ctl_table[] = {
 	{
 		.procname = "ipfrag_secret_interval",
-		.data = &ip4_frags.secret_interval,
+		.data = &ip4_frags_secret_interval_unused,
 		.maxlen = sizeof(int),
 		.mode = 0644,
 		.proc_handler = proc_dointvec_jiffies,
@@ -771,7 +756,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
 			goto err_alloc;
 
 		table[0].data = &net->ipv4.frags.high_thresh;
+		table[0].extra1 = &net->ipv4.frags.low_thresh;
+		table[0].extra2 = &init_net.ipv4.frags.high_thresh;
 		table[1].data = &net->ipv4.frags.low_thresh;
+		table[1].extra2 = &net->ipv4.frags.high_thresh;
 		table[2].data = &net->ipv4.frags.timeout;
 
 		/* Don't export sysctls to unprivileged users */
@@ -873,6 +861,7 @@ void __init ipfrag_init(void)
 	ip4_frags.qsize = sizeof(struct ipq);
 	ip4_frags.match = ip4_frag_match;
 	ip4_frags.frag_expire = ip_expire;
-	ip4_frags.secret_interval = 10 * 60 * HZ;
-	inet_frags_init(&ip4_frags);
+	ip4_frags.frags_cache_name = ip_frag_cache_name;
+	if (inet_frags_init(&ip4_frags))
+		panic("IP: failed to allocate ip4_frags cache\n");
 }
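
Not part of the patch above: a minimal userspace sketch, assuming a Linux host that exposes the standard /proc/sys/net/ipv4/ipfrag_* entries, showing the threshold pair whose handlers this diff switches to proc_dointvec_minmax (with the new extra1/extra2 bounds, low_thresh can no longer be raised above high_thresh, and high_thresh can no longer be lowered below low_thresh; an out-of-range write fails with EINVAL).

/* read_sysctl() and the check below are illustrative only, not kernel code */
#include <stdio.h>
#include <stdlib.h>

static long read_sysctl(const char *path)
{
	FILE *f = fopen(path, "r");
	long val = -1;

	if (!f) {
		perror(path);
		return -1;
	}
	if (fscanf(f, "%ld", &val) != 1)
		fprintf(stderr, "%s: unexpected format\n", path);
	fclose(f);
	return val;
}

int main(void)
{
	long high = read_sysctl("/proc/sys/net/ipv4/ipfrag_high_thresh");
	long low  = read_sysctl("/proc/sys/net/ipv4/ipfrag_low_thresh");

	/* With the patch applied the kernel enforces low <= high, so this
	 * ordering check should always pass on such a kernel. */
	printf("ipfrag_high_thresh = %ld\nipfrag_low_thresh  = %ld\n", high, low);
	return (high >= 0 && low >= 0 && low <= high) ? EXIT_SUCCESS : EXIT_FAILURE;
}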