about summary refs log tree commit diff stats
path: root/net/ipv4/ip_fragment.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_fragment.c')
-rw-r--r-- net/ipv4/ip_fragment.c 87
1 files changed, 38 insertions, 49 deletions
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index ed32313e307c..15f0e2bad7ad 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -55,6 +55,7 @@
55 */ 55 */
56 56
57static int sysctl_ipfrag_max_dist __read_mostly = 64; 57static int sysctl_ipfrag_max_dist __read_mostly = 64;
58static const char ip_frag_cache_name[] = "ip4-frags";
58 59
59struct ipfrag_skb_cb 60struct ipfrag_skb_cb
60{ 61{
@@ -86,11 +87,6 @@ static inline u8 ip4_frag_ecn(u8 tos)
86 87
87static struct inet_frags ip4_frags; 88static struct inet_frags ip4_frags;
88 89
89int ip_frag_nqueues(struct net *net)
90{
91 return net->ipv4.frags.nqueues;
92}
93
94int ip_frag_mem(struct net *net) 90int ip_frag_mem(struct net *net)
95{ 91{
96 return sum_frag_mem_limit(&net->ipv4.frags); 92 return sum_frag_mem_limit(&net->ipv4.frags);
@@ -109,21 +105,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
109 net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); 105 net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
110 return jhash_3words((__force u32)id << 16 | prot, 106 return jhash_3words((__force u32)id << 16 | prot,
111 (__force u32)saddr, (__force u32)daddr, 107 (__force u32)saddr, (__force u32)daddr,
112 ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); 108 ip4_frags.rnd);
113} 109}
114 110
115static unsigned int ip4_hashfn(struct inet_frag_queue *q) 111static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
116{ 112{
117 struct ipq *ipq; 113 const struct ipq *ipq;
118 114
119 ipq = container_of(q, struct ipq, q); 115 ipq = container_of(q, struct ipq, q);
120 return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); 116 return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
121} 117}
122 118
123static bool ip4_frag_match(struct inet_frag_queue *q, void *a) 119static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
124{ 120{
125 struct ipq *qp; 121 const struct ipq *qp;
126 struct ip4_create_arg *arg = a; 122 const struct ip4_create_arg *arg = a;
127 123
128 qp = container_of(q, struct ipq, q); 124 qp = container_of(q, struct ipq, q);
129 return qp->id == arg->iph->id && 125 return qp->id == arg->iph->id &&
@@ -133,14 +129,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
133 qp->user == arg->user; 129 qp->user == arg->user;
134} 130}
135 131
136static void ip4_frag_init(struct inet_frag_queue *q, void *a) 132static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
137{ 133{
138 struct ipq *qp = container_of(q, struct ipq, q); 134 struct ipq *qp = container_of(q, struct ipq, q);
139 struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, 135 struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
140 frags); 136 frags);
141 struct net *net = container_of(ipv4, struct net, ipv4); 137 struct net *net = container_of(ipv4, struct net, ipv4);
142 138
143 struct ip4_create_arg *arg = a; 139 const struct ip4_create_arg *arg = a;
144 140
145 qp->protocol = arg->iph->protocol; 141 qp->protocol = arg->iph->protocol;
146 qp->id = arg->iph->id; 142 qp->id = arg->iph->id;
@@ -177,18 +173,6 @@ static void ipq_kill(struct ipq *ipq)
177 inet_frag_kill(&ipq->q, &ip4_frags); 173 inet_frag_kill(&ipq->q, &ip4_frags);
178} 174}
179 175
180/* Memory limiting on fragments. Evictor trashes the oldest
181 * fragment queue until we are back under the threshold.
182 */
183static void ip_evictor(struct net *net)
184{
185 int evicted;
186
187 evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
188 if (evicted)
189 IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
190}
191
192/* 176/*
193 * Oops, a fragment queue timed out. Kill it and send an ICMP reply. 177 * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
194 */ 178 */
@@ -202,19 +186,22 @@ static void ip_expire(unsigned long arg)
202 186
203 spin_lock(&qp->q.lock); 187 spin_lock(&qp->q.lock);
204 188
205 if (qp->q.last_in & INET_FRAG_COMPLETE) 189 if (qp->q.flags & INET_FRAG_COMPLETE)
206 goto out; 190 goto out;
207 191
208 ipq_kill(qp); 192 ipq_kill(qp);
209
210 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
211 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); 193 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
212 194
213 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { 195 if (!(qp->q.flags & INET_FRAG_EVICTED)) {
214 struct sk_buff *head = qp->q.fragments; 196 struct sk_buff *head = qp->q.fragments;
215 const struct iphdr *iph; 197 const struct iphdr *iph;
216 int err; 198 int err;
217 199
200 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
201
202 if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
203 goto out;
204
218 rcu_read_lock(); 205 rcu_read_lock();
219 head->dev = dev_get_by_index_rcu(net, qp->iif); 206 head->dev = dev_get_by_index_rcu(net, qp->iif);
220 if (!head->dev) 207 if (!head->dev)
@@ -227,8 +214,7 @@ static void ip_expire(unsigned long arg)
227 if (err) 214 if (err)
228 goto out_rcu_unlock; 215 goto out_rcu_unlock;
229 216
230 /* 217 /* Only an end host needs to send an ICMP
231 * Only an end host needs to send an ICMP
232 * "Fragment Reassembly Timeout" message, per RFC792. 218 * "Fragment Reassembly Timeout" message, per RFC792.
233 */ 219 */
234 if (qp->user == IP_DEFRAG_AF_PACKET || 220 if (qp->user == IP_DEFRAG_AF_PACKET ||
@@ -237,7 +223,6 @@ static void ip_expire(unsigned long arg)
237 (skb_rtable(head)->rt_type != RTN_LOCAL))) 223 (skb_rtable(head)->rt_type != RTN_LOCAL)))
238 goto out_rcu_unlock; 224 goto out_rcu_unlock;
239 225
240
241 /* Send an ICMP "Fragment Reassembly Timeout" message. */ 226 /* Send an ICMP "Fragment Reassembly Timeout" message. */
242 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); 227 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
243out_rcu_unlock: 228out_rcu_unlock:
@@ -260,7 +245,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
260 arg.iph = iph; 245 arg.iph = iph;
261 arg.user = user; 246 arg.user = user;
262 247
263 read_lock(&ip4_frags.lock);
264 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); 248 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
265 249
266 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); 250 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
@@ -319,7 +303,7 @@ static int ip_frag_reinit(struct ipq *qp)
319 } while (fp); 303 } while (fp);
320 sub_frag_mem_limit(&qp->q, sum_truesize); 304 sub_frag_mem_limit(&qp->q, sum_truesize);
321 305
322 qp->q.last_in = 0; 306 qp->q.flags = 0;
323 qp->q.len = 0; 307 qp->q.len = 0;
324 qp->q.meat = 0; 308 qp->q.meat = 0;
325 qp->q.fragments = NULL; 309 qp->q.fragments = NULL;
@@ -340,7 +324,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
340 int err = -ENOENT; 324 int err = -ENOENT;
341 u8 ecn; 325 u8 ecn;
342 326
343 if (qp->q.last_in & INET_FRAG_COMPLETE) 327 if (qp->q.flags & INET_FRAG_COMPLETE)
344 goto err; 328 goto err;
345 329
346 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && 330 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@@ -367,9 +351,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
367 * or have different end, the segment is corrupted. 351 * or have different end, the segment is corrupted.
368 */ 352 */
369 if (end < qp->q.len || 353 if (end < qp->q.len ||
370 ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) 354 ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
371 goto err; 355 goto err;
372 qp->q.last_in |= INET_FRAG_LAST_IN; 356 qp->q.flags |= INET_FRAG_LAST_IN;
373 qp->q.len = end; 357 qp->q.len = end;
374 } else { 358 } else {
375 if (end&7) { 359 if (end&7) {
@@ -379,7 +363,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
379 } 363 }
380 if (end > qp->q.len) { 364 if (end > qp->q.len) {
381 /* Some bits beyond end -> corruption. */ 365 /* Some bits beyond end -> corruption. */
382 if (qp->q.last_in & INET_FRAG_LAST_IN) 366 if (qp->q.flags & INET_FRAG_LAST_IN)
383 goto err; 367 goto err;
384 qp->q.len = end; 368 qp->q.len = end;
385 } 369 }
@@ -488,13 +472,13 @@ found:
488 qp->ecn |= ecn; 472 qp->ecn |= ecn;
489 add_frag_mem_limit(&qp->q, skb->truesize); 473 add_frag_mem_limit(&qp->q, skb->truesize);
490 if (offset == 0) 474 if (offset == 0)
491 qp->q.last_in |= INET_FRAG_FIRST_IN; 475 qp->q.flags |= INET_FRAG_FIRST_IN;
492 476
493 if (ip_hdr(skb)->frag_off & htons(IP_DF) && 477 if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
494 skb->len + ihl > qp->q.max_size) 478 skb->len + ihl > qp->q.max_size)
495 qp->q.max_size = skb->len + ihl; 479 qp->q.max_size = skb->len + ihl;
496 480
497 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 481 if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
498 qp->q.meat == qp->q.len) { 482 qp->q.meat == qp->q.len) {
499 unsigned long orefdst = skb->_skb_refdst; 483 unsigned long orefdst = skb->_skb_refdst;
500 484
@@ -505,7 +489,6 @@ found:
505 } 489 }
506 490
507 skb_dst_drop(skb); 491 skb_dst_drop(skb);
508 inet_frag_lru_move(&qp->q);
509 return -EINPROGRESS; 492 return -EINPROGRESS;
510 493
511err: 494err:
@@ -655,9 +638,6 @@ int ip_defrag(struct sk_buff *skb, u32 user)
655 net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev); 638 net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
656 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); 639 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
657 640
658 /* Start by cleaning up the memory. */
659 ip_evictor(net);
660
661 /* Lookup (or create) queue header */ 641 /* Lookup (or create) queue header */
662 if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { 642 if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
663 int ret; 643 int ret;
@@ -721,14 +701,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
721 .data = &init_net.ipv4.frags.high_thresh, 701 .data = &init_net.ipv4.frags.high_thresh,
722 .maxlen = sizeof(int), 702 .maxlen = sizeof(int),
723 .mode = 0644, 703 .mode = 0644,
724 .proc_handler = proc_dointvec 704 .proc_handler = proc_dointvec_minmax,
705 .extra1 = &init_net.ipv4.frags.low_thresh
725 }, 706 },
726 { 707 {
727 .procname = "ipfrag_low_thresh", 708 .procname = "ipfrag_low_thresh",
728 .data = &init_net.ipv4.frags.low_thresh, 709 .data = &init_net.ipv4.frags.low_thresh,
729 .maxlen = sizeof(int), 710 .maxlen = sizeof(int),
730 .mode = 0644, 711 .mode = 0644,
731 .proc_handler = proc_dointvec 712 .proc_handler = proc_dointvec_minmax,
713 .extra1 = &zero,
714 .extra2 = &init_net.ipv4.frags.high_thresh
732 }, 715 },
733 { 716 {
734 .procname = "ipfrag_time", 717 .procname = "ipfrag_time",
@@ -740,10 +723,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
740 { } 723 { }
741}; 724};
742 725
726/* secret interval has been deprecated */
727static int ip4_frags_secret_interval_unused;
743static struct ctl_table ip4_frags_ctl_table[] = { 728static struct ctl_table ip4_frags_ctl_table[] = {
744 { 729 {
745 .procname = "ipfrag_secret_interval", 730 .procname = "ipfrag_secret_interval",
746 .data = &ip4_frags.secret_interval, 731 .data = &ip4_frags_secret_interval_unused,
747 .maxlen = sizeof(int), 732 .maxlen = sizeof(int),
748 .mode = 0644, 733 .mode = 0644,
749 .proc_handler = proc_dointvec_jiffies, 734 .proc_handler = proc_dointvec_jiffies,
@@ -771,7 +756,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
771 goto err_alloc; 756 goto err_alloc;
772 757
773 table[0].data = &net->ipv4.frags.high_thresh; 758 table[0].data = &net->ipv4.frags.high_thresh;
759 table[0].extra1 = &net->ipv4.frags.low_thresh;
760 table[0].extra2 = &init_net.ipv4.frags.high_thresh;
774 table[1].data = &net->ipv4.frags.low_thresh; 761 table[1].data = &net->ipv4.frags.low_thresh;
762 table[1].extra2 = &net->ipv4.frags.high_thresh;
775 table[2].data = &net->ipv4.frags.timeout; 763 table[2].data = &net->ipv4.frags.timeout;
776 764
777 /* Don't export sysctls to unprivileged users */ 765 /* Don't export sysctls to unprivileged users */
@@ -873,6 +861,7 @@ void __init ipfrag_init(void)
873 ip4_frags.qsize = sizeof(struct ipq); 861 ip4_frags.qsize = sizeof(struct ipq);
874 ip4_frags.match = ip4_frag_match; 862 ip4_frags.match = ip4_frag_match;
875 ip4_frags.frag_expire = ip_expire; 863 ip4_frags.frag_expire = ip_expire;
876 ip4_frags.secret_interval = 10 * 60 * HZ; 864 ip4_frags.frags_cache_name = ip_frag_cache_name;
877 inet_frags_init(&ip4_frags); 865 if (inet_frags_init(&ip4_frags))
866 panic("IP: failed to allocate ip4_frags cache\n");
878} 867}