diff options
Diffstat (limited to 'net/ipv4/ip_fragment.c')
| -rw-r--r-- | net/ipv4/ip_fragment.c | 87 |
1 files changed, 38 insertions, 49 deletions
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index ed32313e307c..15f0e2bad7ad 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
| @@ -55,6 +55,7 @@ | |||
| 55 | */ | 55 | */ |
| 56 | 56 | ||
| 57 | static int sysctl_ipfrag_max_dist __read_mostly = 64; | 57 | static int sysctl_ipfrag_max_dist __read_mostly = 64; |
| 58 | static const char ip_frag_cache_name[] = "ip4-frags"; | ||
| 58 | 59 | ||
| 59 | struct ipfrag_skb_cb | 60 | struct ipfrag_skb_cb |
| 60 | { | 61 | { |
| @@ -86,11 +87,6 @@ static inline u8 ip4_frag_ecn(u8 tos) | |||
| 86 | 87 | ||
| 87 | static struct inet_frags ip4_frags; | 88 | static struct inet_frags ip4_frags; |
| 88 | 89 | ||
| 89 | int ip_frag_nqueues(struct net *net) | ||
| 90 | { | ||
| 91 | return net->ipv4.frags.nqueues; | ||
| 92 | } | ||
| 93 | |||
| 94 | int ip_frag_mem(struct net *net) | 90 | int ip_frag_mem(struct net *net) |
| 95 | { | 91 | { |
| 96 | return sum_frag_mem_limit(&net->ipv4.frags); | 92 | return sum_frag_mem_limit(&net->ipv4.frags); |
| @@ -109,21 +105,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) | |||
| 109 | net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); | 105 | net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); |
| 110 | return jhash_3words((__force u32)id << 16 | prot, | 106 | return jhash_3words((__force u32)id << 16 | prot, |
| 111 | (__force u32)saddr, (__force u32)daddr, | 107 | (__force u32)saddr, (__force u32)daddr, |
| 112 | ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); | 108 | ip4_frags.rnd); |
| 113 | } | 109 | } |
| 114 | 110 | ||
| 115 | static unsigned int ip4_hashfn(struct inet_frag_queue *q) | 111 | static unsigned int ip4_hashfn(const struct inet_frag_queue *q) |
| 116 | { | 112 | { |
| 117 | struct ipq *ipq; | 113 | const struct ipq *ipq; |
| 118 | 114 | ||
| 119 | ipq = container_of(q, struct ipq, q); | 115 | ipq = container_of(q, struct ipq, q); |
| 120 | return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); | 116 | return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); |
| 121 | } | 117 | } |
| 122 | 118 | ||
| 123 | static bool ip4_frag_match(struct inet_frag_queue *q, void *a) | 119 | static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a) |
| 124 | { | 120 | { |
| 125 | struct ipq *qp; | 121 | const struct ipq *qp; |
| 126 | struct ip4_create_arg *arg = a; | 122 | const struct ip4_create_arg *arg = a; |
| 127 | 123 | ||
| 128 | qp = container_of(q, struct ipq, q); | 124 | qp = container_of(q, struct ipq, q); |
| 129 | return qp->id == arg->iph->id && | 125 | return qp->id == arg->iph->id && |
| @@ -133,14 +129,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a) | |||
| 133 | qp->user == arg->user; | 129 | qp->user == arg->user; |
| 134 | } | 130 | } |
| 135 | 131 | ||
| 136 | static void ip4_frag_init(struct inet_frag_queue *q, void *a) | 132 | static void ip4_frag_init(struct inet_frag_queue *q, const void *a) |
| 137 | { | 133 | { |
| 138 | struct ipq *qp = container_of(q, struct ipq, q); | 134 | struct ipq *qp = container_of(q, struct ipq, q); |
| 139 | struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, | 135 | struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, |
| 140 | frags); | 136 | frags); |
| 141 | struct net *net = container_of(ipv4, struct net, ipv4); | 137 | struct net *net = container_of(ipv4, struct net, ipv4); |
| 142 | 138 | ||
| 143 | struct ip4_create_arg *arg = a; | 139 | const struct ip4_create_arg *arg = a; |
| 144 | 140 | ||
| 145 | qp->protocol = arg->iph->protocol; | 141 | qp->protocol = arg->iph->protocol; |
| 146 | qp->id = arg->iph->id; | 142 | qp->id = arg->iph->id; |
| @@ -177,18 +173,6 @@ static void ipq_kill(struct ipq *ipq) | |||
| 177 | inet_frag_kill(&ipq->q, &ip4_frags); | 173 | inet_frag_kill(&ipq->q, &ip4_frags); |
| 178 | } | 174 | } |
| 179 | 175 | ||
| 180 | /* Memory limiting on fragments. Evictor trashes the oldest | ||
| 181 | * fragment queue until we are back under the threshold. | ||
| 182 | */ | ||
| 183 | static void ip_evictor(struct net *net) | ||
| 184 | { | ||
| 185 | int evicted; | ||
| 186 | |||
| 187 | evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false); | ||
| 188 | if (evicted) | ||
| 189 | IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); | ||
| 190 | } | ||
| 191 | |||
| 192 | /* | 176 | /* |
| 193 | * Oops, a fragment queue timed out. Kill it and send an ICMP reply. | 177 | * Oops, a fragment queue timed out. Kill it and send an ICMP reply. |
| 194 | */ | 178 | */ |
| @@ -202,19 +186,22 @@ static void ip_expire(unsigned long arg) | |||
| 202 | 186 | ||
| 203 | spin_lock(&qp->q.lock); | 187 | spin_lock(&qp->q.lock); |
| 204 | 188 | ||
| 205 | if (qp->q.last_in & INET_FRAG_COMPLETE) | 189 | if (qp->q.flags & INET_FRAG_COMPLETE) |
| 206 | goto out; | 190 | goto out; |
| 207 | 191 | ||
| 208 | ipq_kill(qp); | 192 | ipq_kill(qp); |
| 209 | |||
| 210 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); | ||
| 211 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); | 193 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); |
| 212 | 194 | ||
| 213 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { | 195 | if (!(qp->q.flags & INET_FRAG_EVICTED)) { |
| 214 | struct sk_buff *head = qp->q.fragments; | 196 | struct sk_buff *head = qp->q.fragments; |
| 215 | const struct iphdr *iph; | 197 | const struct iphdr *iph; |
| 216 | int err; | 198 | int err; |
| 217 | 199 | ||
| 200 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); | ||
| 201 | |||
| 202 | if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) | ||
| 203 | goto out; | ||
| 204 | |||
| 218 | rcu_read_lock(); | 205 | rcu_read_lock(); |
| 219 | head->dev = dev_get_by_index_rcu(net, qp->iif); | 206 | head->dev = dev_get_by_index_rcu(net, qp->iif); |
| 220 | if (!head->dev) | 207 | if (!head->dev) |
| @@ -227,8 +214,7 @@ static void ip_expire(unsigned long arg) | |||
| 227 | if (err) | 214 | if (err) |
| 228 | goto out_rcu_unlock; | 215 | goto out_rcu_unlock; |
| 229 | 216 | ||
| 230 | /* | 217 | /* Only an end host needs to send an ICMP |
| 231 | * Only an end host needs to send an ICMP | ||
| 232 | * "Fragment Reassembly Timeout" message, per RFC792. | 218 | * "Fragment Reassembly Timeout" message, per RFC792. |
| 233 | */ | 219 | */ |
| 234 | if (qp->user == IP_DEFRAG_AF_PACKET || | 220 | if (qp->user == IP_DEFRAG_AF_PACKET || |
| @@ -237,7 +223,6 @@ static void ip_expire(unsigned long arg) | |||
| 237 | (skb_rtable(head)->rt_type != RTN_LOCAL))) | 223 | (skb_rtable(head)->rt_type != RTN_LOCAL))) |
| 238 | goto out_rcu_unlock; | 224 | goto out_rcu_unlock; |
| 239 | 225 | ||
| 240 | |||
| 241 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ | 226 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ |
| 242 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); | 227 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); |
| 243 | out_rcu_unlock: | 228 | out_rcu_unlock: |
| @@ -260,7 +245,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) | |||
| 260 | arg.iph = iph; | 245 | arg.iph = iph; |
| 261 | arg.user = user; | 246 | arg.user = user; |
| 262 | 247 | ||
| 263 | read_lock(&ip4_frags.lock); | ||
| 264 | hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); | 248 | hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); |
| 265 | 249 | ||
| 266 | q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); | 250 | q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); |
| @@ -319,7 +303,7 @@ static int ip_frag_reinit(struct ipq *qp) | |||
| 319 | } while (fp); | 303 | } while (fp); |
| 320 | sub_frag_mem_limit(&qp->q, sum_truesize); | 304 | sub_frag_mem_limit(&qp->q, sum_truesize); |
| 321 | 305 | ||
| 322 | qp->q.last_in = 0; | 306 | qp->q.flags = 0; |
| 323 | qp->q.len = 0; | 307 | qp->q.len = 0; |
| 324 | qp->q.meat = 0; | 308 | qp->q.meat = 0; |
| 325 | qp->q.fragments = NULL; | 309 | qp->q.fragments = NULL; |
| @@ -340,7 +324,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
| 340 | int err = -ENOENT; | 324 | int err = -ENOENT; |
| 341 | u8 ecn; | 325 | u8 ecn; |
| 342 | 326 | ||
| 343 | if (qp->q.last_in & INET_FRAG_COMPLETE) | 327 | if (qp->q.flags & INET_FRAG_COMPLETE) |
| 344 | goto err; | 328 | goto err; |
| 345 | 329 | ||
| 346 | if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && | 330 | if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && |
| @@ -367,9 +351,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
| 367 | * or have different end, the segment is corrupted. | 351 | * or have different end, the segment is corrupted. |
| 368 | */ | 352 | */ |
| 369 | if (end < qp->q.len || | 353 | if (end < qp->q.len || |
| 370 | ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) | 354 | ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len)) |
| 371 | goto err; | 355 | goto err; |
| 372 | qp->q.last_in |= INET_FRAG_LAST_IN; | 356 | qp->q.flags |= INET_FRAG_LAST_IN; |
| 373 | qp->q.len = end; | 357 | qp->q.len = end; |
| 374 | } else { | 358 | } else { |
| 375 | if (end&7) { | 359 | if (end&7) { |
| @@ -379,7 +363,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
| 379 | } | 363 | } |
| 380 | if (end > qp->q.len) { | 364 | if (end > qp->q.len) { |
| 381 | /* Some bits beyond end -> corruption. */ | 365 | /* Some bits beyond end -> corruption. */ |
| 382 | if (qp->q.last_in & INET_FRAG_LAST_IN) | 366 | if (qp->q.flags & INET_FRAG_LAST_IN) |
| 383 | goto err; | 367 | goto err; |
| 384 | qp->q.len = end; | 368 | qp->q.len = end; |
| 385 | } | 369 | } |
| @@ -488,13 +472,13 @@ found: | |||
| 488 | qp->ecn |= ecn; | 472 | qp->ecn |= ecn; |
| 489 | add_frag_mem_limit(&qp->q, skb->truesize); | 473 | add_frag_mem_limit(&qp->q, skb->truesize); |
| 490 | if (offset == 0) | 474 | if (offset == 0) |
| 491 | qp->q.last_in |= INET_FRAG_FIRST_IN; | 475 | qp->q.flags |= INET_FRAG_FIRST_IN; |
| 492 | 476 | ||
| 493 | if (ip_hdr(skb)->frag_off & htons(IP_DF) && | 477 | if (ip_hdr(skb)->frag_off & htons(IP_DF) && |
| 494 | skb->len + ihl > qp->q.max_size) | 478 | skb->len + ihl > qp->q.max_size) |
| 495 | qp->q.max_size = skb->len + ihl; | 479 | qp->q.max_size = skb->len + ihl; |
| 496 | 480 | ||
| 497 | if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && | 481 | if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && |
| 498 | qp->q.meat == qp->q.len) { | 482 | qp->q.meat == qp->q.len) { |
| 499 | unsigned long orefdst = skb->_skb_refdst; | 483 | unsigned long orefdst = skb->_skb_refdst; |
| 500 | 484 | ||
| @@ -505,7 +489,6 @@ found: | |||
| 505 | } | 489 | } |
| 506 | 490 | ||
| 507 | skb_dst_drop(skb); | 491 | skb_dst_drop(skb); |
| 508 | inet_frag_lru_move(&qp->q); | ||
| 509 | return -EINPROGRESS; | 492 | return -EINPROGRESS; |
| 510 | 493 | ||
| 511 | err: | 494 | err: |
| @@ -655,9 +638,6 @@ int ip_defrag(struct sk_buff *skb, u32 user) | |||
| 655 | net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev); | 638 | net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev); |
| 656 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); | 639 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); |
| 657 | 640 | ||
| 658 | /* Start by cleaning up the memory. */ | ||
| 659 | ip_evictor(net); | ||
| 660 | |||
| 661 | /* Lookup (or create) queue header */ | 641 | /* Lookup (or create) queue header */ |
| 662 | if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { | 642 | if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { |
| 663 | int ret; | 643 | int ret; |
| @@ -721,14 +701,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { | |||
| 721 | .data = &init_net.ipv4.frags.high_thresh, | 701 | .data = &init_net.ipv4.frags.high_thresh, |
| 722 | .maxlen = sizeof(int), | 702 | .maxlen = sizeof(int), |
| 723 | .mode = 0644, | 703 | .mode = 0644, |
| 724 | .proc_handler = proc_dointvec | 704 | .proc_handler = proc_dointvec_minmax, |
| 705 | .extra1 = &init_net.ipv4.frags.low_thresh | ||
| 725 | }, | 706 | }, |
| 726 | { | 707 | { |
| 727 | .procname = "ipfrag_low_thresh", | 708 | .procname = "ipfrag_low_thresh", |
| 728 | .data = &init_net.ipv4.frags.low_thresh, | 709 | .data = &init_net.ipv4.frags.low_thresh, |
| 729 | .maxlen = sizeof(int), | 710 | .maxlen = sizeof(int), |
| 730 | .mode = 0644, | 711 | .mode = 0644, |
| 731 | .proc_handler = proc_dointvec | 712 | .proc_handler = proc_dointvec_minmax, |
| 713 | .extra1 = &zero, | ||
| 714 | .extra2 = &init_net.ipv4.frags.high_thresh | ||
| 732 | }, | 715 | }, |
| 733 | { | 716 | { |
| 734 | .procname = "ipfrag_time", | 717 | .procname = "ipfrag_time", |
| @@ -740,10 +723,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { | |||
| 740 | { } | 723 | { } |
| 741 | }; | 724 | }; |
| 742 | 725 | ||
| 726 | /* secret interval has been deprecated */ | ||
| 727 | static int ip4_frags_secret_interval_unused; | ||
| 743 | static struct ctl_table ip4_frags_ctl_table[] = { | 728 | static struct ctl_table ip4_frags_ctl_table[] = { |
| 744 | { | 729 | { |
| 745 | .procname = "ipfrag_secret_interval", | 730 | .procname = "ipfrag_secret_interval", |
| 746 | .data = &ip4_frags.secret_interval, | 731 | .data = &ip4_frags_secret_interval_unused, |
| 747 | .maxlen = sizeof(int), | 732 | .maxlen = sizeof(int), |
| 748 | .mode = 0644, | 733 | .mode = 0644, |
| 749 | .proc_handler = proc_dointvec_jiffies, | 734 | .proc_handler = proc_dointvec_jiffies, |
| @@ -771,7 +756,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) | |||
| 771 | goto err_alloc; | 756 | goto err_alloc; |
| 772 | 757 | ||
| 773 | table[0].data = &net->ipv4.frags.high_thresh; | 758 | table[0].data = &net->ipv4.frags.high_thresh; |
| 759 | table[0].extra1 = &net->ipv4.frags.low_thresh; | ||
| 760 | table[0].extra2 = &init_net.ipv4.frags.high_thresh; | ||
| 774 | table[1].data = &net->ipv4.frags.low_thresh; | 761 | table[1].data = &net->ipv4.frags.low_thresh; |
| 762 | table[1].extra2 = &net->ipv4.frags.high_thresh; | ||
| 775 | table[2].data = &net->ipv4.frags.timeout; | 763 | table[2].data = &net->ipv4.frags.timeout; |
| 776 | 764 | ||
| 777 | /* Don't export sysctls to unprivileged users */ | 765 | /* Don't export sysctls to unprivileged users */ |
| @@ -873,6 +861,7 @@ void __init ipfrag_init(void) | |||
| 873 | ip4_frags.qsize = sizeof(struct ipq); | 861 | ip4_frags.qsize = sizeof(struct ipq); |
| 874 | ip4_frags.match = ip4_frag_match; | 862 | ip4_frags.match = ip4_frag_match; |
| 875 | ip4_frags.frag_expire = ip_expire; | 863 | ip4_frags.frag_expire = ip_expire; |
| 876 | ip4_frags.secret_interval = 10 * 60 * HZ; | 864 | ip4_frags.frags_cache_name = ip_frag_cache_name; |
| 877 | inet_frags_init(&ip4_frags); | 865 | if (inet_frags_init(&ip4_frags)) |
| 866 | panic("IP: failed to allocate ip4_frags cache\n"); | ||
| 878 | } | 867 | } |
