diff options
author | Hannes Frederic Sowa <hannes@stressinduktion.org> | 2013-03-15 07:32:30 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-03-19 10:28:36 -0400 |
commit | 5a3da1fe9561828d0ca7eca664b16ec2b9bf0055 (patch) | |
tree | b7bbaabf1271af7d912e9bdbb7f5810d2d0a5d3e | |
parent | 271648b4c610eed540daaf9ff366209825757565 (diff) |
inet: limit length of fragment queue hash table bucket lists
This patch introduces a constant limit of the fragment queue hash
table bucket list lengths. Currently the limit of 128 is chosen somewhat
arbitrarily and just ensures that we can fill up the fragment cache with
empty packets up to the default ip_frag_high_thresh limits. It should
just protect against list iteration eating considerable amounts of CPU.
If we reach the maximum length in one hash bucket, a warning is printed.
This is implemented on the caller side of inet_frag_find to distinguish
between the different users of inet_fragment.c.
I dropped the out-of-memory warning in the IPv4 fragment lookup path,
because we already get a warning from the slab allocator.
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jesper Dangaard Brouer <jbrouer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet_frag.h | 9 | ||||
-rw-r--r-- | net/ipv4/inet_fragment.c | 20 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 11 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_reasm.c | 12 | ||||
-rw-r--r-- | net/ipv6/reassembly.c | 8 |
5 files changed, 44 insertions, 16 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 76c3fe5ecc2e..0a1dcc2fa2f5 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h | |||
@@ -43,6 +43,13 @@ struct inet_frag_queue { | |||
43 | 43 | ||
44 | #define INETFRAGS_HASHSZ 64 | 44 | #define INETFRAGS_HASHSZ 64 |
45 | 45 | ||
46 | /* averaged: | ||
47 | * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / | ||
48 | * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or | ||
49 | * struct frag_queue)) | ||
50 | */ | ||
51 | #define INETFRAGS_MAXDEPTH 128 | ||
52 | |||
46 | struct inet_frags { | 53 | struct inet_frags { |
47 | struct hlist_head hash[INETFRAGS_HASHSZ]; | 54 | struct hlist_head hash[INETFRAGS_HASHSZ]; |
48 | /* This rwlock is a global lock (seperate per IPv4, IPv6 and | 55 | /* This rwlock is a global lock (seperate per IPv4, IPv6 and |
@@ -76,6 +83,8 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); | |||
76 | struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, | 83 | struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, |
77 | struct inet_frags *f, void *key, unsigned int hash) | 84 | struct inet_frags *f, void *key, unsigned int hash) |
78 | __releases(&f->lock); | 85 | __releases(&f->lock); |
86 | void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, | ||
87 | const char *prefix); | ||
79 | 88 | ||
80 | static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) | 89 | static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) |
81 | { | 90 | { |
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 245ae078a07f..f4fd23de9b13 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/rtnetlink.h> | 21 | #include <linux/rtnetlink.h> |
22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | 23 | ||
24 | #include <net/sock.h> | ||
24 | #include <net/inet_frag.h> | 25 | #include <net/inet_frag.h> |
25 | 26 | ||
26 | static void inet_frag_secret_rebuild(unsigned long dummy) | 27 | static void inet_frag_secret_rebuild(unsigned long dummy) |
@@ -277,6 +278,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, | |||
277 | __releases(&f->lock) | 278 | __releases(&f->lock) |
278 | { | 279 | { |
279 | struct inet_frag_queue *q; | 280 | struct inet_frag_queue *q; |
281 | int depth = 0; | ||
280 | 282 | ||
281 | hlist_for_each_entry(q, &f->hash[hash], list) { | 283 | hlist_for_each_entry(q, &f->hash[hash], list) { |
282 | if (q->net == nf && f->match(q, key)) { | 284 | if (q->net == nf && f->match(q, key)) { |
@@ -284,9 +286,25 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, | |||
284 | read_unlock(&f->lock); | 286 | read_unlock(&f->lock); |
285 | return q; | 287 | return q; |
286 | } | 288 | } |
289 | depth++; | ||
287 | } | 290 | } |
288 | read_unlock(&f->lock); | 291 | read_unlock(&f->lock); |
289 | 292 | ||
290 | return inet_frag_create(nf, f, key); | 293 | if (depth <= INETFRAGS_MAXDEPTH) |
294 | return inet_frag_create(nf, f, key); | ||
295 | else | ||
296 | return ERR_PTR(-ENOBUFS); | ||
291 | } | 297 | } |
292 | EXPORT_SYMBOL(inet_frag_find); | 298 | EXPORT_SYMBOL(inet_frag_find); |
299 | |||
300 | void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, | ||
301 | const char *prefix) | ||
302 | { | ||
303 | static const char msg[] = "inet_frag_find: Fragment hash bucket" | ||
304 | " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) | ||
305 | ". Dropping fragment.\n"; | ||
306 | |||
307 | if (PTR_ERR(q) == -ENOBUFS) | ||
308 | LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); | ||
309 | } | ||
310 | EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); | ||
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b6d30acb600c..a6445b843ef4 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -292,14 +292,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) | |||
292 | hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); | 292 | hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); |
293 | 293 | ||
294 | q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); | 294 | q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); |
295 | if (q == NULL) | 295 | if (IS_ERR_OR_NULL(q)) { |
296 | goto out_nomem; | 296 | inet_frag_maybe_warn_overflow(q, pr_fmt()); |
297 | 297 | return NULL; | |
298 | } | ||
298 | return container_of(q, struct ipq, q); | 299 | return container_of(q, struct ipq, q); |
299 | |||
300 | out_nomem: | ||
301 | LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); | ||
302 | return NULL; | ||
303 | } | 300 | } |
304 | 301 | ||
305 | /* Is the fragment too far ahead to be part of ipq? */ | 302 | /* Is the fragment too far ahead to be part of ipq? */ |
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 54087e96d7b8..6700069949dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | |||
@@ -14,6 +14,8 @@ | |||
14 | * 2 of the License, or (at your option) any later version. | 14 | * 2 of the License, or (at your option) any later version. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #define pr_fmt(fmt) "IPv6-nf: " fmt | ||
18 | |||
17 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
18 | #include <linux/types.h> | 20 | #include <linux/types.h> |
19 | #include <linux/string.h> | 21 | #include <linux/string.h> |
@@ -180,13 +182,11 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, | |||
180 | 182 | ||
181 | q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); | 183 | q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); |
182 | local_bh_enable(); | 184 | local_bh_enable(); |
183 | if (q == NULL) | 185 | if (IS_ERR_OR_NULL(q)) { |
184 | goto oom; | 186 | inet_frag_maybe_warn_overflow(q, pr_fmt()); |
185 | 187 | return NULL; | |
188 | } | ||
186 | return container_of(q, struct frag_queue, q); | 189 | return container_of(q, struct frag_queue, q); |
187 | |||
188 | oom: | ||
189 | return NULL; | ||
190 | } | 190 | } |
191 | 191 | ||
192 | 192 | ||
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3c6a77290c6e..196ab9347ad1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c | |||
@@ -26,6 +26,9 @@ | |||
26 | * YOSHIFUJI,H. @USAGI Always remove fragment header to | 26 | * YOSHIFUJI,H. @USAGI Always remove fragment header to |
27 | * calculate ICV correctly. | 27 | * calculate ICV correctly. |
28 | */ | 28 | */ |
29 | |||
30 | #define pr_fmt(fmt) "IPv6: " fmt | ||
31 | |||
29 | #include <linux/errno.h> | 32 | #include <linux/errno.h> |
30 | #include <linux/types.h> | 33 | #include <linux/types.h> |
31 | #include <linux/string.h> | 34 | #include <linux/string.h> |
@@ -185,9 +188,10 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 | |||
185 | hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); | 188 | hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); |
186 | 189 | ||
187 | q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); | 190 | q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); |
188 | if (q == NULL) | 191 | if (IS_ERR_OR_NULL(q)) { |
192 | inet_frag_maybe_warn_overflow(q, pr_fmt()); | ||
189 | return NULL; | 193 | return NULL; |
190 | 194 | } | |
191 | return container_of(q, struct frag_queue, q); | 195 | return container_of(q, struct frag_queue, q); |
192 | } | 196 | } |
193 | 197 | ||