author     Jiri Wiesner <jwiesner@suse.com>        2018-12-05 10:55:29 -0500
committer  David S. Miller <davem@davemloft.net>   2018-12-05 23:44:46 -0500
commit     ebaf39e6032faf77218220707fc3fa22487784e0 (patch)
tree       1ff6b562ec031ef925841174a9ed9a41b4e2fef0 /net
parent     afd0a8006e98b1890908f81746c94ca5dae29d7c (diff)
ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes
The *_frag_reasm() functions are susceptible to miscalculating the byte count of packet fragments in case the truesize of a head buffer changes. The truesize member may be changed by the call to skb_unclone(), leaving the fragment memory limit counter unbalanced even if all fragments are processed. This miscalculation goes unnoticed as long as the network namespace which holds the counter is not destroyed.

Should an attempt be made to destroy a network namespace that holds an unbalanced fragment memory limit counter, the cleanup of the namespace never finishes. The thread handling the cleanup gets stuck in inet_frags_exit_net() waiting for the percpu counter to reach zero. The thread is usually in running state with a stacktrace similar to:

 PID: 1073   TASK: ffff880626711440  CPU: 1   COMMAND: "kworker/u48:4"
  #5 [ffff880621563d48] _raw_spin_lock at ffffffff815f5480
  #6 [ffff880621563d48] inet_evict_bucket at ffffffff8158020b
  #7 [ffff880621563d80] inet_frags_exit_net at ffffffff8158051c
  #8 [ffff880621563db0] ops_exit_list at ffffffff814f5856
  #9 [ffff880621563dd8] cleanup_net at ffffffff814f67c0
 #10 [ffff880621563e38] process_one_work at ffffffff81096f14

It is not possible to create new network namespaces, and processes that call unshare() end up being stuck in uninterruptible sleep state waiting to acquire the net_mutex.

The bug was observed in the IPv6 netfilter code by Per Sundstrom. I thank him for his analysis of the problem. The parts of this patch that apply to IPv4 and IPv6 fragment reassembly are preemptive measures.

Signed-off-by: Jiri Wiesner <jwiesner@suse.com>
Reported-by: Per Sundstrom <per.sundstrom@redqube.se>
Acked-by: Peter Oskolkov <posk@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
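All three hunks below apply the same accounting pattern around skb_unclone(). A minimal sketch of that pattern, pulled together from the diff (the netns_frags pointer is written here as nf and the error path as goto out_nomem purely for illustration; each reassembly function uses its own queue member and error label):

        int delta;

        /* skb_unclone() may reallocate the head skb and grow its
         * truesize; remember the old value so any growth can be
         * charged back to the per-namespace fragment memory counter.
         */
        delta = -head->truesize;

        if (skb_unclone(head, GFP_ATOMIC))
                goto out_nomem;

        delta += head->truesize;
        if (delta)
                add_frag_mem_limit(nf, delta);

Without the delta adjustment, a truesize change in skb_unclone() leaves the counter permanently off by that difference, which is exactly what stalls inet_frags_exit_net() during namespace teardown.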
Diffstat (limited to 'net')
-rw-r--r--   net/ipv4/ip_fragment.c                    7
-rw-r--r--   net/ipv6/netfilter/nf_conntrack_reasm.c   8
-rw-r--r--   net/ipv6/reassembly.c                     8
3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d6ee343fdb86..aa0b22697998 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -515,6 +515,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
        struct rb_node *rbn;
        int len;
        int ihlen;
+       int delta;
        int err;
        u8 ecn;
 
@@ -556,10 +557,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
        if (len > 65535)
                goto out_oversize;
 
+       delta = - head->truesize;
+
        /* Head of list must not be cloned. */
        if (skb_unclone(head, GFP_ATOMIC))
                goto out_nomem;
 
+       delta += head->truesize;
+       if (delta)
+               add_frag_mem_limit(qp->q.net, delta);
+
        /* If the first fragment is fragmented itself, we split
         * it to two chunks: the first with data and paged part
         * and the second, holding only fragments. */
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d219979c3e52..181da2c40f9a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -341,7 +341,7 @@ static bool
 nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
 {
        struct sk_buff *fp, *head = fq->q.fragments;
-       int payload_len;
+       int payload_len, delta;
        u8 ecn;
 
        inet_frag_kill(&fq->q);
@@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
                return false;
        }
 
+       delta = - head->truesize;
+
        /* Head of list must not be cloned. */
        if (skb_unclone(head, GFP_ATOMIC))
                return false;
 
+       delta += head->truesize;
+       if (delta)
+               add_frag_mem_limit(fq->q.net, delta);
+
        /* If the first fragment is fragmented itself, we split
         * it to two chunks: the first with data and paged part
         * and the second, holding only fragments. */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5c3c92713096..aa26c45486d9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 {
        struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
        struct sk_buff *fp, *head = fq->q.fragments;
-       int payload_len;
+       int payload_len, delta;
        unsigned int nhoff;
        int sum_truesize;
        u8 ecn;
@@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
        if (payload_len > IPV6_MAXPLEN)
                goto out_oversize;
 
+       delta = - head->truesize;
+
        /* Head of list must not be cloned. */
        if (skb_unclone(head, GFP_ATOMIC))
                goto out_oom;
 
+       delta += head->truesize;
+       if (delta)
+               add_frag_mem_limit(fq->q.net, delta);
+
        /* If the first fragment is fragmented itself, we split
         * it to two chunks: the first with data and paged part
         * and the second, holding only fragments. */