aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@plumgrid.com>2013-10-04 03:14:06 -0400
committerDavid S. Miller <davem@davemloft.net>2013-10-07 15:16:45 -0400
commitd45ed4a4e33ae103053c0a53d280014e7101bb5c (patch)
tree93e87b546199d28d844647649ea8dda7eaefe303
parent582442d6d5bc74c1e11a2515f9387d3d227278e2 (diff)
net: fix unsafe set_memory_rw from softirq
on x86 system with net.core.bpf_jit_enable = 1 sudo tcpdump -i eth1 'tcp port 22' causes the warning: [ 56.766097] Possible unsafe locking scenario: [ 56.766097] [ 56.780146] CPU0 [ 56.786807] ---- [ 56.793188] lock(&(&vb->lock)->rlock); [ 56.799593] <Interrupt> [ 56.805889] lock(&(&vb->lock)->rlock); [ 56.812266] [ 56.812266] *** DEADLOCK *** [ 56.812266] [ 56.830670] 1 lock held by ksoftirqd/1/13: [ 56.836838] #0: (rcu_read_lock){.+.+..}, at: [<ffffffff8118f44c>] vm_unmap_aliases+0x8c/0x380 [ 56.849757] [ 56.849757] stack backtrace: [ 56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45 [ 56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 56.882004] ffffffff821944c0 ffff88080bbdb8c8 ffffffff8175a145 0000000000000007 [ 56.895630] ffff88080bbd5f40 ffff88080bbdb928 ffffffff81755b14 0000000000000001 [ 56.909313] ffff880800000001 ffff880800000000 ffffffff8101178f 0000000000000001 [ 56.923006] Call Trace: [ 56.929532] [<ffffffff8175a145>] dump_stack+0x55/0x76 [ 56.936067] [<ffffffff81755b14>] print_usage_bug+0x1f7/0x208 [ 56.942445] [<ffffffff8101178f>] ? save_stack_trace+0x2f/0x50 [ 56.948932] [<ffffffff810cc0a0>] ? check_usage_backwards+0x150/0x150 [ 56.955470] [<ffffffff810ccb52>] mark_lock+0x282/0x2c0 [ 56.961945] [<ffffffff810ccfed>] __lock_acquire+0x45d/0x1d50 [ 56.968474] [<ffffffff810cce6e>] ? __lock_acquire+0x2de/0x1d50 [ 56.975140] [<ffffffff81393bf5>] ? cpumask_next_and+0x55/0x90 [ 56.981942] [<ffffffff810cef72>] lock_acquire+0x92/0x1d0 [ 56.988745] [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380 [ 56.995619] [<ffffffff817628f1>] _raw_spin_lock+0x41/0x50 [ 57.002493] [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380 [ 57.009447] [<ffffffff8118f52a>] vm_unmap_aliases+0x16a/0x380 [ 57.016477] [<ffffffff8118f44c>] ? vm_unmap_aliases+0x8c/0x380 [ 57.023607] [<ffffffff810436b0>] change_page_attr_set_clr+0xc0/0x460 [ 57.030818] [<ffffffff810cfb8d>] ? trace_hardirqs_on+0xd/0x10 [ 57.037896] [<ffffffff811a8330>] ? kmem_cache_free+0xb0/0x2b0 [ 57.044789] [<ffffffff811b59c3>] ? free_object_rcu+0x93/0xa0 [ 57.051720] [<ffffffff81043d9f>] set_memory_rw+0x2f/0x40 [ 57.058727] [<ffffffff8104e17c>] bpf_jit_free+0x2c/0x40 [ 57.065577] [<ffffffff81642cba>] sk_filter_release_rcu+0x1a/0x30 [ 57.072338] [<ffffffff811108e2>] rcu_process_callbacks+0x202/0x7c0 [ 57.078962] [<ffffffff81057f17>] __do_softirq+0xf7/0x3f0 [ 57.085373] [<ffffffff81058245>] run_ksoftirqd+0x35/0x70 cannot reuse jited filter memory, since it's readonly, so use original bpf insns memory to hold work_struct defer kfree of sk_filter until jit completed freeing tested on x86_64 and i386 Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/arm/net/bpf_jit_32.c1
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c1
-rw-r--r--arch/s390/net/bpf_jit_comp.c4
-rw-r--r--arch/sparc/net/bpf_jit_comp.c1
-rw-r--r--arch/x86/net/bpf_jit_comp.c18
-rw-r--r--include/linux/filter.h15
-rw-r--r--include/net/sock.h6
-rw-r--r--net/core/filter.c8
8 files changed, 36 insertions, 18 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index f50d223a0bd3..99b44e0e8d86 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -930,4 +930,5 @@ void bpf_jit_free(struct sk_filter *fp)
930{ 930{
931 if (fp->bpf_func != sk_run_filter) 931 if (fp->bpf_func != sk_run_filter)
932 module_free(NULL, fp->bpf_func); 932 module_free(NULL, fp->bpf_func);
933 kfree(fp);
933} 934}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index bf56e33f8257..2345bdb4d917 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp)
691{ 691{
692 if (fp->bpf_func != sk_run_filter) 692 if (fp->bpf_func != sk_run_filter)
693 module_free(NULL, fp->bpf_func); 693 module_free(NULL, fp->bpf_func);
694 kfree(fp);
694} 695}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 709239285869..a5df511e27a2 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp)
881 struct bpf_binary_header *header = (void *)addr; 881 struct bpf_binary_header *header = (void *)addr;
882 882
883 if (fp->bpf_func == sk_run_filter) 883 if (fp->bpf_func == sk_run_filter)
884 return; 884 goto free_filter;
885 set_memory_rw(addr, header->pages); 885 set_memory_rw(addr, header->pages);
886 module_free(NULL, header); 886 module_free(NULL, header);
887free_filter:
888 kfree(fp);
887} 889}
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 9c7be59e6f5a..218b6b23c378 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp)
808{ 808{
809 if (fp->bpf_func != sk_run_filter) 809 if (fp->bpf_func != sk_run_filter)
810 module_free(NULL, fp->bpf_func); 810 module_free(NULL, fp->bpf_func);
811 kfree(fp);
811} 812}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 79c216aa0e2b..516593e1ce33 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -772,13 +772,21 @@ out:
772 return; 772 return;
773} 773}
774 774
775static void bpf_jit_free_deferred(struct work_struct *work)
776{
777 struct sk_filter *fp = container_of(work, struct sk_filter, work);
778 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
779 struct bpf_binary_header *header = (void *)addr;
780
781 set_memory_rw(addr, header->pages);
782 module_free(NULL, header);
783 kfree(fp);
784}
785
775void bpf_jit_free(struct sk_filter *fp) 786void bpf_jit_free(struct sk_filter *fp)
776{ 787{
777 if (fp->bpf_func != sk_run_filter) { 788 if (fp->bpf_func != sk_run_filter) {
778 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; 789 INIT_WORK(&fp->work, bpf_jit_free_deferred);
779 struct bpf_binary_header *header = (void *)addr; 790 schedule_work(&fp->work);
780
781 set_memory_rw(addr, header->pages);
782 module_free(NULL, header);
783 } 791 }
784} 792}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a6ac84871d6d..ff4e40cd45b1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -6,6 +6,7 @@
6 6
7#include <linux/atomic.h> 7#include <linux/atomic.h>
8#include <linux/compat.h> 8#include <linux/compat.h>
9#include <linux/workqueue.h>
9#include <uapi/linux/filter.h> 10#include <uapi/linux/filter.h>
10 11
11#ifdef CONFIG_COMPAT 12#ifdef CONFIG_COMPAT
@@ -25,15 +26,19 @@ struct sk_filter
25{ 26{
26 atomic_t refcnt; 27 atomic_t refcnt;
27 unsigned int len; /* Number of filter blocks */ 28 unsigned int len; /* Number of filter blocks */
29 struct rcu_head rcu;
28 unsigned int (*bpf_func)(const struct sk_buff *skb, 30 unsigned int (*bpf_func)(const struct sk_buff *skb,
29 const struct sock_filter *filter); 31 const struct sock_filter *filter);
30 struct rcu_head rcu; 32 union {
31 struct sock_filter insns[0]; 33 struct sock_filter insns[0];
34 struct work_struct work;
35 };
32}; 36};
33 37
34static inline unsigned int sk_filter_len(const struct sk_filter *fp) 38static inline unsigned int sk_filter_size(unsigned int proglen)
35{ 39{
36 return fp->len * sizeof(struct sock_filter) + sizeof(*fp); 40 return max(sizeof(struct sk_filter),
41 offsetof(struct sk_filter, insns[proglen]));
37} 42}
38 43
39extern int sk_filter(struct sock *sk, struct sk_buff *skb); 44extern int sk_filter(struct sock *sk, struct sk_buff *skb);
@@ -67,11 +72,13 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
67} 72}
68#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) 73#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
69#else 74#else
75#include <linux/slab.h>
70static inline void bpf_jit_compile(struct sk_filter *fp) 76static inline void bpf_jit_compile(struct sk_filter *fp)
71{ 77{
72} 78}
73static inline void bpf_jit_free(struct sk_filter *fp) 79static inline void bpf_jit_free(struct sk_filter *fp)
74{ 80{
81 kfree(fp);
75} 82}
76#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) 83#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
77#endif 84#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 1d37a8086bed..808cbc2ec6c1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1630,16 +1630,14 @@ static inline void sk_filter_release(struct sk_filter *fp)
1630 1630
1631static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) 1631static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1632{ 1632{
1633 unsigned int size = sk_filter_len(fp); 1633 atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1634
1635 atomic_sub(size, &sk->sk_omem_alloc);
1636 sk_filter_release(fp); 1634 sk_filter_release(fp);
1637} 1635}
1638 1636
1639static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) 1637static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1640{ 1638{
1641 atomic_inc(&fp->refcnt); 1639 atomic_inc(&fp->refcnt);
1642 atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc); 1640 atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1643} 1641}
1644 1642
1645/* 1643/*
diff --git a/net/core/filter.c b/net/core/filter.c
index 6438f29ff266..01b780856db2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
644 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 644 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
645 645
646 bpf_jit_free(fp); 646 bpf_jit_free(fp);
647 kfree(fp);
648} 647}
649EXPORT_SYMBOL(sk_filter_release_rcu); 648EXPORT_SYMBOL(sk_filter_release_rcu);
650 649
@@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
683 if (fprog->filter == NULL) 682 if (fprog->filter == NULL)
684 return -EINVAL; 683 return -EINVAL;
685 684
686 fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL); 685 fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
687 if (!fp) 686 if (!fp)
688 return -ENOMEM; 687 return -ENOMEM;
689 memcpy(fp->insns, fprog->filter, fsize); 688 memcpy(fp->insns, fprog->filter, fsize);
@@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
723{ 722{
724 struct sk_filter *fp, *old_fp; 723 struct sk_filter *fp, *old_fp;
725 unsigned int fsize = sizeof(struct sock_filter) * fprog->len; 724 unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
725 unsigned int sk_fsize = sk_filter_size(fprog->len);
726 int err; 726 int err;
727 727
728 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 728 if (sock_flag(sk, SOCK_FILTER_LOCKED))
@@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
732 if (fprog->filter == NULL) 732 if (fprog->filter == NULL)
733 return -EINVAL; 733 return -EINVAL;
734 734
735 fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); 735 fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
736 if (!fp) 736 if (!fp)
737 return -ENOMEM; 737 return -ENOMEM;
738 if (copy_from_user(fp->insns, fprog->filter, fsize)) { 738 if (copy_from_user(fp->insns, fprog->filter, fsize)) {
739 sock_kfree_s(sk, fp, fsize+sizeof(*fp)); 739 sock_kfree_s(sk, fp, sk_fsize);
740 return -EFAULT; 740 return -EFAULT;
741 } 741 }
742 742