diff options
author | Alexei Starovoitov <ast@plumgrid.com> | 2013-10-04 03:14:06 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-10-07 15:16:45 -0400 |
commit | d45ed4a4e33ae103053c0a53d280014e7101bb5c (patch) | |
tree | 93e87b546199d28d844647649ea8dda7eaefe303 | |
parent | 582442d6d5bc74c1e11a2515f9387d3d227278e2 (diff) |
net: fix unsafe set_memory_rw from softirq
on x86 system with net.core.bpf_jit_enable = 1
sudo tcpdump -i eth1 'tcp port 22'
causes the warning:
[ 56.766097] Possible unsafe locking scenario:
[ 56.766097]
[ 56.780146] CPU0
[ 56.786807] ----
[ 56.793188] lock(&(&vb->lock)->rlock);
[ 56.799593] <Interrupt>
[ 56.805889] lock(&(&vb->lock)->rlock);
[ 56.812266]
[ 56.812266] *** DEADLOCK ***
[ 56.812266]
[ 56.830670] 1 lock held by ksoftirqd/1/13:
[ 56.836838] #0: (rcu_read_lock){.+.+..}, at: [<ffffffff8118f44c>] vm_unmap_aliases+0x8c/0x380
[ 56.849757]
[ 56.849757] stack backtrace:
[ 56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45
[ 56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[ 56.882004] ffffffff821944c0 ffff88080bbdb8c8 ffffffff8175a145 0000000000000007
[ 56.895630] ffff88080bbd5f40 ffff88080bbdb928 ffffffff81755b14 0000000000000001
[ 56.909313] ffff880800000001 ffff880800000000 ffffffff8101178f 0000000000000001
[ 56.923006] Call Trace:
[ 56.929532] [<ffffffff8175a145>] dump_stack+0x55/0x76
[ 56.936067] [<ffffffff81755b14>] print_usage_bug+0x1f7/0x208
[ 56.942445] [<ffffffff8101178f>] ? save_stack_trace+0x2f/0x50
[ 56.948932] [<ffffffff810cc0a0>] ? check_usage_backwards+0x150/0x150
[ 56.955470] [<ffffffff810ccb52>] mark_lock+0x282/0x2c0
[ 56.961945] [<ffffffff810ccfed>] __lock_acquire+0x45d/0x1d50
[ 56.968474] [<ffffffff810cce6e>] ? __lock_acquire+0x2de/0x1d50
[ 56.975140] [<ffffffff81393bf5>] ? cpumask_next_and+0x55/0x90
[ 56.981942] [<ffffffff810cef72>] lock_acquire+0x92/0x1d0
[ 56.988745] [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380
[ 56.995619] [<ffffffff817628f1>] _raw_spin_lock+0x41/0x50
[ 57.002493] [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380
[ 57.009447] [<ffffffff8118f52a>] vm_unmap_aliases+0x16a/0x380
[ 57.016477] [<ffffffff8118f44c>] ? vm_unmap_aliases+0x8c/0x380
[ 57.023607] [<ffffffff810436b0>] change_page_attr_set_clr+0xc0/0x460
[ 57.030818] [<ffffffff810cfb8d>] ? trace_hardirqs_on+0xd/0x10
[ 57.037896] [<ffffffff811a8330>] ? kmem_cache_free+0xb0/0x2b0
[ 57.044789] [<ffffffff811b59c3>] ? free_object_rcu+0x93/0xa0
[ 57.051720] [<ffffffff81043d9f>] set_memory_rw+0x2f/0x40
[ 57.058727] [<ffffffff8104e17c>] bpf_jit_free+0x2c/0x40
[ 57.065577] [<ffffffff81642cba>] sk_filter_release_rcu+0x1a/0x30
[ 57.072338] [<ffffffff811108e2>] rcu_process_callbacks+0x202/0x7c0
[ 57.078962] [<ffffffff81057f17>] __do_softirq+0xf7/0x3f0
[ 57.085373] [<ffffffff81058245>] run_ksoftirqd+0x35/0x70
cannot reuse jited filter memory, since it's readonly,
so use original bpf insns memory to hold work_struct
defer kfree of sk_filter until jit completed freeing
tested on x86_64 and i386
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/arm/net/bpf_jit_32.c | 1 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp.c | 1 | ||||
-rw-r--r-- | arch/s390/net/bpf_jit_comp.c | 4 | ||||
-rw-r--r-- | arch/sparc/net/bpf_jit_comp.c | 1 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 18 | ||||
-rw-r--r-- | include/linux/filter.h | 15 | ||||
-rw-r--r-- | include/net/sock.h | 6 | ||||
-rw-r--r-- | net/core/filter.c | 8 |
8 files changed, 36 insertions, 18 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f50d223a0bd3..99b44e0e8d86 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c | |||
@@ -930,4 +930,5 @@ void bpf_jit_free(struct sk_filter *fp) | |||
930 | { | 930 | { |
931 | if (fp->bpf_func != sk_run_filter) | 931 | if (fp->bpf_func != sk_run_filter) |
932 | module_free(NULL, fp->bpf_func); | 932 | module_free(NULL, fp->bpf_func); |
933 | kfree(fp); | ||
933 | } | 934 | } |
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33f8257..2345bdb4d917 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c | |||
@@ -691,4 +691,5 @@ void bpf_jit_free(struct sk_filter *fp) | |||
691 | { | 691 | { |
692 | if (fp->bpf_func != sk_run_filter) | 692 | if (fp->bpf_func != sk_run_filter) |
693 | module_free(NULL, fp->bpf_func); | 693 | module_free(NULL, fp->bpf_func); |
694 | kfree(fp); | ||
694 | } | 695 | } |
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 709239285869..a5df511e27a2 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c | |||
@@ -881,7 +881,9 @@ void bpf_jit_free(struct sk_filter *fp) | |||
881 | struct bpf_binary_header *header = (void *)addr; | 881 | struct bpf_binary_header *header = (void *)addr; |
882 | 882 | ||
883 | if (fp->bpf_func == sk_run_filter) | 883 | if (fp->bpf_func == sk_run_filter) |
884 | return; | 884 | goto free_filter; |
885 | set_memory_rw(addr, header->pages); | 885 | set_memory_rw(addr, header->pages); |
886 | module_free(NULL, header); | 886 | module_free(NULL, header); |
887 | free_filter: | ||
888 | kfree(fp); | ||
887 | } | 889 | } |
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 9c7be59e6f5a..218b6b23c378 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c | |||
@@ -808,4 +808,5 @@ void bpf_jit_free(struct sk_filter *fp) | |||
808 | { | 808 | { |
809 | if (fp->bpf_func != sk_run_filter) | 809 | if (fp->bpf_func != sk_run_filter) |
810 | module_free(NULL, fp->bpf_func); | 810 | module_free(NULL, fp->bpf_func); |
811 | kfree(fp); | ||
811 | } | 812 | } |
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 79c216aa0e2b..516593e1ce33 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -772,13 +772,21 @@ out: | |||
772 | return; | 772 | return; |
773 | } | 773 | } |
774 | 774 | ||
775 | static void bpf_jit_free_deferred(struct work_struct *work) | ||
776 | { | ||
777 | struct sk_filter *fp = container_of(work, struct sk_filter, work); | ||
778 | unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; | ||
779 | struct bpf_binary_header *header = (void *)addr; | ||
780 | |||
781 | set_memory_rw(addr, header->pages); | ||
782 | module_free(NULL, header); | ||
783 | kfree(fp); | ||
784 | } | ||
785 | |||
775 | void bpf_jit_free(struct sk_filter *fp) | 786 | void bpf_jit_free(struct sk_filter *fp) |
776 | { | 787 | { |
777 | if (fp->bpf_func != sk_run_filter) { | 788 | if (fp->bpf_func != sk_run_filter) { |
778 | unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; | 789 | INIT_WORK(&fp->work, bpf_jit_free_deferred); |
779 | struct bpf_binary_header *header = (void *)addr; | 790 | schedule_work(&fp->work); |
780 | |||
781 | set_memory_rw(addr, header->pages); | ||
782 | module_free(NULL, header); | ||
783 | } | 791 | } |
784 | } | 792 | } |
diff --git a/include/linux/filter.h b/include/linux/filter.h index a6ac84871d6d..ff4e40cd45b1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h | |||
@@ -6,6 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/atomic.h> | 7 | #include <linux/atomic.h> |
8 | #include <linux/compat.h> | 8 | #include <linux/compat.h> |
9 | #include <linux/workqueue.h> | ||
9 | #include <uapi/linux/filter.h> | 10 | #include <uapi/linux/filter.h> |
10 | 11 | ||
11 | #ifdef CONFIG_COMPAT | 12 | #ifdef CONFIG_COMPAT |
@@ -25,15 +26,19 @@ struct sk_filter | |||
25 | { | 26 | { |
26 | atomic_t refcnt; | 27 | atomic_t refcnt; |
27 | unsigned int len; /* Number of filter blocks */ | 28 | unsigned int len; /* Number of filter blocks */ |
29 | struct rcu_head rcu; | ||
28 | unsigned int (*bpf_func)(const struct sk_buff *skb, | 30 | unsigned int (*bpf_func)(const struct sk_buff *skb, |
29 | const struct sock_filter *filter); | 31 | const struct sock_filter *filter); |
30 | struct rcu_head rcu; | 32 | union { |
31 | struct sock_filter insns[0]; | 33 | struct sock_filter insns[0]; |
34 | struct work_struct work; | ||
35 | }; | ||
32 | }; | 36 | }; |
33 | 37 | ||
34 | static inline unsigned int sk_filter_len(const struct sk_filter *fp) | 38 | static inline unsigned int sk_filter_size(unsigned int proglen) |
35 | { | 39 | { |
36 | return fp->len * sizeof(struct sock_filter) + sizeof(*fp); | 40 | return max(sizeof(struct sk_filter), |
41 | offsetof(struct sk_filter, insns[proglen])); | ||
37 | } | 42 | } |
38 | 43 | ||
39 | extern int sk_filter(struct sock *sk, struct sk_buff *skb); | 44 | extern int sk_filter(struct sock *sk, struct sk_buff *skb); |
@@ -67,11 +72,13 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, | |||
67 | } | 72 | } |
68 | #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) | 73 | #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns) |
69 | #else | 74 | #else |
75 | #include <linux/slab.h> | ||
70 | static inline void bpf_jit_compile(struct sk_filter *fp) | 76 | static inline void bpf_jit_compile(struct sk_filter *fp) |
71 | { | 77 | { |
72 | } | 78 | } |
73 | static inline void bpf_jit_free(struct sk_filter *fp) | 79 | static inline void bpf_jit_free(struct sk_filter *fp) |
74 | { | 80 | { |
81 | kfree(fp); | ||
75 | } | 82 | } |
76 | #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) | 83 | #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) |
77 | #endif | 84 | #endif |
diff --git a/include/net/sock.h b/include/net/sock.h index 1d37a8086bed..808cbc2ec6c1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -1630,16 +1630,14 @@ static inline void sk_filter_release(struct sk_filter *fp) | |||
1630 | 1630 | ||
1631 | static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) | 1631 | static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) |
1632 | { | 1632 | { |
1633 | unsigned int size = sk_filter_len(fp); | 1633 | atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); |
1634 | |||
1635 | atomic_sub(size, &sk->sk_omem_alloc); | ||
1636 | sk_filter_release(fp); | 1634 | sk_filter_release(fp); |
1637 | } | 1635 | } |
1638 | 1636 | ||
1639 | static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) | 1637 | static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) |
1640 | { | 1638 | { |
1641 | atomic_inc(&fp->refcnt); | 1639 | atomic_inc(&fp->refcnt); |
1642 | atomic_add(sk_filter_len(fp), &sk->sk_omem_alloc); | 1640 | atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); |
1643 | } | 1641 | } |
1644 | 1642 | ||
1645 | /* | 1643 | /* |
diff --git a/net/core/filter.c b/net/core/filter.c index 6438f29ff266..01b780856db2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu) | |||
644 | struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); | 644 | struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); |
645 | 645 | ||
646 | bpf_jit_free(fp); | 646 | bpf_jit_free(fp); |
647 | kfree(fp); | ||
648 | } | 647 | } |
649 | EXPORT_SYMBOL(sk_filter_release_rcu); | 648 | EXPORT_SYMBOL(sk_filter_release_rcu); |
650 | 649 | ||
@@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp, | |||
683 | if (fprog->filter == NULL) | 682 | if (fprog->filter == NULL) |
684 | return -EINVAL; | 683 | return -EINVAL; |
685 | 684 | ||
686 | fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL); | 685 | fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); |
687 | if (!fp) | 686 | if (!fp) |
688 | return -ENOMEM; | 687 | return -ENOMEM; |
689 | memcpy(fp->insns, fprog->filter, fsize); | 688 | memcpy(fp->insns, fprog->filter, fsize); |
@@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
723 | { | 722 | { |
724 | struct sk_filter *fp, *old_fp; | 723 | struct sk_filter *fp, *old_fp; |
725 | unsigned int fsize = sizeof(struct sock_filter) * fprog->len; | 724 | unsigned int fsize = sizeof(struct sock_filter) * fprog->len; |
725 | unsigned int sk_fsize = sk_filter_size(fprog->len); | ||
726 | int err; | 726 | int err; |
727 | 727 | ||
728 | if (sock_flag(sk, SOCK_FILTER_LOCKED)) | 728 | if (sock_flag(sk, SOCK_FILTER_LOCKED)) |
@@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
732 | if (fprog->filter == NULL) | 732 | if (fprog->filter == NULL) |
733 | return -EINVAL; | 733 | return -EINVAL; |
734 | 734 | ||
735 | fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); | 735 | fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); |
736 | if (!fp) | 736 | if (!fp) |
737 | return -ENOMEM; | 737 | return -ENOMEM; |
738 | if (copy_from_user(fp->insns, fprog->filter, fsize)) { | 738 | if (copy_from_user(fp->insns, fprog->filter, fsize)) { |
739 | sock_kfree_s(sk, fp, fsize+sizeof(*fp)); | 739 | sock_kfree_s(sk, fp, sk_fsize); |
740 | return -EFAULT; | 740 | return -EFAULT; |
741 | } | 741 | } |
742 | 742 | ||