diff options
author | Daniel Borkmann <dborkman@redhat.com> | 2014-03-28 13:58:19 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-03-31 00:45:09 -0400 |
commit | a3ea269b8bcdbb0c5fa2fd449a436e7987446975 (patch) | |
tree | 9e118e381c14f9a065cc4136be2a9bf6e5257dfb | |
parent | f8bbbfc3b97f4c7a6c7c23185e520b22bfc3a21d (diff) |
net: filter: keep original BPF program around
In order to open up the possibility to internally transform a BPF program
into an alternative and possibly not trivially reversible representation, we
need to keep the original BPF program around, so that it can be passed back
to user space without the need for a complex decoder.
The reason for that use case resides in commit a8fc92778080 ("sk-filter:
Add ability to get socket filter program (v2)"), that is, the ability
to retrieve the currently attached BPF filter from a given socket used
mainly by the checkpoint-restore project, for example.
Therefore, we add two helpers sk_{store,release}_orig_filter for taking
care of that. In the sk_unattached_filter_create() case, there's no such
possibility/requirement to retrieve a loaded BPF program. Therefore, we
can spare ourselves the work in that case.
This approach will simplify and slightly speed up both the sk_get_filter()
and sock_diag_put_filterinfo() handlers, as we won't need to successively
decode filters anymore through sk_decode_filter(). As we still need
sk_decode_filter() later on, we're keeping it around.
Joint work with Alexei Starovoitov.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/filter.h | 15 | ||||
-rw-r--r-- | net/core/filter.c | 86 | ||||
-rw-r--r-- | net/core/sock_diag.c | 23 |
3 files changed, 93 insertions, 31 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h index e65e23087367..93a9792e27bc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h | |||
@@ -19,14 +19,19 @@ struct compat_sock_fprog { | |||
19 | }; | 19 | }; |
20 | #endif | 20 | #endif |
21 | 21 | ||
22 | struct sock_fprog_kern { | ||
23 | u16 len; | ||
24 | struct sock_filter *filter; | ||
25 | }; | ||
26 | |||
22 | struct sk_buff; | 27 | struct sk_buff; |
23 | struct sock; | 28 | struct sock; |
24 | 29 | ||
25 | struct sk_filter | 30 | struct sk_filter { |
26 | { | ||
27 | atomic_t refcnt; | 31 | atomic_t refcnt; |
28 | u32 jited:1, /* Is our filter JIT'ed? */ | 32 | u32 jited:1, /* Is our filter JIT'ed? */ |
29 | len:31; /* Number of filter blocks */ | 33 | len:31; /* Number of filter blocks */ |
34 | struct sock_fprog_kern *orig_prog; /* Original BPF program */ | ||
30 | struct rcu_head rcu; | 35 | struct rcu_head rcu; |
31 | unsigned int (*bpf_func)(const struct sk_buff *skb, | 36 | unsigned int (*bpf_func)(const struct sk_buff *skb, |
32 | const struct sock_filter *filter); | 37 | const struct sock_filter *filter); |
@@ -42,14 +47,20 @@ static inline unsigned int sk_filter_size(unsigned int proglen) | |||
42 | offsetof(struct sk_filter, insns[proglen])); | 47 | offsetof(struct sk_filter, insns[proglen])); |
43 | } | 48 | } |
44 | 49 | ||
50 | #define sk_filter_proglen(fprog) \ | ||
51 | (fprog->len * sizeof(fprog->filter[0])) | ||
52 | |||
45 | extern int sk_filter(struct sock *sk, struct sk_buff *skb); | 53 | extern int sk_filter(struct sock *sk, struct sk_buff *skb); |
46 | extern unsigned int sk_run_filter(const struct sk_buff *skb, | 54 | extern unsigned int sk_run_filter(const struct sk_buff *skb, |
47 | const struct sock_filter *filter); | 55 | const struct sock_filter *filter); |
56 | |||
48 | extern int sk_unattached_filter_create(struct sk_filter **pfp, | 57 | extern int sk_unattached_filter_create(struct sk_filter **pfp, |
49 | struct sock_fprog *fprog); | 58 | struct sock_fprog *fprog); |
50 | extern void sk_unattached_filter_destroy(struct sk_filter *fp); | 59 | extern void sk_unattached_filter_destroy(struct sk_filter *fp); |
60 | |||
51 | extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); | 61 | extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); |
52 | extern int sk_detach_filter(struct sock *sk); | 62 | extern int sk_detach_filter(struct sock *sk); |
63 | |||
53 | extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); | 64 | extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); |
54 | extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); | 65 | extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); |
55 | extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); | 66 | extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); |
diff --git a/net/core/filter.c b/net/core/filter.c index bb3c76458ca9..9730e7fe4770 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -629,6 +629,37 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) | |||
629 | } | 629 | } |
630 | EXPORT_SYMBOL(sk_chk_filter); | 630 | EXPORT_SYMBOL(sk_chk_filter); |
631 | 631 | ||
632 | static int sk_store_orig_filter(struct sk_filter *fp, | ||
633 | const struct sock_fprog *fprog) | ||
634 | { | ||
635 | unsigned int fsize = sk_filter_proglen(fprog); | ||
636 | struct sock_fprog_kern *fkprog; | ||
637 | |||
638 | fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL); | ||
639 | if (!fp->orig_prog) | ||
640 | return -ENOMEM; | ||
641 | |||
642 | fkprog = fp->orig_prog; | ||
643 | fkprog->len = fprog->len; | ||
644 | fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL); | ||
645 | if (!fkprog->filter) { | ||
646 | kfree(fp->orig_prog); | ||
647 | return -ENOMEM; | ||
648 | } | ||
649 | |||
650 | return 0; | ||
651 | } | ||
652 | |||
653 | static void sk_release_orig_filter(struct sk_filter *fp) | ||
654 | { | ||
655 | struct sock_fprog_kern *fprog = fp->orig_prog; | ||
656 | |||
657 | if (fprog) { | ||
658 | kfree(fprog->filter); | ||
659 | kfree(fprog); | ||
660 | } | ||
661 | } | ||
662 | |||
632 | /** | 663 | /** |
633 | * sk_filter_release_rcu - Release a socket filter by rcu_head | 664 | * sk_filter_release_rcu - Release a socket filter by rcu_head |
634 | * @rcu: rcu_head that contains the sk_filter to free | 665 | * @rcu: rcu_head that contains the sk_filter to free |
@@ -637,6 +668,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu) | |||
637 | { | 668 | { |
638 | struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); | 669 | struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); |
639 | 670 | ||
671 | sk_release_orig_filter(fp); | ||
640 | bpf_jit_free(fp); | 672 | bpf_jit_free(fp); |
641 | } | 673 | } |
642 | EXPORT_SYMBOL(sk_filter_release_rcu); | 674 | EXPORT_SYMBOL(sk_filter_release_rcu); |
@@ -669,8 +701,8 @@ static int __sk_prepare_filter(struct sk_filter *fp) | |||
669 | int sk_unattached_filter_create(struct sk_filter **pfp, | 701 | int sk_unattached_filter_create(struct sk_filter **pfp, |
670 | struct sock_fprog *fprog) | 702 | struct sock_fprog *fprog) |
671 | { | 703 | { |
704 | unsigned int fsize = sk_filter_proglen(fprog); | ||
672 | struct sk_filter *fp; | 705 | struct sk_filter *fp; |
673 | unsigned int fsize = sizeof(struct sock_filter) * fprog->len; | ||
674 | int err; | 706 | int err; |
675 | 707 | ||
676 | /* Make sure new filter is there and in the right amounts. */ | 708 | /* Make sure new filter is there and in the right amounts. */ |
@@ -680,10 +712,16 @@ int sk_unattached_filter_create(struct sk_filter **pfp, | |||
680 | fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); | 712 | fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); |
681 | if (!fp) | 713 | if (!fp) |
682 | return -ENOMEM; | 714 | return -ENOMEM; |
715 | |||
683 | memcpy(fp->insns, fprog->filter, fsize); | 716 | memcpy(fp->insns, fprog->filter, fsize); |
684 | 717 | ||
685 | atomic_set(&fp->refcnt, 1); | 718 | atomic_set(&fp->refcnt, 1); |
686 | fp->len = fprog->len; | 719 | fp->len = fprog->len; |
720 | /* Since unattached filters are not copied back to user | ||
721 | * space through sk_get_filter(), we do not need to hold | ||
722 | * a copy here, and can spare us the work. | ||
723 | */ | ||
724 | fp->orig_prog = NULL; | ||
687 | 725 | ||
688 | err = __sk_prepare_filter(fp); | 726 | err = __sk_prepare_filter(fp); |
689 | if (err) | 727 | if (err) |
@@ -716,7 +754,7 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); | |||
716 | int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | 754 | int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) |
717 | { | 755 | { |
718 | struct sk_filter *fp, *old_fp; | 756 | struct sk_filter *fp, *old_fp; |
719 | unsigned int fsize = sizeof(struct sock_filter) * fprog->len; | 757 | unsigned int fsize = sk_filter_proglen(fprog); |
720 | unsigned int sk_fsize = sk_filter_size(fprog->len); | 758 | unsigned int sk_fsize = sk_filter_size(fprog->len); |
721 | int err; | 759 | int err; |
722 | 760 | ||
@@ -730,6 +768,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
730 | fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); | 768 | fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); |
731 | if (!fp) | 769 | if (!fp) |
732 | return -ENOMEM; | 770 | return -ENOMEM; |
771 | |||
733 | if (copy_from_user(fp->insns, fprog->filter, fsize)) { | 772 | if (copy_from_user(fp->insns, fprog->filter, fsize)) { |
734 | sock_kfree_s(sk, fp, sk_fsize); | 773 | sock_kfree_s(sk, fp, sk_fsize); |
735 | return -EFAULT; | 774 | return -EFAULT; |
@@ -738,6 +777,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
738 | atomic_set(&fp->refcnt, 1); | 777 | atomic_set(&fp->refcnt, 1); |
739 | fp->len = fprog->len; | 778 | fp->len = fprog->len; |
740 | 779 | ||
780 | err = sk_store_orig_filter(fp, fprog); | ||
781 | if (err) { | ||
782 | sk_filter_uncharge(sk, fp); | ||
783 | return -ENOMEM; | ||
784 | } | ||
785 | |||
741 | err = __sk_prepare_filter(fp); | 786 | err = __sk_prepare_filter(fp); |
742 | if (err) { | 787 | if (err) { |
743 | sk_filter_uncharge(sk, fp); | 788 | sk_filter_uncharge(sk, fp); |
@@ -750,6 +795,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
750 | 795 | ||
751 | if (old_fp) | 796 | if (old_fp) |
752 | sk_filter_uncharge(sk, old_fp); | 797 | sk_filter_uncharge(sk, old_fp); |
798 | |||
753 | return 0; | 799 | return 0; |
754 | } | 800 | } |
755 | EXPORT_SYMBOL_GPL(sk_attach_filter); | 801 | EXPORT_SYMBOL_GPL(sk_attach_filter); |
@@ -769,6 +815,7 @@ int sk_detach_filter(struct sock *sk) | |||
769 | sk_filter_uncharge(sk, filter); | 815 | sk_filter_uncharge(sk, filter); |
770 | ret = 0; | 816 | ret = 0; |
771 | } | 817 | } |
818 | |||
772 | return ret; | 819 | return ret; |
773 | } | 820 | } |
774 | EXPORT_SYMBOL_GPL(sk_detach_filter); | 821 | EXPORT_SYMBOL_GPL(sk_detach_filter); |
@@ -851,34 +898,41 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) | |||
851 | to->k = filt->k; | 898 | to->k = filt->k; |
852 | } | 899 | } |
853 | 900 | ||
854 | int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) | 901 | int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, |
902 | unsigned int len) | ||
855 | { | 903 | { |
904 | struct sock_fprog_kern *fprog; | ||
856 | struct sk_filter *filter; | 905 | struct sk_filter *filter; |
857 | int i, ret; | 906 | int ret = 0; |
858 | 907 | ||
859 | lock_sock(sk); | 908 | lock_sock(sk); |
860 | filter = rcu_dereference_protected(sk->sk_filter, | 909 | filter = rcu_dereference_protected(sk->sk_filter, |
861 | sock_owned_by_user(sk)); | 910 | sock_owned_by_user(sk)); |
862 | ret = 0; | ||
863 | if (!filter) | 911 | if (!filter) |
864 | goto out; | 912 | goto out; |
865 | ret = filter->len; | 913 | |
914 | /* We're copying the filter that has been originally attached, | ||
915 | * so no conversion/decode needed anymore. | ||
916 | */ | ||
917 | fprog = filter->orig_prog; | ||
918 | |||
919 | ret = fprog->len; | ||
866 | if (!len) | 920 | if (!len) |
921 | /* User space only enquires number of filter blocks. */ | ||
867 | goto out; | 922 | goto out; |
923 | |||
868 | ret = -EINVAL; | 924 | ret = -EINVAL; |
869 | if (len < filter->len) | 925 | if (len < fprog->len) |
870 | goto out; | 926 | goto out; |
871 | 927 | ||
872 | ret = -EFAULT; | 928 | ret = -EFAULT; |
873 | for (i = 0; i < filter->len; i++) { | 929 | if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog))) |
874 | struct sock_filter fb; | 930 | goto out; |
875 | |||
876 | sk_decode_filter(&filter->insns[i], &fb); | ||
877 | if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) | ||
878 | goto out; | ||
879 | } | ||
880 | 931 | ||
881 | ret = filter->len; | 932 | /* Instead of bytes, the API requests to return the number |
933 | * of filter blocks. | ||
934 | */ | ||
935 | ret = fprog->len; | ||
882 | out: | 936 | out: |
883 | release_sock(sk); | 937 | release_sock(sk); |
884 | return ret; | 938 | return ret; |
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a0e9cf6379de..d7af18859322 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c | |||
@@ -52,9 +52,10 @@ EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); | |||
52 | int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, | 52 | int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, |
53 | struct sk_buff *skb, int attrtype) | 53 | struct sk_buff *skb, int attrtype) |
54 | { | 54 | { |
55 | struct nlattr *attr; | 55 | struct sock_fprog_kern *fprog; |
56 | struct sk_filter *filter; | 56 | struct sk_filter *filter; |
57 | unsigned int len; | 57 | struct nlattr *attr; |
58 | unsigned int flen; | ||
58 | int err = 0; | 59 | int err = 0; |
59 | 60 | ||
60 | if (!ns_capable(user_ns, CAP_NET_ADMIN)) { | 61 | if (!ns_capable(user_ns, CAP_NET_ADMIN)) { |
@@ -63,24 +64,20 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, | |||
63 | } | 64 | } |
64 | 65 | ||
65 | rcu_read_lock(); | 66 | rcu_read_lock(); |
66 | |||
67 | filter = rcu_dereference(sk->sk_filter); | 67 | filter = rcu_dereference(sk->sk_filter); |
68 | len = filter ? filter->len * sizeof(struct sock_filter) : 0; | 68 | if (!filter) |
69 | goto out; | ||
69 | 70 | ||
70 | attr = nla_reserve(skb, attrtype, len); | 71 | fprog = filter->orig_prog; |
72 | flen = sk_filter_proglen(fprog); | ||
73 | |||
74 | attr = nla_reserve(skb, attrtype, flen); | ||
71 | if (attr == NULL) { | 75 | if (attr == NULL) { |
72 | err = -EMSGSIZE; | 76 | err = -EMSGSIZE; |
73 | goto out; | 77 | goto out; |
74 | } | 78 | } |
75 | 79 | ||
76 | if (filter) { | 80 | memcpy(nla_data(attr), fprog->filter, flen); |
77 | struct sock_filter *fb = (struct sock_filter *)nla_data(attr); | ||
78 | int i; | ||
79 | |||
80 | for (i = 0; i < filter->len; i++, fb++) | ||
81 | sk_decode_filter(&filter->insns[i], fb); | ||
82 | } | ||
83 | |||
84 | out: | 81 | out: |
85 | rcu_read_unlock(); | 82 | rcu_read_unlock(); |
86 | return err; | 83 | return err; |