aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorPavel Emelyanov <xemul@parallels.com>2012-10-31 22:01:48 -0400
committerDavid S. Miller <davem@davemloft.net>2012-11-01 11:17:15 -0400
commita8fc92778080c845eaadc369a0ecf5699a03bef0 (patch)
tree2d83a965c40765c52195e32a903bd8c7678f014d /net/core
parent96442e42429e5f268ab97a3586c7694a3acc55a7 (diff)
sk-filter: Add ability to get socket filter program (v2)
The SO_ATTACH_FILTER option is set only. I propose to add the get ability by using SO_ATTACH_FILTER in getsockopt. To be less irritating to eyes the SO_GET_FILTER alias to it is declared. This ability is required by checkpoint-restore project to be able to save full state of a socket. There are two issues with getting filter back. First, kernel modifies the sock_filter->code on filter load, thus in order to return the filter element back to user we have to decode it into user-visible constants. Fortunately the modification in question is interconvertible. Second, the BPF_S_ALU_DIV_K code modifies the command argument k to speed up the run-time division by doing kernel_k = reciprocal(user_k). Bad news is that different user_k may result in same kernel_k, so we can't get the original user_k back. Good news is that we don't have to do it. What we need to is calculate a user2_k so, that reciprocal(user2_k) == reciprocal(user_k) == kernel_k i.e. if it's re-loaded back the compiled again value will be exactly the same as it was. That said, the user2_k can be calculated like this user2_k = reciprocal(kernel_k) with an exception, that if kernel_k == 0, then user2_k == 1. The optlen argument is treated like this -- when zero, kernel returns the amount of sock_fprog elements in filter, otherwise it should be large enough for the sock_fprog array. changes since v1: * Declared SO_GET_FILTER in all arch headers * Added decode of vlan-tag codes Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/filter.c130
-rw-r--r--net/core/sock.c6
2 files changed, 136 insertions, 0 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 5a114d41bf11..c23543cba132 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -760,3 +760,133 @@ int sk_detach_filter(struct sock *sk)
760 return ret; 760 return ret;
761} 761}
762EXPORT_SYMBOL_GPL(sk_detach_filter); 762EXPORT_SYMBOL_GPL(sk_detach_filter);
763
764static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
765{
766 static const u16 decodes[] = {
767 [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
768 [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
769 [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
770 [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
771 [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
772 [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
773 [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
774 [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
775 [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
776 [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
777 [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
778 [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
779 [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
780 [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
781 [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
782 [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
783 [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
784 [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
785 [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
786 [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
787 [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
788 [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
789 [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
790 [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
791 [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
792 [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
793 [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
794 [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
795 [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
796 [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
797 [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
798 [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
799 [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
800 [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
801 [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
802 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
803 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
804 [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
805 [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
806 [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
807 [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
808 [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
809 [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
810 [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
811 [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
812 [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
813 [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
814 [BPF_S_RET_K] = BPF_RET|BPF_K,
815 [BPF_S_RET_A] = BPF_RET|BPF_A,
816 [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
817 [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
818 [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
819 [BPF_S_ST] = BPF_ST,
820 [BPF_S_STX] = BPF_STX,
821 [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
822 [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
823 [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
824 [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
825 [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
826 [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
827 [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
828 [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
829 [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
830 };
831 u16 code;
832
833 code = filt->code;
834
835 to->code = decodes[code];
836 to->jt = filt->jt;
837 to->jf = filt->jf;
838
839 if (code == BPF_S_ALU_DIV_K) {
840 /*
841 * When loaded this rule user gave us X, which was
842 * translated into R = r(X). Now we calculate the
843 * RR = r(R) and report it back. If next time this
844 * value is loaded and RRR = r(RR) is calculated
845 * then the R == RRR will be true.
846 *
847 * One exception. X == 1 translates into R == 0 and
848 * we can't calculate RR out of it with r().
849 */
850
851 if (filt->k == 0)
852 to->k = 1;
853 else
854 to->k = reciprocal_value(filt->k);
855
856 BUG_ON(reciprocal_value(to->k) != filt->k);
857 } else
858 to->k = filt->k;
859}
860
861int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
862{
863 struct sk_filter *filter;
864 int i, ret;
865
866 lock_sock(sk);
867 filter = rcu_dereference_protected(sk->sk_filter,
868 sock_owned_by_user(sk));
869 ret = 0;
870 if (!filter)
871 goto out;
872 ret = filter->len;
873 if (!len)
874 goto out;
875 ret = -EINVAL;
876 if (len < filter->len)
877 goto out;
878
879 ret = -EFAULT;
880 for (i = 0; i < filter->len; i++) {
881 struct sock_filter fb;
882
883 sk_decode_filter(&filter->insns[i], &fb);
884 if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
885 goto out;
886 }
887
888 ret = filter->len;
889out:
890 release_sock(sk);
891 return ret;
892}
diff --git a/net/core/sock.c b/net/core/sock.c
index 0a023b8daa55..06286006a2cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1077,6 +1077,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1077 case SO_BINDTODEVICE: 1077 case SO_BINDTODEVICE:
1078 v.val = sk->sk_bound_dev_if; 1078 v.val = sk->sk_bound_dev_if;
1079 break; 1079 break;
1080 case SO_GET_FILTER:
1081 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1082 if (len < 0)
1083 return len;
1084
1085 goto lenout;
1080 default: 1086 default:
1081 return -ENOPROTOOPT; 1087 return -ENOPROTOOPT;
1082 } 1088 }