author		David S. Miller <davem@davemloft.net>	2014-03-31 00:45:49 -0400
committer	David S. Miller <davem@davemloft.net>	2014-03-31 00:45:49 -0400
commit		9109e17f7c3ace48629397b44db5ce06bf168644 (patch)
tree		495b67bcf755829a5409da5b7444ea9b93f60b35
parent		64c27237a07129758e33f5f824ba5c33b7f57417 (diff)
parent		9a985cdc5ccb0d557720221d01bd70c19f04bb8c (diff)
Merge branch 'filter-next'
Daniel Borkmann says:

====================
BPF updates

We sat down and have heavily reworked the whole previous patchset from v10 [1]
to address all comments/concerns. This patchset therefore *replaces* the
internal BPF interpreter with the new layout as discussed in [1], and migrates
some exotic callers to properly use the BPF API for a transparent upgrade. All
other callers that already use the BPF API the way it should be used need no
further changes to run the new internals. We also removed the sysctl knob
entirely, and do not expose any structure to userland, so that implementation
details only reside in kernel space.

Since we are replacing the interpreter, we had to migrate seccomp in one patch
along with the interpreter to not break anything. When attaching a new filter,
the flow can be described as follows: i) test if the JIT compiler is enabled
and can compile the user BPF, ii) if so, then go for it, iii) if not, then
transparently migrate the filter into the new representation and run it in the
interpreter. Also, we have scratched the jit flag from the len attribute and
made it the initial patch in this series, as Pablo suggested in the last
feedback, thanks.

For details, please refer to the patches themselves. We did extensive testing
of BPF and seccomp on the new interpreter itself and also on the user ABIs and
could not find any issues; the new performance numbers as posted in patch 8 are
also still the same. Please find more details in the patches themselves.

For all the previous history from v1 to v10, see [1]. We have decided to drop
the v11 as we have pedantically reworked the set, but of course, included all
previous feedback.

v3 -> v4:
 - Applied feedback from Dave regarding swap insns
 - Rebased on net-next
v2 -> v3:
 - Rebased to latest net-next (i.e. w/ rxhash->hash rename)
 - Fixed patch 8/9 commit message/doc as suggested by Dave
 - Rest is unchanged
v1 -> v2:
 - Rebased to latest net-next
 - Added static to ptp_filter as suggested by Dave
 - Fixed a typo in patch 8's commit message
 - Rest unchanged

Thanks!

 [1] http://thread.gmane.org/gmane.linux.kernel/1665858
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  Documentation/networking/filter.txt                   |  125
-rw-r--r--  arch/arm/net/bpf_jit_32.c                             |    3
-rw-r--r--  arch/powerpc/net/bpf_jit_comp.c                       |    3
-rw-r--r--  arch/s390/net/bpf_jit_comp.c                          |    5
-rw-r--r--  arch/sparc/net/bpf_jit_comp.c                         |    3
-rw-r--r--  arch/x86/net/bpf_jit_comp.c                           |    3
-rw-r--r--  drivers/isdn/i4l/isdn_ppp.c                           |   61
-rw-r--r--  drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c  |   11
-rw-r--r--  drivers/net/ethernet/ti/cpts.c                        |   10
-rw-r--r--  drivers/net/ethernet/xscale/ixp4xx_eth.c              |   11
-rw-r--r--  drivers/net/ppp/ppp_generic.c                         |   60
-rw-r--r--  include/linux/filter.h                                |  118
-rw-r--r--  include/linux/isdn_ppp.h                              |    5
-rw-r--r--  include/linux/ptp_classify.h                          |   14
-rw-r--r--  include/linux/seccomp.h                               |    1
-rw-r--r--  include/net/sock.h                                    |   27
-rw-r--r--  kernel/seccomp.c                                      |  119
-rw-r--r--  net/core/filter.c                                     | 1565
-rw-r--r--  net/core/sock_diag.c                                  |   23
-rw-r--r--  net/core/timestamping.c                               |   27
20 files changed, 1658 insertions, 536 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index a06b48d2f5cc..81f940f4e884 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -546,6 +546,130 @@ ffffffffa0069c8f + <x>:
For BPF JIT developers, bpf_jit_disasm, bpf_asm and bpf_dbg provide a useful
toolchain for developing and testing the kernel's JIT compiler.

BPF kernel internals
--------------------
Internally, for the kernel interpreter, a different BPF instruction set
format is used, with underlying principles similar to the BPF described in
the previous paragraphs. However, the instruction set format is modelled
closer to the underlying architecture to mimic native instruction sets, so
that better performance can be achieved (more details later).

It is designed to be JITed with a one-to-one mapping, which can also open up
the possibility for GCC/LLVM compilers to generate optimized BPF code through
a BPF backend that performs almost as fast as natively compiled code.

The new instruction set was originally designed with the goal in mind of
writing programs in "restricted C" and compiling them into BPF with an
optional GCC/LLVM backend, so that they can just-in-time map to modern 64-bit
CPUs with minimal performance overhead over two steps, that is,
C -> BPF -> native code.

Currently, the new format is being used for running user BPF programs, which
includes seccomp BPF, classic socket filters, the cls_bpf traffic classifier,
the team driver's classifier for its load-balancing mode, netfilter's xt_bpf
extension, the PTP dissector/classifier, and much more. They are all
internally converted by the kernel into the new instruction set
representation and run in the extended interpreter. For in-kernel handlers,
this all works transparently by using sk_unattached_filter_create() for
setting up the filter and sk_unattached_filter_destroy() for destroying it.
The macro SK_RUN_FILTER(filter, ctx) transparently invokes the right BPF
function to run the filter. 'filter' is a pointer to struct sk_filter that we
got from sk_unattached_filter_create(), and 'ctx' is the given context (e.g.
skb pointer). All constraints and restrictions from sk_chk_filter() apply
before a conversion to the new layout is done behind the scenes!

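As a minimal sketch of the in-kernel handler usage described above, mirroring
the converted callers in this very patch (e.g. isdn_ppp.c, ppp_generic.c); the
my_* names are purely illustrative and not part of the patch:

  #include <linux/filter.h>

  static struct sk_filter *my_filter;

  /* Build an unattached filter from a classic BPF program. The create call
   * applies sk_chk_filter() and either JITs the program or converts it to
   * the new internal representation behind the scenes.
   */
  static int my_setup_filter(struct sock_filter *insns, unsigned short len)
  {
          struct sock_fprog fprog = {
                  .len    = len,          /* number of classic BPF insns */
                  .filter = insns,        /* classic BPF program */
          };

          return sk_unattached_filter_create(&my_filter, &fprog);
  }

  /* Run the filter on an skb; returns the length to keep, 0 for none. */
  static unsigned int my_run_filter(const struct sk_buff *skb)
  {
          return SK_RUN_FILTER(my_filter, skb);
  }

  static void my_release_filter(void)
  {
          sk_unattached_filter_destroy(my_filter);
  }

The converted callers in this series follow exactly this create/run/destroy
pattern.
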
Currently, for JITing, the user BPF format is still being used and the
existing BPF JIT compilers are reused whenever possible. In other words, we
do not (yet!) perform a JIT compilation in the new layout; however, future
work will successively migrate traditional JIT compilers into the new
instruction format as well, so that they will profit from the very same
benefits. Thus, when speaking about JIT in the following, a JIT compiler
(TBD) for the new instruction format is meant in this context.

Some core changes of the new internal format:

- Number of registers increases from 2 to 10:

  The old format had two registers A and X, and a hidden frame pointer. The
  new layout extends this to 10 internal registers and a read-only frame
  pointer. Since 64-bit CPUs pass arguments to functions via registers, the
  number of args from a BPF program to an in-kernel function is restricted
  to 5 and one register is used to accept the return value from an in-kernel
  function. Natively, x86_64 passes the first 6 arguments in registers,
  aarch64/sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6
  callee saved registers, and aarch64/sparcv9/mips64 have 11 or more callee
  saved registers.

  Therefore, the BPF calling convention is defined as:

    * R0	- return value from in-kernel function
    * R1 - R5	- arguments from BPF program to in-kernel function
    * R6 - R9	- callee saved registers that in-kernel function will preserve
    * R10	- read-only frame pointer to access stack

  Thus, all BPF registers map one to one to HW registers on x86_64, aarch64,
  etc, and the BPF calling convention maps directly to ABIs used by the
  kernel on 64-bit architectures (an illustrative helper signature is
  sketched right after this list of core changes).

  On 32-bit architectures a JIT may map programs that use only 32-bit
  arithmetic and let more complex programs be interpreted.

  R0 - R5 are scratch registers and a BPF program needs to spill/fill them
  if necessary across calls. Note that there is only one BPF program (== one
  BPF main routine) and it cannot call other BPF functions; it can only call
  predefined in-kernel functions.

- Register width increases from 32-bit to 64-bit:

  Still, the semantics of the original 32-bit ALU operations are preserved
  via 32-bit subregisters. All BPF registers are 64-bit with 32-bit lower
  subregisters that zero-extend into 64-bit if they are being written to.
  That behavior maps directly to the x86_64 and arm64 subregister definition,
  but makes other JITs more difficult.

  32-bit architectures run 64-bit internal BPF programs via the interpreter.
  Their JITs may convert BPF programs that only use 32-bit subregisters into
  the native instruction set and let the rest be interpreted.

  Operation is 64-bit, because on 64-bit architectures, pointers are also
  64-bit wide, and we want to pass 64-bit values in/out of kernel functions,
  so 32-bit BPF registers would otherwise require defining a register-pair
  ABI; thus, a direct BPF register to HW register mapping would not be
  possible, and the JIT would need to do combine/split/move operations for
  every register in and out of the function, which is complex, bug prone and
  slow. Another reason is the use of atomic 64-bit counters.

- Conditional jt/jf targets replaced with jt/fall-through:

  While the original design has constructs such as "if (cond) jump_true;
  else jump_false;", they are being replaced with alternative constructs like
  "if (cond) jump_true; /* else fall-through */".

- Introduces bpf_call insn and register passing convention for zero overhead
  calls from/to other kernel functions:

  After a kernel function call, R1 - R5 are reset to unreadable and R0 has
  the return type of the function. Since R6 - R9 are callee saved, their
  state is preserved across the call.

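As promised above, here is an illustrative sketch of what a callable
in-kernel function looks like under this calling convention. This is not
code from the patch; only __bpf_call_base() in net/core/filter.c (added by
this series) actually has this shape, and my_bpf_helper is a hypothetical
name:

  #include <linux/types.h>

  /* Up to five u64 arguments arrive in R1 - R5; the u64 return value is
   * placed in R0. __bpf_call_base() in this series uses exactly this
   * signature as the base for computing call offsets.
   */
  u64 my_bpf_helper(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  {
          /* r1 might carry a context pointer, r2 a length, and so on;
           * unused arguments are simply ignored.
           */
          return 0;
  }
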
Also in the new design, BPF is limited to 4096 insns, which means that any
program will terminate quickly and will only call a fixed number of kernel
functions. Original BPF and the new format are two operand instructions,
which helps to do a one-to-one mapping between a BPF insn and an x86 insn
during JIT.

The input context pointer for invoking the interpreter function is generic;
its content is defined by the specific use case. For seccomp, register R1
points to seccomp_data; for converted BPF filters, R1 points to an skb.

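This is reflected in the two context-specific entry points that this series
adds to include/linux/filter.h (shown verbatim from the patch below):

  u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
                                const struct sock_filter_int *insni);
  u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
                            const struct sock_filter_int *insni);
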
A program that is translated internally consists of the following elements:

  op:16, jt:8, jf:8, k:32    ==>    op:8, a_reg:4, x_reg:4, off:16, imm:32

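The right-hand side of this mapping is struct sock_filter_int, exactly as
added to include/linux/filter.h further down in this patch:

  struct sock_filter_int {
          __u8    code;           /* opcode */
          __u8    a_reg:4;        /* dest register */
          __u8    x_reg:4;        /* source register */
          __s16   off;            /* signed offset */
          __s32   imm;            /* signed immediate constant */
  };
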
Just like the original BPF, the new format runs within a controlled
environment, is deterministic and the kernel can easily prove that. The
safety of the program can be determined in two steps: the first step does a
depth-first-search to disallow loops and other CFG validation; the second
step starts from the first insn and descends all possible paths. It
simulates execution of every insn and observes the state change of registers
and stack.

Misc
----

@@ -561,3 +685,4 @@ the underlying architecture.

Jay Schulist <jschlst@samba.org>
Daniel Borkmann <dborkman@redhat.com>
Alexei Starovoitov <ast@plumgrid.com>
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 7ddb9c83cdfc..6f879c319a9d 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -925,6 +925,7 @@ void bpf_jit_compile(struct sk_filter *fp)
925 bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); 925 bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
926 926
927 fp->bpf_func = (void *)ctx.target; 927 fp->bpf_func = (void *)ctx.target;
928 fp->jited = 1;
928out: 929out:
929 kfree(ctx.offsets); 930 kfree(ctx.offsets);
930 return; 931 return;
@@ -932,7 +933,7 @@ out:
932 933
933void bpf_jit_free(struct sk_filter *fp) 934void bpf_jit_free(struct sk_filter *fp)
934{ 935{
935 if (fp->bpf_func != sk_run_filter) 936 if (fp->jited)
936 module_free(NULL, fp->bpf_func); 937 module_free(NULL, fp->bpf_func);
937 kfree(fp); 938 kfree(fp);
938} 939}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 4afad6c17d50..808ce1cae21a 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -689,6 +689,7 @@ void bpf_jit_compile(struct sk_filter *fp)
689 ((u64 *)image)[0] = (u64)code_base; 689 ((u64 *)image)[0] = (u64)code_base;
690 ((u64 *)image)[1] = local_paca->kernel_toc; 690 ((u64 *)image)[1] = local_paca->kernel_toc;
691 fp->bpf_func = (void *)image; 691 fp->bpf_func = (void *)image;
692 fp->jited = 1;
692 } 693 }
693out: 694out:
694 kfree(addrs); 695 kfree(addrs);
@@ -697,7 +698,7 @@ out:
697 698
698void bpf_jit_free(struct sk_filter *fp) 699void bpf_jit_free(struct sk_filter *fp)
699{ 700{
700 if (fp->bpf_func != sk_run_filter) 701 if (fp->jited)
701 module_free(NULL, fp->bpf_func); 702 module_free(NULL, fp->bpf_func);
702 kfree(fp); 703 kfree(fp);
703} 704}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 153f8f2cfd56..9c36dc398f90 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -877,6 +877,7 @@ void bpf_jit_compile(struct sk_filter *fp)
877 if (jit.start) { 877 if (jit.start) {
878 set_memory_ro((unsigned long)header, header->pages); 878 set_memory_ro((unsigned long)header, header->pages);
879 fp->bpf_func = (void *) jit.start; 879 fp->bpf_func = (void *) jit.start;
880 fp->jited = 1;
880 } 881 }
881out: 882out:
882 kfree(addrs); 883 kfree(addrs);
@@ -887,10 +888,12 @@ void bpf_jit_free(struct sk_filter *fp)
887 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; 888 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
888 struct bpf_binary_header *header = (void *)addr; 889 struct bpf_binary_header *header = (void *)addr;
889 890
890 if (fp->bpf_func == sk_run_filter) 891 if (!fp->jited)
891 goto free_filter; 892 goto free_filter;
893
892 set_memory_rw(addr, header->pages); 894 set_memory_rw(addr, header->pages);
893 module_free(NULL, header); 895 module_free(NULL, header);
896
894free_filter: 897free_filter:
895 kfree(fp); 898 kfree(fp);
896} 899}
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index d96d2a7c78ee..a82c6b2a9780 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -809,6 +809,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
809 if (image) { 809 if (image) {
810 bpf_flush_icache(image, image + proglen); 810 bpf_flush_icache(image, image + proglen);
811 fp->bpf_func = (void *)image; 811 fp->bpf_func = (void *)image;
812 fp->jited = 1;
812 } 813 }
813out: 814out:
814 kfree(addrs); 815 kfree(addrs);
@@ -817,7 +818,7 @@ out:
817 818
818void bpf_jit_free(struct sk_filter *fp) 819void bpf_jit_free(struct sk_filter *fp)
819{ 820{
820 if (fp->bpf_func != sk_run_filter) 821 if (fp->jited)
821 module_free(NULL, fp->bpf_func); 822 module_free(NULL, fp->bpf_func);
822 kfree(fp); 823 kfree(fp);
823} 824}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 293c57b74edc..dc017735bb91 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -772,6 +772,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
772 bpf_flush_icache(header, image + proglen); 772 bpf_flush_icache(header, image + proglen);
773 set_memory_ro((unsigned long)header, header->pages); 773 set_memory_ro((unsigned long)header, header->pages);
774 fp->bpf_func = (void *)image; 774 fp->bpf_func = (void *)image;
775 fp->jited = 1;
775 } 776 }
776out: 777out:
777 kfree(addrs); 778 kfree(addrs);
@@ -791,7 +792,7 @@ static void bpf_jit_free_deferred(struct work_struct *work)
791 792
792void bpf_jit_free(struct sk_filter *fp) 793void bpf_jit_free(struct sk_filter *fp)
793{ 794{
794 if (fp->bpf_func != sk_run_filter) { 795 if (fp->jited) {
795 INIT_WORK(&fp->work, bpf_jit_free_deferred); 796 INIT_WORK(&fp->work, bpf_jit_free_deferred);
796 schedule_work(&fp->work); 797 schedule_work(&fp->work);
797 } else { 798 } else {
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 38ceac5053a0..a5da511e3c9a 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -378,10 +378,15 @@ isdn_ppp_release(int min, struct file *file)
378 is->slcomp = NULL; 378 is->slcomp = NULL;
379#endif 379#endif
380#ifdef CONFIG_IPPP_FILTER 380#ifdef CONFIG_IPPP_FILTER
381 kfree(is->pass_filter); 381 if (is->pass_filter) {
382 is->pass_filter = NULL; 382 sk_unattached_filter_destroy(is->pass_filter);
383 kfree(is->active_filter); 383 is->pass_filter = NULL;
384 is->active_filter = NULL; 384 }
385
386 if (is->active_filter) {
387 sk_unattached_filter_destroy(is->active_filter);
388 is->active_filter = NULL;
389 }
385#endif 390#endif
386 391
387/* TODO: if this was the previous master: link the stuff to the new master */ 392/* TODO: if this was the previous master: link the stuff to the new master */
@@ -629,25 +634,41 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg)
629#ifdef CONFIG_IPPP_FILTER 634#ifdef CONFIG_IPPP_FILTER
630 case PPPIOCSPASS: 635 case PPPIOCSPASS:
631 { 636 {
637 struct sock_fprog fprog;
632 struct sock_filter *code; 638 struct sock_filter *code;
633 int len = get_filter(argp, &code); 639 int err, len = get_filter(argp, &code);
640
634 if (len < 0) 641 if (len < 0)
635 return len; 642 return len;
636 kfree(is->pass_filter); 643
637 is->pass_filter = code; 644 fprog.len = len;
638 is->pass_len = len; 645 fprog.filter = code;
639 break; 646
647 if (is->pass_filter)
648 sk_unattached_filter_destroy(is->pass_filter);
649 err = sk_unattached_filter_create(&is->pass_filter, &fprog);
650 kfree(code);
651
652 return err;
640 } 653 }
641 case PPPIOCSACTIVE: 654 case PPPIOCSACTIVE:
642 { 655 {
656 struct sock_fprog fprog;
643 struct sock_filter *code; 657 struct sock_filter *code;
644 int len = get_filter(argp, &code); 658 int err, len = get_filter(argp, &code);
659
645 if (len < 0) 660 if (len < 0)
646 return len; 661 return len;
647 kfree(is->active_filter); 662
648 is->active_filter = code; 663 fprog.len = len;
649 is->active_len = len; 664 fprog.filter = code;
650 break; 665
666 if (is->active_filter)
667 sk_unattached_filter_destroy(is->active_filter);
668 err = sk_unattached_filter_create(&is->active_filter, &fprog);
669 kfree(code);
670
671 return err;
651 } 672 }
652#endif /* CONFIG_IPPP_FILTER */ 673#endif /* CONFIG_IPPP_FILTER */
653 default: 674 default:
@@ -1147,14 +1168,14 @@ isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff *
1147 } 1168 }
1148 1169
1149 if (is->pass_filter 1170 if (is->pass_filter
1150 && sk_run_filter(skb, is->pass_filter) == 0) { 1171 && SK_RUN_FILTER(is->pass_filter, skb) == 0) {
1151 if (is->debug & 0x2) 1172 if (is->debug & 0x2)
1152 printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); 1173 printk(KERN_DEBUG "IPPP: inbound frame filtered.\n");
1153 kfree_skb(skb); 1174 kfree_skb(skb);
1154 return; 1175 return;
1155 } 1176 }
1156 if (!(is->active_filter 1177 if (!(is->active_filter
1157 && sk_run_filter(skb, is->active_filter) == 0)) { 1178 && SK_RUN_FILTER(is->active_filter, skb) == 0)) {
1158 if (is->debug & 0x2) 1179 if (is->debug & 0x2)
1159 printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); 1180 printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n");
1160 lp->huptimer = 0; 1181 lp->huptimer = 0;
@@ -1293,14 +1314,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
1293 } 1314 }
1294 1315
1295 if (ipt->pass_filter 1316 if (ipt->pass_filter
1296 && sk_run_filter(skb, ipt->pass_filter) == 0) { 1317 && SK_RUN_FILTER(ipt->pass_filter, skb) == 0) {
1297 if (ipt->debug & 0x4) 1318 if (ipt->debug & 0x4)
1298 printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); 1319 printk(KERN_DEBUG "IPPP: outbound frame filtered.\n");
1299 kfree_skb(skb); 1320 kfree_skb(skb);
1300 goto unlock; 1321 goto unlock;
1301 } 1322 }
1302 if (!(ipt->active_filter 1323 if (!(ipt->active_filter
1303 && sk_run_filter(skb, ipt->active_filter) == 0)) { 1324 && SK_RUN_FILTER(ipt->active_filter, skb) == 0)) {
1304 if (ipt->debug & 0x4) 1325 if (ipt->debug & 0x4)
1305 printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); 1326 printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n");
1306 lp->huptimer = 0; 1327 lp->huptimer = 0;
@@ -1490,9 +1511,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
1490 } 1511 }
1491 1512
1492 drop |= is->pass_filter 1513 drop |= is->pass_filter
1493 && sk_run_filter(skb, is->pass_filter) == 0; 1514 && SK_RUN_FILTER(is->pass_filter, skb) == 0;
1494 drop |= is->active_filter 1515 drop |= is->active_filter
1495 && sk_run_filter(skb, is->active_filter) == 0; 1516 && SK_RUN_FILTER(is->active_filter, skb) == 0;
1496 1517
1497 skb_push(skb, IPPP_MAX_HEADER - 4); 1518 skb_push(skb, IPPP_MAX_HEADER - 4);
1498 return drop; 1519 return drop;
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 464e91058c81..73e66838cfef 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -120,10 +120,6 @@ static void pch_gbe_mdio_write(struct net_device *netdev, int addr, int reg,
120 int data); 120 int data);
121static void pch_gbe_set_multi(struct net_device *netdev); 121static void pch_gbe_set_multi(struct net_device *netdev);
122 122
123static struct sock_filter ptp_filter[] = {
124 PTP_FILTER
125};
126
127static int pch_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid) 123static int pch_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid)
128{ 124{
129 u8 *data = skb->data; 125 u8 *data = skb->data;
@@ -131,7 +127,7 @@ static int pch_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid)
131 u16 *hi, *id; 127 u16 *hi, *id;
132 u32 lo; 128 u32 lo;
133 129
134 if (sk_run_filter(skb, ptp_filter) == PTP_CLASS_NONE) 130 if (ptp_classify_raw(skb) == PTP_CLASS_NONE)
135 return 0; 131 return 0;
136 132
137 offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; 133 offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN;
@@ -2635,11 +2631,6 @@ static int pch_gbe_probe(struct pci_dev *pdev,
2635 2631
2636 adapter->ptp_pdev = pci_get_bus_and_slot(adapter->pdev->bus->number, 2632 adapter->ptp_pdev = pci_get_bus_and_slot(adapter->pdev->bus->number,
2637 PCI_DEVFN(12, 4)); 2633 PCI_DEVFN(12, 4));
2638 if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) {
2639 dev_err(&pdev->dev, "Bad ptp filter\n");
2640 ret = -EINVAL;
2641 goto err_free_netdev;
2642 }
2643 2634
2644 netdev->netdev_ops = &pch_gbe_netdev_ops; 2635 netdev->netdev_ops = &pch_gbe_netdev_ops;
2645 netdev->watchdog_timeo = PCH_GBE_WATCHDOG_PERIOD; 2636 netdev->watchdog_timeo = PCH_GBE_WATCHDOG_PERIOD;
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 372cb192c5aa..a3bbf59eaafd 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -31,10 +31,6 @@
31 31
32#ifdef CONFIG_TI_CPTS 32#ifdef CONFIG_TI_CPTS
33 33
34static struct sock_filter ptp_filter[] = {
35 PTP_FILTER
36};
37
38#define cpts_read32(c, r) __raw_readl(&c->reg->r) 34#define cpts_read32(c, r) __raw_readl(&c->reg->r)
39#define cpts_write32(c, v, r) __raw_writel(v, &c->reg->r) 35#define cpts_write32(c, v, r) __raw_writel(v, &c->reg->r)
40 36
@@ -301,7 +297,7 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type)
301 u64 ns = 0; 297 u64 ns = 0;
302 struct cpts_event *event; 298 struct cpts_event *event;
303 struct list_head *this, *next; 299 struct list_head *this, *next;
304 unsigned int class = sk_run_filter(skb, ptp_filter); 300 unsigned int class = ptp_classify_raw(skb);
305 unsigned long flags; 301 unsigned long flags;
306 u16 seqid; 302 u16 seqid;
307 u8 mtype; 303 u8 mtype;
@@ -372,10 +368,6 @@ int cpts_register(struct device *dev, struct cpts *cpts,
372 int err, i; 368 int err, i;
373 unsigned long flags; 369 unsigned long flags;
374 370
375 if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) {
376 pr_err("cpts: bad ptp filter\n");
377 return -EINVAL;
378 }
379 cpts->info = cpts_info; 371 cpts->info = cpts_info;
380 cpts->clock = ptp_clock_register(&cpts->info, dev); 372 cpts->clock = ptp_clock_register(&cpts->info, dev);
381 if (IS_ERR(cpts->clock)) { 373 if (IS_ERR(cpts->clock)) {
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 25283f17d82f..f7e0f0f7c2e2 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -256,10 +256,6 @@ static int ports_open;
256static struct port *npe_port_tab[MAX_NPES]; 256static struct port *npe_port_tab[MAX_NPES];
257static struct dma_pool *dma_pool; 257static struct dma_pool *dma_pool;
258 258
259static struct sock_filter ptp_filter[] = {
260 PTP_FILTER
261};
262
263static int ixp_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid) 259static int ixp_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid)
264{ 260{
265 u8 *data = skb->data; 261 u8 *data = skb->data;
@@ -267,7 +263,7 @@ static int ixp_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid)
267 u16 *hi, *id; 263 u16 *hi, *id;
268 u32 lo; 264 u32 lo;
269 265
270 if (sk_run_filter(skb, ptp_filter) != PTP_CLASS_V1_IPV4) 266 if (ptp_classify_raw(skb) != PTP_CLASS_V1_IPV4)
271 return 0; 267 return 0;
272 268
273 offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; 269 offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN;
@@ -1413,11 +1409,6 @@ static int eth_init_one(struct platform_device *pdev)
1413 char phy_id[MII_BUS_ID_SIZE + 3]; 1409 char phy_id[MII_BUS_ID_SIZE + 3];
1414 int err; 1410 int err;
1415 1411
1416 if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) {
1417 pr_err("ixp4xx_eth: bad ptp filter\n");
1418 return -EINVAL;
1419 }
1420
1421 if (!(dev = alloc_etherdev(sizeof(struct port)))) 1412 if (!(dev = alloc_etherdev(sizeof(struct port))))
1422 return -ENOMEM; 1413 return -ENOMEM;
1423 1414
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 72ff14b811c6..e3923ebb693f 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -143,9 +143,8 @@ struct ppp {
143 struct sk_buff_head mrq; /* MP: receive reconstruction queue */ 143 struct sk_buff_head mrq; /* MP: receive reconstruction queue */
144#endif /* CONFIG_PPP_MULTILINK */ 144#endif /* CONFIG_PPP_MULTILINK */
145#ifdef CONFIG_PPP_FILTER 145#ifdef CONFIG_PPP_FILTER
146 struct sock_filter *pass_filter; /* filter for packets to pass */ 146 struct sk_filter *pass_filter; /* filter for packets to pass */
147 struct sock_filter *active_filter;/* filter for pkts to reset idle */ 147 struct sk_filter *active_filter;/* filter for pkts to reset idle */
148 unsigned pass_len, active_len;
149#endif /* CONFIG_PPP_FILTER */ 148#endif /* CONFIG_PPP_FILTER */
150 struct net *ppp_net; /* the net we belong to */ 149 struct net *ppp_net; /* the net we belong to */
151 struct ppp_link_stats stats64; /* 64 bit network stats */ 150 struct ppp_link_stats stats64; /* 64 bit network stats */
@@ -755,28 +754,42 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
755 case PPPIOCSPASS: 754 case PPPIOCSPASS:
756 { 755 {
757 struct sock_filter *code; 756 struct sock_filter *code;
757
758 err = get_filter(argp, &code); 758 err = get_filter(argp, &code);
759 if (err >= 0) { 759 if (err >= 0) {
760 struct sock_fprog fprog = {
761 .len = err,
762 .filter = code,
763 };
764
760 ppp_lock(ppp); 765 ppp_lock(ppp);
761 kfree(ppp->pass_filter); 766 if (ppp->pass_filter)
762 ppp->pass_filter = code; 767 sk_unattached_filter_destroy(ppp->pass_filter);
763 ppp->pass_len = err; 768 err = sk_unattached_filter_create(&ppp->pass_filter,
769 &fprog);
770 kfree(code);
764 ppp_unlock(ppp); 771 ppp_unlock(ppp);
765 err = 0;
766 } 772 }
767 break; 773 break;
768 } 774 }
769 case PPPIOCSACTIVE: 775 case PPPIOCSACTIVE:
770 { 776 {
771 struct sock_filter *code; 777 struct sock_filter *code;
778
772 err = get_filter(argp, &code); 779 err = get_filter(argp, &code);
773 if (err >= 0) { 780 if (err >= 0) {
781 struct sock_fprog fprog = {
782 .len = err,
783 .filter = code,
784 };
785
774 ppp_lock(ppp); 786 ppp_lock(ppp);
775 kfree(ppp->active_filter); 787 if (ppp->active_filter)
776 ppp->active_filter = code; 788 sk_unattached_filter_destroy(ppp->active_filter);
777 ppp->active_len = err; 789 err = sk_unattached_filter_create(&ppp->active_filter,
790 &fprog);
791 kfree(code);
778 ppp_unlock(ppp); 792 ppp_unlock(ppp);
779 err = 0;
780 } 793 }
781 break; 794 break;
782 } 795 }
@@ -1184,7 +1197,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
1184 a four-byte PPP header on each packet */ 1197 a four-byte PPP header on each packet */
1185 *skb_push(skb, 2) = 1; 1198 *skb_push(skb, 2) = 1;
1186 if (ppp->pass_filter && 1199 if (ppp->pass_filter &&
1187 sk_run_filter(skb, ppp->pass_filter) == 0) { 1200 SK_RUN_FILTER(ppp->pass_filter, skb) == 0) {
1188 if (ppp->debug & 1) 1201 if (ppp->debug & 1)
1189 netdev_printk(KERN_DEBUG, ppp->dev, 1202 netdev_printk(KERN_DEBUG, ppp->dev,
1190 "PPP: outbound frame " 1203 "PPP: outbound frame "
@@ -1194,7 +1207,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
1194 } 1207 }
1195 /* if this packet passes the active filter, record the time */ 1208 /* if this packet passes the active filter, record the time */
1196 if (!(ppp->active_filter && 1209 if (!(ppp->active_filter &&
1197 sk_run_filter(skb, ppp->active_filter) == 0)) 1210 SK_RUN_FILTER(ppp->active_filter, skb) == 0))
1198 ppp->last_xmit = jiffies; 1211 ppp->last_xmit = jiffies;
1199 skb_pull(skb, 2); 1212 skb_pull(skb, 2);
1200#else 1213#else
@@ -1818,7 +1831,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
1818 1831
1819 *skb_push(skb, 2) = 0; 1832 *skb_push(skb, 2) = 0;
1820 if (ppp->pass_filter && 1833 if (ppp->pass_filter &&
1821 sk_run_filter(skb, ppp->pass_filter) == 0) { 1834 SK_RUN_FILTER(ppp->pass_filter, skb) == 0) {
1822 if (ppp->debug & 1) 1835 if (ppp->debug & 1)
1823 netdev_printk(KERN_DEBUG, ppp->dev, 1836 netdev_printk(KERN_DEBUG, ppp->dev,
1824 "PPP: inbound frame " 1837 "PPP: inbound frame "
@@ -1827,7 +1840,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
1827 return; 1840 return;
1828 } 1841 }
1829 if (!(ppp->active_filter && 1842 if (!(ppp->active_filter &&
1830 sk_run_filter(skb, ppp->active_filter) == 0)) 1843 SK_RUN_FILTER(ppp->active_filter, skb) == 0))
1831 ppp->last_recv = jiffies; 1844 ppp->last_recv = jiffies;
1832 __skb_pull(skb, 2); 1845 __skb_pull(skb, 2);
1833 } else 1846 } else
@@ -2672,6 +2685,10 @@ ppp_create_interface(struct net *net, int unit, int *retp)
2672 ppp->minseq = -1; 2685 ppp->minseq = -1;
2673 skb_queue_head_init(&ppp->mrq); 2686 skb_queue_head_init(&ppp->mrq);
2674#endif /* CONFIG_PPP_MULTILINK */ 2687#endif /* CONFIG_PPP_MULTILINK */
2688#ifdef CONFIG_PPP_FILTER
2689 ppp->pass_filter = NULL;
2690 ppp->active_filter = NULL;
2691#endif /* CONFIG_PPP_FILTER */
2675 2692
2676 /* 2693 /*
2677 * drum roll: don't forget to set 2694 * drum roll: don't forget to set
@@ -2802,10 +2819,15 @@ static void ppp_destroy_interface(struct ppp *ppp)
2802 skb_queue_purge(&ppp->mrq); 2819 skb_queue_purge(&ppp->mrq);
2803#endif /* CONFIG_PPP_MULTILINK */ 2820#endif /* CONFIG_PPP_MULTILINK */
2804#ifdef CONFIG_PPP_FILTER 2821#ifdef CONFIG_PPP_FILTER
2805 kfree(ppp->pass_filter); 2822 if (ppp->pass_filter) {
2806 ppp->pass_filter = NULL; 2823 sk_unattached_filter_destroy(ppp->pass_filter);
2807 kfree(ppp->active_filter); 2824 ppp->pass_filter = NULL;
2808 ppp->active_filter = NULL; 2825 }
2826
2827 if (ppp->active_filter) {
2828 sk_unattached_filter_destroy(ppp->active_filter);
2829 ppp->active_filter = NULL;
2830 }
2809#endif /* CONFIG_PPP_FILTER */ 2831#endif /* CONFIG_PPP_FILTER */
2810 2832
2811 kfree_skb(ppp->xmit_pending); 2833 kfree_skb(ppp->xmit_pending);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index e568c8ef896b..262dcbb75ffe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -9,28 +9,81 @@
9#include <linux/workqueue.h> 9#include <linux/workqueue.h>
10#include <uapi/linux/filter.h> 10#include <uapi/linux/filter.h>
11 11
12#ifdef CONFIG_COMPAT 12/* Internally used and optimized filter representation with extended
13/* 13 * instruction set based on top of classic BPF.
14 * A struct sock_filter is architecture independent.
15 */ 14 */
15
16/* instruction classes */
17#define BPF_ALU64 0x07 /* alu mode in double word width */
18
19/* ld/ldx fields */
20#define BPF_DW 0x18 /* double word */
21#define BPF_XADD 0xc0 /* exclusive add */
22
23/* alu/jmp fields */
24#define BPF_MOV 0xb0 /* mov reg to reg */
25#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
26
27/* change endianness of a register */
28#define BPF_END 0xd0 /* flags for endianness conversion: */
29#define BPF_TO_LE 0x00 /* convert to little-endian */
30#define BPF_TO_BE 0x08 /* convert to big-endian */
31#define BPF_FROM_LE BPF_TO_LE
32#define BPF_FROM_BE BPF_TO_BE
33
34#define BPF_JNE 0x50 /* jump != */
35#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
36#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
37#define BPF_CALL 0x80 /* function call */
38#define BPF_EXIT 0x90 /* function return */
39
40/* BPF has 10 general purpose 64-bit registers and stack frame. */
41#define MAX_BPF_REG 11
42
43/* BPF program can access up to 512 bytes of stack space. */
44#define MAX_BPF_STACK 512
45
46/* Arg1, context and stack frame pointer register positions. */
47#define ARG1_REG 1
48#define CTX_REG 6
49#define FP_REG 10
50
51struct sock_filter_int {
52 __u8 code; /* opcode */
53 __u8 a_reg:4; /* dest register */
54 __u8 x_reg:4; /* source register */
55 __s16 off; /* signed offset */
56 __s32 imm; /* signed immediate constant */
57};
58
59#ifdef CONFIG_COMPAT
60/* A struct sock_filter is architecture independent. */
16struct compat_sock_fprog { 61struct compat_sock_fprog {
17 u16 len; 62 u16 len;
18 compat_uptr_t filter; /* struct sock_filter * */ 63 compat_uptr_t filter; /* struct sock_filter * */
19}; 64};
20#endif 65#endif
21 66
67struct sock_fprog_kern {
68 u16 len;
69 struct sock_filter *filter;
70};
71
22struct sk_buff; 72struct sk_buff;
23struct sock; 73struct sock;
74struct seccomp_data;
24 75
25struct sk_filter 76struct sk_filter {
26{
27 atomic_t refcnt; 77 atomic_t refcnt;
28 unsigned int len; /* Number of filter blocks */ 78 u32 jited:1, /* Is our filter JIT'ed? */
79 len:31; /* Number of filter blocks */
80 struct sock_fprog_kern *orig_prog; /* Original BPF program */
29 struct rcu_head rcu; 81 struct rcu_head rcu;
30 unsigned int (*bpf_func)(const struct sk_buff *skb, 82 unsigned int (*bpf_func)(const struct sk_buff *skb,
31 const struct sock_filter *filter); 83 const struct sock_filter_int *filter);
32 union { 84 union {
33 struct sock_filter insns[0]; 85 struct sock_filter insns[0];
86 struct sock_filter_int insnsi[0];
34 struct work_struct work; 87 struct work_struct work;
35 }; 88 };
36}; 89};
@@ -41,25 +94,44 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
41 offsetof(struct sk_filter, insns[proglen])); 94 offsetof(struct sk_filter, insns[proglen]));
42} 95}
43 96
44extern int sk_filter(struct sock *sk, struct sk_buff *skb); 97#define sk_filter_proglen(fprog) \
45extern unsigned int sk_run_filter(const struct sk_buff *skb, 98 (fprog->len * sizeof(fprog->filter[0]))
46 const struct sock_filter *filter); 99
47extern int sk_unattached_filter_create(struct sk_filter **pfp, 100#define SK_RUN_FILTER(filter, ctx) \
48 struct sock_fprog *fprog); 101 (*filter->bpf_func)(ctx, filter->insnsi)
49extern void sk_unattached_filter_destroy(struct sk_filter *fp); 102
50extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 103int sk_filter(struct sock *sk, struct sk_buff *skb);
51extern int sk_detach_filter(struct sock *sk); 104
52extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); 105u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
53extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); 106 const struct sock_filter_int *insni);
54extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to); 107u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
108 const struct sock_filter_int *insni);
109
110int sk_convert_filter(struct sock_filter *prog, int len,
111 struct sock_filter_int *new_prog, int *new_len);
112
113int sk_unattached_filter_create(struct sk_filter **pfp,
114 struct sock_fprog *fprog);
115void sk_unattached_filter_destroy(struct sk_filter *fp);
116
117int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
118int sk_detach_filter(struct sock *sk);
119
120int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
121int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
122 unsigned int len);
123void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
124
125void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
126void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
55 127
56#ifdef CONFIG_BPF_JIT 128#ifdef CONFIG_BPF_JIT
57#include <stdarg.h> 129#include <stdarg.h>
58#include <linux/linkage.h> 130#include <linux/linkage.h>
59#include <linux/printk.h> 131#include <linux/printk.h>
60 132
61extern void bpf_jit_compile(struct sk_filter *fp); 133void bpf_jit_compile(struct sk_filter *fp);
62extern void bpf_jit_free(struct sk_filter *fp); 134void bpf_jit_free(struct sk_filter *fp);
63 135
64static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, 136static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
65 u32 pass, void *image) 137 u32 pass, void *image)
@@ -70,7 +142,6 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
70 print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 142 print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
71 16, 1, image, proglen, false); 143 16, 1, image, proglen, false);
72} 144}
73#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
74#else 145#else
75#include <linux/slab.h> 146#include <linux/slab.h>
76static inline void bpf_jit_compile(struct sk_filter *fp) 147static inline void bpf_jit_compile(struct sk_filter *fp)
@@ -80,7 +151,6 @@ static inline void bpf_jit_free(struct sk_filter *fp)
80{ 151{
81 kfree(fp); 152 kfree(fp);
82} 153}
83#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
84#endif 154#endif
85 155
86static inline int bpf_tell_extensions(void) 156static inline int bpf_tell_extensions(void)
diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h
index d5f62bc5f4be..8e10f57f109f 100644
--- a/include/linux/isdn_ppp.h
+++ b/include/linux/isdn_ppp.h
@@ -180,9 +180,8 @@ struct ippp_struct {
180 struct slcompress *slcomp; 180 struct slcompress *slcomp;
181#endif 181#endif
182#ifdef CONFIG_IPPP_FILTER 182#ifdef CONFIG_IPPP_FILTER
183 struct sock_filter *pass_filter; /* filter for packets to pass */ 183 struct sk_filter *pass_filter; /* filter for packets to pass */
184 struct sock_filter *active_filter; /* filter for pkts to reset idle */ 184 struct sk_filter *active_filter; /* filter for pkts to reset idle */
185 unsigned pass_len, active_len;
186#endif 185#endif
187 unsigned long debug; 186 unsigned long debug;
188 struct isdn_ppp_compressor *compressor,*decompressor; 187 struct isdn_ppp_compressor *compressor,*decompressor;
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index 1dc420ba213a..6d3b0a2ef9ce 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -27,11 +27,7 @@
27#include <linux/if_vlan.h> 27#include <linux/if_vlan.h>
28#include <linux/ip.h> 28#include <linux/ip.h>
29#include <linux/filter.h> 29#include <linux/filter.h>
30#ifdef __KERNEL__
31#include <linux/in.h> 30#include <linux/in.h>
32#else
33#include <netinet/in.h>
34#endif
35 31
36#define PTP_CLASS_NONE 0x00 /* not a PTP event message */ 32#define PTP_CLASS_NONE 0x00 /* not a PTP event message */
37#define PTP_CLASS_V1 0x01 /* protocol version 1 */ 33#define PTP_CLASS_V1 0x01 /* protocol version 1 */
@@ -84,14 +80,6 @@
84#define OP_RETA (BPF_RET | BPF_A) 80#define OP_RETA (BPF_RET | BPF_A)
85#define OP_RETK (BPF_RET | BPF_K) 81#define OP_RETK (BPF_RET | BPF_K)
86 82
87static inline int ptp_filter_init(struct sock_filter *f, int len)
88{
89 if (OP_LDH == f[0].code)
90 return sk_chk_filter(f, len);
91 else
92 return 0;
93}
94
95#define PTP_FILTER \ 83#define PTP_FILTER \
96 {OP_LDH, 0, 0, OFF_ETYPE }, /* */ \ 84 {OP_LDH, 0, 0, OFF_ETYPE }, /* */ \
97 {OP_JEQ, 0, 12, ETH_P_IP }, /* f goto L20 */ \ 85 {OP_JEQ, 0, 12, ETH_P_IP }, /* f goto L20 */ \
@@ -137,4 +125,6 @@ static inline int ptp_filter_init(struct sock_filter *f, int len)
137 {OP_RETA, 0, 0, 0 }, /* */ \ 125 {OP_RETA, 0, 0, 0 }, /* */ \
138/*L6x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, 126/*L6x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE },
139 127
128unsigned int ptp_classify_raw(const struct sk_buff *skb);
129
140#endif 130#endif
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 6f19cfd1840e..4054b0994071 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -76,7 +76,6 @@ static inline int seccomp_mode(struct seccomp *s)
76#ifdef CONFIG_SECCOMP_FILTER 76#ifdef CONFIG_SECCOMP_FILTER
77extern void put_seccomp_filter(struct task_struct *tsk); 77extern void put_seccomp_filter(struct task_struct *tsk);
78extern void get_seccomp_filter(struct task_struct *tsk); 78extern void get_seccomp_filter(struct task_struct *tsk);
79extern u32 seccomp_bpf_load(int off);
80#else /* CONFIG_SECCOMP_FILTER */ 79#else /* CONFIG_SECCOMP_FILTER */
81static inline void put_seccomp_filter(struct task_struct *tsk) 80static inline void put_seccomp_filter(struct task_struct *tsk)
82{ 81{
diff --git a/include/net/sock.h b/include/net/sock.h
index 8d7c431a0660..06a5668f05c9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1621,33 +1621,6 @@ void sk_common_release(struct sock *sk);
1621/* Initialise core socket variables */ 1621/* Initialise core socket variables */
1622void sock_init_data(struct socket *sock, struct sock *sk); 1622void sock_init_data(struct socket *sock, struct sock *sk);
1623 1623
1624void sk_filter_release_rcu(struct rcu_head *rcu);
1625
1626/**
1627 * sk_filter_release - release a socket filter
1628 * @fp: filter to remove
1629 *
1630 * Remove a filter from a socket and release its resources.
1631 */
1632
1633static inline void sk_filter_release(struct sk_filter *fp)
1634{
1635 if (atomic_dec_and_test(&fp->refcnt))
1636 call_rcu(&fp->rcu, sk_filter_release_rcu);
1637}
1638
1639static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1640{
1641 atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1642 sk_filter_release(fp);
1643}
1644
1645static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1646{
1647 atomic_inc(&fp->refcnt);
1648 atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1649}
1650
1651/* 1624/*
1652 * Socket reference counting postulates. 1625 * Socket reference counting postulates.
1653 * 1626 *
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b7a10048a32c..4f18e754c23e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -55,60 +55,33 @@ struct seccomp_filter {
55 atomic_t usage; 55 atomic_t usage;
56 struct seccomp_filter *prev; 56 struct seccomp_filter *prev;
57 unsigned short len; /* Instruction count */ 57 unsigned short len; /* Instruction count */
58 struct sock_filter insns[]; 58 struct sock_filter_int insnsi[];
59}; 59};
60 60
61/* Limit any path through the tree to 256KB worth of instructions. */ 61/* Limit any path through the tree to 256KB worth of instructions. */
62#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter)) 62#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
63 63
64/** 64/*
65 * get_u32 - returns a u32 offset into data
66 * @data: a unsigned 64 bit value
67 * @index: 0 or 1 to return the first or second 32-bits
68 *
69 * This inline exists to hide the length of unsigned long. If a 32-bit
70 * unsigned long is passed in, it will be extended and the top 32-bits will be
71 * 0. If it is a 64-bit unsigned long, then whatever data is resident will be
72 * properly returned.
73 *
74 * Endianness is explicitly ignored and left for BPF program authors to manage 65 * Endianness is explicitly ignored and left for BPF program authors to manage
75 * as per the specific architecture. 66 * as per the specific architecture.
76 */ 67 */
77static inline u32 get_u32(u64 data, int index) 68static void populate_seccomp_data(struct seccomp_data *sd)
78{ 69{
79 return ((u32 *)&data)[index]; 70 struct task_struct *task = current;
80} 71 struct pt_regs *regs = task_pt_regs(task);
81 72
82/* Helper for bpf_load below. */ 73 sd->nr = syscall_get_nr(task, regs);
83#define BPF_DATA(_name) offsetof(struct seccomp_data, _name) 74 sd->arch = syscall_get_arch(task, regs);
84/** 75
85 * bpf_load: checks and returns a pointer to the requested offset 76 /* Unroll syscall_get_args to help gcc on arm. */
86 * @off: offset into struct seccomp_data to load from 77 syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]);
87 * 78 syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]);
88 * Returns the requested 32-bits of data. 79 syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]);
89 * seccomp_check_filter() should assure that @off is 32-bit aligned 80 syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]);
90 * and not out of bounds. Failure to do so is a BUG. 81 syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]);
91 */ 82 syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]);
92u32 seccomp_bpf_load(int off) 83
93{ 84 sd->instruction_pointer = KSTK_EIP(task);
94 struct pt_regs *regs = task_pt_regs(current);
95 if (off == BPF_DATA(nr))
96 return syscall_get_nr(current, regs);
97 if (off == BPF_DATA(arch))
98 return syscall_get_arch(current, regs);
99 if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) {
100 unsigned long value;
101 int arg = (off - BPF_DATA(args[0])) / sizeof(u64);
102 int index = !!(off % sizeof(u64));
103 syscall_get_arguments(current, regs, arg, 1, &value);
104 return get_u32(value, index);
105 }
106 if (off == BPF_DATA(instruction_pointer))
107 return get_u32(KSTK_EIP(current), 0);
108 if (off == BPF_DATA(instruction_pointer) + sizeof(u32))
109 return get_u32(KSTK_EIP(current), 1);
110 /* seccomp_check_filter should make this impossible. */
111 BUG();
112} 85}
113 86
114/** 87/**
@@ -133,17 +106,17 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
133 106
134 switch (code) { 107 switch (code) {
135 case BPF_S_LD_W_ABS: 108 case BPF_S_LD_W_ABS:
136 ftest->code = BPF_S_ANC_SECCOMP_LD_W; 109 ftest->code = BPF_LDX | BPF_W | BPF_ABS;
137 /* 32-bit aligned and not out of bounds. */ 110 /* 32-bit aligned and not out of bounds. */
138 if (k >= sizeof(struct seccomp_data) || k & 3) 111 if (k >= sizeof(struct seccomp_data) || k & 3)
139 return -EINVAL; 112 return -EINVAL;
140 continue; 113 continue;
141 case BPF_S_LD_W_LEN: 114 case BPF_S_LD_W_LEN:
142 ftest->code = BPF_S_LD_IMM; 115 ftest->code = BPF_LD | BPF_IMM;
143 ftest->k = sizeof(struct seccomp_data); 116 ftest->k = sizeof(struct seccomp_data);
144 continue; 117 continue;
145 case BPF_S_LDX_W_LEN: 118 case BPF_S_LDX_W_LEN:
146 ftest->code = BPF_S_LDX_IMM; 119 ftest->code = BPF_LDX | BPF_IMM;
147 ftest->k = sizeof(struct seccomp_data); 120 ftest->k = sizeof(struct seccomp_data);
148 continue; 121 continue;
149 /* Explicitly include allowed calls. */ 122 /* Explicitly include allowed calls. */
@@ -185,6 +158,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
185 case BPF_S_JMP_JGT_X: 158 case BPF_S_JMP_JGT_X:
186 case BPF_S_JMP_JSET_K: 159 case BPF_S_JMP_JSET_K:
187 case BPF_S_JMP_JSET_X: 160 case BPF_S_JMP_JSET_X:
161 sk_decode_filter(ftest, ftest);
188 continue; 162 continue;
189 default: 163 default:
190 return -EINVAL; 164 return -EINVAL;
@@ -202,18 +176,21 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
202static u32 seccomp_run_filters(int syscall) 176static u32 seccomp_run_filters(int syscall)
203{ 177{
204 struct seccomp_filter *f; 178 struct seccomp_filter *f;
179 struct seccomp_data sd;
205 u32 ret = SECCOMP_RET_ALLOW; 180 u32 ret = SECCOMP_RET_ALLOW;
206 181
207 /* Ensure unexpected behavior doesn't result in failing open. */ 182 /* Ensure unexpected behavior doesn't result in failing open. */
208 if (WARN_ON(current->seccomp.filter == NULL)) 183 if (WARN_ON(current->seccomp.filter == NULL))
209 return SECCOMP_RET_KILL; 184 return SECCOMP_RET_KILL;
210 185
186 populate_seccomp_data(&sd);
187
211 /* 188 /*
212 * All filters in the list are evaluated and the lowest BPF return 189 * All filters in the list are evaluated and the lowest BPF return
213 * value always takes priority (ignoring the DATA). 190 * value always takes priority (ignoring the DATA).
214 */ 191 */
215 for (f = current->seccomp.filter; f; f = f->prev) { 192 for (f = current->seccomp.filter; f; f = f->prev) {
216 u32 cur_ret = sk_run_filter(NULL, f->insns); 193 u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
217 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) 194 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
218 ret = cur_ret; 195 ret = cur_ret;
219 } 196 }
@@ -231,6 +208,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
231 struct seccomp_filter *filter; 208 struct seccomp_filter *filter;
232 unsigned long fp_size = fprog->len * sizeof(struct sock_filter); 209 unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
233 unsigned long total_insns = fprog->len; 210 unsigned long total_insns = fprog->len;
211 struct sock_filter *fp;
212 int new_len;
234 long ret; 213 long ret;
235 214
236 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) 215 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
@@ -252,28 +231,43 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
252 CAP_SYS_ADMIN) != 0) 231 CAP_SYS_ADMIN) != 0)
253 return -EACCES; 232 return -EACCES;
254 233
255 /* Allocate a new seccomp_filter */ 234 fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
256 filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, 235 if (!fp)
257 GFP_KERNEL|__GFP_NOWARN);
258 if (!filter)
259 return -ENOMEM; 236 return -ENOMEM;
260 atomic_set(&filter->usage, 1);
261 filter->len = fprog->len;
262 237
263 /* Copy the instructions from fprog. */ 238 /* Copy the instructions from fprog. */
264 ret = -EFAULT; 239 ret = -EFAULT;
265 if (copy_from_user(filter->insns, fprog->filter, fp_size)) 240 if (copy_from_user(fp, fprog->filter, fp_size))
266 goto fail; 241 goto free_prog;
267 242
268 /* Check and rewrite the fprog via the skb checker */ 243 /* Check and rewrite the fprog via the skb checker */
269 ret = sk_chk_filter(filter->insns, filter->len); 244 ret = sk_chk_filter(fp, fprog->len);
270 if (ret) 245 if (ret)
271 goto fail; 246 goto free_prog;
272 247
273 /* Check and rewrite the fprog for seccomp use */ 248 /* Check and rewrite the fprog for seccomp use */
274 ret = seccomp_check_filter(filter->insns, filter->len); 249 ret = seccomp_check_filter(fp, fprog->len);
250 if (ret)
251 goto free_prog;
252
253 /* Convert 'sock_filter' insns to 'sock_filter_int' insns */
254 ret = sk_convert_filter(fp, fprog->len, NULL, &new_len);
255 if (ret)
256 goto free_prog;
257
258 /* Allocate a new seccomp_filter */
259 filter = kzalloc(sizeof(struct seccomp_filter) +
260 sizeof(struct sock_filter_int) * new_len,
261 GFP_KERNEL|__GFP_NOWARN);
262 if (!filter)
263 goto free_prog;
264
265 ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
275 if (ret) 266 if (ret)
276 goto fail; 267 goto free_filter;
268
269 atomic_set(&filter->usage, 1);
270 filter->len = new_len;
277 271
278 /* 272 /*
279 * If there is an existing filter, make it the prev and don't drop its 273 * If there is an existing filter, make it the prev and don't drop its
@@ -282,8 +276,11 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
282 filter->prev = current->seccomp.filter; 276 filter->prev = current->seccomp.filter;
283 current->seccomp.filter = filter; 277 current->seccomp.filter = filter;
284 return 0; 278 return 0;
285fail: 279
280free_filter:
286 kfree(filter); 281 kfree(filter);
282free_prog:
283 kfree(fp);
287 return ret; 284 return ret;
288} 285}
289 286
diff --git a/net/core/filter.c b/net/core/filter.c
index 65b75966e206..3733381190ec 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1,11 +1,16 @@
1/* 1/*
2 * Linux Socket Filter - Kernel level socket filtering 2 * Linux Socket Filter - Kernel level socket filtering
3 * 3 *
4 * Author: 4 * Based on the design of the Berkeley Packet Filter. The new
5 * Jay Schulist <jschlst@samba.org> 5 * internal format has been designed by PLUMgrid:
6 * 6 *
7 * Based on the design of: 7 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
8 * - The Berkeley Packet Filter 8 *
9 * Authors:
10 *
11 * Jay Schulist <jschlst@samba.org>
12 * Alexei Starovoitov <ast@plumgrid.com>
13 * Daniel Borkmann <dborkman@redhat.com>
9 * 14 *
10 * This program is free software; you can redistribute it and/or 15 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License 16 * modify it under the terms of the GNU General Public License
@@ -108,304 +113,1045 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
108} 113}
109EXPORT_SYMBOL(sk_filter); 114EXPORT_SYMBOL(sk_filter);
110 115
116/* Base function for offset calculation. Needs to go into .text section,
117 * therefore keeping it non-static as well; will also be used by JITs
118 * anyway later on, so do not let the compiler omit it.
119 */
120noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
121{
122 return 0;
123}
124
111/** 125/**
112 * sk_run_filter - run a filter on a socket 126 * __sk_run_filter - run a filter on a given context
113 * @skb: buffer to run the filter on 127 * @ctx: buffer to run the filter on
114 * @fentry: filter to apply 128 * @fentry: filter to apply
115 * 129 *
116 * Decode and apply filter instructions to the skb->data. 130 * Decode and apply filter instructions to the skb->data. Return length to
117 * Return length to keep, 0 for none. @skb is the data we are 131 * keep, 0 for none. @ctx is the data we are operating on, @filter is the
118 * filtering, @filter is the array of filter instructions. 132 * array of filter instructions.
119 * Because all jumps are guaranteed to be before last instruction,
120 * and last instruction guaranteed to be a RET, we dont need to check
121 * flen. (We used to pass to this function the length of filter)
122 */ 133 */
123unsigned int sk_run_filter(const struct sk_buff *skb, 134unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
124 const struct sock_filter *fentry)
125{ 135{
136 u64 stack[MAX_BPF_STACK / sizeof(u64)];
137 u64 regs[MAX_BPF_REG], tmp;
126 void *ptr; 138 void *ptr;
127 u32 A = 0; /* Accumulator */ 139 int off;
128 u32 X = 0; /* Index Register */
129 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
130 u32 tmp;
131 int k;
132 140
133 /* 141#define K insn->imm
134 * Process array of filter instructions. 142#define A regs[insn->a_reg]
135 */ 143#define X regs[insn->x_reg]
136 for (;; fentry++) { 144#define R0 regs[0]
137#if defined(CONFIG_X86_32) 145
138#define K (fentry->k) 146#define CONT ({insn++; goto select_insn; })
139#else 147#define CONT_JMP ({insn++; goto select_insn; })
140 const u32 K = fentry->k; 148
141#endif 149 static const void *jumptable[256] = {
142 150 [0 ... 255] = &&default_label,
143 switch (fentry->code) { 151 /* Now overwrite non-defaults ... */
144 case BPF_S_ALU_ADD_X: 152#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C
145 A += X; 153 DL(BPF_ALU, BPF_ADD, BPF_X),
146 continue; 154 DL(BPF_ALU, BPF_ADD, BPF_K),
147 case BPF_S_ALU_ADD_K: 155 DL(BPF_ALU, BPF_SUB, BPF_X),
148 A += K; 156 DL(BPF_ALU, BPF_SUB, BPF_K),
149 continue; 157 DL(BPF_ALU, BPF_AND, BPF_X),
150 case BPF_S_ALU_SUB_X: 158 DL(BPF_ALU, BPF_AND, BPF_K),
151 A -= X; 159 DL(BPF_ALU, BPF_OR, BPF_X),
152 continue; 160 DL(BPF_ALU, BPF_OR, BPF_K),
153 case BPF_S_ALU_SUB_K: 161 DL(BPF_ALU, BPF_LSH, BPF_X),
154 A -= K; 162 DL(BPF_ALU, BPF_LSH, BPF_K),
155 continue; 163 DL(BPF_ALU, BPF_RSH, BPF_X),
156 case BPF_S_ALU_MUL_X: 164 DL(BPF_ALU, BPF_RSH, BPF_K),
157 A *= X; 165 DL(BPF_ALU, BPF_XOR, BPF_X),
158 continue; 166 DL(BPF_ALU, BPF_XOR, BPF_K),
159 case BPF_S_ALU_MUL_K: 167 DL(BPF_ALU, BPF_MUL, BPF_X),
160 A *= K; 168 DL(BPF_ALU, BPF_MUL, BPF_K),
161 continue; 169 DL(BPF_ALU, BPF_MOV, BPF_X),
162 case BPF_S_ALU_DIV_X: 170 DL(BPF_ALU, BPF_MOV, BPF_K),
163 if (X == 0) 171 DL(BPF_ALU, BPF_DIV, BPF_X),
164 return 0; 172 DL(BPF_ALU, BPF_DIV, BPF_K),
165 A /= X; 173 DL(BPF_ALU, BPF_MOD, BPF_X),
166 continue; 174 DL(BPF_ALU, BPF_MOD, BPF_K),
167 case BPF_S_ALU_DIV_K: 175 DL(BPF_ALU, BPF_NEG, 0),
168 A /= K; 176 DL(BPF_ALU, BPF_END, BPF_TO_BE),
169 continue; 177 DL(BPF_ALU, BPF_END, BPF_TO_LE),
170 case BPF_S_ALU_MOD_X: 178 DL(BPF_ALU64, BPF_ADD, BPF_X),
171 if (X == 0) 179 DL(BPF_ALU64, BPF_ADD, BPF_K),
172 return 0; 180 DL(BPF_ALU64, BPF_SUB, BPF_X),
173 A %= X; 181 DL(BPF_ALU64, BPF_SUB, BPF_K),
174 continue; 182 DL(BPF_ALU64, BPF_AND, BPF_X),
175 case BPF_S_ALU_MOD_K: 183 DL(BPF_ALU64, BPF_AND, BPF_K),
176 A %= K; 184 DL(BPF_ALU64, BPF_OR, BPF_X),
177 continue; 185 DL(BPF_ALU64, BPF_OR, BPF_K),
178 case BPF_S_ALU_AND_X: 186 DL(BPF_ALU64, BPF_LSH, BPF_X),
179 A &= X; 187 DL(BPF_ALU64, BPF_LSH, BPF_K),
180 continue; 188 DL(BPF_ALU64, BPF_RSH, BPF_X),
181 case BPF_S_ALU_AND_K: 189 DL(BPF_ALU64, BPF_RSH, BPF_K),
182 A &= K; 190 DL(BPF_ALU64, BPF_XOR, BPF_X),
183 continue; 191 DL(BPF_ALU64, BPF_XOR, BPF_K),
184 case BPF_S_ALU_OR_X: 192 DL(BPF_ALU64, BPF_MUL, BPF_X),
185 A |= X; 193 DL(BPF_ALU64, BPF_MUL, BPF_K),
186 continue; 194 DL(BPF_ALU64, BPF_MOV, BPF_X),
187 case BPF_S_ALU_OR_K: 195 DL(BPF_ALU64, BPF_MOV, BPF_K),
188 A |= K; 196 DL(BPF_ALU64, BPF_ARSH, BPF_X),
189 continue; 197 DL(BPF_ALU64, BPF_ARSH, BPF_K),
190 case BPF_S_ANC_ALU_XOR_X: 198 DL(BPF_ALU64, BPF_DIV, BPF_X),
191 case BPF_S_ALU_XOR_X: 199 DL(BPF_ALU64, BPF_DIV, BPF_K),
192 A ^= X; 200 DL(BPF_ALU64, BPF_MOD, BPF_X),
193 continue; 201 DL(BPF_ALU64, BPF_MOD, BPF_K),
194 case BPF_S_ALU_XOR_K: 202 DL(BPF_ALU64, BPF_NEG, 0),
195 A ^= K; 203 DL(BPF_JMP, BPF_CALL, 0),
196 continue; 204 DL(BPF_JMP, BPF_JA, 0),
197 case BPF_S_ALU_LSH_X: 205 DL(BPF_JMP, BPF_JEQ, BPF_X),
198 A <<= X; 206 DL(BPF_JMP, BPF_JEQ, BPF_K),
199 continue; 207 DL(BPF_JMP, BPF_JNE, BPF_X),
200 case BPF_S_ALU_LSH_K: 208 DL(BPF_JMP, BPF_JNE, BPF_K),
201 A <<= K; 209 DL(BPF_JMP, BPF_JGT, BPF_X),
202 continue; 210 DL(BPF_JMP, BPF_JGT, BPF_K),
203 case BPF_S_ALU_RSH_X: 211 DL(BPF_JMP, BPF_JGE, BPF_X),
204 A >>= X; 212 DL(BPF_JMP, BPF_JGE, BPF_K),
205 continue; 213 DL(BPF_JMP, BPF_JSGT, BPF_X),
206 case BPF_S_ALU_RSH_K: 214 DL(BPF_JMP, BPF_JSGT, BPF_K),
207 A >>= K; 215 DL(BPF_JMP, BPF_JSGE, BPF_X),
208 continue; 216 DL(BPF_JMP, BPF_JSGE, BPF_K),
209 case BPF_S_ALU_NEG: 217 DL(BPF_JMP, BPF_JSET, BPF_X),
210 A = -A; 218 DL(BPF_JMP, BPF_JSET, BPF_K),
211 continue; 219 DL(BPF_JMP, BPF_EXIT, 0),
212 case BPF_S_JMP_JA: 220 DL(BPF_STX, BPF_MEM, BPF_B),
213 fentry += K; 221 DL(BPF_STX, BPF_MEM, BPF_H),
214 continue; 222 DL(BPF_STX, BPF_MEM, BPF_W),
215 case BPF_S_JMP_JGT_K: 223 DL(BPF_STX, BPF_MEM, BPF_DW),
216 fentry += (A > K) ? fentry->jt : fentry->jf; 224 DL(BPF_STX, BPF_XADD, BPF_W),
217 continue; 225 DL(BPF_STX, BPF_XADD, BPF_DW),
218 case BPF_S_JMP_JGE_K: 226 DL(BPF_ST, BPF_MEM, BPF_B),
219 fentry += (A >= K) ? fentry->jt : fentry->jf; 227 DL(BPF_ST, BPF_MEM, BPF_H),
220 continue; 228 DL(BPF_ST, BPF_MEM, BPF_W),
221 case BPF_S_JMP_JEQ_K: 229 DL(BPF_ST, BPF_MEM, BPF_DW),
222 fentry += (A == K) ? fentry->jt : fentry->jf; 230 DL(BPF_LDX, BPF_MEM, BPF_B),
223 continue; 231 DL(BPF_LDX, BPF_MEM, BPF_H),
224 case BPF_S_JMP_JSET_K: 232 DL(BPF_LDX, BPF_MEM, BPF_W),
225 fentry += (A & K) ? fentry->jt : fentry->jf; 233 DL(BPF_LDX, BPF_MEM, BPF_DW),
226 continue; 234 DL(BPF_LD, BPF_ABS, BPF_W),
227 case BPF_S_JMP_JGT_X: 235 DL(BPF_LD, BPF_ABS, BPF_H),
228 fentry += (A > X) ? fentry->jt : fentry->jf; 236 DL(BPF_LD, BPF_ABS, BPF_B),
229 continue; 237 DL(BPF_LD, BPF_IND, BPF_W),
230 case BPF_S_JMP_JGE_X: 238 DL(BPF_LD, BPF_IND, BPF_H),
231 fentry += (A >= X) ? fentry->jt : fentry->jf; 239 DL(BPF_LD, BPF_IND, BPF_B),
232 continue; 240#undef DL
233 case BPF_S_JMP_JEQ_X: 241 };
234 fentry += (A == X) ? fentry->jt : fentry->jf; 242
235 continue; 243 regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
236 case BPF_S_JMP_JSET_X: 244 regs[ARG1_REG] = (u64) (unsigned long) ctx;
237 fentry += (A & X) ? fentry->jt : fentry->jf; 245
238 continue; 246select_insn:
239 case BPF_S_LD_W_ABS: 247 goto *jumptable[insn->code];
240 k = K; 248
241load_w: 249 /* ALU */
242 ptr = load_pointer(skb, k, 4, &tmp); 250#define ALU(OPCODE, OP) \
243 if (ptr != NULL) { 251 BPF_ALU64_##OPCODE##_BPF_X: \
244 A = get_unaligned_be32(ptr); 252 A = A OP X; \
245 continue; 253 CONT; \
246 } 254 BPF_ALU_##OPCODE##_BPF_X: \
247 return 0; 255 A = (u32) A OP (u32) X; \
248 case BPF_S_LD_H_ABS: 256 CONT; \
249 k = K; 257 BPF_ALU64_##OPCODE##_BPF_K: \
250load_h: 258 A = A OP K; \
251 ptr = load_pointer(skb, k, 2, &tmp); 259 CONT; \
252 if (ptr != NULL) { 260 BPF_ALU_##OPCODE##_BPF_K: \
253 A = get_unaligned_be16(ptr); 261 A = (u32) A OP (u32) K; \
254 continue; 262 CONT;
263
264 ALU(BPF_ADD, +)
265 ALU(BPF_SUB, -)
266 ALU(BPF_AND, &)
267 ALU(BPF_OR, |)
268 ALU(BPF_LSH, <<)
269 ALU(BPF_RSH, >>)
270 ALU(BPF_XOR, ^)
271 ALU(BPF_MUL, *)
272#undef ALU
273 BPF_ALU_BPF_NEG_0:
274 A = (u32) -A;
275 CONT;
276 BPF_ALU64_BPF_NEG_0:
277 A = -A;
278 CONT;
279 BPF_ALU_BPF_MOV_BPF_X:
280 A = (u32) X;
281 CONT;
282 BPF_ALU_BPF_MOV_BPF_K:
283 A = (u32) K;
284 CONT;
285 BPF_ALU64_BPF_MOV_BPF_X:
286 A = X;
287 CONT;
288 BPF_ALU64_BPF_MOV_BPF_K:
289 A = K;
290 CONT;
291 BPF_ALU64_BPF_ARSH_BPF_X:
292 (*(s64 *) &A) >>= X;
293 CONT;
294 BPF_ALU64_BPF_ARSH_BPF_K:
295 (*(s64 *) &A) >>= K;
296 CONT;
297 BPF_ALU64_BPF_MOD_BPF_X:
298 tmp = A;
299 if (X)
300 A = do_div(tmp, X);
301 CONT;
302 BPF_ALU_BPF_MOD_BPF_X:
303 tmp = (u32) A;
304 if (X)
305 A = do_div(tmp, (u32) X);
306 CONT;
307 BPF_ALU64_BPF_MOD_BPF_K:
308 tmp = A;
309 if (K)
310 A = do_div(tmp, K);
311 CONT;
312 BPF_ALU_BPF_MOD_BPF_K:
313 tmp = (u32) A;
314 if (K)
315 A = do_div(tmp, (u32) K);
316 CONT;
317 BPF_ALU64_BPF_DIV_BPF_X:
318 if (X)
319 do_div(A, X);
320 CONT;
321 BPF_ALU_BPF_DIV_BPF_X:
322 tmp = (u32) A;
323 if (X)
324 do_div(tmp, (u32) X);
325 A = (u32) tmp;
326 CONT;
327 BPF_ALU64_BPF_DIV_BPF_K:
328 if (K)
329 do_div(A, K);
330 CONT;
331 BPF_ALU_BPF_DIV_BPF_K:
332 tmp = (u32) A;
333 if (K)
334 do_div(tmp, (u32) K);
335 A = (u32) tmp;
336 CONT;
337 BPF_ALU_BPF_END_BPF_TO_BE:
338 switch (K) {
339 case 16:
340 A = (__force u16) cpu_to_be16(A);
341 break;
342 case 32:
343 A = (__force u32) cpu_to_be32(A);
344 break;
345 case 64:
346 A = (__force u64) cpu_to_be64(A);
347 break;
348 }
349 CONT;
350 BPF_ALU_BPF_END_BPF_TO_LE:
351 switch (K) {
352 case 16:
353 A = (__force u16) cpu_to_le16(A);
354 break;
355 case 32:
356 A = (__force u32) cpu_to_le32(A);
357 break;
358 case 64:
359 A = (__force u64) cpu_to_le64(A);
360 break;
361 }
362 CONT;
363
364 /* CALL */
365 BPF_JMP_BPF_CALL_0:
366 /* Function call scratches R1-R5 registers, preserves R6-R9,
367 * and stores return value into R0.
368 */
369 R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
370 regs[4], regs[5]);
371 CONT;
372
373 /* JMP */
374 BPF_JMP_BPF_JA_0:
375 insn += insn->off;
376 CONT;
377 BPF_JMP_BPF_JEQ_BPF_X:
378 if (A == X) {
379 insn += insn->off;
380 CONT_JMP;
381 }
382 CONT;
383 BPF_JMP_BPF_JEQ_BPF_K:
384 if (A == K) {
385 insn += insn->off;
386 CONT_JMP;
387 }
388 CONT;
389 BPF_JMP_BPF_JNE_BPF_X:
390 if (A != X) {
391 insn += insn->off;
392 CONT_JMP;
393 }
394 CONT;
395 BPF_JMP_BPF_JNE_BPF_K:
396 if (A != K) {
397 insn += insn->off;
398 CONT_JMP;
399 }
400 CONT;
401 BPF_JMP_BPF_JGT_BPF_X:
402 if (A > X) {
403 insn += insn->off;
404 CONT_JMP;
405 }
406 CONT;
407 BPF_JMP_BPF_JGT_BPF_K:
408 if (A > K) {
409 insn += insn->off;
410 CONT_JMP;
411 }
412 CONT;
413 BPF_JMP_BPF_JGE_BPF_X:
414 if (A >= X) {
415 insn += insn->off;
416 CONT_JMP;
417 }
418 CONT;
419 BPF_JMP_BPF_JGE_BPF_K:
420 if (A >= K) {
421 insn += insn->off;
422 CONT_JMP;
423 }
424 CONT;
425 BPF_JMP_BPF_JSGT_BPF_X:
426 if (((s64)A) > ((s64)X)) {
427 insn += insn->off;
428 CONT_JMP;
429 }
430 CONT;
431 BPF_JMP_BPF_JSGT_BPF_K:
432 if (((s64)A) > ((s64)K)) {
433 insn += insn->off;
434 CONT_JMP;
435 }
436 CONT;
437 BPF_JMP_BPF_JSGE_BPF_X:
438 if (((s64)A) >= ((s64)X)) {
439 insn += insn->off;
440 CONT_JMP;
441 }
442 CONT;
443 BPF_JMP_BPF_JSGE_BPF_K:
444 if (((s64)A) >= ((s64)K)) {
445 insn += insn->off;
446 CONT_JMP;
447 }
448 CONT;
449 BPF_JMP_BPF_JSET_BPF_X:
450 if (A & X) {
451 insn += insn->off;
452 CONT_JMP;
453 }
454 CONT;
455 BPF_JMP_BPF_JSET_BPF_K:
456 if (A & K) {
457 insn += insn->off;
458 CONT_JMP;
459 }
460 CONT;
461 BPF_JMP_BPF_EXIT_0:
462 return R0;
463
 464	/* STX, ST and LDX */
465#define LDST(SIZEOP, SIZE) \
466 BPF_STX_BPF_MEM_##SIZEOP: \
467 *(SIZE *)(unsigned long) (A + insn->off) = X; \
468 CONT; \
469 BPF_ST_BPF_MEM_##SIZEOP: \
470 *(SIZE *)(unsigned long) (A + insn->off) = K; \
471 CONT; \
472 BPF_LDX_BPF_MEM_##SIZEOP: \
473 A = *(SIZE *)(unsigned long) (X + insn->off); \
474 CONT;
475
476 LDST(BPF_B, u8)
477 LDST(BPF_H, u16)
478 LDST(BPF_W, u32)
479 LDST(BPF_DW, u64)
480#undef LDST
481 BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
482 atomic_add((u32) X, (atomic_t *)(unsigned long)
483 (A + insn->off));
484 CONT;
485 BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
486 atomic64_add((u64) X, (atomic64_t *)(unsigned long)
487 (A + insn->off));
488 CONT;
489 BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
490 off = K;
491load_word:
 492		/* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns only appear
 493		 * in programs where ctx == skb. All programs keep 'ctx'
 494		 * in regs[CTX_REG] == R6; sk_convert_filter() saves it in
 495		 * R6, and the internal BPF verifier will check that
 496		 * R6 == ctx.
497 *
498 * BPF_ABS and BPF_IND are wrappers of function calls, so
499 * they scratch R1-R5 registers, preserve R6-R9, and store
500 * return value into R0.
501 *
502 * Implicit input:
503 * ctx
504 *
505 * Explicit input:
506 * X == any register
507 * K == 32-bit immediate
508 *
509 * Output:
510 * R0 - 8/16/32-bit skb data converted to cpu endianness
511 */
512 ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
513 if (likely(ptr != NULL)) {
514 R0 = get_unaligned_be32(ptr);
515 CONT;
516 }
517 return 0;
518 BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
519 off = K;
520load_half:
521 ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
522 if (likely(ptr != NULL)) {
523 R0 = get_unaligned_be16(ptr);
524 CONT;
525 }
526 return 0;
527 BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
528 off = K;
529load_byte:
530 ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
531 if (likely(ptr != NULL)) {
532 R0 = *(u8 *)ptr;
533 CONT;
534 }
535 return 0;
536 BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
537 off = K + X;
538 goto load_word;
539 BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
540 off = K + X;
541 goto load_half;
542 BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
543 off = K + X;
544 goto load_byte;
545
546 default_label:
547 /* If we ever reach this, we have a bug somewhere. */
548 WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
549 return 0;
550#undef CONT_JMP
551#undef CONT
552
553#undef R0
554#undef X
555#undef A
556#undef K
557}
558
559u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
560 const struct sock_filter_int *insni)
561 __attribute__ ((alias ("__sk_run_filter")));
562
563u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
564 const struct sock_filter_int *insni)
565 __attribute__ ((alias ("__sk_run_filter")));
566EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
567
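The interpreter above dispatches with GCC's labels-as-values ("computed goto"): the opcode indexes a table of label addresses and CONT jumps straight to the next handler, with no central switch statement. The following is a minimal, self-contained sketch of that technique only; the toy opcodes, struct insn and run() below are invented for illustration and are not part of this patch.

/* Standalone userspace sketch of threaded dispatch as used by
 * __sk_run_filter() above; requires GCC/Clang computed gotos.
 */
#include <stdio.h>

enum { OP_ADD, OP_MUL, OP_HALT };

struct insn { unsigned char code; int imm; };

static long run(const struct insn *pc)
{
	static const void *jumptable[] = {
		[OP_ADD]  = &&do_add,
		[OP_MUL]  = &&do_mul,
		[OP_HALT] = &&do_halt,
	};
	long acc = 0;

#define CONT ({ pc++; goto select_insn; })
select_insn:
	goto *jumptable[pc->code];
do_add:
	acc += pc->imm;
	CONT;
do_mul:
	acc *= pc->imm;
	CONT;
do_halt:
	return acc;
#undef CONT
}

int main(void)
{
	const struct insn prog[] = {
		{ OP_ADD, 2 }, { OP_MUL, 21 }, { OP_HALT, 0 },
	};

	printf("%ld\n", run(prog));	/* prints 42 */
	return 0;
}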
 568/* Helper to find the offset of pkt_type in the sk_buff structure. We want
 569 * to make sure it's still a 3-bit field starting at a byte boundary;
570 * taken from arch/x86/net/bpf_jit_comp.c.
571 */
572#define PKT_TYPE_MAX 7
573static unsigned int pkt_type_offset(void)
574{
575 struct sk_buff skb_probe = { .pkt_type = ~0, };
576 u8 *ct = (u8 *) &skb_probe;
577 unsigned int off;
578
579 for (off = 0; off < sizeof(struct sk_buff); off++) {
580 if (ct[off] == PKT_TYPE_MAX)
581 return off;
582 }
583
584 pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
585 return -1;
586}
587
588static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
589{
590 struct sk_buff *skb = (struct sk_buff *)(long) ctx;
591
592 return __skb_get_poff(skb);
593}
594
595static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
596{
597 struct sk_buff *skb = (struct sk_buff *)(long) ctx;
598 struct nlattr *nla;
599
600 if (skb_is_nonlinear(skb))
601 return 0;
602
603 if (A > skb->len - sizeof(struct nlattr))
604 return 0;
605
606 nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
607 if (nla)
608 return (void *) nla - (void *) skb->data;
609
610 return 0;
611}
612
613static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
614{
615 struct sk_buff *skb = (struct sk_buff *)(long) ctx;
616 struct nlattr *nla;
617
618 if (skb_is_nonlinear(skb))
619 return 0;
620
621 if (A > skb->len - sizeof(struct nlattr))
622 return 0;
623
624 nla = (struct nlattr *) &skb->data[A];
625 if (nla->nla_len > A - skb->len)
626 return 0;
627
628 nla = nla_find_nested(nla, X);
629 if (nla)
630 return (void *) nla - (void *) skb->data;
631
632 return 0;
633}
634
635static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
636{
637 return raw_smp_processor_id();
638}
639
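These helpers are not called directly by converted programs: convert_bpf_extensions() below records each one as a signed offset from __bpf_call_base in insn->imm, and the interpreter's BPF_JMP | BPF_CALL handler adds the base back before calling. A hedged sketch of that round trip follows; encode_call() and decode_and_call() are illustrative names only and not part of this patch.

/* Illustrative only: mirrors how insn->imm encodes a helper address
 * relative to __bpf_call_base (see convert_bpf_extensions() and the
 * BPF_JMP | BPF_CALL handler in __sk_run_filter()).
 */
static void encode_call(struct sock_filter_int *insn,
			u64 (*helper)(u64, u64, u64, u64, u64))
{
	insn->code = BPF_JMP | BPF_CALL;
	insn->imm  = helper - __bpf_call_base;	/* fits into the s32 offset */
}

static u64 decode_and_call(const struct sock_filter_int *insn)
{
	u64 (*fn)(u64, u64, u64, u64, u64) = __bpf_call_base + insn->imm;

	/* R1-R5 would come from regs[1..5] in the real interpreter. */
	return fn(0, 0, 0, 0, 0);
}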
640/* Register mappings for user programs. */
641#define A_REG 0
642#define X_REG 7
643#define TMP_REG 8
644#define ARG2_REG 2
645#define ARG3_REG 3
646
647static bool convert_bpf_extensions(struct sock_filter *fp,
648 struct sock_filter_int **insnp)
649{
650 struct sock_filter_int *insn = *insnp;
651
652 switch (fp->k) {
653 case SKF_AD_OFF + SKF_AD_PROTOCOL:
654 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
655
656 insn->code = BPF_LDX | BPF_MEM | BPF_H;
657 insn->a_reg = A_REG;
658 insn->x_reg = CTX_REG;
659 insn->off = offsetof(struct sk_buff, protocol);
660 insn++;
661
662 /* A = ntohs(A) [emitting a nop or swap16] */
663 insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
664 insn->a_reg = A_REG;
665 insn->imm = 16;
666 break;
667
668 case SKF_AD_OFF + SKF_AD_PKTTYPE:
669 insn->code = BPF_LDX | BPF_MEM | BPF_B;
670 insn->a_reg = A_REG;
671 insn->x_reg = CTX_REG;
672 insn->off = pkt_type_offset();
673 if (insn->off < 0)
674 return false;
675 insn++;
676
677 insn->code = BPF_ALU | BPF_AND | BPF_K;
678 insn->a_reg = A_REG;
679 insn->imm = PKT_TYPE_MAX;
680 break;
681
682 case SKF_AD_OFF + SKF_AD_IFINDEX:
683 case SKF_AD_OFF + SKF_AD_HATYPE:
684 if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
685 insn->code = BPF_LDX | BPF_MEM | BPF_DW;
686 else
687 insn->code = BPF_LDX | BPF_MEM | BPF_W;
688 insn->a_reg = TMP_REG;
689 insn->x_reg = CTX_REG;
690 insn->off = offsetof(struct sk_buff, dev);
691 insn++;
692
693 insn->code = BPF_JMP | BPF_JNE | BPF_K;
694 insn->a_reg = TMP_REG;
695 insn->imm = 0;
696 insn->off = 1;
697 insn++;
698
699 insn->code = BPF_JMP | BPF_EXIT;
700 insn++;
701
702 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
703 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
704
705 insn->a_reg = A_REG;
706 insn->x_reg = TMP_REG;
707
708 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
709 insn->code = BPF_LDX | BPF_MEM | BPF_W;
710 insn->off = offsetof(struct net_device, ifindex);
711 } else {
712 insn->code = BPF_LDX | BPF_MEM | BPF_H;
713 insn->off = offsetof(struct net_device, type);
714 }
715 break;
716
717 case SKF_AD_OFF + SKF_AD_MARK:
718 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
719
720 insn->code = BPF_LDX | BPF_MEM | BPF_W;
721 insn->a_reg = A_REG;
722 insn->x_reg = CTX_REG;
723 insn->off = offsetof(struct sk_buff, mark);
724 break;
725
726 case SKF_AD_OFF + SKF_AD_RXHASH:
727 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
728
729 insn->code = BPF_LDX | BPF_MEM | BPF_W;
730 insn->a_reg = A_REG;
731 insn->x_reg = CTX_REG;
732 insn->off = offsetof(struct sk_buff, hash);
733 break;
734
735 case SKF_AD_OFF + SKF_AD_QUEUE:
736 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
737
738 insn->code = BPF_LDX | BPF_MEM | BPF_H;
739 insn->a_reg = A_REG;
740 insn->x_reg = CTX_REG;
741 insn->off = offsetof(struct sk_buff, queue_mapping);
742 break;
743
744 case SKF_AD_OFF + SKF_AD_VLAN_TAG:
745 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
746 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
747
748 insn->code = BPF_LDX | BPF_MEM | BPF_H;
749 insn->a_reg = A_REG;
750 insn->x_reg = CTX_REG;
751 insn->off = offsetof(struct sk_buff, vlan_tci);
752 insn++;
753
754 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
755
756 if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
757 insn->code = BPF_ALU | BPF_AND | BPF_K;
758 insn->a_reg = A_REG;
759 insn->imm = ~VLAN_TAG_PRESENT;
760 } else {
761 insn->code = BPF_ALU | BPF_RSH | BPF_K;
762 insn->a_reg = A_REG;
763 insn->imm = 12;
764 insn++;
765
766 insn->code = BPF_ALU | BPF_AND | BPF_K;
767 insn->a_reg = A_REG;
768 insn->imm = 1;
769 }
770 break;
771
772 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
773 case SKF_AD_OFF + SKF_AD_NLATTR:
774 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
775 case SKF_AD_OFF + SKF_AD_CPU:
776 /* arg1 = ctx */
777 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
778 insn->a_reg = ARG1_REG;
779 insn->x_reg = CTX_REG;
780 insn++;
781
782 /* arg2 = A */
783 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
784 insn->a_reg = ARG2_REG;
785 insn->x_reg = A_REG;
786 insn++;
787
788 /* arg3 = X */
789 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
790 insn->a_reg = ARG3_REG;
791 insn->x_reg = X_REG;
792 insn++;
793
794 /* Emit call(ctx, arg2=A, arg3=X) */
795 insn->code = BPF_JMP | BPF_CALL;
796 switch (fp->k) {
797 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
798 insn->imm = __skb_get_pay_offset - __bpf_call_base;
799 break;
800 case SKF_AD_OFF + SKF_AD_NLATTR:
801 insn->imm = __skb_get_nlattr - __bpf_call_base;
802 break;
803 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
804 insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
805 break;
806 case SKF_AD_OFF + SKF_AD_CPU:
807 insn->imm = __get_raw_cpu_id - __bpf_call_base;
808 break;
809 }
810 break;
811
812 case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
813 insn->code = BPF_ALU | BPF_XOR | BPF_X;
814 insn->a_reg = A_REG;
815 insn->x_reg = X_REG;
816 break;
817
818 default:
819 /* This is just a dummy call to avoid letting the compiler
820 * evict __bpf_call_base() as an optimization. Placed here
821 * where no-one bothers.
822 */
823 BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
824 return false;
825 }
826
827 *insnp = insn;
828 return true;
829}
830
831/**
832 * sk_convert_filter - convert filter program
833 * @prog: the user passed filter program
834 * @len: the length of the user passed filter program
835 * @new_prog: buffer where converted program will be stored
836 * @new_len: pointer to store length of converted program
837 *
 838 * Remap 'sock_filter' style BPF instruction set to 'sock_filter_int' style.
839 * Conversion workflow:
840 *
841 * 1) First pass for calculating the new program length:
842 * sk_convert_filter(old_prog, old_len, NULL, &new_len)
843 *
 844 * 2) 2nd pass to remap, which internally runs two passes itself:
 845 *    the 1st pass finds the new jump offsets, the 2nd pass does the remapping:
846 * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len);
847 * sk_convert_filter(old_prog, old_len, new_prog, &new_len);
848 *
849 * User BPF's register A is mapped to our BPF register 6, user BPF
850 * register X is mapped to BPF register 7; frame pointer is always
851 * register 10; Context 'void *ctx' is stored in register 1, that is,
852 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
853 * ctx == 'struct seccomp_data *'.
854 */
855int sk_convert_filter(struct sock_filter *prog, int len,
856 struct sock_filter_int *new_prog, int *new_len)
857{
858 int new_flen = 0, pass = 0, target, i;
859 struct sock_filter_int *new_insn;
860 struct sock_filter *fp;
861 int *addrs = NULL;
862 u8 bpf_src;
863
864 BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
865 BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);
866
867 if (len <= 0 || len >= BPF_MAXINSNS)
868 return -EINVAL;
869
870 if (new_prog) {
871 addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL);
872 if (!addrs)
873 return -ENOMEM;
874 }
875
876do_pass:
877 new_insn = new_prog;
878 fp = prog;
879
880 if (new_insn) {
881 new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
882 new_insn->a_reg = CTX_REG;
883 new_insn->x_reg = ARG1_REG;
884 }
885 new_insn++;
886
887 for (i = 0; i < len; fp++, i++) {
888 struct sock_filter_int tmp_insns[6] = { };
889 struct sock_filter_int *insn = tmp_insns;
890
891 if (addrs)
892 addrs[i] = new_insn - new_prog;
893
894 switch (fp->code) {
895 /* All arithmetic insns and skb loads map as-is. */
896 case BPF_ALU | BPF_ADD | BPF_X:
897 case BPF_ALU | BPF_ADD | BPF_K:
898 case BPF_ALU | BPF_SUB | BPF_X:
899 case BPF_ALU | BPF_SUB | BPF_K:
900 case BPF_ALU | BPF_AND | BPF_X:
901 case BPF_ALU | BPF_AND | BPF_K:
902 case BPF_ALU | BPF_OR | BPF_X:
903 case BPF_ALU | BPF_OR | BPF_K:
904 case BPF_ALU | BPF_LSH | BPF_X:
905 case BPF_ALU | BPF_LSH | BPF_K:
906 case BPF_ALU | BPF_RSH | BPF_X:
907 case BPF_ALU | BPF_RSH | BPF_K:
908 case BPF_ALU | BPF_XOR | BPF_X:
909 case BPF_ALU | BPF_XOR | BPF_K:
910 case BPF_ALU | BPF_MUL | BPF_X:
911 case BPF_ALU | BPF_MUL | BPF_K:
912 case BPF_ALU | BPF_DIV | BPF_X:
913 case BPF_ALU | BPF_DIV | BPF_K:
914 case BPF_ALU | BPF_MOD | BPF_X:
915 case BPF_ALU | BPF_MOD | BPF_K:
916 case BPF_ALU | BPF_NEG:
917 case BPF_LD | BPF_ABS | BPF_W:
918 case BPF_LD | BPF_ABS | BPF_H:
919 case BPF_LD | BPF_ABS | BPF_B:
920 case BPF_LD | BPF_IND | BPF_W:
921 case BPF_LD | BPF_IND | BPF_H:
922 case BPF_LD | BPF_IND | BPF_B:
923 /* Check for overloaded BPF extension and
924 * directly convert it if found, otherwise
925 * just move on with mapping.
926 */
927 if (BPF_CLASS(fp->code) == BPF_LD &&
928 BPF_MODE(fp->code) == BPF_ABS &&
929 convert_bpf_extensions(fp, &insn))
930 break;
931
932 insn->code = fp->code;
933 insn->a_reg = A_REG;
934 insn->x_reg = X_REG;
935 insn->imm = fp->k;
936 break;
937
938 /* Jump opcodes map as-is, but offsets need adjustment. */
939 case BPF_JMP | BPF_JA:
940 target = i + fp->k + 1;
941 insn->code = fp->code;
942#define EMIT_JMP \
943 do { \
944 if (target >= len || target < 0) \
945 goto err; \
946 insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
947 /* Adjust pc relative offset for 2nd or 3rd insn. */ \
948 insn->off -= insn - tmp_insns; \
949 } while (0)
950
951 EMIT_JMP;
952 break;
953
954 case BPF_JMP | BPF_JEQ | BPF_K:
955 case BPF_JMP | BPF_JEQ | BPF_X:
956 case BPF_JMP | BPF_JSET | BPF_K:
957 case BPF_JMP | BPF_JSET | BPF_X:
958 case BPF_JMP | BPF_JGT | BPF_K:
959 case BPF_JMP | BPF_JGT | BPF_X:
960 case BPF_JMP | BPF_JGE | BPF_K:
961 case BPF_JMP | BPF_JGE | BPF_X:
962 if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
 963			/* BPF immediates are signed, zero-extend the
 964			 * immediate into the tmp register and use it
965 * in compare insn.
966 */
967 insn->code = BPF_ALU | BPF_MOV | BPF_K;
968 insn->a_reg = TMP_REG;
969 insn->imm = fp->k;
970 insn++;
971
972 insn->a_reg = A_REG;
973 insn->x_reg = TMP_REG;
974 bpf_src = BPF_X;
975 } else {
976 insn->a_reg = A_REG;
977 insn->x_reg = X_REG;
978 insn->imm = fp->k;
979 bpf_src = BPF_SRC(fp->code);
255 } 980 }
256 return 0; 981
257 case BPF_S_LD_B_ABS: 982 /* Common case where 'jump_false' is next insn. */
258 k = K; 983 if (fp->jf == 0) {
259load_b: 984 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
260 ptr = load_pointer(skb, k, 1, &tmp); 985 target = i + fp->jt + 1;
261 if (ptr != NULL) { 986 EMIT_JMP;
262 A = *(u8 *)ptr; 987 break;
263 continue;
264 } 988 }
265 return 0; 989
266 case BPF_S_LD_W_LEN: 990 /* Convert JEQ into JNE when 'jump_true' is next insn. */
267 A = skb->len; 991 if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
268 continue; 992 insn->code = BPF_JMP | BPF_JNE | bpf_src;
269 case BPF_S_LDX_W_LEN: 993 target = i + fp->jf + 1;
270 X = skb->len; 994 EMIT_JMP;
271 continue; 995 break;
272 case BPF_S_LD_W_IND:
273 k = X + K;
274 goto load_w;
275 case BPF_S_LD_H_IND:
276 k = X + K;
277 goto load_h;
278 case BPF_S_LD_B_IND:
279 k = X + K;
280 goto load_b;
281 case BPF_S_LDX_B_MSH:
282 ptr = load_pointer(skb, K, 1, &tmp);
283 if (ptr != NULL) {
284 X = (*(u8 *)ptr & 0xf) << 2;
285 continue;
286 } 996 }
287 return 0; 997
288 case BPF_S_LD_IMM: 998 /* Other jumps are mapped into two insns: Jxx and JA. */
289 A = K; 999 target = i + fp->jt + 1;
290 continue; 1000 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
291 case BPF_S_LDX_IMM: 1001 EMIT_JMP;
292 X = K; 1002 insn++;
293 continue; 1003
294 case BPF_S_LD_MEM: 1004 insn->code = BPF_JMP | BPF_JA;
295 A = mem[K]; 1005 target = i + fp->jf + 1;
296 continue; 1006 EMIT_JMP;
297 case BPF_S_LDX_MEM: 1007 break;
298 X = mem[K]; 1008
 299 continue; 1009		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
300 case BPF_S_MISC_TAX: 1010 case BPF_LDX | BPF_MSH | BPF_B:
301 X = A; 1011 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
302 continue; 1012 insn->a_reg = TMP_REG;
303 case BPF_S_MISC_TXA: 1013 insn->x_reg = A_REG;
304 A = X; 1014 insn++;
305 continue; 1015
306 case BPF_S_RET_K: 1016 insn->code = BPF_LD | BPF_ABS | BPF_B;
307 return K; 1017 insn->a_reg = A_REG;
308 case BPF_S_RET_A: 1018 insn->imm = fp->k;
309 return A; 1019 insn++;
310 case BPF_S_ST: 1020
311 mem[K] = A; 1021 insn->code = BPF_ALU | BPF_AND | BPF_K;
312 continue; 1022 insn->a_reg = A_REG;
313 case BPF_S_STX: 1023 insn->imm = 0xf;
314 mem[K] = X; 1024 insn++;
315 continue; 1025
316 case BPF_S_ANC_PROTOCOL: 1026 insn->code = BPF_ALU | BPF_LSH | BPF_K;
317 A = ntohs(skb->protocol); 1027 insn->a_reg = A_REG;
318 continue; 1028 insn->imm = 2;
319 case BPF_S_ANC_PKTTYPE: 1029 insn++;
320 A = skb->pkt_type; 1030
321 continue; 1031 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
322 case BPF_S_ANC_IFINDEX: 1032 insn->a_reg = X_REG;
323 if (!skb->dev) 1033 insn->x_reg = A_REG;
324 return 0; 1034 insn++;
325 A = skb->dev->ifindex; 1035
326 continue; 1036 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
327 case BPF_S_ANC_MARK: 1037 insn->a_reg = A_REG;
328 A = skb->mark; 1038 insn->x_reg = TMP_REG;
329 continue; 1039 break;
330 case BPF_S_ANC_QUEUE: 1040
 331 A = skb->queue_mapping; 1041		/* RET_K, RET_A are remapped into 2 insns. */
332 continue; 1042 case BPF_RET | BPF_A:
333 case BPF_S_ANC_HATYPE: 1043 case BPF_RET | BPF_K:
334 if (!skb->dev) 1044 insn->code = BPF_ALU | BPF_MOV |
335 return 0; 1045 (BPF_RVAL(fp->code) == BPF_K ?
336 A = skb->dev->type; 1046 BPF_K : BPF_X);
337 continue; 1047 insn->a_reg = 0;
338 case BPF_S_ANC_RXHASH: 1048 insn->x_reg = A_REG;
339 A = skb->hash; 1049 insn->imm = fp->k;
340 continue; 1050 insn++;
341 case BPF_S_ANC_CPU: 1051
342 A = raw_smp_processor_id(); 1052 insn->code = BPF_JMP | BPF_EXIT;
343 continue; 1053 break;
344 case BPF_S_ANC_VLAN_TAG: 1054
345 A = vlan_tx_tag_get(skb); 1055 /* Store to stack. */
346 continue; 1056 case BPF_ST:
347 case BPF_S_ANC_VLAN_TAG_PRESENT: 1057 case BPF_STX:
348 A = !!vlan_tx_tag_present(skb); 1058 insn->code = BPF_STX | BPF_MEM | BPF_W;
349 continue; 1059 insn->a_reg = FP_REG;
350 case BPF_S_ANC_PAY_OFFSET: 1060 insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
351 A = __skb_get_poff(skb); 1061 insn->off = -(BPF_MEMWORDS - fp->k) * 4;
352 continue; 1062 break;
353 case BPF_S_ANC_NLATTR: { 1063
354 struct nlattr *nla; 1064 /* Load from stack. */
355 1065 case BPF_LD | BPF_MEM:
356 if (skb_is_nonlinear(skb)) 1066 case BPF_LDX | BPF_MEM:
357 return 0; 1067 insn->code = BPF_LDX | BPF_MEM | BPF_W;
358 if (A > skb->len - sizeof(struct nlattr)) 1068 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
359 return 0; 1069 A_REG : X_REG;
360 1070 insn->x_reg = FP_REG;
361 nla = nla_find((struct nlattr *)&skb->data[A], 1071 insn->off = -(BPF_MEMWORDS - fp->k) * 4;
362 skb->len - A, X); 1072 break;
363 if (nla) 1073
364 A = (void *)nla - (void *)skb->data; 1074 /* A = K or X = K */
365 else 1075 case BPF_LD | BPF_IMM:
366 A = 0; 1076 case BPF_LDX | BPF_IMM:
367 continue; 1077 insn->code = BPF_ALU | BPF_MOV | BPF_K;
368 } 1078 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
369 case BPF_S_ANC_NLATTR_NEST: { 1079 A_REG : X_REG;
370 struct nlattr *nla; 1080 insn->imm = fp->k;
371 1081 break;
372 if (skb_is_nonlinear(skb)) 1082
373 return 0; 1083 /* X = A */
374 if (A > skb->len - sizeof(struct nlattr)) 1084 case BPF_MISC | BPF_TAX:
375 return 0; 1085 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
376 1086 insn->a_reg = X_REG;
377 nla = (struct nlattr *)&skb->data[A]; 1087 insn->x_reg = A_REG;
378 if (nla->nla_len > A - skb->len) 1088 break;
379 return 0; 1089
380 1090 /* A = X */
381 nla = nla_find_nested(nla, X); 1091 case BPF_MISC | BPF_TXA:
382 if (nla) 1092 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
383 A = (void *)nla - (void *)skb->data; 1093 insn->a_reg = A_REG;
384 else 1094 insn->x_reg = X_REG;
385 A = 0; 1095 break;
386 continue; 1096
387 } 1097 /* A = skb->len or X = skb->len */
388#ifdef CONFIG_SECCOMP_FILTER 1098 case BPF_LD | BPF_W | BPF_LEN:
389 case BPF_S_ANC_SECCOMP_LD_W: 1099 case BPF_LDX | BPF_W | BPF_LEN:
390 A = seccomp_bpf_load(fentry->k); 1100 insn->code = BPF_LDX | BPF_MEM | BPF_W;
391 continue; 1101 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
392#endif 1102 A_REG : X_REG;
1103 insn->x_reg = CTX_REG;
1104 insn->off = offsetof(struct sk_buff, len);
1105 break;
1106
1107 /* access seccomp_data fields */
1108 case BPF_LDX | BPF_ABS | BPF_W:
1109 insn->code = BPF_LDX | BPF_MEM | BPF_W;
1110 insn->a_reg = A_REG;
1111 insn->x_reg = CTX_REG;
1112 insn->off = fp->k;
1113 break;
1114
393 default: 1115 default:
394 WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n", 1116 goto err;
395 fentry->code, fentry->jt,
396 fentry->jf, fentry->k);
397 return 0;
398 } 1117 }
1118
1119 insn++;
1120 if (new_prog)
1121 memcpy(new_insn, tmp_insns,
1122 sizeof(*insn) * (insn - tmp_insns));
1123
1124 new_insn += insn - tmp_insns;
1125 }
1126
1127 if (!new_prog) {
1128 /* Only calculating new length. */
1129 *new_len = new_insn - new_prog;
1130 return 0;
399 } 1131 }
400 1132
1133 pass++;
1134 if (new_flen != new_insn - new_prog) {
1135 new_flen = new_insn - new_prog;
1136 if (pass > 2)
1137 goto err;
1138
1139 goto do_pass;
1140 }
1141
1142 kfree(addrs);
1143 BUG_ON(*new_len != new_flen);
401 return 0; 1144 return 0;
1145err:
1146 kfree(addrs);
1147 return -EINVAL;
402} 1148}
403EXPORT_SYMBOL(sk_run_filter);
404 1149
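The kernel-doc above sk_convert_filter() describes its two-step calling convention; __sk_migrate_filter() further below is the real in-tree user. As a hedged, standalone restatement of that workflow, with convert_example() being an illustrative name and not part of the patch:

/* Sketch only: assumes 'old_prog'/'old_len' hold a valid, already
 * checked classic BPF program (sk_chk_filter() passed).
 */
static struct sock_filter_int *convert_example(struct sock_filter *old_prog,
					       int old_len)
{
	struct sock_filter_int *new_prog;
	int new_len;

	/* 1st pass: only calculate the length of the converted program. */
	if (sk_convert_filter(old_prog, old_len, NULL, &new_len))
		return NULL;

	new_prog = kmalloc(sizeof(*new_prog) * new_len, GFP_KERNEL);
	if (!new_prog)
		return NULL;

	/* 2nd pass: remap the instructions into the new buffer. */
	if (sk_convert_filter(old_prog, old_len, new_prog, &new_len)) {
		kfree(new_prog);
		return NULL;
	}

	return new_prog;
}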
405/* 1150/* Security:
406 * Security : 1151 *
407 * A BPF program is able to use 16 cells of memory to store intermediate 1152 * A BPF program is able to use 16 cells of memory to store intermediate
408 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) 1153 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
1154 *
 409 * As we don't want to clear the mem[] array for each packet going through 1155 * As we don't want to clear the mem[] array for each packet going through
 410 * sk_run_filter(), we check that a filter loaded by user space never tries to read 1156 * sk_run_filter(), we check that a filter loaded by user space never tries to read
411 * a cell if not previously written, and we check all branches to be sure 1157 * a cell if not previously written, and we check all branches to be sure
@@ -629,30 +1375,197 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
629} 1375}
630EXPORT_SYMBOL(sk_chk_filter); 1376EXPORT_SYMBOL(sk_chk_filter);
631 1377
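To make the read-before-write rule from the Security comment above concrete, here is a hedged pair of classic filters: sk_chk_filter() is expected to reject the first, which loads scratch cell M[0] that was never written, while the second stores before it loads.

/* Illustrative only; BPF_STMT() and the opcodes come from <linux/filter.h>. */
static struct sock_filter reads_uninit_mem[] = {
	BPF_STMT(BPF_LD | BPF_MEM, 0),		/* A = M[0], never written: rejected */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),
};

static struct sock_filter writes_then_reads[] = {
	BPF_STMT(BPF_LD | BPF_W | BPF_LEN, 0),	/* A = skb->len */
	BPF_STMT(BPF_ST, 0),			/* M[0] = A */
	BPF_STMT(BPF_LD | BPF_MEM, 0),		/* A = M[0]: fine */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),
};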
1378static int sk_store_orig_filter(struct sk_filter *fp,
1379 const struct sock_fprog *fprog)
1380{
1381 unsigned int fsize = sk_filter_proglen(fprog);
1382 struct sock_fprog_kern *fkprog;
1383
1384 fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
1385 if (!fp->orig_prog)
1386 return -ENOMEM;
1387
1388 fkprog = fp->orig_prog;
1389 fkprog->len = fprog->len;
1390 fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
1391 if (!fkprog->filter) {
1392 kfree(fp->orig_prog);
1393 return -ENOMEM;
1394 }
1395
1396 return 0;
1397}
1398
1399static void sk_release_orig_filter(struct sk_filter *fp)
1400{
1401 struct sock_fprog_kern *fprog = fp->orig_prog;
1402
1403 if (fprog) {
1404 kfree(fprog->filter);
1405 kfree(fprog);
1406 }
1407}
1408
632/** 1409/**
633 * sk_filter_release_rcu - Release a socket filter by rcu_head 1410 * sk_filter_release_rcu - Release a socket filter by rcu_head
634 * @rcu: rcu_head that contains the sk_filter to free 1411 * @rcu: rcu_head that contains the sk_filter to free
635 */ 1412 */
636void sk_filter_release_rcu(struct rcu_head *rcu) 1413static void sk_filter_release_rcu(struct rcu_head *rcu)
637{ 1414{
638 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 1415 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
639 1416
1417 sk_release_orig_filter(fp);
640 bpf_jit_free(fp); 1418 bpf_jit_free(fp);
641} 1419}
642EXPORT_SYMBOL(sk_filter_release_rcu);
643 1420
644static int __sk_prepare_filter(struct sk_filter *fp) 1421/**
1422 * sk_filter_release - release a socket filter
1423 * @fp: filter to remove
1424 *
1425 * Remove a filter from a socket and release its resources.
1426 */
1427static void sk_filter_release(struct sk_filter *fp)
1428{
1429 if (atomic_dec_and_test(&fp->refcnt))
1430 call_rcu(&fp->rcu, sk_filter_release_rcu);
1431}
1432
1433void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1434{
1435 atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1436 sk_filter_release(fp);
1437}
1438
1439void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1440{
1441 atomic_inc(&fp->refcnt);
1442 atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
1443}
1444
1445static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
1446 struct sock *sk,
1447 unsigned int len)
1448{
1449 struct sk_filter *fp_new;
1450
1451 if (sk == NULL)
1452 return krealloc(fp, len, GFP_KERNEL);
1453
1454 fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
1455 if (fp_new) {
1456 memcpy(fp_new, fp, sizeof(struct sk_filter));
 1457		/* As we're keeping orig_prog along in fp_new,
1458 * we need to make sure we're not evicting it
1459 * from the old fp.
1460 */
1461 fp->orig_prog = NULL;
1462 sk_filter_uncharge(sk, fp);
1463 }
1464
1465 return fp_new;
1466}
1467
1468static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1469 struct sock *sk)
1470{
1471 struct sock_filter *old_prog;
1472 struct sk_filter *old_fp;
1473 int i, err, new_len, old_len = fp->len;
1474
 1475	/* We are free to overwrite insns et al right here as they
 1476	 * won't be used internally anymore at this point in time,
1477 * after the migration to the internal BPF instruction
1478 * representation.
1479 */
1480 BUILD_BUG_ON(sizeof(struct sock_filter) !=
1481 sizeof(struct sock_filter_int));
1482
1483 /* For now, we need to unfiddle BPF_S_* identifiers in place.
 1484	 * This can sooner or later be subject to removal, e.g. when
1485 * JITs have been converted.
1486 */
1487 for (i = 0; i < fp->len; i++)
1488 sk_decode_filter(&fp->insns[i], &fp->insns[i]);
1489
1490 /* Conversion cannot happen on overlapping memory areas,
1491 * so we need to keep the user BPF around until the 2nd
1492 * pass. At this time, the user BPF is stored in fp->insns.
1493 */
1494 old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
1495 GFP_KERNEL);
1496 if (!old_prog) {
1497 err = -ENOMEM;
1498 goto out_err;
1499 }
1500
1501 /* 1st pass: calculate the new program length. */
1502 err = sk_convert_filter(old_prog, old_len, NULL, &new_len);
1503 if (err)
1504 goto out_err_free;
1505
1506 /* Expand fp for appending the new filter representation. */
1507 old_fp = fp;
1508 fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len));
1509 if (!fp) {
1510 /* The old_fp is still around in case we couldn't
1511 * allocate new memory, so uncharge on that one.
1512 */
1513 fp = old_fp;
1514 err = -ENOMEM;
1515 goto out_err_free;
1516 }
1517
1518 fp->bpf_func = sk_run_filter_int_skb;
1519 fp->len = new_len;
1520
1521 /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
1522 err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
1523 if (err)
1524 /* 2nd sk_convert_filter() can fail only if it fails
 1525		/* to allocate memory; remapping must succeed. Note
1526 * that at this time old_fp has already been released
1527 * by __sk_migrate_realloc().
1528 */
1529 goto out_err_free;
1530
1531 kfree(old_prog);
1532 return fp;
1533
1534out_err_free:
1535 kfree(old_prog);
1536out_err:
1537 /* Rollback filter setup. */
1538 if (sk != NULL)
1539 sk_filter_uncharge(sk, fp);
1540 else
1541 kfree(fp);
1542 return ERR_PTR(err);
1543}
1544
1545static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
1546 struct sock *sk)
645{ 1547{
646 int err; 1548 int err;
647 1549
648 fp->bpf_func = sk_run_filter; 1550 fp->bpf_func = NULL;
1551 fp->jited = 0;
649 1552
650 err = sk_chk_filter(fp->insns, fp->len); 1553 err = sk_chk_filter(fp->insns, fp->len);
651 if (err) 1554 if (err)
652 return err; 1555 return ERR_PTR(err);
653 1556
1557 /* Probe if we can JIT compile the filter and if so, do
1558 * the compilation of the filter.
1559 */
654 bpf_jit_compile(fp); 1560 bpf_jit_compile(fp);
655 return 0; 1561
1562 /* JIT compiler couldn't process this filter, so do the
1563 * internal BPF translation for the optimized interpreter.
1564 */
1565 if (!fp->jited)
1566 fp = __sk_migrate_filter(fp, sk);
1567
1568 return fp;
656} 1569}
657 1570
658/** 1571/**
@@ -668,9 +1581,8 @@ static int __sk_prepare_filter(struct sk_filter *fp)
668int sk_unattached_filter_create(struct sk_filter **pfp, 1581int sk_unattached_filter_create(struct sk_filter **pfp,
669 struct sock_fprog *fprog) 1582 struct sock_fprog *fprog)
670{ 1583{
1584 unsigned int fsize = sk_filter_proglen(fprog);
671 struct sk_filter *fp; 1585 struct sk_filter *fp;
672 unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
673 int err;
674 1586
675 /* Make sure new filter is there and in the right amounts. */ 1587 /* Make sure new filter is there and in the right amounts. */
676 if (fprog->filter == NULL) 1588 if (fprog->filter == NULL)
@@ -679,20 +1591,26 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
679 fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); 1591 fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
680 if (!fp) 1592 if (!fp)
681 return -ENOMEM; 1593 return -ENOMEM;
1594
682 memcpy(fp->insns, fprog->filter, fsize); 1595 memcpy(fp->insns, fprog->filter, fsize);
683 1596
684 atomic_set(&fp->refcnt, 1); 1597 atomic_set(&fp->refcnt, 1);
685 fp->len = fprog->len; 1598 fp->len = fprog->len;
1599 /* Since unattached filters are not copied back to user
1600 * space through sk_get_filter(), we do not need to hold
 1601	 * a copy here, and can spare ourselves the work.
1602 */
1603 fp->orig_prog = NULL;
686 1604
687 err = __sk_prepare_filter(fp); 1605 /* __sk_prepare_filter() already takes care of uncharging
688 if (err) 1606 * memory in case something goes wrong.
689 goto free_mem; 1607 */
1608 fp = __sk_prepare_filter(fp, NULL);
1609 if (IS_ERR(fp))
1610 return PTR_ERR(fp);
690 1611
691 *pfp = fp; 1612 *pfp = fp;
692 return 0; 1613 return 0;
693free_mem:
694 kfree(fp);
695 return err;
696} 1614}
697EXPORT_SYMBOL_GPL(sk_unattached_filter_create); 1615EXPORT_SYMBOL_GPL(sk_unattached_filter_create);
698 1616
@@ -715,7 +1633,7 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
715int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1633int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
716{ 1634{
717 struct sk_filter *fp, *old_fp; 1635 struct sk_filter *fp, *old_fp;
718 unsigned int fsize = sizeof(struct sock_filter) * fprog->len; 1636 unsigned int fsize = sk_filter_proglen(fprog);
719 unsigned int sk_fsize = sk_filter_size(fprog->len); 1637 unsigned int sk_fsize = sk_filter_size(fprog->len);
720 int err; 1638 int err;
721 1639
@@ -729,6 +1647,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
729 fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); 1647 fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
730 if (!fp) 1648 if (!fp)
731 return -ENOMEM; 1649 return -ENOMEM;
1650
732 if (copy_from_user(fp->insns, fprog->filter, fsize)) { 1651 if (copy_from_user(fp->insns, fprog->filter, fsize)) {
733 sock_kfree_s(sk, fp, sk_fsize); 1652 sock_kfree_s(sk, fp, sk_fsize);
734 return -EFAULT; 1653 return -EFAULT;
@@ -737,18 +1656,26 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
737 atomic_set(&fp->refcnt, 1); 1656 atomic_set(&fp->refcnt, 1);
738 fp->len = fprog->len; 1657 fp->len = fprog->len;
739 1658
740 err = __sk_prepare_filter(fp); 1659 err = sk_store_orig_filter(fp, fprog);
741 if (err) { 1660 if (err) {
742 sk_filter_uncharge(sk, fp); 1661 sk_filter_uncharge(sk, fp);
743 return err; 1662 return -ENOMEM;
744 } 1663 }
745 1664
1665 /* __sk_prepare_filter() already takes care of uncharging
1666 * memory in case something goes wrong.
1667 */
1668 fp = __sk_prepare_filter(fp, sk);
1669 if (IS_ERR(fp))
1670 return PTR_ERR(fp);
1671
746 old_fp = rcu_dereference_protected(sk->sk_filter, 1672 old_fp = rcu_dereference_protected(sk->sk_filter,
747 sock_owned_by_user(sk)); 1673 sock_owned_by_user(sk));
748 rcu_assign_pointer(sk->sk_filter, fp); 1674 rcu_assign_pointer(sk->sk_filter, fp);
749 1675
750 if (old_fp) 1676 if (old_fp)
751 sk_filter_uncharge(sk, old_fp); 1677 sk_filter_uncharge(sk, old_fp);
1678
752 return 0; 1679 return 0;
753} 1680}
754EXPORT_SYMBOL_GPL(sk_attach_filter); 1681EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -768,6 +1695,7 @@ int sk_detach_filter(struct sock *sk)
768 sk_filter_uncharge(sk, filter); 1695 sk_filter_uncharge(sk, filter);
769 ret = 0; 1696 ret = 0;
770 } 1697 }
1698
771 return ret; 1699 return ret;
772} 1700}
773EXPORT_SYMBOL_GPL(sk_detach_filter); 1701EXPORT_SYMBOL_GPL(sk_detach_filter);
@@ -850,34 +1778,41 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
850 to->k = filt->k; 1778 to->k = filt->k;
851} 1779}
852 1780
853int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) 1781int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
1782 unsigned int len)
854{ 1783{
1784 struct sock_fprog_kern *fprog;
855 struct sk_filter *filter; 1785 struct sk_filter *filter;
856 int i, ret; 1786 int ret = 0;
857 1787
858 lock_sock(sk); 1788 lock_sock(sk);
859 filter = rcu_dereference_protected(sk->sk_filter, 1789 filter = rcu_dereference_protected(sk->sk_filter,
860 sock_owned_by_user(sk)); 1790 sock_owned_by_user(sk));
861 ret = 0;
862 if (!filter) 1791 if (!filter)
863 goto out; 1792 goto out;
864 ret = filter->len; 1793
 1794	/* We're copying the filter that was originally attached,
 1795	 * so no conversion/decode is needed anymore.
1796 */
1797 fprog = filter->orig_prog;
1798
1799 ret = fprog->len;
865 if (!len) 1800 if (!len)
 1801		/* User space only queries the number of filter blocks. */
866 goto out; 1802 goto out;
1803
867 ret = -EINVAL; 1804 ret = -EINVAL;
868 if (len < filter->len) 1805 if (len < fprog->len)
869 goto out; 1806 goto out;
870 1807
871 ret = -EFAULT; 1808 ret = -EFAULT;
872 for (i = 0; i < filter->len; i++) { 1809 if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog)))
873 struct sock_filter fb; 1810 goto out;
874
875 sk_decode_filter(&filter->insns[i], &fb);
876 if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
877 goto out;
878 }
879 1811
 880 ret = filter->len; 1812	/* Instead of bytes, the API expects us to return the number
1813 * of filter blocks.
1814 */
1815 ret = fprog->len;
881out: 1816out:
882 release_sock(sk); 1817 release_sock(sk);
883 return ret; 1818 return ret;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a0e9cf6379de..d7af18859322 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -52,9 +52,10 @@ EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
52int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, 52int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
53 struct sk_buff *skb, int attrtype) 53 struct sk_buff *skb, int attrtype)
54{ 54{
55 struct nlattr *attr; 55 struct sock_fprog_kern *fprog;
56 struct sk_filter *filter; 56 struct sk_filter *filter;
57 unsigned int len; 57 struct nlattr *attr;
58 unsigned int flen;
58 int err = 0; 59 int err = 0;
59 60
60 if (!ns_capable(user_ns, CAP_NET_ADMIN)) { 61 if (!ns_capable(user_ns, CAP_NET_ADMIN)) {
@@ -63,24 +64,20 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
63 } 64 }
64 65
65 rcu_read_lock(); 66 rcu_read_lock();
66
67 filter = rcu_dereference(sk->sk_filter); 67 filter = rcu_dereference(sk->sk_filter);
68 len = filter ? filter->len * sizeof(struct sock_filter) : 0; 68 if (!filter)
69 goto out;
69 70
70 attr = nla_reserve(skb, attrtype, len); 71 fprog = filter->orig_prog;
72 flen = sk_filter_proglen(fprog);
73
74 attr = nla_reserve(skb, attrtype, flen);
71 if (attr == NULL) { 75 if (attr == NULL) {
72 err = -EMSGSIZE; 76 err = -EMSGSIZE;
73 goto out; 77 goto out;
74 } 78 }
75 79
76 if (filter) { 80 memcpy(nla_data(attr), fprog->filter, flen);
77 struct sock_filter *fb = (struct sock_filter *)nla_data(attr);
78 int i;
79
80 for (i = 0; i < filter->len; i++, fb++)
81 sk_decode_filter(&filter->insns[i], fb);
82 }
83
84out: 81out:
85 rcu_read_unlock(); 82 rcu_read_unlock();
86 return err; 83 return err;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 661b5a40ec10..9ff26b3cc021 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -23,16 +23,19 @@
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <linux/export.h> 24#include <linux/export.h>
25 25
26static struct sock_filter ptp_filter[] = { 26static struct sk_filter *ptp_insns __read_mostly;
27 PTP_FILTER 27
28}; 28unsigned int ptp_classify_raw(const struct sk_buff *skb)
29{
30 return SK_RUN_FILTER(ptp_insns, skb);
31}
32EXPORT_SYMBOL_GPL(ptp_classify_raw);
29 33
30static unsigned int classify(const struct sk_buff *skb) 34static unsigned int classify(const struct sk_buff *skb)
31{ 35{
32 if (likely(skb->dev && 36 if (likely(skb->dev && skb->dev->phydev &&
33 skb->dev->phydev &&
34 skb->dev->phydev->drv)) 37 skb->dev->phydev->drv))
35 return sk_run_filter(skb, ptp_filter); 38 return ptp_classify_raw(skb);
36 else 39 else
37 return PTP_CLASS_NONE; 40 return PTP_CLASS_NONE;
38} 41}
@@ -60,11 +63,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
60 if (likely(phydev->drv->txtstamp)) { 63 if (likely(phydev->drv->txtstamp)) {
61 if (!atomic_inc_not_zero(&sk->sk_refcnt)) 64 if (!atomic_inc_not_zero(&sk->sk_refcnt))
62 return; 65 return;
66
63 clone = skb_clone(skb, GFP_ATOMIC); 67 clone = skb_clone(skb, GFP_ATOMIC);
64 if (!clone) { 68 if (!clone) {
65 sock_put(sk); 69 sock_put(sk);
66 return; 70 return;
67 } 71 }
72
68 clone->sk = sk; 73 clone->sk = sk;
69 phydev->drv->txtstamp(phydev, clone, type); 74 phydev->drv->txtstamp(phydev, clone, type);
70 } 75 }
@@ -89,12 +94,15 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
89 } 94 }
90 95
91 *skb_hwtstamps(skb) = *hwtstamps; 96 *skb_hwtstamps(skb) = *hwtstamps;
97
92 serr = SKB_EXT_ERR(skb); 98 serr = SKB_EXT_ERR(skb);
93 memset(serr, 0, sizeof(*serr)); 99 memset(serr, 0, sizeof(*serr));
94 serr->ee.ee_errno = ENOMSG; 100 serr->ee.ee_errno = ENOMSG;
95 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; 101 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
96 skb->sk = NULL; 102 skb->sk = NULL;
103
97 err = sock_queue_err_skb(sk, skb); 104 err = sock_queue_err_skb(sk, skb);
105
98 sock_put(sk); 106 sock_put(sk);
99 if (err) 107 if (err)
100 kfree_skb(skb); 108 kfree_skb(skb);
@@ -135,5 +143,10 @@ EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp);
135 143
136void __init skb_timestamping_init(void) 144void __init skb_timestamping_init(void)
137{ 145{
138 BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter))); 146 static struct sock_filter ptp_filter[] = { PTP_FILTER };
147 struct sock_fprog ptp_prog = {
148 .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
149 };
150
151 BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog));
139} 152}