aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarkos Chandras <markos.chandras@imgtec.com>2015-06-04 06:56:16 -0400
committerRalf Baechle <ralf@linux-mips.org>2015-06-21 15:54:25 -0400
commit266a88e2200eefa216180ce2761eb84e06f3d77e (patch)
treeba408f5efb9382895717f9f99fa456f0732c788f
parentbeaf70b8b7d025e7293ac013b198fc550ee2d3ec (diff)
MIPS: BPF: Introduce BPF ASM helpers
This commit introduces BPF ASM helpers for MIPS and MIPS64 kernels. The purpose of this patch is to twofold: 1) We are now able to handle negative offsets instead of either falling back to the interpreter or to simply not do anything and bail out. 2) Optimize reads from the packet header instead of calling the C helpers Because of this patch, we are now able to get rid of quite a bit of code in the JIT generation process by using MIPS optimized assembly code. The new assembly code makes the test_bpf testsuite happy with all 60 test passing successfully compared to the previous implementation where 2 tests were failing. Doing some basic analysis in the results between the old implementation and the new one we can obtain the following summary running current mainline on an ER8 board (+/- 30us delta is ignored to prevent noise from kernel scheduling or IRQ latencies): Summary: 22 tests are faster, 7 are slower and 47 saw no improvement with the most notable improvement being the tcpdump tests. The 7 tests that seem to be a bit slower is because they all follow the slow path (bpf_internal_load_pointer_neg_helper) which is meant to be slow so that's not a problem. Signed-off-by: Markos Chandras <markos.chandras@imgtec.com> Cc: netdev@vger.kernel.org Cc: "David S. Miller" <davem@davemloft.net> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Daniel Borkmann <dborkman@redhat.com> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org> Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: netdev@vger.kernel.org Patchwork: http://patchwork.linux-mips.org/patch/10530/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r--arch/mips/net/Makefile2
-rw-r--r--arch/mips/net/bpf_jit.c174
-rw-r--r--arch/mips/net/bpf_jit.h33
-rw-r--r--arch/mips/net/bpf_jit_asm.S238
4 files changed, 302 insertions, 145 deletions
diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile
index ae74b3a91f5c..8c2771401f54 100644
--- a/arch/mips/net/Makefile
+++ b/arch/mips/net/Makefile
@@ -1,3 +1,3 @@
1# MIPS networking code 1# MIPS networking code
2 2
3obj-$(CONFIG_BPF_JIT) += bpf_jit.o 3obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_asm.o
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index f0db4f8310b2..0c4a133f6216 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -29,11 +29,14 @@
29#include "bpf_jit.h" 29#include "bpf_jit.h"
30 30
31/* ABI 31/* ABI
32 * 32 * r_skb_hl SKB header length
33 * r_data SKB data pointer
34 * r_off Offset
33 * r_A BPF register A 35 * r_A BPF register A
34 * r_X BPF register X 36 * r_X BPF register X
35 * r_skb *skb 37 * r_skb *skb
36 * r_M *scratch memory 38 * r_M *scratch memory
39 * r_skb_len SKB length
37 * 40 *
38 * On entry (*bpf_func)(*skb, *filter) 41 * On entry (*bpf_func)(*skb, *filter)
39 * a0 = MIPS_R_A0 = skb; 42 * a0 = MIPS_R_A0 = skb;
@@ -75,6 +78,8 @@
75#define SEEN_X SEEN_SREG(4) 78#define SEEN_X SEEN_SREG(4)
76#define SEEN_SKB SEEN_SREG(5) 79#define SEEN_SKB SEEN_SREG(5)
77#define SEEN_MEM SEEN_SREG(6) 80#define SEEN_MEM SEEN_SREG(6)
81/* SEEN_SKB_DATA also implies skb_hl and skb_len */
82#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0))
78 83
79/* Arguments used by JIT */ 84/* Arguments used by JIT */
80#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ 85#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */
@@ -537,20 +542,6 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
537 /* Adjust the stack pointer */ 542 /* Adjust the stack pointer */
538 emit_stack_offset(-align_sp(offset), ctx); 543 emit_stack_offset(-align_sp(offset), ctx);
539 544
540 if (ctx->flags & SEEN_CALL) {
541 /* Argument save area */
542 if (config_enabled(CONFIG_64BIT))
543 /* Bottom of current frame */
544 real_off = align_sp(offset) - SZREG;
545 else
546 /* Top of previous frame */
547 real_off = align_sp(offset) + SZREG;
548 emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
549 emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + SZREG, ctx);
550
551 real_off = 0;
552 }
553
554 tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; 545 tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
555 /* sflags is essentially a bitmap */ 546 /* sflags is essentially a bitmap */
556 while (tmp_flags) { 547 while (tmp_flags) {
@@ -583,19 +574,6 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx,
583 int i, real_off = 0; 574 int i, real_off = 0;
584 u32 sflags, tmp_flags; 575 u32 sflags, tmp_flags;
585 576
586 if (ctx->flags & SEEN_CALL) {
587 if (config_enabled(CONFIG_64BIT))
588 /* Bottom of current frame */
589 real_off = align_sp(offset) - SZREG;
590 else
591 /* Top of previous frame */
592 real_off = align_sp(offset) + SZREG;
593 emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
594 emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + SZREG, ctx);
595
596 real_off = 0;
597 }
598
599 tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; 577 tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
600 /* sflags is a bitmap */ 578 /* sflags is a bitmap */
601 i = 0; 579 i = 0;
@@ -629,17 +607,7 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx)
629 sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ 607 sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */
630 608
631 if (ctx->flags & SEEN_CALL) 609 if (ctx->flags & SEEN_CALL)
632 /* 610 sp_off += SZREG; /* Space for our ra register */
633 * The JIT code make calls to external functions using 2
634 * arguments. Therefore, for o32 we don't need to allocate
635 * space because we don't care if the argumetns are lost
636 * across calls. We do need however to preserve incoming
637 * arguments but the space is already allocated for us by
638 * the caller. On the other hand, for n64, we need to allocate
639 * this space ourselves. We need to preserve $ra as well.
640 */
641 sp_off += config_enabled(CONFIG_64BIT) ?
642 (ARGS_USED_BY_JIT + 1) * SZREG : SZREG;
643 611
644 return sp_off; 612 return sp_off;
645} 613}
@@ -656,6 +624,19 @@ static void build_prologue(struct jit_ctx *ctx)
656 if (ctx->flags & SEEN_SKB) 624 if (ctx->flags & SEEN_SKB)
657 emit_reg_move(r_skb, MIPS_R_A0, ctx); 625 emit_reg_move(r_skb, MIPS_R_A0, ctx);
658 626
627 if (ctx->flags & SEEN_SKB_DATA) {
628 /* Load packet length */
629 emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len),
630 ctx);
631 emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len),
632 ctx);
633 /* Load the data pointer */
634 emit_load_ptr(r_skb_data, r_skb,
635 offsetof(struct sk_buff, data), ctx);
636 /* Load the header length */
637 emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx);
638 }
639
659 if (ctx->flags & SEEN_X) 640 if (ctx->flags & SEEN_X)
660 emit_jit_reg_move(r_X, r_zero, ctx); 641 emit_jit_reg_move(r_X, r_zero, ctx);
661 642
@@ -678,43 +659,17 @@ static void build_epilogue(struct jit_ctx *ctx)
678 emit_nop(ctx); 659 emit_nop(ctx);
679} 660}
680 661
681static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) 662#define CHOOSE_LOAD_FUNC(K, func) \
682{ 663 ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
683 u8 ret; 664 func##_positive)
684 int err;
685
686 err = skb_copy_bits(skb, offset, &ret, 1);
687
688 return (u64)err << 32 | ret;
689}
690
691static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
692{
693 u16 ret;
694 int err;
695
696 err = skb_copy_bits(skb, offset, &ret, 2);
697
698 return (u64)err << 32 | ntohs(ret);
699}
700
701static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
702{
703 u32 ret;
704 int err;
705
706 err = skb_copy_bits(skb, offset, &ret, 4);
707
708 return (u64)err << 32 | ntohl(ret);
709}
710 665
711static int build_body(struct jit_ctx *ctx) 666static int build_body(struct jit_ctx *ctx)
712{ 667{
713 void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
714 const struct bpf_prog *prog = ctx->skf; 668 const struct bpf_prog *prog = ctx->skf;
715 const struct sock_filter *inst; 669 const struct sock_filter *inst;
716 unsigned int i, off, load_order, condt; 670 unsigned int i, off, condt;
717 u32 k, b_off __maybe_unused; 671 u32 k, b_off __maybe_unused;
672 u8 (*sk_load_func)(unsigned long *skb, int offset);
718 673
719 for (i = 0; i < prog->len; i++) { 674 for (i = 0; i < prog->len; i++) {
720 u16 code; 675 u16 code;
@@ -748,71 +703,46 @@ static int build_body(struct jit_ctx *ctx)
748 break; 703 break;
749 case BPF_LD | BPF_W | BPF_ABS: 704 case BPF_LD | BPF_W | BPF_ABS:
750 /* A <- P[k:4] */ 705 /* A <- P[k:4] */
751 load_order = 2; 706 sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word);
752 goto load; 707 goto load;
753 case BPF_LD | BPF_H | BPF_ABS: 708 case BPF_LD | BPF_H | BPF_ABS:
754 /* A <- P[k:2] */ 709 /* A <- P[k:2] */
755 load_order = 1; 710 sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half);
756 goto load; 711 goto load;
757 case BPF_LD | BPF_B | BPF_ABS: 712 case BPF_LD | BPF_B | BPF_ABS:
758 /* A <- P[k:1] */ 713 /* A <- P[k:1] */
759 load_order = 0; 714 sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte);
760load: 715load:
761 /* the interpreter will deal with the negative K */
762 if ((int)k < 0)
763 return -ENOTSUPP;
764
765 emit_load_imm(r_off, k, ctx); 716 emit_load_imm(r_off, k, ctx);
766load_common: 717load_common:
767 /*
768 * We may got here from the indirect loads so
769 * return if offset is negative.
770 */
771 emit_slt(r_s0, r_off, r_zero, ctx);
772 emit_bcond(MIPS_COND_NE, r_s0, r_zero,
773 b_imm(prog->len, ctx), ctx);
774 emit_reg_move(r_ret, r_zero, ctx);
775
776 ctx->flags |= SEEN_CALL | SEEN_OFF | 718 ctx->flags |= SEEN_CALL | SEEN_OFF |
777 SEEN_SKB | SEEN_A; 719 SEEN_SKB | SEEN_A | SEEN_SKB_DATA;
778 720
779 emit_load_func(r_s0, (ptr)load_func[load_order], 721 emit_load_func(r_s0, (ptr)sk_load_func, ctx);
780 ctx);
781 emit_reg_move(MIPS_R_A0, r_skb, ctx); 722 emit_reg_move(MIPS_R_A0, r_skb, ctx);
782 emit_jalr(MIPS_R_RA, r_s0, ctx); 723 emit_jalr(MIPS_R_RA, r_s0, ctx);
783 /* Load second argument to delay slot */ 724 /* Load second argument to delay slot */
784 emit_reg_move(MIPS_R_A1, r_off, ctx); 725 emit_reg_move(MIPS_R_A1, r_off, ctx);
785 /* Check the error value */ 726 /* Check the error value */
786 if (config_enabled(CONFIG_64BIT)) { 727 emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx),
787 /* Get error code from the top 32-bits */ 728 ctx);
788 emit_dsrl32(r_s0, r_val, 0, ctx); 729 /* Load return register on DS for failures */
789 /* Branch to 3 instructions ahead */ 730 emit_reg_move(r_ret, r_zero, ctx);
790 emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2,
791 ctx);
792 } else {
793 /* Branch to 3 instructions ahead */
794 emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2,
795 ctx);
796 }
797 emit_nop(ctx);
798 /* We are good */
799 emit_b(b_imm(i + 1, ctx), ctx);
800 emit_jit_reg_move(r_A, r_val, ctx);
801 /* Return with error */ 731 /* Return with error */
802 emit_b(b_imm(prog->len, ctx), ctx); 732 emit_b(b_imm(prog->len, ctx), ctx);
803 emit_reg_move(r_ret, r_zero, ctx); 733 emit_nop(ctx);
804 break; 734 break;
805 case BPF_LD | BPF_W | BPF_IND: 735 case BPF_LD | BPF_W | BPF_IND:
806 /* A <- P[X + k:4] */ 736 /* A <- P[X + k:4] */
807 load_order = 2; 737 sk_load_func = sk_load_word;
808 goto load_ind; 738 goto load_ind;
809 case BPF_LD | BPF_H | BPF_IND: 739 case BPF_LD | BPF_H | BPF_IND:
810 /* A <- P[X + k:2] */ 740 /* A <- P[X + k:2] */
811 load_order = 1; 741 sk_load_func = sk_load_half;
812 goto load_ind; 742 goto load_ind;
813 case BPF_LD | BPF_B | BPF_IND: 743 case BPF_LD | BPF_B | BPF_IND:
814 /* A <- P[X + k:1] */ 744 /* A <- P[X + k:1] */
815 load_order = 0; 745 sk_load_func = sk_load_byte;
816load_ind: 746load_ind:
817 ctx->flags |= SEEN_OFF | SEEN_X; 747 ctx->flags |= SEEN_OFF | SEEN_X;
818 emit_addiu(r_off, r_X, k, ctx); 748 emit_addiu(r_off, r_X, k, ctx);
@@ -834,14 +764,10 @@ load_ind:
834 emit_load(r_X, r_skb, off, ctx); 764 emit_load(r_X, r_skb, off, ctx);
835 break; 765 break;
836 case BPF_LDX | BPF_B | BPF_MSH: 766 case BPF_LDX | BPF_B | BPF_MSH:
837 /* the interpreter will deal with the negative K */
838 if ((int)k < 0)
839 return -ENOTSUPP;
840
841 /* X <- 4 * (P[k:1] & 0xf) */ 767 /* X <- 4 * (P[k:1] & 0xf) */
842 ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; 768 ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB;
843 /* Load offset to a1 */ 769 /* Load offset to a1 */
844 emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx); 770 emit_load_func(r_s0, (ptr)sk_load_byte, ctx);
845 /* 771 /*
846 * This may emit two instructions so it may not fit 772 * This may emit two instructions so it may not fit
847 * in the delay slot. So use a0 in the delay slot. 773 * in the delay slot. So use a0 in the delay slot.
@@ -850,25 +776,15 @@ load_ind:
850 emit_jalr(MIPS_R_RA, r_s0, ctx); 776 emit_jalr(MIPS_R_RA, r_s0, ctx);
851 emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ 777 emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
852 /* Check the error value */ 778 /* Check the error value */
853 if (config_enabled(CONFIG_64BIT)) { 779 emit_bcond(MIPS_COND_NE, r_ret, 0,
854 /* Top 32-bits of $v0 on 64-bit */ 780 b_imm(prog->len, ctx), ctx);
855 emit_dsrl32(r_s0, r_val, 0, ctx); 781 emit_reg_move(r_ret, r_zero, ctx);
856 emit_bcond(MIPS_COND_NE, r_s0, r_zero,
857 3 << 2, ctx);
858 } else {
859 emit_bcond(MIPS_COND_NE, r_err, r_zero,
860 3 << 2, ctx);
861 }
862 /* No need for delay slot */
863 /* We are good */ 782 /* We are good */
864 /* X <- P[1:K] & 0xf */ 783 /* X <- P[1:K] & 0xf */
865 emit_andi(r_X, r_val, 0xf, ctx); 784 emit_andi(r_X, r_A, 0xf, ctx);
866 /* X << 2 */ 785 /* X << 2 */
867 emit_b(b_imm(i + 1, ctx), ctx); 786 emit_b(b_imm(i + 1, ctx), ctx);
868 emit_sll(r_X, r_X, 2, ctx); /* delay slot */ 787 emit_sll(r_X, r_X, 2, ctx); /* delay slot */
869 /* Return with error */
870 emit_b(b_imm(prog->len, ctx), ctx);
871 emit_load_imm(r_ret, 0, ctx); /* delay slot */
872 break; 788 break;
873 case BPF_ST: 789 case BPF_ST:
874 /* M[k] <- A */ 790 /* M[k] <- A */
@@ -942,7 +858,7 @@ load_ind:
942 /* Check if r_X is zero */ 858 /* Check if r_X is zero */
943 emit_bcond(MIPS_COND_EQ, r_X, r_zero, 859 emit_bcond(MIPS_COND_EQ, r_X, r_zero,
944 b_imm(prog->len, ctx), ctx); 860 b_imm(prog->len, ctx), ctx);
945 emit_load_imm(r_val, 0, ctx); /* delay slot */ 861 emit_load_imm(r_ret, 0, ctx); /* delay slot */
946 emit_div(r_A, r_X, ctx); 862 emit_div(r_A, r_X, ctx);
947 break; 863 break;
948 case BPF_ALU | BPF_MOD | BPF_X: 864 case BPF_ALU | BPF_MOD | BPF_X:
@@ -951,7 +867,7 @@ load_ind:
951 /* Check if r_X is zero */ 867 /* Check if r_X is zero */
952 emit_bcond(MIPS_COND_EQ, r_X, r_zero, 868 emit_bcond(MIPS_COND_EQ, r_X, r_zero,
953 b_imm(prog->len, ctx), ctx); 869 b_imm(prog->len, ctx), ctx);
954 emit_load_imm(r_val, 0, ctx); /* delay slot */ 870 emit_load_imm(r_ret, 0, ctx); /* delay slot */
955 emit_mod(r_A, r_X, ctx); 871 emit_mod(r_A, r_X, ctx);
956 break; 872 break;
957 case BPF_ALU | BPF_OR | BPF_K: 873 case BPF_ALU | BPF_OR | BPF_K:
diff --git a/arch/mips/net/bpf_jit.h b/arch/mips/net/bpf_jit.h
index 3afa7a6d81b3..8f9f54841123 100644
--- a/arch/mips/net/bpf_jit.h
+++ b/arch/mips/net/bpf_jit.h
@@ -15,7 +15,6 @@
15/* Registers used by JIT */ 15/* Registers used by JIT */
16#define MIPS_R_ZERO 0 16#define MIPS_R_ZERO 0
17#define MIPS_R_V0 2 17#define MIPS_R_V0 2
18#define MIPS_R_V1 3
19#define MIPS_R_A0 4 18#define MIPS_R_A0 4
20#define MIPS_R_A1 5 19#define MIPS_R_A1 5
21#define MIPS_R_T4 12 20#define MIPS_R_T4 12
@@ -43,20 +42,6 @@
43#define MIPS_COND_X (0x1 << 5) 42#define MIPS_COND_X (0x1 << 5)
44#define MIPS_COND_K (0x1 << 6) 43#define MIPS_COND_K (0x1 << 6)
45 44
46/* ABI specific return values */
47#ifdef CONFIG_32BIT /* O32 */
48#ifdef CONFIG_CPU_LITTLE_ENDIAN
49#define r_err MIPS_R_V1
50#define r_val MIPS_R_V0
51#else /* CONFIG_CPU_LITTLE_ENDIAN */
52#define r_err MIPS_R_V0
53#define r_val MIPS_R_V1
54#endif
55#else /* N64 */
56#define r_err MIPS_R_V0
57#define r_val MIPS_R_V0
58#endif
59
60#define r_ret MIPS_R_V0 45#define r_ret MIPS_R_V0
61 46
62/* 47/*
@@ -65,11 +50,14 @@
65 * any of the $s0-$s6 registers will only be preserved if 50 * any of the $s0-$s6 registers will only be preserved if
66 * they are going to actually be used. 51 * they are going to actually be used.
67 */ 52 */
53#define r_skb_hl MIPS_R_S0 /* skb header length */
54#define r_skb_data MIPS_R_S1 /* skb actual data */
68#define r_off MIPS_R_S2 55#define r_off MIPS_R_S2
69#define r_A MIPS_R_S3 56#define r_A MIPS_R_S3
70#define r_X MIPS_R_S4 57#define r_X MIPS_R_S4
71#define r_skb MIPS_R_S5 58#define r_skb MIPS_R_S5
72#define r_M MIPS_R_S6 59#define r_M MIPS_R_S6
60#define r_skb_len MIPS_R_S7
73#define r_s0 MIPS_R_T4 /* scratch reg 1 */ 61#define r_s0 MIPS_R_T4 /* scratch reg 1 */
74#define r_s1 MIPS_R_T5 /* scratch reg 2 */ 62#define r_s1 MIPS_R_T5 /* scratch reg 2 */
75#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */ 63#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */
@@ -78,4 +66,19 @@
78#define r_sp MIPS_R_SP 66#define r_sp MIPS_R_SP
79#define r_ra MIPS_R_RA 67#define r_ra MIPS_R_RA
80 68
69#ifndef __ASSEMBLY__
70
71/* Declare ASM helpers */
72
73#define DECLARE_LOAD_FUNC(func) \
74 extern u8 func(unsigned long *skb, int offset); \
75 extern u8 func##_negative(unsigned long *skb, int offset); \
76 extern u8 func##_positive(unsigned long *skb, int offset)
77
78DECLARE_LOAD_FUNC(sk_load_word);
79DECLARE_LOAD_FUNC(sk_load_half);
80DECLARE_LOAD_FUNC(sk_load_byte);
81
82#endif
83
81#endif /* BPF_JIT_MIPS_OP_H */ 84#endif /* BPF_JIT_MIPS_OP_H */
diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
new file mode 100644
index 000000000000..e92726099be0
--- /dev/null
+++ b/arch/mips/net/bpf_jit_asm.S
@@ -0,0 +1,238 @@
1/*
 2 * bpf_jit_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
3 * compiler.
4 *
5 * Copyright (C) 2015 Imagination Technologies Ltd.
6 * Author: Markos Chandras <markos.chandras@imgtec.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; version 2 of the License.
11 */
12
13#include <asm/asm.h>
14#include <asm/regdef.h>
15#include "bpf_jit.h"
16
17/* ABI
18 *
19 * r_skb_hl skb header length
20 * r_skb_data skb data
21 * r_off(a1) offset register
22 * r_A BPF register A
 23 * r_X BPF register X
24 * r_skb(a0) *skb
25 * r_M *scratch memory
 26 * r_skb_len skb length
27 * r_s0 Scratch register 0
28 * r_s1 Scratch register 1
29 *
30 * On entry:
31 * a0: *skb
32 * a1: offset (imm or imm + X)
33 *
34 * All non-BPF-ABI registers are free for use. On return, we only
35 * care about r_ret. The BPF-ABI registers are assumed to remain
36 * unmodified during the entire filter operation.
37 */
38
39#define skb a0
40#define offset a1
41#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */
42
43 /* We know better :) so prevent assembler reordering etc */
44 .set noreorder
45
46#define is_offset_negative(TYPE) \
47 /* If offset is negative we have more work to do */ \
48 slti t0, offset, 0; \
49 bgtz t0, bpf_slow_path_##TYPE##_neg; \
50 /* Be careful what follows in DS. */
51
52#define is_offset_in_header(SIZE, TYPE) \
53 /* Reading from header? */ \
54 addiu $r_s0, $r_skb_hl, -SIZE; \
55 slt t0, $r_s0, offset; \
56 bgtz t0, bpf_slow_path_##TYPE; \
57
58LEAF(sk_load_word)
59 is_offset_negative(word)
60 .globl sk_load_word_positive
61sk_load_word_positive:
62 is_offset_in_header(4, word)
63 /* Offset within header boundaries */
64 PTR_ADDU t1, $r_skb_data, offset
65 lw $r_A, 0(t1)
66#ifdef CONFIG_CPU_LITTLE_ENDIAN
67 wsbh t0, $r_A
68 rotr $r_A, t0, 16
69#endif
70 jr $r_ra
71 move $r_ret, zero
72 END(sk_load_word)
73
74LEAF(sk_load_half)
75 is_offset_negative(half)
76 .globl sk_load_half_positive
77sk_load_half_positive:
78 is_offset_in_header(2, half)
79 /* Offset within header boundaries */
80 PTR_ADDU t1, $r_skb_data, offset
81 lh $r_A, 0(t1)
82#ifdef CONFIG_CPU_LITTLE_ENDIAN
83 wsbh t0, $r_A
84 seh $r_A, t0
85#endif
86 jr $r_ra
87 move $r_ret, zero
88 END(sk_load_half)
89
90LEAF(sk_load_byte)
91 is_offset_negative(byte)
92 .globl sk_load_byte_positive
93sk_load_byte_positive:
94 is_offset_in_header(1, byte)
95 /* Offset within header boundaries */
96 PTR_ADDU t1, $r_skb_data, offset
97 lb $r_A, 0(t1)
98 jr $r_ra
99 move $r_ret, zero
100 END(sk_load_byte)
101
102/*
103 * call skb_copy_bits:
104 * (prototype in linux/skbuff.h)
105 *
106 * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
107 *
108 * o32 mandates we leave 4 spaces for argument registers in case
109 * the callee needs to use them. Even though we don't care about
110 * the argument registers ourselves, we need to allocate that space
111 * to remain ABI compliant since the callee may want to use that space.
112 * We also allocate 2 more spaces for $r_ra and our return register (*to).
113 *
114 * n64 is a bit different. The *caller* will allocate the space to preserve
115 * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
116 * good reason but it does not matter that much really.
117 *
118 * (void *to) is returned in r_s0
119 *
120 */
121#define bpf_slow_path_common(SIZE) \
122 /* Quick check. Are we within reasonable boundaries? */ \
123 LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \
124 sltu $r_s0, offset, $r_s1; \
125 beqz $r_s0, fault; \
126 /* Load 4th argument in DS */ \
127 LONG_ADDIU a3, zero, SIZE; \
128 PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
129 PTR_LA t0, skb_copy_bits; \
130 PTR_S $r_ra, (5 * SZREG)($r_sp); \
131 /* Assign low slot to a2 */ \
132 move a2, $r_sp; \
133 jalr t0; \
134 /* Reset our destination slot (DS but it's ok) */ \
135 INT_S zero, (4 * SZREG)($r_sp); \
136 /* \
137 * skb_copy_bits returns 0 on success and -EFAULT \
138 * on error. Our data live in a2. Do not bother with \
139 * our data if an error has been returned. \
140 */ \
141 /* Restore our frame */ \
142 PTR_L $r_ra, (5 * SZREG)($r_sp); \
143 INT_L $r_s0, (4 * SZREG)($r_sp); \
144 bltz v0, fault; \
145 PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
146 move $r_ret, zero; \
147
148NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
149 bpf_slow_path_common(4)
150#ifdef CONFIG_CPU_LITTLE_ENDIAN
151 wsbh t0, $r_s0
152 jr $r_ra
153 rotr $r_A, t0, 16
154#endif
155 jr $r_ra
156 move $r_A, $r_s0
157
158 END(bpf_slow_path_word)
159
160NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
161 bpf_slow_path_common(2)
162#ifdef CONFIG_CPU_LITTLE_ENDIAN
163 jr $r_ra
164 wsbh $r_A, $r_s0
165#endif
166 jr $r_ra
167 move $r_A, $r_s0
168
169 END(bpf_slow_path_half)
170
171NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
172 bpf_slow_path_common(1)
173 jr $r_ra
174 move $r_A, $r_s0
175
176 END(bpf_slow_path_byte)
177
178/*
179 * Negative entry points
180 */
181 .macro bpf_is_end_of_data
182 li t0, SKF_LL_OFF
183 /* Reading link layer data? */
184 slt t1, offset, t0
185 bgtz t1, fault
186 /* Be careful what follows in DS. */
187 .endm
188/*
189 * call skb_copy_bits:
190 * (prototype in linux/filter.h)
191 *
192 * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
193 * int k, unsigned int size)
194 *
195 * see above (bpf_slow_path_common) for ABI restrictions
196 */
197#define bpf_negative_common(SIZE) \
198 PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
199 PTR_LA t0, bpf_internal_load_pointer_neg_helper; \
200 PTR_S $r_ra, (5 * SZREG)($r_sp); \
201 jalr t0; \
202 li a2, SIZE; \
203 PTR_L $r_ra, (5 * SZREG)($r_sp); \
204 /* Check return pointer */ \
205 beqz v0, fault; \
206 PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
207 /* Preserve our pointer */ \
208 move $r_s0, v0; \
209 /* Set return value */ \
210 move $r_ret, zero; \
211
212bpf_slow_path_word_neg:
213 bpf_is_end_of_data
214NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
215 bpf_negative_common(4)
216 jr $r_ra
217 lw $r_A, 0($r_s0)
218 END(sk_load_word_negative)
219
220bpf_slow_path_half_neg:
221 bpf_is_end_of_data
222NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
223 bpf_negative_common(2)
224 jr $r_ra
225 lhu $r_A, 0($r_s0)
226 END(sk_load_half_negative)
227
228bpf_slow_path_byte_neg:
229 bpf_is_end_of_data
230NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
231 bpf_negative_common(1)
232 jr $r_ra
233 lbu $r_A, 0($r_s0)
234 END(sk_load_byte_negative)
235
236fault:
237 jr $r_ra
238 addiu $r_ret, zero, 1