diff options
author | Max Filippov <jcmvbkbc@gmail.com> | 2013-07-14 18:02:24 -0400 |
---|---|---|
committer | Chris Zankel <chris@zankel.net> | 2013-09-06 12:48:12 -0400 |
commit | fff96d69f2af28f3243d1349341b6305f318f5a4 (patch) | |
tree | 68a2a0bf65ca705cfbcad147520f40cf5aef84f1 | |
parent | 99d5040ebc3cccc90dfe031f615ac3fbc79905b6 (diff) |
xtensa: new fast_alloca handler
Instead of emulating movsp instruction in the kernel use window
underflow handler to load missing register window and retry failed
movsp.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Chris Zankel <chris@zankel.net>
-rw-r--r-- | arch/xtensa/kernel/entry.S | 192 |
1 files changed, 40 insertions, 152 deletions
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index ab025c1f6e23..de1dfa18d0a1 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S | |||
@@ -31,7 +31,6 @@ | |||
31 | /* Unimplemented features. */ | 31 | /* Unimplemented features. */ |
32 | 32 | ||
33 | #undef KERNEL_STACK_OVERFLOW_CHECK | 33 | #undef KERNEL_STACK_OVERFLOW_CHECK |
34 | #undef ALLOCA_EXCEPTION_IN_IRAM | ||
35 | 34 | ||
36 | /* Not well tested. | 35 | /* Not well tested. |
37 | * | 36 | * |
@@ -819,11 +818,27 @@ ENDPROC(unrecoverable_exception) | |||
819 | * | 818 | * |
820 | * The ALLOCA handler is entered when user code executes the MOVSP | 819 | * The ALLOCA handler is entered when user code executes the MOVSP |
821 | * instruction and the caller's frame is not in the register file. | 820 | * instruction and the caller's frame is not in the register file. |
822 | * In this case, the caller frame's a0..a3 are on the stack just | ||
823 | * below sp (a1), and this handler moves them. | ||
824 | * | 821 | * |
825 | * For "MOVSP <ar>,<as>" without destination register a1, this routine | 822 | * This algorithm was taken from Ross Morley's RTOS Porting Layer: |
826 | * simply moves the value from <as> to <ar> without moving the save area. | 823 | * |
824 | * /home/ross/rtos/porting/XtensaRTOS-PortingLayer-20090507/xtensa_vectors.S | ||
825 | * | ||
826 | * It leverages the existing window spill/fill routines and their support for | ||
827 | * double exceptions. The 'movsp' instruction will only cause an exception if | ||
828 | * the next window needs to be loaded. In fact this ALLOCA exception may be | ||
829 | * replaced at some point by changing the hardware to do an underflow exception | ||
830 | * of the proper size instead. | ||
831 | * | ||
832 | * This algorithm simply backs out the register changes started by the user | ||
833 | * exception handler, makes it appear that we have started a window underflow | ||
834 | * by rotating the window back and then setting the old window base (OWB) in | ||
835 | * the 'ps' register with the rolled back window base. The 'movsp' instruction | ||
836 | * will be re-executed and this time since the next window frame is in the | ||
837 | * active AR registers it won't cause an exception. | ||
838 | * | ||
839 | * If the WindowUnderflow code gets a TLB miss the page will get mapped | ||
840 | * and the partial WindowUnderflow will be handled in the double exception | ||
841 | * handler. | ||
827 | * | 842 | * |
828 | * Entry condition: | 843 | * Entry condition: |
829 | * | 844 | * |
@@ -838,155 +853,28 @@ ENDPROC(unrecoverable_exception) | |||
838 | * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception | 853 | * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception |
839 | */ | 854 | */ |
840 | 855 | ||
841 | #if XCHAL_HAVE_BE | ||
842 | #define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 4, 4 | ||
843 | #define _EXTUI_MOVSP_DST(ar) extui ar, ar, 0, 4 | ||
844 | #else | ||
845 | #define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 0, 4 | ||
846 | #define _EXTUI_MOVSP_DST(ar) extui ar, ar, 4, 4 | ||
847 | #endif | ||
848 | |||
849 | ENTRY(fast_alloca) | 856 | ENTRY(fast_alloca) |
857 | rsr a0, windowbase | ||
858 | rotw -1 | ||
859 | rsr a2, ps | ||
860 | extui a3, a2, PS_OWB_SHIFT, PS_OWB_WIDTH | ||
861 | xor a3, a3, a4 | ||
862 | l32i a4, a6, PT_AREG0 | ||
863 | l32i a1, a6, PT_DEPC | ||
864 | rsr a6, depc | ||
865 | wsr a1, depc | ||
866 | slli a3, a3, PS_OWB_SHIFT | ||
867 | xor a2, a2, a3 | ||
868 | wsr a2, ps | ||
869 | rsync | ||
850 | 870 | ||
851 | /* We shouldn't be in a double exception. */ | 871 | _bbci.l a4, 31, 4f |
852 | 872 | rotw -1 | |
853 | l32i a0, a2, PT_DEPC | 873 | _bbci.l a8, 30, 8f |
854 | _bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lunhandled_double | 874 | rotw -1 |
855 | 875 | j _WindowUnderflow12 | |
856 | rsr a0, depc # get a2 | 876 | 8: j _WindowUnderflow8 |
857 | s32i a4, a2, PT_AREG4 # save a4 and | 877 | 4: j _WindowUnderflow4 |
858 | s32i a3, a2, PT_AREG3 | ||
859 | s32i a0, a2, PT_AREG2 # a2 to stack | ||
860 | |||
861 | /* Exit critical section. */ | ||
862 | |||
863 | movi a0, 0 | ||
864 | rsr a3, excsave1 | ||
865 | s32i a0, a3, EXC_TABLE_FIXUP | ||
866 | |||
867 | rsr a4, epc1 # get exception address | ||
868 | |||
869 | #ifdef ALLOCA_EXCEPTION_IN_IRAM | ||
870 | #error iram not supported | ||
871 | #else | ||
872 | /* Note: l8ui not allowed in IRAM/IROM!! */ | ||
873 | l8ui a0, a4, 1 # read as(src) from MOVSP instruction | ||
874 | #endif | ||
875 | movi a3, .Lmovsp_src | ||
876 | _EXTUI_MOVSP_SRC(a0) # extract source register number | ||
877 | addx8 a3, a0, a3 | ||
878 | jx a3 | ||
879 | |||
880 | .Lunhandled_double: | ||
881 | wsr a0, excsave1 | ||
882 | movi a0, unrecoverable_exception | ||
883 | callx0 a0 | ||
884 | |||
885 | .align 8 | ||
886 | .Lmovsp_src: | ||
887 | l32i a3, a2, PT_AREG0; _j 1f; .align 8 | ||
888 | mov a3, a1; _j 1f; .align 8 | ||
889 | l32i a3, a2, PT_AREG2; _j 1f; .align 8 | ||
890 | l32i a3, a2, PT_AREG3; _j 1f; .align 8 | ||
891 | l32i a3, a2, PT_AREG4; _j 1f; .align 8 | ||
892 | mov a3, a5; _j 1f; .align 8 | ||
893 | mov a3, a6; _j 1f; .align 8 | ||
894 | mov a3, a7; _j 1f; .align 8 | ||
895 | mov a3, a8; _j 1f; .align 8 | ||
896 | mov a3, a9; _j 1f; .align 8 | ||
897 | mov a3, a10; _j 1f; .align 8 | ||
898 | mov a3, a11; _j 1f; .align 8 | ||
899 | mov a3, a12; _j 1f; .align 8 | ||
900 | mov a3, a13; _j 1f; .align 8 | ||
901 | mov a3, a14; _j 1f; .align 8 | ||
902 | mov a3, a15; _j 1f; .align 8 | ||
903 | |||
904 | 1: | ||
905 | |||
906 | #ifdef ALLOCA_EXCEPTION_IN_IRAM | ||
907 | #error iram not supported | ||
908 | #else | ||
909 | l8ui a0, a4, 0 # read ar(dst) from MOVSP instruction | ||
910 | #endif | ||
911 | addi a4, a4, 3 # step over movsp | ||
912 | _EXTUI_MOVSP_DST(a0) # extract destination register | ||
913 | wsr a4, epc1 # save new epc_1 | ||
914 | |||
915 | _bnei a0, 1, 1f # no 'movsp a1, ax': jump | ||
916 | |||
917 | /* Move the save area. This implies the use of the L32E | ||
918 | * and S32E instructions, because this move must be done with | ||
919 | * the user's PS.RING privilege levels, not with ring 0 | ||
920 | * (kernel's) privileges currently active with PS.EXCM | ||
921 | * set. Note that we have stil registered a fixup routine with the | ||
922 | * double exception vector in case a double exception occurs. | ||
923 | */ | ||
924 | |||
925 | /* a0,a4:avail a1:old user stack a2:exc. stack a3:new user stack. */ | ||
926 | |||
927 | l32e a0, a1, -16 | ||
928 | l32e a4, a1, -12 | ||
929 | s32e a0, a3, -16 | ||
930 | s32e a4, a3, -12 | ||
931 | l32e a0, a1, -8 | ||
932 | l32e a4, a1, -4 | ||
933 | s32e a0, a3, -8 | ||
934 | s32e a4, a3, -4 | ||
935 | |||
936 | /* Restore stack-pointer and all the other saved registers. */ | ||
937 | |||
938 | mov a1, a3 | ||
939 | |||
940 | l32i a4, a2, PT_AREG4 | ||
941 | l32i a3, a2, PT_AREG3 | ||
942 | l32i a0, a2, PT_AREG0 | ||
943 | l32i a2, a2, PT_AREG2 | ||
944 | rfe | ||
945 | |||
946 | /* MOVSP <at>,<as> was invoked with <at> != a1. | ||
947 | * Because the stack pointer is not being modified, | ||
948 | * we should be able to just modify the pointer | ||
949 | * without moving any save area. | ||
950 | * The processor only traps these occurrences if the | ||
951 | * caller window isn't live, so unfortunately we can't | ||
952 | * use this as an alternate trap mechanism. | ||
953 | * So we just do the move. This requires that we | ||
954 | * resolve the destination register, not just the source, | ||
955 | * so there's some extra work. | ||
956 | * (PERHAPS NOT REALLY NEEDED, BUT CLEANER...) | ||
957 | */ | ||
958 | |||
959 | /* a0 dst-reg, a1 user-stack, a2 stack, a3 value of src reg. */ | ||
960 | |||
961 | 1: movi a4, .Lmovsp_dst | ||
962 | addx8 a4, a0, a4 | ||
963 | jx a4 | ||
964 | |||
965 | .align 8 | ||
966 | .Lmovsp_dst: | ||
967 | s32i a3, a2, PT_AREG0; _j 1f; .align 8 | ||
968 | mov a1, a3; _j 1f; .align 8 | ||
969 | s32i a3, a2, PT_AREG2; _j 1f; .align 8 | ||
970 | s32i a3, a2, PT_AREG3; _j 1f; .align 8 | ||
971 | s32i a3, a2, PT_AREG4; _j 1f; .align 8 | ||
972 | mov a5, a3; _j 1f; .align 8 | ||
973 | mov a6, a3; _j 1f; .align 8 | ||
974 | mov a7, a3; _j 1f; .align 8 | ||
975 | mov a8, a3; _j 1f; .align 8 | ||
976 | mov a9, a3; _j 1f; .align 8 | ||
977 | mov a10, a3; _j 1f; .align 8 | ||
978 | mov a11, a3; _j 1f; .align 8 | ||
979 | mov a12, a3; _j 1f; .align 8 | ||
980 | mov a13, a3; _j 1f; .align 8 | ||
981 | mov a14, a3; _j 1f; .align 8 | ||
982 | mov a15, a3; _j 1f; .align 8 | ||
983 | |||
984 | 1: l32i a4, a2, PT_AREG4 | ||
985 | l32i a3, a2, PT_AREG3 | ||
986 | l32i a0, a2, PT_AREG0 | ||
987 | l32i a2, a2, PT_AREG2 | ||
988 | rfe | ||
989 | |||
990 | ENDPROC(fast_alloca) | 878 | ENDPROC(fast_alloca) |
991 | 879 | ||
992 | /* | 880 | /* |