diff options
| author | Max Filippov <jcmvbkbc@gmail.com> | 2013-07-14 18:02:24 -0400 |
|---|---|---|
| committer | Chris Zankel <chris@zankel.net> | 2013-09-06 12:48:12 -0400 |
| commit | fff96d69f2af28f3243d1349341b6305f318f5a4 (patch) | |
| tree | 68a2a0bf65ca705cfbcad147520f40cf5aef84f1 | |
| parent | 99d5040ebc3cccc90dfe031f615ac3fbc79905b6 (diff) | |
xtensa: new fast_alloca handler
Instead of emulating movsp instruction in the kernel use window
underflow handler to load missing register window and retry failed
movsp.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Chris Zankel <chris@zankel.net>
| -rw-r--r-- | arch/xtensa/kernel/entry.S | 192 |
1 files changed, 40 insertions, 152 deletions
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index ab025c1f6e23..de1dfa18d0a1 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S | |||
| @@ -31,7 +31,6 @@ | |||
| 31 | /* Unimplemented features. */ | 31 | /* Unimplemented features. */ |
| 32 | 32 | ||
| 33 | #undef KERNEL_STACK_OVERFLOW_CHECK | 33 | #undef KERNEL_STACK_OVERFLOW_CHECK |
| 34 | #undef ALLOCA_EXCEPTION_IN_IRAM | ||
| 35 | 34 | ||
| 36 | /* Not well tested. | 35 | /* Not well tested. |
| 37 | * | 36 | * |
| @@ -819,11 +818,27 @@ ENDPROC(unrecoverable_exception) | |||
| 819 | * | 818 | * |
| 820 | * The ALLOCA handler is entered when user code executes the MOVSP | 819 | * The ALLOCA handler is entered when user code executes the MOVSP |
| 821 | * instruction and the caller's frame is not in the register file. | 820 | * instruction and the caller's frame is not in the register file. |
| 822 | * In this case, the caller frame's a0..a3 are on the stack just | ||
| 823 | * below sp (a1), and this handler moves them. | ||
| 824 | * | 821 | * |
| 825 | * For "MOVSP <ar>,<as>" without destination register a1, this routine | 822 | * This algorithm was taken from Ross Morley's RTOS Porting Layer: |
| 826 | * simply moves the value from <as> to <ar> without moving the save area. | 823 | * |
| 824 | * /home/ross/rtos/porting/XtensaRTOS-PortingLayer-20090507/xtensa_vectors.S | ||
| 825 | * | ||
| 826 | * It leverages the existing window spill/fill routines and their support for | ||
| 827 | * double exceptions. The 'movsp' instruction will only cause an exception if | ||
| 828 | * the next window needs to be loaded. In fact this ALLOCA exception may be | ||
| 829 | * replaced at some point by changing the hardware to do an underflow exception | ||
| 830 | * of the proper size instead. | ||
| 831 | * | ||
| 832 | * This algorithm simply backs out the register changes started by the user | ||
| 833 | * exception handler, makes it appear that we have started a window underflow | ||
| 834 | * by rotating the window back and then setting the old window base (OWB) in | ||
| 835 | * the 'ps' register with the rolled back window base. The 'movsp' instruction | ||
| 836 | * will be re-executed and this time since the next window frame is in the | ||
| 837 | * active AR registers it won't cause an exception. | ||
| 838 | * | ||
| 839 | * If the WindowUnderflow code gets a TLB miss the page will get mapped | ||
| 840 | * and the partial WindowUnderflow will be handled in the double exception | ||
| 841 | * handler. | ||
| 827 | * | 842 | * |
| 828 | * Entry condition: | 843 | * Entry condition: |
| 829 | * | 844 | * |
| @@ -838,155 +853,28 @@ ENDPROC(unrecoverable_exception) | |||
| 838 | * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception | 853 | * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception |
| 839 | */ | 854 | */ |
| 840 | 855 | ||
| 841 | #if XCHAL_HAVE_BE | ||
| 842 | #define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 4, 4 | ||
| 843 | #define _EXTUI_MOVSP_DST(ar) extui ar, ar, 0, 4 | ||
| 844 | #else | ||
| 845 | #define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 0, 4 | ||
| 846 | #define _EXTUI_MOVSP_DST(ar) extui ar, ar, 4, 4 | ||
| 847 | #endif | ||
| 848 | |||
| 849 | ENTRY(fast_alloca) | 856 | ENTRY(fast_alloca) |
| 857 | rsr a0, windowbase | ||
| 858 | rotw -1 | ||
| 859 | rsr a2, ps | ||
| 860 | extui a3, a2, PS_OWB_SHIFT, PS_OWB_WIDTH | ||
| 861 | xor a3, a3, a4 | ||
| 862 | l32i a4, a6, PT_AREG0 | ||
| 863 | l32i a1, a6, PT_DEPC | ||
| 864 | rsr a6, depc | ||
| 865 | wsr a1, depc | ||
| 866 | slli a3, a3, PS_OWB_SHIFT | ||
| 867 | xor a2, a2, a3 | ||
| 868 | wsr a2, ps | ||
| 869 | rsync | ||
| 850 | 870 | ||
| 851 | /* We shouldn't be in a double exception. */ | 871 | _bbci.l a4, 31, 4f |
| 852 | 872 | rotw -1 | |
| 853 | l32i a0, a2, PT_DEPC | 873 | _bbci.l a8, 30, 8f |
| 854 | _bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lunhandled_double | 874 | rotw -1 |
| 855 | 875 | j _WindowUnderflow12 | |
| 856 | rsr a0, depc # get a2 | 876 | 8: j _WindowUnderflow8 |
| 857 | s32i a4, a2, PT_AREG4 # save a4 and | 877 | 4: j _WindowUnderflow4 |
| 858 | s32i a3, a2, PT_AREG3 | ||
| 859 | s32i a0, a2, PT_AREG2 # a2 to stack | ||
| 860 | |||
| 861 | /* Exit critical section. */ | ||
| 862 | |||
| 863 | movi a0, 0 | ||
| 864 | rsr a3, excsave1 | ||
| 865 | s32i a0, a3, EXC_TABLE_FIXUP | ||
| 866 | |||
| 867 | rsr a4, epc1 # get exception address | ||
| 868 | |||
| 869 | #ifdef ALLOCA_EXCEPTION_IN_IRAM | ||
| 870 | #error iram not supported | ||
| 871 | #else | ||
| 872 | /* Note: l8ui not allowed in IRAM/IROM!! */ | ||
| 873 | l8ui a0, a4, 1 # read as(src) from MOVSP instruction | ||
| 874 | #endif | ||
| 875 | movi a3, .Lmovsp_src | ||
| 876 | _EXTUI_MOVSP_SRC(a0) # extract source register number | ||
| 877 | addx8 a3, a0, a3 | ||
| 878 | jx a3 | ||
| 879 | |||
| 880 | .Lunhandled_double: | ||
| 881 | wsr a0, excsave1 | ||
| 882 | movi a0, unrecoverable_exception | ||
| 883 | callx0 a0 | ||
| 884 | |||
| 885 | .align 8 | ||
| 886 | .Lmovsp_src: | ||
| 887 | l32i a3, a2, PT_AREG0; _j 1f; .align 8 | ||
| 888 | mov a3, a1; _j 1f; .align 8 | ||
| 889 | l32i a3, a2, PT_AREG2; _j 1f; .align 8 | ||
| 890 | l32i a3, a2, PT_AREG3; _j 1f; .align 8 | ||
| 891 | l32i a3, a2, PT_AREG4; _j 1f; .align 8 | ||
| 892 | mov a3, a5; _j 1f; .align 8 | ||
| 893 | mov a3, a6; _j 1f; .align 8 | ||
| 894 | mov a3, a7; _j 1f; .align 8 | ||
| 895 | mov a3, a8; _j 1f; .align 8 | ||
| 896 | mov a3, a9; _j 1f; .align 8 | ||
| 897 | mov a3, a10; _j 1f; .align 8 | ||
| 898 | mov a3, a11; _j 1f; .align 8 | ||
| 899 | mov a3, a12; _j 1f; .align 8 | ||
| 900 | mov a3, a13; _j 1f; .align 8 | ||
| 901 | mov a3, a14; _j 1f; .align 8 | ||
| 902 | mov a3, a15; _j 1f; .align 8 | ||
| 903 | |||
| 904 | 1: | ||
| 905 | |||
| 906 | #ifdef ALLOCA_EXCEPTION_IN_IRAM | ||
| 907 | #error iram not supported | ||
| 908 | #else | ||
| 909 | l8ui a0, a4, 0 # read ar(dst) from MOVSP instruction | ||
| 910 | #endif | ||
| 911 | addi a4, a4, 3 # step over movsp | ||
| 912 | _EXTUI_MOVSP_DST(a0) # extract destination register | ||
| 913 | wsr a4, epc1 # save new epc_1 | ||
| 914 | |||
| 915 | _bnei a0, 1, 1f # no 'movsp a1, ax': jump | ||
| 916 | |||
| 917 | /* Move the save area. This implies the use of the L32E | ||
| 918 | * and S32E instructions, because this move must be done with | ||
| 919 | * the user's PS.RING privilege levels, not with ring 0 | ||
| 920 | * (kernel's) privileges currently active with PS.EXCM | ||
| 921 | * set. Note that we have still registered a fixup routine with the | ||
| 922 | * double exception vector in case a double exception occurs. | ||
| 923 | */ | ||
| 924 | |||
| 925 | /* a0,a4:avail a1:old user stack a2:exc. stack a3:new user stack. */ | ||
| 926 | |||
| 927 | l32e a0, a1, -16 | ||
| 928 | l32e a4, a1, -12 | ||
| 929 | s32e a0, a3, -16 | ||
| 930 | s32e a4, a3, -12 | ||
| 931 | l32e a0, a1, -8 | ||
| 932 | l32e a4, a1, -4 | ||
| 933 | s32e a0, a3, -8 | ||
| 934 | s32e a4, a3, -4 | ||
| 935 | |||
| 936 | /* Restore stack-pointer and all the other saved registers. */ | ||
| 937 | |||
| 938 | mov a1, a3 | ||
| 939 | |||
| 940 | l32i a4, a2, PT_AREG4 | ||
| 941 | l32i a3, a2, PT_AREG3 | ||
| 942 | l32i a0, a2, PT_AREG0 | ||
| 943 | l32i a2, a2, PT_AREG2 | ||
| 944 | rfe | ||
| 945 | |||
| 946 | /* MOVSP <at>,<as> was invoked with <at> != a1. | ||
| 947 | * Because the stack pointer is not being modified, | ||
| 948 | * we should be able to just modify the pointer | ||
| 949 | * without moving any save area. | ||
| 950 | * The processor only traps these occurrences if the | ||
| 951 | * caller window isn't live, so unfortunately we can't | ||
| 952 | * use this as an alternate trap mechanism. | ||
| 953 | * So we just do the move. This requires that we | ||
| 954 | * resolve the destination register, not just the source, | ||
| 955 | * so there's some extra work. | ||
| 956 | * (PERHAPS NOT REALLY NEEDED, BUT CLEANER...) | ||
| 957 | */ | ||
| 958 | |||
| 959 | /* a0 dst-reg, a1 user-stack, a2 stack, a3 value of src reg. */ | ||
| 960 | |||
| 961 | 1: movi a4, .Lmovsp_dst | ||
| 962 | addx8 a4, a0, a4 | ||
| 963 | jx a4 | ||
| 964 | |||
| 965 | .align 8 | ||
| 966 | .Lmovsp_dst: | ||
| 967 | s32i a3, a2, PT_AREG0; _j 1f; .align 8 | ||
| 968 | mov a1, a3; _j 1f; .align 8 | ||
| 969 | s32i a3, a2, PT_AREG2; _j 1f; .align 8 | ||
| 970 | s32i a3, a2, PT_AREG3; _j 1f; .align 8 | ||
| 971 | s32i a3, a2, PT_AREG4; _j 1f; .align 8 | ||
| 972 | mov a5, a3; _j 1f; .align 8 | ||
| 973 | mov a6, a3; _j 1f; .align 8 | ||
| 974 | mov a7, a3; _j 1f; .align 8 | ||
| 975 | mov a8, a3; _j 1f; .align 8 | ||
| 976 | mov a9, a3; _j 1f; .align 8 | ||
| 977 | mov a10, a3; _j 1f; .align 8 | ||
| 978 | mov a11, a3; _j 1f; .align 8 | ||
| 979 | mov a12, a3; _j 1f; .align 8 | ||
| 980 | mov a13, a3; _j 1f; .align 8 | ||
| 981 | mov a14, a3; _j 1f; .align 8 | ||
| 982 | mov a15, a3; _j 1f; .align 8 | ||
| 983 | |||
| 984 | 1: l32i a4, a2, PT_AREG4 | ||
| 985 | l32i a3, a2, PT_AREG3 | ||
| 986 | l32i a0, a2, PT_AREG0 | ||
| 987 | l32i a2, a2, PT_AREG2 | ||
| 988 | rfe | ||
| 989 | |||
| 990 | ENDPROC(fast_alloca) | 878 | ENDPROC(fast_alloca) |
| 991 | 879 | ||
| 992 | /* | 880 | /* |
