diff options
35 files changed, 768 insertions, 372 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c199990e12b6..323a4df59a6c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c | |||
@@ -27,14 +27,58 @@ | |||
27 | 27 | ||
28 | int bpf_jit_enable __read_mostly; | 28 | int bpf_jit_enable __read_mostly; |
29 | 29 | ||
30 | /* | ||
31 | * eBPF prog stack layout: | ||
32 | * | ||
33 | * high | ||
34 | * original ARM_SP => +-----+ | ||
35 | * | | callee saved registers | ||
36 | * +-----+ <= (BPF_FP + SCRATCH_SIZE) | ||
37 | * | ... | eBPF JIT scratch space | ||
38 | * eBPF fp register => +-----+ | ||
39 | * (BPF_FP) | ... | eBPF prog stack | ||
40 | * +-----+ | ||
41 | * |RSVD | JIT scratchpad | ||
42 | * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) | ||
43 | * | | | ||
44 | * | ... | Function call stack | ||
45 | * | | | ||
46 | * +-----+ | ||
47 | * low | ||
48 | * | ||
49 | * The callee saved registers depend on whether frame pointers are enabled. | ||
50 | * With frame pointers (to be compliant with the ABI): | ||
51 | * | ||
52 | * high | ||
53 | * original ARM_SP => +------------------+ \ | ||
54 | * | pc | | | ||
55 | * current ARM_FP => +------------------+ } callee saved registers | ||
56 | * |r4-r8,r10,fp,ip,lr| | | ||
57 | * +------------------+ / | ||
58 | * low | ||
59 | * | ||
60 | * Without frame pointers: | ||
61 | * | ||
62 | * high | ||
63 | * original ARM_SP => +------------------+ | ||
64 | * | r4-r8,r10,fp,lr | callee saved registers | ||
65 | * current ARM_FP => +------------------+ | ||
66 | * low | ||
67 | * | ||
68 | * When popping registers off the stack at the end of a BPF function, we | ||
69 | * reference them via the current ARM_FP register. | ||
70 | */ | ||
71 | #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ | ||
72 | 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ | ||
73 | 1 << ARM_FP) | ||
74 | #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) | ||
75 | #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) | ||
76 | |||
30 | #define STACK_OFFSET(k) (k) | 77 | #define STACK_OFFSET(k) (k) |
31 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ | 78 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ |
32 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ | 79 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ |
33 | #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ | 80 | #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ |
34 | 81 | ||
35 | /* Flags used for JIT optimization */ | ||
36 | #define SEEN_CALL (1 << 0) | ||
37 | |||
38 | #define FLAG_IMM_OVERFLOW (1 << 0) | 82 | #define FLAG_IMM_OVERFLOW (1 << 0) |
39 | 83 | ||
40 | /* | 84 | /* |
@@ -95,7 +139,6 @@ static const u8 bpf2a32[][2] = { | |||
95 | * idx : index of current last JITed instruction. | 139 | * idx : index of current last JITed instruction. |
96 | * prologue_bytes : bytes used in prologue. | 140 | * prologue_bytes : bytes used in prologue. |
97 | * epilogue_offset : offset of epilogue starting. | 141 | * epilogue_offset : offset of epilogue starting. |
98 | * seen : bit mask used for JIT optimization. | ||
99 | * offsets : array of eBPF instruction offsets in | 142 | * offsets : array of eBPF instruction offsets in |
100 | * JITed code. | 143 | * JITed code. |
101 | * target : final JITed code. | 144 | * target : final JITed code. |
@@ -110,7 +153,6 @@ struct jit_ctx { | |||
110 | unsigned int idx; | 153 | unsigned int idx; |
111 | unsigned int prologue_bytes; | 154 | unsigned int prologue_bytes; |
112 | unsigned int epilogue_offset; | 155 | unsigned int epilogue_offset; |
113 | u32 seen; | ||
114 | u32 flags; | 156 | u32 flags; |
115 | u32 *offsets; | 157 | u32 *offsets; |
116 | u32 *target; | 158 | u32 *target; |
@@ -179,8 +221,13 @@ static void jit_fill_hole(void *area, unsigned int size) | |||
179 | *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); | 221 | *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); |
180 | } | 222 | } |
181 | 223 | ||
182 | /* Stack must be multiples of 16 Bytes */ | 224 | #if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) |
183 | #define STACK_ALIGN(sz) (((sz) + 3) & ~3) | 225 | /* EABI requires the stack to be aligned to 64-bit boundaries */ |
226 | #define STACK_ALIGNMENT 8 | ||
227 | #else | ||
228 | /* Stack must be aligned to 32-bit boundaries */ | ||
229 | #define STACK_ALIGNMENT 4 | ||
230 | #endif | ||
184 | 231 | ||
185 | /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, | 232 | /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, |
186 | * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, | 233 | * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, |
@@ -194,7 +241,7 @@ static void jit_fill_hole(void *area, unsigned int size) | |||
194 | + SCRATCH_SIZE + \ | 241 | + SCRATCH_SIZE + \ |
195 | + 4 /* extra for skb_copy_bits buffer */) | 242 | + 4 /* extra for skb_copy_bits buffer */) |
196 | 243 | ||
197 | #define STACK_SIZE STACK_ALIGN(_STACK_SIZE) | 244 | #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) |
198 | 245 | ||
199 | /* Get the offset of eBPF REGISTERs stored on scratch space. */ | 246 | /* Get the offset of eBPF REGISTERs stored on scratch space. */ |
200 | #define STACK_VAR(off) (STACK_SIZE-off-4) | 247 | #define STACK_VAR(off) (STACK_SIZE-off-4) |
@@ -285,16 +332,19 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) | |||
285 | emit_mov_i_no8m(rd, val, ctx); | 332 | emit_mov_i_no8m(rd, val, ctx); |
286 | } | 333 | } |
287 | 334 | ||
288 | static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) | 335 | static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) |
289 | { | 336 | { |
290 | ctx->seen |= SEEN_CALL; | ||
291 | #if __LINUX_ARM_ARCH__ < 5 | ||
292 | emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); | ||
293 | |||
294 | if (elf_hwcap & HWCAP_THUMB) | 337 | if (elf_hwcap & HWCAP_THUMB) |
295 | emit(ARM_BX(tgt_reg), ctx); | 338 | emit(ARM_BX(tgt_reg), ctx); |
296 | else | 339 | else |
297 | emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); | 340 | emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); |
341 | } | ||
342 | |||
343 | static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) | ||
344 | { | ||
345 | #if __LINUX_ARM_ARCH__ < 5 | ||
346 | emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); | ||
347 | emit_bx_r(tgt_reg, ctx); | ||
298 | #else | 348 | #else |
299 | emit(ARM_BLX_R(tgt_reg), ctx); | 349 | emit(ARM_BLX_R(tgt_reg), ctx); |
300 | #endif | 350 | #endif |
@@ -354,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) | |||
354 | } | 404 | } |
355 | 405 | ||
356 | /* Call appropriate function */ | 406 | /* Call appropriate function */ |
357 | ctx->seen |= SEEN_CALL; | ||
358 | emit_mov_i(ARM_IP, op == BPF_DIV ? | 407 | emit_mov_i(ARM_IP, op == BPF_DIV ? |
359 | (u32)jit_udiv32 : (u32)jit_mod32, ctx); | 408 | (u32)jit_udiv32 : (u32)jit_mod32, ctx); |
360 | emit_blx_r(ARM_IP, ctx); | 409 | emit_blx_r(ARM_IP, ctx); |
@@ -620,8 +669,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, | |||
620 | /* Do LSH operation */ | 669 | /* Do LSH operation */ |
621 | emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); | 670 | emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); |
622 | emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); | 671 | emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); |
623 | /* As we are using ARM_LR */ | ||
624 | ctx->seen |= SEEN_CALL; | ||
625 | emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); | 672 | emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); |
626 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); | 673 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); |
627 | emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); | 674 | emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); |
@@ -656,8 +703,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, | |||
656 | /* Do the ARSH operation */ | 703 | /* Do the ARSH operation */ |
657 | emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); | 704 | emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); |
658 | emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); | 705 | emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); |
659 | /* As we are using ARM_LR */ | ||
660 | ctx->seen |= SEEN_CALL; | ||
661 | emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); | 706 | emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); |
662 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); | 707 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); |
663 | _emit(ARM_COND_MI, ARM_B(0), ctx); | 708 | _emit(ARM_COND_MI, ARM_B(0), ctx); |
@@ -692,8 +737,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk, | |||
692 | /* Do LSR operation */ | 737 | /* Do LSR operation */ |
693 | emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); | 738 | emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); |
694 | emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); | 739 | emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); |
695 | /* As we are using ARM_LR */ | ||
696 | ctx->seen |= SEEN_CALL; | ||
697 | emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); | 740 | emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); |
698 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); | 741 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); |
699 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); | 742 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); |
@@ -828,8 +871,6 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, | |||
828 | /* Do Multiplication */ | 871 | /* Do Multiplication */ |
829 | emit(ARM_MUL(ARM_IP, rd, rn), ctx); | 872 | emit(ARM_MUL(ARM_IP, rd, rn), ctx); |
830 | emit(ARM_MUL(ARM_LR, rm, rt), ctx); | 873 | emit(ARM_MUL(ARM_LR, rm, rt), ctx); |
831 | /* As we are using ARM_LR */ | ||
832 | ctx->seen |= SEEN_CALL; | ||
833 | emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); | 874 | emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); |
834 | 875 | ||
835 | emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); | 876 | emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); |
@@ -872,33 +913,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, | |||
872 | } | 913 | } |
873 | 914 | ||
874 | /* dst = *(size*)(src + off) */ | 915 | /* dst = *(size*)(src + off) */ |
875 | static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk, | 916 | static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, |
876 | const s32 off, struct jit_ctx *ctx, const u8 sz){ | 917 | s32 off, struct jit_ctx *ctx, const u8 sz){ |
877 | const u8 *tmp = bpf2a32[TMP_REG_1]; | 918 | const u8 *tmp = bpf2a32[TMP_REG_1]; |
878 | u8 rd = dstk ? tmp[1] : dst; | 919 | const u8 *rd = dstk ? tmp : dst; |
879 | u8 rm = src; | 920 | u8 rm = src; |
921 | s32 off_max; | ||
880 | 922 | ||
881 | if (off) { | 923 | if (sz == BPF_H) |
924 | off_max = 0xff; | ||
925 | else | ||
926 | off_max = 0xfff; | ||
927 | |||
928 | if (off < 0 || off > off_max) { | ||
882 | emit_a32_mov_i(tmp[0], off, false, ctx); | 929 | emit_a32_mov_i(tmp[0], off, false, ctx); |
883 | emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); | 930 | emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); |
884 | rm = tmp[0]; | 931 | rm = tmp[0]; |
932 | off = 0; | ||
933 | } else if (rd[1] == rm) { | ||
934 | emit(ARM_MOV_R(tmp[0], rm), ctx); | ||
935 | rm = tmp[0]; | ||
885 | } | 936 | } |
886 | switch (sz) { | 937 | switch (sz) { |
887 | case BPF_W: | 938 | case BPF_B: |
888 | /* Load a Word */ | 939 | /* Load a Byte */ |
889 | emit(ARM_LDR_I(rd, rm, 0), ctx); | 940 | emit(ARM_LDRB_I(rd[1], rm, off), ctx); |
941 | emit_a32_mov_i(dst[0], 0, dstk, ctx); | ||
890 | break; | 942 | break; |
891 | case BPF_H: | 943 | case BPF_H: |
892 | /* Load a HalfWord */ | 944 | /* Load a HalfWord */ |
893 | emit(ARM_LDRH_I(rd, rm, 0), ctx); | 945 | emit(ARM_LDRH_I(rd[1], rm, off), ctx); |
946 | emit_a32_mov_i(dst[0], 0, dstk, ctx); | ||
894 | break; | 947 | break; |
895 | case BPF_B: | 948 | case BPF_W: |
896 | /* Load a Byte */ | 949 | /* Load a Word */ |
897 | emit(ARM_LDRB_I(rd, rm, 0), ctx); | 950 | emit(ARM_LDR_I(rd[1], rm, off), ctx); |
951 | emit_a32_mov_i(dst[0], 0, dstk, ctx); | ||
952 | break; | ||
953 | case BPF_DW: | ||
954 | /* Load a Double Word */ | ||
955 | emit(ARM_LDR_I(rd[1], rm, off), ctx); | ||
956 | emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); | ||
898 | break; | 957 | break; |
899 | } | 958 | } |
900 | if (dstk) | 959 | if (dstk) |
901 | emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); | 960 | emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); |
961 | if (dstk && sz == BPF_DW) | ||
962 | emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); | ||
902 | } | 963 | } |
903 | 964 | ||
904 | /* Arithmetic Operation */ | 965 | /* Arithmetic Operation */ |
@@ -906,7 +967,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, | |||
906 | const u8 rn, struct jit_ctx *ctx, u8 op) { | 967 | const u8 rn, struct jit_ctx *ctx, u8 op) { |
907 | switch (op) { | 968 | switch (op) { |
908 | case BPF_JSET: | 969 | case BPF_JSET: |
909 | ctx->seen |= SEEN_CALL; | ||
910 | emit(ARM_AND_R(ARM_IP, rt, rn), ctx); | 970 | emit(ARM_AND_R(ARM_IP, rt, rn), ctx); |
911 | emit(ARM_AND_R(ARM_LR, rd, rm), ctx); | 971 | emit(ARM_AND_R(ARM_LR, rd, rm), ctx); |
912 | emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); | 972 | emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); |
@@ -945,7 +1005,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) | |||
945 | const u8 *tcc = bpf2a32[TCALL_CNT]; | 1005 | const u8 *tcc = bpf2a32[TCALL_CNT]; |
946 | const int idx0 = ctx->idx; | 1006 | const int idx0 = ctx->idx; |
947 | #define cur_offset (ctx->idx - idx0) | 1007 | #define cur_offset (ctx->idx - idx0) |
948 | #define jmp_offset (out_offset - (cur_offset)) | 1008 | #define jmp_offset (out_offset - (cur_offset) - 2) |
949 | u32 off, lo, hi; | 1009 | u32 off, lo, hi; |
950 | 1010 | ||
951 | /* if (index >= array->map.max_entries) | 1011 | /* if (index >= array->map.max_entries) |
@@ -956,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) | |||
956 | emit_a32_mov_i(tmp[1], off, false, ctx); | 1016 | emit_a32_mov_i(tmp[1], off, false, ctx); |
957 | emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); | 1017 | emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); |
958 | emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); | 1018 | emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); |
959 | /* index (64 bit) */ | 1019 | /* index is 32-bit for arrays */ |
960 | emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); | 1020 | emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); |
961 | /* index >= array->map.max_entries */ | 1021 | /* index >= array->map.max_entries */ |
962 | emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); | 1022 | emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); |
@@ -997,7 +1057,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) | |||
997 | emit_a32_mov_i(tmp2[1], off, false, ctx); | 1057 | emit_a32_mov_i(tmp2[1], off, false, ctx); |
998 | emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); | 1058 | emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); |
999 | emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); | 1059 | emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); |
1000 | emit(ARM_BX(tmp[1]), ctx); | 1060 | emit_bx_r(tmp[1], ctx); |
1001 | 1061 | ||
1002 | /* out: */ | 1062 | /* out: */ |
1003 | if (out_offset == -1) | 1063 | if (out_offset == -1) |
@@ -1070,54 +1130,22 @@ static void build_prologue(struct jit_ctx *ctx) | |||
1070 | const u8 r2 = bpf2a32[BPF_REG_1][1]; | 1130 | const u8 r2 = bpf2a32[BPF_REG_1][1]; |
1071 | const u8 r3 = bpf2a32[BPF_REG_1][0]; | 1131 | const u8 r3 = bpf2a32[BPF_REG_1][0]; |
1072 | const u8 r4 = bpf2a32[BPF_REG_6][1]; | 1132 | const u8 r4 = bpf2a32[BPF_REG_6][1]; |
1073 | const u8 r5 = bpf2a32[BPF_REG_6][0]; | ||
1074 | const u8 r6 = bpf2a32[TMP_REG_1][1]; | ||
1075 | const u8 r7 = bpf2a32[TMP_REG_1][0]; | ||
1076 | const u8 r8 = bpf2a32[TMP_REG_2][1]; | ||
1077 | const u8 r10 = bpf2a32[TMP_REG_2][0]; | ||
1078 | const u8 fplo = bpf2a32[BPF_REG_FP][1]; | 1133 | const u8 fplo = bpf2a32[BPF_REG_FP][1]; |
1079 | const u8 fphi = bpf2a32[BPF_REG_FP][0]; | 1134 | const u8 fphi = bpf2a32[BPF_REG_FP][0]; |
1080 | const u8 sp = ARM_SP; | ||
1081 | const u8 *tcc = bpf2a32[TCALL_CNT]; | 1135 | const u8 *tcc = bpf2a32[TCALL_CNT]; |
1082 | 1136 | ||
1083 | u16 reg_set = 0; | ||
1084 | |||
1085 | /* | ||
1086 | * eBPF prog stack layout | ||
1087 | * | ||
1088 | * high | ||
1089 | * original ARM_SP => +-----+ eBPF prologue | ||
1090 | * |FP/LR| | ||
1091 | * current ARM_FP => +-----+ | ||
1092 | * | ... | callee saved registers | ||
1093 | * eBPF fp register => +-----+ <= (BPF_FP) | ||
1094 | * | ... | eBPF JIT scratch space | ||
1095 | * | | eBPF prog stack | ||
1096 | * +-----+ | ||
1097 | * |RSVD | JIT scratchpad | ||
1098 | * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE) | ||
1099 | * | | | ||
1100 | * | ... | Function call stack | ||
1101 | * | | | ||
1102 | * +-----+ | ||
1103 | * low | ||
1104 | */ | ||
1105 | |||
1106 | /* Save callee saved registers. */ | 1137 | /* Save callee saved registers. */ |
1107 | reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10); | ||
1108 | #ifdef CONFIG_FRAME_POINTER | 1138 | #ifdef CONFIG_FRAME_POINTER |
1109 | reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC); | 1139 | u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC; |
1110 | emit(ARM_MOV_R(ARM_IP, sp), ctx); | 1140 | emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); |
1111 | emit(ARM_PUSH(reg_set), ctx); | 1141 | emit(ARM_PUSH(reg_set), ctx); |
1112 | emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); | 1142 | emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); |
1113 | #else | 1143 | #else |
1114 | /* Check if call instruction exists in BPF body */ | 1144 | emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx); |
1115 | if (ctx->seen & SEEN_CALL) | 1145 | emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx); |
1116 | reg_set |= (1<<ARM_LR); | ||
1117 | emit(ARM_PUSH(reg_set), ctx); | ||
1118 | #endif | 1146 | #endif |
1119 | /* Save frame pointer for later */ | 1147 | /* Save frame pointer for later */ |
1120 | emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx); | 1148 | emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx); |
1121 | 1149 | ||
1122 | ctx->stack_size = imm8m(STACK_SIZE); | 1150 | ctx->stack_size = imm8m(STACK_SIZE); |
1123 | 1151 | ||
@@ -1140,33 +1168,19 @@ static void build_prologue(struct jit_ctx *ctx) | |||
1140 | /* end of prologue */ | 1168 | /* end of prologue */ |
1141 | } | 1169 | } |
1142 | 1170 | ||
1171 | /* restore callee saved registers. */ | ||
1143 | static void build_epilogue(struct jit_ctx *ctx) | 1172 | static void build_epilogue(struct jit_ctx *ctx) |
1144 | { | 1173 | { |
1145 | const u8 r4 = bpf2a32[BPF_REG_6][1]; | ||
1146 | const u8 r5 = bpf2a32[BPF_REG_6][0]; | ||
1147 | const u8 r6 = bpf2a32[TMP_REG_1][1]; | ||
1148 | const u8 r7 = bpf2a32[TMP_REG_1][0]; | ||
1149 | const u8 r8 = bpf2a32[TMP_REG_2][1]; | ||
1150 | const u8 r10 = bpf2a32[TMP_REG_2][0]; | ||
1151 | u16 reg_set = 0; | ||
1152 | |||
1153 | /* unwind function call stack */ | ||
1154 | emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); | ||
1155 | |||
1156 | /* restore callee saved registers. */ | ||
1157 | reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10); | ||
1158 | #ifdef CONFIG_FRAME_POINTER | 1174 | #ifdef CONFIG_FRAME_POINTER |
1159 | /* the first instruction of the prologue was: mov ip, sp */ | 1175 | /* When using frame pointers, some additional registers need to |
1160 | reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC); | 1176 | * be loaded. */ |
1177 | u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP; | ||
1178 | emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx); | ||
1161 | emit(ARM_LDM(ARM_SP, reg_set), ctx); | 1179 | emit(ARM_LDM(ARM_SP, reg_set), ctx); |
1162 | #else | 1180 | #else |
1163 | if (ctx->seen & SEEN_CALL) | ||
1164 | reg_set |= (1<<ARM_PC); | ||
1165 | /* Restore callee saved registers. */ | 1181 | /* Restore callee saved registers. */ |
1166 | emit(ARM_POP(reg_set), ctx); | 1182 | emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); |
1167 | /* Return back to the callee function */ | 1183 | emit(ARM_POP(CALLEE_POP_MASK), ctx); |
1168 | if (!(ctx->seen & SEEN_CALL)) | ||
1169 | emit(ARM_BX(ARM_LR), ctx); | ||
1170 | #endif | 1184 | #endif |
1171 | } | 1185 | } |
1172 | 1186 | ||
@@ -1394,8 +1408,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |||
1394 | emit_rev32(rt, rt, ctx); | 1408 | emit_rev32(rt, rt, ctx); |
1395 | goto emit_bswap_uxt; | 1409 | goto emit_bswap_uxt; |
1396 | case 64: | 1410 | case 64: |
1397 | /* Because of the usage of ARM_LR */ | ||
1398 | ctx->seen |= SEEN_CALL; | ||
1399 | emit_rev32(ARM_LR, rt, ctx); | 1411 | emit_rev32(ARM_LR, rt, ctx); |
1400 | emit_rev32(rt, rd, ctx); | 1412 | emit_rev32(rt, rd, ctx); |
1401 | emit(ARM_MOV_R(rd, ARM_LR), ctx); | 1413 | emit(ARM_MOV_R(rd, ARM_LR), ctx); |
@@ -1448,22 +1460,7 @@ exit: | |||
1448 | rn = sstk ? tmp2[1] : src_lo; | 1460 | rn = sstk ? tmp2[1] : src_lo; |
1449 | if (sstk) | 1461 | if (sstk) |
1450 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); | 1462 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); |
1451 | switch (BPF_SIZE(code)) { | 1463 | emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); |
1452 | case BPF_W: | ||
1453 | /* Load a Word */ | ||
1454 | case BPF_H: | ||
1455 | /* Load a Half-Word */ | ||
1456 | case BPF_B: | ||
1457 | /* Load a Byte */ | ||
1458 | emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code)); | ||
1459 | emit_a32_mov_i(dst_hi, 0, dstk, ctx); | ||
1460 | break; | ||
1461 | case BPF_DW: | ||
1462 | /* Load a double word */ | ||
1463 | emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W); | ||
1464 | emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W); | ||
1465 | break; | ||
1466 | } | ||
1467 | break; | 1464 | break; |
1468 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | 1465 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ |
1469 | case BPF_LD | BPF_ABS | BPF_W: | 1466 | case BPF_LD | BPF_ABS | BPF_W: |
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a1f28a54f23a..60c4c342316c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S | |||
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm) | |||
244 | movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset | 244 | movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset |
245 | #endif | 245 | #endif |
246 | 246 | ||
247 | #ifdef CONFIG_RETPOLINE | ||
248 | /* | ||
249 | * When switching from a shallower to a deeper call stack | ||
250 | * the RSB may either underflow or use entries populated | ||
251 | * with userspace addresses. On CPUs where those concerns | ||
252 | * exist, overwrite the RSB with entries which capture | ||
253 | * speculative execution to prevent attack. | ||
254 | */ | ||
255 | FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
256 | #endif | ||
257 | |||
247 | /* restore callee-saved registers */ | 258 | /* restore callee-saved registers */ |
248 | popl %esi | 259 | popl %esi |
249 | popl %edi | 260 | popl %edi |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4f8e1d35a97c..aa15b4c0e3d1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
@@ -491,6 +491,17 @@ ENTRY(__switch_to_asm) | |||
491 | movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset | 491 | movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset |
492 | #endif | 492 | #endif |
493 | 493 | ||
494 | #ifdef CONFIG_RETPOLINE | ||
495 | /* | ||
496 | * When switching from a shallower to a deeper call stack | ||
497 | * the RSB may either underflow or use entries populated | ||
498 | * with userspace addresses. On CPUs where those concerns | ||
499 | * exist, overwrite the RSB with entries which capture | ||
500 | * speculative execution to prevent attack. | ||
501 | */ | ||
502 | FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
503 | #endif | ||
504 | |||
494 | /* restore callee-saved registers */ | 505 | /* restore callee-saved registers */ |
495 | popq %r15 | 506 | popq %r15 |
496 | popq %r14 | 507 | popq %r14 |
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 005908ee9333..a2efb490f743 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c | |||
@@ -755,14 +755,14 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { | |||
755 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), | 755 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), |
756 | 756 | ||
757 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), | 757 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), |
758 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), | 758 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init), |
759 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), | 759 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), |
760 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), | 760 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), |
761 | 761 | ||
762 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), | 762 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), |
763 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), | 763 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), |
764 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), | 764 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), |
765 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), | 765 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init), |
766 | 766 | ||
767 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), | 767 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), |
768 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), | 768 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a9e57f08bfa6..98722773391d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup); | |||
136 | extern void disable_local_APIC(void); | 136 | extern void disable_local_APIC(void); |
137 | extern void lapic_shutdown(void); | 137 | extern void lapic_shutdown(void); |
138 | extern void sync_Arb_IDs(void); | 138 | extern void sync_Arb_IDs(void); |
139 | extern void init_bsp_APIC(void); | ||
139 | extern void apic_intr_mode_init(void); | 140 | extern void apic_intr_mode_init(void); |
140 | extern void setup_local_APIC(void); | 141 | extern void setup_local_APIC(void); |
141 | extern void init_apic_mappings(void); | 142 | extern void init_apic_mappings(void); |
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f275447862f4..25b9375c1484 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h | |||
@@ -206,11 +206,11 @@ | |||
206 | #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ | 206 | #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ |
207 | #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ | 207 | #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ |
208 | #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ | 208 | #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ |
209 | #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ | ||
210 | #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ | 209 | #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ |
211 | #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ | 210 | #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ |
212 | 211 | ||
213 | #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ | 212 | #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ |
213 | #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ | ||
214 | 214 | ||
215 | /* Virtualization flags: Linux defined, word 8 */ | 215 | /* Virtualization flags: Linux defined, word 8 */ |
216 | #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ | 216 | #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ |
@@ -245,6 +245,7 @@ | |||
245 | #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ | 245 | #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ |
246 | #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ | 246 | #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ |
247 | #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ | 247 | #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ |
248 | #define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ | ||
248 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ | 249 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ |
249 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ | 250 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ |
250 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ | 251 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ |
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c9459a4c3c68..22c5f3e6f820 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h | |||
@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data); | |||
39 | 39 | ||
40 | void __init sme_early_init(void); | 40 | void __init sme_early_init(void); |
41 | 41 | ||
42 | void __init sme_encrypt_kernel(void); | 42 | void __init sme_encrypt_kernel(struct boot_params *bp); |
43 | void __init sme_enable(struct boot_params *bp); | 43 | void __init sme_enable(struct boot_params *bp); |
44 | 44 | ||
45 | int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); | 45 | int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); |
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } | |||
67 | 67 | ||
68 | static inline void __init sme_early_init(void) { } | 68 | static inline void __init sme_early_init(void) { } |
69 | 69 | ||
70 | static inline void __init sme_encrypt_kernel(void) { } | 70 | static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } |
71 | static inline void __init sme_enable(struct boot_params *bp) { } | 71 | static inline void __init sme_enable(struct boot_params *bp) { } |
72 | 72 | ||
73 | static inline bool sme_active(void) { return false; } | 73 | static inline bool sme_active(void) { return false; } |
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 402a11c803c3..7b45d8424150 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h | |||
@@ -11,7 +11,7 @@ | |||
11 | * Fill the CPU return stack buffer. | 11 | * Fill the CPU return stack buffer. |
12 | * | 12 | * |
13 | * Each entry in the RSB, if used for a speculative 'ret', contains an | 13 | * Each entry in the RSB, if used for a speculative 'ret', contains an |
14 | * infinite 'pause; jmp' loop to capture speculative execution. | 14 | * infinite 'pause; lfence; jmp' loop to capture speculative execution. |
15 | * | 15 | * |
16 | * This is required in various cases for retpoline and IBRS-based | 16 | * This is required in various cases for retpoline and IBRS-based |
17 | * mitigations for the Spectre variant 2 vulnerability. Sometimes to | 17 | * mitigations for the Spectre variant 2 vulnerability. Sometimes to |
@@ -38,11 +38,13 @@ | |||
38 | call 772f; \ | 38 | call 772f; \ |
39 | 773: /* speculation trap */ \ | 39 | 773: /* speculation trap */ \ |
40 | pause; \ | 40 | pause; \ |
41 | lfence; \ | ||
41 | jmp 773b; \ | 42 | jmp 773b; \ |
42 | 772: \ | 43 | 772: \ |
43 | call 774f; \ | 44 | call 774f; \ |
44 | 775: /* speculation trap */ \ | 45 | 775: /* speculation trap */ \ |
45 | pause; \ | 46 | pause; \ |
47 | lfence; \ | ||
46 | jmp 775b; \ | 48 | jmp 775b; \ |
47 | 774: \ | 49 | 774: \ |
48 | dec reg; \ | 50 | dec reg; \ |
@@ -73,6 +75,7 @@ | |||
73 | call .Ldo_rop_\@ | 75 | call .Ldo_rop_\@ |
74 | .Lspec_trap_\@: | 76 | .Lspec_trap_\@: |
75 | pause | 77 | pause |
78 | lfence | ||
76 | jmp .Lspec_trap_\@ | 79 | jmp .Lspec_trap_\@ |
77 | .Ldo_rop_\@: | 80 | .Ldo_rop_\@: |
78 | mov \reg, (%_ASM_SP) | 81 | mov \reg, (%_ASM_SP) |
@@ -165,6 +168,7 @@ | |||
165 | " .align 16\n" \ | 168 | " .align 16\n" \ |
166 | "901: call 903f;\n" \ | 169 | "901: call 903f;\n" \ |
167 | "902: pause;\n" \ | 170 | "902: pause;\n" \ |
171 | " lfence;\n" \ | ||
168 | " jmp 902b;\n" \ | 172 | " jmp 902b;\n" \ |
169 | " .align 16\n" \ | 173 | " .align 16\n" \ |
170 | "903: addl $4, %%esp;\n" \ | 174 | "903: addl $4, %%esp;\n" \ |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 880441f24146..25ddf02598d2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void) | |||
1286 | return APIC_SYMMETRIC_IO; | 1286 | return APIC_SYMMETRIC_IO; |
1287 | } | 1287 | } |
1288 | 1288 | ||
1289 | /* | ||
1290 | * An initial setup of the virtual wire mode. | ||
1291 | */ | ||
1292 | void __init init_bsp_APIC(void) | ||
1293 | { | ||
1294 | unsigned int value; | ||
1295 | |||
1296 | /* | ||
1297 | * Don't do the setup now if we have a SMP BIOS as the | ||
1298 | * through-I/O-APIC virtual wire mode might be active. | ||
1299 | */ | ||
1300 | if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) | ||
1301 | return; | ||
1302 | |||
1303 | /* | ||
1304 | * Do not trust the local APIC being empty at bootup. | ||
1305 | */ | ||
1306 | clear_local_APIC(); | ||
1307 | |||
1308 | /* | ||
1309 | * Enable APIC. | ||
1310 | */ | ||
1311 | value = apic_read(APIC_SPIV); | ||
1312 | value &= ~APIC_VECTOR_MASK; | ||
1313 | value |= APIC_SPIV_APIC_ENABLED; | ||
1314 | |||
1315 | #ifdef CONFIG_X86_32 | ||
1316 | /* This bit is reserved on P4/Xeon and should be cleared */ | ||
1317 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
1318 | (boot_cpu_data.x86 == 15)) | ||
1319 | value &= ~APIC_SPIV_FOCUS_DISABLED; | ||
1320 | else | ||
1321 | #endif | ||
1322 | value |= APIC_SPIV_FOCUS_DISABLED; | ||
1323 | value |= SPURIOUS_APIC_VECTOR; | ||
1324 | apic_write(APIC_SPIV, value); | ||
1325 | |||
1326 | /* | ||
1327 | * Set up the virtual wire mode. | ||
1328 | */ | ||
1329 | apic_write(APIC_LVT0, APIC_DM_EXTINT); | ||
1330 | value = APIC_DM_NMI; | ||
1331 | if (!lapic_is_integrated()) /* 82489DX */ | ||
1332 | value |= APIC_LVT_LEVEL_TRIGGER; | ||
1333 | if (apic_extnmi == APIC_EXTNMI_NONE) | ||
1334 | value |= APIC_LVT_MASKED; | ||
1335 | apic_write(APIC_LVT1, value); | ||
1336 | } | ||
1337 | |||
1289 | /* Init the interrupt delivery mode for the BSP */ | 1338 | /* Init the interrupt delivery mode for the BSP */ |
1290 | void __init apic_intr_mode_init(void) | 1339 | void __init apic_intr_mode_init(void) |
1291 | { | 1340 | { |
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f8b03bb8e725..3cc471beb50b 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c | |||
@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, | |||
542 | 542 | ||
543 | err = assign_irq_vector_policy(irqd, info); | 543 | err = assign_irq_vector_policy(irqd, info); |
544 | trace_vector_setup(virq + i, false, err); | 544 | trace_vector_setup(virq + i, false, err); |
545 | if (err) | 545 | if (err) { |
546 | irqd->chip_data = NULL; | ||
547 | free_apic_chip_data(apicd); | ||
546 | goto error; | 548 | goto error; |
549 | } | ||
547 | } | 550 | } |
548 | 551 | ||
549 | return 0; | 552 | return 0; |
550 | 553 | ||
551 | error: | 554 | error: |
552 | x86_vector_free_irqs(domain, virq, i + 1); | 555 | x86_vector_free_irqs(domain, virq, i); |
553 | return err; | 556 | return err; |
554 | } | 557 | } |
555 | 558 | ||
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e4dc26185aa7..390b3dc3d438 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/alternative.h> | 23 | #include <asm/alternative.h> |
24 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
25 | #include <asm/set_memory.h> | 25 | #include <asm/set_memory.h> |
26 | #include <asm/intel-family.h> | ||
26 | 27 | ||
27 | static void __init spectre_v2_select_mitigation(void); | 28 | static void __init spectre_v2_select_mitigation(void); |
28 | 29 | ||
@@ -155,6 +156,23 @@ disable: | |||
155 | return SPECTRE_V2_CMD_NONE; | 156 | return SPECTRE_V2_CMD_NONE; |
156 | } | 157 | } |
157 | 158 | ||
159 | /* Check for Skylake-like CPUs (for RSB handling) */ | ||
160 | static bool __init is_skylake_era(void) | ||
161 | { | ||
162 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | ||
163 | boot_cpu_data.x86 == 6) { | ||
164 | switch (boot_cpu_data.x86_model) { | ||
165 | case INTEL_FAM6_SKYLAKE_MOBILE: | ||
166 | case INTEL_FAM6_SKYLAKE_DESKTOP: | ||
167 | case INTEL_FAM6_SKYLAKE_X: | ||
168 | case INTEL_FAM6_KABYLAKE_MOBILE: | ||
169 | case INTEL_FAM6_KABYLAKE_DESKTOP: | ||
170 | return true; | ||
171 | } | ||
172 | } | ||
173 | return false; | ||
174 | } | ||
175 | |||
158 | static void __init spectre_v2_select_mitigation(void) | 176 | static void __init spectre_v2_select_mitigation(void) |
159 | { | 177 | { |
160 | enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); | 178 | enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); |
@@ -213,6 +231,24 @@ retpoline_auto: | |||
213 | 231 | ||
214 | spectre_v2_enabled = mode; | 232 | spectre_v2_enabled = mode; |
215 | pr_info("%s\n", spectre_v2_strings[mode]); | 233 | pr_info("%s\n", spectre_v2_strings[mode]); |
234 | |||
235 | /* | ||
236 | * If neither SMEP or KPTI are available, there is a risk of | ||
237 | * hitting userspace addresses in the RSB after a context switch | ||
238 | * from a shallow call stack to a deeper one. To prevent this fill | ||
239 | * the entire RSB, even when using IBRS. | ||
240 | * | ||
241 | * Skylake era CPUs have a separate issue with *underflow* of the | ||
242 | * RSB, when they will predict 'ret' targets from the generic BTB. | ||
243 | * The proper mitigation for this is IBRS. If IBRS is not supported | ||
244 | * or deactivated in favour of retpolines the RSB fill on context | ||
245 | * switch is required. | ||
246 | */ | ||
247 | if ((!boot_cpu_has(X86_FEATURE_PTI) && | ||
248 | !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { | ||
249 | setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); | ||
250 | pr_info("Filling RSB on context switch\n"); | ||
251 | } | ||
216 | } | 252 | } |
217 | 253 | ||
218 | #undef pr_fmt | 254 | #undef pr_fmt |
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 88dcf8479013..99442370de40 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c | |||
@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) | |||
525 | */ | 525 | */ |
526 | if (static_branch_unlikely(&rdt_mon_enable_key)) | 526 | if (static_branch_unlikely(&rdt_mon_enable_key)) |
527 | rmdir_mondata_subdir_allrdtgrp(r, d->id); | 527 | rmdir_mondata_subdir_allrdtgrp(r, d->id); |
528 | kfree(d->ctrl_val); | ||
529 | kfree(d->rmid_busy_llc); | ||
530 | kfree(d->mbm_total); | ||
531 | kfree(d->mbm_local); | ||
532 | list_del(&d->list); | 528 | list_del(&d->list); |
533 | if (is_mbm_enabled()) | 529 | if (is_mbm_enabled()) |
534 | cancel_delayed_work(&d->mbm_over); | 530 | cancel_delayed_work(&d->mbm_over); |
@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) | |||
545 | cancel_delayed_work(&d->cqm_limbo); | 541 | cancel_delayed_work(&d->cqm_limbo); |
546 | } | 542 | } |
547 | 543 | ||
544 | kfree(d->ctrl_val); | ||
545 | kfree(d->rmid_busy_llc); | ||
546 | kfree(d->mbm_total); | ||
547 | kfree(d->mbm_local); | ||
548 | kfree(d); | 548 | kfree(d); |
549 | return; | 549 | return; |
550 | } | 550 | } |
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 05459ad3db46..d0e69769abfd 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c | |||
@@ -21,7 +21,6 @@ struct cpuid_bit { | |||
21 | static const struct cpuid_bit cpuid_bits[] = { | 21 | static const struct cpuid_bit cpuid_bits[] = { |
22 | { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, | 22 | { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, |
23 | { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, | 23 | { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, |
24 | { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, | ||
25 | { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, | 24 | { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, |
26 | { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, | 25 | { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, |
27 | { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, | 26 | { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6a5d757b9cfd..7ba5d819ebe3 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
157 | p = fixup_pointer(&phys_base, physaddr); | 157 | p = fixup_pointer(&phys_base, physaddr); |
158 | *p += load_delta - sme_get_me_mask(); | 158 | *p += load_delta - sme_get_me_mask(); |
159 | 159 | ||
160 | /* Encrypt the kernel (if SME is active) */ | 160 | /* Encrypt the kernel and related (if SME is active) */ |
161 | sme_encrypt_kernel(); | 161 | sme_encrypt_kernel(bp); |
162 | 162 | ||
163 | /* | 163 | /* |
164 | * Return the SME encryption mask (if SME is active) to be used as a | 164 | * Return the SME encryption mask (if SME is active) to be used as a |
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index d985cef3984f..56d99be3706a 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c | |||
@@ -56,7 +56,7 @@ struct idt_data { | |||
56 | * Early traps running on the DEFAULT_STACK because the other interrupt | 56 | * Early traps running on the DEFAULT_STACK because the other interrupt |
57 | * stacks work only after cpu_init(). | 57 | * stacks work only after cpu_init(). |
58 | */ | 58 | */ |
59 | static const __initdata struct idt_data early_idts[] = { | 59 | static const __initconst struct idt_data early_idts[] = { |
60 | INTG(X86_TRAP_DB, debug), | 60 | INTG(X86_TRAP_DB, debug), |
61 | SYSG(X86_TRAP_BP, int3), | 61 | SYSG(X86_TRAP_BP, int3), |
62 | #ifdef CONFIG_X86_32 | 62 | #ifdef CONFIG_X86_32 |
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = { | |||
70 | * the traps which use them are reinitialized with IST after cpu_init() has | 70 | * the traps which use them are reinitialized with IST after cpu_init() has |
71 | * set up TSS. | 71 | * set up TSS. |
72 | */ | 72 | */ |
73 | static const __initdata struct idt_data def_idts[] = { | 73 | static const __initconst struct idt_data def_idts[] = { |
74 | INTG(X86_TRAP_DE, divide_error), | 74 | INTG(X86_TRAP_DE, divide_error), |
75 | INTG(X86_TRAP_NMI, nmi), | 75 | INTG(X86_TRAP_NMI, nmi), |
76 | INTG(X86_TRAP_BR, bounds), | 76 | INTG(X86_TRAP_BR, bounds), |
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = { | |||
108 | /* | 108 | /* |
109 | * The APIC and SMP idt entries | 109 | * The APIC and SMP idt entries |
110 | */ | 110 | */ |
111 | static const __initdata struct idt_data apic_idts[] = { | 111 | static const __initconst struct idt_data apic_idts[] = { |
112 | #ifdef CONFIG_SMP | 112 | #ifdef CONFIG_SMP |
113 | INTG(RESCHEDULE_VECTOR, reschedule_interrupt), | 113 | INTG(RESCHEDULE_VECTOR, reschedule_interrupt), |
114 | INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), | 114 | INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), |
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = { | |||
150 | * Early traps running on the DEFAULT_STACK because the other interrupt | 150 | * Early traps running on the DEFAULT_STACK because the other interrupt |
151 | * stacks work only after cpu_init(). | 151 | * stacks work only after cpu_init(). |
152 | */ | 152 | */ |
153 | static const __initdata struct idt_data early_pf_idts[] = { | 153 | static const __initconst struct idt_data early_pf_idts[] = { |
154 | INTG(X86_TRAP_PF, page_fault), | 154 | INTG(X86_TRAP_PF, page_fault), |
155 | }; | 155 | }; |
156 | 156 | ||
@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = { | |||
158 | * Override for the debug_idt. Same as the default, but with interrupt | 158 | * Override for the debug_idt. Same as the default, but with interrupt |
159 | * stack set to DEFAULT_STACK (0). Required for NMI trap handling. | 159 | * stack set to DEFAULT_STACK (0). Required for NMI trap handling. |
160 | */ | 160 | */ |
161 | static const __initdata struct idt_data dbg_idts[] = { | 161 | static const __initconst struct idt_data dbg_idts[] = { |
162 | INTG(X86_TRAP_DB, debug), | 162 | INTG(X86_TRAP_DB, debug), |
163 | INTG(X86_TRAP_BP, int3), | 163 | INTG(X86_TRAP_BP, int3), |
164 | }; | 164 | }; |
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; | |||
180 | * The exceptions which use Interrupt stacks. They are setup after | 180 | * The exceptions which use Interrupt stacks. They are setup after |
181 | * cpu_init() when the TSS has been initialized. | 181 | * cpu_init() when the TSS has been initialized. |
182 | */ | 182 | */ |
183 | static const __initdata struct idt_data ist_idts[] = { | 183 | static const __initconst struct idt_data ist_idts[] = { |
184 | ISTG(X86_TRAP_DB, debug, DEBUG_STACK), | 184 | ISTG(X86_TRAP_DB, debug, DEBUG_STACK), |
185 | ISTG(X86_TRAP_NMI, nmi, NMI_STACK), | 185 | ISTG(X86_TRAP_NMI, nmi, NMI_STACK), |
186 | SISTG(X86_TRAP_BP, int3, DEBUG_STACK), | 186 | SISTG(X86_TRAP_BP, int3, DEBUG_STACK), |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 8da3e909e967..a539410c4ea9 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void) | |||
61 | struct irq_chip *chip = legacy_pic->chip; | 61 | struct irq_chip *chip = legacy_pic->chip; |
62 | int i; | 62 | int i; |
63 | 63 | ||
64 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | ||
65 | init_bsp_APIC(); | ||
66 | #endif | ||
64 | legacy_pic->init(0); | 67 | legacy_pic->init(0); |
65 | 68 | ||
66 | for (i = 0; i < nr_legacy_irqs(); i++) | 69 | for (i = 0; i < nr_legacy_irqs(); i++) |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 145810b0edf6..68d7ab81c62f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -364,16 +364,6 @@ static void __init reserve_initrd(void) | |||
364 | !ramdisk_image || !ramdisk_size) | 364 | !ramdisk_image || !ramdisk_size) |
365 | return; /* No initrd provided by bootloader */ | 365 | return; /* No initrd provided by bootloader */ |
366 | 366 | ||
367 | /* | ||
368 | * If SME is active, this memory will be marked encrypted by the | ||
369 | * kernel when it is accessed (including relocation). However, the | ||
370 | * ramdisk image was loaded decrypted by the bootloader, so make | ||
371 | * sure that it is encrypted before accessing it. For SEV the | ||
372 | * ramdisk will already be encrypted, so only do this for SME. | ||
373 | */ | ||
374 | if (sme_active()) | ||
375 | sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); | ||
376 | |||
377 | initrd_start = 0; | 367 | initrd_start = 0; |
378 | 368 | ||
379 | mapped_size = memblock_mem_size(max_pfn_mapped); | 369 | mapped_size = memblock_mem_size(max_pfn_mapped); |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 8ea117f8142e..e169e85db434 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void) | |||
602 | case INTEL_FAM6_KABYLAKE_DESKTOP: | 602 | case INTEL_FAM6_KABYLAKE_DESKTOP: |
603 | crystal_khz = 24000; /* 24.0 MHz */ | 603 | crystal_khz = 24000; /* 24.0 MHz */ |
604 | break; | 604 | break; |
605 | case INTEL_FAM6_SKYLAKE_X: | ||
606 | case INTEL_FAM6_ATOM_DENVERTON: | 605 | case INTEL_FAM6_ATOM_DENVERTON: |
607 | crystal_khz = 25000; /* 25.0 MHz */ | 606 | crystal_khz = 25000; /* 25.0 MHz */ |
608 | break; | 607 | break; |
@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void) | |||
612 | } | 611 | } |
613 | } | 612 | } |
614 | 613 | ||
614 | if (crystal_khz == 0) | ||
615 | return 0; | ||
615 | /* | 616 | /* |
616 | * TSC frequency determined by CPUID is a "hardware reported" | 617 | * TSC frequency determined by CPUID is a "hardware reported" |
617 | * frequency and is the most accurate one so far we have. This | 618 | * frequency and is the most accurate one so far we have. This |
@@ -1315,6 +1316,12 @@ void __init tsc_init(void) | |||
1315 | (unsigned long)cpu_khz / 1000, | 1316 | (unsigned long)cpu_khz / 1000, |
1316 | (unsigned long)cpu_khz % 1000); | 1317 | (unsigned long)cpu_khz % 1000); |
1317 | 1318 | ||
1319 | if (cpu_khz != tsc_khz) { | ||
1320 | pr_info("Detected %lu.%03lu MHz TSC", | ||
1321 | (unsigned long)tsc_khz / 1000, | ||
1322 | (unsigned long)tsc_khz % 1000); | ||
1323 | } | ||
1324 | |||
1318 | /* Sanitize TSC ADJUST before cyc2ns gets initialized */ | 1325 | /* Sanitize TSC ADJUST before cyc2ns gets initialized */ |
1319 | tsc_store_and_check_tsc_adjust(true); | 1326 | tsc_store_and_check_tsc_adjust(true); |
1320 | 1327 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 06fe3d51d385..b3e40773dce0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) | |||
172 | * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really | 172 | * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really |
173 | * faulted on a pte with its pkey=4. | 173 | * faulted on a pte with its pkey=4. |
174 | */ | 174 | */ |
175 | static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) | 175 | static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, |
176 | u32 *pkey) | ||
176 | { | 177 | { |
177 | /* This is effectively an #ifdef */ | 178 | /* This is effectively an #ifdef */ |
178 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) | 179 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) |
179 | return; | 180 | return; |
180 | 181 | ||
181 | /* Fault not from Protection Keys: nothing to do */ | 182 | /* Fault not from Protection Keys: nothing to do */ |
182 | if (si_code != SEGV_PKUERR) | 183 | if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) |
183 | return; | 184 | return; |
184 | /* | 185 | /* |
185 | * force_sig_info_fault() is called from a number of | 186 | * force_sig_info_fault() is called from a number of |
@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, | |||
218 | lsb = PAGE_SHIFT; | 219 | lsb = PAGE_SHIFT; |
219 | info.si_addr_lsb = lsb; | 220 | info.si_addr_lsb = lsb; |
220 | 221 | ||
221 | fill_sig_info_pkey(si_code, &info, pkey); | 222 | fill_sig_info_pkey(si_signo, si_code, &info, pkey); |
222 | 223 | ||
223 | force_sig_info(si_signo, &info, tsk); | 224 | force_sig_info(si_signo, &info, tsk); |
224 | } | 225 | } |
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 47388f0c0e59..af6f2f9c6a26 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c | |||
@@ -21,10 +21,14 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; | |||
21 | 21 | ||
22 | static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); | 22 | static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); |
23 | 23 | ||
24 | static __init void *early_alloc(size_t size, int nid) | 24 | static __init void *early_alloc(size_t size, int nid, bool panic) |
25 | { | 25 | { |
26 | return memblock_virt_alloc_try_nid_nopanic(size, size, | 26 | if (panic) |
27 | __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); | 27 | return memblock_virt_alloc_try_nid(size, size, |
28 | __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); | ||
29 | else | ||
30 | return memblock_virt_alloc_try_nid_nopanic(size, size, | ||
31 | __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); | ||
28 | } | 32 | } |
29 | 33 | ||
30 | static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, | 34 | static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, |
@@ -38,14 +42,14 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, | |||
38 | if (boot_cpu_has(X86_FEATURE_PSE) && | 42 | if (boot_cpu_has(X86_FEATURE_PSE) && |
39 | ((end - addr) == PMD_SIZE) && | 43 | ((end - addr) == PMD_SIZE) && |
40 | IS_ALIGNED(addr, PMD_SIZE)) { | 44 | IS_ALIGNED(addr, PMD_SIZE)) { |
41 | p = early_alloc(PMD_SIZE, nid); | 45 | p = early_alloc(PMD_SIZE, nid, false); |
42 | if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) | 46 | if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) |
43 | return; | 47 | return; |
44 | else if (p) | 48 | else if (p) |
45 | memblock_free(__pa(p), PMD_SIZE); | 49 | memblock_free(__pa(p), PMD_SIZE); |
46 | } | 50 | } |
47 | 51 | ||
48 | p = early_alloc(PAGE_SIZE, nid); | 52 | p = early_alloc(PAGE_SIZE, nid, true); |
49 | pmd_populate_kernel(&init_mm, pmd, p); | 53 | pmd_populate_kernel(&init_mm, pmd, p); |
50 | } | 54 | } |
51 | 55 | ||
@@ -57,7 +61,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, | |||
57 | if (!pte_none(*pte)) | 61 | if (!pte_none(*pte)) |
58 | continue; | 62 | continue; |
59 | 63 | ||
60 | p = early_alloc(PAGE_SIZE, nid); | 64 | p = early_alloc(PAGE_SIZE, nid, true); |
61 | entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); | 65 | entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); |
62 | set_pte_at(&init_mm, addr, pte, entry); | 66 | set_pte_at(&init_mm, addr, pte, entry); |
63 | } while (pte++, addr += PAGE_SIZE, addr != end); | 67 | } while (pte++, addr += PAGE_SIZE, addr != end); |
@@ -75,14 +79,14 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, | |||
75 | if (boot_cpu_has(X86_FEATURE_GBPAGES) && | 79 | if (boot_cpu_has(X86_FEATURE_GBPAGES) && |
76 | ((end - addr) == PUD_SIZE) && | 80 | ((end - addr) == PUD_SIZE) && |
77 | IS_ALIGNED(addr, PUD_SIZE)) { | 81 | IS_ALIGNED(addr, PUD_SIZE)) { |
78 | p = early_alloc(PUD_SIZE, nid); | 82 | p = early_alloc(PUD_SIZE, nid, false); |
79 | if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) | 83 | if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) |
80 | return; | 84 | return; |
81 | else if (p) | 85 | else if (p) |
82 | memblock_free(__pa(p), PUD_SIZE); | 86 | memblock_free(__pa(p), PUD_SIZE); |
83 | } | 87 | } |
84 | 88 | ||
85 | p = early_alloc(PAGE_SIZE, nid); | 89 | p = early_alloc(PAGE_SIZE, nid, true); |
86 | pud_populate(&init_mm, pud, p); | 90 | pud_populate(&init_mm, pud, p); |
87 | } | 91 | } |
88 | 92 | ||
@@ -101,7 +105,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, | |||
101 | unsigned long next; | 105 | unsigned long next; |
102 | 106 | ||
103 | if (p4d_none(*p4d)) { | 107 | if (p4d_none(*p4d)) { |
104 | void *p = early_alloc(PAGE_SIZE, nid); | 108 | void *p = early_alloc(PAGE_SIZE, nid, true); |
105 | 109 | ||
106 | p4d_populate(&init_mm, p4d, p); | 110 | p4d_populate(&init_mm, p4d, p); |
107 | } | 111 | } |
@@ -122,7 +126,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, | |||
122 | unsigned long next; | 126 | unsigned long next; |
123 | 127 | ||
124 | if (pgd_none(*pgd)) { | 128 | if (pgd_none(*pgd)) { |
125 | p = early_alloc(PAGE_SIZE, nid); | 129 | p = early_alloc(PAGE_SIZE, nid, true); |
126 | pgd_populate(&init_mm, pgd, p); | 130 | pgd_populate(&init_mm, pgd, p); |
127 | } | 131 | } |
128 | 132 | ||
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 391b13402e40..3ef362f598e3 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c | |||
@@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) | |||
464 | set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); | 464 | set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); |
465 | } | 465 | } |
466 | 466 | ||
467 | static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, | 467 | struct sme_populate_pgd_data { |
468 | unsigned long end) | 468 | void *pgtable_area; |
469 | pgd_t *pgd; | ||
470 | |||
471 | pmdval_t pmd_flags; | ||
472 | pteval_t pte_flags; | ||
473 | unsigned long paddr; | ||
474 | |||
475 | unsigned long vaddr; | ||
476 | unsigned long vaddr_end; | ||
477 | }; | ||
478 | |||
479 | static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) | ||
469 | { | 480 | { |
470 | unsigned long pgd_start, pgd_end, pgd_size; | 481 | unsigned long pgd_start, pgd_end, pgd_size; |
471 | pgd_t *pgd_p; | 482 | pgd_t *pgd_p; |
472 | 483 | ||
473 | pgd_start = start & PGDIR_MASK; | 484 | pgd_start = ppd->vaddr & PGDIR_MASK; |
474 | pgd_end = end & PGDIR_MASK; | 485 | pgd_end = ppd->vaddr_end & PGDIR_MASK; |
475 | 486 | ||
476 | pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1); | 487 | pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); |
477 | pgd_size *= sizeof(pgd_t); | ||
478 | 488 | ||
479 | pgd_p = pgd_base + pgd_index(start); | 489 | pgd_p = ppd->pgd + pgd_index(ppd->vaddr); |
480 | 490 | ||
481 | memset(pgd_p, 0, pgd_size); | 491 | memset(pgd_p, 0, pgd_size); |
482 | } | 492 | } |
483 | 493 | ||
484 | #define PGD_FLAGS _KERNPG_TABLE_NOENC | 494 | #define PGD_FLAGS _KERNPG_TABLE_NOENC |
485 | #define P4D_FLAGS _KERNPG_TABLE_NOENC | 495 | #define P4D_FLAGS _KERNPG_TABLE_NOENC |
486 | #define PUD_FLAGS _KERNPG_TABLE_NOENC | 496 | #define PUD_FLAGS _KERNPG_TABLE_NOENC |
487 | #define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) | 497 | #define PMD_FLAGS _KERNPG_TABLE_NOENC |
498 | |||
499 | #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) | ||
500 | |||
501 | #define PMD_FLAGS_DEC PMD_FLAGS_LARGE | ||
502 | #define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
503 | (_PAGE_PAT | _PAGE_PWT)) | ||
504 | |||
505 | #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) | ||
506 | |||
507 | #define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) | ||
508 | |||
509 | #define PTE_FLAGS_DEC PTE_FLAGS | ||
510 | #define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
511 | (_PAGE_PAT | _PAGE_PWT)) | ||
512 | |||
513 | #define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) | ||
488 | 514 | ||
489 | static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, | 515 | static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) |
490 | unsigned long vaddr, pmdval_t pmd_val) | ||
491 | { | 516 | { |
492 | pgd_t *pgd_p; | 517 | pgd_t *pgd_p; |
493 | p4d_t *p4d_p; | 518 | p4d_t *p4d_p; |
494 | pud_t *pud_p; | 519 | pud_t *pud_p; |
495 | pmd_t *pmd_p; | 520 | pmd_t *pmd_p; |
496 | 521 | ||
497 | pgd_p = pgd_base + pgd_index(vaddr); | 522 | pgd_p = ppd->pgd + pgd_index(ppd->vaddr); |
498 | if (native_pgd_val(*pgd_p)) { | 523 | if (native_pgd_val(*pgd_p)) { |
499 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 524 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) |
500 | p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); | 525 | p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); |
@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, | |||
504 | pgd_t pgd; | 529 | pgd_t pgd; |
505 | 530 | ||
506 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 531 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { |
507 | p4d_p = pgtable_area; | 532 | p4d_p = ppd->pgtable_area; |
508 | memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); | 533 | memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); |
509 | pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; | 534 | ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; |
510 | 535 | ||
511 | pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); | 536 | pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); |
512 | } else { | 537 | } else { |
513 | pud_p = pgtable_area; | 538 | pud_p = ppd->pgtable_area; |
514 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); | 539 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); |
515 | pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; | 540 | ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; |
516 | 541 | ||
517 | pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); | 542 | pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); |
518 | } | 543 | } |
@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, | |||
520 | } | 545 | } |
521 | 546 | ||
522 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 547 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { |
523 | p4d_p += p4d_index(vaddr); | 548 | p4d_p += p4d_index(ppd->vaddr); |
524 | if (native_p4d_val(*p4d_p)) { | 549 | if (native_p4d_val(*p4d_p)) { |
525 | pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); | 550 | pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); |
526 | } else { | 551 | } else { |
527 | p4d_t p4d; | 552 | p4d_t p4d; |
528 | 553 | ||
529 | pud_p = pgtable_area; | 554 | pud_p = ppd->pgtable_area; |
530 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); | 555 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); |
531 | pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; | 556 | ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; |
532 | 557 | ||
533 | p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); | 558 | p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); |
534 | native_set_p4d(p4d_p, p4d); | 559 | native_set_p4d(p4d_p, p4d); |
535 | } | 560 | } |
536 | } | 561 | } |
537 | 562 | ||
538 | pud_p += pud_index(vaddr); | 563 | pud_p += pud_index(ppd->vaddr); |
539 | if (native_pud_val(*pud_p)) { | 564 | if (native_pud_val(*pud_p)) { |
540 | if (native_pud_val(*pud_p) & _PAGE_PSE) | 565 | if (native_pud_val(*pud_p) & _PAGE_PSE) |
541 | goto out; | 566 | return NULL; |
542 | 567 | ||
543 | pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); | 568 | pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); |
544 | } else { | 569 | } else { |
545 | pud_t pud; | 570 | pud_t pud; |
546 | 571 | ||
547 | pmd_p = pgtable_area; | 572 | pmd_p = ppd->pgtable_area; |
548 | memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); | 573 | memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); |
549 | pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; | 574 | ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; |
550 | 575 | ||
551 | pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); | 576 | pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); |
552 | native_set_pud(pud_p, pud); | 577 | native_set_pud(pud_p, pud); |
553 | } | 578 | } |
554 | 579 | ||
555 | pmd_p += pmd_index(vaddr); | 580 | return pmd_p; |
581 | } | ||
582 | |||
583 | static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) | ||
584 | { | ||
585 | pmd_t *pmd_p; | ||
586 | |||
587 | pmd_p = sme_prepare_pgd(ppd); | ||
588 | if (!pmd_p) | ||
589 | return; | ||
590 | |||
591 | pmd_p += pmd_index(ppd->vaddr); | ||
556 | if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) | 592 | if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) |
557 | native_set_pmd(pmd_p, native_make_pmd(pmd_val)); | 593 | native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags)); |
594 | } | ||
558 | 595 | ||
559 | out: | 596 | static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) |
560 | return pgtable_area; | 597 | { |
598 | pmd_t *pmd_p; | ||
599 | pte_t *pte_p; | ||
600 | |||
601 | pmd_p = sme_prepare_pgd(ppd); | ||
602 | if (!pmd_p) | ||
603 | return; | ||
604 | |||
605 | pmd_p += pmd_index(ppd->vaddr); | ||
606 | if (native_pmd_val(*pmd_p)) { | ||
607 | if (native_pmd_val(*pmd_p) & _PAGE_PSE) | ||
608 | return; | ||
609 | |||
610 | pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK); | ||
611 | } else { | ||
612 | pmd_t pmd; | ||
613 | |||
614 | pte_p = ppd->pgtable_area; | ||
615 | memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE); | ||
616 | ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE; | ||
617 | |||
618 | pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS); | ||
619 | native_set_pmd(pmd_p, pmd); | ||
620 | } | ||
621 | |||
622 | pte_p += pte_index(ppd->vaddr); | ||
623 | if (!native_pte_val(*pte_p)) | ||
624 | native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags)); | ||
625 | } | ||
626 | |||
627 | static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) | ||
628 | { | ||
629 | while (ppd->vaddr < ppd->vaddr_end) { | ||
630 | sme_populate_pgd_large(ppd); | ||
631 | |||
632 | ppd->vaddr += PMD_PAGE_SIZE; | ||
633 | ppd->paddr += PMD_PAGE_SIZE; | ||
634 | } | ||
635 | } | ||
636 | |||
637 | static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) | ||
638 | { | ||
639 | while (ppd->vaddr < ppd->vaddr_end) { | ||
640 | sme_populate_pgd(ppd); | ||
641 | |||
642 | ppd->vaddr += PAGE_SIZE; | ||
643 | ppd->paddr += PAGE_SIZE; | ||
644 | } | ||
645 | } | ||
646 | |||
647 | static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, | ||
648 | pmdval_t pmd_flags, pteval_t pte_flags) | ||
649 | { | ||
650 | unsigned long vaddr_end; | ||
651 | |||
652 | ppd->pmd_flags = pmd_flags; | ||
653 | ppd->pte_flags = pte_flags; | ||
654 | |||
655 | /* Save original end value since we modify the struct value */ | ||
656 | vaddr_end = ppd->vaddr_end; | ||
657 | |||
658 | /* If start is not 2MB aligned, create PTE entries */ | ||
659 | ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); | ||
660 | __sme_map_range_pte(ppd); | ||
661 | |||
662 | /* Create PMD entries */ | ||
663 | ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; | ||
664 | __sme_map_range_pmd(ppd); | ||
665 | |||
666 | /* If end is not 2MB aligned, create PTE entries */ | ||
667 | ppd->vaddr_end = vaddr_end; | ||
668 | __sme_map_range_pte(ppd); | ||
669 | } | ||
670 | |||
671 | static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) | ||
672 | { | ||
673 | __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); | ||
674 | } | ||
675 | |||
676 | static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) | ||
677 | { | ||
678 | __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); | ||
679 | } | ||
680 | |||
681 | static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) | ||
682 | { | ||
683 | __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); | ||
561 | } | 684 | } |
562 | 685 | ||
563 | static unsigned long __init sme_pgtable_calc(unsigned long len) | 686 | static unsigned long __init sme_pgtable_calc(unsigned long len) |
564 | { | 687 | { |
565 | unsigned long p4d_size, pud_size, pmd_size; | 688 | unsigned long p4d_size, pud_size, pmd_size, pte_size; |
566 | unsigned long total; | 689 | unsigned long total; |
567 | 690 | ||
568 | /* | 691 | /* |
569 | * Perform a relatively simplistic calculation of the pagetable | 692 | * Perform a relatively simplistic calculation of the pagetable |
570 | * entries that are needed. That mappings will be covered by 2MB | 693 | * entries that are needed. Those mappings will be covered mostly |
571 | * PMD entries so we can conservatively calculate the required | 694 | * by 2MB PMD entries so we can conservatively calculate the required |
572 | * number of P4D, PUD and PMD structures needed to perform the | 695 | * number of P4D, PUD and PMD structures needed to perform the |
573 | * mappings. Incrementing the count for each covers the case where | 696 | * mappings. For mappings that are not 2MB aligned, PTE mappings |
574 | * the addresses cross entries. | 697 | * would be needed for the start and end portion of the address range |
698 | * that fall outside of the 2MB alignment. This results in, at most, | ||
699 | * two extra pages to hold PTE entries for each range that is mapped. | ||
700 | * Incrementing the count for each covers the case where the addresses | ||
701 | * cross entries. | ||
575 | */ | 702 | */ |
576 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 703 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { |
577 | p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; | 704 | p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; |
@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) | |||
585 | } | 712 | } |
586 | pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; | 713 | pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; |
587 | pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; | 714 | pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; |
715 | pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE; | ||
588 | 716 | ||
589 | total = p4d_size + pud_size + pmd_size; | 717 | total = p4d_size + pud_size + pmd_size + pte_size; |
590 | 718 | ||
591 | /* | 719 | /* |
592 | * Now calculate the added pagetable structures needed to populate | 720 | * Now calculate the added pagetable structures needed to populate |
@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) | |||
610 | return total; | 738 | return total; |
611 | } | 739 | } |
612 | 740 | ||
613 | void __init sme_encrypt_kernel(void) | 741 | void __init sme_encrypt_kernel(struct boot_params *bp) |
614 | { | 742 | { |
615 | unsigned long workarea_start, workarea_end, workarea_len; | 743 | unsigned long workarea_start, workarea_end, workarea_len; |
616 | unsigned long execute_start, execute_end, execute_len; | 744 | unsigned long execute_start, execute_end, execute_len; |
617 | unsigned long kernel_start, kernel_end, kernel_len; | 745 | unsigned long kernel_start, kernel_end, kernel_len; |
746 | unsigned long initrd_start, initrd_end, initrd_len; | ||
747 | struct sme_populate_pgd_data ppd; | ||
618 | unsigned long pgtable_area_len; | 748 | unsigned long pgtable_area_len; |
619 | unsigned long paddr, pmd_flags; | ||
620 | unsigned long decrypted_base; | 749 | unsigned long decrypted_base; |
621 | void *pgtable_area; | ||
622 | pgd_t *pgd; | ||
623 | 750 | ||
624 | if (!sme_active()) | 751 | if (!sme_active()) |
625 | return; | 752 | return; |
626 | 753 | ||
627 | /* | 754 | /* |
628 | * Prepare for encrypting the kernel by building new pagetables with | 755 | * Prepare for encrypting the kernel and initrd by building new |
629 | * the necessary attributes needed to encrypt the kernel in place. | 756 | * pagetables with the necessary attributes needed to encrypt the |
757 | * kernel in place. | ||
630 | * | 758 | * |
631 | * One range of virtual addresses will map the memory occupied | 759 | * One range of virtual addresses will map the memory occupied |
632 | * by the kernel as encrypted. | 760 | * by the kernel and initrd as encrypted. |
633 | * | 761 | * |
634 | * Another range of virtual addresses will map the memory occupied | 762 | * Another range of virtual addresses will map the memory occupied |
635 | * by the kernel as decrypted and write-protected. | 763 | * by the kernel and initrd as decrypted and write-protected. |
636 | * | 764 | * |
637 | * The use of write-protect attribute will prevent any of the | 765 | * The use of write-protect attribute will prevent any of the |
638 | * memory from being cached. | 766 | * memory from being cached. |
@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void) | |||
643 | kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); | 771 | kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); |
644 | kernel_len = kernel_end - kernel_start; | 772 | kernel_len = kernel_end - kernel_start; |
645 | 773 | ||
774 | initrd_start = 0; | ||
775 | initrd_end = 0; | ||
776 | initrd_len = 0; | ||
777 | #ifdef CONFIG_BLK_DEV_INITRD | ||
778 | initrd_len = (unsigned long)bp->hdr.ramdisk_size | | ||
779 | ((unsigned long)bp->ext_ramdisk_size << 32); | ||
780 | if (initrd_len) { | ||
781 | initrd_start = (unsigned long)bp->hdr.ramdisk_image | | ||
782 | ((unsigned long)bp->ext_ramdisk_image << 32); | ||
783 | initrd_end = PAGE_ALIGN(initrd_start + initrd_len); | ||
784 | initrd_len = initrd_end - initrd_start; | ||
785 | } | ||
786 | #endif | ||
787 | |||
646 | /* Set the encryption workarea to be immediately after the kernel */ | 788 | /* Set the encryption workarea to be immediately after the kernel */ |
647 | workarea_start = kernel_end; | 789 | workarea_start = kernel_end; |
648 | 790 | ||
@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void) | |||
665 | */ | 807 | */ |
666 | pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; | 808 | pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; |
667 | pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; | 809 | pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; |
810 | if (initrd_len) | ||
811 | pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; | ||
668 | 812 | ||
669 | /* PUDs and PMDs needed in the current pagetables for the workarea */ | 813 | /* PUDs and PMDs needed in the current pagetables for the workarea */ |
670 | pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); | 814 | pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); |
671 | 815 | ||
672 | /* | 816 | /* |
673 | * The total workarea includes the executable encryption area and | 817 | * The total workarea includes the executable encryption area and |
674 | * the pagetable area. | 818 | * the pagetable area. The start of the workarea is already 2MB |
819 | * aligned, align the end of the workarea on a 2MB boundary so that | ||
820 | * we don't try to create/allocate PTE entries from the workarea | ||
821 | * before it is mapped. | ||
675 | */ | 822 | */ |
676 | workarea_len = execute_len + pgtable_area_len; | 823 | workarea_len = execute_len + pgtable_area_len; |
677 | workarea_end = workarea_start + workarea_len; | 824 | workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); |
678 | 825 | ||
679 | /* | 826 | /* |
680 | * Set the address to the start of where newly created pagetable | 827 | * Set the address to the start of where newly created pagetable |
@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void) | |||
683 | * pagetables and when the new encrypted and decrypted kernel | 830 | * pagetables and when the new encrypted and decrypted kernel |
684 | * mappings are populated. | 831 | * mappings are populated. |
685 | */ | 832 | */ |
686 | pgtable_area = (void *)execute_end; | 833 | ppd.pgtable_area = (void *)execute_end; |
687 | 834 | ||
688 | /* | 835 | /* |
689 | * Make sure the current pagetable structure has entries for | 836 | * Make sure the current pagetable structure has entries for |
690 | * addressing the workarea. | 837 | * addressing the workarea. |
691 | */ | 838 | */ |
692 | pgd = (pgd_t *)native_read_cr3_pa(); | 839 | ppd.pgd = (pgd_t *)native_read_cr3_pa(); |
693 | paddr = workarea_start; | 840 | ppd.paddr = workarea_start; |
694 | while (paddr < workarea_end) { | 841 | ppd.vaddr = workarea_start; |
695 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | 842 | ppd.vaddr_end = workarea_end; |
696 | paddr, | 843 | sme_map_range_decrypted(&ppd); |
697 | paddr + PMD_FLAGS); | ||
698 | |||
699 | paddr += PMD_PAGE_SIZE; | ||
700 | } | ||
701 | 844 | ||
702 | /* Flush the TLB - no globals so cr3 is enough */ | 845 | /* Flush the TLB - no globals so cr3 is enough */ |
703 | native_write_cr3(__native_read_cr3()); | 846 | native_write_cr3(__native_read_cr3()); |
704 | 847 | ||
705 | /* | 848 | /* |
706 | * A new pagetable structure is being built to allow for the kernel | 849 | * A new pagetable structure is being built to allow for the kernel |
707 | * to be encrypted. It starts with an empty PGD that will then be | 850 | * and initrd to be encrypted. It starts with an empty PGD that will |
708 | * populated with new PUDs and PMDs as the encrypted and decrypted | 851 | * then be populated with new PUDs and PMDs as the encrypted and |
709 | * kernel mappings are created. | 852 | * decrypted kernel mappings are created. |
710 | */ | 853 | */ |
711 | pgd = pgtable_area; | 854 | ppd.pgd = ppd.pgtable_area; |
712 | memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD); | 855 | memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); |
713 | pgtable_area += sizeof(*pgd) * PTRS_PER_PGD; | 856 | ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; |
714 | |||
715 | /* Add encrypted kernel (identity) mappings */ | ||
716 | pmd_flags = PMD_FLAGS | _PAGE_ENC; | ||
717 | paddr = kernel_start; | ||
718 | while (paddr < kernel_end) { | ||
719 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | ||
720 | paddr, | ||
721 | paddr + pmd_flags); | ||
722 | |||
723 | paddr += PMD_PAGE_SIZE; | ||
724 | } | ||
725 | 857 | ||
726 | /* | 858 | /* |
727 | * A different PGD index/entry must be used to get different | 859 | * A different PGD index/entry must be used to get different |
@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void) | |||
730 | * the base of the mapping. | 862 | * the base of the mapping. |
731 | */ | 863 | */ |
732 | decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); | 864 | decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); |
865 | if (initrd_len) { | ||
866 | unsigned long check_base; | ||
867 | |||
868 | check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); | ||
869 | decrypted_base = max(decrypted_base, check_base); | ||
870 | } | ||
733 | decrypted_base <<= PGDIR_SHIFT; | 871 | decrypted_base <<= PGDIR_SHIFT; |
734 | 872 | ||
873 | /* Add encrypted kernel (identity) mappings */ | ||
874 | ppd.paddr = kernel_start; | ||
875 | ppd.vaddr = kernel_start; | ||
876 | ppd.vaddr_end = kernel_end; | ||
877 | sme_map_range_encrypted(&ppd); | ||
878 | |||
735 | /* Add decrypted, write-protected kernel (non-identity) mappings */ | 879 | /* Add decrypted, write-protected kernel (non-identity) mappings */ |
736 | pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT); | 880 | ppd.paddr = kernel_start; |
737 | paddr = kernel_start; | 881 | ppd.vaddr = kernel_start + decrypted_base; |
738 | while (paddr < kernel_end) { | 882 | ppd.vaddr_end = kernel_end + decrypted_base; |
739 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | 883 | sme_map_range_decrypted_wp(&ppd); |
740 | paddr + decrypted_base, | 884 | |
741 | paddr + pmd_flags); | 885 | if (initrd_len) { |
742 | 886 | /* Add encrypted initrd (identity) mappings */ | |
743 | paddr += PMD_PAGE_SIZE; | 887 | ppd.paddr = initrd_start; |
888 | ppd.vaddr = initrd_start; | ||
889 | ppd.vaddr_end = initrd_end; | ||
890 | sme_map_range_encrypted(&ppd); | ||
891 | /* | ||
892 | * Add decrypted, write-protected initrd (non-identity) mappings | ||
893 | */ | ||
894 | ppd.paddr = initrd_start; | ||
895 | ppd.vaddr = initrd_start + decrypted_base; | ||
896 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
897 | sme_map_range_decrypted_wp(&ppd); | ||
744 | } | 898 | } |
745 | 899 | ||
746 | /* Add decrypted workarea mappings to both kernel mappings */ | 900 | /* Add decrypted workarea mappings to both kernel mappings */ |
747 | paddr = workarea_start; | 901 | ppd.paddr = workarea_start; |
748 | while (paddr < workarea_end) { | 902 | ppd.vaddr = workarea_start; |
749 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | 903 | ppd.vaddr_end = workarea_end; |
750 | paddr, | 904 | sme_map_range_decrypted(&ppd); |
751 | paddr + PMD_FLAGS); | ||
752 | 905 | ||
753 | pgtable_area = sme_populate_pgd(pgd, pgtable_area, | 906 | ppd.paddr = workarea_start; |
754 | paddr + decrypted_base, | 907 | ppd.vaddr = workarea_start + decrypted_base; |
755 | paddr + PMD_FLAGS); | 908 | ppd.vaddr_end = workarea_end + decrypted_base; |
756 | 909 | sme_map_range_decrypted(&ppd); | |
757 | paddr += PMD_PAGE_SIZE; | ||
758 | } | ||
759 | 910 | ||
760 | /* Perform the encryption */ | 911 | /* Perform the encryption */ |
761 | sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, | 912 | sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, |
762 | kernel_len, workarea_start, (unsigned long)pgd); | 913 | kernel_len, workarea_start, (unsigned long)ppd.pgd); |
914 | |||
915 | if (initrd_len) | ||
916 | sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, | ||
917 | initrd_len, workarea_start, | ||
918 | (unsigned long)ppd.pgd); | ||
763 | 919 | ||
764 | /* | 920 | /* |
765 | * At this point we are running encrypted. Remove the mappings for | 921 | * At this point we are running encrypted. Remove the mappings for |
766 | * the decrypted areas - all that is needed for this is to remove | 922 | * the decrypted areas - all that is needed for this is to remove |
767 | * the PGD entry/entries. | 923 | * the PGD entry/entries. |
768 | */ | 924 | */ |
769 | sme_clear_pgd(pgd, kernel_start + decrypted_base, | 925 | ppd.vaddr = kernel_start + decrypted_base; |
770 | kernel_end + decrypted_base); | 926 | ppd.vaddr_end = kernel_end + decrypted_base; |
927 | sme_clear_pgd(&ppd); | ||
928 | |||
929 | if (initrd_len) { | ||
930 | ppd.vaddr = initrd_start + decrypted_base; | ||
931 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
932 | sme_clear_pgd(&ppd); | ||
933 | } | ||
771 | 934 | ||
772 | sme_clear_pgd(pgd, workarea_start + decrypted_base, | 935 | ppd.vaddr = workarea_start + decrypted_base; |
773 | workarea_end + decrypted_base); | 936 | ppd.vaddr_end = workarea_end + decrypted_base; |
937 | sme_clear_pgd(&ppd); | ||
774 | 938 | ||
775 | /* Flush the TLB - no globals so cr3 is enough */ | 939 | /* Flush the TLB - no globals so cr3 is enough */ |
776 | native_write_cr3(__native_read_cr3()); | 940 | native_write_cr3(__native_read_cr3()); |
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 730e6d541df1..01f682cf77a8 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S | |||
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute) | |||
22 | 22 | ||
23 | /* | 23 | /* |
24 | * Entry parameters: | 24 | * Entry parameters: |
25 | * RDI - virtual address for the encrypted kernel mapping | 25 | * RDI - virtual address for the encrypted mapping |
26 | * RSI - virtual address for the decrypted kernel mapping | 26 | * RSI - virtual address for the decrypted mapping |
27 | * RDX - length of kernel | 27 | * RDX - length to encrypt |
28 | * RCX - virtual address of the encryption workarea, including: | 28 | * RCX - virtual address of the encryption workarea, including: |
29 | * - stack page (PAGE_SIZE) | 29 | * - stack page (PAGE_SIZE) |
30 | * - encryption routine page (PAGE_SIZE) | 30 | * - encryption routine page (PAGE_SIZE) |
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute) | |||
41 | addq $PAGE_SIZE, %rax /* Workarea encryption routine */ | 41 | addq $PAGE_SIZE, %rax /* Workarea encryption routine */ |
42 | 42 | ||
43 | push %r12 | 43 | push %r12 |
44 | movq %rdi, %r10 /* Encrypted kernel */ | 44 | movq %rdi, %r10 /* Encrypted area */ |
45 | movq %rsi, %r11 /* Decrypted kernel */ | 45 | movq %rsi, %r11 /* Decrypted area */ |
46 | movq %rdx, %r12 /* Kernel length */ | 46 | movq %rdx, %r12 /* Area length */ |
47 | 47 | ||
48 | /* Copy encryption routine into the workarea */ | 48 | /* Copy encryption routine into the workarea */ |
49 | movq %rax, %rdi /* Workarea encryption routine */ | 49 | movq %rax, %rdi /* Workarea encryption routine */ |
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute) | |||
52 | rep movsb | 52 | rep movsb |
53 | 53 | ||
54 | /* Setup registers for call */ | 54 | /* Setup registers for call */ |
55 | movq %r10, %rdi /* Encrypted kernel */ | 55 | movq %r10, %rdi /* Encrypted area */ |
56 | movq %r11, %rsi /* Decrypted kernel */ | 56 | movq %r11, %rsi /* Decrypted area */ |
57 | movq %r8, %rdx /* Pagetables used for encryption */ | 57 | movq %r8, %rdx /* Pagetables used for encryption */ |
58 | movq %r12, %rcx /* Kernel length */ | 58 | movq %r12, %rcx /* Area length */ |
59 | movq %rax, %r8 /* Workarea encryption routine */ | 59 | movq %rax, %r8 /* Workarea encryption routine */ |
60 | addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ | 60 | addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ |
61 | 61 | ||
@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute) | |||
71 | 71 | ||
72 | ENTRY(__enc_copy) | 72 | ENTRY(__enc_copy) |
73 | /* | 73 | /* |
74 | * Routine used to encrypt kernel. | 74 | * Routine used to encrypt memory in place. |
75 | * This routine must be run outside of the kernel proper since | 75 | * This routine must be run outside of the kernel proper since |
76 | * the kernel will be encrypted during the process. So this | 76 | * the kernel will be encrypted during the process. So this |
77 | * routine is defined here and then copied to an area outside | 77 | * routine is defined here and then copied to an area outside |
@@ -79,19 +79,19 @@ ENTRY(__enc_copy) | |||
79 | * during execution. | 79 | * during execution. |
80 | * | 80 | * |
81 | * On entry the registers must be: | 81 | * On entry the registers must be: |
82 | * RDI - virtual address for the encrypted kernel mapping | 82 | * RDI - virtual address for the encrypted mapping |
83 | * RSI - virtual address for the decrypted kernel mapping | 83 | * RSI - virtual address for the decrypted mapping |
84 | * RDX - address of the pagetables to use for encryption | 84 | * RDX - address of the pagetables to use for encryption |
85 | * RCX - length of kernel | 85 | * RCX - length of area |
86 | * R8 - intermediate copy buffer | 86 | * R8 - intermediate copy buffer |
87 | * | 87 | * |
88 | * RAX - points to this routine | 88 | * RAX - points to this routine |
89 | * | 89 | * |
90 | * The kernel will be encrypted by copying from the non-encrypted | 90 | * The area will be encrypted by copying from the non-encrypted |
91 | * kernel space to an intermediate buffer and then copying from the | 91 | * memory space to an intermediate buffer and then copying from the |
92 | * intermediate buffer back to the encrypted kernel space. The physical | 92 | * intermediate buffer back to the encrypted memory space. The physical |
93 | * addresses of the two kernel space mappings are the same which | 93 | * addresses of the two mappings are the same which results in the area |
94 | * results in the kernel being encrypted "in place". | 94 | * being encrypted "in place". |
95 | */ | 95 | */ |
96 | /* Enable the new page tables */ | 96 | /* Enable the new page tables */ |
97 | mov %rdx, %cr3 | 97 | mov %rdx, %cr3 |
@@ -103,47 +103,55 @@ ENTRY(__enc_copy) | |||
103 | orq $X86_CR4_PGE, %rdx | 103 | orq $X86_CR4_PGE, %rdx |
104 | mov %rdx, %cr4 | 104 | mov %rdx, %cr4 |
105 | 105 | ||
106 | push %r15 | ||
107 | push %r12 | ||
108 | |||
109 | movq %rcx, %r9 /* Save area length */ | ||
110 | movq %rdi, %r10 /* Save encrypted area address */ | ||
111 | movq %rsi, %r11 /* Save decrypted area address */ | ||
112 | |||
106 | /* Set the PAT register PA5 entry to write-protect */ | 113 | /* Set the PAT register PA5 entry to write-protect */ |
107 | push %rcx | ||
108 | movl $MSR_IA32_CR_PAT, %ecx | 114 | movl $MSR_IA32_CR_PAT, %ecx |
109 | rdmsr | 115 | rdmsr |
110 | push %rdx /* Save original PAT value */ | 116 | mov %rdx, %r15 /* Save original PAT value */ |
111 | andl $0xffff00ff, %edx /* Clear PA5 */ | 117 | andl $0xffff00ff, %edx /* Clear PA5 */ |
112 | orl $0x00000500, %edx /* Set PA5 to WP */ | 118 | orl $0x00000500, %edx /* Set PA5 to WP */ |
113 | wrmsr | 119 | wrmsr |
114 | pop %rdx /* RDX contains original PAT value */ | ||
115 | pop %rcx | ||
116 | |||
117 | movq %rcx, %r9 /* Save kernel length */ | ||
118 | movq %rdi, %r10 /* Save encrypted kernel address */ | ||
119 | movq %rsi, %r11 /* Save decrypted kernel address */ | ||
120 | 120 | ||
121 | wbinvd /* Invalidate any cache entries */ | 121 | wbinvd /* Invalidate any cache entries */ |
122 | 122 | ||
123 | /* Copy/encrypt 2MB at a time */ | 123 | /* Copy/encrypt up to 2MB at a time */ |
124 | movq $PMD_PAGE_SIZE, %r12 | ||
124 | 1: | 125 | 1: |
125 | movq %r11, %rsi /* Source - decrypted kernel */ | 126 | cmpq %r12, %r9 |
127 | jnb 2f | ||
128 | movq %r9, %r12 | ||
129 | |||
130 | 2: | ||
131 | movq %r11, %rsi /* Source - decrypted area */ | ||
126 | movq %r8, %rdi /* Dest - intermediate copy buffer */ | 132 | movq %r8, %rdi /* Dest - intermediate copy buffer */ |
127 | movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ | 133 | movq %r12, %rcx |
128 | rep movsb | 134 | rep movsb |
129 | 135 | ||
130 | movq %r8, %rsi /* Source - intermediate copy buffer */ | 136 | movq %r8, %rsi /* Source - intermediate copy buffer */ |
131 | movq %r10, %rdi /* Dest - encrypted kernel */ | 137 | movq %r10, %rdi /* Dest - encrypted area */ |
132 | movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ | 138 | movq %r12, %rcx |
133 | rep movsb | 139 | rep movsb |
134 | 140 | ||
135 | addq $PMD_PAGE_SIZE, %r11 | 141 | addq %r12, %r11 |
136 | addq $PMD_PAGE_SIZE, %r10 | 142 | addq %r12, %r10 |
137 | subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */ | 143 | subq %r12, %r9 /* Kernel length decrement */ |
138 | jnz 1b /* Kernel length not zero? */ | 144 | jnz 1b /* Kernel length not zero? */ |
139 | 145 | ||
140 | /* Restore PAT register */ | 146 | /* Restore PAT register */ |
141 | push %rdx /* Save original PAT value */ | ||
142 | movl $MSR_IA32_CR_PAT, %ecx | 147 | movl $MSR_IA32_CR_PAT, %ecx |
143 | rdmsr | 148 | rdmsr |
144 | pop %rdx /* Restore original PAT value */ | 149 | mov %r15, %rdx /* Restore original PAT value */ |
145 | wrmsr | 150 | wrmsr |
146 | 151 | ||
152 | pop %r12 | ||
153 | pop %r15 | ||
154 | |||
147 | ret | 155 | ret |
148 | .L__enc_copy_end: | 156 | .L__enc_copy_end: |
149 | ENDPROC(__enc_copy) | 157 | ENDPROC(__enc_copy) |
diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index f9042bcc27a4..7b14d6280e44 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c | |||
@@ -152,14 +152,13 @@ static int bgpio_get_set_multiple(struct gpio_chip *gc, unsigned long *mask, | |||
152 | { | 152 | { |
153 | unsigned long get_mask = 0; | 153 | unsigned long get_mask = 0; |
154 | unsigned long set_mask = 0; | 154 | unsigned long set_mask = 0; |
155 | int bit = 0; | ||
156 | 155 | ||
157 | while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio) { | 156 | /* Make sure we first clear any bits that are zero when we read the register */ |
158 | if (gc->bgpio_dir & BIT(bit)) | 157 | *bits &= ~*mask; |
159 | set_mask |= BIT(bit); | 158 | |
160 | else | 159 | /* Exploit the fact that we know which directions are set */ |
161 | get_mask |= BIT(bit); | 160 | set_mask = *mask & gc->bgpio_dir; |
162 | } | 161 | get_mask = *mask & ~gc->bgpio_dir; |
163 | 162 | ||
164 | if (set_mask) | 163 | if (set_mask) |
165 | *bits |= gc->read_reg(gc->reg_set) & set_mask; | 164 | *bits |= gc->read_reg(gc->reg_set) & set_mask; |
@@ -176,13 +175,13 @@ static int bgpio_get(struct gpio_chip *gc, unsigned int gpio) | |||
176 | 175 | ||
177 | /* | 176 | /* |
178 | * This only works if the bits in the GPIO register are in native endianness. | 177 | * This only works if the bits in the GPIO register are in native endianness. |
179 | * It is dirt simple and fast in this case. (Also the most common case.) | ||
180 | */ | 178 | */ |
181 | static int bgpio_get_multiple(struct gpio_chip *gc, unsigned long *mask, | 179 | static int bgpio_get_multiple(struct gpio_chip *gc, unsigned long *mask, |
182 | unsigned long *bits) | 180 | unsigned long *bits) |
183 | { | 181 | { |
184 | 182 | /* Make sure we first clear any bits that are zero when we read the register */ | |
185 | *bits = gc->read_reg(gc->reg_dat) & *mask; | 183 | *bits &= ~*mask; |
184 | *bits |= gc->read_reg(gc->reg_dat) & *mask; | ||
186 | return 0; | 185 | return 0; |
187 | } | 186 | } |
188 | 187 | ||
@@ -196,9 +195,12 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask, | |||
196 | unsigned long val; | 195 | unsigned long val; |
197 | int bit; | 196 | int bit; |
198 | 197 | ||
198 | /* Make sure we first clear any bits that are zero when we read the register */ | ||
199 | *bits &= ~*mask; | ||
200 | |||
199 | /* Create a mirrored mask */ | 201 | /* Create a mirrored mask */ |
200 | bit = 0; | 202 | bit = -1; |
201 | while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio) | 203 | while ((bit = find_next_bit(mask, gc->ngpio, bit + 1)) < gc->ngpio) |
202 | readmask |= bgpio_line2mask(gc, bit); | 204 | readmask |= bgpio_line2mask(gc, bit); |
203 | 205 | ||
204 | /* Read the register */ | 206 | /* Read the register */ |
@@ -208,8 +210,8 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask, | |||
208 | * Mirror the result into the "bits" result, this will give line 0 | 210 | * Mirror the result into the "bits" result, this will give line 0 |
209 | * in bit 0 ... line 31 in bit 31 for a 32bit register. | 211 | * in bit 0 ... line 31 in bit 31 for a 32bit register. |
210 | */ | 212 | */ |
211 | bit = 0; | 213 | bit = -1; |
212 | while ((bit = find_next_bit(&val, gc->ngpio, bit)) != gc->ngpio) | 214 | while ((bit = find_next_bit(&val, gc->ngpio, bit + 1)) < gc->ngpio) |
213 | *bits |= bgpio_line2mask(gc, bit); | 215 | *bits |= bgpio_line2mask(gc, bit); |
214 | 216 | ||
215 | return 0; | 217 | return 0; |
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 85140c9af581..8b941f814472 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c | |||
@@ -687,6 +687,20 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host, | |||
687 | return; | 687 | return; |
688 | } | 688 | } |
689 | 689 | ||
690 | /* For i.MX53 eSDHCv3, SYSCTL.SDCLKFS may not be set to 0. */ | ||
691 | if (is_imx53_esdhc(imx_data)) { | ||
692 | /* | ||
693 | * According to the i.MX53 reference manual, if DLLCTRL[10] can | ||
694 | * be set, then the controller is eSDHCv3, else it is eSDHCv2. | ||
695 | */ | ||
696 | val = readl(host->ioaddr + ESDHC_DLL_CTRL); | ||
697 | writel(val | BIT(10), host->ioaddr + ESDHC_DLL_CTRL); | ||
698 | temp = readl(host->ioaddr + ESDHC_DLL_CTRL); | ||
699 | writel(val, host->ioaddr + ESDHC_DLL_CTRL); | ||
700 | if (temp & BIT(10)) | ||
701 | pre_div = 2; | ||
702 | } | ||
703 | |||
690 | temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); | 704 | temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); |
691 | temp &= ~(ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN | 705 | temp &= ~(ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN |
692 | | ESDHC_CLOCK_MASK); | 706 | | ESDHC_CLOCK_MASK); |
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d53550e612bc..4276ebfff22b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c | |||
@@ -451,10 +451,13 @@ static void **nvme_pci_iod_list(struct request *req) | |||
451 | static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) | 451 | static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) |
452 | { | 452 | { |
453 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | 453 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
454 | int nseg = blk_rq_nr_phys_segments(req); | ||
454 | unsigned int avg_seg_size; | 455 | unsigned int avg_seg_size; |
455 | 456 | ||
456 | avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), | 457 | if (nseg == 0) |
457 | blk_rq_nr_phys_segments(req)); | 458 | return false; |
459 | |||
460 | avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); | ||
458 | 461 | ||
459 | if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) | 462 | if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) |
460 | return false; | 463 | return false; |
@@ -722,20 +725,19 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, | |||
722 | } | 725 | } |
723 | 726 | ||
724 | static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, | 727 | static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, |
725 | struct request *req, struct nvme_rw_command *cmd) | 728 | struct request *req, struct nvme_rw_command *cmd, int entries) |
726 | { | 729 | { |
727 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | 730 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
728 | int length = blk_rq_payload_bytes(req); | ||
729 | struct dma_pool *pool; | 731 | struct dma_pool *pool; |
730 | struct nvme_sgl_desc *sg_list; | 732 | struct nvme_sgl_desc *sg_list; |
731 | struct scatterlist *sg = iod->sg; | 733 | struct scatterlist *sg = iod->sg; |
732 | int entries = iod->nents, i = 0; | ||
733 | dma_addr_t sgl_dma; | 734 | dma_addr_t sgl_dma; |
735 | int i = 0; | ||
734 | 736 | ||
735 | /* setting the transfer type as SGL */ | 737 | /* setting the transfer type as SGL */ |
736 | cmd->flags = NVME_CMD_SGL_METABUF; | 738 | cmd->flags = NVME_CMD_SGL_METABUF; |
737 | 739 | ||
738 | if (length == sg_dma_len(sg)) { | 740 | if (entries == 1) { |
739 | nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg); | 741 | nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg); |
740 | return BLK_STS_OK; | 742 | return BLK_STS_OK; |
741 | } | 743 | } |
@@ -775,13 +777,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, | |||
775 | } | 777 | } |
776 | 778 | ||
777 | nvme_pci_sgl_set_data(&sg_list[i++], sg); | 779 | nvme_pci_sgl_set_data(&sg_list[i++], sg); |
778 | |||
779 | length -= sg_dma_len(sg); | ||
780 | sg = sg_next(sg); | 780 | sg = sg_next(sg); |
781 | entries--; | 781 | } while (--entries > 0); |
782 | } while (length > 0); | ||
783 | 782 | ||
784 | WARN_ON(entries > 0); | ||
785 | return BLK_STS_OK; | 783 | return BLK_STS_OK; |
786 | } | 784 | } |
787 | 785 | ||
@@ -793,6 +791,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, | |||
793 | enum dma_data_direction dma_dir = rq_data_dir(req) ? | 791 | enum dma_data_direction dma_dir = rq_data_dir(req) ? |
794 | DMA_TO_DEVICE : DMA_FROM_DEVICE; | 792 | DMA_TO_DEVICE : DMA_FROM_DEVICE; |
795 | blk_status_t ret = BLK_STS_IOERR; | 793 | blk_status_t ret = BLK_STS_IOERR; |
794 | int nr_mapped; | ||
796 | 795 | ||
797 | sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); | 796 | sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); |
798 | iod->nents = blk_rq_map_sg(q, req, iod->sg); | 797 | iod->nents = blk_rq_map_sg(q, req, iod->sg); |
@@ -800,12 +799,13 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, | |||
800 | goto out; | 799 | goto out; |
801 | 800 | ||
802 | ret = BLK_STS_RESOURCE; | 801 | ret = BLK_STS_RESOURCE; |
803 | if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, | 802 | nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, |
804 | DMA_ATTR_NO_WARN)) | 803 | DMA_ATTR_NO_WARN); |
804 | if (!nr_mapped) | ||
805 | goto out; | 805 | goto out; |
806 | 806 | ||
807 | if (iod->use_sgl) | 807 | if (iod->use_sgl) |
808 | ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); | 808 | ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); |
809 | else | 809 | else |
810 | ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); | 810 | ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); |
811 | 811 | ||
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 4178d2493547..5e335b6203f4 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h | |||
@@ -71,7 +71,7 @@ extern void delayacct_init(void); | |||
71 | extern void __delayacct_tsk_init(struct task_struct *); | 71 | extern void __delayacct_tsk_init(struct task_struct *); |
72 | extern void __delayacct_tsk_exit(struct task_struct *); | 72 | extern void __delayacct_tsk_exit(struct task_struct *); |
73 | extern void __delayacct_blkio_start(void); | 73 | extern void __delayacct_blkio_start(void); |
74 | extern void __delayacct_blkio_end(void); | 74 | extern void __delayacct_blkio_end(struct task_struct *); |
75 | extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); | 75 | extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); |
76 | extern __u64 __delayacct_blkio_ticks(struct task_struct *); | 76 | extern __u64 __delayacct_blkio_ticks(struct task_struct *); |
77 | extern void __delayacct_freepages_start(void); | 77 | extern void __delayacct_freepages_start(void); |
@@ -122,10 +122,10 @@ static inline void delayacct_blkio_start(void) | |||
122 | __delayacct_blkio_start(); | 122 | __delayacct_blkio_start(); |
123 | } | 123 | } |
124 | 124 | ||
125 | static inline void delayacct_blkio_end(void) | 125 | static inline void delayacct_blkio_end(struct task_struct *p) |
126 | { | 126 | { |
127 | if (current->delays) | 127 | if (current->delays) |
128 | __delayacct_blkio_end(); | 128 | __delayacct_blkio_end(p); |
129 | delayacct_clear_flag(DELAYACCT_PF_BLKIO); | 129 | delayacct_clear_flag(DELAYACCT_PF_BLKIO); |
130 | } | 130 | } |
131 | 131 | ||
@@ -169,7 +169,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) | |||
169 | {} | 169 | {} |
170 | static inline void delayacct_blkio_start(void) | 170 | static inline void delayacct_blkio_start(void) |
171 | {} | 171 | {} |
172 | static inline void delayacct_blkio_end(void) | 172 | static inline void delayacct_blkio_end(struct task_struct *p) |
173 | {} | 173 | {} |
174 | static inline int delayacct_add_tsk(struct taskstats *d, | 174 | static inline int delayacct_add_tsk(struct taskstats *d, |
175 | struct task_struct *tsk) | 175 | struct task_struct *tsk) |
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index bae807eb2933..853291714ae0 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h | |||
@@ -31,11 +31,17 @@ | |||
31 | #else | 31 | #else |
32 | #define MODULE_RANDSTRUCT_PLUGIN | 32 | #define MODULE_RANDSTRUCT_PLUGIN |
33 | #endif | 33 | #endif |
34 | #ifdef RETPOLINE | ||
35 | #define MODULE_VERMAGIC_RETPOLINE "retpoline " | ||
36 | #else | ||
37 | #define MODULE_VERMAGIC_RETPOLINE "" | ||
38 | #endif | ||
34 | 39 | ||
35 | #define VERMAGIC_STRING \ | 40 | #define VERMAGIC_STRING \ |
36 | UTS_RELEASE " " \ | 41 | UTS_RELEASE " " \ |
37 | MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ | 42 | MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ |
38 | MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ | 43 | MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ |
39 | MODULE_ARCH_VERMAGIC \ | 44 | MODULE_ARCH_VERMAGIC \ |
40 | MODULE_RANDSTRUCT_PLUGIN | 45 | MODULE_RANDSTRUCT_PLUGIN \ |
46 | MODULE_VERMAGIC_RETPOLINE | ||
41 | 47 | ||
diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 4a1c33416b6a..e2764d767f18 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c | |||
@@ -51,16 +51,16 @@ void __delayacct_tsk_init(struct task_struct *tsk) | |||
51 | * Finish delay accounting for a statistic using its timestamps (@start), | 51 | * Finish delay accounting for a statistic using its timestamps (@start), |
52 | * accumalator (@total) and @count | 52 | * accumalator (@total) and @count |
53 | */ | 53 | */ |
54 | static void delayacct_end(u64 *start, u64 *total, u32 *count) | 54 | static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count) |
55 | { | 55 | { |
56 | s64 ns = ktime_get_ns() - *start; | 56 | s64 ns = ktime_get_ns() - *start; |
57 | unsigned long flags; | 57 | unsigned long flags; |
58 | 58 | ||
59 | if (ns > 0) { | 59 | if (ns > 0) { |
60 | spin_lock_irqsave(&current->delays->lock, flags); | 60 | spin_lock_irqsave(lock, flags); |
61 | *total += ns; | 61 | *total += ns; |
62 | (*count)++; | 62 | (*count)++; |
63 | spin_unlock_irqrestore(&current->delays->lock, flags); | 63 | spin_unlock_irqrestore(lock, flags); |
64 | } | 64 | } |
65 | } | 65 | } |
66 | 66 | ||
@@ -69,17 +69,25 @@ void __delayacct_blkio_start(void) | |||
69 | current->delays->blkio_start = ktime_get_ns(); | 69 | current->delays->blkio_start = ktime_get_ns(); |
70 | } | 70 | } |
71 | 71 | ||
72 | void __delayacct_blkio_end(void) | 72 | /* |
73 | * We cannot rely on the `current` macro, as we haven't yet switched back to | ||
74 | * the process being woken. | ||
75 | */ | ||
76 | void __delayacct_blkio_end(struct task_struct *p) | ||
73 | { | 77 | { |
74 | if (current->delays->flags & DELAYACCT_PF_SWAPIN) | 78 | struct task_delay_info *delays = p->delays; |
75 | /* Swapin block I/O */ | 79 | u64 *total; |
76 | delayacct_end(&current->delays->blkio_start, | 80 | u32 *count; |
77 | &current->delays->swapin_delay, | 81 | |
78 | &current->delays->swapin_count); | 82 | if (p->delays->flags & DELAYACCT_PF_SWAPIN) { |
79 | else /* Other block I/O */ | 83 | total = &delays->swapin_delay; |
80 | delayacct_end(&current->delays->blkio_start, | 84 | count = &delays->swapin_count; |
81 | &current->delays->blkio_delay, | 85 | } else { |
82 | &current->delays->blkio_count); | 86 | total = &delays->blkio_delay; |
87 | count = &delays->blkio_count; | ||
88 | } | ||
89 | |||
90 | delayacct_end(&delays->lock, &delays->blkio_start, total, count); | ||
83 | } | 91 | } |
84 | 92 | ||
85 | int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) | 93 | int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) |
@@ -153,8 +161,10 @@ void __delayacct_freepages_start(void) | |||
153 | 161 | ||
154 | void __delayacct_freepages_end(void) | 162 | void __delayacct_freepages_end(void) |
155 | { | 163 | { |
156 | delayacct_end(&current->delays->freepages_start, | 164 | delayacct_end( |
157 | &current->delays->freepages_delay, | 165 | &current->delays->lock, |
158 | &current->delays->freepages_count); | 166 | &current->delays->freepages_start, |
167 | &current->delays->freepages_delay, | |
168 | &current->delays->freepages_count); | |
159 | } | 169 | } |
160 | 170 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 57d0b3657e16..8c5424dd5924 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1878,6 +1878,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |||
1878 | struct futex_q *this, *next; | 1878 | struct futex_q *this, *next; |
1879 | DEFINE_WAKE_Q(wake_q); | 1879 | DEFINE_WAKE_Q(wake_q); |
1880 | 1880 | ||
1881 | if (nr_wake < 0 || nr_requeue < 0) | ||
1882 | return -EINVAL; | ||
1883 | |||
1881 | /* | 1884 | /* |
1882 | * When PI not supported: return -ENOSYS if requeue_pi is true, | 1885 | * When PI not supported: return -ENOSYS if requeue_pi is true, |
1883 | * consequently the compiler knows requeue_pi is always false past | 1886 | * consequently the compiler knows requeue_pi is always false past |
@@ -2294,21 +2297,17 @@ static void unqueue_me_pi(struct futex_q *q) | |||
2294 | spin_unlock(q->lock_ptr); | 2297 | spin_unlock(q->lock_ptr); |
2295 | } | 2298 | } |
2296 | 2299 | ||
2297 | /* | ||
2298 | * Fixup the pi_state owner with the new owner. | ||
2299 | * | ||
2300 | * Must be called with hash bucket lock held and mm->sem held for non | ||
2301 | * private futexes. | ||
2302 | */ | ||
2303 | static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | 2300 | static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, |
2304 | struct task_struct *newowner) | 2301 | struct task_struct *argowner) |
2305 | { | 2302 | { |
2306 | u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; | ||
2307 | struct futex_pi_state *pi_state = q->pi_state; | 2303 | struct futex_pi_state *pi_state = q->pi_state; |
2308 | u32 uval, uninitialized_var(curval), newval; | 2304 | u32 uval, uninitialized_var(curval), newval; |
2309 | struct task_struct *oldowner; | 2305 | struct task_struct *oldowner, *newowner; |
2306 | u32 newtid; | ||
2310 | int ret; | 2307 | int ret; |
2311 | 2308 | ||
2309 | lockdep_assert_held(q->lock_ptr); | ||
2310 | |||
2312 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | 2311 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); |
2313 | 2312 | ||
2314 | oldowner = pi_state->owner; | 2313 | oldowner = pi_state->owner; |
@@ -2317,11 +2316,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
2317 | newtid |= FUTEX_OWNER_DIED; | 2316 | newtid |= FUTEX_OWNER_DIED; |
2318 | 2317 | ||
2319 | /* | 2318 | /* |
2320 | * We are here either because we stole the rtmutex from the | 2319 | * We are here because either: |
2321 | * previous highest priority waiter or we are the highest priority | 2320 | * |
2322 | * waiter but have failed to get the rtmutex the first time. | 2321 | * - we stole the lock and pi_state->owner needs updating to reflect |
2322 | * that (@argowner == current), | ||
2323 | * | ||
2324 | * or: | ||
2325 | * | ||
2326 | * - someone stole our lock and we need to fix things to point to the | ||
2327 | * new owner (@argowner == NULL). | ||
2323 | * | 2328 | * |
2324 | * We have to replace the newowner TID in the user space variable. | 2329 | * Either way, we have to replace the TID in the user space variable. |
2325 | * This must be atomic as we have to preserve the owner died bit here. | 2330 | * This must be atomic as we have to preserve the owner died bit here. |
2326 | * | 2331 | * |
2327 | * Note: We write the user space value _before_ changing the pi_state | 2332 | * Note: We write the user space value _before_ changing the pi_state |
@@ -2334,6 +2339,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
2334 | * in the PID check in lookup_pi_state. | 2339 | * in the PID check in lookup_pi_state. |
2335 | */ | 2340 | */ |
2336 | retry: | 2341 | retry: |
2342 | if (!argowner) { | ||
2343 | if (oldowner != current) { | ||
2344 | /* | ||
2345 | * We raced against a concurrent self; things are | ||
2346 | * already fixed up. Nothing to do. | ||
2347 | */ | ||
2348 | ret = 0; | ||
2349 | goto out_unlock; | ||
2350 | } | ||
2351 | |||
2352 | if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { | ||
2353 | /* We got the lock after all, nothing to fix. */ | ||
2354 | ret = 0; | ||
2355 | goto out_unlock; | ||
2356 | } | ||
2357 | |||
2358 | /* | ||
2359 | * Since we just failed the trylock; there must be an owner. | ||
2360 | */ | ||
2361 | newowner = rt_mutex_owner(&pi_state->pi_mutex); | ||
2362 | BUG_ON(!newowner); | ||
2363 | } else { | ||
2364 | WARN_ON_ONCE(argowner != current); | ||
2365 | if (oldowner == current) { | ||
2366 | /* | ||
2367 | * We raced against a concurrent self; things are | ||
2368 | * already fixed up. Nothing to do. | ||
2369 | */ | ||
2370 | ret = 0; | ||
2371 | goto out_unlock; | ||
2372 | } | ||
2373 | newowner = argowner; | ||
2374 | } | ||
2375 | |||
2376 | newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; | ||
2377 | |||
2337 | if (get_futex_value_locked(&uval, uaddr)) | 2378 | if (get_futex_value_locked(&uval, uaddr)) |
2338 | goto handle_fault; | 2379 | goto handle_fault; |
2339 | 2380 | ||
@@ -2434,9 +2475,9 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) | |||
2434 | * Got the lock. We might not be the anticipated owner if we | 2475 | * Got the lock. We might not be the anticipated owner if we |
2435 | * did a lock-steal - fix up the PI-state in that case: | 2476 | * did a lock-steal - fix up the PI-state in that case: |
2436 | * | 2477 | * |
2437 | * We can safely read pi_state->owner without holding wait_lock | 2478 | * Speculative pi_state->owner read (we don't hold wait_lock); |
2438 | * because we now own the rt_mutex, only the owner will attempt | 2479 | * since we own the lock pi_state->owner == current is the |
2439 | * to change it. | 2480 | * stable state, anything else needs more attention. |
2440 | */ | 2481 | */ |
2441 | if (q->pi_state->owner != current) | 2482 | if (q->pi_state->owner != current) |
2442 | ret = fixup_pi_state_owner(uaddr, q, current); | 2483 | ret = fixup_pi_state_owner(uaddr, q, current); |
@@ -2444,6 +2485,19 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) | |||
2444 | } | 2485 | } |
2445 | 2486 | ||
2446 | /* | 2487 | /* |
2488 | * If we didn't get the lock; check if anybody stole it from us. In | ||
2489 | * that case, we need to fix up the uval to point to them instead of | ||
2490 | * us, otherwise bad things happen. [10] | ||
2491 | * | ||
2492 | * Another speculative read; pi_state->owner == current is unstable | ||
2493 | * but needs our attention. | ||
2494 | */ | ||
2495 | if (q->pi_state->owner == current) { | ||
2496 | ret = fixup_pi_state_owner(uaddr, q, NULL); | ||
2497 | goto out; | ||
2498 | } | ||
2499 | |||
2500 | /* | ||
2447 | * Paranoia check. If we did not take the lock, then we should not be | 2501 | * Paranoia check. If we did not take the lock, then we should not be |
2448 | * the owner of the rt_mutex. | 2502 | * the owner of the rt_mutex. |
2449 | */ | 2503 | */ |
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 6f3dba6e4e9e..65cc0cb984e6 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
@@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
1290 | return ret; | 1290 | return ret; |
1291 | } | 1291 | } |
1292 | 1292 | ||
1293 | static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock) | ||
1294 | { | ||
1295 | int ret = try_to_take_rt_mutex(lock, current, NULL); | ||
1296 | |||
1297 | /* | ||
1298 | * try_to_take_rt_mutex() sets the lock waiters bit | ||
1299 | * unconditionally. Clean this up. | ||
1300 | */ | ||
1301 | fixup_rt_mutex_waiters(lock); | ||
1302 | |||
1303 | return ret; | ||
1304 | } | ||
1305 | |||
1293 | /* | 1306 | /* |
1294 | * Slow path try-lock function: | 1307 | * Slow path try-lock function: |
1295 | */ | 1308 | */ |
@@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) | |||
1312 | */ | 1325 | */ |
1313 | raw_spin_lock_irqsave(&lock->wait_lock, flags); | 1326 | raw_spin_lock_irqsave(&lock->wait_lock, flags); |
1314 | 1327 | ||
1315 | ret = try_to_take_rt_mutex(lock, current, NULL); | 1328 | ret = __rt_mutex_slowtrylock(lock); |
1316 | |||
1317 | /* | ||
1318 | * try_to_take_rt_mutex() sets the lock waiters bit | ||
1319 | * unconditionally. Clean this up. | ||
1320 | */ | ||
1321 | fixup_rt_mutex_waiters(lock); | ||
1322 | 1329 | ||
1323 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | 1330 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
1324 | 1331 | ||
@@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) | |||
1505 | return rt_mutex_slowtrylock(lock); | 1512 | return rt_mutex_slowtrylock(lock); |
1506 | } | 1513 | } |
1507 | 1514 | ||
1515 | int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) | ||
1516 | { | ||
1517 | return __rt_mutex_slowtrylock(lock); | ||
1518 | } | ||
1519 | |||
1508 | /** | 1520 | /** |
1509 | * rt_mutex_timed_lock - lock a rt_mutex interruptible | 1521 | * rt_mutex_timed_lock - lock a rt_mutex interruptible |
1510 | * the timeout structure is provided | 1522 | * the timeout structure is provided |
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 124e98ca0b17..68686b3ec3c1 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h | |||
@@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, | |||
148 | struct rt_mutex_waiter *waiter); | 148 | struct rt_mutex_waiter *waiter); |
149 | 149 | ||
150 | extern int rt_mutex_futex_trylock(struct rt_mutex *l); | 150 | extern int rt_mutex_futex_trylock(struct rt_mutex *l); |
151 | extern int __rt_mutex_futex_trylock(struct rt_mutex *l); | ||
151 | 152 | ||
152 | extern void rt_mutex_futex_unlock(struct rt_mutex *lock); | 153 | extern void rt_mutex_futex_unlock(struct rt_mutex *lock); |
153 | extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, | 154 | extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 644fa2e3d993..a7bf32aabfda 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -2056,7 +2056,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
2056 | p->state = TASK_WAKING; | 2056 | p->state = TASK_WAKING; |
2057 | 2057 | ||
2058 | if (p->in_iowait) { | 2058 | if (p->in_iowait) { |
2059 | delayacct_blkio_end(); | 2059 | delayacct_blkio_end(p); |
2060 | atomic_dec(&task_rq(p)->nr_iowait); | 2060 | atomic_dec(&task_rq(p)->nr_iowait); |
2061 | } | 2061 | } |
2062 | 2062 | ||
@@ -2069,7 +2069,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
2069 | #else /* CONFIG_SMP */ | 2069 | #else /* CONFIG_SMP */ |
2070 | 2070 | ||
2071 | if (p->in_iowait) { | 2071 | if (p->in_iowait) { |
2072 | delayacct_blkio_end(); | 2072 | delayacct_blkio_end(p); |
2073 | atomic_dec(&task_rq(p)->nr_iowait); | 2073 | atomic_dec(&task_rq(p)->nr_iowait); |
2074 | } | 2074 | } |
2075 | 2075 | ||
@@ -2122,7 +2122,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) | |||
2122 | 2122 | ||
2123 | if (!task_on_rq_queued(p)) { | 2123 | if (!task_on_rq_queued(p)) { |
2124 | if (p->in_iowait) { | 2124 | if (p->in_iowait) { |
2125 | delayacct_blkio_end(); | 2125 | delayacct_blkio_end(p); |
2126 | atomic_dec(&rq->nr_iowait); | 2126 | atomic_dec(&rq->nr_iowait); |
2127 | } | 2127 | } |
2128 | ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); | 2128 | ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); |
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 89a9e1b4264a..0bcf00e3ce48 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c | |||
@@ -1696,7 +1696,7 @@ void run_local_timers(void) | |||
1696 | hrtimer_run_queues(); | 1696 | hrtimer_run_queues(); |
1697 | /* Raise the softirq only if required. */ | 1697 | /* Raise the softirq only if required. */ |
1698 | if (time_before(jiffies, base->clk)) { | 1698 | if (time_before(jiffies, base->clk)) { |
1699 | if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) | 1699 | if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) |
1700 | return; | 1700 | return; |
1701 | /* CPU is awake, so check the deferrable base. */ | 1701 | /* CPU is awake, so check the deferrable base. */ |
1702 | base++; | 1702 | base++; |
diff --git a/scripts/Makefile.build b/scripts/Makefile.build index cb8997ed0149..47cddf32aeba 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build | |||
@@ -265,12 +265,18 @@ else | |||
265 | objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) | 265 | objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) |
266 | endif | 266 | endif |
267 | 267 | ||
268 | ifdef CONFIG_MODVERSIONS | ||
269 | objtool_o = $(@D)/.tmp_$(@F) | ||
270 | else | ||
271 | objtool_o = $(@) | ||
272 | endif | ||
273 | |||
268 | # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory | 274 | # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory |
269 | # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file | 275 | # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file |
270 | # 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file | 276 | # 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file |
271 | cmd_objtool = $(if $(patsubst y%,, \ | 277 | cmd_objtool = $(if $(patsubst y%,, \ |
272 | $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ | 278 | $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ |
273 | $(__objtool_obj) $(objtool_args) "$(@)";) | 279 | $(__objtool_obj) $(objtool_args) "$(objtool_o)";) |
274 | objtool_obj = $(if $(patsubst y%,, \ | 280 | objtool_obj = $(if $(patsubst y%,, \ |
275 | $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ | 281 | $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ |
276 | $(__objtool_obj)) | 282 | $(__objtool_obj)) |
@@ -286,16 +292,16 @@ objtool_dep = $(objtool_obj) \ | |||
286 | define rule_cc_o_c | 292 | define rule_cc_o_c |
287 | $(call echo-cmd,checksrc) $(cmd_checksrc) \ | 293 | $(call echo-cmd,checksrc) $(cmd_checksrc) \ |
288 | $(call cmd_and_fixdep,cc_o_c) \ | 294 | $(call cmd_and_fixdep,cc_o_c) \ |
289 | $(cmd_modversions_c) \ | ||
290 | $(cmd_checkdoc) \ | 295 | $(cmd_checkdoc) \ |
291 | $(call echo-cmd,objtool) $(cmd_objtool) \ | 296 | $(call echo-cmd,objtool) $(cmd_objtool) \ |
297 | $(cmd_modversions_c) \ | ||
292 | $(call echo-cmd,record_mcount) $(cmd_record_mcount) | 298 | $(call echo-cmd,record_mcount) $(cmd_record_mcount) |
293 | endef | 299 | endef |
294 | 300 | ||
295 | define rule_as_o_S | 301 | define rule_as_o_S |
296 | $(call cmd_and_fixdep,as_o_S) \ | 302 | $(call cmd_and_fixdep,as_o_S) \ |
297 | $(cmd_modversions_S) \ | 303 | $(call echo-cmd,objtool) $(cmd_objtool) \ |
298 | $(call echo-cmd,objtool) $(cmd_objtool) | 304 | $(cmd_modversions_S) |
299 | endef | 305 | endef |
300 | 306 | ||
301 | # List module undefined symbols (or empty line if not enabled) | 307 | # List module undefined symbols (or empty line if not enabled) |
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 24460155c82c..c1c338661699 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <stdlib.h> | 26 | #include <stdlib.h> |
27 | #include <string.h> | 27 | #include <string.h> |
28 | #include <unistd.h> | 28 | #include <unistd.h> |
29 | #include <errno.h> | ||
29 | 30 | ||
30 | #include "elf.h" | 31 | #include "elf.h" |
31 | #include "warn.h" | 32 | #include "warn.h" |
@@ -358,7 +359,8 @@ struct elf *elf_open(const char *name, int flags) | |||
358 | 359 | ||
359 | elf->fd = open(name, flags); | 360 | elf->fd = open(name, flags); |
360 | if (elf->fd == -1) { | 361 | if (elf->fd == -1) { |
361 | perror("open"); | 362 | fprintf(stderr, "objtool: Can't open '%s': %s\n", |
363 | name, strerror(errno)); | ||
362 | goto err; | 364 | goto err; |
363 | } | 365 | } |
364 | 366 | ||