author      Linus Torvalds <torvalds@linux-foundation.org>    2015-04-13 16:16:36 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>    2015-04-13 16:16:36 -0400
commit      60f898eeaaa1c5d0162a4240bacf33a6c87ecef6 (patch)
tree        23eeac4b1e9a616779d22c104dbc8bd45dfeefd1 /arch/x86/include
parent      977e1ba50893c15121557b39de586901fe3f75cf (diff)
parent      3b75232d55680ca166dffa274d0587d5faf0a016 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
"There were lots of changes in this development cycle:
- over 100 separate cleanups, restructuring changes, speedups and
fixes in the x86 system call, irq, trap and other entry code, part
of a heroic effort to deobfuscate a decade old spaghetti asm code
and its C code dependencies (Denys Vlasenko, Andy Lutomirski)
- alternatives code fixes and enhancements (Borislav Petkov)
- simplifications and cleanups to the compat code (Brian Gerst)
- signal handling fixes and new x86 testcases (Andy Lutomirski)
- various other fixes and cleanups
By their nature many of these changes are risky - we tried to test
them well on many different x86 systems (there are no known
regressions), and they are split up finely to help bisection - but
there's still a fair bit of residual risk left so caveat emptor"
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
perf/x86/64: Report regs_user->ax too in get_regs_user()
perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
x86/asm/entry/32: Tidy up JNZ instructions after TESTs
x86/asm/entry/64: Reduce padding in execve stubs
x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
x86/asm/entry/64: Simplify jumps in ret_from_fork
x86/asm/entry/64: Remove a redundant jump
x86/asm/entry/64: Optimize [v]fork/clone stubs
x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
x86/asm/entry/64: Move stub_x32_execve() closer to stub_execveat()
x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
x86/asm/entry/64: Add forgotten CFI annotation
x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
x86/asm/entry/64: Move opportunistic sysret code to syscall code path
x86, selftests: Add sigreturn selftest
x86/alternatives: Guard NOPs optimization
x86/asm/entry: Clear EXTRA_REGS for all executable formats
x86/signal: Remove pax argument from restore_sigcontext
...
Diffstat (limited to 'arch/x86/include')
28 files changed, 697 insertions, 490 deletions
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 372231c22a47..bdf02eeee765 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,12 +18,63 @@ | |||
18 | .endm | 18 | .endm |
19 | #endif | 19 | #endif |
20 | 20 | ||
21 | .macro altinstruction_entry orig alt feature orig_len alt_len | 21 | .macro altinstruction_entry orig alt feature orig_len alt_len pad_len |
22 | .long \orig - . | 22 | .long \orig - . |
23 | .long \alt - . | 23 | .long \alt - . |
24 | .word \feature | 24 | .word \feature |
25 | .byte \orig_len | 25 | .byte \orig_len |
26 | .byte \alt_len | 26 | .byte \alt_len |
27 | .byte \pad_len | ||
28 | .endm | ||
29 | |||
30 | .macro ALTERNATIVE oldinstr, newinstr, feature | ||
31 | 140: | ||
32 | \oldinstr | ||
33 | 141: | ||
34 | .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 | ||
35 | 142: | ||
36 | |||
37 | .pushsection .altinstructions,"a" | ||
38 | altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b | ||
39 | .popsection | ||
40 | |||
41 | .pushsection .altinstr_replacement,"ax" | ||
42 | 143: | ||
43 | \newinstr | ||
44 | 144: | ||
45 | .popsection | ||
46 | .endm | ||
47 | |||
48 | #define old_len 141b-140b | ||
49 | #define new_len1 144f-143f | ||
50 | #define new_len2 145f-144f | ||
51 | |||
52 | /* | ||
53 | * max without conditionals. Idea adapted from: | ||
54 | * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax | ||
55 | */ | ||
56 | #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) | ||
57 | |||
58 | .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 | ||
59 | 140: | ||
60 | \oldinstr | ||
61 | 141: | ||
62 | .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ | ||
63 | (alt_max_short(new_len1, new_len2) - (old_len)),0x90 | ||
64 | 142: | ||
65 | |||
66 | .pushsection .altinstructions,"a" | ||
67 | altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b | ||
68 | altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b | ||
69 | .popsection | ||
70 | |||
71 | .pushsection .altinstr_replacement,"ax" | ||
72 | 143: | ||
73 | \newinstr1 | ||
74 | 144: | ||
75 | \newinstr2 | ||
76 | 145: | ||
77 | .popsection | ||
27 | .endm | 78 | .endm |
28 | 79 | ||
29 | #endif /* __ASSEMBLY__ */ | 80 | #endif /* __ASSEMBLY__ */ |
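The alt_max_short() macro added above computes a max() without conditionals so that it can be evaluated inside the .skip directive at assembly time. As a sanity check, here is the same branchless-max identity as a small user-space C program; this is an illustrative sketch only, not kernel code (gas expression semantics differ slightly from C, which is why the assembler variants carry extra negations):

```c
#include <assert.h>
#include <stdio.h>

/* Branchless max(a, b): (a ^ b) & -(a < b) equals (a ^ b) when a < b, else 0. */
static unsigned int alt_max_short_c(unsigned int a, unsigned int b)
{
	return a ^ ((a ^ b) & -(unsigned int)(a < b));
}

int main(void)
{
	assert(alt_max_short_c(3, 5) == 5);
	assert(alt_max_short_c(5, 3) == 5);
	assert(alt_max_short_c(4, 4) == 4);
	printf("branchless max OK\n");
	return 0;
}
```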
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 473bdbee378a..ba32af062f61 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,8 +48,9 @@ struct alt_instr { | |||
48 | s32 repl_offset; /* offset to replacement instruction */ | 48 | s32 repl_offset; /* offset to replacement instruction */ |
49 | u16 cpuid; /* cpuid bit set for replacement */ | 49 | u16 cpuid; /* cpuid bit set for replacement */ |
50 | u8 instrlen; /* length of original instruction */ | 50 | u8 instrlen; /* length of original instruction */ |
51 | u8 replacementlen; /* length of new instruction, <= instrlen */ | 51 | u8 replacementlen; /* length of new instruction */ |
52 | }; | 52 | u8 padlen; /* length of build-time padding */ |
53 | } __packed; | ||
53 | 54 | ||
54 | extern void alternative_instructions(void); | 55 | extern void alternative_instructions(void); |
55 | extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); | 56 | extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); |
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
76 | } | 77 | } |
77 | #endif /* CONFIG_SMP */ | 78 | #endif /* CONFIG_SMP */ |
78 | 79 | ||
79 | #define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" | 80 | #define b_replacement(num) "664"#num |
81 | #define e_replacement(num) "665"#num | ||
80 | 82 | ||
81 | #define b_replacement(number) "663"#number | 83 | #define alt_end_marker "663" |
82 | #define e_replacement(number) "664"#number | 84 | #define alt_slen "662b-661b" |
85 | #define alt_pad_len alt_end_marker"b-662b" | ||
86 | #define alt_total_slen alt_end_marker"b-661b" | ||
87 | #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" | ||
83 | 88 | ||
84 | #define alt_slen "662b-661b" | 89 | #define __OLDINSTR(oldinstr, num) \ |
85 | #define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" | 90 | "661:\n\t" oldinstr "\n662:\n" \ |
91 | ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ | ||
92 | "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" | ||
86 | 93 | ||
87 | #define ALTINSTR_ENTRY(feature, number) \ | 94 | #define OLDINSTR(oldinstr, num) \ |
95 | __OLDINSTR(oldinstr, num) \ | ||
96 | alt_end_marker ":\n" | ||
97 | |||
98 | /* | ||
99 | * max without conditionals. Idea adapted from: | ||
100 | * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax | ||
101 | * | ||
102 | * The additional "-" is needed because gas works with s32s. | ||
103 | */ | ||
104 | #define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))" | ||
105 | |||
106 | /* | ||
107 | * Pad the second replacement alternative with additional NOPs if it is | ||
108 | * additionally longer than the first replacement alternative. | ||
109 | */ | ||
110 | #define OLDINSTR_2(oldinstr, num1, num2) \ | ||
111 | "661:\n\t" oldinstr "\n662:\n" \ | ||
112 | ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ | ||
113 | "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ | ||
114 | alt_end_marker ":\n" | ||
115 | |||
116 | #define ALTINSTR_ENTRY(feature, num) \ | ||
88 | " .long 661b - .\n" /* label */ \ | 117 | " .long 661b - .\n" /* label */ \ |
89 | " .long " b_replacement(number)"f - .\n" /* new instruction */ \ | 118 | " .long " b_replacement(num)"f - .\n" /* new instruction */ \ |
90 | " .word " __stringify(feature) "\n" /* feature bit */ \ | 119 | " .word " __stringify(feature) "\n" /* feature bit */ \ |
91 | " .byte " alt_slen "\n" /* source len */ \ | 120 | " .byte " alt_total_slen "\n" /* source len */ \ |
92 | " .byte " alt_rlen(number) "\n" /* replacement len */ | 121 | " .byte " alt_rlen(num) "\n" /* replacement len */ \ |
93 | 122 | " .byte " alt_pad_len "\n" /* pad len */ | |
94 | #define DISCARD_ENTRY(number) /* rlen <= slen */ \ | ||
95 | " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" | ||
96 | 123 | ||
97 | #define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ | 124 | #define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ |
98 | b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" | 125 | b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" |
99 | 126 | ||
100 | /* alternative assembly primitive: */ | 127 | /* alternative assembly primitive: */ |
101 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ | 128 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ |
102 | OLDINSTR(oldinstr) \ | 129 | OLDINSTR(oldinstr, 1) \ |
103 | ".pushsection .altinstructions,\"a\"\n" \ | 130 | ".pushsection .altinstructions,\"a\"\n" \ |
104 | ALTINSTR_ENTRY(feature, 1) \ | 131 | ALTINSTR_ENTRY(feature, 1) \ |
105 | ".popsection\n" \ | 132 | ".popsection\n" \ |
106 | ".pushsection .discard,\"aw\",@progbits\n" \ | ||
107 | DISCARD_ENTRY(1) \ | ||
108 | ".popsection\n" \ | ||
109 | ".pushsection .altinstr_replacement, \"ax\"\n" \ | 133 | ".pushsection .altinstr_replacement, \"ax\"\n" \ |
110 | ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ | 134 | ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ |
111 | ".popsection" | 135 | ".popsection" |
112 | 136 | ||
113 | #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ | 137 | #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ |
114 | OLDINSTR(oldinstr) \ | 138 | OLDINSTR_2(oldinstr, 1, 2) \ |
115 | ".pushsection .altinstructions,\"a\"\n" \ | 139 | ".pushsection .altinstructions,\"a\"\n" \ |
116 | ALTINSTR_ENTRY(feature1, 1) \ | 140 | ALTINSTR_ENTRY(feature1, 1) \ |
117 | ALTINSTR_ENTRY(feature2, 2) \ | 141 | ALTINSTR_ENTRY(feature2, 2) \ |
118 | ".popsection\n" \ | 142 | ".popsection\n" \ |
119 | ".pushsection .discard,\"aw\",@progbits\n" \ | ||
120 | DISCARD_ENTRY(1) \ | ||
121 | DISCARD_ENTRY(2) \ | ||
122 | ".popsection\n" \ | ||
123 | ".pushsection .altinstr_replacement, \"ax\"\n" \ | 143 | ".pushsection .altinstr_replacement, \"ax\"\n" \ |
124 | ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ | 144 | ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ |
125 | ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ | 145 | ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ |
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
146 | #define alternative(oldinstr, newinstr, feature) \ | 166 | #define alternative(oldinstr, newinstr, feature) \ |
147 | asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") | 167 | asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") |
148 | 168 | ||
169 | #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ | ||
170 | asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") | ||
171 | |||
149 | /* | 172 | /* |
150 | * Alternative inline assembly with input. | 173 | * Alternative inline assembly with input. |
151 | * | 174 | * |
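The cryptic ".skip -(((...)) > 0) * (...), 0x90" expressions in __OLDINSTR()/OLDINSTR_2() are what emit the build-time NOP padding recorded in the new padlen byte: the original instruction is padded up to the length of the (longer) replacement, but never by a negative amount. In GNU as a true comparison evaluates to -1, which the leading minus sign turns back into a 0/1 factor; in plain C the same arithmetic is just a clamped difference. An illustrative sketch, not kernel code:

```c
#include <stdio.h>

/* ALTERNATIVE: NOP bytes needed after the original instruction. */
static int pad_len(int orig_len, int repl_len)
{
	int grow = repl_len - orig_len;

	return grow > 0 ? grow : 0;
}

/* ALTERNATIVE_2: pad against the longer of the two replacements. */
static int pad_len_2(int orig_len, int repl_len1, int repl_len2)
{
	int longest = repl_len1 > repl_len2 ? repl_len1 : repl_len2;

	return pad_len(orig_len, longest);
}

int main(void)
{
	/* A 2-byte original with 5- and 3-byte replacements needs 3 NOPs. */
	printf("%d %d\n", pad_len(2, 5), pad_len_2(2, 5, 3));
	return 0;
}
```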
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 08f217354442..976b86a325e5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) | |||
91 | { | 91 | { |
92 | volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); | 92 | volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); |
93 | 93 | ||
94 | alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, | 94 | alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, |
95 | ASM_OUTPUT2("=r" (v), "=m" (*addr)), | 95 | ASM_OUTPUT2("=r" (v), "=m" (*addr)), |
96 | ASM_OUTPUT2("0" (v), "m" (*addr))); | 96 | ASM_OUTPUT2("0" (v), "m" (*addr))); |
97 | } | 97 | } |
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 2ab1eb33106e..959e45b81fe2 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -95,13 +95,11 @@ do { \ | |||
95 | * Stop RDTSC speculation. This is needed when you need to use RDTSC | 95 | * Stop RDTSC speculation. This is needed when you need to use RDTSC |
96 | * (or get_cycles or vread that possibly accesses the TSC) in a defined | 96 | * (or get_cycles or vread that possibly accesses the TSC) in a defined |
97 | * code region. | 97 | * code region. |
98 | * | ||
99 | * (Could use an alternative three way for this if there was one.) | ||
100 | */ | 98 | */ |
101 | static __always_inline void rdtsc_barrier(void) | 99 | static __always_inline void rdtsc_barrier(void) |
102 | { | 100 | { |
103 | alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); | 101 | alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, |
104 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); | 102 | "lfence", X86_FEATURE_LFENCE_RDTSC); |
105 | } | 103 | } |
106 | 104 | ||
107 | #endif /* _ASM_X86_BARRIER_H */ | 105 | #endif /* _ASM_X86_BARRIER_H */ |
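With an empty old instruction, rdtsc_barrier() is now a single alternatives site: zero bytes on CPUs that need no fence, an MFENCE or LFENCE patched in otherwise. A self-contained user-space sketch of the usage pattern it supports follows; the function name is invented and the fence is hard-coded here rather than patched in, so treat it as illustration only:

```c
#include <stdio.h>

static inline unsigned long long rdtsc_serialized(void)
{
	unsigned int lo, hi;

	/* Stand-in for rdtsc_barrier(): keep RDTSC from executing early. */
	asm volatile("mfence" ::: "memory");
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((unsigned long long)hi << 32) | lo;
}

int main(void)
{
	unsigned long long t0 = rdtsc_serialized();
	unsigned long long t1 = rdtsc_serialized();

	printf("tsc delta: %llu\n", t1 - t0);
	return 0;
}
```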
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b46f83..1c8b50edb2db 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with | |||
55 | * for assembly code: | 55 | * for assembly code: |
56 | */ | 56 | */ |
57 | 57 | ||
58 | #define R15 0 | 58 | /* The layout forms the "struct pt_regs" on the stack: */ |
59 | #define R14 8 | 59 | /* |
60 | #define R13 16 | 60 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry |
61 | #define R12 24 | 61 | * unless syscall needs a complete, fully filled "struct pt_regs". |
62 | #define RBP 32 | 62 | */ |
63 | #define RBX 40 | 63 | #define R15 0*8 |
64 | 64 | #define R14 1*8 | |
65 | /* arguments: interrupts/non tracing syscalls only save up to here: */ | 65 | #define R13 2*8 |
66 | #define R11 48 | 66 | #define R12 3*8 |
67 | #define R10 56 | 67 | #define RBP 4*8 |
68 | #define R9 64 | 68 | #define RBX 5*8 |
69 | #define R8 72 | 69 | /* These regs are callee-clobbered. Always saved on kernel entry. */ |
70 | #define RAX 80 | 70 | #define R11 6*8 |
71 | #define RCX 88 | 71 | #define R10 7*8 |
72 | #define RDX 96 | 72 | #define R9 8*8 |
73 | #define RSI 104 | 73 | #define R8 9*8 |
74 | #define RDI 112 | 74 | #define RAX 10*8 |
75 | #define ORIG_RAX 120 /* + error_code */ | 75 | #define RCX 11*8 |
76 | /* end of arguments */ | 76 | #define RDX 12*8 |
77 | 77 | #define RSI 13*8 | |
78 | /* cpu exception frame or undefined in case of fast syscall: */ | 78 | #define RDI 14*8 |
79 | #define RIP 128 | 79 | /* |
80 | #define CS 136 | 80 | * On syscall entry, this is syscall#. On CPU exception, this is error code. |
81 | #define EFLAGS 144 | 81 | * On hw interrupt, it's IRQ number: |
82 | #define RSP 152 | 82 | */ |
83 | #define SS 160 | 83 | #define ORIG_RAX 15*8 |
84 | 84 | /* Return frame for iretq */ | |
85 | #define ARGOFFSET R11 | 85 | #define RIP 16*8 |
86 | 86 | #define CS 17*8 | |
87 | .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 | 87 | #define EFLAGS 18*8 |
88 | subq $9*8+\addskip, %rsp | 88 | #define RSP 19*8 |
89 | CFI_ADJUST_CFA_OFFSET 9*8+\addskip | 89 | #define SS 20*8 |
90 | movq_cfi rdi, 8*8 | 90 | |
91 | movq_cfi rsi, 7*8 | 91 | #define SIZEOF_PTREGS 21*8 |
92 | movq_cfi rdx, 6*8 | 92 | |
93 | 93 | .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 | |
94 | .if \save_rcx | 94 | subq $15*8+\addskip, %rsp |
95 | movq_cfi rcx, 5*8 | 95 | CFI_ADJUST_CFA_OFFSET 15*8+\addskip |
96 | .endif | 96 | .endm |
97 | 97 | ||
98 | .if \rax_enosys | 98 | .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 |
99 | movq $-ENOSYS, 4*8(%rsp) | 99 | .if \r11 |
100 | .else | 100 | movq_cfi r11, 6*8+\offset |
101 | movq_cfi rax, 4*8 | ||
102 | .endif | 101 | .endif |
103 | 102 | .if \r8910 | |
104 | .if \save_r891011 | 103 | movq_cfi r10, 7*8+\offset |
105 | movq_cfi r8, 3*8 | 104 | movq_cfi r9, 8*8+\offset |
106 | movq_cfi r9, 2*8 | 105 | movq_cfi r8, 9*8+\offset |
107 | movq_cfi r10, 1*8 | 106 | .endif |
108 | movq_cfi r11, 0*8 | 107 | .if \rax |
108 | movq_cfi rax, 10*8+\offset | ||
109 | .endif | ||
110 | .if \rcx | ||
111 | movq_cfi rcx, 11*8+\offset | ||
109 | .endif | 112 | .endif |
113 | movq_cfi rdx, 12*8+\offset | ||
114 | movq_cfi rsi, 13*8+\offset | ||
115 | movq_cfi rdi, 14*8+\offset | ||
116 | .endm | ||
117 | .macro SAVE_C_REGS offset=0 | ||
118 | SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 | ||
119 | .endm | ||
120 | .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 | ||
121 | SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 | ||
122 | .endm | ||
123 | .macro SAVE_C_REGS_EXCEPT_R891011 | ||
124 | SAVE_C_REGS_HELPER 0, 1, 1, 0, 0 | ||
125 | .endm | ||
126 | .macro SAVE_C_REGS_EXCEPT_RCX_R891011 | ||
127 | SAVE_C_REGS_HELPER 0, 1, 0, 0, 0 | ||
128 | .endm | ||
129 | .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11 | ||
130 | SAVE_C_REGS_HELPER 0, 0, 0, 1, 0 | ||
131 | .endm | ||
132 | |||
133 | .macro SAVE_EXTRA_REGS offset=0 | ||
134 | movq_cfi r15, 0*8+\offset | ||
135 | movq_cfi r14, 1*8+\offset | ||
136 | movq_cfi r13, 2*8+\offset | ||
137 | movq_cfi r12, 3*8+\offset | ||
138 | movq_cfi rbp, 4*8+\offset | ||
139 | movq_cfi rbx, 5*8+\offset | ||
140 | .endm | ||
141 | .macro SAVE_EXTRA_REGS_RBP offset=0 | ||
142 | movq_cfi rbp, 4*8+\offset | ||
143 | .endm | ||
110 | 144 | ||
145 | .macro RESTORE_EXTRA_REGS offset=0 | ||
146 | movq_cfi_restore 0*8+\offset, r15 | ||
147 | movq_cfi_restore 1*8+\offset, r14 | ||
148 | movq_cfi_restore 2*8+\offset, r13 | ||
149 | movq_cfi_restore 3*8+\offset, r12 | ||
150 | movq_cfi_restore 4*8+\offset, rbp | ||
151 | movq_cfi_restore 5*8+\offset, rbx | ||
111 | .endm | 152 | .endm |
112 | 153 | ||
113 | #define ARG_SKIP (9*8) | 154 | .macro ZERO_EXTRA_REGS |
155 | xorl %r15d, %r15d | ||
156 | xorl %r14d, %r14d | ||
157 | xorl %r13d, %r13d | ||
158 | xorl %r12d, %r12d | ||
159 | xorl %ebp, %ebp | ||
160 | xorl %ebx, %ebx | ||
161 | .endm | ||
114 | 162 | ||
115 | .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ | 163 | .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 |
116 | rstor_r8910=1, rstor_rdx=1 | ||
117 | .if \rstor_r11 | 164 | .if \rstor_r11 |
118 | movq_cfi_restore 0*8, r11 | 165 | movq_cfi_restore 6*8, r11 |
119 | .endif | 166 | .endif |
120 | |||
121 | .if \rstor_r8910 | 167 | .if \rstor_r8910 |
122 | movq_cfi_restore 1*8, r10 | 168 | movq_cfi_restore 7*8, r10 |
123 | movq_cfi_restore 2*8, r9 | 169 | movq_cfi_restore 8*8, r9 |
124 | movq_cfi_restore 3*8, r8 | 170 | movq_cfi_restore 9*8, r8 |
125 | .endif | 171 | .endif |
126 | |||
127 | .if \rstor_rax | 172 | .if \rstor_rax |
128 | movq_cfi_restore 4*8, rax | 173 | movq_cfi_restore 10*8, rax |
129 | .endif | 174 | .endif |
130 | |||
131 | .if \rstor_rcx | 175 | .if \rstor_rcx |
132 | movq_cfi_restore 5*8, rcx | 176 | movq_cfi_restore 11*8, rcx |
133 | .endif | 177 | .endif |
134 | |||
135 | .if \rstor_rdx | 178 | .if \rstor_rdx |
136 | movq_cfi_restore 6*8, rdx | 179 | movq_cfi_restore 12*8, rdx |
137 | .endif | ||
138 | |||
139 | movq_cfi_restore 7*8, rsi | ||
140 | movq_cfi_restore 8*8, rdi | ||
141 | |||
142 | .if ARG_SKIP+\addskip > 0 | ||
143 | addq $ARG_SKIP+\addskip, %rsp | ||
144 | CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) | ||
145 | .endif | 180 | .endif |
181 | movq_cfi_restore 13*8, rsi | ||
182 | movq_cfi_restore 14*8, rdi | ||
146 | .endm | 183 | .endm |
147 | 184 | .macro RESTORE_C_REGS | |
148 | .macro LOAD_ARGS offset, skiprax=0 | 185 | RESTORE_C_REGS_HELPER 1,1,1,1,1 |
149 | movq \offset(%rsp), %r11 | ||
150 | movq \offset+8(%rsp), %r10 | ||
151 | movq \offset+16(%rsp), %r9 | ||
152 | movq \offset+24(%rsp), %r8 | ||
153 | movq \offset+40(%rsp), %rcx | ||
154 | movq \offset+48(%rsp), %rdx | ||
155 | movq \offset+56(%rsp), %rsi | ||
156 | movq \offset+64(%rsp), %rdi | ||
157 | .if \skiprax | ||
158 | .else | ||
159 | movq \offset+72(%rsp), %rax | ||
160 | .endif | ||
161 | .endm | 186 | .endm |
162 | 187 | .macro RESTORE_C_REGS_EXCEPT_RAX | |
163 | #define REST_SKIP (6*8) | 188 | RESTORE_C_REGS_HELPER 0,1,1,1,1 |
164 | |||
165 | .macro SAVE_REST | ||
166 | subq $REST_SKIP, %rsp | ||
167 | CFI_ADJUST_CFA_OFFSET REST_SKIP | ||
168 | movq_cfi rbx, 5*8 | ||
169 | movq_cfi rbp, 4*8 | ||
170 | movq_cfi r12, 3*8 | ||
171 | movq_cfi r13, 2*8 | ||
172 | movq_cfi r14, 1*8 | ||
173 | movq_cfi r15, 0*8 | ||
174 | .endm | 189 | .endm |
175 | 190 | .macro RESTORE_C_REGS_EXCEPT_RCX | |
176 | .macro RESTORE_REST | 191 | RESTORE_C_REGS_HELPER 1,0,1,1,1 |
177 | movq_cfi_restore 0*8, r15 | ||
178 | movq_cfi_restore 1*8, r14 | ||
179 | movq_cfi_restore 2*8, r13 | ||
180 | movq_cfi_restore 3*8, r12 | ||
181 | movq_cfi_restore 4*8, rbp | ||
182 | movq_cfi_restore 5*8, rbx | ||
183 | addq $REST_SKIP, %rsp | ||
184 | CFI_ADJUST_CFA_OFFSET -(REST_SKIP) | ||
185 | .endm | 192 | .endm |
186 | 193 | .macro RESTORE_C_REGS_EXCEPT_R11 | |
187 | .macro SAVE_ALL | 194 | RESTORE_C_REGS_HELPER 1,1,0,1,1 |
188 | SAVE_ARGS | 195 | .endm |
189 | SAVE_REST | 196 | .macro RESTORE_C_REGS_EXCEPT_RCX_R11 |
197 | RESTORE_C_REGS_HELPER 1,0,0,1,1 | ||
198 | .endm | ||
199 | .macro RESTORE_RSI_RDI | ||
200 | RESTORE_C_REGS_HELPER 0,0,0,0,0 | ||
201 | .endm | ||
202 | .macro RESTORE_RSI_RDI_RDX | ||
203 | RESTORE_C_REGS_HELPER 0,0,0,0,1 | ||
190 | .endm | 204 | .endm |
191 | 205 | ||
192 | .macro RESTORE_ALL addskip=0 | 206 | .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 |
193 | RESTORE_REST | 207 | addq $15*8+\addskip, %rsp |
194 | RESTORE_ARGS 1, \addskip | 208 | CFI_ADJUST_CFA_OFFSET -(15*8+\addskip) |
195 | .endm | 209 | .endm |
196 | 210 | ||
197 | .macro icebp | 211 | .macro icebp |
@@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with | |||
210 | */ | 224 | */ |
211 | 225 | ||
212 | .macro SAVE_ALL | 226 | .macro SAVE_ALL |
213 | pushl_cfi %eax | 227 | pushl_cfi_reg eax |
214 | CFI_REL_OFFSET eax, 0 | 228 | pushl_cfi_reg ebp |
215 | pushl_cfi %ebp | 229 | pushl_cfi_reg edi |
216 | CFI_REL_OFFSET ebp, 0 | 230 | pushl_cfi_reg esi |
217 | pushl_cfi %edi | 231 | pushl_cfi_reg edx |
218 | CFI_REL_OFFSET edi, 0 | 232 | pushl_cfi_reg ecx |
219 | pushl_cfi %esi | 233 | pushl_cfi_reg ebx |
220 | CFI_REL_OFFSET esi, 0 | ||
221 | pushl_cfi %edx | ||
222 | CFI_REL_OFFSET edx, 0 | ||
223 | pushl_cfi %ecx | ||
224 | CFI_REL_OFFSET ecx, 0 | ||
225 | pushl_cfi %ebx | ||
226 | CFI_REL_OFFSET ebx, 0 | ||
227 | .endm | 234 | .endm |
228 | 235 | ||
229 | .macro RESTORE_ALL | 236 | .macro RESTORE_ALL |
230 | popl_cfi %ebx | 237 | popl_cfi_reg ebx |
231 | CFI_RESTORE ebx | 238 | popl_cfi_reg ecx |
232 | popl_cfi %ecx | 239 | popl_cfi_reg edx |
233 | CFI_RESTORE ecx | 240 | popl_cfi_reg esi |
234 | popl_cfi %edx | 241 | popl_cfi_reg edi |
235 | CFI_RESTORE edx | 242 | popl_cfi_reg ebp |
236 | popl_cfi %esi | 243 | popl_cfi_reg eax |
237 | CFI_RESTORE esi | ||
238 | popl_cfi %edi | ||
239 | CFI_RESTORE edi | ||
240 | popl_cfi %ebp | ||
241 | CFI_RESTORE ebp | ||
242 | popl_cfi %eax | ||
243 | CFI_RESTORE eax | ||
244 | .endm | 244 | .endm |
245 | 245 | ||
246 | #endif /* CONFIG_X86_64 */ | 246 | #endif /* CONFIG_X86_64 */ |
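The new R15 ... SS constants are byte offsets into the struct pt_regs that the entry code builds on the stack (the struct itself is annotated in the ptrace.h hunk further down), and SIZEOF_PTREGS is simply 21 slots of 8 bytes = 168 bytes. A user-space mirror of the layout with _Static_asserts makes that arithmetic explicit; this is an illustrative sketch assuming an LP64 build, not the kernel's definition:

```c
#include <stddef.h>

struct pt_regs_mirror {
	/* Callee-preserved; only saved when a full pt_regs is needed. */
	unsigned long r15, r14, r13, r12, bp, bx;
	/* Callee-clobbered; always saved on kernel entry. */
	unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;
	/* Syscall number, error code or IRQ number. */
	unsigned long orig_ax;
	/* Return frame for iretq. */
	unsigned long ip, cs, flags, sp, ss;
};

_Static_assert(offsetof(struct pt_regs_mirror, r11) == 6*8, "R11");
_Static_assert(offsetof(struct pt_regs_mirror, ax) == 10*8, "RAX");
_Static_assert(offsetof(struct pt_regs_mirror, orig_ax) == 15*8, "ORIG_RAX");
_Static_assert(offsetof(struct pt_regs_mirror, ip) == 16*8, "RIP");
_Static_assert(sizeof(struct pt_regs_mirror) == 21*8, "SIZEOF_PTREGS");

int main(void)
{
	return 0;
}
```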
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 59c6c401f79f..acdee09228b3 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) | |||
301 | sp = task_pt_regs(current)->sp; | 301 | sp = task_pt_regs(current)->sp; |
302 | } else { | 302 | } else { |
303 | /* -128 for the x32 ABI redzone */ | 303 | /* -128 for the x32 ABI redzone */ |
304 | sp = this_cpu_read(old_rsp) - 128; | 304 | sp = task_pt_regs(current)->sp - 128; |
305 | } | 305 | } |
306 | 306 | ||
307 | return (void __user *)round_down(sp - len, 16); | 307 | return (void __user *)round_down(sp - len, 16); |
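Both branches of arch_compat_alloc_user_space() now take the user stack pointer from task_pt_regs(); the surrounding arithmetic is unchanged: step below the saved SP (an extra 128 bytes in the x32 case, for its red zone) and round the result down to 16 bytes. The same logic in plain C, with invented names, purely for illustration:

```c
static unsigned long carve_compat_stack(unsigned long user_sp,
					unsigned long len, int is_x32)
{
	unsigned long sp = user_sp;

	if (is_x32)
		sp -= 128;		/* skip the x32 ABI red zone below the SP */

	return (sp - len) & ~15UL;	/* round_down(sp - len, 16) */
}
```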
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 90a54851aedc..854c04b3c9c2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -231,7 +231,9 @@ | |||
231 | #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ | 231 | #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ |
232 | #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ | 232 | #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ |
233 | #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ | 233 | #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ |
234 | #define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ | ||
234 | #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ | 235 | #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ |
236 | #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ | ||
235 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ | 237 | #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ |
236 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ | 238 | #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ |
237 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ | 239 | #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ |
@@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
418 | " .word %P0\n" /* 1: do replace */ | 420 | " .word %P0\n" /* 1: do replace */ |
419 | " .byte 2b - 1b\n" /* source len */ | 421 | " .byte 2b - 1b\n" /* source len */ |
420 | " .byte 0\n" /* replacement len */ | 422 | " .byte 0\n" /* replacement len */ |
423 | " .byte 0\n" /* pad len */ | ||
421 | ".previous\n" | 424 | ".previous\n" |
422 | /* skipping size check since replacement size = 0 */ | 425 | /* skipping size check since replacement size = 0 */ |
423 | : : "i" (X86_FEATURE_ALWAYS) : : t_warn); | 426 | : : "i" (X86_FEATURE_ALWAYS) : : t_warn); |
@@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
432 | " .word %P0\n" /* feature bit */ | 435 | " .word %P0\n" /* feature bit */ |
433 | " .byte 2b - 1b\n" /* source len */ | 436 | " .byte 2b - 1b\n" /* source len */ |
434 | " .byte 0\n" /* replacement len */ | 437 | " .byte 0\n" /* replacement len */ |
438 | " .byte 0\n" /* pad len */ | ||
435 | ".previous\n" | 439 | ".previous\n" |
436 | /* skipping size check since replacement size = 0 */ | 440 | /* skipping size check since replacement size = 0 */ |
437 | : : "i" (bit) : : t_no); | 441 | : : "i" (bit) : : t_no); |
@@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
457 | " .word %P1\n" /* feature bit */ | 461 | " .word %P1\n" /* feature bit */ |
458 | " .byte 2b - 1b\n" /* source len */ | 462 | " .byte 2b - 1b\n" /* source len */ |
459 | " .byte 4f - 3f\n" /* replacement len */ | 463 | " .byte 4f - 3f\n" /* replacement len */ |
464 | " .byte 0\n" /* pad len */ | ||
460 | ".previous\n" | 465 | ".previous\n" |
461 | ".section .discard,\"aw\",@progbits\n" | 466 | ".section .discard,\"aw\",@progbits\n" |
462 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ | 467 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ |
@@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
483 | static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | 488 | static __always_inline __pure bool _static_cpu_has_safe(u16 bit) |
484 | { | 489 | { |
485 | #ifdef CC_HAVE_ASM_GOTO | 490 | #ifdef CC_HAVE_ASM_GOTO |
486 | /* | 491 | asm_volatile_goto("1: jmp %l[t_dynamic]\n" |
487 | * We need to spell the jumps to the compiler because, depending on the offset, | ||
488 | * the replacement jump can be bigger than the original jump, and this we cannot | ||
489 | * have. Thus, we force the jump to the widest, 4-byte, signed relative | ||
490 | * offset even though the last would often fit in less bytes. | ||
491 | */ | ||
492 | asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" | ||
493 | "2:\n" | 492 | "2:\n" |
493 | ".skip -(((5f-4f) - (2b-1b)) > 0) * " | ||
494 | "((5f-4f) - (2b-1b)),0x90\n" | ||
495 | "3:\n" | ||
494 | ".section .altinstructions,\"a\"\n" | 496 | ".section .altinstructions,\"a\"\n" |
495 | " .long 1b - .\n" /* src offset */ | 497 | " .long 1b - .\n" /* src offset */ |
496 | " .long 3f - .\n" /* repl offset */ | 498 | " .long 4f - .\n" /* repl offset */ |
497 | " .word %P1\n" /* always replace */ | 499 | " .word %P1\n" /* always replace */ |
498 | " .byte 2b - 1b\n" /* src len */ | 500 | " .byte 3b - 1b\n" /* src len */ |
499 | " .byte 4f - 3f\n" /* repl len */ | 501 | " .byte 5f - 4f\n" /* repl len */ |
502 | " .byte 3b - 2b\n" /* pad len */ | ||
500 | ".previous\n" | 503 | ".previous\n" |
501 | ".section .altinstr_replacement,\"ax\"\n" | 504 | ".section .altinstr_replacement,\"ax\"\n" |
502 | "3: .byte 0xe9\n .long %l[t_no] - 2b\n" | 505 | "4: jmp %l[t_no]\n" |
503 | "4:\n" | 506 | "5:\n" |
504 | ".previous\n" | 507 | ".previous\n" |
505 | ".section .altinstructions,\"a\"\n" | 508 | ".section .altinstructions,\"a\"\n" |
506 | " .long 1b - .\n" /* src offset */ | 509 | " .long 1b - .\n" /* src offset */ |
507 | " .long 0\n" /* no replacement */ | 510 | " .long 0\n" /* no replacement */ |
508 | " .word %P0\n" /* feature bit */ | 511 | " .word %P0\n" /* feature bit */ |
509 | " .byte 2b - 1b\n" /* src len */ | 512 | " .byte 3b - 1b\n" /* src len */ |
510 | " .byte 0\n" /* repl len */ | 513 | " .byte 0\n" /* repl len */ |
514 | " .byte 0\n" /* pad len */ | ||
511 | ".previous\n" | 515 | ".previous\n" |
512 | : : "i" (bit), "i" (X86_FEATURE_ALWAYS) | 516 | : : "i" (bit), "i" (X86_FEATURE_ALWAYS) |
513 | : : t_dynamic, t_no); | 517 | : : t_dynamic, t_no); |
@@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | |||
527 | " .word %P2\n" /* always replace */ | 531 | " .word %P2\n" /* always replace */ |
528 | " .byte 2b - 1b\n" /* source len */ | 532 | " .byte 2b - 1b\n" /* source len */ |
529 | " .byte 4f - 3f\n" /* replacement len */ | 533 | " .byte 4f - 3f\n" /* replacement len */ |
534 | " .byte 0\n" /* pad len */ | ||
530 | ".previous\n" | 535 | ".previous\n" |
531 | ".section .discard,\"aw\",@progbits\n" | 536 | ".section .discard,\"aw\",@progbits\n" |
532 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ | 537 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ |
@@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | |||
541 | " .word %P1\n" /* feature bit */ | 546 | " .word %P1\n" /* feature bit */ |
542 | " .byte 4b - 3b\n" /* src len */ | 547 | " .byte 4b - 3b\n" /* src len */ |
543 | " .byte 6f - 5f\n" /* repl len */ | 548 | " .byte 6f - 5f\n" /* repl len */ |
549 | " .byte 0\n" /* pad len */ | ||
544 | ".previous\n" | 550 | ".previous\n" |
545 | ".section .discard,\"aw\",@progbits\n" | 551 | ".section .discard,\"aw\",@progbits\n" |
546 | " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ | 552 | " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ |
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a94b82e8f156..a0bf89fd2647 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr, | |||
376 | * Pentium F0 0F bugfix can have resulted in the mapped | 376 | * Pentium F0 0F bugfix can have resulted in the mapped |
377 | * IDT being write-protected. | 377 | * IDT being write-protected. |
378 | */ | 378 | */ |
379 | #define set_intr_gate(n, addr) \ | 379 | #define set_intr_gate_notrace(n, addr) \ |
380 | do { \ | 380 | do { \ |
381 | BUG_ON((unsigned)n > 0xFF); \ | 381 | BUG_ON((unsigned)n > 0xFF); \ |
382 | _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ | 382 | _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ |
383 | __KERNEL_CS); \ | 383 | __KERNEL_CS); \ |
384 | } while (0) | ||
385 | |||
386 | #define set_intr_gate(n, addr) \ | ||
387 | do { \ | ||
388 | set_intr_gate_notrace(n, addr); \ | ||
384 | _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ | 389 | _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ |
385 | 0, 0, __KERNEL_CS); \ | 390 | 0, 0, __KERNEL_CS); \ |
386 | } while (0) | 391 | } while (0) |
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f15986df6c..de1cdaf4d743 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@ | |||
86 | CFI_ADJUST_CFA_OFFSET 8 | 86 | CFI_ADJUST_CFA_OFFSET 8 |
87 | .endm | 87 | .endm |
88 | 88 | ||
89 | .macro pushq_cfi_reg reg | ||
90 | pushq %\reg | ||
91 | CFI_ADJUST_CFA_OFFSET 8 | ||
92 | CFI_REL_OFFSET \reg, 0 | ||
93 | .endm | ||
94 | |||
89 | .macro popq_cfi reg | 95 | .macro popq_cfi reg |
90 | popq \reg | 96 | popq \reg |
91 | CFI_ADJUST_CFA_OFFSET -8 | 97 | CFI_ADJUST_CFA_OFFSET -8 |
92 | .endm | 98 | .endm |
93 | 99 | ||
100 | .macro popq_cfi_reg reg | ||
101 | popq %\reg | ||
102 | CFI_ADJUST_CFA_OFFSET -8 | ||
103 | CFI_RESTORE \reg | ||
104 | .endm | ||
105 | |||
94 | .macro pushfq_cfi | 106 | .macro pushfq_cfi |
95 | pushfq | 107 | pushfq |
96 | CFI_ADJUST_CFA_OFFSET 8 | 108 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -116,11 +128,23 @@ | |||
116 | CFI_ADJUST_CFA_OFFSET 4 | 128 | CFI_ADJUST_CFA_OFFSET 4 |
117 | .endm | 129 | .endm |
118 | 130 | ||
131 | .macro pushl_cfi_reg reg | ||
132 | pushl %\reg | ||
133 | CFI_ADJUST_CFA_OFFSET 4 | ||
134 | CFI_REL_OFFSET \reg, 0 | ||
135 | .endm | ||
136 | |||
119 | .macro popl_cfi reg | 137 | .macro popl_cfi reg |
120 | popl \reg | 138 | popl \reg |
121 | CFI_ADJUST_CFA_OFFSET -4 | 139 | CFI_ADJUST_CFA_OFFSET -4 |
122 | .endm | 140 | .endm |
123 | 141 | ||
142 | .macro popl_cfi_reg reg | ||
143 | popl %\reg | ||
144 | CFI_ADJUST_CFA_OFFSET -4 | ||
145 | CFI_RESTORE \reg | ||
146 | .endm | ||
147 | |||
124 | .macro pushfl_cfi | 148 | .macro pushfl_cfi |
125 | pushfl | 149 | pushfl |
126 | CFI_ADJUST_CFA_OFFSET 4 | 150 | CFI_ADJUST_CFA_OFFSET 4 |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index ca3347a9dab5..3563107b5060 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,10 +171,11 @@ do { \ | |||
171 | static inline void elf_common_init(struct thread_struct *t, | 171 | static inline void elf_common_init(struct thread_struct *t, |
172 | struct pt_regs *regs, const u16 ds) | 172 | struct pt_regs *regs, const u16 ds) |
173 | { | 173 | { |
174 | regs->ax = regs->bx = regs->cx = regs->dx = 0; | 174 | /* Commented-out registers are cleared in stub_execve */ |
175 | regs->si = regs->di = regs->bp = 0; | 175 | /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0; |
176 | regs->si = regs->di /*= regs->bp*/ = 0; | ||
176 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; | 177 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; |
177 | regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; | 178 | /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/ |
178 | t->fs = t->gs = 0; | 179 | t->fs = t->gs = 0; |
179 | t->fsindex = t->gsindex = 0; | 180 | t->fsindex = t->gsindex = 0; |
180 | t->ds = t->es = ds; | 181 | t->ds = t->es = ds; |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 9662290e0b20..e9571ddabc4f 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *); | |||
181 | extern __visible void smp_invalidate_interrupt(struct pt_regs *); | 181 | extern __visible void smp_invalidate_interrupt(struct pt_regs *); |
182 | #endif | 182 | #endif |
183 | 183 | ||
184 | extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR | 184 | extern char irq_entries_start[]; |
185 | - FIRST_EXTERNAL_VECTOR])(void); | ||
186 | #ifdef CONFIG_TRACING | 185 | #ifdef CONFIG_TRACING |
187 | #define trace_interrupt interrupt | 186 | #define trace_irq_entries_start irq_entries_start |
188 | #endif | 187 | #endif |
189 | 188 | ||
190 | #define VECTOR_UNDEFINED (-1) | 189 | #define VECTOR_UNDEFINED (-1) |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 47f29b1d1846..e7814b74caf8 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -69,7 +69,7 @@ struct insn { | |||
69 | const insn_byte_t *next_byte; | 69 | const insn_byte_t *next_byte; |
70 | }; | 70 | }; |
71 | 71 | ||
72 | #define MAX_INSN_SIZE 16 | 72 | #define MAX_INSN_SIZE 15 |
73 | 73 | ||
74 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | 74 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) |
75 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | 75 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) |
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519226b8..b77f5edb03b0 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void) | |||
136 | #define USERGS_SYSRET32 \ | 136 | #define USERGS_SYSRET32 \ |
137 | swapgs; \ | 137 | swapgs; \ |
138 | sysretl | 138 | sysretl |
139 | #define ENABLE_INTERRUPTS_SYSEXIT32 \ | ||
140 | swapgs; \ | ||
141 | sti; \ | ||
142 | sysexit | ||
143 | 139 | ||
144 | #else | 140 | #else |
145 | #define INTERRUPT_RETURN iret | 141 | #define INTERRUPT_RETURN iret |
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void) | |||
163 | 159 | ||
164 | return arch_irqs_disabled_flags(flags); | 160 | return arch_irqs_disabled_flags(flags); |
165 | } | 161 | } |
162 | #endif /* !__ASSEMBLY__ */ | ||
166 | 163 | ||
164 | #ifdef __ASSEMBLY__ | ||
165 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
166 | # define TRACE_IRQS_ON call trace_hardirqs_on_thunk; | ||
167 | # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; | ||
167 | #else | 168 | #else |
168 | 169 | # define TRACE_IRQS_ON | |
169 | #ifdef CONFIG_X86_64 | 170 | # define TRACE_IRQS_OFF |
170 | #define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk | 171 | #endif |
171 | #define ARCH_LOCKDEP_SYS_EXIT_IRQ \ | 172 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
173 | # ifdef CONFIG_X86_64 | ||
174 | # define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk | ||
175 | # define LOCKDEP_SYS_EXIT_IRQ \ | ||
172 | TRACE_IRQS_ON; \ | 176 | TRACE_IRQS_ON; \ |
173 | sti; \ | 177 | sti; \ |
174 | SAVE_REST; \ | 178 | call lockdep_sys_exit_thunk; \ |
175 | LOCKDEP_SYS_EXIT; \ | ||
176 | RESTORE_REST; \ | ||
177 | cli; \ | 179 | cli; \ |
178 | TRACE_IRQS_OFF; | 180 | TRACE_IRQS_OFF; |
179 | 181 | # else | |
180 | #else | 182 | # define LOCKDEP_SYS_EXIT \ |
181 | #define ARCH_LOCKDEP_SYS_EXIT \ | ||
182 | pushl %eax; \ | 183 | pushl %eax; \ |
183 | pushl %ecx; \ | 184 | pushl %ecx; \ |
184 | pushl %edx; \ | 185 | pushl %edx; \ |
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void) | |||
186 | popl %edx; \ | 187 | popl %edx; \ |
187 | popl %ecx; \ | 188 | popl %ecx; \ |
188 | popl %eax; | 189 | popl %eax; |
189 | 190 | # define LOCKDEP_SYS_EXIT_IRQ | |
190 | #define ARCH_LOCKDEP_SYS_EXIT_IRQ | 191 | # endif |
191 | #endif | ||
192 | |||
193 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
194 | # define TRACE_IRQS_ON call trace_hardirqs_on_thunk; | ||
195 | # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; | ||
196 | #else | 192 | #else |
197 | # define TRACE_IRQS_ON | ||
198 | # define TRACE_IRQS_OFF | ||
199 | #endif | ||
200 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
201 | # define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT | ||
202 | # define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ | ||
203 | # else | ||
204 | # define LOCKDEP_SYS_EXIT | 193 | # define LOCKDEP_SYS_EXIT |
205 | # define LOCKDEP_SYS_EXIT_IRQ | 194 | # define LOCKDEP_SYS_EXIT_IRQ |
206 | # endif | 195 | #endif |
207 | |||
208 | #endif /* __ASSEMBLY__ */ | 196 | #endif /* __ASSEMBLY__ */ |
197 | |||
209 | #endif | 198 | #endif |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 965c47d254aa..5f6051d5d139 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -976,11 +976,6 @@ extern void default_banner(void); | |||
976 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ | 976 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ |
977 | CLBR_NONE, \ | 977 | CLBR_NONE, \ |
978 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) | 978 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) |
979 | |||
980 | #define ENABLE_INTERRUPTS_SYSEXIT32 \ | ||
981 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ | ||
982 | CLBR_NONE, \ | ||
983 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) | ||
984 | #endif /* CONFIG_X86_32 */ | 979 | #endif /* CONFIG_X86_32 */ |
985 | 980 | ||
986 | #endif /* __ASSEMBLY__ */ | 981 | #endif /* __ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec1c93588cef..d2203b5d9538 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -210,8 +210,23 @@ struct x86_hw_tss { | |||
210 | unsigned long sp0; | 210 | unsigned long sp0; |
211 | unsigned short ss0, __ss0h; | 211 | unsigned short ss0, __ss0h; |
212 | unsigned long sp1; | 212 | unsigned long sp1; |
213 | /* ss1 caches MSR_IA32_SYSENTER_CS: */ | 213 | |
214 | unsigned short ss1, __ss1h; | 214 | /* |
215 | * We don't use ring 1, so ss1 is a convenient scratch space in | ||
216 | * the same cacheline as sp0. We use ss1 to cache the value in | ||
217 | * MSR_IA32_SYSENTER_CS. When we context switch | ||
218 | * MSR_IA32_SYSENTER_CS, we first check if the new value being | ||
219 | * written matches ss1, and, if it's not, then we wrmsr the new | ||
220 | * value and update ss1. | ||
221 | * | ||
222 | * The only reason we context switch MSR_IA32_SYSENTER_CS is | ||
223 | * that we set it to zero in vm86 tasks to avoid corrupting the | ||
224 | * stack if we were to go through the sysenter path from vm86 | ||
225 | * mode. | ||
226 | */ | ||
227 | unsigned short ss1; /* MSR_IA32_SYSENTER_CS */ | ||
228 | |||
229 | unsigned short __ss1h; | ||
215 | unsigned long sp2; | 230 | unsigned long sp2; |
216 | unsigned short ss2, __ss2h; | 231 | unsigned short ss2, __ss2h; |
217 | unsigned long __cr3; | 232 | unsigned long __cr3; |
@@ -276,13 +291,17 @@ struct tss_struct { | |||
276 | unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; | 291 | unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; |
277 | 292 | ||
278 | /* | 293 | /* |
279 | * .. and then another 0x100 bytes for the emergency kernel stack: | 294 | * Space for the temporary SYSENTER stack: |
280 | */ | 295 | */ |
281 | unsigned long stack[64]; | 296 | unsigned long SYSENTER_stack[64]; |
282 | 297 | ||
283 | } ____cacheline_aligned; | 298 | } ____cacheline_aligned; |
284 | 299 | ||
285 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); | 300 | DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); |
301 | |||
302 | #ifdef CONFIG_X86_32 | ||
303 | DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); | ||
304 | #endif | ||
286 | 305 | ||
287 | /* | 306 | /* |
288 | * Save the original ist values for checking stack pointers during debugging | 307 | * Save the original ist values for checking stack pointers during debugging |
@@ -474,7 +493,6 @@ struct thread_struct { | |||
474 | #ifdef CONFIG_X86_32 | 493 | #ifdef CONFIG_X86_32 |
475 | unsigned long sysenter_cs; | 494 | unsigned long sysenter_cs; |
476 | #else | 495 | #else |
477 | unsigned long usersp; /* Copy from PDA */ | ||
478 | unsigned short es; | 496 | unsigned short es; |
479 | unsigned short ds; | 497 | unsigned short ds; |
480 | unsigned short fsindex; | 498 | unsigned short fsindex; |
@@ -564,6 +582,16 @@ static inline void native_swapgs(void) | |||
564 | #endif | 582 | #endif |
565 | } | 583 | } |
566 | 584 | ||
585 | static inline unsigned long current_top_of_stack(void) | ||
586 | { | ||
587 | #ifdef CONFIG_X86_64 | ||
588 | return this_cpu_read_stable(cpu_tss.x86_tss.sp0); | ||
589 | #else | ||
590 | /* sp0 on x86_32 is special in and around vm86 mode. */ | ||
591 | return this_cpu_read_stable(cpu_current_top_of_stack); | ||
592 | #endif | ||
593 | } | ||
594 | |||
567 | #ifdef CONFIG_PARAVIRT | 595 | #ifdef CONFIG_PARAVIRT |
568 | #include <asm/paravirt.h> | 596 | #include <asm/paravirt.h> |
569 | #else | 597 | #else |
@@ -761,10 +789,10 @@ extern char ignore_fpu_irq; | |||
761 | #define ARCH_HAS_SPINLOCK_PREFETCH | 789 | #define ARCH_HAS_SPINLOCK_PREFETCH |
762 | 790 | ||
763 | #ifdef CONFIG_X86_32 | 791 | #ifdef CONFIG_X86_32 |
764 | # define BASE_PREFETCH ASM_NOP4 | 792 | # define BASE_PREFETCH "" |
765 | # define ARCH_HAS_PREFETCH | 793 | # define ARCH_HAS_PREFETCH |
766 | #else | 794 | #else |
767 | # define BASE_PREFETCH "prefetcht0 (%1)" | 795 | # define BASE_PREFETCH "prefetcht0 %P1" |
768 | #endif | 796 | #endif |
769 | 797 | ||
770 | /* | 798 | /* |
@@ -775,10 +803,9 @@ extern char ignore_fpu_irq; | |||
775 | */ | 803 | */ |
776 | static inline void prefetch(const void *x) | 804 | static inline void prefetch(const void *x) |
777 | { | 805 | { |
778 | alternative_input(BASE_PREFETCH, | 806 | alternative_input(BASE_PREFETCH, "prefetchnta %P1", |
779 | "prefetchnta (%1)", | ||
780 | X86_FEATURE_XMM, | 807 | X86_FEATURE_XMM, |
781 | "r" (x)); | 808 | "m" (*(const char *)x)); |
782 | } | 809 | } |
783 | 810 | ||
784 | /* | 811 | /* |
@@ -788,10 +815,9 @@ static inline void prefetch(const void *x) | |||
788 | */ | 815 | */ |
789 | static inline void prefetchw(const void *x) | 816 | static inline void prefetchw(const void *x) |
790 | { | 817 | { |
791 | alternative_input(BASE_PREFETCH, | 818 | alternative_input(BASE_PREFETCH, "prefetchw %P1", |
792 | "prefetchw (%1)", | 819 | X86_FEATURE_3DNOWPREFETCH, |
793 | X86_FEATURE_3DNOW, | 820 | "m" (*(const char *)x)); |
794 | "r" (x)); | ||
795 | } | 821 | } |
796 | 822 | ||
797 | static inline void spin_lock_prefetch(const void *x) | 823 | static inline void spin_lock_prefetch(const void *x) |
@@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x) | |||
799 | prefetchw(x); | 825 | prefetchw(x); |
800 | } | 826 | } |
801 | 827 | ||
828 | #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ | ||
829 | TOP_OF_KERNEL_STACK_PADDING) | ||
830 | |||
802 | #ifdef CONFIG_X86_32 | 831 | #ifdef CONFIG_X86_32 |
803 | /* | 832 | /* |
804 | * User space process size: 3GB (default). | 833 | * User space process size: 3GB (default). |
@@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x) | |||
809 | #define STACK_TOP_MAX STACK_TOP | 838 | #define STACK_TOP_MAX STACK_TOP |
810 | 839 | ||
811 | #define INIT_THREAD { \ | 840 | #define INIT_THREAD { \ |
812 | .sp0 = sizeof(init_stack) + (long)&init_stack, \ | 841 | .sp0 = TOP_OF_INIT_STACK, \ |
813 | .vm86_info = NULL, \ | 842 | .vm86_info = NULL, \ |
814 | .sysenter_cs = __KERNEL_CS, \ | 843 | .sysenter_cs = __KERNEL_CS, \ |
815 | .io_bitmap_ptr = NULL, \ | 844 | .io_bitmap_ptr = NULL, \ |
816 | } | 845 | } |
817 | 846 | ||
818 | /* | ||
819 | * Note that the .io_bitmap member must be extra-big. This is because | ||
820 | * the CPU will access an additional byte beyond the end of the IO | ||
821 | * permission bitmap. The extra byte must be all 1 bits, and must | ||
822 | * be within the limit. | ||
823 | */ | ||
824 | #define INIT_TSS { \ | ||
825 | .x86_tss = { \ | ||
826 | .sp0 = sizeof(init_stack) + (long)&init_stack, \ | ||
827 | .ss0 = __KERNEL_DS, \ | ||
828 | .ss1 = __KERNEL_CS, \ | ||
829 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ | ||
830 | }, \ | ||
831 | .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \ | ||
832 | } | ||
833 | |||
834 | extern unsigned long thread_saved_pc(struct task_struct *tsk); | 847 | extern unsigned long thread_saved_pc(struct task_struct *tsk); |
835 | 848 | ||
836 | #define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) | ||
837 | #define KSTK_TOP(info) \ | ||
838 | ({ \ | ||
839 | unsigned long *__ptr = (unsigned long *)(info); \ | ||
840 | (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ | ||
841 | }) | ||
842 | |||
843 | /* | 849 | /* |
844 | * The below -8 is to reserve 8 bytes on top of the ring0 stack. | 850 | * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. |
845 | * This is necessary to guarantee that the entire "struct pt_regs" | 851 | * This is necessary to guarantee that the entire "struct pt_regs" |
846 | * is accessible even if the CPU haven't stored the SS/ESP registers | 852 | * is accessible even if the CPU haven't stored the SS/ESP registers |
847 | * on the stack (interrupt gate does not save these registers | 853 | * on the stack (interrupt gate does not save these registers |
@@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
850 | * "struct pt_regs" is possible, but they may contain the | 856 | * "struct pt_regs" is possible, but they may contain the |
851 | * completely wrong values. | 857 | * completely wrong values. |
852 | */ | 858 | */ |
853 | #define task_pt_regs(task) \ | 859 | #define task_pt_regs(task) \ |
854 | ({ \ | 860 | ({ \ |
855 | struct pt_regs *__regs__; \ | 861 | unsigned long __ptr = (unsigned long)task_stack_page(task); \ |
856 | __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ | 862 | __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ |
857 | __regs__ - 1; \ | 863 | ((struct pt_regs *)__ptr) - 1; \ |
858 | }) | 864 | }) |
859 | 865 | ||
860 | #define KSTK_ESP(task) (task_pt_regs(task)->sp) | 866 | #define KSTK_ESP(task) (task_pt_regs(task)->sp) |
@@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
886 | #define STACK_TOP_MAX TASK_SIZE_MAX | 892 | #define STACK_TOP_MAX TASK_SIZE_MAX |
887 | 893 | ||
888 | #define INIT_THREAD { \ | 894 | #define INIT_THREAD { \ |
889 | .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ | 895 | .sp0 = TOP_OF_INIT_STACK \ |
890 | } | ||
891 | |||
892 | #define INIT_TSS { \ | ||
893 | .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ | ||
894 | } | 896 | } |
895 | 897 | ||
896 | /* | 898 | /* |
@@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
902 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) | 904 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) |
903 | extern unsigned long KSTK_ESP(struct task_struct *task); | 905 | extern unsigned long KSTK_ESP(struct task_struct *task); |
904 | 906 | ||
905 | /* | ||
906 | * User space RSP while inside the SYSCALL fast path | ||
907 | */ | ||
908 | DECLARE_PER_CPU(unsigned long, old_rsp); | ||
909 | |||
910 | #endif /* CONFIG_X86_64 */ | 907 | #endif /* CONFIG_X86_64 */ |
911 | 908 | ||
912 | extern void start_thread(struct pt_regs *regs, unsigned long new_ip, | 909 | extern void start_thread(struct pt_regs *regs, unsigned long new_ip, |
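On 32-bit, task_pt_regs() now finds the register frame by starting from the top of the task's stack and backing off TOP_OF_KERNEL_STACK_PADDING, replacing the removed KSTK_TOP() helper. The arithmetic, as a user-space sketch with made-up example constants (THREAD_SIZE and the padding are configuration-dependent in the real kernel):

```c
#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE_EXAMPLE			8192	/* example value only */
#define TOP_OF_KERNEL_STACK_PADDING_EXAMPLE	8	/* example value only */

struct pt_regs_example { unsigned long bx, cx, dx, si, di, bp, ax; /* ... */ };

static struct pt_regs_example *task_pt_regs_example(void *stack_page)
{
	uintptr_t ptr = (uintptr_t)stack_page;

	ptr += THREAD_SIZE_EXAMPLE - TOP_OF_KERNEL_STACK_PADDING_EXAMPLE;
	return (struct pt_regs_example *)ptr - 1;	/* frame sits just below the padding */
}

int main(void)
{
	static unsigned char stack[THREAD_SIZE_EXAMPLE];
	struct pt_regs_example *regs = task_pt_regs_example(stack);

	printf("pt_regs ends %td bytes below the stack top\n",
	       (stack + THREAD_SIZE_EXAMPLE) - (unsigned char *)(regs + 1));
	return 0;
}
```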
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb82287..19507ffa5d28 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs { | |||
31 | #else /* __i386__ */ | 31 | #else /* __i386__ */ |
32 | 32 | ||
33 | struct pt_regs { | 33 | struct pt_regs { |
34 | /* | ||
35 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry | ||
36 | * unless syscall needs a complete, fully filled "struct pt_regs". | ||
37 | */ | ||
34 | unsigned long r15; | 38 | unsigned long r15; |
35 | unsigned long r14; | 39 | unsigned long r14; |
36 | unsigned long r13; | 40 | unsigned long r13; |
37 | unsigned long r12; | 41 | unsigned long r12; |
38 | unsigned long bp; | 42 | unsigned long bp; |
39 | unsigned long bx; | 43 | unsigned long bx; |
40 | /* arguments: non interrupts/non tracing syscalls only save up to here*/ | 44 | /* These regs are callee-clobbered. Always saved on kernel entry. */ |
41 | unsigned long r11; | 45 | unsigned long r11; |
42 | unsigned long r10; | 46 | unsigned long r10; |
43 | unsigned long r9; | 47 | unsigned long r9; |
@@ -47,9 +51,12 @@ struct pt_regs { | |||
47 | unsigned long dx; | 51 | unsigned long dx; |
48 | unsigned long si; | 52 | unsigned long si; |
49 | unsigned long di; | 53 | unsigned long di; |
54 | /* | ||
55 | * On syscall entry, this is syscall#. On CPU exception, this is error code. | ||
56 | * On hw interrupt, it's IRQ number: | ||
57 | */ | ||
50 | unsigned long orig_ax; | 58 | unsigned long orig_ax; |
51 | /* end of arguments */ | 59 | /* Return frame for iretq */ |
52 | /* cpu exception frame or undefined */ | ||
53 | unsigned long ip; | 60 | unsigned long ip; |
54 | unsigned long cs; | 61 | unsigned long cs; |
55 | unsigned long flags; | 62 | unsigned long flags; |
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) | |||
89 | } | 96 | } |
90 | 97 | ||
91 | /* | 98 | /* |
92 | * user_mode_vm(regs) determines whether a register set came from user mode. | 99 | * user_mode(regs) determines whether a register set came from user |
93 | * This is true if V8086 mode was enabled OR if the register set was from | 100 | * mode. On x86_32, this is true if V8086 mode was enabled OR if the |
94 | * protected mode with RPL-3 CS value. This tricky test checks that with | 101 | * register set was from protected mode with RPL-3 CS value. This |
95 | * one comparison. Many places in the kernel can bypass this full check | 102 | * tricky test checks that with one comparison. |
96 | * if they have already ruled out V8086 mode, so user_mode(regs) can be used. | 103 | * |
104 | * On x86_64, vm86 mode is mercifully nonexistent, and we don't need | ||
105 | * the extra check. | ||
97 | */ | 106 | */ |
98 | static inline int user_mode(struct pt_regs *regs) | 107 | static inline int user_mode(struct pt_regs *regs) |
99 | { | 108 | { |
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs) | |||
104 | #endif | 113 | #endif |
105 | } | 114 | } |
106 | 115 | ||
107 | static inline int user_mode_vm(struct pt_regs *regs) | ||
108 | { | ||
109 | #ifdef CONFIG_X86_32 | ||
110 | return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= | ||
111 | USER_RPL; | ||
112 | #else | ||
113 | return user_mode(regs); | ||
114 | #endif | ||
115 | } | ||
116 | |||
117 | static inline int v8086_mode(struct pt_regs *regs) | 116 | static inline int v8086_mode(struct pt_regs *regs) |
118 | { | 117 | { |
119 | #ifdef CONFIG_X86_32 | 118 | #ifdef CONFIG_X86_32 |
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs) | |||
138 | #endif | 137 | #endif |
139 | } | 138 | } |
140 | 139 | ||
141 | #define current_user_stack_pointer() this_cpu_read(old_rsp) | 140 | #define current_user_stack_pointer() current_pt_regs()->sp |
142 | /* ia32 vs. x32 difference */ | 141 | #define compat_user_stack_pointer() current_pt_regs()->sp |
143 | #define compat_user_stack_pointer() \ | ||
144 | (test_thread_flag(TIF_IA32) \ | ||
145 | ? current_pt_regs()->sp \ | ||
146 | : this_cpu_read(old_rsp)) | ||
147 | #endif | 142 | #endif |
148 | 143 | ||
149 | #ifdef CONFIG_X86_32 | 144 | #ifdef CONFIG_X86_32 |
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, | |||
248 | */ | 243 | */ |
249 | #define arch_ptrace_stop_needed(code, info) \ | 244 | #define arch_ptrace_stop_needed(code, info) \ |
250 | ({ \ | 245 | ({ \ |
251 | set_thread_flag(TIF_NOTIFY_RESUME); \ | 246 | force_iret(); \ |
252 | false; \ | 247 | false; \ |
253 | }) | 248 | }) |
254 | 249 | ||
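The merged user_mode() keeps the single-comparison trick: the low two bits of the saved CS are the privilege level (0 for kernel, 3 = USER_RPL for user), and OR-ing in the EFLAGS VM bit pushes the value past USER_RPL for vm86 frames no matter what the saved CS contains. A small user-space check of that reasoning, for illustration; the constants mirror the x86 definitions:

```c
#include <assert.h>

#define SEGMENT_RPL_MASK	0x3
#define USER_RPL		0x3
#define X86_VM_MASK		0x00020000	/* EFLAGS.VM */

static int user_mode_check(unsigned long cs, unsigned long flags)
{
	return ((cs & SEGMENT_RPL_MASK) | (flags & X86_VM_MASK)) >= USER_RPL;
}

int main(void)
{
	assert(!user_mode_check(0x10, 0x202));			/* kernel CS, RPL 0 */
	assert(user_mode_check(0x73, 0x202));			/* user CS, RPL 3 */
	assert(user_mode_check(0x0000, 0x202 | X86_VM_MASK));	/* vm86: VM set */
	return 0;
}
```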
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index db257a58571f..5a9856eb12ba 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -3,8 +3,10 @@ | |||
3 | 3 | ||
4 | #include <linux/const.h> | 4 | #include <linux/const.h> |
5 | 5 | ||
6 | /* Constructor for a conventional segment GDT (or LDT) entry */ | 6 | /* |
7 | /* This is a macro so it can be used in initializers */ | 7 | * Constructor for a conventional segment GDT (or LDT) entry. |
8 | * This is a macro so it can be used in initializers. | ||
9 | */ | ||
8 | #define GDT_ENTRY(flags, base, limit) \ | 10 | #define GDT_ENTRY(flags, base, limit) \ |
9 | ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ | 11 | ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ |
10 | (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ | 12 | (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ |
@@ -12,198 +14,228 @@ | |||
12 | (((base) & _AC(0x00ffffff,ULL)) << 16) | \ | 14 | (((base) & _AC(0x00ffffff,ULL)) << 16) | \ |
13 | (((limit) & _AC(0x0000ffff,ULL)))) | 15 | (((limit) & _AC(0x0000ffff,ULL)))) |
14 | 16 | ||
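A quick sanity check of how GDT_ENTRY() packs its fields (the hunk elides the limit[19:16] term of the macro; the sketch below fills it in from the full definition): the flat 4 GB ring-0 code segment comes out as the familiar descriptor value 0x00cf9a000000ffff.

    /* Userspace sketch, not kernel code: reproduce the field packing. */
    #include <stdio.h>
    #include <stdint.h>

    #define GDT_ENTRY(flags, base, limit)                          \
            ((((uint64_t)(base)  & 0xff000000ULL) << (56-24)) |    \
             (((uint64_t)(flags) & 0x0000f0ffULL) << 40)      |    \
             (((uint64_t)(limit) & 0x000f0000ULL) << (48-16)) |    \
             (((uint64_t)(base)  & 0x00ffffffULL) << 16)      |    \
             (((uint64_t)(limit) & 0x0000ffffULL)))

    int main(void)
    {
            /* base 0, limit 0xfffff in 4 KB pages, flags 0xc09a (present, DPL 0, code, 4 KB gran, 32-bit) */
            printf("%#018llx\n", (unsigned long long)GDT_ENTRY(0xc09a, 0, 0xfffff));
            return 0;   /* prints 0x00cf9a000000ffff */
    }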
15 | /* Simple and small GDT entries for booting only */ | 17 | /* Simple and small GDT entries for booting only: */ |
16 | 18 | ||
17 | #define GDT_ENTRY_BOOT_CS 2 | 19 | #define GDT_ENTRY_BOOT_CS 2 |
18 | #define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) | 20 | #define GDT_ENTRY_BOOT_DS 3 |
21 | #define GDT_ENTRY_BOOT_TSS 4 | ||
22 | #define __BOOT_CS (GDT_ENTRY_BOOT_CS*8) | ||
23 | #define __BOOT_DS (GDT_ENTRY_BOOT_DS*8) | ||
24 | #define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8) | ||
25 | |||
26 | /* | ||
27 | * Bottom two bits of selector give the ring | ||
28 | * privilege level | ||
29 | */ | ||
30 | #define SEGMENT_RPL_MASK 0x3 | ||
19 | 31 | ||
20 | #define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) | 32 | /* User mode is privilege level 3: */ |
21 | #define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) | 33 | #define USER_RPL 0x3 |
22 | 34 | ||
23 | #define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) | 35 | /* Bit 2 is Table Indicator (TI): selects between LDT or GDT */ |
24 | #define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) | 36 | #define SEGMENT_TI_MASK 0x4 |
37 | /* LDT segment has TI set ... */ | ||
38 | #define SEGMENT_LDT 0x4 | ||
39 | /* ... GDT has it cleared */ | ||
40 | #define SEGMENT_GDT 0x0 | ||
25 | 41 | ||
26 | #define SEGMENT_RPL_MASK 0x3 /* | 42 | #define GDT_ENTRY_INVALID_SEG 0 |
27 | * Bottom two bits of selector give the ring | ||
28 | * privilege level | ||
29 | */ | ||
30 | #define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */ | ||
31 | #define USER_RPL 0x3 /* User mode is privilege level 3 */ | ||
32 | #define SEGMENT_LDT 0x4 /* LDT segment has TI set... */ | ||
33 | #define SEGMENT_GDT 0x0 /* ... GDT has it cleared */ | ||
34 | 43 | ||
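Decoding a selector with the masks above, using the 32-bit __USER_DS value defined further down (GDT_ENTRY_DEFAULT_USER_DS*8 + 3 = 0x7b) as a worked example:

    unsigned short sel   = 0x7b;                    /* __USER_DS on x86_32              */
    unsigned short rpl   = sel & SEGMENT_RPL_MASK;  /* 0x3 -> USER_RPL (ring 3)         */
    unsigned short table = sel & SEGMENT_TI_MASK;   /* 0x0 -> SEGMENT_GDT               */
    unsigned short index = sel >> 3;                /* 15  -> GDT_ENTRY_DEFAULT_USER_DS */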
35 | #ifdef CONFIG_X86_32 | 44 | #ifdef CONFIG_X86_32 |
36 | /* | 45 | /* |
37 | * The layout of the per-CPU GDT under Linux: | 46 | * The layout of the per-CPU GDT under Linux: |
38 | * | 47 | * |
39 | * 0 - null | 48 | * 0 - null <=== cacheline #1 |
40 | * 1 - reserved | 49 | * 1 - reserved |
41 | * 2 - reserved | 50 | * 2 - reserved |
42 | * 3 - reserved | 51 | * 3 - reserved |
43 | * | 52 | * |
44 | * 4 - unused <==== new cacheline | 53 | * 4 - unused <=== cacheline #2 |
45 | * 5 - unused | 54 | * 5 - unused |
46 | * | 55 | * |
47 | * ------- start of TLS (Thread-Local Storage) segments: | 56 | * ------- start of TLS (Thread-Local Storage) segments: |
48 | * | 57 | * |
49 | * 6 - TLS segment #1 [ glibc's TLS segment ] | 58 | * 6 - TLS segment #1 [ glibc's TLS segment ] |
50 | * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] | 59 | * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] |
51 | * 8 - TLS segment #3 | 60 | * 8 - TLS segment #3 <=== cacheline #3 |
52 | * 9 - reserved | 61 | * 9 - reserved |
53 | * 10 - reserved | 62 | * 10 - reserved |
54 | * 11 - reserved | 63 | * 11 - reserved |
55 | * | 64 | * |
56 | * ------- start of kernel segments: | 65 | * ------- start of kernel segments: |
57 | * | 66 | * |
58 | * 12 - kernel code segment <==== new cacheline | 67 | * 12 - kernel code segment <=== cacheline #4 |
59 | * 13 - kernel data segment | 68 | * 13 - kernel data segment |
60 | * 14 - default user CS | 69 | * 14 - default user CS |
61 | * 15 - default user DS | 70 | * 15 - default user DS |
62 | * 16 - TSS | 71 | * 16 - TSS <=== cacheline #5 |
63 | * 17 - LDT | 72 | * 17 - LDT |
64 | * 18 - PNPBIOS support (16->32 gate) | 73 | * 18 - PNPBIOS support (16->32 gate) |
65 | * 19 - PNPBIOS support | 74 | * 19 - PNPBIOS support |
66 | * 20 - PNPBIOS support | 75 | * 20 - PNPBIOS support <=== cacheline #6 |
67 | * 21 - PNPBIOS support | 76 | * 21 - PNPBIOS support |
68 | * 22 - PNPBIOS support | 77 | * 22 - PNPBIOS support |
69 | * 23 - APM BIOS support | 78 | * 23 - APM BIOS support |
70 | * 24 - APM BIOS support | 79 | * 24 - APM BIOS support <=== cacheline #7 |
71 | * 25 - APM BIOS support | 80 | * 25 - APM BIOS support |
72 | * | 81 | * |
73 | * 26 - ESPFIX small SS | 82 | * 26 - ESPFIX small SS |
74 | * 27 - per-cpu [ offset to per-cpu data area ] | 83 | * 27 - per-cpu [ offset to per-cpu data area ] |
75 | * 28 - stack_canary-20 [ for stack protector ] | 84 | * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8 |
76 | * 29 - unused | 85 | * 29 - unused |
77 | * 30 - unused | 86 | * 30 - unused |
78 | * 31 - TSS for double fault handler | 87 | * 31 - TSS for double fault handler |
79 | */ | 88 | */ |
80 | #define GDT_ENTRY_TLS_MIN 6 | 89 | #define GDT_ENTRY_TLS_MIN 6 |
81 | #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) | 90 | #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) |
82 | 91 | ||
92 | #define GDT_ENTRY_KERNEL_CS 12 | ||
93 | #define GDT_ENTRY_KERNEL_DS 13 | ||
83 | #define GDT_ENTRY_DEFAULT_USER_CS 14 | 94 | #define GDT_ENTRY_DEFAULT_USER_CS 14 |
84 | |||
85 | #define GDT_ENTRY_DEFAULT_USER_DS 15 | 95 | #define GDT_ENTRY_DEFAULT_USER_DS 15 |
96 | #define GDT_ENTRY_TSS 16 | ||
97 | #define GDT_ENTRY_LDT 17 | ||
98 | #define GDT_ENTRY_PNPBIOS_CS32 18 | ||
99 | #define GDT_ENTRY_PNPBIOS_CS16 19 | ||
100 | #define GDT_ENTRY_PNPBIOS_DS 20 | ||
101 | #define GDT_ENTRY_PNPBIOS_TS1 21 | ||
102 | #define GDT_ENTRY_PNPBIOS_TS2 22 | ||
103 | #define GDT_ENTRY_APMBIOS_BASE 23 | ||
104 | |||
105 | #define GDT_ENTRY_ESPFIX_SS 26 | ||
106 | #define GDT_ENTRY_PERCPU 27 | ||
107 | #define GDT_ENTRY_STACK_CANARY 28 | ||
108 | |||
109 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 | ||
86 | 110 | ||
87 | #define GDT_ENTRY_KERNEL_BASE (12) | 111 | /* |
112 | * Number of entries in the GDT table: | ||
113 | */ | ||
114 | #define GDT_ENTRIES 32 | ||
88 | 115 | ||
89 | #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0) | 116 | /* |
117 | * Segment selector values corresponding to the above entries: | ||
118 | */ | ||
90 | 119 | ||
91 | #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1) | 120 | #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) |
121 | #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) | ||
122 | #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) | ||
123 | #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) | ||
124 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) | ||
92 | 125 | ||
93 | #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4) | 126 | /* segment for calling fn: */ |
94 | #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5) | 127 | #define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8) |
128 | /* code segment for BIOS: */ | ||
129 | #define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8) | ||
95 | 130 | ||
96 | #define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6) | 131 | /* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */ |
97 | #define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11) | 132 | #define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32) |
98 | 133 | ||
99 | #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14) | 134 | /* data segment for BIOS: */ |
100 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) | 135 | #define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8) |
136 | /* transfer data segment: */ | ||
137 | #define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8) | ||
138 | /* another data segment: */ | ||
139 | #define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8) | ||
101 | 140 | ||
102 | #define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15) | ||
103 | #ifdef CONFIG_SMP | 141 | #ifdef CONFIG_SMP |
104 | #define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) | 142 | # define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8) |
105 | #else | 143 | #else |
106 | #define __KERNEL_PERCPU 0 | 144 | # define __KERNEL_PERCPU 0 |
107 | #endif | 145 | #endif |
108 | 146 | ||
109 | #define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16) | ||
110 | #ifdef CONFIG_CC_STACKPROTECTOR | 147 | #ifdef CONFIG_CC_STACKPROTECTOR |
111 | #define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) | 148 | # define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) |
112 | #else | 149 | #else |
113 | #define __KERNEL_STACK_CANARY 0 | 150 | # define __KERNEL_STACK_CANARY 0 |
114 | #endif | 151 | #endif |
115 | 152 | ||
116 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 | 153 | #else /* 64-bit: */ |
117 | |||
118 | /* | ||
119 | * The GDT has 32 entries | ||
120 | */ | ||
121 | #define GDT_ENTRIES 32 | ||
122 | 154 | ||
123 | /* The PnP BIOS entries in the GDT */ | 155 | #include <asm/cache.h> |
124 | #define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) | ||
125 | #define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1) | ||
126 | #define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2) | ||
127 | #define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3) | ||
128 | #define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4) | ||
129 | |||
130 | /* The PnP BIOS selectors */ | ||
131 | #define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */ | ||
132 | #define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */ | ||
133 | #define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */ | ||
134 | #define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ | ||
135 | #define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ | ||
136 | 156 | ||
157 | #define GDT_ENTRY_KERNEL32_CS 1 | ||
158 | #define GDT_ENTRY_KERNEL_CS 2 | ||
159 | #define GDT_ENTRY_KERNEL_DS 3 | ||
137 | 160 | ||
138 | /* | 161 | /* |
139 | * Matching rules for certain types of segments. | 162 | * We cannot use the same code segment descriptor for user and kernel mode, |
163 | * not even in long flat mode, because of different DPL. | ||
164 | * | ||
165 | * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes | ||
166 | * selectors: | ||
167 | * | ||
168 | * if returning to 32-bit userspace: cs = STAR.SYSRET_CS, | ||
169 | * if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16, | ||
170 | * | ||
171 | * ss = STAR.SYSRET_CS+8 (in either case) | ||
172 | * | ||
173 | * thus USER_DS should be between 32-bit and 64-bit code selectors: | ||
140 | */ | 174 | */ |
175 | #define GDT_ENTRY_DEFAULT_USER32_CS 4 | ||
176 | #define GDT_ENTRY_DEFAULT_USER_DS 5 | ||
177 | #define GDT_ENTRY_DEFAULT_USER_CS 6 | ||
141 | 178 | ||
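Plugging the entry numbers above into the SYSRET constraint (with STAR.SYSRET_CS programmed to __USER32_CS) shows the three hardware-derived selectors landing exactly on these entries; the selector values themselves are defined further down:

    __USER32_CS = 4*8 + 3 = 0x23    /* STAR.SYSRET_CS: CS when returning to 32-bit userspace */
    __USER_DS   = 5*8 + 3 = 0x2b    /* STAR.SYSRET_CS +  8: SS in either case                */
    __USER_CS   = 6*8 + 3 = 0x33    /* STAR.SYSRET_CS + 16: CS when returning to 64-bit      */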
142 | /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ | 179 | /* Needs two entries */ |
143 | #define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) | 180 | #define GDT_ENTRY_TSS 8 |
144 | 181 | /* Needs two entries */ | |
182 | #define GDT_ENTRY_LDT 10 | ||
145 | 183 | ||
146 | #else | 184 | #define GDT_ENTRY_TLS_MIN 12 |
147 | #include <asm/cache.h> | 185 | #define GDT_ENTRY_TLS_MAX 14 |
148 | |||
149 | #define GDT_ENTRY_KERNEL32_CS 1 | ||
150 | #define GDT_ENTRY_KERNEL_CS 2 | ||
151 | #define GDT_ENTRY_KERNEL_DS 3 | ||
152 | 186 | ||
153 | #define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) | 187 | /* Abused to load per CPU data from limit */ |
188 | #define GDT_ENTRY_PER_CPU 15 | ||
154 | 189 | ||
155 | /* | 190 | /* |
156 | * we cannot use the same code segment descriptor for user and kernel | 191 | * Number of entries in the GDT table: |
157 | * -- not even in the long flat mode, because of different DPL /kkeil | ||
158 | * The segment offset needs to contain a RPL. Grr. -AK | ||
159 | * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) | ||
160 | */ | 192 | */ |
161 | #define GDT_ENTRY_DEFAULT_USER32_CS 4 | 193 | #define GDT_ENTRIES 16 |
162 | #define GDT_ENTRY_DEFAULT_USER_DS 5 | ||
163 | #define GDT_ENTRY_DEFAULT_USER_CS 6 | ||
164 | #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3) | ||
165 | #define __USER32_DS __USER_DS | ||
166 | |||
167 | #define GDT_ENTRY_TSS 8 /* needs two entries */ | ||
168 | #define GDT_ENTRY_LDT 10 /* needs two entries */ | ||
169 | #define GDT_ENTRY_TLS_MIN 12 | ||
170 | #define GDT_ENTRY_TLS_MAX 14 | ||
171 | |||
172 | #define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ | ||
173 | #define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) | ||
174 | 194 | ||
175 | /* TLS indexes for 64bit - hardcoded in arch_prctl */ | 195 | /* |
176 | #define FS_TLS 0 | 196 | * Segment selector values corresponding to the above entries: |
177 | #define GS_TLS 1 | 197 | * |
178 | 198 | * Note, selectors also need to have a correct RPL, | |
179 | #define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) | 199 | * expressed with the +3 value for user-space selectors: |
180 | #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) | 200 | */ |
181 | 201 | #define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8) | |
182 | #define GDT_ENTRIES 16 | 202 | #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) |
203 | #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) | ||
204 | #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3) | ||
205 | #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) | ||
206 | #define __USER32_DS __USER_DS | ||
207 | #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) | ||
208 | #define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3) | ||
209 | |||
210 | /* TLS indexes for 64-bit - hardcoded in arch_prctl(): */ | ||
211 | #define FS_TLS 0 | ||
212 | #define GS_TLS 1 | ||
213 | |||
214 | #define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) | ||
215 | #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) | ||
183 | 216 | ||
184 | #endif | 217 | #endif |
185 | 218 | ||
186 | #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) | ||
187 | #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) | ||
188 | #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3) | ||
189 | #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3) | ||
190 | #ifndef CONFIG_PARAVIRT | 219 | #ifndef CONFIG_PARAVIRT |
191 | #define get_kernel_rpl() 0 | 220 | # define get_kernel_rpl() 0 |
192 | #endif | 221 | #endif |
193 | 222 | ||
194 | #define IDT_ENTRIES 256 | 223 | #define IDT_ENTRIES 256 |
195 | #define NUM_EXCEPTION_VECTORS 32 | 224 | #define NUM_EXCEPTION_VECTORS 32 |
196 | /* Bitmask of exception vectors which push an error code on the stack */ | 225 | |
197 | #define EXCEPTION_ERRCODE_MASK 0x00027d00 | 226 | /* Bitmask of exception vectors which push an error code on the stack: */ |
198 | #define GDT_SIZE (GDT_ENTRIES * 8) | 227 | #define EXCEPTION_ERRCODE_MASK 0x00027d00 |
199 | #define GDT_ENTRY_TLS_ENTRIES 3 | 228 | |
200 | #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) | 229 | #define GDT_SIZE (GDT_ENTRIES*8) |
230 | #define GDT_ENTRY_TLS_ENTRIES 3 | ||
231 | #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) | ||
201 | 232 | ||
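EXCEPTION_ERRCODE_MASK (0x00027d00) has bits 8, 10-14 and 17 set, i.e. #DF, #TS, #NP, #SS, #GP, #PF and #AC: exactly the exceptions that push an error code. A minimal sketch of how such a mask is typically consumed (the helper name is made up for illustration):

    static inline int vector_pushes_error_code(int vector)
    {
            return vector < NUM_EXCEPTION_VECTORS &&
                   ((EXCEPTION_ERRCODE_MASK >> vector) & 1);
    }
    /* vector_pushes_error_code(14) == 1 (#PF), vector_pushes_error_code(3) == 0 (#BP) */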
202 | #ifdef __KERNEL__ | 233 | #ifdef __KERNEL__ |
203 | #ifndef __ASSEMBLY__ | 234 | #ifndef __ASSEMBLY__ |
235 | |||
204 | extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; | 236 | extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; |
205 | #ifdef CONFIG_TRACING | 237 | #ifdef CONFIG_TRACING |
206 | #define trace_early_idt_handlers early_idt_handlers | 238 | # define trace_early_idt_handlers early_idt_handlers |
207 | #endif | 239 | #endif |
208 | 240 | ||
209 | /* | 241 | /* |
@@ -228,37 +260,30 @@ do { \ | |||
228 | } while (0) | 260 | } while (0) |
229 | 261 | ||
230 | /* | 262 | /* |
231 | * Save a segment register away | 263 | * Save a segment register away: |
232 | */ | 264 | */ |
233 | #define savesegment(seg, value) \ | 265 | #define savesegment(seg, value) \ |
234 | asm("mov %%" #seg ",%0":"=r" (value) : : "memory") | 266 | asm("mov %%" #seg ",%0":"=r" (value) : : "memory") |
235 | 267 | ||
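Note that the seg argument is a bare register name pasted into the asm template, so a typical (illustrative) use reads:

    unsigned long gs_sel;

    savesegment(gs, gs_sel);   /* expands to asm("mov %%gs,%0" : "=r" (gs_sel) : : "memory") */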
236 | /* | 268 | /* |
237 | * x86_32 user gs accessors. | 269 | * x86-32 user GS accessors: |
238 | */ | 270 | */ |
239 | #ifdef CONFIG_X86_32 | 271 | #ifdef CONFIG_X86_32 |
240 | #ifdef CONFIG_X86_32_LAZY_GS | 272 | # ifdef CONFIG_X86_32_LAZY_GS |
241 | #define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) | 273 | # define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; }) |
242 | #define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) | 274 | # define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) |
243 | #define task_user_gs(tsk) ((tsk)->thread.gs) | 275 | # define task_user_gs(tsk) ((tsk)->thread.gs) |
244 | #define lazy_save_gs(v) savesegment(gs, (v)) | 276 | # define lazy_save_gs(v) savesegment(gs, (v)) |
245 | #define lazy_load_gs(v) loadsegment(gs, (v)) | 277 | # define lazy_load_gs(v) loadsegment(gs, (v)) |
246 | #else /* X86_32_LAZY_GS */ | 278 | # else /* X86_32_LAZY_GS */ |
247 | #define get_user_gs(regs) (u16)((regs)->gs) | 279 | # define get_user_gs(regs) (u16)((regs)->gs) |
248 | #define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) | 280 | # define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) |
249 | #define task_user_gs(tsk) (task_pt_regs(tsk)->gs) | 281 | # define task_user_gs(tsk) (task_pt_regs(tsk)->gs) |
250 | #define lazy_save_gs(v) do { } while (0) | 282 | # define lazy_save_gs(v) do { } while (0) |
251 | #define lazy_load_gs(v) do { } while (0) | 283 | # define lazy_load_gs(v) do { } while (0) |
252 | #endif /* X86_32_LAZY_GS */ | 284 | # endif /* X86_32_LAZY_GS */ |
253 | #endif /* X86_32 */ | 285 | #endif /* X86_32 */ |
254 | 286 | ||
255 | static inline unsigned long get_limit(unsigned long segment) | ||
256 | { | ||
257 | unsigned long __limit; | ||
258 | asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); | ||
259 | return __limit + 1; | ||
260 | } | ||
261 | |||
262 | #endif /* !__ASSEMBLY__ */ | 287 | #endif /* !__ASSEMBLY__ */ |
263 | #endif /* __KERNEL__ */ | 288 | #endif /* __KERNEL__ */ |
264 | 289 | ||
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ff4e7b236e21..f69e06b283fb 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { } | |||
66 | */ | 66 | */ |
67 | extern struct boot_params boot_params; | 67 | extern struct boot_params boot_params; |
68 | 68 | ||
69 | static inline bool kaslr_enabled(void) | ||
70 | { | ||
71 | return !!(boot_params.hdr.loadflags & KASLR_FLAG); | ||
72 | } | ||
73 | |||
69 | /* | 74 | /* |
70 | * Do NOT EVER look at the BIOS memory size location. | 75 | * Do NOT EVER look at the BIOS memory size location. |
71 | * It does not work on many machines. | 76 | * It does not work on many machines. |
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 9dfce4e0417d..6fe6b182c998 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h | |||
@@ -57,9 +57,9 @@ struct sigcontext { | |||
57 | unsigned long ip; | 57 | unsigned long ip; |
58 | unsigned long flags; | 58 | unsigned long flags; |
59 | unsigned short cs; | 59 | unsigned short cs; |
60 | unsigned short gs; | 60 | unsigned short __pad2; /* Was called gs, but was always zero. */ |
61 | unsigned short fs; | 61 | unsigned short __pad1; /* Was called fs, but was always zero. */ |
62 | unsigned short __pad0; | 62 | unsigned short ss; |
63 | unsigned long err; | 63 | unsigned long err; |
64 | unsigned long trapno; | 64 | unsigned long trapno; |
65 | unsigned long oldmask; | 65 | unsigned long oldmask; |
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 7a958164088c..89db46752a8f 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h | |||
@@ -13,9 +13,7 @@ | |||
13 | X86_EFLAGS_CF | X86_EFLAGS_RF) | 13 | X86_EFLAGS_CF | X86_EFLAGS_RF) |
14 | 14 | ||
15 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); | 15 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); |
16 | 16 | int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc); | |
17 | int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | ||
18 | unsigned long *pax); | ||
19 | int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | 17 | int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, |
20 | struct pt_regs *regs, unsigned long mask); | 18 | struct pt_regs *regs, unsigned long mask); |
21 | 19 | ||
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 8d3120f4e270..ba665ebd17bb 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h | |||
@@ -27,23 +27,11 @@ | |||
27 | 27 | ||
28 | #ifdef CONFIG_X86_SMAP | 28 | #ifdef CONFIG_X86_SMAP |
29 | 29 | ||
30 | #define ASM_CLAC \ | 30 | #define ASM_CLAC \ |
31 | 661: ASM_NOP3 ; \ | 31 | ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP |
32 | .pushsection .altinstr_replacement, "ax" ; \ | 32 | |
33 | 662: __ASM_CLAC ; \ | 33 | #define ASM_STAC \ |
34 | .popsection ; \ | 34 | ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP |
35 | .pushsection .altinstructions, "a" ; \ | ||
36 | altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ | ||
37 | .popsection | ||
38 | |||
39 | #define ASM_STAC \ | ||
40 | 661: ASM_NOP3 ; \ | ||
41 | .pushsection .altinstr_replacement, "ax" ; \ | ||
42 | 662: __ASM_STAC ; \ | ||
43 | .popsection ; \ | ||
44 | .pushsection .altinstructions, "a" ; \ | ||
45 | altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ | ||
46 | .popsection | ||
47 | 35 | ||
48 | #else /* CONFIG_X86_SMAP */ | 36 | #else /* CONFIG_X86_SMAP */ |
49 | 37 | ||
@@ -61,20 +49,20 @@ | |||
61 | static __always_inline void clac(void) | 49 | static __always_inline void clac(void) |
62 | { | 50 | { |
63 | /* Note: a barrier is implicit in alternative() */ | 51 | /* Note: a barrier is implicit in alternative() */ |
64 | alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); | 52 | alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP); |
65 | } | 53 | } |
66 | 54 | ||
67 | static __always_inline void stac(void) | 55 | static __always_inline void stac(void) |
68 | { | 56 | { |
69 | /* Note: a barrier is implicit in alternative() */ | 57 | /* Note: a barrier is implicit in alternative() */ |
70 | alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); | 58 | alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP); |
71 | } | 59 | } |
72 | 60 | ||
73 | /* These macros can be used in asm() statements */ | 61 | /* These macros can be used in asm() statements */ |
74 | #define ASM_CLAC \ | 62 | #define ASM_CLAC \ |
75 | ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) | 63 | ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP) |
76 | #define ASM_STAC \ | 64 | #define ASM_STAC \ |
77 | ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) | 65 | ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP) |
78 | 66 | ||
79 | #else /* CONFIG_X86_SMAP */ | 67 | #else /* CONFIG_X86_SMAP */ |
80 | 68 | ||
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 8cd1cc3bc835..81d02fc7dafa 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu); | |||
154 | void native_smp_prepare_boot_cpu(void); | 154 | void native_smp_prepare_boot_cpu(void); |
155 | void native_smp_prepare_cpus(unsigned int max_cpus); | 155 | void native_smp_prepare_cpus(unsigned int max_cpus); |
156 | void native_smp_cpus_done(unsigned int max_cpus); | 156 | void native_smp_cpus_done(unsigned int max_cpus); |
157 | void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); | ||
157 | int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); | 158 | int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); |
158 | int native_cpu_disable(void); | 159 | int native_cpu_disable(void); |
159 | void native_cpu_die(unsigned int cpu); | 160 | void native_cpu_die(unsigned int cpu); |
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 6a4b00fafb00..aeb4666e0c0a 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h | |||
@@ -4,6 +4,8 @@ | |||
4 | 4 | ||
5 | #ifdef __KERNEL__ | 5 | #ifdef __KERNEL__ |
6 | 6 | ||
7 | #include <asm/nops.h> | ||
8 | |||
7 | static inline void native_clts(void) | 9 | static inline void native_clts(void) |
8 | { | 10 | { |
9 | asm volatile("clts"); | 11 | asm volatile("clts"); |
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p) | |||
199 | "+m" (*(volatile char __force *)__p)); | 201 | "+m" (*(volatile char __force *)__p)); |
200 | } | 202 | } |
201 | 203 | ||
204 | static inline void clwb(volatile void *__p) | ||
205 | { | ||
206 | volatile struct { char x[64]; } *p = __p; | ||
207 | |||
208 | asm volatile(ALTERNATIVE_2( | ||
209 | ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])", | ||
210 | ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ | ||
211 | X86_FEATURE_CLFLUSHOPT, | ||
212 | ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ | ||
213 | X86_FEATURE_CLWB) | ||
214 | : [p] "+m" (*p) | ||
215 | : [pax] "a" (p)); | ||
216 | } | ||
217 | |||
218 | static inline void pcommit_sfence(void) | ||
219 | { | ||
220 | alternative(ASM_NOP7, | ||
221 | ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */ | ||
222 | "sfence", | ||
223 | X86_FEATURE_PCOMMIT); | ||
224 | } | ||
225 | |||
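A sketch of how the two new helpers are meant to compose for persistent-memory writes; the function name, the 64-byte line size and the loop structure are assumptions for illustration, not part of the patch:

    static void flush_range_to_pmem(void *addr, unsigned long size)
    {
            unsigned long p   = (unsigned long)addr & ~63UL;   /* assume 64-byte cache lines */
            unsigned long end = (unsigned long)addr + size;

            for (; p < end; p += 64)
                    clwb((void *)p);       /* write back each dirty line        */
            pcommit_sfence();              /* fence, then commit to persistence */
    }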
202 | #define nop() asm volatile ("nop") | 226 | #define nop() asm volatile ("nop") |
203 | 227 | ||
204 | 228 | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 1d4e4f279a32..ea2dbe82cba3 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -13,6 +13,33 @@ | |||
13 | #include <asm/types.h> | 13 | #include <asm/types.h> |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we | ||
17 | * reserve at the top of the kernel stack. We do it because of a nasty | ||
18 | * 32-bit corner case. On x86_32, the hardware stack frame is | ||
19 | * variable-length. Except for vm86 mode, struct pt_regs assumes a | ||
20 | * maximum-length frame. If we enter from CPL 0, the top 8 bytes of | ||
21 | * pt_regs don't actually exist. Ordinarily this doesn't matter, but it | ||
22 | * does in at least one case: | ||
23 | * | ||
24 | * If we take an NMI early enough in SYSENTER, then we can end up with | ||
25 | * pt_regs that extends above sp0. On the way out, in the espfix code, | ||
26 | * we can read the saved SS value, but that value will be above sp0. | ||
27 | * Without this offset, that can result in a page fault. (We are | ||
28 | * careful that, in this case, the value we read doesn't matter.) | ||
29 | * | ||
30 | * In vm86 mode, the hardware frame is much longer still, but we neither | ||
31 | * access the extra members from NMI context, nor do we write such a | ||
32 | * frame at sp0 at all. | ||
33 | * | ||
34 | * x86_64 has a fixed-length stack frame. | ||
35 | */ | ||
36 | #ifdef CONFIG_X86_32 | ||
37 | # define TOP_OF_KERNEL_STACK_PADDING 8 | ||
38 | #else | ||
39 | # define TOP_OF_KERNEL_STACK_PADDING 0 | ||
40 | #endif | ||
41 | |||
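Put differently, the padding only shifts where the saved register frame sits below the top of the stack. A rough sketch of the resulting arithmetic (illustrative, not part of this patch):

    /* Assumption: the register frame is pushed immediately below the padding. */
    static struct pt_regs *regs_at_top(unsigned long top_of_stack)
    {
            return (struct pt_regs *)(top_of_stack
                                      - TOP_OF_KERNEL_STACK_PADDING
                                      - sizeof(struct pt_regs));
    }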
42 | /* | ||
16 | * low level task data that entry.S needs immediate access to | 43 | * low level task data that entry.S needs immediate access to |
17 | * - this struct should fit entirely inside of one cache line | 44 | * - this struct should fit entirely inside of one cache line |
18 | * - this struct shares the supervisor stack pages | 45 | * - this struct shares the supervisor stack pages |
@@ -145,7 +172,6 @@ struct thread_info { | |||
145 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | 172 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) |
146 | 173 | ||
147 | #define STACK_WARN (THREAD_SIZE/8) | 174 | #define STACK_WARN (THREAD_SIZE/8) |
148 | #define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8)) | ||
149 | 175 | ||
150 | /* | 176 | /* |
151 | * macros/functions for gaining access to the thread information structure | 177 | * macros/functions for gaining access to the thread information structure |
@@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack); | |||
158 | 184 | ||
159 | static inline struct thread_info *current_thread_info(void) | 185 | static inline struct thread_info *current_thread_info(void) |
160 | { | 186 | { |
161 | struct thread_info *ti; | 187 | return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); |
162 | ti = (void *)(this_cpu_read_stable(kernel_stack) + | ||
163 | KERNEL_STACK_OFFSET - THREAD_SIZE); | ||
164 | return ti; | ||
165 | } | 188 | } |
166 | 189 | ||
167 | static inline unsigned long current_stack_pointer(void) | 190 | static inline unsigned long current_stack_pointer(void) |
@@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void) | |||
177 | 200 | ||
178 | #else /* !__ASSEMBLY__ */ | 201 | #else /* !__ASSEMBLY__ */ |
179 | 202 | ||
180 | /* how to get the thread information struct from ASM */ | 203 | /* Load thread_info address into "reg" */ |
181 | #define GET_THREAD_INFO(reg) \ | 204 | #define GET_THREAD_INFO(reg) \ |
182 | _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ | 205 | _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ |
183 | _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; | 206 | _ASM_SUB $(THREAD_SIZE),reg ; |
184 | 207 | ||
185 | /* | 208 | /* |
186 | * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in | 209 | * ASM operand which evaluates to a 'thread_info' address of |
187 | * a certain register (to be used in assembler memory operands). | 210 | * the current task, if it is known that "reg" is exactly "off" |
211 | * bytes below the top of the stack currently. | ||
212 | * | ||
213 | * ( The kernel stack's size is known at build time, it is usually | ||
214 | * 2 or 4 pages, and the bottom of the kernel stack contains | ||
215 | * the thread_info structure. So to access the thread_info very | ||
216 | * quickly from assembly code we can calculate down from the | ||
217 | * top of the kernel stack to the bottom, using constant, | ||
218 | * build-time calculations only. ) | ||
219 | * | ||
220 | * For example, to fetch the current thread_info->flags value into %eax | ||
221 | * on x86-64 defconfig kernels, in syscall entry code where RSP is | ||
222 | * currently at exactly SIZEOF_PTREGS bytes away from the top of the | ||
223 | * stack: | ||
224 | * | ||
225 | * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax | ||
226 | * | ||
227 | * will translate to: | ||
228 | * | ||
229 | * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax | ||
230 | * | ||
231 | * which is below the current RSP by almost 16K. | ||
188 | */ | 232 | */ |
189 | #define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) | 233 | #define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) |
190 | 234 | ||
191 | #endif | 235 | #endif |
192 | 236 | ||
@@ -236,6 +280,16 @@ static inline bool is_ia32_task(void) | |||
236 | #endif | 280 | #endif |
237 | return false; | 281 | return false; |
238 | } | 282 | } |
283 | |||
284 | /* | ||
285 | * Force syscall return via IRET by making it look as if there was | ||
286 | * some work pending. IRET is our most capable (but slowest) syscall | ||
287 | * return path, which is able to restore modified SS, CS and certain | ||
288 | * EFLAGS values that other (fast) syscall return instructions | ||
289 | * are not able to restore properly. | ||
290 | */ | ||
291 | #define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) | ||
292 | |||
239 | #endif /* !__ASSEMBLY__ */ | 293 | #endif /* !__ASSEMBLY__ */ |
240 | 294 | ||
241 | #ifndef __ASSEMBLY__ | 295 | #ifndef __ASSEMBLY__ |
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 225b0988043a..ab456dc233b5 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | /* loadflags */ | 16 | /* loadflags */ |
17 | #define LOADED_HIGH (1<<0) | 17 | #define LOADED_HIGH (1<<0) |
18 | #define KASLR_FLAG (1<<1) | ||
18 | #define QUIET_FLAG (1<<5) | 19 | #define QUIET_FLAG (1<<5) |
19 | #define KEEP_SEGMENTS (1<<6) | 20 | #define KEEP_SEGMENTS (1<<6) |
20 | #define CAN_USE_HEAP (1<<7) | 21 | #define CAN_USE_HEAP (1<<7) |
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h index 7b0a55a88851..580aee3072e0 100644 --- a/arch/x86/include/uapi/asm/ptrace-abi.h +++ b/arch/x86/include/uapi/asm/ptrace-abi.h | |||
@@ -25,13 +25,17 @@ | |||
25 | #else /* __i386__ */ | 25 | #else /* __i386__ */ |
26 | 26 | ||
27 | #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) | 27 | #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) |
28 | /* | ||
29 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry | ||
30 | * unless syscall needs a complete, fully filled "struct pt_regs". | ||
31 | */ | ||
28 | #define R15 0 | 32 | #define R15 0 |
29 | #define R14 8 | 33 | #define R14 8 |
30 | #define R13 16 | 34 | #define R13 16 |
31 | #define R12 24 | 35 | #define R12 24 |
32 | #define RBP 32 | 36 | #define RBP 32 |
33 | #define RBX 40 | 37 | #define RBX 40 |
34 | /* arguments: interrupts/non tracing syscalls only save up to here*/ | 38 | /* These regs are callee-clobbered. Always saved on kernel entry. */ |
35 | #define R11 48 | 39 | #define R11 48 |
36 | #define R10 56 | 40 | #define R10 56 |
37 | #define R9 64 | 41 | #define R9 64 |
@@ -41,15 +45,17 @@ | |||
41 | #define RDX 96 | 45 | #define RDX 96 |
42 | #define RSI 104 | 46 | #define RSI 104 |
43 | #define RDI 112 | 47 | #define RDI 112 |
44 | #define ORIG_RAX 120 /* = ERROR */ | 48 | /* |
45 | /* end of arguments */ | 49 | * On syscall entry, this is syscall#. On CPU exception, this is error code. |
46 | /* cpu exception frame or undefined in case of fast syscall. */ | 50 | * On hw interrupt, it's IRQ number: |
51 | */ | ||
52 | #define ORIG_RAX 120 | ||
53 | /* Return frame for iretq */ | ||
47 | #define RIP 128 | 54 | #define RIP 128 |
48 | #define CS 136 | 55 | #define CS 136 |
49 | #define EFLAGS 144 | 56 | #define EFLAGS 144 |
50 | #define RSP 152 | 57 | #define RSP 152 |
51 | #define SS 160 | 58 | #define SS 160 |
52 | #define ARGOFFSET R11 | ||
53 | #endif /* __ASSEMBLY__ */ | 59 | #endif /* __ASSEMBLY__ */ |
54 | 60 | ||
55 | /* top of stack page */ | 61 | /* top of stack page */ |
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h index ac4b9aa4d999..bc16115af39b 100644 --- a/arch/x86/include/uapi/asm/ptrace.h +++ b/arch/x86/include/uapi/asm/ptrace.h | |||
@@ -41,13 +41,17 @@ struct pt_regs { | |||
41 | #ifndef __KERNEL__ | 41 | #ifndef __KERNEL__ |
42 | 42 | ||
43 | struct pt_regs { | 43 | struct pt_regs { |
44 | /* | ||
45 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry | ||
46 | * unless syscall needs a complete, fully filled "struct pt_regs". | ||
47 | */ | ||
44 | unsigned long r15; | 48 | unsigned long r15; |
45 | unsigned long r14; | 49 | unsigned long r14; |
46 | unsigned long r13; | 50 | unsigned long r13; |
47 | unsigned long r12; | 51 | unsigned long r12; |
48 | unsigned long rbp; | 52 | unsigned long rbp; |
49 | unsigned long rbx; | 53 | unsigned long rbx; |
50 | /* arguments: non interrupts/non tracing syscalls only save up to here*/ | 54 | /* These regs are callee-clobbered. Always saved on kernel entry. */ |
51 | unsigned long r11; | 55 | unsigned long r11; |
52 | unsigned long r10; | 56 | unsigned long r10; |
53 | unsigned long r9; | 57 | unsigned long r9; |
@@ -57,9 +61,12 @@ struct pt_regs { | |||
57 | unsigned long rdx; | 61 | unsigned long rdx; |
58 | unsigned long rsi; | 62 | unsigned long rsi; |
59 | unsigned long rdi; | 63 | unsigned long rdi; |
64 | /* | ||
65 | * On syscall entry, this is syscall#. On CPU exception, this is error code. | ||
66 | * On hw interrupt, it's IRQ number: | ||
67 | */ | ||
60 | unsigned long orig_rax; | 68 | unsigned long orig_rax; |
61 | /* end of arguments */ | 69 | /* Return frame for iretq */ |
62 | /* cpu exception frame or undefined */ | ||
63 | unsigned long rip; | 70 | unsigned long rip; |
64 | unsigned long cs; | 71 | unsigned long cs; |
65 | unsigned long eflags; | 72 | unsigned long eflags; |
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index d8b9f9081e86..16dc4e8a2cd3 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h | |||
@@ -177,9 +177,24 @@ struct sigcontext { | |||
177 | __u64 rip; | 177 | __u64 rip; |
178 | __u64 eflags; /* RFLAGS */ | 178 | __u64 eflags; /* RFLAGS */ |
179 | __u16 cs; | 179 | __u16 cs; |
180 | __u16 gs; | 180 | |
181 | __u16 fs; | 181 | /* |
182 | __u16 __pad0; | 182 | * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), |
183 | * Linux saved and restored fs and gs in these slots. This | ||
184 | * was counterproductive, as fsbase and gsbase were never | ||
185 | * saved, so arch_prctl was presumably unreliable. | ||
186 | * | ||
187 | * If these slots are ever needed for any other purpose, there | ||
188 | * is some risk that very old 64-bit binaries could get | ||
189 | * confused. I doubt that many such binaries still work, | ||
190 | * though, since the same patch in 2.5.64 also removed the | ||
191 | * 64-bit set_thread_area syscall, so it appears that there is | ||
192 | * no TLS API that works in both pre- and post-2.5.64 kernels. | ||
193 | */ | ||
194 | __u16 __pad2; /* Was gs. */ | ||
195 | __u16 __pad1; /* Was fs. */ | ||
196 | |||
197 | __u16 ss; | ||
183 | __u64 err; | 198 | __u64 err; |
184 | __u64 trapno; | 199 | __u64 trapno; |
185 | __u64 oldmask; | 200 | __u64 oldmask; |