author		Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 19:13:28 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 19:13:28 -0400
commit		1a4a2bc460721bc8f91e4c1294d39b38e5af132f (patch)
tree		fe646d05f6e17f05601e0a32cc796bec718ab6e7 /arch/x86/kernel
parent		110a9e42b68719f584879c5c5c727bbae90d15f9 (diff)
parent		1ef55be16ed69538f89e0a6508be5e62fdc9851c (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
"In this cycle this topic tree has become one of those 'super topics'
that accumulated a lot of changes:
- Add CONFIG_VMAP_STACK=y support to the core kernel and enable it on
x86 - preceded by an array of changes. v4.8 saw preparatory changes
in this area already - this is the rest of the work. Includes the
thread stack caching performance optimization. (Andy Lutomirski)
- switch_to() cleanups and all-around enhancements. (Brian Gerst)
- A large number of dumpstack infrastructure enhancements and an
unwinder abstraction. The secret long-term plan is safe(r) live
patching plus maybe another attempt at debuginfo-based unwinding -
but all these current bits are standalone enhancements in a
frame-pointer-based debug environment as well. (Josh Poimboeuf)
- More __ro_after_init and const annotations. (Kees Cook)
- Enable KASLR for the vmemmap memory region. (Thomas Garnier)"
[ The virtually mapped stack changes are pretty fundamental, and not
x86-specific per se, even if they are only used on x86 right now. ]
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
x86/asm: Get rid of __read_cr4_safe()
thread_info: Use unsigned long for flags
x86/alternatives: Add stack frame dependency to alternative_call_2()
x86/dumpstack: Fix show_stack() task pointer regression
x86/dumpstack: Remove dump_trace() and related callbacks
x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
oprofile/x86: Convert x86_backtrace() to use the new unwinder
x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
perf/x86: Convert perf_callchain_kernel() to use the new unwinder
x86/unwind: Add new unwind interface and implementations
x86/dumpstack: Remove NULL task pointer convention
fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
lib/syscall: Pin the task stack in collect_syscall()
x86/process: Pin the target stack in get_wchan()
x86/dumpstack: Pin the target stack when dumping it
kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread()
sched/core: Add try_get_task_stack() and put_task_stack()
x86/entry/64: Fix a minor comment rebase error
iommu/amd: Don't put completion-wait semaphore on stack
...
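
[ For context: the "Pin the ... stack" commits above all use the stack-
  pinning helpers this series adds. With CONFIG_THREAD_INFO_IN_TASK a
  task's stack can be freed before the task struct itself, so remote
  stack walkers must take a reference first. A minimal usage sketch of
  that interface (inspect_remote_stack() is a hypothetical caller, not
  part of this series): ]

	/*
	 * try_get_task_stack() returns NULL if the stack is already
	 * gone; put_task_stack() drops the reference and may free it.
	 */
	static void inspect_remote_stack(struct task_struct *task)
	{
		void *stack = try_get_task_stack(task);

		if (!stack)
			return;	/* task is exiting, stack already freed */

		/* ... safely read the task's stack here ... */

		put_task_stack(task);
	}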
Diffstat (limited to 'arch/x86/kernel')
42 files changed, 664 insertions(+), 589 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0503f5bfb18d..45257cf84370 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -125,6 +125,12 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
 obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
 obj-$(CONFIG_TRACING) += tracepoint.o
 
+ifdef CONFIG_FRAME_POINTER
+obj-y += unwind_frame.o
+else
+obj-y += unwind_guess.o
+endif
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
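
[ For context: unwind_frame.o and unwind_guess.o implement one common
  interface from <asm/unwind.h>; the former walks frame pointers, the
  latter just scans the stack for kernel text addresses. A sketch of a
  typical consumer loop against that interface (the function below is
  illustrative, not from this patch): ]

	#include <asm/unwind.h>

	static void print_return_addresses(struct task_struct *task,
					   struct pt_regs *regs,
					   unsigned long *first_frame)
	{
		struct unwind_state state;

		/* walk every frame the chosen unwinder can find */
		for (unwind_start(&state, task, regs, first_frame);
		     !unwind_done(&state);
		     unwind_next_frame(&state)) {
			unsigned long addr = unwind_get_return_address(&state);

			if (!addr)
				break;
			printk(KERN_DEFAULT "  %pB\n", (void *)addr);
		}
	}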
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index adb3eaf8fe2a..48587335ede8 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void)
 	saved_magic = 0x12345678;
 #else /* CONFIG_64BIT */
 #ifdef CONFIG_SMP
-	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
+	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5b2ae106bd4a..8862da76ef6f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -25,7 +25,7 @@
 static struct apic apic_physflat;
 static struct apic apic_flat;
 
-struct apic __read_mostly *apic = &apic_flat;
+struct apic *apic __ro_after_init = &apic_flat;
 EXPORT_SYMBOL_GPL(apic);
 
 static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -154,7 +154,7 @@ static int flat_probe(void)
 	return 1;
 }
 
-static struct apic apic_flat = {
+static struct apic apic_flat __ro_after_init = {
 	.name = "flat",
 	.probe = flat_probe,
 	.acpi_madt_oem_check = flat_acpi_madt_oem_check,
@@ -248,7 +248,7 @@ static int physflat_probe(void)
 	return 0;
 }
 
-static struct apic apic_physflat = {
+static struct apic apic_physflat __ro_after_init = {
 
 	.name = "physical flat",
 	.probe = physflat_probe,
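
[ For context: the __read_mostly/__refdata -> __ro_after_init
  conversions in this and the following files mark data that is written
  only during boot; once init completes, mark_rodata_ro() write-protects
  the section. Roughly how the annotation is defined in
  <linux/cache.h> (hedged reconstruction, not part of this diff): ]

	/* place the object in a section made read-only after init */
	#define __ro_after_init \
		__attribute__((__section__(".data..ro_after_init")))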
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index c05688b2deff..b109e4389c92 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
-struct apic apic_noop = {
+struct apic apic_noop __ro_after_init = {
 	.name = "noop",
 	.probe = noop_probe,
 	.acpi_madt_oem_check = NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 06dbaa458bfe..56012010332c 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -142,7 +142,7 @@ static int probe_bigsmp(void)
 	return dmi_bigsmp;
 }
 
-static struct apic apic_bigsmp = {
+static struct apic apic_bigsmp __ro_after_init = {
 
 	.name = "bigsmp",
 	.probe = probe_bigsmp,
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index ade25320df96..015bbf30e3e3 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -269,7 +269,7 @@ static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
 	hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
 }
 
-static struct irq_chip hpet_msi_controller = {
+static struct irq_chip hpet_msi_controller __ro_after_init = {
 	.name = "HPET-MSI",
 	.irq_unmask = hpet_msi_unmask,
 	.irq_mask = hpet_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 563096267ca2..c48264e202fd 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -72,7 +72,7 @@ static int probe_default(void)
 	return 1;
 }
 
-static struct apic apic_default = {
+static struct apic apic_default __ro_after_init = {
 
 	.name = "default",
 	.probe = probe_default,
@@ -126,7 +126,7 @@ static struct apic apic_default = {
 
 apic_driver(apic_default);
 
-struct apic *apic = &apic_default;
+struct apic *apic __ro_after_init = &apic_default;
 EXPORT_SYMBOL_GPL(apic);
 
 static int cmdline_apic __initdata;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 54f35d988025..200af5ae9662 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -227,7 +227,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
 	cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
 }
 
-static struct apic apic_x2apic_cluster = {
+static struct apic apic_x2apic_cluster __ro_after_init = {
 
 	.name = "cluster x2apic",
 	.probe = x2apic_cluster_probe,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 4f13f54f1b1f..ff111f05a314 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -98,7 +98,7 @@ static int x2apic_phys_probe(void)
 	return apic == &apic_x2apic_phys;
 }
 
-static struct apic apic_x2apic_phys = {
+static struct apic apic_x2apic_phys __ro_after_init = {
 
 	.name = "physical x2apic",
 	.probe = x2apic_phys_probe,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index cb0673c1e940..b9f6157d4271 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -560,7 +560,7 @@ static int uv_probe(void)
 	return apic == &apic_x2apic_uv_x;
 }
 
-static struct apic __refdata apic_x2apic_uv_x = {
+static struct apic apic_x2apic_uv_x __ro_after_init = {
 
 	.name = "UV large system",
 	.probe = uv_probe,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 2bd5c6ff7ee7..c62e015b126c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -29,10 +29,13 @@
 
 void common(void) {
 	BLANK();
-	OFFSET(TI_flags, thread_info, flags);
-	OFFSET(TI_status, thread_info, status);
+	OFFSET(TASK_threadsp, task_struct, thread.sp);
+#ifdef CONFIG_CC_STACKPROTECTOR
+	OFFSET(TASK_stack_canary, task_struct, stack_canary);
+#endif
 
 	BLANK();
+	OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
 	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
 
 	BLANK();
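
[ For context: OFFSET()/DEFINE() never execute. asm-offsets.c is
  compiled to assembly and the build scrapes the marker strings out of
  that output into a generated asm-offsets.h, so entry code can use the
  struct offsets as plain constants. Roughly the <linux/kbuild.h>
  helpers being used above (hedged reconstruction, not part of this
  diff): ]

	/* emit '-><sym> <value>' into the compiler's asm output */
	#define DEFINE(sym, val) \
		asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

	#define BLANK() asm volatile("\n.ascii \"->\"" : : )

	#define OFFSET(sym, str, mem) \
		DEFINE(sym, offsetof(struct str, mem))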
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ecdc1d217dc0..880aa093268d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@ void foo(void)
 	/* Size of SYSENTER_stack */
 	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	BLANK();
+	OFFSET(stack_canary_offset, stack_canary, canary);
+#endif
+
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d875f97d4e0b..210927ee2e74 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@ int main(void)
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	BLANK();
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+	BLANK();
+#endif
+
 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
 	DEFINE(NR_syscalls, sizeof(syscalls_64));
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bcc9ccc220c9..9bd910a7dd0a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1264,9 +1264,14 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
-struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
-				    (unsigned long) debug_idt_table };
+struct desc_ptr idt_descr __ro_after_init = {
+	.size = NR_VECTORS * 16 - 1,
+	.address = (unsigned long) idt_table,
+};
+const struct desc_ptr debug_idt_descr = {
+	.size = NR_VECTORS * 16 - 1,
+	.address = (unsigned long) debug_idt_table,
+};
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1280,7 +1285,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
 
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
@@ -1304,11 +1309,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-	/*
-	 * LSTAR and STAR live in a bit strange symbiosis.
-	 * They both write to the same internal register. STAR allows to
-	 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
-	 */
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
 	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
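
[ For context: the designated initializers at the top of this hunk fill
  struct desc_ptr, the packed limit+base pair that the lidt/lgdt
  instructions expect. Roughly its definition from <asm/desc_defs.h>
  (hedged reconstruction, not part of this diff): ]

	/* 16-bit limit followed immediately by the table's base address */
	struct desc_ptr {
		unsigned short size;
		unsigned long address;
	} __attribute__((packed));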
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 28f1b54b7fad..24e87e74990d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -72,14 +72,14 @@ static DEFINE_MUTEX(mtrr_mutex);
 u64 size_or_mask, size_and_mask;
 static bool mtrr_aps_delayed_init;
 
-static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
+static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
 
 const struct mtrr_ops *mtrr_if;
 
 static void set_mtrr(unsigned int reg, unsigned long base,
 		     unsigned long size, mtrr_type type);
 
-void set_mtrr_ops(const struct mtrr_ops *ops)
+void __init set_mtrr_ops(const struct mtrr_ops *ops)
 {
 	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
 		mtrr_ops[ops->vendor] = ops;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 6c7ced07d16d..ad8bd763efa5 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -54,7 +54,7 @@ void fill_mtrr_var_range(unsigned int index,
 bool get_mtrr_state(void);
 void mtrr_bp_pat_init(void);
 
-extern void set_mtrr_ops(const struct mtrr_ops *ops);
+extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
 
 extern u64 size_or_mask, size_and_mask;
 extern const struct mtrr_ops *mtrr_if;
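
[ For context: marking set_mtrr_ops() __init is what makes the
  __ro_after_init annotation on mtrr_ops[] above safe: __ro_after_init
  data stays writable until the end of boot, and a function living in
  .init.text can only run before the section is write-protected (and
  before .init.text itself is freed). Roughly the annotation's
  definition from <linux/init.h> (hedged reconstruction; the exact
  attribute list varies by kernel version): ]

	/* place the function in .init.text, discarded after boot */
	#define __init __section(.init.text) __cold notrace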
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 92e8f0a7159c..9b7cf5c28f5f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,7 +17,7 @@
 #include <linux/sysfs.h>
 
 #include <asm/stacktrace.h>
-
+#include <asm/unwind.h>
 
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
@@ -25,11 +25,29 @@ unsigned int code_bytes = 64;
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
+bool in_task_stack(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info)
+{
+	unsigned long *begin = task_stack_page(task);
+	unsigned long *end = task_stack_page(task) + THREAD_SIZE;
+
+	if (stack < begin || stack >= end)
+		return false;
+
+	info->type = STACK_TYPE_TASK;
+	info->begin = begin;
+	info->end = end;
+	info->next_sp = NULL;
+
+	return true;
+}
+
 static void printk_stack_address(unsigned long address, int reliable,
-				 void *data)
+				 char *log_lvl)
 {
+	touch_nmi_watchdog();
 	printk("%s [<%p>] %s%pB\n",
-	       (char *)data, (void *)address, reliable ? "" : "? ",
+	       log_lvl, (void *)address, reliable ? "" : "? ",
 	       (void *)address);
 }
 
@@ -38,176 +56,120 @@ void printk_address(unsigned long address)
 	pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
 }
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void
-print_ftrace_graph_addr(unsigned long addr, void *data,
-			const struct stacktrace_ops *ops,
-			struct task_struct *task, int *graph)
-{
-	unsigned long ret_addr;
-	int index;
-
-	if (addr != (unsigned long)return_to_handler)
-		return;
-
-	index = task->curr_ret_stack;
-
-	if (!task->ret_stack || index < *graph)
-		return;
-
-	index -= *graph;
-	ret_addr = task->ret_stack[index].ret;
-
-	ops->address(data, ret_addr, 1);
-
-	(*graph)++;
-}
-#else
-static inline void
-print_ftrace_graph_addr(unsigned long addr, void *data,
-			const struct stacktrace_ops *ops,
-			struct task_struct *task, int *graph)
-{ }
-#endif
-
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-static inline int valid_stack_ptr(struct task_struct *task,
-			void *p, unsigned int size, void *end)
-{
-	void *t = task_stack_page(task);
-	if (end) {
-		if (p < end && p >= (end-THREAD_SIZE))
-			return 1;
-		else
-			return 0;
-	}
-	return p >= t && p < t + THREAD_SIZE - size;
-}
-
-unsigned long
-print_context_stack(struct task_struct *task,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data,
-		unsigned long *end, int *graph)
-{
-	struct stack_frame *frame = (struct stack_frame *)bp;
-
-	/*
-	 * If we overflowed the stack into a guard page, jump back to the
-	 * bottom of the usable stack.
-	 */
-	if ((unsigned long)task_stack_page(task) - (unsigned long)stack <
-	    PAGE_SIZE)
-		stack = (unsigned long *)task_stack_page(task);
-
-	while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
-		unsigned long addr;
-
-		addr = *stack;
-		if (__kernel_text_address(addr)) {
-			if ((unsigned long) stack == bp + sizeof(long)) {
-				ops->address(data, addr, 1);
-				frame = frame->next_frame;
-				bp = (unsigned long) frame;
-			} else {
-				ops->address(data, addr, 0);
-			}
-			print_ftrace_graph_addr(addr, data, ops, task, graph);
-		}
-		stack++;
-	}
-	return bp;
-}
-EXPORT_SYMBOL_GPL(print_context_stack);
-
-unsigned long
-print_context_stack_bp(struct task_struct *task,
-		       unsigned long *stack, unsigned long bp,
-		       const struct stacktrace_ops *ops, void *data,
-		       unsigned long *end, int *graph)
-{
-	struct stack_frame *frame = (struct stack_frame *)bp;
-	unsigned long *ret_addr = &frame->return_address;
-
-	while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
-		unsigned long addr = *ret_addr;
-
-		if (!__kernel_text_address(addr))
-			break;
-
-		if (ops->address(data, addr, 1))
-			break;
-		frame = frame->next_frame;
-		ret_addr = &frame->return_address;
-		print_ftrace_graph_addr(addr, data, ops, task, graph);
-	}
-
-	return (unsigned long)frame;
-}
-EXPORT_SYMBOL_GPL(print_context_stack_bp);
-
-static int print_trace_stack(void *data, char *name)
-{
-	printk("%s <%s> ", (char *)data, name);
-	return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static int print_trace_address(void *data, unsigned long addr, int reliable)
-{
-	touch_nmi_watchdog();
-	printk_stack_address(addr, reliable, data);
-	return 0;
-}
-
-static const struct stacktrace_ops print_trace_ops = {
-	.stack = print_trace_stack,
-	.address = print_trace_address,
-	.walk_stack = print_context_stack,
-};
-
-void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl)
-{
-	printk("%sCall Trace:\n", log_lvl);
-	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
-{
-	show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
+void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *stack, char *log_lvl)
+{
+	struct unwind_state state;
+	struct stack_info stack_info = {0};
+	unsigned long visit_mask = 0;
+	int graph_idx = 0;
+
+	printk("%sCall Trace:\n", log_lvl);
+
+	unwind_start(&state, task, regs, stack);
+
+	/*
+	 * Iterate through the stacks, starting with the current stack pointer.
+	 * Each stack has a pointer to the next one.
+	 *
+	 * x86-64 can have several stacks:
+	 * - task stack
+	 * - interrupt stack
+	 * - HW exception stacks (double fault, nmi, debug, mce)
+	 *
+	 * x86-32 can have up to three stacks:
+	 * - task stack
+	 * - softirq stack
+	 * - hardirq stack
+	 */
+	for (; stack; stack = stack_info.next_sp) {
+		const char *str_begin, *str_end;
+
+		/*
+		 * If we overflowed the task stack into a guard page, jump back
+		 * to the bottom of the usable stack.
+		 */
+		if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
+			stack = task_stack_page(task);
+
+		if (get_stack_info(stack, task, &stack_info, &visit_mask))
+			break;
+
+		stack_type_str(stack_info.type, &str_begin, &str_end);
+		if (str_begin)
+			printk("%s <%s> ", log_lvl, str_begin);
+
+		/*
+		 * Scan the stack, printing any text addresses we find.  At the
+		 * same time, follow proper stack frames with the unwinder.
+		 *
+		 * Addresses found during the scan which are not reported by
+		 * the unwinder are considered to be additional clues which are
+		 * sometimes useful for debugging and are prefixed with '?'.
+		 * This also serves as a failsafe option in case the unwinder
+		 * goes off in the weeds.
+		 */
+		for (; stack < stack_info.end; stack++) {
+			unsigned long real_addr;
+			int reliable = 0;
+			unsigned long addr = *stack;
+			unsigned long *ret_addr_p =
+				unwind_get_return_address_ptr(&state);
+
+			if (!__kernel_text_address(addr))
+				continue;
+
+			if (stack == ret_addr_p)
+				reliable = 1;
+
+			/*
+			 * When function graph tracing is enabled for a
+			 * function, its return address on the stack is
+			 * replaced with the address of an ftrace handler
+			 * (return_to_handler).  In that case, before printing
+			 * the "real" address, we want to print the handler
+			 * address as an "unreliable" hint that function graph
+			 * tracing was involved.
+			 */
+			real_addr = ftrace_graph_ret_addr(task, &graph_idx,
+							  addr, stack);
+			if (real_addr != addr)
+				printk_stack_address(addr, 0, log_lvl);
+			printk_stack_address(real_addr, reliable, log_lvl);
+
+			if (!reliable)
+				continue;
+
+			/*
+			 * Get the next frame from the unwinder.  No need to
+			 * check for an error: if anything goes wrong, the rest
+			 * of the addresses will just be printed as unreliable.
+			 */
+			unwind_next_frame(&state);
+		}
+
+		if (str_end)
+			printk("%s <%s> ", log_lvl, str_end);
+	}
+}
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	unsigned long bp = 0;
-	unsigned long stack;
+	task = task ? : current;
 
 	/*
 	 * Stack frames below this one aren't interesting. Don't show them
 	 * if we're printing for %current.
 	 */
-	if (!sp && (!task || task == current)) {
-		sp = &stack;
-		bp = stack_frame(current, NULL);
-	}
+	if (!sp && task == current)
+		sp = get_stack_pointer(current, NULL);
 
-	show_stack_log_lvl(task, NULL, sp, bp, "");
+	show_stack_log_lvl(task, NULL, sp, "");
 }
 
 void show_stack_regs(struct pt_regs *regs)
 {
-	show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, "");
+	show_stack_log_lvl(current, regs, NULL, "");
 }
 
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
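
[ For context: in_task_stack() above, and the get_stack_info()
  implementations in the two files below, fill in the small stack
  descriptor this series adds to <asm/stacktrace.h>. Roughly (hedged
  reconstruction, not part of this diff): ]

	/* which stack an address is on, its bounds, and the link to
	 * the next stack in the chain */
	enum stack_type {
		STACK_TYPE_UNKNOWN,
		STACK_TYPE_TASK,
		STACK_TYPE_IRQ,
		STACK_TYPE_SOFTIRQ,
		STACK_TYPE_EXCEPTION,
		STACK_TYPE_EXCEPTION_LAST =
			STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS - 1,
	};

	struct stack_info {
		enum stack_type type;
		unsigned long *begin, *end, *next_sp;
	};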
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 09675712eba8..06eb322b5f9f 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,93 +16,121 @@
 
 #include <asm/stacktrace.h>
 
-static void *is_irq_stack(void *p, void *irq)
+void stack_type_str(enum stack_type type, const char **begin, const char **end)
 {
-	if (p < irq || p >= (irq + THREAD_SIZE))
-		return NULL;
-	return irq + THREAD_SIZE;
+	switch (type) {
+	case STACK_TYPE_IRQ:
+	case STACK_TYPE_SOFTIRQ:
+		*begin = "IRQ";
+		*end = "EOI";
+		break;
+	default:
+		*begin = NULL;
+		*end = NULL;
+	}
 }
 
-
-static void *is_hardirq_stack(unsigned long *stack, int cpu)
+static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
 {
-	void *irq = per_cpu(hardirq_stack, cpu);
+	unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
+	unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
 
-	return is_irq_stack(stack, irq);
-}
+	/*
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
+	 */
+	if (stack < begin || stack > end)
+		return false;
 
-static void *is_softirq_stack(unsigned long *stack, int cpu)
-{
-	void *irq = per_cpu(softirq_stack, cpu);
+	info->type = STACK_TYPE_IRQ;
+	info->begin = begin;
+	info->end = end;
 
-	return is_irq_stack(stack, irq);
+	/*
+	 * See irq_32.c -- the next stack pointer is stored at the beginning of
+	 * the stack.
+	 */
+	info->next_sp = (unsigned long *)*begin;
+
+	return true;
 }
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data)
+static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
 {
-	const unsigned cpu = get_cpu();
-	int graph = 0;
-	u32 *prev_esp;
+	unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
+	unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
 
-	if (!task)
-		task = current;
+	/*
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
+	 */
+	if (stack < begin || stack > end)
+		return false;
 
-	if (!stack) {
-		unsigned long dummy;
+	info->type = STACK_TYPE_SOFTIRQ;
+	info->begin = begin;
+	info->end = end;
 
-		stack = &dummy;
-		if (task != current)
-			stack = (unsigned long *)task->thread.sp;
-	}
+	/*
+	 * The next stack pointer is stored at the beginning of the stack.
+	 * See irq_32.c.
+	 */
+	info->next_sp = (unsigned long *)*begin;
 
-	if (!bp)
-		bp = stack_frame(task, regs);
+	return true;
+}
 
-	for (;;) {
-		void *end_stack;
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask)
+{
+	if (!stack)
+		goto unknown;
 
-		end_stack = is_hardirq_stack(stack, cpu);
-		if (!end_stack)
-			end_stack = is_softirq_stack(stack, cpu);
+	task = task ? : current;
 
-		bp = ops->walk_stack(task, stack, bp, ops, data,
-				     end_stack, &graph);
+	if (in_task_stack(stack, task, info))
+		goto recursion_check;
 
-		/* Stop if not on irq stack */
-		if (!end_stack)
-			break;
+	if (task != current)
+		goto unknown;
 
-		/* The previous esp is saved on the bottom of the stack */
-		prev_esp = (u32 *)(end_stack - THREAD_SIZE);
-		stack = (unsigned long *)*prev_esp;
-		if (!stack)
-			break;
+	if (in_hardirq_stack(stack, info))
+		goto recursion_check;
 
-		if (ops->stack(data, "IRQ") < 0)
-			break;
-		touch_nmi_watchdog();
+	if (in_softirq_stack(stack, info))
+		goto recursion_check;
+
+	goto unknown;
+
+recursion_check:
+	/*
+	 * Make sure we don't iterate through any given stack more than once.
+	 * If it comes up a second time then there's something wrong going on:
+	 * just break out and report an unknown stack type.
+	 */
+	if (visit_mask) {
+		if (*visit_mask & (1UL << info->type))
+			goto unknown;
+		*visit_mask |= 1UL << info->type;
 	}
-	put_cpu();
+
+	return 0;
+
+unknown:
+	info->type = STACK_TYPE_UNKNOWN;
+	return -EINVAL;
 }
-EXPORT_SYMBOL(dump_trace);
 
-void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *sp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
 
-	if (sp == NULL) {
-		if (regs)
-			sp = (unsigned long *)regs->sp;
-		else if (task)
-			sp = (unsigned long *)task->thread.sp;
-		else
-			sp = (unsigned long *)&sp;
-	}
+	if (!try_get_task_stack(task))
+		return;
+
+	sp = sp ? : get_stack_pointer(task, regs);
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
@@ -117,7 +145,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		touch_nmi_watchdog();
 	}
 	pr_cont("\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
+
+	put_task_stack(task);
 }
 
 
@@ -139,7 +169,7 @@ void show_regs(struct pt_regs *regs)
 	u8 *ip;
 
 	pr_emerg("Stack:\n");
-	show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
+	show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
 
 	pr_emerg("Code:");
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 9ee4520ce83c..36cf1a498227 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,261 +16,145 @@
 
 #include <asm/stacktrace.h>
 
+static char *exception_stack_names[N_EXCEPTION_STACKS] = {
+		[ DOUBLEFAULT_STACK-1 ]	= "#DF",
+		[ NMI_STACK-1 ]		= "NMI",
+		[ DEBUG_STACK-1 ]	= "#DB",
+		[ MCE_STACK-1 ]		= "#MC",
+};
 
-#define N_EXCEPTION_STACKS_END \
-		(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
-
-static char x86_stack_ids[][8] = {
-		[ DEBUG_STACK-1 ]	= "#DB",
-		[ NMI_STACK-1 ]		= "NMI",
-		[ DOUBLEFAULT_STACK-1 ]	= "#DF",
-		[ MCE_STACK-1 ]		= "#MC",
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
-		[ N_EXCEPTION_STACKS ...
-		  N_EXCEPTION_STACKS_END ]	= "#DB[?]"
-#endif
+static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
+	[0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
+	[DEBUG_STACK - 1]		= DEBUG_STKSZ
 };
 
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
-					 unsigned *usedp, char **idp)
+void stack_type_str(enum stack_type type, const char **begin, const char **end)
 {
-	unsigned k;
+	BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
 
-	/*
-	 * Iterate over all exception stacks, and figure out whether
-	 * 'stack' is in one of them:
-	 */
-	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
-		unsigned long end = per_cpu(orig_ist, cpu).ist[k];
-		/*
-		 * Is 'stack' above this exception frame's end?
-		 * If yes then skip to the next frame.
-		 */
-		if (stack >= end)
-			continue;
-		/*
-		 * Is 'stack' above this exception frame's start address?
-		 * If yes then we found the right frame.
-		 */
-		if (stack >= end - EXCEPTION_STKSZ) {
-			/*
-			 * Make sure we only iterate through an exception
-			 * stack once. If it comes up for the second time
-			 * then there's something wrong going on - just
-			 * break out and return NULL:
-			 */
-			if (*usedp & (1U << k))
-				break;
-			*usedp |= 1U << k;
-			*idp = x86_stack_ids[k];
-			return (unsigned long *)end;
-		}
-		/*
-		 * If this is a debug stack, and if it has a larger size than
-		 * the usual exception stacks, then 'stack' might still
-		 * be within the lower portion of the debug stack:
-		 */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
-		if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
-			unsigned j = N_EXCEPTION_STACKS - 1;
-
-			/*
-			 * Black magic. A large debug stack is composed of
-			 * multiple exception stack entries, which we
-			 * iterate through now. Dont look:
-			 */
-			do {
-				++j;
-				end -= EXCEPTION_STKSZ;
-				x86_stack_ids[j][4] = '1' +
-						(j - N_EXCEPTION_STACKS);
-			} while (stack < end - EXCEPTION_STKSZ);
-			if (*usedp & (1U << j))
-				break;
-			*usedp |= 1U << j;
-			*idp = x86_stack_ids[j];
-			return (unsigned long *)end;
-		}
-#endif
+	switch (type) {
+	case STACK_TYPE_IRQ:
+		*begin = "IRQ";
+		*end = "EOI";
+		break;
+	case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
+		*begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
+		*end = "EOE";
+		break;
+	default:
+		*begin = NULL;
+		*end = NULL;
 	}
-	return NULL;
 }
 
-static inline int
-in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
-	     unsigned long *irq_stack_end)
+static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
 {
-	return (stack >= irq_stack && stack < irq_stack_end);
-}
-
-static const unsigned long irq_stack_size =
-	(IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
-
-enum stack_type {
-	STACK_IS_UNKNOWN,
-	STACK_IS_NORMAL,
-	STACK_IS_EXCEPTION,
-	STACK_IS_IRQ,
-};
-
-static enum stack_type
-analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
-	      unsigned long **stack_end, unsigned long *irq_stack,
-	      unsigned *used, char **id)
-{
-	unsigned long addr;
+	unsigned long *begin, *end;
+	struct pt_regs *regs;
+	unsigned k;
 
-	addr = ((unsigned long)stack & (~(THREAD_SIZE - 1)));
-	if ((unsigned long)task_stack_page(task) == addr)
-		return STACK_IS_NORMAL;
+	BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
 
-	*stack_end = in_exception_stack(cpu, (unsigned long)stack,
-					used, id);
-	if (*stack_end)
-		return STACK_IS_EXCEPTION;
+	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
+		end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
+		begin = end - (exception_stack_sizes[k] / sizeof(long));
+		regs = (struct pt_regs *)end - 1;
 
-	if (!irq_stack)
-		return STACK_IS_NORMAL;
+		if (stack < begin || stack >= end)
+			continue;
 
-	*stack_end = irq_stack;
-	irq_stack = irq_stack - irq_stack_size;
+		info->type = STACK_TYPE_EXCEPTION + k;
+		info->begin = begin;
+		info->end = end;
+		info->next_sp = (unsigned long *)regs->sp;
 
-	if (in_irq_stack(stack, irq_stack, *stack_end))
-		return STACK_IS_IRQ;
+		return true;
+	}
 
-	return STACK_IS_UNKNOWN;
+	return false;
 }
 
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data)
+static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
 {
-	const unsigned cpu = get_cpu();
-	unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
-	unsigned long dummy;
-	unsigned used = 0;
-	int graph = 0;
-	int done = 0;
-
-	if (!task)
-		task = current;
-
-	if (!stack) {
-		if (regs)
-			stack = (unsigned long *)regs->sp;
-		else if (task != current)
-			stack = (unsigned long *)task->thread.sp;
-		else
-			stack = &dummy;
-	}
+	unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
+	unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
 
-	if (!bp)
-		bp = stack_frame(task, regs);
 	/*
-	 * Print function call entries in all stacks, starting at the
-	 * current stack address. If the stacks consist of nested
-	 * exceptions
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
 	 */
-	while (!done) {
-		unsigned long *stack_end;
-		enum stack_type stype;
-		char *id;
+	if (stack < begin || stack > end)
+		return false;
 
-		stype = analyze_stack(cpu, task, stack, &stack_end,
-				      irq_stack, &used, &id);
+	info->type = STACK_TYPE_IRQ;
+	info->begin = begin;
+	info->end = end;
 
-		/* Default finish unless specified to continue */
-		done = 1;
+	/*
+	 * The next stack pointer is the first thing pushed by the entry code
+	 * after switching to the irq stack.
	 */
+	info->next_sp = (unsigned long *)*(end - 1);
 
-		switch (stype) {
+	return true;
+}
 
-		/* Break out early if we are on the thread stack */
-		case STACK_IS_NORMAL:
-			break;
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask)
+{
+	if (!stack)
+		goto unknown;
 
-		case STACK_IS_EXCEPTION:
+	task = task ? : current;
 
-			if (ops->stack(data, id) < 0)
-				break;
+	if (in_task_stack(stack, task, info))
+		goto recursion_check;
 
-			bp = ops->walk_stack(task, stack, bp, ops,
-					     data, stack_end, &graph);
-			ops->stack(data, "<EOE>");
-			/*
-			 * We link to the next stack via the
-			 * second-to-last pointer (index -2 to end) in the
-			 * exception stack:
-			 */
-			stack = (unsigned long *) stack_end[-2];
-			done = 0;
-			break;
+	if (task != current)
+		goto unknown;
 
-		case STACK_IS_IRQ:
+	if (in_exception_stack(stack, info))
+		goto recursion_check;
 
-			if (ops->stack(data, "IRQ") < 0)
-				break;
-			bp = ops->walk_stack(task, stack, bp,
-					     ops, data, stack_end, &graph);
-			/*
-			 * We link to the next stack (which would be
-			 * the process stack normally) the last
-			 * pointer (index -1 to end) in the IRQ stack:
-			 */
-			stack = (unsigned long *) (stack_end[-1]);
-			irq_stack = NULL;
-			ops->stack(data, "EOI");
-			done = 0;
-			break;
+	if (in_irq_stack(stack, info))
+		goto recursion_check;
 
-		case STACK_IS_UNKNOWN:
-			ops->stack(data, "UNK");
-			break;
-		}
-	}
+	goto unknown;
 
+recursion_check:
 	/*
-	 * This handles the process stack:
+	 * Make sure we don't iterate through any given stack more than once.
+	 * If it comes up a second time then there's something wrong going on:
+	 * just break out and report an unknown stack type.
 	 */
-	bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
-	put_cpu();
+	if (visit_mask) {
+		if (*visit_mask & (1UL << info->type))
+			goto unknown;
+		*visit_mask |= 1UL << info->type;
+	}
+
+	return 0;
+
+unknown:
+	info->type = STACK_TYPE_UNKNOWN;
+	return -EINVAL;
 }
-EXPORT_SYMBOL(dump_trace);
 
-void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *sp, char *log_lvl)
 {
 	unsigned long *irq_stack_end;
 	unsigned long *irq_stack;
 	unsigned long *stack;
-	int cpu;
 	int i;
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	if (!try_get_task_stack(task))
+		return;
 
-	irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
-	irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
+	irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
+	irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
 
-	/*
-	 * Debugging aid: "show_stack(NULL, NULL);" prints the
-	 * back trace for this cpu:
-	 */
-	if (sp == NULL) {
-		if (regs)
-			sp = (unsigned long *)regs->sp;
-		else if (task)
-			sp = (unsigned long *)task->thread.sp;
-		else
-			sp = (unsigned long *)&sp;
-	}
+	sp = sp ? : get_stack_pointer(task, regs);
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
@@ -299,18 +183,17 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		stack++;
 		touch_nmi_watchdog();
 	}
-	preempt_enable();
 
 	pr_cont("\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
+
+	put_task_stack(task);
 }
 
 void show_regs(struct pt_regs *regs)
 {
 	int i;
-	unsigned long sp;
 
-	sp = regs->sp;
 	show_regs_print_info(KERN_DEFAULT);
 	__show_regs(regs, 1);
 
@@ -325,8 +208,7 @@ void show_regs(struct pt_regs *regs)
 	u8 *ip;
 
 	printk(KERN_DEFAULT "Stack:\n");
-	show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-			   0, KERN_DEFAULT);
+	show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
 
 	printk(KERN_DEFAULT "Code: ");
 
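
[ For context: in_exception_stack() above indexes the per-cpu orig_ist
  table, which records the *top* of each IST exception stack as it was
  programmed into the TSS; the code derives 'begin' by subtracting the
  stack's size. Roughly the declaration from <asm/processor.h> (hedged
  reconstruction, not part of this diff): ]

	/* saved copy of the TSS interrupt stack table entries */
	struct orig_ist {
		unsigned long ist[7];
	};
	DECLARE_PER_CPU(struct orig_ist, orig_ist);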
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 93982aebb398..2f2b8c7ccb85 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void)
 	on_boot_cpu = 0;
 
 	WARN_ON_FPU(current->thread.fpu.fpstate_active);
-	current_thread_info()->status = 0;
 
 	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
 		eagerfpu = ENABLE;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d036cfb4495d..8639bb2ae058 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 	}
 
 	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-				     frame_pointer) == -EBUSY) {
+				     frame_pointer, parent) == -EBUSY) {
 		*parent = old;
 		return;
 	}
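
[ For context: the extra 'parent' argument records *where on the stack*
  the return address was hijacked by the function-graph tracer. That is
  what lets ftrace_graph_ret_addr() - used by the new unwinder in
  dumpstack.c above - map return_to_handler back to the real caller.
  Roughly its declaration from <linux/ftrace.h> (hedged reconstruction,
  not part of this diff): ]

	/* given a return address and its stack slot, undo the
	 * return_to_handler substitution */
	unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
					    unsigned long ret,
					    unsigned long *retp);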
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6f8902b0d151..5f401262f12d 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
  */
 __HEAD
 ENTRY(startup_32)
-	movl pa(stack_start),%ecx
+	movl pa(initial_stack),%ecx
 
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
@@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4
  * start_secondary().
  */
 ENTRY(start_cpu0)
-	movl stack_start, %ecx
+	movl initial_stack, %ecx
 	movl %ecx, %esp
 	jmp *(initial_code)
 ENDPROC(start_cpu0)
@@ -307,7 +307,7 @@ ENTRY(startup_32_smp)
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-	movl pa(stack_start),%ecx
+	movl pa(initial_stack),%ecx
 	movl %eax,%ss
 	leal -__PAGE_OFFSET(%ecx),%esp
 
@@ -703,7 +703,7 @@ ENTRY(initial_page_table)
 
 .data
 .balign 4
-ENTRY(stack_start)
+ENTRY(initial_stack)
 	.long init_thread_union+THREAD_SIZE
 
 __INITRODATA
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 9f8efc9f0075..c98a559c346e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,7 +66,7 @@ startup_64:
 	 */
 
 	/*
-	 * Setup stack for verify_cpu(). "-8" because stack_start is defined
+	 * Setup stack for verify_cpu(). "-8" because initial_stack is defined
 	 * this way, see below. Our best guess is a NULL ptr for stack
 	 * termination heuristics and we don't want to break anything which
 	 * might depend on it (kgdb, ...).
@@ -226,7 +226,7 @@ ENTRY(secondary_startup_64)
 	movq %rax, %cr0
 
 	/* Setup a boot time stack */
-	movq stack_start(%rip), %rsp
+	movq initial_stack(%rip), %rsp
 
 	/* zero EFLAGS after setting rsp */
 	pushq $0
@@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64)
  * start_secondary().
  */
 ENTRY(start_cpu0)
-	movq stack_start(%rip),%rsp
+	movq initial_stack(%rip),%rsp
 	movq initial_code(%rip),%rax
 	pushq $0		# fake return address to stop unwinder
 	pushq $__KERNEL_CS	# set correct cs
@@ -319,17 +319,15 @@ ENTRY(start_cpu0)
 ENDPROC(start_cpu0)
 #endif
 
-/* SMP bootup changes these two */
+/* Both SMP bootup and ACPI suspend change these variables */
 __REFDATA
 	.balign	8
 GLOBAL(initial_code)
 	.quad	x86_64_start_kernel
 GLOBAL(initial_gs)
 	.quad	INIT_PER_CPU_VAR(irq_stack_union)
-
-GLOBAL(stack_start)
+GLOBAL(initial_stack)
 	.quad	init_thread_union+THREAD_SIZE-8
-	.word	0
 __FINITDATA
 
 bad_address:
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4a7903714065..9ebd0b0e73d9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) | |||
40 | if (user_mode(regs)) | 40 | if (user_mode(regs)) |
41 | return; | 41 | return; |
42 | 42 | ||
43 | if (regs->sp >= curbase + sizeof(struct thread_info) + | 43 | if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && |
44 | sizeof(struct pt_regs) + STACK_TOP_MARGIN && | ||
45 | regs->sp <= curbase + THREAD_SIZE) | 44 | regs->sp <= curbase + THREAD_SIZE) |
46 | return; | 45 | return; |
47 | 46 | ||
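With thread_info moved off the stack in this series, the lower bound of the valid-SP window no longer reserves room for it. A minimal sketch of the resulting check, using the names from the hunk above (STACK_TOP_MARGIN is local to irq_64.c):

    /* Sketch: the tightened stack-overflow window from the hunk above.
     * A kernel SP is plausible if it leaves room for a pt_regs frame
     * plus a safety margin above the stack base. */
    static bool sp_in_valid_window(unsigned long sp, unsigned long curbase)
    {
            unsigned long low  = curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN;
            unsigned long high = curbase + THREAD_SIZE;

            return sp >= low && sp <= high;
    }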
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 04cde527d728..8e36f249646e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <asm/apicdef.h> | 50 | #include <asm/apicdef.h> |
51 | #include <asm/apic.h> | 51 | #include <asm/apic.h> |
52 | #include <asm/nmi.h> | 52 | #include <asm/nmi.h> |
53 | #include <asm/switch_to.h> | ||
53 | 54 | ||
54 | struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = | 55 | struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = |
55 | { | 56 | { |
@@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
166 | gdb_regs[GDB_DX] = 0; | 167 | gdb_regs[GDB_DX] = 0; |
167 | gdb_regs[GDB_SI] = 0; | 168 | gdb_regs[GDB_SI] = 0; |
168 | gdb_regs[GDB_DI] = 0; | 169 | gdb_regs[GDB_DI] = 0; |
169 | gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; | 170 | gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp; |
170 | #ifdef CONFIG_X86_32 | 171 | #ifdef CONFIG_X86_32 |
171 | gdb_regs[GDB_DS] = __KERNEL_DS; | 172 | gdb_regs[GDB_DS] = __KERNEL_DS; |
172 | gdb_regs[GDB_ES] = __KERNEL_DS; | 173 | gdb_regs[GDB_ES] = __KERNEL_DS; |
173 | gdb_regs[GDB_PS] = 0; | 174 | gdb_regs[GDB_PS] = 0; |
174 | gdb_regs[GDB_CS] = __KERNEL_CS; | 175 | gdb_regs[GDB_CS] = __KERNEL_CS; |
175 | gdb_regs[GDB_PC] = p->thread.ip; | ||
176 | gdb_regs[GDB_SS] = __KERNEL_DS; | 176 | gdb_regs[GDB_SS] = __KERNEL_DS; |
177 | gdb_regs[GDB_FS] = 0xFFFF; | 177 | gdb_regs[GDB_FS] = 0xFFFF; |
178 | gdb_regs[GDB_GS] = 0xFFFF; | 178 | gdb_regs[GDB_GS] = 0xFFFF; |
179 | #else | 179 | #else |
180 | gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); | 180 | gdb_regs32[GDB_PS] = 0; |
181 | gdb_regs32[GDB_CS] = __KERNEL_CS; | 181 | gdb_regs32[GDB_CS] = __KERNEL_CS; |
182 | gdb_regs32[GDB_SS] = __KERNEL_DS; | 182 | gdb_regs32[GDB_SS] = __KERNEL_DS; |
183 | gdb_regs[GDB_PC] = 0; | ||
184 | gdb_regs[GDB_R8] = 0; | 183 | gdb_regs[GDB_R8] = 0; |
185 | gdb_regs[GDB_R9] = 0; | 184 | gdb_regs[GDB_R9] = 0; |
186 | gdb_regs[GDB_R10] = 0; | 185 | gdb_regs[GDB_R10] = 0; |
@@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
190 | gdb_regs[GDB_R14] = 0; | 189 | gdb_regs[GDB_R14] = 0; |
191 | gdb_regs[GDB_R15] = 0; | 190 | gdb_regs[GDB_R15] = 0; |
192 | #endif | 191 | #endif |
192 | gdb_regs[GDB_PC] = 0; | ||
193 | gdb_regs[GDB_SP] = p->thread.sp; | 193 | gdb_regs[GDB_SP] = p->thread.sp; |
194 | } | 194 | } |
195 | 195 | ||
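The new accesses above rely on the frame layout this series adds in asm/switch_to.h; a sketch of those definitions, hedged against the exact tree state (field names match the later process_32.c/process_64.c hunks):

    /* Sketch of the switch frame introduced by this series.
     * thread.sp points at an inactive_task_frame; in copy_thread_tls()
     * it sits directly below the child's pt_regs, forming a fork_frame. */
    struct inactive_task_frame {
    #ifdef CONFIG_X86_64
            unsigned long r15, r14, r13, r12;
    #else
            unsigned long si, di;
    #endif
            unsigned long bx;
            unsigned long bp;       /* what sleeping_thread_to_gdb_regs() reads */
            unsigned long ret_addr; /* what thread_saved_pc() reads */
    };

    struct fork_frame {
            struct inactive_task_frame frame;
            struct pt_regs regs;
    };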
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index c2bedaea11f7..4afc67f5facc 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c | |||
@@ -184,7 +184,7 @@ out: | |||
184 | 184 | ||
185 | static struct kobj_attribute type_attr = __ATTR_RO(type); | 185 | static struct kobj_attribute type_attr = __ATTR_RO(type); |
186 | 186 | ||
187 | static struct bin_attribute data_attr = { | 187 | static struct bin_attribute data_attr __ro_after_init = { |
188 | .attr = { | 188 | .attr = { |
189 | .name = "data", | 189 | .name = "data", |
190 | .mode = S_IRUGO, | 190 | .mode = S_IRUGO, |
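__ro_after_init, used here and in several hunks below, keeps a variable writable during boot and write-protects it once init completes. A minimal usage sketch; example_param and example_setup are hypothetical:

    /* Sketch: __ro_after_init data may still be written during boot
     * (e.g. from an __init parameter parser) but is mapped read-only
     * once the kernel marks rodata. */
    static int example_param __ro_after_init = 1;

    static int __init example_setup(char *str)
    {
            example_param = 0;      /* fine: runs before mark_rodata_ro() */
            return 1;
    }
    __setup("noexample", example_setup);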
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 3692249a70f1..60b9949f1e65 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <asm/x86_init.h> | 29 | #include <asm/x86_init.h> |
30 | #include <asm/reboot.h> | 30 | #include <asm/reboot.h> |
31 | 31 | ||
32 | static int kvmclock = 1; | 32 | static int kvmclock __ro_after_init = 1; |
33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | 33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; |
34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | 34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; |
35 | static cycle_t kvm_sched_clock_offset; | 35 | static cycle_t kvm_sched_clock_offset; |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1acfd76e3e26..bbf3d5933eaa 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = { | |||
332 | .read_cr0 = native_read_cr0, | 332 | .read_cr0 = native_read_cr0, |
333 | .write_cr0 = native_write_cr0, | 333 | .write_cr0 = native_write_cr0, |
334 | .read_cr4 = native_read_cr4, | 334 | .read_cr4 = native_read_cr4, |
335 | .read_cr4_safe = native_read_cr4_safe, | ||
336 | .write_cr4 = native_write_cr4, | 335 | .write_cr4 = native_write_cr4, |
337 | #ifdef CONFIG_X86_64 | 336 | #ifdef CONFIG_X86_64 |
338 | .read_cr8 = native_read_cr8, | 337 | .read_cr8 = native_read_cr8, |
@@ -389,7 +388,7 @@ NOKPROBE_SYMBOL(native_load_idt); | |||
389 | #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) | 388 | #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) |
390 | #endif | 389 | #endif |
391 | 390 | ||
392 | struct pv_mmu_ops pv_mmu_ops = { | 391 | struct pv_mmu_ops pv_mmu_ops __ro_after_init = { |
393 | 392 | ||
394 | .read_cr2 = native_read_cr2, | 393 | .read_cr2 = native_read_cr2, |
395 | .write_cr2 = native_write_cr2, | 394 | .write_cr2 = native_write_cr2, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0ea2ce4..4002b475171c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/tlbflush.h> | 32 | #include <asm/tlbflush.h> |
33 | #include <asm/mce.h> | 33 | #include <asm/mce.h> |
34 | #include <asm/vm86.h> | 34 | #include <asm/vm86.h> |
35 | #include <asm/switch_to.h> | ||
35 | 36 | ||
36 | /* | 37 | /* |
37 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, | 38 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, |
@@ -513,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) | |||
513 | } | 514 | } |
514 | 515 | ||
515 | /* | 516 | /* |
517 | * Return saved PC of a blocked thread. | ||
518 | * What is this good for? It will always be the scheduler or ret_from_fork. ||
519 | */ | ||
520 | unsigned long thread_saved_pc(struct task_struct *tsk) | ||
521 | { | ||
522 | struct inactive_task_frame *frame = | ||
523 | (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); | ||
524 | return READ_ONCE_NOCHECK(frame->ret_addr); | ||
525 | } | ||
526 | |||
527 | /* | ||
516 | * Called from fs/proc with a reference on @p to find the function | 528 | * Called from fs/proc with a reference on @p to find the function |
517 | * which called into schedule(). This needs to be done carefully | 529 | * which called into schedule(). This needs to be done carefully |
518 | * because the task might wake up and we might look at a stack | 530 | * because the task might wake up and we might look at a stack |
@@ -520,15 +532,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) | |||
520 | */ | 532 | */ |
521 | unsigned long get_wchan(struct task_struct *p) | 533 | unsigned long get_wchan(struct task_struct *p) |
522 | { | 534 | { |
523 | unsigned long start, bottom, top, sp, fp, ip; | 535 | unsigned long start, bottom, top, sp, fp, ip, ret = 0; |
524 | int count = 0; | 536 | int count = 0; |
525 | 537 | ||
526 | if (!p || p == current || p->state == TASK_RUNNING) | 538 | if (!p || p == current || p->state == TASK_RUNNING) |
527 | return 0; | 539 | return 0; |
528 | 540 | ||
541 | if (!try_get_task_stack(p)) | ||
542 | return 0; | ||
543 | |||
529 | start = (unsigned long)task_stack_page(p); | 544 | start = (unsigned long)task_stack_page(p); |
530 | if (!start) | 545 | if (!start) |
531 | return 0; | 546 | goto out; |
532 | 547 | ||
533 | /* | 548 | /* |
534 | * Layout of the stack page: | 549 | * Layout of the stack page: |
@@ -537,9 +552,7 @@ unsigned long get_wchan(struct task_struct *p) | |||
537 | * PADDING | 552 | * PADDING |
538 | * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING | 553 | * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING |
539 | * stack | 554 | * stack |
540 | * ----------- bottom = start + sizeof(thread_info) | 555 | * ----------- bottom = start |
541 | * thread_info | ||
542 | * ----------- start | ||
543 | * | 556 | * |
544 | * The tasks stack pointer points at the location where the | 557 | * The tasks stack pointer points at the location where the |
545 | * framepointer is stored. The data on the stack is: | 558 | * framepointer is stored. The data on the stack is: |
@@ -550,20 +563,25 @@ unsigned long get_wchan(struct task_struct *p) | |||
550 | */ | 563 | */ |
551 | top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; | 564 | top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; |
552 | top -= 2 * sizeof(unsigned long); | 565 | top -= 2 * sizeof(unsigned long); |
553 | bottom = start + sizeof(struct thread_info); | 566 | bottom = start; |
554 | 567 | ||
555 | sp = READ_ONCE(p->thread.sp); | 568 | sp = READ_ONCE(p->thread.sp); |
556 | if (sp < bottom || sp > top) | 569 | if (sp < bottom || sp > top) |
557 | return 0; | 570 | goto out; |
558 | 571 | ||
559 | fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); | 572 | fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); |
560 | do { | 573 | do { |
561 | if (fp < bottom || fp > top) | 574 | if (fp < bottom || fp > top) |
562 | return 0; | 575 | goto out; |
563 | ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); | 576 | ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); |
564 | if (!in_sched_functions(ip)) | 577 | if (!in_sched_functions(ip)) { |
565 | return ip; | 578 | ret = ip; |
579 | goto out; | ||
580 | } | ||
566 | fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); | 581 | fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); |
567 | } while (count++ < 16 && p->state != TASK_RUNNING); | 582 | } while (count++ < 16 && p->state != TASK_RUNNING); |
568 | return 0; | 583 | |
584 | out: | ||
585 | put_task_stack(p); | ||
586 | return ret; | ||
569 | } | 587 | } |
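Because a dead task's stack can now be freed early (CONFIG_THREAD_INFO_IN_TASK), remote stack walkers follow the pin/unpin pattern that get_wchan() adopts above. A reduced sketch of the pattern; inspect_stack() is a hypothetical placeholder for the actual walk:

    /* Sketch: pin a remote task's stack before touching it. */
    static unsigned long walk_remote_stack(struct task_struct *p)
    {
            unsigned long ret = 0;

            if (!try_get_task_stack(p))
                    return 0;       /* stack already freed */

            ret = inspect_stack(p); /* safe: the stack cannot vanish here */

            put_task_stack(p);
            return ret;
    }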
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d86be29c38c7..bd7be8efdc4c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -55,17 +55,6 @@ | |||
55 | #include <asm/switch_to.h> | 55 | #include <asm/switch_to.h> |
56 | #include <asm/vm86.h> | 56 | #include <asm/vm86.h> |
57 | 57 | ||
58 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | ||
59 | asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); | ||
60 | |||
61 | /* | ||
62 | * Return saved PC of a blocked thread. | ||
63 | */ | ||
64 | unsigned long thread_saved_pc(struct task_struct *tsk) | ||
65 | { | ||
66 | return ((unsigned long *)tsk->thread.sp)[3]; | ||
67 | } | ||
68 | |||
69 | void __show_regs(struct pt_regs *regs, int all) | 58 | void __show_regs(struct pt_regs *regs, int all) |
70 | { | 59 | { |
71 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; | 60 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; |
@@ -101,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
101 | cr0 = read_cr0(); | 90 | cr0 = read_cr0(); |
102 | cr2 = read_cr2(); | 91 | cr2 = read_cr2(); |
103 | cr3 = read_cr3(); | 92 | cr3 = read_cr3(); |
104 | cr4 = __read_cr4_safe(); | 93 | cr4 = __read_cr4(); |
105 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", | 94 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", |
106 | cr0, cr2, cr3, cr4); | 95 | cr0, cr2, cr3, cr4); |
107 | 96 | ||
@@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
133 | unsigned long arg, struct task_struct *p, unsigned long tls) | 122 | unsigned long arg, struct task_struct *p, unsigned long tls) |
134 | { | 123 | { |
135 | struct pt_regs *childregs = task_pt_regs(p); | 124 | struct pt_regs *childregs = task_pt_regs(p); |
125 | struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs); | ||
126 | struct inactive_task_frame *frame = &fork_frame->frame; | ||
136 | struct task_struct *tsk; | 127 | struct task_struct *tsk; |
137 | int err; | 128 | int err; |
138 | 129 | ||
139 | p->thread.sp = (unsigned long) childregs; | 130 | frame->bp = 0; |
131 | frame->ret_addr = (unsigned long) ret_from_fork; | ||
132 | p->thread.sp = (unsigned long) fork_frame; | ||
140 | p->thread.sp0 = (unsigned long) (childregs+1); | 133 | p->thread.sp0 = (unsigned long) (childregs+1); |
141 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 134 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
142 | 135 | ||
143 | if (unlikely(p->flags & PF_KTHREAD)) { | 136 | if (unlikely(p->flags & PF_KTHREAD)) { |
144 | /* kernel thread */ | 137 | /* kernel thread */ |
145 | memset(childregs, 0, sizeof(struct pt_regs)); | 138 | memset(childregs, 0, sizeof(struct pt_regs)); |
146 | p->thread.ip = (unsigned long) ret_from_kernel_thread; | 139 | frame->bx = sp; /* function */ |
147 | task_user_gs(p) = __KERNEL_STACK_CANARY; | 140 | frame->di = arg; |
148 | childregs->ds = __USER_DS; | ||
149 | childregs->es = __USER_DS; | ||
150 | childregs->fs = __KERNEL_PERCPU; | ||
151 | childregs->bx = sp; /* function */ | ||
152 | childregs->bp = arg; | ||
153 | childregs->orig_ax = -1; | ||
154 | childregs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
155 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; | ||
156 | p->thread.io_bitmap_ptr = NULL; | 141 | p->thread.io_bitmap_ptr = NULL; |
157 | return 0; | 142 | return 0; |
158 | } | 143 | } |
144 | frame->bx = 0; | ||
159 | *childregs = *current_pt_regs(); | 145 | *childregs = *current_pt_regs(); |
160 | childregs->ax = 0; | 146 | childregs->ax = 0; |
161 | if (sp) | 147 | if (sp) |
162 | childregs->sp = sp; | 148 | childregs->sp = sp; |
163 | 149 | ||
164 | p->thread.ip = (unsigned long) ret_from_fork; | ||
165 | task_user_gs(p) = get_user_gs(current_pt_regs()); | 150 | task_user_gs(p) = get_user_gs(current_pt_regs()); |
166 | 151 | ||
167 | p->thread.io_bitmap_ptr = NULL; | 152 | p->thread.io_bitmap_ptr = NULL; |
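Both the 32-bit and (below) the 64-bit copy_thread_tls() now build the same structure; a layout sketch of the resulting child stack, hedged against the exact field order:

    /*
     * Sketch: child stack after copy_thread_tls(), per the hunks here.
     *
     *   p->thread.sp0 -> +----------------------+  (top of stack)
     *                    |       pt_regs        |  fork_frame->regs
     *                    +----------------------+
     *                    |  ret_addr = ret_from_fork
     *                    |  bp = 0
     *                    |  bx = function (kthread) or 0
     *                    |  di (32-bit) / r12 (64-bit) = kthread argument
     *   p->thread.sp  -> +----------------------+  fork_frame->frame
     *
     * The child's first context switch pops the callee-saved registers
     * and "returns" to ret_from_fork, which either calls the kernel
     * thread function or drops to user mode through the pt_regs.
     */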
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8f84bf..de9acaf2d371 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -50,8 +50,6 @@ | |||
50 | #include <asm/switch_to.h> | 50 | #include <asm/switch_to.h> |
51 | #include <asm/xen/hypervisor.h> | 51 | #include <asm/xen/hypervisor.h> |
52 | 52 | ||
53 | asmlinkage extern void ret_from_fork(void); | ||
54 | |||
55 | __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); | 53 | __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); |
56 | 54 | ||
57 | /* Prints also some state that isn't saved in the pt_regs */ | 55 | /* Prints also some state that isn't saved in the pt_regs */ |
@@ -141,12 +139,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
141 | { | 139 | { |
142 | int err; | 140 | int err; |
143 | struct pt_regs *childregs; | 141 | struct pt_regs *childregs; |
142 | struct fork_frame *fork_frame; | ||
143 | struct inactive_task_frame *frame; | ||
144 | struct task_struct *me = current; | 144 | struct task_struct *me = current; |
145 | 145 | ||
146 | p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; | 146 | p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; |
147 | childregs = task_pt_regs(p); | 147 | childregs = task_pt_regs(p); |
148 | p->thread.sp = (unsigned long) childregs; | 148 | fork_frame = container_of(childregs, struct fork_frame, regs); |
149 | set_tsk_thread_flag(p, TIF_FORK); | 149 | frame = &fork_frame->frame; |
150 | frame->bp = 0; | ||
151 | frame->ret_addr = (unsigned long) ret_from_fork; | ||
152 | p->thread.sp = (unsigned long) fork_frame; | ||
150 | p->thread.io_bitmap_ptr = NULL; | 153 | p->thread.io_bitmap_ptr = NULL; |
151 | 154 | ||
152 | savesegment(gs, p->thread.gsindex); | 155 | savesegment(gs, p->thread.gsindex); |
@@ -160,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
160 | if (unlikely(p->flags & PF_KTHREAD)) { | 163 | if (unlikely(p->flags & PF_KTHREAD)) { |
161 | /* kernel thread */ | 164 | /* kernel thread */ |
162 | memset(childregs, 0, sizeof(struct pt_regs)); | 165 | memset(childregs, 0, sizeof(struct pt_regs)); |
163 | childregs->sp = (unsigned long)childregs; | 166 | frame->bx = sp; /* function */ |
164 | childregs->ss = __KERNEL_DS; | 167 | frame->r12 = arg; |
165 | childregs->bx = sp; /* function */ | ||
166 | childregs->bp = arg; | ||
167 | childregs->orig_ax = -1; | ||
168 | childregs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
169 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; | ||
170 | return 0; | 168 | return 0; |
171 | } | 169 | } |
170 | frame->bx = 0; | ||
172 | *childregs = *current_pt_regs(); | 171 | *childregs = *current_pt_regs(); |
173 | 172 | ||
174 | childregs->ax = 0; | 173 | childregs->ax = 0; |
@@ -511,7 +510,7 @@ void set_personality_ia32(bool x32) | |||
511 | current->personality &= ~READ_IMPLIES_EXEC; | 510 | current->personality &= ~READ_IMPLIES_EXEC; |
512 | /* in_compat_syscall() uses the presence of the x32 | 511 | /* in_compat_syscall() uses the presence of the x32 |
513 | syscall bit flag to determine compat status */ | 512 | syscall bit flag to determine compat status */ |
514 | current_thread_info()->status &= ~TS_COMPAT; | 513 | current->thread.status &= ~TS_COMPAT; |
515 | } else { | 514 | } else { |
516 | set_thread_flag(TIF_IA32); | 515 | set_thread_flag(TIF_IA32); |
517 | clear_thread_flag(TIF_X32); | 516 | clear_thread_flag(TIF_X32); |
@@ -519,7 +518,7 @@ void set_personality_ia32(bool x32) | |||
519 | current->mm->context.ia32_compat = TIF_IA32; | 518 | current->mm->context.ia32_compat = TIF_IA32; |
520 | current->personality |= force_personality32; | 519 | current->personality |= force_personality32; |
521 | /* Prepare the first "return" to user space */ | 520 | /* Prepare the first "return" to user space */ |
522 | current_thread_info()->status |= TS_COMPAT; | 521 | current->thread.status |= TS_COMPAT; |
523 | } | 522 | } |
524 | } | 523 | } |
525 | EXPORT_SYMBOL_GPL(set_personality_ia32); | 524 | EXPORT_SYMBOL_GPL(set_personality_ia32); |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f79576a541ff..ce94c38cf4d6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) | |||
173 | return sp; | 173 | return sp; |
174 | 174 | ||
175 | prev_esp = (u32 *)(context); | 175 | prev_esp = (u32 *)(context); |
176 | if (prev_esp) | 176 | if (*prev_esp) |
177 | return (unsigned long)prev_esp; | 177 | return (unsigned long)*prev_esp; |
178 | 178 | ||
179 | return (unsigned long)regs; | 179 | return (unsigned long)regs; |
180 | } | 180 | } |
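The old test checked the pointer itself, which is computed from context and is always non-NULL, so the function returned the address of the slot rather than the saved ESP stored in it. A reduced sketch of the corrected tail:

    /* Sketch: test and return the saved value, not the always-valid
     * pointer to it; regs is the fallback when no ESP was saved. */
    prev_esp = (u32 *)context;
    if (*prev_esp)
            return (unsigned long)*prev_esp;

    return (unsigned long)regs;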
@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) | |||
934 | */ | 934 | */ |
935 | regs->orig_ax = value; | 935 | regs->orig_ax = value; |
936 | if (syscall_get_nr(child, regs) >= 0) | 936 | if (syscall_get_nr(child, regs) >= 0) |
937 | task_thread_info(child)->status |= TS_I386_REGS_POKED; | 937 | child->thread.status |= TS_I386_REGS_POKED; |
938 | break; | 938 | break; |
939 | 939 | ||
940 | case offsetof(struct user32, regs.eflags): | 940 | case offsetof(struct user32, regs.eflags): |
@@ -1250,7 +1250,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1250 | 1250 | ||
1251 | #ifdef CONFIG_X86_64 | 1251 | #ifdef CONFIG_X86_64 |
1252 | 1252 | ||
1253 | static struct user_regset x86_64_regsets[] __read_mostly = { | 1253 | static struct user_regset x86_64_regsets[] __ro_after_init = { |
1254 | [REGSET_GENERAL] = { | 1254 | [REGSET_GENERAL] = { |
1255 | .core_note_type = NT_PRSTATUS, | 1255 | .core_note_type = NT_PRSTATUS, |
1256 | .n = sizeof(struct user_regs_struct) / sizeof(long), | 1256 | .n = sizeof(struct user_regs_struct) / sizeof(long), |
@@ -1291,7 +1291,7 @@ static const struct user_regset_view user_x86_64_view = { | |||
1291 | #endif /* CONFIG_X86_64 */ | 1291 | #endif /* CONFIG_X86_64 */ |
1292 | 1292 | ||
1293 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 1293 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
1294 | static struct user_regset x86_32_regsets[] __read_mostly = { | 1294 | static struct user_regset x86_32_regsets[] __ro_after_init = { |
1295 | [REGSET_GENERAL] = { | 1295 | [REGSET_GENERAL] = { |
1296 | .core_note_type = NT_PRSTATUS, | 1296 | .core_note_type = NT_PRSTATUS, |
1297 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), | 1297 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), |
@@ -1344,7 +1344,7 @@ static const struct user_regset_view user_x86_32_view = { | |||
1344 | */ | 1344 | */ |
1345 | u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | 1345 | u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; |
1346 | 1346 | ||
1347 | void update_regset_xstate_info(unsigned int size, u64 xstate_mask) | 1347 | void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask) |
1348 | { | 1348 | { |
1349 | #ifdef CONFIG_X86_64 | 1349 | #ifdef CONFIG_X86_64 |
1350 | x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); | 1350 | x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 63bf27d972b7..e244c19a2451 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -705,7 +705,7 @@ static void native_machine_power_off(void) | |||
705 | tboot_shutdown(TB_SHUTDOWN_HALT); | 705 | tboot_shutdown(TB_SHUTDOWN_HALT); |
706 | } | 706 | } |
707 | 707 | ||
708 | struct machine_ops machine_ops = { | 708 | struct machine_ops machine_ops __ro_after_init = { |
709 | .power_off = native_machine_power_off, | 709 | .power_off = native_machine_power_off, |
710 | .shutdown = native_machine_shutdown, | 710 | .shutdown = native_machine_shutdown, |
711 | .emergency_restart = native_machine_emergency_restart, | 711 | .emergency_restart = native_machine_emergency_restart, |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2c4bc85dfe90..eeb094ea794a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -210,9 +210,9 @@ EXPORT_SYMBOL(boot_cpu_data); | |||
210 | 210 | ||
211 | 211 | ||
212 | #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) | 212 | #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) |
213 | __visible unsigned long mmu_cr4_features; | 213 | __visible unsigned long mmu_cr4_features __ro_after_init; |
214 | #else | 214 | #else |
215 | __visible unsigned long mmu_cr4_features = X86_CR4_PAE; | 215 | __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; |
216 | #endif | 216 | #endif |
217 | 217 | ||
218 | /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ | 218 | /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ |
@@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p) | |||
1137 | * auditing all the early-boot CR4 manipulation would be needed to | 1137 | * auditing all the early-boot CR4 manipulation would be needed to |
1138 | * rule it out. | 1138 | * rule it out. |
1139 | */ | 1139 | */ |
1140 | mmu_cr4_features = __read_cr4_safe(); | 1140 | mmu_cr4_features = __read_cr4(); |
1141 | 1141 | ||
1142 | memblock_set_current_limit(get_max_mapped()); | 1142 | memblock_set_current_limit(get_max_mapped()); |
1143 | 1143 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 7a40e068302d..2bbd27f89802 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -33,7 +33,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); | |||
33 | DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; | 33 | DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; |
34 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | 34 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); |
35 | 35 | ||
36 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { | 36 | unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = { |
37 | [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, | 37 | [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, |
38 | }; | 38 | }; |
39 | EXPORT_SYMBOL(__per_cpu_offset); | 39 | EXPORT_SYMBOL(__per_cpu_offset); |
@@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void) | |||
246 | #ifdef CONFIG_X86_64 | 246 | #ifdef CONFIG_X86_64 |
247 | per_cpu(irq_stack_ptr, cpu) = | 247 | per_cpu(irq_stack_ptr, cpu) = |
248 | per_cpu(irq_stack_union.irq_stack, cpu) + | 248 | per_cpu(irq_stack_union.irq_stack, cpu) + |
249 | IRQ_STACK_SIZE - 64; | 249 | IRQ_STACK_SIZE; |
250 | #endif | 250 | #endif |
251 | #ifdef CONFIG_NUMA | 251 | #ifdef CONFIG_NUMA |
252 | per_cpu(x86_cpu_to_node_map, cpu) = | 252 | per_cpu(x86_cpu_to_node_map, cpu) = |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 04cb3212db2d..da20ecb5397a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) | |||
783 | * than the tracee. | 783 | * than the tracee. |
784 | */ | 784 | */ |
785 | #ifdef CONFIG_IA32_EMULATION | 785 | #ifdef CONFIG_IA32_EMULATION |
786 | if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) | 786 | if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
787 | return __NR_ia32_restart_syscall; | 787 | return __NR_ia32_restart_syscall; |
788 | #endif | 788 | #endif |
789 | #ifdef CONFIG_X86_X32_ABI | 789 | #ifdef CONFIG_X86_X32_ABI |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 54e2f1a968a4..7249dcf2cbcb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -943,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
943 | per_cpu(cpu_current_top_of_stack, cpu) = | 943 | per_cpu(cpu_current_top_of_stack, cpu) = |
944 | (unsigned long)task_stack_page(idle) + THREAD_SIZE; | 944 | (unsigned long)task_stack_page(idle) + THREAD_SIZE; |
945 | #else | 945 | #else |
946 | clear_tsk_thread_flag(idle, TIF_FORK); | ||
947 | initial_gs = per_cpu_offset(cpu); | 946 | initial_gs = per_cpu_offset(cpu); |
948 | #endif | 947 | #endif |
949 | } | 948 | } |
@@ -970,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
970 | 969 | ||
971 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 970 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
972 | initial_code = (unsigned long)start_secondary; | 971 | initial_code = (unsigned long)start_secondary; |
973 | stack_start = idle->thread.sp; | 972 | initial_stack = idle->thread.sp; |
974 | 973 | ||
975 | /* | 974 | /* |
976 | * Enable the espfix hack for this CPU | 975 | * Enable the espfix hack for this CPU |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 4738f5e0f2ab..0653788026e2 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -8,80 +8,69 @@ | |||
8 | #include <linux/export.h> | 8 | #include <linux/export.h> |
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <asm/stacktrace.h> | 10 | #include <asm/stacktrace.h> |
11 | #include <asm/unwind.h> | ||
11 | 12 | ||
12 | static int save_stack_stack(void *data, char *name) | 13 | static int save_stack_address(struct stack_trace *trace, unsigned long addr, |
14 | bool nosched) | ||
13 | { | 15 | { |
14 | return 0; | ||
15 | } | ||
16 | |||
17 | static int | ||
18 | __save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) | ||
19 | { | ||
20 | struct stack_trace *trace = data; | ||
21 | #ifdef CONFIG_FRAME_POINTER | ||
22 | if (!reliable) | ||
23 | return 0; | ||
24 | #endif | ||
25 | if (nosched && in_sched_functions(addr)) | 16 | if (nosched && in_sched_functions(addr)) |
26 | return 0; | 17 | return 0; |
18 | |||
27 | if (trace->skip > 0) { | 19 | if (trace->skip > 0) { |
28 | trace->skip--; | 20 | trace->skip--; |
29 | return 0; | 21 | return 0; |
30 | } | 22 | } |
31 | if (trace->nr_entries < trace->max_entries) { | ||
32 | trace->entries[trace->nr_entries++] = addr; | ||
33 | return 0; | ||
34 | } else { | ||
35 | return -1; /* no more room, stop walking the stack */ | ||
36 | } | ||
37 | } | ||
38 | 23 | ||
39 | static int save_stack_address(void *data, unsigned long addr, int reliable) | 24 | if (trace->nr_entries >= trace->max_entries) |
40 | { | 25 | return -1; |
41 | return __save_stack_address(data, addr, reliable, false); | 26 | |
27 | trace->entries[trace->nr_entries++] = addr; | ||
28 | return 0; | ||
42 | } | 29 | } |
43 | 30 | ||
44 | static int | 31 | static void __save_stack_trace(struct stack_trace *trace, |
45 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) | 32 | struct task_struct *task, struct pt_regs *regs, |
33 | bool nosched) | ||
46 | { | 34 | { |
47 | return __save_stack_address(data, addr, reliable, true); | 35 | struct unwind_state state; |
48 | } | 36 | unsigned long addr; |
49 | 37 | ||
50 | static const struct stacktrace_ops save_stack_ops = { | 38 | if (regs) |
51 | .stack = save_stack_stack, | 39 | save_stack_address(trace, regs->ip, nosched); |
52 | .address = save_stack_address, | ||
53 | .walk_stack = print_context_stack, | ||
54 | }; | ||
55 | 40 | ||
56 | static const struct stacktrace_ops save_stack_ops_nosched = { | 41 | for (unwind_start(&state, task, regs, NULL); !unwind_done(&state); |
57 | .stack = save_stack_stack, | 42 | unwind_next_frame(&state)) { |
58 | .address = save_stack_address_nosched, | 43 | addr = unwind_get_return_address(&state); |
59 | .walk_stack = print_context_stack, | 44 | if (!addr || save_stack_address(trace, addr, nosched)) |
60 | }; | 45 | break; |
46 | } | ||
47 | |||
48 | if (trace->nr_entries < trace->max_entries) | ||
49 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
50 | } | ||
61 | 51 | ||
62 | /* | 52 | /* |
63 | * Save stack-backtrace addresses into a stack_trace buffer. | 53 | * Save stack-backtrace addresses into a stack_trace buffer. |
64 | */ | 54 | */ |
65 | void save_stack_trace(struct stack_trace *trace) | 55 | void save_stack_trace(struct stack_trace *trace) |
66 | { | 56 | { |
67 | dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); | 57 | __save_stack_trace(trace, current, NULL, false); |
68 | if (trace->nr_entries < trace->max_entries) | ||
69 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
70 | } | 58 | } |
71 | EXPORT_SYMBOL_GPL(save_stack_trace); | 59 | EXPORT_SYMBOL_GPL(save_stack_trace); |
72 | 60 | ||
73 | void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) | 61 | void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) |
74 | { | 62 | { |
75 | dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); | 63 | __save_stack_trace(trace, current, regs, false); |
76 | if (trace->nr_entries < trace->max_entries) | ||
77 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
78 | } | 64 | } |
79 | 65 | ||
80 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 66 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
81 | { | 67 | { |
82 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | 68 | if (!try_get_task_stack(tsk)) |
83 | if (trace->nr_entries < trace->max_entries) | 69 | return; |
84 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 70 | |
71 | __save_stack_trace(trace, tsk, NULL, true); | ||
72 | |||
73 | put_task_stack(tsk); | ||
85 | } | 74 | } |
86 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | 75 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); |
87 | 76 | ||
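The conversion above is representative of all the new unwinder call sites; a minimal sketch of a caller using only the interfaces this series adds (count_kernel_frames is hypothetical):

    #include <asm/unwind.h>

    /* Sketch: walk a task's stack with the new unwind API and count
     * resolvable return addresses. */
    static unsigned int count_kernel_frames(struct task_struct *task)
    {
            struct unwind_state state;
            unsigned int n = 0;

            for (unwind_start(&state, task, NULL, NULL);
                 !unwind_done(&state); unwind_next_frame(&state)) {
                    if (unwind_get_return_address(&state))
                            n++;
            }
            return n;
    }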
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b70ca12dd389..bd4e3d4d3625 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) | |||
292 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) | 292 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) |
293 | DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) | 293 | DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) |
294 | 294 | ||
295 | #ifdef CONFIG_VMAP_STACK | ||
296 | __visible void __noreturn handle_stack_overflow(const char *message, | ||
297 | struct pt_regs *regs, | ||
298 | unsigned long fault_address) | ||
299 | { | ||
300 | printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n", | ||
301 | (void *)fault_address, current->stack, | ||
302 | (char *)current->stack + THREAD_SIZE - 1); | ||
303 | die(message, regs, 0); | ||
304 | |||
305 | /* Be absolutely certain we don't return. */ | ||
306 | panic(message); | ||
307 | } | ||
308 | #endif | ||
309 | |||
295 | #ifdef CONFIG_X86_64 | 310 | #ifdef CONFIG_X86_64 |
296 | /* Runs on IST stack */ | 311 | /* Runs on IST stack */ |
297 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | 312 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
298 | { | 313 | { |
299 | static const char str[] = "double fault"; | 314 | static const char str[] = "double fault"; |
300 | struct task_struct *tsk = current; | 315 | struct task_struct *tsk = current; |
316 | #ifdef CONFIG_VMAP_STACK | ||
317 | unsigned long cr2; | ||
318 | #endif | ||
301 | 319 | ||
302 | #ifdef CONFIG_X86_ESPFIX64 | 320 | #ifdef CONFIG_X86_ESPFIX64 |
303 | extern unsigned char native_irq_return_iret[]; | 321 | extern unsigned char native_irq_return_iret[]; |
@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
332 | tsk->thread.error_code = error_code; | 350 | tsk->thread.error_code = error_code; |
333 | tsk->thread.trap_nr = X86_TRAP_DF; | 351 | tsk->thread.trap_nr = X86_TRAP_DF; |
334 | 352 | ||
353 | #ifdef CONFIG_VMAP_STACK | ||
354 | /* | ||
355 | * If we overflow the stack into a guard page, the CPU will fail | ||
356 | * to deliver #PF and will send #DF instead. Similarly, if we | ||
357 | * take any non-IST exception while too close to the bottom of | ||
358 | * the stack, the processor will get a page fault while | ||
359 | * delivering the exception and will generate a double fault. | ||
360 | * | ||
361 | * According to the SDM (footnote in 6.15 under "Interrupt 14 - | ||
362 | * Page-Fault Exception (#PF)"): ||
363 | * | ||
364 | * Processors update CR2 whenever a page fault is detected. If a | ||
365 | * second page fault occurs while an earlier page fault is being | ||
366 | * delivered, the faulting linear address of the second fault will ||
367 | * overwrite the contents of CR2 (replacing the previous | ||
368 | * address). These updates to CR2 occur even if the page fault | ||
369 | * results in a double fault or occurs during the delivery of a | ||
370 | * double fault. | ||
371 | * | ||
372 | * The logic below has a small possibility of incorrectly diagnosing | ||
373 | * some errors as stack overflows. For example, if the IDT or GDT | ||
374 | * gets corrupted such that #GP delivery fails due to a bad descriptor | ||
375 | * causing #GP and we hit this condition while CR2 coincidentally | ||
376 | * points to the stack guard page, we'll think we overflowed the | ||
377 | * stack. Given that we're going to panic one way or another | ||
378 | * if this happens, this isn't necessarily worth fixing. | ||
379 | * | ||
380 | * If necessary, we could improve the test by only diagnosing | ||
381 | * a stack overflow if the saved RSP points within 47 bytes of | ||
382 | * the bottom of the stack: if RSP == tsk_stack + 48 and we | ||
383 | * take an exception, the stack is already aligned and there | ||
385 | * will be enough room for SS, RSP, RFLAGS, CS, RIP, and a ||
385 | * possible error code, so a stack overflow would *not* double | ||
386 | * fault. With any less space left, exception delivery could | ||
387 | * fail, and, as a practical matter, we've overflowed the | ||
388 | * stack even if the actual trigger for the double fault was | ||
389 | * something else. | ||
390 | */ | ||
391 | cr2 = read_cr2(); | ||
392 | if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE) | ||
393 | handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2); | ||
394 | #endif | ||
395 | |||
335 | #ifdef CONFIG_DOUBLEFAULT | 396 | #ifdef CONFIG_DOUBLEFAULT |
336 | df_debug(regs, error_code); | 397 | df_debug(regs, error_code); |
337 | #endif | 398 | #endif |
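The CR2 test above folds a two-sided range check into one unsigned comparison; a sketch making the arithmetic explicit:

    /* Sketch: with base = (unsigned long)task_stack_page(tsk), the test
     *
     *         base - 1 - cr2 < PAGE_SIZE
     *
     * relies on unsigned wrap-around: it is true exactly when cr2 lies
     * in [base - PAGE_SIZE, base - 1], i.e. in the guard page directly
     * below the stack, and false for any cr2 at or above base. */
    static bool fault_in_guard_page(unsigned long base, unsigned long cr2)
    {
            return base - 1 - cr2 < PAGE_SIZE;
    }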
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c new file mode 100644 index 000000000000..a2456d4d286a --- /dev/null +++ b/arch/x86/kernel/unwind_frame.c | |||
@@ -0,0 +1,93 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <asm/ptrace.h> | ||
3 | #include <asm/bitops.h> | ||
4 | #include <asm/stacktrace.h> | ||
5 | #include <asm/unwind.h> | ||
6 | |||
7 | #define FRAME_HEADER_SIZE (sizeof(long) * 2) | ||
8 | |||
9 | unsigned long unwind_get_return_address(struct unwind_state *state) | ||
10 | { | ||
11 | unsigned long addr; | ||
12 | unsigned long *addr_p = unwind_get_return_address_ptr(state); | ||
13 | |||
14 | if (unwind_done(state)) | ||
15 | return 0; | ||
16 | |||
17 | addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p, | ||
18 | addr_p); | ||
19 | |||
20 | return __kernel_text_address(addr) ? addr : 0; | ||
21 | } | ||
22 | EXPORT_SYMBOL_GPL(unwind_get_return_address); | ||
23 | |||
24 | static bool update_stack_state(struct unwind_state *state, void *addr, | ||
25 | size_t len) | ||
26 | { | ||
27 | struct stack_info *info = &state->stack_info; | ||
28 | |||
29 | /* | ||
30 | * If addr isn't on the current stack, switch to the next one. | ||
31 | * | ||
32 | * We may have to traverse multiple stacks to deal with the possibility | ||
33 | * that 'info->next_sp' could point to an empty stack and 'addr' could | ||
34 | * be on a subsequent stack. | ||
35 | */ | ||
36 | while (!on_stack(info, addr, len)) | ||
37 | if (get_stack_info(info->next_sp, state->task, info, | ||
38 | &state->stack_mask)) | ||
39 | return false; | ||
40 | |||
41 | return true; | ||
42 | } | ||
43 | |||
44 | bool unwind_next_frame(struct unwind_state *state) | ||
45 | { | ||
46 | unsigned long *next_bp; | ||
47 | |||
48 | if (unwind_done(state)) | ||
49 | return false; | ||
50 | |||
51 | next_bp = (unsigned long *)*state->bp; | ||
52 | |||
53 | /* make sure the next frame's data is accessible */ | ||
54 | if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE)) | ||
55 | return false; | ||
56 | |||
57 | /* move to the next frame */ | ||
58 | state->bp = next_bp; | ||
59 | return true; | ||
60 | } | ||
61 | EXPORT_SYMBOL_GPL(unwind_next_frame); | ||
62 | |||
63 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | ||
64 | struct pt_regs *regs, unsigned long *first_frame) | ||
65 | { | ||
66 | memset(state, 0, sizeof(*state)); | ||
67 | state->task = task; | ||
68 | |||
69 | /* don't even attempt to start from user mode regs */ | ||
70 | if (regs && user_mode(regs)) { | ||
71 | state->stack_info.type = STACK_TYPE_UNKNOWN; | ||
72 | return; | ||
73 | } | ||
74 | |||
75 | /* set up the starting stack frame */ | ||
76 | state->bp = get_frame_pointer(task, regs); | ||
77 | |||
78 | /* initialize stack info and make sure the frame data is accessible */ | ||
79 | get_stack_info(state->bp, state->task, &state->stack_info, | ||
80 | &state->stack_mask); | ||
81 | update_stack_state(state, state->bp, FRAME_HEADER_SIZE); | ||
82 | |||
83 | /* | ||
84 | * The caller can provide the address of the first frame directly | ||
85 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame | ||
86 | * to start unwinding at. Skip ahead until we reach it. | ||
87 | */ | ||
88 | while (!unwind_done(state) && | ||
89 | (!on_stack(&state->stack_info, first_frame, sizeof(long)) || | ||
90 | state->bp < first_frame)) | ||
91 | unwind_next_frame(state); | ||
92 | } | ||
93 | EXPORT_SYMBOL_GPL(__unwind_start); | ||
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c new file mode 100644 index 000000000000..b5a834c93065 --- /dev/null +++ b/arch/x86/kernel/unwind_guess.c | |||
@@ -0,0 +1,43 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/ftrace.h> | ||
3 | #include <asm/ptrace.h> | ||
4 | #include <asm/bitops.h> | ||
5 | #include <asm/stacktrace.h> | ||
6 | #include <asm/unwind.h> | ||
7 | |||
8 | bool unwind_next_frame(struct unwind_state *state) | ||
9 | { | ||
10 | struct stack_info *info = &state->stack_info; | ||
11 | |||
12 | if (unwind_done(state)) | ||
13 | return false; | ||
14 | |||
15 | do { | ||
16 | for (state->sp++; state->sp < info->end; state->sp++) | ||
17 | if (__kernel_text_address(*state->sp)) | ||
18 | return true; | ||
19 | |||
20 | state->sp = info->next_sp; | ||
21 | |||
22 | } while (!get_stack_info(state->sp, state->task, info, | ||
23 | &state->stack_mask)); | ||
24 | |||
25 | return false; | ||
26 | } | ||
27 | EXPORT_SYMBOL_GPL(unwind_next_frame); | ||
28 | |||
29 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | ||
30 | struct pt_regs *regs, unsigned long *first_frame) | ||
31 | { | ||
32 | memset(state, 0, sizeof(*state)); | ||
33 | |||
34 | state->task = task; | ||
35 | state->sp = first_frame; | ||
36 | |||
37 | get_stack_info(first_frame, state->task, &state->stack_info, | ||
38 | &state->stack_mask); | ||
39 | |||
40 | if (!__kernel_text_address(*first_frame)) | ||
41 | unwind_next_frame(state); | ||
42 | } | ||
43 | EXPORT_SYMBOL_GPL(__unwind_start); | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 76c5e52436c4..0bd9f1287f39 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -91,7 +91,7 @@ struct x86_cpuinit_ops x86_cpuinit = { | |||
91 | static void default_nmi_init(void) { }; | 91 | static void default_nmi_init(void) { }; |
92 | static int default_i8042_detect(void) { return 1; }; | 92 | static int default_i8042_detect(void) { return 1; }; |
93 | 93 | ||
94 | struct x86_platform_ops x86_platform = { | 94 | struct x86_platform_ops x86_platform __ro_after_init = { |
95 | .calibrate_cpu = native_calibrate_cpu, | 95 | .calibrate_cpu = native_calibrate_cpu, |
96 | .calibrate_tsc = native_calibrate_tsc, | 96 | .calibrate_tsc = native_calibrate_tsc, |
97 | .get_wallclock = mach_get_cmos_time, | 97 | .get_wallclock = mach_get_cmos_time, |
@@ -108,7 +108,7 @@ struct x86_platform_ops x86_platform = { | |||
108 | EXPORT_SYMBOL_GPL(x86_platform); | 108 | EXPORT_SYMBOL_GPL(x86_platform); |
109 | 109 | ||
110 | #if defined(CONFIG_PCI_MSI) | 110 | #if defined(CONFIG_PCI_MSI) |
111 | struct x86_msi_ops x86_msi = { | 111 | struct x86_msi_ops x86_msi __ro_after_init = { |
112 | .setup_msi_irqs = native_setup_msi_irqs, | 112 | .setup_msi_irqs = native_setup_msi_irqs, |
113 | .teardown_msi_irq = native_teardown_msi_irq, | 113 | .teardown_msi_irq = native_teardown_msi_irq, |
114 | .teardown_msi_irqs = default_teardown_msi_irqs, | 114 | .teardown_msi_irqs = default_teardown_msi_irqs, |
@@ -137,7 +137,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev) | |||
137 | } | 137 | } |
138 | #endif | 138 | #endif |
139 | 139 | ||
140 | struct x86_io_apic_ops x86_io_apic_ops = { | 140 | struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = { |
141 | .read = native_io_apic_read, | 141 | .read = native_io_apic_read, |
142 | .disable = native_disable_io_apic, | 142 | .disable = native_disable_io_apic, |
143 | }; | 143 | }; |