path: root/arch/x86/kernel
author     Linus Torvalds <torvalds@linux-foundation.org>   2016-10-03 19:13:28 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-10-03 19:13:28 -0400
commit     1a4a2bc460721bc8f91e4c1294d39b38e5af132f (patch)
tree       fe646d05f6e17f05601e0a32cc796bec718ab6e7 /arch/x86/kernel
parent     110a9e42b68719f584879c5c5c727bbae90d15f9 (diff)
parent     1ef55be16ed69538f89e0a6508be5e62fdc9851c (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
 "In this cycle this topic tree has become one of those 'super topics'
  that accumulated a lot of changes:

   - Add CONFIG_VMAP_STACK=y support to the core kernel and enable it
     on x86 - preceded by an array of changes. v4.8 saw preparatory
     changes in this area already - this is the rest of the work.
     Includes the thread stack caching performance optimization. (Andy
     Lutomirski)

   - switch_to() cleanups and all around enhancements. (Brian Gerst)

   - A large number of dumpstack infrastructure enhancements and an
     unwinder abstraction. The secret long term plan is safe(r) live
     patching plus maybe another attempt at debuginfo based unwinding -
     but all these current bits are standalone enhancements in a frame
     pointer based debug environment as well. (Josh Poimboeuf)

   - More __ro_after_init and const annotations. (Kees Cook)

   - Enable KASLR for the vmemmap memory region. (Thomas Garnier)"

[ The virtually mapped stack changes are pretty fundamental, and not
  x86-specific per se, even if they are only used on x86 right now. ]

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  x86/asm: Get rid of __read_cr4_safe()
  thread_info: Use unsigned long for flags
  x86/alternatives: Add stack frame dependency to alternative_call_2()
  x86/dumpstack: Fix show_stack() task pointer regression
  x86/dumpstack: Remove dump_trace() and related callbacks
  x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
  oprofile/x86: Convert x86_backtrace() to use the new unwinder
  x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
  perf/x86: Convert perf_callchain_kernel() to use the new unwinder
  x86/unwind: Add new unwind interface and implementations
  x86/dumpstack: Remove NULL task pointer convention
  fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
  sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
  lib/syscall: Pin the task stack in collect_syscall()
  x86/process: Pin the target stack in get_wchan()
  x86/dumpstack: Pin the target stack when dumping it
  kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread()
  sched/core: Add try_get_task_stack() and put_task_stack()
  x86/entry/64: Fix a minor comment rebase error
  iommu/amd: Don't put completion-wait semaphore on stack
  ...
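For orientation before the diff: the heart of the unwinder abstraction merged here is the unwind_state API that the dumpstack rewrite below consumes. A minimal consumer sketch, assuming the <asm/unwind.h> helpers this series introduces (unwind_start(), unwind_done(), unwind_next_frame(), unwind_get_return_address()); treat the exact signatures as illustrative rather than authoritative:

/* Walk a task's kernel stack with the new unwind interface. */
static void print_kernel_backtrace(struct task_struct *task,
                                   struct pt_regs *regs)
{
        struct unwind_state state;

        for (unwind_start(&state, task, regs, NULL);
             !unwind_done(&state);
             unwind_next_frame(&state)) {
                unsigned long addr = unwind_get_return_address(&state);

                if (!addr)
                        break;          /* unwinder lost the frame chain */
                printk(KERN_DEFAULT "  %pS\n", (void *)addr);
        }
}

With CONFIG_FRAME_POINTER this is backed by unwind_frame.c (reliable frame walking); otherwise unwind_guess.c scans the stack for text addresses, per the Makefile hunk below.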
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile               |    6
-rw-r--r--  arch/x86/kernel/acpi/sleep.c           |    2
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c    |    6
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c       |    2
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c       |    2
-rw-r--r--  arch/x86/kernel/apic/msi.c             |    2
-rw-r--r--  arch/x86/kernel/apic/probe_32.c        |    4
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c  |    2
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c     |    2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c     |    2
-rw-r--r--  arch/x86/kernel/asm-offsets.c          |    7
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c       |    5
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c       |    5
-rw-r--r--  arch/x86/kernel/cpu/common.c           |   18
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c        |    4
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h        |    2
-rw-r--r--  arch/x86/kernel/dumpstack.c            |  258
-rw-r--r--  arch/x86/kernel/dumpstack_32.c         |  154
-rw-r--r--  arch/x86/kernel/dumpstack_64.c         |  318
-rw-r--r--  arch/x86/kernel/fpu/init.c             |    1
-rw-r--r--  arch/x86/kernel/ftrace.c               |    2
-rw-r--r--  arch/x86/kernel/head_32.S              |    8
-rw-r--r--  arch/x86/kernel/head_64.S              |   12
-rw-r--r--  arch/x86/kernel/irq_64.c               |    3
-rw-r--r--  arch/x86/kernel/kgdb.c                 |    8
-rw-r--r--  arch/x86/kernel/ksysfs.c               |    2
-rw-r--r--  arch/x86/kernel/kvmclock.c             |    2
-rw-r--r--  arch/x86/kernel/paravirt.c             |    3
-rw-r--r--  arch/x86/kernel/process.c              |   42
-rw-r--r--  arch/x86/kernel/process_32.c           |   33
-rw-r--r--  arch/x86/kernel/process_64.c           |   25
-rw-r--r--  arch/x86/kernel/ptrace.c               |   12
-rw-r--r--  arch/x86/kernel/reboot.c               |    2
-rw-r--r--  arch/x86/kernel/setup.c                |    6
-rw-r--r--  arch/x86/kernel/setup_percpu.c         |    4
-rw-r--r--  arch/x86/kernel/signal.c               |    2
-rw-r--r--  arch/x86/kernel/smpboot.c              |    3
-rw-r--r--  arch/x86/kernel/stacktrace.c           |   79
-rw-r--r--  arch/x86/kernel/traps.c                |   61
-rw-r--r--  arch/x86/kernel/unwind_frame.c         |   93
-rw-r--r--  arch/x86/kernel/unwind_guess.c         |   43
-rw-r--r--  arch/x86/kernel/x86_init.c             |    6
42 files changed, 664 insertions, 589 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0503f5bfb18d..45257cf84370 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -125,6 +125,12 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
 obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
 obj-$(CONFIG_TRACING) += tracepoint.o
 
+ifdef CONFIG_FRAME_POINTER
+obj-y += unwind_frame.o
+else
+obj-y += unwind_guess.o
+endif
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index adb3eaf8fe2a..48587335ede8 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void)
 	saved_magic = 0x12345678;
 #else /* CONFIG_64BIT */
 #ifdef CONFIG_SMP
-	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
+	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5b2ae106bd4a..8862da76ef6f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -25,7 +25,7 @@
 static struct apic apic_physflat;
 static struct apic apic_flat;
 
-struct apic __read_mostly *apic = &apic_flat;
+struct apic *apic __ro_after_init = &apic_flat;
 EXPORT_SYMBOL_GPL(apic);
 
 static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -154,7 +154,7 @@ static int flat_probe(void)
 	return 1;
 }
 
-static struct apic apic_flat = {
+static struct apic apic_flat __ro_after_init = {
 	.name				= "flat",
 	.probe				= flat_probe,
 	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
@@ -248,7 +248,7 @@ static int physflat_probe(void)
 	return 0;
 }
 
-static struct apic apic_physflat = {
+static struct apic apic_physflat __ro_after_init = {
 
 	.name				= "physical flat",
 	.probe				= physflat_probe,
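The __ro_after_init conversions in this and the following APIC files all follow one pattern; a rough sketch of what the annotation buys, with a made-up variable name for illustration:

/* Written freely during boot; the section is remapped read-only by
 * mark_rodata_ro() once init finishes, so any later write - stray or
 * malicious - faults instead of silently landing. */
static struct apic apic_example __ro_after_init = {
        .name = "example",              /* hypothetical instance */
};

static int __init apic_example_setup(void)
{
        apic_example.disable_esr = 1;   /* fine: still in __init */
        return 0;
}
/* Any write to apic_example after init would oops. */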
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index c05688b2deff..b109e4389c92 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
-struct apic apic_noop = {
+struct apic apic_noop __ro_after_init = {
 	.name				= "noop",
 	.probe				= noop_probe,
 	.acpi_madt_oem_check		= NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 06dbaa458bfe..56012010332c 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -142,7 +142,7 @@ static int probe_bigsmp(void)
 	return dmi_bigsmp;
 }
 
-static struct apic apic_bigsmp = {
+static struct apic apic_bigsmp __ro_after_init = {
 
 	.name				= "bigsmp",
 	.probe				= probe_bigsmp,
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index ade25320df96..015bbf30e3e3 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -269,7 +269,7 @@ static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
 	hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
 }
 
-static struct irq_chip hpet_msi_controller = {
+static struct irq_chip hpet_msi_controller __ro_after_init = {
 	.name = "HPET-MSI",
 	.irq_unmask = hpet_msi_unmask,
 	.irq_mask = hpet_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 563096267ca2..c48264e202fd 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -72,7 +72,7 @@ static int probe_default(void)
 	return 1;
 }
 
-static struct apic apic_default = {
+static struct apic apic_default __ro_after_init = {
 
 	.name				= "default",
 	.probe				= probe_default,
@@ -126,7 +126,7 @@ static struct apic apic_default = {
 
 apic_driver(apic_default);
 
-struct apic *apic = &apic_default;
+struct apic *apic __ro_after_init = &apic_default;
 EXPORT_SYMBOL_GPL(apic);
 
 static int cmdline_apic __initdata;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 54f35d988025..200af5ae9662 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -227,7 +227,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
 	cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
 }
 
-static struct apic apic_x2apic_cluster = {
+static struct apic apic_x2apic_cluster __ro_after_init = {
 
 	.name				= "cluster x2apic",
 	.probe				= x2apic_cluster_probe,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 4f13f54f1b1f..ff111f05a314 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -98,7 +98,7 @@ static int x2apic_phys_probe(void)
 	return apic == &apic_x2apic_phys;
 }
 
-static struct apic apic_x2apic_phys = {
+static struct apic apic_x2apic_phys __ro_after_init = {
 
 	.name				= "physical x2apic",
 	.probe				= x2apic_phys_probe,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index cb0673c1e940..b9f6157d4271 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -560,7 +560,7 @@ static int uv_probe(void)
 	return apic == &apic_x2apic_uv_x;
 }
 
-static struct apic __refdata apic_x2apic_uv_x = {
+static struct apic apic_x2apic_uv_x __ro_after_init = {
 
 	.name				= "UV large system",
 	.probe				= uv_probe,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 2bd5c6ff7ee7..c62e015b126c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -29,10 +29,13 @@
 
 void common(void) {
 	BLANK();
-	OFFSET(TI_flags, thread_info, flags);
-	OFFSET(TI_status, thread_info, status);
+	OFFSET(TASK_threadsp, task_struct, thread.sp);
+#ifdef CONFIG_CC_STACKPROTECTOR
+	OFFSET(TASK_stack_canary, task_struct, stack_canary);
+#endif
 
 	BLANK();
+	OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
 	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
 
 	BLANK();
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ecdc1d217dc0..880aa093268d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@ void foo(void)
 	/* Size of SYSENTER_stack */
 	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	BLANK();
+	OFFSET(stack_canary_offset, stack_canary, canary);
+#endif
+
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d875f97d4e0b..210927ee2e74 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@ int main(void)
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	BLANK();
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+	BLANK();
+#endif
+
 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
 	DEFINE(NR_syscalls, sizeof(syscalls_64));
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bcc9ccc220c9..9bd910a7dd0a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1264,9 +1264,14 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
-struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
-				    (unsigned long) debug_idt_table };
+struct desc_ptr idt_descr __ro_after_init = {
+	.size = NR_VECTORS * 16 - 1,
+	.address = (unsigned long) idt_table,
+};
+const struct desc_ptr debug_idt_descr = {
+	.size = NR_VECTORS * 16 - 1,
+	.address = (unsigned long) debug_idt_table,
+};
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1280,7 +1285,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
 
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
@@ -1304,11 +1309,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-	/*
-	 * LSTAR and STAR live in a bit strange symbiosis.
-	 * They both write to the same internal register. STAR allows to
-	 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
-	 */
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
 	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 28f1b54b7fad..24e87e74990d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -72,14 +72,14 @@ static DEFINE_MUTEX(mtrr_mutex);
 u64 size_or_mask, size_and_mask;
 static bool mtrr_aps_delayed_init;
 
-static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
+static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
 
 const struct mtrr_ops *mtrr_if;
 
 static void set_mtrr(unsigned int reg, unsigned long base,
 		     unsigned long size, mtrr_type type);
 
-void set_mtrr_ops(const struct mtrr_ops *ops)
+void __init set_mtrr_ops(const struct mtrr_ops *ops)
 {
 	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
 		mtrr_ops[ops->vendor] = ops;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 6c7ced07d16d..ad8bd763efa5 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -54,7 +54,7 @@ void fill_mtrr_var_range(unsigned int index,
 bool get_mtrr_state(void);
 void mtrr_bp_pat_init(void);
 
-extern void set_mtrr_ops(const struct mtrr_ops *ops);
+extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
 
 extern u64 size_or_mask, size_and_mask;
 extern const struct mtrr_ops *mtrr_if;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 92e8f0a7159c..9b7cf5c28f5f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,7 +17,7 @@
 #include <linux/sysfs.h>
 
 #include <asm/stacktrace.h>
-
+#include <asm/unwind.h>
 
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
@@ -25,11 +25,29 @@ unsigned int code_bytes = 64;
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
+bool in_task_stack(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info)
+{
+	unsigned long *begin = task_stack_page(task);
+	unsigned long *end   = task_stack_page(task) + THREAD_SIZE;
+
+	if (stack < begin || stack >= end)
+		return false;
+
+	info->type	= STACK_TYPE_TASK;
+	info->begin	= begin;
+	info->end	= end;
+	info->next_sp	= NULL;
+
+	return true;
+}
+
 static void printk_stack_address(unsigned long address, int reliable,
-				 void *data)
+				 char *log_lvl)
 {
+	touch_nmi_watchdog();
 	printk("%s [<%p>] %s%pB\n",
-	       (char *)data, (void *)address, reliable ? "" : "? ",
+	       log_lvl, (void *)address, reliable ? "" : "? ",
 	       (void *)address);
 }
 
@@ -38,176 +56,120 @@ void printk_address(unsigned long address)
 	pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
 }
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void
-print_ftrace_graph_addr(unsigned long addr, void *data,
-			const struct stacktrace_ops *ops,
-			struct task_struct *task, int *graph)
+void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *stack, char *log_lvl)
 {
-	unsigned long ret_addr;
-	int index;
-
-	if (addr != (unsigned long)return_to_handler)
-		return;
-
-	index = task->curr_ret_stack;
-
-	if (!task->ret_stack || index < *graph)
-		return;
-
-	index -= *graph;
-	ret_addr = task->ret_stack[index].ret;
-
-	ops->address(data, ret_addr, 1);
-
-	(*graph)++;
-}
-#else
-static inline void
-print_ftrace_graph_addr(unsigned long addr, void *data,
-			const struct stacktrace_ops *ops,
-			struct task_struct *task, int *graph)
-{ }
-#endif
-
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-static inline int valid_stack_ptr(struct task_struct *task,
-			void *p, unsigned int size, void *end)
-{
-	void *t = task_stack_page(task);
-	if (end) {
-		if (p < end && p >= (end-THREAD_SIZE))
-			return 1;
-		else
-			return 0;
-	}
-	return p >= t && p < t + THREAD_SIZE - size;
-}
+	struct unwind_state state;
+	struct stack_info stack_info = {0};
+	unsigned long visit_mask = 0;
+	int graph_idx = 0;
 
-unsigned long
-print_context_stack(struct task_struct *task,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data,
-		unsigned long *end, int *graph)
-{
-	struct stack_frame *frame = (struct stack_frame *)bp;
+	printk("%sCall Trace:\n", log_lvl);
+
+	unwind_start(&state, task, regs, stack);
 
 	/*
-	 * If we overflowed the stack into a guard page, jump back to the
-	 * bottom of the usable stack.
+	 * Iterate through the stacks, starting with the current stack pointer.
+	 * Each stack has a pointer to the next one.
+	 *
+	 * x86-64 can have several stacks:
+	 * - task stack
+	 * - interrupt stack
+	 * - HW exception stacks (double fault, nmi, debug, mce)
+	 *
+	 * x86-32 can have up to three stacks:
+	 * - task stack
+	 * - softirq stack
+	 * - hardirq stack
 	 */
-	if ((unsigned long)task_stack_page(task) - (unsigned long)stack <
-	    PAGE_SIZE)
-		stack = (unsigned long *)task_stack_page(task);
-
-	while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
-		unsigned long addr;
-
-		addr = *stack;
-		if (__kernel_text_address(addr)) {
-			if ((unsigned long) stack == bp + sizeof(long)) {
-				ops->address(data, addr, 1);
-				frame = frame->next_frame;
-				bp = (unsigned long) frame;
-			} else {
-				ops->address(data, addr, 0);
-			}
-			print_ftrace_graph_addr(addr, data, ops, task, graph);
-		}
-		stack++;
-	}
-	return bp;
-}
-EXPORT_SYMBOL_GPL(print_context_stack);
-
-unsigned long
-print_context_stack_bp(struct task_struct *task,
-		       unsigned long *stack, unsigned long bp,
-		       const struct stacktrace_ops *ops, void *data,
-		       unsigned long *end, int *graph)
-{
-	struct stack_frame *frame = (struct stack_frame *)bp;
-	unsigned long *ret_addr = &frame->return_address;
+	for (; stack; stack = stack_info.next_sp) {
+		const char *str_begin, *str_end;
 
-	while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
-		unsigned long addr = *ret_addr;
+		/*
+		 * If we overflowed the task stack into a guard page, jump back
+		 * to the bottom of the usable stack.
+		 */
+		if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
+			stack = task_stack_page(task);
 
-		if (!__kernel_text_address(addr))
+		if (get_stack_info(stack, task, &stack_info, &visit_mask))
 			break;
 
-		if (ops->address(data, addr, 1))
-			break;
-		frame = frame->next_frame;
-		ret_addr = &frame->return_address;
-		print_ftrace_graph_addr(addr, data, ops, task, graph);
-	}
-
-	return (unsigned long)frame;
-}
-EXPORT_SYMBOL_GPL(print_context_stack_bp);
-
-static int print_trace_stack(void *data, char *name)
-{
-	printk("%s <%s> ", (char *)data, name);
-	return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static int print_trace_address(void *data, unsigned long addr, int reliable)
-{
-	touch_nmi_watchdog();
-	printk_stack_address(addr, reliable, data);
-	return 0;
-}
-
-static const struct stacktrace_ops print_trace_ops = {
-	.stack			= print_trace_stack,
-	.address		= print_trace_address,
-	.walk_stack		= print_context_stack,
-};
-
-void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl)
-{
-	printk("%sCall Trace:\n", log_lvl);
-	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
+		stack_type_str(stack_info.type, &str_begin, &str_end);
+		if (str_begin)
+			printk("%s <%s> ", log_lvl, str_begin);
+
+		/*
+		 * Scan the stack, printing any text addresses we find.  At the
+		 * same time, follow proper stack frames with the unwinder.
+		 *
+		 * Addresses found during the scan which are not reported by
+		 * the unwinder are considered to be additional clues which are
+		 * sometimes useful for debugging and are prefixed with '?'.
+		 * This also serves as a failsafe option in case the unwinder
+		 * goes off in the weeds.
+		 */
+		for (; stack < stack_info.end; stack++) {
+			unsigned long real_addr;
+			int reliable = 0;
+			unsigned long addr = *stack;
+			unsigned long *ret_addr_p =
+				unwind_get_return_address_ptr(&state);
+
+			if (!__kernel_text_address(addr))
+				continue;
+
+			if (stack == ret_addr_p)
+				reliable = 1;
+
+			/*
+			 * When function graph tracing is enabled for a
+			 * function, its return address on the stack is
+			 * replaced with the address of an ftrace handler
+			 * (return_to_handler).  In that case, before printing
+			 * the "real" address, we want to print the handler
+			 * address as an "unreliable" hint that function graph
+			 * tracing was involved.
+			 */
+			real_addr = ftrace_graph_ret_addr(task, &graph_idx,
+							  addr, stack);
+			if (real_addr != addr)
+				printk_stack_address(addr, 0, log_lvl);
+			printk_stack_address(real_addr, reliable, log_lvl);
+
+			if (!reliable)
+				continue;
+
+			/*
+			 * Get the next frame from the unwinder.  No need to
+			 * check for an error: if anything goes wrong, the rest
+			 * of the addresses will just be printed as unreliable.
+			 */
+			unwind_next_frame(&state);
+		}
 
-void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
-{
-	show_trace_log_lvl(task, regs, stack, bp, "");
+		if (str_end)
+			printk("%s <%s> ", log_lvl, str_end);
+	}
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	unsigned long bp = 0;
-	unsigned long stack;
+	task = task ? : current;
 
 	/*
 	 * Stack frames below this one aren't interesting.  Don't show them
 	 * if we're printing for %current.
 	 */
-	if (!sp && (!task || task == current)) {
-		sp = &stack;
-		bp = stack_frame(current, NULL);
-	}
+	if (!sp && task == current)
+		sp = get_stack_pointer(current, NULL);
 
-	show_stack_log_lvl(task, NULL, sp, bp, "");
+	show_stack_log_lvl(task, NULL, sp, "");
 }
 
 void show_stack_regs(struct pt_regs *regs)
 {
-	show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, "");
+	show_stack_log_lvl(current, regs, NULL, "");
 }
 
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
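The loop above leans on the stack_info descriptor filled in by get_stack_info(), implemented per stack layout in dumpstack_32.c and dumpstack_64.c below. A condensed sketch of the contract, with field names taken from the diff and 'first_sp' as a hypothetical starting pointer:

        struct stack_info info = {0};
        unsigned long visit_mask = 0;
        unsigned long *stack = first_sp;

        while (stack) {
                if (get_stack_info(stack, task, &info, &visit_mask))
                        break;          /* STACK_TYPE_UNKNOWN: stop */
                /* scan the words in [info.begin, info.end) ... */
                stack = info.next_sp;   /* hop to the parent stack */
        }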
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 09675712eba8..06eb322b5f9f 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,93 +16,121 @@
 
 #include <asm/stacktrace.h>
 
-static void *is_irq_stack(void *p, void *irq)
+void stack_type_str(enum stack_type type, const char **begin, const char **end)
 {
-	if (p < irq || p >= (irq + THREAD_SIZE))
-		return NULL;
-	return irq + THREAD_SIZE;
+	switch (type) {
+	case STACK_TYPE_IRQ:
+	case STACK_TYPE_SOFTIRQ:
+		*begin = "IRQ";
+		*end   = "EOI";
+		break;
+	default:
+		*begin = NULL;
+		*end   = NULL;
+	}
 }
 
-
-static void *is_hardirq_stack(unsigned long *stack, int cpu)
+static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
 {
-	void *irq = per_cpu(hardirq_stack, cpu);
+	unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
+	unsigned long *end   = begin + (THREAD_SIZE / sizeof(long));
 
-	return is_irq_stack(stack, irq);
-}
+	/*
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
+	 */
+	if (stack < begin || stack > end)
+		return false;
 
-static void *is_softirq_stack(unsigned long *stack, int cpu)
-{
-	void *irq = per_cpu(softirq_stack, cpu);
+	info->type	= STACK_TYPE_IRQ;
+	info->begin	= begin;
+	info->end	= end;
 
-	return is_irq_stack(stack, irq);
+	/*
+	 * See irq_32.c -- the next stack pointer is stored at the beginning of
+	 * the stack.
+	 */
+	info->next_sp	= (unsigned long *)*begin;
+
+	return true;
 }
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data)
+static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
 {
-	const unsigned cpu = get_cpu();
-	int graph = 0;
-	u32 *prev_esp;
+	unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
+	unsigned long *end   = begin + (THREAD_SIZE / sizeof(long));
 
-	if (!task)
-		task = current;
+	/*
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
+	 */
+	if (stack < begin || stack > end)
+		return false;
 
-	if (!stack) {
-		unsigned long dummy;
+	info->type	= STACK_TYPE_SOFTIRQ;
+	info->begin	= begin;
+	info->end	= end;
 
-		stack = &dummy;
-		if (task != current)
-			stack = (unsigned long *)task->thread.sp;
-	}
+	/*
+	 * The next stack pointer is stored at the beginning of the stack.
+	 * See irq_32.c.
+	 */
+	info->next_sp	= (unsigned long *)*begin;
 
-	if (!bp)
-		bp = stack_frame(task, regs);
+	return true;
+}
 
-	for (;;) {
-		void *end_stack;
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask)
+{
+	if (!stack)
+		goto unknown;
 
-		end_stack = is_hardirq_stack(stack, cpu);
-		if (!end_stack)
-			end_stack = is_softirq_stack(stack, cpu);
+	task = task ? : current;
 
-		bp = ops->walk_stack(task, stack, bp, ops, data,
-				     end_stack, &graph);
+	if (in_task_stack(stack, task, info))
+		goto recursion_check;
 
-		/* Stop if not on irq stack */
-		if (!end_stack)
-			break;
+	if (task != current)
+		goto unknown;
 
-		/* The previous esp is saved on the bottom of the stack */
-		prev_esp = (u32 *)(end_stack - THREAD_SIZE);
-		stack = (unsigned long *)*prev_esp;
-		if (!stack)
-			break;
+	if (in_hardirq_stack(stack, info))
+		goto recursion_check;
 
-		if (ops->stack(data, "IRQ") < 0)
-			break;
-		touch_nmi_watchdog();
+	if (in_softirq_stack(stack, info))
+		goto recursion_check;
+
+	goto unknown;
+
+recursion_check:
+	/*
+	 * Make sure we don't iterate through any given stack more than once.
+	 * If it comes up a second time then there's something wrong going on:
+	 * just break out and report an unknown stack type.
+	 */
+	if (visit_mask) {
+		if (*visit_mask & (1UL << info->type))
+			goto unknown;
+		*visit_mask |= 1UL << info->type;
 	}
-	put_cpu();
+
+	return 0;
+
+unknown:
+	info->type = STACK_TYPE_UNKNOWN;
+	return -EINVAL;
 }
-EXPORT_SYMBOL(dump_trace);
 
-void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *sp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
 
-	if (sp == NULL) {
-		if (regs)
-			sp = (unsigned long *)regs->sp;
-		else if (task)
-			sp = (unsigned long *)task->thread.sp;
-		else
-			sp = (unsigned long *)&sp;
-	}
+	if (!try_get_task_stack(task))
+		return;
+
+	sp = sp ? : get_stack_pointer(task, regs);
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
@@ -117,7 +145,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		touch_nmi_watchdog();
 	}
 	pr_cont("\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
+
+	put_task_stack(task);
 }
 
 
122 152
123 153
@@ -139,7 +169,7 @@ void show_regs(struct pt_regs *regs)
 	u8 *ip;
 
 	pr_emerg("Stack:\n");
-	show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
+	show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
 
 	pr_emerg("Code:");
 
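Both the 32-bit code above and the 64-bit code below share the same visit_mask recursion guard: one bit per stack type, set on first visit. Restated as a stand-alone helper (hypothetical name, same logic as the diff):

        static bool stack_seen_before(unsigned long *visit_mask, int type)
        {
                if (*visit_mask & (1UL << type))
                        return true;    /* same stack twice: corrupt chain */
                *visit_mask |= 1UL << type;
                return false;
        }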
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 9ee4520ce83c..36cf1a498227 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,261 +16,145 @@
 
 #include <asm/stacktrace.h>
 
+static char *exception_stack_names[N_EXCEPTION_STACKS] = {
+		[ DOUBLEFAULT_STACK-1	]	= "#DF",
+		[ NMI_STACK-1		]	= "NMI",
+		[ DEBUG_STACK-1		]	= "#DB",
+		[ MCE_STACK-1		]	= "#MC",
+};
 
-#define N_EXCEPTION_STACKS_END \
-		(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
-
-static char x86_stack_ids[][8] = {
-		[ DEBUG_STACK-1			]	= "#DB",
-		[ NMI_STACK-1			]	= "NMI",
-		[ DOUBLEFAULT_STACK-1		]	= "#DF",
-		[ MCE_STACK-1			]	= "#MC",
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
-		[ N_EXCEPTION_STACKS ...
-		  N_EXCEPTION_STACKS_END	]	= "#DB[?]"
-#endif
+static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
+	[0 ... N_EXCEPTION_STACKS - 1]		= EXCEPTION_STKSZ,
+	[DEBUG_STACK - 1]			= DEBUG_STKSZ
 };
 
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
-					 unsigned *usedp, char **idp)
+void stack_type_str(enum stack_type type, const char **begin, const char **end)
 {
-	unsigned k;
+	BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
 
-	/*
-	 * Iterate over all exception stacks, and figure out whether
-	 * 'stack' is in one of them:
-	 */
-	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
-		unsigned long end = per_cpu(orig_ist, cpu).ist[k];
-		/*
-		 * Is 'stack' above this exception frame's end?
-		 * If yes then skip to the next frame.
-		 */
-		if (stack >= end)
-			continue;
-		/*
-		 * Is 'stack' above this exception frame's start address?
-		 * If yes then we found the right frame.
-		 */
-		if (stack >= end - EXCEPTION_STKSZ) {
-			/*
-			 * Make sure we only iterate through an exception
-			 * stack once. If it comes up for the second time
-			 * then there's something wrong going on - just
-			 * break out and return NULL:
-			 */
-			if (*usedp & (1U << k))
-				break;
-			*usedp |= 1U << k;
-			*idp = x86_stack_ids[k];
-			return (unsigned long *)end;
-		}
-		/*
-		 * If this is a debug stack, and if it has a larger size than
-		 * the usual exception stacks, then 'stack' might still
-		 * be within the lower portion of the debug stack:
-		 */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
-		if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
-			unsigned j = N_EXCEPTION_STACKS - 1;
-
-			/*
-			 * Black magic. A large debug stack is composed of
-			 * multiple exception stack entries, which we
-			 * iterate through now. Dont look:
-			 */
-			do {
-				++j;
-				end -= EXCEPTION_STKSZ;
-				x86_stack_ids[j][4] = '1' +
-						(j - N_EXCEPTION_STACKS);
-			} while (stack < end - EXCEPTION_STKSZ);
-			if (*usedp & (1U << j))
-				break;
-			*usedp |= 1U << j;
-			*idp = x86_stack_ids[j];
-			return (unsigned long *)end;
-		}
-#endif
+	switch (type) {
+	case STACK_TYPE_IRQ:
+		*begin = "IRQ";
+		*end   = "EOI";
+		break;
+	case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
+		*begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
+		*end   = "EOE";
+		break;
+	default:
+		*begin = NULL;
+		*end   = NULL;
 	}
-	return NULL;
 }
 
-static inline int
-in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
-	     unsigned long *irq_stack_end)
+static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
 {
-	return (stack >= irq_stack && stack < irq_stack_end);
-}
-
-static const unsigned long irq_stack_size =
-	(IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
-
-enum stack_type {
-	STACK_IS_UNKNOWN,
-	STACK_IS_NORMAL,
-	STACK_IS_EXCEPTION,
-	STACK_IS_IRQ,
-};
-
-static enum stack_type
-analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
-	      unsigned long **stack_end, unsigned long *irq_stack,
-	      unsigned *used, char **id)
-{
-	unsigned long addr;
+	unsigned long *begin, *end;
+	struct pt_regs *regs;
+	unsigned k;
 
-	addr = ((unsigned long)stack & (~(THREAD_SIZE - 1)));
-	if ((unsigned long)task_stack_page(task) == addr)
-		return STACK_IS_NORMAL;
+	BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
 
-	*stack_end = in_exception_stack(cpu, (unsigned long)stack,
-					used, id);
-	if (*stack_end)
-		return STACK_IS_EXCEPTION;
+	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
+		end   = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
+		begin = end - (exception_stack_sizes[k] / sizeof(long));
+		regs  = (struct pt_regs *)end - 1;
 
-	if (!irq_stack)
-		return STACK_IS_NORMAL;
+		if (stack < begin || stack >= end)
+			continue;
 
-	*stack_end = irq_stack;
-	irq_stack = irq_stack - irq_stack_size;
+		info->type	= STACK_TYPE_EXCEPTION + k;
+		info->begin	= begin;
+		info->end	= end;
+		info->next_sp	= (unsigned long *)regs->sp;
 
-	if (in_irq_stack(stack, irq_stack, *stack_end))
-		return STACK_IS_IRQ;
+		return true;
+	}
 
-	return STACK_IS_UNKNOWN;
+	return false;
 }
 
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data)
+static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
 {
-	const unsigned cpu = get_cpu();
-	unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
-	unsigned long dummy;
-	unsigned used = 0;
-	int graph = 0;
-	int done = 0;
-
-	if (!task)
-		task = current;
-
-	if (!stack) {
-		if (regs)
-			stack = (unsigned long *)regs->sp;
-		else if (task != current)
-			stack = (unsigned long *)task->thread.sp;
-		else
-			stack = &dummy;
-	}
+	unsigned long *end   = (unsigned long *)this_cpu_read(irq_stack_ptr);
+	unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
 
-	if (!bp)
-		bp = stack_frame(task, regs);
 	/*
-	 * Print function call entries in all stacks, starting at the
-	 * current stack address. If the stacks consist of nested
-	 * exceptions
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
 	 */
-	while (!done) {
-		unsigned long *stack_end;
-		enum stack_type stype;
-		char *id;
+	if (stack < begin || stack > end)
+		return false;
 
-		stype = analyze_stack(cpu, task, stack, &stack_end,
-				      irq_stack, &used, &id);
+	info->type	= STACK_TYPE_IRQ;
+	info->begin	= begin;
+	info->end	= end;
 
-		/* Default finish unless specified to continue */
-		done = 1;
+	/*
+	 * The next stack pointer is the first thing pushed by the entry code
+	 * after switching to the irq stack.
+	 */
+	info->next_sp = (unsigned long *)*(end - 1);
 
-		switch (stype) {
+	return true;
+}
 
-		/* Break out early if we are on the thread stack */
-		case STACK_IS_NORMAL:
-			break;
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask)
+{
+	if (!stack)
+		goto unknown;
 
-		case STACK_IS_EXCEPTION:
+	task = task ? : current;
 
-			if (ops->stack(data, id) < 0)
-				break;
+	if (in_task_stack(stack, task, info))
+		goto recursion_check;
 
-			bp = ops->walk_stack(task, stack, bp, ops,
-					     data, stack_end, &graph);
-			ops->stack(data, "<EOE>");
-			/*
-			 * We link to the next stack via the
-			 * second-to-last pointer (index -2 to end) in the
-			 * exception stack:
-			 */
-			stack = (unsigned long *) stack_end[-2];
-			done = 0;
-			break;
+	if (task != current)
+		goto unknown;
 
-		case STACK_IS_IRQ:
+	if (in_exception_stack(stack, info))
+		goto recursion_check;
 
-			if (ops->stack(data, "IRQ") < 0)
-				break;
-			bp = ops->walk_stack(task, stack, bp,
-					     ops, data, stack_end, &graph);
-			/*
-			 * We link to the next stack (which would be
-			 * the process stack normally) the last
-			 * pointer (index -1 to end) in the IRQ stack:
-			 */
-			stack = (unsigned long *) (stack_end[-1]);
-			irq_stack = NULL;
-			ops->stack(data, "EOI");
-			done = 0;
-			break;
+	if (in_irq_stack(stack, info))
+		goto recursion_check;
 
-		case STACK_IS_UNKNOWN:
-			ops->stack(data, "UNK");
-			break;
-		}
-	}
+	goto unknown;
 
+recursion_check:
 	/*
-	 * This handles the process stack:
+	 * Make sure we don't iterate through any given stack more than once.
+	 * If it comes up a second time then there's something wrong going on:
+	 * just break out and report an unknown stack type.
 	 */
-	bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
-	put_cpu();
+	if (visit_mask) {
+		if (*visit_mask & (1UL << info->type))
+			goto unknown;
+		*visit_mask |= 1UL << info->type;
+	}
+
+	return 0;
+
+unknown:
+	info->type = STACK_TYPE_UNKNOWN;
+	return -EINVAL;
 }
-EXPORT_SYMBOL(dump_trace);
 
-void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *sp, char *log_lvl)
 {
 	unsigned long *irq_stack_end;
 	unsigned long *irq_stack;
 	unsigned long *stack;
-	int cpu;
 	int i;
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	if (!try_get_task_stack(task))
+		return;
 
-	irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
-	irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
+	irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
+	irq_stack     = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
 
-	/*
-	 * Debugging aid: "show_stack(NULL, NULL);" prints the
-	 * back trace for this cpu:
-	 */
-	if (sp == NULL) {
-		if (regs)
-			sp = (unsigned long *)regs->sp;
-		else if (task)
-			sp = (unsigned long *)task->thread.sp;
-		else
-			sp = (unsigned long *)&sp;
-	}
+	sp = sp ? : get_stack_pointer(task, regs);
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
@@ -299,18 +183,17 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		stack++;
 		touch_nmi_watchdog();
 	}
-	preempt_enable();
 
 	pr_cont("\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
+
+	put_task_stack(task);
 }
 
 void show_regs(struct pt_regs *regs)
 {
 	int i;
-	unsigned long sp;
 
-	sp = regs->sp;
 	show_regs_print_info(KERN_DEFAULT);
 	__show_regs(regs, 1);
 
@@ -325,8 +208,7 @@ void show_regs(struct pt_regs *regs)
 	u8 *ip;
 
 	printk(KERN_DEFAULT "Stack:\n");
-	show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-			   0, KERN_DEFAULT);
+	show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
 
 	printk(KERN_DEFAULT "Code: ");
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 93982aebb398..2f2b8c7ccb85 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void)
 	on_boot_cpu = 0;
 
 	WARN_ON_FPU(current->thread.fpu.fpstate_active);
-	current_thread_info()->status = 0;
 
 	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
 		eagerfpu = ENABLE;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d036cfb4495d..8639bb2ae058 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 	}
 
 	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-				     frame_pointer) == -EBUSY) {
+				     frame_pointer, parent) == -EBUSY) {
 		*parent = old;
 		return;
 	}
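The extra 'parent' argument records where on the stack ftrace replaced the return address, which is what lets ftrace_graph_ret_addr() in the new show_trace_log_lvl() above translate return_to_handler entries back to real callers by matching the slot address instead of guessing by depth. Its use, sketched from the dumpstack.c hunk (stack_slot stands in for the word being scanned):

        unsigned long addr = *stack_slot;
        unsigned long real = ftrace_graph_ret_addr(task, &graph_idx, addr,
                                                   stack_slot);
        if (real != addr)       /* slot held return_to_handler */
                printk_stack_address(addr, 0, log_lvl);
        printk_stack_address(real, reliable, log_lvl);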
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6f8902b0d151..5f401262f12d 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
  */
 __HEAD
 ENTRY(startup_32)
-	movl pa(stack_start),%ecx
+	movl pa(initial_stack),%ecx
 
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
@@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4
  * start_secondary().
  */
 ENTRY(start_cpu0)
-	movl stack_start, %ecx
+	movl initial_stack, %ecx
 	movl %ecx, %esp
 	jmp *(initial_code)
 ENDPROC(start_cpu0)
@@ -307,7 +307,7 @@ ENTRY(startup_32_smp)
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-	movl pa(stack_start),%ecx
+	movl pa(initial_stack),%ecx
 	movl %eax,%ss
 	leal -__PAGE_OFFSET(%ecx),%esp
 
@@ -703,7 +703,7 @@ ENTRY(initial_page_table)
 
 .data
 .balign 4
-ENTRY(stack_start)
+ENTRY(initial_stack)
 	.long init_thread_union+THREAD_SIZE
 
 __INITRODATA
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 9f8efc9f0075..c98a559c346e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,7 +66,7 @@ startup_64:
 	 */
 
 	/*
-	 * Setup stack for verify_cpu(). "-8" because stack_start is defined
+	 * Setup stack for verify_cpu(). "-8" because initial_stack is defined
 	 * this way, see below. Our best guess is a NULL ptr for stack
 	 * termination heuristics and we don't want to break anything which
 	 * might depend on it (kgdb, ...).
@@ -226,7 +226,7 @@ ENTRY(secondary_startup_64)
 	movq	%rax, %cr0
 
 	/* Setup a boot time stack */
-	movq stack_start(%rip), %rsp
+	movq initial_stack(%rip), %rsp
 
 	/* zero EFLAGS after setting rsp */
 	pushq $0
@@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64)
  * start_secondary().
  */
 ENTRY(start_cpu0)
-	movq stack_start(%rip),%rsp
+	movq initial_stack(%rip),%rsp
 	movq initial_code(%rip),%rax
 	pushq $0		# fake return address to stop unwinder
 	pushq $__KERNEL_CS	# set correct cs
@@ -319,17 +319,15 @@ ENTRY(start_cpu0)
 ENDPROC(start_cpu0)
 #endif
 
-	/* SMP bootup changes these two */
+	/* Both SMP bootup and ACPI suspend change these variables */
 	__REFDATA
 	.balign	8
 	GLOBAL(initial_code)
 	.quad	x86_64_start_kernel
 	GLOBAL(initial_gs)
 	.quad	INIT_PER_CPU_VAR(irq_stack_union)
-
-	GLOBAL(stack_start)
+	GLOBAL(initial_stack)
 	.quad  init_thread_union+THREAD_SIZE-8
-	.word  0
 	__FINITDATA
 
 bad_address:
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4a7903714065..9ebd0b0e73d9 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 	if (user_mode(regs))
 		return;
 
-	if (regs->sp >= curbase + sizeof(struct thread_info) +
-			sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
+	if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
 	    regs->sp <= curbase + THREAD_SIZE)
 		return;
 
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 04cde527d728..8e36f249646e 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -50,6 +50,7 @@
 #include <asm/apicdef.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/switch_to.h>
 
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
 {
@@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 	gdb_regs[GDB_DX]	= 0;
 	gdb_regs[GDB_SI]	= 0;
 	gdb_regs[GDB_DI]	= 0;
-	gdb_regs[GDB_BP]	= *(unsigned long *)p->thread.sp;
+	gdb_regs[GDB_BP]	= ((struct inactive_task_frame *)p->thread.sp)->bp;
 #ifdef CONFIG_X86_32
 	gdb_regs[GDB_DS]	= __KERNEL_DS;
 	gdb_regs[GDB_ES]	= __KERNEL_DS;
 	gdb_regs[GDB_PS]	= 0;
 	gdb_regs[GDB_CS]	= __KERNEL_CS;
-	gdb_regs[GDB_PC]	= p->thread.ip;
 	gdb_regs[GDB_SS]	= __KERNEL_DS;
 	gdb_regs[GDB_FS]	= 0xFFFF;
 	gdb_regs[GDB_GS]	= 0xFFFF;
 #else
-	gdb_regs32[GDB_PS]	= *(unsigned long *)(p->thread.sp + 8);
+	gdb_regs32[GDB_PS]	= 0;
 	gdb_regs32[GDB_CS]	= __KERNEL_CS;
 	gdb_regs32[GDB_SS]	= __KERNEL_DS;
-	gdb_regs[GDB_PC]	= 0;
 	gdb_regs[GDB_R8]	= 0;
 	gdb_regs[GDB_R9]	= 0;
 	gdb_regs[GDB_R10]	= 0;
@@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 	gdb_regs[GDB_R14]	= 0;
 	gdb_regs[GDB_R15]	= 0;
 #endif
+	gdb_regs[GDB_PC]	= 0;
 	gdb_regs[GDB_SP]	= p->thread.sp;
 }
 
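This kgdb change and the process.c changes below both read through the new struct inactive_task_frame, which describes what the rewritten switch_to() leaves at the top of a sleeping task's stack. Roughly, per <asm/switch_to.h> in this series (consult the header for the authoritative layout):

        struct inactive_task_frame {
        #ifdef CONFIG_X86_64
                unsigned long r15, r14, r13, r12;
        #else
                unsigned long si, di;
        #endif
                unsigned long bx;
                unsigned long bp;
                unsigned long ret_addr; /* what thread_saved_pc() returns */
        };

This is why the old magic offsets (p->thread.sp + 8, ((unsigned long *)tsk->thread.sp)[3]) disappear: the layout now has a name.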
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index c2bedaea11f7..4afc67f5facc 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -184,7 +184,7 @@ out:
 
 static struct kobj_attribute type_attr = __ATTR_RO(type);
 
-static struct bin_attribute data_attr = {
+static struct bin_attribute data_attr __ro_after_init = {
 	.attr = {
 		.name = "data",
 		.mode = S_IRUGO,
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 3692249a70f1..60b9949f1e65 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,7 +29,7 @@
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
 
-static int kvmclock = 1;
+static int kvmclock __ro_after_init = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
 static cycle_t kvm_sched_clock_offset;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1acfd76e3e26..bbf3d5933eaa 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.read_cr0 = native_read_cr0,
 	.write_cr0 = native_write_cr0,
 	.read_cr4 = native_read_cr4,
-	.read_cr4_safe = native_read_cr4_safe,
 	.write_cr4 = native_write_cr4,
 #ifdef CONFIG_X86_64
 	.read_cr8 = native_read_cr8,
@@ -389,7 +388,7 @@ NOKPROBE_SYMBOL(native_load_idt);
 #define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)
 #endif
 
-struct pv_mmu_ops pv_mmu_ops = {
+struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 
 	.read_cr2 = native_read_cr2,
 	.write_cr2 = native_write_cr2,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0ea2ce4..4002b475171c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/vm86.h>
+#include <asm/switch_to.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -513,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 /*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+	struct inactive_task_frame *frame =
+		(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
+	return READ_ONCE_NOCHECK(frame->ret_addr);
+}
+
+/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
@@ -520,15 +532,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  */
 unsigned long get_wchan(struct task_struct *p)
 {
-	unsigned long start, bottom, top, sp, fp, ip;
+	unsigned long start, bottom, top, sp, fp, ip, ret = 0;
 	int count = 0;
 
 	if (!p || p == current || p->state == TASK_RUNNING)
 		return 0;
 
+	if (!try_get_task_stack(p))
+		return 0;
+
 	start = (unsigned long)task_stack_page(p);
 	if (!start)
-		return 0;
+		goto out;
 
 	/*
 	 * Layout of the stack page:
@@ -537,9 +552,7 @@ unsigned long get_wchan(struct task_struct *p)
 	 *		PADDING
 	 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
 	 *		stack
-	 * ----------- bottom = start + sizeof(thread_info)
-	 *		thread_info
-	 * ----------- start
+	 * ----------- bottom = start
 	 *
 	 * The task's stack pointer points at the location where the
 	 * frame pointer is stored. The data on the stack is:
@@ -550,20 +563,25 @@ unsigned long get_wchan(struct task_struct *p)
 	 */
 	top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
 	top -= 2 * sizeof(unsigned long);
-	bottom = start + sizeof(struct thread_info);
+	bottom = start;
 
 	sp = READ_ONCE(p->thread.sp);
 	if (sp < bottom || sp > top)
-		return 0;
+		goto out;
 
-	fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+	fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
 	do {
 		if (fp < bottom || fp > top)
-			return 0;
+			goto out;
 		ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
-		if (!in_sched_functions(ip))
-			return ip;
+		if (!in_sched_functions(ip)) {
+			ret = ip;
+			goto out;
+		}
 		fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
 	} while (count++ < 16 && p->state != TASK_RUNNING);
-	return 0;
+
+out:
+	put_task_stack(p);
+	return ret;
 }
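
The get_wchan() rework is one instance of the stack-pinning pattern this series introduces: with CONFIG_VMAP_STACK a dead task's stack can be freed, and unmapped, while someone else is still looking at it, so every walker takes a reference first and drops it when done. A sketch of the pattern with a hypothetical helper (the frame access mirrors the new thread_saved_pc() above):

#include <linux/sched.h>
#include <asm/switch_to.h>

/* Hypothetical: read a blocked task's saved return address safely. */
static unsigned long peek_saved_ip(struct task_struct *p)
{
	struct inactive_task_frame *frame;
	unsigned long ip;

	if (!try_get_task_stack(p))	/* stack already freed? */
		return 0;

	frame = (struct inactive_task_frame *)READ_ONCE(p->thread.sp);
	ip = READ_ONCE_NOCHECK(frame->ret_addr);

	put_task_stack(p);		/* drop the pin */
	return ip;
}
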
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29c38c7..bd7be8efdc4c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,17 +55,6 @@
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
 
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
-
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((unsigned long *)tsk->thread.sp)[3];
-}
-
 void __show_regs(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -101,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all)
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
-	cr4 = __read_cr4_safe();
+	cr4 = __read_cr4();
 	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
 		cr0, cr2, cr3, cr4);
 
@@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 		unsigned long arg, struct task_struct *p, unsigned long tls)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
+	struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
+	struct inactive_task_frame *frame = &fork_frame->frame;
 	struct task_struct *tsk;
 	int err;
 
-	p->thread.sp = (unsigned long) childregs;
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.sp0 = (unsigned long) (childregs+1);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		p->thread.ip = (unsigned long) ret_from_kernel_thread;
-		task_user_gs(p) = __KERNEL_STACK_CANARY;
-		childregs->ds = __USER_DS;
-		childregs->es = __USER_DS;
-		childregs->fs = __KERNEL_PERCPU;
-		childregs->bx = sp;	/* function */
-		childregs->bp = arg;
-		childregs->orig_ax = -1;
-		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+		frame->bx = sp;		/* function */
+		frame->di = arg;
 		p->thread.io_bitmap_ptr = NULL;
 		return 0;
 	}
+	frame->bx = 0;
 	*childregs = *current_pt_regs();
 	childregs->ax = 0;
 	if (sp)
 		childregs->sp = sp;
 
-	p->thread.ip = (unsigned long) ret_from_fork;
 	task_user_gs(p) = get_user_gs(current_pt_regs());
 
 	p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8f84bf..de9acaf2d371 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -50,8 +50,6 @@
 #include <asm/switch_to.h>
 #include <asm/xen/hypervisor.h>
 
-asmlinkage extern void ret_from_fork(void);
-
 __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
 /* Prints also some state that isn't saved in the pt_regs */
@@ -141,12 +139,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 {
 	int err;
 	struct pt_regs *childregs;
+	struct fork_frame *fork_frame;
+	struct inactive_task_frame *frame;
 	struct task_struct *me = current;
 
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
-	p->thread.sp = (unsigned long) childregs;
-	set_tsk_thread_flag(p, TIF_FORK);
+	fork_frame = container_of(childregs, struct fork_frame, regs);
+	frame = &fork_frame->frame;
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
@@ -160,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		childregs->sp = (unsigned long)childregs;
-		childregs->ss = __KERNEL_DS;
-		childregs->bx = sp; /* function */
-		childregs->bp = arg;
-		childregs->orig_ax = -1;
-		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+		frame->bx = sp;		/* function */
+		frame->r12 = arg;
 		return 0;
 	}
+	frame->bx = 0;
 	*childregs = *current_pt_regs();
 
 	childregs->ax = 0;
@@ -511,7 +510,7 @@ void set_personality_ia32(bool x32)
 		current->personality &= ~READ_IMPLIES_EXEC;
 		/* in_compat_syscall() uses the presence of the x32
 		   syscall bit flag to determine compat status */
-		current_thread_info()->status &= ~TS_COMPAT;
+		current->thread.status &= ~TS_COMPAT;
 	} else {
 		set_thread_flag(TIF_IA32);
 		clear_thread_flag(TIF_X32);
@@ -519,7 +518,7 @@ void set_personality_ia32(bool x32)
 		current->mm->context.ia32_compat = TIF_IA32;
 		current->personality |= force_personality32;
 		/* Prepare the first "return" to user space */
-		current_thread_info()->status |= TS_COMPAT;
+		current->thread.status |= TS_COMPAT;
 	}
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
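
Both copy_thread_tls() variants now build a real switch frame instead of faking a pt_regs for kernel threads. The layout comes from asm/switch_to.h in this series; the sketch below is an approximation for orientation, and the header is authoritative:

struct inactive_task_frame {
#ifdef CONFIG_X86_64
	unsigned long r15;
	unsigned long r14;
	unsigned long r13;
	unsigned long r12;	/* kernel thread: argument */
#else
	unsigned long si;
	unsigned long di;	/* kernel thread: argument */
#endif
	unsigned long bx;	/* kernel thread: function to call */
	unsigned long bp;	/* zeroed so frame-pointer unwinds stop here */
	unsigned long ret_addr;	/* ret_from_fork for a newly forked task */
};

struct fork_frame {
	struct inactive_task_frame frame;	/* consumed by the switch_to() asm */
	struct pt_regs regs;			/* user state for the return to usermode */
};
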
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f79576a541ff..ce94c38cf4d6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
 		return sp;
 
 	prev_esp = (u32 *)(context);
-	if (prev_esp)
-		return (unsigned long)prev_esp;
+	if (*prev_esp)
+		return (unsigned long)*prev_esp;
 
 	return (unsigned long)regs;
 }
@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 			 */
 			regs->orig_ax = value;
 			if (syscall_get_nr(child, regs) >= 0)
-				task_thread_info(child)->status |= TS_I386_REGS_POKED;
+				child->thread.status |= TS_I386_REGS_POKED;
 			break;
 
 		case offsetof(struct user32, regs.eflags):
@@ -1250,7 +1250,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 #ifdef CONFIG_X86_64
 
-static struct user_regset x86_64_regsets[] __read_mostly = {
+static struct user_regset x86_64_regsets[] __ro_after_init = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1291,7 +1291,7 @@ static const struct user_regset_view user_x86_64_view = {
 #endif	/* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-static struct user_regset x86_32_regsets[] __read_mostly = {
+static struct user_regset x86_32_regsets[] __ro_after_init = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1344,7 +1344,7 @@ static const struct user_regset_view user_x86_32_view = {
  */
 u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
-void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
+void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
 {
 #ifdef CONFIG_X86_64
 	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 63bf27d972b7..e244c19a2451 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -705,7 +705,7 @@ static void native_machine_power_off(void)
 	tboot_shutdown(TB_SHUTDOWN_HALT);
 }
 
-struct machine_ops machine_ops = {
+struct machine_ops machine_ops __ro_after_init = {
 	.power_off = native_machine_power_off,
 	.shutdown = native_machine_shutdown,
 	.emergency_restart = native_machine_emergency_restart,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2c4bc85dfe90..eeb094ea794a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -210,9 +210,9 @@ EXPORT_SYMBOL(boot_cpu_data);
 
 
 #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-__visible unsigned long mmu_cr4_features;
+__visible unsigned long mmu_cr4_features __ro_after_init;
 #else
-__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
+__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
 #endif
 
 /* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p)
 	 * auditing all the early-boot CR4 manipulation would be needed to
 	 * rule it out.
 	 */
-	mmu_cr4_features = __read_cr4_safe();
+	mmu_cr4_features = __read_cr4();
 
 	memblock_set_current_limit(get_max_mapped());
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7a40e068302d..2bbd27f89802 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -33,7 +33,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
 DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
 	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
 };
 EXPORT_SYMBOL(__per_cpu_offset);
@@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void)
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) +
-			IRQ_STACK_SIZE - 64;
+			IRQ_STACK_SIZE;
 #endif
 #ifdef CONFIG_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 04cb3212db2d..da20ecb5397a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 	 * than the tracee.
 	 */
 #ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 54e2f1a968a4..7249dcf2cbcb 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -943,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
 	per_cpu(cpu_current_top_of_stack, cpu) =
 		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
-	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 #endif
 }
@@ -970,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-	stack_start  = idle->thread.sp;
+	initial_stack  = idle->thread.sp;
 
 	/*
 	 * Enable the espfix hack for this CPU
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4738f5e0f2ab..0653788026e2 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -8,80 +8,69 @@
 #include <linux/export.h>
 #include <linux/uaccess.h>
 #include <asm/stacktrace.h>
+#include <asm/unwind.h>
 
-static int save_stack_stack(void *data, char *name)
-{
-	return 0;
-}
-
-static int
-__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
+static int save_stack_address(struct stack_trace *trace, unsigned long addr,
+			      bool nosched)
 {
-	struct stack_trace *trace = data;
-#ifdef CONFIG_FRAME_POINTER
-	if (!reliable)
-		return 0;
-#endif
 	if (nosched && in_sched_functions(addr))
 		return 0;
+
 	if (trace->skip > 0) {
 		trace->skip--;
 		return 0;
 	}
-	if (trace->nr_entries < trace->max_entries) {
-		trace->entries[trace->nr_entries++] = addr;
-		return 0;
-	} else {
-		return -1; /* no more room, stop walking the stack */
-	}
-}
 
-static int save_stack_address(void *data, unsigned long addr, int reliable)
-{
-	return __save_stack_address(data, addr, reliable, false);
-}
+	if (trace->nr_entries >= trace->max_entries)
+		return -1;
 
-static int
-save_stack_address_nosched(void *data, unsigned long addr, int reliable)
-{
-	return __save_stack_address(data, addr, reliable, true);
+	trace->entries[trace->nr_entries++] = addr;
+	return 0;
 }
 
-static const struct stacktrace_ops save_stack_ops = {
-	.stack		= save_stack_stack,
-	.address	= save_stack_address,
-	.walk_stack	= print_context_stack,
-};
+static void __save_stack_trace(struct stack_trace *trace,
+			       struct task_struct *task, struct pt_regs *regs,
+			       bool nosched)
+{
+	struct unwind_state state;
+	unsigned long addr;
 
-static const struct stacktrace_ops save_stack_ops_nosched = {
-	.stack		= save_stack_stack,
-	.address	= save_stack_address_nosched,
-	.walk_stack	= print_context_stack,
-};
+	if (regs)
+		save_stack_address(trace, regs->ip, nosched);
+
+	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
+	     unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || save_stack_address(trace, addr, nosched))
+			break;
+	}
+
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
 
 /*
  * Save stack-backtrace addresses into a stack_trace buffer.
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-	dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	__save_stack_trace(trace, current, NULL, false);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 {
-	dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	__save_stack_trace(trace, current, regs, false);
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	if (!try_get_task_stack(tsk))
+		return;
+
+	__save_stack_trace(trace, tsk, NULL, true);
+
+	put_task_stack(tsk);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
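
Every converted caller in this series (stacktrace here, plus perf, oprofile and dumpstack elsewhere in the pull) reduces to the same consumer loop against the new unwind API. Extracted as a standalone sketch:

#include <linux/printk.h>
#include <asm/unwind.h>

/* Sketch of a new unwinder consumer; task may be current, regs may be NULL. */
static void print_trace(struct task_struct *task, struct pt_regs *regs)
{
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr)
			break;			/* unreliable address or done */
		printk("%pS\n", (void *)addr);	/* consume the address */
	}
}
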
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b70ca12dd389..bd4e3d4d3625 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
 DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
 DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
+#ifdef CONFIG_VMAP_STACK
+__visible void __noreturn handle_stack_overflow(const char *message,
+						struct pt_regs *regs,
+						unsigned long fault_address)
+{
+	printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
+		 (void *)fault_address, current->stack,
+		 (char *)current->stack + THREAD_SIZE - 1);
+	die(message, regs, 0);
+
+	/* Be absolutely certain we don't return. */
+	panic(message);
+}
+#endif
+
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
+#ifdef CONFIG_VMAP_STACK
+	unsigned long cr2;
+#endif
 
 #ifdef CONFIG_X86_ESPFIX64
 	extern unsigned char native_irq_return_iret[];
@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_DF;
 
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * If we overflow the stack into a guard page, the CPU will fail
+	 * to deliver #PF and will send #DF instead.  Similarly, if we
+	 * take any non-IST exception while too close to the bottom of
+	 * the stack, the processor will get a page fault while
+	 * delivering the exception and will generate a double fault.
+	 *
+	 * According to the SDM (footnote in 6.15 under "Interrupt 14 -
+	 * Page-Fault Exception (#PF)"):
+	 *
+	 *   Processors update CR2 whenever a page fault is detected. If a
+	 *   second page fault occurs while an earlier page fault is being
+	 *   delivered, the faulting linear address of the second fault will
+	 *   overwrite the contents of CR2 (replacing the previous
+	 *   address). These updates to CR2 occur even if the page fault
+	 *   results in a double fault or occurs during the delivery of a
+	 *   double fault.
+	 *
+	 * The logic below has a small possibility of incorrectly diagnosing
+	 * some errors as stack overflows.  For example, if the IDT or GDT
+	 * gets corrupted such that #GP delivery fails due to a bad descriptor
+	 * causing #GP and we hit this condition while CR2 coincidentally
+	 * points to the stack guard page, we'll think we overflowed the
+	 * stack.  Given that we're going to panic one way or another
+	 * if this happens, this isn't necessarily worth fixing.
+	 *
+	 * If necessary, we could improve the test by only diagnosing
+	 * a stack overflow if the saved RSP points within 47 bytes of
+	 * the bottom of the stack: if RSP == tsk_stack + 48 and we
+	 * take an exception, the stack is already aligned and there
+	 * will be enough room for SS, RSP, RFLAGS, CS, RIP, and a
+	 * possible error code, so a stack overflow would *not* double
+	 * fault.  With any less space left, exception delivery could
+	 * fail, and, as a practical matter, we've overflowed the
+	 * stack even if the actual trigger for the double fault was
+	 * something else.
+	 */
+	cr2 = read_cr2();
+	if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
+		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
+#endif
+
 #ifdef CONFIG_DOUBLEFAULT
 	df_debug(regs, error_code);
 #endif
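
The CR2 test above leans on unsigned wraparound: task_stack_page(tsk) - 1 - cr2 is smaller than PAGE_SIZE exactly when cr2 falls in the guard page directly below the stack, and wraps to a huge value when cr2 is on or above it. A small userspace demonstration of the same arithmetic, with made-up addresses:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* True iff addr lies in the one-page guard region below stack_base. */
static int in_guard_page(unsigned long stack_base, unsigned long addr)
{
	return stack_base - 1 - addr < PAGE_SIZE;
}

int main(void)
{
	unsigned long base = 0x100000UL;

	printf("%d\n", in_guard_page(base, base - 1));		/* 1: just below */
	printf("%d\n", in_guard_page(base, base - PAGE_SIZE));	/* 1: guard bottom */
	printf("%d\n", in_guard_page(base, base));		/* 0: on the stack */
	printf("%d\n", in_guard_page(base, base - PAGE_SIZE - 1)); /* 0: past guard */
	return 0;
}
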
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
new file mode 100644
index 000000000000..a2456d4d286a
--- /dev/null
+++ b/arch/x86/kernel/unwind_frame.c
@@ -0,0 +1,93 @@
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+#define FRAME_HEADER_SIZE (sizeof(long) * 2)
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	unsigned long addr;
+	unsigned long *addr_p = unwind_get_return_address_ptr(state);
+
+	if (unwind_done(state))
+		return 0;
+
+	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+				     addr_p);
+
+	return __kernel_text_address(addr) ? addr : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static bool update_stack_state(struct unwind_state *state, void *addr,
+			       size_t len)
+{
+	struct stack_info *info = &state->stack_info;
+
+	/*
+	 * If addr isn't on the current stack, switch to the next one.
+	 *
+	 * We may have to traverse multiple stacks to deal with the possibility
+	 * that 'info->next_sp' could point to an empty stack and 'addr' could
+	 * be on a subsequent stack.
+	 */
+	while (!on_stack(info, addr, len))
+		if (get_stack_info(info->next_sp, state->task, info,
+				   &state->stack_mask))
+			return false;
+
+	return true;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	unsigned long *next_bp;
+
+	if (unwind_done(state))
+		return false;
+
+	next_bp = (unsigned long *)*state->bp;
+
+	/* make sure the next frame's data is accessible */
+	if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
+		return false;
+
+	/* move to the next frame */
+	state->bp = next_bp;
+	return true;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame)
+{
+	memset(state, 0, sizeof(*state));
+	state->task = task;
+
+	/* don't even attempt to start from user mode regs */
+	if (regs && user_mode(regs)) {
+		state->stack_info.type = STACK_TYPE_UNKNOWN;
+		return;
+	}
+
+	/* set up the starting stack frame */
+	state->bp = get_frame_pointer(task, regs);
+
+	/* initialize stack info and make sure the frame data is accessible */
+	get_stack_info(state->bp, state->task, &state->stack_info,
+		       &state->stack_mask);
+	update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+
+	/*
+	 * The caller can provide the address of the first frame directly
+	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+	 * to start unwinding at.  Skip ahead until we reach it.
+	 */
+	while (!unwind_done(state) &&
+	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+		state->bp < first_frame))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
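
The frame-pointer walk above can be reproduced in user space, since the frame header is the same two words: saved frame pointer at fp[0], return address at fp[1]. A sketch, assuming a build with -O0 -fno-omit-frame-pointer; the caller-supplied bound plays the role of get_wchan()'s bottom/top check:

#include <stdio.h>

/* Follow saved-frame-pointer links upward until the given bound. */
static void walk(unsigned long *top)
{
	unsigned long *fp = __builtin_frame_address(0);

	while (fp < top) {
		printf("return address: %#lx\n", fp[1]);
		unsigned long *next = (unsigned long *)fp[0];
		if (next <= fp)		/* frames must move up the stack */
			break;
		fp = next;
	}
}

static void c(unsigned long *top) { walk(top); }
static void b(unsigned long *top) { c(top); }
static void a(unsigned long *top) { b(top); }

int main(void)
{
	/* main's own frame bounds the walk; it prints the return
	 * addresses into c(), b(), a() and main(). */
	a(__builtin_frame_address(0));
	return 0;
}
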
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
new file mode 100644
index 000000000000..b5a834c93065
--- /dev/null
+++ b/arch/x86/kernel/unwind_guess.c
@@ -0,0 +1,43 @@
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	struct stack_info *info = &state->stack_info;
+
+	if (unwind_done(state))
+		return false;
+
+	do {
+		for (state->sp++; state->sp < info->end; state->sp++)
+			if (__kernel_text_address(*state->sp))
+				return true;
+
+		state->sp = info->next_sp;
+
+	} while (!get_stack_info(state->sp, state->task, info,
+				 &state->stack_mask));
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame)
+{
+	memset(state, 0, sizeof(*state));
+
+	state->task = task;
+	state->sp   = first_frame;
+
+	get_stack_info(first_frame, state->task, &state->stack_info,
+		       &state->stack_mask);
+
+	if (!__kernel_text_address(*first_frame))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
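
The guess unwinder trades accuracy for availability: it scans every stack word and keeps whatever looks like a text address, so stale and spurious hits are expected. A userspace analogue, assuming the linker-provided etext symbol; the recursion deliberately leaves dead return addresses for the scan to find:

#include <stdio.h>

extern char etext;	/* end of the text segment, provided by the linker */

static void leave_breadcrumbs(int depth)
{
	if (depth)
		leave_breadcrumbs(depth - 1);
}

int main(void)
{
	unsigned long *fp = __builtin_frame_address(0);
	unsigned long hits[32];
	int n = 0;

	leave_breadcrumbs(8);	/* deposit stale return addresses below us */

	/* Scan the 64 words below main's frame, keeping text addresses. */
	for (unsigned long *sp = fp - 64; sp < fp && n < 32; sp++)
		if (*sp >= 0x1000 && *sp < (unsigned long)&etext)
			hits[n++] = *sp;

	for (int i = 0; i < n; i++)
		printf("maybe a return address: %#lx\n", hits[i]);
	return 0;
}
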
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 76c5e52436c4..0bd9f1287f39 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,7 +91,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
 static void default_nmi_init(void) { };
 static int default_i8042_detect(void) { return 1; };
 
-struct x86_platform_ops x86_platform = {
+struct x86_platform_ops x86_platform __ro_after_init = {
 	.calibrate_cpu			= native_calibrate_cpu,
 	.calibrate_tsc			= native_calibrate_tsc,
 	.get_wallclock			= mach_get_cmos_time,
@@ -108,7 +108,7 @@ struct x86_platform_ops x86_platform = {
 EXPORT_SYMBOL_GPL(x86_platform);
 
 #if defined(CONFIG_PCI_MSI)
-struct x86_msi_ops x86_msi = {
+struct x86_msi_ops x86_msi __ro_after_init = {
 	.setup_msi_irqs		= native_setup_msi_irqs,
 	.teardown_msi_irq	= native_teardown_msi_irq,
 	.teardown_msi_irqs	= default_teardown_msi_irqs,
@@ -137,7 +137,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
 }
 #endif
 
-struct x86_io_apic_ops x86_io_apic_ops = {
+struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = {
 	.read		= native_io_apic_read,
 	.disable	= native_disable_io_apic,
 };