Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/aperture_64.c               |  59
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c               |   2
-rw-r--r--  arch/x86/kernel/apm_32.c                    |  11
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c        |   6
-rw-r--r--  arch/x86/kernel/cpu/microcode/core_early.c  |  37
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c            |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c      |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c   |  22
-rw-r--r--  arch/x86/kernel/cpu/rdrand.c                |   1
-rw-r--r--  arch/x86/kernel/entry_64.S                  | 185
-rw-r--r--  arch/x86/kernel/ldt.c                       |   4
-rw-r--r--  arch/x86/kernel/uprobes.c                   | 551
12 files changed, 528 insertions, 352 deletions
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9fa8aa051f54..76164e173a24 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -10,6 +10,8 @@
  *
  * Copyright 2002 Andi Kleen, SuSE Labs.
  */
+#define pr_fmt(fmt) "AGP: " fmt
+
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -75,14 +77,13 @@ static u32 __init allocate_aperture(void)
 	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
 				      aper_size, aper_size);
 	if (!addr) {
-		printk(KERN_ERR
-		       "Cannot allocate aperture memory hole (%lx,%uK)\n",
-		       addr, aper_size>>10);
+		pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%uKB)\n",
+		       addr, addr + aper_size - 1, aper_size >> 10);
 		return 0;
 	}
 	memblock_reserve(addr, aper_size);
-	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
-			aper_size >> 10, addr);
+	pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n",
+		addr, addr + aper_size - 1, aper_size >> 10);
 	register_nosave_region(addr >> PAGE_SHIFT,
 			       (addr+aper_size) >> PAGE_SHIFT);
 
@@ -126,10 +127,11 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 	u64 aper;
 	u32 old_order;
 
-	printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func);
+	pr_info("pci 0000:%02x:%02x:%02x: AGP bridge\n", bus, slot, func);
 	apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14);
 	if (apsizereg == 0xffffffff) {
-		printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
+		pr_err("pci 0000:%02x:%02x.%d: APSIZE unreadable\n",
+		       bus, slot, func);
 		return 0;
 	}
 
@@ -153,16 +155,18 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 	 * On some sick chips, APSIZE is 0. It means it wants 4G
 	 * so let double check that order, and lets trust AMD NB settings:
 	 */
-	printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n",
-			aper, 32 << old_order);
+	pr_info("pci 0000:%02x:%02x.%d: AGP aperture [bus addr %#010Lx-%#010Lx] (old size %uMB)\n",
+		bus, slot, func, aper, aper + (32ULL << (old_order + 20)) - 1,
+		32 << old_order);
 	if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) {
-		printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n",
-				32 << *order, apsizereg);
+		pr_info("pci 0000:%02x:%02x.%d: AGP aperture size %uMB (APSIZE %#x) is not right, using settings from NB\n",
+			bus, slot, func, 32 << *order, apsizereg);
 		*order = old_order;
 	}
 
-	printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
-		aper, 32 << *order, apsizereg);
+	pr_info("pci 0000:%02x:%02x.%d: AGP aperture [bus addr %#010Lx-%#010Lx] (%uMB, APSIZE %#x)\n",
+		bus, slot, func, aper, aper + (32ULL << (*order + 20)) - 1,
+		32 << *order, apsizereg);
 
 	if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20))
 		return 0;
@@ -218,7 +222,7 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 			}
 		}
 	}
-	printk(KERN_INFO "No AGP bridge found\n");
+	pr_info("No AGP bridge found\n");
 
 	return 0;
 }
@@ -310,7 +314,8 @@ void __init early_gart_iommu_check(void)
 	if (e820_any_mapped(aper_base, aper_base + aper_size,
 			    E820_RAM)) {
 		/* reserve it, so we can reuse it in second kernel */
-		printk(KERN_INFO "update e820 for GART\n");
+		pr_info("e820: reserve [mem %#010Lx-%#010Lx] for GART\n",
+			aper_base, aper_base + aper_size - 1);
 		e820_add_region(aper_base, aper_size, E820_RESERVED);
 		update_e820();
 	}
@@ -354,7 +359,7 @@ int __init gart_iommu_hole_init(void)
 	    !early_pci_allowed())
 		return -ENODEV;
 
-	printk(KERN_INFO "Checking aperture...\n");
+	pr_info("Checking aperture...\n");
 
 	if (!fallback_aper_force)
 		agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
@@ -395,8 +400,9 @@ int __init gart_iommu_hole_init(void)
 		aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
 		aper_base <<= 25;
 
-		printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
-				node, aper_base, aper_size >> 20);
+		pr_info("Node %d: aperture [bus addr %#010Lx-%#010Lx] (%uMB)\n",
+			node, aper_base, aper_base + aper_size - 1,
+			aper_size >> 20);
 		node++;
 
 		if (!aperture_valid(aper_base, aper_size, 64<<20)) {
@@ -407,9 +413,9 @@ int __init gart_iommu_hole_init(void)
 			if (!no_iommu &&
 			    max_pfn > MAX_DMA32_PFN &&
 			    !printed_gart_size_msg) {
-				printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
-				printk(KERN_ERR "please increase GART size in your BIOS setup\n");
-				printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
+				pr_err("you are using iommu with agp, but GART size is less than 64MB\n");
+				pr_err("please increase GART size in your BIOS setup\n");
+				pr_err("if BIOS doesn't have that option, contact your HW vendor!\n");
 				printed_gart_size_msg = 1;
 			}
 		} else {
@@ -446,13 +452,10 @@ out:
 	    force_iommu ||
 	    valid_agp ||
 	    fallback_aper_force) {
-		printk(KERN_INFO
-			"Your BIOS doesn't leave a aperture memory hole\n");
-		printk(KERN_INFO
-			"Please enable the IOMMU option in the BIOS setup\n");
-		printk(KERN_INFO
-			"This costs you %d MB of RAM\n",
-			32 << fallback_aper_order);
+		pr_info("Your BIOS doesn't leave a aperture memory hole\n");
+		pr_info("Please enable the IOMMU option in the BIOS setup\n");
+		pr_info("This costs you %dMB of RAM\n",
+			32 << fallback_aper_order);
 
 		aper_order = fallback_aper_order;
 		aper_alloc = allocate_aperture();
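
The pr_fmt() definition added at the top of aperture_64.c is what removes the need for hand-written "AGP: " prefixes in the messages above: pr_info()/pr_err() paste pr_fmt() onto their format string before it ever reaches printk(). A minimal user-space sketch of that mechanism (the pr_* stand-ins below only mimic include/linux/printk.h; the main() is illustrative and not part of the patch):

#include <stdio.h>

/* Must be defined before the pr_* helpers are used, exactly like the
 * #define at the top of aperture_64.c. */
#define pr_fmt(fmt) "AGP: " fmt

/* Simplified stand-ins for the kernel's pr_info()/pr_err(): they paste
 * pr_fmt() onto the format string at compile time. */
#define pr_info(fmt, ...) printf("info: " pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...)  printf("err:  " pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	unsigned long addr = 0x4000000, size = 64UL << 20;

	/* Prints "info: AGP: Checking aperture..." */
	pr_info("Checking aperture...\n");
	/* Prints "err:  AGP: Cannot allocate aperture memory hole ..." */
	pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%luKB)\n",
	       addr, addr + size - 1, size >> 10);
	return 0;
}
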
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index a698d7165c96..eab67047dec3 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -57,7 +57,7 @@ void arch_trigger_all_cpu_backtrace(void)
 	}
 
 	clear_bit(0, &backtrace_flag);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static int __kprobes
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 3ab03430211d..f3a1f04ed4cb 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -844,21 +844,10 @@ static int apm_do_idle(void)
 	int polling;
 	int err = 0;
 
-	polling = !!(current_thread_info()->status & TS_POLLING);
-	if (polling) {
-		current_thread_info()->status &= ~TS_POLLING;
-		/*
-		 * TS_POLLING-cleared state must be visible before we
-		 * test NEED_RESCHED:
-		 */
-		smp_mb();
-	}
 	if (!need_resched()) {
 		idled = 1;
 		ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax, &err);
 	}
-	if (polling)
-		current_thread_info()->status |= TS_POLLING;
 
 	if (!idled)
 		return 0;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 15c987698b0f..dd9d6190b08d 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -97,6 +97,9 @@ MODULE_LICENSE("GPL");
 
 static struct microcode_ops	*microcode_ops;
 
+bool dis_ucode_ldr;
+module_param(dis_ucode_ldr, bool, 0);
+
 /*
  * Synchronization.
  *
@@ -546,6 +549,9 @@ static int __init microcode_init(void)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	int error;
 
+	if (dis_ucode_ldr)
+		return 0;
+
 	if (c->x86_vendor == X86_VENDOR_INTEL)
 		microcode_ops = init_intel_microcode();
 	else if (c->x86_vendor == X86_VENDOR_AMD)
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index be7f8514f577..5f28a64e71ea 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -17,9 +17,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/module.h>
+#include <asm/microcode.h>
 #include <asm/microcode_intel.h>
 #include <asm/microcode_amd.h>
 #include <asm/processor.h>
+#include <asm/cmdline.h>
 
 #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
 #define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
@@ -72,10 +74,33 @@ static int x86_family(void)
 	return x86;
 }
 
+static bool __init check_loader_disabled_bsp(void)
+{
+#ifdef CONFIG_X86_32
+	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
+	const char *opt	    = "dis_ucode_ldr";
+	const char *option  = (const char *)__pa_nodebug(opt);
+	bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
+
+#else /* CONFIG_X86_64 */
+	const char *cmdline = boot_command_line;
+	const char *option  = "dis_ucode_ldr";
+	bool *res = &dis_ucode_ldr;
+#endif
+
+	if (cmdline_find_option_bool(cmdline, option))
+		*res = true;
+
+	return *res;
+}
+
 void __init load_ucode_bsp(void)
 {
 	int vendor, x86;
 
+	if (check_loader_disabled_bsp())
+		return;
+
 	if (!have_cpuid_p())
 		return;
 
@@ -96,10 +121,22 @@ void __init load_ucode_bsp(void)
 	}
 }
 
+static bool check_loader_disabled_ap(void)
+{
+#ifdef CONFIG_X86_32
+	return __pa_nodebug(dis_ucode_ldr);
+#else
+	return dis_ucode_ldr;
+#endif
+}
+
 void load_ucode_ap(void)
 {
 	int vendor, x86;
 
+	if (check_loader_disabled_ap())
+		return;
+
 	if (!have_cpuid_p())
 		return;
 
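
Both halves of the dis_ucode_ldr switch meet here: core.c exposes it as a module parameter for the late loader, while check_loader_disabled_bsp() has to scan the raw boot_command_line itself because it runs long before parameter parsing (on 32-bit even before paging is enabled, hence the __pa_nodebug() translations). A simplified, user-space stand-in for that kind of whole-word boolean lookup follows; cmdline_find_option_bool() in arch/x86/lib/cmdline.c is the real and more careful implementation, find_bool_option() below is only an illustration:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Toy version of a boot-command-line boolean lookup: the option counts
 * as set when it appears as a whole word, e.g. "quiet dis_ucode_ldr".
 * Illustration only, not the kernel's parser. */
static bool find_bool_option(const char *cmdline, const char *option)
{
	size_t len = strlen(option);
	const char *p = cmdline;

	while ((p = strstr(p, option)) != NULL) {
		bool start_ok = (p == cmdline) || (p[-1] == ' ');
		bool end_ok   = (p[len] == '\0') || (p[len] == ' ');

		if (start_ok && end_ok)
			return true;
		p += len;
	}
	return false;
}

int main(void)
{
	const char *cmdline = "root=/dev/sda1 ro quiet dis_ucode_ldr";

	/* Mirrors check_loader_disabled_bsp(): set the flag when the bare
	 * token is present; both BSP and AP loaders then bail out early. */
	bool dis_ucode_ldr = find_bool_option(cmdline, "dis_ucode_ldr");

	printf("microcode loader %s\n", dis_ucode_ldr ? "disabled" : "enabled");
	return 0;
}
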
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ae407f7226c8..89f3b7c1af20 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -721,6 +721,7 @@ int perf_assign_events(struct perf_event **events, int n,
 
 	return sched.state.unassigned;
 }
+EXPORT_SYMBOL_GPL(perf_assign_events);
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index aa333d966886..adb02aa62af5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
 	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
 	EVENT_CONSTRAINT_END
 };
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ae96cfa5eddd..980970cb744d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status)
 	return val;
 }
 
-static u64 precise_store_data_hsw(u64 status)
+static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
 {
 	union perf_mem_data_src dse;
+	u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
 
 	dse.val = 0;
 	dse.mem_op = PERF_MEM_OP_STORE;
 	dse.mem_lvl = PERF_MEM_LVL_NA;
+
+	/*
+	 * L1 info only valid for following events:
+	 *
+	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
+	 * MEM_UOPS_RETIRED.LOCK_STORES
+	 * MEM_UOPS_RETIRED.SPLIT_STORES
+	 * MEM_UOPS_RETIRED.ALL_STORES
+	 */
+	if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
+		return dse.mem_lvl;
+
 	if (status & 1)
-		dse.mem_lvl = PERF_MEM_LVL_L1;
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+	else
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+
 	/* Nothing else supported. Sorry. */
 	return dse.val;
 }
@@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		data.data_src.val = load_latency_data(pebs->dse);
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
 		data.data_src.val =
-			precise_store_data_hsw(pebs->dse);
+			precise_store_data_hsw(event, pebs->dse);
 	else
 		data.data_src.val = precise_store_data(pebs->dse);
 	}
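
The hard-coded cfg values in the new precise_store_data_hsw() are raw Haswell event encodings after masking with INTEL_ARCH_EVENT_MASK, i.e. just the event-select byte (bits 7:0) plus the unit mask (bits 15:8). A stand-alone sketch of how they decompose, using the umask-to-name mapping from the comment in the hunk above:

#include <stdint.h>
#include <stdio.h>

/* Sketch of how the cfg values checked by precise_store_data_hsw()
 * break down: event select in bits 7:0, unit mask in bits 15:8, per the
 * architectural PERFEVTSEL layout. Names come from the hunk's comment. */
struct hsw_store_event {
	uint16_t    cfg;	/* event->hw.config & INTEL_ARCH_EVENT_MASK */
	const char *name;
};

static const struct hsw_store_event events[] = {
	{ 0x12d0, "MEM_UOPS_RETIRED.STLB_MISS_STORES" },
	{ 0x22d0, "MEM_UOPS_RETIRED.LOCK_STORES"      },
	{ 0x42d0, "MEM_UOPS_RETIRED.SPLIT_STORES"     },
	{ 0x82d0, "MEM_UOPS_RETIRED.ALL_STORES"       },
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(events) / sizeof(events[0]); i++) {
		uint8_t event_select = events[i].cfg & 0xff;
		uint8_t umask        = events[i].cfg >> 8;

		/* All four share event select 0xd0; only these umasks get
		 * the L1 hit/miss bit decoded from the PEBS dse word. */
		printf("%#06x -> event %#04x umask %#04x  %s\n",
		       events[i].cfg, event_select, umask, events[i].name);
	}
	return 0;
}
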
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 384df5105fbc..136ac74dee82 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -27,6 +27,7 @@
 static int __init x86_rdrand_setup(char *s)
 {
 	setup_clear_cpu_cap(X86_FEATURE_RDRAND);
+	setup_clear_cpu_cap(X86_FEATURE_RDSEED);
 	return 1;
 }
 __setup("nordrand", x86_rdrand_setup);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c3628bf2..be846d2468f7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -36,7 +36,7 @@
36 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack 36 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
37 * frame that is otherwise undefined after a SYSCALL 37 * frame that is otherwise undefined after a SYSCALL
38 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. 38 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
39 * - errorentry/paranoidentry/zeroentry - Define exception entry points. 39 * - idtentry - Define exception entry points.
40 */ 40 */
41 41
42#include <linux/linkage.h> 42#include <linux/linkage.h>
@@ -1203,125 +1203,100 @@ apicinterrupt IRQ_WORK_VECTOR \
1203/* 1203/*
1204 * Exception entry points. 1204 * Exception entry points.
1205 */ 1205 */
1206.macro zeroentry sym do_sym 1206#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
1207ENTRY(\sym)
1208 INTR_FRAME
1209 ASM_CLAC
1210 PARAVIRT_ADJUST_EXCEPTION_FRAME
1211 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1212 subq $ORIG_RAX-R15, %rsp
1213 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1214 call error_entry
1215 DEFAULT_FRAME 0
1216 movq %rsp,%rdi /* pt_regs pointer */
1217 xorl %esi,%esi /* no error code */
1218 call \do_sym
1219 jmp error_exit /* %ebx: no swapgs flag */
1220 CFI_ENDPROC
1221END(\sym)
1222.endm
1223 1207
1224.macro paranoidzeroentry sym do_sym 1208.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
1225ENTRY(\sym) 1209ENTRY(\sym)
1226 INTR_FRAME 1210 /* Sanity check */
1227 ASM_CLAC 1211 .if \shift_ist != -1 && \paranoid == 0
1228 PARAVIRT_ADJUST_EXCEPTION_FRAME 1212 .error "using shift_ist requires paranoid=1"
1229 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1213 .endif
1230 subq $ORIG_RAX-R15, %rsp
1231 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1232 call save_paranoid
1233 TRACE_IRQS_OFF
1234 movq %rsp,%rdi /* pt_regs pointer */
1235 xorl %esi,%esi /* no error code */
1236 call \do_sym
1237 jmp paranoid_exit /* %ebx: no swapgs flag */
1238 CFI_ENDPROC
1239END(\sym)
1240.endm
1241 1214
1242#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) 1215 .if \has_error_code
1243.macro paranoidzeroentry_ist sym do_sym ist 1216 XCPT_FRAME
1244ENTRY(\sym) 1217 .else
1245 INTR_FRAME 1218 INTR_FRAME
1246 ASM_CLAC 1219 .endif
1247 PARAVIRT_ADJUST_EXCEPTION_FRAME
1248 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1249 subq $ORIG_RAX-R15, %rsp
1250 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1251 call save_paranoid
1252 TRACE_IRQS_OFF_DEBUG
1253 movq %rsp,%rdi /* pt_regs pointer */
1254 xorl %esi,%esi /* no error code */
1255 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
1256 call \do_sym
1257 addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
1258 jmp paranoid_exit /* %ebx: no swapgs flag */
1259 CFI_ENDPROC
1260END(\sym)
1261.endm
1262 1220
1263.macro errorentry sym do_sym
1264ENTRY(\sym)
1265 XCPT_FRAME
1266 ASM_CLAC 1221 ASM_CLAC
1267 PARAVIRT_ADJUST_EXCEPTION_FRAME 1222 PARAVIRT_ADJUST_EXCEPTION_FRAME
1223
1224 .ifeq \has_error_code
1225 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1226 .endif
1227
1268 subq $ORIG_RAX-R15, %rsp 1228 subq $ORIG_RAX-R15, %rsp
1269 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1229 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1230
1231 .if \paranoid
1232 call save_paranoid
1233 .else
1270 call error_entry 1234 call error_entry
1235 .endif
1236
1271 DEFAULT_FRAME 0 1237 DEFAULT_FRAME 0
1238
1239 .if \paranoid
1240 .if \shift_ist != -1
1241 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
1242 .else
1243 TRACE_IRQS_OFF
1244 .endif
1245 .endif
1246
1272 movq %rsp,%rdi /* pt_regs pointer */ 1247 movq %rsp,%rdi /* pt_regs pointer */
1248
1249 .if \has_error_code
1273 movq ORIG_RAX(%rsp),%rsi /* get error code */ 1250 movq ORIG_RAX(%rsp),%rsi /* get error code */
1274 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ 1251 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1252 .else
1253 xorl %esi,%esi /* no error code */
1254 .endif
1255
1256 .if \shift_ist != -1
1257 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
1258 .endif
1259
1275 call \do_sym 1260 call \do_sym
1261
1262 .if \shift_ist != -1
1263 addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
1264 .endif
1265
1266 .if \paranoid
1267 jmp paranoid_exit /* %ebx: no swapgs flag */
1268 .else
1276 jmp error_exit /* %ebx: no swapgs flag */ 1269 jmp error_exit /* %ebx: no swapgs flag */
1270 .endif
1271
1277 CFI_ENDPROC 1272 CFI_ENDPROC
1278END(\sym) 1273END(\sym)
1279.endm 1274.endm
1280 1275
1281#ifdef CONFIG_TRACING 1276#ifdef CONFIG_TRACING
1282.macro trace_errorentry sym do_sym 1277.macro trace_idtentry sym do_sym has_error_code:req
1283errorentry trace(\sym) trace(\do_sym) 1278idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
1284errorentry \sym \do_sym 1279idtentry \sym \do_sym has_error_code=\has_error_code
1285.endm 1280.endm
1286#else 1281#else
1287.macro trace_errorentry sym do_sym 1282.macro trace_idtentry sym do_sym has_error_code:req
1288errorentry \sym \do_sym 1283idtentry \sym \do_sym has_error_code=\has_error_code
1289.endm 1284.endm
1290#endif 1285#endif
1291 1286
1292 /* error code is on the stack already */ 1287idtentry divide_error do_divide_error has_error_code=0
1293.macro paranoiderrorentry sym do_sym 1288idtentry overflow do_overflow has_error_code=0
1294ENTRY(\sym) 1289idtentry bounds do_bounds has_error_code=0
1295 XCPT_FRAME 1290idtentry invalid_op do_invalid_op has_error_code=0
1296 ASM_CLAC 1291idtentry device_not_available do_device_not_available has_error_code=0
1297 PARAVIRT_ADJUST_EXCEPTION_FRAME 1292idtentry double_fault do_double_fault has_error_code=1 paranoid=1
1298 subq $ORIG_RAX-R15, %rsp 1293idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
1299 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1294idtentry invalid_TSS do_invalid_TSS has_error_code=1
1300 call save_paranoid 1295idtentry segment_not_present do_segment_not_present has_error_code=1
1301 DEFAULT_FRAME 0 1296idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
1302 TRACE_IRQS_OFF 1297idtentry coprocessor_error do_coprocessor_error has_error_code=0
1303 movq %rsp,%rdi /* pt_regs pointer */ 1298idtentry alignment_check do_alignment_check has_error_code=1
1304 movq ORIG_RAX(%rsp),%rsi /* get error code */ 1299idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
1305 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1306 call \do_sym
1307 jmp paranoid_exit /* %ebx: no swapgs flag */
1308 CFI_ENDPROC
1309END(\sym)
1310.endm
1311
1312zeroentry divide_error do_divide_error
1313zeroentry overflow do_overflow
1314zeroentry bounds do_bounds
1315zeroentry invalid_op do_invalid_op
1316zeroentry device_not_available do_device_not_available
1317paranoiderrorentry double_fault do_double_fault
1318zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1319errorentry invalid_TSS do_invalid_TSS
1320errorentry segment_not_present do_segment_not_present
1321zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1322zeroentry coprocessor_error do_coprocessor_error
1323errorentry alignment_check do_alignment_check
1324zeroentry simd_coprocessor_error do_simd_coprocessor_error
1325 1300
1326 1301
1327 /* Reload gs selector with exception handling */ 1302 /* Reload gs selector with exception handling */
@@ -1371,7 +1346,7 @@ ENTRY(do_softirq_own_stack)
1371END(do_softirq_own_stack) 1346END(do_softirq_own_stack)
1372 1347
1373#ifdef CONFIG_XEN 1348#ifdef CONFIG_XEN
1374zeroentry xen_hypervisor_callback xen_do_hypervisor_callback 1349idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
1375 1350
1376/* 1351/*
1377 * A note on the "critical region" in our callback handler. 1352 * A note on the "critical region" in our callback handler.
@@ -1482,21 +1457,21 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
1482 */ 1457 */
1483 .pushsection .kprobes.text, "ax" 1458 .pushsection .kprobes.text, "ax"
1484 1459
1485paranoidzeroentry_ist debug do_debug DEBUG_STACK 1460idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1486paranoidzeroentry_ist int3 do_int3 DEBUG_STACK 1461idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1487paranoiderrorentry stack_segment do_stack_segment 1462idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
1488#ifdef CONFIG_XEN 1463#ifdef CONFIG_XEN
1489zeroentry xen_debug do_debug 1464idtentry xen_debug do_debug has_error_code=0
1490zeroentry xen_int3 do_int3 1465idtentry xen_int3 do_int3 has_error_code=0
1491errorentry xen_stack_segment do_stack_segment 1466idtentry xen_stack_segment do_stack_segment has_error_code=1
1492#endif 1467#endif
1493errorentry general_protection do_general_protection 1468idtentry general_protection do_general_protection has_error_code=1
1494trace_errorentry page_fault do_page_fault 1469trace_idtentry page_fault do_page_fault has_error_code=1
1495#ifdef CONFIG_KVM_GUEST 1470#ifdef CONFIG_KVM_GUEST
1496errorentry async_page_fault do_async_page_fault 1471idtentry async_page_fault do_async_page_fault has_error_code=1
1497#endif 1472#endif
1498#ifdef CONFIG_X86_MCE 1473#ifdef CONFIG_X86_MCE
1499paranoidzeroentry machine_check *machine_check_vector(%rip) 1474idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
1500#endif 1475#endif
1501 1476
1502 /* 1477 /*
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index af1d14a9ebda..dcbbaa165bde 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,6 +20,8 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
 
+int sysctl_ldt16 = 0;
+
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
 {
@@ -234,7 +236,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	 * IRET leaking the high bits of the kernel stack address.
 	 */
 #ifdef CONFIG_X86_64
-	if (!ldt_info.seg_32bit) {
+	if (!ldt_info.seg_32bit && !sysctl_ldt16) {
 		error = -EINVAL;
 		goto out_unlock;
 	}
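
The relaxed seg_32bit check is what a 16-bit modify_ldt() request from user space runs into; Wine and DOSEMU are the usual callers. A user-space sketch of such a request (field names from asm/ldt.h; whether it now succeeds on a 64-bit kernel depends on the ldt16 sysctl that the rest of this series wires up to sysctl_ldt16):

#include <asm/ldt.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number    = 0;
	desc.base_addr       = 0x1000;
	desc.limit           = 0xffff;	/* 64K, classic 16-bit segment */
	desc.seg_32bit       = 0;	/* the bit write_ldt() checks  */
	desc.contents        = MODIFY_LDT_CONTENTS_DATA;
	desc.read_exec_only  = 0;
	desc.limit_in_pages  = 0;
	desc.seg_not_present = 0;
	desc.useable         = 1;

	/* modify_ldt(func=1) writes one LDT entry; with the sysctl left at
	 * zero a 64-bit kernel still rejects this (EINVAL), as before. */
	long ret = syscall(SYS_modify_ldt, 1, &desc, sizeof(desc));

	printf("modify_ldt: %ld\n", ret);
	return ret < 0 ? 1 : 0;
}
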
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 2ed845928b5f..ace22916ade3 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -53,7 +53,7 @@
53#define OPCODE1(insn) ((insn)->opcode.bytes[0]) 53#define OPCODE1(insn) ((insn)->opcode.bytes[0])
54#define OPCODE2(insn) ((insn)->opcode.bytes[1]) 54#define OPCODE2(insn) ((insn)->opcode.bytes[1])
55#define OPCODE3(insn) ((insn)->opcode.bytes[2]) 55#define OPCODE3(insn) ((insn)->opcode.bytes[2])
56#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) 56#define MODRM_REG(insn) X86_MODRM_REG((insn)->modrm.value)
57 57
58#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ 58#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
59 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ 59 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -229,63 +229,6 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
229 return -ENOTSUPP; 229 return -ENOTSUPP;
230} 230}
231 231
232/*
233 * Figure out which fixups arch_uprobe_post_xol() will need to perform, and
234 * annotate arch_uprobe->fixups accordingly. To start with,
235 * arch_uprobe->fixups is either zero or it reflects rip-related fixups.
236 */
237static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
238{
239 bool fix_ip = true, fix_call = false; /* defaults */
240 int reg;
241
242 insn_get_opcode(insn); /* should be a nop */
243
244 switch (OPCODE1(insn)) {
245 case 0x9d:
246 /* popf */
247 auprobe->fixups |= UPROBE_FIX_SETF;
248 break;
249 case 0xc3: /* ret/lret */
250 case 0xcb:
251 case 0xc2:
252 case 0xca:
253 /* ip is correct */
254 fix_ip = false;
255 break;
256 case 0xe8: /* call relative - Fix return addr */
257 fix_call = true;
258 break;
259 case 0x9a: /* call absolute - Fix return addr, not ip */
260 fix_call = true;
261 fix_ip = false;
262 break;
263 case 0xff:
264 insn_get_modrm(insn);
265 reg = MODRM_REG(insn);
266 if (reg == 2 || reg == 3) {
267 /* call or lcall, indirect */
268 /* Fix return addr; ip is correct. */
269 fix_call = true;
270 fix_ip = false;
271 } else if (reg == 4 || reg == 5) {
272 /* jmp or ljmp, indirect */
273 /* ip is correct. */
274 fix_ip = false;
275 }
276 break;
277 case 0xea: /* jmp absolute -- ip is correct */
278 fix_ip = false;
279 break;
280 default:
281 break;
282 }
283 if (fix_ip)
284 auprobe->fixups |= UPROBE_FIX_IP;
285 if (fix_call)
286 auprobe->fixups |= UPROBE_FIX_CALL;
287}
288
289#ifdef CONFIG_X86_64 232#ifdef CONFIG_X86_64
290/* 233/*
291 * If arch_uprobe->insn doesn't use rip-relative addressing, return 234 * If arch_uprobe->insn doesn't use rip-relative addressing, return
@@ -310,15 +253,11 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
310 * - The displacement is always 4 bytes. 253 * - The displacement is always 4 bytes.
311 */ 254 */
312static void 255static void
313handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) 256handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
314{ 257{
315 u8 *cursor; 258 u8 *cursor;
316 u8 reg; 259 u8 reg;
317 260
318 if (mm->context.ia32_compat)
319 return;
320
321 auprobe->rip_rela_target_address = 0x0;
322 if (!insn_rip_relative(insn)) 261 if (!insn_rip_relative(insn))
323 return; 262 return;
324 263
@@ -372,7 +311,48 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct ins
372 cursor++; 311 cursor++;
373 memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); 312 memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
374 } 313 }
375 return; 314}
315
316/*
317 * If we're emulating a rip-relative instruction, save the contents
318 * of the scratch register and store the target address in that register.
319 */
320static void
321pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
322 struct arch_uprobe_task *autask)
323{
324 if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
325 autask->saved_scratch_register = regs->ax;
326 regs->ax = current->utask->vaddr;
327 regs->ax += auprobe->rip_rela_target_address;
328 } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
329 autask->saved_scratch_register = regs->cx;
330 regs->cx = current->utask->vaddr;
331 regs->cx += auprobe->rip_rela_target_address;
332 }
333}
334
335static void
336handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
337{
338 if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
339 struct arch_uprobe_task *autask;
340
341 autask = &current->utask->autask;
342 if (auprobe->fixups & UPROBE_FIX_RIP_AX)
343 regs->ax = autask->saved_scratch_register;
344 else
345 regs->cx = autask->saved_scratch_register;
346
347 /*
348 * The original instruction includes a displacement, and so
349 * is 4 bytes longer than what we've just single-stepped.
350 * Caller may need to apply other fixups to handle stuff
351 * like "jmpq *...(%rip)" and "callq *...(%rip)".
352 */
353 if (correction)
354 *correction += 4;
355 }
376} 356}
377 357
378static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) 358static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
@@ -401,9 +381,19 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
401 return validate_insn_64bits(auprobe, insn); 381 return validate_insn_64bits(auprobe, insn);
402} 382}
403#else /* 32-bit: */ 383#else /* 32-bit: */
404static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) 384/*
385 * No RIP-relative addressing on 32-bit
386 */
387static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
388{
389}
390static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
391 struct arch_uprobe_task *autask)
392{
393}
394static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
395 long *correction)
405{ 396{
406 /* No RIP-relative addressing on 32-bit */
407} 397}
408 398
409static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) 399static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
@@ -412,141 +402,311 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
412} 402}
413#endif /* CONFIG_X86_64 */ 403#endif /* CONFIG_X86_64 */
414 404
415/** 405struct uprobe_xol_ops {
416 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. 406 bool (*emulate)(struct arch_uprobe *, struct pt_regs *);
417 * @mm: the probed address space. 407 int (*pre_xol)(struct arch_uprobe *, struct pt_regs *);
418 * @arch_uprobe: the probepoint information. 408 int (*post_xol)(struct arch_uprobe *, struct pt_regs *);
419 * @addr: virtual address at which to install the probepoint 409};
420 * Return 0 on success or a -ve number on error. 410
411static inline int sizeof_long(void)
412{
413 return is_ia32_task() ? 4 : 8;
414}
415
416static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
417{
418 pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
419 return 0;
420}
421
422/*
423 * Adjust the return address pushed by a call insn executed out of line.
421 */ 424 */
422int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) 425static int adjust_ret_addr(unsigned long sp, long correction)
423{ 426{
424 int ret; 427 int rasize = sizeof_long();
425 struct insn insn; 428 long ra;
426 429
427 auprobe->fixups = 0; 430 if (copy_from_user(&ra, (void __user *)sp, rasize))
428 ret = validate_insn_bits(auprobe, mm, &insn); 431 return -EFAULT;
429 if (ret != 0)
430 return ret;
431 432
432 handle_riprel_insn(auprobe, mm, &insn); 433 ra += correction;
433 prepare_fixups(auprobe, &insn); 434 if (copy_to_user((void __user *)sp, &ra, rasize))
435 return -EFAULT;
434 436
435 return 0; 437 return 0;
436} 438}
437 439
438#ifdef CONFIG_X86_64 440static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
439/*
440 * If we're emulating a rip-relative instruction, save the contents
441 * of the scratch register and store the target address in that register.
442 */
443static void
444pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
445 struct arch_uprobe_task *autask)
446{ 441{
447 if (auprobe->fixups & UPROBE_FIX_RIP_AX) { 442 struct uprobe_task *utask = current->utask;
448 autask->saved_scratch_register = regs->ax; 443 long correction = (long)(utask->vaddr - utask->xol_vaddr);
449 regs->ax = current->utask->vaddr; 444
450 regs->ax += auprobe->rip_rela_target_address; 445 handle_riprel_post_xol(auprobe, regs, &correction);
451 } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { 446 if (auprobe->fixups & UPROBE_FIX_IP)
452 autask->saved_scratch_register = regs->cx; 447 regs->ip += correction;
453 regs->cx = current->utask->vaddr; 448
454 regs->cx += auprobe->rip_rela_target_address; 449 if (auprobe->fixups & UPROBE_FIX_CALL) {
450 if (adjust_ret_addr(regs->sp, correction)) {
451 regs->sp += sizeof_long();
452 return -ERESTART;
453 }
455 } 454 }
455
456 return 0;
456} 457}
457#else 458
458static void 459static struct uprobe_xol_ops default_xol_ops = {
459pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, 460 .pre_xol = default_pre_xol_op,
460 struct arch_uprobe_task *autask) 461 .post_xol = default_post_xol_op,
462};
463
464static bool branch_is_call(struct arch_uprobe *auprobe)
461{ 465{
462 /* No RIP-relative addressing on 32-bit */ 466 return auprobe->branch.opc1 == 0xe8;
463} 467}
464#endif
465 468
466/* 469#define CASE_COND \
467 * arch_uprobe_pre_xol - prepare to execute out of line. 470 COND(70, 71, XF(OF)) \
468 * @auprobe: the probepoint information. 471 COND(72, 73, XF(CF)) \
469 * @regs: reflects the saved user state of current task. 472 COND(74, 75, XF(ZF)) \
470 */ 473 COND(78, 79, XF(SF)) \
471int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 474 COND(7a, 7b, XF(PF)) \
472{ 475 COND(76, 77, XF(CF) || XF(ZF)) \
473 struct arch_uprobe_task *autask; 476 COND(7c, 7d, XF(SF) != XF(OF)) \
477 COND(7e, 7f, XF(ZF) || XF(SF) != XF(OF))
474 478
475 autask = &current->utask->autask; 479#define COND(op_y, op_n, expr) \
476 autask->saved_trap_nr = current->thread.trap_nr; 480 case 0x ## op_y: DO((expr) != 0) \
477 current->thread.trap_nr = UPROBE_TRAP_NR; 481 case 0x ## op_n: DO((expr) == 0)
478 regs->ip = current->utask->xol_vaddr;
479 pre_xol_rip_insn(auprobe, regs, autask);
480 482
481 autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); 483#define XF(xf) (!!(flags & X86_EFLAGS_ ## xf))
482 regs->flags |= X86_EFLAGS_TF;
483 if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
484 set_task_blockstep(current, false);
485 484
486 return 0; 485static bool is_cond_jmp_opcode(u8 opcode)
486{
487 switch (opcode) {
488 #define DO(expr) \
489 return true;
490 CASE_COND
491 #undef DO
492
493 default:
494 return false;
495 }
487} 496}
488 497
489/* 498static bool check_jmp_cond(struct arch_uprobe *auprobe, struct pt_regs *regs)
490 * This function is called by arch_uprobe_post_xol() to adjust the return
491 * address pushed by a call instruction executed out of line.
492 */
493static int adjust_ret_addr(unsigned long sp, long correction)
494{ 499{
495 int rasize, ncopied; 500 unsigned long flags = regs->flags;
496 long ra = 0;
497 501
498 if (is_ia32_task()) 502 switch (auprobe->branch.opc1) {
499 rasize = 4; 503 #define DO(expr) \
500 else 504 return expr;
501 rasize = 8; 505 CASE_COND
506 #undef DO
502 507
503 ncopied = copy_from_user(&ra, (void __user *)sp, rasize); 508 default: /* not a conditional jmp */
504 if (unlikely(ncopied)) 509 return true;
505 return -EFAULT; 510 }
511}
506 512
507 ra += correction; 513#undef XF
508 ncopied = copy_to_user((void __user *)sp, &ra, rasize); 514#undef COND
509 if (unlikely(ncopied)) 515#undef CASE_COND
510 return -EFAULT;
511 516
512 return 0; 517static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
518{
519 unsigned long new_ip = regs->ip += auprobe->branch.ilen;
520 unsigned long offs = (long)auprobe->branch.offs;
521
522 if (branch_is_call(auprobe)) {
523 unsigned long new_sp = regs->sp - sizeof_long();
524 /*
525 * If it fails we execute this (mangled, see the comment in
526 * branch_clear_offset) insn out-of-line. In the likely case
527 * this should trigger the trap, and the probed application
528 * should die or restart the same insn after it handles the
529 * signal, arch_uprobe_post_xol() won't be even called.
530 *
531 * But there is corner case, see the comment in ->post_xol().
532 */
533 if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
534 return false;
535 regs->sp = new_sp;
536 } else if (!check_jmp_cond(auprobe, regs)) {
537 offs = 0;
538 }
539
540 regs->ip = new_ip + offs;
541 return true;
513} 542}
514 543
515#ifdef CONFIG_X86_64 544static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
516static bool is_riprel_insn(struct arch_uprobe *auprobe)
517{ 545{
518 return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0); 546 BUG_ON(!branch_is_call(auprobe));
547 /*
548 * We can only get here if branch_emulate_op() failed to push the ret
549 * address _and_ another thread expanded our stack before the (mangled)
550 * "call" insn was executed out-of-line. Just restore ->sp and restart.
551 * We could also restore ->ip and try to call branch_emulate_op() again.
552 */
553 regs->sp += sizeof_long();
554 return -ERESTART;
519} 555}
520 556
521static void 557static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
522handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
523{ 558{
524 if (is_riprel_insn(auprobe)) { 559 /*
525 struct arch_uprobe_task *autask; 560 * Turn this insn into "call 1f; 1:", this is what we will execute
561 * out-of-line if ->emulate() fails. We only need this to generate
562 * a trap, so that the probed task receives the correct signal with
563 * the properly filled siginfo.
564 *
565 * But see the comment in ->post_xol(), in the unlikely case it can
566 * succeed. So we need to ensure that the new ->ip can not fall into
567 * the non-canonical area and trigger #GP.
568 *
569 * We could turn it into (say) "pushf", but then we would need to
570 * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte
571 * of ->insn[] for set_orig_insn().
572 */
573 memset(auprobe->insn + insn_offset_immediate(insn),
574 0, insn->immediate.nbytes);
575}
526 576
527 autask = &current->utask->autask; 577static struct uprobe_xol_ops branch_xol_ops = {
528 if (auprobe->fixups & UPROBE_FIX_RIP_AX) 578 .emulate = branch_emulate_op,
529 regs->ax = autask->saved_scratch_register; 579 .post_xol = branch_post_xol_op,
530 else 580};
531 regs->cx = autask->saved_scratch_register; 581
582/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
583static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
584{
585 u8 opc1 = OPCODE1(insn);
586
587 /* has the side-effect of processing the entire instruction */
588 insn_get_length(insn);
589 if (WARN_ON_ONCE(!insn_complete(insn)))
590 return -ENOEXEC;
591
592 switch (opc1) {
593 case 0xeb: /* jmp 8 */
594 case 0xe9: /* jmp 32 */
595 case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
596 break;
597
598 case 0xe8: /* call relative */
599 branch_clear_offset(auprobe, insn);
600 break;
532 601
602 case 0x0f:
603 if (insn->opcode.nbytes != 2)
604 return -ENOSYS;
533 /* 605 /*
534 * The original instruction includes a displacement, and so 606 * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches
535 * is 4 bytes longer than what we've just single-stepped. 607 * OPCODE1() of the "short" jmp which checks the same condition.
536 * Fall through to handle stuff like "jmpq *...(%rip)" and
537 * "callq *...(%rip)".
538 */ 608 */
539 if (correction) 609 opc1 = OPCODE2(insn) - 0x10;
540 *correction += 4; 610 default:
611 if (!is_cond_jmp_opcode(opc1))
612 return -ENOSYS;
541 } 613 }
614
615 auprobe->branch.opc1 = opc1;
616 auprobe->branch.ilen = insn->length;
617 auprobe->branch.offs = insn->immediate.value;
618
619 auprobe->ops = &branch_xol_ops;
620 return 0;
542} 621}
543#else 622
544static void 623/**
545handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) 624 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
625 * @mm: the probed address space.
626 * @arch_uprobe: the probepoint information.
627 * @addr: virtual address at which to install the probepoint
628 * Return 0 on success or a -ve number on error.
629 */
630int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
631{
632 struct insn insn;
633 bool fix_ip = true, fix_call = false;
634 int ret;
635
636 ret = validate_insn_bits(auprobe, mm, &insn);
637 if (ret)
638 return ret;
639
640 ret = branch_setup_xol_ops(auprobe, &insn);
641 if (ret != -ENOSYS)
642 return ret;
643
644 /*
645 * Figure out which fixups arch_uprobe_post_xol() will need to perform,
646 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
647 * is either zero or it reflects rip-related fixups.
648 */
649 switch (OPCODE1(&insn)) {
650 case 0x9d: /* popf */
651 auprobe->fixups |= UPROBE_FIX_SETF;
652 break;
653 case 0xc3: /* ret or lret -- ip is correct */
654 case 0xcb:
655 case 0xc2:
656 case 0xca:
657 fix_ip = false;
658 break;
659 case 0x9a: /* call absolute - Fix return addr, not ip */
660 fix_call = true;
661 fix_ip = false;
662 break;
663 case 0xea: /* jmp absolute -- ip is correct */
664 fix_ip = false;
665 break;
666 case 0xff:
667 insn_get_modrm(&insn);
668 switch (MODRM_REG(&insn)) {
669 case 2: case 3: /* call or lcall, indirect */
670 fix_call = true;
671 case 4: case 5: /* jmp or ljmp, indirect */
672 fix_ip = false;
673 }
674 /* fall through */
675 default:
676 handle_riprel_insn(auprobe, &insn);
677 }
678
679 if (fix_ip)
680 auprobe->fixups |= UPROBE_FIX_IP;
681 if (fix_call)
682 auprobe->fixups |= UPROBE_FIX_CALL;
683
684 auprobe->ops = &default_xol_ops;
685 return 0;
686}
687
688/*
689 * arch_uprobe_pre_xol - prepare to execute out of line.
690 * @auprobe: the probepoint information.
691 * @regs: reflects the saved user state of current task.
692 */
693int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
546{ 694{
547 /* No RIP-relative addressing on 32-bit */ 695 struct uprobe_task *utask = current->utask;
696
697 regs->ip = utask->xol_vaddr;
698 utask->autask.saved_trap_nr = current->thread.trap_nr;
699 current->thread.trap_nr = UPROBE_TRAP_NR;
700
701 utask->autask.saved_tf = !!(regs->flags & X86_EFLAGS_TF);
702 regs->flags |= X86_EFLAGS_TF;
703 if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
704 set_task_blockstep(current, false);
705
706 if (auprobe->ops->pre_xol)
707 return auprobe->ops->pre_xol(auprobe, regs);
708 return 0;
548} 709}
549#endif
550 710
551/* 711/*
552 * If xol insn itself traps and generates a signal(Say, 712 * If xol insn itself traps and generates a signal(Say,
@@ -592,22 +752,25 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
592 */ 752 */
593int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 753int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
594{ 754{
595 struct uprobe_task *utask; 755 struct uprobe_task *utask = current->utask;
596 long correction;
597 int result = 0;
598 756
599 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); 757 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
600 758
601 utask = current->utask; 759 if (auprobe->ops->post_xol) {
602 current->thread.trap_nr = utask->autask.saved_trap_nr; 760 int err = auprobe->ops->post_xol(auprobe, regs);
603 correction = (long)(utask->vaddr - utask->xol_vaddr); 761 if (err) {
604 handle_riprel_post_xol(auprobe, regs, &correction); 762 arch_uprobe_abort_xol(auprobe, regs);
605 if (auprobe->fixups & UPROBE_FIX_IP) 763 /*
606 regs->ip += correction; 764 * Restart the probed insn. ->post_xol() must ensure
607 765 * this is really possible if it returns -ERESTART.
608 if (auprobe->fixups & UPROBE_FIX_CALL) 766 */
609 result = adjust_ret_addr(regs->sp, correction); 767 if (err == -ERESTART)
768 return 0;
769 return err;
770 }
771 }
610 772
773 current->thread.trap_nr = utask->autask.saved_trap_nr;
611 /* 774 /*
612 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP 775 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
613 * so we can get an extra SIGTRAP if we do not clear TF. We need 776 * so we can get an extra SIGTRAP if we do not clear TF. We need
@@ -618,7 +781,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
618 else if (!(auprobe->fixups & UPROBE_FIX_SETF)) 781 else if (!(auprobe->fixups & UPROBE_FIX_SETF))
619 regs->flags &= ~X86_EFLAGS_TF; 782 regs->flags &= ~X86_EFLAGS_TF;
620 783
621 return result; 784 return 0;
622} 785}
623 786
624/* callback routine for handling exceptions. */ 787/* callback routine for handling exceptions. */
@@ -652,8 +815,9 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
652 815
653/* 816/*
654 * This function gets called when XOL instruction either gets trapped or 817 * This function gets called when XOL instruction either gets trapped or
655 * the thread has a fatal signal, so reset the instruction pointer to its 818 * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
656 * probed address. 819 * Reset the instruction pointer to its probed address for the potential
820 * restart or for post mortem analysis.
657 */ 821 */
658void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 822void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
659{ 823{
@@ -668,25 +832,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
668 regs->flags &= ~X86_EFLAGS_TF; 832 regs->flags &= ~X86_EFLAGS_TF;
669} 833}
670 834
671/*
672 * Skip these instructions as per the currently known x86 ISA.
673 * rep=0x66*; nop=0x90
674 */
675static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) 835static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
676{ 836{
677 int i; 837 if (auprobe->ops->emulate)
678 838 return auprobe->ops->emulate(auprobe, regs);
679 for (i = 0; i < MAX_UINSN_BYTES; i++) {
680 if (auprobe->insn[i] == 0x66)
681 continue;
682
683 if (auprobe->insn[i] == 0x90) {
684 regs->ip += i + 1;
685 return true;
686 }
687
688 break;
689 }
690 return false; 839 return false;
691} 840}
692 841
@@ -701,23 +850,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
701unsigned long 850unsigned long
702arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) 851arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
703{ 852{
704 int rasize, ncopied; 853 int rasize = sizeof_long(), nleft;
705 unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ 854 unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
706 855
707 rasize = is_ia32_task() ? 4 : 8; 856 if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
708 ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize);
709 if (unlikely(ncopied))
710 return -1; 857 return -1;
711 858
712 /* check whether address has been already hijacked */ 859 /* check whether address has been already hijacked */
713 if (orig_ret_vaddr == trampoline_vaddr) 860 if (orig_ret_vaddr == trampoline_vaddr)
714 return orig_ret_vaddr; 861 return orig_ret_vaddr;
715 862
716 ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); 863 nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
717 if (likely(!ncopied)) 864 if (likely(!nleft))
718 return orig_ret_vaddr; 865 return orig_ret_vaddr;
719 866
720 if (ncopied != rasize) { 867 if (nleft != rasize) {
721 pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " 868 pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
722 "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); 869 "%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
723 870
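
The CASE_COND/XF tables in the new uprobes branch emulation encode the standard x86 condition-code semantics: each short-Jcc opcode pair (0x70/0x71, 0x72/0x73, ...) tests an EFLAGS expression and its negation, and the near forms (0x0f 0x80..0x8f) are folded onto the same table via OPCODE2() - 0x10. A stand-alone C rendering of that flag logic, with EFLAGS bit values as in asm/processor-flags.h:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define X86_EFLAGS_CF 0x0001
#define X86_EFLAGS_PF 0x0004
#define X86_EFLAGS_ZF 0x0040
#define X86_EFLAGS_SF 0x0080
#define X86_EFLAGS_OF 0x0800

/* Same condition table as check_jmp_cond()/is_cond_jmp_opcode(), written
 * out long-hand: opc is the short-Jcc opcode (0x70..0x7f), bit 0 selects
 * the negated form. */
static bool jcc_taken(uint8_t opc, unsigned long flags)
{
	bool cf = flags & X86_EFLAGS_CF, pf = flags & X86_EFLAGS_PF;
	bool zf = flags & X86_EFLAGS_ZF, sf = flags & X86_EFLAGS_SF;
	bool of = flags & X86_EFLAGS_OF;
	bool cond;

	switch (opc & ~1) {
	case 0x70: cond = of;               break;	/* jo  / jno */
	case 0x72: cond = cf;               break;	/* jb  / jae */
	case 0x74: cond = zf;               break;	/* je  / jne */
	case 0x76: cond = cf || zf;         break;	/* jbe / ja  */
	case 0x78: cond = sf;               break;	/* js  / jns */
	case 0x7a: cond = pf;               break;	/* jp  / jnp */
	case 0x7c: cond = sf != of;         break;	/* jl  / jge */
	case 0x7e: cond = zf || (sf != of); break;	/* jle / jg  */
	default:   return true;			/* not a conditional jmp */
	}
	return (opc & 1) ? !cond : cond;
}

int main(void)
{
	/* jle (0x7e) with ZF clear, SF set, OF clear: taken. */
	printf("jle taken: %d\n", jcc_taken(0x7e, X86_EFLAGS_SF));
	/* jg (0x7f) with the same flags: not taken. */
	printf("jg  taken: %d\n", jcc_taken(0x7f, X86_EFLAGS_SF));
	return 0;
}
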