aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author Masami Hiramatsu <mhiramat@redhat.com> 2008-01-30 07:31:21 -0500
committer Ingo Molnar <mingo@elte.hu> 2008-01-30 07:31:21 -0500
commit 8533bbe9f87b01f49ff951f665ea1988252fa3c2 (patch)
tree 5286298af37057c1086405a96e6dce0b0df1fb64 /arch
parent da07ab0375897bb9e108b28129df140ecd3ee94e (diff)
x86: prepare kprobes code for x86 unification
This patch cleans up the kprobes code on x86 in preparation for unification. It is based on Arjan's previous work.
- Remove spurious whitespace changes.
- Add harmless includes.
- Make the 32/64 files more alike.
- Generalize the names of structure fields and local variables.
- Wrap accesses to the stack address in macros.
- Modify the bitmap-building macro.
- Merge the fixup code into is_riprel() and rename it to fix_riprel().
- Set MAX_INSN_SIZE to 16 on both architectures.
- Use u32 for bitmaps on both architectures.
- Clarify some comments.
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86/kernel/kprobes_32.c 321
-rw-r--r-- arch/x86/kernel/kprobes_64.c 409
-rw-r--r-- arch/x86/mm/fault_32.c 1
-rw-r--r-- arch/x86/mm/fault_64.c 1
4 files changed, 411 insertions, 321 deletions
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index 8eccd2d04709..8e06431d8b03 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -29,10 +29,15 @@
29 29
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/ptrace.h> 31#include <linux/ptrace.h>
32#include <linux/string.h>
33#include <linux/slab.h>
32#include <linux/preempt.h> 34#include <linux/preempt.h>
35#include <linux/module.h>
33#include <linux/kdebug.h> 36#include <linux/kdebug.h>
37
34#include <asm/cacheflush.h> 38#include <asm/cacheflush.h>
35#include <asm/desc.h> 39#include <asm/desc.h>
40#include <asm/pgtable.h>
36#include <asm/uaccess.h> 41#include <asm/uaccess.h>
37#include <asm/alternative.h> 42#include <asm/alternative.h>
38 43
@@ -41,65 +46,121 @@ void jprobe_return_end(void);
41DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 46DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
42DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); 47DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
43 48
49/*
50 * "&regs->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs
51 * don't save the ss and esp registers if the CPU is already in kernel
52 * mode when it traps. So for kprobes, regs->sp and regs->ss are not
53 * the [nonexistent] saved stack pointer and ss register, but rather
54 * the top 8 bytes of the pre-int3 stack. So &regs->sp happens to
55 * point to the top of the pre-int3 stack.
56 */
57#define stack_addr(regs) ((unsigned long *)&regs->sp)
58
59#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
60 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
61 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
62 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
63 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
64 << (row % 32))
65 /*
66 * Undefined/reserved opcodes, conditional jump, Opcode Extension
67 * Groups, and some special opcodes can not boost.
68 */
69static const u32 twobyte_is_boostable[256 / 32] = {
70 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
71 /* ---------------------------------------------- */
72 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
73 W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
74 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
75 W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
76 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
77 W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
78 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
79 W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
80 W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
81 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
82 W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
83 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
84 W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
85 W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
86 W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
87 W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */
88 /* ----------------------------------------------- */
89 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
90};
91static const u32 onebyte_has_modrm[256 / 32] = {
92 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
93 /* ----------------------------------------------- */
94 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
95 W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
96 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
97 W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
98 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
99 W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
100 W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
101 W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
102 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
103 W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
104 W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
105 W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
106 W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
107 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
108 W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
109 W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
110 /* ----------------------------------------------- */
111 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
112};
113static const u32 twobyte_has_modrm[256 / 32] = {
114 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
115 /* ----------------------------------------------- */
116 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
117 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
118 W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
119 W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
120 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
121 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
122 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
123 W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
124 W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
125 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
126 W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
127 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
128 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
129 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
130 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
131 W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
132 /* ----------------------------------------------- */
133 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
134};
135#undef W
136
44struct kretprobe_blackpoint kretprobe_blacklist[] = { 137struct kretprobe_blackpoint kretprobe_blacklist[] = {
45 {"__switch_to", }, /* This function switches only current task, but 138 {"__switch_to", }, /* This function switches only current task, but
46 doesn't switch kernel stack.*/ 139 doesn't switch kernel stack.*/
47 {NULL, NULL} /* Terminator */ 140 {NULL, NULL} /* Terminator */
48}; 141};
49const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 142const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
50 143
51/* insert a jmp code */ 144/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
52static __always_inline void set_jmp_op(void *from, void *to) 145static __always_inline void set_jmp_op(void *from, void *to)
53{ 146{
54 struct __arch_jmp_op { 147 struct __arch_jmp_op {
55 char op; 148 char op;
56 long raddr; 149 s32 raddr;
57 } __attribute__((packed)) *jop; 150 } __attribute__((packed)) * jop;
58 jop = (struct __arch_jmp_op *)from; 151 jop = (struct __arch_jmp_op *)from;
59 jop->raddr = (long)(to) - ((long)(from) + 5); 152 jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
60 jop->op = RELATIVEJUMP_INSTRUCTION; 153 jop->op = RELATIVEJUMP_INSTRUCTION;
61} 154}
62 155
63/* 156/*
64 * returns non-zero if opcodes can be boosted. 157 * returns non-zero if opcode is boostable.
65 */ 158 */
66static __always_inline int can_boost(kprobe_opcode_t *opcodes) 159static __always_inline int can_boost(kprobe_opcode_t *opcodes)
67{ 160{
68#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
69 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
70 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
71 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
72 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
73 << (row % 32))
74 /*
75 * Undefined/reserved opcodes, conditional jump, Opcode Extension
76 * Groups, and some special opcodes can not be boost.
77 */
78 static const unsigned long twobyte_is_boostable[256 / 32] = {
79 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
80 /* ------------------------------- */
81 W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
82 W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
83 W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
84 W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
85 W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
86 W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
87 W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
88 W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
89 W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
90 W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
91 W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
92 W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
93 W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
94 W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
95 W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
96 W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */
97 /* ------------------------------- */
98 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
99 };
100#undef W
101 kprobe_opcode_t opcode; 161 kprobe_opcode_t opcode;
102 kprobe_opcode_t *orig_opcodes = opcodes; 162 kprobe_opcode_t *orig_opcodes = opcodes;
163
103retry: 164retry:
104 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) 165 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
105 return 0; 166 return 0;
@@ -109,7 +170,8 @@ retry:
109 if (opcode == 0x0f) { 170 if (opcode == 0x0f) {
110 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) 171 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
111 return 0; 172 return 0;
112 return test_bit(*opcodes, twobyte_is_boostable); 173 return test_bit(*opcodes,
174 (unsigned long *)twobyte_is_boostable);
113 } 175 }
114 176
115 switch (opcode & 0xf0) { 177 switch (opcode & 0xf0) {
@@ -132,12 +194,13 @@ retry:
132 case 0xf0: 194 case 0xf0:
133 if ((opcode & 0x0c) == 0 && opcode != 0xf1) 195 if ((opcode & 0x0c) == 0 && opcode != 0xf1)
134 goto retry; /* lock/rep(ne) prefix */ 196 goto retry; /* lock/rep(ne) prefix */
135 /* clear and set flags can be boost */ 197 /* clear and set flags are boostable */
136 return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); 198 return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
137 default: 199 default:
200 /* segment override prefixes are boostable */
138 if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) 201 if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
139 goto retry; /* prefixes */ 202 goto retry; /* prefixes */
140 /* can't boost CS override and call */ 203 /* CS override prefix and call are not boostable */
141 return (opcode != 0x2e && opcode != 0x9a); 204 return (opcode != 0x2e && opcode != 0x9a);
142 } 205 }
143} 206}
@@ -145,9 +208,9 @@ retry:
145/* 208/*
146 * returns non-zero if opcode modifies the interrupt flag. 209 * returns non-zero if opcode modifies the interrupt flag.
147 */ 210 */
148static int __kprobes is_IF_modifier(kprobe_opcode_t opcode) 211static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
149{ 212{
150 switch (opcode) { 213 switch (*insn) {
151 case 0xfa: /* cli */ 214 case 0xfa: /* cli */
152 case 0xfb: /* sti */ 215 case 0xfb: /* sti */
153 case 0xcf: /* iret/iretd */ 216 case 0xcf: /* iret/iretd */
@@ -157,20 +220,24 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
157 return 0; 220 return 0;
158} 221}
159 222
223static void __kprobes arch_copy_kprobe(struct kprobe *p)
224{
225 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
226 if (can_boost(p->addr))
227 p->ainsn.boostable = 0;
228 else
229 p->ainsn.boostable = -1;
230
231 p->opcode = *p->addr;
232}
233
160int __kprobes arch_prepare_kprobe(struct kprobe *p) 234int __kprobes arch_prepare_kprobe(struct kprobe *p)
161{ 235{
162 /* insn: must be on special executable page on i386. */ 236 /* insn: must be on special executable page on x86. */
163 p->ainsn.insn = get_insn_slot(); 237 p->ainsn.insn = get_insn_slot();
164 if (!p->ainsn.insn) 238 if (!p->ainsn.insn)
165 return -ENOMEM; 239 return -ENOMEM;
166 240 arch_copy_kprobe(p);
167 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
168 p->opcode = *p->addr;
169 if (can_boost(p->addr)) {
170 p->ainsn.boostable = 0;
171 } else {
172 p->ainsn.boostable = -1;
173 }
174 return 0; 241 return 0;
175} 242}
176 243
@@ -195,26 +262,26 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
195{ 262{
196 kcb->prev_kprobe.kp = kprobe_running(); 263 kcb->prev_kprobe.kp = kprobe_running();
197 kcb->prev_kprobe.status = kcb->kprobe_status; 264 kcb->prev_kprobe.status = kcb->kprobe_status;
198 kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags; 265 kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
199 kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags; 266 kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
200} 267}
201 268
202static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) 269static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
203{ 270{
204 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 271 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
205 kcb->kprobe_status = kcb->prev_kprobe.status; 272 kcb->kprobe_status = kcb->prev_kprobe.status;
206 kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags; 273 kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
207 kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags; 274 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
208} 275}
209 276
210static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 277static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
211 struct kprobe_ctlblk *kcb) 278 struct kprobe_ctlblk *kcb)
212{ 279{
213 __get_cpu_var(current_kprobe) = p; 280 __get_cpu_var(current_kprobe) = p;
214 kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags 281 kcb->kprobe_saved_flags = kcb->kprobe_old_flags
215 = (regs->flags & (TF_MASK | IF_MASK)); 282 = (regs->flags & (TF_MASK | IF_MASK));
216 if (is_IF_modifier(p->opcode)) 283 if (is_IF_modifier(p->ainsn.insn))
217 kcb->kprobe_saved_eflags &= ~IF_MASK; 284 kcb->kprobe_saved_flags &= ~IF_MASK;
218} 285}
219 286
220static __always_inline void clear_btf(void) 287static __always_inline void clear_btf(void)
@@ -245,7 +312,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
245void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 312void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
246 struct pt_regs *regs) 313 struct pt_regs *regs)
247{ 314{
248 unsigned long *sara = (unsigned long *)&regs->sp; 315 unsigned long *sara = stack_addr(regs);
249 316
250 ri->ret_addr = (kprobe_opcode_t *) *sara; 317 ri->ret_addr = (kprobe_opcode_t *) *sara;
251 318
@@ -280,7 +347,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
280 if (kcb->kprobe_status == KPROBE_HIT_SS && 347 if (kcb->kprobe_status == KPROBE_HIT_SS &&
281 *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { 348 *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
282 regs->flags &= ~TF_MASK; 349 regs->flags &= ~TF_MASK;
283 regs->flags |= kcb->kprobe_saved_eflags; 350 regs->flags |= kcb->kprobe_saved_flags;
284 goto no_kprobe; 351 goto no_kprobe;
285 } 352 }
286 /* We have reentered the kprobe_handler(), since 353 /* We have reentered the kprobe_handler(), since
@@ -301,7 +368,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
301 * another cpu right after we hit, no further 368 * another cpu right after we hit, no further
302 * handling of this interrupt is appropriate 369 * handling of this interrupt is appropriate
303 */ 370 */
304 regs->ip -= sizeof(kprobe_opcode_t); 371 regs->ip = (unsigned long)addr;
305 ret = 1; 372 ret = 1;
306 goto no_kprobe; 373 goto no_kprobe;
307 } 374 }
@@ -325,7 +392,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
325 * Back up over the (now missing) int3 and run 392 * Back up over the (now missing) int3 and run
326 * the original instruction. 393 * the original instruction.
327 */ 394 */
328 regs->ip -= sizeof(kprobe_opcode_t); 395 regs->ip = (unsigned long)addr;
329 ret = 1; 396 ret = 1;
330 } 397 }
331 /* Not one of ours: let kernel handle it */ 398 /* Not one of ours: let kernel handle it */
@@ -341,7 +408,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
341 408
342ss_probe: 409ss_probe:
343#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) 410#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
344 if (p->ainsn.boostable == 1 && !p->post_handler){ 411 if (p->ainsn.boostable == 1 && !p->post_handler) {
345 /* Boost up -- we can execute copied instructions directly */ 412 /* Boost up -- we can execute copied instructions directly */
346 reset_current_kprobe(); 413 reset_current_kprobe();
347 regs->ip = (unsigned long)p->ainsn.insn; 414 regs->ip = (unsigned long)p->ainsn.insn;
@@ -359,16 +426,18 @@ no_kprobe:
359} 426}
360 427
361/* 428/*
362 * For function-return probes, init_kprobes() establishes a probepoint 429 * When a retprobed function returns, this code saves registers and
363 * here. When a retprobed function returns, this probe is hit and 430 * calls trampoline_handler(), which calls the kretprobe's handler.
364 * trampoline_probe_handler() runs, calling the kretprobe's handler.
365 */ 431 */
366 void __kprobes kretprobe_trampoline_holder(void) 432 void __kprobes kretprobe_trampoline_holder(void)
367 { 433 {
368 asm volatile ( ".global kretprobe_trampoline\n" 434 asm volatile ( ".global kretprobe_trampoline\n"
369 "kretprobe_trampoline: \n" 435 "kretprobe_trampoline: \n"
370 " pushf\n" 436 " pushf\n"
371 /* skip cs, ip, orig_ax */ 437 /*
438 * Skip cs, ip, orig_ax.
439 * trampoline_handler() will plug in these values
440 */
372 " subl $12, %esp\n" 441 " subl $12, %esp\n"
373 " pushl %fs\n" 442 " pushl %fs\n"
374 " pushl %ds\n" 443 " pushl %ds\n"
@@ -382,10 +451,10 @@ no_kprobe:
382 " pushl %ebx\n" 451 " pushl %ebx\n"
383 " movl %esp, %eax\n" 452 " movl %esp, %eax\n"
384 " call trampoline_handler\n" 453 " call trampoline_handler\n"
385 /* move flags to cs */ 454 /* Move flags to cs */
386 " movl 52(%esp), %edx\n" 455 " movl 52(%esp), %edx\n"
387 " movl %edx, 48(%esp)\n" 456 " movl %edx, 48(%esp)\n"
388 /* save true return address on flags */ 457 /* Replace saved flags with true return address. */
389 " movl %eax, 52(%esp)\n" 458 " movl %eax, 52(%esp)\n"
390 " popl %ebx\n" 459 " popl %ebx\n"
391 " popl %ecx\n" 460 " popl %ecx\n"
@@ -394,16 +463,16 @@ no_kprobe:
394 " popl %edi\n" 463 " popl %edi\n"
395 " popl %ebp\n" 464 " popl %ebp\n"
396 " popl %eax\n" 465 " popl %eax\n"
397 /* skip ip, orig_ax, es, ds, fs */ 466 /* Skip ip, orig_ax, es, ds, fs */
398 " addl $20, %esp\n" 467 " addl $20, %esp\n"
399 " popf\n" 468 " popf\n"
400 " ret\n"); 469 " ret\n");
401} 470 }
402 471
403/* 472/*
404 * Called from kretprobe_trampoline 473 * Called from kretprobe_trampoline
405 */ 474 */
406void *__kprobes trampoline_handler(struct pt_regs *regs) 475void * __kprobes trampoline_handler(struct pt_regs *regs)
407{ 476{
408 struct kretprobe_instance *ri = NULL; 477 struct kretprobe_instance *ri = NULL;
409 struct hlist_head *head, empty_rp; 478 struct hlist_head *head, empty_rp;
@@ -417,27 +486,27 @@ void *__kprobes trampoline_handler(struct pt_regs *regs)
417 /* fixup registers */ 486 /* fixup registers */
418 regs->cs = __KERNEL_CS | get_kernel_rpl(); 487 regs->cs = __KERNEL_CS | get_kernel_rpl();
419 regs->ip = trampoline_address; 488 regs->ip = trampoline_address;
420 regs->orig_ax = 0xffffffff; 489 regs->orig_ax = ~0UL;
421 490
422 /* 491 /*
423 * It is possible to have multiple instances associated with a given 492 * It is possible to have multiple instances associated with a given
424 * task either because an multiple functions in the call path 493 * task either because multiple functions in the call path have
425 * have a return probe installed on them, and/or more than one 494 * return probes installed on them, and/or more than one
426 * return probe was registered for a target function. 495 * return probe was registered for a target function.
427 * 496 *
428 * We can handle this because: 497 * We can handle this because:
429 * - instances are always inserted at the head of the list 498 * - instances are always pushed into the head of the list
430 * - when multiple return probes are registered for the same 499 * - when multiple return probes are registered for the same
431 * function, the first instance's ret_addr will point to the 500 * function, the (chronologically) first instance's ret_addr
432 * real return address, and all the rest will point to 501 * will be the real return address, and all the rest will
433 * kretprobe_trampoline 502 * point to kretprobe_trampoline.
434 */ 503 */
435 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 504 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
436 if (ri->task != current) 505 if (ri->task != current)
437 /* another task is sharing our hash bucket */ 506 /* another task is sharing our hash bucket */
438 continue; 507 continue;
439 508
440 if (ri->rp && ri->rp->handler){ 509 if (ri->rp && ri->rp->handler) {
441 __get_cpu_var(current_kprobe) = &ri->rp->kp; 510 __get_cpu_var(current_kprobe) = &ri->rp->kp;
442 get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; 511 get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
443 ri->rp->handler(ri, regs); 512 ri->rp->handler(ri, regs);
@@ -457,13 +526,14 @@ void *__kprobes trampoline_handler(struct pt_regs *regs)
457 } 526 }
458 527
459 kretprobe_assert(ri, orig_ret_address, trampoline_address); 528 kretprobe_assert(ri, orig_ret_address, trampoline_address);
529
460 spin_unlock_irqrestore(&kretprobe_lock, flags); 530 spin_unlock_irqrestore(&kretprobe_lock, flags);
461 531
462 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 532 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
463 hlist_del(&ri->hlist); 533 hlist_del(&ri->hlist);
464 kfree(ri); 534 kfree(ri);
465 } 535 }
466 return (void*)orig_ret_address; 536 return (void *)orig_ret_address;
467} 537}
468 538
469/* 539/*
@@ -488,48 +558,55 @@ void *__kprobes trampoline_handler(struct pt_regs *regs)
488 * that is atop the stack is the address following the copied instruction. 558 * that is atop the stack is the address following the copied instruction.
489 * We need to make it the address following the original instruction. 559 * We need to make it the address following the original instruction.
490 * 560 *
491 * This function also checks instruction size for preparing direct execution. 561 * If this is the first time we've single-stepped the instruction at
562 * this probepoint, and the instruction is boostable, boost it: add a
563 * jump instruction after the copied instruction, that jumps to the next
564 * instruction after the probepoint.
492 */ 565 */
493static void __kprobes resume_execution(struct kprobe *p, 566static void __kprobes resume_execution(struct kprobe *p,
494 struct pt_regs *regs, struct kprobe_ctlblk *kcb) 567 struct pt_regs *regs, struct kprobe_ctlblk *kcb)
495{ 568{
496 unsigned long *tos = (unsigned long *)&regs->sp; 569 unsigned long *tos = stack_addr(regs);
497 unsigned long copy_eip = (unsigned long)p->ainsn.insn; 570 unsigned long copy_ip = (unsigned long)p->ainsn.insn;
498 unsigned long orig_eip = (unsigned long)p->addr; 571 unsigned long orig_ip = (unsigned long)p->addr;
572 kprobe_opcode_t *insn = p->ainsn.insn;
499 573
500 regs->flags &= ~TF_MASK; 574 regs->flags &= ~TF_MASK;
501 switch (p->ainsn.insn[0]) { 575 switch (*insn) {
502 case 0x9c: /* pushfl */ 576 case 0x9c: /* pushfl */
503 *tos &= ~(TF_MASK | IF_MASK); 577 *tos &= ~(TF_MASK | IF_MASK);
504 *tos |= kcb->kprobe_old_eflags; 578 *tos |= kcb->kprobe_old_flags;
505 break; 579 break;
506 case 0xc2: /* iret/ret/lret */ 580 case 0xc2: /* iret/ret/lret */
507 case 0xc3: 581 case 0xc3:
508 case 0xca: 582 case 0xca:
509 case 0xcb: 583 case 0xcb:
510 case 0xcf: 584 case 0xcf:
511 case 0xea: /* jmp absolute -- ip is correct */ 585 case 0xea: /* jmp absolute -- ip is correct */
512 /* ip is already adjusted, no more changes required */ 586 /* ip is already adjusted, no more changes required */
513 p->ainsn.boostable = 1; 587 p->ainsn.boostable = 1;
514 goto no_change; 588 goto no_change;
515 case 0xe8: /* call relative - Fix return addr */ 589 case 0xe8: /* call relative - Fix return addr */
516 *tos = orig_eip + (*tos - copy_eip); 590 *tos = orig_ip + (*tos - copy_ip);
517 break; 591 break;
518 case 0x9a: /* call absolute -- same as call absolute, indirect */ 592 case 0x9a: /* call absolute -- same as call absolute, indirect */
519 *tos = orig_eip + (*tos - copy_eip); 593 *tos = orig_ip + (*tos - copy_ip);
520 goto no_change; 594 goto no_change;
521 case 0xff: 595 case 0xff:
522 if ((p->ainsn.insn[1] & 0x30) == 0x10) { 596 if ((insn[1] & 0x30) == 0x10) {
523 /* 597 /*
524 * call absolute, indirect 598 * call absolute, indirect
525 * Fix return addr; ip is correct. 599 * Fix return addr; ip is correct.
526 * But this is not boostable 600 * But this is not boostable
527 */ 601 */
528 *tos = orig_eip + (*tos - copy_eip); 602 *tos = orig_ip + (*tos - copy_ip);
529 goto no_change; 603 goto no_change;
530 } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ 604 } else if (((insn[1] & 0x31) == 0x20) ||
531 ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ 605 ((insn[1] & 0x31) == 0x21)) {
532 /* ip is correct. And this is boostable */ 606 /*
607 * jmp near and far, absolute indirect
608 * ip is correct. And this is boostable
609 */
533 p->ainsn.boostable = 1; 610 p->ainsn.boostable = 1;
534 goto no_change; 611 goto no_change;
535 } 612 }
@@ -538,21 +615,21 @@ static void __kprobes resume_execution(struct kprobe *p,
538 } 615 }
539 616
540 if (p->ainsn.boostable == 0) { 617 if (p->ainsn.boostable == 0) {
541 if ((regs->ip > copy_eip) && 618 if ((regs->ip > copy_ip) &&
542 (regs->ip - copy_eip) + 5 < MAX_INSN_SIZE) { 619 (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
543 /* 620 /*
544 * These instructions can be executed directly if it 621 * These instructions can be executed directly if it
545 * jumps back to correct address. 622 * jumps back to correct address.
546 */ 623 */
547 set_jmp_op((void *)regs->ip, 624 set_jmp_op((void *)regs->ip,
548 (void *)orig_eip + (regs->ip - copy_eip)); 625 (void *)orig_ip + (regs->ip - copy_ip));
549 p->ainsn.boostable = 1; 626 p->ainsn.boostable = 1;
550 } else { 627 } else {
551 p->ainsn.boostable = -1; 628 p->ainsn.boostable = -1;
552 } 629 }
553 } 630 }
554 631
555 regs->ip = orig_eip + (regs->ip - copy_eip); 632 regs->ip += orig_ip - copy_ip;
556 633
557no_change: 634no_change:
558 restore_btf(); 635 restore_btf();
@@ -578,10 +655,10 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
578 } 655 }
579 656
580 resume_execution(cur, regs, kcb); 657 resume_execution(cur, regs, kcb);
581 regs->flags |= kcb->kprobe_saved_eflags; 658 regs->flags |= kcb->kprobe_saved_flags;
582 trace_hardirqs_fixup_flags(regs->flags); 659 trace_hardirqs_fixup_flags(regs->flags);
583 660
584 /*Restore back the original saved kprobes variables and continue. */ 661 /* Restore back the original saved kprobes variables and continue. */
585 if (kcb->kprobe_status == KPROBE_REENTER) { 662 if (kcb->kprobe_status == KPROBE_REENTER) {
586 restore_previous_kprobe(kcb); 663 restore_previous_kprobe(kcb);
587 goto out; 664 goto out;
@@ -617,7 +694,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
617 * normal page fault. 694 * normal page fault.
618 */ 695 */
619 regs->ip = (unsigned long)cur->addr; 696 regs->ip = (unsigned long)cur->addr;
620 regs->flags |= kcb->kprobe_old_eflags; 697 regs->flags |= kcb->kprobe_old_flags;
621 if (kcb->kprobe_status == KPROBE_REENTER) 698 if (kcb->kprobe_status == KPROBE_REENTER)
622 restore_previous_kprobe(kcb); 699 restore_previous_kprobe(kcb);
623 else 700 else
@@ -628,7 +705,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
628 case KPROBE_HIT_SSDONE: 705 case KPROBE_HIT_SSDONE:
629 /* 706 /*
630 * We increment the nmissed count for accounting, 707 * We increment the nmissed count for accounting,
631 * we can also use npre/npostfault count for accouting 708 * we can also use npre/npostfault count for accounting
632 * these specific fault cases. 709 * these specific fault cases.
633 */ 710 */
634 kprobes_inc_nmissed_count(cur); 711 kprobes_inc_nmissed_count(cur);
@@ -651,7 +728,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
651 return 1; 728 return 1;
652 729
653 /* 730 /*
654 * fixup_exception() could not handle it, 731 * fixup routine could not handle it,
655 * Let do_page_fault() fix it. 732 * Let do_page_fault() fix it.
656 */ 733 */
657 break; 734 break;
@@ -662,7 +739,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
662} 739}
663 740
664/* 741/*
665 * Wrapper routine to for handling exceptions. 742 * Wrapper routine for handling exceptions.
666 */ 743 */
667int __kprobes kprobe_exceptions_notify(struct notifier_block *self, 744int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
668 unsigned long val, void *data) 745 unsigned long val, void *data)
@@ -703,11 +780,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
703 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 780 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
704 781
705 kcb->jprobe_saved_regs = *regs; 782 kcb->jprobe_saved_regs = *regs;
706 kcb->jprobe_saved_esp = &regs->sp; 783 kcb->jprobe_saved_sp = stack_addr(regs);
707 addr = (unsigned long)(kcb->jprobe_saved_esp); 784 addr = (unsigned long)(kcb->jprobe_saved_sp);
708 785
709 /* 786 /*
710 * TBD: As Linus pointed out, gcc assumes that the callee 787 * As Linus pointed out, gcc assumes that the callee
711 * owns the argument space and could overwrite it, e.g. 788 * owns the argument space and could overwrite it, e.g.
712 * tailcall optimization. So, to be absolutely safe 789 * tailcall optimization. So, to be absolutely safe
713 * we also save and restore enough stack bytes to cover 790 * we also save and restore enough stack bytes to cover
@@ -730,21 +807,20 @@ void __kprobes jprobe_return(void)
730 " .globl jprobe_return_end \n" 807 " .globl jprobe_return_end \n"
731 " jprobe_return_end: \n" 808 " jprobe_return_end: \n"
732 " nop \n"::"b" 809 " nop \n"::"b"
733 (kcb->jprobe_saved_esp):"memory"); 810 (kcb->jprobe_saved_sp):"memory");
734} 811}
735 812
736int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 813int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
737{ 814{
738 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 815 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
739 u8 *addr = (u8 *) (regs->ip - 1); 816 u8 *addr = (u8 *) (regs->ip - 1);
740 unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
741 struct jprobe *jp = container_of(p, struct jprobe, kp); 817 struct jprobe *jp = container_of(p, struct jprobe, kp);
742 818
743 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { 819 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
744 if (&regs->sp != kcb->jprobe_saved_esp) { 820 if (stack_addr(regs) != kcb->jprobe_saved_sp) {
745 struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; 821 struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
746 printk("current sp %p does not match saved sp %p\n", 822 printk("current sp %p does not match saved sp %p\n",
747 &regs->sp, kcb->jprobe_saved_esp); 823 stack_addr(regs), kcb->jprobe_saved_sp);
748 printk("Saved registers for jprobe %p\n", jp); 824 printk("Saved registers for jprobe %p\n", jp);
749 show_registers(saved_regs); 825 show_registers(saved_regs);
750 printk("Current registers\n"); 826 printk("Current registers\n");
@@ -752,20 +828,21 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
752 BUG(); 828 BUG();
753 } 829 }
754 *regs = kcb->jprobe_saved_regs; 830 *regs = kcb->jprobe_saved_regs;
755 memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, 831 memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
756 MIN_STACK_SIZE(stack_addr)); 832 kcb->jprobes_stack,
833 MIN_STACK_SIZE(kcb->jprobe_saved_sp));
757 preempt_enable_no_resched(); 834 preempt_enable_no_resched();
758 return 1; 835 return 1;
759 } 836 }
760 return 0; 837 return 0;
761} 838}
762 839
763int __kprobes arch_trampoline_kprobe(struct kprobe *p) 840int __init arch_init_kprobes(void)
764{ 841{
765 return 0; 842 return 0;
766} 843}
767 844
768int __init arch_init_kprobes(void) 845int __kprobes arch_trampoline_kprobe(struct kprobe *p)
769{ 846{
770 return 0; 847 return 0;
771} 848}
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index bc93b1dd9a01..2d7763749b1b 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -40,16 +40,97 @@
40#include <linux/module.h> 40#include <linux/module.h>
41#include <linux/kdebug.h> 41#include <linux/kdebug.h>
42 42
43#include <asm/cacheflush.h>
44#include <asm/desc.h>
43#include <asm/pgtable.h> 45#include <asm/pgtable.h>
44#include <asm/uaccess.h> 46#include <asm/uaccess.h>
45#include <asm/alternative.h> 47#include <asm/alternative.h>
46 48
47void jprobe_return_end(void); 49void jprobe_return_end(void);
48static void __kprobes arch_copy_kprobe(struct kprobe *p);
49 50
50DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 51DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
51DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); 52DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
52 53
54#define stack_addr(regs) ((unsigned long *)regs->sp)
55
56#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
57 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
58 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
59 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
60 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
61 << (row % 32))
62 /*
63 * Undefined/reserved opcodes, conditional jump, Opcode Extension
64 * Groups, and some special opcodes can not boost.
65 */
66static const u32 twobyte_is_boostable[256 / 32] = {
67 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
68 /* ---------------------------------------------- */
69 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
70 W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
71 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
72 W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
73 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
74 W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
75 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */
76 W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
77 W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */
78 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
79 W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */
80 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */
81 W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
82 W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */
83 W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */
84 W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */
85 /* ----------------------------------------------- */
86 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
87};
88static const u32 onebyte_has_modrm[256 / 32] = {
89 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
90 /* ----------------------------------------------- */
91 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
92 W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
93 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
94 W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
95 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
96 W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
97 W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
98 W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
99 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
100 W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
101 W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
102 W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
103 W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
104 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
105 W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
106 W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
107 /* ----------------------------------------------- */
108 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
109};
110static const u32 twobyte_has_modrm[256 / 32] = {
111 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
112 /* ----------------------------------------------- */
113 W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
114 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
115 W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
116 W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
117 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
118 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
119 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
120 W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
121 W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
122 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
123 W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
124 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
125 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
126 W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
127 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
128 W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
129 /* ----------------------------------------------- */
130 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
131};
132#undef W
133
53struct kretprobe_blackpoint kretprobe_blacklist[] = { 134struct kretprobe_blackpoint kretprobe_blacklist[] = {
54 {"__switch_to", }, /* This function switches only current task, but 135 {"__switch_to", }, /* This function switches only current task, but
55 doesn't switch kernel stack.*/ 136 doesn't switch kernel stack.*/
@@ -70,44 +151,11 @@ static __always_inline void set_jmp_op(void *from, void *to)
70} 151}
71 152
72/* 153/*
73 * returns non-zero if opcode is boostable 154 * returns non-zero if opcode is boostable.
74 * RIP relative instructions are adjusted at copying time 155 * RIP relative instructions are adjusted at copying time
75 */ 156 */
76static __always_inline int can_boost(kprobe_opcode_t *opcodes) 157static __always_inline int can_boost(kprobe_opcode_t *opcodes)
77{ 158{
78#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
79 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
80 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
81 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
82 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
83 << (row % 64))
84 /*
85 * Undefined/reserved opcodes, conditional jump, Opcode Extension
86 * Groups, and some special opcodes can not boost.
87 */
88 static const unsigned long twobyte_is_boostable[256 / 64] = {
89 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
90 /* ---------------------------------------------- */
91 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0)|/* 00 */
92 W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 10 */
93 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 20 */
94 W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),/* 30 */
95 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)|/* 40 */
96 W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 50 */
97 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1)|/* 60 */
98 W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1),/* 70 */
99 W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)|/* 80 */
100 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)|/* 90 */
101 W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* a0 */
102 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1),/* b0 */
103 W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1)|/* c0 */
104 W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* d0 */
105 W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1)|/* e0 */
106 W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */
107 /* ----------------------------------------------- */
108 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
109 };
110#undef W
111 kprobe_opcode_t opcode; 159 kprobe_opcode_t opcode;
112 kprobe_opcode_t *orig_opcodes = opcodes; 160 kprobe_opcode_t *orig_opcodes = opcodes;
113 161
@@ -120,7 +168,8 @@ retry:
120 if (opcode == 0x0f) { 168 if (opcode == 0x0f) {
121 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) 169 if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
122 return 0; 170 return 0;
123 return test_bit(*opcodes, twobyte_is_boostable); 171 return test_bit(*opcodes,
172 (unsigned long *)twobyte_is_boostable);
124 } 173 }
125 174
126 switch (opcode & 0xf0) { 175 switch (opcode & 0xf0) {
@@ -169,80 +218,25 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
169 return 1; 218 return 1;
170 } 219 }
171 220
172 if (*insn >= 0x40 && *insn <= 0x4f && *++insn == 0xcf) 221 /*
173 return 1; 222 * on 64 bit x86, 0x40-0x4f are prefixes so we need to look
174 return 0; 223 * at the next byte instead.. but of course not recurse infinitely
175} 224 */
176 225 if (*insn >= 0x40 && *insn <= 0x4f)
177int __kprobes arch_prepare_kprobe(struct kprobe *p) 226 return is_IF_modifier(++insn);
178{
179 /* insn: must be on special executable page on x86_64. */
180 p->ainsn.insn = get_insn_slot();
181 if (!p->ainsn.insn) {
182 return -ENOMEM;
183 }
184 arch_copy_kprobe(p);
185 return 0; 227 return 0;
186} 228}
187 229
188/* 230/*
189 * Determine if the instruction uses the %rip-relative addressing mode. 231 * Adjust the displacement if the instruction uses the %rip-relative
232 * addressing mode.
190 * If it does, Return the address of the 32-bit displacement word. 233 * If it does, Return the address of the 32-bit displacement word.
191 * If not, return null. 234 * If not, return null.
192 */ 235 */
193static s32 __kprobes *is_riprel(u8 *insn) 236static void __kprobes fix_riprel(struct kprobe *p)
194{ 237{
195#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ 238 u8 *insn = p->ainsn.insn;
196 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ 239 s64 disp;
197 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
198 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
199 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
200 << (row % 64))
201 static const u64 onebyte_has_modrm[256 / 64] = {
202 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
203 /* ------------------------------- */
204 W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */
205 W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */
206 W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */
207 W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */
208 W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */
209 W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */
210 W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */
211 W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
212 W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */
213 W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */
214 W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */
215 W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */
216 W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */
217 W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */
218 W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */
219 W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1) /* f0 */
220 /* ------------------------------- */
221 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
222 };
223 static const u64 twobyte_has_modrm[256 / 64] = {
224 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
225 /* ------------------------------- */
226 W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */
227 W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */
228 W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */
229 W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */
230 W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */
231 W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */
232 W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */
233 W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */
234 W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */
235 W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */
236 W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */
237 W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */
238 W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */
239 W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */
240 W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */
241 W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* ff */
242 /* ------------------------------- */
243 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
244 };
245#undef W
246 int need_modrm; 240 int need_modrm;
247 241
248 /* Skip legacy instruction prefixes. */ 242 /* Skip legacy instruction prefixes. */
@@ -271,54 +265,60 @@ static s32 __kprobes *is_riprel(u8 *insn)
271 265
272 if (*insn == 0x0f) { /* Two-byte opcode. */ 266 if (*insn == 0x0f) { /* Two-byte opcode. */
273 ++insn; 267 ++insn;
274 need_modrm = test_bit(*insn, twobyte_has_modrm); 268 need_modrm = test_bit(*insn,
275 } else { /* One-byte opcode. */ 269 (unsigned long *)twobyte_has_modrm);
276 need_modrm = test_bit(*insn, onebyte_has_modrm); 270 } else /* One-byte opcode. */
277 } 271 need_modrm = test_bit(*insn,
272 (unsigned long *)onebyte_has_modrm);
278 273
279 if (need_modrm) { 274 if (need_modrm) {
280 u8 modrm = *++insn; 275 u8 modrm = *++insn;
281 if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */ 276 if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */
282 /* Displacement follows ModRM byte. */ 277 /* Displacement follows ModRM byte. */
283 return (s32 *) ++insn; 278 ++insn;
279 /*
280 * The copied instruction uses the %rip-relative
281 * addressing mode. Adjust the displacement for the
282 * difference between the original location of this
283 * instruction and the location of the copy that will
284 * actually be run. The tricky bit here is making sure
285 * that the sign extension happens correctly in this
286 * calculation, since we need a signed 32-bit result to
287 * be sign-extended to 64 bits when it's added to the
288 * %rip value and yield the same 64-bit result that the
289 * sign-extension of the original signed 32-bit
290 * displacement would have given.
291 */
292 disp = (u8 *) p->addr + *((s32 *) insn) -
293 (u8 *) p->ainsn.insn;
294 BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
295 *(s32 *)insn = (s32) disp;
284 } 296 }
285 } 297 }
286
287 /* No %rip-relative addressing mode here. */
288 return NULL;
289} 298}
290 299
291static void __kprobes arch_copy_kprobe(struct kprobe *p) 300static void __kprobes arch_copy_kprobe(struct kprobe *p)
292{ 301{
293 s32 *ripdisp; 302 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
294 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); 303 fix_riprel(p);
295 ripdisp = is_riprel(p->ainsn.insn); 304 if (can_boost(p->addr))
296 if (ripdisp) {
297 /*
298 * The copied instruction uses the %rip-relative
299 * addressing mode. Adjust the displacement for the
300 * difference between the original location of this
301 * instruction and the location of the copy that will
302 * actually be run. The tricky bit here is making sure
303 * that the sign extension happens correctly in this
304 * calculation, since we need a signed 32-bit result to
305 * be sign-extended to 64 bits when it's added to the
306 * %rip value and yield the same 64-bit result that the
307 * sign-extension of the original signed 32-bit
308 * displacement would have given.
309 */
310 s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn;
311 BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
312 *ripdisp = disp;
313 }
314 if (can_boost(p->addr)) {
315 p->ainsn.boostable = 0; 305 p->ainsn.boostable = 0;
316 } else { 306 else
317 p->ainsn.boostable = -1; 307 p->ainsn.boostable = -1;
318 } 308
319 p->opcode = *p->addr; 309 p->opcode = *p->addr;
320} 310}
321 311
312int __kprobes arch_prepare_kprobe(struct kprobe *p)
313{
314 /* insn: must be on special executable page on x86. */
315 p->ainsn.insn = get_insn_slot();
316 if (!p->ainsn.insn)
317 return -ENOMEM;
318 arch_copy_kprobe(p);
319 return 0;
320}
321
322void __kprobes arch_arm_kprobe(struct kprobe *p) 322void __kprobes arch_arm_kprobe(struct kprobe *p)
323{ 323{
324 text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); 324 text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
@@ -340,26 +340,26 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
340{ 340{
341 kcb->prev_kprobe.kp = kprobe_running(); 341 kcb->prev_kprobe.kp = kprobe_running();
342 kcb->prev_kprobe.status = kcb->kprobe_status; 342 kcb->prev_kprobe.status = kcb->kprobe_status;
343 kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags; 343 kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags;
344 kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; 344 kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
345} 345}
346 346
347static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) 347static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
348{ 348{
349 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 349 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
350 kcb->kprobe_status = kcb->prev_kprobe.status; 350 kcb->kprobe_status = kcb->prev_kprobe.status;
351 kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags; 351 kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
352 kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; 352 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
353} 353}
354 354
355static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 355static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
356 struct kprobe_ctlblk *kcb) 356 struct kprobe_ctlblk *kcb)
357{ 357{
358 __get_cpu_var(current_kprobe) = p; 358 __get_cpu_var(current_kprobe) = p;
359 kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags 359 kcb->kprobe_saved_flags = kcb->kprobe_old_flags
360 = (regs->flags & (TF_MASK | IF_MASK)); 360 = (regs->flags & (TF_MASK | IF_MASK));
361 if (is_IF_modifier(p->ainsn.insn)) 361 if (is_IF_modifier(p->ainsn.insn))
362 kcb->kprobe_saved_rflags &= ~IF_MASK; 362 kcb->kprobe_saved_flags &= ~IF_MASK;
363} 363}
364 364
365static __always_inline void clear_btf(void) 365static __always_inline void clear_btf(void)
@@ -390,20 +390,27 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
390void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 390void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
391 struct pt_regs *regs) 391 struct pt_regs *regs)
392{ 392{
393 unsigned long *sara = (unsigned long *)regs->sp; 393 unsigned long *sara = stack_addr(regs);
394 394
395 ri->ret_addr = (kprobe_opcode_t *) *sara; 395 ri->ret_addr = (kprobe_opcode_t *) *sara;
396
396 /* Replace the return addr with trampoline addr */ 397 /* Replace the return addr with trampoline addr */
397 *sara = (unsigned long) &kretprobe_trampoline; 398 *sara = (unsigned long) &kretprobe_trampoline;
398} 399}
399 400
400int __kprobes kprobe_handler(struct pt_regs *regs) 401/*
402 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
403 * remain disabled throughout this function.
404 */
405static int __kprobes kprobe_handler(struct pt_regs *regs)
401{ 406{
402 struct kprobe *p; 407 struct kprobe *p;
403 int ret = 0; 408 int ret = 0;
404 kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); 409 kprobe_opcode_t *addr;
405 struct kprobe_ctlblk *kcb; 410 struct kprobe_ctlblk *kcb;
406 411
412 addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
413
407 /* 414 /*
408 * We don't want to be preempted for the entire 415 * We don't want to be preempted for the entire
409 * duration of kprobe processing 416 * duration of kprobe processing
@@ -418,7 +425,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
418 if (kcb->kprobe_status == KPROBE_HIT_SS && 425 if (kcb->kprobe_status == KPROBE_HIT_SS &&
419 *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { 426 *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
420 regs->flags &= ~TF_MASK; 427 regs->flags &= ~TF_MASK;
421 regs->flags |= kcb->kprobe_saved_rflags; 428 regs->flags |= kcb->kprobe_saved_flags;
422 goto no_kprobe; 429 goto no_kprobe;
423 } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) { 430 } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) {
424 /* TODO: Provide re-entrancy from 431 /* TODO: Provide re-entrancy from
@@ -429,22 +436,20 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
429 arch_disarm_kprobe(p); 436 arch_disarm_kprobe(p);
430 regs->ip = (unsigned long)p->addr; 437 regs->ip = (unsigned long)p->addr;
431 reset_current_kprobe(); 438 reset_current_kprobe();
432 ret = 1;
433 } else {
434 /* We have reentered the kprobe_handler(), since
435 * another probe was hit while within the
436 * handler. We here save the original kprobe
437 * variables and just single step on instruction
438 * of the new probe without calling any user
439 * handlers.
440 */
441 save_previous_kprobe(kcb);
442 set_current_kprobe(p, regs, kcb);
443 kprobes_inc_nmissed_count(p);
444 prepare_singlestep(p, regs);
445 kcb->kprobe_status = KPROBE_REENTER;
446 return 1; 439 return 1;
447 } 440 }
441 /* We have reentered the kprobe_handler(), since
442 * another probe was hit while within the handler.
443 * We here save the original kprobes variables and
444 * just single step on the instruction of the new probe
445 * without calling any user handlers.
446 */
447 save_previous_kprobe(kcb);
448 set_current_kprobe(p, regs, kcb);
449 kprobes_inc_nmissed_count(p);
450 prepare_singlestep(p, regs);
451 kcb->kprobe_status = KPROBE_REENTER;
452 return 1;
448 } else { 453 } else {
449 if (*addr != BREAKPOINT_INSTRUCTION) { 454 if (*addr != BREAKPOINT_INSTRUCTION) {
450 /* The breakpoint instruction was removed by 455 /* The breakpoint instruction was removed by
@@ -578,23 +583,23 @@ fastcall void * __kprobes trampoline_handler(struct pt_regs *regs)
578 INIT_HLIST_HEAD(&empty_rp); 583 INIT_HLIST_HEAD(&empty_rp);
579 spin_lock_irqsave(&kretprobe_lock, flags); 584 spin_lock_irqsave(&kretprobe_lock, flags);
580 head = kretprobe_inst_table_head(current); 585 head = kretprobe_inst_table_head(current);
581 /* fixup rt_regs */ 586 /* fixup registers */
582 regs->cs = __KERNEL_CS; 587 regs->cs = __KERNEL_CS;
583 regs->ip = trampoline_address; 588 regs->ip = trampoline_address;
584 regs->orig_ax = 0xffffffffffffffff; 589 regs->orig_ax = ~0UL;
585 590
586 /* 591 /*
587 * It is possible to have multiple instances associated with a given 592 * It is possible to have multiple instances associated with a given
588 * task either because an multiple functions in the call path 593 * task either because multiple functions in the call path have
589 * have a return probe installed on them, and/or more then one return 594 * return probes installed on them, and/or more than one
590 * return probe was registered for a target function. 595 * return probe was registered for a target function.
591 * 596 *
592 * We can handle this because: 597 * We can handle this because:
593 * - instances are always inserted at the head of the list 598 * - instances are always pushed into the head of the list
594 * - when multiple return probes are registered for the same 599 * - when multiple return probes are registered for the same
595 * function, the first instance's ret_addr will point to the 600 * function, the (chronologically) first instance's ret_addr
596 * real return address, and all the rest will point to 601 * will be the real return address, and all the rest will
597 * kretprobe_trampoline 602 * point to kretprobe_trampoline.
598 */ 603 */
599 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 604 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
600 if (ri->task != current) 605 if (ri->task != current)
@@ -661,9 +666,9 @@ fastcall void * __kprobes trampoline_handler(struct pt_regs *regs)
661static void __kprobes resume_execution(struct kprobe *p, 666static void __kprobes resume_execution(struct kprobe *p,
662 struct pt_regs *regs, struct kprobe_ctlblk *kcb) 667 struct pt_regs *regs, struct kprobe_ctlblk *kcb)
663{ 668{
664 unsigned long *tos = (unsigned long *)regs->sp; 669 unsigned long *tos = stack_addr(regs);
665 unsigned long copy_rip = (unsigned long)p->ainsn.insn; 670 unsigned long copy_ip = (unsigned long)p->ainsn.insn;
666 unsigned long orig_rip = (unsigned long)p->addr; 671 unsigned long orig_ip = (unsigned long)p->addr;
667 kprobe_opcode_t *insn = p->ainsn.insn; 672 kprobe_opcode_t *insn = p->ainsn.insn;
668 673
669 /*skip the REX prefix*/ 674 /*skip the REX prefix*/
@@ -674,7 +679,7 @@ static void __kprobes resume_execution(struct kprobe *p,
674 switch (*insn) { 679 switch (*insn) {
675 case 0x9c: /* pushfl */ 680 case 0x9c: /* pushfl */
676 *tos &= ~(TF_MASK | IF_MASK); 681 *tos &= ~(TF_MASK | IF_MASK);
677 *tos |= kcb->kprobe_old_rflags; 682 *tos |= kcb->kprobe_old_flags;
678 break; 683 break;
679 case 0xc2: /* iret/ret/lret */ 684 case 0xc2: /* iret/ret/lret */
680 case 0xc3: 685 case 0xc3:
@@ -686,18 +691,23 @@ static void __kprobes resume_execution(struct kprobe *p,
686 p->ainsn.boostable = 1; 691 p->ainsn.boostable = 1;
687 goto no_change; 692 goto no_change;
688 case 0xe8: /* call relative - Fix return addr */ 693 case 0xe8: /* call relative - Fix return addr */
689 *tos = orig_rip + (*tos - copy_rip); 694 *tos = orig_ip + (*tos - copy_ip);
690 break; 695 break;
691 case 0xff: 696 case 0xff:
692 if ((insn[1] & 0x30) == 0x10) { 697 if ((insn[1] & 0x30) == 0x10) {
693 /* call absolute, indirect */ 698 /*
694 /* Fix return addr; ip is correct. */ 699 * call absolute, indirect
695 /* not boostable */ 700 * Fix return addr; ip is correct.
696 *tos = orig_rip + (*tos - copy_rip); 701 * But this is not boostable
702 */
703 *tos = orig_ip + (*tos - copy_ip);
697 goto no_change; 704 goto no_change;
698 } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ 705 } else if (((insn[1] & 0x31) == 0x20) ||
699 ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ 706 ((insn[1] & 0x31) == 0x21)) {
700 /* ip is correct. And this is boostable */ 707 /*
708 * jmp near and far, absolute indirect
709 * ip is correct. And this is boostable
710 */
701 p->ainsn.boostable = 1; 711 p->ainsn.boostable = 1;
702 goto no_change; 712 goto no_change;
703 } 713 }
@@ -706,21 +716,21 @@ static void __kprobes resume_execution(struct kprobe *p,
706 } 716 }
707 717
708 if (p->ainsn.boostable == 0) { 718 if (p->ainsn.boostable == 0) {
709 if ((regs->ip > copy_rip) && 719 if ((regs->ip > copy_ip) &&
710 (regs->ip - copy_rip) + 5 < MAX_INSN_SIZE) { 720 (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
711 /* 721 /*
712 * These instructions can be executed directly if it 722 * These instructions can be executed directly if it
713 * jumps back to correct address. 723 * jumps back to correct address.
714 */ 724 */
715 set_jmp_op((void *)regs->ip, 725 set_jmp_op((void *)regs->ip,
716 (void *)orig_rip + (regs->ip - copy_rip)); 726 (void *)orig_ip + (regs->ip - copy_ip));
717 p->ainsn.boostable = 1; 727 p->ainsn.boostable = 1;
718 } else { 728 } else {
719 p->ainsn.boostable = -1; 729 p->ainsn.boostable = -1;
720 } 730 }
721 } 731 }
722 732
723 regs->ip = orig_rip + (regs->ip - copy_rip); 733 regs->ip += orig_ip - copy_ip;
724 734
725no_change: 735no_change:
726 restore_btf(); 736 restore_btf();
@@ -728,7 +738,11 @@ no_change:
728 return; 738 return;
729} 739}
730 740
731int __kprobes post_kprobe_handler(struct pt_regs *regs) 741/*
742 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
743 * remain disabled throughout this function.
744 */
745static int __kprobes post_kprobe_handler(struct pt_regs *regs)
732{ 746{
733 struct kprobe *cur = kprobe_running(); 747 struct kprobe *cur = kprobe_running();
734 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 748 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -742,10 +756,10 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs)
742 } 756 }
743 757
744 resume_execution(cur, regs, kcb); 758 resume_execution(cur, regs, kcb);
745 regs->flags |= kcb->kprobe_saved_rflags; 759 regs->flags |= kcb->kprobe_saved_flags;
746 trace_hardirqs_fixup_flags(regs->flags); 760 trace_hardirqs_fixup_flags(regs->flags);
747 761
748 /* Restore the original saved kprobes variables and continue. */ 762 /* Restore back the original saved kprobes variables and continue. */
749 if (kcb->kprobe_status == KPROBE_REENTER) { 763 if (kcb->kprobe_status == KPROBE_REENTER) {
750 restore_previous_kprobe(kcb); 764 restore_previous_kprobe(kcb);
751 goto out; 765 goto out;
@@ -782,7 +796,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
782 * normal page fault. 796 * normal page fault.
783 */ 797 */
784 regs->ip = (unsigned long)cur->addr; 798 regs->ip = (unsigned long)cur->addr;
785 regs->flags |= kcb->kprobe_old_rflags; 799 regs->flags |= kcb->kprobe_old_flags;
786 if (kcb->kprobe_status == KPROBE_REENTER) 800 if (kcb->kprobe_status == KPROBE_REENTER)
787 restore_previous_kprobe(kcb); 801 restore_previous_kprobe(kcb);
788 else 802 else
@@ -793,7 +807,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
793 case KPROBE_HIT_SSDONE: 807 case KPROBE_HIT_SSDONE:
794 /* 808 /*
795 * We increment the nmissed count for accounting, 809 * We increment the nmissed count for accounting,
796 * we can also use npre/npostfault count for accouting 810 * we can also use npre/npostfault count for accounting
797 * these specific fault cases. 811 * these specific fault cases.
798 */ 812 */
799 kprobes_inc_nmissed_count(cur); 813 kprobes_inc_nmissed_count(cur);
@@ -819,7 +833,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
819 } 833 }
820 834
821 /* 835 /*
822 * fixup() could not handle it, 836 * fixup routine could not handle it,
823 * Let do_page_fault() fix it. 837 * Let do_page_fault() fix it.
824 */ 838 */
825 break; 839 break;
@@ -838,7 +852,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
838 struct die_args *args = (struct die_args *)data; 852 struct die_args *args = (struct die_args *)data;
839 int ret = NOTIFY_DONE; 853 int ret = NOTIFY_DONE;
840 854
841 if (args->regs && user_mode(args->regs)) 855 if (args->regs && user_mode_vm(args->regs))
842 return ret; 856 return ret;
843 857
844 switch (val) { 858 switch (val) {
@@ -871,8 +885,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
871 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 885 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
872 886
873 kcb->jprobe_saved_regs = *regs; 887 kcb->jprobe_saved_regs = *regs;
874 kcb->jprobe_saved_rsp = (long *) regs->sp; 888 kcb->jprobe_saved_sp = stack_addr(regs);
875 addr = (unsigned long)(kcb->jprobe_saved_rsp); 889 addr = (unsigned long)(kcb->jprobe_saved_sp);
890
876 /* 891 /*
877 * As Linus pointed out, gcc assumes that the callee 892 * As Linus pointed out, gcc assumes that the callee
878 * owns the argument space and could overwrite it, e.g. 893 * owns the argument space and could overwrite it, e.g.
@@ -897,21 +912,20 @@ void __kprobes jprobe_return(void)
897 " .globl jprobe_return_end \n" 912 " .globl jprobe_return_end \n"
898 " jprobe_return_end: \n" 913 " jprobe_return_end: \n"
899 " nop \n"::"b" 914 " nop \n"::"b"
900 (kcb->jprobe_saved_rsp):"memory"); 915 (kcb->jprobe_saved_sp):"memory");
901} 916}
902 917
903int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 918int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
904{ 919{
905 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 920 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
906 u8 *addr = (u8 *) (regs->ip - 1); 921 u8 *addr = (u8 *) (regs->ip - 1);
907 unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp);
908 struct jprobe *jp = container_of(p, struct jprobe, kp); 922 struct jprobe *jp = container_of(p, struct jprobe, kp);
909 923
910 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { 924 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
911 if ((unsigned long *)regs->sp != kcb->jprobe_saved_rsp) { 925 if (stack_addr(regs) != kcb->jprobe_saved_sp) {
912 struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; 926 struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
913 printk("current sp %p does not match saved sp %p\n", 927 printk("current sp %p does not match saved sp %p\n",
914 (long *)regs->sp, kcb->jprobe_saved_rsp); 928 stack_addr(regs), kcb->jprobe_saved_sp);
915 printk("Saved registers for jprobe %p\n", jp); 929 printk("Saved registers for jprobe %p\n", jp);
916 show_registers(saved_regs); 930 show_registers(saved_regs);
917 printk("Current registers\n"); 931 printk("Current registers\n");
@@ -919,8 +933,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
919 BUG(); 933 BUG();
920 } 934 }
921 *regs = kcb->jprobe_saved_regs; 935 *regs = kcb->jprobe_saved_regs;
922 memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, 936 memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp),
923 MIN_STACK_SIZE(stack_addr)); 937 kcb->jprobes_stack,
938 MIN_STACK_SIZE(kcb->jprobe_saved_sp));
924 preempt_enable_no_resched(); 939 preempt_enable_no_resched();
925 return 1; 940 return 1;
926 } 941 }
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 8aed912b04ec..db8d748814e4 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -25,7 +25,6 @@
25#include <linux/kprobes.h> 25#include <linux/kprobes.h>
26#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/kdebug.h> 27#include <linux/kdebug.h>
28#include <linux/kprobes.h>
29 28
30#include <asm/system.h> 29#include <asm/system.h>
31#include <asm/desc.h> 30#include <asm/desc.h>
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 88a7abda29ce..162050d4e5a3 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -25,7 +25,6 @@
25#include <linux/kprobes.h> 25#include <linux/kprobes.h>
26#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/kdebug.h> 27#include <linux/kdebug.h>
28#include <linux/kprobes.h>
29 28
30#include <asm/system.h> 29#include <asm/system.h>
31#include <asm/pgalloc.h> 30#include <asm/pgalloc.h>