diff options
Diffstat (limited to 'arch/x86_64/kernel/ptrace.c')
-rw-r--r-- | arch/x86_64/kernel/ptrace.c | 547 |
1 files changed, 547 insertions, 0 deletions
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c new file mode 100644 index 000000000000..0b7b101debdf --- /dev/null +++ b/arch/x86_64/kernel/ptrace.c | |||
@@ -0,0 +1,547 @@ | |||
1 | /* ptrace.c */ | ||
2 | /* By Ross Biro 1/23/92 */ | ||
3 | /* | ||
4 | * Pentium III FXSR, SSE support | ||
5 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
6 | * | ||
7 | * x86-64 port 2000-2002 Andi Kleen | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/smp_lock.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/ptrace.h> | ||
17 | #include <linux/user.h> | ||
18 | #include <linux/security.h> | ||
19 | #include <linux/audit.h> | ||
20 | #include <linux/seccomp.h> | ||
21 | |||
22 | #include <asm/uaccess.h> | ||
23 | #include <asm/pgtable.h> | ||
24 | #include <asm/system.h> | ||
25 | #include <asm/processor.h> | ||
26 | #include <asm/i387.h> | ||
27 | #include <asm/debugreg.h> | ||
28 | #include <asm/ldt.h> | ||
29 | #include <asm/desc.h> | ||
30 | #include <asm/proto.h> | ||
31 | #include <asm/ia32.h> | ||
32 | |||
33 | /* | ||
34 | * Does not yet catch signals sent when the child dies | ||
35 | * (in exit.c or in signal.c). | ||
36 | */ | ||
37 | |||
38 | /* Determines which eflags bits the tracer is allowed to modify. */ | ||
39 | /* 1 = access, 0 = no access */ | ||
40 | #define FLAG_MASK 0x44dd5UL | ||
41 | |||
42 | /* The trap flag bit in eflags; it is set to single-step the child. */ | ||
43 | #define TRAP_FLAG 0x100UL | ||
44 | |||
45 | /* | ||
46 | * EFLAGS is the offset of eflags inside struct pt_regs; EFL_OFFSET is the same slot expressed relative to the end of struct pt_regs. | ||
47 | */ | ||
48 | #define EFLAGS offsetof(struct pt_regs, eflags) | ||
49 | #define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs))) | ||
50 | |||
51 | /* | ||
52 | * this routine will get a word off of the processes privileged stack. | ||
53 | * the offset is how far from the base addr as stored in the TSS. | ||
54 | * this routine assumes that all the privileged stacks are in our | ||
55 | * data space. | ||
56 | */ | ||
57 | static inline unsigned long get_stack_long(struct task_struct *task, int offset) | ||
58 | { | ||
59 | unsigned char *stack; | ||
60 | |||
61 | stack = (unsigned char *)task->thread.rsp0; | ||
62 | stack += offset; | ||
63 | return (*((unsigned long *)stack)); | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * this routine will put a word on the processes privileged stack. | ||
68 | * the offset is how far from the base addr as stored in the TSS. | ||
69 | * this routine assumes that all the privileged stacks are in our | ||
70 | * data space. | ||
71 | */ | ||
72 | static inline long put_stack_long(struct task_struct *task, int offset, | ||
73 | unsigned long data) | ||
74 | { | ||
75 | unsigned char * stack; | ||
76 | |||
77 | stack = (unsigned char *) task->thread.rsp0; | ||
78 | stack += offset; | ||
79 | *(unsigned long *) stack = data; | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Called by kernel/ptrace.c when detaching.. | ||
85 | * | ||
86 | * Make sure the single step bit is not set. | ||
87 | */ | ||
88 | void ptrace_disable(struct task_struct *child) | ||
89 | { | ||
90 | long tmp; | ||
91 | |||
92 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
93 | tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; | ||
94 | put_stack_long(child, EFL_OFFSET, tmp); | ||
95 | } | ||
96 | |||
97 | static int putreg(struct task_struct *child, | ||
98 | unsigned long regno, unsigned long value) | ||
99 | { | ||
100 | unsigned long tmp; | ||
101 | |||
102 | /* Some code in the 64bit emulation may not be 64bit clean. | ||
103 | Don't take any chances. */ | ||
104 | if (test_tsk_thread_flag(child, TIF_IA32)) | ||
105 | value &= 0xffffffff; | ||
106 | switch (regno) { | ||
107 | case offsetof(struct user_regs_struct,fs): | ||
108 | if (value && (value & 3) != 3) | ||
109 | return -EIO; | ||
110 | child->thread.fsindex = value & 0xffff; | ||
111 | return 0; | ||
112 | case offsetof(struct user_regs_struct,gs): | ||
113 | if (value && (value & 3) != 3) | ||
114 | return -EIO; | ||
115 | child->thread.gsindex = value & 0xffff; | ||
116 | return 0; | ||
117 | case offsetof(struct user_regs_struct,ds): | ||
118 | if (value && (value & 3) != 3) | ||
119 | return -EIO; | ||
120 | child->thread.ds = value & 0xffff; | ||
121 | return 0; | ||
122 | case offsetof(struct user_regs_struct,es): | ||
123 | if (value && (value & 3) != 3) | ||
124 | return -EIO; | ||
125 | child->thread.es = value & 0xffff; | ||
126 | return 0; | ||
127 | case offsetof(struct user_regs_struct,ss): | ||
128 | if ((value & 3) != 3) | ||
129 | return -EIO; | ||
130 | value &= 0xffff; | ||
131 | return 0; | ||
132 | case offsetof(struct user_regs_struct,fs_base): | ||
133 | if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) | ||
134 | return -EIO; | ||
135 | child->thread.fs = value; | ||
136 | return 0; | ||
137 | case offsetof(struct user_regs_struct,gs_base): | ||
138 | if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) | ||
139 | return -EIO; | ||
140 | child->thread.gs = value; | ||
141 | return 0; | ||
142 | case offsetof(struct user_regs_struct, eflags): | ||
143 | value &= FLAG_MASK; | ||
144 | tmp = get_stack_long(child, EFL_OFFSET); | ||
145 | tmp &= ~FLAG_MASK; | ||
146 | value |= tmp; | ||
147 | break; | ||
148 | case offsetof(struct user_regs_struct,cs): | ||
149 | if ((value & 3) != 3) | ||
150 | return -EIO; | ||
151 | value &= 0xffff; | ||
152 | break; | ||
153 | } | ||
154 | put_stack_long(child, regno - sizeof(struct pt_regs), value); | ||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | static unsigned long getreg(struct task_struct *child, unsigned long regno) | ||
159 | { | ||
160 | unsigned long val; | ||
161 | switch (regno) { | ||
162 | case offsetof(struct user_regs_struct, fs): | ||
163 | return child->thread.fsindex; | ||
164 | case offsetof(struct user_regs_struct, gs): | ||
165 | return child->thread.gsindex; | ||
166 | case offsetof(struct user_regs_struct, ds): | ||
167 | return child->thread.ds; | ||
168 | case offsetof(struct user_regs_struct, es): | ||
169 | return child->thread.es; | ||
170 | case offsetof(struct user_regs_struct, fs_base): | ||
171 | return child->thread.fs; | ||
172 | case offsetof(struct user_regs_struct, gs_base): | ||
173 | return child->thread.gs; | ||
174 | default: | ||
175 | regno = regno - sizeof(struct pt_regs); | ||
176 | val = get_stack_long(child, regno); | ||
177 | if (test_tsk_thread_flag(child, TIF_IA32)) | ||
178 | val &= 0xffffffff; | ||
179 | return val; | ||
180 | } | ||
181 | |||
182 | } | ||
183 | |||
184 | asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data) | ||
185 | { | ||
186 | struct task_struct *child; | ||
187 | long i, ret; | ||
188 | unsigned ui; | ||
189 | |||
190 | /* This lock_kernel fixes a subtle race with suid exec */ | ||
191 | lock_kernel(); | ||
192 | ret = -EPERM; | ||
193 | if (request == PTRACE_TRACEME) { | ||
194 | /* are we already being traced? */ | ||
195 | if (current->ptrace & PT_PTRACED) | ||
196 | goto out; | ||
197 | ret = security_ptrace(current->parent, current); | ||
198 | if (ret) | ||
199 | goto out; | ||
200 | /* set the ptrace bit in the process flags. */ | ||
201 | current->ptrace |= PT_PTRACED; | ||
202 | ret = 0; | ||
203 | goto out; | ||
204 | } | ||
205 | ret = -ESRCH; | ||
206 | read_lock(&tasklist_lock); | ||
207 | child = find_task_by_pid(pid); | ||
208 | if (child) | ||
209 | get_task_struct(child); | ||
210 | read_unlock(&tasklist_lock); | ||
211 | if (!child) | ||
212 | goto out; | ||
213 | |||
214 | ret = -EPERM; | ||
215 | if (pid == 1) /* you may not mess with init */ | ||
216 | goto out_tsk; | ||
217 | |||
218 | if (request == PTRACE_ATTACH) { | ||
219 | ret = ptrace_attach(child); | ||
220 | goto out_tsk; | ||
221 | } | ||
222 | ret = ptrace_check_attach(child, request == PTRACE_KILL); | ||
223 | if (ret < 0) | ||
224 | goto out_tsk; | ||
225 | |||
226 | switch (request) { | ||
227 | /* when I and D space are separate, these will need to be fixed. */ | ||
228 | case PTRACE_PEEKTEXT: /* read word at location addr. */ | ||
229 | case PTRACE_PEEKDATA: { | ||
230 | unsigned long tmp; | ||
231 | int copied; | ||
232 | |||
233 | copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); | ||
234 | ret = -EIO; | ||
235 | if (copied != sizeof(tmp)) | ||
236 | break; | ||
237 | ret = put_user(tmp,(unsigned long __user *) data); | ||
238 | break; | ||
239 | } | ||
240 | |||
241 | /* read the word at location addr in the USER area. */ | ||
242 | case PTRACE_PEEKUSR: { | ||
243 | unsigned long tmp; | ||
244 | |||
245 | ret = -EIO; | ||
246 | if ((addr & 7) || | ||
247 | addr > sizeof(struct user) - 7) | ||
248 | break; | ||
249 | |||
250 | switch (addr) { | ||
251 | case 0 ... sizeof(struct user_regs_struct): | ||
252 | tmp = getreg(child, addr); | ||
253 | break; | ||
254 | case offsetof(struct user, u_debugreg[0]): | ||
255 | tmp = child->thread.debugreg0; | ||
256 | break; | ||
257 | case offsetof(struct user, u_debugreg[1]): | ||
258 | tmp = child->thread.debugreg1; | ||
259 | break; | ||
260 | case offsetof(struct user, u_debugreg[2]): | ||
261 | tmp = child->thread.debugreg2; | ||
262 | break; | ||
263 | case offsetof(struct user, u_debugreg[3]): | ||
264 | tmp = child->thread.debugreg3; | ||
265 | break; | ||
266 | case offsetof(struct user, u_debugreg[6]): | ||
267 | tmp = child->thread.debugreg6; | ||
268 | break; | ||
269 | case offsetof(struct user, u_debugreg[7]): | ||
270 | tmp = child->thread.debugreg7; | ||
271 | break; | ||
272 | default: | ||
273 | tmp = 0; | ||
274 | break; | ||
275 | } | ||
276 | ret = put_user(tmp,(unsigned long __user *) data); | ||
277 | break; | ||
278 | } | ||
279 | |||
280 | /* when I and D space are separate, this will have to be fixed. */ | ||
281 | case PTRACE_POKETEXT: /* write the word at location addr. */ | ||
282 | case PTRACE_POKEDATA: | ||
283 | ret = 0; | ||
284 | if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) | ||
285 | break; | ||
286 | ret = -EIO; | ||
287 | break; | ||
288 | |||
289 | case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ | ||
290 | ret = -EIO; | ||
291 | if ((addr & 7) || | ||
292 | addr > sizeof(struct user) - 7) | ||
293 | break; | ||
294 | |||
295 | switch (addr) { | ||
296 | case 0 ... sizeof(struct user_regs_struct): | ||
297 | ret = putreg(child, addr, data); | ||
298 | break; | ||
299 | /* Disallows to set a breakpoint into the vsyscall */ | ||
300 | case offsetof(struct user, u_debugreg[0]): | ||
301 | if (data >= TASK_SIZE-7) break; | ||
302 | child->thread.debugreg0 = data; | ||
303 | ret = 0; | ||
304 | break; | ||
305 | case offsetof(struct user, u_debugreg[1]): | ||
306 | if (data >= TASK_SIZE-7) break; | ||
307 | child->thread.debugreg1 = data; | ||
308 | ret = 0; | ||
309 | break; | ||
310 | case offsetof(struct user, u_debugreg[2]): | ||
311 | if (data >= TASK_SIZE-7) break; | ||
312 | child->thread.debugreg2 = data; | ||
313 | ret = 0; | ||
314 | break; | ||
315 | case offsetof(struct user, u_debugreg[3]): | ||
316 | if (data >= TASK_SIZE-7) break; | ||
317 | child->thread.debugreg3 = data; | ||
318 | ret = 0; | ||
319 | break; | ||
320 | case offsetof(struct user, u_debugreg[6]): | ||
321 | if (data >> 32) | ||
322 | break; | ||
323 | child->thread.debugreg6 = data; | ||
324 | ret = 0; | ||
325 | break; | ||
326 | case offsetof(struct user, u_debugreg[7]): | ||
327 | /* See arch/i386/kernel/ptrace.c for an explanation of | ||
328 | * this awkward check.*/ | ||
329 | data &= ~DR_CONTROL_RESERVED; | ||
330 | for(i=0; i<4; i++) | ||
331 | if ((0x5454 >> ((data >> (16 + 4*i)) & 0xf)) & 1) | ||
332 | break; | ||
333 | if (i == 4) { | ||
334 | child->thread.debugreg7 = data; | ||
335 | ret = 0; | ||
336 | } | ||
337 | break; | ||
338 | } | ||
339 | break; | ||
340 | case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ | ||
341 | case PTRACE_CONT: { /* restart after signal. */ | ||
342 | long tmp; | ||
343 | |||
344 | ret = -EIO; | ||
345 | if ((unsigned long) data > _NSIG) | ||
346 | break; | ||
347 | if (request == PTRACE_SYSCALL) | ||
348 | set_tsk_thread_flag(child,TIF_SYSCALL_TRACE); | ||
349 | else | ||
350 | clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); | ||
351 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
352 | child->exit_code = data; | ||
353 | /* make sure the single step bit is not set. */ | ||
354 | tmp = get_stack_long(child, EFL_OFFSET); | ||
355 | tmp &= ~TRAP_FLAG; | ||
356 | put_stack_long(child, EFL_OFFSET,tmp); | ||
357 | wake_up_process(child); | ||
358 | ret = 0; | ||
359 | break; | ||
360 | } | ||
361 | |||
362 | #ifdef CONFIG_IA32_EMULATION | ||
363 | /* This makes only sense with 32bit programs. Allow a | ||
364 | 64bit debugger to fully examine them too. Better | ||
365 | don't use it against 64bit processes, use | ||
366 | PTRACE_ARCH_PRCTL instead. */ | ||
367 | case PTRACE_SET_THREAD_AREA: { | ||
368 | struct user_desc __user *p; | ||
369 | int old; | ||
370 | p = (struct user_desc __user *)data; | ||
371 | get_user(old, &p->entry_number); | ||
372 | put_user(addr, &p->entry_number); | ||
373 | ret = do_set_thread_area(&child->thread, p); | ||
374 | put_user(old, &p->entry_number); | ||
375 | break; | ||
376 | case PTRACE_GET_THREAD_AREA: | ||
377 | p = (struct user_desc __user *)data; | ||
378 | get_user(old, &p->entry_number); | ||
379 | put_user(addr, &p->entry_number); | ||
380 | ret = do_get_thread_area(&child->thread, p); | ||
381 | put_user(old, &p->entry_number); | ||
382 | break; | ||
383 | } | ||
384 | #endif | ||
385 | /* normal 64bit interface to access TLS data. | ||
386 | Works just like arch_prctl, except that the arguments | ||
387 | are reversed. */ | ||
388 | case PTRACE_ARCH_PRCTL: | ||
389 | ret = do_arch_prctl(child, data, addr); | ||
390 | break; | ||
391 | |||
392 | /* | ||
393 | * make the child exit. Best I can do is send it a sigkill. | ||
394 | * perhaps it should be put in the status that it wants to | ||
395 | * exit. | ||
396 | */ | ||
397 | case PTRACE_KILL: { | ||
398 | long tmp; | ||
399 | |||
400 | ret = 0; | ||
401 | if (child->exit_state == EXIT_ZOMBIE) /* already dead */ | ||
402 | break; | ||
403 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
404 | child->exit_code = SIGKILL; | ||
405 | /* make sure the single step bit is not set. */ | ||
406 | tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; | ||
407 | put_stack_long(child, EFL_OFFSET, tmp); | ||
408 | wake_up_process(child); | ||
409 | break; | ||
410 | } | ||
411 | |||
412 | case PTRACE_SINGLESTEP: { /* set the trap flag. */ | ||
413 | long tmp; | ||
414 | |||
415 | ret = -EIO; | ||
416 | if ((unsigned long) data > _NSIG) | ||
417 | break; | ||
418 | clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); | ||
419 | if ((child->ptrace & PT_DTRACE) == 0) { | ||
420 | /* Spurious delayed TF traps may occur */ | ||
421 | child->ptrace |= PT_DTRACE; | ||
422 | } | ||
423 | tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; | ||
424 | put_stack_long(child, EFL_OFFSET, tmp); | ||
425 | set_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
426 | child->exit_code = data; | ||
427 | /* give it a chance to run. */ | ||
428 | wake_up_process(child); | ||
429 | ret = 0; | ||
430 | break; | ||
431 | } | ||
432 | |||
433 | case PTRACE_DETACH: | ||
434 | /* detach a process that was attached. */ | ||
435 | ret = ptrace_detach(child, data); | ||
436 | break; | ||
437 | |||
438 | case PTRACE_GETREGS: { /* Get all gp regs from the child. */ | ||
439 | if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, | ||
440 | sizeof(struct user_regs_struct))) { | ||
441 | ret = -EIO; | ||
442 | break; | ||
443 | } | ||
444 | ret = 0; | ||
445 | for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { | ||
446 | ret |= __put_user(getreg(child, ui),(unsigned long __user *) data); | ||
447 | data += sizeof(long); | ||
448 | } | ||
449 | break; | ||
450 | } | ||
451 | |||
452 | case PTRACE_SETREGS: { /* Set all gp regs in the child. */ | ||
453 | unsigned long tmp; | ||
454 | if (!access_ok(VERIFY_READ, (unsigned __user *)data, | ||
455 | sizeof(struct user_regs_struct))) { | ||
456 | ret = -EIO; | ||
457 | break; | ||
458 | } | ||
459 | ret = 0; | ||
460 | for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { | ||
461 | ret |= __get_user(tmp, (unsigned long __user *) data); | ||
462 | putreg(child, ui, tmp); | ||
463 | data += sizeof(long); | ||
464 | } | ||
465 | break; | ||
466 | } | ||
467 | |||
468 | case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */ | ||
469 | if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, | ||
470 | sizeof(struct user_i387_struct))) { | ||
471 | ret = -EIO; | ||
472 | break; | ||
473 | } | ||
474 | ret = get_fpregs((struct user_i387_struct __user *)data, child); | ||
475 | break; | ||
476 | } | ||
477 | |||
478 | case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */ | ||
479 | if (!access_ok(VERIFY_READ, (unsigned __user *)data, | ||
480 | sizeof(struct user_i387_struct))) { | ||
481 | ret = -EIO; | ||
482 | break; | ||
483 | } | ||
484 | set_stopped_child_used_math(child); | ||
485 | ret = set_fpregs(child, (struct user_i387_struct __user *)data); | ||
486 | break; | ||
487 | } | ||
488 | |||
489 | default: | ||
490 | ret = ptrace_request(child, request, addr, data); | ||
491 | break; | ||
492 | } | ||
493 | out_tsk: | ||
494 | put_task_struct(child); | ||
495 | out: | ||
496 | unlock_kernel(); | ||
497 | return ret; | ||
498 | } | ||
499 | |||
500 | static void syscall_trace(struct pt_regs *regs) | ||
501 | { | ||
502 | |||
503 | #if 0 | ||
504 | printk("trace %s rip %lx rsp %lx rax %d origrax %d caller %lx tiflags %x ptrace %x\n", | ||
505 | current->comm, | ||
506 | regs->rip, regs->rsp, regs->rax, regs->orig_rax, __builtin_return_address(0), | ||
507 | current_thread_info()->flags, current->ptrace); | ||
508 | #endif | ||
509 | |||
510 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) | ||
511 | ? 0x80 : 0)); | ||
512 | /* | ||
513 | * this isn't the same as continuing with a signal, but it will do | ||
514 | * for normal use. strace only continues with a signal if the | ||
515 | * stopping signal is not SIGTRAP. -brl | ||
516 | */ | ||
517 | if (current->exit_code) { | ||
518 | send_sig(current->exit_code, current, 1); | ||
519 | current->exit_code = 0; | ||
520 | } | ||
521 | } | ||
522 | |||
523 | asmlinkage void syscall_trace_enter(struct pt_regs *regs) | ||
524 | { | ||
525 | /* do the secure computing check first */ | ||
526 | secure_computing(regs->orig_rax); | ||
527 | |||
528 | if (unlikely(current->audit_context)) | ||
529 | audit_syscall_entry(current, regs->orig_rax, | ||
530 | regs->rdi, regs->rsi, | ||
531 | regs->rdx, regs->r10); | ||
532 | |||
533 | if (test_thread_flag(TIF_SYSCALL_TRACE) | ||
534 | && (current->ptrace & PT_PTRACED)) | ||
535 | syscall_trace(regs); | ||
536 | } | ||
537 | |||
538 | asmlinkage void syscall_trace_leave(struct pt_regs *regs) | ||
539 | { | ||
540 | if (unlikely(current->audit_context)) | ||
541 | audit_syscall_exit(current, regs->rax); | ||
542 | |||
543 | if ((test_thread_flag(TIF_SYSCALL_TRACE) | ||
544 | || test_thread_flag(TIF_SINGLESTEP)) | ||
545 | && (current->ptrace & PT_PTRACED)) | ||
546 | syscall_trace(regs); | ||
547 | } | ||