aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events
diff options
context:
space:
mode:
authorSrikar Dronamraju <srikar@linux.vnet.ibm.com>2012-03-13 14:00:11 -0400
committerIngo Molnar <mingo@elte.hu>2012-03-14 02:41:36 -0400
commit0326f5a94ddea33fa331b2519f4172f4fb387baa (patch)
tree5485c637754a126c90852e5285842e8462d2826a /kernel/events
parentef15eda98217f5183f457e7a2de8b79555ef908b (diff)
uprobes/core: Handle breakpoint and singlestep exceptions
Uprobes uses exception notifiers to get to know if a thread hit a breakpoint or a singlestep exception. When a thread hits a uprobe or is singlestepping after a uprobe hit, the uprobe exception notifier sets its TIF_UPROBE bit, which will then be checked on its return to userspace path (do_notify_resume() ->uprobe_notify_resume()), where the consumers' handlers are run (in task context) based on the defined filters. Uprobe hits are thread specific and hence we need to maintain information about whether a task hit a uprobe, what uprobe was hit, and the slot where the original instruction was copied for xol so that it can be singlestepped with appropriate fixups. In some cases, special care is needed for instructions that are executed out of line (xol). These are architecture specific artefacts, such as handling RIP relative instructions on x86_64. Since the instruction at which the uprobe was inserted is executed out of line, architecture specific fixups are added so that the thread continues normal execution in the presence of a uprobe. Postpone the signals until we execute the probed insn. The post_xol() path does a recalc_sigpending() before return to user-mode; this ensures the signal can't be lost. Uprobes relies on the DIE_DEBUG notification to notify if a singlestep is complete. Adds x86 specific uprobe exception notifiers and appropriate hooks needed to determine a uprobe hit and subsequent post processing. Add requisite x86 fixups for xol for uprobes. Specific cases needing fixups include relative jumps (x86_64), calls, etc. Where possible, we check and skip singlestepping the breakpointed instructions. For now we skip single byte as well as a few multibyte nop instructions. However this can be extended to other instructions too. Credits to Oleg Nesterov for suggestions/patches related to the signal, breakpoint, and singlestep handling code. 
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Jim Keniston <jkenisto@linux.vnet.ibm.com> Cc: Linux-mm <linux-mm@kvack.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Christoph Hellwig <hch@infradead.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Arnaldo Carvalho de Melo <acme@infradead.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/20120313180011.29771.89027.sendpatchset@srdronam.in.ibm.com [ Performed various cleanliness edits ] Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/events')
-rw-r--r--kernel/events/uprobes.c323
1 files changed, 319 insertions, 4 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e56e56aa7535..b807d1566b64 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -30,9 +30,12 @@
30#include <linux/rmap.h> /* anon_vma_prepare */ 30#include <linux/rmap.h> /* anon_vma_prepare */
31#include <linux/mmu_notifier.h> /* set_pte_at_notify */ 31#include <linux/mmu_notifier.h> /* set_pte_at_notify */
32#include <linux/swap.h> /* try_to_free_swap */ 32#include <linux/swap.h> /* try_to_free_swap */
33#include <linux/ptrace.h> /* user_enable_single_step */
34#include <linux/kdebug.h> /* notifier mechanism */
33 35
34#include <linux/uprobes.h> 36#include <linux/uprobes.h>
35 37
38static struct srcu_struct uprobes_srcu;
36static struct rb_root uprobes_tree = RB_ROOT; 39static struct rb_root uprobes_tree = RB_ROOT;
37 40
38static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ 41static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
@@ -486,6 +489,9 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
486 u = __insert_uprobe(uprobe); 489 u = __insert_uprobe(uprobe);
487 spin_unlock_irqrestore(&uprobes_treelock, flags); 490 spin_unlock_irqrestore(&uprobes_treelock, flags);
488 491
492 /* For now assume that the instruction need not be single-stepped */
493 uprobe->flags |= UPROBE_SKIP_SSTEP;
494
489 return u; 495 return u;
490} 496}
491 497
@@ -523,6 +529,21 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
523 return uprobe; 529 return uprobe;
524} 530}
525 531
532static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
533{
534 struct uprobe_consumer *uc;
535
536 if (!(uprobe->flags & UPROBE_RUN_HANDLER))
537 return;
538
539 down_read(&uprobe->consumer_rwsem);
540 for (uc = uprobe->consumers; uc; uc = uc->next) {
541 if (!uc->filter || uc->filter(uc, current))
542 uc->handler(uc, regs);
543 }
544 up_read(&uprobe->consumer_rwsem);
545}
546
526/* Returns the previous consumer */ 547/* Returns the previous consumer */
527static struct uprobe_consumer * 548static struct uprobe_consumer *
528consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) 549consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
@@ -645,7 +666,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
645 if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) 666 if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
646 return -EEXIST; 667 return -EEXIST;
647 668
648 ret = arch_uprobes_analyze_insn(&uprobe->arch, mm); 669 ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
649 if (ret) 670 if (ret)
650 return ret; 671 return ret;
651 672
@@ -662,10 +683,21 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
662 set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true); 683 set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true);
663} 684}
664 685
686/*
687 * There could be threads that have hit the breakpoint and are entering the
688 * notifier code and trying to acquire the uprobes_treelock. The thread
689 * calling delete_uprobe() that is removing the uprobe from the rb_tree can
690 * race with these threads and might acquire the uprobes_treelock compared
691 * to some of the breakpoint hit threads. In such a case, the breakpoint
692 * hit threads will not find the uprobe. The current unregistering thread
693 * waits till all other threads have hit a breakpoint, to acquire the
694 * uprobes_treelock before the uprobe is removed from the rbtree.
695 */
665static void delete_uprobe(struct uprobe *uprobe) 696static void delete_uprobe(struct uprobe *uprobe)
666{ 697{
667 unsigned long flags; 698 unsigned long flags;
668 699
700 synchronize_srcu(&uprobes_srcu);
669 spin_lock_irqsave(&uprobes_treelock, flags); 701 spin_lock_irqsave(&uprobes_treelock, flags);
670 rb_erase(&uprobe->rb_node, &uprobes_tree); 702 rb_erase(&uprobe->rb_node, &uprobes_tree);
671 spin_unlock_irqrestore(&uprobes_treelock, flags); 703 spin_unlock_irqrestore(&uprobes_treelock, flags);
@@ -1010,6 +1042,288 @@ int uprobe_mmap(struct vm_area_struct *vma)
1010 return ret; 1042 return ret;
1011} 1043}
1012 1044
1045/**
1046 * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
1047 * @regs: Reflects the saved state of the task after it has hit a breakpoint
1048 * instruction.
1049 * Return the address of the breakpoint instruction.
1050 */
1051unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
1052{
1053 return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
1054}
1055
1056/*
1057 * Called with no locks held.
1058 * Called in context of a exiting or a exec-ing thread.
1059 */
1060void uprobe_free_utask(struct task_struct *t)
1061{
1062 struct uprobe_task *utask = t->utask;
1063
1064 if (t->uprobe_srcu_id != -1)
1065 srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
1066
1067 if (!utask)
1068 return;
1069
1070 if (utask->active_uprobe)
1071 put_uprobe(utask->active_uprobe);
1072
1073 kfree(utask);
1074 t->utask = NULL;
1075}
1076
1077/*
1078 * Called in context of a new clone/fork from copy_process.
1079 */
1080void uprobe_copy_process(struct task_struct *t)
1081{
1082 t->utask = NULL;
1083 t->uprobe_srcu_id = -1;
1084}
1085
1086/*
1087 * Allocate a uprobe_task object for the task.
1088 * Called when the thread hits a breakpoint for the first time.
1089 *
1090 * Returns:
1091 * - pointer to new uprobe_task on success
1092 * - NULL otherwise
1093 */
1094static struct uprobe_task *add_utask(void)
1095{
1096 struct uprobe_task *utask;
1097
1098 utask = kzalloc(sizeof *utask, GFP_KERNEL);
1099 if (unlikely(!utask))
1100 return NULL;
1101
1102 utask->active_uprobe = NULL;
1103 current->utask = utask;
1104 return utask;
1105}
1106
1107/* Prepare to single-step probed instruction out of line. */
1108static int
1109pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
1110{
1111 return -EFAULT;
1112}
1113
1114/*
1115 * If we are singlestepping, then ensure this thread is not connected to
1116 * non-fatal signals until completion of singlestep. When xol insn itself
1117 * triggers the signal, restart the original insn even if the task is
1118 * already SIGKILL'ed (since coredump should report the correct ip). This
1119 * is even more important if the task has a handler for SIGSEGV/etc, The
1120 * _same_ instruction should be repeated again after return from the signal
1121 * handler, and SSTEP can never finish in this case.
1122 */
1123bool uprobe_deny_signal(void)
1124{
1125 struct task_struct *t = current;
1126 struct uprobe_task *utask = t->utask;
1127
1128 if (likely(!utask || !utask->active_uprobe))
1129 return false;
1130
1131 WARN_ON_ONCE(utask->state != UTASK_SSTEP);
1132
1133 if (signal_pending(t)) {
1134 spin_lock_irq(&t->sighand->siglock);
1135 clear_tsk_thread_flag(t, TIF_SIGPENDING);
1136 spin_unlock_irq(&t->sighand->siglock);
1137
1138 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
1139 utask->state = UTASK_SSTEP_TRAPPED;
1140 set_tsk_thread_flag(t, TIF_UPROBE);
1141 set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
1142 }
1143 }
1144
1145 return true;
1146}
1147
1148/*
1149 * Avoid singlestepping the original instruction if the original instruction
1150 * is a NOP or can be emulated.
1151 */
1152static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
1153{
1154 if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
1155 return true;
1156
1157 uprobe->flags &= ~UPROBE_SKIP_SSTEP;
1158 return false;
1159}
1160
1161/*
1162 * Run handler and ask thread to singlestep.
1163 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
1164 */
1165static void handle_swbp(struct pt_regs *regs)
1166{
1167 struct vm_area_struct *vma;
1168 struct uprobe_task *utask;
1169 struct uprobe *uprobe;
1170 struct mm_struct *mm;
1171 unsigned long bp_vaddr;
1172
1173 uprobe = NULL;
1174 bp_vaddr = uprobe_get_swbp_addr(regs);
1175 mm = current->mm;
1176 down_read(&mm->mmap_sem);
1177 vma = find_vma(mm, bp_vaddr);
1178
1179 if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
1180 struct inode *inode;
1181 loff_t offset;
1182
1183 inode = vma->vm_file->f_mapping->host;
1184 offset = bp_vaddr - vma->vm_start;
1185 offset += (vma->vm_pgoff << PAGE_SHIFT);
1186 uprobe = find_uprobe(inode, offset);
1187 }
1188
1189 srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
1190 current->uprobe_srcu_id = -1;
1191 up_read(&mm->mmap_sem);
1192
1193 if (!uprobe) {
1194 /* No matching uprobe; signal SIGTRAP. */
1195 send_sig(SIGTRAP, current, 0);
1196 return;
1197 }
1198
1199 utask = current->utask;
1200 if (!utask) {
1201 utask = add_utask();
1202 /* Cannot allocate; re-execute the instruction. */
1203 if (!utask)
1204 goto cleanup_ret;
1205 }
1206 utask->active_uprobe = uprobe;
1207 handler_chain(uprobe, regs);
1208 if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs))
1209 goto cleanup_ret;
1210
1211 utask->state = UTASK_SSTEP;
1212 if (!pre_ssout(uprobe, regs, bp_vaddr)) {
1213 user_enable_single_step(current);
1214 return;
1215 }
1216
1217cleanup_ret:
1218 if (utask) {
1219 utask->active_uprobe = NULL;
1220 utask->state = UTASK_RUNNING;
1221 }
1222 if (uprobe) {
1223 if (!(uprobe->flags & UPROBE_SKIP_SSTEP))
1224
1225 /*
1226 * cannot singlestep; cannot skip instruction;
1227 * re-execute the instruction.
1228 */
1229 instruction_pointer_set(regs, bp_vaddr);
1230
1231 put_uprobe(uprobe);
1232 }
1233}
1234
1235/*
1236 * Perform required fix-ups and disable singlestep.
1237 * Allow pending signals to take effect.
1238 */
1239static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1240{
1241 struct uprobe *uprobe;
1242
1243 uprobe = utask->active_uprobe;
1244 if (utask->state == UTASK_SSTEP_ACK)
1245 arch_uprobe_post_xol(&uprobe->arch, regs);
1246 else if (utask->state == UTASK_SSTEP_TRAPPED)
1247 arch_uprobe_abort_xol(&uprobe->arch, regs);
1248 else
1249 WARN_ON_ONCE(1);
1250
1251 put_uprobe(uprobe);
1252 utask->active_uprobe = NULL;
1253 utask->state = UTASK_RUNNING;
1254 user_disable_single_step(current);
1255
1256 spin_lock_irq(&current->sighand->siglock);
1257 recalc_sigpending(); /* see uprobe_deny_signal() */
1258 spin_unlock_irq(&current->sighand->siglock);
1259}
1260
1261/*
1262 * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag. (and on
1263 * subsequent probe hits on the thread sets the state to UTASK_BP_HIT) and
1264 * allows the thread to return from interrupt.
1265 *
1266 * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag and
1267 * also sets the state to UTASK_SSTEP_ACK and allows the thread to return from
1268 * interrupt.
1269 *
1270 * While returning to userspace, thread notices the TIF_UPROBE flag and calls
1271 * uprobe_notify_resume().
1272 */
1273void uprobe_notify_resume(struct pt_regs *regs)
1274{
1275 struct uprobe_task *utask;
1276
1277 utask = current->utask;
1278 if (!utask || utask->state == UTASK_BP_HIT)
1279 handle_swbp(regs);
1280 else
1281 handle_singlestep(utask, regs);
1282}
1283
1284/*
1285 * uprobe_pre_sstep_notifier gets called from interrupt context as part of
1286 * notifier mechanism. Set TIF_UPROBE flag and indicate breakpoint hit.
1287 */
1288int uprobe_pre_sstep_notifier(struct pt_regs *regs)
1289{
1290 struct uprobe_task *utask;
1291
1292 if (!current->mm)
1293 return 0;
1294
1295 utask = current->utask;
1296 if (utask)
1297 utask->state = UTASK_BP_HIT;
1298
1299 set_thread_flag(TIF_UPROBE);
1300 current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
1301
1302 return 1;
1303}
1304
1305/*
1306 * uprobe_post_sstep_notifier gets called in interrupt context as part of notifier
1307 * mechanism. Set TIF_UPROBE flag and indicate completion of singlestep.
1308 */
1309int uprobe_post_sstep_notifier(struct pt_regs *regs)
1310{
1311 struct uprobe_task *utask = current->utask;
1312
1313 if (!current->mm || !utask || !utask->active_uprobe)
1314 /* task is currently not uprobed */
1315 return 0;
1316
1317 utask->state = UTASK_SSTEP_ACK;
1318 set_thread_flag(TIF_UPROBE);
1319 return 1;
1320}
1321
1322static struct notifier_block uprobe_exception_nb = {
1323 .notifier_call = arch_uprobe_exception_notify,
1324 .priority = INT_MAX-1, /* notified after kprobes, kgdb */
1325};
1326
1013static int __init init_uprobes(void) 1327static int __init init_uprobes(void)
1014{ 1328{
1015 int i; 1329 int i;
@@ -1018,12 +1332,13 @@ static int __init init_uprobes(void)
1018 mutex_init(&uprobes_mutex[i]); 1332 mutex_init(&uprobes_mutex[i]);
1019 mutex_init(&uprobes_mmap_mutex[i]); 1333 mutex_init(&uprobes_mmap_mutex[i]);
1020 } 1334 }
1021 return 0; 1335 init_srcu_struct(&uprobes_srcu);
1336
1337 return register_die_notifier(&uprobe_exception_nb);
1022} 1338}
1339module_init(init_uprobes);
1023 1340
1024static void __exit exit_uprobes(void) 1341static void __exit exit_uprobes(void)
1025{ 1342{
1026} 1343}
1027
1028module_init(init_uprobes);
1029module_exit(exit_uprobes); 1344module_exit(exit_uprobes);