diff options
| author | Ingo Molnar <mingo@kernel.org> | 2013-10-31 06:11:35 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2013-10-31 06:11:35 -0400 |
| commit | 0e73453e172aaa38fd59fd4d3fc589e8fc9b9a70 (patch) | |
| tree | f651a01eb98dd5454d433da9ae35579242a2cb36 | |
| parent | 5a3126d4fe7c311fe12f98fef0470f6cb582d1ef (diff) | |
| parent | 3ab679661721b1ec2aaad99a801870ed59ab1110 (diff) | |
Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core
Fix uprobes bugs that happen if fork() is called with pending ret-probes,
from Oleg Nesterov.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
| -rw-r--r-- | include/linux/uprobes.h | 6 | ||||
| -rw-r--r-- | kernel/events/uprobes.c | 156 | ||||
| -rw-r--r-- | kernel/fork.c | 2 |
3 files changed, 124 insertions, 40 deletions
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 06f28beed7c2..9e0d5a6fe7a8 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h | |||
| @@ -117,13 +117,13 @@ extern void uprobe_start_dup_mmap(void); | |||
| 117 | extern void uprobe_end_dup_mmap(void); | 117 | extern void uprobe_end_dup_mmap(void); |
| 118 | extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); | 118 | extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); |
| 119 | extern void uprobe_free_utask(struct task_struct *t); | 119 | extern void uprobe_free_utask(struct task_struct *t); |
| 120 | extern void uprobe_copy_process(struct task_struct *t); | 120 | extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); |
| 121 | extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); | 121 | extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); |
| 122 | extern int uprobe_post_sstep_notifier(struct pt_regs *regs); | 122 | extern int uprobe_post_sstep_notifier(struct pt_regs *regs); |
| 123 | extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); | 123 | extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); |
| 124 | extern void uprobe_notify_resume(struct pt_regs *regs); | 124 | extern void uprobe_notify_resume(struct pt_regs *regs); |
| 125 | extern bool uprobe_deny_signal(void); | 125 | extern bool uprobe_deny_signal(void); |
| 126 | extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); | 126 | extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); |
| 127 | extern void uprobe_clear_state(struct mm_struct *mm); | 127 | extern void uprobe_clear_state(struct mm_struct *mm); |
| 128 | #else /* !CONFIG_UPROBES */ | 128 | #else /* !CONFIG_UPROBES */ |
| 129 | struct uprobes_state { | 129 | struct uprobes_state { |
| @@ -174,7 +174,7 @@ static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) | |||
| 174 | static inline void uprobe_free_utask(struct task_struct *t) | 174 | static inline void uprobe_free_utask(struct task_struct *t) |
| 175 | { | 175 | { |
| 176 | } | 176 | } |
| 177 | static inline void uprobe_copy_process(struct task_struct *t) | 177 | static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags) |
| 178 | { | 178 | { |
| 179 | } | 179 | } |
| 180 | static inline void uprobe_clear_state(struct mm_struct *mm) | 180 | static inline void uprobe_clear_state(struct mm_struct *mm) |
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad8e1bdca70e..ae9e1d2ef256 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | #include <linux/kdebug.h> /* notifier mechanism */ | 35 | #include <linux/kdebug.h> /* notifier mechanism */ |
| 36 | #include "../../mm/internal.h" /* munlock_vma_page */ | 36 | #include "../../mm/internal.h" /* munlock_vma_page */ |
| 37 | #include <linux/percpu-rwsem.h> | 37 | #include <linux/percpu-rwsem.h> |
| 38 | #include <linux/task_work.h> | ||
| 38 | 39 | ||
| 39 | #include <linux/uprobes.h> | 40 | #include <linux/uprobes.h> |
| 40 | 41 | ||
| @@ -1096,21 +1097,22 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon | |||
| 1096 | } | 1097 | } |
| 1097 | 1098 | ||
| 1098 | /* Slot allocation for XOL */ | 1099 | /* Slot allocation for XOL */ |
| 1099 | static int xol_add_vma(struct xol_area *area) | 1100 | static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) |
| 1100 | { | 1101 | { |
| 1101 | struct mm_struct *mm = current->mm; | ||
| 1102 | int ret = -EALREADY; | 1102 | int ret = -EALREADY; |
| 1103 | 1103 | ||
| 1104 | down_write(&mm->mmap_sem); | 1104 | down_write(&mm->mmap_sem); |
| 1105 | if (mm->uprobes_state.xol_area) | 1105 | if (mm->uprobes_state.xol_area) |
| 1106 | goto fail; | 1106 | goto fail; |
| 1107 | 1107 | ||
| 1108 | ret = -ENOMEM; | 1108 | if (!area->vaddr) { |
| 1109 | /* Try to map as high as possible, this is only a hint. */ | 1109 | /* Try to map as high as possible, this is only a hint. */ |
| 1110 | area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); | 1110 | area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, |
| 1111 | if (area->vaddr & ~PAGE_MASK) { | 1111 | PAGE_SIZE, 0, 0); |
| 1112 | ret = area->vaddr; | 1112 | if (area->vaddr & ~PAGE_MASK) { |
| 1113 | goto fail; | 1113 | ret = area->vaddr; |
| 1114 | goto fail; | ||
| 1115 | } | ||
| 1114 | } | 1116 | } |
| 1115 | 1117 | ||
| 1116 | ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, | 1118 | ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, |
| @@ -1120,30 +1122,19 @@ static int xol_add_vma(struct xol_area *area) | |||
| 1120 | 1122 | ||
| 1121 | smp_wmb(); /* pairs with get_xol_area() */ | 1123 | smp_wmb(); /* pairs with get_xol_area() */ |
| 1122 | mm->uprobes_state.xol_area = area; | 1124 | mm->uprobes_state.xol_area = area; |
| 1123 | ret = 0; | ||
| 1124 | fail: | 1125 | fail: |
| 1125 | up_write(&mm->mmap_sem); | 1126 | up_write(&mm->mmap_sem); |
| 1126 | 1127 | ||
| 1127 | return ret; | 1128 | return ret; |
| 1128 | } | 1129 | } |
| 1129 | 1130 | ||
| 1130 | /* | 1131 | static struct xol_area *__create_xol_area(unsigned long vaddr) |
| 1131 | * get_xol_area - Allocate process's xol_area if necessary. | ||
| 1132 | * This area will be used for storing instructions for execution out of line. | ||
| 1133 | * | ||
| 1134 | * Returns the allocated area or NULL. | ||
| 1135 | */ | ||
| 1136 | static struct xol_area *get_xol_area(void) | ||
| 1137 | { | 1132 | { |
| 1138 | struct mm_struct *mm = current->mm; | 1133 | struct mm_struct *mm = current->mm; |
| 1139 | struct xol_area *area; | ||
| 1140 | uprobe_opcode_t insn = UPROBE_SWBP_INSN; | 1134 | uprobe_opcode_t insn = UPROBE_SWBP_INSN; |
| 1135 | struct xol_area *area; | ||
| 1141 | 1136 | ||
| 1142 | area = mm->uprobes_state.xol_area; | 1137 | area = kmalloc(sizeof(*area), GFP_KERNEL); |
| 1143 | if (area) | ||
| 1144 | goto ret; | ||
| 1145 | |||
| 1146 | area = kzalloc(sizeof(*area), GFP_KERNEL); | ||
| 1147 | if (unlikely(!area)) | 1138 | if (unlikely(!area)) |
| 1148 | goto out; | 1139 | goto out; |
| 1149 | 1140 | ||
| @@ -1155,13 +1146,14 @@ static struct xol_area *get_xol_area(void) | |||
| 1155 | if (!area->page) | 1146 | if (!area->page) |
| 1156 | goto free_bitmap; | 1147 | goto free_bitmap; |
| 1157 | 1148 | ||
| 1158 | /* allocate first slot of task's xol_area for the return probes */ | 1149 | area->vaddr = vaddr; |
| 1150 | init_waitqueue_head(&area->wq); | ||
| 1151 | /* Reserve the 1st slot for get_trampoline_vaddr() */ | ||
| 1159 | set_bit(0, area->bitmap); | 1152 | set_bit(0, area->bitmap); |
| 1160 | copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); | ||
| 1161 | atomic_set(&area->slot_count, 1); | 1153 | atomic_set(&area->slot_count, 1); |
| 1162 | init_waitqueue_head(&area->wq); | 1154 | copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); |
| 1163 | 1155 | ||
| 1164 | if (!xol_add_vma(area)) | 1156 | if (!xol_add_vma(mm, area)) |
| 1165 | return area; | 1157 | return area; |
| 1166 | 1158 | ||
| 1167 | __free_page(area->page); | 1159 | __free_page(area->page); |
| @@ -1170,9 +1162,25 @@ static struct xol_area *get_xol_area(void) | |||
| 1170 | free_area: | 1162 | free_area: |
| 1171 | kfree(area); | 1163 | kfree(area); |
| 1172 | out: | 1164 | out: |
| 1165 | return NULL; | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | /* | ||
| 1169 | * get_xol_area - Allocate process's xol_area if necessary. | ||
| 1170 | * This area will be used for storing instructions for execution out of line. | ||
| 1171 | * | ||
| 1172 | * Returns the allocated area or NULL. | ||
| 1173 | */ | ||
| 1174 | static struct xol_area *get_xol_area(void) | ||
| 1175 | { | ||
| 1176 | struct mm_struct *mm = current->mm; | ||
| 1177 | struct xol_area *area; | ||
| 1178 | |||
| 1179 | if (!mm->uprobes_state.xol_area) | ||
| 1180 | __create_xol_area(0); | ||
| 1181 | |||
| 1173 | area = mm->uprobes_state.xol_area; | 1182 | area = mm->uprobes_state.xol_area; |
| 1174 | ret: | 1183 | smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ |
| 1175 | smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ | ||
| 1176 | return area; | 1184 | return area; |
| 1177 | } | 1185 | } |
| 1178 | 1186 | ||
| @@ -1345,14 +1353,6 @@ void uprobe_free_utask(struct task_struct *t) | |||
| 1345 | } | 1353 | } |
| 1346 | 1354 | ||
| 1347 | /* | 1355 | /* |
| 1348 | * Called in context of a new clone/fork from copy_process. | ||
| 1349 | */ | ||
| 1350 | void uprobe_copy_process(struct task_struct *t) | ||
| 1351 | { | ||
| 1352 | t->utask = NULL; | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | /* | ||
| 1356 | * Allocate a uprobe_task object for the task if necessary. | 1356 | * Allocate a uprobe_task object for the task if necessary. |
| 1357 | * Called when the thread hits a breakpoint. | 1357 | * Called when the thread hits a breakpoint. |
| 1358 | * | 1358 | * |
| @@ -1367,6 +1367,90 @@ static struct uprobe_task *get_utask(void) | |||
| 1367 | return current->utask; | 1367 | return current->utask; |
| 1368 | } | 1368 | } |
| 1369 | 1369 | ||
| 1370 | static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) | ||
| 1371 | { | ||
| 1372 | struct uprobe_task *n_utask; | ||
| 1373 | struct return_instance **p, *o, *n; | ||
| 1374 | |||
| 1375 | n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); | ||
| 1376 | if (!n_utask) | ||
| 1377 | return -ENOMEM; | ||
| 1378 | t->utask = n_utask; | ||
| 1379 | |||
| 1380 | p = &n_utask->return_instances; | ||
| 1381 | for (o = o_utask->return_instances; o; o = o->next) { | ||
| 1382 | n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); | ||
| 1383 | if (!n) | ||
| 1384 | return -ENOMEM; | ||
| 1385 | |||
| 1386 | *n = *o; | ||
| 1387 | atomic_inc(&n->uprobe->ref); | ||
| 1388 | n->next = NULL; | ||
| 1389 | |||
| 1390 | *p = n; | ||
| 1391 | p = &n->next; | ||
| 1392 | n_utask->depth++; | ||
| 1393 | } | ||
| 1394 | |||
| 1395 | return 0; | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | static void uprobe_warn(struct task_struct *t, const char *msg) | ||
| 1399 | { | ||
| 1400 | pr_warn("uprobe: %s:%d failed to %s\n", | ||
| 1401 | current->comm, current->pid, msg); | ||
| 1402 | } | ||
| 1403 | |||
| 1404 | static void dup_xol_work(struct callback_head *work) | ||
| 1405 | { | ||
| 1406 | kfree(work); | ||
| 1407 | |||
| 1408 | if (current->flags & PF_EXITING) | ||
| 1409 | return; | ||
| 1410 | |||
| 1411 | if (!__create_xol_area(current->utask->vaddr)) | ||
| 1412 | uprobe_warn(current, "dup xol area"); | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | /* | ||
| 1416 | * Called in context of a new clone/fork from copy_process. | ||
| 1417 | */ | ||
| 1418 | void uprobe_copy_process(struct task_struct *t, unsigned long flags) | ||
| 1419 | { | ||
| 1420 | struct uprobe_task *utask = current->utask; | ||
| 1421 | struct mm_struct *mm = current->mm; | ||
| 1422 | struct callback_head *work; | ||
| 1423 | struct xol_area *area; | ||
| 1424 | |||
| 1425 | t->utask = NULL; | ||
| 1426 | |||
| 1427 | if (!utask || !utask->return_instances) | ||
| 1428 | return; | ||
| 1429 | |||
| 1430 | if (mm == t->mm && !(flags & CLONE_VFORK)) | ||
| 1431 | return; | ||
| 1432 | |||
| 1433 | if (dup_utask(t, utask)) | ||
| 1434 | return uprobe_warn(t, "dup ret instances"); | ||
| 1435 | |||
| 1436 | /* The task can fork() after dup_xol_work() fails */ | ||
| 1437 | area = mm->uprobes_state.xol_area; | ||
| 1438 | if (!area) | ||
| 1439 | return uprobe_warn(t, "dup xol area"); | ||
| 1440 | |||
| 1441 | if (mm == t->mm) | ||
| 1442 | return; | ||
| 1443 | |||
| 1444 | /* TODO: move it into the union in uprobe_task */ | ||
| 1445 | work = kmalloc(sizeof(*work), GFP_KERNEL); | ||
| 1446 | if (!work) | ||
| 1447 | return uprobe_warn(t, "dup xol area"); | ||
| 1448 | |||
| 1449 | utask->vaddr = area->vaddr; | ||
| 1450 | init_task_work(work, dup_xol_work); | ||
| 1451 | task_work_add(t, work, true); | ||
| 1452 | } | ||
| 1453 | |||
| 1370 | /* | 1454 | /* |
| 1371 | * The current area->vaddr notion assumes the trampoline address is always | 1455 | * The current area->vaddr notion assumes the trampoline address is always |
| 1372 | * equal to area->vaddr. | 1456 | * equal to area->vaddr. |
diff --git a/kernel/fork.c b/kernel/fork.c index 086fe73ad6bd..8531609b6a82 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -1373,7 +1373,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1373 | INIT_LIST_HEAD(&p->pi_state_list); | 1373 | INIT_LIST_HEAD(&p->pi_state_list); |
| 1374 | p->pi_state_cache = NULL; | 1374 | p->pi_state_cache = NULL; |
| 1375 | #endif | 1375 | #endif |
| 1376 | uprobe_copy_process(p); | ||
| 1377 | /* | 1376 | /* |
| 1378 | * sigaltstack should be cleared when sharing the same VM | 1377 | * sigaltstack should be cleared when sharing the same VM |
| 1379 | */ | 1378 | */ |
| @@ -1490,6 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1490 | perf_event_fork(p); | 1489 | perf_event_fork(p); |
| 1491 | 1490 | ||
| 1492 | trace_task_newtask(p, clone_flags); | 1491 | trace_task_newtask(p, clone_flags); |
| 1492 | uprobe_copy_process(p, clone_flags); | ||
| 1493 | 1493 | ||
| 1494 | return p; | 1494 | return p; |
| 1495 | 1495 | ||
