author     Ingo Molnar <mingo@kernel.org>    2013-10-31 06:11:35 -0400
committer  Ingo Molnar <mingo@kernel.org>    2013-10-31 06:11:35 -0400
commit     0e73453e172aaa38fd59fd4d3fc589e8fc9b9a70 (patch)
tree       f651a01eb98dd5454d433da9ae35579242a2cb36
parent     5a3126d4fe7c311fe12f98fef0470f6cb582d1ef (diff)
parent     3ab679661721b1ec2aaad99a801870ed59ab1110 (diff)
Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core
Fix uprobes bugs that happen if fork() is called with pending ret-probes,
from Oleg Nesterov.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  include/linux/uprobes.h |   6
-rw-r--r--  kernel/events/uprobes.c | 156
-rw-r--r--  kernel/fork.c           |   2
3 files changed, 124 insertions, 40 deletions
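To make the scenario in the commit message concrete, the sketch below shows a process that calls fork() while a uretprobe's ret-probe is still pending on its stack; this is the case handled by the uprobe_copy_process()/dup_utask() changes in kernel/events/uprobes.c further down. The sketch is illustrative only and not part of the patch: probed_fn() and the perf invocation are hypothetical stand-ins for any function carrying an installed uretprobe.

/*
 * Illustrative sketch, not part of the patch.  Assume a uretprobe has been
 * installed on probed_fn(), e.g. with "perf probe -x ./test 'probed_fn%return'"
 * and a perf record session on the event that perf probe reports.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pid_t probed_fn(void)
{
	/*
	 * While we are inside probed_fn() the return address on the stack has
	 * been replaced by the uprobe trampoline.  The child created here
	 * inherits that stack, so it must also inherit enough uprobe state
	 * (the return_instance chain, and an XOL area if it has its own mm)
	 * to return through the trampoline safely; that is what the new
	 * uprobe_copy_process() below sets up.
	 */
	return fork();
}

int main(void)
{
	pid_t pid = probed_fn();

	if (pid < 0)
		return EXIT_FAILURE;
	if (pid == 0) {
		/* Reaching this point means the child already returned
		 * from probed_fn() through the trampoline. */
		printf("child %d returned safely past the pending ret-probe\n",
		       (int)getpid());
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	return EXIT_SUCCESS;
}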
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 06f28beed7c2..9e0d5a6fe7a8 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -117,13 +117,13 @@ extern void uprobe_start_dup_mmap(void);
 extern void uprobe_end_dup_mmap(void);
 extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
 extern void uprobe_free_utask(struct task_struct *t);
-extern void uprobe_copy_process(struct task_struct *t);
+extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
 extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
 extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
-extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void uprobe_clear_state(struct mm_struct *mm);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
@@ -174,7 +174,7 @@ static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
 static inline void uprobe_free_utask(struct task_struct *t)
 {
 }
-static inline void uprobe_copy_process(struct task_struct *t)
+static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags)
 {
 }
 static inline void uprobe_clear_state(struct mm_struct *mm)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ad8e1bdca70e..ae9e1d2ef256 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -35,6 +35,7 @@
 #include <linux/kdebug.h>	/* notifier mechanism */
 #include "../../mm/internal.h"	/* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
+#include <linux/task_work.h>
 
 #include <linux/uprobes.h>
 
@@ -1096,21 +1097,22 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 }
 
 /* Slot allocation for XOL */
-static int xol_add_vma(struct xol_area *area)
+static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
-	struct mm_struct *mm = current->mm;
 	int ret = -EALREADY;
 
 	down_write(&mm->mmap_sem);
 	if (mm->uprobes_state.xol_area)
 		goto fail;
 
-	ret = -ENOMEM;
-	/* Try to map as high as possible, this is only a hint. */
-	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
-	if (area->vaddr & ~PAGE_MASK) {
-		ret = area->vaddr;
-		goto fail;
+	if (!area->vaddr) {
+		/* Try to map as high as possible, this is only a hint. */
+		area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
+						PAGE_SIZE, 0, 0);
+		if (area->vaddr & ~PAGE_MASK) {
+			ret = area->vaddr;
+			goto fail;
+		}
 	}
 
 	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
@@ -1120,30 +1122,19 @@ static int xol_add_vma(struct xol_area *area)
 
 	smp_wmb();	/* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
-	ret = 0;
  fail:
 	up_write(&mm->mmap_sem);
 
 	return ret;
 }
 
-/*
- * get_xol_area - Allocate process's xol_area if necessary.
- * This area will be used for storing instructions for execution out of line.
- *
- * Returns the allocated area or NULL.
- */
-static struct xol_area *get_xol_area(void)
+static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
-	struct xol_area *area;
 	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+	struct xol_area *area;
 
-	area = mm->uprobes_state.xol_area;
-	if (area)
-		goto ret;
-
-	area = kzalloc(sizeof(*area), GFP_KERNEL);
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
 		goto out;
 
@@ -1155,13 +1146,14 @@ static struct xol_area *get_xol_area(void)
 	if (!area->page)
 		goto free_bitmap;
 
-	/* allocate first slot of task's xol_area for the return probes */
+	area->vaddr = vaddr;
+	init_waitqueue_head(&area->wq);
+	/* Reserve the 1st slot for get_trampoline_vaddr() */
 	set_bit(0, area->bitmap);
-	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 	atomic_set(&area->slot_count, 1);
-	init_waitqueue_head(&area->wq);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 
-	if (!xol_add_vma(area))
+	if (!xol_add_vma(mm, area))
 		return area;
 
 	__free_page(area->page);
@@ -1170,9 +1162,25 @@ static struct xol_area *get_xol_area(void)
  free_area:
 	kfree(area);
  out:
+	return NULL;
+}
+
+/*
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct xol_area *get_xol_area(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct xol_area *area;
+
+	if (!mm->uprobes_state.xol_area)
+		__create_xol_area(0);
+
 	area = mm->uprobes_state.xol_area;
- ret:
-	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
+	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
 	return area;
 }
 
@@ -1345,14 +1353,6 @@ void uprobe_free_utask(struct task_struct *t)
 }
 
 /*
- * Called in context of a new clone/fork from copy_process.
- */
-void uprobe_copy_process(struct task_struct *t)
-{
-	t->utask = NULL;
-}
-
-/*
  * Allocate a uprobe_task object for the task if if necessary.
  * Called when the thread hits a breakpoint.
  *
@@ -1367,6 +1367,90 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
+{
+	struct uprobe_task *n_utask;
+	struct return_instance **p, *o, *n;
+
+	n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+	if (!n_utask)
+		return -ENOMEM;
+	t->utask = n_utask;
+
+	p = &n_utask->return_instances;
+	for (o = o_utask->return_instances; o; o = o->next) {
+		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
+		if (!n)
+			return -ENOMEM;
+
+		*n = *o;
+		atomic_inc(&n->uprobe->ref);
+		n->next = NULL;
+
+		*p = n;
+		p = &n->next;
+		n_utask->depth++;
+	}
+
+	return 0;
+}
+
+static void uprobe_warn(struct task_struct *t, const char *msg)
+{
+	pr_warn("uprobe: %s:%d failed to %s\n",
+			current->comm, current->pid, msg);
+}
+
+static void dup_xol_work(struct callback_head *work)
+{
+	kfree(work);
+
+	if (current->flags & PF_EXITING)
+		return;
+
+	if (!__create_xol_area(current->utask->vaddr))
+		uprobe_warn(current, "dup xol area");
+}
+
+/*
+ * Called in context of a new clone/fork from copy_process.
+ */
+void uprobe_copy_process(struct task_struct *t, unsigned long flags)
+{
+	struct uprobe_task *utask = current->utask;
+	struct mm_struct *mm = current->mm;
+	struct callback_head *work;
+	struct xol_area *area;
+
+	t->utask = NULL;
+
+	if (!utask || !utask->return_instances)
+		return;
+
+	if (mm == t->mm && !(flags & CLONE_VFORK))
+		return;
+
+	if (dup_utask(t, utask))
+		return uprobe_warn(t, "dup ret instances");
+
+	/* The task can fork() after dup_xol_work() fails */
+	area = mm->uprobes_state.xol_area;
+	if (!area)
+		return uprobe_warn(t, "dup xol area");
+
+	if (mm == t->mm)
+		return;
+
+	/* TODO: move it into the union in uprobe_task */
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
+		return uprobe_warn(t, "dup xol area");
+
+	utask->vaddr = area->vaddr;
+	init_task_work(work, dup_xol_work);
+	task_work_add(t, work, true);
+}
+
 /*
  * Current area->vaddr notion assume the trampoline address is always
  * equal area->vaddr.
diff --git a/kernel/fork.c b/kernel/fork.c
index 086fe73ad6bd..8531609b6a82 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1373,7 +1373,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	INIT_LIST_HEAD(&p->pi_state_list);
 	p->pi_state_cache = NULL;
 #endif
-	uprobe_copy_process(p);
 	/*
 	 * sigaltstack should be cleared when sharing the same VM
 	 */
@@ -1490,6 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	perf_event_fork(p);
 
 	trace_task_newtask(p, clone_flags);
+	uprobe_copy_process(p, clone_flags);
 
 	return p;
 