| author | Frederic Weisbecker <fweisbec@gmail.com> | 2013-05-02 11:37:49 -0400 |
|---|---|---|
| committer | Frederic Weisbecker <fweisbec@gmail.com> | 2013-05-02 11:54:19 -0400 |
| commit | c032862fba51a3ca504752d3a25186b324c5ce83 (patch) | |
| tree | 955dc2ba4ab3df76ecc2bb780ee84aca04967e8d /kernel/events | |
| parent | fda76e074c7737fc57855dd17c762e50ed526052 (diff) | |
| parent | 8700c95adb033843fc163d112b9d21d4fda78018 (diff) | |
Merge commit '8700c95adb03' into timers/nohz
The full dynticks tree needs the latest RCU and sched
upstream updates in order to fix some dependencies.
Merge a common upstream merge point that has these
updates.
Conflicts:
include/linux/perf_event.h
kernel/rcutree.h
kernel/rcutree_plugin.h
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Diffstat (limited to 'kernel/events')
| -rw-r--r-- | kernel/events/core.c | 70 |
|---|---|---|
| -rw-r--r-- | kernel/events/internal.h | 2 |
| -rw-r--r-- | kernel/events/ring_buffer.c | 22 |
| -rw-r--r-- | kernel/events/uprobes.c | 300 |

4 files changed, 328 insertions, 66 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ddb993b52190..6b41c1899a8b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -38,6 +38,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
+#include <linux/cgroup.h>
 
 #include "internal.h"
 
@@ -235,6 +236,20 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
 #ifdef CONFIG_CGROUP_PERF
 
 /*
+ * perf_cgroup_info keeps track of time_enabled for a cgroup.
+ * This is a per-cpu dynamically allocated data structure.
+ */
+struct perf_cgroup_info {
+	u64				time;
+	u64				timestamp;
+};
+
+struct perf_cgroup {
+	struct cgroup_subsys_state	css;
+	struct perf_cgroup_info	__percpu *info;
+};
+
+/*
  * Must ensure cgroup is pinned (css_get) before calling
  * this function. In other words, we cannot call this function
  * if there is no cgroup event for the current CPU context.
@@ -252,7 +267,22 @@ perf_cgroup_match(struct perf_event *event)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
-	return !event->cgrp || event->cgrp == cpuctx->cgrp;
+	/* @event doesn't care about cgroup */
+	if (!event->cgrp)
+		return true;
+
+	/* wants specific cgroup scope but @cpuctx isn't associated with any */
+	if (!cpuctx->cgrp)
+		return false;
+
+	/*
+	 * Cgroup scoping is recursive. An event enabled for a cgroup is
+	 * also enabled for all its descendant cgroups. If @cpuctx's
+	 * cgroup is a descendant of @event's (the test covers identity
+	 * case), it's a match.
+	 */
+	return cgroup_is_descendant(cpuctx->cgrp->css.cgroup,
+				    event->cgrp->css.cgroup);
 }
 
 static inline bool perf_tryget_cgroup(struct perf_event *event)
@@ -966,9 +996,15 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_PERIOD)
 		size += sizeof(data->period);
 
+	if (sample_type & PERF_SAMPLE_WEIGHT)
+		size += sizeof(data->weight);
+
 	if (sample_type & PERF_SAMPLE_READ)
 		size += event->read_size;
 
+	if (sample_type & PERF_SAMPLE_DATA_SRC)
+		size += sizeof(data->data_src.val);
+
 	event->header_size = size;
 }
 
@@ -4193,6 +4229,12 @@ void perf_output_sample(struct perf_output_handle *handle,
 		perf_output_sample_ustack(handle,
 					  data->stack_user_size,
 					  data->regs_user.regs);
+
+	if (sample_type & PERF_SAMPLE_WEIGHT)
+		perf_output_put(handle, data->weight);
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC)
+		perf_output_put(handle, data->data_src.val);
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -4449,12 +4491,15 @@ static void perf_event_task_event(struct perf_task_event *task_event)
 			if (ctxn < 0)
 				goto next;
 			ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+			if (ctx)
+				perf_event_task_ctx(ctx, task_event);
 		}
-		if (ctx)
-			perf_event_task_ctx(ctx, task_event);
 next:
 		put_cpu_ptr(pmu->pmu_cpu_context);
 	}
+	if (task_event->task_ctx)
+		perf_event_task_ctx(task_event->task_ctx, task_event);
+
 	rcu_read_unlock();
 }
 
@@ -4608,6 +4653,7 @@ void perf_event_comm(struct task_struct *task)
 	struct perf_event_context *ctx;
 	int ctxn;
 
+	rcu_read_lock();
 	for_each_task_context_nr(ctxn) {
 		ctx = task->perf_event_ctxp[ctxn];
 		if (!ctx)
@@ -4615,6 +4661,7 @@ void perf_event_comm(struct task_struct *task)
 
 		perf_event_enable_on_exec(ctx);
 	}
+	rcu_read_unlock();
 
 	if (!atomic_read(&nr_comm_events))
 		return;
@@ -4749,7 +4796,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	} else {
 		if (arch_vma_name(mmap_event->vma)) {
 			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
-				       sizeof(tmp));
+				       sizeof(tmp) - 1);
+			tmp[sizeof(tmp) - 1] = '\0';
 			goto got_name;
 		}
 
@@ -4776,6 +4824,9 @@ got_name:
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
 
+	if (!(vma->vm_flags & VM_EXEC))
+		mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+
 	mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
 	rcu_read_lock();
@@ -5342,7 +5393,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 
 static int perf_swevent_init(struct perf_event *event)
 {
-	int event_id = event->attr.config;
+	u64 event_id = event->attr.config;
 
 	if (event->attr.type != PERF_TYPE_SOFTWARE)
 		return -ENOENT;
@@ -5662,6 +5713,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
 		event->attr.sample_period = NSEC_PER_SEC / freq;
 		hwc->sample_period = event->attr.sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
+		hwc->last_period = hwc->sample_period;
 		event->attr.freq = 0;
 	}
 }
@@ -5997,6 +6049,7 @@ skip_type:
 	if (pmu->pmu_cpu_context)
 		goto got_cpu_context;
 
+	ret = -ENOMEM;
 	pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
 	if (!pmu->pmu_cpu_context)
 		goto free_dev;
@@ -7524,12 +7577,5 @@ struct cgroup_subsys perf_subsys = {
 	.css_free	= perf_cgroup_css_free,
 	.exit		= perf_cgroup_exit,
 	.attach		= perf_cgroup_attach,
-
-	/*
-	 * perf_event cgroup doesn't handle nesting correctly.
-	 * ctx->nr_cgroups adjustments should be propagated through the
-	 * cgroup hierarchy. Fix it and remove the following.
-	 */
-	.broken_hierarchy = true,
 };
 #endif /* CONFIG_CGROUP_PERF */
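
A note on the cgroup change above (not part of the commit): perf_cgroup_match() now also matches a cpuctx whose cgroup is a descendant of the event's cgroup, which is why .broken_hierarchy could be dropped from perf_subsys. The following is a minimal user-space sketch of a cgroup-scoped counter opened with perf_event_open(); the perf_event cgroup mount path and the "mygroup" directory name are assumptions, and error handling is kept to the bare minimum.

/* Hypothetical example; build with: gcc -o cgperf cgperf.c, run as root. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct perf_event_attr attr;
	const char *cgrp = argc > 1 ? argv[1] : "/sys/fs/cgroup/perf_event/mygroup";
	int cgroup_fd, fd;
	uint64_t count;

	/* an fd of the cgroup directory is passed in place of a pid */
	cgroup_fd = open(cgrp, O_RDONLY);
	if (cgroup_fd < 0) {
		perror("open cgroup");
		return 1;
	}

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	/* cgroup events are per-CPU: cpu must be >= 0, thread id is -1 */
	fd = syscall(__NR_perf_event_open, &attr, cgroup_fd, 0 /* cpu */,
		     -1 /* group_fd */, PERF_FLAG_PID_CGROUP);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles on cpu0 for %s: %llu\n",
		       cgrp, (unsigned long long)count);
	return 0;
}

With this merge the reported count covers tasks running in the named cgroup and in any of its descendant cgroups; previously only tasks in the exact cgroup matched.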
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index d56a64c99a8b..eb675c4d59df 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -16,7 +16,7 @@ struct ring_buffer {
 	int				page_order;	/* allocation order  */
 #endif
 	int				nr_pages;	/* nr of data pages  */
-	int				writable;	/* are we writable   */
+	int				overwrite;	/* can overwrite itself */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34ff3973..97fddb09762b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -18,12 +18,24 @@
 static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
 			      unsigned long offset, unsigned long head)
 {
-	unsigned long mask;
+	unsigned long sz = perf_data_size(rb);
+	unsigned long mask = sz - 1;
 
-	if (!rb->writable)
+	/*
+	 * check if user-writable
+	 * overwrite : over-write its own tail
+	 * !overwrite: buffer possibly drops events.
+	 */
+	if (rb->overwrite)
 		return true;
 
-	mask = perf_data_size(rb) - 1;
+	/*
+	 * verify that payload is not bigger than buffer
+	 * otherwise masking logic may fail to detect
+	 * the "not enough space" condition
+	 */
+	if ((head - offset) > sz)
+		return false;
 
 	offset = (offset - tail) & mask;
 	head   = (head   - tail) & mask;
@@ -212,7 +224,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 		rb->watermark = max_size / 2;
 
 	if (flags & RING_BUFFER_WRITABLE)
-		rb->writable = 1;
+		rb->overwrite = 0;
+	else
+		rb->overwrite = 1;
 
 	atomic_set(&rb->refcount, 1);
 
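
For context (not part of the commit): which mode a ring buffer ends up in is decided by how user space maps it, since perf_mmap() passes RING_BUFFER_WRITABLE only for a writable mapping. A rough user-space sketch follows; the event choice, sample period and 8-page data area are arbitrary assumptions.

/* Hypothetical example; build with: gcc -o rbmode rbmode.c */
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define RING_PAGES 8	/* data area must be a power-of-two number of pages */

static int open_sampling_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/* measure the calling process on any CPU */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
	int fd = open_sampling_event();
	size_t len = (1 + RING_PAGES) * sysconf(_SC_PAGESIZE);
	struct perf_event_mmap_page *meta;

	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/*
	 * PROT_READ only: the vma has no VM_WRITE, the kernel does not pass
	 * RING_BUFFER_WRITABLE, so rb->overwrite == 1 and old records are
	 * silently overwritten; data_tail is not consulted.
	 */
	meta = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
	if (meta == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	printf("data_head=%llu (overwrite mode, tail unused)\n",
	       (unsigned long long)meta->data_head);
	munmap(meta, len);
	close(fd);
	return 0;
}

Mapping with PROT_READ | PROT_WRITE instead selects the non-overwrite path, where the consumer is expected to advance data_tail as it reads and where the new (head - offset) > sz check guards against records larger than the buffer.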
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a567c8c7ef31..f3569747d629 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -75,6 +75,15 @@ struct uprobe {
 	struct arch_uprobe	arch;
 };
 
+struct return_instance {
+	struct uprobe		*uprobe;
+	unsigned long		func;
+	unsigned long		orig_ret_vaddr; /* original return address */
+	bool			chained;	/* true, if instance is nested */
+
+	struct return_instance	*next;		/* keep as stack */
+};
+
 /*
  * valid_vma: Verify if the specified vma is an executable vma
  * Relax restrictions while unregistering: vm_flags might have
@@ -173,10 +182,31 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
 	return *insn == UPROBE_SWBP_INSN;
 }
 
-static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
+/**
+ * is_trap_insn - check if instruction is breakpoint instruction.
+ * @insn: instruction to be checked.
+ * Default implementation of is_trap_insn
+ * Returns true if @insn is a breakpoint instruction.
+ *
+ * This function is needed for the case where an architecture has multiple
+ * trap instructions (like powerpc).
+ */
+bool __weak is_trap_insn(uprobe_opcode_t *insn)
+{
+	return is_swbp_insn(insn);
+}
+
+static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
 {
 	void *kaddr = kmap_atomic(page);
-	memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
+	memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
+	kunmap_atomic(kaddr);
+}
+
+static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
+{
+	void *kaddr = kmap_atomic(page);
+	memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
 	kunmap_atomic(kaddr);
 }
 
@@ -185,7 +215,16 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 	uprobe_opcode_t old_opcode;
 	bool is_swbp;
 
-	copy_opcode(page, vaddr, &old_opcode);
+	/*
+	 * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here.
+	 * We do not check if it is any other 'trap variant' which could
+	 * be conditional trap instruction such as the one powerpc supports.
+	 *
+	 * The logic is that we do not care if the underlying instruction
+	 * is a trap variant; uprobes always wins over any other (gdb)
+	 * breakpoint.
+	 */
+	copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
 	is_swbp = is_swbp_insn(&old_opcode);
 
 	if (is_swbp_insn(new_opcode)) {
@@ -204,7 +243,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * Expect the breakpoint instruction to be the smallest size instruction for
  * the architecture. If an arch has variable length instruction and the
  * breakpoint instruction is not of the smallest length instruction
- * supported by that architecture then we need to modify is_swbp_at_addr and
+ * supported by that architecture then we need to modify is_trap_at_addr and
  * write_opcode accordingly. This would never be a problem for archs that
  * have fixed length instructions.
  */
@@ -225,7 +264,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
 			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
-	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
 	int ret;
 
@@ -246,15 +284,8 @@ retry:
 
 	__SetPageUptodate(new_page);
 
-	/* copy the page now that we've got it stable */
-	vaddr_old = kmap_atomic(old_page);
-	vaddr_new = kmap_atomic(new_page);
-
-	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
-
-	kunmap_atomic(vaddr_new);
-	kunmap_atomic(vaddr_old);
+	copy_highpage(new_page, old_page);
+	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	ret = anon_vma_prepare(vma);
 	if (ret)
@@ -477,30 +508,18 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 			unsigned long nbytes, loff_t offset)
 {
 	struct page *page;
-	void *vaddr;
-	unsigned long off;
-	pgoff_t idx;
-
-	if (!filp)
-		return -EINVAL;
 
 	if (!mapping->a_ops->readpage)
 		return -EIO;
-
-	idx = offset >> PAGE_CACHE_SHIFT;
-	off = offset & ~PAGE_MASK;
-
 	/*
 	 * Ensure that the page that has the original instruction is
 	 * populated and in page-cache.
 	 */
-	page = read_mapping_page(mapping, idx, filp);
+	page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
-	vaddr = kmap_atomic(page);
-	memcpy(insn, vaddr + off, nbytes);
-	kunmap_atomic(vaddr);
+	copy_from_page(page, offset, insn, nbytes);
 	page_cache_release(page);
 
 	return 0;
@@ -550,7 +569,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 		goto out;
 
 	ret = -ENOTSUPP;
-	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
+	if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn))
 		goto out;
 
 	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
@@ -758,7 +777,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 		down_write(&mm->mmap_sem);
 		vma = find_vma(mm, info->vaddr);
 		if (!vma || !valid_vma(vma, is_register) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
+		    file_inode(vma->vm_file) != uprobe->inode)
 			goto unlock;
 
 		if (vma->vm_start > info->vaddr ||
@@ -828,6 +847,10 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	struct uprobe *uprobe;
 	int ret;
 
+	/* Uprobe must have at least one set consumer */
+	if (!uc->handler && !uc->ret_handler)
+		return -EINVAL;
+
 	/* Racy, just to catch the obvious mistakes */
 	if (offset > i_size_read(inode))
 		return -EINVAL;
@@ -917,7 +940,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
 		loff_t offset;
 
 		if (!valid_vma(vma, false) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
+		    file_inode(vma->vm_file) != uprobe->inode)
 			continue;
 
 		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -1010,7 +1033,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	if (no_uprobe_events() || !valid_vma(vma, true))
 		return 0;
 
-	inode = vma->vm_file->f_mapping->host;
+	inode = file_inode(vma->vm_file);
 	if (!inode)
 		return 0;
 
@@ -1041,7 +1064,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
 	struct inode *inode;
 	struct rb_node *n;
 
-	inode = vma->vm_file->f_mapping->host;
+	inode = file_inode(vma->vm_file);
 
 	min = vaddr_to_offset(vma, start);
 	max = min + (end - start) - 1;
@@ -1114,6 +1137,7 @@ static struct xol_area *get_xol_area(void)
 {
 	struct mm_struct *mm = current->mm;
 	struct xol_area *area;
+	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
 
 	area = mm->uprobes_state.xol_area;
 	if (area)
@@ -1131,7 +1155,12 @@ static struct xol_area *get_xol_area(void)
 	if (!area->page)
 		goto free_bitmap;
 
+	/* allocate first slot of task's xol_area for the return probes */
+	set_bit(0, area->bitmap);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
+	atomic_set(&area->slot_count, 1);
 	init_waitqueue_head(&area->wq);
+
 	if (!xol_add_vma(area))
 		return area;
 
@@ -1216,9 +1245,7 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
 static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 {
 	struct xol_area *area;
-	unsigned long offset;
 	unsigned long xol_vaddr;
-	void *vaddr;
 
 	area = get_xol_area();
 	if (!area)
@@ -1229,10 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 		return 0;
 
 	/* Initialize the slot */
-	offset = xol_vaddr & ~PAGE_MASK;
-	vaddr = kmap_atomic(area->page);
-	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
-	kunmap_atomic(vaddr);
+	copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
 	/*
 	 * We probably need flush_icache_user_range() but it needs vma.
 	 * This should work on supported architectures too.
@@ -1298,6 +1322,7 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
 void uprobe_free_utask(struct task_struct *t)
 {
 	struct uprobe_task *utask = t->utask;
+	struct return_instance *ri, *tmp;
 
 	if (!utask)
 		return;
@@ -1305,6 +1330,15 @@ void uprobe_free_utask(struct task_struct *t)
 	if (utask->active_uprobe)
 		put_uprobe(utask->active_uprobe);
 
+	ri = utask->return_instances;
+	while (ri) {
+		tmp = ri;
+		ri = ri->next;
+
+		put_uprobe(tmp->uprobe);
+		kfree(tmp);
+	}
+
 	xol_free_insn_slot(t);
 	kfree(utask);
 	t->utask = NULL;
@@ -1333,6 +1367,93 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+/*
+ * Current area->vaddr notion assume the trampoline address is always
+ * equal area->vaddr.
+ *
+ * Returns -1 in case the xol_area is not allocated.
+ */
+static unsigned long get_trampoline_vaddr(void)
+{
+	struct xol_area *area;
+	unsigned long trampoline_vaddr = -1;
+
+	area = current->mm->uprobes_state.xol_area;
+	smp_read_barrier_depends();
+	if (area)
+		trampoline_vaddr = area->vaddr;
+
+	return trampoline_vaddr;
+}
+
+static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct return_instance *ri;
+	struct uprobe_task *utask;
+	unsigned long orig_ret_vaddr, trampoline_vaddr;
+	bool chained = false;
+
+	if (!get_xol_area())
+		return;
+
+	utask = get_utask();
+	if (!utask)
+		return;
+
+	if (utask->depth >= MAX_URETPROBE_DEPTH) {
+		printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
+				" nestedness limit pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+		return;
+	}
+
+	ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL);
+	if (!ri)
+		goto fail;
+
+	trampoline_vaddr = get_trampoline_vaddr();
+	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
+	if (orig_ret_vaddr == -1)
+		goto fail;
+
+	/*
+	 * We don't want to keep trampoline address in stack, rather keep the
+	 * original return address of first caller thru all the consequent
+	 * instances. This also makes breakpoint unwrapping easier.
+	 */
+	if (orig_ret_vaddr == trampoline_vaddr) {
+		if (!utask->return_instances) {
+			/*
+			 * This situation is not possible. Likely we have an
+			 * attack from user-space.
+			 */
+			pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
+						current->pid, current->tgid);
+			goto fail;
+		}
+
+		chained = true;
+		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
+	}
+
+	atomic_inc(&uprobe->ref);
+	ri->uprobe = uprobe;
+	ri->func = instruction_pointer(regs);
+	ri->orig_ret_vaddr = orig_ret_vaddr;
+	ri->chained = chained;
+
+	utask->depth++;
+
+	/* add instance to the stack */
+	ri->next = utask->return_instances;
+	utask->return_instances = ri;
+
+	return;
+
+ fail:
+	kfree(ri);
+}
+
 /* Prepare to single-step probed instruction out of line. */
 static int
 pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
@@ -1431,7 +1552,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
 	clear_bit(MMF_HAS_UPROBES, &mm->flags);
 }
 
-static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
+static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 {
 	struct page *page;
 	uprobe_opcode_t opcode;
@@ -1449,10 +1570,11 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	if (result < 0)
 		return result;
 
-	copy_opcode(page, vaddr, &opcode);
+	copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 	put_page(page);
  out:
-	return is_swbp_insn(&opcode);
+	/* This needs to return true for any variant of the trap insn */
+	return is_trap_insn(&opcode);
 }
 
 static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
@@ -1465,14 +1587,14 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	vma = find_vma(mm, bp_vaddr);
 	if (vma && vma->vm_start <= bp_vaddr) {
 		if (valid_vma(vma, false)) {
-			struct inode *inode = vma->vm_file->f_mapping->host;
+			struct inode *inode = file_inode(vma->vm_file);
 			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
 
 			uprobe = find_uprobe(inode, offset);
 		}
 
 		if (!uprobe)
-			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+			*is_swbp = is_trap_at_addr(mm, bp_vaddr);
 	} else {
 		*is_swbp = -EFAULT;
 	}
@@ -1488,16 +1610,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct uprobe_consumer *uc;
 	int remove = UPROBE_HANDLER_REMOVE;
+	bool need_prep = false; /* prepare return uprobe, when needed */
 
 	down_read(&uprobe->register_rwsem);
 	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		int rc = uc->handler(uc, regs);
+		int rc = 0;
+
+		if (uc->handler) {
+			rc = uc->handler(uc, regs);
+			WARN(rc & ~UPROBE_HANDLER_MASK,
+				"bad rc=0x%x from %pf()\n", rc, uc->handler);
+		}
+
+		if (uc->ret_handler)
+			need_prep = true;
 
-		WARN(rc & ~UPROBE_HANDLER_MASK,
-			"bad rc=0x%x from %pf()\n", rc, uc->handler);
 		remove &= rc;
 	}
 
+	if (need_prep && !remove)
+		prepare_uretprobe(uprobe, regs); /* put bp at return */
+
 	if (remove && uprobe->consumers) {
 		WARN_ON(!uprobe_is_active(uprobe));
 		unapply_uprobe(uprobe, current->mm);
@@ -1505,6 +1638,64 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 	up_read(&uprobe->register_rwsem);
 }
 
+static void
+handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
+{
+	struct uprobe *uprobe = ri->uprobe;
+	struct uprobe_consumer *uc;
+
+	down_read(&uprobe->register_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		if (uc->ret_handler)
+			uc->ret_handler(uc, ri->func, regs);
+	}
+	up_read(&uprobe->register_rwsem);
+}
+
+static bool handle_trampoline(struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+	struct return_instance *ri, *tmp;
+	bool chained;
+
+	utask = current->utask;
+	if (!utask)
+		return false;
+
+	ri = utask->return_instances;
+	if (!ri)
+		return false;
+
+	/*
+	 * TODO: we should throw out return_instance's invalidated by
+	 * longjmp(), currently we assume that the probed function always
+	 * returns.
+	 */
+	instruction_pointer_set(regs, ri->orig_ret_vaddr);
+
+	for (;;) {
+		handle_uretprobe_chain(ri, regs);
+
+		chained = ri->chained;
+		put_uprobe(ri->uprobe);
+
+		tmp = ri;
+		ri = ri->next;
+		kfree(tmp);
+
+		if (!chained)
+			break;
+
+		utask->depth--;
+
+		BUG_ON(!ri);
+	}
+
+	utask->return_instances = ri;
+
+	return true;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -1516,8 +1707,15 @@ static void handle_swbp(struct pt_regs *regs)
 	int uninitialized_var(is_swbp);
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
+	if (bp_vaddr == get_trampoline_vaddr()) {
+		if (handle_trampoline(regs))
+			return;
+
+		pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+	}
 
+	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 	if (!uprobe) {
 		if (is_swbp > 0) {
 			/* No matching uprobe; signal SIGTRAP. */
@@ -1616,7 +1814,11 @@ void uprobe_notify_resume(struct pt_regs *regs)
  */
 int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 {
-	if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
+	if (!current->mm)
+		return 0;
+
+	if (!test_bit(MMF_HAS_UPROBES, &current->mm->flags) &&
+	    (!current->utask || !current->utask->return_instances))
 		return 0;
 
 	set_thread_flag(TIF_UPROBE);
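
To show how the new uretprobe pieces fit together (not part of the commit): a consumer may now supply a ret_handler in addition to, or instead of, handler, and uprobe_register() rejects a consumer that sets neither. Below is an untested, module-style sketch; the path_name/offset parameters and helper names are hypothetical, the callback prototypes are inferred from how handler_chain() and handle_uretprobe_chain() invoke them, and it assumes uprobe_register()/uprobe_unregister() are reachable from your code (they may not be exported to modules on this kernel).

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>

static char *path_name = "/bin/bash";	/* file containing the probed code */
static unsigned long offset;		/* file offset of the probed function */
module_param(path_name, charp, 0);
module_param(offset, ulong, 0);

static struct inode *probed_inode;

static int sample_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
	pr_info("uprobe hit, ip=%lx\n", instruction_pointer(regs));
	return 0;	/* do not ask for UPROBE_HANDLER_REMOVE */
}

static int sample_ret_handler(struct uprobe_consumer *self,
			      unsigned long func, struct pt_regs *regs)
{
	/* func is ri->func: the entry address of the returning function */
	pr_info("uretprobe: function at %lx returned\n", func);
	return 0;
}

static struct uprobe_consumer consumer = {
	.handler	= sample_handler,
	.ret_handler	= sample_ret_handler,
};

static int __init sample_init(void)
{
	struct path path;
	int ret;

	ret = kern_path(path_name, LOOKUP_FOLLOW, &path);
	if (ret)
		return ret;

	probed_inode = igrab(path.dentry->d_inode);
	path_put(&path);

	/* offset is a file offset, not a virtual address */
	return uprobe_register(probed_inode, offset, &consumer);
}

static void __exit sample_exit(void)
{
	uprobe_unregister(probed_inode, offset, &consumer);
	iput(probed_inode);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");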