Diffstat (limited to 'kernel/events')

-rw-r--r--   kernel/events/core.c    | 132
-rw-r--r--   kernel/events/uprobes.c | 156

2 files changed, 205 insertions, 83 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 953c14348375..17b3c6cf1606 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -175,8 +175,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
 static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
 static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
 
-static atomic_t perf_sample_allowed_ns __read_mostly =
-	ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);
+static int perf_sample_allowed_ns __read_mostly =
+	DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
 
 void update_perf_cpu_limits(void)
 {
@@ -184,7 +184,7 @@ void update_perf_cpu_limits(void)
 
 	tmp *= sysctl_perf_cpu_time_max_percent;
 	do_div(tmp, 100);
-	atomic_set(&perf_sample_allowed_ns, tmp);
+	ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
 }
 
 static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
@@ -228,14 +228,15 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
  * we detect that events are taking too long.
  */
 #define NR_ACCUMULATED_SAMPLES 128
-DEFINE_PER_CPU(u64, running_sample_length);
+static DEFINE_PER_CPU(u64, running_sample_length);
 
 void perf_sample_event_took(u64 sample_len_ns)
 {
 	u64 avg_local_sample_len;
 	u64 local_samples_len;
+	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
 
-	if (atomic_read(&perf_sample_allowed_ns) == 0)
+	if (allowed_ns == 0)
 		return;
 
 	/* decay the counter by 1 average sample */
@@ -251,7 +252,7 @@ void perf_sample_event_took(u64 sample_len_ns)
 	 */
 	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
 
-	if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
+	if (avg_local_sample_len <= allowed_ns)
 		return;
 
 	if (max_samples_per_tick <= 1)
@@ -262,10 +263,9 @@ void perf_sample_event_took(u64 sample_len_ns)
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
 	printk_ratelimited(KERN_WARNING
-			"perf samples too long (%lld > %d), lowering "
+			"perf samples too long (%lld > %lld), lowering "
 			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len,
-			atomic_read(&perf_sample_allowed_ns),
+			avg_local_sample_len, allowed_ns,
 			sysctl_perf_event_sample_rate);
 
 	update_perf_cpu_limits();
@@ -899,6 +899,7 @@ static void unclone_ctx(struct perf_event_context *ctx)
 		put_ctx(ctx->parent_ctx);
 		ctx->parent_ctx = NULL;
 	}
+	ctx->generation++;
 }
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1136,6 +1137,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
+
+	ctx->generation++;
 }
 
 /*
@@ -1201,6 +1204,9 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		size += sizeof(data->data_src.val);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		size += sizeof(data->txn);
+
 	event->header_size = size;
 }
 
@@ -1310,6 +1316,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 */
 	if (event->state > PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_OFF;
+
+	ctx->generation++;
 }
 
 static void perf_group_detach(struct perf_event *event)
@@ -2146,22 +2154,38 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 }
 
 /*
- * Test whether two contexts are equivalent, i.e. whether they
- * have both been cloned from the same version of the same context
- * and they both have the same number of enabled events.
- * If the number of enabled events is the same, then the set
- * of enabled events should be the same, because these are both
- * inherited contexts, therefore we can't access individual events
- * in them directly with an fd; we can only enable/disable all
- * events via prctl, or enable/disable all events in a family
- * via ioctl, which will have the same effect on both contexts.
+ * Test whether two contexts are equivalent, i.e. whether they have both been
+ * cloned from the same version of the same context.
+ *
+ * Equivalence is measured using a generation number in the context that is
+ * incremented on each modification to it; see unclone_ctx(), list_add_event()
+ * and list_del_event().
  */
 static int context_equiv(struct perf_event_context *ctx1,
 			 struct perf_event_context *ctx2)
 {
-	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
-		&& ctx1->parent_gen == ctx2->parent_gen
-		&& !ctx1->pin_count && !ctx2->pin_count;
+	/* Pinning disables the swap optimization */
+	if (ctx1->pin_count || ctx2->pin_count)
+		return 0;
+
+	/* If ctx1 is the parent of ctx2 */
+	if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
+		return 1;
+
+	/* If ctx2 is the parent of ctx1 */
+	if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
+		return 1;
+
+	/*
+	 * If ctx1 and ctx2 have the same parent; we flatten the parent
+	 * hierarchy, see perf_event_init_context().
+	 */
+	if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
+	    ctx1->parent_gen == ctx2->parent_gen)
+		return 1;
+
+	/* Unmatched */
+	return 0;
 }
 
 static void __perf_event_sync_stat(struct perf_event *event,
@@ -2244,7 +2268,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 {
 	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
 	struct perf_event_context *next_ctx;
-	struct perf_event_context *parent;
+	struct perf_event_context *parent, *next_parent;
 	struct perf_cpu_context *cpuctx;
 	int do_switch = 1;
 
@@ -2256,10 +2280,18 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		return;
 
 	rcu_read_lock();
-	parent = rcu_dereference(ctx->parent_ctx);
 	next_ctx = next->perf_event_ctxp[ctxn];
-	if (parent && next_ctx &&
-	    rcu_dereference(next_ctx->parent_ctx) == parent) {
+	if (!next_ctx)
+		goto unlock;
+
+	parent = rcu_dereference(ctx->parent_ctx);
+	next_parent = rcu_dereference(next_ctx->parent_ctx);
+
+	/* If neither context have a parent context; they cannot be clones. */
+	if (!parent && !next_parent)
+		goto unlock;
+
+	if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
 		/*
 		 * Looks like the two contexts are clones, so we might be
 		 * able to optimize the context switch. We lock both
@@ -2287,6 +2319,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_unlock(&next_ctx->lock);
 		raw_spin_unlock(&ctx->lock);
 	}
+unlock:
 	rcu_read_unlock();
 
 	if (do_switch) {
@@ -4572,6 +4605,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		perf_output_put(handle, data->data_src.val);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		perf_output_put(handle, data->txn);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -5100,24 +5136,23 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
-	const char *name;
-
-	memset(tmp, 0, sizeof(tmp));
+	char *name;
 
 	if (file) {
 		struct inode *inode;
 		dev_t dev;
-		/*
-		 * d_path works from the end of the rb backwards, so we
-		 * need to add enough zero bytes after the string to handle
-		 * the 64bit alignment we do later.
-		 */
-		buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
+
+		buf = kmalloc(PATH_MAX, GFP_KERNEL);
 		if (!buf) {
 			name = strncpy(tmp, "//enomem", sizeof(tmp));
 			goto got_name;
 		}
-		name = d_path(&file->f_path, buf, PATH_MAX);
+		/*
+		 * d_path() works from the end of the rb backwards, so we
+		 * need to add enough zero bytes after the string to handle
+		 * the 64bit alignment we do later.
+		 */
+		name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64));
 		if (IS_ERR(name)) {
 			name = strncpy(tmp, "//toolong", sizeof(tmp));
 			goto got_name;
@@ -5130,21 +5165,19 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		min = MINOR(dev);
 
 	} else {
-		if (arch_vma_name(mmap_event->vma)) {
-			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
-				       sizeof(tmp) - 1);
+		name = (char *)arch_vma_name(vma);
+		if (name) {
+			name = strncpy(tmp, name, sizeof(tmp) - 1);
 			tmp[sizeof(tmp) - 1] = '\0';
 			goto got_name;
 		}
 
-		if (!vma->vm_mm) {
-			name = strncpy(tmp, "[vdso]", sizeof(tmp));
-			goto got_name;
-		} else if (vma->vm_start <= vma->vm_mm->start_brk &&
+		if (vma->vm_start <= vma->vm_mm->start_brk &&
 				vma->vm_end >= vma->vm_mm->brk) {
 			name = strncpy(tmp, "[heap]", sizeof(tmp));
 			goto got_name;
-		} else if (vma->vm_start <= vma->vm_mm->start_stack &&
+		}
+		if (vma->vm_start <= vma->vm_mm->start_stack &&
 				vma->vm_end >= vma->vm_mm->start_stack) {
 			name = strncpy(tmp, "[stack]", sizeof(tmp));
 			goto got_name;
@@ -5155,7 +5188,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	}
 
 got_name:
-	size = ALIGN(strlen(name)+1, sizeof(u64));
+	/*
+	 * Since our buffer works in 8 byte units we need to align our string
+	 * size to a multiple of 8. However, we must guarantee the tail end is
+	 * zero'd out to avoid leaking random bits to userspace.
+	 */
+	size = strlen(name)+1;
+	while (!IS_ALIGNED(size, sizeof(u64)))
+		name[size++] = '\0';
 
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
@@ -7126,7 +7166,6 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	perf_install_in_context(ctx, event, event->cpu);
-	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
@@ -7209,7 +7248,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
-	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ad8e1bdca70e..ae9e1d2ef256 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -35,6 +35,7 @@
 #include <linux/kdebug.h>	/* notifier mechanism */
 #include "../../mm/internal.h"	/* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
+#include <linux/task_work.h>
 
 #include <linux/uprobes.h>
 
@@ -1096,21 +1097,22 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 }
 
 /* Slot allocation for XOL */
-static int xol_add_vma(struct xol_area *area)
+static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
-	struct mm_struct *mm = current->mm;
 	int ret = -EALREADY;
 
 	down_write(&mm->mmap_sem);
 	if (mm->uprobes_state.xol_area)
 		goto fail;
 
-	ret = -ENOMEM;
-	/* Try to map as high as possible, this is only a hint. */
-	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
-	if (area->vaddr & ~PAGE_MASK) {
-		ret = area->vaddr;
-		goto fail;
+	if (!area->vaddr) {
+		/* Try to map as high as possible, this is only a hint. */
+		area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
+						PAGE_SIZE, 0, 0);
+		if (area->vaddr & ~PAGE_MASK) {
+			ret = area->vaddr;
+			goto fail;
+		}
 	}
 
 	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
@@ -1120,30 +1122,19 @@ static int xol_add_vma(struct xol_area *area)
 
 	smp_wmb();	/* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
-	ret = 0;
 fail:
 	up_write(&mm->mmap_sem);
 
 	return ret;
 }
 
-/*
- * get_xol_area - Allocate process's xol_area if necessary.
- * This area will be used for storing instructions for execution out of line.
- *
- * Returns the allocated area or NULL.
- */
-static struct xol_area *get_xol_area(void)
+static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
-	struct xol_area *area;
 	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+	struct xol_area *area;
 
-	area = mm->uprobes_state.xol_area;
-	if (area)
-		goto ret;
-
-	area = kzalloc(sizeof(*area), GFP_KERNEL);
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
 		goto out;
 
@@ -1155,13 +1146,14 @@ static struct xol_area *get_xol_area(void)
 	if (!area->page)
 		goto free_bitmap;
 
-	/* allocate first slot of task's xol_area for the return probes */
+	area->vaddr = vaddr;
+	init_waitqueue_head(&area->wq);
+	/* Reserve the 1st slot for get_trampoline_vaddr() */
 	set_bit(0, area->bitmap);
-	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 	atomic_set(&area->slot_count, 1);
-	init_waitqueue_head(&area->wq);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 
-	if (!xol_add_vma(area))
+	if (!xol_add_vma(mm, area))
 		return area;
 
 	__free_page(area->page);
@@ -1170,9 +1162,25 @@ static struct xol_area *get_xol_area(void)
 free_area:
 	kfree(area);
 out:
+	return NULL;
+}
+
+/*
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct xol_area *get_xol_area(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct xol_area *area;
+
+	if (!mm->uprobes_state.xol_area)
+		__create_xol_area(0);
+
 	area = mm->uprobes_state.xol_area;
-ret:
-	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
+	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
 	return area;
 }
 
@@ -1345,14 +1353,6 @@ void uprobe_free_utask(struct task_struct *t)
 }
 
 /*
- * Called in context of a new clone/fork from copy_process.
- */
-void uprobe_copy_process(struct task_struct *t)
-{
-	t->utask = NULL;
-}
-
-/*
  * Allocate a uprobe_task object for the task if if necessary.
  * Called when the thread hits a breakpoint.
  *
@@ -1367,6 +1367,90 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
+{
+	struct uprobe_task *n_utask;
+	struct return_instance **p, *o, *n;
+
+	n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+	if (!n_utask)
+		return -ENOMEM;
+	t->utask = n_utask;
+
+	p = &n_utask->return_instances;
+	for (o = o_utask->return_instances; o; o = o->next) {
+		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
+		if (!n)
+			return -ENOMEM;
+
+		*n = *o;
+		atomic_inc(&n->uprobe->ref);
+		n->next = NULL;
+
+		*p = n;
+		p = &n->next;
+		n_utask->depth++;
+	}
+
+	return 0;
+}
+
+static void uprobe_warn(struct task_struct *t, const char *msg)
+{
+	pr_warn("uprobe: %s:%d failed to %s\n",
+			current->comm, current->pid, msg);
+}
+
+static void dup_xol_work(struct callback_head *work)
+{
+	kfree(work);
+
+	if (current->flags & PF_EXITING)
+		return;
+
+	if (!__create_xol_area(current->utask->vaddr))
+		uprobe_warn(current, "dup xol area");
+}
+
+/*
+ * Called in context of a new clone/fork from copy_process.
+ */
+void uprobe_copy_process(struct task_struct *t, unsigned long flags)
+{
+	struct uprobe_task *utask = current->utask;
+	struct mm_struct *mm = current->mm;
+	struct callback_head *work;
+	struct xol_area *area;
+
+	t->utask = NULL;
+
+	if (!utask || !utask->return_instances)
+		return;
+
+	if (mm == t->mm && !(flags & CLONE_VFORK))
+		return;
+
+	if (dup_utask(t, utask))
+		return uprobe_warn(t, "dup ret instances");
+
+	/* The task can fork() after dup_xol_work() fails */
+	area = mm->uprobes_state.xol_area;
+	if (!area)
+		return uprobe_warn(t, "dup xol area");
+
+	if (mm == t->mm)
+		return;
+
+	/* TODO: move it into the union in uprobe_task */
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
+		return uprobe_warn(t, "dup xol area");
+
+	utask->vaddr = area->vaddr;
+	init_task_work(work, dup_xol_work);
+	task_work_add(t, work, true);
+}
+
 /*
  * Current area->vaddr notion assume the trampoline address is always
  * equal area->vaddr.