Diffstat (limited to 'kernel/events')
-rw-r--r--   kernel/events/core.c      132
-rw-r--r--   kernel/events/uprobes.c   156
2 files changed, 205 insertions, 83 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 953c14348375..17b3c6cf1606 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -175,8 +175,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
 static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
 static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
 
-static atomic_t perf_sample_allowed_ns __read_mostly =
-	ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);
+static int perf_sample_allowed_ns __read_mostly =
+	DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
 
 void update_perf_cpu_limits(void)
 {
@@ -184,7 +184,7 @@ void update_perf_cpu_limits(void)
 
 	tmp *= sysctl_perf_cpu_time_max_percent;
 	do_div(tmp, 100);
-	atomic_set(&perf_sample_allowed_ns, tmp);
+	ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
 }
 
 static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
@@ -228,14 +228,15 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
  * we detect that events are taking too long.
  */
 #define NR_ACCUMULATED_SAMPLES 128
-DEFINE_PER_CPU(u64, running_sample_length);
+static DEFINE_PER_CPU(u64, running_sample_length);
 
 void perf_sample_event_took(u64 sample_len_ns)
 {
 	u64 avg_local_sample_len;
 	u64 local_samples_len;
+	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
 
-	if (atomic_read(&perf_sample_allowed_ns) == 0)
+	if (allowed_ns == 0)
 		return;
 
 	/* decay the counter by 1 average sample */
@@ -251,7 +252,7 @@ void perf_sample_event_took(u64 sample_len_ns)
 	 */
 	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
 
-	if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
+	if (avg_local_sample_len <= allowed_ns)
 		return;
 
 	if (max_samples_per_tick <= 1)
@@ -262,10 +263,9 @@ void perf_sample_event_took(u64 sample_len_ns)
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
 	printk_ratelimited(KERN_WARNING
-			"perf samples too long (%lld > %d), lowering "
+			"perf samples too long (%lld > %lld), lowering "
 			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len,
-			atomic_read(&perf_sample_allowed_ns),
+			avg_local_sample_len, allowed_ns,
 			sysctl_perf_event_sample_rate);
 
 	update_perf_cpu_limits();
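For reference, the hunks above keep a per-CPU running total that is decayed by one average sample before the new sample time is added, so the comparison against allowed_ns tracks a moving average of recent sample costs rather than a single outlier. A minimal userspace sketch of that bookkeeping, assuming made-up sample values (NR_ACCUMULATED_SAMPLES and the variable names mirror the patch, nothing else is from it):

	#include <stdio.h>
	#include <stdint.h>

	#define NR_ACCUMULATED_SAMPLES 128

	static uint64_t running_sample_length;

	/* Returns 1 when the decayed average exceeds the allowed budget. */
	static int sample_took_too_long(uint64_t sample_len_ns, uint64_t allowed_ns)
	{
		uint64_t avg;

		/* decay the counter by 1 average sample, then add the new one */
		running_sample_length -= running_sample_length / NR_ACCUMULATED_SAMPLES;
		running_sample_length += sample_len_ns;

		avg = running_sample_length / NR_ACCUMULATED_SAMPLES;
		return allowed_ns && avg > allowed_ns;
	}

	int main(void)
	{
		for (int i = 0; i < 1000; i++) {
			if (sample_took_too_long(30000, 25000)) {
				printf("would lower the sample rate at iteration %d\n", i);
				break;
			}
		}
		return 0;
	}

The moving average means a single slow NMI does not immediately throttle the sample rate; the limit only kicks in once slow samples dominate the recent window.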
@@ -899,6 +899,7 @@ static void unclone_ctx(struct perf_event_context *ctx)
 		put_ctx(ctx->parent_ctx);
 		ctx->parent_ctx = NULL;
 	}
+	ctx->generation++;
 }
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1136,6 +1137,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
+
+	ctx->generation++;
 }
 
 /*
@@ -1201,6 +1204,9 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		size += sizeof(data->data_src.val);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		size += sizeof(data->txn);
+
 	event->header_size = size;
 }
 
@@ -1310,6 +1316,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 */
 	if (event->state > PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_OFF;
+
+	ctx->generation++;
 }
 
 static void perf_group_detach(struct perf_event *event)
@@ -2146,22 +2154,38 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 }
 
 /*
- * Test whether two contexts are equivalent, i.e. whether they
- * have both been cloned from the same version of the same context
- * and they both have the same number of enabled events.
- * If the number of enabled events is the same, then the set
- * of enabled events should be the same, because these are both
- * inherited contexts, therefore we can't access individual events
- * in them directly with an fd; we can only enable/disable all
- * events via prctl, or enable/disable all events in a family
- * via ioctl, which will have the same effect on both contexts.
+ * Test whether two contexts are equivalent, i.e. whether they have both been
+ * cloned from the same version of the same context.
+ *
+ * Equivalence is measured using a generation number in the context that is
+ * incremented on each modification to it; see unclone_ctx(), list_add_event()
+ * and list_del_event().
  */
 static int context_equiv(struct perf_event_context *ctx1,
 			 struct perf_event_context *ctx2)
 {
-	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
-		&& ctx1->parent_gen == ctx2->parent_gen
-		&& !ctx1->pin_count && !ctx2->pin_count;
+	/* Pinning disables the swap optimization */
+	if (ctx1->pin_count || ctx2->pin_count)
+		return 0;
+
+	/* If ctx1 is the parent of ctx2 */
+	if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
+		return 1;
+
+	/* If ctx2 is the parent of ctx1 */
+	if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
+		return 1;
+
+	/*
+	 * If ctx1 and ctx2 have the same parent; we flatten the parent
+	 * hierarchy, see perf_event_init_context().
+	 */
+	if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
+	    ctx1->parent_gen == ctx2->parent_gen)
+		return 1;
+
+	/* Unmatched */
+	return 0;
 }
 
 static void __perf_event_sync_stat(struct perf_event *event,
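The new context_equiv() above replaces the old "same parent and same parent_gen" test with a per-context generation counter that is bumped on every modification (unclone_ctx(), list_add_event(), list_del_event()), so two contexts are only considered interchangeable if neither has changed since cloning. A stripped-down, standalone sketch of the same idea using a toy struct in place of perf_event_context (field names follow the patch, everything else is illustrative):

	#include <stdio.h>

	struct ctx {
		struct ctx *parent_ctx;		/* set when cloned from a parent */
		unsigned long long parent_gen;	/* parent's generation at clone time */
		unsigned long long generation;	/* bumped on every add/del/unclone */
		int pin_count;
	};

	/* Two contexts are equivalent only if neither changed since cloning. */
	static int ctx_equiv(struct ctx *c1, struct ctx *c2)
	{
		if (c1->pin_count || c2->pin_count)
			return 0;
		if (c1 == c2->parent_ctx && c1->generation == c2->parent_gen)
			return 1;
		if (c1->parent_ctx == c2 && c1->parent_gen == c2->generation)
			return 1;
		if (c1->parent_ctx && c1->parent_ctx == c2->parent_ctx &&
		    c1->parent_gen == c2->parent_gen)
			return 1;
		return 0;
	}

	int main(void)
	{
		struct ctx parent = { 0 };
		struct ctx child = { .parent_ctx = &parent,
				     .parent_gen = parent.generation };

		printf("equiv before change: %d\n", ctx_equiv(&parent, &child)); /* 1 */
		parent.generation++;	/* e.g. an event was added to the parent */
		printf("equiv after change:  %d\n", ctx_equiv(&parent, &child)); /* 0 */
		return 0;
	}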
@@ -2244,7 +2268,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 {
 	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
 	struct perf_event_context *next_ctx;
-	struct perf_event_context *parent;
+	struct perf_event_context *parent, *next_parent;
 	struct perf_cpu_context *cpuctx;
 	int do_switch = 1;
 
@@ -2256,10 +2280,18 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		return;
 
 	rcu_read_lock();
-	parent = rcu_dereference(ctx->parent_ctx);
 	next_ctx = next->perf_event_ctxp[ctxn];
-	if (parent && next_ctx &&
-	    rcu_dereference(next_ctx->parent_ctx) == parent) {
+	if (!next_ctx)
+		goto unlock;
+
+	parent = rcu_dereference(ctx->parent_ctx);
+	next_parent = rcu_dereference(next_ctx->parent_ctx);
+
+	/* If neither context have a parent context; they cannot be clones. */
+	if (!parent && !next_parent)
+		goto unlock;
+
+	if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
 		/*
 		 * Looks like the two contexts are clones, so we might be
 		 * able to optimize the context switch. We lock both
@@ -2287,6 +2319,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_unlock(&next_ctx->lock);
 		raw_spin_unlock(&ctx->lock);
 	}
+unlock:
 	rcu_read_unlock();
 
 	if (do_switch) {
@@ -4572,6 +4605,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		perf_output_put(handle, data->data_src.val);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		perf_output_put(handle, data->txn);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -5100,24 +5136,23 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
-	const char *name;
-
-	memset(tmp, 0, sizeof(tmp));
+	char *name;
 
 	if (file) {
 		struct inode *inode;
 		dev_t dev;
-		/*
-		 * d_path works from the end of the rb backwards, so we
-		 * need to add enough zero bytes after the string to handle
-		 * the 64bit alignment we do later.
-		 */
-		buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
+
+		buf = kmalloc(PATH_MAX, GFP_KERNEL);
 		if (!buf) {
 			name = strncpy(tmp, "//enomem", sizeof(tmp));
 			goto got_name;
 		}
-		name = d_path(&file->f_path, buf, PATH_MAX);
+		/*
+		 * d_path() works from the end of the rb backwards, so we
+		 * need to add enough zero bytes after the string to handle
+		 * the 64bit alignment we do later.
+		 */
+		name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64));
 		if (IS_ERR(name)) {
 			name = strncpy(tmp, "//toolong", sizeof(tmp));
 			goto got_name;
@@ -5130,21 +5165,19 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		min = MINOR(dev);
 
 	} else {
-		if (arch_vma_name(mmap_event->vma)) {
-			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
-				       sizeof(tmp) - 1);
+		name = (char *)arch_vma_name(vma);
+		if (name) {
+			name = strncpy(tmp, name, sizeof(tmp) - 1);
 			tmp[sizeof(tmp) - 1] = '\0';
 			goto got_name;
 		}
 
-		if (!vma->vm_mm) {
-			name = strncpy(tmp, "[vdso]", sizeof(tmp));
-			goto got_name;
-		} else if (vma->vm_start <= vma->vm_mm->start_brk &&
+		if (vma->vm_start <= vma->vm_mm->start_brk &&
 		    vma->vm_end >= vma->vm_mm->brk) {
 			name = strncpy(tmp, "[heap]", sizeof(tmp));
 			goto got_name;
-		} else if (vma->vm_start <= vma->vm_mm->start_stack &&
+		}
+		if (vma->vm_start <= vma->vm_mm->start_stack &&
 		    vma->vm_end >= vma->vm_mm->start_stack) {
 			name = strncpy(tmp, "[stack]", sizeof(tmp));
 			goto got_name;
@@ -5155,7 +5188,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	}
 
 got_name:
-	size = ALIGN(strlen(name)+1, sizeof(u64));
+	/*
+	 * Since our buffer works in 8 byte units we need to align our string
+	 * size to a multiple of 8. However, we must guarantee the tail end is
+	 * zero'd out to avoid leaking random bits to userspace.
+	 */
+	size = strlen(name)+1;
+	while (!IS_ALIGNED(size, sizeof(u64)))
+		name[size++] = '\0';
 
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
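This is also why d_path() above is now given PATH_MAX - sizeof(u64): the string is padded in place with explicit NUL bytes up to the next 8-byte boundary (with at least 8 spare bytes left at the end of the buffer), instead of relying on a zeroed allocation, so no uninitialized bytes can reach userspace. A small standalone illustration of the padding loop, with IS_ALIGNED open-coded and a made-up buffer and string:

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>

	#define ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

	int main(void)
	{
		char name[32] = "libfoo.so";	/* pretend d_path() wrote this */
		size_t size = strlen(name) + 1;	/* 10: nine chars plus the NUL */

		/* pad with explicit NULs up to the next multiple of 8 */
		while (!ALIGNED(size, sizeof(uint64_t)))
			name[size++] = '\0';

		printf("padded size = %zu\n", size);	/* prints 16 */
		return 0;
	}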
@@ -7126,7 +7166,6 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	perf_install_in_context(ctx, event, event->cpu);
-	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
@@ -7209,7 +7248,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
-	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ad8e1bdca70e..ae9e1d2ef256 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -35,6 +35,7 @@
 #include <linux/kdebug.h>		/* notifier mechanism */
 #include "../../mm/internal.h"		/* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
+#include <linux/task_work.h>
 
 #include <linux/uprobes.h>
 
@@ -1096,21 +1097,22 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 }
 
 /* Slot allocation for XOL */
-static int xol_add_vma(struct xol_area *area)
+static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
-	struct mm_struct *mm = current->mm;
 	int ret = -EALREADY;
 
 	down_write(&mm->mmap_sem);
 	if (mm->uprobes_state.xol_area)
 		goto fail;
 
-	ret = -ENOMEM;
-	/* Try to map as high as possible, this is only a hint. */
-	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
-	if (area->vaddr & ~PAGE_MASK) {
-		ret = area->vaddr;
-		goto fail;
+	if (!area->vaddr) {
+		/* Try to map as high as possible, this is only a hint. */
+		area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
+						PAGE_SIZE, 0, 0);
+		if (area->vaddr & ~PAGE_MASK) {
+			ret = area->vaddr;
+			goto fail;
+		}
 	}
 
 	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
@@ -1120,30 +1122,19 @@ static int xol_add_vma(struct xol_area *area)
 
 	smp_wmb();	/* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
-	ret = 0;
  fail:
 	up_write(&mm->mmap_sem);
 
 	return ret;
 }
 
-/*
- * get_xol_area - Allocate process's xol_area if necessary.
- * This area will be used for storing instructions for execution out of line.
- *
- * Returns the allocated area or NULL.
- */
-static struct xol_area *get_xol_area(void)
+static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
-	struct xol_area *area;
 	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+	struct xol_area *area;
 
-	area = mm->uprobes_state.xol_area;
-	if (area)
-		goto ret;
-
-	area = kzalloc(sizeof(*area), GFP_KERNEL);
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
 		goto out;
 
@@ -1155,13 +1146,14 @@ static struct xol_area *get_xol_area(void)
 	if (!area->page)
 		goto free_bitmap;
 
-	/* allocate first slot of task's xol_area for the return probes */
+	area->vaddr = vaddr;
+	init_waitqueue_head(&area->wq);
+	/* Reserve the 1st slot for get_trampoline_vaddr() */
 	set_bit(0, area->bitmap);
-	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 	atomic_set(&area->slot_count, 1);
-	init_waitqueue_head(&area->wq);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 
-	if (!xol_add_vma(area))
+	if (!xol_add_vma(mm, area))
 		return area;
 
 	__free_page(area->page);
@@ -1170,9 +1162,25 @@ static struct xol_area *get_xol_area(void)
  free_area:
 	kfree(area);
  out:
+	return NULL;
+}
+
+/*
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct xol_area *get_xol_area(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct xol_area *area;
+
+	if (!mm->uprobes_state.xol_area)
+		__create_xol_area(0);
+
 	area = mm->uprobes_state.xol_area;
- ret:
-	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
+	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
 	return area;
 }
 
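get_xol_area() above publishes a fully initialized area via the smp_wmb() in xol_add_vma() and consumes it through smp_read_barrier_depends(), so a racing thread either sees NULL or sees an area whose page, bitmap and first slot are already set up. A userspace analogue of that publish/consume pairing, sketched with C11 atomics (acquire/release stands in for the kernel barriers; the struct and names are illustrative, not the kernel's):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct area {
		void *page;
		unsigned long vaddr;
	};

	/* plays the role of mm->uprobes_state.xol_area */
	static _Atomic(struct area *) xol_area;

	static struct area *create_area(void)
	{
		struct area *a = malloc(sizeof(*a));
		if (!a)
			return NULL;
		a->page = malloc(4096);
		a->vaddr = 0;
		/* release: all initialization above is visible before the pointer is */
		atomic_store_explicit(&xol_area, a, memory_order_release);
		return a;
	}

	static struct area *get_area(void)
	{
		/* acquire: pairs with the release store in create_area() */
		return atomic_load_explicit(&xol_area, memory_order_acquire);
	}

	int main(void)
	{
		if (!get_area())
			create_area();	/* first caller publishes the area */
		return get_area() ? 0 : 1;
	}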
@@ -1345,14 +1353,6 @@ void uprobe_free_utask(struct task_struct *t)
 }
 
 /*
- * Called in context of a new clone/fork from copy_process.
- */
-void uprobe_copy_process(struct task_struct *t)
-{
-	t->utask = NULL;
-}
-
-/*
  * Allocate a uprobe_task object for the task if if necessary.
  * Called when the thread hits a breakpoint.
  *
@@ -1367,6 +1367,90 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
+{
+	struct uprobe_task *n_utask;
+	struct return_instance **p, *o, *n;
+
+	n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+	if (!n_utask)
+		return -ENOMEM;
+	t->utask = n_utask;
+
+	p = &n_utask->return_instances;
+	for (o = o_utask->return_instances; o; o = o->next) {
+		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
+		if (!n)
+			return -ENOMEM;
+
+		*n = *o;
+		atomic_inc(&n->uprobe->ref);
+		n->next = NULL;
+
+		*p = n;
+		p = &n->next;
+		n_utask->depth++;
+	}
+
+	return 0;
+}
+
+static void uprobe_warn(struct task_struct *t, const char *msg)
+{
+	pr_warn("uprobe: %s:%d failed to %s\n",
+			current->comm, current->pid, msg);
+}
+
+static void dup_xol_work(struct callback_head *work)
+{
+	kfree(work);
+
+	if (current->flags & PF_EXITING)
+		return;
+
+	if (!__create_xol_area(current->utask->vaddr))
+		uprobe_warn(current, "dup xol area");
+}
+
+/*
+ * Called in context of a new clone/fork from copy_process.
+ */
+void uprobe_copy_process(struct task_struct *t, unsigned long flags)
+{
+	struct uprobe_task *utask = current->utask;
+	struct mm_struct *mm = current->mm;
+	struct callback_head *work;
+	struct xol_area *area;
+
+	t->utask = NULL;
+
+	if (!utask || !utask->return_instances)
+		return;
+
+	if (mm == t->mm && !(flags & CLONE_VFORK))
+		return;
+
+	if (dup_utask(t, utask))
+		return uprobe_warn(t, "dup ret instances");
+
+	/* The task can fork() after dup_xol_work() fails */
+	area = mm->uprobes_state.xol_area;
+	if (!area)
+		return uprobe_warn(t, "dup xol area");
+
+	if (mm == t->mm)
+		return;
+
+	/* TODO: move it into the union in uprobe_task */
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
+		return uprobe_warn(t, "dup xol area");
+
+	utask->vaddr = area->vaddr;
+	init_task_work(work, dup_xol_work);
+	task_work_add(t, work, true);
+}
+
 /*
  * Current area->vaddr notion assume the trampoline address is always
  * equal area->vaddr.
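In the uprobe_copy_process() hunk above, the child's XOL area cannot be set up from the parent's context when the child gets its own mm, so a callback is queued with task_work_add() and __create_xol_area() runs later in the child itself. The return_instance chain, by contrast, is copied immediately in dup_utask(), which uses a pointer-to-a-pointer tail cursor to preserve order in a single pass with no special case for the first element. The same list-copy technique in a standalone form, assuming a stand-in node type rather than the kernel struct:

	#include <stdio.h>
	#include <stdlib.h>

	struct node {
		int val;
		struct node *next;
	};

	/* Deep-copy a singly linked list, keeping the original order. */
	static struct node *dup_list(const struct node *src)
	{
		struct node *head = NULL;
		struct node **p = &head;	/* always points at the link to fill next */

		for (const struct node *o = src; o; o = o->next) {
			struct node *n = malloc(sizeof(*n));
			if (!n)
				break;		/* dup_utask() returns -ENOMEM here */
			*n = *o;
			n->next = NULL;
			*p = n;
			p = &n->next;
		}
		return head;
	}

	int main(void)
	{
		struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

		for (struct node *n = dup_list(&a); n; n = n->next)
			printf("%d\n", n->val);	/* prints 1 2 3 */
		return 0;
	}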