Diffstat (limited to 'kernel')

 -rw-r--r--  kernel/events/core.c        | 156
 -rw-r--r--  kernel/events/internal.h    |  35
 -rw-r--r--  kernel/events/ring_buffer.c | 101
 -rw-r--r--  kernel/events/uprobes.c     | 223
 -rw-r--r--  kernel/fork.c               |   2
 -rw-r--r--  kernel/sysctl.c             |   1

 6 files changed, 314 insertions, 204 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 663f43a20f73..8c875ef6e120 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -175,8 +175,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE; | |||
175 | static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ); | 175 | static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ); |
176 | static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS; | 176 | static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS; |
177 | 177 | ||
178 | static atomic_t perf_sample_allowed_ns __read_mostly = | 178 | static int perf_sample_allowed_ns __read_mostly = |
179 | ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100); | 179 | DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100; |
180 | 180 | ||
181 | void update_perf_cpu_limits(void) | 181 | void update_perf_cpu_limits(void) |
182 | { | 182 | { |
@@ -184,7 +184,7 @@ void update_perf_cpu_limits(void) | |||
184 | 184 | ||
185 | tmp *= sysctl_perf_cpu_time_max_percent; | 185 | tmp *= sysctl_perf_cpu_time_max_percent; |
186 | do_div(tmp, 100); | 186 | do_div(tmp, 100); |
187 | atomic_set(&perf_sample_allowed_ns, tmp); | 187 | ACCESS_ONCE(perf_sample_allowed_ns) = tmp; |
188 | } | 188 | } |
189 | 189 | ||
190 | static int perf_rotate_context(struct perf_cpu_context *cpuctx); | 190 | static int perf_rotate_context(struct perf_cpu_context *cpuctx); |
@@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write, | |||
193 | void __user *buffer, size_t *lenp, | 193 | void __user *buffer, size_t *lenp, |
194 | loff_t *ppos) | 194 | loff_t *ppos) |
195 | { | 195 | { |
196 | int ret = proc_dointvec(table, write, buffer, lenp, ppos); | 196 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
197 | 197 | ||
198 | if (ret || !write) | 198 | if (ret || !write) |
199 | return ret; | 199 | return ret; |
@@ -228,14 +228,15 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, | |||
228 | * we detect that events are taking too long. | 228 | * we detect that events are taking too long. |
229 | */ | 229 | */ |
230 | #define NR_ACCUMULATED_SAMPLES 128 | 230 | #define NR_ACCUMULATED_SAMPLES 128 |
231 | DEFINE_PER_CPU(u64, running_sample_length); | 231 | static DEFINE_PER_CPU(u64, running_sample_length); |
232 | 232 | ||
233 | void perf_sample_event_took(u64 sample_len_ns) | 233 | void perf_sample_event_took(u64 sample_len_ns) |
234 | { | 234 | { |
235 | u64 avg_local_sample_len; | 235 | u64 avg_local_sample_len; |
236 | u64 local_samples_len; | 236 | u64 local_samples_len; |
237 | u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns); | ||
237 | 238 | ||
238 | if (atomic_read(&perf_sample_allowed_ns) == 0) | 239 | if (allowed_ns == 0) |
239 | return; | 240 | return; |
240 | 241 | ||
241 | /* decay the counter by 1 average sample */ | 242 | /* decay the counter by 1 average sample */ |
@@ -251,7 +252,7 @@ void perf_sample_event_took(u64 sample_len_ns) | |||
251 | */ | 252 | */ |
252 | avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; | 253 | avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; |
253 | 254 | ||
254 | if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns)) | 255 | if (avg_local_sample_len <= allowed_ns) |
255 | return; | 256 | return; |
256 | 257 | ||
257 | if (max_samples_per_tick <= 1) | 258 | if (max_samples_per_tick <= 1) |
@@ -262,10 +263,9 @@ void perf_sample_event_took(u64 sample_len_ns) | |||
262 | perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; | 263 | perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; |
263 | 264 | ||
264 | printk_ratelimited(KERN_WARNING | 265 | printk_ratelimited(KERN_WARNING |
265 | "perf samples too long (%lld > %d), lowering " | 266 | "perf samples too long (%lld > %lld), lowering " |
266 | "kernel.perf_event_max_sample_rate to %d\n", | 267 | "kernel.perf_event_max_sample_rate to %d\n", |
267 | avg_local_sample_len, | 268 | avg_local_sample_len, allowed_ns, |
268 | atomic_read(&perf_sample_allowed_ns), | ||
269 | sysctl_perf_event_sample_rate); | 269 | sysctl_perf_event_sample_rate); |
270 | 270 | ||
271 | update_perf_cpu_limits(); | 271 | update_perf_cpu_limits(); |
@@ -899,6 +899,7 @@ static void unclone_ctx(struct perf_event_context *ctx) | |||
899 | put_ctx(ctx->parent_ctx); | 899 | put_ctx(ctx->parent_ctx); |
900 | ctx->parent_ctx = NULL; | 900 | ctx->parent_ctx = NULL; |
901 | } | 901 | } |
902 | ctx->generation++; | ||
902 | } | 903 | } |
903 | 904 | ||
904 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | 905 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) |
@@ -1136,6 +1137,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
1136 | ctx->nr_events++; | 1137 | ctx->nr_events++; |
1137 | if (event->attr.inherit_stat) | 1138 | if (event->attr.inherit_stat) |
1138 | ctx->nr_stat++; | 1139 | ctx->nr_stat++; |
1140 | |||
1141 | ctx->generation++; | ||
1139 | } | 1142 | } |
1140 | 1143 | ||
1141 | /* | 1144 | /* |
@@ -1201,6 +1204,9 @@ static void perf_event__header_size(struct perf_event *event) | |||
1201 | if (sample_type & PERF_SAMPLE_DATA_SRC) | 1204 | if (sample_type & PERF_SAMPLE_DATA_SRC) |
1202 | size += sizeof(data->data_src.val); | 1205 | size += sizeof(data->data_src.val); |
1203 | 1206 | ||
1207 | if (sample_type & PERF_SAMPLE_TRANSACTION) | ||
1208 | size += sizeof(data->txn); | ||
1209 | |||
1204 | event->header_size = size; | 1210 | event->header_size = size; |
1205 | } | 1211 | } |
1206 | 1212 | ||
@@ -1310,6 +1316,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
1310 | */ | 1316 | */ |
1311 | if (event->state > PERF_EVENT_STATE_OFF) | 1317 | if (event->state > PERF_EVENT_STATE_OFF) |
1312 | event->state = PERF_EVENT_STATE_OFF; | 1318 | event->state = PERF_EVENT_STATE_OFF; |
1319 | |||
1320 | ctx->generation++; | ||
1313 | } | 1321 | } |
1314 | 1322 | ||
1315 | static void perf_group_detach(struct perf_event *event) | 1323 | static void perf_group_detach(struct perf_event *event) |
@@ -2146,22 +2154,38 @@ static void ctx_sched_out(struct perf_event_context *ctx, | |||
2146 | } | 2154 | } |
2147 | 2155 | ||
2148 | /* | 2156 | /* |
2149 | * Test whether two contexts are equivalent, i.e. whether they | 2157 | * Test whether two contexts are equivalent, i.e. whether they have both been |
2150 | * have both been cloned from the same version of the same context | 2158 | * cloned from the same version of the same context. |
2151 | * and they both have the same number of enabled events. | 2159 | * |
2152 | * If the number of enabled events is the same, then the set | 2160 | * Equivalence is measured using a generation number in the context that is |
2153 | * of enabled events should be the same, because these are both | 2161 | * incremented on each modification to it; see unclone_ctx(), list_add_event() |
2154 | * inherited contexts, therefore we can't access individual events | 2162 | * and list_del_event(). |
2155 | * in them directly with an fd; we can only enable/disable all | ||
2156 | * events via prctl, or enable/disable all events in a family | ||
2157 | * via ioctl, which will have the same effect on both contexts. | ||
2158 | */ | 2163 | */ |
2159 | static int context_equiv(struct perf_event_context *ctx1, | 2164 | static int context_equiv(struct perf_event_context *ctx1, |
2160 | struct perf_event_context *ctx2) | 2165 | struct perf_event_context *ctx2) |
2161 | { | 2166 | { |
2162 | return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx | 2167 | /* Pinning disables the swap optimization */ |
2163 | && ctx1->parent_gen == ctx2->parent_gen | 2168 | if (ctx1->pin_count || ctx2->pin_count) |
2164 | && !ctx1->pin_count && !ctx2->pin_count; | 2169 | return 0; |
2170 | |||
2171 | /* If ctx1 is the parent of ctx2 */ | ||
2172 | if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen) | ||
2173 | return 1; | ||
2174 | |||
2175 | /* If ctx2 is the parent of ctx1 */ | ||
2176 | if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation) | ||
2177 | return 1; | ||
2178 | |||
2179 | /* | ||
2180 | * If ctx1 and ctx2 have the same parent; we flatten the parent | ||
2181 | * hierarchy, see perf_event_init_context(). | ||
2182 | */ | ||
2183 | if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx && | ||
2184 | ctx1->parent_gen == ctx2->parent_gen) | ||
2185 | return 1; | ||
2186 | |||
2187 | /* Unmatched */ | ||
2188 | return 0; | ||
2165 | } | 2189 | } |
2166 | 2190 | ||
2167 | static void __perf_event_sync_stat(struct perf_event *event, | 2191 | static void __perf_event_sync_stat(struct perf_event *event, |
@@ -2244,7 +2268,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, | |||
2244 | { | 2268 | { |
2245 | struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; | 2269 | struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; |
2246 | struct perf_event_context *next_ctx; | 2270 | struct perf_event_context *next_ctx; |
2247 | struct perf_event_context *parent; | 2271 | struct perf_event_context *parent, *next_parent; |
2248 | struct perf_cpu_context *cpuctx; | 2272 | struct perf_cpu_context *cpuctx; |
2249 | int do_switch = 1; | 2273 | int do_switch = 1; |
2250 | 2274 | ||
@@ -2256,10 +2280,18 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, | |||
2256 | return; | 2280 | return; |
2257 | 2281 | ||
2258 | rcu_read_lock(); | 2282 | rcu_read_lock(); |
2259 | parent = rcu_dereference(ctx->parent_ctx); | ||
2260 | next_ctx = next->perf_event_ctxp[ctxn]; | 2283 | next_ctx = next->perf_event_ctxp[ctxn]; |
2261 | if (parent && next_ctx && | 2284 | if (!next_ctx) |
2262 | rcu_dereference(next_ctx->parent_ctx) == parent) { | 2285 | goto unlock; |
2286 | |||
2287 | parent = rcu_dereference(ctx->parent_ctx); | ||
2288 | next_parent = rcu_dereference(next_ctx->parent_ctx); | ||
2289 | |||
2290 | /* If neither context have a parent context; they cannot be clones. */ | ||
2291 | if (!parent && !next_parent) | ||
2292 | goto unlock; | ||
2293 | |||
2294 | if (next_parent == ctx || next_ctx == parent || next_parent == parent) { | ||
2263 | /* | 2295 | /* |
2264 | * Looks like the two contexts are clones, so we might be | 2296 | * Looks like the two contexts are clones, so we might be |
2265 | * able to optimize the context switch. We lock both | 2297 | * able to optimize the context switch. We lock both |
@@ -2287,6 +2319,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, | |||
2287 | raw_spin_unlock(&next_ctx->lock); | 2319 | raw_spin_unlock(&next_ctx->lock); |
2288 | raw_spin_unlock(&ctx->lock); | 2320 | raw_spin_unlock(&ctx->lock); |
2289 | } | 2321 | } |
2322 | unlock: | ||
2290 | rcu_read_unlock(); | 2323 | rcu_read_unlock(); |
2291 | 2324 | ||
2292 | if (do_switch) { | 2325 | if (do_switch) { |
@@ -4572,6 +4605,9 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
4572 | if (sample_type & PERF_SAMPLE_DATA_SRC) | 4605 | if (sample_type & PERF_SAMPLE_DATA_SRC) |
4573 | perf_output_put(handle, data->data_src.val); | 4606 | perf_output_put(handle, data->data_src.val); |
4574 | 4607 | ||
4608 | if (sample_type & PERF_SAMPLE_TRANSACTION) | ||
4609 | perf_output_put(handle, data->txn); | ||
4610 | |||
4575 | if (!event->attr.watermark) { | 4611 | if (!event->attr.watermark) { |
4576 | int wakeup_events = event->attr.wakeup_events; | 4612 | int wakeup_events = event->attr.wakeup_events; |
4577 | 4613 | ||
@@ -5100,27 +5136,26 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
5100 | unsigned int size; | 5136 | unsigned int size; |
5101 | char tmp[16]; | 5137 | char tmp[16]; |
5102 | char *buf = NULL; | 5138 | char *buf = NULL; |
5103 | const char *name; | 5139 | char *name; |
5104 | |||
5105 | memset(tmp, 0, sizeof(tmp)); | ||
5106 | 5140 | ||
5107 | if (file) { | 5141 | if (file) { |
5108 | struct inode *inode; | 5142 | struct inode *inode; |
5109 | dev_t dev; | 5143 | dev_t dev; |
5144 | |||
5145 | buf = kmalloc(PATH_MAX, GFP_KERNEL); | ||
5146 | if (!buf) { | ||
5147 | name = "//enomem"; | ||
5148 | goto cpy_name; | ||
5149 | } | ||
5110 | /* | 5150 | /* |
5111 | * d_path works from the end of the rb backwards, so we | 5151 | * d_path() works from the end of the rb backwards, so we |
5112 | * need to add enough zero bytes after the string to handle | 5152 | * need to add enough zero bytes after the string to handle |
5113 | * the 64bit alignment we do later. | 5153 | * the 64bit alignment we do later. |
5114 | */ | 5154 | */ |
5115 | buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL); | 5155 | name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64)); |
5116 | if (!buf) { | ||
5117 | name = strncpy(tmp, "//enomem", sizeof(tmp)); | ||
5118 | goto got_name; | ||
5119 | } | ||
5120 | name = d_path(&file->f_path, buf, PATH_MAX); | ||
5121 | if (IS_ERR(name)) { | 5156 | if (IS_ERR(name)) { |
5122 | name = strncpy(tmp, "//toolong", sizeof(tmp)); | 5157 | name = "//toolong"; |
5123 | goto got_name; | 5158 | goto cpy_name; |
5124 | } | 5159 | } |
5125 | inode = file_inode(vma->vm_file); | 5160 | inode = file_inode(vma->vm_file); |
5126 | dev = inode->i_sb->s_dev; | 5161 | dev = inode->i_sb->s_dev; |
@@ -5128,34 +5163,39 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
5128 | gen = inode->i_generation; | 5163 | gen = inode->i_generation; |
5129 | maj = MAJOR(dev); | 5164 | maj = MAJOR(dev); |
5130 | min = MINOR(dev); | 5165 | min = MINOR(dev); |
5131 | 5166 | goto got_name; | |
5132 | } else { | 5167 | } else { |
5133 | if (arch_vma_name(mmap_event->vma)) { | 5168 | name = (char *)arch_vma_name(vma); |
5134 | name = strncpy(tmp, arch_vma_name(mmap_event->vma), | 5169 | if (name) |
5135 | sizeof(tmp) - 1); | 5170 | goto cpy_name; |
5136 | tmp[sizeof(tmp) - 1] = '\0'; | ||
5137 | goto got_name; | ||
5138 | } | ||
5139 | 5171 | ||
5140 | if (!vma->vm_mm) { | 5172 | if (vma->vm_start <= vma->vm_mm->start_brk && |
5141 | name = strncpy(tmp, "[vdso]", sizeof(tmp)); | ||
5142 | goto got_name; | ||
5143 | } else if (vma->vm_start <= vma->vm_mm->start_brk && | ||
5144 | vma->vm_end >= vma->vm_mm->brk) { | 5173 | vma->vm_end >= vma->vm_mm->brk) { |
5145 | name = strncpy(tmp, "[heap]", sizeof(tmp)); | 5174 | name = "[heap]"; |
5146 | goto got_name; | 5175 | goto cpy_name; |
5147 | } else if (vma->vm_start <= vma->vm_mm->start_stack && | 5176 | } |
5177 | if (vma->vm_start <= vma->vm_mm->start_stack && | ||
5148 | vma->vm_end >= vma->vm_mm->start_stack) { | 5178 | vma->vm_end >= vma->vm_mm->start_stack) { |
5149 | name = strncpy(tmp, "[stack]", sizeof(tmp)); | 5179 | name = "[stack]"; |
5150 | goto got_name; | 5180 | goto cpy_name; |
5151 | } | 5181 | } |
5152 | 5182 | ||
5153 | name = strncpy(tmp, "//anon", sizeof(tmp)); | 5183 | name = "//anon"; |
5154 | goto got_name; | 5184 | goto cpy_name; |
5155 | } | 5185 | } |
5156 | 5186 | ||
5187 | cpy_name: | ||
5188 | strlcpy(tmp, name, sizeof(tmp)); | ||
5189 | name = tmp; | ||
5157 | got_name: | 5190 | got_name: |
5158 | size = ALIGN(strlen(name)+1, sizeof(u64)); | 5191 | /* |
5192 | * Since our buffer works in 8 byte units we need to align our string | ||
5193 | * size to a multiple of 8. However, we must guarantee the tail end is | ||
5194 | * zero'd out to avoid leaking random bits to userspace. | ||
5195 | */ | ||
5196 | size = strlen(name)+1; | ||
5197 | while (!IS_ALIGNED(size, sizeof(u64))) | ||
5198 | name[size++] = '\0'; | ||
5159 | 5199 | ||
5160 | mmap_event->file_name = name; | 5200 | mmap_event->file_name = name; |
5161 | mmap_event->file_size = size; | 5201 | mmap_event->file_size = size; |
@@ -7129,7 +7169,6 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7129 | } | 7169 | } |
7130 | 7170 | ||
7131 | perf_install_in_context(ctx, event, event->cpu); | 7171 | perf_install_in_context(ctx, event, event->cpu); |
7132 | ++ctx->generation; | ||
7133 | perf_unpin_context(ctx); | 7172 | perf_unpin_context(ctx); |
7134 | mutex_unlock(&ctx->mutex); | 7173 | mutex_unlock(&ctx->mutex); |
7135 | 7174 | ||
@@ -7212,7 +7251,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
7212 | WARN_ON_ONCE(ctx->parent_ctx); | 7251 | WARN_ON_ONCE(ctx->parent_ctx); |
7213 | mutex_lock(&ctx->mutex); | 7252 | mutex_lock(&ctx->mutex); |
7214 | perf_install_in_context(ctx, event, cpu); | 7253 | perf_install_in_context(ctx, event, cpu); |
7215 | ++ctx->generation; | ||
7216 | perf_unpin_context(ctx); | 7254 | perf_unpin_context(ctx); |
7217 | mutex_unlock(&ctx->mutex); | 7255 | mutex_unlock(&ctx->mutex); |
7218 | 7256 | ||
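The got_name path in the perf_event_mmap_event() hunk above now pads the name string in place up to the next multiple of sizeof(u64) and zeroes the tail, instead of relying on a zeroed kzalloc() buffer, so no uninitialized bytes leak to userspace. A minimal userspace sketch of that padding rule follows; record_name() and its fixed-size buffer are illustrative stand-ins, not code from the patch.

#include <stdio.h>
#include <string.h>

/* Round up to the next multiple of 8, mirroring the u64 units used by perf. */
#define ALIGN_U64(x) (((x) + 7UL) & ~7UL)

static size_t record_name(char *buf, size_t buflen, const char *name)
{
        size_t size = strlen(name) + 1;         /* include the trailing NUL */

        if (ALIGN_U64(size) > buflen)
                return 0;                       /* caller must provide enough room */

        memcpy(buf, name, size);
        while (size & 7)                        /* zero-pad to an 8-byte multiple */
                buf[size++] = '\0';
        return size;
}

int main(void)
{
        char buf[64];

        printf("%zu\n", record_name(buf, sizeof(buf), "[stack]")); /* prints 8 */
        printf("%zu\n", record_name(buf, sizeof(buf), "//anon"));  /* prints 8 */
        return 0;
}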
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ca6599723be5..569b218782ad 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,16 +82,16 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) | |||
82 | } | 82 | } |
83 | 83 | ||
84 | #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ | 84 | #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ |
85 | static inline unsigned int \ | 85 | static inline unsigned long \ |
86 | func_name(struct perf_output_handle *handle, \ | 86 | func_name(struct perf_output_handle *handle, \ |
87 | const void *buf, unsigned int len) \ | 87 | const void *buf, unsigned long len) \ |
88 | { \ | 88 | { \ |
89 | unsigned long size, written; \ | 89 | unsigned long size, written; \ |
90 | \ | 90 | \ |
91 | do { \ | 91 | do { \ |
92 | size = min_t(unsigned long, handle->size, len); \ | 92 | size = min(handle->size, len); \ |
93 | \ | ||
94 | written = memcpy_func(handle->addr, buf, size); \ | 93 | written = memcpy_func(handle->addr, buf, size); \ |
94 | written = size - written; \ | ||
95 | \ | 95 | \ |
96 | len -= written; \ | 96 | len -= written; \ |
97 | handle->addr += written; \ | 97 | handle->addr += written; \ |
@@ -110,20 +110,37 @@ func_name(struct perf_output_handle *handle, \ | |||
110 | return len; \ | 110 | return len; \ |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline int memcpy_common(void *dst, const void *src, size_t n) | 113 | static inline unsigned long |
114 | memcpy_common(void *dst, const void *src, unsigned long n) | ||
114 | { | 115 | { |
115 | memcpy(dst, src, n); | 116 | memcpy(dst, src, n); |
116 | return n; | 117 | return 0; |
117 | } | 118 | } |
118 | 119 | ||
119 | DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) | 120 | DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) |
120 | 121 | ||
121 | #define MEMCPY_SKIP(dst, src, n) (n) | 122 | static inline unsigned long |
123 | memcpy_skip(void *dst, const void *src, unsigned long n) | ||
124 | { | ||
125 | return 0; | ||
126 | } | ||
122 | 127 | ||
123 | DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP) | 128 | DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip) |
124 | 129 | ||
125 | #ifndef arch_perf_out_copy_user | 130 | #ifndef arch_perf_out_copy_user |
126 | #define arch_perf_out_copy_user __copy_from_user_inatomic | 131 | #define arch_perf_out_copy_user arch_perf_out_copy_user |
132 | |||
133 | static inline unsigned long | ||
134 | arch_perf_out_copy_user(void *dst, const void *src, unsigned long n) | ||
135 | { | ||
136 | unsigned long ret; | ||
137 | |||
138 | pagefault_disable(); | ||
139 | ret = __copy_from_user_inatomic(dst, src, n); | ||
140 | pagefault_enable(); | ||
141 | |||
142 | return ret; | ||
143 | } | ||
127 | #endif | 144 | #endif |
128 | 145 | ||
129 | DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) | 146 | DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) |
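With this change, DEFINE_OUTPUT_COPY() expects its memcpy_func to return the number of bytes it could not copy (the __copy_from_user_inatomic() convention) and converts that into bytes actually written via written = size - written. A userspace sketch of that convention, assuming hypothetical copy_chunk() and output_copy() helpers rather than the real handle/page machinery:

#include <stddef.h>
#include <string.h>

/* Low-level copy: returns the number of bytes NOT copied; 0 means success. */
static size_t copy_chunk(void *dst, const void *src, size_t n)
{
        memcpy(dst, src, n);
        return 0;
}

/* Outer loop in the style of DEFINE_OUTPUT_COPY(); returns the bytes left over. */
static size_t output_copy(char *dst, size_t room, const char *src, size_t len)
{
        size_t size, written;

        do {
                size = room < len ? room : len;
                written = copy_chunk(dst, src, size);
                written = size - written;       /* convert to bytes copied */

                len  -= written;
                dst  += written;
                src  += written;
                room -= written;
        } while (len && room && written == size);

        return len;
}

int main(void)
{
        char out[8];

        /* 16 bytes offered, 8 bytes of room: 8 are left over. */
        return output_copy(out, sizeof(out), "0123456789abcdef", 16) == 8 ? 0 : 1;
}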
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 9c2ddfbf4525..e8b168af135b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -12,40 +12,10 @@ | |||
12 | #include <linux/perf_event.h> | 12 | #include <linux/perf_event.h> |
13 | #include <linux/vmalloc.h> | 13 | #include <linux/vmalloc.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/circ_buf.h> | ||
15 | 16 | ||
16 | #include "internal.h" | 17 | #include "internal.h" |
17 | 18 | ||
18 | static bool perf_output_space(struct ring_buffer *rb, unsigned long tail, | ||
19 | unsigned long offset, unsigned long head) | ||
20 | { | ||
21 | unsigned long sz = perf_data_size(rb); | ||
22 | unsigned long mask = sz - 1; | ||
23 | |||
24 | /* | ||
25 | * check if user-writable | ||
26 | * overwrite : over-write its own tail | ||
27 | * !overwrite: buffer possibly drops events. | ||
28 | */ | ||
29 | if (rb->overwrite) | ||
30 | return true; | ||
31 | |||
32 | /* | ||
33 | * verify that payload is not bigger than buffer | ||
34 | * otherwise masking logic may fail to detect | ||
35 | * the "not enough space" condition | ||
36 | */ | ||
37 | if ((head - offset) > sz) | ||
38 | return false; | ||
39 | |||
40 | offset = (offset - tail) & mask; | ||
41 | head = (head - tail) & mask; | ||
42 | |||
43 | if ((int)(head - offset) < 0) | ||
44 | return false; | ||
45 | |||
46 | return true; | ||
47 | } | ||
48 | |||
49 | static void perf_output_wakeup(struct perf_output_handle *handle) | 19 | static void perf_output_wakeup(struct perf_output_handle *handle) |
50 | { | 20 | { |
51 | atomic_set(&handle->rb->poll, POLL_IN); | 21 | atomic_set(&handle->rb->poll, POLL_IN); |
@@ -115,8 +85,8 @@ again: | |||
115 | rb->user_page->data_head = head; | 85 | rb->user_page->data_head = head; |
116 | 86 | ||
117 | /* | 87 | /* |
118 | * Now check if we missed an update, rely on the (compiler) | 88 | * Now check if we missed an update -- rely on previous implied |
119 | * barrier in atomic_dec_and_test() to re-read rb->head. | 89 | * compiler barriers to force a re-read. |
120 | */ | 90 | */ |
121 | if (unlikely(head != local_read(&rb->head))) { | 91 | if (unlikely(head != local_read(&rb->head))) { |
122 | local_inc(&rb->nest); | 92 | local_inc(&rb->nest); |
@@ -135,8 +105,7 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
135 | { | 105 | { |
136 | struct ring_buffer *rb; | 106 | struct ring_buffer *rb; |
137 | unsigned long tail, offset, head; | 107 | unsigned long tail, offset, head; |
138 | int have_lost; | 108 | int have_lost, page_shift; |
139 | struct perf_sample_data sample_data; | ||
140 | struct { | 109 | struct { |
141 | struct perf_event_header header; | 110 | struct perf_event_header header; |
142 | u64 id; | 111 | u64 id; |
@@ -151,57 +120,63 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
151 | event = event->parent; | 120 | event = event->parent; |
152 | 121 | ||
153 | rb = rcu_dereference(event->rb); | 122 | rb = rcu_dereference(event->rb); |
154 | if (!rb) | 123 | if (unlikely(!rb)) |
155 | goto out; | 124 | goto out; |
156 | 125 | ||
157 | handle->rb = rb; | 126 | if (unlikely(!rb->nr_pages)) |
158 | handle->event = event; | ||
159 | |||
160 | if (!rb->nr_pages) | ||
161 | goto out; | 127 | goto out; |
162 | 128 | ||
129 | handle->rb = rb; | ||
130 | handle->event = event; | ||
131 | |||
163 | have_lost = local_read(&rb->lost); | 132 | have_lost = local_read(&rb->lost); |
164 | if (have_lost) { | 133 | if (unlikely(have_lost)) { |
165 | lost_event.header.size = sizeof(lost_event); | 134 | size += sizeof(lost_event); |
166 | perf_event_header__init_id(&lost_event.header, &sample_data, | 135 | if (event->attr.sample_id_all) |
167 | event); | 136 | size += event->id_header_size; |
168 | size += lost_event.header.size; | ||
169 | } | 137 | } |
170 | 138 | ||
171 | perf_output_get_handle(handle); | 139 | perf_output_get_handle(handle); |
172 | 140 | ||
173 | do { | 141 | do { |
174 | /* | ||
175 | * Userspace could choose to issue a mb() before updating the | ||
176 | * tail pointer. So that all reads will be completed before the | ||
177 | * write is issued. | ||
178 | * | ||
179 | * See perf_output_put_handle(). | ||
180 | */ | ||
181 | tail = ACCESS_ONCE(rb->user_page->data_tail); | 142 | tail = ACCESS_ONCE(rb->user_page->data_tail); |
182 | smp_mb(); | ||
183 | offset = head = local_read(&rb->head); | 143 | offset = head = local_read(&rb->head); |
184 | head += size; | 144 | if (!rb->overwrite && |
185 | if (unlikely(!perf_output_space(rb, tail, offset, head))) | 145 | unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) |
186 | goto fail; | 146 | goto fail; |
147 | head += size; | ||
187 | } while (local_cmpxchg(&rb->head, offset, head) != offset); | 148 | } while (local_cmpxchg(&rb->head, offset, head) != offset); |
188 | 149 | ||
189 | if (head - local_read(&rb->wakeup) > rb->watermark) | 150 | /* |
151 | * Separate the userpage->tail read from the data stores below. | ||
152 | * Matches the MB userspace SHOULD issue after reading the data | ||
153 | * and before storing the new tail position. | ||
154 | * | ||
155 | * See perf_output_put_handle(). | ||
156 | */ | ||
157 | smp_mb(); | ||
158 | |||
159 | if (unlikely(head - local_read(&rb->wakeup) > rb->watermark)) | ||
190 | local_add(rb->watermark, &rb->wakeup); | 160 | local_add(rb->watermark, &rb->wakeup); |
191 | 161 | ||
192 | handle->page = offset >> (PAGE_SHIFT + page_order(rb)); | 162 | page_shift = PAGE_SHIFT + page_order(rb); |
193 | handle->page &= rb->nr_pages - 1; | ||
194 | handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1); | ||
195 | handle->addr = rb->data_pages[handle->page]; | ||
196 | handle->addr += handle->size; | ||
197 | handle->size = (PAGE_SIZE << page_order(rb)) - handle->size; | ||
198 | 163 | ||
199 | if (have_lost) { | 164 | handle->page = (offset >> page_shift) & (rb->nr_pages - 1); |
165 | offset &= (1UL << page_shift) - 1; | ||
166 | handle->addr = rb->data_pages[handle->page] + offset; | ||
167 | handle->size = (1UL << page_shift) - offset; | ||
168 | |||
169 | if (unlikely(have_lost)) { | ||
170 | struct perf_sample_data sample_data; | ||
171 | |||
172 | lost_event.header.size = sizeof(lost_event); | ||
200 | lost_event.header.type = PERF_RECORD_LOST; | 173 | lost_event.header.type = PERF_RECORD_LOST; |
201 | lost_event.header.misc = 0; | 174 | lost_event.header.misc = 0; |
202 | lost_event.id = event->id; | 175 | lost_event.id = event->id; |
203 | lost_event.lost = local_xchg(&rb->lost, 0); | 176 | lost_event.lost = local_xchg(&rb->lost, 0); |
204 | 177 | ||
178 | perf_event_header__init_id(&lost_event.header, | ||
179 | &sample_data, event); | ||
205 | perf_output_put(handle, lost_event); | 180 | perf_output_put(handle, lost_event); |
206 | perf_event__output_id_sample(event, handle, &sample_data); | 181 | perf_event__output_id_sample(event, handle, &sample_data); |
207 | } | 182 | } |
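perf_output_begin() above now rejects a record when the buffer is not in overwrite mode and CIRC_SPACE() reports less free room than the record needs, replacing the removed perf_output_space(). For a power-of-two buffer with free-running head and tail counters, CIRC_SPACE() is a masked subtraction; the macro below follows the <linux/circ_buf.h> convention, and the example values are made up:

#include <assert.h>

/* Same shape as CIRC_SPACE()/CIRC_CNT() in <linux/circ_buf.h>. */
#define CIRC_SPACE(head, tail, size) (((tail) - ((head) + 1)) & ((size) - 1))

int main(void)
{
        unsigned long size = 8, head, tail;     /* size must be a power of two */

        head = 0; tail = 0;                     /* empty: one slot always stays free */
        assert(CIRC_SPACE(head, tail, size) == size - 1);

        head = 5; tail = 0;                     /* 5 bytes produced, none consumed */
        assert(CIRC_SPACE(head, tail, size) == 2);

        head = 7; tail = 0;                     /* full: only the reserved slot left */
        assert(CIRC_SPACE(head, tail, size) == 0);

        return 0;
}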
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ad8e1bdca70e..24b7d6ca871b 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/kdebug.h> /* notifier mechanism */ | 35 | #include <linux/kdebug.h> /* notifier mechanism */ |
36 | #include "../../mm/internal.h" /* munlock_vma_page */ | 36 | #include "../../mm/internal.h" /* munlock_vma_page */ |
37 | #include <linux/percpu-rwsem.h> | 37 | #include <linux/percpu-rwsem.h> |
38 | #include <linux/task_work.h> | ||
38 | 39 | ||
39 | #include <linux/uprobes.h> | 40 | #include <linux/uprobes.h> |
40 | 41 | ||
@@ -244,12 +245,12 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t | |||
244 | * the architecture. If an arch has variable length instruction and the | 245 | * the architecture. If an arch has variable length instruction and the |
245 | * breakpoint instruction is not of the smallest length instruction | 246 | * breakpoint instruction is not of the smallest length instruction |
246 | * supported by that architecture then we need to modify is_trap_at_addr and | 247 | * supported by that architecture then we need to modify is_trap_at_addr and |
247 | * write_opcode accordingly. This would never be a problem for archs that | 248 | * uprobe_write_opcode accordingly. This would never be a problem for archs |
248 | * have fixed length instructions. | 249 | * that have fixed length instructions. |
249 | */ | 250 | */ |
250 | 251 | ||
251 | /* | 252 | /* |
252 | * write_opcode - write the opcode at a given virtual address. | 253 | * uprobe_write_opcode - write the opcode at a given virtual address. |
253 | * @mm: the probed process address space. | 254 | * @mm: the probed process address space. |
254 | * @vaddr: the virtual address to store the opcode. | 255 | * @vaddr: the virtual address to store the opcode. |
255 | * @opcode: opcode to be written at @vaddr. | 256 | * @opcode: opcode to be written at @vaddr. |
@@ -260,7 +261,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t | |||
260 | * For mm @mm, write the opcode at @vaddr. | 261 | * For mm @mm, write the opcode at @vaddr. |
261 | * Return 0 (success) or a negative errno. | 262 | * Return 0 (success) or a negative errno. |
262 | */ | 263 | */ |
263 | static int write_opcode(struct mm_struct *mm, unsigned long vaddr, | 264 | int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, |
264 | uprobe_opcode_t opcode) | 265 | uprobe_opcode_t opcode) |
265 | { | 266 | { |
266 | struct page *old_page, *new_page; | 267 | struct page *old_page, *new_page; |
@@ -314,7 +315,7 @@ put_old: | |||
314 | */ | 315 | */ |
315 | int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) | 316 | int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) |
316 | { | 317 | { |
317 | return write_opcode(mm, vaddr, UPROBE_SWBP_INSN); | 318 | return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); |
318 | } | 319 | } |
319 | 320 | ||
320 | /** | 321 | /** |
@@ -329,7 +330,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned | |||
329 | int __weak | 330 | int __weak |
330 | set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) | 331 | set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) |
331 | { | 332 | { |
332 | return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); | 333 | return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); |
333 | } | 334 | } |
334 | 335 | ||
335 | static int match_uprobe(struct uprobe *l, struct uprobe *r) | 336 | static int match_uprobe(struct uprobe *l, struct uprobe *r) |
@@ -503,9 +504,8 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) | |||
503 | return ret; | 504 | return ret; |
504 | } | 505 | } |
505 | 506 | ||
506 | static int | 507 | static int __copy_insn(struct address_space *mapping, struct file *filp, |
507 | __copy_insn(struct address_space *mapping, struct file *filp, char *insn, | 508 | void *insn, int nbytes, loff_t offset) |
508 | unsigned long nbytes, loff_t offset) | ||
509 | { | 509 | { |
510 | struct page *page; | 510 | struct page *page; |
511 | 511 | ||
@@ -527,28 +527,28 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, | |||
527 | 527 | ||
528 | static int copy_insn(struct uprobe *uprobe, struct file *filp) | 528 | static int copy_insn(struct uprobe *uprobe, struct file *filp) |
529 | { | 529 | { |
530 | struct address_space *mapping; | 530 | struct address_space *mapping = uprobe->inode->i_mapping; |
531 | unsigned long nbytes; | 531 | loff_t offs = uprobe->offset; |
532 | int bytes; | 532 | void *insn = uprobe->arch.insn; |
533 | 533 | int size = MAX_UINSN_BYTES; | |
534 | nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK); | 534 | int len, err = -EIO; |
535 | mapping = uprobe->inode->i_mapping; | ||
536 | 535 | ||
537 | /* Instruction at end of binary; copy only available bytes */ | 536 | /* Copy only available bytes, -EIO if nothing was read */ |
538 | if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size) | 537 | do { |
539 | bytes = uprobe->inode->i_size - uprobe->offset; | 538 | if (offs >= i_size_read(uprobe->inode)) |
540 | else | 539 | break; |
541 | bytes = MAX_UINSN_BYTES; | ||
542 | 540 | ||
543 | /* Instruction at the page-boundary; copy bytes in second page */ | 541 | len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK)); |
544 | if (nbytes < bytes) { | 542 | err = __copy_insn(mapping, filp, insn, len, offs); |
545 | int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, | ||
546 | bytes - nbytes, uprobe->offset + nbytes); | ||
547 | if (err) | 543 | if (err) |
548 | return err; | 544 | break; |
549 | bytes = nbytes; | 545 | |
550 | } | 546 | insn += len; |
551 | return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); | 547 | offs += len; |
548 | size -= len; | ||
549 | } while (size); | ||
550 | |||
551 | return err; | ||
552 | } | 552 | } |
553 | 553 | ||
554 | static int prepare_uprobe(struct uprobe *uprobe, struct file *file, | 554 | static int prepare_uprobe(struct uprobe *uprobe, struct file *file, |
@@ -576,7 +576,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, | |||
576 | if (ret) | 576 | if (ret) |
577 | goto out; | 577 | goto out; |
578 | 578 | ||
579 | /* write_opcode() assumes we don't cross page boundary */ | 579 | /* uprobe_write_opcode() assumes we don't cross page boundary */ |
580 | BUG_ON((uprobe->offset & ~PAGE_MASK) + | 580 | BUG_ON((uprobe->offset & ~PAGE_MASK) + |
581 | UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); | 581 | UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); |
582 | 582 | ||
@@ -1096,21 +1096,22 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon | |||
1096 | } | 1096 | } |
1097 | 1097 | ||
1098 | /* Slot allocation for XOL */ | 1098 | /* Slot allocation for XOL */ |
1099 | static int xol_add_vma(struct xol_area *area) | 1099 | static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) |
1100 | { | 1100 | { |
1101 | struct mm_struct *mm = current->mm; | ||
1102 | int ret = -EALREADY; | 1101 | int ret = -EALREADY; |
1103 | 1102 | ||
1104 | down_write(&mm->mmap_sem); | 1103 | down_write(&mm->mmap_sem); |
1105 | if (mm->uprobes_state.xol_area) | 1104 | if (mm->uprobes_state.xol_area) |
1106 | goto fail; | 1105 | goto fail; |
1107 | 1106 | ||
1108 | ret = -ENOMEM; | 1107 | if (!area->vaddr) { |
1109 | /* Try to map as high as possible, this is only a hint. */ | 1108 | /* Try to map as high as possible, this is only a hint. */ |
1110 | area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); | 1109 | area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, |
1111 | if (area->vaddr & ~PAGE_MASK) { | 1110 | PAGE_SIZE, 0, 0); |
1112 | ret = area->vaddr; | 1111 | if (area->vaddr & ~PAGE_MASK) { |
1113 | goto fail; | 1112 | ret = area->vaddr; |
1113 | goto fail; | ||
1114 | } | ||
1114 | } | 1115 | } |
1115 | 1116 | ||
1116 | ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, | 1117 | ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, |
@@ -1120,30 +1121,19 @@ static int xol_add_vma(struct xol_area *area) | |||
1120 | 1121 | ||
1121 | smp_wmb(); /* pairs with get_xol_area() */ | 1122 | smp_wmb(); /* pairs with get_xol_area() */ |
1122 | mm->uprobes_state.xol_area = area; | 1123 | mm->uprobes_state.xol_area = area; |
1123 | ret = 0; | ||
1124 | fail: | 1124 | fail: |
1125 | up_write(&mm->mmap_sem); | 1125 | up_write(&mm->mmap_sem); |
1126 | 1126 | ||
1127 | return ret; | 1127 | return ret; |
1128 | } | 1128 | } |
1129 | 1129 | ||
1130 | /* | 1130 | static struct xol_area *__create_xol_area(unsigned long vaddr) |
1131 | * get_xol_area - Allocate process's xol_area if necessary. | ||
1132 | * This area will be used for storing instructions for execution out of line. | ||
1133 | * | ||
1134 | * Returns the allocated area or NULL. | ||
1135 | */ | ||
1136 | static struct xol_area *get_xol_area(void) | ||
1137 | { | 1131 | { |
1138 | struct mm_struct *mm = current->mm; | 1132 | struct mm_struct *mm = current->mm; |
1139 | struct xol_area *area; | ||
1140 | uprobe_opcode_t insn = UPROBE_SWBP_INSN; | 1133 | uprobe_opcode_t insn = UPROBE_SWBP_INSN; |
1134 | struct xol_area *area; | ||
1141 | 1135 | ||
1142 | area = mm->uprobes_state.xol_area; | 1136 | area = kmalloc(sizeof(*area), GFP_KERNEL); |
1143 | if (area) | ||
1144 | goto ret; | ||
1145 | |||
1146 | area = kzalloc(sizeof(*area), GFP_KERNEL); | ||
1147 | if (unlikely(!area)) | 1137 | if (unlikely(!area)) |
1148 | goto out; | 1138 | goto out; |
1149 | 1139 | ||
@@ -1155,13 +1145,14 @@ static struct xol_area *get_xol_area(void) | |||
1155 | if (!area->page) | 1145 | if (!area->page) |
1156 | goto free_bitmap; | 1146 | goto free_bitmap; |
1157 | 1147 | ||
1158 | /* allocate first slot of task's xol_area for the return probes */ | 1148 | area->vaddr = vaddr; |
1149 | init_waitqueue_head(&area->wq); | ||
1150 | /* Reserve the 1st slot for get_trampoline_vaddr() */ | ||
1159 | set_bit(0, area->bitmap); | 1151 | set_bit(0, area->bitmap); |
1160 | copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); | ||
1161 | atomic_set(&area->slot_count, 1); | 1152 | atomic_set(&area->slot_count, 1); |
1162 | init_waitqueue_head(&area->wq); | 1153 | copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); |
1163 | 1154 | ||
1164 | if (!xol_add_vma(area)) | 1155 | if (!xol_add_vma(mm, area)) |
1165 | return area; | 1156 | return area; |
1166 | 1157 | ||
1167 | __free_page(area->page); | 1158 | __free_page(area->page); |
@@ -1170,9 +1161,25 @@ static struct xol_area *get_xol_area(void) | |||
1170 | free_area: | 1161 | free_area: |
1171 | kfree(area); | 1162 | kfree(area); |
1172 | out: | 1163 | out: |
1164 | return NULL; | ||
1165 | } | ||
1166 | |||
1167 | /* | ||
1168 | * get_xol_area - Allocate process's xol_area if necessary. | ||
1169 | * This area will be used for storing instructions for execution out of line. | ||
1170 | * | ||
1171 | * Returns the allocated area or NULL. | ||
1172 | */ | ||
1173 | static struct xol_area *get_xol_area(void) | ||
1174 | { | ||
1175 | struct mm_struct *mm = current->mm; | ||
1176 | struct xol_area *area; | ||
1177 | |||
1178 | if (!mm->uprobes_state.xol_area) | ||
1179 | __create_xol_area(0); | ||
1180 | |||
1173 | area = mm->uprobes_state.xol_area; | 1181 | area = mm->uprobes_state.xol_area; |
1174 | ret: | 1182 | smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ |
1175 | smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ | ||
1176 | return area; | 1183 | return area; |
1177 | } | 1184 | } |
1178 | 1185 | ||
@@ -1256,7 +1263,8 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) | |||
1256 | return 0; | 1263 | return 0; |
1257 | 1264 | ||
1258 | /* Initialize the slot */ | 1265 | /* Initialize the slot */ |
1259 | copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES); | 1266 | copy_to_page(area->page, xol_vaddr, |
1267 | uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); | ||
1260 | /* | 1268 | /* |
1261 | * We probably need flush_icache_user_range() but it needs vma. | 1269 | * We probably need flush_icache_user_range() but it needs vma. |
1262 | * This should work on supported architectures too. | 1270 | * This should work on supported architectures too. |
@@ -1345,14 +1353,6 @@ void uprobe_free_utask(struct task_struct *t) | |||
1345 | } | 1353 | } |
1346 | 1354 | ||
1347 | /* | 1355 | /* |
1348 | * Called in context of a new clone/fork from copy_process. | ||
1349 | */ | ||
1350 | void uprobe_copy_process(struct task_struct *t) | ||
1351 | { | ||
1352 | t->utask = NULL; | ||
1353 | } | ||
1354 | |||
1355 | /* | ||
1356 | * Allocate a uprobe_task object for the task if if necessary. | 1356 | * Allocate a uprobe_task object for the task if if necessary. |
1357 | * Called when the thread hits a breakpoint. | 1357 | * Called when the thread hits a breakpoint. |
1358 | * | 1358 | * |
@@ -1367,6 +1367,90 @@ static struct uprobe_task *get_utask(void) | |||
1367 | return current->utask; | 1367 | return current->utask; |
1368 | } | 1368 | } |
1369 | 1369 | ||
1370 | static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) | ||
1371 | { | ||
1372 | struct uprobe_task *n_utask; | ||
1373 | struct return_instance **p, *o, *n; | ||
1374 | |||
1375 | n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); | ||
1376 | if (!n_utask) | ||
1377 | return -ENOMEM; | ||
1378 | t->utask = n_utask; | ||
1379 | |||
1380 | p = &n_utask->return_instances; | ||
1381 | for (o = o_utask->return_instances; o; o = o->next) { | ||
1382 | n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); | ||
1383 | if (!n) | ||
1384 | return -ENOMEM; | ||
1385 | |||
1386 | *n = *o; | ||
1387 | atomic_inc(&n->uprobe->ref); | ||
1388 | n->next = NULL; | ||
1389 | |||
1390 | *p = n; | ||
1391 | p = &n->next; | ||
1392 | n_utask->depth++; | ||
1393 | } | ||
1394 | |||
1395 | return 0; | ||
1396 | } | ||
1397 | |||
1398 | static void uprobe_warn(struct task_struct *t, const char *msg) | ||
1399 | { | ||
1400 | pr_warn("uprobe: %s:%d failed to %s\n", | ||
1401 | current->comm, current->pid, msg); | ||
1402 | } | ||
1403 | |||
1404 | static void dup_xol_work(struct callback_head *work) | ||
1405 | { | ||
1406 | kfree(work); | ||
1407 | |||
1408 | if (current->flags & PF_EXITING) | ||
1409 | return; | ||
1410 | |||
1411 | if (!__create_xol_area(current->utask->vaddr)) | ||
1412 | uprobe_warn(current, "dup xol area"); | ||
1413 | } | ||
1414 | |||
1415 | /* | ||
1416 | * Called in context of a new clone/fork from copy_process. | ||
1417 | */ | ||
1418 | void uprobe_copy_process(struct task_struct *t, unsigned long flags) | ||
1419 | { | ||
1420 | struct uprobe_task *utask = current->utask; | ||
1421 | struct mm_struct *mm = current->mm; | ||
1422 | struct callback_head *work; | ||
1423 | struct xol_area *area; | ||
1424 | |||
1425 | t->utask = NULL; | ||
1426 | |||
1427 | if (!utask || !utask->return_instances) | ||
1428 | return; | ||
1429 | |||
1430 | if (mm == t->mm && !(flags & CLONE_VFORK)) | ||
1431 | return; | ||
1432 | |||
1433 | if (dup_utask(t, utask)) | ||
1434 | return uprobe_warn(t, "dup ret instances"); | ||
1435 | |||
1436 | /* The task can fork() after dup_xol_work() fails */ | ||
1437 | area = mm->uprobes_state.xol_area; | ||
1438 | if (!area) | ||
1439 | return uprobe_warn(t, "dup xol area"); | ||
1440 | |||
1441 | if (mm == t->mm) | ||
1442 | return; | ||
1443 | |||
1444 | /* TODO: move it into the union in uprobe_task */ | ||
1445 | work = kmalloc(sizeof(*work), GFP_KERNEL); | ||
1446 | if (!work) | ||
1447 | return uprobe_warn(t, "dup xol area"); | ||
1448 | |||
1449 | t->utask->vaddr = area->vaddr; | ||
1450 | init_task_work(work, dup_xol_work); | ||
1451 | task_work_add(t, work, true); | ||
1452 | } | ||
1453 | |||
1370 | /* | 1454 | /* |
1371 | * Current area->vaddr notion assume the trampoline address is always | 1455 | * Current area->vaddr notion assume the trampoline address is always |
1372 | * equal area->vaddr. | 1456 | * equal area->vaddr. |
@@ -1857,9 +1941,4 @@ static int __init init_uprobes(void) | |||
1857 | 1941 | ||
1858 | return register_die_notifier(&uprobe_exception_nb); | 1942 | return register_die_notifier(&uprobe_exception_nb); |
1859 | } | 1943 | } |
1860 | module_init(init_uprobes); | 1944 | __initcall(init_uprobes); |
1861 | |||
1862 | static void __exit exit_uprobes(void) | ||
1863 | { | ||
1864 | } | ||
1865 | module_exit(exit_uprobes); | ||
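copy_insn() above is reworked into a single loop that copies at most the rest of the current page per iteration, so an instruction straddling a page boundary no longer needs a special case, and -EIO is returned only if nothing at all could be read. A userspace sketch of that chunking pattern, with read_chunk() and copy_bytes() as hypothetical stand-ins for __copy_insn() and copy_insn():

#include <errno.h>
#include <string.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Stand-in for __copy_insn(): pretend the backing file is all NOP bytes. */
static int read_chunk(void *dst, long long offs, int len)
{
        memset(dst, 0x90, len);
        return 0;
}

static int copy_bytes(void *insn, long long offs, long long file_size, int size)
{
        int len, err = -EIO;                    /* -EIO if nothing was read */

        do {
                if (offs >= file_size)
                        break;

                len = (int)(PAGE_SIZE - (offs & ~PAGE_MASK));   /* room left in this page */
                if (len > size)
                        len = size;

                err = read_chunk(insn, offs, len);
                if (err)
                        break;

                insn = (char *)insn + len;
                offs += len;
                size -= len;
        } while (size);

        return err;
}

int main(void)
{
        char buf[16];

        /* 16 bytes starting 8 bytes before a page boundary: copied as two chunks. */
        return copy_bytes(buf, PAGE_SIZE - 8, 3 * PAGE_SIZE, sizeof(buf));
}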
diff --git a/kernel/fork.c b/kernel/fork.c
index 086fe73ad6bd..8531609b6a82 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1373,7 +1373,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1373 | INIT_LIST_HEAD(&p->pi_state_list); | 1373 | INIT_LIST_HEAD(&p->pi_state_list); |
1374 | p->pi_state_cache = NULL; | 1374 | p->pi_state_cache = NULL; |
1375 | #endif | 1375 | #endif |
1376 | uprobe_copy_process(p); | ||
1377 | /* | 1376 | /* |
1378 | * sigaltstack should be cleared when sharing the same VM | 1377 | * sigaltstack should be cleared when sharing the same VM |
1379 | */ | 1378 | */ |
@@ -1490,6 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1490 | perf_event_fork(p); | 1489 | perf_event_fork(p); |
1491 | 1490 | ||
1492 | trace_task_newtask(p, clone_flags); | 1491 | trace_task_newtask(p, clone_flags); |
1492 | uprobe_copy_process(p, clone_flags); | ||
1493 | 1493 | ||
1494 | return p; | 1494 | return p; |
1495 | 1495 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8b80f1bae21a..5fee859888a4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1049,6 +1049,7 @@ static struct ctl_table kern_table[] = { | |||
1049 | .maxlen = sizeof(sysctl_perf_event_sample_rate), | 1049 | .maxlen = sizeof(sysctl_perf_event_sample_rate), |
1050 | .mode = 0644, | 1050 | .mode = 0644, |
1051 | .proc_handler = perf_proc_update_handler, | 1051 | .proc_handler = perf_proc_update_handler, |
1052 | .extra1 = &one, | ||
1052 | }, | 1053 | }, |
1053 | { | 1054 | { |
1054 | .procname = "perf_cpu_time_max_percent", | 1055 | .procname = "perf_cpu_time_max_percent", |