Diffstat (limited to 'kernel')
-rw-r--r--	kernel/perf_event.c	227
1 files changed, 151 insertions, 76 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 27f69a04541d..c9f8a757649d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -289,6 +289,15 @@ static void update_event_times(struct perf_event *event)
 	event->total_time_running = run_end - event->tstamp_running;
 }
 
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+	if (event->attr.pinned)
+		return &ctx->pinned_groups;
+	else
+		return &ctx->flexible_groups;
+}
+
 /*
  * Add a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +312,12 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * add it straight to the context's event list, or to the group
 	 * leader's sibling list:
 	 */
-	if (group_leader == event)
-		list_add_tail(&event->group_entry, &ctx->group_list);
-	else {
+	if (group_leader == event) {
+		struct list_head *list;
+
+		list = ctx_group_list(event, ctx);
+		list_add_tail(&event->group_entry, list);
+	} else {
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -355,8 +367,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * to the context list directly:
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+		struct list_head *list;
 
-		list_move_tail(&sibling->group_entry, &ctx->group_list);
+		list = ctx_group_list(event, ctx);
+		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
 	}
 }
@@ -1056,7 +1070,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 
 	perf_disable();
 	if (ctx->nr_active) {
-		list_for_each_entry(event, &ctx->group_list, group_entry)
+		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
+			group_sched_out(event, cpuctx, ctx);
+
+		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
 	perf_enable();
@@ -1271,9 +1288,8 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    !event->attr.pinned)
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 		if (event->cpu != -1 && event->cpu != cpu)
 			continue;
@@ -1291,15 +1307,10 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 		}
 	}
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		/*
-		 * Ignore events in OFF or ERROR state, and
-		 * ignore pinned events since we did them already.
-		 */
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    event->attr.pinned)
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		/* Ignore events in OFF or ERROR state */
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
@@ -1453,8 +1464,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	 * Rotate the first entry last (works just fine for group events too):
 	 */
 	perf_disable();
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->group_list);
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->pinned_groups);
+		break;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->flexible_groups);
 		break;
 	}
 	perf_enable();
@@ -1490,6 +1506,21 @@ void perf_event_task_tick(struct task_struct *curr)
 	perf_event_task_sched_in(curr);
 }
 
+static int event_enable_on_exec(struct perf_event *event,
+				struct perf_event_context *ctx)
+{
+	if (!event->attr.enable_on_exec)
+		return 0;
+
+	event->attr.enable_on_exec = 0;
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		return 0;
+
+	__perf_event_mark_enabled(event, ctx);
+
+	return 1;
+}
+
 /*
  * Enable all of a task's events that have been marked enable-on-exec.
  * This expects task == current.
@@ -1500,6 +1531,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
+	int ret;
 
 	local_irq_save(flags);
 	ctx = task->perf_event_ctxp;
@@ -1510,14 +1542,16 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_lock(&ctx->lock);
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (!event->attr.enable_on_exec)
-			continue;
-		event->attr.enable_on_exec = 0;
-		if (event->state >= PERF_EVENT_STATE_INACTIVE)
-			continue;
-		__perf_event_mark_enabled(event, ctx);
-		enabled = 1;
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
 	}
 
 	/*
@@ -1591,7 +1625,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
 {
 	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->pinned_groups);
+	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
 	ctx->task = task;
@@ -5032,7 +5067,11 @@ void perf_event_exit_task(struct task_struct *child)
 	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
 				 group_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
@@ -5041,7 +5080,8 @@ again:
 	 * its siblings to the list, but we obtained 'tmp' before that which
 	 * will still point to the list head terminating the iteration.
 	 */
-	if (!list_empty(&child_ctx->group_list))
+	if (!list_empty(&child_ctx->pinned_groups) ||
+	    !list_empty(&child_ctx->flexible_groups))
 		goto again;
 
 	mutex_unlock(&child_ctx->mutex);
@@ -5049,6 +5089,24 @@ again:
 	put_ctx(child_ctx);
 }
 
+static void perf_free_event(struct perf_event *event,
+			    struct perf_event_context *ctx)
+{
+	struct perf_event *parent = event->parent;
+
+	if (WARN_ON_ONCE(!parent))
+		return;
+
+	mutex_lock(&parent->child_mutex);
+	list_del_init(&event->child_list);
+	mutex_unlock(&parent->child_mutex);
+
+	fput(parent->filp);
+
+	list_del_event(event, ctx);
+	free_event(event);
+}
+
 /*
  * free an unexposed, unused context as created by inheritance by
  * init_task below, used by fork() in case of fail.
@@ -5063,36 +5121,70 @@ void perf_event_free_task(struct task_struct *task)
 
 	mutex_lock(&ctx->mutex);
 again:
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
-		struct perf_event *parent = event->parent;
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		perf_free_event(event, ctx);
 
-		if (WARN_ON_ONCE(!parent))
-			continue;
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+				 group_entry)
+		perf_free_event(event, ctx);
 
-		mutex_lock(&parent->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent->child_mutex);
+	if (!list_empty(&ctx->pinned_groups) ||
+	    !list_empty(&ctx->flexible_groups))
+		goto again;
 
-		fput(parent->filp);
+	mutex_unlock(&ctx->mutex);
 
-		list_del_event(event, ctx);
-		free_event(event);
+	put_ctx(ctx);
+}
+
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+		   struct perf_event_context *parent_ctx,
+		   struct task_struct *child,
+		   int *inherited_all)
+{
+	int ret;
+	struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+	if (!event->attr.inherit) {
+		*inherited_all = 0;
+		return 0;
 	}
 
-	if (!list_empty(&ctx->group_list))
-		goto again;
+	if (!child_ctx) {
+		/*
+		 * This is executed from the parent task context, so
+		 * inherit events that have been marked for cloning.
+		 * First allocate and initialize a context for the
+		 * child.
+		 */
 
-	mutex_unlock(&ctx->mutex);
+		child_ctx = kzalloc(sizeof(struct perf_event_context),
+				    GFP_KERNEL);
+		if (!child_ctx)
+			return -ENOMEM;
 
-	put_ctx(ctx);
+		__perf_event_init_context(child_ctx, child);
+		child->perf_event_ctxp = child_ctx;
+		get_task_struct(child);
+	}
+
+	ret = inherit_group(event, parent, parent_ctx,
+			    child, child_ctx);
+
+	if (ret)
+		*inherited_all = 0;
+
+	return ret;
 }
 
+
 /*
  * Initialize the perf_event context in task_struct
  */
 int perf_event_init_task(struct task_struct *child)
 {
-	struct perf_event_context *child_ctx = NULL, *parent_ctx;
+	struct perf_event_context *child_ctx, *parent_ctx;
 	struct perf_event_context *cloned_ctx;
 	struct perf_event *event;
 	struct task_struct *parent = current;
@@ -5130,41 +5222,22 @@ int perf_event_init_task(struct task_struct *child)
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
-		if (!event->attr.inherit) {
-			inherited_all = 0;
-			continue;
-		}
-
-		if (!child->perf_event_ctxp) {
-			/*
-			 * This is executed from the parent task context, so
-			 * inherit events that have been marked for cloning.
-			 * First allocate and initialize a context for the
-			 * child.
-			 */
-
-			child_ctx = kzalloc(sizeof(struct perf_event_context),
-					    GFP_KERNEL);
-			if (!child_ctx) {
-				ret = -ENOMEM;
-				break;
-			}
-
-			__perf_event_init_context(child_ctx, child);
-			child->perf_event_ctxp = child_ctx;
-			get_task_struct(child);
-		}
+	list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
+	}
 
-		ret = inherit_group(event, parent, parent_ctx,
-				    child, child_ctx);
-		if (ret) {
-			inherited_all = 0;
-			break;
-		}
+	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
 	}
 
+	child_ctx = child->perf_event_ctxp;
+
 	if (child_ctx && inherited_all) {
 		/*
 		 * Mark the child context as a clone of the parent
@@ -5213,7 +5286,9 @@ static void __perf_event_exit_cpu(void *info)
 	struct perf_event_context *ctx = &cpuctx->ctx;
 	struct perf_event *event, *tmp;
 
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		__perf_event_remove_from_context(event);
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
 		__perf_event_remove_from_context(event);
 }
 static void perf_event_exit_cpu(int cpu)
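
For readers skimming the patch, the behavioural change boils down to the sketch below. It is a stand-alone, user-space illustration, not kernel code: the fake_* names and the array-backed lists are invented for the example and stand in for the kernel's struct list_head and for ctx->pinned_groups / ctx->flexible_groups. It shows the two decisions the patch encodes: list_add_event() now files a group on one of two lists depending on event->attr.pinned (via the new ctx_group_list() helper), and __perf_event_sched_in() walks the pinned list before the flexible one.

/*
 * Stand-alone illustration (not kernel code) of the list split this patch
 * makes: groups whose attr.pinned flag is set go on a "pinned" list,
 * everything else on a "flexible" list, and schedule-in visits the pinned
 * list first.  The array-backed "lists" are a deliberate simplification
 * of the kernel's struct list_head.
 */
#include <stdio.h>

struct fake_event {
	const char *name;
	int pinned;			/* mirrors event->attr.pinned */
};

struct fake_context {
	struct fake_event *pinned_groups[8];	/* stand-in for ctx->pinned_groups */
	struct fake_event *flexible_groups[8];	/* stand-in for ctx->flexible_groups */
	int nr_pinned, nr_flexible;
};

/* Same decision ctx_group_list() makes, minus the real list_head plumbing. */
static void fake_list_add(struct fake_context *ctx, struct fake_event *event)
{
	if (event->pinned)
		ctx->pinned_groups[ctx->nr_pinned++] = event;
	else
		ctx->flexible_groups[ctx->nr_flexible++] = event;
}

/* Schedule-in order after the split: all pinned groups, then flexible ones. */
static void fake_sched_in(struct fake_context *ctx)
{
	for (int i = 0; i < ctx->nr_pinned; i++)
		printf("sched in (pinned):   %s\n", ctx->pinned_groups[i]->name);
	for (int i = 0; i < ctx->nr_flexible; i++)
		printf("sched in (flexible): %s\n", ctx->flexible_groups[i]->name);
}

int main(void)
{
	struct fake_context ctx = { .nr_pinned = 0, .nr_flexible = 0 };
	struct fake_event a = { "cycles",       0 };
	struct fake_event b = { "instructions", 1 };
	struct fake_event c = { "cache-misses", 0 };

	fake_list_add(&ctx, &a);
	fake_list_add(&ctx, &b);
	fake_list_add(&ctx, &c);
	fake_sched_in(&ctx);	/* "instructions" is scheduled first despite being added second */
	return 0;
}

Keeping the two classes on separate lists means the scheduling and enable-on-exec loops above no longer re-check attr.pinned on every iteration of a single mixed list, and each class can be walked (or rotated) on its own.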
