path: root/kernel/perf_event.c
author		Frederic Weisbecker <fweisbec@gmail.com>	2010-01-09 14:04:47 -0500
committer	Frederic Weisbecker <fweisbec@gmail.com>	2010-01-16 06:27:42 -0500
commit		889ff0150661512d79484219612b7e2e024b6c07 (patch)
tree		6343ad7386aa3aefa7fe3f8ab9389f6abec1d141 /kernel/perf_event.c
parent		881516eb828a3f7276c378bcef96b7788fc99016 (diff)
perf/core: Split context's event group list into pinned and non-pinned lists
Split up struct perf_event_context::group_list into pinned_groups and flexible_groups (non-pinned).

At first this looks pointless, as it duplicates the various loops over the group list. But it scales better in the perf_sched_in() fast path: we no longer iterate twice through the entire list to separate pinned from non-pinned scheduling; instead we iterate through two distinct lists. Another desired effect is that it makes it easier to define distinct scheduling rules for each class.

Changes in v2:
- Rename pinned_grp_list and volatile_grp_list to pinned_groups and flexible_groups respectively, as per Ingo's suggestion.
- Various cleanups

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
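Note that the view below is limited to kernel/perf_event.c; the companion change to struct perf_event_context itself lives in include/linux/perf_event.h and is not shown here. Roughly, the context ends up with two group lists instead of the old single group_list. This is a sketch only; field order and neighbouring members are illustrative, not taken from this diff:

struct perf_event_context {
	raw_spinlock_t		lock;		/* initialized in __perf_event_init_context() below */
	struct mutex		mutex;
	/* ... */
	struct list_head	pinned_groups;	/* group leaders with attr.pinned set */
	struct list_head	flexible_groups;/* non-pinned ("flexible") group leaders */
	struct list_head	event_list;
	/* ... */
};

Group leaders are routed to one list or the other according to event->attr.pinned; see the new ctx_group_list() helper in the diff below.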
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--	kernel/perf_event.c	227
1 file changed, 151 insertions(+), 76 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 27f69a04541d..c9f8a757649d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -289,6 +289,15 @@ static void update_event_times(struct perf_event *event)
 	event->total_time_running = run_end - event->tstamp_running;
 }
 
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+	if (event->attr.pinned)
+		return &ctx->pinned_groups;
+	else
+		return &ctx->flexible_groups;
+}
+
 /*
  * Add a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +312,12 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * add it straight to the context's event list, or to the group
 	 * leader's sibling list:
 	 */
-	if (group_leader == event)
-		list_add_tail(&event->group_entry, &ctx->group_list);
-	else {
+	if (group_leader == event) {
+		struct list_head *list;
+
+		list = ctx_group_list(event, ctx);
+		list_add_tail(&event->group_entry, list);
+	} else {
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -355,8 +367,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * to the context list directly:
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+		struct list_head *list;
 
-		list_move_tail(&sibling->group_entry, &ctx->group_list);
+		list = ctx_group_list(event, ctx);
+		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
 	}
 }
@@ -1056,7 +1070,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 
 	perf_disable();
 	if (ctx->nr_active) {
-		list_for_each_entry(event, &ctx->group_list, group_entry)
+		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
+			group_sched_out(event, cpuctx, ctx);
+
+		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
 	perf_enable();
@@ -1271,9 +1288,8 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    !event->attr.pinned)
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 		if (event->cpu != -1 && event->cpu != cpu)
 			continue;
@@ -1291,15 +1307,10 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 		}
 	}
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		/*
-		 * Ignore events in OFF or ERROR state, and
-		 * ignore pinned events since we did them already.
-		 */
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    event->attr.pinned)
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		/* Ignore events in OFF or ERROR state */
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
@@ -1453,8 +1464,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	 * Rotate the first entry last (works just fine for group events too):
 	 */
 	perf_disable();
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->group_list);
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->pinned_groups);
+		break;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->flexible_groups);
 		break;
 	}
 	perf_enable();
@@ -1490,6 +1506,21 @@ void perf_event_task_tick(struct task_struct *curr)
 	perf_event_task_sched_in(curr);
 }
 
+static int event_enable_on_exec(struct perf_event *event,
+				struct perf_event_context *ctx)
+{
+	if (!event->attr.enable_on_exec)
+		return 0;
+
+	event->attr.enable_on_exec = 0;
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		return 0;
+
+	__perf_event_mark_enabled(event, ctx);
+
+	return 1;
+}
+
 /*
  * Enable all of a task's events that have been marked enable-on-exec.
  * This expects task == current.
@@ -1500,6 +1531,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
+	int ret;
 
 	local_irq_save(flags);
 	ctx = task->perf_event_ctxp;
@@ -1510,14 +1542,16 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_lock(&ctx->lock);
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (!event->attr.enable_on_exec)
-			continue;
-		event->attr.enable_on_exec = 0;
-		if (event->state >= PERF_EVENT_STATE_INACTIVE)
-			continue;
-		__perf_event_mark_enabled(event, ctx);
-		enabled = 1;
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
 	}
 
 	/*
@@ -1591,7 +1625,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
 {
 	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->pinned_groups);
+	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
 	ctx->task = task;
@@ -5032,7 +5067,11 @@ void perf_event_exit_task(struct task_struct *child)
 	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
 				 group_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
@@ -5041,7 +5080,8 @@ again:
 	 * its siblings to the list, but we obtained 'tmp' before that which
 	 * will still point to the list head terminating the iteration.
 	 */
-	if (!list_empty(&child_ctx->group_list))
+	if (!list_empty(&child_ctx->pinned_groups) ||
+	    !list_empty(&child_ctx->flexible_groups))
 		goto again;
 
 	mutex_unlock(&child_ctx->mutex);
@@ -5049,6 +5089,24 @@ again:
 	put_ctx(child_ctx);
 }
 
+static void perf_free_event(struct perf_event *event,
+			    struct perf_event_context *ctx)
+{
+	struct perf_event *parent = event->parent;
+
+	if (WARN_ON_ONCE(!parent))
+		return;
+
+	mutex_lock(&parent->child_mutex);
+	list_del_init(&event->child_list);
+	mutex_unlock(&parent->child_mutex);
+
+	fput(parent->filp);
+
+	list_del_event(event, ctx);
+	free_event(event);
+}
+
 /*
  * free an unexposed, unused context as created by inheritance by
  * init_task below, used by fork() in case of fail.
@@ -5063,36 +5121,70 @@ void perf_event_free_task(struct task_struct *task)
 
 	mutex_lock(&ctx->mutex);
 again:
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
-		struct perf_event *parent = event->parent;
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		perf_free_event(event, ctx);
 
-		if (WARN_ON_ONCE(!parent))
-			continue;
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+				 group_entry)
+		perf_free_event(event, ctx);
 
-		mutex_lock(&parent->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent->child_mutex);
+	if (!list_empty(&ctx->pinned_groups) ||
+	    !list_empty(&ctx->flexible_groups))
+		goto again;
 
-		fput(parent->filp);
+	mutex_unlock(&ctx->mutex);
 
-		list_del_event(event, ctx);
-		free_event(event);
+	put_ctx(ctx);
+}
+
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+		   struct perf_event_context *parent_ctx,
+		   struct task_struct *child,
+		   int *inherited_all)
+{
+	int ret;
+	struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+	if (!event->attr.inherit) {
+		*inherited_all = 0;
+		return 0;
 	}
 
-	if (!list_empty(&ctx->group_list))
-		goto again;
+	if (!child_ctx) {
+		/*
+		 * This is executed from the parent task context, so
+		 * inherit events that have been marked for cloning.
+		 * First allocate and initialize a context for the
+		 * child.
+		 */
 
-	mutex_unlock(&ctx->mutex);
+		child_ctx = kzalloc(sizeof(struct perf_event_context),
+				    GFP_KERNEL);
+		if (!child_ctx)
+			return -ENOMEM;
 
-	put_ctx(ctx);
+		__perf_event_init_context(child_ctx, child);
+		child->perf_event_ctxp = child_ctx;
+		get_task_struct(child);
+	}
+
+	ret = inherit_group(event, parent, parent_ctx,
+			    child, child_ctx);
+
+	if (ret)
+		*inherited_all = 0;
+
+	return ret;
 }
 
+
 /*
  * Initialize the perf_event context in task_struct
  */
 int perf_event_init_task(struct task_struct *child)
 {
-	struct perf_event_context *child_ctx = NULL, *parent_ctx;
+	struct perf_event_context *child_ctx, *parent_ctx;
 	struct perf_event_context *cloned_ctx;
 	struct perf_event *event;
 	struct task_struct *parent = current;
@@ -5130,41 +5222,22 @@ int perf_event_init_task(struct task_struct *child)
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
-		if (!event->attr.inherit) {
-			inherited_all = 0;
-			continue;
-		}
-
-		if (!child->perf_event_ctxp) {
-			/*
-			 * This is executed from the parent task context, so
-			 * inherit events that have been marked for cloning.
-			 * First allocate and initialize a context for the
-			 * child.
-			 */
-
-			child_ctx = kzalloc(sizeof(struct perf_event_context),
-					    GFP_KERNEL);
-			if (!child_ctx) {
-				ret = -ENOMEM;
-				break;
-			}
-
-			__perf_event_init_context(child_ctx, child);
-			child->perf_event_ctxp = child_ctx;
-			get_task_struct(child);
-		}
+	list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
+	}
 
-		ret = inherit_group(event, parent, parent_ctx,
-				    child, child_ctx);
-		if (ret) {
-			inherited_all = 0;
-			break;
-		}
+	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
 	}
 
+	child_ctx = child->perf_event_ctxp;
+
 	if (child_ctx && inherited_all) {
 		/*
 		 * Mark the child context as a clone of the parent
@@ -5213,7 +5286,9 @@ static void __perf_event_exit_cpu(void *info)
 	struct perf_event_context *ctx = &cpuctx->ctx;
 	struct perf_event *event, *tmp;
 
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		__perf_event_remove_from_context(event);
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
 		__perf_event_remove_from_context(event);
 }
 static void perf_event_exit_cpu(int cpu)
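For reference, and not part of this patch: whether a group leader lands on pinned_groups or flexible_groups is decided by the pinned bit that user space sets in struct perf_event_attr before calling perf_event_open(2). A minimal, illustrative example of opening a pinned cycle counter on the current task (error handling omitted, names local to this sketch):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Illustrative only: open a pinned hardware cycle counter on the current task. */
static int open_pinned_cycles(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.pinned = 1;	/* group leader is queued on ctx->pinned_groups */

	/* pid = 0 (current task), cpu = -1 (any CPU), no group leader, no flags */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

Events opened without the pinned bit take the flexible_groups path and remain subject to the rotation in rotate_ctx() above.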