author     Frederic Weisbecker <fweisbec@gmail.com>    2010-01-09 14:04:47 -0500
committer  Frederic Weisbecker <fweisbec@gmail.com>    2010-01-16 06:27:42 -0500
commit     889ff0150661512d79484219612b7e2e024b6c07 (patch)
tree       6343ad7386aa3aefa7fe3f8ab9389f6abec1d141 /kernel/perf_event.c
parent     881516eb828a3f7276c378bcef96b7788fc99016 (diff)
perf/core: Split context's event group list into pinned and non-pinned lists
Split up struct perf_event_context::group_list into pinned_groups
and flexible_groups (non-pinned).
At first this looks pointless, as it duplicates the various loops that
handle the group list.
But it scales better in the fast path of perf_sched_in(): we no longer
iterate twice through the entire list to separate pinned from non-pinned
scheduling. Instead we iterate once through each of the two distinct lists.
Another desired effect is that it makes it easier to define distinct
scheduling rules for each list.
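
To illustrate the idea, here is a minimal user-space sketch. It is not the
kernel code: struct context, struct event, list_add_event() and sched_in()
below are simplified stand-ins, and only ctx_group_list() mirrors a helper
this patch actually adds. Events carry a pinned flag, the helper picks the
list a group belongs on, and schedule-in walks the pinned list first and the
flexible list second, each exactly once:

#include <stdio.h>

/* Simplified stand-ins for the kernel structures; not kernel code. */
struct event {
	const char *name;
	int pinned;                     /* mirrors event->attr.pinned */
	struct event *next;             /* simple singly linked group list */
};

struct context {
	struct event *pinned_groups;    /* was: one combined group_list */
	struct event *flexible_groups;
};

/* counterpart of ctx_group_list(): pick the list the event belongs on */
static struct event **ctx_group_list(struct context *ctx, struct event *event)
{
	return event->pinned ? &ctx->pinned_groups : &ctx->flexible_groups;
}

static void list_add_event(struct context *ctx, struct event *event)
{
	struct event **list = ctx_group_list(ctx, event);

	event->next = *list;            /* head insertion is enough here */
	*list = event;
}

/* schedule-in fast path: one pass per list, no per-event pinned test */
static void sched_in(struct context *ctx)
{
	struct event *event;

	for (event = ctx->pinned_groups; event; event = event->next)
		printf("sched in pinned:   %s\n", event->name);

	for (event = ctx->flexible_groups; event; event = event->next)
		printf("sched in flexible: %s\n", event->name);
}

int main(void)
{
	struct context ctx = { NULL, NULL };
	struct event cycles = { "cycles", 1, NULL };
	struct event faults = { "faults", 0, NULL };

	list_add_event(&ctx, &cycles);
	list_add_event(&ctx, &faults);
	sched_in(&ctx);

	return 0;
}

With a single combined list, preserving the "pinned first" ordering needs two
full passes with a per-event pinned check in each; with two lists, each pass
only touches the events it will actually schedule.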
Changes in v2:
- Rename pinned_grp_list and volatile_grp_list to pinned_groups and
flexible_groups respectively, as per Ingo's suggestion.
- Various cleanups
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  227
1 file changed, 151 insertions, 76 deletions
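
A second pattern runs through the diff below: loop bodies that used to do
their per-event work inline (enable-on-exec, freeing child events,
inheritance) move that work into helpers (event_enable_on_exec(),
perf_free_event(), inherit_task_group()) so the same body can be run over
both lists without duplication. A hedged user-space sketch of that shape,
with made-up field names and heavily simplified state handling:

#include <stdio.h>

/* Illustrative stand-ins only; not the kernel structures or API. */
struct event {
	const char *name;
	int enable_on_exec;
	int enabled;
	struct event *next;
};

/* counterpart of event_enable_on_exec(): returns 1 if it enabled the event */
static int event_enable_on_exec(struct event *event)
{
	if (!event->enable_on_exec)
		return 0;

	event->enable_on_exec = 0;
	if (event->enabled)		/* already enabled, nothing to do */
		return 0;

	event->enabled = 1;		/* stands in for "mark enabled" */
	return 1;
}

int main(void)
{
	struct event pinned   = { "cycles", 1, 0, NULL };	/* one-entry pinned list */
	struct event flexible = { "faults", 1, 0, NULL };	/* one-entry flexible list */
	struct event *event;
	int enabled = 0;

	/* same helper, one pass over each of the two group lists */
	for (event = &pinned; event; event = event->next)
		if (event_enable_on_exec(event))
			enabled = 1;

	for (event = &flexible; event; event = event->next)
		if (event_enable_on_exec(event))
			enabled = 1;

	printf("enabled=%d cycles=%d faults=%d\n",
	       enabled, pinned.enabled, flexible.enabled);

	return 0;
}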
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 27f69a04541d..c9f8a757649d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -289,6 +289,15 @@ static void update_event_times(struct perf_event *event)
 	event->total_time_running = run_end - event->tstamp_running;
 }
 
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+	if (event->attr.pinned)
+		return &ctx->pinned_groups;
+	else
+		return &ctx->flexible_groups;
+}
+
 /*
  * Add a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +312,12 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * add it straight to the context's event list, or to the group
 	 * leader's sibling list:
 	 */
-	if (group_leader == event)
-		list_add_tail(&event->group_entry, &ctx->group_list);
-	else {
+	if (group_leader == event) {
+		struct list_head *list;
+
+		list = ctx_group_list(event, ctx);
+		list_add_tail(&event->group_entry, list);
+	} else {
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -355,8 +367,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * to the context list directly:
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+		struct list_head *list;
 
-		list_move_tail(&sibling->group_entry, &ctx->group_list);
+		list = ctx_group_list(event, ctx);
+		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
 	}
 }
@@ -1056,7 +1070,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 
 	perf_disable();
 	if (ctx->nr_active) {
-		list_for_each_entry(event, &ctx->group_list, group_entry)
+		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
+			group_sched_out(event, cpuctx, ctx);
+
+		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
 	perf_enable();
@@ -1271,9 +1288,8 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    !event->attr.pinned)
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 		if (event->cpu != -1 && event->cpu != cpu)
 			continue;
@@ -1291,15 +1307,10 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 		}
 	}
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		/*
-		 * Ignore events in OFF or ERROR state, and
-		 * ignore pinned events since we did them already.
-		 */
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    event->attr.pinned)
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		/* Ignore events in OFF or ERROR state */
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
@@ -1453,8 +1464,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	 * Rotate the first entry last (works just fine for group events too):
 	 */
 	perf_disable();
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->group_list);
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->pinned_groups);
+		break;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->flexible_groups);
 		break;
 	}
 	perf_enable();
@@ -1490,6 +1506,21 @@ void perf_event_task_tick(struct task_struct *curr)
 	perf_event_task_sched_in(curr);
 }
 
+static int event_enable_on_exec(struct perf_event *event,
+				struct perf_event_context *ctx)
+{
+	if (!event->attr.enable_on_exec)
+		return 0;
+
+	event->attr.enable_on_exec = 0;
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		return 0;
+
+	__perf_event_mark_enabled(event, ctx);
+
+	return 1;
+}
+
 /*
  * Enable all of a task's events that have been marked enable-on-exec.
  * This expects task == current.
@@ -1500,6 +1531,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
+	int ret;
 
 	local_irq_save(flags);
 	ctx = task->perf_event_ctxp;
@@ -1510,14 +1542,16 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_lock(&ctx->lock);
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (!event->attr.enable_on_exec)
-			continue;
-		event->attr.enable_on_exec = 0;
-		if (event->state >= PERF_EVENT_STATE_INACTIVE)
-			continue;
-		__perf_event_mark_enabled(event, ctx);
-		enabled = 1;
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
 	}
 
 	/*
@@ -1591,7 +1625,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
 {
 	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->pinned_groups);
+	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
 	ctx->task = task;
@@ -5032,7 +5067,11 @@ void perf_event_exit_task(struct task_struct *child)
 	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
 				 group_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
@@ -5041,7 +5080,8 @@ again:
 	 * its siblings to the list, but we obtained 'tmp' before that which
 	 * will still point to the list head terminating the iteration.
 	 */
-	if (!list_empty(&child_ctx->group_list))
+	if (!list_empty(&child_ctx->pinned_groups) ||
+	    !list_empty(&child_ctx->flexible_groups))
 		goto again;
 
 	mutex_unlock(&child_ctx->mutex);
@@ -5049,6 +5089,24 @@ again:
 	put_ctx(child_ctx);
 }
 
+static void perf_free_event(struct perf_event *event,
+			    struct perf_event_context *ctx)
+{
+	struct perf_event *parent = event->parent;
+
+	if (WARN_ON_ONCE(!parent))
+		return;
+
+	mutex_lock(&parent->child_mutex);
+	list_del_init(&event->child_list);
+	mutex_unlock(&parent->child_mutex);
+
+	fput(parent->filp);
+
+	list_del_event(event, ctx);
+	free_event(event);
+}
+
 /*
  * free an unexposed, unused context as created by inheritance by
  * init_task below, used by fork() in case of fail.
@@ -5063,36 +5121,70 @@ void perf_event_free_task(struct task_struct *task)
 
 	mutex_lock(&ctx->mutex);
 again:
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
-		struct perf_event *parent = event->parent;
-
-		if (WARN_ON_ONCE(!parent))
-			continue;
-
-		mutex_lock(&parent->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent->child_mutex);
-
-		fput(parent->filp);
-
-		list_del_event(event, ctx);
-		free_event(event);
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		perf_free_event(event, ctx);
+
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+				 group_entry)
+		perf_free_event(event, ctx);
+
+	if (!list_empty(&ctx->pinned_groups) ||
+	    !list_empty(&ctx->flexible_groups))
+		goto again;
+
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+		   struct perf_event_context *parent_ctx,
+		   struct task_struct *child,
+		   int *inherited_all)
+{
+	int ret;
+	struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+	if (!event->attr.inherit) {
+		*inherited_all = 0;
+		return 0;
 	}
 
-	if (!list_empty(&ctx->group_list))
-		goto again;
+	if (!child_ctx) {
+		/*
+		 * This is executed from the parent task context, so
+		 * inherit events that have been marked for cloning.
+		 * First allocate and initialize a context for the
+		 * child.
+		 */
 
-	mutex_unlock(&ctx->mutex);
+		child_ctx = kzalloc(sizeof(struct perf_event_context),
+				    GFP_KERNEL);
+		if (!child_ctx)
+			return -ENOMEM;
 
-	put_ctx(ctx);
+		__perf_event_init_context(child_ctx, child);
+		child->perf_event_ctxp = child_ctx;
+		get_task_struct(child);
+	}
+
+	ret = inherit_group(event, parent, parent_ctx,
+			    child, child_ctx);
+
+	if (ret)
+		*inherited_all = 0;
+
+	return ret;
 }
 
+
 /*
  * Initialize the perf_event context in task_struct
  */
 int perf_event_init_task(struct task_struct *child)
 {
-	struct perf_event_context *child_ctx = NULL, *parent_ctx;
+	struct perf_event_context *child_ctx, *parent_ctx;
 	struct perf_event_context *cloned_ctx;
 	struct perf_event *event;
 	struct task_struct *parent = current;
@@ -5130,41 +5222,22 @@ int perf_event_init_task(struct task_struct *child)
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
-		if (!event->attr.inherit) {
-			inherited_all = 0;
-			continue;
-		}
-
-		if (!child->perf_event_ctxp) {
-			/*
-			 * This is executed from the parent task context, so
-			 * inherit events that have been marked for cloning.
-			 * First allocate and initialize a context for the
-			 * child.
-			 */
-
-			child_ctx = kzalloc(sizeof(struct perf_event_context),
-					    GFP_KERNEL);
-			if (!child_ctx) {
-				ret = -ENOMEM;
-				break;
-			}
-
-			__perf_event_init_context(child_ctx, child);
-			child->perf_event_ctxp = child_ctx;
-			get_task_struct(child);
-		}
+	list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
+	}
 
-		ret = inherit_group(event, parent, parent_ctx,
-				    child, child_ctx);
-		if (ret) {
-			inherited_all = 0;
-			break;
-		}
+	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
 	}
 
+	child_ctx = child->perf_event_ctxp;
+
 	if (child_ctx && inherited_all) {
 		/*
 		 * Mark the child context as a clone of the parent
@@ -5213,7 +5286,9 @@ static void __perf_event_exit_cpu(void *info)
 	struct perf_event_context *ctx = &cpuctx->ctx;
 	struct perf_event *event, *tmp;
 
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		__perf_event_remove_from_context(event);
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
 		__perf_event_remove_from_context(event);
 }
 static void perf_event_exit_cpu(int cpu)