author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2010-09-02 10:50:03 -0400
committer  Ingo Molnar <mingo@elte.hu>               2010-09-09 14:46:33 -0400
commit     8dc85d547285668e509f86c177bcd4ea055bcaaf
tree       5474dd6c31952b342f0a3f1bcec09e3049129264 /kernel/perf_event.c
parent     eb184479874238393ac186c4e054d24311c34aaa
perf: Multiple task contexts
Provide the infrastructure for multiple task contexts.
A more flexible approach would have resulted in more pointer chases
in the scheduling hot paths. This approach has the limitation of a
static number of task contexts.

Since I expect most external PMUs to be system-wide, or at least
node-wide (as per the Intel uncore unit), they won't actually need a
task context.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  |  336
1 file changed, 231 insertions(+), 105 deletions(-)
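Before the diff itself, the shape of the change can be pictured with a small standalone sketch. This is illustrative only: the enum layout is an assumption modeled on the companion header change (which is not part of this kernel/perf_event.c diff), and `struct task_struct_sketch` is a stand-in invented for the example, not the real `task_struct`.

```c
/*
 * Illustrative sketch of the static task-context array this patch
 * introduces. Simplified userspace code: the enum layout is an
 * assumption based on the companion header change, and the structs
 * are stand-ins, not the real kernel definitions.
 */
#include <stdio.h>

enum perf_event_task_context {
	perf_hw_context = 0,
	perf_sw_context,
	perf_nr_task_contexts,		/* static number of task contexts */
};

struct perf_event_context {
	int nr_events;
};

struct task_struct_sketch {
	/* one slot per context class, indexed by pmu->task_ctx_nr */
	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
};

#define for_each_task_context_nr(ctxn) \
	for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)

/* Mirrors the new perf_event_task_sched_in() loop: visit every slot. */
static void sched_in_all_contexts(struct task_struct_sketch *task)
{
	int ctxn;

	for_each_task_context_nr(ctxn) {
		struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];

		if (!ctx)
			continue;
		printf("schedule in context %d (%d events)\n",
		       ctxn, ctx->nr_events);
	}
}

int main(void)
{
	struct perf_event_context hw = { .nr_events = 2 };
	struct task_struct_sketch task = { .perf_event_ctxp = { &hw } };

	sched_in_all_contexts(&task);
	return 0;
}
```

The fixed-size array is what the commit message means by "a static number of task contexts": the scheduler hot path runs a small, compile-time-bounded loop instead of chasing a dynamic list of contexts.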
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 13d98d756347..7223ea875861 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -148,13 +148,13 @@ static u64 primary_event_id(struct perf_event *event) | |||
148 | * the context could get moved to another task. | 148 | * the context could get moved to another task. |
149 | */ | 149 | */ |
150 | static struct perf_event_context * | 150 | static struct perf_event_context * |
151 | perf_lock_task_context(struct task_struct *task, unsigned long *flags) | 151 | perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) |
152 | { | 152 | { |
153 | struct perf_event_context *ctx; | 153 | struct perf_event_context *ctx; |
154 | 154 | ||
155 | rcu_read_lock(); | 155 | rcu_read_lock(); |
156 | retry: | 156 | retry: |
157 | ctx = rcu_dereference(task->perf_event_ctxp); | 157 | ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); |
158 | if (ctx) { | 158 | if (ctx) { |
159 | /* | 159 | /* |
160 | * If this context is a clone of another, it might | 160 | * If this context is a clone of another, it might |
@@ -167,7 +167,7 @@ retry: | |||
167 | * can't get swapped on us any more. | 167 | * can't get swapped on us any more. |
168 | */ | 168 | */ |
169 | raw_spin_lock_irqsave(&ctx->lock, *flags); | 169 | raw_spin_lock_irqsave(&ctx->lock, *flags); |
170 | if (ctx != rcu_dereference(task->perf_event_ctxp)) { | 170 | if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { |
171 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); | 171 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); |
172 | goto retry; | 172 | goto retry; |
173 | } | 173 | } |
@@ -186,12 +186,13 @@ retry: | |||
186 | * can't get swapped to another task. This also increments its | 186 | * can't get swapped to another task. This also increments its |
187 | * reference count so that the context can't get freed. | 187 | * reference count so that the context can't get freed. |
188 | */ | 188 | */ |
189 | static struct perf_event_context *perf_pin_task_context(struct task_struct *task) | 189 | static struct perf_event_context * |
190 | perf_pin_task_context(struct task_struct *task, int ctxn) | ||
190 | { | 191 | { |
191 | struct perf_event_context *ctx; | 192 | struct perf_event_context *ctx; |
192 | unsigned long flags; | 193 | unsigned long flags; |
193 | 194 | ||
194 | ctx = perf_lock_task_context(task, &flags); | 195 | ctx = perf_lock_task_context(task, ctxn, &flags); |
195 | if (ctx) { | 196 | if (ctx) { |
196 | ++ctx->pin_count; | 197 | ++ctx->pin_count; |
197 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 198 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
@@ -1179,28 +1180,15 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1179 | } | 1180 | } |
1180 | } | 1181 | } |
1181 | 1182 | ||
1182 | /* | 1183 | void perf_event_context_sched_out(struct task_struct *task, int ctxn, |
1183 | * Called from scheduler to remove the events of the current task, | 1184 | struct task_struct *next) |
1184 | * with interrupts disabled. | ||
1185 | * | ||
1186 | * We stop each event and update the event value in event->count. | ||
1187 | * | ||
1188 | * This does not protect us against NMI, but disable() | ||
1189 | * sets the disabled bit in the control field of event _before_ | ||
1190 | * accessing the event control register. If a NMI hits, then it will | ||
1191 | * not restart the event. | ||
1192 | */ | ||
1193 | void perf_event_task_sched_out(struct task_struct *task, | ||
1194 | struct task_struct *next) | ||
1195 | { | 1185 | { |
1196 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1186 | struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; |
1197 | struct perf_event_context *next_ctx; | 1187 | struct perf_event_context *next_ctx; |
1198 | struct perf_event_context *parent; | 1188 | struct perf_event_context *parent; |
1199 | struct perf_cpu_context *cpuctx; | 1189 | struct perf_cpu_context *cpuctx; |
1200 | int do_switch = 1; | 1190 | int do_switch = 1; |
1201 | 1191 | ||
1202 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
1203 | |||
1204 | if (likely(!ctx)) | 1192 | if (likely(!ctx)) |
1205 | return; | 1193 | return; |
1206 | 1194 | ||
@@ -1210,7 +1198,7 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1210 | 1198 | ||
1211 | rcu_read_lock(); | 1199 | rcu_read_lock(); |
1212 | parent = rcu_dereference(ctx->parent_ctx); | 1200 | parent = rcu_dereference(ctx->parent_ctx); |
1213 | next_ctx = next->perf_event_ctxp; | 1201 | next_ctx = next->perf_event_ctxp[ctxn]; |
1214 | if (parent && next_ctx && | 1202 | if (parent && next_ctx && |
1215 | rcu_dereference(next_ctx->parent_ctx) == parent) { | 1203 | rcu_dereference(next_ctx->parent_ctx) == parent) { |
1216 | /* | 1204 | /* |
@@ -1229,8 +1217,8 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1229 | * XXX do we need a memory barrier of sorts | 1217 | * XXX do we need a memory barrier of sorts |
1230 | * wrt to rcu_dereference() of perf_event_ctxp | 1218 | * wrt to rcu_dereference() of perf_event_ctxp |
1231 | */ | 1219 | */ |
1232 | task->perf_event_ctxp = next_ctx; | 1220 | task->perf_event_ctxp[ctxn] = next_ctx; |
1233 | next->perf_event_ctxp = ctx; | 1221 | next->perf_event_ctxp[ctxn] = ctx; |
1234 | ctx->task = next; | 1222 | ctx->task = next; |
1235 | next_ctx->task = task; | 1223 | next_ctx->task = task; |
1236 | do_switch = 0; | 1224 | do_switch = 0; |
@@ -1248,6 +1236,31 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1248 | } | 1236 | } |
1249 | } | 1237 | } |
1250 | 1238 | ||
1239 | #define for_each_task_context_nr(ctxn) \ | ||
1240 | for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) | ||
1241 | |||
1242 | /* | ||
1243 | * Called from scheduler to remove the events of the current task, | ||
1244 | * with interrupts disabled. | ||
1245 | * | ||
1246 | * We stop each event and update the event value in event->count. | ||
1247 | * | ||
1248 | * This does not protect us against NMI, but disable() | ||
1249 | * sets the disabled bit in the control field of event _before_ | ||
1250 | * accessing the event control register. If a NMI hits, then it will | ||
1251 | * not restart the event. | ||
1252 | */ | ||
1253 | void perf_event_task_sched_out(struct task_struct *task, | ||
1254 | struct task_struct *next) | ||
1255 | { | ||
1256 | int ctxn; | ||
1257 | |||
1258 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
1259 | |||
1260 | for_each_task_context_nr(ctxn) | ||
1261 | perf_event_context_sched_out(task, ctxn, next); | ||
1262 | } | ||
1263 | |||
1251 | static void task_ctx_sched_out(struct perf_event_context *ctx, | 1264 | static void task_ctx_sched_out(struct perf_event_context *ctx, |
1252 | enum event_type_t event_type) | 1265 | enum event_type_t event_type) |
1253 | { | 1266 | { |
@@ -1366,38 +1379,23 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | |||
1366 | ctx_sched_in(ctx, cpuctx, event_type); | 1379 | ctx_sched_in(ctx, cpuctx, event_type); |
1367 | } | 1380 | } |
1368 | 1381 | ||
1369 | static void task_ctx_sched_in(struct task_struct *task, | 1382 | static void task_ctx_sched_in(struct perf_event_context *ctx, |
1370 | enum event_type_t event_type) | 1383 | enum event_type_t event_type) |
1371 | { | 1384 | { |
1372 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1385 | struct perf_cpu_context *cpuctx; |
1373 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
1374 | 1386 | ||
1375 | if (likely(!ctx)) | 1387 | cpuctx = __get_cpu_context(ctx); |
1376 | return; | ||
1377 | if (cpuctx->task_ctx == ctx) | 1388 | if (cpuctx->task_ctx == ctx) |
1378 | return; | 1389 | return; |
1390 | |||
1379 | ctx_sched_in(ctx, cpuctx, event_type); | 1391 | ctx_sched_in(ctx, cpuctx, event_type); |
1380 | cpuctx->task_ctx = ctx; | 1392 | cpuctx->task_ctx = ctx; |
1381 | } | 1393 | } |
1382 | /* | 1394 | |
1383 | * Called from scheduler to add the events of the current task | 1395 | void perf_event_context_sched_in(struct perf_event_context *ctx) |
1384 | * with interrupts disabled. | ||
1385 | * | ||
1386 | * We restore the event value and then enable it. | ||
1387 | * | ||
1388 | * This does not protect us against NMI, but enable() | ||
1389 | * sets the enabled bit in the control field of event _before_ | ||
1390 | * accessing the event control register. If a NMI hits, then it will | ||
1391 | * keep the event running. | ||
1392 | */ | ||
1393 | void perf_event_task_sched_in(struct task_struct *task) | ||
1394 | { | 1396 | { |
1395 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1396 | struct perf_cpu_context *cpuctx; | 1397 | struct perf_cpu_context *cpuctx; |
1397 | 1398 | ||
1398 | if (likely(!ctx)) | ||
1399 | return; | ||
1400 | |||
1401 | cpuctx = __get_cpu_context(ctx); | 1399 | cpuctx = __get_cpu_context(ctx); |
1402 | if (cpuctx->task_ctx == ctx) | 1400 | if (cpuctx->task_ctx == ctx) |
1403 | return; | 1401 | return; |
@@ -1422,6 +1420,31 @@ void perf_event_task_sched_in(struct task_struct *task) | |||
1422 | perf_pmu_rotate_start(ctx->pmu); | 1420 | perf_pmu_rotate_start(ctx->pmu); |
1423 | } | 1421 | } |
1424 | 1422 | ||
1423 | /* | ||
1424 | * Called from scheduler to add the events of the current task | ||
1425 | * with interrupts disabled. | ||
1426 | * | ||
1427 | * We restore the event value and then enable it. | ||
1428 | * | ||
1429 | * This does not protect us against NMI, but enable() | ||
1430 | * sets the enabled bit in the control field of event _before_ | ||
1431 | * accessing the event control register. If a NMI hits, then it will | ||
1432 | * keep the event running. | ||
1433 | */ | ||
1434 | void perf_event_task_sched_in(struct task_struct *task) | ||
1435 | { | ||
1436 | struct perf_event_context *ctx; | ||
1437 | int ctxn; | ||
1438 | |||
1439 | for_each_task_context_nr(ctxn) { | ||
1440 | ctx = task->perf_event_ctxp[ctxn]; | ||
1441 | if (likely(!ctx)) | ||
1442 | continue; | ||
1443 | |||
1444 | perf_event_context_sched_in(ctx); | ||
1445 | } | ||
1446 | } | ||
1447 | |||
1425 | #define MAX_INTERRUPTS (~0ULL) | 1448 | #define MAX_INTERRUPTS (~0ULL) |
1426 | 1449 | ||
1427 | static void perf_log_throttle(struct perf_event *event, int enable); | 1450 | static void perf_log_throttle(struct perf_event *event, int enable); |
@@ -1588,7 +1611,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) | |||
1588 | { | 1611 | { |
1589 | enum hrtimer_restart restart = HRTIMER_NORESTART; | 1612 | enum hrtimer_restart restart = HRTIMER_NORESTART; |
1590 | struct perf_cpu_context *cpuctx; | 1613 | struct perf_cpu_context *cpuctx; |
1591 | struct perf_event_context *ctx; | 1614 | struct perf_event_context *ctx = NULL; |
1592 | int rotate = 0; | 1615 | int rotate = 0; |
1593 | 1616 | ||
1594 | cpuctx = container_of(timer, struct perf_cpu_context, timer); | 1617 | cpuctx = container_of(timer, struct perf_cpu_context, timer); |
@@ -1599,7 +1622,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) | |||
1599 | rotate = 1; | 1622 | rotate = 1; |
1600 | } | 1623 | } |
1601 | 1624 | ||
1602 | ctx = current->perf_event_ctxp; | 1625 | ctx = cpuctx->task_ctx; |
1603 | if (ctx && ctx->nr_events) { | 1626 | if (ctx && ctx->nr_events) { |
1604 | restart = HRTIMER_RESTART; | 1627 | restart = HRTIMER_RESTART; |
1605 | if (ctx->nr_events != ctx->nr_active) | 1628 | if (ctx->nr_events != ctx->nr_active) |
@@ -1623,7 +1646,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) | |||
1623 | 1646 | ||
1624 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | 1647 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); |
1625 | if (ctx) | 1648 | if (ctx) |
1626 | task_ctx_sched_in(current, EVENT_FLEXIBLE); | 1649 | task_ctx_sched_in(ctx, EVENT_FLEXIBLE); |
1627 | 1650 | ||
1628 | done: | 1651 | done: |
1629 | hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval)); | 1652 | hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval)); |
@@ -1650,20 +1673,18 @@ static int event_enable_on_exec(struct perf_event *event, | |||
1650 | * Enable all of a task's events that have been marked enable-on-exec. | 1673 | * Enable all of a task's events that have been marked enable-on-exec. |
1651 | * This expects task == current. | 1674 | * This expects task == current. |
1652 | */ | 1675 | */ |
1653 | static void perf_event_enable_on_exec(struct task_struct *task) | 1676 | static void perf_event_enable_on_exec(struct perf_event_context *ctx) |
1654 | { | 1677 | { |
1655 | struct perf_event_context *ctx; | ||
1656 | struct perf_event *event; | 1678 | struct perf_event *event; |
1657 | unsigned long flags; | 1679 | unsigned long flags; |
1658 | int enabled = 0; | 1680 | int enabled = 0; |
1659 | int ret; | 1681 | int ret; |
1660 | 1682 | ||
1661 | local_irq_save(flags); | 1683 | local_irq_save(flags); |
1662 | ctx = task->perf_event_ctxp; | ||
1663 | if (!ctx || !ctx->nr_events) | 1684 | if (!ctx || !ctx->nr_events) |
1664 | goto out; | 1685 | goto out; |
1665 | 1686 | ||
1666 | __perf_event_task_sched_out(ctx); | 1687 | task_ctx_sched_out(ctx, EVENT_ALL); |
1667 | 1688 | ||
1668 | raw_spin_lock(&ctx->lock); | 1689 | raw_spin_lock(&ctx->lock); |
1669 | 1690 | ||
@@ -1687,7 +1708,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1687 | 1708 | ||
1688 | raw_spin_unlock(&ctx->lock); | 1709 | raw_spin_unlock(&ctx->lock); |
1689 | 1710 | ||
1690 | perf_event_task_sched_in(task); | 1711 | perf_event_context_sched_in(ctx); |
1691 | out: | 1712 | out: |
1692 | local_irq_restore(flags); | 1713 | local_irq_restore(flags); |
1693 | } | 1714 | } |
@@ -1995,7 +2016,7 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu) | |||
1995 | struct perf_cpu_context *cpuctx; | 2016 | struct perf_cpu_context *cpuctx; |
1996 | struct task_struct *task; | 2017 | struct task_struct *task; |
1997 | unsigned long flags; | 2018 | unsigned long flags; |
1998 | int err; | 2019 | int ctxn, err; |
1999 | 2020 | ||
2000 | if (pid == -1 && cpu != -1) { | 2021 | if (pid == -1 && cpu != -1) { |
2001 | /* Must be root to operate on a CPU event: */ | 2022 | /* Must be root to operate on a CPU event: */ |
@@ -2044,8 +2065,13 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu) | |||
2044 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2065 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
2045 | goto errout; | 2066 | goto errout; |
2046 | 2067 | ||
2068 | err = -EINVAL; | ||
2069 | ctxn = pmu->task_ctx_nr; | ||
2070 | if (ctxn < 0) | ||
2071 | goto errout; | ||
2072 | |||
2047 | retry: | 2073 | retry: |
2048 | ctx = perf_lock_task_context(task, &flags); | 2074 | ctx = perf_lock_task_context(task, ctxn, &flags); |
2049 | if (ctx) { | 2075 | if (ctx) { |
2050 | unclone_ctx(ctx); | 2076 | unclone_ctx(ctx); |
2051 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 2077 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
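The `err = -EINVAL; ctxn = pmu->task_ctx_nr; if (ctxn < 0) goto errout;` addition above is where system-wide PMUs opt out of per-task contexts: a PMU with a negative task_ctx_nr has no slot in perf_event_ctxp[], so attaching one of its events to a task fails. A minimal sketch of that decision follows; `struct pmu_sketch`, `task_ctx_slot()` and `PERF_INVALID_CONTEXT` are invented names used purely for illustration.

```c
/*
 * Illustrative-only sketch of the new find_get_context() check:
 * a PMU whose task_ctx_nr is negative has no per-task context slot,
 * so a task-bound event on it is rejected with -EINVAL.
 */
#include <errno.h>
#include <stdio.h>

#define PERF_INVALID_CONTEXT	(-1)	/* assumed sentinel for the example */

struct pmu_sketch {
	const char *name;
	int task_ctx_nr;	/* index into task->perf_event_ctxp[], or < 0 */
};

static int task_ctx_slot(const struct pmu_sketch *pmu)
{
	if (pmu->task_ctx_nr < 0)
		return -EINVAL;	/* system-wide PMU: no task context */
	return pmu->task_ctx_nr;
}

int main(void)
{
	struct pmu_sketch cpu_pmu = { "cpu", 0 };
	struct pmu_sketch uncore  = { "uncore", PERF_INVALID_CONTEXT };

	printf("cpu:    slot %d\n", task_ctx_slot(&cpu_pmu));
	printf("uncore: slot %d\n", task_ctx_slot(&uncore));
	return 0;
}
```

The same convention is why the comm/mmap/task event iterators added later in this diff skip such PMUs with `if (ctxn < 0) continue;`.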
@@ -2059,7 +2085,7 @@ retry: | |||
2059 | 2085 | ||
2060 | get_ctx(ctx); | 2086 | get_ctx(ctx); |
2061 | 2087 | ||
2062 | if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { | 2088 | if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { |
2063 | /* | 2089 | /* |
2064 | * We raced with some other task; use | 2090 | * We raced with some other task; use |
2065 | * the context they set. | 2091 | * the context they set. |
@@ -3773,19 +3799,26 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, | |||
3773 | 3799 | ||
3774 | static void perf_event_task_event(struct perf_task_event *task_event) | 3800 | static void perf_event_task_event(struct perf_task_event *task_event) |
3775 | { | 3801 | { |
3776 | struct perf_event_context *ctx = task_event->task_ctx; | ||
3777 | struct perf_cpu_context *cpuctx; | 3802 | struct perf_cpu_context *cpuctx; |
3803 | struct perf_event_context *ctx; | ||
3778 | struct pmu *pmu; | 3804 | struct pmu *pmu; |
3805 | int ctxn; | ||
3779 | 3806 | ||
3780 | rcu_read_lock_sched(); | 3807 | rcu_read_lock_sched(); |
3781 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3808 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3782 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 3809 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
3783 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3810 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3811 | |||
3812 | ctx = task_event->task_ctx; | ||
3813 | if (!ctx) { | ||
3814 | ctxn = pmu->task_ctx_nr; | ||
3815 | if (ctxn < 0) | ||
3816 | continue; | ||
3817 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
3818 | } | ||
3819 | if (ctx) | ||
3820 | perf_event_task_ctx(ctx, task_event); | ||
3784 | } | 3821 | } |
3785 | if (!ctx) | ||
3786 | ctx = rcu_dereference(current->perf_event_ctxp); | ||
3787 | if (ctx) | ||
3788 | perf_event_task_ctx(ctx, task_event); | ||
3789 | rcu_read_unlock_sched(); | 3822 | rcu_read_unlock_sched(); |
3790 | } | 3823 | } |
3791 | 3824 | ||
@@ -3890,9 +3923,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3890 | { | 3923 | { |
3891 | struct perf_cpu_context *cpuctx; | 3924 | struct perf_cpu_context *cpuctx; |
3892 | struct perf_event_context *ctx; | 3925 | struct perf_event_context *ctx; |
3926 | char comm[TASK_COMM_LEN]; | ||
3893 | unsigned int size; | 3927 | unsigned int size; |
3894 | struct pmu *pmu; | 3928 | struct pmu *pmu; |
3895 | char comm[TASK_COMM_LEN]; | 3929 | int ctxn; |
3896 | 3930 | ||
3897 | memset(comm, 0, sizeof(comm)); | 3931 | memset(comm, 0, sizeof(comm)); |
3898 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); | 3932 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); |
@@ -3907,19 +3941,31 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3907 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3941 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3908 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 3942 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
3909 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3943 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3944 | |||
3945 | ctxn = pmu->task_ctx_nr; | ||
3946 | if (ctxn < 0) | ||
3947 | continue; | ||
3948 | |||
3949 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
3950 | if (ctx) | ||
3951 | perf_event_comm_ctx(ctx, comm_event); | ||
3910 | } | 3952 | } |
3911 | ctx = rcu_dereference(current->perf_event_ctxp); | ||
3912 | if (ctx) | ||
3913 | perf_event_comm_ctx(ctx, comm_event); | ||
3914 | rcu_read_unlock_sched(); | 3953 | rcu_read_unlock_sched(); |
3915 | } | 3954 | } |
3916 | 3955 | ||
3917 | void perf_event_comm(struct task_struct *task) | 3956 | void perf_event_comm(struct task_struct *task) |
3918 | { | 3957 | { |
3919 | struct perf_comm_event comm_event; | 3958 | struct perf_comm_event comm_event; |
3959 | struct perf_event_context *ctx; | ||
3960 | int ctxn; | ||
3920 | 3961 | ||
3921 | if (task->perf_event_ctxp) | 3962 | for_each_task_context_nr(ctxn) { |
3922 | perf_event_enable_on_exec(task); | 3963 | ctx = task->perf_event_ctxp[ctxn]; |
3964 | if (!ctx) | ||
3965 | continue; | ||
3966 | |||
3967 | perf_event_enable_on_exec(ctx); | ||
3968 | } | ||
3923 | 3969 | ||
3924 | if (!atomic_read(&nr_comm_events)) | 3970 | if (!atomic_read(&nr_comm_events)) |
3925 | return; | 3971 | return; |
@@ -4022,6 +4068,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
4022 | char *buf = NULL; | 4068 | char *buf = NULL; |
4023 | const char *name; | 4069 | const char *name; |
4024 | struct pmu *pmu; | 4070 | struct pmu *pmu; |
4071 | int ctxn; | ||
4025 | 4072 | ||
4026 | memset(tmp, 0, sizeof(tmp)); | 4073 | memset(tmp, 0, sizeof(tmp)); |
4027 | 4074 | ||
@@ -4078,10 +4125,17 @@ got_name: | |||
4078 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 4125 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
4079 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 4126 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
4080 | vma->vm_flags & VM_EXEC); | 4127 | vma->vm_flags & VM_EXEC); |
4128 | |||
4129 | ctxn = pmu->task_ctx_nr; | ||
4130 | if (ctxn < 0) | ||
4131 | continue; | ||
4132 | |||
4133 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
4134 | if (ctx) { | ||
4135 | perf_event_mmap_ctx(ctx, mmap_event, | ||
4136 | vma->vm_flags & VM_EXEC); | ||
4137 | } | ||
4081 | } | 4138 | } |
4082 | ctx = rcu_dereference(current->perf_event_ctxp); | ||
4083 | if (ctx) | ||
4084 | perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); | ||
4085 | rcu_read_unlock_sched(); | 4139 | rcu_read_unlock_sched(); |
4086 | 4140 | ||
4087 | kfree(buf); | 4141 | kfree(buf); |
@@ -5042,6 +5096,43 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) | |||
5042 | perf_pmu_enable(pmu); | 5096 | perf_pmu_enable(pmu); |
5043 | } | 5097 | } |
5044 | 5098 | ||
5099 | /* | ||
5100 | * Ensures all contexts with the same task_ctx_nr have the same | ||
5101 | * pmu_cpu_context too. | ||
5102 | */ | ||
5103 | static void *find_pmu_context(int ctxn) | ||
5104 | { | ||
5105 | struct pmu *pmu; | ||
5106 | |||
5107 | if (ctxn < 0) | ||
5108 | return NULL; | ||
5109 | |||
5110 | list_for_each_entry(pmu, &pmus, entry) { | ||
5111 | if (pmu->task_ctx_nr == ctxn) | ||
5112 | return pmu->pmu_cpu_context; | ||
5113 | } | ||
5114 | |||
5115 | return NULL; | ||
5116 | } | ||
5117 | |||
5118 | static void free_pmu_context(void * __percpu cpu_context) | ||
5119 | { | ||
5120 | struct pmu *pmu; | ||
5121 | |||
5122 | mutex_lock(&pmus_lock); | ||
5123 | /* | ||
5124 | * Like a real lame refcount. | ||
5125 | */ | ||
5126 | list_for_each_entry(pmu, &pmus, entry) { | ||
5127 | if (pmu->pmu_cpu_context == cpu_context) | ||
5128 | goto out; | ||
5129 | } | ||
5130 | |||
5131 | free_percpu(cpu_context); | ||
5132 | out: | ||
5133 | mutex_unlock(&pmus_lock); | ||
5134 | } | ||
5135 | |||
5045 | int perf_pmu_register(struct pmu *pmu) | 5136 | int perf_pmu_register(struct pmu *pmu) |
5046 | { | 5137 | { |
5047 | int cpu, ret; | 5138 | int cpu, ret; |
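find_pmu_context() and free_pmu_context() above let every PMU registered with the same task_ctx_nr share one per-cpu context, and the "lame refcount" is literally a walk of the pmus list. Below is a compressed userspace model of that sharing; list handling and locking are stripped out, and `register_pmu`, `pmu_sketch` and `cpu_ctx` are stand-ins for the example, not kernel data structures.

```c
/*
 * Userspace model of the per-cpu context sharing added above:
 * PMUs with the same task_ctx_nr reuse one cpu context, and freeing
 * only happens once no registered PMU still points at it.
 */
#include <stdio.h>
#include <stdlib.h>

struct cpu_ctx { int unused; };

struct pmu_sketch {
	struct pmu_sketch *next;	/* stand-in for the global pmus list */
	int task_ctx_nr;
	struct cpu_ctx *cpu_context;
};

static struct pmu_sketch *pmus;		/* registered PMUs */

static struct cpu_ctx *find_pmu_context(int ctxn)
{
	struct pmu_sketch *pmu;

	if (ctxn < 0)
		return NULL;
	for (pmu = pmus; pmu; pmu = pmu->next)
		if (pmu->task_ctx_nr == ctxn)
			return pmu->cpu_context;
	return NULL;
}

static void free_pmu_context(struct cpu_ctx *ctx)
{
	struct pmu_sketch *pmu;

	/* "Like a real lame refcount": free only when nobody uses it. */
	for (pmu = pmus; pmu; pmu = pmu->next)
		if (pmu->cpu_context == ctx)
			return;
	free(ctx);
}

static void register_pmu(struct pmu_sketch *pmu)
{
	pmu->cpu_context = find_pmu_context(pmu->task_ctx_nr);
	if (!pmu->cpu_context)
		pmu->cpu_context = calloc(1, sizeof(*pmu->cpu_context));
	pmu->next = pmus;
	pmus = pmu;
}

int main(void)
{
	struct pmu_sketch a = { .task_ctx_nr = 0 };
	struct pmu_sketch b = { .task_ctx_nr = 0 };

	register_pmu(&a);
	register_pmu(&b);
	printf("shared cpu context: %s\n",
	       a.cpu_context == b.cpu_context ? "yes" : "no");

	/* No-op while another registered PMU still points at it. */
	free_pmu_context(a.cpu_context);
	return 0;
}
```

This sharing is also why perf_pmu_unregister() now calls free_pmu_context() instead of free_percpu(): the per-cpu context may still belong to another PMU that registered with the same task_ctx_nr.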
@@ -5052,6 +5143,10 @@ int perf_pmu_register(struct pmu *pmu) | |||
5052 | if (!pmu->pmu_disable_count) | 5143 | if (!pmu->pmu_disable_count) |
5053 | goto unlock; | 5144 | goto unlock; |
5054 | 5145 | ||
5146 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); | ||
5147 | if (pmu->pmu_cpu_context) | ||
5148 | goto got_cpu_context; | ||
5149 | |||
5055 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); | 5150 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); |
5056 | if (!pmu->pmu_cpu_context) | 5151 | if (!pmu->pmu_cpu_context) |
5057 | goto free_pdc; | 5152 | goto free_pdc; |
@@ -5067,6 +5162,7 @@ int perf_pmu_register(struct pmu *pmu) | |||
5067 | cpuctx->timer.function = perf_event_context_tick; | 5162 | cpuctx->timer.function = perf_event_context_tick; |
5068 | } | 5163 | } |
5069 | 5164 | ||
5165 | got_cpu_context: | ||
5070 | if (!pmu->start_txn) { | 5166 | if (!pmu->start_txn) { |
5071 | if (pmu->pmu_enable) { | 5167 | if (pmu->pmu_enable) { |
5072 | /* | 5168 | /* |
@@ -5114,7 +5210,7 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
5114 | synchronize_srcu(&pmus_srcu); | 5210 | synchronize_srcu(&pmus_srcu); |
5115 | 5211 | ||
5116 | free_percpu(pmu->pmu_disable_count); | 5212 | free_percpu(pmu->pmu_disable_count); |
5117 | free_percpu(pmu->pmu_cpu_context); | 5213 | free_pmu_context(pmu->pmu_cpu_context); |
5118 | } | 5214 | } |
5119 | 5215 | ||
5120 | struct pmu *perf_init_event(struct perf_event *event) | 5216 | struct pmu *perf_init_event(struct perf_event *event) |
@@ -5628,16 +5724,13 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
5628 | } | 5724 | } |
5629 | } | 5725 | } |
5630 | 5726 | ||
5631 | /* | 5727 | static void perf_event_exit_task_context(struct task_struct *child, int ctxn) |
5632 | * When a child task exits, feed back event values to parent events. | ||
5633 | */ | ||
5634 | void perf_event_exit_task(struct task_struct *child) | ||
5635 | { | 5728 | { |
5636 | struct perf_event *child_event, *tmp; | 5729 | struct perf_event *child_event, *tmp; |
5637 | struct perf_event_context *child_ctx; | 5730 | struct perf_event_context *child_ctx; |
5638 | unsigned long flags; | 5731 | unsigned long flags; |
5639 | 5732 | ||
5640 | if (likely(!child->perf_event_ctxp)) { | 5733 | if (likely(!child->perf_event_ctxp[ctxn])) { |
5641 | perf_event_task(child, NULL, 0); | 5734 | perf_event_task(child, NULL, 0); |
5642 | return; | 5735 | return; |
5643 | } | 5736 | } |
@@ -5649,7 +5742,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
5649 | * scheduled, so we are now safe from rescheduling changing | 5742 | * scheduled, so we are now safe from rescheduling changing |
5650 | * our context. | 5743 | * our context. |
5651 | */ | 5744 | */ |
5652 | child_ctx = child->perf_event_ctxp; | 5745 | child_ctx = child->perf_event_ctxp[ctxn]; |
5653 | __perf_event_task_sched_out(child_ctx); | 5746 | __perf_event_task_sched_out(child_ctx); |
5654 | 5747 | ||
5655 | /* | 5748 | /* |
@@ -5658,7 +5751,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
5658 | * incremented the context's refcount before we do put_ctx below. | 5751 | * incremented the context's refcount before we do put_ctx below. |
5659 | */ | 5752 | */ |
5660 | raw_spin_lock(&child_ctx->lock); | 5753 | raw_spin_lock(&child_ctx->lock); |
5661 | child->perf_event_ctxp = NULL; | 5754 | child->perf_event_ctxp[ctxn] = NULL; |
5662 | /* | 5755 | /* |
5663 | * If this context is a clone; unclone it so it can't get | 5756 | * If this context is a clone; unclone it so it can't get |
5664 | * swapped to another process while we're removing all | 5757 | * swapped to another process while we're removing all |
@@ -5711,6 +5804,17 @@ again: | |||
5711 | put_ctx(child_ctx); | 5804 | put_ctx(child_ctx); |
5712 | } | 5805 | } |
5713 | 5806 | ||
5807 | /* | ||
5808 | * When a child task exits, feed back event values to parent events. | ||
5809 | */ | ||
5810 | void perf_event_exit_task(struct task_struct *child) | ||
5811 | { | ||
5812 | int ctxn; | ||
5813 | |||
5814 | for_each_task_context_nr(ctxn) | ||
5815 | perf_event_exit_task_context(child, ctxn); | ||
5816 | } | ||
5817 | |||
5714 | static void perf_free_event(struct perf_event *event, | 5818 | static void perf_free_event(struct perf_event *event, |
5715 | struct perf_event_context *ctx) | 5819 | struct perf_event_context *ctx) |
5716 | { | 5820 | { |
@@ -5732,32 +5836,37 @@ static void perf_free_event(struct perf_event *event, | |||
5732 | 5836 | ||
5733 | /* | 5837 | /* |
5734 | * free an unexposed, unused context as created by inheritance by | 5838 | * free an unexposed, unused context as created by inheritance by |
5735 | * init_task below, used by fork() in case of fail. | 5839 | * perf_event_init_task below, used by fork() in case of fail. |
5736 | */ | 5840 | */ |
5737 | void perf_event_free_task(struct task_struct *task) | 5841 | void perf_event_free_task(struct task_struct *task) |
5738 | { | 5842 | { |
5739 | struct perf_event_context *ctx = task->perf_event_ctxp; | 5843 | struct perf_event_context *ctx; |
5740 | struct perf_event *event, *tmp; | 5844 | struct perf_event *event, *tmp; |
5845 | int ctxn; | ||
5741 | 5846 | ||
5742 | if (!ctx) | 5847 | for_each_task_context_nr(ctxn) { |
5743 | return; | 5848 | ctx = task->perf_event_ctxp[ctxn]; |
5849 | if (!ctx) | ||
5850 | continue; | ||
5744 | 5851 | ||
5745 | mutex_lock(&ctx->mutex); | 5852 | mutex_lock(&ctx->mutex); |
5746 | again: | 5853 | again: |
5747 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 5854 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, |
5748 | perf_free_event(event, ctx); | 5855 | group_entry) |
5856 | perf_free_event(event, ctx); | ||
5749 | 5857 | ||
5750 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, | 5858 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, |
5751 | group_entry) | 5859 | group_entry) |
5752 | perf_free_event(event, ctx); | 5860 | perf_free_event(event, ctx); |
5753 | 5861 | ||
5754 | if (!list_empty(&ctx->pinned_groups) || | 5862 | if (!list_empty(&ctx->pinned_groups) || |
5755 | !list_empty(&ctx->flexible_groups)) | 5863 | !list_empty(&ctx->flexible_groups)) |
5756 | goto again; | 5864 | goto again; |
5757 | 5865 | ||
5758 | mutex_unlock(&ctx->mutex); | 5866 | mutex_unlock(&ctx->mutex); |
5759 | 5867 | ||
5760 | put_ctx(ctx); | 5868 | put_ctx(ctx); |
5869 | } | ||
5761 | } | 5870 | } |
5762 | 5871 | ||
5763 | /* | 5872 | /* |
@@ -5863,17 +5972,18 @@ static int inherit_group(struct perf_event *parent_event, | |||
5863 | static int | 5972 | static int |
5864 | inherit_task_group(struct perf_event *event, struct task_struct *parent, | 5973 | inherit_task_group(struct perf_event *event, struct task_struct *parent, |
5865 | struct perf_event_context *parent_ctx, | 5974 | struct perf_event_context *parent_ctx, |
5866 | struct task_struct *child, | 5975 | struct task_struct *child, int ctxn, |
5867 | int *inherited_all) | 5976 | int *inherited_all) |
5868 | { | 5977 | { |
5869 | int ret; | 5978 | int ret; |
5870 | struct perf_event_context *child_ctx = child->perf_event_ctxp; | 5979 | struct perf_event_context *child_ctx; |
5871 | 5980 | ||
5872 | if (!event->attr.inherit) { | 5981 | if (!event->attr.inherit) { |
5873 | *inherited_all = 0; | 5982 | *inherited_all = 0; |
5874 | return 0; | 5983 | return 0; |
5875 | } | 5984 | } |
5876 | 5985 | ||
5986 | child_ctx = child->perf_event_ctxp[ctxn]; | ||
5877 | if (!child_ctx) { | 5987 | if (!child_ctx) { |
5878 | /* | 5988 | /* |
5879 | * This is executed from the parent task context, so | 5989 | * This is executed from the parent task context, so |
@@ -5886,7 +5996,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
5886 | if (!child_ctx) | 5996 | if (!child_ctx) |
5887 | return -ENOMEM; | 5997 | return -ENOMEM; |
5888 | 5998 | ||
5889 | child->perf_event_ctxp = child_ctx; | 5999 | child->perf_event_ctxp[ctxn] = child_ctx; |
5890 | } | 6000 | } |
5891 | 6001 | ||
5892 | ret = inherit_group(event, parent, parent_ctx, | 6002 | ret = inherit_group(event, parent, parent_ctx, |
@@ -5901,7 +6011,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
5901 | /* | 6011 | /* |
5902 | * Initialize the perf_event context in task_struct | 6012 | * Initialize the perf_event context in task_struct |
5903 | */ | 6013 | */ |
5904 | int perf_event_init_task(struct task_struct *child) | 6014 | int perf_event_init_context(struct task_struct *child, int ctxn) |
5905 | { | 6015 | { |
5906 | struct perf_event_context *child_ctx, *parent_ctx; | 6016 | struct perf_event_context *child_ctx, *parent_ctx; |
5907 | struct perf_event_context *cloned_ctx; | 6017 | struct perf_event_context *cloned_ctx; |
@@ -5910,19 +6020,19 @@ int perf_event_init_task(struct task_struct *child) | |||
5910 | int inherited_all = 1; | 6020 | int inherited_all = 1; |
5911 | int ret = 0; | 6021 | int ret = 0; |
5912 | 6022 | ||
5913 | child->perf_event_ctxp = NULL; | 6023 | child->perf_event_ctxp[ctxn] = NULL; |
5914 | 6024 | ||
5915 | mutex_init(&child->perf_event_mutex); | 6025 | mutex_init(&child->perf_event_mutex); |
5916 | INIT_LIST_HEAD(&child->perf_event_list); | 6026 | INIT_LIST_HEAD(&child->perf_event_list); |
5917 | 6027 | ||
5918 | if (likely(!parent->perf_event_ctxp)) | 6028 | if (likely(!parent->perf_event_ctxp[ctxn])) |
5919 | return 0; | 6029 | return 0; |
5920 | 6030 | ||
5921 | /* | 6031 | /* |
5922 | * If the parent's context is a clone, pin it so it won't get | 6032 | * If the parent's context is a clone, pin it so it won't get |
5923 | * swapped under us. | 6033 | * swapped under us. |
5924 | */ | 6034 | */ |
5925 | parent_ctx = perf_pin_task_context(parent); | 6035 | parent_ctx = perf_pin_task_context(parent, ctxn); |
5926 | 6036 | ||
5927 | /* | 6037 | /* |
5928 | * No need to check if parent_ctx != NULL here; since we saw | 6038 | * No need to check if parent_ctx != NULL here; since we saw |
@@ -5942,20 +6052,20 @@ int perf_event_init_task(struct task_struct *child) | |||
5942 | * the list, not manipulating it: | 6052 | * the list, not manipulating it: |
5943 | */ | 6053 | */ |
5944 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { | 6054 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { |
5945 | ret = inherit_task_group(event, parent, parent_ctx, child, | 6055 | ret = inherit_task_group(event, parent, parent_ctx, |
5946 | &inherited_all); | 6056 | child, ctxn, &inherited_all); |
5947 | if (ret) | 6057 | if (ret) |
5948 | break; | 6058 | break; |
5949 | } | 6059 | } |
5950 | 6060 | ||
5951 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6061 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
5952 | ret = inherit_task_group(event, parent, parent_ctx, child, | 6062 | ret = inherit_task_group(event, parent, parent_ctx, |
5953 | &inherited_all); | 6063 | child, ctxn, &inherited_all); |
5954 | if (ret) | 6064 | if (ret) |
5955 | break; | 6065 | break; |
5956 | } | 6066 | } |
5957 | 6067 | ||
5958 | child_ctx = child->perf_event_ctxp; | 6068 | child_ctx = child->perf_event_ctxp[ctxn]; |
5959 | 6069 | ||
5960 | if (child_ctx && inherited_all) { | 6070 | if (child_ctx && inherited_all) { |
5961 | /* | 6071 | /* |
@@ -5984,6 +6094,22 @@ int perf_event_init_task(struct task_struct *child) | |||
5984 | return ret; | 6094 | return ret; |
5985 | } | 6095 | } |
5986 | 6096 | ||
6097 | /* | ||
6098 | * Initialize the perf_event context in task_struct | ||
6099 | */ | ||
6100 | int perf_event_init_task(struct task_struct *child) | ||
6101 | { | ||
6102 | int ctxn, ret; | ||
6103 | |||
6104 | for_each_task_context_nr(ctxn) { | ||
6105 | ret = perf_event_init_context(child, ctxn); | ||
6106 | if (ret) | ||
6107 | return ret; | ||
6108 | } | ||
6109 | |||
6110 | return 0; | ||
6111 | } | ||
6112 | |||
5987 | static void __init perf_event_init_all_cpus(void) | 6113 | static void __init perf_event_init_all_cpus(void) |
5988 | { | 6114 | { |
5989 | struct swevent_htable *swhash; | 6115 | struct swevent_htable *swhash; |