Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--	kernel/perf_counter.c	| 248
1 file changed, 206 insertions(+), 42 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 416861ce8b27..f5e81dd193d1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -80,8 +80,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 		list_del_init(&sibling->list_entry);
 		list_add_tail(&sibling->list_entry, &ctx->counter_list);
-		WARN_ON_ONCE(!sibling->group_leader);
-		WARN_ON_ONCE(sibling->group_leader == sibling);
 		sibling->group_leader = sibling;
 	}
 }
@@ -97,6 +95,7 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -107,7 +106,7 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->hw_perf_counter_disable(counter);
@@ -136,7 +135,7 @@ static void __perf_counter_remove_from_context(void *info)
 					  perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 
@@ -199,6 +198,7 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
 	int cpu = smp_processor_id();
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -209,7 +209,7 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +232,7 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -446,10 +446,9 @@ int perf_counter_task_disable(void)
 	 */
	perf_flags = hw_perf_save_disable();
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry)
 		counter->state = PERF_COUNTER_STATE_OFF;
-	}
+
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
@@ -526,26 +525,6 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 }
 
 /*
- * Initialize the perf_counter context in a task_struct:
- */
-static void
-__perf_counter_init_context(struct perf_counter_context *ctx,
-			struct task_struct *task)
-{
-	spin_lock_init(&ctx->lock);
-	INIT_LIST_HEAD(&ctx->counter_list);
-	ctx->nr_counters = 0;
-	ctx->task = task;
-}
-/*
- * Initialize the perf_counter context in task_struct
- */
-void perf_counter_init_task(struct task_struct *task)
-{
-	__perf_counter_init_context(&task->perf_counter_ctx, task);
-}
-
-/*
  * Cross CPU call to read the hardware counter
  */
 static void __hw_perf_counter_read(void *info)
@@ -663,7 +642,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 
-		WARN_ON_ONCE(ctx->task);
 		return ctx;
 	}
 
@@ -915,12 +893,13 @@ sw_perf_counter_init(struct perf_counter *counter)
 static struct perf_counter *
 perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
-		   struct perf_counter *group_leader)
+		   struct perf_counter *group_leader,
+		   gfp_t gfpflags)
 {
 	const struct hw_perf_counter_ops *hw_ops;
 	struct perf_counter *counter;
 
-	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+	counter = kzalloc(sizeof(*counter), gfpflags);
 	if (!counter)
 		return NULL;
 
@@ -947,9 +926,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	hw_ops = NULL;
 	if (!hw_event->raw && hw_event->type < 0)
 		hw_ops = sw_perf_counter_init(counter);
-	if (!hw_ops) {
+	if (!hw_ops)
 		hw_ops = hw_perf_counter_init(counter);
-	}
 
 	if (!hw_ops) {
 		kfree(counter);
@@ -975,8 +953,10 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
 	struct perf_counter_context *ctx;
+	struct file *counter_file = NULL;
 	struct file *group_file = NULL;
 	int fput_needed = 0;
+	int fput_needed2 = 0;
 	int ret;
 
 	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
@@ -1017,25 +997,29 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	}
 
 	ret = -EINVAL;
-	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
+	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
 	if (!counter)
 		goto err_put_context;
 
-	perf_install_in_context(ctx, counter, cpu);
-
 	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
 	if (ret < 0)
-		goto err_remove_free_put_context;
+		goto err_free_put_context;
+
+	counter_file = fget_light(ret, &fput_needed2);
+	if (!counter_file)
+		goto err_free_put_context;
+
+	counter->filp = counter_file;
+	perf_install_in_context(ctx, counter, cpu);
+
+	fput_light(counter_file, fput_needed2);
 
 out_fput:
 	fput_light(group_file, fput_needed);
 
 	return ret;
 
-err_remove_free_put_context:
-	mutex_lock(&counter->mutex);
-	perf_counter_remove_from_context(counter);
-	mutex_unlock(&counter->mutex);
+err_free_put_context:
 	kfree(counter);
 
 err_put_context:
@@ -1044,6 +1028,186 @@ err_put_context:
 	goto out_fput;
 }
 
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	ctx->task = task;
+}
+
+/*
+ * inherit a counter from parent task to child task:
+ */
+static int
+inherit_counter(struct perf_counter *parent_counter,
+		struct task_struct *parent,
+		struct perf_counter_context *parent_ctx,
+		struct task_struct *child,
+		struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *child_counter;
+
+	child_counter = perf_counter_alloc(&parent_counter->hw_event,
+					   parent_counter->cpu, NULL,
+					   GFP_ATOMIC);
+	if (!child_counter)
+		return -ENOMEM;
+
+	/*
+	 * Link it up in the child's context:
+	 */
+	child_counter->ctx = child_ctx;
+	child_counter->task = child;
+	list_add_counter(child_counter, child_ctx);
+	child_ctx->nr_counters++;
+
+	child_counter->parent = parent_counter;
+	parent_counter->nr_inherited++;
+	/*
+	 * inherit into child's child as well:
+	 */
+	child_counter->hw_event.inherit = 1;
+
+	/*
+	 * Get a reference to the parent filp - we will fput it
+	 * when the child counter exits. This is safe to do because
+	 * we are in the parent and we know that the filp still
+	 * exists and has a nonzero count:
+	 */
+	atomic_long_inc(&parent_counter->filp->f_count);
+
+	return 0;
+}
+
+static void
+__perf_counter_exit_task(struct task_struct *child,
+			 struct perf_counter *child_counter,
+			 struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *parent_counter;
+	u64 parent_val, child_val;
+	u64 perf_flags;
+
+	/*
+	 * Disable and unlink this counter.
+	 *
+	 * Be careful about zapping the list - IRQ/NMI context
+	 * could still be processing it:
+	 */
+	local_irq_disable();
+	perf_flags = hw_perf_save_disable();
+
+	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE)
+		child_counter->hw_ops->hw_perf_counter_disable(child_counter);
+	list_del_init(&child_counter->list_entry);
+
+	hw_perf_restore(perf_flags);
+	local_irq_enable();
+
+	parent_counter = child_counter->parent;
+	/*
+	 * It can happen that the parent exits first, and has counters
+	 * that are still around due to the child reference. These
+	 * counters need to be zapped - but otherwise linger.
+	 */
+	if (!parent_counter)
+		return;
+
+	parent_val = atomic64_read(&parent_counter->count);
+	child_val = atomic64_read(&child_counter->count);
+
+	/*
+	 * Add back the child's count to the parent's count:
+	 */
+	atomic64_add(child_val, &parent_counter->count);
+
+	fput(parent_counter->filp);
+
+	kfree(child_counter);
+}
+
+/*
+ * When a child task exits, feed back counter values to parent counters.
+ *
+ * Note: we are running in child context, but the PID is not hashed
+ * anymore so new counters will not be added.
+ */
+void perf_counter_exit_task(struct task_struct *child)
+{
+	struct perf_counter *child_counter, *tmp;
+	struct perf_counter_context *child_ctx;
+
+	child_ctx = &child->perf_counter_ctx;
+
+	if (likely(!child_ctx->nr_counters))
+		return;
+
+	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
+				 list_entry)
+		__perf_counter_exit_task(child, child_counter, child_ctx);
+}
+
+/*
+ * Initialize the perf_counter context in task_struct
+ */
+void perf_counter_init_task(struct task_struct *child)
+{
+	struct perf_counter_context *child_ctx, *parent_ctx;
+	struct perf_counter *counter, *parent_counter;
+	struct task_struct *parent = current;
+	unsigned long flags;
+
+	child_ctx = &child->perf_counter_ctx;
+	parent_ctx = &parent->perf_counter_ctx;
+
+	__perf_counter_init_context(child_ctx, child);
+
+	/*
+	 * This is executed from the parent task context, so inherit
+	 * counters that have been marked for cloning:
+	 */
+
+	if (likely(!parent_ctx->nr_counters))
+		return;
+
+	/*
+	 * Lock the parent list. No need to lock the child - not PID
+	 * hashed yet and not running, so nobody can access it.
+	 */
+	spin_lock_irqsave(&parent_ctx->lock, flags);
+
+	/*
+	 * We don't have to disable NMIs - we are only looking at
+	 * the list, not manipulating it:
+	 */
+	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
+		if (!counter->hw_event.inherit || counter->group_leader != counter)
+			continue;
+
+		/*
+		 * Instead of creating recursive hierarchies of counters,
+		 * we link inherited counters back to the original parent,
+		 * which has a filp for sure, which we use as the reference
+		 * count:
+		 */
+		parent_counter = counter;
+		if (counter->parent)
+			parent_counter = counter->parent;
+
+		if (inherit_counter(parent_counter, parent,
+				    parent_ctx, child, child_ctx))
+			break;
+	}
+
+	spin_unlock_irqrestore(&parent_ctx->lock, flags);
+}
+
 static void __cpuinit perf_counter_init_cpu(int cpu)
 {
 	struct perf_cpu_context *cpuctx;